把自己的网站卖给别人后对方做违法,成都网站托管,系统流小说,网站备案资料下载

摘要：python几万条微博高频词分析。看到别人有做影视热评的分析统计，觉得挺好玩的，就来试试。看看效果：Screenshot_2018-05-21-11-00-42-879_com.master.wei.png。思路：抓取想要的微博数据写入数据库；分词统计出词汇出现次数；过滤无意义的干扰词；存入数据库；写接口…

# python几万条微博高频词分析

看到别人有做影视热评的分析统计，觉得挺好玩的，就来试试。

## 看看效果

Screenshot_2018-05-21-11-00-42-879_com.master.wei.png

## 思路

1. 抓取想要的微博数据写入数据库
2. 分词统计出词汇出现次数
3. 过滤无意义的干扰词
4. 存入数据库
5. 写接口，然后 Android 端展示

## 代码

数据库连接 masterWeiBo.Utils.Sql

```python
import pymysql
import pymysql.cursors
import threading


class Mydb(object):
    tableName = 'master'

    def __init__(self):
        self.lock = threading.Lock()
        self.client = pymysql.connect(host='localhost', charset='utf8', port=3306, user='root', passwd='ck123', db='weibo', cursorclass=pymysql.cursors.DictCursor)
        self.client.autocommit(True)
        self.cursor = self.client.cursor()
```

开始

```python
import jieba
from masterWeiBo.Utils.Sql import Mydb as db


# 创建停用词list
def stopwordslist(filepath):
    stopwords = [line.strip() for line in open(filepath, 'r', encoding='utf-8').readlines()]
    return stopwords


cursor = db().cursor
# 如果不存在词表就创建
cursor.execute("CREATE TABLE IF NOT EXISTS weibo.masterWeiBo_category (id INT NOT NULL AUTO_INCREMENT,count INT NOT NULL DEFAULT 0,category VARCHAR(100) NOT NULL,wordsTop10 VARCHAR(1000) NULL,PRIMARY KEY (id));")
# 清空词表
cursor.execute("DELETE FROM weibo.masterWeiBo_category")
# 获取分类分词
cursor.execute("SELECT count(id) as countd, come FROM weibo.masterWeiBo_master GROUP BY come")
results = cursor.fetchall()
print(results)
dicts = []
# 加载过滤词汇
stopwords = stopwordslist("/root/PYServer/myFirstPYServer/words.txt")
for result in results:
    each = {}
    each['count'] = result['countd']
    each['come'] = result['come']
    print(result['countd'])
    print(result['come'])
    cursor.execute("SELECT content from weibo.masterWeiBo_master where come = '" + result['come'] + "'")
    contents = cursor.fetchall()
    articals = ''
    # 把指定分类的内容拼接起来
    for artical in contents:
        articals += "," + artical['content']
    # 结巴分词
    cuts = jieba.cut(articals)
    words = {}
    # 统计词频
    for cut in cuts:
        if(cut in words):
            words[cut] = words[cut] + 1
        else:
            words[cut] = 1
    # 按词频倒序排列
    sortedWords = sorted(words.items(), key=lambda d: d[1], reverse=True)
    wordsTop10 = ''
    i = 0
    # 获取top10词汇
    for key, value in sortedWords:
        # 过滤无效词汇
        if(key in stopwords or key.__len__() < 2):
            continue
        wordsTop10 += key + "," + str(value) + ";"
        i += 1
        if(i == 10):
            wordsTop10 = wordsTop10[:wordsTop10.__len__() - 1]
            break
    each['wordsTop10'] = wordsTop10
    dicts.append(each)
# 写入数据库
for value in dicts:
    sql = "INSERT INTO weibo.masterWeiBo_category (count,category,wordsTop10) values('" + str(value['count']) + "','" + value['come'] + "','" + value['wordsTop10'] + "')"
    print(sql)
    cursor.execute(sql)
cursor.close()
print(dicts)
```

大功告成