建站论坛,图片生成器免费,seo优化器,百度大数据分析 1. 需要的三个包： from wordcloud import WordCloud #词云库
import matplotlib.pyplot as plt #数学绘图库
import jieba; 2. 定义变量（将对应的变量放到一个全局的文件中）： import re;
pdurl_first=https://movie.do… 1. 需要的三个包： from wordcloud import WordCloud #词云库
import matplotlib.pyplot as plt #数学绘图库
import jieba; 2. 定义变量（将对应的变量放到一个全局的文件中）： import re;
# Shared crawler settings; the scraper reads these as Param.pdurl_first,
# Param.head, Param.reg and Param.cookies.
# The text dump this was recovered from had lost every `=`, `&` and quote
# character; they are restored here so the module parses again.

# First page of the short-comment listing for Douban subject 26363254.
pdurl_first = 'https://movie.douban.com/subject/26363254/comments?start=0'

# Browser-like request headers sent with every request.
head = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Ubuntu Chromium/59.0.3071.109 Chrome/59.0.3071.109 Safari/537.36',
}

# Captures the leading part of the "next page" link's href, up to (not
# including) the first HTML-escaped ampersand, e.g. "?start=20".
reg = re.compile(r'a href="(.*?)&amp;.*?class="next"')  # next page
# Cookies sent with every request, copied from a browser session.
# NOTE(review): these look like session-specific values (including a login
# token in `dbcl2`) and have presumably expired - replace with your own.
# The duplicated `push_doumail_num` entry of the original literal is listed
# once; a repeated key in a dict literal is silently overwritten anyway.
cookies = {
    '__utma': '30149280.503249607.1504402391.1504402391.1504402391.1',
    '_utmb': '30149280.2.9.1504402391',
    '__utmc': '30149280',
    '__utmt': '1',
    '__utmz': '30149280.1504402391.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)',
    'ap': '1',
    'as': 'https://movie.douban.com/subject/26363254/comments?start=225&limit=20&sort=new_score&status=P',
    'bid': 'g7k4BGd2sRk',
    'ck': '76vs',
    'dbcl2': '166279730:fohmXhoM9uU',
    'ps': 'y',
    'push_doumail_num': '0',
}

# 3. Fetch the data
import requests
import re;
from GrabData import Param;
import pandas as pd;
from bs4 import BeautifulSoup


class GrabComent:
    """Crawl the Douban short comments of one film and append them to a CSV.

    Instantiating the class runs the whole crawl: it requests
    ``Param.pdurl_first`` and keeps following the "next page" link until a
    response is not HTTP 200 or the page links to no further page.
    """

    # One match per comment, six captured fields.
    # NOTE(review): this pattern was reconstructed from a text dump that had
    # lost its `<`, `>`, `=` and `"` characters; the tag boundaries below are
    # a best guess - verify against the live page markup before relying on it.
    ren = re.compile(
        r'<span class="votes">(.*?)</span>.*?comment">.*?</span>.*?<span.*?class="">(.*?)</a>'
        r'.*?<span>(.*?)</span>.*?title="(.*?)"></span>.*?title="(.*?)"><p .*?> (.*?)</p>',
        re.S,
    )

    def __init__(self):
        print('开始抓取数据')
        html = requests.get(Param.pdurl_first, headers=Param.head, cookies=Param.cookies)
        while html.status_code == 200:
            zhanlang = re.findall(self.ren, html.text)
            print(zhanlang)
            # Append this page's rows to the CSV file ('a' is append mode).
            pd.DataFrame(zhanlang).to_csv(
                'H:\\python_projects\\ticket\\zhanlangpinglun.csv',
                header=False, index=False, mode='a')

            # Resolve the next page only after saving: the last page has no
            # "next" link, and indexing [0] on the empty result used to raise
            # IndexError before that page's comments were written.
            next_links = re.findall(Param.reg, html.text)
            if not next_links:
                break
            url_next = 'https://movie.douban.com/subject/26363254/comments' + next_links[0]
            print('下一页地址' + url_next)
            html = requests.get(url_next, cookies=Param.cookies, headers=Param.head)


if __name__ == '__main__':
    GrabComent()

# 4. Generate the word cloud
from wordcloud import WordCloud  # word-cloud library
import matplotlib.pyplot as plt #数学绘图库
import jieba


class WordYun:
    """Build and display a word cloud from the scraped comment CSV.

    Instantiating the class runs the whole pipeline: read the CSV, segment
    the text with jieba, then render the word cloud with matplotlib.
    """

    def __init__(self):
        print('开始读取文件!')
        self.main()

    def main(self):
        """Read the comment text and show its word cloud."""
        text = self.readFile()
        self.showTitle(text)

    def showTitle(self, text1):
        """Render ``text1`` (space-separated words) as a word cloud window."""
        wc1 = WordCloud(
            background_color='white',
            width=1000,
            height=860,
            # Without an explicit CJK font the words render as empty boxes.
            font_path='D:\\Windows\\Fonts\\STFANGSO.ttf',
            margin=2,
        )
        wc2 = wc1.generate(text1)
        plt.imshow(wc2)
        plt.axis('off')
        plt.show()

    def readFile(self):
        """Return the CSV's words of two or more characters, space-joined."""
        # Same file the crawler appends to. pandas writes UTF-8 by default,
        # so decode explicitly instead of relying on the platform default,
        # and use `with` so the handle is closed (the original leaked it).
        with open('H:\\python_projects\\ticket\\zhanlangpinglun.csv', 'r',
                  encoding='utf-8') as f:
            content = f.read()
        # Drop single-character tokens from the segmentation.
        a = [word for word in jieba.cut(content) if len(word) > 1]
        txt = ' '.join(a)
        print('readFile返回的结果' + txt)
        return txt


if __name__ == '__main__':
    WordYun()

# Reposted from: https://www.cnblogs.com/wangshunyao/p/7534883.html