【Python】Python抓取了王力宏事件的相關報道，發現了一個更大的瓜

有網友還對他的前妻的發文做了一個簡短的總結，如下圖所示


@retry(stop=stop_after_attempt(7))
def do_requests(uid, pageNum, timeout=10):
    """Fetch one page of a Weibo repost listing and return the raw HTML text.

    The function is wrapped by ``retry(stop=stop_after_attempt(7))``
    (presumably tenacity -- confirm against the file's imports), so an
    exception raised here leads to further attempts, up to seven in total.

    Args:
        uid: Value substituted into the ``uid`` query parameter.
        pageNum: Value substituted into the ``page`` query parameter.
        timeout: Seconds to wait for the server before ``requests`` raises.
            Without a timeout a stalled connection blocks forever and the
            retry decorator never gets a chance to run.

    Returns:
        The response body decoded as text (``response.text``).
    """
    # NOTE(review): this is a hard-coded logged-in session cookie; it will
    # expire and should not live in source control -- consider loading it
    # from configuration or the environment.
    headers = {
        "cookie": "SCF=Anhuv5v0Lu8oFE06-PmKm-uqVmUQgSwrLYauTMNCvEmRH0iOd-jT0poB-pgkpX_aJsOYqZjgw_F8TAZ0SL_aE9Q.; _T_WM=32be9637e54d4f58408755d6f8100d5c; SUB=_2A25MueV4DeRhGeRN7lQY8ynEwziIHXVsRYswrDV6PUJbkdAKLRPSkW1NU7D9XCuoP6vJEUUVjb0HcSPigsLzxFaW; SSOLoginState=1639814440",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36",
    }
    # The "?" after the post id is the real query-string separator.
    url = "https://weibo.cn/repost/L6w2sfDXb?&uid={}&&page={}".format(uid, pageNum)
    response = requests.get(url, headers=headers, timeout=timeout)
    return response.text
def get_comment(html_data):
    """Extract the comment elements from one page of repost HTML.

    Args:
        html_data: HTML source of a page, as a string.

    Returns:
        The result of ``select("span.ctt")`` on the parsed document, i.e.
        every ``<span>`` element carrying the ``ctt`` class. These are
        element objects, not plain strings.
    """
    html_text = BeautifulSoup(html_data, 'lxml')
    comment_list = html_text.select("span.ctt")
    return comment_list
然後我們根據抓取到的評論生成詞雲圖，代碼如下
def jieba_():
    """Segment the saved comments and return the words kept for the word cloud.

    Stop words are read from ``chineseStopWords.txt`` (GBK encoded) and the
    comments from ``comment_data.txt`` (UTF-8, one comment per line). The
    comments are joined and segmented with ``jieba.lcut``.

    Returns:
        A list of tokens that are not stop words, consist solely of
        characters in the range U+4E00..U+9FA5, and are at least two
        characters long. Duplicates and order are preserved.
    """
    # Use a context manager so the stop-word file is closed deterministically
    # (the original left the handle open).
    with open("chineseStopWords.txt", encoding="GBK") as stop_words_file:
        stop_words = {line.strip() for line in stop_words_file}
    # NOTE(review): these extra stop words are in traditional script; if the
    # scraped comments are simplified Chinese they may never match -- confirm.
    stop_words.update(["回復", "有沒有"])

    with open("comment_data.txt", "r", encoding="utf-8") as comment_data_list:
        # Lines keep their trailing newline, exactly as before.
        comment_list = list(comment_data_list)

    text = ", ".join(comment_list)
    word_num = jieba.lcut(text, cut_all=False)
    rule = re.compile(r"^[\u4e00-\u9fa5]+$")
    word_num_selected = [
        word
        for word in word_num
        if word not in stop_words and rule.search(word) and len(word) >= 2
    ]
    return word_num_selected
def plot_word_cloud(text):
    """Render a word cloud from the given words, display it and save it.

    The mask is loaded from ``gua_1.jpg``, glyphs are drawn with
    ``KAITI.ttf``, and the result is written to ``melon_1.png``.

    Args:
        text: Iterable of word strings; they are joined with ``", "``
            before being handed to ``WordCloud.generate``.
    """
    # Load the image that serves as the word-cloud mask.
    cloud_mask = np.array(Image.open('gua_1.jpg'))
    # Configure the word cloud.
    wc = WordCloud(
        # White background colour.
        background_color='white',
        # Mask image loaded above.
        mask=cloud_mask,
        # Maximum number of words to draw.
        max_words=200,
        # Font file used so that Chinese text can be rendered.
        font_path='KAITI.ttf',
        # Largest font size used.
        max_font_size=100
    )
    text_ = ", ".join(text)
    # Build the word cloud from the joined text.
    x = wc.generate(text_)
    # Convert the word cloud to an image object.
    image = x.to_image()
    # Display the image.
    image.show()
    # Save the word cloud to disk.
    wc.to_file('melon_1.png')


往期精彩回顧 本站qq群955171419,加入微信群請掃碼:
評論
圖片
表情
