Python定時爬取 微博熱搜 并動態展示
作者:葉庭云
來源:凹凸數據
本文介紹了可以實現定時執行任務的schedule模塊,利用它實現定時爬取微博熱搜數據,保存到CSV文件里。 講解pyecharts繪制基本時間輪播圖,最后利用pyecharts實現數據的動態圖可視化。

schedule模塊定時執行任務
# 安裝
pip install schedule -i http://pypi.douban.com/simple --trusted-host pypi.douban.com
import schedule
import time


def run():
    """Job executed by the scheduler; prints a fixed message."""
    print("I'm doing something...")


# Register the same job on several different schedules.
schedule.every(10).minutes.do(run)    # run the job every ten minutes
schedule.every().hour.do(run)         # run the job every hour
schedule.every().day.at("10:30").do(run)  # run the job every day at 10:30
schedule.every().monday.do(run)  # run the job every Monday at this time of day
schedule.every().wednesday.at("13:15").do(run)  # run the job every Wednesday at 13:15

while True:
    schedule.run_pending()  # run_pending: run every job that is currently due
    # Sleep between polls so the loop does not spin a CPU core at 100%;
    # one second is far finer than the shortest interval registered above.
    time.sleep(1)
爬取微博熱搜數據


# -*- coding: UTF-8 -*-
"""
Scrape the Weibo hot-search list on a schedule and append it to a CSV file.

@File    : 微博熱搜榜.py
@Author  : 葉庭云
@Date    : 2020/9/18 15:01
"""
import schedule
import pandas as pd
from datetime import datetime
import logging

# Timestamped "time - LEVEL: message" format for records emitted via logging.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s: %(message)s')
# Module-level flag used by get_content(): 0 until the first batch has been written.
count = 0
def get_content():
    """Fetch the current top-10 Weibo hot searches and append them to ``datas.csv``.

    Reads the first HTML table on the hot-search page, keeps the rank and
    keyword columns of rows 1-10, splits each keyword cell into keyword and
    heat on the two-space separator, stamps every row with the current time
    (minute resolution) and appends the rows to ``datas.csv``.

    Side effects:
        Performs an HTTP request, prints a progress line, appends to
        ``datas.csv`` in the working directory and sets the module-level
        ``count`` flag to 1 on the first call.

    Raises:
        Whatever ``pandas.read_html`` / ``DataFrame.to_csv`` raise on network,
        parsing or file errors; nothing is caught here.
    """
    global count   # module-level flag, see the end of the function
    import os      # function-scope import: only needed for the header check

    csv_path = 'datas.csv'
    print('----------- 正在爬取數據 -------------')
    url = 'https://s.weibo.com/top/summary?cate=realtimehot&sudaref=s.weibo.com&display=0&retcode=6102'

    # NOTE(review): the column labels below must equal the headers of the table
    # served by Weibo; they are assumed to be the Simplified-Chinese
    # '序号' / '关键词' -- confirm against the live page.
    # Rows 1..10 are the top 10 (row 0 is skipped). .copy() makes the later
    # column assignments operate on an independent frame, not on a slice.
    df = pd.read_html(url)[0][1:11][['序号', '关键词']].copy()
    time_ = datetime.now().strftime("%Y/%m/%d %H:%M")     # current time, to the minute

    df['序号'] = df['序号'].astype(int)
    # Each cell holds "<keyword>  <heat>" separated by two spaces; split once.
    parts = df['关键词'].str.split('  ', expand=True)
    df['关键词'] = parts[0]
    df['热度'] = parts[1]
    df['时间'] = time_

    # Write the header only when the file is new or empty. Deciding this from
    # the in-process counter alone would append a second header row in the
    # middle of the data every time the script is restarted.
    write_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0
    df.to_csv(csv_path, mode='a+', index=False, header=write_header)

    # Kept for backward compatibility: the flag flips from 0 to 1 after the
    # first batch written by this process.
    if count == 0:
        count += 1
# Scheduled crawler: run get_content once a minute, forever.
import time  # imported here because only this polling loop needs it

schedule.every(1).minutes.do(get_content)
while True:
    schedule.run_pending()
    # Sleep between polls; without it the loop busy-waits and pins a CPU core
    # while waiting for a job that is due only once a minute.
    time.sleep(1)
pyecharts動態圖可視化
基本時間輪播圖
from pyecharts import options as opts
from pyecharts.charts import Bar, Timeline
from pyecharts.faker import Faker
from pyecharts.globals import CurrentConfig, ThemeType

# Point pyecharts' asset host at a local directory.
# NOTE(review): machine-specific path -- adjust it to the local assets folder.
CurrentConfig.ONLINE_HOST = 'D:/python/pyecharts-assets-master/assets/'

# Build one bar chart per year and add each to the timeline under that year.
tl = Timeline(init_opts=opts.InitOpts(theme=ThemeType.LIGHT))
for year in range(2015, 2020):
    bar = (
        Bar()
        .add_xaxis(Faker.choose())
        .add_yaxis("商家A", Faker.values())
        .add_yaxis("商家B", Faker.values())
        .set_global_opts(title_opts=opts.TitleOpts(f"商店{year}年商品銷售額"))
    )
    tl.add(bar, f"{year}年")
tl.render("timeline_multi_axis.html")

from pyecharts import options as opts
from pyecharts.charts import Bar, Timeline
from pyecharts.faker import Faker
from pyecharts.globals import ThemeType, CurrentConfig

# Point pyecharts' asset host at a local directory.
# NOTE(review): machine-specific path -- adjust it to the local assets folder.
CurrentConfig.ONLINE_HOST = 'D:/python/pyecharts-assets-master/assets/'

# Same yearly carousel as a horizontal bar chart: the axes are swapped with
# reversal_axis() and the value labels are placed to the right of each bar.
tl = Timeline(init_opts=opts.InitOpts(theme=ThemeType.DARK))
for year in range(2015, 2020):
    bar = (
        Bar()
        .add_xaxis(Faker.choose())
        .add_yaxis("商家A", Faker.values(), label_opts=opts.LabelOpts(position="right"))
        .add_yaxis("商家B", Faker.values(), label_opts=opts.LabelOpts(position="right"))
        .reversal_axis()
        .set_global_opts(
            title_opts=opts.TitleOpts(f"Timeline-Bar-Reversal (時間: {year} 年)")
        )
    )
    tl.add(bar, f"{year}年")
tl.render("timeline_bar_reversal.html")

微博熱搜動態圖
import pandas as pd
from pyecharts import options as opts
from pyecharts.charts import Bar, Timeline, Grid
from pyecharts.globals import ThemeType, CurrentConfig

# Point pyecharts' asset host at a local directory.
# NOTE(review): machine-specific path -- adjust it to the local assets folder.
CurrentConfig.ONLINE_HOST = 'D:/python/pyecharts-assets-master/assets/'

# The crawler appends ten rows (the top 10) per run, so ten consecutive rows
# of the CSV form one frame of the animation.
ROWS_PER_FRAME = 10

# NOTE(review): column labels are assumed to be the Simplified-Chinese names
# written by the crawler ('关键词', '热度', '时间') -- confirm against datas.csv.
df = pd.read_csv('datas.csv')
# print(df.info())
t = Timeline(init_opts=opts.InitOpts(theme=ThemeType.MACARONS))  # themed timeline

# Timeline playback settings do not depend on the frame, so set them once
# instead of on every loop iteration.
t.add_schema(
    play_interval=100,          # carousel speed
    is_timeline_show=False,     # whether to show the timeline component
    is_auto_play=True,          # whether to start playing automatically
)

# Derive the number of frames from the data instead of hard-coding it: a fixed
# count raises IndexError on a shorter file and ignores rows of a longer one.
# A trailing incomplete batch is dropped.
frame_count = len(df) // ROWS_PER_FRAME
for i in range(frame_count):
    start = i * ROWS_PER_FRAME
    batch = df.iloc[start:start + ROWS_PER_FRAME]
    # Reverse the batch so that, after the axis swap, rank 1 is drawn at the top.
    keywords = batch['关键词'][::-1].tolist()
    heats = batch['热度'][::-1].tolist()
    bar = (
        Bar()
        .add_xaxis(keywords)            # x-axis data
        .add_yaxis('热度', heats)       # y-axis data
        .reversal_axis()     # swap the axes
        .set_global_opts(    # global options
            title_opts=opts.TitleOpts(  # title options: crawl time of this batch
                title=f"{batch['时间'].iloc[0]}",
                pos_right="5%", pos_bottom="15%",
                title_textstyle_opts=opts.TextStyleOpts(
                    font_family='KaiTi', font_size=24, color='#FF1493'
                )
            ),
            xaxis_opts=opts.AxisOpts(   # x-axis options
                splitline_opts=opts.SplitLineOpts(is_show=True),
            ),
            yaxis_opts=opts.AxisOpts(   # y-axis options
                splitline_opts=opts.SplitLineOpts(is_show=True),
                axislabel_opts=opts.LabelOpts(color='#DC143C')
            )
        )
        .set_series_opts(    # series options
            label_opts=opts.LabelOpts(  # label options
                position="right", color='#9400D3')
        )
    )
    # Wrap the bar chart in a Grid with a 24% left offset.
    grid = (
        Grid()
        .add(bar, grid_opts=opts.GridOpts(pos_left="24%"))
    )
    t.add(grid, "")
t.render('時間輪播圖.html')

評論
圖片
表情
