

完整代碼如下:
import requests
from lxml import etree
import random
import os
from multiprocessing.dummy import Pool
# Make sure the output directory for the downloaded videos exists.
os.makedirs('./視頻', exist_ok=True)

# Download jobs collected below: one {'name': ..., 'my_url': ...} dict per video.
urls = []
url = 'https://www.pearvideo.com/category_5'
headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.72 Safari/537.36 Edg/89.0.774.45'}

# Fetch the category listing page and select one <li> per listed video.
page_text = requests.get(url=url, headers=headers).text
tree = etree.HTML(page_text)
li_list = tree.xpath('//ul[@id="listvideoListUl"]/li')

for li in li_list:
    # Relative link to the video's detail page; evaluated once and reused
    # for both the page URL and the content id.
    href = li.xpath('./div/a/@href')[0]
    a_url = 'https://www.pearvideo.com/' + href
    name = li.xpath('./div/a/div[2]/text()')[0] + '.mp4'

    # Parameters of the videoStatus.jsp request: a random number and the
    # content id, taken as the last 7 characters of the link
    # (assumes ids are exactly 7 characters long -- TODO confirm).
    mrd = random.random()
    code = href[-7:]
    new_headers = {
        # The detail page URL is sent as Referer with the status request.
        'Referer': a_url,
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36 Edg/89.0.774.50',
    }
    new_url = 'https://www.pearvideo.com/videoStatus.jsp?contId=' + str(code) + '&mrd=' + str(mrd)
    r = requests.get(url=new_url, headers=new_headers)

    # SECURITY: the original ran eval() on the response text, which executes
    # whatever the remote server sends as Python code. The body is parsed
    # as JSON instead.
    video_url = r.json()['videoInfo']['videos']['srcUrl']

    # srcUrl is only a pseudo address: the first '-'-separated segment of the
    # file name has to be replaced by 'cont-<id>' to obtain the real address.
    old = video_url.split('/')[-1].split('-')[0]
    new = 'cont-' + str(code)
    true_video_url = video_url.replace(old, new)

    dic = {
        'name': name,
        'my_url': true_video_url,
    }
    urls.append(dic)
# Worker used to request the video data through the thread pool.
def get_video_data(dic):
    """Download one video and store it in the ./視頻/ directory.

    Args:
        dic: mapping with the keys 'name' (file name of the saved video)
            and 'my_url' (URL the video bytes are requested from).
    """
    print(dic['name']+'開始下載'+'\n')
    data_url = dic['my_url']
    # The complete response body is held in memory before being written.
    data = requests.get(url=data_url, headers=headers).content
    with open('./視頻/'+dic['name'], 'wb') as f:
        f.write(data)
    print(dic['name']+'下載成功')
# Run get_video_data over every collected entry on four workers;
# map() returns only after all entries have been processed.
pool = Pool(processes=4)
pool.map(get_video_data, urls)
pool.close()  # no further tasks will be submitted
pool.join()   # wait for the workers to finish
說明:
當前日期(2021/3/14)版本的梨視頻的視頻偽url由ajax獲取。
部分代碼解釋:
1:模塊
import requests #standard library for web crawling (used instead of urllib)
from lxml import etree #used to parse the page content
import random #part of the pear-video request URL needs a random number
import os #mainly used to create the folder that stores the videos
from multiprocessing.dummy import Pool #import the class that provides the thread pool
2:獲取視頻偽url
# Prepare the request parameters: a random number, the content id (last 7
# characters of the video link) and headers that carry the video page URL
# as Referer.
mrd = random.random()
code = li.xpath('./div/a/@href')[0][-7:]
new_headers = {
    'Referer': a_url,
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.82 Safari/537.36 Edg/89.0.774.50',
}
# Request the pseudo video URL.
new_url = 'https://www.pearvideo.com/videoStatus.jsp?contId=' + str(code) + '&mrd=' + str(mrd)
r = requests.get(url=new_url, headers=new_headers)
# SECURITY: the original ran eval() on the response text, which executes
# whatever the remote server sends as Python code. The body is parsed as
# JSON instead.
video_url = r.json()['videoInfo']['videos']['srcUrl']
3:獲取真正url
經本人實驗,使用上文獲得的url爬取視頻下載內容為空。
此處視頻地址做了加密,即ajax中得到的地址需要加上cont-,並且修改一段數字為id才是真地址
真地址:"https://video.pearvideo.com/mp4/third/20201120/cont-1708144-10305425-222728-hd.mp4"
偽地址:"https://video.pearvideo.com/mp4/third/20201120/1606132035863-10305425-222728-hd.mp4"
# A few simple split/replace operations are enough to swap the relevant part:
# take the first '-'-separated segment of the file name and replace it
# with 'cont-<id>'.
file_name = video_url.rsplit('/', 1)[-1]
old = file_name.partition('-')[0]
new = 'cont-{}'.format(code)
true_video_url = video_url.replace(old, new)
4:存儲
# Worker used to request the video data through the thread pool.
def get_video_data(dic):
    """Download one video and store it in the ./視頻/ directory.

    Args:
        dic: mapping with the keys 'name' (file name of the saved video)
            and 'my_url' (URL the video bytes are requested from).
    """
    print(dic['name']+'開始下載'+'\n')
    data_url = dic['my_url']
    # The complete response body is held in memory before being written.
    data = requests.get(url=data_url, headers=headers).content
    with open('./視頻/'+dic['name'], 'wb') as f:
        f.write(data)
    print(dic['name']+'下載成功')
# Run get_video_data over every collected entry on four workers;
# map() returns only after all entries have been processed.
pool = Pool(processes=4)
pool.map(get_video_data, urls)
pool.close()  # no further tasks will be submitted
pool.join()   # wait for the workers to finish
5:結果

到此這篇關於python爬取梨視頻生活板塊最熱視頻的文章就介紹到這了
掃下方二維碼加老師微信
或是搜索老師微信號:XTUOL1988【切記備注:學習Python】
邀您來聽Python web開發,Python爬蟲,Python數據分析,人工智能 免費精品教程,0基礎入門到企業項目實戰教學!
歡迎大家點贊,留言,轉發,轉載,感謝大家的相伴與支持
萬水千山總是情,點個【在看】行不行
*聲明:本文于網絡整理,版權歸原作者所有,如來源信息有誤或侵犯權益,請聯繫我們刪除或授權事宜