利用Python多線程爬取王者榮耀高清壁紙

01
需求分析
# Wallpaper page targeted by this crawler.
url = "https://pvp.qq.com/web201605/wallpaper.shtml"


02
解析數據
把 response 複製到 json.cn 網站後,顯示數據是錯誤的;需要把 jsoncallback=jQuery 的數據刪掉。每一個 Object 就是一組圖片,sProdImgNo_1 是封面小圖。

03
編寫代碼
# Images of different sizes are selected by a number in the URL: "200" must become "0".
# The image URLs are percent-encoded, so parse.unquote is used to decode them.
def extract_images(data):
    """Return the eight decoded image URLs of one wallpaper entry.

    Args:
        data: Mapping that holds percent-encoded URLs under the keys
            ``sProdImgNo_1`` .. ``sProdImgNo_8``.

    Returns:
        A list of eight decoded URLs in key order, each with its last
        ``200`` replaced by ``0``. A URL without ``200`` is returned
        decoded but otherwise unchanged.

    Raises:
        KeyError: If one of the eight ``sProdImgNo_N`` keys is missing.
    """
    image_urls = []
    for x in range(1, 9):
        decoded = parse.unquote(data['sProdImgNo_%d' % x])
        # Replace only the LAST "200": a blanket str.replace would also
        # rewrite any "200" inside a timestamp or id earlier in the URL.
        # NOTE(review): assumes the size marker is the final "200" in the
        # URL - confirm against a live response.
        head, marker, tail = decoded.rpartition('200')
        image_urls.append((head + '0' + tail) if marker else decoded)
    return image_urls
class Producer(threading.Thread):
    """Thread that turns listing-page URLs into image download jobs.

    Each page URL taken from ``page_ueue`` is fetched and parsed as JSON.
    For every entry in the response's ``List`` a directory
    ``image/<name>`` is created and one job dict per image is put on
    ``image_ueue``.

    Args:
        page_ueue: Queue of page URLs to fetch.
        image_ueue: Queue that receives dicts with the keys
            ``image_url`` and ``image_path``.
        *args: Forwarded to ``threading.Thread``.
        **kwargs: Forwarded to ``threading.Thread``.
    """

    def __init__(self, page_ueue, image_ueue, *args, **kwargs):
        # The parent initialiser has to run; a Thread whose __init__ was
        # skipped raises an error when it is started.
        super(Producer, self).__init__(*args, **kwargs)
        self.page_ueue = page_ueue
        self.image_ueue = image_ueue

    def run(self) -> None:
        """Drain the page queue, enqueueing one job per image found."""
        from queue import Empty  # local import: the file's import block is not visible here

        while True:
            # A non-blocking get replaces the empty()-then-get() pair: with
            # several producers another thread can take the last URL between
            # the two calls, leaving this thread blocked forever in get().
            try:
                page_url = self.page_ueue.get(block=False)
            except Empty:
                break
            res = requests.get(page_url, headers=headers)
            # Response.json() parses the JSON body into Python objects.
            result = res.json()
            datas = result['List']
            for data in datas:
                # extract_images() decodes the eight image URLs and rewrites
                # the size marker 200 -> 0.
                image_urls = extract_images(data)
                name = parse.unquote(data['sProdName'])
                dirpath = os.path.join('image', name)
                # makedirs also creates the missing 'image' parent, and
                # exist_ok avoids the exists()/mkdir() race between threads.
                os.makedirs(dirpath, exist_ok=True)
                # Number the files 1.jpg, 2.jpg, ... in URL order.
                for index, image_url in enumerate(image_urls, start=1):
                    self.image_ueue.put({
                        'image_url': image_url,
                        'image_path': os.path.join(dirpath, '%d.jpg' % index),
                    })
class Comsumer(threading.Thread):
    """Thread that downloads the images described by queued jobs.

    Args:
        image_ueue: Queue of dicts with the keys ``image_url`` and
            ``image_path``.
        *args: Forwarded to ``threading.Thread``.
        timeout: Seconds to wait for a new job before the thread stops.
            Defaults to 10, the value that was previously hard-coded.
        **kwargs: Forwarded to ``threading.Thread``.
    """

    def __init__(self, image_ueue, *args, timeout=10, **kwargs):
        super(Comsumer, self).__init__(*args, **kwargs)
        self.image_ueue = image_ueue
        self.timeout = timeout

    def run(self) -> None:
        """Download queued images until no job arrives within ``timeout``."""
        from queue import Empty  # local import: the file's import block is not visible here

        while True:
            # Only a queue timeout ends the loop; a bare except here used to
            # treat every error as "queue drained" and hid real bugs.
            try:
                image_obj = self.image_ueue.get(timeout=self.timeout)
            except Empty:
                break
            image_url = image_obj.get('image_url')
            image_path = image_obj.get('image_path')
            try:
                request.urlretrieve(image_url, image_path)
            except Exception as exc:
                # Best effort: one failed download must not stop the worker,
                # but the failing path and the reason are reported.
                print('下載失敗', image_path, exc)
            else:
                print("%s下載成功!" % image_path)
# Three producer threads and five consumer threads are started; there are more
# consumers because each one does more work (sends the request and saves the image).
def main():
    """Queue ten listing-page URLs and start the producer and consumer threads."""
    import os  # local import: the file's import block is not visible here

    # The producers write into image/<name>; make sure the root exists first.
    os.makedirs('image', exist_ok=True)

    # Queue of listing-page URLs.
    page_ueue = Queue(10)
    # Queue of image download jobs.
    image_ueue = Queue(3000)

    # Crawl ten pages (page=0 .. page=9).
    for i in range(10):
        img_url = "https://apps.game.qq.com/cgi-bin/ams/module/ishow/V1.0/query/workList_inc.cgi?activityId=2735&sVerifyCode=ABCD&sDataType=JSON&iListNum=20&totalpage=0&page={}&iOrder=0&iSortNumClose=1&171003449092155893818_1620870158277&iAMSActivityId=51991&_everyRead=true&iTypeId=2&iFlowId=267733&iActId=2735&iModuleId=2735&_=1620870158575".format(i)
        page_ueue.put(img_url)

    # Start three producer threads.
    for i in range(3):
        pt = Producer(page_ueue, image_ueue)
        pt.start()

    # Start five consumer threads.
    for i in range(5):
        ct = Comsumer(image_ueue)
        ct.start()
04
程序運行
# Run the crawler only when this file is executed as a script.
if __name__ == '__main__':
    main()

加入知識星球【我們談論數據科學】
500+小伙伴一起學(xué)習(xí)!
· 推薦閱讀 ·
盤點2021最佳數據可視化作品
「Python實用秘技04」pdf文件批量添加文字水印
評論
圖片
表情
