|
发表于 2020-3-27 22:17:45
|
显示全部楼层
因为前五个英雄不在静态 HTML 页面里,所以需要先请求 herolist.json 接口获取完整的英雄列表,再逐个访问英雄详情页。
import json
import os
import time
from string import digits

import requests
from lxml import etree
# Endpoint that returns the hero list as JSON; each entry carries an 'ename'
# field that is used to build the hero detail page URL and the image URLs.
base_url = 'https://pvp.qq.com/web201605/js/herolist.json'
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36"
}
# Seconds to wait on any single HTTP request before giving up, so that one
# stalled connection cannot hang the whole run.
REQUEST_TIMEOUT = 10
# Directory the skin images are written to; created on demand.
OUTPUT_DIR = 'skin_photo'


def parse_skin_names(raw):
    """Split a ``data-imgname`` attribute value into a list of skin names.

    The value is split on ``'|'``; every ASCII digit is removed and leading or
    trailing ``'&'`` characters are stripped from each piece.  Note that digits
    are removed everywhere, including inside a name.
    (Presumably each piece looks like ``name&N`` -- confirm against the page.)

    Args:
        raw: The attribute string taken from the hero detail page.

    Returns:
        The skin names in the order they appear in ``raw``.
    """
    without_digits = raw.translate(str.maketrans('', '', digits))
    return [piece.strip('&') for piece in without_digits.split('|')]


def skin_image_url(ename, skin_num):
    """Return the URL of the ``skin_num``-th (1-based) big skin image of a hero.

    Args:
        ename: The hero's ``ename`` value from the hero list.
        skin_num: 1-based position of the skin in the hero's skin list.
    """
    return (
        'http://game.gtimg.cn/images/yxzj/img201606/skin/hero-info/'
        + str(ename) + '/' + str(ename) + '-bigskin-' + str(skin_num) + '.jpg'
    )


def download_hero_skins(ename, out_dir=OUTPUT_DIR):
    """Download every skin image of one hero into ``out_dir``.

    Fetches the hero detail page, reads the hero name and the skin-name
    attribute from it, then saves each image as ``<hero>-<skin>.jpg``.

    Args:
        ename: The hero's ``ename`` value from the hero list.
        out_dir: Target directory; it is created if it does not exist.

    Raises:
        requests.RequestException: If the detail page or an image request
            fails at the transport level, or the detail page returns an
            HTTP error status.
    """
    url = 'https://pvp.qq.com/web201605/herodetail/' + str(ename) + '.shtml'
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # The script decodes the detail page as GBK rather than trusting the
    # encoding requests guessed.
    response.encoding = "gbk"
    html = etree.HTML(response.text)

    hero = html.xpath('//div[@class="crumb"]/label/text()')
    if not hero:
        # Without a hero name there is no sensible file name; skip instead of
        # crashing with IndexError.
        print('跳过, 未找到英雄名称:', url)
        return
    hero_name = str(hero[0])

    skin_attrs = html.xpath('//div[@class="pic-pf"]/ul[@class="pic-pf-list pic-pf-list3"]/@data-imgname')

    # open() does not create missing directories, so make sure it exists.
    os.makedirs(out_dir, exist_ok=True)

    for raw in skin_attrs:
        # Image numbering on the server is 1-based and follows the order of
        # the names in the attribute.
        for skin_num, skin_name in enumerate(parse_skin_names(raw), start=1):
            skin_url = skin_image_url(ename, skin_num)
            image = requests.get(skin_url, headers=headers, timeout=REQUEST_TIMEOUT)
            if image.status_code != 200:
                # Do not save an error page under a .jpg name.
                print('下载失败:', skin_url)
                continue
            label = hero_name + '-' + skin_name
            print('正在下载图片:', label)
            with open(os.path.join(out_dir, label + '.jpg'), 'wb') as f:
                f.write(image.content)


def main():
    """Fetch the hero list and download the skins of every hero in it.

    A hero whose requests fail is reported and skipped so the remaining
    heroes are still processed.
    """
    response = requests.get(base_url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    data_list = json.loads(response.text)
    for hero_entry in data_list:
        ename = hero_entry['ename']
        try:
            download_hero_skins(ename)
        except requests.RequestException as err:
            print('跳过英雄:', ename, err)


if __name__ == '__main__':
    main()
|