马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import requests
import re
import time
from bs4 import BeautifulSoup
from urllib.request import urlretrieve
import os
# Banner printed once when the script starts.
print("=============================")

# Hero-list page that the scraper downloads and parses.
Url = "https://pvp.qq.com/web201605/herolist.shtml"

# Request headers sent with the page fetch; only a browser-like
# User-Agent is supplied.
headers = {
    "user-agent": (
        "Mozilla/5.0 (Windows NT 6.1; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/83.0.4103.14 Safari/537.36 Edg/83.0.478.13"
    ),
}

# Working directory captured at start-up; output paths are built from it.
cwd = os.getcwd()
def here_name():
    """Download the hero roster and avatar images from the hero-list page.

    Fetches ``Url`` using the module-level ``headers``, decodes the response
    as GBK, and then:

    * writes every name captured from the ``height="91" alt="..."``
      attributes to ``<cwd>/王者头像/hero`` (one per line, UTF-8);
    * saves every protocol-relative (``//...``) ``<img>`` source as
      ``<cwd>/王者头像/<n>.jpg``, numbering from 1.

    Returns:
        None. Results are written to disk and a short summary is printed.
    """
    # Build paths with os.path.join. The original hand-written "\\"
    # separators were Windows-only, and the one in front of the image number
    # had lost a backslash (`\"`), which escaped the closing quote and broke
    # the string literal.
    out_dir = os.path.join(cwd, "王者头像")
    # exist_ok replaces the former bare `except: pass`: an already existing
    # folder is fine, but genuine failures (e.g. permissions) surface.
    os.makedirs(out_dir, exist_ok=True)

    html_ = requests.get(Url, headers=headers).content.decode("gbk")

    heros = re.findall('height="91" alt="(.*?)">', html_, re.S)
    # The original always discards the last match -- presumably a trailing
    # entry that is not a hero (TODO confirm). Guarded so that an empty
    # result no longer raises IndexError.
    if heros:
        heros.pop()

    with open(os.path.join(out_dir, "hero"), "w", encoding="utf-8") as f:
        f.writelines(hero + "\n" for hero in heros)

    print("=============================")
    print("总共有%d" % (len(heros)))

    pictures = re.findall('<img.*?src="(.*?)"', html_, re.S)
    # Only protocol-relative sources are downloaded; the counter advances
    # only for those, so the saved files are numbered without gaps.
    remote_pictures = [src for src in pictures if src.startswith("//")]
    for x, picture in enumerate(remote_pictures, start=1):
        urlretrieve("http:" + picture, os.path.join(out_dir, str(x) + ".jpg"))
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    here_name()
# Unused bare string literal: a sample <li> entry showing the markup shape
# that the regular expressions in here_name() match (an <img> tag with a
# protocol-relative src and a height="91" alt="..." attribute). It is never
# assigned or read, so it has no effect at runtime.
"""
<li><a href="herodetail/105.shtml" target="_blank"><img
src="//game.gtimg.cn/images/yxzj/img201606/heroimg/105/105.jpg" width="91"
height="91" alt="廉颇">廉颇</a></li>"""
代码末尾那段关于廉颇的 HTML 片段是我懒得删掉的,可以忽略。