简单爬虫,爬取王者荣耀英雄皮肤
本帖最后由 一个账号 于 2020-3-26 09:59 编辑

import urllib.request
import re
import random
import os
import time
def url_open(url):
    """Fetch *url* and return the raw response body as bytes.

    The request is sent with a desktop-browser ``User-Agent`` header.

    Args:
        url: Address to fetch; anything ``urllib.request.Request`` accepts.

    Returns:
        The undecoded response body (``bytes``); callers decode it themselves.

    Raises:
        Whatever ``urllib.request.urlopen`` raises (for example
        ``urllib.error.URLError``) is propagated unchanged.
    """
    # A random-proxy opener (ProxyHandler + install_opener) used to be set up
    # here; it proved too slow in practice, so requests now go out directly.
    user_agent = ' Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'
    req = urllib.request.Request(url)
    req.add_header('User-Agent', user_agent)
    # Use the response as a context manager so the connection is closed
    # deterministically instead of lingering until garbage collection.
    with urllib.request.urlopen(req) as response:
        return response.read()
def make_file(url):
    """Create a folder named after the hero at *url* and save its skins there.

    The hero page is downloaded and decoded as GBK, the hero name is cut out
    of the markup, a directory with that name is created in the current
    working directory, and ``save_hero`` is run from inside it.

    Args:
        url: Address of a hero detail page.

    Raises:
        Errors from ``url_open``, the GBK decode, directory creation and
        ``save_hero`` are propagated; the working directory is restored first.
    """
    html = url_open(url).decode('gbk')

    marker = 'cover-name'
    marker_at = html.find(marker)
    name = ''
    if marker_at != -1:
        # Skip the marker plus two more characters (presumably the '">' that
        # closes the attribute and the tag), then read up to the next '</h2>'.
        start = marker_at + len(marker) + 2
        # Search from `start` so an earlier '</h2>' on the page is not picked.
        end = html.find('</h2>', start)
        if end != -1:
            name = html[start:end].strip()
    if not name:
        # Markup not found: fall back to the page's file name without its
        # extension so the download can still proceed.
        name = os.path.splitext(url.rstrip('/').split('/')[-1])[0]

    previous_dir = os.getcwd()
    # exist_ok lets the crawler be re-run over an existing download folder.
    os.makedirs(name, exist_ok=True)
    os.chdir(name)
    try:
        save_hero(url)
    finally:
        # Always return to where we started, even if a download fails.
        os.chdir(previous_dir)
def save_hero(url, max_skins=6):
    """Download the big skin images of the hero page at *url*.

    The page is scanned for the first ``...hero-info/NNN/NNN-bigskin-K.jpg``
    image address. The skin number ``K`` is then replaced by 1, 2, ... and
    each resulting image is written to the current working directory under
    its own file name. Downloading stops at the first image that cannot be
    fetched.

    Args:
        url: Address of a hero detail page.
        max_skins: Highest skin number to try (default 6).

    Returns:
        None. Nothing is written when the page has no skin image address.
    """
    html = url_open(url).decode("gbk")
    # Capture everything up to (but excluding) the skin number, so the URL is
    # rebuilt from the match instead of a fixed character offset.
    match = re.search(
        r"(game\.gtimg\.cn/[^\s\"'<>()]+?hero-info/\d{3}/\d{3}-bigskin-)\d+\.jpg",
        html,
    )
    if match is None:
        return
    prefix = match.group(1)

    for skin_no in range(1, max_skins + 1):
        img_url = "http://" + prefix + str(skin_no) + ".jpg"
        file_name = img_url.split('/')[-1]
        try:
            image = url_open(img_url)
        except OSError:
            # URLError/HTTPError are OSError subclasses: a failed fetch is
            # taken to mean there are no further skins for this hero.
            break
        with open(file_name, 'wb') as f:
            f.write(image)
def heropic(folder='herolist'):
    """Download the skins of every hero linked from the hero list page.

    Creates *folder* (if needed), changes the working directory into it, and
    calls ``make_file`` for each ``herodetail/NNN.shtml`` link found in the
    list page's HTML. The working directory is left inside *folder* when the
    function returns.

    Args:
        folder: Directory that receives one sub-folder per hero.
    """
    # exist_ok lets the crawler be re-run without failing on the old folder.
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)

    base_url = 'https://pvp.qq.com/web201605/'
    html = url_open(base_url + 'herolist.shtml').decode("gbk")
    # dict.fromkeys drops repeated links while keeping page order, so no hero
    # is downloaded twice.
    hero_pages = dict.fromkeys(re.findall(r'herodetail/\d\d\d\.shtml', html))
    for page in hero_pages:
        make_file(base_url + page)
# Run the crawler only when executed as a script, not when imported.
if __name__ == "__main__":
    heropic()
模仿着小甲鱼的爬虫写了一下,可能有一些代码冗余
还有就是在源代码没看到前五个英雄的链接,所以只能爬93个英雄的皮肤
关于这点我也不知道咋办,希望有大佬指导下。

嘻嘻嘻嘻嘻嘻嘻学习 楼主无敌于世间
小白路过,学习学习 新手上路学习学习 666 哈 呃呃呃呃呃 学习学习 瞅一瞅 想见识一下
涨涨见识
{:10_254:}{:10_254:} 看看拉 学习学习 学习 无敌 厉害啊 回复学习学习
是只能看的那种吗