import urllib.request
import re
import random
import os


def url_open(url):
    '''
    # Optional proxy support, left disabled inside this docstring:
    listip = ['117.94.213.165:8118', '119.179.136.97:8060', '114.228.73.217:6666']

    proxy_support = urllib.request.ProxyHandler({'http': random.choice(listip)})
    opener = urllib.request.build_opener(proxy_support)
    opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36')]
    urllib.request.install_opener(opener)
    '''
    # The proxy block above was too slow for me, so I left it disabled.
    req = urllib.request.Request(url)
    req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36')
    response = urllib.request.urlopen(req)
    html = response.read()
    return html

def make_file(url):
    # Make a folder named after the hero and download all of its skins into it.
    html = url_open(url).decode('gbk')
    a = html.find('cover-name') + 12    # skip past 'cover-name">' to the hero name
    b = html.find('</h2>', a)           # closing tag right after the name
    name = html[a:b]
    os.mkdir(name)
    os.chdir(name)
    save_hero(url)
    os.chdir('..')                      # back to the parent folder for the next hero

def save_hero(url):
    # Grab one big-skin image URL from the hero page, then walk skin numbers 1-6.
    html = url_open(url).decode('gbk')
    a = re.findall(r'game.gtimg.cn/.+hero-info/\d{3}/\d{3}-bigskin-.\.jpg', html)
    a = a[0]
    for i in range(1, 7):
        a = a[:-5] + str(i) + a[-4:]    # swap the skin number just before '.jpg'
        img_url = 'http://' + a
        file_name = img_url.split('/')[-1]
        try:
            imghtml = url_open(img_url)
            with open(file_name, 'wb') as f:
                f.write(imghtml)
        except Exception:
            # The request fails once the skin number runs past the last skin, so stop.
            break

def heropic(folder='herolist'):
    # Collect every hero detail page linked from the hero-list HTML and
    # download each hero's skins into its own sub-folder.
    os.mkdir(folder)
    os.chdir(folder)

    url = 'https://pvp.qq.com/web201605/herolist.shtml'
    html = url_open(url).decode('gbk')
    herolist = re.findall(r'herodetail/\d\d\d\.shtml', html)
    herodetail = ['https://pvp.qq.com/web201605/' + hero for hero in herolist]
    for detail_url in herodetail:
        make_file(detail_url)


if __name__ == '__main__':
    heropic()
I wrote this by imitating 小甲鱼's crawler, so there is probably some redundant code.
One remaining problem: I couldn't find the links for the first five heroes in the page source, so this only grabs the skins of 93 heroes.
I don't know how to handle that, and would appreciate pointers from anyone more experienced.
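
A possible workaround for the missing heroes, as a minimal sketch: the static herolist.shtml apparently only carries part of the roster, so the idea is to read a JSON hero list instead. The JSON URL and the 'ename' field name below are assumptions to verify against the live site, not something confirmed in this post.

import json
import urllib.request

def herodetail_urls():
    # Assumed JSON endpoint for the full hero list -- check that it still exists.
    url = 'https://pvp.qq.com/web201605/js/herolist.json'
    req = urllib.request.Request(url)
    req.add_header('User-Agent', 'Mozilla/5.0')
    raw = urllib.request.urlopen(req).read()
    # The site's pages are gbk-encoded, so the JSON is decoded the same way here.
    heroes = json.loads(raw.decode('gbk', errors='ignore'))
    # 'ename' is assumed to be the numeric id used in herodetail/<id>.shtml.
    return ['https://pvp.qq.com/web201605/herodetail/' + str(h['ename']) + '.shtml'
            for h in heroes]

If that endpoint works, the returned URLs could be passed to make_file() one by one in place of the list scraped from the HTML, which should cover every hero rather than just 93.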