马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import requests
import urllib.request
import random
import re
import os
def install():
    """Pick a random proxy address and a random browser User-Agent string.

    Both values are drawn with ``random.choice`` from the hard-coded pools
    below.

    Returns:
        tuple: ``(ip, user)`` -- the chosen proxy URL followed by the chosen
        User-Agent string.
    """
    # Pool of browser identification strings to rotate through.
    users = [
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0",
        "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Trident/4.0; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; Maxthon/3.0)",
        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1) ; QIHU 360EE)",
    ]
    # Pool of proxy addresses to rotate through.
    ips = [
        "http://61.191.41.130",
        "http://122.238.12.191",
        "http://222.161.56.166",
    ]
    ip = random.choice(ips)
    user = random.choice(users)
    return ip, user
def get_html(url, timeout=10):
    """Send a GET request for ``url`` with a randomly chosen User-Agent.

    Args:
        url: Address to request.
        timeout: Seconds ``requests`` waits on the server before raising a
            timeout error, so an unresponsive proxy cannot hang the script.

    Returns:
        The ``requests.Response`` object. The request is made with
        ``stream=True``, so the body is only downloaded once ``.text`` or
        ``.content`` is read.
    """
    proxy, user_agent = install()
    rep = requests.get(
        url,
        # The User-Agent must travel as an HTTP header; passing it through
        # ``data=`` would put it in the request body instead.
        headers={'User-Agent': user_agent},
        # This mapping only supplies a proxy for https:// URLs.
        proxies={'https': proxy},
        stream=True,
        timeout=timeout,
    )
    return rep
# Interactive setup: read the search keyword and the number of result pages,
# then make a directory named after the keyword the working directory.
name = input('请输入要查找的商品:')
num = int(input('请输入页数:'))
# Percent-encode the keyword so it can be embedded in the search URL.
key = urllib.request.quote(name)
# exist_ok=True: running the script again with the same keyword must not
# abort with FileExistsError.
os.makedirs(name, exist_ok=True)
os.chdir(name)
def main():
    """Download the product pictures found on each requested search page.

    For every page index below the module-level ``num``, the search page for
    ``key`` is fetched and (picture URL, price, sales) triples are extracted
    with a regular expression. Each picture is saved as
    ``<price>元_<sales>.jpg`` inside a sub-directory named ``<name><page
    number>`` of the current working directory. When several items on one
    page share the same price and sales text, later ones get a numeric suffix
    (``_2``, ``_3``, ...) instead of overwriting the first file.
    """
    # Compiled once; the same pattern is applied to every page.
    item_pattern = re.compile(
        r'"pic_url":"([^"]+?)".+?"view_price":"([^"]+?)".+?"view_sales":"([^"]+?)"'
    )
    for j in range(num):
        # The ``s`` offset advances by 44 per page
        # (presumably the number of items per result page -- TODO confirm).
        url = 'https://s.taobao.com/search?q=' + key + '&s=' + str(j * 44)
        html = get_html(url).text
        items = item_pattern.findall(html)

        # Write through joined paths rather than chdir-ing in and out, so an
        # exception in the middle of a page cannot leave the process in the
        # wrong working directory.
        page_dir = '%s%d' % (name, j + 1)
        os.makedirs(page_dir, exist_ok=True)

        seen = {}  # file stem -> how many times it has occurred on this page
        for pic_url, price, sales in items:
            # Original note: the proxy cannot be used for this request
            # (the picture is fetched over plain http).
            img = get_html('http:' + str(pic_url))
            print(img)
            if not img.ok:
                # Do not store an error page under a .jpg name.
                continue
            stem = '%s元_%s' % (price, sales)
            seen[stem] = seen.get(stem, 0) + 1
            if seen[stem] > 1:
                stem = '%s_%d' % (stem, seen[stem])
            with open(os.path.join(page_dir, stem + '.jpg'), 'wb') as f:
                f.write(img.content)
# Start the download only when this file is executed as a script.
if __name__ == '__main__':
    main()
|