|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
# Scrape paginated photo galleries from tupianzj.com and save each image as
# D:/新建文件夹/<listpage>.<gallery>.<page>.jpg.
#
# NOTE(review): the forum paste lost all indentation and never imported
# `requests` or `lxml.etree` even though the code calls them — both restored.
import os
import re
import time
import urllib
import urllib.request

import requests          # used by the original code but never imported
from lxml import etree   # used by the original code but never imported

# The original sent the UA string under the key 'UA', which servers ignore;
# 'User-Agent' is the real header name. Build the dict once, not per request.
HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
                  '(KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
}

SAVE_DIR = 'D:/新建文件夹'
# urlretrieve fails when the target folder does not exist — this is why the
# poster found no saved files. Create the folder up front.
os.makedirs(SAVE_DIR, exist_ok=True)

for p in range(1, 8):  # list pages 1..7
    url = 'https://www.tupianzj.com/meinv/20200324/207357_' + str(p) + '.html'
    data = requests.get(url, headers=HEADERS)
    data.encoding = 'utf-8'
    html = etree.HTML(data.text)
    # Relative links to each gallery's first picture page.
    gallery_links = html.xpath("//ul[@class='list_con_box_ul']/li/a/@href")
    print(gallery_links)
    print('第' + str(p) + '页所有图片网址获取完毕!')
    print('____________________________________')
    print('正在下载' + str(p) + '页所有图片,')
    time.sleep(8)  # be polite to the server between list pages

    for i in range(0, len(gallery_links)):
        print('正在爬取' + str(p) + '页的第' + str(i + 1) + '位')
        url_pic = 'https://www.tupianzj.com' + str(gallery_links[i])
        data = requests.get(url_pic, headers=HEADERS)
        data.encoding = 'utf-8'
        html = etree.HTML(data.text)
        # First pager entry reads like "共N页" — extract N. The pasted pattern
        # "\照片" could never yield digits for int(); r"\d+" is the intended one.
        page = html.xpath("//div[@class='pages']/ul/li[1]/a/text()")
        page = re.findall(r'\d+', page[0])[0]
        # Drop the trailing ".html": keep scheme + host + path-without-extension.
        url_pics = url_pic.split('.', -1)
        url_pics = url_pics[0] + '.' + url_pics[1] + '.' + url_pics[2]

        # Picture pages 2..N are named <base>_<k>.html. Page 1 (url_pic itself)
        # was also skipped by the original loop — behavior kept.
        for j in range(1, int(page)):
            url_page = url_pics + '_' + str(j + 1) + '.html'
            data = requests.get(url_page, headers=HEADERS)
            data.encoding = 'utf-8'
            html = etree.HTML(data.text)
            pics = html.xpath("//div[@id='bigpic']/a[2]/img/@src")
            print(pics)
            time.sleep(1)  # throttle image downloads
            urllib.request.urlretrieve(
                pics[0],
                SAVE_DIR + '/' + str(p) + '.' + str(i + 1) + '.' + str(j) + '.jpg')

print('下载结束了')
(新手一枚)请大家帮忙看一下:运行之后图片最后保存到哪里去了?我指定的文件夹里什么也没有。
|
|