# Forum post published 2021-4-17 15:48:38 (page link text: "show all posts").
import requests
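# "from lxml import etree" is not used here: the original author notes that this import form cannot be used on Python 3.5+, so etree is taken from lxml.html below.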
from lxml import html
etree = html.etree
import os
import time
def geturl(url, timeout=10):
    """Fetch ``url`` and return the response body decoded as GBK text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before ``requests`` gives
            up. Without a timeout a stalled connection would block the
            script forever.

    Returns:
        The response body as a ``str``. The HTTP status code is not
        checked, so the body of an error page is returned as-is.

    Raises:
        requests.exceptions.RequestException: If the connection fails or
            the timeout expires.
    """
    # Send a desktop-browser User-Agent with the request.
    headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36"}
    response = requests.get(url, headers=headers, timeout=timeout)
    # Force GBK decoding instead of whatever charset requests detected.
    response.encoding = 'gbk'
    return response.text
def get_mggs(url):
    """Collect the entries linked from a listing page.

    The page is fetched with ``geturl`` and every ``<li>`` under
    ``<ul class="list_con_box_ul">`` is inspected for a direct ``<a>`` child.

    Args:
        url: Address of the listing page.

    Returns:
        A list of ``[title, href]`` lists in page order, taken from the
        anchor's ``title`` and ``href`` attributes. List items that lack
        either attribute are skipped instead of raising ``IndexError``.
    """
    page = etree.HTML(geturl(url))
    # Guard against a parse that produced no root element.
    if page is None:
        return []

    mggs = []
    for li in page.xpath('//ul[@class="list_con_box_ul"]/li'):
        hrefs = li.xpath("./a/@href")
        titles = li.xpath("./a/@title")
        if not hrefs or not titles:
            # Not a regular entry (no anchor, or anchor without title).
            continue
        mggs.append([titles[0], hrefs[0]])
    return mggs
def getmgg(base_url=None, max_pages=15, timeout=10):
    """Download one image per page for every entry on a listing page.

    For each ``[title, href]`` pair returned by ``get_mggs`` a directory
    named after the title is created (if needed) in the current working
    directory. Up to ``max_pages`` pages of the entry are then requested:
    the first page is the entry link itself, page ``n`` (n >= 2) is the
    same link with ``_n`` inserted before ``.html``. From each page that
    answers with HTTP 200 the image under ``div#bigpic`` is saved into the
    directory under the last path segment of its URL.

    Files are written through an explicit path, so the working directory
    is never changed (the previous ``os.chdir(".\\..")`` only worked on
    Windows).

    Args:
        base_url: Listing page to process. Defaults to the module-level
            ``url`` assigned by the script entry point.
        max_pages: Maximum number of pages requested per entry.
        timeout: Seconds to wait on each HTTP request.

    Returns:
        None.
    """
    from urllib.parse import urljoin

    if base_url is None:
        # Backward-compatible default: use the global set in __main__.
        base_url = url

    headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36"}

    for title, href in get_mggs(base_url):
        # Resolve the entry link against the listing page so that both
        # site-absolute paths and full URLs work.
        first_page = urljoin(base_url, href)
        os.makedirs(title, exist_ok=True)

        for page_no in range(1, max_pages + 1):
            if page_no == 1:
                page_url = first_page
            else:
                page_url = first_page.split(".html")[0] + "_" + str(page_no) + ".html"

            resp = requests.get(url=page_url, headers=headers, timeout=timeout)
            if resp.status_code != 200:
                # Page missing or server error: nothing to download here.
                continue

            tree = etree.HTML(resp.text)
            srcs = [] if tree is None else tree.xpath('//div[@id="bigpic"]/a[2]/img/@src')
            if not srcs:
                # Page has no image in the expected place; skip it rather
                # than crash with IndexError.
                continue

            link = urljoin(page_url, srcs[0])
            mm_jpg = requests.get(link, headers=headers, timeout=timeout)
            if mm_jpg.status_code != 200:
                # Do not save an error page as if it were the picture.
                continue

            with open(os.path.join(title, link.split("/")[-1]), 'wb') as f:
                f.write(mm_jpg.content)
            print("成功下载一张图片")
            # Pause between downloads to avoid hammering the server.
            time.sleep(1)
def dils(name="ooxx"):
    """Create directory ``name`` if necessary and make it the working directory.

    Args:
        name: Directory to create and enter, relative to the current
            working directory unless absolute. Defaults to ``"ooxx"``.

    Returns:
        None.

    Raises:
        OSError: If the directory cannot be created or entered.
    """
    # exist_ok avoids FileExistsError when the directory is already there
    # (for example when the script is run a second time).
    os.makedirs(name, exist_ok=True)
    os.chdir(name)
if __name__ == "__main__":
    # Script entry point: ask for a category, then download its entries.
    url1 = "https://www.tupianzj.com/meinv/"
    # Maps each category name offered in the prompt to its path under url1.
    lis = {"清纯美女":"xiezhen/","性感":"xinggan/",
           "古装":"guzhuang/","人体艺术":"yishu/",
           "香车美女":"chemo/","丝袜美女":"siwa/"
           }
    prompt = "选择要下载的图片分类(清纯美女,性感,古装,人体艺术,香车美女,丝袜美女)"
    name = input(prompt).strip()
    # Ask again instead of failing with KeyError on an unknown category.
    while name not in lis:
        name = input(prompt).strip()
    # getmgg() reads the module-level ``url``, so assign it before the call.
    url = url1 + lis[name]
    # getmgg() fetches the listing page itself; fetching it here as well
    # would only repeat the same network requests.
    getmgg()
# End of the forum post's code (the page had empty [code][/code] tags here).