这个 lxml 也已经安装了。
import requests
# from lxml import etree  (Python 3.5 以上版本不可这样导入)
from lxml import html
etree = html.etree
import os
import time
def geturl(url):
    """Fetch ``url`` and return the response body decoded as GBK text.

    Args:
        url: Absolute URL of the page to download.

    Returns:
        The page source as a ``str``.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36"}
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    # Force GBK decoding instead of relying on the charset requests guesses.
    response.encoding = 'gbk'
    return response.text
def get_mggs(url):
    """Collect the gallery entries listed on a category page.

    Args:
        url: URL of the category listing page.

    Returns:
        A list of ``[href, title]`` pairs, one for each ``<li>`` under
        ``<ul class="list_con_box_ul">`` whose ``<a>`` child carries both an
        ``href`` and a ``title`` attribute. Empty if the page cannot be
        parsed or lists nothing.
    """
    page = etree.HTML(geturl(url))
    if page is None:
        # Nothing parseable came back (e.g. an empty body).
        return []

    mggs = []
    for li in page.xpath('//ul[@class="list_con_box_ul"]/li'):
        href = li.xpath("./a/@href")
        title = li.xpath("./a/@title")
        # xpath() returns lists; skip items that lack either attribute
        # instead of failing on an empty list.
        if not href or not title:
            continue
        # NOTE(review): the right-hand side of this assignment was missing in
        # the pasted source; the [href, title] pair is a reconstruction.
        # Confirm it matches what the caller expects.
        mggs.append([href[0], title[0]])
    return mggs
def getmgg():
    """Download the images of every gallery listed at the global ``url``.

    Reads the module-level ``url`` (assigned in the ``__main__`` block). For
    each gallery returned by ``get_mggs(url)``, a directory named after the
    gallery title is created (or reused) in the current working directory.
    Pages 1..15 of the gallery are then requested, and the image referenced
    by ``//div[@id="bigpic"]/a/img/@src`` on each page is saved into that
    directory. The working directory is restored after each gallery, even if
    a request raises.

    Returns:
        None.
    """
    headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36"}
    site_root = url.split("/meinv")[0]
    start_dir = os.getcwd()

    for mgg in get_mggs(url):
        # NOTE(review): the subscripts in this function were missing in the
        # pasted source. mgg is assumed to be an [href, title] pair, with the
        # href relative to the site root. Confirm against get_mggs.
        href, title = mgg[0], mgg[1]
        mggurl1 = site_root + href

        if not os.path.exists(title):
            dils(title)
        else:
            os.chdir(title)

        try:
            for count in range(1, 16):
                # Page 1 is "<id>.html"; later pages are "<id>_<n>.html".
                if count == 1:
                    mggurl = mggurl1
                else:
                    mggurl = mggurl1.split(".html")[0] + "_" + str(count) + ".html"

                resp = requests.get(url=mggurl, headers=headers, timeout=10)
                if resp.status_code != 200:
                    continue

                page = etree.HTML(resp.text)
                srcs = page.xpath('//div[@id="bigpic"]/a/img/@src') if page is not None else []
                if not srcs:
                    # No image on this page; nothing to save.
                    continue
                link = srcs[0]

                mm_jpg = requests.get(link, headers=headers, timeout=10)
                if mm_jpg.status_code != 200:
                    # Do not write an error page to disk as if it were an image.
                    continue
                with open(link.split("/")[-1], 'wb') as f:
                    f.write(mm_jpg.content)
                print("成功下载一张图片")
                # Pause between downloads to avoid hammering the server.
                time.sleep(1)
        finally:
            # Portable replacement for chdir(".\\.."), which only resolves on
            # Windows; this also runs when a request raises.
            os.chdir(start_dir)
def dils(name = "ooxx"):
    """Create directory ``name`` if needed and make it the working directory.

    Args:
        name: Directory path, relative to the current working directory
            unless absolute. Defaults to ``"ooxx"``.

    Raises:
        OSError: If the directory cannot be created or entered.
    """
    # exist_ok avoids a FileExistsError when the directory is already there.
    os.makedirs(name, exist_ok=True)
    os.chdir(name)
if __name__ == "__main__":
    # Script entry point: ask for a category, build its listing URL and
    # download every gallery found there.
    url1 = "https://www.tupianzj.com/meinv/"
    # Category name typed by the user -> path segment appended to url1.
    lis = {"清纯美女":"xiezhen/","性感":"xinggan/",
           "古装":"guzhuang/","人体艺术":"yishu/",
           "香车美女":"chemo/","丝袜美女":"siwa/"
           }
    prompt = "选择要下载的图片分类(清纯美女,性感,古装,人体艺术,香车美女,丝袜美女)"
    name = input(prompt).strip()
    # Ask again rather than fail with KeyError on an unknown category.
    while name not in lis:
        name = input(prompt).strip()
    # getmgg() reads this module-level name.
    url = url1 + lis[name]
    # getmgg() fetches the listing itself, so no separate fetch is needed here.
    getmgg()
import requests
# from lxml import etree  (Python 3.5 以上版本不可这样导入)
from lxml import html
etree = html.etree
import os
import time
def geturl(url):
    """Fetch ``url`` and return the response body decoded as GBK text.

    Args:
        url: Absolute URL of the page to download.

    Returns:
        The page source as a ``str``.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36"}
    # Without a timeout a stalled connection would block the script forever.
    response = requests.get(url, headers=headers, timeout=10)
    # Force GBK decoding instead of relying on the charset requests guesses.
    response.encoding = 'gbk'
    return response.text
def get_mggs(url):
    """Collect the gallery entries listed on a category page.

    Args:
        url: URL of the category listing page.

    Returns:
        A list of ``[href, title]`` pairs, one for each ``<li>`` under
        ``<ul class="list_con_box_ul">`` whose ``<a>`` child carries both an
        ``href`` and a ``title`` attribute. Empty if the page cannot be
        parsed or lists nothing.
    """
    page = etree.HTML(geturl(url))
    if page is None:
        # Nothing parseable came back (e.g. an empty body).
        return []

    mggs = []
    for li in page.xpath('//ul[@class="list_con_box_ul"]/li'):
        href = li.xpath("./a/@href")
        title = li.xpath("./a/@title")
        # xpath() returns lists; skip items that lack either attribute
        # instead of failing on an empty list.
        if not href or not title:
            continue
        # NOTE(review): the right-hand side of this assignment was missing in
        # the pasted source; the [href, title] pair is a reconstruction.
        # Confirm it matches what the caller expects.
        mggs.append([href[0], title[0]])
    return mggs
def getmgg():
    """Download the images of every gallery listed at the global ``url``.

    Reads the module-level ``url`` (assigned in the ``__main__`` block). For
    each gallery returned by ``get_mggs(url)``, a directory named after the
    gallery title is created (or reused) in the current working directory.
    Pages 1..15 of the gallery are then requested, and the image referenced
    by ``//div[@id="bigpic"]/a/img/@src`` on each page is saved into that
    directory. The working directory is restored after each gallery, even if
    a request raises.

    Returns:
        None.
    """
    headers = {"User-Agent":"Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36"}
    site_root = url.split("/meinv")[0]
    start_dir = os.getcwd()

    for mgg in get_mggs(url):
        # NOTE(review): the subscripts in this function were missing in the
        # pasted source. mgg is assumed to be an [href, title] pair, with the
        # href relative to the site root. Confirm against get_mggs.
        href, title = mgg[0], mgg[1]
        mggurl1 = site_root + href

        if not os.path.exists(title):
            dils(title)
        else:
            os.chdir(title)

        try:
            for count in range(1, 16):
                # Page 1 is "<id>.html"; later pages are "<id>_<n>.html".
                if count == 1:
                    mggurl = mggurl1
                else:
                    mggurl = mggurl1.split(".html")[0] + "_" + str(count) + ".html"

                resp = requests.get(url=mggurl, headers=headers, timeout=10)
                if resp.status_code != 200:
                    continue

                page = etree.HTML(resp.text)
                srcs = page.xpath('//div[@id="bigpic"]/a/img/@src') if page is not None else []
                if not srcs:
                    # No image on this page; nothing to save.
                    continue
                link = srcs[0]

                mm_jpg = requests.get(link, headers=headers, timeout=10)
                if mm_jpg.status_code != 200:
                    # Do not write an error page to disk as if it were an image.
                    continue
                with open(link.split("/")[-1], 'wb') as f:
                    f.write(mm_jpg.content)
                print("成功下载一张图片")
                # Pause between downloads to avoid hammering the server.
                time.sleep(1)
        finally:
            # Portable replacement for chdir(".\\.."), which only resolves on
            # Windows; this also runs when a request raises.
            os.chdir(start_dir)
def dils(name = "ooxx"):
    """Create directory ``name`` if needed and make it the working directory.

    Args:
        name: Directory path, relative to the current working directory
            unless absolute. Defaults to ``"ooxx"``.

    Raises:
        OSError: If the directory cannot be created or entered.
    """
    # exist_ok avoids a FileExistsError when the directory is already there.
    os.makedirs(name, exist_ok=True)
    os.chdir(name)
if __name__ == "__main__":
    # Script entry point: ask for a category, build its listing URL and
    # download every gallery found there.
    url1 = "https://www.tupianzj.com/meinv/"
    # Category name typed by the user -> path segment appended to url1.
    lis = {"清纯美女":"xiezhen/","性感":"xinggan/",
           "古装":"guzhuang/","人体艺术":"yishu/",
           "香车美女":"chemo/","丝袜美女":"siwa/"
           }
    prompt = "选择要下载的图片分类(清纯美女,性感,古装,人体艺术,香车美女,丝袜美女)"
    name = input(prompt).strip()
    # Ask again rather than fail with KeyError on an unknown category.
    while name not in lis:
        name = input(prompt).strip()
    # getmgg() reads this module-level name.
    url = url1 + lis[name]
    # getmgg() fetches the listing itself, so no separate fetch is needed here.
    getmgg()
我刚开始学Python你就给我看这个? FengYang.X 发表于 2021-4-14 17:22
文件损坏了啊
直接改后缀名就行了 文件损坏 无法解压
页:
1
[2]