# Forum note: this post was last edited by 1q23w31 on 2020-8-25 14:05.
import requests
from lxml import etree
import os
# Scrape the images listed in the "collage-body" list on the freeimages.com
# home page and save each one into ./photo, named after the last path segment
# of its URL.

# Request headers sent with every HTTP call below.
headers = {
    'Accept-Language': 'zh-CN',
    'Cache-Control': 'no-cache',
    'Connection': 'Keep-Alive',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.18363'
}
url = 'https://cn.freeimages.com/'
# Seconds to wait for the server; without a timeout requests can block forever.
request_timeout = 10
# Directory that receives the downloaded files.
photo_dir = './photo'

# Fetch the landing page and fail loudly on an HTTP error instead of parsing
# an error page.
response = requests.get(url=url, headers=headers, timeout=request_timeout)
response.raise_for_status()
html = response.text

tree = etree.HTML(html)
li_list = tree.xpath('//div[@class="collage-body"]/ul/li')

# exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
os.makedirs(photo_dir, exist_ok=True)

for each in li_list:
    src_list = each.xpath('./a/img/@src')
    if not src_list:
        # This <li> has no a/img element with a src attribute; nothing to fetch.
        continue
    img_src = src_list[0]
    img_name = img_src.split('/')[-1]
    if not img_name:
        # URL ends with '/', so there is no file name to save under.
        continue
    try:
        img_response = requests.get(img_src, headers=headers, timeout=request_timeout)
        # Do not write an HTTP error body to disk as if it were an image.
        img_response.raise_for_status()
    except requests.RequestException as exc:
        # One failed download (bad URL, timeout, HTTP error) should not abort
        # the remaining images.
        print(img_name, '爬取失败:', exc)
        continue
    image = img_response.content
    img_path = os.path.join(photo_dir, img_name)
    with open(img_path, 'wb') as f:
        f.write(image)
    print(img_name, '爬取完成!!!')
# Forum note (from the original post): the source code error is at line 28.