爬取房天下装修图片并保存到本地
谢谢大佬们捧场{:10_254:}
from time import sleep
import requests,json
from lxml import etree
import os
def processing(strs):
    """Concatenate the given strings after removing all whitespace from each.

    Args:
        strs: Iterable of strings, e.g. the list returned by an XPath
            ``text()`` or attribute query.

    Returns:
        One string made of every item, in order, with its whitespace
        removed; ``''`` when ``strs`` is empty.
    """
    # str.split() with no argument splits on any run of whitespace, so
    # re-joining the pieces drops spaces, tabs and newlines. A single outer
    # join avoids rebuilding the result string once per item.
    return ''.join(''.join(n.split()) for n in strs)
def run(url):
    """Fetch one album listing page and download every album linked from it.

    Args:
        url: Address of a listing page on home.fang.com.

    Returns:
        None. Network and file errors are reported on stdout rather than
        raised, so a failing page or album does not stop the crawl.
    """
    # Function-scope import keeps this block independent of the file header.
    from urllib.parse import urljoin

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/92.0.4515.107 Safari/537.36',
        'HOST': 'home.fang.com',
    }
    try:
        # Without a timeout a stalled connection would hang the crawl forever.
        response = requests.get(url=url, headers=headers, timeout=10)
    except requests.RequestException as exc:
        print(f'请求失败: {url} ({exc})')
        return None
    if response.status_code != 200:
        print(f'请求失败: {url} (HTTP {response.status_code})')
        return None

    html = etree.HTML(response.text)
    if html is None:
        # NOTE(review): defensive guard for a body the parser yields no tree
        # for — confirm against lxml's behaviour on empty responses.
        return None

    for entry in html.xpath('//div[@class="photo_list"]/ul/li'):
        link = processing(entry.xpath('./ol/p/a/@href'))
        if not link:
            # Entry carries no album link, so there is nothing to download.
            continue
        # urljoin resolves site-relative links and leaves absolute ones intact.
        href = urljoin('https://home.fang.com', link)
        try:
            TP(img_url=href, headers=headers)
        except (requests.RequestException, OSError) as exc:
            # One broken album must not abort the remaining albums.
            print(f'下载失败: {href} ({exc})')
        # Pause between albums to keep the request rate low.
        sleep(5)
    return None
def TP(img_url, headers):
    """Download the images listed on one album page into the working directory.

    Args:
        img_url: Address of the album page.
        headers: HTTP headers sent with the album page request.

    Returns:
        None. Each image is written as ``<title>.jpg`` where the title comes
        from the ``value`` of the list item's ``input`` element; files that
        already exist are left untouched. Failures print ``保存失败!`` and the
        function moves on.
    """
    print('下载装修效果图中' + img_url)
    try:
        # Without a timeout a stalled connection would block indefinitely.
        img_response = requests.get(url=img_url, headers=headers, timeout=10)
    except requests.RequestException:
        print('保存失败!')
        return
    img_html = etree.HTML(img_response.text)
    if img_html is None:
        # NOTE(review): defensive guard for a body the parser yields no tree
        # for — confirm against lxml's behaviour on empty responses.
        return

    # Image requests send only a User-Agent (no HOST header), as before.
    headers1 = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
                              'AppleWebKit/537.36 (KHTML, like Gecko) '
                              'Chrome/92.0.4515.107 Safari/537.36'}

    for li in img_html.xpath('//div[@id="BoxUl"]/ul//li'):
        # Selecting @src and @src2 together and concatenating them produced an
        # invalid URL whenever an <img> carried both, so pick exactly one.
        # NOTE(review): src2 is preferred on the assumption that it holds the
        # lazy-loaded real image — verify against the live page.
        sources = li.xpath('./span/img/@src2') or li.xpath('./span/img/@src')
        img_png = processing(sources[:1])
        if not img_png:
            continue

        title = processing(li.xpath('./input/@value'))
        # Path separators in a title would make open() target another directory.
        title = title.replace('/', '_').replace('\\', '_')
        if not title:
            # Fall back to the URL's file name so untitled images do not all
            # collide on the single name ".jpg".
            title = os.path.splitext(os.path.basename(img_png.split('?')[0]))[0]
        img_name = title + '.jpg'

        if os.path.exists(img_name):
            # Already saved: skip before spending a request on it.
            continue
        try:
            response_img = requests.get(url=img_png, headers=headers1, timeout=10)
            # Do not save an HTTP error page under an image file name.
            response_img.raise_for_status()
            with open(img_name, 'wb') as f:
                f.write(response_img.content)
        except (requests.RequestException, OSError):
            print('保存失败!')
            continue
        # Short pause between image downloads.
        sleep(0.5)
if __name__ == '__main__':
    # Script entry point: walk the album listing pages one by one.
    # range(1, 2) yields only page 1; widen the range to crawl more pages.
    listing_urls = (
        f'https://home.fang.com/album/s24/{page}/' for page in range(1, 2)
    )
    for url in listing_urls:
        print('正在分析:' + url)
        run(url=url)
        # Wait before requesting the next listing page.
        sleep(5)
{:10_254:} 自己爬的时候 加了代理IP {:10_254:} 学习学习,顺便领个币 静态爬大体上都一样,有空玩一玩动态爬,反反爬 6666 冲冲冲 感谢分享! wp231957 发表于 2021-7-29 06:45
静态爬大体上都一样,有空玩一玩动态爬,反反爬
好的{:10_254:} 学习还可以领币?? 学习! {:10_279:} {:10_254:} 反反爬还在学习中 11 学习一下
页:
[1]