|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
谢谢大佬们捧场
- from time import sleep
- import requests,json
- from lxml import etree
- import os
def processing(strs):
    """Join text fragments into one string with all whitespace removed.

    Args:
        strs: Iterable of strings, e.g. the list returned by an XPath
            ``text()`` or attribute query.

    Returns:
        The fragments concatenated in their original order, with every
        whitespace character (spaces, tabs, newlines) removed from each
        one. An empty iterable yields ``''``.
    """
    # str.split() with no argument splits on any run of whitespace, so
    # re-joining the pieces drops all whitespace inside each fragment.
    return ''.join(''.join(fragment.split()) for fragment in strs)
def run(url):
    """Fetch one album listing page and download every album it links to.

    Each ``<li>`` under ``div.photo_list`` is expected to carry a relative
    album link; the link is made absolute against ``https://home.fang.com``
    and handed to ``TP``. The crawl is best-effort: a failure is reported
    on stdout and the function returns normally.

    Args:
        url: Address of the listing page to analyse.

    Returns:
        None in every case.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36',
        'HOST': 'home.fang.com',
    }
    try:
        # A timeout keeps one stalled connection from hanging the crawl.
        response = requests.get(url=url, headers=headers, timeout=10)
        if response.status_code != 200:
            return None
        html = etree.HTML(response.text)
        for entry in html.xpath('//div[@class="photo_list"]/ul/li'):
            path = processing(entry.xpath('./ol/p/a/@href'))
            if not path:
                # No link in this entry; without this guard the bare site
                # root would be passed to TP as if it were an album page.
                continue
            TP(img_url='https://home.fang.com' + path, headers=headers)
            # Pause between albums to avoid hammering the server.
            sleep(5)
    except Exception as exc:
        # Still best-effort, but say what went wrong, and let
        # KeyboardInterrupt / SystemExit propagate (a bare except did not).
        print(f'页面分析失败:{url} ({exc})')
    return None
def TP(img_url, headers):
    """Download the images listed on one album page into the working directory.

    The page is fetched with the caller's headers. For each ``<li>`` under
    ``div#BoxUl`` the image URL is read from the ``<img>`` inside ``<span>``
    and the file name from the ``<input>`` value, with ``.jpg`` appended.
    Files that already exist are left untouched and are not re-downloaded.

    Args:
        img_url: Absolute URL of the album page.
        headers: Request headers used for the album page itself.

    Returns:
        None.

    Raises:
        requests.RequestException: If the album page itself cannot be
            fetched. Failures on individual images are reported on stdout
            and do not stop the loop.
    """
    print('下载装修效果图中' + img_url)
    img_response = requests.get(url=img_url, headers=headers, timeout=10)
    img_html = etree.HTML(img_response.text)

    # Image requests deliberately omit the HOST header sent for the album
    # page. The dict is constant, so build it once rather than per image.
    image_headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.107 Safari/537.36'}

    for li in img_html.xpath('//div[@id="BoxUl"]/ul//li'):
        # Query the two attributes separately: a union of @src and @src2
        # glues both values into one invalid URL when an <img> has both.
        # NOTE(review): presumably src2 is the lazy-load attribute holding
        # the real image URL, so it is preferred - confirm against the page.
        img_png = (processing(li.xpath('./span/img/@src2'))
                   or processing(li.xpath('./span/img/@src')))
        if not img_png:
            continue
        if img_png.startswith('//'):
            # Protocol-relative URL; requests needs an explicit scheme.
            img_png = 'https:' + img_png

        title = processing(li.xpath('./input/@value'))
        # The title comes from the scraped page; neutralise path separators
        # so it cannot point outside the working directory or break open().
        img_name = title.replace('/', '_').replace('\\', '_') + '.jpg'

        # Check before downloading so existing files cost no request.
        if os.path.exists(img_name):
            continue
        try:
            response_img = requests.get(url=img_png, headers=image_headers, timeout=10)
            # Do not save an HTTP error page as if it were an image.
            response_img.raise_for_status()
            with open(img_name, 'wb') as f:
                f.write(response_img.content)
        except (requests.RequestException, OSError):
            print('保存失败!')
        # Throttle between image requests.
        sleep(0.5)
if __name__ == '__main__':
    # Script entry point: analyse each listing page in turn.
    # range(1, 2) yields only page 1; raise the upper bound to crawl more.
    for page in range(1, 2):
        url = f'https://home.fang.com/album/s24/{page}/'
        print('正在分析:' + url)
        run(url=url)
        # Pause between listing pages.
        sleep(5)
复制代码
|
|