|
发表于 2022-4-8 17:18:37
|
显示全部楼层
import os
import re
from urllib.parse import urljoin

import parsel
import requests
# Directory that downloaded wallpapers are saved into, kept with a trailing
# path separator because other code builds paths as `filename + name`.
# os.path.join(..., '') appends the platform's own separator, so this stays
# '壁纸\\' on Windows but becomes a real directory path on other systems
# instead of a name that merely ends in a backslash.
filename = os.path.join('壁纸', '')
# exist_ok=True replaces the exists()/mkdir() pair and its check-then-create race.
os.makedirs(filename, exist_ok=True)
def main(page=1):
    """Download the wallpapers listed on one index page of www.netbian.com.

    For every ``.list li`` entry that has a ``<b>`` title, the entry's detail
    page is fetched, the ``.pic img`` source is read from it, and the image
    is saved as ``<title>.jpg`` inside the module-level ``filename``
    directory. The title and image URL are printed after each successful
    download.

    Args:
        page: 1-based index page number. Page 1 maps to ``index.htm``; any
            other value maps to ``index_<page>.htm``.

    Entries without a title, link or image source are skipped. An entry
    whose requests fail or return an HTTP error is reported and skipped, so
    one bad item does not abort the rest of the page. A failure on the
    index-page request itself still propagates to the caller.
    """
    base_url = 'http://www.netbian.com/'
    if page == 1:
        url = urljoin(base_url, 'index.htm')
    else:
        url = urljoin(base_url, f'index_{page}.htm')
    headers = {
        'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.82 Safari/537.36'
    }
    # Without a timeout, requests waits indefinitely on a stalled connection.
    timeout = 10

    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Let requests guess the charset from the body so the titles decode correctly.
    response.encoding = response.apparent_encoding
    selector = parsel.Selector(response.text)

    for li in selector.css('.list li'):
        title = li.css('b::text').get()
        href = li.css('a::attr(href)').get()
        # Entries lacking a title or a link cannot be downloaded or named.
        if not title or not href:
            continue

        # urljoin copes with both leading-slash and absolute hrefs, which
        # plain string concatenation does not.
        li_url = urljoin(base_url, href)
        try:
            response_2 = requests.get(url=li_url, headers=headers, timeout=timeout)
            selector_2 = parsel.Selector(response_2.text)
            img_url = selector_2.css('.pic img::attr(src)').get()
            if not img_url:
                continue
            img_url = urljoin(li_url, img_url)
            # Send the same headers as the page requests for consistency.
            img_response = requests.get(url=img_url, headers=headers, timeout=timeout)
        except requests.RequestException as exc:
            print(f'skipped {title}: {exc}')
            continue

        # Do not save an HTTP error body under a .jpg name.
        if not img_response.ok:
            print(f'skipped {title}: HTTP {img_response.status_code}')
            continue

        # Replace characters that are invalid in file names (notably on
        # Windows) so an unusual title cannot break or redirect the write.
        safe_title = re.sub(r'[\\/:*?"<>|]', '_', title).strip()
        with open(os.path.join(filename, safe_title + '.jpg'), mode='wb') as f:
            f.write(img_response.content)
        print(title, img_url)
# Script entry point: only runs when the file is executed directly.
if __name__ == "__main__":
    main(page=2)  # crawl the second index page
复制代码 |
|