想要爬取任意页的图片,代码该怎么加?
import requests
import parsel
import os
from urllib.parse import urljoin

# First version of the script: download every wallpaper listed on the
# first index page of www.netbian.com into the output directory.

# Output directory prefix; the trailing separator lets file names be appended
# directly. os.path.join yields '壁纸\\' on Windows and '壁纸/' elsewhere.
filename = os.path.join('壁纸', '')
os.makedirs(filename, exist_ok=True)

base_url = 'http://www.netbian.com/'
url = base_url + 'index.htm'
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.82 Safari/537.36'
}
timeout = 10  # seconds; without it a stalled connection blocks forever
# Characters that cannot appear in a file name are replaced by '_'.
unsafe_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

response = requests.get(url=url, headers=headers, timeout=timeout)
# Let requests guess the charset from the body so the titles decode correctly.
response.encoding = response.apparent_encoding
selector = parsel.Selector(response.text)

for li in selector.css('.list li'):
    title = li.css('b::text').get()
    link = li.css('a::attr(href)').get()
    # Entries without a title or a link cannot be downloaded; skip them.
    if not title or not link:
        continue
    li_url = urljoin(base_url, link)
    response_2 = requests.get(url=li_url, headers=headers, timeout=timeout)
    selector_2 = parsel.Selector(response_2.text)
    img_src = selector_2.css('.pic img::attr(src)').get()
    if not img_src:
        continue
    img_url = urljoin(li_url, img_src)
    # Send the same headers as the page requests when fetching the image.
    img_response = requests.get(url=img_url, headers=headers, timeout=timeout)
    if not img_response.ok:
        # Do not save an error page under a .jpg name.
        print('skipped', title, img_url, img_response.status_code)
        continue
    with open(filename + title.translate(unsafe_chars) + '.jpg', mode='wb') as f:
        f.write(img_response.content)
    print(title, img_url)

# --- second version of the script (adds page selection) starts here ---
import requests
import parsel
import os
# Output directory prefix. The trailing separator is kept so callers can build
# a path with `filename + name`; os.path.join yields '壁纸\\' on Windows and
# '壁纸/' on other systems instead of a hard-coded backslash.
filename = os.path.join('壁纸', '')
# exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
os.makedirs(filename, exist_ok=True)
def main(page=1):
    """Download every wallpaper listed on one index page of www.netbian.com.

    Each list entry that has a title is followed to its detail page; the image
    found under ``.pic img`` is downloaded and written to
    ``filename + <title> + '.jpg'``, where ``filename`` is the module-level
    output directory prefix. The title and image URL are printed after each
    successful save.

    Args:
        page: Index page number. Page 1 is fetched from ``index.htm``; any
            other value ``n`` is fetched from ``index_n.htm``.

    Raises:
        requests.RequestException: If a request fails or times out.
        OSError: If an image file cannot be written.
    """
    from urllib.parse import urljoin

    base_url = 'http://www.netbian.com/'
    timeout = 10  # seconds; without it a stalled connection blocks forever
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.82 Safari/537.36'
    }
    # Characters that cannot appear in a file name are replaced by '_'.
    unsafe_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

    if page == 1:
        url = base_url + 'index.htm'
    else:
        url = f'{base_url}index_{page}.htm'

    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Let requests guess the charset from the body so the titles decode
    # correctly.
    response.encoding = response.apparent_encoding
    selector = parsel.Selector(response.text)

    for li in selector.css('.list li'):
        title = li.css('b::text').get()
        link = li.css('a::attr(href)').get()
        # Entries without a title or a link cannot be downloaded; skip them.
        if not title or not link:
            continue
        li_url = urljoin(base_url, link)
        response_2 = requests.get(url=li_url, headers=headers, timeout=timeout)
        selector_2 = parsel.Selector(response_2.text)
        img_src = selector_2.css('.pic img::attr(src)').get()
        if not img_src:
            continue
        img_url = urljoin(li_url, img_src)
        # Send the same headers as the page requests when fetching the image.
        img_response = requests.get(url=img_url, headers=headers, timeout=timeout)
        if not img_response.ok:
            # Do not save an error page under a .jpg name.
            print('skipped', title, img_url, img_response.status_code)
            continue
        with open(filename + title.translate(unsafe_chars) + '.jpg', mode='wb') as f:
            f.write(img_response.content)
        print(title, img_url)
# Run the crawler only when this file is executed as a script.
if __name__ == '__main__':
    main(2)  # crawl the second index page
页:
[1]