jandan 的 URL 改版过了,所以现在看起来和以前不一样。
girl 后面那串“乱码”可以自己算出来,也可以直接从当前页面里取到下一页的 URL。
你可以参考这个:
# _*_ coding: utf-8 _*_
# Developer: suchocolate
# Date: 8/26/2020 15:09
# File name: jiandan.py
# Development tool: PyCharm
import requests
import os
import re
def main():
    """Download the images from the newest pages of http://jandan.net/girl.

    Creates a ``pics`` directory under the current working directory (if it
    does not exist yet), changes into it, crawls up to ``num`` listing pages
    starting at the landing page, and saves every ``<img src=...>`` target
    found on those pages into that directory.

    Each page's "older comments" link is read from the page itself, so the
    opaque token after ``/girl/`` never has to be computed.

    Network failures are reported with ``print`` and do not abort the run:
    a failed listing page stops the crawl (whatever was collected so far is
    still downloaded); a failed image is skipped.
    """
    # Local import so this function is self-contained; urljoin resolves both
    # protocol-relative ("//host/path") and absolute links correctly.
    from urllib.parse import urljoin

    dir_name = 'pics'  # directory the images are stored in
    os.makedirs(dir_name, exist_ok=True)
    os.chdir(dir_name)

    num = 3  # number of listing pages to crawl
    url = 'http://jandan.net/girl'
    headers = {'user-agent': 'firefox'}
    timeout = 5  # seconds; applied to every request so a stalled server cannot hang the run

    # Compiled once, used on every page.
    next_page_re = re.compile(r'Comments" href="(.*?)"')
    img_re = re.compile(r'<img src="(.*?)"')

    result = []  # image URLs collected from all crawled pages
    page_url = url
    for _ in range(num):
        try:
            r = requests.get(page_url, headers=headers, timeout=timeout)
            r.raise_for_status()
        except requests.RequestException as e:
            print(e)
            break
        # Collect this page's images (the landing page included).
        result.extend(img_re.findall(r.text))
        next_links = next_page_re.findall(r.text)
        if not next_links:
            # No link to an older page: nothing further to crawl.
            break
        page_url = urljoin(page_url, next_links[0])

    pic_num = len(result)
    print(f'总共{pic_num}张图片')

    dl_counter = 1  # counts successfully saved images only
    for item in result:
        pic_name = item.split('/')[-1]
        if not pic_name:
            # A URL ending in "/" yields no usable file name.
            continue
        try:
            r = requests.get(urljoin(url, item), headers=headers, timeout=timeout)
            r.raise_for_status()
        except requests.RequestException as e:
            print(e)
            # Skip this image; otherwise the previous response would be
            # written to disk under this image's name.
            continue
        with open(pic_name, 'wb') as f:
            f.write(r.content)
        print(f'已下载{pic_name}, 共下载{dl_counter}。')
        dl_counter += 1
# Start the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
|