|
发表于 2021-7-20 14:22:28
|
显示全部楼层
本楼为最佳答案
 jandan的url改过了,所以看起来不一样了。
girl后面的“乱码”可以算出来,也可以直接从当前页面获得下一个页面的url。
你可以参考这个:
- # _*_ coding: utf-8 _*_
- # Developer: suchocolate
- # Date: 8/26/2020 15:09
- # File name: jiandan.py
- # Development tool: PyCharm
- import requests
- import os
- import re
def main(num: int = 3, dir_name: str = 'pics') -> None:
    """Download the images found on a few jandan.net/girl comment pages.

    The landing page is fetched only to discover the first navigation link
    (the anchor matched by ``Comments" href="..."``, which the original
    author treats as the "next page" URL).  Up to ``num`` pages are then
    visited by following that link from page to page, every
    ``<img src="...">`` URL on them is collected, and each image is saved
    under the last path segment of its URL.

    Args:
        num: Maximum number of pages to follow from the landing page.
        dir_name: Directory the images are written to.  It is created if
            missing, and the process working directory is changed into it.

    Request failures are printed and do not raise: a failed page stops the
    page walk (the next link is unknown), a failed image is skipped.
    """
    # NOTE(review): images on the landing page itself are never collected,
    # only those on the pages reached through the navigation link.  This
    # matches the original behaviour -- confirm whether it is intended.
    os.makedirs(dir_name, exist_ok=True)
    os.chdir(dir_name)

    url = 'http://jandan.net/girl'
    headers = {'user-agent': 'firefox'}
    timeout = 5  # seconds; applied to every request so a stalled server cannot hang the run
    next_link_re = re.compile(r'Comments" href="(.*?)"')
    img_src_re = re.compile(r'<img src="(.*?)"')

    def fetch(address):
        """Return the response for *address*, or None after printing the error."""
        try:
            resp = requests.get(address, headers=headers, timeout=timeout)
            # Treat 4xx/5xx as failures so an error page is never parsed or saved.
            resp.raise_for_status()
        except requests.RequestException as e:
            print(e)
            return None
        return resp

    def find_next(html):
        """Return the first navigation link in *html*, or None if absent."""
        match = next_link_re.search(html)
        return match.group(1) if match else None

    r = fetch(url)
    if r is None:
        return
    nx_page = find_next(r.text)

    result = []  # image URLs gathered from all visited pages
    for _ in range(num):
        if nx_page is None:
            # No further navigation link: stop instead of raising IndexError.
            break
        # Links on the page are used as protocol-relative ('//host/path').
        r = fetch('http:' + nx_page)
        if r is None:
            break
        result.extend(img_src_re.findall(r.text))
        nx_page = find_next(r.text)

    print(f'总共{len(result)}张图片')

    dl_counter = 0
    for item in result:
        pic_name = item.split('/')[-1]
        if not pic_name:
            # URL ends with '/', so there is no usable file name.
            continue
        r = fetch('http:' + item)
        if r is None:
            # Skip this image; writing here would reuse a stale response body.
            continue
        with open(pic_name, 'wb') as f:
            f.write(r.content)
        dl_counter += 1
        print(f'已下载{pic_name}, 共下载{dl_counter}。')
# Start the download only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()
复制代码 |
|