|
发表于 2020-9-7 09:10:27
|
显示全部楼层
已更新
- # _*_ coding: utf-8 _*_
- # Developer: suchcoolate
- # Date: 6/28/2020 09:25
- # File name: baidu_pic_search_downloader.py
- # Development tool: PyCharm
- import requests
- import os
- import re
# Headers sent with the landing-page request and with every image download.
_INDEX_HEADERS = {'user-agent': 'firefox'}
# Headers sent with the paginated JSON requests.
_AJAX_HEADERS = {'user-agent': 'firefox', 'X-Requested-With': 'XMLHttpRequest'}
# Seconds to wait on any single request; without a timeout a stalled
# connection would block the script indefinitely.
_REQUEST_TIMEOUT = 10
# Number of results requested per JSON page (the 'rn' parameter).
_PAGE_SIZE = 30


def _collect_image_urls(name, num):
    """Gather image URLs for the search keyword *name*.

    Requests the search landing page once, then *num* pages from the JSON
    endpoint, collecting every non-empty ``hoverURL`` value.

    Args:
        name: Search keyword sent as the query word.
        num: Number of JSON result pages to request after the landing page.

    Returns:
        A list of URL strings in the order they were seen, without
        duplicates and without empty entries.

    Raises:
        requests.RequestException: If a listing request cannot be completed
            (connection failure, timeout, ...).
    """
    index_url = 'https://image.baidu.com/search/index?'
    index_params = {
        'tn': 'baiduimage',
        'ipn': 'r',
        'ct': '201326592',
        'cl': '2',
        'lm': '-1',
        'st': '-1',
        'fm': 'index',
        'fr': '',
        'hs': '0',
        'xthttps': '111111',
        'sf': '1',
        'fmq': '',
        'pv': '',
        'ic': '0',
        'nc': '1',
        'z': '',
        'se': '1',
        'showtab': '0',
        'fb': '0',
        'width': '',
        'height': '',
        'face': '0',
        'istype': '2',
        'ie': 'utf-8',
        'word': name,
        'oq': name,
        'rsp': '-1',
    }
    ajax_url = 'https://image.baidu.com/search/acjson?'
    ajax_params = {
        'tn': 'resultjson_com',
        'ipn': 'rj',
        'ct': '201326592',
        'is': '',
        'fp': 'result',
        'queryWord': name,
        'cl': '2',
        'lm': '-1',
        'ie': 'utf-8',
        'oe': 'utf-8',
        'adpicid': '',
        'st': '-1',
        'z': '',
        'ic': '0',
        'hd': '',
        'latest': '',
        'copyright': '',
        'word': name,
        's': '',
        'se': '',
        'tab': '',
        'width': '',
        'height': '',
        'face': '0',
        'istype': '2',
        'qc': '',
        'nc': '1',
        'fr': '',
        'expermode': '',
        'force': '',
        'pn': str(_PAGE_SIZE),
        'rn': str(_PAGE_SIZE),
        'gsm': '',
    }

    urls = []

    # Images embedded in the landing page.
    response = requests.get(
        index_url,
        headers=_INDEX_HEADERS,
        params=index_params,
        timeout=_REQUEST_TIMEOUT,
    )
    urls.extend(re.findall(r'"hoverURL":"(.*?)"', response.text))

    # Paginated JSON results.
    for page in range(1, num + 1):
        response = requests.get(
            ajax_url,
            headers=_AJAX_HEADERS,
            params=ajax_params,
            timeout=_REQUEST_TIMEOUT,
        )
        try:
            payload = response.json()
        except ValueError:
            # The body was not valid JSON; keep what has been collected so
            # far instead of aborting the whole run.
            print(f'第{page}页结果解析失败,停止翻页。')
            break
        if not isinstance(payload, dict):
            break

        entries = payload.get('data') or []
        # The last entry is skipped, as in the original code
        # (presumably a trailing placeholder -- TODO confirm).
        for entry in entries[:-1]:
            if isinstance(entry, dict) and entry.get('hoverURL'):
                urls.append(entry['hoverURL'])

        # Feed the pagination values for the next request.
        ajax_params['gsm'] = payload.get('gsm', ajax_params['gsm'])
        ajax_params['pn'] = str(_PAGE_SIZE * (page + 1))

    # The regex above can capture empty strings, which are not fetchable;
    # drop them together with duplicates while preserving order.
    return [url for url in dict.fromkeys(urls) if url]


def _filename_from_url(url, fallback):
    """Derive a file name from the last path segment of *url*.

    Any ``?query`` / ``#fragment`` suffix is removed and characters that are
    not allowed in file names on common platforms are replaced with ``_``.
    Returns *fallback* when nothing usable remains.
    """
    path = url.split('#', 1)[0].split('?', 1)[0]
    candidate = re.sub(r'[\\/:*?"<>|]', '_', path.rsplit('/', 1)[-1])
    return candidate or fallback


def _download_images(urls, dir_name):
    """Download every URL in *urls* into the directory *dir_name*.

    The directory is created when missing. A URL that cannot be fetched, or
    that answers with an HTTP error status, is reported and skipped so one
    bad link does not stop the remaining downloads.

    Returns:
        The number of images written.
    """
    os.makedirs(dir_name, exist_ok=True)

    counter = 0  # images successfully saved so far
    for url in urls:
        try:
            response = requests.get(
                url, headers=_INDEX_HEADERS, timeout=_REQUEST_TIMEOUT
            )
            response.raise_for_status()
        except requests.RequestException as exc:
            print(f'下载失败,已跳过:{url}({exc})')
            continue

        counter += 1
        pic_name = _filename_from_url(url, f'image_{counter}')
        # Write into dir_name via a joined path rather than os.chdir, so the
        # working directory of the process is left untouched.
        with open(os.path.join(dir_name, pic_name), 'wb') as f:
            f.write(response.content)
        print(f'已下载{pic_name},共下载{counter}张。')
    return counter


def main(name='鞠婧祎高清壁纸', num=2, dir_name='pics'):
    """Search Baidu Images for *name* and save the results to *dir_name*.

    Args:
        name: Search keyword.
        num: Number of additional result pages to fetch after the landing
            page.
        dir_name: Folder the images are written to; created if missing.

    Calling ``main()`` with no arguments uses the same keyword, page count
    and folder that were previously hard-coded.
    """
    urls = _collect_image_urls(name, num)
    _download_images(urls, dir_name)
# Start the downloader only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
复制代码 |
|