Here's the code:
# imports
import requests
from time import sleep
import os
import threading
import parsel

if not os.path.exists('image'):
    os.mkdir('image')

# base_url = 'https://anime-pictures.net/pictures/view_posts/0?lang=en'
headers = {
    'User-Agent':
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'
}

def get(url, headers):
    '''Fetch the page HTML'''
    response = requests.get(url, headers=headers)
    html_data = response.text
    return html_data

def parsel_data(html_data):
    '''Pick out the image URL and ID for every thumbnail on the page'''
    selector = parsel.Selector(html_data)
    result_list = selector.xpath('//span[@class="img_block_big"]')
    for result in result_list:
        image_url = result.xpath('./a/picture/source/img/@src').extract_first()
        image_id = result.xpath('./a/picture/source/img/@id').extract_first()
        img_url = 'https:' + image_url  # build the full URL by hand
        all_title = img_url
        img_name = image_id + '.' + img_url.split('.')[-1]
        img_data = requests.get(url=all_title, headers=headers).content
        yield all_title, image_id, img_data, img_name

def save(all_title, image_id, img_data, img_name):
    '''Write the image to disk'''
    try:
        with open('image\\' + img_name, mode='wb') as f:
            print('Saved:', image_id)
            f.write(img_data)
    except:
        print('Save failed:', image_id, '(|・ω・` ))')

def start_save(base_url):
    html_data = get(url=base_url, headers=headers)
    for image_data in parsel_data(html_data):
        all_title = image_data[0]  # url https://xxxxxxxxxx...
        img_id = image_data[1]     # ID
        img_data = image_data[2]   # binary image data
        img_name = image_data[3]   # file name
        save(all_title=all_title, image_id=img_id, img_data=img_data, img_name=img_name)
        sleep(5)

def main(page):
    for page in range(0, page + 1):
        print('############### Downloading page {} ###############'.format(page))
        base_url = 'https://anime-pictures.net/pictures/view_posts/0?lang=en'.format(page)
        if page > 0:
            print('Taking a short break |ू・ω・` )')
            sleep(2)
        my_thread = threading.Thread(target=start_save, args=(base_url,))  # spawn a worker thread
        my_thread.setDaemon(True)
        my_thread.start()

if __name__ == '__main__':
    lock = threading.RLock()  # note: created but never used
    main(6300)
The problem, and it's a strange one:
the script keeps printing "Saved" for every image, yet the actual folder ends up with only a handful of files.
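My guess at what is happening here (an inference from the code above, not something confirmed in the post): the URL template in main() contains no {} placeholder, so .format(page) changes nothing and every thread fetches the exact same page 0. Every thread then derives the exact same file names from the same image IDs, and open(..., mode='wb') silently overwrites whatever is already there, so the folder can never hold more files than that single page has images. A minimal sketch of the effect, with made-up IDs:

base_url = 'https://anime-pictures.net/pictures/view_posts/0?lang=en'.format(3)
print(base_url)  # unchanged: there is no {} placeholder for .format() to fill

# pretend five threads parsed the same page and derived the same names (made-up IDs)
names_per_thread = [['652509.jpg', '652510.png', '652511.jpg'] for _ in range(5)]
all_saves = [name for names in names_per_thread for name in names]
print(len(all_saves))       # 15 "Saved" lines would be printed
print(len(set(all_saves)))  # but only 3 distinct files can ever exist on disk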
Problem solved: over a thousand images downloaded within thirty seconds.
Here's the updated code:
# imports
import requests
from time import sleep
import os
import threading
import parsel
import random

if not os.path.exists('image'):
    os.mkdir('image')

# base_url = 'https://anime-pictures.net/pictures/view_posts/0?lang=en'
headers = {
    'User-Agent':
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'
}

def get(url, headers):
    '''Fetch the page HTML'''
    response = requests.get(url, headers=headers)
    html_data = response.text
    return html_data

def parsel_data(html_data):
    '''Pick out the image URL and ID for every thumbnail on the page'''
    selector = parsel.Selector(html_data)
    result_list = selector.xpath('//span[@class="img_block_big"]')
    for result in result_list:
        image_url = result.xpath('./a/picture/source/img/@src').extract_first()
        image_id = result.xpath('./a/picture/source/img/@id').extract_first()
        img_url = 'https:' + image_url  # build the full URL by hand
        all_title = img_url
        img_name = image_id + '.' + img_url.split('.')[-1]
        img_data = requests.get(url=all_title, headers=headers).content
        yield all_title, image_id, img_data, img_name

def save(all_title, image_id, img_data, img_name):
    '''Write the image to disk under a random file name'''
    try:
        with open('image\\' + str(random.randint(0, 1000000)) + os.path.splitext(img_name)[1], mode='wb') as f:
            print('Saved:', image_id)
            f.write(img_data)
    except:
        print('Save failed:', image_id, '(|・ω・` ))')

def start_save(base_url):
    html_data = get(url=base_url, headers=headers)
    for image_data in parsel_data(html_data):
        all_title = image_data[0]  # url https://xxxxxxxxxx...
        img_id = image_data[1]     # ID
        img_data = image_data[2]   # binary image data
        img_name = image_data[3]   # file name
        save(all_title=all_title, image_id=img_id, img_data=img_data, img_name=img_name)

def main(page):
    for page in range(0, page + 1):
        print('############### Downloading page {} ###############'.format(page))
        base_url = 'https://anime-pictures.net/pictures/view_posts/0?lang=en'.format(page)
        if page > 0:
            print('Taking a short break |ू・ω・` )')
            sleep(2)
        my_thread = threading.Thread(target=start_save, args=(base_url,))  # spawn a worker thread
        my_thread.setDaemon(True)
        my_thread.start()

if __name__ == '__main__':
    lock = threading.RLock()  # note: created but never used
    main(6300)
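A note on the fix (my suggestion, not part of the original post): random.randint(0, 1000000) makes name collisions unlikely but not impossible, and it throws the image ID away entirely. Here is a small sketch of an alternative naming helper that keeps the ID and is practically collision-free; unique_path is a hypothetical name, not something from the script above:

import os
import uuid

def unique_path(folder, image_id, img_name):
    '''Keep the original image ID and append a short random suffix so names do not collide.'''
    ext = os.path.splitext(img_name)[1] or '.jpg'  # fall back to .jpg if no extension was parsed
    return os.path.join(folder, '{}_{}{}'.format(image_id, uuid.uuid4().hex[:8], ext))

# example with values shaped like the ones the spider yields (made-up values)
print(unique_path('image', '652509', '652509.jpg'))  # e.g. image\652509_1a2b3c4d.jpg on Windows

save() could then open unique_path('image', image_id, img_name) instead of building the random name inline.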
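One more design note, again a suggestion of my own rather than anything from the post: main() starts one daemon thread per page and never joins them, so the process can exit while downloads are still in flight, and with main(6300) up to 6301 threads may be alive at once. A bounded pool from concurrent.futures does the same job and waits for the work to finish. The sketch below reuses start_save() from the script above and assumes the URL template is given a {} placeholder so each task really fetches a different page:

from concurrent.futures import ThreadPoolExecutor

URL_TEMPLATE = 'https://anime-pictures.net/pictures/view_posts/{}?lang=en'  # assumed {} placeholder

def crawl(pages, workers=8):
    '''Fetch the given number of pages with a fixed-size thread pool instead of one thread per page.'''
    with ThreadPoolExecutor(max_workers=workers) as pool:
        for page in range(pages + 1):
            pool.submit(start_save, URL_TEMPLATE.format(page))
    # leaving the with-block waits for every submitted page to finish

if __name__ == '__main__':
    crawl(6300)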