|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
上代码 # 导包
import requests
from time import sleep
import os
import threading
import parsel
if not os.path.exists('image'):
os.mkdir('image')
# base_url = 'https://anime-pictures.net/pictures/view_posts/0?lang=en'
headers = {
'User-Agent':
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'
}
def get(url, headers):
'''请求数据'''
response = requests.get(url, headers)
html_data = response.text
return html_data
def parsel_data(html_data):
'''筛选数据'''
selector = parsel.Selector(html_data)
result_list = selector.xpath('//span[@class="img_block_big"]')
for result in result_list:
image_url = result.xpath('./a/picture/source/img/@src').extract_first()
image_id = result.xpath('./a/picture/source/img/@id').extract_first()
img_url = 'https:' + image_url # 手动拼url
all_title = img_url
img_name = image_id + '.' + img_url.split('.')[-1]
img_data = requests.get(url=all_title, headers=headers).content
yield all_title, image_id, img_data, img_name
def save(all_title, image_id, img_data,img_name):
'''保存数据'''
try:
with open('image\\' + img_name, mode='wb') as f:
print('保存成功:', image_id)
f.write(img_data)
except:
pass
print('保存失败:', image_id, '(|・ω・` ))')
def start_save(base_url):
html_data = get(url=base_url, headers=headers)
for image_data in parsel_data(html_data):
all_title = image_data[0] # url https://xxxxxxxxxx...
img_id = image_data[1] # ID
img_data = image_data[2] # 数据
img_name = image_data[3] #文件名
save(all_title=all_title, image_id=img_id, img_data=img_data,img_name = img_name)
sleep(5)
def main(page):
for page in range(0, page + 1):
print('###############正在下载第{}页数据###############'.format(page))
base_url = 'https://anime-pictures.net/pictures/view_posts/0?lang=en'.format(page)
if page>0:
print('休息哈|ू・ω・` )')
sleep(2)
my_thread = threading.Thread(target=start_save, args=(base_url,)) #启动多线程
my_thread.setDaemon(True)
my_thread.start()
if __name__ == '__main__':
lock = threading.RLock()
main(6300)
异常,这个异常很奇怪
他一直显示保存成功 但是 实际的文件夹之保存了几张而已
问题已解决,三十秒内下载了上千张图片。
请看代码: # 导包
import requests
from time import sleep
import os
import threading
import parsel
import random
if not os.path.exists('image'):
os.mkdir('image')
# base_url = 'https://anime-pictures.net/pictures/view_posts/0?lang=en'
headers = {
'User-Agent':
'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36'
}
def get(url, headers):
'''请求数据'''
response = requests.get(url, headers)
html_data = response.text
return html_data
def parsel_data(html_data):
'''筛选数据'''
selector = parsel.Selector(html_data)
result_list = selector.xpath('//span[@class="img_block_big"]')
for result in result_list:
image_url = result.xpath('./a/picture/source/img/@src').extract_first()
image_id = result.xpath('./a/picture/source/img/@id').extract_first()
img_url = 'https:' + image_url # 手动拼url
all_title = img_url
img_name = image_id + '.' + img_url.split('.')[-1]
img_data = requests.get(url=all_title, headers=headers).content
yield all_title, image_id, img_data, img_name
def save(all_title, image_id, img_data,img_name):
'''保存数据'''
try:
with open('image\\' + str(random.randint(0,1000000)) + os.path.splitext(img_name)[1], mode='wb') as f:
print('保存成功:', image_id)
f.write(img_data)
except:
print('保存失败:', image_id, '(|・ω・` ))')
def start_save(base_url):
html_data = get(url=base_url, headers=headers)
for image_data in parsel_data(html_data):
all_title = image_data[0] # url https://xxxxxxxxxx...
img_id = image_data[1] # ID
img_data = image_data[2] # 数据
img_name = image_data[3] #文件名
save(all_title=all_title, image_id=img_id, img_data=img_data,img_name = img_name)
def main(page):
for page in range(0, page + 1):
print('###############正在下载第{}页数据###############'.format(page))
base_url = 'https://anime-pictures.net/pictures/view_posts/0?lang=en'.format(page)
if page>0:
print('休息哈|ू・ω・` )')
sleep(2)
my_thread = threading.Thread(target=start_save, args=(base_url,)) #启动多线程
my_thread.setDaemon(True)
my_thread.start()
if __name__ == '__main__':
lock = threading.RLock()
main(6300)
|
|