|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
# coding=gbk
import aiohttp
from lxml import etree
import asyncio
import os
# Crawl pictures from the netbian gallery using asyncio coroutines.
# Create the download folder under the current working directory.
# exist_ok=True avoids the check-then-create race of a separate
# os.path.exists() test.
os.makedirs('./bian', exist_ok=True)
# Ask the user which listing page and which category to crawl.
# Surrounding whitespace is stripped because both values are concatenated
# directly into the request URL.
yema = input('请输入要爬取的页码:').strip()
leixiang = input('请输入详情页类型:(拼音方式)').strip()
# Shared lists that the crawl stages fill and read.
danye_url = []  # detail-page URLs found on the listing page
shuju_ye_url = []  # image URLs found on the detail pages
mz = []  # picture names, appended alongside shuju_ye_url
async def zhuye(yanma, leixiang):
    """Fetch one listing page and collect the detail-page URLs it links to.

    The listing URL is built from the category suffix and the page number.
    Every detail-page link found is turned into an absolute URL, appended to
    the module-level ``danye_url`` list and printed.

    Args:
        yanma: Page number as a string; it is concatenated into the URL.
        leixiang: Category name appended directly after ``/4k`` in the URL.
    """
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.62 Safari/537.36"
    }
    url = 'http://pic.netbian.com/4k' + leixiang + '/index_' + yanma + '.html'
    async with aiohttp.ClientSession() as session:
        # session.get() is already an async context manager, so it is entered
        # directly without an extra ``await``.
        async with session.get(url=url, headers=headers) as response:
            # Hand the raw bytes to the HTML parser so this step does not
            # depend on aiohttp guessing the page's charset in text().
            page_bytes = await response.read()
    tree = etree.HTML(page_bytes)
    if tree is None:
        # Nothing parseable came back (e.g. an empty body).
        return
    for li in tree.xpath('//div[@id="main"]/div[3]/ul/li'):
        hrefs = li.xpath('./a/@href')
        if not hrefs:
            # Skip list items that carry no link instead of raising IndexError.
            continue
        danyeurl = 'http://pic.netbian.com' + hrefs[0]
        danye_url.append(danyeurl)
        print(danyeurl)
async def danye(danye_url):
    """Visit each detail page and record its picture URL and picture name.

    For every URL in ``danye_url`` the page is downloaded and the first
    ``<img>`` inside ``div.photo_pic`` is read: its ``src`` (made absolute)
    is appended to the module-level ``shuju_ye_url`` list and its ``alt``
    text to the module-level ``mz`` list. Both lists are appended together,
    so entry *i* of one always belongs to entry *i* of the other.

    Args:
        danye_url: Iterable of absolute detail-page URLs.
    """
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.62 Safari/537.36"
    }
    # One session is shared by all requests instead of opening one per URL.
    async with aiohttp.ClientSession() as session:
        for page_url in danye_url:
            async with session.get(url=page_url, headers=headers) as response:
                # NOTE(review): the site is assumed to serve GBK-encoded
                # pages - confirm. Decoding explicitly replaces the former
                # latin-1 -> GBK round trip, which fails on text that has
                # already been decoded correctly.
                page_text = await response.text(encoding='gbk', errors='replace')
            tree = etree.HTML(page_text)
            if tree is None:
                continue
            srcs = tree.xpath('//div[@class="photo_pic"]//img/@src')
            alts = tree.xpath('//div[@class="photo_pic"]//img/@alt')
            if not srcs or not alts:
                # Skip the page entirely so the URL and name lists stay aligned.
                print('skipped (no picture found): ' + page_url)
                continue
            shuju_1 = 'http://pic.netbian.com' + srcs[0]
            shuju_ye_url.append(shuju_1)
            print(shuju_ye_url)
            mingzi = alts[0]
            mz.append(mingzi)
            print(mz)
async def shuju(shuju_ye_url, mz):
    """Download every picture and store it as ``./bian/<name>.jpg``.

    ``shuju_ye_url`` and ``mz`` are treated as parallel sequences: the
    picture at position *i* is saved under the name at position *i*. If the
    sequences differ in length, the surplus entries of the longer one are
    ignored.

    Args:
        shuju_ye_url: Iterable of absolute picture URLs.
        mz: Iterable of picture names, in the same order as the URLs.
    """
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.62 Safari/537.36"
    }
    # One session is shared by all downloads instead of opening one per URL.
    async with aiohttp.ClientSession() as session:
        # Pair each URL with its own name; writing every download to every
        # name would leave all files holding the same picture.
        for image_url, m in zip(shuju_ye_url, mz):
            async with session.get(url=image_url, headers=headers) as response:
                shu = await response.read()
            # Names come from page markup, so keep path separators out of
            # the file name.
            safe_name = str(m).replace('/', '_').replace('\\', '_')
            name = os.path.join('./bian', safe_name + '.jpg')
            with open(name, 'wb') as op:
                op.write(shu)
            print(str(m) + '打印成功')
#print(danye_url)
#print(shuju_ye_url)
#print(mz)
# Run the three stages strictly one after another. Each stage iterates the
# list that the previous stage fills, so scheduling them as concurrent tasks
# would let the later stages loop over still-empty lists and finish without
# downloading anything.
async def main():
    """Run the listing, detail-page and download stages in order."""
    await zhuye(yema, leixiang)
    await danye(danye_url)
    await shuju(shuju_ye_url, mz)


# Create a dedicated event loop, drive the pipeline to completion, and always
# close the loop afterwards.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
try:
    loop.run_until_complete(main())
finally:
    loop.close()
|
|