# 马上注册,结交更多好友,享用更多功能^_^
# 您需要 登录 才可以下载或查看,没有账号?立即注册
# 异步协程爬妹子,下载速度杠杠滴!内容我就不多说了,祝大伙身体健康
import requests
from lxml import etree
import os
import aiofiles
import aiohttp
import asyncio
import re
from bs4 import BeautifulSoup
import os.path
# Fallback regexes for pulling image URLs / the content div out of raw HTML.
# NOTE(review): both names are shadowed by locals of the same name inside
# dowload(), which uses BeautifulSoup instead — these compiled patterns
# appear to be dead code; confirm before removing.
jpg = re.compile(r'https.*?jpg',re.S)
div = re.compile(r'div class=.*?div class="clear"',re.S)
#jpg = re.compile(r'div class="single-content".*?(?P<jpg>https.*{0,}jpg)div class="clear"',re.S)
async def dowload(url):
    """Download every image of one gallery page into the current directory.

    Fetches *url*, locates the ``div.single-content`` block, and saves each
    image referenced by the ``<img>`` tags inside its ``<p>`` tags.  The file
    name is built from the last two path segments of the image URL.

    The function returns None; network/parse oddities are skipped rather
    than raised so one bad paragraph does not abort the whole gallery.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            page = await resp.text()
        soup = BeautifulSoup(page, "html.parser")
        content = soup.find('div', class_="single-content")
        if content is None:
            # Layout changed or an error page came back — nothing to save.
            return
        for paragraph in content.find_all("p"):
            img = paragraph.find("img")
            if img is None:
                # Not every <p> wraps an image; the original crashed here
                # with AttributeError on img.get("src").
                continue
            src = img.get("src")
            if not src:
                continue
            parts = src.split("/")
            name = parts[-2] + parts[-1]
            async with session.get(src) as res:
                async with aiofiles.open(name, "wb") as f:
                    await f.write(await res.content.read())
def callback(future):
    """Task-done callback: echo the finished task's result to stdout.

    Calling ``future.result()`` also re-raises any exception the task
    swallowed, so failures surface instead of passing silently.
    """
    outcome = future.result()
    print(outcome)
async def geturl():
    """Fetch the listing page at the module-level ``url``, extract every
    article link, and download all galleries concurrently via dowload().

    The original called blocking ``requests.get`` inside this coroutine,
    which stalls the event loop; the fetch now uses aiohttp.
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            page = await resp.text()
    tree = etree.HTML(page)
    articles = tree.xpath('//*[@id="main"]/article')
    tasks = []
    for article in articles:
        link = article.xpath('./div/figure/span/a/@href')[0]
        # One concurrent download task per gallery link.
        task = asyncio.create_task(dowload(link))
        task.add_done_callback(callback)
        tasks.append(task)
    # asyncio.wait() raises ValueError on an empty collection — guard
    # against a page with no articles (bad page number, layout change).
    if tasks:
        await asyncio.wait(tasks)
if __name__ == '__main__':
    y_url = "http://www.liangtulaile.com"
    dic = {"性感": "/xinggan/", "尤物": "/youwu/", "制服": "/zhifu/", "丝袜": "/siwa/",
           "清纯": "/qingchun/", "Cosplay": "/cosplay/"}
    # Remember where we started: the original chdir'd into each download
    # folder and never came back, so every later folder nested inside the
    # previous one.
    base_dir = os.getcwd()
    while True:
        print("输入“q” 退出")
        guss = input("你想下载的类型(性感 尤物 制服 丝袜 清纯 Cosplay):")
        if guss == 'q':
            break
        if guss not in dic:
            # Unknown category used to raise KeyError and crash the program.
            print("无效的类型,请重新输入")
            continue
        namber = input("你想下载第几页:")
        print("开始下载,请稍等...")
        # geturl() reads this module-level name.
        url = y_url + dic[guss] + 'page/' + namber
        # Absolute per-run directory; exist_ok avoids the exists/mkdir race.
        target = os.path.join(base_dir, guss + namber)
        os.makedirs(target, exist_ok=True)
        os.chdir(target)
        try:
            # asyncio.run replaces the deprecated get_event_loop()/
            # run_until_complete pair and gives each run a fresh loop.
            asyncio.run(geturl())
        finally:
            os.chdir(base_dir)
    print("感谢使用!!")