Original code:
import requests
import asyncio
import aiohttp
from lxml import etree
import aiofiles

"""
1. Synchronous step: fetch the table-of-contents page of the target novel on fanqienovel (Journey to the West here)
2. Use xpath to pull each chapter's name and hyperlink out of that page
3. Asynchronous step: join each hyperlink into a full url and fetch the corresponding chapter's content
4. Save the content to disk
"""

async def aiodownload(href, name):
    url = 'https://fanqienovel.com' + href
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            # aiohttp's resp.text() is a coroutine and must be awaited
            html = etree.HTML(await resp.text())
    content = html.xpath('//*[@class="muye-reader-content noselect"]/div/p/text()')
    async with aiofiles.open(f'{name}', 'w', encoding='utf-8') as f:
        # xpath returns a list of paragraph strings, so join them before writing
        await f.write('\n'.join(content))

async def get_name(url):
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.74 Safari/537.36 Edg/99.0.1150.55'}
    resp = requests.get(url, headers=headers)
    html = etree.HTML(resp.text)
    chapter = html.xpath('//*[@class="page-directory-content"]/div/div[2]')[0]
    divs = chapter.xpath('./div')
    tasks = []
    for div in divs:
        name = div.xpath('./a/text()')[0]
        href = div.xpath('./a/@href')[0]
        # queue an async download task for this chapter
        tasks.append(aiodownload(href, name))
    # gather runs all the coroutines concurrently (asyncio.wait no longer accepts bare coroutines)
    await asyncio.gather(*tasks)

if __name__ == '__main__':
    url = 'https://fanqienovel.com/page/7042250335695408159'
    asyncio.run(get_name(url))
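
One caveat with step 3: the script above fires one request per chapter all at once, which a site may throttle or block. Below is a minimal sketch of the same fetch step with a single shared ClientSession and a concurrency cap; the helper names fetch_all/fetch_one and the limit of 5 are my own choices, not part of the original code:

import asyncio
import aiohttp

async def fetch_all(hrefs):
    # cap concurrency so at most 5 requests are in flight (arbitrary limit, tune as needed)
    sem = asyncio.Semaphore(5)
    # one shared session reuses connections instead of opening a new session per chapter
    async with aiohttp.ClientSession() as session:
        async def fetch_one(href):
            async with sem:
                async with session.get('https://fanqienovel.com' + href) as resp:
                    return await resp.text()
        # gather preserves input order and propagates any exception
        return await asyncio.gather(*(fetch_one(h) for h in hrefs))

The semaphore only bounds how many requests run at once; all the coroutines are still created up front, so memory use grows with the chapter count, which is fine for a single novel.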