| 
 | 
 
 
 楼主 |
发表于 2022-4-1 20:34:59
|
显示全部楼层
 
 
 
原代码: 
import requests 
import asyncio 
import aiohttp 
from lxml import etree 
import aiofiles 
""" 
1.同步操作:首先获取番茄小说指定小说的页面url,此处用的是西游记 
2.利用xpath将url中章节的名称以及超链接取出 
3.异步操作:将超链接拼接成新的url,进而获取对应章节的内容 
4.将内容进行保存 
""" 
async def aiodownload(href, name):
    """Download one chapter of the novel and save it to a text file.

    Args:
        href: chapter path taken from the table of contents (e.g. '/reader/...').
        name: chapter title, used as the output file name.
    """
    # Bug fix: the original wrote f'href' (the literal string "href"),
    # never interpolating the variable, so every request hit the same bad URL.
    url = 'https://fanqienovel.com' + href
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as resp:
            # Bug fix: aiohttp's .text() is a coroutine and must be awaited;
            # the original passed the unawaited bound method to etree.HTML.
            html = etree.HTML(await resp.text())
    # The original opened a second, redundant session and issued a duplicate
    # request here whose response was never used; one request is enough.
    content = html.xpath('//*[@class="muye-reader-content noselect"]/div/p/text()')
    async with aiofiles.open(f'{name}', 'w', encoding='utf-8') as f:
        # Bug fix: xpath returns a list of strings; the original passed the
        # list straight to f.write, which raises TypeError. Join paragraphs.
        await f.write('\n'.join(content))
 
 
 
async def get_name(url):
    """Fetch the novel's table-of-contents page and download every chapter.

    The TOC page is fetched synchronously (one request); the individual
    chapter downloads then run concurrently via aiodownload.

    Args:
        url: table-of-contents URL of the novel on fanqienovel.com.
    """
    headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.74 Safari/537.36 Edg/99.0.1150.55'}
    resp = requests.get(url, headers=headers)
    html = etree.HTML(resp.text)
    chapter = html.xpath('//*[@class="page-directory-content"]/div/div[2]')[0]
    divs = chapter.xpath('./div')
    # Collect one download coroutine per chapter link, then run them together.
    tasks = []
    for div in divs:
        name = div.xpath('./a/text()')[0]
        href = div.xpath('./a/@href')[0]
        tasks.append(aiodownload(href, name))
    # Bug fix: asyncio.wait() with bare coroutines was deprecated in 3.8 and
    # removed in 3.11; gather accepts coroutines directly and also propagates
    # exceptions instead of silently discarding them.
    await asyncio.gather(*tasks)
 
if __name__ == '__main__':
    # Entry point: crawl the table of contents of this novel (西游记 /
    # Journey to the West) and download all of its chapters.
    toc_url = 'https://fanqienovel.com/page/7042250335695408159'
    asyncio.run(get_name(toc_url))
 
 
 
 |