|  | 
 
| 
import requests
x
马上注册,结交更多好友,享用更多功能^_^您需要 登录 才可以下载或查看,没有账号?立即注册
 from bs4 import BeautifulSoup
 import os
 # Chapter-by-chapter novel downloader: starting from a chapter URL, fetch each
 # page, append its title and body text to "<cwd>/<novel name>.txt", then follow
 # the page's navigation link until the user-supplied end URL has been saved.
 path=os.getcwd()
 passage=0
 print("请配合笔趣阁使用http://www.blkzfk.com")
 name=input("请输入小说名:")
 # os.path.join yields the same "<cwd>\<name>.txt" on Windows as the old
 # hard-coded "\\" separator, and a valid path on other systems too.
 outfile=os.path.join(path,name+".txt")
 url=input("开始章节地址:")
 endurl=input("结束章节地址:")
 head={
     'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.50',
 }
 bookname_selector="#wrapper > div.content_read > div > div.bookname"
 while True:
     try:
         # A timeout keeps a stalled connection from hanging the script forever.
         r=requests.get(url,headers=head,timeout=30)
         r.raise_for_status()
     except requests.RequestException as err:
         print(f"请求失败,已停止:{url} ({err})")
         break
     r.encoding=r.apparent_encoding
     soup=BeautifulSoup(r.text,"html.parser")
     # Chapter title and chapter body.
     title_nodes=soup.select(bookname_selector+" > h1")
     text_nodes=soup.select("#content.content")
     # select() returns an empty list when the page layout does not match (wrong
     # URL, changed template, error page); indexing [0] blindly was the source
     # of "IndexError: list index out of range".
     if not title_nodes or not text_nodes:
         print(f"无法在页面中找到标题或正文,已停止:{url}")
         break
     title=title_nodes[0].get_text()
     text=text_nodes[0].get_text()
     with open(outfile,'a',encoding='utf-8') as f:
         # One write per string instead of one write per character.
         f.write(title+'\n')
         f.write(text+'\n')
     passage+=1
     print(f"已成功爬取第{passage}章")
     # Test for the end chapter before resolving the next link, so the final
     # chapter does not fail just because it has no usable navigation link.
     if url==endurl:
         break
     # NOTE(review): this takes the first <a> inside div.bottem1 and assumes it
     # is the next-chapter link - confirm against the site's markup.
     next_links=soup.select(bookname_selector+" > div.bottem1 > a:nth-child(1)")
     if not next_links or not next_links[0].get('href'):
         print(f"未找到下一章链接,已停止:{url}")
         break
     url="http://www.blkzfk.com/zfk"+next_links[0]['href']
 # "pause" is a Windows shell built-in; skip it elsewhere to avoid a shell error.
 if os.name=="nt":
     os.system("pause")
 
 Traceback (most recent call last):
 File "C:\Users\10429\Desktop\novel-spider.py", line 18, in <module>
 title=soup.select("#wrapper > div.content_read > div > div.bookname > h1")[0].get_text()
 IndexError: list index out of range
 
 | 
 |