|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import requests
from bs4 import BeautifulSoup
import os
path=os.getcwd()
passage=0
print("请配合笔趣阁使用http://www.blkzfk.com")
name=input("请输入小说名:")
name="\\"+name+".txt"
url=input("开始章节地址:")
endurl=input("结束章节地址:")
head={}
head['User-Agent']='Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.50'
while True:
r=requests.get(url,headers=head)
r.encoding=r.apparent_encoding
soup=BeautifulSoup(r.text,"html.parser")
#标题
title=soup.select("#wrapper > div.content_read > div > div.bookname > h1")[0].get_text()
#正文部分
text=soup.select("#content.content")[0].get_text()
with open(path+name,'a',encoding='utf-8') as f:
for i in title:
f.write(i)
f.write('\n')
for x in text:
f.write(x)
f.write('\n')
passage+=1
nexturl="http://www.blkzfk.com/zfk"+soup.select("#wrapper > div.content_read > div > div.bookname > div.bottem1 > a:nth-child(1)")[0]['href']
print(f"已成功爬取第{passage}章")
if(url==endurl):
break
url=nexturl
os.system("pause")
Traceback (most recent call last):
File "C:\Users\10429\Desktop\novel-spider.py", line 18, in <module>
title=soup.select("#wrapper > div.content_read > div > div.bookname > h1")[0].get_text()
IndexError: list index out of range
|
|