# Source: forum post published 2021-06-23 03:21:34.
import re
import requests
from bs4 import BeautifulSoup
from tqdm import tqdm
# Site root; chapter links on the index page are site-relative paths.
BASE_URL = 'https://www.sobiquge.com'
# Index page of the book, listing every chapter inside <dd> elements.
BOOK_URL = BASE_URL + '/book/29105/'
# Raw string so the backslashes in the Windows path are never read as escapes.
OUTPUT_PATH = r'D:\python\超神机械师.txt'
# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30

# Compiled once here rather than on every loop iteration.
CHAPTER_PATH_RE = re.compile(r'/book.*html')
TITLE_RE = re.compile(r'\d.*?_')


def fetch_soup(url):
    """Download *url* and return its HTML parsed with the lxml parser.

    Raises:
        requests.RequestException: on a network failure, a timeout, or an
            HTTP error status (via ``raise_for_status``).
        UnicodeDecodeError: if the response body is not valid UTF-8.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail loudly on 4xx/5xx instead of parsing an error page as a chapter.
    response.raise_for_status()
    return BeautifulSoup(response.content.decode(), 'lxml')


def extract_chapter_url(dd_html):
    """Return the absolute chapter URL found in one ``<dd>`` element's HTML.

    Args:
        dd_html: The ``<dd>`` element rendered as a string.

    Returns:
        ``BASE_URL`` joined with the first ``/book...html`` path found in
        *dd_html*, or ``None`` when the element contains no such path.
    """
    match = CHAPTER_PATH_RE.search(dd_html)
    if match is None:
        return None
    return BASE_URL + match.group()


def extract_chapter_title(title_html, fallback):
    """Return the chapter title taken from a page's ``<title>`` markup.

    The title is the text from the first digit up to (not including) the
    first following underscore.

    Args:
        title_html: The ``<title>`` element rendered as a string.
        fallback: Value returned when *title_html* has no such
            digit-to-underscore span.
    """
    match = TITLE_RE.search(title_html)
    if match is None:
        return fallback
    # Drop the trailing underscore that terminates the match.
    return match.group()[:-1]


def main():
    """Download every chapter listed on the index page into one text file.

    Each chapter is written to ``OUTPUT_PATH`` as its title on one line
    followed by the text of the page's ``id="content"`` element.
    """
    index_soup = fetch_soup(BOOK_URL)

    chapter_urls = []
    for dd in index_soup.find_all('dd'):
        chapter_url = extract_chapter_url(str(dd))
        # Skip <dd> entries that carry no chapter link rather than crashing.
        if chapter_url is not None:
            chapter_urls.append(chapter_url)

    with open(OUTPUT_PATH, 'w', encoding='utf-8') as fp:
        for chapter_url in tqdm(chapter_urls):
            chapter_soup = fetch_soup(chapter_url)
            content = chapter_soup.find(id="content")
            if content is None:
                # tqdm.write prints without breaking the progress bar.
                tqdm.write('Skipped (no content element): ' + chapter_url)
                continue
            title = extract_chapter_title(
                str(chapter_soup.find('title')), fallback=chapter_url
            )
            fp.write(title + '\n')
            fp.write(content.text + '\n')


if __name__ == '__main__':
    main()
# Author's note: written by a beginner learning by trial and error.