I tweaked it a bit for you:

import requests
from bs4 import BeautifulSoup
url = 'https://www.shicimingju.com/book/sanguoyanyi.html'
headers = {'User-Agent':
'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 '
'Safari/537.36'}
# Fetch the chapter index page; force UTF-8 so the Chinese text decodes correctly
page_text = requests.get(url, headers=headers)
page_text.encoding = 'utf-8'
soup = BeautifulSoup(page_text.text, 'lxml')
li_list = soup.select('.tabli')
print('li_list:',li_list)
fp = open('./sanguoyanyi.txt','w',encoding='utf-8')
for li in li_list:
    print(type(li))
    print(li)
    title = li.string
    # FIXME: li['href'] only works if '.tabli' matches the <a> tags themselves;
    # if it matches a wrapper element instead, use li.a['href'] here
    detail_url = 'http://www.shicimingju.com' + li['href']
    try:
        detail_page_text = requests.get(url=detail_url, headers=headers).content
        detail_soup = BeautifulSoup(detail_page_text, 'html.parser')
        div_tags = detail_soup.find_all('p')
        if div_tags:
            fp.write(title + '\n\n')
            for div_tag in div_tags:
                content = div_tag.text
                print(content)
                fp.write(content + '\n')
            else:
                # for-else: runs once the inner loop finishes (there is no break)
                fp.write('\n')
                print(title, 'crawled successfully')
        else:
            print(title, 'failed to crawl')
    except requests.exceptions.RequestException as e:
        print(f'request failed: {e}')
fp.close()
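
If the FIXME above is about the href lookup (li['href'] only works when '.tabli' matches the <a> tags themselves), here is a minimal sketch of a more defensive way to build each chapter URL. The '.tabli' selector is taken straight from your code and is an assumption about the page's current HTML; urljoin keeps the scheme and host consistent with the index page instead of hard-coding 'http://www.shicimingju.com':

from urllib.parse import urljoin

for node in soup.select('.tabli'):
    # the matched node may be the link itself, or a wrapper (e.g. an <li>) containing one
    a_tag = node if node.name == 'a' else node.find('a')
    if a_tag is None or not a_tag.get('href'):
        continue
    title = a_tag.get_text(strip=True)
    detail_url = urljoin(url, a_tag['href'])
    print(title, detail_url)

Printing a few of the generated detail_url values before running the full crawl is a quick way to confirm the selector is right.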