|
20鱼币
大佬们,我在自学 bs4 时遇到了编码(乱码)问题和运行报错,求解决。
# Site URL: https://sanguo.5000yan.com/
# Source code follows.
#
# Purpose: fetch the index page, select every `.sidamingzhu-list-mulu li`
# entry, fetch the page each entry links to, and write "title:text" lines to
# ./sanguo.txt.
#
# NOTE(review): the indentation of this snippet was lost when it was pasted;
# the statements after `if __name__ ...:` and `for li in san_list:` are
# presumably meant to be their indented bodies. The code is reproduced
# unchanged because it is the failing example the question is about.
import requests
from bs4 import BeautifulSoup
if __name__ =='__main__':
url='https://sanguo.5000yan.com/'
# Browser-like User-Agent sent with every request.
headers={ 'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.88 Safari/537.36'
}
req=requests.get(url=url,headers=headers).content  # Author's note: .content is used here because .text came out garbled
soup=BeautifulSoup(req,'lxml')
san_list=soup.select('.sidamingzhu-list-mulu li')
# NOTE(review): fp is never closed anywhere in the visible code.
fp=open('./sanguo.txt','w',encoding='utf-8')
for li in san_list:
title=li.a.string
list_url=li.a['href']
page_text=requests.get(url=list_url,headers=headers).content  # Author's note: both .content and .text come out garbled here
soup1=BeautifulSoup(page_text,'lxml')
# NOTE(review): likely cause of the AttributeError reported on the next line:
# the lookup runs on `soup` (the index page) rather than `soup1` (the page
# just fetched), and the second positional argument is the literal string
# 'class_="grap"' instead of the keyword argument class_="grap", so find()
# returns None.
result=soup.find('div','class_="grap"').text
fp.write(title +':'+result + '\n')  # Author's note: this is where it fails at runtime: AttributeError: 'NoneType' object has no attribute 'text'
print(title,'爬取成功!')
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

if __name__ == '__main__':
    # Scrape every chapter linked from the index page and append
    # "title:text" lines to ./sanguo.txt.
    url = 'https://sanguo.5000yan.com/'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.88 Safari/537.36',
    }
    # Without a timeout a stalled connection would hang the script forever.
    timeout = 10

    index_resp = requests.get(url=url, headers=headers, timeout=timeout)
    # Nothing useful can be done without the index page, so fail loudly.
    index_resp.raise_for_status()
    # Raw bytes are passed so BeautifulSoup can work out the encoding itself.
    soup = BeautifulSoup(index_resp.content, 'lxml')
    san_list = soup.select('.sidamingzhu-list-mulu li')

    # The context manager guarantees the file is flushed and closed even if
    # a request or parse step raises part-way through the loop.
    with open('./sanguo.txt', 'w', encoding='utf-8') as fp:
        for li in san_list:
            link = li.a
            # Skip list items that carry no usable link instead of crashing.
            if link is None or not link.get('href'):
                continue

            # .string is None when the <a> wraps nested tags; fall back to
            # the concatenated text in that case.
            title = link.string or link.get_text()
            # Resolve relative hrefs against the index URL; absolute hrefs
            # pass through unchanged.
            list_url = urljoin(url, link['href'])

            try:
                resp = requests.get(url=list_url, headers=headers, timeout=timeout)
                resp.raise_for_status()
            except requests.RequestException as exc:
                # One bad chapter should not abort the whole run.
                print(title, '爬取失败:', exc)
                continue

            # Force UTF-8 before reading .text; without this the decoded
            # text came out garbled.
            resp.encoding = 'utf-8'
            soup1 = BeautifulSoup(resp.text, 'lxml')

            # class_ must be a keyword argument, and the lookup must run on
            # the chapter page (soup1), not on the index page.
            content = soup1.find('div', class_='grap')
            if content is None:
                print(title, '爬取失败: 未找到正文')
                continue

            fp.write(title + ':' + content.text + '\n')
            print(title, '爬取成功!')
复制代码
|
|