|
|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
运行爬虫时,就会报错:
File "D:\Python(zzt)\甲鱼文件任务\爬虫\GetTop250.py", line 45, in find_depth
soup = bs4.BeautifulSoup(res.text,'html.parser')
AttributeError: 'NoneType' object has no attribute 'text'
还请各位大神帮忙指点!!!
(下面是源代码)
import requests
import bs4
import re
def open_url(url):
headers={'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'}
res = requests.get(url,headers=headers)
def find_movies(res):
soup=bs4.BeautifulSoup(res.text,'html.parser')
#获取电影名
movies=[]
target = soup.find_all('div',class_='hd')
for each in target:
movies.append(each.a.span.text)
#获取电影评分
ranks=[]
target = soup.fina_all('span',class_='rating_num')
for each in target:
ranks.append('%s' % each.text)
#获取电影资料
message=[]
target = soup.find_all('div',class_='bd')
for each in target:
try:
message.append(each.p.text.split('\n')[1].strip()+each.p.text.split('\n')[2].strip())
except:
continue
result=[]
length = len(movies)
for i in range(length):
result.append(movies[i]+ranks[i]+message[i]+'\n')
return result
#找出一共有多少个页面
def find_depth(res):
soup = bs4.BeautifulSoup(res.text,'html.parser')
depth = soup.find('span',class_='next').previous_sibling.text
return int(depth)
def main():
host='https://movie.douban.com/top250'
res=open_url(host)
depth = find_depth(res)
result = []
for i in range(depth):
url=host+ '?start='+str(25*i)+'&filter='
res = open_url(url)
result.extend(find_movies(res))
with open ('豆瓣250爬虫下载.txt','w',encode='utf-8')as f:
for each in result:
print('打印成功')
f.write(each)
if __name__=='__main__':
main()
这样试试:
- import requests
- import bs4
- import re
- def open_url(url):
- headers={'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'}
- res = requests.get(url,headers=headers)
- return res
- def find_movies(res):
- soup=bs4.BeautifulSoup(res.text,'html.parser')
- #获取电影名
- movies=[]
- target = soup.find_all('div',class_='hd')
- for each in target:
- movies.append(each.a.span.text)
-
- #获取电影评分
- ranks=[]
- target = soup.fina_all('span',class_='rating_num')
- for each in target:
- ranks.append('%s' % each.text)
-
- #获取电影资料
- message=[]
- target = soup.find_all('div',class_='bd')
- for each in target:
- try:
- message.append(each.p.text.split('\n')[1].strip()+each.p.text.split('\n')[2].strip())
- except:
- continue
-
- result=[]
- length = len(movies)
- for i in range(length):
- result.append(movies[i]+ranks[i]+message[i]+'\n')
- return result
- #找出一共有多少个页面
- def find_depth(res):
- soup = bs4.BeautifulSoup(res.text,'html.parser')
- depth = soup.find('span',class_='next').previous_sibling.text
- return int(depth)
-
- def main():
- host='https://movie.douban.com/top250'
- res=open_url(host)
- depth = find_depth(res)
- result = []
- for i in range(depth):
- url=host+ '?start='+str(25*i)+'&filter='
- res = open_url(url)
- result.extend(find_movies(res))
-
-
- with open ('豆瓣250爬虫下载.txt','w',encode='utf-8')as f:
- for each in result:
- print('打印成功')
- f.write(each)
-
- if __name__=='__main__':
- main()
复制代码
|
|