|
|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
刚开始学习爬虫,写的代码准备先爬豆瓣250第一页的信息,代码和小甲鱼的代码清单几乎一致(《爬取豆瓣TOP250电影排行榜(有彩蛋)》)。系统报错“IndexError: list index out of range”,有大佬知道是什么原因吗?谢谢!
代码如下:
import requests
import bs4
import re
def open_url(url):
headers={"user-agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36"}
res=requests.get(url,headers=headers)
return res
def find_movies(res):
soup=bs4.BeautifulSoup(res.text,"html.parser")
#电影名
movies=[]
targets=soup.find_all("div",class_="hd")
for each in targets:
movies.append(each.a.span.text)
#评分
ranks=[]
targets=soup.find_all("span",class_="rating_num")
for each in targets:
ranks.append("评分:%s"%each.text)
#资料
messages=[]
targets=soup.find_all("div",class_="bd")
for each in targets:
try:
messages.append(each.p.text.split("\n")[1].strip()+each.p.text.split("/n")[2].strip())
except:
continue
result = []
length = len(movies)
for i in range(length):
result.append(movies[i] + ranks[i] + messages[i] + '\n')
return result
def main():
host="https://movie.douban.com/top250"
res=open_url(host)
result=[]
result.extend(find_movies(res))
with open("豆瓣250.txt","w",encoding="utf-8") as f:
for each in result:
f.write(each)
if __name__ == "__main__":
main()
报错如下:
Traceback (most recent call last):
File "C:\Users\mi\Desktop\豆瓣250.py", line 52, in <module>
main()
File "C:\Users\mi\Desktop\豆瓣250.py", line 46, in main
result.extend(find_movies(res))
File "C:\Users\mi\Desktop\豆瓣250.py", line 37, in find_movies
result.append(movies[i] + ranks[i] + messages[i] + '\n')
IndexError: list index out of range
|
|