一个列表推导式的错误(代码内含爬虫)
代码:from bs4 import BeautifulSoup
import requests as re
headers={}
headers["User-Agent"]="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.131 Safari/537.36 Edg/92.0.902.73"
web = None
soup = None
page_url = ["https://movie.douban.com/top250"]
def fulsh(url_name=''):
global web,soup
web = re.get(url_name,headers=headers)
soup = BeautifulSoup(web.text,"html.parser")
def find():
a=soup.find_all("div",class_="hd")
name=]
b = soup.find_all("div",class_="bd")
writer = [x.p.text.strip()\
.replace("\n",'').replace("\t",'').replace(" ",'').replace("\xa0",'')\
for x in ]
c = soup.find_all("p",class_="quote")
world = ]
b = (name,writer,world)
return [(name+' ',writer+' ',world) for x in range(len(name))]
#return [(b+' ',b+' ',b+' ') for x in range(len(b))]
def main():
global page_url
return_=[]
fulsh(page_url)
page_url.extend([''.join(["https://movie.douban.com/top250",x.attrs['href']]) for x in soup.find_all("div",class_="paginator").find_all("a")])
for i in page_url:
fulsh(i)
return_.extend(find())
retrun_='\n'.join(return_)
return return_
if __name__ == '__main__':
main()
报错代码:
Traceback (most recent call last):
File "C:/Users/中维电器维修/Desktop/top250.py", line 37, in <module>
main()
File "C:/Users/中维电器维修/Desktop/top250.py", line 33, in main
return_.extend(find())
File "C:/Users/中维电器维修/Desktop/top250.py", line 22, in find
return [(name+' ',writer+' ',world) for x in range(1,23)]
File "C:/Users/中维电器维修/Desktop/top250.py", line 22, in <listcomp>
return [(name+' ',writer+' ',world) for x in range(1,23)]
IndexError: list index out of range
本人python版本是3.8和3.9 列表超出索引了。
return [(name+' ',writer+' ',world) for x in range(len(name))]
索引用的全部是x?是否长度会不一样? 最基本的排错方法,发现哪个列表索引超出范围
就在生成该列表之后,使用该列表之前print一下,看看是否是空列表或者真的超索引了 wp231957 发表于 2021-8-17 16:18
最基本的排错方法,发现哪个列表索引超出范围
就在生成该列表之后,使用该列表之前print一下,看看是否 ...
不是,列表没问题,都是25,我自己导出来之后就没问题,但在函数里执行就有问题,不信你自己试试
页:
[1]