# Based on an expert's code, I worked out the parts I wanted on my own.
import requests
import bs4
import openpyxl
#import os
# os.chdir("D:\BianCheng\Pythonwork\爬虫爬出来资料")
# Request headers passed to requests.get() by open_url(); only a
# User-Agent string (desktop Chrome on macOS) is set.
header = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/87.0.4280.88 Safari/537.36'
    ),
}
def open_url(url, timeout=10):
    """Fetch *url* with the module-level ``header`` and return the response.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection, so a finite default is supplied.

    Returns:
        The ``requests.Response`` object, returned as-is; the status code is
        not inspected here.

    Raises:
        requests.exceptions.RequestException: Propagated from
            ``requests.get`` (for example when the timeout expires).
    """
    return requests.get(url, headers=header, timeout=timeout)
def look_movie(res):
    """Extract the text of the first title span from each ``div.hd`` element.

    Args:
        res: Response-like object whose ``text`` attribute holds the HTML of
            one listing page.

    Returns:
        A list of strings in document order, one per ``<div class="hd">``
        found in the page; each string is the text of the first ``<span>``
        inside that div's first ``<a>``.
    """
    parsed = bs4.BeautifulSoup(res.text, 'html.parser')
    return [
        block.a.span.text
        for block in parsed.find_all("div", class_='hd')
    ]
def main():
    """Download every listing page, collect the titles, and write the sheet.

    Pages are requested with ``start`` offsets 0, 25, ..., 225 (ten requests
    in total). The collected titles are then handed to ``to_excel`` together
    with the offset just past the last page (250).
    """
    page_size = 25
    last_offset = 225
    titles = []
    for offset in range(0, last_offset + 1, page_size):
        page_url = f"http://movie.douban.com/top250?start={offset}&filter="
        titles.extend(look_movie(open_url(page_url)))
    # The second argument is the offset one page past the last one fetched.
    to_excel(titles, last_offset + page_size)
def to_excel(data, yeshu):
    """Write rank labels and titles to an .xlsx workbook, ten per row.

    The sheet is built from 25 groups of three rows each: a row of rank
    labels (ranks 1-10, 11-20, ... up to 250), a row with the matching slice
    of *data*, and an empty row as a separator. The number of groups is
    fixed; if *data* holds fewer than 250 items the later title rows are
    short or empty.

    Args:
        data: Sequence of titles where index 0 corresponds to rank 1.
        yeshu: Accepted for interface compatibility; not read by this
            function.

    Returns:
        None. The workbook is saved under the relative path
        ``豆瓣250电影榜.xlsx``.
    """
    total_ranks = 250
    per_row = 10

    workbook = openpyxl.Workbook()
    # NOTE(review): presumably enables value type inference on older
    # openpyxl releases — confirm it still has any effect.
    workbook.guess_types = True
    sheet = workbook.active

    for start in range(0, total_ranks, per_row):
        stop = start + per_row
        labels = ["第%d名" % rank for rank in range(start + 1, stop + 1)]
        sheet.append(labels)
        sheet.append(data[start:stop])
        # Empty row keeps consecutive groups visually separated.
        sheet.append([])

    workbook.save("豆瓣250电影榜.xlsx")
# Run the scrape only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|