怎么把从豆瓣爬到的数据存进一个表格里?
import requests
import bs4
import openpyxl
import os

# Switch the working directory; relative file names used later in the script
# are resolved against it. A raw string keeps the Windows backslashes literal
# instead of relying on unrecognised escape sequences (same resulting path).
os.chdir(r"D:\BianCheng\Pythonwork\爬虫爬出来资料")

# Request headers passed to requests.get(): a browser-like User-Agent.
header = {}
header['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'
def open_url(url, timeout=10):
    """Fetch *url* using the module-level ``header`` dict.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout by default, so a stalled connection would otherwise
            block indefinitely.

    Returns:
        The response object returned by ``requests.get``.
    """
    res = requests.get(url, headers=header, timeout=timeout)
    return res
def look_movie(res):
    """Extract one text string from every ``<div class="hd">`` in a page.

    Args:
        res: Object whose ``text`` attribute holds the HTML to parse.

    Returns:
        A list with, for each matching ``div``, the text of the first
        ``<span>`` inside its first ``<a>``, in the order ``find_all``
        yields the divs.
    """
    document = bs4.BeautifulSoup(res.text, 'html.parser')
    return [block.a.span.text for block in document.find_all("div", class_='hd')]
def main():
    """Fetch the listing pages, collect every title, and print the result.

    The pages are requested with ``start`` offsets 0, 25, ..., 225. Titles
    from all pages are accumulated into one list; previously each page's
    result replaced the one before it and only offsets 0 and 25 were read.
    """
    page_size = 25
    data = []
    for yeshu in range(0, 250, page_size):
        url = "http://movie.douban.com/top250?start=" + str(yeshu) + "&filter="
        res = open_url(url)
        # extend() keeps the titles gathered from earlier pages.
        data.extend(look_movie(res))
    print(data)
    # NOTE: to_excel(data) is the spreadsheet export. The original author left
    # the call disabled because running it raised an error, so it stays
    # uncalled here.
def to_excel(data):
    """Write *data* to ``豆瓣250电影榜.xlsx``, one item per row.

    The first row is a fixed header of the strings '1'..'5'. The workbook is
    saved under a relative name, so it lands in the current working directory.

    Args:
        data: Iterable of items. A plain string becomes a single-cell row;
            anything else is passed to ``Worksheet.append`` unchanged.
    """
    wb = openpyxl.Workbook()
    wb.guess_types = True
    ws = wb.active
    ws.append(['1', '2', '3', '4', '5'])
    for each in data:
        # Worksheet.append() expects a whole row (list, tuple, ...); handing
        # it a bare string is rejected, so wrap strings in a one-cell list.
        row = [each] if isinstance(each, str) else each
        ws.append(row)
    wb.save("豆瓣250电影榜.xlsx")
# Start the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()
我从极客爬虫篇里摘取了一些可以用的代码,但现在就是运行不了,希望有小伙伴帮忙解答一下,谢谢!
终于帮你改完了 满意请给最佳
import requests
import bs4
import openpyxl
import os
# os.chdir("D:\BianCheng\Pythonwork\爬虫爬出来资料")
# Request headers passed to requests.get(): a browser-like User-Agent.
header = {
    'User-Agent': (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/87.0.4280.88 Safari/537.36'
    ),
}
def open_url(url, timeout=10):
    """Request *url* with the module-level ``header`` dict and return the reply.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout by default, so a stalled connection would otherwise
            block indefinitely.

    Returns:
        The response object returned by ``requests.get``.
    """
    return requests.get(url, headers=header, timeout=timeout)
def look_movie(res):
    """Return the title text found under each ``<div class="hd">``.

    Args:
        res: Object whose ``text`` attribute holds the HTML to parse.

    Returns:
        A list of strings: for every matching ``div``, the text of the first
        ``<span>`` inside its first ``<a>``.
    """
    parsed = bs4.BeautifulSoup(res.text, 'html.parser')
    title_divs = parsed.find_all("div", class_='hd')
    return [div.a.span.text for div in title_divs]
def main():
    """Collect titles from offsets 0..225 and pass them to ``to_excel``."""
    step = 25
    offsets = range(0, 226, step)
    data = []
    for start in offsets:
        page = open_url("http://movie.douban.com/top250?start=" + str(start) + "&filter=")
        data += look_movie(page)
    # The original while-loop left its counter one step past the last
    # offset (250) and passed that value on; reproduce it here.
    to_excel(data, offsets[-1] + step)
def to_excel(data, yeshu):
    """Print each title with its rank and save ranks and titles to a workbook.

    The workbook gets two rows: the rank numbers, then the titles. It is
    saved as ``豆瓣250电影榜.xlsx`` under a relative name.

    Args:
        data: List of titles, best-ranked first.
        yeshu: Counter value handed over by the caller. It is not used here
            and is kept only so existing calls keep working.
    """
    wb = openpyxl.Workbook()
    wb.guess_types = True
    ws = wb.active
    # The right-hand side of this assignment was missing in the pasted code
    # (a syntax error). Ranks 1..len(data) pair one rank with each title.
    data_rank = list(range(1, len(data) + 1))
    ws.append(data_rank)
    for movie_rank, movie_name in zip(data_rank, data):
        print("%s 排名:%s" % (movie_name, movie_rank))
    print(data)
    ws.append(data)
    wb.save("豆瓣250电影榜.xlsx")
# Start the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()
591821661 发表于 2021-4-7 00:00
终于帮你改完了 满意请给最佳
多谢大佬解惑!根据大佬的代码,我自己弄出了一部分想要的效果。
import requests
import bs4
import openpyxl
#import os
# os.chdir("D:\BianCheng\Pythonwork\爬虫爬出来资料")
# Request headers passed to requests.get(): a browser-like User-Agent.
header = dict([
    (
        'User-Agent',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/87.0.4280.88 Safari/537.36',
    ),
])
def open_url(url, timeout=10):
    """Download *url*, sending the module-level ``header`` dict.

    Args:
        url: Address to request.
        timeout: Seconds to wait for the server. ``requests`` applies no
            timeout by default, so a stalled connection would otherwise
            block indefinitely.

    Returns:
        The response object returned by ``requests.get``.
    """
    response = requests.get(url, headers=header, timeout=timeout)
    return response
def look_movie(res):
    """Pull the title text out of every ``<div class="hd">`` in the page.

    Args:
        res: Object whose ``text`` attribute holds the HTML to parse.

    Returns:
        A list of strings: for every matching ``div``, the text of the first
        ``<span>`` inside its first ``<a>``.
    """
    tree = bs4.BeautifulSoup(res.text, 'html.parser')
    return [entry.a.span.text for entry in tree.find_all("div", class_='hd')]
def main():
    """Read the pages at offsets 0..225 and forward all titles to ``to_excel``."""
    end = 250
    collected = []
    for offset in range(0, end, 25):
        address = "http://movie.douban.com/top250?start=" + str(offset) + "&filter="
        collected.extend(look_movie(open_url(address)))
    # The original loop counter ended at 250 and that value was passed on.
    to_excel(collected, end)
def to_excel(data, yeshu):
    """Save the titles in groups of ten, each under a row of rank labels.

    For every group the sheet receives three rows: the labels ``第N名``, the
    matching titles, and an empty spacer row. The workbook is saved as
    ``豆瓣250电影榜.xlsx`` under a relative name.

    Args:
        data: List of titles, best-ranked first.
        yeshu: Counter value handed over by the caller. It is not used here
            and is kept only so existing calls keep working.
    """
    group_size = 10
    wb = openpyxl.Workbook()
    wb.guess_types = True
    ws = wb.active
    kongge = []  # an empty row, used as a spacer between groups
    for nub in range(0, len(data), group_size):
        # Only this group's titles go under its labels. The original kept a
        # running offset but never used it, so it appended the full title
        # list after every label row.
        titles = data[nub:nub + group_size]
        data_rank = ["第%d名" % i for i in range(nub + 1, nub + len(titles) + 1)]
        ws.append(data_rank)
        ws.append(titles)
        ws.append(kongge)
    wb.save("豆瓣250电影榜.xlsx")
# Start the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()
页:
[1]