|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
- import requests
- import bs4
- import openpyxl
- import os
# Switch the working directory so relative output paths resolve there.
# A raw string keeps the Windows backslashes literal instead of relying on
# unrecognised escape sequences such as "\B" and "\P".
os.chdir(r"D:\BianCheng\Pythonwork\爬虫爬出来资料")
# Request headers shared by every HTTP call in this script.
header = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
}
def open_url(url):
    """Issue a GET request for *url* with the module-level ``header`` dict.

    Returns the response object produced by ``requests.get`` unchanged.
    """
    return requests.get(url, headers=header)
def look_movie(res):
    """Collect the title text from every ``<div class="hd">`` in a response.

    ``res.text`` is parsed as HTML; for each matching div the text of the
    first ``<span>`` inside its first ``<a>`` is taken. Titles are returned
    as a list in document order.
    """
    page = bs4.BeautifulSoup(res.text, 'html.parser')
    return [block.a.span.text for block in page.find_all("div", class_='hd')]
def main():
    """Scrape all pages of the Douban Top 250 list and print the titles.

    The list is paged through the ``start`` query parameter in steps of 25,
    so offsets 0, 25, ..., 225 are needed to cover all 250 entries.
    """
    data = []
    for yeshu in range(0, 250, 25):
        url = "http://movie.douban.com/top250?start=" + str(yeshu) + "&filter="
        res = open_url(url)
        # Accumulate rather than overwrite so earlier pages are not lost.
        data.extend(look_movie(res))
    print(data)
    # to_excel(data)  # Spreadsheet export; the author reports that enabling
    # this call raises an error, while leaving it commented out runs fine.
def to_excel(data):
    """Write the entries of *data* to ``豆瓣250电影榜.xlsx``, one per row.

    Args:
        data: iterable whose items are either a single cell value (for
            example a title string) or an already-built row (list/tuple).

    The workbook is saved under a relative path, so it lands in the current
    working directory.
    """
    wb = openpyxl.Workbook()
    # NOTE(review): recent openpyxl releases appear to no longer honour
    # guess_types; confirm against the installed version before relying on it.
    wb.guess_types = True
    ws = wb.active
    ws.append(['1', '2', '3', '4', '5'])
    for each in data:
        # Worksheet.append expects a row (list, tuple, ...); a bare string is
        # rejected with TypeError, so wrap scalar items in a one-cell row.
        row = each if isinstance(each, (list, tuple)) else [each]
        ws.append(row)
    wb.save("豆瓣250电影榜.xlsx")
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()
复制代码
我从极客爬虫篇里摘取了一些可以用的代码,但现在就是运行不了,希望有小伙伴帮忙解决一下,谢谢。
终于帮你改完了,满意请给最佳答案。
- import requests
- import bs4
- import openpyxl
- import os
- # os.chdir("D:\BianCheng\Pythonwork\爬虫爬出来资料")
# Request headers shared by every HTTP call in this script.
header = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 11_1_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36',
}
def open_url(url):
    """Issue a GET request for *url* with the module-level ``header`` dict.

    Returns the response object produced by ``requests.get`` unchanged.
    """
    return requests.get(url, headers=header)
def look_movie(res):
    """Collect the title text from every ``<div class="hd">`` in a response.

    ``res.text`` is parsed as HTML; for each matching div the text of the
    first ``<span>`` inside its first ``<a>`` is taken. Titles are returned
    as a list in document order.
    """
    page = bs4.BeautifulSoup(res.text, 'html.parser')
    return [block.a.span.text for block in page.find_all("div", class_='hd')]
def main():
    """Fetch the list pages at offsets 0..225 and export the collected titles.

    Titles from every page are gathered into one list, which is then handed
    to ``to_excel`` together with the offset one step past the last page.
    """
    titles = []
    start = 0
    for start in range(0, 226, 25):
        page_url = "http://movie.douban.com/top250?start=" + str(start) + "&filter="
        titles.extend(look_movie(open_url(page_url)))
    # The original loop counter finishes one step (25) past the last offset
    # fetched, and that final value is what gets passed on.
    to_excel(titles, start + 25)
def to_excel(data, yeshu):
    """Print each title with its rank and save ranks and titles to a workbook.

    Args:
        data: list of movie titles.
        yeshu: upper bound used to number the entries; ranks run from
            ``yeshu + 1 - len(data)`` through ``yeshu``.

    The sheet receives two rows, the rank strings and then the titles, and
    is saved as ``豆瓣250电影榜.xlsx`` in the current working directory.
    """
    first_rank = yeshu + 1 - len(data)
    data_rank = list(map(str, range(first_rank, yeshu + 1)))

    wb = openpyxl.Workbook()
    wb.guess_types = True
    ws = wb.active

    # First row: the rank labels.
    ws.append(data_rank)
    for pair in zip(data_rank, data):
        print("%s 排名:%s" % (pair[1], pair[0]))
    print(data)
    # Second row: the titles, aligned column-wise with their ranks.
    ws.append(data)
    wb.save("豆瓣250电影榜.xlsx")
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()
复制代码
|
|