|
|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
爬取豆瓣读书top250,为什么代码只爬到了第205就不爬了
- import re
- import requests
- import csv
- headers = {
- "User-Agent":"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:66.0) Gecko/20100101 Firefox/66.0",
- "Referer":"https://book.douban.com/top250?start=50"
- }
- books = []
- for i in range(10):#共10页
- url = "https://book.douban.com/top250?start="+str(i*25)
- response = requests.get(url,headers=headers)
- text = response.content.decode("utf-8")
- #所有书的名字
- names = re.findall(r'<div\sclass="pl2">.*?<a.*?>(.*?)</a>',text,re.DOTALL)
- # print(names)
- all_names = []
- for name in names:
- name = name.replace("\n", "").strip()
- beizhu = re.findall(r"<span.*?>(.*?)</span>",name,re.DOTALL)
- name = re.sub(r"<span.*?>.*?</span>","",name,re.DOTALL).strip()
- if beizhu == []:
- name = name
- else:
- name = name+beizhu[0]
- all_names.append(name)
- #所有书的其他名字
- other_names = re.findall(r'<div\sclass="pl2">.*?<a.*?>.*?</a>.*?<span.*?>(.*?)</span>',text,re.DOTALL)
- #所有书的作者
- writers = re.findall(r'<div\sclass="pl2">.*?<p\sclass="pl">(.*?)</p>',text,re.DOTALL)
- #所有书的评分
- scores = re.findall(r'<div\sclass="star clearfix">.*?<span\sclass="rating_nums">(.*?)</span>',text,re.DOTALL)
- #所有书的评价人数
- all_people = []
- people = re.findall(r'<div\sclass="star clearfix">.*?<span\sclass="pl">(.*?)</span>',text,re.DOTALL)
- for each_people in people:
- each_people = each_people.replace("\n","").replace("(","").replace(")","").strip()
- all_people.append(each_people)
- #所有书的优美句子
- nice_statments = re.findall(r'<p\sclass="quote"\sstyle=".*?">.*?<span.*?>(.*?)</span>',text,re.DOTALL)
- #完成每本书对应数据的整合
- each_books = list(zip(all_names,other_names,writers,scores,all_people,nice_statments))
- for each_book in each_books:
- all_names,other_names,writers,scores,all_people,nice_statments=each_book
- book = {
- "name":all_names,
- "other_name":other_names,
- "writer":writers,
- "score":scores,
- "people":all_people,
- "nice_statment":nice_statments
- }
- books.append(book)
- # books.sort(key=lambda x:float(x["score"]),reverse = True)
- header = ["name","other_name","writer","score","people","nice_statment"]
- with open("doubandushu1.csv","w",encoding = "utf-8",newline="")as f:
- writer = csv.DictWriter(f,header)#请求csv标题
- writer.writeheader()#写入csv标题
- writer.writerows(books)#写入内容
复制代码
|
|