|
|

楼主 |
发表于 2019-1-22 15:31:44
|
显示全部楼层
自己解决
import urllib.request as urlrequest
from bs4 import BeautifulSoup
# Scrape the Douban "Top 250 books" listing and append one tab-separated row
# per book (title, rating, one-line abstract, link) to ``book250.xls``.
#
# The listing is paged through the ``start`` query parameter in steps of 25,
# so offsets 0, 25, ..., 225 cover entries 1-250. The range therefore stops
# at 250 (exclusive); requesting ``start=250`` would only fetch a page past
# the end of the list (assumes 25 books per page, as the step implies).
#
# The output file is opened once, in append mode, for the whole run instead
# of being re-opened for every book; rows from earlier runs are preserved.
with open("book250.xls", "a") as outputfile:
    for i in range(0, 250, 25):
        url_visit = "https://book.douban.com/top250?start={}".format(i)

        # Close the HTTP response deterministically once the body is read.
        with urlrequest.urlopen(url_visit) as response:
            crawl_content = response.read()

        soup = BeautifulSoup(crawl_content, "html.parser")
        book_list = soup.find_all(class_="item")

        for book in book_list:
            # Not every entry has a one-line quote; ``find`` returns None
            # when the element is missing, so fall back to the literal
            # string "None" in that case.
            inq = book.find(class_="inq")
            abstract = inq.get_text() if inq is not None else "None"

            rating = book.find(class_="rating_nums").get_text()
            link = book.find(class_="nbg")["href"]
            title = book.find(class_="pl2")
            title_name = title.find("a")["title"]

            outputfile.write(
                "{}\t{}\t{}\t{}\n".format(title_name, rating, abstract, link)
            )
|
|