爬取豆瓣前250(正则表达式)
import re
import time

import bs4
import requests
# Browser-like User-Agent sent with every request to the listing pages.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.14 Safari/537.36 Edg/83.0.478.13"}

# The listing is paginated by movie offset: 10 pages of 25 movies = 250.
PAGE_SIZE = 25
PAGE_COUNT = 10
# Seconds to wait for a response before giving up instead of hanging forever.
REQUEST_TIMEOUT = 10
# Captures the text of every <span class="title"> element on a page.
TITLE_PATTERN = re.compile(r'<span class="title">(.+?)</span>', re.S)


def page_url(page):
    """Return the Top 250 listing URL for the zero-based page index *page*.

    The ``start`` query parameter is a movie offset, not a page number, so
    page ``n`` begins at offset ``n * PAGE_SIZE``.
    """
    return ("https://movie.douban.com/top250?start="
            + str(page * PAGE_SIZE) + "&filter=")


def extract_titles(html):
    """Return the primary movie titles found in one listing page's HTML.

    Title spans whose text starts with ``&nb`` are skipped; these are
    presumably the ``&nbsp;/&nbsp;``-prefixed alternate-title spans that
    follow each primary title.
    """
    return [title for title in TITLE_PATTERN.findall(html)
            if not title.startswith("&nb")]


def main():
    """Fetch every listing page and write the ranked titles to a text file.

    Each title is written as ``第<rank>:<title>`` on its own line and echoed
    to stdout; ``结束`` is printed once all pages have been processed.
    """
    rank = 1
    # Explicit UTF-8 so the output does not depend on the platform default.
    with open("豆瓣前250电影.txt", "w", encoding="utf-8") as writer:
        for page in range(PAGE_COUNT):
            html = requests.get(page_url(page), headers=headers,
                                timeout=REQUEST_TIMEOUT).text
            for title in extract_titles(html):
                writer.write("第" + str(rank) + ":" + title + "\n")
                rank += 1
                print(title)
    print("结束")


if __name__ == "__main__":
    main()
带写在txt里哟 @liuzhengyuan @heidern0612 来人呀 学习了~ 学习哈
页:
[1]