|
发表于 2023-6-3 13:12:45
|
显示全部楼层
本楼为最佳答案
"""Scrape the Douban Top 250 movie list into ``date.csv``.

Each CSV row holds, in order: title, release year, average score and the
number of ratings, exactly as they appear in the page markup.
"""
import csv
import re

BASE_URL = "https://movie.douban.com/top250?start={start}&filter="
HEADERS = {
    "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
}
PAGE_SIZE = 25
TOTAL_MOVIES = 250
OUTPUT_PATH = "date.csv"
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled connection hangs forever

# Compiled once instead of on every page.
# The year is the first token after <br>: leading newline/indentation is
# skipped, and the token ends at either a literal "&nbsp;" entity or any
# whitespace, so both raw and entity-decoded markup are handled.
MOVIE_PATTERN = re.compile(
    r'<li>.*?<span class="title">(?P<name>.*?)</span>'
    r'.*?<br>\s*(?P<year>.*?)(?:&nbsp;|\s).*?<span class="rating_num" property="v:average">'
    r'(?P<score>.*?)</span>.*?<span>(?P<num>.*?)人评价</span>',
    re.S,
)


def page_offsets():
    """Return the ``start`` offsets of every list page: 0, 25, ..., 225."""
    # range() excludes TOTAL_MOVIES itself; offset 250 would be a page past
    # the end of the list.
    return range(0, TOTAL_MOVIES, PAGE_SIZE)


def fetch_page(start):
    """Download one list page and return its HTML text.

    Args:
        start: Zero-based index of the first movie on the page.

    Raises:
        requests.RequestException: On timeout, connection failure or a
            non-2xx response, instead of silently producing no rows.
    """
    # Imported here so the parsing helpers remain importable when the
    # third-party ``requests`` package is not installed.
    import requests

    resp = requests.get(
        BASE_URL.format(start=start), headers=HEADERS, timeout=REQUEST_TIMEOUT
    )
    resp.raise_for_status()
    return resp.text


def parse_movies(page_content):
    """Extract ``[name, year, score, num]`` rows from one page of HTML.

    Args:
        page_content: HTML text of a list page.

    Returns:
        A list of 4-item lists of strings, one per matched movie, in page
        order. Empty when nothing matches.
    """
    rows = []
    for match in MOVIE_PATTERN.finditer(page_content):
        name, year, score, num = match.group("name", "year", "score", "num")
        rows.append([name, year.strip(), score, num])
    return rows


def main():
    """Fetch every list page and write the extracted rows to ``date.csv``."""
    # newline="" is required by the csv module to avoid blank lines on
    # Windows; the explicit encoding keeps non-ASCII titles independent of
    # the platform's default locale.
    with open(OUTPUT_PATH, mode="w", newline="", encoding="utf-8") as f:
        csvwriter = csv.writer(f)
        for start in page_offsets():
            print(start)
            csvwriter.writerows(parse_movies(fetch_page(start)))
    print("over!")


if __name__ == "__main__":
    main()
复制代码
没仔细看代码,失礼了 |
|