|
发表于 2020-6-15 17:04:57
|
显示全部楼层
- import requests
- import bs4
- import re
- import random
def open_url(url, timeout=10):
    """Fetch *url* with a browser-like User-Agent and return the response.

    A proxy address is picked at random from a hard-coded list and passed to
    requests under the "http" key only.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests.get can block indefinitely on a stalled
            connection.

    Returns:
        The requests.Response object for the GET request.

    Raises:
        requests.exceptions.RequestException: On connection failure or
            timeout.
    """
    iplist = [
        "60.2.44.182:30963",
        "119.254.94.93:46323",
        "113.128.148.50:8118",
        "61.135.155.82:443",
        "124.93.201.59:59618",
        "120.198.76.45:41443",
    ]
    # NOTE(review): only an "http" entry is supplied; requests selects proxies
    # by URL scheme, so https:// URLs presumably bypass this proxy - confirm
    # whether that is intended.
    proxies = {"http": random.choice(iplist)}
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"
    }
    res = requests.get(url, headers=headers, proxies=proxies, timeout=timeout)
    # Dump the raw page body to stdout (kept from the original for debugging).
    print(res.text)
    return res
def find_movies(res):
    """Extract movie titles from a downloaded chart page.

    Args:
        res: Response-like object whose ``text`` attribute holds the HTML.

    Returns:
        A list of strings, one per matched entry, each ending with a newline
        so the list can be written straight to a text file.
    """
    soup = bs4.BeautifulSoup(res.text, "html.parser")
    result = []
    # NOTE(review): the class name here is "p12" (digit one); if the page
    # actually uses "pl2" (letter l) this will match nothing - confirm against
    # the live markup.
    for block in soup.find_all("div", class_="p12"):
        anchor = block.a
        title_span = anchor.span if anchor is not None else None
        if title_span is None:
            # Skip entries that lack the expected <a><span> nesting instead of
            # failing with AttributeError on None.
            continue
        # The original indexed a misspelled name ("moives") here, which raised
        # NameError; titles are now appended directly in a single pass.
        result.append(title_span.text + "\n")
    return result
def main():
    """Download the Douban movie chart and save the extracted titles.

    The page is fetched with open_url, the title lines are obtained from
    find_movies, and the lines are written to a UTF-8 text file in the current
    directory, replacing any previous contents.
    """
    chart_url = "https://movie.douban.com/chart"
    response = open_url(chart_url)
    title_lines = list(find_movies(response))
    with open("豆瓣排行榜.txt", "w", encoding="utf-8") as out_file:
        # Each entry already carries its own trailing newline.
        out_file.writelines(title_lines)


# Run the scraper only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
复制代码 |
|