|
发表于 2020-7-14 22:22:21
|
显示全部楼层
import requests
import bs4
# Fetch the first page of the Douban Top 250 list and print one line of text
# for every <div class="hd"> element found on it.
#
# `headers` must be a dict (curly braces). Without a browser-like User-Agent
# the server responds with HTTP 418.
headers={'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.106 Safari/537.36'}
# A timeout keeps the script from hanging forever if the server never answers.
res = requests.get('https://movie.douban.com/top250', headers=headers, timeout=10)
# Fail loudly on an HTTP error status (e.g. 418) instead of silently parsing
# an error page and printing nothing.
res.raise_for_status()
# Debugging aid: inspect the response status code.
# print(res.status_code)
# Workaround for consoles that cannot encode every character (e.g. GBK):
# print(res.text.encode('gbk','ignore').decode('gbk'))
# Two common mistakes: (1) the attribute is `text`, not `txt`;
# (2) the parser name is "html.parser" -- note the dot in the middle.
soup = bs4.BeautifulSoup(res.text, "html.parser")
# Collect every <div class="hd">; the text of the first <span> inside its
# first <a> is printed below (presumably the movie title -- verify against
# the page markup).
targets = soup.find_all('div', class_='hd')
for each in targets:
    print(each.a.span.text)
|