|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import requests
from bs4 import BeautifulSoup
import bs4
def getHTMLText(url):
try:
headers = {'User-Agent':'Mozilla/5.0',\
'Accept':'*/*',\
'Accept-Encoding':'gzip',\
'Accept-Language':'zh-Hans-CN',\
'Cha-Control':'no-cacha',\
'Host':'img3.doubanio.com',\
}
r = requests.get(url,headers=headers)
r.encoding = r.apparent_encoding
r.raise_for_status
print(r.status_code)
return r,text
except:
return '爬取失败'
def fillTop(html,ulist):
soup = BeautifulSoup(html,"html.parser")
movie = soup.find_all('li')
for i in movie:
ulist.append(i.div.div.em.text)
def printMovie(ulist):
print(ulist)
def main():
url = "https://movie.douban.com/top250"
html = getHTMLText(url)
unifo = []
fillTop(html,unifo)
printMovie(unifo)
main()
404了,
只用加 User-Agent 就可以了
- import requests
- from bs4 import BeautifulSoup
- import bs4
- def getHTMLText(url):
- try:
- headers = {'User-Agent': 'Mozilla/5.0'}
- r = requests.get(url, headers=headers)
- r.encoding = r.apparent_encoding
- print(r.status_code)
- return r.text
- except Exception:
- return '爬取失败'
- def fillTop(html, ulist):
- soup = BeautifulSoup(html, "html.parser")
- movie = soup.find_all('li')
- for i in movie:
- ulist.append(i.div.div.em.text)
- def printMovie(ulist):
- print(ulist)
- def main():
- url = "https://movie.douban.com/top250"
- html = getHTMLText(url)
- unifo = []
- fillTop(html, unifo)
- printMovie(unifo)
- main()
复制代码
|
|