|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
爬虫的目的是爬取http://www.zuihaodaxue.com/zuihaodaxuepaiming2019.html这个网页上的大学排名列表。但是返回的内容是空的。 - import requests
- from bs4 import BeautifulSoup
- import bs4
- def getHTMLText(url):
- try:
- r = requests.get(url,timeout = 30)
- r.raise_for_status()
- r.encoding = r.apparent_encoding
- return r.text
- except:
- return""
-
- def fillUnivList(ulist,html):
- soup = BeautifulSoup(html,"html.parser")
-
- for tr in soup.find('tbody').children:
- if isinstance(tr,bs4.element.Tag):
- tds = tr('td')
- ulist.append([tds[0].string,tds[1].string,tds[2].string])
-
-
- def printUnivList(ulist,num):
- print("{:^10}\t{:^6}\t{:^10}".format("排名","学校名称","总分"))
- for i in range(num):
- u=ulist[i]
- print("{:^10}\t{:^6}\t{:^10}".format("u[0]","u[1]","u[2]"))
-
-
- def main():
- uinfo=[]
- url=" http://www.zuihaodaxue.com/zuihaodaxuepaiming2019.html"
- html = getHTMLText(url)
- fillUnivList(uinfo,html)
- printUnivList(uinfo,20)
- main()
复制代码
- import requests
- from bs4 import BeautifulSoup
- import bs4
- def getHTMLText(url):
- try:
- r = requests.get(url, timeout=30)
- r.raise_for_status()
- r.encoding = r.apparent_encoding
- return r.text
- except:
- return ""
- def fillUnivList(ulist, html):
- soup = BeautifulSoup(html, "html.parser")
- for tr in soup.find('tbody').children:
- if isinstance(tr, bs4.element.Tag):
- tds = tr('td')
- ulist.append([tds[0].string, tds[1].string, tds[3].string])
- def printUnivList(ulist, num):
- print("{:^10}\t{:^6}\t{:^10}".format("排名", "学校名称", "总分"))
- for i in range(num):
- u = ulist[i]
- print("{:^10}\t{:^10}\t{:^10}".format(u[0], u[1], u[2]))
- def main():
- uinfo = []
- url = " http://www.zuihaodaxue.com/zuihaodaxuepaiming2019.html"
- html = getHTMLText(url)
- fillUnivList(uinfo, html)
- return printUnivList(uinfo, 20)
- main()
复制代码
都帮你改了下,就是总分那边不居中
|
|