|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
import requests
from bs4 import BeautifulSoup
import bs4
def getHTMLText(url):
    """Fetch ``url`` and return the response body as text.

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text, or an empty string when the request fails
        (connection error, timeout, or an HTTP error status).
    """
    try:
        r = requests.get(url, timeout=30)
        # Turn 4xx/5xx responses into an exception so they take the
        # failure path below instead of being parsed as a result page.
        r.raise_for_status()
        # Decode with the encoding guessed from the body content.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only network/HTTP failures mean "no page"; unrelated errors
        # (KeyboardInterrupt, programming mistakes) are left to propagate.
        return ""
def fillUnivList(ulist, html):
    """Append the first three cell texts of every table row to ``ulist``.

    The rows are the tag children of the first ``<tbody>`` found in
    ``html``. Each appended entry is a list of three strings.

    Args:
        ulist: List that receives the parsed rows; modified in place.
        html: Page markup to parse. May be empty.

    Returns:
        None. ``ulist`` is left unchanged when ``html`` has no ``<tbody>``.
    """
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find('tbody')
    if tbody is None:
        # find() returns None when nothing matches (empty page, failed
        # download, or a table rendered without <tbody>); iterating
        # ``None.children`` is what raised AttributeError before.
        return
    for tr in tbody.children:
        # .children also yields whitespace text nodes between rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr('td')
        if len(tds) < 3:
            # Spacer or malformed rows do not have the three columns we read.
            continue
        # get_text() always returns a string, whereas .string is None for
        # cells with several children and would break the later formatting.
        ulist.append([td.get_text(strip=True) for td in tds[:3]])
def printUnivList(ulist, num):
    """Print a header line followed by at most ``num`` rows of ``ulist``.

    Args:
        ulist: Sequence of rows; the first three items of each row are
            printed as centred, tab-separated columns.
        num: Maximum number of rows to print. When ``ulist`` holds fewer
            rows, only the available ones are printed.
    """
    row_format = "{:^10}\t{:^6}\t{:^10}"
    print(row_format.format("排名", "学校名称", "分数"))
    # Slicing never runs past the end of the list, unlike indexing with
    # range(num); max() keeps a negative ``num`` printing no rows at all.
    for u in ulist[:max(num, 0)]:
        print(row_format.format(u[0], u[1], u[2]))
def main():
    """Download the page at the hard-coded URL, parse it and print 20 rows."""
    ranking_url = "http://www.zuihaodaxue.cn/zuihaodaxuepaiming2019.html"
    rows = []
    page = getHTMLText(ranking_url)
    fillUnivList(rows, page)
    printUnivList(rows, 20)
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
代码如上
为什么运行时会报错(AttributeError: 'NoneType' object has no attribute 'children'),应该怎么修改?
下面的代码抓取的是2019年的数据。
如果要爬取2020年的数据,把url末尾的201911改成202011即可。
- import requests
- from bs4 import BeautifulSoup
- import bs4
def getHTMLText(url):
    """Download ``url`` and return its body decoded to text.

    Args:
        url: Address of the page to download.

    Returns:
        The page text, or ``""`` if the request fails (connection error,
        timeout, or an HTTP error status).
    """
    try:
        r = requests.get(url, timeout=30)
        # An HTTP error status raises here and is handled below.
        r.raise_for_status()
        # Decode with the encoding guessed from the body content.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Catch only request failures; a bare ``except`` would also hide
        # KeyboardInterrupt and genuine programming errors.
        return ""
def fillUnivList(ulist, html):
    """Append ``[first, name, fifth]`` string triples parsed from ``html``.

    For every tag child of the first ``<tbody>``: the first cell is
    normalised through ``int``, the name is the text of the link in the
    second cell, and the fifth cell is normalised through ``float``. The
    caller prints these under the headers rank, school name and score.

    Args:
        ulist: List that receives the parsed rows; modified in place.
        html: Page markup to parse. May be empty.

    Returns:
        None. Rows with fewer than five cells, or whose first/fifth cell
        is not numeric, are skipped. ``ulist`` is left unchanged when
        ``html`` has no ``<tbody>``.
    """
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.find('tbody')
    if tbody is None:
        # find() returns None when nothing matches, e.g. after a failed
        # download; ``None.children`` would raise AttributeError.
        return
    for tr in tbody.children:
        # .children also yields whitespace text nodes between rows.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr('td')
        if len(tds) < 5:
            continue
        link = tds[1].find('a')
        # Fall back to the whole cell text when the name is not a link.
        name = link.text if link is not None else tds[1].get_text(strip=True)
        try:
            rank = str(int(tds[0].text))
            score = str(float(tds[4].text))
        except ValueError:
            # Non-numeric cell: not a data row this parser understands.
            continue
        ulist.append([rank, name, score])
def printUnivList(ulist, num):
    """Print a header line, then up to ``num`` rows taken from ``ulist``.

    Args:
        ulist: Sequence of rows; items 0, 1 and 2 of each row are printed
            as centred, tab-separated columns.
        num: Upper bound on the number of rows printed. A shorter
            ``ulist`` prints only the rows it has.
    """
    template = "{:^10}\t{:^6}\t{:^10}"
    print(template.format("排名", "学校名称", "分数"))
    # A slice is safe when fewer than ``num`` rows were parsed; indexing
    # with range(num) raised IndexError. max() keeps negative ``num`` empty.
    for row in ulist[:max(num, 0)]:
        print(template.format(row[0], row[1], row[2]))
def main():
    """Fetch the page at the hard-coded URL, parse it and print 20 rows."""
    ranking_url = "https://www.shanghairanking.cn/rankings/bcur/201911"
    rows = []
    page = getHTMLText(ranking_url)
    fillUnivList(rows, page)
    printUnivList(rows, 20)
-
# Entry point: run main() only when executed directly, not when imported.
if __name__ == "__main__":
    main()
复制代码
|
|