import requests
from bs4 import BeautifulSoup
import traceback
import re

def getHTMLText(url):
    # Fetch a page and return its text, or "" on any request error.
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        r.encoding = r.apparent_encoding
        return r.text
    except:
        return ""

def getStockList(lst, stockURL):
    # Collect stock codes like sh600000 / sz000001 from the listing page's links.
    html = getHTMLText(stockURL)
    soup = BeautifulSoup(html, "html.parser")
    a = soup.find_all("a")
    for i in a:
        try:
            href = i.attrs["href"]
            lst.append(re.findall(r"[s][hz]\d{6}", href)[0])
        except:
            continue

def getStockInfo(lst, stockURL, fpath):
    # Fetch each stock's page, parse the "stock-bets" block, and append the result to fpath.
    for stock in lst:
        url = stockURL + stock + ".html"
        html = getHTMLText(url)
        try:
            if html == "":
                continue
            infoDict = {}
            soup = BeautifulSoup(html, "html.parser")
            stockInfo = soup.find("div", attrs={"class": "stock-bets"})
            name = stockInfo.find_all(attrs={"class": "bets-name"})[0]
            infoDict.update({"股票名称": name.text.split()[0]})
            keyList = stockInfo.find_all("dt")
            valueList = stockInfo.find_all("dd")
            for i in range(len(keyList)):
                key = keyList[i].text
                val = valueList[i].text
                infoDict[key] = val
            with open(fpath, "a", encoding="utf-8") as f:
                f.write(str(infoDict) + "\n")
        except:
            traceback.print_exc()
            continue

def main():
    stock_list_url = "http://quote.eastmoney.com/stocklist.html"
    stock_info_url = "https://gupiao.baidu.com/stock/"
    output_file = "E://BaiduStockInfo.txt"
    slist = []
    getStockList(slist, stock_list_url)
    getStockInfo(slist, stock_info_url, output_file)

main()
Traceback (most recent call last):
  File "E:\xiaojiayu\股票数据定向爬虫.py", line 37, in getStockInfo
    name=stockInfo.find_all(attrs={"class":"bets-name"})[0]
AttributeError: 'NoneType' object has no attribute 'find_all'

I keep getting this error — how can I fix it?
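For reference, the traceback means that soup.find("div", attrs={"class": "stock-bets"}) returned None — the fetched page contains no <div class="stock-bets"> element (the gupiao.baidu.com pages may have changed layout or been taken offline) — so the next .find_all() call fails. A minimal sketch of a guard, using a hypothetical parseStockPage helper and assuming the same markup is still what you want to parse:

from bs4 import BeautifulSoup

def parseStockPage(html):
    # Hypothetical helper, not part of the original script: parse one stock
    # page and return an info dict, or None if the expected block is missing.
    soup = BeautifulSoup(html, "html.parser")
    stockInfo = soup.find("div", attrs={"class": "stock-bets"})
    if stockInfo is None:
        # Without this check, stockInfo.find_all(...) raises the AttributeError above.
        return None
    infoDict = {}
    name = stockInfo.find_all(attrs={"class": "bets-name"})
    if name:
        infoDict["股票名称"] = name[0].text.split()[0]
    for dt, dd in zip(stockInfo.find_all("dt"), stockInfo.find_all("dd")):
        infoDict[dt.text] = dd.text
    return infoDict

In getStockInfo this amounts to adding "if stockInfo is None: continue" right after the soup.find(...) call. If every page comes back without that div, the selector (or the stock_info_url source itself) likely needs updating rather than the parsing code.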