|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
本帖最后由 JeremyCheung 于 2022-4-26 22:55 编辑
请问各位大神,这个代码为什么爬不到股票的信息?求帮助!请多多指教!谢谢!
import requests
from fake_user_agent.main import user_agent
import re
import csv
def getHtml(url, timeout=10):
    """Fetch *url* with a browser-like User-Agent and return the body as text.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        The response body decoded with the encoding detected from its content.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status (4xx/5xx).
    """
    r = requests.get(
        url,
        headers={'User-Agent': user_agent("chrome")},
        timeout=timeout,
    )
    # Fail loudly instead of handing an error page to the parser.
    r.raise_for_status()
    # Decode using the encoding guessed from the content rather than the
    # one declared (or missing) in the HTTP headers.
    r.encoding = r.apparent_encoding
    return r.text
stockUrl = 'http://quote.eastmoney.com/stocklist.html'
# One list entry of the form <li><a ...>NAME(CODE)</a></li>, capturing the
# name and the six-digit code. "[^>]*" keeps the match inside a single <a>
# tag, so several entries on the same line are all found (a greedy ".*"
# would swallow every entry but the last one on the line).
PATTERN_STOCK = r"<li><a[^>]*>(\w*)\((\d{6})\)</a></li>"

if __name__ == '__main__':
    html = getHtml(stockUrl)
    reslist = re.findall(PATTERN_STOCK, html)
    # Data cleaning: drop non-stock entries. Individual stock codes start
    # with 6 (Shanghai), 0 (Shenzhen) or 3 (ChiNext).
    datalist = []
    for res in reslist:
        print(res)
        if res[1].startswith(('6', '3', '0')):
            datalist.append(res)
    # The context manager closes the file even if a write fails.
    with open('C:/Users/MC/Desktop/stock csv/stock.csv', 'w+', encoding='utf-8', newline="") as f:
        writer = csv.writer(f)
        writer.writerow(('名称', '代码'))
        for data in datalist:
            writer.writerow((data[0], data[1]))
非原创的~~
本帖最后由 isdkz 于 2022-4-27 06:25 编辑
股票信息是 js 动态渲染的,换个模块,使用 requests-html 之前先执行以下命令安装:
pip install requests-html -i https://mirrors.aliyun.com/pypi/simple
对你的代码修改如下:
from requests_html import HTMLSession
import csv
def getHtml(url):
    """Load *url*, run its JavaScript, and return the rendered HTML object.

    Args:
        url: Address of the page to download.

    Returns:
        The response's ``html`` object after ``render()`` has been called on
        it, which callers can query with ``find``.
    """
    # Closing the session on exit releases its connections and the headless
    # browser that render() starts; the rendered HTML stays usable afterwards.
    with HTMLSession() as sess:
        r = sess.get(url)
        r.html.render()
        return r.html
stockUrl = 'http://quote.eastmoney.com/stocklist.html'

if __name__ == '__main__':
    html = getHtml(stockUrl)
    rows = html.find('#table_wrapper-table > tbody > tr')
    print(rows)
    # Data cleaning: drop non-stock rows. Individual stock codes start
    # with 6 (Shanghai), 0 (Shenzhen) or 3 (ChiNext).
    datalist = []
    for row in rows:
        code_links = row.find('td:nth-child(2) > a')
        if not code_links:
            # Row has no code link (e.g. a placeholder row); skip it instead
            # of raising IndexError.
            continue
        code = code_links[0].text
        print(code)
        if not code.startswith(('6', '3', '0')):
            continue
        name_links = row.find('td.mywidth > a')
        name = name_links[0].text if name_links else ''
        datalist.append((name, code))
    # The context manager closes the file even if a write fails.
    with open('stock.csv', 'w+', encoding='utf-8-sig', newline="") as f:
        writer = csv.writer(f)
        writer.writerow(('名称', '代码'))
        for name, code in datalist:
            # The leading tab is presumably there so spreadsheet apps keep
            # the code as text (preserving leading zeros) - confirm.
            writer.writerow((name, '\t' + code))
|
|