爬虫_同花顺行情中心股票数据-回复 有彩蛋
import json
import time

import pandas as pd
import requests
from bs4 import BeautifulSoup
def get_one_page(url, timeout=10):
    """Download one page and return its body as text.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the script forever.

    Returns:
        The response text when the server answers with HTTP 200; ``None``
        for any other status code or when ``requests`` raises an error.
    """
    headers = {
        'Host': 'q.10jqka.com.cn',
        # The scheme separator was missing here ('http//...').
        'Referer': 'http://q.10jqka.com.cn/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest',
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # Referenced through the ``requests`` module: the bare name
        # ``RequestException`` was never imported and raised NameError.
        return None
    if response.status_code == 200:
        return response.text
    return None
def parse_one_page(html):
    """Extract one dict per table row from a downloaded page.

    Args:
        html: Markup of the page, or ``None``/empty when the download failed.

    Returns:
        A list of dicts keyed by the seven column labels below, each value
        being the text of one ``<td>`` cell. Empty when ``html`` is falsy or
        contains no usable ``tbody tr`` rows.
    """
    if not html:
        # A failed download yields None; there is nothing to parse.
        return []

    # NOTE(review): the original read ``td_list.text`` for every field, which
    # raises AttributeError because ``select`` returns a list of cells. The
    # positions 1..7 used here are an assumption (cell 0 presumed to be a row
    # number) -- confirm against the live table layout.
    fields = (
        ('股票价格', 1),
        ('股票简称', 2),
        ('现价', 3),
        ('涨幅', 4),
        ('涨跌', 5),
        ('涨速', 6),
        ('换手', 7),
    )
    last_index = max(index for _, index in fields)

    soup = BeautifulSoup(html, 'lxml')
    yeji = []
    for each_tr in soup.select('tbody tr'):
        td_list = each_tr.select('td')
        if len(td_list) <= last_index:
            # Skip rows that are too short to hold every expected cell.
            continue
        yeji.append({label: td_list[index].text for label, index in fields})
    return yeji
def main(offset):
    """Fetch result page ``offset`` and append its rows to the global ``data``.

    Args:
        offset: Page number substituted into the listing URL.

    A page that could not be downloaded is skipped, so one failed request
    does not abort the whole run.
    """
    url = 'http://q.10jqka.com.cn/index/index/board/all/field/zdf/order/desc/page/%s/ajax/1/' % offset
    html = get_one_page(url)
    if html is None:
        # get_one_page reports failure as None; nothing to parse for this page.
        return
    data.extend(parse_one_page(html))
if __name__ == '__main__':
    # Accumulator that main() extends with the rows of each page.
    data = []
    for i in range(1, 10):  # pages 1 through 9
        main(offset=i)
        # NOTE(review): indentation was lost in the source; the pause is
        # assumed to sit inside the loop, i.e. between page requests.
        time.sleep(10)

    # Persist the raw rows as JSON, then read them back before tabulating.
    json_result = json.dumps(data)
    with open('data.json', 'w', encoding='utf-8') as f:
        f.write(json_result)
    with open('data.json', 'r', encoding='utf-8') as f:
        data = json.loads(f.read())

    df = pd.DataFrame(data, columns=['股票价格', '股票简称', '现价', '涨幅', '涨跌', '涨速', '换手'])
    # utf_8_sig writes a BOM at the start of the CSV file.
    df.to_csv("data1.csv", index=False, encoding='utf_8_sig')
# Forum text that was fused onto the statement above (not code):
# "**** Hidden Message *****" -- post last edited by alphamast on 2019-1-8 01:32.
bs4.FeatureNotFound: Couldn't find a tree builder with the features you requested: lxml. Do you need to install a parser library? alphamast 发表于 2019-1-7 19:52
bs4.FeatureNotFound: Couldn't find a tree builder with the features you requested: lxml. Do you need ...
你得安装 lxml 解析库就可以了(pip install lxml) 彩蛋在哪里 一个比一个优秀,秀得我头皮发麻 123 看我四条A 见识一下 小白请教楼主,这个爬下来,之后还要做数据清洗和筛选的吧 大佬666 好东西,谢谢楼主 感謝大大 彩蛋是输出的结果吗? 就是想看一看 想看想看,向您学习 45 学习学习 66 运行后无结果, 你好,我运行了您的代码没有报错,也没有输出结果