|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
- import json
- import requests
- from bs4 import BeautifulSoup
- import time
- import pandas as pd
def get_one_page(url, timeout=10):
    """Download *url* and return the response body as text.

    The request carries browser-like headers plus an
    ``X-Requested-With: XMLHttpRequest`` marker.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the whole run.

    Returns:
        The response text when the server answers with HTTP 200; ``None``
        for any other status code or when ``requests`` raises an error.
    """
    headers = {
        'Host': 'q.10jqka.com.cn',
        # The scheme separator was missing ("http//"), producing an invalid URL.
        'Referer': 'http://q.10jqka.com.cn/',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.112 Safari/537.36',
        'X-Requested-With': 'XMLHttpRequest'
    }
    try:
        response = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # Qualified through the already-imported ``requests`` package; the
        # bare name ``RequestException`` was never imported and would have
        # raised NameError instead of being caught.
        return None
    if response.status_code == 200:
        return response.text
    return None
def parse_one_page(html):
    """Extract one record per table row from a listing page.

    Args:
        html: Page markup as a string. ``None`` or an empty string (e.g. a
            failed download) is accepted and yields no records.

    Returns:
        A list of dicts, one per ``tbody tr`` row, holding the text of the
        cells at indexes 1 through 7 under fixed keys. Rows with fewer than
        eight cells are skipped.
    """
    # Guard first: the fetcher signals failure with None, which the HTML
    # parser cannot accept.
    if not html:
        return []

    soup = BeautifulSoup(html, 'lxml')
    records = []
    for each_tr in soup.select('tbody tr'):
        td_list = each_tr.select('td')
        # Placeholder or malformed rows lack the expected cells; skipping
        # them avoids an IndexError that would discard the whole page.
        if len(td_list) < 8:
            continue
        # NOTE(review): '股票价格' ("stock price") sits beside '现价'
        # ("current price"); confirm what cell 1 really holds. The key is
        # left unchanged because downstream column names depend on it.
        records.append({
            '股票价格': td_list[1].text,
            '股票简称': td_list[2].text,
            '现价': td_list[3].text,
            '涨幅': td_list[4].text,
            '涨跌': td_list[5].text,
            '涨速': td_list[6].text,
            '换手': td_list[7].text
        })
    return records
def main(offset):
    """Fetch one listing page and append its rows to the global ``data`` list.

    Args:
        offset: Page number substituted into the listing URL.

    A page whose download failed (``get_one_page`` returned ``None``) is
    skipped so a single bad request does not abort the remaining pages.
    """
    url = 'http://q.10jqka.com.cn/index/index/board/all/field/zdf/order/desc/page/%s/ajax/1/' % offset
    html = get_one_page(url)
    if html is None:
        return
    # ``data`` is the module-level list created by the script entry point.
    data.extend(parse_one_page(html))
if __name__ == '__main__':
    # Module-level accumulator that ``main`` extends with each page's rows.
    data = []
    for page in range(1, 10):
        main(offset=page)
        # Pause between requests.
        time.sleep(10)

    # Persist the scraped rows as JSON.
    serialized = json.dumps(data)
    with open('data.json', 'w') as out_file:
        out_file.write(serialized)

    # Read the JSON back and export it as CSV.
    with open('data.json', 'r') as in_file:
        data = json.load(in_file)

    column_order = ['股票价格', '股票简称', '现价', '涨幅', '涨跌', '涨速', '换手']
    df = pd.DataFrame(data, columns=column_order)
    df.to_csv("data1.csv", index=False, encoding='utf_8_sig')
复制代码 |
|