import csv
import datetime
import json
import time

import pandas as pd
import requests
# 目标:爬取天天基金网,开放基金中股票型基金的持仓股票情况
class TianTian_Fund():
    """Scrape fund rankings and stock holdings from the Eastmoney fund mobile API.

    Typical use: call ``Get_Fund_Nname`` to crawl ranking pages (this also
    fetches each fund's holdings), ``Save_Fund_all`` to export everything
    to a CSV file, then ``Process_Data`` to report the most commonly held
    stock for every holding slot. All output files are written to the
    current working directory and are prefixed with the run date.
    """

    # Number of holding slots written per fund; fixes the CSV column count.
    MAX_STOCKS = 10
    # Seconds to wait on a request before giving up instead of hanging.
    REQUEST_TIMEOUT = 10

    def __init__(self):
        """Set up endpoint templates, request headers and the result buffer."""
        # Fund ranking endpoint; placeholders: fund type, page index.
        self.fund_name_url = 'https://fundmobapi.eastmoney.com/FundMNewApi/FundMNRankNewList?fundtype={}&SortColumn=RZDF&Sort=desc&pageIndex={}&pagesize=30&companyid=&deviceid=Wap&plat=Wap&product=EFund&version=2.0.0&Uid=&_=1599579446935'
        # Endpoint used to look up a fund's position-report date; placeholder: fund code.
        self.fund_position_url_1 = 'https://fundmobapi.eastmoney.com/FundMApi/FundIVInfo.ashx?FCODE={}&deviceid=Wap&plat=Wap&product=EFund&version=2.0.0&Uid='
        # Endpoint returning a fund's stock positions; placeholders: fund code, date.
        self.fund_position_url_2 = 'https://fundmobapi.eastmoney.com/FundMNewApi/FundMNInverstPosition?FCODE={}&deviceid=Wap&plat=Wap&product=EFund&version=2.0.0&Uid=&DATE={}'
        self.headers_1 = {
            'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1'}
        # Accumulated per-fund records (one dict per fund).
        self.data = []
        # Run date as YYYYMMDD; used as the prefix of every output file name.
        self.date = datetime.datetime.now().strftime('%Y%m%d')

    def _get_json(self, url):
        """Fetch *url* with the shared headers and return the parsed JSON body."""
        response = requests.get(url=url, headers=self.headers_1, timeout=self.REQUEST_TIMEOUT)
        return json.loads(response.content.decode('utf-8'))

    def Get_Fund_Nname(self, t, page_nums):
        """Crawl ranking pages and collect every listed fund with its holdings.

        Args:
            t: Fund type code inserted into the ranking URL.
            page_nums: Exclusive upper bound of the page index; pages
                ``1 .. page_nums - 1`` are fetched.

        Side effects: appends one record per fund to ``self.data``, writes
        each page's raw JSON via ``Save_Fund_data``, and sleeps between
        pages to throttle requests.
        """
        for page_num in range(1, page_nums):
            fund_name_url = self.fund_name_url.format(t, page_num)
            print(f'正在爬取第{page_num}页,url为{fund_name_url}')
            fund_name_data = self._get_json(fund_name_url)['Datas']
            # Keep the raw page on disk before any processing.
            self.Save_Fund_data(fund_name_data, page_num)
            # A page without results may carry a null 'Datas'; treat it as empty.
            for fund in fund_name_data or []:
                record = {
                    'fund_name': fund['SHORTNAME'],  # fund name
                    'fund_value': fund['DWJZ'],  # fund net value
                    'fund_date': fund['FSRQ'],  # date of the value
                    'fund_id': fund['FCODE'],  # fund code
                }
                record['stock'] = self.Get_Fund_Position(record['fund_id'])
                self.data.append(record)
            time.sleep(3)
            print(self.data)

    def Save_Fund_data(self, fund_name_data, page_num):
        """Dump one ranking page's raw data to ``<date>天天基金-名称-<page>.json``."""
        with open(f'{self.date}天天基金-名称-{page_num}.json', mode='w', encoding='utf-8') as file:
            json.dump(fund_name_data, file, ensure_ascii=False, indent=4)
        print('保存成功')

    def Save_Fund_all(self):
        """Write every record in ``self.data`` to ``<date>天天基金.csv``.

        Each row has four fund columns followed by ``MAX_STOCKS`` groups of
        (name, position, operation). Holdings are truncated or padded with
        blanks to exactly ``MAX_STOCKS`` so every row matches the header,
        and ``None`` values are written as empty fields.
        """
        header = ['fund_name', 'fund_value', 'fund_date', 'fund_id']
        for j in range(1, self.MAX_STOCKS + 1):
            header += [f'stock_name{j}', f'stock_position{j}', f'stock_operation{j}']
        blank = {'stock_name': None, 'stock_position': None, 'stock_operation': None}
        # newline='' lets the csv module control line endings itself.
        with open(f'{self.date}天天基金.csv', mode='w', encoding='utf-8', newline='') as file:
            # csv.writer quotes fields containing commas and writes None as ''.
            writer = csv.writer(file, lineterminator='\n')
            writer.writerow(header)
            for fund in self.data:
                row = [fund['fund_name'], fund['fund_value'], fund['fund_date'], fund['fund_id']]
                stocks = list(fund['stock'] or [])[:self.MAX_STOCKS]
                stocks += [blank] * (self.MAX_STOCKS - len(stocks))
                for stock in stocks:
                    row += [stock['stock_name'], stock['stock_position'], stock['stock_operation']]
                writer.writerow(row)

    def Get_Fund_Position(self, fund_id):
        """Return the stock holdings of one fund.

        Two requests are made: the first looks up the position-report date,
        the second fetches the holdings for that date.

        Args:
            fund_id: Fund code inserted into both URLs.

        Returns:
            A list of dicts with keys ``stock_name``, ``stock_position``
            (ratio with a trailing ``%``) and ``stock_operation``. When no
            date can be determined, the list holds a single placeholder
            dict whose values are all ``None``.
        """
        # Step 1: look up the position-report date.
        fund_position_url_1 = self.fund_position_url_1.format(fund_id)
        print(fund_position_url_1)
        fund_position_json_1 = self._get_json(fund_position_url_1)
        try:
            # NOTE(review): the first element of DATE is used as the latest
            # report date; this assumes DATE is a sequence of dates, newest
            # first. Confirm against the API response.
            fund_position_date = fund_position_json_1['Datas'][0]['DATE'][0]
        except (KeyError, IndexError, TypeError):
            # Missing, empty or null payload: the fund has no usable date.
            fund_position_date = None
        time.sleep(1)

        stock_all = []
        if fund_position_date is not None:
            # Step 2: fetch the holdings for that date.
            fund_position_url_2 = self.fund_position_url_2.format(fund_id, fund_position_date)
            print(fund_position_url_2)
            fund_position_json_2 = self._get_json(fund_position_url_2)
            # Either level may be null; fall back to "no holdings".
            fund_stocks = (fund_position_json_2.get('Datas') or {}).get('fundStocks') or []
            for item in fund_stocks:
                ratio = item.get('JZBL')
                stock_all.append({
                    'stock_name': item.get('GPJC'),
                    'stock_position': f'{ratio}%' if ratio is not None else None,
                    'stock_operation': item.get('PCTNVCHGTYPE'),
                })
            time.sleep(1)
        else:
            stock_all.append({'stock_name': None, 'stock_position': None, 'stock_operation': None})
        return stock_all

    def Process_Data(self):
        """Report, per holding slot, the stock held by the most funds.

        Reads ``<date>天天基金.csv`` and, for each ``stock_nameN`` column,
        prints one summary line and writes it to ``<date>持仓股分析.txt``.
        Columns with no stock names at all are skipped.
        """
        data = pd.read_csv(f'{self.date}天天基金.csv', sep=',')
        with open(f'{self.date}持仓股分析.txt', mode='w', encoding='utf-8') as file:
            for n in range(1, self.MAX_STOCKS + 1):
                # Number of funds holding each stock in slot n.
                counts = data.groupby(by=f'stock_name{n}')['fund_name'].count()
                if counts.empty:
                    # No fund has a stock in this slot; nothing to report.
                    continue
                top_stock = counts.idxmax()
                top_count = counts.max()
                c = f'stock_name{n}分析:持仓股最多的股票为‘{top_stock}’,一共{len(data)}个基金,有{top_count}家持仓' + '\n'
                print(c)
                file.write(c)
if __name__ == '__main__':
    # Script entry point: crawl the ranking pages, export the CSV, report
    # how long crawl + export took, then run the holdings analysis.
    time1 = datetime.datetime.now()
    tiantian = TianTian_Fund()
    t = 27  # fund type: 25 = stock funds, 26 = index funds, 27 = hybrid funds
    page_nums = 10  # passed to Get_Fund_Nname as the page-count argument
    tiantian.Get_Fund_Nname(t, page_nums)
    tiantian.Save_Fund_all()
    time2 = datetime.datetime.now()
    # End minus start; the reverse order yields a negative timedelta whose
    # .seconds field wraps around to roughly a full day.
    delta = time2 - time1
    # total_seconds() also accounts for runs longer than a day.
    print(f'耗时{int(delta.total_seconds())}秒')
    tiantian.Process_Data()