|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
- """
- Fund_Filter.py
- 全市场公募基金筛选(开放式+ETF+LOF+QDII+REITs+货币)
- Author = shadowmage
- """
- import akshare as ak
- import pandas as pd
- from datetime import datetime
- from concurrent.futures import ProcessPoolExecutor
- from tqdm import tqdm
- import os
# Worker count: leave two cores free, but never go below one.
# os.cpu_count() may return None when the count cannot be determined,
# so fall back to 1 instead of failing on ``None - 2``.
N_PROC = max(1, (os.cpu_count() or 1) - 2)
def get_all_codes():
    """Download the fund listings from akshare and merge them into one table.

    Six endpoints are queried in order: open-end funds, ETF, LOF,
    QDII (Hong Kong), REITs and money-market funds.  Each listing is
    normalised to ``code`` / ``name`` / ``type`` columns; the open-end
    listing is only renamed (not trimmed), so any other columns it carries
    are kept.

    Returns:
        A DataFrame with one row per distinct ``code``; when a code shows
        up in several listings the first occurrence is kept.
    """
    # 1) Open-end funds already carry their own type column.
    frames = [
        ak.fund_name_em().rename(
            columns={'基金代码': 'code', '基金简称': 'name', '基金类型': 'type'}
        )
    ]

    # 2)-6) Remaining listings: (akshare function, code column, name column,
    # type label assigned to every row of that listing).
    tagged_sources = (
        ('fund_etf_spot_em', '代码', '名称', 'ETF'),
        ('fund_lof_spot_em', '代码', '名称', 'LOF'),
        ('fund_hk_rank_em', '基金代码', '基金简称', 'QDII'),
        ('reits_realtime_em', '代码', '名称', 'REITs'),
        ('fund_money_fund_daily_em', '基金代码', '基金简称', '货币型'),
    )
    for fetcher_name, code_col, name_col, label in tagged_sources:
        listing = getattr(ak, fetcher_name)()[[code_col, name_col]]
        listing = listing.rename(columns={code_col: 'code', name_col: 'name'})
        listing['type'] = label
        frames.append(listing)

    # Merge everything and drop repeated fund codes.
    merged = pd.concat(frames, ignore_index=True)
    return merged.drop_duplicates(subset=['code'])
# Build the fund universe only in the process that runs the script.
# With the "spawn" start method each worker process re-imports this
# module; without the guard every worker would repeat all listing
# downloads even though it receives its batch as an argument.
if __name__ == '__main__':
    base_df = get_all_codes()
    print(f'全市场基金(含货基)共 {len(base_df)} 只')
def _info_value(info, item):
    """Return the first ``value`` cell whose ``item`` cell equals *item*."""
    return info.loc[info['item'] == item, 'value'].iloc[0]


def _load_nav(code):
    """Fetch the unit-NAV history of *code* as a float Series indexed by date.

    The result is sorted by date with missing values dropped; an empty
    Series is returned when the endpoint yields no rows.
    """
    nav_df = ak.fund_open_fund_info_em(symbol=code, indicator='单位净值走势')
    if nav_df.empty:
        return pd.Series(dtype=float)
    nav = pd.Series(
        nav_df['单位净值'].astype(float).to_numpy(),
        index=pd.DatetimeIndex(pd.to_datetime(nav_df['净值日期'])),
    )
    return nav.dropna().sort_index()


def calc_one(row):
    """Evaluate one fund against the screening rules.

    Args:
        row: Mapping-like record providing ``'code'`` and ``'type'``.

    Returns:
        A dict of report fields when the fund passes every filter, else
        ``None``.  A fund passes when its scale is above 1 (value with the
        '亿' suffix stripped), it is older than 4 years, it has at least
        242 NAV points, its annualised return exceeds 3.5 %, and both the
        share of rising days and of rising months exceed 90 %.
        ``None`` is also returned when fetching or parsing the data fails.
    """
    code = row['code']

    # Basic info (inception date, scale).  Deliberately best-effort: a
    # fund whose data cannot be fetched or parsed is skipped.
    try:
        info = ak.fund_individual_basic_info_xq(symbol=code)
        setup = pd.to_datetime(_info_value(info, '成立时间'))
        scale = float(_info_value(info, '最新规模').replace('亿', ''))
        short_name = _info_value(info, '基金简称')
    except Exception:
        return None

    age = (datetime.now() - setup).days / 365.25
    if scale <= 1 or age <= 4:
        return None

    # NAV history, same best-effort policy for the download/parse step.
    try:
        nav = _load_nav(code)
    except Exception:
        return None
    if len(nav) < 242:
        return None

    # The computations below are kept outside the try blocks so that a
    # programming error surfaces instead of silently rejecting every fund.

    # Annualised return, assuming 242 trading days per year.
    years = len(nav) / 242
    annual = (nav.iloc[-1] / nav.iloc[0]) ** (1 / years) - 1
    if annual <= 0.035:
        return None

    # Share of rising days and rising months.  Month-end values are taken
    # by grouping on the calendar month of the date index; the NAV series
    # must be date-indexed for this (an integer-indexed series cannot be
    # resampled by month).
    daily = nav.pct_change().dropna()
    up_day = (daily > 0).mean()
    month_end = nav.groupby(nav.index.to_period('M')).last()
    monthly = month_end.pct_change().dropna()
    up_month = (monthly > 0).mean()
    if up_day <= 0.9 or up_month <= 0.9:
        return None

    return {
        '基金代码': code,
        '基金简称': short_name,
        '基金类型': row['type'],
        '成立日期': setup.date(),
        '基金规模(亿元)': scale,
        '成立年限(年)': round(age, 2),
        '年化收益率': round(annual * 100, 2),
        '上涨日数比例': round(up_day * 100, 2),
        '上涨月份比例': round(up_month * 100, 2),
    }
def split_list(lst, n):
    """Cut *lst* into *n* consecutive slices of near-equal length.

    The first ``len(lst) % n`` slices receive one extra element.  Slices
    may be empty when *n* exceeds ``len(lst)``.

    Args:
        lst: Any object supporting ``len()`` and slice indexing.
        n: Number of slices to produce.

    Returns:
        A list of *n* slices that together cover *lst* in order.
    """
    base_size, remainder = divmod(len(lst), n)
    chunks = []
    start = 0
    for idx in range(n):
        # The leading `remainder` chunks absorb the leftover elements.
        stop = start + base_size + (1 if idx < remainder else 0)
        chunks.append(lst[start:stop])
        start = stop
    return chunks
def worker(sub_df):
    """Screen every row of *sub_df* and return the records that pass.

    Args:
        sub_df: DataFrame whose rows are handed to ``calc_one`` one by one.

    Returns:
        A list with the truthy results of ``calc_one``, in row order.
    """
    passed = []
    for _, fund_row in sub_df.iterrows():
        # Evaluate each fund exactly once: calling calc_one both in the
        # filter and for the value would repeat every network request.
        outcome = calc_one(fund_row)
        if outcome:
            passed.append(outcome)
    return passed
if __name__ == '__main__':
    # Split the fund table into one batch per worker process.
    # NOTE(review): the per-fund work consists of akshare data requests,
    # which look I/O-bound; a thread pool with more workers may scale
    # better than processes — confirm against the data source's limits.
    batches = split_list(base_df, N_PROC)
    results = []
    with ProcessPoolExecutor(max_workers=N_PROC) as pool, \
            tqdm(total=len(base_df), desc='基金只数') as bar:
        # pool.map yields results in submission order, so each result
        # can be paired with its batch to advance the bar by fund count.
        for batch, part in zip(batches, pool.map(worker, batches)):
            results.extend(part)
            bar.update(len(batch))

    final_df = pd.DataFrame(results)
    # With no surviving fund the frame has no columns; sorting by a
    # missing column would raise, so only sort when there is data.
    if not final_df.empty:
        final_df = final_df.sort_values('年化收益率', ascending=False)
    final_df.to_excel('filtered_funds.xlsx', index=False, sheet_name='基金筛选结果')
    print('\n全市场筛选完成:filtered_funds.xlsx')
    print(final_df.head())
复制代码
目前发现执行效率偏低,想要提高效率,有没有什么方法? |
|