|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
def industry():
# 导入数据
data = pd.read_csv(r'ALL.csv', encoding='gbk')
indcd = data['Indcd'].value_counts().index
year = data['Accper'].value_counts().index
data_remo = pd.DataFrame(columns=data.columns) # 新建一个df用于储存
for i in indcd:
data_group = data[data['Indcd'].isin([i])].copy() # 按行业分组
for j in year:
data_group = data_group[data_group['Accper'].isin([j])].copy() #按时间分组
data_group['RDSV'], cut_bin = pd.qcut(data_group['RDS_N'], q=5, labels=['0', '1', '2', '3','4'], retbins=True, duplicates='drop')
data_group['SEINSV'], cut_bin = pd.qcut(data_group['SEINS_N'], q=5, labels=['0', '1', '2', '3','4'], retbins=True, duplicates='drop')
data_group['STRA'] = data_group['SEINSV'].astype(int) + data_group['RDSV'].astype(int)
data_group = data_group.reset_index(drop=True)
data_remo = data_remo.append(data_group, ignore_index=True)
final_pr = ['Indcd', 'Stkcd', 'Accper', 'STRA']
data_remo = data_remo.loc[:, final_pr]
data_remo.set_index('Indcd', inplace=True)
print(data_remo)
# data_remo.to_csv(r"C:\Users\ASUS\Desktop\FINAL.csv", encoding='gbk')
return 0
industry()
运行结果及详细报错内容Traceback (most recent call last):
File "C:\Users\ASUS\PycharmProjects\A01-2021-Strategy\main.py", line 89, in <module>
industry()
File "C:\Users\ASUS\PycharmProjects\A01-2021-Strategy\main.py", line 58, in industry
data_group['RDSV'], cut_bin = pd.qcut(data_group['RDS_N'], 5, labels=bin_labels_1, retbins=True, duplicates='drop')
File "D:\Anaconda\lib\site-packages\pandas\core\reshape\tile.py", line 372, in qcut
fac, bins = _bins_to_cuts(
File "D:\Anaconda\lib\site-packages\pandas\core\reshape\tile.py", line 446, in _bins_to_cuts
raise ValueError(
ValueError: Bin labels must be one fewer than the number of bin edges
进程已结束,退出代码1
1.根据其他帖子,在分箱的时候我加上了.rank(method='first'),但是运行结果还是和没加之前一样
2.使用的 qcut 函数在分箱时,要求标签个数比分箱个数少一个,我分箱数改成了6,仍然报错data_group['RDSV'], cut_bin = pd.qcut(data_group['RDS_N'].rank(method='first'), q=6, labels=['0', '1', '2', '3','4'], retbins=True, duplicates='drop')
data_group['SEINSV'], cut_bin = pd.qcut(data_group['SEINS_N'].rank(method='first'), q=6, labels=['0', '1', '2', '3','4'], retbins=True, duplicates='drop')
|
|