|
|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
有没有大佬能教教我?我在使用 sklearn.model_selection 的 KFold() 时(cross_validation 模块已经不能用了),出现了 TypeError: __init__() got multiple values for argument 'shuffle'。代码如下,万分感谢!
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold, cross_val_score
from sklearn.metrics import confusion_matrix, recall_score, classification_report
def printing_Kfold_scores(x_train_data, y_train_data):
    """Select the LogisticRegression ``C`` with the best mean recall over 5 folds.

    For every candidate ``C`` an L1-penalised logistic regression is trained
    and scored with ``recall_score`` on each of the 5 unshuffled K-fold
    splits; per-fold and mean recall are printed along the way.

    Args:
        x_train_data: pandas DataFrame of features, one row per sample.
        y_train_data: pandas DataFrame (single column) or Series of labels,
            row-aligned with ``x_train_data``.

    Returns:
        The candidate ``C`` value whose mean recall across the folds is
        highest (the first one on ties).
    """
    # model_selection.KFold only takes the number of splits; the data is
    # supplied later via split(). Passing (n_samples, 5) positionally, as the
    # removed cross_validation.KFold expected, binds 5 to ``shuffle`` and
    # raises "got multiple values for argument 'shuffle'".
    fold = KFold(n_splits=5, shuffle=False)

    c_param_range = [0.01, 0.1, 1, 10, 100]  # candidate penalty strengths (C)
    mean_recalls = []

    for c_param in c_param_range:
        print('_______________________________________________')
        print('C parameter: ', c_param)
        print('_______________________________________________')
        print('')

        recall_accs = []
        # split() yields (train_indices, test_indices) positional index arrays.
        for iteration, (train_idx, test_idx) in enumerate(
                fold.split(x_train_data), start=1):
            # The default lbfgs solver does not support the L1 penalty.
            lr = LogisticRegression(C=c_param, penalty='l1', solver='liblinear')

            # Fit and predict on plain arrays so both calls see the same
            # input type (avoids the feature-name mismatch warning).
            x_fit = x_train_data.iloc[train_idx].values
            y_fit = y_train_data.iloc[train_idx].values.ravel()
            lr.fit(x_fit, y_fit)

            y_pred_undersample = lr.predict(x_train_data.iloc[test_idx].values)
            y_true = y_train_data.iloc[test_idx].values.ravel()

            recall_acc = recall_score(y_true, y_pred_undersample)
            recall_accs.append(recall_acc)
            print('Iteration', iteration, ':recall_score = ', recall_acc)

        mean_recall = np.mean(recall_accs)
        mean_recalls.append(mean_recall)
        print('')
        print('Mean recall score ', mean_recall)
        print('')

    # Build the table once from the collected means: this keeps the score
    # column numeric (idxmax rejects object dtype) and avoids the removed
    # DataFrame.ix indexer.
    results_table = pd.DataFrame({
        'C_parameter': c_param_range,
        'Mean recall score': mean_recalls,
    })
    best_row = results_table['Mean recall score'].idxmax()
    best_c = results_table.loc[best_row, 'C_parameter']

    print('************************************************************************')
    print('Best model to choose from cross validation is with C parameter = ', best_c)
    print('************************************************************************')
    return best_c
# Run the cross-validated search for the best C. X_train_undersample and
# y_train_undersample are defined outside this snippet (presumably the
# under-sampled training features/labels -- confirm against the caller).
best_c = printing_Kfold_scores(X_train_undersample, y_train_undersample)
|
|