OP | Posted on 2024-10-22 00:38:00
Fitting 5 folds for each of 40 candidates, totalling 200 fits
/opt/conda/lib/python3.10/site-packages/sklearn/base.py:432: UserWarning: X has feature names, but GradientBoostingClassifier was fitted without feature names
warnings.warn(
---------------------------------------------------------------------------
NotFittedError Traceback (most recent call last)
Cell In[25], line 12
10 gbc_grid = GridSearchCV(gbc, param_test, cv=5, refit=True, verbose=1)
11 gbc_grid.fit(X_train,y_train)
---> 12 train_score = gbc.score(X_train,y_train)
13 gbc_pre = gbc.predict(X_valid)
14 valid_score = accuracy_score(y_valid,gbc_pre)
File /opt/conda/lib/python3.10/site-packages/sklearn/base.py:668, in ClassifierMixin.score(self, X, y, sample_weight)
643 """
644 Return the mean accuracy on the given test data and labels.
645
(...)
664 Mean accuracy of ``self.predict(X)`` w.r.t. `y`.
665 """
666 from .metrics import accuracy_score
--> 668 return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
File /opt/conda/lib/python3.10/site-packages/sklearn/ensemble/_gb.py:1308, in GradientBoostingClassifier.predict(self, X)
1293 def predict(self, X):
1294 """Predict class for X.
1295
1296 Parameters
(...)
1306 The predicted values.
1307 """
-> 1308 raw_predictions = self.decision_function(X)
1309 encoded_labels = self._loss._raw_prediction_to_decision(raw_predictions)
1310 return self.classes_.take(encoded_labels, axis=0)
File /opt/conda/lib/python3.10/site-packages/sklearn/ensemble/_gb.py:1264, in GradientBoostingClassifier.decision_function(self, X)
1243 """Compute the decision function of ``X``.
1244
1245 Parameters
(...)
1259 array of shape (n_samples,).
1260 """
1261 X = self._validate_data(
1262 X, dtype=DTYPE, order="C", accept_sparse="csr", reset=False
1263 )
-> 1264 raw_predictions = self._raw_predict(X)
1265 if raw_predictions.shape[1] == 1:
1266 return raw_predictions.ravel()
File /opt/conda/lib/python3.10/site-packages/sklearn/ensemble/_gb.py:687, in BaseGradientBoosting._raw_predict(self, X)
685 def _raw_predict(self, X):
686 """Return the sum of the trees raw predictions (+ init estimator)."""
--> 687 raw_predictions = self._raw_predict_init(X)
688 predict_stages(self.estimators_, X, self.learning_rate, raw_predictions)
689 return raw_predictions
File /opt/conda/lib/python3.10/site-packages/sklearn/ensemble/_gb.py:673, in BaseGradientBoosting._raw_predict_init(self, X)
671 def _raw_predict_init(self, X):
672 """Check input and compute raw predictions of the init estimator."""
--> 673 self._check_initialized()
674 X = self.estimators_[0, 0]._validate_X_predict(X, check_input=True)
675 if self.init_ == "zero":
File /opt/conda/lib/python3.10/site-packages/sklearn/ensemble/_gb.py:382, in BaseGradientBoosting._check_initialized(self)
380 def _check_initialized(self):
381 """Check that the estimator is initialized, raising an error if not."""
--> 382 check_is_fitted(self)
File /opt/conda/lib/python3.10/site-packages/sklearn/utils/validation.py:1390, in check_is_fitted(estimator, attributes, msg, all_or_any)
1385 fitted = [
1386 v for v in vars(estimator) if v.endswith("_") and not v.startswith("__")
1387 ]
1389 if not fitted:
-> 1390 raise NotFittedError(msg % {"name": type(estimator).__name__})
NotFittedError: This GradientBoostingClassifier instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.
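
The traceback itself points at the cause: Cell In[25] fits gbc_grid, but line 12 then calls score on gbc, the bare base estimator. GridSearchCV fits clones of the estimator it is given, so gbc itself is never fitted, and check_is_fitted raises. A minimal sketch of the corrected cell, reusing only the names visible in the snippet above (the earlier UserWarning about feature names is a separate issue; it usually means a model fitted on a plain array was later handed a DataFrame, so keep the input type consistent between fit and predict):

gbc_grid = GridSearchCV(gbc, param_test, cv=5, refit=True, verbose=1)
gbc_grid.fit(X_train, y_train)
# With refit=True, GridSearchCV refits best_estimator_ on the whole training
# set; score/predict on the search object delegate to that fitted estimator.
train_score = gbc_grid.score(X_train, y_train)
gbc_pre = gbc_grid.predict(X_valid)
valid_score = accuracy_score(y_valid, gbc_pre)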
The error is raised while training the model; the code is as follows:

# Use a DNN model
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torch import optim
# Define the neural network model
dropout1, dropout2 = 0.3, 0.6

class SimpleNN(nn.Module):
    def __init__(self):
        super(SimpleNN, self).__init__()  # super() must be called with this class, SimpleNN
        self.dense = nn.Sequential(
            nn.Flatten(),
            nn.Linear(12, 128),
            nn.ReLU(),
            nn.Dropout(dropout1),
            nn.Linear(128, 256),
            nn.ReLU(),
            nn.Dropout(dropout2),
            nn.Linear(256, 1),
        )

    def forward(self, X):
        x = self.dense(X)
        output = torch.sigmoid(x)  # sigmoid output for BCELoss
        return output
# Define the dataset
class SimpleDataset(Dataset):
    def __init__(self, X, y):
        # get_k_fold_data returns pandas objects; store numpy arrays
        # so the positional indexing in __getitem__ works
        self.X = X.to_numpy()
        self.y = y.to_numpy()

    # support indexing so that dataset[i] returns the i-th sample
    def __getitem__(self, index):
        return self.X[index], self.y[index]

    # len(dataset) returns the number of samples
    def __len__(self):
        return len(self.X)
# Initialize the model and optimizer
# (created once here; see the per-fold re-initialization note after the training loop)
nn_model = SimpleNN()
loss = nn.BCELoss()  # loss function
optimizer = optim.Adam(nn_model.parameters(), lr=0.0001)  # optimizer
# Lists collecting the validation results
acc_list = []
loss_list = []
# k-fold cross-validation: pick the training and validation sets for fold i
def get_k_fold_data(k, i, X, y):
    assert k > 1
    fold_size = X.shape[0] // k  # any remainder samples are dropped
    X_train, y_train = None, None
    for j in range(k):  # plain slices don't index a Series/DataFrame positionally; pandas needs .iloc or .loc
        start = j * fold_size      # start index of fold j
        end = (j + 1) * fold_size  # end index of fold j
        if j == i:
            X_valid, y_valid = X.iloc[start:end], y.iloc[start:end]
        elif X_train is None:
            X_train, y_train = X.iloc[start:end], y.iloc[start:end]
        else:
            X_train = pd.concat([X_train, X.iloc[start:end]], ignore_index=True)
            y_train = pd.concat([y_train, y.iloc[start:end]], ignore_index=True)
    return X_train, y_train, X_valid, y_valid  # all four are pandas objects
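# Note (added): get_k_fold_data slices the folds in the original row order,
# so with label-sorted data the folds may be unrepresentative. A hedged
# sketch of one remedy: shuffle once up front, e.g.
#   from sklearn.utils import shuffle
#   X, y = shuffle(X, y, random_state=0)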
# Start training
batch_size = 99
k = 5
num_epochs = 1000
for i in range(k):
    # Get the training and validation sets of the i-th fold
    X_train, y_train, X_valid, y_valid = get_k_fold_data(k, i, X, y)
    print(f'FOLD {i}')
    print('--------------------------------')
    # Build the datasets
    train_dataset = SimpleDataset(X_train, y_train)
    valid_dataset = SimpleDataset(X_valid, y_valid)
    # Wrap them in data iterators
    train_iter = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    valid_iter = DataLoader(dataset=valid_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    # Iterate over epochs
    for epoch in range(num_epochs):
        nn_model.train()  # enable dropout during training
        train_loss = 0
        num_right = 0
        for tensor_x, tensor_y in train_iter:  # gradient updates on the training set
            tensor_x = tensor_x.float()
            tensor_y = tensor_y.float().reshape(-1, 1)
            optimizer.zero_grad()  # clear accumulated gradients
            pre_train = nn_model(tensor_x)
            train_l = loss(pre_train, tensor_y)  # named train_l to avoid shadowing the global loss
            train_l.backward()  # backpropagation
            optimizer.step()  # gradient-descent step
            train_loss += train_l.item() * len(tensor_x)
            result = [1 if out >= 0.5 else 0 for out in pre_train]
            num_right += np.sum(np.array(result) == tensor_y.numpy().reshape(-1))
        train_loss = train_loss / len(train_iter.dataset)
        train_accuracy = num_right / len(train_iter.dataset)
        if epoch % 200 == 0:
            print('Loss: {} Accuracy: {} Epoch: {}'.format(train_loss, train_accuracy, epoch))
        nn_model.eval()  # disable dropout during validation
        with torch.no_grad():
            valid_loss = 0
            num_right = 0
            for tensor_x, tensor_y in valid_iter:
                tensor_x = tensor_x.float()
                tensor_y = tensor_y.float().reshape(-1, 1)
                pre_valid = nn_model(tensor_x)
                valid_l = loss(pre_valid, tensor_y)
                valid_loss += valid_l.item() * len(tensor_x)
                result = [1 if out >= 0.5 else 0 for out in pre_valid]
                num_right += np.sum(np.array(result) == tensor_y.numpy().reshape(-1))
            valid_loss = valid_loss / len(valid_iter.dataset)
            valid_accuracy = num_right / len(valid_iter.dataset)
            if epoch % 200 == 0:
                print('Valid Loss: {} Accuracy: {} Epoch: {}'.format(valid_loss, valid_accuracy, epoch))
            # Record this epoch's validation results
            loss_list.append(valid_loss)
            acc_list.append(valid_accuracy)
print('Training Ended')
print('Average Loss: {} Average Accuracy: {}'.format(np.mean(loss_list), np.mean(acc_list)))
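
A separate issue worth flagging in the DNN code: nn_model and optimizer are created once, before the fold loop, so each later fold keeps training the weights learned in the earlier folds and the per-fold validation scores are not independent. A minimal sketch of the usual k-fold pattern, assuming the rest of the loop stays exactly as above:

for i in range(k):
    nn_model = SimpleNN()  # fresh weights for every fold
    optimizer = optim.Adam(nn_model.parameters(), lr=0.0001)  # fresh optimizer state
    X_train, y_train, X_valid, y_valid = get_k_fold_data(k, i, X, y)
    # ... the rest of the training/validation loop is unchanged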