|

楼主 |
发表于 2024-10-22 00:38:00
|
显示全部楼层
- Fitting 5 folds for each of 40 candidates, totalling 200 fits
- /opt/conda/lib/python3.10/site-packages/sklearn/base.py:432: UserWarning: X has feature names, but GradientBoostingClassifier was fitted without feature names
- warnings.warn(
- ---------------------------------------------------------------------------
- NotFittedError Traceback (most recent call last)
- Cell In[25], line 12
- 10 gbc_grid = GridSearchCV(gbc, param_test, cv=5, refit=True, verbose=1)
- 11 gbc_grid.fit(X_train,y_train)
- ---> 12 train_score = gbc.score(X_train,y_train)
- 13 gbc_pre = gbc.predict(X_valid)
- 14 valid_score = accuracy_score(y_valid,gbc_pre)
- File /opt/conda/lib/python3.10/site-packages/sklearn/base.py:668, in ClassifierMixin.score(self, X, y, sample_weight)
- 643 """
- 644 Return the mean accuracy on the given test data and labels.
- 645
- (...)
- 664 Mean accuracy of ``self.predict(X)`` w.r.t. `y`.
- 665 """
- 666 from .metrics import accuracy_score
- --> 668 return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
- File /opt/conda/lib/python3.10/site-packages/sklearn/ensemble/_gb.py:1308, in GradientBoostingClassifier.predict(self, X)
- 1293 def predict(self, X):
- 1294 """Predict class for X.
- 1295
- 1296 Parameters
- (...)
- 1306 The predicted values.
- 1307 """
- -> 1308 raw_predictions = self.decision_function(X)
- 1309 encoded_labels = self._loss._raw_prediction_to_decision(raw_predictions)
- 1310 return self.classes_.take(encoded_labels, axis=0)
- File /opt/conda/lib/python3.10/site-packages/sklearn/ensemble/_gb.py:1264, in GradientBoostingClassifier.decision_function(self, X)
- 1243 """Compute the decision function of ``X``.
- 1244
- 1245 Parameters
- (...)
- 1259 array of shape (n_samples,).
- 1260 """
- 1261 X = self._validate_data(
- 1262 X, dtype=DTYPE, order="C", accept_sparse="csr", reset=False
- 1263 )
- -> 1264 raw_predictions = self._raw_predict(X)
- 1265 if raw_predictions.shape[1] == 1:
- 1266 return raw_predictions.ravel()
- File /opt/conda/lib/python3.10/site-packages/sklearn/ensemble/_gb.py:687, in BaseGradientBoosting._raw_predict(self, X)
- 685 def _raw_predict(self, X):
- 686 """Return the sum of the trees raw predictions (+ init estimator)."""
- --> 687 raw_predictions = self._raw_predict_init(X)
- 688 predict_stages(self.estimators_, X, self.learning_rate, raw_predictions)
- 689 return raw_predictions
- File /opt/conda/lib/python3.10/site-packages/sklearn/ensemble/_gb.py:673, in BaseGradientBoosting._raw_predict_init(self, X)
- 671 def _raw_predict_init(self, X):
- 672 """Check input and compute raw predictions of the init estimator."""
- --> 673 self._check_initialized()
- 674 X = self.estimators_[0, 0]._validate_X_predict(X, check_input=True)
- 675 if self.init_ == "zero":
- File /opt/conda/lib/python3.10/site-packages/sklearn/ensemble/_gb.py:382, in BaseGradientBoosting._check_initialized(self)
- 380 def _check_initialized(self):
- 381 """Check that the estimator is initialized, raising an error if not."""
- --> 382 check_is_fitted(self)
- File /opt/conda/lib/python3.10/site-packages/sklearn/utils/validation.py:1390, in check_is_fitted(estimator, attributes, msg, all_or_any)
- 1385 fitted = [
- 1386 v for v in vars(estimator) if v.endswith("_") and not v.startswith("__")
- 1387 ]
- 1389 if not fitted:
- -> 1390 raise NotFittedError(msg % {"name": type(estimator).__name__})
- NotFittedError: This GradientBoostingClassifier instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.
复制代码
训练模型时报错,代码如下
- #使用dnn模型
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch import optim
from torch.utils import data
from torch.utils.data import Dataset, DataLoader
# Define the neural-network model.
# Default dropout probabilities for the first and second hidden layer.
dropout1, dropout2 = 0.3, 0.6


class SimpleNN(nn.Module):
    """Two-hidden-layer MLP that outputs a probability for binary classification.

    The input is flattened, passed through Linear -> ReLU -> Dropout twice,
    projected to a single unit and squashed with a sigmoid, so the output
    lies in [0, 1].

    Args:
        in_features: Number of features per sample after flattening.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
        p1: Dropout probability after the first hidden layer. ``None``
            falls back to the module-level ``dropout1``.
        p2: Dropout probability after the second hidden layer. ``None``
            falls back to the module-level ``dropout2``.
    """

    def __init__(self, in_features=12, hidden1=128, hidden2=256, p1=None, p2=None):
        super().__init__()
        # Resolve the defaults at construction time so that changing the
        # module-level constants still affects models created afterwards.
        if p1 is None:
            p1 = dropout1
        if p2 is None:
            p2 = dropout2
        # The attribute name and the layer order are kept as they were, so
        # the ``dense.<index>`` state-dict keys do not change.
        self.dense = nn.Sequential(
            nn.Flatten(),
            nn.Linear(in_features, hidden1),
            nn.ReLU(),
            nn.Dropout(p1),
            nn.Linear(hidden1, hidden2),
            nn.ReLU(),
            nn.Dropout(p2),
            nn.Linear(hidden2, 1),
        )

    def forward(self, X):
        """Return the sigmoid of the network output, shape ``(batch, 1)``."""
        return torch.sigmoid(self.dense(X))
# Dataset definition.
class SimpleDataset(Dataset):
    """Map-style dataset pairing feature rows with their labels.

    ``X`` and ``y`` may be pandas objects, NumPy arrays, tensors or plain
    sequences. Anything exposing ``to_numpy()`` (DataFrame / Series) is
    converted once up front: ``DataFrame[i]`` selects a *column* and
    ``Series[i]`` looks up an index *label*, so indexing them with the
    positional integers a ``DataLoader`` produces would raise ``KeyError``
    or return the wrong sample.

    Args:
        X: Features, one sample per row.
        y: Labels, aligned positionally with ``X``.
    """

    def __init__(self, X, y):
        self.X = self._as_positional(X)
        self.y = self._as_positional(y)

    @staticmethod
    def _as_positional(values):
        """Return ``values`` in a form where ``values[i]`` is the i-th sample."""
        to_numpy = getattr(values, "to_numpy", None)
        return to_numpy() if callable(to_numpy) else values

    def __getitem__(self, index):
        """Return the ``(features, label)`` pair at position ``index``."""
        return self.X[index], self.y[index]

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)
-
# Initialise the model, the loss function and the optimizer.
nn_model = SimpleNN()
loss = nn.BCELoss()  # binary cross-entropy on probabilities
optimizer = optim.Adam(nn_model.parameters(), lr=0.0001)
# Accumulators for the validation accuracy and loss collected during training.
acc_list = []
loss_list = []
# Select the training and validation split for one fold of k-fold CV.
def get_k_fold_data(k, i, X, y):
    """Split ``X`` / ``y`` into the training and validation data of fold ``i``.

    The rows are cut into ``k`` contiguous folds of ``X.shape[0] // k`` rows
    each. The last fold additionally takes the ``X.shape[0] % k`` leftover
    rows, so no sample is silently dropped. Fold ``i`` becomes the validation
    set; the remaining folds are concatenated into the training set.

    Args:
        k: Number of folds, must be greater than 1.
        i: Zero-based index of the validation fold, ``0 <= i < k``.
        X: pandas DataFrame of features (sliced positionally with ``.iloc``).
        y: pandas Series or DataFrame of labels, aligned with ``X``.

    Returns:
        ``(X_train, y_train, X_valid, y_valid)``. The training parts always
        have a fresh ``0..n-1`` index; the validation parts are plain
        ``.iloc`` slices and keep their original index labels.

    Raises:
        ValueError: If ``k <= 1`` or ``i`` is outside ``range(k)``.
    """
    if k <= 1:
        raise ValueError(f"k must be greater than 1, got {k}")
    if not 0 <= i < k:
        raise ValueError(f"fold index i must satisfy 0 <= i < {k}, got {i}")

    n_rows = X.shape[0]
    fold_size = n_rows // k
    X_parts, y_parts = [], []
    X_valid, y_valid = None, None
    for j in range(k):
        start = j * fold_size
        # The final fold runs to the end so the remainder rows are kept.
        end = n_rows if j == k - 1 else start + fold_size
        # Plain slicing is label-based on pandas objects; .iloc is positional.
        X_fold, y_fold = X.iloc[start:end], y.iloc[start:end]
        if j == i:
            X_valid, y_valid = X_fold, y_fold
        else:
            X_parts.append(X_fold)
            y_parts.append(y_fold)

    # One concat at the end instead of re-copying the growing frame per fold.
    X_train = pd.concat(X_parts, ignore_index=True)
    y_train = pd.concat(y_parts, ignore_index=True)
    return X_train, y_train, X_valid, y_valid
# Training: k-fold cross-validation.
batch_size = 99
k = 5
num_epochs = 1000


def _run_epoch(model, data_iter, criterion, opt=None):
    """Run one pass over ``data_iter`` and return ``(mean_loss, accuracy)``.

    With ``opt`` given, the model is switched to training mode and updated
    after every batch. Without it, the model is switched to eval mode (so
    dropout is disabled) and no gradients are tracked.
    """
    training = opt is not None
    model.train(training)
    total_loss = 0.0
    num_right = 0
    with torch.set_grad_enabled(training):
        for tensor_x, tensor_y in data_iter:
            tensor_x = tensor_x.float()
            tensor_y = tensor_y.float().reshape(-1, 1)
            if training:
                opt.zero_grad()  # clear gradients from the previous step
            pred = model(tensor_x)
            batch_loss = criterion(pred, tensor_y)
            if training:
                batch_loss.backward()  # back-propagation
                opt.step()  # parameter update
            # Weight by batch size so the mean is per sample, not per batch.
            total_loss += batch_loss.item() * len(tensor_x)
            # Threshold the probabilities at 0.5 and count matches.
            num_right += ((pred >= 0.5).float() == tensor_y).sum().item()
    n_samples = len(data_iter.dataset)
    return total_loss / n_samples, num_right / n_samples


for i in range(k):
    # Training and validation split for fold i.
    X_train, y_train, X_valid, y_valid = get_k_fold_data(k, i, X, y)
    print(f'FOLD {i}')
    print('--------------------------------')

    # Start every fold from a freshly initialised model and optimizer.
    # Reusing one model across folds would validate on rows it had already
    # been trained on in an earlier fold.
    nn_model = SimpleNN()
    optimizer = optim.Adam(nn_model.parameters(), lr=0.0001)

    # Build the datasets from positional arrays: integer indexing on a
    # DataFrame / Series is column- / label-based, not row-based.
    train_dataset = SimpleDataset(X_train.to_numpy(), y_train.to_numpy())
    valid_dataset = SimpleDataset(X_valid.to_numpy(), y_valid.to_numpy())
    # Data iterators; the validation order does not matter, so no shuffling.
    train_iter = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
    valid_iter = DataLoader(dataset=valid_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

    for epoch in range(num_epochs):
        train_loss, train_accuracy = _run_epoch(nn_model, train_iter, loss, optimizer)
        if epoch % 200 == 0:
            print('Loss: {} Accuracy: {} Epoch:{}'.format(train_loss, train_accuracy, epoch))

        valid_loss, valid_accuracy = _run_epoch(nn_model, valid_iter, loss)
        if epoch % 200 == 0:
            print('Valid Loss: {} Accuracy: {} Epoch:{}'.format(valid_loss, valid_accuracy, epoch))

    # Record this fold's validation metrics from its final epoch.
    loss_list.append(valid_loss)
    acc_list.append(valid_accuracy)

print('Training Ended')
print('Average Loss: {} Average Accuracy: {}'.format(np.mean(loss_list), np.mean(acc_list)))
复制代码 |
|