Terence888 发表于 2024-10-22 00:31:18

gbdt模型训练遇到报错

代码如下:
#Gradient Boost Decision Tree GBDT
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score

gbc = GradientBoostingClassifier()
param_test = {'n_estimators': ,
            'min_samples_split': ,
            'max_depth': }
gbc_grid = GridSearchCV(gbc, param_test, cv=5, refit=True, verbose=1)
gbc_grid.fit(X_train,y_train)
train_score = gbc.score(X_train,y_train)
gbc_pre = gbc.predict(X_valid)
valid_score = accuracy_score(y_valid,gbc_pre)
print(valid_score)

报错如下
Fitting 5 folds for each of 40 candidates, totalling 200 fits
/opt/conda/lib/python3.10/site-packages/sklearn/base.py:432: UserWarning: X has feature names, but GradientBoostingClassifier was fitted without feature names
warnings.warn(
---------------------------------------------------------------------------
NotFittedError                            Traceback (most recent call last)
Cell In, line 12
   10 gbc_grid = GridSearchCV(gbc, param_test, cv=5, refit=True, verbose=1)
   11 gbc_grid.fit(X_train,y_train)
---> 12 train_score = gbc.score(X_train,y_train)
   13 gbc_pre = gbc.predict(X_valid)
   14 valid_score = accuracy_score(y_valid,gbc_pre)

File /opt/conda/lib/python3.10/site-packages/sklearn/base.py:668, in ClassifierMixin.score(self, X, y, sample_weight)
    643 """
    644 Return the mean accuracy on the given test data and labels.
    645
   (...)
    664   Mean accuracy of ``self.predict(X)`` w.r.t. `y`.
    665 """
    666 from .metrics import accuracy_score
--> 668 return accuracy_score(y, self.predict(X), sample_weight=sample_weight)

File /opt/conda/lib/python3.10/site-packages/sklearn/ensemble/_gb.py:1308, in GradientBoostingClassifier.predict(self, X)
   1293 def predict(self, X):
   1294   """Predict class for X.
   1295
   1296   Parameters
   (...)
   1306         The predicted values.
   1307   """
-> 1308   raw_predictions = self.decision_function(X)
   1309   encoded_labels = self._loss._raw_prediction_to_decision(raw_predictions)
   1310   return self.classes_.take(encoded_labels, axis=0)

File /opt/conda/lib/python3.10/site-packages/sklearn/ensemble/_gb.py:1264, in GradientBoostingClassifier.decision_function(self, X)
   1243 """Compute the decision function of ``X``.
   1244
   1245 Parameters
   (...)
   1259   array of shape (n_samples,).
   1260 """
   1261 X = self._validate_data(
   1262   X, dtype=DTYPE, order="C", accept_sparse="csr", reset=False
   1263 )
-> 1264 raw_predictions = self._raw_predict(X)
   1265 if raw_predictions.shape[1] == 1:
   1266   return raw_predictions.ravel()

File /opt/conda/lib/python3.10/site-packages/sklearn/ensemble/_gb.py:687, in BaseGradientBoosting._raw_predict(self, X)
    685 def _raw_predict(self, X):
    686   """Return the sum of the trees raw predictions (+ init estimator)."""
--> 687   raw_predictions = self._raw_predict_init(X)
    688   predict_stages(self.estimators_, X, self.learning_rate, raw_predictions)
    689   return raw_predictions

File /opt/conda/lib/python3.10/site-packages/sklearn/ensemble/_gb.py:673, in BaseGradientBoosting._raw_predict_init(self, X)
    671 def _raw_predict_init(self, X):
    672   """Check input and compute raw predictions of the init estimator."""
--> 673   self._check_initialized()
    674   X = self.estimators_[0, 0]._validate_X_predict(X, check_input=True)
    675   if self.init_ == "zero":

File /opt/conda/lib/python3.10/site-packages/sklearn/ensemble/_gb.py:382, in BaseGradientBoosting._check_initialized(self)
    380 def _check_initialized(self):
    381   """Check that the estimator is initialized, raising an error if not."""
--> 382   check_is_fitted(self)

File /opt/conda/lib/python3.10/site-packages/sklearn/utils/validation.py:1390, in check_is_fitted(estimator, attributes, msg, all_or_any)
   1385   fitted = [
   1386         v for v in vars(estimator) if v.endswith("_") and not v.startswith("__")
   1387   ]
   1389 if not fitted:
-> 1390   raise NotFittedError(msg % {"name": type(estimator).__name__})

NotFittedError: This GradientBoostingClassifier instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.

FishC 发表于 2024-10-22 00:31:26

从报错信息来看,`GradientBoostingClassifier`实例`gbc`没有经过训练,所以无法进行评分或者预测。这是因为`GridSearchCV`在`fit`时会克隆传入的估计器,并在克隆出的副本上训练,原始的`gbc`对象始终保持未训练状态;训练好的模型保存在`gbc_grid`中,而你却用`gbc`进行评分和预测。

以下是你需要做的更改:

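1. 训练好的最佳模型保存在`gbc_grid.best_estimator_`中(`refit=True`时会用最佳参数在整个训练集上重新训练一次);直接调用`gbc_grid.score`或`gbc_grid.predict`也会使用这个最佳模型,因此下面两处直接改用`gbc_grid`即可。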

2. 将`gbc.score(X_train, y_train)`改为`gbc_grid.score(X_train, y_train)`。

3. 将`gbc.predict(X_valid)`改为`gbc_grid.predict(X_valid)`。

修改后的代码应该如下所示:

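# Example values only (the original lists were lost when the post was rendered);
# 5 * 4 * 2 = 40 candidates, matching the "40 candidates" line in the log.
param_test = {'n_estimators': [50, 100, 150, 200, 250],
            'min_samples_split': [2, 4, 6, 8],
            'max_depth': [3, 5]}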
gbc_grid = GridSearchCV(gbc, param_test, cv=5, refit=True, verbose=1)

# Train the model using X_train and y_train
gbc_grid.fit(X_train, y_train)

# Use the best estimator found
train_score = gbc_grid.score(X_train, y_train)
gbc_pre = gbc_grid.predict(X_valid)
valid_score = accuracy_score(y_valid, gbc_pre)

print(valid_score)

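请注意,`param_test`中的参数列表应该包含具体的值供GridSearchCV搜索。另外,确保你传入的`X_train`和`X_valid`数据形式与模型的期望一致(例如同为列名相同的DataFrame)。报错前出现的`UserWarning: X has feature names, but GradientBoostingClassifier was fitted without feature names`同样是因为把带列名的数据传给了未训练的`gbc`,改用`gbc_grid`后该警告也会随之消失。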

以上回复来自 FishC AI 小助理,如未能正确解答您的问题,请继续追问。
页: [1]
查看完整版本: gbdt模型训练遇到报错