gbdt模型训练遇到报错
代码如下:
#Gradient Boost Decision Tree GBDT
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score
gbc = GradientBoostingClassifier()
param_test = {'n_estimators': ,
'min_samples_split': ,
'max_depth': }
gbc_grid = GridSearchCV(gbc, param_test, cv=5, refit=True, verbose=1)
gbc_grid.fit(X_train,y_train)
train_score = gbc.score(X_train,y_train)
gbc_pre = gbc.predict(X_valid)
valid_score = accuracy_score(y_valid,gbc_pre)
print(valid_score)
报错如下
Fitting 5 folds for each of 40 candidates, totalling 200 fits
/opt/conda/lib/python3.10/site-packages/sklearn/base.py:432: UserWarning: X has feature names, but GradientBoostingClassifier was fitted without feature names
warnings.warn(
---------------------------------------------------------------------------
NotFittedError Traceback (most recent call last)
Cell In, line 12
10 gbc_grid = GridSearchCV(gbc, param_test, cv=5, refit=True, verbose=1)
11 gbc_grid.fit(X_train,y_train)
---> 12 train_score = gbc.score(X_train,y_train)
13 gbc_pre = gbc.predict(X_valid)
14 valid_score = accuracy_score(y_valid,gbc_pre)
File /opt/conda/lib/python3.10/site-packages/sklearn/base.py:668, in ClassifierMixin.score(self, X, y, sample_weight)
643 """
644 Return the mean accuracy on the given test data and labels.
645
(...)
664 Mean accuracy of ``self.predict(X)`` w.r.t. `y`.
665 """
666 from .metrics import accuracy_score
--> 668 return accuracy_score(y, self.predict(X), sample_weight=sample_weight)
File /opt/conda/lib/python3.10/site-packages/sklearn/ensemble/_gb.py:1308, in GradientBoostingClassifier.predict(self, X)
1293 def predict(self, X):
1294 """Predict class for X.
1295
1296 Parameters
(...)
1306 The predicted values.
1307 """
-> 1308 raw_predictions = self.decision_function(X)
1309 encoded_labels = self._loss._raw_prediction_to_decision(raw_predictions)
1310 return self.classes_.take(encoded_labels, axis=0)
File /opt/conda/lib/python3.10/site-packages/sklearn/ensemble/_gb.py:1264, in GradientBoostingClassifier.decision_function(self, X)
1243 """Compute the decision function of ``X``.
1244
1245 Parameters
(...)
1259 array of shape (n_samples,).
1260 """
1261 X = self._validate_data(
1262 X, dtype=DTYPE, order="C", accept_sparse="csr", reset=False
1263 )
-> 1264 raw_predictions = self._raw_predict(X)
1265 if raw_predictions.shape[1] == 1:
1266 return raw_predictions.ravel()
File /opt/conda/lib/python3.10/site-packages/sklearn/ensemble/_gb.py:687, in BaseGradientBoosting._raw_predict(self, X)
685 def _raw_predict(self, X):
686 """Return the sum of the trees raw predictions (+ init estimator)."""
--> 687 raw_predictions = self._raw_predict_init(X)
688 predict_stages(self.estimators_, X, self.learning_rate, raw_predictions)
689 return raw_predictions
File /opt/conda/lib/python3.10/site-packages/sklearn/ensemble/_gb.py:673, in BaseGradientBoosting._raw_predict_init(self, X)
671 def _raw_predict_init(self, X):
672 """Check input and compute raw predictions of the init estimator."""
--> 673 self._check_initialized()
674 X = self.estimators_[0, 0]._validate_X_predict(X, check_input=True)
675 if self.init_ == "zero":
File /opt/conda/lib/python3.10/site-packages/sklearn/ensemble/_gb.py:382, in BaseGradientBoosting._check_initialized(self)
380 def _check_initialized(self):
381 """Check that the estimator is initialized, raising an error if not."""
--> 382 check_is_fitted(self)
File /opt/conda/lib/python3.10/site-packages/sklearn/utils/validation.py:1390, in check_is_fitted(estimator, attributes, msg, all_or_any)
1385 fitted = [
1386 v for v in vars(estimator) if v.endswith("_") and not v.startswith("__")
1387 ]
1389 if not fitted:
-> 1390 raise NotFittedError(msg % {"name": type(estimator).__name__})
NotFittedError: This GradientBoostingClassifier instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.
从报错信息来看,`GradientBoostingClassifier`实例`gbc`没有经过训练,所以无法进行评分或者预测。这是因为`GridSearchCV`在`fit`时会克隆传入的估计器,只训练克隆出来的副本,原始的`gbc`对象本身始终没有被拟合;真正训练好的模型保存在`gbc_grid`里,而你却试图用`gbc`进行评分和预测。
以下是你需要做的更改:
1. 经过训练的最佳模型实例保存在`gbc_grid.best_estimator_`中;由于设置了`refit=True`,直接调用`gbc_grid.score`和`gbc_grid.predict`时就会使用这个最佳模型。
2. 将`gbc.score(X_train, y_train)`改为`gbc_grid.score(X_train, y_train)`。
3. 将`gbc.predict(X_valid)`改为`gbc_grid.predict(X_valid)`。
修改后的代码应该如下所示:
param_test = {'n_estimators': ,
'min_samples_split': ,
'max_depth': }
gbc_grid = GridSearchCV(gbc, param_test, cv=5, refit=True, verbose=1)
# Train the model using X_train and y_train
gbc_grid.fit(X_train, y_train)
# Use the best estimator found
train_score = gbc_grid.score(X_train, y_train)
gbc_pre = gbc_grid.predict(X_valid)
valid_score = accuracy_score(y_valid, gbc_pre)
print(valid_score)
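请注意,`param_test`中每个参数都应该对应一个具体的候选值列表(例如`'max_depth': [3, 5]`)供GridSearchCV搜索。帖子里贴出的代码没有显示这些取值,不过日志中的"40 candidates"说明原始代码里是填了取值的,可能只是发帖时方括号里的内容没有显示出来。另外,确保你传入的`X_train`和`X_valid`数据形式与模型的期望一致。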
以上回复来自 FishC AI 小助理,如未能正确解答您的问题,请继续追问。
页:
[1]