Terence888 发表于 2024-10-26 21:26:08

xgboost模型训练时报错

#xgboost拟合训练数据,进行模型训练
import xgboost
xgb_reg = xgboost.XGBRegressor()
xgb_reg.fit(X_train, y_train)

#模型评估,对测试集进行预测
y_preds = xgb_reg.predict(X_test)

output = pd.DataFrame({'Id':test_data.Id,'SalePrice':y_preds})
output

报错:
---------------------------------------------------------------------------
XGBoostError                              Traceback (most recent call last)
Cell In, line 4
      2 import xgboost
      3 xgb_reg = xgboost.XGBRegressor()
----> 4 xgb_reg.fit(X_train, y_train)
      6 #模型评估,对测试集进行预测
      7 y_preds = xgb_reg.predict(X_test)

File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:730, in require_keyword_args.<locals>.throw_if.<locals>.inner_f(*args, **kwargs)
    728 for k, arg in zip(sig.parameters, args):
    729   kwargs[k] = arg
--> 730 return func(**kwargs)

File /opt/conda/lib/python3.10/site-packages/xgboost/sklearn.py:1055, in XGBModel.fit(self, X, y, sample_weight, base_margin, eval_set, eval_metric, early_stopping_rounds, verbose, xgb_model, sample_weight_eval_set, base_margin_eval_set, feature_weights, callbacks)
   1053 with config_context(verbosity=self.verbosity):
   1054   evals_result: TrainingCallback.EvalsLog = {}
-> 1055   train_dmatrix, evals = _wrap_evaluation_matrices(
   1056         missing=self.missing,
   1057         X=X,
   1058         y=y,
   1059         group=None,
   1060         qid=None,
   1061         sample_weight=sample_weight,
   1062         base_margin=base_margin,
   1063         feature_weights=feature_weights,
   1064         eval_set=eval_set,
   1065         sample_weight_eval_set=sample_weight_eval_set,
   1066         base_margin_eval_set=base_margin_eval_set,
   1067         eval_group=None,
   1068         eval_qid=None,
   1069         create_dmatrix=self._create_dmatrix,
   1070         enable_categorical=self.enable_categorical,
   1071         feature_types=self.feature_types,
   1072   )
   1073   params = self.get_xgb_params()
   1075   if callable(self.objective):

File /opt/conda/lib/python3.10/site-packages/xgboost/sklearn.py:521, in _wrap_evaluation_matrices(missing, X, y, group, qid, sample_weight, base_margin, feature_weights, eval_set, sample_weight_eval_set, base_margin_eval_set, eval_group, eval_qid, create_dmatrix, enable_categorical, feature_types)
    501 def _wrap_evaluation_matrices(
    502   missing: float,
    503   X: Any,
   (...)
    517   feature_types: Optional[FeatureTypes],
    518 ) -> Tuple[Any, List[Tuple[Any, str]]]:
    519   """Convert array_like evaluation matrices into DMatrix.Perform validation on the
    520   way."""
--> 521   train_dmatrix = create_dmatrix(
    522         data=X,
    523         label=y,
    524         group=group,
    525         qid=qid,
    526         weight=sample_weight,
    527         base_margin=base_margin,
    528         feature_weights=feature_weights,
    529         missing=missing,
    530         enable_categorical=enable_categorical,
    531         feature_types=feature_types,
    532         ref=None,
    533   )
    535   n_validation = 0 if eval_set is None else len(eval_set)
    537   def validate_or_none(meta: Optional[Sequence], name: str) -> Sequence:

File /opt/conda/lib/python3.10/site-packages/xgboost/sklearn.py:958, in XGBModel._create_dmatrix(self, ref, **kwargs)
    956 if _can_use_qdm(self.tree_method) and self.booster != "gblinear":
    957   try:
--> 958         return QuantileDMatrix(
    959             **kwargs, ref=ref, nthread=self.n_jobs, max_bin=self.max_bin
    960         )
    961   except TypeError:# `QuantileDMatrix` supports lesser types than DMatrix
    962         pass

File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:730, in require_keyword_args.<locals>.throw_if.<locals>.inner_f(*args, **kwargs)
    728 for k, arg in zip(sig.parameters, args):
    729   kwargs[k] = arg
--> 730 return func(**kwargs)

File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:1529, in QuantileDMatrix.__init__(self, data, label, weight, base_margin, missing, silent, feature_names, feature_types, nthread, max_bin, ref, group, qid, label_lower_bound, label_upper_bound, feature_weights, enable_categorical, data_split_mode)
   1509   if any(
   1510         info is not None
   1511         for info in (
   (...)
   1522         )
   1523   ):
   1524         raise ValueError(
   1525             "If data iterator is used as input, data like label should be "
   1526             "specified as batch argument."
   1527         )
-> 1529 self._init(
   1530   data,
   1531   ref=ref,
   1532   label=label,
   1533   weight=weight,
   1534   base_margin=base_margin,
   1535   group=group,
   1536   qid=qid,
   1537   label_lower_bound=label_lower_bound,
   1538   label_upper_bound=label_upper_bound,
   1539   feature_weights=feature_weights,
   1540   feature_names=feature_names,
   1541   feature_types=feature_types,
   1542   enable_categorical=enable_categorical,
   1543 )

File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:1588, in QuantileDMatrix._init(self, data, ref, enable_categorical, **meta)
   1576 config = make_jcargs(
   1577   nthread=self.nthread, missing=self.missing, max_bin=self.max_bin
   1578 )
   1579 ret = _LIB.XGQuantileDMatrixCreateFromCallback(
   1580   None,
   1581   it.proxy.handle,
   (...)
   1586   ctypes.byref(handle),
   1587 )
-> 1588 it.reraise()
   1589 # delay check_call to throw intermediate exception first
   1590 _check_call(ret)

File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:576, in DataIter.reraise(self)
    574 exc = self._exception
    575 self._exception = None
--> 576 raise exc

File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:557, in DataIter._handle_exception(self, fn, dft_ret)
    554   return dft_ret
    556 try:
--> 557   return fn()
    558 except Exception as e:# pylint: disable=broad-except
    559   # Defer the exception in order to return 0 and stop the iteration.
    560   # Exception inside a ctype callback function has no effect except
    561   # for printing to stderr (doesn't stop the execution).
    562   tb = sys.exc_info()[2]

File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:641, in DataIter._next_wrapper.<locals>.<lambda>()
    638   self._data_ref = ref
    640 # pylint: disable=not-callable
--> 641 return self._handle_exception(lambda: self.next(input_data), 0)

File /opt/conda/lib/python3.10/site-packages/xgboost/data.py:1280, in SingleBatchInternalIter.next(self, input_data)
   1278   return 0
   1279 self.it += 1
-> 1280 input_data(**self.kwargs)
   1281 return 1

File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:730, in require_keyword_args.<locals>.throw_if.<locals>.inner_f(*args, **kwargs)
    728 for k, arg in zip(sig.parameters, args):
    729   kwargs[k] = arg
--> 730 return func(**kwargs)

File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:633, in DataIter._next_wrapper.<locals>.input_data(data, feature_names, feature_types, **kwargs)
    631 self._temporary_data = (new, cat_codes, feature_names, feature_types)
    632 dispatch_proxy_set_data(self.proxy, new, cat_codes, self._allow_host)
--> 633 self.proxy.set_info(
    634   feature_names=feature_names,
    635   feature_types=feature_types,
    636   **kwargs,
    637 )
    638 self._data_ref = ref

File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:730, in require_keyword_args.<locals>.throw_if.<locals>.inner_f(*args, **kwargs)
    728 for k, arg in zip(sig.parameters, args):
    729   kwargs[k] = arg
--> 730 return func(**kwargs)

File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:932, in DMatrix.set_info(self, label, weight, base_margin, group, qid, label_lower_bound, label_upper_bound, feature_names, feature_types, feature_weights)
    929 from .data import dispatch_meta_backend
    931 if label is not None:
--> 932   self.set_label(label)
    933 if weight is not None:
    934   self.set_weight(weight)

File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:1070, in DMatrix.set_label(self, label)
   1061 """Set label of dmatrix
   1062
   1063 Parameters
   (...)
   1066   The label information to be set into DMatrix
   1067 """
   1068 from .data import dispatch_meta_backend
-> 1070 dispatch_meta_backend(self, label, "label", "float")

File /opt/conda/lib/python3.10/site-packages/xgboost/data.py:1225, in dispatch_meta_backend(matrix, data, name, dtype)
   1223   return
   1224 if _is_pandas_series(data):
-> 1225   _meta_from_pandas_series(data, name, dtype, handle)
   1226   return
   1227 if _is_dlpack(data):

File /opt/conda/lib/python3.10/site-packages/xgboost/data.py:545, in _meta_from_pandas_series(data, name, dtype, handle)
    543   data = data.to_dense()# type: ignore
    544 assert len(data.shape) == 1 or data.shape[1] == 0 or data.shape[1] == 1
--> 545 _meta_from_numpy(data, name, dtype, handle)

File /opt/conda/lib/python3.10/site-packages/xgboost/data.py:1159, in _meta_from_numpy(data, field, dtype, handle)
   1157   raise ValueError("Masked array is not supported.")
   1158 interface_str = _array_interface(data)
-> 1159 _check_call(_LIB.XGDMatrixSetInfoFromInterface(handle, c_str(field), interface_str))

File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:282, in _check_call(ret)
    271 """Check the return value of C API call
    272
    273 This function will raise exception when error occurs.
   (...)
    279   return value from API calls
    280 """
    281 if ret != 0:
--> 282   raise XGBoostError(py_str(_LIB.XGBGetLastError()))

XGBoostError: /workspace/src/data/data.cc:507: Check failed: valid: Label contains NaN, infinity or a value too large.
Stack trace:
(0) /opt/conda/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x3588ca)
(1) /opt/conda/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x38a21d)
(2) /opt/conda/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x38ab51)
(3) /opt/conda/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGDMatrixSetInfoFromInterface+0xb0)
(4) /opt/conda/lib/python3.10/lib-dynload/../../libffi.so.8(+0x6a4a)
(5) /opt/conda/lib/python3.10/lib-dynload/../../libffi.so.8(+0x5fea)
(6) /opt/conda/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x12461)
(7) /opt/conda/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x86eb)
(8) /opt/conda/bin/python3.10(_PyObject_MakeTpCall+0x26b)

FishC 发表于 2024-10-26 21:32:57

根据错误信息,`XGBoostError: Label contains NaN, infinity or a value too large.` 你可以推断出 `y_train` 包含无效值(`NaN`、无穷大或过大的值)。

解决方法如下:

1. **检查和处理NaN值**:
   确保 `y_train` 中没有缺失值。
   import numpy as np
   y_train = np.where(np.isnan(y_train), 0, y_train)# 用0替换NaN值仅作示例;y_train 是标签,填充0会引入错误样本,通常更应删除标签缺失的样本(并同步删除 X_train 中对应的行)

2. **检查和处理无穷大值**:
   确保 `y_train` 中没有无穷大的值。
   y_train = np.where(np.isinf(y_train), 0, y_train)# 用0替换无穷大值,可以根据实际情况选择合适的替换值

3. **检查和处理异常大的值**:
   确保 `y_train` 中没有异常大的值。
   y_train = np.where(y_train > 1e10, 1e10, y_train)# 用1e10限制最大值,可以根据实际阈值选择

总之,执行以下综合处理以确保数据有效:
import numpy as np

# 处理 NaN 值
y_train = np.where(np.isnan(y_train), 0, y_train)

# 处理无穷大值
y_train = np.where(np.isinf(y_train), 0, y_train)

# 处理异常大的值
y_train = np.where(y_train > 1e10, 1e10, y_train)

# 然后再进行模型训练
xgb_reg.fit(X_train, y_train)

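确保 `X_train` 和 `y_train` 的长度一致,并确认 `X_train` 中也没有无效数据。注意:`y_train` 是标签,直接用固定值填充会让模型学习到错误的目标值;更稳妥的做法是先用 `y_train.isna().sum()` 和 `np.isinf(y_train).sum()` 定位问题,再删除这些样本,并同步删除 `X_train` 中对应的行。如果问题仍然存在,详细检查数据处理流程(例如:对标签做 `np.log` 变换时遇到 0 或负值会产生 `-inf`/`NaN`;训练集与测试集合并处理后再拆分时,把测试集中为空的标签带进了 `y_train`),确保所有步骤中数据都没有被意外修改。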

以上回复来自 FishC AI 小助理,如未能正确解答您的问题,请继续追问。
页: [1]
查看完整版本: xgboost模型训练时报错