鱼C论坛

 找回密码
 立即注册
查看: 92|回复: 1

xgboost模型训练时报错

[复制链接]
发表于 2024-10-26 21:26:08 | 显示全部楼层 |阅读模式

马上注册,结交更多好友,享用更多功能^_^

您需要 登录 才可以下载或查看,没有账号?立即注册

x
#xgboost拟合训练数据,进行模型训练
import xgboost
xgb_reg = xgboost.XGBRegressor()
xgb_reg.fit(X_train, y_train)

#模型评估,对测试集进行预测
y_preds = xgb_reg.predict(X_test)

output = pd.DataFrame({'Id':test_data.Id,'SalePrice':y_preds})
output

报错
---------------------------------------------------------------------------
XGBoostError                              Traceback (most recent call last)
Cell In[40], line 4
      2 import xgboost
      3 xgb_reg = xgboost.XGBRegressor()
----> 4 xgb_reg.fit(X_train, y_train)
      6 #模型评估,对测试集进行预测
      7 y_preds = xgb_reg.predict(X_test)

File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:730, in require_keyword_args.<locals>.throw_if.<locals>.inner_f(*args, **kwargs)
    728 for k, arg in zip(sig.parameters, args):
    729     kwargs[k] = arg
--> 730 return func(**kwargs)

File /opt/conda/lib/python3.10/site-packages/xgboost/sklearn.py:1055, in XGBModel.fit(self, X, y, sample_weight, base_margin, eval_set, eval_metric, early_stopping_rounds, verbose, xgb_model, sample_weight_eval_set, base_margin_eval_set, feature_weights, callbacks)
   1053 with config_context(verbosity=self.verbosity):
   1054     evals_result: TrainingCallback.EvalsLog = {}
-> 1055     train_dmatrix, evals = _wrap_evaluation_matrices(
   1056         missing=self.missing,
   1057         X=X,
   1058         y=y,
   1059         group=None,
   1060         qid=None,
   1061         sample_weight=sample_weight,
   1062         base_margin=base_margin,
   1063         feature_weights=feature_weights,
   1064         eval_set=eval_set,
   1065         sample_weight_eval_set=sample_weight_eval_set,
   1066         base_margin_eval_set=base_margin_eval_set,
   1067         eval_group=None,
   1068         eval_qid=None,
   1069         create_dmatrix=self._create_dmatrix,
   1070         enable_categorical=self.enable_categorical,
   1071         feature_types=self.feature_types,
   1072     )
   1073     params = self.get_xgb_params()
   1075     if callable(self.objective):

File /opt/conda/lib/python3.10/site-packages/xgboost/sklearn.py:521, in _wrap_evaluation_matrices(missing, X, y, group, qid, sample_weight, base_margin, feature_weights, eval_set, sample_weight_eval_set, base_margin_eval_set, eval_group, eval_qid, create_dmatrix, enable_categorical, feature_types)
    501 def _wrap_evaluation_matrices(
    502     missing: float,
    503     X: Any,
   (...)
    517     feature_types: Optional[FeatureTypes],
    518 ) -> Tuple[Any, List[Tuple[Any, str]]]:
    519     """Convert array_like evaluation matrices into DMatrix.  Perform validation on the
    520     way."""
--> 521     train_dmatrix = create_dmatrix(
    522         data=X,
    523         label=y,
    524         group=group,
    525         qid=qid,
    526         weight=sample_weight,
    527         base_margin=base_margin,
    528         feature_weights=feature_weights,
    529         missing=missing,
    530         enable_categorical=enable_categorical,
    531         feature_types=feature_types,
    532         ref=None,
    533     )
    535     n_validation = 0 if eval_set is None else len(eval_set)
    537     def validate_or_none(meta: Optional[Sequence], name: str) -> Sequence:

File /opt/conda/lib/python3.10/site-packages/xgboost/sklearn.py:958, in XGBModel._create_dmatrix(self, ref, **kwargs)
    956 if _can_use_qdm(self.tree_method) and self.booster != "gblinear":
    957     try:
--> 958         return QuantileDMatrix(
    959             **kwargs, ref=ref, nthread=self.n_jobs, max_bin=self.max_bin
    960         )
    961     except TypeError:  # `QuantileDMatrix` supports lesser types than DMatrix
    962         pass

File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:730, in require_keyword_args.<locals>.throw_if.<locals>.inner_f(*args, **kwargs)
    728 for k, arg in zip(sig.parameters, args):
    729     kwargs[k] = arg
--> 730 return func(**kwargs)

File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:1529, in QuantileDMatrix.__init__(self, data, label, weight, base_margin, missing, silent, feature_names, feature_types, nthread, max_bin, ref, group, qid, label_lower_bound, label_upper_bound, feature_weights, enable_categorical, data_split_mode)
   1509     if any(
   1510         info is not None
   1511         for info in (
   (...)
   1522         )
   1523     ):
   1524         raise ValueError(
   1525             "If data iterator is used as input, data like label should be "
   1526             "specified as batch argument."
   1527         )
-> 1529 self._init(
   1530     data,
   1531     ref=ref,
   1532     label=label,
   1533     weight=weight,
   1534     base_margin=base_margin,
   1535     group=group,
   1536     qid=qid,
   1537     label_lower_bound=label_lower_bound,
   1538     label_upper_bound=label_upper_bound,
   1539     feature_weights=feature_weights,
   1540     feature_names=feature_names,
   1541     feature_types=feature_types,
   1542     enable_categorical=enable_categorical,
   1543 )

File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:1588, in QuantileDMatrix._init(self, data, ref, enable_categorical, **meta)
   1576 config = make_jcargs(
   1577     nthread=self.nthread, missing=self.missing, max_bin=self.max_bin
   1578 )
   1579 ret = _LIB.XGQuantileDMatrixCreateFromCallback(
   1580     None,
   1581     it.proxy.handle,
   (...)
   1586     ctypes.byref(handle),
   1587 )
-> 1588 it.reraise()
   1589 # delay check_call to throw intermediate exception first
   1590 _check_call(ret)

File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:576, in DataIter.reraise(self)
    574 exc = self._exception
    575 self._exception = None
--> 576 raise exc

File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:557, in DataIter._handle_exception(self, fn, dft_ret)
    554     return dft_ret
    556 try:
--> 557     return fn()
    558 except Exception as e:  # pylint: disable=broad-except
    559     # Defer the exception in order to return 0 and stop the iteration.
    560     # Exception inside a ctype callback function has no effect except
    561     # for printing to stderr (doesn't stop the execution).
    562     tb = sys.exc_info()[2]

File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:641, in DataIter._next_wrapper.<locals>.<lambda>()
    638     self._data_ref = ref
    640 # pylint: disable=not-callable
--> 641 return self._handle_exception(lambda: self.next(input_data), 0)

File /opt/conda/lib/python3.10/site-packages/xgboost/data.py:1280, in SingleBatchInternalIter.next(self, input_data)
   1278     return 0
   1279 self.it += 1
-> 1280 input_data(**self.kwargs)
   1281 return 1

File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:730, in require_keyword_args.<locals>.throw_if.<locals>.inner_f(*args, **kwargs)
    728 for k, arg in zip(sig.parameters, args):
    729     kwargs[k] = arg
--> 730 return func(**kwargs)

File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:633, in DataIter._next_wrapper.<locals>.input_data(data, feature_names, feature_types, **kwargs)
    631 self._temporary_data = (new, cat_codes, feature_names, feature_types)
    632 dispatch_proxy_set_data(self.proxy, new, cat_codes, self._allow_host)
--> 633 self.proxy.set_info(
    634     feature_names=feature_names,
    635     feature_types=feature_types,
    636     **kwargs,
    637 )
    638 self._data_ref = ref

File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:730, in require_keyword_args.<locals>.throw_if.<locals>.inner_f(*args, **kwargs)
    728 for k, arg in zip(sig.parameters, args):
    729     kwargs[k] = arg
--> 730 return func(**kwargs)

File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:932, in DMatrix.set_info(self, label, weight, base_margin, group, qid, label_lower_bound, label_upper_bound, feature_names, feature_types, feature_weights)
    929 from .data import dispatch_meta_backend
    931 if label is not None:
--> 932     self.set_label(label)
    933 if weight is not None:
    934     self.set_weight(weight)

File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:1070, in DMatrix.set_label(self, label)
   1061 """Set label of dmatrix
   1062 
   1063 Parameters
   (...)
   1066     The label information to be set into DMatrix
   1067 """
   1068 from .data import dispatch_meta_backend
-> 1070 dispatch_meta_backend(self, label, "label", "float")

File /opt/conda/lib/python3.10/site-packages/xgboost/data.py:1225, in dispatch_meta_backend(matrix, data, name, dtype)
   1223     return
   1224 if _is_pandas_series(data):
-> 1225     _meta_from_pandas_series(data, name, dtype, handle)
   1226     return
   1227 if _is_dlpack(data):

File /opt/conda/lib/python3.10/site-packages/xgboost/data.py:545, in _meta_from_pandas_series(data, name, dtype, handle)
    543     data = data.to_dense()  # type: ignore
    544 assert len(data.shape) == 1 or data.shape[1] == 0 or data.shape[1] == 1
--> 545 _meta_from_numpy(data, name, dtype, handle)

File /opt/conda/lib/python3.10/site-packages/xgboost/data.py:1159, in _meta_from_numpy(data, field, dtype, handle)
   1157     raise ValueError("Masked array is not supported.")
   1158 interface_str = _array_interface(data)
-> 1159 _check_call(_LIB.XGDMatrixSetInfoFromInterface(handle, c_str(field), interface_str))

File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:282, in _check_call(ret)
    271 """Check the return value of C API call
    272 
    273 This function will raise exception when error occurs.
   (...)
    279     return value from API calls
    280 """
    281 if ret != 0:
--> 282     raise XGBoostError(py_str(_LIB.XGBGetLastError()))

XGBoostError: [13:24:13] /workspace/src/data/data.cc:507: Check failed: valid: Label contains NaN, infinity or a value too large.
Stack trace:
  [bt] (0) /opt/conda/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x3588ca) [0x7a256777a8ca]
  [bt] (1) /opt/conda/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x38a21d) [0x7a25677ac21d]
  [bt] (2) /opt/conda/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x38ab51) [0x7a25677acb51]
  [bt] (3) /opt/conda/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGDMatrixSetInfoFromInterface+0xb0) [0x7a25675803a0]
  [bt] (4) /opt/conda/lib/python3.10/lib-dynload/../../libffi.so.8(+0x6a4a) [0x7a260b0e4a4a]
  [bt] (5) /opt/conda/lib/python3.10/lib-dynload/../../libffi.so.8(+0x5fea) [0x7a260b0e3fea]
  [bt] (6) /opt/conda/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x12461) [0x7a260b0fc461]
  [bt] (7) /opt/conda/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x86eb) [0x7a260b0f26eb]
  [bt] (8) /opt/conda/bin/python3.10(_PyObject_MakeTpCall+0x26b) [0x5cd1861cfa6b]
想知道小甲鱼最近在做啥?请访问 -> ilovefishc.com
回复

使用道具 举报

发表于 2024-10-26 21:32:57 | 显示全部楼层
错误信息 `XGBoostError: Label contains NaN, infinity or a value too large.` 出现在调用栈的 `DMatrix.set_label` 这一步,说明这个错误是由标签 `y_train`(而不是特征 `X_train`)触发的:`y_train` 中包含无效值(`NaN`、无穷大或过大的值)。

解决方法如下:

1. **检查和处理NaN值**:
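   确保 `y_train` 中没有缺失值。`y_train` 是 pandas Series 时,可以先用 `y_train.isna().sum()` 查看缺失值的数量;用固定值填充标签会让模型学习到错误的目标,因此更稳妥的做法通常是把标签缺失的样本从 `X_train` 和 `y_train` 中一并删除。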
import numpy as np
   y_train = np.where(np.isnan(y_train), 0, y_train)  # 用0替换NaN值,可以根据实际情况选择合适的替换值

2. **检查和处理无穷大值**:
   确保 `y_train` 中没有无穷大的值。
y_train = np.where(np.isinf(y_train), 0, y_train)  # 用0替换无穷大值,可以根据实际情况选择合适的替换值

3. **检查和处理异常大的值**:
   确保 `y_train` 中没有异常大的值。
y_train = np.where(y_train > 1e10, 1e10, y_train)  # 用1e10限制最大值,可以根据实际阈值选择

总之,执行以下综合处理以确保数据有效:
import numpy as np

# 处理 NaN 值
y_train = np.where(np.isnan(y_train), 0, y_train)

# 处理无穷大值
y_train = np.where(np.isinf(y_train), 0, y_train)

# 处理异常大的值
y_train = np.where(y_train > 1e10, 1e10, y_train)

# 然后再进行模型训练
xgb_reg.fit(X_train, y_train)

确保 `X_train` 和 `y_train` 的长度一致,并确认 `X_train` 中也没有无效数据(XGBoost 默认会把特征中的 `NaN` 当作缺失值处理,但特征中的无穷大值同样会导致报错)。如果问题仍然存在,请逐步检查数据处理流程,找出无效值是在哪一步产生的,确保所有步骤中数据都没有被意外修改。

以上回复来自 FishC AI 小助理,如未能正确解答您的问题,请继续追问。
想知道小甲鱼最近在做啥?请访问 -> ilovefishc.com
回复 支持 反对

使用道具 举报

您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

小黑屋|手机版|Archiver|鱼C工作室 ( 粤ICP备18085999号-1 | 粤公网安备 44051102000585号)

GMT+8, 2024-11-21 19:48

Powered by Discuz! X3.4

© 2001-2023 Discuz! Team.

快速回复 返回顶部 返回列表