鱼C论坛

 找回密码
 立即注册
查看: 567|回复: 1

xgboost模型训练时报错

[复制链接]
发表于 2024-10-26 21:26:08 | 显示全部楼层 |阅读模式

马上注册,结交更多好友,享用更多功能^_^

您需要 登录 才可以下载或查看,没有账号?立即注册

x
  1. #xgboost拟合训练数据,进行模型训练
  2. import xgboost
  3. xgb_reg = xgboost.XGBRegressor()
  4. xgb_reg.fit(X_train, y_train)

  5. #模型评估,对测试集进行预测
  6. y_preds = xgb_reg.predict(X_test)

  7. output = pd.DataFrame({'Id':test_data.Id,'SalePrice':y_preds})
  8. output
复制代码


报错
  1. ---------------------------------------------------------------------------
  2. XGBoostError                              Traceback (most recent call last)
  3. Cell In[40], line 4
  4.       2 import xgboost
  5.       3 xgb_reg = xgboost.XGBRegressor()
  6. ----> 4 xgb_reg.fit(X_train, y_train)
  7.       6 #模型评估,对测试集进行预测
  8.       7 y_preds = xgb_reg.predict(X_test)

  9. File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:730, in require_keyword_args.<locals>.throw_if.<locals>.inner_f(*args, **kwargs)
  10.     728 for k, arg in zip(sig.parameters, args):
  11.     729     kwargs[k] = arg
  12. --> 730 return func(**kwargs)

  13. File /opt/conda/lib/python3.10/site-packages/xgboost/sklearn.py:1055, in XGBModel.fit(self, X, y, sample_weight, base_margin, eval_set, eval_metric, early_stopping_rounds, verbose, xgb_model, sample_weight_eval_set, base_margin_eval_set, feature_weights, callbacks)
  14.    1053 with config_context(verbosity=self.verbosity):
  15.    1054     evals_result: TrainingCallback.EvalsLog = {}
  16. -> 1055     train_dmatrix, evals = _wrap_evaluation_matrices(
  17.    1056         missing=self.missing,
  18.    1057         X=X,
  19.    1058         y=y,
  20.    1059         group=None,
  21.    1060         qid=None,
  22.    1061         sample_weight=sample_weight,
  23.    1062         base_margin=base_margin,
  24.    1063         feature_weights=feature_weights,
  25.    1064         eval_set=eval_set,
  26.    1065         sample_weight_eval_set=sample_weight_eval_set,
  27.    1066         base_margin_eval_set=base_margin_eval_set,
  28.    1067         eval_group=None,
  29.    1068         eval_qid=None,
  30.    1069         create_dmatrix=self._create_dmatrix,
  31.    1070         enable_categorical=self.enable_categorical,
  32.    1071         feature_types=self.feature_types,
  33.    1072     )
  34.    1073     params = self.get_xgb_params()
  35.    1075     if callable(self.objective):

  36. File /opt/conda/lib/python3.10/site-packages/xgboost/sklearn.py:521, in _wrap_evaluation_matrices(missing, X, y, group, qid, sample_weight, base_margin, feature_weights, eval_set, sample_weight_eval_set, base_margin_eval_set, eval_group, eval_qid, create_dmatrix, enable_categorical, feature_types)
  37.     501 def _wrap_evaluation_matrices(
  38.     502     missing: float,
  39.     503     X: Any,
  40.    (...)
  41.     517     feature_types: Optional[FeatureTypes],
  42.     518 ) -> Tuple[Any, List[Tuple[Any, str]]]:
  43.     519     """Convert array_like evaluation matrices into DMatrix.  Perform validation on the
  44.     520     way."""
  45. --> 521     train_dmatrix = create_dmatrix(
  46.     522         data=X,
  47.     523         label=y,
  48.     524         group=group,
  49.     525         qid=qid,
  50.     526         weight=sample_weight,
  51.     527         base_margin=base_margin,
  52.     528         feature_weights=feature_weights,
  53.     529         missing=missing,
  54.     530         enable_categorical=enable_categorical,
  55.     531         feature_types=feature_types,
  56.     532         ref=None,
  57.     533     )
  58.     535     n_validation = 0 if eval_set is None else len(eval_set)
  59.     537     def validate_or_none(meta: Optional[Sequence], name: str) -> Sequence:

  60. File /opt/conda/lib/python3.10/site-packages/xgboost/sklearn.py:958, in XGBModel._create_dmatrix(self, ref, **kwargs)
  61.     956 if _can_use_qdm(self.tree_method) and self.booster != "gblinear":
  62.     957     try:
  63. --> 958         return QuantileDMatrix(
  64.     959             **kwargs, ref=ref, nthread=self.n_jobs, max_bin=self.max_bin
  65.     960         )
  66.     961     except TypeError:  # `QuantileDMatrix` supports lesser types than DMatrix
  67.     962         pass

  68. File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:730, in require_keyword_args.<locals>.throw_if.<locals>.inner_f(*args, **kwargs)
  69.     728 for k, arg in zip(sig.parameters, args):
  70.     729     kwargs[k] = arg
  71. --> 730 return func(**kwargs)

  72. File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:1529, in QuantileDMatrix.__init__(self, data, label, weight, base_margin, missing, silent, feature_names, feature_types, nthread, max_bin, ref, group, qid, label_lower_bound, label_upper_bound, feature_weights, enable_categorical, data_split_mode)
  73.    1509     if any(
  74.    1510         info is not None
  75.    1511         for info in (
  76.    (...)
  77.    1522         )
  78.    1523     ):
  79.    1524         raise ValueError(
  80.    1525             "If data iterator is used as input, data like label should be "
  81.    1526             "specified as batch argument."
  82.    1527         )
  83. -> 1529 self._init(
  84.    1530     data,
  85.    1531     ref=ref,
  86.    1532     label=label,
  87.    1533     weight=weight,
  88.    1534     base_margin=base_margin,
  89.    1535     group=group,
  90.    1536     qid=qid,
  91.    1537     label_lower_bound=label_lower_bound,
  92.    1538     label_upper_bound=label_upper_bound,
  93.    1539     feature_weights=feature_weights,
  94.    1540     feature_names=feature_names,
  95.    1541     feature_types=feature_types,
  96.    1542     enable_categorical=enable_categorical,
  97.    1543 )

  98. File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:1588, in QuantileDMatrix._init(self, data, ref, enable_categorical, **meta)
  99.    1576 config = make_jcargs(
  100.    1577     nthread=self.nthread, missing=self.missing, max_bin=self.max_bin
  101.    1578 )
  102.    1579 ret = _LIB.XGQuantileDMatrixCreateFromCallback(
  103.    1580     None,
  104.    1581     it.proxy.handle,
  105.    (...)
  106.    1586     ctypes.byref(handle),
  107.    1587 )
  108. -> 1588 it.reraise()
  109.    1589 # delay check_call to throw intermediate exception first
  110.    1590 _check_call(ret)

  111. File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:576, in DataIter.reraise(self)
  112.     574 exc = self._exception
  113.     575 self._exception = None
  114. --> 576 raise exc

  115. File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:557, in DataIter._handle_exception(self, fn, dft_ret)
  116.     554     return dft_ret
  117.     556 try:
  118. --> 557     return fn()
  119.     558 except Exception as e:  # pylint: disable=broad-except
  120.     559     # Defer the exception in order to return 0 and stop the iteration.
  121.     560     # Exception inside a ctype callback function has no effect except
  122.     561     # for printing to stderr (doesn't stop the execution).
  123.     562     tb = sys.exc_info()[2]

  124. File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:641, in DataIter._next_wrapper.<locals>.<lambda>()
  125.     638     self._data_ref = ref
  126.     640 # pylint: disable=not-callable
  127. --> 641 return self._handle_exception(lambda: self.next(input_data), 0)

  128. File /opt/conda/lib/python3.10/site-packages/xgboost/data.py:1280, in SingleBatchInternalIter.next(self, input_data)
  129.    1278     return 0
  130.    1279 self.it += 1
  131. -> 1280 input_data(**self.kwargs)
  132.    1281 return 1

  133. File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:730, in require_keyword_args.<locals>.throw_if.<locals>.inner_f(*args, **kwargs)
  134.     728 for k, arg in zip(sig.parameters, args):
  135.     729     kwargs[k] = arg
  136. --> 730 return func(**kwargs)

  137. File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:633, in DataIter._next_wrapper.<locals>.input_data(data, feature_names, feature_types, **kwargs)
  138.     631 self._temporary_data = (new, cat_codes, feature_names, feature_types)
  139.     632 dispatch_proxy_set_data(self.proxy, new, cat_codes, self._allow_host)
  140. --> 633 self.proxy.set_info(
  141.     634     feature_names=feature_names,
  142.     635     feature_types=feature_types,
  143.     636     **kwargs,
  144.     637 )
  145.     638 self._data_ref = ref

  146. File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:730, in require_keyword_args.<locals>.throw_if.<locals>.inner_f(*args, **kwargs)
  147.     728 for k, arg in zip(sig.parameters, args):
  148.     729     kwargs[k] = arg
  149. --> 730 return func(**kwargs)

  150. File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:932, in DMatrix.set_info(self, label, weight, base_margin, group, qid, label_lower_bound, label_upper_bound, feature_names, feature_types, feature_weights)
  151.     929 from .data import dispatch_meta_backend
  152.     931 if label is not None:
  153. --> 932     self.set_label(label)
  154.     933 if weight is not None:
  155.     934     self.set_weight(weight)

  156. File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:1070, in DMatrix.set_label(self, label)
  157.    1061 """Set label of dmatrix
  158.    1062
  159.    1063 Parameters
  160.    (...)
  161.    1066     The label information to be set into DMatrix
  162.    1067 """
  163.    1068 from .data import dispatch_meta_backend
  164. -> 1070 dispatch_meta_backend(self, label, "label", "float")

  165. File /opt/conda/lib/python3.10/site-packages/xgboost/data.py:1225, in dispatch_meta_backend(matrix, data, name, dtype)
  166.    1223     return
  167.    1224 if _is_pandas_series(data):
  168. -> 1225     _meta_from_pandas_series(data, name, dtype, handle)
  169.    1226     return
  170.    1227 if _is_dlpack(data):

  171. File /opt/conda/lib/python3.10/site-packages/xgboost/data.py:545, in _meta_from_pandas_series(data, name, dtype, handle)
  172.     543     data = data.to_dense()  # type: ignore
  173.     544 assert len(data.shape) == 1 or data.shape[1] == 0 or data.shape[1] == 1
  174. --> 545 _meta_from_numpy(data, name, dtype, handle)

  175. File /opt/conda/lib/python3.10/site-packages/xgboost/data.py:1159, in _meta_from_numpy(data, field, dtype, handle)
  176.    1157     raise ValueError("Masked array is not supported.")
  177.    1158 interface_str = _array_interface(data)
  178. -> 1159 _check_call(_LIB.XGDMatrixSetInfoFromInterface(handle, c_str(field), interface_str))

  179. File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:282, in _check_call(ret)
  180.     271 """Check the return value of C API call
  181.     272
  182.     273 This function will raise exception when error occurs.
  183.    (...)
  184.     279     return value from API calls
  185.     280 """
  186.     281 if ret != 0:
  187. --> 282     raise XGBoostError(py_str(_LIB.XGBGetLastError()))

  188. XGBoostError: [13:24:13] /workspace/src/data/data.cc:507: Check failed: valid: Label contains NaN, infinity or a value too large.
  189. Stack trace:
  190.   [bt] (0) /opt/conda/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x3588ca) [0x7a256777a8ca]
  191.   [bt] (1) /opt/conda/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x38a21d) [0x7a25677ac21d]
  192.   [bt] (2) /opt/conda/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x38ab51) [0x7a25677acb51]
  193.   [bt] (3) /opt/conda/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGDMatrixSetInfoFromInterface+0xb0) [0x7a25675803a0]
  194.   [bt] (4) /opt/conda/lib/python3.10/lib-dynload/../../libffi.so.8(+0x6a4a) [0x7a260b0e4a4a]
  195.   [bt] (5) /opt/conda/lib/python3.10/lib-dynload/../../libffi.so.8(+0x5fea) [0x7a260b0e3fea]
  196.   [bt] (6) /opt/conda/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x12461) [0x7a260b0fc461]
  197.   [bt] (7) /opt/conda/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x86eb) [0x7a260b0f26eb]
  198.   [bt] (8) /opt/conda/bin/python3.10(_PyObject_MakeTpCall+0x26b) [0x5cd1861cfa6b]
复制代码
小甲鱼最新课程 -> https://ilovefishc.com
回复

使用道具 举报

发表于 2024-10-26 21:32:57 | 显示全部楼层
错误信息 `XGBoostError: Label contains NaN, infinity or a value too large.` 是在设置标签(label)时抛出的,由此可以推断:作为标签的 `y_train` 中包含无效值(`NaN`、无穷大或过大的值)。

解决方法如下:

1. **检查和处理NaN值**:
   确保 `y_train` 中没有缺失值。
  
  1. import numpy as np
  2. y_train = np.where(np.isnan(y_train), 0, y_train)  # 用0替换NaN值,可以根据实际情况选择合适的替换值
复制代码


2. **检查和处理无穷大值**:
   确保 `y_train` 中没有无穷大的值。
  
  1. y_train = np.where(np.isinf(y_train), 0, y_train)  # 用0替换无穷大值,可以根据实际情况选择合适的替换值
复制代码


3. **检查和处理异常大的值**:
   确保 `y_train` 中没有异常大的值。
  
  1. y_train = np.where(y_train > 1e10, 1e10, y_train)  # 用1e10限制最大值,可以根据实际阈值选择
复制代码


总之,执行以下综合处理以确保数据有效:
  1. import numpy as np

  2. # 处理 NaN 值
  3. y_train = np.where(np.isnan(y_train), 0, y_train)

  4. # 处理无穷大值
  5. y_train = np.where(np.isinf(y_train), 0, y_train)

  6. # 处理异常大的值
  7. y_train = np.where(y_train > 1e10, 1e10, y_train)

  8. # 然后再进行模型训练
  9. xgb_reg.fit(X_train, y_train)
复制代码


确保 `X_train` 和 `y_train` 的长度一致,并确认 `X_train` 中也没有无效数据。需要注意的是,用固定值(例如 0)填充标签会让模型学习到并不真实的目标值;对于标签缺失或为无穷大的样本,通常更稳妥的做法是把这些样本从 `X_train` 和 `y_train` 中同时删除。如果问题仍然存在,详细检查数据处理流程,确保所有步骤中数据都没有被意外修改。

以上回复来自 FishC AI 小助理,如未能正确解答您的问题,请继续追问。
小甲鱼最新课程 -> https://ilovefishc.com
回复 支持 反对

使用道具 举报

您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

小黑屋|手机版|Archiver|鱼C工作室 ( 粤ICP备18085999号-1 | 粤公网安备 44051102000585号)

GMT+8, 2025-4-19 17:22

Powered by Discuz! X3.4

© 2001-2023 Discuz! Team.

快速回复 返回顶部 返回列表