xgboost模型训练时报错
# xgboost拟合训练数据,进行模型训练
import xgboost
xgb_reg = xgboost.XGBRegressor()
xgb_reg.fit(X_train, y_train)
#模型评估,对测试集进行预测
y_preds = xgb_reg.predict(X_test)
output = pd.DataFrame({'Id':test_data.Id,'SalePrice':y_preds})
output
报错:
---------------------------------------------------------------------------
XGBoostError Traceback (most recent call last)
Cell In, line 4
2 import xgboost
3 xgb_reg = xgboost.XGBRegressor()
----> 4 xgb_reg.fit(X_train, y_train)
6 #模型评估,对测试集进行预测
7 y_preds = xgb_reg.predict(X_test)
File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:730, in require_keyword_args.<locals>.throw_if.<locals>.inner_f(*args, **kwargs)
728 for k, arg in zip(sig.parameters, args):
729 kwargs[k] = arg
--> 730 return func(**kwargs)
File /opt/conda/lib/python3.10/site-packages/xgboost/sklearn.py:1055, in XGBModel.fit(self, X, y, sample_weight, base_margin, eval_set, eval_metric, early_stopping_rounds, verbose, xgb_model, sample_weight_eval_set, base_margin_eval_set, feature_weights, callbacks)
1053 with config_context(verbosity=self.verbosity):
1054 evals_result: TrainingCallback.EvalsLog = {}
-> 1055 train_dmatrix, evals = _wrap_evaluation_matrices(
1056 missing=self.missing,
1057 X=X,
1058 y=y,
1059 group=None,
1060 qid=None,
1061 sample_weight=sample_weight,
1062 base_margin=base_margin,
1063 feature_weights=feature_weights,
1064 eval_set=eval_set,
1065 sample_weight_eval_set=sample_weight_eval_set,
1066 base_margin_eval_set=base_margin_eval_set,
1067 eval_group=None,
1068 eval_qid=None,
1069 create_dmatrix=self._create_dmatrix,
1070 enable_categorical=self.enable_categorical,
1071 feature_types=self.feature_types,
1072 )
1073 params = self.get_xgb_params()
1075 if callable(self.objective):
File /opt/conda/lib/python3.10/site-packages/xgboost/sklearn.py:521, in _wrap_evaluation_matrices(missing, X, y, group, qid, sample_weight, base_margin, feature_weights, eval_set, sample_weight_eval_set, base_margin_eval_set, eval_group, eval_qid, create_dmatrix, enable_categorical, feature_types)
501 def _wrap_evaluation_matrices(
502 missing: float,
503 X: Any,
(...)
517 feature_types: Optional,
518 ) -> Tuple]]:
519 """Convert array_like evaluation matrices into DMatrix.Perform validation on the
520 way."""
--> 521 train_dmatrix = create_dmatrix(
522 data=X,
523 label=y,
524 group=group,
525 qid=qid,
526 weight=sample_weight,
527 base_margin=base_margin,
528 feature_weights=feature_weights,
529 missing=missing,
530 enable_categorical=enable_categorical,
531 feature_types=feature_types,
532 ref=None,
533 )
535 n_validation = 0 if eval_set is None else len(eval_set)
537 def validate_or_none(meta: Optional, name: str) -> Sequence:
File /opt/conda/lib/python3.10/site-packages/xgboost/sklearn.py:958, in XGBModel._create_dmatrix(self, ref, **kwargs)
956 if _can_use_qdm(self.tree_method) and self.booster != "gblinear":
957 try:
--> 958 return QuantileDMatrix(
959 **kwargs, ref=ref, nthread=self.n_jobs, max_bin=self.max_bin
960 )
961 except TypeError:# `QuantileDMatrix` supports lesser types than DMatrix
962 pass
File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:730, in require_keyword_args.<locals>.throw_if.<locals>.inner_f(*args, **kwargs)
728 for k, arg in zip(sig.parameters, args):
729 kwargs[k] = arg
--> 730 return func(**kwargs)
File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:1529, in QuantileDMatrix.__init__(self, data, label, weight, base_margin, missing, silent, feature_names, feature_types, nthread, max_bin, ref, group, qid, label_lower_bound, label_upper_bound, feature_weights, enable_categorical, data_split_mode)
1509 if any(
1510 info is not None
1511 for info in (
(...)
1522 )
1523 ):
1524 raise ValueError(
1525 "If data iterator is used as input, data like label should be "
1526 "specified as batch argument."
1527 )
-> 1529 self._init(
1530 data,
1531 ref=ref,
1532 label=label,
1533 weight=weight,
1534 base_margin=base_margin,
1535 group=group,
1536 qid=qid,
1537 label_lower_bound=label_lower_bound,
1538 label_upper_bound=label_upper_bound,
1539 feature_weights=feature_weights,
1540 feature_names=feature_names,
1541 feature_types=feature_types,
1542 enable_categorical=enable_categorical,
1543 )
File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:1588, in QuantileDMatrix._init(self, data, ref, enable_categorical, **meta)
1576 config = make_jcargs(
1577 nthread=self.nthread, missing=self.missing, max_bin=self.max_bin
1578 )
1579 ret = _LIB.XGQuantileDMatrixCreateFromCallback(
1580 None,
1581 it.proxy.handle,
(...)
1586 ctypes.byref(handle),
1587 )
-> 1588 it.reraise()
1589 # delay check_call to throw intermediate exception first
1590 _check_call(ret)
File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:576, in DataIter.reraise(self)
574 exc = self._exception
575 self._exception = None
--> 576 raise exc
File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:557, in DataIter._handle_exception(self, fn, dft_ret)
554 return dft_ret
556 try:
--> 557 return fn()
558 except Exception as e:# pylint: disable=broad-except
559 # Defer the exception in order to return 0 and stop the iteration.
560 # Exception inside a ctype callback function has no effect except
561 # for printing to stderr (doesn't stop the execution).
562 tb = sys.exc_info()[2]
File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:641, in DataIter._next_wrapper.<locals>.<lambda>()
638 self._data_ref = ref
640 # pylint: disable=not-callable
--> 641 return self._handle_exception(lambda: self.next(input_data), 0)
File /opt/conda/lib/python3.10/site-packages/xgboost/data.py:1280, in SingleBatchInternalIter.next(self, input_data)
1278 return 0
1279 self.it += 1
-> 1280 input_data(**self.kwargs)
1281 return 1
File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:730, in require_keyword_args.<locals>.throw_if.<locals>.inner_f(*args, **kwargs)
728 for k, arg in zip(sig.parameters, args):
729 kwargs[k] = arg
--> 730 return func(**kwargs)
File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:633, in DataIter._next_wrapper.<locals>.input_data(data, feature_names, feature_types, **kwargs)
631 self._temporary_data = (new, cat_codes, feature_names, feature_types)
632 dispatch_proxy_set_data(self.proxy, new, cat_codes, self._allow_host)
--> 633 self.proxy.set_info(
634 feature_names=feature_names,
635 feature_types=feature_types,
636 **kwargs,
637 )
638 self._data_ref = ref
File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:730, in require_keyword_args.<locals>.throw_if.<locals>.inner_f(*args, **kwargs)
728 for k, arg in zip(sig.parameters, args):
729 kwargs[k] = arg
--> 730 return func(**kwargs)
File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:932, in DMatrix.set_info(self, label, weight, base_margin, group, qid, label_lower_bound, label_upper_bound, feature_names, feature_types, feature_weights)
929 from .data import dispatch_meta_backend
931 if label is not None:
--> 932 self.set_label(label)
933 if weight is not None:
934 self.set_weight(weight)
File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:1070, in DMatrix.set_label(self, label)
1061 """Set label of dmatrix
1062
1063 Parameters
(...)
1066 The label information to be set into DMatrix
1067 """
1068 from .data import dispatch_meta_backend
-> 1070 dispatch_meta_backend(self, label, "label", "float")
File /opt/conda/lib/python3.10/site-packages/xgboost/data.py:1225, in dispatch_meta_backend(matrix, data, name, dtype)
1223 return
1224 if _is_pandas_series(data):
-> 1225 _meta_from_pandas_series(data, name, dtype, handle)
1226 return
1227 if _is_dlpack(data):
File /opt/conda/lib/python3.10/site-packages/xgboost/data.py:545, in _meta_from_pandas_series(data, name, dtype, handle)
543 data = data.to_dense()# type: ignore
544 assert len(data.shape) == 1 or data.shape[1] == 0 or data.shape[1] == 1
--> 545 _meta_from_numpy(data, name, dtype, handle)
File /opt/conda/lib/python3.10/site-packages/xgboost/data.py:1159, in _meta_from_numpy(data, field, dtype, handle)
1157 raise ValueError("Masked array is not supported.")
1158 interface_str = _array_interface(data)
-> 1159 _check_call(_LIB.XGDMatrixSetInfoFromInterface(handle, c_str(field), interface_str))
File /opt/conda/lib/python3.10/site-packages/xgboost/core.py:282, in _check_call(ret)
271 """Check the return value of C API call
272
273 This function will raise exception when error occurs.
(...)
279 return value from API calls
280 """
281 if ret != 0:
--> 282 raise XGBoostError(py_str(_LIB.XGBGetLastError()))
XGBoostError: /workspace/src/data/data.cc:507: Check failed: valid: Label contains NaN, infinity or a value too large.
Stack trace:
(0) /opt/conda/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x3588ca)
(1) /opt/conda/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x38a21d)
(2) /opt/conda/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x38ab51)
(3) /opt/conda/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGDMatrixSetInfoFromInterface+0xb0)
(4) /opt/conda/lib/python3.10/lib-dynload/../../libffi.so.8(+0x6a4a)
(5) /opt/conda/lib/python3.10/lib-dynload/../../libffi.so.8(+0x5fea)
(6) /opt/conda/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x12461)
(7) /opt/conda/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x86eb)
(8) /opt/conda/bin/python3.10(_PyObject_MakeTpCall+0x26b)
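根据错误信息 `XGBoostError: Label contains NaN, infinity or a value too large.` 可以判断:问题出在标签 `y_train` 上,其中包含无效值(`NaN`、无穷大或过大的值),而不是出在模型参数上。
在修复之前,建议先确认具体是哪一类无效值,例如运行 `np.isnan(y_train).sum()` 和 `np.isinf(y_train).sum()` 查看各自的数量。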
解决方法如下:
1. **检查和处理NaN值**:
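确保 `y_train` 中没有缺失值。注意:用固定值(如 0)填充缺失的标签会让模型去拟合一个并不真实的目标值;如果缺失的标签数量不多,通常更稳妥的做法是直接删除这些样本,并同时删除 `X_train` 中对应的行。下面的填充写法仅作为一种备选方案: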
import numpy as np
y_train = np.where(np.isnan(y_train), 0, y_train)# 用0替换NaN值,可以根据实际情况选择合适的替换值
2. **检查和处理无穷大值**:
确保 `y_train` 中没有无穷大的值。
y_train = np.where(np.isinf(y_train), 0, y_train)# 用0替换无穷大值,可以根据实际情况选择合适的替换值
3. **检查和处理异常大的值**:
确保 `y_train` 中没有异常大的值。
y_train = np.where(y_train > 1e10, 1e10, y_train)# 用1e10限制最大值,可以根据实际阈值选择
总之,执行以下综合处理以确保数据有效:
import numpy as np
# 处理 NaN 值
y_train = np.where(np.isnan(y_train), 0, y_train)
# 处理无穷大值
y_train = np.where(np.isinf(y_train), 0, y_train)
# 处理异常大的值
y_train = np.where(y_train > 1e10, 1e10, y_train)
# 然后再进行模型训练
xgb_reg.fit(X_train, y_train)
训练前请确保 `X_train` 和 `y_train` 的长度一致、行与行一一对应,并确认 `X_train` 中也没有无效数据(XGBoost 默认会把特征中的 `NaN` 当作缺失值处理,但无穷大同样会导致报错)。如果问题仍然存在,请逐步检查数据处理流程(例如合并训练集与测试集、对标签做对数变换等步骤),确认标签没有在某一步被意外引入 `NaN` 或无穷大。
以上回复来自 FishC AI 小助理,如未能正确解答您的问题,请继续追问。
页:
[1]