From 0854ea7ba1c04a60f3e88ee6d1247f62f31670ce Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Tue, 12 Sep 2023 03:24:18 +0000 Subject: [PATCH 1/3] fix(deps): update dependency xgboost to v2 --- poetry.lock | 11 +++-------- pyproject.toml | 2 +- 2 files changed, 4 insertions(+), 9 deletions(-) diff --git a/poetry.lock b/poetry.lock index 6f7f6ec..b44b2c8 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1827,17 +1827,12 @@ test = ["pytest (>=6.0.0)"] [[package]] name = "xgboost" -version = "1.7.6" +version = "2.0.0" description = "XGBoost Python Package" optional = false python-versions = ">=3.8" files = [ - {file = "xgboost-1.7.6-py3-none-macosx_10_15_x86_64.macosx_11_0_x86_64.macosx_12_0_x86_64.whl", hash = "sha256:4c34675b4d2678c624ddde5d45361e7e16046923e362e4e609b88353e6b87124"}, - {file = "xgboost-1.7.6-py3-none-macosx_12_0_arm64.whl", hash = "sha256:59b4b366d2cafc7f645e87d897983a5b59be02876194b1d213bd8d8b811d8ce8"}, - {file = "xgboost-1.7.6-py3-none-manylinux2014_aarch64.whl", hash = "sha256:281c3c6f4fbed2d36bf95cd02a641afa95e72e9abde70064056da5e76233e8df"}, - {file = "xgboost-1.7.6-py3-none-manylinux2014_x86_64.whl", hash = "sha256:b1d5db49b199152d62bd9217c98760207d3de86d2b9d243260c573ffe638f80a"}, - {file = "xgboost-1.7.6-py3-none-win_amd64.whl", hash = "sha256:127cf1f5e2ec25cd41429394c6719b87af1456ce583e89f0bffd35d02ad18bcb"}, - {file = "xgboost-1.7.6.tar.gz", hash = "sha256:1c527554a400445e0c38186039ba1a00425dcdb4e40b37eed0e74cb39a159c47"}, + {file = "xgboost-2.0.0.tar.gz", hash = "sha256:a89a4504c486043dbfdad41e5f426e2a0b4e5494a5f3ca99cf7ad85a665c79e7"}, ] [package.dependencies] @@ -1870,4 +1865,4 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "05b1a19a0029ea17c9206a2be1948760c0c70875c0414eb0f803f4020c7ff856" +content-hash = "20778a31bd269015b0c32cdd68e7338f19f8beffed5209635c69d613896bad73" diff --git a/pyproject.toml b/pyproject.toml index 9748258..2a2c9fe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ packages = [ [tool.poetry.dependencies] python = "^3.8" -xgboost = "^1.7.5" +xgboost = "^2.0.0" lightgbm = "^3.3.5" catboost = "^1.2" pyhumps = "^3.8.0" From 1559141aea27fdc58f47ca41d9b24e0d657477d8 Mon Sep 17 00:00:00 2001 From: 34j <55338215+34j@users.noreply.github.com> Date: Wed, 11 Oct 2023 21:01:41 +0900 Subject: [PATCH 2/3] fix(base): support `sample_weight` parameter in xgboost and use weight in `DMatrix` --- src/boost_loss/base.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/src/boost_loss/base.py b/src/boost_loss/base.py index 489f236..fe9bcae 100644 --- a/src/boost_loss/base.py +++ b/src/boost_loss/base.py @@ -30,7 +30,10 @@ def _dataset_to_ndarray( return y_, weight if isinstance(y, xgb.DMatrix): y_ = y.get_label() - return y_, np.ones_like(y_) + weight = y.get_weight() + if weight is None or weight.size == 0: + weight = np.ones_like(y_) + return y_, weight return y, np.ones_like(y) @@ -340,12 +343,16 @@ def eval_metric_lgb( self, y_true: NDArray | lgb.Dataset | xgb.DMatrix, y_pred: NDArray | lgb.Dataset | xgb.DMatrix, + sample_weight: NDArray | lgb.Dataset | xgb.DMatrix | None = None + # not used, exists for eval_metric_xgb_sklearn ) -> tuple[str, float, bool]: """LightGBM-compatible interface""" if isinstance(y_pred, lgb.Dataset) or isinstance(y_pred, xgb.DMatrix): # NOTE: swap (it is so fucking that the order is inconsistent) y_true, y_pred = y_pred, y_true y_true, weight = _dataset_to_ndarray(y=y_true) + if sample_weight is not None: + weight = sample_weight y_pred, _ = _dataset_to_ndarray(y=y_pred) loss = self.loss(y_true=y_true, y_pred=y_pred) if isinstance(loss, float) and not np.allclose(weight, 1.0): @@ -372,9 +379,12 @@ def eval_metric_xgb_sklearn( self, y_true: NDArray | lgb.Dataset | xgb.DMatrix, y_pred: NDArray | lgb.Dataset | xgb.DMatrix, + sample_weight: NDArray | lgb.Dataset | xgb.DMatrix | None = None, ) -> float: """XGBoost-sklearn-api-compatible interface""" - result = self.eval_metric_lgb(y_true=y_true, y_pred=y_pred) + result = self.eval_metric_lgb( + y_true=y_true, y_pred=y_pred, sample_weight=sample_weight + ) return result[1] def __add__(self, other: LossBase) -> LossBase: From 5622d2c6ec68f631c601df8e9c2b699f16452124 Mon Sep 17 00:00:00 2001 From: 34j <55338215+34j@users.noreply.github.com> Date: Wed, 11 Oct 2023 21:02:28 +0900 Subject: [PATCH 3/3] test(test_base): specify base_score=0.5 to get consistent results --- tests/test_base.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_base.py b/tests/test_base.py index d9e56ee..8f47190 100644 --- a/tests/test_base.py +++ b/tests/test_base.py @@ -98,6 +98,7 @@ def xgboost_baseline(self): raise SkipTest(f"XGBoost does not support {self.loss_name} loss.") model = xgb.XGBRegressor( objective=self.loss_names["xgboost"][self.loss_name], + base_score=0.5, ) model.fit( self.X_train,