From 8d39fe1369a83344e53e0c9d0d2dd6f94647503f Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Mon, 6 Jun 2022 19:53:28 -0500
Subject: [PATCH 1/3] [python-package] add type hints on cv()

---
 python-package/lightgbm/compat.py |  5 +++--
 python-package/lightgbm/engine.py | 37 ++++++++++++++++++++++++++++---------
 2 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/python-package/lightgbm/compat.py b/python-package/lightgbm/compat.py
index 06a7cf89bd4f..7338f8b1e3eb 100644
--- a/python-package/lightgbm/compat.py
+++ b/python-package/lightgbm/compat.py
@@ -75,9 +75,9 @@ def __init__(self, *args, **kwargs):
     from sklearn.utils.validation import assert_all_finite, check_array, check_X_y
     try:
         from sklearn.exceptions import NotFittedError
-        from sklearn.model_selection import GroupKFold, StratifiedKFold
+        from sklearn.model_selection import BaseCrossValidator, GroupKFold, StratifiedKFold
     except ImportError:
-        from sklearn.cross_validation import GroupKFold, StratifiedKFold
+        from sklearn.cross_validation import BaseCrossValidator, GroupKFold, StratifiedKFold
         from sklearn.utils.validation import NotFittedError
     try:
         from sklearn.utils.validation import _check_sample_weight
@@ -90,6 +90,7 @@ def _check_sample_weight(sample_weight, X, dtype=None):
             return sample_weight
 
     SKLEARN_INSTALLED = True
+    _LGBMBaseCrossValidator = BaseCrossValidator
     _LGBMModelBase = BaseEstimator
     _LGBMRegressorBase = RegressorMixin
     _LGBMClassifierBase = ClassifierMixin
diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py
index 71a9a115d342..d1481a502f1f 100644
--- a/python-package/lightgbm/engine.py
+++ b/python-package/lightgbm/engine.py
@@ -4,19 +4,24 @@
 import copy
 from operator import attrgetter
 from pathlib import Path
-from typing import Any, Callable, Dict, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, Generator, Iterable, List, Optional, Tuple, Union
 
 import numpy as np
 
 from . import callback
 from .basic import Booster, Dataset, LightGBMError, _choose_param_value, _ConfigAliases, _InnerPredictor, _log_warning
-from .compat import SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold
+from .compat import SKLEARN_INSTALLED, _LGBMBaseCrossValidator, _LGBMGroupKFold, _LGBMStratifiedKFold
 
 _LGBM_CustomMetricFunction = Callable[
     [np.ndarray, Dataset],
     Tuple[str, float, bool]
 ]
 
+_LGBM_PreprocCallable = Callable[
+    [Dataset, Dataset, Dict[str, Any]],
+    Tuple[Dataset, Dataset, Dict[str, Any]]
+]
+
 
 def train(
     params: Dict[str, Any],
@@ -285,7 +290,7 @@ def __init__(self):
         self.boosters = []
         self.best_iteration = -1
 
-    def _append(self, booster):
+    def _append(self, booster: Booster) -> None:
         """Add a booster to CVBooster."""
         self.boosters.append(booster)
 
@@ -373,12 +378,25 @@ def _agg_cv_result(raw_results):
     return [('cv_agg', k, np.mean(v), metric_type[k], np.std(v)) for k, v in cvmap.items()]
 
 
-def cv(params, train_set, num_boost_round=100,
-       folds=None, nfold=5, stratified=True, shuffle=True,
-       metrics=None, feval=None, init_model=None,
-       feature_name='auto', categorical_feature='auto',
-       fpreproc=None, seed=0, callbacks=None, eval_train_metric=False,
-       return_cvbooster=False):
+def cv(
+    params: Dict[str, Any],
+    train_set: Dataset,
+    num_boost_round: int = 100,
+    folds: Optional[Union[Iterable[Tuple[np.ndarray, np.ndarray]], _LGBMBaseCrossValidator]] = None,
+    nfold: int = 5,
+    stratified: bool = True,
+    shuffle: bool = True,
+    metrics: Optional[Union[str, List[str]]] = None,
+    feval: Optional[Union[_LGBM_CustomMetricFunction, List[_LGBM_CustomMetricFunction]]] = None,
+    init_model: Optional[Union[str, Path, Booster]] = None,
+    feature_name: Union[str, List[str]] = 'auto',
+    categorical_feature: Union[str, List[str]] = 'auto',
+    fpreproc: Optional[_LGBM_PreprocCallable] = None,
+    seed: int = 0,
+    callbacks: Optional[List[Callable]] = None,
+    eval_train_metric: bool = False,
+    return_cvbooster: bool = False
+) -> Dict[str, Any]:
     """Perform the cross-validation with given parameters.
 
     Parameters
@@ -486,6 +504,7 @@ def cv(params, train_set, num_boost_round=100,
         ...}.
         If ``return_cvbooster=True``, also returns trained boosters via ``cvbooster`` key.
     """
+
     if not isinstance(train_set, Dataset):
         raise TypeError("Training only accepts Dataset object")
     params = copy.deepcopy(params)

From 94fb85397ae3c39777e758d352489e30f3e91b58 Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Mon, 6 Jun 2022 19:56:26 -0500
Subject: [PATCH 2/3] remove inadvertent changes

---
 python-package/lightgbm/engine.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py
index d1481a502f1f..2cb837a4759d 100644
--- a/python-package/lightgbm/engine.py
+++ b/python-package/lightgbm/engine.py
@@ -4,7 +4,7 @@
 import copy
 from operator import attrgetter
 from pathlib import Path
-from typing import Any, Callable, Dict, Generator, Iterable, List, Optional, Tuple, Union
+from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Union
 
 import numpy as np
 
@@ -290,7 +290,7 @@ def __init__(self):
         self.boosters = []
         self.best_iteration = -1
 
-    def _append(self, booster: Booster) -> None:
+    def _append(self, booster):
         """Add a booster to CVBooster."""
         self.boosters.append(booster)
 
@@ -504,7 +504,6 @@ def cv(
         ...}.
         If ``return_cvbooster=True``, also returns trained boosters via ``cvbooster`` key.
     """
-
     if not isinstance(train_set, Dataset):
         raise TypeError("Training only accepts Dataset object")
     params = copy.deepcopy(params)

From 5c582009b6795b6c142bd4be940a3fc277305f59 Mon Sep 17 00:00:00 2001
From: James Lamb
Date: Mon, 13 Jun 2022 19:15:30 -0500
Subject: [PATCH 3/3] Apply suggestions from code review

Co-authored-by: Nikita Titov
---
 python-package/lightgbm/engine.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py
index 2cb837a4759d..2edf18435c17 100644
--- a/python-package/lightgbm/engine.py
+++ b/python-package/lightgbm/engine.py
@@ -17,7 +17,7 @@
     Tuple[str, float, bool]
 ]
 
-_LGBM_PreprocCallable = Callable[
+_LGBM_PreprocFunction = Callable[
     [Dataset, Dataset, Dict[str, Any]],
     Tuple[Dataset, Dataset, Dict[str, Any]]
 ]
@@ -390,8 +390,8 @@ def cv(
     feval: Optional[Union[_LGBM_CustomMetricFunction, List[_LGBM_CustomMetricFunction]]] = None,
     init_model: Optional[Union[str, Path, Booster]] = None,
     feature_name: Union[str, List[str]] = 'auto',
-    categorical_feature: Union[str, List[str]] = 'auto',
-    fpreproc: Optional[_LGBM_PreprocCallable] = None,
+    categorical_feature: Union[str, List[str], List[int]] = 'auto',
+    fpreproc: Optional[_LGBM_PreprocFunction] = None,
     seed: int = 0,
     callbacks: Optional[List[Callable]] = None,
     eval_train_metric: bool = False,