From ea042fdd6ecf66c527a435689b29b298e627fe8d Mon Sep 17 00:00:00 2001
From: Nikita Titov
Date: Mon, 13 Dec 2021 00:07:25 +0300
Subject: [PATCH 1/5] Update test_sklearn.py

---
 tests/python_package_test/test_sklearn.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py
index acc3b9c512f6..1f998b13621b 100644
--- a/tests/python_package_test/test_sklearn.py
+++ b/tests/python_package_test/test_sklearn.py
@@ -294,20 +294,23 @@ def test_stacking_regressor():
 def test_grid_search():
     X, y = load_iris(return_X_y=True)
     y = y.astype(str)  # utilize label encoder at its max power
-    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1,
-                                                        random_state=42)
-    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1,
-                                                      random_state=42)
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
+    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.1, random_state=42)
     params = dict(subsample=0.8,
                   subsample_freq=1)
     grid_params = dict(boosting_type=['rf', 'gbdt'],
                        n_estimators=[4, 6],
                        reg_alpha=[0.01, 0.005])
-    fit_params = dict(eval_set=[(X_val, y_val)],
-                      eval_metric=constant_metric,
-                      callbacks=[lgb.early_stopping(2)])
-    grid = GridSearchCV(estimator=lgb.LGBMClassifier(**params), param_grid=grid_params,
-                        cv=2)
+    evals_result = {}
+    fit_params = dict(
+        eval_set=[(X_val, y_val)],
+        eval_metric=constant_metric,
+        callbacks=[
+            lgb.early_stopping(2),
+            lgb.record_evaluation(evals_result)
+        ]
+    )
+    grid = GridSearchCV(estimator=lgb.LGBMClassifier(**params), param_grid=grid_params, cv=2)
     grid.fit(X_train, y_train, **fit_params)
     score = grid.score(X_test, y_test)  # utilizes GridSearchCV default refit=True
     assert grid.best_params_['boosting_type'] in ['rf', 'gbdt']
@@ -319,6 +322,7 @@ def test_grid_search():
     assert grid.best_estimator_.best_score_['valid_0']['error'] == 0
     assert score >= 0.2
     assert score <= 1.
+    assert evals_result == grid.best_estimator_.evals_result_
 
 
 def test_random_search():

From 085d18b6b205a9db76df0d7758ed11b4da118a33 Mon Sep 17 00:00:00 2001
From: Nikita Titov
Date: Mon, 13 Dec 2021 00:08:21 +0300
Subject: [PATCH 2/5] Update python_package.yml

---
 .github/workflows/python_package.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python_package.yml b/.github/workflows/python_package.yml
index f97aaa8f2cbc..2cba95bcfb55 100644
--- a/.github/workflows/python_package.yml
+++ b/.github/workflows/python_package.yml
@@ -3,7 +3,7 @@ name: Python-package
 on:
   push:
     branches:
-      - master
+      - record_callback
   pull_request:
     branches:
       - master

From 08c8c4eafcbfa8ba5c9e566e8583ad4f2731e4c0 Mon Sep 17 00:00:00 2001
From: Nikita Titov
Date: Mon, 13 Dec 2021 00:31:29 +0300
Subject: [PATCH 3/5] Update python_package.yml

---
 .github/workflows/python_package.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/python_package.yml b/.github/workflows/python_package.yml
index 2cba95bcfb55..f97aaa8f2cbc 100644
--- a/.github/workflows/python_package.yml
+++ b/.github/workflows/python_package.yml
@@ -3,7 +3,7 @@ name: Python-package
 on:
   push:
     branches:
-      - record_callback
+      - master
   pull_request:
     branches:
       - master

From 36208caae550b8885d043ad996d2e1daf6424d39 Mon Sep 17 00:00:00 2001
From: Nikita Titov
Date: Mon, 13 Dec 2021 00:31:37 +0300
Subject: [PATCH 4/5] Update callback.py

---
 python-package/lightgbm/callback.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/python-package/lightgbm/callback.py b/python-package/lightgbm/callback.py
index 092dfb463f8e..14462af151e1 100644
--- a/python-package/lightgbm/callback.py
+++ b/python-package/lightgbm/callback.py
@@ -128,18 +128,24 @@ def record_evaluation(eval_result: Dict[str, Dict[str, List[Any]]]) -> Callable:
     """
     if not isinstance(eval_result, dict):
         raise TypeError('eval_result should be a dictionary')
-    eval_result.clear()
+    inited = False
 
     def _init(env: CallbackEnv) -> None:
+        nonlocal inited
+        eval_result.clear()
         for data_name, eval_name, _, _ in env.evaluation_result_list:
             eval_result.setdefault(data_name, collections.OrderedDict())
             eval_result[data_name].setdefault(eval_name, [])
+        inited = True
 
     def _callback(env: CallbackEnv) -> None:
-        if not eval_result:
+        nonlocal inited
+        if not inited:
             _init(env)
         for data_name, eval_name, result, _ in env.evaluation_result_list:
             eval_result[data_name][eval_name].append(result)
+        if env.iteration == env.end_iteration - 1:
+            inited = False
     _callback.order = 20  # type: ignore
     return _callback
 

From 1ea4585105fe7f8d8014abe40ccb4386dff8098d Mon Sep 17 00:00:00 2001
From: Nikita Titov
Date: Mon, 13 Dec 2021 01:41:30 +0300
Subject: [PATCH 5/5] Update callback.py

---
 python-package/lightgbm/callback.py | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/python-package/lightgbm/callback.py b/python-package/lightgbm/callback.py
index 14462af151e1..45e21f298480 100644
--- a/python-package/lightgbm/callback.py
+++ b/python-package/lightgbm/callback.py
@@ -128,24 +128,18 @@ def record_evaluation(eval_result: Dict[str, Dict[str, List[Any]]]) -> Callable:
     """
     if not isinstance(eval_result, dict):
         raise TypeError('eval_result should be a dictionary')
-    inited = False
 
     def _init(env: CallbackEnv) -> None:
-        nonlocal inited
         eval_result.clear()
         for data_name, eval_name, _, _ in env.evaluation_result_list:
             eval_result.setdefault(data_name, collections.OrderedDict())
             eval_result[data_name].setdefault(eval_name, [])
-        inited = True
 
     def _callback(env: CallbackEnv) -> None:
-        nonlocal inited
-        if not inited:
+        if env.iteration == env.begin_iteration:
             _init(env)
         for data_name, eval_name, result, _ in env.evaluation_result_list:
             eval_result[data_name][eval_name].append(result)
-        if env.iteration == env.end_iteration - 1:
-            inited = False
     _callback.order = 20  # type: ignore
     return _callback
 
@@ -227,7 +221,6 @@ def early_stopping(stopping_rounds: int, first_metric_only: bool = False, verbos
     best_score_list: list = []
     cmp_op = []
     enabled = True
-    inited = False
     first_metric = ''
 
     def _init(env: CallbackEnv) -> None:
@@ -236,7 +229,6 @@ def _init(env: CallbackEnv) -> None:
         nonlocal best_score_list
         nonlocal cmp_op
         nonlocal enabled
-        nonlocal inited
        nonlocal first_metric
         enabled = not any(env.params.get(boost_alias, "") == 'dart'
                           for boost_alias in _ConfigAliases.get("boosting"))
@@ -255,7 +247,6 @@ def _init(env: CallbackEnv) -> None:
         best_iter = []
         best_score_list = []
         cmp_op = []
-        inited = True
         first_metric = ''
 
         n_metrics = len(set(m[1] for m in env.evaluation_result_list))
@@ -299,7 +290,6 @@ def _init(env: CallbackEnv) -> None:
     def _final_iteration_check(env: CallbackEnv, eval_name_splitted: List[str], i: int) -> None:
         nonlocal best_iter
         nonlocal best_score_list
-        nonlocal inited
         if env.iteration == env.end_iteration - 1:
             if verbose:
                 best_score_str = '\t'.join([_format_eval_result(x) for x in best_score_list[i]])
                 _log_info('Did not meet early stopping. '
                           f'Best iteration is:\n[{best_iter[i] + 1}]\t{best_score_str}')
                 if first_metric_only:
                     _log_info(f"Evaluated only: {eval_name_splitted[-1]}")
-            inited = False
             raise EarlyStopException(best_iter[i], best_score_list[i])
@@ -316,9 +305,8 @@ def _callback(env: CallbackEnv) -> None:
         nonlocal best_score_list
         nonlocal cmp_op
         nonlocal enabled
-        nonlocal inited
         nonlocal first_metric
-        if not inited:
+        if env.iteration == env.begin_iteration:
             _init(env)
         if not enabled:
             return
@@ -342,7 +330,6 @@ def _callback(env: CallbackEnv) -> None:
                     _log_info(f"Early stopping, best iteration is:\n[{best_iter[i] + 1}]\t{eval_result_str}")
                     if first_metric_only:
                         _log_info(f"Evaluated only: {eval_name_splitted[-1]}")
-                inited = False
                 raise EarlyStopException(best_iter[i], best_score_list[i])
             _final_iteration_check(env, eval_name_splitted, i)
     _callback.order = 30  # type: ignore
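
The user-visible effect of the series is easiest to see in a minimal sketch (below; this is an illustration assuming a LightGBM build with these patches applied, with arbitrary dataset and estimator sizes, not code from the patches themselves). Because _init() now runs whenever env.iteration == env.begin_iteration, a callback created by record_evaluation() can be reused across several fit() calls: the result dict is cleared and refilled at the start of each run instead of accumulating entries from the previous one. This is the property the new assertion in test_grid_search() relies on, since GridSearchCV passes the same fit_params, and therefore the same callback objects, to every CV fit and to the final refit.

import lightgbm as lgb
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

X, y = load_iris(return_X_y=True)
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

evals_result = {}
# One callback object, deliberately shared between two independent fits.
callbacks = [lgb.record_evaluation(evals_result)]

lgb.LGBMClassifier(n_estimators=5).fit(X_train, y_train, eval_set=[(X_val, y_val)], callbacks=callbacks)
# The first run recorded 5 values per metric.
assert all(len(values) == 5 for metrics in evals_result.values() for values in metrics.values())

lgb.LGBMClassifier(n_estimators=3).fit(X_train, y_train, eval_set=[(X_val, y_val)], callbacks=callbacks)
# _init() ran again at begin_iteration, so the dict was reset: 3 values per metric, not 5 + 3.
assert all(len(values) == 3 for metrics in evals_result.values() for values in metrics.values())

Keying the reset to begin_iteration (PATCH 5/5) rather than to an end-of-training flag (PATCH 4/5) also keeps the recorded results intact after training finishes, and stays correct when a run ends early, e.g. via EarlyStopException, before end_iteration - 1 is ever reached.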