From 90f68289d2c057f5ba07e13c2e2ceb3275ba9b1d Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Thu, 24 Feb 2022 00:06:14 +0300 Subject: [PATCH 1/2] fixes for supporting 2d numpy arrays for predictions, grads and hess in multiclass custom objective --- python-package/lightgbm/basic.py | 31 ++++++++++-------------- python-package/lightgbm/engine.py | 37 +++++++++++++---------------- python-package/lightgbm/plotting.py | 4 ++-- python-package/lightgbm/sklearn.py | 26 +++++++------------- 4 files changed, 39 insertions(+), 59 deletions(-) diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index f1d20abd055e..ffbe160b6b15 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -2751,7 +2751,7 @@ def trees_to_dataframe(self): - ``missing_direction`` : str, split direction that missing values should go to. ``None`` for leaf nodes. - ``missing_type`` : str, describes what types of values are treated as missing. - ``value`` : float64, predicted value for this leaf node, multiplied by the learning rate. - - ``weight`` : float64 or int64, sum of hessian (second-order derivative of objective), summed over observations that fall in this node. + - ``weight`` : float64 or int64, sum of Hessian (second-order derivative of objective), summed over observations that fall in this node. - ``count`` : int64, number of records in the training data that fall into this node. Returns @@ -2960,7 +2960,7 @@ def update(self, train_set=None, fobj=None): The value of the second order derivative (Hessian) of the loss with respect to the elements of preds for each sample point. - For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array, + For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes], and grad and hess should be returned in the same format. Returns @@ -2999,9 +2999,6 @@ def update(self, train_set=None, fobj=None): if not self.__set_objective_to_none: self.reset_parameter({"objective": "none"}).__set_objective_to_none = True grad, hess = fobj(self.__inner_predict(0), self.train_set) - if self.num_model_per_iteration() > 1: - grad = grad.ravel(order='F') - hess = hess.ravel(order='F') return self.__boost(grad, hess) def __boost(self, grad, hess): @@ -3011,7 +3008,7 @@ def __boost(self, grad, hess): Score is returned before any transformation, e.g. it is raw margin instead of probability of positive class for binary task. - For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array, + For multi-class task, score are numpy 2-D array of shape = [n_samples, n_classes], and grad and hess should be returned in the same format. Parameters @@ -3028,6 +3025,9 @@ def __boost(self, grad, hess): is_finished : bool Whether the boost was successfully finished. 
""" + if self.__num_class > 1: + grad = grad.ravel(order='F') + hess = hess.ravel(order='F') grad = list_to_1d_numpy(grad, name='gradient') hess = list_to_1d_numpy(hess, name='hessian') assert grad.flags.c_contiguous @@ -3035,12 +3035,11 @@ def __boost(self, grad, hess): if len(grad) != len(hess): raise ValueError(f"Lengths of gradient ({len(grad)}) and Hessian ({len(hess)}) don't match") num_train_data = self.train_set.num_data() - num_models = self.__num_class - if len(grad) != num_train_data * num_models: + if len(grad) != num_train_data * self.__num_class: raise ValueError( f"Lengths of gradient ({len(grad)}) and Hessian ({len(hess)}) " f"don't match training data length ({num_train_data}) * " - f"number of models per one iteration ({num_models})" + f"number of models per one iteration ({self.__num_class})" ) is_finished = ctypes.c_int(0) _safe_call(_LIB.LGBM_BoosterUpdateOneIterCustom( @@ -3148,8 +3147,9 @@ def eval(self, data, name, feval=None): Should accept two parameters: preds, eval_data, and return (eval_name, eval_result, is_higher_better) or list of such tuples. - preds : numpy 1-D array + preds : numpy 1-D array or numpy 2-D array (for multi-class task) The predicted values. + For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes]. If ``fobj`` is specified, predicted values are returned before any transformation, e.g. they are raw margin instead of probability of positive class for binary task in this case. eval_data : Dataset @@ -3161,9 +3161,6 @@ def eval(self, data, name, feval=None): is_higher_better : bool Is eval result higher better, e.g. AUC is ``is_higher_better``. - For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array, - and grad and hess should be returned in the same format. - Returns ------- result : list @@ -3198,6 +3195,7 @@ def eval_train(self, feval=None): preds : numpy 1-D array or numpy 2-D array (for multi-class task) The predicted values. + For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes]. If ``fobj`` is specified, predicted values are returned before any transformation, e.g. they are raw margin instead of probability of positive class for binary task in this case. eval_data : Dataset @@ -3209,9 +3207,6 @@ def eval_train(self, feval=None): is_higher_better : bool Is eval result higher better, e.g. AUC is ``is_higher_better``. - For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array, - and grad and hess should be returned in the same format. - Returns ------- result : list @@ -3231,6 +3226,7 @@ def eval_valid(self, feval=None): preds : numpy 1-D array or numpy 2-D array (for multi-class task) The predicted values. + For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes]. If ``fobj`` is specified, predicted values are returned before any transformation, e.g. they are raw margin instead of probability of positive class for binary task in this case. eval_data : Dataset @@ -3242,9 +3238,6 @@ def eval_valid(self, feval=None): is_higher_better : bool Is eval result higher better, e.g. AUC is ``is_higher_better``. - For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array, - and grad and hess should be returned in the same format. - Returns ------- result : list diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 2b8a630b1915..9b4ff70c217c 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -9,13 +9,12 @@ import numpy as np from . 
import callback -from .basic import (Booster, Dataset, LightGBMError, _ArrayLike, _choose_param_value, _ConfigAliases, _InnerPredictor, - _log_warning) +from .basic import Booster, Dataset, LightGBMError, _choose_param_value, _ConfigAliases, _InnerPredictor, _log_warning from .compat import SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold _LGBM_CustomObjectiveFunction = Callable[ [np.ndarray, Dataset], - Tuple[_ArrayLike, _ArrayLike] + Tuple[np.ndarray, np.ndarray] ] _LGBM_CustomMetricFunction = Callable[ [np.ndarray, Dataset], @@ -56,30 +55,30 @@ def train( Should accept two parameters: preds, train_data, and return (grad, hess). - preds : numpy 1-D array + preds : numpy 1-D array or numpy 2-D array (for multi-class task) The predicted values. Predicted values are returned before any transformation, e.g. they are raw margin instead of probability of positive class for binary task. train_data : Dataset The training dataset. - grad : list, numpy 1-D array or pandas Series + grad : numpy 1-D array or numpy 2-D array (for multi-class task) The value of the first order derivative (gradient) of the loss with respect to the elements of preds for each sample point. - hess : list, numpy 1-D array or pandas Series + hess : numpy 1-D array or numpy 2-D array (for multi-class task) The value of the second order derivative (Hessian) of the loss with respect to the elements of preds for each sample point. - For multi-class task, the preds is group by class_id first, then group by row_id. - If you want to get i-th row preds in j-th class, the access way is score[j * num_data + i] - and you should group grad and hess in this way as well. + For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes], + and grad and hess should be returned in the same format. feval : callable, list of callable, or None, optional (default=None) Customized evaluation function. Each evaluation function should accept two parameters: preds, eval_data, and return (eval_name, eval_result, is_higher_better) or list of such tuples. - preds : numpy 1-D array + preds : numpy 1-D array or numpy 2-D array (for multi-class task) The predicted values. + For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes]. If ``fobj`` is specified, predicted values are returned before any transformation, e.g. they are raw margin instead of probability of positive class for binary task in this case. eval_data : Dataset @@ -91,8 +90,6 @@ def train( is_higher_better : bool Is eval result higher better, e.g. AUC is ``is_higher_better``. - For multi-class task, the preds is group by class_id first, then group by row_id. - If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i]. To ignore the default metric corresponding to the used objective, set the ``metric`` parameter to the string ``"None"`` in ``params``. init_model : str, pathlib.Path, Booster or None, optional (default=None) @@ -411,30 +408,30 @@ def cv(params, train_set, num_boost_round=100, Should accept two parameters: preds, train_data, and return (grad, hess). - preds : numpy 1-D array + preds : numpy 1-D array or numpy 2-D array (for multi-class task) The predicted values. Predicted values are returned before any transformation, e.g. they are raw margin instead of probability of positive class for binary task. train_data : Dataset The training dataset. 
- grad : list, numpy 1-D array or pandas Series + grad : numpy 1-D array or numpy 2-D array (for multi-class task) The value of the first order derivative (gradient) of the loss with respect to the elements of preds for each sample point. - hess : list, numpy 1-D array or pandas Series + hess : numpy 1-D array or numpy 2-D array (for multi-class task) The value of the second order derivative (Hessian) of the loss with respect to the elements of preds for each sample point. - For multi-class task, the preds is group by class_id first, then group by row_id. - If you want to get i-th row preds in j-th class, the access way is score[j * num_data + i] - and you should group grad and hess in this way as well. + For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes], + and grad and hess should be returned in the same format. feval : callable, list of callable, or None, optional (default=None) Customized evaluation function. Each evaluation function should accept two parameters: preds, eval_data, and return (eval_name, eval_result, is_higher_better) or list of such tuples. - preds : numpy 1-D array + preds : numpy 1-D array or numpy 2-D array (for multi-class task) The predicted values. + For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes]. If ``fobj`` is specified, predicted values are returned before any transformation, e.g. they are raw margin instead of probability of positive class for binary task in this case. eval_data : Dataset @@ -446,8 +443,6 @@ def cv(params, train_set, num_boost_round=100, is_higher_better : bool Is eval result higher better, e.g. AUC is ``is_higher_better``. - For multi-class task, the preds is group by class_id first, then group by row_id. - If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i]. To ignore the default metric corresponding to the used objective, set ``metrics`` to the string ``"None"``. init_model : str, pathlib.Path, Booster or None, optional (default=None) diff --git a/python-package/lightgbm/plotting.py b/python-package/lightgbm/plotting.py index eb625c8a1193..f7d35045d21f 100644 --- a/python-package/lightgbm/plotting.py +++ b/python-package/lightgbm/plotting.py @@ -556,7 +556,7 @@ def create_tree_digraph( - ``'internal_count'`` : number of records from the training data that fall into this non-leaf node - ``'internal_weight'`` : total weight of all nodes that fall into this non-leaf node - ``'leaf_count'`` : number of records from the training data that fall into this leaf node - - ``'leaf_weight'`` : total weight (sum of hessian) of all observations that fall into this leaf node + - ``'leaf_weight'`` : total weight (sum of Hessian) of all observations that fall into this leaf node - ``'data_percentage'`` : percentage of training data that fall into this node precision : int or None, optional (default=3) Used to restrict the display of floating point values to a certain precision. 
@@ -649,7 +649,7 @@ def plot_tree( - ``'internal_count'`` : number of records from the training data that fall into this non-leaf node - ``'internal_weight'`` : total weight of all nodes that fall into this non-leaf node - ``'leaf_count'`` : number of records from the training data that fall into this leaf node - - ``'leaf_weight'`` : total weight (sum of hessian) of all observations that fall into this leaf node + - ``'leaf_weight'`` : total weight (sum of Hessian) of all observations that fall into this leaf node - ``'data_percentage'`` : percentage of training data that fall into this node precision : int or None, optional (default=3) Used to restrict the display of floating point values to a certain precision. diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index 2d401fc526b8..77bf3ffedd1e 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -6,7 +6,7 @@ import numpy as np -from .basic import Booster, Dataset, LightGBMError, _ArrayLike, _choose_param_value, _ConfigAliases, _log_warning +from .basic import Booster, Dataset, LightGBMError, _choose_param_value, _ConfigAliases, _log_warning from .callback import record_evaluation from .compat import (SKLEARN_INSTALLED, LGBMNotFittedError, _LGBMAssertAllFinite, _LGBMCheckArray, _LGBMCheckClassificationTargets, _LGBMCheckSampleWeight, _LGBMCheckXY, _LGBMClassifierBase, @@ -19,11 +19,11 @@ _LGBM_ScikitCustomObjectiveFunction = Union[ Callable[ [np.ndarray, np.ndarray], - Tuple[_ArrayLike, _ArrayLike] + Tuple[np.ndarray, np.ndarray] ], Callable[ [np.ndarray, np.ndarray, np.ndarray], - Tuple[_ArrayLike, _ArrayLike] + Tuple[np.ndarray, np.ndarray] ], ] _LGBM_ScikitCustomEvalFunction = Union[ @@ -72,13 +72,13 @@ def __init__(self, func: _LGBM_ScikitCustomObjectiveFunction): grad : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape [n_samples, n_classes] (for multi-class task) The value of the first order derivative (gradient) of the loss with respect to the elements of y_pred for each sample point. - hess : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape [n_samples, n_classes] (for multi-class task) + hess : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape = [n_samples, n_classes] (for multi-class task) The value of the second order derivative (Hessian) of the loss with respect to the elements of y_pred for each sample point. .. note:: - For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array, + For multi-class task, y_pred are numpy 2-D array of shape = [n_samples, n_classes], and grad and hess should be returned in the same format. """ self.func = func @@ -95,10 +95,10 @@ def __call__(self, preds, dataset): Returns ------- - grad : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape [n_samples, n_classes] (for multi-class task) + grad : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape = [n_samples, n_classes] (for multi-class task) The value of the first order derivative (gradient) of the loss with respect to the elements of preds for each sample point. - hess : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape [n_samples, n_classes] (for multi-class task) + hess : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape = [n_samples, n_classes] (for multi-class task) The value of the second order derivative (Hessian) of the loss with respect to the elements of preds for each sample point. 
""" @@ -162,11 +162,6 @@ def __init__(self, func: _LGBM_ScikitCustomEvalFunction): The eval result. is_higher_better : bool Is eval result higher better, e.g. AUC is ``is_higher_better``. - - .. note:: - - For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array, - and grad and hess should be returned in the same format. """ self.func = func @@ -297,9 +292,6 @@ def __call__(self, preds, dataset): The eval result. is_higher_better : bool Is eval result higher better, e.g. AUC is ``is_higher_better``. - - For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array, - and grad and hess should be returned in the same format. """ _lgbmmodel_doc_predict = ( @@ -415,7 +407,7 @@ def __init__( min_split_gain : float, optional (default=0.) Minimum loss reduction required to make a further partition on a leaf node of the tree. min_child_weight : float, optional (default=1e-3) - Minimum sum of instance weight (hessian) needed in a child (leaf). + Minimum sum of instance weight (Hessian) needed in a child (leaf). min_child_samples : int, optional (default=20) Minimum number of data needed in a child (leaf). subsample : float, optional (default=1.) @@ -473,7 +465,7 @@ def __init__( The value of the second order derivative (Hessian) of the loss with respect to the elements of y_pred for each sample point. - For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array, + For multi-class task, y_pred are numpy 2-D array of shape = [n_samples, n_classes], and grad and hess should be returned in the same format. """ if not SKLEARN_INSTALLED: From b8e10387ea9fa9de9e6e6c4b805fbc42c0f0ea27 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Thu, 24 Feb 2022 01:39:00 +0300 Subject: [PATCH 2/2] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: José Morales --- python-package/lightgbm/sklearn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index 77bf3ffedd1e..7ebba0bc962c 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -78,7 +78,7 @@ def __init__(self, func: _LGBM_ScikitCustomObjectiveFunction): .. note:: - For multi-class task, y_pred are numpy 2-D array of shape = [n_samples, n_classes], + For multi-class task, y_pred is a numpy 2-D array of shape = [n_samples, n_classes], and grad and hess should be returned in the same format. """ self.func = func @@ -465,7 +465,7 @@ def __init__( The value of the second order derivative (Hessian) of the loss with respect to the elements of y_pred for each sample point. - For multi-class task, y_pred are numpy 2-D array of shape = [n_samples, n_classes], + For multi-class task, y_pred is a numpy 2-D array of shape = [n_samples, n_classes], and grad and hess should be returned in the same format. """ if not SKLEARN_INSTALLED: