
Commit 5631366

[python] fixes for supporting 2d numpy arrays for predictions, grads and hess in multiclass custom objective and eval (#5030)
* fixes for supporting 2d numpy arrays for predictions, grads and hess in multiclass custom objective

* Apply suggestions from code review

Co-authored-by: José Morales <[email protected]>
StrikerRUS and jmoralez authored Feb 26, 2022
1 parent 7e47804 commit 5631366
Showing 4 changed files with 39 additions and 59 deletions.
31 changes: 12 additions & 19 deletions python-package/lightgbm/basic.py
@@ -2752,7 +2752,7 @@ def trees_to_dataframe(self):
- ``missing_direction`` : str, split direction that missing values should go to. ``None`` for leaf nodes.
- ``missing_type`` : str, describes what types of values are treated as missing.
- ``value`` : float64, predicted value for this leaf node, multiplied by the learning rate.
- - ``weight`` : float64 or int64, sum of hessian (second-order derivative of objective), summed over observations that fall in this node.
+ - ``weight`` : float64 or int64, sum of Hessian (second-order derivative of objective), summed over observations that fall in this node.
- ``count`` : int64, number of records in the training data that fall into this node.
Returns
@@ -2961,7 +2961,7 @@ def update(self, train_set=None, fobj=None):
The value of the second order derivative (Hessian) of the loss
with respect to the elements of preds for each sample point.
- For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
+ For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes],
and grad and hess should be returned in the same format.
Returns
@@ -3000,9 +3000,6 @@ def update(self, train_set=None, fobj=None):
if not self.__set_objective_to_none:
self.reset_parameter({"objective": "none"}).__set_objective_to_none = True
grad, hess = fobj(self.__inner_predict(0), self.train_set)
- if self.num_model_per_iteration() > 1:
-     grad = grad.ravel(order='F')
-     hess = hess.ravel(order='F')
return self.__boost(grad, hess)
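In practice this hunk means a multiclass fobj can hand back 2-D arrays and let the Booster do the flattening. A minimal sketch of such an objective (softmax log-loss with a diagonal Hessian approximation; the function name and formulas are illustrative, not part of this commit):

import numpy as np

def multiclass_logloss_obj(preds, train_data):
    # preds: raw scores with shape [n_samples, n_classes] under the new contract
    labels = train_data.get_label().astype(int)
    # Row-wise softmax of the raw scores.
    prob = np.exp(preds - preds.max(axis=1, keepdims=True))
    prob /= prob.sum(axis=1, keepdims=True)
    # One-hot matrix of the true labels.
    onehot = np.zeros_like(prob)
    onehot[np.arange(labels.size), labels] = 1.0
    grad = prob - onehot        # 2-D, same shape as preds
    hess = prob * (1.0 - prob)  # diagonal Hessian approximation, also 2-D
    return grad, hess           # no manual ravel needed anymore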

def __boost(self, grad, hess):
@@ -3012,7 +3009,7 @@ def __boost(self, grad, hess):
Score is returned before any transformation,
e.g. it is raw margin instead of probability of positive class for binary task.
- For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
+ For multi-class task, score are numpy 2-D array of shape = [n_samples, n_classes],
and grad and hess should be returned in the same format.
Parameters
@@ -3029,19 +3026,21 @@ def __boost(self, grad, hess):
is_finished : bool
Whether the boost was successfully finished.
"""
+ if self.__num_class > 1:
+     grad = grad.ravel(order='F')
+     hess = hess.ravel(order='F')
grad = list_to_1d_numpy(grad, name='gradient')
hess = list_to_1d_numpy(hess, name='hessian')
assert grad.flags.c_contiguous
assert hess.flags.c_contiguous
if len(grad) != len(hess):
raise ValueError(f"Lengths of gradient ({len(grad)}) and Hessian ({len(hess)}) don't match")
num_train_data = self.train_set.num_data()
- num_models = self.__num_class
- if len(grad) != num_train_data * num_models:
+ if len(grad) != num_train_data * self.__num_class:
raise ValueError(
f"Lengths of gradient ({len(grad)}) and Hessian ({len(hess)}) "
f"don't match training data length ({num_train_data}) * "
f"number of models per one iteration ({num_models})"
f"number of models per one iteration ({self.__num_class})"
)
is_finished = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterUpdateOneIterCustom(
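The order='F' in the relocated ravel is the load-bearing detail: Fortran (column-major) flattening groups values class by class, which matches the score[j * num_data + i] layout the C API still expects. A quick illustration, not taken from the diff:

import numpy as np

# Hypothetical gradients for 3 samples and 2 classes.
grad_2d = np.array([[0.1, 0.9],
                    [0.2, 0.8],
                    [0.3, 0.7]])
# Column-major flattening: all of class 0 first, then all of class 1.
print(grad_2d.ravel(order='F'))  # [0.1 0.2 0.3 0.9 0.8 0.7]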
@@ -3149,8 +3148,9 @@ def eval(self, data, name, feval=None):
Should accept two parameters: preds, eval_data,
and return (eval_name, eval_result, is_higher_better) or list of such tuples.
- preds : numpy 1-D array
+ preds : numpy 1-D array or numpy 2-D array (for multi-class task)
The predicted values.
+ For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes].
If ``fobj`` is specified, predicted values are returned before any transformation,
e.g. they are raw margin instead of probability of positive class for binary task in this case.
eval_data : Dataset
@@ -3162,9 +3162,6 @@ def eval(self, data, name, feval=None):
is_higher_better : bool
Is eval result higher better, e.g. AUC is ``is_higher_better``.
- For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
- and grad and hess should be returned in the same format.
Returns
-------
result : list
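Under the updated contract a multiclass feval can index the prediction matrix directly rather than reshaping a flat buffer. A hypothetical example (the name and metric are illustrative):

import numpy as np

def multiclass_error_rate(preds, eval_data):
    # preds: shape [n_samples, n_classes] for the multi-class task
    labels = eval_data.get_label().astype(int)
    pred_class = preds.argmax(axis=1)
    return 'error_rate', float(np.mean(pred_class != labels)), False  # lower is better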
@@ -3199,6 +3196,7 @@ def eval_train(self, feval=None):
preds : numpy 1-D array or numpy 2-D array (for multi-class task)
The predicted values.
+ For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes].
If ``fobj`` is specified, predicted values are returned before any transformation,
e.g. they are raw margin instead of probability of positive class for binary task in this case.
eval_data : Dataset
@@ -3210,9 +3208,6 @@ def eval_train(self, feval=None):
is_higher_better : bool
Is eval result higher better, e.g. AUC is ``is_higher_better``.
- For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
- and grad and hess should be returned in the same format.
Returns
-------
result : list
@@ -3232,6 +3227,7 @@ def eval_valid(self, feval=None):
preds : numpy 1-D array or numpy 2-D array (for multi-class task)
The predicted values.
+ For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes].
If ``fobj`` is specified, predicted values are returned before any transformation,
e.g. they are raw margin instead of probability of positive class for binary task in this case.
eval_data : Dataset
@@ -3243,9 +3239,6 @@ def eval_valid(self, feval=None):
is_higher_better : bool
Is eval result higher better, e.g. AUC is ``is_higher_better``.
- For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
- and grad and hess should be returned in the same format.
Returns
-------
result : list
37 changes: 16 additions & 21 deletions python-package/lightgbm/engine.py
@@ -9,13 +9,12 @@
import numpy as np

from . import callback
- from .basic import (Booster, Dataset, LightGBMError, _ArrayLike, _choose_param_value, _ConfigAliases, _InnerPredictor,
-                     _log_warning)
+ from .basic import Booster, Dataset, LightGBMError, _choose_param_value, _ConfigAliases, _InnerPredictor, _log_warning
from .compat import SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold

_LGBM_CustomObjectiveFunction = Callable[
[np.ndarray, Dataset],
- Tuple[_ArrayLike, _ArrayLike]
+ Tuple[np.ndarray, np.ndarray]
]
_LGBM_CustomMetricFunction = Callable[
[np.ndarray, Dataset],
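With the alias narrowed from _ArrayLike to numpy arrays, a conforming custom objective can be annotated as below; this is a sketch only, and the body is a placeholder:

from typing import Tuple

import numpy as np
from lightgbm import Dataset

def my_fobj(preds: np.ndarray, train_data: Dataset) -> Tuple[np.ndarray, np.ndarray]:
    grad = np.zeros_like(preds)  # placeholder gradient
    hess = np.ones_like(preds)   # placeholder Hessian
    return grad, hess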
@@ -56,30 +55,30 @@ def train(
Should accept two parameters: preds, train_data,
and return (grad, hess).
- preds : numpy 1-D array
+ preds : numpy 1-D array or numpy 2-D array (for multi-class task)
The predicted values.
Predicted values are returned before any transformation,
e.g. they are raw margin instead of probability of positive class for binary task.
train_data : Dataset
The training dataset.
- grad : list, numpy 1-D array or pandas Series
+ grad : numpy 1-D array or numpy 2-D array (for multi-class task)
The value of the first order derivative (gradient) of the loss
with respect to the elements of preds for each sample point.
- hess : list, numpy 1-D array or pandas Series
+ hess : numpy 1-D array or numpy 2-D array (for multi-class task)
The value of the second order derivative (Hessian) of the loss
with respect to the elements of preds for each sample point.
- For multi-class task, the preds is group by class_id first, then group by row_id.
- If you want to get i-th row preds in j-th class, the access way is score[j * num_data + i]
- and you should group grad and hess in this way as well.
+ For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes],
+ and grad and hess should be returned in the same format.
feval : callable, list of callable, or None, optional (default=None)
Customized evaluation function.
Each evaluation function should accept two parameters: preds, eval_data,
and return (eval_name, eval_result, is_higher_better) or list of such tuples.
- preds : numpy 1-D array
+ preds : numpy 1-D array or numpy 2-D array (for multi-class task)
The predicted values.
+ For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes].
If ``fobj`` is specified, predicted values are returned before any transformation,
e.g. they are raw margin instead of probability of positive class for binary task in this case.
eval_data : Dataset
@@ -91,8 +90,6 @@ def train(
is_higher_better : bool
Is eval result higher better, e.g. AUC is ``is_higher_better``.
- For multi-class task, the preds is group by class_id first, then group by row_id.
- If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i].
To ignore the default metric corresponding to the used objective,
set the ``metric`` parameter to the string ``"None"`` in ``params``.
init_model : str, pathlib.Path, Booster or None, optional (default=None)
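Putting the new signatures to work, a hedged usage sketch for lgb.train, reusing the hypothetical multiclass_logloss_obj and multiclass_error_rate defined earlier on synthetic data:

import numpy as np
import lightgbm as lgb

rng = np.random.default_rng(0)
X = rng.random((500, 10))
y = rng.integers(0, 3, size=500)
dtrain = lgb.Dataset(X, label=y)

booster = lgb.train(
    params={'objective': 'none', 'num_class': 3, 'verbose': -1},
    train_set=dtrain,
    num_boost_round=10,
    fobj=multiclass_logloss_obj,  # returns 2-D grad and hess
    feval=multiclass_error_rate,  # receives 2-D preds
    valid_sets=[dtrain],
)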
@@ -411,30 +408,30 @@ def cv(params, train_set, num_boost_round=100,
Should accept two parameters: preds, train_data,
and return (grad, hess).
- preds : numpy 1-D array
+ preds : numpy 1-D array or numpy 2-D array (for multi-class task)
The predicted values.
Predicted values are returned before any transformation,
e.g. they are raw margin instead of probability of positive class for binary task.
train_data : Dataset
The training dataset.
- grad : list, numpy 1-D array or pandas Series
+ grad : numpy 1-D array or numpy 2-D array (for multi-class task)
The value of the first order derivative (gradient) of the loss
with respect to the elements of preds for each sample point.
- hess : list, numpy 1-D array or pandas Series
+ hess : numpy 1-D array or numpy 2-D array (for multi-class task)
The value of the second order derivative (Hessian) of the loss
with respect to the elements of preds for each sample point.
- For multi-class task, the preds is group by class_id first, then group by row_id.
- If you want to get i-th row preds in j-th class, the access way is score[j * num_data + i]
- and you should group grad and hess in this way as well.
+ For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes],
+ and grad and hess should be returned in the same format.
feval : callable, list of callable, or None, optional (default=None)
Customized evaluation function.
Each evaluation function should accept two parameters: preds, eval_data,
and return (eval_name, eval_result, is_higher_better) or list of such tuples.
- preds : numpy 1-D array
+ preds : numpy 1-D array or numpy 2-D array (for multi-class task)
The predicted values.
+ For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes].
If ``fobj`` is specified, predicted values are returned before any transformation,
e.g. they are raw margin instead of probability of positive class for binary task in this case.
eval_data : Dataset
@@ -446,8 +443,6 @@ def cv(params, train_set, num_boost_round=100,
is_higher_better : bool
Is eval result higher better, e.g. AUC is ``is_higher_better``.
- For multi-class task, the preds is group by class_id first, then group by row_id.
- If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i].
To ignore the default metric corresponding to the used objective,
set ``metrics`` to the string ``"None"``.
init_model : str, pathlib.Path, Booster or None, optional (default=None)
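The same callables plug into lgb.cv unchanged; a brief sketch under the same assumptions as the train example above:

cv_results = lgb.cv(
    params={'objective': 'none', 'num_class': 3, 'verbose': -1},
    train_set=dtrain,
    num_boost_round=10,
    nfold=3,
    fobj=multiclass_logloss_obj,
    feval=multiclass_error_rate,
)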
4 changes: 2 additions & 2 deletions python-package/lightgbm/plotting.py
@@ -556,7 +556,7 @@ def create_tree_digraph(
- ``'internal_count'`` : number of records from the training data that fall into this non-leaf node
- ``'internal_weight'`` : total weight of all nodes that fall into this non-leaf node
- ``'leaf_count'`` : number of records from the training data that fall into this leaf node
- - ``'leaf_weight'`` : total weight (sum of hessian) of all observations that fall into this leaf node
+ - ``'leaf_weight'`` : total weight (sum of Hessian) of all observations that fall into this leaf node
- ``'data_percentage'`` : percentage of training data that fall into this node
precision : int or None, optional (default=3)
Used to restrict the display of floating point values to a certain precision.
@@ -649,7 +649,7 @@ def plot_tree(
- ``'internal_count'`` : number of records from the training data that fall into this non-leaf node
- ``'internal_weight'`` : total weight of all nodes that fall into this non-leaf node
- ``'leaf_count'`` : number of records from the training data that fall into this leaf node
- - ``'leaf_weight'`` : total weight (sum of hessian) of all observations that fall into this leaf node
+ - ``'leaf_weight'`` : total weight (sum of Hessian) of all observations that fall into this leaf node
- ``'data_percentage'`` : percentage of training data that fall into this node
precision : int or None, optional (default=3)
Used to restrict the display of floating point values to a certain precision.
26 changes: 9 additions & 17 deletions python-package/lightgbm/sklearn.py
@@ -6,7 +6,7 @@

import numpy as np

- from .basic import Booster, Dataset, LightGBMError, _ArrayLike, _choose_param_value, _ConfigAliases, _log_warning
+ from .basic import Booster, Dataset, LightGBMError, _choose_param_value, _ConfigAliases, _log_warning
from .callback import record_evaluation
from .compat import (SKLEARN_INSTALLED, LGBMNotFittedError, _LGBMAssertAllFinite, _LGBMCheckArray,
_LGBMCheckClassificationTargets, _LGBMCheckSampleWeight, _LGBMCheckXY, _LGBMClassifierBase,
@@ -19,11 +19,11 @@
_LGBM_ScikitCustomObjectiveFunction = Union[
Callable[
[np.ndarray, np.ndarray],
- Tuple[_ArrayLike, _ArrayLike]
+ Tuple[np.ndarray, np.ndarray]
],
Callable[
[np.ndarray, np.ndarray, np.ndarray],
- Tuple[_ArrayLike, _ArrayLike]
+ Tuple[np.ndarray, np.ndarray]
],
]
_LGBM_ScikitCustomEvalFunction = Union[
@@ -72,13 +72,13 @@ def __init__(self, func: _LGBM_ScikitCustomObjectiveFunction):
grad : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape [n_samples, n_classes] (for multi-class task)
The value of the first order derivative (gradient) of the loss
with respect to the elements of y_pred for each sample point.
- hess : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape [n_samples, n_classes] (for multi-class task)
+ hess : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape = [n_samples, n_classes] (for multi-class task)
The value of the second order derivative (Hessian) of the loss
with respect to the elements of y_pred for each sample point.
.. note::
- For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
+ For multi-class task, y_pred is a numpy 2-D array of shape = [n_samples, n_classes],
and grad and hess should be returned in the same format.
"""
self.func = func
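In the scikit-learn interface the objective receives (y_true, y_pred) rather than (preds, dataset), but the same 2-D shapes now apply for multi-class. A hypothetical sketch mirroring the earlier objective:

import numpy as np
from lightgbm import LGBMClassifier

def sk_multiclass_obj(y_true, y_pred):
    # y_pred: raw scores with shape [n_samples, n_classes]
    prob = np.exp(y_pred - y_pred.max(axis=1, keepdims=True))
    prob /= prob.sum(axis=1, keepdims=True)
    onehot = np.zeros_like(prob)
    onehot[np.arange(y_true.size), y_true.astype(int)] = 1.0
    return prob - onehot, prob * (1.0 - prob)  # 2-D grad, 2-D hess

clf = LGBMClassifier(objective=sk_multiclass_obj)  # then fit on a multi-class y as usual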
@@ -95,10 +95,10 @@ def __call__(self, preds, dataset):
Returns
-------
- grad : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape [n_samples, n_classes] (for multi-class task)
+ grad : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape = [n_samples, n_classes] (for multi-class task)
The value of the first order derivative (gradient) of the loss
with respect to the elements of preds for each sample point.
- hess : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape [n_samples, n_classes] (for multi-class task)
+ hess : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape = [n_samples, n_classes] (for multi-class task)
The value of the second order derivative (Hessian) of the loss
with respect to the elements of preds for each sample point.
"""
@@ -162,11 +162,6 @@ def __init__(self, func: _LGBM_ScikitCustomEvalFunction):
The eval result.
is_higher_better : bool
Is eval result higher better, e.g. AUC is ``is_higher_better``.
- .. note::
-     For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
-     and grad and hess should be returned in the same format.
"""
self.func = func

@@ -297,9 +292,6 @@ def __call__(self, preds, dataset):
The eval result.
is_higher_better : bool
Is eval result higher better, e.g. AUC is ``is_higher_better``.
- For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
- and grad and hess should be returned in the same format.
"""

_lgbmmodel_doc_predict = (
@@ -415,7 +407,7 @@ def __init__(
min_split_gain : float, optional (default=0.)
Minimum loss reduction required to make a further partition on a leaf node of the tree.
min_child_weight : float, optional (default=1e-3)
- Minimum sum of instance weight (hessian) needed in a child (leaf).
+ Minimum sum of instance weight (Hessian) needed in a child (leaf).
min_child_samples : int, optional (default=20)
Minimum number of data needed in a child (leaf).
subsample : float, optional (default=1.)
@@ -473,7 +465,7 @@ def __init__(
The value of the second order derivative (Hessian) of the loss
with respect to the elements of y_pred for each sample point.
- For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
+ For multi-class task, y_pred is a numpy 2-D array of shape = [n_samples, n_classes],
and grad and hess should be returned in the same format.
"""
if not SKLEARN_INSTALLED:
