[python] fixes for supporting 2d numpy arrays for predictions, grads and hess in multiclass custom objective and eval #5030

Merged · 2 commits · Feb 26, 2022
31 changes: 12 additions & 19 deletions python-package/lightgbm/basic.py
@@ -2751,7 +2751,7 @@ def trees_to_dataframe(self):
- ``missing_direction`` : str, split direction that missing values should go to. ``None`` for leaf nodes.
- ``missing_type`` : str, describes what types of values are treated as missing.
- ``value`` : float64, predicted value for this leaf node, multiplied by the learning rate.
- - ``weight`` : float64 or int64, sum of hessian (second-order derivative of objective), summed over observations that fall in this node.
+ - ``weight`` : float64 or int64, sum of Hessian (second-order derivative of objective), summed over observations that fall in this node.
- ``count`` : int64, number of records in the training data that fall into this node.

Returns
@@ -2960,7 +2960,7 @@ def update(self, train_set=None, fobj=None):
The value of the second order derivative (Hessian) of the loss
with respect to the elements of preds for each sample point.

- For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
+ For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes],
and grad and hess should be returned in the same format.

Returns
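The docstring change above is the heart of this PR: for multi-class training, fobj now receives preds as a 2-D [n_samples, n_classes] array and may return grad and hess in the same shape. A minimal sketch of a softmax cross-entropy objective written against that contract — the function itself is illustrative and not part of the diff; only Dataset.get_label() is assumed from the LightGBM API:

```python
import numpy as np

def multiclass_logloss_obj(preds, train_set):
    # preds: raw scores, 2-D array of shape [n_samples, n_classes]
    labels = train_set.get_label().astype(int)
    # row-wise softmax, stabilized by subtracting the per-row max
    exp = np.exp(preds - preds.max(axis=1, keepdims=True))
    prob = exp / exp.sum(axis=1, keepdims=True)
    # one-hot encoding of the true class per row
    onehot = np.zeros_like(prob)
    onehot[np.arange(labels.size), labels] = 1.0
    # gradient and (diagonal) Hessian of softmax cross-entropy,
    # returned in the same [n_samples, n_classes] shape as preds
    grad = prob - onehot
    hess = prob * (1.0 - prob)
    return grad, hess
```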
@@ -2999,9 +2999,6 @@ def update(self, train_set=None, fobj=None):
if not self.__set_objective_to_none:
self.reset_parameter({"objective": "none"}).__set_objective_to_none = True
grad, hess = fobj(self.__inner_predict(0), self.train_set)
- if self.num_model_per_iteration() > 1:
-     grad = grad.ravel(order='F')
-     hess = hess.ravel(order='F')
return self.__boost(grad, hess)

def __boost(self, grad, hess):
@@ -3011,7 +3008,7 @@ def __boost(self, grad, hess):

Score is returned before any transformation,
e.g. it is raw margin instead of probability of positive class for binary task.
- For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
+ For multi-class task, score are numpy 2-D array of shape = [n_samples, n_classes],
and grad and hess should be returned in the same format.

Parameters
@@ -3028,19 +3025,21 @@ def __boost(self, grad, hess):
is_finished : bool
Whether the boost was successfully finished.
"""
+ if self.__num_class > 1:
+     grad = grad.ravel(order='F')
+     hess = hess.ravel(order='F')
grad = list_to_1d_numpy(grad, name='gradient')
hess = list_to_1d_numpy(hess, name='hessian')
assert grad.flags.c_contiguous
assert hess.flags.c_contiguous
if len(grad) != len(hess):
raise ValueError(f"Lengths of gradient ({len(grad)}) and Hessian ({len(hess)}) don't match")
num_train_data = self.train_set.num_data()
- num_models = self.__num_class
- if len(grad) != num_train_data * num_models:
+ if len(grad) != num_train_data * self.__num_class:
raise ValueError(
f"Lengths of gradient ({len(grad)}) and Hessian ({len(hess)}) "
f"don't match training data length ({num_train_data}) * "
f"number of models per one iteration ({num_models})"
f"number of models per one iteration ({self.__num_class})"
)
is_finished = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterUpdateOneIterCustom(
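Nothing changes for the C backend here: it still expects a single flat buffer grouped class-by-class, which is why __boost now performs the column-major flattening itself. A small illustration (synthetic numbers) of what ravel(order='F') does to a [n_samples, n_classes] array:

```python
import numpy as np

# 3 samples, 2 classes: column j holds the gradients for class j
grad_2d = np.array([[0.1, 0.4],
                    [0.2, 0.5],
                    [0.3, 0.6]])

# column-major (Fortran-order) flattening groups by class first, then by row
grad_flat = grad_2d.ravel(order='F')
print(grad_flat)  # [0.1 0.2 0.3 0.4 0.5 0.6]

# the value for row i, class j lands at index j * n_samples + i,
# matching the old "group by class_id first" layout
assert grad_flat[1 * 3 + 2] == grad_2d[2, 1]
```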
@@ -3148,8 +3147,9 @@ def eval(self, data, name, feval=None):
Should accept two parameters: preds, eval_data,
and return (eval_name, eval_result, is_higher_better) or list of such tuples.

- preds : numpy 1-D array
+ preds : numpy 1-D array or numpy 2-D array (for multi-class task)
The predicted values.
+ For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes].
If ``fobj`` is specified, predicted values are returned before any transformation,
e.g. they are raw margin instead of probability of positive class for binary task in this case.
eval_data : Dataset
@@ -3161,9 +3161,6 @@
is_higher_better : bool
Is eval result higher better, e.g. AUC is ``is_higher_better``.

- For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
- and grad and hess should be returned in the same format.

Returns
-------
result : list
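Under the updated feval contract the metric receives the 2-D preds directly instead of a flat, class-major vector. An illustrative multi-class error-rate metric (not part of this PR) written against the new convention:

```python
import numpy as np

def multiclass_error_rate(preds, eval_data):
    # preds: [n_samples, n_classes] raw scores when a custom fobj is in use,
    # so the predicted class is simply the argmax over columns
    labels = eval_data.get_label().astype(int)
    pred_class = preds.argmax(axis=1)
    err = float((pred_class != labels).mean())
    # return (eval_name, eval_result, is_higher_better)
    return 'multiclass_error', err, False
```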
@@ -3198,6 +3195,7 @@ def eval_train(self, feval=None):

preds : numpy 1-D array or numpy 2-D array (for multi-class task)
The predicted values.
+ For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes].
If ``fobj`` is specified, predicted values are returned before any transformation,
e.g. they are raw margin instead of probability of positive class for binary task in this case.
eval_data : Dataset
@@ -3209,9 +3207,6 @@
is_higher_better : bool
Is eval result higher better, e.g. AUC is ``is_higher_better``.

- For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
- and grad and hess should be returned in the same format.

Returns
-------
result : list
@@ -3231,6 +3226,7 @@ def eval_valid(self, feval=None):

preds : numpy 1-D array or numpy 2-D array (for multi-class task)
The predicted values.
+ For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes].
If ``fobj`` is specified, predicted values are returned before any transformation,
e.g. they are raw margin instead of probability of positive class for binary task in this case.
eval_data : Dataset
@@ -3242,9 +3238,6 @@
is_higher_better : bool
Is eval result higher better, e.g. AUC is ``is_higher_better``.

- For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
- and grad and hess should be returned in the same format.

Returns
-------
result : list
37 changes: 16 additions & 21 deletions python-package/lightgbm/engine.py
@@ -9,13 +9,12 @@
import numpy as np

from . import callback
- from .basic import (Booster, Dataset, LightGBMError, _ArrayLike, _choose_param_value, _ConfigAliases, _InnerPredictor,
-                     _log_warning)
+ from .basic import Booster, Dataset, LightGBMError, _choose_param_value, _ConfigAliases, _InnerPredictor, _log_warning
from .compat import SKLEARN_INSTALLED, _LGBMGroupKFold, _LGBMStratifiedKFold

_LGBM_CustomObjectiveFunction = Callable[
[np.ndarray, Dataset],
- Tuple[_ArrayLike, _ArrayLike]
+ Tuple[np.ndarray, np.ndarray]
]
_LGBM_CustomMetricFunction = Callable[
[np.ndarray, Dataset],
@@ -56,30 +55,30 @@ def train(
Should accept two parameters: preds, train_data,
and return (grad, hess).

- preds : numpy 1-D array
+ preds : numpy 1-D array or numpy 2-D array (for multi-class task)
The predicted values.
Predicted values are returned before any transformation,
e.g. they are raw margin instead of probability of positive class for binary task.
train_data : Dataset
The training dataset.
- grad : list, numpy 1-D array or pandas Series
+ grad : numpy 1-D array or numpy 2-D array (for multi-class task)
The value of the first order derivative (gradient) of the loss
with respect to the elements of preds for each sample point.
- hess : list, numpy 1-D array or pandas Series
+ hess : numpy 1-D array or numpy 2-D array (for multi-class task)
The value of the second order derivative (Hessian) of the loss
with respect to the elements of preds for each sample point.

- For multi-class task, the preds is group by class_id first, then group by row_id.
- If you want to get i-th row preds in j-th class, the access way is score[j * num_data + i]
- and you should group grad and hess in this way as well.
+ For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes],
+ and grad and hess should be returned in the same format.

feval : callable, list of callable, or None, optional (default=None)
Customized evaluation function.
Each evaluation function should accept two parameters: preds, eval_data,
and return (eval_name, eval_result, is_higher_better) or list of such tuples.

- preds : numpy 1-D array
+ preds : numpy 1-D array or numpy 2-D array (for multi-class task)
The predicted values.
+ For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes].
If ``fobj`` is specified, predicted values are returned before any transformation,
e.g. they are raw margin instead of probability of positive class for binary task in this case.
eval_data : Dataset
Expand All @@ -91,8 +90,6 @@ def train(
is_higher_better : bool
Is eval result higher better, e.g. AUC is ``is_higher_better``.

- For multi-class task, the preds is group by class_id first, then group by row_id.
- If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i].
To ignore the default metric corresponding to the used objective,
set the ``metric`` parameter to the string ``"None"`` in ``params``.
init_model : str, pathlib.Path, Booster or None, optional (default=None)
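Putting the pieces together, a hypothetical training call under the new convention might look like the sketch below. multiclass_logloss_obj and multiclass_error_rate are the illustrative functions from the basic.py section above, and the synthetic data is only for shape-checking; setting 'objective': 'none' makes explicit what train() is assumed to enforce when fobj is supplied.

```python
import numpy as np
import lightgbm as lgb

rng = np.random.default_rng(0)
X = rng.random((500, 10))
y = rng.integers(0, 3, size=500)
train_data = lgb.Dataset(X, label=y)

bst = lgb.train(
    params={'objective': 'none', 'num_class': 3, 'verbose': -1},
    train_set=train_data,
    num_boost_round=10,
    fobj=multiclass_logloss_obj,   # returns 2-D grad/hess
    feval=multiclass_error_rate,   # receives 2-D preds
    valid_sets=[train_data],
)
```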
@@ -411,30 +408,30 @@ def cv(params, train_set, num_boost_round=100,
Should accept two parameters: preds, train_data,
and return (grad, hess).

- preds : numpy 1-D array
+ preds : numpy 1-D array or numpy 2-D array (for multi-class task)
The predicted values.
Predicted values are returned before any transformation,
e.g. they are raw margin instead of probability of positive class for binary task.
train_data : Dataset
The training dataset.
- grad : list, numpy 1-D array or pandas Series
+ grad : numpy 1-D array or numpy 2-D array (for multi-class task)
The value of the first order derivative (gradient) of the loss
with respect to the elements of preds for each sample point.
- hess : list, numpy 1-D array or pandas Series
+ hess : numpy 1-D array or numpy 2-D array (for multi-class task)
The value of the second order derivative (Hessian) of the loss
with respect to the elements of preds for each sample point.

- For multi-class task, the preds is group by class_id first, then group by row_id.
- If you want to get i-th row preds in j-th class, the access way is score[j * num_data + i]
- and you should group grad and hess in this way as well.
+ For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes],
+ and grad and hess should be returned in the same format.

feval : callable, list of callable, or None, optional (default=None)
Customized evaluation function.
Each evaluation function should accept two parameters: preds, eval_data,
and return (eval_name, eval_result, is_higher_better) or list of such tuples.

- preds : numpy 1-D array
+ preds : numpy 1-D array or numpy 2-D array (for multi-class task)
The predicted values.
+ For multi-class task, preds are numpy 2-D array of shape = [n_samples, n_classes].
If ``fobj`` is specified, predicted values are returned before any transformation,
e.g. they are raw margin instead of probability of positive class for binary task in this case.
eval_data : Dataset
Expand All @@ -446,8 +443,6 @@ def cv(params, train_set, num_boost_round=100,
is_higher_better : bool
Is eval result higher better, e.g. AUC is ``is_higher_better``.

- For multi-class task, the preds is group by class_id first, then group by row_id.
- If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i].
To ignore the default metric corresponding to the used objective,
set ``metrics`` to the string ``"None"``.
init_model : str, pathlib.Path, Booster or None, optional (default=None)
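cv() accepts the same callables, so the earlier sketch carries over directly; hypothetically (reusing train_data and the two illustrative functions from above):

```python
cv_results = lgb.cv(
    params={'objective': 'none', 'num_class': 3, 'verbose': -1},
    train_set=train_data,
    num_boost_round=10,
    nfold=3,
    fobj=multiclass_logloss_obj,
    feval=multiclass_error_rate,
)
```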
4 changes: 2 additions & 2 deletions python-package/lightgbm/plotting.py
@@ -556,7 +556,7 @@ def create_tree_digraph(
- ``'internal_count'`` : number of records from the training data that fall into this non-leaf node
- ``'internal_weight'`` : total weight of all nodes that fall into this non-leaf node
- ``'leaf_count'`` : number of records from the training data that fall into this leaf node
- - ``'leaf_weight'`` : total weight (sum of hessian) of all observations that fall into this leaf node
+ - ``'leaf_weight'`` : total weight (sum of Hessian) of all observations that fall into this leaf node
- ``'data_percentage'`` : percentage of training data that fall into this node
precision : int or None, optional (default=3)
Used to restrict the display of floating point values to a certain precision.
@@ -649,7 +649,7 @@ def plot_tree(
- ``'internal_count'`` : number of records from the training data that fall into this non-leaf node
- ``'internal_weight'`` : total weight of all nodes that fall into this non-leaf node
- ``'leaf_count'`` : number of records from the training data that fall into this leaf node
- - ``'leaf_weight'`` : total weight (sum of hessian) of all observations that fall into this leaf node
+ - ``'leaf_weight'`` : total weight (sum of Hessian) of all observations that fall into this leaf node
- ``'data_percentage'`` : percentage of training data that fall into this node
precision : int or None, optional (default=3)
Used to restrict the display of floating point values to a certain precision.
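The capitalization fix above is cosmetic, but the show_info values it documents are real plotting options; a short usage sketch, assuming a fitted Booster bst plus working matplotlib and graphviz installs:

```python
import lightgbm as lgb

ax = lgb.plot_tree(
    bst,                 # a fitted Booster (assumed)
    tree_index=0,
    show_info=['leaf_weight', 'leaf_count', 'data_percentage'],
    precision=3,
)
```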
26 changes: 9 additions & 17 deletions python-package/lightgbm/sklearn.py
@@ -6,7 +6,7 @@

import numpy as np

- from .basic import Booster, Dataset, LightGBMError, _ArrayLike, _choose_param_value, _ConfigAliases, _log_warning
+ from .basic import Booster, Dataset, LightGBMError, _choose_param_value, _ConfigAliases, _log_warning
from .callback import record_evaluation
from .compat import (SKLEARN_INSTALLED, LGBMNotFittedError, _LGBMAssertAllFinite, _LGBMCheckArray,
_LGBMCheckClassificationTargets, _LGBMCheckSampleWeight, _LGBMCheckXY, _LGBMClassifierBase,
@@ -19,11 +19,11 @@
_LGBM_ScikitCustomObjectiveFunction = Union[
Callable[
[np.ndarray, np.ndarray],
- Tuple[_ArrayLike, _ArrayLike]
+ Tuple[np.ndarray, np.ndarray]
],
Callable[
[np.ndarray, np.ndarray, np.ndarray],
- Tuple[_ArrayLike, _ArrayLike]
+ Tuple[np.ndarray, np.ndarray]
],
]
_LGBM_ScikitCustomEvalFunction = Union[
@@ -72,13 +72,13 @@ def __init__(self, func: _LGBM_ScikitCustomObjectiveFunction):
grad : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape [n_samples, n_classes] (for multi-class task)
The value of the first order derivative (gradient) of the loss
with respect to the elements of y_pred for each sample point.
- hess : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape [n_samples, n_classes] (for multi-class task)
+ hess : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape = [n_samples, n_classes] (for multi-class task)
The value of the second order derivative (Hessian) of the loss
with respect to the elements of y_pred for each sample point.

.. note::

- For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
+ For multi-class task, y_pred is a numpy 2-D array of shape = [n_samples, n_classes],
and grad and hess should be returned in the same format.
"""
self.func = func
@@ -95,10 +95,10 @@ def __call__(self, preds, dataset):

Returns
-------
- grad : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape [n_samples, n_classes] (for multi-class task)
+ grad : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape = [n_samples, n_classes] (for multi-class task)
The value of the first order derivative (gradient) of the loss
with respect to the elements of preds for each sample point.
- hess : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape [n_samples, n_classes] (for multi-class task)
+ hess : numpy 1-D array of shape = [n_samples] or numpy 2-D array of shape = [n_samples, n_classes] (for multi-class task)
The value of the second order derivative (Hessian) of the loss
with respect to the elements of preds for each sample point.
"""
@@ -162,11 +162,6 @@ def __init__(self, func: _LGBM_ScikitCustomEvalFunction):
The eval result.
is_higher_better : bool
Is eval result higher better, e.g. AUC is ``is_higher_better``.

- .. note::
-
- For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
- and grad and hess should be returned in the same format.
"""
self.func = func

Expand Down Expand Up @@ -297,9 +292,6 @@ def __call__(self, preds, dataset):
The eval result.
is_higher_better : bool
Is eval result higher better, e.g. AUC is ``is_higher_better``.

- For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
- and grad and hess should be returned in the same format.
"""

_lgbmmodel_doc_predict = (
@@ -415,7 +407,7 @@ def __init__(
min_split_gain : float, optional (default=0.)
Minimum loss reduction required to make a further partition on a leaf node of the tree.
min_child_weight : float, optional (default=1e-3)
- Minimum sum of instance weight (hessian) needed in a child (leaf).
+ Minimum sum of instance weight (Hessian) needed in a child (leaf).
min_child_samples : int, optional (default=20)
Minimum number of data needed in a child (leaf).
subsample : float, optional (default=1.)
@@ -473,7 +465,7 @@
The value of the second order derivative (Hessian) of the loss
with respect to the elements of y_pred for each sample point.

- For multi-class task, preds are a [n_samples, n_classes] numpy 2-D array,
+ For multi-class task, y_pred is a numpy 2-D array of shape = [n_samples, n_classes],
and grad and hess should be returned in the same format.
"""
if not SKLEARN_INSTALLED:
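End to end, the sklearn-style objective plugs into the estimator's objective parameter. A hypothetical smoke test (synthetic data; sklearn_multiclass_obj is the sketch above):

```python
import numpy as np
from lightgbm import LGBMClassifier

rng = np.random.default_rng(0)
X = rng.random((500, 10))
y = rng.integers(0, 3, size=500)

clf = LGBMClassifier(objective=sklearn_multiclass_obj, n_estimators=10)
clf.fit(X, y)

# with a custom objective, predict() argmaxes over raw scores, which is
# the right decision rule for a softmax-based objective
print(clf.predict(X[:5]))
```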