From 057ba0780149361dbb35766a22463f5f29a96348 Mon Sep 17 00:00:00 2001 From: Nikita Titov Date: Thu, 17 Feb 2022 01:28:54 +0300 Subject: [PATCH] [docs] document rounding behavior of floating point numbers in categorical features (#5009) --- docs/Advanced-Topics.rst | 1 + python-package/lightgbm/basic.py | 2 ++ python-package/lightgbm/engine.py | 2 ++ python-package/lightgbm/sklearn.py | 1 + 4 files changed, 6 insertions(+) diff --git a/docs/Advanced-Topics.rst b/docs/Advanced-Topics.rst index 8ef239d22de4..b2ccc99fe656 100644 --- a/docs/Advanced-Topics.rst +++ b/docs/Advanced-Topics.rst @@ -25,6 +25,7 @@ Categorical Feature Support - Categorical features must be encoded as non-negative integers (``int``) less than ``Int32.MaxValue`` (2147483647). It is best to use a contiguous range of integers started from zero. + Floating point numbers in categorical features will be rounded towards 0. - Use ``min_data_per_group``, ``cat_smooth`` to deal with over-fitting (when ``#data`` is small or ``#category`` is large). diff --git a/python-package/lightgbm/basic.py b/python-package/lightgbm/basic.py index b3c6788fef57..7eff32e1aa99 100644 --- a/python-package/lightgbm/basic.py +++ b/python-package/lightgbm/basic.py @@ -1159,6 +1159,7 @@ def __init__(self, data, label=None, reference=None, Large values could be memory consuming. Consider using consecutive integers starting from zero. All negative values in categorical features will be treated as missing values. The output cannot be monotonically constrained with respect to a categorical feature. + Floating point numbers in categorical features will be rounded towards 0. params : dict or None, optional (default=None) Other parameters for Dataset. free_raw_data : bool, optional (default=True) @@ -3563,6 +3564,7 @@ def refit( Large values could be memory consuming. Consider using consecutive integers starting from zero. All negative values in categorical features will be treated as missing values. The output cannot be monotonically constrained with respect to a categorical feature. + Floating point numbers in categorical features will be rounded towards 0. dataset_params : dict or None, optional (default=None) Other parameters for Dataset ``data``. free_raw_data : bool, optional (default=True) diff --git a/python-package/lightgbm/engine.py b/python-package/lightgbm/engine.py index 0d3c4b58543f..e0f7ebb59653 100644 --- a/python-package/lightgbm/engine.py +++ b/python-package/lightgbm/engine.py @@ -109,6 +109,7 @@ def train( Large values could be memory consuming. Consider using consecutive integers starting from zero. All negative values in categorical features will be treated as missing values. The output cannot be monotonically constrained with respect to a categorical feature. + Floating point numbers in categorical features will be rounded towards 0. keep_training_booster : bool, optional (default=False) Whether the returned Booster will be used to keep training. If False, the returned value will be converted into _InnerPredictor before returning. @@ -463,6 +464,7 @@ def cv(params, train_set, num_boost_round=100, Large values could be memory consuming. Consider using consecutive integers starting from zero. All negative values in categorical features will be treated as missing values. The output cannot be monotonically constrained with respect to a categorical feature. + Floating point numbers in categorical features will be rounded towards 0. fpreproc : callable or None, optional (default=None) Preprocessing function that takes (dtrain, dtest, params) and returns transformed versions of those. diff --git a/python-package/lightgbm/sklearn.py b/python-package/lightgbm/sklearn.py index fa1769897736..800bfcb5079e 100644 --- a/python-package/lightgbm/sklearn.py +++ b/python-package/lightgbm/sklearn.py @@ -262,6 +262,7 @@ def __call__(self, preds, dataset): Large values could be memory consuming. Consider using consecutive integers starting from zero. All negative values in categorical features will be treated as missing values. The output cannot be monotonically constrained with respect to a categorical feature. + Floating point numbers in categorical features will be rounded towards 0. callbacks : list of callable, or None, optional (default=None) List of callback functions that are applied at each iteration. See Callbacks in Python API for more information.