Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[python-package][R-package] allow using feature names when retrieving number of bins #5116

Merged
10 commits merged on May 17, 2022
6 changes: 6 additions & 0 deletions R-package/R/lgb.Dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,12 @@ Dataset <- R6::R6Class(
if (lgb.is.null.handle(x = private$handle)) {
stop("Cannot get number of bins in feature before constructing Dataset.")
}
if (is.character(feature)) {
feature <- which(colnames(self) == feature)
if (length(feature) == 0L) {
stop("feature not found")
}
jameslamb marked this conversation as resolved.
Show resolved Hide resolved
}
num_bin <- integer(1L)
.Call(
LGBM_DatasetGetFeatureNumBin_R
Expand Down
2 changes: 2 additions & 0 deletions R-package/tests/testthat/test_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -547,4 +547,6 @@ test_that("lgb.Dataset$get_feature_num_bin() works", {
)
actual_num_bins <- sapply(1L:5L, ds$get_feature_num_bin)
expect_identical(actual_num_bins, expected_num_bins)
bins_by_name <- sapply(colnames(raw_mat), ds$get_feature_num_bin)
expect_identical(unname(bins_by_name), expected_num_bins)
})
8 changes: 5 additions & 3 deletions python-package/lightgbm/basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -2386,20 +2386,22 @@ def num_feature(self):
else:
raise LightGBMError("Cannot get num_feature before construct dataset")

def feature_num_bin(self, feature: int) -> int:
def feature_num_bin(self, feature: Union[int, str]) -> int:
"""Get the number of bins for a feature.

Parameters
----------
feature : int
Index of the feature.
feature : int or str
Index or name of the feature.

Returns
-------
number_of_bins : int
The number of constructed bins for the feature in the Dataset.
"""
if self.handle is not None:
if isinstance(feature, str):
feature = self.feature_name.index(feature)
jmoralez marked this conversation as resolved.
Show resolved Hide resolved
ret = ctypes.c_int(0)
_safe_call(_LIB.LGBM_DatasetGetFeatureNumBin(self.handle,
ctypes.c_int(feature),
Expand Down
6 changes: 5 additions & 1 deletion tests/python_package_test/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -634,7 +634,9 @@ def test_feature_num_bin(min_data_in_bin):
np.array([1, 2] * 49 + 2 * [np.nan]),
np.zeros(100),
]).T
ds = lgb.Dataset(X, params={'min_data_in_bin': min_data_in_bin}).construct()
feature_name = [f'x{i}' for i in range(X.shape[1])]
ds = lgb.Dataset(X, params={'min_data_in_bin': min_data_in_bin}, feature_name=feature_name)
ds.construct()
expected_num_bins = [
100 // min_data_in_bin + 1, # extra bin for zero
3, # 0, 1, 2
Expand All @@ -644,6 +646,8 @@ def test_feature_num_bin(min_data_in_bin):
]
actual_num_bins = [ds.feature_num_bin(i) for i in range(X.shape[1])]
assert actual_num_bins == expected_num_bins
bins_by_name = [ds.feature_num_bin(name) for name in feature_name]
assert bins_by_name == expected_num_bins


def test_feature_num_bin_with_max_bin_by_feature():
Expand Down