Skip to content

Commit

Permalink
[fix] fix duplicate added initial scores for single-leaf trees (#fixes #4708)
Browse files Browse the repository at this point in the history

* fix duplicate added initial scores for single-leaf trees

* add test case

* Fix import in Python test

* commit python suggestions

Co-authored-by: Nikita Titov <[email protected]>
  • Loading branch information
shiyu1994 and StrikerRUS authored Mar 9, 2022
1 parent 3032b64 commit f6d654b
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 12 deletions.
19 changes: 7 additions & 12 deletions src/boosting/gbdt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -419,20 +419,15 @@ bool GBDT::TrainOneIter(const score_t* gradients, const score_t* hessians) {
} else {
// only add default score one-time
if (models_.size() < static_cast<size_t>(num_tree_per_iteration_)) {
double output = 0.0;
if (!class_need_train_[cur_tree_id]) {
if (objective_function_ != nullptr) {
output = objective_function_->BoostFromScore(cur_tree_id);
if (objective_function_ != nullptr && !config_->boost_from_average && !train_score_updater_->has_init_score()) {
init_scores[cur_tree_id] = ObtainAutomaticInitialScore(objective_function_, cur_tree_id);
// updates scores
train_score_updater_->AddScore(init_scores[cur_tree_id], cur_tree_id);
for (auto& score_updater : valid_score_updater_) {
score_updater->AddScore(init_scores[cur_tree_id], cur_tree_id);
}
} else {
output = init_scores[cur_tree_id];
}
new_tree->AsConstantTree(output);
// updates scores
train_score_updater_->AddScore(output, cur_tree_id);
for (auto& score_updater : valid_score_updater_) {
score_updater->AddScore(output, cur_tree_id);
}
new_tree->AsConstantTree(init_scores[cur_tree_id]);
}
}
// add model
Expand Down
28 changes: 28 additions & 0 deletions tests/python_package_test/test_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -3424,3 +3424,31 @@ def test_pandas_nullable_dtypes():

# test equal predictions
np.testing.assert_allclose(preds, preds_nullable_dtypes)


def test_boost_from_average_with_single_leaf_trees():
    """Check that boosting from the average keeps predictions within the label range.

    Trains a small regression model with ``boost_from_average`` enabled and
    asserts that the mean prediction on the training features lies between
    the smallest and largest label.
    """
    # test data are taken from bug report
    # https://github.com/microsoft/LightGBM/issues/4708
    features = np.array(
        [
            [1021.0589, 1018.9578],
            [1023.85754, 1018.7854],
            [1024.5468, 1018.88513],
            [1019.02954, 1018.88513],
            [1016.79926, 1018.88513],
            [1007.6, 1018.88513],
        ],
        dtype=np.float32,
    )
    labels = np.array(
        [1023.8, 1024.6, 1024.4, 1023.8, 1022.0, 1014.4],
        dtype=np.float32,
    )
    params = {
        "extra_trees": True,
        "min_data_in_bin": 1,
        "extra_seed": 7,
        "objective": "regression",
        "verbose": -1,
        "boost_from_average": True,
        "min_data_in_leaf": 1,
    }
    booster = lgb.train(
        params=params,
        train_set=lgb.Dataset(features, labels),
        num_boost_round=10,
    )

    # The averaged prediction must not fall outside the range spanned by the labels.
    mean_prediction = np.mean(booster.predict(features))
    assert labels.min() <= mean_prediction <= labels.max()

0 comments on commit f6d654b

Please sign in to comment.