Skip to content

Commit

Permalink
[R-package] move more finalizer logic into C++ side to address memory…
Browse files Browse the repository at this point in the history
… leaks (#4353)

* [R-package] move more finalizer logic intoo C++ side

* add C finalizers

* use gc()

* put skip() back

* Update R-package/tests/testthat/test_lgb.Booster.R

Co-authored-by: Nikita Titov <[email protected]>

Co-authored-by: Nikita Titov <[email protected]>
  • Loading branch information
jameslamb and StrikerRUS authored Jun 13, 2021
1 parent f0bca1a commit 53ffba7
Show file tree
Hide file tree
Showing 7 changed files with 96 additions and 32 deletions.
19 changes: 5 additions & 14 deletions R-package/R/lgb.Booster.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,12 @@ Booster <- R6::R6Class(

# Finalize will free up the handles
finalize = function() {

# Check the need for freeing handle
if (!lgb.is.null.handle(x = private$handle)) {

# Freeing up handle
.Call(
LGBM_BoosterFree_R
, private$handle
)
private$handle <- NULL

}

.Call(
LGBM_BoosterFree_R
, private$handle
)
private$handle <- NULL
return(invisible(NULL))

},

# Initialize will create a starter booster
Expand Down
19 changes: 5 additions & 14 deletions R-package/R/lgb.Dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,21 +8,12 @@ Dataset <- R6::R6Class(

# Finalize will free up the handles
finalize = function() {

# Check the need for freeing handle
if (!lgb.is.null.handle(x = private$handle)) {

# Freeing up handle
.Call(
LGBM_DatasetFree_R
, private$handle
)
private$handle <- NULL

}

.Call(
LGBM_DatasetFree_R
, private$handle
)
private$handle <- NULL
return(invisible(NULL))

},

# Initialize will create a starter dataset
Expand Down
3 changes: 1 addition & 2 deletions R-package/R/lgb.Predictor.R
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,8 @@ Predictor <- R6::R6Class(
finalize = function() {

# Check the need for freeing handle
if (private$need_free_handle && !lgb.is.null.handle(x = private$handle)) {
if (private$need_free_handle) {

# Freeing up handle
.Call(
LGBM_BoosterFree_R
, private$handle
Expand Down
19 changes: 17 additions & 2 deletions R-package/src/lightgbm_R.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@ SEXP LGBM_HandleIsNull_R(SEXP handle) {
return Rf_ScalarLogical(R_ExternalPtrAddr(handle) == NULL);
}

void _DatasetFinalizer(SEXP handle) {
LGBM_DatasetFree_R(handle);
}

SEXP LGBM_DatasetCreateFromFile_R(SEXP filename,
SEXP parameters,
SEXP reference) {
Expand All @@ -59,6 +63,7 @@ SEXP LGBM_DatasetCreateFromFile_R(SEXP filename,
CHECK_CALL(LGBM_DatasetCreateFromFile(CHAR(Rf_asChar(filename)), CHAR(Rf_asChar(parameters)),
ref, &handle));
ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DatasetFinalizer, TRUE);
UNPROTECT(1);
return ret;
R_API_END();
Expand Down Expand Up @@ -90,6 +95,7 @@ SEXP LGBM_DatasetCreateFromCSC_R(SEXP indptr,
p_data, C_API_DTYPE_FLOAT64, nindptr, ndata,
nrow, CHAR(Rf_asChar(parameters)), ref, &handle));
ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DatasetFinalizer, TRUE);
UNPROTECT(1);
return ret;
R_API_END();
Expand All @@ -113,6 +119,7 @@ SEXP LGBM_DatasetCreateFromMat_R(SEXP data,
CHECK_CALL(LGBM_DatasetCreateFromMat(p_mat, C_API_DTYPE_FLOAT64, nrow, ncol, COL_MAJOR,
CHAR(Rf_asChar(parameters)), ref, &handle));
ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DatasetFinalizer, TRUE);
UNPROTECT(1);
return ret;
R_API_END();
Expand All @@ -136,6 +143,7 @@ SEXP LGBM_DatasetGetSubset_R(SEXP handle,
idxvec.data(), len, CHAR(Rf_asChar(parameters)),
&res));
ret = PROTECT(R_MakeExternalPtr(res, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _DatasetFinalizer, TRUE);
UNPROTECT(1);
return ret;
R_API_END();
Expand Down Expand Up @@ -211,7 +219,7 @@ SEXP LGBM_DatasetSaveBinary_R(SEXP handle,

SEXP LGBM_DatasetFree_R(SEXP handle) {
R_API_BEGIN();
if (R_ExternalPtrAddr(handle)) {
if (!Rf_isNull(handle) && R_ExternalPtrAddr(handle)) {
CHECK_CALL(LGBM_DatasetFree(R_ExternalPtrAddr(handle)));
R_ClearExternalPtr(handle);
}
Expand Down Expand Up @@ -320,9 +328,13 @@ SEXP LGBM_DatasetGetNumFeature_R(SEXP handle,

// --- start Booster interfaces

void _BoosterFinalizer(SEXP handle) {
LGBM_BoosterFree_R(handle);
}

SEXP LGBM_BoosterFree_R(SEXP handle) {
R_API_BEGIN();
if (R_ExternalPtrAddr(handle)) {
if (!Rf_isNull(handle) && R_ExternalPtrAddr(handle)) {
CHECK_CALL(LGBM_BoosterFree(R_ExternalPtrAddr(handle)));
R_ClearExternalPtr(handle);
}
Expand All @@ -336,6 +348,7 @@ SEXP LGBM_BoosterCreate_R(SEXP train_data,
BoosterHandle handle = nullptr;
CHECK_CALL(LGBM_BoosterCreate(R_ExternalPtrAddr(train_data), CHAR(Rf_asChar(parameters)), &handle));
ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
UNPROTECT(1);
return ret;
R_API_END();
Expand All @@ -348,6 +361,7 @@ SEXP LGBM_BoosterCreateFromModelfile_R(SEXP filename) {
BoosterHandle handle = nullptr;
CHECK_CALL(LGBM_BoosterCreateFromModelfile(CHAR(Rf_asChar(filename)), &out_num_iterations, &handle));
ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
UNPROTECT(1);
return ret;
R_API_END();
Expand All @@ -360,6 +374,7 @@ SEXP LGBM_BoosterLoadModelFromString_R(SEXP model_str) {
BoosterHandle handle = nullptr;
CHECK_CALL(LGBM_BoosterLoadModelFromString(CHAR(Rf_asChar(model_str)), &out_num_iterations, &handle));
ret = PROTECT(R_MakeExternalPtr(handle, R_NilValue, R_NilValue));
R_RegisterCFinalizerEx(ret, _BoosterFinalizer, TRUE);
UNPROTECT(1);
return ret;
R_API_END();
Expand Down
26 changes: 26 additions & 0 deletions R-package/tests/testthat/test_Predictor.R
Original file line number Diff line number Diff line change
@@ -1,5 +1,31 @@
context("Predictor")

test_that("Predictor$finalize() should not fail", {
X <- as.matrix(as.integer(iris[, "Species"]), ncol = 1L)
y <- iris[["Sepal.Length"]]
dtrain <- lgb.Dataset(X, label = y)
bst <- lgb.train(
data = dtrain
, objective = "regression"
, verbose = -1L
, nrounds = 3L
)
model_file <- tempfile(fileext = ".model")
bst$save_model(filename = model_file)
predictor <- Predictor$new(modelfile = model_file)

expect_true(lgb.is.Predictor(predictor))

expect_false(lgb.is.null.handle(predictor$.__enclos_env__$private$handle))

predictor$finalize()
expect_true(lgb.is.null.handle(predictor$.__enclos_env__$private$handle))

# calling finalize() a second time shouldn't cause any issues
predictor$finalize()
expect_true(lgb.is.null.handle(predictor$.__enclos_env__$private$handle))
})

test_that("predictions do not fail for integer input", {
X <- as.matrix(as.integer(iris[, "Species"]), ncol = 1L)
y <- iris[["Sepal.Length"]]
Expand Down
18 changes: 18 additions & 0 deletions R-package/tests/testthat/test_dataset.R
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,24 @@ test_that("Dataset$update_params() works correctly for recognized Dataset parame
}
})

test_that("Dataset$finalize() should not fail on an already-finalized Dataset", {
dtest <- lgb.Dataset(
data = test_data
, label = test_label
)
expect_true(lgb.is.null.handle(dtest$.__enclos_env__$private$handle))

dtest$construct()
expect_false(lgb.is.null.handle(dtest$.__enclos_env__$private$handle))

dtest$finalize()
expect_true(lgb.is.null.handle(dtest$.__enclos_env__$private$handle))

# calling finalize() a second time shouldn't cause any issues
dtest$finalize()
expect_true(lgb.is.null.handle(dtest$.__enclos_env__$private$handle))
})

test_that("lgb.Dataset: should be able to run lgb.train() immediately after using lgb.Dataset() on a file", {
dtest <- lgb.Dataset(
data = test_data
Expand Down
24 changes: 24 additions & 0 deletions R-package/tests/testthat/test_lgb.Booster.R
Original file line number Diff line number Diff line change
@@ -1,3 +1,27 @@
context("Booster")

test_that("Booster$finalize() should not fail", {
X <- as.matrix(as.integer(iris[, "Species"]), ncol = 1L)
y <- iris[["Sepal.Length"]]
dtrain <- lgb.Dataset(X, label = y)
bst <- lgb.train(
data = dtrain
, objective = "regression"
, verbose = -1L
, nrounds = 3L
)
expect_true(lgb.is.Booster(bst))

expect_false(lgb.is.null.handle(bst$.__enclos_env__$private$handle))

bst$finalize()
expect_true(lgb.is.null.handle(bst$.__enclos_env__$private$handle))

# calling finalize() a second time shouldn't cause any issues
bst$finalize()
expect_true(lgb.is.null.handle(bst$.__enclos_env__$private$handle))
})

context("lgb.get.eval.result")

test_that("lgb.get.eval.result() should throw an informative error if booster is not an lgb.Booster", {
Expand Down

0 comments on commit 53ffba7

Please sign in to comment.