[R-package] avoid unnecessary computation of std deviations in `lgb.cv()` (#4360)

* [R-package] avoid unnecessary computation of std deviations in lgb.cv()
* use expect_equal()
microsoft · Jun 12, 2021 · f0bca1a · f0bca1a
1 parent 4af4698
commit f0bca1a
Show file tree

Hide file tree

Showing 3 changed files with 44 additions and 4 deletions.
diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R
@@ -26,7 +26,9 @@ CVBooster <- R6::R6Class(
 #' @param label Vector of labels, used if \code{data} is not an \code{\link{lgb.Dataset}}
 #' @param weight vector of response values. If not NULL, will set to dataset
 #' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
-#' @param showsd \code{boolean}, whether to show standard deviation of cross validation
+#' @param showsd \code{boolean}, whether to show standard deviation of cross validation.
+#'               This parameter defaults to \code{TRUE}. Setting it to \code{FALSE} can lead to a
+#'               slight speedup by avoiding unnecessary computation.
 #' @param stratified a \code{boolean} indicating whether sampling of folds should be stratified
 #'                   by the values of outcome labels.
 #' @param folds \code{list} provides a possibility to use a list of pre-defined CV folds
@@ -379,7 +381,10 @@ lgb.cv <- function(params = list()
     })
 
     # Prepare collection of evaluation results
-    merged_msg <- lgb.merge.cv.result(msg = msg)
+    merged_msg <- lgb.merge.cv.result(
+      msg = msg
+      , showsd = showsd
+    )
 
     # Write evaluation result in environment
     env$eval_list <- merged_msg$eval_list
@@ -576,7 +581,7 @@ lgb.stratified.folds <- function(y, k) {
   return(out)
 }
 
-lgb.merge.cv.result <- function(msg, showsd = TRUE) {
+lgb.merge.cv.result <- function(msg, showsd) {
 
   # Get CV message length
   if (length(msg) == 0L) {

diff --git a/R-package/man/lgb.cv.Rd b/R-package/man/lgb.cv.Rd
diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
@@ -389,6 +389,39 @@ test_that("lgb.cv() fit on linearly-relatead data improves when using linear lea
   expect_true(cv_bst_linear$best_score < cv_bst$best_score)
 })
 
+test_that("lgb.cv() respects showsd argument", {
+  dtrain <- lgb.Dataset(train$data, label = train$label)
+  params <- list(objective = "regression", metric = "l2")
+  nrounds <- 5L
+  set.seed(708L)
+  bst_showsd <- lgb.cv(
+    params = params
+    , data = dtrain
+    , nrounds = nrounds
+    , nfold = 3L
+    , min_data = 1L
+    , showsd = TRUE
+  )
+  evals_showsd <- bst_showsd$record_evals[["valid"]][["l2"]]
+  set.seed(708L)
+  bst_no_showsd <- lgb.cv(
+    params = params
+    , data = dtrain
+    , nrounds = nrounds
+    , nfold = 3L
+    , min_data = 1L
+    , showsd = FALSE
+  )
+  evals_no_showsd <- bst_no_showsd$record_evals[["valid"]][["l2"]]
+  expect_equal(
+    evals_showsd[["eval"]]
+    , evals_no_showsd[["eval"]]
+  )
+  expect_is(evals_showsd[["eval_err"]], "list")
+  expect_equal(length(evals_showsd[["eval_err"]]), nrounds)
+  expect_identical(evals_no_showsd[["eval_err"]], list())
+})
+
 context("lgb.train()")
 
 test_that("lgb.train() works as expected with multiple eval metrics", {