From 028d8822bcfbc2639b09b2f6581f70951921c9bf Mon Sep 17 00:00:00 2001 From: mattdowle Date: Sat, 5 Nov 2022 12:47:26 -0600 Subject: [PATCH 01/11] add memtest RSS to timings result and remove memtest.csv; simpler --- R/test.data.table.R | 49 ++++++++++++++++++++++++++++--------------- inst/tests/tests.Rraw | 3 ++- 2 files changed, 34 insertions(+), 18 deletions(-) diff --git a/R/test.data.table.R b/R/test.data.table.R index 298fc34c1..f158d98e4 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -113,7 +113,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F assign("started.at", proc.time(), envir=env) assign("lasttime", proc.time()[3L], envir=env) # used by test() to attribute time inbetween tests to the next test assign("timings", data.table( ID = seq_len(9999L), time=0.0, nTest=0L ), envir=env) # test timings aggregated to integer id - assign("memtest", as.logical(Sys.getenv("TEST_DATA_TABLE_MEMTEST", "FALSE")), envir=env) + assign("memtest", TRUE, envir=env) # as.logical(Sys.getenv("TEST_DATA_TABLE_MEMTEST", "FALSE")), envir=env) assign("filename", fn, envir=env) assign("inittime", as.integer(Sys.time()), envir=env) # keep measures from various test.data.table runs assign("showProgress", showProgress, envir=env) @@ -174,13 +174,21 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F } # There aren't any errors, so we can use up 11 lines for the timings table - timings = env$timings - DT = head(timings[-1L][order(-time)], 10L) # exclude id 1 as in dev that includes JIT - if ((x<-sum(timings[["nTest"]])) != ntest) { - warningf("Timings count mismatch: %d vs %d", x, ntest) # nocov + timings = env$timings[nTest>0] + if (!env$memtest) { + ans = head(timings[-1L][order(-time)], 10L) # exclude id 1 as in dev that includes JIT + if ((x<-sum(timings[["nTest"]])) != ntest) { + warningf("Timings count mismatch: %d vs %d", x, ntest) # nocov + } + catf("10 longest running tests took %ds (%d%% of %ds)\n", as.integer(tt<-DT[, sum(time)]), as.integer(100*tt/(ss<-timings[,sum(time)])), as.integer(ss)) + print(ans, class=FALSE) + } else { + y = head(order(-diff(timings$RSS)), 10L) + ans = timings[, diff:=c(NA,round(diff(RSS),1))][y+1L][,time:=NULL] # time is distracting and influenced by gc() calls; just focus on RAM usage here + catf("10 largest RAM increases (MB); see plot for cummulative effect (if any)\n") + print(ans, class=FALSE) + plot(timings$RSS) } - catf("10 longest running tests took %ds (%d%% of %ds)\n", as.integer(tt<-DT[, sum(time)]), as.integer(100*tt/(ss<-timings[,sum(time)])), as.integer(ss)) - print(DT, class=FALSE) catf("All %d tests (last %.8g) in %s completed ok in %s\n", ntest, env$prevtest, names(fn), timetaken(env$started.at)) @@ -209,7 +217,9 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F #} #if (memtest<-get("memtest", envir=env)) memtest.plot(get("inittime", envir=env)) - invisible(nfail==0L) + ans = nfail==0L + attr(ans, "timings") = timings # as attr to not upset callers who expect a TRUE/FALSE result + invisible(ans) } # nocov start @@ -278,16 +288,20 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no lasttime = get("lasttime", parent.frame()) timings = get("timings", parent.frame()) memtest = get("memtest", parent.frame()) - inittime = get("inittime", parent.frame()) + # inittime = get("inittime", parent.frame()) filename = get("filename", parent.frame()) foreign = get("foreign", parent.frame()) showProgress = get("showProgress", parent.frame()) time = nTest = NULL # to avoid 'no visible binding' note if (num>0) on.exit( { - now = proc.time()[3L] - took = now-lasttime # so that prep time between tests is attributed to the following test - assign("lasttime", now, parent.frame(), inherits=TRUE) - timings[ as.integer(num), `:=`(time=time+took, nTest=nTest+1L), verbose=FALSE ] + took = proc.time()[3L]-lasttime # so that prep time between tests is attributed to the following test + # mem = as.list(c(inittime=inittime, filename=basename(filename), timestamp=timestamp, test=num, ps_mem(), gc_mem())) # nocov + timings[as.integer(num), `:=`(time=time+took, nTest=nTest+1L), verbose=FALSE] + if (memtest) { + gc() # force gc so we can find tests that use relatively larger amounts of RAM and don't rm() them + timings[as.integer(num), RSS:=max(ps_mem(),RSS,na.rm=TRUE), verbose=FALSE] + } + assign("lasttime", proc.time()[3L], parent.frame(), inherits=TRUE) # proc.time() after gc() to exclude gc() time when memtest } ) if (showProgress) # \r can't be in gettextf msg @@ -340,10 +354,11 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no } else { out = capture.output(print(x <- suppressMessages(withCallingHandlers(tryCatch(x, error=eHandler), warning=wHandler, message=mHandler)))) } - if (memtest) { - mem = as.list(c(inittime=inittime, filename=basename(filename), timestamp=timestamp, test=num, ps_mem(), gc_mem())) # nocov - fwrite(mem, "memtest.csv", append=TRUE, verbose=FALSE) # nocov - } + #if (memtest) { + # mem = as.list(c(inittime=inittime, filename=basename(filename), timestamp=timestamp, test=num, ps_mem(), gc_mem())) # nocov + # + # fwrite(mem, "memtest.csv", append=TRUE, verbose=FALSE) # nocov + #} fail = FALSE if (.test.data.table && num>0) { if (num Date: Mon, 7 Nov 2022 15:19:47 -0700 Subject: [PATCH 02/11] memtest now an argument to test.data.table; simpler than having to use the env variable --- R/test.data.table.R | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/R/test.data.table.R b/R/test.data.table.R index f158d98e4..fa5ae8b26 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -1,4 +1,5 @@ -test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=FALSE, showProgress=interactive()&&!silent) { +test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=FALSE, showProgress=interactive()&&!silent, + memtest=as.logical(Sys.getenv("TEST_DATA_TABLE_MEMTEST", "FALSE"))) { stopifnot(isTRUEorFALSE(verbose), isTRUEorFALSE(silent), isTRUEorFALSE(showProgress)) if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) { # package developer @@ -112,10 +113,9 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F assign("whichfail", NULL, envir=env) assign("started.at", proc.time(), envir=env) assign("lasttime", proc.time()[3L], envir=env) # used by test() to attribute time inbetween tests to the next test - assign("timings", data.table( ID = seq_len(9999L), time=0.0, nTest=0L ), envir=env) # test timings aggregated to integer id - assign("memtest", TRUE, envir=env) # as.logical(Sys.getenv("TEST_DATA_TABLE_MEMTEST", "FALSE")), envir=env) + assign("timings", data.table( ID = seq_len(9999L), time=0.0, nTest=0L, RSS=0.0 ), envir=env) # test timings aggregated to integer id + assign("memtest", memtest, envir=env) assign("filename", fn, envir=env) - assign("inittime", as.integer(Sys.time()), envir=env) # keep measures from various test.data.table runs assign("showProgress", showProgress, envir=env) owd = setwd(tempdir()) # ensure writeable directory; e.g. tests that plot may write .pdf here depending on device option and/or batch mode; #5190 @@ -175,19 +175,19 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F # There aren't any errors, so we can use up 11 lines for the timings table timings = env$timings[nTest>0] - if (!env$memtest) { - ans = head(timings[-1L][order(-time)], 10L) # exclude id 1 as in dev that includes JIT + if (!memtest) { + ans = head(timings[-1L][order(-time)], 10L)[,RSS:=NULL] # exclude id 1 as in dev that includes JIT if ((x<-sum(timings[["nTest"]])) != ntest) { warningf("Timings count mismatch: %d vs %d", x, ntest) # nocov } - catf("10 longest running tests took %ds (%d%% of %ds)\n", as.integer(tt<-DT[, sum(time)]), as.integer(100*tt/(ss<-timings[,sum(time)])), as.integer(ss)) + catf("10 longest running tests took %ds (%d%% of %ds)\n", as.integer(tt<-ans[, sum(time)]), as.integer(100*tt/(ss<-timings[,sum(time)])), as.integer(ss)) print(ans, class=FALSE) } else { y = head(order(-diff(timings$RSS)), 10L) ans = timings[, diff:=c(NA,round(diff(RSS),1))][y+1L][,time:=NULL] # time is distracting and influenced by gc() calls; just focus on RAM usage here - catf("10 largest RAM increases (MB); see plot for cummulative effect (if any)\n") + catf("10 largest RAM increases (MB); see plot for cumulative effect (if any)\n") print(ans, class=FALSE) - plot(timings$RSS) + plot(timings$RSS, main=fn, ylab="RSS (MB)") } catf("All %d tests (last %.8g) in %s completed ok in %s\n", ntest, env$prevtest, names(fn), timetaken(env$started.at)) @@ -288,20 +288,18 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no lasttime = get("lasttime", parent.frame()) timings = get("timings", parent.frame()) memtest = get("memtest", parent.frame()) - # inittime = get("inittime", parent.frame()) filename = get("filename", parent.frame()) foreign = get("foreign", parent.frame()) showProgress = get("showProgress", parent.frame()) time = nTest = NULL # to avoid 'no visible binding' note if (num>0) on.exit( { took = proc.time()[3L]-lasttime # so that prep time between tests is attributed to the following test - # mem = as.list(c(inittime=inittime, filename=basename(filename), timestamp=timestamp, test=num, ps_mem(), gc_mem())) # nocov timings[as.integer(num), `:=`(time=time+took, nTest=nTest+1L), verbose=FALSE] if (memtest) { - gc() # force gc so we can find tests that use relatively larger amounts of RAM and don't rm() them - timings[as.integer(num), RSS:=max(ps_mem(),RSS,na.rm=TRUE), verbose=FALSE] + gc() # force gc so we can find tests that use relatively larger amounts of RAM + timings[as.integer(num), RSS:=max(ps_mem(),RSS), verbose=FALSE] } - assign("lasttime", proc.time()[3L], parent.frame(), inherits=TRUE) # proc.time() after gc() to exclude gc() time when memtest + assign("lasttime", proc.time()[3L], parent.frame(), inherits=TRUE) # after gc() to exclude gc() time from next test when memtest } ) if (showProgress) # \r can't be in gettextf msg @@ -344,9 +342,6 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no actual$message <<- c(actual$message, conditionMessage(m)) m } - if (memtest) { - timestamp = as.numeric(Sys.time()) # nocov - } if (is.null(output) && is.null(notOutput)) { x = suppressMessages(withCallingHandlers(tryCatch(x, error=eHandler), warning=wHandler, message=mHandler)) # save the overhead of capture.output() since there are a lot of tests, often called in loops @@ -354,11 +349,6 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no } else { out = capture.output(print(x <- suppressMessages(withCallingHandlers(tryCatch(x, error=eHandler), warning=wHandler, message=mHandler)))) } - #if (memtest) { - # mem = as.list(c(inittime=inittime, filename=basename(filename), timestamp=timestamp, test=num, ps_mem(), gc_mem())) # nocov - # - # fwrite(mem, "memtest.csv", append=TRUE, verbose=FALSE) # nocov - #} fail = FALSE if (.test.data.table && num>0) { if (num Date: Mon, 7 Nov 2022 16:40:35 -0700 Subject: [PATCH 03/11] add local=TRUE to source() in test 2036 to stop it overwriting DT in .GlobalEnv, closes #5514; remove DTfun fix no longer needed. --- inst/tests/tests.Rraw | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 5137745d1..633a562c0 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -7,7 +7,6 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) { } if ((tt<-compiler::enableJIT(-1))>0) cat("This is dev mode and JIT is enabled (level ", tt, ") so there will be a brief pause around the first test.\n", sep="") - DTfun = DT # just in dev-mode, DT() gets overwritten in .GlobalEnv by DT objects here in tests.Rraw; we restore DT() in test 2212 } else { require(data.table) # Make symbols to the installed version's ::: so that we can i) test internal-only not-exposed R functions @@ -15326,10 +15325,10 @@ test(2035.3, fread('A,B\n"foo","ba"r"', quote=""), ans) # source() printing edge case; #2369 setup = c('DT = data.table(a = 1)') writeLines(c(setup, 'DT[ , a := 1]'), tmp<-tempfile()) -test(2036.1, !any(grepl("1: 1", capture.output(source(tmp, echo = TRUE)), fixed = TRUE))) +test(2036.1, !any(grepl("1: 1", capture.output(source(tmp, echo=TRUE, local=TRUE)), fixed=TRUE))) # local= #5514 ## test force-printing still works writeLines(c(setup, 'DT[ , a := 1][]'), tmp) -test(2036.2, source(tmp, echo = TRUE), output = "1:\\s+1") +test(2036.2, source(tmp, echo=TRUE, local=TRUE), output="1:\\s+1") # more helpful guidance when assigning before setDT() after readRDS(); #1729 DT = data.table(a = 1:3) @@ -18318,7 +18317,6 @@ for (col in c("a","b","c")) { # DT() functional form, #4872 #5106 #5107 #5129 if (base::getRversion() >= "4.1.0") { # we have to EVAL "|>" here too otherwise this tests.Rraw file won't parse in R<4.1.0 - if (exists("DTfun")) DT=DTfun # just in dev-mode restore DT() in .GlobalEnv as DT object overwrote it in tests above droprn = function(df) { rownames(df)=NULL; df } # TODO: could retain rownames where droprn is currently used below test(2212.011, EVAL("mtcars |> DT(mpg>20, .(mean_hp=round(mean(hp),2)), by=cyl)"), data.frame(cyl=c(6,4), mean_hp=c(110.0, 82.64))) From a24cfd385f548d3959c559bbefd70b0832465499 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Mon, 7 Nov 2022 16:49:41 -0700 Subject: [PATCH 04/11] added reminder that memtest should be the first task in a fresh R session --- R/test.data.table.R | 2 ++ 1 file changed, 2 insertions(+) diff --git a/R/test.data.table.R b/R/test.data.table.R index fa5ae8b26..7c72515fe 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -120,6 +120,8 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F owd = setwd(tempdir()) # ensure writeable directory; e.g. tests that plot may write .pdf here depending on device option and/or batch mode; #5190 on.exit(setwd(owd)) + + if (memtest) catf("\n***\n*** memtest=TRUE. This should be the first task in a fresh R session for best results. Ctrl-C now if not.\n***\n\n") err = try(sys.source(fn, envir=env), silent=silent) From 25ef45b2a07e92b8dc883529e7c938e71ed756c5 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Mon, 7 Nov 2022 16:53:10 -0700 Subject: [PATCH 05/11] remove commented ok memtest plot code; just use base R plot instead, simpler --- R/test.data.table.R | 28 +--------------------------- 1 file changed, 1 insertion(+), 27 deletions(-) diff --git a/R/test.data.table.R b/R/test.data.table.R index 7c72515fe..404623960 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -189,36 +189,10 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F ans = timings[, diff:=c(NA,round(diff(RSS),1))][y+1L][,time:=NULL] # time is distracting and influenced by gc() calls; just focus on RAM usage here catf("10 largest RAM increases (MB); see plot for cumulative effect (if any)\n") print(ans, class=FALSE) - plot(timings$RSS, main=fn, ylab="RSS (MB)") + plot(timings$RSS, main=basename(fn), ylab="RSS (MB)") } catf("All %d tests (last %.8g) in %s completed ok in %s\n", ntest, env$prevtest, names(fn), timetaken(env$started.at)) - - ## this chunk requires to include new suggested deps: graphics, grDevices - #memtest.plot = function(.inittime) { - # if (!all(requireNamespace(c("graphics","grDevices"), quietly=TRUE))) return(invisible()) - # inittime=PS_rss=GC_used=GC_max_used=NULL - # m = fread("memtest.csv")[inittime==.inittime] - # if (nrow(m)) { - # ps_na = allNA(m[["PS_rss"]]) # OS with no 'ps -o rss R' support - # grDevices::png("memtest.png") - # p = graphics::par(mfrow=c(if (ps_na) 2 else 3, 2)) - # if (!ps_na) { - # m[, graphics::plot(test, PS_rss, pch=18, xlab="test num", ylab="mem MB", main="ps -o rss R")] - # m[, graphics::plot(timestamp, PS_rss, type="l", xlab="timestamp", ylab="mem MB", main="ps -o rss R")] - # } - # m[, graphics::plot(test, GC_used, pch=18, xlab="test num", ylab="mem MB", main="gc used")] - # m[, graphics::plot(timestamp, GC_used, type="l", xlab="timestamp", ylab="mem MB", main="gc used")] - # m[, graphics::plot(test, GC_max_used, pch=18, xlab="test num", ylab="mem MB", main="gc max used")] - # m[, graphics::plot(timestamp, GC_max_used, type="l", xlab="timestamp", ylab="mem MB", main="gc max used")] - # graphics::par(p) - # grDevices::dev.off() - # } else { - # warningf("test.data.table runs with memory testing but did not collect any memory statistics.") - # } - #} - #if (memtest<-get("memtest", envir=env)) memtest.plot(get("inittime", envir=env)) - ans = nfail==0L attr(ans, "timings") = timings # as attr to not upset callers who expect a TRUE/FALSE result invisible(ans) From 6de7f1943a1325e21278d284f5b57a5401879b46 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Mon, 7 Nov 2022 17:17:02 -0700 Subject: [PATCH 06/11] test ps_mem() up front; use --no-headers instead of tail -1 --- R/test.data.table.R | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/R/test.data.table.R b/R/test.data.table.R index 404623960..ac6bcf9e4 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -121,7 +121,10 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F owd = setwd(tempdir()) # ensure writeable directory; e.g. tests that plot may write .pdf here depending on device option and/or batch mode; #5190 on.exit(setwd(owd)) - if (memtest) catf("\n***\n*** memtest=TRUE. This should be the first task in a fresh R session for best results. Ctrl-C now if not.\n***\n\n") + if (memtest) { + catf("\n***\n*** memtest=TRUE. This should be the first task in a fresh R session for best results. Ctrl-C now if not.\n***\n\n") + if (is.na(ps_mem())) stopf("memtest intended for Linux. Step through ps_mem() to see what went wrong.") + } err = try(sys.source(fn, envir=env), silent=silent) @@ -221,17 +224,16 @@ INT = function(...) { as.integer(c(...)) } # utility used in tests.Rraw ps_mem = function() { # nocov start - cmd = sprintf("ps -o rss %s | tail -1", Sys.getpid()) - ans = tryCatch(as.numeric(system(cmd, intern=TRUE, ignore.stderr=TRUE)), warning=function(w) NA_real_, error=function(e) NA_real_) - stopifnot(length(ans)==1L) # extra check if other OSes would not handle 'tail -1' properly for some reason - # returns RSS memory occupied by current R process in MB rounded to 1 decimal places (as in gc), ps already returns KB - c("PS_rss"=round(ans / 1024, 1L)) + cmd = paste0("ps -o rss --no-headers ", Sys.getpid()) # ps returns KB + ans = tryCatch(as.numeric(system(cmd, intern=TRUE)), warning=function(w) NA_real_, error=function(e) NA_real_) + if (length(ans)!=1L || !is.numeric(ans)) ans=NA_real_ # just in case + round(ans / 1024, 1L) # return MB # nocov end } gc_mem = function() { # nocov start - # gc reported memory in MB + # gc reports memory in MB m = apply(gc()[, c(2L, 4L, 6L)], 2L, sum) names(m) = c("GC_used", "GC_gc_trigger", "GC_max_used") m From 3c538b3c3a1e99b1c8c911a0d086d39f83cdb459 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Mon, 7 Nov 2022 19:31:26 -0700 Subject: [PATCH 07/11] add argument to .Rd too --- man/test.data.table.Rd | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/man/test.data.table.Rd b/man/test.data.table.Rd index ba0fe25f9..90f58593a 100644 --- a/man/test.data.table.Rd +++ b/man/test.data.table.Rd @@ -7,7 +7,8 @@ \usage{ test.data.table(script = "tests.Rraw", verbose = FALSE, pkg = ".", silent = FALSE, - showProgress = interactive() && !silent) + showProgress = interactive() && !silent, + memtest = as.logical(Sys.getenv("TEST_DATA_TABLE_MEMTEST", "FALSE"))) } \arguments{ \item{script}{ Run arbitrary R test script. } @@ -15,6 +16,7 @@ test.data.table(script = "tests.Rraw", verbose = FALSE, pkg = ".", \item{pkg}{ Root directory name under which all package content (ex: DESCRIPTION, src/, R/, inst/ etc..) resides. Used only in \emph{dev-mode}. } \item{silent}{ Controls what happens if a test fails. Like \code{silent} in \code{\link{try}}, \code{TRUE} causes the error message to be suppressed and \code{FALSE} to be returned, otherwise the error is returned. } \item{showProgress}{ Output 'Running test ...\\r' at the start of each test? } +\item{memtest}{ Measure and report memory usage of tests rather than time taken. Intended for and tested on Linux. } } \details{ Runs a series of tests. These can be used to see features and examples of usage, too. Running test.data.table will tell you the full location of the test file(s) to open. From 22997a86eca7f86b6263d670c24ed7c8d591b10c Mon Sep 17 00:00:00 2001 From: mattdowle Date: Mon, 7 Nov 2022 19:44:35 -0700 Subject: [PATCH 08/11] avoid 'no visible binding' note --- R/test.data.table.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R/test.data.table.R b/R/test.data.table.R index ac6bcf9e4..cc8747857 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -179,6 +179,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F } # There aren't any errors, so we can use up 11 lines for the timings table + nTest = RSS = NULL # to avoid 'no visible binding' note timings = env$timings[nTest>0] if (!memtest) { ans = head(timings[-1L][order(-time)], 10L)[,RSS:=NULL] # exclude id 1 as in dev that includes JIT @@ -269,7 +270,7 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no filename = get("filename", parent.frame()) foreign = get("foreign", parent.frame()) showProgress = get("showProgress", parent.frame()) - time = nTest = NULL # to avoid 'no visible binding' note + time = nTest = RSS = NULL # to avoid 'no visible binding' note if (num>0) on.exit( { took = proc.time()[3L]-lasttime # so that prep time between tests is attributed to the following test timings[as.integer(num), `:=`(time=time+took, nTest=nTest+1L), verbose=FALSE] From c211009e37af0b3615a90b401af17fff9d0a62a8 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Tue, 8 Nov 2022 10:46:34 -0700 Subject: [PATCH 09/11] exclude test 1 in dev only; thanks Jan --- R/test.data.table.R | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/R/test.data.table.R b/R/test.data.table.R index cc8747857..0d19a7cf1 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -4,6 +4,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) { # package developer # nocov start + dev = TRUE if ("package:data.table" %chin% search()) stopf("data.table package is loaded. Unload or start a fresh R session.") rootdir = if (pkg!="." && pkg %chin% dir()) file.path(getwd(), pkg) else Sys.getenv("PROJ_PATH") subdir = file.path("inst","tests") @@ -11,6 +12,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F # nocov end } else { # i) R CMD check and ii) user running test.data.table() + dev = FALSE rootdir = getNamespaceInfo("data.table","path") subdir = "tests" env = new.env(parent=parent.env(.GlobalEnv)) # when user runs test.data.table() we don't want their variables in .GlobalEnv affecting tests, #3705 @@ -182,7 +184,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F nTest = RSS = NULL # to avoid 'no visible binding' note timings = env$timings[nTest>0] if (!memtest) { - ans = head(timings[-1L][order(-time)], 10L)[,RSS:=NULL] # exclude id 1 as in dev that includes JIT + ans = head(timings[if (dev) -1L else TRUE][order(-time)], 10L)[,RSS:=NULL] # exclude id 1 in dev as that includes JIT if ((x<-sum(timings[["nTest"]])) != ntest) { warningf("Timings count mismatch: %d vs %d", x, ntest) # nocov } From 997c4ab55819943aa7c514c62bc4f98b2ec4a1fe Mon Sep 17 00:00:00 2001 From: mattdowle Date: Tue, 8 Nov 2022 15:16:37 -0700 Subject: [PATCH 10/11] memtest can be 1 (gc before ps) or 2 (gc after ps); thanks Jan --- R/test.data.table.R | 11 +++++++---- man/test.data.table.Rd | 4 ++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/R/test.data.table.R b/R/test.data.table.R index 0d19a7cf1..1c212f58b 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -1,6 +1,8 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=FALSE, showProgress=interactive()&&!silent, - memtest=as.logical(Sys.getenv("TEST_DATA_TABLE_MEMTEST", "FALSE"))) { + memtest=Sys.getenv("TEST_DATA_TABLE_MEMTEST", 0)) { stopifnot(isTRUEorFALSE(verbose), isTRUEorFALSE(silent), isTRUEorFALSE(showProgress)) + memtest = as.integer(memtest) + stopifnot(length(memtest)==1L, memtest %in% 0:2) if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) { # package developer # nocov start @@ -124,7 +126,7 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F on.exit(setwd(owd)) if (memtest) { - catf("\n***\n*** memtest=TRUE. This should be the first task in a fresh R session for best results. Ctrl-C now if not.\n***\n\n") + catf("\n***\n*** memtest=%d. This should be the first task in a fresh R session for best results. Ctrl-C now if not.\n***\n\n", memtest) if (is.na(ps_mem())) stopf("memtest intended for Linux. Step through ps_mem() to see what went wrong.") } @@ -277,8 +279,9 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no took = proc.time()[3L]-lasttime # so that prep time between tests is attributed to the following test timings[as.integer(num), `:=`(time=time+took, nTest=nTest+1L), verbose=FALSE] if (memtest) { - gc() # force gc so we can find tests that use relatively larger amounts of RAM + if (memtest==1L) gc() # see #5515 for before/after timings[as.integer(num), RSS:=max(ps_mem(),RSS), verbose=FALSE] + if (memtest==2L) gc() } assign("lasttime", proc.time()[3L], parent.frame(), inherits=TRUE) # after gc() to exclude gc() time from next test when memtest } ) @@ -293,7 +296,7 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no # not be flushed to the output upon segfault, depending on OS). } else { # not `test.data.table` but developer running tests manually; i.e. `cc(F); test(...)` - memtest = FALSE # nocov + memtest = 0L # nocov filename = NA_character_ # nocov foreign = FALSE # nocov ; assumes users of 'cc(F); test(...)' has LANGUAGE=en showProgress = FALSE # nocov diff --git a/man/test.data.table.Rd b/man/test.data.table.Rd index 90f58593a..2df2a3284 100644 --- a/man/test.data.table.Rd +++ b/man/test.data.table.Rd @@ -8,7 +8,7 @@ test.data.table(script = "tests.Rraw", verbose = FALSE, pkg = ".", silent = FALSE, showProgress = interactive() && !silent, - memtest = as.logical(Sys.getenv("TEST_DATA_TABLE_MEMTEST", "FALSE"))) + memtest = Sys.getenv("TEST_DATA_TABLE_MEMTEST", 0)) } \arguments{ \item{script}{ Run arbitrary R test script. } @@ -16,7 +16,7 @@ test.data.table(script = "tests.Rraw", verbose = FALSE, pkg = ".", \item{pkg}{ Root directory name under which all package content (ex: DESCRIPTION, src/, R/, inst/ etc..) resides. Used only in \emph{dev-mode}. } \item{silent}{ Controls what happens if a test fails. Like \code{silent} in \code{\link{try}}, \code{TRUE} causes the error message to be suppressed and \code{FALSE} to be returned, otherwise the error is returned. } \item{showProgress}{ Output 'Running test ...\\r' at the start of each test? } -\item{memtest}{ Measure and report memory usage of tests rather than time taken. Intended for and tested on Linux. } +\item{memtest}{ Measure and report memory usage of tests (1:gc before ps, 2:gc after ps) rather than time taken (0) by default. Intended for and tested on Linux. See PR #5515 for more details. } } \details{ Runs a series of tests. These can be used to see features and examples of usage, too. Running test.data.table will tell you the full location of the test file(s) to open. From ac2b4ea1a11f55f9275a78ffefa6151eacd52302 Mon Sep 17 00:00:00 2001 From: mattdowle Date: Tue, 8 Nov 2022 15:40:53 -0700 Subject: [PATCH 11/11] added news item --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index 57214ade8..15bf7e8ea 100644 --- a/NEWS.md +++ b/NEWS.md @@ -599,6 +599,8 @@ 15. Thanks to @ssh352, Václav Tlapák, Cole Miller, András Svraka and Toby Dylan Hocking for reporting and bisecting a significant performance regression in dev. This was fixed before release thanks to a PR by Jan Gorecki, [#5463](https://github.com/Rdatatable/data.table/pull/5463). +16. `test.data.table()` no longer creates `DT` in `.GlobalEnv` and gains `memtest=` for use on Linux to report which tests use the most memory. + # data.table [v1.14.4](https://github.com/Rdatatable/data.table/milestone/26?closed=1) (17 Oct 2022)