Commit 2d2892c: Move ram tests (#5520)
mattdowle authored Nov 15, 2022
1 parent 5affced commit 2d2892c
Showing 5 changed files with 514 additions and 427 deletions.
22 changes: 9 additions & 13 deletions R/test.data.table.R
@@ -126,8 +126,8 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
on.exit(setwd(owd))

if (memtest) {
catf("\n***\n*** memtest=%d. This should be the first task in a fresh R session for best results. Ctrl-C now if not.\n***\n\n", memtest)
if (is.na(ps_mem())) stopf("memtest intended for Linux. Step through ps_mem() to see what went wrong.")
catf("\n***\n*** memtest=%d. This should be the first call in a fresh R_GC_MEM_GROW=0 R session for best results. Ctrl-C now if not.\n***\n\n", memtest)
if (is.na(rss())) stopf("memtest intended for Linux. Step through data.table:::rss() to see what went wrong.")
}

err = try(sys.source(fn, envir=env), silent=silent)
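# Note (illustrative, not part of this commit): how a memtest run is intended to be
# launched, based on the semantics visible in this hunk. Start a fresh session from a
# shell with
#   R_GC_MEM_GROW=0 R
# so heap-growth steps don't mask per-test RSS deltas, then as the first call:
#   require(data.table)
#   test.data.table(memtest=1L)  # memtest=1L gc()s before each RSS reading, 2L after; see #5515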
@@ -197,7 +197,12 @@ test.data.table = function(script="tests.Rraw", verbose=FALSE, pkg=".", silent=F
ans = timings[, diff:=c(NA,round(diff(RSS),1))][y+1L][,time:=NULL] # time is distracting and influenced by gc() calls; just focus on RAM usage here
catf("10 largest RAM increases (MB); see plot for cumulative effect (if any)\n")
print(ans, class=FALSE)
plot(timings$RSS, main=basename(fn), ylab="RSS (MB)")
dev.new(width=14, height=7)
par(mfrow=c(1,2))
plot(timings$RSS, main=paste(basename(fn),"\nylim[0]=0 for context"), ylab="RSS (MB)", ylim=c(0,max(timings$RSS)))
mtext(lastRSS<-as.integer(ceiling(last(timings$RSS))), side=4, at=lastRSS, las=1, font=2)
plot(timings$RSS, main=paste(basename(fn),"\nylim=range for inspection"), ylab="RSS (MB)")
mtext(lastRSS, side=4, at=lastRSS, las=1, font=2)
}

catf("All %d tests (last %.8g) in %s completed ok in %s\n", ntest, env$prevtest, names(fn), timetaken(env$started.at))
@@ -227,15 +232,6 @@ compactprint = function(DT, topn=2L) {

INT = function(...) { as.integer(c(...)) } # utility used in tests.Rraw

ps_mem = function() {
# nocov start
cmd = paste0("ps -o rss --no-headers ", Sys.getpid()) # ps returns KB
ans = tryCatch(as.numeric(system(cmd, intern=TRUE)), warning=function(w) NA_real_, error=function(e) NA_real_)
if (length(ans)!=1L || !is.numeric(ans)) ans=NA_real_ # just in case
round(ans / 1024, 1L) # return MB
# nocov end
}

gc_mem = function() {
# nocov start
# gc reports memory in MB
@@ -280,7 +276,7 @@ test = function(num,x,y=TRUE,error=NULL,warning=NULL,message=NULL,output=NULL,no
timings[as.integer(num), `:=`(time=time+took, nTest=nTest+1L), verbose=FALSE]
if (memtest) {
if (memtest==1L) gc() # see #5515 for before/after
timings[as.integer(num), RSS:=max(ps_mem(),RSS), verbose=FALSE]
timings[as.integer(num), RSS:=max(rss(),RSS), verbose=FALSE]
if (memtest==2L) gc()
}
assign("lasttime", proc.time()[3L], parent.frame(), inherits=TRUE) # after gc() to exclude gc() time from next test when memtest
10 changes: 10 additions & 0 deletions R/utils.R
@@ -156,3 +156,13 @@ edit.data.table = function(name, ...) {
setDT(NextMethod('edit', name))[]
}
# nocov end

rss = function() { #5515 #5517
# nocov start
cmd = paste0("ps -o rss --no-headers ", Sys.getpid()) # ps returns KB
ans = tryCatch(as.numeric(system(cmd, intern=TRUE)), warning=function(w) NA_real_, error=function(e) NA_real_)
if (length(ans)!=1L || !is.numeric(ans)) ans=NA_real_ # just in case
round(ans / 1024, 1L) # return MB
# nocov end
}
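# Note (illustrative, not part of this commit): what rss() wraps, assuming a Linux ps
# that reports rss in KB as the comment above states.
#   system(paste("ps -o rss --no-headers", Sys.getpid()), intern=TRUE)  # e.g. "524288" (KB)
#   data.table:::rss()                                                  # e.g. 512.0 (MB, 1 decimal place)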

317 changes: 317 additions & 0 deletions inst/tests/benchmark.Rraw
@@ -175,6 +175,322 @@ t2 = system.time(DT2 <- do.call(data.table, list(DF))) # 3.07s before, 0.02s af
test(, identical(DT1, DT2))
test(, t2["elapsed"]/t1["elapsed"]<2)

###########################################################
# largest tests by ram usage moved out of tests.Rraw, #5517
###########################################################

# Test an ad hoc 'by' of more than 100,000 levels, see 2nd part of bug #1387 (100,000 is the limit of base::sort.list radix)
# This does need to be this large, even in CRAN checks, because sort.list(method="radix") has that limit, which is
# exactly what is being tested. Even so, it runs well under 10 seconds.
DT = data.table(A=1:10,B=rnorm(10),C=factor(paste("a",1:100010,sep="")))
test(301, nrow(DT[,sum(B),by=C])==100010)
DT = data.table(A=1:10,B=rnorm(10),C=paste("a",1:100010,sep=""))
test(301.1, nrow(DT[,sum(B),by=C])==100010)

# Test := by key, and that := to the key column by key unsets the key. Make it non-trivial in size too.
options(datatable.optimize=0L)
set.seed(1)
DT = data.table(a=sample(1:100,1e6,replace=TRUE),b=sample(1:1000,1e6,replace=TRUE),key="a")
test(637.1, DT[,m:=sum(b),by=a][1:3], data.table(a=1L,b=c(156L,808L,848L),m=DT[J(1),sum(b)],key="a"))
test(637.2, key(DT[J(43L),a:=99L]), NULL)
setkey(DT,a)
test(637.3, key(DT[,a:=99L,by=a]), NULL)
options(datatable.optimize=2L)
set.seed(1)
DT = data.table(a=sample(1:100,1e6,replace=TRUE),b=sample(1:1000,1e6,replace=TRUE),key="a")
test(638.1, DT[,m:=sum(b),by=a][1:3], data.table(a=1L,b=c(156L,808L,848L),m=DT[J(1),sum(b)],key="a"))
test(638.2, key(DT[J(43L),a:=99L]), NULL)
setkey(DT,a)
test(638.3, key(DT[,a:=99L,by=a]), NULL)
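# Note (illustrative, not part of this commit): the two blocks above run the same tests at
# datatable.optimize=0L (internal optimizations off) and 2L (the level at which grouped
# sum() is GForce-optimized), so both the optimized and unoptimized paths stay covered.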

# Test X[Y] slowdown, #2216
# Many minutes in 1.8.2! Now well under 1s, but with a very wide 10s tolerance for CRAN. We'd like CRAN to tell us if any changes
# in R or elsewhere cause the 2 minute (!) bug to return. Hence not moving out to benchmark.Rraw.
X = CJ(a=seq_len(1e3),b=seq_len(1e3))
Y = copy(X)
X[4,b:=3L] # create a dup group, to force allLen1=FALSE
setkey(X)
test(819, system.time(X[Y,allow.cartesian=TRUE])["user.self"] < 10) # this system.time usage ok in this case
test(820, system.time(X[Y,mult="first"])["user.self"] < 10) # this system.time usage ok in this case

# test uniqlengths
set.seed(45)
x <- sample(c(NA_integer_, 1:1e4), 1e6, TRUE)
ox <- forderv(x)
o1 <- uniqlist(list(x), ox)
test(1151.1, c(diff(o1), length(x)-tail(o1, 1L)+1L), uniqlengths(o1, length(x)))
o1 <- uniqlist(list(x))
test(1151.2, c(diff(o1), length(x)-tail(o1, 1L)+1L), uniqlengths(o1, length(x)))
rm(list=c("x","ox","o1"))
gc()

# Fix for (usually small) memory leak when grouping, #2648.
# Deliberate worst case: largest group (100000 rows) first, followed by a small group (1 row) last.
DT = data.table(A=rep(1:2,c(100000,1)), B=runif(100001))
before = gc()["Vcells",2]
for (i in 1:50) DT[, sum(B), by=A]
after = gc()["Vcells",2]
test(1157, after < before+3) # +3 = 3MB
# Before the patch, Vcells grew dramatically from 6MB to 60MB. Now stable at 6MB. Increase 50 to 1000 and it grew to over 1GB for this case.
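# Note (illustrative, not part of this commit): the before/after pattern used in these
# leak tests reads the "Vcells" row of the matrix gc() returns, i.e. R's vector heap;
# column 2 is its current size in MB (the same column addressed as gc()["Vcells","(Mb)"]
# later in this file), so the delta bounds any growth attributable to the loop.
#   gc()["Vcells", 2]   # current vector-heap usage in MB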

# Similar for when dogroups writes fewer rows than allocated, #2648.
DT = data.table(k = 1:50, g = 1:20, val = rnorm(1e4))
before = gc()["Vcells",2]
for (i in 1:50) DT[ , unlist(.SD), by = 'k']
after = gc()["Vcells",2]
test(1158, after < before+3) # 177.6MB => 179.2MB. Needs to be +3 now from v1.9.8 with alloccol up from 100 to 1024

# fix DT[TRUE, :=] using too much working memory for i, #1249
if (!inherits(try(Rprofmem(NULL), silent=TRUE), "try-error")) { # in case R not compiled with memory profiling enabled
f = tempfile()
N = 1000000 # or any large number of rows
DT = data.table(A=1:N, B=rnorm(N))
DT[TRUE, B := B * 2] # stabilize with initial dummy update
Rprofmem(f)
DT[TRUE, B := B * 2] # or some in-place update
Rprofmem(NULL)
test(1542, length(grep("000",readLines(f, warn=FALSE))), 1L) # one allocation for the RHS only
unlink(f)
}
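# Note (illustrative, not part of this commit): Rprofmem() logs one line per large-vector
# allocation, each beginning with the byte count, which is what the grep("000",...) above
# is counting. A minimal hedged sketch (log format varies by R version):
#   Rprofmem(f2 <- tempfile())
#   x = numeric(1e6)               # one ~8MB vector => one log line beginning "8000048 :"
#   Rprofmem(NULL)
#   readLines(f2, warn=FALSE)[1L]
#   unlink(f2)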

if (FALSE) {
# Full range takes too long for CRAN.
dts = seq(as.Date("0000-03-01"), as.Date("9999-12-31"), by="day")
dtsCh = as.character(dts) # 36s
dtsCh = gsub(" ","0",sprintf("%10s",dtsCh)) # R does not 0 pad years < 1000
test(1739.1, length(dtsCh)==3652365 && identical(dtsCh[c(1,3652365)],c("0000-03-01","9999-12-31")))
} else {
# test on CRAN a reduced but important range
dts = seq(as.Date("1899-12-31"), as.Date("2100-01-01"), by="day")
dtsCh = as.character(dts)
test(1739.2, length(dtsCh)==73051 && identical(dtsCh[c(1,73051)],c("1899-12-31","2100-01-01")))
}
DT = data.table(A=dts, B=as.IDate(dts))
test(1739.3, sapply(DT,typeof), c(A="double",B="integer"))
test(1739.4, typeof(dts), "double")
f = tempfile()
g = tempfile() # Full range
fwrite(DT,f) # 0.092s
write.csv(DT,g,row.names=FALSE,quote=FALSE) # 65.250s
test(1739.5, readLines(f), c("A,B",paste(dtsCh,dtsCh,sep=",")))
test(1739.6, readLines(f), readLines(g))
unlink(f)
unlink(g)
rm(list=c("dtsCh","dts"))
gc()

# catch malformed factor in rbindlist, #3315
set.seed(32940)
NN=7e5; KK=4e4; TT=25
DT = data.table( id = sample(KK, NN, TRUE), tt = sample(TT, NN, TRUE), ff = factor(sample(3, NN, TRUE)) )
test(1978, print(DT[ , diff(ff), by = id]), error="Column 2 of item 1 has type 'factor' but has no levels; i.e. malformed.") # the print invokes rbindlist which bites

# print.data.table row id in non-scientific notation, #1167
DT <- data.table(a = rep(1:5,3*1e5), b = rep(letters[1:3],5*1e5))
test(1549, capture.output(print(DT)), c(" a b", " 1: 1 a", " 2: 2 b", " 3: 3 c", " 4: 4 a", " 5: 5 b", " --- ", "1499996: 1 b", "1499997: 2 c", "1499998: 3 a", "1499999: 4 b", "1500000: 5 c"))
rm(DT)

# Create a file to test a sample jump being skipped due to a format error. It will fail later in the read step because
# this is a real error. We have not yet constructed an error for which nextGoodLine() looks good but in fact is not;
# that would need a very complicated construction of embedded new lines in quoted fields.
# This test size with the default buffMB results in 2 threads being used. 2 is important to pass on CRAN.
DT = as.data.table(CO2)
f = tempfile()
for (i in 0:1000) {
start = nrow(CO2)*i
fwrite(DT[,Plant:=start:(start+nrow(CO2)-1)], f, append=TRUE, col.names=FALSE)
if (i==502) write("-999,Bad,Line,0.0,0.0,extra\n", f, append=TRUE)
}
test(1835, fread(f, verbose=TRUE),
output = "A line with too-many.*jump 50.*jump landed awkwardly.*skipped",
warning = "Stopped.*line 42253. Expected 5 fields but found 6.*discarded.*<<-999,Bad,Line,0.0,0.0,extra>>")
unlink(f)

# test no memory leak, #2191 and #2284
# These take a few seconds each, and it's important to run these on CRAN to check no leak
gc(); before = gc()["Vcells","(Mb)"]
for (i in 1:2000) { DT = data.table(1:3); rm(DT) } # in 1.8.2 would leak 3MB
gc(); after = gc()["Vcells","(Mb)"]
test(861, after < before+0.5) # close to 0.0 difference, but 0.5 for safe margin
gc(); before = gc()["Vcells","(Mb)"]
DF = data.frame(x=1:20, y=runif(20))
for (i in 1:2000) { DT = as.data.table(DF); rm(DT) }
gc(); after = gc()["Vcells","(Mb)"]
test(862, after < before+0.5)
gc(); before = gc()["Vcells","(Mb)"]
DT = data.table(x=1:20, y=runif(20))
for (i in 1:2000) { x <- DT[1:5,]; rm(x) }
gc(); after = gc()["Vcells","(Mb)"]
test(863, after < before+0.5)

# fread should use multiple threads on single column input.
# tests 2 threads; the very reasonable limit on CRAN
# file needs to be reasonably large for threads to kick in (minimum chunkSize is 1MB currently)
if (getDTthreads() == 1L) {
cat("Test 1760 not run because this session either has no OpenMP or has been limited to one thread (e.g. under UBSAN and ASAN)\n")
} else {
N = if (TRUE) 2e6 else 1e9 # offline speed check
fwrite(data.table(A=sample(10,N,replace=TRUE)), f<-tempfile())
test(1760.1, file.info(f)$size > 4*1024*1024)
test(1760.2, fread(f, verbose=TRUE, nThread=2), output="using 2 threads")
unlink(f)
}

# segfault of unprotected var caught with the help of address sanitizer; was test 1509
# in #5517 I figured this test shouldn't be reduced in size due to its nature
set.seed(1)
val = sample(c(1:5, NA), 1e4L, TRUE)
dt <- setDT(replicate(100L, val, simplify=FALSE))
## to ensure there's no segfault...
ans <- melt(dt, measure.vars=names(dt), na.rm=TRUE)
test(1035.21, ans, ans)

# gc race with altrep in R-devel May 2018, #2866 & #2767, PR#2882
# This runs with 2 threads in the test suite on CRAN and AppVeyor etc.
# 2 threads are sufficient to fail before the fix.
N = 20
DF = data.frame(a=rnorm(N),
b=factor(rbinom(N,5,prob=0.5),1:5,letters[1:5]),
c=factor(rbinom(N,5,prob=0.5),1:5,letters[1:5]))
DT = setDT(DF) # setDT required since data.table() would have already expanded the altreps
before = sum(gc()[, 2])
fff = function(aref) {
ff = lapply(1:5, function(i) {
DT[,list(sumA=sum(get(aref))),by=b][,c:=letters[i]]
})
return(rbindlist(ff))
}
for(i in 1:100) {
f = fff("a")
rm("f")
}
gc() # an extra gc() (i.e. two including the one on the next line) seems to reduce `after`
# from 29.7 to 27.2 (exactly `before`). Keeping the extra gc() since it does no harm.
after = sum(gc()[, 2])
test(1912.1, after < before + 10) # 10MB very wide margin. With the gc race, heap usage grew much more which is all we're testing here (no blow up).
#
before = sum(gc()[, 2])
fff = function(aref) {
DT = setDT(data.frame(a=1:N, b=1:N, c=1:N, d=1:N, e=1:N, f=1:N, g=1:N, h=1:N)) # 1:N creates altrep. A few of them too to tickle (the fixed) race.
lapply(1:5, function(i) {
DT[,list(sumA=sum(get(aref))),by=b][,c:=letters[i]]
})
}
for(i in 1:100) {
fff("a")
}
gc()
after = sum(gc()[, 2])
test(1912.2, after < before + 10)

DT = data.table(A=seq(1, 1000000), B="x", C=TRUE)
fwrite(DT, f<-tempfile())
test(1815, fread(f, nrows=5), DT[1:5]) #2243: nrows small vs large nrow(DT)

# Better jump sync and run-on in PR#2627
#
# Reproduces error 'did not finish exactly where jump 1 found ...' in #2561 in master before PR #2627.
# The jump point is just before an empty line and nextGoodLine() wasn't sync'd properly.
x = sprintf("ABCDEFGHIJKLMNOPQRST%06d", 1:102184)
x[51094]=""
cat(x, file=f<-tempfile(), sep="\n")
test(1874.1, fread(f,header=FALSE,verbose=TRUE)[c(1,51094,.N),],
data.table(V1=c("ABCDEFGHIJKLMNOPQRST000001","","ABCDEFGHIJKLMNOPQRST102184")),
output="jumps=[0..2)") # ensure jump 1 happened
#
# out-of-sample short lines in the first jump, not near the jump point
x = sprintf("ABCD,FGHI,KLMN,PQRS,%06d", 1:102184)
x[5021:5041] = "small,batch,short,lines" # 4 fields not 5
cat(x, file=f, sep="\n")
test(1874.2, fread(f), data.table(V1="ABCD", V2="FGHI", V3="KLMN", V4="PQRS", V5=1:5020),
warning="Stopped early on line 5021.*<<small,batch,short,lines>>")
test(1874.3, fread(f,fill=TRUE,verbose=TRUE)[c(1,5020,5021,5041,5042,.N),],
data.table(V1=c("ABCD","ABCD","small","small","ABCD","ABCD"),
V2=c("FGHI","FGHI","batch","batch","FGHI","FGHI"),
V3=c("KLMN","KLMN","short","short","KLMN","KLMN"),
V4=c("PQRS","PQRS","lines","lines","PQRS","PQRS"),
V5=c(1L,5020L,NA,NA,5042L,102184L)),
output="jumps=[0..2)")
#
# jump just before a set of 30 or more too-few lines, to reproduce "No good line could be found" error in #2267
# confirmed fails in master with that error before PR#2627
x = sprintf("ABCD,FGHI,KLMN,PQRS,%06d", 1:102184)
x[51094:51150] = "small,batch,short,lines" # 4 fields not 5
cat(x, file=f, sep="\n")
test(1874.4, fread(f,verbose=TRUE), data.table(V1="ABCD", V2="FGHI", V3="KLMN", V4="PQRS", V5=1:51093),
warning="Stopped early on line 51094.*<<small,batch,short,lines>>",
output="jumps=[0..2)")
test(1874.5, fread(f,fill=TRUE,verbose=TRUE)[c(1,51093,51094,51150,51151,.N),],
data.table(V1=c("ABCD","ABCD","small","small","ABCD","ABCD"),
V2=c("FGHI","FGHI","batch","batch","FGHI","FGHI"),
V3=c("KLMN","KLMN","short","short","KLMN","KLMN"),
V4=c("PQRS","PQRS","lines","lines","PQRS","PQRS"),
V5=c(1L,51093L,NA,NA,51151L,102184L)),
output="jumps=[0..2)")
#
# jump inside a quoted field containing many new lines, to simulate a dirty jump
# we'll make this jump landing even harder for nextGoodLine() by making the lines resemble the number and types of the true lines, too.
# Rather than needing to make nextGoodLine() better and better (at some point it's impossible), in these rare cases we'll just sweep dirty jumps.
x = sprintf("ABCD,FGHI,KLMN,PQRS,%06d", 1:102184)
x[51093] = "\"A,B,C,D,1\nA,B,C,D,2\nA,B,C,D,3\nA,B,C,D,4\nA,B,C,D,5\nA,B,C,D,6\nA,B,C,D,7\nA,B,C,D,8\n\",FGHI,KLMN,PQRS,51093"
cat(x, file=f, sep="\n")
test(1875.6, fread(f,verbose=TRUE)[c(1,51092:51094,.N),][3,V1:=gsub("\r","",V1)], # gsub since R on Windows replaces \n with \r\n
data.table(V1=c("ABCD","ABCD", "A,B,C,D,1\nA,B,C,D,2\nA,B,C,D,3\nA,B,C,D,4\nA,B,C,D,5\nA,B,C,D,6\nA,B,C,D,7\nA,B,C,D,8\n", "ABCD","ABCD"),
V2="FGHI", V3="KLMN", V4="PQRS", V5=c(1L,51092:51094,102184L)),
output = "too-few.*sample jump 50.*jump landed awkwardly.*skipped.*Read the data.*jumps=\\[0..2\\).*jumps=\\[1..2\\).*Reading 2 chunks \\(1 swept\\)")
# Aside: although the file (with over 100,000 lines) is big enough for 100 sampling jumps (of which just 1, the middle sample jump, is skipped), it's
# still too small for more than 2 reading chunks to be worthwhile, which is correct (based on buffMB, not nth)
unlink(f)

# chmatchdup test from benchmark at the bottom of chmatch.c
set.seed(45L)
x = sample(letters, 1e5, TRUE)
y = sample(letters, 1e6, TRUE)
test(2000, c(head(ans<-chmatchdup(x,y,0L)),tail(ans)), INT(7,49,11,20,69,25,99365,100750,97596,99671,103320,99406))
rm(list=c("x","y"))
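# Note (illustrative, not part of this commit): chmatchdup()'s contract, per the benchmark
# comment at the bottom of chmatch.c, is that duplicates in x match successive positions
# in y, unlike chmatch() which reuses the first matching position; e.g.
#   data.table:::chmatchdup(c("a","a"), c("b","a","a"), 0L)  # expected 2 3, where chmatch gives 2 2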

# Add nq tests 1641-1652 here with larger sizes and calls that have been turned off in the past as they took too long, and
# restore the exact parameters w.r.t. Jan's comment: https://github.com/Rdatatable/data.table/pull/5520#discussion_r1020180583

# issue 2351
set.seed(1)
DT = data.table(id=paste0("id",1:1e5), v=sample(100,1e5,replace=TRUE))
fwrite(DT, file=f<-tempfile(), eol="\r")
test(1826.1, fread(f)[c(1,2,.N-1,.N)], data.table(id=c("id1","id2","id99999","id100000"), v=c(27L,38L,10L,13L)))
cat("id888,42", file=f, append=TRUE) # without final \r after last line
test(1826.2, fread(f)[c(1,2,.N-1,.N)], data.table(id=c("id1","id2","id100000","id888"), v=c(27L,38L,13L,42L)))
unlink(f)

# segfault when rbindlist is asked to create a DT with more than 2bn rows
DT = data.table(1:1e6)
L = vector("list", 2148)
for (i in seq_along(L)) L[[i]] = DT # many references to the same DT to avoid actually using large RAM for this test
test(1850, rbindlist(L), error="Total rows in the list is 2148000000 which is larger than the maximum number of rows, currently 2147483647")
rm(L, DT)
gc()
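# Note (illustrative, not part of this commit): 2147483647 in the error above is
# .Machine$integer.max, the 32-bit signed limit on data.table row counts.
#   2148 * 1e6 > .Machine$integer.max   # TRUE: why 2148 copies of a 1e6-row DT overflow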

# segfault in forder when nrow/throttle<nth && ngrp>=255 && nrow>=65536; #5077
# Matt ran these on clang's ASAN+OpenMP which correctly faulted v1.14.0; these tests segfault consistently without ASAN too
set.seed(1)
DT = data.table(grp=sample(255L, 65536L, replace=TRUE)) # >=255 && >=65536 necessary
setDTthreads(throttle=nrow(DT)) # increase throttle to reduce threads to 1 for this nrow
test(2201.1, nrow(DT[, .N, by=grp]), 255L)
test(2201.2, nrow(setkey(DT, grp)), 65536L)
set.seed(1)
DT = data.table(grp=sample(65536L)) # extra case with all size 1 groups too just for fun
test(2201.3, nrow(DT[, .N, by=grp]), 65536L)
test(2201.4, nrow(setkey(DT, grp)), 65536L)
setDTthreads() # restore default throttle
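# Note (illustrative, not part of this commit): the throttle exercised above acts as a
# minimum number of rows per thread, so raising it to nrow(DT) confines a table of that
# size to a single thread, per the comment on tests 2201.1-2201.4.
#   setDTthreads(throttle=65536L)  # illustrative value equal to nrow(DT) above
#   getDTthreads(verbose=TRUE)     # prints the current throttle alongside the thread count
#   setDTthreads()                 # restore defaults, as done above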

# print of DT with many columns reordered them, #3306.
DT = as.data.table(lapply(1:255, function(i)rep.int(i, 105L))) # 105 to be enough for 'top 5 ... bottom 5' to print
out = capture.output(print(DT))
tt = out[grep("V",out)]
tt = unlist(strsplit(gsub(" ","",tt), "V"))
test(1982.1, tt[1L], "")
tt = as.integer(tt[tt!=""])
test(1982.2, tt, seq_along(tt))

# fread leak, #3292
dummy = rep("1\t2\t3\t4\t5", 10000000)
writeLines(dummy, "out.tsv")
@@ -183,3 +499,4 @@ for (i in 1:10) data.table::fread("out.tsv")
end = gc()["Vcells",2]
test(, end/start < 1.05)

