From ee82f0f5c662a5655c15f68da8d728e97ba62f6f Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 18 Jan 2018 11:29:51 +0800 Subject: [PATCH 01/11] Replace integers with explicit integers (1 -> 1L, etc.) --- R/IDateTime.R | 10 +-- R/as.data.table.R | 8 +- R/between.R | 14 ++-- R/bmerge.R | 4 +- R/data.table.R | 180 +++++++++++++++++++++---------------------- R/duplicated.R | 6 +- R/fcast.R | 4 +- R/fmelt.R | 2 +- R/foverlaps.R | 12 +-- R/frank.R | 6 +- R/fread.R | 10 +-- R/fwrite.R | 8 +- R/getdots.R | 2 +- R/groupingsets.R | 8 +- R/merge.R | 2 +- R/onAttach.R | 10 +-- R/onLoad.R | 18 ++--- R/print.data.table.R | 6 +- R/setkey.R | 26 +++---- R/setops.R | 6 +- R/test.data.table.R | 10 +-- R/timetaken.R | 2 +- R/utils.R | 2 +- 23 files changed, 178 insertions(+), 178 deletions(-) diff --git a/R/IDateTime.R b/R/IDateTime.R index f596be9ae..56a497dae 100644 --- a/R/IDateTime.R +++ b/R/IDateTime.R @@ -53,10 +53,10 @@ as.list.IDate <- function(x, ...) NextMethod() round.IDate <- function (x, digits=c("weeks", "months", "quarters", "years"), ...) { units <- match.arg(digits) as.IDate(switch(units, - weeks = round(x, "year") + 7 * (yday(x) %/% 7), - months = ISOdate(year(x), month(x), 1), - quarters = ISOdate(year(x), 3 * (quarter(x)-1) + 1, 1), - years = ISOdate(year(x), 1, 1))) + weeks = round(x, "year") + 7L * (yday(x) %/% 7L), + months = ISOdate(year(x), month(x), 1L), + quarters = ISOdate(year(x), 3L * (quarter(x)-1L) + 1L, 1L), + years = ISOdate(year(x), 1L, 1L))) } #Adapted from `+.Date` @@ -80,7 +80,7 @@ round.IDate <- function (x, digits=c("weeks", "months", "quarters", "years"), .. stop("can only subtract from \"IDate\" objects") if (storage.mode(e1) != "integer") stop("Internal error: storage mode of IDate is somehow no longer integer") - if (nargs() == 1) + if (nargs() == 1L) stop("unary - is not defined for \"IDate\" objects") if (inherits(e2, "difftime")) stop("difftime objects may not be subtracted from IDate. Use plain integer instead of difftime.") diff --git a/R/as.data.table.R b/R/as.data.table.R index 73dac4e89..4b3fccf32 100644 --- a/R/as.data.table.R +++ b/R/as.data.table.R @@ -16,7 +16,7 @@ as.data.table.Date <- as.data.table.ITime <- function(x, keep.rownames=FALSE, .. if (is.matrix(x)) { return(as.data.table.matrix(x, ...)) } - tt = deparse(substitute(x))[1] + tt = deparse(substitute(x))[1L] nm = names(x) # FR #2356 - transfer names of named vector as "rn" column if required if (!identical(keep.rownames, FALSE) & !is.null(nm)) @@ -37,7 +37,7 @@ as.data.table.table <- function(x, keep.rownames=FALSE, ...) { if (is.null(names(val)) || !any(nzchar(names(val)))) setattr(val, 'names', paste("V", rev(seq_along(val)), sep="")) ans <- data.table(do.call(CJ, c(val, sorted=FALSE)), N = as.vector(x)) - setcolorder(ans, c(rev(head(names(ans), -1)), "N")) + setcolorder(ans, c(rev(head(names(ans), -1L)), "N")) ans } @@ -100,7 +100,7 @@ as.data.table.array <- function(x, keep.rownames=FALSE, sorted=TRUE, value.name= if (isTRUE(na.rm)) ans = ans[!is.na(N)] setnames(ans, "N", value.name) - dims = rev(head(names(ans), -1)) + dims = rev(head(names(ans), -1L)) setcolorder(ans, c(dims, value.name)) if (isTRUE(sorted)) setkeyv(ans, dims) @@ -133,7 +133,7 @@ as.data.table.list <- function(x, keep.rownames=FALSE, ...) { # Implementing FR #4813 - recycle with warning when nr %% nrows[i] != 0L if (!n[i] && mn) warning("Item ", i, " is of size 0 but maximum size is ", mn, ", therefore recycled with 'NA'") - else if (n[i] && mn %% n[i] != 0) + else if (n[i] && mn %% n[i] != 0L) warning("Item ", i, " is of size ", n[i], " but maximum size is ", mn, " (recycled leaving a remainder of ", mn%%n[i], " items)") x[[i]] = rep(x[[i]], length.out=mn) } diff --git a/R/between.R b/R/between.R index 01bb9ce8a..10d3398c1 100644 --- a/R/between.R +++ b/R/between.R @@ -12,7 +12,7 @@ between <- function(x,lower,upper,incbounds=TRUE) { } # %between% is vectorised, #534. -"%between%" <- function(x,y) between(x,y[[1]],y[[2]],incbounds=TRUE) +"%between%" <- function(x, y) between(x, y[[1L]], y[[2L]], incbounds=TRUE) # If we want non inclusive bounds with %between%, just +1 to the left, and -1 to the right (assuming integers) # issue FR #707 @@ -22,18 +22,18 @@ inrange <- function(x,lower,upper,incbounds=TRUE) { subject = setDT(list(l=lower, u=upper)) ops = if (incbounds) c(4L, 2L) else c(5L, 3L) # >=,<= and >,< verbose = getOption("datatable.verbose") - if (verbose) {last.started.at=proc.time()[3];cat("forderv(query) took ... ");flush.console()} + if (verbose) {last.started.at=proc.time()[3L];cat("forderv(query) took ... ");flush.console()} xo = forderv(query) - if (verbose) {cat(round(proc.time()[3]-last.started.at,3),"secs\n");flush.console} - ans = bmerge(shallow(subject), query, 1:2, c(1L,1L), FALSE, xo, - 0, c(FALSE, TRUE), 0L, "all", ops, integer(0), + if (verbose) {cat(round(proc.time()[3L]-last.started.at, 3L),"secs\n");flush.console} + ans = bmerge(shallow(subject), query, 1L:2L, c(1L,1L), FALSE, xo, + 0, c(FALSE, TRUE), 0L, "all", ops, integer(0L), 1L, verbose) # fix for #1819, turn on verbose messages options(datatable.verbose=FALSE) setDT(ans[c("starts", "lens")], key=c("starts", "lens")) options(datatable.verbose=verbose) - if (verbose) {last.started.at=proc.time()[3];cat("Generating final logical vector ... ");flush.console()} + if (verbose) {last.started.at=proc.time()[3L];cat("Generating final logical vector ... ");flush.console()} .Call(Cinrange, idx <- vector("logical", length(x)), xo, ans[["starts"]], ans[["lens"]]) - if (verbose) {cat("done in",round(proc.time()[3]-last.started.at,3),"secs\n");flush.console} + if (verbose) {cat("done in",round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console} idx } diff --git a/R/bmerge.R b/R/bmerge.R index 79be6c8e5..972d5713b 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -89,10 +89,10 @@ bmerge <- function(i, x, leftcols, rightcols, io, xo, roll, rollends, nomatch, m set(i, j=lc, value=newval) } } - if (verbose) {last.started.at=proc.time()[3];cat("Starting bmerge ...");flush.console()} + if (verbose) {last.started.at=proc.time()[3L];cat("Starting bmerge ...");flush.console()} ans = .Call(Cbmerge, i, x, as.integer(leftcols), as.integer(rightcols), io<-haskey(i), xo, roll, rollends, nomatch, mult, ops, nqgrp, nqmaxgrp) # NB: io<-haskey(i) necessary for test 579 where the := above change the factor to character and remove i's key - if (verbose) {cat("done in",round(proc.time()[3]-last.started.at,3),"secs\n");flush.console()} + if (verbose) {cat("done in",round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console()} # in the caller's shallow copy, see comment at the top of this function for usage # We want to leave the coercions to i in place otherwise, since the caller depends on that to build the result diff --git a/R/data.table.R b/R/data.table.R index 895649c72..b76b78e79 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -64,7 +64,7 @@ data.table <-function(..., keep.rownames=FALSE, check.names=FALSE, key=NULL, str for (i in which(novname)) { # if (ncol(as.data.table(x[[i]])) <= 1) { # cbind call in test 230 fails if I write ncol(as.data.table(eval(tt[[i]], parent.frame()))) <= 1, no idea why... (keep this for later even though all tests pass with ncol(.).. because base uses as.data.frame(.)) if (is.null(ncol(x[[i]]))) { - if ((tmp <- deparse(tt[[i]])[1]) == make.names(tmp)) + if ((tmp <- deparse(tt[[i]])[1L]) == make.names(tmp)) vnames[i] <- tmp } } @@ -190,7 +190,7 @@ replace_dot_alias <- function(e) { # of "list" in several places so it saves having to remember to write "." || "list" in those places if (is.call(e)) { if (e[[1L]] == ".") e[[1L]] = quote(list) - for (i in seq_along(e)[-1]) if (!is.null(e[[i]])) e[[i]] = replace_dot_alias(e[[i]]) + for (i in seq_along(e)[-1L]) if (!is.null(e[[i]])) e[[i]] = replace_dot_alias(e[[i]]) } e } @@ -239,13 +239,13 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { } force(rollends) if (!is.logical(rollends)) stop("rollends must be a logical vector") - if (length(rollends)>2) stop("rollends must be length 1 or 2") - if (length(rollends)==1) rollends=rep.int(rollends,2L) + if (length(rollends)>2L) stop("rollends must be length 1 or 2") + if (length(rollends)==1L) rollends=rep.int(rollends,2L) # TO DO (document/faq/example). Removed for now ... if ((roll || rolltolast) && missing(mult)) mult="last" # for when there is exact match to mult. This does not control cases where the roll is mult, that is always the last one. missingnomatch = missing(nomatch) if (!is.na(nomatch) && nomatch!=0L) stop("nomatch must either be NA or 0, or (ideally) NA_integer_ or 0L") nomatch = as.integer(nomatch) - if (!is.logical(which) || length(which)>1) stop("'which' must be a logical vector length 1. Either FALSE, TRUE or NA.") + if (!is.logical(which) || length(which)>1L) stop("'which' must be a logical vector length 1. Either FALSE, TRUE or NA.") if ((isTRUE(which)||is.na(which)) && !missing(j)) stop("'which' is ",which," (meaning return row numbers) but 'j' is also supplied. Either you need row numbers or the result of j, but only one type of result can be returned.") if (!is.na(nomatch) && is.na(which)) stop("which=NA with nomatch=0 would always return an empty vector. Please change or remove either which or nomatch.") .global$print="" @@ -295,11 +295,11 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { with=FALSE } else if (is.name(jsub)) { jsubChar = as.character(jsub) - if (substring(jsubChar,1,2) == "..") { - if (nchar(jsubChar)==2) stop("The symbol .. is invalid. The .. prefix must be followed by at least one character.") + if (substring(jsubChar, 1L, 2L) == "..") { + if (nchar(jsubChar)==2L) stop("The symbol .. is invalid. The .. prefix must be followed by at least one character.") if (!exists(jsubChar, where=parent.frame())) { # We have recommended manual ".." prefix in the past so that needs to keep working and take precedence - jsub = as.name(jsubChar<-substring(jsubChar,3)) + jsub = as.name(jsubChar<-substring(jsubChar, 3L)) } with = FALSE } @@ -317,7 +317,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { stop("You have wrapped := with {} which is ok but then := must be the only thing inside {}. You have something else inside {} as well. Consider placing the {} on the RHS of := instead; e.g. DT[,someCol:={tmpVar1<-...;tmpVar2<-...;tmpVar1*tmpVar2}") } } - if (root=="eval" && !any(all.vars(jsub[[2]]) %chin% names(x))) { + if (root=="eval" && !any(all.vars(jsub[[2L]]) %chin% names(x))) { # TODO: this && !any depends on data. Can we remove it? # Grab the dynamic expression from calling scope now to give the optimizer a chance to optimize it # Only when top level is eval call. Not nested like x:=eval(...) or `:=`(x=eval(...), y=eval(...)) @@ -363,7 +363,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { isub=NA_integer_ } isnull_inames = FALSE - nqgrp = integer(0) # for non-equi join + nqgrp = integer(0L) # for non-equi join nqmaxgrp = 1L # for non-equi join # Fixes 4994: a case where quoted expression with a "!", ex: expr = quote(!dt1); dt[eval(expr)] requires # the "eval" to be checked before `as.name("!")`. Therefore interchanged. @@ -406,7 +406,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { assign("forder", forder, order_env) assign("x", x, order_env) i = eval(isub, order_env, parent.frame()) # for optimisation of 'order' to 'forder' - # that forder returns integer(0) is taken care of internally within forder + # that forder returns integer(0L) is taken care of internally within forder } else if (is.call(isub) && getOption("datatable.use.index") && # #1422 as.character(isub[[1L]]) %chin% c("==","%in%") && @@ -421,7 +421,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { RHS = eval(isub[[3L]], x, parent.frame()) # fix for #961 if (is.list(RHS)) RHS = as.character(RHS) - if (isub[[1L]] == "==" && length(RHS)>1) { + if (isub[[1L]] == "==" && length(RHS)>1L) { if (length(RHS)!=nrow(x)) stop("RHS of == is length ",length(RHS)," which is not 1 or nrow (",nrow(x),"). For robustness, no recycling is allowed (other than of length 1 RHS). Consider %in% instead.") i = x[[isub2]] == RHS # DT[colA == colB] regular element-wise vector scan } else if ( (is.integer(x[[isub2]]) && is.double(RHS) && isReallyReal(RHS)) || (mode(x[[isub2]]) != mode(RHS) && !(class(x[[isub2]]) %in% c("character", "factor") && @@ -444,7 +444,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { if (haskey(x) && isub2 == key(x)[1L]) { # join to key(x)[1L] xo <- integer() - rightcols = chmatch(key(x)[1],names(x)) + rightcols = chmatch(key(x)[1L],names(x)) } else { xo = get2key(x,isub2) # Can't be any index with that col as the first one because those indexes will reorder within each group if (is.null(xo)) { # integer() would be valid and signifies o=1:.N @@ -564,37 +564,37 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { # non-equi operators present.. investigate groups.. if (verbose) cat("Non-equi join operators detected ... \n") if (!missingroll) stop("roll is not implemented for non-equi joins yet.") - if (verbose) {last.started.at=proc.time()[3];cat(" forder took ... ");flush.console()} + if (verbose) {last.started.at=proc.time()[3L];cat(" forder took ... ");flush.console()} # TODO: could check/reuse secondary indices, but we need 'starts' attribute as well! xo = forderv(x, rightcols, retGrp=TRUE) - if (verbose) {cat(round(proc.time()[3]-last.started.at,3),"secs\n");flush.console} + if (verbose) {cat(round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console} xg = attr(xo, 'starts') resetcols = head(rightcols, non_equi-1L) if (length(resetcols)) { # TODO: can we get around having to reorder twice here? # or at least reuse previous order? - if (verbose) {last.started.at=proc.time()[3];cat(" Generating group lengths ... ");flush.console()} + if (verbose) {last.started.at=proc.time()[3L];cat(" Generating group lengths ... ");flush.console()} resetlen = attr(forderv(x, resetcols, retGrp=TRUE), 'starts') resetlen = .Call(Cuniqlengths, resetlen, nrow(x)) - if (verbose) {cat("done in",round(proc.time()[3]-last.started.at,3),"secs\n");flush.console} - } else resetlen = integer(0) - if (verbose) {last.started.at=proc.time()[3];cat(" Generating non-equi group ids ... ");flush.console()} + if (verbose) {cat("done in",round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console} + } else resetlen = integer(0L) + if (verbose) {last.started.at=proc.time()[3L];cat(" Generating non-equi group ids ... ");flush.console()} nqgrp = .Call(Cnestedid, x, rightcols[non_equi:length(rightcols)], xo, xg, resetlen, mult) - if (verbose) {cat("done in", round(proc.time()[3]-last.started.at,3),"secs\n");flush.console} + if (verbose) {cat("done in", round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console} if (length(nqgrp)) nqmaxgrp = max(nqgrp) # fix for #1986, when 'x' is 0-row table max(.) returns -Inf. if (nqmaxgrp > 1L) { # got some non-equi join work to do if ("_nqgrp_" %in% names(x)) stop("Column name '_nqgrp_' is reserved for non-equi joins.") - if (verbose) {last.started.at=proc.time()[3];cat(" Recomputing forder with non-equi ids ... ");flush.console()} + if (verbose) {last.started.at=proc.time()[3L];cat(" Recomputing forder with non-equi ids ... ");flush.console()} set(nqx<-shallow(x), j="_nqgrp_", value=nqgrp) xo = forderv(nqx, c(ncol(nqx), rightcols)) - if (verbose) {cat("done in",round(proc.time()[3]-last.started.at,3),"secs\n");flush.console} - } else nqgrp = integer(0) + if (verbose) {cat("done in",round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console} + } else nqgrp = integer(0L) if (verbose) cat(" Found", nqmaxgrp, "non-equi group(s) ...\n") } if (is.na(non_equi)) { # equi join. use existing key (#1825) or existing secondary index (#1439) if ( identical(head(key(x), length(on)), names(on)) ) { - xo = integer(0) + xo = integer(0L) if (verbose) cat("on= matches existing key, using key\n") } else { if (isTRUE(getOption("datatable.use.index"))) { @@ -603,9 +603,9 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { if (verbose && !is.null(xo)) cat("on= matches existing index, using index\n") } if (is.null(xo)) { - last.started.at=proc.time()[3] + last.started.at=proc.time()[3L] xo = forderv(x, by = rightcols) - if (verbose) cat("Calculated ad hoc index in", round(proc.time()[3]-last.started.at,3), "secs\n") + if (verbose) cat("Calculated ad hoc index in", round(proc.time()[3L]-last.started.at,3L), "secs\n") # TODO: use setindex() instead, so it's cached for future reuse } } @@ -623,10 +623,10 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { # Implementation for not-join along with by=.EACHI, #604 if (notjoin && (byjoin || mult != "all")) { # mult != "all" needed for #1571 fix notjoin = FALSE - if (verbose) {last.started.at=proc.time()[3];cat("not-join called with 'by=.EACHI'; Replacing !i with i=setdiff(x,i) ...");flush.console()} + if (verbose) {last.started.at=proc.time()[3L];cat("not-join called with 'by=.EACHI'; Replacing !i with i=setdiff(x,i) ...");flush.console()} orignames = copy(names(i)) i = setdiff_(x, i, rightcols, leftcols) # part of #547 - if (verbose) {cat("done in",round(proc.time()[3]-last.started.at,3),"secs\n");flush.console} + if (verbose) {cat("done in",round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console} setnames(i, orignames[leftcols]) setattr(i, 'sorted', names(i)) # since 'x' has key set, this'll always be sorted } @@ -635,7 +635,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { ans = bmerge(i, x, leftcols, rightcols, io, xo, roll, rollends, nomatch, mult, ops, nqgrp, nqmaxgrp, verbose=verbose) # temp fix for issue spotted by Jan, test #1653.1. TODO: avoid this # 'setorder', as there's another 'setorder' in generating 'irows' below... - if (length(ans$indices)) setorder(setDT(ans[1:3]), indices) + if (length(ans$indices)) setorder(setDT(ans[1L:3L]), indices) allLen1 = ans$allLen1 f__ = ans$starts len__ = ans$lens @@ -709,7 +709,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { && isTRUE(unname(i))) irows=i=NULL # unname() for #2152 - length 1 named logical vector. # NULL is efficient signal to avoid creating 1:nrow(x) but still return all rows, fixes #1249 - else if (length(i)<=1L) irows=i=integer(0) + else if (length(i)<=1L) irows=i=integer(0L) # FALSE, NA and empty. All should return empty data.table. The NA here will be result of expression, # where for consistency of edge case #1252 all NA to be removed. If NA is a single NA symbol, it # was auto converted to NA_integer_ higher up for ease of use and convenience. We definitely @@ -748,7 +748,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { xnrow = nrow(x) xcols = xcolsAns = icols = icolsAns = integer() xdotcols = FALSE - othervars = character(0) + othervars = character(0L) if (missing(j)) { # missing(by)==TRUE was already checked above before dealing with i if (!length(x)) return(null.data.table()) @@ -775,7 +775,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { if (is.data.table(i)) { idotprefix = paste0("i.", names(i)) xdotprefix = paste0("x.", names(x)) - } else idotprefix = xdotprefix = character(0) + } else idotprefix = xdotprefix = character(0L) # j was substituted before dealing with i so that := can set allow.cartesian=FALSE (#800) (used above in i logic) if (is.null(jsub)) return(NULL) @@ -794,7 +794,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { if (!with) { # missing(by)==TRUE was already checked above before dealing with i - if (is.call(jsub) && deparse(jsub[[1]], 500L, backtick=FALSE) %in% c("!", "-")) { # TODO is deparse avoidable here? + if (is.call(jsub) && deparse(jsub[[1L]], 500L, backtick=FALSE) %in% c("!", "-")) { # TODO is deparse avoidable here? notj = TRUE jsub = jsub[[2L]] } else notj = FALSE @@ -855,7 +855,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { if (is.atomic(bysub)) bysubl = list(bysuborig) else bysubl = as.list.default(bysub) } if (length(bysubl) && identical(bysubl[[1L]],quote(eval))) { # TO DO: or by=..() - bysub = eval(bysubl[[2]], parent.frame(), parent.frame()) + bysub = eval(bysubl[[2L]], parent.frame(), parent.frame()) bysub = replace_dot_alias(bysub) # fix for #1298 if (is.expression(bysub)) bysub=bysub[[1L]] bysubl = as.list.default(bysub) @@ -921,7 +921,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { } if (!length(byval) && xnrow>0L) { # see missing(by) up above for comments - # by could be NULL or character(0) for example (e.g. passed in as argument in a loop of different bys) + # by could be NULL or character(0L) for example (e.g. passed in as argument in a loop of different bys) bysameorder = FALSE # 1st and only group is the entire table, so could be TRUE, but FALSE to avoid # a key of empty character() byval = list() @@ -949,12 +949,12 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { if (any(tt!=xnrow)) stop("The items in the 'by' or 'keyby' list are length (",paste(tt,collapse=","),"). Each must be same length as rows in x or number of rows returned by i (",xnrow,").") if (is.null(bynames)) bynames = rep.int("",length(byval)) if (any(bynames=="")) { - if (length(bysubl)<2) stop("When 'by' or 'keyby' is list() we expect something inside the brackets") + if (length(bysubl)<2L) stop("When 'by' or 'keyby' is list() we expect something inside the brackets") for (jj in seq_along(bynames)) { if (bynames[jj]=="") { # Best guess. Use "month" in the case of by=month(date), use "a" in the case of by=a%%2 byvars = all.vars(bysubl[[jj+1L]], functions = TRUE) - if (length(byvars) == 1) tt = byvars + if (length(byvars) == 1L) tt = byvars else { tt = grep("^eval|^[^[:alpha:]. ]",byvars,invert=TRUE,value=TRUE)[1L] if (!length(tt)) tt = all.vars(bysubl[[jj+1L]])[1L] @@ -988,7 +988,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { } else if (is.call(jsub) && as.character(jsub[[1L]]) %chin% c("list",".")) { jsub[[1L]] = quote(list) jsubl = as.list.default(jsub) # TO DO: names(jsub) and names(jsub)="" seem to work so make use of that - if (length(jsubl)>1) { + if (length(jsubl)>1L) { jvnames = names(jsubl)[-1L] # check list(a=sum(v),v) if (is.null(jvnames)) jvnames = rep.int("", length(jsubl)-1L) for (jj in seq.int(2L,length(jsubl))) { @@ -1106,8 +1106,8 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { # regular LHS:=RHS usage, or `:=`(...) with no named arguments (an error) # `:=`(LHS,RHS) is valid though, but more because can't see how to detect that, than desire if (length(jsub)!=3L) stop("In `:=`(col1=val1, col2=val2, ...) form, all arguments must be named.") - lhs = jsub[[2]] - jsub = jsub[[3]] + lhs = jsub[[2L]] + jsub = jsub[[3L]] if (is.name(lhs)) { lhs = as.character(lhs) } else { @@ -1116,10 +1116,10 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { } } else { # `:=`(c2=1L,c3=2L,...) - lhs = names(jsub)[-1] + lhs = names(jsub)[-1L] if (any(lhs=="")) stop("In `:=`(col1=val1, col2=val2, ...) form, all arguments must be named.") names(jsub)="" - jsub[[1]]=as.name("list") + jsub[[1L]]=as.name("list") } av = all.vars(jsub,TRUE) if (!is.atomic(lhs)) stop("LHS of := must be a symbol, or an atomic vector (column names or positions).") @@ -1319,7 +1319,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { SDenv$.SDall = SDenv$.SD = null.data.table() # no columns used by j so .SD can be empty. Only needs to exist so that we can rely on it being there when locking it below for example. If .SD were used by j, of course then xvars would be the columns and we wouldn't be in this leaf. SDenv$.N = if (is.null(irows)) nrow(x) else length(irows) * !identical(suppressWarnings(max(irows)), 0L) # Fix for #963. - # When irows is integer(0), length(irows) = 0 will result in 0 (as expected). + # When irows is integer(0L), length(irows) = 0 will result in 0 (as expected). # Binary search can return all 0 irows when none of the input matches. Instead of doing all(irows==0L) (previous method), which has to allocate a logical vector the size of irows, we can make use of 'max'. If max is 0, we return 0. The condition where only some irows > 0 won't occur. } # Temp fix for #921. Allocate `.I` only if j-expression uses it. @@ -1370,9 +1370,9 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { # is.call: selecting from a list column should return list # is.object: for test 168 and 168.1 (S4 object result from ggplot2::qplot). Just plain list results should result in data.table - # Fix for #813 and #758. Ex: DT[c(FALSE, FALSE), list(integer(0), y)] + # Fix for #813 and #758. Ex: DT[c(FALSE, FALSE), list(integer(0L), y)] # where DT = data.table(x=1:2, y=3:4) should return an empty data.table!! - if (!is.null(irows) && (identical(irows, integer(0)) || all(irows %in% 0L))) ## TODO: any way to not check all 'irows' values? + if (!is.null(irows) && (identical(irows, integer(0L)) || all(irows %in% 0L))) ## TODO: any way to not check all 'irows' values? if (is.atomic(jval)) jval = jval[0L] else jval = lapply(jval, `[`, 0L) if (is.atomic(jval)) { setattr(jval,"names",NULL) @@ -1418,7 +1418,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { # Now ggplot2 returns data from print, we need a way to throw it away otherwise j accumulates the result SDenv$.SDall = SDenv$.SD = null.data.table() # e.g. test 607. Grouping still proceeds even though no .SD e.g. grouping key only tables, or where j consists of .N only - SDenv$.N = as.integer(0) # not 0L for the reson on next line : + SDenv$.N = as.integer(0) # not 0L for the reason on next line : SDenv$.GRP = as.integer(1) # oddly using 1L doesn't work reliably here! Possible R bug? TO DO: create reproducible example and report. To reproduce change to 1L and run test.data.table, test 780 fails. The assign seems ineffective and a previous value for .GRP from a previous test is retained, despite just creating a new SDenv. if (byjoin) { @@ -1448,9 +1448,9 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { # Find the groups, using 'byval' ... if (missing(by)) stop("Internal error, by is missing") - if (length(byval) && length(byval[[1]])) { + if (length(byval) && length(byval[[1L]])) { if (!bysameorder) { - if (verbose) {last.started.at=proc.time()[3];cat("Finding groups using forderv ... ");flush.console()} + if (verbose) {last.started.at=proc.time()[3L];cat("Finding groups using forderv ... ");flush.console()} o__ = forderv(byval, sort=!missing(keyby), retGrp=TRUE) # The sort= argument is called sortStr at C level. It's just about saving the sort of unique strings at # C level for efficiency (cgroup vs csort) when by= not keyby=. All other types are always sorted. Getting @@ -1460,37 +1460,37 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { # forderv() returns empty integer() if already ordered to save allocating 1:xnrow bysameorder = orderedirows && !length(o__) if (verbose) { - cat(round(proc.time()[3]-last.started.at, 3), "sec\n") - last.started.at=proc.time()[3] + cat(round(proc.time()[3L]-last.started.at, 3L), "sec\n") + last.started.at=proc.time()[3L] cat("Finding group sizes from the positions (can be avoided to save RAM) ... ") flush.console() # for windows } f__ = attr(o__, "starts") len__ = uniqlengths(f__, xnrow) - if (verbose) { cat(round(proc.time()[3]-last.started.at, 3), "sec\n");flush.console()} + if (verbose) { cat(round(proc.time()[3L]-last.started.at, 3L), "sec\n");flush.console()} if (!bysameorder && missing(keyby)) { # TO DO: lower this into forder.c - if (verbose) {last.started.at=proc.time()[3];cat("Getting back original order ... ");flush.console()} + if (verbose) {last.started.at=proc.time()[3L];cat("Getting back original order ... ");flush.console()} firstofeachgroup = o__[f__] if (length(origorder <- forderv(firstofeachgroup))) { f__ = f__[origorder] len__ = len__[origorder] } - if (verbose) {cat(round(proc.time()[3]-last.started.at, 3), "sec\n")} + if (verbose) {cat(round(proc.time()[3L]-last.started.at, 3L), "sec\n")} } - if (!orderedirows && !length(o__)) o__ = 1:xnrow # temp fix. TODO: revist orderedirows + if (!orderedirows && !length(o__)) o__ = seq_len(xnrow) # temp fix. TODO: revist orderedirows } else { - if (verbose) {last.started.at=proc.time()[3];cat("Finding groups using uniqlist ... ");flush.console()} + if (verbose) {last.started.at=proc.time()[3L];cat("Finding groups using uniqlist ... ");flush.console()} f__ = uniqlist(byval) if (verbose) { - cat(round(proc.time()[3]-last.started.at, 3), "sec\n") - last.started.at=proc.time()[3] + cat(round(proc.time()[3L]-last.started.at, 3L), "sec\n") + last.started.at=proc.time()[3L] cat("Finding group sizes from the positions (can be avoided to save RAM) ... ") flush.console() # for windows } len__ = uniqlengths(f__, xnrow) # TO DO: combine uniqlist and uniquelengths into one call. Or, just set len__ to NULL when dogroups infers that. - if (verbose) { cat(round(proc.time()[3]-last.started.at, 3), "sec\n");flush.console() } + if (verbose) { cat(round(proc.time()[3L]-last.started.at, 3L), "sec\n");flush.console() } } } else { f__=NULL @@ -1597,7 +1597,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { oldjvnames = jvnames jvnames = NULL # TODO: not let jvnames grow, maybe use (number of lapply(.SD, .))*lenght(ansvarsnew) + other jvars ?? not straightforward. # Fix for #744. Don't use 'i' in for-loops. It masks the 'i' from the input!! - for (i_ in 2:length(jsubl)) { + for (i_ in 2L:length(jsubl)) { this = jsub[[i_]] if (is.name(this)) { if (this == ".SD") { # optimise '.SD' alone @@ -1620,7 +1620,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { funi = funi + 1L # Fix for #985 jsubl[[i_]] = as.list(deparse_ans[[1L]][-1L]) # just keep the '.' from list(.) jvnames = c(jvnames, deparse_ans[[2L]]) - } else if (this[[1]] == "list") { + } else if (this[[1L]] == "list") { # also handle c(lapply(.SD, sum), list()) - silly, yes, but can happen if (length(this) > 1L) { jl__ = as.list(jsubl[[i_]])[-1L] # just keep the '.' from list(.) @@ -1687,12 +1687,12 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { .ok <- function(q) { if (dotN(q)) return(TRUE) # For #5760 cond = is.call(q) && as.character(q[[1L]]) %chin% gfuns && !is.call(q[[2L]]) - ans = cond && (length(q)==2 || identical("na",substring(names(q)[3L],1,2))) + ans = cond && (length(q)==2L || identical("na",substring(names(q)[3L], 1L, 2L))) if (identical(ans, TRUE)) return(ans) - ans = cond && length(q)==3 && ( as.character(q[[1]]) %chin% c("head", "tail") && - (identical(q[[3]], 1) || identical(q[[3]], 1L)) || - as.character(q[[1]]) %chin% "[" && is.numeric(q[[3]]) && - length(q[[3]])==1 && q[[3]]>0 ) + ans = cond && length(q) == 3L && + length(q[[3L]]) == 1L && is.numeric(q[[3L]]) && ( + as.character(q[[1L]]) %chin% c("head", "tail") && q[[3L]] == 1L || + as.character(q[[1L]]) %chin% "[" && q[[3L]] > 0 ) if (is.na(ans)) ans=FALSE ans } @@ -1705,13 +1705,13 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { for (ii in seq_along(jsub)[-1L]) { if (dotN(jsub[[ii]])) next; # For #5760 jsub[[ii]][[1L]] = as.name(paste("g", jsub[[ii]][[1L]], sep="")) - if (length(jsub[[ii]])==3) jsub[[ii]][[3]] = eval(jsub[[ii]][[3]], parent.frame()) # tests 1187.2 & 1187.4 + if (length(jsub[[ii]])==3L) jsub[[ii]][[3L]] = eval(jsub[[ii]][[3L]], parent.frame()) # tests 1187.2 & 1187.4 } else { jsub[[1L]] = as.name(paste("g", jsub[[1L]], sep="")) - if (length(jsub)==3) jsub[[3]] = eval(jsub[[3]], parent.frame()) # tests 1187.3 & 1187.5 + if (length(jsub)==3L) jsub[[3L]] = eval(jsub[[3L]], parent.frame()) # tests 1187.3 & 1187.5 } - if (verbose) cat("GForce optimized j to '",deparse(jsub,width.cutoff=200),"'\n",sep="") + if (verbose) cat("GForce optimized j to '",deparse(jsub,width.cutoff=200L),"'\n",sep="") } else if (verbose) cat("GForce is on, left j unchanged\n"); } } @@ -1755,7 +1755,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { jiscols = chmatch(jisvars,names(i)) # integer() if there are no jisvars (usually there aren't, advanced feature) xjiscols = chmatch(xjisvars, names(x)) SDenv$.xSD = x[min(nrow(i), 1L), xjisvars, with=FALSE] - if (!missing(on)) o__ = xo else o__ = integer(0) + if (!missing(on)) o__ = xo else o__ = integer(0L) } else { groups = byval grpcols = seq_along(byval) @@ -1767,7 +1767,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { # for #971, added !GForce. if (GForce) we do it much more (memory) efficiently than subset of order vector below. if (length(irows) && !isTRUE(irows) && !GForce) { # fix for bug #2758. TO DO: provide a better error message - if (length(irows) > 1 && length(zo__ <- which(irows == 0)) > 0) stop("i[", zo__[1], "] is 0. While grouping, i=0 is allowed when it's the only value. When length(i) > 1, all i should be > 0.") + if (length(irows) > 1L && length(zo__ <- which(irows == 0)) > 0L) stop("i[", zo__[1L], "] is 0. While grouping, i=0 is allowed when it's the only value. When length(i) > 1, all i should be > 0.") if (length(o__) && length(irows)!=length(o__)) stop("Internal error: length(irows)!=length(o__)") o__ = if (length(o__)) irows[o__] # better do this once up front (even though another alloc) than deep repeated branch in dogroups.c else irows @@ -1777,7 +1777,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { # for consistency of empty case in test 184 f__=len__=0L } - if (verbose) {last.started.at=proc.time()[3];cat("Making each group and running j (GForce ",GForce,") ... ",sep="");flush.console()} + if (verbose) {last.started.at=proc.time()[3L];cat("Making each group and running j (GForce ",GForce,") ... ",sep="");flush.console()} if (GForce) { thisEnv = new.env() # not parent=parent.frame() so that gsum is found for (ii in ansvars) assign(ii, x[[ii]], thisEnv) @@ -1791,7 +1791,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { } else { ans = .Call(Cdogroups, x, xcols, groups, grpcols, jiscols, xjiscols, grporder, o__, f__, len__, jsub, SDenv, cols, newnames, !missing(on), verbose) } - if (verbose) {cat(round(proc.time()[3]-last.started.at,3),"secs\n");flush.console()} + if (verbose) {cat(round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console()} # TO DO: xrows would be a better name for irows: irows means the rows of x that i joins to # Grouping by i: icols the joins columns (might not need), isdcols (the non join i and used by j), all __ are length x # Grouping by by: i is by val, icols NULL, o__ may be subset of x, f__ points to o__ (or x if !length o__) @@ -1809,11 +1809,11 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { for (i in seq_along(hits)) setattr(attrs, hits[i], NULL) # does by reference } if (!missing(keyby)) { - cnames = as.character(bysubl)[-1] + cnames = as.character(bysubl)[-1L] if (all(cnames %chin% names(x))) { - if (verbose) {last.started.at=proc.time()[3];cat("setkey() after the := with keyby= ... ");flush.console()} + if (verbose) {last.started.at=proc.time()[3L];cat("setkey() after the := with keyby= ... ");flush.console()} setkeyv(x,cnames) # TO DO: setkey before grouping to get memcpy benefit. - if (verbose) {cat(round(proc.time()[3]-last.started.at,3),"secs\n");flush.console()} + if (verbose) {cat(round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console()} } else warning(":= keyby not straightforward character column names or list() of column names, treating as a by:",paste(cnames,collapse=","),"\n") } @@ -1838,9 +1838,9 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { setnames(ans,seq_along(bynames),bynames) # TO DO: reinvestigate bynames flowing from dogroups here and simplify } if (byjoin && !missing(keyby) && !bysameorder) { - if (verbose) {last.started.at=proc.time()[3];cat("setkey() afterwards for keyby=.EACHI ... ");flush.console()} + if (verbose) {last.started.at=proc.time()[3L];cat("setkey() afterwards for keyby=.EACHI ... ");flush.console()} setkeyv(ans,names(ans)[seq_along(byval)]) - if (verbose) {cat(round(proc.time()[3]-last.started.at,3),"secs\n");flush.console()} + if (verbose) {cat(round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console()} } else if (!missing(keyby) || (haskey(x) && bysameorder)) { setattr(ans,"sorted",names(ans)[seq_along(grpcols)]) } @@ -1851,7 +1851,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { if (length(expr)==2L) # no parameters passed to mean, so defaults of trim=0 and na.rm=FALSE return(call(".External",quote(Cfastmean),expr[[2L]], FALSE)) # return(call(".Internal",expr)) # slightly faster than .External, but R now blocks .Internal in coerce.c from apx Sep 2012 - if (length(expr)==3L && identical("na",substring(names(expr)[3L],1,2))) # one parameter passed to mean() + if (length(expr)==3L && identical("na",substring(names(expr)[3L], 1L, 2L))) # one parameter passed to mean() return(call(".External",quote(Cfastmean),expr[[2L]], expr[[3L]])) # faster than .Call assign("nomeanopt",TRUE,parent.frame()) expr # e.g. trim is not optimized, just na.rm @@ -1901,7 +1901,7 @@ as.matrix.data.table <- function(x,...) if (length(dj <- dim(xj)) == 2L && dj[2L] > 1L) { if (inherits(xj, "data.table")) xj <- X[[j]] <- as.matrix(X[[j]]) - dnj <- dimnames(xj)[[2]] + dnj <- dimnames(xj)[[2L]] collabs[[j]] <- paste(collabs[[j]], if (length(dnj) > 0L) dnj @@ -1909,7 +1909,7 @@ as.matrix.data.table <- function(x,...) } if (!is.logical(xj)) all.logical <- FALSE - if (length(levels(xj)) > 0 || !(is.numeric(xj) || is.complex(xj) || is.logical(xj)) || + if (length(levels(xj)) > 0L || !(is.numeric(xj) || is.complex(xj) || is.logical(xj)) || (!is.null(cl <- attr(xj, "class")) && any(cl %chin% c("Date", "POSIXct", "POSIXlt")))) non.numeric <- TRUE @@ -1941,13 +1941,13 @@ as.matrix.data.table <- function(x,...) } # bug #2375. fixed. same as head.data.frame and tail.data.frame to deal with negative indices -head.data.table <- function(x, n=6, ...) { +head.data.table <- function(x, n=6L, ...) { if (!cedta()) return(NextMethod()) stopifnot(length(n) == 1L) i = seq_len(if (n<0L) max(nrow(x)+n, 0L) else min(n,nrow(x))) x[i, , ] } -tail.data.table <- function(x, n=6, ...) { +tail.data.table <- function(x, n=6L, ...) { if (!cedta()) return(NextMethod()) stopifnot(length(n) == 1L) n <- if (n<0L) max(nrow(x) + n, 0L) else min(n, nrow(x)) @@ -1959,7 +1959,7 @@ tail.data.table <- function(x, n=6, ...) { # [<- is provided for consistency, but := is preferred as it allows by group and by reference to subsets of columns # with no copy of the (very large, say 10GB) columns at all. := is like an UPDATE in SQL and we like and want two symbols to change. if (!cedta()) { - x = if (nargs()<4) `[<-.data.frame`(x, i, value=value) + x = if (nargs()<4L) `[<-.data.frame`(x, i, value=value) else `[<-.data.frame`(x, i, j, value) return(alloc.col(x)) # over-allocate (again). Avoid all this by using :=. } @@ -2072,10 +2072,10 @@ dimnames.data.table <- function(x) { { if (!cedta()) return(`dimnames<-.data.frame`(x,value)) # won't maintain key column (if any). Revisit if ever causes a compatibility problem but don't think it's likely that packages change column names using dimnames<-. See names<-.data.table below. if (.R.assignNamesCopiesAll) warning("This is R<3.1.0 where dimnames(x)<-value syntax deep copies the entire table. Please upgrade to R>=3.1.0 and see ?setnames which allows you to change names by name with built-in checks and warnings.") - if (!is.list(value) || length(value) != 2) stop("attempting to assign invalid object to dimnames of a data.table") + if (!is.list(value) || length(value) != 2L) stop("attempting to assign invalid object to dimnames of a data.table") if (!is.null(value[[1L]])) stop("data.tables do not have rownames") - if (ncol(x) != length(value[[2]])) stop("can't assign",length(value[[2]]),"colnames to a",ncol(x),"column data.table") - setnames(x,as.character(value[[2]])) + if (ncol(x) != length(value[[2L]])) stop("can't assign",length(value[[2L]]),"colnames to a",ncol(x),"column data.table") + setnames(x,as.character(value[[2L]])) x # this returned value is now shallow copied by R 3.1.0 via *tmp*. A very welcome change. } @@ -2084,7 +2084,7 @@ dimnames.data.table <- function(x) { # When non data.table aware packages change names, we'd like to maintain the key, too. # If call is names(DT)[2]="newname", R will call this names<-.data.table function (notice no i) with 'value' already prepared to be same length as ncol caller = as.character(sys.call(-2L))[1L] - if ( ((tt<-identical(caller,"colnames<-")) && cedta(3)) || cedta() ) { + if ( ((tt<-identical(caller,"colnames<-")) && cedta(3L)) || cedta() ) { if (.R.assignNamesCopiesAll) warning("This is R<3.1.0 where ",if(tt)"col","names(x)<-value deep copies the entire table (several times). Please upgrade to R>=3.1.0 and see ?setnames which allows you to change names by name with built-in checks and warnings.") } @@ -2365,7 +2365,7 @@ point <- function(to, to_idx, from, from_idx) { ## take care of attributes. indices <- names(attributes(attr(ans, "index"))) for(index in indices) { - indexcols <- strsplit(index, split = "__")[[1]][-1L] + indexcols <- strsplit(index, split = "__")[[1L]][-1L] indexlength <- which.first(!indexcols %chin% cols) - 1L if (is.na(indexlength)) next ## all columns are present, nothing to be done reducedindex <- paste0(c("", indexcols[seq_len(indexlength)]), collapse = "__") ## the columns until the first missing form the new index @@ -2495,7 +2495,7 @@ setnames <- function(x,old,new) { # update secondary keys idx = attr(x,"index") for (k in names(attributes(idx))) { - tt = strsplit(k,split="__")[[1]][-1] + tt = strsplit(k,split="__")[[1L]][-1L] m = chmatch(names(x)[i], tt) w = which(!is.na(m)) if (length(w)) { @@ -2541,7 +2541,7 @@ set <- function(x,i=NULL,j,value) # low overhead, loopable { if (is.atomic(value)) { # protect NAMED of atomic value from .Call's NAMED=2 by wrapping with list() - l = vector("list",1) + l = vector("list", 1L) .Call(Csetlistelt,l,1L,value) # to avoid the copy by list() in R < 3.1.0 value = l } diff --git a/R/duplicated.R b/R/duplicated.R index e0c53d309..1f182870d 100644 --- a/R/duplicated.R +++ b/R/duplicated.R @@ -10,12 +10,12 @@ duplicated.data.table <- function(x, incomparables=FALSE, fromLast=FALSE, by=seq by = key(x) warning(warning_oldUniqueByKey) } - if (nrow(x) == 0L || ncol(x) == 0L) return(logical(0)) # fix for bug #5582 + if (nrow(x) == 0L || ncol(x) == 0L) return(logical(0L)) # fix for bug #5582 if (is.na(fromLast) || !is.logical(fromLast)) stop("'fromLast' must be TRUE or FALSE") query <- .duplicated.helper(x, by) # fix for bug #5405 - unique on null data table returns error (because of 'forderv') # however, in this case we can bypass having to go to forderv at all. - if (!length(query$by)) return(logical(0)) + if (!length(query$by)) return(logical(0L)) if (query$use.keyprefix) { f = uniqlist(shallow(x, query$by)) @@ -88,7 +88,7 @@ unique.data.table <- function(x, incomparables=FALSE, fromLast=FALSE, by=seq_alo if (use.sub.cols) { ## Did the user specify (integer) indexes for the columns? if (is.numeric(by)) { - if (any(as.integer(by) != by) || any(by<1) || any(by>ncol(x))) { + if (any(as.integer(by) != by) || any(by<1L) || any(by>ncol(x))) { stop("Integer values between 1 and ncol are required for 'by' when ", "column indices. It's often better to use column names.") } diff --git a/R/fcast.R b/R/fcast.R index c41fca1d9..d398f8ea0 100644 --- a/R/fcast.R +++ b/R/fcast.R @@ -149,8 +149,8 @@ dcast.data.table <- function(data, formula, fun.aggregate = NULL, sep = "_", ... fun.call = aggregate_funs(fun.call, lvals, sep, ...) errmsg = "Aggregating function(s) should take vector inputs and return a single value (length=1). However, function(s) returns length!=1. This value will have to be used to fill any missing combinations, and therefore must be length=1. Either override by setting the 'fill' argument explicitly or modify your function to handle this case appropriately." if (is.null(fill)) { - fill.default <- suppressWarnings(dat[0][, eval(fun.call)]) - # tryCatch(fill.default <- dat[0][, eval(fun.call)], error = function(x) stop(errmsg, call.=FALSE)) + fill.default <- suppressWarnings(dat[0L][, eval(fun.call)]) + # tryCatch(fill.default <- dat[0L][, eval(fun.call)], error = function(x) stop(errmsg, call.=FALSE)) if (nrow(fill.default) != 1L) stop(errmsg, call.=FALSE) } if (!any(valnames %chin% varnames)) { diff --git a/R/fmelt.R b/R/fmelt.R index 4aaf0e262..5f6d83a95 100644 --- a/R/fmelt.R +++ b/R/fmelt.R @@ -6,7 +6,7 @@ melt <- function(data, ..., na.rm = FALSE, value.name = "value") { reshape2::melt(data, ..., na.rm=na.rm, value.name=value.name) } -patterns <- function(..., cols=character(0)) { +patterns <- function(..., cols=character(0L)) { # if ... has no names, names(list(...)) will be ""; # this assures they'll be NULL instead p = unlist(list(...), use.names = any(nzchar(names(...)))) diff --git a/R/foverlaps.R b/R/foverlaps.R index f9306aead..5e60aa791 100644 --- a/R/foverlaps.R +++ b/R/foverlaps.R @@ -104,10 +104,10 @@ foverlaps <- function(x, y, by.x = if (!is.null(key(x))) key(x) else key(y), by. end = yintervals[2L], any =, within =, equal = yintervals) call = construct(head(ynames, -2L), uycols, type) - if (verbose) {last.started.at=proc.time()[3];cat("unique() + setkey() operations done in ...");flush.console()} + if (verbose) {last.started.at=proc.time()[3L];cat("unique() + setkey() operations done in ...");flush.console()} uy = unique(y[, eval(call)]) - setkey(uy)[, `:=`(lookup = list(list(integer(0))), type_lookup = list(list(integer(0))), count=0L, type_count=0L)] - if (verbose) {cat(round(proc.time()[3]-last.started.at,3),"secs\n");flush.console} + setkey(uy)[, `:=`(lookup = list(list(integer(0L))), type_lookup = list(list(integer(0L))), count=0L, type_count=0L)] + if (verbose) {cat(round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console} matches <- function(ii, xx, del, ...) { cols = setdiff(names(xx), del) xx = .shallow(xx, cols, retain.key = FALSE) @@ -117,7 +117,7 @@ foverlaps <- function(x, y, by.x = if (!is.null(key(x))) key(x) else key(y), by. } indices <- function(x, y, intervals, ...) { if (type == "start") { - sidx = eidx = matches(x, y, intervals[2L], rollends=c(FALSE,FALSE), ...) ## TODO: eidx can be set to integer(0) + sidx = eidx = matches(x, y, intervals[2L], rollends=c(FALSE,FALSE), ...) ## TODO: eidx can be set to integer(0L) } else if (type == "end") { eidx = sidx = matches(x, y, intervals[1L], rollends=c(FALSE,FALSE), ...) ## TODO: sidx can be set to integer(0) } else { @@ -130,9 +130,9 @@ foverlaps <- function(x, y, by.x = if (!is.null(key(x))) key(x) else key(y), by. .Call(Clookup, uy, nrow(y), indices(uy, y, yintervals, nomatch=0L, roll=roll), maxgap, minoverlap, mult, type, verbose) if (maxgap == 0L && minoverlap == 1L) { iintervals = tail(names(x), 2L) - if (verbose) {last.started.at=proc.time()[3];cat("binary search(es) done in ...");flush.console()} + if (verbose) {last.started.at=proc.time()[3L];cat("binary search(es) done in ...");flush.console()} xmatches = indices(uy, x, xintervals, nomatch=0L, roll=roll) - if (verbose) {cat(round(proc.time()[3]-last.started.at,3),"secs\n");flush.console} + if (verbose) {cat(round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console} olaps = .Call(Coverlaps, uy, xmatches, mult, type, nomatch, verbose) } else if (maxgap == 0L && minoverlap > 1L) { stop("Not yet implemented") diff --git a/R/frank.R b/R/frank.R index b1f3421a3..d45e9682f 100644 --- a/R/frank.R +++ b/R/frank.R @@ -67,7 +67,7 @@ frankv <- function(x, cols=seq_along(x), order=1L, na.last=TRUE, ties.method=c(" } frank <- function(x, ..., na.last=TRUE, ties.method=c("average", "first", "random", "max", "min", "dense")) { - cols = substitute(list(...))[-1] + cols = substitute(list(...))[-1L] if (identical(as.character(cols), "NULL")) { cols = NULL order = 1L @@ -76,8 +76,8 @@ frank <- function(x, ..., na.last=TRUE, ties.method=c("average", "first", "rando order=rep(1L, length(cols)) for (i in seq_along(cols)) { v=as.list(cols[[i]]) - if (length(v) > 1 && v[[1L]] == "+") v=v[[-1L]] - else if (length(v) > 1 && v[[1L]] == "-") { + if (length(v) > 1L && v[[1L]] == "+") v=v[[-1L]] + else if (length(v) > 1L && v[[1L]] == "-") { v=v[[-1L]] order[i] = -1L } diff --git a/R/fread.R b/R/fread.R index f6064bdcf..7cf8c45f4 100644 --- a/R/fread.R +++ b/R/fread.R @@ -4,9 +4,9 @@ fread <- function(input="",file,sep="auto",sep2="auto",dec=".",quote="\"",nrows= if (is.null(sep)) sep="\n" # C level knows that \n means \r\n on Windows, for example else { stopifnot( length(sep)==1L, !is.na(sep), is.character(sep) ) - if (sep=="") sep="\n" # meaning readLines behaviour. The 3 values (NULL, "" or "\n") are equivalent. - else if (sep=="auto") sep="" # sep=="" at C level means auto sep - else stopifnot( nchar(sep)==1 ) # otherwise an actual character to use as sep + if (sep=="") sep="\n" # meaning readLines behaviour. The 3 values (NULL, "" or "\n") are equivalent. + else if (sep=="auto") sep="" # sep=="" at C level means auto sep + else stopifnot( nchar(sep)==1L ) # otherwise an actual character to use as sep } stopifnot( is.character(dec), length(dec)==1L, nchar(dec)==1L ) # handle encoding, #563 @@ -24,7 +24,7 @@ fread <- function(input="",file,sep="auto",sep2="auto",dec=".",quote="\"",nrows= stopifnot(length(skip)==1L) stopifnot(is.numeric(nThread) && length(nThread)==1L) nThread=as.integer(nThread) - stopifnot(nThread>=1) + stopifnot(nThread>=1L) if (!missing(file)) { if (!identical(input, "")) stop("You can provide 'input' or 'file', not both.") if (!file.exists(file)) stop(sprintf("Provided file '%s' does not exists.", file)) @@ -91,7 +91,7 @@ fread <- function(input="",file,sep="auto",sep2="auto",dec=".",quote="\"",nrows= warnings2errors = getOption("warn") >= 2 ans = .Call(CfreadR,input,sep,dec,quote,header,nrows,skip,na.strings,strip.white,blank.lines.skip, fill,showProgress,nThread,verbose,warnings2errors,logical01,select,drop,colClasses,integer64,encoding) - nr = length(ans[[1]]) + nr = length(ans[[1L]]) if ((!"bit64" %chin% loadedNamespaces()) && any(sapply(ans,inherits,"integer64"))) require_bit64() setattr(ans,"row.names",.set_row_names(nr)) diff --git a/R/fwrite.R b/R/fwrite.R index 5ef7fb78c..7d1189ce1 100644 --- a/R/fwrite.R +++ b/R/fwrite.R @@ -12,7 +12,7 @@ fwrite <- function(x, file="", append=FALSE, quote="auto", na = as.character(na[1L]) # fix for #1725 if (missing(qmethod)) qmethod = qmethod[1L] if (missing(dateTimeAs)) dateTimeAs = dateTimeAs[1L] - else if (length(dateTimeAs)>1) stop("dateTimeAs must be a single string") + else if (length(dateTimeAs)>1L) stop("dateTimeAs must be a single string") dateTimeAs = chmatch(dateTimeAs, c("ISO","squash","epoch","write.csv"))-1L if (is.na(dateTimeAs)) stop("dateTimeAs must be 'ISO','squash','epoch' or 'write.csv'") if (!missing(logical01) && !missing(logicalAsInt)) @@ -37,9 +37,9 @@ fwrite <- function(x, file="", append=FALSE, quote="auto", isLOGICAL(col.names), isLOGICAL(append), isLOGICAL(row.names), isLOGICAL(verbose), isLOGICAL(showProgress), isLOGICAL(logical01), length(na) == 1L, #1725, handles NULL or character(0) input - is.character(file) && length(file)==1 && !is.na(file), - length(buffMB)==1 && !is.na(buffMB) && 1<=buffMB && buffMB<=1024, - length(nThread)==1 && !is.na(nThread) && nThread>=1 + is.character(file) && length(file)==1L && !is.na(file), + length(buffMB)==1L && !is.na(buffMB) && 1<=buffMB && buffMB<=1024, + length(nThread)==1L && !is.na(nThread) && nThread>=1L ) file <- path.expand(file) # "~/foo/bar" if (append && missing(col.names) && (file=="" || file.exists(file))) diff --git a/R/getdots.R b/R/getdots.R index f96ad2c90..56c6e52f3 100644 --- a/R/getdots.R +++ b/R/getdots.R @@ -6,5 +6,5 @@ getdots <- function() # return a string vector of the arguments in '...' # My long winded way: gsub(" ","",unlist(strsplit(deparse(substitute(list(...))),"[(,)]")))[-1] # Peter Dalgaard's & Brian Ripley helped out and ended up with : - as.character(match.call(sys.function(-1), call=sys.call(-1), expand.dots=FALSE)$...) + as.character(match.call(sys.function(-1L), call=sys.call(-1L), expand.dots=FALSE)$...) } diff --git a/R/groupingsets.R b/R/groupingsets.R index 038f45b1f..6c40d416e 100644 --- a/R/groupingsets.R +++ b/R/groupingsets.R @@ -10,7 +10,7 @@ rollup.data.table <- function(x, j, by, .SDcols, id = FALSE, ...) { if (!is.logical(id)) stop("Argument 'id' must be logical scalar.") # generate grouping sets for rollup - sets = lapply(length(by):0, function(i) by[0:i]) + sets = lapply(length(by):0L, function(i) by[0L:i]) # redirect to workhorse function jj = substitute(j) groupingsets.data.table(x, by=by, sets=sets, .SDcols=.SDcols, id=id, jj=jj) @@ -29,8 +29,8 @@ cube.data.table <- function(x, j, by, .SDcols, id = FALSE, ...) { stop("Argument 'id' must be logical scalar.") # generate grouping sets for cube - power set: http://stackoverflow.com/a/32187892/2490497 n = length(by) - keepBool = sapply(2L^(1:n - 1L), function(k) rep(c(FALSE, TRUE), each=k, times=(2L^n / (2L*k)))) - sets = lapply((2L^n):1, function(j) by[keepBool[j, ]]) + keepBool = sapply(2L^(seq_len(n) - 1L), function(k) rep(c(FALSE, TRUE), each=k, times=(2L^n / (2L*k)))) + sets = lapply((2L^n):1L, function(j) by[keepBool[j, ]]) # redirect to workhorse function jj = substitute(j) groupingsets.data.table(x, by=by, sets=sets, .SDcols=.SDcols, id=id, jj=jj) @@ -88,7 +88,7 @@ groupingsets.data.table <- function(x, j, by, sets, .SDcols, id = FALSE, jj, ... setcolorder(empty, c("grouping", by, setdiff(names(empty), c("grouping", by)))) } # workaround for rbindlist fill=TRUE on integer64 #1459 - int64.cols = vapply(empty, inherits, logical(1), "integer64") + int64.cols = vapply(empty, inherits, logical(1L), "integer64") int64.cols = names(int64.cols)[int64.cols] if (length(int64.cols) && !requireNamespace("bit64", quietly=TRUE)) stop("Using integer64 class columns require to have 'bit64' package installed.") diff --git a/R/merge.R b/R/merge.R index b36f74fcb..ceb87c75e 100644 --- a/R/merge.R +++ b/R/merge.R @@ -52,7 +52,7 @@ merge.data.table <- function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FA end[chmatch(dupnames, end, 0L)] = paste(dupnames, suffixes[2L], sep="") } - dt = y[x,nomatch = if (all.x) NA else 0,on=by,allow.cartesian=allow.cartesian] # includes JIS columns (with a i. prefix if conflict with x names) + dt = y[x,nomatch = if (all.x) NA else 0L,on=by,allow.cartesian=allow.cartesian] # includes JIS columns (with a i. prefix if conflict with x names) if (all.y && nrow(y)) { # If y does not have any rows, no need to proceed # Perhaps not very commonly used, so not a huge deal that the join is redone here. diff --git a/R/onAttach.R b/R/onAttach.R index e00b81105..d6c4bfe07 100644 --- a/R/onAttach.R +++ b/R/onAttach.R @@ -3,16 +3,16 @@ if (interactive()) { v = packageVersion("data.table") d = read.dcf(system.file("DESCRIPTION", package="data.table"), fields = c("Packaged", "Built")) - if (is.na(d[1])) { - if (is.na(d[2])) { + if (is.na(d[1L])) { + if (is.na(d[2L])) { return() #neither field exists } else { - d = unlist(strsplit(d[2], split="; "))[3] + d = unlist(strsplit(d[2L], split="; "))[3L] } } else { - d = d[1] + d = d[1L] } - dev = as.integer(v[1,3])%%2 == 1 # version number odd => dev + dev = as.integer(v[1L, 3L]) %% 2L == 1L # version number odd => dev packageStartupMessage("data.table ", v, if(dev) paste0(" IN DEVELOPMENT built ", d)) if (dev && (Sys.Date() - as.Date(d))>28) packageStartupMessage("**********\nThis development version of data.table was built more than 4 weeks ago. Please update.\n**********") diff --git a/R/onLoad.R b/R/onLoad.R index db48c9a80..ad9cbb976 100644 --- a/R/onLoad.R +++ b/R/onLoad.R @@ -7,9 +7,9 @@ ss = body(tt) if (class(ss)!="{") ss = as.call(c(as.name("{"), ss)) prefix = if (!missing(pkgname)) "data.table::" else "" # R provides the arguments when it calls .onLoad, I don't in dev/test - if (!length(grep("data.table",ss[[2]]))) { - ss = ss[c(1,NA,2:length(ss))] - ss[[2]] = parse(text=paste("if (!identical(class(..1),'data.frame')) for (x in list(...)) { if (inherits(x,'data.table')) return(",prefix,"data.table(...)) }",sep=""))[[1]] + if (!length(grep("data.table",ss[[2L]]))) { + ss = ss[c(1L, NA, 2L:length(ss))] + ss[[2L]] = parse(text=paste("if (!identical(class(..1),'data.frame')) for (x in list(...)) { if (inherits(x,'data.table')) return(",prefix,"data.table(...)) }",sep=""))[[1]] body(tt)=ss (unlockBinding)("cbind.data.frame",baseenv()) assign("cbind.data.frame",tt,envir=asNamespace("base"),inherits=FALSE) @@ -18,9 +18,9 @@ tt = base::rbind.data.frame ss = body(tt) if (class(ss)!="{") ss = as.call(c(as.name("{"), ss)) - if (!length(grep("data.table",ss[[2]]))) { - ss = ss[c(1,NA,2:length(ss))] - ss[[2]] = parse(text=paste("for (x in list(...)) { if (inherits(x,'data.table')) return(",prefix,".rbind.data.table(...)) }",sep=""))[[1]] # fix for #4995 + if (!length(grep("data.table",ss[[2L]]))) { + ss = ss[c(1L, NA, 2L:length(ss))] + ss[[2L]] = parse(text=paste("for (x in list(...)) { if (inherits(x,'data.table')) return(",prefix,".rbind.data.table(...)) }",sep=""))[[1L]] # fix for #4995 body(tt)=ss (unlockBinding)("rbind.data.frame",baseenv()) assign("rbind.data.frame",tt,envir=asNamespace("base"),inherits=FALSE) @@ -68,9 +68,9 @@ # Test R behaviour ... - x = 1:3 + x = 1L:3L y = list(x) - .R.listCopiesNamed <<- (address(x) != address(y[[1]])) # FALSE from R 3.1 + .R.listCopiesNamed <<- (address(x) != address(y[[1L]])) # FALSE from R 3.1 DF = data.frame(a=1:3, b=4:6) add1 = address(DF$a) @@ -84,7 +84,7 @@ DF = data.frame(a=1:3, b=4:6) add1 = address(DF$a) add2 = address(DF) - DF[2,"b"] = 7 # changed b but not a + DF[2L, "b"] = 7 # changed b but not a add3 = address(DF$a) add4 = address(DF) .R.subassignCopiesOthers <<- add1 != add3 # FALSE from R 3.1 diff --git a/R/print.data.table.R b/R/print.data.table.R index 129984fa3..fa4f102b8 100644 --- a/R/print.data.table.R +++ b/R/print.data.table.R @@ -22,7 +22,7 @@ print.data.table <- function(x, topn=getOption("datatable.print.topn"), # Other options investigated (could revisit): Cstack_info(), .Last.value gets set first before autoprint, history(), sys.status(), # topenv(), inspecting next statement in caller, using clock() at C level to timeout suppression after some number of cycles SYS <- sys.calls() - if (length(SYS) <= 2 || # "> DT" auto-print or "> print(DT)" explicit print (cannot distinguish from R 3.2.0 but that's ok) + if (length(SYS) <= 2L || # "> DT" auto-print or "> print(DT)" explicit print (cannot distinguish from R 3.2.0 but that's ok) ( length(SYS) > 3L && is.symbol(thisSYS <- SYS[[length(SYS)-3L]][[1L]]) && as.character(thisSYS) %chin% mimicsAutoPrint ) ) { return(invisible()) @@ -39,14 +39,14 @@ print.data.table <- function(x, topn=getOption("datatable.print.topn"), if (!is.null(ky <- key(x))) cat("Key: <", paste(ky, collapse=", "), ">\n", sep="") if (!is.null(ixs <- indices(x))) - cat("Ind", if (length(ixs) > 1) "ices" else "ex", ": <", + cat("Ind", if (length(ixs) > 1L) "ices" else "ex", ": <", paste(ixs, collapse=">, <"), ">\n", sep="") } if (nrow(x) == 0L) { if (length(x)==0L) cat("Null data.table (0 rows and 0 cols)\n") # See FAQ 2.5 and NEWS item in v1.8.9 else - cat("Empty data.table (0 rows) of ",length(x)," col",if(length(x)>1L)"s",": ",paste(head(names(x),6),collapse=","),if(ncol(x)>6)"...","\n",sep="") + cat("Empty data.table (0 rows) of ",length(x)," col",if(length(x)>1L)"s",": ",paste(head(names(x),6L),collapse=","),if(ncol(x)>6L)"...","\n",sep="") return(invisible()) } if (topn*2nrows || !topnmiss)) { diff --git a/R/setkey.R b/R/setkey.R index 6e9e51a80..e26af1d95 100644 --- a/R/setkey.R +++ b/R/setkey.R @@ -1,7 +1,7 @@ setkey <- function(x, ..., verbose=getOption("datatable.verbose"), physical=TRUE) { if (is.character(x)) stop("x may no longer be the character name of the data.table. The possibility was undocumented and has been removed.") - cols = as.character(substitute(list(...))[-1]) + cols = as.character(substitute(list(...))[-1L]) if (!length(cols)) cols=colnames(x) else if (identical(cols,"NULL")) cols=NULL setkeyv(x, cols, verbose=verbose, physical=physical) @@ -62,7 +62,7 @@ setkeyv <- function(x, cols, verbose=getOption("datatable.verbose"), physical=TR .xi = x[[i]] # [[ is copy on write, otherwise checking type would be copying each column if (!typeof(.xi) %chin% c("integer","logical","character","double")) stop("Column '",i,"' is type '",typeof(.xi),"' which is not supported as a key column type, currently.") } - if (!is.character(cols) || length(cols)<1) stop("'cols' should be character at this point in setkey") + if (!is.character(cols) || length(cols)<1L) stop("'cols' should be character at this point in setkey") if (verbose) { tt = system.time(o <- forderv(x, cols, sort=TRUE, retGrp=FALSE)) # system.time does a gc, so we don't want this always on, until refcnt is on by default in R cat("forder took", tt["user.self"]+tt["sys.self"], "sec\n") @@ -148,7 +148,7 @@ is.sorted <- function(x, by=seq_along(x)) { # could pass through a flag for forderv to return early on first FALSE. But we don't need that internally # since internally we always then need ordering, an it's better in one step. Don't want inefficiency to creep in. # This is only here for user/debugging use to check/test valid keys; e.g. data.table:::is.sorted(DT,by) - 0 == length(forderv(x,by,retGrp=FALSE,sort=TRUE)) + 0L == length(forderv(x,by,retGrp=FALSE,sort=TRUE)) } else { if (!missing(by)) stop("x is vector but 'by' is supplied") .Call(Cfsorted, x) @@ -174,8 +174,8 @@ forderv <- function(x, by=seq_along(x), retGrp=FALSE, sort=TRUE, order=1L, na.la if ( !missing(order) && (length(order) != 1L || !(order %in% c(1L, -1L))) ) stop("x is a single vector, length(order) must be =1 and it's value should be 1 (ascending) or -1 (descending).") } else { - if (!length(x)) return(integer(0)) # to be consistent with base::order. this'll make sure forderv(NULL) will result in error - # (as base does) but forderv(data.table(NULL)) and forderv(list()) will return integer(0)) + if (!length(x)) return(integer(0L)) # to be consistent with base::order. this'll make sure forderv(NULL) will result in error + # (as base does) but forderv(data.table(NULL)) and forderv(list()) will return integer(0L)) if (is.character(by)) { w = chmatch(by, names(x)) if (anyNA(w)) stop("'by' contains '",by[is.na(w)][1],"' which is not a column name") @@ -196,9 +196,9 @@ forderv <- function(x, by=seq_along(x), retGrp=FALSE, sort=TRUE, order=1L, na.la forder <- function(x, ..., na.last=TRUE, decreasing=FALSE) { if (!is.data.table(x)) stop("x must be a data.table.") - if (ncol(x) == 0) stop("Attempting to order a 0-column data.table.") + if (ncol(x) == 0L) stop("Attempting to order a 0-column data.table.") if (is.na(decreasing) || !is.logical(decreasing)) stop("'decreasing' must be logical TRUE or FALSE") - cols = substitute(list(...))[-1] + cols = substitute(list(...))[-1L] if (identical(as.character(cols),"NULL") || !length(cols)) return(NULL) # to provide the same output as base::order ans = x order = rep(1L, length(cols)) @@ -259,15 +259,15 @@ setorder <- function(x, ..., na.last=FALSE) # as opposed to DT[order(.)] where na.last=TRUE, to be consistent with base { if (!is.data.frame(x)) stop("x must be a data.frame or data.table.") - cols = substitute(list(...))[-1] + cols = substitute(list(...))[-1L] if (identical(as.character(cols),"NULL")) return(x) if (length(cols)) { cols=as.list(cols) order=rep(1L, length(cols)) for (i in seq_along(cols)) { v=as.list(cols[[i]]) - if (length(v) > 1 && v[[1L]] == "+") v=v[[-1L]] - else if (length(v) > 1 && v[[1L]] == "-") { + if (length(v) > 1L && v[[1L]] == "+") v=v[[-1L]] + else if (length(v) > 1L && v[[1L]] == "-") { v=v[[-1L]] order[i] = -1L } @@ -306,7 +306,7 @@ setorderv <- function(x, cols, order=1L, na.last=FALSE) .xi = x[[i]] # [[ is copy on write, otherwise checking type would be copying each column if (!typeof(.xi) %chin% c("integer","logical","character","double")) stop("Column '",i,"' is type '",typeof(.xi),"' which is not supported for ordering currently.") } - if (!is.character(cols) || length(cols)<1) stop("'cols' should be character at this point in setkey.") + if (!is.character(cols) || length(cols)<1L) stop("'cols' should be character at this point in setkey.") o = forderv(x, cols, sort=TRUE, retGrp=FALSE, order=order, na.last=na.last) if (length(o)) { @@ -341,10 +341,10 @@ CJ <- function(..., sorted = TRUE, unique = FALSE) # The last vector is varied the quickest in the table, so dates should be last for roll for example l = list(...) emptyList <- FALSE ## fix for #2511 - if(any(sapply(l, length) == 0)){ + if(any(sapply(l, length) == 0L)){ ## at least one column is empty The whole thing will be empty in the end emptyList <- TRUE - l <- lapply(l, "[", 0) + l <- lapply(l, "[", 0L) } if (unique && !emptyList) l = lapply(l, unique) diff --git a/R/setops.R b/R/setops.R index 39c03fda7..6b44b1625 100644 --- a/R/setops.R +++ b/R/setops.R @@ -121,7 +121,7 @@ all.equal.data.table <- function(target, current, trim.levels=TRUE, check.attrib stopifnot(is.logical(trim.levels), is.logical(check.attributes), is.logical(ignore.col.order), is.logical(ignore.row.order), is.numeric(tolerance)) if (!is.data.table(target) || !is.data.table(current)) stop("'target' and 'current' must be both data.tables") - msg = character(0) + msg = character(0L) # init checks that detect high level all.equal if (nrow(current) != nrow(target)) msg = "Different number of rows" if (ncol(current) != ncol(target)) msg = c(msg, "Different number of columns") @@ -139,7 +139,7 @@ all.equal.data.table <- function(target, current, trim.levels=TRUE, check.attrib targetModes = vapply_1c(target, mode) currentModes = vapply_1c(current, mode) if (any( d<-(targetModes!=currentModes) )) { - w = head(which(d),3) + w = head(which(d),3L) return(paste0("Datasets have different column modes. First 3: ",paste( paste(names(targetModes)[w],"(",paste(targetModes[w],currentModes[w],sep="!="),")",sep="") ,collapse=" "))) @@ -153,7 +153,7 @@ all.equal.data.table <- function(target, current, trim.levels=TRUE, check.attrib if (length(targetTypes) != length(currentTypes)) stop("Internal error: ncol(current)==ncol(target) was checked above") if (any( d<-(targetTypes != currentTypes))) { - w = head(which(d),3) + w = head(which(d),3L) return(paste0("Datasets have different column classes. First 3: ",paste( paste(names(targetTypes)[w],"(",paste(targetTypes[w],currentTypes[w],sep="!="),")",sep="") ,collapse=" "))) diff --git a/R/test.data.table.R b/R/test.data.table.R index f87fc815f..4714e351d 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -76,7 +76,7 @@ test <- function(num,x,y,error=NULL,warning=NULL,output=NULL) { nfail = get("nfail", parent.frame()) # to cater for both test.data.table() and stepping through tests in dev whichfail = get("whichfail", parent.frame()) all.equal.result = TRUE - assign("ntest", get("ntest", parent.frame()) + 1, parent.frame(), inherits=TRUE) # bump number of tests run + assign("ntest", get("ntest", parent.frame()) + 1L, parent.frame(), inherits=TRUE) # bump number of tests run assign("lastnum", num, parent.frame(), inherits=TRUE) cat("\rRunning test id", num, " ") @@ -129,7 +129,7 @@ test <- function(num,x,y,error=NULL,warning=NULL,output=NULL) { cat("Observed: no error or warning\n") else cat("Observed ",observedtype,": '",gsub("^[(]converted from warning[)] ","",gsub("\n$","",gsub("^Error.* : \n ","",as.character(err)))),"'\n",sep="") - assign("nfail", nfail+1, parent.frame(), inherits=TRUE) # Not the same as nfail <<- nfail + 1, it seems (when run via R CMD check) + assign("nfail", nfail+1L, parent.frame(), inherits=TRUE) # Not the same as nfail <<- nfail + 1, it seems (when run via R CMD check) assign("whichfail", c(whichfail, num), parent.frame(), inherits=TRUE) return() } @@ -139,7 +139,7 @@ test <- function(num,x,y,error=NULL,warning=NULL,output=NULL) { } if (inherits(err,"try-error") || (!missing(y) && inherits(err<-try(y,TRUE),"try-error"))) { cat("Test",num,err) - assign("nfail", nfail+1, parent.frame(), inherits=TRUE) + assign("nfail", nfail+1L, parent.frame(), inherits=TRUE) assign("whichfail", c(whichfail, num), parent.frame(), inherits=TRUE) return() } @@ -149,7 +149,7 @@ test <- function(num,x,y,error=NULL,warning=NULL,output=NULL) { cat("Test",num,"expected TRUE but observed:\n") cat(">",deparse(xsub),"\n") if (is.data.table(x)) compactprint(x) else print(x) - assign("nfail", nfail+1, parent.frame(), inherits=TRUE) + assign("nfail", nfail+1L, parent.frame(), inherits=TRUE) assign("whichfail", c(whichfail, num), parent.frame(), inherits=TRUE) return() } else { @@ -185,7 +185,7 @@ test <- function(num,x,y,error=NULL,warning=NULL,output=NULL) { cat("> y =",deparse(ysub),"\n") if (is.data.table(y)) compactprint(y) else {cat("First 6 of ", length(y)," (type '", typeof(y), "'): ", sep=""); print(head(y))} if (!isTRUE(all.equal.result)) cat(all.equal.result,sep="\n") - assign("nfail", nfail+1, parent.frame(), inherits=TRUE) + assign("nfail", nfail+1L, parent.frame(), inherits=TRUE) assign("whichfail", c(whichfail, num), parent.frame(), inherits=TRUE) invisible() } diff --git a/R/timetaken.R b/R/timetaken.R index 72db043df..4372824d4 100644 --- a/R/timetaken.R +++ b/R/timetaken.R @@ -5,7 +5,7 @@ timetaken <- function(started.at) secs <- as.double(difftime(Sys.time(), started.at, units="secs")) } else { # new faster method using started.at = proc.time() - secs = proc.time()[3] - started.at[3] + secs = proc.time()[3L] - started.at[3L] } mins <- secs %/% 60 hrs <- mins %/% 60 diff --git a/R/utils.R b/R/utils.R index 51271425d..b14579b17 100644 --- a/R/utils.R +++ b/R/utils.R @@ -28,7 +28,7 @@ trim <- function(x) { } # take (I don't see it being used anywhere) -take <- function(x, n=1) +take <- function(x, n=1L) { # returns the head of head, without the last n observations # convenient when inlining expressions From 6afb0eef753d703511572af251d5f5bf6ac75c4b Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 22 Jan 2018 17:50:28 +0800 Subject: [PATCH 02/11] add some tests, comments --- R/data.table.R | 38 +++++++++++++++++++------------------- inst/tests/tests.Rraw | 35 ++++++++++++++++++++++++++--------- 2 files changed, 45 insertions(+), 28 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index b76b78e79..b0a4c4c94 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -125,20 +125,17 @@ data.table <-function(..., keep.rownames=FALSE, check.names=FALSE, key=NULL, str if (nrows[i]==0L) stop("Item ",i," has no length. Provide at least one item (such as NA, NA_integer_ etc) to be repeated to match the ",nr," rows in the longest column. Or, all columns can be 0 length, for insert()ing rows into.") # Implementing FR #4813 - recycle with warning when nr %% nrows[i] != 0L if (nr%%nrows[i] != 0L) warning("Item ", i, " is of size ", nrows[i], " but maximum size is ", nr, " (recycled leaving remainder of ", nr%%nrows[i], " items)") - # if (nr%%nrows[i] == 0L) { - if (is.data.frame(xi)) { # including data.table - ..i = rep(seq_len(nrow(xi)), length.out = nr) - x[[i]] = xi[..i,,drop=FALSE] - next - } - if (is.atomic(xi) || is.list(xi)) { - # TO DO: surely use set() here, or avoid the coercion - x[[i]] = rep(xi, length.out = nr) - next - } - stop("problem recycling column ",i,", try a simpler type") - # } - stop("argument ",i," (nrow ",nrows[i],") cannot be recycled without remainder to match longest nrow (",nr,")") + if (is.data.frame(xi)) { # including data.table + ..i = rep(seq_len(nrow(xi)), length.out = nr) + x[[i]] = xi[..i,,drop=FALSE] + next + } + if (is.atomic(xi) || is.list(xi)) { + # TO DO: surely use set() here, or avoid the coercion + x[[i]] = rep(xi, length.out = nr) + next + } + stop("problem recycling column ",i,", try a simpler type") } if (any(numcols>0L)) { value = vector("list",sum(pmax(numcols,1L))) @@ -1686,13 +1683,16 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { gfuns = c("sum", "prod", "mean", "median", "var", "sd", ".N", "min", "max", "head", "last", "first", "tail", "[") # added .N for #5760 .ok <- function(q) { if (dotN(q)) return(TRUE) # For #5760 - cond = is.call(q) && as.character(q[[1L]]) %chin% gfuns && !is.call(q[[2L]]) + cond = is.call(q) && as.character(q1 <- q[[1L]]) %chin% gfuns && !is.call(q[[2L]]) + # run GForce for simple f(x) calls and f(x, na.rm = TRUE)-like calls ans = cond && (length(q)==2L || identical("na",substring(names(q)[3L], 1L, 2L))) if (identical(ans, TRUE)) return(ans) - ans = cond && length(q) == 3L && - length(q[[3L]]) == 1L && is.numeric(q[[3L]]) && ( - as.character(q[[1L]]) %chin% c("head", "tail") && q[[3L]] == 1L || - as.character(q[[1L]]) %chin% "[" && q[[3L]] > 0 ) + # otherwise there must be three arguments, and only in two cases -- + # 1) head/tail(x, 1) or 2) x[n], n>0 + ans = cond && length(q)==3L && + length(q3 <- q[[3L]])==1L && is.numeric(q3) && ( + (as.character(q1) %chin% c("head", "tail") && q3==1L) || + (as.character(q1) %chin% "[" && q3 > 0) ) if (is.na(ans)) ans=FALSE ans } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ccc2a615f..b40e475c0 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -2868,6 +2868,7 @@ DT = data.table(a=1:3,b=4:6,b=7:9,c=10:12) test(1005, rbind(DT,DT), data.table(a=rep(1:3,2),b=rep(4:6,2),b=rep(7:9,2),c=rep(10:12,2))) M <- mtcars colnames(M)[11] <- NA +# NOTE -- this test requires having options('width') sufficiently wide test(1006, print(as.data.table(M), nrows=10), output="gear NA.*1: 21.0") # rbinding factor with non-factor/character @@ -4040,6 +4041,9 @@ test(1187.4, DT[, list(sum(y,na.rm=MyVar), mean(y,na.rm=MyVar)), by=x, verbose=T DT[, list(sum(y,na.rm=FALSE), mean(y,na.rm=FALSE)), by=x]) test(1187.5, DT[, mean(y,na.rm=MyVar), by=x, verbose=TRUE], output="GForce optimized j to", DT[, mean(y,na.rm=FALSE), by=x]) +# GForce should not turn on when the .ok function isn't triggered +test(1187.6, DT[ , mean(y, trim = .2), by = x, verbose = TRUE], output = 'GForce FALSE') + # test from Zach Mayer @@ -7511,19 +7515,21 @@ test(1579.10, dt[, tail(.SD,1L), by=x], dt[, utils::tail(.SD,1L), by=x]) test(1579.11, dt[, tail(.SD,1L), by=x], dt[, utils::tail(.SD,1L), by=x]) test(1579.12, dt[, tail(.SD,1L), keyby=x], dt[, utils::tail(.SD,1L), keyby=x]) test(1579.13, dt[, tail(.SD,1L), keyby=x], dt[, utils::tail(.SD,1L), keyby=x]) +# GForce _doesn't_ work when n > 1 +test(1579.14, dt[ , tail(.SD, 2), by = x, verbose = TRUE], output = 'GForce FALSE') mysub <- function(x, n) x[n] -test(1579.14, dt[, .SD[2], by=x], dt[, mysub(.SD,2), by=x]) test(1579.15, dt[, .SD[2], by=x], dt[, mysub(.SD,2), by=x]) -test(1579.16, dt[, .SD[2], keyby=x], dt[, mysub(.SD,2), keyby=x]) +test(1579.16, dt[, .SD[2], by=x], dt[, mysub(.SD,2), by=x]) test(1579.17, dt[, .SD[2], keyby=x], dt[, mysub(.SD,2), keyby=x]) -test(1579.18, dt[, .SD[2L], by=x], dt[, mysub(.SD,2L), by=x]) +test(1579.18, dt[, .SD[2], keyby=x], dt[, mysub(.SD,2), keyby=x]) test(1579.19, dt[, .SD[2L], by=x], dt[, mysub(.SD,2L), by=x]) -test(1579.20, dt[, .SD[2L], keyby=x], dt[, mysub(.SD,2L), keyby=x]) +test(1579.20, dt[, .SD[2L], by=x], dt[, mysub(.SD,2L), by=x]) test(1579.21, dt[, .SD[2L], keyby=x], dt[, mysub(.SD,2L), keyby=x]) +test(1579.22, dt[, .SD[2L], keyby=x], dt[, mysub(.SD,2L), keyby=x]) ans = capture.output(dt[, .SD[2], by=x, verbose=TRUE]) -test(1579.22, any(grepl("GForce optimized", ans)), TRUE) +test(1579.23, any(grepl("GForce optimized", ans)), TRUE) options(datatable.optimize=optim) @@ -9256,17 +9262,20 @@ test(1672.1, DT[ , .(.I[1L], V2[1L]), by = V1], #make sure GForce operating test(1672.2, DT[ , .(.I[1L], V2[1L]), by = V1, verbose = TRUE], output = "GForce optimized j") +#make sure GForce not operating for inversion +test(1672.3, DT[ , .(.I[-1L], V2[1L]), by = V1, verbose = TRUE], + output = "GForce FALSE) #make sure works on .I by itself -test(1672.3, DT[ , .I[1L], by = V1], +test(1672.4, DT[ , .I[1L], by = V1], output = " V1 V11: 1 12: 2 2") #make sure GForce here as well -test(1672.4, DT[ , .I[1L], by = V1, verbose = TRUE], +test(1672.5, DT[ , .I[1L], by = V1, verbose = TRUE], output = "GForce optimized j") #make sure works with order -test(1672.5, DT[order(V1), .I[1L], by = V1], +test(1672.6, DT[order(V1), .I[1L], by = V1], output = " V1 V11: 1 12: 2 2") # should also work with subsetting -test(1672.6, DT[1:5, .(.I[1L], V2[1L]), by = V1], +test(1672.7, DT[1:5, .(.I[1L], V2[1L]), by = V1], output = " V1 V1 V21: 1 1 12: 2 2 2") #tests for #1528 @@ -11313,6 +11322,14 @@ DTout = data.table( ) test(1866.6, melt(DT, measure.vars = patterns("^x", "^y", cols=names(DT))), DTout) +# miscellaneous missing tests uncovered by CodeCov difference +# in the process of PR #2573 +## data.table cannot recycle complicated types +short_s4_col = getClass("MethodDefinition") +test(1867.1, data.table(a = 1:4, short_s4_col), error = 'problem recycling.*try a simpler type') +## i must be a data.table when on is specified +DT = data.table(a = 1:3) +test(1867.2, DT[c(TRUE, FALSE), on = 'coefficients'], error = "not a data.table, but 'on'") ########################## From 4c8162d97a4d50bcf388c918d11b99d2372a2d70 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 22 Jan 2018 18:55:13 +0800 Subject: [PATCH 03/11] simplify verbose timing calls (including some undiscovered typos) --- R/between.R | 8 +++---- R/bmerge.R | 4 ++-- R/data.table.R | 56 +++++++++++++++++++++---------------------- R/foverlaps.R | 8 +++---- R/timetaken.R | 4 ++-- inst/tests/tests.Rraw | 9 +++++-- 6 files changed, 47 insertions(+), 42 deletions(-) diff --git a/R/between.R b/R/between.R index 10d3398c1..22deff3bc 100644 --- a/R/between.R +++ b/R/between.R @@ -22,18 +22,18 @@ inrange <- function(x,lower,upper,incbounds=TRUE) { subject = setDT(list(l=lower, u=upper)) ops = if (incbounds) c(4L, 2L) else c(5L, 3L) # >=,<= and >,< verbose = getOption("datatable.verbose") - if (verbose) {last.started.at=proc.time()[3L];cat("forderv(query) took ... ");flush.console()} + if (verbose) {last.started.at=proc.time();cat("forderv(query) took ... ");flush.console()} xo = forderv(query) - if (verbose) {cat(round(proc.time()[3L]-last.started.at, 3L),"secs\n");flush.console} + if (verbose) {cat(timetaken(last.started.at)); flush.console()} ans = bmerge(shallow(subject), query, 1L:2L, c(1L,1L), FALSE, xo, 0, c(FALSE, TRUE), 0L, "all", ops, integer(0L), 1L, verbose) # fix for #1819, turn on verbose messages options(datatable.verbose=FALSE) setDT(ans[c("starts", "lens")], key=c("starts", "lens")) options(datatable.verbose=verbose) - if (verbose) {last.started.at=proc.time()[3L];cat("Generating final logical vector ... ");flush.console()} + if (verbose) {last.started.at=proc.time();cat("Generating final logical vector ... ");flush.console()} .Call(Cinrange, idx <- vector("logical", length(x)), xo, ans[["starts"]], ans[["lens"]]) - if (verbose) {cat("done in",round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console} + if (verbose) {cat("done in", timetaken(last.started.at)); flush.console} idx } diff --git a/R/bmerge.R b/R/bmerge.R index 972d5713b..0c20e0475 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -89,10 +89,10 @@ bmerge <- function(i, x, leftcols, rightcols, io, xo, roll, rollends, nomatch, m set(i, j=lc, value=newval) } } - if (verbose) {last.started.at=proc.time()[3L];cat("Starting bmerge ...");flush.console()} + if (verbose) {last.started.at=proc.time();cat("Starting bmerge ...");flush.console()} ans = .Call(Cbmerge, i, x, as.integer(leftcols), as.integer(rightcols), io<-haskey(i), xo, roll, rollends, nomatch, mult, ops, nqgrp, nqmaxgrp) # NB: io<-haskey(i) necessary for test 579 where the := above change the factor to character and remove i's key - if (verbose) {cat("done in",round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console()} + if (verbose) {cat("done in", timetaken(last.started.at)); flush.console()} # in the caller's shallow copy, see comment at the top of this function for usage # We want to leave the coercions to i in place otherwise, since the caller depends on that to build the result diff --git a/R/data.table.R b/R/data.table.R index b0a4c4c94..865a2aa82 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -561,30 +561,30 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { # non-equi operators present.. investigate groups.. if (verbose) cat("Non-equi join operators detected ... \n") if (!missingroll) stop("roll is not implemented for non-equi joins yet.") - if (verbose) {last.started.at=proc.time()[3L];cat(" forder took ... ");flush.console()} + if (verbose) {last.started.at=proc.time();cat(" forder took ... ");flush.console()} # TODO: could check/reuse secondary indices, but we need 'starts' attribute as well! xo = forderv(x, rightcols, retGrp=TRUE) - if (verbose) {cat(round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console} + if (verbose) {cat(timetaken(last.started.at)); flush.console()} xg = attr(xo, 'starts') resetcols = head(rightcols, non_equi-1L) if (length(resetcols)) { # TODO: can we get around having to reorder twice here? # or at least reuse previous order? - if (verbose) {last.started.at=proc.time()[3L];cat(" Generating group lengths ... ");flush.console()} + if (verbose) {last.started.at=proc.time();cat(" Generating group lengths ... ");flush.console()} resetlen = attr(forderv(x, resetcols, retGrp=TRUE), 'starts') resetlen = .Call(Cuniqlengths, resetlen, nrow(x)) - if (verbose) {cat("done in",round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console} + if (verbose) {cat("done in", timetaken(last.started.at)); flush.console()} } else resetlen = integer(0L) - if (verbose) {last.started.at=proc.time()[3L];cat(" Generating non-equi group ids ... ");flush.console()} + if (verbose) {last.started.at=proc.time();cat(" Generating non-equi group ids ... ");flush.console()} nqgrp = .Call(Cnestedid, x, rightcols[non_equi:length(rightcols)], xo, xg, resetlen, mult) - if (verbose) {cat("done in", round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console} + if (verbose) {cat("done in", timetaken(last.started.at)); flush.console()} if (length(nqgrp)) nqmaxgrp = max(nqgrp) # fix for #1986, when 'x' is 0-row table max(.) returns -Inf. if (nqmaxgrp > 1L) { # got some non-equi join work to do if ("_nqgrp_" %in% names(x)) stop("Column name '_nqgrp_' is reserved for non-equi joins.") - if (verbose) {last.started.at=proc.time()[3L];cat(" Recomputing forder with non-equi ids ... ");flush.console()} + if (verbose) {last.started.at=proc.time();cat(" Recomputing forder with non-equi ids ... ");flush.console()} set(nqx<-shallow(x), j="_nqgrp_", value=nqgrp) xo = forderv(nqx, c(ncol(nqx), rightcols)) - if (verbose) {cat("done in",round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console} + if (verbose) {cat("done in", timetaken(last.started.at)); flush.console()} } else nqgrp = integer(0L) if (verbose) cat(" Found", nqmaxgrp, "non-equi group(s) ...\n") } @@ -600,9 +600,9 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { if (verbose && !is.null(xo)) cat("on= matches existing index, using index\n") } if (is.null(xo)) { - last.started.at=proc.time()[3L] + if (verbose) {last.started.at=proc.time(); flush.console()} xo = forderv(x, by = rightcols) - if (verbose) cat("Calculated ad hoc index in", round(proc.time()[3L]-last.started.at,3L), "secs\n") + if (verbose) {cat("Calculated ad hoc index in", timetaken(last.started.at)); flush.console()} # TODO: use setindex() instead, so it's cached for future reuse } } @@ -620,10 +620,10 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { # Implementation for not-join along with by=.EACHI, #604 if (notjoin && (byjoin || mult != "all")) { # mult != "all" needed for #1571 fix notjoin = FALSE - if (verbose) {last.started.at=proc.time()[3L];cat("not-join called with 'by=.EACHI'; Replacing !i with i=setdiff(x,i) ...");flush.console()} + if (verbose) {last.started.at=proc.time();cat("not-join called with 'by=.EACHI'; Replacing !i with i=setdiff(x,i) ...");flush.console()} orignames = copy(names(i)) i = setdiff_(x, i, rightcols, leftcols) # part of #547 - if (verbose) {cat("done in",round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console} + if (verbose) {cat("done in", timetaken(last.started.at)); flush.console()} setnames(i, orignames[leftcols]) setattr(i, 'sorted', names(i)) # since 'x' has key set, this'll always be sorted } @@ -1447,7 +1447,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { if (length(byval) && length(byval[[1L]])) { if (!bysameorder) { - if (verbose) {last.started.at=proc.time()[3L];cat("Finding groups using forderv ... ");flush.console()} + if (verbose) {last.started.at=proc.time();cat("Finding groups using forderv ... ");flush.console()} o__ = forderv(byval, sort=!missing(keyby), retGrp=TRUE) # The sort= argument is called sortStr at C level. It's just about saving the sort of unique strings at # C level for efficiency (cgroup vs csort) when by= not keyby=. All other types are always sorted. Getting @@ -1457,37 +1457,37 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { # forderv() returns empty integer() if already ordered to save allocating 1:xnrow bysameorder = orderedirows && !length(o__) if (verbose) { - cat(round(proc.time()[3L]-last.started.at, 3L), "sec\n") - last.started.at=proc.time()[3L] + cat(timetaken(last.started.at)) + last.started.at=proc.time() cat("Finding group sizes from the positions (can be avoided to save RAM) ... ") flush.console() # for windows } f__ = attr(o__, "starts") len__ = uniqlengths(f__, xnrow) - if (verbose) { cat(round(proc.time()[3L]-last.started.at, 3L), "sec\n");flush.console()} + if (verbose) {cat(timetaken(last.started.at)); flush.console()} if (!bysameorder && missing(keyby)) { # TO DO: lower this into forder.c - if (verbose) {last.started.at=proc.time()[3L];cat("Getting back original order ... ");flush.console()} + if (verbose) {last.started.at=proc.time();cat("Getting back original order ... ");flush.console()} firstofeachgroup = o__[f__] if (length(origorder <- forderv(firstofeachgroup))) { f__ = f__[origorder] len__ = len__[origorder] } - if (verbose) {cat(round(proc.time()[3L]-last.started.at, 3L), "sec\n")} + if (verbose) {cat(timetaken(last.started.at)); flush.console()} } if (!orderedirows && !length(o__)) o__ = seq_len(xnrow) # temp fix. TODO: revist orderedirows } else { - if (verbose) {last.started.at=proc.time()[3L];cat("Finding groups using uniqlist ... ");flush.console()} + if (verbose) {last.started.at=proc.time();cat("Finding groups using uniqlist ... ");flush.console()} f__ = uniqlist(byval) if (verbose) { - cat(round(proc.time()[3L]-last.started.at, 3L), "sec\n") - last.started.at=proc.time()[3L] + cat(timetaken(last.started.at)) + last.started.at=proc.time() cat("Finding group sizes from the positions (can be avoided to save RAM) ... ") flush.console() # for windows } len__ = uniqlengths(f__, xnrow) # TO DO: combine uniqlist and uniquelengths into one call. Or, just set len__ to NULL when dogroups infers that. - if (verbose) { cat(round(proc.time()[3L]-last.started.at, 3L), "sec\n");flush.console() } + if (verbose) { cat(timetaken(last.started.at)); flush.console() } } } else { f__=NULL @@ -1777,7 +1777,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { # for consistency of empty case in test 184 f__=len__=0L } - if (verbose) {last.started.at=proc.time()[3L];cat("Making each group and running j (GForce ",GForce,") ... ",sep="");flush.console()} + if (verbose) {last.started.at=proc.time();cat("Making each group and running j (GForce ",GForce,") ... ",sep="");flush.console()} if (GForce) { thisEnv = new.env() # not parent=parent.frame() so that gsum is found for (ii in ansvars) assign(ii, x[[ii]], thisEnv) @@ -1791,7 +1791,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { } else { ans = .Call(Cdogroups, x, xcols, groups, grpcols, jiscols, xjiscols, grporder, o__, f__, len__, jsub, SDenv, cols, newnames, !missing(on), verbose) } - if (verbose) {cat(round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console()} + if (verbose) {cat(timetaken(last.started.at)); flush.console()} # TO DO: xrows would be a better name for irows: irows means the rows of x that i joins to # Grouping by i: icols the joins columns (might not need), isdcols (the non join i and used by j), all __ are length x # Grouping by by: i is by val, icols NULL, o__ may be subset of x, f__ points to o__ (or x if !length o__) @@ -1811,9 +1811,9 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { if (!missing(keyby)) { cnames = as.character(bysubl)[-1L] if (all(cnames %chin% names(x))) { - if (verbose) {last.started.at=proc.time()[3L];cat("setkey() after the := with keyby= ... ");flush.console()} + if (verbose) {last.started.at=proc.time();cat("setkey() after the := with keyby= ... ");flush.console()} setkeyv(x,cnames) # TO DO: setkey before grouping to get memcpy benefit. - if (verbose) {cat(round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console()} + if (verbose) {cat(timetaken(last.started.at)); flush.console()} } else warning(":= keyby not straightforward character column names or list() of column names, treating as a by:",paste(cnames,collapse=","),"\n") } @@ -1838,9 +1838,9 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { setnames(ans,seq_along(bynames),bynames) # TO DO: reinvestigate bynames flowing from dogroups here and simplify } if (byjoin && !missing(keyby) && !bysameorder) { - if (verbose) {last.started.at=proc.time()[3L];cat("setkey() afterwards for keyby=.EACHI ... ");flush.console()} + if (verbose) {last.started.at=proc.time();cat("setkey() afterwards for keyby=.EACHI ... ");flush.console()} setkeyv(ans,names(ans)[seq_along(byval)]) - if (verbose) {cat(round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console()} + if (verbose) {cat(timetaken(last.started.at)); flush.console()} } else if (!missing(keyby) || (haskey(x) && bysameorder)) { setattr(ans,"sorted",names(ans)[seq_along(grpcols)]) } diff --git a/R/foverlaps.R b/R/foverlaps.R index 5e60aa791..c9f46f767 100644 --- a/R/foverlaps.R +++ b/R/foverlaps.R @@ -104,10 +104,10 @@ foverlaps <- function(x, y, by.x = if (!is.null(key(x))) key(x) else key(y), by. end = yintervals[2L], any =, within =, equal = yintervals) call = construct(head(ynames, -2L), uycols, type) - if (verbose) {last.started.at=proc.time()[3L];cat("unique() + setkey() operations done in ...");flush.console()} + if (verbose) {last.started.at=proc.time();cat("unique() + setkey() operations done in ...");flush.console()} uy = unique(y[, eval(call)]) setkey(uy)[, `:=`(lookup = list(list(integer(0L))), type_lookup = list(list(integer(0L))), count=0L, type_count=0L)] - if (verbose) {cat(round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console} + if (verbose) {cat(timetaken(last.started.at)); flush.console()} matches <- function(ii, xx, del, ...) { cols = setdiff(names(xx), del) xx = .shallow(xx, cols, retain.key = FALSE) @@ -130,9 +130,9 @@ foverlaps <- function(x, y, by.x = if (!is.null(key(x))) key(x) else key(y), by. .Call(Clookup, uy, nrow(y), indices(uy, y, yintervals, nomatch=0L, roll=roll), maxgap, minoverlap, mult, type, verbose) if (maxgap == 0L && minoverlap == 1L) { iintervals = tail(names(x), 2L) - if (verbose) {last.started.at=proc.time()[3L];cat("binary search(es) done in ...");flush.console()} + if (verbose) {last.started.at=proc.time();cat("binary search(es) done in ...");flush.console()} xmatches = indices(uy, x, xintervals, nomatch=0L, roll=roll) - if (verbose) {cat(round(proc.time()[3L]-last.started.at,3L),"secs\n");flush.console} + if (verbose) {cat(timetaken(last.started.at));flush.console} olaps = .Call(Coverlaps, uy, xmatches, mult, type, nomatch, verbose) } else if (maxgap == 0L && minoverlap > 1L) { stop("Not yet implemented") diff --git a/R/timetaken.R b/R/timetaken.R index 4372824d4..d5e3c0435 100644 --- a/R/timetaken.R +++ b/R/timetaken.R @@ -14,8 +14,8 @@ timetaken <- function(started.at) hrs = hrs - 24*days if (secs >= 60) { if (days >= 1) res = sprintf("%d days ", as.integer(days)) else res="" - paste(res,sprintf("%02.0f:%02.0f:%02.0f", hrs, mins, secs %% 60),sep="") + paste(res,sprintf("%02.0f:%02.0f:%02.0f\n", hrs, mins, secs %% 60),sep="") } else { - sprintf(if (secs>=10) "%.1fsec" else "%.3fsec", secs) + sprintf(if (secs>=10) "%.1fsec\n" else "%.3fsec\n", secs) } } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index b40e475c0..312c681b6 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -4042,7 +4042,12 @@ test(1187.4, DT[, list(sum(y,na.rm=MyVar), mean(y,na.rm=MyVar)), by=x, verbose=T test(1187.5, DT[, mean(y,na.rm=MyVar), by=x, verbose=TRUE], output="GForce optimized j to", DT[, mean(y,na.rm=FALSE), by=x]) # GForce should not turn on when the .ok function isn't triggered -test(1187.6, DT[ , mean(y, trim = .2), by = x, verbose = TRUE], output = 'GForce FALSE') +test(1187.6, DT[ , mean(y, trim = .2), + by = x, verbose = TRUE], + data.table(x = c("a", "b", "c", "d"), + V1 = c(NA, 3.33333333333333, NA, NA)), + output = 'j unchanged', + warning = "'trim' is not yet optimized") @@ -9264,7 +9269,7 @@ test(1672.2, DT[ , .(.I[1L], V2[1L]), by = V1, verbose = TRUE], output = "GForce optimized j") #make sure GForce not operating for inversion test(1672.3, DT[ , .(.I[-1L], V2[1L]), by = V1, verbose = TRUE], - output = "GForce FALSE) + output = "GForce FALSE") #make sure works on .I by itself test(1672.4, DT[ , .I[1L], by = V1], output = " V1 V11: 1 12: 2 2") From 7c73e5b5f8650d602c04368089ac61919258b9e2 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 23 Jan 2018 12:40:47 +0800 Subject: [PATCH 04/11] more tests --- R/test.data.table.R | 17 ++++++++++------- inst/tests/tests.Rraw | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 7 deletions(-) diff --git a/R/test.data.table.R b/R/test.data.table.R index 4714e351d..1cb3b1ca0 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -3,8 +3,8 @@ test.data.table <- function(verbose=FALSE, pkg="pkg", silent=FALSE) { if (exists("test.data.table",.GlobalEnv,inherits=FALSE)) { # package developer if ("package:data.table" %in% search()) stop("data.table package is loaded. Unload or start a fresh R session.") - d = if (pkg %in% dir()) paste0(getwd(),"/",pkg) else Sys.getenv("CC_DIR") - d = paste0(d, "/inst/tests") + d = if (pkg %in% dir()) file.path(getwd(), pkg) else Sys.getenv("CC_DIR") + d = file.path(d, "inst/tests") } else { # R CMD check and user running test.data.table() d = paste0(getNamespaceInfo("data.table","path"),"/tests") @@ -40,13 +40,16 @@ test.data.table <- function(verbose=FALSE, pkg="pkg", silent=FALSE) { # whichfail = NULL # .devtesting = TRUE -compactprint <- function(DT, topn=2) { +# essentially toString.default +makeString = function (x) paste(x, collapse = ",") + +compactprint <- function(DT, topn=2L) { tt = vapply_1c(DT,function(x)class(x)[1L]) tt[tt=="integer64"] = "i64" - cn = paste(" [Key=",paste(key(DT),collapse=","), - " Types=",paste(substring(sapply(DT,typeof),1,3),collapse=","), - " Classes=",paste(substring(tt,1,3),collapse=","), - "]",sep="") + tt = substring(tt, 1L, 3L) + cn = paste0(" [Key=",makeString(key(DT)), + " Types=", makeString(substring(sapply(DT, typeof), 1L, 3L)), + " Classes=", makeString(tt), "]") if (nrow(DT)) { print(copy(DT)[,(cn):=""], topn=topn) } else { diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 0e3dd3e01..3e3431656 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11376,6 +11376,41 @@ test(1869.1, data.table(a = 1:4, short_s4_col), error = 'problem recycling.*try ## i must be a data.table when on is specified DT = data.table(a = 1:3) test(1869.2, DT[c(TRUE, FALSE), on = 'coefficients'], error = "not a data.table, but 'on'") +## missing tests for round.IDate +test_dates = c( + "2017-01-05", "2017-08-04", "2017-06-05", "2017-04-15", + "2017-06-11", "2017-10-04", "2017-04-19", "2017-01-11", + "2017-03-08", "2017-10-10" +) +test_dates = as.IDate(test_dates) +test(1869.3, round(test_dates, 'weeks'), + structure(c(17167L, 17377L, 17321L, 17272L, 17328L, + 17440L, 17272L, 17174L, 17230L, 17447L), + class = c("IDate", "Date"))) +test(1869.4, round(test_dates, 'months'), + structure(c(17167L, 17379L, 17318L, 17257L, 17318L, + 17440L, 17257L, 17167L, 17226L, 17440L), + class = c("IDate", "Date"))) +test(1869.5, round(test_dates, 'quarters'), + structure(c(17167L, 17348L, 17257L, 17257L, 17257L, + 17440L, 17257L, 17167L, 17167L, 17440L), + class = c("IDate", "Date"))) +test(1869.6, round(test_dates, 'years'), + structure(c(17167L, 17167L, 17167L, 17167L, 17167L, + 17167L, 17167L, 17167L, 17167L, 17167L), + class = c("IDate", "Date"))) +test(1869.7, round(test_dates, 'centuries'), + error = 'should be one of') +## missing a test of mday +test(1869.8, mday(test_dates), + c(5L, 4L, 5L, 15L, 11L, 4L, 19L, 11L, 8L, 10L)) +## META TEST of helper function compactprint from test.data.table +DT = data.table(a = 1, b = 2, key = 'a') +DT_out = gsub('\\s+$', '', capture.output(compactprint(DT))) +test(1869.9, DT_out, + c(" a b [Key=a Types=dou,dou Classes=num,num]", + "1: 1 2")) + ########################## From 689e7374f58acf7b4ba9c3f3f067fe6d1111bf13 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 29 Jan 2018 14:49:44 +0800 Subject: [PATCH 05/11] more tests (foverlaps, verbosity) --- R/foverlaps.R | 24 ++++++++---------------- inst/tests/tests.Rraw | 15 +++++++++++++++ 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/R/foverlaps.R b/R/foverlaps.R index c9f46f767..ff6737c8f 100644 --- a/R/foverlaps.R +++ b/R/foverlaps.R @@ -15,7 +15,7 @@ foverlaps <- function(x, y, by.x = if (!is.null(key(x))) key(x) else key(y), by. mult = match.arg(mult) if (type == "equal") stop("type = 'equal' is not implemented yet. But note that this is just the same as a normal data.table join y[x, ...], unless you are also interested in setting 'minoverlap / maxgap' arguments. But those arguments are not implemented yet as well.") - if (maxgap > 0L || minoverlap > 1L) + if (maxgap != 0L || minoverlap != 1L) stop("maxgap and minoverlap arguments are not yet implemented.") if (is.null(by.y)) stop("'y' must be keyed (i.e., sorted, and, marked as sorted). Call setkey(y, ...) first, see ?setkey. Also check the examples in ?foverlaps.") @@ -59,6 +59,7 @@ foverlaps <- function(x, y, by.x = if (!is.null(key(x))) key(x) else key(y), by. stop("The last two columns in by.y should correspond to the 'start' and 'end' intervals in data.table 'y' and must be integer/numeric type.") if ( any(y[[yintervals[2L]]] - y[[yintervals[1L]]] < 0L) ) stop("All entries in column ", yintervals[1L], " should be <= corresponding entries in column ", yintervals[2L], " in data.table 'y'") + ## see NOTES below: yclass = c(class(y[[yintervals[1L]]]), class(y[[yintervals[2L]]])) isdouble = FALSE; isposix = FALSE @@ -128,24 +129,15 @@ foverlaps <- function(x, y, by.x = if (!is.null(key(x))) key(x) else key(y), by. } # nomatch has no effect here, just for passing arguments consistently to `bmerge` .Call(Clookup, uy, nrow(y), indices(uy, y, yintervals, nomatch=0L, roll=roll), maxgap, minoverlap, mult, type, verbose) - if (maxgap == 0L && minoverlap == 1L) { - iintervals = tail(names(x), 2L) - if (verbose) {last.started.at=proc.time();cat("binary search(es) done in ...");flush.console()} - xmatches = indices(uy, x, xintervals, nomatch=0L, roll=roll) - if (verbose) {cat(timetaken(last.started.at));flush.console} - olaps = .Call(Coverlaps, uy, xmatches, mult, type, nomatch, verbose) - } else if (maxgap == 0L && minoverlap > 1L) { - stop("Not yet implemented") - } else if (maxgap > 0L && minoverlap == 1L) { - stop("Not yet implemented") - } else if (maxgap > 0L && minoverlap > 1L) { - if (maxgap > minoverlap) - warning("maxgap > minoverlap. maxgap will have no effect here.") - stop("Not yet implemented") - } + if (verbose) {last.started.at=proc.time();cat("binary search(es) done in ...");flush.console()} + xmatches = indices(uy, x, xintervals, nomatch=0L, roll=roll) + if (verbose) {cat(timetaken(last.started.at));flush.console} + olaps = .Call(Coverlaps, uy, xmatches, mult, type, nomatch, verbose) + setDT(olaps) setnames(olaps, c("xid", "yid")) yid = NULL # for 'no visible binding for global variable' from R CMD check on i clauses below + # if (type == "any") setorder(olaps) # at times the combine operation may not result in sorted order # CsubsetDT bug has been fixed by Matt. So back to using it! Should improve subset substantially. if (which) { diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index e1fe5dcd0..0e862d38e 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11417,6 +11417,21 @@ DT_out = gsub('\\s+$', '', capture.output(compactprint(DT))) test(1870.9, DT_out, c(" a b [Key=a Types=dou,dou Classes=num,num]", "1: 1 2")) +## Test as-yet unimplemented features of foverlaps +x = data.table(start=c(5,31,22,16), end=c(8,50,25,18), val2 = 7:10) +y = data.table(start=c(10, 20, 30), end=c(15, 35, 45), val1 = 1:3) +setkey(y, start, end) +test(1870.10, foverlaps(x, y, maxgap = 2), error = 'maxgap and minoverlap.*not yet') +test(1870.11, foverlaps(x, y, minoverlap = 2), error = 'maxgap and minoverlap.*not yet') +## tests of verbose output +### foverlaps +test(1870.12, foverlaps(x, y, verbose = TRUE), + output = 'unique.*setkey.*operations.*binary search') +### [.data.table +X = data.table(x=c("c","b"), v=8:7, foo=c(4,2)) +DT = data.table(x=rep(c("b","a","c"),each=3), y=c(1,3,6), v=1:9) +test(1870.13, DT[X, on=.(x, v>=v), verbose = TRUE], + output = 'Non-equi join operators.*forder took.*group lengths.*done.*non-equi group ids.*done') ########################## From f199e124ff04e009404cfaf2e03b569ef7ba55f3 Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Tue, 30 Jan 2018 16:13:31 -0800 Subject: [PATCH 06/11] Create SDenv$.GRP and .N with explicit new vector() in anticipation of internal-to-R global small-integers --- R/data.table.R | 4 ++-- inst/tests/tests.Rraw | 11 +++-------- 2 files changed, 5 insertions(+), 10 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 92e4ee374..5ea6df347 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -1415,8 +1415,8 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { # Now ggplot2 returns data from print, we need a way to throw it away otherwise j accumulates the result SDenv$.SDall = SDenv$.SD = null.data.table() # e.g. test 607. Grouping still proceeds even though no .SD e.g. grouping key only tables, or where j consists of .N only - SDenv$.N = as.integer(0) # not 0L for the reason on next line : - SDenv$.GRP = as.integer(1) # oddly using 1L doesn't work reliably here! Possible R bug? TO DO: create reproducible example and report. To reproduce change to 1L and run test.data.table, test 780 fails. The assign seems ineffective and a previous value for .GRP from a previous test is retained, despite just creating a new SDenv. + SDenv$.N = vector("integer", 1L) # explicit new vector (not 0L or as.integer() which might return R's internal small-integer global) + SDenv$.GRP = vector("integer", 1L) # because written to by reference at C level (one write per group). TODO: move this alloc to C level if (byjoin) { # The groupings come instead from each row of the i data.table. diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 499fa3992..bc2779a87 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -4040,14 +4040,9 @@ test(1187.4, DT[, list(sum(y,na.rm=MyVar), mean(y,na.rm=MyVar)), by=x, verbose=T test(1187.5, DT[, mean(y,na.rm=MyVar), by=x, verbose=TRUE], output="GForce optimized j to", DT[, mean(y,na.rm=FALSE), by=x]) # GForce should not turn on when the .ok function isn't triggered -test(1187.6, DT[ , mean(y, trim = .2), - by = x, verbose = TRUE], - data.table(x = c("a", "b", "c", "d"), - V1 = c(NA, 3.33333333333333, NA, NA)), - output = 'j unchanged', - warning = "'trim' is not yet optimized") - - +test(1187.6, DT[, mean(y, trim=.2), by=x, verbose=TRUE], + data.table(x = c("a", "b", "c", "d"), V1 = c(NA, 3.33333333333333, NA, NA)), + output='j unchanged', warning="'trim' is not yet optimized") # test from Zach Mayer a <- c("\"\"\"\") \" \" \" \" \"", "\"\"\"\") \" \" \" \" \"", "\"\"\"\") \" \" \" \" ,\"") From b5c482bfbce65c2da8d2379ce94bce282bf9e4b2 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 1 Feb 2018 14:53:57 +0800 Subject: [PATCH 07/11] add some nocov, reinstate some code in comments --- R/foverlaps.R | 17 +++++++++++++++++ R/onAttach.R | 4 +++- R/onLoad.R | 4 ++++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/R/foverlaps.R b/R/foverlaps.R index ff6737c8f..0b2874f2e 100644 --- a/R/foverlaps.R +++ b/R/foverlaps.R @@ -15,6 +15,7 @@ foverlaps <- function(x, y, by.x = if (!is.null(key(x))) key(x) else key(y), by. mult = match.arg(mult) if (type == "equal") stop("type = 'equal' is not implemented yet. But note that this is just the same as a normal data.table join y[x, ...], unless you are also interested in setting 'minoverlap / maxgap' arguments. But those arguments are not implemented yet as well.") + # if (maxgap > 0L || minoverlap > 1L) # for future implementation if (maxgap != 0L || minoverlap != 1L) stop("maxgap and minoverlap arguments are not yet implemented.") if (is.null(by.y)) @@ -129,6 +130,22 @@ foverlaps <- function(x, y, by.x = if (!is.null(key(x))) key(x) else key(y), by. } # nomatch has no effect here, just for passing arguments consistently to `bmerge` .Call(Clookup, uy, nrow(y), indices(uy, y, yintervals, nomatch=0L, roll=roll), maxgap, minoverlap, mult, type, verbose) + # placeholder for future improvement + # if (maxgap == 0L && minoverlap == 1L) { + # iintervals = tail(names(x), 2L) + # if (verbose) {last.started.at=proc.time();cat("binary search(es) done in ...");flush.console()} + # xmatches = indices(uy, x, xintervals, nomatch=0L, roll=roll) + # if (verbose) {cat(timetaken(last.started.at));flush.console()} + # olaps = .Call(Coverlaps, uy, xmatches, mult, type, nomatch, verbose) + # } else if (maxgap == 0L && minoverlap > 1L) { + # stop("Not yet implemented") + # } else if (maxgap > 0L && minoverlap == 1L) { + # stop("Not yet implemented") + # } else if (maxgap > 0L && minoverlap > 1L) { + # if (maxgap > minoverlap) + # warning("maxgap > minoverlap. maxgap will have no effect here.") + # stop("Not yet implemented") + # } if (verbose) {last.started.at=proc.time();cat("binary search(es) done in ...");flush.console()} xmatches = indices(uy, x, xintervals, nomatch=0L, roll=roll) if (verbose) {cat(timetaken(last.started.at));flush.console} diff --git a/R/onAttach.R b/R/onAttach.R index d6c4bfe07..eb73aaff4 100644 --- a/R/onAttach.R +++ b/R/onAttach.R @@ -1,3 +1,5 @@ +# nocov start + .onAttach <- function(libname, pkgname) { # Runs when attached to search() path such as by library() or require() if (interactive()) { @@ -24,4 +26,4 @@ } } - +# nocov end diff --git a/R/onLoad.R b/R/onLoad.R index ad9cbb976..a4cd5dd4f 100644 --- a/R/onLoad.R +++ b/R/onLoad.R @@ -1,3 +1,5 @@ +# nocov start + .onLoad <- function(libname, pkgname) { # Runs when loaded but not attached to search() path; e.g., when a package just Imports (not Depends on) data.table @@ -110,3 +112,5 @@ getRversion <- function(...) stop("Reminder to data.table developers: don't use # So 'detach' doesn't find datatable.so, as it looks by default for data.table.so library.dynam.unload("datatable", libpath) } + +# nocov end From f1976f3fd0159d52c52cb82ed1734954421609e8 Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Mon, 5 Feb 2018 12:54:35 -0800 Subject: [PATCH 08/11] Removed nocov start/end from test.data.table.R to see why those might have been added --- R/test.data.table.R | 5 ----- 1 file changed, 5 deletions(-) diff --git a/R/test.data.table.R b/R/test.data.table.R index a872006e5..9bb1f65dd 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -1,4 +1,3 @@ -# nocov start test.data.table <- function(verbose=FALSE, pkg="pkg", silent=FALSE) { if (exists("test.data.table",.GlobalEnv,inherits=FALSE)) { # package developer @@ -43,8 +42,6 @@ test.data.table <- function(verbose=FALSE, pkg="pkg", silent=FALSE) { # essentially toString.default makeString = function (x) paste(x, collapse = ",") -# nocov end - compactprint <- function(DT, topn=2L) { tt = vapply_1c(DT,function(x)class(x)[1L]) tt[tt=="integer64"] = "i64" @@ -61,7 +58,6 @@ compactprint <- function(DT, topn=2L) { invisible() } -# nocov start INT = function(...) { as.integer(c(...)) } # utility used in tests.Rraw test <- function(num,x,y,error=NULL,warning=NULL,output=NULL) { @@ -196,4 +192,3 @@ test <- function(num,x,y,error=NULL,warning=NULL,output=NULL) { invisible() } -# nocov end From 6b793a11913ce1499b55b697ed55c1804a39e6fa Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Mon, 5 Feb 2018 14:54:33 -0800 Subject: [PATCH 09/11] Focussed nocov in test.data.table. Removed (unused) factor comparison leniency. --- R/test.data.table.R | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/R/test.data.table.R b/R/test.data.table.R index 9bb1f65dd..100643f29 100644 --- a/R/test.data.table.R +++ b/R/test.data.table.R @@ -1,9 +1,11 @@ test.data.table <- function(verbose=FALSE, pkg="pkg", silent=FALSE) { if (exists("test.data.table",.GlobalEnv,inherits=FALSE)) { # package developer + # nocov start if ("package:data.table" %in% search()) stop("data.table package is loaded. Unload or start a fresh R session.") d = if (pkg %in% dir()) file.path(getwd(), pkg) else Sys.getenv("CC_DIR") d = file.path(d, "inst/tests") + # nocov end } else { # R CMD check and user running test.data.table() d = paste0(getNamespaceInfo("data.table","path"),"/tests") @@ -21,7 +23,7 @@ test.data.table <- function(verbose=FALSE, pkg="pkg", silent=FALSE) { envirs[[fn]] = new.env(parent=.GlobalEnv) assign("testDir", function(x)file.path(d,x), envir=envirs[[fn]]) if(isTRUE(silent)){ - try(sys.source(fn,envir=envirs[[fn]]), silent=silent) + try(sys.source(fn,envir=envirs[[fn]]), silent=silent) # nocov } else { sys.source(fn,envir=envirs[[fn]]) } @@ -39,13 +41,12 @@ test.data.table <- function(verbose=FALSE, pkg="pkg", silent=FALSE) { # whichfail = NULL # .devtesting = TRUE -# essentially toString.default -makeString = function (x) paste(x, collapse = ",") - +# nocov start compactprint <- function(DT, topn=2L) { tt = vapply_1c(DT,function(x)class(x)[1L]) tt[tt=="integer64"] = "i64" tt = substring(tt, 1L, 3L) + makeString = function(x) paste(x, collapse = ",") # essentially toString.default cn = paste0(" [Key=",makeString(key(DT)), " Types=", makeString(substring(sapply(DT, typeof), 1L, 3L)), " Classes=", makeString(tt), "]") @@ -57,6 +58,7 @@ compactprint <- function(DT, topn=2L) { } invisible() } +# nocov end INT = function(...) { as.integer(c(...)) } # utility used in tests.Rraw @@ -104,6 +106,7 @@ test <- function(num,x,y,error=NULL,warning=NULL,output=NULL) { output = gsub("+","\\+",output,fixed=TRUE) # e.g numbers like 9.9e+10 should match the + literally output = gsub("\n","",output,fixed=TRUE) # e.g numbers like 9.9e+10 should match the + literally if (!length(grep(output,out))) { + # nocov start cat("Test",num,"didn't produce correct output:\n") cat(">",deparse(xsub),"\n") cat("Expected: '",output,"'\n",sep="") @@ -111,6 +114,7 @@ test <- function(num,x,y,error=NULL,warning=NULL,output=NULL) { assign("nfail", nfail+1, parent.frame(), inherits=TRUE) assign("whichfail", c(whichfail, num), parent.frame(), inherits=TRUE) return() + # nocov end } } if (!is.null(error) || !is.null(warning)) { @@ -124,6 +128,7 @@ test <- function(num,x,y,error=NULL,warning=NULL,output=NULL) { if (! (inherits(err,"try-error") && length(grep(patt,err)) && type==observedtype)) { + # nocov start cat("Test",num,"didn't produce correct",type,":\n") cat(">",deparse(xsub),"\n") cat("Expected ",type,": '",txt,"'\n",sep="") @@ -134,26 +139,31 @@ test <- function(num,x,y,error=NULL,warning=NULL,output=NULL) { assign("nfail", nfail+1L, parent.frame(), inherits=TRUE) # Not the same as nfail <<- nfail + 1, it seems (when run via R CMD check) assign("whichfail", c(whichfail, num), parent.frame(), inherits=TRUE) return() + # nocov end } if (type=="warning") err <- if (is.null(output)) x<-try(suppressWarnings(x),TRUE) else out<-paste(capture.output(x<-try(suppressWarnings(x),TRUE)),collapse="") else return() } if (inherits(err,"try-error") || (!missing(y) && inherits(err<-try(y,TRUE),"try-error"))) { + # nocov start cat("Test",num,err) assign("nfail", nfail+1L, parent.frame(), inherits=TRUE) assign("whichfail", c(whichfail, num), parent.frame(), inherits=TRUE) return() + # nocov end } if (missing(y)) { if (!is.null(output)) return() if (isTRUE(as.vector(x))) return() # as.vector to drop names of a named vector such as returned by system.time + # nocov start cat("Test",num,"expected TRUE but observed:\n") cat(">",deparse(xsub),"\n") if (is.data.table(x)) compactprint(x) else print(x) assign("nfail", nfail+1L, parent.frame(), inherits=TRUE) assign("whichfail", c(whichfail, num), parent.frame(), inherits=TRUE) return() + # nocov end } else { if (identical(x,y)) return() if (is.data.table(x) && is.data.table(y)) { @@ -173,14 +183,10 @@ test <- function(num,x,y,error=NULL,warning=NULL,output=NULL) { if (isTRUE(all.equal.result<-all.equal(xc,yc)) && identical(key(x),key(y)) && identical(vapply_1c(xc,typeof), vapply_1c(yc,typeof))) return() } - if (is.factor(x) && is.factor(y)) { - x = factor(x) - y = factor(y) - if (identical(x,y)) return() - } if (is.atomic(x) && is.atomic(y) && isTRUE(all.equal.result<-all.equal(x,y)) && typeof(x)==typeof(y)) return() # For test 617 on r-prerel-solaris-sparc on 7 Mar 2013 } + # nocov start cat("Test",num,"ran without errors but failed check that x equals y:\n") cat("> x =",deparse(xsub),"\n") if (is.data.table(x)) compactprint(x) else {cat("First 6 of ", length(x)," (type '", typeof(x), "'): ", sep=""); print(head(x))} @@ -190,5 +196,6 @@ test <- function(num,x,y,error=NULL,warning=NULL,output=NULL) { assign("nfail", nfail+1L, parent.frame(), inherits=TRUE) assign("whichfail", c(whichfail, num), parent.frame(), inherits=TRUE) invisible() + # nocov end } From 701a9d85d7021691bac5628b7f903eee93b4b171 Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Mon, 5 Feb 2018 16:02:49 -0800 Subject: [PATCH 10/11] Reverted commenting-out to avoid duplicated 4 lines and to keep if() live. Added nocov block instead. --- R/foverlaps.R | 38 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 20 deletions(-) diff --git a/R/foverlaps.R b/R/foverlaps.R index 0b2874f2e..be85c31f5 100644 --- a/R/foverlaps.R +++ b/R/foverlaps.R @@ -130,26 +130,24 @@ foverlaps <- function(x, y, by.x = if (!is.null(key(x))) key(x) else key(y), by. } # nomatch has no effect here, just for passing arguments consistently to `bmerge` .Call(Clookup, uy, nrow(y), indices(uy, y, yintervals, nomatch=0L, roll=roll), maxgap, minoverlap, mult, type, verbose) - # placeholder for future improvement - # if (maxgap == 0L && minoverlap == 1L) { - # iintervals = tail(names(x), 2L) - # if (verbose) {last.started.at=proc.time();cat("binary search(es) done in ...");flush.console()} - # xmatches = indices(uy, x, xintervals, nomatch=0L, roll=roll) - # if (verbose) {cat(timetaken(last.started.at));flush.console()} - # olaps = .Call(Coverlaps, uy, xmatches, mult, type, nomatch, verbose) - # } else if (maxgap == 0L && minoverlap > 1L) { - # stop("Not yet implemented") - # } else if (maxgap > 0L && minoverlap == 1L) { - # stop("Not yet implemented") - # } else if (maxgap > 0L && minoverlap > 1L) { - # if (maxgap > minoverlap) - # warning("maxgap > minoverlap. maxgap will have no effect here.") - # stop("Not yet implemented") - # } - if (verbose) {last.started.at=proc.time();cat("binary search(es) done in ...");flush.console()} - xmatches = indices(uy, x, xintervals, nomatch=0L, roll=roll) - if (verbose) {cat(timetaken(last.started.at));flush.console} - olaps = .Call(Coverlaps, uy, xmatches, mult, type, nomatch, verbose) + if (maxgap == 0L && minoverlap == 1L) { + # iintervals = tail(names(x), 2L) # iintervals not yet used so commented out for now + if (verbose) {last.started.at=proc.time();cat("binary search(es) done in ...");flush.console()} + xmatches = indices(uy, x, xintervals, nomatch=0L, roll=roll) + if (verbose) {cat(timetaken(last.started.at));flush.console()} + olaps = .Call(Coverlaps, uy, xmatches, mult, type, nomatch, verbose) + } + # nocov start + else if (maxgap == 0L && minoverlap > 1L) { + stop("Not yet implemented") + } else if (maxgap > 0L && minoverlap == 1L) { + stop("Not yet implemented") + } else if (maxgap > 0L && minoverlap > 1L) { + if (maxgap > minoverlap) + warning("maxgap > minoverlap. maxgap will have no effect here.") + stop("Not yet implemented") + } + # nocov end setDT(olaps) setnames(olaps, c("xid", "yid")) From 5be304928d7de8f71d79daad71f9011bad2c5f33 Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Mon, 5 Feb 2018 17:16:45 -0800 Subject: [PATCH 11/11] A few more 0->0L. Also, timetaken() is exported so reverted including newline in its return value; e.g. at the end of tests.Rraw it's used assuming no newline included --- R/between.R | 4 ++-- R/bmerge.R | 3 +-- R/data.table.R | 44 +++++++++++++++++++++---------------------- R/foverlaps.R | 4 ++-- R/fwrite.R | 2 +- R/timetaken.R | 4 ++-- inst/tests/tests.Rraw | 8 ++++---- man/timetaken.Rd | 2 +- 8 files changed, 35 insertions(+), 36 deletions(-) diff --git a/R/between.R b/R/between.R index 22deff3bc..20de0d063 100644 --- a/R/between.R +++ b/R/between.R @@ -24,7 +24,7 @@ inrange <- function(x,lower,upper,incbounds=TRUE) { verbose = getOption("datatable.verbose") if (verbose) {last.started.at=proc.time();cat("forderv(query) took ... ");flush.console()} xo = forderv(query) - if (verbose) {cat(timetaken(last.started.at)); flush.console()} + if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} ans = bmerge(shallow(subject), query, 1L:2L, c(1L,1L), FALSE, xo, 0, c(FALSE, TRUE), 0L, "all", ops, integer(0L), 1L, verbose) # fix for #1819, turn on verbose messages @@ -33,7 +33,7 @@ inrange <- function(x,lower,upper,incbounds=TRUE) { options(datatable.verbose=verbose) if (verbose) {last.started.at=proc.time();cat("Generating final logical vector ... ");flush.console()} .Call(Cinrange, idx <- vector("logical", length(x)), xo, ans[["starts"]], ans[["lens"]]) - if (verbose) {cat("done in", timetaken(last.started.at)); flush.console} + if (verbose) {cat("done in",timetaken(last.started.at),"\n"); flush.console} idx } diff --git a/R/bmerge.R b/R/bmerge.R index 0c20e0475..25903ca02 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -92,7 +92,7 @@ bmerge <- function(i, x, leftcols, rightcols, io, xo, roll, rollends, nomatch, m if (verbose) {last.started.at=proc.time();cat("Starting bmerge ...");flush.console()} ans = .Call(Cbmerge, i, x, as.integer(leftcols), as.integer(rightcols), io<-haskey(i), xo, roll, rollends, nomatch, mult, ops, nqgrp, nqmaxgrp) # NB: io<-haskey(i) necessary for test 579 where the := above change the factor to character and remove i's key - if (verbose) {cat("done in", timetaken(last.started.at)); flush.console()} + if (verbose) {cat("done in",timetaken(last.started.at),"\n"); flush.console()} # in the caller's shallow copy, see comment at the top of this function for usage # We want to leave the coercions to i in place otherwise, since the caller depends on that to build the result @@ -105,4 +105,3 @@ bmerge <- function(i, x, leftcols, rightcols, io, xo, roll, rollends, nomatch, m return(ans) } - diff --git a/R/data.table.R b/R/data.table.R index aa80a1da0..094dcb9a5 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -404,7 +404,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { assign("forder", forder, order_env) assign("x", x, order_env) i = eval(isub, order_env, parent.frame()) # for optimisation of 'order' to 'forder' - # that forder returns integer(0) is taken care of internally within forder + # that forder returns empty integer() is taken care of internally within forder } else if (length(o <- .prepareFastSubset(isub = isub, x = x, enclos = parent.frame(), notjoin = notjoin, verbose = verbose))){ @@ -518,7 +518,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { if (verbose) {last.started.at=proc.time();cat(" forder took ... ");flush.console()} # TODO: could check/reuse secondary indices, but we need 'starts' attribute as well! xo = forderv(x, rightcols, retGrp=TRUE) - if (verbose) {cat(timetaken(last.started.at)); flush.console()} + if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} xg = attr(xo, 'starts') resetcols = head(rightcols, non_equi-1L) if (length(resetcols)) { @@ -527,18 +527,18 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { if (verbose) {last.started.at=proc.time();cat(" Generating group lengths ... ");flush.console()} resetlen = attr(forderv(x, resetcols, retGrp=TRUE), 'starts') resetlen = .Call(Cuniqlengths, resetlen, nrow(x)) - if (verbose) {cat("done in", timetaken(last.started.at)); flush.console()} + if (verbose) {cat("done in",timetaken(last.started.at),"\n"); flush.console()} } else resetlen = integer(0L) if (verbose) {last.started.at=proc.time();cat(" Generating non-equi group ids ... ");flush.console()} nqgrp = .Call(Cnestedid, x, rightcols[non_equi:length(rightcols)], xo, xg, resetlen, mult) - if (verbose) {cat("done in", timetaken(last.started.at)); flush.console()} + if (verbose) {cat("done in",timetaken(last.started.at),"\n"); flush.console()} if (length(nqgrp)) nqmaxgrp = max(nqgrp) # fix for #1986, when 'x' is 0-row table max(.) returns -Inf. if (nqmaxgrp > 1L) { # got some non-equi join work to do if ("_nqgrp_" %in% names(x)) stop("Column name '_nqgrp_' is reserved for non-equi joins.") if (verbose) {last.started.at=proc.time();cat(" Recomputing forder with non-equi ids ... ");flush.console()} set(nqx<-shallow(x), j="_nqgrp_", value=nqgrp) xo = forderv(nqx, c(ncol(nqx), rightcols)) - if (verbose) {cat("done in", timetaken(last.started.at)); flush.console()} + if (verbose) {cat("done in",timetaken(last.started.at),"\n"); flush.console()} } else nqgrp = integer(0L) if (verbose) cat(" Found", nqmaxgrp, "non-equi group(s) ...\n") } @@ -556,7 +556,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { if (is.null(xo)) { if (verbose) {last.started.at=proc.time(); flush.console()} xo = forderv(x, by = rightcols) - if (verbose) {cat("Calculated ad hoc index in", timetaken(last.started.at)); flush.console()} + if (verbose) {cat("Calculated ad hoc index in",timetaken(last.started.at),"\n"); flush.console()} # TODO: use setindex() instead, so it's cached for future reuse } } @@ -577,7 +577,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { if (verbose) {last.started.at=proc.time();cat("not-join called with 'by=.EACHI'; Replacing !i with i=setdiff(x,i) ...");flush.console()} orignames = copy(names(i)) i = setdiff_(x, i, rightcols, leftcols) # part of #547 - if (verbose) {cat("done in", timetaken(last.started.at)); flush.console()} + if (verbose) {cat("done in",timetaken(last.started.at),"\n"); flush.console()} setnames(i, orignames[leftcols]) setattr(i, 'sorted', names(i)) # since 'x' has key set, this'll always be sorted } @@ -665,8 +665,8 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { } ## make sure, all columns are taken from x and not from i. ## This is done by simply telling data.table to continue as if there was a simple subset - leftcols = integer(0) - rightcols = integer(0) + leftcols = integer(0L) + rightcols = integer(0L) i <- irows ## important to make i not a data.table because otherwise Gforce doesn't kick in } } @@ -1436,14 +1436,14 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { # forderv() returns empty integer() if already ordered to save allocating 1:xnrow bysameorder = orderedirows && !length(o__) if (verbose) { - cat(timetaken(last.started.at)) + cat(timetaken(last.started.at),"\n") last.started.at=proc.time() cat("Finding group sizes from the positions (can be avoided to save RAM) ... ") flush.console() # for windows } f__ = attr(o__, "starts") len__ = uniqlengths(f__, xnrow) - if (verbose) {cat(timetaken(last.started.at)); flush.console()} + if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} if (!bysameorder && missing(keyby)) { # TO DO: lower this into forder.c if (verbose) {last.started.at=proc.time();cat("Getting back original order ... ");flush.console()} @@ -1452,21 +1452,21 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { f__ = f__[origorder] len__ = len__[origorder] } - if (verbose) {cat(timetaken(last.started.at)); flush.console()} + if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} } if (!orderedirows && !length(o__)) o__ = seq_len(xnrow) # temp fix. TODO: revist orderedirows } else { if (verbose) {last.started.at=proc.time();cat("Finding groups using uniqlist ... ");flush.console()} f__ = uniqlist(byval) if (verbose) { - cat(timetaken(last.started.at)) + cat(timetaken(last.started.at),"\n") last.started.at=proc.time() cat("Finding group sizes from the positions (can be avoided to save RAM) ... ") flush.console() # for windows } len__ = uniqlengths(f__, xnrow) # TO DO: combine uniqlist and uniquelengths into one call. Or, just set len__ to NULL when dogroups infers that. - if (verbose) { cat(timetaken(last.started.at)); flush.console() } + if (verbose) { cat(timetaken(last.started.at),"\n"); flush.console() } } } else { f__=NULL @@ -1770,7 +1770,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { } else { ans = .Call(Cdogroups, x, xcols, groups, grpcols, jiscols, xjiscols, grporder, o__, f__, len__, jsub, SDenv, cols, newnames, !missing(on), verbose) } - if (verbose) {cat(timetaken(last.started.at)); flush.console()} + if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} # TO DO: xrows would be a better name for irows: irows means the rows of x that i joins to # Grouping by i: icols the joins columns (might not need), isdcols (the non join i and used by j), all __ are length x # Grouping by by: i is by val, icols NULL, o__ may be subset of x, f__ points to o__ (or x if !length o__) @@ -1792,7 +1792,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { if (all(cnames %chin% names(x))) { if (verbose) {last.started.at=proc.time();cat("setkey() after the := with keyby= ... ");flush.console()} setkeyv(x,cnames) # TO DO: setkey before grouping to get memcpy benefit. - if (verbose) {cat(timetaken(last.started.at)); flush.console()} + if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} } else warning(":= keyby not straightforward character column names or list() of column names, treating as a by:",paste(cnames,collapse=","),"\n") } @@ -1819,7 +1819,7 @@ chmatch2 <- function(x, table, nomatch=NA_integer_) { if (byjoin && !missing(keyby) && !bysameorder) { if (verbose) {last.started.at=proc.time();cat("setkey() afterwards for keyby=.EACHI ... ");flush.console()} setkeyv(ans,names(ans)[seq_along(byval)]) - if (verbose) {cat(timetaken(last.started.at)); flush.console()} + if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} } else if (!missing(keyby) || (haskey(x) && bysameorder)) { setattr(ans,"sorted",names(ans)[seq_along(grpcols)]) } @@ -2823,7 +2823,7 @@ isReallyReal <- function(x) { nonEqui = FALSE while(length(remainingIsub)){ if(is.call(remainingIsub)){ - if (length(remainingIsub[[1L]]) != 1) return(NULL) ## only single symbol, either '&' or one of validOps allowed. + if (length(remainingIsub[[1L]]) != 1L) return(NULL) ## only single symbol, either '&' or one of validOps allowed. if (remainingIsub[[1L]] != "&"){ ## only a single expression present or a different connection. stub <- remainingIsub remainingIsub <- NULL ## there is no remainder to be evaluated after stub. @@ -2887,7 +2887,7 @@ isReallyReal <- function(x) { on <- c(on, setNames(paste0(col, validOps$on[validOps$op == operator], col), col)) ## loop continues with remainingIsub } - if (length(i) == 0) stop("Internal error in .isFastSubsettable. Please report to data.table developers") + if (length(i) == 0L) stop("Internal error in .isFastSubsettable. Please report to data.table developers") ## convert i to data.table with all combinations in rows. Care is needed with names as we do ## it with 'do.call' and this would cause problems if colNames were 'sorted' or 'unique' ## as these two would be interpreted as args for CJ @@ -2904,7 +2904,7 @@ isReallyReal <- function(x) { ## order of key columns makes no difference, as long as they are all upfront in the key, I believe. if (all(names(i) %chin% head(key(x), length(i)))){ if (verbose) {cat("Optimized subsetting with key '", paste0( head(key(x), length(i)), collapse = ", "),"'\n",sep="");flush.console()} - idx <- integer(0) ## integer(0) is not NULL! Indicates that x is ordered correctly. + idx <- integer(0L) ## integer(0L) not NULL! Indicates that x is ordered correctly. idxCols <- head(key(x), length(i)) ## in correct order! } } @@ -2929,9 +2929,9 @@ isReallyReal <- function(x) { ## if nothing else helped, auto create a new index that can be used if (!getOption("datatable.auto.index")) return(NULL) if (verbose) {cat("Creating new index '", paste0(names(i), collapse = "__"),"'\n",sep="");flush.console()} - if (verbose) {last.started.at=proc.time()[3];cat("Creating index", paste0(names(i), collapse = "__"), "done in ... ");flush.console()} + if (verbose) {last.started.at=proc.time();cat("Creating index", paste0(names(i), collapse = "__"), "done in ... ");flush.console()} setindexv(x, names(i)) - if (verbose) {cat(round(proc.time()[3]-last.started.at,3),"secs\n");flush.console()} + if (verbose) {cat(timetaken(last.started.at),"\n");flush.console()} if (verbose) {cat("Optimized subsetting with index '", paste0(names(i), collapse = "__"),"'\n",sep="");flush.console()} idx <- attr(attr(x, "index"), paste0("__", names(i), collapse = "")) idxCols <- names(i) diff --git a/R/foverlaps.R b/R/foverlaps.R index be85c31f5..96d588800 100644 --- a/R/foverlaps.R +++ b/R/foverlaps.R @@ -109,7 +109,7 @@ foverlaps <- function(x, y, by.x = if (!is.null(key(x))) key(x) else key(y), by. if (verbose) {last.started.at=proc.time();cat("unique() + setkey() operations done in ...");flush.console()} uy = unique(y[, eval(call)]) setkey(uy)[, `:=`(lookup = list(list(integer(0L))), type_lookup = list(list(integer(0L))), count=0L, type_count=0L)] - if (verbose) {cat(timetaken(last.started.at)); flush.console()} + if (verbose) {cat(timetaken(last.started.at),"\n"); flush.console()} matches <- function(ii, xx, del, ...) { cols = setdiff(names(xx), del) xx = .shallow(xx, cols, retain.key = FALSE) @@ -134,7 +134,7 @@ foverlaps <- function(x, y, by.x = if (!is.null(key(x))) key(x) else key(y), by. # iintervals = tail(names(x), 2L) # iintervals not yet used so commented out for now if (verbose) {last.started.at=proc.time();cat("binary search(es) done in ...");flush.console()} xmatches = indices(uy, x, xintervals, nomatch=0L, roll=roll) - if (verbose) {cat(timetaken(last.started.at));flush.console()} + if (verbose) {cat(timetaken(last.started.at),"\n");flush.console()} olaps = .Call(Coverlaps, uy, xmatches, mult, type, nomatch, verbose) } # nocov start diff --git a/R/fwrite.R b/R/fwrite.R index 7d1189ce1..02b59cc63 100644 --- a/R/fwrite.R +++ b/R/fwrite.R @@ -38,7 +38,7 @@ fwrite <- function(x, file="", append=FALSE, quote="auto", isLOGICAL(verbose), isLOGICAL(showProgress), isLOGICAL(logical01), length(na) == 1L, #1725, handles NULL or character(0) input is.character(file) && length(file)==1L && !is.na(file), - length(buffMB)==1L && !is.na(buffMB) && 1<=buffMB && buffMB<=1024, + length(buffMB)==1L && !is.na(buffMB) && 1L<=buffMB && buffMB<=1024, length(nThread)==1L && !is.na(nThread) && nThread>=1L ) file <- path.expand(file) # "~/foo/bar" diff --git a/R/timetaken.R b/R/timetaken.R index d5e3c0435..4372824d4 100644 --- a/R/timetaken.R +++ b/R/timetaken.R @@ -14,8 +14,8 @@ timetaken <- function(started.at) hrs = hrs - 24*days if (secs >= 60) { if (days >= 1) res = sprintf("%d days ", as.integer(days)) else res="" - paste(res,sprintf("%02.0f:%02.0f:%02.0f\n", hrs, mins, secs %% 60),sep="") + paste(res,sprintf("%02.0f:%02.0f:%02.0f", hrs, mins, secs %% 60),sep="") } else { - sprintf(if (secs>=10) "%.1fsec\n" else "%.3fsec\n", secs) + sprintf(if (secs>=10) "%.1fsec" else "%.3fsec", secs) } } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index e26e8b6c0..cafd2cbb2 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11575,16 +11575,16 @@ test(1872.9, DT_out, x = data.table(start=c(5,31,22,16), end=c(8,50,25,18), val2 = 7:10) y = data.table(start=c(10, 20, 30), end=c(15, 35, 45), val1 = 1:3) setkey(y, start, end) -test(1872.10, foverlaps(x, y, maxgap = 2), error = 'maxgap and minoverlap.*not yet') -test(1872.11, foverlaps(x, y, minoverlap = 2), error = 'maxgap and minoverlap.*not yet') +test(1872.11, foverlaps(x, y, maxgap = 2), error = 'maxgap and minoverlap.*not yet') +test(1872.12, foverlaps(x, y, minoverlap = 2), error = 'maxgap and minoverlap.*not yet') ## tests of verbose output ### foverlaps -test(1872.12, foverlaps(x, y, verbose = TRUE), +test(1872.13, foverlaps(x, y, verbose = TRUE), output = 'unique.*setkey.*operations.*binary search') ### [.data.table X = data.table(x=c("c","b"), v=8:7, foo=c(4,2)) DT = data.table(x=rep(c("b","a","c"),each=3), y=c(1,3,6), v=1:9) -test(1872.13, DT[X, on=.(x, v>=v), verbose = TRUE], +test(1872.14, DT[X, on=.(x, v>=v), verbose = TRUE], output = 'Non-equi join operators.*forder took.*group lengths.*done.*non-equi group ids.*done') ########################## diff --git a/man/timetaken.Rd b/man/timetaken.Rd index 0fa4b35e8..e0b2722b3 100644 --- a/man/timetaken.Rd +++ b/man/timetaken.Rd @@ -13,7 +13,7 @@ timetaken(started.at) % \details{ % } \value{ - A character vector of the form hh:mm:ss, or ss.mmm if under 60 seconds. + A character vector of the form HH:MM:SS, or SS.MMMsec if under 60 seconds. } \examples{ started.at=proc.time()