diff --git a/R/duplicated.R b/R/duplicated.R
index c5a89f115..b8359f31e 100644
--- a/R/duplicated.R
+++ b/R/duplicated.R
@@ -90,6 +90,7 @@ anyDuplicated.data.table <- function(x, incomparables=FALSE, fromLast=FALSE, by=
 # simple straightforward helper function to get the number
 # of groups in a vector or data.table. Here by data.table,
 # we really mean `.SD` - used in a grouping operation
+# TODO: optimise uniqueN further with GForce.
 uniqueN <- function(x, by = if (is.data.table(x)) key(x) else NULL, na.rm=FALSE) { # na.rm, #1455
   if (is.null(x)) return(0L)
   if (!is.atomic(x) && !is.data.frame(x))
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 00190cae4..a8f0f53b2 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -7801,7 +7801,7 @@ test(1627, charToRaw(names(fread("issue_1087_utf8_bom.csv"))[1L]), as.raw(97L))
 
 # uniqueN gains na.rm argument, #1455
 set.seed(1L)
-dt = data.table(x=sample(3,25,TRUE), y=sample(c(NA,"a", "b"), 25,TRUE), z=sample(2,25,TRUE))
+dt = data.table(x=sample(c(1:3,NA),25,TRUE), y=sample(c(NA,"a", "b"), 25,TRUE), z=sample(2,25,TRUE))
 test(1628.1, uniqueN(dt, by=1:2, na.rm=TRUE), nrow(na.omit(dt[, .N, by=.(x,y)])))
 test(1628.2, uniqueN(dt, na.rm=TRUE), nrow(na.omit(dt[, .N, by=.(x,y,z)])))
 test(1628.3, dt[, uniqueN(y, na.rm=TRUE), by=z], dt[, length(unique(na.omit(y))), by=z])