diff --git a/R/foverlaps.R b/R/foverlaps.R index c9f46f767..ff6737c8f 100644 --- a/R/foverlaps.R +++ b/R/foverlaps.R @@ -15,7 +15,7 @@ foverlaps <- function(x, y, by.x = if (!is.null(key(x))) key(x) else key(y), by. mult = match.arg(mult) if (type == "equal") stop("type = 'equal' is not implemented yet. But note that this is just the same as a normal data.table join y[x, ...], unless you are also interested in setting 'minoverlap / maxgap' arguments. But those arguments are not implemented yet as well.") - if (maxgap > 0L || minoverlap > 1L) + if (maxgap != 0L || minoverlap != 1L) stop("maxgap and minoverlap arguments are not yet implemented.") if (is.null(by.y)) stop("'y' must be keyed (i.e., sorted, and, marked as sorted). Call setkey(y, ...) first, see ?setkey. Also check the examples in ?foverlaps.") @@ -59,6 +59,7 @@ foverlaps <- function(x, y, by.x = if (!is.null(key(x))) key(x) else key(y), by. stop("The last two columns in by.y should correspond to the 'start' and 'end' intervals in data.table 'y' and must be integer/numeric type.") if ( any(y[[yintervals[2L]]] - y[[yintervals[1L]]] < 0L) ) stop("All entries in column ", yintervals[1L], " should be <= corresponding entries in column ", yintervals[2L], " in data.table 'y'") + ## see NOTES below: yclass = c(class(y[[yintervals[1L]]]), class(y[[yintervals[2L]]])) isdouble = FALSE; isposix = FALSE @@ -128,24 +129,15 @@ foverlaps <- function(x, y, by.x = if (!is.null(key(x))) key(x) else key(y), by. } # nomatch has no effect here, just for passing arguments consistently to `bmerge` .Call(Clookup, uy, nrow(y), indices(uy, y, yintervals, nomatch=0L, roll=roll), maxgap, minoverlap, mult, type, verbose) - if (maxgap == 0L && minoverlap == 1L) { - iintervals = tail(names(x), 2L) - if (verbose) {last.started.at=proc.time();cat("binary search(es) done in ...");flush.console()} - xmatches = indices(uy, x, xintervals, nomatch=0L, roll=roll) - if (verbose) {cat(timetaken(last.started.at));flush.console} - olaps = .Call(Coverlaps, uy, xmatches, mult, type, nomatch, verbose) - } else if (maxgap == 0L && minoverlap > 1L) { - stop("Not yet implemented") - } else if (maxgap > 0L && minoverlap == 1L) { - stop("Not yet implemented") - } else if (maxgap > 0L && minoverlap > 1L) { - if (maxgap > minoverlap) - warning("maxgap > minoverlap. maxgap will have no effect here.") - stop("Not yet implemented") - } + if (verbose) {last.started.at=proc.time();cat("binary search(es) done in ...");flush.console()} + xmatches = indices(uy, x, xintervals, nomatch=0L, roll=roll) + if (verbose) {cat(timetaken(last.started.at));flush.console} + olaps = .Call(Coverlaps, uy, xmatches, mult, type, nomatch, verbose) + setDT(olaps) setnames(olaps, c("xid", "yid")) yid = NULL # for 'no visible binding for global variable' from R CMD check on i clauses below + # if (type == "any") setorder(olaps) # at times the combine operation may not result in sorted order # CsubsetDT bug has been fixed by Matt. So back to using it! Should improve subset substantially. if (which) { diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index e1fe5dcd0..0e862d38e 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11417,6 +11417,21 @@ DT_out = gsub('\\s+$', '', capture.output(compactprint(DT))) test(1870.9, DT_out, c(" a b [Key=a Types=dou,dou Classes=num,num]", "1: 1 2")) +## Test as-yet unimplemented features of foverlaps +x = data.table(start=c(5,31,22,16), end=c(8,50,25,18), val2 = 7:10) +y = data.table(start=c(10, 20, 30), end=c(15, 35, 45), val1 = 1:3) +setkey(y, start, end) +test(1870.10, foverlaps(x, y, maxgap = 2), error = 'maxgap and minoverlap.*not yet') +test(1870.11, foverlaps(x, y, minoverlap = 2), error = 'maxgap and minoverlap.*not yet') +## tests of verbose output +### foverlaps +test(1870.12, foverlaps(x, y, verbose = TRUE), + output = 'unique.*setkey.*operations.*binary search') +### [.data.table +X = data.table(x=c("c","b"), v=8:7, foo=c(4,2)) +DT = data.table(x=rep(c("b","a","c"),each=3), y=c(1,3,6), v=1:9) +test(1870.13, DT[X, on=.(x, v>=v), verbose = TRUE], + output = 'Non-equi join operators.*forder took.*group lengths.*done.*non-equi group ids.*done') ##########################