diff --git a/R/bmerge.R b/R/bmerge.R index a592f9014..57463b580 100644 --- a/R/bmerge.R +++ b/R/bmerge.R @@ -62,19 +62,31 @@ bmerge <- function(i, x, leftcols, rightcols, io, xo, roll, rollends, nomatch, v # NAs can be produced by this level match, in which case the C code (it knows integer value NA) # can skip over the lookup. It's therefore important we pass NA rather than 0 to the C code. } + # Fix for #1108. + # TODO: clean this code up... + # NOTE: bit64::is.double(int64) returns FALSE.. but base::is.double returns TRUE + is.int64 <- function(x) inherits(x, 'integer64') + is.strictlydouble <- function(x) !is.int64(x) && is.double(x) if (is.integer(x[[rc]]) && (is.double(i[[lc]]) || is.logical(i[[lc]]))) { # TO DO: add warning if reallyreal about loss of precision # or could coerce in binary search on the fly, at cost if (verbose) cat("Coercing ", typeof(i[[lc]])," column i.'",icnam,"' to integer to match type of x.'",xcnam,"'. Please avoid coercion for efficiency.\n",sep="") newval = i[[lc]] - mode(newval) = "integer" # retains column attributes (such as IDateTime class) - set(i,j=lc,value=newval) - } - if (is.double(x[[rc]]) && (is.integer(i[[lc]]) || is.logical(i[[lc]]))) { + if (is.int64(newval)) + newval = as.integer(newval) + else mode(newval) = "integer" # retains column attributes (such as IDateTime class) + set(i, j=lc, value=newval) + } else if (is.int64(x[[rc]]) && (is.integer(i[[lc]]) || is.logical(i[[lc]]) || is.strictlydouble(i[[lc]]) )) { + if (verbose) cat("Coercing ",typeof(i[[lc]])," column i.'",icnam,"' to double to match type of x.'",xcnam,"'. Please avoid coercion for efficiency.\n",sep="") + newval = bit64::as.integer64(i[[lc]]) + set(i, j=lc, value=newval) + } else if (is.strictlydouble(x[[rc]]) && (is.integer(i[[lc]]) || is.logical(i[[lc]]) || is.int64(i[[lc]]) )) { if (verbose) cat("Coercing ",typeof(i[[lc]])," column i.'",icnam,"' to double to match type of x.'",xcnam,"'. Please avoid coercion for efficiency.\n",sep="") newval = i[[lc]] - mode(newval) = "double" - set(i,j=lc,value=newval) + if (is.int64(newval)) + newval = as.numeric(newval) + else mode(newval) = "double" + set(i, j=lc, value=newval) } } diff --git a/README.md b/README.md index 912f03ccb..dea10b8cc 100644 --- a/README.md +++ b/README.md @@ -184,6 +184,8 @@ 47. `as.matrix(DT)` handles cases where `DT` contains both numeric and logical columns correctly (doesn't coerce to character columns anymore). Closes [#1083](https://github.com/Rdatatable/data.table/issues/1083). Thanks to @bramvisser for the [SO post](http://stackoverflow.com/questions/29068328/correlation-between-numeric-and-logical-variable-gives-intended-error). + 48. Coercion is handled properly on subsets/joins on `integer64` key columns. Closes [#1108](https://github.com/Rdatatable/data.table/issues/1108). Thanks to @vspinu. + #### NOTES 1. Clearer explanation of what `duplicated()` does (borrowed from base). Thanks to @matthieugomez for pointing out. Closes [#872](https://github.com/Rdatatable/data.table/issues/872). diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 4d7703983..5f10fe64e 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -6292,6 +6292,21 @@ rownames(DF) = sample(letters, 10) ans = DF[order(-xtfrm(DF$y), DF$x), ] test(1511, ans, setorder(DF, -y, x)) +# fix for #1108 +if ("package:bit64" %in% search()) { + dt <- data.table(id = as.integer64(1:3), a = c("a", "b", "c"), key = "id") + test(1512.1, dt[.(2)], dt[.(as.integer64(2))]) + test(1512.2, dt[.(2L)], dt[.(as.integer64(2))]) + + dt <- data.table(id = as.numeric(1:3), a = c("a", "b", "c"), key = "id") + test(1512.3, dt[.(2L)], dt[.(2)]) + test(1512.4, dt[.(as.integer64(2))], dt[.(2)]) + + dt <- data.table(id = 1:3, a = c("a", "b", "c"), key = "id") + test(1512.5, dt[.(2)], dt[.(2L)]) + test(1512.6, dt[.(as.integer64(2))], dt[.(2L)]) +} + ##########################