diff --git a/NEWS.md b/NEWS.md index b6556230a..59c9404d3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -206,6 +206,51 @@ # v1.9.6 18.5400 19.1800 21.5100 20.6900 23.4200 29.040 100 # v1.14.4 0.4826 0.5586 0.6586 0.6329 0.7348 1.318 100 ``` + +31. `rbind()` and `rbindlist()` now support `fill=TRUE` with `use.names=FALSE` instead of issuing the warning `use.names= cannot be FALSE when fill is TRUE. Setting use.names=TRUE.` + + ```R + DT1 + # A B + # + # 1: 1 5 + # 2: 2 6 + + DT2 + # foo + # + # 1: 3 + # 2: 4 + + rbind(DT1, DT2, fill=TRUE) # no change + # A B foo + # + # 1: 1 5 NA + # 2: 2 6 NA + # 3: NA NA 3 + # 4: NA NA 4 + + rbind(DT1, DT2, fill=TRUE, use.names=FALSE) + + # was: + # A B foo + # + # 1: 1 5 NA + # 2: 2 6 NA + # 3: NA NA 3 + # 4: NA NA 4 + # Warning message: + # In rbindlist(l, use.names, fill, idcol) : + # use.names= cannot be FALSE when fill is TRUE. Setting use.names=TRUE. + + # now: + # A B + # + # 1: 1 5 + # 2: 2 6 + # 3: 3 NA + # 4: 4 NA + ``` ## BUG FIXES diff --git a/R/merge.R b/R/merge.R index 683a6d08a..f237bcbf3 100644 --- a/R/merge.R +++ b/R/merge.R @@ -78,16 +78,7 @@ merge.data.table = function(x, y, by = NULL, by.x = NULL, by.y = NULL, all = FAL # Perhaps not very commonly used, so not a huge deal that the join is redone here. missingyidx = y[!x, which=TRUE, on=by, allow.cartesian=allow.cartesian] if (length(missingyidx)) { - yy = y[missingyidx] - othercolsx = setdiff(nm_x, by) - if (length(othercolsx)) { - tmp = rep.int(NA_integer_, length(missingyidx)) - # TO DO: use set() here instead.. - yy = cbind(yy, x[tmp, othercolsx, with = FALSE]) - } - # empty data.tables (nrow =0, ncol>0) doesn't skip names anymore in new rbindlist - # takes care of #24 without having to save names. This is how it should be, IMHO. - dt = rbind(dt, yy, use.names=FALSE) + dt = rbind(dt, y[missingyidx], use.names=FALSE, fill=TRUE) } } # X[Y] syntax puts JIS i columns at the end, merge likes them alongside i. 
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ef06748e0..6cae6fe5c 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -1863,6 +1863,8 @@ test(628.2, rbind(data.table(a=1:3,b=factor(letters[1:3]),c=factor("foo")), list # Test merge with common names and all.y=TRUE, #2011 DT1 = data.table(a=c(1,3,4,5), total=c(2,1,3,1), key="a") DT2 = data.table(a=c(2,3,5), total=c(5,1,2), key="a") +DT3 = data.table(a=c(2), total=c(5), key="a") +DT4 = data.table(a=c(3), total=c(1), key="a") # 629+630 worked before anyway. 631+632 test the bug fix. adf=as.data.frame adt=as.data.table @@ -1875,6 +1877,16 @@ test(630.1, merge(DT1,DT2,all.x=TRUE), setkey(adt(merge(adf(DT1),adf(DT2),by="a" test(631, merge(DT1,DT2,all.y=TRUE), data.table(a=c(2,3,5),total.x=c(NA,1,1),total.y=c(5,1,2),key="a")) test(631.1, merge(DT1,DT2,all.y=TRUE), setkey(adt(merge(adf(DT1),adf(DT2),by="a",all.y=TRUE)),a)) +# ensure merge(x,y,all.y) does not alter input y ... +# .. i subset y with 1:nrow(y) +test(631.2, merge(DT1[c(1,3)],DT2,all.y=TRUE), data.table(a=c(2,3,5),total.x=NA_real_,total.y=c(5,1,2),key="a")) +test(631.3, DT2, data.table(a=c(2,3,5), total=c(5,1,2), key="a")) +# .. nrow(y)=1, i subset y with 1 and no match with x +test(631.4, merge(DT1,DT3,all.y=TRUE), data.table(a=c(2),total.x=NA_real_,total.y=c(5),key="a")) +test(631.5, DT3, data.table(a=c(2), total=c(5), key="a")) +# .. 
nrow(y)=1, i subset y with 1 and match with x +test(631.6, merge(DT1,DT4,all.y=TRUE), data.table(a=c(3),total.x=c(1),total.y=c(1),key="a")) +test(631.7, DT4, data.table(a=c(3), total=c(1), key="a")) test(632, merge(DT1,DT2,all=TRUE), data.table(a=c(1,2,3,4,5),total.x=c(2,NA,1,3,1),total.y=c(NA,5,1,NA,2),key="a")) test(632.1, merge(DT1,DT2,all=TRUE), setkey(adt(merge(adf(DT1),adf(DT2),by="a",all=TRUE)),a)) @@ -14577,8 +14589,11 @@ test(2002.12, rbind(DT1, DT2, idcol='id'), data.table(id=integer(), a=logica test(2003.1, rbindlist(list(), use.names=1), error="use.names= should be TRUE, FALSE, or not used [(]\"check\" by default[)]") test(2003.2, rbindlist(list(), fill=1), error="fill= should be TRUE or FALSE") test(2003.3, rbindlist(list(data.table(a=1:2), data.table(b=3:4)), fill=TRUE, use.names=FALSE), - data.table(a=c(1:2,NA,NA), b=c(NA,NA,3:4)), - warning="use.names= cannot be FALSE when fill is TRUE. Setting use.names=TRUE") + data.table(a=c(1:4))) +test(2003.4, rbindlist(list(data.table(a=1:2,c=5:6), data.table(b=3:4)), fill=TRUE, use.names=FALSE), + data.table(a=c(1:4), c=INT(5,6,NA,NA))) +test(2003.5, rbindlist(list(data.table(a=1:2), data.table(b=3:4, c=5:6)), fill=TRUE, use.names=FALSE), + data.table(a=c(1:4), V1=INT(NA,NA,5,6))) # chmatch coverage for two different non-ascii encodings matching; issues mentioned in comments in chmatch.c #69 #2538 #111 x1 = "fa\xE7ile" diff --git a/man/rbindlist.Rd b/man/rbindlist.Rd index 192fb5135..2ba39a2a9 100644 --- a/man/rbindlist.Rd +++ b/man/rbindlist.Rd @@ -13,7 +13,7 @@ rbindlist(l, use.names="check", fill=FALSE, idcol=NULL) \arguments{ \item{l}{ A list containing \code{data.table}, \code{data.frame} or \code{list} objects. \code{\dots} is the same but you pass the objects by name separately. } \item{use.names}{\code{TRUE} binds by matching column name, \code{FALSE} by position. 
`check` (default) warns if all items don't have the same names in the same order and then currently proceeds as if `use.names=FALSE` for backwards compatibility (\code{TRUE} in future); see news for v1.12.2.} - \item{fill}{\code{TRUE} fills missing columns with NAs. By default \code{FALSE}. When \code{TRUE}, \code{use.names} is set to \code{TRUE}.} + \item{fill}{\code{TRUE} fills missing columns with NAs. By default \code{FALSE}. When \code{TRUE} and \code{use.names} is left at its default, columns are matched by name as if \code{use.names=TRUE}; with an explicit \code{use.names=FALSE}, columns are bound by position and items with fewer columns are filled with NAs.} \item{idcol}{Creates a column in the result showing which list item those rows came from. \code{TRUE} names this column \code{".id"}. \code{idcol="file"} names this column \code{"file"}. If the input list has names, those names are the values placed in this id column, otherwise the values are an integer vector \code{1:length(l)}. See \code{examples}.} } \details{ diff --git a/src/rbindlist.c b/src/rbindlist.c index 5d0b6547e..366902883 100644 --- a/src/rbindlist.c +++ b/src/rbindlist.c @@ -12,8 +12,7 @@ SEXP rbindlist(SEXP l, SEXP usenamesArg, SEXP fillArg, SEXP idcolArg) if (TYPEOF(l) != VECSXP) error(_("Input to rbindlist must be a list. This list can contain data.tables, data.frames or plain lists.")); Rboolean usenames = LOGICAL(usenamesArg)[0]; const bool fill = LOGICAL(fillArg)[0]; - if (fill && usenames!=TRUE) { - if (usenames==FALSE) warning(_("use.names= cannot be FALSE when fill is TRUE. Setting use.names=TRUE.")); // else no warning if usenames==NA (default) + if (fill && usenames==NA_LOGICAL) { usenames=TRUE; } const bool idcol = !isNull(idcolArg);