Skip to content

Commit

Permalink
setkey works on tables with complex columns (#3688)
Browse files Browse the repository at this point in the history
  • Loading branch information
MichaelChirico authored and mattdowle committed Jul 18, 2019
1 parent cc0a218 commit f837d50
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 74 deletions.
6 changes: 4 additions & 2 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
* Gains `yaml` argument matching that of `fread`, [#3534](https://github.com/Rdatatable/data.table/issues/3534). See the item in `fread` for a bit more detail; here, we'd like to reiterate that feedback is appreciated in the initial phase of rollout for this feature.

* Gains `bom` argument to add a *byte order mark* (BOM) at the beginning of the file to signal that the file is encoded in UTF-8, [#3488](https://github.com/Rdatatable/data.table/issues/3488). Thanks to Stefan Fleck for requesting and Philippe Chataignon for implementing.

* Now supports type `complex`, [#3690](https://github.com/Rdatatable/data.table/issues/3690).

4. Assigning to one item of a list column no longer requires the RHS to be wrapped with `list` or `.()`, [#950](https://github.com/Rdatatable/data.table/issues/950).
Expand Down Expand Up @@ -131,7 +131,9 @@
# TRUE
```
19. `shift` now supports complex vectors, part of [#3690](https://github.com/Rdatatable/data.table/issues/3690).
19. `shift` now supports type `complex`, part of [#3690](https://github.com/Rdatatable/data.table/issues/3690).
20. `setkey` now supports type `complex` as value columns (not as key columns), [#1444](https://github.com/Rdatatable/data.table/issues/1444). Thanks Gareth Ward for the report.
#### BUG FIXES
Expand Down
3 changes: 0 additions & 3 deletions R/setkey.R
Original file line number Diff line number Diff line change
Expand Up @@ -142,9 +142,6 @@ getindex = function(x, name) {

haskey = function(x) !is.null(key(x))

# reverse a vector by reference (no copy)
setrev = function(x) .Call(Csetrev, x)

# reorder a vector based on 'order' (integer)
# to be used in fastorder instead of x[o], but in general, it's better to replace vector subsetting with this..?
# Basic checks that all items of order are in range 1:n with no NAs are now made inside Creorder.
Expand Down
39 changes: 11 additions & 28 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ if (exists("test.data.table", .GlobalEnv, inherits=FALSE)) {
setcoalesce = data.table:::setcoalesce
setdiff_ = data.table:::setdiff_
setreordervec = data.table:::setreordervec
setrev = data.table:::setrev
shallow = data.table:::shallow # until exported
.shallow = data.table:::.shallow
split.data.table = data.table:::split.data.table
Expand Down Expand Up @@ -3920,33 +3919,7 @@ test(1159.6, setDT(x), error="Argument 'x' to 'setDT' should be a")
x <- list(1, 2:3)
test(1159.7, setDT(x), error="All elements in argument 'x' to 'setDT'")

# tests for setrev
x <- sample(10)
y <- rev(x)
setrev(x)
test(1160.1, y, x)
x <- sample(c(1:10, NA), 21, TRUE)
y <- rev(x)
setrev(x)
test(1160.2, y, x)
x <- sample(runif(10))
y <- rev(x)
setrev(x)
test(1160.3, y, x)
x <- sample(c(runif(10), NA, NaN), 21, TRUE)
y <- rev(x)
setrev(x)
test(1160.4, y, x)
x <- sample(letters)
y <- rev(x)
setrev(x)
test(1160.5, y, x)
x <- as.logical(sample(0:1, 20, TRUE))
y <- rev(x)
setrev(x)
test(1160.6, y, x)
x <- list(1:10)
test(1160.7, setrev(x), error="Input 'x' must be a vector")
# test 1160 was for setrev, now removed

# tests for setreordervec
# integer
Expand Down Expand Up @@ -15348,6 +15321,16 @@ test(2067.2, shift(z, type = 'lead'), c(z[2:3], NA))
test(2067.3, shift(z, fill = 1i), c(1i, z[1:2]))
test(2067.4, shift(list(z, 1:3)), list(c(NA, z[1:2]), c(NA, 1:2)))

# support for ordering tables with complex columns, #1444
DT = data.table(a = 2:1, z = complex(0, 0:1))
test(2068.1, setkey(copy(DT), a), data.table(a=1:2, z=complex(0, 1:0), key='a'))
test(2068.2, DT[ , abs(z), by=a], data.table(a=2:1, V1=c(0, 1)))
# raw continues not to be supported
DT = data.table(ID=2:1, r=as.raw(0:1))
test(2068.3, setkey(DT, ID), error="Item 2 of list is type 'raw'")
# setreordervec triggers !isNewList branch for coverage
test(2068.4, setreordervec(DT$r, order(DT$ID)), error="reorder accepts vectors but this non-VECSXP")


###################################
# Add new tests above this line #
Expand Down
2 changes: 0 additions & 2 deletions src/init.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ SEXP fmelt();
SEXP fcast();
SEXP uniqlist();
SEXP uniqlengths();
SEXP setrev();
SEXP forder();
SEXP fsorted();
SEXP gforce();
Expand Down Expand Up @@ -116,7 +115,6 @@ R_CallMethodDef callMethods[] = {
{"Cfcast", (DL_FUNC) &fcast, -1},
{"Cuniqlist", (DL_FUNC) &uniqlist, -1},
{"Cuniqlengths", (DL_FUNC) &uniqlengths, -1},
{"Csetrev", (DL_FUNC) &setrev, -1},
{"Cforder", (DL_FUNC) &forder, -1},
{"Cfsorted", (DL_FUNC) &fsorted, -1},
{"Cgforce", (DL_FUNC) &gforce, -1},
Expand Down
54 changes: 15 additions & 39 deletions src/reorder.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,17 @@ SEXP reorder(SEXP x, SEXP order)
ncol = length(x);
for (int i=0; i<ncol; i++) {
SEXP v = VECTOR_ELT(x,i);
if (SIZEOF(v)!=4 && SIZEOF(v)!=8)
error("Item %d of list is type '%s' which isn't yet supported", i+1, type2char(TYPEOF(v)));
if (SIZEOF(v)!=4 && SIZEOF(v)!=8 && SIZEOF(v)!=16)
error("Item %d of list is type '%s' which isn't yet supported (SIZEOF=%d)", i+1, type2char(TYPEOF(v)), SIZEOF(v));
if (length(v)!=nrow)
error("Column %d is length %d which differs from length of column 1 (%d). Invalid data.table.", i+1, length(v), nrow);
if (SIZEOF(v) > maxSize)
maxSize=SIZEOF(v);
if (ALTREP(v)) SET_VECTOR_ELT(x,i,duplicate(v)); // expand compact vector in place ready for reordering by reference
}
} else {
if (SIZEOF(x)!=4 && SIZEOF(x)!=8)
error("reorder accepts vectors but this non-VECSXP is type '%s' which isn't yet supported", type2char(TYPEOF(x)));
if (SIZEOF(x)!=4 && SIZEOF(x)!=8 && SIZEOF(x)!=16)
error("reorder accepts vectors but this non-VECSXP is type '%s' which isn't yet supported (SIZEOF=%d)", type2char(TYPEOF(x)), SIZEOF(x));
if (ALTREP(x)) error("Internal error in reorder.c: cannot reorder an ALTREP vector. Please see NEWS item 2 in v1.11.4 and report this as a bug."); // # nocov
maxSize = SIZEOF(x);
nrow = length(x);
Expand Down Expand Up @@ -68,7 +68,7 @@ SEXP reorder(SEXP x, SEXP order)
tmp[i] = cvd[idx[i]-1]; // copies 4 bytes; including pointers on 32bit (STRSXP and VECSXP)
}
memcpy(vd+start, tmp+start, (end-start+1)*size);
} else {
} else if (size==8) {
double *vd = (double *)DATAPTR(v);
const double *restrict cvd = vd;
double *restrict tmp = (double *)TMP;
Expand All @@ -77,6 +77,16 @@ SEXP reorder(SEXP x, SEXP order)
tmp[i] = cvd[idx[i]-1]; // copies 8 bytes; including pointers on 64bit (STRSXP and VECSXP)
}
memcpy(vd+start, tmp+start, (end-start+1)*size);
} else {
// #1444 -- support for copying CPLXSXP (which have size 16)
Rcomplex *vd = (Rcomplex *)DATAPTR(v);
const Rcomplex *restrict cvd = vd;
Rcomplex *restrict tmp = (Rcomplex *)TMP;
#pragma omp parallel for num_threads(getDTthreads())
for (int i=start; i<=end; i++) {
tmp[i] = cvd[idx[i]-1]; // copies 16 bytes
}
memcpy(vd+start, tmp+start, (end-start+1)*size);
}
}
// It's ok to ignore write barrier, and in parallel too, only because this reorder() function accepts and checks
Expand All @@ -92,37 +102,3 @@ SEXP reorder(SEXP x, SEXP order)
return(R_NilValue);
}

// reverse a vector - equivalent of rev(x) in base, but implemented in C and about 12x faster (on 1e8)
SEXP setrev(SEXP x) {
R_len_t j, n, len;
size_t size;
char *tmp, *xt;
if (TYPEOF(x) == VECSXP || isMatrix(x)) error("Input 'x' must be a vector");
len = length(x);
if (len <= 1) return(x);
size = SIZEOF(x);
if (!size) error("don't know how to reverse type '%s' of input 'x'.",type2char(TYPEOF(x)));
n = (int)(len/2);
xt = (char *)DATAPTR(x);
if (size==4) {
tmp = (char *)Calloc(1, int);
if (!tmp) error("unable to allocate temporary working memory for reordering x");
for (j=0;j<n;j++) {
*(int *)tmp = ((int *)xt)[j]; // just copies 4 bytes (pointers on 32bit too)
((int *)xt)[j] = ((int *)xt)[len-1-j];
((int *)xt)[len-1-j] = *(int *)tmp;
}
} else {
if (size!=8) error("Size of x isn't 4 or 8");
tmp = (char *)Calloc(1, double);
if (!tmp) error("unable to allocate temporary working memory for reordering x");
for (j=0;j<n;j++) {
*(double *)tmp = ((double *)xt)[j]; // just copies 8 bytes (pointers on 64bit too)
((double *)xt)[j] = ((double *)xt)[len-1-j];
((double *)xt)[len-1-j] = *(double *)tmp;
}
}
Free(tmp);
return(R_NilValue);
}

0 comments on commit f837d50

Please sign in to comment.