Skip to content

Commit

Permalink
replaced big block with smaller modification to the main loop; also h…
Browse files Browse the repository at this point in the history
…alves the working ram needed as it reuses the same tmp for the rerun
  • Loading branch information
mattdowle committed Jul 19, 2019
1 parent 598097b commit 2df22a3
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 50 deletions.
68 changes: 18 additions & 50 deletions src/forder.c
Original file line number Diff line number Diff line change
Expand Up @@ -449,55 +449,6 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP sortGroupsArg, SEXP ascArg, S
error("Column %d is length %d which differs from length of column 1 (%d)\n", INTEGER(by)[i], length(VECTOR_ELT(DT, INTEGER(by)[i]-1)), nrow);
if (TYPEOF(VECTOR_ELT(DT, by_i-1)) == CPLXSXP) n_cplx++;
}
if (n_cplx) {
// we don't expect users to need complex sorting extensively
// or on massive data sets, so we take the approach of
// splitting a complex vector into its real & imaginary parts
// and using the regular forderv machinery to sort; a baremetal
// implementation would at root do the same, but the approach
// here is a bit more slapdash with respect to memory efficiency
// (seen clearly here at C from the 3+2*n_cplx PROTECT() calls)
int n_out = length(by) + n_cplx;
SEXP new_dt = PROTECT(allocVector(VECSXP, n_out)); n_protect++;
SEXP new_asc = PROTECT(allocVector(INTSXP, n_out)); n_protect++;
// will be simply 1:n_out
SEXP new_by = PROTECT(allocVector(INTSXP, n_out)); n_protect++;
int j = 0;
for (int i=0; i<length(by); i++) {
int by_idx = INTEGER(by)[i]-1;
if (TYPEOF(VECTOR_ELT(DT, by_idx)) == CPLXSXP) {
// I don't see any shorthand way of splitting of the real&imaginary components,
// i.e., a shorthand way of doing Re(z), Im(z). That includes searching all of
// the r-source code & all of the r-devel archives. So just reproduce Re(), Im()
// as done in do_cmathfuns in complex.c
SEXP realPart = PROTECT(allocVector(REALSXP, nrow)); n_protect++;
SEXP cplxPart = PROTECT(allocVector(REALSXP, nrow)); n_protect++;
double *pre = REAL(realPart);
double *pim = REAL(cplxPart);
Rcomplex *pz = COMPLEX(VECTOR_ELT(DT, by_idx));
for (int i = 0; i < nrow; i++) {
pre[i] = pz[i].r;
pim[i] = pz[i].i;
}
SET_VECTOR_ELT(new_dt, j, realPart);
SET_VECTOR_ELT(new_dt, j+1, cplxPart);
INTEGER(new_asc)[j] = INTEGER(ascArg)[i];
INTEGER(new_asc)[j+1] = INTEGER(ascArg)[i];
INTEGER(new_by)[j] = j+1;
INTEGER(new_by)[j+1] = j+2;
j += 2;
} else {
SET_VECTOR_ELT(new_dt, j, VECTOR_ELT(DT, by_idx));
INTEGER(new_asc)[j] = INTEGER(ascArg)[i];
INTEGER(new_by)[j] = j+1;
j += 1;
}
}
DT = new_dt;
ascArg = new_asc;
by = new_by;
}

if (!isLogical(retGrpArg) || LENGTH(retGrpArg)!=1 || INTEGER(retGrpArg)[0]==NA_LOGICAL) error("retGrp must be TRUE or FALSE");
retgrp = LOGICAL(retGrpArg)[0]==TRUE;
if (!isLogical(sortGroupsArg) || LENGTH(sortGroupsArg)!=1 || INTEGER(sortGroupsArg)[0]==NA_LOGICAL ) error("sortGroups must be TRUE or FALSE");
Expand Down Expand Up @@ -527,11 +478,14 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP sortGroupsArg, SEXP ascArg, S
savetl_init(); // from now on use Error not error

int ncol=length(by);
key = calloc(ncol*8+1, sizeof(uint8_t *)); // needs to be before loop because part II relies on part I, column-by-column. +1 because we check NULL after last one
key = calloc((ncol+n_cplx)*8+1, sizeof(uint8_t *)); // needs to be before loop because part II relies on part I, column-by-column. +1 because we check NULL after last one
// TODO: if key==NULL Error
nradix=0; // the current byte we're writing this column to; might be squashing into it (spare>0)
int spare=0; // the amount of bits remaining on the right of the current nradix byte
bool isReal=false;
bool complexRerun = false; // see comments below in CPLXSXP case
SEXP CplxPart = R_NilValue;
if (n_cplx) { CplxPart=PROTECT(allocVector(REALSXP, nrow)); n_protect++; } // one alloc is reused for each part
TEND(2);
for (int col=0; col<ncol; col++) {
// Rprintf("Finding range of column %d ...\n", col);
Expand All @@ -544,6 +498,20 @@ SEXP forder(SEXP DT, SEXP by, SEXP retGrpArg, SEXP sortGroupsArg, SEXP ascArg, S
case INTSXP : case LGLSXP : // TODO skip LGL and assume range [0,1]
range_i32(INTEGER(x), nrow, &min, &max, &na_count);
break;
case CPLXSXP : {
// treat as if two separate columns of double
const Rcomplex *xd = COMPLEX(x);
double *tmp = REAL(CplxPart);
if (!complexRerun) {
for (int i=0; i<nrow; ++i) tmp[i] = xd[i].r; // extract the real part on the first time
complexRerun = true;
col--; // cause this loop iteration to rerun; decrement now in case of early continue below
} else {
for (int i=0; i<nrow; ++i) tmp[i] = xd[i].i;
complexRerun = false;
}
x = CplxPart;
} // !no break! so as to fall through to REAL case

This comment has been minimized.

Copy link
@MichaelChirico

MichaelChirico Jul 19, 2019

Member

wow. very snazzy.

case REALSXP :
if (inherits(x, "integer64")) {
range_i64((int64_t *)REAL(x), nrow, &min, &max, &na_count);
Expand Down
1 change: 1 addition & 0 deletions src/reorder.c
Original file line number Diff line number Diff line change
Expand Up @@ -101,3 +101,4 @@ SEXP reorder(SEXP x, SEXP order)
UNPROTECT(nprotect);
return(R_NilValue);
}

0 comments on commit 2df22a3

Please sign in to comment.