From 0807b1d49dac90f4b9208b6d40fff0be38f95e9e Mon Sep 17 00:00:00 2001 From: 2005m Date: Fri, 17 Jan 2020 22:51:41 +0000 Subject: [PATCH 01/18] first commit topn --- NAMESPACE | 1 + R/wrappers.R | 1 + inst/tests/tests.Rraw | 81 ++++++++++++++++++ man/topn.Rd | 26 ++++++ src/data.table.h | 1 + src/fifelse.c | 190 ++++++++++++++++++++++++++++++++++++++++++ src/init.c | 2 + 7 files changed, 302 insertions(+) create mode 100644 man/topn.Rd diff --git a/NAMESPACE b/NAMESPACE index 2112878f3..85c2de4c4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,6 +18,7 @@ export(chmatch, "%chin%", chorder, chgroup) export(rbindlist) export(fifelse) export(fcase) +export(topn) export(fread) export(fwrite) export(foverlaps) diff --git a/R/wrappers.R b/R/wrappers.R index 5fec33a92..cbac2b4cd 100644 --- a/R/wrappers.R +++ b/R/wrappers.R @@ -7,6 +7,7 @@ setcoalesce = function(...) .Call(Ccoalesce, list(...), TRUE) fifelse = function(test, yes, no, na=NA) .Call(CfifelseR, test, yes, no, na) fcase = function(..., default=NA) .Call(CfcaseR, default, parent.frame(), as.list(substitute(list(...)))[-1L]) +topn = function(vec, n=6L, decreasing=FALSE) .Call(CtopnR, vec, n, decreasing) colnamesInt = function(x, cols, check_dups=FALSE) .Call(CcolnamesInt, x, cols, check_dups) coerceFill = function(x) .Call(CcoerceFillR, x) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f6d17a076..518963949 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16770,6 +16770,87 @@ test(2132.2, fifelse(TRUE, 1, s2), error = "S4 class objects (except nanot test(2132.3, fcase(TRUE, s1, FALSE, s2), error = "S4 class objects (except nanotime) are not supported. 
Please see https://github.com/Rdatatable/data.table/issues/4131.") rm(s1, s2, class2132) +# topn, #3804 +x0 = c(3L, 2L, 10L, NA_integer_, 1L, 1L, NA_integer_, NA_integer_, 10L, 20L, 20L, 20L, 30L) +x1 = as.numeric(x0) +x2 = c(NA_integer_, NA_integer_, NA_integer_) +x3 = as.numeric(x2) +x4 = as.raw(c(1,2,3)) + +class2134 = setClass("class2134", slots=list(x="numeric")) +s1 = class2134(x=20191231) + +test(2134.001, topn(x0, 1L), order(x0)[1:1]) +test(2134.002, topn(x0, 2L), order(x0)[1:2]) +test(2134.003, topn(x0, 3L), order(x0)[1:3]) +test(2134.004, topn(x0, 4L), order(x0)[1:4]) +test(2134.005, topn(x0, 5L), order(x0)[1:5]) +test(2134.006, topn(x0, 6L), order(x0)[1:6]) +test(2134.007, topn(x0, 7L), order(x0)[1:7]) +test(2134.008, topn(x0, 8L), order(x0)[1:8]) +test(2134.009, topn(x0, 9L), order(x0)[1:9]) +test(2134.010, topn(x0, 10L), order(x0)[1:10]) +test(2134.011, topn(x0, 11L), order(x0)[1:11]) +test(2134.012, topn(x0, 12L), order(x0)[1:12]) +test(2134.013, topn(x0, 13L), order(x0)[1:13]) +test(2134.014, topn(x1, 1L), order(x1)[1:1]) +test(2134.015, topn(x1, 2L), order(x1)[1:2]) +test(2134.016, topn(x1, 3L), order(x1)[1:3]) +test(2134.017, topn(x1, 4L), order(x1)[1:4]) +test(2134.018, topn(x1, 5L), order(x1)[1:5]) +test(2134.019, topn(x1, 6L), order(x1)[1:6]) +test(2134.020, topn(x1, 7L), order(x1)[1:7]) +test(2134.021, topn(x1, 8L), order(x1)[1:8]) +test(2134.022, topn(x1, 9L), order(x1)[1:9]) +test(2134.023, topn(x1, 10L), order(x1)[1:10]) +test(2134.024, topn(x1, 11L), order(x1)[1:11]) +test(2134.025, topn(x1, 12L), order(x1)[1:12]) +test(2134.026, topn(x1, 13L), order(x1)[1:13]) +test(2134.027, topn(x2, 1L), order(x2)[1:1]) +test(2134.028, topn(x2, 2L), order(x2)[1:2]) +test(2134.029, topn(x2, 3L), order(x2)[1:3]) +test(2134.030, topn(x3, 1L), order(x3)[1:1]) +test(2134.031, topn(x3, 2L), order(x3)[1:2]) +test(2134.032, topn(x3, 3L), order(x3)[1:3]) +test(2134.001, topn(x0, 1L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:1]) +test(2134.002, topn(x0, 2L, 
decreasing=TRUE), order(x0, decreasing=TRUE)[1:2]) +test(2134.003, topn(x0, 3L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:3]) +test(2134.004, topn(x0, 4L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:4]) +test(2134.005, topn(x0, 5L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:5]) +test(2134.006, topn(x0, 6L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:6]) +test(2134.007, topn(x0, 7L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:7]) +test(2134.008, topn(x0, 8L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:8]) +test(2134.009, topn(x0, 9L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:9]) +test(2134.010, topn(x0, 10L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:10]) +test(2134.011, topn(x0, 11L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:11]) +test(2134.012, topn(x0, 12L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:12]) +test(2134.013, topn(x0, 13L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:13]) +test(2134.014, topn(x1, 1L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:1]) +test(2134.015, topn(x1, 2L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:2]) +test(2134.016, topn(x1, 3L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:3]) +test(2134.017, topn(x1, 4L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:4]) +test(2134.018, topn(x1, 5L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:5]) +test(2134.019, topn(x1, 6L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:6]) +test(2134.020, topn(x1, 7L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:7]) +test(2134.021, topn(x1, 8L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:8]) +test(2134.022, topn(x1, 9L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:9]) +test(2134.023, topn(x1, 10L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:10]) +test(2134.024, topn(x1, 11L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:11]) +test(2134.025, topn(x1, 12L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:12]) +test(2134.026, topn(x1, 13L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:13]) 
+test(2134.027, topn(x2, 1L, decreasing=TRUE), order(x2, decreasing=TRUE)[1:1]) +test(2134.028, topn(x2, 2L, decreasing=TRUE), order(x2, decreasing=TRUE)[1:2]) +test(2134.029, topn(x2, 3L, decreasing=TRUE), order(x2, decreasing=TRUE)[1:3]) +test(2134.030, topn(x3, 1L, decreasing=TRUE), order(x3, decreasing=TRUE)[1:1]) +test(2134.031, topn(x3, 2L, decreasing=TRUE), order(x3, decreasing=TRUE)[1:2]) +test(2134.032, topn(x3, 3L, decreasing=TRUE), order(x3, decreasing=TRUE)[1:3]) +test(2134.033, topn(x0, -1L), error = "Please enter a positive integer larger or equal to 1.") +test(2134.034, topn(x0, 1001L), error = "Function 'topn' is not built for large value of 'n'. The algorithm is made for small values. Please prefer the 'order' if you want to proceed with such large value.") +test(2134.035, topn(x0, 100L), error = "'n' cannot be larger than length of 'vec'.") +test(2134.036, topn(x0, 10L, decreasing = NA), error = "Argument 'decreasing' must be TRUE or FALSE and length 1.") +test(2134.037, topn(s1, 10L, decreasing = NA), error = "S4 class objects (excluding nanotime) are not supported.") +test(2134.038, topn(x4, 2L), error = "Type raw is not supported.") +rm(s1, class2134) ######################## # Add new tests here # diff --git a/man/topn.Rd b/man/topn.Rd new file mode 100644 index 000000000..a882f4313 --- /dev/null +++ b/man/topn.Rd @@ -0,0 +1,26 @@ +\name{topn} +\alias{topn} +\title{ Top N values index} +\description{ +The function \code{topn} is equivalent to \code{base} function \code{order} when used like \code{order(vec)[1:n]}. \code{topn} must be much faster than \code{order} for \code{n < 1000}. +} +\usage{ + topn(vec, n=6L, decreasing=FALSE) +} +\arguments{ + \item{vec}{ A numeric vector of type double or integer. Other types are not supported. } + \item{n}{ An positve integer value greater or equal to 1. Maximum value is 1000. } + \item{decreasing}{ A logical value to indicate whether to sort \code{vec} by decreasing or increasing value. 
Equivalent to argument \code{decreasing} in function \code{base::order}. Default value is \code{FALSE}.} +} +\value{ +Return the index of the top \code{n} value (positive of negative) in vector \code{vec}. +} +\examples{ +# Example 1: + +# Example 2: + +# Example 3: + +} +\keyword{ data } diff --git a/src/data.table.h b/src/data.table.h index f2687f52e..ba3d33139 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -242,3 +242,4 @@ SEXP testMsgR(SEXP status, SEXP x, SEXP k); //fifelse.c SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na); SEXP fcaseR(SEXP na, SEXP rho, SEXP args); +SEXP topnR(SEXP vec, SEXP n, SEXP dec); \ No newline at end of file diff --git a/src/fifelse.c b/src/fifelse.c index efae03a6b..87ce44dfd 100644 --- a/src/fifelse.c +++ b/src/fifelse.c @@ -343,3 +343,193 @@ SEXP fcaseR(SEXP na, SEXP rho, SEXP args) { UNPROTECT(nprotect); return ans; } + +SEXP topnR(SEXP vec, SEXP n, SEXP dec) { + int nprotect = 0; + int64_t i, j, idx = 0; + const int64_t len0 = asInteger(n); + const int64_t len1 = xlength(vec); + + if (isS4(vec) && !INHERITS(vec, char_nanotime)) { + error("S4 class objects (excluding nanotime) are not supported."); + } + if (len0 > 1000) { + error("Function 'topn' is not built for large value of 'n'. The algorithm is made for small values. 
Please prefer the 'order' if you want to proceed with such large value."); + } + if (len0 > len1) { + error("'n' cannot be larger than length of 'vec'."); + } + if (len0 < 1) { + error("Please enter a positive integer larger or equal to 1."); + } + if (!IS_TRUE_OR_FALSE(dec)) { + error("Argument 'decreasing' must be TRUE or FALSE and length 1."); + } + + const bool vdec = LOGICAL(dec)[0]; + SEXPTYPE tvec = TYPEOF(vec); + SEXP ans = PROTECT(allocVector(INTSXP, len0)); nprotect++; + int *restrict pans = INTEGER(ans); + int tmp; + + if (vdec) { + switch(tvec) { + case INTSXP: { + const int *restrict pvec = INTEGER(vec); + int min_value = pvec[0]; + for (i = 0; i < len0; ++i) { + pans[i] = i; + if (pvec[i] <= min_value || pvec[i] == NA_INTEGER) { + min_value = pvec[i]; + idx = i; + } + } + for (i = len0; i < len1; ++i) { + if (pvec[i] == NA_INTEGER) { + continue; + } + if (pvec[i] > min_value) { + min_value = pvec[i]; + pans[idx] = i; + for (j = 0; j pvec[pans[j]] || (min_value == pvec[pans[j]] && pans[idx] < pans[j])) || pvec[pans[j]] == NA_INTEGER) { + min_value = pvec[pans[j]]; + idx = j; + } + } + } + } + for (i = 0; i < len0; ++i) { + tmp = pans[i]; + for (j = i; j > 0 && (pvec[tmp] > pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1])); --j) { + pans[j] = pans[j-1]; + } + pans[j] = tmp; + } + for (i =0; i < len0; ++i) { + pans[i]++; + } + } break; + case REALSXP: { + const double *restrict pvec = REAL(vec); + double min_value = pvec[0]; + for (i = 0; i < len0; ++i) { + pans[i] = i; + if (pvec[i] <= min_value || ISNAN(pvec[i])) { + min_value = pvec[i]; + idx = i; + } + } + for (i = len0; i < len1; ++i) { + if (ISNAN(pvec[i])) { + continue; + } + if (pvec[i] > min_value || ISNAN(min_value)) { + min_value = pvec[i]; + pans[idx] = i; + for (j = 0; j pvec[pans[j]] || (min_value == pvec[pans[j]] && pans[idx] < pans[j])) || ISNAN(pvec[pans[j]])) { + min_value = pvec[pans[j]]; + idx = j; + } + } + } + } + for (i = 0; i < len0; ++i) { + tmp = pans[i]; + 
for (j = i; j > 0 && (pvec[tmp] > pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1]) || (!ISNAN(pvec[tmp]) && ISNAN(pvec[pans[j-1]]))); --j) { + pans[j] = pans[j-1]; + } + pans[j] = tmp; + } + for (i =0; i < len0; ++i) { + pans[i]++; + } + } break; + default: + error("Type %s is not supported.", type2char(tvec)); + } + } else { + switch(tvec) { + case INTSXP: { + const int *restrict pvec = INTEGER(vec); + int min_value = pvec[0]; + for (i = 0; i < len0; ++i) { + pans[i] = i; + if ((pvec[i] >= min_value && min_value != NA_INTEGER) || pvec[i] == NA_INTEGER) { + min_value = pvec[i]; + idx = i; + } + } + for (i = len0; i < len1; ++i) { + if (pvec[i] == NA_INTEGER) { + continue; + } + if (pvec[i] < min_value || min_value == NA_INTEGER) { + min_value = pvec[i]; + pans[idx] = i; + for (j = 0; j 0 && (pvec[tmp] < pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1]) || pvec[pans[j-1]] == NA_INTEGER); --j) { + pans[j] = pans[j-1]; + } + pans[j] = tmp; + } + for (i =0; i < len0; ++i) { + pans[i]++; + } + } break; + case REALSXP: { + const double *restrict pvec = REAL(vec); + double min_value = pvec[0]; + for (i = 0; i < len0; ++i) { + pans[i] = i; + if (pvec[i] >= min_value || ISNAN(pvec[i])) { + min_value = pvec[i]; + idx = i; + } + } + for (i = len0; i < len1; ++i) { + if (ISNAN(pvec[i])) { + continue; + } + if (pvec[i] < min_value || ISNAN(min_value)) { + min_value = pvec[i]; + pans[idx] = i; + for (j = 0; j 0 && (pvec[tmp] < pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1]) || (!ISNAN(pvec[tmp]) && ISNAN(pvec[pans[j-1]]))); --j) { + pans[j] = pans[j-1]; + } + pans[j] = tmp; + } + for (i =0; i < len0; ++i) { + pans[i]++; + } + } break; + default: + error("Type %s is not supported.", type2char(tvec)); + } + } + UNPROTECT(nprotect); + return ans; +} diff --git a/src/init.c b/src/init.c index aed2da3db..bd197fcd0 100644 --- a/src/init.c +++ b/src/init.c @@ -53,6 +53,7 @@ SEXP chmatchdup_R(); SEXP chin_R(); SEXP fifelseR(); 
SEXP fcaseR(); +SEXP topnR(); SEXP freadR(); SEXP fwriteR(); SEXP reorder(); @@ -205,6 +206,7 @@ R_CallMethodDef callMethods[] = { {"Ccoalesce", (DL_FUNC) &coalesce, -1}, {"CfifelseR", (DL_FUNC) &fifelseR, -1}, {"CfcaseR", (DL_FUNC) &fcaseR, -1}, +{"CtopnR", (DL_FUNC) &topnR, -1}, {"C_lock", (DL_FUNC) &lock, -1}, // _ for these 3 to avoid Clock as in time {"C_unlock", (DL_FUNC) &unlock, -1}, {"C_islocked", (DL_FUNC) &islockedR, -1}, From aae01582e480cf5e9297d3b09b22064d6766156a Mon Sep 17 00:00:00 2001 From: 2005m Date: Sat, 18 Jan 2020 13:02:04 +0000 Subject: [PATCH 02/18] Update tests with correct numbers --- inst/tests/tests.Rraw | 76 +++++++++++++++++++++---------------------- 1 file changed, 38 insertions(+), 38 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 518963949..d6d839064 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16812,44 +16812,44 @@ test(2134.029, topn(x2, 3L), order(x2)[1:3]) test(2134.030, topn(x3, 1L), order(x3)[1:1]) test(2134.031, topn(x3, 2L), order(x3)[1:2]) test(2134.032, topn(x3, 3L), order(x3)[1:3]) -test(2134.001, topn(x0, 1L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:1]) -test(2134.002, topn(x0, 2L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:2]) -test(2134.003, topn(x0, 3L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:3]) -test(2134.004, topn(x0, 4L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:4]) -test(2134.005, topn(x0, 5L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:5]) -test(2134.006, topn(x0, 6L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:6]) -test(2134.007, topn(x0, 7L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:7]) -test(2134.008, topn(x0, 8L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:8]) -test(2134.009, topn(x0, 9L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:9]) -test(2134.010, topn(x0, 10L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:10]) -test(2134.011, topn(x0, 11L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:11]) -test(2134.012, 
topn(x0, 12L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:12]) -test(2134.013, topn(x0, 13L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:13]) -test(2134.014, topn(x1, 1L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:1]) -test(2134.015, topn(x1, 2L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:2]) -test(2134.016, topn(x1, 3L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:3]) -test(2134.017, topn(x1, 4L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:4]) -test(2134.018, topn(x1, 5L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:5]) -test(2134.019, topn(x1, 6L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:6]) -test(2134.020, topn(x1, 7L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:7]) -test(2134.021, topn(x1, 8L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:8]) -test(2134.022, topn(x1, 9L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:9]) -test(2134.023, topn(x1, 10L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:10]) -test(2134.024, topn(x1, 11L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:11]) -test(2134.025, topn(x1, 12L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:12]) -test(2134.026, topn(x1, 13L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:13]) -test(2134.027, topn(x2, 1L, decreasing=TRUE), order(x2, decreasing=TRUE)[1:1]) -test(2134.028, topn(x2, 2L, decreasing=TRUE), order(x2, decreasing=TRUE)[1:2]) -test(2134.029, topn(x2, 3L, decreasing=TRUE), order(x2, decreasing=TRUE)[1:3]) -test(2134.030, topn(x3, 1L, decreasing=TRUE), order(x3, decreasing=TRUE)[1:1]) -test(2134.031, topn(x3, 2L, decreasing=TRUE), order(x3, decreasing=TRUE)[1:2]) -test(2134.032, topn(x3, 3L, decreasing=TRUE), order(x3, decreasing=TRUE)[1:3]) -test(2134.033, topn(x0, -1L), error = "Please enter a positive integer larger or equal to 1.") -test(2134.034, topn(x0, 1001L), error = "Function 'topn' is not built for large value of 'n'. The algorithm is made for small values. 
Please prefer the 'order' if you want to proceed with such large value.") -test(2134.035, topn(x0, 100L), error = "'n' cannot be larger than length of 'vec'.") -test(2134.036, topn(x0, 10L, decreasing = NA), error = "Argument 'decreasing' must be TRUE or FALSE and length 1.") -test(2134.037, topn(s1, 10L, decreasing = NA), error = "S4 class objects (excluding nanotime) are not supported.") -test(2134.038, topn(x4, 2L), error = "Type raw is not supported.") +test(2134.033, topn(x0, 1L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:1]) +test(2134.034, topn(x0, 2L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:2]) +test(2134.035, topn(x0, 3L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:3]) +test(2134.036, topn(x0, 4L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:4]) +test(2134.037, topn(x0, 5L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:5]) +test(2134.038, topn(x0, 6L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:6]) +test(2134.039, topn(x0, 7L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:7]) +test(2134.040, topn(x0, 8L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:8]) +test(2134.041, topn(x0, 9L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:9]) +test(2134.042, topn(x0, 10L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:10]) +test(2134.043, topn(x0, 11L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:11]) +test(2134.044, topn(x0, 12L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:12]) +test(2134.045, topn(x0, 13L, decreasing=TRUE), order(x0, decreasing=TRUE)[1:13]) +test(2134.046, topn(x1, 1L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:1]) +test(2134.047, topn(x1, 2L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:2]) +test(2134.048, topn(x1, 3L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:3]) +test(2134.049, topn(x1, 4L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:4]) +test(2134.050, topn(x1, 5L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:5]) +test(2134.051, topn(x1, 6L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:6]) 
+test(2134.052, topn(x1, 7L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:7]) +test(2134.053, topn(x1, 8L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:8]) +test(2134.054, topn(x1, 9L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:9]) +test(2134.055, topn(x1, 10L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:10]) +test(2134.056, topn(x1, 11L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:11]) +test(2134.057, topn(x1, 12L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:12]) +test(2134.058, topn(x1, 13L, decreasing=TRUE), order(x1, decreasing=TRUE)[1:13]) +test(2134.060, topn(x2, 1L, decreasing=TRUE), order(x2, decreasing=TRUE)[1:1]) +test(2134.061, topn(x2, 2L, decreasing=TRUE), order(x2, decreasing=TRUE)[1:2]) +test(2134.062, topn(x2, 3L, decreasing=TRUE), order(x2, decreasing=TRUE)[1:3]) +test(2134.063, topn(x3, 1L, decreasing=TRUE), order(x3, decreasing=TRUE)[1:1]) +test(2134.064, topn(x3, 2L, decreasing=TRUE), order(x3, decreasing=TRUE)[1:2]) +test(2134.065, topn(x3, 3L, decreasing=TRUE), order(x3, decreasing=TRUE)[1:3]) +test(2134.066, topn(x0, -1L), error = "Please enter a positive integer larger or equal to 1.") +test(2134.067, topn(x0, 1001L), error = "Function 'topn' is not built for large value of 'n'. The algorithm is made for small values. 
Please prefer the 'order' if you want to proceed with such large value.") +test(2134.068, topn(x0, 100L), error = "'n' cannot be larger than length of 'vec'.") +test(2134.069, topn(x0, 10L, decreasing = NA), error = "Argument 'decreasing' must be TRUE or FALSE and length 1.") +test(2134.070, topn(s1, 10L, decreasing = NA), error = "S4 class objects (excluding nanotime) are not supported.") +test(2134.071, topn(x4, 2L), error = "Type raw is not supported.") rm(s1, class2134) ######################## From dfbfb284a1874833f1e3443d3c4c415c38cb853f Mon Sep 17 00:00:00 2001 From: 2005m Date: Sat, 18 Jan 2020 13:24:01 +0000 Subject: [PATCH 03/18] Missing test --- inst/tests/tests.Rraw | 1 + 1 file changed, 1 insertion(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index d6d839064..ff6ebc8c0 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16850,6 +16850,7 @@ test(2134.068, topn(x0, 100L), error = "'n' cannot be larger than length of 'vec test(2134.069, topn(x0, 10L, decreasing = NA), error = "Argument 'decreasing' must be TRUE or FALSE and length 1.") test(2134.070, topn(s1, 10L, decreasing = NA), error = "S4 class objects (excluding nanotime) are not supported.") test(2134.071, topn(x4, 2L), error = "Type raw is not supported.") +test(2134.072, topn(x4, 2L, decreasing = TRUE), error = "Type raw is not supported.") rm(s1, class2134) ######################## From 051886d9fd153e372bb06f519f7bdae2fa673e6d Mon Sep 17 00:00:00 2001 From: 2005m Date: Sat, 18 Jan 2020 18:25:14 +0000 Subject: [PATCH 04/18] Update NEWS.md --- NEWS.md | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/NEWS.md b/NEWS.md index 2d607e9ba..2bd18455b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -73,6 +73,33 @@ unit = "s") 10. The dimensions of objects in a `list` column are now displayed, [#3671](https://github.com/Rdatatable/data.table/issues/3671). Thanks to @randomgambit for the request, and Tyson Barrett for the PR. +11. 
`topn(vec ,n=6L, decreasing=FALSE)` implemented in C by Morgan Jacob, [#3804](https://github.com/Rdatatable/data.table/issues/3804), is inspired by `dplyr::top_n`. It returns the top largest or smallest `n` values for a given numeric vector `vec`. Please see `?topn` for more details. + +```R +set.seed(123) +x = rnorm(5e7) # 382 MB +microbenchmark::microbenchmark( + topn(x, 6L), + order(x)[1:6], + times = 10L +) +# Unit: seconds +# expr min lq mean median uq max neval +# topn(x, 6L) 0.19 0.19 0.20 0.20 0.20 0.22 10 +# order(x)[1:6] 4.56 4.60 4.65 4.62 4.70 4.77 10 + +microbenchmark::microbenchmark( + x[topn(x, 6L)], + sort(x)[1:6], + times = 10L, + unit = "s" +) +# Unit: seconds +# expr min lq mean median uq max neval +# x[topn(x, 6L)] 0.19 0.20 0.20 0.20 0.20 0.21 10 +# sort(x)[1:6] 8.27 8.36 8.42 8.43 8.50 8.55 10 +``` + ## BUG FIXES 1. A NULL timezone on POSIXct was interpreted by `as.IDate` and `as.ITime` as UTC rather than the session's default timezone (`tz=""`) , [#4085](https://github.com/Rdatatable/data.table/issues/4085). From dd1772d641b4dd83c6ec2aacb0de36a8d38da217 Mon Sep 17 00:00:00 2001 From: 2005m Date: Sat, 18 Jan 2020 18:46:32 +0000 Subject: [PATCH 05/18] Update topn.Rd --- man/topn.Rd | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/man/topn.Rd b/man/topn.Rd index a882f4313..c5cbdf5c1 100644 --- a/man/topn.Rd +++ b/man/topn.Rd @@ -16,11 +16,18 @@ The function \code{topn} is equivalent to \code{base} function \code{order} when Return the index of the top \code{n} value (positive of negative) in vector \code{vec}. 
} \examples{ -# Example 1: +x = rnorm(1e6) -# Example 2: +# Example 1: indices of the 6 smallest values +topn(x, 6L) +order(x)[1:6] -# Example 3: +# Example 2: indices of the 6 largest values +topn(x, 6L, decreasing = TRUE) +order(x, decreasing = TRUE)[1:6] +# Example 3: the 6 smallest values +x[topn(x, 6L)] +sort(x)[1:6] } \keyword{ data } From 535bae88dc80b0cc053e9514d2ad56e5488e3816 Mon Sep 17 00:00:00 2001 From: 2005m Date: Sun, 19 Jan 2020 17:47:51 +0000 Subject: [PATCH 06/18] limit n to length of vec --- src/fifelse.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/fifelse.c b/src/fifelse.c index 87ce44dfd..50dfee45f 100644 --- a/src/fifelse.c +++ b/src/fifelse.c @@ -347,7 +347,7 @@ SEXP fcaseR(SEXP na, SEXP rho, SEXP args) { SEXP topnR(SEXP vec, SEXP n, SEXP dec) { int nprotect = 0; int64_t i, j, idx = 0; - const int64_t len0 = asInteger(n); + int len0 = asInteger(n); const int64_t len1 = xlength(vec); if (isS4(vec) && !INHERITS(vec, char_nanotime)) { @@ -357,7 +357,8 @@ SEXP topnR(SEXP vec, SEXP n, SEXP dec) { error("Function 'topn' is not built for large value of 'n'. The algorithm is made for small values. Please prefer the 'order' if you want to proceed with such large value."); } if (len0 > len1) { - error("'n' cannot be larger than length of 'vec'."); + warning("'n' is larger than length of 'vec'. 
'n' will be set to length of 'vec'."); + len0 = len1; } if (len0 < 1) { error("Please enter a positive integer larger or equal to 1."); From 656ff756cc459519eb4ecd1966a77d9701c6843b Mon Sep 17 00:00:00 2001 From: 2005m Date: Sun, 19 Jan 2020 17:50:21 +0000 Subject: [PATCH 07/18] Update tests.Rraw --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ff6ebc8c0..3deebb2c0 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16846,7 +16846,7 @@ test(2134.064, topn(x3, 2L, decreasing=TRUE), order(x3, decreasing=TRUE)[1:2]) test(2134.065, topn(x3, 3L, decreasing=TRUE), order(x3, decreasing=TRUE)[1:3]) test(2134.066, topn(x0, -1L), error = "Please enter a positive integer larger or equal to 1.") test(2134.067, topn(x0, 1001L), error = "Function 'topn' is not built for large value of 'n'. The algorithm is made for small values. Please prefer the 'order' if you want to proceed with such large value.") -test(2134.068, topn(x0, 100L), error = "'n' cannot be larger than length of 'vec'.") +test(2134.068, topn(x0, 100L), error = "'n' is larger than length of 'vec'. 
'n' will be set to length of 'vec'.") test(2134.069, topn(x0, 10L, decreasing = NA), error = "Argument 'decreasing' must be TRUE or FALSE and length 1.") test(2134.070, topn(s1, 10L, decreasing = NA), error = "S4 class objects (excluding nanotime) are not supported.") test(2134.071, topn(x4, 2L), error = "Type raw is not supported.") From 4e24c8fefcc4a2c1ef444573a565be41a87ce465 Mon Sep 17 00:00:00 2001 From: 2005m Date: Sun, 19 Jan 2020 18:01:00 +0000 Subject: [PATCH 08/18] Update NEWS.md --- NEWS.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/NEWS.md b/NEWS.md index 2bd18455b..733b2e4cf 100644 --- a/NEWS.md +++ b/NEWS.md @@ -90,14 +90,14 @@ microbenchmark::microbenchmark( microbenchmark::microbenchmark( x[topn(x, 6L)], - sort(x)[1:6], + sort(x, partial = 1:6)[1:6], times = 10L, unit = "s" ) # Unit: seconds -# expr min lq mean median uq max neval -# x[topn(x, 6L)] 0.19 0.20 0.20 0.20 0.20 0.21 10 -# sort(x)[1:6] 8.27 8.36 8.42 8.43 8.50 8.55 10 +# expr min lq mean median uq max neval +# x[topn(x, 6L)] 0.19 0.20 0.20 0.20 0.20 0.21 10 +# sort(x, partial = 1:6)[1:6] 1.20 1.22 1.23 1.24 1.25 1.27 10 ``` ## BUG FIXES From 8322e9b4d70fe01552c4507e833dbc0a6c7ab16c Mon Sep 17 00:00:00 2001 From: 2005m Date: Sun, 19 Jan 2020 18:04:50 +0000 Subject: [PATCH 09/18] warning instead or error! --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 3deebb2c0..63e27b4c2 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16846,7 +16846,7 @@ test(2134.064, topn(x3, 2L, decreasing=TRUE), order(x3, decreasing=TRUE)[1:2]) test(2134.065, topn(x3, 3L, decreasing=TRUE), order(x3, decreasing=TRUE)[1:3]) test(2134.066, topn(x0, -1L), error = "Please enter a positive integer larger or equal to 1.") test(2134.067, topn(x0, 1001L), error = "Function 'topn' is not built for large value of 'n'. The algorithm is made for small values. 
Please prefer the 'order' if you want to proceed with such large value.") -test(2134.068, topn(x0, 100L), error = "'n' is larger than length of 'vec'. 'n' will be set to length of 'vec'.") +test(2134.068, topn(x0, 100L), warning = "'n' is larger than length of 'vec'. 'n' will be set to length of 'vec'.") test(2134.069, topn(x0, 10L, decreasing = NA), error = "Argument 'decreasing' must be TRUE or FALSE and length 1.") test(2134.070, topn(s1, 10L, decreasing = NA), error = "S4 class objects (excluding nanotime) are not supported.") test(2134.071, topn(x4, 2L), error = "Type raw is not supported.") From 8f3c977a791d2d483303ac37b97fe49ae113f0e1 Mon Sep 17 00:00:00 2001 From: 2005m Date: Sun, 19 Jan 2020 18:54:38 +0000 Subject: [PATCH 10/18] Update test --- inst/tests/tests.Rraw | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 63e27b4c2..da304753d 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16778,7 +16778,7 @@ x3 = as.numeric(x2) x4 = as.raw(c(1,2,3)) class2134 = setClass("class2134", slots=list(x="numeric")) -s1 = class2134(x=20191231) +s1 = class2134(x=20191231)ķ test(2134.001, topn(x0, 1L), order(x0)[1:1]) test(2134.002, topn(x0, 2L), order(x0)[1:2]) @@ -16846,7 +16846,7 @@ test(2134.064, topn(x3, 2L, decreasing=TRUE), order(x3, decreasing=TRUE)[1:2]) test(2134.065, topn(x3, 3L, decreasing=TRUE), order(x3, decreasing=TRUE)[1:3]) test(2134.066, topn(x0, -1L), error = "Please enter a positive integer larger or equal to 1.") test(2134.067, topn(x0, 1001L), error = "Function 'topn' is not built for large value of 'n'. The algorithm is made for small values. Please prefer the 'order' if you want to proceed with such large value.") -test(2134.068, topn(x0, 100L), warning = "'n' is larger than length of 'vec'. 'n' will be set to length of 'vec'.") +test(2134.068, topn(x0, 100L), order(x0)[1:13], warning = "'n' is larger than length of 'vec'. 
'n' will be set to length of 'vec'.") test(2134.069, topn(x0, 10L, decreasing = NA), error = "Argument 'decreasing' must be TRUE or FALSE and length 1.") test(2134.070, topn(s1, 10L, decreasing = NA), error = "S4 class objects (excluding nanotime) are not supported.") test(2134.071, topn(x4, 2L), error = "Type raw is not supported.") From ded01ce98ddb2cc6458acfbab69197a61f1d7923 Mon Sep 17 00:00:00 2001 From: 2005m Date: Sun, 19 Jan 2020 19:12:29 +0000 Subject: [PATCH 11/18] I have fat fingers --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index da304753d..5a210df4d 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16778,7 +16778,7 @@ x3 = as.numeric(x2) x4 = as.raw(c(1,2,3)) class2134 = setClass("class2134", slots=list(x="numeric")) -s1 = class2134(x=20191231)ķ +s1 = class2134(x=20191231) test(2134.001, topn(x0, 1L), order(x0)[1:1]) test(2134.002, topn(x0, 2L), order(x0)[1:2]) From d2d192129981df1d88c23490098a36caf24cc178 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 21 Jan 2020 17:32:50 +0800 Subject: [PATCH 12/18] reworked NEWS --- NEWS.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/NEWS.md b/NEWS.md index 733b2e4cf..390b9602a 100644 --- a/NEWS.md +++ b/NEWS.md @@ -73,7 +73,7 @@ unit = "s") 10. The dimensions of objects in a `list` column are now displayed, [#3671](https://github.com/Rdatatable/data.table/issues/3671). Thanks to @randomgambit for the request, and Tyson Barrett for the PR. -11. `topn(vec ,n=6L, decreasing=FALSE)` implemented in C by Morgan Jacob, [#3804](https://github.com/Rdatatable/data.table/issues/3804), is inspired by `dplyr::top_n`. It returns the top largest or smallest `n` values for a given numeric vector `vec`. Please see `?topn` for more details. +11. New function `topn`, implemented in C by Morgan Jacob, [#3804](https://github.com/Rdatatable/data.table/issues/3804). 
It returns the indices of the `n` largest or smallest values of a given numeric vector `vec`. Please see `?topn` for more details. Similar to `dplyr::top_n`. ```R set.seed(123) From 6f322e450952a06c99f09ff15192d2347969b5c2 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 21 Jan 2020 17:55:11 +0800 Subject: [PATCH 13/18] update manual --- man/topn.Rd | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/man/topn.Rd b/man/topn.Rd index c5cbdf5c1..3bfa1d2ff 100644 --- a/man/topn.Rd +++ b/man/topn.Rd @@ -2,18 +2,20 @@ \alias{topn} \title{ Top N values index} \description{ -The function \code{topn} is equivalent to \code{base} function \code{order} when used like \code{order(vec)[1:n]}. \code{topn} must be much faster than \code{order} for \code{n < 1000}. + \code{topn} is used to get the indices of the few most extreme (smallest or largest) values of an input. This is an extension of \code{\link{which.max}}/\code{\link{which.min}} which provide \emph{only} the first such index. + + The output is the same as \code{order(vec, decreasing=decreasing)[1:n]}, but internally optimized not to sort the irrelevant elements of the input (and therefore much faster, for small \code{n} relative to input size). } \usage{ topn(vec, n=6L, decreasing=FALSE) } \arguments{ - \item{vec}{ A numeric vector of type double or integer. Other types are not supported. } - \item{n}{ An positve integer value greater or equal to 1. Maximum value is 1000. } - \item{decreasing}{ A logical value to indicate whether to sort \code{vec} by decreasing or increasing value. Equivalent to argument \code{decreasing} in function \code{base::order}. Default value is \code{FALSE}.} + \item{vec}{ A numeric vector of type double or integer. Other types are not supported yet. } + \item{n}{ A positive integer value greater than or equal to 1. Maximum value is 1000. } + \item{decreasing}{ A logical value (default \code{FALSE}) to indicate whether to sort \code{vec} in decreasing or increasing order.
Equivalent to argument \code{decreasing} in function \code{base::order}. } } \value{ -Return the index of the top \code{n} value (positive of negative) in vector \code{vec}. + \code{integer} vector of indices of the most extreme (according to \code{decreasing}) \code{n} values in vector \code{vec}. } \examples{ x = rnorm(1e6) From c22c140dd659eb84288118c99ac3cf875d18140a Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Tue, 21 Jan 2020 17:56:11 +0800 Subject: [PATCH 14/18] trailing newline --- src/data.table.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/data.table.h b/src/data.table.h index ba3d33139..3cc0573f4 100644 --- a/src/data.table.h +++ b/src/data.table.h @@ -242,4 +242,4 @@ SEXP testMsgR(SEXP status, SEXP x, SEXP k); //fifelse.c SEXP fifelseR(SEXP l, SEXP a, SEXP b, SEXP na); SEXP fcaseR(SEXP na, SEXP rho, SEXP args); -SEXP topnR(SEXP vec, SEXP n, SEXP dec); \ No newline at end of file +SEXP topnR(SEXP vec, SEXP n, SEXP dec); From af37e1c2044cd8770c0fcc5bfb16a0f3e5006e7e Mon Sep 17 00:00:00 2001 From: 2005m Date: Tue, 21 Jan 2020 10:42:23 +0000 Subject: [PATCH 15/18] Update fifelse.c --- src/fifelse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fifelse.c b/src/fifelse.c index 50dfee45f..b885ea97d 100644 --- a/src/fifelse.c +++ b/src/fifelse.c @@ -354,7 +354,7 @@ SEXP topnR(SEXP vec, SEXP n, SEXP dec) { error("S4 class objects (excluding nanotime) are not supported."); } if (len0 > 1000) { - error("Function 'topn' is not built for large value of 'n'. The algorithm is made for small values. Please prefer the 'order' if you want to proceed with such large value."); + error("Function 'topn' is not built for large value of 'n'. The algorithm is made for small values. Please prefer the 'order' function if you want to proceed with such large value."); } if (len0 > len1) { warning("'n' is larger than length of 'vec'. 
'n' will be set to length of 'vec'."); From 815729afd6562d400f218d1c3c16030a492299e2 Mon Sep 17 00:00:00 2001 From: 2005m Date: Tue, 21 Jan 2020 10:45:26 +0000 Subject: [PATCH 16/18] Update tests --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 5a210df4d..b4ff7ae83 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -16845,7 +16845,7 @@ test(2134.063, topn(x3, 1L, decreasing=TRUE), order(x3, decreasing=TRUE)[1:1]) test(2134.064, topn(x3, 2L, decreasing=TRUE), order(x3, decreasing=TRUE)[1:2]) test(2134.065, topn(x3, 3L, decreasing=TRUE), order(x3, decreasing=TRUE)[1:3]) test(2134.066, topn(x0, -1L), error = "Please enter a positive integer larger or equal to 1.") -test(2134.067, topn(x0, 1001L), error = "Function 'topn' is not built for large value of 'n'. The algorithm is made for small values. Please prefer the 'order' if you want to proceed with such large value.") +test(2134.067, topn(x0, 1001L), error = "Function 'topn' is not built for large value of 'n'. The algorithm is made for small values. Please prefer the 'order' function if you want to proceed with such large value.") test(2134.068, topn(x0, 100L), order(x0)[1:13], warning = "'n' is larger than length of 'vec'. 
'n' will be set to length of 'vec'.") test(2134.069, topn(x0, 10L, decreasing = NA), error = "Argument 'decreasing' must be TRUE or FALSE and length 1.") test(2134.070, topn(s1, 10L, decreasing = NA), error = "S4 class objects (excluding nanotime) are not supported.") From 467c2c4052004bff8e8b4fa42e066e6f396e12f3 Mon Sep 17 00:00:00 2001 From: 2005m Date: Tue, 21 Jan 2020 20:53:56 +0000 Subject: [PATCH 17/18] 8whitespaceTO2whitespace --- src/fifelse.c | 374 +++++++++++++++++++++++++------------------------- 1 file changed, 187 insertions(+), 187 deletions(-) diff --git a/src/fifelse.c b/src/fifelse.c index b885ea97d..fb3d39569 100644 --- a/src/fifelse.c +++ b/src/fifelse.c @@ -345,192 +345,192 @@ SEXP fcaseR(SEXP na, SEXP rho, SEXP args) { } SEXP topnR(SEXP vec, SEXP n, SEXP dec) { - int nprotect = 0; - int64_t i, j, idx = 0; - int len0 = asInteger(n); - const int64_t len1 = xlength(vec); - - if (isS4(vec) && !INHERITS(vec, char_nanotime)) { - error("S4 class objects (excluding nanotime) are not supported."); - } - if (len0 > 1000) { - error("Function 'topn' is not built for large value of 'n'. The algorithm is made for small values. Please prefer the 'order' function if you want to proceed with such large value."); - } - if (len0 > len1) { - warning("'n' is larger than length of 'vec'. 
'n' will be set to length of 'vec'."); - len0 = len1; - } - if (len0 < 1) { - error("Please enter a positive integer larger or equal to 1."); - } - if (!IS_TRUE_OR_FALSE(dec)) { - error("Argument 'decreasing' must be TRUE or FALSE and length 1."); - } - - const bool vdec = LOGICAL(dec)[0]; - SEXPTYPE tvec = TYPEOF(vec); - SEXP ans = PROTECT(allocVector(INTSXP, len0)); nprotect++; - int *restrict pans = INTEGER(ans); - int tmp; + int nprotect = 0; + int64_t i, j, idx = 0; + int len0 = asInteger(n); + const int64_t len1 = xlength(vec); + + if (isS4(vec) && !INHERITS(vec, char_nanotime)) { + error("S4 class objects (excluding nanotime) are not supported."); + } + if (len0 > 1000) { + error("Function 'topn' is not built for large value of 'n'. The algorithm is made for small values. Please prefer the 'order' function if you want to proceed with such large value."); + } + if (len0 > len1) { + warning("'n' is larger than length of 'vec'. 'n' will be set to length of 'vec'."); + len0 = len1; + } + if (len0 < 1) { + error("Please enter a positive integer larger or equal to 1."); + } + if (!IS_TRUE_OR_FALSE(dec)) { + error("Argument 'decreasing' must be TRUE or FALSE and length 1."); + } + + const bool vdec = LOGICAL(dec)[0]; + SEXPTYPE tvec = TYPEOF(vec); + SEXP ans = PROTECT(allocVector(INTSXP, len0)); nprotect++; + int *restrict pans = INTEGER(ans); + int tmp; - if (vdec) { - switch(tvec) { - case INTSXP: { - const int *restrict pvec = INTEGER(vec); - int min_value = pvec[0]; - for (i = 0; i < len0; ++i) { - pans[i] = i; - if (pvec[i] <= min_value || pvec[i] == NA_INTEGER) { - min_value = pvec[i]; - idx = i; - } - } - for (i = len0; i < len1; ++i) { - if (pvec[i] == NA_INTEGER) { - continue; - } - if (pvec[i] > min_value) { - min_value = pvec[i]; - pans[idx] = i; - for (j = 0; j pvec[pans[j]] || (min_value == pvec[pans[j]] && pans[idx] < pans[j])) || pvec[pans[j]] == NA_INTEGER) { - min_value = pvec[pans[j]]; - idx = j; - } - } - } - } - for (i = 0; i < len0; ++i) { - 
tmp = pans[i]; - for (j = i; j > 0 && (pvec[tmp] > pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1])); --j) { - pans[j] = pans[j-1]; - } - pans[j] = tmp; - } - for (i =0; i < len0; ++i) { - pans[i]++; - } - } break; - case REALSXP: { - const double *restrict pvec = REAL(vec); - double min_value = pvec[0]; - for (i = 0; i < len0; ++i) { - pans[i] = i; - if (pvec[i] <= min_value || ISNAN(pvec[i])) { - min_value = pvec[i]; - idx = i; - } - } - for (i = len0; i < len1; ++i) { - if (ISNAN(pvec[i])) { - continue; - } - if (pvec[i] > min_value || ISNAN(min_value)) { - min_value = pvec[i]; - pans[idx] = i; - for (j = 0; j pvec[pans[j]] || (min_value == pvec[pans[j]] && pans[idx] < pans[j])) || ISNAN(pvec[pans[j]])) { - min_value = pvec[pans[j]]; - idx = j; - } - } - } - } - for (i = 0; i < len0; ++i) { - tmp = pans[i]; - for (j = i; j > 0 && (pvec[tmp] > pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1]) || (!ISNAN(pvec[tmp]) && ISNAN(pvec[pans[j-1]]))); --j) { - pans[j] = pans[j-1]; - } - pans[j] = tmp; - } - for (i =0; i < len0; ++i) { - pans[i]++; - } - } break; - default: - error("Type %s is not supported.", type2char(tvec)); - } - } else { - switch(tvec) { - case INTSXP: { - const int *restrict pvec = INTEGER(vec); - int min_value = pvec[0]; - for (i = 0; i < len0; ++i) { - pans[i] = i; - if ((pvec[i] >= min_value && min_value != NA_INTEGER) || pvec[i] == NA_INTEGER) { - min_value = pvec[i]; - idx = i; - } - } - for (i = len0; i < len1; ++i) { - if (pvec[i] == NA_INTEGER) { - continue; - } - if (pvec[i] < min_value || min_value == NA_INTEGER) { - min_value = pvec[i]; - pans[idx] = i; - for (j = 0; j 0 && (pvec[tmp] < pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1]) || pvec[pans[j-1]] == NA_INTEGER); --j) { - pans[j] = pans[j-1]; - } - pans[j] = tmp; - } - for (i =0; i < len0; ++i) { - pans[i]++; - } - } break; - case REALSXP: { - const double *restrict pvec = REAL(vec); - double min_value = pvec[0]; - for (i = 
0; i < len0; ++i) { - pans[i] = i; - if (pvec[i] >= min_value || ISNAN(pvec[i])) { - min_value = pvec[i]; - idx = i; - } - } - for (i = len0; i < len1; ++i) { - if (ISNAN(pvec[i])) { - continue; - } - if (pvec[i] < min_value || ISNAN(min_value)) { - min_value = pvec[i]; - pans[idx] = i; - for (j = 0; j 0 && (pvec[tmp] < pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1]) || (!ISNAN(pvec[tmp]) && ISNAN(pvec[pans[j-1]]))); --j) { - pans[j] = pans[j-1]; - } - pans[j] = tmp; - } - for (i =0; i < len0; ++i) { - pans[i]++; - } - } break; - default: - error("Type %s is not supported.", type2char(tvec)); - } - } - UNPROTECT(nprotect); - return ans; + if (vdec) { + switch(tvec) { + case INTSXP: { + const int *restrict pvec = INTEGER(vec); + int min_value = pvec[0]; + for (i = 0; i < len0; ++i) { + pans[i] = i; + if (pvec[i] <= min_value || pvec[i] == NA_INTEGER) { + min_value = pvec[i]; + idx = i; + } + } + for (i = len0; i < len1; ++i) { + if (pvec[i] == NA_INTEGER) { + continue; + } + if (pvec[i] > min_value) { + min_value = pvec[i]; + pans[idx] = i; + for (j = 0; j pvec[pans[j]] || (min_value == pvec[pans[j]] && pans[idx] < pans[j])) || pvec[pans[j]] == NA_INTEGER) { + min_value = pvec[pans[j]]; + idx = j; + } + } + } + } + for (i = 0; i < len0; ++i) { + tmp = pans[i]; + for (j = i; j > 0 && (pvec[tmp] > pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1])); --j) { + pans[j] = pans[j-1]; + } + pans[j] = tmp; + } + for (i =0; i < len0; ++i) { + pans[i]++; + } + } break; + case REALSXP: { + const double *restrict pvec = REAL(vec); + double min_value = pvec[0]; + for (i = 0; i < len0; ++i) { + pans[i] = i; + if (pvec[i] <= min_value || ISNAN(pvec[i])) { + min_value = pvec[i]; + idx = i; + } + } + for (i = len0; i < len1; ++i) { + if (ISNAN(pvec[i])) { + continue; + } + if (pvec[i] > min_value || ISNAN(min_value)) { + min_value = pvec[i]; + pans[idx] = i; + for (j = 0; j pvec[pans[j]] || (min_value == pvec[pans[j]] && pans[idx] < pans[j])) || 
ISNAN(pvec[pans[j]])) { + min_value = pvec[pans[j]]; + idx = j; + } + } + } + } + for (i = 0; i < len0; ++i) { + tmp = pans[i]; + for (j = i; j > 0 && (pvec[tmp] > pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1]) || (!ISNAN(pvec[tmp]) && ISNAN(pvec[pans[j-1]]))); --j) { + pans[j] = pans[j-1]; + } + pans[j] = tmp; + } + for (i =0; i < len0; ++i) { + pans[i]++; + } + } break; + default: + error("Type %s is not supported.", type2char(tvec)); + } + } else { + switch(tvec) { + case INTSXP: { + const int *restrict pvec = INTEGER(vec); + int min_value = pvec[0]; + for (i = 0; i < len0; ++i) { + pans[i] = i; + if ((pvec[i] >= min_value && min_value != NA_INTEGER) || pvec[i] == NA_INTEGER) { + min_value = pvec[i]; + idx = i; + } + } + for (i = len0; i < len1; ++i) { + if (pvec[i] == NA_INTEGER) { + continue; + } + if (pvec[i] < min_value || min_value == NA_INTEGER) { + min_value = pvec[i]; + pans[idx] = i; + for (j = 0; j 0 && (pvec[tmp] < pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1]) || pvec[pans[j-1]] == NA_INTEGER); --j) { + pans[j] = pans[j-1]; + } + pans[j] = tmp; + } + for (i =0; i < len0; ++i) { + pans[i]++; + } + } break; + case REALSXP: { + const double *restrict pvec = REAL(vec); + double min_value = pvec[0]; + for (i = 0; i < len0; ++i) { + pans[i] = i; + if (pvec[i] >= min_value || ISNAN(pvec[i])) { + min_value = pvec[i]; + idx = i; + } + } + for (i = len0; i < len1; ++i) { + if (ISNAN(pvec[i])) { + continue; + } + if (pvec[i] < min_value || ISNAN(min_value)) { + min_value = pvec[i]; + pans[idx] = i; + for (j = 0; j 0 && (pvec[tmp] < pvec[pans[j-1]] || (pvec[tmp] == pvec[pans[j-1]] && tmp < pans[j-1]) || (!ISNAN(pvec[tmp]) && ISNAN(pvec[pans[j-1]]))); --j) { + pans[j] = pans[j-1]; + } + pans[j] = tmp; + } + for (i =0; i < len0; ++i) { + pans[i]++; + } + } break; + default: + error("Type %s is not supported.", type2char(tvec)); + } + } + UNPROTECT(nprotect); + return ans; } From f34b7592f26dad9a325a3d64c0689d622a2316ad 
Mon Sep 17 00:00:00 2001 From: 2005m Date: Tue, 3 Mar 2020 12:31:47 +0000 Subject: [PATCH 18/18] Update error warning --- src/fifelse.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/fifelse.c b/src/fifelse.c index fb3d39569..178ca25e7 100644 --- a/src/fifelse.c +++ b/src/fifelse.c @@ -351,20 +351,20 @@ SEXP topnR(SEXP vec, SEXP n, SEXP dec) { const int64_t len1 = xlength(vec); if (isS4(vec) && !INHERITS(vec, char_nanotime)) { - error("S4 class objects (excluding nanotime) are not supported."); + error(_("S4 class objects (excluding nanotime) are not supported.")); } if (len0 > 1000) { - error("Function 'topn' is not built for large value of 'n'. The algorithm is made for small values. Please prefer the 'order' function if you want to proceed with such large value."); + error(_("Function 'topn' is not built for large value of 'n'. The algorithm is made for small values. Please prefer the 'order' function if you want to proceed with such large value.")); } if (len0 > len1) { - warning("'n' is larger than length of 'vec'. 'n' will be set to length of 'vec'."); + warning(_("'n' is larger than length of 'vec'. 
'n' will be set to length of 'vec'.")); len0 = len1; } if (len0 < 1) { - error("Please enter a positive integer larger or equal to 1."); + error(_("Please enter a positive integer larger or equal to 1.")); } if (!IS_TRUE_OR_FALSE(dec)) { - error("Argument 'decreasing' must be TRUE or FALSE and length 1."); + error(_("Argument 'decreasing' must be TRUE or FALSE and length 1.")); } const bool vdec = LOGICAL(dec)[0]; @@ -448,7 +448,7 @@ SEXP topnR(SEXP vec, SEXP n, SEXP dec) { } } break; default: - error("Type %s is not supported.", type2char(tvec)); + error(_("Type %s is not supported."), type2char(tvec)); } } else { switch(tvec) { @@ -528,7 +528,7 @@ SEXP topnR(SEXP vec, SEXP n, SEXP dec) { } } break; default: - error("Type %s is not supported.", type2char(tvec)); + error(_("Type %s is not supported."), type2char(tvec)); } } UNPROTECT(nprotect);