From e89872e54a91b06bf22c8f7cc26da7ddb1f2e991 Mon Sep 17 00:00:00 2001 From: Suzanne Jin Date: Sat, 14 Sep 2024 16:50:02 +0200 Subject: [PATCH] further speed up graflex --- NEWS.md | 6 +++- R/3-shared-graflex.R | 10 ++---- R/RcppExports.R | 8 +++-- src/RcppExports.cpp | 20 ++++++++--- src/graflex.cpp | 83 +++++++++++++++++++++++++------------------- 5 files changed, 78 insertions(+), 49 deletions(-) diff --git a/NEWS.md b/NEWS.md index b37c800..c9e7e89 100755 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,8 @@ -## propr 5.1.1 +# propr 5.1.2 +--------------------- +* Restructured `graflex` to speed up + +# propr 5.1.1 --------------------- * Speed up `graflex` related functions diff --git a/R/3-shared-graflex.R b/R/3-shared-graflex.R index 01d7dc7..aa3de97 100644 --- a/R/3-shared-graflex.R +++ b/R/3-shared-graflex.R @@ -21,19 +21,15 @@ runGraflex <- function(A, K, p=100, ncores=1) { stop("'A' must be a square matrix.") if (ncores == 1){ - # for each knowledge network, calculate odds ratio and FDR res <- lapply(1:ncol(K), function(k) { - Gk <- K[, k] %*% t(K[, k]) # converts the k column into an adjacency matrix (genes x genes) - graflex(A, Gk, p=p) # this calls the graflex function implemented in Rcpp C++ + graflex(A, K[,k], p=p) # this calls the modified graflex function implemented in Rcpp C++ }) - - }else{ + } else { packageCheck("parallel") cl <- parallel::makeCluster(ncores) res <- parallel::parLapply(cl, 1:ncol(K), function(k) { - Gk <- K[, k] %*% t(K[, k]) - graflex(A, Gk, p=p) + graflex(A, K[,k], p=p) }) parallel::stopCluster(cl) } diff --git a/R/RcppExports.R b/R/RcppExports.R index 8e08159..c4f3197 100644 --- a/R/RcppExports.R +++ b/R/RcppExports.R @@ -121,8 +121,12 @@ getFDR <- function(actual, permuted) { .Call(`_propr_getFDR`, actual, permuted) } -graflex <- function(A, G, p = 100L) { - .Call(`_propr_graflex`, A, G, p) +getG <- function(Gk) { + .Call(`_propr_getG`, Gk) +} + +graflex <- function(A, Gk, p = 100L) { + .Call(`_propr_graflex`, A, Gk, p) } lr2vlr <- function(lr) { diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp index c5fd893..c4b173c 100644 --- a/src/RcppExports.cpp +++ b/src/RcppExports.cpp @@ -367,16 +367,27 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// getG +IntegerMatrix getG(const IntegerVector& Gk); +RcppExport SEXP _propr_getG(SEXP GkSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const IntegerVector& >::type Gk(GkSEXP); + rcpp_result_gen = Rcpp::wrap(getG(Gk)); + return rcpp_result_gen; +END_RCPP +} // graflex -NumericVector graflex(const IntegerMatrix& A, const IntegerMatrix& G, int p); -RcppExport SEXP _propr_graflex(SEXP ASEXP, SEXP GSEXP, SEXP pSEXP) { +NumericVector graflex(const IntegerMatrix& A, const IntegerVector& Gk, int p); +RcppExport SEXP _propr_graflex(SEXP ASEXP, SEXP GkSEXP, SEXP pSEXP) { BEGIN_RCPP Rcpp::RObject rcpp_result_gen; Rcpp::RNGScope rcpp_rngScope_gen; Rcpp::traits::input_parameter< const IntegerMatrix& >::type A(ASEXP); - Rcpp::traits::input_parameter< const IntegerMatrix& >::type G(GSEXP); + Rcpp::traits::input_parameter< const IntegerVector& >::type Gk(GkSEXP); Rcpp::traits::input_parameter< int >::type p(pSEXP); - rcpp_result_gen = Rcpp::wrap(graflex(A, G, p)); + rcpp_result_gen = Rcpp::wrap(graflex(A, Gk, p)); return rcpp_result_gen; END_RCPP } @@ -510,6 +521,7 @@ static const R_CallMethodDef CallEntries[] = { {"_propr_getORperm", (DL_FUNC) &_propr_getORperm, 3}, {"_propr_permuteOR", (DL_FUNC) &_propr_permuteOR, 3}, {"_propr_getFDR", (DL_FUNC) &_propr_getFDR, 2}, + {"_propr_getG", (DL_FUNC) &_propr_getG, 1}, {"_propr_graflex", (DL_FUNC) &_propr_graflex, 3}, {"_propr_lr2vlr", (DL_FUNC) &_propr_lr2vlr, 1}, {"_propr_lr2phi", (DL_FUNC) &_propr_lr2phi, 1}, diff --git a/src/graflex.cpp b/src/graflex.cpp index 2247388..19887ef 100644 --- a/src/graflex.cpp +++ b/src/graflex.cpp @@ -2,58 +2,49 @@ #include using namespace Rcpp; -// Function to calculate the contingency table and the odds ratio +// Optimized function to calculate the contingency table and the odds ratio // [[Rcpp::export]] NumericVector getOR(const IntegerMatrix& A, const IntegerMatrix& G) { int ncol = A.ncol(); - - // calculate the contingency table int a = 0, b = 0, c = 0, d = 0; - for (int j = 0; j < ncol; ++j) { - for (int i = j+1; i < ncol; ++i) { - if (A(i, j) == 0) { - if (G(i, j) == 0) ++a; // not in A and not in G - else ++b; // not in A but in G - } else { - if (G(i, j) == 0) ++c; // in A but not in G - else ++d; // in A and in G - } - } + + for (int j = 0; j < ncol - 1; ++j) { + for (int i = j + 1; i < ncol; ++i) { + int a_val = A(i, j), g_val = G(i, j); + a += (1 - a_val) * (1 - g_val); + b += (1 - a_val) * g_val; + c += a_val * (1 - g_val); + d += a_val * g_val; + } } - // calculate the odds ratio double odds_ratio = static_cast(a * d) / (b * c); + double log_odds_ratio = std::log(odds_ratio); - return NumericVector::create( - a, b, c, d, odds_ratio, std::log(odds_ratio), R_NaN, R_NaN - ); + return NumericVector::create(a, b, c, d, odds_ratio, log_odds_ratio, R_NaN, R_NaN); } -// Function to calculate the contingency table and the odds ratio, given a permuted index vector +// Optimized function to calculate the contingency table and the odds ratio, given a permuted index vector // [[Rcpp::export]] NumericVector getORperm(const IntegerMatrix& A, const IntegerMatrix& G, const IntegerVector& perm) { int ncol = A.ncol(); - - // calculate the contingency table int a = 0, b = 0, c = 0, d = 0; - for (int j = 0; j < ncol; ++j) { - for (int i = j+1; i < ncol; ++i) { - if (A(perm[i], perm[j]) == 0) { - if (G(i, j) == 0) ++a; // not in A and not in G - else ++b; // not in A but in G - } else { - if (G(i, j) == 0) ++c; // in A but not in G - else ++d; // in A and in G - } - } + + for (int j = 0; j < ncol - 1; ++j) { + int pj = perm[j]; + for (int i = j + 1; i < ncol; ++i) { + int a_val = A(perm[i], pj), g_val = G(i, j); + a += (1 - a_val) * (1 - g_val); + b += (1 - a_val) * g_val; + c += a_val * (1 - g_val); + d += a_val * g_val; + } } - // calculate the odds ratio double odds_ratio = static_cast(a * d) / (b * c); + double log_odds_ratio = std::log(odds_ratio); - return NumericVector::create( - a, b, c, d, odds_ratio, std::log(odds_ratio), R_NaN, R_NaN - ); + return NumericVector::create(a, b, c, d, odds_ratio, log_odds_ratio, R_NaN, R_NaN); } // Function to calculate the odds ratio and other relevant info for each permutation @@ -99,9 +90,31 @@ List getFDR(double actual, const NumericVector& permuted) { ); } +// Function to calculate the G matrix from the Gk vector +// [[Rcpp::export]] +IntegerMatrix getG(const IntegerVector& Gk) { + int n = Gk.size(); + IntegerMatrix G(n, n); + + for (int i = 0; i < n; ++i) { + int gi = Gk[i]; + G(i, i) = gi * gi; + for (int j = 0; j < i; ++j) { + int value = gi * Gk[j]; + G(i, j) = value; + G(j, i) = value; + } + } + + return G; +} + // Function to calculate the odds ratio and FDR, given the adjacency matrix A and the knowledge graph G // [[Rcpp::export]] -NumericVector graflex(const IntegerMatrix& A, const IntegerMatrix& G, int p = 100) { +NumericVector graflex(const IntegerMatrix& A, const IntegerVector& Gk, int p = 100) { + + // Calculate Gk + IntegerMatrix G = getG(Gk); // get the actual odds ratio NumericVector actual = getOR(A, G);