From e89872e54a91b06bf22c8f7cc26da7ddb1f2e991 Mon Sep 17 00:00:00 2001
From: Suzanne Jin <suzanne.jin.yao@gmail.com>
Date: Sat, 14 Sep 2024 16:50:02 +0200
Subject: [PATCH] further speed up graflex

---
 NEWS.md              |  6 +++-
 R/3-shared-graflex.R | 10 ++----
 R/RcppExports.R      |  8 +++--
 src/RcppExports.cpp  | 20 ++++++++---
 src/graflex.cpp      | 83 +++++++++++++++++++++++++-------------------
 5 files changed, 78 insertions(+), 49 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index b37c800..c9e7e89 100755
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,4 +1,8 @@
-## propr 5.1.1
+# propr 5.1.2
+---------------------
+* Restructured `graflex` to speed up
+
+# propr 5.1.1
 ---------------------
 * Speed up `graflex` related functions
 
diff --git a/R/3-shared-graflex.R b/R/3-shared-graflex.R
index 01d7dc7..aa3de97 100644
--- a/R/3-shared-graflex.R
+++ b/R/3-shared-graflex.R
@@ -21,19 +21,15 @@ runGraflex <- function(A, K, p=100, ncores=1) {
     stop("'A' must be a square matrix.")
 
   if (ncores == 1){
-
     # for each knowledge network, calculate odds ratio and FDR
     res <- lapply(1:ncol(K), function(k) {
-      Gk <- K[, k] %*% t(K[, k])      # converts the k column into an adjacency matrix (genes x genes)
-      graflex(A, Gk, p=p)  # this calls the graflex function implemented in Rcpp C++
+      graflex(A, K[,k], p=p)  # this calls the modified graflex function implemented in Rcpp C++
     })
-
-  }else{
+  } else {
     packageCheck("parallel")
     cl <- parallel::makeCluster(ncores)
     res <- parallel::parLapply(cl, 1:ncol(K), function(k) {
-      Gk <- K[, k] %*% t(K[, k])
-      graflex(A, Gk, p=p)
+      graflex(A, K[,k], p=p)
     })
     parallel::stopCluster(cl)
   }
diff --git a/R/RcppExports.R b/R/RcppExports.R
index 8e08159..c4f3197 100644
--- a/R/RcppExports.R
+++ b/R/RcppExports.R
@@ -121,8 +121,12 @@ getFDR <- function(actual, permuted) {
     .Call(`_propr_getFDR`, actual, permuted)
 }
 
-graflex <- function(A, G, p = 100L) {
-    .Call(`_propr_graflex`, A, G, p)
+getG <- function(Gk) {
+    .Call(`_propr_getG`, Gk)
+}
+
+graflex <- function(A, Gk, p = 100L) {
+    .Call(`_propr_graflex`, A, Gk, p)
 }
 
 lr2vlr <- function(lr) {
diff --git a/src/RcppExports.cpp b/src/RcppExports.cpp
index c5fd893..c4b173c 100644
--- a/src/RcppExports.cpp
+++ b/src/RcppExports.cpp
@@ -367,16 +367,27 @@ BEGIN_RCPP
     return rcpp_result_gen;
 END_RCPP
 }
+// getG
+IntegerMatrix getG(const IntegerVector& Gk);
+RcppExport SEXP _propr_getG(SEXP GkSEXP) {
+BEGIN_RCPP
+    Rcpp::RObject rcpp_result_gen;
+    Rcpp::RNGScope rcpp_rngScope_gen;
+    Rcpp::traits::input_parameter< const IntegerVector& >::type Gk(GkSEXP);
+    rcpp_result_gen = Rcpp::wrap(getG(Gk));
+    return rcpp_result_gen;
+END_RCPP
+}
 // graflex
-NumericVector graflex(const IntegerMatrix& A, const IntegerMatrix& G, int p);
-RcppExport SEXP _propr_graflex(SEXP ASEXP, SEXP GSEXP, SEXP pSEXP) {
+NumericVector graflex(const IntegerMatrix& A, const IntegerVector& Gk, int p);
+RcppExport SEXP _propr_graflex(SEXP ASEXP, SEXP GkSEXP, SEXP pSEXP) {
 BEGIN_RCPP
     Rcpp::RObject rcpp_result_gen;
     Rcpp::RNGScope rcpp_rngScope_gen;
     Rcpp::traits::input_parameter< const IntegerMatrix& >::type A(ASEXP);
-    Rcpp::traits::input_parameter< const IntegerMatrix& >::type G(GSEXP);
+    Rcpp::traits::input_parameter< const IntegerVector& >::type Gk(GkSEXP);
     Rcpp::traits::input_parameter< int >::type p(pSEXP);
-    rcpp_result_gen = Rcpp::wrap(graflex(A, G, p));
+    rcpp_result_gen = Rcpp::wrap(graflex(A, Gk, p));
     return rcpp_result_gen;
 END_RCPP
 }
@@ -510,6 +521,7 @@ static const R_CallMethodDef CallEntries[] = {
     {"_propr_getORperm", (DL_FUNC) &_propr_getORperm, 3},
     {"_propr_permuteOR", (DL_FUNC) &_propr_permuteOR, 3},
     {"_propr_getFDR", (DL_FUNC) &_propr_getFDR, 2},
+    {"_propr_getG", (DL_FUNC) &_propr_getG, 1},
     {"_propr_graflex", (DL_FUNC) &_propr_graflex, 3},
     {"_propr_lr2vlr", (DL_FUNC) &_propr_lr2vlr, 1},
     {"_propr_lr2phi", (DL_FUNC) &_propr_lr2phi, 1},
diff --git a/src/graflex.cpp b/src/graflex.cpp
index 2247388..19887ef 100644
--- a/src/graflex.cpp
+++ b/src/graflex.cpp
@@ -2,58 +2,49 @@
 #include <numeric>
 using namespace Rcpp;
 
-// Function to calculate the contingency table and the odds ratio
+// Optimized function to calculate the contingency table and the odds ratio
 // [[Rcpp::export]]
 NumericVector getOR(const IntegerMatrix& A, const IntegerMatrix& G) {
   int ncol = A.ncol();
-
-  // calculate the contingency table
   int a = 0, b = 0, c = 0, d = 0;
-  for (int j = 0; j < ncol; ++j) {
-      for (int i = j+1; i < ncol; ++i) {
-          if (A(i, j) == 0) {
-              if (G(i, j) == 0) ++a;  // not in A and not in G
-              else ++b;               // not in A but in G
-          } else {
-              if (G(i, j) == 0) ++c;  // in A but not in G
-              else ++d;               // in A and in G
-          }
-      }
+
+  for (int j = 0; j < ncol - 1; ++j) {
+    for (int i = j + 1; i < ncol; ++i) {
+      int a_val = A(i, j), g_val = G(i, j);
+      a += (1 - a_val) * (1 - g_val);
+      b += (1 - a_val) * g_val;
+      c += a_val * (1 - g_val);
+      d += a_val * g_val;
+    }
   }
 
-  // calculate the odds ratio
   double odds_ratio = static_cast<double>(a * d) / (b * c);
+  double log_odds_ratio = std::log(odds_ratio);
 
-  return NumericVector::create(
-    a, b, c, d, odds_ratio, std::log(odds_ratio), R_NaN, R_NaN
-  );
+  return NumericVector::create(a, b, c, d, odds_ratio, log_odds_ratio, R_NaN, R_NaN);
 }
 
-// Function to calculate the contingency table and the odds ratio, given a permuted index vector
+// Optimized function to calculate the contingency table and the odds ratio, given a permuted index vector
 // [[Rcpp::export]]
 NumericVector getORperm(const IntegerMatrix& A, const IntegerMatrix& G, const IntegerVector& perm) {
   int ncol = A.ncol();
-
-  // calculate the contingency table
   int a = 0, b = 0, c = 0, d = 0;
-  for (int j = 0; j < ncol; ++j) {
-      for (int i = j+1; i < ncol; ++i) {
-          if (A(perm[i], perm[j]) == 0) {
-              if (G(i, j) == 0) ++a;  // not in A and not in G
-              else ++b;               // not in A but in G
-          } else {
-              if (G(i, j) == 0) ++c;  // in A but not in G
-              else ++d;               // in A and in G
-          }
-      }
+
+  for (int j = 0; j < ncol - 1; ++j) {
+    int pj = perm[j];
+    for (int i = j + 1; i < ncol; ++i) {
+      int a_val = A(perm[i], pj), g_val = G(i, j);
+      a += (1 - a_val) * (1 - g_val);
+      b += (1 - a_val) * g_val;
+      c += a_val * (1 - g_val);
+      d += a_val * g_val;
+    }
   }
 
-  // calculate the odds ratio
   double odds_ratio = static_cast<double>(a * d) / (b * c);
+  double log_odds_ratio = std::log(odds_ratio);
 
-  return NumericVector::create(
-    a, b, c, d, odds_ratio, std::log(odds_ratio), R_NaN, R_NaN
-  );
+  return NumericVector::create(a, b, c, d, odds_ratio, log_odds_ratio, R_NaN, R_NaN);
 }
 
 // Function to calculate the odds ratio and other relevant info for each permutation
@@ -99,9 +90,31 @@ List getFDR(double actual, const NumericVector& permuted) {
   );
 }
 
+// Function to calculate the G matrix from the Gk vector
+// [[Rcpp::export]]
+IntegerMatrix getG(const IntegerVector& Gk) {
+  int n = Gk.size();
+  IntegerMatrix G(n, n);
+  
+  for (int i = 0; i < n; ++i) {
+    int gi = Gk[i];
+    G(i, i) = gi * gi;
+    for (int j = 0; j < i; ++j) {
+      int value = gi * Gk[j];
+      G(i, j) = value;
+      G(j, i) = value;
+    }
+  }
+  
+  return G;
+}
+
 // Function to calculate the odds ratio and FDR, given the adjacency matrix A and the knowledge graph G
 // [[Rcpp::export]]
-NumericVector graflex(const IntegerMatrix& A, const IntegerMatrix& G, int p = 100) {
+NumericVector graflex(const IntegerMatrix& A, const IntegerVector& Gk, int p = 100) {
+
+  // Calculate Gk
+  IntegerMatrix G = getG(Gk);
 
   // get the actual odds ratio
   NumericVector actual = getOR(A, G);