Skip to content

Commit

Permalink
Merge pull request #51 from suzannejin/master
Browse files Browse the repository at this point in the history
further speed up graflex
  • Loading branch information
suzannejin authored Sep 14, 2024
2 parents b557940 + e89872e commit 9c68bc5
Show file tree
Hide file tree
Showing 6 changed files with 170 additions and 223 deletions.
6 changes: 5 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
## propr 5.1.1
# propr 5.1.2
---------------------
* Restructured `graflex` to speed up

# propr 5.1.1
---------------------
* Speed up `graflex` related functions

Expand Down
10 changes: 3 additions & 7 deletions R/3-shared-graflex.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,19 +21,15 @@ runGraflex <- function(A, K, p=100, ncores=1) {
stop("'A' must be a square matrix.")

if (ncores == 1){

# for each knowledge network, calculate odds ratio and FDR
res <- lapply(1:ncol(K), function(k) {
Gk <- K[, k] %*% t(K[, k]) # converts the k column into an adjacency matrix (genes x genes)
graflex(A, Gk, p=p) # this calls the graflex function implemented in Rcpp C++
graflex(A, K[,k], p=p) # this calls the modified graflex function implemented in Rcpp C++
})

}else{
} else {
packageCheck("parallel")
cl <- parallel::makeCluster(ncores)
res <- parallel::parLapply(cl, 1:ncol(K), function(k) {
Gk <- K[, k] %*% t(K[, k])
graflex(A, Gk, p=p)
graflex(A, K[,k], p=p)
})
parallel::stopCluster(cl)
}
Expand Down
24 changes: 12 additions & 12 deletions R/RcppExports.R
Original file line number Diff line number Diff line change
Expand Up @@ -105,28 +105,28 @@ ctzRcpp <- function(X) {
.Call(`_propr_ctzRcpp`, X)
}

get_triangle <- function(mat) {
.Call(`_propr_get_triangle`, mat)
}

get_triangle_from_index <- function(mat, index) {
.Call(`_propr_get_triangle_from_index`, mat, index)
}

getOR <- function(A, G) {
.Call(`_propr_getOR`, A, G)
}

permuteOR <- function(A, Gstar, p = 100L) {
.Call(`_propr_permuteOR`, A, Gstar, p)
getORperm <- function(A, G, perm) {
.Call(`_propr_getORperm`, A, G, perm)
}

permuteOR <- function(A, G, p = 100L) {
.Call(`_propr_permuteOR`, A, G, p)
}

getFDR <- function(actual, permuted) {
.Call(`_propr_getFDR`, actual, permuted)
}

graflex <- function(A, G, p = 100L) {
.Call(`_propr_graflex`, A, G, p)
getG <- function(Gk) {
.Call(`_propr_getG`, Gk)
}

graflex <- function(A, Gk, p = 100L) {
.Call(`_propr_graflex`, A, Gk, p)
}

lr2vlr <- function(lr) {
Expand Down
67 changes: 34 additions & 33 deletions src/RcppExports.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -317,51 +317,41 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// get_triangle
IntegerVector get_triangle(const IntegerMatrix& mat);
RcppExport SEXP _propr_get_triangle(SEXP matSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const IntegerMatrix& >::type mat(matSEXP);
rcpp_result_gen = Rcpp::wrap(get_triangle(mat));
return rcpp_result_gen;
END_RCPP
}
// get_triangle_from_index
IntegerVector get_triangle_from_index(const IntegerMatrix& mat, const IntegerVector& index);
RcppExport SEXP _propr_get_triangle_from_index(SEXP matSEXP, SEXP indexSEXP) {
// getOR
NumericVector getOR(const IntegerMatrix& A, const IntegerMatrix& G);
RcppExport SEXP _propr_getOR(SEXP ASEXP, SEXP GSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const IntegerMatrix& >::type mat(matSEXP);
Rcpp::traits::input_parameter< const IntegerVector& >::type index(indexSEXP);
rcpp_result_gen = Rcpp::wrap(get_triangle_from_index(mat, index));
Rcpp::traits::input_parameter< const IntegerMatrix& >::type A(ASEXP);
Rcpp::traits::input_parameter< const IntegerMatrix& >::type G(GSEXP);
rcpp_result_gen = Rcpp::wrap(getOR(A, G));
return rcpp_result_gen;
END_RCPP
}
// getOR
NumericVector getOR(const IntegerVector& A, const IntegerVector& G);
RcppExport SEXP _propr_getOR(SEXP ASEXP, SEXP GSEXP) {
// getORperm
NumericVector getORperm(const IntegerMatrix& A, const IntegerMatrix& G, const IntegerVector& perm);
RcppExport SEXP _propr_getORperm(SEXP ASEXP, SEXP GSEXP, SEXP permSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const IntegerVector& >::type A(ASEXP);
Rcpp::traits::input_parameter< const IntegerVector& >::type G(GSEXP);
rcpp_result_gen = Rcpp::wrap(getOR(A, G));
Rcpp::traits::input_parameter< const IntegerMatrix& >::type A(ASEXP);
Rcpp::traits::input_parameter< const IntegerMatrix& >::type G(GSEXP);
Rcpp::traits::input_parameter< const IntegerVector& >::type perm(permSEXP);
rcpp_result_gen = Rcpp::wrap(getORperm(A, G, perm));
return rcpp_result_gen;
END_RCPP
}
// permuteOR
NumericMatrix permuteOR(const IntegerMatrix& A, const IntegerVector& Gstar, int p);
RcppExport SEXP _propr_permuteOR(SEXP ASEXP, SEXP GstarSEXP, SEXP pSEXP) {
NumericMatrix permuteOR(const IntegerMatrix& A, const IntegerMatrix& G, int p);
RcppExport SEXP _propr_permuteOR(SEXP ASEXP, SEXP GSEXP, SEXP pSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const IntegerMatrix& >::type A(ASEXP);
Rcpp::traits::input_parameter< const IntegerVector& >::type Gstar(GstarSEXP);
Rcpp::traits::input_parameter< const IntegerMatrix& >::type G(GSEXP);
Rcpp::traits::input_parameter< int >::type p(pSEXP);
rcpp_result_gen = Rcpp::wrap(permuteOR(A, Gstar, p));
rcpp_result_gen = Rcpp::wrap(permuteOR(A, G, p));
return rcpp_result_gen;
END_RCPP
}
Expand All @@ -377,16 +367,27 @@ BEGIN_RCPP
return rcpp_result_gen;
END_RCPP
}
// getG
IntegerMatrix getG(const IntegerVector& Gk);
RcppExport SEXP _propr_getG(SEXP GkSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const IntegerVector& >::type Gk(GkSEXP);
rcpp_result_gen = Rcpp::wrap(getG(Gk));
return rcpp_result_gen;
END_RCPP
}
// graflex
NumericVector graflex(const IntegerMatrix& A, const IntegerMatrix& G, int p);
RcppExport SEXP _propr_graflex(SEXP ASEXP, SEXP GSEXP, SEXP pSEXP) {
NumericVector graflex(const IntegerMatrix& A, const IntegerVector& Gk, int p);
RcppExport SEXP _propr_graflex(SEXP ASEXP, SEXP GkSEXP, SEXP pSEXP) {
BEGIN_RCPP
Rcpp::RObject rcpp_result_gen;
Rcpp::RNGScope rcpp_rngScope_gen;
Rcpp::traits::input_parameter< const IntegerMatrix& >::type A(ASEXP);
Rcpp::traits::input_parameter< const IntegerMatrix& >::type G(GSEXP);
Rcpp::traits::input_parameter< const IntegerVector& >::type Gk(GkSEXP);
Rcpp::traits::input_parameter< int >::type p(pSEXP);
rcpp_result_gen = Rcpp::wrap(graflex(A, G, p));
rcpp_result_gen = Rcpp::wrap(graflex(A, Gk, p));
return rcpp_result_gen;
END_RCPP
}
Expand Down Expand Up @@ -516,11 +517,11 @@ static const R_CallMethodDef CallEntries[] = {
{"_propr_count_less_equal_than", (DL_FUNC) &_propr_count_less_equal_than, 2},
{"_propr_count_greater_equal_than", (DL_FUNC) &_propr_count_greater_equal_than, 2},
{"_propr_ctzRcpp", (DL_FUNC) &_propr_ctzRcpp, 1},
{"_propr_get_triangle", (DL_FUNC) &_propr_get_triangle, 1},
{"_propr_get_triangle_from_index", (DL_FUNC) &_propr_get_triangle_from_index, 2},
{"_propr_getOR", (DL_FUNC) &_propr_getOR, 2},
{"_propr_getORperm", (DL_FUNC) &_propr_getORperm, 3},
{"_propr_permuteOR", (DL_FUNC) &_propr_permuteOR, 3},
{"_propr_getFDR", (DL_FUNC) &_propr_getFDR, 2},
{"_propr_getG", (DL_FUNC) &_propr_getG, 1},
{"_propr_graflex", (DL_FUNC) &_propr_graflex, 3},
{"_propr_lr2vlr", (DL_FUNC) &_propr_lr2vlr, 1},
{"_propr_lr2phi", (DL_FUNC) &_propr_lr2phi, 1},
Expand Down
120 changes: 63 additions & 57 deletions src/graflex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,84 +2,70 @@
#include <numeric>
using namespace Rcpp;


// Function to extract the triangle of a square and symmetric IntegerMatrix
// Optimized function to calculate the contingency table and the odds ratio
// [[Rcpp::export]]
IntegerVector get_triangle(const IntegerMatrix& mat) {
int ncol = mat.ncol();
int n = ncol * (ncol - 1) / 2;

IntegerVector triangle(n);
NumericVector getOR(const IntegerMatrix& A, const IntegerMatrix& G) {
int ncol = A.ncol();
int a = 0, b = 0, c = 0, d = 0;

int k = 0;
for (int j = 0; j < ncol; ++j) {
for (int i = j+1; i < ncol; ++i) {
triangle[k++] = mat(i, j);
for (int j = 0; j < ncol - 1; ++j) {
for (int i = j + 1; i < ncol; ++i) {
int a_val = A(i, j), g_val = G(i, j);
a += (1 - a_val) * (1 - g_val);
b += (1 - a_val) * g_val;
c += a_val * (1 - g_val);
d += a_val * g_val;
}
}

return triangle;
}

// Function to get the triangle of a matrix based on the given indeces
// [[Rcpp::export]]
IntegerVector get_triangle_from_index(const IntegerMatrix& mat, const IntegerVector& index) {
int ncol = mat.ncol();
int n = ncol * (ncol - 1) / 2;

IntegerVector triangle(n);

int k = 0;
for (int j = 0; j < ncol; ++j) {
for (int i = j+1; i < ncol; ++i) {
triangle[k++] = mat(index[i], index[j]);
}
}
double odds_ratio = static_cast<double>(a * d) / (b * c);
double log_odds_ratio = std::log(odds_ratio);

return triangle;
return NumericVector::create(a, b, c, d, odds_ratio, log_odds_ratio, R_NaN, R_NaN);
}

// Function to calculate the contingency table
// Optimized function to calculate the contingency table and the odds ratio, given a permuted index vector
// [[Rcpp::export]]
NumericVector getOR(const IntegerVector& A, const IntegerVector& G) {
int n = A.size();

// calculate the contingency table
NumericVector getORperm(const IntegerMatrix& A, const IntegerMatrix& G, const IntegerVector& perm) {
int ncol = A.ncol();
int a = 0, b = 0, c = 0, d = 0;
for (int i = 0; i < n; ++i) {
if (A[i] == 0) {
if (G[i] == 0) ++a; // not in A and not in G
else ++b; // not in A but in G
} else {
if (G[i] == 0) ++c; // in A but not in G
else ++d; // in A and in G

for (int j = 0; j < ncol - 1; ++j) {
int pj = perm[j];
for (int i = j + 1; i < ncol; ++i) {
int a_val = A(perm[i], pj), g_val = G(i, j);
a += (1 - a_val) * (1 - g_val);
b += (1 - a_val) * g_val;
c += a_val * (1 - g_val);
d += a_val * g_val;
}
}

// calculate the odds ratio

double odds_ratio = static_cast<double>(a * d) / (b * c);
double log_odds_ratio = std::log(odds_ratio);

return NumericVector::create(
a, b, c, d, odds_ratio, std::log(odds_ratio), R_NaN, R_NaN
);
return NumericVector::create(a, b, c, d, odds_ratio, log_odds_ratio, R_NaN, R_NaN);
}

// Function to calculate the odds ratio and other relevant info for each permutation
// [[Rcpp::export]]
NumericMatrix permuteOR(const IntegerMatrix& A, const IntegerVector& Gstar, int p = 100) {
int n = A.ncol();
NumericMatrix permuteOR(const IntegerMatrix& A, const IntegerMatrix& G, int p = 100) {
int ncol = A.ncol();
NumericMatrix or_table(p, 8);

// calculate odds ratio for each permutation
// calculate the odds ratio for each permutation
for (int i = 0; i < p; ++i) {
IntegerVector idx = sample(n, n, false) - 1;
IntegerVector Astar = get_triangle_from_index(A, idx);
or_table(i, _) = getOR(Astar, Gstar);
IntegerVector perm = sample(ncol, ncol, false) - 1;
or_table(i, _) = getORperm(A, G, perm);
// TODO should I downsample the pairs (up to a maximum) to be checked?
// So in this case, we would check how likely we get by chance an OR from the downsampled
// permuted data that is higher/lower than the OR on the downsampled empirical data
}

return(or_table);
}

// Function to calculate the FDR, given the actual odds ratio and the permuted odds ratios
// [[Rcpp::export]]
List getFDR(double actual, const NumericVector& permuted) {
int n = permuted.size();
Expand All @@ -104,17 +90,37 @@ List getFDR(double actual, const NumericVector& permuted) {
);
}

// Function to calculate the G matrix from the Gk vector
// [[Rcpp::export]]
IntegerMatrix getG(const IntegerVector& Gk) {
int n = Gk.size();
IntegerMatrix G(n, n);

for (int i = 0; i < n; ++i) {
int gi = Gk[i];
G(i, i) = gi * gi;
for (int j = 0; j < i; ++j) {
int value = gi * Gk[j];
G(i, j) = value;
G(j, i) = value;
}
}

return G;
}

// Function to calculate the odds ratio and FDR, given the adjacency matrix A and the knowledge graph G
// [[Rcpp::export]]
NumericVector graflex(const IntegerMatrix& A, const IntegerMatrix& G, int p = 100) {
NumericVector graflex(const IntegerMatrix& A, const IntegerVector& Gk, int p = 100) {

// Calculate Gk
IntegerMatrix G = getG(Gk);

// get the actual odds ratio
IntegerVector Astar = get_triangle(A);
IntegerVector Gstar = get_triangle(G);
NumericVector actual = getOR(Astar, Gstar);
NumericVector actual = getOR(A, G);

// get distribution of odds ratios on permuted data
NumericMatrix permuted = permuteOR(A, Gstar, p);
NumericMatrix permuted = permuteOR(A, G, p);

// calculate the FDR
List fdr = getFDR(actual(4), permuted(_, 4));
Expand Down
Loading

0 comments on commit 9c68bc5

Please sign in to comment.