update propr/propd to include adjacency (nf-core#5469)

* update propr/propd to include adjacency * remove whitespace * remove file from .gitignore and modify valcutoff function in propd.R * modify valcutoff function --------- Co-authored-by: Cristina Araiz <[email protected]>
LouisLeNezet · Apr 17, 2024 · c162ae4 · c162ae4
1 parent 303eafd
commit c162ae4
Show file tree

Hide file tree

Showing 6 changed files with 174 additions and 8 deletions.
diff --git a/.gitignore b/.gitignore
@@ -15,3 +15,4 @@ test_output/
 tests/data/
 work/
 .github/CODEOWNERS-tmp
+
diff --git a/modules/nf-core/propr/propd/main.nf b/modules/nf-core/propr/propd/main.nf
@@ -15,6 +15,8 @@ process PROPR_PROPD {
     tuple val(meta), path("*.propd.rds"), emit: propd
     tuple val(meta), path("*.propd.tsv"), emit: results
     tuple val(meta), path("*.fdr.tsv")  , emit: fdr         , optional:true
+    tuple val(meta), path("*.adj.csv"),   emit: adj         , optional:true
+    path "*.warnings.log",                emit: warnings
     path "*.R_sessionInfo.log"          , emit: session_info
     path "versions.yml"                 , emit: versions
 

diff --git a/modules/nf-core/propr/propd/templates/propd.R b/modules/nf-core/propr/propd/templates/propd.R
@@ -63,6 +63,71 @@ read_delim_flexible <- function(file, header = TRUE, row.names = 1, check.names
     return(mat)
 }
 
+#' Extract the values for a single metric and convert it into a genes x genes matrix.
+#'
+#' @param object propd object
+one_metric_df <- function(object){
+    result <- getResults(object)
+    genes <- unique(c(result\$Pair, result\$Partner))
+
+    empty_matrix <- matrix(NA, nrow = length(genes), ncol = length(genes), dimnames = list(genes, genes))
+
+    if (object@active == "theta_d"){
+    metric <- "theta"
+    }else{
+    metric <- object@active
+    }
+
+    for (i in 1:nrow(result)) {
+        row_name <- result\$Pair[i]
+        col_name <- result\$Partner[i]
+        empty_matrix[row_name, col_name] <- result[[metric]][i]
+        empty_matrix[col_name, row_name] <- result[[metric]][i]
+        }
+    return(empty_matrix)
+}
+
+#' Extract the differential proportionality cutoff for a specified FDR value.
+#' Gene pairs with a value higher than the extracted cutoff will be considered significantly differentially proportional.
+#'
+#' @param object propd object. Output from propd function. updateCutoffs function should be applied to the object previous to valCutoff.
+#' @param fdrVal FDR value to extract the cutoff for. Per default 0.05.
+#'
+#' @return cutoff value. Differential proportionality values lower than this cutoff are considered significant.
+valCutoff  <- function(object, fdrVal = 0.05){
+    fdr_df <- object@fdr
+    if (prod(dim(fdr_df) == 0)){
+        warning("Please run updateCutoff on propd first")
+    }else{
+        fdr_vals <- fdr_df\$FDR
+        if (any(!is.na(fdr_vals))){ # Si hay algun valor de FDR correcto
+            threshold <- any(fdr_vals <= fdrVal)
+            if (threshold){
+                fdr_threshold <- fdr_vals[which.min(fdr_vals <= fdrVal) - 1]
+            }else{
+                warning("FDR is higher than the specified threshold for all proportionality values. Using the lowest fdr instead")
+                fdr_threshold <- fdr_vals[1]
+            }
+        }else{
+            stop("No true counts in the given interval. FDR values are not defined")
+            geterrmessage()
+        }
+    }
+    cutoff <- fdr_df\$cutoff[fdr_df\$FDR == fdr_threshold]
+    return(cutoff)
+}
+
+#' Convert a proportionality matrix to an adjacency matrix based on a threshold.
+#'
+#' @param matrix proportionality matrix. Can be extracted from propr object with getMatrix().
+#' @param cutoff Significant proportionality value extracted from valCutoff function.
+#'
+#' @return Adjacency matrix. Gene pairs with a proportionality value lower than the threshold will have 1, otherwise 0.
+convert_to_adjacency <- function(matrix, cutoff) {
+    adjacency <- ifelse(matrix < cutoff, 1, 0)
+    return(adjacency)
+}
+
 ################################################
 ################################################
 ## Parse arguments                            ##
@@ -83,6 +148,8 @@ opt <- list(
     cutoff_max      = NA,                   # maximun threshold to test
     cutoff_interval = NA,                   # interval between thresholds
     fixseed         = FALSE,
+    adjacency       = FALSE,
+    fdrVal          = 0.05,
     ncores          = as.integer('$task.cpus')
 )
 opt_types <- list(
@@ -99,6 +166,8 @@ opt_types <- list(
     cutoff_max      = 'numeric',
     cutoff_interval = 'numeric',
     fixseed         = 'logical',
+    adjacency       = 'logical',
+    fdrVal          = 'numeric',
     ncores          = 'numeric'
 )
 
@@ -211,6 +280,13 @@ if (opt\$permutation > 0) {
     if (opt\$metric == 'theta_d') pd <- updateF(pd)
 }
 
+# Extract adjacency matrix if required
+if (opt\$adjacency == TRUE) {
+    matrix <- one_metric_df(pd)
+    cutoff <- valCutoff(pd, opt\$fdrVal)
+    adj <- convert_to_adjacency(matrix, cutoff)
+}
+
 ################################################
 ################################################
 ## Generate outputs                           ##
@@ -227,7 +303,7 @@ write.table(
     file      = paste0(opt\$prefix, '.propd.tsv'),
     col.names = TRUE,
     row.names = FALSE,
-    sep       = '\t',
+    sep       = '\\t',
     quote     = FALSE
 )
 
@@ -236,11 +312,32 @@ if (opt\$permutation > 0) {
         pd@fdr,
         file      = paste0(opt\$prefix, '.fdr.tsv'),
         col.names = TRUE,
-        sep       = '\t',
+        sep       = '\\t',
+        quote     = FALSE
+    )
+}
+
+if (opt\$adjacency == TRUE) {
+    write.table(
+        adj,
+        file      = paste0(opt\$prefix, '.adj.csv'),
+        col.names = TRUE,
+        row.names = TRUE,
+        sep       = ',',
         quote     = FALSE
     )
 }
 
+################################################
+################################################
+## WARNINGS                                   ##
+################################################
+################################################
+
+sink(paste0(opt\$prefix, ".warnings.log"))
+print(warnings())
+sink()
+
 ################################################
 ################################################
 ## R SESSION INFO                             ##

diff --git a/modules/nf-core/propr/propd/tests/adjacency.config b/modules/nf-core/propr/propd/tests/adjacency.config
@@ -0,0 +1,3 @@
+process {
+    ext.args = {"--permutation 10 --cutoff_min 0.05 --cutoff_max 0.95 --cutoff_interval 0.1 --fixseed true --adjacency true"}
+}
diff --git a/modules/nf-core/propr/propd/tests/main.nf.test b/modules/nf-core/propr/propd/tests/main.nf.test
@@ -122,4 +122,33 @@ nextflow_process {
             )
         }
     }
+
+    test("Test propr/propd with adjacency matrix") {
+
+        tag "adjacency"
+        config "./adjacency.config"
+
+        when {
+            process {
+                """
+                input[0] = [
+                    [ id:'test' ],
+                    file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.gene_counts.top1000cov.tsv")
+                ]
+                input[1] = [
+                    [ id: 'test'], 
+                    file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.samplesheet.csv")
+                ]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.adj).match("Test propr/propd with adjacency matrix - adj") },
+                { assert snapshot(process.out.results).match(" - results") }
+            )
+        }
+    }
 } 
diff --git a/modules/nf-core/propr/propd/tests/main.nf.test.snap b/modules/nf-core/propr/propd/tests/main.nf.test.snap
@@ -14,7 +14,7 @@
             "nf-test": "0.8.4",
             "nextflow": "23.10.1"
         },
-        "timestamp": "2024-04-09T14:44:17.847482"
+        "timestamp": "2024-04-16T11:55:35.703293"
     },
     "Test propr/propd using theta_e and boxcox permutation - results": {
         "content": [
@@ -31,7 +31,7 @@
             "nf-test": "0.8.4",
             "nextflow": "23.10.1"
         },
-        "timestamp": "2024-04-09T14:47:23.007679"
+        "timestamp": "2024-04-16T11:59:33.28078"
     },
     "Test propr/propd using theta_e permutation - results": {
         "content": [
@@ -48,7 +48,7 @@
             "nf-test": "0.8.4",
             "nextflow": "23.10.1"
         },
-        "timestamp": "2024-04-09T14:45:48.749702"
+        "timestamp": "2024-04-16T11:57:24.038188"
     },
     "versions": {
         "content": [
@@ -60,7 +60,24 @@
             "nf-test": "0.8.4",
             "nextflow": "23.10.1"
         },
-        "timestamp": "2024-04-09T14:42:26.701691"
+        "timestamp": "2024-04-16T11:53:45.165637"
+    },
+    " - results": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.propd.tsv:md5,34fda117492faf9a60f5807f56c4be68"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-04-16T14:13:47.602525"
     },
     " Test propr/propd using default boxcox permutation - results": {
         "content": [
@@ -77,7 +94,7 @@
             "nf-test": "0.8.4",
             "nextflow": "23.10.1"
         },
-        "timestamp": "2024-04-09T14:44:14.446556"
+        "timestamp": "2024-04-16T11:55:32.476341"
     },
     "Test propr/propd using default permutation - results": {
         "content": [
@@ -94,6 +111,23 @@
             "nf-test": "0.8.4",
             "nextflow": "23.10.1"
         },
-        "timestamp": "2024-04-09T14:42:24.563837"
+        "timestamp": "2024-04-16T11:53:43.056295"
+    },
+    "Test propr/propd with adjacency matrix - adj": {
+        "content": [
+            [
+                [
+                    {
+                        "id": "test"
+                    },
+                    "test.adj.csv:md5,f9d19255f9400e6c4daa01f86d74f017"
+                ]
+            ]
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-04-16T14:13:47.427246"
     }
 }
Original file line number	Diff line number	Diff line change
Expand Up		@@ -15,3 +15,4 @@ test_output/
		tests/data/
		work/
		.github/CODEOWNERS-tmp