Skip to content

Commit

Permalink
update propr/propd to include adjacency (nf-core#5469)
Browse files Browse the repository at this point in the history
* update propr/propd to include adjacency

* remove whitespace

* remove file from .gitignore and modify valcutoff function in propd.R

* modify valcutoff function

---------

Co-authored-by: Cristina Araiz <[email protected]>
  • Loading branch information
caraiz2001 and Cristina Araiz authored Apr 17, 2024
1 parent 303eafd commit c162ae4
Show file tree
Hide file tree
Showing 6 changed files with 174 additions and 8 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,4 @@ test_output/
tests/data/
work/
.github/CODEOWNERS-tmp

2 changes: 2 additions & 0 deletions modules/nf-core/propr/propd/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ process PROPR_PROPD {
tuple val(meta), path("*.propd.rds"), emit: propd
tuple val(meta), path("*.propd.tsv"), emit: results
tuple val(meta), path("*.fdr.tsv") , emit: fdr , optional:true
tuple val(meta), path("*.adj.csv"), emit: adj , optional:true
path "*.warnings.log", emit: warnings
path "*.R_sessionInfo.log" , emit: session_info
path "versions.yml" , emit: versions

Expand Down
101 changes: 99 additions & 2 deletions modules/nf-core/propr/propd/templates/propd.R
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,71 @@ read_delim_flexible <- function(file, header = TRUE, row.names = 1, check.names
return(mat)
}

#' Extract the values for a single metric and convert it into a genes x genes matrix.
#'
#' @param object propd object
one_metric_df <- function(object){
result <- getResults(object)
genes <- unique(c(result\$Pair, result\$Partner))

empty_matrix <- matrix(NA, nrow = length(genes), ncol = length(genes), dimnames = list(genes, genes))

if (object@active == "theta_d"){
metric <- "theta"
}else{
metric <- object@active
}

for (i in 1:nrow(result)) {
row_name <- result\$Pair[i]
col_name <- result\$Partner[i]
empty_matrix[row_name, col_name] <- result[[metric]][i]
empty_matrix[col_name, row_name] <- result[[metric]][i]
}
return(empty_matrix)
}

#' Extract the differential proportionality cutoff for a specified FDR value.
#' Gene pairs with a value higher than the extracted cutoff will be considered significantly differentially proportional.
#'
#' @param object propd object. Output from propd function. updateCutoffs function should be applied to the object previous to valCutoff.
#' @param fdrVal FDR value to extract the cutoff for. Per default 0.05.
#'
#' @return cutoff value. Differential proportionality values lower than this cutoff are considered significant.
valCutoff <- function(object, fdrVal = 0.05){
fdr_df <- object@fdr
if (prod(dim(fdr_df) == 0)){
warning("Please run updateCutoff on propd first")
}else{
fdr_vals <- fdr_df\$FDR
if (any(!is.na(fdr_vals))){ # Si hay algun valor de FDR correcto
threshold <- any(fdr_vals <= fdrVal)
if (threshold){
fdr_threshold <- fdr_vals[which.min(fdr_vals <= fdrVal) - 1]
}else{
warning("FDR is higher than the specified threshold for all proportionality values. Using the lowest fdr instead")
fdr_threshold <- fdr_vals[1]
}
}else{
stop("No true counts in the given interval. FDR values are not defined")
geterrmessage()
}
}
cutoff <- fdr_df\$cutoff[fdr_df\$FDR == fdr_threshold]
return(cutoff)
}

#' Convert a proportionality matrix to an adjacency matrix based on a threshold.
#'
#' @param matrix proportionality matrix. Can be extracted from propr object with getMatrix().
#' @param cutoff Significant proportionality value extracted from valCutoff function.
#'
#' @return Adjacency matrix. Gene pairs with a proportionality value lower than the threshold will have 1, otherwise 0.
convert_to_adjacency <- function(matrix, cutoff) {
adjacency <- ifelse(matrix < cutoff, 1, 0)
return(adjacency)
}

################################################
################################################
## Parse arguments ##
Expand All @@ -83,6 +148,8 @@ opt <- list(
cutoff_max = NA, # maximun threshold to test
cutoff_interval = NA, # interval between thresholds
fixseed = FALSE,
adjacency = FALSE,
fdrVal = 0.05,
ncores = as.integer('$task.cpus')
)
opt_types <- list(
Expand All @@ -99,6 +166,8 @@ opt_types <- list(
cutoff_max = 'numeric',
cutoff_interval = 'numeric',
fixseed = 'logical',
adjacency = 'logical',
fdrVal = 'numeric',
ncores = 'numeric'
)

Expand Down Expand Up @@ -211,6 +280,13 @@ if (opt\$permutation > 0) {
if (opt\$metric == 'theta_d') pd <- updateF(pd)
}

# Extract adjacency matrix if required
if (opt\$adjacency == TRUE) {
matrix <- one_metric_df(pd)
cutoff <- valCutoff(pd, opt\$fdrVal)
adj <- convert_to_adjacency(matrix, cutoff)
}

################################################
################################################
## Generate outputs ##
Expand All @@ -227,7 +303,7 @@ write.table(
file = paste0(opt\$prefix, '.propd.tsv'),
col.names = TRUE,
row.names = FALSE,
sep = '\t',
sep = '\\t',
quote = FALSE
)

Expand All @@ -236,11 +312,32 @@ if (opt\$permutation > 0) {
pd@fdr,
file = paste0(opt\$prefix, '.fdr.tsv'),
col.names = TRUE,
sep = '\t',
sep = '\\t',
quote = FALSE
)
}

if (opt\$adjacency == TRUE) {
write.table(
adj,
file = paste0(opt\$prefix, '.adj.csv'),
col.names = TRUE,
row.names = TRUE,
sep = ',',
quote = FALSE
)
}

################################################
################################################
## WARNINGS ##
################################################
################################################

sink(paste0(opt\$prefix, ".warnings.log"))
print(warnings())
sink()

################################################
################################################
## R SESSION INFO ##
Expand Down
3 changes: 3 additions & 0 deletions modules/nf-core/propr/propd/tests/adjacency.config
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
process {
ext.args = {"--permutation 10 --cutoff_min 0.05 --cutoff_max 0.95 --cutoff_interval 0.1 --fixseed true --adjacency true"}
}
29 changes: 29 additions & 0 deletions modules/nf-core/propr/propd/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -122,4 +122,33 @@ nextflow_process {
)
}
}

test("Test propr/propd with adjacency matrix") {

tag "adjacency"
config "./adjacency.config"

when {
process {
"""
input[0] = [
[ id:'test' ],
file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.salmon.merged.gene_counts.top1000cov.tsv")
]
input[1] = [
[ id: 'test'],
file("https://raw.githubusercontent.com/nf-core/test-datasets/modules/data/genomics/mus_musculus/rnaseq_expression/SRP254919.samplesheet.csv")
]
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out.adj).match("Test propr/propd with adjacency matrix - adj") },
{ assert snapshot(process.out.results).match(" - results") }
)
}
}
}
46 changes: 40 additions & 6 deletions modules/nf-core/propr/propd/tests/main.nf.test.snap
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
"timestamp": "2024-04-09T14:44:17.847482"
"timestamp": "2024-04-16T11:55:35.703293"
},
"Test propr/propd using theta_e and boxcox permutation - results": {
"content": [
Expand All @@ -31,7 +31,7 @@
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
"timestamp": "2024-04-09T14:47:23.007679"
"timestamp": "2024-04-16T11:59:33.28078"
},
"Test propr/propd using theta_e permutation - results": {
"content": [
Expand All @@ -48,7 +48,7 @@
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
"timestamp": "2024-04-09T14:45:48.749702"
"timestamp": "2024-04-16T11:57:24.038188"
},
"versions": {
"content": [
Expand All @@ -60,7 +60,24 @@
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
"timestamp": "2024-04-09T14:42:26.701691"
"timestamp": "2024-04-16T11:53:45.165637"
},
" - results": {
"content": [
[
[
{
"id": "test"
},
"test.propd.tsv:md5,34fda117492faf9a60f5807f56c4be68"
]
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
"timestamp": "2024-04-16T14:13:47.602525"
},
" Test propr/propd using default boxcox permutation - results": {
"content": [
Expand All @@ -77,7 +94,7 @@
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
"timestamp": "2024-04-09T14:44:14.446556"
"timestamp": "2024-04-16T11:55:32.476341"
},
"Test propr/propd using default permutation - results": {
"content": [
Expand All @@ -94,6 +111,23 @@
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
"timestamp": "2024-04-09T14:42:24.563837"
"timestamp": "2024-04-16T11:53:43.056295"
},
"Test propr/propd with adjacency matrix - adj": {
"content": [
[
[
{
"id": "test"
},
"test.adj.csv:md5,f9d19255f9400e6c4daa01f86d74f017"
]
]
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
"timestamp": "2024-04-16T14:13:47.427246"
}
}

0 comments on commit c162ae4

Please sign in to comment.