Merge branch 'master' into fix_amrfinderplus

nf-core · Sep 28, 2023 · 1f2bed4 · 1f2bed4
2 parents 4b7584d + 34af8de
commit 1f2bed4
Show file tree

Hide file tree

Showing 17 changed files with 323 additions and 66 deletions.
diff --git a/modules/nf-core/custom/matrixfilter/main.nf b/modules/nf-core/custom/matrixfilter/main.nf
@@ -12,6 +12,7 @@ process CUSTOM_MATRIXFILTER {
 
     output:
     tuple val(meta), path("*.filtered.tsv")             , emit: filtered
+    tuple val(meta), path("*.tests.tsv")                , emit: tests
     tuple val(meta), path("R_sessionInfo.log")          , emit: session_info
     path "versions.yml"                                 , emit: versions
 

diff --git a/modules/nf-core/custom/matrixfilter/meta.yml b/modules/nf-core/custom/matrixfilter/meta.yml
@@ -4,6 +4,8 @@ description: filter a matrix based on a minimum value and numbers of samples
 keywords:
   - matrix
   - filter
+  - abundance
+  - na
 tools:
   - "matrixfilter":
       description: "filter a matrix based on a minimum value and numbers of samples"
@@ -34,32 +36,44 @@ input:
         present (see grouping_variable), but also to validate matrix columns.
         If not provided, all numeric columns are selected.
   - minimum_abundance:
-      type: numeric
+      type: float
       description: |
         Minimum abundance value, supplied via task.ext.args as --minimum_abundance
       default: 1
   - minimum_samples:
-      type: numeric
+      type: integer
       description: |
         Minimum observations that must pass the threshold to retain
         the row/ feature (e.g. gene). Supplied via task.ext.args as
         --minimum_samples
       default: 1
   - minimum_proportion:
-      type: numeric
+      type: float
       description: |
         A minimum proportion of observations that must pass the threshold.
         Supplied via task.ext.args as --minimum_proportion. Overrides
         minimum_samples
       default: 0
   - grouping_variable:
-      type: optional string
+      type: string
       description: |
         Optionally supply a variable from the sample sheet that can be used to
         define groups and derive a minimum group size upon which to base
         minimum observation numbers. The rationale being to allow retention of
         features that might be present in only one group. Supplied via
         task.ext.args as --grouping_variable
+  - minimum_proportion_not_na:
+      type: float
+      description: |
+        A minimum proportion of observations that must have a numeric value (not be NA).
+        Supplied via task.ext.args as --minimum_proportion_not_na
+      default: 0.5
+  - minimum_samples_not_na:
+      type: integer
+      description: |
+        Minimum observations that must have a numeric value (not be NA) to retain
+        the row/ feature (e.g. gene). Supplied via task.ext.args as
+        --minimum_samples_not_na. Overrides minimum_proportion_not_na
 
 output:
   - versions:
@@ -75,6 +89,10 @@ output:
       type: file
       description: Filtered version of input matrix
       pattern: "*.filtered.tsv"
+  - tests:
+      type: file
+      description: Boolean matrix with pass/ fail status for each test on each feature
+      pattern: "*.tests.tsv"
 
 authors:
   - "@pinin4fjords"
diff --git a/modules/nf-core/custom/matrixfilter/templates/matrixfilter.R b/modules/nf-core/custom/matrixfilter/templates/matrixfilter.R
@@ -78,7 +78,9 @@ opt <- list(
     minimum_abundance = 1,
     minimum_samples = 1,
     minimum_proportion = 0,
-    grouping_variable = NULL
+    grouping_variable = NULL,
+    minimum_proportion_not_na = 0.5,
+    minimum_samples_not_na = NULL
 )
 opt_types <- lapply(opt, class)
 
@@ -152,11 +154,29 @@ if ((opt\$sample_file != '') && ( ! is.null(opt\$grouping_variable))){
     opt\$minimum_samples <- ncol(abundance_matrix) * opt\$minimum_proportion
 }
 
-# Generate a boolean vector specifying the features to retain
+# Also set up filtering for NAs: by default the threshold is derived from
+# minimum_proportion_not_na; minimum_samples_not_na is used only if provided (default NULL)
 
-keep <- apply(abundance_matrix, 1, function(x){
-    sum(x > opt\$minimum_abundance) >= opt\$minimum_samples
-})
+if (is.null(opt\$minimum_samples_not_na)) {
+    opt\$minimum_samples_not_na <- ncol(abundance_matrix) * opt\$minimum_proportion_not_na
+}
+
+# Define the tests. 'abundance': enough values exceed minimum_abundance (NAs ignored).
+# 'na': no NAs at all, or the count of non-NA values reaches minimum_samples_not_na
+tests <- list(
+    'abundance' = function(x) sum(x > opt\$minimum_abundance, na.rm = TRUE) >= opt\$minimum_samples,
+    'na' = function(x) !any(is.na(x)) || sum(!is.na(x)) >= opt\$minimum_samples_not_na
+)
+
+# Apply the functions row-wise on the abundance_matrix and store the result in a boolean matrix
+
+boolean_matrix <- t(apply(abundance_matrix, 1, function(row) {
+    sapply(tests, function(f) f(row))
+}))
+
+# We will retain features passing all tests
+
+keep <- apply(boolean_matrix, 1, all)
 
 # Write out the matrix retaining the specified rows and re-prepending the
 # column with the feature identifiers
@@ -175,6 +195,20 @@ write.table(
     quote = FALSE
 )
 
+# Write a boolean matrix specifying the status of each test
+
+write.table(
+    data.frame(rownames(abundance_matrix), boolean_matrix),
+    file = paste0(
+        prefix,
+        '.tests.tsv'
+    ),
+    col.names = c(feature_id_name, names(tests)),
+    row.names = FALSE,
+    sep = '\t',
+    quote = FALSE
+)
+
 ################################################
 ################################################
 ## R SESSION INFO                             ##

diff --git a/modules/nf-core/manta/germline/main.nf b/modules/nf-core/manta/germline/main.nf
@@ -13,6 +13,7 @@ process MANTA_GERMLINE {
     tuple val(meta), path(input), path(index), path(target_bed), path(target_bed_tbi)
     tuple val(meta2), path(fasta)
     tuple val(meta3), path(fai)
+    path(config)
 
     output:
     tuple val(meta), path("*candidate_small_indels.vcf.gz")    , emit: candidate_small_indels_vcf
@@ -31,27 +32,29 @@ process MANTA_GERMLINE {
     def prefix = task.ext.prefix ?: "${meta.id}"
     def input_files = input.collect{"--bam ${it}"}.join(' ')
     def options_manta = target_bed ? "--callRegions $target_bed" : ""
+    def config_option = config ? "--config ${config}" : ""
     """
-    configManta.py \
-        ${input_files} \
-        --reference $fasta \
-        --runDir manta \
-        $options_manta \
+    configManta.py \\
+        ${input_files} \\
+        ${config_option} \\
+        --reference $fasta \\
+        --runDir manta \\
+        $options_manta \\
         $args
 
     python manta/runWorkflow.py -m local -j $task.cpus
 
-    mv manta/results/variants/candidateSmallIndels.vcf.gz \
+    mv manta/results/variants/candidateSmallIndels.vcf.gz \\
         ${prefix}.candidate_small_indels.vcf.gz
-    mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi \
+    mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi \\
         ${prefix}.candidate_small_indels.vcf.gz.tbi
-    mv manta/results/variants/candidateSV.vcf.gz \
+    mv manta/results/variants/candidateSV.vcf.gz \\
         ${prefix}.candidate_sv.vcf.gz
-    mv manta/results/variants/candidateSV.vcf.gz.tbi \
+    mv manta/results/variants/candidateSV.vcf.gz.tbi \\
         ${prefix}.candidate_sv.vcf.gz.tbi
-    mv manta/results/variants/diploidSV.vcf.gz \
+    mv manta/results/variants/diploidSV.vcf.gz \\
         ${prefix}.diploid_sv.vcf.gz
-    mv manta/results/variants/diploidSV.vcf.gz.tbi \
+    mv manta/results/variants/diploidSV.vcf.gz.tbi \\
         ${prefix}.diploid_sv.vcf.gz.tbi
 
     cat <<-END_VERSIONS > versions.yml

diff --git a/modules/nf-core/manta/germline/meta.yml b/modules/nf-core/manta/germline/meta.yml
@@ -57,6 +57,10 @@ input:
       type: file
       description: Genome reference FASTA index file
       pattern: "*.{fa.fai,fasta.fai}"
+  - config:
+      type: file
+      description: Manta configuration file
+      pattern: "*.{ini,conf,config}"
 
 output:
   - meta:
@@ -96,3 +100,4 @@ output:
 authors:
   - "@maxulysse"
   - "@ramprasadn"
+  - "@nvnieuwk"
diff --git a/modules/nf-core/manta/somatic/main.nf b/modules/nf-core/manta/somatic/main.nf
@@ -10,8 +10,9 @@ process MANTA_SOMATIC {
 
     input:
     tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor), path(target_bed), path(target_bed_tbi)
-    path fasta
-    path fai
+    tuple val(meta2), path(fasta)
+    tuple val(meta3), path(fai)
+    path(config)
 
     output:
     tuple val(meta), path("*.candidate_small_indels.vcf.gz")     , emit: candidate_small_indels_vcf
@@ -31,26 +32,53 @@ process MANTA_SOMATIC {
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
     def options_manta = target_bed ? "--callRegions $target_bed" : ""
-
+    def config_option = config ? "--config ${config}" : ""
     """
-    configManta.py \
-        --tumorBam $input_tumor \
-        --normalBam $input_normal \
-        --reference $fasta \
-        --runDir manta \
-        $options_manta \
+    configManta.py \\
+        --tumorBam $input_tumor \\
+        --normalBam $input_normal \\
+        --reference $fasta \\
+        ${config_option} \\
+        --runDir manta \\
+        $options_manta \\
         $args
 
     python manta/runWorkflow.py -m local -j $task.cpus
 
-    mv manta/results/variants/candidateSmallIndels.vcf.gz     ${prefix}.candidate_small_indels.vcf.gz
-    mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi ${prefix}.candidate_small_indels.vcf.gz.tbi
-    mv manta/results/variants/candidateSV.vcf.gz              ${prefix}.candidate_sv.vcf.gz
-    mv manta/results/variants/candidateSV.vcf.gz.tbi          ${prefix}.candidate_sv.vcf.gz.tbi
-    mv manta/results/variants/diploidSV.vcf.gz                ${prefix}.diploid_sv.vcf.gz
-    mv manta/results/variants/diploidSV.vcf.gz.tbi            ${prefix}.diploid_sv.vcf.gz.tbi
-    mv manta/results/variants/somaticSV.vcf.gz                ${prefix}.somatic_sv.vcf.gz
-    mv manta/results/variants/somaticSV.vcf.gz.tbi            ${prefix}.somatic_sv.vcf.gz.tbi
+    mv manta/results/variants/candidateSmallIndels.vcf.gz \\
+        ${prefix}.candidate_small_indels.vcf.gz
+    mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi \\
+        ${prefix}.candidate_small_indels.vcf.gz.tbi
+    mv manta/results/variants/candidateSV.vcf.gz \\
+        ${prefix}.candidate_sv.vcf.gz
+    mv manta/results/variants/candidateSV.vcf.gz.tbi \\
+        ${prefix}.candidate_sv.vcf.gz.tbi
+    mv manta/results/variants/diploidSV.vcf.gz \\
+        ${prefix}.diploid_sv.vcf.gz
+    mv manta/results/variants/diploidSV.vcf.gz.tbi \\
+        ${prefix}.diploid_sv.vcf.gz.tbi
+    mv manta/results/variants/somaticSV.vcf.gz \\
+        ${prefix}.somatic_sv.vcf.gz
+    mv manta/results/variants/somaticSV.vcf.gz.tbi \\
+        ${prefix}.somatic_sv.vcf.gz.tbi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        manta: \$( configManta.py --version )
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.candidate_small_indels.vcf.gz
+    touch ${prefix}.candidate_small_indels.vcf.gz.tbi
+    touch ${prefix}.candidate_sv.vcf.gz
+    touch ${prefix}.candidate_sv.vcf.gz.tbi
+    touch ${prefix}.diploid_sv.vcf.gz
+    touch ${prefix}.diploid_sv.vcf.gz.tbi
+    touch ${prefix}.somatic_sv.vcf.gz
+    touch ${prefix}.somatic_sv.vcf.gz.tbi
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":

diff --git a/modules/nf-core/manta/somatic/meta.yml b/modules/nf-core/manta/somatic/meta.yml
@@ -47,14 +47,28 @@ input:
       type: file
       description: Index for BED file containing target regions for variant calling
       pattern: "*.{bed.tbi}"
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'genome' ]
   - fasta:
       type: file
       description: Genome reference FASTA file
       pattern: "*.{fa,fasta}"
+  - meta3:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'genome' ]
   - fai:
       type: file
       description: Genome reference FASTA index file
       pattern: "*.{fa.fai,fasta.fai}"
+  - config:
+      type: file
+      description: Manta configuration file
+      pattern: "*.{ini,conf,config}"
 
 output:
   - meta:
@@ -101,3 +115,4 @@ output:
 
 authors:
   - "@FriederikeHanssen"
+  - "@nvnieuwk"
diff --git a/modules/nf-core/manta/tumoronly/main.nf b/modules/nf-core/manta/tumoronly/main.nf
@@ -10,8 +10,9 @@ process MANTA_TUMORONLY {
 
     input:
     tuple val(meta), path(input), path(input_index), path(target_bed), path(target_bed_tbi)
-    path fasta
-    path fai
+    tuple val(meta2), path(fasta)
+    tuple val(meta3), path(fai)
+    path(config)
 
     output:
     tuple val(meta), path("*candidate_small_indels.vcf.gz")    , emit: candidate_small_indels_vcf
@@ -29,32 +30,50 @@ process MANTA_TUMORONLY {
     def args = task.ext.args ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
     def options_manta = target_bed ? "--callRegions $target_bed" : ""
+    def config_option = config ? "--config ${config}" : ""
     """
-    configManta.py \
-        --tumorBam $input \
-        --reference $fasta \
-        --runDir manta \
-        $options_manta \
+    configManta.py \\
+        --tumorBam $input \\
+        --reference $fasta \\
+        ${config_option} \\
+        --runDir manta \\
+        $options_manta \\
         $args
 
     python manta/runWorkflow.py -m local -j $task.cpus
 
-    mv manta/results/variants/candidateSmallIndels.vcf.gz \
+    mv manta/results/variants/candidateSmallIndels.vcf.gz \\
         ${prefix}.candidate_small_indels.vcf.gz
-    mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi \
+    mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi \\
         ${prefix}.candidate_small_indels.vcf.gz.tbi
-    mv manta/results/variants/candidateSV.vcf.gz \
+    mv manta/results/variants/candidateSV.vcf.gz \\
         ${prefix}.candidate_sv.vcf.gz
-    mv manta/results/variants/candidateSV.vcf.gz.tbi \
+    mv manta/results/variants/candidateSV.vcf.gz.tbi \\
         ${prefix}.candidate_sv.vcf.gz.tbi
-    mv manta/results/variants/tumorSV.vcf.gz \
+    mv manta/results/variants/tumorSV.vcf.gz \\
         ${prefix}.tumor_sv.vcf.gz
-    mv manta/results/variants/tumorSV.vcf.gz.tbi \
+    mv manta/results/variants/tumorSV.vcf.gz.tbi \\
         ${prefix}.tumor_sv.vcf.gz.tbi
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         manta: \$( configManta.py --version )
     END_VERSIONS
     """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.candidate_small_indels.vcf.gz
+    touch ${prefix}.candidate_small_indels.vcf.gz.tbi
+    touch ${prefix}.candidate_sv.vcf.gz
+    touch ${prefix}.candidate_sv.vcf.gz.tbi
+    touch ${prefix}.tumor_sv.vcf.gz
+    touch ${prefix}.tumor_sv.vcf.gz.tbi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        manta: \$( configManta.py --version )
+    END_VERSIONS
+    """
 }