Skip to content

Commit

Permalink
Merge branch 'master' into seqkit-sliding-update
Browse files Browse the repository at this point in the history
  • Loading branch information
DLBPointon authored Sep 28, 2023
2 parents 6f69892 + 20d58a6 commit fb224b2
Show file tree
Hide file tree
Showing 16 changed files with 309 additions and 66 deletions.
1 change: 1 addition & 0 deletions modules/nf-core/custom/matrixfilter/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ process CUSTOM_MATRIXFILTER {

output:
tuple val(meta), path("*.filtered.tsv") , emit: filtered
tuple val(meta), path("*.tests.tsv") , emit: tests
tuple val(meta), path("R_sessionInfo.log") , emit: session_info
path "versions.yml" , emit: versions

Expand Down
26 changes: 22 additions & 4 deletions modules/nf-core/custom/matrixfilter/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@ description: filter a matrix based on a minimum value and numbers of samples
keywords:
- matrix
- filter
- abundance
- na
tools:
- "matrixfilter":
description: "filter a matrix based on a minimum value and numbers of samples"
Expand Down Expand Up @@ -34,32 +36,44 @@ input:
present (see grouping_variable), but also to validate matrix columns.
If not provided, all numeric columns are selected.
- minimum_abundance:
type: numeric
type: float
description: |
Minimum abundance value, supplied via task.ext.args as --minimum_abundance
default: 1
- minimum_samples:
type: numeric
type: integer
description: |
Minimum observations that must pass the threshold to retain
the row/ feature (e.g. gene). Supplied via task.ext.args as
--minimum_samples
default: 1
- minimum_proportion:
type: numeric
type: float
description: |
A minimum proportion of observations that must pass the threshold.
Supplied via task.ext.args as --minimum_proportion. Overrides
minimum_samples
default: 0
- grouping_variable:
type: optional string
type: string
description: |
Optionally supply a variable from the sample sheet that can be used to
define groups and derive a minimum group size upon which to base
minimum observation numbers. The rationale being to allow retention of
features that might be present in only one group. Supplied via
task.ext.args as --grouping_variable
- minimum_proportion_not_na:
type: float
description: |
A minimum proportion of observations that must have a numeric value (not be NA).
Supplied via task.ext.args as --minimum_proportion_not_na
default: 0.5
- minimum_samples_not_na:
type: integer
description: |
Minimum observations that must have a numeric value (not be NA) to retain
the row/ feature (e.g. gene). Supplied via task.ext.args as
--minimum_samples_not_na. Overrides minimum_proportion_not_na
output:
- versions:
Expand All @@ -75,6 +89,10 @@ output:
type: file
description: Filtered version of input matrix
pattern: "*.filtered.tsv"
- tests:
type: file
description: Boolean matrix with pass/fail status for each test on each feature
pattern: "*.tests.tsv"

authors:
- "@pinin4fjords"
44 changes: 39 additions & 5 deletions modules/nf-core/custom/matrixfilter/templates/matrixfilter.R
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,9 @@ opt <- list(
minimum_abundance = 1,
minimum_samples = 1,
minimum_proportion = 0,
grouping_variable = NULL
grouping_variable = NULL,
minimum_proportion_not_na = 0.5,
minimum_samples_not_na = NULL
)
opt_types <- lapply(opt, class)

Expand Down Expand Up @@ -152,11 +154,29 @@ if ((opt\$sample_file != '') && ( ! is.null(opt\$grouping_variable))){
opt\$minimum_samples <- ncol(abundance_matrix) * opt\$minimum_proportion
}

# Generate a boolean vector specifying the features to retain
# Also set up filtering for NAs; use by default minimum_proportion_not_na; only
# use minimum_samples_not_na if it is provided (default NULL)

keep <- apply(abundance_matrix, 1, function(x){
sum(x > opt\$minimum_abundance) >= opt\$minimum_samples
})
if (is.null(opt\$minimum_samples_not_na)) {
opt\$minimum_samples_not_na <- ncol(abundance_matrix) * opt\$minimum_proportion_not_na
}

# Define the tests

tests <- list(
    # Named list of row-wise predicates. Each takes one feature's abundance
    # values (a numeric vector, possibly containing NA) and returns a single
    # logical: TRUE when the feature passes that test.

    # 'abundance': enough observations exceed the minimum abundance. NAs are
    # ignored here so they count as "not passing" rather than poisoning the sum.
    'abundance' = function(x) sum(x > opt\$minimum_abundance, na.rm = TRUE) >= opt\$minimum_samples,

    # 'na': rows without any NA always pass; otherwise the NUMBER of non-NA
    # observations must reach minimum_samples_not_na. That option is a sample
    # count (either user-supplied or derived above from the proportion), so it
    # must be compared against a count, not against a proportion. The option
    # name is spelled out in full to avoid relying on partial matching.
    'na' = function(x) !any(is.na(x)) || sum(!is.na(x)) >= opt\$minimum_samples_not_na
)

# Apply the functions row-wise on the abundance_matrix and store the result in a boolean matrix

# Evaluate every test on each feature (row of abundance_matrix). apply() returns
# a tests x features matrix, so it is transposed to give one row per feature and
# one column per test. vapply() enforces exactly one logical value per test,
# which sapply() would not guarantee.
boolean_matrix <- t(apply(abundance_matrix, 1, function(row) {
    vapply(tests, function(f) f(row), logical(1))
}))

# We will retain features passing all tests

keep <- apply(boolean_matrix, 1, all)

# Write out the matrix retaining the specified rows and re-prepending the
# column with the feature identifiers
Expand All @@ -175,6 +195,20 @@ write.table(
quote = FALSE
)

# Write out the boolean matrix specifying the pass/fail status of each test for each feature

write.table(
data.frame(rownames(abundance_matrix), boolean_matrix),
file = paste0(
prefix,
'.tests.tsv'
),
col.names = c(feature_id_name, names(tests)),
row.names = FALSE,
sep = '\t',
quote = FALSE
)

################################################
################################################
## R SESSION INFO ##
Expand Down
25 changes: 14 additions & 11 deletions modules/nf-core/manta/germline/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ process MANTA_GERMLINE {
tuple val(meta), path(input), path(index), path(target_bed), path(target_bed_tbi)
tuple val(meta2), path(fasta)
tuple val(meta3), path(fai)
path(config)

output:
tuple val(meta), path("*candidate_small_indels.vcf.gz") , emit: candidate_small_indels_vcf
Expand All @@ -31,27 +32,29 @@ process MANTA_GERMLINE {
def prefix = task.ext.prefix ?: "${meta.id}"
def input_files = input.collect{"--bam ${it}"}.join(' ')
def options_manta = target_bed ? "--callRegions $target_bed" : ""
def config_option = config ? "--config ${config}" : ""
"""
configManta.py \
${input_files} \
--reference $fasta \
--runDir manta \
$options_manta \
configManta.py \\
${input_files} \\
${config_option} \\
--reference $fasta \\
--runDir manta \\
$options_manta \\
$args
python manta/runWorkflow.py -m local -j $task.cpus
mv manta/results/variants/candidateSmallIndels.vcf.gz \
mv manta/results/variants/candidateSmallIndels.vcf.gz \\
${prefix}.candidate_small_indels.vcf.gz
mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi \
mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi \\
${prefix}.candidate_small_indels.vcf.gz.tbi
mv manta/results/variants/candidateSV.vcf.gz \
mv manta/results/variants/candidateSV.vcf.gz \\
${prefix}.candidate_sv.vcf.gz
mv manta/results/variants/candidateSV.vcf.gz.tbi \
mv manta/results/variants/candidateSV.vcf.gz.tbi \\
${prefix}.candidate_sv.vcf.gz.tbi
mv manta/results/variants/diploidSV.vcf.gz \
mv manta/results/variants/diploidSV.vcf.gz \\
${prefix}.diploid_sv.vcf.gz
mv manta/results/variants/diploidSV.vcf.gz.tbi \
mv manta/results/variants/diploidSV.vcf.gz.tbi \\
${prefix}.diploid_sv.vcf.gz.tbi
cat <<-END_VERSIONS > versions.yml
Expand Down
5 changes: 5 additions & 0 deletions modules/nf-core/manta/germline/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,10 @@ input:
type: file
description: Genome reference FASTA index file
pattern: "*.{fa.fai,fasta.fai}"
- config:
type: file
description: Manta configuration file
pattern: "*.{ini,conf,config}"

output:
- meta:
Expand Down Expand Up @@ -96,3 +100,4 @@ output:
authors:
- "@maxulysse"
- "@ramprasadn"
- "@nvnieuwk"
62 changes: 45 additions & 17 deletions modules/nf-core/manta/somatic/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ process MANTA_SOMATIC {

input:
tuple val(meta), path(input_normal), path(input_index_normal), path(input_tumor), path(input_index_tumor), path(target_bed), path(target_bed_tbi)
path fasta
path fai
tuple val(meta2), path(fasta)
tuple val(meta3), path(fai)
path(config)

output:
tuple val(meta), path("*.candidate_small_indels.vcf.gz") , emit: candidate_small_indels_vcf
Expand All @@ -31,26 +32,53 @@ process MANTA_SOMATIC {
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def options_manta = target_bed ? "--callRegions $target_bed" : ""

def config_option = config ? "--config ${config}" : ""
"""
configManta.py \
--tumorBam $input_tumor \
--normalBam $input_normal \
--reference $fasta \
--runDir manta \
$options_manta \
configManta.py \\
--tumorBam $input_tumor \\
--normalBam $input_normal \\
--reference $fasta \\
${config_option} \\
--runDir manta \\
$options_manta \\
$args
python manta/runWorkflow.py -m local -j $task.cpus
mv manta/results/variants/candidateSmallIndels.vcf.gz ${prefix}.candidate_small_indels.vcf.gz
mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi ${prefix}.candidate_small_indels.vcf.gz.tbi
mv manta/results/variants/candidateSV.vcf.gz ${prefix}.candidate_sv.vcf.gz
mv manta/results/variants/candidateSV.vcf.gz.tbi ${prefix}.candidate_sv.vcf.gz.tbi
mv manta/results/variants/diploidSV.vcf.gz ${prefix}.diploid_sv.vcf.gz
mv manta/results/variants/diploidSV.vcf.gz.tbi ${prefix}.diploid_sv.vcf.gz.tbi
mv manta/results/variants/somaticSV.vcf.gz ${prefix}.somatic_sv.vcf.gz
mv manta/results/variants/somaticSV.vcf.gz.tbi ${prefix}.somatic_sv.vcf.gz.tbi
mv manta/results/variants/candidateSmallIndels.vcf.gz \\
${prefix}.candidate_small_indels.vcf.gz
mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi \\
${prefix}.candidate_small_indels.vcf.gz.tbi
mv manta/results/variants/candidateSV.vcf.gz \\
${prefix}.candidate_sv.vcf.gz
mv manta/results/variants/candidateSV.vcf.gz.tbi \\
${prefix}.candidate_sv.vcf.gz.tbi
mv manta/results/variants/diploidSV.vcf.gz \\
${prefix}.diploid_sv.vcf.gz
mv manta/results/variants/diploidSV.vcf.gz.tbi \\
${prefix}.diploid_sv.vcf.gz.tbi
mv manta/results/variants/somaticSV.vcf.gz \\
${prefix}.somatic_sv.vcf.gz
mv manta/results/variants/somaticSV.vcf.gz.tbi \\
${prefix}.somatic_sv.vcf.gz.tbi
cat <<-END_VERSIONS > versions.yml
"${task.process}":
manta: \$( configManta.py --version )
END_VERSIONS
"""

stub:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${prefix}.candidate_small_indels.vcf.gz
touch ${prefix}.candidate_small_indels.vcf.gz.tbi
touch ${prefix}.candidate_sv.vcf.gz
touch ${prefix}.candidate_sv.vcf.gz.tbi
touch ${prefix}.diploid_sv.vcf.gz
touch ${prefix}.diploid_sv.vcf.gz.tbi
touch ${prefix}.somatic_sv.vcf.gz
touch ${prefix}.somatic_sv.vcf.gz.tbi
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand Down
15 changes: 15 additions & 0 deletions modules/nf-core/manta/somatic/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,14 +47,28 @@ input:
type: file
description: Index for BED file containing target regions for variant calling
pattern: "*.{bed.tbi}"
- meta2:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'genome' ]
- fasta:
type: file
description: Genome reference FASTA file
pattern: "*.{fa,fasta}"
- meta3:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'genome' ]
- fai:
type: file
description: Genome reference FASTA index file
pattern: "*.{fa.fai,fasta.fai}"
- config:
type: file
description: Manta configuration file
pattern: "*.{ini,conf,config}"

output:
- meta:
Expand Down Expand Up @@ -101,3 +115,4 @@ output:

authors:
- "@FriederikeHanssen"
- "@nvnieuwk"
45 changes: 32 additions & 13 deletions modules/nf-core/manta/tumoronly/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@ process MANTA_TUMORONLY {

input:
tuple val(meta), path(input), path(input_index), path(target_bed), path(target_bed_tbi)
path fasta
path fai
tuple val(meta2), path(fasta)
tuple val(meta3), path(fai)
path(config)

output:
tuple val(meta), path("*candidate_small_indels.vcf.gz") , emit: candidate_small_indels_vcf
Expand All @@ -29,32 +30,50 @@ process MANTA_TUMORONLY {
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def options_manta = target_bed ? "--callRegions $target_bed" : ""
def config_option = config ? "--config ${config}" : ""
"""
configManta.py \
--tumorBam $input \
--reference $fasta \
--runDir manta \
$options_manta \
configManta.py \\
--tumorBam $input \\
--reference $fasta \\
${config_option} \\
--runDir manta \\
$options_manta \\
$args
python manta/runWorkflow.py -m local -j $task.cpus
mv manta/results/variants/candidateSmallIndels.vcf.gz \
mv manta/results/variants/candidateSmallIndels.vcf.gz \\
${prefix}.candidate_small_indels.vcf.gz
mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi \
mv manta/results/variants/candidateSmallIndels.vcf.gz.tbi \\
${prefix}.candidate_small_indels.vcf.gz.tbi
mv manta/results/variants/candidateSV.vcf.gz \
mv manta/results/variants/candidateSV.vcf.gz \\
${prefix}.candidate_sv.vcf.gz
mv manta/results/variants/candidateSV.vcf.gz.tbi \
mv manta/results/variants/candidateSV.vcf.gz.tbi \\
${prefix}.candidate_sv.vcf.gz.tbi
mv manta/results/variants/tumorSV.vcf.gz \
mv manta/results/variants/tumorSV.vcf.gz \\
${prefix}.tumor_sv.vcf.gz
mv manta/results/variants/tumorSV.vcf.gz.tbi \
mv manta/results/variants/tumorSV.vcf.gz.tbi \\
${prefix}.tumor_sv.vcf.gz.tbi
cat <<-END_VERSIONS > versions.yml
"${task.process}":
manta: \$( configManta.py --version )
END_VERSIONS
"""

stub:
// Stub run: creates empty placeholder files named after the prefix for the
// candidate small indel, candidate SV and tumor SV VCFs and their .tbi indexes,
// then writes versions.yml. Note that configManta.py is still invoked here to
// report its version.
def prefix = task.ext.prefix ?: "${meta.id}"
"""
touch ${prefix}.candidate_small_indels.vcf.gz
touch ${prefix}.candidate_small_indels.vcf.gz.tbi
touch ${prefix}.candidate_sv.vcf.gz
touch ${prefix}.candidate_sv.vcf.gz.tbi
touch ${prefix}.tumor_sv.vcf.gz
touch ${prefix}.tumor_sv.vcf.gz.tbi
cat <<-END_VERSIONS > versions.yml
"${task.process}":
manta: \$( configManta.py --version )
END_VERSIONS
"""
}
Loading

0 comments on commit fb224b2

Please sign in to comment.