Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bump fgbio versions and swap to nftest #5624

Merged
merged 18 commits into from
Jul 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- bioconda::fgbio=2.0.2
- bioconda::fgbio=2.2.1
34 changes: 24 additions & 10 deletions modules/nf-core/fgbio/callduplexconsensusreads/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,26 +4,24 @@ process FGBIO_CALLDUPLEXCONSENSUSREADS {

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/fgbio:2.0.2--hdfd78af_0' :
'biocontainers/fgbio:2.0.2--hdfd78af_0' }"
'https://depot.galaxyproject.org/singularity/fgbio:2.2.1--hdfd78af_0' :
'biocontainers/fgbio:2.2.1--hdfd78af_0' }"

input:
tuple val(meta), path(bam)
// please note:
// --min-reads is a required argument with no default
// --min-input-base-quality is a required argument with no default
// make sure they are specified via ext.args in your config
tuple val(meta), path(grouped_bam)
val min_reads
val min_baseq

output:
tuple val(meta), path("${prefix}.bam"), emit: bam
path "versions.yml" , emit: versions
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
prefix = task.ext.prefix ?: "${meta.id}_consensus"
prefix = task.ext.prefix ?: "${meta.id}_consensus_unmapped"
SPPearce marked this conversation as resolved.
Show resolved Hide resolved

def mem_gb = 8
if (!task.memory) {
Expand All @@ -35,6 +33,7 @@ process FGBIO_CALLDUPLEXCONSENSUSREADS {
mem_gb = task.memory.giga - 1
}
}
if ("$grouped_bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"

"""
fgbio \\
Expand All @@ -43,8 +42,10 @@ process FGBIO_CALLDUPLEXCONSENSUSREADS {
--async-io=true \\
--compression=1 \\
CallDuplexConsensusReads \\
--input $bam \\
--input $grouped_bam \\
--output ${prefix}.bam \\
--min-reads ${min_reads} \\
--min-input-base-quality ${min_baseq} \\
--threads ${task.cpus} \\
$args

Expand All @@ -53,4 +54,17 @@ process FGBIO_CALLDUPLEXCONSENSUSREADS {
fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
END_VERSIONS
"""

stub:
prefix = task.ext.prefix ?: "${meta.id}_consensus_unmapped"
if ("$grouped_bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
"""
touch ${prefix}.bam

cat <<-END_VERSIONS > versions.yml
"${task.process}":
fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
END_VERSIONS
"""

}
11 changes: 8 additions & 3 deletions modules/nf-core/fgbio/callduplexconsensusreads/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,18 +10,23 @@ tools:
homepage: http://fulcrumgenomics.github.io/fgbio/
documentation: http://fulcrumgenomics.github.io/fgbio/tools/latest/CallDuplexConsensusReads.html
tool_dev_url: https://github.com/fulcrumgenomics/fgbio
licence: "['MIT']"
licence: ["MIT"]
input:
# Only when we have meta
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- bam:
type: file
description: BAM/SAM file
description: BAM/SAM file, grouped by UMI
pattern: "*.{bam,sam}"
- min_reads:
type: string
description: Minimum number of raw/original reads to build each consensus read. Can be a space delimited list of 1-3 values. See fgbio documentation for more details.
- min_baseq:
type: integer
description: Ignore bases in raw reads that have Q below this value
output:
- meta:
type: map
Expand Down
62 changes: 62 additions & 0 deletions modules/nf-core/fgbio/callduplexconsensusreads/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// nf-test suite for the FGBIO_CALLDUPLEXCONSENSUSREADS process defined in ../main.nf.
// Holds two test cases that feed the process identical inputs: a regular run
// ("homo_sapiens - bam") and a run with the "-stub" option ("homo_sapiens - stub").
nextflow_process {

name "Test Process FGBIO_CALLDUPLEXCONSENSUSREADS"
script "../main.nf"
process "FGBIO_CALLDUPLEXCONSENSUSREADS"

// Tags attached to this suite, from the most general to the module-specific one.
tag "modules"
tag "modules_nfcore"
tag "fgbio"
tag "fgbio/callduplexconsensusreads"

// Regular run on the duplex UMI-grouped paired-end test BAM.
test("homo_sapiens - bam") {

when {
// input[0] is the [meta map, BAM file] tuple; input[1] = 3 and input[2] = 20 are
// the two value inputs (presumably min_reads and min_baseq, in the order the
// process declares them - confirm against ../main.nf).
process {
"""
input[0] = [
[ id:'test', single_end:false ], // meta map
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_grouped.bam', checkIfExists: true)
]
input[1] = 3
input[2] = 20
"""
}
}

then {
// The process must succeed and its full output must match the snapshot.
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

// Same inputs as the test above, but run with the "-stub" option
// (presumably so that the process's stub: section is executed instead of the script).
test("homo_sapiens - stub") {

options "-stub"

when {
// Inputs are identical to the "homo_sapiens - bam" test.
process {
"""
input[0] = [
[ id:'test', single_end:false ], // meta map
file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/bam/umi/test.paired_end.duplex_umi_grouped.bam', checkIfExists: true)
]
input[1] = 3
input[2] = 20
"""
}
}

then {
// The process must succeed and its full output must match the snapshot.
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

}
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
{
"homo_sapiens - stub": {
"content": [
{
"0": [
[
{
"id": "test",
"single_end": false
},
"test_consensus_unmapped.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"1": [
"versions.yml:md5,7277dba1bc055b578eb6d8d6af43b128"
],
"bam": [
[
{
"id": "test",
"single_end": false
},
"test_consensus_unmapped.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"versions": [
"versions.yml:md5,7277dba1bc055b578eb6d8d6af43b128"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "24.04.2"
},
"timestamp": "2024-07-02T17:44:41.656625835"
},
"homo_sapiens - bam": {
"content": [
{
"0": [
[
{
"id": "test",
"single_end": false
},
"test_consensus_unmapped.bam:md5,4f0e87feb7601d06617c9f29d7aec352"
]
],
"1": [
"versions.yml:md5,7277dba1bc055b578eb6d8d6af43b128"
],
"bam": [
[
{
"id": "test",
"single_end": false
},
"test_consensus_unmapped.bam:md5,4f0e87feb7601d06617c9f29d7aec352"
]
],
"versions": [
"versions.yml:md5,7277dba1bc055b578eb6d8d6af43b128"
]
}
],
"meta": {
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
"timestamp": "2024-05-17T06:05:28.894178772"
}
}
2 changes: 2 additions & 0 deletions modules/nf-core/fgbio/callduplexconsensusreads/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Associates the fgbio/callduplexconsensusreads tag with every path under this module's directory.
fgbio/callduplexconsensusreads:
- "modules/nf-core/fgbio/callduplexconsensusreads/**"
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- bioconda::fgbio=2.0.2
- bioconda::fgbio=2.2.1
41 changes: 34 additions & 7 deletions modules/nf-core/fgbio/callmolecularconsensusreads/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@ process FGBIO_CALLMOLECULARCONSENSUSREADS {

conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/fgbio:2.0.2--hdfd78af_0' :
'biocontainers/fgbio:2.0.2--hdfd78af_0' }"
'https://depot.galaxyproject.org/singularity/fgbio:2.2.1--hdfd78af_0' :
'biocontainers/fgbio:2.2.1--hdfd78af_0' }"

input:
tuple val(meta), path(bam)
tuple val(meta), path(grouped_bam)
val min_reads
val min_baseq

output:
tuple val(meta), path("*.bam"), emit: bam
Expand All @@ -19,19 +21,44 @@ process FGBIO_CALLMOLECULARCONSENSUSREADS {

script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def prefix = task.ext.prefix ?: "${meta.id}_consensus_unmapped"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did you add this custom suffix here?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It needs some kind of renaming to prevent the output from colliding with the input file. Some of the subtools already do this, some don't. However, this one is now setting a double prefix in the output file, so that needs fixing.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You can add a check for that (a lot of modules do this already) => https://nf-co.re/docs/guidelines/components/modules#command-file-output-naming

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, it needs the checks regardless.
I think it is better to have a default prefix that doesn't require you to add one.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not one to argue with that, but I don't think deviating from the guidelines is a good idea; they're there for a reason. Maybe you could bring this up on Slack?

SPPearce marked this conversation as resolved.
Show resolved Hide resolved
def mem_gb = 8
if (!task.memory) {
log.info '[fgbio CallMolecularConsensusReads] Available memory not known - defaulting to 8GB. Specify process memory requirements to change this.'
} else {
mem_gb = task.memory.giga
}
if ("$grouped_bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
"""
fgbio \\
-Xmx${mem_gb}g \\
--tmp-dir=. \\
--async-io=true \\
--compression=1 \\
CallMolecularConsensusReads \\
--input $bam \\
--input $grouped_bam \\
--output ${prefix}.bam \\
--min-reads ${min_reads} \\
--min-input-base-quality ${min_baseq} \\
--threads ${task.cpus} \\
$args \\
--output ${prefix}.bam
$args;

cat <<-END_VERSIONS > versions.yml
"${task.process}":
fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
END_VERSIONS
"""

stub:
prefix = task.ext.prefix ?: "${meta.id}_consensus_unmapped"
SPPearce marked this conversation as resolved.
Show resolved Hide resolved
if ("$grouped_bam" == "${prefix}.bam") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
"""
touch ${prefix}.bam

cat <<-END_VERSIONS > versions.yml
"${task.process}":
fgbio: \$( echo \$(fgbio --version 2>&1 | tr -d '[:cntrl:]' ) | sed -e 's/^.*Version: //;s/\\[.*\$//')
END_VERSIONS
"""

}
11 changes: 8 additions & 3 deletions modules/nf-core/fgbio/callmolecularconsensusreads/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ keywords:
- UMIs
- consensus sequence
- bam
- sam
tools:
- fgbio:
description: Tools for working with genomic and high throughput sequencing data.
Expand All @@ -17,11 +16,17 @@ input:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false, collapse:false ]
- bam:
- grouped_bam:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto, does the main.nf need to be updated?

type: file
description: |
The input SAM or BAM file.
The input SAM or BAM file, grouped by UMIs
pattern: "*.{bam,sam}"
- min_reads:
type: integer
description: Minimum number of original reads to build each consensus read.
- min_baseq:
type: integer
description: Ignore bases in raw reads that have Q below this value.
output:
- meta:
type: map
Expand Down
Loading
Loading