Skip to content

Commit

Permalink
add cram/index support to bwamem2 (nf-core#5248)
Browse files Browse the repository at this point in the history
* add cram/index support to bwamem2

* update tests
  • Loading branch information
matthdsm authored and tucano committed Mar 20, 2024
1 parent e9ad5b0 commit aaf749e
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 30 deletions.
39 changes: 35 additions & 4 deletions modules/nf-core/bwamem2/mem/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,16 @@ process BWAMEM2_MEM {
input:
tuple val(meta), path(reads)
tuple val(meta2), path(index)
tuple val(meta3), path(fasta)
val sort_bam

output:
tuple val(meta), path("*.bam"), emit: bam
path "versions.yml" , emit: versions
tuple val(meta), path("*.sam") , emit: sam , optional:true
tuple val(meta), path("*.bam") , emit: bam , optional:true
tuple val(meta), path("*.cram") , emit: cram, optional:true
tuple val(meta), path("*.crai") , emit: crai, optional:true
tuple val(meta), path("*.csi") , emit: csi , optional:true
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when
Expand All @@ -24,6 +29,13 @@ process BWAMEM2_MEM {
def args2 = task.ext.args2 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def samtools_command = sort_bam ? 'sort' : 'view'

def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/
def extension_matcher = (args2 =~ extension_pattern)
def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam"
def reference = fasta && extension=="cram" ? "--reference ${fasta}" : ""
if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output"

"""
INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'`
Expand All @@ -33,7 +45,7 @@ process BWAMEM2_MEM {
-t $task.cpus \\
\$INDEX \\
$reads \\
| samtools $samtools_command $args2 -@ $task.cpus -o ${prefix}.bam -
| samtools $samtools_command $args2 -@ $task.cpus ${reference} -o ${prefix}.${extension} -
cat <<-END_VERSIONS > versions.yml
"${task.process}":
Expand All @@ -43,9 +55,28 @@ process BWAMEM2_MEM {
"""

stub:

def args = task.ext.args ?: ''
def args2 = task.ext.args2 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def samtools_command = sort_bam ? 'sort' : 'view'
def extension_pattern = /(--output-fmt|-O)+\s+(\S+)/
def extension_matcher = (args2 =~ extension_pattern)
def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam"
def reference = fasta && extension=="cram" ? "--reference ${fasta}" : ""
if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output"

def create_index = ""
if (extension == "cram") {
create_index = "touch ${prefix}.crai"
} else if (extension == "bam") {
create_index = "touch ${prefix}.csi"
}

"""
touch ${prefix}.bam
touch ${prefix}.${extension}
${create_index}
cat <<-END_VERSIONS > versions.yml
"${task.process}":
bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //')
Expand Down
27 changes: 27 additions & 0 deletions modules/nf-core/bwamem2/mem/meta.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,15 @@ input:
type: file
description: BWA genome index files
pattern: "Directory containing BWA index *.{0132,amb,ann,bwt.2bit.64,pac}"
- meta3:
type: map
description: |
Groovy Map containing reference information
e.g. [ id:'genome' ]
- fasta:
type: file
description: Reference genome in FASTA format
pattern: "*.{fa,fasta,fna}"
- sort_bam:
type: boolean
description: use samtools sort (true) or samtools view (false)
Expand All @@ -47,15 +56,33 @@ output:
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- sam:
type: file
description: Output SAM file containing read alignments
pattern: "*.{sam}"
- bam:
type: file
description: Output BAM file containing read alignments
pattern: "*.{bam}"
- cram:
type: file
description: Output CRAM file containing read alignments
pattern: "*.{cram}"
- crai:
type: file
description: Index file for CRAM file
pattern: "*.{crai}"
- csi:
type: file
description: Index file for BAM file
pattern: "*.{csi}"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- "@maxulysse"
- "@matthdsm"
maintainers:
- "@maxulysse"
- "@matthdsm"
25 changes: 15 additions & 10 deletions modules/nf-core/bwamem2/mem/tests/main.nf.test
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ nextflow_process {
tag "bwamem2/mem"
tag "bwamem2/index"

test("sarscov2 - fastq, index, false") {
test("sarscov2 - fastq, index, fasta, false") {

setup {
run("BWAMEM2_INDEX") {
Expand All @@ -34,7 +34,8 @@ nextflow_process {
[file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)]
])
input[1] = BWAMEM2_INDEX.out.index
input[2] = false
input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]])
input[3] = false
"""
}
}
Expand All @@ -51,7 +52,7 @@ nextflow_process {

}

test("sarscov2 - fastq, index, true") {
test("sarscov2 - fastq, index, fasta, true") {

setup {
run("BWAMEM2_INDEX") {
Expand All @@ -75,7 +76,8 @@ nextflow_process {
[file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)]
])
input[1] = BWAMEM2_INDEX.out.index
input[2] = true
input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]])
input[3] = true
"""
}
}
Expand All @@ -92,7 +94,7 @@ nextflow_process {

}

test("sarscov2 - [fastq1, fastq2], index, false") {
test("sarscov2 - [fastq1, fastq2], index, fasta, false") {

setup {
run("BWAMEM2_INDEX") {
Expand All @@ -119,7 +121,8 @@ nextflow_process {
]
])
input[1] = BWAMEM2_INDEX.out.index
input[2] = false
input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]])
input[3] = false
"""
}
}
Expand All @@ -136,7 +139,7 @@ nextflow_process {

}

test("sarscov2 - [fastq1, fastq2], index, true") {
test("sarscov2 - [fastq1, fastq2], index, fasta, true") {

setup {
run("BWAMEM2_INDEX") {
Expand All @@ -163,7 +166,8 @@ nextflow_process {
]
])
input[1] = BWAMEM2_INDEX.out.index
input[2] = true
input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]])
input[3] = true
"""
}
}
Expand All @@ -180,7 +184,7 @@ nextflow_process {

}

test("sarscov2 - [fastq1, fastq2], index, true - stub") {
test("sarscov2 - [fastq1, fastq2], index, fasta, true - stub") {

options "-stub"

Expand Down Expand Up @@ -209,7 +213,8 @@ nextflow_process {
]
])
input[1] = BWAMEM2_INDEX.out.index
input[2] = true
input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]])
input[3] = true
"""
}
}
Expand Down
30 changes: 15 additions & 15 deletions modules/nf-core/bwamem2/mem/tests/main.nf.test.snap

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion subworkflows/nf-core/fastq_align_dna/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ workflow FASTQ_ALIGN_DNA {
ch_versions = ch_versions.mix(BWAMEM1_MEM.out.versions)
break
case 'bwamem2':
BWAMEM2_MEM (ch_reads, ch_aligner_index, sort) // If aligner is bwa-mem2
BWAMEM2_MEM (ch_reads, ch_aligner_index, ch_fasta, sort) // If aligner is bwa-mem2
ch_bam = ch_bam.mix(BWAMEM2_MEM.out.bam)
ch_versions = ch_versions.mix(BWAMEM2_MEM.out.versions)
break
Expand Down

0 comments on commit aaf749e

Please sign in to comment.