bwamem2/mem: support SAM/BAM/CRAM output selected through task.ext.args2

The output extension is derived from the samtools -O/--output-fmt option found
in args2 (default "bam"). CRAM output requires the new fasta input, which is
passed to samtools as --reference. The option is recognised in the forms
"-O cram", "-Ocram", "--output-fmt cram" and "--output-fmt=cram"; only the
leading format name is used as the extension, so a ",OPT=VAL" suffix after the
format does not end up in the file name.

diff --git a/modules/nf-core/bwamem2/mem/main.nf b/modules/nf-core/bwamem2/mem/main.nf
index 29f90778777..729428c4e07 100644
--- a/modules/nf-core/bwamem2/mem/main.nf
+++ b/modules/nf-core/bwamem2/mem/main.nf
@@ -10,11 +10,16 @@ process BWAMEM2_MEM {
     input:
     tuple val(meta), path(reads)
     tuple val(meta2), path(index)
+    tuple val(meta3), path(fasta)
     val sort_bam
 
     output:
-    tuple val(meta), path("*.bam"), emit: bam
-    path "versions.yml" , emit: versions
+    tuple val(meta), path("*.sam")  , emit: sam , optional:true
+    tuple val(meta), path("*.bam")  , emit: bam , optional:true
+    tuple val(meta), path("*.cram") , emit: cram, optional:true
+    tuple val(meta), path("*.crai") , emit: crai, optional:true
+    tuple val(meta), path("*.csi")  , emit: csi , optional:true
+    path  "versions.yml"            , emit: versions
 
     when:
     task.ext.when == null || task.ext.when
@@ -24,6 +29,13 @@ process BWAMEM2_MEM {
     def args2 = task.ext.args2 ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
     def samtools_command = sort_bam ? 'sort' : 'view'
+
+    def extension_pattern = /(?<!\S)(--output-fmt[=\s]|-O)\s*([A-Za-z]+)/
+    def extension_matcher = (args2 =~ extension_pattern)
+    def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam"
+    def reference = fasta && extension=="cram" ? "--reference ${fasta}" : ""
+    if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output"
+
     """
     INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'`
 
@@ -33,7 +45,7 @@ process BWAMEM2_MEM {
         -t $task.cpus \\
         \$INDEX \\
         $reads \\
-        | samtools $samtools_command $args2 -@ $task.cpus -o ${prefix}.bam -
+        | samtools $samtools_command $args2 -@ $task.cpus ${reference} -o ${prefix}.${extension} -
 
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
@@ -43,9 +55,28 @@ process BWAMEM2_MEM {
     """
 
     stub:
+
+    def args = task.ext.args ?: ''
+    def args2 = task.ext.args2 ?: ''
     def prefix = task.ext.prefix ?: "${meta.id}"
+    def samtools_command = sort_bam ? 'sort' : 'view'
+    def extension_pattern = /(?<!\S)(--output-fmt[=\s]|-O)\s*([A-Za-z]+)/
+    def extension_matcher = (args2 =~ extension_pattern)
+    def extension = extension_matcher.getCount() > 0 ? extension_matcher[0][2].toLowerCase() : "bam"
+    def reference = fasta && extension=="cram" ? "--reference ${fasta}" : ""
+    if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output"
+
+    def create_index = ""
+    if (extension == "cram") {
+        create_index = "touch ${prefix}.crai"
+    } else if (extension == "bam") {
+        create_index = "touch ${prefix}.csi"
+    }
+
     """
-    touch ${prefix}.bam
+    touch ${prefix}.${extension}
+    ${create_index}
+
     cat <<-END_VERSIONS > versions.yml
     "${task.process}":
         bwamem2: \$(echo \$(bwa-mem2 version 2>&1) | sed 's/.* //')
diff --git a/modules/nf-core/bwamem2/mem/meta.yml b/modules/nf-core/bwamem2/mem/meta.yml
index 04891b26a97..931f712943d 100644
--- a/modules/nf-core/bwamem2/mem/meta.yml
+++ b/modules/nf-core/bwamem2/mem/meta.yml
@@ -37,6 +37,15 @@ input:
       type: file
       description: BWA genome index files
       pattern: "Directory containing BWA index *.{0132,amb,ann,bwt.2bit.64,pac}"
+  - meta3:
+      type: map
+      description: |
+        Groovy Map containing reference information
+        e.g. [ id:'genome' ]
+  - fasta:
+      type: file
+      description: Reference genome in FASTA format
+      pattern: "*.{fa,fasta,fna}"
   - sort_bam:
       type: boolean
       description: use samtools sort (true) or samtools view (false)
@@ -47,15 +56,33 @@ output:
       description: |
         Groovy Map containing sample information
         e.g. [ id:'test', single_end:false ]
+  - sam:
+      type: file
+      description: Output SAM file containing read alignments
+      pattern: "*.{sam}"
   - bam:
       type: file
       description: Output BAM file containing read alignments
       pattern: "*.{bam}"
+  - cram:
+      type: file
+      description: Output CRAM file containing read alignments
+      pattern: "*.{cram}"
+  - crai:
+      type: file
+      description: Index file for CRAM file
+      pattern: "*.{crai}"
+  - csi:
+      type: file
+      description: Index file for BAM file
+      pattern: "*.{csi}"
   - versions:
       type: file
       description: File containing software versions
       pattern: "versions.yml"
 authors:
   - "@maxulysse"
+  - "@matthdsm"
 maintainers:
   - "@maxulysse"
+  - "@matthdsm"
diff --git a/modules/nf-core/bwamem2/mem/tests/main.nf.test b/modules/nf-core/bwamem2/mem/tests/main.nf.test
index 365a0c433c1..5e67f70b6a2 100644
--- a/modules/nf-core/bwamem2/mem/tests/main.nf.test
+++ b/modules/nf-core/bwamem2/mem/tests/main.nf.test
@@ -10,7 +10,7 @@ nextflow_process {
     tag "bwamem2/mem"
     tag "bwamem2/index"
 
-    test("sarscov2 - fastq, index, false") {
+    test("sarscov2 - fastq, index, fasta, false") {
 
         setup {
             run("BWAMEM2_INDEX") {
@@ -34,7 +34,8 @@ nextflow_process {
                     [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)]
                 ])
                 input[1] = BWAMEM2_INDEX.out.index
-                input[2] = false
+                input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]])
+                input[3] = false
                 """
             }
         }
@@ -51,7 +52,7 @@ nextflow_process {
 
     }
 
-    test("sarscov2 - fastq, index, true") {
+    test("sarscov2 - fastq, index, fasta, true") {
 
         setup {
             run("BWAMEM2_INDEX") {
@@ -75,7 +76,8 @@ nextflow_process {
                     [file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)]
                 ])
                 input[1] = BWAMEM2_INDEX.out.index
-                input[2] = true
+                input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]])
+                input[3] = true
                 """
             }
         }
@@ -92,7 +94,7 @@ nextflow_process {
 
     }
 
-    test("sarscov2 - [fastq1, fastq2], index, false") {
+    test("sarscov2 - [fastq1, fastq2], index, fasta, false") {
 
         setup {
             run("BWAMEM2_INDEX") {
@@ -119,7 +121,8 @@ nextflow_process {
                     ]
                 ])
                 input[1] = BWAMEM2_INDEX.out.index
-                input[2] = false
+                input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]])
+                input[3] = false
                 """
             }
         }
@@ -136,7 +139,7 @@ nextflow_process {
 
     }
 
-    test("sarscov2 - [fastq1, fastq2], index, true") {
+    test("sarscov2 - [fastq1, fastq2], index, fasta, true") {
 
         setup {
             run("BWAMEM2_INDEX") {
@@ -163,7 +166,8 @@ nextflow_process {
                     ]
                 ])
                 input[1] = BWAMEM2_INDEX.out.index
-                input[2] = true
+                input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]])
+                input[3] = true
                 """
             }
         }
@@ -180,7 +184,7 @@ nextflow_process {
 
     }
 
-    test("sarscov2 - [fastq1, fastq2], index, true - stub") {
+    test("sarscov2 - [fastq1, fastq2], index, fasta, true - stub") {
 
         options "-stub"
 
@@ -209,7 +213,8 @@ nextflow_process {
                     ]
                 ])
                 input[1] = BWAMEM2_INDEX.out.index
-                input[2] = true
+                input[2] = Channel.of([[:], [file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]])
+                input[3] = true
                 """
             }
         }
diff --git a/modules/nf-core/bwamem2/mem/tests/main.nf.test.snap b/modules/nf-core/bwamem2/mem/tests/main.nf.test.snap
index 84be71c60c7..9fb1e69d07d 100644
--- a/modules/nf-core/bwamem2/mem/tests/main.nf.test.snap
+++ b/modules/nf-core/bwamem2/mem/tests/main.nf.test.snap
@@ -1,5 +1,5 @@
 {
-    "sarscov2 - [fastq1, fastq2], index, true": {
+    "sarscov2 - [fastq1, fastq2], index, fasta, false": {
         "content": [
             "test.bam",
             [
@@ -8,11 +8,11 @@
         ],
         "meta": {
             "nf-test": "0.8.4",
-            "nextflow": "24.01.0"
+            "nextflow": "23.10.1"
         },
-        "timestamp": "2024-02-19T13:30:22.691288603"
+        "timestamp": "2024-03-19T13:13:18.890289958"
     },
-    "sarscov2 - [fastq1, fastq2], index, false": {
+    "sarscov2 - [fastq1, fastq2], index, fasta, true - stub": {
         "content": [
             "test.bam",
             [
@@ -21,11 +21,11 @@
         ],
         "meta": {
             "nf-test": "0.8.4",
-            "nextflow": "24.01.0"
+            "nextflow": "23.10.1"
         },
-        "timestamp": "2024-02-19T13:30:11.276168706"
+        "timestamp": "2024-03-19T13:45:51.821633029"
     },
-    "sarscov2 - [fastq1, fastq2], index, true - stub": {
+    "sarscov2 - [fastq1, fastq2], index, fasta, true": {
         "content": [
             "test.bam",
             [
@@ -34,11 +34,11 @@
         ],
         "meta": {
             "nf-test": "0.8.4",
-            "nextflow": "24.01.0"
+            "nextflow": "23.10.1"
         },
-        "timestamp": "2024-02-19T13:30:32.07431961"
+        "timestamp": "2024-03-19T13:13:36.458291078"
     },
-    "sarscov2 - fastq, index, false": {
+    "sarscov2 - fastq, index, fasta, false": {
         "content": [
             "test.bam",
             [
@@ -47,11 +47,11 @@
         ],
         "meta": {
             "nf-test": "0.8.4",
-            "nextflow": "24.01.0"
+            "nextflow": "23.10.1"
         },
-        "timestamp": "2024-02-19T13:29:48.586760544"
+        "timestamp": "2024-03-19T13:12:44.084654507"
     },
-    "sarscov2 - fastq, index, true": {
+    "sarscov2 - fastq, index, fasta, true": {
         "content": [
             "test.bam",
             [
@@ -60,8 +60,8 @@
         ],
         "meta": {
             "nf-test": "0.8.4",
-            "nextflow": "24.01.0"
+            "nextflow": "23.10.1"
         },
-        "timestamp": "2024-02-19T13:29:59.846686393"
+        "timestamp": "2024-03-19T13:13:01.763341681"
     }
 }
\ No newline at end of file
diff --git a/subworkflows/nf-core/fastq_align_dna/main.nf b/subworkflows/nf-core/fastq_align_dna/main.nf
index f907b16e47a..b710a4a6403 100644
--- a/subworkflows/nf-core/fastq_align_dna/main.nf
+++ b/subworkflows/nf-core/fastq_align_dna/main.nf
@@ -42,7 +42,7 @@ workflow FASTQ_ALIGN_DNA {
             ch_versions = ch_versions.mix(BWAMEM1_MEM.out.versions)
             break
         case 'bwamem2':
-            BWAMEM2_MEM (ch_reads, ch_aligner_index, sort) // If aligner is bwa-mem2
+            BWAMEM2_MEM (ch_reads, ch_aligner_index, ch_fasta, sort) // If aligner is bwa-mem2
             ch_bam = ch_bam.mix(BWAMEM2_MEM.out.bam)
             ch_versions = ch_versions.mix(BWAMEM2_MEM.out.versions)
             break