diff --git a/modules/nf-core/kaiju/mergeoutputs/environment.yml b/modules/nf-core/kaiju/mergeoutputs/environment.yml
new file mode 100644
index 00000000000..c42f573c131
--- /dev/null
+++ b/modules/nf-core/kaiju/mergeoutputs/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "kaiju_mergeoutputs"
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - "bioconda::kaiju=1.10.0"
diff --git a/modules/nf-core/kaiju/mergeoutputs/main.nf b/modules/nf-core/kaiju/mergeoutputs/main.nf
new file mode 100644
index 00000000000..a3c5296e859
--- /dev/null
+++ b/modules/nf-core/kaiju/mergeoutputs/main.nf
@@ -0,0 +1,54 @@
+process KAIJU_MERGEOUTPUTS {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/kaiju:1.10.0--h43eeafb_0':
+        'biocontainers/kaiju:1.10.0--h43eeafb_0' }"
+
+    input:
+    tuple val(meta), path(kaiju), path(kraken)
+    path (db)
+
+    output:
+    tuple val(meta), path("*.tsv"), emit: merged
+    path "versions.yml"           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args    = task.ext.args ?: ''
+    def prefix  = task.ext.prefix ?: "${meta.id}"
+    // Double-quoted so Groovy interpolates the db path; command substitution (not process
+    // substitution) so that -t is given the nodes.dmp path rather than a stream containing it.
+    def dbnodes = db ? "-t \$(find -L ${db} -name '*nodes.dmp')" : ''
+    // Refuse to run when the output file name would collide with one of the staged inputs.
+    if ("$kaiju" == "${prefix}.tsv") error "Input and output names are the same, set prefix in module configuration to disambiguate!"
+    if ("$kraken" == "${prefix}.tsv") error "Input and output names are the same, set prefix in module configuration to disambiguate!"
+    """
+    kaiju-mergeOutputs \\
+        -i <(sort -k2,2 ${kaiju}) \\
+        -j <(sort -k2,2 ${kraken}) \\
+        -o ${prefix}.tsv \\
+        $dbnodes \\
+        $args
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' ))
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}"
+    """
+    touch ${prefix}.tsv
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        kaiju: \$(echo \$( kaiju -h 2>&1 | sed -n 1p | sed 's/^.*Kaiju //' ))
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/kaiju/mergeoutputs/meta.yml b/modules/nf-core/kaiju/mergeoutputs/meta.yml
new file mode 100644
index 00000000000..4ec5000344c
--- /dev/null
+++ b/modules/nf-core/kaiju/mergeoutputs/meta.yml
@@ -0,0 +1,55 @@
+name: kaiju_mergeoutputs
+description: Merge two tab-separated output files of Kaiju and Kraken in the column format
+keywords:
+  - classify
+  - metagenomics
+  - fastq
+  - taxonomic profiling
+tools:
+  - kaiju:
+      description: Fast and sensitive taxonomic classification for metagenomics
+      homepage: https://kaiju.binf.ku.dk/
+      documentation: https://github.com/bioinformatics-centre/kaiju/blob/master/README.md
+      tool_dev_url: https://github.com/bioinformatics-centre/kaiju
+      doi: "10.1038/ncomms11257"
+      licence: ["GNU GPL v3"]
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - kaiju:
+      type: file
+      description: |
+        Results with taxonomic classification of each read from Kaiju
+      pattern: "*.tsv"
+  - kraken:
+      type: file
+      description: |
+        Results with taxonomic classification of each read from Kraken
+      pattern: "*.{txt,tsv}"
+  - db:
+      type: directory
+      description: |
+        Directory containing the Kaiju database; it is searched for the
+        `*nodes.dmp` taxonomy file, which is passed to `kaiju-mergeOutputs -t`.
+        Optional: provide `[]` to run without a taxonomy file.
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. [ id:'test', single_end:false ]
+  - merged:
+      type: file
+      description: Results with merged taxonomic classification of each read based on the given strategy '1', '2', 'lca' [default] or 'lowest'
+      pattern: "*.tsv"
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+authors:
+  - "@Joon-Klaps"
+maintainers:
+  - "@Joon-Klaps"
diff --git a/modules/nf-core/kaiju/mergeoutputs/tests/db.config b/modules/nf-core/kaiju/mergeoutputs/tests/db.config
new file mode 100644
index 00000000000..257dba4704b
--- /dev/null
+++ b/modules/nf-core/kaiju/mergeoutputs/tests/db.config
@@ -0,0 +1,5 @@
+process {
+    withName: KAIJU_MERGEOUTPUTS {
+        ext.prefix = { "${meta.id}_merged" }
+    }
+}
diff --git a/modules/nf-core/kaiju/mergeoutputs/tests/main.nf.test b/modules/nf-core/kaiju/mergeoutputs/tests/main.nf.test
new file mode 100644
index 00000000000..5c2d5b3159d
--- /dev/null
+++ b/modules/nf-core/kaiju/mergeoutputs/tests/main.nf.test
@@ -0,0 +1,137 @@
+nextflow_process {
+
+    name "Test Process KAIJU_MERGEOUTPUTS"
+    script "../main.nf"
+    process "KAIJU_MERGEOUTPUTS"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "kaiju"
+    tag "untar"
+    tag "kaiju/kaiju"
+    tag "kraken2/kraken2"
+    tag "kaiju/mergeoutputs"
+
+    // Classify the same reads with Kraken2 and Kaiju so every test has both inputs to merge.
+    setup {
+        run("UNTAR", alias: "UNTAR_KRAKEN2") {
+            config "./nodb.config"
+            script "../../../untar/main.nf"
+            process {
+                """
+                input[0] = Channel.of([
+                    [],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true)
+                ])
+                """
+            }
+        }
+        run("KRAKEN2_KRAKEN2") {
+            config "./nodb.config"
+            script "../../../kraken2/kraken2/main.nf"
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:true ],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+                ]
+                input[1] = UNTAR_KRAKEN2.out.untar.map{ it[1] }
+                input[2] = false
+                input[3] = true
+                """
+            }
+        }
+        run("UNTAR", alias: "UNTAR_KAIJU") {
+            config "./nodb.config"
+            script "../../../untar/main.nf"
+            process {
+                """
+                input[0] = Channel.of([
+                    [],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kaiju.tar.gz', checkIfExists: true)
+                ])
+                """
+            }
+        }
+        run("KAIJU_KAIJU") {
+            config "./nodb.config"
+            script "../../../kaiju/kaiju/main.nf"
+            process {
+                """
+                input[0] = [
+                    [ id:'test', single_end:true ],
+                    file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+                ]
+                input[1] = UNTAR_KAIJU.out.untar.map{ it[1] }
+                """
+            }
+        }
+    }
+
+    // No taxonomy directory: the conflict strategy comes from ext.args in nodb.config.
+    test("sarscov2 - kraken2.kaiju.classified - noDB ") {
+        config "./nodb.config"
+
+        when {
+            process {
+                """
+                input[0] = KAIJU_KAIJU.out.results.join(KRAKEN2_KRAKEN2.out.classified_reads_assignment)
+                input[1] = []
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    // Kaiju database directory supplied, so the module adds -t with its nodes.dmp.
+    test("sarscov2 - kraken2.kaiju.classified - kaiju.DB ") {
+        config "./db.config"
+        when {
+            process {
+                """
+                input[0] = KAIJU_KAIJU.out.results.join(KRAKEN2_KRAKEN2.out.classified_reads_assignment)
+                input[1] = UNTAR_KAIJU.out.untar.map{ it[1] }
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+    // Stub run: exercises the stub block of the process instead of the real command.
+    test("sarscov2 - kraken2.kaiju.classified - noDB - stub ") {
+        config "./nodb.config"
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = KAIJU_KAIJU.out.results.join(KRAKEN2_KRAKEN2.out.classified_reads_assignment)
+                input[1] = []
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out).match() }
+            )
+        }
+
+    }
+
+}
diff --git a/modules/nf-core/kaiju/mergeoutputs/tests/main.nf.test.snap b/modules/nf-core/kaiju/mergeoutputs/tests/main.nf.test.snap
new file mode 100644
index 00000000000..7ee12505930
--- /dev/null
+++ b/modules/nf-core/kaiju/mergeoutputs/tests/main.nf.test.snap
@@ -0,0 +1,107 @@
+{
+    "sarscov2 - kraken2.kaiju.classified - noDB - stub ": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test_merged.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,fe28eae98973762f4fb19fc562a56b9f"
+                ],
+                "merged": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test_merged.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,fe28eae98973762f4fb19fc562a56b9f"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-03-19T13:48:25.353007932"
+    },
+    "sarscov2 - kraken2.kaiju.classified - noDB ": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test_merged.tsv:md5,64fef9779d5003775c327bb534403859"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,fe28eae98973762f4fb19fc562a56b9f"
+                ],
+                "merged": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test_merged.tsv:md5,64fef9779d5003775c327bb534403859"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,fe28eae98973762f4fb19fc562a56b9f"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-03-19T13:47:53.326864948"
+    },
+    "sarscov2 - kraken2.kaiju.classified - kaiju.DB ": {
+        "content": [
+            {
+                "0": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test_merged.tsv:md5,64fef9779d5003775c327bb534403859"
+                    ]
+                ],
+                "1": [
+                    "versions.yml:md5,fe28eae98973762f4fb19fc562a56b9f"
+                ],
+                "merged": [
+                    [
+                        {
+                            "id": "test",
+                            "single_end": true
+                        },
+                        "test_merged.tsv:md5,64fef9779d5003775c327bb534403859"
+                    ]
+                ],
+                "versions": [
+                    "versions.yml:md5,fe28eae98973762f4fb19fc562a56b9f"
+                ]
+            }
+        ],
+        "meta": {
+            "nf-test": "0.8.4",
+            "nextflow": "23.10.1"
+        },
+        "timestamp": "2024-03-19T13:48:11.274433603"
+    }
+}
\ No newline at end of file
diff --git a/modules/nf-core/kaiju/mergeoutputs/tests/nodb.config b/modules/nf-core/kaiju/mergeoutputs/tests/nodb.config
new file mode 100644
index 00000000000..e6f6332b3c5
--- /dev/null
+++ b/modules/nf-core/kaiju/mergeoutputs/tests/nodb.config
@@ -0,0 +1,15 @@
+process {
+    withName: UNTAR_KAIJU {
+        ext.args2 = { "--no-same-owner" }
+    }
+
+    withName: UNTAR_KRAKEN2 {
+        ext.args2 = { "--no-same-owner" }
+    }
+
+    withName: KAIJU_MERGEOUTPUTS {
+        ext.prefix = { "${meta.id}_merged" }
+        // Explicit conflict strategy '1'; the default 'lca' presumably needs nodes.dmp from a db (TODO confirm).
+        ext.args   = "-c 1"
+    }
+}
diff --git a/modules/nf-core/kaiju/mergeoutputs/tests/tags.yml b/modules/nf-core/kaiju/mergeoutputs/tests/tags.yml
new file mode 100644
index 00000000000..8b1f3c8634d
--- /dev/null
+++ b/modules/nf-core/kaiju/mergeoutputs/tests/tags.yml
@@ -0,0 +1,2 @@
+kaiju/mergeoutputs:
+  - "modules/nf-core/kaiju/mergeoutputs/**"
diff --git a/modules/nf-core/kraken2/kraken2/main.nf b/modules/nf-core/kraken2/kraken2/main.nf
index cdf02d0f145..92cd9c34f7a 100644
--- a/modules/nf-core/kraken2/kraken2/main.nf
+++ b/modules/nf-core/kraken2/kraken2/main.nf
@@ -55,4 +55,28 @@ process KRAKEN2_KRAKEN2 {
         pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
     END_VERSIONS
     """
+
+    stub:
+    // Create empty placeholders only for the outputs selected by save_output_fastqs / save_reads_assignment.
+    def prefix       = task.ext.prefix ?: "${meta.id}"
+    def classified   = meta.single_end ? "${prefix}.classified.fastq.gz"   : "${prefix}.classified_1.fastq.gz ${prefix}.classified_2.fastq.gz"
+    def unclassified = meta.single_end ? "${prefix}.unclassified.fastq.gz" : "${prefix}.unclassified_1.fastq.gz ${prefix}.unclassified_2.fastq.gz"
+
+    """
+    touch ${prefix}.kraken2.report.txt
+    if [ "$save_output_fastqs" == "true" ]; then
+        touch $classified
+        touch $unclassified
+    fi
+    if [ "$save_reads_assignment" == "true" ]; then
+        touch ${prefix}.kraken2.classifiedreads.txt
+    fi
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        kraken2: \$(echo \$(kraken2 --version 2>&1) | sed 's/^.*Kraken version //; s/ .*\$//')
+        pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
+    END_VERSIONS
+    """
+
 }