From faf557ba56156ac0e5de76a25c1e3df11c944f59 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Thu, 15 Feb 2024 11:24:30 +0100 Subject: [PATCH] Add compression to MSA modules (#4754) * add pigz to clustalo * add compression to muscle5 * enabled compression flag for famsa * added compression to mafft * compression for mtmalign * set to mulled containers * more informative test name * change mtmalign test to search after unzipping * update mtmalign tests to work with gzip, fix typo * regenerate test snaps * muscle5: zip multiple output files, if present * Change MUSCLE5 tests to the same testcase TCOFFEE is using, also fix it * add tags requested by nf-core-lint * add full url to singularity/biocontainers * fix famsa * regenerated snapshots with nf-test 0.8.3. Reenabled snapshots for muscle5 and mtmalign * forgot to regenerate mafft, also mtmalign seems to still be nondeterministic * update metas * compression support for tcoffee modules * added pigz to tools in meta * fix typo * regenerate snaps, adjust test to gzip * added mulled containers for tcoffee * implement compression switching with channel * add tags wanted by lint * regenerate snapshots * whoops, regenerated using container this time * update meta.yml * update glob in meta.yml * support compressed input in irmsd * assign more precise type in meta.yml * add tag flagged by lint to tcoffee/irmsd * set tcoffee/irmsd to use mulled container * tcoffee/irmsd: do not compress template file, and correctly uncompress for irmsd * tcoffee/align: reimplement toggling compression * tcoffee/align: use new pipe name everywhere * tcoffee/align: reenable default html output, add comment * fix escaped line at end of comment... 
* tcoffee/align: make tcoffee write to stdout, avoid using fifo * clustalo/align: add optional compression * muscle5/super5: add optional compression, also expand tests * update snapshot * muscle5/super5: re-add empty config file * mafft: implement optional output compression, handle compressed input * muscle5/super5: better parallelization for compressed -perm all * mtmalign/align: implement optional compression * mtmalign/align: add pigz to versions.yml * mtmalign/align: fix * regenerate snapshot * famsa/align: implement optional compression * whoops, fix tests * clustalo/align: fix * update snapshots * generate different snapshots for compressed & uncompressed tests, prettify code * updated snapshots * mtmalign/align: update input pattern * tcoffee/alncompare,irmsd: implement jose's suggestion * tcoffee/irmsd: additional test for compressed input * tcoffee/irmsd: add tag required by lint * Revert "mtmalign/align: update input pattern" This reverts commit 7a0e78d821b8bb19d2b56d7cbf23af505f8f4725. * incorporate adams suggestion, fix stub filename extensions * apparently this requires regenerating the snapshots? * try removing test match names, as per sateesh's suggestion * Revert "try removing test match names, as per sateesh's suggestion" This reverts commit 706d05fc4fd850ed4399cafa0e9e575f6b4f7743. 
* tcoffee/align change snapshot names * make snapshot names unique for nf-test 0.8.4 --------- Co-authored-by: Leon Rauschning --- .../nf-core/clustalo/align/environment.yml | 1 + modules/nf-core/clustalo/align/main.nf | 31 ++- modules/nf-core/clustalo/align/meta.yml | 12 +- .../nf-core/clustalo/align/tests/main.nf.test | 37 ++- .../clustalo/align/tests/main.nf.test.snap | 31 ++- modules/nf-core/famsa/align/main.nf | 13 +- modules/nf-core/famsa/align/meta.yml | 7 +- .../nf-core/famsa/align/tests/main.nf.test | 35 ++- .../famsa/align/tests/main.nf.test.snap | 27 ++- modules/nf-core/mafft/environment.yml | 1 + modules/nf-core/mafft/main.nf | 29 ++- modules/nf-core/mafft/meta.yml | 37 +-- modules/nf-core/mafft/tests/main.nf.test | 40 +++- modules/nf-core/mafft/tests/main.nf.test.snap | 218 ++++-------------- .../nf-core/mtmalign/align/environment.yml | 1 + modules/nf-core/mtmalign/align/main.nf | 32 ++- modules/nf-core/mtmalign/align/meta.yml | 17 +- .../nf-core/mtmalign/align/tests/main.nf.test | 58 ++++- .../mtmalign/align/tests/main.nf.test.snap | 47 +--- .../nf-core/muscle5/super5/environment.yml | 1 + modules/nf-core/muscle5/super5/main.nf | 30 ++- modules/nf-core/muscle5/super5/meta.yml | 12 +- .../nf-core/muscle5/super5/tests/main.nf.test | 88 ++++--- .../muscle5/super5/tests/main.nf.test.snap | 54 +++-- .../muscle5/super5/tests/nextflow.config | 3 - .../muscle5/super5/tests/perm_all.config | 3 + modules/nf-core/tcoffee/align/environment.yml | 1 + modules/nf-core/tcoffee/align/main.nf | 22 +- modules/nf-core/tcoffee/align/meta.yml | 12 +- .../nf-core/tcoffee/align/tests/main.nf.test | 39 +++- .../tcoffee/align/tests/main.nf.test.snap | 39 ++-- .../tcoffee/align/tests/structure.config | 2 +- .../tcoffee/alncompare/environment.yml | 1 + modules/nf-core/tcoffee/alncompare/main.nf | 12 +- modules/nf-core/tcoffee/alncompare/meta.yml | 12 +- .../alncompare/tests/main.nf.test.snap | 6 +- modules/nf-core/tcoffee/irmsd/environment.yml | 1 + 
modules/nf-core/tcoffee/irmsd/main.nf | 12 +- modules/nf-core/tcoffee/irmsd/meta.yml | 12 +- .../nf-core/tcoffee/irmsd/tests/main.nf.test | 57 ++++- 40 files changed, 676 insertions(+), 417 deletions(-) create mode 100644 modules/nf-core/muscle5/super5/tests/perm_all.config diff --git a/modules/nf-core/clustalo/align/environment.yml b/modules/nf-core/clustalo/align/environment.yml index 1e08e574e37..be1eef95ebf 100644 --- a/modules/nf-core/clustalo/align/environment.yml +++ b/modules/nf-core/clustalo/align/environment.yml @@ -5,3 +5,4 @@ channels: - defaults dependencies: - bioconda::clustalo=1.2.4 + - conda-forge::pigz=2.8 diff --git a/modules/nf-core/clustalo/align/main.nf b/modules/nf-core/clustalo/align/main.nf index f35ed407aad..eb230cad269 100644 --- a/modules/nf-core/clustalo/align/main.nf +++ b/modules/nf-core/clustalo/align/main.nf @@ -4,16 +4,17 @@ process CLUSTALO_ALIGN { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/clustalo:1.2.4--h87f3376_5': - 'biocontainers/clustalo:1.2.4--h87f3376_5' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-4cefc38542f86c17596c29b35a059de10387c6a7:adbe4fbad680f9beb083956d79128039a727e7b3-0': + 'biocontainers/mulled-v2-4cefc38542f86c17596c29b35a059de10387c6a7:adbe4fbad680f9beb083956d79128039a727e7b3-0' }" input: - tuple val(meta), path(fasta) + tuple val(meta) , path(fasta) tuple val(meta2), path(tree) + val(compress) output: - tuple val(meta), path("*.aln"), emit: alignment - path "versions.yml" , emit: versions + tuple val(meta), path("*.aln{.gz,}"), emit: alignment + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -21,16 +22,23 @@ process CLUSTALO_ALIGN { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def write_output = compress ? 
"--force -o >(pigz -cp ${task.cpus} > ${prefix}.aln.gz)" : "> ${prefix}.aln" + // Writing through >() instead of a pipe is necessary to preserve clustalo's return value, + // so nextflow knows to display an error when the alignment failed. + // The --force -o is necessary, as clustalo expands the commandline input, + // causing it to treat the pipe as a parameter and fail; + // this way, the command expands to /dev/fd/, and --force allows writing output to an already existing file. """ - clustalo \\ - -i ${fasta} \\ - --threads=${task.cpus} \\ - $args \\ - -o ${prefix}.aln + clustalo \\ + -i ${fasta} \\ + --threads=${task.cpus} \\ + $args \\ + $write_output cat <<-END_VERSIONS > versions.yml "${task.process}": clustalo: \$( clustalo --version ) + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ) END_VERSIONS """ @@ -38,11 +46,12 @@ process CLUSTALO_ALIGN { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln + touch ${prefix}.aln${compress ? '.gz' : ''} cat <<-END_VERSIONS > versions.yml "${task.process}": clustalo: \$( clustalo --version ) + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ) END_VERSIONS """ } diff --git a/modules/nf-core/clustalo/align/meta.yml b/modules/nf-core/clustalo/align/meta.yml index 639013676bc..469b3a37691 100644 --- a/modules/nf-core/clustalo/align/meta.yml +++ b/modules/nf-core/clustalo/align/meta.yml @@ -12,6 +12,10 @@ tools: tool_dev_url: "http://www.clustal.org/omega/" doi: "10.1038/msb.2011.75" licence: ["GPL v2"] + - "pigz": + description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" input: - meta: type: map @@ -31,6 +35,9 @@ input: type: file description: Input guide tree in Newick format pattern: "*.{dnd}" + - compress: + type: boolean + description: Flag representing whether the output MSA should be compressed. Set to true to enable/false to disable compression. 
Compression is done using pigz, and is multithreaded. output: - meta: type: map @@ -39,8 +46,8 @@ output: e.g. `[ id:'test']` - alignment: type: file - description: Alignment file. - pattern: "*.{aln}" + description: Alignment file, in FASTA format. May be gzipped or uncompressed, depending on if compress is set to true or false + pattern: "*.aln{.gz,}" - versions: type: file description: File containing software versions @@ -51,3 +58,4 @@ authors: maintainers: - "@luisas" - "@joseespinosa" + - "@lrauschning" diff --git a/modules/nf-core/clustalo/align/tests/main.nf.test b/modules/nf-core/clustalo/align/tests/main.nf.test index 849dd5fa127..3edd36a21c0 100644 --- a/modules/nf-core/clustalo/align/tests/main.nf.test +++ b/modules/nf-core/clustalo/align/tests/main.nf.test @@ -4,13 +4,38 @@ nextflow_process { script "../main.nf" process "CLUSTALO_ALIGN" config "./nextflow.config" - + tag "modules" tag "modules_nfcore" tag "clustalo" tag "clustalo/align" + tag "clustalo/guidetree" + + test("sarscov2 - contigs-fasta - uncompressed") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + input[1] = [[:],[]] + input[2] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.alignment).match("alignment - uncompressed")}, + { assert snapshot(process.out.versions).match("versions0") } + ) + } + + } - test("sarscov2 - contigs-fasta") { + test("sarscov2 - contigs-fasta - compressed") { when { process { @@ -19,6 +44,7 @@ nextflow_process { file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) ] input[1] = [[:],[]] + input[2] = true """ } } @@ -26,8 +52,8 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.alignment).match("alignment")}, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(process.out.alignment).match("alignment - compressed")}, + { assert 
snapshot(process.out.versions).match("versions1") } ) } @@ -56,6 +82,7 @@ nextflow_process { file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) ] input[1] = CLUSTALO_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_summary'], tree]} + input[2] = true """ } } @@ -68,4 +95,4 @@ nextflow_process { ) } } -} +} \ No newline at end of file diff --git a/modules/nf-core/clustalo/align/tests/main.nf.test.snap b/modules/nf-core/clustalo/align/tests/main.nf.test.snap index 976cb5085c3..d7d69870c87 100644 --- a/modules/nf-core/clustalo/align/tests/main.nf.test.snap +++ b/modules/nf-core/clustalo/align/tests/main.nf.test.snap @@ -1,13 +1,26 @@ { + "alignment - compressed": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,74bb9a2820a91cf68db94dbd46787722" + ] + ] + ], + "timestamp": "2024-02-09T19:39:46.647351958" + }, "versions": { "content": [ [ - "versions.yml:md5,67b67f513195c4f7ab9ddadc309135fe" + "versions.yml:md5,327da6a4250a6b7c4e45cddaa1f56280" ] ], - "timestamp": "2023-11-27T14:50:24.517805" + "timestamp": "2024-02-09T19:39:14.826528498" }, - "with_guide_tree_alignment": { + "alignment - uncompressed": { "content": [ [ [ @@ -18,27 +31,27 @@ ] ] ], - "timestamp": "2023-11-28T16:28:59.20367" + "timestamp": "2024-02-09T19:39:14.786480272" }, - "alignment": { + "with_guide_tree_alignment": { "content": [ [ [ { "id": "test" }, - "test.aln:md5,74bb9a2820a91cf68db94dbd46787722" + "test.aln.gz:md5,74bb9a2820a91cf68db94dbd46787722" ] ] ], - "timestamp": "2023-11-27T14:50:24.471198" + "timestamp": "2024-02-09T19:40:45.057777867" }, "with_guide_tree_versions": { "content": [ [ - "versions.yml:md5,67b67f513195c4f7ab9ddadc309135fe" + "versions.yml:md5,327da6a4250a6b7c4e45cddaa1f56280" ] ], - "timestamp": "2023-11-28T16:28:59.225509" + "timestamp": "2024-02-09T19:40:45.122824595" } } \ No newline at end of file diff --git a/modules/nf-core/famsa/align/main.nf b/modules/nf-core/famsa/align/main.nf index 
388b133951e..096d8ff3890 100644 --- a/modules/nf-core/famsa/align/main.nf +++ b/modules/nf-core/famsa/align/main.nf @@ -10,26 +10,29 @@ process FAMSA_ALIGN { 'biocontainers/famsa:2.2.2--h9f5acd7_0' }" input: - tuple val(meta), path(fasta) + tuple val(meta) , path(fasta) tuple val(meta2), path(tree) + val(compress) output: - tuple val(meta), path("*.aln"), emit: alignment - path "versions.yml" , emit: versions + tuple val(meta), path("*.aln{.gz,}"), emit: alignment + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' + def compress_args = compress ? '-gz' : '' def prefix = task.ext.prefix ?: "${meta.id}" def options_tree = tree ? "-gt import $tree" : "" """ famsa $options_tree \\ + $compress_args \\ $args \\ -t ${task.cpus} \\ ${fasta} \\ - ${prefix}.aln + ${prefix}.aln${compress ? '.gz':''} cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -40,7 +43,7 @@ process FAMSA_ALIGN { stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln + touch ${prefix}.aln${compress ? '.gz' : ''} cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/nf-core/famsa/align/meta.yml b/modules/nf-core/famsa/align/meta.yml index 06a66c274f3..6acf3c21010 100644 --- a/modules/nf-core/famsa/align/meta.yml +++ b/modules/nf-core/famsa/align/meta.yml @@ -33,6 +33,9 @@ input: type: file description: Input guide tree in Newick format pattern: "*.{dnd}" + - compress: + type: boolean + description: Flag representing whether the output MSA should be compressed. Set to true to enable/false to disable compression. Compression is handled by passing '-gz' to FAMSA along with any other options specified in task.ext.args. output: - meta: type: map @@ -41,8 +44,8 @@ output: e.g. `[ id:'test']` - alignment: type: file - description: Alignment file. - pattern: "*.{aln}" + description: Alignment file, in FASTA format. 
May be gzipped or uncompressed, depending on if compress is set to true or false + pattern: "*.aln{.gz,}" - versions: type: file description: File containing software versions diff --git a/modules/nf-core/famsa/align/tests/main.nf.test b/modules/nf-core/famsa/align/tests/main.nf.test index 45bfee267fb..2d7cac3c4e0 100644 --- a/modules/nf-core/famsa/align/tests/main.nf.test +++ b/modules/nf-core/famsa/align/tests/main.nf.test @@ -8,8 +8,9 @@ nextflow_process { tag "modules_nfcore" tag "famsa" tag "famsa/align" + tag "famsa/guidetree" - test("sarscov2 - fasta") { + test("sarscov2 - fasta - uncompressed") { when { process { @@ -18,6 +19,7 @@ nextflow_process { file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) ] input[1] = [[:],[]] + input[2] = false """ } } @@ -25,8 +27,32 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out.alignment).match("alignment")}, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(process.out.alignment).match("alignment_uncompressed")}, + { assert snapshot(process.out.versions).match("versions0") } + ) + } + + } + + test("sarscov2 - fasta - compressed") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) + ] + input[1] = [[:],[]] + input[2] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.alignment).match("alignment_compressed")}, + { assert snapshot(process.out.versions).match("versions1") } ) } @@ -54,6 +80,7 @@ nextflow_process { file(params.test_data['sarscov2']['illumina']['contigs_fasta'], checkIfExists: true) ] input[1] = FAMSA_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test_summary'], tree]} + input[2] = true """ } } @@ -66,4 +93,4 @@ nextflow_process { ) } } -} +} \ No newline at end of file diff --git 
a/modules/nf-core/famsa/align/tests/main.nf.test.snap b/modules/nf-core/famsa/align/tests/main.nf.test.snap index f5795e7363e..95bbbf17750 100644 --- a/modules/nf-core/famsa/align/tests/main.nf.test.snap +++ b/modules/nf-core/famsa/align/tests/main.nf.test.snap @@ -1,11 +1,24 @@ { + "alignment_uncompressed": { + "content": [ + [ + [ + { + "id": "test" + }, + "test.aln:md5,7cf7375f2ba360814ea978731838b972" + ] + ] + ], + "timestamp": "2024-02-09T19:08:43.577982822" + }, "versions": { "content": [ [ "versions.yml:md5,7d9e0a8c263fa6d9017075fe88c9e9dc" ] ], - "timestamp": "2023-11-29T11:45:52.435457374" + "timestamp": "2024-02-09T19:08:43.670136799" }, "with_guide_tree_alignment": { "content": [ @@ -14,24 +27,24 @@ { "id": "test" }, - "test.aln:md5,7cf7375f2ba360814ea978731838b972" + "test.aln.gz:md5,7cf7375f2ba360814ea978731838b972" ] ] ], - "timestamp": "2023-11-29T11:46:17.140198556" + "timestamp": "2024-02-09T19:10:05.167368314" }, - "alignment": { + "alignment_compressed": { "content": [ [ [ { "id": "test" }, - "test.aln:md5,7cf7375f2ba360814ea978731838b972" + "test.aln.gz:md5,7cf7375f2ba360814ea978731838b972" ] ] ], - "timestamp": "2023-11-29T11:45:52.421966018" + "timestamp": "2024-02-09T19:09:25.819156831" }, "with_guide_tree_versions": { "content": [ @@ -39,6 +52,6 @@ "versions.yml:md5,7d9e0a8c263fa6d9017075fe88c9e9dc" ] ], - "timestamp": "2023-11-29T11:46:17.147997548" + "timestamp": "2024-02-09T19:10:05.231995851" } } \ No newline at end of file diff --git a/modules/nf-core/mafft/environment.yml b/modules/nf-core/mafft/environment.yml index fff5c9f3599..595252e08c0 100644 --- a/modules/nf-core/mafft/environment.yml +++ b/modules/nf-core/mafft/environment.yml @@ -5,3 +5,4 @@ channels: - defaults dependencies: - bioconda::mafft=7.520 + - conda-forge::pigz=2.8 diff --git a/modules/nf-core/mafft/main.nf b/modules/nf-core/mafft/main.nf index 43c32c98ba0..f09a0c96246 100644 --- a/modules/nf-core/mafft/main.nf +++ b/modules/nf-core/mafft/main.nf @@ -4,20 +4,21 @@ 
process MAFFT { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mafft:7.520--h031d066_3': - 'biocontainers/mafft:7.520--h031d066_3' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-12eba4a074f913c639117640936668f5a6a01da6:425707898cf4f85051b77848be253b88f1d2298a-0': + 'biocontainers/mulled-v2-12eba4a074f913c639117640936668f5a6a01da6:425707898cf4f85051b77848be253b88f1d2298a-0' }" input: - tuple val(meta), path(fasta) + tuple val(meta) , path(fasta) tuple val(meta2), path(add) tuple val(meta3), path(addfragments) tuple val(meta4), path(addfull) tuple val(meta5), path(addprofile) tuple val(meta6), path(addlong) + val(compress) output: - tuple val(meta), path("*.fas"), emit: fas - path "versions.yml" , emit: versions + tuple val(meta), path("*.fas{.gz,}"), emit: fas + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -25,11 +26,13 @@ process MAFFT { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def add = add ? "--add ${add}" : '' - def addfragments = addfragments ? "--addfragments ${addfragments}" : '' - def addfull = addfull ? "--addfull ${addfull}" : '' - def addprofile = addprofile ? "--addprofile ${addprofile}" : '' - def addlong = addlong ? "--addlong ${addlong}" : '' + def add = add ? "--add <(unpigz -cdf ${add})" : '' + def addfragments = addfragments ? "--addfragments <(unpigz -cdf ${addfragments})" : '' + def addfull = addfull ? "--addfull <(unpigz -cdf ${addfull})" : '' + def addprofile = addprofile ? "--addprofile <(unpigz -cdf ${addprofile})" : '' + def addlong = addlong ? "--addlong <(unpigz -cdf ${addlong})" : '' + def write_output = compress ? 
" | pigz -cp ${task.cpus} > ${prefix}.fas.gz" : "> ${prefix}.fas" + // this will not preserve MAFFT's return value, but mafft crashes when it receives a process substitution if ("$fasta" == "${prefix}.fas" ) error "Input and output names are the same, set prefix in module configuration to disambiguate!" """ mafft \\ @@ -41,11 +44,12 @@ process MAFFT { ${addlong} \\ ${args} \\ ${fasta} \\ - > ${prefix}.fas + ${write_output} cat <<-END_VERSIONS > versions.yml "${task.process}": mafft: \$(mafft --version 2>&1 | sed 's/^v//' | sed 's/ (.*)//') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ) END_VERSIONS """ @@ -59,11 +63,12 @@ process MAFFT { def addlong = addlong ? "--addlong ${addlong}" : '' if ("$fasta" == "${prefix}.fas" ) error "Input and output names are the same, set prefix in module configuration to disambiguate!" """ - touch ${prefix}.fas + touch ${prefix}.fas${compress ? '.gz' : ''} cat <<-END_VERSIONS > versions.yml "${task.process}": mafft: \$(mafft --version 2>&1 | sed 's/^v//' | sed 's/ (.*)//') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ) END_VERSIONS """ diff --git a/modules/nf-core/mafft/meta.yml b/modules/nf-core/mafft/meta.yml index fbe121aa11e..90b9ed39c32 100644 --- a/modules/nf-core/mafft/meta.yml +++ b/modules/nf-core/mafft/meta.yml @@ -5,13 +5,17 @@ keywords: - msa - multiple sequence alignment tools: - - mafft: + - "mafft": description: Multiple alignment program for amino acid or nucleotide sequences based on fast Fourier transform homepage: https://mafft.cbrc.jp/alignment/software/ documentation: https://mafft.cbrc.jp/alignment/software/manual/manual.html tool_dev_url: https://mafft.cbrc.jp/alignment/software/source.html doi: "10.1093/nar/gkf436" licence: ["BSD"] + - "pigz": + description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" input: - meta: type: map @@ -20,8 +24,8 @@ input: e.g. 
[ id:'test', single_end:false ] - fasta: type: file - description: FASTA file containing the sequences to align - pattern: "*.{fa,fasta}" + description: FASTA file containing the sequences to align. May be gzipped or uncompressed. + pattern: "*.{fa,fasta}{.gz,}" - meta2: type: map description: | @@ -29,8 +33,8 @@ input: e.g. [ id:'test', single_end:false ] - add: type: file - description: FASTA file containing sequences to align to the sequences in `fasta` using `--add` - pattern: "*.{fa,fasta}" + description: FASTA file containing sequences to align to the sequences in `fasta` using `--add`. May be gzipped or uncompressed. + pattern: "*.{fa,fasta}{.gz,}" - meta3: type: map description: | @@ -38,8 +42,8 @@ input: e.g. [ id:'test', single_end:false ] - addfragments: type: file - description: FASTA file containing sequences to align to the sequences in `fasta` using `--addfragments` - pattern: "*.{fa,fasta}" + description: FASTA file containing sequences to align to the sequences in `fasta` using `--addfragments`. May be gzipped or uncompressed. + pattern: "*.{fa,fasta}{.gz,}" - meta4: type: map description: | @@ -47,8 +51,8 @@ input: e.g. [ id:'test', single_end:false ] - addfull: type: file - description: FASTA file containing sequences to align to the sequences in `fasta` using `--addfull` - pattern: "*.{fa,fasta}" + description: FASTA file containing sequences to align to the sequences in `fasta` using `--addfull`. May be gzipped or uncompressed. + pattern: "*.{fa,fasta}{.gz,}" - meta5: type: map description: | @@ -56,8 +60,8 @@ input: e.g. [ id:'test', single_end:false ] - addprofile: type: file - description: FASTA file containing sequences to align to the sequences in `fasta` using `--addprofile` - pattern: "*.{fa,fasta}" + description: FASTA file containing sequences to align to the sequences in `fasta` using `--addprofile`. May be gzipped or uncompressed. + pattern: "*.{fa,fasta}{.gz,}" - meta6: type: map description: | @@ -65,8 +69,11 @@ input: e.g. 
[ id:'test', single_end:false ] - addlong: type: file - description: FASTA file containing sequences to align to the sequences in `fasta` using `--addlong` - pattern: "*.{fa,fasta}" + description: FASTA file containing sequences to align to the sequences in `fasta` using `--addlong`. May be gzipped or uncompressed. + pattern: "*.{fa,fasta}{.gz,}" + - compress: + type: boolean + description: Flag representing whether the output MSA should be compressed. Set to true to enable/false to disable compression. Compression is done using pigz, and is multithreaded. output: - meta: type: map @@ -79,8 +86,8 @@ output: pattern: "versions.yml" - fas: type: file - description: Aligned sequences in FASTA format - pattern: "*.{fas}" + description: Aligned sequences in FASTA format. May be gzipped or uncompressed. + pattern: "*.fas{.gz,}" authors: - "@MillironX" maintainers: diff --git a/modules/nf-core/mafft/tests/main.nf.test b/modules/nf-core/mafft/tests/main.nf.test index f622724e20e..f57ab4965da 100644 --- a/modules/nf-core/mafft/tests/main.nf.test +++ b/modules/nf-core/mafft/tests/main.nf.test @@ -7,7 +7,7 @@ nextflow_process { tag "modules_nfcore" tag "mafft" - test("SARS-CoV-2 scaffolds fasta") { + test("SARS-CoV-2 scaffolds fasta - uncompressed") { when { process { """ @@ -20,6 +20,7 @@ nextflow_process { input[3] = [[:], []] input[4] = [[:], []] input[5] = [[:], []] + input[6] = false """ } } @@ -27,7 +28,34 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta")} + { assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta - uncompressed")} + ) + } + + } + + test("SARS-CoV-2 scaffolds fasta - compressed") { + when { + process { + """ + input[0] = [ + [ id:'test', single_end:false ], // meta map + file(params.test_data['sarscov2']['illumina']['scaffolds_fasta'], checkIfExists: true) + ] + input[1] = [[:], []] + input[2] = [[:], []] + input[3] = [[:], []] + input[4] = [[:], []] + 
input[5] = [[:], []] + input[6] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match("SARS-CoV-2 scaffolds fasta - compressed")} ) } @@ -49,6 +77,7 @@ nextflow_process { input[3] = [[:], []] input[4] = [[:], []] input[5] = [[:], []] + input[6] = true """ } } @@ -78,6 +107,7 @@ nextflow_process { input[3] = [[:], []] input[4] = [[:], []] input[5] = [[:], []] + input[6] = true """ } } @@ -106,6 +136,7 @@ nextflow_process { ] input[4] = [[:], []] input[5] = [[:], []] + input[6] = true """ } } @@ -135,6 +166,7 @@ nextflow_process { file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) ] input[5] = [[:], []] + input[6] = true """ } } @@ -165,6 +197,7 @@ nextflow_process { [ id:'test', single_end:false ], // meta map file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) ] + input[6] = true """ } } @@ -198,6 +231,7 @@ nextflow_process { input[3] = [[:], []] input[4] = [[:], []] input[5] = [[:], []] + input[6] = true """ } } @@ -211,4 +245,4 @@ nextflow_process { } -} +} \ No newline at end of file diff --git a/modules/nf-core/mafft/tests/main.nf.test.snap b/modules/nf-core/mafft/tests/main.nf.test.snap index ebf00f7d393..c14ad086fa1 100644 --- a/modules/nf-core/mafft/tests/main.nf.test.snap +++ b/modules/nf-core/mafft/tests/main.nf.test.snap @@ -1,5 +1,5 @@ { - "SARS-CoV-2 scaffolds fasta": { + "SARS-CoV-2 scaffolds fasta - uncompressed": { "content": [ { "0": [ @@ -12,7 +12,7 @@ ] ], "1": [ - "versions.yml:md5,b505d3a7cf3cd5d0e6a5e4d2944df9b7" + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" ], "fas": [ [ @@ -24,137 +24,13 @@ ] ], "versions": [ - "versions.yml:md5,b505d3a7cf3cd5d0e6a5e4d2944df9b7" + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" ] } ], - "timestamp": "2023-11-17T15:31:46.463322875" + "timestamp": "2024-02-09T19:08:41.735774847" }, - "SARS-CoV-2 scaffolds fasta - add informative sites fasta normal": { - 
"content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fas:md5,a57a34f1c566dea114dc1b13416536d4" - ] - ], - "1": [ - "versions.yml:md5,b505d3a7cf3cd5d0e6a5e4d2944df9b7" - ], - "fas": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fas:md5,a57a34f1c566dea114dc1b13416536d4" - ] - ], - "versions": [ - "versions.yml:md5,b505d3a7cf3cd5d0e6a5e4d2944df9b7" - ] - } - ], - "timestamp": "2023-11-17T15:39:41.739290654" - }, - "SARS-CoV-2 scaffolds fasta - add informative sites fasta long": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fas:md5,e8868da70d1f3050a8daaee0e53b2fd9" - ] - ], - "1": [ - "versions.yml:md5,b505d3a7cf3cd5d0e6a5e4d2944df9b7" - ], - "fas": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fas:md5,e8868da70d1f3050a8daaee0e53b2fd9" - ] - ], - "versions": [ - "versions.yml:md5,b505d3a7cf3cd5d0e6a5e4d2944df9b7" - ] - } - ], - "timestamp": "2023-11-17T15:40:47.75559372" - }, - "SARS-CoV-2 scaffolds fasta - add informative sites fasta profile": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fas:md5,c2b5caf39beff4473878e6aa4036ad43" - ] - ], - "1": [ - "versions.yml:md5,b505d3a7cf3cd5d0e6a5e4d2944df9b7" - ], - "fas": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fas:md5,c2b5caf39beff4473878e6aa4036ad43" - ] - ], - "versions": [ - "versions.yml:md5,b505d3a7cf3cd5d0e6a5e4d2944df9b7" - ] - } - ], - "timestamp": "2023-11-17T15:40:33.934616343" - }, - "SARS-CoV-2 scaffolds fasta - add informative sites fasta fragments": { - "content": [ - { - "0": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fas:md5,aed7f866c3a20dc9d2f2b4ad73515961" - ] - ], - "1": [ - "versions.yml:md5,b505d3a7cf3cd5d0e6a5e4d2944df9b7" - ], - "fas": [ - [ - { - "id": "test", - "single_end": false - }, - "test.fas:md5,aed7f866c3a20dc9d2f2b4ad73515961" - ] - ], - "versions": [ - "versions.yml:md5,b505d3a7cf3cd5d0e6a5e4d2944df9b7" - ] 
- } - ], - "timestamp": "2023-11-17T15:40:05.528308049" - }, - "SARS-CoV-2 scaffolds fasta - add informative sites fasta full": { + "SARS-CoV-2 scaffolds fasta - add informative sites fasta multiple": { "content": [ { "0": [ @@ -163,11 +39,11 @@ "id": "test", "single_end": false }, - "test.fas:md5,611cb0a65195a282f110f7f56e310c66" + "test.fas.gz:md5,aed7f866c3a20dc9d2f2b4ad73515961" ] ], "1": [ - "versions.yml:md5,b505d3a7cf3cd5d0e6a5e4d2944df9b7" + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" ], "fas": [ [ @@ -175,17 +51,17 @@ "id": "test", "single_end": false }, - "test.fas:md5,611cb0a65195a282f110f7f56e310c66" + "test.fas.gz:md5,aed7f866c3a20dc9d2f2b4ad73515961" ] ], "versions": [ - "versions.yml:md5,b505d3a7cf3cd5d0e6a5e4d2944df9b7" + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" ] } ], - "timestamp": "2023-11-17T15:40:20.088737531" + "timestamp": "2024-02-09T19:10:38.940555785" }, - "SARS-CoV-2 scaffolds fasta - add full": { + "SARS-CoV-2 scaffolds fasta - add informative sites fasta normal": { "content": [ { "0": [ @@ -194,11 +70,11 @@ "id": "test", "single_end": false }, - "test.fas:md5,611cb0a65195a282f110f7f56e310c66" + "test.fas.gz:md5,a57a34f1c566dea114dc1b13416536d4" ] ], "1": [ - "versions.yml:md5,b505d3a7cf3cd5d0e6a5e4d2944df9b7" + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" ], "fas": [ [ @@ -206,17 +82,17 @@ "id": "test", "single_end": false }, - "test.fas:md5,611cb0a65195a282f110f7f56e310c66" + "test.fas.gz:md5,a57a34f1c566dea114dc1b13416536d4" ] ], "versions": [ - "versions.yml:md5,b505d3a7cf3cd5d0e6a5e4d2944df9b7" + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" ] } ], - "timestamp": "2023-11-17T15:36:41.023632311" + "timestamp": "2024-02-09T19:09:35.656248409" }, - "SARS-CoV-2 scaffolds fasta - add fragments": { + "SARS-CoV-2 scaffolds fasta - add informative sites fasta long": { "content": [ { "0": [ @@ -225,11 +101,11 @@ "id": "test", "single_end": false }, - "test.fas:md5,aed7f866c3a20dc9d2f2b4ad73515961" + 
"test.fas.gz:md5,e8868da70d1f3050a8daaee0e53b2fd9" ] ], "1": [ - "versions.yml:md5,b505d3a7cf3cd5d0e6a5e4d2944df9b7" + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" ], "fas": [ [ @@ -237,17 +113,17 @@ "id": "test", "single_end": false }, - "test.fas:md5,aed7f866c3a20dc9d2f2b4ad73515961" + "test.fas.gz:md5,e8868da70d1f3050a8daaee0e53b2fd9" ] ], "versions": [ - "versions.yml:md5,b505d3a7cf3cd5d0e6a5e4d2944df9b7" + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" ] } ], - "timestamp": "2023-11-17T15:36:20.211074168" + "timestamp": "2024-02-09T19:10:26.372655394" }, - "SARS-CoV-2 scaffolds fasta - add normal": { + "SARS-CoV-2 scaffolds fasta - add informative sites fasta profile": { "content": [ { "0": [ @@ -256,11 +132,11 @@ "id": "test", "single_end": false }, - "test.fas:md5,a57a34f1c566dea114dc1b13416536d4" + "test.fas.gz:md5,c2b5caf39beff4473878e6aa4036ad43" ] ], "1": [ - "versions.yml:md5,b505d3a7cf3cd5d0e6a5e4d2944df9b7" + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" ], "fas": [ [ @@ -268,17 +144,17 @@ "id": "test", "single_end": false }, - "test.fas:md5,a57a34f1c566dea114dc1b13416536d4" + "test.fas.gz:md5,c2b5caf39beff4473878e6aa4036ad43" ] ], "versions": [ - "versions.yml:md5,b505d3a7cf3cd5d0e6a5e4d2944df9b7" + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" ] } ], - "timestamp": "2023-11-17T15:36:06.708296829" + "timestamp": "2024-02-09T19:10:14.039053212" }, - "SARS-CoV-2 scaffolds fasta - add informative sites fasta multiple": { + "SARS-CoV-2 scaffolds fasta - add informative sites fasta fragments": { "content": [ { "0": [ @@ -287,11 +163,11 @@ "id": "test", "single_end": false }, - "test.fas:md5,aed7f866c3a20dc9d2f2b4ad73515961" + "test.fas.gz:md5,aed7f866c3a20dc9d2f2b4ad73515961" ] ], "1": [ - "versions.yml:md5,b505d3a7cf3cd5d0e6a5e4d2944df9b7" + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" ], "fas": [ [ @@ -299,17 +175,17 @@ "id": "test", "single_end": false }, - "test.fas:md5,aed7f866c3a20dc9d2f2b4ad73515961" + 
"test.fas.gz:md5,aed7f866c3a20dc9d2f2b4ad73515961" ] ], "versions": [ - "versions.yml:md5,b505d3a7cf3cd5d0e6a5e4d2944df9b7" + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" ] } ], - "timestamp": "2023-11-17T15:44:19.543603564" + "timestamp": "2024-02-09T19:09:49.737364197" }, - "SARS-CoV-2 scaffolds fasta - add profile": { + "SARS-CoV-2 scaffolds fasta - add informative sites fasta full": { "content": [ { "0": [ @@ -318,11 +194,11 @@ "id": "test", "single_end": false }, - "test.fas:md5,c2b5caf39beff4473878e6aa4036ad43" + "test.fas.gz:md5,611cb0a65195a282f110f7f56e310c66" ] ], "1": [ - "versions.yml:md5,b505d3a7cf3cd5d0e6a5e4d2944df9b7" + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" ], "fas": [ [ @@ -330,17 +206,17 @@ "id": "test", "single_end": false }, - "test.fas:md5,c2b5caf39beff4473878e6aa4036ad43" + "test.fas.gz:md5,611cb0a65195a282f110f7f56e310c66" ] ], "versions": [ - "versions.yml:md5,b505d3a7cf3cd5d0e6a5e4d2944df9b7" + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" ] } ], - "timestamp": "2023-11-17T15:36:53.336352899" + "timestamp": "2024-02-09T19:10:02.952480822" }, - "SARS-CoV-2 scaffolds fasta - add long": { + "SARS-CoV-2 scaffolds fasta - compressed": { "content": [ { "0": [ @@ -349,11 +225,11 @@ "id": "test", "single_end": false }, - "test.fas:md5,e8868da70d1f3050a8daaee0e53b2fd9" + "test.fas.gz:md5,23426611f4a0df532b6708f072bd445b" ] ], "1": [ - "versions.yml:md5,b505d3a7cf3cd5d0e6a5e4d2944df9b7" + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" ], "fas": [ [ @@ -361,14 +237,14 @@ "id": "test", "single_end": false }, - "test.fas:md5,e8868da70d1f3050a8daaee0e53b2fd9" + "test.fas.gz:md5,23426611f4a0df532b6708f072bd445b" ] ], "versions": [ - "versions.yml:md5,b505d3a7cf3cd5d0e6a5e4d2944df9b7" + "versions.yml:md5,6e930f6a5acc19ff3a7849536a9fd0ee" ] } ], - "timestamp": "2023-11-17T15:37:15.058221937" + "timestamp": "2024-02-09T19:09:21.096197597" } -} +} \ No newline at end of file diff --git 
a/modules/nf-core/mtmalign/align/environment.yml b/modules/nf-core/mtmalign/align/environment.yml index 1cc49c894d5..59d426bb597 100644 --- a/modules/nf-core/mtmalign/align/environment.yml +++ b/modules/nf-core/mtmalign/align/environment.yml @@ -5,3 +5,4 @@ channels: - defaults dependencies: - bioconda::mtm-align=20220104 + - conda-forge::pigz=2.8 diff --git a/modules/nf-core/mtmalign/align/main.nf b/modules/nf-core/mtmalign/align/main.nf index 40254dea85d..c6ad07b7c6d 100644 --- a/modules/nf-core/mtmalign/align/main.nf +++ b/modules/nf-core/mtmalign/align/main.nf @@ -6,16 +6,17 @@ process MTMALIGN_ALIGN { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/mtm-align:20220104--h4ac6f70_0': - 'biocontainers/mtm-align:20220104--h4ac6f70_0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-5bcf71dc66dac33d8e003c5e78043b80f5c7f269:8f0e486d46f7ab38892c1a8f78d2894a549d03b5-0': + 'biocontainers/mulled-v2-5bcf71dc66dac33d8e003c5e78043b80f5c7f269:8f0e486d46f7ab38892c1a8f78d2894a549d03b5-0' }" input: tuple val(meta), path('*.pdb', arity: '2..*') + val(compress) output: - tuple val(meta), path("./mTM_result/${prefix}.aln") , emit: alignment - tuple val(meta), path("./mTM_result/${prefix}.pdb") , emit: structure - path "versions.yml" , emit: versions + tuple val(meta), path("./mTM_result/${prefix}.aln${compress ? '.gz' : ''}"), emit: alignment + tuple val(meta), path("./mTM_result/${prefix}.pdb${compress ? 
'.gz' : ''}"), emit: structure + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -23,16 +24,32 @@ process MTMALIGN_ALIGN { script: def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" + // mTMalign is not capable of writing to stdout + // if -o /dev/stdout is specified, the output file will be polluted with debug messages emitted by mTMalign """ + # decompress input files if required + if ls ./*.pdb.gz > /dev/null 2>&1; then # check if any files are compressed; calling unpigz with an empty arg will cause it to panic + unpigz -d ./*.pdb.gz + fi + + # construct input file for mtmalign ls *.pdb | sed s/\\ /\\n/ > input_list.txt + mtm-align -i input_list.txt -o ${prefix}.pdb # -o does not affect the fasta naming, so move it to the new name mv ./mTM_result/result.fasta ./mTM_result/${prefix}.aln + # compress both output files + if ${compress}; then + pigz -p ${task.cpus} ./mTM_result/${prefix}.aln ./mTM_result/${prefix}.pdb + fi + tree + # mtm-align -v prints the wrong version 20180725, so extract it from the cosmetic output in the help message cat <<-END_VERSIONS > versions.yml "${task.process}": mTM-align: \$( mtm-align -h | grep -e "\\(Version [[:digit:]]*\\)" | grep -oe "[[:digit:]]*" ) + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) END_VERSIONS """ @@ -41,13 +58,14 @@ process MTMALIGN_ALIGN { prefix = task.ext.prefix ?: "${meta.id}" """ mkdir mTM_result - touch mTM_result/${prefix}.aln - touch mTM_result/${prefix}.pdb + touch mTM_result/${prefix}.aln${compress ? '.gz' : ''} + touch mTM_result/${prefix}.pdb${compress ?
'.gz' : ''} # mtm-align -v prints the wrong version 20180725, so extract it from the cosmetic output in the help message cat <<-END_VERSIONS > versions.yml "${task.process}": mTM-align: \$( mtm-align -h | grep -e "\\(Version [[:digit:]]*\\)" | grep -oe "[[:digit:]]*" ) + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) END_VERSIONS """ } diff --git a/modules/nf-core/mtmalign/align/meta.yml b/modules/nf-core/mtmalign/align/meta.yml index 9c4b783649e..1e444e1c6ba 100644 --- a/modules/nf-core/mtmalign/align/meta.yml +++ b/modules/nf-core/mtmalign/align/meta.yml @@ -15,6 +15,10 @@ tools: tool_dev_url: "http://yanglab.nankai.edu.cn/mTM-align/" doi: "10.1093/bioinformatics/btx828" licence: ["None"] + - "pigz": + description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" input: - meta: type: map @@ -23,9 +27,12 @@ input: e.g. `[ id:'test']` - pdbs: type: file - description: Input protein structures in PDB format. + description: Input protein structures in PDB format. Files may be gzipped or uncompressed. They should contain exactly one chain! pattern: "*.{pdb}" + - compress: + type: boolean + description: Flag representing whether the output MSA should be compressed. Set to true to enable/false to disable compression. Compression is done using pigz, and is multithreaded. output: - meta: type: map @@ -34,12 +41,12 @@ output: e.g. `[ id:'test']` - alignment: type: file - description: Alignment in FASTA format. - pattern: "*.aln" + description: Alignment in FASTA format. May be gzipped or uncompressed. + pattern: "*.aln{.gz,}" - structure: type: file - description: Overlaid structures in PDB format. - pattern: "${prefix}.pdb" + description: Overlaid structures in PDB format. May be gzipped or uncompressed. 
+ pattern: "${prefix}.pdb{.gz,}" - versions: type: file description: File containing software versions diff --git a/modules/nf-core/mtmalign/align/tests/main.nf.test b/modules/nf-core/mtmalign/align/tests/main.nf.test index d3bd9d444f8..cb3f3885b67 100644 --- a/modules/nf-core/mtmalign/align/tests/main.nf.test +++ b/modules/nf-core/mtmalign/align/tests/main.nf.test @@ -7,8 +7,9 @@ nextflow_process { tag "modules_nfcore" tag "mtmalign" tag "mtmalign/align" + tag "untar" - test("Should run without failures") { + test("Test on seatoxin dataset - uncompressed") { setup { run("UNTAR") { @@ -28,6 +29,7 @@ nextflow_process { process { """ input[0] = UNTAR.out.untar.map { meta,dir -> [[ id:'test' ], file(dir).listFiles().collect()]} + input[1] = false """ } } @@ -36,13 +38,53 @@ nextflow_process { assertAll( { assert process.success }, // mTMalign may be nondeterministic, just check if the pdbs are all in there - //assert snapshot(process.out).match() - { assert file(process.out.alignment[0][1]).text.contains(">1.pdb") }, - { assert file(process.out.alignment[0][1]).text.contains(">2.pdb") }, - { assert file(process.out.alignment[0][1]).text.contains(">3.pdb") }, - { assert file(process.out.alignment[0][1]).text.contains(">4.pdb") }, - { assert file(process.out.alignment[0][1]).text.contains(">5.pdb") }, - { assert snapshot(process.out.versions).match("versions") } + //{ assert snapshot(process.out).match() } + { assert path(process.out.alignment[0][1]).getText().contains(">1.pdb") }, + { assert path(process.out.alignment[0][1]).getText().contains(">2.pdb") }, + { assert path(process.out.alignment[0][1]).getText().contains(">3.pdb") }, + { assert path(process.out.alignment[0][1]).getText().contains(">4.pdb") }, + { assert path(process.out.alignment[0][1]).getText().contains(">5.pdb") }, + { assert snapshot(process.out.versions).match("versions0") } + ) + } + } + + test("Test on seatoxin dataset - compressed") { + setup { + + run("UNTAR") { + script 
"../../../../../modules/nf-core/untar/main.nf" + process { + """ + archive = file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz", checkIfExists: true) + input[0] = Channel.of(tuple([id:'test'], archive)) + """ + } + } + } + + when { + params { + } + process { + """ + input[0] = UNTAR.out.untar.map { meta,dir -> [[ id:'test' ], file(dir).listFiles().collect()]} + input[1] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + // mTMalign may be nondeterministic, just check if the pdbs are all in there + //{ assert snapshot(process.out).match() } + { assert path(process.out.alignment[0][1]).getTextGzip().contains(">1.pdb") }, + { assert path(process.out.alignment[0][1]).getTextGzip().contains(">2.pdb") }, + { assert path(process.out.alignment[0][1]).getTextGzip().contains(">3.pdb") }, + { assert path(process.out.alignment[0][1]).getTextGzip().contains(">4.pdb") }, + { assert path(process.out.alignment[0][1]).getTextGzip().contains(">5.pdb") }, + { assert snapshot(process.out.versions).match("versions1") } ) } } diff --git a/modules/nf-core/mtmalign/align/tests/main.nf.test.snap b/modules/nf-core/mtmalign/align/tests/main.nf.test.snap index d9d6c9bed5b..eb321457474 100644 --- a/modules/nf-core/mtmalign/align/tests/main.nf.test.snap +++ b/modules/nf-core/mtmalign/align/tests/main.nf.test.snap @@ -1,47 +1,10 @@ { - "Should run without failures": { + "versions": { "content": [ - { - "0": [ - [ - { - "id": "test" - }, - "result.fasta:md5,8400950485c61b741f06c9b936bd2863" - ] - ], - "1": [ - [ - { - "id": "test" - }, - "result.pdb:md5,53aeaeae0bd1c0c2c887c4215ec44f5e" - ] - ], - "2": [ - "versions.yml:md5,4b618e4e98abf72feabb3778d3c39105" - ], - "alignment": [ - [ - { - "id": "test" - }, - "result.fasta:md5,8400950485c61b741f06c9b936bd2863" - ] - ], - "structure": [ - [ - { - "id": "test" - }, - "result.pdb:md5,53aeaeae0bd1c0c2c887c4215ec44f5e" - ] - ], - "versions": [ - 
"versions.yml:md5,4b618e4e98abf72feabb3778d3c39105" - ] - } + [ + "versions.yml:md5,7cbacec15bb9e0c8cbb27610bde74c10" + ] ], - "timestamp": "2023-12-04T17:24:24.975306332" + "timestamp": "2024-01-25T18:21:22.385207003" } } \ No newline at end of file diff --git a/modules/nf-core/muscle5/super5/environment.yml b/modules/nf-core/muscle5/super5/environment.yml index f2c60f16d00..fbaf4a2ef9f 100644 --- a/modules/nf-core/muscle5/super5/environment.yml +++ b/modules/nf-core/muscle5/super5/environment.yml @@ -5,3 +5,4 @@ channels: - defaults dependencies: - bioconda::muscle=5.1 + - conda-forge::pigz=2.8 diff --git a/modules/nf-core/muscle5/super5/main.nf b/modules/nf-core/muscle5/super5/main.nf index 0f58be9461c..87af149b605 100644 --- a/modules/nf-core/muscle5/super5/main.nf +++ b/modules/nf-core/muscle5/super5/main.nf @@ -3,15 +3,16 @@ process MUSCLE5_SUPER5 { label 'process_medium' conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/muscle:5.1--h9f5acd7_1': - 'biocontainers/muscle:5.1--h9f5acd7_1' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-8eb01a3c2755c935d070dd03ff2dee698eeb4466:ceb6e65e00346ed20d0d8078dddf9858a7af0fe2-0': + 'biocontainers/mulled-v2-8eb01a3c2755c935d070dd03ff2dee698eeb4466:ceb6e65e00346ed20d0d8078dddf9858a7af0fe2-0' }" input: tuple val(meta), path(fasta) + val(compress) output: - tuple val(meta), path("*.aln"), emit: alignment - path "versions.yml" , emit: versions + tuple val(meta), path("*.aln{.gz,}"), emit: alignment + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -20,16 +21,30 @@ process MUSCLE5_SUPER5 { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" prefix = args.contains('-perm all') ? "${prefix}@" : "${prefix}" + def write_output = (compress && !args.contains('-perm all')) ? 
" -output >(pigz -cp ${task.cpus} > ${prefix}.aln.gz)" : "-output ${prefix}.aln" + // muscle internally expands the shell pipe to a file descriptor of the form /dev/fd/ + // this causes it to fail, unless -output is left at the end of the call + // see also clustalo/align + // using >() is necessary to preserve the return value, + // so nextflow knows to display an error when it failed """ muscle \\ -super5 ${fasta} \\ - -output ${prefix}.aln \\ ${args} \\ - -threads ${task.cpus} + -threads ${task.cpus} \\ + $write_output + + + # output may be multiple files if -perm all is set + # compress these individually if set to compress output + if ${args.contains('-perm all') && compress}; then + pigz -p ${task.cpus} *.aln + fi cat <<-END_VERSIONS > versions.yml "${task.process}": muscle: \$(muscle -version | head -n 1 | cut -d ' ' -f 2 | sed 's/.linux64//') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) END_VERSIONS """ @@ -37,11 +52,12 @@ process MUSCLE5_SUPER5 { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln + touch ${prefix}.aln${compress ? '.gz' : ''} cat <<-END_VERSIONS > versions.yml "${task.process}": muscle: \$(muscle -version | head -n 1 | cut -d ' ' -f 2 | sed 's/.linux64//') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) END_VERSIONS """ } diff --git a/modules/nf-core/muscle5/super5/meta.yml b/modules/nf-core/muscle5/super5/meta.yml index a789aab0951..057128dc855 100644 --- a/modules/nf-core/muscle5/super5/meta.yml +++ b/modules/nf-core/muscle5/super5/meta.yml @@ -11,6 +11,10 @@ tools: documentation: "https://drive5.com/muscle5/manual/" doi: "10.1101/2021.06.20.449169" licence: ["Public Domain"] + - "pigz": + description: "Parallel implementation of the gzip algorithm." 
+ homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" input: - meta: type: map @@ -21,6 +25,9 @@ input: type: file description: Input sequences for alignment must be in FASTA format pattern: "*.{fasta,fa,fna}" + - compress: + type: boolean + description: Flag representing whether the output MSA should be compressed. Set to true to enable/false to disable compression. Compression is done using pigz, and is multithreaded. output: - meta: type: map @@ -33,11 +40,12 @@ output: pattern: "versions.yml" - alignment: type: file - description: Multiple sequence alignment produced in FASTA format - pattern: "*.{aln}" + description: Multiple sequence alignment produced in FASTA format. May be gzipped or uncompressed, depending on the compress input. If '-perm all' is passed in ext.args, this will be multiple files per input! + pattern: "*.aln{.gz,}" authors: - "@alessiovignoli" - "@JoseEspinosa" maintainers: - "@alessiovignoli" - "@JoseEspinosa" + - "@lrauschning" diff --git a/modules/nf-core/muscle5/super5/tests/main.nf.test b/modules/nf-core/muscle5/super5/tests/main.nf.test index 50821a7f86c..c1541208bcd 100644 --- a/modules/nf-core/muscle5/super5/tests/main.nf.test +++ b/modules/nf-core/muscle5/super5/tests/main.nf.test @@ -1,36 +1,72 @@ nextflow_process { name "Test Process MUSCLE5_SUPER5" - script "../main.nf" - process "MUSCLE5_SUPER5" - config "./nextflow.config" + script "../main.nf" + process "MUSCLE5_SUPER5" + config "./nextflow.config" - tag "modules" - tag "modules_nfcore" - tag "muscle5" - tag "muscle5/super5" + tag "modules" + tag "modules_nfcore" + tag "muscle5" + tag "muscle5/super5" - test("sarscov2 - fasta") { + test("fasta - align_sequence - uncompressed") { + when { + process { + """ + input[0] = [ [ id:'test' ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + input[1] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert
snapshot(process.out.alignment).match("alignment - uncompressed")}, + ) + } + } - when { - process { - """ - input[0] = [[ id:'test' ], - fasta = [ file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ] - ] - """ + test("fasta - align_sequence - compressed") { + when { + process { + """ + input[0] = [ [ id:'test' ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + input[1] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.alignment).match("alignment - compressed")}, + ) } } + test("fasta - align_sequence - compressed - perm_all") { + config "./perm_all.config" + when { + process { + """ + input[0] = [ [ id:'test' ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + input[1] = true + """ + } + } - then { - assertAll( - { assert process.success }, - { assert snapshot(process.out.alignment[0][1][0]).match("aln0")}, - { assert snapshot(process.out.alignment[0][1][1]).match("aln1")}, - { assert snapshot(process.out.alignment[0][1][2]).match("aln2")}, - { assert snapshot(process.out.alignment[0][1][3]).match("aln3")}, - { assert snapshot(process.out.versions).match("versions") } - ) + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.alignment).match("perm-all")}, + ) + } } - } -} +} \ No newline at end of file diff --git a/modules/nf-core/muscle5/super5/tests/main.nf.test.snap b/modules/nf-core/muscle5/super5/tests/main.nf.test.snap index ba665414273..ce7aadf0575 100644 --- a/modules/nf-core/muscle5/super5/tests/main.nf.test.snap +++ b/modules/nf-core/muscle5/super5/tests/main.nf.test.snap @@ -1,34 +1,46 @@ { - "aln0": { + "alignment - compressed": { "content": [ - "testabc.3.aln:md5,006a173438952c838215cd6cc9b42a3d" - ], - "timestamp": "2023-11-28T18:01:32.513414439" - }, - 
"aln2": { - "content": [ - "testbca.3.aln:md5,00496f951481d58bf23bb7690057c59f" - ], - "timestamp": "2023-11-28T18:01:32.536803426" - }, - "aln1": { - "content": [ - "testacb.3.aln:md5,fc94bb7d0b0308537fa0997c06c5a86f" + [ + [ + { + "id": "test" + }, + "test.aln.gz:md5,46ba556df08f7aabbe5e1ba31d226b6a" + ] + ] ], - "timestamp": "2023-11-28T18:01:32.52911204" + "timestamp": "2024-02-09T19:08:23.498404397" }, - "versions": { + "perm-all": { "content": [ [ - "versions.yml:md5,eccf3d4f3b7ffc7d1bacb74c4d20304e" + [ + { + "id": "test" + }, + [ + "testabc.0.aln.gz:md5,46ba556df08f7aabbe5e1ba31d226b6a", + "testacb.0.aln.gz:md5,46ba556df08f7aabbe5e1ba31d226b6a", + "testbca.0.aln.gz:md5,46ba556df08f7aabbe5e1ba31d226b6a", + "testnone.0.aln.gz:md5,46ba556df08f7aabbe5e1ba31d226b6a" + ] + ] ] ], - "timestamp": "2023-11-28T17:57:05.227355467" + "timestamp": "2024-02-09T19:08:37.386512953" }, - "aln3": { + "alignment - uncompressed": { "content": [ - "testnone.3.aln:md5,958666a45eabba897417fc2ade08a21b" + [ + [ + { + "id": "test" + }, + "test.aln:md5,46ba556df08f7aabbe5e1ba31d226b6a" + ] + ] ], - "timestamp": "2023-11-28T18:01:32.544996959" + "timestamp": "2024-02-09T19:16:25.330353817" } } \ No newline at end of file diff --git a/modules/nf-core/muscle5/super5/tests/nextflow.config b/modules/nf-core/muscle5/super5/tests/nextflow.config index f6192f874cd..e69de29bb2d 100644 --- a/modules/nf-core/muscle5/super5/tests/nextflow.config +++ b/modules/nf-core/muscle5/super5/tests/nextflow.config @@ -1,3 +0,0 @@ -process { - ext.args = '-perturb 3 -perm all' -} diff --git a/modules/nf-core/muscle5/super5/tests/perm_all.config b/modules/nf-core/muscle5/super5/tests/perm_all.config new file mode 100644 index 00000000000..d3502716003 --- /dev/null +++ b/modules/nf-core/muscle5/super5/tests/perm_all.config @@ -0,0 +1,3 @@ +process { + ext.args = { "-perm all" } +} diff --git a/modules/nf-core/tcoffee/align/environment.yml b/modules/nf-core/tcoffee/align/environment.yml index 
6f63405d47e..28f159fd459 100644 --- a/modules/nf-core/tcoffee/align/environment.yml +++ b/modules/nf-core/tcoffee/align/environment.yml @@ -5,3 +5,4 @@ channels: - defaults dependencies: - bioconda::t-coffee=13.46.0.919e8c6b + - conda-forge::pigz=2.8 diff --git a/modules/nf-core/tcoffee/align/main.nf b/modules/nf-core/tcoffee/align/main.nf index b09a819fb4d..671aca8eacc 100644 --- a/modules/nf-core/tcoffee/align/main.nf +++ b/modules/nf-core/tcoffee/align/main.nf @@ -4,26 +4,30 @@ process TCOFFEE_ALIGN { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/t-coffee:13.46.0.919e8c6b--hfc96bf3_0': - 'biocontainers/t-coffee:13.46.0.919e8c6b--hfc96bf3_0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-a76a981c07359a31ff55b9dc13bd3da5ce1909c1:84c8f17f1259b49e2f7783b95b7a89c6f2cb199e-0': + 'biocontainers/mulled-v2-a76a981c07359a31ff55b9dc13bd3da5ce1909c1:84c8f17f1259b49e2f7783b95b7a89c6f2cb199e-0' }" input: tuple val(meta) , path(fasta) tuple val(meta2), path(tree) tuple val(meta3), path(template), path(accessory_informations) + val(compress) output: - tuple val (meta), path ("*.aln"), emit: alignment - path "versions.yml" , emit: versions + tuple val(meta), path("*.aln{.gz,}"), emit: alignment + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when script: def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: "${meta.id}" + def prefix = task.ext.prefix ?: "${meta.id}" def tree_args = tree ? "-usetree $tree" : "" def template_args = template ? "-template_file $template" : "" + def write_output = compress ? 
" >(pigz -cp ${task.cpus} > ${prefix}.aln.gz)" : "> ${prefix}.aln" + // using >() is necessary to preserve the tcoffee return value, + // so nextflow knows to display an error when it failed """ export TEMP='./' t_coffee -seq ${fasta} \ @@ -31,23 +35,25 @@ process TCOFFEE_ALIGN { $template_args \ $args \ -thread ${task.cpus} \ - -outfile ${prefix}.aln - + -outfile stdout \ + $write_output cat <<-END_VERSIONS > versions.yml "${task.process}": tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) END_VERSIONS """ stub: def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln + touch ${prefix}.aln${compress ? '.gz':''} cat <<-END_VERSIONS > versions.yml "${task.process}": tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' )) END_VERSIONS """ } diff --git a/modules/nf-core/tcoffee/align/meta.yml b/modules/nf-core/tcoffee/align/meta.yml index 531aa3e4b04..6cfcc72d46a 100644 --- a/modules/nf-core/tcoffee/align/meta.yml +++ b/modules/nf-core/tcoffee/align/meta.yml @@ -12,6 +12,10 @@ tools: tool_dev_url: "https://github.com/cbcrg/tcoffee" doi: "10.1006/jmbi.2000.4042" licence: ["GPL v3"] + - "pigz": + description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" input: - meta: type: map @@ -44,6 +48,9 @@ input: type: file description: Accessory files to be used in the alignment. For example, it could be protein structures or secondary structures. pattern: "*" + - compress: + type: boolean + description: Flag representing whether the output MSA should be compressed. Set to true to enable/false to disable compression. Compression is done using pigz, and is multithreaded. output: - meta: type: map @@ -52,8 +59,8 @@ output: e.g. `[ id:'test']` - alignment: type: file - description: Alignment file. 
- pattern: "*.{aln}" + description: Alignment file in FASTA format. May be gzipped. + pattern: "*.aln{.gz,}" - versions: type: file description: File containing software versions @@ -64,3 +71,4 @@ authors: maintainers: - "@luisas" - "@JoseEspinosa" + - "@lrauschning" diff --git a/modules/nf-core/tcoffee/align/tests/main.nf.test b/modules/nf-core/tcoffee/align/tests/main.nf.test index bb17194c568..9d66f86bf39 100644 --- a/modules/nf-core/tcoffee/align/tests/main.nf.test +++ b/modules/nf-core/tcoffee/align/tests/main.nf.test @@ -8,6 +8,8 @@ nextflow_process { tag "modules_nfcore" tag "tcoffee" tag "tcoffee/align" + tag "famsa/guidetree" + tag "untar" test("fasta - align_sequence") { @@ -21,6 +23,7 @@ nextflow_process { ] input[1] = [[:],[]] input[2] = [[:],[],[]] + input[3] = true """ } } @@ -29,7 +32,33 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot(process.out.alignment).match("alignment")}, - { assert snapshot(process.out.versions).match("versions") } + { assert snapshot(process.out.versions).match("versions_uncomp") } + ) + } + } + + test("fasta - align_sequence - uncompressed") { + + config "./sequence.config" + + when { + process { + """ + input[0] = [ [ id:'test' ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin-ref.fa", checkIfExists: true) + ] + input[1] = [[:],[]] + input[2] = [[:],[],[]] + input[3] = false + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out.alignment).match("alignment - uncompressed")}, + { assert snapshot(process.out.versions).match("versions_comp") } ) } } @@ -61,6 +90,7 @@ nextflow_process { ] input[1] = FAMSA_GUIDETREE.out.tree.collect{ meta, tree -> tree }.map{ tree -> [[ id: 'test'], tree]} input[2] = [ [:], [], [] ] + input[3] = true """ } } @@ -69,7 +99,7 @@ nextflow_process { assertAll( { assert process.success }, { assert snapshot(process.out.alignment).match("alignment_guidetree")}, - { assert 
snapshot(process.out.versions).match("versions_guidtree") } + { assert snapshot(process.out.versions).match("versions_guidetree") } ) } @@ -102,6 +132,7 @@ nextflow_process { ] input[1] = [ [:], [] ] input[2] = UNTAR.out.untar.map { meta,dir -> [[ id:'test' ], [] ,file(dir).listFiles().collect()]} + input[3] = true """ } @@ -110,10 +141,10 @@ nextflow_process { then { assertAll( { assert process.success }, - { assert path(process.out.alignment.get(0).get(1)).getText().contains("1ahl") }, + { assert path(process.out.alignment.get(0).get(1)).getTextGzip().contains("1ahl") }, { assert snapshot(process.out.versions).match("versions_structure") } ) } } -} +} \ No newline at end of file diff --git a/modules/nf-core/tcoffee/align/tests/main.nf.test.snap b/modules/nf-core/tcoffee/align/tests/main.nf.test.snap index 79dd44e5707..94b9d4e3cce 100644 --- a/modules/nf-core/tcoffee/align/tests/main.nf.test.snap +++ b/modules/nf-core/tcoffee/align/tests/main.nf.test.snap @@ -2,51 +2,64 @@ "versions_structure": { "content": [ [ - "versions.yml:md5,4d38a2c2211ee21b5c1b181e8260f5f7" + "versions.yml:md5,fb187c9186b50a8076d08cd3be3c1b70" ] ], - "timestamp": "2023-11-28T12:26:47.937090039" + "timestamp": "2024-02-09T19:43:36.590833926" }, "versions": { "content": [ [ - "versions.yml:md5,4d38a2c2211ee21b5c1b181e8260f5f7" + "versions.yml:md5,fb187c9186b50a8076d08cd3be3c1b70" ] ], - "timestamp": "2023-11-28T12:26:26.949694794" + "timestamp": "2024-02-09T19:42:52.802861054" }, - "versions_guidtree": { + "alignment - uncompressed": { "content": [ [ - "versions.yml:md5,4d38a2c2211ee21b5c1b181e8260f5f7" + [ + { + "id": "test" + }, + "test.aln:md5,bd1db08ad04514cc6d1334598c1a6ef0" + ] ] ], - "timestamp": "2023-11-28T12:26:36.766599176" + "timestamp": "2024-02-09T19:43:05.936032398" }, - "msa": { + "versions_guidetree": { + "content": [ + [ + "versions.yml:md5,fb187c9186b50a8076d08cd3be3c1b70" + ] + ], + "timestamp": "2024-02-09T19:43:20.741062719" + }, + "alignment": { "content": [ [ [ { "id": 
"test" }, - "test.aln:md5,bd1db08ad04514cc6d1334598c1a6ef0" + "test.aln.gz:md5,bd1db08ad04514cc6d1334598c1a6ef0" ] ] ], - "timestamp": "2023-11-28T12:26:26.904709139" + "timestamp": "2024-02-09T19:42:52.774882703" }, - "msa_guidetree": { + "alignment_guidetree": { "content": [ [ [ { "id": "test" }, - "test.aln:md5,93bc8adfcd88f7913718eacc13da8e4a" + "test.aln.gz:md5,93bc8adfcd88f7913718eacc13da8e4a" ] ] ], - "timestamp": "2023-11-28T12:26:36.702189365" + "timestamp": "2024-02-09T19:43:20.688201033" } } \ No newline at end of file diff --git a/modules/nf-core/tcoffee/align/tests/structure.config b/modules/nf-core/tcoffee/align/tests/structure.config index 4ae9d7acc1a..1cbd9c9c8e1 100644 --- a/modules/nf-core/tcoffee/align/tests/structure.config +++ b/modules/nf-core/tcoffee/align/tests/structure.config @@ -1,5 +1,5 @@ process { withName: "TCOFFEE_ALIGN" { - ext.args = { "-method TMalign_pair" } + ext.args = { "-method TMalign_pair -output fasta_aln" } } } diff --git a/modules/nf-core/tcoffee/alncompare/environment.yml b/modules/nf-core/tcoffee/alncompare/environment.yml index 099528eccf4..dfa14141528 100644 --- a/modules/nf-core/tcoffee/alncompare/environment.yml +++ b/modules/nf-core/tcoffee/alncompare/environment.yml @@ -5,3 +5,4 @@ channels: - defaults dependencies: - bioconda::t-coffee=13.46.0.919e8c6b + - conda-forge::pigz=2.8 diff --git a/modules/nf-core/tcoffee/alncompare/main.nf b/modules/nf-core/tcoffee/alncompare/main.nf index 75c008af5bb..043158a75ad 100644 --- a/modules/nf-core/tcoffee/alncompare/main.nf +++ b/modules/nf-core/tcoffee/alncompare/main.nf @@ -4,8 +4,8 @@ process TCOFFEE_ALNCOMPARE { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/t-coffee:13.46.0.919e8c6b--hfc96bf3_0': - 'biocontainers/t-coffee:13.46.0.919e8c6b--hfc96bf3_0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-a76a981c07359a31ff55b9dc13bd3da5ce1909c1:84c8f17f1259b49e2f7783b95b7a89c6f2cb199e-0': + 'biocontainers/mulled-v2-a76a981c07359a31ff55b9dc13bd3da5ce1909c1:84c8f17f1259b49e2f7783b95b7a89c6f2cb199e-0' }" input: tuple val(meta), path(msa), path(ref_msa) @@ -23,12 +23,14 @@ process TCOFFEE_ALNCOMPARE { def metric_name = args.split('compare_mode ')[1].split(' ')[0] def header = meta.keySet().join(",") def values = meta.values().join(",") + def read_msa = msa.getName().endsWith(".gz") ? "<(unpigz -cdf ${msa})" : msa + def read_ref = ref_msa.getName().endsWith(".gz") ? "<(unpigz -cdf ${ref_msa})" : ref_msa """ export TEMP='./' t_coffee -other_pg aln_compare \ - -al1 ${ref_msa} \ - -al2 ${msa} \ + -al1 ${read_ref} \ + -al2 ${read_msa} \ ${args} \ | grep -v "seq1" | grep -v '*' | \ awk '{ print \$4}' ORS="\t" \ @@ -44,6 +46,7 @@ process TCOFFEE_ALNCOMPARE { cat <<-END_VERSIONS > versions.yml "${task.process}": tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ) END_VERSIONS """ stub: @@ -55,6 +58,7 @@ process TCOFFEE_ALNCOMPARE { cat <<-END_VERSIONS > versions.yml "${task.process}": tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ) END_VERSIONS """ } diff --git a/modules/nf-core/tcoffee/alncompare/meta.yml b/modules/nf-core/tcoffee/alncompare/meta.yml index 9f85247692b..31502555d9e 100644 --- a/modules/nf-core/tcoffee/alncompare/meta.yml +++ b/modules/nf-core/tcoffee/alncompare/meta.yml @@ -12,6 +12,10 @@ tools: tool_dev_url: "https://github.com/cbcrg/tcoffee" doi: "10.1006/jmbi.2000.4042" licence: ["GPL v3"] + - "pigz": + description: "Parallel implementation of the gzip algorithm." 
+ homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" input: - meta: @@ -21,12 +25,12 @@ input: e.g. [ id:'test', ... ] - msa: type: file - description: fasta file containing the alignment to be evaluated - pattern: "*.{aln,fa,fasta,fas}" + description: fasta file containing the alignment to be evaluated. Can be gzipped or uncompressed + pattern: "*.{aln,fa,fasta,fas}{.gz,}" - ref_msa: type: file - description: fasta file containing the reference alignment used for the evaluation - pattern: "*.{aln,fa,fasta,fas}" + description: fasta file containing the reference alignment used for the evaluation. Can be gzipped or uncompressed + pattern: "*.{aln,fa,fasta,fas}{.gz,}" output: - meta: diff --git a/modules/nf-core/tcoffee/alncompare/tests/main.nf.test.snap b/modules/nf-core/tcoffee/alncompare/tests/main.nf.test.snap index 7524515a848..d1fd92e4304 100644 --- a/modules/nf-core/tcoffee/alncompare/tests/main.nf.test.snap +++ b/modules/nf-core/tcoffee/alncompare/tests/main.nf.test.snap @@ -11,7 +11,7 @@ ] ], "1": [ - "versions.yml:md5,8a30677771f2dc3b61b60702622fdfda" + "versions.yml:md5,438507517a1a831c7b7a1571b1fdd98d" ], "scores": [ [ @@ -22,10 +22,10 @@ ] ], "versions": [ - "versions.yml:md5,8a30677771f2dc3b61b60702622fdfda" + "versions.yml:md5,438507517a1a831c7b7a1571b1fdd98d" ] } ], - "timestamp": "2023-12-13T10:50:50.701336" + "timestamp": "2024-01-22T17:08:59.494237269" } } \ No newline at end of file diff --git a/modules/nf-core/tcoffee/irmsd/environment.yml b/modules/nf-core/tcoffee/irmsd/environment.yml index 635de77940d..4065fe70bd6 100644 --- a/modules/nf-core/tcoffee/irmsd/environment.yml +++ b/modules/nf-core/tcoffee/irmsd/environment.yml @@ -5,3 +5,4 @@ channels: - defaults dependencies: - bioconda::t-coffee=13.46.0.919e8c6b + - conda-forge::pigz=2.8 diff --git a/modules/nf-core/tcoffee/irmsd/main.nf b/modules/nf-core/tcoffee/irmsd/main.nf index 0c60312ea78..95e6b61efb4 100644 --- a/modules/nf-core/tcoffee/irmsd/main.nf +++ 
b/modules/nf-core/tcoffee/irmsd/main.nf @@ -4,8 +4,8 @@ process TCOFFEE_IRMSD { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/t-coffee:13.46.0.919e8c6b--hfc96bf3_0': - 'biocontainers/t-coffee:13.46.0.919e8c6b--hfc96bf3_0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-a76a981c07359a31ff55b9dc13bd3da5ce1909c1:84c8f17f1259b49e2f7783b95b7a89c6f2cb199e-0': + 'biocontainers/mulled-v2-a76a981c07359a31ff55b9dc13bd3da5ce1909c1:84c8f17f1259b49e2f7783b95b7a89c6f2cb199e-0' }" input: tuple val(meta), file (msa) @@ -24,14 +24,19 @@ process TCOFFEE_IRMSD { """ export TEMP='./' + if [[ \$(basename $msa) == *.gz ]] ; then + unpigz -f $msa + fi + t_coffee -other_pg irmsd \ - $msa \ + \$(basename $msa .gz) \ $args \ -template_file $template > ${prefix}.irmsd cat <<-END_VERSIONS > versions.yml "${task.process}": tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ) END_VERSIONS """ @@ -44,6 +49,7 @@ process TCOFFEE_IRMSD { cat <<-END_VERSIONS > versions.yml "${task.process}": tcoffee: \$( t_coffee -version | awk '{gsub("Version_", ""); print \$3}') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ) END_VERSIONS """ } diff --git a/modules/nf-core/tcoffee/irmsd/meta.yml b/modules/nf-core/tcoffee/irmsd/meta.yml index abee1a3a5d4..f55fca748cf 100644 --- a/modules/nf-core/tcoffee/irmsd/meta.yml +++ b/modules/nf-core/tcoffee/irmsd/meta.yml @@ -1,5 +1,5 @@ name: "tcoffee_irmsd" -description: Computes irmsd score for a given alignment and the structures. +description: Computes the irmsd score for a given alignment and the structures. 
keywords: - alignment - MSA @@ -12,6 +12,10 @@ tools: tool_dev_url: "https://github.com/cbcrg/tcoffee" doi: "10.1006/jmbi.2000.4042" licence: ["GPL v3"] + - "pigz": + description: "Parallel implementation of the gzip algorithm." + homepage: "https://zlib.net/pigz/" + documentation: "https://zlib.net/pigz/pigz.pdf" input: - meta: @@ -21,11 +25,11 @@ input: e.g. [ id:'test', ... ] - msa: type: file - description: fasta file containing the alignment to be evaluated - pattern: "*.{aln,fa,fasta,fas}" + description: fasta file containing the alignment to be evaluated. May be gzipped or uncompressed. + pattern: "*.{aln,fa,fasta,fas}{.gz,}" - template: type: file - description: Template file matching the structures to the sequences in the alignment + description: Template file matching the structures to the sequences in the alignment. pattern: "*" - structures: type: directory diff --git a/modules/nf-core/tcoffee/irmsd/tests/main.nf.test b/modules/nf-core/tcoffee/irmsd/tests/main.nf.test index 1519a66da89..55a574a4616 100644 --- a/modules/nf-core/tcoffee/irmsd/tests/main.nf.test +++ b/modules/nf-core/tcoffee/irmsd/tests/main.nf.test @@ -6,7 +6,9 @@ nextflow_process { tag "modules" tag "modules_nfcore" tag "tcoffee" - tag "tcoffee/irmsd" + tag "tcoffee/irmsd" + tag "untar" + tag "pigz/compress" test("seatoxin") { @@ -24,7 +26,7 @@ nextflow_process { } } } - + when { process { """ @@ -35,7 +37,7 @@ nextflow_process { input[1] = UNTAR.out.untar.map { meta,dir -> [[ id:'test' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/templates/seatoxin-ref_template.txt", checkIfExists: true) ,file(dir).listFiles().collect()]} """ } - + } then { @@ -47,4 +49,51 @@ nextflow_process { } -} + test("seatoxin - compressed") { + + setup { + + run("UNTAR") { + script "../../../untar/main.nf" + process { + """ + input[0] = [ [ id:'test' ], + 
file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/structures/seatoxin-ref.tar.gz", checkIfExists: true) + ] + + """ + } + } + + run("PIGZ_COMPRESS") { + script "../../../pigz/compress/main.nf" + process { + """ + input[0] = [ [ id:'test' ], + file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/setoxin.ref", checkIfExists: true) + ] + + """ + } + } + } + + when { + process { + """ + input[0] = PIGZ_COMPRESS.out.archive + input[1] = UNTAR.out.untar.map { meta,dir -> [[ id:'test' ], file("https://raw.githubusercontent.com/nf-core/test-datasets/multiplesequencealign/testdata/templates/seatoxin-ref_template.txt", checkIfExists: true) ,file(dir).listFiles().collect()]} + """ + } + + } + + then { + assertAll( + { assert process.success }, + { assert path(process.out.irmsd.get(0).get(1)).getText().contains("1ahl") } + ) + } + + } +} \ No newline at end of file