From 62007703c84bcfef92ce9e4a57cb1cc382917201 Mon Sep 17 00:00:00 2001 From: Leon Rauschning <99650940+lrauschning@users.noreply.github.com> Date: Fri, 22 Mar 2024 17:20:47 +0100 Subject: [PATCH] Update learnmsa module to work with compressed files. (#5276) * add compression handling to learnmsa * add compression simulation to stub * fix typo * commit tests * remove duplicate snapshot names & regenerate * Update modules/nf-core/learnmsa/align/meta.yml Co-authored-by: Luisa Santus --------- Co-authored-by: Leon Rauschning Co-authored-by: Luisa Santus --- Review note (not part of the commit message): the pigz entry in both versions.yml heredocs of main.nf used to end in "))". The first ")" closes the outer command substitution; the second one was outside it and was therefore written literally into versions.yml after the version number. The stray ")" is removed in this patch. The versions.yml md5 values in main.nf.test.snap are unchanged here and were presumably recorded from output that still contained the stray ")"; regenerate them (nf-test test --update-snapshot) after applying. .../nf-core/learnmsa/align/environment.yml | 3 +- modules/nf-core/learnmsa/align/main.nf | 20 ++++++++----- modules/nf-core/learnmsa/align/meta.yml | 11 ++++--- .../nf-core/learnmsa/align/tests/main.nf.test | 30 +++++++++++++++++-- .../learnmsa/align/tests/main.nf.test.snap | 22 ++++++++++++-- 5 files changed, 67 insertions(+), 19 deletions(-) diff --git a/modules/nf-core/learnmsa/align/environment.yml b/modules/nf-core/learnmsa/align/environment.yml index 04daae62b86..124b8d8412a 100644 --- a/modules/nf-core/learnmsa/align/environment.yml +++ b/modules/nf-core/learnmsa/align/environment.yml @@ -4,4 +4,5 @@ channels: - bioconda - defaults dependencies: - - bioconda::learnmsa=1.3.2 + - bioconda::learnmsa=2.0.1 + - conda-forge::pigz=2.8 diff --git a/modules/nf-core/learnmsa/align/main.nf b/modules/nf-core/learnmsa/align/main.nf index 444ffcbf421..304fb07ced6 100644 --- a/modules/nf-core/learnmsa/align/main.nf +++ b/modules/nf-core/learnmsa/align/main.nf @@ -4,15 +4,16 @@ process LEARNMSA_ALIGN { conda "${moduleDir}/environment.yml" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? 
- 'https://depot.galaxyproject.org/singularity/learnmsa:1.3.2--pyhdfd78af_0': - 'biocontainers/learnmsa:1.3.2--pyhdfd78af_0' }" + 'https://depot.galaxyproject.org/singularity/mulled-v2-741e0da5cf2d6d964f559672e2908c2111cbb46b:4930edd009376542543bfd2e20008bb1ae58f841-0' : + 'biocontainers/mulled-v2-741e0da5cf2d6d964f559672e2908c2111cbb46b:4930edd009376542543bfd2e20008bb1ae58f841-0' }" input: - tuple val(meta), path(fasta) + tuple val(meta), path(fasta) + val(compress) output: - tuple val(meta), path("*.aln"), emit: alignment - path "versions.yml" , emit: versions + tuple val(meta), path("*.aln{.gz,}"), emit: alignment + path "versions.yml" , emit: versions when: task.ext.when == null || task.ext.when @@ -20,15 +21,17 @@ process LEARNMSA_ALIGN { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" + def write_output = compress ? ">(pigz -cp ${task.cpus} > ${prefix}.aln.gz)" : "${prefix}.aln" """ learnMSA \\ $args \\ - -i $fasta \\ - -o ${prefix}.aln + -i <(unpigz -cdf $fasta) \\ + -o $write_output cat <<-END_VERSIONS > versions.yml "${task.process}": learnmsa: \$(learnMSA -h | grep 'version' | awk -F 'version ' '{print \$2}' | awk '{print \$1}' | sed 's/)//g') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ) END_VERSIONS """ @@ -36,11 +39,12 @@ process LEARNMSA_ALIGN { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" """ - touch ${prefix}.aln + touch ${prefix}.aln${compress ? '.gz' : ''} cat <<-END_VERSIONS > versions.yml "${task.process}": learnmsa: \$(learnMSA -h | grep 'version' | awk -F 'version ' '{print \$2}' | awk '{print \$1}' | sed 's/)//g') + pigz: \$(echo \$(pigz --version 2>&1) | sed 's/^.*pigz\\w*//' ) END_VERSIONS """ } diff --git a/modules/nf-core/learnmsa/align/meta.yml b/modules/nf-core/learnmsa/align/meta.yml index fcb935b4d20..66a9f7d12c9 100644 --- a/modules/nf-core/learnmsa/align/meta.yml +++ b/modules/nf-core/learnmsa/align/meta.yml @@ -20,8 +20,11 @@ input: e.g. 
`[ id:'test']` - fasta: type: file - description: Input sequences in FASTA format - pattern: "*.{fa,fasta}" + description: Input sequences in FASTA format. May be gz-compressed or uncompressed. + pattern: "*.{fa,fasta}{.gz,}" + - compress: + type: boolean + description: Flag representing whether the output MSA should be compressed. Set to true to enable/false to disable compression. Compression is done using pigz, and is multithreaded. output: - meta: type: map @@ -30,8 +33,8 @@ output: e.g. `[ id:'test']` - alignment: type: file - description: Alignment file. - pattern: "*.{aln}" + description: Alignment file, in FASTA format. May be gzipped or uncompressed. + pattern: "*.aln{.gz,}" - versions: type: file description: File containing software versions diff --git a/modules/nf-core/learnmsa/align/tests/main.nf.test b/modules/nf-core/learnmsa/align/tests/main.nf.test index e197470dca4..8459ead38d5 100644 --- a/modules/nf-core/learnmsa/align/tests/main.nf.test +++ b/modules/nf-core/learnmsa/align/tests/main.nf.test @@ -10,7 +10,7 @@ nextflow_process { tag "learnmsa" tag "learnmsa/align" - test("sarscov2 - fasta") { + test("sarscov2 - fasta - uncompressed") { when { process { @@ -18,6 +18,7 @@ nextflow_process { input[0] = [ [ id:'test' ], // meta map file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) ] + input[1] = false """ } } @@ -26,10 +27,33 @@ nextflow_process { assertAll( { assert process.success }, { assert path(process.out.alignment.get(0).get(1)).getText().contains(">sample1") }, - { assert snapshot(process.out.versions).match("versions_structure") } + { assert snapshot(process.out.versions).match("versions") } ) } } -} + test("sarscov2 - fasta - compressed") { + + when { + process { + """ + input[0] = [ [ id:'test' ], // meta map + file(params.test_data['sarscov2']['genome']['informative_sites_fas'], checkIfExists: true) + ] + input[1] = true + """ + } + } + + then { + assertAll( + { assert process.success }, + { 
assert path(process.out.alignment.get(0).get(1)).getTextGzip().contains(">sample1") }, + { assert snapshot(process.out.versions).match("versions1") } + ) + } + + } + +} \ No newline at end of file diff --git a/modules/nf-core/learnmsa/align/tests/main.nf.test.snap b/modules/nf-core/learnmsa/align/tests/main.nf.test.snap index 8bbe2c19eb4..981738a276d 100644 --- a/modules/nf-core/learnmsa/align/tests/main.nf.test.snap +++ b/modules/nf-core/learnmsa/align/tests/main.nf.test.snap @@ -1,10 +1,26 @@ { - "versions_structure": { + "versions": { "content": [ [ - "versions.yml:md5,adbcf330bacf246f19438981d58ae35a" + "versions.yml:md5,85322b0f038aa768f202fd0d748d6c7c" ] ], - "timestamp": "2023-11-28T14:26:02.862695056" + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T16:06:48.867020809" + }, + "versions1": { + "content": [ + [ + "versions.yml:md5,85322b0f038aa768f202fd0d748d6c7c" + ] + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-03-20T16:12:13.921813607" } } \ No newline at end of file