diff --git a/modules.json b/modules.json index 63fcb986..07ebe99e 100644 --- a/modules.json +++ b/modules.json @@ -40,6 +40,11 @@ "git_sha": "490f8ab54c632d84f99edd6f208305ac5e283ab3", "installed_by": ["fasta_ltrretriever_lai"] }, + "seqkit/seq": { + "branch": "main", + "git_sha": "8ec55a2c8e4ce3af070393768f6c2c57f5c34076", + "installed_by": ["fasta_ltrretriever_lai"] + }, "syri": { "branch": "main", "git_sha": "836bb223521eeaa08d65c1b7f4b82be1272a1964", diff --git a/modules/gallvp/seqkit/seq/environment.yml b/modules/gallvp/seqkit/seq/environment.yml new file mode 100644 index 00000000..74e0dd76 --- /dev/null +++ b/modules/gallvp/seqkit/seq/environment.yml @@ -0,0 +1,9 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json +name: "seqkit_seq" +channels: + - conda-forge + - bioconda + - defaults +dependencies: + - "bioconda::seqkit=2.8.1" diff --git a/modules/gallvp/seqkit/seq/main.nf b/modules/gallvp/seqkit/seq/main.nf new file mode 100644 index 00000000..d7d38fc8 --- /dev/null +++ b/modules/gallvp/seqkit/seq/main.nf @@ -0,0 +1,63 @@ +process SEQKIT_SEQ { + tag "$meta.id" + label 'process_low' + // File IO can be a bottleneck. See: https://bioinf.shenwei.me/seqkit/usage/#parallelization-of-cpu-intensive-jobs + + conda "${moduleDir}/environment.yml" + container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? + 'https://depot.galaxyproject.org/singularity/seqkit:2.8.1--h9ee0642_0': + 'biocontainers/seqkit:2.8.1--h9ee0642_0' }" + + input: + tuple val(meta), path(fastx) + + output: + tuple val(meta), path("${prefix}.*") , emit: fastx + path "versions.yml" , emit: versions + + when: + task.ext.when == null || task.ext.when + + script: + def args = task.ext.args ?: '' + def args2 = task.ext.args2 ?: '' + prefix = task.ext.prefix ?: "${meta.id}" + def extension = "fastq" + if ("$fastx" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz|.+\.fsa|.+\.fsa.gz/ ) { + extension = "fasta" + } + extension = fastx.toString().endsWith('.gz') ? "${extension}.gz" : extension + def call_gzip = extension.endsWith('.gz') ? "| gzip -c $args2" : '' + if("${prefix}.${extension}" == "$fastx") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + seqkit \\ + seq \\ + --threads $task.cpus \\ + $args \\ + $fastx \\ + $call_gzip \\ + > ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$(seqkit version | cut -d' ' -f2) + END_VERSIONS + """ + + stub: + prefix = task.ext.prefix ?: "${meta.id}" + def extension = "fastq" + if ("$fastx" ==~ /.+\.fasta|.+\.fasta.gz|.+\.fa|.+\.fa.gz|.+\.fas|.+\.fas.gz|.+\.fna|.+\.fna.gz|.+\.fsa|.+\.fsa.gz/ ) { + extension = "fasta" + } + extension = fastx.toString().endsWith('.gz') ? "${extension}.gz" : extension + if("${prefix}.${extension}" == "$fastx") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" + """ + touch ${prefix}.${extension} + + cat <<-END_VERSIONS > versions.yml + "${task.process}": + seqkit: \$(seqkit version | cut -d' ' -f2) + END_VERSIONS + """ +} diff --git a/modules/gallvp/seqkit/seq/meta.yml b/modules/gallvp/seqkit/seq/meta.yml new file mode 100644 index 00000000..8d4e2b16 --- /dev/null +++ b/modules/gallvp/seqkit/seq/meta.yml @@ -0,0 +1,48 @@ +--- +# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json +name: "seqkit_seq" +description: Transforms sequences (extract ID, filter by length, remove gaps, reverse complement...) +keywords: + - genomics + - fasta + - fastq + - transform + - filter + - gaps + - complement +tools: + - "seqkit": + description: "A cross-platform and ultrafast toolkit for FASTA/Q file manipulation" + homepage: "https://bioinf.shenwei.me/seqkit/" + documentation: "https://bioinf.shenwei.me/seqkit/usage/" + tool_dev_url: "https://github.com/shenwei356/seqkit" + doi: "10.1371/journal.pone.0163962" + licence: ["MIT"] +input: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - fastx: + type: file + description: Input fasta/fastq file + pattern: "*.{fsa,fas,fa,fasta,fastq,fq,fsa.gz,fas.gz,fa.gz,fasta.gz,fastq.gz,fq.gz}" +output: + - meta: + type: map + description: | + Groovy Map containing sample information + e.g. `[ id:'sample1' ]` + - fastx: + type: file + description: Output fasta/fastq file + pattern: "*.{fasta,fasta.gz,fastq,fastq.gz}" + - versions: + type: file + description: File containing software versions + pattern: "versions.yml" +authors: + - "@GallVp" +maintainers: + - "@GallVp" diff --git a/modules/gallvp/seqkit/seq/tests/main.nf.test b/modules/gallvp/seqkit/seq/tests/main.nf.test new file mode 100644 index 00000000..cff90067 --- /dev/null +++ b/modules/gallvp/seqkit/seq/tests/main.nf.test @@ -0,0 +1,145 @@ +nextflow_process { + + name "Test Process SEQKIT_SEQ" + script "../main.nf" + process "SEQKIT_SEQ" + config './nextflow.config' + + tag "modules" + tag "modules_gallvp" + tag "seqkit" + tag "seqkit/seq" + + test("sarscov2-genome_fasta") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + + test("sarscov2-genome_fasta_gz") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + + test("sarscov2-test_1_fastq_gz") { + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + + test("file_name_conflict-fail_with_error") { + when { + process { + """ + input[0] = [ + [ id:'test_1' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("Input and output names are the same") } + ) + } + + } + + test("sarscov2-genome_fasta-stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'test' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert process.success }, + { assert snapshot(process.out).match() }, + ) + } + + } + + test("file_name_conflict-fail_with_error-stub") { + + options '-stub' + + when { + process { + """ + input[0] = [ + [ id:'genome' ], // meta map + file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) + ] + """ + } + } + + then { + assertAll( + { assert !process.success }, + { assert process.stdout.toString().contains("Input and output names are the same") } + ) + } + + } + +} diff --git a/modules/gallvp/seqkit/seq/tests/main.nf.test.snap b/modules/gallvp/seqkit/seq/tests/main.nf.test.snap new file mode 100644 index 00000000..e6910966 --- /dev/null +++ b/modules/gallvp/seqkit/seq/tests/main.nf.test.snap @@ -0,0 +1,134 @@ +{ + "sarscov2-genome_fasta-stub": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "1": [ + "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e" + ] + ], + "versions": [ + "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T08:52:18.220051903" + }, + "sarscov2-test_1_fastq_gz": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "1": [ + "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec" + ] + ], + "versions": [ + "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T08:51:55.607826581" + }, + "sarscov2-genome_fasta": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fasta:md5,483f4a5dfe60171c86ee9b7e6dff908b" + ] + ], + "1": [ + "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.fasta:md5,483f4a5dfe60171c86ee9b7e6dff908b" + ] + ], + "versions": [ + "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T08:51:27.717072933" + }, + "sarscov2-genome_fasta_gz": { + "content": [ + { + "0": [ + [ + { + "id": "test" + }, + "test.fasta.gz:md5,483f4a5dfe60171c86ee9b7e6dff908b" + ] + ], + "1": [ + "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd" + ], + "fastx": [ + [ + { + "id": "test" + }, + "test.fasta.gz:md5,483f4a5dfe60171c86ee9b7e6dff908b" + ] + ], + "versions": [ + "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd" + ] + } + ], + "meta": { + "nf-test": "0.8.4", + "nextflow": "23.10.1" + }, + "timestamp": "2024-05-08T08:51:37.917560104" + } +} \ No newline at end of file diff --git a/modules/gallvp/seqkit/seq/tests/nextflow.config b/modules/gallvp/seqkit/seq/tests/nextflow.config new file mode 100644 index 00000000..d8e3c66a --- /dev/null +++ b/modules/gallvp/seqkit/seq/tests/nextflow.config @@ -0,0 +1,3 @@ +process { + ext.args2 = '-n' +} diff --git a/modules/gallvp/seqkit/seq/tests/tags.yml b/modules/gallvp/seqkit/seq/tests/tags.yml new file mode 100644 index 00000000..5eeca7e3 --- /dev/null +++ b/modules/gallvp/seqkit/seq/tests/tags.yml @@ -0,0 +1,2 @@ +seqkit/seq: + - "modules/nf-core/seqkit/seq/**"