Skip to content

Commit

Permalink
Add module for TaxonKit name2taxid (#4778)
Browse files Browse the repository at this point in the history
* Add module for TaxonKit name2taxid

* Add tests

* Add input assertion

* Update metadata

* Fix linting

* [automated] Fix linting with Prettier

* restore devcontainer.json

* Update modules/nf-core/taxonkit/name2taxid/meta.yml

Co-authored-by: James A. Fellows Yates <[email protected]>

---------

Co-authored-by: nf-core-bot <[email protected]>
Co-authored-by: James A. Fellows Yates <[email protected]>
  • Loading branch information
3 people authored Mar 26, 2024
1 parent e66183d commit e41aa10
Show file tree
Hide file tree
Showing 6 changed files with 310 additions and 0 deletions.
9 changes: 9 additions & 0 deletions modules/nf-core/taxonkit/name2taxid/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment for the TAXONKIT_NAME2TAXID module.
name: "taxonkit_name2taxid"
# Channels are listed in priority order. Bioconda packages are built against
# conda-forge, so the Anaconda "defaults" channel is not needed and is left
# out to avoid mixing package sources during dependency resolution.
channels:
  - conda-forge
  - bioconda
dependencies:
  # Pinned to the same TaxonKit release as the container images in main.nf.
  - "bioconda::taxonkit=0.15.1"
51 changes: 51 additions & 0 deletions modules/nf-core/taxonkit/name2taxid/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
// Convert taxon names to TaxIds with `taxonkit name2taxid`.
//
// Inputs:
//   meta      - Groovy map with sample information; `meta.id` is used as the
//               task tag and as the default output prefix
//   name      - a single taxon name to look up (mutually exclusive with names_txt)
//   names_txt - file of taxon names to look up (mutually exclusive with name)
//   taxdb     - taxonomy database location, passed to `--data-dir`
// Outputs:
//   tsv       - `<prefix>.tsv` written by taxonkit via `--out-file`
//   versions  - versions.yml recording the taxonkit version
process TAXONKIT_NAME2TAXID {
    tag "$meta.id"
    label 'process_low'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/taxonkit:0.15.1--h9ee0642_0':
        'biocontainers/taxonkit:0.15.1--h9ee0642_0' }"

    input:
    tuple val(meta), val(name), path(names_txt)
    path taxdb

    output:
    tuple val(meta), path("*.tsv"), emit: tsv
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Exactly one of `name` / `names_txt` must be supplied.
    assert ((!name && names_txt) || (name && !names_txt)) : "TAXONKIT_NAME2TAXID: provide exactly one of `name` or `names_txt`"
    // A single name is fed to taxonkit on stdin through a single-quoted
    // here-string. Embedded single quotes (e.g. "Baker's yeast") are rewritten
    // as '\'' so they cannot terminate the quoting and break the command.
    def query = name ? "<<< '" + name.toString().replace("'", "'\\''") + "'" : names_txt
    """
    taxonkit \\
        name2taxid \\
        $args \\
        --data-dir $taxdb \\
        --threads $task.cpus \\
        --out-file ${prefix}.tsv \\
        ${query}

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        taxonkit: \$( taxonkit version | sed 's/.* v//' )
    END_VERSIONS
    """

    stub:
    // Only the output name is needed here; `task.ext.args` is not used by the stub.
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    touch ${prefix}.tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        taxonkit: \$( taxonkit version | sed 's/.* v//' )
    END_VERSIONS
    """
}
53 changes: 53 additions & 0 deletions modules/nf-core/taxonkit/name2taxid/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Metadata describing the TAXONKIT_NAME2TAXID module: tool, inputs and outputs.
name: "taxonkit_name2taxid"
description: Convert taxon names to TaxIds
keywords:
  - taxonomy
  - taxids
  - taxon name
  - conversion
tools:
  - "taxonkit":
      description: "A Cross-platform and Efficient NCBI Taxonomy Toolkit"
      homepage: "https://bioinf.shenwei.me/taxonkit/"
      documentation: "https://bioinf.shenwei.me/taxonkit/usage/#name2taxid"
      tool_dev_url: "https://github.com/shenwei356/taxonkit"
      doi: "10.1016/j.jgg.2021.03.006"
      licence: ["MIT"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  # `name` and `names_txt` are mutually exclusive: supply exactly one of them.
  - name:
      type: string
      description: Taxon name to look up (provide either this or names_txt, not both)
  - names_txt:
      type: file
      description: File with taxon names to look up, each on their own line (provide either this or name, not both)
  # Passed to `taxonkit --data-dir`, so this is a directory rather than a single file.
  - taxdb:
      type: directory
      description: Taxonomy database unpacked from ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"
  - tsv:
      type: file
      description: TSV file of Taxon names and their taxon ID
      pattern: "*.tsv"

authors:
  - "@mahesh-panchal"
maintainers:
  - "@mahesh-panchal"
100 changes: 100 additions & 0 deletions modules/nf-core/taxonkit/name2taxid/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
// nf-test suite for the TAXONKIT_NAME2TAXID process defined in ../main.nf.
// Each test asserts that the process succeeds and that its outputs match the
// stored snapshot.
nextflow_process {

name "Test Process TAXONKIT_NAME2TAXID"
script "../main.nf"
process "TAXONKIT_NAME2TAXID"

tag "modules"
tag "modules_nfcore"
tag "untar"
tag "taxonkit"
tag "taxonkit/name2taxid"

// Runs the UNTAR module on the NCBI taxdump archive; the tests below read its
// result through UNTAR.out.untar and pass the second element of each emitted
// item as input[1].
// NOTE(review): the archive is fetched from the NCBI FTP server at test time,
// so these tests need network access, and snapshots presumably drift when the
// upstream taxonomy changes — confirm.
setup {
run("UNTAR"){
script "modules/nf-core/untar/main.nf"
process {
"""
input[0] = [
[ id:'test' ],
file("ftp://ftp.ncbi.nih.gov/pub/taxonomy/taxdump.tar.gz", checkIfExists: true)
]
"""
}
}
}

// Single-name mode: the name is given as the second tuple element and the
// names-file slot is an empty list.
test("sarscov2 - name") {

when {
process {
"""
input[0] = [
[ id:'test', single_end:false ], // meta map
"SARS-CoV-2",
[]
]
input[1] = UNTAR.out.untar.map{ it[1] }
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

// Names-file mode: the name element is an empty string and the names are
// supplied through a names.txt file built with collectFile, appended to the
// tuple via combine.
test("sarscov2 - list") {

when {
process {
"""
input[0] = Channel.of( [
[ id:'test', single_end:false ], // meta map
''
] ).combine( Channel.of("SARS-CoV-2").collectFile( name:'names.txt', newLine: true ) )
input[1] = UNTAR.out.untar.map{ it[1] }
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

// Same inputs as the single-name test, run with the -stub option.
test("sarscov2 - name - stub") {

options "-stub"

when {
process {
"""
input[0] = [
[ id:'test', single_end:false ], // meta map
"SARS-CoV-2",
[]
]
input[1] = UNTAR.out.untar.map{ it[1] }
"""
}
}

then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() }
)
}

}

}
95 changes: 95 additions & 0 deletions modules/nf-core/taxonkit/name2taxid/tests/main.nf.test.snap

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions modules/nf-core/taxonkit/name2taxid/tests/tags.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Maps the taxonkit/name2taxid test tag to the file globs associated with it.
taxonkit/name2taxid:
  - "modules/nf-core/taxonkit/name2taxid/**"

0 comments on commit e41aa10

Please sign in to comment.