Skip to content

Commit

Permalink
New Module: humid (#5493)
Browse files Browse the repository at this point in the history
* Add nf-core/humid module with necessary files and configurations

* add single end test

* address comments in meta.yml

* don't checksum the log file

* update snapshot & fix path assertion log
  • Loading branch information
Joon-Klaps authored Apr 17, 2024
1 parent c162ae4 commit c417458
Show file tree
Hide file tree
Showing 7 changed files with 428 additions and 0 deletions.
9 changes: 9 additions & 0 deletions modules/nf-core/humid/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
# Conda environment used by the HUMID module when running with the conda profile.
name: humid
channels:
  - conda-forge
  - bioconda
  - defaults
dependencies:
  # Pinned release; keep in step with the container tag and the VERSION
  # string hard-coded in main.nf.
  - 'bioconda::humid=1.0.4'
69 changes: 69 additions & 0 deletions modules/nf-core/humid/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@

// HUMID: remove duplicate reads from FASTQ files, optionally using a separate
// UMI file.
//
// Inputs
//   meta, reads        - sample map and the FASTQ file(s) to deduplicate
//   meta2, umi_file    - map and optional UMI FASTQ; pass [[:], []] when there is none
//
// Outputs
//   log                - "${prefix}.log", written via `-l`
//   dedup / annotated  - FASTQ files moved out of the `-d` directory into the task root
//   stats              - the `-d` directory itself (what is left after the FASTQs are moved)
//   versions           - versions.yml with the hard-coded tool version
process HUMID {
    tag "$meta.id"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/humid:1.0.4--hadf994f_0':
        'biocontainers/humid:1.0.4--hadf994f_0' }"

    input:
    tuple val(meta), path(reads)
    tuple val(meta2), path(umi_file)

    output:
    tuple val(meta), path("${prefix}.log")         , emit: log
    tuple val(meta), path("*_dedup*.fastq.gz")     , emit: dedup    , optional: true
    tuple val(meta), path("*_annotated*.fastq.gz") , emit: annotated, optional: true
    tuple val(meta), path("${prefix}")             , emit: stats    , optional: true
    path "versions.yml"                            , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    // `prefix` is deliberately not declared with `def`: the output block above
    // interpolates it, so it must be visible outside this script scope.
    prefix = task.ext.prefix ?: "${meta.id}"
    // An empty `umi_file` (e.g. []) is falsy, so no extra positional argument is passed.
    def umis = umi_file ?: ''
    def VERSION = '1.0.4' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.

    // NOTE: the last humid argument must NOT end with a line continuation;
    // otherwise the following `mv` is spliced into the humid command line.
    """
    humid \\
        $args \\
        -d ${prefix} \\
        -l ${prefix}.log \\
        $reads \\
        $umis

    mv ${prefix}/*.fastq* .

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        humid: ${VERSION}
    END_VERSIONS
    """

    stub:
    prefix = task.ext.prefix ?: "${meta.id}"
    def VERSION = '1.0.4' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
    // Create empty placeholders for every declared output so downstream
    // processes can be wired up without running the tool.
    """
    mkdir -p ${prefix}
    touch ${prefix}_1_dedup.fastq.gz
    touch ${prefix}_2_dedup.fastq.gz
    touch ${prefix}_1_annotated.fastq.gz
    touch ${prefix}_2_annotated.fastq.gz
    touch ${prefix}/stats.dat
    touch ${prefix}/neigh.dat
    touch ${prefix}/counts.dat
    touch ${prefix}/clusters.dat
    touch ${prefix}.log

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        humid: ${VERSION}
    END_VERSIONS
    """
}
74 changes: 74 additions & 0 deletions modules/nf-core/humid/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
# Module metadata for HUMID. Input and output names below mirror the
# channel names declared in main.nf.
name: "humid"
description: HUMID is a tool to quickly and easily remove duplicate reads from FastQ files, with or without UMIs.
keywords:
  - umi
  - fastq
  - deduplication
  - hamming-distance
  - clustering
tools:
  - "humid":
      description: "HUMID -- High-performance UMI Deduplicator"
      homepage: "https://github.com/jfjlaros/HUMID"
      documentation: "https://humid.readthedocs.io/en/latest/"
      tool_dev_url: "https://github.com/jfjlaros/HUMID"
      licence: ["MIT"]

input:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  - reads:
      type: file
      description: Fastq file(s) to deduplicate
      pattern: "*.{fastq,fq,fastq.gz,fq.gz}"
  - meta2:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  # Named `umi_file` to match the process input declared in main.nf.
  - umi_file:
      type: file
      description: Fastq file(s) containing UMIs
      pattern: "*.{fastq,fq,fastq.gz,fq.gz}"

output:
  - meta:
      type: map
      description: |
        Groovy Map containing sample information
        e.g. `[ id:'sample1', single_end:false ]`
  - dedup:
      type: file
      description: Deduplicated Fastq file(s)
      pattern: "*_dedup*.fastq.gz"
  - annotated:
      type: file
      description: Annotated Fastq file(s)
      pattern: "*_annotated*.fastq.gz"
  - stats:
      type: directory
      description: Directory containing statistics files, use for multiqc.
      pattern: "${prefix}/"
  - log:
      type: file
      description: Log file of humid, containing progress and errors
      pattern: "*.log"
  - versions:
      type: file
      description: File containing software versions
      pattern: "versions.yml"

authors:
  - "@Joon-Klaps"
maintainers:
  - "@Joon-Klaps"
134 changes: 134 additions & 0 deletions modules/nf-core/humid/tests/main.nf.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
// nf-test specification for the HUMID process.
// Three cases: paired-end, paired-end stub run, and single-end.
// The two non-stub cases feed HUMID with the `reads` output of a FASTP
// setup run rather than with the raw test FASTQ files.
nextflow_process {

name "Test Process HUMID"
script "../main.nf"
process "HUMID"

tag "modules"
tag "modules_nfcore"
tag "humid"
// FASTP is tagged because the non-stub tests run it in their setup block.
tag "fastp"

// Paired-end run on the sarscov2 Illumina test reads, without a UMI file.
test("sarscov2 - paired-end") {

// Run FASTP first; its `reads` channel becomes HUMID's first input.
setup {
run("FASTP") {
script "../../fastp/main.nf"
config "./nextflow.config"
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:false ],
[ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
])
input[1] = []
input[2] = false
input[3] = false
"""
}
}
}

// input[1] is an empty meta map plus an empty file list, i.e. no UMI file.
when {
config "./nextflow.config"
process {
"""
input[0] = FASTP.out.reads
input[1] = [[:],[]]
"""
}
}

// The log is deliberately left out of the snapshot and checked by content
// instead; stats, dedup, annotated and versions are snapshotted.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out.stats,
process.out.dedup,
process.out.annotated,
process.out.versions).match()},
{assert path(process.out.log.get(0).get(1)).text.contains("Reading data... done.")},
)
}

}

// Stub run: no FASTP setup; the raw test FASTQ files are passed directly.
test("sarscov2 - paired-end - stub") {

options "-stub"

when {
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:false ],
[ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
])
input[1] = [[:],[]]
"""
}
}

// log, stats and versions are snapshotted; the dedup and annotated outputs
// are only checked for the presence of the expected file names.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out.log,
process.out.stats,
process.out.versions).match()},
{assert file(process.out.dedup[0][1].find {
file(it).name == "test_1_dedup.fastq.gz" }).exists()},
{assert file(process.out.dedup[0][1].find {
file(it).name == "test_2_dedup.fastq.gz" }).exists()},
{assert file(process.out.annotated[0][1].find {
file(it).name == "test_1_annotated.fastq.gz" }).exists()},
{assert file(process.out.annotated[0][1].find {
file(it).name == "test_2_annotated.fastq.gz" }).exists()}
)
}
}


// Single-end variant of the first test: only test_1.fastq.gz goes through FASTP.
test("sarscov2 - single-end") {

setup {
run("FASTP") {
script "../../fastp/main.nf"
config "./nextflow.config"
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:true ],
[ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)]
])
input[1] = []
input[2] = false
input[3] = false
"""
}
}
}

// As in the paired-end test, no UMI file is supplied.
when {
config "./nextflow.config"
process {
"""
input[0] = FASTP.out.reads
input[1] = [[:],[]]
"""
}
}

// Same assertion scheme as the paired-end test: snapshot everything but
// the log, and check the log by content.
then {
assertAll(
{ assert process.success },
{ assert snapshot(process.out.stats,
process.out.dedup,
process.out.annotated,
process.out.versions).match()},
{assert path(process.out.log.get(0).get(1)).text.contains("Reading data... done.")},
)
}
}

}
Loading

0 comments on commit c417458

Please sign in to comment.