New Module: humid (#5493)

* Add nf-core/humid module with necessary files and configurations * add single end test * address comments in meta.yml * don't checksum the log file * update snapshot & fix path assertion log
nf-core · Apr 17, 2024 · c417458 · c417458
1 parent c162ae4
commit c417458
Show file tree

Hide file tree

Showing 7 changed files with 428 additions and 0 deletions.
diff --git a/modules/nf-core/humid/environment.yml b/modules/nf-core/humid/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+# Conda environment for the HUMID module.
+name: "humid"
+# NOTE(review): confirm whether the `defaults` channel should still be listed
+# under current nf-core guidelines.
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  # Pinned to the same HUMID release (1.0.4) as the container tags and the
+  # hard-coded VERSION string in main.nf; bump all of them together.
+  - "bioconda::humid=1.0.4"
diff --git a/modules/nf-core/humid/main.nf b/modules/nf-core/humid/main.nf
@@ -0,0 +1,69 @@
+
+// Deduplicate FastQ reads with HUMID, optionally using a separate UMI file.
+//
+// Inputs:
+//   meta, reads      - sample meta map and the FastQ file(s) to deduplicate
+//   meta2, umi_file  - meta map and an optional UMI FastQ; pass [] to run without one
+// Outputs:
+//   log        - "<prefix>.log" written by humid (-l)
+//   dedup      - "*_dedup*.fastq.gz" files, if produced
+//   annotated  - "*_annotated*.fastq.gz" files, if produced
+//   stats      - the "<prefix>" output directory (-d) holding the remaining files
+//   versions   - versions.yml with the hard-coded tool version
+process HUMID {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/humid:1.0.4--hadf994f_0':
+        'biocontainers/humid:1.0.4--hadf994f_0' }"
+
+    input:
+    tuple val(meta), path(reads)
+    tuple val(meta2), path(umi_file)
+
+    output:
+    tuple val(meta), path("${prefix}.log")         , emit: log
+    tuple val(meta), path("*_dedup*.fastq.gz")     , emit: dedup    , optional: true
+    tuple val(meta), path("*_annotated*.fastq.gz") , emit: annotated, optional: true
+    tuple val(meta), path("${prefix}")             , emit: stats    , optional: true
+    path "versions.yml"                            , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // `prefix` is intentionally assigned without `def`: the output block above
+    // interpolates ${prefix}, so it must not be local to the script block.
+    prefix = task.ext.prefix ?: "${meta.id}"
+    // An empty umi_file (e.g. []) is falsy and collapses to no extra argument.
+    def umis = umi_file ?: ''
+    def VERSION = '1.0.4' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+
+    // The last humid argument must NOT end in a line continuation; otherwise the
+    // command would only be terminated by the blank line that follows it.
+    // The FastQ files are moved out of the -d directory so the top-level
+    // *_dedup* / *_annotated* output globs can match them.
+    """
+    humid \\
+        $args \\
+        -d ${prefix} \\
+        -l ${prefix}.log \\
+        $reads \\
+        $umis
+
+    mv ${prefix}/*.fastq* .
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        humid: ${VERSION}
+    END_VERSIONS
+    """
+
+    stub:
+    // Same non-local `prefix` as in the script block; it is needed by the outputs.
+    prefix = task.ext.prefix ?: "${meta.id}"
+    def VERSION = '1.0.4' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+    // Creates empty placeholders for every declared output (paired-end layout).
+    """
+    mkdir -p ${prefix}
+    touch ${prefix}_1_dedup.fastq.gz
+    touch ${prefix}_2_dedup.fastq.gz
+    touch ${prefix}_1_annotated.fastq.gz
+    touch ${prefix}_2_annotated.fastq.gz
+    touch ${prefix}/stats.dat
+    touch ${prefix}/neigh.dat
+    touch ${prefix}/counts.dat
+    touch ${prefix}/clusters.dat
+    touch ${prefix}.log
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        humid: ${VERSION}
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/humid/meta.yml b/modules/nf-core/humid/meta.yml
@@ -0,0 +1,74 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+# Module metadata for HUMID. Input/output names and patterns below mirror the
+# declarations in this module's main.nf.
+name: "humid"
+description: HUMID is a tool to quickly and easily remove duplicate reads from FastQ files, with or without UMIs.
+keywords:
+  - umi
+  - fastq
+  - deduplication
+  - hamming-distance
+  - clustering
+tools:
+  - "humid":
+      description: "HUMID -- High-performance UMI Deduplicator"
+      homepage: "https://github.com/jfjlaros/HUMID"
+      documentation: "https://humid.readthedocs.io/en/latest/"
+      tool_dev_url: "https://github.com/jfjlaros/HUMID"
+      licence: ["MIT"]
+
+input:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+  - reads:
+      type: file
+      description: Fastq file(s) to deduplicate
+      pattern: "*.{fastq,fq,fastq.gz,fq.gz}"
+  - meta2:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+  # Named after the process input `umi_file` in main.nf.
+  - umi_file:
+      type: file
+      description: |
+        Fastq file(s) containing UMIs.
+        Optional: pass `[]` to deduplicate without a separate UMI file.
+      pattern: "*.{fastq,fq,fastq.gz,fq.gz}"
+
+output:
+  - meta:
+      type: map
+      description: |
+        Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+
+  - dedup:
+      type: file
+      description: Deduplicated Fastq file(s)
+      pattern: "*_dedup*.fastq.gz"
+
+  - annotated:
+      type: file
+      description: Annotated Fastq file(s)
+      pattern: "*_annotated*.fastq.gz"
+
+  - stats:
+      type: directory
+      description: Directory containing the statistics files; can be used for MultiQC.
+      pattern: "${prefix}/"
+
+  - log:
+      type: file
+      description: Log file of humid, containing progress and errors
+      pattern: "*.log"
+
+  - versions:
+      type: file
+      description: File containing software versions
+      pattern: "versions.yml"
+
+authors:
+  - "@Joon-Klaps"
+maintainers:
+  - "@Joon-Klaps"
diff --git a/modules/nf-core/humid/tests/main.nf.test b/modules/nf-core/humid/tests/main.nf.test
@@ -0,0 +1,134 @@
+// nf-test suite for the HUMID process: paired-end, paired-end stub, and
+// single-end runs on the sarscov2 test reads. No UMI file is supplied in any
+// of the tests (input[1] is an empty meta map with an empty file list).
+nextflow_process {
+
+    name "Test Process HUMID"
+    script "../main.nf"
+    process "HUMID"
+
+    tag "modules"
+    tag "modules_nfcore"
+    tag "humid"
+    // FASTP is tagged because two of the tests run it as a setup step.
+    tag "fastp"
+
+    // Paired-end run: FASTP output reads are fed into HUMID.
+    test("sarscov2 - paired-end") {
+
+        setup {
+            run("FASTP") {
+                script "../../fastp/main.nf"
+                config "./nextflow.config"
+                process {
+                    """
+                    input[0] = Channel.of([
+                        [ id:'test', single_end:false ],
+                        [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+                    ])
+                    input[1] = []
+                    input[2] = false
+                    input[3] = false
+                    """
+                }
+            }
+        }
+
+        when {
+            config "./nextflow.config"
+            process {
+                """
+                input[0] = FASTP.out.reads
+                input[1] = [[:],[]]
+                """
+            }
+        }
+
+        then {
+            // The log is deliberately left out of the snapshot and checked by
+            // content instead (presumably its contents are not byte-stable).
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.stats,
+                    process.out.dedup,
+                    process.out.annotated,
+                    process.out.versions).match()},
+                {assert path(process.out.log.get(0).get(1)).text.contains("Reading data... done.")},
+            )
+        }
+
+    }
+
+    // Stub run: reads are passed directly (no FASTP setup) and only the
+    // presence of the placeholder output files is verified for dedup/annotated.
+    test("sarscov2 - paired-end - stub") {
+
+        options "-stub"
+
+        when {
+            process {
+                """
+                input[0] = Channel.of([
+                        [ id:'test', single_end:false ],
+                        [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+                        file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+                    ])
+                input[1] = [[:],[]]
+                """
+            }
+        }
+
+        then {
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.log,
+                    process.out.stats,
+                    process.out.versions).match()},
+                {assert file(process.out.dedup[0][1].find {
+                    file(it).name == "test_1_dedup.fastq.gz" }).exists()},
+                {assert file(process.out.dedup[0][1].find {
+                    file(it).name == "test_2_dedup.fastq.gz" }).exists()},
+                {assert file(process.out.annotated[0][1].find {
+                    file(it).name == "test_1_annotated.fastq.gz" }).exists()},
+                {assert file(process.out.annotated[0][1].find {
+                    file(it).name == "test_2_annotated.fastq.gz" }).exists()}
+            )
+        }
+    }
+
+
+    // Single-end run: same flow as the paired-end test with one input FastQ.
+    test("sarscov2 - single-end") {
+
+        setup {
+            run("FASTP") {
+                script "../../fastp/main.nf"
+                config "./nextflow.config"
+                process {
+                    """
+                    input[0] = Channel.of([
+                        [ id:'test', single_end:true ],
+                        [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)]
+                    ])
+                    input[1] = []
+                    input[2] = false
+                    input[3] = false
+                    """
+                }
+            }
+        }
+
+        when {
+            config "./nextflow.config"
+            process {
+                """
+                input[0] = FASTP.out.reads
+                input[1] = [[:],[]]
+                """
+            }
+        }
+
+        then {
+            // As in the paired-end test, the log is asserted by content only.
+            assertAll(
+                { assert process.success },
+                { assert snapshot(process.out.stats,
+                    process.out.dedup,
+                    process.out.annotated,
+                    process.out.versions).match()},
+                {assert path(process.out.log.get(0).get(1)).text.contains("Reading data... done.")},
+            )
+        }
+    }
+
+}