\\n Workflow | \\n Nextflow | \\n\\"
- ],
- [
- "CUSTOM_DUMPSOFTWAREVERSIONS:",
- " python: 3.11.7",
- " yaml: 5.4.1",
- "TOOL1:",
- " tool1: 0.11.9",
- "TOOL2:",
- " tool2: '1.9'",
- "Workflow:"
- ]
- ],
- "timestamp": "2024-01-09T23:01:18.710682"
- }
-}
\ No newline at end of file
diff --git a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml b/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml
deleted file mode 100644
index 405aa24a..00000000
--- a/modules/nf-core/custom/dumpsoftwareversions/tests/tags.yml
+++ /dev/null
@@ -1,2 +0,0 @@
-custom/dumpsoftwareversions:
- - modules/nf-core/custom/dumpsoftwareversions/**
diff --git a/modules/nf-core/custom/sratoolsncbisettings/environment.yml b/modules/nf-core/custom/sratoolsncbisettings/environment.yml
new file mode 100644
index 00000000..44a1b008
--- /dev/null
+++ b/modules/nf-core/custom/sratoolsncbisettings/environment.yml
@@ -0,0 +1,7 @@
+name: custom_sratoolsncbisettings
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::sra-tools=3.0.8
diff --git a/modules/nf-core/custom/sratoolsncbisettings/main.nf b/modules/nf-core/custom/sratoolsncbisettings/main.nf
new file mode 100644
index 00000000..577117ed
--- /dev/null
+++ b/modules/nf-core/custom/sratoolsncbisettings/main.nf
@@ -0,0 +1,23 @@
+process CUSTOM_SRATOOLSNCBISETTINGS { // Checks for suitable NCBI user settings or creates them on the fly (logic lives in the template below).
+ tag 'ncbi-settings'
+ label 'process_low'
+ // Software: the module's conda environment.yml, or an sra-tools 3.0.8 container (Singularity image URL unless task.ext.singularity_pull_docker_container is set).
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/sra-tools:3.0.8--h9f5acd7_0' :
+ 'biocontainers/sra-tools:3.0.8--h9f5acd7_0' }"
+ // NOTE(review): `ids` is declared but not referenced in this block; presumably it only gates when the process runs -- confirm with callers.
+ input:
+ val ids
+ // Emits the *.mkfg file(s) found in the task directory plus versions.yml.
+ output:
+ path('*.mkfg') , emit: ncbi_settings
+ path 'versions.yml', emit: versions
+ // Runs unless task.ext.when is defined and evaluates to false.
+ when:
+ task.ext.when == null || task.ext.when
+ // `config` is the default settings text (a freshly generated random GUID and report_instance_identity = "true") for the template to use.
+ shell:
+ config = "/LIBS/GUID = \"${UUID.randomUUID().toString()}\"\\n/libs/cloud/report_instance_identity = \"true\"\\n"
+ template 'detect_ncbi_settings.sh'
+}
diff --git a/modules/nf-core/custom/sratoolsncbisettings/meta.yml b/modules/nf-core/custom/sratoolsncbisettings/meta.yml
new file mode 100644
index 00000000..46a6cd32
--- /dev/null
+++ b/modules/nf-core/custom/sratoolsncbisettings/meta.yml
@@ -0,0 +1,28 @@
+name: "custom_sratoolsncbisettings"
+description: Test for the presence of suitable NCBI settings or create them on the fly.
+keywords:
+ - NCBI
+ - settings
+ - sra-tools
+ - prefetch
+ - fasterq-dump
+tools:
+ - "sratools":
+ description: "SRA Toolkit and SDK from NCBI"
+ homepage: https://github.com/ncbi/sra-tools
+ documentation: https://github.com/ncbi/sra-tools/wiki
+ tool_dev_url: https://github.com/ncbi/sra-tools
+ licence: ["Public Domain"]
+output:
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - ncbi_settings:
+ type: file
+ description: An NCBI user settings file.
+ pattern: "*.mkfg"
+authors:
+ - "@Midnighter"
+maintainers:
+ - "@Midnighter"
diff --git a/modules/nf-core/custom/sratoolsncbisettings/templates/detect_ncbi_settings.sh b/modules/nf-core/custom/sratoolsncbisettings/templates/detect_ncbi_settings.sh
new file mode 100644
index 00000000..cfe3a324
--- /dev/null
+++ b/modules/nf-core/custom/sratoolsncbisettings/templates/detect_ncbi_settings.sh
@@ -0,0 +1,45 @@
+#!/usr/bin/env bash
+# Template for CUSTOM_SRATOOLSNCBISETTINGS: leaves an NCBI settings file (*.mkfg) in the working directory.
+set -u  # expanding an unset variable is an error
+
+# The sed call below strips double quotes and spaces from the vdb-config output before it is eval'ed.
+# Get the expected NCBI settings path and define the environment variable
+# `NCBI_SETTINGS`.
+eval "$(vdb-config -o n NCBI_SETTINGS | sed 's/[" ]//g')"
+
+# If the user settings do not exist yet, create a file suitable for `prefetch`
+# and `fasterq-dump`. If an existing settings file does not contain the required
+# values, error out with a helpful message.
+if [[ ! -f "${NCBI_SETTINGS}" ]]; then
+ printf '!{config}' > 'user-settings.mkfg'  # write the default settings text supplied by the process
+else
+ prefetch --help &> /dev/null  # run only for its exit status
+ if [[ $? = 78 ]]; then  # 78 is treated as "settings lack the required entries" (see message below)
+ echo "You have an existing vdb-config at '${NCBI_SETTINGS}' but it is"\
+ "missing the required entries for /LIBS/GUID and"\
+ "/libs/cloud/report_instance_identity."\
+ "Feel free to add the following to your settings file:" >&2
+ echo "$(printf '!{config}')" >&2
+ exit 1
+ fi
+ fasterq-dump --help &> /dev/null  # same probe for the second tool
+ if [[ $? = 78 ]]; then  # same interpretation of status 78 as above
+ echo "You have an existing vdb-config at '${NCBI_SETTINGS}' but it is"\
+ "missing the required entries for /LIBS/GUID and"\
+ "/libs/cloud/report_instance_identity."\
+ "Feel free to add the following to your settings file:" >&2
+ echo "$(printf '!{config}')" >&2
+ exit 1
+ fi
+ if [[ "${NCBI_SETTINGS}" != *.mkfg ]]; then  # the process output glob in main.nf is *.mkfg
+ echo "The detected settings '${NCBI_SETTINGS}' do not have the required"\
+ "file extension '.mkfg'." >&2
+ exit 1
+ fi
+ cp "${NCBI_SETTINGS}" ./  # stage the existing settings file in the working directory
+fi
+# Record the version string printed by vdb-config.
+cat <<-END_VERSIONS > versions.yml
+"!{task.process}":
+ sratools: $(vdb-config --version 2>&1 | grep -Eo '[0-9.]+')
+END_VERSIONS
diff --git a/modules/nf-core/custom/sratoolsncbisettings/tests/main.nf.test b/modules/nf-core/custom/sratoolsncbisettings/tests/main.nf.test
new file mode 100644
index 00000000..b8b95320
--- /dev/null
+++ b/modules/nf-core/custom/sratoolsncbisettings/tests/main.nf.test
@@ -0,0 +1,42 @@
+nextflow_process {
+ // nf-test suite for the CUSTOM_SRATOOLSNCBISETTINGS process.
+ name "Test Process CUSTOM_SRATOOLSNCBISETTINGS"
+ script "../main.nf"
+ process "CUSTOM_SRATOOLSNCBISETTINGS"
+ config "modules/nf-core/custom/sratoolsncbisettings/tests/nextflow.config"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "custom"
+ tag "custom/sratoolsncbisettings"
+
+ test("Should run without failures") {
+ // Setup: create the settings directory and copy a known settings file to params.settings_file before the process runs.
+ when {
+ params {
+ settings_path = '/tmp/.ncbi'
+ settings_file = "${params.settings_path}/user-settings.mkfg"
+ }
+
+ process {
+ """
+ input[0] = ["SRX6725035"]
+ file(params.settings_path).mkdirs()
+ def settings = file(params.modules_testdata_base_path + 'generic/config/ncbi_user_settings.mkfg', checkIfExists: true)
+ settings.copyTo(params.settings_file)
+ """
+ }
+ }
+ // Expect success, a matching versions.yml snapshot, and both required keys present in the emitted settings file.
+ then {
+ assert process.success
+ assert snapshot(
+ process.out.versions
+ ).match()
+
+ with(process.out.ncbi_settings) {
+ assert path(get(0)).readLines().any { it.contains('/LIBS/GUID') }
+ assert path(get(0)).readLines().any { it.contains('/libs/cloud/report_instance_identity') }
+ }
+ }
+ }
+}
diff --git a/modules/nf-core/custom/sratoolsncbisettings/tests/main.nf.test.snap b/modules/nf-core/custom/sratoolsncbisettings/tests/main.nf.test.snap
new file mode 100644
index 00000000..5e314f0b
--- /dev/null
+++ b/modules/nf-core/custom/sratoolsncbisettings/tests/main.nf.test.snap
@@ -0,0 +1,14 @@
+{
+ "Should run without failures": {
+ "content": [
+ [
+ "versions.yml:md5,3d6ee88cce1ee517e198633f062589a8"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-28T11:47:15.824443"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/custom/sratoolsncbisettings/tests/nextflow.config b/modules/nf-core/custom/sratoolsncbisettings/tests/nextflow.config
new file mode 100644
index 00000000..df5def04
--- /dev/null
+++ b/modules/nf-core/custom/sratoolsncbisettings/tests/nextflow.config
@@ -0,0 +1,14 @@
+params.settings_path = '/tmp/.ncbi'
+params.settings_file = "${params.settings_path}/user-settings.mkfg"
+// Set NCBI_SETTINGS in the task environment to the test settings file.
+env.NCBI_SETTINGS = params.settings_file
+// Mount the settings directory into the container at the same path (-B for Singularity, -v otherwise).
+process {
+ withName: CUSTOM_SRATOOLSNCBISETTINGS {
+ containerOptions = {
+ (workflow.containerEngine == 'singularity') ?
+ "-B ${params.settings_path}:${params.settings_path}" :
+ "-v ${params.settings_path}:${params.settings_path}"
+ }
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/custom/sratoolsncbisettings/tests/tags.yml b/modules/nf-core/custom/sratoolsncbisettings/tests/tags.yml
new file mode 100644
index 00000000..fb4a08a7
--- /dev/null
+++ b/modules/nf-core/custom/sratoolsncbisettings/tests/tags.yml
@@ -0,0 +1,2 @@
+custom/sratoolsncbisettings:
+ - modules/nf-core/custom/sratoolsncbisettings/**
diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf
index 2a3b679e..4fc19b74 100644
--- a/modules/nf-core/fastp/main.nf
+++ b/modules/nf-core/fastp/main.nf
@@ -29,7 +29,7 @@ process FASTP {
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : ""
- def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : ''
+ def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--failed_out ${prefix}.paired.fail.fastq.gz --unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : ''
// Added soft-links to original fastqs for consistent naming in MultiQC
// Use single ended for interleaved. Add --interleaved_in in config.
if ( task.ext.args?.contains('--interleaved_in') ) {
diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test
index 9b3f9a38..6f1f4897 100644
--- a/modules/nf-core/fastp/tests/main.nf.test
+++ b/modules/nf-core/fastp/tests/main.nf.test
@@ -251,7 +251,8 @@ nextflow_process {
}
test("fastp test_fastp_interleaved") {
- config './nextflow.config'
+
+ config './nextflow.interleaved.config'
when {
params {
outdir = "$outputDir"
@@ -277,7 +278,7 @@ nextflow_process {
def html_text = [ "Q20 bases:25.719000 K (93.033098%)",
"paired end (151 cycles + 151 cycles)"]
def log_text = [ "Q20 bases: 12922(92.9841%)",
- "reads passed filter: 198"]
+ "reads passed filter: 162"]
def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
"TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
"AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
- { assert path(process.out.reads_fail.get(0).get(1).get(1)).linesGzip.contains(failed_read2_line) }
+ { assert path(process.out.reads_fail.get(0).get(1).get(2)).linesGzip.contains(failed_read2_line) }
}
},
{ html_text.each { html_part ->
diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap
index b4c0e1dd..3e876288 100644
--- a/modules/nf-core/fastp/tests/main.nf.test.snap
+++ b/modules/nf-core/fastp/tests/main.nf.test.snap
@@ -7,7 +7,7 @@
"id": "test",
"single_end": true
},
- "test.fastp.json:md5,168f516f7bd4b7b6c32da7cba87299a4"
+ "test.fastp.json:md5,b24e0624df5cc0b11cd5ba21b726fb22"
]
]
],
@@ -15,7 +15,7 @@
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
- "timestamp": "2024-01-17T18:08:06.123035"
+ "timestamp": "2024-03-18T16:19:15.063001"
},
"test_fastp_paired_end_merged-for_stub_match": {
"content": [
@@ -65,7 +65,7 @@
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
- "timestamp": "2024-01-17T18:06:00.223817"
+ "timestamp": "2024-03-18T16:18:43.526412"
},
"versions_paired_end": {
"content": [
@@ -112,7 +112,7 @@
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
- "timestamp": "2024-02-01T12:03:37.827323085"
+ "timestamp": "2024-03-18T16:19:15.111894"
},
"test_fastp_paired_end_merged_match": {
"content": [
@@ -283,7 +283,7 @@
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
- "timestamp": "2024-02-01T11:57:30.791982648"
+ "timestamp": "2024-03-18T16:18:43.580336"
},
"versions_paired_end_merged_adapterlist": {
"content": [
diff --git a/modules/nf-core/fastp/tests/nextflow.interleaved.config b/modules/nf-core/fastp/tests/nextflow.interleaved.config
new file mode 100644
index 00000000..4be8dbd2
--- /dev/null
+++ b/modules/nf-core/fastp/tests/nextflow.interleaved.config
@@ -0,0 +1,5 @@
+process {
+ withName: FASTP {
+ ext.args = "--interleaved_in -e 30"
+ }
+}
diff --git a/modules/nf-core/fastp/tests/nextflow.config b/modules/nf-core/fastp/tests/nextflow.save_failed.config
similarity index 50%
rename from modules/nf-core/fastp/tests/nextflow.config
rename to modules/nf-core/fastp/tests/nextflow.save_failed.config
index 0f7849ad..53b61b0c 100644
--- a/modules/nf-core/fastp/tests/nextflow.config
+++ b/modules/nf-core/fastp/tests/nextflow.save_failed.config
@@ -1,6 +1,5 @@
process {
-
withName: FASTP {
- ext.args = "--interleaved_in"
+ ext.args = "-e 30"
}
}
diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf
index 9e19a74c..d79f1c86 100644
--- a/modules/nf-core/fastqc/main.nf
+++ b/modules/nf-core/fastqc/main.nf
@@ -25,6 +25,11 @@ process FASTQC {
def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] }
def rename_to = old_new_pairs*.join(' ').join(' ')
def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ')
+
+ def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB')
+ // FastQC memory value allowed range (100 - 10000)
+ def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb)
+
"""
printf "%s %s\\n" $rename_to | while read old_name new_name; do
[ -f "\${new_name}" ] || ln -s \$old_name \$new_name
@@ -33,6 +38,7 @@ process FASTQC {
fastqc \\
$args \\
--threads $task.cpus \\
+ --memory $fastqc_memory \\
$renamed_files
cat <<-END_VERSIONS > versions.yml
diff --git a/modules/nf-core/fcs/fcsadaptor/environment.yml b/modules/nf-core/fcs/fcsadaptor/environment.yml
new file mode 100644
index 00000000..b11cd415
--- /dev/null
+++ b/modules/nf-core/fcs/fcsadaptor/environment.yml
@@ -0,0 +1,7 @@
+name: fcs_fcsadaptor
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::false_flag # False flag to pass nf-core/lint
diff --git a/modules/nf-core/fcs/fcsadaptor/main.nf b/modules/nf-core/fcs/fcsadaptor/main.nf
new file mode 100644
index 00000000..5772afec
--- /dev/null
+++ b/modules/nf-core/fcs/fcsadaptor/main.nf
@@ -0,0 +1,87 @@
+process FCS_FCSADAPTOR { // Runs NCBI's FCS adaptor screen (av_screen_x) on an assembly and collects its reports.
+ tag "$meta.id"
+ label 'process_low'
+
+ // WARN: Version information not provided by tool on CLI. Please update version string below when bumping container versions.
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://ftp.ncbi.nlm.nih.gov/genomes/TOOLS/FCS/releases/0.5.0/fcs-adaptor.sif':
+ 'docker.io/ncbi/fcs-adaptor:0.5.0' }"
+ // One assembly file per sample, accompanied by its meta map.
+ input:
+ tuple val(meta), path(assembly)
+ // cleaned_assembly is optional because the script copies it only when the adaptor report has more than one line.
+ output:
+ tuple val(meta), path("*.cleaned_sequences.fa.gz"), emit: cleaned_assembly, optional: true
+ tuple val(meta), path("*.fcs_adaptor_report.txt") , emit: adaptor_report
+ tuple val(meta), path("*.fcs_adaptor.log") , emit: log
+ tuple val(meta), path("*.pipeline_args.yaml") , emit: pipeline_args
+ tuple val(meta), path("*.skipped_trims.jsonl") , emit: skipped_trims
+ path "versions.yml" , emit: versions
+
+ // Downstream handling of optional cleaned_assembly
+ //
+ // ch_cleaned_assembly = FCS_FCSADAPTOR ( ch_input_assembly ).cleaned_assembly
+
+ // ch_input_assembly
+ // | join (ch_cleaned_assembly, by:0, remainder:true )
+ // | map { meta, input, cleaned ->
+ // [ meta, cleaned ?: input ]
+ // }
+ // | set { ch_downstream_channel }
+
+ // FCS_FCSGX ( ch_downstream_channel )
+
+ when:
+ task.ext.when == null || task.ext.when
+ // Real run: invoke av_screen_x, then copy its results out of output/ under the sample prefix.
+ script:
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ error "FCS_FCSADAPTOR module does not support Conda. Please use Docker / Singularity / Podman instead."
+ }
+ def args = task.ext.args ?: '--prok' // --prok || --euk
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def FCSADAPTOR_VERSION = '0.5.0' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+ """
+ av_screen_x \\
+ -o output/ \\
+ $args \\
+ $assembly
+
+ # compress and/or rename files with prefix
+ num_contamination_lines=\$(cat "output/fcs_adaptor_report.txt" | wc -l)
+ if [[ \$num_contamination_lines -gt 1 ]]; then
+ find output/cleaned_sequences/ -type f ! -name "*.gz" -exec gzip {} \\;
+ cp output/cleaned_sequences/* "${prefix}.cleaned_sequences.fa.gz"
+ fi
+ cp "output/fcs_adaptor_report.txt" "${prefix}.fcs_adaptor_report.txt"
+ cp "output/fcs_adaptor.log" "${prefix}.fcs_adaptor.log"
+ cp "output/pipeline_args.yaml" "${prefix}.pipeline_args.yaml"
+ cp "output/skipped_trims.jsonl" "${prefix}.skipped_trims.jsonl"
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ FCS-adaptor: $FCSADAPTOR_VERSION
+ END_VERSIONS
+ """
+ // Stub run: touches placeholder outputs only; no cleaned assembly is created.
+ stub:
+ // Exit if running this module with -profile conda / -profile mamba
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
+ error "FCS_FCSADAPTOR module does not support Conda. Please use Docker / Singularity / Podman instead."
+ }
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def FCSADAPTOR_VERSION = '0.5.0'
+
+ """
+ touch ${prefix}.fcs_adaptor_report.txt
+ touch ${prefix}.fcs_adaptor.log
+ touch ${prefix}.pipeline_args.yaml
+ touch ${prefix}.skipped_trims.jsonl
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ FCS-adaptor: $FCSADAPTOR_VERSION
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/fcs/fcsadaptor/meta.yml b/modules/nf-core/fcs/fcsadaptor/meta.yml
new file mode 100644
index 00000000..54fca1bb
--- /dev/null
+++ b/modules/nf-core/fcs/fcsadaptor/meta.yml
@@ -0,0 +1,64 @@
+name: "fcs_fcsadaptor"
+description: Run NCBI's FCS adaptor on assembled genomes
+keywords:
+ - assembly
+ - genomics
+ - quality control
+ - contamination
+ - NCBI
+tools:
+ - "fcs":
+ description: |
+ The Foreign Contamination Screening (FCS) tool rapidly detects contaminants from foreign
+ organisms in genome assemblies to prepare your data for submission. Therefore, the
+ submission process to NCBI is faster and fewer contaminated genomes are submitted.
+ This reduces errors in analyses and conclusions, not just for the original data submitter
+ but for all subsequent users of the assembly.
+ homepage: "https://www.ncbi.nlm.nih.gov/data-hub/cgr/data-quality-tools/"
+ documentation: "https://github.com/ncbi/fcs/wiki/FCS-adaptor"
+ tool_dev_url: "https://github.com/ncbi/fcs"
+ licence: ["United States Government Work"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - assembly:
+ type: file
+ description: assembly fasta file
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - cleaned_assembly:
+ type: file
+ description: Cleaned assembly in fasta format
+ pattern: "*.{cleaned_sequences.fa.gz}"
+ - adaptor_report:
+ type: file
+ description: Report of identified adaptors
+ pattern: "*.{fcs_adaptor_report.txt}"
+ - log:
+ type: file
+ description: Log file
+ pattern: "*.{fcs_adaptor.log}"
+ - pipeline_args:
+ type: file
+ description: Run arguments
+ pattern: "*.{pipeline_args.yaml}"
+ - skipped_trims:
+ type: file
+ description: Skipped trim information
+ pattern: "*.{skipped_trims.jsonl}"
+authors:
+ - "@d4straub"
+maintainers:
+ - "@d4straub"
+ - "@gallvp"
diff --git a/modules/nf-core/fcs/fcsadaptor/tests/main.nf.test b/modules/nf-core/fcs/fcsadaptor/tests/main.nf.test
new file mode 100644
index 00000000..13c80264
--- /dev/null
+++ b/modules/nf-core/fcs/fcsadaptor/tests/main.nf.test
@@ -0,0 +1,115 @@
+nextflow_process {
+
+ name "Test Process FCS_FCSADAPTOR"
+ script "../main.nf"
+ process "FCS_FCSADAPTOR"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "fcs"
+ tag "fcs/fcsadaptor"
+
+ test("bacteroides_fragilis-genome_fna_gz") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id: 'test' ],
+ file(params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert process.out.cleaned_assembly == [] },
+ { assert snapshot(
+ file(process.out.skipped_trims[0][1]).name,
+ process.out.adaptor_report,
+ process.out.pipeline_args,
+ process.out.versions,
+ ).match()
+ },
+ { assert path(process.out.log[0][1]).text.contains('[step all_cleaned_fasta] completed success') }
+ )
+ }
+
+ }
+
+ test("sarscov2-genome_fasta-pacbio_adapters") {
+
+ when {
+ process {
+ """
+ def input_file_lines = file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true).text.split('\\n')
+ def file_with_adapters = file('sarscov2_with_adapters.fasta')
+ file_with_adapters.text = (
+ input_file_lines[0..<(input_file_lines.size()/2)]
+ + ['ATCTCTCTCTTTTCCTCCTCCTCCGTTGTTGTTGTTGAGAGAGAT'] // Pacific Biosciences Blunt Adapter
+ + input_file_lines[(input_file_lines.size()/2)..<(input_file_lines.size())]
+ + ['AAAAAAAAAAAAAAAAAATTAACGGAGGAGGAGGA'] // Pacific Biosciences C2 Primer
+ ).join('\\n')
+
+ input[0] = [
+ [ id: 'test' ],
+ file_with_adapters
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ process.out.cleaned_assembly,
+ process.out.skipped_trims,
+ process.out.adaptor_report,
+ process.out.pipeline_args,
+ process.out.versions,
+ ).match()
+ },
+ { assert path(process.out.log[0][1]).text.contains('[step all_cleaned_fasta] completed success') },
+ { assert path(process.out.adaptor_report[0][1]).text.contains('Pacific Biosciences Blunt Adapter') },
+ { assert path(process.out.adaptor_report[0][1]).text.contains('Pacific Biosciences C2 Primer') }
+ )
+ }
+
+ }
+
+ test("bacteroides_fragilis-genome_fna_gz-stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id: 'test' ],
+ file(params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert process.out.cleaned_assembly == [] },
+ { assert snapshot(
+ [
+ process.out.adaptor_report[0][1],
+ process.out.log[0][1],
+ process.out.pipeline_args[0][1],
+ process.out.skipped_trims[0][1]
+ ].collect { file(it).name }
+ ).match()
+ },
+ { assert snapshot(process.out.versions).match("stub_versions") }
+ )
+ }
+ }
+}
diff --git a/modules/nf-core/fcs/fcsadaptor/tests/main.nf.test.snap b/modules/nf-core/fcs/fcsadaptor/tests/main.nf.test.snap
new file mode 100644
index 00000000..3dc63939
--- /dev/null
+++ b/modules/nf-core/fcs/fcsadaptor/tests/main.nf.test.snap
@@ -0,0 +1,102 @@
+{
+ "sarscov2-genome_fasta-pacbio_adapters": {
+ "content": [
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test.cleaned_sequences.fa.gz:md5,a2f3d1c092463efff18ed3bb519a989e"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test.skipped_trims.jsonl:md5,cbcdf071a7f61519e34d6b60c1aa9f51"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test.fcs_adaptor_report.txt:md5,f515b272a270b63a059d74060a773d76"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test.pipeline_args.yaml:md5,5df663124ae9536c596c5c572d4d4c45"
+ ]
+ ],
+ [
+ "versions.yml:md5,5cd44cb2972a42e74144373ce3307c7f"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.1"
+ },
+ "timestamp": "2024-05-27T12:10:37.17285"
+ },
+ "bacteroides_fragilis-genome_fna_gz": {
+ "content": [
+ "test.skipped_trims.jsonl",
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test.fcs_adaptor_report.txt:md5,27375be4671e01d2d2674ddd1d44414a"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test.pipeline_args.yaml:md5,a0cfbaa79a18acf6dc6b67158f448720"
+ ]
+ ],
+ [
+ "versions.yml:md5,5cd44cb2972a42e74144373ce3307c7f"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.1"
+ },
+ "timestamp": "2024-05-27T12:10:01.657496"
+ },
+ "bacteroides_fragilis-genome_fna_gz-stub": {
+ "content": [
+ [
+ "test.fcs_adaptor_report.txt",
+ "test.fcs_adaptor.log",
+ "test.pipeline_args.yaml",
+ "test.skipped_trims.jsonl"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-09T11:51:58.777173"
+ },
+ "stub_versions": {
+ "content": [
+ [
+ "versions.yml:md5,5cd44cb2972a42e74144373ce3307c7f"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-09T11:45:31.030126"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/fcs/fcsadaptor/tests/tags.yml b/modules/nf-core/fcs/fcsadaptor/tests/tags.yml
new file mode 100644
index 00000000..0e774e58
--- /dev/null
+++ b/modules/nf-core/fcs/fcsadaptor/tests/tags.yml
@@ -0,0 +1,2 @@
+fcs/fcsadaptor:
+ - "modules/nf-core/fcs/fcsadaptor/**"
diff --git a/modules/nf-core/gunzip/environment.yml b/modules/nf-core/gunzip/environment.yml
index 25910b34..3c723b21 100644
--- a/modules/nf-core/gunzip/environment.yml
+++ b/modules/nf-core/gunzip/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - conda-forge::sed=4.7
+ - bioconda::multiqc=1.21
diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/merqury/merqury/environment.yml
similarity index 51%
rename from modules/nf-core/custom/dumpsoftwareversions/environment.yml
rename to modules/nf-core/merqury/merqury/environment.yml
index b48ced26..722c47c1 100644
--- a/modules/nf-core/custom/dumpsoftwareversions/environment.yml
+++ b/modules/nf-core/merqury/merqury/environment.yml
@@ -1,7 +1,7 @@
-name: custom_dumpsoftwareversions
+name: merqury_merqury
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- - bioconda::multiqc=1.20
+ - bioconda::merqury=1.3
diff --git a/modules/nf-core/merqury/merqury/main.nf b/modules/nf-core/merqury/merqury/main.nf
new file mode 100644
index 00000000..ca8795a9
--- /dev/null
+++ b/modules/nf-core/merqury/merqury/main.nf
@@ -0,0 +1,87 @@
+process MERQURY_MERQURY { // k-mer based assembly evaluation: runs merqury.sh on a meryl database and an assembly.
+ tag "$meta.id"
+ label 'process_low'
+
+ // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/merqury:1.3--hdfd78af_1':
+ 'biocontainers/merqury:1.3--hdfd78af_1' }"
+ // Per sample: the meta map, a meryl database and the assembly to evaluate.
+ input:
+ tuple val(meta), path(meryl_db), path(assembly)
+ // The two *.qv outputs reference `prefix`, which script/stub assign without `def`.
+ output:
+ tuple val(meta), path("*_only.bed") , emit: assembly_only_kmers_bed
+ tuple val(meta), path("*_only.wig") , emit: assembly_only_kmers_wig
+ tuple val(meta), path("*.completeness.stats"), emit: stats
+ tuple val(meta), path("*.dist_only.hist") , emit: dist_hist
+ tuple val(meta), path("*.spectra-cn.fl.png") , emit: spectra_cn_fl_png
+ tuple val(meta), path("*.spectra-cn.hist") , emit: spectra_cn_hist
+ tuple val(meta), path("*.spectra-cn.ln.png") , emit: spectra_cn_ln_png
+ tuple val(meta), path("*.spectra-cn.st.png") , emit: spectra_cn_st_png
+ tuple val(meta), path("*.spectra-asm.fl.png"), emit: spectra_asm_fl_png
+ tuple val(meta), path("*.spectra-asm.hist") , emit: spectra_asm_hist
+ tuple val(meta), path("*.spectra-asm.ln.png"), emit: spectra_asm_ln_png
+ tuple val(meta), path("*.spectra-asm.st.png"), emit: spectra_asm_st_png
+ tuple val(meta), path("${prefix}.qv") , emit: assembly_qv
+ tuple val(meta), path("${prefix}.*.qv") , emit: scaffold_qv
+ tuple val(meta), path("*.hist.ploidy") , emit: read_ploidy
+ tuple val(meta), path("*.hapmers.blob.png") , emit: hapmers_blob_png , optional: true
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+ // Real run: merqury.sh receives the database, the assembly and the output prefix, in that order.
+ script:
+ // def args = task.ext.args ?: ''
+ prefix = task.ext.prefix ?: "${meta.id}"
+ def VERSION = '1.3' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+ """
+ # Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute)
+ # Check for container variable initialisation script and source it.
+ if [ -f "/usr/local/env-activate.sh" ]; then
+ set +u # Otherwise, errors out because of various unbound variables
+ . "/usr/local/env-activate.sh"
+ set -u
+ fi
+ # limit meryl to use the assigned number of cores.
+ export OMP_NUM_THREADS=$task.cpus
+
+ merqury.sh \\
+ $meryl_db \\
+ $assembly \\
+ $prefix
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ merqury: $VERSION
+ END_VERSIONS
+ """
+ // Stub run: touches one placeholder per non-optional output; the hapmers blob plot is not created.
+ stub:
+ prefix = task.ext.prefix ?: "${meta.id}"
+ def VERSION = '1.3' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
+ """
+ touch ${prefix}_only.bed
+ touch ${prefix}_only.wig
+ touch ${prefix}.completeness.stats
+ touch ${prefix}.dist_only.hist
+ touch ${prefix}.spectra-cn.fl.png
+ touch ${prefix}.spectra-cn.hist
+ touch ${prefix}.spectra-cn.ln.png
+ touch ${prefix}.spectra-cn.st.png
+ touch ${prefix}.spectra-asm.fl.png
+ touch ${prefix}.spectra-asm.hist
+ touch ${prefix}.spectra-asm.ln.png
+ touch ${prefix}.spectra-asm.st.png
+ touch ${prefix}.qv
+ touch ${prefix}.${prefix}.qv
+ touch ${prefix}.hist.ploidy
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ merqury: $VERSION
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/merqury/merqury/meta.yml b/modules/nf-core/merqury/merqury/meta.yml
new file mode 100644
index 00000000..19cb11b3
--- /dev/null
+++ b/modules/nf-core/merqury/merqury/meta.yml
@@ -0,0 +1,103 @@
+name: "merqury_merqury"
+description: k-mer based assembly evaluation.
+keywords:
+ - "k-mer"
+ - "assembly"
+ - "evaluation"
+tools:
+ - "merqury":
+ description: "Evaluate genome assemblies with k-mers and more."
+ tool_dev_url: "https://github.com/marbl/merqury"
+ doi: "10.1186/s13059-020-02134-9"
+ licence: ["PUBLIC DOMAIN"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - meryl_db:
+ type: file
+ description: "Meryl read database"
+ - assembly:
+ type: file
+ description: FASTA assembly file
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - assembly_only_kmers_bed:
+ type: file
+ description: "The positions of the k-mers found only in an assembly for further investigation in .bed"
+ pattern: "*_only.bed"
+ - assembly_only_kmers_wig:
+ type: file
+ description: "The positions of the k-mers found only in an assembly for further investigation in .wig"
+ pattern: "*_only.wig"
+ - stats:
+ type: file
+ description: Assembly statistics file
+ pattern: "*.completeness.stats"
+ - dist_hist:
+ type: file
+ description: Histogram
+ pattern: "*.dist_only.hist"
+ - spectra_cn_fl_png:
+ type: file
+ description: "Unstacked copy number spectra filled plot in PNG format"
+ pattern: "*.spectra-cn.fl.png"
+ - spectra_cn_ln_png:
+ type: file
+ description: "Unstacked copy number spectra line plot in PNG format"
+ pattern: "*.spectra-cn.ln.png"
+ - spectra_cn_st_png:
+ type: file
+ description: "Stacked copy number spectra line plot in PNG format"
+ pattern: "*.spectra-cn.st.png"
+ - spectra_cn_hist:
+ type: file
+ description: "Copy number spectra histogram"
+ pattern: "*.spectra-cn.hist"
+ - spectra_asm_fl_png:
+ type: file
+ description: "Unstacked assembly spectra filled plot in PNG format"
+ pattern: "*.spectra-asm.fl.png"
+ - spectra_asm_ln_png:
+ type: file
+ description: "Unstacked assembly spectra line plot in PNG format"
+ pattern: "*.spectra-asm.ln.png"
+ - spectra_asm_st_png:
+ type: file
+ description: "Stacked assembly spectra line plot in PNG format"
+ pattern: "*.spectra-asm.st.png"
+ - spectra_asm_hist:
+ type: file
+ description: "Assembly spectra histogram"
+ pattern: "*.spectra-asm.hist"
+ - assembly_qv:
+ type: file
+ description: "Assembly consensus quality estimation"
+ pattern: "*.qv"
+ - scaffold_qv:
+ type: file
+ description: "Scaffold consensus quality estimation"
+ pattern: "*.qv"
+ - read_ploidy:
+ type: file
+ description: "Ploidy estimate from read k-mer database"
+ pattern: "*.hist.ploidy"
+ - hapmers_blob_png:
+ type: file
+ description: "Hap-mer blob plot"
+ pattern: "*.hapmers.blob.png"
+authors:
+ - "@mahesh-panchal"
+maintainers:
+ - "@mahesh-panchal"
+ - "@gallvp"
diff --git a/modules/nf-core/merqury/merqury/tests/main.nf.test b/modules/nf-core/merqury/merqury/tests/main.nf.test
new file mode 100644
index 00000000..34c7b46c
--- /dev/null
+++ b/modules/nf-core/merqury/merqury/tests/main.nf.test
@@ -0,0 +1,189 @@
+nextflow_process { // nf-test spec for the MERQURY_MERQURY process defined in ../main.nf
+
+ name "Test Process MERQURY_MERQURY"
+ script "../main.nf"
+ process "MERQURY_MERQURY"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "merqury"
+ tag "merqury/merqury"
+ tag "meryl/count" // the meryl modules are tagged because the setup blocks below run them as fixtures
+ tag "meryl/unionsum"
+
+ setup { // fixtures referenced by all three tests via MERYL_UNIONSUM.out.meryl_db
+ run("MERYL_COUNT") { // build k-mer databases (k = 21) from the paired Illumina read files
+ script "../../../meryl/count"
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.test_data['homo_sapiens']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['homo_sapiens']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+ ]
+ ]
+ input[1] = 21
+ """
+ }
+ }
+
+ run("MERYL_UNIONSUM") { // feed the MERYL_COUNT databases into union-sum, using the same k
+ script "../../../meryl/unionsum"
+ process {
+ """
+ input[0] = MERYL_COUNT.out.meryl_db
+ input[1] = 21
+ """
+ }
+ }
+ }
+
+ test("homo_sapiens-genome") { // read database joined with the assembly only; no parental databases
+
+ when {
+ process {
+ """
+ assembly = [
+ [ id:'test', single_end:false ], // meta map
+ file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ input[0] = MERYL_UNIONSUM.out.meryl_db.join( Channel.value( assembly ) )
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot( // non-PNG channels are snapshotted whole; PNG plots are compared by file name only
+ process.out.assembly_only_kmers_bed,
+ process.out.assembly_only_kmers_wig,
+ process.out.stats,
+ process.out.dist_hist,
+ process.out.spectra_cn_hist,
+ process.out.spectra_asm_hist,
+ process.out.assembly_qv,
+ process.out.scaffold_qv,
+ process.out.read_ploidy,
+ process.out.versions,
+ file(process.out.spectra_cn_fl_png[0][1]).name,
+ file(process.out.spectra_cn_ln_png[0][1]).name,
+ file(process.out.spectra_cn_st_png[0][1]).name,
+ file(process.out.spectra_asm_fl_png[0][1]).name,
+ file(process.out.spectra_asm_ln_png[0][1]).name,
+ file(process.out.spectra_asm_st_png[0][1]).name
+ ).match()
+ },
+ { assert process.out.hapmers_blob_png == [] } // the hap-mer blob channel must be empty in this mode
+ )
+ }
+
+ }
+
+ test("homo_sapiens-genome-trio") { // adds maternal and paternal k-mer databases to the input tuple
+
+ setup { // test-local fixtures: MERYL_COUNT run twice under aliases, each on a single read file
+ run("MERYL_COUNT", alias: "MATERNAL_COUNT") {
+ script "../../../meryl/count"
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.test_data['homo_sapiens']['illumina']['test2_1_fastq_gz'], checkIfExists: true)
+ ]
+ ]
+ input[1] = 21
+ """
+ }
+ }
+
+ run("MERYL_COUNT", alias: "PATERNAL_COUNT") {
+ script "../../../meryl/count"
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.test_data['homo_sapiens']['illumina']['test2_2_fastq_gz'], checkIfExists: true)
+ ]
+ ]
+ input[1] = 21
+ """
+ }
+ }
+ }
+
+ when {
+ process {
+ """
+ ch_assembly = Channel.value([
+ [ id:'test', single_end:false ], // meta map
+ file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+ ])
+
+ ch_input = MERYL_UNIONSUM.out.meryl_db
+ | join( MATERNAL_COUNT.out.meryl_db )
+ | join( PATERNAL_COUNT.out.meryl_db )
+ | join( ch_assembly )
+ | map { meta, meryl, mat_meryl, pat_meryl, fasta -> [ meta, [ meryl, mat_meryl, pat_meryl ], fasta ] }
+ input[0] = ch_input
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot( // same layout as the non-trio test, plus the hap-mer blob plot name at the end
+ process.out.assembly_only_kmers_bed,
+ process.out.assembly_only_kmers_wig,
+ process.out.stats,
+ process.out.dist_hist,
+ process.out.spectra_cn_hist,
+ process.out.spectra_asm_hist,
+ process.out.assembly_qv,
+ process.out.scaffold_qv,
+ process.out.read_ploidy,
+ process.out.versions,
+ process.out.spectra_cn_fl_png[0][1] .collect { file(it).name }.join(','), // spectra-cn channels hold several PNGs here, so their names are joined into one string
+ process.out.spectra_cn_ln_png[0][1] .collect { file(it).name }.join(','),
+ process.out.spectra_cn_st_png[0][1] .collect { file(it).name }.join(','),
+ file(process.out.spectra_asm_fl_png[0][1]).name,
+ file(process.out.spectra_asm_ln_png[0][1]).name,
+ file(process.out.spectra_asm_st_png[0][1]).name,
+ file(process.out.hapmers_blob_png[0][1]).name,
+ ).match()
+ }
+ )
+ }
+
+ }
+
+ test("homo_sapiens-genome-stub") { // same input as the first test, executed with -stub
+
+ options '-stub'
+
+ when {
+ process {
+ """
+ assembly = [
+ [ id:'test', single_end:false ], // meta map
+ file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ input[0] = MERYL_UNIONSUM.out.meryl_db.join( Channel.value( assembly ) )
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() } // stub run: the whole output map is snapshotted
+ )
+ }
+
+ }
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/merqury/merqury/tests/main.nf.test.snap b/modules/nf-core/merqury/merqury/tests/main.nf.test.snap
new file mode 100644
index 00000000..d96fa7ef
--- /dev/null
+++ b/modules/nf-core/merqury/merqury/tests/main.nf.test.snap
@@ -0,0 +1,494 @@
+{
+ "homo_sapiens-genome-stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_only.bed:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_only.wig:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "10": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.spectra-asm.ln.png:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "11": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.spectra-asm.st.png:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "12": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.qv:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "13": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.test.qv:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "14": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.hist.ploidy:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "15": [
+
+ ],
+ "16": [
+ "versions.yml:md5,825a4c61369638389227eee16dfb08b5"
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.completeness.stats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.dist_only.hist:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "4": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.spectra-cn.fl.png:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "5": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.spectra-cn.hist:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "6": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.spectra-cn.ln.png:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "7": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.spectra-cn.st.png:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "8": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.spectra-asm.fl.png:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "9": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.spectra-asm.hist:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "assembly_only_kmers_bed": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_only.bed:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "assembly_only_kmers_wig": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_only.wig:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "assembly_qv": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.qv:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "dist_hist": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.dist_only.hist:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "hapmers_blob_png": [
+
+ ],
+ "read_ploidy": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.hist.ploidy:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "scaffold_qv": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.test.qv:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "spectra_asm_fl_png": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.spectra-asm.fl.png:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "spectra_asm_hist": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.spectra-asm.hist:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "spectra_asm_ln_png": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.spectra-asm.ln.png:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "spectra_asm_st_png": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.spectra-asm.st.png:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "spectra_cn_fl_png": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.spectra-cn.fl.png:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "spectra_cn_hist": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.spectra-cn.hist:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "spectra_cn_ln_png": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.spectra-cn.ln.png:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "spectra_cn_st_png": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.spectra-cn.st.png:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "stats": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.completeness.stats:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,825a4c61369638389227eee16dfb08b5"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-22T21:00:35.907142"
+ },
+ "homo_sapiens-genome": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "genome_only.bed:md5,b611f22cde0e410a2ca07c1eefd042d3"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "genome_only.wig:md5,19cf44989af72af597ef80d3489b4882"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.completeness.stats:md5,d923bf7c98342625d022b2c32be8dd3a"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.dist_only.hist:md5,e2e6b54b0febef1f0fcf24cd2afd0b7a"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.genome.spectra-cn.hist:md5,6140a138ba47cb2b97814c93f80b2575"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.spectra-asm.hist:md5,541c9d1f87ab5c44df5e9e0acc440f8d"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.qv:md5,6e04952bc182221c8b9e242dc3298808"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.genome.qv:md5,c554315aabcc4207c367805cf3090da3"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.unionsum.hist.ploidy:md5,16cf2fd89ed870c9cceb269e8430169a"
+ ]
+ ],
+ [
+ "versions.yml:md5,825a4c61369638389227eee16dfb08b5"
+ ],
+ "test.genome.spectra-cn.fl.png",
+ "test.genome.spectra-cn.ln.png",
+ "test.genome.spectra-cn.st.png",
+ "test.spectra-asm.fl.png",
+ "test.spectra-asm.ln.png",
+ "test.spectra-asm.st.png"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-22T21:52:39.004978"
+ },
+ "homo_sapiens-genome-trio": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "genome_only.bed:md5,b611f22cde0e410a2ca07c1eefd042d3"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "genome_only.wig:md5,19cf44989af72af597ef80d3489b4882"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.completeness.stats:md5,58b3933129832d64652babd80688eec3"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.dist_only.hist:md5,e2e6b54b0febef1f0fcf24cd2afd0b7a"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.genome.spectra-cn.hist:md5,6140a138ba47cb2b97814c93f80b2575"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.spectra-asm.hist:md5,541c9d1f87ab5c44df5e9e0acc440f8d"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.qv:md5,6e04952bc182221c8b9e242dc3298808"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.genome.qv:md5,c554315aabcc4207c367805cf3090da3"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.unionsum.hist.ploidy:md5,16cf2fd89ed870c9cceb269e8430169a"
+ ]
+ ],
+ [
+ "versions.yml:md5,825a4c61369638389227eee16dfb08b5"
+ ],
+ "test.genome.read.test2_1.spectra-cn.fl.png,test.genome.read.test2_2.spectra-cn.fl.png,test.genome.spectra-cn.fl.png",
+ "test.genome.read.test2_1.spectra-cn.ln.png,test.genome.read.test2_2.spectra-cn.ln.png,test.genome.spectra-cn.ln.png",
+ "test.genome.read.test2_1.spectra-cn.st.png,test.genome.read.test2_2.spectra-cn.st.png,test.genome.spectra-cn.st.png",
+ "test.spectra-asm.fl.png",
+ "test.spectra-asm.ln.png",
+ "test.spectra-asm.st.png",
+ "test.hapmers.blob.png"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-23T10:37:10.601154"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/merqury/merqury/tests/tags.yml b/modules/nf-core/merqury/merqury/tests/tags.yml
new file mode 100644
index 00000000..af157f18
--- /dev/null
+++ b/modules/nf-core/merqury/merqury/tests/tags.yml
@@ -0,0 +1,2 @@
+merqury/merqury:
+ - "modules/nf-core/merqury/merqury/**"
diff --git a/modules/nf-core/meryl/count/environment.yml b/modules/nf-core/meryl/count/environment.yml
new file mode 100644
index 00000000..808565ee
--- /dev/null
+++ b/modules/nf-core/meryl/count/environment.yml
@@ -0,0 +1,7 @@
+name: meryl_count
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::meryl=1.4.1
diff --git a/modules/nf-core/meryl/count/main.nf b/modules/nf-core/meryl/count/main.nf
new file mode 100644
index 00000000..0f1ff0af
--- /dev/null
+++ b/modules/nf-core/meryl/count/main.nf
@@ -0,0 +1,55 @@
+// Count k-mers with meryl, writing one `read.<name>.meryl` database per staged read file.
+process MERYL_COUNT {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/meryl:1.4.1--h4ac6f70_0':
+        'biocontainers/meryl:1.4.1--h4ac6f70_0' }"
+
+    input:
+    tuple val(meta), path(reads) // sample meta map and the read file(s) to count
+    val kvalue                   // k-mer size, passed to meryl as k=<kvalue>
+
+    output:
+    tuple val(meta), path("*.meryl") , emit: meryl_db
+    path "versions.yml"              , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''
+    // Each iteration counts only its own file (READ); passing the full $reads list here would put every file into every database.
+    // Output names come from the read file name with its trailing `.f*` suffix (e.g. `.fastq.gz`) removed; task.ext.prefix is not used.
+    """
+    for READ in $reads; do
+        meryl count \\
+            k=$kvalue \\
+            threads=$task.cpus \\
+            memory=${task.memory.toGiga()} \\
+            $args \\
+            \$READ \\
+            output read.\${READ%.f*}.meryl
+    done
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        meryl: \$( meryl --version |& sed 's/meryl //' )
+    END_VERSIONS
+    """
+
+    stub:
+    // Creates empty placeholders named like the real outputs; no k-mers are counted.
+    """
+    for READ in $reads; do
+        touch read.\${READ%.f*}.meryl
+    done
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        meryl: \$( meryl --version |& sed 's/meryl //' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/meryl/count/meta.yml b/modules/nf-core/meryl/count/meta.yml
new file mode 100644
index 00000000..809a32fe
--- /dev/null
+++ b/modules/nf-core/meryl/count/meta.yml
@@ -0,0 +1,46 @@
+name: "meryl_count"
+description: A genomic k-mer counter (and sequence utility) with nice features.
+keywords:
+ - k-mer
+ - count
+ - reference-free
+tools:
+ - "meryl":
+ description: "A genomic k-mer counter (and sequence utility) with nice features. "
+ homepage: "https://github.com/marbl/meryl"
+ documentation: "https://meryl.readthedocs.io/en/latest/quick-start.html"
+ tool_dev_url: "https://github.com/marbl/meryl"
+ licence: ["GPL"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: |
+ List of input FastQ files of size 1 and 2 for single-end and paired-end data,
+ respectively.
+ - kvalue:
+ type: integer
+ description: An integer value of k to use as the k-mer value.
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - meryl_db:
+ type: directory
+ description: A Meryl k-mer database
+ pattern: "*.meryl"
+authors:
+ - "@mahesh-panchal"
+maintainers:
+ - "@mahesh-panchal"
+ - "@gallvp"
diff --git a/modules/nf-core/meryl/count/tests/main.nf.test b/modules/nf-core/meryl/count/tests/main.nf.test
new file mode 100644
index 00000000..cce46c36
--- /dev/null
+++ b/modules/nf-core/meryl/count/tests/main.nf.test
@@ -0,0 +1,60 @@
+nextflow_process { // nf-test spec for the MERYL_COUNT process defined in ../main.nf
+
+ name "Test Process MERYL_COUNT"
+ script "../main.nf"
+ process "MERYL_COUNT"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "meryl"
+ tag "meryl/count"
+
+ test("bacteroides_fragilis - fastq") { // real run on a single FASTQ file with k = 21
+
+ when {
+ process {
+ """
+ input[0] = Channel.value([
+ [ id: 'test', single_end: true ], // meta map
+ file( params.modules_testdata_base_path + "/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_1.fastq.gz", checkIfExists: true )
+ ])
+ input[1] = Channel.value(21)
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() } // the whole output map is compared with the stored snapshot
+ )
+ }
+
+ }
+
+ test("bacteroides_fragilis - fastq - stub") { // identical inputs, executed with -stub
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = Channel.value([
+ [ id: 'test', single_end: true ], // meta map
+ file( params.modules_testdata_base_path + "/genomics/prokaryotes/bacteroides_fragilis/illumina/fastq/test1_1.fastq.gz", checkIfExists: true )
+ ])
+ input[1] = Channel.value(21)
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() } // the whole output map is compared with the stored snapshot
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/meryl/count/tests/main.nf.test.snap b/modules/nf-core/meryl/count/tests/main.nf.test.snap
new file mode 100644
index 00000000..e0a20902
--- /dev/null
+++ b/modules/nf-core/meryl/count/tests/main.nf.test.snap
@@ -0,0 +1,332 @@
+{
+ "bacteroides_fragilis - fastq": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ [
+ "0x000000.merylData:md5,eefafbacbd0c41e4e4851e0e79a665a2",
+ "0x000000.merylIndex:md5,be9c1da85afee20689c5c7780b6c4996",
+ "0x000001.merylData:md5,d7e46736fc2896085b4610c50ec74835",
+ "0x000001.merylIndex:md5,a68e8b877ed5ce04e6f36a279b2fac0b",
+ "0x000010.merylData:md5,635badb59ad796363c2ff31801ab3d07",
+ "0x000010.merylIndex:md5,afed85194fcbd762b7acf86427b41cdd",
+ "0x000011.merylData:md5,b79970fce39d47cda1312080cac2f5d4",
+ "0x000011.merylIndex:md5,402ba5cf71656e2d19e584e4b65cadaa",
+ "0x000100.merylData:md5,48a7160c93fe5dd22fcdf8beb30e9256",
+ "0x000100.merylIndex:md5,fbfae07482bed8b00c6a690850c632fc",
+ "0x000101.merylData:md5,3ff336b4d59850f6726a4bd5670d49be",
+ "0x000101.merylIndex:md5,817edfe6dcbb07801bb9de54e0e1839a",
+ "0x000110.merylData:md5,41f253dc8240dd26cd8bbe379968aa1f",
+ "0x000110.merylIndex:md5,64f1400897c3108d5bc4957bea80569c",
+ "0x000111.merylData:md5,0860dd1bb6ee8e99a315c976b0c6226a",
+ "0x000111.merylIndex:md5,794aadc07e62b0d872763f2b40615627",
+ "0x001000.merylData:md5,daee6b4d18a43ce3a6c46ee2e8105a93",
+ "0x001000.merylIndex:md5,5ec6234ffd7d0380db3adc5aa3a3bb85",
+ "0x001001.merylData:md5,7e9b6d3c6f116fe418c4a4b0529a795e",
+ "0x001001.merylIndex:md5,eb63cc438cac77169a527bd6607bd781",
+ "0x001010.merylData:md5,f93c20dbdd16be37cdcfa263fe3c4ca4",
+ "0x001010.merylIndex:md5,d00fe5bd25b49bc9084b705017ccf4b9",
+ "0x001011.merylData:md5,eb25d445533ba345ea4e315750dd878c",
+ "0x001011.merylIndex:md5,1cacb579a9991dc5ebeef884347b81f5",
+ "0x001100.merylData:md5,7c783f255d155c23218e14c390b4c505",
+ "0x001100.merylIndex:md5,41a4d42d07928f3a33fd6c2d4ff613d8",
+ "0x001101.merylData:md5,98f323f13f99a2c75ccf8db9179f4b7d",
+ "0x001101.merylIndex:md5,528f3cdc0cbde3f0c91f8e8acd00a3e1",
+ "0x001110.merylData:md5,12f1c899ed2878d60c3a7ee5eb599839",
+ "0x001110.merylIndex:md5,755fc26bb9c8fec041798958f83b37b8",
+ "0x001111.merylData:md5,594881e83a5ece0879e79e2be82847f9",
+ "0x001111.merylIndex:md5,62625db2836f07303c9e4d383574143f",
+ "0x010000.merylData:md5,537d1a60554bd68386509fc3edd5e609",
+ "0x010000.merylIndex:md5,2bf23b6a552306dab61d0da0ad0789e0",
+ "0x010001.merylData:md5,9baef5a2ea4ed43caf18cabcdccaa3eb",
+ "0x010001.merylIndex:md5,ff98a6e5d18ab52795c6af00f47fc355",
+ "0x010010.merylData:md5,3b37668e9f9e1fc220e1fed36ecb7f86",
+ "0x010010.merylIndex:md5,0724a75055d62586f95f8b5c58688dd3",
+ "0x010011.merylData:md5,b4a039219f34df414048a023beea583b",
+ "0x010011.merylIndex:md5,34b29bdb6676e172e80202d36a0551d7",
+ "0x010100.merylData:md5,a49dfdebd8c4b3ff5ed1f4f84128b7e7",
+ "0x010100.merylIndex:md5,7cf00a46160bcb04de47e8bd05e8696b",
+ "0x010101.merylData:md5,0065195381f0a8629426139f3ba97327",
+ "0x010101.merylIndex:md5,de73cfc31abb763a11e063711f4b86bd",
+ "0x010110.merylData:md5,b331a0a2a59e46741366b8f811eaafcb",
+ "0x010110.merylIndex:md5,1caca7c16ffcc8070fbb58f4c9936089",
+ "0x010111.merylData:md5,7a51306a008a7f7639d98b4d1f24be92",
+ "0x010111.merylIndex:md5,12572a822643689bbc77030acb395850",
+ "0x011000.merylData:md5,6cdc1ab34d47f23a298ed3fc1d80ab4a",
+ "0x011000.merylIndex:md5,8947c6d693556b2530aed1f9ba0f23ba",
+ "0x011001.merylData:md5,9901aed256e9790ee9fe4a797bdc2348",
+ "0x011001.merylIndex:md5,1538131168e32210c7ada2fe7379ca18",
+ "0x011010.merylData:md5,30a5df2f9d2b33bd76780c5e1a1dca39",
+ "0x011010.merylIndex:md5,61d929e6929922e3206d6338340a9f87",
+ "0x011011.merylData:md5,edf65fc218d2b867a7117d0cd20bb578",
+ "0x011011.merylIndex:md5,fd6c02e383ec73fe31051ab16c3dc590",
+ "0x011100.merylData:md5,c0cbe5856dab102fc6ecdc481739936e",
+ "0x011100.merylIndex:md5,2c938d6d5806169f5718e35308d5664e",
+ "0x011101.merylData:md5,135031f4ec8f4f9fee07a6cd980a8f4e",
+ "0x011101.merylIndex:md5,855f5b00dce3c1307db74935f34ff777",
+ "0x011110.merylData:md5,e9c9a04e6d1a8763ad22dbc810918c8e",
+ "0x011110.merylIndex:md5,4bf0e856bea5404c683f77a857c8be04",
+ "0x011111.merylData:md5,5f8032cedc4fe13e9eee3a1e7b58ffd9",
+ "0x011111.merylIndex:md5,c7ff331ae2cb2c9f63cd33a57e3f707a",
+ "0x100000.merylData:md5,100d73f6f495c20c52c12c2e12764c33",
+ "0x100000.merylIndex:md5,3e1715843526e9ad1908d1a6dd882364",
+ "0x100001.merylData:md5,f97f4b5624e8cc4d7899fe31898b75ba",
+ "0x100001.merylIndex:md5,f5b5337094755876ad5ddfe9471f17a7",
+ "0x100010.merylData:md5,d68cc8b0acd31a80b46dc59c682a9875",
+ "0x100010.merylIndex:md5,d3e62d9d487c8299dd4b2806309f2757",
+ "0x100011.merylData:md5,71b87c478dcfdd496dfaae98c532c551",
+ "0x100011.merylIndex:md5,5cbdcd1ebbbc5e0e4848aedfba8f4d7a",
+ "0x100100.merylData:md5,bfbd0b14ae7633f162c2eba5ebf429f2",
+ "0x100100.merylIndex:md5,327cdff7a43f60879b611c56fc689570",
+ "0x100101.merylData:md5,c9147b0a4adc493a15f4c79d39792956",
+ "0x100101.merylIndex:md5,93f5cb4c0a35c15fc45d88561627d6c8",
+ "0x100110.merylData:md5,4e05cbeec33b4085f460093e1a5905a7",
+ "0x100110.merylIndex:md5,b065c6aa4180b82bcc1d723513bd5d4b",
+ "0x100111.merylData:md5,68a6a2ca12adba1c57dad51d73d47339",
+ "0x100111.merylIndex:md5,f1344a178e0141497f404de76758cc27",
+ "0x101000.merylData:md5,15cefb92f8ba5ee54472b66b44fb6d9d",
+ "0x101000.merylIndex:md5,4c83229ee4be2fbd5732d6dae5aacb28",
+ "0x101001.merylData:md5,910c4abf7a925e2582b13d7e396c591b",
+ "0x101001.merylIndex:md5,8fc1b16e74ad2ccf7a5cf330df47735c",
+ "0x101010.merylData:md5,85bf310f1cd130f2201d81daca291ff7",
+ "0x101010.merylIndex:md5,72ea3767e2730d318c292d5d48deb63f",
+ "0x101011.merylData:md5,033989603839db494fdffadedca76a6c",
+ "0x101011.merylIndex:md5,8f3731e5cf80723f7bcf1f2b90d5667b",
+ "0x101100.merylData:md5,df342143a9c4c54f2e8e6496b6e4f708",
+ "0x101100.merylIndex:md5,c3421ce5e8b49548df91cd195315e139",
+ "0x101101.merylData:md5,42d25675e9d0bc06c8b34d75d07a4caf",
+ "0x101101.merylIndex:md5,5c2f10b939ec5d463c524af55cd25c24",
+ "0x101110.merylData:md5,2e43f76c1e826da6fb9757b4f812acd1",
+ "0x101110.merylIndex:md5,b6b3ba4282ca1417462c50c43c164a0c",
+ "0x101111.merylData:md5,45732e59a9113476954035ffe0e7b601",
+ "0x101111.merylIndex:md5,45a3da9ddd34c4b5651690ce421ee5f2",
+ "0x110000.merylData:md5,e10a6e9169bc8fbdbbbac680e58e9781",
+ "0x110000.merylIndex:md5,53b2dbff4c7598489dba7956f2accdda",
+ "0x110001.merylData:md5,3db19699cedecafea182e146ecf2b24d",
+ "0x110001.merylIndex:md5,3271f69d5830172dbab92cdae3a5b3a3",
+ "0x110010.merylData:md5,6c9ccb0ca19ec3dd0bcac3d4ac16c1bb",
+ "0x110010.merylIndex:md5,f9a3fcc0aa34271866a2730a487bcff0",
+ "0x110011.merylData:md5,03138380dd03d3ec08b6ceee3bb8ecdc",
+ "0x110011.merylIndex:md5,26a57f682f875ee3e920e28f9351e586",
+ "0x110100.merylData:md5,caa3b1d6f1a5ea7815511b729dcb4f5a",
+ "0x110100.merylIndex:md5,7d4f76e3dddb398de2f233bfb80f6161",
+ "0x110101.merylData:md5,1f3a72ccf2fdee4332f2b34d2368f585",
+ "0x110101.merylIndex:md5,317e723efbc4bb073cddaaae3bba0931",
+ "0x110110.merylData:md5,fe8eb93ed0cf30cac4678179b0d2daa7",
+ "0x110110.merylIndex:md5,66c33da6a8b65ad35d0e0717c9d134eb",
+ "0x110111.merylData:md5,b7aefc0873d30cde41d38c7f1b90efc3",
+ "0x110111.merylIndex:md5,08cc48d6a13aa4fdce144c3ff41f5be6",
+ "0x111000.merylData:md5,9504dab5d20741912da8f815912507b9",
+ "0x111000.merylIndex:md5,e677a2fb775336b8efda8e41e4bc2720",
+ "0x111001.merylData:md5,eabf6c9b053696a34f38136f4fa05ed8",
+ "0x111001.merylIndex:md5,55479260f69d1d3abab93d0e155d0f2f",
+ "0x111010.merylData:md5,0ea69feac704ed4bcd434c67a6e5a769",
+ "0x111010.merylIndex:md5,f60e31a49a6d097b44d0a244685747f9",
+ "0x111011.merylData:md5,20e7301ecc915e918d390fd45b8bb428",
+ "0x111011.merylIndex:md5,1d8a61f45b82db9caa8e4b5cb1dbfb30",
+ "0x111100.merylData:md5,95e6fe905b6c07e7e3537d1ee6a2dca9",
+ "0x111100.merylIndex:md5,231ddde97648162e313290f49ceab69b",
+ "0x111101.merylData:md5,79cdba511fae030098025de90472e21d",
+ "0x111101.merylIndex:md5,e93892161446762ac545e0399e362685",
+ "0x111110.merylData:md5,062ba875af75297889359bc39409ab99",
+ "0x111110.merylIndex:md5,0cd1b81c64fee3bf067ce5ca1755a2bc",
+ "0x111111.merylData:md5,68feb8f1bade7885a9a696bb0825a17f",
+ "0x111111.merylIndex:md5,fd631e33829939cd7f06581c80791dc4",
+ "merylIndex:md5,d9e794e14b1eeb0eaa4d98e10eb571cc"
+ ]
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,a4a6baac7481f1d4595ba54622bdf33d"
+ ],
+ "meryl_db": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ [
+ "0x000000.merylData:md5,eefafbacbd0c41e4e4851e0e79a665a2",
+ "0x000000.merylIndex:md5,be9c1da85afee20689c5c7780b6c4996",
+ "0x000001.merylData:md5,d7e46736fc2896085b4610c50ec74835",
+ "0x000001.merylIndex:md5,a68e8b877ed5ce04e6f36a279b2fac0b",
+ "0x000010.merylData:md5,635badb59ad796363c2ff31801ab3d07",
+ "0x000010.merylIndex:md5,afed85194fcbd762b7acf86427b41cdd",
+ "0x000011.merylData:md5,b79970fce39d47cda1312080cac2f5d4",
+ "0x000011.merylIndex:md5,402ba5cf71656e2d19e584e4b65cadaa",
+ "0x000100.merylData:md5,48a7160c93fe5dd22fcdf8beb30e9256",
+ "0x000100.merylIndex:md5,fbfae07482bed8b00c6a690850c632fc",
+ "0x000101.merylData:md5,3ff336b4d59850f6726a4bd5670d49be",
+ "0x000101.merylIndex:md5,817edfe6dcbb07801bb9de54e0e1839a",
+ "0x000110.merylData:md5,41f253dc8240dd26cd8bbe379968aa1f",
+ "0x000110.merylIndex:md5,64f1400897c3108d5bc4957bea80569c",
+ "0x000111.merylData:md5,0860dd1bb6ee8e99a315c976b0c6226a",
+ "0x000111.merylIndex:md5,794aadc07e62b0d872763f2b40615627",
+ "0x001000.merylData:md5,daee6b4d18a43ce3a6c46ee2e8105a93",
+ "0x001000.merylIndex:md5,5ec6234ffd7d0380db3adc5aa3a3bb85",
+ "0x001001.merylData:md5,7e9b6d3c6f116fe418c4a4b0529a795e",
+ "0x001001.merylIndex:md5,eb63cc438cac77169a527bd6607bd781",
+ "0x001010.merylData:md5,f93c20dbdd16be37cdcfa263fe3c4ca4",
+ "0x001010.merylIndex:md5,d00fe5bd25b49bc9084b705017ccf4b9",
+ "0x001011.merylData:md5,eb25d445533ba345ea4e315750dd878c",
+ "0x001011.merylIndex:md5,1cacb579a9991dc5ebeef884347b81f5",
+ "0x001100.merylData:md5,7c783f255d155c23218e14c390b4c505",
+ "0x001100.merylIndex:md5,41a4d42d07928f3a33fd6c2d4ff613d8",
+ "0x001101.merylData:md5,98f323f13f99a2c75ccf8db9179f4b7d",
+ "0x001101.merylIndex:md5,528f3cdc0cbde3f0c91f8e8acd00a3e1",
+ "0x001110.merylData:md5,12f1c899ed2878d60c3a7ee5eb599839",
+ "0x001110.merylIndex:md5,755fc26bb9c8fec041798958f83b37b8",
+ "0x001111.merylData:md5,594881e83a5ece0879e79e2be82847f9",
+ "0x001111.merylIndex:md5,62625db2836f07303c9e4d383574143f",
+ "0x010000.merylData:md5,537d1a60554bd68386509fc3edd5e609",
+ "0x010000.merylIndex:md5,2bf23b6a552306dab61d0da0ad0789e0",
+ "0x010001.merylData:md5,9baef5a2ea4ed43caf18cabcdccaa3eb",
+ "0x010001.merylIndex:md5,ff98a6e5d18ab52795c6af00f47fc355",
+ "0x010010.merylData:md5,3b37668e9f9e1fc220e1fed36ecb7f86",
+ "0x010010.merylIndex:md5,0724a75055d62586f95f8b5c58688dd3",
+ "0x010011.merylData:md5,b4a039219f34df414048a023beea583b",
+ "0x010011.merylIndex:md5,34b29bdb6676e172e80202d36a0551d7",
+ "0x010100.merylData:md5,a49dfdebd8c4b3ff5ed1f4f84128b7e7",
+ "0x010100.merylIndex:md5,7cf00a46160bcb04de47e8bd05e8696b",
+ "0x010101.merylData:md5,0065195381f0a8629426139f3ba97327",
+ "0x010101.merylIndex:md5,de73cfc31abb763a11e063711f4b86bd",
+ "0x010110.merylData:md5,b331a0a2a59e46741366b8f811eaafcb",
+ "0x010110.merylIndex:md5,1caca7c16ffcc8070fbb58f4c9936089",
+ "0x010111.merylData:md5,7a51306a008a7f7639d98b4d1f24be92",
+ "0x010111.merylIndex:md5,12572a822643689bbc77030acb395850",
+ "0x011000.merylData:md5,6cdc1ab34d47f23a298ed3fc1d80ab4a",
+ "0x011000.merylIndex:md5,8947c6d693556b2530aed1f9ba0f23ba",
+ "0x011001.merylData:md5,9901aed256e9790ee9fe4a797bdc2348",
+ "0x011001.merylIndex:md5,1538131168e32210c7ada2fe7379ca18",
+ "0x011010.merylData:md5,30a5df2f9d2b33bd76780c5e1a1dca39",
+ "0x011010.merylIndex:md5,61d929e6929922e3206d6338340a9f87",
+ "0x011011.merylData:md5,edf65fc218d2b867a7117d0cd20bb578",
+ "0x011011.merylIndex:md5,fd6c02e383ec73fe31051ab16c3dc590",
+ "0x011100.merylData:md5,c0cbe5856dab102fc6ecdc481739936e",
+ "0x011100.merylIndex:md5,2c938d6d5806169f5718e35308d5664e",
+ "0x011101.merylData:md5,135031f4ec8f4f9fee07a6cd980a8f4e",
+ "0x011101.merylIndex:md5,855f5b00dce3c1307db74935f34ff777",
+ "0x011110.merylData:md5,e9c9a04e6d1a8763ad22dbc810918c8e",
+ "0x011110.merylIndex:md5,4bf0e856bea5404c683f77a857c8be04",
+ "0x011111.merylData:md5,5f8032cedc4fe13e9eee3a1e7b58ffd9",
+ "0x011111.merylIndex:md5,c7ff331ae2cb2c9f63cd33a57e3f707a",
+ "0x100000.merylData:md5,100d73f6f495c20c52c12c2e12764c33",
+ "0x100000.merylIndex:md5,3e1715843526e9ad1908d1a6dd882364",
+ "0x100001.merylData:md5,f97f4b5624e8cc4d7899fe31898b75ba",
+ "0x100001.merylIndex:md5,f5b5337094755876ad5ddfe9471f17a7",
+ "0x100010.merylData:md5,d68cc8b0acd31a80b46dc59c682a9875",
+ "0x100010.merylIndex:md5,d3e62d9d487c8299dd4b2806309f2757",
+ "0x100011.merylData:md5,71b87c478dcfdd496dfaae98c532c551",
+ "0x100011.merylIndex:md5,5cbdcd1ebbbc5e0e4848aedfba8f4d7a",
+ "0x100100.merylData:md5,bfbd0b14ae7633f162c2eba5ebf429f2",
+ "0x100100.merylIndex:md5,327cdff7a43f60879b611c56fc689570",
+ "0x100101.merylData:md5,c9147b0a4adc493a15f4c79d39792956",
+ "0x100101.merylIndex:md5,93f5cb4c0a35c15fc45d88561627d6c8",
+ "0x100110.merylData:md5,4e05cbeec33b4085f460093e1a5905a7",
+ "0x100110.merylIndex:md5,b065c6aa4180b82bcc1d723513bd5d4b",
+ "0x100111.merylData:md5,68a6a2ca12adba1c57dad51d73d47339",
+ "0x100111.merylIndex:md5,f1344a178e0141497f404de76758cc27",
+ "0x101000.merylData:md5,15cefb92f8ba5ee54472b66b44fb6d9d",
+ "0x101000.merylIndex:md5,4c83229ee4be2fbd5732d6dae5aacb28",
+ "0x101001.merylData:md5,910c4abf7a925e2582b13d7e396c591b",
+ "0x101001.merylIndex:md5,8fc1b16e74ad2ccf7a5cf330df47735c",
+ "0x101010.merylData:md5,85bf310f1cd130f2201d81daca291ff7",
+ "0x101010.merylIndex:md5,72ea3767e2730d318c292d5d48deb63f",
+ "0x101011.merylData:md5,033989603839db494fdffadedca76a6c",
+ "0x101011.merylIndex:md5,8f3731e5cf80723f7bcf1f2b90d5667b",
+ "0x101100.merylData:md5,df342143a9c4c54f2e8e6496b6e4f708",
+ "0x101100.merylIndex:md5,c3421ce5e8b49548df91cd195315e139",
+ "0x101101.merylData:md5,42d25675e9d0bc06c8b34d75d07a4caf",
+ "0x101101.merylIndex:md5,5c2f10b939ec5d463c524af55cd25c24",
+ "0x101110.merylData:md5,2e43f76c1e826da6fb9757b4f812acd1",
+ "0x101110.merylIndex:md5,b6b3ba4282ca1417462c50c43c164a0c",
+ "0x101111.merylData:md5,45732e59a9113476954035ffe0e7b601",
+ "0x101111.merylIndex:md5,45a3da9ddd34c4b5651690ce421ee5f2",
+ "0x110000.merylData:md5,e10a6e9169bc8fbdbbbac680e58e9781",
+ "0x110000.merylIndex:md5,53b2dbff4c7598489dba7956f2accdda",
+ "0x110001.merylData:md5,3db19699cedecafea182e146ecf2b24d",
+ "0x110001.merylIndex:md5,3271f69d5830172dbab92cdae3a5b3a3",
+ "0x110010.merylData:md5,6c9ccb0ca19ec3dd0bcac3d4ac16c1bb",
+ "0x110010.merylIndex:md5,f9a3fcc0aa34271866a2730a487bcff0",
+ "0x110011.merylData:md5,03138380dd03d3ec08b6ceee3bb8ecdc",
+ "0x110011.merylIndex:md5,26a57f682f875ee3e920e28f9351e586",
+ "0x110100.merylData:md5,caa3b1d6f1a5ea7815511b729dcb4f5a",
+ "0x110100.merylIndex:md5,7d4f76e3dddb398de2f233bfb80f6161",
+ "0x110101.merylData:md5,1f3a72ccf2fdee4332f2b34d2368f585",
+ "0x110101.merylIndex:md5,317e723efbc4bb073cddaaae3bba0931",
+ "0x110110.merylData:md5,fe8eb93ed0cf30cac4678179b0d2daa7",
+ "0x110110.merylIndex:md5,66c33da6a8b65ad35d0e0717c9d134eb",
+ "0x110111.merylData:md5,b7aefc0873d30cde41d38c7f1b90efc3",
+ "0x110111.merylIndex:md5,08cc48d6a13aa4fdce144c3ff41f5be6",
+ "0x111000.merylData:md5,9504dab5d20741912da8f815912507b9",
+ "0x111000.merylIndex:md5,e677a2fb775336b8efda8e41e4bc2720",
+ "0x111001.merylData:md5,eabf6c9b053696a34f38136f4fa05ed8",
+ "0x111001.merylIndex:md5,55479260f69d1d3abab93d0e155d0f2f",
+ "0x111010.merylData:md5,0ea69feac704ed4bcd434c67a6e5a769",
+ "0x111010.merylIndex:md5,f60e31a49a6d097b44d0a244685747f9",
+ "0x111011.merylData:md5,20e7301ecc915e918d390fd45b8bb428",
+ "0x111011.merylIndex:md5,1d8a61f45b82db9caa8e4b5cb1dbfb30",
+ "0x111100.merylData:md5,95e6fe905b6c07e7e3537d1ee6a2dca9",
+ "0x111100.merylIndex:md5,231ddde97648162e313290f49ceab69b",
+ "0x111101.merylData:md5,79cdba511fae030098025de90472e21d",
+ "0x111101.merylIndex:md5,e93892161446762ac545e0399e362685",
+ "0x111110.merylData:md5,062ba875af75297889359bc39409ab99",
+ "0x111110.merylIndex:md5,0cd1b81c64fee3bf067ce5ca1755a2bc",
+ "0x111111.merylData:md5,68feb8f1bade7885a9a696bb0825a17f",
+ "0x111111.merylIndex:md5,fd631e33829939cd7f06581c80791dc4",
+ "merylIndex:md5,d9e794e14b1eeb0eaa4d98e10eb571cc"
+ ]
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,a4a6baac7481f1d4595ba54622bdf33d"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-26T19:30:03.054214308"
+ },
+ "bacteroides_fragilis - fastq - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "read.test1_1.meryl:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,a4a6baac7481f1d4595ba54622bdf33d"
+ ],
+ "meryl_db": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "read.test1_1.meryl:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,a4a6baac7481f1d4595ba54622bdf33d"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-22T12:27:16.300916"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/meryl/count/tests/tags.yml b/modules/nf-core/meryl/count/tests/tags.yml
new file mode 100644
index 00000000..b25bfa60
--- /dev/null
+++ b/modules/nf-core/meryl/count/tests/tags.yml
@@ -0,0 +1,2 @@
+meryl/count:
+ - "modules/nf-core/meryl/count/**"
diff --git a/modules/nf-core/meryl/unionsum/environment.yml b/modules/nf-core/meryl/unionsum/environment.yml
new file mode 100644
index 00000000..b8bdcbe8
--- /dev/null
+++ b/modules/nf-core/meryl/unionsum/environment.yml
@@ -0,0 +1,7 @@
+name: meryl_unionsum
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::meryl=1.4.1
diff --git a/modules/nf-core/meryl/unionsum/main.nf b/modules/nf-core/meryl/unionsum/main.nf
new file mode 100644
index 00000000..bc2853b0
--- /dev/null
+++ b/modules/nf-core/meryl/unionsum/main.nf
@@ -0,0 +1,50 @@
+process MERYL_UNIONSUM { // Runs `meryl union-sum` over the staged meryl databases and emits <prefix>.unionsum.meryl
+    tag "$meta.id"
+    label 'process_low'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/meryl:1.4.1--h4ac6f70_0':
+        'biocontainers/meryl:1.4.1--h4ac6f70_0' }"
+
+    input:
+    tuple val(meta), path(meryl_dbs) // meta: sample map (meta.id drives tag and default prefix); meryl_dbs: database(s) to combine
+    val kvalue                       // k-mer size, forwarded to meryl as k=<kvalue>
+
+    output:
+    tuple val(meta), path("*.unionsum.meryl"), emit: meryl_db
+    path "versions.yml"                      , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    def args = task.ext.args ?: ''                                      // extra meryl options supplied via ext.args
+    def prefix = task.ext.prefix ?: "${meta.id}"                        // output name stem, defaults to the sample id
+    def mem_arg = task.memory ? "memory=${task.memory.toGiga()}" : ''   // task.memory is null when no memory directive is set
+    """
+    meryl union-sum \\
+        k=$kvalue \\
+        threads=$task.cpus \\
+        ${mem_arg} \\
+        $args \\
+        output ${prefix}.unionsum.meryl \\
+        $meryl_dbs
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        meryl: \$( meryl --version |& sed 's/meryl //' )
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}" // the stub only needs the output name; no tool arguments are used
+    """
+    touch ${prefix}.unionsum.meryl
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        meryl: \$( meryl --version |& sed 's/meryl //' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/nf-core/meryl/unionsum/meta.yml b/modules/nf-core/meryl/unionsum/meta.yml
new file mode 100644
index 00000000..77d0784c
--- /dev/null
+++ b/modules/nf-core/meryl/unionsum/meta.yml
@@ -0,0 +1,44 @@
+name: "meryl_unionsum"
+description: A genomic k-mer counter (and sequence utility) with nice features. This module computes the union-sum of the input Meryl k-mer databases.
+keywords:
+ - k-mer
+ - unionsum
+ - reference-free
+tools:
+ - "meryl":
+ description: "A genomic k-mer counter (and sequence utility) with nice features."
+ homepage: "https://github.com/marbl/meryl"
+ documentation: "https://meryl.readthedocs.io/en/latest/quick-start.html"
+ tool_dev_url: "https://github.com/marbl/meryl"
+ licence: ["GPL"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - meryl_dbs:
+ type: directory
+ description: Meryl k-mer databases
+ - kvalue:
+ type: integer
+ description: An integer value of k to use as the k-mer value.
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - meryl_db:
+ type: directory
+ description: A Meryl k-mer database that is the union sum of the input databases
+ pattern: "*.unionsum.meryl"
+authors:
+ - "@mahesh-panchal"
+maintainers:
+ - "@mahesh-panchal"
+ - "@gallvp"
diff --git a/modules/nf-core/meryl/unionsum/tests/main.nf.test b/modules/nf-core/meryl/unionsum/tests/main.nf.test
new file mode 100644
index 00000000..dc1bf8af
--- /dev/null
+++ b/modules/nf-core/meryl/unionsum/tests/main.nf.test
@@ -0,0 +1,123 @@
+nextflow_process { // nf-test suite for the MERYL_UNIONSUM process defined in ../main.nf
+
+ name "Test Process MERYL_UNIONSUM"
+ script "../main.nf"
+ process "MERYL_UNIONSUM"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "meryl"
+ tag "meryl/unionsum"
+ tag "meryl/count" // MERYL_COUNT is run in every test's setup block below
+
+ test("sarscov2 - fastq - single_end") { // one FASTQ file is counted by MERYL_COUNT, then passed to MERYL_UNIONSUM
+
+ setup { // build the input meryl database with MERYL_COUNT before the process under test runs
+ run('MERYL_COUNT'){
+ script "../../../../../modules/nf-core/meryl/count/main.nf"
+ process {
+ """
+ input[0] = Channel.value([
+ [ id: 'test', single_end: true ], // meta map
+ file( params.modules_testdata_base_path + "/genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true )
+ ])
+ input[1] = Channel.value(21)
+ """
+ }
+ }
+ }
+
+ when { // feed the MERYL_COUNT output into MERYL_UNIONSUM with the same k value (21) used in setup
+ process {
+ """
+ input[0] = MERYL_COUNT.out.meryl_db
+ input[1] = Channel.value(21)
+ """
+ }
+ }
+
+ then { // the process must succeed and process.out must match the snapshot
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - fastq - paired_end") { // both test_1 and test_2 FASTQ files are counted in setup
+
+ setup { // build the input meryl database(s) with MERYL_COUNT from the read pair
+ run('MERYL_COUNT'){
+ script "../../../../../modules/nf-core/meryl/count/main.nf"
+ process {
+ """
+ input[0] = Channel.value([
+ [ id: 'test', single_end: false ], // meta map
+ [
+ file( params.modules_testdata_base_path + "/genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true ),
+ file( params.modules_testdata_base_path + "/genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true )
+ ]
+ ])
+ input[1] = Channel.value(21)
+ """
+ }
+ }
+ }
+
+ when { // feed the MERYL_COUNT output into MERYL_UNIONSUM with the same k value (21) used in setup
+ process {
+ """
+ input[0] = MERYL_COUNT.out.meryl_db
+ input[1] = Channel.value(21)
+ """
+ }
+ }
+
+ then { // the process must succeed and process.out must match the snapshot
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - fastq- stub") { // same single-end inputs as the first test, run with the "-stub" option
+
+ options "-stub" // NOTE(review): presumably forwards -stub to Nextflow so stub: sections run instead of script: - confirm
+
+ setup { // MERYL_COUNT is still run first to provide the input channel
+ run('MERYL_COUNT'){
+ script "../../../../../modules/nf-core/meryl/count/main.nf"
+ process {
+ """
+ input[0] = Channel.value([
+ [ id: 'test', single_end: true ], // meta map
+ file( params.modules_testdata_base_path + "/genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true )
+ ])
+ input[1] = Channel.value(21)
+ """
+ }
+ }
+ }
+
+ when { // feed the MERYL_COUNT output into MERYL_UNIONSUM with the same k value (21) used in setup
+ process {
+ """
+ input[0] = MERYL_COUNT.out.meryl_db
+ input[1] = Channel.value(21)
+ """
+ }
+ }
+
+ then { // the process must succeed and process.out must match the snapshot
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/meryl/unionsum/tests/main.nf.test.snap b/modules/nf-core/meryl/unionsum/tests/main.nf.test.snap
new file mode 100644
index 00000000..a6ed4c73
--- /dev/null
+++ b/modules/nf-core/meryl/unionsum/tests/main.nf.test.snap
@@ -0,0 +1,627 @@
+{
+ "sarscov2 - fastq- stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.unionsum.meryl:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,c97980ac5ebd37a77768c105861ad719"
+ ],
+ "meryl_db": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.unionsum.meryl:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,c97980ac5ebd37a77768c105861ad719"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-22T12:40:21.306142"
+ },
+ "sarscov2 - fastq - single_end": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ [
+ "0x000000.merylData:md5,7d95ca25a2d39a0b8fb5f71a77d7bddd",
+ "0x000000.merylIndex:md5,ca9b05f5b90ed432aa1134378a86f166",
+ "0x000001.merylData:md5,bb4e2ec7d45a2d4fa36e802b0e312965",
+ "0x000001.merylIndex:md5,26b3ad6909cec2061c476201a0ac04cd",
+ "0x000010.merylData:md5,b4b8e7a742b4634ca82d2d19928cacd6",
+ "0x000010.merylIndex:md5,a266e782d80833c8e685a42f07937fc7",
+ "0x000011.merylData:md5,bc181b55548c6d7832b65be4bed8ec56",
+ "0x000011.merylIndex:md5,f2bb128f3da304408c81cefb0d4de879",
+ "0x000100.merylData:md5,07da07cdc83420487641f7201c1593c2",
+ "0x000100.merylIndex:md5,f40d2c9ecf5980328cfd766b9d90019d",
+ "0x000101.merylData:md5,2410ff0d6604044e9175feb668e747ac",
+ "0x000101.merylIndex:md5,f61473e3e39e6990c30bfa53e2a63302",
+ "0x000110.merylData:md5,05804f66393bef8edf69143a6cd984bc",
+ "0x000110.merylIndex:md5,15b6e88e01e1b1abc29b54128067a938",
+ "0x000111.merylData:md5,a3ced1867214878de2d83dda7e805308",
+ "0x000111.merylIndex:md5,2998f056a5f859229229b0051cb792ec",
+ "0x001000.merylData:md5,531c743e9b5df804297fba37e5cd4a63",
+ "0x001000.merylIndex:md5,30ab89ede28b623711675a45751b2ebc",
+ "0x001001.merylData:md5,4303314fa9d7bb8326938537c27d381e",
+ "0x001001.merylIndex:md5,1356eb83aa75fce8693e974bf54200d1",
+ "0x001010.merylData:md5,77918beac0dc29069365d9bf9104237b",
+ "0x001010.merylIndex:md5,d7db72b42185070da78104b356f4b5a1",
+ "0x001011.merylData:md5,d7837930447de7651dee47c00a1d2afe",
+ "0x001011.merylIndex:md5,ac9c6c4aab6172d73134d2dfc5215832",
+ "0x001100.merylData:md5,3e5e6301e1287e7ada42e5bafa24b74f",
+ "0x001100.merylIndex:md5,0e0736dfaa2974d7f09744ece53b3e69",
+ "0x001101.merylData:md5,38b8f1b7982de9c7add7db0071c28d24",
+ "0x001101.merylIndex:md5,c85afc65db43592a006e317c2e4891e8",
+ "0x001110.merylData:md5,f189f910062d272065a0f444789e1d68",
+ "0x001110.merylIndex:md5,701d7a951ee2bf6520845262dbd65559",
+ "0x001111.merylData:md5,0200cf0723778bcc48cffe7bfa48137c",
+ "0x001111.merylIndex:md5,2d445869fecabc6414242cd68a6edda5",
+ "0x010000.merylData:md5,a1d5479d3e8bd671368c6432867a2e95",
+ "0x010000.merylIndex:md5,ad664f2ea294871da909156a88b0dd3a",
+ "0x010001.merylData:md5,28919fad6317bbd6fe9809665437f9ae",
+ "0x010001.merylIndex:md5,64d482b1ef40e3de9d972f4134cb6d07",
+ "0x010010.merylData:md5,5b4630208c973815be52221c91777f77",
+ "0x010010.merylIndex:md5,0460424759df28005d71ebdef60e9ddf",
+ "0x010011.merylData:md5,fc58468b0b010469b11fe3b00c771067",
+ "0x010011.merylIndex:md5,7bd12cb4685bd7dc1aeb619e87bb3115",
+ "0x010100.merylData:md5,0db951a9cdf9c78bea428715ddf75bb0",
+ "0x010100.merylIndex:md5,b13f442b8fec07bf23ef751f2d94f9cb",
+ "0x010101.merylData:md5,9d0c90405b90d864bc3c2495657881a9",
+ "0x010101.merylIndex:md5,b3e13987d9d498b8714dfae339217f93",
+ "0x010110.merylData:md5,3f243ce000c60255f2d0e20d8e85a829",
+ "0x010110.merylIndex:md5,4ca7aeafe7b7de7bfc4dbcf4e0102fc0",
+ "0x010111.merylData:md5,0f369d3e76d23b09f75a231223872491",
+ "0x010111.merylIndex:md5,625e792cf975bc26aec21cb2dbd44eab",
+ "0x011000.merylData:md5,72e99ffb278453732413ffe3ad11d3fd",
+ "0x011000.merylIndex:md5,7bc0d58bb83ff6260a6dc65c8d70d9ac",
+ "0x011001.merylData:md5,52b5f0878131997a0660c7c205d34236",
+ "0x011001.merylIndex:md5,0b510bcd0b9fbbdf5d46bea2c4735126",
+ "0x011010.merylData:md5,5c67cd3def095bfbbe81125012a14364",
+ "0x011010.merylIndex:md5,3eebe66a116b1d121f918899a7fdbb28",
+ "0x011011.merylData:md5,64b7020da74d0d6a2b329e094cec17ed",
+ "0x011011.merylIndex:md5,9e3cc561c44b83131f76344567c4d943",
+ "0x011100.merylData:md5,5ea42e92cddadaa49f24c7f10279bb6c",
+ "0x011100.merylIndex:md5,c796d799243b1622a38455dc274ddbff",
+ "0x011101.merylData:md5,398aed1de15abe0855b22cc566a81065",
+ "0x011101.merylIndex:md5,56fa2957c0c5c9b23434ce1d2070fd40",
+ "0x011110.merylData:md5,24eb8aaa7b58da7626042435a2ee8061",
+ "0x011110.merylIndex:md5,ba35223b3ac43b389413c605bf035a6d",
+ "0x011111.merylData:md5,90e7bcb05dcf23278421ce8a02d4deed",
+ "0x011111.merylIndex:md5,8bcb85c1115c6a4bc5b5f0e177a76bde",
+ "0x100000.merylData:md5,80f244202eef29232d350c0faa2abeda",
+ "0x100000.merylIndex:md5,37bebd00de3b3af3e55c444661e3961f",
+ "0x100001.merylData:md5,a205e2871b8637e32b6eb5272f7d5290",
+ "0x100001.merylIndex:md5,49e5e583346b4491b1586c86213118f0",
+ "0x100010.merylData:md5,2b320d66ce30f8fea1a626278c421220",
+ "0x100010.merylIndex:md5,c7107b6651d2ea4c2e303bf0eb5d4a6a",
+ "0x100011.merylData:md5,35ef5544dc557711302340bae1e41d40",
+ "0x100011.merylIndex:md5,b3afac9563c6ac916010322d6226e729",
+ "0x100100.merylData:md5,17a8f1975e868ccdca0d3b6e72c5d76a",
+ "0x100100.merylIndex:md5,9b8d65f01d22e3873d50e5db6d7711f5",
+ "0x100101.merylData:md5,d3d0250646185c5bfeec43a0490d775f",
+ "0x100101.merylIndex:md5,d072b261cbd89375078cecd73f20016a",
+ "0x100110.merylData:md5,1e7497fafc102ca23a35111deb992816",
+ "0x100110.merylIndex:md5,873ca65e4ef4d432fc356a85beaa4063",
+ "0x100111.merylData:md5,fe791c9598de38dfa75496558a8b2f28",
+ "0x100111.merylIndex:md5,369c608a46fe76cbc77715d163153bfb",
+ "0x101000.merylData:md5,d89e7ae06de81df678021213e9808b4b",
+ "0x101000.merylIndex:md5,43160d471884a484f3862750d44c2788",
+ "0x101001.merylData:md5,8cd9b88142c08c412371e9ef66d59ef9",
+ "0x101001.merylIndex:md5,6a5650d1c9793eeb2781fb9e57b14784",
+ "0x101010.merylData:md5,0f17f7e3dfe8c2ec5c469597c5e66134",
+ "0x101010.merylIndex:md5,fa2913d514e183a3c4cba31cc422dff4",
+ "0x101011.merylData:md5,f8ab9821eebfdedabb61a106abdce55c",
+ "0x101011.merylIndex:md5,8a5a07b1d07b0b7d71943c0980c0755e",
+ "0x101100.merylData:md5,6a969c16cc7a522ebb1f0be5fc81056e",
+ "0x101100.merylIndex:md5,5f12c2413a9ddca0751e4b2034ebdc87",
+ "0x101101.merylData:md5,9cc1443daf53adcf2d95c4fd0f8d49de",
+ "0x101101.merylIndex:md5,19dcebf34036938fed8984d8e100b579",
+ "0x101110.merylData:md5,f0d63a933d58c9c5f0bd5885adfa0dfe",
+ "0x101110.merylIndex:md5,d6517f33c2c4667b5c53f8b099a1f8fe",
+ "0x101111.merylData:md5,87706ddcd8927988661d8e64e69c0723",
+ "0x101111.merylIndex:md5,fc9671ff0358c2350a650ac1763a3368",
+ "0x110000.merylData:md5,63ce769d2987235e28d567b3b64585c1",
+ "0x110000.merylIndex:md5,6f7997e8622eeb89fdbd774d8cd98ba2",
+ "0x110001.merylData:md5,0d3d10723737ac2d1037e1fe576acb22",
+ "0x110001.merylIndex:md5,f27c0d1169639ae9e081b39afc3757f5",
+ "0x110010.merylData:md5,3b92d7921670a6c4a4394c84286a3837",
+ "0x110010.merylIndex:md5,ff12c1af64828ca8d054acea6ebe3440",
+ "0x110011.merylData:md5,9eb8aba55e32e31d064a584682607037",
+ "0x110011.merylIndex:md5,9e5f773d3e6472581bb72f3185d4dc16",
+ "0x110100.merylData:md5,e4a2e750379ad0c4f427b7fb39059412",
+ "0x110100.merylIndex:md5,12473637233e2ad57e71ae72dba34d78",
+ "0x110101.merylData:md5,1b9341b1a03a059abec2bc5e53c31c29",
+ "0x110101.merylIndex:md5,b5e63433b6765efb23cec511cee9175f",
+ "0x110110.merylData:md5,d7109486ca2eb7e38ec53984be463f9e",
+ "0x110110.merylIndex:md5,74e8295e9a08f3040e904d7f7be7d8d1",
+ "0x110111.merylData:md5,75590d1bf3590e64503563e8f441e7cc",
+ "0x110111.merylIndex:md5,292000207b3bed4cf8c388dceaa6600f",
+ "0x111000.merylData:md5,d59dbabc5bd07bbb00cb9bd943ec68eb",
+ "0x111000.merylIndex:md5,fc8b47e18240bb9c47e1e84bcf58d862",
+ "0x111001.merylData:md5,557bfc214b1711ca86fb708a9c29608d",
+ "0x111001.merylIndex:md5,2d862224cd86c6ae27d45efb320442c4",
+ "0x111010.merylData:md5,8f055963758daaac43f47b4b8256b114",
+ "0x111010.merylIndex:md5,83decf260813123b92cb1ed73e135476",
+ "0x111011.merylData:md5,8d842899193a5001b5f29b19689bde4c",
+ "0x111011.merylIndex:md5,b287e4892670a8efd434a38cc822f0c3",
+ "0x111100.merylData:md5,f9fda990220a23ecee48abecfd338d2f",
+ "0x111100.merylIndex:md5,a6fda3f1e2328b22aa6fc9ffc2fa78f6",
+ "0x111101.merylData:md5,1f365b5dcede12ce0f58df73de213ab1",
+ "0x111101.merylIndex:md5,3ff34e0a24e0450a84f46785d7508ea5",
+ "0x111110.merylData:md5,50f0a65e10c92b8315294ef52b20fc4a",
+ "0x111110.merylIndex:md5,07f0179b757af0593f9120c98b7b9052",
+ "0x111111.merylData:md5,1459a239b18d6e7a23bef1e91b1b1be3",
+ "0x111111.merylIndex:md5,e4fcdb894e33dbda5da04fa7ff3fda51",
+ "merylIndex:md5,3eb997b0370a1bf31aa77328f143573e"
+ ]
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,c97980ac5ebd37a77768c105861ad719"
+ ],
+ "meryl_db": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ [
+ "0x000000.merylData:md5,7d95ca25a2d39a0b8fb5f71a77d7bddd",
+ "0x000000.merylIndex:md5,ca9b05f5b90ed432aa1134378a86f166",
+ "0x000001.merylData:md5,bb4e2ec7d45a2d4fa36e802b0e312965",
+ "0x000001.merylIndex:md5,26b3ad6909cec2061c476201a0ac04cd",
+ "0x000010.merylData:md5,b4b8e7a742b4634ca82d2d19928cacd6",
+ "0x000010.merylIndex:md5,a266e782d80833c8e685a42f07937fc7",
+ "0x000011.merylData:md5,bc181b55548c6d7832b65be4bed8ec56",
+ "0x000011.merylIndex:md5,f2bb128f3da304408c81cefb0d4de879",
+ "0x000100.merylData:md5,07da07cdc83420487641f7201c1593c2",
+ "0x000100.merylIndex:md5,f40d2c9ecf5980328cfd766b9d90019d",
+ "0x000101.merylData:md5,2410ff0d6604044e9175feb668e747ac",
+ "0x000101.merylIndex:md5,f61473e3e39e6990c30bfa53e2a63302",
+ "0x000110.merylData:md5,05804f66393bef8edf69143a6cd984bc",
+ "0x000110.merylIndex:md5,15b6e88e01e1b1abc29b54128067a938",
+ "0x000111.merylData:md5,a3ced1867214878de2d83dda7e805308",
+ "0x000111.merylIndex:md5,2998f056a5f859229229b0051cb792ec",
+ "0x001000.merylData:md5,531c743e9b5df804297fba37e5cd4a63",
+ "0x001000.merylIndex:md5,30ab89ede28b623711675a45751b2ebc",
+ "0x001001.merylData:md5,4303314fa9d7bb8326938537c27d381e",
+ "0x001001.merylIndex:md5,1356eb83aa75fce8693e974bf54200d1",
+ "0x001010.merylData:md5,77918beac0dc29069365d9bf9104237b",
+ "0x001010.merylIndex:md5,d7db72b42185070da78104b356f4b5a1",
+ "0x001011.merylData:md5,d7837930447de7651dee47c00a1d2afe",
+ "0x001011.merylIndex:md5,ac9c6c4aab6172d73134d2dfc5215832",
+ "0x001100.merylData:md5,3e5e6301e1287e7ada42e5bafa24b74f",
+ "0x001100.merylIndex:md5,0e0736dfaa2974d7f09744ece53b3e69",
+ "0x001101.merylData:md5,38b8f1b7982de9c7add7db0071c28d24",
+ "0x001101.merylIndex:md5,c85afc65db43592a006e317c2e4891e8",
+ "0x001110.merylData:md5,f189f910062d272065a0f444789e1d68",
+ "0x001110.merylIndex:md5,701d7a951ee2bf6520845262dbd65559",
+ "0x001111.merylData:md5,0200cf0723778bcc48cffe7bfa48137c",
+ "0x001111.merylIndex:md5,2d445869fecabc6414242cd68a6edda5",
+ "0x010000.merylData:md5,a1d5479d3e8bd671368c6432867a2e95",
+ "0x010000.merylIndex:md5,ad664f2ea294871da909156a88b0dd3a",
+ "0x010001.merylData:md5,28919fad6317bbd6fe9809665437f9ae",
+ "0x010001.merylIndex:md5,64d482b1ef40e3de9d972f4134cb6d07",
+ "0x010010.merylData:md5,5b4630208c973815be52221c91777f77",
+ "0x010010.merylIndex:md5,0460424759df28005d71ebdef60e9ddf",
+ "0x010011.merylData:md5,fc58468b0b010469b11fe3b00c771067",
+ "0x010011.merylIndex:md5,7bd12cb4685bd7dc1aeb619e87bb3115",
+ "0x010100.merylData:md5,0db951a9cdf9c78bea428715ddf75bb0",
+ "0x010100.merylIndex:md5,b13f442b8fec07bf23ef751f2d94f9cb",
+ "0x010101.merylData:md5,9d0c90405b90d864bc3c2495657881a9",
+ "0x010101.merylIndex:md5,b3e13987d9d498b8714dfae339217f93",
+ "0x010110.merylData:md5,3f243ce000c60255f2d0e20d8e85a829",
+ "0x010110.merylIndex:md5,4ca7aeafe7b7de7bfc4dbcf4e0102fc0",
+ "0x010111.merylData:md5,0f369d3e76d23b09f75a231223872491",
+ "0x010111.merylIndex:md5,625e792cf975bc26aec21cb2dbd44eab",
+ "0x011000.merylData:md5,72e99ffb278453732413ffe3ad11d3fd",
+ "0x011000.merylIndex:md5,7bc0d58bb83ff6260a6dc65c8d70d9ac",
+ "0x011001.merylData:md5,52b5f0878131997a0660c7c205d34236",
+ "0x011001.merylIndex:md5,0b510bcd0b9fbbdf5d46bea2c4735126",
+ "0x011010.merylData:md5,5c67cd3def095bfbbe81125012a14364",
+ "0x011010.merylIndex:md5,3eebe66a116b1d121f918899a7fdbb28",
+ "0x011011.merylData:md5,64b7020da74d0d6a2b329e094cec17ed",
+ "0x011011.merylIndex:md5,9e3cc561c44b83131f76344567c4d943",
+ "0x011100.merylData:md5,5ea42e92cddadaa49f24c7f10279bb6c",
+ "0x011100.merylIndex:md5,c796d799243b1622a38455dc274ddbff",
+ "0x011101.merylData:md5,398aed1de15abe0855b22cc566a81065",
+ "0x011101.merylIndex:md5,56fa2957c0c5c9b23434ce1d2070fd40",
+ "0x011110.merylData:md5,24eb8aaa7b58da7626042435a2ee8061",
+ "0x011110.merylIndex:md5,ba35223b3ac43b389413c605bf035a6d",
+ "0x011111.merylData:md5,90e7bcb05dcf23278421ce8a02d4deed",
+ "0x011111.merylIndex:md5,8bcb85c1115c6a4bc5b5f0e177a76bde",
+ "0x100000.merylData:md5,80f244202eef29232d350c0faa2abeda",
+ "0x100000.merylIndex:md5,37bebd00de3b3af3e55c444661e3961f",
+ "0x100001.merylData:md5,a205e2871b8637e32b6eb5272f7d5290",
+ "0x100001.merylIndex:md5,49e5e583346b4491b1586c86213118f0",
+ "0x100010.merylData:md5,2b320d66ce30f8fea1a626278c421220",
+ "0x100010.merylIndex:md5,c7107b6651d2ea4c2e303bf0eb5d4a6a",
+ "0x100011.merylData:md5,35ef5544dc557711302340bae1e41d40",
+ "0x100011.merylIndex:md5,b3afac9563c6ac916010322d6226e729",
+ "0x100100.merylData:md5,17a8f1975e868ccdca0d3b6e72c5d76a",
+ "0x100100.merylIndex:md5,9b8d65f01d22e3873d50e5db6d7711f5",
+ "0x100101.merylData:md5,d3d0250646185c5bfeec43a0490d775f",
+ "0x100101.merylIndex:md5,d072b261cbd89375078cecd73f20016a",
+ "0x100110.merylData:md5,1e7497fafc102ca23a35111deb992816",
+ "0x100110.merylIndex:md5,873ca65e4ef4d432fc356a85beaa4063",
+ "0x100111.merylData:md5,fe791c9598de38dfa75496558a8b2f28",
+ "0x100111.merylIndex:md5,369c608a46fe76cbc77715d163153bfb",
+ "0x101000.merylData:md5,d89e7ae06de81df678021213e9808b4b",
+ "0x101000.merylIndex:md5,43160d471884a484f3862750d44c2788",
+ "0x101001.merylData:md5,8cd9b88142c08c412371e9ef66d59ef9",
+ "0x101001.merylIndex:md5,6a5650d1c9793eeb2781fb9e57b14784",
+ "0x101010.merylData:md5,0f17f7e3dfe8c2ec5c469597c5e66134",
+ "0x101010.merylIndex:md5,fa2913d514e183a3c4cba31cc422dff4",
+ "0x101011.merylData:md5,f8ab9821eebfdedabb61a106abdce55c",
+ "0x101011.merylIndex:md5,8a5a07b1d07b0b7d71943c0980c0755e",
+ "0x101100.merylData:md5,6a969c16cc7a522ebb1f0be5fc81056e",
+ "0x101100.merylIndex:md5,5f12c2413a9ddca0751e4b2034ebdc87",
+ "0x101101.merylData:md5,9cc1443daf53adcf2d95c4fd0f8d49de",
+ "0x101101.merylIndex:md5,19dcebf34036938fed8984d8e100b579",
+ "0x101110.merylData:md5,f0d63a933d58c9c5f0bd5885adfa0dfe",
+ "0x101110.merylIndex:md5,d6517f33c2c4667b5c53f8b099a1f8fe",
+ "0x101111.merylData:md5,87706ddcd8927988661d8e64e69c0723",
+ "0x101111.merylIndex:md5,fc9671ff0358c2350a650ac1763a3368",
+ "0x110000.merylData:md5,63ce769d2987235e28d567b3b64585c1",
+ "0x110000.merylIndex:md5,6f7997e8622eeb89fdbd774d8cd98ba2",
+ "0x110001.merylData:md5,0d3d10723737ac2d1037e1fe576acb22",
+ "0x110001.merylIndex:md5,f27c0d1169639ae9e081b39afc3757f5",
+ "0x110010.merylData:md5,3b92d7921670a6c4a4394c84286a3837",
+ "0x110010.merylIndex:md5,ff12c1af64828ca8d054acea6ebe3440",
+ "0x110011.merylData:md5,9eb8aba55e32e31d064a584682607037",
+ "0x110011.merylIndex:md5,9e5f773d3e6472581bb72f3185d4dc16",
+ "0x110100.merylData:md5,e4a2e750379ad0c4f427b7fb39059412",
+ "0x110100.merylIndex:md5,12473637233e2ad57e71ae72dba34d78",
+ "0x110101.merylData:md5,1b9341b1a03a059abec2bc5e53c31c29",
+ "0x110101.merylIndex:md5,b5e63433b6765efb23cec511cee9175f",
+ "0x110110.merylData:md5,d7109486ca2eb7e38ec53984be463f9e",
+ "0x110110.merylIndex:md5,74e8295e9a08f3040e904d7f7be7d8d1",
+ "0x110111.merylData:md5,75590d1bf3590e64503563e8f441e7cc",
+ "0x110111.merylIndex:md5,292000207b3bed4cf8c388dceaa6600f",
+ "0x111000.merylData:md5,d59dbabc5bd07bbb00cb9bd943ec68eb",
+ "0x111000.merylIndex:md5,fc8b47e18240bb9c47e1e84bcf58d862",
+ "0x111001.merylData:md5,557bfc214b1711ca86fb708a9c29608d",
+ "0x111001.merylIndex:md5,2d862224cd86c6ae27d45efb320442c4",
+ "0x111010.merylData:md5,8f055963758daaac43f47b4b8256b114",
+ "0x111010.merylIndex:md5,83decf260813123b92cb1ed73e135476",
+ "0x111011.merylData:md5,8d842899193a5001b5f29b19689bde4c",
+ "0x111011.merylIndex:md5,b287e4892670a8efd434a38cc822f0c3",
+ "0x111100.merylData:md5,f9fda990220a23ecee48abecfd338d2f",
+ "0x111100.merylIndex:md5,a6fda3f1e2328b22aa6fc9ffc2fa78f6",
+ "0x111101.merylData:md5,1f365b5dcede12ce0f58df73de213ab1",
+ "0x111101.merylIndex:md5,3ff34e0a24e0450a84f46785d7508ea5",
+ "0x111110.merylData:md5,50f0a65e10c92b8315294ef52b20fc4a",
+ "0x111110.merylIndex:md5,07f0179b757af0593f9120c98b7b9052",
+ "0x111111.merylData:md5,1459a239b18d6e7a23bef1e91b1b1be3",
+ "0x111111.merylIndex:md5,e4fcdb894e33dbda5da04fa7ff3fda51",
+ "merylIndex:md5,3eb997b0370a1bf31aa77328f143573e"
+ ]
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,c97980ac5ebd37a77768c105861ad719"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-27T10:19:25.091170112"
+ },
+ "sarscov2 - fastq - paired_end": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "0x000000.merylData:md5,75f3969bef1e3b030f404a5206d6b249",
+ "0x000000.merylIndex:md5,b18113193945289fa1c20e96411e6ad2",
+ "0x000001.merylData:md5,8826eb52c3d06b5a092cb9a1f747e413",
+ "0x000001.merylIndex:md5,72a68ddfd9900e2db4a38f8a0ae48356",
+ "0x000010.merylData:md5,b11a404bf18f87ae565d8e60b63a0f2f",
+ "0x000010.merylIndex:md5,b518574321a3ab6d5eca385a4d6c4c10",
+ "0x000011.merylData:md5,055292edb4ea604a90a749c07528477e",
+ "0x000011.merylIndex:md5,d2aa5bdd749babc771d98185b9da96d1",
+ "0x000100.merylData:md5,a092b9ae90e5096bf819f8eaeec15873",
+ "0x000100.merylIndex:md5,1950c77b8985b4e516ceca2df12ffe24",
+ "0x000101.merylData:md5,1d02c628dda5eeec4f30674fffb1955c",
+ "0x000101.merylIndex:md5,f0c3dc05caae4af498e54f3263e6436f",
+ "0x000110.merylData:md5,62ce2f63040efa731d731abb8e76ccc0",
+ "0x000110.merylIndex:md5,adbf5072a6b7a096b9eba54482c26572",
+ "0x000111.merylData:md5,37912c49ffe875b835f8f4780ee52706",
+ "0x000111.merylIndex:md5,16392081f93560ed66a14d28eb26cd15",
+ "0x001000.merylData:md5,9826f31aee64d0cc96dff5da6996f0e6",
+ "0x001000.merylIndex:md5,849691b9f41cbba9158b30ee19f32b9d",
+ "0x001001.merylData:md5,1a617074700788e78b4ffc3575980adf",
+ "0x001001.merylIndex:md5,cafd9628934092a38eff45d2250ac466",
+ "0x001010.merylData:md5,7f93e8ac5ae70ad104140d032a37a295",
+ "0x001010.merylIndex:md5,866787afefa6acd99199d0eac8ced9e8",
+ "0x001011.merylData:md5,b77639289243a8fb7a43745986eab590",
+ "0x001011.merylIndex:md5,06724a52696784a94d33b2d23acc8873",
+ "0x001100.merylData:md5,20862f1f92786588de42140362672d38",
+ "0x001100.merylIndex:md5,c5468a40dff4268c978cc1918610b7d7",
+ "0x001101.merylData:md5,274002b8f45eb9ca54fad4db3d3aca02",
+ "0x001101.merylIndex:md5,b543aaf4d975360e23a692e44e605168",
+ "0x001110.merylData:md5,b55c68ee98e39ff9327f609ed2438e00",
+ "0x001110.merylIndex:md5,4b3da649a6bc53936fa23c173cbc4f21",
+ "0x001111.merylData:md5,abdc96b88d2f1fbed3c99a33a1eef45a",
+ "0x001111.merylIndex:md5,1758e12d5d8cf4ba9eeafe3bc01231f8",
+ "0x010000.merylData:md5,0c26808742826bddfeb43b7bc7d238bb",
+ "0x010000.merylIndex:md5,0c2b47836a4584765343fdf3a96246e8",
+ "0x010001.merylData:md5,f1dc27b017b36dbf72852b422e83d016",
+ "0x010001.merylIndex:md5,0b8a6db458ba4f63c36a61f487455145",
+ "0x010010.merylData:md5,a57d01b52bcbdd8608999e342102c146",
+ "0x010010.merylIndex:md5,635007bb466089cd3e202b68b8c66d74",
+ "0x010011.merylData:md5,37371dd1409929adee11133b5f03d3a7",
+ "0x010011.merylIndex:md5,492653df27505b88e3acc309c02bb8c7",
+ "0x010100.merylData:md5,517efaeccd253932d2a53f1d8068bde3",
+ "0x010100.merylIndex:md5,a61da37cd8e0d8de98f8e15b868341d7",
+ "0x010101.merylData:md5,9279f4e25c7b4b10f0181f661f3a12d2",
+ "0x010101.merylIndex:md5,c09553e7f106e4c62e2004f8c7f15ade",
+ "0x010110.merylData:md5,2579c222fbbd1e45fac5f44d080b5464",
+ "0x010110.merylIndex:md5,b7ba0385367bcb978c737a2778c9fbd4",
+ "0x010111.merylData:md5,c403bc281ff88cd996aa54d16c808e23",
+ "0x010111.merylIndex:md5,2d4c515b1b65e2fd0da877ff5d29f80c",
+ "0x011000.merylData:md5,ab856ffad72bf9a55fdc5f838eb4051e",
+ "0x011000.merylIndex:md5,86d657bc906e475cb714d704d41c7565",
+ "0x011001.merylData:md5,b64664fa97dab9df2e3b4e247f396f67",
+ "0x011001.merylIndex:md5,67ab51e1233ae97f976fdd24997bd859",
+ "0x011010.merylData:md5,6530932b97c9149c94174d5e229a14a9",
+ "0x011010.merylIndex:md5,e4a6f8cc99486fe79509be98a0c2841d",
+ "0x011011.merylData:md5,58357359932e814a983f0ce26acf5115",
+ "0x011011.merylIndex:md5,4426d81216c7a2dae734cf70536cf6bb",
+ "0x011100.merylData:md5,eb75a337c0649a58727dc3292e4d25db",
+ "0x011100.merylIndex:md5,a94b932a0583180d3027d445755e37f8",
+ "0x011101.merylData:md5,c494378696f831deebad0e0fe2db1881",
+ "0x011101.merylIndex:md5,ef00cd5e7255d4b751140fd0c83400a1",
+ "0x011110.merylData:md5,abec95fb53891ca69dd1e7ea20189204",
+ "0x011110.merylIndex:md5,bc2b1a21e91760ad49897097071df011",
+ "0x011111.merylData:md5,4b858e2a28860f789e8a5dc62e062557",
+ "0x011111.merylIndex:md5,5eed11ba178f7f9559625364d96aa41c",
+ "0x100000.merylData:md5,768528aaafba818f8e34d4b65de5242e",
+ "0x100000.merylIndex:md5,6649d5940b0a60b289dce7073de4777d",
+ "0x100001.merylData:md5,d0ef60de56a5dc1cec3606e3f07620c2",
+ "0x100001.merylIndex:md5,26d0852978fc3a8972aec67cebcc0f28",
+ "0x100010.merylData:md5,5b5a0fd3be35a954597a1c88ed3912ab",
+ "0x100010.merylIndex:md5,68c7135add32c8aac3c4207a7db7d37a",
+ "0x100011.merylData:md5,d8c188713b5379ba7e53bd7ac26baed5",
+ "0x100011.merylIndex:md5,4a9ed0a918945a7e9602427e23bd955c",
+ "0x100100.merylData:md5,68403d2ad355e2f9f9b8032616c6fbab",
+ "0x100100.merylIndex:md5,2fe6f882ae22d87581300c3b9b24242b",
+ "0x100101.merylData:md5,a6102a49c8b51870bf14b6ace728cfa1",
+ "0x100101.merylIndex:md5,f5b56f5edb9f55973288aab6b0c81467",
+ "0x100110.merylData:md5,418f44aecc5beb96d4fe9d5c22e5d8df",
+ "0x100110.merylIndex:md5,3b93d1b48ce9314ed0946e7a4d1b6c19",
+ "0x100111.merylData:md5,1cce84d0d61511837a70bb48aab84a20",
+ "0x100111.merylIndex:md5,42d17c375dccd68da727f531cc05a9fa",
+ "0x101000.merylData:md5,e2f713f0fb453506b18db92609a1fdff",
+ "0x101000.merylIndex:md5,f9fa2ccd8b89aa7147130c7dac705ce0",
+ "0x101001.merylData:md5,1cde5e30da7e245689c7c7a7006ebc40",
+ "0x101001.merylIndex:md5,f3d6a5e2eeb01dffaf6ebd04793d269b",
+ "0x101010.merylData:md5,088d7310df61d7917f3b49cc5ca1bef5",
+ "0x101010.merylIndex:md5,5de136cfda775349c75d47e5c5bae16b",
+ "0x101011.merylData:md5,c37a7fd125f9563760f0a7b065fb99f5",
+ "0x101011.merylIndex:md5,7374a2a00f9cf1fb4760dd6c68ce05de",
+ "0x101100.merylData:md5,3c76f864ab548adfb4d8f11869ab7b3a",
+ "0x101100.merylIndex:md5,e65c609b7b05a34a75c3eb7fbef9a488",
+ "0x101101.merylData:md5,8faec3c432a9fcaf7d6413a9966d15e7",
+ "0x101101.merylIndex:md5,c4fb9020e3b1690f5d4717be5384427d",
+ "0x101110.merylData:md5,92fb3c0d061a4d258bdde77c904312ab",
+ "0x101110.merylIndex:md5,0518b5e9eb8cecedd4da3a0470fbb43d",
+ "0x101111.merylData:md5,7f2174eee07aa116cffb77f85910efec",
+ "0x101111.merylIndex:md5,f7ecf59ae53a3a38cc4b7c2509379b3a",
+ "0x110000.merylData:md5,8c5866a1e6932eb777fe6e72c1090e1e",
+ "0x110000.merylIndex:md5,fdcca3797ca34cdbb3d6808847c111d5",
+ "0x110001.merylData:md5,02cc9f0ece95b69c6dfe3f04a0425f92",
+ "0x110001.merylIndex:md5,d545188ef68c84c1c653e23f6fae4ef3",
+ "0x110010.merylData:md5,665d40c5e306952883fe2ffa641024bb",
+ "0x110010.merylIndex:md5,445e745bd1f34502437541fafbb21943",
+ "0x110011.merylData:md5,e32f9db7cfb6dfa243f45258f1b55041",
+ "0x110011.merylIndex:md5,6e93712a47efaf6a4656c0dbeb1e5b1a",
+ "0x110100.merylData:md5,af978fc2cc304f2aee3f06416a1a43aa",
+ "0x110100.merylIndex:md5,ed97271331d8bb4eabbd0978cd4b9d23",
+ "0x110101.merylData:md5,a7bdc73355bea8749db38966fb7c968d",
+ "0x110101.merylIndex:md5,b30ff36567bede03396e67e973d4fdf9",
+ "0x110110.merylData:md5,0ee1f4b187ce88567262d3a6302f3f2a",
+ "0x110110.merylIndex:md5,505cab345465852c8284d190e91ce7fa",
+ "0x110111.merylData:md5,02929a0fd245f27576d794c98443ed52",
+ "0x110111.merylIndex:md5,8b70850b1775790354ff00ffcfb9df65",
+ "0x111000.merylData:md5,21cb35b24f60b6c69c49c327d1fd797f",
+ "0x111000.merylIndex:md5,7a54391084d2354b534bbf238631a80e",
+ "0x111001.merylData:md5,3c864da93bf29a8a0665cd83e1b612e7",
+ "0x111001.merylIndex:md5,4504239cfc112fcf6e9dba44e6f9f488",
+ "0x111010.merylData:md5,9d08a9742799fd0ab5b29f184667cb7c",
+ "0x111010.merylIndex:md5,b67161fd14e224f11a592691fe08e0e5",
+ "0x111011.merylData:md5,f5a09c0f5faa10aaad17efe454ff9395",
+ "0x111011.merylIndex:md5,75306fb2397e61a0910a5034cb9309c9",
+ "0x111100.merylData:md5,6c1ddf20980ade712673d606c6f3dfbe",
+ "0x111100.merylIndex:md5,35642b10f80c44989de4131a4c2b9c95",
+ "0x111101.merylData:md5,a26af5a2348ddc4882eb06d4edb63b6f",
+ "0x111101.merylIndex:md5,e6affc54363b262b10e55267192cd68b",
+ "0x111110.merylData:md5,7a7f81ee6339c46bffe18de12c3a7177",
+ "0x111110.merylIndex:md5,07f0179b757af0593f9120c98b7b9052",
+ "0x111111.merylData:md5,0e63424ed0b9b5a399058dbea354ae64",
+ "0x111111.merylIndex:md5,b8c6b4cc0580f55e4767f1f4529d772b",
+ "merylIndex:md5,72fb8181bcbb783884da1c8c2ded4eff"
+ ]
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,c97980ac5ebd37a77768c105861ad719"
+ ],
+ "meryl_db": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "0x000000.merylData:md5,75f3969bef1e3b030f404a5206d6b249",
+ "0x000000.merylIndex:md5,b18113193945289fa1c20e96411e6ad2",
+ "0x000001.merylData:md5,8826eb52c3d06b5a092cb9a1f747e413",
+ "0x000001.merylIndex:md5,72a68ddfd9900e2db4a38f8a0ae48356",
+ "0x000010.merylData:md5,b11a404bf18f87ae565d8e60b63a0f2f",
+ "0x000010.merylIndex:md5,b518574321a3ab6d5eca385a4d6c4c10",
+ "0x000011.merylData:md5,055292edb4ea604a90a749c07528477e",
+ "0x000011.merylIndex:md5,d2aa5bdd749babc771d98185b9da96d1",
+ "0x000100.merylData:md5,a092b9ae90e5096bf819f8eaeec15873",
+ "0x000100.merylIndex:md5,1950c77b8985b4e516ceca2df12ffe24",
+ "0x000101.merylData:md5,1d02c628dda5eeec4f30674fffb1955c",
+ "0x000101.merylIndex:md5,f0c3dc05caae4af498e54f3263e6436f",
+ "0x000110.merylData:md5,62ce2f63040efa731d731abb8e76ccc0",
+ "0x000110.merylIndex:md5,adbf5072a6b7a096b9eba54482c26572",
+ "0x000111.merylData:md5,37912c49ffe875b835f8f4780ee52706",
+ "0x000111.merylIndex:md5,16392081f93560ed66a14d28eb26cd15",
+ "0x001000.merylData:md5,9826f31aee64d0cc96dff5da6996f0e6",
+ "0x001000.merylIndex:md5,849691b9f41cbba9158b30ee19f32b9d",
+ "0x001001.merylData:md5,1a617074700788e78b4ffc3575980adf",
+ "0x001001.merylIndex:md5,cafd9628934092a38eff45d2250ac466",
+ "0x001010.merylData:md5,7f93e8ac5ae70ad104140d032a37a295",
+ "0x001010.merylIndex:md5,866787afefa6acd99199d0eac8ced9e8",
+ "0x001011.merylData:md5,b77639289243a8fb7a43745986eab590",
+ "0x001011.merylIndex:md5,06724a52696784a94d33b2d23acc8873",
+ "0x001100.merylData:md5,20862f1f92786588de42140362672d38",
+ "0x001100.merylIndex:md5,c5468a40dff4268c978cc1918610b7d7",
+ "0x001101.merylData:md5,274002b8f45eb9ca54fad4db3d3aca02",
+ "0x001101.merylIndex:md5,b543aaf4d975360e23a692e44e605168",
+ "0x001110.merylData:md5,b55c68ee98e39ff9327f609ed2438e00",
+ "0x001110.merylIndex:md5,4b3da649a6bc53936fa23c173cbc4f21",
+ "0x001111.merylData:md5,abdc96b88d2f1fbed3c99a33a1eef45a",
+ "0x001111.merylIndex:md5,1758e12d5d8cf4ba9eeafe3bc01231f8",
+ "0x010000.merylData:md5,0c26808742826bddfeb43b7bc7d238bb",
+ "0x010000.merylIndex:md5,0c2b47836a4584765343fdf3a96246e8",
+ "0x010001.merylData:md5,f1dc27b017b36dbf72852b422e83d016",
+ "0x010001.merylIndex:md5,0b8a6db458ba4f63c36a61f487455145",
+ "0x010010.merylData:md5,a57d01b52bcbdd8608999e342102c146",
+ "0x010010.merylIndex:md5,635007bb466089cd3e202b68b8c66d74",
+ "0x010011.merylData:md5,37371dd1409929adee11133b5f03d3a7",
+ "0x010011.merylIndex:md5,492653df27505b88e3acc309c02bb8c7",
+ "0x010100.merylData:md5,517efaeccd253932d2a53f1d8068bde3",
+ "0x010100.merylIndex:md5,a61da37cd8e0d8de98f8e15b868341d7",
+ "0x010101.merylData:md5,9279f4e25c7b4b10f0181f661f3a12d2",
+ "0x010101.merylIndex:md5,c09553e7f106e4c62e2004f8c7f15ade",
+ "0x010110.merylData:md5,2579c222fbbd1e45fac5f44d080b5464",
+ "0x010110.merylIndex:md5,b7ba0385367bcb978c737a2778c9fbd4",
+ "0x010111.merylData:md5,c403bc281ff88cd996aa54d16c808e23",
+ "0x010111.merylIndex:md5,2d4c515b1b65e2fd0da877ff5d29f80c",
+ "0x011000.merylData:md5,ab856ffad72bf9a55fdc5f838eb4051e",
+ "0x011000.merylIndex:md5,86d657bc906e475cb714d704d41c7565",
+ "0x011001.merylData:md5,b64664fa97dab9df2e3b4e247f396f67",
+ "0x011001.merylIndex:md5,67ab51e1233ae97f976fdd24997bd859",
+ "0x011010.merylData:md5,6530932b97c9149c94174d5e229a14a9",
+ "0x011010.merylIndex:md5,e4a6f8cc99486fe79509be98a0c2841d",
+ "0x011011.merylData:md5,58357359932e814a983f0ce26acf5115",
+ "0x011011.merylIndex:md5,4426d81216c7a2dae734cf70536cf6bb",
+ "0x011100.merylData:md5,eb75a337c0649a58727dc3292e4d25db",
+ "0x011100.merylIndex:md5,a94b932a0583180d3027d445755e37f8",
+ "0x011101.merylData:md5,c494378696f831deebad0e0fe2db1881",
+ "0x011101.merylIndex:md5,ef00cd5e7255d4b751140fd0c83400a1",
+ "0x011110.merylData:md5,abec95fb53891ca69dd1e7ea20189204",
+ "0x011110.merylIndex:md5,bc2b1a21e91760ad49897097071df011",
+ "0x011111.merylData:md5,4b858e2a28860f789e8a5dc62e062557",
+ "0x011111.merylIndex:md5,5eed11ba178f7f9559625364d96aa41c",
+ "0x100000.merylData:md5,768528aaafba818f8e34d4b65de5242e",
+ "0x100000.merylIndex:md5,6649d5940b0a60b289dce7073de4777d",
+ "0x100001.merylData:md5,d0ef60de56a5dc1cec3606e3f07620c2",
+ "0x100001.merylIndex:md5,26d0852978fc3a8972aec67cebcc0f28",
+ "0x100010.merylData:md5,5b5a0fd3be35a954597a1c88ed3912ab",
+ "0x100010.merylIndex:md5,68c7135add32c8aac3c4207a7db7d37a",
+ "0x100011.merylData:md5,d8c188713b5379ba7e53bd7ac26baed5",
+ "0x100011.merylIndex:md5,4a9ed0a918945a7e9602427e23bd955c",
+ "0x100100.merylData:md5,68403d2ad355e2f9f9b8032616c6fbab",
+ "0x100100.merylIndex:md5,2fe6f882ae22d87581300c3b9b24242b",
+ "0x100101.merylData:md5,a6102a49c8b51870bf14b6ace728cfa1",
+ "0x100101.merylIndex:md5,f5b56f5edb9f55973288aab6b0c81467",
+ "0x100110.merylData:md5,418f44aecc5beb96d4fe9d5c22e5d8df",
+ "0x100110.merylIndex:md5,3b93d1b48ce9314ed0946e7a4d1b6c19",
+ "0x100111.merylData:md5,1cce84d0d61511837a70bb48aab84a20",
+ "0x100111.merylIndex:md5,42d17c375dccd68da727f531cc05a9fa",
+ "0x101000.merylData:md5,e2f713f0fb453506b18db92609a1fdff",
+ "0x101000.merylIndex:md5,f9fa2ccd8b89aa7147130c7dac705ce0",
+ "0x101001.merylData:md5,1cde5e30da7e245689c7c7a7006ebc40",
+ "0x101001.merylIndex:md5,f3d6a5e2eeb01dffaf6ebd04793d269b",
+ "0x101010.merylData:md5,088d7310df61d7917f3b49cc5ca1bef5",
+ "0x101010.merylIndex:md5,5de136cfda775349c75d47e5c5bae16b",
+ "0x101011.merylData:md5,c37a7fd125f9563760f0a7b065fb99f5",
+ "0x101011.merylIndex:md5,7374a2a00f9cf1fb4760dd6c68ce05de",
+ "0x101100.merylData:md5,3c76f864ab548adfb4d8f11869ab7b3a",
+ "0x101100.merylIndex:md5,e65c609b7b05a34a75c3eb7fbef9a488",
+ "0x101101.merylData:md5,8faec3c432a9fcaf7d6413a9966d15e7",
+ "0x101101.merylIndex:md5,c4fb9020e3b1690f5d4717be5384427d",
+ "0x101110.merylData:md5,92fb3c0d061a4d258bdde77c904312ab",
+ "0x101110.merylIndex:md5,0518b5e9eb8cecedd4da3a0470fbb43d",
+ "0x101111.merylData:md5,7f2174eee07aa116cffb77f85910efec",
+ "0x101111.merylIndex:md5,f7ecf59ae53a3a38cc4b7c2509379b3a",
+ "0x110000.merylData:md5,8c5866a1e6932eb777fe6e72c1090e1e",
+ "0x110000.merylIndex:md5,fdcca3797ca34cdbb3d6808847c111d5",
+ "0x110001.merylData:md5,02cc9f0ece95b69c6dfe3f04a0425f92",
+ "0x110001.merylIndex:md5,d545188ef68c84c1c653e23f6fae4ef3",
+ "0x110010.merylData:md5,665d40c5e306952883fe2ffa641024bb",
+ "0x110010.merylIndex:md5,445e745bd1f34502437541fafbb21943",
+ "0x110011.merylData:md5,e32f9db7cfb6dfa243f45258f1b55041",
+ "0x110011.merylIndex:md5,6e93712a47efaf6a4656c0dbeb1e5b1a",
+ "0x110100.merylData:md5,af978fc2cc304f2aee3f06416a1a43aa",
+ "0x110100.merylIndex:md5,ed97271331d8bb4eabbd0978cd4b9d23",
+ "0x110101.merylData:md5,a7bdc73355bea8749db38966fb7c968d",
+ "0x110101.merylIndex:md5,b30ff36567bede03396e67e973d4fdf9",
+ "0x110110.merylData:md5,0ee1f4b187ce88567262d3a6302f3f2a",
+ "0x110110.merylIndex:md5,505cab345465852c8284d190e91ce7fa",
+ "0x110111.merylData:md5,02929a0fd245f27576d794c98443ed52",
+ "0x110111.merylIndex:md5,8b70850b1775790354ff00ffcfb9df65",
+ "0x111000.merylData:md5,21cb35b24f60b6c69c49c327d1fd797f",
+ "0x111000.merylIndex:md5,7a54391084d2354b534bbf238631a80e",
+ "0x111001.merylData:md5,3c864da93bf29a8a0665cd83e1b612e7",
+ "0x111001.merylIndex:md5,4504239cfc112fcf6e9dba44e6f9f488",
+ "0x111010.merylData:md5,9d08a9742799fd0ab5b29f184667cb7c",
+ "0x111010.merylIndex:md5,b67161fd14e224f11a592691fe08e0e5",
+ "0x111011.merylData:md5,f5a09c0f5faa10aaad17efe454ff9395",
+ "0x111011.merylIndex:md5,75306fb2397e61a0910a5034cb9309c9",
+ "0x111100.merylData:md5,6c1ddf20980ade712673d606c6f3dfbe",
+ "0x111100.merylIndex:md5,35642b10f80c44989de4131a4c2b9c95",
+ "0x111101.merylData:md5,a26af5a2348ddc4882eb06d4edb63b6f",
+ "0x111101.merylIndex:md5,e6affc54363b262b10e55267192cd68b",
+ "0x111110.merylData:md5,7a7f81ee6339c46bffe18de12c3a7177",
+ "0x111110.merylIndex:md5,07f0179b757af0593f9120c98b7b9052",
+ "0x111111.merylData:md5,0e63424ed0b9b5a399058dbea354ae64",
+ "0x111111.merylIndex:md5,b8c6b4cc0580f55e4767f1f4529d772b",
+ "merylIndex:md5,72fb8181bcbb783884da1c8c2ded4eff"
+ ]
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,c97980ac5ebd37a77768c105861ad719"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-27T10:19:40.09228319"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/meryl/unionsum/tests/tags.yml b/modules/nf-core/meryl/unionsum/tests/tags.yml
new file mode 100644
index 00000000..707dd781
--- /dev/null
+++ b/modules/nf-core/meryl/unionsum/tests/tags.yml
@@ -0,0 +1,2 @@
+meryl/unionsum:
+ - "modules/nf-core/meryl/unionsum/**"
diff --git a/modules/nf-core/minimap2/align/environment.yml b/modules/nf-core/minimap2/align/environment.yml
new file mode 100644
index 00000000..051ca8ef
--- /dev/null
+++ b/modules/nf-core/minimap2/align/environment.yml
@@ -0,0 +1,9 @@
+name: minimap2_align
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::minimap2=2.28
+ - bioconda::samtools=1.19.2
+ - bioconda::htslib=1.19.1
diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf
new file mode 100644
index 00000000..62349edc
--- /dev/null
+++ b/modules/nf-core/minimap2/align/main.nf
@@ -0,0 +1,63 @@
+process MINIMAP2_ALIGN { // Runs minimap2 on reads vs. a reference; writes PAF, or a sorted BAM via samtools sort
+ tag "$meta.id"
+ label 'process_high'
+
+ // Note: the versions here need to match the versions used in the mulled container below and minimap2/index
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3a70f8bc7e17b723591f6132418640cfdbc88246-0' :
+ 'biocontainers/mulled-v2-66534bcbb7031a148b13e2ad42583020b9cd25c4:3a70f8bc7e17b723591f6132418640cfdbc88246-0' }"
+
+ input:
+ tuple val(meta), path(reads)
+ tuple val(meta2), path(reference) // when reference is empty/falsy the reads are used as the target as well (see script)
+ val bam_format // truthy: emit SAM (-a) piped into samtools sort -> <prefix>.bam; falsy: write <prefix>.paf
+ val cigar_paf_format // truthy and bam_format falsy: pass -c to minimap2
+ val cigar_bam // truthy and bam_format truthy: pass -L to minimap2
+
+ output:
+ tuple val(meta), path("*.paf"), optional: true, emit: paf
+ tuple val(meta), path("*.bam"), optional: true, emit: bam
+ tuple val(meta), path("*.csi"), optional: true, emit: csi // NOTE(review): the command below never writes a .csi itself; presumably relies on ext.args2 asking samtools sort for an index - confirm
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: '' // extra options placed right after `minimap2`
+ def args2 = task.ext.args2 ?: '' // extra options appended to `samtools sort` (only used in BAM mode)
+ def prefix = task.ext.prefix ?: "${meta.id}" // output basename, defaults to the sample id
+ def bam_output = bam_format ? "-a | samtools sort -@ ${task.cpus-1} -o ${prefix}.bam ${args2}" : "-o ${prefix}.paf" // tail of the command line: BAM pipeline or plain PAF output
+ def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' // -c is only added for PAF output
+ def set_cigar_bam = cigar_bam && bam_format ? "-L" : '' // -L is only added for BAM output
+ """
+ minimap2 \\
+ $args \\
+ -t $task.cpus \\
+ ${reference ?: reads} \\
+ $reads \\
+ $cigar_paf \\
+ $set_cigar_bam \\
+ $bam_output
+
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ minimap2: \$(minimap2 --version 2>&1)
+ END_VERSIONS
+ """
+
+ stub: // creates empty placeholder outputs; a .csi is always touched, regardless of bam_format
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def output_file = bam_format ? "${prefix}.bam" : "${prefix}.paf" // same naming rule as the real script
+ """
+ touch $output_file
+ touch ${prefix}.csi
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ minimap2: \$(minimap2 --version 2>&1)
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/minimap2/align/meta.yml b/modules/nf-core/minimap2/align/meta.yml
new file mode 100644
index 00000000..408522d5
--- /dev/null
+++ b/modules/nf-core/minimap2/align/meta.yml
@@ -0,0 +1,75 @@
+name: minimap2_align
+description: A versatile pairwise aligner for genomic and spliced nucleotide sequences
+keywords:
+ - align
+ - fasta
+ - fastq
+ - genome
+ - paf
+ - reference
+tools:
+ - minimap2:
+ description: |
+ A versatile pairwise aligner for genomic and spliced nucleotide sequences.
+ homepage: https://github.com/lh3/minimap2
+ documentation: https://github.com/lh3/minimap2#uguide
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - reads:
+ type: file
+ description: |
+ List of input FASTA or FASTQ files of size 1 and 2 for single-end
+ and paired-end data, respectively.
+ - meta2:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'test_ref']
+ - reference:
+ type: file
+ description: |
+ Reference database in FASTA format.
+ - bam_format:
+ type: boolean
+ description: Specify that output should be in BAM format
+ - cigar_paf_format:
+ type: boolean
+ description: Specify that output CIGAR should be in PAF format
+ - cigar_bam:
+ type: boolean
+ description: |
+ Write CIGAR with >65535 ops at the CG tag. This is recommended when
+ writing BAM for alignments that may exceed that limit, e.g. ultra-long reads (https://github.com/lh3/minimap2#working-with-65535-cigar-operations)
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - paf:
+ type: file
+ description: Alignment in PAF format
+ pattern: "*.paf"
+ - bam:
+ type: file
+ description: Alignment in BAM format
+ pattern: "*.bam"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@heuermh"
+ - "@sofstam"
+ - "@sateeshperi"
+ - "@jfy133"
+maintainers:
+ - "@heuermh"
+ - "@sofstam"
+ - "@sateeshperi"
+ - "@jfy133"
diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test b/modules/nf-core/minimap2/align/tests/main.nf.test
new file mode 100644
index 00000000..83cceeab
--- /dev/null
+++ b/modules/nf-core/minimap2/align/tests/main.nf.test
@@ -0,0 +1,181 @@
+nextflow_process {
+
+ name "Test Process MINIMAP2_ALIGN"
+ script "../main.nf"
+ process "MINIMAP2_ALIGN"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "minimap2"
+ tag "minimap2/align"
+
+ test("sarscov2 - fastq, fasta, true, false, false") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'test_ref' ], // meta map
+ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ input[2] = true
+ input[3] = false
+ input[4] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ file(process.out.bam[0][1]).name,
+ process.out.versions
+ ).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - [fastq1, fastq2], fasta, true, false, false") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+ ]
+ ]
+ input[1] = [
+ [ id:'test_ref' ], // meta map
+ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ input[2] = true
+ input[3] = false
+ input[4] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ file(process.out.bam[0][1]).name,
+ process.out.versions
+ ).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - fastq, [], true, false, false") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'test_ref' ], // meta map
+ []
+ ]
+ input[2] = true
+ input[3] = false
+ input[4] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ file(process.out.bam[0][1]).name,
+ process.out.versions
+ ).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - fastq, fasta, true, false, false - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'test_ref' ], // meta map
+ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ input[2] = true
+ input[3] = false
+ input[4] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ file(process.out.bam[0][1]).name,
+ file(process.out.csi[0][1]).name,
+ process.out.versions
+ ).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - fastq, fasta, false, false, false - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+ ]
+ input[1] = [
+ [ id:'test_ref' ], // meta map
+ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ input[2] = false
+ input[3] = false
+ input[4] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ file(process.out.paf[0][1]).name,
+ file(process.out.csi[0][1]).name,
+ process.out.versions
+ ).match() }
+ )
+ }
+
+ }
+
+}
\ No newline at end of file
diff --git a/modules/nf-core/minimap2/align/tests/main.nf.test.snap b/modules/nf-core/minimap2/align/tests/main.nf.test.snap
new file mode 100644
index 00000000..19a8f204
--- /dev/null
+++ b/modules/nf-core/minimap2/align/tests/main.nf.test.snap
@@ -0,0 +1,69 @@
+{
+ "sarscov2 - fastq, fasta, true, false, false": {
+ "content": [
+ "test.bam",
+ [
+ "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-05T10:14:18.939731126"
+ },
+ "sarscov2 - fastq, fasta, true, false, false - stub": {
+ "content": [
+ "test.bam",
+ "test.csi",
+ [
+ "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-05T10:14:34.275879844"
+ },
+ "sarscov2 - fastq, fasta, false, false, false - stub": {
+ "content": [
+ "test.paf",
+ "test.csi",
+ [
+ "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-05T10:14:39.227958138"
+ },
+ "sarscov2 - [fastq1, fastq2], fasta, true, false, false": {
+ "content": [
+ "test.bam",
+ [
+ "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-05T10:14:24.265054877"
+ },
+ "sarscov2 - fastq, [], true, false, false": {
+ "content": [
+ "test.bam",
+ [
+ "versions.yml:md5,98b8f5f36aa54b82210094f0b0d11938"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-05T10:14:29.27901773"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/minimap2/align/tests/tags.yml b/modules/nf-core/minimap2/align/tests/tags.yml
new file mode 100644
index 00000000..39dba374
--- /dev/null
+++ b/modules/nf-core/minimap2/align/tests/tags.yml
@@ -0,0 +1,2 @@
+minimap2/align:
+ - "modules/nf-core/minimap2/align/**"
diff --git a/modules/pfr/custom/checkgff3fastacorrespondence/environment.yml b/modules/nf-core/seqkit/rmdup/environment.yml
similarity index 72%
rename from modules/pfr/custom/checkgff3fastacorrespondence/environment.yml
rename to modules/nf-core/seqkit/rmdup/environment.yml
index ec0e86d1..6cb8fe9d 100644
--- a/modules/pfr/custom/checkgff3fastacorrespondence/environment.yml
+++ b/modules/nf-core/seqkit/rmdup/environment.yml
@@ -1,9 +1,9 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
-name: "custom_checkgff3fastacorrespondence"
+name: "seqkit_rmdup"
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- - "bioconda::samtools=1.18"
+ - "bioconda::seqkit=2.8.1"
diff --git a/modules/nf-core/seqkit/rmdup/main.nf b/modules/nf-core/seqkit/rmdup/main.nf
new file mode 100644
index 00000000..410bb836
--- /dev/null
+++ b/modules/nf-core/seqkit/rmdup/main.nf
@@ -0,0 +1,66 @@
+process SEQKIT_RMDUP { // Runs `seqkit rmdup` on one FASTA/FASTQ file; emits the de-duplicated file and a log of seqkit's stderr
+ tag "$meta.id"
+ label 'process_low'
+ // File IO can be a bottleneck. See: https://bioinf.shenwei.me/seqkit/usage/#parallelization-of-cpu-intensive-jobs
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/seqkit:2.8.1--h9ee0642_0':
+ 'biocontainers/seqkit:2.8.1--h9ee0642_0' }"
+
+ input:
+ tuple val(meta), path(fastx)
+
+ output:
+ tuple val(meta), path("${prefix}.${extension}") , emit: fastx
+ tuple val(meta), path("*.log") , emit: log
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ prefix = task.ext.prefix ?: "${meta.id}" // not `def`: the output block above reads prefix and extension
+ extension = "fastq" // default; switched to fasta below when the input name carries a FASTA suffix
+ if ("$fastx" ==~ /.+\.(fasta|fa|fas|fna|fsa)(\.gz)?/ ) { // dots are escaped so only a literal ".gz" suffix is accepted
+ extension = "fasta"
+ }
+ extension = fastx.toString().endsWith('.gz') ? "${extension}.gz" : extension
+ // SeqKit/rmdup takes care of compressing the output: https://bioinf.shenwei.me/seqkit/usage/#rmdup
+ if("${prefix}.${extension}" == "$fastx") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+ """
+ seqkit \\
+ rmdup \\
+ --threads $task.cpus \\
+ $args \\
+ $fastx \\
+ -o ${prefix}.${extension} \\
+ 2> >(tee ${prefix}.log >&2)
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ seqkit: \$(seqkit version | cut -d' ' -f2)
+ END_VERSIONS
+ """
+
+ stub:
+ prefix = task.ext.prefix ?: "${meta.id}"
+ extension = "fastq" // same naming rules as the script block so stub outputs match real ones
+ if ("$fastx" ==~ /.+\.(fasta|fa|fas|fna|fsa)(\.gz)?/ ) {
+ extension = "fasta"
+ }
+ extension = fastx.toString().endsWith('.gz') ? "${extension}.gz" : extension
+ if("${prefix}.${extension}" == "$fastx") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+ """
+ touch ${prefix}.${extension}
+ echo \\
+ '[INFO] 0 duplicated records removed' \\
+ > ${prefix}.log
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ seqkit: \$(seqkit version | cut -d' ' -f2)
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/seqkit/rmdup/meta.yml b/modules/nf-core/seqkit/rmdup/meta.yml
new file mode 100644
index 00000000..d0addd45
--- /dev/null
+++ b/modules/nf-core/seqkit/rmdup/meta.yml
@@ -0,0 +1,50 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "seqkit_rmdup"
+description: Removes duplicated sequences by ID, name or sequence
+keywords:
+ - genomics
+ - fasta
+ - fastq
+ - remove
+ - duplicates
+tools:
+ - "seqkit":
+ description: "A cross-platform and ultrafast toolkit for FASTA/Q file manipulation"
+ homepage: "https://bioinf.shenwei.me/seqkit/"
+ documentation: "https://bioinf.shenwei.me/seqkit/usage/"
+ tool_dev_url: "https://github.com/shenwei356/seqkit"
+ doi: "10.1371/journal.pone.0163962"
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - fastx:
+ type: file
+ description: Input fasta/fastq file
+ pattern: "*.{fsa,fas,fa,fasta,fastq,fq,fsa.gz,fas.gz,fa.gz,fasta.gz,fastq.gz,fq.gz}"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - fastx:
+ type: file
+ description: Output fasta/fastq file
+ pattern: "*.{fasta,fasta.gz,fastq,fastq.gz}"
+ - log:
+ type: file
+ description: Log containing information regarding removed duplicates
+ pattern: "*.log"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@GallVp"
+maintainers:
+ - "@GallVp"
diff --git a/modules/nf-core/seqkit/rmdup/tests/main.nf.test b/modules/nf-core/seqkit/rmdup/tests/main.nf.test
new file mode 100644
index 00000000..e990443f
--- /dev/null
+++ b/modules/nf-core/seqkit/rmdup/tests/main.nf.test
@@ -0,0 +1,173 @@
+nextflow_process {
+
+ name "Test Process SEQKIT_RMDUP"
+ script "../main.nf"
+ process "SEQKIT_RMDUP"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "seqkit"
+ tag "seqkit/rmdup"
+
+ test("sarscov2-genome_fasta") {
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert path(process.out.log[0][1]).text.contains('0 duplicated records removed') }
+ )
+ }
+
+ }
+
+ test("repeated-fasta") {
+ when {
+ process {
+ """
+ def repeated_fasta = file('repeated.fasta')
+ repeated_fasta.text = '>A\\nAGCTAGCTAGCT\\n>B\\nAGCTAGCTAGCT\\n>A\\nAGCTAGCTAGCT'
+
+ input[0] = [
+ [ id:'test' ], // meta map
+ repeated_fasta
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert path(process.out.log[0][1]).text.contains('1 duplicated records removed') }
+ )
+ }
+
+ }
+
+ test("sarscov2-genome_fasta_gz") {
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert path(process.out.log[0][1]).text.contains('0 duplicated records removed') }
+ )
+ }
+
+ }
+
+ test("sarscov2-test_1_fastq_gz") {
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert path(process.out.log[0][1]).text.contains('0 duplicated records removed') }
+ )
+ }
+
+ }
+
+ test("file_name_conflict-fail_with_error") {
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test_1' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert !process.success },
+ { assert process.stdout.toString().contains("Input and output names are the same") }
+ )
+ }
+
+ }
+
+ test("sarscov2-genome_fasta-stub") {
+
+ options '-stub'
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert path(process.out.log[0][1]).text.contains('0 duplicated records removed') }
+ )
+ }
+
+ }
+
+ test("file_name_conflict-fail_with_error-stub") {
+
+ options '-stub'
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'genome' ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert !process.success },
+ { assert process.stdout.toString().contains("Input and output names are the same") }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/seqkit/rmdup/tests/main.nf.test.snap b/modules/nf-core/seqkit/rmdup/tests/main.nf.test.snap
new file mode 100644
index 00000000..68c415cd
--- /dev/null
+++ b/modules/nf-core/seqkit/rmdup/tests/main.nf.test.snap
@@ -0,0 +1,247 @@
+{
+ "sarscov2-genome_fasta-stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test.log:md5,cf833211befdf890bb6b2a3cd0b91853"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,d2b8da3c114c2bd1c6606030df55b6aa"
+ ],
+ "fastx": [
+ [
+ {
+ "id": "test"
+ },
+ "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "log": [
+ [
+ {
+ "id": "test"
+ },
+ "test.log:md5,cf833211befdf890bb6b2a3cd0b91853"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,d2b8da3c114c2bd1c6606030df55b6aa"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.1"
+ },
+ "timestamp": "2024-05-27T19:40:01.6034"
+ },
+ "sarscov2-test_1_fastq_gz": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test.log:md5,a41135cfe024baaf42f135583fe73f0d"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,d2b8da3c114c2bd1c6606030df55b6aa"
+ ],
+ "fastx": [
+ [
+ {
+ "id": "test"
+ },
+ "test.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec"
+ ]
+ ],
+ "log": [
+ [
+ {
+ "id": "test"
+ },
+ "test.log:md5,a41135cfe024baaf42f135583fe73f0d"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,d2b8da3c114c2bd1c6606030df55b6aa"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.1"
+ },
+ "timestamp": "2024-05-27T19:37:48.551195"
+ },
+ "sarscov2-genome_fasta": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.fasta:md5,483f4a5dfe60171c86ee9b7e6dff908b"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test.log:md5,a41135cfe024baaf42f135583fe73f0d"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,d2b8da3c114c2bd1c6606030df55b6aa"
+ ],
+ "fastx": [
+ [
+ {
+ "id": "test"
+ },
+ "test.fasta:md5,483f4a5dfe60171c86ee9b7e6dff908b"
+ ]
+ ],
+ "log": [
+ [
+ {
+ "id": "test"
+ },
+ "test.log:md5,a41135cfe024baaf42f135583fe73f0d"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,d2b8da3c114c2bd1c6606030df55b6aa"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.1"
+ },
+ "timestamp": "2024-05-27T19:37:38.821528"
+ },
+ "sarscov2-genome_fasta_gz": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.fasta.gz:md5,483f4a5dfe60171c86ee9b7e6dff908b"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test.log:md5,a41135cfe024baaf42f135583fe73f0d"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,d2b8da3c114c2bd1c6606030df55b6aa"
+ ],
+ "fastx": [
+ [
+ {
+ "id": "test"
+ },
+ "test.fasta.gz:md5,483f4a5dfe60171c86ee9b7e6dff908b"
+ ]
+ ],
+ "log": [
+ [
+ {
+ "id": "test"
+ },
+ "test.log:md5,a41135cfe024baaf42f135583fe73f0d"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,d2b8da3c114c2bd1c6606030df55b6aa"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.1"
+ },
+ "timestamp": "2024-05-27T19:37:43.723054"
+ },
+ "repeated-fasta": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.fasta:md5,7510a742291241e7d7556bf720caf65c"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test.log:md5,314c0aaef0f832a217a3f6ce3f8bc117"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,d2b8da3c114c2bd1c6606030df55b6aa"
+ ],
+ "fastx": [
+ [
+ {
+ "id": "test"
+ },
+ "test.fasta:md5,7510a742291241e7d7556bf720caf65c"
+ ]
+ ],
+ "log": [
+ [
+ {
+ "id": "test"
+ },
+ "test.log:md5,314c0aaef0f832a217a3f6ce3f8bc117"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,d2b8da3c114c2bd1c6606030df55b6aa"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.1"
+ },
+ "timestamp": "2024-05-27T19:52:34.545807"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/seqkit/rmdup/tests/tags.yml b/modules/nf-core/seqkit/rmdup/tests/tags.yml
new file mode 100644
index 00000000..e732db38
--- /dev/null
+++ b/modules/nf-core/seqkit/rmdup/tests/tags.yml
@@ -0,0 +1,2 @@
+seqkit/rmdup:
+ - "modules/nf-core/seqkit/rmdup/**"
diff --git a/modules/nf-core/seqkit/seq/environment.yml b/modules/nf-core/seqkit/seq/environment.yml
index 9019d269..74e0dd76 100644
--- a/modules/nf-core/seqkit/seq/environment.yml
+++ b/modules/nf-core/seqkit/seq/environment.yml
@@ -6,4 +6,4 @@ channels:
- bioconda
- defaults
dependencies:
- - "bioconda::seqkit=2.6.1"
+ - "bioconda::seqkit=2.8.1"
diff --git a/modules/nf-core/seqkit/seq/main.nf b/modules/nf-core/seqkit/seq/main.nf
index 7fc742b3..d7d38fc8 100644
--- a/modules/nf-core/seqkit/seq/main.nf
+++ b/modules/nf-core/seqkit/seq/main.nf
@@ -5,8 +5,8 @@ process SEQKIT_SEQ {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/seqkit:2.6.1--h9ee0642_0':
- 'biocontainers/seqkit:2.6.1--h9ee0642_0' }"
+ 'https://depot.galaxyproject.org/singularity/seqkit:2.8.1--h9ee0642_0':
+ 'biocontainers/seqkit:2.8.1--h9ee0642_0' }"
input:
tuple val(meta), path(fastx)
diff --git a/modules/nf-core/seqkit/seq/tests/main.nf.test b/modules/nf-core/seqkit/seq/tests/main.nf.test
index aa9c283e..3ed23945 100644
--- a/modules/nf-core/seqkit/seq/tests/main.nf.test
+++ b/modules/nf-core/seqkit/seq/tests/main.nf.test
@@ -26,7 +26,6 @@ nextflow_process {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() },
- { assert snapshot(process.out.versions).match("versions") }
)
}
@@ -48,7 +47,6 @@ nextflow_process {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() },
- { assert snapshot(process.out.versions).match("versions") }
)
}
@@ -70,7 +68,6 @@ nextflow_process {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() },
- { assert snapshot(process.out.versions).match("versions") }
)
}
@@ -116,7 +113,6 @@ nextflow_process {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() },
- { assert snapshot(process.out.versions).match("versions") }
)
}
diff --git a/modules/nf-core/seqkit/seq/tests/main.nf.test.snap b/modules/nf-core/seqkit/seq/tests/main.nf.test.snap
index 91b4d17a..e6910966 100644
--- a/modules/nf-core/seqkit/seq/tests/main.nf.test.snap
+++ b/modules/nf-core/seqkit/seq/tests/main.nf.test.snap
@@ -11,7 +11,7 @@
]
],
"1": [
- "versions.yml:md5,03e95c09e4faae889c3d516b2d4b9d7f"
+ "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd"
],
"fastx": [
[
@@ -22,19 +22,15 @@
]
],
"versions": [
- "versions.yml:md5,03e95c09e4faae889c3d516b2d4b9d7f"
+ "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd"
]
}
],
- "timestamp": "2023-12-18T10:34:00.37449"
- },
- "versions": {
- "content": [
- [
- "versions.yml:md5,03e95c09e4faae889c3d516b2d4b9d7f"
- ]
- ],
- "timestamp": "2023-12-17T13:56:53.318962"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-08T08:52:18.220051903"
},
"sarscov2-test_1_fastq_gz": {
"content": [
@@ -48,7 +44,7 @@
]
],
"1": [
- "versions.yml:md5,03e95c09e4faae889c3d516b2d4b9d7f"
+ "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd"
],
"fastx": [
[
@@ -59,11 +55,15 @@
]
],
"versions": [
- "versions.yml:md5,03e95c09e4faae889c3d516b2d4b9d7f"
+ "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd"
]
}
],
- "timestamp": "2023-12-18T10:33:53.528342"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-08T08:51:55.607826581"
},
"sarscov2-genome_fasta": {
"content": [
@@ -77,7 +77,7 @@
]
],
"1": [
- "versions.yml:md5,03e95c09e4faae889c3d516b2d4b9d7f"
+ "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd"
],
"fastx": [
[
@@ -88,11 +88,15 @@
]
],
"versions": [
- "versions.yml:md5,03e95c09e4faae889c3d516b2d4b9d7f"
+ "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd"
]
}
],
- "timestamp": "2023-12-18T10:33:44.757686"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-08T08:51:27.717072933"
},
"sarscov2-genome_fasta_gz": {
"content": [
@@ -106,7 +110,7 @@
]
],
"1": [
- "versions.yml:md5,03e95c09e4faae889c3d516b2d4b9d7f"
+ "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd"
],
"fastx": [
[
@@ -117,10 +121,14 @@
]
],
"versions": [
- "versions.yml:md5,03e95c09e4faae889c3d516b2d4b9d7f"
+ "versions.yml:md5,34894c4efa5e10a923e78975a3d260dd"
]
}
],
- "timestamp": "2023-12-18T10:33:49.115171"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-08T08:51:37.917560104"
}
}
\ No newline at end of file
diff --git a/modules/nf-core/seqkit/sort/environment.yml b/modules/nf-core/seqkit/sort/environment.yml
index 820c5707..3a02d156 100644
--- a/modules/nf-core/seqkit/sort/environment.yml
+++ b/modules/nf-core/seqkit/sort/environment.yml
@@ -6,4 +6,4 @@ channels:
- bioconda
- defaults
dependencies:
- - "bioconda::seqkit=2.6.1"
+ - "bioconda::seqkit=2.8.1"
diff --git a/modules/nf-core/seqkit/sort/main.nf b/modules/nf-core/seqkit/sort/main.nf
index 08f86eb1..c12e35ac 100644
--- a/modules/nf-core/seqkit/sort/main.nf
+++ b/modules/nf-core/seqkit/sort/main.nf
@@ -5,8 +5,8 @@ process SEQKIT_SORT {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/seqkit:2.6.1--h9ee0642_0':
- 'biocontainers/seqkit:2.6.1--h9ee0642_0' }"
+ 'https://depot.galaxyproject.org/singularity/seqkit:2.8.1--h9ee0642_0':
+ 'biocontainers/seqkit:2.8.1--h9ee0642_0' }"
input:
tuple val(meta), path(fastx)
diff --git a/modules/nf-core/seqkit/sort/tests/main.nf.test b/modules/nf-core/seqkit/sort/tests/main.nf.test
index 0c2f4e2d..e6f143dd 100644
--- a/modules/nf-core/seqkit/sort/tests/main.nf.test
+++ b/modules/nf-core/seqkit/sort/tests/main.nf.test
@@ -26,7 +26,6 @@ nextflow_process {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() },
- { assert snapshot(process.out.versions).match("versions") }
)
}
@@ -48,7 +47,6 @@ nextflow_process {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() },
- { assert snapshot(process.out.versions).match("versions") }
)
}
@@ -70,7 +68,6 @@ nextflow_process {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() },
- { assert snapshot(process.out.versions).match("versions") }
)
}
@@ -116,7 +113,6 @@ nextflow_process {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() },
- { assert snapshot(process.out.versions).match("versions") }
)
}
diff --git a/modules/nf-core/seqkit/sort/tests/main.nf.test.snap b/modules/nf-core/seqkit/sort/tests/main.nf.test.snap
index 94e2cb35..4d3a862b 100644
--- a/modules/nf-core/seqkit/sort/tests/main.nf.test.snap
+++ b/modules/nf-core/seqkit/sort/tests/main.nf.test.snap
@@ -11,7 +11,7 @@
]
],
"1": [
- "versions.yml:md5,ffb1083944323f1b40bd5699cfa8b998"
+ "versions.yml:md5,04ac93fea9e0019536cfe1445ef15472"
],
"fastx": [
[
@@ -22,19 +22,15 @@
]
],
"versions": [
- "versions.yml:md5,ffb1083944323f1b40bd5699cfa8b998"
+ "versions.yml:md5,04ac93fea9e0019536cfe1445ef15472"
]
}
],
- "timestamp": "2023-12-18T10:07:19.28815"
- },
- "versions": {
- "content": [
- [
- "versions.yml:md5,ffb1083944323f1b40bd5699cfa8b998"
- ]
- ],
- "timestamp": "2023-12-18T10:07:15.341516"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-08T10:15:12.840547526"
},
"sarscov2-test_1_fastq_gz": {
"content": [
@@ -48,7 +44,7 @@
]
],
"1": [
- "versions.yml:md5,ffb1083944323f1b40bd5699cfa8b998"
+ "versions.yml:md5,04ac93fea9e0019536cfe1445ef15472"
],
"fastx": [
[
@@ -59,11 +55,15 @@
]
],
"versions": [
- "versions.yml:md5,ffb1083944323f1b40bd5699cfa8b998"
+ "versions.yml:md5,04ac93fea9e0019536cfe1445ef15472"
]
}
],
- "timestamp": "2023-12-18T10:12:25.704893"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-08T10:14:53.190928013"
},
"sarscov2-genome_fasta": {
"content": [
@@ -77,7 +77,7 @@
]
],
"1": [
- "versions.yml:md5,ffb1083944323f1b40bd5699cfa8b998"
+ "versions.yml:md5,04ac93fea9e0019536cfe1445ef15472"
],
"fastx": [
[
@@ -88,11 +88,15 @@
]
],
"versions": [
- "versions.yml:md5,ffb1083944323f1b40bd5699cfa8b998"
+ "versions.yml:md5,04ac93fea9e0019536cfe1445ef15472"
]
}
],
- "timestamp": "2023-12-18T10:07:15.293713"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-08T10:14:29.745582222"
},
"sarscov2-genome_fasta_gz": {
"content": [
@@ -106,7 +110,7 @@
]
],
"1": [
- "versions.yml:md5,ffb1083944323f1b40bd5699cfa8b998"
+ "versions.yml:md5,04ac93fea9e0019536cfe1445ef15472"
],
"fastx": [
[
@@ -117,10 +121,14 @@
]
],
"versions": [
- "versions.yml:md5,ffb1083944323f1b40bd5699cfa8b998"
+ "versions.yml:md5,04ac93fea9e0019536cfe1445ef15472"
]
}
],
- "timestamp": "2023-12-18T10:09:20.306713"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-08T10:14:41.460601093"
}
}
\ No newline at end of file
diff --git a/modules/nf-core/sratools/fasterqdump/environment.yml b/modules/nf-core/sratools/fasterqdump/environment.yml
new file mode 100644
index 00000000..4011b691
--- /dev/null
+++ b/modules/nf-core/sratools/fasterqdump/environment.yml
@@ -0,0 +1,8 @@
+name: sratools_fasterqdump
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::sra-tools=3.0.8
+ - conda-forge::pigz=2.6
diff --git a/modules/nf-core/sratools/fasterqdump/main.nf b/modules/nf-core/sratools/fasterqdump/main.nf
new file mode 100644
index 00000000..d5b65bab
--- /dev/null
+++ b/modules/nf-core/sratools/fasterqdump/main.nf
@@ -0,0 +1,103 @@
+process SRATOOLS_FASTERQDUMP {
+ tag "$meta.id"
+ label 'process_medium'
+ // Runs fasterq-dump on the staged `sra` input and gzips the resulting *.fastq files with pigz.
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-5f89fe0cd045cb1d615630b9261a1d17943a9b6a:2f4a4c900edd6801ff0068c2b3048b4459d119eb-0' :
+ 'biocontainers/mulled-v2-5f89fe0cd045cb1d615630b9261a1d17943a9b6a:2f4a4c900edd6801ff0068c2b3048b4459d119eb-0' }"
+
+ input:
+ tuple val(meta), path(sra) // meta.id and meta.single_end are read by the script and stub below
+ path ncbi_settings // its path is exported as NCBI_SETTINGS before fasterq-dump runs
+ path certificate // a name ending in .jwt adds --perm, one ending in .ngc adds --ngc; anything else adds no option
+
+ output:
+ tuple val(meta), path('*.fastq.gz'), emit: reads
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: '' // extra command-line options for fasterq-dump
+ def args2 = task.ext.args2 ?: '' // extra command-line options for pigz
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def outfile = "${prefix}.fastq"
+ def exclude_third = meta.single_end ? '' : "mv $outfile $prefix || echo 'No third file'"
+ // Excludes the "${prefix}.fastq" file from output `reads` channel for paired end cases and
+ // avoids the '.' in the path bug: https://github.com/ncbi/sra-tools/issues/865
+ def key_file = '' // stays empty unless the certificate name has a recognised suffix
+ if (certificate.toString().endsWith('.jwt')) {
+ key_file += " --perm ${certificate}"
+ } else if (certificate.toString().endsWith('.ngc')) {
+ key_file += " --ngc ${certificate}"
+ }
+ """
+ export NCBI_SETTINGS="\$PWD/${ncbi_settings}"
+
+ fasterq-dump \\
+ $args \\
+ --threads $task.cpus \\
+ --outfile $outfile \\
+ ${key_file} \\
+ ${sra}
+
+ $exclude_third
+
+ pigz \\
+ $args2 \\
+ --no-name \\
+ --processes $task.cpus \\
+ *.fastq
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ sratools: \$(fasterq-dump --version 2>&1 | grep -Eo '[0-9.]+')
+ pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
+ END_VERSIONS
+ """
+
+ stub: // touches empty FASTQ files, only echoes the fasterq-dump command, then still runs pigz and the version commands
+ def args = task.ext.args ?: ''
+ def args2 = task.ext.args2 ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def outfile = "${prefix}.fastq"
+ def exclude_third = meta.single_end ? '' : "mv $outfile $prefix || echo 'No third file'"
+ // Excludes the "${prefix}.fastq" file from output `reads` channel for paired end cases and
+ // avoids the '.' in the path bug: https://github.com/ncbi/sra-tools/issues/865
+ def key_file = ''
+ if (certificate.toString().endsWith('.jwt')) {
+ key_file += " --perm ${certificate}"
+ } else if (certificate.toString().endsWith('.ngc')) {
+ key_file += " --ngc ${certificate}"
+ }
+ def touch_outfiles = meta.single_end ? "${prefix}.fastq" : "${prefix}_1.fastq ${prefix}_2.fastq" // empty placeholders that the pigz call below compresses
+ """
+ touch $touch_outfiles
+
+ export NCBI_SETTINGS="\$PWD/${ncbi_settings}"
+
+ echo \\
+ "fasterq-dump \\
+ $args \\
+ --threads $task.cpus \\
+ --outfile $outfile \\
+ ${key_file} \\
+ ${sra}"
+
+ $exclude_third
+
+ pigz \\
+ $args2 \\
+ --no-name \\
+ --processes $task.cpus \\
+ *.fastq
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ sratools: \$(fasterq-dump --version 2>&1 | grep -Eo '[0-9.]+')
+ pigz: \$( pigz --version 2>&1 | sed 's/pigz //g' )
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/sratools/fasterqdump/meta.yml b/modules/nf-core/sratools/fasterqdump/meta.yml
new file mode 100644
index 00000000..6a2151a8
--- /dev/null
+++ b/modules/nf-core/sratools/fasterqdump/meta.yml
@@ -0,0 +1,54 @@
+name: sratools_fasterqdump
+description: Extract sequencing reads in FASTQ format from a given NCBI Sequence Read Archive (SRA).
+keywords:
+ - sequencing
+ - FASTQ
+ - dump
+tools:
+ - sratools:
+ description: SRA Toolkit and SDK from NCBI
+ homepage: https://github.com/ncbi/sra-tools
+ documentation: https://github.com/ncbi/sra-tools/wiki
+ tool_dev_url: https://github.com/ncbi/sra-tools
+ licence: ["Public Domain"]
+input:
+ - meta:
+ type: map
+ description: >
+ Groovy Map containing sample information e.g. [ id:'test', single_end:false ]
+
+ - sra:
+ type: directory
+ description: Directory containing ETL data for the given SRA.
+ pattern: "*/*.sra"
+ - ncbi_settings:
+ type: file
+ description: >
+ An NCBI user settings file.
+
+ pattern: "*.mkfg"
+ - certificate:
+ type: file
+ description: >
+ Path to a JWT cart file used to access protected dbGAP data on SRA using the sra-toolkit
+
+ pattern: "*.cart"
+output:
+ - meta:
+ type: map
+ description: >
+ Groovy Map containing sample information e.g. [ id:'test', single_end:false ]
+
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - reads:
+ type: file
+ description: Extracted FASTQ file or files if the sequencing reads are paired-end.
+ pattern: "*.fastq.gz"
+authors:
+ - "@Midnighter"
+maintainers:
+ - "@Midnighter"
+ - "@gallvp"
diff --git a/modules/nf-core/sratools/fasterqdump/tests/main.nf.test b/modules/nf-core/sratools/fasterqdump/tests/main.nf.test
new file mode 100644
index 00000000..6996cd18
--- /dev/null
+++ b/modules/nf-core/sratools/fasterqdump/tests/main.nf.test
@@ -0,0 +1,122 @@
+nextflow_process {
+ name "Test Process SRATOOLS_FASTERQDUMP"
+ script "../main.nf"
+ process "SRATOOLS_FASTERQDUMP"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "untar"
+ tag "sratools"
+ tag "sratools/fasterqdump"
+
+ test("Single-end") {
+
+ setup {
+ run("UNTAR") {
+ script "modules/nf-core/untar/main.nf"
+ process {
+ """
+ input[0] = Channel.of([ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/sra/SRR13255544.tar.gz', checkIfExists: true) ])
+ """
+ }
+ }
+ }
+
+ when {
+ process {
+ """
+ input[0] = UNTAR.out.untar.map{ meta, files -> [ [ id:'test_single_end', single_end:true ], files]}
+ input[1] = file(params.modules_testdata_base_path + 'generic/config/ncbi_user_settings.mkfg', checkIfExists: true)
+ input[2] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("Paired-end") {
+
+ setup {
+ run("UNTAR") {
+ script "modules/nf-core/untar/main.nf"
+ process {
+ """
+ input[0] = Channel.of([ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/sra/SRR11140744.tar.gz', checkIfExists: true) ])
+ """
+ }
+ }
+ }
+
+ when {
+ process {
+ """
+ input[0] = UNTAR.out.untar.map{ meta, files -> [ [ id:'test_paired_end', single_end:false ], files]}
+ input[1] = file(params.modules_testdata_base_path + 'generic/config/ncbi_user_settings.mkfg', checkIfExists: true)
+ input[2] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("Single-end-stub") {
+
+ options '-stub'
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test_single_end', single_end:true ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/sra/SRR13255544.tar.gz', checkIfExists: true)
+ ]
+ input[1] = file(params.modules_testdata_base_path + 'generic/config/ncbi_user_settings.mkfg', checkIfExists: true)
+ input[2] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("Paired-end-stub") {
+
+ options '-stub'
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test_paired_end', single_end:false ],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/sra/SRR11140744.tar.gz', checkIfExists: true)
+ ]
+ input[1] = file(params.modules_testdata_base_path + 'generic/config/ncbi_user_settings.mkfg', checkIfExists: true)
+ input[2] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+}
diff --git a/modules/nf-core/sratools/fasterqdump/tests/main.nf.test.snap b/modules/nf-core/sratools/fasterqdump/tests/main.nf.test.snap
new file mode 100644
index 00000000..5d0c3e7c
--- /dev/null
+++ b/modules/nf-core/sratools/fasterqdump/tests/main.nf.test.snap
@@ -0,0 +1,154 @@
+{
+ "Single-end": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test_single_end",
+ "single_end": true
+ },
+ "test_single_end.fastq.gz:md5,674d78c1cc3c1308d6d39d6369a42887"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,a3d61a9761e1606ef8459f0b68821d7a"
+ ],
+ "reads": [
+ [
+ {
+ "id": "test_single_end",
+ "single_end": true
+ },
+ "test_single_end.fastq.gz:md5,674d78c1cc3c1308d6d39d6369a42887"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,a3d61a9761e1606ef8459f0b68821d7a"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-17T22:01:43.486256"
+ },
+ "Single-end-stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test_single_end",
+ "single_end": true
+ },
+ "test_single_end.fastq.gz:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,a3d61a9761e1606ef8459f0b68821d7a"
+ ],
+ "reads": [
+ [
+ {
+ "id": "test_single_end",
+ "single_end": true
+ },
+ "test_single_end.fastq.gz:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,a3d61a9761e1606ef8459f0b68821d7a"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-17T21:40:42.335786"
+ },
+ "Paired-end": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test_paired_end",
+ "single_end": false
+ },
+ [
+ "test_paired_end_1.fastq.gz:md5,8573015c91d099b6e30789f8bab2f43c",
+ "test_paired_end_2.fastq.gz:md5,37e6f719a022dc3c9994c80fbc20c311"
+ ]
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,a3d61a9761e1606ef8459f0b68821d7a"
+ ],
+ "reads": [
+ [
+ {
+ "id": "test_paired_end",
+ "single_end": false
+ },
+ [
+ "test_paired_end_1.fastq.gz:md5,8573015c91d099b6e30789f8bab2f43c",
+ "test_paired_end_2.fastq.gz:md5,37e6f719a022dc3c9994c80fbc20c311"
+ ]
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,a3d61a9761e1606ef8459f0b68821d7a"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-17T22:09:55.6396"
+ },
+ "Paired-end-stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test_paired_end",
+ "single_end": false
+ },
+ [
+ "test_paired_end_1.fastq.gz:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "test_paired_end_2.fastq.gz:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,a3d61a9761e1606ef8459f0b68821d7a"
+ ],
+ "reads": [
+ [
+ {
+ "id": "test_paired_end",
+ "single_end": false
+ },
+ [
+ "test_paired_end_1.fastq.gz:md5,d41d8cd98f00b204e9800998ecf8427e",
+ "test_paired_end_2.fastq.gz:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,a3d61a9761e1606ef8459f0b68821d7a"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-17T20:35:44.782058"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/sratools/fasterqdump/tests/tags.yml b/modules/nf-core/sratools/fasterqdump/tests/tags.yml
new file mode 100644
index 00000000..5d1ddcb3
--- /dev/null
+++ b/modules/nf-core/sratools/fasterqdump/tests/tags.yml
@@ -0,0 +1,2 @@
+sratools/fasterqdump:
+ - modules/nf-core/sratools/fasterqdump/**
diff --git a/modules/nf-core/sratools/prefetch/environment.yml b/modules/nf-core/sratools/prefetch/environment.yml
new file mode 100644
index 00000000..0abad336
--- /dev/null
+++ b/modules/nf-core/sratools/prefetch/environment.yml
@@ -0,0 +1,8 @@
+name: sratools_prefetch
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::sra-tools=3.1.0
+ - conda-forge::curl=8.5.0
diff --git a/modules/nf-core/sratools/prefetch/main.nf b/modules/nf-core/sratools/prefetch/main.nf
new file mode 100644
index 00000000..46e7cb3f
--- /dev/null
+++ b/modules/nf-core/sratools/prefetch/main.nf
@@ -0,0 +1,47 @@
+process SRATOOLS_PREFETCH {
+ tag "$id"
+ label 'process_low'
+ // Downloads one accession with prefetch (wrapped in a retry helper) and then validates the download.
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/sra-tools:3.1.0--h9f5acd7_0' :
+ 'biocontainers/sra-tools:3.1.0--h9f5acd7_0' }"
+
+ input:
+ tuple val(meta), val(id) // id is the accession handed to prefetch and also the name of the output path
+ path ncbi_settings // its path is exported as NCBI_SETTINGS by the template
+ path certificate // a name ending in .jwt adds --perm, one ending in .ngc adds --ngc; anything else adds no option
+
+ output:
+ tuple val(meta), path(id), emit: sra
+ path 'versions.yml' , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ shell:
+ args = task.ext.args ?: '' // extra command-line options for prefetch
+ args2 = task.ext.args2 ?: '5 1 100' // retry_with_backoff arguments: max attempts, initial delay in seconds, delay cap in seconds
+ if (certificate) { // skipped when no certificate is supplied
+ if (certificate.toString().endsWith('.jwt')) {
+ args += " --perm ${certificate}"
+ }
+ else if (certificate.toString().endsWith('.ngc')) {
+ args += " --ngc ${certificate}"
+ }
+ }
+
+ template 'retry_with_backoff.sh' // the script body; it interpolates args, args2, id and ncbi_settings
+
+ stub: // creates an empty placeholder .sra file inside a directory named after id; the version commands still run
+ """
+ mkdir $id
+ touch $id/${id}.sra
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ sratools: \$(prefetch --version 2>&1 | grep -Eo '[0-9.]+')
+ curl: \$(curl --version | head -n 1 | sed 's/^curl //; s/ .*\$//')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/sratools/prefetch/meta.yml b/modules/nf-core/sratools/prefetch/meta.yml
new file mode 100644
index 00000000..7ed42d49
--- /dev/null
+++ b/modules/nf-core/sratools/prefetch/meta.yml
@@ -0,0 +1,57 @@
+name: sratools_prefetch
+description: Download sequencing data from the NCBI Sequence Read Archive (SRA).
+keywords:
+ - sequencing
+ - fastq
+ - prefetch
+tools:
+ - sratools:
+ description: SRA Toolkit and SDK from NCBI
+ homepage: https://github.com/ncbi/sra-tools
+ documentation: https://github.com/ncbi/sra-tools/wiki
+ tool_dev_url: https://github.com/ncbi/sra-tools
+ licence: ["Public Domain"]
+input:
+ - meta:
+ type: map
+ description: >
+ Groovy Map containing sample information e.g. [ id:'test', single_end:false ]
+
+ - id:
+ type: string
+ description: >
+ A string denoting an SRA id.
+
+ - ncbi_settings:
+ type: file
+ description: >
+ An NCBI user settings file.
+
+ pattern: "*.mkfg"
+ - certificate:
+ type: file
+ description: >
+ Path to a JWT cart file used to access protected dbGAP data on SRA using the sra-toolkit
+
+ pattern: "*.cart"
+output:
+ - meta:
+ type: map
+ description: >
+ Groovy Map containing sample information e.g. [ id:'test', single_end:false ]
+
+ - sra:
+ type: directory
+ description: >
+ Directory containing the ETL data for the given SRA id.
+
+ pattern: "*/*.sra"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@Midnighter"
+maintainers:
+ - "@Midnighter"
+ - "@gallvp"
diff --git a/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh b/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh
new file mode 100755
index 00000000..bfee6070
--- /dev/null
+++ b/modules/nf-core/sratools/prefetch/templates/retry_with_backoff.sh
@@ -0,0 +1,65 @@
+#!/usr/bin/env bash
+
+set -u
+# retry_with_backoff MAX_ATTEMPTS DELAY MAX_TIME COMMAND [ARG...]: rerun COMMAND until it succeeds, doubling the wait after each failure.
+retry_with_backoff() {
+ local max_attempts=${1}  # total number of times COMMAND may be run
+ local delay=${2}  # wait before the next attempt, in seconds; doubled after every failure
+ local max_time=${3}  # upper bound applied to the doubled delay
+ local attempt=1
+ local output=  # stdout of the most recent attempt
+ local status=  # exit status of the most recent attempt
+
+ # Remove the first three arguments to this function in order to access
+ # the 'real' command with `${@}`.
+ shift 3
+
+ while [ ${attempt} -le ${max_attempts} ]; do
+ output=$("${@}")  # only stdout is captured; stderr passes straight through
+ status=${?}
+
+ if [ ${status} -eq 0 ]; then
+ break
+ fi
+
+ if [ ${attempt} -lt ${max_attempts} ]; then
+ echo "Failed attempt ${attempt} of ${max_attempts}. Retrying in ${delay} s." >&2
+ sleep ${delay}
+ elif [ ${attempt} -eq ${max_attempts} ]; then
+ echo "Failed after ${attempt} attempts." >&2
+ return ${status}  # propagate the last failure; the captured output is not printed in this case
+ fi
+
+ attempt=$(( ${attempt} + 1 ))
+ delay=$(( ${delay} * 2 ))
+ if [ ${delay} -ge ${max_time} ]; then
+ delay=${max_time}  # cap the backoff
+ fi
+ done
+
+ echo "${output}"  # reached after a successful attempt: replay the captured stdout
+}
+
+export NCBI_SETTINGS="$PWD/!{ncbi_settings}"  # absolute path of the staged settings file
+# Download the accession; the values placed before `prefetch` are the attempt limit, initial delay and delay cap for retry_with_backoff.
+retry_with_backoff !{args2} \
+ prefetch \
+ !{args} \
+ !{id}
+
+# check file integrity using vdb-validate or (when archive contains no checksums) md5sum
+vdb-validate !{id} > vdb-validate_result.txt 2>&1 || exit 1
+if grep -q "checksums missing" vdb-validate_result.txt; then
+ VALID_MD5SUMS=$(curl --silent --fail --location --retry 3 --retry-delay 60 'https://locate.ncbi.nlm.nih.gov/sdl/2/retrieve?filetype=run&acc=!{id}')  # raw response of the locate service for this accession
+ LOCAL_MD5SUMS=$(md5sum !{id}/* | cut -f1 -d' ')  # one checksum per downloaded file
+ if ! grep -q -F -f <(echo "$LOCAL_MD5SUMS") <(echo "$VALID_MD5SUMS"); then  # NOTE(review): passes as soon as any one local checksum occurs in the response - confirm that is enough when several files are downloaded
+ echo "MD5 sum check failed" 1>&2
+ exit 1
+ fi
+fi
+
+cat <<-END_VERSIONS > versions.yml
+"!{task.process}":
+ sratools: $(prefetch --version 2>&1 | grep -Eo '[0-9.]+')
+ curl: $(curl --version | head -n 1 | sed 's/^curl //; s/ .*$//')
+END_VERSIONS
diff --git a/modules/nf-core/sratools/prefetch/tests/main.nf.test b/modules/nf-core/sratools/prefetch/tests/main.nf.test
new file mode 100644
index 00000000..92034d40
--- /dev/null
+++ b/modules/nf-core/sratools/prefetch/tests/main.nf.test
@@ -0,0 +1,71 @@
+nextflow_process {
+ name "Test Process SRATOOLS_PREFETCH"
+ script "../main.nf"
+ process "SRATOOLS_PREFETCH"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "sratools"
+ tag "sratools/prefetch"
+
+ test("sratools/prefetch") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([ [ id:'test', single_end:false ], 'DRR000774' ])
+ input[1] = file(params.modules_testdata_base_path + 'generic/config/ncbi_user_settings.mkfg', checkIfExists: true)
+ input[2] = []
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("sratools/prefetch with sralite") {
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([ [ id:'test', single_end:false ], 'SRR1170046' ])
+ input[1] = file(params.modules_testdata_base_path + 'generic/config/ncbi_user_settings.mkfg', checkIfExists: true)
+ input[2] = []
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("sratools/prefetch/stub") {
+
+ options '-stub'
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([ [ id:'test', single_end:false ], 'DRR000774' ])
+ input[1] = file(params.modules_testdata_base_path + 'generic/config/ncbi_user_settings.mkfg', checkIfExists: true)
+ input[2] = []
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+}
diff --git a/modules/nf-core/sratools/prefetch/tests/main.nf.test.snap b/modules/nf-core/sratools/prefetch/tests/main.nf.test.snap
new file mode 100644
index 00000000..82a1969c
--- /dev/null
+++ b/modules/nf-core/sratools/prefetch/tests/main.nf.test.snap
@@ -0,0 +1,119 @@
+{
+ "sratools/prefetch/stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "DRR000774.sra:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,83d1b23f5ff5b2ad1b96d17d7d7594ee"
+ ],
+ "sra": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "DRR000774.sra:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,83d1b23f5ff5b2ad1b96d17d7d7594ee"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-17T20:07:31.627115"
+ },
+ "sratools/prefetch with sralite": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "SRR1170046.sralite:md5,7acfce556ca0951aff49d780899c105b"
+ ]
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,83d1b23f5ff5b2ad1b96d17d7d7594ee"
+ ],
+ "sra": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "SRR1170046.sralite:md5,7acfce556ca0951aff49d780899c105b"
+ ]
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,83d1b23f5ff5b2ad1b96d17d7d7594ee"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-28T11:49:02.309737"
+ },
+ "sratools/prefetch": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "DRR000774.sra:md5,7647dba20c89c0e3d7ad13842f060eb0"
+ ]
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,83d1b23f5ff5b2ad1b96d17d7d7594ee"
+ ],
+ "sra": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "DRR000774.sra:md5,7647dba20c89c0e3d7ad13842f060eb0"
+ ]
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,83d1b23f5ff5b2ad1b96d17d7d7594ee"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-28T11:48:37.428307"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/sratools/prefetch/tests/tags.yml b/modules/nf-core/sratools/prefetch/tests/tags.yml
new file mode 100644
index 00000000..52110bfd
--- /dev/null
+++ b/modules/nf-core/sratools/prefetch/tests/tags.yml
@@ -0,0 +1,2 @@
+sratools/prefetch:
+ - modules/nf-core/sratools/prefetch/**
diff --git a/modules/nf-core/untar/tests/main.nf.test b/modules/nf-core/untar/tests/main.nf.test
index 679e83c7..2a7c97bf 100644
--- a/modules/nf-core/untar/tests/main.nf.test
+++ b/modules/nf-core/untar/tests/main.nf.test
@@ -3,17 +3,12 @@ nextflow_process {
name "Test Process UNTAR"
script "../main.nf"
process "UNTAR"
-
tag "modules"
tag "modules_nfcore"
tag "untar"
-
test("test_untar") {
when {
- params {
- outdir = "$outputDir"
- }
process {
"""
input[0] = [ [], file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/db/kraken2.tar.gz', checkIfExists: true) ]
@@ -33,9 +28,6 @@ nextflow_process {
test("test_untar_onlyfiles") {
when {
- params {
- outdir = "$outputDir"
- }
process {
"""
input[0] = [ [], file(params.modules_testdata_base_path + 'generic/tar/hello.tar.gz', checkIfExists: true) ]
diff --git a/modules/nf-core/untar/tests/main.nf.test.snap b/modules/nf-core/untar/tests/main.nf.test.snap
index ace42576..64550292 100644
--- a/modules/nf-core/untar/tests/main.nf.test.snap
+++ b/modules/nf-core/untar/tests/main.nf.test.snap
@@ -12,7 +12,11 @@
]
]
],
- "timestamp": "2023-10-18T11:56:46.878844"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-28T11:49:41.320643"
},
"test_untar": {
"content": [
@@ -29,6 +33,10 @@
]
]
],
- "timestamp": "2023-10-18T11:56:08.16574"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-28T11:49:33.795172"
}
}
\ No newline at end of file
diff --git a/modules/pfr/busco/busco/environment.yml b/modules/pfr/busco/busco/environment.yml
new file mode 100644
index 00000000..06a5d930
--- /dev/null
+++ b/modules/pfr/busco/busco/environment.yml
@@ -0,0 +1,7 @@
+name: busco_busco
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::busco=5.7.1
diff --git a/modules/pfr/busco/busco/main.nf b/modules/pfr/busco/busco/main.nf
new file mode 100644
index 00000000..f7c1a662
--- /dev/null
+++ b/modules/pfr/busco/busco/main.nf
@@ -0,0 +1,107 @@
+process BUSCO_BUSCO {
+    tag "$meta.id"
+    label 'process_medium'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/busco:5.7.1--pyhdfd78af_0':
+        'biocontainers/busco:5.7.1--pyhdfd78af_0' }"
+
+    input:
+    tuple val(meta), path(fasta, stageAs:'tmp_input/*')
+    val mode                              // Required:    one of 'genome', 'proteins', or 'transcriptome' (validated in script:)
+    val lineage                           // Required:    lineage dataset to check against; "auto" enables --auto-lineage instead
+    path busco_lineages_path              // Recommended: local BUSCO lineages directory, passed as --download_path - downloads if not set
+    path config_file                      // Optional:    BUSCO configuration file, passed as --config
+
+    output:
+    tuple val(meta), path("*-busco.batch_summary.txt")                , emit: batch_summary
+    tuple val(meta), path("short_summary.*.txt")                      , emit: short_summaries_txt   , optional: true
+    tuple val(meta), path("short_summary.*.json")                     , emit: short_summaries_json  , optional: true
+    tuple val(meta), path("*-busco/*/run_*/full_table.tsv")           , emit: full_table            , optional: true
+    tuple val(meta), path("*-busco/*/run_*/missing_busco_list.tsv")   , emit: missing_busco_list    , optional: true
+    tuple val(meta), path("*-busco/*/run_*/single_copy_proteins.faa") , emit: single_copy_proteins  , optional: true
+    tuple val(meta), path("*-busco/*/run_*/busco_sequences")          , emit: seq_dir
+    tuple val(meta), path("*-busco/*/translated_proteins")            , emit: translated_dir        , optional: true
+    tuple val(meta), path("*-busco")                                  , emit: busco_dir
+    path "versions.yml"                                               , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    if ( mode !in [ 'genome', 'proteins', 'transcriptome' ] ) {
+        error "Mode must be one of 'genome', 'proteins', or 'transcriptome'."
+    }
+    def args = task.ext.args ?: ''
+    def prefix = task.ext.prefix ?: "${meta.id}-${lineage}"
+    def busco_config = config_file ? "--config $config_file" : ''
+    def busco_lineage = lineage.equals('auto') ? '--auto-lineage' : "--lineage_dataset ${lineage}"
+    def busco_lineage_dir = busco_lineages_path ? "--download_path ${busco_lineages_path}" : ''
+    """
+    # Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute)
+    # Check for container variable initialisation script and source it.
+    if [ -f "/usr/local/env-activate.sh" ]; then
+        set +u  # Otherwise, errors out because of various unbound variables
+        . "/usr/local/env-activate.sh"
+        set -u
+    fi
+
+    # If the augustus config directory is set but not writable, then copy it to a writable area (the ':-' guard keeps 'set -u' from aborting when it is unset)
+    if [ -n "\${AUGUSTUS_CONFIG_PATH:-}" ] && [ ! -w "\${AUGUSTUS_CONFIG_PATH}" ]; then
+        # Create writable tmp directory for augustus
+        AUG_CONF_DIR=\$( mktemp -d -p "\$PWD" )
+        cp -r "\$AUGUSTUS_CONFIG_PATH"/* "\$AUG_CONF_DIR"
+        export AUGUSTUS_CONFIG_PATH="\$AUG_CONF_DIR"
+        echo "New AUGUSTUS_CONFIG_PATH=\${AUGUSTUS_CONFIG_PATH}"
+    fi
+
+    # Ensure the input is uncompressed: gunzip *.gz into the batch directory, symlink everything else
+    INPUT_SEQS=input_seqs
+    mkdir "\$INPUT_SEQS"
+    cd "\$INPUT_SEQS"
+    for FASTA in ../tmp_input/*; do
+        if [ "\${FASTA##*.}" == 'gz' ]; then
+            gzip -cdf "\$FASTA" > "\$( basename "\$FASTA" .gz )"
+        else
+            ln -s "\$FASTA" .
+        fi
+    done
+    cd ..
+
+    busco \\
+        --cpu $task.cpus \\
+        --in "\$INPUT_SEQS" \\
+        --out ${prefix}-busco \\
+        --mode $mode \\
+        $busco_lineage \\
+        $busco_lineage_dir \\
+        $busco_config \\
+        $args
+
+    # clean up
+    rm -rf "\$INPUT_SEQS"
+
+    # Move files to avoid staging/publishing issues
+    mv ${prefix}-busco/batch_summary.txt ${prefix}-busco.batch_summary.txt
+    mv ${prefix}-busco/*/short_summary.*.{json,txt} . || echo "Short summaries were not available: No genes were found."
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' )
+    END_VERSIONS
+    """
+
+    stub:
+    def prefix = task.ext.prefix ?: "${meta.id}-${lineage}"
+    def fasta_name = files(fasta).first().name - '.gz'
+    """
+    touch ${prefix}-busco.batch_summary.txt
+    mkdir -p ${prefix}-busco/$fasta_name/run_${lineage}/busco_sequences
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        busco: \$( busco --version 2>&1 | sed 's/^BUSCO //' )
+    END_VERSIONS
+    """
+}
diff --git a/modules/pfr/busco/busco/meta.yml b/modules/pfr/busco/busco/meta.yml
new file mode 100644
index 00000000..29745d2c
--- /dev/null
+++ b/modules/pfr/busco/busco/meta.yml
@@ -0,0 +1,98 @@
+name: busco_busco
+description: Benchmarking Universal Single Copy Orthologs
+keywords:
+ - quality control
+ - genome
+ - transcriptome
+ - proteome
+tools:
+ - busco:
+ description: BUSCO provides measures for quantitative assessment of genome assembly, gene set, and transcriptome completeness based on evolutionarily informed expectations of gene content from near-universal single-copy orthologs selected from OrthoDB.
+ homepage: https://busco.ezlab.org/
+ documentation: https://busco.ezlab.org/busco_userguide.html
+ tool_dev_url: https://gitlab.com/ezlab/busco
+ doi: "10.1007/978-1-4939-9173-0_14"
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - fasta:
+ type: file
+ description: Nucleic or amino acid sequence file in FASTA format.
+ pattern: "*.{fasta,fna,fa,fasta.gz,fna.gz,fa.gz}"
+ - mode:
+ type: string
+ description: The mode to run Busco in. One of genome, proteins, or transcriptome
+ pattern: "{genome,proteins,transcriptome}"
+ - lineage:
+ type: string
+ description: The BUSCO lineage to use, or "auto" to automatically select lineage
+ - busco_lineages_path:
+ type: directory
+ description: Path to local BUSCO lineages directory.
+ - config_file:
+ type: file
+ description: Path to BUSCO config file.
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - batch_summary:
+ type: file
+ description: Summary of all sequence files analyzed
+ pattern: "*-busco.batch_summary.txt"
+ - short_summaries_txt:
+ type: file
+ description: Short Busco summary in plain text format
+ pattern: "short_summary.*.txt"
+ - short_summaries_json:
+ type: file
+ description: Short Busco summary in JSON format
+ pattern: "short_summary.*.json"
+ - busco_dir:
+ type: directory
+ description: BUSCO lineage specific output
+ pattern: "*-busco"
+ - full_table:
+ type: file
+ description: Full BUSCO results table
+ pattern: "full_table.tsv"
+ - missing_busco_list:
+ type: file
+ description: List of missing BUSCOs
+ pattern: "missing_busco_list.tsv"
+ - single_copy_proteins:
+ type: file
+ description: Fasta file of single copy proteins (transcriptome mode)
+ pattern: "single_copy_proteins.faa"
+ - seq_dir:
+ type: directory
+ description: BUSCO sequence directory
+ pattern: "busco_sequences"
+ - translated_dir:
+ type: directory
+ description: Six frame translations of each transcript made by the transcriptome mode
+ pattern: "translated_proteins"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@priyanka-surana"
+ - "@charles-plessy"
+ - "@mahesh-panchal"
+ - "@muffato"
+ - "@jvhagey"
+ - "@gallvp"
+maintainers:
+ - "@priyanka-surana"
+ - "@charles-plessy"
+ - "@mahesh-panchal"
+ - "@muffato"
+ - "@jvhagey"
+ - "@gallvp"
diff --git a/modules/pfr/busco/busco/tests/main.nf.test b/modules/pfr/busco/busco/tests/main.nf.test
new file mode 100644
index 00000000..16b708b2
--- /dev/null
+++ b/modules/pfr/busco/busco/tests/main.nf.test
@@ -0,0 +1,419 @@
+nextflow_process {
+
+ name "Test Process BUSCO_BUSCO"
+ script "../main.nf"
+ process "BUSCO_BUSCO"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "busco"
+ tag "busco/busco"
+
+ test("test_busco_genome_single_fasta") {
+
+ config './nextflow.config'
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file( params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true)
+ ]
+ input[1] = 'genome'
+ input[2] = 'bacteria_odb10' // Launch with 'auto' to use --auto-lineage, and specified lineages // 'auto' removed from test due to memory issues
+ input[3] = [] // Download busco lineage
+ input[4] = [] // No config
+ """
+ }
+ }
+
+ then {
+ assert process.success
+
+ with(path(process.out.short_summaries_txt[0][1]).text) {
+ assert contains('BUSCO version')
+ assert contains('The lineage dataset is')
+ assert contains('BUSCO was run in mode')
+ assert contains('Complete BUSCOs')
+ assert contains('Missing BUSCOs')
+ assert contains('Dependencies and versions')
+ }
+
+ with(path(process.out.short_summaries_json[0][1]).text) {
+ assert contains('one_line_summary')
+ assert contains('mode')
+ assert contains('dataset')
+ }
+
+ assert snapshot(
+ process.out.batch_summary[0][1],
+ process.out.full_table[0][1],
+ process.out.missing_busco_list[0][1],
+ process.out.versions[0]
+ ).match()
+
+ with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) {
+ assert contains('single_copy_busco_sequences.tar.gz')
+ assert contains('multi_copy_busco_sequences.tar.gz')
+ assert contains('fragmented_busco_sequences.tar.gz')
+ }
+
+ with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) {
+ assert contains('DEBUG:busco.run_BUSCO')
+ assert contains('Results from dataset')
+ assert contains('how to cite BUSCO')
+ }
+
+ assert process.out.single_copy_proteins == []
+ assert process.out.translated_dir == []
+ }
+ }
+
+ test("test_busco_genome_multi_fasta") {
+
+ config './nextflow.config'
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ [
+ file( params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true),
+ file( params.test_data['candidatus_portiera_aleyrodidarum']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ ]
+ input[1] = 'genome'
+ input[2] = 'bacteria_odb10'
+ input[3] = []
+ input[4] = []
+ """
+ }
+ }
+
+ then {
+ assert process.success
+
+ with(path(process.out.short_summaries_txt[0][1][0]).text) {
+ assert contains('BUSCO version')
+ assert contains('The lineage dataset is')
+ assert contains('BUSCO was run in mode')
+ assert contains('Complete BUSCOs')
+ assert contains('Missing BUSCOs')
+ assert contains('Dependencies and versions')
+ }
+
+ with(path(process.out.short_summaries_txt[0][1][1]).text) {
+ assert contains('BUSCO version')
+ assert contains('The lineage dataset is')
+ assert contains('BUSCO was run in mode')
+ assert contains('Complete BUSCOs')
+ assert contains('Missing BUSCOs')
+ assert contains('Dependencies and versions')
+ }
+
+ with(path(process.out.short_summaries_json[0][1][0]).text) {
+ assert contains('one_line_summary')
+ assert contains('mode')
+ assert contains('dataset')
+ }
+
+ with(path(process.out.short_summaries_json[0][1][1]).text) {
+ assert contains('one_line_summary')
+ assert contains('mode')
+ assert contains('dataset')
+ }
+
+ assert snapshot(
+ process.out.batch_summary[0][1],
+ process.out.full_table[0][1],
+ process.out.missing_busco_list[0][1],
+ process.out.versions[0]
+ ).match()
+
+ with(file(process.out.seq_dir[0][1][0]).listFiles().collect { it.name }) {
+ assert contains('single_copy_busco_sequences.tar.gz')
+ assert contains('multi_copy_busco_sequences.tar.gz')
+ assert contains('fragmented_busco_sequences.tar.gz')
+ }
+
+ with(file(process.out.seq_dir[0][1][1]).listFiles().collect { it.name }) {
+ assert contains('single_copy_busco_sequences.tar.gz')
+ assert contains('multi_copy_busco_sequences.tar.gz')
+ assert contains('fragmented_busco_sequences.tar.gz')
+ }
+
+ with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) {
+ assert contains('DEBUG:busco.run_BUSCO')
+ assert contains('Results from dataset')
+ assert contains('how to cite BUSCO')
+ }
+
+ assert process.out.single_copy_proteins == []
+ assert process.out.translated_dir == []
+ }
+
+ }
+
+ test("test_busco_eukaryote_metaeuk") {
+
+ config './nextflow.metaeuk.config'
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file( params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ input[1] = 'genome'
+ input[2] = 'eukaryota_odb10'
+ input[3] = []
+ input[4] = []
+ """
+ }
+ }
+
+ then {
+ assert process.success
+
+ with(path(process.out.short_summaries_txt[0][1]).text) {
+ assert contains('BUSCO version')
+ assert contains('The lineage dataset is')
+ assert contains('BUSCO was run in mode')
+ assert contains('Complete BUSCOs')
+ assert contains('Missing BUSCOs')
+ assert contains('Dependencies and versions')
+ }
+
+ with(path(process.out.short_summaries_json[0][1]).text) {
+ assert contains('one_line_summary')
+ assert contains('mode')
+ assert contains('dataset')
+ }
+
+ assert snapshot(
+ process.out.batch_summary[0][1],
+ process.out.full_table[0][1],
+ process.out.missing_busco_list[0][1],
+ process.out.versions[0]
+ ).match()
+
+ with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) {
+ assert contains('single_copy_busco_sequences.tar.gz')
+ assert contains('multi_copy_busco_sequences.tar.gz')
+ assert contains('fragmented_busco_sequences.tar.gz')
+ }
+
+ with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) {
+ assert contains('DEBUG:busco.run_BUSCO')
+ assert contains("'use_augustus', 'False'")
+ assert contains("'use_metaeuk', 'True'") // METAEUK
+ assert contains('Results from dataset')
+ assert contains('how to cite BUSCO')
+
+ }
+
+ assert process.out.single_copy_proteins == []
+ assert process.out.translated_dir == []
+ }
+
+ }
+
+ test("test_busco_eukaryote_augustus") {
+
+ config './nextflow.augustus.config'
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file( params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ input[1] = 'genome'
+ input[2] = 'eukaryota_odb10'
+ input[3] = []
+ input[4] = []
+ """
+ }
+ }
+
+ then {
+ assert process.success
+
+ assert snapshot(
+ process.out.batch_summary[0][1],
+ process.out.versions[0]
+ ).match()
+
+ with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) {
+ assert contains('DEBUG:busco.run_BUSCO')
+ assert contains("'use_augustus', 'True'")
+ assert contains("'use_metaeuk', 'False'") // AUGUSTUS
+ assert contains('Augustus did not recognize any genes')
+
+ }
+
+ assert process.out.short_summaries_json == []
+ assert process.out.short_summaries_txt == []
+ assert process.out.missing_busco_list == []
+ assert process.out.full_table == []
+ assert process.out.single_copy_proteins == []
+ assert process.out.translated_dir == []
+ }
+
+ }
+
+ test("test_busco_protein") {
+
+ config './nextflow.config'
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file( params.test_data['candidatus_portiera_aleyrodidarum']['genome']['proteome_fasta'], checkIfExists: true)
+ ]
+ input[1] = 'proteins'
+ input[2] = 'bacteria_odb10'
+ input[3] = []
+ input[4] = []
+ """
+ }
+ }
+
+ then {
+ assert process.success
+
+ with(path(process.out.short_summaries_txt[0][1]).text) {
+ assert contains('BUSCO version')
+ assert contains('The lineage dataset is')
+ assert contains('BUSCO was run in mode')
+ assert contains('Complete BUSCOs')
+ assert contains('Missing BUSCOs')
+ assert contains('Dependencies and versions')
+ }
+
+ with(path(process.out.short_summaries_json[0][1]).text) {
+ assert contains('one_line_summary')
+ assert contains('mode')
+ assert contains('dataset')
+ }
+
+ assert snapshot(
+ process.out.batch_summary[0][1],
+ process.out.full_table[0][1],
+ process.out.missing_busco_list[0][1],
+ process.out.versions[0]
+ ).match()
+
+ with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) {
+ assert contains('single_copy_busco_sequences.tar.gz')
+ assert contains('multi_copy_busco_sequences.tar.gz')
+ assert contains('fragmented_busco_sequences.tar.gz')
+ }
+
+ with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) {
+ assert contains('DEBUG:busco.run_BUSCO')
+ assert contains('Results from dataset')
+ assert contains('how to cite BUSCO')
+ }
+
+ assert process.out.single_copy_proteins == []
+ assert process.out.translated_dir == []
+ }
+
+ }
+
+ test("test_busco_transcriptome") {
+
+ config './nextflow.config'
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file( params.test_data['bacteroides_fragilis']['illumina']['test1_contigs_fa_gz'], checkIfExists: true)
+ ]
+ input[1] = 'transcriptome'
+ input[2] = 'bacteria_odb10'
+ input[3] = []
+ input[4] = []
+ """
+ }
+ }
+
+ then {
+ assert process.success
+
+ with(path(process.out.short_summaries_txt[0][1]).text) {
+ assert contains('BUSCO version')
+ assert contains('The lineage dataset is')
+ assert contains('BUSCO was run in mode')
+ assert contains('Complete BUSCOs')
+ assert contains('Missing BUSCOs')
+ assert contains('Dependencies and versions')
+ }
+
+ with(path(process.out.short_summaries_json[0][1]).text) {
+ assert contains('one_line_summary')
+ assert contains('mode')
+ assert contains('dataset')
+ }
+
+ assert snapshot(
+ process.out.batch_summary[0][1],
+ process.out.full_table[0][1],
+ process.out.missing_busco_list[0][1],
+ process.out.translated_dir[0][1],
+ process.out.single_copy_proteins[0][1],
+ process.out.versions[0]
+ ).match()
+
+ with(file(process.out.seq_dir[0][1]).listFiles().collect { it.name }) {
+ assert contains('single_copy_busco_sequences.tar.gz')
+ assert contains('multi_copy_busco_sequences.tar.gz')
+ assert contains('fragmented_busco_sequences.tar.gz')
+ }
+
+ with(path("${process.out.busco_dir[0][1]}/logs/busco.log").text) {
+ assert contains('DEBUG:busco.run_BUSCO')
+ assert contains('Results from dataset')
+ assert contains('how to cite BUSCO')
+ }
+ }
+
+ }
+
+ test("minimal-stub") {
+
+ options '-stub'
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file( params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true)
+ ]
+ input[1] = 'genome'
+ input[2] = 'bacteria_odb10'
+ input[3] = []
+ input[4] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+}
diff --git a/modules/pfr/busco/busco/tests/main.nf.test.snap b/modules/pfr/busco/busco/tests/main.nf.test.snap
new file mode 100644
index 00000000..1b6411bc
--- /dev/null
+++ b/modules/pfr/busco/busco/tests/main.nf.test.snap
@@ -0,0 +1,230 @@
+{
+ "minimal-stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+
+ ],
+ "4": [
+
+ ],
+ "5": [
+
+ ],
+ "6": [
+ [
+ {
+ "id": "test"
+ },
+ [
+
+ ]
+ ]
+ ],
+ "7": [
+
+ ],
+ "8": [
+ [
+ {
+ "id": "test"
+ },
+ [
+ [
+ [
+ [
+
+ ]
+ ]
+ ]
+ ]
+ ]
+ ],
+ "9": [
+ "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9"
+ ],
+ "batch_summary": [
+ [
+ {
+ "id": "test"
+ },
+ "test-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "busco_dir": [
+ [
+ {
+ "id": "test"
+ },
+ [
+ [
+ [
+ [
+
+ ]
+ ]
+ ]
+ ]
+ ]
+ ],
+ "full_table": [
+
+ ],
+ "missing_busco_list": [
+
+ ],
+ "seq_dir": [
+ [
+ {
+ "id": "test"
+ },
+ [
+
+ ]
+ ]
+ ],
+ "short_summaries_json": [
+
+ ],
+ "short_summaries_txt": [
+
+ ],
+ "single_copy_proteins": [
+
+ ],
+ "translated_dir": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-03T13:28:04.451297"
+ },
+ "test_busco_eukaryote_augustus": {
+ "content": [
+ "test-eukaryota_odb10-busco.batch_summary.txt:md5,3ea3bdc423a461dae514d816bdc61c89",
+ "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-03T13:26:36.974986"
+ },
+ "test_busco_genome_single_fasta": {
+ "content": [
+ "test-bacteria_odb10-busco.batch_summary.txt:md5,21b3fb771cf36be917cc451540d999be",
+ "full_table.tsv:md5,638fe7590f442c57361554dae330eca1",
+ "missing_busco_list.tsv:md5,1530af4fe7673a6d001349537bcd410a",
+ "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-03T13:22:45.07816"
+ },
+ "test_busco_genome_multi_fasta": {
+ "content": [
+ "test-bacteria_odb10-busco.batch_summary.txt:md5,fcd3c208913e8abda3d6742c43fec5fa",
+ [
+ "full_table.tsv:md5,c657edcc7d0de0175869717551df6e83",
+ "full_table.tsv:md5,638fe7590f442c57361554dae330eca1"
+ ],
+ [
+ "missing_busco_list.tsv:md5,aceb66e347a353cb7fca8e2a725f9112",
+ "missing_busco_list.tsv:md5,1530af4fe7673a6d001349537bcd410a"
+ ],
+ "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-03T13:23:50.255602"
+ },
+ "test_busco_eukaryote_metaeuk": {
+ "content": [
+ "test-eukaryota_odb10-busco.batch_summary.txt:md5,ff6d8277e452a83ce9456bbee666feb6",
+ "full_table.tsv:md5,92b1b1d5cb5ea0e2093d16f00187e8c7",
+ "missing_busco_list.tsv:md5,0352e563de290bf804c708323c35a9e3",
+ "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-03T13:25:38.159041"
+ },
+ "test_busco_transcriptome": {
+ "content": [
+ "test-bacteria_odb10-busco.batch_summary.txt:md5,8734b3f379c4c0928e5dd4ea1873dc64",
+ "full_table.tsv:md5,1b2ce808fdafa744c56b5f781551272d",
+ "missing_busco_list.tsv:md5,a6931b6470262b997b8b99ea0f1d14a4",
+ [
+ "1024388at2.faa:md5,797d603d262a6595a112e25b73e878b0",
+ "1054741at2.faa:md5,cd4b928cba6b19b4437746ba507e7195",
+ "1093223at2.faa:md5,df9549708e5ffcfaee6a74dd70a0e5dc",
+ "1151822at2.faa:md5,12726afc1cdc40c13392e1596e93df3a",
+ "143460at2.faa:md5,d887431fd988a5556a523440f02d9594",
+ "1491686at2.faa:md5,d03362d19979b27306c192f1c74a84e5",
+ "1504821at2.faa:md5,4f5f6e5c57bac0092c1d85ded73d7e67",
+ "1574817at2.faa:md5,1153e55998c2929eacad2aed7d08d248",
+ "1592033at2.faa:md5,bb7a59e5f3a57ba12d10dabf4c77ab57",
+ "1623045at2.faa:md5,8fe38155feb1802beb97ef7714837bf5",
+ "1661836at2.faa:md5,6c6d592c2fbb0d7a4e5e1f47a15644f0",
+ "1674344at2.faa:md5,bb41b44e53565a54cadf0b780532fe08",
+ "1698718at2.faa:md5,f233860000028eb00329aa85236c71e5",
+ "1990650at2.faa:md5,34a2d29c5f8b6253159ddb7a43fa1829",
+ "223233at2.faa:md5,dec6705c7846c989296e73942f953cbc",
+ "402899at2.faa:md5,acc0f271f9a586d2ce1ee41669b22999",
+ "505485at2.faa:md5,aa0391f8fa5d9bd19b30d844d5a99845",
+ "665824at2.faa:md5,47f8ad43b6a6078206feb48c2e552793",
+ "776861at2.faa:md5,f8b90c13f7c6be828dea3bb920195e3d",
+ "874197at2.faa:md5,8d22a35a768debe6f376fc695d233a69",
+ "932854at2.faa:md5,2eff2de1ab83b22f3234a529a44e22bb",
+ "95696at2.faa:md5,247bfd1aef432f7b5456307768e9149c"
+ ],
+ "single_copy_proteins.faa:md5,73e2c5d6a9b0f01f2deea3cc5f21b764",
+ "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-03T13:27:53.992893"
+ },
+ "test_busco_protein": {
+ "content": [
+ "test-bacteria_odb10-busco.batch_summary.txt:md5,f5a782378f9f94a748aa907381fdef91",
+ "full_table.tsv:md5,812ab6a0496fccab774643cf40c4f2a8",
+ "missing_busco_list.tsv:md5,aceb66e347a353cb7fca8e2a725f9112",
+ "versions.yml:md5,3fc94714b95c2dc15399a4229d9dd1d9"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-03T13:27:12.724862"
+ }
+}
\ No newline at end of file
diff --git a/modules/pfr/busco/busco/tests/nextflow.augustus.config b/modules/pfr/busco/busco/tests/nextflow.augustus.config
new file mode 100644
index 00000000..84daa69d
--- /dev/null
+++ b/modules/pfr/busco/busco/tests/nextflow.augustus.config
@@ -0,0 +1,5 @@
+process {
+ withName: 'BUSCO_BUSCO' {
+ ext.args = '--tar --augustus'
+ }
+}
diff --git a/modules/pfr/busco/busco/tests/nextflow.config b/modules/pfr/busco/busco/tests/nextflow.config
new file mode 100644
index 00000000..1ec3fec0
--- /dev/null
+++ b/modules/pfr/busco/busco/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+ withName: 'BUSCO_BUSCO' {
+ ext.args = '--tar'
+ }
+}
diff --git a/modules/pfr/busco/busco/tests/nextflow.metaeuk.config b/modules/pfr/busco/busco/tests/nextflow.metaeuk.config
new file mode 100644
index 00000000..c1418445
--- /dev/null
+++ b/modules/pfr/busco/busco/tests/nextflow.metaeuk.config
@@ -0,0 +1,5 @@
+process {
+ withName: 'BUSCO_BUSCO' {
+ ext.args = '--tar --metaeuk'
+ }
+}
diff --git a/modules/pfr/busco/busco/tests/old_test.yml b/modules/pfr/busco/busco/tests/old_test.yml
new file mode 100644
index 00000000..75177f5d
--- /dev/null
+++ b/modules/pfr/busco/busco/tests/old_test.yml
@@ -0,0 +1,624 @@
+- name: busco test_busco_genome_single_fasta
+ command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_genome_single_fasta -c ./tests/config/nextflow.config
+ tags:
+ - busco
+ files:
+ - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json
+ contains:
+ - "one_line_summary"
+ - "mode"
+ - "dataset"
+ - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.txt
+ contains:
+ - "BUSCO version"
+ - "The lineage dataset is"
+ - "BUSCO was run in mode"
+ - "Complete BUSCOs"
+ - "Missing BUSCOs"
+ - "Dependencies and versions"
+ - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt
+ md5sum: bc2440f8a68d7fbf931ff911c1c3fdfa
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_err.log
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_out.log
+ md5sum: 9caf1a1434414c78562eb0bbb9c0e53f
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_err.log
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_out.log
+ contains:
+ - "# hmmsearch :: search profile(s) against a sequence database"
+ - "# target sequence database:"
+ - "Internal pipeline statistics summary:"
+ - "[ok]"
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_err.log
+ md5sum: 538510cfc7483498210f01e53fe035ad
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_out.log
+ md5sum: 61050b0706addc9498b2088a2d6efa9a
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/.checkpoint
+ contains:
+ - "Tool: prodigal"
+ - "Completed"
+ - "jobs"
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.faa
+ md5sum: 836e9a80d33d8b89168f07ddc13ee991
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.fna
+ md5sum: 20eeb75f86842e6e136f02bca8b73a9f
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa
+ md5sum: 836e9a80d33d8b89168f07ddc13ee991
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna
+ md5sum: 20eeb75f86842e6e136f02bca8b73a9f
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log
+ md5sum: 538510cfc7483498210f01e53fe035ad
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log
+ md5sum: 61050b0706addc9498b2088a2d6efa9a
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/.bbtools_output/.checkpoint
+ contains:
+ - "Tool: bbtools"
+ - "Completed"
+ - "jobs"
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/full_table.tsv
+ md5sum: c56edab1dc1522e993c25ae2b730799f
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/hmmer_output.tar.gz
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/missing_busco_list.tsv
+ md5sum: b533ef30270f27160acce85a22d01bf5
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.json
+ contains:
+ - "one_line_summary"
+ - "mode"
+ - "lineage_dataset"
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.txt
+ contains:
+ - "# BUSCO version is:"
+ - "Results:"
+ - "busco:"
+ - path: output/busco/test-bacteria_odb10-busco/logs/busco.log
+ contains:
+ - "DEBUG:busco.run_BUSCO"
+ - "Results from dataset"
+ - "how to cite BUSCO"
+ - path: output/busco/versions.yml
+
+- name: busco test_busco_genome_multi_fasta
+ command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_genome_multi_fasta -c ./tests/config/nextflow.config
+ tags:
+ - busco
+ files:
+ - path: output/busco/short_summary.specific.bacteria_odb10.genome.fasta.json
+ contains:
+ - "one_line_summary"
+ - "mode"
+ - "dataset"
+ - path: output/busco/short_summary.specific.bacteria_odb10.genome.fasta.txt
+ contains:
+ - "BUSCO version"
+ - "The lineage dataset is"
+ - "BUSCO was run in mode"
+ - "Complete BUSCOs"
+ - "Missing BUSCOs"
+ - "Dependencies and versions"
+ - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.json
+ contains:
+ - "one_line_summary"
+ - "mode"
+ - "dataset"
+ - path: output/busco/short_summary.specific.bacteria_odb10.genome.fna.txt
+ contains:
+ - "BUSCO version"
+ - "The lineage dataset is"
+ - "BUSCO was run in mode"
+ - "Complete BUSCOs"
+ - "Missing BUSCOs"
+ - "Dependencies and versions"
+ - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt
+ md5sum: 8c64c1a28b086ef2ee444f99cbed5f7d
+ - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/bbtools_err.log
+ - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/bbtools_out.log
+ md5sum: 8f047bdb33264d22a83920bc2c63f29a
+ - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/hmmsearch_err.log
+ - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/hmmsearch_out.log
+ contains:
+ - "# hmmsearch :: search profile(s) against a sequence database"
+ - "# target sequence database:"
+ - "Internal pipeline statistics summary:"
+ - "[ok]"
+ - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/prodigal_err.log
+ md5sum: c1fdc6977332f53dfe7f632733bb4585
+ - path: output/busco/test-bacteria_odb10-busco/genome.fasta/logs/prodigal_out.log
+ md5sum: 50752acb1c5a20be886bfdfc06635bcb
+ - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/.checkpoint
+ contains:
+ - "Tool: prodigal"
+ - "Completed"
+ - "jobs"
+ - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/predicted.faa
+ md5sum: 8166471fc5f08c82fd5643ab42327f9d
+ - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/predicted.fna
+ md5sum: ddc508a18f60e7f3314534df50cdf8ca
+ - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa
+ md5sum: 8166471fc5f08c82fd5643ab42327f9d
+ - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna
+ md5sum: ddc508a18f60e7f3314534df50cdf8ca
+ - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log
+ md5sum: c1fdc6977332f53dfe7f632733bb4585
+ - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log
+ md5sum: 50752acb1c5a20be886bfdfc06635bcb
+ - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4.faa
+ md5sum: e56fd59c38248dc21ac94355dca98121
+ - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4.fna
+ md5sum: b365f84bf99c68357952e0b98ed7ce42
+ - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4_err.log
+ md5sum: e5f14d7925ba14a0f9850542f3739894
+ - path: output/busco/test-bacteria_odb10-busco/genome.fasta/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_4_out.log
+ md5sum: d41971bfc1b621d4ffd2633bc47017ea
+ - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/.bbtools_output/.checkpoint
+ contains:
+ - "Tool: bbtools"
+ - "Completed"
+ - "jobs"
+ - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz
+ - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz
+ - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz
+ - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/full_table.tsv
+ md5sum: c9651b88b10871abc260ee655898e828
+ - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/hmmer_output.tar.gz
+ - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/missing_busco_list.tsv
+ md5sum: 9939309df2da5419de88c32d1435c779
+ - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/short_summary.json
+ contains:
+ - "one_line_summary"
+ - "mode"
+ - "dataset"
+ - path: output/busco/test-bacteria_odb10-busco/genome.fasta/run_bacteria_odb10/short_summary.txt
+ contains:
+ - "# BUSCO version is:"
+ - "Results:"
+ - "busco:"
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_err.log
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/bbtools_out.log
+ md5sum: 9caf1a1434414c78562eb0bbb9c0e53f
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_err.log
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/hmmsearch_out.log
+ contains:
+ - "# hmmsearch :: search profile(s) against a sequence database"
+ - "# target sequence database:"
+ - "Internal pipeline statistics summary:"
+ - "[ok]"
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_err.log
+ md5sum: 538510cfc7483498210f01e53fe035ad
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/logs/prodigal_out.log
+ md5sum: 61050b0706addc9498b2088a2d6efa9a
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/.checkpoint
+ contains:
+ - "Tool: prodigal"
+ - "Completed"
+ - "jobs"
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.faa
+ md5sum: 836e9a80d33d8b89168f07ddc13ee991
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/predicted.fna
+ md5sum: 20eeb75f86842e6e136f02bca8b73a9f
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.faa
+ md5sum: 836e9a80d33d8b89168f07ddc13ee991
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11.fna
+ md5sum: 20eeb75f86842e6e136f02bca8b73a9f
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_err.log
+ md5sum: 538510cfc7483498210f01e53fe035ad
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/prodigal_output/predicted_genes/tmp/prodigal_mode_single_code_11_out.log
+ md5sum: 61050b0706addc9498b2088a2d6efa9a
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/.bbtools_output/.checkpoint
+ contains:
+ - "Tool: bbtools"
+ - "Completed"
+ - "jobs"
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/full_table.tsv
+ md5sum: c56edab1dc1522e993c25ae2b730799f
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/hmmer_output.tar.gz
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/missing_busco_list.tsv
+ md5sum: b533ef30270f27160acce85a22d01bf5
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.json
+ contains:
+ - "one_line_summary"
+ - "mode"
+ - "dataset"
+ - path: output/busco/test-bacteria_odb10-busco/genome.fna/run_bacteria_odb10/short_summary.txt
+ contains:
+ - "# BUSCO version is:"
+ - "Results:"
+ - "busco:"
+ - path: output/busco/test-bacteria_odb10-busco/logs/busco.log
+ contains:
+ - "DEBUG:busco.run_BUSCO"
+ - "Results from dataset"
+ - "how to cite BUSCO"
+ - path: output/busco/versions.yml
+
+- name: busco test_busco_eukaryote_metaeuk
+ command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_eukaryote_metaeuk -c ./tests/config/nextflow.config
+ tags:
+ - busco
+ files:
+ - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.json
+ contains:
+ - "one_line_summary"
+ - "mode"
+ - "dataset"
+ - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.txt
+ contains:
+ - "BUSCO version"
+ - "The lineage dataset is"
+ - "BUSCO was run in mode"
+ - "Complete BUSCOs"
+ - "Missing BUSCOs"
+ - "Dependencies and versions"
+ - path: output/busco/test-eukaryota_odb10-busco.batch_summary.txt
+ md5sum: ff6d8277e452a83ce9456bbee666feb6
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_err.log
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_out.log
+ md5sum: e63debaa653f18f7405d936050abc093
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_err.log
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_out.log
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_err.log
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_out.log
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_err.log
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_out.log
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/.bbtools_output/.checkpoint
+ contains:
+ - "Tool: bbtools"
+ - "Completed"
+ - "jobs"
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/fragmented_busco_sequences.tar.gz
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/single_copy_busco_sequences.tar.gz
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/full_table.tsv
+ md5sum: bd880e90b9e5620a58943a3e0f9ff16b
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/hmmer_output.tar.gz
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/.checkpoint
+ contains:
+ - "Tool: metaeuk"
+ - "Completed"
+ - "jobs"
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/combined_pred_proteins.fas
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.codon.fas
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.fas
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.gff
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.headersMap.tsv
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/refseq_db_rerun.faa
+ md5sum: d80b8fa4cb5ed0d47d63d6aa93635bc2
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.codon.fas
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.fas
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.gff
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.headersMap.tsv
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/missing_busco_list.tsv
+ md5sum: 1e8e79c540fd2e69ba0d2659d9eb2988
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.json
+ contains:
+ - "one_line_summary"
+ - "mode"
+ - "dataset"
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.txt
+ contains:
+ - "# BUSCO version is:"
+ - "Results:"
+ - "busco:"
+ - path: output/busco/test-eukaryota_odb10-busco/logs/busco.log
+ contains:
+ - "DEBUG:busco.run_BUSCO"
+ - "Results from dataset"
+ - "how to cite BUSCO"
+ - path: output/busco/versions.yml
+
+- name: busco test_busco_eukaryote_augustus
+ command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_eukaryote_augustus -c ./tests/config/nextflow.config
+ tags:
+ - busco
+ files:
+ - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.json
+ contains:
+ - "one_line_summary"
+ - "mode"
+ - "dataset"
+ - path: output/busco/short_summary.specific.eukaryota_odb10.genome.fasta.txt
+ contains:
+ - "BUSCO version"
+ - "The lineage dataset is"
+ - "BUSCO was run in mode"
+ - "Complete BUSCOs"
+ - "Missing BUSCOs"
+ - "Dependencies and versions"
+ - path: output/busco/test-eukaryota_odb10-busco.batch_summary.txt
+ md5sum: ff6d8277e452a83ce9456bbee666feb6
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_err.log
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/bbtools_out.log
+ md5sum: e63debaa653f18f7405d936050abc093
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_err.log
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/hmmsearch_out.log
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_err.log
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run1_out.log
+ contains:
+ - "metaeuk"
+ - "easy-predict"
+ - "Compute score and coverage"
+ - "Time for processing:"
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_err.log
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/logs/metaeuk_run2_out.log
+ contains:
+ - "metaeuk"
+ - "easy-predict"
+ - "Compute score and coverage"
+ - "Time for processing:"
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/.bbtools_output/.checkpoint
+ contains:
+ - "Tool: bbtools"
+ - "Completed"
+ - "jobs"
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/fragmented_busco_sequences.tar.gz
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/busco_sequences/single_copy_busco_sequences.tar.gz
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/full_table.tsv
+ md5sum: bd880e90b9e5620a58943a3e0f9ff16b
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/hmmer_output.tar.gz
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/.checkpoint
+ contains:
+ - "Tool: metaeuk"
+ - "Completed"
+ - "jobs"
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/combined_pred_proteins.fas
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.codon.fas
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.fas
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.gff
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/initial_results/genome.fasta.headersMap.tsv
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/refseq_db_rerun.faa
+ md5sum: d80b8fa4cb5ed0d47d63d6aa93635bc2
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.codon.fas
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.fas
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.gff
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/metaeuk_output/rerun_results/genome.fasta.headersMap.tsv
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/missing_busco_list.tsv
+ md5sum: 1e8e79c540fd2e69ba0d2659d9eb2988
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.json
+ contains:
+ - "one_line_summary"
+ - "mode"
+ - "dataset"
+ - path: output/busco/test-eukaryota_odb10-busco/genome.fasta/run_eukaryota_odb10/short_summary.txt
+ contains:
+ - "# BUSCO version is:"
+ - "Results:"
+ - "busco:"
+ - path: output/busco/test-eukaryota_odb10-busco/logs/busco.log
+ contains:
+ - "DEBUG:busco.run_BUSCO"
+ - "Results from dataset"
+ - "how to cite BUSCO"
+ - path: output/busco/versions.yml
+
+- name: busco test_busco_protein
+ command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_protein -c ./tests/config/nextflow.config
+ tags:
+ - busco
+ files:
+ - path: output/busco/short_summary.specific.bacteria_odb10.proteome.fasta.json
+ contains:
+ - "one_line_summary"
+ - "mode"
+ - "dataset"
+ - path: output/busco/short_summary.specific.bacteria_odb10.proteome.fasta.txt
+ contains:
+ - "BUSCO version"
+ - "The lineage dataset is"
+ - "BUSCO was run in mode"
+ - "Complete BUSCOs"
+ - "Missing BUSCOs"
+ - "Dependencies and versions"
+ - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt
+ md5sum: 7a65e6cbb6c56a2ea4e739ae0aa3297d
+ - path: output/busco/test-bacteria_odb10-busco/logs/busco.log
+ contains:
+ - "DEBUG:busco.run_BUSCO"
+ - "Results from dataset"
+ - "how to cite BUSCO"
+ - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/logs/hmmsearch_err.log
+ - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/logs/hmmsearch_out.log
+ contains:
+ - "# hmmsearch :: search profile(s) against a sequence database"
+ - "# target sequence database:"
+ - "Internal pipeline statistics summary:"
+ - "[ok]"
+ - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz
+ - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz
+ - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz
+ - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/full_table.tsv
+ md5sum: 0e34f1011cd83ea1d5d5103ec62b8922
+ - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/hmmer_output.tar.gz
+ - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/missing_busco_list.tsv
+ md5sum: 9939309df2da5419de88c32d1435c779
+ - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/short_summary.json
+ contains:
+ - "one_line_summary"
+ - "mode"
+ - "dataset"
+ - path: output/busco/test-bacteria_odb10-busco/proteome.fasta/run_bacteria_odb10/short_summary.txt
+ contains:
+ - "# BUSCO version is:"
+ - "Results:"
+ - "busco:"
+ - path: output/busco/versions.yml
+
+- name: busco test_busco_transcriptome
+ command: nextflow run ./tests/modules/nf-core/busco -entry test_busco_transcriptome -c ./tests/config/nextflow.config
+ tags:
+ - busco
+ files:
+ - path: output/busco/short_summary.specific.bacteria_odb10.test1.contigs.fa.json
+ contains:
+ - "one_line_summary"
+ - "mode"
+ - "dataset"
+ - path: output/busco/short_summary.specific.bacteria_odb10.test1.contigs.fa.txt
+ contains:
+ - "BUSCO version"
+ - "The lineage dataset is"
+ - "BUSCO was run in mode"
+ - "Complete BUSCOs"
+ - "Missing BUSCOs"
+ - "Dependencies and versions"
+ - path: output/busco/test-bacteria_odb10-busco.batch_summary.txt
+ md5sum: 46118ecf60d1b87d22b96d80f4f03632
+ - path: output/busco/test-bacteria_odb10-busco/logs/busco.log
+ contains:
+ - "DEBUG:busco.run_BUSCO"
+ - "Results from dataset"
+ - "how to cite BUSCO"
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/.checkpoint
+ contains:
+ - "Tool: makeblastdb"
+ - "Completed"
+ - "jobs"
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.ndb
+ md5sum: 3788c017fe5e6f0f58224e9cdd21822b
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nhr
+ md5sum: 8ecd2ce392bb5e25ddbe1d85f879582e
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nin
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.njs
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.not
+ md5sum: 0c340e376c7e85d19f82ec1a833e6a6e
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nsq
+ md5sum: 532d5c0a7ea00fe95ca3c97cb3be6198
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.ntf
+ md5sum: de1250813f0c7affc6d12dac9d0fb6bb
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/blast_db/test1.contigs.fa.nto
+ md5sum: ff74bd41f9cc9b011c63a32c4f7693bf
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/hmmsearch_err.log
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/hmmsearch_out.log
+ contains:
+ - "# hmmsearch :: search profile(s) against a sequence database"
+ - "# target sequence database:"
+ - "Internal pipeline statistics summary:"
+ - "[ok]"
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/makeblastdb_err.log
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/makeblastdb_out.log
+ contains:
+ - "Building a new DB"
+ - "Adding sequences from FASTA"
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/tblastn_err.log
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/logs/tblastn_out.log
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/.checkpoint
+ contains:
+ - "Tool: tblastn"
+ - "Completed"
+ - "jobs"
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/coordinates.tsv
+ md5sum: cc30eed321944af293452bdbcfc24292
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_101.temp
+ md5sum: 73e9c65fc83fedc58f57f09b08f08238
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_119.temp
+ md5sum: 7fa4cc7955ec0cc36330a221c579b975
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_129.temp
+ md5sum: 6f1601c875d019e3f6f1f98ed8e988d4
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_138.temp
+ md5sum: 3f8e034686cd240c2330650d791bcae2
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_143.temp
+ md5sum: df3dfa8e9ba30ed70cf75b5e7abf2179
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_172.temp
+ md5sum: 7d463e0e6cf7169bc9077d8dc776dda1
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_178.temp
+ md5sum: 2288edf7fa4f88f51b4cf4d94086f77e
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_188.temp
+ md5sum: 029906abbad6d87fc57830dd548cac24
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_195.temp
+ md5sum: 4937f3b348774a31b1160a00297c29cc
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_210.temp
+ md5sum: afcb20ba4c466479d6b91c8c62251e1f
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_232.temp
+ md5sum: 2e1e823ce017345bd998191a39fa9924
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_268.temp
+ md5sum: 08c2d82c34ecffbe1c638b410349412e
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_29.temp
+ md5sum: cd9b63cf93524284781535c888313764
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_44.temp
+ md5sum: d1929b742b24ebe379bf4801ca882dca
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_58.temp
+ md5sum: 69215765b010c05336538cb322c900b3
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_72.temp
+ md5sum: 6feaa1cc3b0899a147ea9d466878f3e3
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_80.temp
+ md5sum: 13625eae14e860a96ce17cd4e37e9d01
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_81.temp
+ md5sum: e14b2484649b0dbc8926815c207b806d
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_93.temp
+ md5sum: 6902c93691df00e690faea914c71839e
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/sequences/k141_97.temp
+ md5sum: 0a0d9d38a83acbd5ad43c29cdf429988
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/blast_output/tblastn.tsv
+ contains:
+ - "TBLASTN"
+ - "BLAST processed"
+ - "queries"
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/fragmented_busco_sequences.tar.gz
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/multi_copy_busco_sequences.tar.gz
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/busco_sequences/single_copy_busco_sequences.tar.gz
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/full_table.tsv
+ md5sum: 24df25199e13c88bd892fc3e7b541ca0
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/hmmer_output.tar.gz
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/missing_busco_list.tsv
+ md5sum: e7232e2b8cca4fdfdd9e363b39ebbc81
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/short_summary.json
+ contains:
+ - "one_line_summary"
+ - "mode"
+ - "dataset"
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/short_summary.txt
+ contains:
+ - "# BUSCO version is:"
+ - "Results:"
+ - "busco:"
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/run_bacteria_odb10/single_copy_proteins.faa
+ md5sum: e04b9465733577ae6e4bccb7aa01e720
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1024388at2.faa
+ md5sum: 7333c39a20258f20c7019ea0cd83157c
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1054741at2.faa
+ md5sum: ebb481e77a824685fbe04d8a2f3a0d7d
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1093223at2.faa
+ md5sum: 34621c7d499034e8f8e6b92fd4020a93
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1151822at2.faa
+ md5sum: aa89ca381c1c70c9c4e1380351ca7c2a
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/143460at2.faa
+ md5sum: f2e91d78b8dd3722840378789f29e8c8
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1491686at2.faa
+ md5sum: 73c25aef5c9cba7f4151804941b146ea
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1504821at2.faa
+ md5sum: cda556018d1f84ebe517e89f6fc107d0
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1574817at2.faa
+ md5sum: a9096c9fb8b25c78a72871ab0463acdc
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1592033at2.faa
+ md5sum: e463d25ce186c0cebfd749474f3a4c64
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1623045at2.faa
+ md5sum: f2cfd241590c6d8377286d6135480937
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1661836at2.faa
+ md5sum: 586569546fb9861502468e3d9ba2775c
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1674344at2.faa
+ md5sum: 24c658bee14ad84b062d81ad96642eb8
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1698718at2.faa
+ md5sum: 0b8e26ddf5149bbd8805be7af125208d
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/1990650at2.faa
+ md5sum: 159320712ee01fb2ccb31a25df44eead
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/223233at2.faa
+ md5sum: 812629c0b06ac3d18661c2ca78de0c08
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/402899at2.faa
+ md5sum: f7ff4e1591342d30b77392a2e84b57d9
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/505485at2.faa
+ md5sum: 7b34a24fc49c540d46fcf96ff5129564
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/665824at2.faa
+ md5sum: 4cff2df64f6bcaff8bc19c234c8bcccd
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/776861at2.faa
+ md5sum: 613af7a3fea30ea2bece66f603b9284a
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/874197at2.faa
+ md5sum: a7cd1b13c9ef91c7ef4e31614166f197
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/932854at2.faa
+ md5sum: fe313ffd5efdb0fed887a04fba352552
+ - path: output/busco/test-bacteria_odb10-busco/test1.contigs.fa/translated_proteins/95696at2.faa
+ md5sum: 4e1f30a2fea4dfbf9bb7fae2700622a0
+ - path: output/busco/versions.yml
diff --git a/modules/pfr/busco/busco/tests/tags.yml b/modules/pfr/busco/busco/tests/tags.yml
new file mode 100644
index 00000000..7c4d2835
--- /dev/null
+++ b/modules/pfr/busco/busco/tests/tags.yml
@@ -0,0 +1,2 @@
+busco/busco:
+ - "modules/pfr/busco/busco/**"
diff --git a/modules/pfr/busco/generateplot/environment.yml b/modules/pfr/busco/generateplot/environment.yml
new file mode 100644
index 00000000..1ca5babb
--- /dev/null
+++ b/modules/pfr/busco/generateplot/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "busco_generateplot"
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::busco=5.7.1
diff --git a/modules/local/busco_plot.nf b/modules/pfr/busco/generateplot/main.nf
similarity index 54%
rename from modules/local/busco_plot.nf
rename to modules/pfr/busco/generateplot/main.nf
index e8f70189..6a4b3393 100644
--- a/modules/local/busco_plot.nf
+++ b/modules/pfr/busco/generateplot/main.nf
@@ -1,26 +1,30 @@
-process BUSCO_PLOT {
- tag 'all summaries'
+process BUSCO_GENERATEPLOT {
label 'process_single'
- conda "bioconda::busco=5.6.1"
+ conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/busco:5.6.1--pyhdfd78af_0':
- 'biocontainers/busco:5.6.1--pyhdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/busco:5.7.1--pyhdfd78af_0':
+ 'biocontainers/busco:5.7.1--pyhdfd78af_0' }"
input:
- path "short_summary.*", stageAs: 'busco/*'
+ path short_summary_txt, stageAs: 'busco/*'
output:
- path 'busco/*.png' , emit: png
+ path '*.png' , emit: png
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: 'busco_figure'
"""
generate_plot.py \\
- -wd ./busco
+ $args \\
+ -wd busco
+
+ mv ./busco/busco_figure.png ${prefix}.png
cat <<-END_VERSIONS > versions.yml
"${task.process}":
@@ -29,10 +33,9 @@ process BUSCO_PLOT {
"""
stub:
+ def prefix = task.ext.prefix ?: 'busco_figure'
"""
- mkdir -p busco
-
- touch busco/summary_plot.png
+ touch ${prefix}.png
cat <<-END_VERSIONS > versions.yml
"${task.process}":
diff --git a/modules/pfr/busco/generateplot/meta.yml b/modules/pfr/busco/generateplot/meta.yml
new file mode 100644
index 00000000..796f32b4
--- /dev/null
+++ b/modules/pfr/busco/generateplot/meta.yml
@@ -0,0 +1,35 @@
+name: "busco_generateplot"
+description: BUSCO plot generation tool
+keywords:
+ - genome
+ - fasta
+ - annotation
+ - busco
+ - transcriptome
+ - quality control
+tools:
+ - busco:
+ description: BUSCO provides measures for quantitative assessment of genome assembly, gene set, and transcriptome completeness based on evolutionarily informed expectations of gene content from near-universal single-copy orthologs selected from OrthoDB.
+ homepage: https://busco.ezlab.org/
+ documentation: https://busco.ezlab.org/busco_userguide.html
+ tool_dev_url: https://gitlab.com/ezlab/busco
+ doi: "10.1007/978-1-4939-9173-0_14"
+ licence: ["MIT"]
+input:
+ - short_summary_txt:
+ type: file
+ description: One or more short summary txt files from BUSCO
+ pattern: "short_summary.*.txt"
+output:
+ - png:
+ type: file
+ description: A summary plot in png format
+ pattern: "*.png"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@GallVp"
+maintainers:
+ - "@GallVp"
diff --git a/modules/pfr/busco/generateplot/tests/main.nf.test b/modules/pfr/busco/generateplot/tests/main.nf.test
new file mode 100644
index 00000000..853dc5ed
--- /dev/null
+++ b/modules/pfr/busco/generateplot/tests/main.nf.test
@@ -0,0 +1,72 @@
+nextflow_process {
+
+ name "Test Process BUSCO_GENERATEPLOT"
+ script "../main.nf"
+ process "BUSCO_GENERATEPLOT"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "busco"
+ tag "busco/busco"
+ tag "busco/generateplot"
+
+ test("bacteroides_fragilis-genome_fna_gz") {
+
+ setup {
+ run("BUSCO_BUSCO") {
+ script "../../busco"
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true)
+ ]
+ input[1] = 'genome'
+ input[2] = 'bacteria_odb10'
+ input[3] = []
+ input[4] = []
+ """
+ }
+ }
+ }
+
+ when {
+ process {
+ """
+ input[0] = BUSCO_BUSCO.out.short_summaries_txt.map { meta, summary -> summary }
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.versions).match("versions") },
+ { assert process.out.png != null } // PNGs with same data but different meta-data. Not sure how to get around this, yet!
+ )
+ }
+
+ }
+
+ test("stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = file(params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true)
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+}
\ No newline at end of file
diff --git a/modules/pfr/busco/generateplot/tests/main.nf.test.snap b/modules/pfr/busco/generateplot/tests/main.nf.test.snap
new file mode 100644
index 00000000..d9773ec6
--- /dev/null
+++ b/modules/pfr/busco/generateplot/tests/main.nf.test.snap
@@ -0,0 +1,37 @@
+{
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,726fa3440ea3a0b2e9d032d7e4d25e74"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-03T15:40:01.523993"
+ },
+ "stub": {
+ "content": [
+ {
+ "0": [
+ "busco_figure.png:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ],
+ "1": [
+ "versions.yml:md5,726fa3440ea3a0b2e9d032d7e4d25e74"
+ ],
+ "png": [
+ "busco_figure.png:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ],
+ "versions": [
+ "versions.yml:md5,726fa3440ea3a0b2e9d032d7e4d25e74"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-03T15:40:11.864276"
+ }
+}
\ No newline at end of file
diff --git a/modules/pfr/busco/generateplot/tests/tags.yml b/modules/pfr/busco/generateplot/tests/tags.yml
new file mode 100644
index 00000000..b6548a64
--- /dev/null
+++ b/modules/pfr/busco/generateplot/tests/tags.yml
@@ -0,0 +1,2 @@
+busco/generateplot:
+ - "modules/pfr/busco/generateplot/**"
diff --git a/modules/pfr/bwa/index/environment.yml b/modules/pfr/bwa/index/environment.yml
index 5d3cb323..126e0034 100644
--- a/modules/pfr/bwa/index/environment.yml
+++ b/modules/pfr/bwa/index/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::bwa=0.7.17
+ - bioconda::bwa=0.7.18
diff --git a/modules/pfr/bwa/index/main.nf b/modules/pfr/bwa/index/main.nf
index 24b5a2ea..2e48b6ca 100644
--- a/modules/pfr/bwa/index/main.nf
+++ b/modules/pfr/bwa/index/main.nf
@@ -4,8 +4,8 @@ process BWA_INDEX {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/bwa:0.7.17--hed695b0_7' :
- 'biocontainers/bwa:0.7.17--hed695b0_7' }"
+ 'https://depot.galaxyproject.org/singularity/bwa:0.7.18--he4a0461_0' :
+ 'biocontainers/bwa:0.7.18--he4a0461_0' }"
input:
tuple val(meta), path(fasta)
diff --git a/modules/pfr/bwa/index/meta.yml b/modules/pfr/bwa/index/meta.yml
index 730628d0..4c7d30f3 100644
--- a/modules/pfr/bwa/index/meta.yml
+++ b/modules/pfr/bwa/index/meta.yml
@@ -43,3 +43,4 @@ authors:
maintainers:
- "@drpatelh"
- "@maxulysse"
+ - "@gallvp"
diff --git a/modules/pfr/bwa/index/tests/main.nf.test.snap b/modules/pfr/bwa/index/tests/main.nf.test.snap
index e51ad5bf..7c8f0465 100644
--- a/modules/pfr/bwa/index/tests/main.nf.test.snap
+++ b/modules/pfr/bwa/index/tests/main.nf.test.snap
@@ -17,7 +17,7 @@
]
],
"1": [
- "versions.yml:md5,0f20525da90e7489a7ebb02adca3265f"
+ "versions.yml:md5,a64462ac7dfb21f4ade9b02e7f65c5bb"
],
"index": [
[
@@ -34,10 +34,14 @@
]
],
"versions": [
- "versions.yml:md5,0f20525da90e7489a7ebb02adca3265f"
+ "versions.yml:md5,a64462ac7dfb21f4ade9b02e7f65c5bb"
]
}
],
- "timestamp": "2023-10-17T17:20:20.180927714"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-16T11:40:09.925307"
}
}
\ No newline at end of file
diff --git a/modules/pfr/bwa/mem/environment.yml b/modules/pfr/bwa/mem/environment.yml
index 3f136d0a..3aa9f0cc 100644
--- a/modules/pfr/bwa/mem/environment.yml
+++ b/modules/pfr/bwa/mem/environment.yml
@@ -4,7 +4,7 @@ channels:
- bioconda
- defaults
dependencies:
- - bwa=0.7.17
+ - bwa=0.7.18
# renovate: datasource=conda depName=bioconda/samtools
- - samtools=1.19.2
- - htslib=1.19.1
+ - samtools=1.20
+ - htslib=1.20.0
diff --git a/modules/pfr/bwa/mem/main.nf b/modules/pfr/bwa/mem/main.nf
index 54ec0f16..9c815f0c 100644
--- a/modules/pfr/bwa/mem/main.nf
+++ b/modules/pfr/bwa/mem/main.nf
@@ -4,17 +4,21 @@ process BWA_MEM {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:a34558545ae1413d94bde4578787ebef08027945-0' :
- 'biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:a34558545ae1413d94bde4578787ebef08027945-0' }"
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:1bd8542a8a0b42e0981337910954371d0230828e-0' :
+ 'biocontainers/mulled-v2-fe8faa35dbf6dc65a0f7f5d4ea12e31a79f73e40:1bd8542a8a0b42e0981337910954371d0230828e-0' }"
input:
- tuple val(meta), path(reads)
+ tuple val(meta) , path(reads)
tuple val(meta2), path(index)
+ tuple val(meta3), path(fasta)
val sort_bam
output:
- tuple val(meta), path("*.bam"), emit: bam
- path "versions.yml" , emit: versions
+ tuple val(meta), path("*.bam") , emit: bam, optional: true
+ tuple val(meta), path("*.cram") , emit: cram, optional: true
+ tuple val(meta), path("*.csi") , emit: csi, optional: true
+ tuple val(meta), path("*.crai") , emit: crai, optional: true
+ path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
@@ -24,6 +28,13 @@ process BWA_MEM {
def args2 = task.ext.args2 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def samtools_command = sort_bam ? 'sort' : 'view'
+ def extension = args2.contains("--output-fmt sam") ? "sam" :
+ args2.contains("--output-fmt cram") ? "cram":
+ sort_bam && args2.contains("-O cram")? "cram":
+ !sort_bam && args2.contains("-C") ? "cram":
+ "bam"
+ def reference = fasta && extension=="cram" ? "--reference ${fasta}" : ""
+ if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output"
"""
INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'`
@@ -32,7 +43,7 @@ process BWA_MEM {
-t $task.cpus \\
\$INDEX \\
$reads \\
- | samtools $samtools_command $args2 --threads $task.cpus -o ${prefix}.bam -
+ | samtools $samtools_command $args2 ${reference} --threads $task.cpus -o ${prefix}.${extension} -
cat <<-END_VERSIONS > versions.yml
"${task.process}":
@@ -42,9 +53,19 @@ process BWA_MEM {
"""
stub:
+ def args = task.ext.args ?: ''
+ def args2 = task.ext.args2 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
+ def samtools_command = sort_bam ? 'sort' : 'view'
+ def extension = args2.contains("--output-fmt sam") ? "sam" :
+ args2.contains("--output-fmt cram") ? "cram":
+ sort_bam && args2.contains("-O cram")? "cram":
+ !sort_bam && args2.contains("-C") ? "cram":
+ "bam"
"""
- touch ${prefix}.bam
+ touch ${prefix}.${extension}
+ touch ${prefix}.csi
+ touch ${prefix}.crai
cat <<-END_VERSIONS > versions.yml
"${task.process}":
diff --git a/modules/pfr/bwa/mem/meta.yml b/modules/pfr/bwa/mem/meta.yml
index 440fb1f9..1532c261 100644
--- a/modules/pfr/bwa/mem/meta.yml
+++ b/modules/pfr/bwa/mem/meta.yml
@@ -37,6 +37,10 @@ input:
type: file
description: BWA genome index files
pattern: "Directory containing BWA index *.{amb,ann,bwt,pac,sa}"
+ - fasta:
+ type: file
+ description: Reference genome in FASTA format
+ pattern: "*.{fasta,fa}"
- sort_bam:
type: boolean
description: use samtools sort (true) or samtools view (false)
@@ -46,6 +50,18 @@ output:
type: file
description: Output BAM file containing read alignments
pattern: "*.{bam}"
+ - cram:
+ type: file
+ description: Output CRAM file containing read alignments
+ pattern: "*.{cram}"
+ - csi:
+ type: file
+ description: Optional index file for BAM file
+ pattern: "*.{csi}"
+ - crai:
+ type: file
+ description: Optional index file for CRAM file
+ pattern: "*.{crai}"
- versions:
type: file
description: File containing software versions
@@ -53,6 +69,8 @@ output:
authors:
- "@drpatelh"
- "@jeremy1805"
+ - "@matthdsm"
maintainers:
- "@drpatelh"
- "@jeremy1805"
+ - "@matthdsm"
diff --git a/modules/pfr/bwa/mem/tests/main.nf.test b/modules/pfr/bwa/mem/tests/main.nf.test
index 2696e4bf..463b76f8 100644
--- a/modules/pfr/bwa/mem/tests/main.nf.test
+++ b/modules/pfr/bwa/mem/tests/main.nf.test
@@ -35,7 +35,8 @@ nextflow_process {
]
]
input[1] = BWA_INDEX.out.index
- input[2] = false
+ input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]
+ input[3] = false
"""
}
}
@@ -43,7 +44,14 @@ nextflow_process {
then {
assertAll(
{ assert process.success },
- { assert snapshot(process.out).match() }
+ { assert snapshot(
+ process.out.cram,
+ process.out.csi,
+ process.out.crai,
+ process.out.versions,
+ file(process.out.bam[0][1]).name
+ ).match()
+ }
)
}
@@ -75,7 +83,8 @@ nextflow_process {
]
]
input[1] = BWA_INDEX.out.index
- input[2] = true
+ input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]
+ input[3] = true
"""
}
}
@@ -83,7 +92,14 @@ nextflow_process {
then {
assertAll(
{ assert process.success },
- { assert snapshot(process.out).match() }
+ { assert snapshot(
+ process.out.cram,
+ process.out.csi,
+ process.out.crai,
+ process.out.versions,
+ file(process.out.bam[0][1]).name
+ ).match()
+ }
)
}
@@ -116,7 +132,8 @@ nextflow_process {
]
]
input[1] = BWA_INDEX.out.index
- input[2] = false
+ input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]
+ input[3] = false
"""
}
}
@@ -124,7 +141,14 @@ nextflow_process {
then {
assertAll(
{ assert process.success },
- { assert snapshot(process.out).match() }
+ { assert snapshot(
+ process.out.cram,
+ process.out.csi,
+ process.out.crai,
+ process.out.versions,
+ file(process.out.bam[0][1]).name
+ ).match()
+ }
)
}
@@ -157,7 +181,104 @@ nextflow_process {
]
]
input[1] = BWA_INDEX.out.index
- input[2] = true
+ input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]
+ input[3] = true
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ process.out.cram,
+ process.out.csi,
+ process.out.crai,
+ process.out.versions,
+ file(process.out.bam[0][1]).name
+ ).match()
+ }
+ )
+ }
+
+ }
+
+ test("Paired-End - no fasta") {
+
+ setup {
+ run("BWA_INDEX") {
+ script "../../index/main.nf"
+ process {
+ """
+ input[0] = [
+ [id: 'test'],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ """
+ }
+ }
+ }
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+ ]
+ ]
+ input[1] = BWA_INDEX.out.index
+ input[2] = [[:],[]]
+ input[3] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ process.out.cram,
+ process.out.csi,
+ process.out.crai,
+ process.out.versions,
+ file(process.out.bam[0][1]).name
+ ).match()
+ }
+ )
+ }
+
+ }
+
+ test("Single-end - stub") {
+ options "-stub"
+ setup {
+ run("BWA_INDEX") {
+ script "../../index/main.nf"
+ process {
+ """
+ input[0] = [
+ [id: 'test'],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ """
+ }
+ }
+ }
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:true ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ]
+ ]
+ input[1] = BWA_INDEX.out.index
+ input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]
+ input[3] = false
"""
}
}
@@ -165,9 +286,56 @@ nextflow_process {
then {
assertAll(
{ assert process.success },
- { assert snapshot(process.out).match() }
+ { assert snapshot(
+ file(process.out.bam[0][1]).name,
+ file(process.out.csi[0][1]).name,
+ process.out.versions
+ ).match() }
)
}
+ }
+
+ test("Paired-end - stub") {
+ options "-stub"
+ setup {
+ run("BWA_INDEX") {
+ script "../../index/main.nf"
+ process {
+ """
+ input[0] = [
+ [id: 'test'],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
+ ]
+ """
+ }
+ }
+ }
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+ ]
+ ]
+ input[1] = BWA_INDEX.out.index
+ input[2] = [[id: 'test'],file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)]
+ input[3] = false
+ """
+ }
+ }
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(
+ file(process.out.bam[0][1]).name,
+ file(process.out.csi[0][1]).name,
+ process.out.versions
+ ).match() }
+ )
+ }
}
}
diff --git a/modules/pfr/bwa/mem/tests/main.nf.test.snap b/modules/pfr/bwa/mem/tests/main.nf.test.snap
index e4fd8cc0..038ee7b7 100644
--- a/modules/pfr/bwa/mem/tests/main.nf.test.snap
+++ b/modules/pfr/bwa/mem/tests/main.nf.test.snap
@@ -1,142 +1,140 @@
{
"Single-End": {
"content": [
- {
- "0": [
- [
- {
- "id": "test",
- "single_end": true
- },
- "test.bam:md5,a74710a0345b4717bb4431bf9c257120"
- ]
- ],
- "1": [
- "versions.yml:md5,c32f719a68bb2966c8511d808154d42d"
- ],
- "bam": [
- [
- {
- "id": "test",
- "single_end": true
- },
- "test.bam:md5,a74710a0345b4717bb4431bf9c257120"
- ]
- ],
- "versions": [
- "versions.yml:md5,c32f719a68bb2966c8511d808154d42d"
- ]
- }
+ [
+
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ [
+ "versions.yml:md5,478b816fbd37871f5e8c617833d51d80"
+ ],
+ "test.bam"
],
"meta": {
"nf-test": "0.8.4",
- "nextflow": "24.01.0"
+ "nextflow": "23.10.1"
},
- "timestamp": "2024-02-19T11:11:48.440661587"
+ "timestamp": "2024-05-20T08:44:32.953673185"
},
"Single-End Sort": {
"content": [
- {
- "0": [
- [
- {
- "id": "test",
- "single_end": true
- },
- "test.bam:md5,cb1e038bc4d990683fa485d632550b54"
- ]
- ],
- "1": [
- "versions.yml:md5,c32f719a68bb2966c8511d808154d42d"
- ],
- "bam": [
- [
- {
- "id": "test",
- "single_end": true
- },
- "test.bam:md5,cb1e038bc4d990683fa485d632550b54"
- ]
- ],
- "versions": [
- "versions.yml:md5,c32f719a68bb2966c8511d808154d42d"
- ]
- }
+ [
+
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ [
+ "versions.yml:md5,478b816fbd37871f5e8c617833d51d80"
+ ],
+ "test.bam"
],
"meta": {
"nf-test": "0.8.4",
- "nextflow": "24.01.0"
+ "nextflow": "23.10.1"
},
- "timestamp": "2024-02-19T11:11:56.086493265"
+ "timestamp": "2024-05-20T08:44:45.27066093"
},
"Paired-End": {
"content": [
- {
- "0": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test.bam:md5,aea123a3828a99da1906126355f15a12"
- ]
- ],
- "1": [
- "versions.yml:md5,c32f719a68bb2966c8511d808154d42d"
- ],
- "bam": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test.bam:md5,aea123a3828a99da1906126355f15a12"
- ]
- ],
- "versions": [
- "versions.yml:md5,c32f719a68bb2966c8511d808154d42d"
- ]
- }
+ [
+
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ [
+ "versions.yml:md5,478b816fbd37871f5e8c617833d51d80"
+ ],
+ "test.bam"
],
"meta": {
"nf-test": "0.8.4",
- "nextflow": "24.01.0"
+ "nextflow": "23.10.1"
},
- "timestamp": "2024-02-19T11:12:03.474974773"
+ "timestamp": "2024-05-20T08:44:57.706852274"
},
"Paired-End Sort": {
"content": [
- {
- "0": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test.bam:md5,4682087bcdc3617384b375093fecd8dd"
- ]
- ],
- "1": [
- "versions.yml:md5,c32f719a68bb2966c8511d808154d42d"
- ],
- "bam": [
- [
- {
- "id": "test",
- "single_end": false
- },
- "test.bam:md5,4682087bcdc3617384b375093fecd8dd"
- ]
- ],
- "versions": [
- "versions.yml:md5,c32f719a68bb2966c8511d808154d42d"
- ]
- }
+ [
+
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ [
+ "versions.yml:md5,478b816fbd37871f5e8c617833d51d80"
+ ],
+ "test.bam"
],
"meta": {
"nf-test": "0.8.4",
- "nextflow": "24.01.0"
+ "nextflow": "23.10.1"
},
- "timestamp": "2024-02-19T11:12:10.721510817"
+ "timestamp": "2024-05-20T08:45:10.376505036"
+ },
+ "Single-end - stub": {
+ "content": [
+ "test.bam",
+ "test.csi",
+ [
+ "versions.yml:md5,478b816fbd37871f5e8c617833d51d80"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-20T08:46:07.182072398"
+ },
+ "Paired-End - no fasta": {
+ "content": [
+ [
+
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ [
+ "versions.yml:md5,478b816fbd37871f5e8c617833d51d80"
+ ],
+ "test.bam"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-20T08:45:53.813076501"
+ },
+ "Paired-end - stub": {
+ "content": [
+ "test.bam",
+ "test.csi",
+ [
+ "versions.yml:md5,478b816fbd37871f5e8c617833d51d80"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-20T08:46:18.412916364"
}
}
\ No newline at end of file
diff --git a/modules/pfr/cat/cat/tests/main.nf.test b/modules/pfr/cat/cat/tests/main.nf.test
index fcee2d19..0ac8549e 100644
--- a/modules/pfr/cat/cat/tests/main.nf.test
+++ b/modules/pfr/cat/cat/tests/main.nf.test
@@ -175,4 +175,4 @@ nextflow_process {
)
}
}
-}
+}
\ No newline at end of file
diff --git a/modules/pfr/custom/checkgff3fastacorrespondence/main.nf b/modules/pfr/custom/checkgff3fastacorrespondence/main.nf
deleted file mode 100644
index c1abb6f4..00000000
--- a/modules/pfr/custom/checkgff3fastacorrespondence/main.nf
+++ /dev/null
@@ -1,25 +0,0 @@
-process CUSTOM_CHECKGFF3FASTACORRESPONDENCE {
- tag "$meta.id"
- label 'process_single'
-
- conda "${moduleDir}/environment.yml"
- container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1':
- 'biocontainers/samtools:1.18--h50ea8bc_1' }"
-
- input:
- tuple val(meta), path(gff3)
- path(fasta)
-
- output:
- tuple val(meta), path('*.success.log') , emit: success_log , optional: true
- tuple val(meta), path('*.error.log') , emit: error_log , optional: true
- path "versions.yml" , emit: versions
-
- when:
- task.ext.when == null || task.ext.when
-
- shell:
- prefix = task.ext.prefix ?: "${meta.id}"
- template 'check_gff3_fasta_correspondence.sh'
-}
diff --git a/modules/pfr/custom/checkgff3fastacorrespondence/templates/check_gff3_fasta_correspondence.sh b/modules/pfr/custom/checkgff3fastacorrespondence/templates/check_gff3_fasta_correspondence.sh
deleted file mode 100755
index 611c64b3..00000000
--- a/modules/pfr/custom/checkgff3fastacorrespondence/templates/check_gff3_fasta_correspondence.sh
+++ /dev/null
@@ -1,70 +0,0 @@
-#!/usr/bin/env bash
-
-# Bump VERSION on edit
-VERSION="v1"
-
-gff3_file="!{gff3}"
-fasta_file="!{fasta}"
-out_prefix="!{prefix}"
-task_process="!{task.process}"
-
-# Record versions
-cat <<-END_VERSIONS > versions.yml
-"${task_process}":
- samtools: $(echo $(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*$//' )
-END_VERSIONS
-
-# Requires
-# samtools faidx
-
-## STEP 1
-# Check that gff3 has no identifers that are not in fasta (fasta can
-# have ids that are not in gff3 since not all assembly units have gff3 records
-
-# Extract identifiers from the GFF3 file
-gff3_identifiers=$(grep -v '^#' "$gff3_file" | awk '{print $1}' | sort -u)
-
-# Extract identifiers from the FASTA file
-fasta_identifiers=$(grep '^>' "$fasta_file" | awk '{print substr($1, 2)}' | sort -u)
-
-# Compare identifiers and find any that are present in the GFF3 but not in the FASTA
-missing_identifiers=$(comm -23 <(echo "$gff3_identifiers") <(echo "$fasta_identifiers"))
-
-# Check if any missing identifiers were found
-if [[ -n "$missing_identifiers" ]]; then
- touch "${out_prefix}.error.log"
- echo "Failed to validate gff3 file for: $tag_label" >> "${out_prefix}.error.log"
- echo "Fasta file: $fasta_file" >> "${out_prefix}.error.log"
- echo "Gff3 file: $gff3_file" >> "${out_prefix}.error.log"
- echo "GFF3 file contains identifiers not present in FASTA:" >> "${out_prefix}.error.log"
- echo "$missing_identifiers" >> "${out_prefix}.error.log"
- exit 0
-fi
-
-## STEP 2
-# check that there are no coordiantes in gff3 for any seqid that are
-# greater than the seq length of the paretn fasta entry
-
-# Compute sequence lengths using samtools faidx
-samtools faidx "$fasta_file" | cut -f 1,2 > sequence_lengths.txt
-
-# Check GFF3 file for coordinates exceeding sequence lengths
-while IFS=$'\t' read -r seqname source feature start end score strand frame attributes && \
- read -r seq seq_length <&3; do
- if [[ $start -gt $seq_length || $end -gt $seq_length ]]; then
- touch "${out_prefix}.error.log"
- echo "Failed to validate gff3 file for: $tag_label" >> "${out_prefix}.error.log"
- echo "Fasta file: $fasta_file" >> "${out_prefix}.error.log"
- echo "Gff3 file: $gff3_file" >> "${out_prefix}.error.log"
- echo "Coordinates exceed sequence length in GFF3 file:" >> "${out_prefix}.error.log"
- echo "Sequence: $seqname" >> "${out_prefix}.error.log"
- echo "Sequence length: $seq_length" >> "${out_prefix}.error.log"
- echo "Start: $start" >> "${out_prefix}.error.log"
- echo "End: $end" >> "${out_prefix}.error.log"
- exit 0
- fi
-done < "$gff3_file" 3< "sequence_lengths.txt"
-
-touch "${out_prefix}.success.log"
-echo "All tests passed..." >> "${out_prefix}.success.log"
-exit 0
diff --git a/modules/pfr/custom/checkgff3fastacorrespondence/tests/main.nf.test b/modules/pfr/custom/checkgff3fastacorrespondence/tests/main.nf.test
deleted file mode 100644
index 91578e5b..00000000
--- a/modules/pfr/custom/checkgff3fastacorrespondence/tests/main.nf.test
+++ /dev/null
@@ -1,70 +0,0 @@
-nextflow_process {
-
- name "Test Process CUSTOM_CHECKGFF3FASTACORRESPONDENCE"
- script "../main.nf"
- process "CUSTOM_CHECKGFF3FASTACORRESPONDENCE"
-
- tag "modules"
- tag "modules_nfcore"
- tag "custom"
- tag "custom/checkgff3fastacorrespondence"
-
- test("sarscov2-fasta-gff3-success") {
-
- when {
- process {
- """
- input[0] = [
- [ id:'test' ], // meta map
- file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
- ]
-
- input[1] = [
- file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
- ]
- """
- }
- }
-
- then {
- assertAll(
- { assert process.success },
- { assert snapshot(process.out).match() },
- { assert process.out.error_log == [] },
- { assert process.out.success_log != null },
- { assert path(process.out.success_log.get(0).get(1)).getText().contains("All tests passed...")},
- )
- }
-
- }
-
- test("sarscov2-gff3-homo_sapiens-fasta-error") {
-
- when {
- process {
- """
- input[0] = [
- [ id:'test' ], // meta map
- file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
- ]
-
- input[1] = [
- file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
- ]
- """
- }
- }
-
- then {
- assertAll(
- { assert process.success },
- { assert snapshot(process.out).match() },
- { assert process.out.success_log == [] },
- { assert process.out.error_log != null },
- { assert path(process.out.error_log.get(0).get(1)).getText().contains("GFF3 file contains identifiers not present in FASTA")},
- )
- }
-
- }
-
-}
diff --git a/modules/pfr/custom/checkgff3fastacorrespondence/tests/tags.yml b/modules/pfr/custom/checkgff3fastacorrespondence/tests/tags.yml
deleted file mode 100644
index 708130d8..00000000
--- a/modules/pfr/custom/checkgff3fastacorrespondence/tests/tags.yml
+++ /dev/null
@@ -1,2 +0,0 @@
-custom/checkgff3fastacorrespondence:
- - "modules/pfr/custom/checkgff3fastacorrespondence/**"
diff --git a/modules/pfr/custom/relabelfasta/environment.yml b/modules/pfr/custom/relabelfasta/environment.yml
new file mode 100644
index 00000000..c45ae6ad
--- /dev/null
+++ b/modules/pfr/custom/relabelfasta/environment.yml
@@ -0,0 +1,10 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "custom_relabelfasta"
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - biopython==1.75
+ - python==3.8.13
diff --git a/modules/pfr/custom/relabelfasta/main.nf b/modules/pfr/custom/relabelfasta/main.nf
new file mode 100644
index 00000000..6b851a83
--- /dev/null
+++ b/modules/pfr/custom/relabelfasta/main.nf
@@ -0,0 +1,40 @@
+process CUSTOM_RELABELFASTA {
+ tag "$meta.id"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/biopython:1.75':
+ 'biocontainers/biopython:1.75' }"
+
+ input:
+ tuple val(meta), path(fasta)
+ path(labels)
+
+ output:
+ tuple val(meta), path("*.fasta") , emit: fasta
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ prefix = task.ext.prefix ?: "${meta.id}"
+ if ( "${prefix}.fasta" == "$fasta" ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+ template 'relabelfasta.py'
+
+ stub:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ if ( "${prefix}.fasta" == "$fasta" ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+ """
+ touch ${prefix}.fasta
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ python: \$(python --version | cut -d' ' -f2)
+ biopython: \$(pip list | grep "biopython" | cut -d' ' -f3)
+ END_VERSIONS
+ """
+}
diff --git a/modules/pfr/custom/relabelfasta/meta.yml b/modules/pfr/custom/relabelfasta/meta.yml
new file mode 100644
index 00000000..7935872a
--- /dev/null
+++ b/modules/pfr/custom/relabelfasta/meta.yml
@@ -0,0 +1,56 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "custom_relabelfasta"
+description: |
+ Relabels IDs in a fasta file based on a TSV label list
+ consisting of original (first column) and new IDs (second column)
+keywords:
+ - genomics
+ - fasta
+ - relabel
+ - id
+ - rename
+ - sequence
+tools:
+ - "python":
+ description: |
+ Python is a programming language that lets you work quickly
+ and integrate systems more effectively
+ homepage: "https://www.python.org"
+ documentation: "https://docs.python.org/3/"
+ tool_dev_url: "https://github.com/python/cpython"
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - fasta:
+ type: file
+ description: Input fasta file
+ pattern: "*.fasta"
+ - labels:
+ type: file
+ description: |
+ A TSV file with original (first column) and new ids (second column)
+ pattern: "*.tsv"
+
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - fasta:
+ type: file
+ description: Output fasta file
+ pattern: "*.fasta"
+authors:
+ - "@GallVp"
+maintainers:
+ - "@GallVp"
diff --git a/modules/pfr/custom/relabelfasta/templates/relabelfasta.py b/modules/pfr/custom/relabelfasta/templates/relabelfasta.py
new file mode 100755
index 00000000..039941c6
--- /dev/null
+++ b/modules/pfr/custom/relabelfasta/templates/relabelfasta.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+
+import re
+from importlib.metadata import version
+from platform import python_version
+
+from Bio import SeqIO
+
+# The input fasta file path
+fasta_file_path = "$fasta"
+labels_file_path = "$labels"
+output_prefix = "$prefix"
+
+
+def create_name_mapping_from_tsv(file_path):
+    """Read a two-column TSV of original -> new IDs; raise ValueError on malformed or repeated entries."""
+    dictionary = {}
+    # New IDs seen so far: constant-time duplicate check instead of scanning dictionary.values() per row
+    seen_new_ids = set()
+
+    with open(file_path) as tsv_file:
+        for line in tsv_file:
+            columns = line.strip().split("\\t")
+            if len(columns) != 2:
+                raise ValueError(f"{file_path} should be a two column TSV file")
+
+            orig_id, new_id = columns
+
+            if orig_id in dictionary:
+                raise ValueError(f"{orig_id} is repeated in {file_path}. Each sequence ID should be unique")
+
+            if new_id in seen_new_ids:
+                raise ValueError(f"{new_id} is repeated in {file_path}. Each sequence label should be unique")
+
+            dictionary[orig_id] = new_id
+            seen_new_ids.add(new_id)
+
+    return dictionary
+
+
+def write_fasta_with_new_ids(fasta_file_path, orig_to_new_id_map, file_prefix):
+    """Write `{file_prefix}.fasta` holding only the mapped sequences, relabelled and with empty descriptions.
+
+    Raises ValueError if no sequence is selected or if a mapped ID is absent from the fasta.
+    """
+    old_fasta_file_obj = SeqIO.parse(fasta_file_path, "fasta")
+
+    replaced_records = []
+    for record in old_fasta_file_obj:
+        orig_id = record.id
+
+        if orig_id not in orig_to_new_id_map:
+            continue
+
+        new_id = orig_to_new_id_map[orig_id]
+        record.id = new_id
+        record.description = ""
+        replaced_records.append(record)
+
+    if not replaced_records:
+        raise ValueError(
+            f"None of the sequences in {fasta_file_path} are selected by the input label list {orig_to_new_id_map}"
+        )
+
+    # A set makes each membership test constant-time instead of a scan over every selected record
+    selected_ids = {record.id for record in replaced_records}
+    missing_ids = [
+        orig_id for (orig_id, new_id) in orig_to_new_id_map.items() if new_id not in selected_ids
+    ]
+
+    if missing_ids:
+        raise ValueError(f"Some sequences {missing_ids} are missing from the fasta {fasta_file_path}")
+
+    SeqIO.write(replaced_records, f"{file_prefix}.fasta", "fasta")
+
+
+def match_substrings(substrings, target_string):
+    # Alternation of the escaped substrings; findall returns every non-overlapping hit in order
+    alternation = "|".join(re.escape(item) for item in substrings)
+    return re.findall(alternation, target_string)
+
+
+if __name__ == "__main__":
+ orig_to_new_id_map = create_name_mapping_from_tsv(labels_file_path)
+
+ # Write versions before relabelling, so versions.yml exists even if write_fasta_with_new_ids raises
+ with open("versions.yml", "w") as f_versions:
+ f_versions.write('"${task.process}":\\n')
+ f_versions.write(f" python: {python_version()}\\n")
+ f_versions.write(f" biopython: {version('biopython')}\\n")
+
+ write_fasta_with_new_ids(fasta_file_path, orig_to_new_id_map, output_prefix)
diff --git a/modules/pfr/custom/relabelfasta/tests/main.nf.test b/modules/pfr/custom/relabelfasta/tests/main.nf.test
new file mode 100644
index 00000000..64ccb08f
--- /dev/null
+++ b/modules/pfr/custom/relabelfasta/tests/main.nf.test
@@ -0,0 +1,153 @@
+nextflow_process {
+
+ name "Test Process CUSTOM_RELABELFASTA"
+ script "../main.nf"
+ process "CUSTOM_RELABELFASTA"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "custom"
+ tag "custom/relabelfasta"
+
+ test("sarscov2 - fasta") {
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ input[1] = Channel.of('MT192765.1\tChr1').collectFile(name: 'id_map.tsv', newLine: true)
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+
+ )
+ }
+
+ }
+
+ test("sarscov2 - fasta - repeated labels") {
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ input[1] = Channel.of('MT192765.1\\tChr1\\nMT19276\\tChr1').collectFile(name: 'id_map.tsv', newLine: true)
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert ! process.success },
+ { assert process.stdout.toString().contains('Each sequence label should be unique') }
+
+ )
+ }
+
+ }
+
+ test("sarscov2 - fasta - repeated ids") {
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ input[1] = Channel.of('MT192765.1\\tChr1\\nMT192765.1\\tChr1').collectFile(name: 'id_map.tsv', newLine: true)
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert ! process.success },
+ { assert process.stdout.toString().contains('Each sequence ID should be unique') }
+
+ )
+ }
+
+ }
+
+ test("sarscov2 - fasta - malformed") {
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ input[1] = Channel.of('MT192765.1\\tChr1\\nMT192765.1').collectFile(name: 'id_map.tsv', newLine: true)
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert ! process.success },
+ { assert process.stdout.toString().contains('should be a two column TSV file') }
+
+ )
+ }
+
+ }
+
+ test("sarscov2 - fasta - missing ids") {
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ input[1] = Channel.of('MT192765.1\\tChr1\\nMT192765.2\\tChr2').collectFile(name: 'id_map.tsv', newLine: true)
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert ! process.success },
+ { assert process.stdout.toString().contains('missing from the fasta') }
+
+ )
+ }
+
+ }
+
+ test("sarscov2 - fasta - stub") {
+
+ options '-stub'
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ input[1] = Channel.of('MT192765.1\tChr1').collectFile(name: 'id_map.tsv', newLine: true)
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+
+ )
+ }
+
+ }
+
+}
diff --git a/modules/pfr/custom/relabelfasta/tests/main.nf.test.snap b/modules/pfr/custom/relabelfasta/tests/main.nf.test.snap
new file mode 100644
index 00000000..6b1b4ff2
--- /dev/null
+++ b/modules/pfr/custom/relabelfasta/tests/main.nf.test.snap
@@ -0,0 +1,68 @@
+{
+ "sarscov2 - fasta - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,d2052e0039190dca1b50700028ef14c3"
+ ],
+ "fasta": [
+ [
+ {
+ "id": "test"
+ },
+ "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,d2052e0039190dca1b50700028ef14c3"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.1"
+ },
+ "timestamp": "2024-05-25T13:40:44.896721"
+ },
+ "sarscov2 - fasta": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.fasta:md5,393572534251b71ec74fbe3b1a31a709"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,d2052e0039190dca1b50700028ef14c3"
+ ],
+ "fasta": [
+ [
+ {
+ "id": "test"
+ },
+ "test.fasta:md5,393572534251b71ec74fbe3b1a31a709"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,d2052e0039190dca1b50700028ef14c3"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.1"
+ },
+ "timestamp": "2024-05-25T13:38:20.323976"
+ }
+}
\ No newline at end of file
diff --git a/modules/pfr/custom/relabelfasta/tests/tags.yml b/modules/pfr/custom/relabelfasta/tests/tags.yml
new file mode 100644
index 00000000..2db1f658
--- /dev/null
+++ b/modules/pfr/custom/relabelfasta/tests/tags.yml
@@ -0,0 +1,2 @@
+custom/relabelfasta:
+ - "modules/pfr/custom/relabelfasta/**"
diff --git a/modules/pfr/custom/restoregffids/templates/restore_gff_ids.py b/modules/pfr/custom/restoregffids/templates/restore_gff_ids.py
index d0699de0..2bde7abb 100755
--- a/modules/pfr/custom/restoregffids/templates/restore_gff_ids.py
+++ b/modules/pfr/custom/restoregffids/templates/restore_gff_ids.py
@@ -10,7 +10,7 @@
def create_name_mapping_from_tsv(file_path):
dictionary = {}
- with open(file_path, "r") as tsv_file:
+ with open(file_path) as tsv_file:
for line in tsv_file:
columns = line.strip().split("\\t")
if len(columns) != 2:
@@ -24,11 +24,11 @@ def create_name_mapping_from_tsv(file_path):
def restore_gff3_ids(new_to_orig_ids, file_path, output_file_name):
# Write versions
- with open(f"versions.yml", "w") as f_versions:
+ with open("versions.yml", "w") as f_versions:
f_versions.write('"${task.process}":\\n')
f_versions.write(f" python: {python_version()}\\n")
- with open(file_path, "r") as input_gff3_file:
+ with open(file_path) as input_gff3_file:
input_lines = input_gff3_file.readlines()
with open(output_file_name, "w") as output_gff_file:
diff --git a/modules/pfr/custom/restoregffids/tests/main.nf.test b/modules/pfr/custom/restoregffids/tests/main.nf.test
index 521b9248..20aa85c9 100644
--- a/modules/pfr/custom/restoregffids/tests/main.nf.test
+++ b/modules/pfr/custom/restoregffids/tests/main.nf.test
@@ -54,10 +54,10 @@ nextflow_process {
assertAll(
{ assert process.success },
{ assert process.out.restored_ids_gff3 != null },
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(process.out.versions).match("versions_stub") }
)
}
}
-}
+}
\ No newline at end of file
diff --git a/modules/pfr/custom/restoregffids/tests/main.nf.test.snap b/modules/pfr/custom/restoregffids/tests/main.nf.test.snap
index ffe43e74..ebe850a9 100644
--- a/modules/pfr/custom/restoregffids/tests/main.nf.test.snap
+++ b/modules/pfr/custom/restoregffids/tests/main.nf.test.snap
@@ -28,14 +28,34 @@
]
}
],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
"timestamp": "2023-12-07T13:49:30.047425"
},
+ "versions_stub": {
+ "content": [
+ [
+ "versions.yml:md5,32d31c4f1da9a3d1be013fd163e5867e"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-10T15:26:11.66528"
+ },
"versions": {
"content": [
[
"versions.yml:md5,32d31c4f1da9a3d1be013fd163e5867e"
]
],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
"timestamp": "2023-12-07T13:49:30.071175"
}
}
\ No newline at end of file
diff --git a/modules/pfr/custom/shortenfastaids/environment.yml b/modules/pfr/custom/shortenfastaids/environment.yml
index e80fa7cf..a64758c6 100644
--- a/modules/pfr/custom/shortenfastaids/environment.yml
+++ b/modules/pfr/custom/shortenfastaids/environment.yml
@@ -8,4 +8,4 @@ channels:
dependencies:
- biopython==1.75
- - python=3.8
+ - python==3.8.13
diff --git a/modules/pfr/custom/shortenfastaids/main.nf b/modules/pfr/custom/shortenfastaids/main.nf
index 92762ef6..3cdaa07c 100644
--- a/modules/pfr/custom/shortenfastaids/main.nf
+++ b/modules/pfr/custom/shortenfastaids/main.nf
@@ -12,7 +12,7 @@ process CUSTOM_SHORTENFASTAIDS {
output:
tuple val(meta), path("*.short.ids.fasta") , emit: short_ids_fasta , optional: true
- tuple val(meta), path("*.short.ids.tsv") , emit: short_ids_tsv , optional: true
+ tuple val(meta), path("*.short.ids.tsv") , emit: short_ids_tsv
path "versions.yml" , emit: versions
when:
@@ -25,6 +25,10 @@ process CUSTOM_SHORTENFASTAIDS {
stub:
def prefix = task.ext.prefix ?: "${meta.id}"
"""
+ echo \\
+ 'IDs have acceptable length and character. No change required.' \\
+ > ${prefix}.short.ids.tsv # use the resolved prefix so task.ext.prefix is honoured
+
cat <<-END_VERSIONS > versions.yml
"${task.process}":
python: \$(python --version | cut -d' ' -f2)
diff --git a/modules/pfr/custom/shortenfastaids/templates/shorten_fasta_ids.py b/modules/pfr/custom/shortenfastaids/templates/shorten_fasta_ids.py
index 54f35bf6..0ce43903 100755
--- a/modules/pfr/custom/shortenfastaids/templates/shorten_fasta_ids.py
+++ b/modules/pfr/custom/shortenfastaids/templates/shorten_fasta_ids.py
@@ -1,11 +1,11 @@
#!/usr/bin/env python3
import re
-
-from Bio import SeqIO
from importlib.metadata import version
from platform import python_version
+from Bio import SeqIO
+
# The input fasta file path
fasta_file_path = "$fasta"
output_files_prefix = "$prefix"
@@ -122,7 +122,7 @@ def shorten_id_by_pattern_replacement(patterns_dict, id):
for pattern in matches_for_id:
shortened_id = re.sub(
- r"({})".format(re.escape(pattern)),
+ rf"({re.escape(pattern)})",
patterns_dict[pattern],
shortened_id,
)
@@ -158,13 +158,15 @@ def fail_if_new_ids_not_valid(ids):
input_ids = [x[0] for x in input_ids_and_descriptions]
# Write versions
- with open(f"versions.yml", "w") as f_versions:
+ with open("versions.yml", "w") as f_versions:
f_versions.write('"${task.process}":\\n')
f_versions.write(f" python: {python_version()}\\n")
f_versions.write(f" biopython: {version('biopython')}\\n")
if not do_ids_need_to_change(input_ids_and_descriptions):
print("IDs have acceptable length and character. No change required.")
+ with open(f"{output_files_prefix}.short.ids.tsv", "w") as f:
+ f.write("IDs have acceptable length and character. No change required.")
exit(0)
new_ids = shorten_ids(
diff --git a/modules/pfr/custom/shortenfastaids/tests/main.nf.test b/modules/pfr/custom/shortenfastaids/tests/main.nf.test
index dc46bae5..c87e8ebf 100644
--- a/modules/pfr/custom/shortenfastaids/tests/main.nf.test
+++ b/modules/pfr/custom/shortenfastaids/tests/main.nf.test
@@ -5,7 +5,7 @@ nextflow_process {
process "CUSTOM_SHORTENFASTAIDS"
tag "modules"
- tag "modules_nfcore"
+ tag "modules_pfr"
tag "custom"
tag "custom/shortenfastaids"
@@ -25,10 +25,7 @@ nextflow_process {
then {
assertAll(
{ assert process.success },
- { assert snapshot(process.out).match() },
- { assert snapshot(process.out.versions).match("versions") },
- { assert process.out.short_ids_fasta == [] },
- { assert process.out.short_ids_tsv == [] }
+ { assert snapshot(process.out).match() }
)
}
@@ -50,8 +47,7 @@ nextflow_process {
then {
assertAll(
{ assert process.success },
- { assert snapshot(process.out).match() },
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(process.out).match() }
)
}
@@ -73,8 +69,7 @@ nextflow_process {
then {
assertAll(
{ assert process.success },
- { assert snapshot(process.out).match() },
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(process.out).match() }
)
}
@@ -95,8 +90,7 @@ nextflow_process {
then {
assertAll(
{ assert process.success },
- { assert snapshot(process.out).match() },
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(process.out).match() }
)
}
@@ -120,12 +114,10 @@ nextflow_process {
then {
assertAll(
{ assert process.success },
- { assert snapshot(process.out.versions).match("versions") },
- { assert process.out.short_ids_fasta == [] },
- { assert process.out.short_ids_tsv == [] }
+ { assert snapshot(process.out).match() }
)
}
}
-}
+}
\ No newline at end of file
diff --git a/modules/pfr/custom/shortenfastaids/tests/main.nf.test.snap b/modules/pfr/custom/shortenfastaids/tests/main.nf.test.snap
index 8fed1b9d..2506ebd6 100644
--- a/modules/pfr/custom/shortenfastaids/tests/main.nf.test.snap
+++ b/modules/pfr/custom/shortenfastaids/tests/main.nf.test.snap
@@ -42,15 +42,50 @@
]
}
],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
"timestamp": "2023-12-07T13:33:05.523745"
},
- "versions": {
+ "stub": {
"content": [
- [
- "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba"
- ]
+ {
+ "0": [
+
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test.short.ids.tsv:md5,fcf920d9a7b57a1e3c29a9e88673330f"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba"
+ ],
+ "short_ids_fasta": [
+
+ ],
+ "short_ids_tsv": [
+ [
+ {
+ "id": "test"
+ },
+ "test.short.ids.tsv:md5,fcf920d9a7b57a1e3c29a9e88673330f"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba"
+ ]
+ }
],
- "timestamp": "2023-12-07T13:30:30.361527"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-04T13:58:30.161542"
},
"homo_sapiens-genome_fasta-no_change": {
"content": [
@@ -59,7 +94,12 @@
],
"1": [
-
+ [
+ {
+ "id": "test"
+ },
+ "test.short.ids.tsv:md5,642382addc4beba37088b1ebe09d38cf"
+ ]
],
"2": [
"versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba"
@@ -68,14 +108,23 @@
],
"short_ids_tsv": [
-
+ [
+ {
+ "id": "test"
+ },
+ "test.short.ids.tsv:md5,642382addc4beba37088b1ebe09d38cf"
+ ]
],
"versions": [
"versions.yml:md5,e5704a53ebea373dac3a93ae800d48ba"
]
}
],
- "timestamp": "2023-12-07T13:32:54.220188"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.1"
+ },
+ "timestamp": "2024-06-02T20:54:17.945233"
},
"homo_sapiens-genome2_fasta-length_change": {
"content": [
@@ -120,6 +169,10 @@
]
}
],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
"timestamp": "2023-12-07T13:33:01.924483"
},
"sarscov2-genome_fasta-pattern_change": {
@@ -165,6 +218,10 @@
]
}
],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
"timestamp": "2023-12-07T13:32:58.12885"
}
}
\ No newline at end of file
diff --git a/modules/pfr/gffread/environment.yml b/modules/pfr/gffread/environment.yml
new file mode 100644
index 00000000..c6df58ad
--- /dev/null
+++ b/modules/pfr/gffread/environment.yml
@@ -0,0 +1,7 @@
+name: gffread
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::gffread=0.12.7
diff --git a/modules/pfr/gffread/main.nf b/modules/pfr/gffread/main.nf
new file mode 100644
index 00000000..da55cbab
--- /dev/null
+++ b/modules/pfr/gffread/main.nf
@@ -0,0 +1,60 @@
+process GFFREAD {
+ tag "$meta.id"
+ label 'process_low'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/gffread:0.12.7--hdcf5f25_4' :
+ 'biocontainers/gffread:0.12.7--hdcf5f25_4' }"
+
+ input:
+ tuple val(meta), path(gff)
+ path fasta // may be empty; the '-g' option is only added when a fasta is supplied
+
+ output:
+ tuple val(meta), path("*.gtf") , emit: gtf , optional: true
+ tuple val(meta), path("*.gff3") , emit: gffread_gff , optional: true
+ tuple val(meta), path("*.fasta"), emit: gffread_fasta , optional: true
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def extension = args.contains("-T") ? 'gtf' : ( ( ['-w', '-x', '-y' ].any { args.contains(it) } ) ? 'fasta' : 'gff3' ) // '-T' selects GTF; any of '-w', '-x', '-y' selects FASTA; otherwise GFF3
+ def fasta_arg = fasta ? "-g $fasta" : ''
+ def output_name = "${prefix}.${extension}"
+ def output = extension == "fasta" ? "$output_name" : "-o $output_name" // FASTA name is passed bare (it follows the relocated -w/-x/-y flag); other formats are named with -o
+ def args_sorted = args.replaceAll(/(.*)(-[wxy])(.*)/) { all, pre, param, post -> "$pre $post $param" }.trim()
+ // args_sorted = args with the last '-w', '-x' or '-y' flag (the greedy match picks the last one) moved to the end, as gffread expects the file name after these parameters
+ if ( "$output_name" in [ "$gff", "$fasta" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" // refuse to run when the output name equals a staged input name
+ """
+ gffread \\
+ $gff \\
+ $fasta_arg \\
+ $args_sorted \\
+ $output
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ gffread: \$(gffread --version 2>&1)
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def extension = args.contains("-T") ? 'gtf' : ( ( ['-w', '-x', '-y' ].any { args.contains(it) } ) ? 'fasta' : 'gff3' ) // same extension rule as the script section
+ def output_name = "${prefix}.${extension}"
+ if ( "$output_name" in [ "$gff", "$fasta" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" // same name-clash guard as the script section
+ """
+ touch $output_name
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ gffread: \$(gffread --version 2>&1)
+ END_VERSIONS
+ """
+}
diff --git a/modules/pfr/gffread/meta.yml b/modules/pfr/gffread/meta.yml
new file mode 100644
index 00000000..c0602820
--- /dev/null
+++ b/modules/pfr/gffread/meta.yml
@@ -0,0 +1,55 @@
+name: gffread
+description: Validate, filter, convert and perform various other operations on GFF files
+keywords:
+ - gff
+ - conversion
+ - validation
+tools:
+ - gffread:
+ description: GFF/GTF utility providing format conversions, region filtering, FASTA sequence extraction and more.
+ homepage: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread
+ documentation: http://ccb.jhu.edu/software/stringtie/gff.shtml#gffread
+ tool_dev_url: https://github.com/gpertea/gffread
+ doi: 10.12688/f1000research.23297.1
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing meta data
+ e.g. [ id:'test' ]
+ - gff:
+ type: file
+ description: A reference file in either the GFF3, GFF2 or GTF format.
+ pattern: "*.{gff,gff3,gtf}"
+ - fasta:
+ type: file
+ description: A multi-fasta file with the genomic sequences
+ pattern: "*.{fasta,fa,faa,fas,fsa}"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing meta data
+ e.g. [ id:'test' ]
+ - gtf:
+ type: file
+ description: GTF file resulting from the conversion of the GFF input file if '-T' argument is present
+ pattern: "*.{gtf}"
+ - gffread_gff:
+ type: file
+ description: GFF3 file resulting from the conversion of the GFF input file if '-T' argument is absent
+ pattern: "*.gff3"
+ - gffread_fasta:
+ type: file
+ description: Fasta file produced when either of '-w', '-x', '-y' parameters is present
+ pattern: "*.fasta"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@edmundmiller"
+maintainers:
+ - "@edmundmiller"
+ - "@gallvp"
diff --git a/modules/pfr/gffread/tests/main.nf.test b/modules/pfr/gffread/tests/main.nf.test
new file mode 100644
index 00000000..4cd13dcd
--- /dev/null
+++ b/modules/pfr/gffread/tests/main.nf.test
@@ -0,0 +1,223 @@
+nextflow_process {
+
+ name "Test Process GFFREAD"
+ script "../main.nf"
+ process "GFFREAD"
+
+ tag "gffread"
+ tag "modules_nfcore"
+ tag "modules"
+
+ test("sarscov2-gff3-gtf") {
+
+ config "./nextflow.config"
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [
+ [id: 'test'],
+ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
+ ]
+ input[1] = []
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert process.out.gffread_gff == [] },
+ { assert process.out.gffread_fasta == [] }
+ )
+ }
+
+ }
+
+ test("sarscov2-gff3-gtf-stub") {
+
+ options '-stub'
+ config "./nextflow.config"
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [
+ [id: 'test'],
+ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
+ ]
+ input[1] = []
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert process.out.gffread_gff == [] },
+ { assert process.out.gffread_fasta == [] }
+ )
+ }
+
+ }
+
+ test("sarscov2-gff3-gff3") {
+
+ config "./nextflow-gff3.config"
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [
+ [id: 'test'],
+ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
+ ]
+ input[1] = []
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert process.out.gtf == [] },
+ { assert process.out.gffread_fasta == [] }
+ )
+ }
+
+ }
+
+ test("sarscov2-gff3-gff3-stub") {
+
+ options '-stub'
+ config "./nextflow-gff3.config"
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [
+ [id: 'test'],
+ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
+ ]
+ input[1] = []
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert process.out.gtf == [] },
+ { assert process.out.gffread_fasta == [] }
+ )
+ }
+
+ }
+
+ test("sarscov2-gff3-fasta") {
+
+ config "./nextflow-fasta.config"
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [
+ [id: 'test'],
+ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
+ ]
+ input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true)
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert process.out.gtf == [] },
+ { assert process.out.gffread_gff == [] }
+ )
+ }
+
+ }
+
+ test("sarscov2-gff3-fasta-stub") {
+
+ options '-stub'
+ config "./nextflow-fasta.config"
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [
+ [id: 'test'],
+ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
+ ]
+ input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true)
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert process.out.gtf == [] },
+ { assert process.out.gffread_gff == [] }
+ )
+ }
+
+ }
+
+ test("sarscov2-gff3-fasta-fail-catch") {
+
+ options '-stub'
+ config "./nextflow-fasta.config"
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [
+ [id: 'genome'],
+ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
+ ]
+ input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true)
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert ! process.success },
+ { assert process.stdout.toString().contains("Input and output names are the same") }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/pfr/gffread/tests/main.nf.test.snap b/modules/pfr/gffread/tests/main.nf.test.snap
new file mode 100644
index 00000000..15262320
--- /dev/null
+++ b/modules/pfr/gffread/tests/main.nf.test.snap
@@ -0,0 +1,272 @@
+{
+ "sarscov2-gff3-gtf": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gtf:md5,1ea0ae98d3388e0576407dc4a24ef428"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
+ ],
+ "gffread_fasta": [
+
+ ],
+ "gffread_gff": [
+
+ ],
+ "gtf": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gtf:md5,1ea0ae98d3388e0576407dc4a24ef428"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-09T10:48:56.496187"
+ },
+ "sarscov2-gff3-gff3": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gff3:md5,c4e5da6267c6bee5899a2c204ae1ad91"
+ ]
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
+ ],
+ "gffread_fasta": [
+
+ ],
+ "gffread_gff": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gff3:md5,c4e5da6267c6bee5899a2c204ae1ad91"
+ ]
+ ],
+ "gtf": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-09T10:49:00.892782"
+ },
+ "sarscov2-gff3-gtf-stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
+ ],
+ "gffread_fasta": [
+
+ ],
+ "gffread_gff": [
+
+ ],
+ "gtf": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-09T11:11:26.975666"
+ },
+ "sarscov2-gff3-fasta-stub": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+
+ ],
+ "2": [
+ [
+ {
+ "id": "test"
+ },
+ "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
+ ],
+ "gffread_fasta": [
+ [
+ {
+ "id": "test"
+ },
+ "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "gffread_gff": [
+
+ ],
+ "gtf": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-09T11:11:44.34792"
+ },
+ "sarscov2-gff3-gff3-stub": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
+ ],
+ "gffread_fasta": [
+
+ ],
+ "gffread_gff": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "gtf": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-09T11:11:35.221671"
+ },
+ "sarscov2-gff3-fasta": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+
+ ],
+ "2": [
+ [
+ {
+ "id": "test"
+ },
+ "test.fasta:md5,5f8108fb51739a0588ccf0a251de919a"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
+ ],
+ "gffread_fasta": [
+ [
+ {
+ "id": "test"
+ },
+ "test.fasta:md5,5f8108fb51739a0588ccf0a251de919a"
+ ]
+ ],
+ "gffread_gff": [
+
+ ],
+ "gtf": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-09T10:54:02.88143"
+ }
+}
\ No newline at end of file
diff --git a/modules/pfr/gffread/tests/nextflow-fasta.config b/modules/pfr/gffread/tests/nextflow-fasta.config
new file mode 100644
index 00000000..ac6cb148
--- /dev/null
+++ b/modules/pfr/gffread/tests/nextflow-fasta.config
@@ -0,0 +1,5 @@
+process {
+ withName: GFFREAD {
+ ext.args = '-w -S'
+ }
+}
diff --git a/modules/pfr/gffread/tests/nextflow-gff3.config b/modules/pfr/gffread/tests/nextflow-gff3.config
new file mode 100644
index 00000000..afe0830e
--- /dev/null
+++ b/modules/pfr/gffread/tests/nextflow-gff3.config
@@ -0,0 +1,5 @@
+process {
+ withName: GFFREAD {
+ ext.args = ''
+ }
+}
diff --git a/modules/pfr/gffread/tests/nextflow.config b/modules/pfr/gffread/tests/nextflow.config
new file mode 100644
index 00000000..74b25094
--- /dev/null
+++ b/modules/pfr/gffread/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+ withName: GFFREAD {
+ ext.args = '-T'
+ }
+}
diff --git a/modules/pfr/gffread/tests/tags.yml b/modules/pfr/gffread/tests/tags.yml
new file mode 100644
index 00000000..05576065
--- /dev/null
+++ b/modules/pfr/gffread/tests/tags.yml
@@ -0,0 +1,2 @@
+gffread:
+ - modules/nf-core/gffread/**
diff --git a/modules/pfr/gt/gff3/main.nf b/modules/pfr/gt/gff3/main.nf
index d27e2bb9..6324a391 100644
--- a/modules/pfr/gt/gff3/main.nf
+++ b/modules/pfr/gt/gff3/main.nf
@@ -27,7 +27,7 @@ process GT_GFF3 {
$args \\
"$gff3" \\
> "${prefix}.gt.gff3" \\
- 2> "${prefix}.error.log" \\
+ 2> >(tee "${prefix}.error.log" >&2) \\
|| echo "Errors from gt-gff3 printed to ${prefix}.error.log"
if grep -q "gt gff3: error:" "${prefix}.error.log"; then
diff --git a/modules/pfr/gt/gff3/tests/main.nf.test.snap b/modules/pfr/gt/gff3/tests/main.nf.test.snap
index f31e8d1c..6e454f18 100644
--- a/modules/pfr/gt/gff3/tests/main.nf.test.snap
+++ b/modules/pfr/gt/gff3/tests/main.nf.test.snap
@@ -32,7 +32,7 @@
]
}
],
- "timestamp": "2023-11-28T13:43:34.620429"
+ "timestamp": "2023-11-29T10:42:11.408352"
},
"sarscov2-gff3-valid": {
"content": [
@@ -67,6 +67,6 @@
]
}
],
- "timestamp": "2023-11-28T13:43:31.065832"
+ "timestamp": "2023-11-29T10:42:07.817894"
}
}
\ No newline at end of file
diff --git a/modules/pfr/gt/gff3/tests/tags.yml b/modules/pfr/gt/gff3/tests/tags.yml
index ae040309..0ce15a90 100644
--- a/modules/pfr/gt/gff3/tests/tags.yml
+++ b/modules/pfr/gt/gff3/tests/tags.yml
@@ -1,2 +1,2 @@
gt/gff3:
- - "modules/pfr/gt/gff3/**"
+ - "modules/nf-core/gt/gff3/**"
diff --git a/modules/pfr/gt/gff3validator/main.nf b/modules/pfr/gt/gff3validator/main.nf
index ae7ec9e7..8c6899ca 100644
--- a/modules/pfr/gt/gff3validator/main.nf
+++ b/modules/pfr/gt/gff3validator/main.nf
@@ -24,22 +24,22 @@ process GT_GFF3VALIDATOR {
gt \\
gff3validator \\
"$gff3" \\
- > "${prefix}.success.log" \\
- 2> "${prefix}.error.log" \\
+ > "${prefix}.stdout" \\
+ 2> >(tee "${prefix}.stderr" >&2) \\
|| echo "Errors from gt-gff3validator printed to ${prefix}.error.log"
- if grep -q "input is valid GFF3" "${prefix}.success.log"; then
+ if grep -q "input is valid GFF3" "${prefix}.stdout"; then
echo "Validation successful..."
-
+ # emit stdout to the success output channel
mv \\
- "${prefix}.error.log" \\
- gt_gff3validator.stderr
+ "${prefix}.stdout" \\
+ "${prefix}.success.log"
else
echo "Validation failed..."
-
+ # emit stderr to the error output channel
mv \\
- "${prefix}.success.log" \\
- gt_gff3validator.stdout
+ "${prefix}.stderr" \\
+ "${prefix}.error.log"
fi
cat <<-END_VERSIONS > versions.yml
@@ -47,4 +47,15 @@ process GT_GFF3VALIDATOR {
genometools: \$(gt --version | head -1 | sed 's/gt (GenomeTools) //')
END_VERSIONS
"""
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ touch "${prefix}.success.log"
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ genometools: \$(gt --version | head -1 | sed 's/gt (GenomeTools) //')
+ END_VERSIONS
+ """
}
diff --git a/modules/pfr/gt/gff3validator/tests/main.nf.test b/modules/pfr/gt/gff3validator/tests/main.nf.test
index 1b99e551..766e3e74 100644
--- a/modules/pfr/gt/gff3validator/tests/main.nf.test
+++ b/modules/pfr/gt/gff3validator/tests/main.nf.test
@@ -8,18 +8,31 @@ nextflow_process {
tag "modules_nfcore"
tag "gt"
tag "gt/gff3validator"
+ tag "gt/gff3"
- test("custom-gff3-valid") {
+ test("sarscov2-gff3-valid") {
+
+ config './nextflow.config'
+
+ setup {
+ run("GT_GFF3") {
+ script "../../../../nf-core/gt/gff3"
+
+ process {
+ """
+ input[0] = [
+ [ id:'test' ],
+ file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+ }
when {
process {
"""
- input[0] = Channel.of(
- '##gff-version 3',
- 'chr22\thavana\tpseudogene\t16572027\t16574637\t.\t+\t.\tID=gene:ENSG00000233995;Name=AP000547.1'
- )
- .collectFile(name: 'sample.gff3', newLine: true)
- .map { file -> [ [ id:'test' ], file ] }
+ input[0] = GT_GFF3.out.gt_gff3
"""
}
}
@@ -28,9 +41,32 @@ nextflow_process {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() },
- { assert process.out.error_log == [] },
- { assert process.out.success_log != null },
- { assert path(process.out.success_log.get(0).get(1)).getText().contains("input is valid GFF3") }
+ { assert path(process.out.success_log[0][1]).text.contains("input is valid GFF3") },
+ { assert process.out.error_log == [] }
+ )
+ }
+
+ }
+
+ test("sarscov2-gff3-stub") {
+
+ options '-stub'
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ],
+ file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
)
}
@@ -54,8 +90,7 @@ nextflow_process {
{ assert process.success },
{ assert snapshot(process.out).match() },
{ assert process.out.success_log == [] },
- { assert process.out.error_log != null },
- { assert path(process.out.error_log.get(0).get(1)).getText().contains("gt gff3validator: error:") }
+ { assert path(process.out.error_log[0][1]).text.contains("gt gff3validator: error:") }
)
}
diff --git a/modules/pfr/gt/gff3validator/tests/main.nf.test.snap b/modules/pfr/gt/gff3validator/tests/main.nf.test.snap
index 0b6f065a..ccc83f90 100644
--- a/modules/pfr/gt/gff3validator/tests/main.nf.test.snap
+++ b/modules/pfr/gt/gff3validator/tests/main.nf.test.snap
@@ -1,4 +1,39 @@
{
+ "sarscov2-gff3-stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.success.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+ "versions.yml:md5,5927673eb73a8c22408643d224414215"
+ ],
+ "error_log": [
+
+ ],
+ "success_log": [
+ [
+ {
+ "id": "test"
+ },
+ "test.success.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,5927673eb73a8c22408643d224414215"
+ ]
+ }
+ ],
+ "timestamp": "2024-01-11T09:29:13.14468"
+ },
"sarscov2-gff3-invalid": {
"content": [
{
@@ -34,7 +69,7 @@
],
"timestamp": "2023-11-29T11:09:23.708792"
},
- "custom-gff3-valid": {
+ "sarscov2-gff3-valid": {
"content": [
{
"0": [
@@ -67,6 +102,6 @@
]
}
],
- "timestamp": "2023-11-29T11:09:19.530068"
+ "timestamp": "2023-12-13T10:34:54.485391"
}
}
\ No newline at end of file
diff --git a/modules/pfr/gt/gff3validator/tests/nextflow.config b/modules/pfr/gt/gff3validator/tests/nextflow.config
new file mode 100644
index 00000000..c69915da
--- /dev/null
+++ b/modules/pfr/gt/gff3validator/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+ withName: GT_GFF3 {
+ ext.args = '-tidy -retainids'
+ }
+}
diff --git a/modules/pfr/gt/gff3validator/tests/tags.yml b/modules/pfr/gt/gff3validator/tests/tags.yml
index e247d55e..98c39ffe 100644
--- a/modules/pfr/gt/gff3validator/tests/tags.yml
+++ b/modules/pfr/gt/gff3validator/tests/tags.yml
@@ -1,2 +1,2 @@
gt/gff3validator:
- - "modules/pfr/gt/gff3validator/**"
+ - "modules/nf-core/gt/gff3validator/**"
diff --git a/modules/pfr/gt/stat/main.nf b/modules/pfr/gt/stat/main.nf
index 3308b562..0fbc936b 100644
--- a/modules/pfr/gt/stat/main.nf
+++ b/modules/pfr/gt/stat/main.nf
@@ -11,21 +11,32 @@ process GT_STAT {
tuple val(meta), path(gff3)
output:
- tuple val(meta), path("*.gt.stat.yml") , emit: stats
+ tuple val(meta), path("${prefix}.yml") , emit: stats
path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
- def args = task.ext.args ?: ''
- def prefix = task.ext.prefix ?: "${meta.id}"
+ def args = task.ext.args ?: ''
+ prefix = task.ext.prefix ?: "${meta.id}"
"""
gt \\
stat \\
$args \\
- "$gff3" \\
- > "${prefix}.gt.stat.yml"
+ $gff3 \\
+ > ${prefix}.yml
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ genometools: \$(gt --version | head -1 | sed 's/gt (GenomeTools) //')
+ END_VERSIONS
+ """
+
+ stub:
+ prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ touch ${prefix}.yml
cat <<-END_VERSIONS > versions.yml
"${task.process}":
diff --git a/modules/pfr/gt/stat/meta.yml b/modules/pfr/gt/stat/meta.yml
index 203059a6..fa477f34 100644
--- a/modules/pfr/gt/stat/meta.yml
+++ b/modules/pfr/gt/stat/meta.yml
@@ -35,7 +35,7 @@ output:
- stats:
type: file
description: Stats file in yaml format
- pattern: "*.gt.stat.yml"
+ pattern: "*.yml"
- versions:
type: file
description: File containing software versions
diff --git a/modules/pfr/gt/stat/tests/main.nf.test b/modules/pfr/gt/stat/tests/main.nf.test
index 57f5992c..d1fe2f0d 100644
--- a/modules/pfr/gt/stat/tests/main.nf.test
+++ b/modules/pfr/gt/stat/tests/main.nf.test
@@ -16,7 +16,7 @@ nextflow_process {
"""
input[0] = Channel.of(
"##gff-version 3" +
- file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true).getText().toLowerCase()
+ file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true).text.toLowerCase()
)
.collectFile(name: 'sample.gff3', newLine: true)
.map { file -> [ [ id:'test' ], file ] }
@@ -28,10 +28,35 @@ nextflow_process {
assertAll(
{ assert process.success },
{ assert snapshot(process.out).match() },
- { assert path(process.out.stats.get(0).get(1)).getText().contains("cdss: 12") }
+ { assert path(process.out.stats[0][1]).text.contains("cdss: 12") }
)
}
}
-}
+ test("sarscov2-gff3-stub") {
+
+ options '-stub'
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id: 'test' ],
+ file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert process.out.stats != null }, // md5sum for empty files not allowed by nf-core/tools 2.14.0
+ { assert snapshot(process.out.versions).match("stub_versions") }
+ )
+ }
+
+ }
+
+}
\ No newline at end of file
diff --git a/modules/pfr/gt/stat/tests/main.nf.test.snap b/modules/pfr/gt/stat/tests/main.nf.test.snap
index 2fcfb8a8..481ceb1d 100644
--- a/modules/pfr/gt/stat/tests/main.nf.test.snap
+++ b/modules/pfr/gt/stat/tests/main.nf.test.snap
@@ -7,7 +7,7 @@
{
"id": "test"
},
- "test.gt.stat.yml:md5,ebba7831ddbf916b8bbea675ba8693b5"
+ "test.yml:md5,ebba7831ddbf916b8bbea675ba8693b5"
]
],
"1": [
@@ -18,7 +18,7 @@
{
"id": "test"
},
- "test.gt.stat.yml:md5,ebba7831ddbf916b8bbea675ba8693b5"
+ "test.yml:md5,ebba7831ddbf916b8bbea675ba8693b5"
]
],
"versions": [
@@ -26,6 +26,22 @@
]
}
],
- "timestamp": "2023-11-29T11:34:48.057277"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-09T19:29:08.925383"
+ },
+ "stub_versions": {
+ "content": [
+ [
+ "versions.yml:md5,a184b50afb2ad6dd2d3d37b0a211dd71"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-09T19:29:12.808817"
}
}
\ No newline at end of file
diff --git a/modules/pfr/gt/stat/tests/tags.yml b/modules/pfr/gt/stat/tests/tags.yml
index 46be6341..baef3b08 100644
--- a/modules/pfr/gt/stat/tests/tags.yml
+++ b/modules/pfr/gt/stat/tests/tags.yml
@@ -1,2 +1,2 @@
gt/stat:
- - "modules/pfr/gt/stat/**"
+ - "modules/nf-core/gt/stat/**"
diff --git a/modules/pfr/ltrfinder/tests/main.nf.test b/modules/pfr/ltrfinder/tests/main.nf.test
index 447ce34d..361e7324 100644
--- a/modules/pfr/ltrfinder/tests/main.nf.test
+++ b/modules/pfr/ltrfinder/tests/main.nf.test
@@ -69,4 +69,4 @@ nextflow_process {
}
-}
+}
\ No newline at end of file
diff --git a/modules/pfr/ltrharvest/tests/main.nf.test b/modules/pfr/ltrharvest/tests/main.nf.test
index 9226bc0c..76b49476 100644
--- a/modules/pfr/ltrharvest/tests/main.nf.test
+++ b/modules/pfr/ltrharvest/tests/main.nf.test
@@ -57,4 +57,4 @@ nextflow_process {
}
-}
+}
\ No newline at end of file
diff --git a/modules/pfr/ltrretriever/lai/tests/main.nf.test b/modules/pfr/ltrretriever/lai/tests/main.nf.test
index df7db2cf..a617811b 100644
--- a/modules/pfr/ltrretriever/lai/tests/main.nf.test
+++ b/modules/pfr/ltrretriever/lai/tests/main.nf.test
@@ -163,4 +163,4 @@ nextflow_process {
}
-}
+}
\ No newline at end of file
diff --git a/modules/pfr/ltrretriever/ltrretriever/tests/main.nf.test b/modules/pfr/ltrretriever/ltrretriever/tests/main.nf.test
index f6ab43db..e8e13a60 100644
--- a/modules/pfr/ltrretriever/ltrretriever/tests/main.nf.test
+++ b/modules/pfr/ltrretriever/ltrretriever/tests/main.nf.test
@@ -130,4 +130,4 @@ nextflow_process {
}
-}
+}
\ No newline at end of file
diff --git a/modules/pfr/merqury/hapmers/environment.yml b/modules/pfr/merqury/hapmers/environment.yml
new file mode 100644
index 00000000..5f7977cd
--- /dev/null
+++ b/modules/pfr/merqury/hapmers/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "merqury_hapmers"
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - "bioconda::merqury=1.3"
diff --git a/modules/pfr/merqury/hapmers/main.nf b/modules/pfr/merqury/hapmers/main.nf
new file mode 100644
index 00000000..22dd66d5
--- /dev/null
+++ b/modules/pfr/merqury/hapmers/main.nf
@@ -0,0 +1,96 @@
+process MERQURY_HAPMERS { // Runs Merqury's trio/hapmers.sh on the staged meryl inputs and renames its outputs with the sample prefix
+ tag "$meta.id"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/merqury:1.3--hdfd78af_1':
+ 'biocontainers/merqury:1.3--hdfd78af_1' }"
+
+ input:
+ tuple val(meta), path(child_meryl, stageAs: 'child.meryl') // staged under the fixed name that the script passes to hapmers.sh
+ path(maternal_meryl, stageAs: 'mat.meryl') // staged as mat.meryl (first positional argument of hapmers.sh below)
+ path(paternal_meryl, stageAs: 'pat.meryl') // staged as pat.meryl (second positional argument of hapmers.sh below)
+
+ output:
+ tuple val(meta), path('*_mat.hapmer.meryl') , emit: mat_hapmer_meryl
+ tuple val(meta), path('*_pat.hapmer.meryl') , emit: pat_hapmer_meryl
+ tuple val(meta), path('*_inherited_hapmers.fl.png') , emit: inherited_hapmers_fl_png
+ tuple val(meta), path('*_inherited_hapmers.ln.png') , emit: inherited_hapmers_ln_png
+ tuple val(meta), path('*_inherited_hapmers.st.png') , emit: inherited_hapmers_st_png
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def VERSION = '1.3' // hard-coded rather than queried from the tool; keep in sync with the container tag above
+ """
+ # Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute)
+ # Check for container variable initialisation script and source it.
+ if [ -f "/usr/local/env-activate.sh" ]; then
+ set +u # Otherwise, errors out because of various unbound variables
+ . "/usr/local/env-activate.sh"
+ set -u
+ fi
+
+ \$MERQURY/trio/hapmers.sh \\
+ mat.meryl \\
+ pat.meryl \\
+ child.meryl \\
+ $args
+
+ mv mat.hapmer.meryl ${prefix}_mat.hapmer.meryl
+ mv pat.hapmer.meryl ${prefix}_pat.hapmer.meryl
+ mv inherited_hapmers.fl.png ${prefix}_inherited_hapmers.fl.png
+ mv inherited_hapmers.ln.png ${prefix}_inherited_hapmers.ln.png
+ mv inherited_hapmers.st.png ${prefix}_inherited_hapmers.st.png
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ merqury: $VERSION
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def VERSION = '1.3' // hard-coded rather than queried from the tool; keep in sync with the container tag above
+ """
+ # Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute)
+ # Check for container variable initialisation script and source it.
+ if [ -f "/usr/local/env-activate.sh" ]; then
+ set +u # Otherwise, errors out because of various unbound variables
+ . "/usr/local/env-activate.sh"
+ set -u
+ fi
+
+ echo \\
+ "\$MERQURY/trio/hapmers.sh \\
+ mat.meryl \\
+ pat.meryl \\
+ child.meryl \\
+ $args"
+
+ mkdir ${prefix}_mat.hapmer.meryl
+ touch ${prefix}_mat.hapmer.meryl/0x000000.merylData
+ touch ${prefix}_mat.hapmer.meryl/0x000000.merylIndex
+ touch ${prefix}_mat.hapmer.meryl/merylIndex
+
+ mkdir ${prefix}_pat.hapmer.meryl
+ touch ${prefix}_pat.hapmer.meryl/0x000000.merylData
+ touch ${prefix}_pat.hapmer.meryl/0x000000.merylIndex
+ touch ${prefix}_pat.hapmer.meryl/merylIndex
+
+ touch ${prefix}_inherited_hapmers.fl.png
+ touch ${prefix}_inherited_hapmers.ln.png
+ touch ${prefix}_inherited_hapmers.st.png
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ merqury: $VERSION
+ END_VERSIONS
+ """
+}
diff --git a/modules/pfr/merqury/hapmers/meta.yml b/modules/pfr/merqury/hapmers/meta.yml
new file mode 100644
index 00000000..6693f540
--- /dev/null
+++ b/modules/pfr/merqury/hapmers/meta.yml
@@ -0,0 +1,70 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "merqury_hapmers"
+description: A script to generate hap-mer dbs for trios
+keywords:
+ - genomics
+ - quality check
+ - qc
+ - kmer
+tools:
+ - "merqury":
+ description: "Evaluate genome assemblies with k-mers and more."
+ tool_dev_url: "https://github.com/marbl/merqury"
+ doi: "10.1186/s13059-020-02134-9"
+ licence: ["United States Government Work"]
+
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - child_meryl:
+ type: directory
+ description: Child's k-mers (all, from WGS reads)
+ pattern: "*.meryl"
+ - maternal_meryl:
+ type: directory
+ description: Haplotype1 k-mers (all, ex. maternal)
+ pattern: "*.meryl"
+ - paternal_meryl:
+ type: directory
+ description: Haplotype2 k-mers (all, ex. paternal)
+ pattern: "*.meryl"
+
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - mat_hapmer_meryl:
+ type: directory
+ description: Inherited maternal hap-mer dbs
+ pattern: "*_mat.hapmer.meryl"
+ - pat_hapmer_meryl:
+ type: directory
+ description: Inherited paternal hap-mer dbs
+ pattern: "*_pat.hapmer.meryl"
+ - inherited_hapmers_fl_png:
+ type: file
+ description: k-mer distribution of the inherited dbs and cutoffs used to generate hap-mer dbs
+ pattern: "*_inherited_hapmers.fl.png"
+ - inherited_hapmers_ln_png:
+ type: file
+ description: k-mer distribution of the inherited dbs and cutoffs used to generate hap-mer dbs
+ pattern: "*_inherited_hapmers.ln.png"
+ - inherited_hapmers_st_png:
+ type: file
+ description: k-mer distribution of the inherited dbs and cutoffs used to generate hap-mer dbs
+ pattern: "*_inherited_hapmers.st.png"
+
+authors:
+ - "@GallVp"
+maintainers:
+ - "@GallVp"
diff --git a/modules/pfr/merqury/hapmers/tests/main.nf.test b/modules/pfr/merqury/hapmers/tests/main.nf.test
new file mode 100644
index 00000000..9a254aaf
--- /dev/null
+++ b/modules/pfr/merqury/hapmers/tests/main.nf.test
@@ -0,0 +1,113 @@
+nextflow_process { // nf-test suite for MERQURY_HAPMERS: one real run and one -stub run, both fed by MERYL_COUNT setup runs
+
+ name "Test Process MERQURY_HAPMERS"
+ script "../main.nf"
+ process "MERQURY_HAPMERS"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "merqury"
+ tag "merqury/hapmers"
+ tag "meryl/count" // MERYL_COUNT is executed in the setup block below
+
+ setup { // builds the three meryl databases that both tests pass to the process
+ run('MERYL_COUNT') { // child database
+ script "../../../meryl/count"
+ process {
+ """
+ input[0] = [
+ [ id:'child' ], // meta map
+ file(params.test_data['homo_sapiens']['illumina']['test2_1_fastq_gz'], checkIfExists: true)
+ ]
+ input[1] = 21
+ """
+ }
+ }
+
+ run('MERYL_COUNT', alias: 'MERYL_COUNT_MATERNAL') { // maternal database, same process under an alias
+ script "../../../meryl/count"
+ process {
+ """
+ input[0] = [
+ [ id:'maternal' ], // meta map
+ file(params.test_data['homo_sapiens']['illumina']['test_1_fastq_gz'], checkIfExists: true)
+ ]
+ input[1] = 21
+ """
+ }
+ }
+
+ run('MERYL_COUNT', alias: 'MERYL_COUNT_PATERNAL') { // paternal database, same process under an alias
+ script "../../../meryl/count"
+ process {
+ """
+ input[0] = [
+ [ id:'paternal' ], // meta map
+ file(params.test_data['homo_sapiens']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+ ]
+ input[1] = 21
+ """
+ }
+ }
+ }
+
+ test("homo_sapiens") {
+
+ when {
+ process {
+ """
+ input[0] = MERYL_COUNT.out.meryl_db
+ input[1] = MERYL_COUNT_MATERNAL.out.meryl_db.map { meta, db -> db }
+ input[2] = MERYL_COUNT_PATERNAL.out.meryl_db.map { meta, db -> db }
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.mat_hapmer_meryl).md5().match("mat_hapmer_meryl") }, // snapshot is reduced to a single md5 under its own key
+ { assert snapshot(process.out.pat_hapmer_meryl).md5().match("pat_hapmer_meryl") }, // snapshot is reduced to a single md5 under its own key
+ { assert snapshot(
+ process.out.versions,
+ file(process.out.inherited_hapmers_fl_png[0][1]).name, // only the PNG file names are snapshotted, not their contents
+ file(process.out.inherited_hapmers_ln_png[0][1]).name,
+ file(process.out.inherited_hapmers_st_png[0][1]).name
+ ).match()
+ },
+ )
+ }
+ }
+
+ test("homo_sapiens-stub") {
+
+ options '-stub' // run the process's stub: section instead of script:
+
+ when {
+ process {
+ """
+ input[0] = MERYL_COUNT.out.meryl_db
+ input[1] = MERYL_COUNT_MATERNAL.out.meryl_db.map { meta, db -> db }
+ input[2] = MERYL_COUNT_PATERNAL.out.meryl_db.map { meta, db -> db }
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.mat_hapmer_meryl).md5().match("mat_hapmer_meryl_stub") }, // stub outputs use separate snapshot keys from the real run
+ { assert snapshot(process.out.pat_hapmer_meryl).md5().match("pat_hapmer_meryl_stub") },
+ { assert snapshot(
+ process.out.versions,
+ file(process.out.inherited_hapmers_fl_png[0][1]).name, // only the PNG file names are snapshotted, not their contents
+ file(process.out.inherited_hapmers_ln_png[0][1]).name,
+ file(process.out.inherited_hapmers_st_png[0][1]).name
+ ).match()
+ },
+ )
+ }
+
+ }
+
+}
\ No newline at end of file
diff --git a/modules/pfr/merqury/hapmers/tests/main.nf.test.snap b/modules/pfr/merqury/hapmers/tests/main.nf.test.snap
new file mode 100644
index 00000000..3a0d4902
--- /dev/null
+++ b/modules/pfr/merqury/hapmers/tests/main.nf.test.snap
@@ -0,0 +1,64 @@
+{
+ "homo_sapiens": {
+ "content": [
+ [
+ "versions.yml:md5,e786e8381bfc64b2fdc423a67f60763e"
+ ],
+ "child_inherited_hapmers.fl.png",
+ "child_inherited_hapmers.ln.png",
+ "child_inherited_hapmers.st.png"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-21T22:26:25.962754"
+ },
+ "mat_hapmer_meryl": {
+ "content": "ccc510b93c249fb07688cf3e4eecd669",
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-21T22:26:25.853929"
+ },
+ "pat_hapmer_meryl": {
+ "content": "003a22d74da4ff265c2224c980d1b9d8",
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-21T22:26:25.88616"
+ },
+ "mat_hapmer_meryl_stub": {
+ "content": "59c648572f6591567baecccf5c3556ed",
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-21T22:26:58.081794"
+ },
+ "pat_hapmer_meryl_stub": {
+ "content": "59c648572f6591567baecccf5c3556ed",
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-21T22:26:58.084766"
+ },
+ "homo_sapiens-stub": {
+ "content": [
+ [
+ "versions.yml:md5,e786e8381bfc64b2fdc423a67f60763e"
+ ],
+ "child_inherited_hapmers.fl.png",
+ "child_inherited_hapmers.ln.png",
+ "child_inherited_hapmers.st.png"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-21T22:26:58.142356"
+ }
+}
\ No newline at end of file
diff --git a/modules/pfr/merqury/hapmers/tests/tags.yml b/modules/pfr/merqury/hapmers/tests/tags.yml
new file mode 100644
index 00000000..553f09d8
--- /dev/null
+++ b/modules/pfr/merqury/hapmers/tests/tags.yml
@@ -0,0 +1,2 @@
+merqury/hapmers:
+ - "modules/pfr/merqury/hapmers/**"
diff --git a/modules/pfr/plotsr/environment.yml b/modules/pfr/plotsr/environment.yml
new file mode 100644
index 00000000..39f05f88
--- /dev/null
+++ b/modules/pfr/plotsr/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "plotsr"
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - "bioconda::plotsr=1.1.1"
diff --git a/modules/pfr/plotsr/main.nf b/modules/pfr/plotsr/main.nf
new file mode 100644
index 00000000..664c1a24
--- /dev/null
+++ b/modules/pfr/plotsr/main.nf
@@ -0,0 +1,91 @@
+process PLOTSR { // Writes a genomes file from the `genomes` string plus the staged fastas, then runs plotsr to produce a PNG
+ tag "$meta.id"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/plotsr:1.1.1--pyh7cba7a3_0':
+ 'biocontainers/plotsr:1.1.1--pyh7cba7a3_0' }"
+
+ input:
+ tuple val(meta), path(syri) // one file or a list of files; each becomes a --sr flag
+ path(fastas) // indexed by position when building the genomes file
+ val(genomes) // genomes file contents as a string; fasta paths are prepended by the script
+ path(bedpe) // bedpe, markers, tracks, chrord, chrname: a falsy value omits the corresponding flag
+ path(markers)
+ path(tracks)
+ path(chrord)
+ path(chrname)
+
+ output:
+ tuple val(meta), path("*.png"), emit: png
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def syri_arg = ( syri instanceof List ) ? syri.collect { "--sr $it" }.join(' ') : "--sr $syri" // one --sr flag per syri file
+ def genomes_txt = genomes.tokenize('\n').withIndex().collect { line, index -> index == 0 ? line : "${fastas[index-1]}\t${line}" }.join('\\n') // first line kept as-is; line i>0 is prefixed with fastas[i-1] and a tab; joined with a literal backslash-n for echo -e
+ def bedpe_arg = bedpe ? "--bedpe $bedpe" : ''
+ def markers_arg = markers ? "--markers $markers" : ''
+ def tracks_arg = tracks ? "--tracks $tracks" : ''
+ def chrord_arg = chrord ? "--chrord $chrord" : ''
+ def chrname_arg = chrname ? "--chrname $chrname" : ''
+ """
+ echo -e "${genomes_txt}" \\
+ > ${prefix}.genomes.txt
+
+ plotsr \\
+ $syri_arg \\
+ --genomes ${prefix}.genomes.txt \\
+ $bedpe_arg \\
+ $markers_arg \\
+ $tracks_arg \\
+ $chrord_arg \\
+ $chrname_arg \\
+ $args \\
+ -o ${prefix}.png
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ plotsr: \$(plotsr --version)
+ END_VERSIONS
+ """
+
+ stub: // builds the same arguments as script:, but only echoes the plotsr command and touches the PNG
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def syri_arg = ( syri instanceof List ) ? syri.collect { "--sr $it" }.join(' ') : "--sr $syri" // one --sr flag per syri file
+ def genomes_txt = genomes.tokenize('\n').withIndex().collect { line, index -> index == 0 ? line : "${fastas[index-1]}\t${line}" }.join('\\n') // same construction as in script:
+ def bedpe_arg = bedpe ? "--bedpe $bedpe" : ''
+ def markers_arg = markers ? "--markers $markers" : ''
+ def tracks_arg = tracks ? "--tracks $tracks" : ''
+ def chrord_arg = chrord ? "--chrord $chrord" : ''
+ def chrname_arg = chrname ? "--chrname $chrname" : ''
+ """
+ echo -e "${genomes_txt}" \\
+ > ${prefix}.genomes.txt
+
+ echo \\
+ "plotsr \\
+ $syri_arg \\
+ --genomes ${prefix}.genomes.txt \\
+ $bedpe_arg \\
+ $markers_arg \\
+ $tracks_arg \\
+ $chrord_arg \\
+ $chrname_arg \\
+ $args \\
+ -o ${prefix}.png"
+
+ touch ${prefix}.png
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ plotsr: \$(plotsr --version)
+ END_VERSIONS
+ """
+}
diff --git a/modules/pfr/plotsr/meta.yml b/modules/pfr/plotsr/meta.yml
new file mode 100644
index 00000000..c728b683
--- /dev/null
+++ b/modules/pfr/plotsr/meta.yml
@@ -0,0 +1,79 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "plotsr"
+description: Plotsr generates high-quality visualisation of synteny and structural rearrangements between multiple genomes.
+keywords:
+ - genomics
+ - synteny
+ - rearrangements
+ - chromosome
+tools:
+ - "plotsr":
+ description: "Visualiser for structural annotations between multiple genomes"
+ homepage: "https://github.com/schneebergerlab/plotsr"
+ documentation: "https://github.com/schneebergerlab/plotsr"
+ tool_dev_url: "https://github.com/schneebergerlab/plotsr"
+ doi: "10.1093/bioinformatics/btac196"
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - syri:
+ type: file
+ description: Structural annotation mappings (syri.out) identified by SyRI
+ pattern: "*syri.out"
+ - fastas:
+ type: list
+ description: Fasta files in the sequence specified by the `genomes` file
+ pattern: "*.{fasta,fa,fsa,faa}"
+ - genomes:
+ type: string
+ description: |
+ String containing the genomes.txt file contents including the header, but excluding the path to genome fasta files.
+ The path to staged genome files is automatically added by the process script. For example, see the included nf-test.
+ pattern: "*.txt"
+ - bedpe:
+ type: file
+ description: Structural annotation mappings in BEDPE format
+ pattern: "*.bedpe"
+ - markers:
+ type: file
+ description: File containing path to markers
+ pattern: "*.bed"
+ - tracks:
+ type: file
+ description: File listing paths and details for all tracks to be plotted
+ pattern: "*.txt"
+ - chrord:
+ type: file
+ description: |
+ File containing reference (first genome) chromosome IDs in the order in which they are to be plotted.
+ File requires one chromosome ID per line. Not compatible with --chr
+ pattern: "*.txt"
+ - chrname:
+ type: file
+ description: |
+ File containing reference (first genome) chromosome names to be used in the plot.
+ File needs to be a TSV with the chromosome ID in first column and chromosome name in the second.
+ pattern: "*.txt"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - png:
+ type: file
+ description: Synteny plot
+ pattern: "*.png"
+authors:
+ - "@GallVp"
+maintainers:
+ - "@GallVp"
diff --git a/modules/pfr/plotsr/tests/main.nf.test b/modules/pfr/plotsr/tests/main.nf.test
new file mode 100644
index 00000000..f02b8ebf
--- /dev/null
+++ b/modules/pfr/plotsr/tests/main.nf.test
@@ -0,0 +1,121 @@
+nextflow_process { // nf-test suite for PLOTSR: one real run and one -stub run, both fed by MINIMAP2_ALIGN -> SYRI setup runs
+
+ name "Test Process PLOTSR"
+ script "../main.nf"
+ config './nextflow.config' // supplies ext.args for MINIMAP2_ALIGN
+ process "PLOTSR"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "plotsr"
+ tag "minimap2/align" // MINIMAP2_ALIGN and SYRI are executed in the setup block below
+ tag "syri"
+
+ setup {
+ run("MINIMAP2_ALIGN") { // aligns genome_fasta against a copy of genome2 whose header is rewritten to '>chr22'; its BAM feeds SYRI
+ script "modules/pfr/minimap2/align/main.nf"
+ process {
+ """
+ def genome2_cleanid = file('genome2_cleanid.fasta')
+ genome2_cleanid.text = file(params.test_data['homo_sapiens']['genome']['genome2_fasta'], checkIfExists: true).text.replace(/>chr22:16600000-16800000/, '>chr22')
+
+ input[0] = [
+ [id: 'test'],
+ file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ input[1] = [
+ [id: 'reference'],
+ genome2_cleanid
+ ]
+ input[2] = true // bam_format
+ input[3] = false // cigar_paf_format
+ input[4] = false // cigar_bam
+ """
+ }
+ }
+
+ run("SYRI") { // consumes the BAM from MINIMAP2_ALIGN; its syri output is input[0] of both tests
+ script "modules/pfr/syri/main.nf"
+ process {
+ """
+ def genome2_cleanid = file('genome2_cleanid.fasta')
+ genome2_cleanid.text = file(params.test_data['homo_sapiens']['genome']['genome2_fasta'], checkIfExists: true).text.replace(/>chr22:16600000-16800000/, '>chr22')
+
+ input[0] = MINIMAP2_ALIGN.out.bam
+ input[1] = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+ input[2] = genome2_cleanid
+ input[3] = 'B'
+ """
+ }
+ }
+ }
+
+ test("homo_sapiens - genome") {
+
+ when {
+ process {
+ """
+ def genome2_cleanid = file('genome2_cleanid.fasta')
+
+ input[0] = SYRI.out.syri
+ input[1] = [
+ file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true),
+ genome2_cleanid
+ ]
+
+ input[2] = "#file name tags\\nquery lw:1.5\\nreference lw:1.5\\n"
+ input[3] = []
+ input[4] = []
+ input[5] = []
+ input[6] = []
+ input[7] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.versions).match() }, // only versions.yml is snapshotted
+ { assert file(process.out.png[0][1]).name } // checks that a PNG path was emitted; the image content is not compared
+ )
+ }
+
+ }
+
+ test("homo_sapiens - genome - stub") {
+
+ options '-stub' // run the process's stub: section instead of script:
+
+ when {
+ process {
+ """
+ def genome2_cleanid = file('genome2_cleanid.fasta')
+
+ input[0] = SYRI.out.syri
+ input[1] = [
+ file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true),
+ genome2_cleanid
+ ]
+
+ input[2] = "#file name tags\\nquery lw:1.5\\nreference lw:1.5\\n"
+ input[3] = []
+ input[4] = []
+ input[5] = []
+ input[6] = []
+ input[7] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.versions).match() }, // only versions.yml is snapshotted
+ { assert file(process.out.png[0][1]).name } // checks that a PNG path was emitted; the image content is not compared
+ )
+ }
+
+ }
+
+}
diff --git a/modules/pfr/plotsr/tests/main.nf.test.snap b/modules/pfr/plotsr/tests/main.nf.test.snap
new file mode 100644
index 00000000..5488c5c8
--- /dev/null
+++ b/modules/pfr/plotsr/tests/main.nf.test.snap
@@ -0,0 +1,26 @@
+{
+ "homo_sapiens - genome": {
+ "content": [
+ [
+ "versions.yml:md5,73ec2873447f67f41edb762294f8f7c5"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-23T17:20:57.183473"
+ },
+ "homo_sapiens - genome - stub": {
+ "content": [
+ [
+ "versions.yml:md5,73ec2873447f67f41edb762294f8f7c5"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-23T20:28:10.557465"
+ }
+}
\ No newline at end of file
diff --git a/modules/pfr/plotsr/tests/nextflow.config b/modules/pfr/plotsr/tests/nextflow.config
new file mode 100644
index 00000000..959099ae
--- /dev/null
+++ b/modules/pfr/plotsr/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+ withName: MINIMAP2_ALIGN {
+ ext.args = '-x asm5 --eqx'
+ }
+}
diff --git a/modules/pfr/plotsr/tests/tags.yml b/modules/pfr/plotsr/tests/tags.yml
new file mode 100644
index 00000000..0a8616fd
--- /dev/null
+++ b/modules/pfr/plotsr/tests/tags.yml
@@ -0,0 +1,2 @@
+plotsr:
+ - "modules/pfr/plotsr/**"
diff --git a/modules/pfr/samblaster/tests/main.nf.test b/modules/pfr/samblaster/tests/main.nf.test
index 01794307..06bb9162 100644
--- a/modules/pfr/samblaster/tests/main.nf.test
+++ b/modules/pfr/samblaster/tests/main.nf.test
@@ -54,4 +54,4 @@ nextflow_process {
}
-}
+}
\ No newline at end of file
diff --git a/modules/pfr/samtools/faidx/environment.yml b/modules/pfr/samtools/faidx/environment.yml
new file mode 100644
index 00000000..9c24eb0a
--- /dev/null
+++ b/modules/pfr/samtools/faidx/environment.yml
@@ -0,0 +1,10 @@
+name: samtools_faidx
+
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+
+dependencies:
+ - bioconda::htslib=1.19.1
+ - bioconda::samtools=1.19.2
diff --git a/modules/pfr/samtools/faidx/main.nf b/modules/pfr/samtools/faidx/main.nf
new file mode 100644
index 00000000..cfe7ad95
--- /dev/null
+++ b/modules/pfr/samtools/faidx/main.nf
@@ -0,0 +1,50 @@
+process SAMTOOLS_FAIDX {
+ tag "$fasta"
+ label 'process_single'
+ // Runs `samtools faidx` on $fasta; task.ext.args carries any extra options (e.g. a region or -o FILE).
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' :
+ 'biocontainers/samtools:1.19.2--h50ea8bc_0' }"
+
+ input:
+ tuple val(meta), path(fasta)
+ tuple val(meta2), path(fai) // not referenced by the commands below; only declared as an input
+
+ output:
+ tuple val(meta), path ("*.{fa,fasta}") , emit: fa , optional: true
+ tuple val(meta), path ("*.fai") , emit: fai, optional: true
+ tuple val(meta), path ("*.gzi") , emit: gzi, optional: true
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ """
+ samtools \\
+ faidx \\
+ $fasta \\
+ $args
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: '' // ext.args may be unset; match against an empty string instead of null
+ def match = (args =~ /-o(?:utput)?\s+(\S+)/).findAll() // capture only the file name after -o/--output, not trailing options
+ def fastacmd = match ? "touch ${match[0][1]}" : '' // no -o given: nothing extra to create
+ """
+ ${fastacmd}
+ touch ${fasta}.fai
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ samtools: \$(echo \$(samtools --version 2>&1) | sed 's/^.*samtools //; s/Using.*\$//')
+ END_VERSIONS
+ """
+}
diff --git a/modules/pfr/custom/checkgff3fastacorrespondence/meta.yml b/modules/pfr/samtools/faidx/meta.yml
similarity index 51%
rename from modules/pfr/custom/checkgff3fastacorrespondence/meta.yml
rename to modules/pfr/samtools/faidx/meta.yml
index 69bbd053..f3c25de2 100644
--- a/modules/pfr/custom/checkgff3fastacorrespondence/meta.yml
+++ b/modules/pfr/samtools/faidx/meta.yml
@@ -1,12 +1,9 @@
----
-# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
-name: "custom_checkgff3fastacorrespondence"
-description: "A custom bash script which checks the correspondence of a gff3 file with a fasta file"
+name: samtools_faidx
+description: Index FASTA file
keywords:
- - genome
- - gff3
- - annotation
- - validation
+ - index
+ - fasta
+ - faidx
tools:
- samtools:
description: |
@@ -17,40 +14,52 @@ tools:
documentation: http://www.htslib.org/doc/samtools.html
doi: 10.1093/bioinformatics/btp352
licence: ["MIT"]
-
input:
- meta:
type: map
description: |
- Groovy Map containing sample information
- e.g. `[ id:'test' ]`
- - gff3:
- type: file
- description: Input gff3 file
- pattern: "*.{gff,gff3}"
+ Groovy Map containing reference information
+ e.g. [ id:'test' ]
- fasta:
type: file
- description: Input fasta file
- pattern: "*.{fsa,fa,fasta}"
+ description: FASTA file
+ pattern: "*.{fa,fasta}"
+ - meta2:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'test' ]
+ - fai:
+ type: file
+ description: FASTA index file
+ pattern: "*.{fai}"
output:
- meta:
type: map
description: |
Groovy Map containing sample information
- e.g. `[ id:'test' ]`
- - success_log:
+ e.g. [ id:'test', single_end:false ]
+ - fa:
+ type: file
+ description: FASTA file
+ pattern: "*.{fa,fasta}"
+ - fai:
type: file
- description: Log file for successful validation
- pattern: "*.success.log"
- - error_log:
+ description: FASTA index file
+ pattern: "*.{fai}"
+ - gzi:
type: file
- description: Log file for failed validation
- pattern: "*.error.log"
+ description: Optional gzip index file for compressed inputs
+ pattern: "*.gzi"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- - "@GallVp"
+ - "@drpatelh"
+ - "@ewels"
+ - "@phue"
maintainers:
- - "@GallVp"
+ - "@drpatelh"
+ - "@ewels"
+ - "@phue"
diff --git a/modules/pfr/samtools/faidx/tests/main.nf.test b/modules/pfr/samtools/faidx/tests/main.nf.test
new file mode 100644
index 00000000..17244ef2
--- /dev/null
+++ b/modules/pfr/samtools/faidx/tests/main.nf.test
@@ -0,0 +1,122 @@
+nextflow_process {
+
+ name "Test Process SAMTOOLS_FAIDX"
+ script "../main.nf"
+ process "SAMTOOLS_FAIDX"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "samtools"
+ tag "samtools/faidx"
+
+ test("test_samtools_faidx") { // uncompressed FASTA, empty second input, default config
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]
+
+ input[1] = [[],[]]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("test_samtools_faidx_bgzip") { // same as above but with genome.fasta.gz as input
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.gz', checkIfExists: true)]
+
+ input[1] = [[],[]]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("test_samtools_faidx_fasta") { // FASTA plus an existing .fai; ext.args comes from nextflow.config
+
+ config "./nextflow.config"
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]
+
+ input[1] = [ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("test_samtools_faidx_stub_fasta") { // NOTE(review): named "stub" but no options "-stub" is set here; confirm intent
+
+ config "./nextflow2.config"
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]
+
+ input[1] = [ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta.fai', checkIfExists: true) ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+
+ test("test_samtools_faidx_stub_fai") { // NOTE(review): named "stub" but no options "-stub" is set here; confirm intent
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test', single_end:false ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true) ]
+
+ input[1] = [[],[]]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+ }
+}
\ No newline at end of file
diff --git a/modules/pfr/samtools/faidx/tests/main.nf.test.snap b/modules/pfr/samtools/faidx/tests/main.nf.test.snap
new file mode 100644
index 00000000..3e651ef6
--- /dev/null
+++ b/modules/pfr/samtools/faidx/tests/main.nf.test.snap
@@ -0,0 +1,249 @@
+{
+ "test_samtools_faidx": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5"
+ ]
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c"
+ ],
+ "fa": [
+
+ ],
+ "fai": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5"
+ ]
+ ],
+ "gzi": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-18T16:22:39.412601"
+ },
+ "test_samtools_faidx_bgzip": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c"
+ ],
+ "fa": [
+
+ ],
+ "fai": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "genome.fasta.gz.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5"
+ ]
+ ],
+ "gzi": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "genome.fasta.gz.gzi:md5,7dea362b3fac8e00956a4952a3d4f474"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-18T16:23:22.427966"
+ },
+ "test_samtools_faidx_fasta": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c"
+ ],
+ "fa": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "extract.fa:md5,6a0774a0ad937ba0bfd2ac7457d90f36"
+ ]
+ ],
+ "fai": [
+
+ ],
+ "gzi": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-18T16:24:04.107537"
+ },
+ "test_samtools_faidx_stub_fasta": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "extract.fa:md5,9da2a56e2853dc8c0b86a9e7229c9fe5"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c"
+ ],
+ "fa": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "extract.fa:md5,9da2a56e2853dc8c0b86a9e7229c9fe5"
+ ]
+ ],
+ "fai": [
+
+ ],
+ "gzi": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-18T16:24:45.868463"
+ },
+ "test_samtools_faidx_stub_fai": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5"
+ ]
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c"
+ ],
+ "fa": [
+
+ ],
+ "fai": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "genome.fasta.fai:md5,9da2a56e2853dc8c0b86a9e7229c9fe5"
+ ]
+ ],
+ "gzi": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,4870fc0a88c616aa937f8325a2db0c3c"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-18T16:25:27.550554"
+ }
+}
\ No newline at end of file
diff --git a/modules/pfr/samtools/faidx/tests/nextflow.config b/modules/pfr/samtools/faidx/tests/nextflow.config
new file mode 100644
index 00000000..f76a3ba0
--- /dev/null
+++ b/modules/pfr/samtools/faidx/tests/nextflow.config
@@ -0,0 +1,7 @@
+process {
+
+ withName: SAMTOOLS_FAIDX {
+ ext.args = 'MT192765.1 -o extract.fa' // region MT192765.1, output written to extract.fa
+ }
+
+}
diff --git a/modules/pfr/samtools/faidx/tests/nextflow2.config b/modules/pfr/samtools/faidx/tests/nextflow2.config
new file mode 100644
index 00000000..33ebbd5d
--- /dev/null
+++ b/modules/pfr/samtools/faidx/tests/nextflow2.config
@@ -0,0 +1,6 @@
+process {
+
+ withName: SAMTOOLS_FAIDX {
+ ext.args = '-o extract.fa' // no region given; only the output path is set
+ }
+}
diff --git a/modules/pfr/samtools/faidx/tests/tags.yml b/modules/pfr/samtools/faidx/tests/tags.yml
new file mode 100644
index 00000000..e4a83948
--- /dev/null
+++ b/modules/pfr/samtools/faidx/tests/tags.yml
@@ -0,0 +1,2 @@
+samtools/faidx:
+ - modules/pfr/samtools/faidx/**
diff --git a/modules/pfr/syri/environment.yml b/modules/pfr/syri/environment.yml
new file mode 100644
index 00000000..45eafb1f
--- /dev/null
+++ b/modules/pfr/syri/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "syri"
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - "bioconda::syri=1.6.3"
diff --git a/modules/pfr/syri/main.nf b/modules/pfr/syri/main.nf
new file mode 100644
index 00000000..0a10ae19
--- /dev/null
+++ b/modules/pfr/syri/main.nf
@@ -0,0 +1,60 @@
+process SYRI { // Runs syri on an alignment file plus query/reference FASTA; emits syri.out or, failing that, an error log
+ tag "$meta.id"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/syri:1.6.3--py38hdbdd923_2':
+ 'biocontainers/syri:1.6.3--py38hdbdd923_2' }"
+
+ input:
+ tuple val(meta), path(infile)
+ path(query_fasta)
+ path(reference_fasta)
+ val(file_type) // must be one of 'T', 'S', 'B', 'P'; checked at the top of script:
+
+ output:
+ tuple val(meta), path("*syri.out") , emit: syri , optional: true
+ tuple val(meta), path("*.error.log") , emit: error , optional: true // left in place only when no syri.out exists (see rm below)
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ if ( ! ( file_type in [ 'T', 'S', 'B', 'P' ] ) ) { error "File type should be one of [ 'T', 'S', 'B', 'P' ]" }
+ """
+ syri \\
+ -c $infile \\
+ -q $query_fasta \\
+ -r $reference_fasta \\
+ -F $file_type \\
+ $args \\
+ --prefix $prefix \\
+ 2> >(tee "${prefix}.error.log" >&2) \\
+ || echo "Errors from syri printed to ${prefix}.error.log"
+
+ [ -f "${prefix}syri.out" ] \\
+ && rm "${prefix}.error.log" \\
+ || echo 'Syri failed and no syri.out file was created'
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ syri: \$(syri --version)
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: '' // NOTE(review): not referenced in the stub script below
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ touch ${prefix}syri.out
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ syri: \$(syri --version)
+ END_VERSIONS
+ """
+}
diff --git a/modules/pfr/syri/meta.yml b/modules/pfr/syri/meta.yml
new file mode 100644
index 00000000..d7801532
--- /dev/null
+++ b/modules/pfr/syri/meta.yml
@@ -0,0 +1,64 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "syri"
+description: Syri compares alignments between two chromosome-level assemblies and identifies synteny and structural rearrangements.
+keywords:
+ - genomics
+ - synteny
+ - rearrangements
+ - chromosome
+tools:
+ - "syri":
+ description: "Synteny and rearrangement identifier between whole-genome assemblies"
+ homepage: "https://github.com/schneebergerlab/syri"
+ documentation: "https://github.com/schneebergerlab/syri"
+ tool_dev_url: "https://github.com/schneebergerlab/syri"
+ doi: "10.1186/s13059-019-1911-0"
+ licence: ["MIT License"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - infile:
+ type: file
+ description: File containing alignment coordinates
+ pattern: "*.{table,sam,bam,paf}"
+ - query_fasta:
+ type: file
+ description: Query genome for the alignments
+ pattern: "*.fasta"
+ - reference_fasta:
+ type: file
+ description: Reference genome for the alignments
+ pattern: "*.fasta"
+ - file_type:
+ type: string
+ description: |
+ Input file type which is one of T: Table, S: SAM, B: BAM, P: PAF
+
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - syri:
+ type: file
+ description: Syri output file
+ pattern: "*syri.out"
+ - error:
+ type: file
+ description: |
+ Error log if syri fails. This error log enables the pipeline to detect if syri has failed due to one of its
+ known limitations and pass the information to the user in a user-friendly manner such as a HTML report
+ pattern: "*.error.log"
+authors:
+ - "@GallVp"
+maintainers:
+ - "@GallVp"
diff --git a/modules/pfr/syri/tests/main.nf.test b/modules/pfr/syri/tests/main.nf.test
new file mode 100644
index 00000000..baa8555c
--- /dev/null
+++ b/modules/pfr/syri/tests/main.nf.test
@@ -0,0 +1,80 @@
+nextflow_process {
+
+ name "Test Process SYRI"
+ script "../main.nf"
+ config './nextflow.config' // sets ext.args for the MINIMAP2_ALIGN setup step
+ process "SYRI"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "syri"
+ tag "minimap2/align" // MINIMAP2_ALIGN runs in setup to produce the BAM used as input[0]
+
+ setup {
+ run("MINIMAP2_ALIGN") {
+ script "modules/pfr/minimap2/align/main.nf"
+ process {
+ """
+ input[0] = [
+ [id: 'test'],
+ file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+ ]
+ input[1] = [
+ [id: 'reference'],
+ file(params.test_data['homo_sapiens']['genome']['genome2_fasta'], checkIfExists: true)
+ ]
+ input[2] = true // bam_format
+ input[3] = false // cigar_paf_format
+ input[4] = false // cigar_bam
+ """
+ }
+ }
+ }
+
+ test("homo_sapiens - genome") { // real run: BAM from setup, file_type 'B'
+
+ when {
+ process {
+ """
+ input[0] = MINIMAP2_ALIGN.out.bam
+ input[1] = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+ input[2] = file(params.test_data['homo_sapiens']['genome']['genome2_fasta'], checkIfExists: true)
+ input[3] = 'B'
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("homo_sapiens - genome - stub") { // same inputs, executed with the process's stub: block
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = MINIMAP2_ALIGN.out.bam
+ input[1] = file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)
+ input[2] = file(params.test_data['homo_sapiens']['genome']['genome2_fasta'], checkIfExists: true)
+ input[3] = 'B'
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/pfr/custom/checkgff3fastacorrespondence/tests/main.nf.test.snap b/modules/pfr/syri/tests/main.nf.test.snap
similarity index 53%
rename from modules/pfr/custom/checkgff3fastacorrespondence/tests/main.nf.test.snap
rename to modules/pfr/syri/tests/main.nf.test.snap
index 261e0dc3..8d09bf33 100644
--- a/modules/pfr/custom/checkgff3fastacorrespondence/tests/main.nf.test.snap
+++ b/modules/pfr/syri/tests/main.nf.test.snap
@@ -1,40 +1,44 @@
{
- "sarscov2-gff3-homo_sapiens-fasta-error": {
+ "homo_sapiens - genome": {
"content": [
{
"0": [
-
- ],
- "1": [
[
{
"id": "test"
},
- "test.error.log:md5,8a119170625dc95fb2faa6843fad2c3f"
+ "testsyri.out:md5,fd2a1b676912818960056aa281177bb4"
]
+ ],
+ "1": [
+
],
"2": [
- "versions.yml:md5,c8e0bb60f7422aa6c15db35013620802"
+ "versions.yml:md5,05cf55e24c29c2661a2b9301d3c7c2a0"
+ ],
+ "error": [
+
],
- "error_log": [
+ "syri": [
[
{
"id": "test"
},
- "test.error.log:md5,8a119170625dc95fb2faa6843fad2c3f"
+ "testsyri.out:md5,fd2a1b676912818960056aa281177bb4"
]
- ],
- "success_log": [
-
],
"versions": [
- "versions.yml:md5,c8e0bb60f7422aa6c15db35013620802"
+ "versions.yml:md5,05cf55e24c29c2661a2b9301d3c7c2a0"
]
}
],
- "timestamp": "2023-11-29T12:24:08.677505"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.1"
+ },
+ "timestamp": "2024-05-25T21:30:50.653501"
},
- "sarscov2-fasta-gff3-success": {
+ "homo_sapiens - genome - stub": {
"content": [
{
"0": [
@@ -42,31 +46,35 @@
{
"id": "test"
},
- "test.success.log:md5,5cad27984e6af4889f7dcf12264fe47b"
+ "testsyri.out:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"1": [
],
"2": [
- "versions.yml:md5,c8e0bb60f7422aa6c15db35013620802"
+ "versions.yml:md5,05cf55e24c29c2661a2b9301d3c7c2a0"
],
- "error_log": [
+ "error": [
],
- "success_log": [
+ "syri": [
[
{
"id": "test"
},
- "test.success.log:md5,5cad27984e6af4889f7dcf12264fe47b"
+ "testsyri.out:md5,d41d8cd98f00b204e9800998ecf8427e"
]
],
"versions": [
- "versions.yml:md5,c8e0bb60f7422aa6c15db35013620802"
+ "versions.yml:md5,05cf55e24c29c2661a2b9301d3c7c2a0"
]
}
],
- "timestamp": "2023-11-29T12:24:04.530428"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.1"
+ },
+ "timestamp": "2024-05-25T21:22:31.028411"
}
}
\ No newline at end of file
diff --git a/modules/pfr/syri/tests/nextflow.config b/modules/pfr/syri/tests/nextflow.config
new file mode 100644
index 00000000..959099ae
--- /dev/null
+++ b/modules/pfr/syri/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+ withName: MINIMAP2_ALIGN {
+ ext.args = '-x asm5 --eqx' // minimap2 asm5 preset; --eqx emits =/X CIGAR operators
+ }
+}
diff --git a/modules/pfr/syri/tests/tags.yml b/modules/pfr/syri/tests/tags.yml
new file mode 100644
index 00000000..3afda524
--- /dev/null
+++ b/modules/pfr/syri/tests/tags.yml
@@ -0,0 +1,2 @@
+syri:
+ - "modules/pfr/syri/**"
diff --git a/nextflow.config b/nextflow.config
index f0ee1b5b..1f445720 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -12,6 +12,9 @@ params {
// Input options
input = null
+ // Validation options
+ check_sequence_duplicates = true
+
// Assemblathon stats options
assemblathon_stats_n_limit = 100
@@ -23,6 +26,8 @@ params {
ncbi_fcs_gx_tax_id = null
ncbi_fcs_gx_db_path = null
+ contamination_stops_pipeline = true
+
// BUSCO options
busco_skip = true
busco_mode = null
@@ -50,13 +55,22 @@ params {
// Synteny options
synteny_skip = true
+ synteny_mummer_skip = true
+ synteny_plotsr_skip = true
+ synteny_xref_assemblies = null
synteny_between_input_assemblies = true
- synteny_many_to_many_align = false
- synteny_max_gap = 1000000
- synteny_min_bundle_size = 1000
- synteny_plot_1_vs_all = true
+ synteny_mummer_plot_type = 'both'
+ synteny_mummer_m2m_align = false
+ synteny_mummer_max_gap = 1000000
+ synteny_mummer_min_bundle_size = 1000
+ synteny_plot_1_vs_all = false
synteny_color_by_contig = true
- synteny_xref_assemblies = null
+ synteny_plotsr_seq_label = 'Chr'
+ synteny_plotsr_assembly_order = null
+
+ // Merqury options
+ merqury_skip = true
+ merqury_kmer_length = 21
// Output options
outdir = './results'
@@ -72,6 +86,7 @@ params {
email_on_fail = null
plaintext_email = false
monochrome_logs = false
+ monochromeLogs = false
hook_url = null
help = false
version = false
@@ -104,38 +119,37 @@ try {
}
// Load plant-food-research-open/assemblyqc custom profiles from different institutions.
-// Warning: Uncomment only if a pipeline-specific institutional config already exists on nf-core/configs!
// try {
-// includeConfig "${params.custom_config_base}/pipeline/assemblyqc.config"
+// includeConfig "${params.custom_config_base}/pipeline/assemblyqc.config"
// } catch (Exception e) {
-// System.err.println("WARNING: Could not load nf-core/config/assemblyqc profiles: ${params.custom_config_base}/pipeline/assemblyqc.config")
+// System.err.println("WARNING: Could not load nf-core/config/assemblyqc profiles: ${params.custom_config_base}/pipeline/assemblyqc.config")
// }
profiles {
debug {
- dumpHashes = true
- process.beforeScript = 'echo $HOSTNAME'
- cleanup = false
+ dumpHashes = true
+ process.beforeScript = 'echo $HOSTNAME'
+ cleanup = false
nextflow.enable.configProcessNamesValidation = true
}
conda {
- conda.enabled = true
- docker.enabled = false
- singularity.enabled = false
- podman.enabled = false
- shifter.enabled = false
- charliecloud.enabled = false
- channels = ['conda-forge', 'bioconda', 'defaults']
- apptainer.enabled = false
+ conda.enabled = true
+ docker.enabled = false
+ singularity.enabled = false
+ podman.enabled = false
+ shifter.enabled = false
+ charliecloud.enabled = false
+ conda.channels = ['conda-forge', 'bioconda', 'defaults']
+ apptainer.enabled = false
}
mamba {
- conda.enabled = true
- conda.useMamba = true
- docker.enabled = false
- singularity.enabled = false
- podman.enabled = false
- shifter.enabled = false
- charliecloud.enabled = false
- apptainer.enabled = false
+ conda.enabled = true
+ conda.useMamba = true
+ docker.enabled = false
+ singularity.enabled = false
+ podman.enabled = false
+ shifter.enabled = false
+ charliecloud.enabled = false
+ apptainer.enabled = false
}
docker {
docker.enabled = true
@@ -152,56 +166,63 @@ profiles {
docker.runOptions = '--platform=linux/amd64'
}
singularity {
- singularity.enabled = true
- singularity.autoMounts = true
- conda.enabled = false
- docker.enabled = false
- podman.enabled = false
- shifter.enabled = false
- charliecloud.enabled = false
- apptainer.enabled = false
+ singularity.enabled = true
+ singularity.autoMounts = true
+ conda.enabled = false
+ docker.enabled = false
+ podman.enabled = false
+ shifter.enabled = false
+ charliecloud.enabled = false
+ apptainer.enabled = false
}
podman {
- podman.enabled = true
- conda.enabled = false
- docker.enabled = false
- singularity.enabled = false
- shifter.enabled = false
- charliecloud.enabled = false
- apptainer.enabled = false
+ podman.enabled = true
+ conda.enabled = false
+ docker.enabled = false
+ singularity.enabled = false
+ shifter.enabled = false
+ charliecloud.enabled = false
+ apptainer.enabled = false
}
shifter {
- shifter.enabled = true
- conda.enabled = false
- docker.enabled = false
- singularity.enabled = false
- podman.enabled = false
- charliecloud.enabled = false
- apptainer.enabled = false
+ shifter.enabled = true
+ conda.enabled = false
+ docker.enabled = false
+ singularity.enabled = false
+ podman.enabled = false
+ charliecloud.enabled = false
+ apptainer.enabled = false
}
charliecloud {
- charliecloud.enabled = true
- conda.enabled = false
- docker.enabled = false
- singularity.enabled = false
- podman.enabled = false
- shifter.enabled = false
- apptainer.enabled = false
+ charliecloud.enabled = true
+ conda.enabled = false
+ docker.enabled = false
+ singularity.enabled = false
+ podman.enabled = false
+ shifter.enabled = false
+ apptainer.enabled = false
}
apptainer {
- apptainer.enabled = true
- apptainer.autoMounts = true
- conda.enabled = false
- docker.enabled = false
- singularity.enabled = false
- podman.enabled = false
- shifter.enabled = false
- charliecloud.enabled = false
+ apptainer.enabled = true
+ apptainer.autoMounts = true
+ conda.enabled = false
+ docker.enabled = false
+ singularity.enabled = false
+ podman.enabled = false
+ shifter.enabled = false
+ charliecloud.enabled = false
+ }
+ wave {
+ apptainer.ociAutoPull = true
+ singularity.ociAutoPull = true
+ wave.enabled = true
+ wave.freeze = true
+ wave.strategy = 'conda,container'
}
gitpod {
- executor.name = 'local'
- executor.cpus = 4
- executor.memory = 8.GB
+ executor.name = 'local'
+ executor.cpus = 4
+ executor.memory = 8.GB
}
test { includeConfig 'conf/test.config' }
test_full { includeConfig 'conf/test_full.config' }
@@ -262,8 +283,8 @@ manifest {
description = """A NextFlow pipeline which evaluates assembly quality with multiple QC tools and presents the results in a unified html report."""
mainScript = 'main.nf'
nextflowVersion = '!>=23.04.0'
- version = '1.4'
- doi = '10.5281/zenodo.10647870'
+ version = '2.0.0'
+ doi = 'https://doi.org/10.5281/zenodo.10647870'
}
// Load modules.config for DSL2 module specific options
diff --git a/nextflow_schema.json b/nextflow_schema.json
index f16a18ba..1fee6de3 100644
--- a/nextflow_schema.json
+++ b/nextflow_schema.json
@@ -14,9 +14,10 @@
"input": {
"type": "string",
"format": "file-path",
- "mimetype": "csv",
+ "exists": true,
"schema": "assets/schema_input.json",
- "help_text": "FASTA and other associated files for input assemblies provided as a formatted CSV file",
+ "mimetype": "text/csv",
+ "pattern": "^\\S+\\.csv$",
"description": "Input assembly sheet in CSV format",
"fa_icon": "fas fa-file-csv"
},
@@ -31,12 +32,25 @@
"type": "string",
"description": "Email address for completion summary.",
"fa_icon": "fas fa-envelope",
- "help_text": "Set this parameter to your e-mail address to get a summary e-mail with details of the run sent to you when the workflow exits. If set in your user config file (`~/.nextflow/config`) then you don't need to specify this on the command line for every run.",
"pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$"
}
},
"required": ["input", "outdir"]
},
+ "validation_options": {
+ "title": "Validation options",
+ "type": "object",
+ "description": "",
+ "default": "",
+ "properties": {
+ "check_sequence_duplicates": {
+ "type": "boolean",
+ "default": true,
+ "fa_icon": "fas fa-question-circle",
+ "description": "Check for duplicate sequences in fasta validation"
+ }
+ }
+ },
"general_stats_options": {
"title": "General stats options",
"type": "object",
@@ -46,8 +60,8 @@
"assemblathon_stats_n_limit": {
"type": "integer",
"default": 100,
- "help_text": "This number is used to split the scaffolds into contigs to compute contig-related stats such as the number of contigs, N50, etc. NCBI recommendation is 100.",
- "description": "The number of 'N's for the unknown gap size"
+ "description": "The number of 'N's for the unknown gap size. NCBI recommendation is 100",
+ "fa_icon": "fas fa-ruler-horizontal"
}
}
},
@@ -60,29 +74,37 @@
"ncbi_fcs_adaptor_skip": {
"type": "boolean",
"description": "Skip NCBI FCS Adaptor checking",
- "default": true
+ "default": true,
+ "fa_icon": "fas fa-forward"
},
"ncbi_fcs_adaptor_empire": {
"type": "string",
"enum": ["euk", "prok"],
- "description": "Empire for NCBI FCS Adaptor checking",
- "help_text": "'euk' for eukaryotes, or 'prok' for prokaryotes"
+ "description": "Empire for NCBI FCS Adaptor checking: 'euk' for eukaryotes, or 'prok' for prokaryotes",
+ "fa_icon": "fas fa-project-diagram"
},
"ncbi_fcs_gx_skip": {
"type": "boolean",
"description": "Skip NCBI FCS external organism contamination checking",
- "default": true
+ "default": true,
+ "fa_icon": "fas fa-forward"
},
"ncbi_fcs_gx_tax_id": {
"type": "number",
- "help_text": "Get correct tax ID from https://www.ncbi.nlm.nih.gov/taxonomy",
- "description": "Tax ID for NCBI FCS GX"
+ "description": "Tax ID for NCBI FCS GX. See: https://www.ncbi.nlm.nih.gov/taxonomy",
+ "fa_icon": "fab fa-orcid"
},
"ncbi_fcs_gx_db_path": {
"type": "string",
"format": "directory-path",
- "help_text": "NCBI FCS GX DB path\n\nDue to enormity of the DB size, the pipeline does NOT download the data. It must be setup by the user manually before running the pipeline. See instructions for DB setup: https://github.com/ncbi/fcs/wiki/FCS-GX",
- "description": "Path to NCBI FCS GX database"
+ "description": "Path to NCBI FCS GX database. See: https://github.com/ncbi/fcs/wiki/FCS-GX",
+ "fa_icon": "fas fa-folder-open"
+ },
+ "contamination_stops_pipeline": {
+ "type": "boolean",
+ "default": true,
+ "fa_icon": "fas fa-hand-paper",
+ "description": "Skip remaining QC steps for an assembly which has adaptor or GX contamination"
}
}
},
@@ -95,25 +117,26 @@
"busco_skip": {
"type": "boolean",
"description": "Skip BUSCO",
- "default": true
+ "default": true,
+ "fa_icon": "fas fa-forward"
},
"busco_mode": {
"type": "string",
- "enum": ["geno", "tran", "prot", "genome", "transcriptome", "proteins"],
- "help_text": "'geno' or 'genome' for genome assemblies (DNA), 'tran' or 'transcriptome' for transcriptome assemblies (DNA), 'prot' or 'proteins' for annotated gene sets (protein)",
- "description": "BUSCO mode"
+ "enum": ["genome", "transcriptome", "proteins"],
+ "description": "BUSCO mode: 'genome', 'transcriptome', 'proteins'",
+ "fa_icon": "fas fa-cog"
},
"busco_lineage_datasets": {
"type": "string",
- "help_text": "Each input assembly is assessed against each lineage. It should be provided as a space-separated list of lineages: 'fungi_odb10 microsporidia_odb10' ",
"pattern": "^(\\w+_odb10\\s)*\\w+_odb10$",
- "description": "BUSCO lineages"
+ "description": "BUSCO lineages. It should be provided as a space-separated list of lineages: 'fungi_odb10 microsporidia_odb10'",
+ "fa_icon": "fas fa-clipboard-list"
},
"busco_download_path": {
"type": "string",
- "help_text": "BUSCO DB download path\n\nThe pipeline automatically downloads the required DB if needed",
"description": "Download path for BUSCO",
- "format": "directory-path"
+ "format": "directory-path",
+ "fa_icon": "fas fa-folder-open"
}
}
},
@@ -126,22 +149,25 @@
"tidk_skip": {
"type": "boolean",
"description": "Skip telomere identification",
- "default": true
+ "default": true,
+ "fa_icon": "fas fa-forward"
},
"tidk_repeat_seq": {
"type": "string",
- "description": "Telomere repeat sequence",
+ "description": "Telomere repeat sequence. Typical values for plant: TTTAGGG, fungus, vertebrates: TTAGGG and Insect: TTAGG",
"pattern": "^[ACGT]+$",
- "help_text": "Plant: TTTAGGG, Fungus, Vertebrates: TTAGGG, Insect: TTAGG"
+ "fa_icon": "fas fa-dna"
},
"tidk_filter_by_size": {
"type": "boolean",
- "description": "Filter size in base-pairs"
+ "description": "Filter assembly sequences smaller than the specified length",
+ "fa_icon": "fas fa-question-circle"
},
"tidk_filter_size_bp": {
"type": "integer",
"default": 1000000,
- "description": "Filter size in base-pairs"
+ "description": "Filter size in base-pairs",
+ "fa_icon": "fas fa-ruler-horizontal"
}
}
},
@@ -154,7 +180,8 @@
"lai_skip": {
"type": "boolean",
"default": true,
- "description": "Skip LAI estimation"
+ "description": "Skip LAI estimation",
+ "fa_icon": "fas fa-forward"
}
}
},
@@ -167,12 +194,14 @@
"kraken2_skip": {
"type": "boolean",
"default": true,
- "description": "Skip Kraken2"
+ "description": "Skip Kraken2",
+ "fa_icon": "fas fa-forward"
},
"kraken2_db_path": {
"type": "string",
"description": "Kraken2 database path",
- "format": "path"
+ "format": "path",
+ "fa_icon": "fas fa-folder-open"
}
}
},
@@ -184,22 +213,24 @@
"properties": {
"hic": {
"type": "string",
- "description": "HiC reads",
- "help_text": "Path to reads provided as a SRA ID or as a path to paired reads with pattern '*{1,2}.(fastq|fq).gz'",
+ "description": "HiC reads path provided as a SRA ID or as paired reads with pattern '*{1,2}.(fastq|fq).gz'",
"pattern": "^SR\\w+$|^\\S+\\{1,2\\}\\.f(ast)?q\\.gz$"
},
"hic_skip_fastp": {
"type": "boolean",
- "description": "Skip HiC read trimming"
+ "description": "Skip HiC read trimming",
+ "fa_icon": "fas fa-forward"
},
"hic_skip_fastqc": {
"type": "boolean",
- "description": "Skip HiC read QC"
+ "description": "Skip HiC read QC",
+ "fa_icon": "fas fa-forward"
},
"hic_fastp_ext_args": {
"type": "string",
"default": "--qualified_quality_phred 20 --length_required 50",
- "description": "Additional parameters for fastp trimming"
+ "description": "Additional parameters for fastp trimming",
+ "fa_icon": "fas fa-terminal"
}
}
},
@@ -212,44 +243,102 @@
"synteny_skip": {
"type": "boolean",
"default": true,
- "description": "Skip synteny analysis"
+ "description": "Skip synteny analysis",
+ "fa_icon": "fas fa-forward"
+ },
+ "synteny_mummer_skip": {
+ "type": "boolean",
+ "default": true,
+ "description": "Skip Mummer-based synteny analysis",
+ "fa_icon": "fas fa-forward"
+ },
+ "synteny_plotsr_skip": {
+ "type": "boolean",
+ "default": true,
+ "description": "Skip plotsr-based synteny analysis",
+ "fa_icon": "fas fa-forward"
+ },
+ "synteny_xref_assemblies": {
+ "type": "string",
+ "description": "Reference assemblies for synteny analysis",
+ "format": "file-path",
+ "mimetype": "csv",
+ "schema": "assets/schema_xref_assemblies.json",
+ "fa_icon": "fas fa-file-csv"
},
"synteny_between_input_assemblies": {
"type": "boolean",
"description": "Create syntenic plots between each pair of input assemblies",
- "default": true
+ "default": true,
+ "fa_icon": "fas fa-question-circle"
},
- "synteny_many_to_many_align": {
+ "synteny_mummer_plot_type": {
+ "type": "string",
+ "default": "both",
+ "description": "Synteny plot type from Mummer alignments: 'dotplot', 'circos', or 'both'",
+ "enum": ["both", "dotplot", "circos"],
+ "fa_icon": "fas fa-chart-line"
+ },
+ "synteny_mummer_m2m_align": {
"type": "boolean",
- "description": "Include alignment blocks with many-to-many mappings (dnadiff .mcoords file)"
+ "description": "Include Mummer alignment blocks with many-to-many mappings",
+ "fa_icon": "fas fa-question-circle"
},
- "synteny_max_gap": {
+ "synteny_mummer_max_gap": {
"type": "integer",
"default": 1000000,
- "description": "Alignments within this distance are bundled together"
+ "description": "Mummer alignments within this distance are bundled together",
+ "fa_icon": "fas fa-ruler-horizontal"
},
- "synteny_min_bundle_size": {
+ "synteny_mummer_min_bundle_size": {
"type": "integer",
"default": 1000,
- "description": "After bundling, any bundle smaller than this size is filtered out"
+ "description": "After bundling, any Mummer alignment bundle smaller than this size is filtered out",
+ "fa_icon": "fas fa-ruler-horizontal"
},
"synteny_plot_1_vs_all": {
"type": "boolean",
- "default": true,
- "description": "Create a separate synteny plot for each contig of the target assembly versus all contigs of the reference assembly"
+ "description": "Create a separate synteny plot for each contig of the target assembly versus all contigs of the reference assembly. This only applies to Mummer plots",
+ "fa_icon": "fas fa-question-circle"
},
"synteny_color_by_contig": {
"type": "boolean",
"default": true,
- "description": "Synteny plot is colored by contig"
+ "description": "Mummer synteny plots are colored by contig. Otherwise, they are colored by bundle size",
+ "fa_icon": "fas fa-question-circle"
},
- "synteny_xref_assemblies": {
+ "synteny_plotsr_seq_label": {
"type": "string",
- "description": "Reference assemblies for synteny analysis",
- "help_text": "FASTA and synteny label tsv files should be provided in a formatted CSV file ",
- "format": "file-path",
- "mimetype": "csv",
- "schema": "assets/schema_xref_assemblies.json"
+ "default": "Chr",
+ "description": "Sequence label prefix for plotsr synteny",
+ "fa_icon": "fas fa-tag"
+ },
+ "synteny_plotsr_assembly_order": {
+ "type": "string",
+ "fa_icon": "fas fa-sort-alpha-down",
+ "pattern": "^(\\w+\\s)*\\w+$",
+ "description": "The order in which the assemblies should be compared, provided as space separated string of assembly tags. If absent, assemblies are ordered by their tags alphabetically."
+ }
+ }
+ },
+ "merqury_options": {
+ "title": "Merqury options",
+ "type": "object",
+ "description": "",
+ "default": "",
+ "properties": {
+ "merqury_skip": {
+ "type": "boolean",
+ "default": true,
+ "description": "Skip merqury analysis",
+ "fa_icon": "fas fa-forward"
+ },
+ "merqury_kmer_length": {
+ "type": "integer",
+ "default": 21,
+ "description": "kmer length for merqury analysis",
+ "minimum": 3,
+ "fa_icon": "fas fa-ruler-horizontal"
}
}
},
@@ -265,26 +354,23 @@
"description": "Maximum number of CPUs that can be requested for any single job.",
"default": 16,
"fa_icon": "fas fa-microchip",
- "hidden": true,
- "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`"
+ "hidden": true
},
"max_memory": {
"type": "string",
- "description": "Maximum amount of memory that can be requested for any single job.",
+ "description": "Maximum amount of memory that can be requested for any single job. Example: '8.GB'",
"default": "512.GB",
"fa_icon": "fas fa-memory",
"pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$",
- "hidden": true,
- "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`"
+ "hidden": true
},
"max_time": {
"type": "string",
- "description": "Maximum amount of time that can be requested for any single job.",
+ "description": "Maximum amount of time that can be requested for any single job. Example: '1.day'",
"default": "7.day",
"fa_icon": "far fa-clock",
"pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$",
- "hidden": true,
- "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`"
+ "hidden": true
}
}
},
@@ -307,7 +393,6 @@
"description": "Base directory for Institutional configs.",
"default": "https://raw.githubusercontent.com/nf-core/configs/master",
"hidden": true,
- "help_text": "If you're running offline, Nextflow will not be able to fetch the institutional config files from the internet. If you don't need them, then this is not a problem. If you do need them, you should download the files from the repo and tell Nextflow where to find them with this parameter.",
"fa_icon": "fas fa-users-cog"
},
"config_profile_name": {
@@ -359,7 +444,6 @@
"type": "string",
"default": "copy",
"description": "Method used to save pipeline results to output directory.",
- "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
"fa_icon": "fas fa-copy",
"enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
"hidden": true
@@ -369,7 +453,6 @@
"description": "Email address for completion summary, only when pipeline fails.",
"fa_icon": "fas fa-exclamation-triangle",
"pattern": "^([a-zA-Z0-9_\\-\\.]+)@([a-zA-Z0-9_\\-\\.]+)\\.([a-zA-Z]{2,5})$",
- "help_text": "An email address to send a summary email to when the pipeline is completed - ONLY sent if the pipeline does not exit successfully.",
"hidden": true
},
"plaintext_email": {
@@ -384,11 +467,16 @@
"fa_icon": "fas fa-palette",
"hidden": true
},
+ "monochromeLogs": {
+ "type": "boolean",
+ "fa_icon": "fas fa-palette",
+ "description": "Do not use coloured log outputs.",
+ "hidden": true
+ },
"hook_url": {
"type": "string",
"description": "Incoming hook URL for messaging service",
"fa_icon": "fas fa-people-group",
- "help_text": "Incoming hook URL for messaging service. Currently, MS Teams and Slack are supported.",
"hidden": true
},
"validate_params": {
@@ -402,22 +490,19 @@
"type": "boolean",
"fa_icon": "far fa-eye-slash",
"description": "Show all params when using `--help`",
- "hidden": true,
- "help_text": "By default, parameters set as _hidden_ in the schema are not shown on the command line when a user runs with `--help`. Specifying this option will tell the pipeline to show all parameters."
+ "hidden": true
},
"validationFailUnrecognisedParams": {
"type": "boolean",
"fa_icon": "far fa-check-circle",
"description": "Validation of parameters fails when an unrecognised parameter is found.",
- "hidden": true,
- "help_text": "By default, when an unrecognised parameter is found, it returns a warinig."
+ "hidden": true
},
"validationLenientMode": {
"type": "boolean",
"fa_icon": "far fa-check-circle",
"description": "Validation of parameters in lenient more.",
- "hidden": true,
- "help_text": "Allows string values that are parseable as numbers or booleans. For further information see [JSONSchema docs](https://github.com/everit-org/json-schema#lenient-mode)."
+ "hidden": true
}
}
}
@@ -426,6 +511,9 @@
{
"$ref": "#/definitions/input_output_options"
},
+ {
+ "$ref": "#/definitions/validation_options"
+ },
{
"$ref": "#/definitions/general_stats_options"
},
@@ -450,6 +538,9 @@
{
"$ref": "#/definitions/synteny_options"
},
+ {
+ "$ref": "#/definitions/merqury_options"
+ },
{
"$ref": "#/definitions/max_job_request_options"
},
diff --git a/pfr/params.json b/pfr/params.json
index ab1ea360..8453604b 100644
--- a/pfr/params.json
+++ b/pfr/params.json
@@ -1,12 +1,15 @@
{
- "input": "/workspace/assemblyqc/testdata/default/assemblysheet.csv",
+ "input": "/workspace/assemblyqc/testdata/v2/assemblysheet.csv",
+ "check_sequence_duplicates": true,
+ "assemblathon_stats_n_limit": 100,
"ncbi_fcs_adaptor_skip": false,
"ncbi_fcs_adaptor_empire": "euk",
"ncbi_fcs_gx_skip": false,
"ncbi_fcs_gx_tax_id": 3750,
"ncbi_fcs_gx_db_path": "/workspace/ComparativeDataSources/NCBI/FCS/GX/r2023-01-24",
+ "contamination_stops_pipeline": false,
"busco_skip": false,
- "busco_mode": "geno",
+ "busco_mode": "genome",
"busco_lineage_datasets": "embryophyta_odb10 eudicots_odb10",
"busco_download_path": "/workspace/ComparativeDataSources/BUSCO/assemblyqc",
"tidk_skip": false,
@@ -17,13 +20,23 @@
"kraken2_skip": false,
"kraken2_db_path": "/workspace/ComparativeDataSources/kraken2db/k2_pluspfp_20230314",
"hic": null,
+ "hic_skip_fastp": false,
+ "hic_skip_fastqc": false,
+ "hic_fastp_ext_args": "--qualified_quality_phred 20 --length_required 50",
"synteny_skip": false,
+ "synteny_mummer_skip": false,
+ "synteny_plotsr_skip": false,
+ "synteny_xref_assemblies": "/workspace/assemblyqc/testdata/v2/xrefsheet.csv",
"synteny_between_input_assemblies": true,
- "synteny_many_to_many_align": false,
- "synteny_max_gap": 1000000,
- "synteny_min_bundle_size": 1000,
- "synteny_plot_1_vs_all": true,
+ "synteny_mummer_plot_type": "both",
+ "synteny_mummer_m2m_align": false,
+ "synteny_mummer_max_gap": 1000000,
+ "synteny_mummer_min_bundle_size": 1000,
+ "synteny_plot_1_vs_all": false,
"synteny_color_by_contig": true,
- "synteny_xref_assemblies": "/workspace/assemblyqc/testdata/default/xrefsheet.csv",
- "outdir": "./results"
+ "synteny_plotsr_seq_label": "Chr",
+ "synteny_plotsr_assembly_order": "gddh13_v1p1 m9_v1 m9_v1_h1 m9_v1_h2",
+ "merqury_skip": false,
+ "outdir": "./results",
+ "email": null
}
diff --git a/pfr_assemblyqc b/pfr_assemblyqc
index f92ae0d2..8f40d31d 100644
--- a/pfr_assemblyqc
+++ b/pfr_assemblyqc
@@ -38,7 +38,6 @@ if [ $full_test_flag -eq 1 ]; then
-c pfr/profile.config \
-profile pfr,apptainer,test_full \
--ncbi_fcs_gx_skip false \
- --ncbi_fcs_gx_tax_id 35717 \
--ncbi_fcs_gx_db_path "/workspace/ComparativeDataSources/NCBI/FCS/GX/r2023-01-24" \
--kraken2_skip false \
--kraken2_db_path "/workspace/ComparativeDataSources/kraken2db/k2_pluspfp_20230314" \
diff --git a/pyproject.toml b/pyproject.toml
deleted file mode 100644
index 7d08e1c8..00000000
--- a/pyproject.toml
+++ /dev/null
@@ -1,13 +0,0 @@
-# Config file for Python. Mostly used to configure linting of bin/*.py with Ruff.
-# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation.
-[tool.ruff]
-line-length = 120
-target-version = "py38"
-select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"]
-cache-dir = "~/.cache/ruff"
-
-[tool.ruff.isort]
-known-first-party = ["nf_core"]
-
-[tool.ruff.per-file-ignores]
-"__init__.py" = ["E402", "F401"]
diff --git a/subworkflows/local/fasta_busco_plot.nf b/subworkflows/local/fasta_busco_plot.nf
deleted file mode 100644
index f6cc5dc7..00000000
--- a/subworkflows/local/fasta_busco_plot.nf
+++ /dev/null
@@ -1,37 +0,0 @@
-include { BUSCO } from '../../modules/local/busco'
-include { BUSCO_PLOT } from '../../modules/local/busco_plot'
-
-workflow FASTA_BUSCO_PLOT {
- take:
- tuple_of_hap_file // Channel
- lineage // val
- mode // val
- download_path // val; Use [] to use work directory. Useful on AWS
-
- main:
- ch_versions = Channel.empty()
-
- // MODULE: BUSCO
- BUSCO(
- tuple_of_hap_file,
- lineage,
- mode,
- download_path
- )
-
- ch_busco_summaries = BUSCO.out.summary
- | collect
-
- ch_versions = ch_versions.mix(BUSCO.out.versions.first())
-
- // MODULE: BUSCO_PLOT
- BUSCO_PLOT ( ch_busco_summaries )
-
- ch_busco_plot = BUSCO_PLOT.out.png
- ch_versions = ch_versions.mix(BUSCO_PLOT.out.versions.first())
-
- emit:
- summary = BUSCO.out.summary
- plot = ch_busco_plot
- versions = ch_versions
-}
diff --git a/subworkflows/local/fasta_synteny.nf b/subworkflows/local/fasta_synteny.nf
index 219b3da7..0ca879f5 100644
--- a/subworkflows/local/fasta_synteny.nf
+++ b/subworkflows/local/fasta_synteny.nf
@@ -3,13 +3,18 @@ include { FILTERSORTFASTA } from '../../modules/local/filtersortfa
include { MUMMER } from '../../modules/local/mummer'
include { GETFASTALENGTH } from '../../modules/local/getfastalength'
include { DNADIFF } from '../../modules/local/dnadiff'
-include { CIRCOS_BUNDLELINKS } from '../../modules/local/circos_bundlelinks'
+include { BUNDLELINKS } from '../../modules/local/bundlelinks'
include { COLOURBUNDLELINKS } from '../../modules/local/colourbundlelinks'
include { RELABELBUNDLELINKS } from '../../modules/local/relabelbundlelinks'
include { SPLITBUNDLEFILE } from '../../modules/local/splitbundlefile'
include { RELABELFASTALENGTH } from '../../modules/local/relabelfastalength'
include { GENERATEKARYOTYPE } from '../../modules/local/generatekaryotype'
include { CIRCOS } from '../../modules/local/circos'
+include { LINEARSYNTENY } from '../../modules/local/linearsynteny'
+include { CUSTOM_RELABELFASTA } from '../../modules/pfr/custom/relabelfasta/main'
+include { MINIMAP2_ALIGN } from '../../modules/nf-core/minimap2/align/main'
+include { SYRI } from '../../modules/pfr/syri/main'
+include { PLOTSR } from '../../modules/pfr/plotsr/main'
workflow FASTA_SYNTENY {
take:
@@ -17,11 +22,16 @@ workflow FASTA_SYNTENY {
ch_labels // Channel: [ tag, txt ]
ch_xref_fasta_labels // Channel: [ tag2, fa, txt ]
between_input_assemblies // val(true|false)
- many_to_many_align // val(true|false)
- max_gap // val(Integer)
- min_bundle_size // val(Integer)
+ mummer_m2m_align // val(true|false)
+ mummer_max_gap // val(Integer)
+ mummer_min_bundle_size // val(Integer)
plot_1_vs_all // val(true|false)
color_by_contig // val(true|false)
+ mummer_plot_type // val(dotplot|circos|both)
+ mummer_skip // val(true|false)
+ plotsr_seq_label // val(String)
+ plotsr_skip // val(true|false)
+ plotsr_assembly_order // val(String)
main:
ch_versions = Channel.empty()
@@ -31,7 +41,7 @@ workflow FASTA_SYNTENY {
ch_labels
)
- ch_input_combinations = ! between_input_assemblies
+ ch_input_combination = ! between_input_assemblies
? Channel.empty()
: ch_fasta_labels
| map { [it] }
@@ -52,11 +62,13 @@ workflow FASTA_SYNTENY {
// MODULE: GUNZIP_FASTA
GUNZIP_FASTA ( ch_xref_fa_branch.gz )
- ch_xref_ungz_fa_labels = GUNZIP_FASTA.out.gunzip
+ ch_xref_ungz = GUNZIP_FASTA.out.gunzip
| mix(
ch_xref_fa_branch.rest
)
| map { meta, fa -> [ meta.id, fa ] }
+
+ ch_xref_ungz_fa_labels = ch_xref_ungz
| join(
ch_xref_fasta_labels
)
@@ -64,7 +76,9 @@ workflow FASTA_SYNTENY {
[ tag, fa, seq_list ]
}
- ch_all_combinations = ch_input_combinations
+ ch_combination = mummer_skip
+ ? Channel.empty()
+ : ch_input_combination
| mix(
ch_fasta_labels
| combine(
@@ -72,7 +86,7 @@ workflow FASTA_SYNTENY {
)
)
- ch_all_combination_labels = ch_all_combinations
+ ch_combination_labels = ch_combination
| map { target_tag, target_fa, target_txt, xref_tag, xref_fa, xref_txt ->
[ "${target_tag}.on.${xref_tag}", target_txt, xref_txt ]
}
@@ -80,7 +94,7 @@ workflow FASTA_SYNTENY {
ch_versions = ch_versions.mix(GUNZIP_FASTA.out.versions.first())
// MODULE: FILTERSORTFASTA
- FILTERSORTFASTA ( ch_all_combinations )
+ FILTERSORTFASTA ( ch_combination )
ch_versions = ch_versions.mix(FILTERSORTFASTA.out.versions.first())
@@ -104,23 +118,23 @@ workflow FASTA_SYNTENY {
)
DNADIFF(
ch_dnadiff_inputs,
- many_to_many_align
+ mummer_m2m_align
)
ch_versions = ch_versions.mix(DNADIFF.out.versions.first())
- // MODULE: CIRCOS_BUNDLELINKS
- CIRCOS_BUNDLELINKS(
+ // MODULE: BUNDLELINKS
+ BUNDLELINKS(
DNADIFF.out.coords,
- max_gap,
- min_bundle_size
+ mummer_max_gap,
+ mummer_min_bundle_size
)
- ch_versions = ch_versions.mix(CIRCOS_BUNDLELINKS.out.versions.first())
+ ch_versions = ch_versions.mix(BUNDLELINKS.out.versions.first())
// MODULE: COLOURBUNDLELINKS
COLOURBUNDLELINKS(
- CIRCOS_BUNDLELINKS.out.links,
+ BUNDLELINKS.out.links,
color_by_contig
)
@@ -129,7 +143,7 @@ workflow FASTA_SYNTENY {
// MODULE: RELABELBUNDLELINKS
ch_relabellinks_inputs = ch_coloured_links
- | join(ch_all_combination_labels)
+ | join(ch_combination_labels)
RELABELBUNDLELINKS ( ch_relabellinks_inputs )
@@ -150,7 +164,7 @@ workflow FASTA_SYNTENY {
// MODULE: RELABELFASTALENGTH
ch_relabelfastalength_inputs = GETFASTALENGTH.out.length
- | join(ch_all_combination_labels)
+ | join(ch_combination_labels)
RELABELFASTALENGTH ( ch_relabelfastalength_inputs )
@@ -176,19 +190,201 @@ workflow FASTA_SYNTENY {
ch_versions = ch_versions.mix(GENERATEKARYOTYPE.out.versions.first())
// MODULE: CIRCOS
- ch_circos_inputs = GENERATEKARYOTYPE.out.karyotype
- | join(
- ch_split_links
- | map { target_on_xref, seq_tag, txt ->
- [ "${target_on_xref}.${seq_tag}", txt ]
- }
- )
+ ch_circos_inputs = ( mummer_plot_type in [ 'circos', 'both' ] )
+ ? ch_split_links
+ | map { target_on_xref, seq_tag, txt ->
+ [ "${target_on_xref}.${seq_tag}", txt ]
+ }
+ | join(GENERATEKARYOTYPE.out.karyotype)
+ : Channel.empty()
CIRCOS ( ch_circos_inputs )
ch_versions = ch_versions.mix(CIRCOS.out.versions.first())
+ // MODULE: LINEARSYNTENY
+ ch_linear_synteny_inputs = ( mummer_plot_type in [ 'dotplot', 'both' ] )
+ ? ch_split_links
+ | map { target_on_xref, seq_tag, txt ->
+ [ "${target_on_xref}.${seq_tag}", txt ]
+ }
+ | join(GENERATEKARYOTYPE.out.karyotype_ref)
+ | join(GENERATEKARYOTYPE.out.karyotype_target)
+ : Channel.empty()
+
+ LINEARSYNTENY ( ch_linear_synteny_inputs )
+
+ ch_versions = ch_versions.mix(LINEARSYNTENY.out.versions.first())
+
+ // Create chr label lists
+ ch_assembly_labels = plotsr_skip
+ ? Channel.empty()
+ : ch_fasta_labels
+ | mix(ch_xref_ungz_fa_labels)
+
+ ch_common_label_count = ch_assembly_labels
+ | map { tag, fa, labels ->
+ labels.readLines().findAll { it != '' }.size()
+ }
+ | collect
+ | map { it.min() }
+
+ ch_plotsr_formatted_labels = ch_assembly_labels
+ | combine(ch_common_label_count)
+ | map { tag, fa, labels, num ->
+ def label_lines = labels
+ .readLines()
+ .collect { it.trim() }
+ .findAll { it != '' }
+
+ label_lines.each { line ->
+ if ( line.split('\t').size() != 2 ) {
+ error "synteny_labels file ${labels.name} for assembly ${tag} is malformed near line ${line}"
+ }
+ }
+
+ def new_labels = label_lines[0..<num].withIndex().collect { line, index ->
+ def literals = line.split('\t')
+ def seq = literals[0]
+ def label = "${plotsr_seq_label}${index+1}"
+
+ "$seq\t$label"
+ }.join('\n')
+
+ [ "${tag}.plotsr.csv", new_labels]
+ }
+ | collectFile(storeDir: "${params.outdir}/synteny/plotsr")
+ | map { labels -> [ labels.baseName.replace('.plotsr', ''), labels ] }
+
+ // MODULE: CUSTOM_RELABELFASTA
+ ch_relabel_inputs = ch_assembly_labels
+ | join(ch_plotsr_formatted_labels)
+ | map { tag, fa, old_l, new_l -> [ tag, fa, new_l ] }
+ CUSTOM_RELABELFASTA(
+ ch_relabel_inputs.map { tag, fa, labels -> [ [ id: tag ], fa ] },
+ ch_relabel_inputs.map { tag, fa, labels -> labels }
+ )
+
+ ch_plotsr_assembly = CUSTOM_RELABELFASTA.out.fasta
+ ch_versions = ch_versions.mix(CUSTOM_RELABELFASTA.out.versions.first())
+
+ // MODULE: MINIMAP2_ALIGN
+ ch_minimap_inputs = ch_plotsr_assembly
+ | map { [ it ] }
+ | collect
+ | map { list ->
+ if ( plotsr_assembly_order == null ) {
+ return list.sort(false) { it[0].id.toUpperCase() }
+ }
+
+ def order = plotsr_assembly_order.tokenize(' ')
+
+ if ( order.size() != order.unique().size() ) error "Tags listed by synteny_plotsr_assembly_order should all be unique: $order"
+
+ def tags = list.collect { it[0].id }
+
+ def ordered_list = []
+ order.each { tag ->
+ if ( ! ( tag in tags ) ) error "Assembly $tag listed in synteny_plotsr_assembly_order could not be found in input or synteny_xref_assemblies: $tags"
+
+ ordered_list << ( list.find { it[0].id == tag } )
+ }
+ ordered_list
+ }
+ | flatMap { list ->
+ if ( list.size() < 2 ) return null
+
+ def new_list = []
+ list.eachWithIndex { assembly, index -> if ( index > 0 ) { new_list << [ assembly, list[index-1] ] } }
+ new_list
+ }
+ | flatten
+ | buffer(size: 4)
+ | map { tmeta, tfa, rmeta, rfa ->
+ [ [ id: "${tmeta.id}.on.${rmeta.id}" ], tfa, rfa ]
+ }
+ MINIMAP2_ALIGN(
+ ch_minimap_inputs.map { meta, tfa, rfa -> [ meta, tfa ] },
+ ch_minimap_inputs.map { meta, tfa, rfa -> [ meta, rfa ] },
+ true, // bam_format
+ false, // cigar_paf_format
+ false // cigar_bam
+ )
+
+ ch_minimap2_bam = MINIMAP2_ALIGN.out.bam
+ ch_versions = ch_versions.mix(MINIMAP2_ALIGN.out.versions.first())
+
+ // MODULE: SYRI
+ ch_syri_inputs = ch_minimap2_bam
+ | join(ch_minimap_inputs)
+
+ SYRI(
+ ch_syri_inputs.map { meta, bam, tfa, rfa -> [ meta, bam ] },
+ ch_syri_inputs.map { meta, bam, tfa, rfa -> tfa },
+ ch_syri_inputs.map { meta, bam, tfa, rfa -> rfa },
+ 'B' // BAM
+ )
+
+ ch_syri = SYRI.out.syri
+ ch_syri_fail_log = SYRI.out.error
+ ch_versions = ch_versions.mix(SYRI.out.versions.first())
+
+ // MODULE: PLOTSR
+ ch_plotsr_inputs = ch_syri
+ | join(ch_syri_inputs)
+ | map { meta, syri, bam, tfa, rfa ->
+
+ [
+ [ id: 'plotsr' ],
+ syri,
+ [ tfa, rfa]
+ ]
+ }
+ | groupTuple
+ | map { meta, syri, fastas ->
+ def fasta_list = fastas.flatten()
+ def syri_tags = syri.collect { it.name.replace('syri.out', '').tokenize('.on.') }.flatten().unique()
+ def proposed_order = plotsr_assembly_order ? plotsr_assembly_order.tokenize(' ') : syri_tags.sort(false)
+
+ def available_tags = []
+ proposed_order.each { tag -> if ( tag in syri_tags ) available_tags << tag }
+
+ def ordered_fa = []
+ available_tags.each { tag -> ordered_fa << ( fasta_list.find { it.baseName == "${tag}.plotsr" } ) }
+
+ def ordered_syri_tags = []
+ available_tags.eachWithIndex { tag, index -> if ( index > 0 ) { ordered_syri_tags << "${tag}.on.${available_tags[index-1]}" } }
+
+ def ordered_syri = []
+ ordered_syri_tags.each { tag -> ordered_syri << ( syri.find { it.baseName == "${tag}syri" } ) }
+
+ [
+ meta,
+ ordered_syri,
+ ordered_fa,
+ "#file\tname\n" + ordered_fa.collect { it.baseName.replace('.plotsr', '') }.join('\n')
+ ]
+ }
+
+ PLOTSR(
+ ch_plotsr_inputs.map { meta, syri, fastas, txt -> [ meta, syri ] },
+ ch_plotsr_inputs.map { meta, syri, fastas, txt -> fastas },
+ ch_plotsr_inputs.map { meta, syri, fastas, txt -> txt },
+ [],
+ [],
+ [],
+ [],
+ []
+ )
+
+ ch_plotsr_png = PLOTSR.out.png
+ ch_versions = ch_versions.mix(PLOTSR.out.versions.first())
+
emit:
- plot = CIRCOS.out.png_file
+ png = CIRCOS.out.png_file
+ | mix( ch_plotsr_png.map { meta, png -> png } )
+ html = LINEARSYNTENY.out.html
+ syri_fail_log = ch_syri_fail_log.map { meta, log -> log }
+ plotsr_labels = ch_plotsr_formatted_labels.map { tag, labels -> labels }
versions = ch_versions
}
diff --git a/subworkflows/local/fq2hic.nf b/subworkflows/local/fq2hic.nf
index f8a3eaa9..38761473 100644
--- a/subworkflows/local/fq2hic.nf
+++ b/subworkflows/local/fq2hic.nf
@@ -1,5 +1,6 @@
include { FASTQ_TRIM_FASTP_FASTQC } from '../nf-core/fastq_trim_fastp_fastqc/main'
include { FASTQ_BWA_MEM_SAMBLASTER } from '../pfr/fastq_bwa_mem_samblaster/main'
+include { SEQKIT_SORT } from '../../modules/nf-core/seqkit/sort/main'
include { HICQC } from '../../modules/local/hicqc'
include { MAKEAGPFROMFASTA } from '../../modules/local/makeagpfromfasta'
include { AGP2ASSEMBLY } from '../../modules/local/agp2assembly'
@@ -32,10 +33,16 @@ workflow FQ2HIC {
ch_trim_reads = FASTQ_TRIM_FASTP_FASTQC.out.reads
ch_versions = ch_versions.mix(FASTQ_TRIM_FASTP_FASTQC.out.versions)
+ // MODULE: SEQKIT_SORT
+ SEQKIT_SORT ( ref )
+
+ ch_sorted_ref = SEQKIT_SORT.out.fastx
+ ch_versions = ch_versions.mix(SEQKIT_SORT.out.versions)
+
// SUBWORKFLOW: FASTQ_BWA_MEM_SAMBLASTER
FASTQ_BWA_MEM_SAMBLASTER(
ch_trim_reads,
- ref.map { meta2, fa -> [ meta2, fa, [] ] }
+ ch_sorted_ref.map { meta2, fa -> [ meta2, fa, [] ] }
)
ch_bam = FASTQ_BWA_MEM_SAMBLASTER.out.bam
@@ -45,7 +52,7 @@ workflow FQ2HIC {
ch_bam_and_ref = ch_bam
| map { meta, bam -> [ meta.ref_id, meta, bam ] }
| join(
- ref.map { meta2, fa -> [ meta2.id, fa ] }
+ ch_sorted_ref.map { meta2, fa -> [ meta2.id, fa ] }
)
| map { ref_id, meta, bam, fa ->
[ [ id: "${meta.id}.on.${meta.ref_id}" ], bam, fa ]
diff --git a/subworkflows/local/ncbi_fcs_gx.nf b/subworkflows/local/ncbi_fcs_gx.nf
index cc54292f..0c442a6e 100644
--- a/subworkflows/local/ncbi_fcs_gx.nf
+++ b/subworkflows/local/ncbi_fcs_gx.nf
@@ -16,6 +16,9 @@ workflow NCBI_FCS_GX {
ch_all_samples = NCBI_FCS_GX_SETUP_SAMPLE.out.fsata
| collect
+ | map {
+ it.sort(false)
+ }
ch_versions = ch_versions.mix(NCBI_FCS_GX_SETUP_SAMPLE.out.versions.first())
diff --git a/subworkflows/local/utils_nfcore_assemblyqc_pipeline/main.nf b/subworkflows/local/utils_nfcore_assemblyqc_pipeline/main.nf
new file mode 100644
index 00000000..a8b381e2
--- /dev/null
+++ b/subworkflows/local/utils_nfcore_assemblyqc_pipeline/main.nf
@@ -0,0 +1,404 @@
+//
+// Subworkflow with functionality specific to the plant-food-research-open/assemblyqc pipeline
+//
+
+import groovy.json.JsonOutput
+
+/*
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ IMPORT FUNCTIONS / MODULES / SUBWORKFLOWS
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+*/
+
+include { UTILS_NFVALIDATION_PLUGIN } from '../../nf-core/utils_nfvalidation_plugin'
+include { paramsSummaryMap } from 'plugin/nf-validation'
+include { fromSamplesheet } from 'plugin/nf-validation'
+include { UTILS_NEXTFLOW_PIPELINE } from '../../nf-core/utils_nextflow_pipeline'
+include { completionEmail } from '../../nf-core/utils_nfcore_pipeline'
+include { completionSummary } from '../../nf-core/utils_nfcore_pipeline'
+include { dashedLine } from '../../nf-core/utils_nfcore_pipeline'
+include { nfCoreLogo } from '../../nf-core/utils_nfcore_pipeline'
+include { imNotification } from '../../nf-core/utils_nfcore_pipeline'
+include { UTILS_NFCORE_PIPELINE } from '../../nf-core/utils_nfcore_pipeline'
+include { workflowCitation } from '../../nf-core/utils_nfcore_pipeline'
+
+/*
+========================================================================================
+ SUBWORKFLOW TO INITIALISE PIPELINE
+========================================================================================
+*/
+
+workflow PIPELINE_INITIALISATION {
+    // Prints version/help, validates parameters and the assemblysheet, then builds the input channels
+    take:
+    version             // boolean: Display version and exit
+    help                // boolean: Display help text
+    validate_params     // boolean: Boolean whether to validate parameters against the schema at runtime
+    monochrome_logs     // boolean: Do not use coloured log outputs
+    nextflow_cli_args   //   array: List of positional nextflow CLI args
+    outdir              //  string: The output directory where the results will be saved
+    input               //  string: Path to input assemblysheet
+
+    main:
+
+    ch_versions = Channel.empty()
+    summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json")
+
+    //
+    // Print version and exit if required and dump pipeline parameters to JSON file
+    //
+    UTILS_NEXTFLOW_PIPELINE (
+        version,
+        true,
+        outdir,
+        workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1
+    )
+
+    //
+    // Validate parameters and generate parameter summary to stdout
+    // (the usage example passed as help text keeps its angle-bracket placeholders verbatim)
+    pre_help_text = nfCoreLogo(monochrome_logs)
+    post_help_text = '\n' + workflowCitation() + '\n' + dashedLine(monochrome_logs)
+    def String workflow_command = "nextflow run ${workflow.manifest.name} -profile <docker/singularity/.../institute> --input assemblysheet.csv --outdir <OUTDIR>"
+    UTILS_NFVALIDATION_PLUGIN (
+        help,
+        workflow_command,
+        pre_help_text,
+        post_help_text,
+        validate_params,
+        "nextflow_schema.json"
+    )
+
+    //
+    // Check config provided to the pipeline
+    //
+    UTILS_NFCORE_PIPELINE (
+        nextflow_cli_args
+    )
+    //
+    // Custom validation for pipeline parameters
+    //
+    validateInputParameters()
+
+    //
+    // Initialise input channels
+    //
+
+    ch_input = Channel.fromSamplesheet('input')
+
+    // Function: validateInputTags (rows are only re-emitted after the complete tag list has been checked)
+    ch_input_validated = ch_input
+        | map { row -> row[0] }
+        | collect
+        | map { tags -> validateInputTags( tags ) }
+        | combine ( ch_input.map { row -> [ row ] } )
+        | map { result, row -> row }
+    // Hi-C reads: a path ending in .fastq.gz/.fq.gz is loaded as file pairs, any other value is flagged as SRA
+    ch_hic_reads = ! params.hic
+        ? Channel.empty()
+        : (
+            "$params.hic".find(/.*[\/].*\.(fastq|fq)\.gz/)
+            ? Channel.fromFilePairs(params.hic, checkIfExists: true)
+            : Channel.of( [ params.hic, 'is_sra' ] )
+        )
+        | map { sample, fq ->
+            "$fq" != 'is_sra'
+            ? [ [ id: sample, single_end: false, is_sra: false, type: 'hic' ], fq ]
+            : [ [ id: sample, single_end: false, is_sra: true, type: 'hic' ], sample ]
+        }
+    // Synteny reference assemblies; empty when synteny is skipped or no sheet is given
+    ch_xref_assembly = params.synteny_skip || ! params.synteny_xref_assemblies
+        ? Channel.empty()
+        : Channel.fromSamplesheet('synteny_xref_assemblies')
+    // Same collect/validate/combine gate as above, then the fasta and labels paths are resolved
+    ch_xref_assembly_validated = ch_xref_assembly
+        | map { row -> row[0] }
+        | collect
+        | map { tags -> validateXrefAssemblies( tags ) }
+        | combine ( ch_xref_assembly.map { row -> [ row ] } )
+        | map { result, row -> row }
+        | map { tag, fa, labels ->
+            [ tag, file(fa, checkIfExists: true), file(labels, checkIfExists: true) ]
+        }
+    // Reads (assemblysheet columns 5 and 6), grouped by their fid key; empty when Merqury is skipped
+    ch_reads = params.merqury_skip
+        ? Channel.empty()
+        : ch_input_validated
+        | map { input_data ->
+            def tag = input_data[0]
+            def reads_1 = input_data[5]
+            def reads_2 = input_data[6]
+
+            reads_1
+            ? extractReadsTuple ( tag, reads_1, reads_2 )
+            : null
+        }
+        | groupTuple
+        | map { fid, metas, reads ->
+            validateAndNormaliseReadsTuple ( fid, metas, reads, 'reads' )
+        }
+    // Maternal reads (assemblysheet columns 7 and 8), built the same way
+    ch_maternal_reads = params.merqury_skip
+        ? Channel.empty()
+        : ch_input_validated
+        | map { input_data ->
+            def tag = input_data[0]
+            def maternal_reads_1 = input_data[7]
+            def maternal_reads_2 = input_data[8]
+
+            maternal_reads_1
+            ? extractReadsTuple ( tag, maternal_reads_1, maternal_reads_2 )
+            : null
+        }
+        | groupTuple
+        | map { fid, metas, m_reads ->
+            validateAndNormaliseReadsTuple ( fid, metas, m_reads, 'maternal' )
+        }
+    // Paternal reads (assemblysheet columns 9 and 10), built the same way
+    ch_paternal_reads = params.merqury_skip
+        ? Channel.empty()
+        : ch_input_validated
+        | map { input_data ->
+            def tag = input_data[0]
+            def paternal_reads_1 = input_data[9]
+            def paternal_reads_2 = input_data[10]
+
+            paternal_reads_1
+            ? extractReadsTuple ( tag, paternal_reads_1, paternal_reads_2 )
+            : null
+        }
+        | groupTuple
+        | map { fid, metas, p_reads ->
+            validateAndNormaliseReadsTuple ( fid, metas, p_reads, 'paternal' )
+        }
+
+    // Initialise parameter channels
+    ch_params_as_json = Channel.of ( jsonifyParams ( params ) )
+    ch_summary_params_as_json = Channel.of ( jsonifySummaryParams ( summary_params ) )
+
+    emit:
+    input = ch_input_validated
+    hic_reads = ch_hic_reads
+    xref_assembly = ch_xref_assembly_validated
+    reads = ch_reads
+    maternal_reads = ch_maternal_reads
+    paternal_reads = ch_paternal_reads
+    params_as_json = ch_params_as_json
+    summary_params_as_json = ch_summary_params_as_json
+    versions = ch_versions
+}
+
+/*
+========================================================================================
+ SUBWORKFLOW FOR PIPELINE COMPLETION
+========================================================================================
+*/
+
+workflow PIPELINE_COMPLETION {
+ // Registers the completion and error handlers for the pipeline run
+ take:
+ email // string: email address
+ email_on_fail // string: email address sent on pipeline failure
+ plaintext_email // boolean: Send plain-text email instead of HTML
+ outdir // path: Path to output directory where results will be published
+ monochrome_logs // boolean: Disable ANSI colour codes in log output
+ hook_url // string: hook URL for notifications
+
+ main:
+
+ summary_params = paramsSummaryMap(workflow, parameters_schema: "nextflow_schema.json")
+
+ //
+ // Completion email and summary
+ // (the email is sent only when an address is set, the notification only when hook_url is set)
+ workflow.onComplete {
+ if (email || email_on_fail) {
+ completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs)
+ }
+
+ completionSummary(monochrome_logs)
+
+ if (hook_url) {
+ imNotification(summary_params, hook_url)
+ }
+ }
+
+ workflow.onError { // logs a pointer to the troubleshooting docs
+ log.error "Pipeline failed. Please refer to troubleshooting docs: https://nf-co.re/docs/usage/troubleshooting"
+ }
+}
+
+/*
+========================================================================================
+ FUNCTIONS
+========================================================================================
+*/
+//
+// Check and validate pipeline parameters: for every tool that is not skipped,
+// the parameters it depends on must be set, otherwise error() is raised.
+//
+def validateInputParameters() {
+    // NCBI FCS Adaptor requires ncbi_fcs_adaptor_empire
+    if (!(params.ncbi_fcs_adaptor_skip || params.ncbi_fcs_adaptor_empire)) {
+        error('ncbi_fcs_adaptor_empire must be provided when executing NCBI FCS Adaptor')
+    }
+
+    // NCBI FCS GX requires ncbi_fcs_gx_tax_id and ncbi_fcs_gx_db_path (checked in that order)
+    if (!params.ncbi_fcs_gx_skip) {
+        if (!params.ncbi_fcs_gx_tax_id) {
+            error('ncbi_fcs_gx_tax_id must be provided when executing NCBI FCS GX')
+        }
+        if (!params.ncbi_fcs_gx_db_path) {
+            error('ncbi_fcs_gx_db_path must be provided when executing NCBI FCS GX')
+        }
+    }
+
+    // BUSCO requires busco_mode and busco_lineage_datasets (checked in that order)
+    if (!params.busco_skip) {
+        if (!params.busco_mode) {
+            error("busco_mode must be provided when executing BUSCO")
+        }
+        if (!params.busco_lineage_datasets) {
+            error('busco_lineage_datasets must be provided when executing BUSCO')
+        }
+    }
+
+    // TIDK requires tidk_repeat_seq; Kraken2 requires kraken2_db_path
+    if (!params.tidk_skip) {
+        if (!params.tidk_repeat_seq) { error('tidk_repeat_seq must be provided when executing TIDK') }
+    }
+
+    if (!params.kraken2_skip) {
+        if (!params.kraken2_db_path) { error('kraken2_db_path must be provided when executing Kraken2') }
+    }
+}
+
+// Guard against duplicated assembly tags in the input assemblysheet:
+// raises error() naming every tag that occurs more than once,
+// and returns true when all tags are unique.
+def validateInputTags(assemblyTags) {
+    // Number of occurrences per tag
+    def occurrences = assemblyTags.countBy { it }
+    def repeatedTags = occurrences.findAll { it.value > 1 }.collect { it.key }
+
+    if (!repeatedTags.isEmpty()) {
+        error("Please check input assemblysheet -> Multiple assemblies have the same tags!: ${repeatedTags}")
+    }
+
+    return true
+}
+
+// Guard against duplicated tags in the synteny_xref_assemblies sheet:
+// raises error() naming every tag that occurs more than once,
+// and returns true when all tags are unique.
+def validateXrefAssemblies(xrefTags) {
+    // Number of occurrences per tag
+    def occurrences = xrefTags.countBy { it }
+    def repeatedTags = occurrences.findAll { it.value > 1 }.collect { it.key }
+
+    if (!repeatedTags.isEmpty()) {
+        error("Please check synteny_xref_assemblies -> Multiple xref assemblies have the same tags!: ${repeatedTags}")
+    }
+
+    return true
+}
+
+def jsonifyParams(params) { // Serialise the given params object to a compact JSON string
+    JsonOutput.toJson(params) as String
+}
+
+// Flatten the grouped summary map into one name -> "value" map (falsy values dropped) and render it as JSON
+def jsonifySummaryParams(params) {
+    def flattened = [:]
+    for (groupEntry in params.entrySet()) {
+        def groupParams = groupEntry.value
+        groupParams.keySet().findAll { groupParams[it] }.each { name ->
+            flattened << [ "$name": "${groupParams[name]}" ]
+        }
+    }
+    return JsonOutput.toJson(flattened) as String
+}
+
+//
+// Build the [ group key, meta, reads ] tuple for one assemblysheet row.
+//   tag     - assembly tag, stored as meta.id
+//   reads_1 - first reads entry: a file path or an SRR accession
+//   reads_2 - second reads file for paired-end data, may be empty
+// Both reads set -> paired-end files; a lone reads_1 matching ^SRR[0-9]*$ -> SRA
+// (reads_1 is returned as is); anything else -> single-end file.
+//
+def extractReadsTuple(tag, reads_1, reads_2) {
+    // The three outcomes only differ in these two meta flags
+    def metaFor = { boolean singleEnd, boolean isSra ->
+        [ id: tag, single_end: singleEnd, is_sra: isSra ]
+    }
+
+    // Paired-end files: keyed by the file name of reads_1
+    if ( reads_1 && reads_2 ) {
+        return [
+            [ fid: file(reads_1).name ],
+            metaFor(false, false),
+            [ reads_1, reads_2 ]
+        ]
+    }
+
+    // SRA accession: keyed by the accession itself
+    def looksLikeSra = "$reads_1".find(/^SRR[0-9]*$/)
+    if ( looksLikeSra ) {
+        return [
+            [ fid: "$reads_1" ],
+            metaFor(false, true),
+            reads_1
+        ]
+    }
+
+    // Fall-through: a single reads file
+    return [
+        [ fid: file(reads_1).name ],
+        metaFor(true, false),
+        [ reads_1 ]
+    ]
+}
+
+// Validate one group of assemblysheet rows that share the same reads and
+// collapse it into a single [ meta, reads ] tuple.
+//   fid       - group key map; fid.fid names the group for non-'reads' types
+//   metas     - metas of the grouped rows (id, single_end, is_sra are read)
+//   reads     - reads entries of the grouped rows
+//   readsType - 'reads', or a type such as 'maternal'/'paternal'
+def validateAndNormaliseReadsTuple ( fid, metas, reads, readsType ) {
+    def isPlainReads = readsType == 'reads'
+    def identifier = isPlainReads ? '' : "${readsType}_"
+    def tags = metas.collect { it.id }.flatten()
+    // unique() de-duplicates in place; a consistent group leaves exactly one value
+    def endedness = metas.collect { it.single_end }.flatten().unique()
+
+    if ( endedness.size() != 1 ) {
+        error("Please check input assemblysheet -> Following assemblies have different ${identifier}reads_1 and ${identifier}reads_2: ${tags}")
+    }
+    if ( isPlainReads && tags.size() > 2 ) {
+        error("Please check input assemblysheet -> More than two assemblies (${tags}) are in the same genome group defined by ${identifier}reads_1: ${fid.fid}")
+    }
+
+    def groupID = isPlainReads ? tags.join('-and-') : fid.fid.replaceAll(/\./, '_')
+    def buildMeta = { boolean singleEnd, boolean isSra ->
+        [ id:groupID, single_end:singleEnd, is_sra:isSra, type: readsType, assemblies:tags ]
+    }
+    // Local files are resolved lazily with file(it, checkIfExists: true)
+    def resolveFirst = { reads.first().collect { file(it, checkIfExists: true) } }
+
+    if ( metas.first().is_sra ) { // SRA: the first reads entry is passed through as is
+        return [ buildMeta(false, true), reads.first() ]
+    }
+
+    if ( endedness.first() ) { // Single ended
+        return [ buildMeta(true, false), resolveFirst() ]
+    }
+
+    // Paired end: every row must name the same second mate
+    if ( reads.collect { it[1] }.unique().size() != 1 ) {
+        error("Please check input assemblysheet -> Following assemblies have different ${identifier}reads_1 and ${identifier}reads_2: ${tags}")
+    }
+
+    return [ buildMeta(false, false), resolveFirst() ]
+}
diff --git a/subworkflows/nf-core/fasta_explore_search_plot_tidk/tests/main.nf.test.snap b/subworkflows/nf-core/fasta_explore_search_plot_tidk/tests/main.nf.test.snap
index 6e4e99c6..40bd469c 100644
--- a/subworkflows/nf-core/fasta_explore_search_plot_tidk/tests/main.nf.test.snap
+++ b/subworkflows/nf-core/fasta_explore_search_plot_tidk/tests/main.nf.test.snap
@@ -56,12 +56,12 @@
],
"5": [
"versions.yml:md5,02d48eb43c3882d9832c88b9e080b420",
- "versions.yml:md5,506585e66b23f17620bf582ef60af56d",
"versions.yml:md5,5f9958ea613eceae4b09bb42ecaac656",
"versions.yml:md5,6713d71b01fe0402e5d32f1a5a181e0f",
+ "versions.yml:md5,702fdfb5f74070b208b10384021f3f28",
"versions.yml:md5,83a6abbf9b68ec1e6152c5b6eb12f0d7",
- "versions.yml:md5,b900c2a21c17ac2d16e33641e64fc7c1",
- "versions.yml:md5,d0a1f1950f8dc5eb106b04a9364c8f04"
+ "versions.yml:md5,95dde1750b4171bedad83c5340cc712b",
+ "versions.yml:md5,b900c2a21c17ac2d16e33641e64fc7c1"
],
"aposteriori_sequence": [
[
@@ -117,16 +117,20 @@
],
"versions": [
"versions.yml:md5,02d48eb43c3882d9832c88b9e080b420",
- "versions.yml:md5,506585e66b23f17620bf582ef60af56d",
"versions.yml:md5,5f9958ea613eceae4b09bb42ecaac656",
"versions.yml:md5,6713d71b01fe0402e5d32f1a5a181e0f",
+ "versions.yml:md5,702fdfb5f74070b208b10384021f3f28",
"versions.yml:md5,83a6abbf9b68ec1e6152c5b6eb12f0d7",
- "versions.yml:md5,b900c2a21c17ac2d16e33641e64fc7c1",
- "versions.yml:md5,d0a1f1950f8dc5eb106b04a9364c8f04"
+ "versions.yml:md5,95dde1750b4171bedad83c5340cc712b",
+ "versions.yml:md5,b900c2a21c17ac2d16e33641e64fc7c1"
]
}
],
- "timestamp": "2023-12-12T15:33:35.106116"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-08T08:52:52.0126754"
},
"homo_sapiens-genome_fasta-genome_21_fasta-no_apriori-stub": {
"content": [
@@ -181,10 +185,10 @@
],
"5": [
"versions.yml:md5,02d48eb43c3882d9832c88b9e080b420",
- "versions.yml:md5,506585e66b23f17620bf582ef60af56d",
"versions.yml:md5,5f9958ea613eceae4b09bb42ecaac656",
"versions.yml:md5,6713d71b01fe0402e5d32f1a5a181e0f",
- "versions.yml:md5,d0a1f1950f8dc5eb106b04a9364c8f04"
+ "versions.yml:md5,702fdfb5f74070b208b10384021f3f28",
+ "versions.yml:md5,95dde1750b4171bedad83c5340cc712b"
],
"aposteriori_sequence": [
[
@@ -236,14 +240,18 @@
],
"versions": [
"versions.yml:md5,02d48eb43c3882d9832c88b9e080b420",
- "versions.yml:md5,506585e66b23f17620bf582ef60af56d",
"versions.yml:md5,5f9958ea613eceae4b09bb42ecaac656",
"versions.yml:md5,6713d71b01fe0402e5d32f1a5a181e0f",
- "versions.yml:md5,d0a1f1950f8dc5eb106b04a9364c8f04"
+ "versions.yml:md5,702fdfb5f74070b208b10384021f3f28",
+ "versions.yml:md5,95dde1750b4171bedad83c5340cc712b"
]
}
],
- "timestamp": "2023-12-21T11:15:42.277945"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-08T08:53:21.817193592"
},
"homo_sapiens-genome_fasta-genome_21_fasta-partial_apriori-stub": {
"content": [
@@ -308,12 +316,12 @@
],
"5": [
"versions.yml:md5,02d48eb43c3882d9832c88b9e080b420",
- "versions.yml:md5,506585e66b23f17620bf582ef60af56d",
"versions.yml:md5,5f9958ea613eceae4b09bb42ecaac656",
"versions.yml:md5,6713d71b01fe0402e5d32f1a5a181e0f",
+ "versions.yml:md5,702fdfb5f74070b208b10384021f3f28",
"versions.yml:md5,83a6abbf9b68ec1e6152c5b6eb12f0d7",
- "versions.yml:md5,b900c2a21c17ac2d16e33641e64fc7c1",
- "versions.yml:md5,d0a1f1950f8dc5eb106b04a9364c8f04"
+ "versions.yml:md5,95dde1750b4171bedad83c5340cc712b",
+ "versions.yml:md5,b900c2a21c17ac2d16e33641e64fc7c1"
],
"aposteriori_sequence": [
[
@@ -375,15 +383,19 @@
],
"versions": [
"versions.yml:md5,02d48eb43c3882d9832c88b9e080b420",
- "versions.yml:md5,506585e66b23f17620bf582ef60af56d",
"versions.yml:md5,5f9958ea613eceae4b09bb42ecaac656",
"versions.yml:md5,6713d71b01fe0402e5d32f1a5a181e0f",
+ "versions.yml:md5,702fdfb5f74070b208b10384021f3f28",
"versions.yml:md5,83a6abbf9b68ec1e6152c5b6eb12f0d7",
- "versions.yml:md5,b900c2a21c17ac2d16e33641e64fc7c1",
- "versions.yml:md5,d0a1f1950f8dc5eb106b04a9364c8f04"
+ "versions.yml:md5,95dde1750b4171bedad83c5340cc712b",
+ "versions.yml:md5,b900c2a21c17ac2d16e33641e64fc7c1"
]
}
],
- "timestamp": "2023-12-21T11:15:25.633714"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-08T08:53:07.119615485"
}
}
\ No newline at end of file
diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf
new file mode 100644
index 00000000..fbeacf4a
--- /dev/null
+++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main.nf
@@ -0,0 +1,39 @@
+include { CUSTOM_SRATOOLSNCBISETTINGS } from '../../../modules/nf-core/custom/sratoolsncbisettings/main'
+include { SRATOOLS_PREFETCH } from '../../../modules/nf-core/sratools/prefetch/main'
+include { SRATOOLS_FASTERQDUMP } from '../../../modules/nf-core/sratools/fasterqdump/main'
+
+//
+// Download FASTQ sequencing reads from the NCBI's Sequence Read Archive (SRA).
+//
+workflow FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS {
+ take:
+ ch_sra_ids // channel: [ val(meta), val(id) ]
+ ch_dbgap_key // channel: [ path(dbgap_key) ]
+
+ main:
+
+ ch_versions = Channel.empty()
+
+ //
+ // Detect existing NCBI user settings or create new ones.
+ // The ids are gathered with collect() before being handed to the settings process.
+ CUSTOM_SRATOOLSNCBISETTINGS ( ch_sra_ids.collect() )
+ ch_ncbi_settings = CUSTOM_SRATOOLSNCBISETTINGS.out.ncbi_settings
+ ch_versions = ch_versions.mix(CUSTOM_SRATOOLSNCBISETTINGS.out.versions)
+
+ //
+ // Prefetch sequencing reads in SRA format.
+ // Only the first versions emission of this process is mixed into ch_versions.
+ SRATOOLS_PREFETCH ( ch_sra_ids, ch_ncbi_settings, ch_dbgap_key )
+ ch_versions = ch_versions.mix(SRATOOLS_PREFETCH.out.versions.first())
+
+ //
+ // Convert the SRA format into one or more compressed FASTQ files.
+ // Consumes the prefetched SRA output together with the same settings and dbGaP key.
+ SRATOOLS_FASTERQDUMP ( SRATOOLS_PREFETCH.out.sra, ch_ncbi_settings, ch_dbgap_key )
+ ch_versions = ch_versions.mix(SRATOOLS_FASTERQDUMP.out.versions.first())
+
+ emit:
+ reads = SRATOOLS_FASTERQDUMP.out.reads // channel: [ val(meta), [ reads ] ]
+ versions = ch_versions // channel: [ versions.yml ]
+}
diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/meta.yml b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/meta.yml
new file mode 100644
index 00000000..1b968acc
--- /dev/null
+++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/meta.yml
@@ -0,0 +1,52 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: fastq_download_prefetch_fasterqdump_sratools
+description: Download FASTQ sequencing reads from the NCBI's Sequence Read Archive (SRA).
+keywords:
+ - SRA
+ - NCBI
+ - sequencing
+ - fastq
+ - prefetch
+ - fasterq-dump
+components:
+ - custom/sratoolsncbisettings
+ - sratools/prefetch
+ - sratools/fasterqdump
+input:
+ - meta:
+ type: map
+ description: >
+ Groovy Map containing sample information e.g. [ id:'test', single_end:false ]
+
+ - id:
+ type: string
+ description: >
+ SRA run identifier.
+
+ - certificate:
+ type: file
+ description: >
+ Path to a JWT cart file used to access protected dbGAP data on SRA using the sra-toolkit
+
+ pattern: "*.cart"
+# TODO Update when we decide on a standard for subworkflow docs
+output:
+ - meta:
+ type: map
+ description: >
+ Groovy Map containing sample information e.g. [ id:'test', single_end:false ]
+
+ - reads:
+ type: file
+ description: Extracted FASTQ file or files if the sequencing reads are paired-end.
+ pattern: "*.fastq.gz"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@Midnighter"
+ - "@drpatelh"
+maintainers:
+ - "@Midnighter"
+ - "@drpatelh"
diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/tests/main.nf.test b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/tests/main.nf.test
new file mode 100644
index 00000000..5f74ed20
--- /dev/null
+++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/tests/main.nf.test
@@ -0,0 +1,43 @@
+nextflow_workflow {
+ // nf-test suite for the SRA download subworkflow defined in ../main.nf
+ name "Test workflow: fastq_download_prefetch_fasterqdump_sratools/main.nf"
+ script "../main.nf"
+ workflow "FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS"
+ tag "subworkflows"
+ tag "subworkflows_nfcore"
+ tag "custom/sratoolsncbisettings"
+ tag "sratools/prefetch"
+ tag "sratools/fasterqdump"
+ tag "subworkflows/fastq_download_prefetch_fasterqdump_sratools"
+
+ test("Parameters: default") { // one single-end id (DRR000774) and one paired-end id (SRR11140744), no dbGaP key
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of(
+ [[ id:'test_single_end', single_end:true ], 'DRR000774'],
+ [[ id:'test_paired_end', single_end:false ], 'SRR11140744']
+ )
+ input[1] = []
+ """
+ }
+ }
+
+ then {
+ def pelines1 = path(workflow.out.reads[0][1][0]).linesGzip // presumably reads[0] is the paired-end sample — TODO confirm output ordering
+ def pelines2 = path(workflow.out.reads[0][1][1]).linesGzip
+ def selines = path(workflow.out.reads[1][1]).linesGzip // presumably reads[1] is the single-end sample — TODO confirm
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(pelines1[0..5]).match("test_pe_reads_1_lines") }, // only the first six lines are snapshotted
+ { assert snapshot(pelines1.size()).match("test_pe_reads_1_size") }, // plus the total line count
+ { assert snapshot(pelines2[0..5]).match("test_pe_reads_2_lines") },
+ { assert snapshot(pelines2.size()).match("test_pe_reads_2_size") },
+ { assert snapshot(selines[0..5]).match("test_se_reads_lines") },
+ { assert snapshot(selines.size()).match("test_se_reads_size") },
+ { assert snapshot(workflow.out.versions).match("versions") }
+ )
+ }
+ }
+}
diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/tests/main.nf.test.snap
new file mode 100644
index 00000000..1a34e0b8
--- /dev/null
+++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/tests/main.nf.test.snap
@@ -0,0 +1,97 @@
+{
+ "test_se_reads_size": {
+ "content": [
+ 19996
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-28T12:02:56.176292"
+ },
+ "test_pe_reads_2_lines": {
+ "content": [
+ [
+ "@SRR11140744.1 M01472:285:000000000-CYHNP:1:1101:12117:3295 length=251",
+ "ACAGGACACGAGTAACTCGTCTATCTTCTGCTGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGGAGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAA",
+ "+SRR11140744.1 M01472:285:000000000-CYHNP:1:1101:12117:3295 length=251",
+ "ABAAAFBFFBDBGGGGGGGGGGHHHHHHHHHHCHGHGGGHHHGGHGGHGHGGGHFHHHHHHHHGGGGGHHHHHHHHHFHHHHGHHHGHGGGGGEFGDGHHGFGGGHHHHHGHHGGHHFHHHHGHHHHHHHHHHHHHHGFFGGHHHHHHGGHHGGHHHHHEGHHHHHHHGHHGHHFHHHHHGGGGGGGGGGGGAGGG9BEFFFFFFFFFFFFFFEEFFFFFFFA.FFFFFFFEFEFFFFFFF.BFFFFFFFB",
+ "@SRR11140744.2 M01472:285:000000000-CYHNP:1:1101:20752:3564 length=238",
+ "GTGCACTCACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATCTTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTTCGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAACACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACG"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-28T12:02:56.166207"
+ },
+ "test_pe_reads_2_size": {
+ "content": [
+ 2011460
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-28T12:02:56.168869"
+ },
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,1a2218ff913fc33408bffccb081b5048",
+ "versions.yml:md5,53d6e983afde3a28add2ffc6b7eba4f3",
+ "versions.yml:md5,9c558ff624585a6eee82a19c8c0136db"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.01.0"
+ },
+ "timestamp": "2024-02-28T15:19:18.755939"
+ },
+ "test_pe_reads_1_size": {
+ "content": [
+ 2011460
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.01.0"
+ },
+ "timestamp": "2024-02-28T15:19:18.677234"
+ },
+ "test_se_reads_lines": {
+ "content": [
+ [
+ "@DRR000774.1 1 length=421",
+ "ACGCAGGTGCCAGCAGCCGCGGTAATACGTAGGATCCGAGCGTTGTCCGGATTTATTGGGCGTAAAGGGTGCGTAGGCGGCTTGTCAAGTCTCATGTGAAATCTCCCGGCTCAACTGGGAGGGTCATGGGAAACTGATGAGCTCGAGGGCAGTAGAGGGAAGCGGAATTCCGAGAGTAGTGGTGAAATGCGTAGATACTCGGAGGAACACCAGTGGCGAAAGCGGCTTCCTGGACTGTACCTGACGCTGAGGCACGAAAGCGTGGGGAGCAAACCGGATTAGATACCCGGGTAGTCCACGCCCTAAACGATGGATACTAGATATAGGGGGTATCGACCCTCTGTGTCGAAGCTAACGCATTAAGTATCCCGCCTGAGGAGTACGGCCGCAAGGCTAAAACTTAAGGAATTGACGGCTGCGT",
+ "+DRR000774.1 1 length=421",
+ "FFFFFFFFFFFIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIIHHFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF:88FFF888???DBBBBB666F222ADDDFFF::;FFFFFFFFFFFFFFFFFFFFFFFFFFFF9:::FFFFCCCFFFFDDDFFFFF<<<<<8888886623//38><83238@B@@<;855557,,,,,,,0/0;;8:==DDDDDDDDD9:",
+ "@DRR000774.2 2 length=126",
+ "ACGCAGGTGCCAGCAGCCGCGGTAATACGGAGGGAGCTAGCGTTGTTCGGAATTACTGGGCGTAAAGCGCACGTAGGCGGCTTTTCAAGTCAGGGGTGGAAATACCCGGGGCCGTCAACCCGACCG"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-28T12:02:56.171227"
+ },
+ "test_pe_reads_1_lines": {
+ "content": [
+ [
+ "@SRR11140744.1 M01472:285:000000000-CYHNP:1:1101:12117:3295 length=251",
+ "ACATAGGGCTGTTCAAGTTGAGGCAAAACGCCTTTTTCAACTTCTACTAAGCCACAAGTGCCATCTTTAAGATGTTGACGTGCCTCTGATAAGACCTCCTCCACGGAGTCTCCAAAGCCACGTACGAGCACGTCGCGAACCTGTAAAACAGGCAAACTGAGTTGGACGTGTGTTTTCTCGTTGAAACCAGGGACAAGGCTCTCCATCTTACCTTTCGGTCACACCCGGACGAAACCTAGATGTGCTGATGA",
+ "+SRR11140744.1 M01472:285:000000000-CYHNP:1:1101:12117:3295 length=251",
+ "BCCCCFFFFFCFGGGGGGGGGGHGGHHHHGGGHGHHHHHHHHHHHHHHHHHHHHHHHGHHHHHHHHHHHHHHHHHHHHHGGGHHHHHGHHGHHHHHHHHHHHHHGGGGGHHHHHHHHHHHHGHHHGGGGGHGHHGGGGGGGHHHHHHHHHHHGGHHHHHFHHHHHHHGGGHHHHHHHHHGGGHHHHHHHHGGGGGGGFGGGGGGGGGGGGGGGGGGGGGGFFFFFFFFFDFFFFFFFFFFFFFFFFFFFFB",
+ "@SRR11140744.2 M01472:285:000000000-CYHNP:1:1101:20752:3564 length=238",
+ "CGTACGAGCACGTCGCGAACCTGTAAAACAGGCAAACTGAGTTGGACGTGTGTTTTCTCGTTGAAACCAGGGACAAGGCTCTCCATCTTACCTTTCGGTCACACCCGGACGAAACCTAGATGTGCTGATGATCGGCTGCAACACGGACGAAACCGTAAGCAGCCTGCAGAAGATAGACGAGTTACTCGTGTCCTGTCAACGACAGTAATTAGTTATTAATTATACTGCGTGAGTGCAC"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-28T12:02:56.161354"
+ }
+}
diff --git a/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/tests/tags.yml b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/tests/tags.yml
new file mode 100644
index 00000000..03028c32
--- /dev/null
+++ b/subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/tests/tags.yml
@@ -0,0 +1,2 @@
+subworkflows/fastq_download_prefetch_fasterqdump_sratools:
+ - subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/**
diff --git a/subworkflows/nf-core/fastq_trim_fastp_fastqc/main.nf b/subworkflows/nf-core/fastq_trim_fastp_fastqc/main.nf
index 39a086ad..4f1c84fc 100644
--- a/subworkflows/nf-core/fastq_trim_fastp_fastqc/main.nf
+++ b/subworkflows/nf-core/fastq_trim_fastp_fastqc/main.nf
@@ -68,14 +68,11 @@ workflow FASTQ_TRIM_FASTP_FASTQC {
//
ch_trim_reads
.join(ch_trim_json)
- .map { meta, reads, json ->
- if (json.text.readLines().size < 1) {
- return [ meta, reads ]
- }
-
- if (getFastpReadsAfterFiltering(json) > 0) {
- [ meta, reads ]
- }
+ .map {
+ meta, reads, json ->
+ if (getFastpReadsAfterFiltering(json) > 0) {
+ [ meta, reads ]
+ }
}
.set { ch_trim_reads }
diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/main.nf b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf
new file mode 100644
index 00000000..ac31f28f
--- /dev/null
+++ b/subworkflows/nf-core/utils_nextflow_pipeline/main.nf
@@ -0,0 +1,126 @@
+//
+// Subworkflow with functionality that may be useful for any Nextflow pipeline
+//
+
+import org.yaml.snakeyaml.Yaml
+import groovy.json.JsonOutput
+import nextflow.extension.FilesEx
+
+/*
+========================================================================================
+ SUBWORKFLOW DEFINITION
+========================================================================================
+*/
+
+workflow UTILS_NEXTFLOW_PIPELINE {
+ // Start-up utilities; each step below only runs when its flag is set
+ take:
+ print_version // boolean: print version
+ dump_parameters // boolean: dump parameters
+ outdir // path: base directory used to publish pipeline results
+ check_conda_channels // boolean: check conda channels
+
+ main:
+
+ //
+ // Print workflow version and exit on --version
+ // (System.exit(0) ends the run right after the version has been logged)
+ if (print_version) {
+ log.info "${workflow.manifest.name} ${getWorkflowVersion()}"
+ System.exit(0)
+ }
+
+ //
+ // Dump pipeline parameters to a JSON file
+ // (skipped when no output directory was provided)
+ if (dump_parameters && outdir) {
+ dumpParametersToJSON(outdir)
+ }
+
+ //
+ // When running with Conda, warn if channels have not been set-up appropriately
+ //
+ if (check_conda_channels) {
+ checkCondaChannels()
+ }
+
+ emit:
+ dummy_emit = true // constant placeholder output; nothing is computed for it
+}
+
+/*
+========================================================================================
+ FUNCTIONS
+========================================================================================
+*/
+
+//
+// Generate version string: manifest version (prefixed with 'v' unless it already
+// starts with one) followed by "-g" and the first 7 characters of the commit id
+def getWorkflowVersion() {
+    def pieces = []
+    def manifestVersion = workflow.manifest.version
+    if (manifestVersion) {
+        // Avoid a doubled prefix when the manifest version already starts with 'v'
+        pieces << (manifestVersion[0] == 'v' ? '' : 'v') << manifestVersion
+    }
+
+    def commit = workflow.commitId
+    if (commit) {
+        pieces << '-g' << commit.substring(0, 7)
+    }
+    return pieces.join('')
+}
+
+// Dump pipeline parameters to a JSON file: written to a hidden temp file in the
+// launch directory, copied to outdir/pipeline_info/, then the temp file is removed
+def dumpParametersToJSON(outdir) {
+    def timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
+    def filename = "params_${timestamp}.json"
+    def temp_pf = new File(workflow.launchDir.toString(), ".${filename}")
+    try {
+        temp_pf.text = JsonOutput.prettyPrint(JsonOutput.toJson(params))
+        FilesEx.copyTo(temp_pf.toPath(), "${outdir}/pipeline_info/${filename}")
+    } finally {
+        temp_pf.delete() // never leave the temp file behind, even when the write or copy fails
+    }
+}
+
+//
+// When running with -profile conda, warn if channels have not been set-up appropriately
+//
+def checkCondaChannels() {
+    Yaml parser = new Yaml()
+    def channels = []
+    try {
+        def config = parser.load("conda config --show channels".execute().text)
+        // No "channels" entry yields null: fall back to an empty list so the checks below warn instead of throwing
+        channels = config.channels ?: []
+    } catch(NullPointerException | IOException e) { // conda could not be run, or printed nothing parseable
+        log.warn "Could not verify conda channel configuration."
+        return
+    }
+
+    // Check that all channels are present
+    // This channel list is ordered by required channel priority.
+    def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults']
+    def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean
+
+    // Check that they are in the right order
+    def channel_priority_violation = false
+    for (int i = 0; i < required_channels_in_order.size() - 1; i++) {
+        channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1]))
+    }
+
+    if (channels_missing | channel_priority_violation) {
+        log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
+            " There is a problem with your Conda configuration!\n\n" +
+            " You will need to set-up the conda-forge and bioconda channels correctly.\n" +
+            " Please refer to https://bioconda.github.io/\n" +
+            " The observed channel order is \n" +
+            " ${channels}\n" +
+            " but the following channel order is required:\n" +
+            " ${required_channels_in_order}\n" +
+            "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+    }
+}
diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml
new file mode 100644
index 00000000..e5c3a0a8
--- /dev/null
+++ b/subworkflows/nf-core/utils_nextflow_pipeline/meta.yml
@@ -0,0 +1,38 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "UTILS_NEXTFLOW_PIPELINE"
+description: Subworkflow with functionality that may be useful for any Nextflow pipeline
+keywords:
+ - utility
+ - pipeline
+ - initialise
+ - version
+components: []
+input:
+ - print_version:
+ type: boolean
+ description: |
+ Print the version of the pipeline and exit
+ - dump_parameters:
+ type: boolean
+ description: |
+ Dump the parameters of the pipeline to a JSON file
+ - outdir:
+ type: directory
+ description: Path to output dir to write JSON file to.
+ pattern: "results/"
+ - check_conda_channels:
+ type: boolean
+ description: |
+ Check if the conda channel priority is correct.
+output:
+ - dummy_emit:
+ type: boolean
+ description: |
+ Dummy emit to make nf-core subworkflows lint happy
+authors:
+ - "@adamrtalbot"
+ - "@drpatelh"
+maintainers:
+ - "@adamrtalbot"
+ - "@drpatelh"
+ - "@maxulysse"
diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test
new file mode 100644
index 00000000..68718e4f
--- /dev/null
+++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test
@@ -0,0 +1,54 @@
+
+nextflow_function {
+
+ name "Test Functions"
+ script "subworkflows/nf-core/utils_nextflow_pipeline/main.nf"
+ config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config"
+ tag 'subworkflows'
+ tag 'utils_nextflow_pipeline'
+ tag 'subworkflows/utils_nextflow_pipeline'
+
+ test("Test Function getWorkflowVersion") {
+ // No inputs are passed; the stored snapshot expects "v9.9.9" (tests/nextflow.config sets version '9.9.9')
+ function "getWorkflowVersion"
+
+ then {
+ assertAll(
+ { assert function.success },
+ { assert snapshot(function.result).match() }
+ )
+ }
+ }
+
+ test("Test Function dumpParametersToJSON") {
+ // Passes the nf-test output directory as the only argument and checks success only (no snapshot)
+ function "dumpParametersToJSON"
+
+ when {
+ function {
+ """
+ // define inputs of the function here. Example:
+ input[0] = "$outputDir"
+ """.stripIndent()
+ }
+ }
+
+ then {
+ assertAll(
+ { assert function.success }
+ )
+ }
+ }
+
+ test("Test Function checkCondaChannels") {
+ // No inputs are passed; the stored snapshot records a null result for this call
+ function "checkCondaChannels"
+
+ then {
+ assertAll(
+ { assert function.success },
+ { assert snapshot(function.result).match() }
+ )
+ }
+ }
+}
diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap
new file mode 100644
index 00000000..e3f0baf4
--- /dev/null
+++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.function.nf.test.snap
@@ -0,0 +1,20 @@
+{
+ "Test Function getWorkflowVersion": {
+ "content": [
+ "v9.9.9"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-28T12:02:05.308243"
+ },
+ "Test Function checkCondaChannels": {
+ "content": null,
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-28T12:02:12.425833"
+ }
+}
\ No newline at end of file
diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test
new file mode 100644
index 00000000..ca964ce8
--- /dev/null
+++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/main.workflow.nf.test
@@ -0,0 +1,111 @@
+nextflow_workflow {
+
+ name "Test Workflow UTILS_NEXTFLOW_PIPELINE"
+ script "../main.nf"
+ config "subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config"
+ workflow "UTILS_NEXTFLOW_PIPELINE"
+ tag 'subworkflows'
+ tag 'utils_nextflow_pipeline'
+ tag 'subworkflows/utils_nextflow_pipeline'
+
+ test("Should run no inputs") {
+ // Every switch is false and outdir is null; the workflow is only required to succeed
+ when {
+ workflow {
+ """
+ print_version = false
+ dump_parameters = false
+ outdir = null
+ check_conda_channels = false
+
+ input[0] = print_version
+ input[1] = dump_parameters
+ input[2] = outdir
+ input[3] = check_conda_channels
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success }
+ )
+ }
+ }
+
+ test("Should print version") {
+ // print_version = true; stdout must contain the manifest name and version from tests/nextflow.config
+ when {
+ workflow {
+ """
+ print_version = true
+ dump_parameters = false
+ outdir = null
+ check_conda_channels = false
+
+ input[0] = print_version
+ input[1] = dump_parameters
+ input[2] = outdir
+ input[3] = check_conda_channels
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert workflow.stdout.contains("nextflow_workflow v9.9.9") }
+ )
+ }
+ }
+
+ test("Should dump params") {
+ // dump_parameters = true with outdir 'results'; inputs are wired from the named variables like the sibling tests
+ when {
+ workflow {
+ """
+ print_version = false
+ dump_parameters = true
+ outdir = 'results'
+ check_conda_channels = false
+
+ input[0] = print_version
+ input[1] = dump_parameters
+ input[2] = outdir
+ input[3] = check_conda_channels
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success }
+ )
+ }
+ }
+
+ test("Should not create params JSON if no output directory") {
+ // dump_parameters = true but outdir is null; inputs are wired from the named variables like the sibling tests
+ when {
+ workflow {
+ """
+ print_version = false
+ dump_parameters = true
+ outdir = null
+ check_conda_channels = false
+
+ input[0] = print_version
+ input[1] = dump_parameters
+ input[2] = outdir
+ input[3] = check_conda_channels
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success }
+ )
+ }
+ }
+}
diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config
new file mode 100644
index 00000000..d0a926bf
--- /dev/null
+++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/nextflow.config
@@ -0,0 +1,9 @@
+manifest {
+ name = 'nextflow_workflow'
+ author = """nf-core"""
+ homePage = 'https://127.0.0.1'
+ description = """Dummy pipeline"""
+ nextflowVersion = '!>=23.04.0'
+ version = '9.9.9'
+ doi = 'https://doi.org/10.5281/zenodo.5070524'
+}
diff --git a/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml
new file mode 100644
index 00000000..f8476112
--- /dev/null
+++ b/subworkflows/nf-core/utils_nextflow_pipeline/tests/tags.yml
@@ -0,0 +1,2 @@
+subworkflows/utils_nextflow_pipeline:
+ - subworkflows/nf-core/utils_nextflow_pipeline/**
diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/main.nf b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
new file mode 100644
index 00000000..14558c39
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfcore_pipeline/main.nf
@@ -0,0 +1,446 @@
+//
+// Subworkflow with utility functions specific to the nf-core pipeline template
+//
+
+import org.yaml.snakeyaml.Yaml
+import nextflow.extension.FilesEx
+
+/*
+========================================================================================
+ SUBWORKFLOW DEFINITION
+========================================================================================
+*/
+
+workflow UTILS_NFCORE_PIPELINE {
+ // Runs the start-up checks on the supplied configuration and on `-profile` usage
+ take:
+ nextflow_cli_args // positional CLI arguments; checkProfileProvided inspects element [0] only
+
+ main:
+ valid_config = checkConfigProvided() // false when the profile is 'standard' and at most one config file is loaded
+ checkProfileProvided(nextflow_cli_args) // errors on a trailing comma in -profile, warns on a positional argument
+
+ emit:
+ valid_config // result of checkConfigProvided()
+}
+
+/*
+========================================================================================
+ FUNCTIONS
+========================================================================================
+*/
+
+//
+// Warn if a -profile or Nextflow config has not been provided to run the pipeline.
+// Returns false after logging the warning, true otherwise.
+def checkConfigProvided() {
+    def valid_config = true // `def` keeps the flag local instead of leaking it into the script binding
+    if (workflow.profile == 'standard' && workflow.configFiles.size() <= 1) {
+        log.warn "[$workflow.manifest.name] You are attempting to run the pipeline without any custom configuration!\n\n" +
+            "This will be dependent on your local compute environment but can be achieved via one or more of the following:\n" +
+            " (1) Using an existing pipeline profile e.g. `-profile docker` or `-profile singularity`\n" +
+            " (2) Using an existing nf-core/configs for your Institution e.g. `-profile crick` or `-profile uppmax`\n" +
+            " (3) Using your own local custom config e.g. `-c /path/to/your/custom.config`\n\n" +
+            "Please refer to the quick start section and usage docs for the pipeline.\n "
+        valid_config = false
+    }
+    return valid_config
+}
+
+//
+// Exit pipeline if -profile ends with a trailing comma; warn if a positional argument was passed
+//
+def checkProfileProvided(nextflow_cli_args) {
+ if (workflow.profile.endsWith(',')) { // e.g. the `test,` left over from `-profile test, docker`
+ error "The `-profile` option cannot end with a trailing comma, please remove it and re-run the pipeline!\n" +
+ "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n"
+ }
+ if (nextflow_cli_args[0]) { // only the first positional argument is reported; this path warns, it does not exit
+ log.warn "nf-core pipelines do not accept positional arguments. The positional argument `${nextflow_cli_args[0]}` has been detected.\n" +
+ "HINT: A common mistake is to provide multiple values separated by spaces e.g. `-profile test, docker`.\n"
+ }
+}
+
+//
+// Citation string for pipeline. An unset manifest DOI yields an empty pipeline
+// reference list instead of a NullPointerException.
+def workflowCitation() {
+    // manifest.doi is a single comma-separated string, not a proper list, and may be unset
+    def manifest_doi = (workflow.manifest.doi ?: '').tokenize(",")
+    // Removing `https://doi.org/` to handle pipelines using DOIs vs DOI resolvers,
+    // and removing ` ` left over from the comma-separated manifest value
+    def temp_doi_ref = manifest_doi
+        .collect { doi_ref -> " https://doi.org/${doi_ref.replace('https://doi.org/', '').replace(' ', '')}\n" }.join('')
+    return "If you use ${workflow.manifest.name} for your analysis please cite:\n\n" +
+        "* The pipeline\n" +
+        temp_doi_ref + "\n" +
+        "* The nf-core framework\n" +
+        " https://doi.org/10.1038/s41587-020-0439-x\n\n" +
+        "* Software dependencies\n" +
+        " https://github.com/${workflow.manifest.name}/blob/master/CITATIONS.md"
+}
+
+//
+// Generate workflow version string
+//
+def getWorkflowVersion() {
+    def manifest_version = workflow.manifest.version
+    def commit_id = workflow.commitId
+
+    // Manifest version, prefixed with 'v' unless it already starts with one
+    String version_part = ''
+    if (manifest_version) {
+        version_part = manifest_version[0] != 'v' ? "v${manifest_version}" : "${manifest_version}"
+    }
+
+    // Short git revision: '-g' plus the first seven characters of the commit id, when there is one
+    String commit_part = commit_id ? "-g${commit_id.substring(0, 7)}" : ''
+    return version_part + commit_part
+}
+
+//
+// Get software versions for pipeline: parse a versions YAML document, keep only the last
+// ':'-separated segment of each top-level key, and dump the result back to YAML text
+def processVersionsFromYAML(yaml_file) {
+    Yaml yaml = new Yaml()
+    def versions = yaml.load(yaml_file).collectEntries { k, v -> [ k.tokenize(':')[-1], v ] } // `def`: local, not a shared script-level variable
+    return yaml.dumpAsMap(versions).trim()
+}
+
+//
+// Get workflow version for pipeline as a YAML snippet: the pipeline and Nextflow
+// versions are nested (indented) under a top-level `Workflow` key
+def workflowVersionToYAML() {
+    return """
+    Workflow:
+        $workflow.manifest.name: ${getWorkflowVersion()}
+        Nextflow: $workflow.nextflow.version
+    """.stripIndent().trim()
+}
+
+//
+// Get channel of software versions used in pipeline in YAML format
+//
+def softwareVersionsToYAML(ch_versions) {
+    // De-duplicate the raw inputs, convert each one to YAML text, then de-duplicate the converted text
+    def ch_tool_versions = ch_versions.unique().map { processVersionsFromYAML(it) }.unique()
+    // The pipeline's own Workflow/Nextflow entry is mixed in as one extra item
+    def ch_workflow_version = Channel.of(workflowVersionToYAML())
+    return ch_tool_versions.mix(ch_workflow_version)
+}
+
+//
+// Get workflow summary for MultiQC: returns the text of a YAML section (plot_type 'html')
+// that lists every non-empty parameter group as an HTML definition list
+def paramsSummaryMultiqc(summary_params) {
+    def summary_section = ''
+    for (group in summary_params.keySet()) {
+        def group_params = summary_params.get(group) // This gets the parameters of that particular group
+        if (group_params) {
+            summary_section += "    <p style=\"font-size:110%\"><b>$group</b></p>\n"
+            summary_section += "    <dl class=\"dl-horizontal\">\n"
+            for (param in group_params.keySet()) {
+                summary_section += "        <dt>$param</dt><dd><samp>${group_params.get(param) ?: '<span style=\"color:#999999;\">N/A</span>'}</samp></dd>\n"
+            }
+            summary_section += "    </dl>\n"
+        }
+    }
+
+    String yaml_file_text = "id: '${workflow.manifest.name.replace('/','-')}-summary'\n"
+    yaml_file_text += "description: ' - this information is collected when the pipeline is started.'\n"
+    yaml_file_text += "section_name: '${workflow.manifest.name} Workflow Summary'\n"
+    yaml_file_text += "section_href: 'https://github.com/${workflow.manifest.name}'\n"
+    yaml_file_text += "plot_type: 'html'\n"
+    yaml_file_text += "data: |\n"
+    yaml_file_text += "${summary_section}"
+
+    return yaml_file_text
+}
+
+//
+// nf-core logo: builds the multi-line banner with the pipeline name and version between two
+// dashed lines; colour codes come from logColours and are empty when monochrome_logs is true
+def nfCoreLogo(monochrome_logs=true) {
+ Map colors = logColours(monochrome_logs)
+ String.format( // called without format arguments; as the last expression its value is the function result
+ """\n
+ ${dashedLine(monochrome_logs)}
+ ${colors.green},--.${colors.black}/${colors.green},-.${colors.reset}
+ ${colors.blue} ___ __ __ __ ___ ${colors.green}/,-._.--~\'${colors.reset}
+ ${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset}
+ ${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset}
+ ${colors.green}`._,._,\'${colors.reset}
+ ${colors.purple} ${workflow.manifest.name} ${getWorkflowVersion()}${colors.reset}
+ ${dashedLine(monochrome_logs)}
+ """.stripIndent()
+ )
+}
+
+//
+// Return dashed line
+//
+def dashedLine(monochrome_logs=true) {
+    def palette = logColours(monochrome_logs) // only the 'dim' and 'reset' entries are used
+    return "-${palette.dim}----------------------------------------------------${palette.reset}-"
+}
+
+//
+// ANSI colours used for terminal logging
+//
+def logColours(monochrome_logs=true) {
+    Map palette = [:]
+    def ansi = { String sequence -> monochrome_logs ? '' : sequence } // every entry becomes '' in monochrome mode
+    // Reset / Meta
+    palette.reset = ansi("\033[0m")
+    palette.bold = ansi("\033[1m")
+    palette.dim = ansi("\033[2m")
+    palette.underlined = ansi("\033[4m")
+    palette.blink = ansi("\033[5m")
+    palette.reverse = ansi("\033[7m")
+    palette.hidden = ansi("\033[8m")
+
+    // Regular Colors
+    palette.black = ansi("\033[0;30m")
+    palette.red = ansi("\033[0;31m")
+    palette.green = ansi("\033[0;32m")
+    palette.yellow = ansi("\033[0;33m")
+    palette.blue = ansi("\033[0;34m")
+    palette.purple = ansi("\033[0;35m")
+    palette.cyan = ansi("\033[0;36m")
+    palette.white = ansi("\033[0;37m")
+
+    // Bold
+    palette.bblack = ansi("\033[1;30m")
+    palette.bred = ansi("\033[1;31m")
+    palette.bgreen = ansi("\033[1;32m")
+    palette.byellow = ansi("\033[1;33m")
+    palette.bblue = ansi("\033[1;34m")
+    palette.bpurple = ansi("\033[1;35m")
+    palette.bcyan = ansi("\033[1;36m")
+    palette.bwhite = ansi("\033[1;37m")
+
+    // Underline
+    palette.ublack = ansi("\033[4;30m")
+    palette.ured = ansi("\033[4;31m")
+    palette.ugreen = ansi("\033[4;32m")
+    palette.uyellow = ansi("\033[4;33m")
+    palette.ublue = ansi("\033[4;34m")
+    palette.upurple = ansi("\033[4;35m")
+    palette.ucyan = ansi("\033[4;36m")
+    palette.uwhite = ansi("\033[4;37m")
+
+    // High Intensity
+    palette.iblack = ansi("\033[0;90m")
+    palette.ired = ansi("\033[0;91m")
+    palette.igreen = ansi("\033[0;92m")
+    palette.iyellow = ansi("\033[0;93m")
+    palette.iblue = ansi("\033[0;94m")
+    palette.ipurple = ansi("\033[0;95m")
+    palette.icyan = ansi("\033[0;96m")
+    palette.iwhite = ansi("\033[0;97m")
+
+    // Bold High Intensity
+    palette.biblack = ansi("\033[1;90m")
+    palette.bired = ansi("\033[1;91m")
+    palette.bigreen = ansi("\033[1;92m")
+    palette.biyellow = ansi("\033[1;93m")
+    palette.biblue = ansi("\033[1;94m")
+    palette.bipurple = ansi("\033[1;95m")
+    palette.bicyan = ansi("\033[1;96m")
+    palette.biwhite = ansi("\033[1;97m")
+
+    return palette
+}
+
+//
+// Attach the multiqc report to email
+//
+def attachMultiqcReport(multiqc_report) {
+    def attachment = null
+    try {
+        if (workflow.success) {
+            attachment = multiqc_report.getVal()
+            def is_report_list = attachment.getClass() == ArrayList && attachment.size() >= 1
+            if (is_report_list && attachment.size() > 1) {
+                log.warn "[$workflow.manifest.name] Found multiple reports from process 'MULTIQC', will use only one"
+            }
+            attachment = is_report_list ? attachment[0] : attachment
+        }
+    } catch (all) {
+        // Any failure is swallowed; the warning is only logged when a report object was supplied
+        if (multiqc_report) {
+            log.warn "[$workflow.manifest.name] Could not attach MultiQC report to summary email"
+        }
+    }
+    return attachment
+}
+
+//
+// Construct and send completion email (sendmail first, `mail` as fallback), then write the
+// rendered HTML and TXT reports to ${outdir}/pipeline_info
+def completionEmail(summary_params, email, email_on_fail, plaintext_email, outdir, monochrome_logs=true, multiqc_report=null) {
+
+    // Set up the e-mail variables
+    def subject = "[$workflow.manifest.name] Successful: $workflow.runName"
+    if (!workflow.success) {
+        subject = "[$workflow.manifest.name] FAILED: $workflow.runName"
+    }
+
+    def summary = [:]
+    for (group in summary_params.keySet()) {
+        summary << summary_params[group]
+    }
+
+    def misc_fields = [:]
+    misc_fields['Date Started'] = workflow.start
+    misc_fields['Date Completed'] = workflow.complete
+    misc_fields['Pipeline script file path'] = workflow.scriptFile
+    misc_fields['Pipeline script hash ID'] = workflow.scriptId
+    if (workflow.repository) misc_fields['Pipeline repository Git URL'] = workflow.repository
+    if (workflow.commitId) misc_fields['Pipeline repository Git Commit'] = workflow.commitId
+    if (workflow.revision) misc_fields['Pipeline Git branch/tag'] = workflow.revision
+    misc_fields['Nextflow Version'] = workflow.nextflow.version
+    misc_fields['Nextflow Build'] = workflow.nextflow.build
+    misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp
+
+    def email_fields = [:]
+    email_fields['version'] = getWorkflowVersion()
+    email_fields['runName'] = workflow.runName
+    email_fields['success'] = workflow.success
+    email_fields['dateComplete'] = workflow.complete
+    email_fields['duration'] = workflow.duration
+    email_fields['exitStatus'] = workflow.exitStatus
+    email_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
+    email_fields['errorReport'] = (workflow.errorReport ?: 'None')
+    email_fields['commandLine'] = workflow.commandLine
+    email_fields['projectDir'] = workflow.projectDir
+    email_fields['summary'] = summary << misc_fields
+
+    // On success try attach the multiqc report
+    def mqc_report = attachMultiqcReport(multiqc_report)
+
+    // Check if we are only sending emails on failure
+    def email_address = email
+    if (!email && email_on_fail && !workflow.success) {
+        email_address = email_on_fail
+    }
+
+    // Render the TXT template
+    def engine = new groovy.text.GStringTemplateEngine()
+    def tf = new File("${workflow.projectDir}/assets/email_template.txt")
+    def txt_template = engine.createTemplate(tf).make(email_fields)
+    def email_txt = txt_template.toString()
+
+    // Render the HTML template
+    def hf = new File("${workflow.projectDir}/assets/email_template.html")
+    def html_template = engine.createTemplate(hf).make(email_fields)
+    def email_html = html_template.toString()
+
+    // Render the sendmail template
+    def max_multiqc_email_size = (params.containsKey('max_multiqc_email_size') ? params.max_multiqc_email_size : 0) as nextflow.util.MemoryUnit
+    def smail_fields = [ email: email_address, subject: subject, email_txt: email_txt, email_html: email_html, projectDir: "${workflow.projectDir}", mqcFile: mqc_report, mqcMaxSize: max_multiqc_email_size.toBytes() ]
+    def sf = new File("${workflow.projectDir}/assets/sendmail_template.txt")
+    def sendmail_template = engine.createTemplate(sf).make(smail_fields)
+    def sendmail_html = sendmail_template.toString()
+
+    // Send the HTML e-mail
+    Map colors = logColours(monochrome_logs)
+    if (email_address) {
+        try {
+            if (plaintext_email) { throw new org.codehaus.groovy.GroovyException('Send plaintext e-mail, not HTML') } // deliberate jump to the `mail` fallback below
+            // Try to send HTML e-mail using sendmail
+            def sendmail_tf = new File(workflow.launchDir.toString(), ".sendmail_tmp.html")
+            sendmail_tf.withWriter { w -> w << sendmail_html }
+            [ 'sendmail', '-t' ].execute() << sendmail_html
+            log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (sendmail)-"
+        } catch (all) {
+            // Catch failures and try with plaintext
+            def mail_cmd = [ 'mail', '-s', subject, '--content-type=text/html', email_address ]
+            mail_cmd.execute() << email_html
+            log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Sent summary e-mail to $email_address (mail)-"
+        }
+    }
+
+    // Write summary e-mail HTML to a file
+    def output_hf = new File(workflow.launchDir.toString(), ".pipeline_report.html")
+    output_hf.withWriter { w -> w << email_html }
+    FilesEx.copyTo(output_hf.toPath(), "${outdir}/pipeline_info/pipeline_report.html")
+    output_hf.delete()
+
+    // Write summary e-mail TXT to a file
+    def output_tf = new File(workflow.launchDir.toString(), ".pipeline_report.txt")
+    output_tf.withWriter { w -> w << email_txt }
+    FilesEx.copyTo(output_tf.toPath(), "${outdir}/pipeline_info/pipeline_report.txt")
+    output_tf.delete()
+}
+
+//
+// Print pipeline summary on completion
+//
+def completionSummary(monochrome_logs=true) {
+    Map palette = logColours(monochrome_logs)
+    // Shared "-[pipeline name]" lead-in of all three messages
+    def lead_in = "-${palette.purple}[$workflow.manifest.name]"
+    if (!workflow.success) {
+        log.info "${lead_in}${palette.red} Pipeline completed with errors${palette.reset}-"
+    } else if (workflow.stats.ignoredCount == 0) {
+        log.info "${lead_in}${palette.green} Pipeline completed successfully${palette.reset}-"
+    } else {
+        log.info "${lead_in}${palette.yellow} Pipeline completed successfully, but with errored process(es) ${palette.reset}-"
+    }
+}
+
+//
+// Construct and send a notification to a web server as JSON e.g. Microsoft Teams and Slack.
+// Any 2xx response counts as delivered; other statuses are logged as a warning.
+def imNotification(summary_params, hook_url) {
+    def summary = [:]
+    for (group in summary_params.keySet()) {
+        summary << summary_params[group]
+    }
+
+    def misc_fields = [:]
+    misc_fields['start'] = workflow.start
+    misc_fields['complete'] = workflow.complete
+    misc_fields['scriptfile'] = workflow.scriptFile
+    misc_fields['scriptid'] = workflow.scriptId
+    if (workflow.repository) misc_fields['repository'] = workflow.repository
+    if (workflow.commitId) misc_fields['commitid'] = workflow.commitId
+    if (workflow.revision) misc_fields['revision'] = workflow.revision
+    misc_fields['nxf_version'] = workflow.nextflow.version
+    misc_fields['nxf_build'] = workflow.nextflow.build
+    misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp
+
+    def msg_fields = [:]
+    msg_fields['version'] = getWorkflowVersion()
+    msg_fields['runName'] = workflow.runName
+    msg_fields['success'] = workflow.success
+    msg_fields['dateComplete'] = workflow.complete
+    msg_fields['duration'] = workflow.duration
+    msg_fields['exitStatus'] = workflow.exitStatus
+    msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
+    msg_fields['errorReport'] = (workflow.errorReport ?: 'None')
+    msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "")
+    msg_fields['projectDir'] = workflow.projectDir
+    msg_fields['summary'] = summary << misc_fields
+
+    // Render the JSON template
+    def engine = new groovy.text.GStringTemplateEngine()
+    // Different JSON depending on the service provider
+    // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format
+    def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json"
+    def hf = new File("${workflow.projectDir}/assets/${json_path}")
+    def json_template = engine.createTemplate(hf).make(msg_fields)
+    def json_message = json_template.toString()
+
+    // POST
+    def post = new URL(hook_url).openConnection()
+    post.setRequestMethod("POST")
+    post.setDoOutput(true)
+    post.setRequestProperty("Content-Type", "application/json")
+    post.getOutputStream().withStream { it.write(json_message.getBytes("UTF-8")) } // withStream closes the stream afterwards
+    def postRC = post.getResponseCode()
+    if (postRC < 200 || postRC >= 300) { // getErrorStream() may be null, hence the safe navigation and fallback text
+        log.warn(post.getErrorStream()?.getText() ?: "Notification hook returned HTTP status ${postRC}")
+    }
+}
diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml
new file mode 100644
index 00000000..d08d2434
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfcore_pipeline/meta.yml
@@ -0,0 +1,24 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "UTILS_NFCORE_PIPELINE"
+description: Subworkflow with utility functions specific to the nf-core pipeline template
+keywords:
+ - utility
+ - pipeline
+ - initialise
+ - version
+components: []
+input:
+ - nextflow_cli_args:
+ type: list
+ description: |
+ Nextflow CLI positional arguments
+output:
+ - valid_config:
+ type: boolean
+ description: |
+ True if a profile or custom config was provided, false (with a logged warning) otherwise
+authors:
+ - "@adamrtalbot"
+maintainers:
+ - "@adamrtalbot"
+ - "@maxulysse"
diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test
new file mode 100644
index 00000000..1dc317f8
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test
@@ -0,0 +1,134 @@
+
+nextflow_function {
+
+ name "Test Functions"
+ script "../main.nf"
+ config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config"
+ tag "subworkflows"
+ tag "subworkflows_nfcore"
+ tag "utils_nfcore_pipeline"
+ tag "subworkflows/utils_nfcore_pipeline"
+
+ test("Test Function checkConfigProvided") {
+ // No inputs are passed; the stored snapshot expects the function to return true
+ function "checkConfigProvided"
+
+ then {
+ assertAll(
+ { assert function.success },
+ { assert snapshot(function.result).match() }
+ )
+ }
+ }
+
+ test("Test Function checkProfileProvided") {
+ // Called with an empty positional-argument list; the stored snapshot records a null result
+ function "checkProfileProvided"
+
+ when {
+ function {
+ """
+ input[0] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert function.success },
+ { assert snapshot(function.result).match() }
+ )
+ }
+ }
+
+ test("Test Function workflowCitation") {
+ // No inputs are passed; the snapshot pins the full citation text, including the DOI from tests/nextflow.config
+ function "workflowCitation"
+
+ then {
+ assertAll(
+ { assert function.success },
+ { assert snapshot(function.result).match() }
+ )
+ }
+ }
+
+ test("Test Function nfCoreLogo") {
+ // monochrome_logs = false, so the snapshotted banner contains ANSI escape sequences
+ function "nfCoreLogo"
+
+ when {
+ function {
+ """
+ input[0] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert function.success },
+ { assert snapshot(function.result).match() }
+ )
+ }
+ }
+
+ test("Test Function dashedLine") {
+ // monochrome_logs = false, so the snapshotted line is wrapped in the dim and reset escape sequences
+ function "dashedLine"
+
+ when {
+ function {
+ """
+ input[0] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert function.success },
+ { assert snapshot(function.result).match() }
+ )
+ }
+ }
+
+ test("Test Function without logColours") {
+ // monochrome_logs = true: every colour code in the stored snapshot is an empty string
+ function "logColours"
+
+ when {
+ function {
+ """
+ input[0] = true
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert function.success },
+ { assert snapshot(function.result).match() }
+ )
+ }
+ }
+
+ test("Test Function with logColours") { // monochrome_logs = false: the snapshot holds the escape sequence for every key
+ function "logColours"
+
+ when {
+ function {
+ """
+ input[0] = false
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert function.success },
+ { assert snapshot(function.result).match() }
+ )
+ }
+ }
+}
diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap
new file mode 100644
index 00000000..1037232c
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.function.nf.test.snap
@@ -0,0 +1,166 @@
+{
+ "Test Function checkProfileProvided": {
+ "content": null,
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-28T12:03:03.360873"
+ },
+ "Test Function checkConfigProvided": {
+ "content": [
+ true
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-28T12:02:59.729647"
+ },
+ "Test Function nfCoreLogo": {
+ "content": [
+ "\n\n-\u001b[2m----------------------------------------------------\u001b[0m-\n \u001b[0;32m,--.\u001b[0;30m/\u001b[0;32m,-.\u001b[0m\n\u001b[0;34m ___ __ __ __ ___ \u001b[0;32m/,-._.--~'\u001b[0m\n\u001b[0;34m |\\ | |__ __ / ` / \\ |__) |__ \u001b[0;33m} {\u001b[0m\n\u001b[0;34m | \\| | \\__, \\__/ | \\ |___ \u001b[0;32m\\`-._,-`-,\u001b[0m\n \u001b[0;32m`._,._,'\u001b[0m\n\u001b[0;35m nextflow_workflow v9.9.9\u001b[0m\n-\u001b[2m----------------------------------------------------\u001b[0m-\n"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-28T12:03:10.562934"
+ },
+ "Test Function workflowCitation": {
+ "content": [
+ "If you use nextflow_workflow for your analysis please cite:\n\n* The pipeline\n https://doi.org/10.5281/zenodo.5070524\n\n* The nf-core framework\n https://doi.org/10.1038/s41587-020-0439-x\n\n* Software dependencies\n https://github.com/nextflow_workflow/blob/master/CITATIONS.md"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-28T12:03:07.019761"
+ },
+ "Test Function without logColours": {
+ "content": [
+ {
+ "reset": "",
+ "bold": "",
+ "dim": "",
+ "underlined": "",
+ "blink": "",
+ "reverse": "",
+ "hidden": "",
+ "black": "",
+ "red": "",
+ "green": "",
+ "yellow": "",
+ "blue": "",
+ "purple": "",
+ "cyan": "",
+ "white": "",
+ "bblack": "",
+ "bred": "",
+ "bgreen": "",
+ "byellow": "",
+ "bblue": "",
+ "bpurple": "",
+ "bcyan": "",
+ "bwhite": "",
+ "ublack": "",
+ "ured": "",
+ "ugreen": "",
+ "uyellow": "",
+ "ublue": "",
+ "upurple": "",
+ "ucyan": "",
+ "uwhite": "",
+ "iblack": "",
+ "ired": "",
+ "igreen": "",
+ "iyellow": "",
+ "iblue": "",
+ "ipurple": "",
+ "icyan": "",
+ "iwhite": "",
+ "biblack": "",
+ "bired": "",
+ "bigreen": "",
+ "biyellow": "",
+ "biblue": "",
+ "bipurple": "",
+ "bicyan": "",
+ "biwhite": ""
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-28T12:03:17.969323"
+ },
+ "Test Function dashedLine": {
+ "content": [
+ "-\u001b[2m----------------------------------------------------\u001b[0m-"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-28T12:03:14.366181"
+ },
+ "Test Function with logColours": {
+ "content": [
+ {
+ "reset": "\u001b[0m",
+ "bold": "\u001b[1m",
+ "dim": "\u001b[2m",
+ "underlined": "\u001b[4m",
+ "blink": "\u001b[5m",
+ "reverse": "\u001b[7m",
+ "hidden": "\u001b[8m",
+ "black": "\u001b[0;30m",
+ "red": "\u001b[0;31m",
+ "green": "\u001b[0;32m",
+ "yellow": "\u001b[0;33m",
+ "blue": "\u001b[0;34m",
+ "purple": "\u001b[0;35m",
+ "cyan": "\u001b[0;36m",
+ "white": "\u001b[0;37m",
+ "bblack": "\u001b[1;30m",
+ "bred": "\u001b[1;31m",
+ "bgreen": "\u001b[1;32m",
+ "byellow": "\u001b[1;33m",
+ "bblue": "\u001b[1;34m",
+ "bpurple": "\u001b[1;35m",
+ "bcyan": "\u001b[1;36m",
+ "bwhite": "\u001b[1;37m",
+ "ublack": "\u001b[4;30m",
+ "ured": "\u001b[4;31m",
+ "ugreen": "\u001b[4;32m",
+ "uyellow": "\u001b[4;33m",
+ "ublue": "\u001b[4;34m",
+ "upurple": "\u001b[4;35m",
+ "ucyan": "\u001b[4;36m",
+ "uwhite": "\u001b[4;37m",
+ "iblack": "\u001b[0;90m",
+ "ired": "\u001b[0;91m",
+ "igreen": "\u001b[0;92m",
+ "iyellow": "\u001b[0;93m",
+ "iblue": "\u001b[0;94m",
+ "ipurple": "\u001b[0;95m",
+ "icyan": "\u001b[0;96m",
+ "iwhite": "\u001b[0;97m",
+ "biblack": "\u001b[1;90m",
+ "bired": "\u001b[1;91m",
+ "bigreen": "\u001b[1;92m",
+ "biyellow": "\u001b[1;93m",
+ "biblue": "\u001b[1;94m",
+ "bipurple": "\u001b[1;95m",
+ "bicyan": "\u001b[1;96m",
+ "biwhite": "\u001b[1;97m"
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-28T12:03:21.714424"
+ }
+}
\ No newline at end of file
diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test
new file mode 100644
index 00000000..8940d32d
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test
@@ -0,0 +1,29 @@
+nextflow_workflow {
+
+ name "Test Workflow UTILS_NFCORE_PIPELINE"
+ script "../main.nf"
+ config "subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config"
+ workflow "UTILS_NFCORE_PIPELINE"
+ tag "subworkflows"
+ tag "subworkflows_nfcore"
+ tag "utils_nfcore_pipeline"
+ tag "subworkflows/utils_nfcore_pipeline"
+
+ test("Should run without failures") {
+ // Empty CLI argument list; the stored snapshot expects the valid_config output to be [true]
+ when {
+ workflow {
+ """
+ input[0] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(workflow.out).match() }
+ )
+ }
+ }
+}
diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap
new file mode 100644
index 00000000..859d1030
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/main.workflow.nf.test.snap
@@ -0,0 +1,19 @@
+{
+ "Should run without failures": {
+ "content": [
+ {
+ "0": [
+ true
+ ],
+ "valid_config": [
+ true
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-28T12:03:25.726491"
+ }
+}
\ No newline at end of file
diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config
new file mode 100644
index 00000000..d0a926bf
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/nextflow.config
@@ -0,0 +1,9 @@
+manifest {
+ name = 'nextflow_workflow'
+ author = """nf-core"""
+ homePage = 'https://127.0.0.1'
+ description = """Dummy pipeline"""
+ nextflowVersion = '!>=23.04.0'
+ version = '9.9.9'
+ doi = 'https://doi.org/10.5281/zenodo.5070524'
+}
diff --git a/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml
new file mode 100644
index 00000000..ac8523c9
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfcore_pipeline/tests/tags.yml
@@ -0,0 +1,2 @@
+subworkflows/utils_nfcore_pipeline:
+ - subworkflows/nf-core/utils_nfcore_pipeline/**
diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf
new file mode 100644
index 00000000..2585b65d
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfvalidation_plugin/main.nf
@@ -0,0 +1,62 @@
+//
+// Subworkflow that uses the nf-validation plugin to render help text and parameter summary
+//
+
+/*
+========================================================================================
+ IMPORT NF-VALIDATION PLUGIN
+========================================================================================
+*/
+
+include { paramsHelp } from 'plugin/nf-validation'
+include { paramsSummaryLog } from 'plugin/nf-validation'
+include { validateParameters } from 'plugin/nf-validation'
+
+/*
+========================================================================================
+ SUBWORKFLOW DEFINITION
+========================================================================================
+*/
+
+workflow UTILS_NFVALIDATION_PLUGIN {
+
+ take:
+ print_help // boolean: print help text and exit
+ workflow_command // string: default command used to run pipeline; shown in the help text
+ pre_help_text // string: string to be printed before help text and summary log
+ post_help_text // string: string to be printed after help text and summary log
+ validate_params // boolean: validate parameters against the schema
+ schema_filename // path: JSON schema file, null to use default value
+
+ main:
+
+ log.debug "Using schema file: ${schema_filename}"
+
+ // Fall back to '' for null/empty strings so the concatenations below are safe
+ pre_help_text = pre_help_text ?: ''
+ post_help_text = post_help_text ?: ''
+ workflow_command = workflow_command ?: ''
+
+ //
+ // Print help message (wrapped in pre/post text) and terminate the run if requested
+ //
+ if (print_help) {
+ log.info pre_help_text + paramsHelp(workflow_command, parameters_schema: schema_filename) + post_help_text
+ System.exit(0) // nothing below runs when help is requested
+ }
+
+ //
+ // Log the parameter summary, wrapped in the same pre/post text
+ //
+ log.info pre_help_text + paramsSummaryLog(workflow, parameters_schema: schema_filename) + post_help_text
+
+ //
+ // Validate parameters relative to the parameter JSON schema
+ //
+ if (validate_params){
+ validateParameters(parameters_schema: schema_filename)
+ }
+
+ emit:
+ dummy_emit = true // constant placeholder output; carries no data
+}
diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml
new file mode 100644
index 00000000..3d4a6b04
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfvalidation_plugin/meta.yml
@@ -0,0 +1,44 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "UTILS_NFVALIDATION_PLUGIN"
+description: Use nf-validation to initiate and validate a pipeline
+keywords:
+ - utility
+ - pipeline
+ - initialise
+ - validation
+components: []
+input:
+ - print_help:
+ type: boolean
+ description: |
+ Print help message and exit
+ - workflow_command:
+ type: string
+ description: |
+ The command to run the workflow e.g. "nextflow run main.nf"
+ - pre_help_text:
+ type: string
+ description: |
+ Text to print before the help message
+ - post_help_text:
+ type: string
+ description: |
+ Text to print after the help message
+ - validate_params:
+ type: boolean
+ description: |
+ Validate the parameters and error if invalid.
+ - schema_filename:
+ type: string
+ description: |
+ The filename of the schema to validate against.
+output:
+ - dummy_emit:
+ type: boolean
+ description: |
+ Dummy emit to make nf-core subworkflows lint happy
+authors:
+ - "@adamrtalbot"
+maintainers:
+ - "@adamrtalbot"
+ - "@maxulysse"
diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test
new file mode 100644
index 00000000..5784a33f
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/main.nf.test
@@ -0,0 +1,200 @@
+nextflow_workflow {
+
+ name "Test Workflow UTILS_NFVALIDATION_PLUGIN"
+ script "../main.nf"
+ workflow "UTILS_NFVALIDATION_PLUGIN"
+ tag "subworkflows"
+ tag "subworkflows_nfcore"
+ tag "plugin/nf-validation"
+ tag "'plugin/nf-validation'" // NOTE(review): same tag as above but with literal single quotes embedded - confirm this is intentional
+ tag "utils_nfvalidation_plugin"
+ tag "subworkflows/utils_nfvalidation_plugin"
+
+ test("Should run nothing") {
+
+ when {
+
+ params {
+ monochrome_logs = true
+ test_data = ''
+ }
+
+ workflow {
+ """
+ help = false
+ workflow_command = null
+ pre_help_text = null
+ post_help_text = null
+ validate_params = false
+ schema_filename = "$moduleTestDir/nextflow_schema.json"
+
+ input[0] = help
+ input[1] = workflow_command
+ input[2] = pre_help_text
+ input[3] = post_help_text
+ input[4] = validate_params
+ input[5] = schema_filename
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success } // neither help nor validation requested, so only the summary is logged
+ )
+ }
+ }
+
+ test("Should run help") {
+
+
+ when {
+
+ params {
+ monochrome_logs = true
+ test_data = ''
+ }
+ workflow {
+ """
+ help = true
+ workflow_command = null
+ pre_help_text = null
+ post_help_text = null
+ validate_params = false
+ schema_filename = "$moduleTestDir/nextflow_schema.json"
+
+ input[0] = help
+ input[1] = workflow_command
+ input[2] = pre_help_text
+ input[3] = post_help_text
+ input[4] = validate_params
+ input[5] = schema_filename
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert workflow.exitStatus == 0 }, // the help path ends with System.exit(0) in the subworkflow
+ { assert workflow.stdout.any { it.contains('Input/output options') } }, // title of the schema's input_output_options group
+ { assert workflow.stdout.any { it.contains('--outdir') } } // outdir is the only non-hidden parameter in the test schema
+ )
+ }
+ }
+
+ test("Should run help with command") {
+
+ when {
+
+ params {
+ monochrome_logs = true
+ test_data = ''
+ }
+ workflow {
+ """
+ help = true
+ workflow_command = "nextflow run noorg/doesntexist"
+ pre_help_text = null
+ post_help_text = null
+ validate_params = false
+ schema_filename = "$moduleTestDir/nextflow_schema.json"
+
+ input[0] = help
+ input[1] = workflow_command
+ input[2] = pre_help_text
+ input[3] = post_help_text
+ input[4] = validate_params
+ input[5] = schema_filename
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert workflow.exitStatus == 0 },
+ { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } }, // the supplied command must appear in the help output
+ { assert workflow.stdout.any { it.contains('Input/output options') } },
+ { assert workflow.stdout.any { it.contains('--outdir') } }
+ )
+ }
+ }
+
+ test("Should run help with extra text") {
+
+
+ when {
+
+ params {
+ monochrome_logs = true
+ test_data = ''
+ }
+ workflow {
+ """
+ help = true
+ workflow_command = "nextflow run noorg/doesntexist"
+ pre_help_text = "pre-help-text"
+ post_help_text = "post-help-text"
+ validate_params = false
+ schema_filename = "$moduleTestDir/nextflow_schema.json"
+
+ input[0] = help
+ input[1] = workflow_command
+ input[2] = pre_help_text
+ input[3] = post_help_text
+ input[4] = validate_params
+ input[5] = schema_filename
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert workflow.exitStatus == 0 },
+ { assert workflow.stdout.any { it.contains('pre-help-text') } }, // text passed as pre_help_text
+ { assert workflow.stdout.any { it.contains('nextflow run noorg/doesntexist') } },
+ { assert workflow.stdout.any { it.contains('Input/output options') } },
+ { assert workflow.stdout.any { it.contains('--outdir') } },
+ { assert workflow.stdout.any { it.contains('post-help-text') } } // text passed as post_help_text
+ )
+ }
+ }
+
+ test("Should validate params") {
+
+ when {
+
+ params {
+ monochrome_logs = true
+ test_data = ''
+ outdir = 1 // integer where the test schema declares outdir as a string, so validation must fail
+ }
+ workflow {
+ """
+ help = false
+ workflow_command = null
+ pre_help_text = null
+ post_help_text = null
+ validate_params = true
+ schema_filename = "$moduleTestDir/nextflow_schema.json"
+
+ input[0] = help
+ input[1] = workflow_command
+ input[2] = pre_help_text
+ input[3] = post_help_text
+ input[4] = validate_params
+ input[5] = schema_filename
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.failed }, // the run is expected to abort on the invalid outdir
+ { assert workflow.stdout.any { it.contains('ERROR ~ ERROR: Validation of pipeline parameters failed!') } }
+ )
+ }
+ }
+}
diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json
new file mode 100644
index 00000000..7626c1c9
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/nextflow_schema.json
@@ -0,0 +1,96 @@
+{
+ "$schema": "http://json-schema.org/draft-07/schema",
+ "$id": "https://raw.githubusercontent.com/./master/nextflow_schema.json",
+ "title": ". pipeline parameters",
+ "description": "",
+ "type": "object",
+ "definitions": {
+ "input_output_options": {
+ "title": "Input/output options",
+ "type": "object",
+ "fa_icon": "fas fa-terminal",
+ "description": "Define where the pipeline should find input data and save output data.",
+ "required": ["outdir"],
+ "properties": {
+ "validate_params": {
+ "type": "boolean",
+ "description": "Validate parameters?",
+ "default": true,
+ "hidden": true
+ },
+ "outdir": {
+ "type": "string",
+ "format": "directory-path",
+ "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.",
+ "fa_icon": "fas fa-folder-open"
+ },
+ "test_data_base": {
+ "type": "string",
+ "default": "https://raw.githubusercontent.com/nf-core/test-datasets/modules",
+ "description": "Base for test data directory",
+ "hidden": true
+ },
+ "test_data": {
+ "type": "string",
+ "description": "Fake test data param",
+ "hidden": true
+ }
+ }
+ },
+ "generic_options": {
+ "title": "Generic options",
+ "type": "object",
+ "fa_icon": "fas fa-file-import",
+ "description": "Less common options for the pipeline, typically set in a config file.",
+ "help_text": "These options are common to all nf-core pipelines and allow you to customise some of the core preferences for how the pipeline runs.\n\nTypically these options would be set in a Nextflow config file loaded for all pipeline runs, such as `~/.nextflow/config`.",
+ "properties": {
+ "help": {
+ "type": "boolean",
+ "description": "Display help text.",
+ "fa_icon": "fas fa-question-circle",
+ "hidden": true
+ },
+ "version": {
+ "type": "boolean",
+ "description": "Display version and exit.",
+ "fa_icon": "fas fa-question-circle",
+ "hidden": true
+ },
+ "logo": {
+ "type": "boolean",
+ "default": true,
+ "description": "Display nf-core logo in console output.",
+ "fa_icon": "fas fa-image",
+ "hidden": true
+ },
+ "singularity_pull_docker_container": {
+ "type": "boolean",
+ "description": "Pull Singularity container from Docker?",
+ "hidden": true
+ },
+ "publish_dir_mode": {
+ "type": "string",
+ "default": "copy",
+ "description": "Method used to save pipeline results to output directory.",
+ "help_text": "The Nextflow `publishDir` option specifies which intermediate files should be saved to the output directory. This option tells the pipeline what method should be used to move these files. See [Nextflow docs](https://www.nextflow.io/docs/latest/process.html#publishdir) for details.",
+ "fa_icon": "fas fa-copy",
+ "enum": ["symlink", "rellink", "link", "copy", "copyNoFollow", "move"],
+ "hidden": true
+ },
+ "monochrome_logs": {
+ "type": "boolean",
+ "description": "Use monochrome_logs",
+ "hidden": true
+ }
+ }
+ }
+ },
+ "allOf": [
+ {
+ "$ref": "#/definitions/input_output_options"
+ },
+ {
+ "$ref": "#/definitions/generic_options"
+ }
+ ]
+}
diff --git a/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml
new file mode 100644
index 00000000..60b1cfff
--- /dev/null
+++ b/subworkflows/nf-core/utils_nfvalidation_plugin/tests/tags.yml
@@ -0,0 +1,2 @@
+subworkflows/utils_nfvalidation_plugin:
+ - subworkflows/nf-core/utils_nfvalidation_plugin/**
diff --git a/subworkflows/pfr/fasta_gxf_busco_plot/main.nf b/subworkflows/pfr/fasta_gxf_busco_plot/main.nf
new file mode 100644
index 00000000..555087d4
--- /dev/null
+++ b/subworkflows/pfr/fasta_gxf_busco_plot/main.nf
@@ -0,0 +1,168 @@
+include { BUSCO_BUSCO as BUSCO_ASSEMBLY } from '../../../modules/pfr/busco/busco/main'
+include { BUSCO_GENERATEPLOT as PLOT_ASSEMBLY } from '../../../modules/pfr/busco/generateplot/main'
+include { GFFREAD as EXTRACT_PROTEINS } from '../../../modules/pfr/gffread/main'
+include { BUSCO_BUSCO as BUSCO_ANNOTATION } from '../../../modules/pfr/busco/busco/main'
+include { BUSCO_GENERATEPLOT as PLOT_ANNOTATION } from '../../../modules/pfr/busco/generateplot/main'
+
+workflow FASTA_GXF_BUSCO_PLOT {
+
+ take:
+ ch_fasta // channel: [ val(meta), fasta ]
+ ch_gxf // channel: [ val(meta2), gxf ]; gxf ~ gff | gff3 | gtf
+ //
+ // meta and meta2 are matched on their `id` field
+
+ val_mode // val(mode); BUSCO mode to apply to ch_fasta
+ // - genome, for genome assemblies (DNA)
+ // - transcriptome, for transcriptome assemblies (DNA)
+ // - proteins, for annotated gene sets (protein)
+ //
+ // If mode is genome, annotations from ch_gxf are evaluated with
+ // mode proteins, otherwise, evaluation of the annotations is skipped
+ //
+ val_lineages // [ val(lineage) ]
+ val_busco_lineages_path // val(path); Optional; Set to [] if not needed
+ val_busco_config // val(path); Optional; Set to [] if not needed
+
+ main:
+ ch_versions = Channel.empty()
+ ch_db_path = val_busco_lineages_path
+ ? Channel.of(file(val_busco_lineages_path, checkIfExists: true))
+ : Channel.of(null) // placeholder item so the combine below still emits; turned into [] at call time
+ ch_config_path = val_busco_config
+ ? Channel.of(file(val_busco_config, checkIfExists: true))
+ : Channel.of(null) // same placeholder approach as ch_db_path
+
+ // MODULE: BUSCO_BUSCO as BUSCO_ASSEMBLY
+ ch_busco_assembly_inputs = ch_fasta
+ | combine(
+ Channel.of(val_mode)
+ )
+ | combine(
+ Channel.fromList(val_lineages)
+ )
+ | map { meta, fasta, mode, lineage ->
+ [
+ meta + [ mode:mode, lineage:lineage ], // record mode and lineage in meta for every fasta x lineage pair
+ fasta, mode, lineage
+ ]
+ }
+ | combine(
+ ch_db_path
+ )
+ | combine(
+ ch_config_path
+ )
+
+ BUSCO_ASSEMBLY(
+ ch_busco_assembly_inputs.map { meta, fasta, mode, lineage, db, config -> [ meta, fasta ] },
+ ch_busco_assembly_inputs.map { meta, fasta, mode, lineage, db, config -> mode },
+ ch_busco_assembly_inputs.map { meta, fasta, mode, lineage, db, config -> lineage },
+ ch_busco_assembly_inputs.map { meta, fasta, mode, lineage, db, config -> db ?: [] },
+ ch_busco_assembly_inputs.map { meta, fasta, mode, lineage, db, config -> config ?: [] }
+ )
+
+ ch_assembly_batch_summary = BUSCO_ASSEMBLY.out.batch_summary
+ ch_assembly_short_summaries_txt = BUSCO_ASSEMBLY.out.short_summaries_txt
+ ch_assembly_short_summaries_json = BUSCO_ASSEMBLY.out.short_summaries_json
+ ch_versions = ch_versions.mix(BUSCO_ASSEMBLY.out.versions.first()) // keep only the first versions.yml emitted
+
+ // MODULE: BUSCO_GENERATEPLOT as PLOT_ASSEMBLY
+ ch_assembly_plot_inputs = ch_assembly_short_summaries_txt
+ | map { meta, txt ->
+ def lineage_name = meta.lineage.split('_odb')[0] // e.g. 'bacteria_odb10' -> 'bacteria'
+ [
+ "short_summary.specific.${meta.lineage}.${meta.id}_${lineage_name}.txt",
+ txt.text
+ ]
+ }
+ | collectFile // write each [ name, text ] pair to a file carrying that name
+ | collect // gather all renamed summaries into a single list emission
+
+ PLOT_ASSEMBLY( ch_assembly_plot_inputs )
+
+ ch_assembly_png = PLOT_ASSEMBLY.out.png
+ ch_versions = ch_versions.mix(PLOT_ASSEMBLY.out.versions)
+
+ // MODULE: GFFREAD as EXTRACT_PROTEINS
+ ch_gffread_inputs = ! ( val_mode == 'geno' || val_mode == 'genome' ) // annotations are only evaluated in genome mode
+ ? Channel.empty()
+ : ch_fasta
+ | map { meta, fasta -> [ meta.id, meta, fasta ] }
+ | join(
+ ch_gxf.map { meta2, gxf -> [ meta2.id, gxf ] }
+ // Join with matching annotation
+ // to allow one annotation per fasta
+ )
+ | map { id, meta, fasta, gxf -> [ meta, gxf, fasta ] }
+ EXTRACT_PROTEINS(
+ ch_gffread_inputs.map { meta, gxf, fasta -> [ meta, gxf ] },
+ ch_gffread_inputs.map { meta, gxf, fasta -> fasta }
+ )
+
+ ch_proteins = EXTRACT_PROTEINS.out.gffread_fasta
+ ch_versions = ch_versions.mix(EXTRACT_PROTEINS.out.versions.first())
+
+ // MODULE: BUSCO_BUSCO as BUSCO_ANNOTATION
+ ch_busco_annotation_inputs = ch_proteins
+ | combine(
+ Channel.of('proteins') // extracted proteins are always assessed in 'proteins' mode
+ )
+ | combine(
+ Channel.fromList(val_lineages)
+ )
+ | map { meta, fasta, mode, lineage ->
+ [
+ meta + [ mode:mode, lineage:lineage ],
+ fasta, mode, lineage
+ ]
+ }
+ | combine(
+ ch_db_path
+ )
+ | combine(
+ ch_config_path
+ )
+
+ BUSCO_ANNOTATION(
+ ch_busco_annotation_inputs.map { meta, fasta, mode, lineage, db, config -> [ meta, fasta ] },
+ ch_busco_annotation_inputs.map { meta, fasta, mode, lineage, db, config -> mode },
+ ch_busco_annotation_inputs.map { meta, fasta, mode, lineage, db, config -> lineage },
+ ch_busco_annotation_inputs.map { meta, fasta, mode, lineage, db, config -> db ?: [] },
+ ch_busco_annotation_inputs.map { meta, fasta, mode, lineage, db, config -> config ?: [] }
+ )
+
+ ch_annotation_batch_summary = BUSCO_ANNOTATION.out.batch_summary
+ ch_annotation_short_summaries_txt = BUSCO_ANNOTATION.out.short_summaries_txt
+ ch_annotation_short_summaries_json = BUSCO_ANNOTATION.out.short_summaries_json
+ ch_versions = ch_versions.mix(BUSCO_ANNOTATION.out.versions.first())
+
+ // MODULE: BUSCO_GENERATEPLOT as PLOT_ANNOTATION
+ ch_annotation_plot_inputs = ch_annotation_short_summaries_txt
+ | map { meta, txt ->
+ def lineage_name = meta.lineage.split('_odb')[0]
+ [
+ "short_summary.specific.${meta.lineage}.${meta.id}_${lineage_name}.txt",
+ txt.text
+ ]
+ }
+ | collectFile
+ | collect
+
+ PLOT_ANNOTATION( ch_annotation_plot_inputs )
+
+ ch_annotation_png = PLOT_ANNOTATION.out.png
+ ch_versions = ch_versions.mix(PLOT_ANNOTATION.out.versions)
+
+
+ emit:
+ assembly_batch_summary = ch_assembly_batch_summary // channel: [ meta3, txt ]; meta3 ~ meta + [ val(mode), val(lineage) ]
+ assembly_short_summaries_txt = ch_assembly_short_summaries_txt // channel: [ meta3, txt ]
+ assembly_short_summaries_json = ch_assembly_short_summaries_json // channel: [ meta3, json ]
+ assembly_png = ch_assembly_png // channel: [ png ]
+ annotation_batch_summary = ch_annotation_batch_summary // channel: [ meta3, txt ]
+ annotation_short_summaries_txt = ch_annotation_short_summaries_txt // channel: [ meta3, txt ]
+ annotation_short_summaries_json = ch_annotation_short_summaries_json // channel: [ meta3, json ]
+ annotation_png = ch_annotation_png // channel: [ png ]
+ versions = ch_versions // channel: [ versions.yml ]
+}
diff --git a/subworkflows/pfr/fasta_gxf_busco_plot/meta.yml b/subworkflows/pfr/fasta_gxf_busco_plot/meta.yml
new file mode 100644
index 00000000..9226357e
--- /dev/null
+++ b/subworkflows/pfr/fasta_gxf_busco_plot/meta.yml
@@ -0,0 +1,106 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
+name: "fasta_gxf_busco_plot"
+description: |
+ Runs BUSCO for input assemblies and their annotations in GFF/GFF3/GTF format, and creates summary plots using `BUSCO/generate_plot.py` script
+keywords:
+ - genome
+ - annotation
+ - busco
+ - plot
+components:
+ - busco/busco
+ - busco/generateplot
+ - gffread
+input:
+ - ch_fasta:
+ type: file
+ description: |
+ Channel containing FASTA files
+ Structure:[ val(meta), fasta ]
+ pattern: "*.{fa,faa,fsa,fas,fasta}(.gz)?"
+ - ch_gxf:
+ type: file
+ description: |
+ Channel containing GFF/GFF3/GTF files
+ Structure:[ val(meta2), gxf ]
+ pattern: "*.{gff,gff3,gtf}"
+ - val_mode:
+ type: string
+ description: |
+ String containing BUSCO mode to apply to ch_fasta files
+ Structure:val(mode)
+ - val_lineages:
+ type: array
+ description: |
+ Array of strings representing BUSCO lineage datasets
+ Structure:[ val(lineage) ]
+ - val_busco_lineages_path:
+ type: path
+ description: |
+ Path where BUSCO lineages are located or downloaded if not already there. If this input is `[]`,
+ BUSCO will download the datasets in the task work directory
+ Structure:val(busco_lineages_path)
+ - val_busco_config:
+ type: path
+ description: |
+ Path to BUSCO config. It is optional and can be set to `[]`
+ Structure:val(busco_config)
+output:
+ - assembly_batch_summary:
+ type: file
+ description: |
+ Channel containing BUSCO batch summaries corresponding to fasta files
+ Structure: [ val(meta), txt ]
+ pattern: "*.txt"
+ - assembly_short_summaries_txt:
+ type: file
+ description: |
+ Channel containing BUSCO short summaries corresponding to fasta files
+ Structure: [ val(meta), txt ]
+ pattern: "*.txt"
+ - assembly_short_summaries_json:
+ type: file
+ description: |
+ Channel containing BUSCO short summaries corresponding to fasta files
+ Structure: [ val(meta), json ]
+ pattern: "*.json"
+ - assembly_png:
+ type: file
+ description: |
+ Channel containing summary plot for assemblies
+ Structure: png
+ pattern: "*.png"
+ - annotation_batch_summary:
+ type: file
+ description: |
+ Channel containing BUSCO batch summaries corresponding to annotation files
+ Structure: [ val(meta), txt ]
+ pattern: "*.txt"
+ - annotation_short_summaries_txt:
+ type: file
+ description: |
+ Channel containing BUSCO short summaries corresponding to annotation files
+ Structure: [ val(meta), txt ]
+ pattern: "*.txt"
+ - annotation_short_summaries_json:
+ type: file
+ description: |
+ Channel containing BUSCO short summaries corresponding to annotation files
+ Structure: [ val(meta), json ]
+ pattern: "*.json"
+ - annotation_png:
+ type: file
+ description: |
+ Channel containing summary plot for annotations
+ Structure: png
+ pattern: "*.png"
+ - versions:
+ type: file
+ description: |
+ File containing software versions
+ Structure: [ path(versions.yml) ]
+ pattern: "versions.yml"
+authors:
+ - "@GallVp"
+maintainers:
+ - "@GallVp"
diff --git a/subworkflows/pfr/fasta_gxf_busco_plot/tests/main.nf.test b/subworkflows/pfr/fasta_gxf_busco_plot/tests/main.nf.test
new file mode 100644
index 00000000..8c8ac7b0
--- /dev/null
+++ b/subworkflows/pfr/fasta_gxf_busco_plot/tests/main.nf.test
@@ -0,0 +1,104 @@
+nextflow_workflow {
+
+ name "Test Subworkflow FASTA_GXF_BUSCO_PLOT"
+ script "../main.nf"
+ workflow "FASTA_GXF_BUSCO_PLOT"
+ config './nextflow.config' // sets ext.args = '-y' for EXTRACT_PROTEINS
+
+ tag "subworkflows"
+ tag "subworkflows_nfcore"
+ tag "subworkflows/fasta_gxf_busco_plot"
+ tag "busco"
+ tag "busco/busco"
+ tag "busco/generateplot"
+ tag "gffread"
+
+ test("candidatus_portiera_aleyrodidarum - bacteroides_fragilis - genome") {
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of(
+ [
+ [ id:'test' ], // meta map
+ file( params.test_data['candidatus_portiera_aleyrodidarum']['genome']['genome_fasta'], checkIfExists: true)
+ ],
+ [
+ [ id:'test2' ], // meta map
+ file( params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true)
+ ]
+ )
+ input[1] = Channel.of(
+ [
+ [ id:'test' ], // meta map
+ file( params.test_data['candidatus_portiera_aleyrodidarum']['genome']['test1_gff'], checkIfExists: true)
+ ]
+ )
+ input[2] = 'genome'
+ input[3] = [ 'bacteria_odb10', 'archaea_odb10' ]
+ input[4] = []
+ input[5] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert snapshot(
+ workflow.out.assembly_batch_summary,
+ workflow.out.annotation_batch_summary,
+ workflow.out.versions,
+ ).match()
+ }, // only batch summaries and versions are snapshotted
+
+ { assert workflow.out.assembly_png != null }, // remaining outputs get a non-null check only
+ { assert workflow.out.annotation_png != null },
+
+ { assert workflow.out.assembly_short_summaries_json != null },
+ { assert workflow.out.assembly_short_summaries_txt != null },
+ { assert workflow.out.annotation_short_summaries_json != null },
+ { assert workflow.out.annotation_short_summaries_txt != null }
+ )
+ }
+ }
+
+ test("candidatus_portiera_aleyrodidarum - bacteroides_fragilis - genome - stub") {
+
+ options '-stub' // same inputs as the test above, run with the -stub option
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of(
+ [
+ [ id:'test' ], // meta map
+ file( params.test_data['candidatus_portiera_aleyrodidarum']['genome']['genome_fasta'], checkIfExists: true)
+ ],
+ [
+ [ id:'test2' ], // meta map
+ file( params.test_data['bacteroides_fragilis']['genome']['genome_fna_gz'], checkIfExists: true)
+ ]
+ )
+ input[1] = Channel.of(
+ [
+ [ id:'test' ], // meta map
+ file( params.test_data['candidatus_portiera_aleyrodidarum']['genome']['test1_gff'], checkIfExists: true)
+ ]
+ )
+ input[2] = 'genome'
+ input[3] = [ 'bacteria_odb10', 'archaea_odb10' ]
+ input[4] = []
+ input[5] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert snapshot(workflow.out).match()} // the whole output map is snapshotted in stub mode
+ )
+ }
+ }
+}
diff --git a/subworkflows/pfr/fasta_gxf_busco_plot/tests/main.nf.test.snap b/subworkflows/pfr/fasta_gxf_busco_plot/tests/main.nf.test.snap
new file mode 100644
index 00000000..118ad7ed
--- /dev/null
+++ b/subworkflows/pfr/fasta_gxf_busco_plot/tests/main.nf.test.snap
@@ -0,0 +1,231 @@
+{
+ "candidatus_portiera_aleyrodidarum - bacteroides_fragilis - genome - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "mode": "genome",
+ "lineage": "archaea_odb10"
+ },
+ "test-archaea_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ],
+ [
+ {
+ "id": "test",
+ "mode": "genome",
+ "lineage": "bacteria_odb10"
+ },
+ "test-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ],
+ [
+ {
+ "id": "test2",
+ "mode": "genome",
+ "lineage": "archaea_odb10"
+ },
+ "test2-archaea_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ],
+ [
+ {
+ "id": "test2",
+ "mode": "genome",
+ "lineage": "bacteria_odb10"
+ },
+ "test2-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+
+ ],
+ "4": [
+ [
+ {
+ "id": "test",
+ "mode": "proteins",
+ "lineage": "archaea_odb10"
+ },
+ "test-archaea_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ],
+ [
+ {
+ "id": "test",
+ "mode": "proteins",
+ "lineage": "bacteria_odb10"
+ },
+ "test-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "5": [
+
+ ],
+ "6": [
+
+ ],
+ "7": [
+
+ ],
+ "8": [
+ "versions.yml:md5,36b11c442943567e471af0abd474a10b",
+ "versions.yml:md5,9435355f913e283f60b4fb7ef77dd52a",
+ "versions.yml:md5,e9d65e2f2f13175e99c5b7f4ae1013b9"
+ ],
+ "annotation_batch_summary": [
+ [
+ {
+ "id": "test",
+ "mode": "proteins",
+ "lineage": "archaea_odb10"
+ },
+ "test-archaea_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ],
+ [
+ {
+ "id": "test",
+ "mode": "proteins",
+ "lineage": "bacteria_odb10"
+ },
+ "test-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "annotation_png": [
+
+ ],
+ "annotation_short_summaries_json": [
+
+ ],
+ "annotation_short_summaries_txt": [
+
+ ],
+ "assembly_batch_summary": [
+ [
+ {
+ "id": "test",
+ "mode": "genome",
+ "lineage": "archaea_odb10"
+ },
+ "test-archaea_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ],
+ [
+ {
+ "id": "test",
+ "mode": "genome",
+ "lineage": "bacteria_odb10"
+ },
+ "test-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ],
+ [
+ {
+ "id": "test2",
+ "mode": "genome",
+ "lineage": "archaea_odb10"
+ },
+ "test2-archaea_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ],
+ [
+ {
+ "id": "test2",
+ "mode": "genome",
+ "lineage": "bacteria_odb10"
+ },
+ "test2-bacteria_odb10-busco.batch_summary.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "assembly_png": [
+
+ ],
+ "assembly_short_summaries_json": [
+
+ ],
+ "assembly_short_summaries_txt": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,36b11c442943567e471af0abd474a10b",
+ "versions.yml:md5,9435355f913e283f60b4fb7ef77dd52a",
+ "versions.yml:md5,e9d65e2f2f13175e99c5b7f4ae1013b9"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-13T16:39:45.021811"
+ },
+ "candidatus_portiera_aleyrodidarum - bacteroides_fragilis - genome": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "mode": "genome",
+ "lineage": "archaea_odb10"
+ },
+ "test-archaea_odb10-busco.batch_summary.txt:md5,1397d74518a776ad75b16a843bc5b6c1"
+ ],
+ [
+ {
+ "id": "test",
+ "mode": "genome",
+ "lineage": "bacteria_odb10"
+ },
+ "test-bacteria_odb10-busco.batch_summary.txt:md5,a1186bc25448ac1949bf7790810f7161"
+ ],
+ [
+ {
+ "id": "test2",
+ "mode": "genome",
+ "lineage": "archaea_odb10"
+ },
+ "test2-archaea_odb10-busco.batch_summary.txt:md5,946582b353a8dba7d6452a71856eca06"
+ ],
+ [
+ {
+ "id": "test2",
+ "mode": "genome",
+ "lineage": "bacteria_odb10"
+ },
+ "test2-bacteria_odb10-busco.batch_summary.txt:md5,21b3fb771cf36be917cc451540d999be"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "mode": "proteins",
+ "lineage": "archaea_odb10"
+ },
+ "test-archaea_odb10-busco.batch_summary.txt:md5,95172bd5b1a30e632fc79084ea0ca585"
+ ],
+ [
+ {
+ "id": "test",
+ "mode": "proteins",
+ "lineage": "bacteria_odb10"
+ },
+ "test-bacteria_odb10-busco.batch_summary.txt:md5,995127c0caecb36205dbf21aa2f9f8a8"
+ ]
+ ],
+ [
+ "versions.yml:md5,05d8022e3afb0d5642ed17147b991730",
+ "versions.yml:md5,36b11c442943567e471af0abd474a10b",
+ "versions.yml:md5,53987b35fc275297efdaf525937fdca3",
+ "versions.yml:md5,9435355f913e283f60b4fb7ef77dd52a",
+ "versions.yml:md5,e9d65e2f2f13175e99c5b7f4ae1013b9"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-13T16:39:04.376704"
+ }
+}
\ No newline at end of file
diff --git a/subworkflows/pfr/fasta_gxf_busco_plot/tests/nextflow.config b/subworkflows/pfr/fasta_gxf_busco_plot/tests/nextflow.config
new file mode 100644
index 00000000..ddbc644d
--- /dev/null
+++ b/subworkflows/pfr/fasta_gxf_busco_plot/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+ withName: EXTRACT_PROTEINS {
+ ext.args = '-y' // NOTE(review): extra gffread argument; presumably requests protein FASTA output - confirm against gffread usage
+ }
+}
diff --git a/subworkflows/pfr/fasta_gxf_busco_plot/tests/tags.yml b/subworkflows/pfr/fasta_gxf_busco_plot/tests/tags.yml
new file mode 100644
index 00000000..b8786a59
--- /dev/null
+++ b/subworkflows/pfr/fasta_gxf_busco_plot/tests/tags.yml
@@ -0,0 +1,2 @@
+subworkflows/fasta_gxf_busco_plot:
+ - subworkflows/pfr/fasta_gxf_busco_plot/**
diff --git a/subworkflows/pfr/fasta_ltrretriever_lai/main.nf b/subworkflows/pfr/fasta_ltrretriever_lai/main.nf
index 7bdc2c28..3820e69c 100644
--- a/subworkflows/pfr/fasta_ltrretriever_lai/main.nf
+++ b/subworkflows/pfr/fasta_ltrretriever_lai/main.nf
@@ -10,38 +10,64 @@ workflow FASTA_LTRRETRIEVER_LAI {
take:
ch_fasta // channel: [ val(meta), fasta ]
- ch_monoploid_seqs // channel: [ val(meta), txt ]; Optional: Set to [] if not needed
- // val(meta) from ch_fasta and ch_monoploid_seqs are only required
- // to have the same `id`
+ ch_monoploid_seqs // channel: [ val(meta2), txt ]; Optional: Set to [] if not needed
+ // val(meta) from ch_fasta and val(meta2) from ch_monoploid_seqs are
+ // only required to have the same `id`
skip_lai // val(true|false)
main:
ch_versions = Channel.empty()
+ // Prepare input channels
+ ch_monoploid_seqs_plain = ( ch_monoploid_seqs ?: Channel.empty() )
+ | filter { meta2, seqs -> seqs }
+ // Cater to channel: [ meta2, [] ]
+ | map { meta2, seqs -> [ meta2.id, seqs ] }
+
// MOUDLE: CUSTOM_SHORTENFASTAIDS
CUSTOM_SHORTENFASTAIDS ( ch_fasta )
- ch_short_ids_fasta = ch_fasta
- | join(CUSTOM_SHORTENFASTAIDS.out.short_ids_fasta, by:0, remainder:true)
- | map { meta, fasta, short_ids_fasta ->
- if ( fasta ) { [ meta, short_ids_fasta ?: fasta ] }
+ ch_short_ids_tsv = CUSTOM_SHORTENFASTAIDS.out.short_ids_tsv
+ ch_shortenfastaids_branch = ch_short_ids_tsv
+ | branch { meta, tsv ->
+ change: ! tsv.text.contains('IDs have acceptable length and character')
+ nonchange: tsv.text.contains('IDs have acceptable length and character')
}
- ch_short_ids_tsv = CUSTOM_SHORTENFASTAIDS.out.short_ids_tsv
- ch_short_monoploid_seqs = ch_short_ids_tsv
+ ch_short_ids_fasta = ch_shortenfastaids_branch.nonchange
| join(
- ch_monoploid_seqs ?: Channel.empty()
+ ch_fasta
+ )
+ | map { meta, tsv, fasta -> [ meta, fasta ] }
+ | mix(
+ ch_shortenfastaids_branch.change
+ | join(
+ CUSTOM_SHORTENFASTAIDS.out.short_ids_fasta
+ )
+ | map { meta, tsv, fasta -> [ meta, fasta ] }
)
- | map { meta, short_ids_tsv, monoploid_seqs ->
- map_monoploid_seqs_to_new_ids(meta, short_ids_tsv, monoploid_seqs)
+
+ ch_versions = ch_versions.mix(CUSTOM_SHORTENFASTAIDS.out.versions.first())
+
+ // collectFile: Map monoploid seqs to short IDs
+ ch_short_monoploid_seqs = ch_short_ids_tsv
+ | map { meta, tsv -> [ meta.id, tsv ] }
+ | join(ch_monoploid_seqs_plain)
+ | map { id, tsv, seqs ->
+ map_monoploid_seqs_to_new_ids(id, tsv, seqs)
}
| collectFile(newLine:true)
| map { seqs ->
def id = seqs.name.split('.mapped.monoploid.seqs.txt')[0]
- [ [ id: id ], seqs ]
+ [ id, seqs ]
}
- ch_versions = ch_versions.mix(CUSTOM_SHORTENFASTAIDS.out.versions.first())
+ | join(
+ ch_short_ids_tsv
+ | map { meta, tsv -> [ meta.id, meta, tsv ] }
+ )
+ | map { id, seqs, meta, tsv -> [ meta, seqs ] }
+
// MODULE: LTRHARVEST
LTRHARVEST ( ch_short_ids_fasta )
@@ -77,34 +103,42 @@ workflow FASTA_LTRRETRIEVER_LAI {
)
ch_pass_list = LTRRETRIEVER_LTRRETRIEVER.out.pass_list
- ch_ltrlib = LTRRETRIEVER_LTRRETRIEVER.out.ltrlib
ch_annotation_out = LTRRETRIEVER_LTRRETRIEVER.out.annotation_out
+ ch_pass_out = ch_pass_list.join(ch_annotation_out)
ch_annotation_gff = LTRRETRIEVER_LTRRETRIEVER.out.annotation_gff
+ ch_ltrlib = LTRRETRIEVER_LTRRETRIEVER.out.ltrlib
ch_versions = ch_versions.mix(LTRRETRIEVER_LTRRETRIEVER.out.versions.first())
- // MODULE: LAI
- ch_lai_inputs = skip_lai
- ? Channel.empty()
- : ch_short_ids_fasta
- | join(ch_pass_list)
- | join(ch_annotation_out)
- | map { meta, fasta, pass, out ->
- [ meta.id, meta, fasta, pass, out ]
- }
+ // MODULE: LTRRETRIEVER_LAI
+ ch_short_ids_fasta_mono = ch_short_ids_fasta
| join(
- ch_short_monoploid_seqs
- | map { meta, mono -> [ meta.id, mono ] },
+ ch_short_monoploid_seqs,
by:0,
remainder: true
)
- | map { id, meta, fasta, pass, out, mono ->
- [ meta, fasta, pass, out, mono ?: [] ]
+ // Danger! This partial join can fail
+ | filter { meta, fasta, seqs -> fasta }
+ // This filter guards against an error triggered by an upstream
+ // process failure: https://github.com/nextflow-io/nextflow/issues/5043
+ // fasta may come from upstream processes
+ // seqs also comes from upstream processes, it is optional
+ // and may not be present for some of the combinations
+ | map { meta, fasta, seqs -> [ meta, fasta, seqs ?: [] ] }
+
+ ch_lai_inputs = skip_lai
+ ? Channel.empty()
+ : ch_short_ids_fasta_mono
+ | join(
+ ch_pass_out
+ )
+ | map { meta, fasta, seqs, pass, out ->
+ [ meta, fasta, pass, out, seqs ]
}
LTRRETRIEVER_LAI(
- ch_lai_inputs.map { meta, fasta, pass, out, mono -> [ meta, fasta ] },
- ch_lai_inputs.map { meta, fasta, pass, out, mono -> pass },
- ch_lai_inputs.map { meta, fasta, pass, out, mono -> out },
- ch_lai_inputs.map { meta, fasta, pass, out, mono -> mono }
+ ch_lai_inputs.map { meta, fasta, pass, out, seqs -> [ meta, fasta ] },
+ ch_lai_inputs.map { meta, fasta, pass, out, seqs -> pass },
+ ch_lai_inputs.map { meta, fasta, pass, out, seqs -> out },
+ ch_lai_inputs.map { meta, fasta, pass, out, seqs -> seqs }
)
ch_lai_log = LTRRETRIEVER_LAI.out.log
@@ -112,16 +146,20 @@ workflow FASTA_LTRRETRIEVER_LAI {
ch_versions = ch_versions.mix(LTRRETRIEVER_LAI.out.versions.first())
// MODULE: CUSTOM_RESTOREGFFIDS
- ch_restorable_gff_tsv = ch_annotation_gff.join(ch_short_ids_tsv)
+ ch_gff_tsv_branch = ch_annotation_gff.join(ch_short_ids_tsv)
+ | branch { meta, gff, tsv ->
+ change: ! tsv.text.contains('IDs have acceptable length and character')
+ nochange: tsv.text.contains('IDs have acceptable length and character')
+ }
CUSTOM_RESTOREGFFIDS (
- ch_restorable_gff_tsv.map { meta, gff, tsv -> [ meta, gff ] },
- ch_restorable_gff_tsv.map { meta, gff, tsv -> tsv }
+ ch_gff_tsv_branch.change.map { meta, gff, tsv -> [ meta, gff ] },
+ ch_gff_tsv_branch.change.map { meta, gff, tsv -> tsv }
)
- ch_restored_gff = ch_annotation_gff
- | join(CUSTOM_RESTOREGFFIDS.out.restored_ids_gff3, by:0, remainder:true)
- | map { meta, gff, restored_gff -> [ meta, restored_gff ?: gff ] }
+ ch_restored_gff = ch_gff_tsv_branch.nochange
+ | map { meta, gff, tsv -> [ meta, gff ] }
+ | mix(CUSTOM_RESTOREGFFIDS.out.restored_ids_gff3)
ch_versions = ch_versions.mix(CUSTOM_RESTOREGFFIDS.out.versions.first())
@@ -134,29 +172,29 @@ workflow FASTA_LTRRETRIEVER_LAI {
}
-def map_monoploid_seqs_to_new_ids(meta, short_ids_tsv, monoploid_seqs) {
+def map_monoploid_seqs_to_new_ids(id, short_ids_tsv, monoploid_seqs) {
- def short_ids_head = short_ids_tsv.text.split('\n')[0]
+ def short_ids_head = short_ids_tsv.text.tokenize('\n')[0]
if (short_ids_head == "IDs have acceptable length and character. No change required.") {
- return [ "${meta.id}.mapped.monoploid.seqs.txt" ] + monoploid_seqs.text.split('\n')
+ return [ "${id}.mapped.monoploid.seqs.txt" ] + monoploid_seqs.text.tokenize('\n')
}
def orig_to_new_ids = [:]
short_ids_tsv.text.eachLine { line ->
- def (original_id, renamed_id) = line.split('\t')
+ def (original_id, renamed_id) = line.tokenize('\t')
orig_to_new_ids[original_id] = renamed_id
}
def mapped_ids = []
monoploid_seqs.text.eachLine { original_id ->
if (!orig_to_new_ids[original_id]) {
- error "Faild to find $original_id in ${monoploid_seqs}" +
- "The monoploid_seqs file is malformed!"
+ error "Faild to find $original_id in ${short_ids_tsv}" +
+ "\nThe short_ids_tsv file is malformed!"
}
mapped_ids.add(orig_to_new_ids[original_id])
}
- return [ "${meta.id}.mapped.monoploid.seqs.txt" ] + mapped_ids
+ return [ "${id}.mapped.monoploid.seqs.txt" ] + mapped_ids
}
diff --git a/subworkflows/pfr/fasta_ltrretriever_lai/tests/main.nf.test b/subworkflows/pfr/fasta_ltrretriever_lai/tests/main.nf.test
index 7ba88ab6..a7fc65eb 100644
--- a/subworkflows/pfr/fasta_ltrretriever_lai/tests/main.nf.test
+++ b/subworkflows/pfr/fasta_ltrretriever_lai/tests/main.nf.test
@@ -6,7 +6,7 @@ nextflow_workflow {
config "./nextflow.config"
tag "subworkflows"
- tag "subworkflows_nfcore"
+ tag "subworkflows_pfr"
tag "subworkflows/fasta_ltrretriever_lai"
tag "fasta_ltrretriever_lai"
tag "modules/nf-core/gunzip"
@@ -18,22 +18,22 @@ nextflow_workflow {
tag "ltrretriever/lai"
tag "custom/restoregffids"
- test("actinidia_chinensis-genome_21_fasta_gz") {
+ setup {
+ run("GUNZIP") {
+ script "../../../../modules/nf-core/gunzip"
- setup {
- run("GUNZIP") {
- script "../../../../modules/nf-core/gunzip"
-
- process {
- """
- input[0] = [
- [ id:'test' ],
- file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true)
- ]
- """
- }
+ process {
+ """
+ input[0] = [
+ [ id:'test' ],
+ file(params.test_data['actinidia_chinensis']['genome']['genome_1_fasta_gz'], checkIfExists: true)
+ ]
+ """
}
}
+ }
+
+ test("actinidia_chinensis-genome_1_fasta_gz") {
when {
workflow {
@@ -52,27 +52,13 @@ nextflow_workflow {
{ assert file(workflow.out.lai_log[0][1]).text.contains('Calculate LAI:') },
{ assert file(workflow.out.lai_log[0][1]).text.contains('Done!') },
{ assert Math.abs(Float.parseFloat(path(workflow.out.lai_out[0][1]).text.split("\n")[1].split("\t")[6]) - 31.29) <= 1.0 },
- { assert file(workflow.out.ltrlib[0][1]).text.contains('#LTR/Copia') }
+ { assert file(workflow.out.ltrlib[0][1]).text.contains('#LTR/Copia') },
+ { assert snapshot(workflow.out.versions).match("fasta_gz_versions") }
)
}
}
- test("actinidia_chinensis-genome_21_fasta_gz-with_mono") {
-
- setup {
- run("GUNZIP") {
- script "../../../../modules/nf-core/gunzip"
-
- process {
- """
- input[0] = [
- [ id:'test' ],
- file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true)
- ]
- """
- }
- }
- }
+ test("actinidia_chinensis-genome_1_fasta_gz-with_mono") {
when {
workflow {
@@ -102,29 +88,13 @@ nextflow_workflow {
{ assert file(workflow.out.lai_log[0][1]).text.contains('Calculate LAI:') },
{ assert file(workflow.out.lai_log[0][1]).text.contains('Done!') },
{ assert Math.abs(Float.parseFloat(path(workflow.out.lai_out[0][1]).text.split("\n")[1].split("\t")[6]) - 31.29) <= 1.0 },
- { assert file(workflow.out.ltrlib[0][1]).text.contains('#LTR/Copia') }
+ { assert file(workflow.out.ltrlib[0][1]).text.contains('#LTR/Copia') },
+ { assert snapshot(workflow.out.versions).match("with_mono_versions") }
)
}
}
- test("actinidia_chinensis-genome_21_fasta_gz-without_lai") {
-
- options '-stub'
-
- setup {
- run("GUNZIP") {
- script "../../../../modules/nf-core/gunzip"
-
- process {
- """
- input[0] = [
- [ id:'test' ],
- file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true)
- ]
- """
- }
- }
- }
+ test("actinidia_chinensis-genome_1_fasta_gz-with_empty_mono") {
when {
workflow {
@@ -137,12 +107,9 @@ nextflow_workflow {
}
| set { ch_fa }
- def monoploid_seqs = new File('test.mono.seq.txt')
- monoploid_seqs.write("chr_xxxxxxxxxxxxxxx_1")
-
input[0] = ch_fa
- input[1] = Channel.of( [ [ id:'test' ], monoploid_seqs.toPath() ] )
- input[2] = true
+ input[1] = []
+ input[2] = false
"""
}
}
@@ -150,10 +117,12 @@ nextflow_workflow {
then {
assertAll(
{ assert workflow.success },
- { assert workflow.out.annotation_gff != null },
- { assert workflow.out.ltrlib != null },
- { assert workflow.out.lai_log == [] },
- { assert workflow.out.lai_out == [] }
+ { assert file(workflow.out.annotation_gff[0][1]).text.contains('Copia_LTR_retrotransposon') },
+ { assert file(workflow.out.lai_log[0][1]).text.contains('Calculate LAI:') },
+ { assert file(workflow.out.lai_log[0][1]).text.contains('Done!') },
+ { assert Math.abs(Float.parseFloat(path(workflow.out.lai_out[0][1]).text.split("\n")[1].split("\t")[6]) - 31.29) <= 1.0 },
+ { assert file(workflow.out.ltrlib[0][1]).text.contains('#LTR/Copia') },
+ { assert snapshot(workflow.out.versions).match("with_empty_mono_versions") }
)
}
}
@@ -166,9 +135,9 @@ nextflow_workflow {
workflow {
"""
def monoploid_seqs = new File('test.mono.seq.txt')
- monoploid_seqs.write("chr_xxxxxxxxxxxxxxx_1")
+ monoploid_seqs.write("chr1")
- input[0] = Channel.empty()
+ input[0] = GUNZIP.out.gunzip
input[1] = Channel.of( [ [ id:'test' ], monoploid_seqs.toPath() ] )
input[2] = false
"""
@@ -177,8 +146,9 @@ nextflow_workflow {
then {
assertAll(
- { assert workflow.success }
+ { assert workflow.success },
+ { assert snapshot(workflow.out).match() }
)
}
}
-}
+}
\ No newline at end of file
diff --git a/subworkflows/pfr/fasta_ltrretriever_lai/tests/main.nf.test.snap b/subworkflows/pfr/fasta_ltrretriever_lai/tests/main.nf.test.snap
new file mode 100644
index 00000000..e827219f
--- /dev/null
+++ b/subworkflows/pfr/fasta_ltrretriever_lai/tests/main.nf.test.snap
@@ -0,0 +1,146 @@
+{
+ "fasta_gz_versions": {
+ "content": [
+ [
+ "versions.yml:md5,05a74dacbcd6c5906faeca3206f6563f",
+ "versions.yml:md5,52a98b45c3cf73f9314dbe9b34eca9d8",
+ "versions.yml:md5,5d86cda262f5c21e43738433a2781ebc",
+ "versions.yml:md5,608d746d45680f025562c4be455db461",
+ "versions.yml:md5,8468404805dddec679524e75b4fa51e8",
+ "versions.yml:md5,fb42c6cc5c28b6f9d3384171c7b6b143"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.1"
+ },
+ "timestamp": "2024-06-02T20:28:52.168753"
+ },
+ "with_empty_mono_versions": {
+ "content": [
+ [
+ "versions.yml:md5,05a74dacbcd6c5906faeca3206f6563f",
+ "versions.yml:md5,52a98b45c3cf73f9314dbe9b34eca9d8",
+ "versions.yml:md5,5d86cda262f5c21e43738433a2781ebc",
+ "versions.yml:md5,608d746d45680f025562c4be455db461",
+ "versions.yml:md5,8468404805dddec679524e75b4fa51e8",
+ "versions.yml:md5,9b2d5d76add82441b1e950d333e31307",
+ "versions.yml:md5,fb42c6cc5c28b6f9d3384171c7b6b143"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.1"
+ },
+ "timestamp": "2024-06-02T20:35:11.879068"
+ },
+ "with_mono_versions": {
+ "content": [
+ [
+ "versions.yml:md5,05a74dacbcd6c5906faeca3206f6563f",
+ "versions.yml:md5,52a98b45c3cf73f9314dbe9b34eca9d8",
+ "versions.yml:md5,5d86cda262f5c21e43738433a2781ebc",
+ "versions.yml:md5,608d746d45680f025562c4be455db461",
+ "versions.yml:md5,8468404805dddec679524e75b4fa51e8",
+ "versions.yml:md5,9b2d5d76add82441b1e950d333e31307",
+ "versions.yml:md5,fb42c6cc5c28b6f9d3384171c7b6b143"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.1"
+ },
+ "timestamp": "2024-06-02T20:30:58.393054"
+ },
+ "empty_fasta_stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.LTRlib.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test.out.gff3:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test"
+ },
+ "test.LAI.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test"
+ },
+ "test.LAI.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "4": [
+ "versions.yml:md5,05a74dacbcd6c5906faeca3206f6563f",
+ "versions.yml:md5,52a98b45c3cf73f9314dbe9b34eca9d8",
+ "versions.yml:md5,5d86cda262f5c21e43738433a2781ebc",
+ "versions.yml:md5,608d746d45680f025562c4be455db461",
+ "versions.yml:md5,8468404805dddec679524e75b4fa51e8",
+ "versions.yml:md5,fb42c6cc5c28b6f9d3384171c7b6b143"
+ ],
+ "annotation_gff": [
+ [
+ {
+ "id": "test"
+ },
+ "test.out.gff3:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "lai_log": [
+ [
+ {
+ "id": "test"
+ },
+ "test.LAI.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "lai_out": [
+ [
+ {
+ "id": "test"
+ },
+ "test.LAI.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "ltrlib": [
+ [
+ {
+ "id": "test"
+ },
+ "test.LTRlib.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,05a74dacbcd6c5906faeca3206f6563f",
+ "versions.yml:md5,52a98b45c3cf73f9314dbe9b34eca9d8",
+ "versions.yml:md5,5d86cda262f5c21e43738433a2781ebc",
+ "versions.yml:md5,608d746d45680f025562c4be455db461",
+ "versions.yml:md5,8468404805dddec679524e75b4fa51e8",
+ "versions.yml:md5,fb42c6cc5c28b6f9d3384171c7b6b143"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.04.2"
+ },
+ "timestamp": "2024-06-04T13:50:48.75158"
+ }
+}
\ No newline at end of file
diff --git a/subworkflows/pfr/fastq_bwa_mem_samblaster/main.nf b/subworkflows/pfr/fastq_bwa_mem_samblaster/main.nf
index 50e824a9..456f36c7 100644
--- a/subworkflows/pfr/fastq_bwa_mem_samblaster/main.nf
+++ b/subworkflows/pfr/fastq_bwa_mem_samblaster/main.nf
@@ -6,7 +6,7 @@ workflow FASTQ_BWA_MEM_SAMBLASTER {
take:
ch_fastq // channel: [ val(meta), [ fq ] ]
- ch_reference // channel: [ val(meta2), fasta, index ]; fast | index
+ ch_reference // channel: [ val(meta2), fasta, index ]; fasta | index
main:
ch_versions = Channel.empty()
@@ -43,6 +43,7 @@ workflow FASTQ_BWA_MEM_SAMBLASTER {
BWA_MEM(
ch_mem_inputs.map { meta, fq, index -> [ meta, fq ] },
ch_mem_inputs.map { meta, fq, index -> [ [], index ] },
+ [ [], [] ],
sort_bam
)
diff --git a/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/main.nf.test b/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/main.nf.test
index 1e279e48..0a84fd8c 100644
--- a/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/main.nf.test
+++ b/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/main.nf.test
@@ -41,4 +41,35 @@ nextflow_workflow {
)
}
}
-}
+
+ test("sarscov2-fq-gz-stub") {
+
+ options '-stub'
+
+ when {
+ workflow {
+ """
+ input[0] = Channel.of(
+ [
+ [ id:'test' ],
+ [
+ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
+ file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
+ ]
+ ]
+ )
+ input[1] = Channel.of(
+ [ [ id: 'genome' ], file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true), [] ]
+ )
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert snapshot(workflow.out).match()}
+ )
+ }
+ }
+}
\ No newline at end of file
diff --git a/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/main.nf.test.snap b/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/main.nf.test.snap
index e5a9b18e..a2b24322 100644
--- a/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/main.nf.test.snap
+++ b/subworkflows/pfr/fastq_bwa_mem_samblaster/tests/main.nf.test.snap
@@ -8,13 +8,13 @@
"id": "test",
"ref_id": "genome"
},
- "test.on.genome.samblaster.bam:md5,496319fc81c383a9ae6ab52592876c9b"
+ "test.on.genome.samblaster.bam:md5,38ade4c8ae74d49a197286e09a6cb3ad"
]
],
"1": [
- "versions.yml:md5,2ed54ca5e54063cb579273d0792465a7",
- "versions.yml:md5,49d22cda9beaf6ea1a1ad838ef4a4255",
- "versions.yml:md5,6a2baa7f2d1d555fe604e451624f414b"
+ "versions.yml:md5,3b59736950270bca59c29afeb5289b4f",
+ "versions.yml:md5,80949a94ee1be8238b1c79d89ed9fa83",
+ "versions.yml:md5,ddd810edeee1008ec55b628e3a9d6204"
],
"bam": [
[
@@ -22,13 +22,13 @@
"id": "test",
"ref_id": "genome"
},
- "test.on.genome.samblaster.bam:md5,496319fc81c383a9ae6ab52592876c9b"
+ "test.on.genome.samblaster.bam:md5,38ade4c8ae74d49a197286e09a6cb3ad"
]
],
"versions": [
- "versions.yml:md5,2ed54ca5e54063cb579273d0792465a7",
- "versions.yml:md5,49d22cda9beaf6ea1a1ad838ef4a4255",
- "versions.yml:md5,6a2baa7f2d1d555fe604e451624f414b"
+ "versions.yml:md5,3b59736950270bca59c29afeb5289b4f",
+ "versions.yml:md5,80949a94ee1be8238b1c79d89ed9fa83",
+ "versions.yml:md5,ddd810edeee1008ec55b628e3a9d6204"
]
}
],
@@ -36,6 +36,45 @@
"nf-test": "0.8.4",
"nextflow": "23.10.1"
},
- "timestamp": "2024-02-08T19:37:23.464854"
+ "timestamp": "2024-05-22T15:55:44.323082"
+ },
+ "sarscov2-fq-gz-stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "ref_id": "genome"
+ },
+ "test.on.genome.samblaster.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,3b59736950270bca59c29afeb5289b4f",
+ "versions.yml:md5,80949a94ee1be8238b1c79d89ed9fa83",
+ "versions.yml:md5,ddd810edeee1008ec55b628e3a9d6204"
+ ],
+ "bam": [
+ [
+ {
+ "id": "test",
+ "ref_id": "genome"
+ },
+ "test.on.genome.samblaster.bam:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,3b59736950270bca59c29afeb5289b4f",
+ "versions.yml:md5,80949a94ee1be8238b1c79d89ed9fa83",
+ "versions.yml:md5,ddd810edeee1008ec55b628e3a9d6204"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-22T15:55:50.527259"
}
}
\ No newline at end of file
diff --git a/subworkflows/pfr/gff3_gt_gff3_gff3validator_stat/main.nf b/subworkflows/pfr/gff3_gt_gff3_gff3validator_stat/main.nf
new file mode 100644
index 00000000..3891fc81
--- /dev/null
+++ b/subworkflows/pfr/gff3_gt_gff3_gff3validator_stat/main.nf
@@ -0,0 +1,155 @@
+include { GT_GFF3 } from '../../../modules/pfr/gt/gff3/main'
+include { SAMTOOLS_FAIDX } from '../../../modules/pfr/samtools/faidx/main'
+include { GT_GFF3VALIDATOR } from '../../../modules/pfr/gt/gff3validator/main'
+include { GT_STAT } from '../../../modules/pfr/gt/stat/main'
+
+workflow GFF3_GT_GFF3_GFF3VALIDATOR_STAT {
+
+ take:
+ ch_gff3 // channel: [ val(meta), gff3 ]
+ ch_fasta // channel: [ val(meta), fasta ]
+
+ main:
+
+ ch_versions = Channel.empty()
+
+ // MODULE: GT_GFF3
+ GT_GFF3 ( ch_gff3 )
+
+ ch_versions = ch_versions.mix(GT_GFF3.out.versions.first())
+
+ // MODULE: GT_GFF3VALIDATOR
+ GT_GFF3VALIDATOR ( GT_GFF3.out.gt_gff3 )
+
+ ch_versions = ch_versions.mix(GT_GFF3VALIDATOR.out.versions.first())
+
+ // MODULE: SAMTOOLS_FAIDX
+ SAMTOOLS_FAIDX(
+ ch_fasta,
+ [ [], [] ]
+ )
+
+ ch_fai = SAMTOOLS_FAIDX.out.fai
+ ch_versions = ch_versions.mix(SAMTOOLS_FAIDX.out.versions.first())
+
+ // FUNCTION: checkGff3FastaCorrespondence
+ ch_gff3_fai = GT_GFF3VALIDATOR.out.success_log
+ | join (
+ GT_GFF3.out.gt_gff3
+ )
+ | map { meta, log, gff3 -> [ meta, gff3 ] }
+ | join (
+ ch_fai
+ )
+
+ ch_correspondence_status = ch_gff3_fai
+ | map { meta, gff3, fai ->
+ checkGff3FastaCorrespondence ( meta, gff3, fai )
+ }
+
+ ch_correspondence_success = ch_correspondence_status
+ | map { meta, success, error ->
+ if ( success ) {
+ [ meta, success ]
+ }
+ }
+
+ ch_correspondence_error = ch_correspondence_status
+ | map { meta, success, error ->
+ if ( error ) {
+ [ "${meta.id}.error.log", error.join('\n') ]
+ }
+ }
+ | collectFile
+ | map { error ->
+ [ [ id: error.baseName.replace('.error', '') ], error ]
+ }
+
+ ch_valid_gff3 = ch_correspondence_success
+ | join (
+ ch_gff3_fai
+ | map { meta, gff3, fai -> [ meta, gff3 ] }
+ )
+ | map { meta, log, gff3 -> [ meta, gff3 ] }
+
+ ch_log_for_invalid_gff3 = GT_GFF3.out.error_log
+ | mix (
+ GT_GFF3VALIDATOR.out.error_log
+ )
+ | mix (
+ ch_correspondence_error
+ )
+
+ // MODULE: GT_STAT
+ GT_STAT ( ch_valid_gff3 )
+
+ ch_gff3_stats = GT_STAT.out.stats
+ ch_versions = ch_versions.mix(GT_STAT.out.versions.first())
+
+ emit:
+ valid_gff3 = ch_valid_gff3 // channel: [ val(meta), gff3 ]
+ gff3_stats = ch_gff3_stats // channel: [ val(meta), yml ]
+ log_for_invalid_gff3 = ch_log_for_invalid_gff3 // channel: [ val(meta), log ]
+ versions = ch_versions // channel: [ versions.yml ]
+}
+
+def checkGff3FastaCorrespondence(meta, gff3File, faiFile) {
+
+ // STEP 1
+ // Check that gff3 has no identifiers that are not in fasta
+ def gff3Lines = gff3File.readLines().findAll { ! it.startsWith('#') }
+ def gff3Identifiers = gff3Lines.collect { it.split('\t')[0] }.unique().sort()
+ def fastaIdentifiers = faiFile.readLines().collect { it.split('\t')[0] }.unique().sort()
+ def missingIdentifiers = gff3Identifiers.findAll { ! fastaIdentifiers.contains(it) }
+
+ if (missingIdentifiers) {
+ return [
+ meta,
+ [], // success log
+ [
+ "Failed to validate gff3 file: ${gff3File.name}",
+ "GFF3 file contains identifiers not present in FASTA:",
+ "${missingIdentifiers.join('\n')}"
+ ] // error log
+ ]
+ }
+
+ // STEP 2
+ // Check that there are no coordinates in gff3 that exceed the sequence length in the parent fasta entry
+ def sequenceLengths = [:]
+ faiFile.readLines().each { line ->
+ def parts = line.split('\t')
+ sequenceLengths[parts[0]] = parts[1]
+ }
+
+ for ( line in gff3Lines ) {
+ def parts = line.split('\t')
+ def name = parts[0]
+ def start = parts[3].toInteger()
+ def end = parts[4].toInteger()
+ def seqLength = sequenceLengths[name].toInteger()
+
+ if (start > seqLength || end > seqLength) {
+ return [
+ meta,
+ [], // success log
+ [
+ "Failed to validate gff3: ${gff3File.name}",
+ "Coordinates exceed sequence length in GFF3 file:",
+ "Sequence: $name",
+ "Sequence length: $seqLength",
+ "Start: $start",
+ "End: $end"
+ ] // error log
+ ]
+ }
+ }
+
+ return [
+ meta,
+ [
+ "All tests passed..."
+ ], // success log
+ [] // error log
+ ]
+}
diff --git a/subworkflows/pfr/gff3_validate/meta.yml b/subworkflows/pfr/gff3_gt_gff3_gff3validator_stat/meta.yml
similarity index 80%
rename from subworkflows/pfr/gff3_validate/meta.yml
rename to subworkflows/pfr/gff3_gt_gff3_gff3validator_stat/meta.yml
index 5dea12a5..0d12d5fd 100644
--- a/subworkflows/pfr/gff3_validate/meta.yml
+++ b/subworkflows/pfr/gff3_gt_gff3_gff3validator_stat/meta.yml
@@ -1,17 +1,18 @@
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/subworkflows/yaml-schema.json
-name: gff3_validate
+name: gff3_gt_gff3_gff3validator_stat
description: |
- Validates a gff3 file using GenomeTools gt-gff3, gt-gff3validator and
- checks its correspondence with a fasta file
+ Validates a gff3 file using GenomeTools gt-gff3, gt-gff3validator, checks its correspondence with a fasta file and produces stats with gt-stat
keywords:
- genome
- gff3
- annotation
- validation
+ - stats
components:
- gt/gff3
- gt/gff3validator
- - custom/checkgff3fastacorrespondence
+ - samtools/faidx
+ - gt/stat
input:
- ch_gff3:
type: file
@@ -32,6 +33,12 @@ output:
Valid gff3 file
Structure: [ val(meta), path(gff3) ]
pattern: "*.gff3"
+ - gff3_stats:
+ type: file
+ description: |
+ Statistics
+ Structure: [ val(meta), path(yml) ]
+ pattern: "*.yml"
- log_for_invalid_gff3:
type: file
description: |
diff --git a/subworkflows/pfr/gff3_validate/tests/main.nf.test b/subworkflows/pfr/gff3_gt_gff3_gff3validator_stat/tests/main.nf.test
similarity index 56%
rename from subworkflows/pfr/gff3_validate/tests/main.nf.test
rename to subworkflows/pfr/gff3_gt_gff3_gff3validator_stat/tests/main.nf.test
index e71712b8..f15972fa 100644
--- a/subworkflows/pfr/gff3_validate/tests/main.nf.test
+++ b/subworkflows/pfr/gff3_gt_gff3_gff3validator_stat/tests/main.nf.test
@@ -1,21 +1,22 @@
nextflow_workflow {
- name "Test Workflow GFF3_VALIDATE"
+ name "Test Workflow GFF3_GT_GFF3_GFF3VALIDATOR_STAT"
script "../main.nf"
- workflow "GFF3_VALIDATE"
+ workflow "GFF3_GT_GFF3_GFF3VALIDATOR_STAT"
config "./nextflow.config"
tag "subworkflows"
tag "subworkflows_nfcore"
- tag "subworkflows/gff3_validate"
- tag "gff3_validate"
+ tag "subworkflows/gff3_gt_gff3_gff3validator_stat"
+ tag "gff3_gt_gff3_gff3validator_stat"
tag "gt"
tag "gt/gff3"
tag "gt/gff3validator"
- tag "custom"
- tag "custom/checkgff3fastacorrespondence"
+ tag "gt/stat"
+ tag "samtools"
+ tag "samtools/faidx"
- test("sarscov2-genome_gff3-genome_fasta-all_pass") {
+ test("sarscov2-genome_gff3-all_pass") {
when {
workflow {
@@ -38,7 +39,7 @@ nextflow_workflow {
}
}
- test("homo_sapiens-genome_bed-genome_fasta-gt_gff3_fail") {
+ test("homo_sapiens-genome_bed-gt_gff3_fail") {
when {
workflow {
@@ -79,8 +80,42 @@ nextflow_workflow {
then {
assertAll(
{ assert workflow.success},
- { assert snapshot(workflow.out).match()}
+ { assert snapshot(
+ workflow.out.valid_gff3,
+ workflow.out.versions).match()
+ },
+ { assert path(workflow.out.log_for_invalid_gff3[0][1]).text.contains('GFF3 file contains identifiers not present in FASTA') }
+ )
+ }
+ }
+
+ test("sarscov2-genome_gff3-test_fasta-correspondence_fail-out_of_bounds") {
+
+ when {
+ workflow {
+ """
+ def test_fasta = new File('genome.fasta')
+ test_fasta.text = '>MT192765.1\\nAGCTAGCT'
+
+ input[0] = Channel.of([ [ id:'test' ], // meta map
+ file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
+ ])
+ input[1] = Channel.of([ [ id:'test' ],
+ test_fasta.toPath()
+ ])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success},
+ { assert snapshot(
+ workflow.out.valid_gff3,
+ workflow.out.versions).match()
+ },
+ { assert path(workflow.out.log_for_invalid_gff3[0][1]).text.contains('Coordinates exceed sequence length in GFF3 file') }
)
}
}
-}
+}
\ No newline at end of file
diff --git a/subworkflows/pfr/gff3_gt_gff3_gff3validator_stat/tests/main.nf.test.snap b/subworkflows/pfr/gff3_gt_gff3_gff3validator_stat/tests/main.nf.test.snap
new file mode 100644
index 00000000..57aadb93
--- /dev/null
+++ b/subworkflows/pfr/gff3_gt_gff3_gff3validator_stat/tests/main.nf.test.snap
@@ -0,0 +1,144 @@
+{
+ "homo_sapiens-genome_bed-gt_gff3_fail": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+
+ ],
+ "2": [
+ [
+ {
+ "id": "test"
+ },
+ "test.error.log:md5,c096494c3cd02864eb54434c294ba382"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,0cb9519e626e5128d8495cf29b7d59ff",
+ "versions.yml:md5,d81758cb7178ee32a1fb78f8639504d8"
+ ],
+ "gff3_stats": [
+
+ ],
+ "log_for_invalid_gff3": [
+ [
+ {
+ "id": "test"
+ },
+ "test.error.log:md5,c096494c3cd02864eb54434c294ba382"
+ ]
+ ],
+ "valid_gff3": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,0cb9519e626e5128d8495cf29b7d59ff",
+ "versions.yml:md5,d81758cb7178ee32a1fb78f8639504d8"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-09T19:41:05.240982"
+ },
+ "sarscov2-genome_gff3-test_fasta-correspondence_fail-out_of_bounds": {
+ "content": [
+ [
+
+ ],
+ [
+ "versions.yml:md5,0cb9519e626e5128d8495cf29b7d59ff",
+ "versions.yml:md5,c89b081a13c68acc5326e43ca9104344",
+ "versions.yml:md5,d81758cb7178ee32a1fb78f8639504d8"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-09T19:41:16.606045"
+ },
+ "sarscov2-genome_gff3-homo_sapiens-genome_fasta-correspondence_fail": {
+ "content": [
+ [
+
+ ],
+ [
+ "versions.yml:md5,0cb9519e626e5128d8495cf29b7d59ff",
+ "versions.yml:md5,c89b081a13c68acc5326e43ca9104344",
+ "versions.yml:md5,d81758cb7178ee32a1fb78f8639504d8"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-09T19:41:11.28855"
+ },
+ "sarscov2-genome_gff3-all_pass": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gt.gff3:md5,2ae900237ace415557b8735fac088b85"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test.yml:md5,762d733791108d9723989c3bbc69fa2b"
+ ]
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,0cb9519e626e5128d8495cf29b7d59ff",
+ "versions.yml:md5,80555fe6e28e9564cb534f5478842286",
+ "versions.yml:md5,c89b081a13c68acc5326e43ca9104344",
+ "versions.yml:md5,d81758cb7178ee32a1fb78f8639504d8"
+ ],
+ "gff3_stats": [
+ [
+ {
+ "id": "test"
+ },
+ "test.yml:md5,762d733791108d9723989c3bbc69fa2b"
+ ]
+ ],
+ "log_for_invalid_gff3": [
+
+ ],
+ "valid_gff3": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gt.gff3:md5,2ae900237ace415557b8735fac088b85"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,0cb9519e626e5128d8495cf29b7d59ff",
+ "versions.yml:md5,80555fe6e28e9564cb534f5478842286",
+ "versions.yml:md5,c89b081a13c68acc5326e43ca9104344",
+ "versions.yml:md5,d81758cb7178ee32a1fb78f8639504d8"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-05-10T09:03:38.938037"
+ }
+}
\ No newline at end of file
diff --git a/subworkflows/pfr/gff3_validate/tests/nextflow.config b/subworkflows/pfr/gff3_gt_gff3_gff3validator_stat/tests/nextflow.config
similarity index 100%
rename from subworkflows/pfr/gff3_validate/tests/nextflow.config
rename to subworkflows/pfr/gff3_gt_gff3_gff3validator_stat/tests/nextflow.config
diff --git a/subworkflows/pfr/gff3_gt_gff3_gff3validator_stat/tests/tags.yml b/subworkflows/pfr/gff3_gt_gff3_gff3validator_stat/tests/tags.yml
new file mode 100644
index 00000000..2b0b9279
--- /dev/null
+++ b/subworkflows/pfr/gff3_gt_gff3_gff3validator_stat/tests/tags.yml
@@ -0,0 +1,2 @@
+subworkflows/gff3_gt_gff3_gff3validator_stat:
+ - subworkflows/pfr/gff3_gt_gff3_gff3validator_stat/**
diff --git a/subworkflows/pfr/gff3_validate/main.nf b/subworkflows/pfr/gff3_validate/main.nf
deleted file mode 100644
index 5437c5a6..00000000
--- a/subworkflows/pfr/gff3_validate/main.nf
+++ /dev/null
@@ -1,61 +0,0 @@
-include { GT_GFF3 } from '../../../modules/pfr/gt/gff3/main'
-include { GT_GFF3VALIDATOR } from '../../../modules/pfr/gt/gff3validator/main'
-include { CUSTOM_CHECKGFF3FASTACORRESPONDENCE } from '../../../modules/pfr/custom/checkgff3fastacorrespondence/main'
-
-workflow GFF3_VALIDATE {
-
- take:
- ch_gff3 // channel: [ val(meta), gff3 ]
- ch_fasta // channel: [ val(meta), fasta ]
-
- main:
-
- ch_versions = Channel.empty()
-
- // MODULE: GT_GFF3
- GT_GFF3 ( ch_gff3 )
- ch_versions = ch_versions.mix(GT_GFF3.out.versions.first())
-
- // MODULE: GT_GFF3VALIDATOR
- GT_GFF3VALIDATOR ( GT_GFF3.out.gt_gff3 )
- ch_versions = ch_versions.mix(GT_GFF3VALIDATOR.out.versions.first())
-
- // MODULE: CUSTOM_CHECKGFF3FASTACORRESPONDENCE
- GT_GFF3VALIDATOR.out.success_log
- | join (
- GT_GFF3.out.gt_gff3
- )
- | map { meta, log, gff3 -> [ meta, gff3 ] }
- | join (
- ch_fasta
- )
- | set { ch_gff3_fasta }
-
- CUSTOM_CHECKGFF3FASTACORRESPONDENCE (
- ch_gff3_fasta.map { meta, gff3, fasta -> [ meta, gff3 ] },
- ch_gff3_fasta.map { meta, gff3, fasta -> fasta }
- )
-
- ch_versions = ch_versions.mix(CUSTOM_CHECKGFF3FASTACORRESPONDENCE.out.versions.first())
-
- CUSTOM_CHECKGFF3FASTACORRESPONDENCE.out.success_log
- | join (
- ch_gff3_fasta.map { meta, gff3, fasta -> [ meta, gff3 ] }
- )
- | map { meta, log, gff3 -> [ meta, gff3 ] }
- | set { ch_valid_gff3 }
-
- GT_GFF3.out.error_log
- | mix (
- GT_GFF3VALIDATOR.out.error_log
- )
- | mix (
- CUSTOM_CHECKGFF3FASTACORRESPONDENCE.out.error_log
- )
- | set { ch_log_for_invalid_gff3 }
-
- emit:
- valid_gff3 = ch_valid_gff3 // channel: [ val(meta), gff3 ]
- log_for_invalid_gff3 = ch_log_for_invalid_gff3 // channel: [ val(meta), log ]
- versions = ch_versions // channel: [ versions.yml ]
-}
diff --git a/subworkflows/pfr/gff3_validate/tests/main.nf.test.snap b/subworkflows/pfr/gff3_validate/tests/main.nf.test.snap
deleted file mode 100644
index 4d2a59b2..00000000
--- a/subworkflows/pfr/gff3_validate/tests/main.nf.test.snap
+++ /dev/null
@@ -1,115 +0,0 @@
-{
- "sarscov2-genome_gff3-genome_fasta-all_pass": {
- "content": [
- {
- "0": [
- [
- {
- "id": "test"
- },
- "test.gt.gff3:md5,2ae900237ace415557b8735fac088b85"
- ]
- ],
- "1": [
-
- ],
- "2": [
- "versions.yml:md5,10fe5c201e5fcddb52c3607ab3fdfb34",
- "versions.yml:md5,856745cef2fff087e50ea4c0ffa3addd",
- "versions.yml:md5,a89255422a163684b0c80ebdd8ad28ae"
- ],
- "log_for_invalid_gff3": [
-
- ],
- "valid_gff3": [
- [
- {
- "id": "test"
- },
- "test.gt.gff3:md5,2ae900237ace415557b8735fac088b85"
- ]
- ],
- "versions": [
- "versions.yml:md5,10fe5c201e5fcddb52c3607ab3fdfb34",
- "versions.yml:md5,856745cef2fff087e50ea4c0ffa3addd",
- "versions.yml:md5,a89255422a163684b0c80ebdd8ad28ae"
- ]
- }
- ],
- "timestamp": "2023-12-07T10:33:21.09887"
- },
- "homo_sapiens-genome_bed-genome_fasta-gt_gff3_fail": {
- "content": [
- {
- "0": [
-
- ],
- "1": [
- [
- {
- "id": "test"
- },
- "test.error.log:md5,c096494c3cd02864eb54434c294ba382"
- ]
- ],
- "2": [
- "versions.yml:md5,a89255422a163684b0c80ebdd8ad28ae"
- ],
- "log_for_invalid_gff3": [
- [
- {
- "id": "test"
- },
- "test.error.log:md5,c096494c3cd02864eb54434c294ba382"
- ]
- ],
- "valid_gff3": [
-
- ],
- "versions": [
- "versions.yml:md5,a89255422a163684b0c80ebdd8ad28ae"
- ]
- }
- ],
- "timestamp": "2023-12-07T10:35:26.549003"
- },
- "sarscov2-genome_gff3-homo_sapiens-genome_fasta-correspondence_fail": {
- "content": [
- {
- "0": [
-
- ],
- "1": [
- [
- {
- "id": "test"
- },
- "test.error.log:md5,67686ea1ef271821f1218a8fe0207e1f"
- ]
- ],
- "2": [
- "versions.yml:md5,10fe5c201e5fcddb52c3607ab3fdfb34",
- "versions.yml:md5,856745cef2fff087e50ea4c0ffa3addd",
- "versions.yml:md5,a89255422a163684b0c80ebdd8ad28ae"
- ],
- "log_for_invalid_gff3": [
- [
- {
- "id": "test"
- },
- "test.error.log:md5,67686ea1ef271821f1218a8fe0207e1f"
- ]
- ],
- "valid_gff3": [
-
- ],
- "versions": [
- "versions.yml:md5,10fe5c201e5fcddb52c3607ab3fdfb34",
- "versions.yml:md5,856745cef2fff087e50ea4c0ffa3addd",
- "versions.yml:md5,a89255422a163684b0c80ebdd8ad28ae"
- ]
- }
- ],
- "timestamp": "2023-12-07T10:35:32.53584"
- }
-}
\ No newline at end of file
diff --git a/subworkflows/pfr/gff3_validate/tests/tags.yml b/subworkflows/pfr/gff3_validate/tests/tags.yml
deleted file mode 100644
index 60ffbf0c..00000000
--- a/subworkflows/pfr/gff3_validate/tests/tags.yml
+++ /dev/null
@@ -1,2 +0,0 @@
-subworkflows/gff3_validate:
- - subworkflows/pfr/gff3_validate/**
diff --git a/tests/invalid/assemblysheet.csv b/tests/invalid/assemblysheet.csv
index de7e586f..93e81420 100644
--- a/tests/invalid/assemblysheet.csv
+++ b/tests/invalid/assemblysheet.csv
@@ -1,4 +1,5 @@
-tag,fasta,gff3,monoploid_ids,synteny_labels
-FI1,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.gff.gz,,
-TT_2021a,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/021/950/295/GCA_021950295.1_ASM2195029v1/GCA_021950295.1_ASM2195029v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.gff.gz,,
-MISC,tests/invalid/invalid.fsa.gz,,,
+tag,fasta,gff3
+FI1,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.gff.gz
+TT_2021a,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/021/950/295/GCA_021950295.1_ASM2195029v1/GCA_021950295.1_ASM2195029v1_genomic.fna.gz,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.gff.gz
+MISC,tests/invalid/invalid.fsa.gz
+DUPSEQ,tests/invalid/dupseq.fsa.gz
diff --git a/tests/invalid/dupseq.fsa.gz b/tests/invalid/dupseq.fsa.gz
new file mode 100644
index 00000000..33cdb354
Binary files /dev/null and b/tests/invalid/dupseq.fsa.gz differ
diff --git a/tests/merqury/mixed2x/assemblysheet.csv b/tests/merqury/mixed2x/assemblysheet.csv
new file mode 100644
index 00000000..61df3863
--- /dev/null
+++ b/tests/merqury/mixed2x/assemblysheet.csv
@@ -0,0 +1,2 @@
+tag,fasta,reads_1
+Col0Cvi0F1,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/001/753/755/GCA_001753755.2_Athal_Col0Cvi0F1_phased_diploid_1.0/GCA_001753755.2_Athal_Col0Cvi0F1_phased_diploid_1.0_genomic.fna.gz,SRR3703081
diff --git a/tests/merqury/mixed2x/params.json b/tests/merqury/mixed2x/params.json
new file mode 100644
index 00000000..6ce89d4b
--- /dev/null
+++ b/tests/merqury/mixed2x/params.json
@@ -0,0 +1,10 @@
+{
+ "config_profile_name": "Test profile",
+ "config_profile_description": "Merqury test for a mixed diploid assembly contained in a single fasta",
+ "input": "tests/merqury/mixed2x/assemblysheet.csv",
+ "merqury_skip": false,
+ "merqury_kmer_length": 21,
+ "max_cpus": 8,
+ "max_memory": "32.GB",
+ "max_time": "6.h"
+}
diff --git a/tests/merqury/phased2x.mp/assemblysheet.csv b/tests/merqury/phased2x.mp/assemblysheet.csv
new file mode 100644
index 00000000..df1ed671
--- /dev/null
+++ b/tests/merqury/phased2x.mp/assemblysheet.csv
@@ -0,0 +1,3 @@
+tag,fasta,reads_1,maternal_reads_1,paternal_reads_1
+COL,https://gembox.cbcb.umd.edu/triobinning/athal_COL.fasta,SRR3703081,https://gembox.cbcb.umd.edu/triobinning/athal_CVI.fastq.gz,https://gembox.cbcb.umd.edu/triobinning/athal_COL.fastq.gz
+CVI,https://gembox.cbcb.umd.edu/triobinning/athal_CVI.fasta,SRR3703081,https://gembox.cbcb.umd.edu/triobinning/athal_CVI.fastq.gz,https://gembox.cbcb.umd.edu/triobinning/athal_COL.fastq.gz
diff --git a/tests/merqury/phased2x.mp/assemblysheet.csv.local b/tests/merqury/phased2x.mp/assemblysheet.csv.local
new file mode 100644
index 00000000..f90aec6f
--- /dev/null
+++ b/tests/merqury/phased2x.mp/assemblysheet.csv.local
@@ -0,0 +1,4 @@
+tag,fasta,reads_1,reads_2,maternal_reads_1,paternal_reads_1
+COL,https://gembox.cbcb.umd.edu/triobinning/athal_COL.fasta,/Users/hrauxr/Projects/test-data/assemblyqc/merqury.fk/phased2x.mp/SRR3703081_1.fastq.gz,/Users/hrauxr/Projects/test-data/assemblyqc/merqury.fk/phased2x.mp/SRR3703081_2.fastq.gz,/Users/hrauxr/Projects/test-data/assemblyqc/merqury.fk/phased2x.mp/athal_CVI.fastq.gz,/Users/hrauxr/Projects/test-data/assemblyqc/merqury.fk/phased2x.mp/athal_COL.fastq.gz
+CVI,https://gembox.cbcb.umd.edu/triobinning/athal_CVI.fasta,/Users/hrauxr/Projects/test-data/assemblyqc/merqury.fk/phased2x.mp/SRR3703081_1.fastq.gz,/Users/hrauxr/Projects/test-data/assemblyqc/merqury.fk/phased2x.mp/SRR3703081_2.fastq.gz,/Users/hrauxr/Projects/test-data/assemblyqc/merqury.fk/phased2x.mp/athal_CVI.fastq.gz,/Users/hrauxr/Projects/test-data/assemblyqc/merqury.fk/phased2x.mp/athal_COL.fastq.gz
+FI1,https://ftp.ncbi.nlm.nih.gov/genomes/all/GCA/003/814/445/GCA_003814445.1_ASM381444v1/GCA_003814445.1_ASM381444v1_genomic.fna.gz,/Users/hrauxr/Projects/test-data/assemblyqc/merqury.fk/phased2x.mp/SRR8238189_1.fastq.gz,/Users/hrauxr/Projects/test-data/assemblyqc/merqury.fk/phased2x.mp/SRR8238189_2.fastq.gz
diff --git a/tests/merqury/phased2x.mp/params.json b/tests/merqury/phased2x.mp/params.json
new file mode 100644
index 00000000..60f157ca
--- /dev/null
+++ b/tests/merqury/phased2x.mp/params.json
@@ -0,0 +1,10 @@
+{
+ "config_profile_name": "Test profile",
+ "config_profile_description": "Merqury test for a phased diploid assembly contained in separate fasta with parental reads",
+ "input": "tests/merqury/phased2x.mp/assemblysheet.csv",
+ "merqury_skip": false,
+ "merqury_kmer_length": 21,
+ "max_cpus": 8,
+ "max_memory": "32.GB",
+ "max_time": "6.h"
+}
diff --git a/tests/merqury/phased2x/assemblysheet.csv b/tests/merqury/phased2x/assemblysheet.csv
new file mode 100644
index 00000000..852fd392
--- /dev/null
+++ b/tests/merqury/phased2x/assemblysheet.csv
@@ -0,0 +1,3 @@
+tag,fasta,reads_1
+COL,https://gembox.cbcb.umd.edu/triobinning/athal_COL.fasta,SRR3703081
+CVI,https://gembox.cbcb.umd.edu/triobinning/athal_CVI.fasta,SRR3703081
diff --git a/tests/merqury/phased2x/params.json b/tests/merqury/phased2x/params.json
new file mode 100644
index 00000000..0b363fab
--- /dev/null
+++ b/tests/merqury/phased2x/params.json
@@ -0,0 +1,10 @@
+{
+ "config_profile_name": "Test profile",
+ "config_profile_description": "Merqury test for a phased diploid assembly contained in separate fasta",
+ "input": "tests/merqury/phased2x/assemblysheet.csv",
+ "merqury_skip": false,
+ "merqury_kmer_length": 21,
+ "max_cpus": 8,
+ "max_memory": "32.GB",
+ "max_time": "6.h"
+}
diff --git a/tests/minimal/params.json b/tests/minimal/params.json
new file mode 100644
index 00000000..c5683622
--- /dev/null
+++ b/tests/minimal/params.json
@@ -0,0 +1,8 @@
+{
+ "config_profile_name": "Test profile",
+ "config_profile_description": "Minimal test dataset to check pipeline function",
+ "input": "https://raw.githubusercontent.com/plant-food-research-open/assemblyqc/dev/assets/assemblysheetv2.csv",
+ "max_cpus": 2,
+ "max_memory": "6.GB",
+ "max_time": "6.h"
+}
diff --git a/tests/stub/JAD.seq.labels.tsv b/tests/stub/JAD.seq.labels.tsv
new file mode 100644
index 00000000..7d957f9c
--- /dev/null
+++ b/tests/stub/JAD.seq.labels.tsv
@@ -0,0 +1,10 @@
+JADWOS010000003.1 JAD1
+JADWOS010000004.1 JAD2
+JADWOS010000005.1 JAD3
+JADWOS010000006.1 JAD4
+JADWOS010000007.1 JAD5
+JADWOS010000008.1 JAD6
+JADWOS010000009.1 JAD7
+JADWOS010000001.1 JAD8
+JADWOS010000010.1 JAD9
+JADWOS010000011.1 JAD10
diff --git a/tests/stub/params.json b/tests/stub/params.json
new file mode 100644
index 00000000..6e303ac4
--- /dev/null
+++ b/tests/stub/params.json
@@ -0,0 +1,29 @@
+{
+ "config_profile_name": "Full stub test",
+ "config_profile_description": "Full test of the pipeline in stub mode",
+ "input": "assets/assemblysheetv2.csv",
+ "ncbi_fcs_adaptor_skip": false,
+ "ncbi_fcs_adaptor_empire": "euk",
+ "ncbi_fcs_gx_skip": false,
+ "ncbi_fcs_gx_tax_id": 12,
+ "ncbi_fcs_gx_db_path": "tests/stub/gxdb/test",
+ "busco_skip": false,
+ "busco_mode": "genome",
+ "busco_lineage_datasets": "fungi_odb10 hypocreales_odb10",
+ "tidk_skip": false,
+ "tidk_repeat_seq": "TTTGGG",
+ "lai_skip": false,
+ "kraken2_skip": false,
+ "kraken2_db_path": "tests/stub/kraken2/k2_minusb_20231009.tar.gz",
+ "hic": "tests/stub/hic/Dummy_hic.R{1,2}.fq.gz",
+ "hic_skip_fastp": true,
+ "hic_skip_fastqc": false,
+ "synteny_skip": false,
+ "synteny_mummer_skip": false,
+ "synteny_plotsr_skip": false,
+ "synteny_xref_assemblies": "assets/xrefsheet.csv",
+ "merqury_skip": false,
+ "max_cpus": 2,
+ "max_memory": "6.GB",
+ "max_time": "6.h"
+}
diff --git a/tests/stub/stub.config b/tests/stub/stub.config
deleted file mode 100644
index 4f753187..00000000
--- a/tests/stub/stub.config
+++ /dev/null
@@ -1,35 +0,0 @@
-params {
- config_profile_name = 'Full stub test'
- config_profile_description = 'Full test of the pipeline in stub mode'
-
- input = 'assets/assemblysheet.csv'
-
- ncbi_fcs_adaptor_skip = false
- ncbi_fcs_adaptor_empire = 'euk'
-
- ncbi_fcs_gx_skip = false
- ncbi_fcs_gx_tax_id = 12
- ncbi_fcs_gx_db_path = 'tests/stub/gxdb/test'
-
- busco_skip = false
- busco_mode = 'geno'
- busco_lineage_datasets = 'fungi_odb10 hypocreales_odb10'
-
- tidk_skip = false
- tidk_repeat_seq = 'TTTGGG'
-
- lai_skip = false
-
- kraken2_skip = false
- kraken2_db_path = 'tests/stub/kraken2/k2_minusb_20231009.tar.gz'
-
- hic = 'tests/stub/hic/Dummy_hic.R{1,2}.fq.gz'
-
- synteny_skip = true // GitHub action runner runs out of memory
- synteny_xref_assemblies = 'assets/xrefsheet.csv'
-
- // Limit resources so that this can run on GitHub Actions
- max_cpus = 2
- max_memory = '6.GB'
- max_time = '6.h'
-}
diff --git a/tower.yml b/tower.yml
index 787aedfe..96116b57 100644
--- a/tower.yml
+++ b/tower.yml
@@ -1,5 +1,3 @@
reports:
- multiqc_report.html:
- display: "MultiQC HTML report"
- samplesheet.csv:
- display: "Auto-created samplesheet with collated metadata and FASTQ paths"
+ report.html:
+ display: "AssemblyQC report"
diff --git a/version_check.sh b/version_check.sh
index e23860d3..ca34d57e 100755
--- a/version_check.sh
+++ b/version_check.sh
@@ -10,5 +10,10 @@ fi
# Check CHANGELOG version
-grep "## $config_version - " CHANGELOG.md >/dev/null \
-    || (echo 'Failed to match CHANGELOG version'; exit 1)
+head -10 CHANGELOG.md | grep -F "## v$config_version - " >/dev/null \
+    || { echo 'Failed to match CHANGELOG version'; exit 1; } # braces, not a subshell: exit must stop the script so later checks cannot mask this failure
+
+# Check README version
+
+grep -F "assembly quality ($config_version)" README.md >/dev/null \
+    || { echo 'Failed to match README version'; exit 1; } # -F: match the version literally (dots are not regex wildcards)
diff --git a/workflows/assemblyqc.nf b/workflows/assemblyqc.nf
index 38217ec5..1fa329ae 100644
--- a/workflows/assemblyqc.nf
+++ b/workflows/assemblyqc.nf
@@ -1,61 +1,37 @@
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- PRINT PARAMS SUMMARY
+ IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
-include { paramsSummaryLog; paramsSummaryMap; fromSamplesheet } from 'plugin/nf-validation'
+include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline'
-def logo = NfcoreTemplate.logo(workflow, params.monochrome_logs)
-def citation = '\n' + WorkflowMain.citation(workflow) + '\n'
-def summary_params = paramsSummaryMap(workflow)
-
-// Print parameter summary log to screen
-log.info logo + paramsSummaryLog(workflow) + citation
-
-WorkflowAssemblyqc.initialise(params, log)
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- CONFIG FILES
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- IMPORT LOCAL MODULES/SUBWORKFLOWS
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-include { GT_STAT } from '../modules/pfr/gt/stat/main'
-include { GFF3_VALIDATE } from '../subworkflows/pfr/gff3_validate/main'
-include { NCBI_FCS_ADAPTOR } from '../modules/local/ncbi_fcs_adaptor'
+include { GUNZIP as GUNZIP_FASTA } from '../modules/nf-core/gunzip/main'
+include { GUNZIP as GUNZIP_GFF3 } from '../modules/nf-core/gunzip/main'
+include { FASTAVALIDATOR } from '../modules/nf-core/fastavalidator/main'
+include { SEQKIT_RMDUP } from '../modules/nf-core/seqkit/rmdup/main'
+include { FASTA_EXPLORE_SEARCH_PLOT_TIDK } from '../subworkflows/nf-core/fasta_explore_search_plot_tidk/main'
+include { GFF3_GT_GFF3_GFF3VALIDATOR_STAT } from '../subworkflows/pfr/gff3_gt_gff3_gff3validator_stat/main'
+include { FCS_FCSADAPTOR } from '../modules/nf-core/fcs/fcsadaptor/main'
include { NCBI_FCS_GX } from '../subworkflows/local/ncbi_fcs_gx'
include { ASSEMBLATHON_STATS } from '../modules/local/assemblathon_stats'
-include { FASTA_BUSCO_PLOT } from '../subworkflows/local/fasta_busco_plot'
+include { FASTA_GXF_BUSCO_PLOT } from '../subworkflows/pfr/fasta_gxf_busco_plot/main'
include { FASTA_LTRRETRIEVER_LAI } from '../subworkflows/pfr/fasta_ltrretriever_lai/main'
include { FASTA_KRAKEN2 } from '../subworkflows/local/fasta_kraken2'
include { FQ2HIC } from '../subworkflows/local/fq2hic'
+include { CAT_CAT as TAG_ASSEMBLY } from '../modules/pfr/cat/cat/main'
include { FASTA_SYNTENY } from '../subworkflows/local/fasta_synteny'
+include { MERYL_COUNT } from '../modules/nf-core/meryl/count/main'
+include { MERYL_UNIONSUM } from '../modules/nf-core/meryl/unionsum/main'
+include { MERYL_COUNT as MAT_MERYL_COUNT } from '../modules/nf-core/meryl/count/main'
+include { MERYL_UNIONSUM as MAT_UNIONSUM } from '../modules/nf-core/meryl/unionsum/main'
+include { MERYL_COUNT as PAT_MERYL_COUNT } from '../modules/nf-core/meryl/count/main'
+include { MERYL_UNIONSUM as PAT_UNIONSUM } from '../modules/nf-core/meryl/unionsum/main'
+include { MERQURY_HAPMERS } from '../modules/pfr/merqury/hapmers/main'
+include { MERQURY_MERQURY } from '../modules/nf-core/merqury/merqury/main'
include { CREATEREPORT } from '../modules/local/createreport'
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- IMPORT NF-CORE MODULES/SUBWORKFLOWS
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-//
-// MODULE: Installed directly from nf-core/modules
-//
-
-include { GUNZIP as GUNZIP_FASTA } from '../modules/nf-core/gunzip/main'
-include { GUNZIP as GUNZIP_GFF3 } from '../modules/nf-core/gunzip/main'
-include { FASTAVALIDATOR } from '../modules/nf-core/fastavalidator/main'
-include { FASTA_EXPLORE_SEARCH_PLOT_TIDK } from '../subworkflows/nf-core/fasta_explore_search_plot_tidk/main'
-
-include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions/main'
-
+include { FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS as FETCHNGS } from '../subworkflows/nf-core/fastq_download_prefetch_fasterqdump_sratools/main'
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -63,20 +39,29 @@ include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/du
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
-def input_assembly_sheet_fields = 5
-def synteny_xref_assemblies_fields = 3
-
workflow ASSEMBLYQC {
- // Input channels
+ take:
+ ch_input
+ ch_hic_reads
+ ch_xref_assembly
+ ch_reads
+ ch_maternal_reads
+ ch_paternal_reads
+ ch_params_as_json
+ ch_summary_params_as_json
+
+ main:
+
+ // Versions
ch_versions = Channel.empty()
- ch_input = Channel.fromSamplesheet('input')
- | collect
- | flatMap { WorkflowAssemblyqc.validateInput(it) }
- | buffer(size: input_assembly_sheet_fields)
+ // Input channels
ch_target_assemby_branch = ch_input
- | map { tag, fasta, gff, mono_ids, labels ->
+ | map { input_data ->
+ def tag = input_data[0]
+ def fasta = input_data[1]
+
[ [ id: tag ], file(fasta, checkIfExists: true) ]
}
| branch { meta, fasta ->
@@ -85,7 +70,10 @@ workflow ASSEMBLYQC {
}
ch_assemby_gff3_branch = ch_input
- | map { tag, fasta, gff, mono_ids, labels ->
+ | map { input_data ->
+ def tag = input_data[0]
+ def gff = input_data[2]
+
gff
? [ [ id: tag ], file(gff, checkIfExists: true) ]
: null
@@ -96,14 +84,20 @@ workflow ASSEMBLYQC {
}
ch_mono_ids = ch_input
- | map { tag, fasta, gff, mono_ids, labels ->
+ | map { input_data ->
+ def tag = input_data[0]
+ def mono_ids= input_data[3]
+
mono_ids
? [ [ id: tag ], file(mono_ids, checkIfExists: true) ]
: null
}
ch_synteny_labels = ch_input
- | map { tag, fasta, gff, mono_ids, labels ->
+ | map { input_data ->
+ def tag = input_data[0]
+ def labels = input_data[4]
+
labels
? [ [ id: tag ], file(labels, checkIfExists: true) ]
: (
@@ -111,31 +105,10 @@ workflow ASSEMBLYQC {
? null
: log.warn("A synteny_labels file must be provided" +
" in the input assembly sheet when running synteny analysis." +
- " Synteny analysis is skipped!")
+ " Synteny analysis is skipped for $tag")
)
}
- ch_hic_reads = ! params.hic
- ? Channel.empty()
- : (
- "$params.hic".find(/.*[\/].*\.(fastq|fq)\.gz/)
- ? Channel.fromFilePairs(params.hic, checkIfExists: true)
- : Channel.fromSRA(params.hic)
- )
- | map{ sample, fq ->
- [ [ id: sample, single_end: false ], fq ]
- }
-
- ch_xref_assembly = params.synteny_skip || ! params.synteny_xref_assemblies
- ? Channel.empty()
- : Channel.fromSamplesheet('synteny_xref_assemblies')
- | collect
- | flatMap { WorkflowAssemblyqc.validateXrefAssemblies(it) }
- | buffer(size: synteny_xref_assemblies_fields)
- | map { tag, fa, labels ->
- [ tag, file(fa, checkIfExists: true), file(labels, checkIfExists: true) ]
- }
-
// MODULE: GUNZIP as GUNZIP_FASTA
GUNZIP_FASTA ( ch_target_assemby_branch.gz )
@@ -152,89 +125,113 @@ workflow ASSEMBLYQC {
// MODULE: FASTAVALIDATOR
FASTAVALIDATOR ( ch_target_assembly )
- ch_valid_target_assembly = ch_target_assembly.join(FASTAVALIDATOR.out.success_log)
+ ch_fastavalidator_assembly = ch_target_assembly.join(FASTAVALIDATOR.out.success_log)
| map { meta, fasta, log -> [ meta, fasta ] }
- ch_invalid_assembly_log = FASTAVALIDATOR.out.error_log
+ ch_fastavalidator_log = FASTAVALIDATOR.out.error_log
| map { meta, error_log ->
log.warn("FASTA validation failed for ${meta.id}\n${error_log.text}")
- [ meta, error_log ]
+ error_log
}
ch_versions = ch_versions.mix(FASTAVALIDATOR.out.versions.first())
- // SUBWORKFLOW: GFF3_VALIDATE
- GFF3_VALIDATE (
+    // MODULE: SEQKIT_RMDUP
+    ch_seqkit_rmdup_input           = ! params.check_sequence_duplicates
+                                    ? Channel.empty()
+                                    : ch_fastavalidator_assembly
+    SEQKIT_RMDUP ( ch_seqkit_rmdup_input )
+
+    ch_valid_target_assembly        = params.check_sequence_duplicates
+                                    ? SEQKIT_RMDUP.out.log
+                                    | join(ch_seqkit_rmdup_input)
+                                    | map { meta, error_log, fasta ->
+                                        if ( error_log.text =~ /(?<!\d)0 duplicated records removed/ ) { // (?<!\d): '10 duplicated ...' or '20 duplicated ...' must not pass as zero
+                                            return [ meta, fasta ]
+                                        }
+
+                                        log.warn("FASTA validation failed for ${meta.id} due to presence of duplicate sequences")
+                                        return null
+                                    }
+                                    : ch_fastavalidator_assembly
+
+    ch_invalid_assembly_log         = ch_fastavalidator_log
+                                    | mix(
+                                        SEQKIT_RMDUP.out.log
+                                        | map { meta, error_log ->
+                                            if ( error_log.text =~ /(?<!\d)0 duplicated records removed/ ) { // same zero-duplicates test as above; only logs reporting duplicates are kept
+                                                return null
+                                            }
+
+                                            error_log
+                                        }
+                                    )
+    ch_versions                     = ch_versions.mix(SEQKIT_RMDUP.out.versions.first())
+
+ // SUBWORKFLOW: GFF3_GT_GFF3_GFF3VALIDATOR_STAT
+ GFF3_GT_GFF3_GFF3VALIDATOR_STAT (
ch_assembly_gff3,
ch_valid_target_assembly
)
- ch_valid_gff3 = GFF3_VALIDATE.out.valid_gff3
-
- ch_invalid_gff3_log = GFF3_VALIDATE.out.log_for_invalid_gff3
+ ch_valid_gff3 = GFF3_GT_GFF3_GFF3VALIDATOR_STAT.out.valid_gff3
+ ch_invalid_gff3_log = GFF3_GT_GFF3_GFF3VALIDATOR_STAT.out.log_for_invalid_gff3
| map { meta, error_log ->
log.warn("GFF3 validation failed for ${meta.id}\n${error_log.text}")
- [ meta, error_log ]
+ error_log
}
- ch_versions = ch_versions.mix(GFF3_VALIDATE.out.versions)
-
- // MODULE: GT_STAT
- GT_STAT ( ch_valid_gff3 )
-
- ch_gt_stats = GT_STAT.out.stats
+ ch_gt_stats = GFF3_GT_GFF3_GFF3VALIDATOR_STAT.out.gff3_stats
| map { meta, yml -> yml }
- ch_versions = ch_versions.mix(GT_STAT.out.versions.first())
+ ch_versions = ch_versions.mix(GFF3_GT_GFF3_GFF3VALIDATOR_STAT.out.versions)
- // MODULE: NCBI_FCS_ADAPTOR
- ch_fcs_adaptor_inputs = params.ncbi_fcs_adaptor_skip
+ // MODULE: FCS_FCSADAPTOR
+ ch_fcs_adaptor_input = params.ncbi_fcs_adaptor_skip
? Channel.empty()
: ch_valid_target_assembly
- | map { meta, fa -> [ meta.id, fa ] }
- NCBI_FCS_ADAPTOR(
- ch_fcs_adaptor_inputs,
- params.ncbi_fcs_adaptor_empire ?: []
+ FCS_FCSADAPTOR(
+ ch_fcs_adaptor_input
)
- ch_fcs_adaptor_report = NCBI_FCS_ADAPTOR.out.report
- | map { tag, report ->
+ ch_fcs_adaptor_report = FCS_FCSADAPTOR.out.adaptor_report
+ | map { meta, report ->
def is_clean = file(report).readLines().size < 2
if (! is_clean) {
log.warn("""
- Adaptor contamination detected in ${tag}.
+ Adaptor contamination detected in ${meta.id}.
See the report for further details.
""".stripIndent())
}
- [ tag, report ]
+ [ meta, report ]
}
ch_fcs_adaptor_passed_assembly = params.ncbi_fcs_adaptor_skip
? (
ch_valid_target_assembly
- | map { meta, fa -> [ meta.id, fa ] }
)
: (
ch_fcs_adaptor_report
- | map { tag, report ->
- [ tag, file(report).readLines().size < 2 ]
+ | map { meta, report ->
+ [ meta, file(report).readLines().size < 2 ]
+ }
+ | filter { meta, is_clean ->
+ ( is_clean || ( ! params.contamination_stops_pipeline ) )
}
- | filter { tag, is_clean -> is_clean }
| join(
ch_valid_target_assembly
- | map { meta, fa -> [ meta.id, fa ] }
)
- | map { tag, clean, fa ->
- [ tag, fa ]
+ | map { meta, clean, fa ->
+ [ meta, fa ]
}
)
- ch_versions = ch_versions.mix(NCBI_FCS_ADAPTOR.out.versions.first())
+ ch_versions = ch_versions.mix(FCS_FCSADAPTOR.out.versions.first())
// SUBWORKFLOW: NCBI_FCS_GX
ch_fcs_gx_input_assembly = params.ncbi_fcs_gx_skip
@@ -275,7 +272,9 @@ workflow ASSEMBLYQC {
| map { tag, report ->
[ tag, file(report).readLines().size < 3 ]
}
- | filter { tag, is_clean -> is_clean }
+ | filter { tag, is_clean ->
+ ( is_clean || ( ! params.contamination_stops_pipeline ) )
+ }
| join(
ch_valid_target_assembly
| map { meta, fa -> [ meta.id, fa ] }
@@ -288,6 +287,7 @@ workflow ASSEMBLYQC {
ch_versions = ch_versions.mix(NCBI_FCS_GX.out.versions)
ch_clean_assembly = ch_fcs_adaptor_passed_assembly
+ | map { meta, fa -> [ meta.id, fa ] }
| join(
ch_fcs_gx_passed_assembly
)
@@ -295,6 +295,135 @@ workflow ASSEMBLYQC {
[ tag, fa ]
}
+    // MODULE: CAT_CAT as TAG_ASSEMBLY
+    TAG_ASSEMBLY (
+        ch_clean_assembly.map { tag, fa -> [ [ id: tag ], fa ] }
+    )
+
+    ch_clean_assembly_tagged        = TAG_ASSEMBLY.out.file_out
+    ch_versions                     = ch_versions.mix(TAG_ASSEMBLY.out.versions.first()) // one versions.yml per module, as for the other modules in this workflow
+
+ // Prepare channels for FETCHNGS
+ // HiC
+ ch_hic_input_assembly = ! params.hic
+ ? Channel.empty()
+ : ch_clean_assembly
+ | map { tag, fa -> [ [ id: tag ], fa ] }
+
+ ch_hic_reads_branch = ch_hic_reads
+ | combine(ch_hic_input_assembly.first())
+ // Wait till first clean assembly arrives
+ | map { meta, fq, meta2, fasta -> [ meta, fq ] }
+ | branch { meta, fq ->
+ sra: meta.is_sra
+ rest: ! meta.is_sra
+ }
+ // Reads
+ ch_all_clean_assemblies = ch_clean_assembly_tagged
+ | map { [ it ] }
+ | collect
+ | map { [ it ] }
+
+ ch_reads_assemblies = ch_reads
+ | combine(
+ ch_all_clean_assemblies
+ )
+ // This combine with the filter after map
+ // is a join on list of assembly tags
+ // such as [ tag1 ] or [ tag1, tag2 ]
+ | map { meta, fq, assemblies ->
+ [
+ meta,
+ fq,
+ assemblies
+ .findAll { meta2, fasta -> meta2.id in meta.assemblies }
+ .collect { meta2, fasta -> fasta }
+ .flatten()
+ .sort(false)
+ ]
+ }
+ | filter { meta, fq, fastas -> fastas }
+
+ ch_reads_branch = ch_reads_assemblies
+ | map { meta, fq, fastas -> [ meta, fq ] }
+ | branch { meta, fq ->
+ sra: meta.is_sra
+ rest: ! meta.is_sra
+ }
+
+ // Maternal reads
+ ch_maternal_reads_branch = ch_maternal_reads
+ | combine(
+ ch_all_clean_assemblies
+ )
+ // This combine/filter/map is used to sync the
+ // reads channel with clean_assembly channel
+ // so that the downstream modules wait for
+ // the upstream modules to complete first
+ | map { meta, fq, assemblies ->
+ [
+ meta,
+ fq,
+ assemblies
+ .findAll { meta2, fasta -> meta2.id in meta.assemblies }
+ .collect { meta2, fasta -> fasta }
+ .flatten()
+ .sort(false)
+ ]
+ }
+ | filter { meta, fq, fastas -> fastas }
+ | map { meta, fq, assemblies -> [ meta, fq ] }
+ | branch { meta, fq ->
+ sra: meta.is_sra
+ rest: ! meta.is_sra
+ }
+
+ // Paternal reads
+ ch_paternal_reads_branch = ch_paternal_reads
+ | combine(
+ ch_all_clean_assemblies
+ )
+ | map { meta, fq, assemblies ->
+ [
+ meta,
+ fq,
+ assemblies
+ .findAll { meta2, fasta -> meta2.id in meta.assemblies }
+ .collect { meta2, fasta -> fasta }
+ .flatten()
+ .sort(false)
+ ]
+ }
+ | filter { meta, fq, fastas -> fastas }
+ | map { meta, fq, assemblies -> [ meta, fq ] }
+ | branch { meta, fq ->
+ sra: meta.is_sra
+ rest: ! meta.is_sra
+ }
+
+    // MODULE: FASTQ_DOWNLOAD_PREFETCH_FASTERQDUMP_SRATOOLS as FETCHNGS
+    ch_fetchngs_inputs              = ch_hic_reads_branch.sra
+                                    | mix(ch_reads_branch.sra)
+                                    | mix(ch_maternal_reads_branch.sra, ch_paternal_reads_branch.sra) // paternal SRA accessions must be fetched too; otherwise the 'paternal' branch of ch_fetchngs never receives reads
+ FETCHNGS(
+ ch_fetchngs_inputs.map { meta, sra -> [ [ id: meta.id, single_end: meta.single_end ], sra ] },
+ []
+ )
+
+ ch_fetchngs = FETCHNGS.out.reads
+ | join(
+ ch_fetchngs_inputs
+ | map { meta, sra -> [ [ id: meta.id, single_end: meta.single_end ], meta ] }
+ )
+ | map { meta, fq, meta2 -> [ meta2, fq ] }
+ | branch { meta, fq ->
+ hic: meta.type == 'hic'
+ reads: meta.type == 'reads'
+ maternal: meta.type == 'maternal'
+ paternal: meta.type == 'paternal'
+ }
+ ch_versions = ch_versions.mix(FETCHNGS.out.versions)
+
// MODULE: ASSEMBLATHON_STATS
ASSEMBLATHON_STATS(
ch_clean_assembly,
@@ -304,28 +433,53 @@ workflow ASSEMBLYQC {
ch_assemblathon_stats = ASSEMBLATHON_STATS.out.stats
ch_versions = ch_versions.mix(ASSEMBLATHON_STATS.out.versions.first())
- // SUBWORKFLOW: FASTA_BUSCO_PLOT
- ch_busco_inputs = params.busco_skip
+ // SUBWORKFLOW: FASTA_GXF_BUSCO_PLOT
+ ch_busco_input_assembly = params.busco_skip
? Channel.empty()
: ch_clean_assembly
- | combine(
- Channel.of(params.busco_lineage_datasets)
- | map { it.split(' ') }
- | flatten
- )
- | map { tag, fa, lineage ->
- [ tag, file(fa, checkIfExists: true), lineage ]
- }
- FASTA_BUSCO_PLOT(
- ch_busco_inputs.map { tag, fa, lineage -> [ tag, fa ] },
- ch_busco_inputs.map { tag, fa, lineage -> lineage },
- params.busco_mode ?: [],
- params.busco_download_path ?: []
+ | map { tag, fasta -> [ [ id: tag ], fasta ] }
+
+ FASTA_GXF_BUSCO_PLOT(
+ ch_busco_input_assembly,
+ ch_valid_gff3,
+ params.busco_mode,
+ params.busco_lineage_datasets?.tokenize(' '),
+ params.busco_download_path,
+ [] // val_busco_config
)
- ch_busco_summary = FASTA_BUSCO_PLOT.out.summary
- ch_busco_plot = FASTA_BUSCO_PLOT.out.plot
- ch_versions = ch_versions.mix(FASTA_BUSCO_PLOT.out.versions)
+ ch_busco_summary = FASTA_GXF_BUSCO_PLOT.out.assembly_short_summaries_txt
+ | map { meta, txt ->
+ def lineage_name = meta.lineage.split('_odb')[0]
+ [
+ "short_summary.specific.${meta.lineage}.${meta.id}_${lineage_name}.txt",
+ txt.text
+ ]
+ }
+ | collectFile
+ ch_busco_plot = FASTA_GXF_BUSCO_PLOT.out.assembly_png
+
+ ch_busco_outputs = ch_busco_summary
+ | mix(ch_busco_plot)
+ | collect
+
+ ch_busco_gff_summary = FASTA_GXF_BUSCO_PLOT.out.annotation_short_summaries_txt
+ | map { meta, txt ->
+ def lineage_name = meta.lineage.split('_odb')[0]
+ [
+ "short_summary.specific.${meta.lineage}.${meta.id}_${lineage_name}.txt",
+ txt.text
+ ]
+ }
+ | collectFile
+
+ ch_busco_gff_plot = FASTA_GXF_BUSCO_PLOT.out.annotation_png
+
+ ch_busco_gff_outputs = ch_busco_gff_summary
+ | mix(ch_busco_gff_plot)
+ | collect
+
+ ch_versions = ch_versions.mix(FASTA_GXF_BUSCO_PLOT.out.versions)
// SUBWORKFLOW: FASTA_EXPLORE_SEARCH_PLOT_TIDK
ch_tidk_inputs = params.tidk_skip
@@ -361,7 +515,13 @@ workflow ASSEMBLYQC {
| map { meta, mono -> [ meta.id, mono ] },
remainder: true
)
- | filter { id, fasta, mono -> fasta != null }
+ // Danger! This partial join can fail
+ | filter { id, fasta, mono -> fasta }
+ // This filter safeguards against fail on upstream
+ // process failure: https://github.com/nextflow-io/nextflow/issues/5043
+ // fasta comes from upstream processes
+ // mono comes from input params, it is optional
+ // and may not be present for some of the combinations
| map { id, fasta, mono -> [ id, fasta, mono ?: [] ] }
FASTA_LTRRETRIEVER_LAI(
@@ -372,6 +532,8 @@ workflow ASSEMBLYQC {
ch_lai_outputs = FASTA_LTRRETRIEVER_LAI.out.lai_log
| join(FASTA_LTRRETRIEVER_LAI.out.lai_out, remainder: true)
+ // This partial join can't fail because both outputs are
+ // from the same process
| map { meta, log, out -> out ? [ log, out ] : [log] }
ch_versions = ch_versions.mix(FASTA_LTRRETRIEVER_LAI.out.versions)
@@ -393,13 +555,10 @@ workflow ASSEMBLYQC {
ch_versions = ch_versions.mix(FASTA_KRAKEN2.out.versions)
// SUBWORKFLOW: FQ2HIC
- ch_hic_input_assembly = ! params.hic
- ? Channel.empty()
- : ch_clean_assembly
- | map { tag, fa -> [ [ id: tag ], fa ] }
-
+ ch_hic_read_files = ch_fetchngs.hic
+ | mix(ch_hic_reads_branch.rest)
FQ2HIC(
- ch_hic_reads,
+ ch_hic_read_files,
ch_hic_input_assembly,
params.hic_skip_fastp,
params.hic_skip_fastqc
@@ -414,58 +573,237 @@ workflow ASSEMBLYQC {
ch_synteny_labels.map { meta, txt -> [ meta.id, txt ] },
ch_xref_assembly,
params.synteny_between_input_assemblies,
- params.synteny_many_to_many_align,
- params.synteny_max_gap,
- params.synteny_min_bundle_size,
+ params.synteny_mummer_m2m_align,
+ params.synteny_mummer_max_gap,
+ params.synteny_mummer_min_bundle_size,
params.synteny_plot_1_vs_all,
- params.synteny_color_by_contig
+ params.synteny_color_by_contig,
+ params.synteny_mummer_plot_type,
+ params.synteny_mummer_skip,
+ params.synteny_plotsr_seq_label,
+ params.synteny_plotsr_skip,
+ params.synteny_plotsr_assembly_order
)
- ch_synteny_plot = FASTA_SYNTENY.out.plot
+ ch_synteny_outputs = FASTA_SYNTENY.out.png
+ | mix(FASTA_SYNTENY.out.html)
+ | mix(FASTA_SYNTENY.out.syri_fail_log)
+ | mix(FASTA_SYNTENY.out.plotsr_labels)
ch_versions = ch_versions.mix(FASTA_SYNTENY.out.versions)
- // MODULE: CUSTOM_DUMPSOFTWAREVERSIONS
- CUSTOM_DUMPSOFTWAREVERSIONS (
- ch_versions.unique().collectFile(name: 'collated_versions.yml')
+ // MODULE: MERYL_COUNT
+ ch_reads_files = ch_fetchngs.reads
+ | mix(ch_reads_branch.rest)
+
+ MERYL_COUNT(
+ ch_reads_files,
+ params.merqury_kmer_length
+ )
+
+ ch_reads_meryl = MERYL_COUNT.out.meryl_db
+ ch_versions = ch_versions.mix(MERYL_COUNT.out.versions.first())
+
+ // MODULE: MERYL_UNIONSUM
+ ch_reads_meryl_branch = ch_reads_meryl
+ | branch { meta, meryl ->
+ single: meta.single_end
+ paired: ! meta.single_end
+ }
+ MERYL_UNIONSUM(
+ ch_reads_meryl_branch.paired,
+ params.merqury_kmer_length
)
+ ch_reads_union_meryl = MERYL_UNIONSUM.out.meryl_db
+ | mix(ch_reads_meryl_branch.single)
+ ch_versions = ch_versions.mix(MERYL_UNIONSUM.out.versions.first())
+
+ // MODULE: MERYL_COUNT as MAT_MERYL_COUNT
+ ch_maternal_reads_files = ch_fetchngs.maternal
+ | mix(ch_maternal_reads_branch.rest)
+
+ MAT_MERYL_COUNT(
+ // Guard against failed resume on addition of assemblies with same parents
+ ch_maternal_reads_files
+ | map { meta, fq -> [ [ id: meta.id ], fq ] },
+ params.merqury_kmer_length
+ )
+
+ ch_maternal_meryl = MAT_MERYL_COUNT.out.meryl_db
+ | join(
+ ch_maternal_reads_files
+ | map { meta, fq -> [ [ id: meta.id ], meta ] }
+ )
+ | map { meta, meryl, meta2 -> [ meta2, meryl ] }
+ ch_versions = ch_versions.mix(MAT_MERYL_COUNT.out.versions.first())
+
+ // MODULE: MAT_UNIONSUM
+ ch_maternal_meryl_branch = ch_maternal_meryl
+ | branch { meta, meryl_db ->
+ single: meta.single_end
+ paired: ! meta.single_end
+ }
+ MAT_UNIONSUM(
+ ch_maternal_meryl_branch.paired,
+ params.merqury_kmer_length
+ )
+
+ ch_maternal_union_meryl = MAT_UNIONSUM.out.meryl_db
+ | mix(ch_maternal_meryl_branch.single)
+ ch_versions = ch_versions.mix(MAT_UNIONSUM.out.versions.first())
+
+ // MODULE: MERYL_COUNT as PAT_MERYL_COUNT
+ ch_paternal_reads_files = ch_fetchngs.paternal
+ | mix(ch_paternal_reads_branch.rest)
+
+ PAT_MERYL_COUNT(
+ ch_paternal_reads_files
+ | map { meta, fq -> [ [ id: meta.id ], fq ] },
+ params.merqury_kmer_length
+ )
+
+ ch_paternal_meryl = PAT_MERYL_COUNT.out.meryl_db
+ | join(
+ ch_paternal_reads_files
+ | map { meta, fq -> [ [ id: meta.id ], meta ] }
+ )
+ | map { meta, meryl, meta2 -> [ meta2, meryl ] }
+ ch_versions = ch_versions.mix(PAT_MERYL_COUNT.out.versions.first())
+
+ // MODULE: PAT_UNIONSUM
+ ch_paternal_meryl_branch = ch_paternal_meryl
+ | branch { meta, meryl ->
+ single: meta.single_end
+ paired: ! meta.single_end
+ }
+ PAT_UNIONSUM(
+ ch_paternal_meryl_branch.paired,
+ params.merqury_kmer_length
+ )
+
+ ch_paternal_union_meryl = PAT_UNIONSUM.out.meryl_db
+ | mix(ch_paternal_meryl_branch.single)
+ ch_versions = ch_versions.mix(PAT_UNIONSUM.out.versions.first())
+
+ // MODULE: MERQURY_HAPMERS
+ ch_all_assemblies_with_parents = ch_maternal_union_meryl
+ | mix(ch_paternal_union_meryl)
+ | flatMap { meta, meryl -> meta.assemblies }
+ | unique
+ | collect
+ | map { [ it ] }
+ | ifEmpty( [ [] ] )
+
+ ch_meryl_without_parents = ch_reads_union_meryl
+ | combine(
+ ch_all_assemblies_with_parents
+ )
+ | filter { meta, meryl, p_asms -> ! meta.assemblies.any { it in p_asms } }
+ | map { meta, meryl, p_asms -> [ meta, meryl, [], [] ] }
+
+ ch_group_meryl = ch_reads_union_meryl
+ | combine ( ch_maternal_union_meryl )
+ | filter { meta, meryl, meta2, mat_meryl ->
+ meta.assemblies.every { it in meta2.assemblies }
+ }
+ | map { meta, meryl, meta2, mat_meryl ->
+ [ meta, meryl, mat_meryl ]
+ }
+ | combine ( ch_paternal_union_meryl )
+ | filter { meta, meryl, mat_meryl, meta2, pat_meryl ->
+ meta.assemblies.every { it in meta2.assemblies }
+ }
+ | map { meta, meryl, mat_meryl, meta2, pat_meryl ->
+ [ meta, meryl, mat_meryl, pat_meryl ]
+ }
+
+ MERQURY_HAPMERS(
+ ch_group_meryl.map { meta, meryl, mat_meryl, pat_meryl -> [ meta, meryl ] },
+ ch_group_meryl.map { meta, meryl, mat_meryl, pat_meryl -> mat_meryl },
+ ch_group_meryl.map { meta, meryl, mat_meryl, pat_meryl -> pat_meryl }
+ )
+
+ ch_parental_hapmers = MERQURY_HAPMERS.out.mat_hapmer_meryl
+ | join(MERQURY_HAPMERS.out.pat_hapmer_meryl)
+ ch_versions = ch_versions.mix(MERQURY_HAPMERS.out.versions.first())
+
+ // Prepare group meryl dbs
+ ch_meryl_all = ch_group_meryl
+ | join(ch_parental_hapmers)
+ | map { meta, meryl, mat_meryl, pat_meryl, hap_mat_meryl, hap_pat_meryl ->
+ [ meta, meryl, hap_mat_meryl, hap_pat_meryl ]
+ }
+ | mix(ch_meryl_without_parents)
+ | map { meta, meryl, mat_meryl, pat_meryl ->
+ [
+ meta,
+ mat_meryl
+ ? [ meryl, mat_meryl, pat_meryl ]
+ : meryl
+ ]
+ }
+
+ // MODULE: MERQURY_MERQURY
+ ch_merqury_inputs = ch_meryl_all
+ | join(
+ ch_reads_assemblies
+ | map { meta, fq, fastas -> [ meta, fastas ] }
+ )
+
+ MERQURY_MERQURY ( ch_merqury_inputs )
+
+ ch_merqury_qv = MERQURY_MERQURY.out.assembly_qv
+ ch_merqury_stats = MERQURY_MERQURY.out.stats
+ ch_merqury_spectra_cn_fl_png = MERQURY_MERQURY.out.spectra_cn_fl_png
+ ch_merqury_spectra_asm_fl_png = MERQURY_MERQURY.out.spectra_asm_fl_png
+ ch_hapmers_blob_png = MERQURY_MERQURY.out.hapmers_blob_png
+
+ ch_merqury_outputs = ch_merqury_qv
+ | mix(ch_merqury_stats)
+ | mix(ch_merqury_spectra_cn_fl_png)
+ | mix(ch_merqury_spectra_asm_fl_png)
+ | mix(ch_hapmers_blob_png)
+ | flatMap { meta, data -> data }
+ ch_versions = ch_versions.mix(MERQURY_MERQURY.out.versions.first())
+
+ // Collate and save software versions
+ ch_versions = ch_versions
+ | unique
+ | map { yml ->
+ if ( yml ) { yml }
+ }
+
+ ch_versions_yml = softwareVersionsToYAML(ch_versions)
+ | collectFile(
+ storeDir: "${params.outdir}/pipeline_info",
+ name: 'software_versions.yml',
+ sort: true,
+ newLine: true,
+ cache: false
+ )
+
// MODULE: CREATEREPORT
CREATEREPORT(
- ch_invalid_assembly_log .map { meta, file -> file }.collect().ifEmpty([]),
- ch_invalid_gff3_log .map { meta, file -> file }.collect().ifEmpty([]),
+ ch_invalid_assembly_log .collect().ifEmpty([]),
+ ch_invalid_gff3_log .collect().ifEmpty([]),
ch_fcs_adaptor_report .map { meta, file -> file }.collect().ifEmpty([]),
ch_fcs_gx_report .mix(ch_fcs_gx_taxonomy_plot).map { meta, file -> file }.collect().ifEmpty([]),
ch_assemblathon_stats .collect().ifEmpty([]),
ch_gt_stats .collect().ifEmpty([]),
- ch_busco_summary .mix(ch_busco_plot).collect().ifEmpty([]),
+ ch_busco_outputs .collect().ifEmpty([]),
+ ch_busco_gff_outputs .collect().ifEmpty([]),
ch_tidk_outputs .collect().ifEmpty([]),
ch_lai_outputs .collect().ifEmpty([]),
ch_kraken2_plot .collect().ifEmpty([]),
ch_hic_html .collect().ifEmpty([]),
- ch_synteny_plot .collect().ifEmpty([]),
- CUSTOM_DUMPSOFTWAREVERSIONS .out.yml,
- Channel.of ( WorkflowAssemblyqc.jsonifyParams ( params ) ),
- Channel.of ( WorkflowAssemblyqc.jsonifySummaryParams ( summary_params ) )
+ ch_synteny_outputs .collect().ifEmpty([]),
+ ch_merqury_outputs .collect().ifEmpty([]),
+ ch_versions_yml,
+ ch_params_as_json,
+ ch_summary_params_as_json
)
}
-/*
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- COMPLETION EMAIL AND SUMMARY
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-*/
-
-workflow.onComplete {
- if (params.email || params.email_on_fail) {
- NfcoreTemplate.email(workflow, params, summary_params, projectDir, log)
- }
- NfcoreTemplate.dump_parameters(workflow, params)
- NfcoreTemplate.summary(workflow, params, log)
- if (params.hook_url) {
- NfcoreTemplate.IM_notification(workflow, params, summary_params, projectDir, log)
- }
-}
-
/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
THE END
|