'
-
- parser = argparse.ArgumentParser(description=Description, epilog=Epilog)
- parser.add_argument("FILE_IN", help="Input samplesheet file.")
- parser.add_argument("TARGET_ASSEMBLIES", help="Permissible target assemblies")
- parser.add_argument("FILE_OUT", help="Output file.")
- return parser.parse_args(args)
-
-
-def make_dir(path):
- if len(path) > 0:
- try:
- os.makedirs(path)
- except OSError as exception:
- if exception.errno != errno.EEXIST:
- raise exception
-
-
-def print_error(error, context="Line", context_str=""):
- error_str = f"ERROR: Please check samplesheet -> {error}"
- if context != "" and context_str != "":
- error_str = f"ERROR: Please check samplesheet -> {error}\n{context.strip()}: '{context_str.strip()}'"
- print(error_str)
- sys.exit(1)
-
-
-def check_samplesheet(file_in, file_out, permissible_target_assemblies):
- """
- This function checks that the samplesheet follows the following structure:
-
- sample,fastq_1,fastq_2,target_assemblies
- SAMPLE_PE,SAMPLE_PE_RUN1_1.fastq.gz,SAMPLE_PE_RUN1_2.fastq.gz,red5;red3
- SAMPLE_PE,SAMPLE_PE_RUN2_1.fastq.gz,SAMPLE_PE_RUN2_2.fastq.gz,red5;red3
- SAMPLE_SE,SAMPLE_SE_RUN1_1.fastq.gz,,red5
- """
-
- sample_mapping_dict = {}
- with open(file_in, "r", encoding="utf-8-sig") as fin:
- ## Check header
- MIN_COLS = 4
- HEADER = ["sample", "fastq_1", "fastq_2", "target_assemblies"]
- header = [x.strip('"') for x in fin.readline().strip().split(",")]
- if header[: len(HEADER)] != HEADER:
- print(
- f"ERROR: Please check samplesheet header -> {','.join(header)} != {','.join(HEADER)}"
- )
- sys.exit(1)
-
- ## Check sample entries
- for line in fin:
- if line.strip():
- lspl = [x.strip().strip('"') for x in line.strip().split(",")]
-
- ## Check valid number of columns per row
- if len(lspl) < len(HEADER):
- print_error(
- f"Invalid number of columns (minimum = {len(HEADER)})!",
- "Line",
- line,
- )
-
- num_cols = len([x for x in lspl[: len(HEADER)] if x])
- if num_cols < MIN_COLS:
- print_error(
- f"Invalid number of populated columns (minimum = {MIN_COLS})!",
- "Line",
- line,
- )
-
- ## Check sample name entries
- sample, fastq_1, fastq_2, target_assemblies = lspl[
- : len(HEADER)
- ]
-
- if sample.find(" ") != -1:
- print(
- f"WARNING: Spaces have been replaced by underscores for sample: {sample}"
- )
- sample = sample.replace(" ", "_")
- if not sample:
- print_error("Sample entry has not been specified!", "Line", line)
-
- ## Check FastQ file extension
- for fastq in [fastq_1, fastq_2]:
- if fastq:
- if fastq.find(" ") != -1:
- print_error("FastQ file contains spaces!", "Line", line)
- if not fastq.endswith(".fastq.gz") and not fastq.endswith(
- ".fq.gz"
- ):
- print_error(
- "FastQ file does not have extension '.fastq.gz' or '.fq.gz'!",
- "Line",
- line,
- )
-
- ## Auto-detect paired-end/single-end
- sample_info = [] ## [single_end, fastq_1, fastq_2]
- if sample and fastq_1 and fastq_2: ## Paired-end short reads
- sample_info = ["0", fastq_1, fastq_2]
- elif sample and fastq_1 and not fastq_2: ## Single-end short reads
- sample_info = ["1", fastq_1, fastq_2]
- else:
- print_error(
- "Invalid combination of columns provided!", "Line", line
- )
-
- ## Check if the target assemblies are permissible
- target_assemblies_list = sorted(
- [x.strip() for x in target_assemblies.strip().split(";")]
- )
-
- for assembly in target_assemblies_list:
- if assembly in permissible_target_assemblies:
- continue
-
- print_error(
- f"Target assembly '{assembly}' is not one of {permissible_target_assemblies}!",
- "Line",
- line,
- )
-
- ## Create sample mapping dictionary = {sample: [[ single_end, fastq_1, fastq_2, strandedness, [target_assemblies] ]]}
- sample_target_assemblies = ";".join(target_assemblies_list)
- sample_info = (
- sample_info + lspl[len(HEADER) :] + [sample_target_assemblies]
- )
- if sample not in sample_mapping_dict:
- sample_mapping_dict[sample] = [sample_info]
- else:
- if sample_info in sample_mapping_dict[sample]:
- print_error(
- "Samplesheet contains duplicate rows!", "Line", line
- )
- else:
- sample_mapping_dict[sample].append(sample_info)
-
- ## Write validated samplesheet with appropriate columns
- if len(sample_mapping_dict) > 0:
- out_dir = os.path.dirname(file_out)
- make_dir(out_dir)
- with open(file_out, "w") as fout:
- fout.write(
- ",".join(
- [
- "sample",
- "single_end",
- "fastq_1",
- "fastq_2",
- "target_assemblies",
- ]
- + header[len(HEADER) :]
- )
- + "\n"
- )
- for sample in sorted(sample_mapping_dict.keys()):
- ## Check that multiple runs of the same sample are of the same datatype i.e. single-end / paired-end
- if not all(
- x[0] == sample_mapping_dict[sample][0][0]
- for x in sample_mapping_dict[sample]
- ):
- print_error(
- f"Multiple runs of a sample must be of the same datatype i.e. single-end or paired-end!",
- "Sample",
- sample,
- )
-
- ## Check that multiple runs of the same sample have same target assemblies
- if not all(
- x[3] == sample_mapping_dict[sample][0][3]
- for x in sample_mapping_dict[sample]
- ):
- print_error(
- f"Multiple runs of a sample must have the same target assemblies!",
- "Sample",
- sample,
- )
-
- for idx, val in enumerate(sample_mapping_dict[sample]):
- fout.write(",".join([f"{sample}_T{idx+1}"] + val) + "\n")
- else:
- print_error(f"No entries to process!", "Samplesheet: {file_in}")
-
-
-def main(args=None):
- args = parse_args(args)
- permissible_target_assemblies = [
- x.strip() for x in args.TARGET_ASSEMBLIES.strip().split(",")
- ]
- check_samplesheet(args.FILE_IN, args.FILE_OUT, permissible_target_assemblies)
-
-
-if __name__ == "__main__":
- sys.exit(main())
diff --git a/modules/local/utils.nf b/modules/local/utils.nf
new file mode 100644
index 0000000..7e1c86a
--- /dev/null
+++ b/modules/local/utils.nf
@@ -0,0 +1,59 @@
+/**
+ * Derives an identifier from a file name by stripping recognised trailing
+ * extensions: .fq/.fastq, .f/.fa/.fas/.fsa/.faa/.fasta, .gff/.gff3 and .gz.
+ *
+ * @param fileName file name to reduce
+ * @return the file name with all recognised trailing extensions removed
+ */
+def idFromFileName(fileName) {
+
+    def trial = ( fileName
+    ).replaceFirst(
+        /\.f(ast)?q$/, ''
+    ).replaceFirst(
+        /\.f(asta|sa|a|as|aa)?$/, ''
+    ).replaceFirst(
+        /\.gff(3)?$/, ''
+    ).replaceFirst(
+        /\.gz$/, ''
+    )
+
+    // No pattern matched any more, so every known extension has been removed
+    if ( trial == fileName ) { return fileName }
+
+    // Recurse so that stacked extensions such as .fastq.gz are fully stripped
+    return idFromFileName ( trial )
+}
+
+/**
+ * Validates the metadata of the runs grouped under one sample and collapses
+ * them into a single entry.
+ *
+ * @param metas list of meta maps; id, target_assemblies and single_end are read
+ * @param fqs value passed through unchanged as the second output element
+ * @param permAssString comma-separated permissible target assembly tags
+ * @return a two-element list: the first meta map and fqs
+ */
+def validateFastqMetadata(metas, fqs, permAssString) {
+    def permAssList = permAssString.split(",")
+
+    // Check if each listed assembly is permissible
+    metas.each { meta ->
+        if ( meta.target_assemblies.any { !permAssList.contains( it ) } ) {
+            // error() rather than exit, consistent with the checks below
+            error "Sample ${meta.id} targets ${meta.target_assemblies} which are not in $permAssList"
+        }
+    }
+
+    // Check if multiple runs of a sample have the same target assemblies
+    if ( metas.collect { meta -> meta.target_assemblies }.unique().size() > 1 ) {
+        error "Multiple runs of sample ${metas.first().id} must target same assemblies"
+    }
+
+    // Check if multiple runs of a sample have the same endedness
+    if ( metas.collect { meta -> meta.single_end }.unique().size() > 1 ) {
+        error "Multiple runs of sample ${metas.first().id} must have same endedness"
+    }
+
+    [ metas.first(), fqs ]
+}
diff --git a/modules/local/validate_params.nf b/modules/local/validate_params.nf
deleted file mode 100644
index 460ce80..0000000
--- a/modules/local/validate_params.nf
+++ /dev/null
@@ -1,104 +0,0 @@
-def validateParams(params) {
- validateFastaTags(params)
-
- if (!params['repeat_annotator']) {
- error "Error: repeat_annotator must be either 'repeatmodeler' or 'edta'"
- }
-
- if ( !(params['repeat_annotator'] in ['repeatmodeler', 'edta']) ) {
- error "Error: repeat_annotator must be either 'repeatmodeler' or 'edta'"
- }
-
- validateTETags(params)
- validateTEFastaCorrespondence(params)
-
- validateRiboDBManifest(params)
-
- validateLiftoffXrefs(params)
-}
-
-def validateFastaTags(params) {
- def listOfFastaTuples = params["target_assemblies"]
-
- if (isNotListOfLists(listOfFastaTuples, 2)) {
- error 'Error: target_assemblies must be a list of sublists, with each sublist containing 2 elements'
- }
-
- def fastaTags = listOfFastaTuples.collect { it[0] }
-
- fastaTags.each {
- if (!(it =~ /^\w+$/)) {
- error "Error: $it is not a valid tag in target_assemblies"
- }
- }
-
- if (fastaTags.size() != (fastaTags as Set).size()) {
- error "All the tags in target_assemblies should be unique"
- }
-}
-
-def validateTETags(params) {
-
- if(!params["te_libraries"]) {
- return
- }
-
- def listOfTETuples = params["te_libraries"]
-
- if (listOfTETuples.isEmpty()) {
- return
- }
-
- if (isNotListOfLists(listOfTETuples, 2)) {
- error 'Error: te_libraries must be a list of sublists, with each sublist containing 2 elements'
- }
-
- def teTags = listOfTETuples.collect { it[0] }
-
- teTags.each {
- if (!(it =~ /^\w+$/)) {
- error "Error: $it is not a valid tag in te_libraries"
- }
- }
-}
-
-def validateTEFastaCorrespondence(params) {
-
- if(!params["te_libraries"]) {
- return
- }
-
- def listOfTETuples = params["te_libraries"]
- def listOfFastaTuples = params["target_assemblies"]
-
- def fastaTags = listOfFastaTuples.collect { it[0] }
- def teTags = listOfTETuples.collect { it[0] }
-
- teTags.each {
- if(!fastaTags.contains(it)) {
- error "Error: $it in te_libraries does not have a corresponding tag in target_assemblies"
- }
- }
-}
-
-def validateRiboDBManifest(params) {
- if (params.remove_ribo_rna) {
- file_ribo_db = file(params.ribo_database_manifest, checkIfExists: true)
-
- if (file_ribo_db.isEmpty()) {exit 1, "File provided with --ribo_database_manifest is empty: ${file_ribo_db.getName()}!"}
- }
-}
-
-def validateLiftoffXrefs(params) {
- if(!params["liftoff_xref_annotations"]) {
- return
- }
-
- if(isNotListOfLists(params["liftoff_xref_annotations"]), 2) {
- error "Error: liftoff_xref_annotations must be a list of sublists, with each sublist containing 2 elements"
- }
-}
-
-def isNotListOfLists(thisOne, subListSize) {
- return (!(thisOne instanceof List) || thisOne.isEmpty() || thisOne.any { !(it instanceof List) || it.size() != subListSize })
-}
diff --git a/modules/nf-core/agat/convertspgff2gtf/environment.yml b/modules/nf-core/agat/convertspgff2gtf/environment.yml
new file mode 100644
index 0000000..381154f
--- /dev/null
+++ b/modules/nf-core/agat/convertspgff2gtf/environment.yml
@@ -0,0 +1,7 @@
+name: agat_convertspgff2gtf
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::agat=1.0.0
diff --git a/modules/nf-core/agat/convertspgff2gtf/main.nf b/modules/nf-core/agat/convertspgff2gtf/main.nf
new file mode 100644
index 0000000..8f1f8b4
--- /dev/null
+++ b/modules/nf-core/agat/convertspgff2gtf/main.nf
@@ -0,0 +1,48 @@
+process AGAT_CONVERTSPGFF2GTF {
+ tag "$meta.id"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/agat:1.0.0--pl5321hdfd78af_0' :
+ 'biocontainers/agat:1.0.0--pl5321hdfd78af_0' }"
+
+ input:
+ tuple val(meta), path(gff)
+
+ output:
+ tuple val(meta), path("*.agat.gtf"), emit: output_gtf
+ tuple val(meta), path("*.log"), emit: log
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ agat_convert_sp_gff2gtf.pl \\
+ --gff $gff \\
+ --output ${prefix}.agat.gtf \\
+ $args
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ agat: \$(agat_convert_sp_gff2gtf.pl --help | sed '4!d; s/.*v//')
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ touch ${prefix}.agat.gtf
+ touch ${gff}.agat.log
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ agat: \$(agat_convert_sp_gff2gtf.pl --help | sed '4!d; s/.*v//')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/agat/convertspgff2gtf/meta.yml b/modules/nf-core/agat/convertspgff2gtf/meta.yml
new file mode 100644
index 0000000..dcdc8d9
--- /dev/null
+++ b/modules/nf-core/agat/convertspgff2gtf/meta.yml
@@ -0,0 +1,43 @@
+name: agat_convertspgff2gtf
+description: |
+ Converts a GFF/GTF file into a proper GTF file
+keywords:
+ - genome
+ - gff
+ - gtf
+ - conversion
+tools:
+ - agat:
+ description: "AGAT is a toolkit for manipulation and getting information from GFF/GTF files"
+ homepage: "https://github.com/NBISweden/AGAT"
+ documentation: "https://agat.readthedocs.io/"
+ tool_dev_url: "https://github.com/NBISweden/AGAT"
+ doi: "10.5281/zenodo.3552717"
+ licence: ["GPL v3"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - gff:
+ type: file
+ description: Annotation file in GFF3/GTF format
+ pattern: "*.{gff, gtf}"
+output:
+ - output_gtf:
+ type: file
+ description: Annotation file in GTF format
+ pattern: "*.{gtf}"
+ - log:
+ type: file
+ description: Log file of the conversion process
+ pattern: "*.{log}"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@toniher"
+maintainers:
+ - "@toniher"
diff --git a/modules/nf-core/agat/convertspgff2gtf/tests/main.nf.test b/modules/nf-core/agat/convertspgff2gtf/tests/main.nf.test
new file mode 100644
index 0000000..9accfec
--- /dev/null
+++ b/modules/nf-core/agat/convertspgff2gtf/tests/main.nf.test
@@ -0,0 +1,62 @@
+nextflow_process {
+
+ name "Test Process AGAT_CONVERTSPGFF2GTF"
+ script "../main.nf"
+ process "AGAT_CONVERTSPGFF2GTF"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "agat"
+ tag "agat/convertspgff2gtf"
+
+ test("sarscov2 - genome [gff3]") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id: 'test' ], // meta map
+ file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.output_gtf,
+ process.out.versions).match() },
+ { assert path(process.out.log[0][1]).exists() }
+ )
+ }
+
+ }
+
+ test("sarscov2 - genome [gff3] - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id: 'test' ], // meta map
+ file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.output_gtf.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() } +
+ process.out.versions ).match() }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/agat/convertspgff2gtf/tests/main.nf.test.snap b/modules/nf-core/agat/convertspgff2gtf/tests/main.nf.test.snap
new file mode 100644
index 0000000..6193be8
--- /dev/null
+++ b/modules/nf-core/agat/convertspgff2gtf/tests/main.nf.test.snap
@@ -0,0 +1,28 @@
+{
+ "sarscov2 - genome [gff3] - stub": {
+ "content": [
+ [
+ "test.agat.gtf",
+ "genome.gff3.agat.log",
+ "versions.yml:md5,dcbde1b24eb36571645f2d4bd4b4e551"
+ ]
+ ],
+ "timestamp": "2023-12-24T23:36:49.538312808"
+ },
+ "sarscov2 - genome [gff3]": {
+ "content": [
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test.agat.gtf:md5,bbe333239767d048eb8392bba6856616"
+ ]
+ ],
+ [
+ "versions.yml:md5,dcbde1b24eb36571645f2d4bd4b4e551"
+ ]
+ ],
+ "timestamp": "2023-12-24T23:36:39.319717066"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/agat/convertspgff2gtf/tests/tags.yml b/modules/nf-core/agat/convertspgff2gtf/tests/tags.yml
new file mode 100644
index 0000000..7a59648
--- /dev/null
+++ b/modules/nf-core/agat/convertspgff2gtf/tests/tags.yml
@@ -0,0 +1,2 @@
+agat/convertspgff2gtf:
+ - "modules/nf-core/agat/convertspgff2gtf/**"
diff --git a/modules/nf-core/agat/convertspgxf2gxf/environment.yml b/modules/nf-core/agat/convertspgxf2gxf/environment.yml
new file mode 100644
index 0000000..6ed34fa
--- /dev/null
+++ b/modules/nf-core/agat/convertspgxf2gxf/environment.yml
@@ -0,0 +1,7 @@
+name: agat_convertspgxf2gxf
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::agat=1.4.0
diff --git a/modules/nf-core/agat/convertspgxf2gxf/main.nf b/modules/nf-core/agat/convertspgxf2gxf/main.nf
new file mode 100644
index 0000000..b9a7668
--- /dev/null
+++ b/modules/nf-core/agat/convertspgxf2gxf/main.nf
@@ -0,0 +1,48 @@
+process AGAT_CONVERTSPGXF2GXF {
+ tag "$meta.id"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/agat:1.4.0--pl5321hdfd78af_0' :
+ 'biocontainers/agat:1.4.0--pl5321hdfd78af_0' }"
+
+ input:
+ tuple val(meta), path(gxf)
+
+ output:
+ tuple val(meta), path("*.agat.gff") , emit: output_gff
+ tuple val(meta), path("*.log") , emit: log
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ agat_convert_sp_gxf2gxf.pl \\
+ --gxf $gxf \\
+ --output ${prefix}.agat.gff \\
+ $args
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ agat: \$(agat_convert_sp_gxf2gxf.pl --help | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p')
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ touch ${prefix}.agat.gff
+ touch ${gxf}.agat.log
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ agat: \$(agat_convert_sp_gxf2gxf.pl --help | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/agat/convertspgxf2gxf/meta.yml b/modules/nf-core/agat/convertspgxf2gxf/meta.yml
new file mode 100644
index 0000000..0ef9881
--- /dev/null
+++ b/modules/nf-core/agat/convertspgxf2gxf/meta.yml
@@ -0,0 +1,43 @@
+name: agat_convertspgxf2gxf
+description: |
+ Fixes and standardizes GFF/GTF files and outputs a cleaned GFF/GTF file
+keywords:
+ - genome
+ - gff
+ - gtf
+ - conversion
+tools:
+ - agat:
+ description: "AGAT is a toolkit for manipulation and getting information from GFF/GTF files"
+ homepage: "https://github.com/NBISweden/AGAT"
+ documentation: "https://agat.readthedocs.io/"
+ tool_dev_url: "https://github.com/NBISweden/AGAT"
+ doi: "10.5281/zenodo.3552717"
+ licence: ["GPL v3"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - gxf:
+ type: file
+ description: Annotation file in GFF3/GTF format
+ pattern: "*.{gff, gtf}"
+output:
+ - output_gff:
+ type: file
+ description: Cleaned annotation file in GFF3 format
+ pattern: "*.{gff}"
+ - log:
+ type: file
+ description: Log file of the conversion process
+ pattern: "*.{log}"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@toniher"
+maintainers:
+ - "@toniher"
diff --git a/modules/nf-core/agat/convertspgxf2gxf/tests/main.nf.test b/modules/nf-core/agat/convertspgxf2gxf/tests/main.nf.test
new file mode 100644
index 0000000..db85991
--- /dev/null
+++ b/modules/nf-core/agat/convertspgxf2gxf/tests/main.nf.test
@@ -0,0 +1,60 @@
+nextflow_process {
+
+ name "Test Process AGAT_CONVERTSPGXF2GXF"
+ script "../main.nf"
+ process "AGAT_CONVERTSPGXF2GXF"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "agat"
+ tag "agat/convertspgxf2gxf"
+
+ test("sarscov2 genome [gtf]") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id: 'test' ], // meta map
+ file(params.test_data['sarscov2']['genome']['genome_gtf'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out.output_gff,
+ process.out.versions).match() },
+ { assert path(process.out.log[0][1]).exists() }
+ )
+ }
+
+ }
+
+ test("sarscov2 genome [gtf] - stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id: 'test' ], // meta map
+ file(params.test_data['sarscov2']['genome']['genome_gtf'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/agat/convertspgxf2gxf/tests/main.nf.test.snap b/modules/nf-core/agat/convertspgxf2gxf/tests/main.nf.test.snap
new file mode 100644
index 0000000..e89073f
--- /dev/null
+++ b/modules/nf-core/agat/convertspgxf2gxf/tests/main.nf.test.snap
@@ -0,0 +1,71 @@
+{
+ "sarscov2 genome [gtf] - stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.agat.gff:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.gtf.agat.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,5ec6166c5c080ec4bc08a8fe55ada486"
+ ],
+ "log": [
+ [
+ {
+ "id": "test"
+ },
+ "genome.gtf.agat.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "output_gff": [
+ [
+ {
+ "id": "test"
+ },
+ "test.agat.gff:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,5ec6166c5c080ec4bc08a8fe55ada486"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-12T12:25:34.583294"
+ },
+ "sarscov2 genome [gtf]": {
+ "content": [
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test.agat.gff:md5,7d7e9bcd82a2f0bb7d8a38f85e82f0bc"
+ ]
+ ],
+ [
+ "versions.yml:md5,5ec6166c5c080ec4bc08a8fe55ada486"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-12T12:21:21.310464"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/agat/convertspgxf2gxf/tests/tags.yml b/modules/nf-core/agat/convertspgxf2gxf/tests/tags.yml
new file mode 100644
index 0000000..85c7000
--- /dev/null
+++ b/modules/nf-core/agat/convertspgxf2gxf/tests/tags.yml
@@ -0,0 +1,2 @@
+agat/convertspgxf2gxf:
+ - "modules/nf-core/agat/convertspgxf2gxf/**"
diff --git a/modules/nf-core/cat/cat/main.nf b/modules/nf-core/cat/cat/main.nf
index 970ab76..adbdbd7 100644
--- a/modules/nf-core/cat/cat/main.nf
+++ b/modules/nf-core/cat/cat/main.nf
@@ -22,6 +22,8 @@ process CAT_CAT {
def args2 = task.ext.args2 ?: ''
def file_list = files_in.collect { it.toString() }
+ // choose appropriate concatenation tool depending on input and output format
+
// | input | output | command1 | command2 |
// |-----------|------------|----------|----------|
// | gzipped | gzipped | cat | |
@@ -30,7 +32,7 @@ process CAT_CAT {
// | ungzipped | gzipped | cat | pigz |
// Use input file ending as default
- prefix = task.ext.prefix ?: "${meta.id}${file_list[0].substring(file_list[0].lastIndexOf('.'))}"
+ prefix = task.ext.prefix ?: "${meta.id}${getFileSuffix(file_list[0])}"
out_zip = prefix.endsWith('.gz')
in_zip = file_list[0].endsWith('.gz')
command1 = (in_zip && !out_zip) ? 'zcat' : 'cat'
@@ -68,3 +70,10 @@ process CAT_CAT {
END_VERSIONS
"""
}
+
+// for .gz files also include the second to last extension if it is present. E.g., .fasta.gz
+def getFileSuffix(filename) {
+ def match = filename =~ /^.*?((\.\w{1,5})?(\.\w{1,5}\.gz$))/
+ return match ? match[0][1] : filename.substring(filename.lastIndexOf('.'))
+}
+
diff --git a/modules/nf-core/cat/cat/tests/main.nf.test b/modules/nf-core/cat/cat/tests/main.nf.test
index ed5a4f1..fcee2d1 100644
--- a/modules/nf-core/cat/cat/tests/main.nf.test
+++ b/modules/nf-core/cat/cat/tests/main.nf.test
@@ -19,8 +19,8 @@ nextflow_process {
[
[ id:'genome', single_end:true ],
[
- file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
- file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true)
]
]
"""
@@ -45,8 +45,8 @@ nextflow_process {
[
[ id:'test', single_end:true ],
[
- file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
- file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true)
]
]
"""
@@ -72,8 +72,8 @@ nextflow_process {
[
[ id:'test', single_end:true ],
[
- file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
]
]
"""
@@ -102,8 +102,8 @@ nextflow_process {
[
[ id:'test', single_end:true ],
[
- file(params.test_data['sarscov2']['genome']['genome_gff3_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['genome']['contigs_genome_maf_gz'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.gff3.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/alignment/last/contigs.genome.maf.gz', checkIfExists: true)
]
]
"""
@@ -131,8 +131,8 @@ nextflow_process {
[
[ id:'test', single_end:true ],
[
- file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true),
- file(params.test_data['sarscov2']['genome']['genome_sizes'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.sizes', checkIfExists: true)
]
]
"""
@@ -160,7 +160,7 @@ nextflow_process {
[
[ id:'test', single_end:true ],
[
- file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/genome/genome.fasta', checkIfExists: true)
]
]
"""
@@ -176,4 +176,3 @@ nextflow_process {
}
}
}
-
diff --git a/modules/nf-core/cat/fastq/environment.yml b/modules/nf-core/cat/fastq/environment.yml
index bff93ad..8c69b12 100644
--- a/modules/nf-core/cat/fastq/environment.yml
+++ b/modules/nf-core/cat/fastq/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - conda-forge::sed=4.7
+ - conda-forge::coreutils=8.30
diff --git a/modules/nf-core/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf
index 3d96378..f132b2a 100644
--- a/modules/nf-core/cat/fastq/main.nf
+++ b/modules/nf-core/cat/fastq/main.nf
@@ -76,5 +76,4 @@ process CAT_FASTQ {
"""
}
}
-
}
diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test b/modules/nf-core/cat/fastq/tests/main.nf.test
index f5f9418..dab2e14 100644
--- a/modules/nf-core/cat/fastq/tests/main.nf.test
+++ b/modules/nf-core/cat/fastq/tests/main.nf.test
@@ -16,11 +16,11 @@ nextflow_process {
}
process {
"""
- input[0] = [
- [ id:'test', single_end:true ], // meta map
- [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true) ]
- ]
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)]
+ ])
"""
}
}
@@ -28,8 +28,7 @@ nextflow_process {
then {
assertAll(
{ assert process.success },
- { assert snapshot(process.out.reads).match() },
- { assert path(process.out.versions.get(0)).getText().contains("cat") }
+ { assert snapshot(process.out).match() }
)
}
}
@@ -42,13 +41,13 @@ nextflow_process {
}
process {
"""
- input[0] = [
+ input[0] = Channel.of([
[ id:'test', single_end:false ], // meta map
- [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true) ]
- ]
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true)]
+ ])
"""
}
}
@@ -56,8 +55,7 @@ nextflow_process {
then {
assertAll(
{ assert process.success },
- { assert snapshot(process.out.reads).match() },
- { assert path(process.out.versions.get(0)).getText().contains("cat") }
+ { assert snapshot(process.out).match() }
)
}
}
@@ -70,11 +68,11 @@ nextflow_process {
}
process {
"""
- input[0] = [
+ input[0] = Channel.of([
[ id:'test', single_end:true ], // meta map
- [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
- ]
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)]
+ ])
"""
}
}
@@ -82,8 +80,7 @@ nextflow_process {
then {
assertAll(
{ assert process.success },
- { assert snapshot(process.out.reads).match() },
- { assert path(process.out.versions.get(0)).getText().contains("cat") }
+ { assert snapshot(process.out).match() }
)
}
}
@@ -96,13 +93,13 @@ nextflow_process {
}
process {
"""
- input[0] = [
+ input[0] = Channel.of([
[ id:'test', single_end:false ], // meta map
- [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
- ]
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)]
+ ])
"""
}
}
@@ -110,8 +107,7 @@ nextflow_process {
then {
assertAll(
{ assert process.success },
- { assert snapshot(process.out.reads).match() },
- { assert path(process.out.versions.get(0)).getText().contains("cat") }
+ { assert snapshot(process.out).match() }
)
}
}
@@ -124,10 +120,10 @@ nextflow_process {
}
process {
"""
- input[0] = [
+ input[0] = Channel.of([
[ id:'test', single_end:true ], // meta map
- [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)]
- ]
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)]
+ ])
"""
}
}
@@ -135,8 +131,7 @@ nextflow_process {
then {
assertAll(
{ assert process.success },
- { assert snapshot(process.out.reads).match() },
- { assert path(process.out.versions.get(0)).getText().contains("cat") }
+ { assert snapshot(process.out).match() }
)
}
}
diff --git a/modules/nf-core/cat/fastq/tests/main.nf.test.snap b/modules/nf-core/cat/fastq/tests/main.nf.test.snap
index ec2342e..43dfe28 100644
--- a/modules/nf-core/cat/fastq/tests/main.nf.test.snap
+++ b/modules/nf-core/cat/fastq/tests/main.nf.test.snap
@@ -1,78 +1,169 @@
{
"test_cat_fastq_single_end": {
"content": [
- [
- [
- {
- "id": "test",
- "single_end": true
- },
- "test.merged.fastq.gz:md5,f9cf5e375f7de81a406144a2c70cc64d"
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
+ ],
+ "reads": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.merged.fastq.gz:md5,ee314a9bd568d06617171b0c85f508da"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
]
- ]
+ }
],
- "timestamp": "2023-10-17T23:19:12.990284837"
+ "timestamp": "2024-01-17T17:30:39.816981"
},
"test_cat_fastq_single_end_same_name": {
"content": [
- [
- [
- {
- "id": "test",
- "single_end": true
- },
- "test.merged.fastq.gz:md5,63f817db7a29a03eb538104495556f66"
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
+ ],
+ "reads": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
]
- ]
+ }
],
- "timestamp": "2023-10-17T23:19:31.554568147"
+ "timestamp": "2024-01-17T17:32:35.229332"
},
"test_cat_fastq_single_end_single_file": {
"content": [
- [
- [
- {
- "id": "test",
- "single_end": true
- },
- "test.merged.fastq.gz:md5,e325ef7deb4023447a1f074e285761af"
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
+ ],
+ "reads": [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.merged.fastq.gz:md5,4161df271f9bfcd25d5845a1e220dbec"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
]
- ]
+ }
],
- "timestamp": "2023-10-17T23:19:49.629360033"
+ "timestamp": "2024-01-17T17:34:00.058829"
},
"test_cat_fastq_paired_end_same_name": {
"content": [
- [
- [
- {
- "id": "test",
- "single_end": false
- },
+ {
+ "0": [
[
- "test_1.merged.fastq.gz:md5,63f817db7a29a03eb538104495556f66",
- "test_2.merged.fastq.gz:md5,fe9f266f43a6fc3dcab690a18419a56e"
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22",
+ "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5"
+ ]
]
+ ],
+ "1": [
+ "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
+ ],
+ "reads": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22",
+ "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5"
+ ]
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
]
- ]
+ }
],
- "timestamp": "2023-10-17T23:19:40.711617539"
+ "timestamp": "2024-01-17T17:33:33.031555"
},
"test_cat_fastq_paired_end": {
"content": [
- [
- [
- {
- "id": "test",
- "single_end": false
- },
+ {
+ "0": [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22",
+ "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5"
+ ]
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
+ ],
+ "reads": [
[
- "test_1.merged.fastq.gz:md5,f9cf5e375f7de81a406144a2c70cc64d",
- "test_2.merged.fastq.gz:md5,77c8e966e130d8c6b6ec9be52fcb2bda"
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_1.merged.fastq.gz:md5,3ad9406595fafec8172368f9cd0b6a22",
+ "test_2.merged.fastq.gz:md5,a52cab0b840c7178b0ea83df1fdbe8d5"
+ ]
]
+ ],
+ "versions": [
+ "versions.yml:md5,d42d6e24d67004608495883e00bd501b"
]
- ]
+ }
],
- "timestamp": "2023-10-18T07:53:20.923560211"
+ "timestamp": "2024-01-17T17:32:02.270935"
}
}
\ No newline at end of file
diff --git a/modules/nf-core/custom/dumpsoftwareversions/environment.yml b/modules/nf-core/custom/dumpsoftwareversions/environment.yml
index 9b3272b..b48ced2 100644
--- a/modules/nf-core/custom/dumpsoftwareversions/environment.yml
+++ b/modules/nf-core/custom/dumpsoftwareversions/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::multiqc=1.19
+ - bioconda::multiqc=1.20
diff --git a/modules/nf-core/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf
index f218761..105f926 100644
--- a/modules/nf-core/custom/dumpsoftwareversions/main.nf
+++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf
@@ -4,8 +4,8 @@ process CUSTOM_DUMPSOFTWAREVERSIONS {
// Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/multiqc:1.19--pyhdfd78af_0' :
- 'biocontainers/multiqc:1.19--pyhdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.20--pyhdfd78af_0' :
+ 'biocontainers/multiqc:1.20--pyhdfd78af_0' }"
input:
path versions
diff --git a/modules/nf-core/eggnogmapper/eggnogmapper.diff b/modules/nf-core/eggnogmapper/eggnogmapper.diff
new file mode 100644
index 0000000..b38223d
--- /dev/null
+++ b/modules/nf-core/eggnogmapper/eggnogmapper.diff
@@ -0,0 +1,53 @@
+Changes in module 'nf-core/eggnogmapper'
+--- modules/nf-core/eggnogmapper/meta.yml
++++ modules/nf-core/eggnogmapper/meta.yml
+@@ -60,3 +60,6 @@
+ pattern: "versions.yml"
+ authors:
+ - "@vagkaratzas"
++maintainers:
++ - "@vagkaratzas"
++ - "@gallvp"
+
+--- modules/nf-core/eggnogmapper/main.nf
++++ modules/nf-core/eggnogmapper/main.nf
+@@ -1,6 +1,6 @@
+ process EGGNOGMAPPER {
+ tag "$meta.id"
+- label 'process_long'
++ label 'process_high'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+@@ -23,11 +23,13 @@
+ task.ext.when == null || task.ext.when
+
+ script:
+- def args = task.ext.args ?: ''
+- def prefix = task.ext.prefix ?: "${meta.id}"
+- def is_compressed = fasta.name.endsWith(".gz")
+- def fasta_name = fasta.name.replace(".gz", "")
+- def dbmem = task.memory.toMega() > 40000 ? '--dbmem' : ''
++ def args = task.ext.args ?: ''
++ def prefix = task.ext.prefix ?: "${meta.id}"
++ def is_compressed = fasta.name.endsWith(".gz")
++ def fasta_name = fasta.name.replace(".gz", "")
++ def dbmem = task.memory.toMega() > 40000 ? '--dbmem' : ''
++ def database_arg = eggnog_db ? "--database $eggnog_db" : ''
++ def dmnd_db_arg = eggnog_diamond_db ? "--dmnd_db $eggnog_diamond_db" : ''
+ """
+ if [ "$is_compressed" == "true" ]; then
+ gzip -c -d $fasta > $fasta_name
+@@ -38,8 +40,8 @@
+ -i ${fasta_name} \\
+ --data_dir ${eggnog_data_dir} \\
+ -m diamond \\
+- --dmnd_db ${eggnog_diamond_db} \\
+- --database ${eggnog_db} \\
++ $dmnd_db_arg \\
++ $database_arg \\
+ --output ${prefix} \\
+ ${dbmem} \\
+ $args
+
+************************************************************
diff --git a/modules/nf-core/eggnogmapper/environment.yml b/modules/nf-core/eggnogmapper/environment.yml
new file mode 100644
index 0000000..f4fb6fd
--- /dev/null
+++ b/modules/nf-core/eggnogmapper/environment.yml
@@ -0,0 +1,7 @@
+name: eggnogmapper
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::eggnog-mapper=2.1.12
diff --git a/modules/nf-core/eggnogmapper/main.nf b/modules/nf-core/eggnogmapper/main.nf
new file mode 100644
index 0000000..134451d
--- /dev/null
+++ b/modules/nf-core/eggnogmapper/main.nf
@@ -0,0 +1,68 @@
+process EGGNOGMAPPER { // runs emapper.py with `-m diamond` on the input FASTA and emits its annotations/seed_orthologs/hits files
+ tag "$meta.id"
+ label 'process_high'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/eggnog-mapper:2.1.12--pyhdfd78af_0':
+ 'biocontainers/eggnog-mapper:2.1.12--pyhdfd78af_0' }"
+
+ input:
+ tuple val(meta), path(fasta) // fasta may be gzip-compressed; it is decompressed in the script when its name ends in ".gz"
+ path(eggnog_db) // optional: `--database` is only passed when this is non-empty
+ path(eggnog_data_dir) // always passed as `--data_dir`
+ tuple val(meta2), path(eggnog_diamond_db) // optional: `--dmnd_db` is only passed when non-empty; meta2 is not referenced in this process
+
+ output:
+ tuple val(meta), path("*.emapper.annotations") , emit: annotations
+ tuple val(meta), path("*.emapper.seed_orthologs"), emit: orthologs
+ tuple val(meta), path("*.emapper.hits") , emit: hits
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: '' // extra emapper.py arguments from config, appended last on the command line
+ def prefix = task.ext.prefix ?: "${meta.id}" // value of `--output`; defaults to the sample id
+ def is_compressed = fasta.name.endsWith(".gz") // interpolated into the shell test below as "true"/"false"
+ def fasta_name = fasta.name.replace(".gz", "") // file name given to `-i`: the input name with ".gz" removed
+ def dbmem = task.memory.toMega() > 40000 ? '--dbmem' : '' // add --dbmem only when the task has more than 40000 MB of memory
+ def database_arg = eggnog_db ? "--database $eggnog_db" : '' // empty string when no eggnog_db was supplied
+ def dmnd_db_arg = eggnog_diamond_db ? "--dmnd_db $eggnog_diamond_db" : '' // empty string when no Diamond db was supplied
+ """
+ if [ "$is_compressed" == "true" ]; then
+ gzip -c -d $fasta > $fasta_name
+ fi
+
+ emapper.py \\
+ --cpu ${task.cpus} \\
+ -i ${fasta_name} \\
+ --data_dir ${eggnog_data_dir} \\
+ -m diamond \\
+ $dmnd_db_arg \\
+ $database_arg \\
+ --output ${prefix} \\
+ ${dbmem} \\
+ $args
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ eggnog-mapper: \$(echo \$(emapper.py --version) | grep -o "emapper-[0-9]\\+\\.[0-9]\\+\\.[0-9]\\+" | sed "s/emapper-//")
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: '' // NOTE(review): not referenced by the stub script below
+ def prefix = task.ext.prefix ?: "${meta.id}" // same prefix rule as the real script so stub file names match the output globs
+ """
+ touch ${prefix}.emapper.annotations
+ touch ${prefix}.emapper.seed_orthologs
+ touch ${prefix}.emapper.hits
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ eggnog-mapper: \$(echo \$(emapper.py --version) | grep -o "emapper-[0-9]\\+\\.[0-9]\\+\\.[0-9]\\+" | sed "s/emapper-//")
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/eggnogmapper/meta.yml b/modules/nf-core/eggnogmapper/meta.yml
new file mode 100644
index 0000000..b07c27e
--- /dev/null
+++ b/modules/nf-core/eggnogmapper/meta.yml
@@ -0,0 +1,65 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/yaml-schema.json
+name: "eggnogmapper"
+description: Fast genome-wide functional annotation through orthology assignment.
+keywords:
+ - annotation
+ - orthology
+ - genomics
+tools:
+ - "eggnogmapper":
+ description: "Fast genome-wide functional annotation through orthology assignment."
+ homepage: "https://github.com/eggnogdb/eggnog-mapper"
+ documentation: "https://github.com/eggnogdb/eggnog-mapper/wiki"
+ tool_dev_url: "https://github.com/eggnogdb/eggnog-mapper"
+ doi: "10.1093/molbev/msab293"
+ licence: ["AGPL v3"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'test', single_end:false ]`
+ - fasta:
+ type: file
+ description: Input sequences to annotate, in FASTA format (optionally gzip-compressed)
+ pattern: "*.{fasta,fa,fasta.gz,fa.gz}"
+ - eggnog_db:
+ type: file
+ description: The eggnog database file (e.g. eggnog-mapper/data/eggnog.db)
+ pattern: "*.db"
+ - eggnog_data_dir:
+ type: directory
+ description: Directory containing eggnog database files (e.g. eggnog-mapper/data)
+ pattern: "*"
+ - eggnog_diamond_db:
+ type: file
+ description: The eggnog Diamond protein database file (e.g. eggnog-mapper/data/eggnog_proteins.dmnd)
+ pattern: "*.dmnd"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'test', single_end:false ]`
+ - annotations:
+ type: file
+ description: TSV with the results from the annotation phase
+ pattern: "*.emapper.annotations"
+ - orthologs:
+ type: file
+ description: TSV with the results from parsing the hits, linking queries with seed orthologs (with commented metadata)
+ pattern: "*.emapper.seed_orthologs"
+ - hits:
+ type: file
+ description: TSV with the results from the Diamond search phase
+ pattern: "*.emapper.hits"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@vagkaratzas"
+maintainers:
+ - "@vagkaratzas"
+ - "@gallvp"
diff --git a/modules/nf-core/eggnogmapper/tests/main.nf.test b/modules/nf-core/eggnogmapper/tests/main.nf.test
new file mode 100644
index 0000000..fb707ea
--- /dev/null
+++ b/modules/nf-core/eggnogmapper/tests/main.nf.test
@@ -0,0 +1,56 @@
+nextflow_process { // nf-test suite for the EGGNOGMAPPER process defined in ../main.nf
+
+ name "Test Process EGGNOGMAPPER"
+ script "../main.nf"
+ process "EGGNOGMAPPER"
+ tag "modules"
+ tag "modules_nfcore"
+ tag "eggnogmapper"
+ tag "diamond/makedb" // DIAMOND_MAKEDB is run in the setup block of the test below
+
+ test("Should search for protein annotations against the eggnogmapper db") {
+
+ setup {
+ run("DIAMOND_MAKEDB") { // its `db` output is passed to EGGNOGMAPPER as input[3] in the when block
+ script "../../diamond/makedb/main.nf"
+ process {
+ """
+ input[0] = [ [id:'test2'], file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
+ input[1] = []
+ input[2] = []
+ input[3] = []
+ """
+ }
+ }
+ }
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process { // copies eggnog.db into ${workDir}/tmp and uses that directory as eggnog_data_dir
+ """
+ input[0] = [ [id:'test'], file(params.test_data['sarscov2']['genome']['proteome_fasta'], checkIfExists: true) ]
+ eggnog_db = file("https://github.com/nf-core/test-datasets/raw/eddf5b0e3336e0f93c81d4b4843b07257f9efaec/data/delete_me/eggnogmapper/eggnog.db", checkIfExists: true)
+ eggnog_db.copyTo("${workDir}/tmp/eggnog.db")
+ eggnog_data_dir = "${workDir}/tmp/"
+ input[1] = eggnog_db
+ input[2] = eggnog_data_dir
+ input[3] = DIAMOND_MAKEDB.out.db
+ """
+ }
+ }
+
+ then { // annotations and orthologs are checked for one exact expected line each; only hits is snapshotted
+ assertAll(
+ { assert process.success },
+ { assert path(process.out.annotations.get(0).get(1)).readLines().contains("ENSSASP00005000002.1\tENSSASP00005000002.1\t0.0\t14179.0\tCOG0498@1|root,COG0498@2|Bacteria,1MUWQ@1224|Proteobacteria,2VHR6@28216|Betaproteobacteria,2KUMA@206389|Rhodocyclales\t1224|Proteobacteria\tE\tthreonine synthase\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-\t-") },
+ { assert path(process.out.orthologs.get(0).get(1)).readLines().contains("ENSSASP00005000002.1\tENSSASP00005000002.1\t0.0\t14179.0\t1\t7096\t1\t7096\t100.0\t100.0\t100.0") },
+ { assert snapshot(process.out.hits).match("hits") }, // compared with the "hits" entry in main.nf.test.snap
+ { assert process.out.versions } // only checks that a versions output was emitted; its content is not compared
+ )
+ }
+
+ }
+
+}
diff --git a/modules/nf-core/eggnogmapper/tests/main.nf.test.snap b/modules/nf-core/eggnogmapper/tests/main.nf.test.snap
new file mode 100644
index 0000000..4e1c837
--- /dev/null
+++ b/modules/nf-core/eggnogmapper/tests/main.nf.test.snap
@@ -0,0 +1,15 @@
+{
+ "hits": {
+ "content": [
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test.emapper.hits:md5,864b7a1f902893d8aee6621baeab7be8"
+ ]
+ ]
+ ],
+ "timestamp": "2023-11-08T20:43:50.173213923"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/eggnogmapper/tests/tags.yml b/modules/nf-core/eggnogmapper/tests/tags.yml
new file mode 100644
index 0000000..284ba2e
--- /dev/null
+++ b/modules/nf-core/eggnogmapper/tests/tags.yml
@@ -0,0 +1,2 @@
+eggnogmapper:
+ - modules/nf-core/eggnogmapper/**
diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf
index 2a3b679..4fc19b7 100644
--- a/modules/nf-core/fastp/main.nf
+++ b/modules/nf-core/fastp/main.nf
@@ -29,7 +29,7 @@ process FASTP {
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def adapter_list = adapter_fasta ? "--adapter_fasta ${adapter_fasta}" : ""
- def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : ''
+ def fail_fastq = save_trimmed_fail && meta.single_end ? "--failed_out ${prefix}.fail.fastq.gz" : save_trimmed_fail && !meta.single_end ? "--failed_out ${prefix}.paired.fail.fastq.gz --unpaired1 ${prefix}_1.fail.fastq.gz --unpaired2 ${prefix}_2.fail.fastq.gz" : ''
// Added soft-links to original fastqs for consistent naming in MultiQC
// Use single ended for interleaved. Add --interleaved_in in config.
if ( task.ext.args?.contains('--interleaved_in') ) {
diff --git a/modules/nf-core/fastp/tests/main.nf.test b/modules/nf-core/fastp/tests/main.nf.test
index 17dce8a..6f1f489 100644
--- a/modules/nf-core/fastp/tests/main.nf.test
+++ b/modules/nf-core/fastp/tests/main.nf.test
@@ -19,11 +19,10 @@ nextflow_process {
save_trimmed_fail = false
save_merged = false
- input[0] = [
+ input[0] = Channel.of([
[ id:'test', single_end:true ],
- [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
- ]
-
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
+ ])
input[1] = adapter_fasta
input[2] = save_trimmed_fail
input[3] = save_merged
@@ -68,9 +67,9 @@ nextflow_process {
process.out.reads_fail.collect { file(it[1]).getName() } +
process.out.reads_merged.collect { file(it[1]).getName() }
).sort()
- ).match("test_fastp_single_end-for_stub_match")
+ ).match("test_fastp_single_end-_match")
},
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(process.out.versions).match("versions_single_end") }
)
}
}
@@ -89,11 +88,10 @@ nextflow_process {
save_trimmed_fail = false
save_merged = false
- input[0] = [
+ input[0] = Channel.of([
[ id:'test', single_end:true ],
- [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
- ]
-
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
+ ])
input[1] = adapter_fasta
input[2] = save_trimmed_fail
input[3] = save_merged
@@ -118,7 +116,7 @@ nextflow_process {
).sort()
).match("test_fastp_single_end-for_stub_match")
},
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(process.out.versions).match("versions_single_end_stub") }
)
}
}
@@ -135,12 +133,11 @@ nextflow_process {
save_trimmed_fail = false
save_merged = false
- input[0] = [
+ input[0] = Channel.of([
[ id:'test', single_end:false ], // meta map
- [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
- ]
-
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+ ])
input[1] = adapter_fasta
input[2] = save_trimmed_fail
input[3] = save_merged
@@ -199,9 +196,9 @@ nextflow_process {
process.out.reads_fail.collect { file(it[1]).getName() } +
process.out.reads_merged.collect { file(it[1]).getName() }
).sort()
- ).match("test_fastp_paired_end-for_stub_match")
+ ).match("test_fastp_paired_end_match")
},
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(process.out.versions).match("versions_paired_end") }
)
}
}
@@ -220,12 +217,11 @@ nextflow_process {
save_trimmed_fail = false
save_merged = false
- input[0] = [
+ input[0] = Channel.of([
[ id:'test', single_end:false ], // meta map
- [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
- ]
-
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+ ])
input[1] = adapter_fasta
input[2] = save_trimmed_fail
input[3] = save_merged
@@ -249,13 +245,14 @@ nextflow_process {
).sort()
).match("test_fastp_paired_end-for_stub_match")
},
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(process.out.versions).match("versions_paired_end-stub") }
)
}
}
test("fastp test_fastp_interleaved") {
- config './nextflow.config'
+
+ config './nextflow.interleaved.config'
when {
params {
outdir = "$outputDir"
@@ -266,10 +263,10 @@ nextflow_process {
save_trimmed_fail = false
save_merged = false
- input[0] = [ [ id:'test', single_end:true ], // meta map
- [ file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true) ]
- ]
-
+ input[0] = Channel.of([
+ [ id:'test', single_end:true ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true) ]
+ ])
input[1] = adapter_fasta
input[2] = save_trimmed_fail
input[3] = save_merged
@@ -281,7 +278,7 @@ nextflow_process {
def html_text = [ "Q20 bases:25.719000 K (93.033098%)",
"paired end (151 cycles + 151 cycles)"]
def log_text = [ "Q20 bases: 12922(92.9841%)",
- "reads passed filter: 198"]
+ "reads passed filter: 162"]
def read_lines = [ "@ERR5069949.2151832 NS500628:121:HK3MMAFX2:2:21208:10793:15304/1",
"TCATAAACCAAAGCACTCACAGTGTCAACAATTTCAGCAGGACAACGCCGACAAGTTCCGAGGAACATGTCTGGACCTATAGTTTTCATAAGTCTACACACTGAATTGAAATATTCTGGTTCTAGTGTGCCCTTAGTTAGCAATGTGCGT",
"AAAAAAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEAEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEAAEEEEE
- { assert path(process.out.reads_fail.get(0).get(1).get(1)).linesGzip.contains(failed_read2_line) }
+ { assert path(process.out.reads_fail.get(0).get(1).get(2)).linesGzip.contains(failed_read2_line) }
}
},
{ html_text.each { html_part ->
@@ -503,7 +500,7 @@ nextflow_process {
{ assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
}
},
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(process.out.versions).match("versions_paired_end_trim_fail") }
)
}
}
@@ -519,11 +516,11 @@ nextflow_process {
adapter_fasta = []
save_trimmed_fail = false
save_merged = true
-
- input[0] = [ [ id:'test', single_end:false ], // meta map
- [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
- ]
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+ ])
input[1] = adapter_fasta
input[2] = save_trimmed_fail
input[3] = save_merged
@@ -592,9 +589,9 @@ nextflow_process {
process.out.reads_fail.collect { file(it[1]).getName() } +
process.out.reads_merged.collect { file(it[1]).getName() }
).sort()
- ).match("test_fastp_paired_end_merged-for_stub_match")
+ ).match("test_fastp_paired_end_merged_match")
},
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(process.out.versions).match("versions_paired_end_merged") }
)
}
}
@@ -613,10 +610,11 @@ nextflow_process {
save_trimmed_fail = false
save_merged = true
- input[0] = [ [ id:'test', single_end:false ], // meta map
- [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
- ]
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+ ])
input[1] = adapter_fasta
input[2] = save_trimmed_fail
input[3] = save_merged
@@ -640,7 +638,7 @@ nextflow_process {
).sort()
).match("test_fastp_paired_end_merged-for_stub_match")
},
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(process.out.versions).match("versions_paired_end_merged_stub") }
)
}
}
@@ -653,14 +651,15 @@ nextflow_process {
}
process {
"""
- adapter_fasta = file("https://github.com/nf-core/test-datasets/raw/modules/data/delete_me/fastp/adapters.fasta", checkIfExists: true)
+ adapter_fasta = Channel.of([ file(params.modules_testdata_base_path + 'delete_me/fastp/adapters.fasta', checkIfExists: true) ])
save_trimmed_fail = false
save_merged = true
- input[0] = [ [ id:'test', single_end:false ], // meta map
- [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
- ]
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+ ])
input[1] = adapter_fasta
input[2] = save_trimmed_fail
input[3] = save_merged
@@ -719,7 +718,7 @@ nextflow_process {
{ assert path(process.out.log.get(0).get(1)).getText().contains(log_part) }
}
},
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(process.out.versions).match("versions_paired_end_merged_adapterlist") }
)
}
}
diff --git a/modules/nf-core/fastp/tests/main.nf.test.snap b/modules/nf-core/fastp/tests/main.nf.test.snap
index 1b7d241..3e87628 100644
--- a/modules/nf-core/fastp/tests/main.nf.test.snap
+++ b/modules/nf-core/fastp/tests/main.nf.test.snap
@@ -1,5 +1,23 @@
{
- "test_fastp_paired_end-for_stub_match": {
+ "fastp test_fastp_interleaved_json": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.json:md5,b24e0624df5cc0b11cd5ba21b726fb22"
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-18T16:19:15.063001"
+ },
+ "test_fastp_paired_end_merged-for_stub_match": {
"content": [
[
[
@@ -9,12 +27,29 @@
"test.fastp.html",
"test.fastp.json",
"test.fastp.log",
+ "test.merged.fastq.gz",
"{id=test, single_end=false}"
]
],
- "timestamp": "2023-12-21T09:44:37.202512"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-01-17T18:10:13.467574"
},
- "fastp test_fastp_interleaved_json": {
+ "versions_interleaved": {
+ "content": [
+ [
+ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T11:56:24.615634793"
+ },
+ "test_fastp_single_end_json": {
"content": [
[
[
@@ -22,13 +57,64 @@
"id": "test",
"single_end": true
},
- "test.fastp.json:md5,168f516f7bd4b7b6c32da7cba87299a4"
+ "test.fastp.json:md5,c852d7a6dba5819e4ac8d9673bedcacc"
]
]
],
- "timestamp": "2023-10-17T11:04:45.794175881"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-18T16:18:43.526412"
},
- "test_fastp_paired_end_merged-for_stub_match": {
+ "versions_paired_end": {
+ "content": [
+ [
+ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T11:55:42.333545689"
+ },
+ "test_fastp_paired_end_match": {
+ "content": [
+ [
+ [
+ "test_1.fastp.fastq.gz",
+ "test_2.fastp.fastq.gz"
+ ],
+ "test.fastp.html",
+ "test.fastp.json",
+ "test.fastp.log",
+ "{id=test, single_end=false}"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T12:03:06.431833729"
+ },
+ "test_fastp_interleaved-_match": {
+ "content": [
+ [
+ "test.fastp.fastq.gz",
+ "test.fastp.html",
+ "test.fastp.json",
+ "test.fastp.log",
+ "{id=test, single_end=true}"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-18T16:19:15.111894"
+ },
+ "test_fastp_paired_end_merged_match": {
"content": [
[
[
@@ -42,29 +128,102 @@
"{id=test, single_end=false}"
]
],
- "timestamp": "2023-12-21T09:53:45.237014"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T12:08:44.496251446"
},
- "test_fastp_single_end_json": {
+ "versions_single_end_stub": {
+ "content": [
+ [
+ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T11:55:27.354051299"
+ },
+ "versions_interleaved-stub": {
+ "content": [
+ [
+ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T11:56:46.535528418"
+ },
+ "versions_single_end_trim_fail": {
+ "content": [
+ [
+ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T11:59:03.724591407"
+ },
+ "test_fastp_paired_end-for_stub_match": {
"content": [
[
[
- {
- "id": "test",
- "single_end": true
- },
- "test.fastp.json:md5,c852d7a6dba5819e4ac8d9673bedcacc"
- ]
+ "test_1.fastp.fastq.gz",
+ "test_2.fastp.fastq.gz"
+ ],
+ "test.fastp.html",
+ "test.fastp.json",
+ "test.fastp.log",
+ "{id=test, single_end=false}"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-01-17T18:07:15.398827"
+ },
+ "versions_paired_end-stub": {
+ "content": [
+ [
+ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
]
],
- "timestamp": "2023-10-17T11:04:10.566343705"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T11:56:06.50017282"
},
- "versions": {
+ "versions_single_end": {
"content": [
[
"versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
]
],
- "timestamp": "2023-10-17T11:04:10.582076024"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T11:55:07.67921647"
+ },
+ "versions_paired_end_merged_stub": {
+ "content": [
+ [
+ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T11:59:47.350653154"
},
"test_fastp_interleaved-for_stub_match": {
"content": [
@@ -76,7 +235,23 @@
"{id=test, single_end=true}"
]
],
- "timestamp": "2023-12-21T09:48:43.148485"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-01-17T18:08:06.127974"
+ },
+ "versions_paired_end_trim_fail": {
+ "content": [
+ [
+ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T11:59:18.140484878"
},
"test_fastp_single_end-for_stub_match": {
"content": [
@@ -88,7 +263,51 @@
"{id=test, single_end=true}"
]
],
- "timestamp": "2023-12-21T09:20:07.254788"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-01-17T18:06:00.244202"
+ },
+ "test_fastp_single_end-_match": {
+ "content": [
+ [
+ "test.fastp.fastq.gz",
+ "test.fastp.html",
+ "test.fastp.json",
+ "test.fastp.log",
+ "{id=test, single_end=true}"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-18T16:18:43.580336"
+ },
+ "versions_paired_end_merged_adapterlist": {
+ "content": [
+ [
+ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T12:05:37.845370554"
+ },
+ "versions_paired_end_merged": {
+ "content": [
+ [
+ "versions.yml:md5,48ffc994212fb1fc9f83a74fa69c9f02"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T11:59:32.860543858"
},
"test_fastp_single_end_trim_fail_json": {
"content": [
@@ -102,6 +321,10 @@
]
]
],
- "timestamp": "2023-10-17T11:05:00.379878948"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-01-17T18:08:41.942317"
}
}
\ No newline at end of file
diff --git a/modules/nf-core/fastp/tests/nextflow.interleaved.config b/modules/nf-core/fastp/tests/nextflow.interleaved.config
new file mode 100644
index 0000000..4be8dbd
--- /dev/null
+++ b/modules/nf-core/fastp/tests/nextflow.interleaved.config
@@ -0,0 +1,5 @@
+process {
+ withName: FASTP {
+ ext.args = "--interleaved_in -e 30" // the FASTP module checks task.ext.args for '--interleaved_in'; `-e 30` is presumably fastp's average-quality cutoff - confirm against fastp docs
+ }
+}
diff --git a/modules/nf-core/fastp/tests/nextflow.config b/modules/nf-core/fastp/tests/nextflow.save_failed.config
similarity index 50%
rename from modules/nf-core/fastp/tests/nextflow.config
rename to modules/nf-core/fastp/tests/nextflow.save_failed.config
index 0f7849a..53b61b0 100644
--- a/modules/nf-core/fastp/tests/nextflow.config
+++ b/modules/nf-core/fastp/tests/nextflow.save_failed.config
@@ -1,6 +1,5 @@
process {
-
withName: FASTP {
- ext.args = "--interleaved_in"
+ ext.args = "-e 30"
}
}
diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf
index 9e19a74..1fd7ac4 100644
--- a/modules/nf-core/fastqc/main.nf
+++ b/modules/nf-core/fastqc/main.nf
@@ -25,6 +25,11 @@ process FASTQC {
def old_new_pairs = reads instanceof Path || reads.size() == 1 ? [[ reads, "${prefix}.${reads.extension}" ]] : reads.withIndex().collect { entry, index -> [ entry, "${prefix}_${index + 1}.${entry.extension}" ] }
def rename_to = old_new_pairs*.join(' ').join(' ')
def renamed_files = old_new_pairs.collect{ old_name, new_name -> new_name }.join(' ')
+
+ def memory_in_mb = MemoryUnit.of("${task.memory}").toUnit('MB')
+ // FastQC memory value allowed range (100 - 10000)
+ def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb)
+
"""
printf "%s %s\\n" $rename_to | while read old_name new_name; do
[ -f "\${new_name}" ] || ln -s \$old_name \$new_name
diff --git a/modules/nf-core/fastqc/tests/main.nf.test b/modules/nf-core/fastqc/tests/main.nf.test
index ad9bc54..70edae4 100644
--- a/modules/nf-core/fastqc/tests/main.nf.test
+++ b/modules/nf-core/fastqc/tests/main.nf.test
@@ -13,12 +13,10 @@ nextflow_process {
when {
process {
"""
- input[0] = [
- [ id: 'test', single_end:true ],
- [
- file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
- ]
- ]
+ input[0] = Channel.of([
+ [ id: 'test', single_end:true ],
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
+ ])
"""
}
}
@@ -35,7 +33,7 @@ nextflow_process {
{ assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
{ assert path(process.out.html[0][1]).text.contains("File type | Conventional base calls | ") },
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(process.out.versions).match("fastqc_versions_single") }
)
}
}
@@ -44,15 +42,13 @@ nextflow_process {
when {
process {
- """
- input[0] = [
- [id: 'test', single_end: false], // meta map
- [
- file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
- ]
- ]
- """
+ """
+ input[0] = Channel.of([
+ [id: 'test', single_end: false], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true) ]
+ ])
+ """
}
}
@@ -67,7 +63,7 @@ nextflow_process {
{ assert path(process.out.html[0][1][0]).text.contains("File type | Conventional base calls | ") },
{ assert path(process.out.html[0][1][1]).text.contains("File type | Conventional base calls | ") },
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(process.out.versions).match("fastqc_versions_paired") }
)
}
}
@@ -76,11 +72,11 @@ nextflow_process {
when {
process {
- """
- input[0] = [
- [id: 'test', single_end: false], // meta map
- file(params.test_data['sarscov2']['illumina']['test_interleaved_fastq_gz'], checkIfExists: true)
- ]
+ """
+ input[0] = Channel.of([
+ [id: 'test', single_end: false], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_interleaved.fastq.gz', checkIfExists: true)
+ ])
"""
}
}
@@ -93,7 +89,7 @@ nextflow_process {
{ assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
{ assert path(process.out.html[0][1]).text.contains("File type | Conventional base calls | ") },
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(process.out.versions).match("fastqc_versions_interleaved") }
)
}
}
@@ -102,12 +98,12 @@ nextflow_process {
when {
process {
- """
- input[0] = [
- [id: 'test', single_end: false], // meta map
- file(params.test_data['sarscov2']['illumina']['test_paired_end_sorted_bam'], checkIfExists: true)
- ]
- """
+ """
+ input[0] = Channel.of([
+ [id: 'test', single_end: false], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.sorted.bam', checkIfExists: true)
+ ])
+ """
}
}
@@ -119,7 +115,7 @@ nextflow_process {
{ assert process.out.zip[0][1] ==~ ".*/test_fastqc.zip" },
{ assert path(process.out.html[0][1]).text.contains("File type | Conventional base calls | ") },
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(process.out.versions).match("fastqc_versions_bam") }
)
}
}
@@ -128,17 +124,15 @@ nextflow_process {
when {
process {
- """
- input[0] = [
- [id: 'test', single_end: false], // meta map
- [
- file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test2_1_fastq_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test2_2_fastq_gz'], checkIfExists: true)
- ]
- ]
- """
+ """
+ input[0] = Channel.of([
+ [id: 'test', single_end: false], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test2_2.fastq.gz', checkIfExists: true) ]
+ ])
+ """
}
}
@@ -159,7 +153,7 @@ nextflow_process {
{ assert path(process.out.html[0][1][2]).text.contains("File type | Conventional base calls | ") },
{ assert path(process.out.html[0][1][3]).text.contains("File type | Conventional base calls | ") },
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(process.out.versions).match("fastqc_versions_multiple") }
)
}
}
@@ -168,12 +162,12 @@ nextflow_process {
when {
process {
- """
- input[0] = [
- [ id:'mysample', single_end:true ], // meta map
- file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
- ]
- """
+ """
+ input[0] = Channel.of([
+ [ id:'mysample', single_end:true ], // meta map
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ])
+ """
}
}
@@ -185,7 +179,7 @@ nextflow_process {
{ assert process.out.zip[0][1] ==~ ".*/mysample_fastqc.zip" },
{ assert path(process.out.html[0][1]).text.contains("File type | Conventional base calls | ") },
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(process.out.versions).match("fastqc_versions_custom_prefix") }
)
}
}
@@ -197,12 +191,10 @@ nextflow_process {
when {
process {
"""
- input[0] = [
- [ id: 'test', single_end:true ],
- [
- file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
- ]
- ]
+ input[0] = Channel.of([
+ [ id: 'test', single_end:true ],
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true) ]
+ ])
"""
}
}
@@ -212,7 +204,7 @@ nextflow_process {
{ assert process.success },
{ assert snapshot(process.out.html.collect { file(it[1]).getName() } +
process.out.zip.collect { file(it[1]).getName() } +
- process.out.versions ).match() }
+ process.out.versions ).match("fastqc_stub") }
)
}
}
diff --git a/modules/nf-core/fastqc/tests/main.nf.test.snap b/modules/nf-core/fastqc/tests/main.nf.test.snap
index 5ef5afb..86f7c31 100644
--- a/modules/nf-core/fastqc/tests/main.nf.test.snap
+++ b/modules/nf-core/fastqc/tests/main.nf.test.snap
@@ -1,5 +1,17 @@
{
- "sarscov2 single-end [fastq] - stub": {
+ "fastqc_versions_interleaved": {
+ "content": [
+ [
+ "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-01-31T17:40:07.293713"
+ },
+ "fastqc_stub": {
"content": [
[
"test.html",
@@ -7,14 +19,70 @@
"versions.yml:md5,e1cc25ca8af856014824abd842e93978"
]
],
- "timestamp": "2023-12-29T02:48:05.126117287"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-01-31T17:31:01.425198"
+ },
+ "fastqc_versions_multiple": {
+ "content": [
+ [
+ "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-01-31T17:40:55.797907"
+ },
+ "fastqc_versions_bam": {
+ "content": [
+ [
+ "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-01-31T17:40:26.795862"
+ },
+ "fastqc_versions_single": {
+ "content": [
+ [
+ "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-01-31T17:39:27.043675"
+ },
+ "fastqc_versions_paired": {
+ "content": [
+ [
+ "versions.yml:md5,e1cc25ca8af856014824abd842e93978"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-01-31T17:39:47.584191"
},
- "versions": {
+ "fastqc_versions_custom_prefix": {
"content": [
[
"versions.yml:md5,e1cc25ca8af856014824abd842e93978"
]
],
- "timestamp": "2023-12-29T02:46:49.507942667"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-01-31T17:41:14.576531"
}
}
\ No newline at end of file
diff --git a/modules/nf-core/gffcompare/environment.yml b/modules/nf-core/gffcompare/environment.yml
new file mode 100644
index 0000000..bcd633e
--- /dev/null
+++ b/modules/nf-core/gffcompare/environment.yml
@@ -0,0 +1,7 @@
+name: gffcompare
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - bioconda::gffcompare=0.12.6
diff --git a/modules/nf-core/gffcompare/main.nf b/modules/nf-core/gffcompare/main.nf
new file mode 100644
index 0000000..edca0f2
--- /dev/null
+++ b/modules/nf-core/gffcompare/main.nf
@@ -0,0 +1,63 @@
+process GFFCOMPARE {
+ tag "$meta.id"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/gffcompare:0.12.6--h9f5acd7_0' :
+ 'biocontainers/gffcompare:0.12.6--h9f5acd7_0' }"
+
+ input:
+ tuple val(meta), path(gtfs)
+ tuple val(meta2), path(fasta), path(fai)
+ tuple val(meta3), path(reference_gtf)
+
+ output:
+ tuple val(meta), path("*.annotated.gtf"), optional: true, emit: annotated_gtf
+ tuple val(meta), path("*.combined.gtf") , optional: true, emit: combined_gtf
+ tuple val(meta), path("*.tmap") , optional: true, emit: tmap
+ tuple val(meta), path("*.refmap") , optional: true, emit: refmap
+ tuple val(meta), path("*.loci") , emit: loci
+ tuple val(meta), path("*.stats") , emit: stats
+ tuple val(meta), path("*.tracking") , emit: tracking
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def ref_fasta = fasta ? "-s ${fasta}" : ''
+ def ref_gtf = reference_gtf ? "-r ${reference_gtf}" : ''
+ """
+ gffcompare \\
+ $args \\
+ $ref_fasta \\
+ $ref_gtf \\
+ -o $prefix \\
+ $gtfs
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ gffcompare: \$(echo \$(gffcompare --version 2>&1) | sed 's/^gffcompare v//')
+ END_VERSIONS
+ """
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ touch ${prefix}.annotated.gtf
+ touch ${prefix}.combined.gtf
+ touch ${prefix}.tmap
+ touch ${prefix}.refmap
+ touch ${prefix}.loci
+ touch ${prefix}.stats
+ touch ${prefix}.tracking
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ gffcompare: \$(echo \$(gffcompare --version 2>&1) | sed 's/^gffcompare v//')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/gffcompare/meta.yml b/modules/nf-core/gffcompare/meta.yml
new file mode 100644
index 0000000..674f08c
--- /dev/null
+++ b/modules/nf-core/gffcompare/meta.yml
@@ -0,0 +1,91 @@
+name: "gffcompare"
+description: Compare, merge, annotate and estimate accuracy of generated gtf files
+keywords:
+ - transcripts
+ - gtf
+ - merge
+ - compare
+tools:
+ - "gffcompare":
+ description: "GffCompare by Geo Pertea"
+ homepage: "http://ccb.jhu.edu/software/stringtie/gffcompare.shtml"
+ documentation: "http://ccb.jhu.edu/software/stringtie/gffcompare.shtml"
+ tool_dev_url: "https://github.com/gpertea/gffcompare"
+ doi: "10.12688/f1000research.23297.1"
+ licence: ["MIT"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing meta data
+ e.g. [ id:'test', single_end:false ]
+ - gtfs:
+ type: file
+ description: |
+ GTF/GFF files
+ e.g. [ 'file_1.gtf', 'file_2.gtf' ]
+ pattern: "*.{gtf,gff}"
+ - fasta:
+ type: file
+ description: Genome reference fasta file (optional)
+ pattern: "*.{fasta,fa}"
+ - fai:
+ type: file
+ description: Index for fasta file
+ pattern: "*.fai"
+ - reference_gtf:
+ type: file
+ description: Reference annotation in gtf/gff format (optional)
+ pattern: "*.{gtf,gff}"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing meta data
+ e.g. [ id:'test', single_end:false ]
+ - annotated_gtf:
+ type: file
+ description: |
+ Annotated gtf file when reference gtf is provided (optional)
+ pattern: "*.annotated.gtf"
+ - combined_gtf:
+ type: file
+ description: |
+ Combined gtf file when multiple input files are
+ provided (optional)
+ pattern: "*.combined.gtf"
+ - tmap:
+ type: file
+ description: |
+ File listing the most closely matching reference transcript
+ for each query transcript (optional)
+ pattern: "*.tmap"
+ - refmap:
+ type: file
+ description: |
+ File listing the reference transcripts with overlapping
+ query transcripts (optional)
+ pattern: "*.refmap"
+ - loci:
+ type: file
+ description: File with loci
+ pattern: "*.loci"
+ - stats:
+ type: file
+ description: |
+ File with stats for input transcripts as compared to the
+ reference, or alternatively stats for the combined gtf
+ pattern: "*.stats"
+ - tracking:
+ type: file
+ description: |
+ This file matches transcripts up between samples
+ pattern: "*.tracking"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@jemten"
+maintainers:
+ - "@jemten"
diff --git a/modules/nf-core/gffread/environment.yml b/modules/nf-core/gffread/environment.yml
index 5398f71..c6df58a 100644
--- a/modules/nf-core/gffread/environment.yml
+++ b/modules/nf-core/gffread/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::gffread=0.12.1
+ - bioconda::gffread=0.12.7
diff --git a/modules/nf-core/gffread/gffread.diff b/modules/nf-core/gffread/gffread.diff
new file mode 100644
index 0000000..fa3668c
--- /dev/null
+++ b/modules/nf-core/gffread/gffread.diff
@@ -0,0 +1,675 @@
+Changes in module 'nf-core/gffread'
+--- modules/nf-core/gffread/environment.yml
++++ modules/nf-core/gffread/environment.yml
+@@ -4,4 +4,4 @@
+ - bioconda
+ - defaults
+ dependencies:
+- - bioconda::gffread=0.12.1
++ - bioconda::gffread=0.12.7
+
+--- modules/nf-core/gffread/meta.yml
++++ modules/nf-core/gffread/meta.yml
+@@ -13,11 +13,25 @@
+ doi: 10.12688/f1000research.23297.1
+ licence: ["MIT"]
+ input:
++ - meta:
++ type: map
++ description: |
++ Groovy Map containing meta data
++ e.g. [ id:'test' ]
+ - gff:
+ type: file
+ description: A reference file in either the GFF3, GFF2 or GTF format.
+ pattern: "*.{gff, gtf}"
++ - fasta:
++ type: file
++ description: A multi-fasta file with the genomic sequences
++ pattern: "*.{fasta,fa,faa,fas,fsa}"
+ output:
++ - meta:
++ type: map
++ description: |
++ Groovy Map containing meta data
++ e.g. [ id:'test' ]
+ - gtf:
+ type: file
+ description: GTF file resulting from the conversion of the GFF input file if '-T' argument is present
+@@ -25,7 +39,11 @@
+ - gffread_gff:
+ type: file
+ description: GFF3 file resulting from the conversion of the GFF input file if '-T' argument is absent
+- pattern: "*.{gff3}"
++ pattern: "*.gff3"
++ - gffread_fasta:
++ type: file
++ description: Fasta file produced when either of '-w', '-x', '-y' parameters is present
++ pattern: "*.fasta"
+ - versions:
+ type: file
+ description: File containing software versions
+@@ -34,3 +52,4 @@
+ - "@edmundmiller"
+ maintainers:
+ - "@edmundmiller"
++ - "@gallvp"
+
+--- modules/nf-core/gffread/main.nf
++++ modules/nf-core/gffread/main.nf
+@@ -1,32 +1,59 @@
+ process GFFREAD {
+- tag "$gff"
++ tag "$meta.id"
+ label 'process_low'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+- 'https://depot.galaxyproject.org/singularity/gffread:0.12.1--h8b12597_0' :
+- 'biocontainers/gffread:0.12.1--h8b12597_0' }"
++ 'https://depot.galaxyproject.org/singularity/gffread:0.12.7--hdcf5f25_4' :
++ 'biocontainers/gffread:0.12.7--hdcf5f25_4' }"
+
+ input:
+- path gff
++ tuple val(meta), path(gff)
++ path fasta
+
+ output:
+- path "*.gtf" , emit: gtf , optional: true
+- path "*.gff3" , emit: gffread_gff , optional: true
+- path "versions.yml" , emit: versions
++ tuple val(meta), path("*.gtf") , emit: gtf , optional: true
++ tuple val(meta), path("*.gff3") , emit: gffread_gff , optional: true
++ tuple val(meta), path("*.fasta"), emit: gffread_fasta , optional: true
++ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+- def args = task.ext.args ?: ''
+- def prefix = task.ext.prefix ?: "${gff.baseName}"
+- def extension = args.contains("-T") ? 'gtf' : 'gffread.gff3'
++ def args = task.ext.args ?: ''
++ def prefix = task.ext.prefix ?: "${meta.id}"
++ def extension = args.contains("-T") ? 'gtf' : ( ( ['-w', '-x', '-y' ].any { args.contains(it) } ) ? 'fasta' : 'gff3' )
++ def fasta_arg = fasta ? "-g $fasta" : ''
++ def output_name = "${prefix}.${extension}"
++ def output = extension == "fasta" ? "$output_name" : "-o $output_name"
++ def args_sorted = args.replaceAll(/(.*)(-[wxy])(.*)/) { all, pre, param, post -> "$pre $post $param" }.trim()
++ if ( "$output_name" in [ "$gff", "$fasta" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+ """
+ gffread \\
+ $gff \\
+- $args \\
+- -o ${prefix}.${extension}
++ $fasta_arg \\
++ $args_sorted \\
++ $output
++
++ cat <<-END_VERSIONS > versions.yml
++ "${task.process}":
++ gffread: \$(gffread --version 2>&1)
++ END_VERSIONS
++ """
++
++ stub:
++ def args = task.ext.args ?: ''
++ def prefix = task.ext.prefix ?: "${meta.id}"
++ def extension = args.contains("-T") ? 'gtf' : ( ( ['-w', '-x', '-y' ].any { args.contains(it) } ) ? 'fasta' : 'gff3' )
++ def fasta_arg = fasta ? "-g $fasta" : ''
++ def output_name = "${prefix}.${extension}"
++ def output = extension == "fasta" ? "$output_name" : "-o $output_name"
++ def args_sorted = args.replaceAll(/(.*)(-[wxy])(.*)/) { all, pre, param, post -> "$pre $post $param" }.trim()
++ if ( "$output_name" in [ "$gff", "$fasta" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
++ """
++ touch $output_name
++
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ gffread: \$(gffread --version 2>&1)
+
+--- modules/nf-core/gffread/tests/main.nf.test.snap
++++ modules/nf-core/gffread/tests/main.nf.test.snap
+@@ -1,24 +1,272 @@
+ {
+ "sarscov2-gff3-gtf": {
+ "content": [
+- [
+- "genome.gtf:md5,2394072d7d31530dfd590c4a117bf6e3"
+- ],
+- [
+- "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6"
+- ]
+- ],
+- "timestamp": "2024-01-23T20:00:32.688779117"
++ {
++ "0": [
++ [
++ {
++ "id": "test"
++ },
++ "test.gtf:md5,1ea0ae98d3388e0576407dc4a24ef428"
++ ]
++ ],
++ "1": [
++
++ ],
++ "2": [
++
++ ],
++ "3": [
++ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
++ ],
++ "gffread_fasta": [
++
++ ],
++ "gffread_gff": [
++
++ ],
++ "gtf": [
++ [
++ {
++ "id": "test"
++ },
++ "test.gtf:md5,1ea0ae98d3388e0576407dc4a24ef428"
++ ]
++ ],
++ "versions": [
++ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
++ ]
++ }
++ ],
++ "meta": {
++ "nf-test": "0.8.4",
++ "nextflow": "23.10.1"
++ },
++ "timestamp": "2024-04-09T10:48:56.496187"
+ },
+ "sarscov2-gff3-gff3": {
+ "content": [
+- [
+- "genome.gffread.gff3:md5,a7d40d99dcddac23ac673c473279ea2d"
+- ],
+- [
+- "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6"
+- ]
+- ],
+- "timestamp": "2024-01-23T20:07:11.457356625"
++ {
++ "0": [
++
++ ],
++ "1": [
++ [
++ {
++ "id": "test"
++ },
++ "test.gff3:md5,c4e5da6267c6bee5899a2c204ae1ad91"
++ ]
++ ],
++ "2": [
++
++ ],
++ "3": [
++ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
++ ],
++ "gffread_fasta": [
++
++ ],
++ "gffread_gff": [
++ [
++ {
++ "id": "test"
++ },
++ "test.gff3:md5,c4e5da6267c6bee5899a2c204ae1ad91"
++ ]
++ ],
++ "gtf": [
++
++ ],
++ "versions": [
++ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
++ ]
++ }
++ ],
++ "meta": {
++ "nf-test": "0.8.4",
++ "nextflow": "23.10.1"
++ },
++ "timestamp": "2024-04-09T10:49:00.892782"
++ },
++ "sarscov2-gff3-gtf-stub": {
++ "content": [
++ {
++ "0": [
++ [
++ {
++ "id": "test"
++ },
++ "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e"
++ ]
++ ],
++ "1": [
++
++ ],
++ "2": [
++
++ ],
++ "3": [
++ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
++ ],
++ "gffread_fasta": [
++
++ ],
++ "gffread_gff": [
++
++ ],
++ "gtf": [
++ [
++ {
++ "id": "test"
++ },
++ "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e"
++ ]
++ ],
++ "versions": [
++ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
++ ]
++ }
++ ],
++ "meta": {
++ "nf-test": "0.8.4",
++ "nextflow": "23.10.1"
++ },
++ "timestamp": "2024-04-09T11:11:26.975666"
++ },
++ "sarscov2-gff3-fasta-stub": {
++ "content": [
++ {
++ "0": [
++
++ ],
++ "1": [
++
++ ],
++ "2": [
++ [
++ {
++ "id": "test"
++ },
++ "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
++ ]
++ ],
++ "3": [
++ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
++ ],
++ "gffread_fasta": [
++ [
++ {
++ "id": "test"
++ },
++ "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
++ ]
++ ],
++ "gffread_gff": [
++
++ ],
++ "gtf": [
++
++ ],
++ "versions": [
++ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
++ ]
++ }
++ ],
++ "meta": {
++ "nf-test": "0.8.4",
++ "nextflow": "23.10.1"
++ },
++ "timestamp": "2024-04-09T11:11:44.34792"
++ },
++ "sarscov2-gff3-gff3-stub": {
++ "content": [
++ {
++ "0": [
++
++ ],
++ "1": [
++ [
++ {
++ "id": "test"
++ },
++ "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e"
++ ]
++ ],
++ "2": [
++
++ ],
++ "3": [
++ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
++ ],
++ "gffread_fasta": [
++
++ ],
++ "gffread_gff": [
++ [
++ {
++ "id": "test"
++ },
++ "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e"
++ ]
++ ],
++ "gtf": [
++
++ ],
++ "versions": [
++ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
++ ]
++ }
++ ],
++ "meta": {
++ "nf-test": "0.8.4",
++ "nextflow": "23.10.1"
++ },
++ "timestamp": "2024-04-09T11:11:35.221671"
++ },
++ "sarscov2-gff3-fasta": {
++ "content": [
++ {
++ "0": [
++
++ ],
++ "1": [
++
++ ],
++ "2": [
++ [
++ {
++ "id": "test"
++ },
++ "test.fasta:md5,5f8108fb51739a0588ccf0a251de919a"
++ ]
++ ],
++ "3": [
++ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
++ ],
++ "gffread_fasta": [
++ [
++ {
++ "id": "test"
++ },
++ "test.fasta:md5,5f8108fb51739a0588ccf0a251de919a"
++ ]
++ ],
++ "gffread_gff": [
++
++ ],
++ "gtf": [
++
++ ],
++ "versions": [
++ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
++ ]
++ }
++ ],
++ "meta": {
++ "nf-test": "0.8.4",
++ "nextflow": "23.10.1"
++ },
++ "timestamp": "2024-04-09T10:54:02.88143"
+ }
+ }
+--- modules/nf-core/gffread/tests/main.nf.test
++++ modules/nf-core/gffread/tests/main.nf.test
+@@ -18,47 +18,203 @@
+ }
+ process {
+ """
+- input[0] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
+- """
+- }
+- }
+-
+- then {
+- assertAll (
+- { assert process.success },
+- { assert snapshot(
+- process.out.gtf,
+- process.out.versions
+- ).match() },
++ input[0] = [
++ [id: 'test'],
++ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
++ ]
++ input[1] = []
++ """
++ }
++ }
++
++ then {
++ assertAll (
++ { assert process.success },
++ { assert snapshot(process.out).match() },
++ { assert process.out.gffread_gff == [] },
++ { assert process.out.gffread_fasta == [] }
++ )
++ }
++
++ }
++
++ test("sarscov2-gff3-gtf-stub") {
++
++ options '-stub'
++ config "./nextflow.config"
++
++ when {
++ params {
++ outdir = "$outputDir"
++ }
++ process {
++ """
++ input[0] = [
++ [id: 'test'],
++ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
++ ]
++ input[1] = []
++ """
++ }
++ }
++
++ then {
++ assertAll (
++ { assert process.success },
++ { assert snapshot(process.out).match() },
++ { assert process.out.gffread_gff == [] },
++ { assert process.out.gffread_fasta == [] }
++ )
++ }
++
++ }
++
++ test("sarscov2-gff3-gff3") {
++
++ config "./nextflow-gff3.config"
++
++ when {
++ params {
++ outdir = "$outputDir"
++ }
++ process {
++ """
++ input[0] = [
++ [id: 'test'],
++ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
++ ]
++ input[1] = []
++ """
++ }
++ }
++
++ then {
++ assertAll (
++ { assert process.success },
++ { assert snapshot(process.out).match() },
++ { assert process.out.gtf == [] },
++ { assert process.out.gffread_fasta == [] }
++ )
++ }
++
++ }
++
++ test("sarscov2-gff3-gff3-stub") {
++
++ options '-stub'
++ config "./nextflow-gff3.config"
++
++ when {
++ params {
++ outdir = "$outputDir"
++ }
++ process {
++ """
++ input[0] = [
++ [id: 'test'],
++ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
++ ]
++ input[1] = []
++ """
++ }
++ }
++
++ then {
++ assertAll (
++ { assert process.success },
++ { assert snapshot(process.out).match() },
++ { assert process.out.gtf == [] },
++ { assert process.out.gffread_fasta == [] }
++ )
++ }
++
++ }
++
++ test("sarscov2-gff3-fasta") {
++
++ config "./nextflow-fasta.config"
++
++ when {
++ params {
++ outdir = "$outputDir"
++ }
++ process {
++ """
++ input[0] = [
++ [id: 'test'],
++ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
++ ]
++ input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true)
++ """
++ }
++ }
++
++ then {
++ assertAll (
++ { assert process.success },
++ { assert snapshot(process.out).match() },
++ { assert process.out.gtf == [] },
+ { assert process.out.gffread_gff == [] }
+ )
+ }
+
+ }
+
+- test("sarscov2-gff3-gff3") {
+-
+- config "./nextflow-gff3.config"
+-
+- when {
+- params {
+- outdir = "$outputDir"
+- }
+- process {
+- """
+- input[0] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
+- """
+- }
+- }
+-
+- then {
+- assertAll (
+- { assert process.success },
+- { assert snapshot(
+- process.out.gffread_gff,
+- process.out.versions
+- ).match() },
+- { assert process.out.gtf == [] },
++ test("sarscov2-gff3-fasta-stub") {
++
++ options '-stub'
++ config "./nextflow-fasta.config"
++
++ when {
++ params {
++ outdir = "$outputDir"
++ }
++ process {
++ """
++ input[0] = [
++ [id: 'test'],
++ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
++ ]
++ input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true)
++ """
++ }
++ }
++
++ then {
++ assertAll (
++ { assert process.success },
++ { assert snapshot(process.out).match() },
++ { assert process.out.gtf == [] },
++ { assert process.out.gffread_gff == [] }
++ )
++ }
++
++ }
++
++ test("sarscov2-gff3-fasta-fail-catch") {
++
++ options '-stub'
++ config "./nextflow-fasta.config"
++
++ when {
++ params {
++ outdir = "$outputDir"
++ }
++ process {
++ """
++ input[0] = [
++ [id: 'genome'],
++ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
++ ]
++ input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true)
++ """
++ }
++ }
++
++ then {
++ assertAll (
++ { assert ! process.success },
++ { assert process.stdout.toString().contains("Input and output names are the same") }
+ )
+ }
+
+
+--- /dev/null
++++ modules/nf-core/gffread/tests/nextflow-fasta.config
+@@ -0,0 +1,5 @@
++process {
++ withName: GFFREAD {
++ ext.args = '-w -S'
++ }
++}
+
+************************************************************
diff --git a/modules/nf-core/gffread/main.nf b/modules/nf-core/gffread/main.nf
index d8a473e..cfd3e2f 100644
--- a/modules/nf-core/gffread/main.nf
+++ b/modules/nf-core/gffread/main.nf
@@ -1,32 +1,59 @@
process GFFREAD {
- tag "$gff"
+ tag "$meta.id"
label 'process_low'
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/gffread:0.12.1--h8b12597_0' :
- 'biocontainers/gffread:0.12.1--h8b12597_0' }"
+ 'https://depot.galaxyproject.org/singularity/gffread:0.12.7--hdcf5f25_4' :
+ 'biocontainers/gffread:0.12.7--hdcf5f25_4' }"
input:
- path gff
+ tuple val(meta), path(gff)
+ path fasta
output:
- path "*.gtf" , emit: gtf , optional: true
- path "*.gff3" , emit: gffread_gff , optional: true
- path "versions.yml" , emit: versions
+ tuple val(meta), path("*.gtf") , emit: gtf , optional: true
+ tuple val(meta), path("*.gff3") , emit: gffread_gff , optional: true
+ tuple val(meta), path("*.fasta"), emit: gffread_fasta , optional: true
+ path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
- def args = task.ext.args ?: ''
- def prefix = task.ext.prefix ?: "${gff.baseName}"
- def extension = args.contains("-T") ? 'gtf' : 'gffread.gff3'
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def extension = args.contains("-T") ? 'gtf' : ( ( ['-w', '-x', '-y' ].any { args.contains(it) } ) ? 'fasta' : 'gff3' )
+ def fasta_arg = fasta ? "-g $fasta" : ''
+ def output_name = "${prefix}.${extension}"
+ def output = extension == "fasta" ? "$output_name" : "-o $output_name"
+ def args_sorted = args.replaceAll(/(.*)(-[wxy])(.*)/) { all, pre, param, post -> "$pre $post $param" }.trim()
+ if ( "$output_name" in [ "$gff", "$fasta" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
"""
gffread \\
$gff \\
- $args \\
- -o ${prefix}.${extension}
+ $fasta_arg \\
+ $args_sorted \\
+ $output
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ gffread: \$(gffread --version 2>&1)
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def extension = args.contains("-T") ? 'gtf' : ( ( ['-w', '-x', '-y' ].any { args.contains(it) } ) ? 'fasta' : 'gff3' )
+ def fasta_arg = fasta ? "-g $fasta" : ''
+ def output_name = "${prefix}.${extension}"
+ def output = extension == "fasta" ? "$output_name" : "-o $output_name"
+ def args_sorted = args.replaceAll(/(.*)(-[wxy])(.*)/) { all, pre, param, post -> "$pre $post $param" }.trim()
+ if ( "$output_name" in [ "$gff", "$fasta" ] ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+ """
+ touch $output_name
+
cat <<-END_VERSIONS > versions.yml
"${task.process}":
gffread: \$(gffread --version 2>&1)
diff --git a/modules/nf-core/gffread/meta.yml b/modules/nf-core/gffread/meta.yml
index 27ac310..c060282 100644
--- a/modules/nf-core/gffread/meta.yml
+++ b/modules/nf-core/gffread/meta.yml
@@ -13,11 +13,25 @@ tools:
doi: 10.12688/f1000research.23297.1
licence: ["MIT"]
input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing meta data
+ e.g. [ id:'test' ]
- gff:
type: file
description: A reference file in either the GFF3, GFF2 or GTF format.
pattern: "*.{gff, gtf}"
+ - fasta:
+ type: file
+ description: A multi-fasta file with the genomic sequences
+ pattern: "*.{fasta,fa,faa,fas,fsa}"
output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing meta data
+ e.g. [ id:'test' ]
- gtf:
type: file
description: GTF file resulting from the conversion of the GFF input file if '-T' argument is present
@@ -25,12 +39,17 @@ output:
- gffread_gff:
type: file
description: GFF3 file resulting from the conversion of the GFF input file if '-T' argument is absent
- pattern: "*.{gff3}"
+ pattern: "*.gff3"
+ - gffread_fasta:
+ type: file
+ description: Fasta file produced when any of the '-w', '-x', '-y' parameters is present
+ pattern: "*.fasta"
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
authors:
- - "@emiller88"
+ - "@edmundmiller"
maintainers:
- - "@emiller88"
+ - "@edmundmiller"
+ - "@gallvp"
diff --git a/modules/nf-core/gffread/tests/main.nf.test b/modules/nf-core/gffread/tests/main.nf.test
index 3c064b3..4cd13dc 100644
--- a/modules/nf-core/gffread/tests/main.nf.test
+++ b/modules/nf-core/gffread/tests/main.nf.test
@@ -18,17 +18,52 @@ nextflow_process {
}
process {
"""
- input[0] = file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
+ input[0] = [
+ [id: 'test'],
+ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
+ ]
+ input[1] = []
"""
}
}
then {
assertAll (
- { assert process.success },
- { assert snapshot(process.out).match() },
- { assert process.out.gtf != null },
- { assert process.out.gffread_gff == [] }
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert process.out.gffread_gff == [] },
+ { assert process.out.gffread_fasta == [] }
+ )
+ }
+
+ }
+
+ test("sarscov2-gff3-gtf-stub") {
+
+ options '-stub'
+ config "./nextflow.config"
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [
+ [id: 'test'],
+ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
+ ]
+ input[1] = []
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert process.out.gffread_gff == [] },
+ { assert process.out.gffread_fasta == [] }
)
}
@@ -36,23 +71,150 @@ nextflow_process {
test("sarscov2-gff3-gff3") {
+ config "./nextflow-gff3.config"
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [
+ [id: 'test'],
+ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
+ ]
+ input[1] = []
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert process.out.gtf == [] },
+ { assert process.out.gffread_fasta == [] }
+ )
+ }
+
+ }
+
+ test("sarscov2-gff3-gff3-stub") {
+
+ options '-stub'
+ config "./nextflow-gff3.config"
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [
+ [id: 'test'],
+ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
+ ]
+ input[1] = []
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert process.out.gtf == [] },
+ { assert process.out.gffread_fasta == [] }
+ )
+ }
+
+ }
+
+ test("sarscov2-gff3-fasta") {
+
+ config "./nextflow-fasta.config"
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [
+ [id: 'test'],
+ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
+ ]
+ input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true)
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert process.out.gtf == [] },
+ { assert process.out.gffread_gff == [] }
+ )
+ }
+
+ }
+
+ test("sarscov2-gff3-fasta-stub") {
+
+ options '-stub'
+ config "./nextflow-fasta.config"
+
+ when {
+ params {
+ outdir = "$outputDir"
+ }
+ process {
+ """
+ input[0] = [
+ [id: 'test'],
+ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
+ ]
+ input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true)
+ """
+ }
+ }
+
+ then {
+ assertAll (
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert process.out.gtf == [] },
+ { assert process.out.gffread_gff == [] }
+ )
+ }
+
+ }
+
+ test("sarscov2-gff3-fasta-fail-catch") {
+
+ options '-stub'
+ config "./nextflow-fasta.config"
+
when {
params {
outdir = "$outputDir"
}
process {
"""
- input[0] = file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
+ input[0] = [
+ [id: 'genome'],
+ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.gff3", checkIfExists: true)
+ ]
+ input[1] = file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true)
"""
}
}
then {
assertAll (
- { assert process.success },
- { assert snapshot(process.out).match() },
- { assert process.out.gtf == [] },
- { assert process.out.gffread_gff != null },
+ { assert ! process.success },
+ { assert process.stdout.toString().contains("Input and output names are the same") }
)
}
diff --git a/modules/nf-core/gffread/tests/main.nf.test.snap b/modules/nf-core/gffread/tests/main.nf.test.snap
index 1f1342e..1526232 100644
--- a/modules/nf-core/gffread/tests/main.nf.test.snap
+++ b/modules/nf-core/gffread/tests/main.nf.test.snap
@@ -3,26 +3,46 @@
"content": [
{
"0": [
- "genome.gtf:md5,2394072d7d31530dfd590c4a117bf6e3"
+ [
+ {
+ "id": "test"
+ },
+ "test.gtf:md5,1ea0ae98d3388e0576407dc4a24ef428"
+ ]
],
"1": [
],
"2": [
- "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6"
+
+ ],
+ "3": [
+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
+ ],
+ "gffread_fasta": [
+
],
"gffread_gff": [
],
"gtf": [
- "genome.gtf:md5,2394072d7d31530dfd590c4a117bf6e3"
+ [
+ {
+ "id": "test"
+ },
+ "test.gtf:md5,1ea0ae98d3388e0576407dc4a24ef428"
+ ]
],
"versions": [
- "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6"
+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
]
}
],
- "timestamp": "2023-11-29T15:39:30.006985"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-09T10:48:56.496187"
},
"sarscov2-gff3-gff3": {
"content": [
@@ -31,22 +51,222 @@
],
"1": [
- "genome.gffread.gff3:md5,a7d40d99dcddac23ac673c473279ea2d"
+ [
+ {
+ "id": "test"
+ },
+ "test.gff3:md5,c4e5da6267c6bee5899a2c204ae1ad91"
+ ]
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
+ ],
+ "gffread_fasta": [
+
+ ],
+ "gffread_gff": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gff3:md5,c4e5da6267c6bee5899a2c204ae1ad91"
+ ]
+ ],
+ "gtf": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-09T10:49:00.892782"
+ },
+ "sarscov2-gff3-gtf-stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
+ ],
+ "gffread_fasta": [
+
+ ],
+ "gffread_gff": [
+
+ ],
+ "gtf": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-09T11:11:26.975666"
+ },
+ "sarscov2-gff3-fasta-stub": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+
+ ],
+ "2": [
+ [
+ {
+ "id": "test"
+ },
+ "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
+ ],
+ "gffread_fasta": [
+ [
+ {
+ "id": "test"
+ },
+ "test.fasta:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "gffread_gff": [
+
+ ],
+ "gtf": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-09T11:11:44.34792"
+ },
+ "sarscov2-gff3-gff3-stub": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+
+ ],
+ "3": [
+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
+ ],
+ "gffread_fasta": [
+
+ ],
+ "gffread_gff": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "gtf": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-09T11:11:35.221671"
+ },
+ "sarscov2-gff3-fasta": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+
],
"2": [
- "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6"
+ [
+ {
+ "id": "test"
+ },
+ "test.fasta:md5,5f8108fb51739a0588ccf0a251de919a"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
+ ],
+ "gffread_fasta": [
+ [
+ {
+ "id": "test"
+ },
+ "test.fasta:md5,5f8108fb51739a0588ccf0a251de919a"
+ ]
],
"gffread_gff": [
- "genome.gffread.gff3:md5,a7d40d99dcddac23ac673c473279ea2d"
+
],
"gtf": [
],
"versions": [
- "versions.yml:md5,a71b6cdfa528dd206a238ec64bae13d6"
+ "versions.yml:md5,05f671c6c6e530acedad0af0a5948dbd"
]
}
],
- "timestamp": "2023-11-29T15:39:34.636061"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-09T10:54:02.88143"
}
}
\ No newline at end of file
diff --git a/modules/nf-core/gffread/tests/nextflow-fasta.config b/modules/nf-core/gffread/tests/nextflow-fasta.config
new file mode 100644
index 0000000..ac6cb14
--- /dev/null
+++ b/modules/nf-core/gffread/tests/nextflow-fasta.config
@@ -0,0 +1,5 @@
+process {
+ withName: GFFREAD {
+ ext.args = '-w -S'
+ }
+}
diff --git a/modules/nf-core/gffread/tests/nextflow-gff3.config b/modules/nf-core/gffread/tests/nextflow-gff3.config
new file mode 100644
index 0000000..afe0830
--- /dev/null
+++ b/modules/nf-core/gffread/tests/nextflow-gff3.config
@@ -0,0 +1,5 @@
+process {
+ withName: GFFREAD {
+ ext.args = ''
+ }
+}
diff --git a/modules/nf-core/gt/gff3/environment.yml b/modules/nf-core/gt/gff3/environment.yml
new file mode 100644
index 0000000..8289fb3
--- /dev/null
+++ b/modules/nf-core/gt/gff3/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "gt_gff3"
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - "bioconda::genometools-genometools=1.6.5"
diff --git a/modules/nf-core/gt/gff3/main.nf b/modules/nf-core/gt/gff3/main.nf
new file mode 100644
index 0000000..6324a39
--- /dev/null
+++ b/modules/nf-core/gt/gff3/main.nf
@@ -0,0 +1,51 @@
+process GT_GFF3 {
+ tag "$meta.id"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/genometools-genometools:1.6.5--py310h3db02ab_0':
+ 'biocontainers/genometools-genometools:1.6.5--py310h3db02ab_0' }"
+
+ input:
+ tuple val(meta), path(gff3)
+
+ output:
+ tuple val(meta), path("*.gt.gff3") , emit: gt_gff3 , optional: true
+ tuple val(meta), path("*.error.log"), emit: error_log , optional: true
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ """
+ gt \\
+ gff3 \\
+ $args \\
+ "$gff3" \\
+ > "${prefix}.gt.gff3" \\
+ 2> >(tee "${prefix}.error.log" >&2) \\
+ || echo "Errors from gt-gff3 printed to ${prefix}.error.log"
+
+ if grep -q "gt gff3: error:" "${prefix}.error.log"; then
+ echo "gt-gff3 failed to parse $gff3"
+
+ rm \\
+ "${prefix}.gt.gff3"
+ else
+ echo "gt-gff3 successfully parsed $gff3"
+
+ mv \\
+ "${prefix}.error.log" \\
+ gt_gff3.stderr
+ fi
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ genometools: \$(gt --version | head -1 | sed 's/gt (GenomeTools) //')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/gt/gff3/meta.yml b/modules/nf-core/gt/gff3/meta.yml
new file mode 100644
index 0000000..5cecd8d
--- /dev/null
+++ b/modules/nf-core/gt/gff3/meta.yml
@@ -0,0 +1,48 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "gt_gff3"
+description: "GenomeTools gt-gff3 utility to parse, possibly transform, and output GFF3 files"
+keywords:
+ - genome
+ - gff3
+ - annotation
+tools:
+ - "gt":
+ description: "The GenomeTools genome analysis system"
+ homepage: "https://genometools.org/index.html"
+ documentation: "https://genometools.org/documentation.html"
+ tool_dev_url: "https://github.com/genometools/genometools"
+ doi: "10.1109/TCBB.2013.68"
+ licence: ["ISC"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'test' ]`
+ - gff3:
+ type: file
+ description: Input gff3 file
+ pattern: "*.{gff,gff3}"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'test' ]`
+ - gt_gff3:
+ type: file
+ description: Parsed gff3 file produced only if there is no parsing error
+ pattern: "*.gt.gff3"
+ - error_log:
+ type: file
+ description: Error log if gt-gff3 failed to parse the input gff3 file
+ pattern: "*.error.log"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@gallvp"
+maintainers:
+ - "@gallvp"
diff --git a/modules/nf-core/gt/gff3/tests/main.nf.test b/modules/nf-core/gt/gff3/tests/main.nf.test
new file mode 100644
index 0000000..cb44bc8
--- /dev/null
+++ b/modules/nf-core/gt/gff3/tests/main.nf.test
@@ -0,0 +1,61 @@
+nextflow_process {
+
+ name "Test Process GT_GFF3"
+ script "../main.nf"
+ process "GT_GFF3"
+ config "./nextflow.config"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "gt"
+ tag "gt/gff3"
+
+ test("sarscov2-gff3-valid") {
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert process.out.gt_gff3 != null },
+ { assert process.out.error_log == [] }
+ )
+ }
+
+ }
+
+ test("sarscov2-gff3-invalid") {
+ when {
+ process {
+ """
+ input[0] = Channel.of(
+ '##gff-version 3',
+ 'chr22\tID=gene:ENSG00000233995;Name=AP000547.1'
+ )
+ .collectFile(name: 'sample.gff3', newLine: true)
+ .map { file -> [ [ id:'test' ], file ] }
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() },
+ { assert process.out.gt_gff3 == [] },
+ { assert process.out.error_log != null },
+ { assert path(process.out.error_log.get(0).get(1)).getText().contains("gt gff3: error:") }
+ )
+ }
+ }
+
+}
diff --git a/modules/nf-core/gt/gff3/tests/main.nf.test.snap b/modules/nf-core/gt/gff3/tests/main.nf.test.snap
new file mode 100644
index 0000000..6e454f1
--- /dev/null
+++ b/modules/nf-core/gt/gff3/tests/main.nf.test.snap
@@ -0,0 +1,72 @@
+{
+ "sarscov2-gff3-invalid": {
+ "content": [
+ {
+ "0": [
+
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test.error.log:md5,31e6117c516f936ec403f792c732bc76"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,9753770dd19a2a306dcf16d4aaf049eb"
+ ],
+ "error_log": [
+ [
+ {
+ "id": "test"
+ },
+ "test.error.log:md5,31e6117c516f936ec403f792c732bc76"
+ ]
+ ],
+ "gt_gff3": [
+
+ ],
+ "versions": [
+ "versions.yml:md5,9753770dd19a2a306dcf16d4aaf049eb"
+ ]
+ }
+ ],
+ "timestamp": "2023-11-29T10:42:11.408352"
+ },
+ "sarscov2-gff3-valid": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gt.gff3:md5,2ae900237ace415557b8735fac088b85"
+ ]
+ ],
+ "1": [
+
+ ],
+ "2": [
+ "versions.yml:md5,9753770dd19a2a306dcf16d4aaf049eb"
+ ],
+ "error_log": [
+
+ ],
+ "gt_gff3": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gt.gff3:md5,2ae900237ace415557b8735fac088b85"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,9753770dd19a2a306dcf16d4aaf049eb"
+ ]
+ }
+ ],
+ "timestamp": "2023-11-29T10:42:07.817894"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/gt/gff3/tests/nextflow.config b/modules/nf-core/gt/gff3/tests/nextflow.config
new file mode 100644
index 0000000..af56226
--- /dev/null
+++ b/modules/nf-core/gt/gff3/tests/nextflow.config
@@ -0,0 +1,3 @@
+process {
+ ext.args = '-tidy -retainids'
+}
diff --git a/modules/nf-core/gt/gff3/tests/tags.yml b/modules/nf-core/gt/gff3/tests/tags.yml
new file mode 100644
index 0000000..0ce15a9
--- /dev/null
+++ b/modules/nf-core/gt/gff3/tests/tags.yml
@@ -0,0 +1,2 @@
+gt/gff3:
+ - "modules/nf-core/gt/gff3/**"
diff --git a/modules/nf-core/gunzip/tests/main.nf.test b/modules/nf-core/gunzip/tests/main.nf.test
index d031792..6406008 100644
--- a/modules/nf-core/gunzip/tests/main.nf.test
+++ b/modules/nf-core/gunzip/tests/main.nf.test
@@ -15,10 +15,11 @@ nextflow_process {
}
process {
"""
- input[0] = [
- [],
- file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true)
- ]
+ input[0] = Channel.of([
+ [],
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true)
+ ]
+ )
"""
}
}
diff --git a/modules/pfr/liftoff/environment.yml b/modules/nf-core/liftoff/environment.yml
similarity index 100%
rename from modules/pfr/liftoff/environment.yml
rename to modules/nf-core/liftoff/environment.yml
diff --git a/modules/pfr/liftoff/main.nf b/modules/nf-core/liftoff/main.nf
similarity index 85%
rename from modules/pfr/liftoff/main.nf
rename to modules/nf-core/liftoff/main.nf
index 317eca1..4db5da2 100644
--- a/modules/pfr/liftoff/main.nf
+++ b/modules/nf-core/liftoff/main.nf
@@ -11,6 +11,7 @@ process LIFTOFF {
tuple val(meta), path(target_fa)
path ref_fa, name: 'ref_assembly.fa'
path ref_annotation
+ path ref_db
output:
tuple val(meta), path("${prefix}.gff3") , emit: gff3
@@ -22,11 +23,14 @@ process LIFTOFF {
task.ext.when == null || task.ext.when
script:
- def args = task.ext.args ?: ''
- prefix = task.ext.prefix ?: "${meta.id}"
+ def args = task.ext.args ?: ''
+ def arg_g = ref_annotation ? "-g $ref_annotation" : ''
+ def arg_db = ref_db ? "-db $ref_db" : ''
+ prefix = task.ext.prefix ?: "${meta.id}"
"""
liftoff \\
- -g $ref_annotation \\
+ $arg_g \\
+ $arg_db \\
-p $task.cpus \\
-o "${prefix}.gff3" \\
-u "${prefix}.unmapped.txt" \\
diff --git a/modules/pfr/liftoff/meta.yml b/modules/nf-core/liftoff/meta.yml
similarity index 91%
rename from modules/pfr/liftoff/meta.yml
rename to modules/nf-core/liftoff/meta.yml
index 46b3c58..10e502c 100644
--- a/modules/pfr/liftoff/meta.yml
+++ b/modules/nf-core/liftoff/meta.yml
@@ -38,6 +38,11 @@ input:
type: file
description: Reference assembly annotations in gtf or gff3 format
pattern: "*.{gtf,gff3}"
+ - ref_db:
+ type: file
+ description: |
+ Name of feature database; if not specified, the -g argument must
+ be provided and a database will be built automatically
output:
- meta:
type: map
diff --git a/modules/pfr/liftoff/tests/main.nf.test b/modules/nf-core/liftoff/tests/main.nf.test
similarity index 70%
rename from modules/pfr/liftoff/tests/main.nf.test
rename to modules/nf-core/liftoff/tests/main.nf.test
index 272c882..fc0f567 100644
--- a/modules/pfr/liftoff/tests/main.nf.test
+++ b/modules/nf-core/liftoff/tests/main.nf.test
@@ -9,6 +9,7 @@ nextflow_process {
tag "modules_nfcore"
tag "nf-core/gunzip"
tag "liftoff"
+ tag "gunzip"
test("homo_sapiens-genome_21_fasta-genome_1_fasta-genome_1_gtf") {
@@ -38,6 +39,7 @@ nextflow_process {
input[2] = [
file(params.test_data['homo_sapiens']['genome']['genome_1_gtf'], checkIfExists: true)
]
+ input[3] = []
"""
}
}
@@ -48,16 +50,6 @@ nextflow_process {
{ assert snapshot(process.out.unmapped_txt).match("unmapped_txt") },
{ assert file(process.out.gff3[0][1]).text.contains("chr21\tLiftoff\texon\t34608061\t34608118\t.\t+\t.") },
{ assert file(process.out.polished_gff3[0][1]).text.contains("chr21\tLiftoff\texon\t34608061\t34608118\t.\t+\t.") },
- {
- assert snapshot(
- (
- [process.out.gff3[0][0].toString()] + // meta
- process.out.gff3.collect { file(it[1]).getName() } +
- process.out.polished_gff3.collect { file(it[1]).getName() } +
- process.out.unmapped_txt.collect { file(it[1]).getName() }
- ).sort()
- ).match("homo_sapiens-genome_21_fasta-genome_1_fasta-genome_1_gtf-for_stub_match")
- },
{ assert snapshot(process.out.versions).match("versions") }
)
}
@@ -93,6 +85,7 @@ nextflow_process {
input[2] = [
file(params.test_data['homo_sapiens']['genome']['genome_1_gtf'], checkIfExists: true)
]
+ input[3] = []
"""
}
}
@@ -100,17 +93,7 @@ nextflow_process {
then {
assertAll(
{ assert process.success },
- {
- assert snapshot(
- (
- [process.out.gff3[0][0].toString()] + // meta
- process.out.gff3.collect { file(it[1]).getName() } +
- process.out.polished_gff3.collect { file(it[1]).getName() } +
- process.out.unmapped_txt.collect { file(it[1]).getName() }
- ).sort()
- ).match("homo_sapiens-genome_21_fasta-genome_1_fasta-genome_1_gtf-for_stub_match")
- },
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(process.out).match() }
)
}
diff --git a/modules/nf-core/liftoff/tests/main.nf.test.snap b/modules/nf-core/liftoff/tests/main.nf.test.snap
new file mode 100644
index 0000000..f606446
--- /dev/null
+++ b/modules/nf-core/liftoff/tests/main.nf.test.snap
@@ -0,0 +1,96 @@
+{
+ "unmapped_txt": {
+ "content": [
+ [
+ [
+ {
+ "id": "test"
+ },
+ "test.unmapped.txt:md5,7391d10df6e15db356b084c9af5259e4"
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2023-12-01T13:57:40.748507"
+ },
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,205d9c609e7fe27d8199550d842bdce8"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2023-12-01T13:57:40.752414"
+ },
+ "homo_sapiens-genome_21_fasta-genome_1_fasta-genome_1_gtf-stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test.polished.gff3:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test"
+ },
+ "test.unmapped.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ "versions.yml:md5,205d9c609e7fe27d8199550d842bdce8"
+ ],
+ "gff3": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gff3:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "polished_gff3": [
+ [
+ {
+ "id": "test"
+ },
+ "test.polished.gff3:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "unmapped_txt": [
+ [
+ {
+ "id": "test"
+ },
+ "test.unmapped.txt:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,205d9c609e7fe27d8199550d842bdce8"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-19T09:15:25.661428"
+ }
+}
\ No newline at end of file
diff --git a/modules/nf-core/liftoff/tests/nextflow.config b/modules/nf-core/liftoff/tests/nextflow.config
new file mode 100644
index 0000000..f35ef80
--- /dev/null
+++ b/modules/nf-core/liftoff/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+ withName: LIFTOFF {
+ ext.args = '-polish'
+ }
+}
diff --git a/modules/nf-core/liftoff/tests/tags.yml b/modules/nf-core/liftoff/tests/tags.yml
new file mode 100644
index 0000000..4d0adb6
--- /dev/null
+++ b/modules/nf-core/liftoff/tests/tags.yml
@@ -0,0 +1,2 @@
+liftoff:
+ - "modules/nf-core/liftoff/**"
diff --git a/modules/nf-core/samtools/cat/environment.yml b/modules/nf-core/samtools/cat/environment.yml
index 0455a7d..75f10f7 100644
--- a/modules/nf-core/samtools/cat/environment.yml
+++ b/modules/nf-core/samtools/cat/environment.yml
@@ -4,4 +4,5 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::samtools=1.18
+ - bioconda::samtools=1.19.2
+ - bioconda::htslib=1.19.1
diff --git a/modules/nf-core/samtools/cat/main.nf b/modules/nf-core/samtools/cat/main.nf
index b3b2508..06615aa 100644
--- a/modules/nf-core/samtools/cat/main.nf
+++ b/modules/nf-core/samtools/cat/main.nf
@@ -4,8 +4,8 @@ process SAMTOOLS_CAT {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/samtools:1.18--h50ea8bc_1' :
- 'biocontainers/samtools:1.18--h50ea8bc_1' }"
+ 'https://depot.galaxyproject.org/singularity/samtools:1.19.2--h50ea8bc_0' :
+ 'biocontainers/samtools:1.19.2--h50ea8bc_0' }"
input:
tuple val(meta), path(input_files, stageAs: "?/*")
diff --git a/modules/nf-core/samtools/cat/tests/main.nf.test b/modules/nf-core/samtools/cat/tests/main.nf.test
index 49c633f..dad80b8 100644
--- a/modules/nf-core/samtools/cat/tests/main.nf.test
+++ b/modules/nf-core/samtools/cat/tests/main.nf.test
@@ -9,18 +9,16 @@ nextflow_process {
tag "samtools"
tag "samtools/cat"
- test("sarscov2 - [bam1, bam2]") {
+ test("bams") {
when {
process {
"""
- input[0] = [
- [ id:'test', single_end:false ], // meta map
- [
- file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_unaligned_bam'], checkIfExists: true)
- ]
- ]
+ input[0] = Channel.of([
+ [id: 'test', single_end: false], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.unaligned.bam', checkIfExists: true) ]
+ ])
"""
}
}
@@ -28,30 +26,25 @@ nextflow_process {
then {
assertAll(
{ assert process.success },
- { assert snapshot(
- file(process.out.bam[0][1]).name,
- process.out.cram,
- process.out.versions
- ).match() }
+ { assert snapshot(file(process.out.bam[0][1]).name).match("bams_bam") },
+ { assert snapshot(process.out.cram).match("bams_cram") },
+ { assert snapshot(process.out.versions).match("bams_versions") }
)
}
-
}
- test("sarscov2 - [bam1, bam2] - stub") {
+ test("bams_stub") {
options "-stub"
when {
process {
"""
- input[0] = [
- [ id:'test', single_end:false ], // meta map
- [
- file(params.test_data['sarscov2']['illumina']['test_paired_end_bam'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_unaligned_bam'], checkIfExists: true)
- ]
- ]
+ input[0] = Channel.of([
+ [id: 'test', single_end: false], // meta map
+ [ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.paired_end.bam', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/bam/test.unaligned.bam', checkIfExists: true) ]
+ ])
"""
}
}
@@ -59,14 +52,10 @@ nextflow_process {
then {
assertAll(
{ assert process.success },
- { assert snapshot(
- file(process.out.bam[0][1]).name,
- process.out.cram,
- process.out.versions
- ).match() }
+ { assert snapshot(file(process.out.bam[0][1]).name).match("bams_stub_bam") },
+ { assert snapshot(process.out.cram).match("bams_stub_cram") },
+ { assert snapshot(process.out.versions).match("bams_stub_versions") }
)
}
-
}
-
}
diff --git a/modules/nf-core/samtools/cat/tests/main.nf.test.snap b/modules/nf-core/samtools/cat/tests/main.nf.test.snap
index 298e25d..f99cdd6 100644
--- a/modules/nf-core/samtools/cat/tests/main.nf.test.snap
+++ b/modules/nf-core/samtools/cat/tests/main.nf.test.snap
@@ -1,26 +1,70 @@
{
- "sarscov2 - [bam1, bam2]": {
+ "bams_stub_cram": {
"content": [
- "test.bam",
[
- ],
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.04.3"
+ },
+ "timestamp": "2024-02-02T16:45:42.587418"
+ },
+ "bams_stub_versions": {
+ "content": [
[
- "versions.yml:md5,f10a4f6b2e0272bef2ceb4ca826a15a1"
+ "versions.yml:md5,e214a92343158372aa79dabe0fb0064a"
]
],
- "timestamp": "2023-12-04T14:00:18.264348819"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.01.0"
+ },
+ "timestamp": "2024-02-13T16:15:40.92408626"
+ },
+ "bams_bam": {
+ "content": [
+ "test.bam"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.04.3"
+ },
+ "timestamp": "2024-02-02T16:45:37.965199"
},
- "sarscov2 - [bam1, bam2] - stub": {
+ "bams_cram": {
"content": [
- "test.bam",
[
- ],
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.04.3"
+ },
+ "timestamp": "2024-02-02T16:45:37.96805"
+ },
+ "bams_stub_bam": {
+ "content": [
+ "test.bam"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.04.3"
+ },
+ "timestamp": "2024-02-02T16:45:42.583881"
+ },
+ "bams_versions": {
+ "content": [
[
- "versions.yml:md5,f10a4f6b2e0272bef2ceb4ca826a15a1"
+ "versions.yml:md5,e214a92343158372aa79dabe0fb0064a"
]
],
- "timestamp": "2023-12-04T14:03:17.714482742"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "24.01.0"
+ },
+ "timestamp": "2024-02-13T16:15:33.224336325"
}
}
\ No newline at end of file
diff --git a/modules/nf-core/sortmerna/main.nf b/modules/nf-core/sortmerna/main.nf
index 29c640c..7c17e50 100644
--- a/modules/nf-core/sortmerna/main.nf
+++ b/modules/nf-core/sortmerna/main.nf
@@ -9,88 +9,102 @@ process SORTMERNA {
input:
tuple val(meta), path(reads)
- path fastas
+ tuple val(meta2), path(fastas)
+ tuple val(meta3), path(index)
output:
- tuple val(meta), path("*non_rRNA.fastq.gz"), emit: reads
- tuple val(meta), path("*.log") , emit: log
- path "versions.yml" , emit: versions
+ tuple val(meta), path("*non_rRNA.fastq.gz"), emit: reads, optional: true
+ tuple val(meta), path("*.log") , emit: log, optional: true
+ tuple val(meta2), path("idx") , emit: index, optional: true
+ path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
script:
- def args = task.ext.args ?: ''
- def prefix = task.ext.prefix ?: "${meta.id}"
- if (meta.single_end) {
- """
- sortmerna \\
- ${'--ref '+fastas.join(' --ref ')} \\
- --reads $reads \\
- --threads $task.cpus \\
- --workdir . \\
- --aligned rRNA_reads \\
- --fastx \\
- --other non_rRNA_reads \\
- $args
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
- mv non_rRNA_reads.f*q.gz ${prefix}.non_rRNA.fastq.gz
- mv rRNA_reads.log ${prefix}.sortmerna.log
+ def index_only = args.contains('--index 1')? true : false
+ def skip_index = args.contains('--index 0')? true : false
+ def paired_end = reads instanceof List
+ def paired_cmd = ''
+ def reads_args = ''
+ def out2_cmd = ''
+ def mv_cmd = ''
+ def reads_input = ''
+ def refs_input = ''
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- sortmerna: \$(echo \$(sortmerna --version 2>&1) | sed 's/^.*SortMeRNA version //; s/ Build Date.*\$//')
- END_VERSIONS
- """
- } else {
- """
- sortmerna \\
- ${'--ref '+fastas.join(' --ref ')} \\
- --reads ${reads[0]} \\
- --reads ${reads[1]} \\
- --threads $task.cpus \\
- --workdir . \\
- --aligned rRNA_reads \\
- --fastx \\
- --other non_rRNA_reads \\
- --paired_in \\
- --out2 \\
- $args
+ if (! index_only){
+ reads_args = '--aligned rRNA_reads --fastx --other non_rRNA_reads'
+ reads_input = paired_end ? reads.collect{"--reads $it"}.join(' ') : "--reads $reads"
+ def n_fastq = paired_end ? reads.size() : 1
+ if ( n_fastq == 1 ) {
+ mv_cmd = """
+ mv non_rRNA_reads.f*q.gz ${prefix}.non_rRNA.fastq.gz
+ mv rRNA_reads.log ${prefix}.sortmerna.log
+ """
+ } else {
+ mv_cmd = """
+ mv non_rRNA_reads_fwd.f*q.gz ${prefix}_1.non_rRNA.fastq.gz
+ mv non_rRNA_reads_rev.f*q.gz ${prefix}_2.non_rRNA.fastq.gz
+ mv rRNA_reads.log ${prefix}.sortmerna.log
+ """
+ paired_cmd = "--paired_in"
+ out2_cmd = "--out2"
+ }
+ }
+ """
+ sortmerna \\
+ ${'--ref '+fastas.join(' --ref ')} \\
+ $refs_input \\
+ $reads_input \\
+ --threads $task.cpus \\
+ --workdir . \\
+ $reads_args \\
+ $paired_cmd \\
+ $out2_cmd \\
+ $args
- mv non_rRNA_reads_fwd.f*q.gz ${prefix}_1.non_rRNA.fastq.gz
- mv non_rRNA_reads_rev.f*q.gz ${prefix}_2.non_rRNA.fastq.gz
- mv rRNA_reads.log ${prefix}.sortmerna.log
+ $mv_cmd
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- sortmerna: \$(echo \$(sortmerna --version 2>&1) | sed 's/^.*SortMeRNA version //; s/ Build Date.*\$//')
- END_VERSIONS
- """
- }
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ sortmerna: \$(echo \$(sortmerna --version 2>&1) | sed 's/^.*SortMeRNA version //; s/ Build Date.*\$//')
+ END_VERSIONS
+ """
stub:
- def args = task.ext.args ?: ''
- def prefix = task.ext.prefix ?: "${meta.id}"
- if (meta.single_end) {
- """
- touch ${prefix}.non_rRNA.fastq.gz
- touch ${prefix}.sortmerna.log
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- sortmerna: \$(echo \$(sortmerna --version 2>&1) | sed 's/^.*SortMeRNA version //; s/ Build Date.*\$//')
- END_VERSIONS
- """
- } else {
- """
- touch ${prefix}_1.non_rRNA.fastq.gz
- touch ${prefix}_2.non_rRNA.fastq.gz
- touch ${prefix}.sortmerna.log
+ def index_only = args.contains('--index 1')? true : false
+ def paired_end = reads instanceof List
+ def paired_cmd = ''
+ def out2_cmd = ''
+ def mv_cmd = ''
+ def reads_input = ''
- cat <<-END_VERSIONS > versions.yml
- "${task.process}":
- sortmerna: \$(echo \$(sortmerna --version 2>&1) | sed 's/^.*SortMeRNA version //; s/ Build Date.*\$//')
- END_VERSIONS
- """
+ if (! index_only){
+ reads_input = paired_end ? reads.collect{"--reads $it"}.join(' ') : "--reads $reads"
+ def n_fastq = paired_end ? reads.size() : 1
+ if ( n_fastq == 1 ) {
+ mv_cmd = "touch ${prefix}.non_rRNA.fastq.gz"
+ } else {
+ mv_cmd = """
+ touch ${prefix}_1.non_rRNA.fastq.gz
+ touch ${prefix}_2.non_rRNA.fastq.gz
+ """
+ }
}
+ """
+ $mv_cmd
+ mkdir -p idx
+ touch ${prefix}.sortmerna.log
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ sortmerna: \$(echo \$(sortmerna --version 2>&1) | sed 's/^.*SortMeRNA version //; s/ Build Date.*\$//')
+ END_VERSIONS
+ """
}
diff --git a/modules/nf-core/sortmerna/meta.yml b/modules/nf-core/sortmerna/meta.yml
index de0b18e..c0a2a58 100644
--- a/modules/nf-core/sortmerna/meta.yml
+++ b/modules/nf-core/sortmerna/meta.yml
@@ -23,16 +23,31 @@ input:
description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
respectively.
+ - meta2:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'test' ]
- fastas:
type: file
description: |
Path to reference file(s)
+ - meta3:
+ type: map
+ description: |
+ Groovy Map containing index information
+ e.g. [ id:'test' ]
+ - index:
+ type: directory
+ description: |
+ Path to index directory of a previous sortmerna run
output:
- meta:
type: map
description: |
Groovy Map containing sample information
- e.g. [ id:'test', single_end:false ]
+ e.g. [ id:'test', single_end:false ], or reference information from an
+ indexing-only run
- reads:
type: file
description: The filtered fastq reads
@@ -41,6 +56,15 @@ output:
type: file
description: SortMeRNA log file
pattern: "*sortmerna.log"
+ - meta2:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'test' ]
+ - index:
+ type: directory
+ description: |
+ Path to index directory generated by sortmerna
- versions:
type: file
description: File containing software versions
diff --git a/modules/nf-core/sortmerna/tests/indexing_only.config b/modules/nf-core/sortmerna/tests/indexing_only.config
new file mode 100644
index 0000000..3e74a32
--- /dev/null
+++ b/modules/nf-core/sortmerna/tests/indexing_only.config
@@ -0,0 +1,5 @@
+process {
+ withName: 'SORTMERNA' {
+ ext.args = '--index 1'
+ }
+}
diff --git a/modules/nf-core/sortmerna/tests/main.nf.test b/modules/nf-core/sortmerna/tests/main.nf.test
index 8a01e2a..73bc119 100644
--- a/modules/nf-core/sortmerna/tests/main.nf.test
+++ b/modules/nf-core/sortmerna/tests/main.nf.test
@@ -7,15 +7,73 @@ nextflow_process {
tag "modules_nfcore"
tag "sortmerna"
+ test("sarscov2 indexing only") {
+
+ config './indexing_only.config'
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([[],[]])
+ input[1] = [ [id:'test2'], // meta map
+ [ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) ]
+ ]
+ input[2] = Channel.of([[],[]])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert ! process.out.reads },
+ { assert snapshot(process.out.index).match("index_index_only") },
+ { assert snapshot(process.out.versions).match("versions_index_only") }
+ )
+ }
+
+ }
+
+ test("sarscov2 indexing only stub") {
+
+ options '-stub'
+ config './indexing_only.config'
+
+ when {
+ process {
+ """
+ input[0] = Channel.of([[],[]])
+ input[1] = [ [id:'test2'], // meta map
+ [ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) ]
+ ]
+ input[2] = Channel.of([[],[]])
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert ! process.out.reads },
+ { assert snapshot(process.out.index).match("index_only_stub") },
+ { assert snapshot(process.out.versions).match("versions_index_only_stub") }
+ )
+ }
+
+ }
+
test("sarscov2 single_end") {
when {
process {
"""
- input[0] = [ [ id:'test', single_end:true ], // meta map
- [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
- ]
- input[1] = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
+ input[0] = [ [ id:'test' ], // meta map
+ [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) ]
+ ]
+ input[1] = [ [id:'test2'], // meta map
+ [ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) ]
+ ]
+ input[2] = Channel.of([[],[]])
"""
}
}
@@ -32,9 +90,10 @@ nextflow_process {
process.out.reads.collect { file(it[1]).getName() } +
process.out.log.collect { file(it[1]).getName() }
).sort()
- ).match("sarscov2 single_end-for_stub_match")
+ ).match("sarscov2 single_end_match")
},
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(process.out.index).match("index_single_end") },
+ { assert snapshot(process.out.versions).match("versions_single_end") }
)
}
@@ -47,10 +106,13 @@ nextflow_process {
when {
process {
"""
- input[0] = [ [ id:'test', single_end:true ], // meta map
- [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
- ]
- input[1] = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
+ input[0] = [ [ id:'test' ], // meta map
+ [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) ]
+ ]
+ input[1] = [ [id:'test2'], // meta map
+ [ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) ]
+ ]
+ input[2] = Channel.of([[],[]])
"""
}
}
@@ -67,7 +129,8 @@ nextflow_process {
).sort()
).match("sarscov2 single_end-for_stub_match")
},
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(process.out.index).match("index_single_end_stub") },
+ { assert snapshot(process.out.versions).match("versions_single_end_stub") }
)
}
@@ -78,11 +141,17 @@ nextflow_process {
when {
process {
"""
- input[0] = [ [ id:'test', single_end:false ], // meta map
- [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
- ]
- input[1] = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
+ input[0] = [
+ [ id:'test' ], // meta map
+ [
+ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true),
+ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true)
+ ]
+ ]
+ input[1] = [ [id:'test2'], // meta map
+ [ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) ]
+ ]
+ input[2] = Channel.of([[],[]])
"""
}
}
@@ -99,9 +168,10 @@ nextflow_process {
process.out.reads.collect { it[1].collect { item -> file(item).getName() } } +
process.out.log.collect { file(it[1]).getName() }
).sort()
- ).match("sarscov2 paired_end-for_stub_match")
+ ).match("sarscov2 paired_end_match")
},
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(process.out.index).match("index_paired_end") },
+ { assert snapshot(process.out.versions).match("versions_paired_end") }
)
}
@@ -114,11 +184,16 @@ nextflow_process {
when {
process {
"""
- input[0] = [ [ id:'test', single_end:false ], // meta map
- [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true) ]
- ]
- input[1] = [ file(params.test_data['sarscov2']['genome']['genome_fasta'], checkIfExists: true) ]
+ input[0] = [ [ id:'test' ], // meta map
+ [
+ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true),
+ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_2.fastq.gz", checkIfExists: true)
+ ]
+ ]
+ input[1] = [ [id:'test2'], // meta map
+ [ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) ]
+ ]
+ input[2] = Channel.of([[],[]])
"""
}
}
@@ -135,10 +210,118 @@ nextflow_process {
).sort()
).match("sarscov2 paired_end-for_stub_match")
},
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(process.out.index).match("index_paired_end_stub") },
+ { assert snapshot(process.out.versions).match("versions_paired_end_stub") }
)
}
}
+ test("sarscov2 single_end premade_index") {
+
+ config './premade_index.config'
+
+ setup {
+
+ run("SORTMERNA", alias: "SORTMERNA_INDEX") {
+ script "../main.nf"
+ process {
+ """
+ input[0] = Channel.of([[],[]])
+ input[1] = [ [id:'test2'], // meta map
+ [ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) ]
+ ]
+ input[2] = Channel.of([[],[]])
+ """
+ }
+ }
+ }
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test' ], // meta map
+ [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) ]
+ ]
+ input[1] = [ [id:'test2'], // meta map
+ [ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) ]
+ ]
+ input[2] = SORTMERNA_INDEX.out.index
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert process.out.reads },
+ { assert file(process.out.log[0][1]).text.contains("Total reads passing E-value threshold = 100 (100.00)") },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("sarscov2 single_end_premade_index_match")
+ },
+ { assert snapshot(process.out.index).match("index_single_end_premade_index") },
+ { assert snapshot(process.out.versions).match("versions_single_end_premade_index") }
+ )
+ }
+ }
+
+ test("sarscov2 single_end premade_index stub") {
+
+ config './premade_index.config'
+ options '-stub'
+
+ setup {
+
+ run("SORTMERNA", alias: "SORTMERNA_INDEX") {
+ script "../main.nf"
+ process {
+ """
+ input[0] = Channel.of([[],[]])
+ input[1] = [ [id:'test2'], // meta map
+ [ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) ]
+ ]
+ input[2] = Channel.of([[],[]])
+ """
+ }
+ }
+ }
+
+ when {
+ process {
+ """
+ input[0] = [ [ id:'test' ], // meta map
+ [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) ]
+ ]
+ input[1] = [ [id:'test2'], // meta map
+ [ file(params.modules_testdata_base_path + "genomics/sarscov2/genome/genome.fasta", checkIfExists: true) ]
+ ]
+ input[2] = SORTMERNA_INDEX.out.index
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert process.out.reads },
+ {
+ assert snapshot(
+ (
+ [process.out.reads[0][0].toString()] + // meta
+ process.out.reads.collect { file(it[1]).getName() } +
+ process.out.log.collect { file(it[1]).getName() }
+ ).sort()
+ ).match("sarscov2 single_end_premade_index_match_stub")
+ },
+ { assert snapshot(process.out.index).match("index_single_end_premade_index_stub") },
+ { assert snapshot(process.out.versions).match("versions_single_end_premade_index_stub") }
+ )
+ }
+ }
}
diff --git a/modules/nf-core/sortmerna/tests/main.nf.test.snap b/modules/nf-core/sortmerna/tests/main.nf.test.snap
index e502000..86e8473 100644
--- a/modules/nf-core/sortmerna/tests/main.nf.test.snap
+++ b/modules/nf-core/sortmerna/tests/main.nf.test.snap
@@ -1,33 +1,352 @@
{
- "sarscov2 single_end-for_stub_match": {
+ "versions_paired_end_stub": {
+ "content": [
+ [
+ "versions.yml:md5,7df9d50209f351e1f75e05a1fad6ba4b"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-07T12:27:11.223149"
+ },
+ "index_paired_end_stub": {
+ "content": [
+ [
+ [
+ {
+ "id": "test2"
+ },
+ [
+
+ ]
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-26T14:24:25.384097178"
+ },
+ "versions_paired_end": {
+ "content": [
+ [
+ "versions.yml:md5,7df9d50209f351e1f75e05a1fad6ba4b"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-07T12:27:04.517155"
+ },
+ "versions_single_end_stub": {
+ "content": [
+ [
+ "versions.yml:md5,7df9d50209f351e1f75e05a1fad6ba4b"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-01T12:10:35.228450189"
+ },
+ "sarscov2 single_end_match": {
"content": [
[
"test.non_rRNA.fastq.gz",
"test.sortmerna.log",
- "{id=test, single_end=true}"
+ "{id=test}"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-07T12:28:23.20327"
+ },
+ "index_only_stub": {
+ "content": [
+ [
+ [
+ {
+ "id": "test2"
+ },
+ [
+
+ ]
+ ]
]
],
- "timestamp": "2023-12-21T11:56:00.15356"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-26T15:00:47.128504164"
},
- "versions": {
+ "index_single_end_premade_index": {
+ "content": [
+ [
+ [
+ {
+ "id": "test2"
+ },
+ [
+ "2415186086593376314.bursttrie_0.dat:md5,74f7f020e8d46e24a8a2e9c5fbcd564a",
+ "2415186086593376314.kmer_0.dat:md5,4a0bcb71b120f6a6949b7969292ef2e7",
+ "2415186086593376314.pos_0.dat:md5,bc2875e4cc4017707306565e396839ef",
+ "2415186086593376314.stats:md5,67c9d4c768f28a450fc82a2b5d43db5c"
+ ]
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-26T15:01:53.832643452"
+ },
+ "versions_single_end_premade_index": {
"content": [
[
"versions.yml:md5,7df9d50209f351e1f75e05a1fad6ba4b"
]
],
- "timestamp": "2023-12-21T11:56:00.200244"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-26T15:01:53.902154982"
},
"sarscov2 paired_end-for_stub_match": {
"content": [
[
+ "{id=test}",
[
"test_1.non_rRNA.fastq.gz",
"test_2.non_rRNA.fastq.gz"
],
+ "test.sortmerna.log"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-07T12:28:56.063579"
+ },
+ "index_paired_end": {
+ "content": [
+ [
+ [
+ {
+ "id": "test2"
+ },
+ [
+ "2415186086593376314.bursttrie_0.dat:md5,74f7f020e8d46e24a8a2e9c5fbcd564a",
+ "2415186086593376314.kmer_0.dat:md5,4a0bcb71b120f6a6949b7969292ef2e7",
+ "2415186086593376314.pos_0.dat:md5,bc2875e4cc4017707306565e396839ef",
+ "2415186086593376314.stats:md5,67c9d4c768f28a450fc82a2b5d43db5c"
+ ]
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-26T14:24:14.272659781"
+ },
+ "sarscov2 single_end_premade_index_match_stub": {
+ "content": [
+ [
+ "test.non_rRNA.fastq.gz",
+ "test.sortmerna.log",
+ "{id=test}"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-26T15:05:01.913287272"
+ },
+ "sarscov2 single_end-for_stub_match": {
+ "content": [
+ [
+ "test.non_rRNA.fastq.gz",
+ "test.sortmerna.log",
+ "{id=test}"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-07T12:28:29.197913"
+ },
+ "sarscov2 paired_end_match": {
+ "content": [
+ [
+ "{id=test}",
+ [
+ "test_1.non_rRNA.fastq.gz",
+ "test_2.non_rRNA.fastq.gz"
+ ],
+ "test.sortmerna.log"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-07T12:28:49.914992"
+ },
+ "versions_single_end": {
+ "content": [
+ [
+ "versions.yml:md5,7df9d50209f351e1f75e05a1fad6ba4b"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-26T14:36:27.14244294"
+ },
+ "versions_index_only": {
+ "content": [
+ [
+ "versions.yml:md5,7df9d50209f351e1f75e05a1fad6ba4b"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-26T15:00:35.609161481"
+ },
+ "versions_single_end_premade_index_stub": {
+ "content": [
+ [
+ "versions.yml:md5,7df9d50209f351e1f75e05a1fad6ba4b"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-26T15:05:02.059858431"
+ },
+ "index_single_end_stub": {
+ "content": [
+ [
+ [
+ {
+ "id": "test2"
+ },
+ [
+
+ ]
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-26T14:23:58.068772508"
+ },
+ "versions_index_only_stub": {
+ "content": [
+ [
+ "versions.yml:md5,7df9d50209f351e1f75e05a1fad6ba4b"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-26T15:00:47.169402699"
+ },
+ "index_single_end_premade_index_stub": {
+ "content": [
+ [
+ [
+ {
+ "id": "test2"
+ },
+ [
+
+ ]
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-26T15:05:01.953316205"
+ },
+ "index_single_end": {
+ "content": [
+ [
+ [
+ {
+ "id": "test2"
+ },
+ [
+ "2415186086593376314.bursttrie_0.dat:md5,74f7f020e8d46e24a8a2e9c5fbcd564a",
+ "2415186086593376314.kmer_0.dat:md5,4a0bcb71b120f6a6949b7969292ef2e7",
+ "2415186086593376314.pos_0.dat:md5,bc2875e4cc4017707306565e396839ef",
+ "2415186086593376314.stats:md5,67c9d4c768f28a450fc82a2b5d43db5c"
+ ]
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-26T14:36:26.88061978"
+ },
+ "index_index_only": {
+ "content": [
+ [
+ [
+ {
+ "id": "test2"
+ },
+ [
+ "2415186086593376314.bursttrie_0.dat:md5,74f7f020e8d46e24a8a2e9c5fbcd564a",
+ "2415186086593376314.kmer_0.dat:md5,4a0bcb71b120f6a6949b7969292ef2e7",
+ "2415186086593376314.pos_0.dat:md5,bc2875e4cc4017707306565e396839ef",
+ "2415186086593376314.stats:md5,67c9d4c768f28a450fc82a2b5d43db5c"
+ ]
+ ]
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-26T15:00:35.34089252"
+ },
+ "sarscov2 single_end_premade_index_match": {
+ "content": [
+ [
+ "test.non_rRNA.fastq.gz",
"test.sortmerna.log",
- "{id=test, single_end=false}"
+ "{id=test}"
]
],
- "timestamp": "2023-12-21T12:00:47.879193"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-26T15:01:53.797737296"
}
}
\ No newline at end of file
diff --git a/modules/nf-core/sortmerna/tests/premade_index.config b/modules/nf-core/sortmerna/tests/premade_index.config
new file mode 100644
index 0000000..ab86d2e
--- /dev/null
+++ b/modules/nf-core/sortmerna/tests/premade_index.config
@@ -0,0 +1,8 @@
+process {
+ withName: 'SORTMERNA_INDEX' {
+ ext.args = '--index 1'
+ }
+ withName: 'SORTMERNA' {
+ ext.args = '--index 0'
+ }
+}
diff --git a/modules/nf-core/star/align/environment.yml b/modules/nf-core/star/align/environment.yml
index 36fcd02..8bd58cf 100644
--- a/modules/nf-core/star/align/environment.yml
+++ b/modules/nf-core/star/align/environment.yml
@@ -6,4 +6,5 @@ channels:
dependencies:
- bioconda::star=2.7.10a
- bioconda::samtools=1.18
+ - bioconda::htslib=1.18
- conda-forge::gawk=5.1.0
diff --git a/modules/nf-core/star/align/tests/main.nf.test b/modules/nf-core/star/align/tests/main.nf.test
index 4c87847..6ecd778 100644
--- a/modules/nf-core/star/align/tests/main.nf.test
+++ b/modules/nf-core/star/align/tests/main.nf.test
@@ -7,39 +7,40 @@ nextflow_process {
tag "modules_nfcore"
tag "star"
tag "star/align"
+ tag "star/genomegenerate"
- test("homo_sapiens - single_end") {
- config "./nextflow.config"
-
- setup {
- run("STAR_GENOMEGENERATE") {
- script "../../../star/genomegenerate/main.nf"
- process {
- """
- input[0] = Channel.of([
- [ id:'test_fasta' ],
- [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
- ])
- input[1] = Channel.of([
- [ id:'test_gtf' ],
- [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
- ])
- """
- }
+ setup {
+ run("STAR_GENOMEGENERATE") {
+ script "../../../star/genomegenerate/main.nf"
+ process {
+ """
+ input[0] = Channel.of([
+ [ id:'test_fasta' ],
+ [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
+ ])
+ input[1] = Channel.of([
+ [ id:'test_gtf' ],
+ [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
+ ])
+ """
}
}
+ }
+
+ test("homo_sapiens - single_end") {
+ config "./nextflow.config"
when {
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:true ], // meta map
- [ file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true) ]
+ [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true) ]
])
input[1] = STAR_GENOMEGENERATE.out.index
input[2] = Channel.of([
[ id:'test_gtf' ],
- [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+ [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
])
input[3] = false
input[4] = 'illumina'
@@ -74,38 +75,20 @@ nextflow_process {
test("homo_sapiens - paired_end") {
config "./nextflow.config"
- setup {
- run("STAR_GENOMEGENERATE") {
- script "../../../star/genomegenerate/main.nf"
- process {
- """
- input[0] = Channel.of([
- [ id:'test_fasta' ],
- [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
- ])
- input[1] = Channel.of([
- [ id:'test_gtf' ],
- [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
- ])
- """
- }
- }
- }
-
when {
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:false ], // meta map
[
- file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true),
- file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true)
]
])
input[1] = STAR_GENOMEGENERATE.out.index
input[2] = Channel.of([
[ id:'test_gtf' ],
- [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+ [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
])
input[3] = false
input[4] = 'illumina'
@@ -140,38 +123,20 @@ nextflow_process {
test("homo_sapiens - paired_end - arriba") {
config "./nextflow.arriba.config"
- setup {
- run("STAR_GENOMEGENERATE") {
- script "../../../star/genomegenerate/main.nf"
- process {
- """
- input[0] = Channel.of([
- [ id:'test_fasta' ],
- [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
- ])
- input[1] = Channel.of([
- [ id:'test_gtf' ],
- [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
- ])
- """
- }
- }
- }
-
when {
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:false ], // meta map
[
- file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true),
- file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true)
]
])
input[1] = STAR_GENOMEGENERATE.out.index
input[2] = Channel.of([
[ id:'test_gtf' ],
- [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+ [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
])
input[3] = false
input[4] = 'illumina'
@@ -206,38 +171,20 @@ nextflow_process {
test("homo_sapiens - paired_end - starfusion") {
config "./nextflow.starfusion.config"
- setup {
- run("STAR_GENOMEGENERATE") {
- script "../../../star/genomegenerate/main.nf"
- process {
- """
- input[0] = Channel.of([
- [ id:'test_fasta' ],
- [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
- ])
- input[1] = Channel.of([
- [ id:'test_gtf' ],
- [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
- ])
- """
- }
- }
- }
-
when {
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:false ], // meta map
[
- file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true),
- file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true)
]
])
input[1] = STAR_GENOMEGENERATE.out.index
input[2] = Channel.of([
[ id:'test_gtf' ],
- [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+ [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
])
input[3] = false
input[4] = 'illumina'
@@ -272,40 +219,22 @@ nextflow_process {
test("homo_sapiens - paired_end - multiple") {
config "./nextflow.config"
- setup {
- run("STAR_GENOMEGENERATE") {
- script "../../../star/genomegenerate/main.nf"
- process {
- """
- input[0] = Channel.of([
- [ id:'test_fasta' ],
- [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
- ])
- input[1] = Channel.of([
- [ id:'test_gtf' ],
- [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
- ])
- """
- }
- }
- }
-
when {
process {
"""
input[0] = Channel.of([
[ id:'test', single_end:false ], // meta map
[
- file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true),
- file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true),
- file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_1_fastq_gz'], checkIfExists: true),
- file(params.test_data['homo_sapiens']['illumina']['test_rnaseq_2_fastq_gz'], checkIfExists: true)
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/illumina/fastq/test_rnaseq_2.fastq.gz', checkIfExists: true)
]
])
input[1] = STAR_GENOMEGENERATE.out.index
input[2] = Channel.of([
[ id:'test_gtf' ],
- [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+ [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
])
input[3] = false
input[4] = 'illumina'
diff --git a/modules/nf-core/star/genomegenerate/environment.yml b/modules/nf-core/star/genomegenerate/environment.yml
index 93e4476..791f255 100644
--- a/modules/nf-core/star/genomegenerate/environment.yml
+++ b/modules/nf-core/star/genomegenerate/environment.yml
@@ -1,11 +1,10 @@
name: star_genomegenerate
-
channels:
- conda-forge
- bioconda
- defaults
-
dependencies:
- bioconda::samtools=1.18
+ - bioconda::htslib=1.18
- bioconda::star=2.7.10a
- conda-forge::gawk=5.1.0
diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test b/modules/nf-core/star/genomegenerate/tests/main.nf.test
index af0c942..c17c8ba 100644
--- a/modules/nf-core/star/genomegenerate/tests/main.nf.test
+++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test
@@ -8,18 +8,18 @@ nextflow_process {
tag "star"
tag "star/genomegenerate"
- test("homo_sapiens") {
+ test("fasta_gtf") {
when {
process {
"""
input[0] = Channel.of([
[ id:'test_fasta' ],
- [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
+ [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
])
input[1] = Channel.of([
[ id:'test_gtf' ],
- [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+ [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
])
"""
}
@@ -28,14 +28,13 @@ nextflow_process {
then {
assertAll(
{ assert process.success },
- { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_with_gtf") },
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("fasta_gtf_index") },
+ { assert snapshot(process.out.versions).match("fasta_gtf_versions") }
)
}
-
}
- test("homo_sapiens-stub") {
+ test("fasta_gtf_stub") {
options '-stub'
@@ -44,11 +43,11 @@ nextflow_process {
"""
input[0] = Channel.of([
[ id:'test_fasta' ],
- [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
+ [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
])
input[1] = Channel.of([
[ id:'test_gtf' ],
- [file(params.test_data['homo_sapiens']['genome']['genome_gtf'], checkIfExists: true)]
+ [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.gtf', checkIfExists: true) ]
])
"""
}
@@ -57,21 +56,20 @@ nextflow_process {
then {
assertAll(
{ assert process.success },
- { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_with_gtf") },
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("fasta_gtf_stub_index") },
+ { assert snapshot(process.out.versions).match("fasta_gtf_stub_versions") }
)
}
-
}
- test("homo_sapiens-without_gtf") {
+ test("fasta") {
when {
process {
"""
input[0] = Channel.of([
[ id:'test_fasta' ],
- [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
+ [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
])
input[1] = Channel.of([ [], [] ])
"""
@@ -81,14 +79,14 @@ nextflow_process {
then {
assertAll(
{ assert process.success },
- { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_without_gtf") },
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("fasta_index") },
+ { assert snapshot(process.out.versions).match("fasta_versions") }
)
}
}
- test("homo_sapiens-without_gtf-stub") {
+ test("fasta_stub") {
options '-stub'
@@ -97,7 +95,7 @@ nextflow_process {
"""
input[0] = Channel.of([
[ id:'test_fasta' ],
- [file(params.test_data['homo_sapiens']['genome']['genome_fasta'], checkIfExists: true)]
+ [ file(params.modules_testdata_base_path + 'genomics/homo_sapiens/genome/genome.fasta', checkIfExists: true) ]
])
input[1] = Channel.of([ [], [] ])
"""
@@ -107,11 +105,11 @@ nextflow_process {
then {
assertAll(
{ assert process.success },
- { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("index_without_gtf") },
- { assert snapshot(process.out.versions).match("versions") }
+ { assert snapshot(file(process.out.index[0][1]).listFiles().collect { it.getName() }.sort().toString()).match("fasta_stub_index") },
+ { assert snapshot(process.out.versions).match("fasta_stub_versions") }
)
}
}
-}
\ No newline at end of file
+}
diff --git a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap
index 9de08c7..5653d6e 100644
--- a/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap
+++ b/modules/nf-core/star/genomegenerate/tests/main.nf.test.snap
@@ -1,22 +1,90 @@
{
- "versions": {
+ "fasta_gtf_versions": {
"content": [
[
"versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f"
]
],
- "timestamp": "2023-12-19T11:05:51.741109"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.04.3"
+ },
+ "timestamp": "2024-02-01T15:54:31.798555"
},
- "index_with_gtf": {
+ "fasta_stub_versions": {
+ "content": [
+ [
+ "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.04.3"
+ },
+ "timestamp": "2024-02-01T15:55:07.521209"
+ },
+ "fasta_gtf_stub_index": {
+ "content": [
+ "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.04.3"
+ },
+ "timestamp": "2024-02-01T15:54:46.478098"
+ },
+ "fasta_gtf_stub_versions": {
+ "content": [
+ [
+ "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.04.3"
+ },
+ "timestamp": "2024-02-01T15:54:46.491657"
+ },
+ "fasta_index": {
+ "content": [
+ "[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]"
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.04.3"
+ },
+ "timestamp": "2024-02-01T15:54:57.552329"
+ },
+ "fasta_versions": {
+ "content": [
+ [
+ "versions.yml:md5,46b8f1f34bb7f23892cd1eb249ed4d7f"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.04.3"
+ },
+ "timestamp": "2024-02-01T15:54:57.560541"
+ },
+ "fasta_gtf_index": {
"content": [
"[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, exonGeTrInfo.tab, exonInfo.tab, geneInfo.tab, genomeParameters.txt, sjdbInfo.txt, sjdbList.fromGTF.out.tab, sjdbList.out.tab, transcriptInfo.tab]"
],
- "timestamp": "2023-12-19T11:38:14.551548"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.04.3"
+ },
+ "timestamp": "2024-02-01T15:54:31.786814"
},
- "index_without_gtf": {
+ "fasta_stub_index": {
"content": [
"[Genome, Log.out, SA, SAindex, chrLength.txt, chrName.txt, chrNameLength.txt, chrStart.txt, genomeParameters.txt]"
],
- "timestamp": "2023-12-19T11:38:22.382905"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.04.3"
+ },
+ "timestamp": "2024-02-01T15:55:07.517472"
}
}
\ No newline at end of file
diff --git a/modules/nf-core/umitools/extract/environment.yml b/modules/nf-core/umitools/extract/environment.yml
index 7d08ac0..aab452d 100644
--- a/modules/nf-core/umitools/extract/environment.yml
+++ b/modules/nf-core/umitools/extract/environment.yml
@@ -4,4 +4,4 @@ channels:
- bioconda
- defaults
dependencies:
- - bioconda::umi_tools=1.1.4
+ - bioconda::umi_tools=1.1.5
diff --git a/modules/nf-core/umitools/extract/main.nf b/modules/nf-core/umitools/extract/main.nf
index 4bd79e7..8719e5f 100644
--- a/modules/nf-core/umitools/extract/main.nf
+++ b/modules/nf-core/umitools/extract/main.nf
@@ -5,8 +5,8 @@ process UMITOOLS_EXTRACT {
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.4--py38hbff2b2d_1' :
- 'biocontainers/umi_tools:1.1.4--py38hbff2b2d_1' }"
+ 'https://depot.galaxyproject.org/singularity/umi_tools:1.1.5--py39hf95cd2a_0' :
+ 'biocontainers/umi_tools:1.1.5--py39hf95cd2a_0' }"
input:
tuple val(meta), path(reads)
diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test b/modules/nf-core/umitools/extract/tests/main.nf.test
index 22242d1..2a8eba1 100644
--- a/modules/nf-core/umitools/extract/tests/main.nf.test
+++ b/modules/nf-core/umitools/extract/tests/main.nf.test
@@ -12,24 +12,20 @@ nextflow_process {
test("Should run without failures") {
when {
- params {
- outdir = "$outputDir"
- }
process {
"""
input[0] = [ [ id:'test', single_end:true ], // meta map
- [ file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true) ]
- ]
+ [ file(params.modules_testdata_base_path + "genomics/sarscov2/illumina/fastq/test_1.fastq.gz", checkIfExists: true) ]
+ ]
"""
}
}
then {
assertAll (
- { assert process.success },
- { assert snapshot(process.out.versions).match("versions") }
+ { assert process.success },
+ { assert snapshot(process.out.versions).match("versions") }
)
}
-
}
}
\ No newline at end of file
diff --git a/modules/nf-core/umitools/extract/tests/main.nf.test.snap b/modules/nf-core/umitools/extract/tests/main.nf.test.snap
index 6d5944f..bf82701 100644
--- a/modules/nf-core/umitools/extract/tests/main.nf.test.snap
+++ b/modules/nf-core/umitools/extract/tests/main.nf.test.snap
@@ -2,9 +2,13 @@
"versions": {
"content": [
[
- "versions.yml:md5,5a18da2d3a5a4de15e7aaae9082d7abb"
+ "versions.yml:md5,568d243174c081a0301e74ed42e59b48"
]
],
- "timestamp": "2023-12-08T09:41:43.540658352"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-16T10:01:33.326046137"
}
}
\ No newline at end of file
diff --git a/modules/nf-core/umitools/extract/tests/nextflow.config b/modules/nf-core/umitools/extract/tests/nextflow.config
index 628f5fc..c866f5a 100644
--- a/modules/nf-core/umitools/extract/tests/nextflow.config
+++ b/modules/nf-core/umitools/extract/tests/nextflow.config
@@ -1,7 +1,7 @@
process {
publishDir = { "${params.outdir}/${task.process.tokenize(':')[-1].tokenize('_')[0].toLowerCase()}" }
-
+
withName: UMITOOLS_EXTRACT {
ext.args = '--bc-pattern="NNNN"'
}
diff --git a/modules/pfr/agat/spfilterfeaturefromkilllist/environment.yml b/modules/pfr/agat/spfilterfeaturefromkilllist/environment.yml
new file mode 100644
index 0000000..b0811b4
--- /dev/null
+++ b/modules/pfr/agat/spfilterfeaturefromkilllist/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "agat_spfilterfeaturefromkilllist"
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - "bioconda::agat=1.3.3"
diff --git a/modules/pfr/agat/spfilterfeaturefromkilllist/main.nf b/modules/pfr/agat/spfilterfeaturefromkilllist/main.nf
new file mode 100644
index 0000000..ffb91d1
--- /dev/null
+++ b/modules/pfr/agat/spfilterfeaturefromkilllist/main.nf
@@ -0,0 +1,53 @@
+process AGAT_SPFILTERFEATUREFROMKILLLIST {
+ tag "$meta.id"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/agat:1.3.3--pl5321hdfd78af_0':
+ 'biocontainers/agat:1.3.3--pl5321hdfd78af_0' }"
+
+ input:
+ tuple val(meta), path(gff)
+ path kill_list
+ path config
+
+ output:
+ tuple val(meta), path("*.gff"), emit: gff
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def config_param = config ? "--config $config" : ''
+ if( "$gff" == "${prefix}.gff" ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+ """
+ agat_sp_filter_feature_from_kill_list.pl \\
+ --gff $gff \\
+ --kill_list $kill_list \\
+ $config_param \\
+ $args \\
+ --output "${prefix}.gff"
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ agat: \$(agat_sp_filter_feature_from_kill_list.pl -h | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p')
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ if( "$gff" == "${prefix}.gff" ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+ """
+ touch "${prefix}.gff"
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ agat: \$(agat_sp_filter_feature_from_kill_list.pl -h | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p')
+ END_VERSIONS
+ """
+}
diff --git a/modules/pfr/agat/spfilterfeaturefromkilllist/meta.yml b/modules/pfr/agat/spfilterfeaturefromkilllist/meta.yml
new file mode 100644
index 0000000..d408fe7
--- /dev/null
+++ b/modules/pfr/agat/spfilterfeaturefromkilllist/meta.yml
@@ -0,0 +1,60 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "agat_spfilterfeaturefromkilllist"
+description: |
+  The script aims to remove features based on a kill list. The default behaviour is to look at the feature's ID.
+ If the feature has an ID (case insensitive) listed among the kill list it will be removed. /!\ Removing a level1
+ or level2 feature will automatically remove all linked subfeatures, and removing all children of a feature will
+ automatically remove this feature too.
+keywords:
+ - genomics
+ - gff
+ - remove
+ - feature
+tools:
+ - "agat":
+ description: "Another Gff Analysis Toolkit (AGAT). Suite of tools to handle gene annotations in any GTF/GFF format."
+ homepage: "https://agat.readthedocs.io/en/latest/"
+ documentation: "https://agat.readthedocs.io/en/latest/tools/agat_sp_filter_feature_from_kill_list.html"
+ tool_dev_url: "https://github.com/NBISweden/AGAT"
+ doi: "10.5281/zenodo.3552717"
+ licence: ["GPL v3"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1', single_end:false ]`
+ - gff:
+ type: file
+ description: Input GFF3 file that will be read
+ pattern: "*.{gff,gff3}"
+ - kill_list:
+ type: file
+ description: Kill list. One value per line.
+ pattern: "*.txt"
+ - config:
+ type: file
+ description: |
+        Input agat config file. By default AGAT takes as input agat_config.yaml file from the working directory if any, otherwise it takes the original agat_config.yaml shipped with AGAT. To get the agat_config.yaml locally type: "agat config --expose". The --config option gives you the possibility to use your own AGAT config file (located elsewhere or named differently).
+ pattern: "*.yaml"
+
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+        e.g. `[ id:'sample1', single_end:false ]`
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - gff:
+ type: file
+ description: Output GFF file.
+ pattern: "*.gff"
+
+authors:
+ - "@GallVp"
+maintainers:
+ - "@GallVp"
diff --git a/modules/pfr/agat/spfilterfeaturefromkilllist/tests/main.nf.test b/modules/pfr/agat/spfilterfeaturefromkilllist/tests/main.nf.test
new file mode 100644
index 0000000..891e0a7
--- /dev/null
+++ b/modules/pfr/agat/spfilterfeaturefromkilllist/tests/main.nf.test
@@ -0,0 +1,74 @@
+nextflow_process {
+
+ name "Test Process AGAT_SPFILTERFEATUREFROMKILLLIST"
+ script "../main.nf"
+ process "AGAT_SPFILTERFEATUREFROMKILLLIST"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "agat"
+ tag "agat/spfilterfeaturefromkilllist"
+
+ test("sarscov2-genome_gff3") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
+ ]
+
+ def kill_list = "unknown_transcript_1"
+ def kill_list_file = new File('kill.list.txt')
+ kill_list_file.text = kill_list
+
+ input[1] = kill_list_file.toPath()
+
+ input[2] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("sarscov2-genome_gff3-stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.test_data['sarscov2']['genome']['genome_gff3'], checkIfExists: true)
+ ]
+
+ def kill_list = "unknown_transcript_1"
+ def kill_list_file = new File('kill.list.txt')
+ kill_list_file.text = kill_list
+
+ input[1] = kill_list_file.toPath()
+
+ input[2] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+}
\ No newline at end of file
diff --git a/modules/pfr/agat/spfilterfeaturefromkilllist/tests/main.nf.test.snap b/modules/pfr/agat/spfilterfeaturefromkilllist/tests/main.nf.test.snap
new file mode 100644
index 0000000..bbc8cea
--- /dev/null
+++ b/modules/pfr/agat/spfilterfeaturefromkilllist/tests/main.nf.test.snap
@@ -0,0 +1,68 @@
+{
+ "sarscov2-genome_gff3-stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gff:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,ace24108f514da465e068372b18d4651"
+ ],
+ "gff": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gff:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,ace24108f514da465e068372b18d4651"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-02T13:29:56.638311"
+ },
+ "sarscov2-genome_gff3": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gff:md5,df19e1b84ba6f691d20c72b397c88abf"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,ace24108f514da465e068372b18d4651"
+ ],
+ "gff": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gff:md5,df19e1b84ba6f691d20c72b397c88abf"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,ace24108f514da465e068372b18d4651"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-02T13:11:42.236263"
+ }
+}
\ No newline at end of file
diff --git a/modules/pfr/agat/spfilterfeaturefromkilllist/tests/tags.yml b/modules/pfr/agat/spfilterfeaturefromkilllist/tests/tags.yml
new file mode 100644
index 0000000..2ab17b0
--- /dev/null
+++ b/modules/pfr/agat/spfilterfeaturefromkilllist/tests/tags.yml
@@ -0,0 +1,2 @@
+agat/spfilterfeaturefromkilllist:
+ - "modules/pfr/agat/spfilterfeaturefromkilllist/**"
diff --git a/modules/pfr/agat/spmergeannotations/environment.yml b/modules/pfr/agat/spmergeannotations/environment.yml
new file mode 100644
index 0000000..6df7aea
--- /dev/null
+++ b/modules/pfr/agat/spmergeannotations/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "agat_spmergeannotations"
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - "bioconda::agat=1.3.3"
diff --git a/modules/pfr/agat/spmergeannotations/main.nf b/modules/pfr/agat/spmergeannotations/main.nf
new file mode 100644
index 0000000..7738ac5
--- /dev/null
+++ b/modules/pfr/agat/spmergeannotations/main.nf
@@ -0,0 +1,53 @@
+process AGAT_SPMERGEANNOTATIONS {
+ tag "$meta.id"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/agat:1.3.3--pl5321hdfd78af_0':
+ 'biocontainers/agat:1.3.3--pl5321hdfd78af_0' }"
+
+ input:
+ tuple val(meta), path(gffs)
+ path config
+
+ output:
+ tuple val(meta), path("*.gff") , emit: gff
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def config_param = config ? "--config $config" : ''
+ def gff_param = "$gffs".split(' ').collect { "--gff $it" }.join(' ')
+ def file_names = gffs.collect { "$it" }
+ if ( file_names.contains ( "${prefix}.gff" ) ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+ """
+ agat_sp_merge_annotations.pl \\
+ $gff_param \\
+ $config_param \\
+ $args \\
+ --output ${prefix}.gff
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ agat: \$(agat_sp_merge_annotations.pl -h | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p')
+ END_VERSIONS
+ """
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def file_names = gffs.collect { "$it" }
+ if ( file_names.contains ( "${prefix}.gff" ) ) error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
+ """
+ touch ${prefix}.gff
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ agat: \$(agat_sp_merge_annotations.pl -h | sed -n 's/.*(AGAT) - Version: \\(.*\\) .*/\\1/p')
+ END_VERSIONS
+ """
+}
diff --git a/modules/pfr/agat/spmergeannotations/meta.yml b/modules/pfr/agat/spmergeannotations/meta.yml
new file mode 100644
index 0000000..afa9ddd
--- /dev/null
+++ b/modules/pfr/agat/spmergeannotations/meta.yml
@@ -0,0 +1,54 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "agat_spmergeannotations"
+description: |
+  This script merges different GFF annotation files into one. It uses the AGAT parser that takes care of duplicated names and fixes other oddities met in those files.
+keywords:
+ - genomics
+ - gff
+ - merge
+ - combine
+tools:
+ - "agat":
+ description: "Another Gff Analysis Toolkit (AGAT). Suite of tools to handle gene annotations in any GTF/GFF format."
+ homepage: "https://agat.readthedocs.io/en/latest/"
+ documentation: "https://agat.readthedocs.io/en/latest/tools/agat_sp_merge_annotations.html"
+ tool_dev_url: "https://github.com/NBISweden/AGAT"
+ doi: "10.5281/zenodo.3552717"
+ licence: ["GPL v3"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - gffs:
+ type: list
+ description: A list of GFFs to merge
+ pattern: "[ *.{gff,gff3} ]"
+ - config:
+ type: file
+ description: |
+ Input agat config file. By default AGAT takes as input agat_config.yaml file from the working directory if any,
+        otherwise it takes the original agat_config.yaml shipped with AGAT. To get the agat_config.yaml
+ locally type: "agat config --expose". The --config option gives you the possibility to use your
+ own AGAT config file (located elsewhere or named differently).
+ pattern: "*.yaml"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - gff:
+ type: file
+ description: Output GFF file.
+ pattern: "*.gff"
+authors:
+ - "@GallVp"
+maintainers:
+ - "@GallVp"
diff --git a/modules/pfr/agat/spmergeannotations/tests/main.nf.test b/modules/pfr/agat/spmergeannotations/tests/main.nf.test
new file mode 100644
index 0000000..5e25599
--- /dev/null
+++ b/modules/pfr/agat/spmergeannotations/tests/main.nf.test
@@ -0,0 +1,71 @@
+nextflow_process {
+
+ name "Test Process AGAT_SPMERGEANNOTATIONS"
+ script "../main.nf"
+ process "AGAT_SPMERGEANNOTATIONS"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "agat"
+ tag "agat/spmergeannotations"
+
+ test("candidatus_portiera_aleyrodidarum-multi_gffs") {
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ [
+ file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['test1_gff'], checkIfExists: true),
+ file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['test2_gff'], checkIfExists: true),
+ file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['test3_gff'], checkIfExists: true),
+ ]
+ ]
+
+ input[1] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert path(process.out.gff[0][1]).text.contains('AGAT gene') },
+ { assert snapshot(process.out.versions).match("versions") }
+ )
+ }
+
+ }
+
+ test("candidatus_portiera_aleyrodidarum-multi_gffs-stub") {
+
+ options '-stub'
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ [
+ file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['test1_gff'], checkIfExists: true),
+ file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['test2_gff'], checkIfExists: true),
+ file(params.test_data['candidatus_portiera_aleyrodidarum']['genome']['test3_gff'], checkIfExists: true),
+ ]
+ ]
+
+ input[1] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+}
\ No newline at end of file
diff --git a/modules/pfr/agat/spmergeannotations/tests/main.nf.test.snap b/modules/pfr/agat/spmergeannotations/tests/main.nf.test.snap
new file mode 100644
index 0000000..c7e2154
--- /dev/null
+++ b/modules/pfr/agat/spmergeannotations/tests/main.nf.test.snap
@@ -0,0 +1,47 @@
+{
+ "versions": {
+ "content": [
+ [
+ "versions.yml:md5,0ae449590befbaac9269ad8a7a84b66d"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-02T17:08:15.459625"
+ },
+ "candidatus_portiera_aleyrodidarum-multi_gffs-stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gff:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ "versions.yml:md5,0ae449590befbaac9269ad8a7a84b66d"
+ ],
+ "gff": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gff:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,0ae449590befbaac9269ad8a7a84b66d"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-02T17:08:20.581403"
+ }
+}
\ No newline at end of file
diff --git a/modules/pfr/agat/spmergeannotations/tests/tags.yml b/modules/pfr/agat/spmergeannotations/tests/tags.yml
new file mode 100644
index 0000000..7d9b839
--- /dev/null
+++ b/modules/pfr/agat/spmergeannotations/tests/tags.yml
@@ -0,0 +1,2 @@
+agat/spmergeannotations:
+ - "modules/pfr/agat/spmergeannotations/**"
diff --git a/modules/pfr/custom/restoregffids/tests/main.nf.test b/modules/pfr/custom/restoregffids/tests/main.nf.test
index 521b924..cc374b7 100644
--- a/modules/pfr/custom/restoregffids/tests/main.nf.test
+++ b/modules/pfr/custom/restoregffids/tests/main.nf.test
@@ -60,4 +60,4 @@ nextflow_process {
}
-}
+}
\ No newline at end of file
diff --git a/modules/pfr/custom/shortenfastaids/tests/main.nf.test b/modules/pfr/custom/shortenfastaids/tests/main.nf.test
index dc46bae..efff639 100644
--- a/modules/pfr/custom/shortenfastaids/tests/main.nf.test
+++ b/modules/pfr/custom/shortenfastaids/tests/main.nf.test
@@ -128,4 +128,4 @@ nextflow_process {
}
-}
+}
\ No newline at end of file
diff --git a/modules/pfr/edta/edta/tests/main.nf.test b/modules/pfr/edta/edta/tests/main.nf.test
index 3aed0a2..7601876 100644
--- a/modules/pfr/edta/edta/tests/main.nf.test
+++ b/modules/pfr/edta/edta/tests/main.nf.test
@@ -74,4 +74,4 @@ nextflow_process {
}
-}
+}
\ No newline at end of file
diff --git a/modules/pfr/lai/tests/main.nf.test b/modules/pfr/lai/tests/main.nf.test
deleted file mode 100644
index 353043c..0000000
--- a/modules/pfr/lai/tests/main.nf.test
+++ /dev/null
@@ -1,120 +0,0 @@
-nextflow_process {
-
- name "Test Process LAI"
- script "../main.nf"
- process "LAI"
- config "./nextflow.config"
-
- tag "modules"
- tag "modules_nfcore"
- tag "lai"
- tag "gt/suffixerator"
- tag "nf-core/gunzip"
- tag "gt/ltrharvest"
- tag "ltrretriever"
-
- test("homo_sapiens-genome_21_fasta-success") {
-
- setup {
- run("GUNZIP") {
- script "../../../nf-core/gunzip"
-
- process {
- """
- input[0] = [
- [ id:'test' ],
- file('/Users/hrauxr/Projects/nxf-modules/tests/data/chr1.fa.gz', checkIfExists: true)
- ]
- """
- }
- }
-
- run("GT_SUFFIXERATOR") {
- script "../../../pfr/gt/suffixerator"
-
- process {
- """
- input[0] = GUNZIP.out.gunzip
- """
- }
- }
-
- run("GT_LTRHARVEST") {
- script "../../../pfr/gt/ltrharvest"
-
- process {
- """
- input[0] = GT_SUFFIXERATOR.out.index
- """
- }
- }
-
- run("LTRRETRIEVER") {
- script "../../../pfr/ltrretriever"
-
- process {
- """
- input[0] = GUNZIP.out.gunzip
- input[1] = GT_LTRHARVEST.out.tabout.map { meta, tabout -> tabout }
- input[2] = []
- input[3] = []
- input[4] = []
- """
- }
- }
- }
-
- when {
- process {
- """
- input[0] = GUNZIP.out.gunzip
- input[1] = LTRRETRIEVER.out.pass_list.map { meta, pass_list -> pass_list }
- input[2] = LTRRETRIEVER.out.annotation_out.map { meta, annotation_out -> annotation_out }
- input[3] = []
- """
- }
- }
-
- then {
- assertAll(
- { assert process.success },
- { assert path(process.out.log.get(0).get(1)).getText().contains("Dependency checking: Passed!") },
- { assert path(process.out.log.get(0).get(1)).getText().contains("Calculate LAI:") },
- { assert path(process.out.log.get(0).get(1)).getText().contains("Total LTR sequence content (0%) is too low for accurate LAI calculation") },
- { assert path(process.out.log.get(0).get(1)).getText().contains("Sorry, LAI is not applicable on the current genome assembly.") },
- { assert process.out.lai_out == [] },
- { assert snapshot(process.out.versions).match("versions") }
- )
- }
-
- }
-
- test("stub") {
-
- options '-stub'
-
- when {
- process {
- """
- input[0] = [
- [ id:'test' ],
- file(params.test_data['homo_sapiens']['genome']['genome_21_fasta'], checkIfExists: true)
- ]
- input[1] = []
- input[2] = []
- input[3] = []
- """
- }
- }
-
- then {
- assertAll(
- { assert process.success },
- { assert snapshot(process.out).match() },
- { assert snapshot(process.out.versions).match("versions") }
- )
- }
-
- }
-
-}
diff --git a/modules/pfr/lai/tests/main.nf.test.snap b/modules/pfr/lai/tests/main.nf.test.snap
deleted file mode 100644
index 751ddb6..0000000
--- a/modules/pfr/lai/tests/main.nf.test.snap
+++ /dev/null
@@ -1,10 +0,0 @@
-{
- "versions": {
- "content": [
- [
- "versions.yml:md5,2ac93e1e6324236af6f9a794bbac2099"
- ]
- ],
- "timestamp": "2023-12-05T12:15:32.969684"
- }
-}
\ No newline at end of file
diff --git a/modules/pfr/lai/tests/nextflow.config b/modules/pfr/lai/tests/nextflow.config
deleted file mode 100644
index 516a3e2..0000000
--- a/modules/pfr/lai/tests/nextflow.config
+++ /dev/null
@@ -1,10 +0,0 @@
-process {
-
- withName: GT_SUFFIXERATOR {
- ext.args = '-tis -suf -lcp -des -ssp -sds -dna'
- }
-
- withName: GT_LTRHARVEST {
- ext.args = '-minlenltr 100 -maxlenltr 7000 -mintsd 4 -maxtsd 6 -motif TGCA -motifmis 1 -similar 85 -vic 10 -seed 20 -seqids yes'
- }
-}
diff --git a/modules/pfr/lai/tests/tags.yml b/modules/pfr/lai/tests/tags.yml
deleted file mode 100644
index 252295d..0000000
--- a/modules/pfr/lai/tests/tags.yml
+++ /dev/null
@@ -1,2 +0,0 @@
-lai:
- - "modules/pfr/lai/**"
diff --git a/modules/pfr/liftoff/tests/main.nf.test.snap b/modules/pfr/liftoff/tests/main.nf.test.snap
deleted file mode 100644
index baa4d70..0000000
--- a/modules/pfr/liftoff/tests/main.nf.test.snap
+++ /dev/null
@@ -1,34 +0,0 @@
-{
- "unmapped_txt": {
- "content": [
- [
- [
- {
- "id": "test"
- },
- "test.unmapped.txt:md5,7391d10df6e15db356b084c9af5259e4"
- ]
- ]
- ],
- "timestamp": "2023-12-01T13:57:40.748507"
- },
- "versions": {
- "content": [
- [
- "versions.yml:md5,205d9c609e7fe27d8199550d842bdce8"
- ]
- ],
- "timestamp": "2023-12-01T13:57:40.752414"
- },
- "homo_sapiens-genome_21_fasta-genome_1_fasta-genome_1_gtf-for_stub_match": {
- "content": [
- [
- "test.gff3",
- "test.polished.gff3",
- "test.unmapped.txt",
- "{id=test}"
- ]
- ],
- "timestamp": "2023-12-21T15:20:04.816416"
- }
-}
\ No newline at end of file
diff --git a/modules/pfr/liftoff/tests/nextflow.config b/modules/pfr/liftoff/tests/nextflow.config
deleted file mode 100644
index 06b9d76..0000000
--- a/modules/pfr/liftoff/tests/nextflow.config
+++ /dev/null
@@ -1,5 +0,0 @@
-process {
- withName: LIFTOFF {
- ext.args = '-exclude_partial -copies -polish -a 0.1 -s 0.1'
- }
-}
diff --git a/modules/pfr/liftoff/tests/tags.yml b/modules/pfr/liftoff/tests/tags.yml
deleted file mode 100644
index 4ae1fb0..0000000
--- a/modules/pfr/liftoff/tests/tags.yml
+++ /dev/null
@@ -1,2 +0,0 @@
-liftoff:
- - "modules/pfr/liftoff/**"
diff --git a/modules/pfr/ltrretriever/lai/environment.yml b/modules/pfr/ltrretriever/lai/environment.yml
new file mode 100644
index 0000000..e0e4968
--- /dev/null
+++ b/modules/pfr/ltrretriever/lai/environment.yml
@@ -0,0 +1,9 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
+name: "ltrretriever_lai"
+channels:
+ - conda-forge
+ - bioconda
+ - defaults
+dependencies:
+ - "bioconda::LTR_retriever=2.9.9"
diff --git a/modules/pfr/lai/main.nf b/modules/pfr/ltrretriever/lai/main.nf
similarity index 54%
rename from modules/pfr/lai/main.nf
rename to modules/pfr/ltrretriever/lai/main.nf
index d4fced9..464b215 100644
--- a/modules/pfr/lai/main.nf
+++ b/modules/pfr/ltrretriever/lai/main.nf
@@ -1,11 +1,11 @@
-process LAI {
+process LTRRETRIEVER_LAI {
tag "$meta.id"
- label 'process_high'
+ label 'process_medium'
conda "${moduleDir}/environment.yml"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/ltr_retriever:2.9.0--hdfd78af_2':
- 'biocontainers/ltr_retriever:2.9.0--hdfd78af_2' }"
+ 'https://depot.galaxyproject.org/singularity/ltr_retriever:2.9.9--hdfd78af_0':
+ 'biocontainers/ltr_retriever:2.9.9--hdfd78af_0' }"
input:
tuple val(meta), path(fasta)
@@ -26,44 +26,46 @@ process LAI {
def prefix = task.ext.prefix ?: "${meta.id}"
def monoploid_param = monoploid_seqs ? "-mono $monoploid_seqs" : ''
def lai_output_name = monoploid_seqs ? "${annotation_out}.${monoploid_seqs}.out.LAI" : "${annotation_out}.LAI"
+ def VERSION = 'beta3.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
"""
- # Remove comments from genome fasta,
- # otherwise LAI triggers its sequence name change logic
-
- sed \\
- '/^>/ s/\\s.*\$//' \\
- $fasta \\
- > for_lai_no_comments.fsa
-
LAI \\
- -genome for_lai_no_comments.fsa \\
+ -genome $fasta \\
-intact $pass_list \\
-all $annotation_out \\
-t $task.cpus \\
$monoploid_param \\
$args \\
- > "${prefix}.LAI.log"
+ > >(tee "${prefix}.LAI.log") \\
+ || echo "LAI failed! See ${prefix}.LAI.log"
mv \\
$lai_output_name \\
"${prefix}.LAI.out" \\
- || echo "LAI did not produce the output file"
+ || echo "LAI failed to estimate assembly index. See ${prefix}.LAI.log"
cat <<-END_VERSIONS > versions.yml
"${task.process}":
- lai: \$(cat /usr/local/share/LTR_retriever/LAI | grep "my \\\$version" | sed 's/my \$version="//; s/";//')
+ lai: $VERSION
END_VERSIONS
"""
stub:
- def args = task.ext.args ?: ''
- def prefix = task.ext.prefix ?: "${meta.id}"
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def monoploid_param = monoploid_seqs ? "-mono $monoploid_seqs" : ''
+ def lai_output_name = monoploid_seqs ? "${annotation_out}.${monoploid_seqs}.out.LAI" : "${annotation_out}.LAI"
+ def VERSION = 'beta3.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
"""
- touch ${prefix}.LAI.log
+ touch "${prefix}.LAI.log"
+ touch "$lai_output_name"
+
+ mv \\
+ $lai_output_name \\
+ "${prefix}.LAI.out"
cat <<-END_VERSIONS > versions.yml
"${task.process}":
- lai: \$(cat /usr/local/share/LTR_retriever/LAI | grep "my \\\$version" | sed 's/my \$version="//; s/";//')
+ lai: $VERSION
END_VERSIONS
"""
}
diff --git a/modules/pfr/lai/meta.yml b/modules/pfr/ltrretriever/lai/meta.yml
similarity index 91%
rename from modules/pfr/lai/meta.yml
rename to modules/pfr/ltrretriever/lai/meta.yml
index 6fd7aef..f84cf6c 100644
--- a/modules/pfr/lai/meta.yml
+++ b/modules/pfr/ltrretriever/lai/meta.yml
@@ -1,7 +1,9 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
-name: "lai"
-description: Estimates the mean LTR sequence identity in the genome
+name: "ltrretriever_lai"
+description: |
+ Estimates the mean LTR sequence identity in the genome. The input genome fasta should
+ have short alphanumeric IDs without comments
keywords:
- genomics
- annotation
diff --git a/modules/pfr/ltrretriever/lai/tests/main.nf.test b/modules/pfr/ltrretriever/lai/tests/main.nf.test
new file mode 100644
index 0000000..a617811
--- /dev/null
+++ b/modules/pfr/ltrretriever/lai/tests/main.nf.test
@@ -0,0 +1,166 @@
+nextflow_process {
+
+ name "Test Process LTRRETRIEVER_LAI"
+ script "../main.nf"
+ process "LTRRETRIEVER_LAI"
+ config "./nextflow.config"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "gunzip"
+ tag "ltrretriever"
+ tag "ltrretriever/ltrretriever"
+ tag "ltrretriever/lai"
+ tag "ltrharvest"
+ tag "ltrfinder"
+ tag "cat/cat"
+
+ test("actinidia_chinensis-genome_21_fasta_gz-success") {
+
+ setup {
+
+ run("GUNZIP") {
+ script "../../../gunzip"
+
+ process {
+ """
+ input[0] = [
+ [ id:'test' ],
+ file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ run("LTRHARVEST") {
+ script "../../../ltrharvest"
+
+ process {
+ """
+ input[0] = GUNZIP.out.gunzip
+ """
+ }
+ }
+
+ run("LTRFINDER") {
+ script "../../../ltrfinder"
+
+ process {
+ """
+ input[0] = GUNZIP.out.gunzip
+ """
+ }
+ }
+
+ run("CAT_CAT") {
+ script "../../../cat/cat"
+
+ process {
+ """
+ input[0] = LTRHARVEST.out.scn.mix(LTRFINDER.out.scn).groupTuple()
+ """
+ }
+ }
+
+ run("LTRRETRIEVER_LTRRETRIEVER") {
+ script "../../ltrretriever"
+
+ process {
+ """
+ input[0] = GUNZIP.out.gunzip
+ input[1] = CAT_CAT.out.file_out.map { meta, tabout -> tabout }
+ input[2] = []
+ input[3] = []
+ input[4] = []
+ """
+ }
+ }
+ }
+
+ when {
+ process {
+ """
+ input[0] = GUNZIP.out.gunzip
+ input[1] = LTRRETRIEVER_LTRRETRIEVER.out.pass_list.map { meta, pass_list -> pass_list }
+ input[2] = LTRRETRIEVER_LTRRETRIEVER.out.annotation_out.map { meta, annotation_out -> annotation_out }
+ input[3] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert path(process.out.log[0][1]).text.contains("Dependency checking: Passed!") },
+ { assert path(process.out.log[0][1]).text.contains("Calculate LAI:") },
+ { assert path(process.out.log[0][1]).text.contains("Done!") },
+ { assert path(process.out.log[0][1]).text.contains("Result file:") },
+ { assert Math.abs(Float.parseFloat(path(process.out.lai_out[0][1]).text.split("\n")[1].split("\t")[6]) - 31.29) <= 1.0 }
+ )
+ }
+
+ }
+
+ test("stub") {
+
+ options '-stub'
+
+ when {
+ process {
+ """
+ def pass_list = new File('test.pass.list')
+ def out_file = new File('test.out')
+ def monoploid_seqs = new File('some_seqs.list.txt')
+
+ input[0] = [
+ [ id:'test' ],
+ file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true)
+ ]
+ input[1] = pass_list.toPath()
+ input[2] = out_file.toPath()
+ input[3] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("stub_with_monoploid_seqs") {
+
+ options '-stub'
+
+ when {
+ process {
+ """
+ def pass_list = new File('test.pass.list')
+ def out_file = new File('test.out')
+ def monoploid_seqs = new File('some_seqs.list.txt')
+
+ input[0] = [
+ [ id:'test' ],
+ file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true)
+ ]
+ input[1] = pass_list.toPath()
+ input[2] = out_file.toPath()
+ input[3] = monoploid_seqs.toPath()
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+}
\ No newline at end of file
diff --git a/modules/pfr/ltrretriever/lai/tests/main.nf.test.snap b/modules/pfr/ltrretriever/lai/tests/main.nf.test.snap
new file mode 100644
index 0000000..e1c8086
--- /dev/null
+++ b/modules/pfr/ltrretriever/lai/tests/main.nf.test.snap
@@ -0,0 +1,100 @@
+{
+ "stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.LAI.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test.LAI.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,e04e27f9408e771795cd44d96518b7cd"
+ ],
+ "lai_out": [
+ [
+ {
+ "id": "test"
+ },
+ "test.LAI.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "log": [
+ [
+ {
+ "id": "test"
+ },
+ "test.LAI.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,e04e27f9408e771795cd44d96518b7cd"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-22T20:09:00.558021"
+ },
+ "stub_with_monoploid_seqs": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.LAI.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test.LAI.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,e04e27f9408e771795cd44d96518b7cd"
+ ],
+ "lai_out": [
+ [
+ {
+ "id": "test"
+ },
+ "test.LAI.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "log": [
+ [
+ {
+ "id": "test"
+ },
+ "test.LAI.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,e04e27f9408e771795cd44d96518b7cd"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-02-22T20:10:08.213842"
+ }
+}
\ No newline at end of file
diff --git a/modules/pfr/ltrretriever/lai/tests/nextflow.config b/modules/pfr/ltrretriever/lai/tests/nextflow.config
new file mode 100644
index 0000000..75edf1a
--- /dev/null
+++ b/modules/pfr/ltrretriever/lai/tests/nextflow.config
@@ -0,0 +1,15 @@
+process {
+
+ withName: LTRHARVEST {
+ ext.prefix = { "${meta.id}_ltrharvest" }
+ }
+
+ withName: LTRFINDER {
+ ext.args = '-harvest_out -size 1000000 -time 300'
+ // recommended parameters: https://github.com/oushujun/LTR_retriever#usage
+ }
+
+ withName: CAT_CAT {
+ ext.prefix = { "${meta.id}_ltrharvest_ltrfinder.tabout" }
+ }
+}
diff --git a/modules/pfr/ltrretriever/lai/tests/tags.yml b/modules/pfr/ltrretriever/lai/tests/tags.yml
new file mode 100644
index 0000000..470f468
--- /dev/null
+++ b/modules/pfr/ltrretriever/lai/tests/tags.yml
@@ -0,0 +1,2 @@
+ltrretriever/lai:
+ - "modules/nf-core/ltrretriever/lai/**"
diff --git a/modules/pfr/repeatmodeler/builddatabase/main.nf b/modules/pfr/repeatmodeler/builddatabase/main.nf
index 486e25d..6fe244b 100644
--- a/modules/pfr/repeatmodeler/builddatabase/main.nf
+++ b/modules/pfr/repeatmodeler/builddatabase/main.nf
@@ -26,7 +26,7 @@ process REPEATMODELER_BUILDDATABASE {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
- repeatmodeler: \$(RepeatModeler | grep '/usr/local/bin/RepeatModeler - ' | sed 's|/usr/local/bin/RepeatModeler - ||')
+ repeatmodeler: \$(RepeatModeler --version | sed 's/RepeatModeler version //')
END_VERSIONS
"""
@@ -44,7 +44,7 @@ process REPEATMODELER_BUILDDATABASE {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
- repeatmodeler: \$(RepeatModeler | grep '/usr/local/bin/RepeatModeler - ' | sed 's|/usr/local/bin/RepeatModeler - ||')
+ repeatmodeler: \$(RepeatModeler --version | sed 's/RepeatModeler version //')
END_VERSIONS
"""
}
diff --git a/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test b/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test
index 616f88c..fdeda4a 100644
--- a/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test
+++ b/modules/pfr/repeatmodeler/builddatabase/tests/main.nf.test
@@ -57,4 +57,4 @@ nextflow_process {
}
-}
+}
\ No newline at end of file
diff --git a/modules/pfr/repeatmodeler/repeatmodeler/main.nf b/modules/pfr/repeatmodeler/repeatmodeler/main.nf
index 34df322..c7df9ca 100644
--- a/modules/pfr/repeatmodeler/repeatmodeler/main.nf
+++ b/modules/pfr/repeatmodeler/repeatmodeler/main.nf
@@ -35,7 +35,7 @@ process REPEATMODELER_REPEATMODELER {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
- repeatmodeler: \$(RepeatModeler | grep '/usr/local/bin/RepeatModeler - ' | sed 's|/usr/local/bin/RepeatModeler - ||')
+ repeatmodeler: \$(RepeatModeler --version | sed 's/RepeatModeler version //')
END_VERSIONS
"""
@@ -48,7 +48,7 @@ process REPEATMODELER_REPEATMODELER {
cat <<-END_VERSIONS > versions.yml
"${task.process}":
- repeatmodeler: \$(RepeatModeler | grep '/usr/local/bin/RepeatModeler - ' | sed 's|/usr/local/bin/RepeatModeler - ||')
+ repeatmodeler: \$(RepeatModeler --version | sed 's/RepeatModeler version //')
END_VERSIONS
"""
}
diff --git a/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test b/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test
index 78b7957..dd7185f 100644
--- a/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test
+++ b/modules/pfr/repeatmodeler/repeatmodeler/tests/main.nf.test
@@ -89,4 +89,4 @@ nextflow_process {
}
-}
+}
\ No newline at end of file
diff --git a/modules/pfr/lai/environment.yml b/modules/pfr/tsebra/environment.yml
similarity index 79%
rename from modules/pfr/lai/environment.yml
rename to modules/pfr/tsebra/environment.yml
index 94fadbd..3505512 100644
--- a/modules/pfr/lai/environment.yml
+++ b/modules/pfr/tsebra/environment.yml
@@ -1,9 +1,9 @@
---
# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/environment-schema.json
-name: "lai"
+name: "tsebra"
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- - "bioconda::LTR_retriever=2.9.0"
+ - "bioconda::tsebra=1.1.2.4"
diff --git a/modules/pfr/tsebra/main.nf b/modules/pfr/tsebra/main.nf
new file mode 100644
index 0000000..c92ade7
--- /dev/null
+++ b/modules/pfr/tsebra/main.nf
@@ -0,0 +1,61 @@
+process TSEBRA {
+ tag "$meta.id"
+ label 'process_single'
+
+ conda "${moduleDir}/environment.yml"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/tsebra:1.1.2.4--pyhca03a8a_0':
+ 'biocontainers/tsebra:1.1.2.4--pyhca03a8a_0' }"
+
+ input:
+ tuple val(meta), path(gtfs)
+ path hints_files
+ path keep_gtfs
+ path config
+
+ output:
+ tuple val(meta), path("*.gtf"), emit: tsebra_gtf
+ tuple val(meta), path("*.tsv"), emit: tsebra_scores
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def gtf_arg = '-g ' + gtfs.collect { "$it" }.join(',')
+ def hints_arg = '-e ' + hints_files.collect { "$it" }.join(',')
+ def keep_arg = keep_gtfs ? ( '-k ' + keep_gtfs.collect { "$it" }.join(',') ) : ''
+ def config_arg = config ? "-c $config" : ''
+ def VERSION = '1.1.2.4'
+ """
+ tsebra.py \\
+ $gtf_arg \\
+ $hints_arg \\
+ $keep_arg \\
+ $config_arg \\
+ $args \\
+ -o ${prefix}.gtf \\
+ -s ${prefix}.tsv
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ tsebra: $VERSION
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def VERSION = '1.1.2.4'
+ """
+ touch ${prefix}.gtf
+ touch ${prefix}.tsv
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ tsebra: $VERSION
+ END_VERSIONS
+ """
+}
diff --git a/modules/pfr/tsebra/meta.yml b/modules/pfr/tsebra/meta.yml
new file mode 100644
index 0000000..18660d4
--- /dev/null
+++ b/modules/pfr/tsebra/meta.yml
@@ -0,0 +1,66 @@
+---
+# yaml-language-server: $schema=https://raw.githubusercontent.com/nf-core/modules/master/modules/meta-schema.json
+name: "tsebra"
+description: Transcript Selector for BRAKER TSEBRA combines gene predictions by selecting transcripts based on their extrinsic evidence support
+keywords:
+ - genomics
+ - transcript
+ - selector
+ - gene
+ - prediction
+ - evidence
+tools:
+ - "tsebra":
+ description: TSEBRA is a combiner tool that selects transcripts from gene predictions based on the support by extrinsic evidence in the form of introns and start/stop codons
+ homepage: "https://github.com/Gaius-Augustus/TSEBRA"
+ documentation: "https://github.com/Gaius-Augustus/TSEBRA"
+ tool_dev_url: "https://github.com/Gaius-Augustus/TSEBRA"
+ doi: "10.1186/s12859-021-04482-0"
+ licence: ["Artistic-2.0"]
+
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - gtfs:
+ type: list
+ description: List of gene prediction files in gtf
+ pattern: "*.gtf"
+ - hints_files:
+ type: list
+ description: List of files containing extrinsic evidence in gff
+ pattern: "*.gff"
+ - keep_gtfs:
+ type: list
+ description: |
+ List of gene prediction files in gtf. These gene sets are used the same way as other inputs, but TSEBRA ensures that all
+ transcripts from these gene sets are included in the output
+ pattern: "*.gtf"
+ - config:
+ type: file
+ description: Configuration file that sets the parameter for TSEBRA
+ pattern: "*.cfg"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. `[ id:'sample1' ]`
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+ - tsebra_gtf:
+ type: file
+ description: Output file for the combined gene predictions in gtf
+ pattern: "*.gtf"
+ - tsebra_scores:
+ type: file
+ description: Transcript scores as a table
+ pattern: "*.tsv"
+authors:
+ - "@GallVp"
+maintainers:
+ - "@GallVp"
diff --git a/modules/pfr/tsebra/tests/main.nf.test b/modules/pfr/tsebra/tests/main.nf.test
new file mode 100644
index 0000000..ddf7a6c
--- /dev/null
+++ b/modules/pfr/tsebra/tests/main.nf.test
@@ -0,0 +1,91 @@
+nextflow_process {
+
+ name "Test Process TSEBRA"
+ script "../main.nf"
+ process "TSEBRA"
+
+ tag "modules"
+ tag "modules_nfcore"
+ tag "tsebra"
+ tag "nf-core/gunzip/main"
+
+ test("actinidia_chinensis-genome") {
+
+ setup {
+ run('GUNZIP', alias: 'GUNZIP_GTF') {
+ script "../../../nf-core/gunzip/main"
+
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.test_data['actinidia_chinensis']['genome']['genome_1_gtf_gz'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+
+ run('GUNZIP', alias: 'GUNZIP_HINTS') {
+ script "../../../nf-core/gunzip/main"
+
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ file(params.test_data['actinidia_chinensis']['genome']['genome_1_hints_gff_gz'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+ }
+
+ when {
+ process {
+ """
+ input[0] = GUNZIP_GTF.out.gunzip.map { meta, gtf -> [ meta, [ gtf ] ] }
+ input[1] = GUNZIP_HINTS.out.gunzip.map { meta, gff -> [ gff ] }
+ input[2] = []
+ input[3] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+ test("actinidia_chinensis-genome-stub") {
+
+ options "-stub"
+
+ when {
+ process {
+ """
+ input[0] = [
+ [ id:'test' ], // meta map
+ [ file(params.test_data['actinidia_chinensis']['genome']['genome_1_gtf_gz'], checkIfExists: true) ]
+ ]
+ input[1] = [
+ file(params.test_data['actinidia_chinensis']['genome']['genome_1_hints_gff_gz'], checkIfExists: true)
+ ]
+ input[2] = []
+ input[3] = []
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert process.success },
+ { assert snapshot(process.out).match() }
+ )
+ }
+
+ }
+
+}
diff --git a/modules/pfr/tsebra/tests/main.nf.test.snap b/modules/pfr/tsebra/tests/main.nf.test.snap
new file mode 100644
index 0000000..4d9e15f
--- /dev/null
+++ b/modules/pfr/tsebra/tests/main.nf.test.snap
@@ -0,0 +1,100 @@
+{
+ "actinidia_chinensis-genome-stub": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,6d60045f4f9b66baa508c174ae6a6408"
+ ],
+ "tsebra_gtf": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gtf:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "tsebra_scores": [
+ [
+ {
+ "id": "test"
+ },
+ "test.tsv:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,6d60045f4f9b66baa508c174ae6a6408"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-11T12:20:49.618044"
+ },
+ "actinidia_chinensis-genome": {
+ "content": [
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gtf:md5,7c781c919e6aa20561f72dea09474f74"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test.tsv:md5,ad7bd858c1838b40286609a311f3a891"
+ ]
+ ],
+ "2": [
+ "versions.yml:md5,6d60045f4f9b66baa508c174ae6a6408"
+ ],
+ "tsebra_gtf": [
+ [
+ {
+ "id": "test"
+ },
+ "test.gtf:md5,7c781c919e6aa20561f72dea09474f74"
+ ]
+ ],
+ "tsebra_scores": [
+ [
+ {
+ "id": "test"
+ },
+ "test.tsv:md5,ad7bd858c1838b40286609a311f3a891"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,6d60045f4f9b66baa508c174ae6a6408"
+ ]
+ }
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-04-11T12:20:45.666076"
+ }
+}
\ No newline at end of file
diff --git a/modules/pfr/tsebra/tests/tags.yml b/modules/pfr/tsebra/tests/tags.yml
new file mode 100644
index 0000000..7594182
--- /dev/null
+++ b/modules/pfr/tsebra/tests/tags.yml
@@ -0,0 +1,2 @@
+tsebra:
+ - "modules/pfr/tsebra/**"
diff --git a/nextflow.config b/nextflow.config
index 409da80..a9ae34a 100644
--- a/nextflow.config
+++ b/nextflow.config
@@ -1,87 +1,83 @@
includeConfig './conf/base.config'
params {
- target_assemblies = [
- ["red5_v2p1", "/workspace/pangene/test_data/red5_v2p1_chr1.fasta"],
- ["donghong", "/workspace/pangene/test_data/donghong.chr1.fsa.gz"]
- ]
- // Pattern: [ [tag, fasta(.gz) ] ]
- // Permissible tags: tag, tag_1, tag_tag2_3, tag_tag2_tag3;
- // Any name with alphanumeric characters including "_".
- // "." is not allowed in the tag name
-
- te_libraries = [
- ["donghong", "/workspace/pangene/test_data/donghong.TElib.fa.gz"]
- ]
- // Pattern: [ [tag, fasta(.gz) ] ]
- // Optional Set to null if libraries are not available.
- //
- // Each TE library should have an associated (by tag) assembly in target_assemblies.
- // Not all target_assemblies need to have an associated (by tag) TE library.
- // When the TE lib is not available for a traget assembly, EDTA is used to create one.
+ // Input/output options
+ input = null
+ external_protein_fastas = null
+ eggnogmapper_db_dir = null
+ eggnogmapper_tax_scope = null
+ fastq = null
+ liftoff_annotations = null
+ outdir = "./results"
+ // Repeat annotation options
repeat_annotator = 'repeatmodeler'
- // 'repeatmodeler' or 'edta'
-
- save_annotated_te_lib = true
-
+ save_annotated_te_lib = false
edta_is_sensitive = false
+ repeatmasker_save_outputs = false
- repeatmasker_save_outputs = true
-
- samplesheet = "/workspace/pangene/test_data/samplesheet.csv"
- // Optional: Set to null if not available
-
+ // RNASeq pre-processing options
skip_fastqc = false
skip_fastp = false
min_trimmed_reads = 10000
extra_fastp_args = ""
-
- save_trimmed = true
- // toggling this parameter results in rerun of FASTP and FASTQC_TRIM
-
+ save_trimmed = false
remove_ribo_rna = false
- save_non_ribo_reads = true
+ save_non_ribo_reads = false
ribo_database_manifest = "${projectDir}/assets/rrna-db-defaults.txt"
+ // RNAseq alignment options
star_max_intron_length = 16000
star_align_extra_args = ""
- star_save_outputs = true
- save_cat_bam = true
- // A single BAM is created for each assembly from all the RNAseq samples, if there
- // are more than one
-
- external_protein_fastas = [
- "/workspace/ComparativeDataSources/OrthoDB11/Viridiplantae.fa.gz",
- "/output/genomic/fairGenomes/Plant/Actinidia/chinensis/var_chinensis/male/2x/assembly_russell/v2.1/RU01.20221115150135.pep.fasta"
- ]
- // Optional: Set to null if not available
+ star_save_outputs = false
+ save_cat_bam = false
+ // Annotation options
braker_extra_args = ""
-
- liftoff_xref_annotations = [
- [
- "/output/genomic/fairGenomes/Plant/Actinidia/chinensis/var_chinensis/male/2x/assembly_russell/v2.1/Russell_V2a.chromosomes.and.unassiged.and.haplotigs.fsa",
- "/output/genomic/fairGenomes/Plant/Actinidia/chinensis/var_chinensis/male/2x/assembly_russell/v2.1/RU01.20221115150135.gff3"
- ],
- [
- "/output/genomic/fairGenomes/Plant/Arabidopsis/thaliana/var_na/sex_na/2x/assembly_tair/v10/TAIR10_chr_all.fas",
- "/output/genomic/fairGenomes/Plant/Arabidopsis/thaliana/var_na/sex_na/2x/assembly_tair/v10/TAIR10_GFF3_genes_transposons.fixed.gff3"
- ]
- ]
- // Format: [ [ fasta(.gz), gff3(.gz) ] ]
- // Optional: Set to null if not available
-
+ braker_allow_isoforms = true
liftoff_coverage = 0.9
liftoff_identity = 0.9
+ eggnogmapper_evalue = 0.00001
+ eggnogmapper_pident = 35
+ eggnogmapper_purge_nohits = false
- outdir = "./results"
-
+ // Max job request options
max_cpus = 12
- max_memory = 200.GB
- max_time = 1.days
+ max_memory = '200.GB'
+ max_time = '7.day'
+
+ // Infrastructure options
+ validationSkipDuplicateCheck= true
+ validationS3PathCheck = true
+}
+
+manifest {
+ name = 'pangene'
+ author = """Usman Rashid, Jason Shiller"""
+ homePage = 'https://github.com/PlantandFoodResearch/pan-gene'
+ description = """A NextFlow pipeline for pan-genome annotation"""
+ mainScript = 'main.nf'
+ nextflowVersion = '!>=23.04.4'
+ version = '0.3.0'
+ doi = ''
+}
+
+def trace_timestamp = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')
+timeline {
+ enabled = true
+ file = "${params.outdir}/pipeline_info/execution_timeline_${trace_timestamp}.html"
+}
+report {
+ enabled = true
+ file = "${params.outdir}/pipeline_info/execution_report_${trace_timestamp}.html"
+}
+trace {
+ enabled = true
+ file = "${params.outdir}/pipeline_info/execution_trace_${trace_timestamp}.txt"
+}
+
+plugins {
+ id 'nf-validation@1.1.3'
}
-includeConfig './conf/manifest.config'
includeConfig './conf/modules.config'
-includeConfig './conf/reporting_defaults.config'
diff --git a/nextflow_schema.json b/nextflow_schema.json
new file mode 100644
index 0000000..988f87a
--- /dev/null
+++ b/nextflow_schema.json
@@ -0,0 +1,293 @@
+{
+ "$schema": "http://json-schema.org/draft-07/schema",
+ "$id": "https://raw.githubusercontent.com/plantandfoodresearch/pangene/master/nextflow_schema.json",
+ "title": "plantandfoodresearch/pangene pipeline parameters",
+ "description": "A NextFlow pipeline for pan-genome annotation",
+ "type": "object",
+ "definitions": {
+ "input_output_options": {
+ "title": "Input/output options",
+ "type": "object",
+ "fa_icon": "fas fa-terminal",
+ "description": "",
+ "required": ["input", "outdir", "external_protein_fastas", "eggnogmapper_db_dir", "eggnogmapper_tax_scope"],
+ "properties": {
+ "input": {
+ "type": "string",
+ "format": "file-path",
+ "mimetype": "text/csv",
+ "schema": "assets/schema_input.json",
+ "pattern": "^\\S+\\.csv$",
+ "description": "Target assemblies listed in a CSV sheet",
+ "fa_icon": "fas fa-file-csv",
+ "help_text": "FASTA and other associated files for target assemblies provided as a CSV sheet"
+ },
+ "external_protein_fastas": {
+ "type": "string",
+ "description": "External protein fastas listed in a text sheet",
+ "help_text": "A text file listing FASTA files to provide protein evidence for annotation",
+ "format": "file-path",
+ "mimetype": "text/txt",
+ "fa_icon": "far fa-file-alt"
+ },
+ "eggnogmapper_db_dir": {
+ "type": "string",
+ "description": "Eggnogmapper database directory",
+ "format": "directory-path"
+ },
+ "eggnogmapper_tax_scope": {
+ "type": "integer",
+ "description": "Eggnogmapper taxonomy scope",
+ "minimum": 0
+ },
+ "fastq": {
+ "type": "string",
+ "format": "file-path",
+ "mimetype": "text/csv",
+ "schema": "assets/schema_fastq.json",
+ "pattern": "^\\S+\\.csv$",
+ "help_text": "FASTQ files for RNASeq samples corresponding to each target assembly provided in a CSV sheet",
+ "fa_icon": "fas fa-file-csv",
+ "description": "FASTQ samples listed in a CSV sheet"
+ },
+ "liftoff_annotations": {
+ "type": "string",
+ "format": "file-path",
+ "mimetype": "text/csv",
+ "schema": "assets/schema_liftoff.json",
+ "pattern": "^\\S+\\.csv$",
+ "description": "Reference annotations listed in a CSV sheet",
+ "help_text": "FASTA and GFF3 files for reference annotations for liftoff listed in a CSV sheet",
+ "fa_icon": "fas fa-file-csv"
+ },
+ "outdir": {
+ "type": "string",
+ "format": "directory-path",
+ "description": "The output directory where the results will be saved",
+ "fa_icon": "fas fa-folder-open",
+ "default": "./results",
+ "help_text": "Use absolute paths to storage on Cloud infrastructure"
+ }
+ }
+ },
+ "repeat_annotation_options": {
+ "title": "Repeat annotation options",
+ "type": "object",
+ "description": "",
+ "default": "",
+ "properties": {
+ "repeat_annotator": {
+ "type": "string",
+ "default": "repeatmodeler",
+ "enum": ["edta", "repeatmodeler"],
+ "description": "'edta' or 'repeatmodeler'"
+ },
+ "save_annotated_te_lib": {
+ "type": "boolean",
+ "description": "Save annotated TE library or not?"
+ },
+ "edta_is_sensitive": {
+ "type": "boolean",
+ "description": "Use '--sensitive 1' flag with EDTA or not?"
+ },
+ "repeatmasker_save_outputs": {
+ "type": "boolean",
+ "description": "Save the repeat-masked genome or not?"
+ }
+ }
+ },
+ "rnaseq_pre_processing_options": {
+ "title": "RNASeq pre-processing options",
+ "type": "object",
+ "description": "",
+ "default": "",
+ "properties": {
+ "skip_fastqc": {
+ "type": "boolean",
+ "description": "Skip FASTQC or not?"
+ },
+ "skip_fastp": {
+ "type": "boolean",
+ "description": "Skip trimming by FASTP or not?"
+ },
+ "min_trimmed_reads": {
+ "type": "integer",
+ "default": 10000,
+ "description": "Exclude a sample if its reads after trimming are below this number",
+ "minimum": 0
+ },
+ "extra_fastp_args": {
+ "type": "string",
+ "description": "Extra FASTP arguments"
+ },
+ "save_trimmed": {
+ "type": "boolean",
+ "description": "Save FASTQ files after trimming or not?"
+ },
+ "remove_ribo_rna": {
+ "type": "boolean",
+ "description": "Remove Ribosomal RNA or not?"
+ },
+ "save_non_ribo_reads": {
+ "type": "boolean",
+ "description": "Save FASTQ files after Ribosomal RNA removal or not?"
+ },
+ "ribo_database_manifest": {
+ "type": "string",
+ "default": "${projectDir}/assets/rrna-db-defaults.txt",
+ "description": "Ribosomal RNA fastas listed in a text sheet",
+ "format": "file-path",
+ "mimetype": "text/txt"
+ }
+ }
+ },
+ "rnaseq_alignment_options": {
+ "title": "RNAseq alignment options",
+ "type": "object",
+ "description": "",
+ "default": "",
+ "properties": {
+ "star_max_intron_length": {
+ "type": "integer",
+ "default": 16000,
+ "minimum": 0,
+ "description": "Maximum intron length for STAR alignment"
+ },
+ "star_align_extra_args": {
+ "type": "string",
+ "description": "Extra arguments for STAR"
+ },
+ "star_save_outputs": {
+ "type": "boolean",
+ "description": "Save BAM files from STAR or not?"
+ },
+ "save_cat_bam": {
+ "type": "boolean",
+ "description": "Save a concatenated BAM file per assembly or not?"
+ }
+ }
+ },
+ "annotation_options": {
+ "title": "Annotation options",
+ "type": "object",
+ "description": "",
+ "default": "",
+ "properties": {
+ "braker_extra_args": {
+ "type": "string",
+ "description": "Extra arguments for BRAKER"
+ },
+ "braker_allow_isoforms": {
+ "type": "boolean",
+ "default": true,
+ "description": "Allow multiple isoforms for gene models"
+ },
+ "liftoff_coverage": {
+ "type": "number",
+ "default": 0.9,
+ "minimum": 0,
+ "maximum": 1,
+ "description": "Liftoff coverage parameter"
+ },
+ "liftoff_identity": {
+ "type": "number",
+ "default": 0.9,
+ "description": "Liftoff identity parameter"
+ },
+ "eggnogmapper_evalue": {
+ "type": "number",
+ "default": 1e-5,
+ "description": "Only report alignments below or equal to the e-value threshold"
+ },
+ "eggnogmapper_pident": {
+ "type": "integer",
+ "default": 35,
+ "description": "Only report alignments above or equal to the given percentage of identity (0-100)",
+ "minimum": 0,
+ "maximum": 100
+ },
+ "eggnogmapper_purge_nohits": {
+ "type": "boolean",
+ "description": "Purge transcripts which do not have a hit against eggnog"
+ }
+ }
+ },
+ "max_job_request_options": {
+ "title": "Max job request options",
+ "type": "object",
+ "fa_icon": "fab fa-acquisitions-incorporated",
+ "description": "Set the top limit for requested resources for any single job.",
+ "help_text": "If you are running on a smaller system, a pipeline step requesting more resources than are available may cause Nextflow to stop the run with an error. These options allow you to cap the maximum resources requested by any single job so that the pipeline will run on your system.\n\nNote that you can not _increase_ the resources requested by any job using these options. For that you will need your own configuration file. See [the nf-core website](https://nf-co.re/usage/configuration) for details.",
+ "properties": {
+ "max_cpus": {
+ "type": "integer",
+ "description": "Maximum number of CPUs that can be requested for any single job.",
+ "default": 12,
+ "fa_icon": "fas fa-microchip",
+ "hidden": true,
+ "help_text": "Use to set an upper-limit for the CPU requirement for each process. Should be an integer e.g. `--max_cpus 1`"
+ },
+ "max_memory": {
+ "type": "string",
+ "description": "Maximum amount of memory that can be requested for any single job.",
+ "default": "200.GB",
+ "fa_icon": "fas fa-memory",
+ "pattern": "^\\d+(\\.\\d+)?\\.?\\s*(K|M|G|T)?B$",
+ "hidden": true,
+ "help_text": "Use to set an upper-limit for the memory requirement for each process. Should be a string in the format integer-unit e.g. `--max_memory '8.GB'`"
+ },
+ "max_time": {
+ "type": "string",
+ "description": "Maximum amount of time that can be requested for any single job.",
+ "default": "7.day",
+ "fa_icon": "far fa-clock",
+ "pattern": "^(\\d+\\.?\\s*(s|m|h|d|day)\\s*)+$",
+ "hidden": true,
+ "help_text": "Use to set an upper-limit for the time requirement for each process. Should be a string in the format integer-unit e.g. `--max_time '2.h'`"
+ }
+ }
+ },
+ "infrastructure_options": {
+ "title": "Infrastructure options",
+ "type": "object",
+ "fa_icon": "fas fa-file-import",
+ "description": "",
+ "help_text": "",
+ "properties": {
+ "validationSkipDuplicateCheck": {
+ "type": "boolean",
+ "default": true,
+ "hidden": true
+ },
+ "validationS3PathCheck": {
+ "type": "boolean",
+ "default": true,
+ "hidden": true
+ }
+ }
+ }
+ },
+ "allOf": [
+ {
+ "$ref": "#/definitions/input_output_options"
+ },
+ {
+ "$ref": "#/definitions/repeat_annotation_options"
+ },
+ {
+ "$ref": "#/definitions/rnaseq_pre_processing_options"
+ },
+ {
+ "$ref": "#/definitions/rnaseq_alignment_options"
+ },
+ {
+ "$ref": "#/definitions/annotation_options"
+ },
+ {
+ "$ref": "#/definitions/max_job_request_options"
+ },
+ {
+ "$ref": "#/definitions/infrastructure_options"
+ }
+ ]
+}
diff --git a/pangene_pfr b/pangene_pfr
deleted file mode 100644
index 608798c..0000000
--- a/pangene_pfr
+++ /dev/null
@@ -1,22 +0,0 @@
-#!/bin/bash -e
-
-
-#SBATCH --job-name PANGENE
-#SBATCH --time=1-00:00:00
-#SBATCH --nodes=1
-#SBATCH --ntasks=1
-#SBATCH --cpus-per-task=1
-#SBATCH --output pangene_pfr.stdout
-#SBATCH --error pangene_pfr.stderr
-#SBATCH --mem=4G
-
-ml apptainer/1.1
-ml nextflow/23.04.4
-
-export TMPDIR="/workspace/$USER/tmp"
-export APPTAINER_BINDPATH="$APPTAINER_BINDPATH,$TMPDIR:$TMPDIR,$TMPDIR:/tmp"
-
-nextflow \
- main.nf \
- -profile pfr,apptainer \
- -resume
diff --git a/pfr/params.json b/pfr/params.json
new file mode 100644
index 0000000..cb7d801
--- /dev/null
+++ b/pfr/params.json
@@ -0,0 +1,8 @@
+{
+ "input": "/workspace/pangene/test_data/assemblysheet.csv",
+ "external_protein_fastas": "/workspace/pangene/test_data/external-protein-fastas.txt",
+ "eggnogmapper_db_dir": "/workspace/ComparativeDataSources/emapperdb/5.0.2",
+ "eggnogmapper_tax_scope": 33090,
+ "fastq": "/workspace/pangene/test_data/fastqsheet.csv",
+ "liftoff_annotations": "/workspace/pangene/test_data/liftoffannotations.csv"
+}
diff --git a/pfr/profile.config b/pfr/profile.config
new file mode 100644
index 0000000..b0eba29
--- /dev/null
+++ b/pfr/profile.config
@@ -0,0 +1,17 @@
+profiles {
+ pfr {
+ process {
+ executor = 'slurm'
+ }
+
+ apptainer {
+ envWhitelist = "APPTAINER_BINDPATH,APPTAINER_BIND"
+ cacheDir = "/workspace/pangene/singularity"
+ }
+ }
+}
+
+// params {
+// config_profile_name = 'Plant&Food profile'
+// config_profile_description = 'Plant&Food profile using SLURM in combination with Apptainer'
+// }
diff --git a/pfr_pangene b/pfr_pangene
new file mode 100644
index 0000000..4f9bf4c
--- /dev/null
+++ b/pfr_pangene
@@ -0,0 +1,49 @@
+#!/bin/bash -e
+
+
+#SBATCH --job-name PANGENE
+#SBATCH --time=7-00:00:00
+#SBATCH --nodes=1
+#SBATCH --ntasks=1
+#SBATCH --cpus-per-task=1
+#SBATCH --output pfr_pangene.stdout
+#SBATCH --error pfr_pangene.stderr
+#SBATCH --mem=4G
+
+full_test_flag=0
+
+# Parse command line options
+while getopts "t" opt; do
+ case ${opt} in
+ t )
+ full_test_flag=1
+ ;;
+ \? )
+ echo "Invalid option: $OPTARG" 1>&2
+ exit 1
+ ;;
+ esac
+done
+shift $((OPTIND -1))
+
+ml unload perl
+ml apptainer/1.1
+ml nextflow/23.04.4
+
+export TMPDIR="/workspace/$USER/tmp"
+export APPTAINER_BINDPATH="$APPTAINER_BINDPATH,$TMPDIR:$TMPDIR,$TMPDIR:/tmp"
+
+if [ $full_test_flag -eq 1 ]; then
+ nextflow \
+ main.nf \
+ -c pfr/profile.config \
+ -profile pfr,apptainer,test_full \
+ -resume
+else
+ nextflow \
+ main.nf \
+ -c pfr/profile.config \
+ -profile pfr,apptainer \
+ -params-file pfr/params.json \
+ -resume
+fi
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..5611062
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,15 @@
+# Config file for Python. Mostly used to configure linting of bin/*.py with Ruff.
+# Should be kept the same as nf-core/tools to avoid fighting with template synchronisation.
+[tool.ruff]
+line-length = 120
+target-version = "py38"
+cache-dir = "~/.cache/ruff"
+
+[tool.ruff.lint]
+select = ["I", "E1", "E4", "E7", "E9", "F", "UP", "N"]
+
+[tool.ruff.lint.isort]
+known-first-party = ["nf_core"]
+
+[tool.ruff.lint.per-file-ignores]
+"__init__.py" = ["E402", "F401"]
diff --git a/subworkflows/local/align_rnaseq.nf b/subworkflows/local/align_rnaseq.nf
index 05cd2fa..94d8b8d 100644
--- a/subworkflows/local/align_rnaseq.nf
+++ b/subworkflows/local/align_rnaseq.nf
@@ -71,6 +71,6 @@ workflow ALIGN_RNASEQ {
ch_versions = ch_versions.mix(SAMTOOLS_CAT.out.versions.first())
emit:
- bam = ch_samtools_bam // channel: [ [ id, single_end, target_assembly ], [ bam ] ]
+ bam = ch_samtools_bam // channel: [ [ id: target_assembly, single_end ], [ bam ] ]
versions = ch_versions // channel: [ versions.yml ]
}
diff --git a/subworkflows/local/extract_samples.nf b/subworkflows/local/extract_samples.nf
deleted file mode 100644
index 947c0b7..0000000
--- a/subworkflows/local/extract_samples.nf
+++ /dev/null
@@ -1,71 +0,0 @@
-// Source:
-// https://github.com/nf-core/rnaseq
-// MIT: https://github.com/nf-core/rnaseq/blob/master/LICENSE
-//
-// Check input samplesheet and get read channels
-//
-// Changes:
-// Added channel permissible_target_assemblies
-// Changed file name from input_check.nf to extract_samples.nf
-// Removed strandedness
-// Nowing emitting an extra channel 'assemblies' which indicates the
-// assemblies targeted by each read
-
-include { SAMPLESHEET_CHECK } from '../../modules/local/samplesheet_check'
-
-workflow EXTRACT_SAMPLES {
- take:
- samplesheet // file: /path/to/samplesheet.csv
- permissible_target_assemblies // val: assembly_a,assembly_b
-
- main:
- SAMPLESHEET_CHECK ( samplesheet, permissible_target_assemblies )
- .csv
- | splitCsv ( header:true, sep:',' )
- | combine ( samplesheet )
- | map { row, sheet ->
- create_fastq_channel(row, sheet.getParent())
- }
- | set { ch_reads }
-
- reads = ch_reads.map { meta, fastq -> [[id:meta.id, single_end:meta.single_end], fastq]}
-
- ch_reads
- | flatMap { meta, fastq ->
- meta.target_assemblies.collect { assembly -> [[id:meta.id, single_end:meta.single_end], assembly] }
- }
- | set { assemblies }
-
- emit:
- reads // channel: [ val(meta), [ reads ] ]
- assemblies // channel: [ val(meta), val(assembly) ]
- versions = SAMPLESHEET_CHECK.out.versions // channel: [ versions.yml ]
-}
-
-// Function to get list of [ meta, [ fastq_1, fastq_2 ] ]
-def create_fastq_channel(LinkedHashMap row, sheetPath) {
- // create meta map
- def meta = [:]
- meta.id = row.sample
- meta.single_end = row.single_end.toBoolean()
- meta.target_assemblies = row.target_assemblies.split(";").sort()
-
- def fq1 = row.fastq_1.startsWith("/") ? row.fastq_1 : "$sheetPath/${row.fastq_1}"
- def fq2 = row.fastq_2.startsWith("/") ? row.fastq_2 : "$sheetPath/${row.fastq_2}"
-
- // add path(s) of the fastq file(s) to the meta map
- def fastq_meta = []
- if (!file(fq1).exists()) {
- exit 1, "ERROR: Please check input samplesheet -> Read 1 FastQ file does not exist!\n${fq1}"
- }
- if (meta.single_end) {
- fastq_meta = [ meta, [ file(fq1) ] ]
- } else {
- if (!file(fq2).exists()) {
- exit 1, "ERROR: Please check input samplesheet -> Read 2 FastQ file does not exist!\n${fq2}"
- }
- fastq_meta = [ meta, [ file(fq1), file(fq2) ] ]
- }
-
- return fastq_meta
-}
diff --git a/subworkflows/local/fasta_braker3.nf b/subworkflows/local/fasta_braker3.nf
new file mode 100644
index 0000000..b76bdf7
--- /dev/null
+++ b/subworkflows/local/fasta_braker3.nf
@@ -0,0 +1,65 @@
+include { BRAKER3 } from '../../modules/kherronism/braker3'
+include { FILE_GUNZIP as BRAKER_GFF3_GUNZIP } from '../../subworkflows/local/file_gunzip'
+include { FILE_GUNZIP as BRAKER_HINTS_GUNZIP } from '../../subworkflows/local/file_gunzip'
+
+workflow FASTA_BRAKER3 {
+ take:
+ ch_masked_target_assembly // channel: [ meta, fasta ]; meta ~ [ id: target_assembly ]
+ ch_braker_ex_asm_str // channel: val(assembly_x,assembly_y)
+ ch_rnaseq_bam // channel: [ meta, bam ]
+ ch_ext_prots_fasta // channel: [ meta2, fasta ]; meta2 ~ [ id: ext_protein_seqs ]
+ ch_braker_annotation // channel: [ meta, gff3, hints.gff ]
+
+ main:
+ ch_versions = Channel.empty()
+
+
+ ch_braker_inputs = ch_masked_target_assembly
+ | combine( ch_braker_ex_asm_str )
+ | filter { meta, fasta, ex_str -> !( ex_str.split(",").contains( meta.id ) ) }
+ | map { meta, fasta, ex_str ->
+ [ meta, fasta ]
+ }
+ | join(ch_rnaseq_bam, remainder: true)
+ | combine(
+ ch_ext_prots_fasta.map { meta, fasta -> fasta }.ifEmpty(null)
+ )
+ | map { meta, fasta, bam, prots -> [ meta, fasta, bam ?: [], prots ?: [] ] }
+
+ def rnaseq_sets_dirs = []
+ def rnaseq_sets_ids = []
+ def hintsfile = []
+
+ // MODULE: BRAKER3
+ BRAKER3(
+ ch_braker_inputs.map { meta, fasta, bam, prots -> [meta, fasta] },
+ ch_braker_inputs.map { meta, fasta, bam, prots -> bam },
+ rnaseq_sets_dirs,
+ rnaseq_sets_ids,
+ ch_braker_inputs.map { meta, fasta, bam, prots -> prots },
+ hintsfile
+ )
+
+ ch_braker_gff3 = BRAKER3.out.gff3
+ | mix( ch_braker_annotation.map { meta, gff3, hints -> [ meta, gff3 ] } )
+ ch_braker_hints = BRAKER3.out.hintsfile
+ | mix( ch_braker_annotation.map { meta, gff3, hints -> [ meta, hints ] } )
+ ch_versions = ch_versions.mix(BRAKER3.out.versions.first())
+
+ // WORKFLOW: FILE_GUNZIP as BRAKER_GFF3_GUNZIP
+ BRAKER_GFF3_GUNZIP ( ch_braker_gff3 )
+
+ ch_braker_gff3 = BRAKER_GFF3_GUNZIP.out.gunzip
+ ch_versions = ch_versions.mix(BRAKER_GFF3_GUNZIP.out.versions)
+
+ // WORKFLOW: FILE_GUNZIP as BRAKER_HINTS_GUNZIP
+ BRAKER_HINTS_GUNZIP ( ch_braker_hints )
+
+ ch_braker_hints = BRAKER_HINTS_GUNZIP.out.gunzip
+ ch_versions = ch_versions.mix(BRAKER_HINTS_GUNZIP.out.versions)
+
+ emit:
+ braker_gff3 = ch_braker_gff3 // [ meta, gff3 ]
+ braker_hints = ch_braker_hints // [ meta, hints.gff ]
+ versions = ch_versions // [ versions.yml ]
+}
diff --git a/subworkflows/local/fasta_liftoff.nf b/subworkflows/local/fasta_liftoff.nf
index 4c59ba3..01cd776 100644
--- a/subworkflows/local/fasta_liftoff.nf
+++ b/subworkflows/local/fasta_liftoff.nf
@@ -1,7 +1,10 @@
-include { GUNZIP as GUNZIP_FASTA } from '../../modules/nf-core/gunzip'
-include { GUNZIP as GUNZIP_GFF } from '../../modules/nf-core/gunzip'
-include { GFFREAD } from '../../modules/nf-core/gffread'
-include { LIFTOFF } from '../../modules/pfr/liftoff'
+include { GUNZIP as GUNZIP_FASTA } from '../../modules/nf-core/gunzip/main'
+include { GUNZIP as GUNZIP_GFF } from '../../modules/nf-core/gunzip/main'
+include { GFFREAD as GFFREAD_BEFORE_LIFTOFF } from '../../modules/nf-core/gffread/main'
+include { LIFTOFF } from '../../modules/nf-core/liftoff/main'
+include { AGAT_SPMERGEANNOTATIONS as MERGE_LIFTOFF_ANNOTATIONS } from '../../modules/pfr/agat/spmergeannotations/main'
+include { AGAT_SPFILTERFEATUREFROMKILLLIST } from '../../modules/pfr/agat/spfilterfeaturefromkilllist/main'
+include { GFFREAD as GFFREAD_AFTER_LIFTOFF } from '../../modules/nf-core/gffread/main'
workflow FASTA_LIFTOFF {
take:
@@ -44,21 +47,11 @@ workflow FASTA_LIFTOFF {
ch_versions = ch_versions.mix(GUNZIP_GFF.out.versions.first())
- // MODULE: GFFREAD
- ch_gffread_inputs = ch_xref_gunzip_gff
- | map { meta, gff ->
- [ gff.getSimpleName(), meta, gff ]
- } // For meta insertion later, remove when GFFREAD has meta
-
- GFFREAD ( ch_gffread_inputs.map { name, meta, gff -> gff } )
+ // MODULE: GFFREAD as GFFREAD_BEFORE_LIFTOFF
+ GFFREAD_BEFORE_LIFTOFF ( ch_xref_gunzip_gff, [] )
- ch_gffread_gff = GFFREAD.out.gffread_gff
- | map { gff -> [ gff.getSimpleName(), gff ] }
- | join(ch_gffread_inputs)
- | map { fid, gffread_gff, meta, gff -> [ meta, gffread_gff ] }
- // meta insertion
-
- ch_versions = ch_versions.mix(GFFREAD.out.versions.first())
+ ch_gffread_gff = GFFREAD_BEFORE_LIFTOFF.out.gffread_gff
+ ch_versions = ch_versions.mix(GFFREAD_BEFORE_LIFTOFF.out.versions.first())
// MODULE: LIFTOFF
ch_liftoff_inputs = target_assemby
@@ -83,7 +76,8 @@ workflow FASTA_LIFTOFF {
LIFTOFF(
ch_liftoff_inputs.map { meta, target_fa, ref_fa, ref_gff -> [ meta, target_fa ] },
ch_liftoff_inputs.map { meta, target_fa, ref_fa, ref_gff -> ref_fa },
- ch_liftoff_inputs.map { meta, target_fa, ref_fa, ref_gff -> ref_gff }
+ ch_liftoff_inputs.map { meta, target_fa, ref_fa, ref_gff -> ref_gff },
+ []
)
ch_liftoff_gff3 = LIFTOFF.out.polished_gff3
@@ -92,7 +86,73 @@ workflow FASTA_LIFTOFF {
ch_versions = ch_versions.mix(LIFTOFF.out.versions.first())
+ // MODULE: AGAT_SPMERGEANNOTATIONS as MERGE_LIFTOFF_ANNOTATIONS
+ ch_merge_inputs = ch_liftoff_gff3
+ | branch { meta, list_polished ->
+ one: list_polished.size() == 1
+ many: list_polished.size() > 1
+ }
+
+ MERGE_LIFTOFF_ANNOTATIONS(
+ ch_merge_inputs.many,
+ []
+ )
+
+ ch_merged_gff = MERGE_LIFTOFF_ANNOTATIONS.out.gff.mix(ch_merge_inputs.one)
+ ch_versions = ch_versions.mix(MERGE_LIFTOFF_ANNOTATIONS.out.versions.first())
+
+ // COLLECTFILE: Transcript level kill list
+ ch_kill_list = ch_merged_gff
+ | map { meta, gff ->
+
+ def tx_from_gff = gff.readLines()
+ .findAll { it ->
+ if ( it.startsWith('#') ) { return false }
+
+ def cols = it.split('\t')
+ def feat = cols[2]
+ if ( ! ( feat == 'transcript' || feat == 'mRNA' ) ) { return false }
+
+ def attrs = cols[8]
+ attrs.contains('valid_ORF=False')
+ }
+ .collect {
+ def cols = it.split('\t')
+ def attrs = cols[8]
+
+ def matches = attrs =~ /ID=([^;]*)/
+
+ return matches[0][1]
+ }
+
+ [ "${meta.id}.kill.list.txt" ] + tx_from_gff.join('\n')
+ }
+ | collectFile(newLine: true)
+ | map { file ->
+ [ [ id: file.baseName.replace('.kill.list', '') ], file ]
+ }
+
+ // MODULE: AGAT_SPFILTERFEATUREFROMKILLLIST
+ ch_agat_kill_inputs = ch_merged_gff
+ | join(ch_kill_list)
+
+
+ AGAT_SPFILTERFEATUREFROMKILLLIST(
+ ch_agat_kill_inputs.map { meta, gff, kill -> [ meta, gff ] },
+ ch_agat_kill_inputs.map { meta, gff, kill -> kill },
+ [] // default config
+ )
+
+ ch_liftoff_purged_gff = AGAT_SPFILTERFEATUREFROMKILLLIST.out.gff
+ ch_versions = ch_versions.mix(AGAT_SPFILTERFEATUREFROMKILLLIST.out.versions.first())
+
+ // MODULE: GFFREAD as GFFREAD_AFTER_LIFTOFF
+ GFFREAD_AFTER_LIFTOFF ( ch_liftoff_purged_gff, [] )
+
+ ch_attr_trimmed_gff = GFFREAD_AFTER_LIFTOFF.out.gffread_gff
+ ch_versions = ch_versions.mix(GFFREAD_AFTER_LIFTOFF.out.versions.first())
+
emit:
- gff3 = ch_liftoff_gff3 // [ meta, [ gff3 ] ]
- versions = ch_versions // [ versions.yml ]
+ gff3 = ch_attr_trimmed_gff // [ meta, gff3 ]
+ versions = ch_versions // [ versions.yml ]
}
diff --git a/subworkflows/local/file_gunzip.nf b/subworkflows/local/file_gunzip.nf
new file mode 100644
index 0000000..30f3368
--- /dev/null
+++ b/subworkflows/local/file_gunzip.nf
@@ -0,0 +1,20 @@
+include { GUNZIP } from '../../modules/nf-core/gunzip'
+
+workflow FILE_GUNZIP {
+ take:
+ ch_input // channel [ meta, archive ]
+
+ main:
+ ch_input_branch = ch_input
+ | branch { meta, archive ->
+ gz: "$archive".endsWith('.gz')
+ rest: ! "$archive".endsWith('.gz')
+ }
+
+ // MODULE: GUNZIP
+ GUNZIP ( ch_input_branch.gz )
+
+ emit:
+ versions = GUNZIP.out.versions.first()
+ gunzip = GUNZIP.out.gunzip.mix( ch_input_branch.rest )
+}
diff --git a/subworkflows/local/gff_eggnogmapper.nf b/subworkflows/local/gff_eggnogmapper.nf
new file mode 100644
index 0000000..7ea0d19
--- /dev/null
+++ b/subworkflows/local/gff_eggnogmapper.nf
@@ -0,0 +1,47 @@
+include { GFFREAD as GFF2FASTA_FOR_EGGNOGMAPPER } from '../../modules/nf-core/gffread/main'
+include { EGGNOGMAPPER } from '../../modules/nf-core/eggnogmapper/main'
+
+workflow GFF_EGGNOGMAPPER {
+ take:
+ ch_gff // Channel: [ meta, gff ]
+ ch_fasta // Channel: [ meta, fasta ]
+ db_folder // val(db_folder)
+
+ main:
+ // Versions
+ ch_versions = Channel.empty()
+
+ // MODULE: GFFREAD as GFF2FASTA_FOR_EGGNOGMAPPER
+ ch_gffread_inputs = ch_gff
+ | join(ch_fasta)
+
+ GFF2FASTA_FOR_EGGNOGMAPPER(
+ ch_gffread_inputs.map { meta, gff, fasta -> [ meta, gff ] },
+ ch_gffread_inputs.map { meta, gff, fasta -> fasta }
+ )
+
+ ch_gffread_fasta = GFF2FASTA_FOR_EGGNOGMAPPER.out.gffread_fasta
+ ch_versions = ch_versions.mix(GFF2FASTA_FOR_EGGNOGMAPPER.out.versions.first())
+
+
+ ch_eggnogmapper_inputs = ch_gffread_fasta
+ | combine(Channel.fromPath(db_folder))
+
+ EGGNOGMAPPER(
+ ch_eggnogmapper_inputs.map { meta, fasta, db -> [ meta, fasta ] },
+ [],
+ ch_eggnogmapper_inputs.map { meta, fasta, db -> db },
+ [ [], [] ]
+ )
+
+ ch_eggnogmapper_annotations = EGGNOGMAPPER.out.annotations
+ ch_eggnogmapper_orthologs = EGGNOGMAPPER.out.orthologs
+ ch_eggnogmapper_hits = EGGNOGMAPPER.out.hits
+ ch_versions = ch_versions.mix(EGGNOGMAPPER.out.versions.first())
+
+ emit:
+ eggnogmapper_annotations = ch_eggnogmapper_annotations
+ eggnogmapper_orthologs = ch_eggnogmapper_orthologs
+ eggnogmapper_hits = ch_eggnogmapper_hits
+ versions = ch_versions
+}
diff --git a/subworkflows/local/gff_merge_cleanup.nf b/subworkflows/local/gff_merge_cleanup.nf
new file mode 100644
index 0000000..834af94
--- /dev/null
+++ b/subworkflows/local/gff_merge_cleanup.nf
@@ -0,0 +1,118 @@
+include { AGAT_SPMERGEANNOTATIONS } from '../../modules/pfr/agat/spmergeannotations/main'
+include { GT_GFF3 } from '../../modules/nf-core/gt/gff3/main'
+include { AGAT_CONVERTSPGXF2GXF } from '../../modules/nf-core/agat/convertspgxf2gxf/main'
+
+workflow GFF_MERGE_CLEANUP {
+ take:
+ ch_braker_gff // Channel: [ meta, gff ]
+ ch_liftoff_gff // Channel: [ meta, gff ]
+
+ main:
+ ch_versions = Channel.empty()
+
+ ch_gff_branch = ch_braker_gff
+ | join(ch_liftoff_gff, remainder:true)
+ | branch { meta, braker_gff, liftoff_gff ->
+ both : ( braker_gff && liftoff_gff )
+ braker_only : ( braker_gff && ( ! liftoff_gff ) )
+ liftoff_only: ( ( ! braker_gff ) && liftoff_gff )
+ }
+
+ // MODULE: AGAT_SPMERGEANNOTATIONS
+ AGAT_SPMERGEANNOTATIONS(
+ ch_gff_branch.both.map { meta, bg, lg -> [ meta, [ bg, lg ] ] },
+ []
+ )
+
+ ch_merged_gff = AGAT_SPMERGEANNOTATIONS.out.gff
+ | mix ( ch_gff_branch.liftoff_only.map { meta, braker_gff, liftoff_gff -> [ meta, liftoff_gff ] } )
+ | mix ( ch_gff_branch.braker_only.map { meta, braker_gff, liftoff_gff -> [ meta, braker_gff ] } )
+ ch_versions = ch_versions.mix(AGAT_SPMERGEANNOTATIONS.out.versions.first())
+
+ // MODULE: GT_GFF3
+ GT_GFF3 ( ch_merged_gff )
+
+ ch_gt_gff = GT_GFF3.out.gt_gff3
+ ch_versions = ch_versions.mix(GT_GFF3.out.versions.first())
+
+ // COLLECTFILE: Format GT_GFF3 output
+ ch_gt_formatted_gff = ch_gt_gff
+ | map { meta, gff ->
+
+ def lines = gff.readLines()
+ .collect { line ->
+ if ( line.startsWith('##') ) { return line }
+ if ( line.startsWith('#') ) { return '' }
+
+ def cols = line.split('\t')
+ def program = cols[1]
+ def feat = cols[2]
+ def atts = cols[8]
+
+ def atts_r = ''
+ // Remove attributes and use AGAT_CONVERTSPGXF2GXF
+ // to create attributes based on sequential layout
+
+ def feat_r = feat == 'transcript' ? 'mRNA' : feat
+ // Use mRNA in place of transcript
+
+ if ( feat != 'gene' || program != 'Liftoff' ) {
+ return ( cols[0..1] + [ feat_r ] + cols[3..7] + [ atts_r ] ).join('\t')
+ }
+
+ def gene_id = ( atts =~ /ID=([^;]*)/ )[0][1]
+ def atts_g = "liftoffID=$gene_id"
+
+ return ( cols[0..7] + [ atts_g ] ).join('\t')
+
+ }.join('\n')
+
+ [ "${meta.id}.bare.gff" ] + [ lines ]
+ }
+ | collectFile(newLine: true)
+ | map { file ->
+ [ [ id: file.baseName.replace('.bare', '') ], file ]
+ }
+
+ // MODULE: AGAT_CONVERTSPGXF2GXF
+ AGAT_CONVERTSPGXF2GXF ( ch_gt_formatted_gff )
+
+ ch_agat_gff = AGAT_CONVERTSPGXF2GXF.out.output_gff
+ ch_versions = ch_versions.mix(AGAT_CONVERTSPGXF2GXF.out.versions.first())
+
+ // COLLECTFILE: Format AGAT_CONVERTSPGXF2GXF output
+ ch_final_gff = ch_agat_gff
+ | map { meta, gff ->
+
+ def lines = gff.readLines()
+ .collect { line ->
+ if ( line.startsWith('#') ) { return line }
+
+ def cols = line.split('\t')
+ def program = cols[1]
+ def feat = cols[2]
+ def atts = cols[8]
+ def atts_r = atts.replace('-', '').replace('agat', '')
+
+ if ( feat != 'gene' || program != 'Liftoff' ) {
+ return ( cols[0..7] + [ atts_r ] ).join('\t')
+ }
+
+ def oldID = ( atts =~ /liftoffID=([^;]*)/ )[0][1]
+ def newID = ( atts =~ /ID=([^;]*)/ )[0][1].replace('-', '').replace('agat', '')
+ def atts_g = "ID=${newID};liftoffID=${oldID}"
+
+ return ( cols[0..7] + [ atts_g ] ).join('\t')
+ }.join('\n')
+
+ [ "${meta.id}.agat.cleanup.gff" ] + [ lines ]
+ }
+ | collectFile(newLine: true)
+ | map { file ->
+ [ [ id: file.baseName.replace('.agat.cleanup', '') ], file ]
+ }
+
+ emit:
+ gff = ch_final_gff // [ meta, gff ]
+ versions = ch_versions // [ versions.yml ]
+}
diff --git a/subworkflows/local/gff_store.nf b/subworkflows/local/gff_store.nf
new file mode 100644
index 0000000..3326e2a
--- /dev/null
+++ b/subworkflows/local/gff_store.nf
@@ -0,0 +1,119 @@
+import java.net.URLEncoder
+
+include { GT_GFF3 as FINAL_GFF_CHECK } from '../../modules/nf-core/gt/gff3/main'
+
+workflow GFF_STORE {
+ take:
+ ch_target_gff // [ meta, gff ]
+ ch_eggnogmapper_annotations // [ meta, annotations ]
+
+ main:
+ ch_versions = Channel.empty()
+
+ // COLLECTFILE: Add eggnogmapper annotation descriptions to gff
+ ch_described_gff = ch_target_gff
+ | join(ch_eggnogmapper_annotations)
+ | map { meta, gff, annotations ->
+ def tx_annotations = annotations.readLines()
+ .findAll { ! it.startsWith('#') }
+ .collect { line ->
+ def cols = line.split('\t')
+ def id = cols[0]
+ def txt = cols[7]
+ def pfams = cols[20]
+
+ [ id, txt, pfams ]
+ }
+ .collect { id, txt, pfams ->
+ if ( txt != '-' ) { return [ id, txt ] }
+ if ( pfams != '-' ) { return [ id, "PFAMs: $pfams" ] }
+
+ [ id, 'No eggnog description and PFAMs' ]
+ }
+ .collectEntries { id, txt ->
+ [ id, txt ]
+ }
+
+ def gene_tx_annotations = [:]
+ gff.readLines()
+ .findAll { line ->
+ if ( line.startsWith('#') || line == '' ) { return false }
+
+ def cols = line.split('\t')
+ def feat = cols[2]
+
+ if ( ! ( feat == 'transcript' || feat == 'mRNA' ) ) { return false }
+
+ return true
+ }
+ .each { line ->
+ def cols = line.split('\t')
+ def atts = cols[8]
+
+ def matches = atts =~ /ID=([^;]*)/
+ def tx_id = matches[0][1]
+
+ def matches_p= atts =~ /Parent=([^;]*)/
+ def gene_id = matches_p[0][1]
+
+ if ( ! gene_tx_annotations.containsKey(gene_id) ) {
+ gene_tx_annotations[gene_id] = [:]
+ }
+
+ def anno = tx_annotations.containsKey(tx_id)
+ ? URLEncoder.encode(tx_annotations[tx_id], "UTF-8").replace('+', '%20')
+ : URLEncoder.encode('Hypothetical protein | no eggnog hit', "UTF-8").replace('+', '%20')
+
+ gene_tx_annotations[gene_id] += [ ( tx_id ): anno ]
+ }
+
+ gene_tx_annotations = gene_tx_annotations
+ .collectEntries { gene_id, tx_annos ->
+ def default_anno = tx_annos.values().first()
+
+ if ( tx_annos.values().findAll { it != default_anno }.size() > 0 ) {
+ return [ gene_id, ( tx_annos + [ 'default': 'Differing%20isoform%20descriptions' ] ) ]
+ }
+
+ [ gene_id, ( tx_annos + [ 'default': default_anno ] ) ]
+ }
+
+ def gff_lines = gff.readLines()
+ .collect { line ->
+
+ if ( line.startsWith('#') || line == '' ) { return line }
+
+ def cols = line.split('\t')
+ def feat = cols[2]
+ def atts = cols[8]
+
+ if ( ! ( feat == 'gene' || feat == 'transcript' || feat == 'mRNA' ) ) { return line }
+
+ def id = feat == 'gene' ? ( atts =~ /ID=([^;]*)/ )[0][1] : ( atts =~ /Parent=([^;]*)/ )[0][1]
+
+ if ( ! gene_tx_annotations.containsKey(id) ) { return line }
+
+ def tx_id = feat == 'gene' ? null : ( atts =~ /ID=([^;]*)/ )[0][1]
+ def desc = feat == 'gene' ? gene_tx_annotations[id]['default'] : gene_tx_annotations[id][tx_id]
+
+ return ( line + ";description=$desc" )
+ }
+
+ [ "${meta.id}.described.gff" ] + gff_lines.join('\n')
+ }
+ | collectFile(newLine: true)
+ | map { file ->
+ [ [ id: file.baseName.replace('.described', '') ], file ]
+ }
+
+ // MODULE: GT_GFF3 as FINAL_GFF_CHECK
+ FINAL_GFF_CHECK ( ch_described_gff )
+
+ ch_final_gff = FINAL_GFF_CHECK.out.gt_gff3
+ ch_versions = ch_versions.mix(FINAL_GFF_CHECK.out.versions.first())
+
+
+ emit:
+ final_gff = ch_final_gff // [ meta, gff ]
+ versions = ch_versions // [ versions.yml ]
+}
diff --git a/subworkflows/local/prepare_assembly.nf b/subworkflows/local/prepare_assembly.nf
index d18f5ce..f0c9d7f 100644
--- a/subworkflows/local/prepare_assembly.nf
+++ b/subworkflows/local/prepare_assembly.nf
@@ -13,6 +13,7 @@ workflow PREPARE_ASSEMBLY {
target_assembly // channel: [ meta, fasta ]
te_library // channel: [ meta, fasta ]
repeat_annotator // val(String), 'repeatmodeler' or 'edta'
+ exclude_assemblies // channel: val(assembly_x,assembly_y)
main:
ch_versions = Channel.empty()
@@ -74,6 +75,14 @@ workflow PREPARE_ASSEMBLY {
ch_edta_inputs = repeat_annotator != 'edta'
? Channel.empty()
: ch_annotator_inputs
+ | combine( exclude_assemblies )
+ | map { meta, fasta, ex_assemblies ->
+ def ex_list = ex_assemblies.split(",")
+
+ if ( !( ex_list.contains( meta.id ) ) ) {
+ [ meta, fasta ]
+ }
+ }
FASTA_EDTA_LAI(
ch_edta_inputs,
@@ -87,6 +96,14 @@ workflow PREPARE_ASSEMBLY {
ch_repeatmodeler_inputs = repeat_annotator != 'repeatmodeler'
? Channel.empty()
: ch_annotator_inputs
+ | combine( exclude_assemblies )
+ | map { meta, fasta, ex_assemblies ->
+ def ex_list = ex_assemblies.split(",")
+
+ if ( !( ex_list.contains( meta.id ) ) ) {
+ [ meta, fasta ]
+ }
+ }
REPEATMODELER_BUILDDATABASE ( ch_repeatmodeler_inputs )
@@ -113,9 +130,20 @@ workflow PREPARE_ASSEMBLY {
ch_versions = ch_versions.mix(REPEATMASKER.out.versions.first())
// MODULE: STAR_GENOMEGENERATE
+ ch_genomegenerate_inputs = ch_validated_assembly
+ | combine( exclude_assemblies )
+ | map { meta, fasta, ex_assemblies ->
+ def ex_list = ex_assemblies.split(",")
+
+ if ( !( ex_list.contains( meta.id ) ) ) {
+ [ meta, fasta ]
+ }
+ }
+
+
STAR_GENOMEGENERATE(
- ch_validated_assembly,
- ch_validated_assembly.map { meta, fasta -> [ [], [] ] }
+ ch_genomegenerate_inputs,
+ ch_genomegenerate_inputs.map { meta, fasta -> [ [], [] ] }
)
ch_assembly_index = STAR_GENOMEGENERATE.out.index
diff --git a/subworkflows/local/preprocess_rnaseq.nf b/subworkflows/local/preprocess_rnaseq.nf
index 9466104..72fa176 100644
--- a/subworkflows/local/preprocess_rnaseq.nf
+++ b/subworkflows/local/preprocess_rnaseq.nf
@@ -1,12 +1,13 @@
include { CAT_FASTQ } from '../../modules/nf-core/cat/fastq'
-include { SORTMERNA } from '../../modules/nf-core/sortmerna'
-include { EXTRACT_SAMPLES } from '../../subworkflows/local/extract_samples'
+include { SORTMERNA as SORTMERNA_INDEX } from '../../modules/nf-core/sortmerna'
+include { SORTMERNA as SORTMERNA_READS } from '../../modules/nf-core/sortmerna'
include { FASTQ_FASTQC_UMITOOLS_FASTP } from '../../subworkflows/nf-core/fastq_fastqc_umitools_fastp'
workflow PREPROCESS_RNASEQ {
take:
- samplesheet // path: csv
+ ch_reads // channel: [ [ id, single_end, target_assemblies ], [ [ fq ] ] ]
permissible_assemblies // val: assembly_a,assembly_b
+ exclude_assemblies // channel: val(assembly_x,assembly_y)
skip_fastqc // val: true|false
skip_fastp // val: true|false
save_trimmed // val: true|false
@@ -15,36 +16,37 @@ workflow PREPROCESS_RNASEQ {
sortmerna_fastas // channel: [ [ fasta ] ]
main:
- ch_versions = Channel.empty()
+ ch_versions = Channel.empty()
- // SUBWORKFLOW: EXTRACT_SAMPLES
- EXTRACT_SAMPLES(
- samplesheet,
- permissible_assemblies
- )
+ ch_fastq = ch_reads
+ | combine( exclude_assemblies )
+ | map { meta, fqs, ex_assemblies ->
+ def ex_list = ex_assemblies.split(",")
- ch_fastq = EXTRACT_SAMPLES.out.reads
- | map { meta, fastq ->
- groupID = meta.id - ~/_T\d+/
- [ meta + [id: groupID], fastq ]
+ if ( !( meta.target_assemblies.every { ex_list.contains( it ) } ) ) {
+ [ [ id:meta.id, single_end:meta.single_end ], fqs ]
+ }
}
- | groupTuple()
- | branch { meta, fastq ->
- single : fastq.size() == 1
- return [ meta, fastq.flatten() ]
- multiple: fastq.size() > 1
- return [ meta, fastq.flatten() ]
+ | branch { meta, fqs ->
+ single : fqs.size() == 1
+ return [ meta, fqs.flatten() ]
+ multiple: fqs.size() > 1
+ return [ meta, fqs.flatten() ]
}
- ch_reads_target = EXTRACT_SAMPLES.out.assemblies
- | map { meta, assembly ->
- groupID = meta.id - ~/_T\d+/
- [ meta + [id: groupID], assembly ]
+
+ ch_reads_target = ch_reads
+ | combine( exclude_assemblies )
+ | flatMap { meta, fqs, ex_assemblies ->
+ def ex_list = ex_assemblies.split(",")
+
+ meta
+ .target_assemblies
+ .collect { assembly -> [ [ id:meta.id, single_end:meta.single_end ], assembly ] }
+ .findAll { _meta, assembly -> !( ex_list.contains( assembly ) ) }
}
| unique
- ch_versions = ch_versions.mix(EXTRACT_SAMPLES.out.versions)
-
// MODULES: CAT_FASTQ
CAT_FASTQ ( ch_fastq.multiple )
@@ -81,21 +83,41 @@ workflow PREPROCESS_RNASEQ {
ch_versions = ch_versions.mix(FASTQ_FASTQC_UMITOOLS_FASTP.out.versions.first())
- // MODULE: SORTMERNA
- SORTMERNA(
- remove_ribo_rna ? ch_trim_reads : Channel.empty(),
- sortmerna_fastas
+
+ // MODULE: SORTMERNA as SORTMERNA_INDEX
+ SORTMERNA_INDEX(
+ [ [ id: 'idx' ], [] ],
+ sortmerna_fastas.map { fastas -> [ [ id: 'fastas' ], fastas ] },
+ [ [], [] ]
+ )
+
+ ch_versions = ch_versions.mix(SORTMERNA_INDEX.out.versions)
+
+ // MODULE: SORTMERNA as SORTMERNA_READS
+ ch_sortmerna_inputs = remove_ribo_rna
+ ? ch_trim_reads
+ | combine(
+ sortmerna_fastas
+ | map { fastas -> [ [ id: 'fastas' ], fastas ] }
+ | join(SORTMERNA_INDEX.out.index)
+ )
+ : Channel.empty()
+
+ SORTMERNA_READS(
+ ch_sortmerna_inputs.map { meta, reads, meta2, fastas, idx -> [ meta, reads ] },
+ ch_sortmerna_inputs.map { meta, reads, meta2, fastas, idx -> [ meta2, fastas ] },
+ ch_sortmerna_inputs.map { meta, reads, meta2, fastas, idx -> [ meta2, idx ] }
)
ch_emitted_reads = remove_ribo_rna
- ? SORTMERNA.out.reads
+ ? SORTMERNA_READS.out.reads
: ch_trim_reads
- ch_versions = ch_versions.mix(SORTMERNA.out.versions.first())
+ ch_versions = ch_versions.mix(SORTMERNA_READS.out.versions.first())
emit:
- trim_reads = ch_emitted_reads // channel: [ meta, [ fq ] ]
- reads_target = ch_reads_target // channel: [ meta, assembly_id ]
+ trim_reads = ch_emitted_reads // channel: [ [ id, single_end ], [ fq ] ]
+ reads_target = ch_reads_target // channel: [ [ id, single_end ], assembly_id ]
versions = ch_versions // channel: [ versions.yml ]
}
diff --git a/subworkflows/local/purge_breaker_models.nf b/subworkflows/local/purge_breaker_models.nf
new file mode 100644
index 0000000..db828ab
--- /dev/null
+++ b/subworkflows/local/purge_breaker_models.nf
@@ -0,0 +1,241 @@
+include { AGAT_CONVERTSPGFF2GTF } from '../../modules/nf-core/agat/convertspgff2gtf/main'
+include { TSEBRA } from '../../modules/pfr/tsebra/main'
+include { AGAT_CONVERTSPGXF2GXF } from '../../modules/nf-core/agat/convertspgxf2gxf/main'
+include { AGAT_SPFILTERFEATUREFROMKILLLIST as KILL_TSEBRA_ISOFORMS } from '../../modules/pfr/agat/spfilterfeaturefromkilllist/main'
+include { GFFCOMPARE as COMPARE_BRAKER_TO_LIFTOFF } from '../../modules/nf-core/gffcompare/main'
+include { AGAT_SPFILTERFEATUREFROMKILLLIST } from '../../modules/pfr/agat/spfilterfeaturefromkilllist/main'
+include { GFFCOMPARE as VALIDATE_PURGING_BY_AGAT } from '../../modules/nf-core/gffcompare/main'
+include { AGAT_SPMERGEANNOTATIONS as MERGE_BRAKER_LIFTOFF } from '../../modules/pfr/agat/spmergeannotations/main'
+
+workflow PURGE_BREAKER_MODELS {
+ take:
+ braker_gff3 // [ meta, gff3 ]
+ braker_hints // [ meta, gff ]
+ liftoff_gff3 // [ meta, gff3 ]
+ tsebra_config // val(tsebra_config)
+ braker_allow_isoforms // val(true|false)
+
+ main:
+ ch_versions = Channel.empty()
+
+ // MODULE: AGAT_CONVERTSPGFF2GTF
+ AGAT_CONVERTSPGFF2GTF ( braker_gff3 )
+
+ ch_braker_gtf = AGAT_CONVERTSPGFF2GTF.out.output_gtf
+ ch_versions = ch_versions.mix(AGAT_CONVERTSPGFF2GTF.out.versions.first())
+
+ // COLLECTFILE: Prepare for TSEBRA
+ ch_tsebra_input_gtf = ch_braker_gtf
+ | map { meta, gtf ->
+
+ def lines = gtf.readLines()
+ .collect { line ->
+ if ( line.startsWith('#') ) { return line }
+
+ def cols = line.split('\t')
+ def feat = cols[2]
+
+ if ( ! ( feat in [ 'gene', 'transcript', 'mRNA' ] ) ) { return line }
+
+ def atts = cols[8]
+ def matches = atts =~ /ID ([^;]*)/
+ def id = matches[0][1]
+
+ def feat_format = ( feat == 'mRNA' ) ? 'transcript' : feat
+
+ return ( cols[0..1] + [ feat_format ] + cols[3..7] + [ id ] ).join('\t')
+ }.join('\n')
+
+ [ "${meta.id}.clean.gtf" ] + [ lines ]
+ }
+ | collectFile(newLine: true)
+ | map { file ->
+ [ [ id: file.baseName.replace(".clean", "") ], file ]
+ }
+
+ // MODULE: TSEBRA
+ ch_tsebra_inputs = ch_tsebra_input_gtf
+ | join(braker_hints)
+ | combine(Channel.fromPath(tsebra_config))
+ TSEBRA(
+ ch_tsebra_inputs.map { meta, gtf, gff, cfg -> [ meta, [ gtf ] ] },
+ ch_tsebra_inputs.map { meta, gtf, gff, cfg -> [ gff ] },
+ [],
+ ch_tsebra_inputs.map { meta, gtf, gff, cfg -> cfg }
+ )
+
+ ch_tsebra_gtf = TSEBRA.out.tsebra_gtf
+ ch_versions = ch_versions.mix(TSEBRA.out.versions.first())
+
+ // COLLECTFILE: Format TSEBRA output
+ ch_tsebra_formatted_gtf = ch_tsebra_gtf
+ | map { meta, gtf ->
+
+ def lines = gtf.readLines()
+ .collect { line ->
+ if ( line.startsWith('#') ) { return line }
+
+ def cols = line.split('\t')
+ def atts_r = ''
+ // Remove attributes and use AGAT_CONVERTSPGXF2GXF
+ // to create attributes based on sequential layout
+
+ return ( cols[0..7] + [ atts_r ] ).join('\t')
+ }.join('\n')
+
+ [ "${meta.id}.gtf" ] + [ lines ]
+ }
+ | collectFile(newLine: true)
+ | map { file ->
+ [ [ id: file.baseName ], file ]
+ }
+
+ // MODULE: AGAT_CONVERTSPGXF2GXF
+ AGAT_CONVERTSPGXF2GXF ( ch_tsebra_formatted_gtf )
+
+ ch_tsebra_formatted_gff = AGAT_CONVERTSPGXF2GXF.out.output_gff
+ ch_versions = ch_versions.mix(AGAT_CONVERTSPGXF2GXF.out.versions.first())
+
+ // COLLECTFILE: Format AGAT_CONVERTSPGXF2GXF output
+ ch_tsebra_gff = ch_tsebra_formatted_gff
+ | map { meta, gff ->
+
+ def lines = gff.readLines()
+ .collect { line ->
+ if ( line.startsWith('#') ) { return line }
+
+ def cols = line.split('\t')
+ def atts_r = cols[8].replaceAll('-', '').replaceAll('agat', '')
+
+ return ( cols[0..7] + [ atts_r ] ).join('\t')
+ }.join('\n')
+
+ [ "${meta.id}.gff3" ] + [ lines ]
+ }
+ | collectFile(newLine: true)
+ | map { file ->
+ [ [ id: file.baseName ], file ]
+ }
+
+ // COLLECTFILE: Isoform kill list, built only when braker_allow_isoforms=false
+ ch_post_tsebra_kill_list = braker_allow_isoforms
+ ? Channel.empty()
+ : ch_tsebra_gff
+ | map { meta, gff ->
+ def kill_list = gff.readLines()
+ .findAll { line ->
+ if ( line.startsWith('#') ) { return false }
+
+ def cols = line.split('\t')
+ def feat = cols[2]
+
+ ( feat == 'mRNA' || feat == 'transcript' )
+ }
+ .collect { line ->
+ def cols = line.split('\t')
+ def atts = cols[8]
+ def tx_id = ( atts =~ /ID=([^;]*)/ )[0][1]
+ def g_id = ( atts =~ /Parent=([^;]*)/ )[0][1]
+
+ [ g_id, tx_id ]
+ }
+ .groupBy { g_id, tx_id -> g_id }
+ .findAll { key, value -> value.size() > 1 }
+ .collect { key, value ->
+ value.collect { it[1] }[1..-1]
+ }
+ .flatten()
+ .join('\n')
+
+ [ "${meta.id}.kill.list.txt" ] + [ kill_list ]
+ }
+ | collectFile(newLine: true)
+ | map { file ->
+ [ [ id: file.baseName.replace('.kill.list', '') ], file ]
+ }
+
+ // MODULE: AGAT_SPFILTERFEATUREFROMKILLLIST as KILL_TSEBRA_ISOFORMS
+ ch_tsebra_kill_inputs = ch_tsebra_gff
+ | join(ch_post_tsebra_kill_list)
+
+
+ KILL_TSEBRA_ISOFORMS(
+ ch_tsebra_kill_inputs.map { meta, gff, kill -> [ meta, gff ] },
+ ch_tsebra_kill_inputs.map { meta, gff, kill -> kill },
+ [] // default config
+ )
+
+ ch_tsebra_killed_gff = ch_tsebra_gff
+ | join(KILL_TSEBRA_ISOFORMS.out.gff, remainder: true)
+ | map { meta, tsebra, killed ->
+ if ( tsebra ) { [ meta, killed ?: tsebra ] }
+ }
+ ch_versions = ch_versions.mix(KILL_TSEBRA_ISOFORMS.out.versions.first())
+
+ // MODULE: GFFCOMPARE as COMPARE_BRAKER_TO_LIFTOFF
+ ch_comparison_inputs = ch_tsebra_killed_gff
+ | join(liftoff_gff3)
+
+
+ COMPARE_BRAKER_TO_LIFTOFF (
+ ch_comparison_inputs.map { meta, braker, liftoff -> [ meta, braker ] },
+ [ [], [], [] ],
+ ch_comparison_inputs.map { meta, braker, liftoff -> [ meta, liftoff ] },
+ )
+
+ ch_tracking = COMPARE_BRAKER_TO_LIFTOFF.out.tracking
+ ch_versions = ch_versions.mix(COMPARE_BRAKER_TO_LIFTOFF.out.versions.first())
+
+ // COLLECTFILE: Transcript level kill list
+ ch_kill_list = ch_tracking
+ | map { meta, tracking ->
+
+ def kept_lines = tracking.readLines()
+ .findAll { line ->
+ def cols = line.split('\t')
+
+ ( cols[3] != 'u' ) && ( cols[3] != 'p' )
+ }
+
+ def tx_kill_list = kept_lines
+ .collect { line ->
+ def cols = line.split('\t')
+
+ def matched = cols[4] =~ /q1:([^\|]+)\|([^\|]+)/
+
+ matched[0][2].trim()
+ }.join('\n')
+
+ [ "${meta.id}.kill.list.txt" ] + tx_kill_list
+ }
+ | collectFile(newLine: true)
+ | map { file ->
+ [ [ id: file.baseName.replace('.kill.list', '') ], file ]
+ }
+
+ // MODULE: AGAT_SPFILTERFEATUREFROMKILLLIST
+ ch_agat_kill_inputs = ch_tsebra_killed_gff
+ | join(ch_kill_list)
+
+
+ AGAT_SPFILTERFEATUREFROMKILLLIST(
+ ch_agat_kill_inputs.map { meta, gff, kill -> [ meta, gff ] },
+ ch_agat_kill_inputs.map { meta, gff, kill -> kill },
+ [] // default config
+ )
+
+ ch_braker_purged_gff = AGAT_SPFILTERFEATUREFROMKILLLIST.out.gff
+ ch_versions = ch_versions.mix(AGAT_SPFILTERFEATUREFROMKILLLIST.out.versions.first())
+
+ // Handle case where liftoff is not present
+ ch_all_braker_gff = ch_tsebra_killed_gff
+ | join(ch_braker_purged_gff, remainder:true)
+ | map { meta, tsebra_gff, purged_gff ->
+ if ( purged_gff ) { return [ meta, purged_gff ] }
+ if ( tsebra_gff ) { return [ meta, tsebra_gff ] }
+ }
+
+ emit:
+ braker_purged_gff = ch_all_braker_gff // [ meta, gff3 ]
+ versions = ch_versions // [ versions.yml ]
+}
diff --git a/subworkflows/local/purge_nohit_models.nf b/subworkflows/local/purge_nohit_models.nf
new file mode 100644
index 0000000..d213dc2
--- /dev/null
+++ b/subworkflows/local/purge_nohit_models.nf
@@ -0,0 +1,65 @@
+include { AGAT_SPFILTERFEATUREFROMKILLLIST } from '../../modules/pfr/agat/spfilterfeaturefromkilllist/main'
+
+// Drops transcripts without an eggNOG-mapper hit from the target annotation.
+//
+// A per-sample kill list is built from the transcript/mRNA IDs found in the GFF
+// minus the IDs found in the first column of the hits file. The list is handed to
+// AGAT_SPFILTERFEATUREFROMKILLLIST. When val_purge_nohits is false, AGAT receives
+// no input and the target GFF is emitted unchanged.
+workflow PURGE_NOHIT_MODELS {
+    take:
+    ch_target_gff               // [ meta, gff ]
+    ch_eggnogmapper_hits        // [ meta, hits ]
+    val_purge_nohits            // val(true|false)
+
+    main:
+    ch_versions                 = Channel.empty()
+
+    // COLLECTFILE: Transcript level kill list
+    ch_kill_list                = ch_target_gff
+                                | join(ch_eggnogmapper_hits)
+                                | map { meta, gff, hits ->
+
+                                    // The first column of every hits line is taken as a transcript ID
+                                    def tx_with_hits = hits.readLines()
+                                        .collect { it.split('\t')[0] }
+                                        .sort(false)
+                                        .unique()
+
+                                    def tx_in_gff = gff.readLines()
+                                        .findAll { line ->
+                                            if ( line.startsWith('#') || line == '' ) { return false }
+
+                                            def cols = line.split('\t')
+
+                                            // Lines with fewer than 9 tab-separated fields (e.g. sequence
+                                            // lines after ##FASTA, whitespace-only lines) are not feature
+                                            // rows; indexing columns 3 and 9 on them would throw.
+                                            if ( cols.size() < 9 ) { return false }
+
+                                            ( cols[2] == 'transcript' || cols[2] == 'mRNA' )
+                                        }
+                                        .collect { it ->
+                                            def attrs = it.split('\t')[8]
+
+                                            ( attrs =~ /ID=([^;]*)/ )[0][1]
+                                        }
+                                        .sort(false)
+                                        .unique()
+
+                                    def tx_without_hits = tx_in_gff - tx_with_hits
+
+                                    // [ file name, content ] pair consumed by collectFile
+                                    [ "${meta.id}.kill.list.txt" ] + tx_without_hits.join('\n')
+                                }
+                                | collectFile(newLine: true)
+                                | map { file ->
+                                    // NOTE(review): the rebuilt meta carries only `id`; the join below
+                                    // matches only if ch_target_gff's meta is exactly [ id: ... ] — confirm
+                                    [ [ id: file.baseName.replace('.kill.list', '') ], file ]
+                                }
+
+    // MODULE: AGAT_SPFILTERFEATUREFROMKILLLIST
+    ch_agat_kill_inputs         = ! val_purge_nohits
+                                ? Channel.empty()
+                                : ch_target_gff
+                                | join(ch_kill_list)
+
+
+    AGAT_SPFILTERFEATUREFROMKILLLIST(
+        ch_agat_kill_inputs.map { meta, gff, kill -> [ meta, gff ] },
+        ch_agat_kill_inputs.map { meta, gff, kill -> kill },
+        [] // default config
+    )
+
+    ch_target_purged_gff        = AGAT_SPFILTERFEATUREFROMKILLLIST.out.gff
+    ch_versions                 = ch_versions.mix(AGAT_SPFILTERFEATUREFROMKILLLIST.out.versions.first())
+
+    emit:
+    // Purging disabled: pass the input annotation through instead of the (empty) AGAT output
+    purged_gff                  = ch_target_purged_gff.mix(val_purge_nohits ? Channel.empty() : ch_target_gff)
+    versions                    = ch_versions // [ versions.yml ]
+}
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
index 2c67b3c..833d82b 100644
--- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/main.nf
@@ -20,6 +20,19 @@ def getFastpReadsAfterFiltering(json_file, min_trimmed_reads) {
return json['after_filtering']['total_reads'].toLong()
}
+// Returns the read 1 adapter sequence recorded in a fastp JSON report
+// (`adapter_cutting.read1_adapter_sequence`), or "" when the report is empty
+// or the lookup throws (e.g. the `adapter_cutting` section is absent).
+def getFastpAdapterSequence(json_file){
+
+    if (!json_file.text) { return "" } // Usman Rashid: To allow -stub with FASTP
+
+    def Map json = (Map) new JsonSlurper().parseText(json_file.text)
+
+    // Declared with `def` so the value stays local to this call; an undeclared
+    // name would live in the script binding and be shared between concurrent closures.
+    def adapter = ""
+    try{
+        adapter = json['adapter_cutting']['read1_adapter_sequence']
+    } catch(Exception ex){
+        adapter = ""
+    }
+    return adapter
+}
+
workflow FASTQ_FASTQC_UMITOOLS_FASTP {
take:
reads // channel: [ val(meta), [ reads ] ]
@@ -28,7 +41,7 @@ workflow FASTQ_FASTQC_UMITOOLS_FASTP {
skip_umi_extract // boolean: true/false
umi_discard_read // integer: 0, 1 or 2
skip_trimming // boolean: true/false
- adapter_fasta // file: adapter.fasta
+ adapter_fasta // file: adapter.fasta
save_trimmed_fail // boolean: true/false
save_merged // boolean: true/false
min_trimmed_reads // integer: > 0
@@ -78,6 +91,8 @@ workflow FASTQ_FASTQC_UMITOOLS_FASTP {
fastqc_trim_html = Channel.empty()
fastqc_trim_zip = Channel.empty()
trim_read_count = Channel.empty()
+ adapter_seq = Channel.empty()
+
if (!skip_trimming) {
FASTP (
umi_reads,
@@ -111,6 +126,10 @@ workflow FASTQ_FASTQC_UMITOOLS_FASTP {
.map { meta, reads, num_reads -> [ meta, num_reads ] }
.set { trim_read_count }
+ trim_json
+ .map { meta, json -> [meta, getFastpAdapterSequence(json)] }
+ .set { adapter_seq }
+
if (!skip_fastqc) {
FASTQC_TRIM (
trim_reads
@@ -128,6 +147,7 @@ workflow FASTQ_FASTQC_UMITOOLS_FASTP {
fastqc_raw_zip // channel: [ val(meta), [ zip ] ]
umi_log // channel: [ val(meta), [ log ] ]
+ adapter_seq // channel: [ val(meta), [ adapter_seq] ]
trim_json // channel: [ val(meta), [ json ] ]
trim_html // channel: [ val(meta), [ html ] ]
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml
index 220e8db..9308fe9 100644
--- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/meta.yml
@@ -43,7 +43,7 @@ input:
- skip_trimming:
type: boolean
description: |
- Allows to skip trimgalore execution
+ Set to true to skip FastP execution
- adapter_fasta:
type: file
description: |
@@ -70,10 +70,7 @@ output:
type: file
description: >
Extracted FASTQ files. | For single-end reads, pattern is \${prefix}.umi_extract.fastq.gz. |
-
-
-
- For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz.
+ For paired-end reads, pattern is \${prefix}.umi_extract_{1,2}.fastq.gz.
pattern: "*.{fastq.gz}"
- fastqc_html:
type: file
@@ -118,6 +115,10 @@ output:
type: file
description: FastQC report archive
pattern: "*_{fastqc.zip}"
+ - adapter_seq:
+ type: string
+ description: |
+ Adapter Sequence found in read1
- versions:
type: file
description: File containing software versions
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test
index cdd7398..961b5b4 100644
--- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test
@@ -3,6 +3,8 @@ nextflow_workflow {
name "Test Workflow FASTQ_FASTQC_UMITOOLS_FASTP"
script "../main.nf"
workflow "FASTQ_FASTQC_UMITOOLS_FASTP"
+ config './nextflow.config'
+
tag "subworkflows"
tag "subworkflows_nfcore"
tag "subworkflows/fastq_fastqc_umitools_fastp"
@@ -17,22 +19,518 @@ nextflow_workflow {
when {
workflow {
"""
- input[0] = [
- [ id:'test', single_end:false ], // meta map
- [
- file(params.test_data['sarscov2']['illumina']['test_1_fastq_gz'], checkIfExists: true),
- file(params.test_data['sarscov2']['illumina']['test_2_fastq_gz'], checkIfExists: true)
- ]
- ]
- input[1] = false // skip_fastqc
- input[2] = false // with_umi
- input[3] = false // skip_umi_extract
- input[4] = 1 // umi_discard_read
- input[5] = false // skip_trimming
- input[6] = [] // adapter_fasta
- input[7] = false // save_trimmed_fail
- input[8] = false // save_merged
- input[9] = 1 // min_trimmed_reads
+ skip_fastqc = false
+ with_umi = false
+ skip_umi_extract = false
+ umi_discard_read = 1
+ skip_trimming = false
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = false
+ min_trimmed_reads = 1
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+ ]
+ ])
+ input[1] = skip_fastqc
+ input[2] = with_umi
+ input[3] = skip_umi_extract
+ input[4] = umi_discard_read
+ input[5] = skip_trimming
+ input[6] = adapter_fasta
+ input[7] = save_trimmed_fail
+ input[8] = save_merged
+ input[9] = min_trimmed_reads
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(
+ workflow.out.reads,
+ workflow.out.umi_log,
+ workflow.out.trim_json,
+ workflow.out.trim_reads_fail,
+ workflow.out.trim_reads_merged,
+ workflow.out.adapter_seq,
+ workflow.out.trim_read_count,
+ workflow.out.versions
+ ).match()
+ },
+
+ { assert workflow.out.fastqc_raw_html },
+ { assert workflow.out.fastqc_raw_zip },
+ { assert workflow.out.trim_html },
+ { assert workflow.out.trim_log },
+ { assert workflow.out.fastqc_trim_html },
+ { assert workflow.out.fastqc_trim_zip }
+ )
+ }
+ }
+
+ // skip_fastqc = true: trimming still runs (trim html/log expected), but none of the
+ // FastQC outputs (raw or trimmed, html or zip) are expected to be emitted.
+ test("skip_fastqc") {
+
+ when {
+ workflow {
+ """
+ skip_fastqc = true
+ with_umi = false
+ skip_umi_extract = false
+ umi_discard_read = 1
+ skip_trimming = false
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = false
+ min_trimmed_reads = 1
+
+ input[0] = Channel.of([
+ [ id:'test', single_end: false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+ ]
+ ])
+ input[1] = skip_fastqc
+ input[2] = with_umi
+ input[3] = skip_umi_extract
+ input[4] = umi_discard_read
+ input[5] = skip_trimming
+ input[6] = adapter_fasta
+ input[7] = save_trimmed_fail
+ input[8] = save_merged
+ input[9] = min_trimmed_reads
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(
+ workflow.out.reads,
+ workflow.out.umi_log,
+ workflow.out.trim_json,
+ workflow.out.trim_reads_fail,
+ workflow.out.trim_reads_merged,
+ workflow.out.adapter_seq,
+ workflow.out.trim_read_count,
+ workflow.out.versions
+ ).match()
+ },
+
+ { assert !workflow.out.fastqc_raw_html },
+ { assert !workflow.out.fastqc_raw_zip },
+ { assert workflow.out.trim_html },
+ { assert workflow.out.trim_log },
+ { assert !workflow.out.fastqc_trim_html },
+ { assert !workflow.out.fastqc_trim_zip }
+ )
+ }
+ }
+
+ // with_umi = true with UMI extraction enabled. Unlike the other tests, umi_log is not
+ // part of the snapshot here (presumably its content is not reproducible — confirm).
+ test("with_umi") {
+
+ when {
+ workflow {
+ """
+ skip_fastqc = false
+ with_umi = true
+ skip_umi_extract = false
+ umi_discard_read = 1
+ skip_trimming = false
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = false
+ min_trimmed_reads = 1
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+ ]
+ ])
+ input[1] = skip_fastqc
+ input[2] = with_umi
+ input[3] = skip_umi_extract
+ input[4] = umi_discard_read
+ input[5] = skip_trimming
+ input[6] = adapter_fasta
+ input[7] = save_trimmed_fail
+ input[8] = save_merged
+ input[9] = min_trimmed_reads
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(
+ workflow.out.reads,
+ workflow.out.trim_json,
+ workflow.out.trim_reads_fail,
+ workflow.out.trim_reads_merged,
+ workflow.out.adapter_seq,
+ workflow.out.trim_read_count,
+ workflow.out.versions
+ ).match()
+ },
+
+ { assert workflow.out.fastqc_raw_html },
+ { assert workflow.out.fastqc_raw_zip },
+ { assert workflow.out.trim_html },
+ { assert workflow.out.trim_log },
+ { assert workflow.out.fastqc_trim_html },
+ { assert workflow.out.fastqc_trim_zip }
+ )
+ }
+ }
+
+
+ // with_umi = true but skip_umi_extract = true; all FastQC and trimming outputs
+ // are still expected to be emitted.
+ test("skip_umi_extract") {
+
+ when {
+ workflow {
+ """
+ skip_fastqc = false
+ with_umi = true
+ skip_umi_extract = true
+ umi_discard_read = 1
+ skip_trimming = false
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = false
+ min_trimmed_reads = 1
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+ ]
+ ])
+ input[1] = skip_fastqc
+ input[2] = with_umi
+ input[3] = skip_umi_extract
+ input[4] = umi_discard_read
+ input[5] = skip_trimming
+ input[6] = adapter_fasta
+ input[7] = save_trimmed_fail
+ input[8] = save_merged
+ input[9] = min_trimmed_reads
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(
+ workflow.out.reads,
+ workflow.out.umi_log,
+ workflow.out.trim_json,
+ workflow.out.trim_reads_fail,
+ workflow.out.trim_reads_merged,
+ workflow.out.adapter_seq,
+ workflow.out.trim_read_count,
+ workflow.out.versions
+ ).match()
+ },
+
+ { assert workflow.out.fastqc_raw_html },
+ { assert workflow.out.fastqc_raw_zip },
+ { assert workflow.out.trim_html },
+ { assert workflow.out.trim_log },
+ { assert workflow.out.fastqc_trim_html },
+ { assert workflow.out.fastqc_trim_zip }
+ )
+ }
+ }
+
+ // Same settings as the "skip_umi_extract" test except that umi_discard_read = 2.
+ test("umi_discard_read = 2") {
+
+ when {
+ workflow {
+ """
+ skip_fastqc = false
+ with_umi = true
+ skip_umi_extract = true
+ umi_discard_read = 2
+ skip_trimming = false
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = false
+ min_trimmed_reads = 1
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+ ]
+ ])
+ input[1] = skip_fastqc
+ input[2] = with_umi
+ input[3] = skip_umi_extract
+ input[4] = umi_discard_read
+ input[5] = skip_trimming
+ input[6] = adapter_fasta
+ input[7] = save_trimmed_fail
+ input[8] = save_merged
+ input[9] = min_trimmed_reads
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(
+ workflow.out.reads,
+ workflow.out.umi_log,
+ workflow.out.trim_json,
+ workflow.out.trim_reads_fail,
+ workflow.out.trim_reads_merged,
+ workflow.out.adapter_seq,
+ workflow.out.trim_read_count,
+ workflow.out.versions
+ ).match()
+ },
+
+ { assert workflow.out.fastqc_raw_html },
+ { assert workflow.out.fastqc_raw_zip },
+ { assert workflow.out.trim_html },
+ { assert workflow.out.trim_log },
+ { assert workflow.out.fastqc_trim_html },
+ { assert workflow.out.fastqc_trim_zip }
+ )
+ }
+ }
+
+ // skip_trimming = true: raw FastQC outputs are still expected, but no trim html/log
+ // and no trimmed-read FastQC outputs are expected.
+ test("skip_trimming") {
+
+ when {
+ workflow {
+ """
+ skip_fastqc = false
+ with_umi = false
+ skip_umi_extract = false
+ umi_discard_read = 1
+ skip_trimming = true
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = false
+ min_trimmed_reads = 1
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+ ]
+ ])
+ input[1] = skip_fastqc
+ input[2] = with_umi
+ input[3] = skip_umi_extract
+ input[4] = umi_discard_read
+ input[5] = skip_trimming
+ input[6] = adapter_fasta
+ input[7] = save_trimmed_fail
+ input[8] = save_merged
+ input[9] = min_trimmed_reads
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(
+ workflow.out.reads.get(0).get(0), // Reads meta map
+ // Because the input files are passed straight through to the output, only the file names are checked
+ file(workflow.out.reads.get(0).get(1).get(0)).name,
+ file(workflow.out.reads.get(0).get(1).get(1)).name,
+ workflow.out.umi_log,
+ workflow.out.trim_json,
+ workflow.out.trim_reads_fail,
+ workflow.out.trim_reads_merged,
+ workflow.out.adapter_seq,
+ workflow.out.trim_read_count,
+ workflow.out.versions
+ ).match()
+ },
+
+ { assert workflow.out.fastqc_raw_html },
+ { assert workflow.out.fastqc_raw_zip },
+ { assert !workflow.out.trim_html },
+ { assert !workflow.out.trim_log },
+ { assert !workflow.out.fastqc_trim_html },
+ { assert !workflow.out.fastqc_trim_zip }
+ )
+ }
+ }
+
+ // save_trimmed_fail = true, run with an additional config (./nextflow.save_trimmed.config);
+ // trim_reads_fail is captured in the snapshot.
+ test("save_trimmed_fail") {
+
+ config './nextflow.save_trimmed.config'
+
+ when {
+ workflow {
+ """
+ skip_fastqc = false
+ with_umi = false
+ skip_umi_extract = false
+ umi_discard_read = 1
+ skip_trimming = false
+ adapter_fasta = []
+ save_trimmed_fail = true
+ save_merged = false
+ min_trimmed_reads = 1
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+ ]
+ ])
+ input[1] = skip_fastqc
+ input[2] = with_umi
+ input[3] = skip_umi_extract
+ input[4] = umi_discard_read
+ input[5] = skip_trimming
+ input[6] = adapter_fasta
+ input[7] = save_trimmed_fail
+ input[8] = save_merged
+ input[9] = min_trimmed_reads
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(
+ workflow.out.reads,
+ workflow.out.umi_log,
+ workflow.out.trim_json,
+ workflow.out.trim_reads_fail,
+ workflow.out.trim_reads_merged,
+ workflow.out.adapter_seq,
+ workflow.out.trim_read_count,
+ workflow.out.versions
+ ).match()
+ },
+
+ { assert workflow.out.fastqc_raw_html },
+ { assert workflow.out.fastqc_raw_zip },
+ { assert workflow.out.trim_html },
+ { assert workflow.out.trim_log },
+ { assert workflow.out.fastqc_trim_html },
+ { assert workflow.out.fastqc_trim_zip }
+ )
+ }
+ }
+
+ // save_merged = true; trim_reads_merged is captured in the snapshot alongside the
+ // other outputs, and all FastQC and trimming reports are expected.
+ test("save_merged") {
+
+ when {
+ workflow {
+ """
+ skip_fastqc = false
+ with_umi = false
+ skip_umi_extract = false
+ umi_discard_read = 1
+ skip_trimming = false
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = true
+ min_trimmed_reads = 1
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+ ]
+ ])
+ input[1] = skip_fastqc
+ input[2] = with_umi
+ input[3] = skip_umi_extract
+ input[4] = umi_discard_read
+ input[5] = skip_trimming
+ input[6] = adapter_fasta
+ input[7] = save_trimmed_fail
+ input[8] = save_merged
+ input[9] = min_trimmed_reads
+ """
+ }
+ }
+
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert snapshot(
+ workflow.out.reads,
+ workflow.out.umi_log,
+ workflow.out.trim_json,
+ workflow.out.trim_reads_fail,
+ workflow.out.trim_reads_merged,
+ workflow.out.adapter_seq,
+ workflow.out.trim_read_count,
+ workflow.out.versions
+ ).match()
+ },
+
+ { assert workflow.out.fastqc_raw_html },
+ { assert workflow.out.fastqc_raw_zip },
+ { assert workflow.out.trim_html },
+ { assert workflow.out.trim_log },
+ { assert workflow.out.fastqc_trim_html },
+ { assert workflow.out.fastqc_trim_zip }
+ )
+ }
+ }
+
+ test("min_trimmed_reads = 26") {
+ // Subworkflow should stop after FASTP which trims down to 25 reads
+
+ when {
+ workflow {
+ """
+ skip_fastqc = false
+ with_umi = false
+ skip_umi_extract = false
+ umi_discard_read = 1
+ skip_trimming = false
+ adapter_fasta = []
+ save_trimmed_fail = false
+ save_merged = true
+ min_trimmed_reads = 26
+
+ input[0] = Channel.of([
+ [ id:'test', single_end:false ], // meta map
+ [
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_1.fastq.gz', checkIfExists: true),
+ file(params.modules_testdata_base_path + 'genomics/sarscov2/illumina/fastq/test_2.fastq.gz', checkIfExists: true)
+ ]
+ ])
+ input[1] = skip_fastqc
+ input[2] = with_umi
+ input[3] = skip_umi_extract
+ input[4] = umi_discard_read
+ input[5] = skip_trimming
+ input[6] = adapter_fasta
+ input[7] = save_trimmed_fail
+ input[8] = save_merged
+ input[9] = min_trimmed_reads
"""
}
}
@@ -40,13 +538,17 @@ nextflow_workflow {
then {
assertAll(
{ assert workflow.success },
- { assert snapshot(workflow.out.reads).match("reads") },
- { assert snapshot(workflow.out.umi_log).match("umi_log") },
- { assert snapshot(workflow.out.trim_json).match("trim_json") },
- { assert snapshot(workflow.out.trim_reads_fail).match("trim_reads_fail") },
- { assert snapshot(workflow.out.trim_reads_merged).match("trim_reads_merged") },
- { assert snapshot(workflow.out.trim_read_count).match("trim_read_count") },
- { assert snapshot(workflow.out.versions).match("versions") },
+ { assert snapshot(
+ workflow.out.reads,
+ workflow.out.umi_log,
+ workflow.out.trim_json,
+ workflow.out.trim_reads_fail,
+ workflow.out.trim_reads_merged,
+ workflow.out.adapter_seq,
+ workflow.out.trim_read_count,
+ workflow.out.versions
+ ).match()
+ },
{ assert workflow.out.fastqc_raw_html },
{ assert workflow.out.fastqc_raw_zip },
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap
index 38a65ae..3e11d9e 100644
--- a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/main.nf.test.snap
@@ -1,32 +1,215 @@
{
- "trim_reads_merged": {
+ "skip_fastqc": {
"content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7",
+ "test_2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39"
+ ]
+ ]
+ ],
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.json:md5,1e0f8e27e71728e2b63fc64086be95cd"
+ ]
+ ],
[
+ ],
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "unspecified"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ 198
+ ]
+ ],
+ [
+ "versions.yml:md5,85bd0117e5778fff18e3920972a296ad"
]
],
- "timestamp": "2023-11-26T02:28:26.26920982"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-18T16:53:49.315194"
},
- "trim_reads_fail": {
+ "save_trimmed_fail": {
"content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_1.fastp.fastq.gz:md5,6ff32a64c5188b9a9192be1398c262c7",
+ "test_2.fastp.fastq.gz:md5,db0cb7c9977e94ac2b4b446ebd017a8a"
+ ]
+ ]
+ ],
[
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.json:md5,4c3268ddb50ea5b33125984776aa3519"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test.paired.fail.fastq.gz:md5,409b687c734cedd7a1fec14d316e1366",
+ "test_1.fail.fastq.gz:md5,4f273cf3159c13f79e8ffae12f5661f6",
+ "test_2.fail.fastq.gz:md5,f97b9edefb5649aab661fbc9e71fc995"
+ ]
+ ]
+ ],
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "unspecified"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ 162
+ ]
+ ],
+ [
+ "versions.yml:md5,85bd0117e5778fff18e3920972a296ad",
+ "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0",
+ "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e"
]
],
- "timestamp": "2023-11-26T02:28:26.25861515"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-18T16:51:45.34934"
},
- "versions": {
+ "skip_umi_extract": {
"content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7",
+ "test_2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39"
+ ]
+ ]
+ ],
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.json:md5,1e0f8e27e71728e2b63fc64086be95cd"
+ ]
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "unspecified"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ 198
+ ]
+ ],
[
"versions.yml:md5,85bd0117e5778fff18e3920972a296ad",
"versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0",
"versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e"
]
],
- "timestamp": "2023-11-26T02:28:26.30891403"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-18T12:07:40.34249"
},
- "trim_json": {
+ "umi_discard_read = 2": {
"content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7",
+ "test_2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39"
+ ]
+ ]
+ ],
+ [
+
+ ],
[
[
{
@@ -35,11 +218,44 @@
},
"test.fastp.json:md5,1e0f8e27e71728e2b63fc64086be95cd"
]
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "unspecified"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ 198
+ ]
+ ],
+ [
+ "versions.yml:md5,85bd0117e5778fff18e3920972a296ad",
+ "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0",
+ "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e"
]
],
- "timestamp": "2023-11-26T02:28:26.24768259"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-18T12:08:24.141938"
},
- "reads": {
+ "save_merged": {
"content": [
[
[
@@ -48,24 +264,142 @@
"single_end": false
},
[
- "test_1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7",
- "test_2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39"
+ "test_1.fastp.fastq.gz:md5,54b726a55e992a869fd3fa778afe1672",
+ "test_2.fastp.fastq.gz:md5,29d3b33b869f7b63417b8ff07bb128ba"
]
]
+ ],
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.json:md5,b712fd68ed0322f4bec49ff2a5237fcc"
+ ]
+ ],
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.merged.fastq.gz:md5,c873bb1ab3fa859dcc47306465e749d5"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "unspecified"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ 75
+ ]
+ ],
+ [
+ "versions.yml:md5,85bd0117e5778fff18e3920972a296ad",
+ "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0",
+ "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e"
]
],
- "timestamp": "2023-12-04T11:30:32.061644815"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-18T12:10:18.546963"
},
- "umi_log": {
+ "skip_trimming": {
"content": [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test_1.fastq.gz",
+ "test_2.fastq.gz",
+ [
+
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ [
+
+ ],
[
+ ],
+ [
+ "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e"
]
],
- "timestamp": "2023-11-26T02:28:26.238536"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-19T15:49:26.574759"
},
- "trim_read_count": {
+ "sarscov2 paired-end [fastq]": {
"content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_1.fastp.fastq.gz:md5,67b2bbae47f073e05a97a9c2edce23c7",
+ "test_2.fastp.fastq.gz:md5,25cbdca08e2083dbd4f0502de6b62f39"
+ ]
+ ]
+ ],
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.json:md5,1e0f8e27e71728e2b63fc64086be95cd"
+ ]
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "unspecified"
+ ]
+ ],
[
[
{
@@ -74,8 +408,142 @@
},
198
]
+ ],
+ [
+ "versions.yml:md5,85bd0117e5778fff18e3920972a296ad",
+ "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0",
+ "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-18T16:53:39.139038"
+ },
+ "min_trimmed_reads = 26": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ [
+ "test_1.fastp.fastq.gz:md5,54b726a55e992a869fd3fa778afe1672",
+ "test_2.fastp.fastq.gz:md5,29d3b33b869f7b63417b8ff07bb128ba"
+ ]
+ ]
+ ],
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.fastp.json:md5,b712fd68ed0322f4bec49ff2a5237fcc"
+ ]
+ ],
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "test.merged.fastq.gz:md5,c873bb1ab3fa859dcc47306465e749d5"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ "unspecified"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": false
+ },
+ 75
+ ]
+ ],
+ [
+ "versions.yml:md5,85bd0117e5778fff18e3920972a296ad",
+ "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0",
+ "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e"
+ ]
+ ],
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-18T11:52:23.849945"
+ },
+ "with_umi": {
+ "content": [
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.fastq.gz:md5,ba8c6c3a7ce718d9a2c5857e2edf53bc"
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ "test.fastp.json:md5,d39c5c6d9a2e35fb60d26ced46569af6"
+ ]
+ ],
+ [
+
+ ],
+ [
+
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ ""
+ ]
+ ],
+ [
+ [
+ {
+ "id": "test",
+ "single_end": true
+ },
+ 99
+ ]
+ ],
+ [
+ "versions.yml:md5,01f264f78de3c6d893c449cc6d3cd721",
+ "versions.yml:md5,85bd0117e5778fff18e3920972a296ad",
+ "versions.yml:md5,c50aa59475ab901bc6f9a2cf7b1a14e0",
+ "versions.yml:md5,f3dcaae948e8eed92b4a5557b4c6668e"
]
],
- "timestamp": "2023-11-26T02:28:26.27984169"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-18T17:31:09.193212"
}
}
\ No newline at end of file
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/nextflow.config b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/nextflow.config
new file mode 100644
index 0000000..12f7b25
--- /dev/null
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/nextflow.config
@@ -0,0 +1,11 @@
+// Process settings for the FASTQ_FASTQC_UMITOOLS_FASTP nf-test suite, which loads
+// this file through `config './nextflow.config'` in tests/main.nf.test.
+process {
+
+ // Barcode patterns handed to UMITOOLS_EXTRACT for read 1 and read 2
+ withName: UMITOOLS_EXTRACT {
+ ext.args = '--bc-pattern="NNNN" --bc-pattern2="NNNN"'
+ }
+
+ // NOTE(review): no UMICOLLAPSE process is visible in this subworkflow's diff — confirm this selector is needed
+ withName: UMICOLLAPSE {
+ ext.prefix = { "${meta.id}.dedup" }
+ }
+
+}
\ No newline at end of file
diff --git a/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/nextflow.save_trimmed.config b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/nextflow.save_trimmed.config
new file mode 100644
index 0000000..2430e9d
--- /dev/null
+++ b/subworkflows/nf-core/fastq_fastqc_umitools_fastp/tests/nextflow.save_trimmed.config
@@ -0,0 +1,6 @@
+// Extra process settings loaded only by the "save_trimmed_fail" test through
+// `config './nextflow.save_trimmed.config'`.
+process {
+ // Make filtering more aggressive to make more reads fail
+ withName: FASTP {
+ ext.args = "-e 30"
+ }
+}
\ No newline at end of file
diff --git a/subworkflows/pfr/fasta_edta_lai/main.nf b/subworkflows/pfr/fasta_edta_lai/main.nf
index 2e73ca5..628e255 100644
--- a/subworkflows/pfr/fasta_edta_lai/main.nf
+++ b/subworkflows/pfr/fasta_edta_lai/main.nf
@@ -1,33 +1,45 @@
-include { CUSTOM_SHORTENFASTAIDS } from '../../../modules/pfr/custom/shortenfastaids'
-include { EDTA_EDTA } from '../../../modules/pfr/edta/edta'
-include { LAI } from '../../../modules/pfr/lai'
-include { CUSTOM_RESTOREGFFIDS } from '../../../modules/pfr/custom/restoregffids'
+include { CUSTOM_SHORTENFASTAIDS } from '../../../modules/pfr/custom/shortenfastaids/main'
+include { EDTA_EDTA } from '../../../modules/pfr/edta/edta/main'
+include { LTRRETRIEVER_LAI } from '../../../modules/pfr/ltrretriever/lai/main'
+include { CUSTOM_RESTOREGFFIDS } from '../../../modules/pfr/custom/restoregffids/main'
workflow FASTA_EDTA_LAI {
take:
- ch_fasta // channel: [ val(meta), fasta ]
- ch_monoploid_seqs // channel: [ val(meta), txt ]; Optional: Set to [] if not needed
- skip_lai // val; true|false
+ ch_fasta // channel: [ val(meta), fasta ]
+ ch_monoploid_seqs // channel: [ val(meta), txt ]; Optional: Set to [] if not needed
+ skip_lai // val; true|false
main:
-
- ch_versions = Channel.empty()
+ ch_versions = Channel.empty()
// MOUDLE: CUSTOM_SHORTENFASTAIDS
CUSTOM_SHORTENFASTAIDS ( ch_fasta )
- ch_short_ids_fasta = ch_fasta
- | join(CUSTOM_SHORTENFASTAIDS.out.short_ids_fasta, by:0, remainder:true)
- | map { meta, fasta, short_ids_fasta ->
- [ meta, short_ids_fasta ?: fasta ]
- }
+ ch_short_ids_fasta = ch_fasta
+ | join(CUSTOM_SHORTENFASTAIDS.out.short_ids_fasta, by:0, remainder:true)
+ | map { meta, fasta, short_ids_fasta ->
+ if ( fasta ) { [ meta, short_ids_fasta ?: fasta ] }
+ }
+
+ ch_short_ids_tsv = CUSTOM_SHORTENFASTAIDS.out.short_ids_tsv
+ ch_short_monoploid_seqs = ch_short_ids_tsv
+ | join(
+ ch_monoploid_seqs ?: Channel.empty()
+ )
+ | map { meta, short_ids_tsv, monoploid_seqs ->
+ map_monoploid_seqs_to_new_ids(meta, short_ids_tsv, monoploid_seqs)
+ }
+ | collectFile(newLine:true)
+ | map { seqs ->
+ def id = seqs.name.split('.mapped.monoploid.seqs.txt')[0]
- ch_short_ids_tsv = CUSTOM_SHORTENFASTAIDS.out.short_ids_tsv
- ch_versions = ch_versions.mix(CUSTOM_SHORTENFASTAIDS.out.versions.first())
+ [ [ id: id ], seqs ]
+ }
+ ch_versions = ch_versions.mix(CUSTOM_SHORTENFASTAIDS.out.versions.first())
// MODULE: EDTA_EDTA
- EDTA_EDTA (
+ EDTA_EDTA(
ch_short_ids_fasta,
[],
[],
@@ -35,54 +47,85 @@ workflow FASTA_EDTA_LAI {
[]
)
- ch_te_lib_fasta = EDTA_EDTA.out.te_lib_fasta
- ch_pass_list = EDTA_EDTA.out.pass_list
- ch_out_file = EDTA_EDTA.out.out_file
- ch_te_anno_gff3 = EDTA_EDTA.out.te_anno_gff3
- ch_versions = ch_versions.mix(EDTA_EDTA.out.versions.first())
+ ch_te_lib_fasta = EDTA_EDTA.out.te_lib_fasta
+ ch_pass_list = EDTA_EDTA.out.pass_list
+ ch_out_file = EDTA_EDTA.out.out_file
+ ch_te_anno_gff3 = EDTA_EDTA.out.te_anno_gff3
+ ch_versions = ch_versions.mix(EDTA_EDTA.out.versions.first())
// MODULE: LAI
- ch_lai_inputs = skip_lai
- ? Channel.empty()
- : ch_short_ids_fasta
- | join(ch_pass_list)
- | join(ch_out_file)
- | join(
- ch_monoploid_seqs ?: Channel.empty(),
- by:0,
- remainder: true
- )
- | map { meta, fasta, pass, out, mono ->
- [ meta, fasta, pass, out, mono ?: [] ]
- }
- LAI (
+ ch_lai_inputs = skip_lai
+ ? Channel.empty()
+ : ch_short_ids_fasta
+ | join(ch_pass_list)
+ | join(ch_out_file)
+ | map { meta, fasta, pass, out ->
+ [ meta.id, meta, fasta, pass, out ]
+ }
+ | join(
+ ch_short_monoploid_seqs
+ | map { meta, mono -> [ meta.id, mono ] },
+ by:0,
+ remainder: true
+ )
+ | map { id, meta, fasta, pass, out, mono ->
+ [ meta, fasta, pass, out, mono ?: [] ]
+ }
+ LTRRETRIEVER_LAI(
ch_lai_inputs.map { meta, fasta, pass, out, mono -> [ meta, fasta ] },
ch_lai_inputs.map { meta, fasta, pass, out, mono -> pass },
ch_lai_inputs.map { meta, fasta, pass, out, mono -> out },
ch_lai_inputs.map { meta, fasta, pass, out, mono -> mono }
)
- ch_lai_log = LAI.out.log
- ch_lai_out = LAI.out.lai_out
- ch_versions = ch_versions.mix(LAI.out.versions.first())
+ ch_lai_log = LTRRETRIEVER_LAI.out.log
+ ch_lai_out = LTRRETRIEVER_LAI.out.lai_out
+ ch_versions = ch_versions.mix(LTRRETRIEVER_LAI.out.versions.first())
// MODULE: CUSTOM_RESTOREGFFIDS
- ch_restorable_gff_tsv = ch_te_anno_gff3.join(ch_short_ids_tsv)
+ ch_restorable_gff_tsv = ch_te_anno_gff3.join(ch_short_ids_tsv)
CUSTOM_RESTOREGFFIDS (
ch_restorable_gff_tsv.map { meta, gff, tsv -> [ meta, gff ] },
ch_restorable_gff_tsv.map { meta, gff, tsv -> tsv }
)
- ch_restored_gff = ch_te_anno_gff3
- | join(CUSTOM_RESTOREGFFIDS.out.restored_ids_gff3, by:0, remainder:true)
- | map { meta, gff, restored_gff -> [ meta, restored_gff ?: gff ] }
- ch_versions = ch_versions.mix(CUSTOM_RESTOREGFFIDS.out.versions.first())
+ ch_restored_gff = ch_te_anno_gff3
+ | join(CUSTOM_RESTOREGFFIDS.out.restored_ids_gff3, by:0, remainder:true)
+ | map { meta, gff, restored_gff -> [ meta, restored_gff ?: gff ] }
+ ch_versions = ch_versions.mix(CUSTOM_RESTOREGFFIDS.out.versions.first())
emit:
- te_lib_fasta = ch_te_lib_fasta // channel: [ val(meta), fasta ]
- te_anno_gff3 = ch_restored_gff // channel: [ val(meta), gff ]
- lai_log = ch_lai_log // channel: [ val(meta), log ]
- lai_out = ch_lai_out // channel: [ val(meta), out ]
- versions = ch_versions // channel: [ versions.yml ]
+ te_lib_fasta = ch_te_lib_fasta // channel: [ val(meta), fasta ]
+ te_anno_gff3 = ch_restored_gff // channel: [ val(meta), gff ]
+ lai_log = ch_lai_log // channel: [ val(meta), log ]
+ lai_out = ch_lai_out // channel: [ val(meta), out ]
+ versions = ch_versions // channel: [ versions.yml ]
+}
+
+def map_monoploid_seqs_to_new_ids(meta, short_ids_tsv, monoploid_seqs) {
+    // Translates the sequence IDs listed in `monoploid_seqs` (one ID per line) into the
+    // renamed IDs recorded in `short_ids_tsv`.
+    //
+    // meta           : map providing `id`, used to build the name of the output entry
+    // short_ids_tsv  : object exposing `.text`; either the single "No change required."
+    //                  notice or tab-separated `original_id<TAB>renamed_id` lines
+    // monoploid_seqs : object exposing `.text`; one original sequence ID per line
+    //
+    // Returns a list whose first element is "<meta.id>.mapped.monoploid.seqs.txt" and whose
+    // remaining elements are the (possibly renamed) IDs, in input order.
+    // Calls `error` when an ID from `monoploid_seqs` has no entry in the rename table.
+
+    def out_name = "${meta.id}.mapped.monoploid.seqs.txt"
+
+    // Trim and drop blank lines so that a trailing newline or CRLF line endings in the
+    // supplied list are not treated as (unknown) sequence IDs
+    def monoploid_ids = monoploid_seqs.text.readLines().collect { it.trim() }.findAll { it }
+
+    def short_ids_lines = short_ids_tsv.text.readLines()
+    def short_ids_head = short_ids_lines ? short_ids_lines[0].trim() : ''
+
+    // Nothing was renamed: pass the IDs through unchanged
+    if (short_ids_head == "IDs have acceptable length and character. No change required.") {
+        return [ out_name ] + monoploid_ids
+    }
+
+    // Build the original -> renamed lookup, ignoring blank lines in the table
+    def orig_to_new_ids = [:]
+    short_ids_lines.findAll { it.trim() }.each { line ->
+        def (original_id, renamed_id) = line.split('\t')
+        orig_to_new_ids[original_id] = renamed_id
+    }
+
+    def mapped_ids = monoploid_ids.collect { original_id ->
+        if (!orig_to_new_ids[original_id]) {
+            error "Failed to find '${original_id}' from ${monoploid_seqs} in ${short_ids_tsv}. " +
+                "The monoploid_seqs file is malformed!"
+        }
+
+        orig_to_new_ids[original_id]
+    }
+
+    return [ out_name ] + mapped_ids
}
diff --git a/subworkflows/pfr/fasta_edta_lai/meta.yml b/subworkflows/pfr/fasta_edta_lai/meta.yml
index 52483ce..c356ce7 100644
--- a/subworkflows/pfr/fasta_edta_lai/meta.yml
+++ b/subworkflows/pfr/fasta_edta_lai/meta.yml
@@ -11,10 +11,10 @@ keywords:
- stats
- qc
components:
+ - edta/edta
- custom/restoregffids
+ - ltrretriever/lai
- custom/shortenfastaids
- - edta/edta
- - lai
input:
- ch_fasta:
type: file
diff --git a/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test b/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test
index e852a70..2c6850d 100644
--- a/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test
+++ b/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test
@@ -3,17 +3,60 @@ nextflow_workflow {
name "Test Workflow FASTA_EDTA_LAI"
script "../main.nf"
workflow "FASTA_EDTA_LAI"
+ config "./nextflow.config"
tag "subworkflows"
tag "subworkflows_nfcore"
tag "subworkflows/fasta_edta_lai"
tag "fasta_edta_lai"
- tag "lai"
+ tag "modules/nf-core/gunzip"
+ tag "custom/shortenfastaids"
tag "edta/edta"
+ tag "ltrretriever/lai"
tag "custom/restoregffids"
- tag "custom/shortenfastaids"
- test("test_data") {
+ // Non-stub run of FASTA_EDTA_LAI on the actinidia_chinensis genome_21_fasta_gz test data file
+ test("actinidia_chinensis-genome_21_fasta_gz") {
+
+ // Decompress the gzipped test FASTA with the GUNZIP module before the workflow runs
+ setup {
+ run("GUNZIP") {
+ script "../../../../modules/nf-core/gunzip"
+
+ process {
+ """
+ input[0] = [
+ [ id:'test' ],
+ file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true)
+ ]
+ """
+ }
+ }
+ }
+
+ // Workflow inputs: the GUNZIP output, an empty list, and the boolean false
+ // NOTE(review): presumably these map to ch_fasta, ch_monoploid_seqs (none) and skip_lai — confirm against the workflow's take block
+ when {
+ workflow {
+ """
+ input[0] = GUNZIP.out.gunzip
+ input[1] = []
+ input[2] = false
+ """
+ }
+ }
+
+ // Content-based checks on the emitted files (marker strings and a numeric tolerance) instead of a snapshot
+ then {
+ assertAll(
+ { assert workflow.success },
+ { assert file(workflow.out.te_anno_gff3[0][1]).text.contains('Copia_LTR_retrotransposon') },
+ { assert file(workflow.out.lai_log[0][1]).text.contains('Calculate LAI:') },
+ { assert file(workflow.out.lai_log[0][1]).text.contains('Done!') },
+ // Tab-separated field 6 (0-based) on the second line of lai_out must be within 1.0 of 31.29
+ // NOTE(review): presumably this field is the LAI score — confirm against the LAI output format
+ { assert Math.abs(Float.parseFloat(path(workflow.out.lai_out[0][1]).text.split("\n")[1].split("\t")[6]) - 31.29) <= 1.0 },
+ { assert file(workflow.out.te_lib_fasta[0][1]).text.contains('#LTR/Copia') }
+ )
+ }
+ }
+
+ test("actinidia_chinensis-genome_21_fasta_gz-stub") {
+
+ options '-stub'
setup {
run("GUNZIP") {
@@ -23,7 +66,7 @@ nextflow_workflow {
"""
input[0] = [
[ id:'test' ],
- file('/Users/hrauxr/Projects/nxf-modules/tests/data/genome.fasta.gz', checkIfExists: true)
+ file(params.test_data['actinidia_chinensis']['genome']['genome_21_fasta_gz'], checkIfExists: true)
]
"""
}
@@ -43,8 +86,8 @@ nextflow_workflow {
then {
assertAll(
{ assert workflow.success },
- { assert snapshot(workflow.out.versions).match("versions") }
+ { assert snapshot(workflow.out).match() }
)
}
}
-}
+}
\ No newline at end of file
diff --git a/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test.snap b/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test.snap
index 574acc9..2ab7da2 100644
--- a/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test.snap
+++ b/subworkflows/pfr/fasta_edta_lai/tests/main.nf.test.snap
@@ -1,11 +1,87 @@
{
- "versions": {
+ "actinidia_chinensis-genome_21_fasta_gz-stub": {
"content": [
- [
- "versions.yml:md5,0d4bc49e94acb8995ca552d4e666e3ce",
- "versions.yml:md5,754bb19f86be761d90c002a0af2faf1c"
- ]
+ {
+ "0": [
+ [
+ {
+ "id": "test"
+ },
+ "test.EDTA.TElib.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "1": [
+ [
+ {
+ "id": "test"
+ },
+ "test.EDTA.TEanno.gff3:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "2": [
+ [
+ {
+ "id": "test"
+ },
+ "test.LAI.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "3": [
+ [
+ {
+ "id": "test"
+ },
+ "test.LAI.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "4": [
+ "versions.yml:md5,0d4bc49e94acb8995ca552d4e666e3ce",
+ "versions.yml:md5,65666e975bdfd71978843ca963e84d0c",
+ "versions.yml:md5,754bb19f86be761d90c002a0af2faf1c"
+ ],
+ "lai_log": [
+ [
+ {
+ "id": "test"
+ },
+ "test.LAI.log:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "lai_out": [
+ [
+ {
+ "id": "test"
+ },
+ "test.LAI.out:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "te_anno_gff3": [
+ [
+ {
+ "id": "test"
+ },
+ "test.EDTA.TEanno.gff3:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "te_lib_fasta": [
+ [
+ {
+ "id": "test"
+ },
+ "test.EDTA.TElib.fa:md5,d41d8cd98f00b204e9800998ecf8427e"
+ ]
+ ],
+ "versions": [
+ "versions.yml:md5,0d4bc49e94acb8995ca552d4e666e3ce",
+ "versions.yml:md5,65666e975bdfd71978843ca963e84d0c",
+ "versions.yml:md5,754bb19f86be761d90c002a0af2faf1c"
+ ]
+ }
],
- "timestamp": "2023-12-22T14:09:24.171934"
+ "meta": {
+ "nf-test": "0.8.4",
+ "nextflow": "23.10.1"
+ },
+ "timestamp": "2024-03-20T18:05:46.667121"
}
}
\ No newline at end of file
diff --git a/subworkflows/pfr/fasta_edta_lai/tests/nextflow.config b/subworkflows/pfr/fasta_edta_lai/tests/nextflow.config
new file mode 100644
index 0000000..1fa6315
--- /dev/null
+++ b/subworkflows/pfr/fasta_edta_lai/tests/nextflow.config
@@ -0,0 +1,5 @@
+process {
+ // Test configuration: sets ext.args for the EDTA_EDTA process only
+ // NOTE(review): '--anno 1' presumably enables EDTA's annotation step, which the te_anno_gff3 assertions rely on — confirm
+ withName: EDTA_EDTA {
+ ext.args = '--anno 1'
+ }
+}
diff --git a/tests/stub/assemblysheet.csv b/tests/stub/assemblysheet.csv
new file mode 100644
index 0000000..cfa0cdb
--- /dev/null
+++ b/tests/stub/assemblysheet.csv
@@ -0,0 +1,3 @@
+tag,fasta,is_masked,te_lib,braker_gff3,braker_hints
+red5_v2p1,tests/stub/target/red5_v2p1_chr1.fasta.gz,no,,tests/stub/braker/red5_v2p1.gff3.gz,tests/stub/braker/red5_v2p1.hints.gff.gz
+donghong,tests/stub/target/donghong.chr1.fsa.gz,no,tests/stub/te_lib/donghong.TElib.fa.gz,tests/stub/braker/red5_v2p1.gff3.gz,tests/stub/braker/red5_v2p1.hints.gff.gz
diff --git a/tests/stub/braker/donghong.gff3.gz b/tests/stub/braker/donghong.gff3.gz
new file mode 100644
index 0000000..e69de29
diff --git a/tests/stub/braker/donghong.hints.gff.gz b/tests/stub/braker/donghong.hints.gff.gz
new file mode 100644
index 0000000..e69de29
diff --git a/tests/stub/braker/red5_v2p1.gff3.gz b/tests/stub/braker/red5_v2p1.gff3.gz
new file mode 100644
index 0000000..e69de29
diff --git a/tests/stub/braker/red5_v2p1.hints.gff.gz b/tests/stub/braker/red5_v2p1.hints.gff.gz
new file mode 100644
index 0000000..e69de29
diff --git a/tests/stub/emapperdb/5.0.2/eggnog.db b/tests/stub/emapperdb/5.0.2/eggnog.db
new file mode 100644
index 0000000..e69de29
diff --git a/tests/stub/emapperdb/5.0.2/eggnog_proteins.dmnd b/tests/stub/emapperdb/5.0.2/eggnog_proteins.dmnd
new file mode 100644
index 0000000..e69de29
diff --git a/tests/stub/ext_prot/RU01.20221115150135.chr1.pep.fasta.gz b/tests/stub/ext_prot/RU01.20221115150135.chr1.pep.fasta.gz
new file mode 100644
index 0000000..e69de29
diff --git a/tests/stub/ext_prot/RU01.20221115150135.chr2.pep.fasta.gz b/tests/stub/ext_prot/RU01.20221115150135.chr2.pep.fasta.gz
new file mode 100644
index 0000000..e69de29
diff --git a/tests/stub/external-protein-fastas.txt b/tests/stub/external-protein-fastas.txt
new file mode 100644
index 0000000..d510c4c
--- /dev/null
+++ b/tests/stub/external-protein-fastas.txt
@@ -0,0 +1,2 @@
+tests/stub/ext_prot/RU01.20221115150135.chr1.pep.fasta.gz
+tests/stub/ext_prot/RU01.20221115150135.chr2.pep.fasta.gz
diff --git a/tests/stub/fastqsheet.csv b/tests/stub/fastqsheet.csv
new file mode 100644
index 0000000..0b7e223
--- /dev/null
+++ b/tests/stub/fastqsheet.csv
@@ -0,0 +1,4 @@
+sample,fastq_1,fastq_2,target_assemblies
+Root1,tests/stub/fq/1505KHS-0090_Root1_162bp_C728RACXX_Lane1_R1.1k.fastq.gz,tests/stub/fq/1505KHS-0090_Root1_162bp_C728RACXX_Lane1_R2.1k.fastq.gz,red5_v2p1
+Root1,tests/stub/fq/1505KHS-0090_Root2_156bp_C728RACXX_Lane1_R1.1k.fastq.gz,tests/stub/fq/1505KHS-0090_Root2_156bp_C728RACXX_Lane1_R2.1k.fastq.gz,red5_v2p1
+cane3,tests/stub/fq/1505KHS-0090_cane3_165bp_C728RACXX_Lane1_R1.1k.fastq.gz,tests/stub/fq/1505KHS-0090_cane3_165bp_C728RACXX_Lane1_R2.1k.fastq.gz,red5_v2p1;donghong
diff --git a/tests/stub/fq/1505KHS-0090_Root1_162bp_C728RACXX_Lane1_R1.1k.fastq.gz b/tests/stub/fq/1505KHS-0090_Root1_162bp_C728RACXX_Lane1_R1.1k.fastq.gz
new file mode 100644
index 0000000..e69de29
diff --git a/tests/stub/fq/1505KHS-0090_Root1_162bp_C728RACXX_Lane1_R2.1k.fastq.gz b/tests/stub/fq/1505KHS-0090_Root1_162bp_C728RACXX_Lane1_R2.1k.fastq.gz
new file mode 100644
index 0000000..e69de29
diff --git a/tests/stub/fq/1505KHS-0090_Root2_156bp_C728RACXX_Lane1_R1.1k.fastq.gz b/tests/stub/fq/1505KHS-0090_Root2_156bp_C728RACXX_Lane1_R1.1k.fastq.gz
new file mode 100644
index 0000000..e69de29
diff --git a/tests/stub/fq/1505KHS-0090_Root2_156bp_C728RACXX_Lane1_R2.1k.fastq.gz b/tests/stub/fq/1505KHS-0090_Root2_156bp_C728RACXX_Lane1_R2.1k.fastq.gz
new file mode 100644
index 0000000..e69de29
diff --git a/tests/stub/fq/1505KHS-0090_cane3_165bp_C728RACXX_Lane1_R1.1k.fastq.gz b/tests/stub/fq/1505KHS-0090_cane3_165bp_C728RACXX_Lane1_R1.1k.fastq.gz
new file mode 100644
index 0000000..e69de29
diff --git a/tests/stub/fq/1505KHS-0090_cane3_165bp_C728RACXX_Lane1_R2.1k.fastq.gz b/tests/stub/fq/1505KHS-0090_cane3_165bp_C728RACXX_Lane1_R2.1k.fastq.gz
new file mode 100644
index 0000000..e69de29
diff --git a/tests/stub/liftoff/RU01.20221115150135.chr1.gff3.gz b/tests/stub/liftoff/RU01.20221115150135.chr1.gff3.gz
new file mode 100644
index 0000000..e69de29
diff --git a/tests/stub/liftoff/RU01.20221115150135.chr2.gff3.gz b/tests/stub/liftoff/RU01.20221115150135.chr2.gff3.gz
new file mode 100644
index 0000000..e69de29
diff --git a/tests/stub/liftoff/Russell_V2a.chr1.fsa.gz b/tests/stub/liftoff/Russell_V2a.chr1.fsa.gz
new file mode 100644
index 0000000..e69de29
diff --git a/tests/stub/liftoff/Russell_V2a.chr2.fsa.gz b/tests/stub/liftoff/Russell_V2a.chr2.fsa.gz
new file mode 100644
index 0000000..e69de29
diff --git a/tests/stub/liftoffannotations.csv b/tests/stub/liftoffannotations.csv
new file mode 100644
index 0000000..5215d2c
--- /dev/null
+++ b/tests/stub/liftoffannotations.csv
@@ -0,0 +1,3 @@
+fasta,gff3
+tests/stub/liftoff/Russell_V2a.chr1.fsa.gz,tests/stub/liftoff/RU01.20221115150135.chr1.gff3.gz
+tests/stub/liftoff/Russell_V2a.chr2.fsa.gz,tests/stub/liftoff/RU01.20221115150135.chr2.gff3.gz
diff --git a/tests/stub/params.json b/tests/stub/params.json
new file mode 100644
index 0000000..db97d3b
--- /dev/null
+++ b/tests/stub/params.json
@@ -0,0 +1,10 @@
+{
+ "input": "tests/stub/assemblysheet.csv",
+ "external_protein_fastas": "tests/stub/external-protein-fastas.txt",
+ "eggnogmapper_db_dir": "tests/stub/emapperdb/5.0.2",
+ "eggnogmapper_tax_scope": 33090,
+ "fastq": "tests/stub/fastqsheet.csv",
+ "liftoff_annotations": "tests/stub/liftoffannotations.csv",
+ "max_cpus": 2,
+ "max_memory": "3.GB"
+}
diff --git a/tests/stub/target/donghong.chr1.fsa.gz b/tests/stub/target/donghong.chr1.fsa.gz
new file mode 100644
index 0000000..e69de29
diff --git a/tests/stub/target/red5_v2p1_chr1.fasta.gz b/tests/stub/target/red5_v2p1_chr1.fasta.gz
new file mode 100644
index 0000000..e69de29
diff --git a/tests/stub/te_lib/donghong.TElib.fa.gz b/tests/stub/te_lib/donghong.TElib.fa.gz
new file mode 100644
index 0000000..e69de29
diff --git a/version_check.sh b/version_check.sh
new file mode 100755
index 0000000..131d97c
--- /dev/null
+++ b/version_check.sh
@@ -0,0 +1,8 @@
+#!/usr/bin/env bash
+
+config_version=$(sed -n "s/.*version.*= '\(.*\)'.*/\1/p" nextflow.config)
+
+# Check CHANGELOG version
+
+grep "## $config_version - " CHANGELOG.md >/dev/null \
+ || (echo 'Failed to match CHANGELOG version'; exit 1)
diff --git a/workflows/pangene.nf b/workflows/pangene.nf
index 8512ff9..c0f3c32 100644
--- a/workflows/pangene.nf
+++ b/workflows/pangene.nf
@@ -1,36 +1,102 @@
-include { validateParams } from '../modules/local/validate_params'
-include { PREPARE_ASSEMBLY } from '../subworkflows/local/prepare_assembly'
-include { PREPROCESS_RNASEQ } from '../subworkflows/local/preprocess_rnaseq'
-include { ALIGN_RNASEQ } from '../subworkflows/local/align_rnaseq'
-include { PREPARE_EXT_PROTS } from '../subworkflows/local/prepare_ext_prots'
-include { BRAKER3 } from '../modules/kherronism/braker3'
-include { FASTA_LIFTOFF } from '../subworkflows/local/fasta_liftoff'
-include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions'
+include { fromSamplesheet; paramsSummaryLog } from 'plugin/nf-validation'
+include { idFromFileName; validateFastqMetadata } from '../modules/local/utils'
+include { PREPARE_ASSEMBLY } from '../subworkflows/local/prepare_assembly'
+include { PREPROCESS_RNASEQ } from '../subworkflows/local/preprocess_rnaseq'
+include { ALIGN_RNASEQ } from '../subworkflows/local/align_rnaseq'
+include { PREPARE_EXT_PROTS } from '../subworkflows/local/prepare_ext_prots'
+include { FASTA_BRAKER3 } from '../subworkflows/local/fasta_braker3'
+include { FASTA_LIFTOFF } from '../subworkflows/local/fasta_liftoff'
+include { PURGE_BREAKER_MODELS } from '../subworkflows/local/purge_breaker_models'
+include { GFF_MERGE_CLEANUP } from '../subworkflows/local/gff_merge_cleanup'
+include { GFF_EGGNOGMAPPER } from '../subworkflows/local/gff_eggnogmapper'
+include { PURGE_NOHIT_MODELS } from '../subworkflows/local/purge_nohit_models'
+include { GFF_STORE } from '../subworkflows/local/gff_store'
+include { CUSTOM_DUMPSOFTWAREVERSIONS } from '../modules/nf-core/custom/dumpsoftwareversions'
-validateParams(params)
+log.info paramsSummaryLog(workflow)
workflow PANGENE {
+ // Versions channel
ch_versions = Channel.empty()
- ch_target_assembly = Channel.fromList(params.target_assemblies)
- | map { tag, filePath ->
- [ [ id: tag ], file(filePath, checkIfExists: true) ]
+ // Input channels
+ ch_input = Channel.fromSamplesheet('input')
+
+ ch_target_assembly = ch_input
+ | map { it ->
+ def tag = it[0]
+ def fasta = it[1]
+
+ [ [ id: tag ], file(fasta, checkIfExists: true) ]
+ }
+
+ ch_tar_assm_str = ch_input
+ | map { it ->
+ def tag = it[0].strip()
+
+ tag
}
+ | collect
+ | map { it ->
+ it.join(",")
+ }
+
+ ch_masked = ch_input
+ | map { it ->
+ def tag = it[0]
+ def is_masked = it[2]
- ch_te_library = Channel.fromList(params.te_libraries)
- | map { tag, filePath ->
- [ [ id:tag ], file(filePath, checkIfExists: true) ]
+ [ [ id: tag ], is_masked == "yes" ]
}
- ch_samplesheet = params.samplesheet
- ? Channel.fromPath(params.samplesheet, checkIfExists: true)
- : Channel.empty()
+ ch_te_library = ch_input
+ | map { it ->
+ def tag = it[0]
+ def te_fasta = it[3]
- ch_tar_assm_str = Channel.of(
- params.target_assemblies
- .collect { tag, fastaPath -> tag.strip() }.join(",")
- )
+ if ( te_fasta ) {
+ [ [ id:tag ], file(te_fasta, checkIfExists: true) ]
+ }
+ }
+
+ ch_braker_annotation = ch_input
+ | map { it ->
+ def tag = it[0]
+ def braker_gff3 = it[4]
+ def hints_gff = it[5]
+
+ if ( braker_gff3 ) {
+ [
+ [ id: tag ],
+ file(braker_gff3, checkIfExists: true),
+ file(hints_gff, checkIfExists: true)
+ ]
+ }
+ }
+
+ ch_braker_ex_asm_str = ch_braker_annotation
+ | map { meta, braker_gff3, hints_gff -> meta.id }
+ | collect
+ | map { it.join(",") }
+ | ifEmpty( "" )
+
+ ch_reads = ! params.fastq
+ ? Channel.empty()
+ : Channel.fromSamplesheet('fastq')
+ | map { meta, fq1, fq2 ->
+ fq2
+ ? [ meta + [ single_end: false ], [ file(fq1, checkIfExists:true), file(fq2, checkIfExists:true) ] ]
+ : [ meta + [ single_end: true ], [ file(fq1, checkIfExists:true) ] ]
+ }
+ | map { meta, fqs ->
+ [ meta.id, meta + [ target_assemblies: meta.target_assemblies.split(';').sort() ], fqs ]
+ }
+ | groupTuple
+ | combine(ch_tar_assm_str)
+ | map { id, metas, fqs, tar_assm_str ->
+ validateFastqMetadata(metas, fqs, tar_assm_str)
+ }
ch_ribo_db = params.remove_ribo_rna
? file(params.ribo_database_manifest, checkIfExists: true)
@@ -42,32 +108,43 @@ workflow PANGENE {
| collect
: Channel.empty()
- ch_ext_prot_fastas = params.external_protein_fastas
- ? Channel.fromList(params.external_protein_fastas)
- | map { filePath ->
- def fileHandle = file(filePath, checkIfExists: true)
- [ [ id: fileHandle.getSimpleName() ], fileHandle]
+ ch_ext_prot_fastas = ! params.external_protein_fastas
+ ? Channel.empty()
+ : Channel.fromPath(params.external_protein_fastas)
+ | splitText
+ | map { file_path ->
+ def file_handle = file(file_path.strip(), checkIfExists: true)
+ [ [ id: idFromFileName( file_handle.baseName ) ], file_handle ]
}
- : Channel.empty()
- ch_xref_mm = params.liftoff_xref_annotations
- ? Channel.fromList(params.liftoff_xref_annotations)
+ ch_liftoff_mm = ! params.liftoff_annotations
+ ? Channel.empty()
+ : Channel.fromSamplesheet('liftoff_annotations')
| multiMap { fasta, gff ->
def fastaFile = file(fasta, checkIfExists:true)
- fasta: [ [ id: fastaFile.getSimpleName() ], fastaFile ]
- gff: [ [ id: fastaFile.getSimpleName() ], file(gff, checkIfExists:true) ]
+ fasta: [ [ id: idFromFileName( fastaFile.baseName ) ], fastaFile ]
+ gff: [ [ id: idFromFileName( fastaFile.baseName ) ], file(gff, checkIfExists:true) ]
}
+
+ ch_liftoff_fasta = params.liftoff_annotations
+ ? ch_liftoff_mm.fasta
: Channel.empty()
- ch_xref_fasta = ch_xref_mm.fasta
- ch_xref_gff = ch_xref_mm.gff
+ ch_liftoff_gff = params.liftoff_annotations
+ ? ch_liftoff_mm.gff
+ : Channel.empty()
+
+ val_tsebra_config = params.braker_allow_isoforms
+ ? "${projectDir}/assets/tsebra-default.cfg"
+ : "${projectDir}/assets/tsebra-1form.cfg"
// SUBWORKFLOW: PREPARE_ASSEMBLY
PREPARE_ASSEMBLY(
ch_target_assembly,
ch_te_library,
- params.repeat_annotator
+ params.repeat_annotator,
+ ch_braker_ex_asm_str
)
ch_valid_target_assembly = PREPARE_ASSEMBLY.out.target_assemby
@@ -77,8 +154,9 @@ workflow PANGENE {
// SUBWORKFLOW: PREPROCESS_RNASEQ
PREPROCESS_RNASEQ(
- ch_samplesheet,
+ ch_reads,
ch_tar_assm_str,
+ ch_braker_ex_asm_str,
params.skip_fastqc,
params.skip_fastp,
params.save_trimmed,
@@ -95,7 +173,7 @@ workflow PANGENE {
ALIGN_RNASEQ(
ch_reads_target,
ch_trim_reads,
- ch_target_assemby_index
+ ch_target_assemby_index,
)
ch_rnaseq_bam = ALIGN_RNASEQ.out.bam
@@ -109,40 +187,77 @@ workflow PANGENE {
ch_ext_prots_fasta = PREPARE_EXT_PROTS.out.ext_prots_fasta
ch_versions = ch_versions.mix(PREPARE_EXT_PROTS.out.versions)
- // MODULE: BRAKER3
- ch_braker_inputs = ch_masked_target_assembly
- | join(ch_rnaseq_bam, remainder: true)
- | combine(
- ch_ext_prots_fasta.map { meta, filePath -> filePath }.ifEmpty(null)
- )
- | map { meta, fasta, bam, prots -> [ meta, fasta, bam ?: [], prots ?: [] ] }
-
- def rnaseq_sets_dirs = []
- def rnaseq_sets_ids = []
- def hintsfile = []
-
- BRAKER3(
- ch_braker_inputs.map { meta, fasta, bam, prots -> [meta, fasta] },
- ch_braker_inputs.map { meta, fasta, bam, prots -> bam },
- rnaseq_sets_dirs,
- rnaseq_sets_ids,
- ch_braker_inputs.map { meta, fasta, bam, prots -> prots },
- hintsfile
+ // SUBWORKFLOW: FASTA_BRAKER3
+ FASTA_BRAKER3(
+ ch_masked_target_assembly,
+ ch_braker_ex_asm_str,
+ ch_rnaseq_bam,
+ ch_ext_prots_fasta,
+ ch_braker_annotation
)
- ch_braker_gff3 = BRAKER3.out.gff3
- ch_versions = ch_versions.mix(BRAKER3.out.versions.first())
+ ch_braker_gff3 = FASTA_BRAKER3.out.braker_gff3
+ ch_braker_hints = FASTA_BRAKER3.out.braker_hints
+ ch_versions = ch_versions.mix(FASTA_BRAKER3.out.versions)
// SUBWORKFLOW: FASTA_LIFTOFF
FASTA_LIFTOFF(
ch_valid_target_assembly,
- ch_xref_fasta,
- ch_xref_gff
+ ch_liftoff_fasta,
+ ch_liftoff_gff
)
ch_liftoff_gff3 = FASTA_LIFTOFF.out.gff3
ch_versions = ch_versions.mix(FASTA_LIFTOFF.out.versions)
+ // SUBWORKFLOW: PURGE_BREAKER_MODELS
+ PURGE_BREAKER_MODELS(
+ ch_braker_gff3,
+ ch_braker_hints,
+ ch_liftoff_gff3,
+ val_tsebra_config,
+ params.braker_allow_isoforms
+ )
+
+ ch_braker_purged_gff = PURGE_BREAKER_MODELS.out.braker_purged_gff
+ ch_versions = ch_versions.mix(PURGE_BREAKER_MODELS.out.versions)
+
+ // SUBWORKFLOW: GFF_MERGE_CLEANUP
+ GFF_MERGE_CLEANUP(
+ ch_braker_purged_gff,
+ ch_liftoff_gff3
+ )
+
+ ch_merged_gff = GFF_MERGE_CLEANUP.out.gff
+ ch_versions = ch_versions.mix(GFF_MERGE_CLEANUP.out.versions)
+
+ // SUBWORKFLOW: GFF_EGGNOGMAPPER
+ GFF_EGGNOGMAPPER(
+ ch_merged_gff,
+ ch_valid_target_assembly,
+ params.eggnogmapper_db_dir,
+ )
+
+ ch_eggnogmapper_hits = GFF_EGGNOGMAPPER.out.eggnogmapper_hits
+ ch_eggnogmapper_annotations = GFF_EGGNOGMAPPER.out.eggnogmapper_annotations
+ ch_versions = ch_versions.mix(GFF_EGGNOGMAPPER.out.versions)
+
+ // SUBWORKFLOW: PURGE_NOHIT_MODELS
+ PURGE_NOHIT_MODELS(
+ ch_merged_gff,
+ ch_eggnogmapper_hits,
+ params.eggnogmapper_purge_nohits
+ )
+
+ ch_purged_gff = PURGE_NOHIT_MODELS.out.purged_gff
+ ch_versions = ch_versions.mix(PURGE_NOHIT_MODELS.out.versions)
+
+ // SUBWORKFLOW: GFF_STORE
+ GFF_STORE(
+ ch_purged_gff,
+ ch_eggnogmapper_annotations
+ )
+
// MODULE: CUSTOM_DUMPSOFTWAREVERSIONS
CUSTOM_DUMPSOFTWAREVERSIONS (
ch_versions.unique().collectFile(name: 'collated_versions.yml')
|