From 4aca65aa16b2780fe2eff5bfb4a08b98b97458b2 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Wed, 8 May 2024 18:22:09 +0200 Subject: [PATCH 01/23] Add validation for fasta files --- assets/schema_input.json | 18 ++++++++++++++++-- .../utils_nfcore_reportho_pipeline/main.nf | 8 ++++++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index d80499c..55dd337 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -17,8 +17,22 @@ "type": "string", "pattern": "^\\S+$", "errorMessage": "A query must be provided" + }, + "fasta": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.fa(sta)?$", + "errorMessage": "Fasta file must be provided, cannot contain spaces and must have extension '.fa' or '.fasta'" } + } + }, + "anyOf": [ + { + "required": ["id", "query"] }, - "required": ["id", "query"] - } + { + "required": ["id", "fasta"] + } + ] } diff --git a/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf b/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf index be134bc..b36a33c 100644 --- a/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf @@ -79,8 +79,12 @@ workflow PIPELINE_INITIALISATION { Channel .fromSamplesheet("input") .map { - id, query -> - [ id, query ] + id, query, fasta -> + if (query) { + [ id, query ] + } else { + [ id, fasta ] + } } .set { ch_samplesheet } From be8b29036d2da82560af8262b8cbd6b25469e344 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Wed, 8 May 2024 18:24:18 +0200 Subject: [PATCH 02/23] Fix format --- modules/local/identify_seq_online.nf | 2 +- subworkflows/local/get_orthologs.nf | 7 ++++--- workflows/reportho.nf | 8 ++++---- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/modules/local/identify_seq_online.nf b/modules/local/identify_seq_online.nf index 2ada143..719b325 100644 --- a/modules/local/identify_seq_online.nf +++ 
b/modules/local/identify_seq_online.nf @@ -21,7 +21,7 @@ process IDENTIFY_SEQ_ONLINE { prefix = task.ext.prefix ?: meta.id """ fetch_oma_by_sequence.py $fasta id_raw.txt ${prefix}_taxid.txt ${prefix}_exact.txt - uniprotize_oma.py id_raw.txt > ${prefix}_id.txt + uniprotize_oma_online.py id_raw.txt > ${prefix}_id.txt cat <<- END_VERSIONS > versions.yml "${task.process}": diff --git a/subworkflows/local/get_orthologs.nf b/subworkflows/local/get_orthologs.nf index 86fb6c2..d151ecc 100644 --- a/subworkflows/local/get_orthologs.nf +++ b/subworkflows/local/get_orthologs.nf @@ -28,7 +28,6 @@ workflow GET_ORTHOLOGS { ch_orthogroups = Channel.empty() // Preprocessing - find the ID and taxid of the query sequences - if (!params.uniprot_query) { ch_samplesheet .map { it -> [it[0], file(it[1])] } @@ -45,7 +44,8 @@ workflow GET_ORTHOLOGS { ch_versions .mix(IDENTIFY_SEQ_ONLINE.out.versions) .set { ch_versions } - } else { + } + else { WRITE_SEQINFO ( ch_samplesheet ) @@ -78,7 +78,8 @@ workflow GET_ORTHOLOGS { ch_versions .mix(FETCH_OMA_GROUP_LOCAL.out.versions) .set { ch_versions } - } else { + } + else { FETCH_OMA_GROUP_ONLINE ( ch_query ) diff --git a/workflows/reportho.nf b/workflows/reportho.nf index 88328b4..b33a639 100644 --- a/workflows/reportho.nf +++ b/workflows/reportho.nf @@ -44,13 +44,13 @@ workflow REPORTHO { .mix(GET_ORTHOLOGS.out.versions) .set { ch_versions } - ch_seqhits = ch_samplesheet.map { [it[0], []] } + ch_seqhits = ch_samplesheet.map { [it[0], []] } ch_seqmisses = ch_samplesheet.map { [it[0], []] } - ch_strhits = ch_samplesheet.map { [it[0], []] } + ch_strhits = ch_samplesheet.map { [it[0], []] } ch_strmisses = ch_samplesheet.map { [it[0], []] } ch_alignment = ch_samplesheet.map { [it[0], []] } - ch_iqtree = ch_samplesheet.map { [it[0], []] } - ch_fastme = ch_samplesheet.map { [it[0], []] } + ch_iqtree = ch_samplesheet.map { [it[0], []] } + ch_fastme = ch_samplesheet.map { [it[0], []] } if (!params.skip_downstream) { FETCH_SEQUENCES ( From 
053697608df1ea755e86f17e09e06fd3aa3a55cd Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Wed, 8 May 2024 18:24:41 +0200 Subject: [PATCH 03/23] Add test_fasta profile --- conf/test_fasta.config | 32 ++++++++++++++++++++++++++++++++ nextflow.config | 7 ++++--- 2 files changed, 36 insertions(+), 3 deletions(-) create mode 100644 conf/test_fasta.config diff --git a/conf/test_fasta.config b/conf/test_fasta.config new file mode 100644 index 0000000..c81035c --- /dev/null +++ b/conf/test_fasta.config @@ -0,0 +1,32 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/reportho -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/reportho/testdata/samplesheet/samplesheet_fasta.csv' + + // Other parameters + uniprot_query = false + skip_eggnog = true + min_score = 3 + skip_iqtree = true + fastme_bootstrap = 0 +} + diff --git a/nextflow.config b/nextflow.config index 6c195f4..3a86815 100644 --- a/nextflow.config +++ b/nextflow.config @@ -185,8 +185,9 @@ profiles { executor.cpus = 4 executor.memory = 8.GB } - test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } + test { includeConfig 'conf/test.config' } + test_fasta { includeConfig 'conf/test_fasta.config' } + test_full { includeConfig 'conf/test_full.config' } } // Set default 
registry for Apptainer, Docker, Podman and Singularity independent of -profile @@ -199,7 +200,7 @@ singularity.registry = 'quay.io' // Nextflow plugins plugins { - id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-schema@2.0.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet } // Export these variables to prevent local Python/R libraries from conflicting with those in the container From fe1ca80eda995497ebac48165a24df0073af4908 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Wed, 8 May 2024 18:25:15 +0200 Subject: [PATCH 04/23] Avoid error if dict key not set --- bin/fetch_oma_by_sequence.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/bin/fetch_oma_by_sequence.py b/bin/fetch_oma_by_sequence.py index eeab2ba..3f176fc 100755 --- a/bin/fetch_oma_by_sequence.py +++ b/bin/fetch_oma_by_sequence.py @@ -15,6 +15,7 @@ def main() -> None: raise ValueError("Not enough arguments. 
Usage: fetch_oma_by_sequence.py ") seqs = SeqIO.parse(sys.argv[1], "fasta") + seq = next(seqs).seq # Only use the first sequence, ignore all others @@ -30,12 +31,12 @@ def main() -> None: # Find the main isoform for it in json["targets"]: - if it["is_main_isoform"]: - entry = it - break + if it["is_main_isoform"]: + entry = it + break # Write exact match status - if entry["identified_by"] == "exact match": + if entry.get("identified_by") == "exact match": print("true", file=open(sys.argv[4], 'w')) else: print("false", file=open(sys.argv[4], 'w')) @@ -53,6 +54,7 @@ def main() -> None: raise ValueError("Isoform not found") print(entry["canonicalid"], file=open(sys.argv[2], "w")) + print(entry["species"]["taxon_id"], file=open(sys.argv[3], "w")) From 38bd3ee89c4f2aa959f41a149ddfa2fbdb668602 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Wed, 8 May 2024 18:25:32 +0200 Subject: [PATCH 05/23] Get rid of parameter from module --- conf/modules.config | 1 + modules/local/fetch_sequences_online.nf | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index f7989d5..f47004b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -126,6 +126,7 @@ process { // ---------------------- withName: 'FETCH_SEQUENCES_ONLINE' { + ext.args = { params.uniprot_query ? 
"" : "cat ${query_fasta} >> ${meta.id}_orthologs.fa" } publishDir = [ path: { "${params.outdir}/sequences" }, mode: params.publish_dir_mode, diff --git a/modules/local/fetch_sequences_online.nf b/modules/local/fetch_sequences_online.nf index eec8581..304ddd0 100644 --- a/modules/local/fetch_sequences_online.nf +++ b/modules/local/fetch_sequences_online.nf @@ -10,6 +10,7 @@ process FETCH_SEQUENCES_ONLINE { input: tuple val(meta), path(ids), path(query_fasta) + output: tuple val(meta), path("*_orthologs.fa") , emit: fasta tuple val(meta), path("*_seq_hits.txt") , emit: hits @@ -20,11 +21,11 @@ process FETCH_SEQUENCES_ONLINE { task.ext.when == null || task.ext.when script: - prefix = task.ext.prefix ?: meta.id - add_query = params.uniprot_query ? "" : "cat $query_fasta >> ${prefix}_orthologs.fa" + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: meta.id """ fetch_sequences.py $ids $prefix > ${prefix}_orthologs.fa - $add_query + $args cat <<- END_VERSIONS > versions.yml "${task.process}": From 7c48293786c9e7659fcb9ae7215c2f82bf1d3b11 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Wed, 8 May 2024 18:35:49 +0200 Subject: [PATCH 06/23] Make lint happy --- bin/fetch_oma_by_sequence.py | 2 +- subworkflows/local/get_orthologs.nf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/fetch_oma_by_sequence.py b/bin/fetch_oma_by_sequence.py index 3f176fc..af35dd6 100755 --- a/bin/fetch_oma_by_sequence.py +++ b/bin/fetch_oma_by_sequence.py @@ -15,7 +15,7 @@ def main() -> None: raise ValueError("Not enough arguments. 
Usage: fetch_oma_by_sequence.py ") seqs = SeqIO.parse(sys.argv[1], "fasta") - + seq = next(seqs).seq # Only use the first sequence, ignore all others diff --git a/subworkflows/local/get_orthologs.nf b/subworkflows/local/get_orthologs.nf index d151ecc..a8cfdda 100644 --- a/subworkflows/local/get_orthologs.nf +++ b/subworkflows/local/get_orthologs.nf @@ -44,7 +44,7 @@ workflow GET_ORTHOLOGS { ch_versions .mix(IDENTIFY_SEQ_ONLINE.out.versions) .set { ch_versions } - } + } else { WRITE_SEQINFO ( ch_samplesheet @@ -78,7 +78,7 @@ workflow GET_ORTHOLOGS { ch_versions .mix(FETCH_OMA_GROUP_LOCAL.out.versions) .set { ch_versions } - } + } else { FETCH_OMA_GROUP_ONLINE ( ch_query From ca1906836ad048c4f8127edf78013079b15ef6a5 Mon Sep 17 00:00:00 2001 From: Jose Espinosa-Carrasco Date: Thu, 9 May 2024 11:36:13 +0200 Subject: [PATCH 07/23] Update bin/fetch_oma_by_sequence.py --- bin/fetch_oma_by_sequence.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bin/fetch_oma_by_sequence.py b/bin/fetch_oma_by_sequence.py index 637db01..f500eb1 100755 --- a/bin/fetch_oma_by_sequence.py +++ b/bin/fetch_oma_by_sequence.py @@ -15,7 +15,6 @@ def main() -> None: raise ValueError("Not enough arguments. 
Usage: fetch_oma_by_sequence.py ") seqs = SeqIO.parse(sys.argv[1], "fasta") - seq = next(seqs).seq # Only use the first sequence, ignore all others From 2f2eaf006d52d2c5ef5af0fab81b5621530a0e70 Mon Sep 17 00:00:00 2001 From: Jose Espinosa-Carrasco Date: Thu, 9 May 2024 11:36:29 +0200 Subject: [PATCH 08/23] Update bin/fetch_oma_by_sequence.py --- bin/fetch_oma_by_sequence.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bin/fetch_oma_by_sequence.py b/bin/fetch_oma_by_sequence.py index f500eb1..a71717f 100755 --- a/bin/fetch_oma_by_sequence.py +++ b/bin/fetch_oma_by_sequence.py @@ -54,7 +54,6 @@ def main() -> None: raise ValueError("Isoform not found") print(entry["canonicalid"], file=open(sys.argv[2], "w")) - print(entry["species"]["taxon_id"], file=open(sys.argv[3], "w")) From 15aa8f61aae1ce1cc3a4bd4550e060f36488045b Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Fri, 10 May 2024 18:08:54 +0200 Subject: [PATCH 09/23] Branch depending on whether uniprot_id or fasta provided --- conf/modules.config | 1 - main.nf | 11 +- modules/local/dump_params.nf | 2 - modules/local/fetch_sequences_online.nf | 7 +- subworkflows/local/fetch_sequences.nf | 8 +- subworkflows/local/get_orthologs.nf | 125 +++++------------- subworkflows/local/report.nf | 10 +- .../utils_nfcore_reportho_pipeline/main.nf | 20 +-- workflows/reportho.nf | 51 +++---- 9 files changed, 83 insertions(+), 152 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index f47004b..f7989d5 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -126,7 +126,6 @@ process { // ---------------------- withName: 'FETCH_SEQUENCES_ONLINE' { - ext.args = { params.uniprot_query ? 
"" : "cat ${query_fasta} >> ${meta.id}_orthologs.fa" } publishDir = [ path: { "${params.outdir}/sequences" }, mode: params.publish_dir_mode, diff --git a/main.nf b/main.nf index a7e69c2..a810341 100644 --- a/main.nf +++ b/main.nf @@ -33,7 +33,8 @@ include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_repo workflow NFCORE_REPORTHO { take: - samplesheet // channel: samplesheet read in from --input + samplesheet_query // channel: samplesheet read in from --input with query + samplesheet_fasta // channel: samplesheet read in from --input with fasta main: @@ -41,7 +42,8 @@ workflow NFCORE_REPORTHO { // WORKFLOW: Run pipeline // REPORTHO ( - samplesheet + samplesheet_query, + samplesheet_fasta, ) // emit: @@ -70,12 +72,13 @@ workflow { params.outdir, params.input ) - + // // WORKFLOW: Run main workflow // NFCORE_REPORTHO ( - PIPELINE_INITIALISATION.out.samplesheet + PIPELINE_INITIALISATION.out.samplesheet_query, + PIPELINE_INITIALISATION.out.samplesheet_fasta, ) // diff --git a/modules/local/dump_params.nf b/modules/local/dump_params.nf index de9747b..2b4712d 100644 --- a/modules/local/dump_params.nf +++ b/modules/local/dump_params.nf @@ -8,7 +8,6 @@ process DUMP_PARAMS { input: tuple val(meta), path(exact) - val uniprot_query val use_structures val use_centroid val min_score @@ -26,7 +25,6 @@ process DUMP_PARAMS { """ cat <<- END_PARAMS > params.yml id: ${meta.id} - uniprot_query: ${uniprot_query} exact_match: \$(cat $exact) use_structures: ${use_structures} use_centroid: ${use_centroid} diff --git a/modules/local/fetch_sequences_online.nf b/modules/local/fetch_sequences_online.nf index 304ddd0..b95be8f 100644 --- a/modules/local/fetch_sequences_online.nf +++ b/modules/local/fetch_sequences_online.nf @@ -10,7 +10,6 @@ process FETCH_SEQUENCES_ONLINE { input: tuple val(meta), path(ids), path(query_fasta) - output: tuple val(meta), path("*_orthologs.fa") , emit: fasta tuple val(meta), path("*_seq_hits.txt") , emit: hits @@ -21,11 +20,11 @@ process 
FETCH_SEQUENCES_ONLINE { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: meta.id + def prefix = task.ext.prefix ?: meta.id + def add_query = query_fasta == [] ? "" : "cat $query_fasta >> ${prefix}_orthologs.fa" """ fetch_sequences.py $ids $prefix > ${prefix}_orthologs.fa - $args + $add_query cat <<- END_VERSIONS > versions.yml "${task.process}": diff --git a/subworkflows/local/fetch_sequences.nf b/subworkflows/local/fetch_sequences.nf index bb03048..0c441dd 100644 --- a/subworkflows/local/fetch_sequences.nf +++ b/subworkflows/local/fetch_sequences.nf @@ -2,12 +2,14 @@ include { FETCH_SEQUENCES_ONLINE } from "../../modules/local/fetch_sequences_onl workflow FETCH_SEQUENCES { take: - ch_idlist - ch_query_fasta + ch_id_list + ch_query main: + ch_id_list + .join(ch_query) + .set { ch_input } - ch_input = params.uniprot_query ? ch_idlist.map { it -> [it[0], it[1], []]} : ch_idlist.join(ch_query_fasta) FETCH_SEQUENCES_ONLINE ( ch_input ) diff --git a/subworkflows/local/get_orthologs.nf b/subworkflows/local/get_orthologs.nf index a8cfdda..55563eb 100644 --- a/subworkflows/local/get_orthologs.nf +++ b/subworkflows/local/get_orthologs.nf @@ -19,44 +19,31 @@ include { CSVTK_CONCAT as MERGE_STATS } from "../../modules/nf-core/csvtk/conca workflow GET_ORTHOLOGS { take: - ch_samplesheet + ch_samplesheet_query + ch_samplesheet_fasta main: - ch_versions = Channel.empty() - ch_queryid = params.uniprot_query ? 
ch_samplesheet.map { it[1] } : ch_samplesheet.map { it[0].id } ch_orthogroups = Channel.empty() // Preprocessing - find the ID and taxid of the query sequences - if (!params.uniprot_query) { - ch_samplesheet - .map { it -> [it[0], file(it[1])] } - .set { ch_inputfile } - - - IDENTIFY_SEQ_ONLINE ( - ch_inputfile - ) - - IDENTIFY_SEQ_ONLINE.out.seqinfo - .set { ch_query } + ch_samplesheet_fasta + .map { it -> [it[0], file(it[1])] } + .set { ch_fasta } - ch_versions - .mix(IDENTIFY_SEQ_ONLINE.out.versions) - .set { ch_versions } - } - else { - WRITE_SEQINFO ( - ch_samplesheet - ) + IDENTIFY_SEQ_ONLINE ( + ch_fasta + ) - WRITE_SEQINFO.out.seqinfo - .set { ch_query } + ch_query = IDENTIFY_SEQ_ONLINE.out.seqinfo + ch_versions = ch_versions.mix(IDENTIFY_SEQ_ONLINE.out.versions) + + WRITE_SEQINFO ( + ch_samplesheet_query + ) - ch_versions - .mix(WRITE_SEQINFO.out.versions) - .set { ch_versions } - } + ch_query = IDENTIFY_SEQ_ONLINE.out.seqinfo.mix(WRITE_SEQINFO.out.seqinfo) + ch_versions = ch_versions.mix(WRITE_SEQINFO.out.versions) // Ortholog fetching @@ -75,9 +62,7 @@ workflow GET_ORTHOLOGS { .mix(FETCH_OMA_GROUP_LOCAL.out.oma_group) .set { ch_orthogroups } - ch_versions - .mix(FETCH_OMA_GROUP_LOCAL.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FETCH_OMA_GROUP_LOCAL.out.versions) } else { FETCH_OMA_GROUP_ONLINE ( @@ -88,9 +73,7 @@ workflow GET_ORTHOLOGS { .mix(FETCH_OMA_GROUP_ONLINE.out.oma_group) .set { ch_orthogroups } - ch_versions - .mix(FETCH_OMA_GROUP_ONLINE.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FETCH_OMA_GROUP_ONLINE.out.versions) } // Panther if (params.local_databases) { @@ -103,9 +86,7 @@ workflow GET_ORTHOLOGS { .mix(FETCH_PANTHER_GROUP_LOCAL.out.panther_group) .set { ch_orthogroups } - ch_versions - .mix(FETCH_PANTHER_GROUP_LOCAL.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FETCH_PANTHER_GROUP_LOCAL.out.versions) } else { FETCH_PANTHER_GROUP_ONLINE ( ch_query @@ -115,9 +96,7 @@ 
workflow GET_ORTHOLOGS { .mix(FETCH_PANTHER_GROUP_ONLINE.out.panther_group) .set { ch_orthogroups } - ch_versions - .mix(FETCH_PANTHER_GROUP_ONLINE.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FETCH_PANTHER_GROUP_ONLINE.out.versions) } // OrthoInspector FETCH_INSPECTOR_GROUP_ONLINE ( @@ -128,10 +107,8 @@ workflow GET_ORTHOLOGS { ch_orthogroups .mix(FETCH_INSPECTOR_GROUP_ONLINE.out.inspector_group) .set { ch_orthogroups } - - ch_versions - .mix(FETCH_INSPECTOR_GROUP_ONLINE.out.versions) - .set { ch_versions } + + ch_versions = ch_versions.mix(FETCH_INSPECTOR_GROUP_ONLINE.out.versions) FETCH_EGGNOG_GROUP_LOCAL ( ch_query, @@ -143,9 +120,7 @@ workflow GET_ORTHOLOGS { .mix(FETCH_EGGNOG_GROUP_LOCAL.out.eggnog_group) .set { ch_orthogroups } - ch_versions - .mix(FETCH_EGGNOG_GROUP_LOCAL.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FETCH_EGGNOG_GROUP_LOCAL.out.versions) } else { // online/local separation is used // local only @@ -163,9 +138,7 @@ workflow GET_ORTHOLOGS { .mix(FETCH_OMA_GROUP_LOCAL.out.oma_group) .set { ch_orthogroups } - ch_versions - .mix(FETCH_OMA_GROUP_LOCAL.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FETCH_OMA_GROUP_LOCAL.out.versions) } if (!params.skip_panther) { @@ -178,9 +151,7 @@ workflow GET_ORTHOLOGS { .mix(FETCH_PANTHER_GROUP_LOCAL.out.panther_group) .set { ch_orthogroups } - ch_versions - .mix(FETCH_PANTHER_GROUP_LOCAL.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FETCH_PANTHER_GROUP_LOCAL.out.versions) } if(!params.skip_eggnog) { @@ -194,10 +165,7 @@ workflow GET_ORTHOLOGS { .mix(FETCH_EGGNOG_GROUP_LOCAL.out.eggnog_group) .set { ch_orthogroups } - ch_versions - .mix(FETCH_EGGNOG_GROUP_LOCAL.out.versions) - .set { ch_versions } - + ch_versions = ch_versions.mix(FETCH_EGGNOG_GROUP_LOCAL.out.versions) } } else { // online only @@ -210,10 +178,7 @@ workflow GET_ORTHOLOGS { .mix(FETCH_OMA_GROUP_ONLINE.out.oma_group) .set { ch_orthogroups } - ch_versions 
- .mix(FETCH_OMA_GROUP_ONLINE.out.versions) - .set { ch_versions } - + ch_versions = ch_versions.mix(FETCH_OMA_GROUP_ONLINE.out.versions) } if (!params.skip_panther) { FETCH_PANTHER_GROUP_ONLINE ( @@ -224,9 +189,7 @@ workflow GET_ORTHOLOGS { .mix(FETCH_PANTHER_GROUP_ONLINE.out.panther_group) .set { ch_orthogroups } - ch_versions - .mix(FETCH_PANTHER_GROUP_ONLINE.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FETCH_PANTHER_GROUP_ONLINE.out.versions) } if (!params.skip_orthoinspector) { FETCH_INSPECTOR_GROUP_ONLINE ( @@ -238,9 +201,7 @@ workflow GET_ORTHOLOGS { .mix(FETCH_INSPECTOR_GROUP_ONLINE.out.inspector_group) .set { ch_orthogroups } - ch_versions - .mix(FETCH_INSPECTOR_GROUP_ONLINE.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FETCH_INSPECTOR_GROUP_ONLINE.out.versions) } } } @@ -251,9 +212,7 @@ workflow GET_ORTHOLOGS { ch_orthogroups.groupTuple() ) - ch_versions - .mix(MERGE_CSV.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(MERGE_CSV.out.versions) // Scoring and filtering @@ -261,9 +220,7 @@ workflow GET_ORTHOLOGS { MERGE_CSV.out.csv ) - ch_versions - .mix(MAKE_SCORE_TABLE.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(MAKE_SCORE_TABLE.out.versions) ch_forfilter = MAKE_SCORE_TABLE.out.score_table .combine(ch_query, by: 0) @@ -275,9 +232,7 @@ workflow GET_ORTHOLOGS { params.min_score ) - ch_versions - .mix(FILTER_HITS.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FILTER_HITS.out.versions) // Plotting @@ -294,9 +249,7 @@ workflow GET_ORTHOLOGS { ch_vennplot = PLOT_ORTHOLOGS.out.venn ch_jaccardplot = PLOT_ORTHOLOGS.out.jaccard - ch_versions - .mix(PLOT_ORTHOLOGS.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(PLOT_ORTHOLOGS.out.versions) } // Stats @@ -305,17 +258,13 @@ workflow GET_ORTHOLOGS { MAKE_SCORE_TABLE.out.score_table ) - ch_versions - .mix(MAKE_STATS.out.versions) - .set { ch_versions } + ch_versions = 
ch_versions.mix(MAKE_STATS.out.versions) STATS2CSV( MAKE_STATS.out.stats ) - ch_versions - .mix(STATS2CSV.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(STATS2CSV.out.versions) ch_stats = STATS2CSV.out.csv .collect { it[1] } @@ -327,9 +276,7 @@ workflow GET_ORTHOLOGS { "csv" ) - ch_versions - .mix(MERGE_STATS.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(MERGE_STATS.out.versions) ch_versions .collectFile(name: "get_orthologs_versions.yml", sort: true, newLine: true) diff --git a/subworkflows/local/report.nf b/subworkflows/local/report.nf index 47e061d..b3c60ed 100644 --- a/subworkflows/local/report.nf +++ b/subworkflows/local/report.nf @@ -5,7 +5,6 @@ include { CONVERT_FASTA } from "../../modules/local/convert_fasta" workflow REPORT { take: - uniprot_query use_structures use_centroid min_score @@ -52,7 +51,6 @@ workflow REPORT { DUMP_PARAMS( ch_seqinfo.map { [it[0], it[3]] }, - params.uniprot_query, params.use_structures, params.use_centroid, params.min_score, @@ -66,9 +64,7 @@ workflow REPORT { ch_fasta = CONVERT_FASTA.out.fasta - ch_versions - .mix(CONVERT_FASTA.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(CONVERT_FASTA.out.versions) } ch_forreport = ch_seqinfo @@ -91,9 +87,7 @@ workflow REPORT { ch_forreport ) - ch_versions - .mix(MAKE_REPORT.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(MAKE_REPORT.out.versions) emit: versions = ch_versions diff --git a/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf b/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf index 976b779..675e66d 100644 --- a/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf @@ -74,23 +74,23 @@ workflow PIPELINE_INITIALISATION { ) // - // Create channel from input file provided through params.input - // + // Create channel from input file provided through params.input and check for query + // Channel 
.fromSamplesheet("input") - .map { + .branch { id, query, fasta -> - if (query) { - [ id, query ] - } else { - [ id, fasta ] - } + query: query != [] + return [ id, query ] + fasta: query == [] + return [ id, fasta ] } .set { ch_samplesheet } emit: - samplesheet = ch_samplesheet - versions = ch_versions + samplesheet_query = ch_samplesheet.query + samplesheet_fasta = ch_samplesheet.fasta + versions = ch_versions } /* diff --git a/workflows/reportho.nf b/workflows/reportho.nf index b33a639..464f257 100644 --- a/workflows/reportho.nf +++ b/workflows/reportho.nf @@ -27,44 +27,42 @@ include { REPORT } from '../subworkflows/local/report' workflow REPORTHO { take: - ch_samplesheet // channel: samplesheet read in from --input + ch_samplesheet_query // channel: samplesheet query + ch_samplesheet_fasta // channel: samplesheet fasta main: ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() - - ch_query_fasta = params.uniprot_query ? ch_samplesheet.map { [it[0], []] } : ch_samplesheet.map { [it[0], file(it[1])] } + ch_fasta_query = ch_samplesheet_query.map { [it[0], []] }.mix(ch_samplesheet_fasta.map { [it[0], file(it[1])] }) GET_ORTHOLOGS ( - ch_samplesheet + ch_samplesheet_query, + ch_samplesheet_fasta ) - ch_versions - .mix(GET_ORTHOLOGS.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(GET_ORTHOLOGS.out.versions) + ch_samplesheet = ch_samplesheet_query.mix (ch_samplesheet_fasta) - ch_seqhits = ch_samplesheet.map { [it[0], []] } - ch_seqmisses = ch_samplesheet.map { [it[0], []] } - ch_strhits = ch_samplesheet.map { [it[0], []] } - ch_strmisses = ch_samplesheet.map { [it[0], []] } - ch_alignment = ch_samplesheet.map { [it[0], []] } - ch_iqtree = ch_samplesheet.map { [it[0], []] } - ch_fastme = ch_samplesheet.map { [it[0], []] } + ch_seqhits = ch_samplesheet.map { [it[0], []] } + ch_seqmisses = ch_samplesheet.map { [it[0], []] } + ch_strhits = ch_samplesheet.map { [it[0], []] } + ch_strmisses = ch_samplesheet.map { [it[0], []] } + ch_alignment 
= ch_samplesheet.map { [it[0], []] } + ch_iqtree = ch_samplesheet.map { [it[0], []] } + ch_fastme = ch_samplesheet.map { [it[0], []] } if (!params.skip_downstream) { FETCH_SEQUENCES ( GET_ORTHOLOGS.out.orthologs, - ch_query_fasta + ch_fasta_query ) ch_seqhits = FETCH_SEQUENCES.out.hits ch_seqmisses = FETCH_SEQUENCES.out.misses - ch_versions - .mix(FETCH_SEQUENCES.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FETCH_SEQUENCES.out.versions) if (params.use_structures) { FETCH_STRUCTURES ( @@ -75,9 +73,7 @@ workflow REPORTHO { ch_strmisses = FETCH_STRUCTURES.out.misses - ch_versions - .mix(FETCH_STRUCTURES.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FETCH_STRUCTURES.out.versions) } ch_structures = params.use_structures ? FETCH_STRUCTURES.out.structures : Channel.empty() @@ -89,9 +85,7 @@ workflow REPORTHO { ch_alignment = ALIGN.out.alignment - ch_versions - .mix(ALIGN.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(ALIGN.out.versions) MAKE_TREES ( ALIGN.out.alignment @@ -100,14 +94,11 @@ workflow REPORTHO { ch_iqtree = MAKE_TREES.out.mlplot ch_fastme = MAKE_TREES.out.meplot - ch_versions - .mix(MAKE_TREES.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(MAKE_TREES.out.versions) } if(!params.skip_report) { REPORT ( - params.uniprot_query, params.use_structures, params.use_centroid, params.min_score, @@ -130,9 +121,7 @@ workflow REPORTHO { ch_fastme ) - ch_versions - .mix(REPORT.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(REPORT.out.versions) } // From bb04c563c6497f70bae73a30909817ddaec000be Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Fri, 10 May 2024 18:09:14 +0200 Subject: [PATCH 10/23] Update tests --- conf/test.config | 1 - conf/test_fasta.config | 1 - conf/test_full.config | 1 - 3 files changed, 3 deletions(-) diff --git a/conf/test.config b/conf/test.config index 2a67104..7de21c0 100644 --- a/conf/test.config +++ b/conf/test.config @@ -23,7 +23,6 
@@ params { input = 'https://raw.githubusercontent.com/nf-core/test-datasets/reportho/testdata/samplesheet/samplesheet.csv' // Other parameters - uniprot_query = true skip_eggnog = true min_score = 3 skip_iqtree = true diff --git a/conf/test_fasta.config b/conf/test_fasta.config index c81035c..e9b009f 100644 --- a/conf/test_fasta.config +++ b/conf/test_fasta.config @@ -23,7 +23,6 @@ params { input = 'https://raw.githubusercontent.com/nf-core/test-datasets/reportho/testdata/samplesheet/samplesheet_fasta.csv' // Other parameters - uniprot_query = false skip_eggnog = true min_score = 3 skip_iqtree = true diff --git a/conf/test_full.config b/conf/test_full.config index 2f59347..68c6bb4 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -18,7 +18,6 @@ params { input = 'https://raw.githubusercontent.com/nf-core/test-datasets/reportho/testdata/samplesheet/samplesheet.csv' // Other parameters - uniprot_query = true eggnog_path = 'http://eggnog5.embl.de/download/eggnog_5.0/per_tax_level/1/1_members.tsv.gz' eggnog_idmap_path = "http://eggnog5.embl.de/download/eggnog_5.0/id_mappings/uniprot/latest.Eukaryota.tsv.gz" min_score = 3 From a376ae3d74839c11d12c2a03b8f86dcd9683c7ef Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Fri, 10 May 2024 18:09:55 +0200 Subject: [PATCH 11/23] Get rid of leftovers of the uniprot_query parameter --- nextflow.config | 1 - nextflow_schema.json | 6 ------ 2 files changed, 7 deletions(-) diff --git a/nextflow.config b/nextflow.config index 03b1861..41dc4b7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -10,7 +10,6 @@ params { // Input options input = null - uniprot_query = false // Ortholog options use_all = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 304443b..ff34b8d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -23,12 +23,6 @@ "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. 
Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/reportho/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" }, - "uniprot_query": { - "type": "boolean", - "description": "The input contains a Uniprot ID as query.", - "help_text": "If the input file contains a Uniprot ID as query, set this parameter to `true`.", - "fa_icon": "fas fa-database" - }, "outdir": { "type": "string", "format": "directory-path", From f1f25bdf18407979041568f88a3be236ac251517 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Fri, 10 May 2024 18:10:10 +0200 Subject: [PATCH 12/23] Update docs --- README.md | 13 ++++++------- docs/usage.md | 17 +++++++++-------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index f363652..9ee5128 100644 --- a/README.md +++ b/README.md @@ -44,21 +44,20 @@ Steps that follow can be skipped with `--skip_downstream` in batch analysis. First, prepare a samplesheet with your input data that looks as follows: -`samplesheet.csv`: - -```csv -id,query +```csv title="samplesheet.csv" +id,fasta BicD2,data/bicd2.fasta ``` -or: +or if you know the UniProt ID of the protein you can provide it directly: -```csv +```csv title="samplesheet.csv" id,query BicD2,Q8TD16 ``` -If using the latter format, you must set `--uniprot_query` to true. +> [!NOTE] +> If you provide both a FASTA file and a UniProt ID only the later will be used. Now, you can run the pipeline using: diff --git a/docs/usage.md b/docs/usage.md index 33eaba7..b431ed7 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -18,26 +18,27 @@ You will need to create a samplesheet with information about the samples you wou ### Full samplesheet -The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 2 columns to match those defined in the table below. 
+The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 2 columns to match those defined in the tables below. -A final samplesheet file may look something like the one below, with `--uniprot_query` enabled: +A final samplesheet file may look something like the one below: ```csv title="samplesheet.csv" id,query BicD2,Q8TD16 ``` -or the one below, otherwise: +or the one below, if you provide the sequence of the protein in FASTA format: ```csv title="samplesheet.csv" -id,query +id,fasta BicD2,/home/myuser/data/bicd2.fa ``` -| Column | Description | -| ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `id` | User-defined identifier. It is used to identify output files for the protein. Can be anything descriptive, as long as it does not contain spaces. | -| `query` | The query of the user-specified type. If `--uniprot_query` is `true`, it should be a valid Uniprot accession. Otherwise, it should be a valid path to a FASTA file. | +| Column | Description | +| ------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | +| `id` | User-defined identifier. It is used to identify output files for the protein. Can be anything descriptive, as long as it does not contain spaces. | +| `query` | The query of the user-specified type. It should be a valid Uniprot accession. | +| `fasta` | It should be a valid path to a FASTA file. | An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. 
From b8992468e53bcfcde48fc3c4c3f564c2292b14c2 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Fri, 10 May 2024 18:10:35 +0200 Subject: [PATCH 13/23] Do not use set for ch_versions --- subworkflows/local/align.nf | 13 ++++--------- subworkflows/local/make_trees.nf | 20 +++++--------------- 2 files changed, 9 insertions(+), 24 deletions(-) diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index 46c78b4..2459c65 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -25,9 +25,7 @@ workflow ALIGN { ch_for_filter ) - ch_versions - .mix(FILTER_FASTA.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FILTER_FASTA.out.versions) CREATE_TCOFFEETEMPLATE( ch_pdb @@ -52,9 +50,8 @@ workflow ALIGN { TCOFFEE_3DALIGN.out.alignment .set { ch_alignment } - ch_versions - .mix(TCOFFEE_3DALIGN.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(TCOFFEE_3DALIGN.out.versions) + } else { TCOFFEE_ALIGN ( @@ -67,9 +64,7 @@ workflow ALIGN { TCOFFEE_ALIGN.out.alignment .set { ch_alignment } - ch_versions - .mix(TCOFFEE_ALIGN.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(TCOFFEE_ALIGN.out.versions) } emit: diff --git a/subworkflows/local/make_trees.nf b/subworkflows/local/make_trees.nf index b4743a0..6f60967 100644 --- a/subworkflows/local/make_trees.nf +++ b/subworkflows/local/make_trees.nf @@ -24,9 +24,7 @@ workflow MAKE_TREES { ch_mltree = IQTREE.out.phylogeny - ch_versions - .mix(IQTREE.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(IQTREE.out.versions) ch_mlplot = ch_alignment.map { [it[0], []] } @@ -38,9 +36,7 @@ workflow MAKE_TREES { ch_mlplot = PLOT_IQTREE.out.plot - ch_versions - .mix(PLOT_IQTREE.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(PLOT_IQTREE.out.versions) } } @@ -50,9 +46,7 @@ workflow MAKE_TREES { ch_alignment ) - ch_versions - .mix(CONVERT_PHYLIP.out.versions) - .set { ch_versions } + ch_versions = 
ch_versions.mix(CONVERT_PHYLIP.out.versions) FASTME ( CONVERT_PHYLIP.out.phylip.map { [it[0], it[1], []] } @@ -60,9 +54,7 @@ workflow MAKE_TREES { ch_metree = FASTME.out.nwk - ch_versions - .mix(FASTME.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FASTME.out.versions) ch_meplot = ch_alignment.map { [it[0], []] } @@ -74,9 +66,7 @@ workflow MAKE_TREES { ch_meplot = PLOT_FASTME.out.plot - ch_versions - .mix(PLOT_FASTME.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(PLOT_FASTME.out.versions) } } From 3f472ad86cbce1ceeb3885ef6425c14ce7780b6b Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Fri, 10 May 2024 18:12:27 +0200 Subject: [PATCH 14/23] Add test_fasta to CI --- .github/workflows/ci.yml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 32fb5b8..fb18a85 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,3 +44,32 @@ jobs: # Remember that you can parallelise this by using strategy.matrix run: | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results + + test_fasta: + name: Run pipeline with test data with fasta files in samplesheet + # Only run on push if this is the nf-core dev branch (merged PRs) + if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/reportho') }}" + runs-on: ubuntu-latest + strategy: + matrix: + NXF_VER: + - "23.04.0" + - "latest-everything" + steps: + - name: Check out pipeline code + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + with: + version: "${{ matrix.NXF_VER }}" + + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - name: Run pipeline with test data + # TODO nf-core: You can customise CI pipeline run tests as required + # For example: adding multiple test runs with 
different parameters + # Remember that you can parallelise this by using strategy.matrix + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test_fasta,docker --outdir ./results From 72f80aa2dd7fbc096a2d6891acddc19bee2c4cef Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Fri, 10 May 2024 18:14:19 +0200 Subject: [PATCH 15/23] Make nf-core lint happy --- .nf-core.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.nf-core.yml b/.nf-core.yml index e0b85a7..13b10ff 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,2 +1,5 @@ repository_type: pipeline nf_core_version: "2.14.1" + +files_exist: + - lib/nfcore_external_java_deps.jar From 3490c4c87faef78eef76d2e56c346fdcf9b2d8a3 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Fri, 10 May 2024 18:19:20 +0200 Subject: [PATCH 16/23] Make prettier happy --- main.nf | 2 +- subworkflows/local/get_orthologs.nf | 4 ++-- subworkflows/local/utils_nfcore_reportho_pipeline/main.nf | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/main.nf b/main.nf index a810341..cb1dfd0 100644 --- a/main.nf +++ b/main.nf @@ -72,7 +72,7 @@ workflow { params.outdir, params.input ) - + // // WORKFLOW: Run main workflow // diff --git a/subworkflows/local/get_orthologs.nf b/subworkflows/local/get_orthologs.nf index 55563eb..1512675 100644 --- a/subworkflows/local/get_orthologs.nf +++ b/subworkflows/local/get_orthologs.nf @@ -37,7 +37,7 @@ workflow GET_ORTHOLOGS { ch_query = IDENTIFY_SEQ_ONLINE.out.seqinfo ch_versions = ch_versions.mix(IDENTIFY_SEQ_ONLINE.out.versions) - + WRITE_SEQINFO ( ch_samplesheet_query ) @@ -107,7 +107,7 @@ workflow GET_ORTHOLOGS { ch_orthogroups .mix(FETCH_INSPECTOR_GROUP_ONLINE.out.inspector_group) .set { ch_orthogroups } - + ch_versions = ch_versions.mix(FETCH_INSPECTOR_GROUP_ONLINE.out.versions) FETCH_EGGNOG_GROUP_LOCAL ( diff --git a/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf b/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf index 675e66d..44dc7eb 100644 --- 
a/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf @@ -75,7 +75,7 @@ workflow PIPELINE_INITIALISATION { // // Create channel from input file provided through params.input and check for query - // + // Channel .fromSamplesheet("input") .branch { From 8fe4f82962495a45235fef67494be45762ac818b Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Fri, 10 May 2024 18:26:53 +0200 Subject: [PATCH 17/23] Make nf-core lint happy (bug in tools until fixed) --- lib/nfcore_external_java_deps.jar | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 lib/nfcore_external_java_deps.jar diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar new file mode 100644 index 0000000..e69de29 From b13725cc4a63caa63186bec731381df11c90a634 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Fri, 10 May 2024 18:28:14 +0200 Subject: [PATCH 18/23] Revert changes in .nf-core.yml --- .nf-core.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.nf-core.yml b/.nf-core.yml index 13b10ff..e0b85a7 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,5 +1,2 @@ repository_type: pipeline nf_core_version: "2.14.1" - -files_exist: - - lib/nfcore_external_java_deps.jar From 6d270eaf88a03c905c1bb892aa4d47c31a21c961 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Fri, 10 May 2024 22:25:18 +0200 Subject: [PATCH 19/23] Assign ch_versions --- subworkflows/local/get_orthologs.nf | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/subworkflows/local/get_orthologs.nf b/subworkflows/local/get_orthologs.nf index 1fe3c9d..6634aaf 100644 --- a/subworkflows/local/get_orthologs.nf +++ b/subworkflows/local/get_orthologs.nf @@ -260,9 +260,7 @@ workflow GET_ORTHOLOGS { MERGE_CSV.out.csv ) - ch_versions - .mix(MAKE_HITS_TABLE.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(MAKE_HITS_TABLE.out.versions) ch_hits = MAKE_HITS_TABLE.out.hits_table .collect { it[1] } @@ 
-274,9 +272,7 @@ workflow GET_ORTHOLOGS { "csv" ) - ch_versions - .mix(MERGE_HITS.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(MERGE_HITS.out.versions) // Stats From b849f9225060bf06995f5cfde98dd30a9e6f00c0 Mon Sep 17 00:00:00 2001 From: Jose Espinosa-Carrasco Date: Mon, 13 May 2024 11:34:57 +0200 Subject: [PATCH 20/23] Fix typo Co-authored-by: Igor Trujnara <53370556+itrujnara@users.noreply.github.com> --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9ee5128..f761a37 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ BicD2,Q8TD16 ``` > [!NOTE] -> If you provide both a FASTA file and a UniProt ID only the later will be used. +> If you provide both a FASTA file and a UniProt ID only the latter will be used. Now, you can run the pipeline using: From 195ab8d1f553887dac4a34d9a4f26e271f771322 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Mon, 13 May 2024 11:53:13 +0200 Subject: [PATCH 21/23] Add samplesheet_fasta in assets --- assets/samplesheet_fasta.csv | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 assets/samplesheet_fasta.csv diff --git a/assets/samplesheet_fasta.csv b/assets/samplesheet_fasta.csv new file mode 100644 index 0000000..9cdb0c6 --- /dev/null +++ b/assets/samplesheet_fasta.csv @@ -0,0 +1,3 @@ +id,fasta +ste2,https://raw.githubusercontent.com/nf-core/test-datasets/reportho/testdata/sequences/ste2.fa +ste3,https://raw.githubusercontent.com/nf-core/test-datasets/reportho/testdata/sequences/ste3.fa From f276a8e3c1c18a4476b8e7bdc9f74b7d64e81670 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Mon, 13 May 2024 11:56:03 +0200 Subject: [PATCH 22/23] Rename samplesheet_fasta example --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f761a37..63e8fac 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ Steps that follow can be skipped with `--skip_downstream` in batch analysis. 
First, prepare a samplesheet with your input data that looks as follows: -```csv title="samplesheet.csv" +```csv title="samplesheet_fasta.csv" id,fasta BicD2,data/bicd2.fasta ``` From cc50269d0f55ddbdbf206dfb85651b0888655ac8 Mon Sep 17 00:00:00 2001 From: Igor Trujnara <53370556+itrujnara@users.noreply.github.com> Date: Mon, 13 May 2024 11:56:16 +0200 Subject: [PATCH 23/23] Update docs/usage.md --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index b431ed7..1b1ce30 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -40,7 +40,7 @@ BicD2,/home/myuser/data/bicd2.fa | `query` | The query of the user-specified type. It should be a valid Uniprot accession. | | `fasta` | It should be a valid path to a FASTA file. | -An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. +An [example Uniprot samplesheet](../assets/samplesheet.csv) and [example FASTA samplesheet](../assets/samplesheet_fasta.csv) have been provided with the pipeline. ## Running the pipeline