From 4aca65aa16b2780fe2eff5bfb4a08b98b97458b2 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Wed, 8 May 2024 18:22:09 +0200 Subject: [PATCH 01/23] Add validation for fasta files --- assets/schema_input.json | 18 ++++++++++++++++-- .../utils_nfcore_reportho_pipeline/main.nf | 8 ++++++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/assets/schema_input.json b/assets/schema_input.json index d80499c..55dd337 100644 --- a/assets/schema_input.json +++ b/assets/schema_input.json @@ -17,8 +17,22 @@ "type": "string", "pattern": "^\\S+$", "errorMessage": "A query must be provided" + }, + "fasta": { + "type": "string", + "format": "file-path", + "exists": true, + "pattern": "^\\S+\\.fa(sta)?$", + "errorMessage": "Fasta file must be provided, cannot contain spaces and must have extension '.fa' or '.fasta'" } + } + }, + "anyOf": [ + { + "required": ["id", "query"] }, - "required": ["id", "query"] - } + { + "required": ["id", "fasta"] + } + ] } diff --git a/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf b/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf index be134bc..b36a33c 100644 --- a/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf @@ -79,8 +79,12 @@ workflow PIPELINE_INITIALISATION { Channel .fromSamplesheet("input") .map { - id, query -> - [ id, query ] + id, query, fasta -> + if (query) { + [ id, query ] + } else { + [ id, fasta ] + } } .set { ch_samplesheet } From be8b29036d2da82560af8262b8cbd6b25469e344 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Wed, 8 May 2024 18:24:18 +0200 Subject: [PATCH 02/23] Fix format --- modules/local/identify_seq_online.nf | 2 +- subworkflows/local/get_orthologs.nf | 7 ++++--- workflows/reportho.nf | 8 ++++---- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/modules/local/identify_seq_online.nf b/modules/local/identify_seq_online.nf index 2ada143..719b325 100644 --- a/modules/local/identify_seq_online.nf +++ 
b/modules/local/identify_seq_online.nf @@ -21,7 +21,7 @@ process IDENTIFY_SEQ_ONLINE { prefix = task.ext.prefix ?: meta.id """ fetch_oma_by_sequence.py $fasta id_raw.txt ${prefix}_taxid.txt ${prefix}_exact.txt - uniprotize_oma.py id_raw.txt > ${prefix}_id.txt + uniprotize_oma_online.py id_raw.txt > ${prefix}_id.txt cat <<- END_VERSIONS > versions.yml "${task.process}": diff --git a/subworkflows/local/get_orthologs.nf b/subworkflows/local/get_orthologs.nf index 86fb6c2..d151ecc 100644 --- a/subworkflows/local/get_orthologs.nf +++ b/subworkflows/local/get_orthologs.nf @@ -28,7 +28,6 @@ workflow GET_ORTHOLOGS { ch_orthogroups = Channel.empty() // Preprocessing - find the ID and taxid of the query sequences - if (!params.uniprot_query) { ch_samplesheet .map { it -> [it[0], file(it[1])] } @@ -45,7 +44,8 @@ workflow GET_ORTHOLOGS { ch_versions .mix(IDENTIFY_SEQ_ONLINE.out.versions) .set { ch_versions } - } else { + } + else { WRITE_SEQINFO ( ch_samplesheet ) @@ -78,7 +78,8 @@ workflow GET_ORTHOLOGS { ch_versions .mix(FETCH_OMA_GROUP_LOCAL.out.versions) .set { ch_versions } - } else { + } + else { FETCH_OMA_GROUP_ONLINE ( ch_query ) diff --git a/workflows/reportho.nf b/workflows/reportho.nf index 88328b4..b33a639 100644 --- a/workflows/reportho.nf +++ b/workflows/reportho.nf @@ -44,13 +44,13 @@ workflow REPORTHO { .mix(GET_ORTHOLOGS.out.versions) .set { ch_versions } - ch_seqhits = ch_samplesheet.map { [it[0], []] } + ch_seqhits = ch_samplesheet.map { [it[0], []] } ch_seqmisses = ch_samplesheet.map { [it[0], []] } - ch_strhits = ch_samplesheet.map { [it[0], []] } + ch_strhits = ch_samplesheet.map { [it[0], []] } ch_strmisses = ch_samplesheet.map { [it[0], []] } ch_alignment = ch_samplesheet.map { [it[0], []] } - ch_iqtree = ch_samplesheet.map { [it[0], []] } - ch_fastme = ch_samplesheet.map { [it[0], []] } + ch_iqtree = ch_samplesheet.map { [it[0], []] } + ch_fastme = ch_samplesheet.map { [it[0], []] } if (!params.skip_downstream) { FETCH_SEQUENCES ( From 
053697608df1ea755e86f17e09e06fd3aa3a55cd Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Wed, 8 May 2024 18:24:41 +0200 Subject: [PATCH 03/23] Add test_fasta profile --- conf/test_fasta.config | 32 ++++++++++++++++++++++++++++++++ nextflow.config | 7 ++++--- 2 files changed, 36 insertions(+), 3 deletions(-) create mode 100644 conf/test_fasta.config diff --git a/conf/test_fasta.config b/conf/test_fasta.config new file mode 100644 index 0000000..c81035c --- /dev/null +++ b/conf/test_fasta.config @@ -0,0 +1,32 @@ +/* +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Nextflow config file for running minimal tests +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + Defines input files and everything required to run a fast and simple pipeline test. + + Use as follows: + nextflow run nf-core/reportho -profile test, --outdir + +---------------------------------------------------------------------------------------- +*/ + +params { + config_profile_name = 'Test profile' + config_profile_description = 'Minimal test dataset to check pipeline function' + + // Limit resources so that this can run on GitHub Actions + max_cpus = 2 + max_memory = '6.GB' + max_time = '6.h' + + // Input data + input = 'https://raw.githubusercontent.com/nf-core/test-datasets/reportho/testdata/samplesheet/samplesheet_fasta.csv' + + // Other parameters + uniprot_query = false + skip_eggnog = true + min_score = 3 + skip_iqtree = true + fastme_bootstrap = 0 +} + diff --git a/nextflow.config b/nextflow.config index 6c195f4..3a86815 100644 --- a/nextflow.config +++ b/nextflow.config @@ -185,8 +185,9 @@ profiles { executor.cpus = 4 executor.memory = 8.GB } - test { includeConfig 'conf/test.config' } - test_full { includeConfig 'conf/test_full.config' } + test { includeConfig 'conf/test.config' } + test_fasta { includeConfig 'conf/test_fasta.config' } + test_full { includeConfig 'conf/test_full.config' } } // Set default 
registry for Apptainer, Docker, Podman and Singularity independent of -profile @@ -199,7 +200,7 @@ singularity.registry = 'quay.io' // Nextflow plugins plugins { - id 'nf-validation@1.1.3' // Validation of pipeline parameters and creation of an input channel from a sample sheet + id 'nf-schema@2.0.0' // Validation of pipeline parameters and creation of an input channel from a sample sheet } // Export these variables to prevent local Python/R libraries from conflicting with those in the container From fe1ca80eda995497ebac48165a24df0073af4908 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Wed, 8 May 2024 18:25:15 +0200 Subject: [PATCH 04/23] Avoid error if dict key not set --- bin/fetch_oma_by_sequence.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/bin/fetch_oma_by_sequence.py b/bin/fetch_oma_by_sequence.py index eeab2ba..3f176fc 100755 --- a/bin/fetch_oma_by_sequence.py +++ b/bin/fetch_oma_by_sequence.py @@ -15,6 +15,7 @@ def main() -> None: raise ValueError("Not enough arguments. 
Usage: fetch_oma_by_sequence.py ") seqs = SeqIO.parse(sys.argv[1], "fasta") + seq = next(seqs).seq # Only use the first sequence, ignore all others @@ -30,12 +31,12 @@ def main() -> None: # Find the main isoform for it in json["targets"]: - if it["is_main_isoform"]: - entry = it - break + if it["is_main_isoform"]: + entry = it + break # Write exact match status - if entry["identified_by"] == "exact match": + if entry.get("identified_by") == "exact match": print("true", file=open(sys.argv[4], 'w')) else: print("false", file=open(sys.argv[4], 'w')) @@ -53,6 +54,7 @@ def main() -> None: raise ValueError("Isoform not found") print(entry["canonicalid"], file=open(sys.argv[2], "w")) + print(entry["species"]["taxon_id"], file=open(sys.argv[3], "w")) From 38bd3ee89c4f2aa959f41a149ddfa2fbdb668602 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Wed, 8 May 2024 18:25:32 +0200 Subject: [PATCH 05/23] Get rid of parameter from module --- conf/modules.config | 1 + modules/local/fetch_sequences_online.nf | 7 ++++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index f7989d5..f47004b 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -126,6 +126,7 @@ process { // ---------------------- withName: 'FETCH_SEQUENCES_ONLINE' { + ext.args = { params.uniprot_query ? 
"" : "cat ${query_fasta} >> ${meta.id}_orthologs.fa" } publishDir = [ path: { "${params.outdir}/sequences" }, mode: params.publish_dir_mode, diff --git a/modules/local/fetch_sequences_online.nf b/modules/local/fetch_sequences_online.nf index eec8581..304ddd0 100644 --- a/modules/local/fetch_sequences_online.nf +++ b/modules/local/fetch_sequences_online.nf @@ -10,6 +10,7 @@ process FETCH_SEQUENCES_ONLINE { input: tuple val(meta), path(ids), path(query_fasta) + output: tuple val(meta), path("*_orthologs.fa") , emit: fasta tuple val(meta), path("*_seq_hits.txt") , emit: hits @@ -20,11 +21,11 @@ process FETCH_SEQUENCES_ONLINE { task.ext.when == null || task.ext.when script: - prefix = task.ext.prefix ?: meta.id - add_query = params.uniprot_query ? "" : "cat $query_fasta >> ${prefix}_orthologs.fa" + def args = task.ext.args ?: '' + prefix = task.ext.prefix ?: meta.id """ fetch_sequences.py $ids $prefix > ${prefix}_orthologs.fa - $add_query + $args cat <<- END_VERSIONS > versions.yml "${task.process}": From 7c48293786c9e7659fcb9ae7215c2f82bf1d3b11 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Wed, 8 May 2024 18:35:49 +0200 Subject: [PATCH 06/23] Make lint happy --- bin/fetch_oma_by_sequence.py | 2 +- subworkflows/local/get_orthologs.nf | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bin/fetch_oma_by_sequence.py b/bin/fetch_oma_by_sequence.py index 3f176fc..af35dd6 100755 --- a/bin/fetch_oma_by_sequence.py +++ b/bin/fetch_oma_by_sequence.py @@ -15,7 +15,7 @@ def main() -> None: raise ValueError("Not enough arguments. 
Usage: fetch_oma_by_sequence.py ") seqs = SeqIO.parse(sys.argv[1], "fasta") - + seq = next(seqs).seq # Only use the first sequence, ignore all others diff --git a/subworkflows/local/get_orthologs.nf b/subworkflows/local/get_orthologs.nf index d151ecc..a8cfdda 100644 --- a/subworkflows/local/get_orthologs.nf +++ b/subworkflows/local/get_orthologs.nf @@ -44,7 +44,7 @@ workflow GET_ORTHOLOGS { ch_versions .mix(IDENTIFY_SEQ_ONLINE.out.versions) .set { ch_versions } - } + } else { WRITE_SEQINFO ( ch_samplesheet @@ -78,7 +78,7 @@ workflow GET_ORTHOLOGS { ch_versions .mix(FETCH_OMA_GROUP_LOCAL.out.versions) .set { ch_versions } - } + } else { FETCH_OMA_GROUP_ONLINE ( ch_query From ca1906836ad048c4f8127edf78013079b15ef6a5 Mon Sep 17 00:00:00 2001 From: Jose Espinosa-Carrasco Date: Thu, 9 May 2024 11:36:13 +0200 Subject: [PATCH 07/23] Update bin/fetch_oma_by_sequence.py --- bin/fetch_oma_by_sequence.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bin/fetch_oma_by_sequence.py b/bin/fetch_oma_by_sequence.py index 637db01..f500eb1 100755 --- a/bin/fetch_oma_by_sequence.py +++ b/bin/fetch_oma_by_sequence.py @@ -15,7 +15,6 @@ def main() -> None: raise ValueError("Not enough arguments. 
Usage: fetch_oma_by_sequence.py ") seqs = SeqIO.parse(sys.argv[1], "fasta") - seq = next(seqs).seq # Only use the first sequence, ignore all others From 2f2eaf006d52d2c5ef5af0fab81b5621530a0e70 Mon Sep 17 00:00:00 2001 From: Jose Espinosa-Carrasco Date: Thu, 9 May 2024 11:36:29 +0200 Subject: [PATCH 08/23] Update bin/fetch_oma_by_sequence.py --- bin/fetch_oma_by_sequence.py | 1 - 1 file changed, 1 deletion(-) diff --git a/bin/fetch_oma_by_sequence.py b/bin/fetch_oma_by_sequence.py index f500eb1..a71717f 100755 --- a/bin/fetch_oma_by_sequence.py +++ b/bin/fetch_oma_by_sequence.py @@ -54,7 +54,6 @@ def main() -> None: raise ValueError("Isoform not found") print(entry["canonicalid"], file=open(sys.argv[2], "w")) - print(entry["species"]["taxon_id"], file=open(sys.argv[3], "w")) From 15aa8f61aae1ce1cc3a4bd4550e060f36488045b Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Fri, 10 May 2024 18:08:54 +0200 Subject: [PATCH 09/23] Branch depending on whether uniprot_id or fasta provided --- conf/modules.config | 1 - main.nf | 11 +- modules/local/dump_params.nf | 2 - modules/local/fetch_sequences_online.nf | 7 +- subworkflows/local/fetch_sequences.nf | 8 +- subworkflows/local/get_orthologs.nf | 125 +++++------------- subworkflows/local/report.nf | 10 +- .../utils_nfcore_reportho_pipeline/main.nf | 20 +-- workflows/reportho.nf | 51 +++---- 9 files changed, 83 insertions(+), 152 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index f47004b..f7989d5 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -126,7 +126,6 @@ process { // ---------------------- withName: 'FETCH_SEQUENCES_ONLINE' { - ext.args = { params.uniprot_query ? 
"" : "cat ${query_fasta} >> ${meta.id}_orthologs.fa" } publishDir = [ path: { "${params.outdir}/sequences" }, mode: params.publish_dir_mode, diff --git a/main.nf b/main.nf index a7e69c2..a810341 100644 --- a/main.nf +++ b/main.nf @@ -33,7 +33,8 @@ include { PIPELINE_COMPLETION } from './subworkflows/local/utils_nfcore_repo workflow NFCORE_REPORTHO { take: - samplesheet // channel: samplesheet read in from --input + samplesheet_query // channel: samplesheet read in from --input with query + samplesheet_fasta // channel: samplesheet read in from --input with fasta main: @@ -41,7 +42,8 @@ workflow NFCORE_REPORTHO { // WORKFLOW: Run pipeline // REPORTHO ( - samplesheet + samplesheet_query, + samplesheet_fasta, ) // emit: @@ -70,12 +72,13 @@ workflow { params.outdir, params.input ) - + // // WORKFLOW: Run main workflow // NFCORE_REPORTHO ( - PIPELINE_INITIALISATION.out.samplesheet + PIPELINE_INITIALISATION.out.samplesheet_query, + PIPELINE_INITIALISATION.out.samplesheet_fasta, ) // diff --git a/modules/local/dump_params.nf b/modules/local/dump_params.nf index de9747b..2b4712d 100644 --- a/modules/local/dump_params.nf +++ b/modules/local/dump_params.nf @@ -8,7 +8,6 @@ process DUMP_PARAMS { input: tuple val(meta), path(exact) - val uniprot_query val use_structures val use_centroid val min_score @@ -26,7 +25,6 @@ process DUMP_PARAMS { """ cat <<- END_PARAMS > params.yml id: ${meta.id} - uniprot_query: ${uniprot_query} exact_match: \$(cat $exact) use_structures: ${use_structures} use_centroid: ${use_centroid} diff --git a/modules/local/fetch_sequences_online.nf b/modules/local/fetch_sequences_online.nf index 304ddd0..b95be8f 100644 --- a/modules/local/fetch_sequences_online.nf +++ b/modules/local/fetch_sequences_online.nf @@ -10,7 +10,6 @@ process FETCH_SEQUENCES_ONLINE { input: tuple val(meta), path(ids), path(query_fasta) - output: tuple val(meta), path("*_orthologs.fa") , emit: fasta tuple val(meta), path("*_seq_hits.txt") , emit: hits @@ -21,11 +20,11 @@ process 
FETCH_SEQUENCES_ONLINE { task.ext.when == null || task.ext.when script: - def args = task.ext.args ?: '' - prefix = task.ext.prefix ?: meta.id + def prefix = task.ext.prefix ?: meta.id + def add_query = query_fasta == [] ? "" : "cat $query_fasta >> ${prefix}_orthologs.fa" """ fetch_sequences.py $ids $prefix > ${prefix}_orthologs.fa - $args + $add_query cat <<- END_VERSIONS > versions.yml "${task.process}": diff --git a/subworkflows/local/fetch_sequences.nf b/subworkflows/local/fetch_sequences.nf index bb03048..0c441dd 100644 --- a/subworkflows/local/fetch_sequences.nf +++ b/subworkflows/local/fetch_sequences.nf @@ -2,12 +2,14 @@ include { FETCH_SEQUENCES_ONLINE } from "../../modules/local/fetch_sequences_onl workflow FETCH_SEQUENCES { take: - ch_idlist - ch_query_fasta + ch_id_list + ch_query main: + ch_id_list + .join(ch_query) + .set { ch_input } - ch_input = params.uniprot_query ? ch_idlist.map { it -> [it[0], it[1], []]} : ch_idlist.join(ch_query_fasta) FETCH_SEQUENCES_ONLINE ( ch_input ) diff --git a/subworkflows/local/get_orthologs.nf b/subworkflows/local/get_orthologs.nf index a8cfdda..55563eb 100644 --- a/subworkflows/local/get_orthologs.nf +++ b/subworkflows/local/get_orthologs.nf @@ -19,44 +19,31 @@ include { CSVTK_CONCAT as MERGE_STATS } from "../../modules/nf-core/csvtk/conca workflow GET_ORTHOLOGS { take: - ch_samplesheet + ch_samplesheet_query + ch_samplesheet_fasta main: - ch_versions = Channel.empty() - ch_queryid = params.uniprot_query ? 
ch_samplesheet.map { it[1] } : ch_samplesheet.map { it[0].id } ch_orthogroups = Channel.empty() // Preprocessing - find the ID and taxid of the query sequences - if (!params.uniprot_query) { - ch_samplesheet - .map { it -> [it[0], file(it[1])] } - .set { ch_inputfile } - - - IDENTIFY_SEQ_ONLINE ( - ch_inputfile - ) - - IDENTIFY_SEQ_ONLINE.out.seqinfo - .set { ch_query } + ch_samplesheet_fasta + .map { it -> [it[0], file(it[1])] } + .set { ch_fasta } - ch_versions - .mix(IDENTIFY_SEQ_ONLINE.out.versions) - .set { ch_versions } - } - else { - WRITE_SEQINFO ( - ch_samplesheet - ) + IDENTIFY_SEQ_ONLINE ( + ch_fasta + ) - WRITE_SEQINFO.out.seqinfo - .set { ch_query } + ch_query = IDENTIFY_SEQ_ONLINE.out.seqinfo + ch_versions = ch_versions.mix(IDENTIFY_SEQ_ONLINE.out.versions) + + WRITE_SEQINFO ( + ch_samplesheet_query + ) - ch_versions - .mix(WRITE_SEQINFO.out.versions) - .set { ch_versions } - } + ch_query = IDENTIFY_SEQ_ONLINE.out.seqinfo.mix(WRITE_SEQINFO.out.seqinfo) + ch_versions = ch_versions.mix(WRITE_SEQINFO.out.versions) // Ortholog fetching @@ -75,9 +62,7 @@ workflow GET_ORTHOLOGS { .mix(FETCH_OMA_GROUP_LOCAL.out.oma_group) .set { ch_orthogroups } - ch_versions - .mix(FETCH_OMA_GROUP_LOCAL.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FETCH_OMA_GROUP_LOCAL.out.versions) } else { FETCH_OMA_GROUP_ONLINE ( @@ -88,9 +73,7 @@ workflow GET_ORTHOLOGS { .mix(FETCH_OMA_GROUP_ONLINE.out.oma_group) .set { ch_orthogroups } - ch_versions - .mix(FETCH_OMA_GROUP_ONLINE.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FETCH_OMA_GROUP_ONLINE.out.versions) } // Panther if (params.local_databases) { @@ -103,9 +86,7 @@ workflow GET_ORTHOLOGS { .mix(FETCH_PANTHER_GROUP_LOCAL.out.panther_group) .set { ch_orthogroups } - ch_versions - .mix(FETCH_PANTHER_GROUP_LOCAL.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FETCH_PANTHER_GROUP_LOCAL.out.versions) } else { FETCH_PANTHER_GROUP_ONLINE ( ch_query @@ -115,9 +96,7 @@ 
workflow GET_ORTHOLOGS { .mix(FETCH_PANTHER_GROUP_ONLINE.out.panther_group) .set { ch_orthogroups } - ch_versions - .mix(FETCH_PANTHER_GROUP_ONLINE.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FETCH_PANTHER_GROUP_ONLINE.out.versions) } // OrthoInspector FETCH_INSPECTOR_GROUP_ONLINE ( @@ -128,10 +107,8 @@ workflow GET_ORTHOLOGS { ch_orthogroups .mix(FETCH_INSPECTOR_GROUP_ONLINE.out.inspector_group) .set { ch_orthogroups } - - ch_versions - .mix(FETCH_INSPECTOR_GROUP_ONLINE.out.versions) - .set { ch_versions } + + ch_versions = ch_versions.mix(FETCH_INSPECTOR_GROUP_ONLINE.out.versions) FETCH_EGGNOG_GROUP_LOCAL ( ch_query, @@ -143,9 +120,7 @@ workflow GET_ORTHOLOGS { .mix(FETCH_EGGNOG_GROUP_LOCAL.out.eggnog_group) .set { ch_orthogroups } - ch_versions - .mix(FETCH_EGGNOG_GROUP_LOCAL.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FETCH_EGGNOG_GROUP_LOCAL.out.versions) } else { // online/local separation is used // local only @@ -163,9 +138,7 @@ workflow GET_ORTHOLOGS { .mix(FETCH_OMA_GROUP_LOCAL.out.oma_group) .set { ch_orthogroups } - ch_versions - .mix(FETCH_OMA_GROUP_LOCAL.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FETCH_OMA_GROUP_LOCAL.out.versions) } if (!params.skip_panther) { @@ -178,9 +151,7 @@ workflow GET_ORTHOLOGS { .mix(FETCH_PANTHER_GROUP_LOCAL.out.panther_group) .set { ch_orthogroups } - ch_versions - .mix(FETCH_PANTHER_GROUP_LOCAL.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FETCH_PANTHER_GROUP_LOCAL.out.versions) } if(!params.skip_eggnog) { @@ -194,10 +165,7 @@ workflow GET_ORTHOLOGS { .mix(FETCH_EGGNOG_GROUP_LOCAL.out.eggnog_group) .set { ch_orthogroups } - ch_versions - .mix(FETCH_EGGNOG_GROUP_LOCAL.out.versions) - .set { ch_versions } - + ch_versions = ch_versions.mix(FETCH_EGGNOG_GROUP_LOCAL.out.versions) } } else { // online only @@ -210,10 +178,7 @@ workflow GET_ORTHOLOGS { .mix(FETCH_OMA_GROUP_ONLINE.out.oma_group) .set { ch_orthogroups } - ch_versions 
- .mix(FETCH_OMA_GROUP_ONLINE.out.versions) - .set { ch_versions } - + ch_versions = ch_versions.mix(FETCH_OMA_GROUP_ONLINE.out.versions) } if (!params.skip_panther) { FETCH_PANTHER_GROUP_ONLINE ( @@ -224,9 +189,7 @@ workflow GET_ORTHOLOGS { .mix(FETCH_PANTHER_GROUP_ONLINE.out.panther_group) .set { ch_orthogroups } - ch_versions - .mix(FETCH_PANTHER_GROUP_ONLINE.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FETCH_PANTHER_GROUP_ONLINE.out.versions) } if (!params.skip_orthoinspector) { FETCH_INSPECTOR_GROUP_ONLINE ( @@ -238,9 +201,7 @@ workflow GET_ORTHOLOGS { .mix(FETCH_INSPECTOR_GROUP_ONLINE.out.inspector_group) .set { ch_orthogroups } - ch_versions - .mix(FETCH_INSPECTOR_GROUP_ONLINE.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FETCH_INSPECTOR_GROUP_ONLINE.out.versions) } } } @@ -251,9 +212,7 @@ workflow GET_ORTHOLOGS { ch_orthogroups.groupTuple() ) - ch_versions - .mix(MERGE_CSV.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(MERGE_CSV.out.versions) // Scoring and filtering @@ -261,9 +220,7 @@ workflow GET_ORTHOLOGS { MERGE_CSV.out.csv ) - ch_versions - .mix(MAKE_SCORE_TABLE.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(MAKE_SCORE_TABLE.out.versions) ch_forfilter = MAKE_SCORE_TABLE.out.score_table .combine(ch_query, by: 0) @@ -275,9 +232,7 @@ workflow GET_ORTHOLOGS { params.min_score ) - ch_versions - .mix(FILTER_HITS.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FILTER_HITS.out.versions) // Plotting @@ -294,9 +249,7 @@ workflow GET_ORTHOLOGS { ch_vennplot = PLOT_ORTHOLOGS.out.venn ch_jaccardplot = PLOT_ORTHOLOGS.out.jaccard - ch_versions - .mix(PLOT_ORTHOLOGS.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(PLOT_ORTHOLOGS.out.versions) } // Stats @@ -305,17 +258,13 @@ workflow GET_ORTHOLOGS { MAKE_SCORE_TABLE.out.score_table ) - ch_versions - .mix(MAKE_STATS.out.versions) - .set { ch_versions } + ch_versions = 
ch_versions.mix(MAKE_STATS.out.versions) STATS2CSV( MAKE_STATS.out.stats ) - ch_versions - .mix(STATS2CSV.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(STATS2CSV.out.versions) ch_stats = STATS2CSV.out.csv .collect { it[1] } @@ -327,9 +276,7 @@ workflow GET_ORTHOLOGS { "csv" ) - ch_versions - .mix(MERGE_STATS.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(MERGE_STATS.out.versions) ch_versions .collectFile(name: "get_orthologs_versions.yml", sort: true, newLine: true) diff --git a/subworkflows/local/report.nf b/subworkflows/local/report.nf index 47e061d..b3c60ed 100644 --- a/subworkflows/local/report.nf +++ b/subworkflows/local/report.nf @@ -5,7 +5,6 @@ include { CONVERT_FASTA } from "../../modules/local/convert_fasta" workflow REPORT { take: - uniprot_query use_structures use_centroid min_score @@ -52,7 +51,6 @@ workflow REPORT { DUMP_PARAMS( ch_seqinfo.map { [it[0], it[3]] }, - params.uniprot_query, params.use_structures, params.use_centroid, params.min_score, @@ -66,9 +64,7 @@ workflow REPORT { ch_fasta = CONVERT_FASTA.out.fasta - ch_versions - .mix(CONVERT_FASTA.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(CONVERT_FASTA.out.versions) } ch_forreport = ch_seqinfo @@ -91,9 +87,7 @@ workflow REPORT { ch_forreport ) - ch_versions - .mix(MAKE_REPORT.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(MAKE_REPORT.out.versions) emit: versions = ch_versions diff --git a/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf b/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf index 976b779..675e66d 100644 --- a/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf @@ -74,23 +74,23 @@ workflow PIPELINE_INITIALISATION { ) // - // Create channel from input file provided through params.input - // + // Create channel from input file provided through params.input and check for query + // Channel 
.fromSamplesheet("input") - .map { + .branch { id, query, fasta -> - if (query) { - [ id, query ] - } else { - [ id, fasta ] - } + query: query != [] + return [ id, query ] + fasta: query == [] + return [ id, fasta ] } .set { ch_samplesheet } emit: - samplesheet = ch_samplesheet - versions = ch_versions + samplesheet_query = ch_samplesheet.query + samplesheet_fasta = ch_samplesheet.fasta + versions = ch_versions } /* diff --git a/workflows/reportho.nf b/workflows/reportho.nf index b33a639..464f257 100644 --- a/workflows/reportho.nf +++ b/workflows/reportho.nf @@ -27,44 +27,42 @@ include { REPORT } from '../subworkflows/local/report' workflow REPORTHO { take: - ch_samplesheet // channel: samplesheet read in from --input + ch_samplesheet_query // channel: samplesheet query + ch_samplesheet_fasta // channel: samplesheet fasta main: ch_versions = Channel.empty() ch_multiqc_files = Channel.empty() - - ch_query_fasta = params.uniprot_query ? ch_samplesheet.map { [it[0], []] } : ch_samplesheet.map { [it[0], file(it[1])] } + ch_fasta_query = ch_samplesheet_query.map { [it[0], []] }.mix(ch_samplesheet_fasta.map { [it[0], file(it[1])] }) GET_ORTHOLOGS ( - ch_samplesheet + ch_samplesheet_query, + ch_samplesheet_fasta ) - ch_versions - .mix(GET_ORTHOLOGS.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(GET_ORTHOLOGS.out.versions) + ch_samplesheet = ch_samplesheet_query.mix (ch_samplesheet_fasta) - ch_seqhits = ch_samplesheet.map { [it[0], []] } - ch_seqmisses = ch_samplesheet.map { [it[0], []] } - ch_strhits = ch_samplesheet.map { [it[0], []] } - ch_strmisses = ch_samplesheet.map { [it[0], []] } - ch_alignment = ch_samplesheet.map { [it[0], []] } - ch_iqtree = ch_samplesheet.map { [it[0], []] } - ch_fastme = ch_samplesheet.map { [it[0], []] } + ch_seqhits = ch_samplesheet.map { [it[0], []] } + ch_seqmisses = ch_samplesheet.map { [it[0], []] } + ch_strhits = ch_samplesheet.map { [it[0], []] } + ch_strmisses = ch_samplesheet.map { [it[0], []] } + ch_alignment 
= ch_samplesheet.map { [it[0], []] } + ch_iqtree = ch_samplesheet.map { [it[0], []] } + ch_fastme = ch_samplesheet.map { [it[0], []] } if (!params.skip_downstream) { FETCH_SEQUENCES ( GET_ORTHOLOGS.out.orthologs, - ch_query_fasta + ch_fasta_query ) ch_seqhits = FETCH_SEQUENCES.out.hits ch_seqmisses = FETCH_SEQUENCES.out.misses - ch_versions - .mix(FETCH_SEQUENCES.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FETCH_SEQUENCES.out.versions) if (params.use_structures) { FETCH_STRUCTURES ( @@ -75,9 +73,7 @@ workflow REPORTHO { ch_strmisses = FETCH_STRUCTURES.out.misses - ch_versions - .mix(FETCH_STRUCTURES.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FETCH_STRUCTURES.out.versions) } ch_structures = params.use_structures ? FETCH_STRUCTURES.out.structures : Channel.empty() @@ -89,9 +85,7 @@ workflow REPORTHO { ch_alignment = ALIGN.out.alignment - ch_versions - .mix(ALIGN.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(ALIGN.out.versions) MAKE_TREES ( ALIGN.out.alignment @@ -100,14 +94,11 @@ workflow REPORTHO { ch_iqtree = MAKE_TREES.out.mlplot ch_fastme = MAKE_TREES.out.meplot - ch_versions - .mix(MAKE_TREES.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(MAKE_TREES.out.versions) } if(!params.skip_report) { REPORT ( - params.uniprot_query, params.use_structures, params.use_centroid, params.min_score, @@ -130,9 +121,7 @@ workflow REPORTHO { ch_fastme ) - ch_versions - .mix(REPORT.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(REPORT.out.versions) } // From bb04c563c6497f70bae73a30909817ddaec000be Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Fri, 10 May 2024 18:09:14 +0200 Subject: [PATCH 10/23] Update tests --- conf/test.config | 1 - conf/test_fasta.config | 1 - conf/test_full.config | 1 - 3 files changed, 3 deletions(-) diff --git a/conf/test.config b/conf/test.config index 2a67104..7de21c0 100644 --- a/conf/test.config +++ b/conf/test.config @@ -23,7 +23,6 
@@ params { input = 'https://raw.githubusercontent.com/nf-core/test-datasets/reportho/testdata/samplesheet/samplesheet.csv' // Other parameters - uniprot_query = true skip_eggnog = true min_score = 3 skip_iqtree = true diff --git a/conf/test_fasta.config b/conf/test_fasta.config index c81035c..e9b009f 100644 --- a/conf/test_fasta.config +++ b/conf/test_fasta.config @@ -23,7 +23,6 @@ params { input = 'https://raw.githubusercontent.com/nf-core/test-datasets/reportho/testdata/samplesheet/samplesheet_fasta.csv' // Other parameters - uniprot_query = false skip_eggnog = true min_score = 3 skip_iqtree = true diff --git a/conf/test_full.config b/conf/test_full.config index 2f59347..68c6bb4 100644 --- a/conf/test_full.config +++ b/conf/test_full.config @@ -18,7 +18,6 @@ params { input = 'https://raw.githubusercontent.com/nf-core/test-datasets/reportho/testdata/samplesheet/samplesheet.csv' // Other parameters - uniprot_query = true eggnog_path = 'http://eggnog5.embl.de/download/eggnog_5.0/per_tax_level/1/1_members.tsv.gz' eggnog_idmap_path = "http://eggnog5.embl.de/download/eggnog_5.0/id_mappings/uniprot/latest.Eukaryota.tsv.gz" min_score = 3 From a376ae3d74839c11d12c2a03b8f86dcd9683c7ef Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Fri, 10 May 2024 18:09:55 +0200 Subject: [PATCH 11/23] Get rid of leftovers of the uniprot_query parameter --- nextflow.config | 1 - nextflow_schema.json | 6 ------ 2 files changed, 7 deletions(-) diff --git a/nextflow.config b/nextflow.config index 03b1861..41dc4b7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -10,7 +10,6 @@ params { // Input options input = null - uniprot_query = false // Ortholog options use_all = false diff --git a/nextflow_schema.json b/nextflow_schema.json index 304443b..ff34b8d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -23,12 +23,6 @@ "help_text": "You will need to create a design file with information about the samples in your experiment before running the pipeline. 
Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row. See [usage docs](https://nf-co.re/reportho/usage#samplesheet-input).", "fa_icon": "fas fa-file-csv" }, - "uniprot_query": { - "type": "boolean", - "description": "The input contains a Uniprot ID as query.", - "help_text": "If the input file contains a Uniprot ID as query, set this parameter to `true`.", - "fa_icon": "fas fa-database" - }, "outdir": { "type": "string", "format": "directory-path", From f1f25bdf18407979041568f88a3be236ac251517 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Fri, 10 May 2024 18:10:10 +0200 Subject: [PATCH 12/23] Update docs --- README.md | 13 ++++++------- docs/usage.md | 17 +++++++++-------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index f363652..9ee5128 100644 --- a/README.md +++ b/README.md @@ -44,21 +44,20 @@ Steps that follow can be skipped with `--skip_downstream` in batch analysis. First, prepare a samplesheet with your input data that looks as follows: -`samplesheet.csv`: - -```csv -id,query +```csv title="samplesheet.csv" +id,fasta BicD2,data/bicd2.fasta ``` -or: +or if you know the UniProt ID of the protein you can provide it directly: -```csv +```csv title="samplesheet.csv" id,query BicD2,Q8TD16 ``` -If using the latter format, you must set `--uniprot_query` to true. +> [!NOTE] +> If you provide both a FASTA file and a UniProt ID only the later will be used. Now, you can run the pipeline using: diff --git a/docs/usage.md b/docs/usage.md index 33eaba7..b431ed7 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -18,26 +18,27 @@ You will need to create a samplesheet with information about the samples you wou ### Full samplesheet -The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 2 columns to match those defined in the table below. 
+The samplesheet can have as many columns as you desire, however, there is a strict requirement for the first 2 columns to match those defined in the tables below. -A final samplesheet file may look something like the one below, with `--uniprot_query` enabled: +A final samplesheet file may look something like the one below: ```csv title="samplesheet.csv" id,query BicD2,Q8TD16 ``` -or the one below, otherwise: +or the one below, if you provide the sequence of the protein in FASTA format: ```csv title="samplesheet.csv" -id,query +id,fasta BicD2,/home/myuser/data/bicd2.fa ``` -| Column | Description | -| ------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `id` | User-defined identifier. It is used to identify output files for the protein. Can be anything descriptive, as long as it does not contain spaces. | -| `query` | The query of the user-specified type. If `--uniprot_query` is `true`, it should be a valid Uniprot accession. Otherwise, it should be a valid path to a FASTA file. | +| Column | Description | +| ------- | ------------------------------------------------------------------------------------------------------------------------------------------------- | +| `id` | User-defined identifier. It is used to identify output files for the protein. Can be anything descriptive, as long as it does not contain spaces. | +| `query` | The query of the user-specified type. It should be a valid Uniprot accession. | +| `fasta` | It should be a valid path to a FASTA file. | An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. 
From b8992468e53bcfcde48fc3c4c3f564c2292b14c2 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Fri, 10 May 2024 18:10:35 +0200 Subject: [PATCH 13/23] Do not use set for ch_versions --- subworkflows/local/align.nf | 13 ++++--------- subworkflows/local/make_trees.nf | 20 +++++--------------- 2 files changed, 9 insertions(+), 24 deletions(-) diff --git a/subworkflows/local/align.nf b/subworkflows/local/align.nf index 46c78b4..2459c65 100644 --- a/subworkflows/local/align.nf +++ b/subworkflows/local/align.nf @@ -25,9 +25,7 @@ workflow ALIGN { ch_for_filter ) - ch_versions - .mix(FILTER_FASTA.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FILTER_FASTA.out.versions) CREATE_TCOFFEETEMPLATE( ch_pdb @@ -52,9 +50,8 @@ workflow ALIGN { TCOFFEE_3DALIGN.out.alignment .set { ch_alignment } - ch_versions - .mix(TCOFFEE_3DALIGN.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(TCOFFEE_3DALIGN.out.versions) + } else { TCOFFEE_ALIGN ( @@ -67,9 +64,7 @@ workflow ALIGN { TCOFFEE_ALIGN.out.alignment .set { ch_alignment } - ch_versions - .mix(TCOFFEE_ALIGN.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(TCOFFEE_ALIGN.out.versions) } emit: diff --git a/subworkflows/local/make_trees.nf b/subworkflows/local/make_trees.nf index b4743a0..6f60967 100644 --- a/subworkflows/local/make_trees.nf +++ b/subworkflows/local/make_trees.nf @@ -24,9 +24,7 @@ workflow MAKE_TREES { ch_mltree = IQTREE.out.phylogeny - ch_versions - .mix(IQTREE.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(IQTREE.out.versions) ch_mlplot = ch_alignment.map { [it[0], []] } @@ -38,9 +36,7 @@ workflow MAKE_TREES { ch_mlplot = PLOT_IQTREE.out.plot - ch_versions - .mix(PLOT_IQTREE.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(PLOT_IQTREE.out.versions) } } @@ -50,9 +46,7 @@ workflow MAKE_TREES { ch_alignment ) - ch_versions - .mix(CONVERT_PHYLIP.out.versions) - .set { ch_versions } + ch_versions = 
ch_versions.mix(CONVERT_PHYLIP.out.versions) FASTME ( CONVERT_PHYLIP.out.phylip.map { [it[0], it[1], []] } @@ -60,9 +54,7 @@ workflow MAKE_TREES { ch_metree = FASTME.out.nwk - ch_versions - .mix(FASTME.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(FASTME.out.versions) ch_meplot = ch_alignment.map { [it[0], []] } @@ -74,9 +66,7 @@ workflow MAKE_TREES { ch_meplot = PLOT_FASTME.out.plot - ch_versions - .mix(PLOT_FASTME.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(PLOT_FASTME.out.versions) } } From 3f472ad86cbce1ceeb3885ef6425c14ce7780b6b Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Fri, 10 May 2024 18:12:27 +0200 Subject: [PATCH 14/23] Add test_fasta to CI --- .github/workflows/ci.yml | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 32fb5b8..fb18a85 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -44,3 +44,32 @@ jobs: # Remember that you can parallelise this by using strategy.matrix run: | nextflow run ${GITHUB_WORKSPACE} -profile test,docker --outdir ./results + + test_fasta: + name: Run pipeline with test data with fasta files in samplesheet + # Only run on push if this is the nf-core dev branch (merged PRs) + if: "${{ github.event_name != 'push' || (github.event_name == 'push' && github.repository == 'nf-core/reportho') }}" + runs-on: ubuntu-latest + strategy: + matrix: + NXF_VER: + - "23.04.0" + - "latest-everything" + steps: + - name: Check out pipeline code + uses: actions/checkout@0ad4b8fadaa221de15dcec353f45205ec38ea70b # v4 + + - name: Install Nextflow + uses: nf-core/setup-nextflow@v2 + with: + version: "${{ matrix.NXF_VER }}" + + - name: Disk space cleanup + uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1 + + - name: Run pipeline with test data + # TODO nf-core: You can customise CI pipeline run tests as required + # For example: adding multiple test runs with 
different parameters + # Remember that you can parallelise this by using strategy.matrix + run: | + nextflow run ${GITHUB_WORKSPACE} -profile test_fasta,docker --outdir ./results From 72f80aa2dd7fbc096a2d6891acddc19bee2c4cef Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Fri, 10 May 2024 18:14:19 +0200 Subject: [PATCH 15/23] Make nf-core lint happy --- .nf-core.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.nf-core.yml b/.nf-core.yml index e0b85a7..13b10ff 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,2 +1,5 @@ repository_type: pipeline nf_core_version: "2.14.1" + +files_exist: + - lib/nfcore_external_java_deps.jar From 3490c4c87faef78eef76d2e56c346fdcf9b2d8a3 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Fri, 10 May 2024 18:19:20 +0200 Subject: [PATCH 16/23] Make prettier happy --- main.nf | 2 +- subworkflows/local/get_orthologs.nf | 4 ++-- subworkflows/local/utils_nfcore_reportho_pipeline/main.nf | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/main.nf b/main.nf index a810341..cb1dfd0 100644 --- a/main.nf +++ b/main.nf @@ -72,7 +72,7 @@ workflow { params.outdir, params.input ) - + // // WORKFLOW: Run main workflow // diff --git a/subworkflows/local/get_orthologs.nf b/subworkflows/local/get_orthologs.nf index 55563eb..1512675 100644 --- a/subworkflows/local/get_orthologs.nf +++ b/subworkflows/local/get_orthologs.nf @@ -37,7 +37,7 @@ workflow GET_ORTHOLOGS { ch_query = IDENTIFY_SEQ_ONLINE.out.seqinfo ch_versions = ch_versions.mix(IDENTIFY_SEQ_ONLINE.out.versions) - + WRITE_SEQINFO ( ch_samplesheet_query ) @@ -107,7 +107,7 @@ workflow GET_ORTHOLOGS { ch_orthogroups .mix(FETCH_INSPECTOR_GROUP_ONLINE.out.inspector_group) .set { ch_orthogroups } - + ch_versions = ch_versions.mix(FETCH_INSPECTOR_GROUP_ONLINE.out.versions) FETCH_EGGNOG_GROUP_LOCAL ( diff --git a/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf b/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf index 675e66d..44dc7eb 100644 --- 
a/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf @@ -75,7 +75,7 @@ workflow PIPELINE_INITIALISATION { // // Create channel from input file provided through params.input and check for query - // + // Channel .fromSamplesheet("input") .branch { From 8fe4f82962495a45235fef67494be45762ac818b Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Fri, 10 May 2024 18:26:53 +0200 Subject: [PATCH 17/23] Make nf-core lint happy (bug in tools until fixed) --- lib/nfcore_external_java_deps.jar | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 lib/nfcore_external_java_deps.jar diff --git a/lib/nfcore_external_java_deps.jar b/lib/nfcore_external_java_deps.jar new file mode 100644 index 0000000..e69de29 From b13725cc4a63caa63186bec731381df11c90a634 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Fri, 10 May 2024 18:28:14 +0200 Subject: [PATCH 18/23] Revert changes in .nf-core.yml --- .nf-core.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.nf-core.yml b/.nf-core.yml index 13b10ff..e0b85a7 100644 --- a/.nf-core.yml +++ b/.nf-core.yml @@ -1,5 +1,2 @@ repository_type: pipeline nf_core_version: "2.14.1" - -files_exist: - - lib/nfcore_external_java_deps.jar From 6d270eaf88a03c905c1bb892aa4d47c31a21c961 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Fri, 10 May 2024 22:25:18 +0200 Subject: [PATCH 19/23] Assign ch_versions --- subworkflows/local/get_orthologs.nf | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/subworkflows/local/get_orthologs.nf b/subworkflows/local/get_orthologs.nf index 1fe3c9d..6634aaf 100644 --- a/subworkflows/local/get_orthologs.nf +++ b/subworkflows/local/get_orthologs.nf @@ -260,9 +260,7 @@ workflow GET_ORTHOLOGS { MERGE_CSV.out.csv ) - ch_versions - .mix(MAKE_HITS_TABLE.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(MAKE_HITS_TABLE.out.versions) ch_hits = MAKE_HITS_TABLE.out.hits_table .collect { it[1] } @@ 
-274,9 +272,7 @@ workflow GET_ORTHOLOGS { "csv" ) - ch_versions - .mix(MERGE_HITS.out.versions) - .set { ch_versions } + ch_versions = ch_versions.mix(MERGE_HITS.out.versions) // Stats From b849f9225060bf06995f5cfde98dd30a9e6f00c0 Mon Sep 17 00:00:00 2001 From: Jose Espinosa-Carrasco Date: Mon, 13 May 2024 11:34:57 +0200 Subject: [PATCH 20/23] Fix typo Co-authored-by: Igor Trujnara <53370556+itrujnara@users.noreply.github.com> --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9ee5128..f761a37 100644 --- a/README.md +++ b/README.md @@ -57,7 +57,7 @@ BicD2,Q8TD16 ``` > [!NOTE] -> If you provide both a FASTA file and a UniProt ID only the later will be used. +> If you provide both a FASTA file and a UniProt ID only the latter will be used. Now, you can run the pipeline using: From 195ab8d1f553887dac4a34d9a4f26e271f771322 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Mon, 13 May 2024 11:53:13 +0200 Subject: [PATCH 21/23] Add samplesheet_fasta in assets --- assets/samplesheet_fasta.csv | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 assets/samplesheet_fasta.csv diff --git a/assets/samplesheet_fasta.csv b/assets/samplesheet_fasta.csv new file mode 100644 index 0000000..9cdb0c6 --- /dev/null +++ b/assets/samplesheet_fasta.csv @@ -0,0 +1,3 @@ +id,fasta +ste2,https://raw.githubusercontent.com/nf-core/test-datasets/reportho/testdata/sequences/ste2.fa +ste3,https://raw.githubusercontent.com/nf-core/test-datasets/reportho/testdata/sequences/ste3.fa From f276a8e3c1c18a4476b8e7bdc9f74b7d64e81670 Mon Sep 17 00:00:00 2001 From: JoseEspinosa Date: Mon, 13 May 2024 11:56:03 +0200 Subject: [PATCH 22/23] Rename samplesheet_fasta example --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f761a37..63e8fac 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ Steps that follow can be skipped with `--skip_downstream` in batch analysis. 
First, prepare a samplesheet with your input data that looks as follows: -```csv title="samplesheet.csv" +```csv title="samplesheet_fasta.csv" id,fasta BicD2,data/bicd2.fasta ``` From cc50269d0f55ddbdbf206dfb85651b0888655ac8 Mon Sep 17 00:00:00 2001 From: Igor Trujnara <53370556+itrujnara@users.noreply.github.com> Date: Mon, 13 May 2024 11:56:16 +0200 Subject: [PATCH 23/23] Update docs/usage.md --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index b431ed7..1b1ce30 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -40,7 +40,7 @@ BicD2,/home/myuser/data/bicd2.fa | `query` | The query of the user-specified type. It should be a valid Uniprot accession. | | `fasta` | It should be a valid path to a FASTA file. | -An [example samplesheet](../assets/samplesheet.csv) has been provided with the pipeline. +An [example Uniprot samplesheet](../assets/samplesheet.csv) and [example FASTA samplesheet](../assets/samplesheet_fasta.csv) have been provided with the pipeline. ## Running the pipeline