From 36aa9fe58827e0a498267874412fe68602338e82 Mon Sep 17 00:00:00 2001 From: itrujnara Date: Tue, 28 May 2024 12:39:09 +0200 Subject: [PATCH 01/13] Removed code duplication in get_orthologs --- subworkflows/local/get_orthologs.nf | 110 +++++----------------------- 1 file changed, 19 insertions(+), 91 deletions(-) diff --git a/subworkflows/local/get_orthologs.nf b/subworkflows/local/get_orthologs.nf index 8c524de..c098d3a 100644 --- a/subworkflows/local/get_orthologs.nf +++ b/subworkflows/local/get_orthologs.nf @@ -46,6 +46,7 @@ workflow GET_ORTHOLOGS { } // Preprocessing - find the ID and taxid of the query sequences + ch_samplesheet_fasta .map { it -> [it[0], file(it[1])] } .set { ch_fasta } @@ -66,12 +67,14 @@ workflow GET_ORTHOLOGS { ch_versions = ch_versions.mix(WRITE_SEQINFO.out.versions) // Ortholog fetching + if(params.offline_run && params.use_all) { log.warn("Both '--use_all' and '--offline_run' parameters have been specified!\nThose databases that can't be run offline will be run online.") } - if(params.use_all) { - // OMA + // OMA + + if (params.use_all || !params.skip_oma) { if (params.local_databases) { FETCH_OMA_GROUP_LOCAL ( ch_query, @@ -98,7 +101,11 @@ workflow GET_ORTHOLOGS { ch_versions = ch_versions.mix(FETCH_OMA_GROUP_ONLINE.out.versions) } - // Panther + } + + // PANTHER + + if (params.use_all || !params.skip_panther) { if (params.local_databases) { FETCH_PANTHER_GROUP_LOCAL ( ch_query, @@ -121,7 +128,11 @@ workflow GET_ORTHOLOGS { ch_versions = ch_versions.mix(FETCH_PANTHER_GROUP_ONLINE.out.versions) } - // OrthoInspector + } + + // OrthoInspector + + if ((params.use_all || !params.skip_orthoinspector) && !params.local_databases) { FETCH_INSPECTOR_GROUP_ONLINE ( ch_query, params.orthoinspector_version @@ -132,8 +143,11 @@ workflow GET_ORTHOLOGS { .set { ch_orthogroups } ch_versions = ch_versions.mix(FETCH_INSPECTOR_GROUP_ONLINE.out.versions) + } - // EggNOG + // EggNOG + + if (params.use_all || (!params.skip_eggnog && params.local_databases)) 
{ FETCH_EGGNOG_GROUP_LOCAL ( ch_query, ch_eggnog, @@ -149,92 +163,6 @@ workflow GET_ORTHOLOGS { ch_versions = ch_versions.mix(FETCH_EGGNOG_GROUP_LOCAL.out.versions) } - else { // online/local separation is used - // local only - if (params.local_databases) { - if (!params.skip_oma) { - FETCH_OMA_GROUP_LOCAL ( - ch_query, - ch_oma_groups, - ch_oma_uniprot, - ch_oma_ensembl, - ch_oma_refseq - ) - - ch_orthogroups - .mix(FETCH_OMA_GROUP_LOCAL.out.oma_group) - .set { ch_orthogroups } - - ch_versions = ch_versions.mix(FETCH_OMA_GROUP_LOCAL.out.versions) - } - - if (!params.skip_panther) { - FETCH_PANTHER_GROUP_LOCAL ( - ch_query, - ch_panther - ) - - ch_orthogroups - .mix(FETCH_PANTHER_GROUP_LOCAL.out.panther_group) - .set { ch_orthogroups } - - ch_versions = ch_versions.mix(FETCH_PANTHER_GROUP_LOCAL.out.versions) - } - - if(!params.skip_eggnog) { - FETCH_EGGNOG_GROUP_LOCAL ( - ch_query, - ch_eggnog, - ch_eggnog_idmap, - ch_oma_ensembl, - ch_oma_refseq, - params.offline_run - ) - - ch_orthogroups - .mix(FETCH_EGGNOG_GROUP_LOCAL.out.eggnog_group) - .set { ch_orthogroups } - - ch_versions = ch_versions.mix(FETCH_EGGNOG_GROUP_LOCAL.out.versions) - } - } - else { // online only - if (!params.skip_oma) { - FETCH_OMA_GROUP_ONLINE ( - ch_query - ) - - ch_orthogroups - .mix(FETCH_OMA_GROUP_ONLINE.out.oma_group) - .set { ch_orthogroups } - - ch_versions = ch_versions.mix(FETCH_OMA_GROUP_ONLINE.out.versions) - } - if (!params.skip_panther) { - FETCH_PANTHER_GROUP_ONLINE ( - ch_query - ) - - ch_orthogroups - .mix(FETCH_PANTHER_GROUP_ONLINE.out.panther_group) - .set { ch_orthogroups } - - ch_versions = ch_versions.mix(FETCH_PANTHER_GROUP_ONLINE.out.versions) - } - if (!params.skip_orthoinspector) { - FETCH_INSPECTOR_GROUP_ONLINE ( - ch_query, - params.orthoinspector_version - ) - - ch_orthogroups - .mix(FETCH_INSPECTOR_GROUP_ONLINE.out.inspector_group) - .set { ch_orthogroups } - - ch_versions = ch_versions.mix(FETCH_INSPECTOR_GROUP_ONLINE.out.versions) - } - } - } // Result 
merging From 4e496df23a539afbc40846f4e2e6239ac4ae249c Mon Sep 17 00:00:00 2001 From: itrujnara Date: Tue, 28 May 2024 12:42:26 +0200 Subject: [PATCH 02/13] Removed superfluous channel prepopulation from report subworkflow --- subworkflows/local/report.nf | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/subworkflows/local/report.nf b/subworkflows/local/report.nf index b3c60ed..5a997de 100644 --- a/subworkflows/local/report.nf +++ b/subworkflows/local/report.nf @@ -30,25 +30,6 @@ workflow REPORT { ch_versions = Channel.empty() ch_fasta = ch_seqinfo.map { [it[0], []] } - if(params.skip_downstream) { - ch_seqhits = ch_seqinfo.map { [it[0], []] } - ch_seqmisses = ch_seqinfo.map { [it[0], []] } - ch_strhits = ch_seqinfo.map { [it[0], []] } - ch_strmisses = ch_seqinfo.map { [it[0], []] } - ch_alignment = ch_seqinfo.map { [it[0], []] } - } - else if(!params.use_structures) { - ch_strhits = ch_seqinfo.map { [it[0], []] } - ch_strmisses = ch_seqinfo.map { [it[0], []] } - } - - if (params.skip_iqtree) { - ch_iqtree = ch_seqinfo.map { [it[0], []] } - } - if (params.skip_fastme) { - ch_fastme = ch_seqinfo.map { [it[0], []] } - } - DUMP_PARAMS( ch_seqinfo.map { [it[0], it[3]] }, params.use_structures, From 56c86c927b17a1940c4db85d9a59230cb197f9bd Mon Sep 17 00:00:00 2001 From: itrujnara Date: Tue, 28 May 2024 12:57:50 +0200 Subject: [PATCH 03/13] Added tree channel pre-population --- subworkflows/local/make_trees.nf | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/subworkflows/local/make_trees.nf b/subworkflows/local/make_trees.nf index 6f60967..5d12701 100644 --- a/subworkflows/local/make_trees.nf +++ b/subworkflows/local/make_trees.nf @@ -11,10 +11,10 @@ workflow MAKE_TREES { main: ch_versions = Channel.empty() - ch_mltree = Channel.empty() - ch_metree = Channel.empty() - ch_mlplot = Channel.empty() - ch_meplot = Channel.empty() + ch_mltree = ch_alignment.map { [it[0], []] } + ch_metree = ch_alignment.map { [it[0], []] } + 
ch_mlplot = ch_alignment.map { [it[0], []] } + ch_meplot = ch_alignment.map { [it[0], []] } if (!params.skip_iqtree) { IQTREE ( @@ -26,8 +26,6 @@ workflow MAKE_TREES { ch_versions = ch_versions.mix(IQTREE.out.versions) - ch_mlplot = ch_alignment.map { [it[0], []] } - if(!params.skip_treeplots) { PLOT_IQTREE ( IQTREE.out.phylogeny, @@ -56,8 +54,6 @@ workflow MAKE_TREES { ch_versions = ch_versions.mix(FASTME.out.versions) - ch_meplot = ch_alignment.map { [it[0], []] } - if(!params.skip_treeplots) { PLOT_FASTME ( FASTME.out.nwk, From 7d6855e86b3a25adcb3e076e34cd71eb151035cf Mon Sep 17 00:00:00 2001 From: itrujnara Date: Tue, 28 May 2024 14:34:34 +0200 Subject: [PATCH 04/13] Improved parameter validation --- subworkflows/local/get_orthologs.nf | 18 ++++------- .../utils_nfcore_reportho_pipeline/main.nf | 30 +++++++++++++++++++ 2 files changed, 36 insertions(+), 12 deletions(-) diff --git a/subworkflows/local/get_orthologs.nf b/subworkflows/local/get_orthologs.nf index c098d3a..124b0c3 100644 --- a/subworkflows/local/get_orthologs.nf +++ b/subworkflows/local/get_orthologs.nf @@ -36,14 +36,12 @@ workflow GET_ORTHOLOGS { ch_eggnog = params.eggnog_path ? Channel.value(file(params.eggnog_path)) : Channel.empty() ch_eggnog_idmap = params.eggnog_idmap_path ? Channel.value(file(params.eggnog_idmap_path)) : Channel.empty() - fasta_input = true - ch_samplesheet_fasta.ifEmpty { - fasta_input = false - } - ch_samplesheet_fasta.view() - if (fasta_input && params.offline_run) { - log.warn("You are using FASTA input in an offline run. Online identification will be used. Be aware it might cause rate limit issues.") - } + ch_samplesheet_fasta.map { + if (params.offline_run) { + error "Tried to use FASTA input in an offline run. Aborting pipeline for user safety." 
+ } + return it + }.set { ch_samplesheet_fasta } // Preprocessing - find the ID and taxid of the query sequences @@ -68,10 +66,6 @@ workflow GET_ORTHOLOGS { // Ortholog fetching - if(params.offline_run && params.use_all) { - log.warn("Both '--use_all' and '--offline_run' parameters have been specified!\nThose databases that can't be run offline will be run online.") - } - // OMA if (params.use_all || !params.skip_oma) { diff --git a/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf b/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf index 44dc7eb..d0e7824 100644 --- a/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_reportho_pipeline/main.nf @@ -73,6 +73,11 @@ workflow PIPELINE_INITIALISATION { nextflow_cli_args ) + // + // Validate parameters + // + validateParameters() + // // Create channel from input file provided through params.input and check for query // @@ -140,6 +145,31 @@ workflow PIPELINE_COMPLETION { ======================================================================================== */ +// +// Validate parameters +// +def validateParameters() { + validateOfflineSettings() +} + +def validateOfflineSettings() { + if (params.offline_run) { + if (!params.local_databases) { + params.local_databases = true + log.warn("Offline mode enabled, setting 'local_databases' to 'true'") + } + if (!params.skip_downstream) { + params.skip_downstream = true + log.warn("Offline mode enabled, setting 'skip_downstream' to 'true'") + } + if (params.use_all) { + log.warn("Offline run set with 'use_all', only local databases will be used") + } + } else if (params.use_all && params.local_databases) { + log.warn("Local databases set with 'use_all', only local databases will be used") + } +} + // // Validate channels from input samplesheet From 2117c6b2bc86babb50f2f09b0c71851a49964a30 Mon Sep 17 00:00:00 2001 From: itrujnara Date: Tue, 28 May 2024 15:20:49 +0200 Subject: [PATCH 05/13] Added more info to usage.md 
--- docs/usage.md | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 5919ae1..bc77a53 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -101,12 +101,18 @@ If you want to use local database copies for the run, you must provide the requi | `eggnog_path` | `1_members.tsv.gz` | | `eggnog_idmap_path` | `latest.Eukaryota.tsv.gz` | +If you need reduced versions of the local databases for testing, you can find them [here](https://github.com/nf-core/test-datasets/tree/reportho/testdata/databases). Note that they were designed to work with the [test samplesheet](https://github.com/nf-core/test-datasets/blob/reportho/testdata/samplesheet/samplesheet.csv) and will likely not provide any result for other queries. + ### Running offline -With large input sizes, you might want to run the pipeline locally, without runtime access to APIs. There are two main parameters used to achieve this. If you want to use local databases, set `--local_databases` to `true`. Remember to set `--use_all` to `false` to ensure the database step is run fully offline. If your input is especially large, you can also skip the initial online identification steps by setting `--offline_run` to `true`. Note that FASTA input will not work with this option enabled. You can check `test_offline.config` to see the required options for a fully offline run. Keep in mind that the options only affect ortholog finding, and the downstream analysis still requires connection to obtain sequences and structures. +With large input sizes, you might want to run the pipeline locally, without runtime access to APIs. There are two main parameters used to achieve this. If you want to use local databases, set `--local_databases` to `true`. Remember to set `--use_all` to `false` to ensure the database step is run fully offline. If your input is especially large, you can also skip the initial online identification steps by setting `--offline_run` to `true`. 
Note that FASTA input will not work with this option enabled, and the pipeline will be aborted if this is attempted. You can check `test_offline.config` to see the required options for a fully offline run. Keep in mind that the options only affect ortholog finding, and the downstream analysis still requires connection to obtain sequences and structures. While those options allow the pipeline to run its steps offline, the pipeline requires certain configuration files and container images that are downloaded from the internet. If you wish to run the pipeline on a machine without a connection, you can pre-download the required files with `nf-core download`. See [the nf-core tools documentation](https://nf-co.re/docs/nf-core-tools/pipelines/download) for details. +### Downstream analysis + +Downstream analysis relies on online resources to obtain sequences and structures, and thus cannot be run offline. For your convenience, it will be automatically disabled if you enable `offline_run`. Note that in case some sequences or structures cannot be obtained, the corresponding ortholog will be excluded from the alignment and phylogeny. In particular, only the orthologs with both a sequence and a structure available will be retained if `use_structures` is enabled. + ### Updating the pipeline When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. 
To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline: From e527f3374503f1bd27793513f4a019cafeba0d09 Mon Sep 17 00:00:00 2001 From: itrujnara Date: Tue, 28 May 2024 15:41:57 +0200 Subject: [PATCH 06/13] Disabled some outdir outputs, with params to re-enable them --- conf/modules.config | 15 +++++++++++++++ nextflow.config | 2 ++ nextflow_schema.json | 12 ++++++++++++ 3 files changed, 29 insertions(+) diff --git a/conf/modules.config b/conf/modules.config index 614ed45..7c0a4ea 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -27,6 +27,7 @@ process { path: { "${params.outdir}/seqinfo" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + enabled: params.output_more || params.output_all ] errorStrategy = {task.exitStatus == 10 ? 'retry' : 'finish'} maxRetries = 3 @@ -37,6 +38,7 @@ process { path: { "${params.outdir}/orthologs/oma" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + enabled: params.output_more || params.output_all ] errorStrategy = {task.exitStatus == 10 ? 'retry' : 'finish'} maxRetries = 3 @@ -47,6 +49,7 @@ process { path: { "${params.outdir}/orthologs/panther" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + enabled: params.output_more || params.output_all ] errorStrategy = {task.exitStatus == 10 ? 'retry' : 'finish'} maxRetries = 3 @@ -57,6 +60,7 @@ process { path: { "${params.outdir}/orthologs/orthoinspector" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + enabled: params.output_more || params.output_all ] errorStrategy = {task.exitStatus == 10 ? 
'retry' : 'finish'} maxRetries = 3 @@ -67,6 +71,7 @@ process { path: { "${params.outdir}/orthologs/eggnog" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + enabled: params.output_more || params.output_all ] errorStrategy = {task.exitStatus == 10 ? 'retry' : 'finish'} maxRetries = 3 @@ -78,6 +83,7 @@ process { path: { "${params.outdir}/orthologs/merge_csv" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + enabled: params.output_all ] } @@ -94,6 +100,7 @@ process { path: { "${params.outdir}/orthologs/filter_hits" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + enabled: params.output_more || params.output_all ] } @@ -110,6 +117,7 @@ process { path: { "${params.outdir}/orthologs/hits" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + enabled: params.output_all ] } @@ -128,6 +136,7 @@ process { path: { "${params.outdir}/orthologs/stats" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + enabled: params.output_all ] } @@ -136,6 +145,7 @@ process { path: { "${params.outdir}/orthologs/stats" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + enabled: params.output_all ] } @@ -178,6 +188,7 @@ process { path: { "${params.outdir}/alignment/filter" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + enabled: params.output_more || params.output_all ] } @@ -186,6 +197,7 @@ process { path: { "${params.outdir}/alignment/template" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } + enabled: params.output_all ] } @@ -206,6 +218,7 @@ process { path: { "${params.outdir}/trees/convert" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + enabled: params.output_all ] } @@ -252,6 +265,7 @@ process { path: { "${params.outdir}/report/convert" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + enabled: params.output_all ] } @@ -260,6 +274,7 @@ process { path: { "${params.outdir}/report/params" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + enabled: params.output_more || params.output_all ] } diff --git a/nextflow.config b/nextflow.config index 303129b..1bdb0a2 100644 --- a/nextflow.config +++ b/nextflow.config @@ -10,6 +10,8 @@ params { // Input options input = null + output_more = false + output_all = false // MultiQC options multiqc_config = null diff --git a/nextflow_schema.json b/nextflow_schema.json index e93113e..be5ee4c 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -29,6 +29,18 @@ "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", "fa_icon": "fas fa-folder-open" }, + "output_more": { + "type": "boolean", + "default": "false", + "description": "Output more files, including specific prediction lists.", + "fa_icon": "fas fa-folder-open" + }, + "output_all": { + "type": "boolean", + "default": "false", + "description": "Output all files, including intermediate files. 
Intended for debugging.", + "fa_icon": "fas fa-folder-open" + }, "email": { "type": "string", "description": "Email address for completion summary.", From b445b662edf9ec86b5455c277e9f931b98f464fe Mon Sep 17 00:00:00 2001 From: itrujnara Date: Tue, 28 May 2024 15:53:13 +0200 Subject: [PATCH 07/13] Tweaks in output options --- conf/modules.config | 80 ++++++-------------------------------------- nextflow.config | 3 +- nextflow_schema.json | 10 ++---- 3 files changed, 13 insertions(+), 80 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index 7c0a4ea..c207430 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -27,7 +27,7 @@ process { path: { "${params.outdir}/seqinfo" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - enabled: params.output_more || params.output_all + enabled: params.output_intermediates ] errorStrategy = {task.exitStatus == 10 ? 'retry' : 'finish'} maxRetries = 3 @@ -38,7 +38,7 @@ process { path: { "${params.outdir}/orthologs/oma" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - enabled: params.output_more || params.output_all + enabled: params.output_intermediates ] errorStrategy = {task.exitStatus == 10 ? 'retry' : 'finish'} maxRetries = 3 @@ -49,7 +49,7 @@ process { path: { "${params.outdir}/orthologs/panther" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - enabled: params.output_more || params.output_all + enabled: params.output_intermediates ] errorStrategy = {task.exitStatus == 10 ? 'retry' : 'finish'} maxRetries = 3 @@ -60,7 +60,7 @@ process { path: { "${params.outdir}/orthologs/orthoinspector" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - enabled: params.output_more || params.output_all + enabled: params.output_intermediates ] errorStrategy = {task.exitStatus == 10 ? 
'retry' : 'finish'} maxRetries = 3 @@ -71,7 +71,7 @@ process { path: { "${params.outdir}/orthologs/eggnog" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - enabled: params.output_more || params.output_all + enabled: params.output_intermediates ] errorStrategy = {task.exitStatus == 10 ? 'retry' : 'finish'} maxRetries = 3 @@ -79,12 +79,6 @@ process { withName: 'MERGE_CSV' { ext.args = '-f 1 --outer-join --na 0' - publishDir = [ - path: { "${params.outdir}/orthologs/merge_csv" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - enabled: params.output_all - ] } withName: 'MAKE_SCORE_TABLE' { @@ -100,7 +94,7 @@ process { path: { "${params.outdir}/orthologs/filter_hits" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - enabled: params.output_more || params.output_all + enabled: params.output_intermediates ] } @@ -112,40 +106,13 @@ process { ] } - withName: 'MAKE_HITS_TABLE' { - publishDir = [ - path: { "${params.outdir}/orthologs/hits" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - enabled: params.output_all - ] - } - withName: 'MERGE_HITS' { ext.args = "-u 0 -k" ext.prefix = "aggregated_hits" - publishDir = [ - path: { "${params.outdir}/orthologs/hits" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - ] - } - - withName: 'MAKE_STATS' { publishDir = [ path: { "${params.outdir}/orthologs/stats" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - enabled: params.output_all - ] - } - - withName: 'STATS2CSV' { - publishDir = [ - path: { "${params.outdir}/orthologs/stats" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - enabled: params.output_all ] } @@ -165,7 +132,7 @@ process { withName: 'FETCH_SEQUENCES_ONLINE' { publishDir = [ - path: { "${params.outdir}/sequences" }, + path: { "${params.outdir}/alignment/sequences" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -175,7 +142,7 @@ process { withName: 'FETCH_AFDB_STRUCTURES' { publishDir = [ - path: { "${params.outdir}/structures" }, + path: { "${params.outdir}/alignment/structures" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } ] @@ -188,16 +155,7 @@ process { path: { "${params.outdir}/alignment/filter" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - enabled: params.output_more || params.output_all - ] - } - - withName: 'CREATE_TCOFFEETEMPLATE' { - publishDir = [ - path: { "${params.outdir}/alignment/template" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - enabled: params.output_all + enabled: params.output_intermediates ] } @@ -213,15 +171,6 @@ process { // Tree reconstruction // ---------------------- - withName: 'CONVERT_PHYLIP' { - publishDir = [ - path: { "${params.outdir}/trees/convert" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - enabled: params.output_all - ] - } - withName: 'IQTREE' { ext.args = '-m TEST' + (params.iqtree_bootstrap > 0 ? ' -bb ' + params.iqtree_bootstrap : '') publishDir = [ @@ -260,21 +209,12 @@ process { // Report generation // ---------------------- - withName: 'CONVERT_FASTA' { - publishDir = [ - path: { "${params.outdir}/report/convert" }, - mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename } - enabled: params.output_all - ] - } - withName: 'DUMP_PARAMS' { publishDir = [ path: { "${params.outdir}/report/params" }, mode: params.publish_dir_mode, saveAs: { filename -> filename.equals('versions.yml') ? null : filename } - enabled: params.output_more || params.output_all + enabled: params.output_intermediates ] } diff --git a/nextflow.config b/nextflow.config index 1bdb0a2..26c16f9 100644 --- a/nextflow.config +++ b/nextflow.config @@ -10,8 +10,7 @@ params { // Input options input = null - output_more = false - output_all = false + output_intermediates = false // MultiQC options multiqc_config = null diff --git a/nextflow_schema.json b/nextflow_schema.json index be5ee4c..abf877d 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -29,16 +29,10 @@ "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.", "fa_icon": "fas fa-folder-open" }, - "output_more": { + "output_intermediates": { "type": "boolean", "default": "false", - "description": "Output more files, including specific prediction lists.", - "fa_icon": "fas fa-folder-open" - }, - "output_all": { - "type": "boolean", - "default": "false", - "description": "Output all files, including intermediate files. 
Intended for debugging.", + "description": "Output certain potentially interesting intermediate files, including specific prediction lists.", "fa_icon": "fas fa-folder-open" }, "email": { From 43fa0b1bf37fe7f9dc69c5d73f698826c6a8e0bc Mon Sep 17 00:00:00 2001 From: itrujnara Date: Tue, 28 May 2024 15:56:14 +0200 Subject: [PATCH 08/13] Fixed typos in config --- conf/modules.config | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/conf/modules.config b/conf/modules.config index c207430..aba1e3e 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -26,7 +26,7 @@ process { publishDir = [ path: { "${params.outdir}/seqinfo" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: params.output_intermediates ] errorStrategy = {task.exitStatus == 10 ? 'retry' : 'finish'} @@ -37,7 +37,7 @@ process { publishDir = [ path: { "${params.outdir}/orthologs/oma" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: params.output_intermediates ] errorStrategy = {task.exitStatus == 10 ? 'retry' : 'finish'} @@ -48,7 +48,7 @@ process { publishDir = [ path: { "${params.outdir}/orthologs/panther" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: params.output_intermediates ] errorStrategy = {task.exitStatus == 10 ? 'retry' : 'finish'} @@ -59,7 +59,7 @@ process { publishDir = [ path: { "${params.outdir}/orthologs/orthoinspector" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, enabled: params.output_intermediates ] errorStrategy = {task.exitStatus == 10 ? 'retry' : 'finish'} @@ -70,7 +70,7 @@ process { publishDir = [ path: { "${params.outdir}/orthologs/eggnog" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: params.output_intermediates ] errorStrategy = {task.exitStatus == 10 ? 'retry' : 'finish'} @@ -93,7 +93,7 @@ process { publishDir = [ path: { "${params.outdir}/orthologs/filter_hits" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: params.output_intermediates ] } @@ -154,7 +154,7 @@ process { publishDir = [ path: { "${params.outdir}/alignment/filter" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? null : filename }, enabled: params.output_intermediates ] } @@ -213,7 +213,7 @@ process { publishDir = [ path: { "${params.outdir}/report/params" }, mode: params.publish_dir_mode, - saveAs: { filename -> filename.equals('versions.yml') ? null : filename } + saveAs: { filename -> filename.equals('versions.yml') ? 
null : filename }, enabled: params.output_intermediates ] } From df4b8a60ffee6e8fbc92ba86c1cc03416b19e3d7 Mon Sep 17 00:00:00 2001 From: itrujnara Date: Tue, 28 May 2024 16:08:15 +0200 Subject: [PATCH 09/13] Added timeout handling to Python requests --- bin/utils.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/bin/utils.py b/bin/utils.py index 9d062ba..fc9a5a0 100644 --- a/bin/utils.py +++ b/bin/utils.py @@ -7,7 +7,6 @@ from typing import Any import requests -from requests.exceptions import RequestException POLLING_INTERVAL = 0.5 @@ -16,7 +15,10 @@ def safe_get(url: str): Get a URL and return the response. """ try: - return requests.get(url) + return requests.get(url, timeout = 300) + except requests.exceptions.Timeout as e: + print(f"Request timed out. This might be due to a server issue. If this persists, try again later. Details:\n{e}", file=sys.stderr) + sys.exit(9) except requests.exceptions.RequestException as e: print(f"A network issue occurred. Retrying request. Details:\n{e}", file=sys.stderr) sys.exit(10) @@ -27,7 +29,10 @@ def safe_post(url: str, data: dict = dict(), json: dict = dict()): Post data to a URL and return the response. """ try: - return requests.post(url, data=data, json=json) + return requests.post(url, data = data, json = json, timeout = 300) + except requests.exceptions.Timeout as e: + print(f"Request timed out. This might be due to a server issue. If this persists, try again later. Details:\n{e}", file=sys.stderr) + sys.exit(9) except requests.exceptions.RequestException as e: print(f"A network issue occurred. Retrying request. 
Details:\n{e}", file=sys.stderr) sys.exit(10) From 3b82f75303d8750f56cd133c3aa0a022ca355709 Mon Sep 17 00:00:00 2001 From: itrujnara Date: Tue, 28 May 2024 16:10:37 +0200 Subject: [PATCH 10/13] Added retry for timeout --- bin/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bin/utils.py b/bin/utils.py index fc9a5a0..4662722 100644 --- a/bin/utils.py +++ b/bin/utils.py @@ -18,7 +18,7 @@ def safe_get(url: str): return requests.get(url, timeout = 300) except requests.exceptions.Timeout as e: print(f"Request timed out. This might be due to a server issue. If this persists, try again later. Details:\n{e}", file=sys.stderr) - sys.exit(9) + sys.exit(10) except requests.exceptions.RequestException as e: print(f"A network issue occurred. Retrying request. Details:\n{e}", file=sys.stderr) sys.exit(10) @@ -32,7 +32,7 @@ def safe_post(url: str, data: dict = dict(), json: dict = dict()): return requests.post(url, data = data, json = json, timeout = 300) except requests.exceptions.Timeout as e: print(f"Request timed out. This might be due to a server issue. If this persists, try again later. Details:\n{e}", file=sys.stderr) - sys.exit(9) + sys.exit(10) except requests.exceptions.RequestException as e: print(f"A network issue occurred. Retrying request. Details:\n{e}", file=sys.stderr) sys.exit(10) From 8bb4dd0b9f2c86e94d11085a60e318b767523abb Mon Sep 17 00:00:00 2001 From: Igor Trujnara <53370556+itrujnara@users.noreply.github.com> Date: Tue, 28 May 2024 18:10:05 +0200 Subject: [PATCH 11/13] Update docs/usage.md Co-authored-by: Jose Espinosa-Carrasco --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index bc77a53..0e76ab6 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -105,7 +105,7 @@ If you need reduced versions of the local databases for testing, you can find th ### Running offline -With large input sizes, you might want to run the pipeline locally, without runtime access to APIs. 
There are two main parameters used to achieve this. If you want to use local databases, set `--local_databases` to `true`. Remember to set `--use_all` to `false` to ensure the database step is run fully offline. If your input is especially large, you can also skip the initial online identification steps by setting `--offline_run` to `true`. Note that FASTA input will not work with this option enabled, and the pipeline will be aborted if this is attempted. You can check `test_offline.config` to see the required options for a fully offline run. Keep in mind that the options only affect ortholog finding, and the downstream analysis still requires connection to obtain sequences and structures. +With large input sizes, you might want to run the pipeline locally, without runtime access to APIs. There are two main parameters used to achieve this. If you want to use local databases, set `--local_databases` to `true`. Remember to set `--use_all` to `false` to ensure the database step is run fully offline. If your input is especially large, you can also skip the initial online identification steps by setting `--offline_run` to `true`. Note that FASTA input will not work with this option enabled, and the pipeline will be aborted if this is attempted. You can check [test_offline.config](https://github.com/nf-core/reportho/blob/master/conf/test_offline.config) to see the required options for a fully offline run. Keep in mind that the options only affect ortholog finding, and the downstream analysis still requires connection to obtain sequences and structures. While those options allow the pipeline to run its steps offline, the pipeline requires certain configuration files and container images that are downloaded from the internet. If you wish to run the pipeline on a machine without a connection, you can pre-download the required files with `nf-core download`. See [the nf-core tools documentation](https://nf-co.re/docs/nf-core-tools/pipelines/download) for details. 
From e61d5290155cb6d696c5c5bba9a5a4690f99b4b8 Mon Sep 17 00:00:00 2001 From: Igor Trujnara <53370556+itrujnara@users.noreply.github.com> Date: Tue, 28 May 2024 18:10:29 +0200 Subject: [PATCH 12/13] Update nextflow_schema.json Co-authored-by: Jose Espinosa-Carrasco --- nextflow_schema.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nextflow_schema.json b/nextflow_schema.json index abf877d..0a08e56 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -32,7 +32,7 @@ "output_intermediates": { "type": "boolean", "default": "false", - "description": "Output certain potentially interesting intermediate files, including specific prediction lists.", + "description": "Output intermediate files, including specific prediction lists.", "fa_icon": "fas fa-folder-open" }, "email": { From 824403cc62acbdaa30282b2d8f241fd02baada26 Mon Sep 17 00:00:00 2001 From: itrujnara Date: Tue, 28 May 2024 18:12:25 +0200 Subject: [PATCH 13/13] Added brief explanation of downstream --- docs/usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/usage.md b/docs/usage.md index 0e76ab6..cc4ee4d 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -111,7 +111,7 @@ While those options allow the pipeline to run its steps offline, the pipeline re ### Downstream analysis -Downstream analysis relies on online resources to obtain sequences and structures, and thus cannot be run offline. For your convenience, it will be automatically disabled if you enable `offline_run`. Note that in case some sequences or structures cannot be obtained, the corresponding ortholog will be excluded from the alignment and phylogeny. In particular, only the orthologs with both a sequence and a structure available will be retained if `use_structures` is enabled. +Downstream analysis (i.e. MSA and phylogeny) relies on online resources to obtain sequences and structures, and thus cannot be run offline. 
For your convenience, it will be automatically disabled if you enable `--offline_run`. Note that if a sequence or structure cannot be obtained for an ortholog, that ortholog will be excluded from the alignment and phylogeny. In particular, if `--use_structures` is enabled, only orthologs with both a sequence and a structure available will be retained. ### Updating the pipeline