Skip to content

Commit

Permalink
Merge pull request #45 from itrujnara/bug_fixes
Browse files Browse the repository at this point in the history
Multiple fixes
  • Loading branch information
itrujnara authored May 29, 2024
2 parents 2da7da1 + 824403c commit cb220ff
Show file tree
Hide file tree
Showing 9 changed files with 98 additions and 196 deletions.
11 changes: 8 additions & 3 deletions bin/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from typing import Any

import requests
from requests.exceptions import RequestException

POLLING_INTERVAL = 0.5

Expand All @@ -16,7 +15,10 @@ def safe_get(url: str):
Get a URL and return the response.
"""
try:
return requests.get(url)
return requests.get(url, timeout = 300)
except requests.exceptions.Timeout as e:
print(f"Request timed out. This might be due to a server issue. If this persists, try again later. Details:\n{e}", file=sys.stderr)
sys.exit(10)
except requests.exceptions.RequestException as e:
print(f"A network issue occurred. Retrying request. Details:\n{e}", file=sys.stderr)
sys.exit(10)
Expand All @@ -27,7 +29,10 @@ def safe_post(url: str, data: dict = dict(), json: dict = dict()):
Post data to a URL and return the response.
"""
try:
return requests.post(url, data=data, json=json)
return requests.post(url, data = data, json = json, timeout = 300)
except requests.exceptions.Timeout as e:
print(f"Request timed out. This might be due to a server issue. If this persists, try again later. Details:\n{e}", file=sys.stderr)
sys.exit(10)
except requests.exceptions.RequestException as e:
print(f"A network issue occurred. Retrying request. Details:\n{e}", file=sys.stderr)
sys.exit(10)
Expand Down
81 changes: 18 additions & 63 deletions conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ process {
publishDir = [
path: { "${params.outdir}/seqinfo" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.output_intermediates
]
errorStrategy = {task.exitStatus == 10 ? 'retry' : 'finish'}
maxRetries = 3
Expand All @@ -36,7 +37,8 @@ process {
publishDir = [
path: { "${params.outdir}/orthologs/oma" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.output_intermediates
]
errorStrategy = {task.exitStatus == 10 ? 'retry' : 'finish'}
maxRetries = 3
Expand All @@ -46,7 +48,8 @@ process {
publishDir = [
path: { "${params.outdir}/orthologs/panther" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.output_intermediates
]
errorStrategy = {task.exitStatus == 10 ? 'retry' : 'finish'}
maxRetries = 3
Expand All @@ -56,7 +59,8 @@ process {
publishDir = [
path: { "${params.outdir}/orthologs/orthoinspector" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.output_intermediates
]
errorStrategy = {task.exitStatus == 10 ? 'retry' : 'finish'}
maxRetries = 3
Expand All @@ -66,19 +70,15 @@ process {
publishDir = [
path: { "${params.outdir}/orthologs/eggnog" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.output_intermediates
]
errorStrategy = {task.exitStatus == 10 ? 'retry' : 'finish'}
maxRetries = 3
}

withName: 'MERGE_CSV' {
ext.args = '-f 1 --outer-join --na 0'
publishDir = [
path: { "${params.outdir}/orthologs/merge_csv" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'MAKE_SCORE_TABLE' {
Expand All @@ -93,7 +93,8 @@ process {
publishDir = [
path: { "${params.outdir}/orthologs/filter_hits" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.output_intermediates
]
}

Expand All @@ -105,33 +106,9 @@ process {
]
}

withName: 'MAKE_HITS_TABLE' {
publishDir = [
path: { "${params.outdir}/orthologs/hits" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'MERGE_HITS' {
ext.args = "-u 0 -k"
ext.prefix = "aggregated_hits"
publishDir = [
path: { "${params.outdir}/orthologs/hits" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'MAKE_STATS' {
publishDir = [
path: { "${params.outdir}/orthologs/stats" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'STATS2CSV' {
publishDir = [
path: { "${params.outdir}/orthologs/stats" },
mode: params.publish_dir_mode,
Expand All @@ -155,7 +132,7 @@ process {

withName: 'FETCH_SEQUENCES_ONLINE' {
publishDir = [
path: { "${params.outdir}/sequences" },
path: { "${params.outdir}/alignment/sequences" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
Expand All @@ -165,7 +142,7 @@ process {

withName: 'FETCH_AFDB_STRUCTURES' {
publishDir = [
path: { "${params.outdir}/structures" },
path: { "${params.outdir}/alignment/structures" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
Expand All @@ -177,15 +154,8 @@ process {
publishDir = [
path: { "${params.outdir}/alignment/filter" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'CREATE_TCOFFEETEMPLATE' {
publishDir = [
path: { "${params.outdir}/alignment/template" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.output_intermediates
]
}

Expand All @@ -201,14 +171,6 @@ process {
// Tree reconstruction
// ----------------------

withName: 'CONVERT_PHYLIP' {
publishDir = [
path: { "${params.outdir}/trees/convert" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'IQTREE' {
ext.args = '-m TEST' + (params.iqtree_bootstrap > 0 ? ' -bb ' + params.iqtree_bootstrap : '')
publishDir = [
Expand Down Expand Up @@ -247,19 +209,12 @@ process {
// Report generation
// ----------------------

withName: 'CONVERT_FASTA' {
publishDir = [
path: { "${params.outdir}/report/convert" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}

withName: 'DUMP_PARAMS' {
publishDir = [
path: { "${params.outdir}/report/params" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: params.output_intermediates
]
}

Expand Down
8 changes: 7 additions & 1 deletion docs/usage.md
Original file line number Diff line number Diff line change
Expand Up @@ -101,12 +101,18 @@ If you want to use local database copies for the run, you must provide the requi
| `eggnog_path` | `1_members.tsv.gz` |
| `eggnog_idmap_path` | `latest.Eukaryota.tsv.gz` |

If you need reduced versions of the local databases for testing, you can find them [here](https://github.com/nf-core/test-datasets/tree/reportho/testdata/databases). Note that they were designed to work with the [test samplesheet](https://github.com/nf-core/test-datasets/blob/reportho/testdata/samplesheet/samplesheet.csv) and will likely not provide any result for other queries.

### Running offline

With large input sizes, you might want to run the pipeline locally, without runtime access to APIs. There are two main parameters used to achieve this. If you want to use local databases, set `--local_databases` to `true`. Remember to set `--use_all` to `false` to ensure the database step is run fully offline. If your input is especially large, you can also skip the initial online identification steps by setting `--offline_run` to `true`. Note that FASTA input will not work with this option enabled. You can check `test_offline.config` to see the required options for a fully offline run. Keep in mind that the options only affect ortholog finding, and the downstream analysis still requires connection to obtain sequences and structures.
With large input sizes, you might want to run the pipeline locally, without runtime access to APIs. There are two main parameters used to achieve this. If you want to use local databases, set `--local_databases` to `true`. Remember to set `--use_all` to `false` to ensure the database step is run fully offline. If your input is especially large, you can also skip the initial online identification steps by setting `--offline_run` to `true`. Note that FASTA input will not work with this option enabled, and the pipeline will be aborted if this is attempted. You can check [test_offline.config](https://github.com/nf-core/reportho/blob/master/conf/test_offline.config) to see the required options for a fully offline run. Keep in mind that the options only affect ortholog finding, and the downstream analysis still requires connection to obtain sequences and structures.

While these options allow the pipeline to run its steps offline, it still requires certain configuration files and container images that are downloaded from the internet. If you wish to run the pipeline on a machine without a connection, you can pre-download the required files with `nf-core download`. See [the nf-core tools documentation](https://nf-co.re/docs/nf-core-tools/pipelines/download) for details.

### Downstream analysis

Downstream analysis (i.e. MSA and phylogeny) relies on online resources to obtain sequences and structures, and thus cannot be run offline. For your convenience, it will be automatically disabled if you enable `offline_run`. Note that in case some sequences or structures cannot be obtained, the corresponding ortholog will be excluded from the alignment and phylogeny. In particular, only the orthologs with both a sequence and a structure available will be retained if `use_structures` is enabled.

### Updating the pipeline

When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:
Expand Down
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
params {
// Input options
input = null
output_intermediates = false

// MultiQC options
multiqc_config = null
Expand Down
6 changes: 6 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,12 @@
"description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.",
"fa_icon": "fas fa-folder-open"
},
"output_intermediates": {
"type": "boolean",
"default": "false",
"description": "Output intermediate files, including specific prediction lists.",
"fa_icon": "fas fa-folder-open"
},
"email": {
"type": "string",
"description": "Email address for completion summary.",
Expand Down
Loading

0 comments on commit cb220ff

Please sign in to comment.