From ef28ccf22ecb9a6f89a9aaafeab356e3863bff7e Mon Sep 17 00:00:00 2001 From: npappas Date: Tue, 20 Oct 2020 10:12:29 +0200 Subject: [PATCH 01/15] added dockerfile for seeker --- phage-tool-Dockerfiles/seeker/Dockerfile | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 phage-tool-Dockerfiles/seeker/Dockerfile diff --git a/phage-tool-Dockerfiles/seeker/Dockerfile b/phage-tool-Dockerfiles/seeker/Dockerfile new file mode 100644 index 0000000..a02936f --- /dev/null +++ b/phage-tool-Dockerfiles/seeker/Dockerfile @@ -0,0 +1,14 @@ +FROM continuumio/miniconda3:latest + +RUN conda config --add channels conda-forge && \ + conda config --add channels default && \ + conda create -y --name seeker python=3.7 pip && \ + conda clean --all + +ENV PATH /opt/conda/envs/seeker/bin:$PATH + +SHELL ["conda", "run", "-n", "seeker", "/bin/bash", "-c"] + +RUN pip install --no-cache-dir --use-feature=2020-resolver seeker==1.0.3 + + From 74e243ca4e2a1fa8de6c3dfcbeafd302df56e3ec Mon Sep 17 00:00:00 2001 From: npappas Date: Tue, 20 Oct 2020 10:25:46 +0200 Subject: [PATCH 02/15] initial seeker module --- modules/tools/seeker.nf | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 modules/tools/seeker.nf diff --git a/modules/tools/seeker.nf b/modules/tools/seeker.nf new file mode 100644 index 0000000..7a3975d --- /dev/null +++ b/modules/tools/seeker.nf @@ -0,0 +1,13 @@ +process seeker { + label 'seeker' + errorStrategy 'ignore' + input: + tuple val(name), file(fasta) + output: + tuple val(name), file("${name}_*.list") + script: + """ + predict-metagenome ${fasta} > ${name}.tsv + cp ${name}.tsv ${name}_\${PWD##*/}.list + """ +} From bffc31f7505c54655b53451815825d8add580d51 Mon Sep 17 00:00:00 2001 From: npappas Date: Tue, 20 Oct 2020 11:04:19 +0200 Subject: [PATCH 03/15] include seeker subworkflow in main --- phage.nf | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/phage.nf b/phage.nf index da3af2e..4e11d7f 100755 ---
a/phage.nf +++ b/phage.nf @@ -192,6 +192,7 @@ if (!params.setup && !workflow.profile.contains('test') && !workflow.profile.con include { virsorter2 } from './modules/tools/virsorter2' include { filter_virsorter2 } from './modules/parser/filter_virsorter2' include { virsorter2_collect_data} from './modules/raw_data_collection/virsorter2_collect_data' + include { seeker } from '.modules/tools/seeker' /************* * DATABASES for Phage Identification @@ -576,7 +577,17 @@ workflow phigaro_wf { } else { phigaro_results = Channel.from( [ 'deactivated', 'deactivated'] ) } emit: phigaro_results -} +} + +workflow seeker_wf { + take: fasta + main: if (!params.sk) { + // run and filter seeker + filter_seeker(seeker(fasta).groupTuple(remainder: true)) + // results channel + seeker_results = filter_seeker.out + else { seeker_results = Channel.from( ['deactivated', 'deactivated'] ) } + emit: seeker_results workflow setup_wf { take: @@ -876,4 +887,4 @@ if (!params.setup) { log.info ( workflow.success ? "\nDone! Results are stored here --> $params.output \nThank you for using What the Phage\n \nPlease cite us: https://doi.org/10.1101/2020.07.24.219899 \ \n\nPlease also cite the other tools we use in our workflow --> $params.output/literature \n" : "Oops .. 
something went wrong" ) } -} \ No newline at end of file +} From 8eab679b9808e87454cd9d80b919b67d6386422a Mon Sep 17 00:00:00 2001 From: npappas Date: Tue, 20 Oct 2020 11:04:51 +0200 Subject: [PATCH 04/15] add seeker config options --- nextflow.config | 3 +++ 1 file changed, 3 insertions(+) diff --git a/nextflow.config b/nextflow.config index e7f96b6..19c0f0b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -38,6 +38,7 @@ params { vb = false vn = false ph = false + sk = false identify = false annotate = false virome = false @@ -51,6 +52,7 @@ params { sm_filter = '0.5' vn_filter = '0.5' vs2_filter = '0.9' + sk_filter = '0.5' // pp_filter = '' // vb_filter = '' @@ -146,6 +148,7 @@ profiles { vb = true vn = true dv = true + sk = true anno = true fasta = 'test-data/all_pos_phage.fa' } From 0013b596fb6edf3e5cfee753fd67d5594b8dfe0a Mon Sep 17 00:00:00 2001 From: npappas Date: Tue, 20 Oct 2020 11:05:28 +0200 Subject: [PATCH 05/15] added filtering for seeker --- modules/parser/filter_seeker.nf | 34 +++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 modules/parser/filter_seeker.nf diff --git a/modules/parser/filter_seeker.nf b/modules/parser/filter_seeker.nf new file mode 100644 index 0000000..310aff5 --- /dev/null +++ b/modules/parser/filter_seeker.nf @@ -0,0 +1,34 @@ +process filter_seeker { + label 'ubuntu' + input: + tuple val(name), file(results) + output: + tuple val(name), file("seeker_*.txt") + shell: + """ + tail -n+2 *.list | sort -gr -k3 | awk '\$3>=${params.sk_filter}' | awk '{ print \$1 }' > seeker_\${PWD##*/}.txt + """ +} + +/* +raw output: + +$ predict-metagenome example_input/PGE.txt > PGE.out.tsv +$ cat PGE.out.tsv +name prediction score +MH356729.1 Phage 0.85 +LC333428.1 Phage 0.79 +MK903728.1 Phage 0.94 +MN016939.1 Bacteria 0.34 +MN095770.1 Phage 0.82 +MN095772.1 Phage 0.84 +MN176219.1 Phage 0.52 +MN310548.1 Phage 0.62 +MN379739.1 Phage 0.82 +MN419153.1 Phage 0.89 + +"Sequences with scores above 0.5 are predicted 
phages, +while sequences with scores below 0.5 are predicted bacteria." + +*/ + From c98d393b00a2c9c947eff3b2d3075c7f53f05bd9 Mon Sep 17 00:00:00 2001 From: npappas Date: Tue, 20 Oct 2020 11:06:03 +0200 Subject: [PATCH 06/15] update configs for seeker --- configs/container.config | 1 + configs/local.config | 1 + configs/node.config | 3 ++- 3 files changed, 4 insertions(+), 1 deletion(-) diff --git a/configs/container.config b/configs/container.config index 2d27ffa..e9ba554 100644 --- a/configs/container.config +++ b/configs/container.config @@ -22,5 +22,6 @@ process { withLabel: virsorter { container = 'multifractal/virsorter:0.1.2' } withLabel: phigaro { container = 'multifractal/phigaro:0.5.2' } withLabel: virsorter2 { container = 'multifractal/virsorter-2:0.1' } + withLabel: seeker { container = 'papanikos/seeker:1.0.3' } } diff --git a/configs/local.config b/configs/local.config index cd4718f..abbb9e8 100644 --- a/configs/local.config +++ b/configs/local.config @@ -22,4 +22,5 @@ process { withLabel: virnet { cpus = 4 } withLabel: virsorter { cpus = params.cores } withLabel: phigaro { cpus = params.cores } + withLabel: seeker { cpus = params.cores } } diff --git a/configs/node.config b/configs/node.config index ed6ecaa..caf992e 100644 --- a/configs/node.config +++ b/configs/node.config @@ -24,4 +24,5 @@ process { withLabel: virsorter { cpus = 24; memory = '32 GB' } withLabel: noDocker { cpus = 4; memory = '4 GB' } withLabel: phigaro { cpus = 24; memory = '32 GB' } -} \ No newline at end of file + withLabel: seeker { cpus = 24; memory = '32 GB' } +} From 850539c3497b54ca63e23eff2f03942a21067af8 Mon Sep 17 00:00:00 2001 From: npappas Date: Tue, 20 Oct 2020 11:27:50 +0200 Subject: [PATCH 07/15] add missing brace --- phage.nf | 1 + 1 file changed, 1 insertion(+) diff --git a/phage.nf b/phage.nf index 4e11d7f..08e9c60 100755 --- a/phage.nf +++ b/phage.nf @@ -586,6 +586,7 @@ workflow seeker_wf { filter_seeker(seeker(fasta).groupTuple(remainder: true)) // results 
channel seeker_results = filter_seeker.out + } else { seeker_results = Channel.from( ['deactivated', 'deactivated'] ) } emit: seeker_results From d9bf4a2b8370a6ead593b2345a6f337536a5919b Mon Sep 17 00:00:00 2001 From: npappas Date: Tue, 20 Oct 2020 12:10:34 +0200 Subject: [PATCH 08/15] update identify_fasta_msf, help msg, includes --- phage.nf | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/phage.nf b/phage.nf index 08e9c60..b22ba91 100755 --- a/phage.nf +++ b/phage.nf @@ -90,7 +90,7 @@ else { exit 1, "No executer selected: -profile EXECUTER,ENGINE" } if (!params.setup && !workflow.profile.contains('test') && !workflow.profile.contains('smalltest')) { if ( !params.fasta && !params.fastq ) { exit 1, "input missing, use [--fasta] or [--fastq]"} - if ( params.ma && params.mp && params.vf && params.vs && params.pp && params.dv && params.sm && params.vn && params.vb && params.ph ) { + if ( params.ma && params.mp && params.vf && params.vs && params.pp && params.dv && params.sm && params.vn && params.vb && params.ph && params.sk ) { exit 0, "What the... 
you deactivated all the tools"} } @@ -192,7 +192,8 @@ if (!params.setup && !workflow.profile.contains('test') && !workflow.profile.con include { virsorter2 } from './modules/tools/virsorter2' include { filter_virsorter2 } from './modules/parser/filter_virsorter2' include { virsorter2_collect_data} from './modules/raw_data_collection/virsorter2_collect_data' - include { seeker } from '.modules/tools/seeker' + include { seeker } from './modules/tools/seeker' + include { filter_seeker } from './modules/parser/filter_seeker' /************* * DATABASES for Phage Identification @@ -589,6 +590,7 @@ workflow seeker_wf { } else { seeker_results = Channel.from( ['deactivated', 'deactivated'] ) } emit: seeker_results +} workflow setup_wf { take: @@ -684,6 +686,7 @@ workflow identify_fasta_MSF { .concat(vibrant_virome_wf(fasta_validation_wf.out, vibrant_DB)) .concat(virnet_wf(fasta_validation_wf.out)) .concat(phigaro_wf(fasta_validation_wf.out)) + .concat(seeker_wf(fasta_validation_wf.out)) .filter { it != 'deactivated' } // removes deactivated tool channels .groupTuple() @@ -855,6 +858,7 @@ def helpMSG() { --vn deactivates virnet --vs deactivates virsorter --ph deactivates phigaro + --sk deactivates seeker Adjust tools individually --virome deactivates virome-mode (vibrand and virsorter) @@ -864,6 +868,7 @@ def helpMSG() { --vs2_filter dsDNAphage score cut-off [default: $params.vs2_filter] --sm_filter Similarity score [default: $params.sm_filter] --vn_filter Score [default: $params.vn_filter] + --sk_filter score cut-off [default: $params.sk_filter] Workflow control: --identify only phage identification, skips analysis From 978e631835ef202ed5a35b7ac50b046e8c8b51d2 Mon Sep 17 00:00:00 2001 From: npappas Date: Tue, 20 Oct 2020 14:42:34 +0200 Subject: [PATCH 09/15] added seeker citation --- docs/Citations.bib | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/docs/Citations.bib b/docs/Citations.bib index 793bb4f..89ae437 100644 --- a/docs/Citations.bib +++ 
b/docs/Citations.bib @@ -203,3 +203,16 @@ @article{starikova2020phigaro publisher={Oxford University Press} } +@article{auslander2020seeker, + title={Seeker: alignment-free identification of bacteriophage genomes by deep learning}, + author={Auslander, Noam and Gussow, Ayal B and Benler, Sean and Wolf, Yuri I and Koonin, Eugene V}, + journal={Nucleic Acids Research}, + year={2020}, + month={10}, + issn={0305-1048}, + doi={10.1093/nar/gkaa856}, + url={https://doi.org/10.1093/nar/gkaa856}, + note={gkaa856}, + publisher={Oxford University Press} +} + From 86d73e165704ec94abe3bba17caebbb4595a6f31 Mon Sep 17 00:00:00 2001 From: npappas Date: Tue, 20 Oct 2020 14:43:25 +0200 Subject: [PATCH 10/15] fix dockerfile for ps --- phage-tool-Dockerfiles/seeker/Dockerfile | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/phage-tool-Dockerfiles/seeker/Dockerfile b/phage-tool-Dockerfiles/seeker/Dockerfile index a02936f..d47069c 100644 --- a/phage-tool-Dockerfiles/seeker/Dockerfile +++ b/phage-tool-Dockerfiles/seeker/Dockerfile @@ -1,5 +1,9 @@ FROM continuumio/miniconda3:latest +RUN apt-get update -y && apt-get install -y procps && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + RUN conda config --add channels conda-forge && \ conda config --add channels default && \ conda create -y --name seeker python=3.7 pip && \ From f730cc604c962e80dfd2b960741277f5277998b8 Mon Sep 17 00:00:00 2001 From: npappas Date: Tue, 20 Oct 2020 15:24:56 +0200 Subject: [PATCH 11/15] include seeker in descriptions, minor style fixes --- README.md | 128 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 83 insertions(+), 45 deletions(-) diff --git a/README.md b/README.md index 39a4fa8..f3ea00b 100644 --- a/README.md +++ b/README.md @@ -14,13 +14,16 @@ [![Twitter Follow](https://img.shields.io/twitter/follow/mult1fractal.svg?style=social)](https://twitter.com/mult1fractal) # What the Phage (WtP) + * by Christian Brandt & Mike Marquet * **this tool is under 
active development,feel free to report issues and add suggestions** -* use a release candidate for a stable experience via `-r` e.g. `-r v0.9.0` - * these are extensively tested release versions of WtP +* Use a release candidate for a stable experience via `-r` e.g. `-r v0.9.0` + * These are extensively tested release versions of WtP * [releases of WtP are here](https://github.com/replikation/What_the_Phage/releases) + ## Preprint: + > **What the Phage: A scalable workflow for the identification and analysis of phage sequences** > > M. Marquet, M. Hölzer, M. W. Pletz, A. Viehweger, O. Makarewicz, R. Ehricht, C. Brandt @@ -28,7 +31,7 @@ > doi: https://doi.org/10.1101/2020.07.24.219899 -# Table of content +# Table of contents * [What is this Repo?](#What-is-this-Repo) * [Installation](#Installation) @@ -41,7 +44,7 @@ * [Workflow control](#Workflow-control) * [Profiles](#Profiles) * [Data Handling](#Data-handling) - * [Pre-download for Offline-mode](#Pre-download-for-Offline-mode) + * [Pre-download for offline-mode](#Pre-download-for-offline-mode) * [Results / Examples](#Example-/-results) * [Under the hood](#Under-the-hood) * [Included bioinformatic tools](#Included-bioinformatic-tools) @@ -50,11 +53,11 @@ # What is this repo #### TL;DR -* WtP is a scalable and easy-to-use workflow for phage identification and analysis. Our tool currently combines 9 established phage [identification tools](#included-bioinformatic-tools) +* WtP is a scalable and easy-to-use workflow for phage identification and analysis. 
Our tool currently combines 10 established phage [identification tools](#included-bioinformatic-tools) * An attempt to streamline the usage of various phage identification and prediction tools * The main focus is stability and data filtering/analysis for the user * The tool is intended for fasta and fastq reads to identify phages in contigs/reads -* a proper Prophage detection is not implemented (yet) - but a handful of tools report them - so they are mostly identified +* Proper prophage detection is not implemented (yet) - but a handful of tools report them - so they are mostly identified # Installation @@ -85,28 +88,32 @@ sudo usermod -a -G docker $USER > * tar (should be already installed) * Choose one: -> * [Docker installation](https://docs.docker.com/v17.09/engine/installation/linux/docker-ce/ubuntu/#install-docker-ce) -> * add docker to your User group via `sudo usermod -a -G docker $USER` + +> * [Docker installation](https://docs.docker.com/v17.09/engine/installation/linux/docker-ce/ubuntu/#install-docker-ce) +> * add docker to your User group via `sudo usermod -a -G docker $USER` > * [Singularity installation](https://github.com/sylabs/singularity/blob/master/INSTALL.md) * Restart your computer -* Try out the installation by entering the following (analyses 1 sample with 10 phage sequences ~ 30 min runtime) +* Try out the installation by entering the following (analyses 1 sample with 10 phage sequences ~ 30 min runtime): ```shell # for docker (local use) -nextflow run replikation/What_the_Phage -r v0.8.0 --cores 8 -profile smalltest,local,docker +nextflow run replikation/What_the_Phage -r v0.9.0 --cores 8 -profile smalltest,local,docker + # for singularity (slurm use) -nextflow run replikation/What_the_Phage -r v0.8.0 --cores 8 -profile smalltest,slurm,singularity +nextflow run replikation/What_the_Phage -r v0.9.0 --cores 8 -profile smalltest,slurm,singularity ``` # Execution / Examples / Help ## Call help via "--help" + ```bash -nextflow run 
replikation/What_the_Phage -r v0.8.0 --help +nextflow run replikation/What_the_Phage -r v0.9.0 --help ``` ## Quick execution + * Just give me the command god dammit..... ```bash @@ -115,23 +122,27 @@ nextflow run \ # calling the workflow --fasta /path/to/file.fa \ # provide a fasta-file as input --cores 8 \ # number of cores you want to use -profile local,docker # choose the environment:local and docker - -r v0.8.0 # WtP release version + -r v0.9.0 # WtP release version ``` - - - ## Advanced execution ### Advanced execution command -* e.g.: + +* The following command will run WtP using + * ... all `fasta` files found in the `/path/to` directory. + * ... a `local` host (the machine where it is launched) and `docker` containers for handling software dependencies. + * ... 20 cores for parallel execution. + * ... release `0.9.0` of the workflow. + * ... identification only - no annotation. + * ... and excluding `deepvirfinder`, `virfinder` and `marvel` from the list of tools. ```shell nextflow run replikation/What_the_Phage \ --fasta '/path/to/*.fasta' \ -profile local,docker \ --cores 20 \ - -r v0.8.0 \ + -r v0.9.0 \ --anno \ --dv \ --vf \ @@ -139,9 +150,12 @@ nextflow run replikation/What_the_Phage \ ``` * The order of flags does not matter + ### Inputs + * Input examples: * wildcards need single quotes around the path (`'`) + ```bash --fasta /path/to/phage-assembly.fa # path to your fasta-file --fasta '/path/to/*.fa' # path to all .fa files in a dir @@ -150,6 +164,7 @@ nextflow run replikation/What_the_Phage \ ``` ### Workflow control + * Turn on/off tools (check `--help` for more) ```bash @@ -162,44 +177,53 @@ nextflow run replikation/What_the_Phage \ --vf # deactivates virfinder --vn # deactivates virnet --vs # deactivates virsorter - --ph # deactivates phigaro + --ph # deactivates phigaro + --sk # deactivates seeker --identify # only phage identification, skips analysis --annotate # only annotation, skips phage identification ``` -* min size of contigs for
identification +* Set the minimum size of contigs for identification ```bash ---filter # min contig size [bp] to analyse +--filter # min contig size [bp] to analyse [default: 1500] ``` + ### Profiles -1. Choose the environment: local, slurm, lsf or ebi -2. Choose the engine: docker or singularity -* examples: + +1. Choose the environment: `local`, `slurm`, `lsf` or `ebi` +2. Choose the engine: `docker` or `singularity` + +Examples: + ```bash -profile local,docker -profile local,singularity -profile lsf,docker ``` + ### Release candidate * A release candidate is a [released version of WtP](https://github.com/replikation/What_the_Phage/releases) which ensures proper functionality -* version control ensures reproducibility as each tools version is also "locked" within the release candidate - * databases have no automatic version control (they are downloaded from the source) - * if you need version control for databases, just make a copy of the database dir after download - * you can specify the database dir via the `--database` flag (see below) - * WtP only downloads a database if it's missing, it is not "auto-updating" them -* add this flag to your command and a specific release is used instead +* Version control ensures reproducibility as each tools version is also "locked" within the release candidate + * Databases have no automatic version control (they are downloaded from the source) + > If you need version control for databases, just make a copy of the database dir after download + > you can specify the database directory via the `--database` flag (see below) + * WtP only downloads a database if it's missing. It is **not** "auto-updating" them +* A release can be specified in the command line, using the `-r` flag. 
For example: + ```bash -r v0.8.0 ``` + ### Data handling * WtP handles everything by default * If you need to change paths use the following commands - * It's useful to specify `--workdir` to your current working dir if `/tmp` (default) has limited space + * It's useful to set the parameter `--workdir` to your current working directory if `/tmp` (default) has limited space. + ```bash --workdir /path/to/dir # defines the path where nextflow writes temporary files, default: '/tmp/nextflow-phage-$USER' --databases /path/to/dir # specify download location of databases, default './nextflow-autodownload-databases' @@ -207,35 +231,44 @@ nextflow run replikation/What_the_Phage \ --output results # path of the outdir, default './results' ``` +### Pre-download for offline-mode -### Pre-download for Offline-mode - -* `--setup` skips analysis and just downloads all databases and containers +* The flag `--setup` skips analysis and just downloads all databases and containers * Needs roughly 30 GB storage for databases, excluding programs ```bash nextflow run replikation/What_the_Phage --setup -r v0.8.0 ``` -* you can change the database download location via (--databases) -* make sure that you specify the database location when executing WtP, if you change the default path -* singularity images sometimes fail during building, just try to re-execute `--setup` - * WtP attempts to build images up to 3 times, image building is individually skipped if present +* You can specify the location where all databases are stored, by providing a location with the `--databases` parameter. +* Make sure that you specify the database location when executing WtP, if you change the default path. +* Singularity images sometimes fail during building, just try to re-execute with `--setup`. + * WtP attempts to build images up to 3 times, image building is individually skipped if present. + # Example results #### 1. 
Identification Tool and contig overview (UpSetR) ![plot](figures/plot.svg) -*Figure 1:* This chart (UpSetR plot) quantifies the result-intersections of the phage identification tools, similar to a Venn diagram. The amount of positive phage-sequences identified by each tool is represented on the left barplot in blue. The dot plot shows via line connection(s) which of the tools identified the exact same positive phage sequences. The amount of these shared matches is quantified as a barplot above each corresponding dot pattern. +*Figure 1:* This chart (UpSetR plot) quantifies the result-intersections of the phage identification tools, similar to a Venn diagram. +The number of positive phage-sequences identified by each tool is represented on the left barplot in blue. +The dot plot shows via line connection(s) which of the tools identified the exact same positive phage sequences. +The amount of these shared matches is quantified as a barplot above each corresponding dot pattern. + #### 2. Annotation Visualization (Chromomap) -* [chromomap results](https://replikation.github.io/What_the_Phage/index.html) +* [Chromomap results](https://replikation.github.io/What_the_Phage/index.html) + +*See Link:* The graphical output of the annotation shows an overview of the individual loci of the predicted ORFs and the corresponding genes in the fasta sequences identified as phages. +For better visibility, we have chosen 4 categories: tail, capsid, baseplate and other. +This output can be used to verify the identified sequences (if the predicted sequences make sense or not). +The annotation results are additionally plotted in an interactive HTML-file and are available as a file for further analysis. -*See Link:* The graphical output of the annotation shows an overview of the individual loci of the predicted ORFs and the corresponding genes in the fasta sequences identified as phages. For a better visibility, we have chosen 4 categories tail, capsid, baseplate, and other. 
This output can be used to verify the identified sequences (if the predicted sequences make sense or not). The annotation results are additionally plotted in an interactive HTML-file and are available as a file for further analysis. #### 3. Summary Table (checkV + Results) -* check [CheckV](https://bitbucket.org/berkeleylab/checkv/src/master/) for a detailed explanation + +* Check [CheckV](https://bitbucket.org/berkeleylab/checkv/src/master/) for a detailed explanation. contig_id| contig_length| genome_copies| gene_count| viral_genes| host_genes| checkv_quality| miuvig_quality| completeness| completeness_method| contamination| provirus| |-|-|-|-|-|-|-|-|-|-|-|-| @@ -258,13 +291,14 @@ pos_phage_9| 221908| 1| 310| 48| 9| High-quality| High-quality| 100| AAI-based| *Figure 3:* This plot shows a simplified dag-chart of WtP for better understanding of what's going on behind the curtain. - # Included bioinformatic tools + * Please cite the following tools ### Identification -Toolname/Gitlink | Reference | + +Toolname/Gitlink | Reference | |-|-| [MARVEL](https://github.com/LaboratorioBioinformatica/MARVEL#metagenomic-analysis-and-retrieval-of-viral-elements)|[MARVEL, a Tool for Prediction of Bacteriophage Sequences in Metagenomic Bins](https://www.frontiersin.org/articles/10.3389/fgene.2018.00304/full) [VirFinder](https://github.com/jessieren/VirFinder)|[VirFinder: R package for identifying viral sequences from metagenomic data using sequence signatures](https://link.springer.com/epdf/10.1186/s40168-017-0283-5?) 
@@ -277,17 +311,21 @@ Toolname/Gitlink | Reference | [VirNet](https://github.com/alyosama/virnet)|[Deep attention model for viral reads identification](https://ieeexplore.ieee.org/document/8639400) [Phigaro](https://github.com/bobeobibo/phigaro)| [Phigaro: high throughput prophage sequence annotation](https://www.biorxiv.org/content/10.1101/598243v1) [Virsorter2 beta](https://github.com/jiarong/VirSorter2)| - +[Seeker](https://github.com/gussow/seeker)|[Seeker: alignment-free identification of bacteriophage genomes by deep learning](https://doi.org/10.1093/nar/gkaa856)| ### Annotation & classification + Toolname/Git | Reference |-|-| [prodigal](https://github.com/hyattpd/Prodigal)|[Prodigal: prokaryotic gene recognition and translation initiation site identification](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-11-119) [hmmer](http://hmmer.org/)|[nhmmer: DNA homology search with profile HMMs](https://academic.oup.com/bioinformatics/article/29/19/2487/186765) [chromomap](https://cran.r-project.org/web/packages/chromoMap/vignettes/chromoMap.html)| [CheckV](https://bitbucket.org/berkeleylab/checkv/src/master/)|[CheckV: assessing the quality of metagenome-assembled viral genomes](https://www.biorxiv.org/content/10.1101/2020.05.06.081778v1) + + ### Other tools + Toolname/Git | Reference |-|-| [samtools](https://github.com/samtools/samtools)|[The Sequence Alignment/Map format and SAMtools](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2723002/) From 0f0ca691cda1c36dcaaf0f0d7a6fbdbcda570018 Mon Sep 17 00:00:00 2001 From: npappas Date: Tue, 20 Oct 2020 15:40:35 +0200 Subject: [PATCH 12/15] fix whitespace --- configs/container.config | 2 +- configs/local.config | 2 +- configs/node.config | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/configs/container.config b/configs/container.config index e9ba554..50de8bd 100644 --- a/configs/container.config +++ b/configs/container.config @@ -22,6 +22,6 @@ process { withLabel: virsorter { 
container = 'multifractal/virsorter:0.1.2' } withLabel: phigaro { container = 'multifractal/phigaro:0.5.2' } withLabel: virsorter2 { container = 'multifractal/virsorter-2:0.1' } - withLabel: seeker { container = 'papanikos/seeker:1.0.3' } + withLabel: seeker { container = 'papanikos/seeker:1.0.3' } } diff --git a/configs/local.config b/configs/local.config index abbb9e8..6ee4113 100644 --- a/configs/local.config +++ b/configs/local.config @@ -22,5 +22,5 @@ process { withLabel: virnet { cpus = 4 } withLabel: virsorter { cpus = params.cores } withLabel: phigaro { cpus = params.cores } - withLabel: seeker { cpus = params.cores } + withLabel: seeker { cpus = params.cores } } diff --git a/configs/node.config b/configs/node.config index caf992e..3a876d9 100644 --- a/configs/node.config +++ b/configs/node.config @@ -24,5 +24,5 @@ process { withLabel: virsorter { cpus = 24; memory = '32 GB' } withLabel: noDocker { cpus = 4; memory = '4 GB' } withLabel: phigaro { cpus = 24; memory = '32 GB' } - withLabel: seeker { cpus = 24; memory = '32 GB' } + withLabel: seeker { cpus = 24; memory = '32 GB' } } From 15587b0d40f8a6f5b73837f15a61c9da21e4528d Mon Sep 17 00:00:00 2001 From: npappas Date: Tue, 20 Oct 2020 15:42:28 +0200 Subject: [PATCH 13/15] remove duplicate Docs folder --- Docs/Citations.txt | 193 --------------------------------------------- 1 file changed, 193 deletions(-) delete mode 100644 Docs/Citations.txt diff --git a/Docs/Citations.txt b/Docs/Citations.txt deleted file mode 100644 index 8380ca8..0000000 --- a/Docs/Citations.txt +++ /dev/null @@ -1,193 +0,0 @@ -@article{marquet2020phage, - title={What the Phage: A scalable workflow for the identification and analysis of phage sequences}, - author={Marquet, Mike and H{\"o}lzer, Martin and Pletz, Mathias W and Viehweger, Adrian and Makarewicz, Oliwia and Ehricht, Ralf and Brandt, Christian}, - journal={bioRxiv}, - year={2020}, - publisher={Cold Spring Harbor Laboratory} -} - -@article{amgarten2018marvel, - 
title={MARVEL, a tool for prediction of bacteriophage sequences in metagenomic bins}, - author={Amgarten, Deyvid and Braga, Lucas PP and da Silva, Aline M and Setubal, Jo{\~a}o C}, - journal={Frontiers in genetics}, - volume={9}, - pages={304}, - year={2018}, - publisher={Frontiers} -} - -@article{ren2017virfinder, - title={VirFinder: a novel k-mer based tool for identifying viral sequences from assembled metagenomic data}, - author={Ren, Jie and Ahlgren, Nathan A and Lu, Yang Young and Fuhrman, Jed A and Sun, Fengzhu}, - journal={Microbiome}, - volume={5}, - number={1}, - pages={69}, - year={2017}, - publisher={Springer} -} - -@article{fang2019ppr, - title={PPR-Meta: a tool for identifying phages and plasmids from metagenomic fragments using deep learning}, - author={Fang, Zhencheng and Tan, Jie and Wu, Shufang and Li, Mo and Xu, Congmin and Xie, Zhongjie and Zhu, Huaiqiu}, - journal={GigaScience}, - volume={8}, - number={6}, - pages={giz066}, - year={2019}, - publisher={Oxford University Press} -} - -@article{roux2015virsorter, - title={VirSorter: mining viral signal from microbial genomic data}, - author={Roux, Simon and Enault, Francois and Hurwitz, Bonnie L and Sullivan, Matthew B}, - journal={PeerJ}, - volume={3}, - pages={e985}, - year={2015}, - publisher={PeerJ Inc.} -} - -@article{jurtz2016metaphinder, - title={MetaPhinder—identifying bacteriophage sequences in metagenomic data sets}, - author={Jurtz, Vanessa Isabell and Villarroel, Julia and Lund, Ole and Voldby Larsen, Mette and Nielsen, Morten}, - journal={PLoS One}, - volume={11}, - number={9}, - pages={e0163111}, - year={2016}, - publisher={Public Library of Science San Francisco, CA USA} -} - -@article{ren2018identifying, - title={Identifying viruses from metagenomic data by deep learning}, - author={Ren, Jie and Song, Kai and Deng, Chao and Ahlgren, Nathan A and Fuhrman, Jed A and Li, Yi and Xie, Xiaohui and Sun, Fengzhu}, - journal={arXiv preprint arXiv:1806.07810}, - year={2018} -} - 
-@article{brown2016sourmash, - title={sourmash: a library for MinHash sketching of DNA}, - author={Brown, C Titus and Irber, Luiz}, - journal={Journal of Open Source Software}, - volume={1}, - number={5}, - pages={27}, - year={2016} -} - -@article{kieft2020vibrant, - title={VIBRANT: automated recovery, annotation and curation of microbial viruses, and evaluation of viral community function from genomic sequences}, - author={Kieft, Kristopher and Zhou, Zhichao and Anantharaman, Karthik}, - journal={Microbiome}, - volume={8}, - number={1}, - pages={1--23}, - year={2020}, - publisher={BioMed Central} -} - -@inproceedings{abdelkareem2018virnet, - title={Virnet: Deep attention model for viral reads identification}, - author={Abdelkareem, Aly O and Khalil, Mahmoud I and Elaraby, Mostafa and Abbas, Hazem and Elbehery, Ali HA}, - booktitle={2018 13th International Conference on Computer Engineering and Systems (ICCES)}, - pages={623--626}, - year={2018}, - organization={IEEE} -} - -library-citation-WtP-other-tools - -@article{nayfach2020checkv, - title={CheckV: assessing the quality of metagenome-assembled viral genomes}, - author={Nayfach, Stephen and Camargo, Antonio Pedro and Eloe-Fadrosh, Emiley and Roux, Simon and Kyrpides, Nikos}, - journal={BioRxiv}, - year={2020}, - publisher={Cold Spring Harbor Laboratory} -} - -@article{anand2019chromomap, - title={chromoMap: An R package for Interactive Visualization and Annotation of Chromosomes}, - author={Anand, Lakshay}, - journal={bioRxiv}, - pages={605600}, - year={2019}, - publisher={Cold Spring Harbor Laboratory} -} - -@article{wheeler2013nhmmer, - title={nhmmer: DNA homology search with profile HMMs}, - author={Wheeler, Travis J and Eddy, Sean R}, - journal={Bioinformatics}, - volume={29}, - number={19}, - pages={2487--2489}, - year={2013}, - publisher={Oxford University Press} -} - -@article{hyatt2010prodigal, - title={Prodigal: prokaryotic gene recognition and translation initiation site identification}, - 
author={Hyatt, Doug and Chen, Gwo-Liang and LoCascio, Philip F and Land, Miriam L and Larimer, Frank W and Hauser, Loren J}, - journal={BMC bioinformatics}, - volume={11}, - number={1}, - pages={119}, - year={2010}, - publisher={Springer} -} - -@article{li2009sequence, - title={The sequence alignment/map format and SAMtools}, - author={Li, Heng and Handsaker, Bob and Wysoker, Alec and Fennell, Tim and Ruan, Jue and Homer, Nils and Marth, Gabor and Abecasis, Goncalo and Durbin, Richard}, - journal={Bioinformatics}, - volume={25}, - number={16}, - pages={2078--2079}, - year={2009}, - publisher={Oxford University Press} -} - -@article{shen2016seqkit, - title={SeqKit: a cross-platform and ultrafast toolkit for FASTA/Q file manipulation}, - author={Shen, Wei and Le, Shuai and Li, Yan and Hu, Fuquan}, - journal={PloS one}, - volume={11}, - number={10}, - pages={e0163962}, - year={2016}, - publisher={Public Library of Science San Francisco, CA USA} -} - -@article{conway2017upsetr, - title={UpSetR: an R package for the visualization of intersecting sets and their properties}, - author={Conway, Jake R and Lex, Alexander and Gehlenborg, Nils}, - journal={Bioinformatics}, - volume={33}, - number={18}, - pages={2938--2940}, - year={2017}, - publisher={Oxford University Press} -} - -@article{di2017nextflow, - title={Nextflow enables reproducible computational workflows}, - author={Di Tommaso, Paolo and Chatzou, Maria and Floden, Evan W and Barja, Pablo Prieto and Palumbo, Emilio and Notredame, Cedric}, - journal={Nature biotechnology}, - volume={35}, - number={4}, - pages={316--319}, - year={2017}, - publisher={Nature Publishing Group} -} - -@article{boettiger2015introduction, - title={An introduction to Docker for reproducible research}, - author={Boettiger, Carl}, - journal={ACM SIGOPS Operating Systems Review}, - volume={49}, - number={1}, - pages={71--79}, - year={2015}, - publisher={ACM New York, NY, USA} -} From 59df2fead42ec61e9ddb5ee9fee6cae40aee21b6 Mon Sep 17 00:00:00 
2001 From: npappas Date: Tue, 20 Oct 2020 15:45:57 +0200 Subject: [PATCH 14/15] fix whitespace (again) --- phage.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/phage.nf b/phage.nf index b22ba91..b02152d 100755 --- a/phage.nf +++ b/phage.nf @@ -686,7 +686,7 @@ workflow identify_fasta_MSF { .concat(vibrant_virome_wf(fasta_validation_wf.out, vibrant_DB)) .concat(virnet_wf(fasta_validation_wf.out)) .concat(phigaro_wf(fasta_validation_wf.out)) - .concat(seeker_wf(fasta_validation_wf.out)) + .concat(seeker_wf(fasta_validation_wf.out)) .filter { it != 'deactivated' } // removes deactivated tool channels .groupTuple() From 2be979e19353dc2193e23b4879d8fffa0ee11ad7 Mon Sep 17 00:00:00 2001 From: npappas Date: Tue, 20 Oct 2020 15:52:08 +0200 Subject: [PATCH 15/15] proper formatting --- phage.nf | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/phage.nf b/phage.nf index b02152d..274bd82 100755 --- a/phage.nf +++ b/phage.nf @@ -192,8 +192,8 @@ if (!params.setup && !workflow.profile.contains('test') && !workflow.profile.con include { virsorter2 } from './modules/tools/virsorter2' include { filter_virsorter2 } from './modules/parser/filter_virsorter2' include { virsorter2_collect_data} from './modules/raw_data_collection/virsorter2_collect_data' - include { seeker } from './modules/tools/seeker' - include { filter_seeker } from './modules/parser/filter_seeker' + include { seeker } from './modules/tools/seeker' + include { filter_seeker } from './modules/parser/filter_seeker' /************* * DATABASES for Phage Identification @@ -583,13 +583,13 @@ workflow phigaro_wf { workflow seeker_wf { take: fasta main: if (!params.sk) { - // run and filter seeker - filter_seeker(seeker(fasta).groupTuple(remainder: true)) - // results channel - seeker_results = filter_seeker.out - } - else { seeker_results = Channel.from( ['deactivated', 'deactivated'] ) } - emit: seeker_results + // run and filter seeker + 
filter_seeker(seeker(fasta).groupTuple(remainder: true)) + // results channel + seeker_results = filter_seeker.out + } + else { seeker_results = Channel.from( ['deactivated', 'deactivated'] ) } + emit: seeker_results } workflow setup_wf {