diff --git a/Docs/Citations.txt b/Docs/Citations.txt deleted file mode 100644 index 8380ca8..0000000 --- a/Docs/Citations.txt +++ /dev/null @@ -1,193 +0,0 @@ -@article{marquet2020phage, - title={What the Phage: A scalable workflow for the identification and analysis of phage sequences}, - author={Marquet, Mike and H{\"o}lzer, Martin and Pletz, Mathias W and Viehweger, Adrian and Makarewicz, Oliwia and Ehricht, Ralf and Brandt, Christian}, - journal={bioRxiv}, - year={2020}, - publisher={Cold Spring Harbor Laboratory} -} - -@article{amgarten2018marvel, - title={MARVEL, a tool for prediction of bacteriophage sequences in metagenomic bins}, - author={Amgarten, Deyvid and Braga, Lucas PP and da Silva, Aline M and Setubal, Jo{\~a}o C}, - journal={Frontiers in genetics}, - volume={9}, - pages={304}, - year={2018}, - publisher={Frontiers} -} - -@article{ren2017virfinder, - title={VirFinder: a novel k-mer based tool for identifying viral sequences from assembled metagenomic data}, - author={Ren, Jie and Ahlgren, Nathan A and Lu, Yang Young and Fuhrman, Jed A and Sun, Fengzhu}, - journal={Microbiome}, - volume={5}, - number={1}, - pages={69}, - year={2017}, - publisher={Springer} -} - -@article{fang2019ppr, - title={PPR-Meta: a tool for identifying phages and plasmids from metagenomic fragments using deep learning}, - author={Fang, Zhencheng and Tan, Jie and Wu, Shufang and Li, Mo and Xu, Congmin and Xie, Zhongjie and Zhu, Huaiqiu}, - journal={GigaScience}, - volume={8}, - number={6}, - pages={giz066}, - year={2019}, - publisher={Oxford University Press} -} - -@article{roux2015virsorter, - title={VirSorter: mining viral signal from microbial genomic data}, - author={Roux, Simon and Enault, Francois and Hurwitz, Bonnie L and Sullivan, Matthew B}, - journal={PeerJ}, - volume={3}, - pages={e985}, - year={2015}, - publisher={PeerJ Inc.} -} - -@article{jurtz2016metaphinder, - title={MetaPhinder—identifying bacteriophage sequences in metagenomic data sets}, - author={Jurtz, 
Vanessa Isabell and Villarroel, Julia and Lund, Ole and Voldby Larsen, Mette and Nielsen, Morten}, - journal={PLoS One}, - volume={11}, - number={9}, - pages={e0163111}, - year={2016}, - publisher={Public Library of Science San Francisco, CA USA} -} - -@article{ren2018identifying, - title={Identifying viruses from metagenomic data by deep learning}, - author={Ren, Jie and Song, Kai and Deng, Chao and Ahlgren, Nathan A and Fuhrman, Jed A and Li, Yi and Xie, Xiaohui and Sun, Fengzhu}, - journal={arXiv preprint arXiv:1806.07810}, - year={2018} -} - -@article{brown2016sourmash, - title={sourmash: a library for MinHash sketching of DNA}, - author={Brown, C Titus and Irber, Luiz}, - journal={Journal of Open Source Software}, - volume={1}, - number={5}, - pages={27}, - year={2016} -} - -@article{kieft2020vibrant, - title={VIBRANT: automated recovery, annotation and curation of microbial viruses, and evaluation of viral community function from genomic sequences}, - author={Kieft, Kristopher and Zhou, Zhichao and Anantharaman, Karthik}, - journal={Microbiome}, - volume={8}, - number={1}, - pages={1--23}, - year={2020}, - publisher={BioMed Central} -} - -@inproceedings{abdelkareem2018virnet, - title={Virnet: Deep attention model for viral reads identification}, - author={Abdelkareem, Aly O and Khalil, Mahmoud I and Elaraby, Mostafa and Abbas, Hazem and Elbehery, Ali HA}, - booktitle={2018 13th International Conference on Computer Engineering and Systems (ICCES)}, - pages={623--626}, - year={2018}, - organization={IEEE} -} - -library-citation-WtP-other-tools - -@article{nayfach2020checkv, - title={CheckV: assessing the quality of metagenome-assembled viral genomes}, - author={Nayfach, Stephen and Camargo, Antonio Pedro and Eloe-Fadrosh, Emiley and Roux, Simon and Kyrpides, Nikos}, - journal={BioRxiv}, - year={2020}, - publisher={Cold Spring Harbor Laboratory} -} - -@article{anand2019chromomap, - title={chromoMap: An R package for Interactive Visualization and Annotation of 
Chromosomes}, - author={Anand, Lakshay}, - journal={bioRxiv}, - pages={605600}, - year={2019}, - publisher={Cold Spring Harbor Laboratory} -} - -@article{wheeler2013nhmmer, - title={nhmmer: DNA homology search with profile HMMs}, - author={Wheeler, Travis J and Eddy, Sean R}, - journal={Bioinformatics}, - volume={29}, - number={19}, - pages={2487--2489}, - year={2013}, - publisher={Oxford University Press} -} - -@article{hyatt2010prodigal, - title={Prodigal: prokaryotic gene recognition and translation initiation site identification}, - author={Hyatt, Doug and Chen, Gwo-Liang and LoCascio, Philip F and Land, Miriam L and Larimer, Frank W and Hauser, Loren J}, - journal={BMC bioinformatics}, - volume={11}, - number={1}, - pages={119}, - year={2010}, - publisher={Springer} -} - -@article{li2009sequence, - title={The sequence alignment/map format and SAMtools}, - author={Li, Heng and Handsaker, Bob and Wysoker, Alec and Fennell, Tim and Ruan, Jue and Homer, Nils and Marth, Gabor and Abecasis, Goncalo and Durbin, Richard}, - journal={Bioinformatics}, - volume={25}, - number={16}, - pages={2078--2079}, - year={2009}, - publisher={Oxford University Press} -} - -@article{shen2016seqkit, - title={SeqKit: a cross-platform and ultrafast toolkit for FASTA/Q file manipulation}, - author={Shen, Wei and Le, Shuai and Li, Yan and Hu, Fuquan}, - journal={PloS one}, - volume={11}, - number={10}, - pages={e0163962}, - year={2016}, - publisher={Public Library of Science San Francisco, CA USA} -} - -@article{conway2017upsetr, - title={UpSetR: an R package for the visualization of intersecting sets and their properties}, - author={Conway, Jake R and Lex, Alexander and Gehlenborg, Nils}, - journal={Bioinformatics}, - volume={33}, - number={18}, - pages={2938--2940}, - year={2017}, - publisher={Oxford University Press} -} - -@article{di2017nextflow, - title={Nextflow enables reproducible computational workflows}, - author={Di Tommaso, Paolo and Chatzou, Maria and Floden, Evan W and 
Barja, Pablo Prieto and Palumbo, Emilio and Notredame, Cedric}, - journal={Nature biotechnology}, - volume={35}, - number={4}, - pages={316--319}, - year={2017}, - publisher={Nature Publishing Group} -} - -@article{boettiger2015introduction, - title={An introduction to Docker for reproducible research}, - author={Boettiger, Carl}, - journal={ACM SIGOPS Operating Systems Review}, - volume={49}, - number={1}, - pages={71--79}, - year={2015}, - publisher={ACM New York, NY, USA} -} diff --git a/README.md b/README.md index 39a4fa8..f3ea00b 100644 --- a/README.md +++ b/README.md @@ -14,13 +14,16 @@ [![Twitter Follow](https://img.shields.io/twitter/follow/mult1fractal.svg?style=social)](https://twitter.com/mult1fractal) # What the Phage (WtP) + * by Christian Brandt & Mike Marquet * **this tool is under active development,feel free to report issues and add suggestions** -* use a release candidate for a stable experience via `-r` e.g. `-r v0.9.0` - * these are extensively tested release versions of WtP +* Use a release candidate for a stable experience via `-r` e.g. `-r v0.9.0` + * These are extensively tested release versions of WtP * [releases of WtP are here](https://github.com/replikation/What_the_Phage/releases) + ## Preprint: + > **What the Phage: A scalable workflow for the identification and analysis of phage sequences** > > M. Marquet, M. Hölzer, M. W. Pletz, A. Viehweger, O. Makarewicz, R. Ehricht, C. 
Brandt @@ -28,7 +31,7 @@ > doi: https://doi.org/10.1101/2020.07.24.219899 -# Table of content +# Table of contents * [What is this Repo?](#What-is-this-Repo) * [Installation](#Installation) @@ -41,7 +44,7 @@ * [Workflow control](#Workflow-control) * [Profiles](#Profiles) * [Data Handling](#Data-handling) - * [Pre-download for Offline-mode](#Pre-download-for-Offline-mode) + * [Pre-download for offline-mode](#Pre-download-for-offline-mode) * [Results / Examples](#Example-/-results) * [Under the hood](#Under-the-hood) * [Included bioinformatic tools](#Included-bioinformatic-tools) @@ -50,11 +53,11 @@ # What is this repo #### TL;DR -* WtP is a scalable and easy-to-use workflow for phage identification and analysis. Our tool currently combines 9 established phage [identification tools](#included-bioinformatic-tools) +* WtP is a scalable and easy-to-use workflow for phage identification and analysis. Our tool currently combines 10 established phage [identification tools](#included-bioinformatic-tools) * An attempt to streamline the usage of various phage identification and prediction tools * The main focus is stability and data filtering/analysis for the user * The tool is intended for fasta and fastq reads to identify phages in contigs/reads -* a proper Prophage detection is not implemented (yet) - but a handful of tools report them - so they are mostly identified +* Proper prophage detection is not implemented (yet) - but a handful of tools report them - so they are mostly identified # Installation @@ -85,28 +88,32 @@ sudo usermod -a -G docker $USER > * tar (should be already installed) * Choose one: -> * [Docker installation](https://docs.docker.com/v17.09/engine/installation/linux/docker-ce/ubuntu/#install-docker-ce) -> * add docker to your User group via `sudo usermod -a -G docker $USER` + +> * [Docker installation](https://docs.docker.com/v17.09/engine/installation/linux/docker-ce/ubuntu/#install-docker-ce) +> * add docker to your User group via `sudo usermod -a -G 
docker $USER` > * [Singularity installation](https://github.com/sylabs/singularity/blob/master/INSTALL.md) * Restart your computer -* Try out the installation by entering the following (analyses 1 sample with 10 phage sequences ~ 30 min runtime) +* Try out the installation by entering the following (analyses 1 sample with 10 phage sequences ~ 30 min runtime): ```shell # for docker (local use) -nextflow run replikation/What_the_Phage -r v0.8.0 --cores 8 -profile smalltest,local,docker +nextflow run replikation/What_the_Phage -r v0.9.0 --cores 8 -profile smalltest,local,docker + # for singularity (slurm use) -nextflow run replikation/What_the_Phage -r v0.8.0 --cores 8 -profile smalltest,slurm,singularity +nextflow run replikation/What_the_Phage -r v0.9.0 --cores 8 -profile smalltest,slurm,singularity ``` # Execution / Examples / Help ## Call help via "--help" + ```bash -nextflow run replikation/What_the_Phage -r v0.8.0 --help +nextflow run replikation/What_the_Phage -r v0.9.0 --help ``` ## Quick execution + * Just give me the command god dammit..... ```bash @@ -115,23 +122,27 @@ nextflow run \ # calling the workflow --fasta /path/to/file.fa \ # provide a fasta-file as input --cores 8 \ # number of cores you want to use -profile local,docker # choose the environment:local and docker - -r v0.8.0 # WtP release version + -r v0.9.0 # WtP release version ``` - - - ## Advanced execution ### Advanced execution command -* e.g.: + +* The following command will run WtP using + * ... all `fasta` files found in the `/path/to` directory. + * ... a `local` host (the machine where it is launched) and `docker` containers for handling software dependencies. + * ... 20 cores for parallel execution. + * ... release `0.9.0` of the workflow. + * ... identification only - no annotation. + * ... and excluding `deepvirfinder`, `virfinder` and `marvel` from the list of tools.
```shell nextflow run replikation/What_the_Phage \ --fasta '/path/to/*.fasta' \ -profile local,docker \ --cores 20 \ - -r v0.8.0 \ + -r v0.9.0 \ --anno \ --dv \ --vf \ @@ -139,9 +150,12 @@ nextflow run replikation/What_the_Phage \ ``` * The order of flags does not matter + ### Inputs + * Input examples: * wildcards need single quotes around the path (`'`) + ```bash --fasta /path/to/phage-assembly.fa # path to your fasta-file --fasta '/path/to/*.fa' # path to all .fa files in a dir @@ -150,6 +164,7 @@ nextflow run replikation/What_the_Phage \ ``` ### Workflow control + * Turn on/off tools (check `--help` for more) ```bash @@ -162,44 +177,53 @@ nextflow run replikation/What_the_Phage \ --vf # deactivates virfinder --vn # deactivates virnet --vs # deactivates virsorter - --ph # deactivates phigaro + --ph # deactivates phigaro + --sk # deactivates seeker --identify # only phage identification, skips analysis --annotate # only annotation, skips phage identification ``` -* min size of contigs for identification +* Set the minimum size of contigs for identification ```bash ---filter # min contig size [bp] to analyse +--filter # min contig size [bp] to analyse [default: 1500] ``` + ### Profiles -1. Choose the environment: local, slurm, lsf or ebi -2. Choose the engine: docker or singularity -* examples: + +1. Choose the environment: `local`, `slurm`, `lsf` or `ebi` +2. 
Choose the engine: `docker` or `singularity` + +Examples: + ```bash -profile local,docker -profile local,singularity -profile lsf,docker ``` + ### Release candidate * A release candidate is a [released version of WtP](https://github.com/replikation/What_the_Phage/releases) which ensures proper functionality -* version control ensures reproducibility as each tools version is also "locked" within the release candidate - * databases have no automatic version control (they are downloaded from the source) - * if you need version control for databases, just make a copy of the database dir after download - * you can specify the database dir via the `--database` flag (see below) - * WtP only downloads a database if it's missing, it is not "auto-updating" them -* add this flag to your command and a specific release is used instead +* Version control ensures reproducibility as each tool's version is also "locked" within the release candidate + * Databases have no automatic version control (they are downloaded from the source) + > If you need version control for databases, just make a copy of the database dir after download + > you can specify the database directory via the `--databases` flag (see below) + * WtP only downloads a database if it's missing. It is **not** "auto-updating" them +* A release can be specified in the command line, using the `-r` flag. For example: + ```bash -r v0.8.0 ``` + ### Data handling * WtP handles everything by default * If you need to change paths use the following commands - * It's useful to specify `--workdir` to your current working dir if `/tmp` (default) has limited space + * It's useful to set the parameter `--workdir` to your current working directory if `/tmp` (default) has limited space.
+ ```bash --workdir /path/to/dir # defines the path where nextflow writes temporary files, default: '/tmp/nextflow-phage-$USER' --databases /path/to/dir # specify download location of databases, default './nextflow-autodownload-databases' @@ -207,35 +231,44 @@ nextflow run replikation/What_the_Phage \ --output results # path of the outdir, default './results' ``` +### Pre-download for offline-mode -### Pre-download for Offline-mode - -* `--setup` skips analysis and just downloads all databases and containers +* The flag `--setup` skips analysis and just downloads all databases and containers * Needs roughly 30 GB storage for databases, excluding programs ```bash nextflow run replikation/What_the_Phage --setup -r v0.8.0 ``` -* you can change the database download location via (--databases) -* make sure that you specify the database location when executing WtP, if you change the default path -* singularity images sometimes fail during building, just try to re-execute `--setup` - * WtP attempts to build images up to 3 times, image building is individually skipped if present +* You can specify the location where all databases are stored, by providing a location with the `--databases` parameter. +* Make sure that you specify the database location when executing WtP, if you change the default path. +* Singularity images sometimes fail during building, just try to re-execute with `--setup`. + * WtP attempts to build images up to 3 times, image building is individually skipped if present. + # Example results #### 1. Identification Tool and contig overview (UpSetR) ![plot](figures/plot.svg) -*Figure 1:* This chart (UpSetR plot) quantifies the result-intersections of the phage identification tools, similar to a Venn diagram. The amount of positive phage-sequences identified by each tool is represented on the left barplot in blue. The dot plot shows via line connection(s) which of the tools identified the exact same positive phage sequences. 
The amount of these shared matches is quantified as a barplot above each corresponding dot pattern. +*Figure 1:* This chart (UpSetR plot) quantifies the result-intersections of the phage identification tools, similar to a Venn diagram. +The number of positive phage-sequences identified by each tool is represented on the left barplot in blue. +The dot plot shows via line connection(s) which of the tools identified the exact same positive phage sequences. +The amount of these shared matches is quantified as a barplot above each corresponding dot pattern. + #### 2. Annotation Visualization (Chromomap) -* [chromomap results](https://replikation.github.io/What_the_Phage/index.html) +* [Chromomap results](https://replikation.github.io/What_the_Phage/index.html) + +*See Link:* The graphical output of the annotation shows an overview of the individual loci of the predicted ORFs and the corresponding genes in the fasta sequences identified as phages. +For better visibility, we have chosen 4 categories: tail, capsid, baseplate and other. +This output can be used to verify the identified sequences (if the predicted sequences make sense or not). +The annotation results are additionally plotted in an interactive HTML-file and are available as a file for further analysis. -*See Link:* The graphical output of the annotation shows an overview of the individual loci of the predicted ORFs and the corresponding genes in the fasta sequences identified as phages. For a better visibility, we have chosen 4 categories tail, capsid, baseplate, and other. This output can be used to verify the identified sequences (if the predicted sequences make sense or not). The annotation results are additionally plotted in an interactive HTML-file and are available as a file for further analysis. #### 3. 
Summary Table (checkV + Results) -* check [CheckV](https://bitbucket.org/berkeleylab/checkv/src/master/) for a detailed explanation + +* Check [CheckV](https://bitbucket.org/berkeleylab/checkv/src/master/) for a detailed explanation. contig_id| contig_length| genome_copies| gene_count| viral_genes| host_genes| checkv_quality| miuvig_quality| completeness| completeness_method| contamination| provirus| |-|-|-|-|-|-|-|-|-|-|-|-| @@ -258,13 +291,14 @@ pos_phage_9| 221908| 1| 310| 48| 9| High-quality| High-quality| 100| AAI-based| *Figure 3:* This plot shows a simplified dag-chart of WtP for better understanding of what's going on behind the curtain. - # Included bioinformatic tools + * Please cite the following tools ### Identification -Toolname/Gitlink | Reference | + +Toolname/Gitlink | Reference | |-|-| [MARVEL](https://github.com/LaboratorioBioinformatica/MARVEL#metagenomic-analysis-and-retrieval-of-viral-elements)|[MARVEL, a Tool for Prediction of Bacteriophage Sequences in Metagenomic Bins](https://www.frontiersin.org/articles/10.3389/fgene.2018.00304/full) [VirFinder](https://github.com/jessieren/VirFinder)|[VirFinder: R package for identifying viral sequences from metagenomic data using sequence signatures](https://link.springer.com/epdf/10.1186/s40168-017-0283-5?) 
@@ -277,17 +311,21 @@ Toolname/Gitlink | Reference | [VirNet](https://github.com/alyosama/virnet)|[Deep attention model for viral reads identification](https://ieeexplore.ieee.org/document/8639400) [Phigaro](https://github.com/bobeobibo/phigaro)| [Phigaro: high throughput prophage sequence annotation](https://www.biorxiv.org/content/10.1101/598243v1) [Virsorter2 beta](https://github.com/jiarong/VirSorter2)| - +[Seeker](https://github.com/gussow/seeker)|[Seeker: alignment-free identification of bacteriophage genomes by deep learning](https://doi.org/10.1093/nar/gkaa856)| ### Annotation & classification + Toolname/Git | Reference |-|-| [prodigal](https://github.com/hyattpd/Prodigal)|[Prodigal: prokaryotic gene recognition and translation initiation site identification](https://bmcbioinformatics.biomedcentral.com/articles/10.1186/1471-2105-11-119) [hmmer](http://hmmer.org/)|[nhmmer: DNA homology search with profile HMMs](https://academic.oup.com/bioinformatics/article/29/19/2487/186765) [chromomap](https://cran.r-project.org/web/packages/chromoMap/vignettes/chromoMap.html)| [CheckV](https://bitbucket.org/berkeleylab/checkv/src/master/)|[CheckV: assessing the quality of metagenome-assembled viral genomes](https://www.biorxiv.org/content/10.1101/2020.05.06.081778v1) + + ### Other tools + Toolname/Git | Reference |-|-| [samtools](https://github.com/samtools/samtools)|[The Sequence Alignment/Map format and SAMtools](https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2723002/) diff --git a/configs/container.config b/configs/container.config index 2d27ffa..50de8bd 100644 --- a/configs/container.config +++ b/configs/container.config @@ -22,5 +22,6 @@ process { withLabel: virsorter { container = 'multifractal/virsorter:0.1.2' } withLabel: phigaro { container = 'multifractal/phigaro:0.5.2' } withLabel: virsorter2 { container = 'multifractal/virsorter-2:0.1' } + withLabel: seeker { container = 'papanikos/seeker:1.0.3' } } diff --git a/configs/local.config b/configs/local.config index 
cd4718f..6ee4113 100644 --- a/configs/local.config +++ b/configs/local.config @@ -22,4 +22,5 @@ process { withLabel: virnet { cpus = 4 } withLabel: virsorter { cpus = params.cores } withLabel: phigaro { cpus = params.cores } + withLabel: seeker { cpus = params.cores } } diff --git a/configs/node.config b/configs/node.config index ed6ecaa..3a876d9 100644 --- a/configs/node.config +++ b/configs/node.config @@ -24,4 +24,5 @@ process { withLabel: virsorter { cpus = 24; memory = '32 GB' } withLabel: noDocker { cpus = 4; memory = '4 GB' } withLabel: phigaro { cpus = 24; memory = '32 GB' } -} \ No newline at end of file + withLabel: seeker { cpus = 24; memory = '32 GB' } +} diff --git a/docs/Citations.bib b/docs/Citations.bib index 793bb4f..89ae437 100644 --- a/docs/Citations.bib +++ b/docs/Citations.bib @@ -203,3 +203,16 @@ @article{starikova2020phigaro publisher={Oxford University Press} } +@article{auslander2020seeker, + title={Seeker: alignment-free identification of bacteriophage genomes by deep learning}, + author={Auslander, Noam and Gussow, Ayal B and Benler, Sean and Wolf, Yuri I and Koonin, Eugene V}, + journal={Nucleic Acids Research}, + year={2020}, + month=oct, + issn={0305-1048}, + doi={10.1093/nar/gkaa856}, + url={https://doi.org/10.1093/nar/gkaa856}, + pages={gkaa856}, + publisher={Oxford University Press} +} + diff --git a/modules/parser/filter_seeker.nf b/modules/parser/filter_seeker.nf new file mode 100644 index 0000000..310aff5 --- /dev/null +++ b/modules/parser/filter_seeker.nf @@ -0,0 +1,34 @@ +process filter_seeker { + label 'ubuntu' + input: + tuple val(name), file(results) + output: + tuple val(name), file("seeker_*.txt") + shell: + """ + tail -n+2 *.list | sort -gr -k3 | awk '\$3>=${params.sk_filter}' | awk '{ print \$1 }' > seeker_\${PWD##*/}.txt + """ +} + +/* +raw output: + +$ predict-metagenome example_input/PGE.txt > PGE.out.tsv +$ cat PGE.out.tsv +name prediction score +MH356729.1 Phage 0.85 +LC333428.1 Phage 0.79 +MK903728.1 Phage 0.94
+MN016939.1 Bacteria 0.34 +MN095770.1 Phage 0.82 +MN095772.1 Phage 0.84 +MN176219.1 Phage 0.52 +MN310548.1 Phage 0.62 +MN379739.1 Phage 0.82 +MN419153.1 Phage 0.89 + +"Sequences with scores above 0.5 are predicted phages, +while sequences with scores below 0.5 are predicted bacteria." + +*/ + diff --git a/modules/tools/seeker.nf b/modules/tools/seeker.nf new file mode 100644 index 0000000..7a3975d --- /dev/null +++ b/modules/tools/seeker.nf @@ -0,0 +1,13 @@ +process seeker { + label 'seeker' + errorStrategy 'ignore' + input: + tuple val(name), file(fasta) + output: + tuple val(name), file("${name}_*.list") + script: + """ + predict-metagenome ${fasta} > ${name}.tsv + cp ${name}.tsv ${name}_\${PWD##*/}.list + """ +} diff --git a/nextflow.config b/nextflow.config index e7f96b6..19c0f0b 100644 --- a/nextflow.config +++ b/nextflow.config @@ -38,6 +38,7 @@ params { vb = false vn = false ph = false + sk = false identify = false annotate = false virome = false @@ -51,6 +52,7 @@ params { sm_filter = '0.5' vn_filter = '0.5' vs2_filter = '0.9' + sk_filter = '0.5' // pp_filter = '' // vb_filter = '' @@ -146,6 +148,7 @@ profiles { vb = true vn = true dv = true + sk = true anno = true fasta = 'test-data/all_pos_phage.fa' } diff --git a/phage-tool-Dockerfiles/seeker/Dockerfile b/phage-tool-Dockerfiles/seeker/Dockerfile new file mode 100644 index 0000000..d47069c --- /dev/null +++ b/phage-tool-Dockerfiles/seeker/Dockerfile @@ -0,0 +1,18 @@ +FROM continuumio/miniconda3:latest + +RUN apt-get update -y && apt-get install -y procps && \ + apt-get clean && \ + rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* + +RUN conda config --add channels conda-forge && \ + conda config --add channels defaults && \ + conda create -y --name seeker python=3.7 pip && \ + conda clean --all --yes + +ENV PATH=/opt/conda/envs/seeker/bin:$PATH + +SHELL ["conda", "run", "-n", "seeker", "/bin/bash", "-c"] + +RUN pip install --no-cache-dir --use-feature=2020-resolver seeker==1.0.3 + + diff --git a/phage.nf b/phage.nf
index da3af2e..274bd82 100755 --- a/phage.nf +++ b/phage.nf @@ -90,7 +90,7 @@ else { exit 1, "No executer selected: -profile EXECUTER,ENGINE" } if (!params.setup && !workflow.profile.contains('test') && !workflow.profile.contains('smalltest')) { if ( !params.fasta && !params.fastq ) { exit 1, "input missing, use [--fasta] or [--fastq]"} - if ( params.ma && params.mp && params.vf && params.vs && params.pp && params.dv && params.sm && params.vn && params.vb && params.ph ) { + if ( params.ma && params.mp && params.vf && params.vs && params.pp && params.dv && params.sm && params.vn && params.vb && params.ph && params.sk ) { exit 0, "What the... you deactivated all the tools"} } @@ -192,6 +192,8 @@ if (!params.setup && !workflow.profile.contains('test') && !workflow.profile.con include { virsorter2 } from './modules/tools/virsorter2' include { filter_virsorter2 } from './modules/parser/filter_virsorter2' include { virsorter2_collect_data} from './modules/raw_data_collection/virsorter2_collect_data' + include { seeker } from './modules/tools/seeker' + include { filter_seeker } from './modules/parser/filter_seeker' /************* * DATABASES for Phage Identification @@ -576,7 +578,19 @@ workflow phigaro_wf { } else { phigaro_results = Channel.from( [ 'deactivated', 'deactivated'] ) } emit: phigaro_results -} +} + +workflow seeker_wf { + take: fasta + main: if (!params.sk) { + // run and filter seeker + filter_seeker(seeker(fasta).groupTuple(remainder: true)) + // results channel + seeker_results = filter_seeker.out + } + else { seeker_results = Channel.from( ['deactivated', 'deactivated'] ) } + emit: seeker_results +} workflow setup_wf { take: @@ -672,6 +686,7 @@ workflow identify_fasta_MSF { .concat(vibrant_virome_wf(fasta_validation_wf.out, vibrant_DB)) .concat(virnet_wf(fasta_validation_wf.out)) .concat(phigaro_wf(fasta_validation_wf.out)) + .concat(seeker_wf(fasta_validation_wf.out)) .filter { it != 'deactivated' } // removes deactivated tool channels .groupTuple() @@ 
-843,6 +858,7 @@ def helpMSG() { --vn deactivates virnet --vs deactivates virsorter --ph deactivates phigaro + --sk deactivates seeker Adjust tools individually --virome deactivates virome-mode (vibrand and virsorter) @@ -852,6 +868,7 @@ def helpMSG() { --vs2_filter dsDNAphage score cut-off [default: $params.vs2_filter] --sm_filter Similarity score [default: $params.sm_filter] --vn_filter Score [default: $params.vn_filter] + --sk_filter score cut-off [default: $params.sk_filter] Workflow control: --identify only phage identification, skips analysis @@ -876,4 +893,4 @@ if (!params.setup) { log.info ( workflow.success ? "\nDone! Results are stored here --> $params.output \nThank you for using What the Phage\n \nPlease cite us: https://doi.org/10.1101/2020.07.24.219899 \ \n\nPlease also cite the other tools we use in our workflow --> $params.output/literature \n" : "Oops .. something went wrong" ) } -} \ No newline at end of file +}