diff --git a/.github/workflows/run_WtP.yml b/.github/workflows/run_WtP.yml index 68612ac..6c7c291 100644 --- a/.github/workflows/run_WtP.yml +++ b/.github/workflows/run_WtP.yml @@ -8,4 +8,4 @@ jobs: steps: - uses: actions/checkout@v1 - name: run_What_the_Phage - run: curl -s https://get.nextflow.io | bash && ./nextflow run phage.nf -profile git_action + run: curl -s https://get.nextflow.io | bash && ./nextflow run phage.nf -profile git_action,docker --cores 2 diff --git a/.gitignore b/.gitignore index 9aef681..073bdd8 100644 --- a/.gitignore +++ b/.gitignore @@ -7,3 +7,4 @@ work/ .nextflow/ results/ .Rhistory +singularity_images/ \ No newline at end of file diff --git a/README.md b/README.md index da289c6..33339d6 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,9 @@ * WtP runs with the workflow manager `nextflow` using `docker` * this means all the other programs are automatically pulled via docker -* Only `docker` and `nextflow` needs to be installed +* Only `docker` or `singularity`, and `nextflow` need to be installed +* the first execution will take time as tons of databases and containers get pulled/downloaded + * we recommend executing the first time with a small data set! 
### Easy Installation * if you dont have experience with bioinformatic tools use this @@ -89,6 +91,7 @@ nextflow run replikation/What_the_Phage -r v0.5 --fasta ~/.nextflow/assets/repli * Docker installation [here](https://docs.docker.com/v17.09/engine/installation/linux/docker-ce/ubuntu/#install-docker-ce) * Nextflow installation [here](https://www.nextflow.io/) +* Singularity installation [here](https://github.com/sylabs/singularity/blob/master/INSTALL.md) * move or add the nextflow executable to a bin path * add docker to your User group via `sudo usermod -a -G docker $USER` @@ -98,7 +101,7 @@ nextflow run replikation/What_the_Phage -r v0.5 --fasta ~/.nextflow/assets/repli * for local use of the `master` branch you could either clone the git and do: ```bash -./phage.nf --fasta 'test-data/*.fasta' +./phage.nf --fasta 'test-data/*.fasta' -profile local,docker ``` * or execute it directly via a release candidate ( `-r releasenumber` , e.g. `-r v0.5`) @@ -111,6 +114,13 @@ nextflow run replikation/What_the_Phage -r v0.5 --fasta your-file.fasta * long-read input is supported via `--fastq` +### Profiles + +* we are currently setting up a few profiles to allow for the execution in different working environments +* `-profile local,docker` -> local use with docker container +* `-profile local,singularity` -> local use with singularity container +* if you need executor profiles (slurm lsf etc.) 
please contact us and so we can implement that + ### Tool control * all the tools can be separatly turned off via adding the related option flag @@ -125,7 +135,7 @@ nextflow run replikation/What_the_Phage -r v0.5 --fasta your-file.fasta --dv --m ### Temporary files * all temporary files are stored in `/tmp/nextflow-phage-$USER` -* you can change the location via adding '-work-dir new/path/here' to the command +* you can change the location via adding '--workdir new/path/here' to the command ## Results diff --git a/configs/container.config b/configs/container.config new file mode 100644 index 0000000..2e5dbcd --- /dev/null +++ b/configs/container.config @@ -0,0 +1,21 @@ +process { + withLabel: chromomap { container = 'nanozoo/r_fungi:0.1--097b1bb' } + withLabel: deepvirfinder { container = 'multifractal/deepvirfinder:0.1' } + withLabel: emboss { container = 'quay.io/biocontainers/emboss:6.5.7--4' } + withLabel: ggplot2 { container = 'michelsteuwer/ggplot2:latest' } + withLabel: hmmscan { container = 'mhoelzer/hmmscan:0.1' } + withLabel: marvel { container = 'multifractal/marvel:0.1' } + withLabel: metaphinder { container = 'multifractal/metaphinder:0.1' } + withLabel: pprmeta { container = 'multifractal/ppr-meta:0.1' } + withLabel: prodigal { container = 'nanozoo/prodigal:2.6.3--2769024' } + withLabel: python { container = 'nanozoo/template:3.8--ccd0653' } + withLabel: r_plot { container = 'replikation/r-phage-plot:latest' } + withLabel: samtools { container = 'nanozoo/samtools:1.9--76b9270' } + withLabel: sourmash { container = 'nanozoo/sourmash:2.3.0--4257650' } + withLabel: ubuntu { container = 'nanozoo/basics:1.0--962b907' } + withLabel: upsetr { container = 'nanozoo/upsetr:1.4.0--0ea25b3' } + withLabel: vibrant { container = 'multifractal/vibrant:0.4' } + withLabel: virfinder { container = 'multifractal/virfinder:0.1' } + withLabel: virnet { container = 'multifractal/virnet:0.5.1' } + withLabel: virsorter { container = 'multifractal/virsorter:0.1.2' } +} diff 
--git a/configs/local.config b/configs/local.config index e756e4c..177e4a8 100644 --- a/configs/local.config +++ b/configs/local.config @@ -1,26 +1,23 @@ process.executor = 'local' -docker { enabled = true } -params.cpus = params.cores -workDir = params.workdir process { - withLabel: deepvirfinder { cpus = 1; container = 'multifractal/deepvirfinder:0.1' } - withLabel: emboss { cpus = params.cores ; container = 'quay.io/biocontainers/emboss:6.5.7--4' } - withLabel: ggplot2 { cpus = 1 ; container = 'michelsteuwer/ggplot2:latest' } - withLabel: marvel { cpus = 1; container = 'multifractal/marvel:0.1' } - withLabel: metaphinder { cpus = 1; container = ' multifractal/metaphinder:0.1' } - withLabel: pprmeta { cpus = 1; container = 'multifractal/ppr-meta:0.1' } - withLabel: python { cpus = 1; container = 'nanozoo/template:3.8--ccd0653' } - withLabel: r_plot { cpus = 1; container = 'replikation/r-phage-plot:latest' } - withLabel: samtools { cpus = 1 ; container = 'nanozoo/samtools:latest' } - withLabel: sourmash { cpus = 1 ; container = 'nanozoo/sourmash:2.3.0--4257650' } - withLabel: ubuntu { cpus = 1; container = 'nanozoo/basics:1.0--962b907' } - withLabel: upsetr { cpus = 1; container = 'nanozoo/upsetr:1.4.0--0ea25b3' } - withLabel: vibrant { cpus = 1 ; container = 'multifractal/vibrant:0.4' } - withLabel: virfinder { cpus = 1; container = 'multifractal/virfinder:0.1' } - withLabel: virnet { cpus = 1; container = 'multifractal/virnet:0.5.1' } - withLabel: virsorter { cpus = 1; container = 'quay.io/biocontainers/virsorter:1.0.6--pl526h516909a_1' } - withLabel: prodigal { cpus = 1; container = 'nanozoo/prodigal:2.6.3--2769024' } - withLabel: hmmscan { cpus = params.cores; container = 'mhoelzer/hmmscan:0.1' } - withLabel: chromomap { cpus = params.cores; container = 'nanozoo/r_fungi:0.1--097b1bb' } + withLabel: chromomap { cpus = params.cores } + withLabel: deepvirfinder { cpus = 4 } + withLabel: emboss { cpus = params.cores } + withLabel: ggplot2 { cpus = 1 } + withLabel: 
hmmscan { cpus = params.cores } + withLabel: marvel { cpus = 4 } + withLabel: metaphinder { cpus = 4 } + withLabel: pprmeta { cpus = 4 } + withLabel: prodigal { cpus = 1 } + withLabel: python { cpus = 1 } + withLabel: r_plot { cpus = 1 } + withLabel: samtools { cpus = 1 } + withLabel: sourmash { cpus = 4 } + withLabel: ubuntu { cpus = 1 } + withLabel: upsetr { cpus = 1 } + withLabel: vibrant { cpus = 4 } + withLabel: virfinder { cpus = params.cores } + withLabel: virnet { cpus = 4 } + withLabel: virsorter { cpus = params.cores } } diff --git a/configs/lsf.config b/configs/lsf.config deleted file mode 100644 index af7eada..0000000 --- a/configs/lsf.config +++ /dev/null @@ -1,39 +0,0 @@ -workDir = params.workdir - -executor { - name = "lsf" - queueSize = 200 -} - -singularity { - enabled = true - autoMounts = true - cacheDir = params.cachedir -} - -params.cpus = params.cores - -process { - cache = "lenient" - //errorStrategy = "retry" - //maxRetries = 1 - withLabel: deepvirfinder { cpus = 24; memory = '32 GB'; container = 'multifractal/deepvirfinder:0.1' } - withLabel: emboss { cpus = 4; memory = '8 GB'; container = 'quay.io/biocontainers/emboss:6.5.7--4' } - withLabel: marvel { cpus = 24; memory = '32 GB'; container = 'multifractal/marvel:0.1' } - withLabel: metaphinder { cpus = 24; memory = '32 GB'; container = 'multifractal/metaphinder:0.1' } - withLabel: pprmeta { cpus = 24; memory = '32 GB'; container = 'multifractal/ppr-meta:0.1' } - withLabel: r_plot { cpus = 4; memory = '4 GB'; container = 'replikation/r-phage-plot:latest' } - withLabel: upsetr { cpus = 1; memory = '4 GB'; container = 'nanozoo/upsetr:1.4.0--0ea25b3' } - withLabel: ubuntu { cpus = 4; memory = '4 GB'; container = 'ubuntu:bionic' } - withLabel: virfinder { cpus = 24; memory = '32 GB'; container = 'multifractal/virfinder:0.1' } - withLabel: virsorter { cpus = 24; memory = '32 GB'; container = 'quay.io/biocontainers/virsorter:1.0.6--pl526h516909a_1' } - withLabel: ggplot2 { cpus = 1 ; memory = '8 
GB'; container = 'michelsteuwer/ggplot2:latest' } - withLabel: samtools { cpus = 1 ; memory = '32 GB'; container = 'nanozoo/samtools:latest' } - withLabel: sourmash { cpus = 8 ; memory = '32 GB'; container = 'nanozoo/sourmash:2.3.0--4257650' } - withLabel: vibrant { cpus = 1 ; container = 'multifractal/vibrant:0.4' } - withLabel: virnet { cpus = 1; container = 'multifractal/virnet:0.5.1' } - withLabel: prodigal { cpus = 1; container = 'nanozoo/prodigal:latest' } - withLabel: hmmscan { cpus = params.cores; container = 'mhoelzer/hmmscan:0.1' } - withLabel: chromomap { cpus = params.cores; container = 'nanozoo/r_fungi:0.1--097b1bb' } -} - diff --git a/configs/node.config b/configs/node.config new file mode 100644 index 0000000..804073d --- /dev/null +++ b/configs/node.config @@ -0,0 +1,25 @@ + +process { + + //errorStrategy = "retry" + //maxRetries = 1 + withLabel: chromomap { cpus = 1 } + withLabel: deepvirfinder { cpus = 24; memory = '32 GB' } + withLabel: emboss { cpus = 4; memory = '8 GB' } + withLabel: ggplot2 { cpus = 1; memory = '8 GB' } + withLabel: hmmscan { cpus = 4 } + withLabel: marvel { cpus = 24; memory = '32 GB' } + withLabel: metaphinder { cpus = 24; memory = '32 GB' } + withLabel: pprmeta { cpus = 24; memory = '32 GB' } + withLabel: prodigal { cpus = 4 } + withLabel: python { cpus = 1 } + withLabel: r_plot { cpus = 4; memory = '4 GB' } + withLabel: samtools { cpus = 1 ; memory = '32 GB' } + withLabel: sourmash { cpus = 8 ; memory = '32 GB' } + withLabel: ubuntu { cpus = 4; memory = '4 GB' } + withLabel: upsetr { cpus = 1; memory = '4 GB' } + withLabel: vibrant { cpus = 4 } + withLabel: virfinder { cpus = 24; memory = '32 GB' } + withLabel: virnet { cpus = 4 } + withLabel: virsorter { cpus = 24; memory = '32 GB' } +} \ No newline at end of file diff --git a/figures/chart.png b/figures/chart.png index af5e008..b4d2ffc 100644 Binary files a/figures/chart.png and b/figures/chart.png differ diff --git a/modules/databases/download_pvog_DB.nf 
b/modules/databases/download_pvog_DB.nf index e2d7842..ba40bb7 100644 --- a/modules/databases/download_pvog_DB.nf +++ b/modules/databases/download_pvog_DB.nf @@ -2,10 +2,10 @@ process pvog_DB { label 'noDocker' if (params.cloudProcess) { - publishDir "${params.cloudDatabase}/", mode: 'copy' + publishDir "${params.databases}/", mode: 'copy' } else { - storeDir "nextflow-autodownload-databases/pvog" + storeDir "${params.databases}/pvog" } output: diff --git a/modules/databases/download_references.nf b/modules/databases/download_references.nf index ba315c4..d1f5bda 100644 --- a/modules/databases/download_references.nf +++ b/modules/databases/download_references.nf @@ -1,9 +1,9 @@ process download_references { if (params.cloudProcess) { - publishDir "${params.cloudDatabase}/references/", mode: 'copy', pattern: "phage_references.fa" + publishDir "${params.databases}/references/", mode: 'copy', pattern: "phage_references.fa" } else { - storeDir "nextflow-autodownload-databases/references/" + storeDir "${params.databases}/references/" } label 'noDocker' output: diff --git a/modules/databases/download_rvdb_DB.nf b/modules/databases/download_rvdb_DB.nf index 3c8ff71..30ddd66 100644 --- a/modules/databases/download_rvdb_DB.nf +++ b/modules/databases/download_rvdb_DB.nf @@ -1,10 +1,10 @@ process rvdb_DB { label 'noDocker' if (params.cloudProcess) { - publishDir "${params.cloudDatabase}/", mode: 'copy', pattern: "rvdb" + publishDir "${params.databases}/", mode: 'copy', pattern: "rvdb" } else { - storeDir "nextflow-autodownload-databases/rvdb" + storeDir "${params.databases}/rvdb" } output: diff --git a/modules/databases/download_viphog_DB.nf b/modules/databases/download_viphog_DB.nf deleted file mode 100644 index e69de29..0000000 diff --git a/modules/databases/download_vog_DB.nf b/modules/databases/download_vog_DB.nf index 33135e0..bd5128e 100644 --- a/modules/databases/download_vog_DB.nf +++ b/modules/databases/download_vog_DB.nf @@ -1,10 +1,10 @@ process vog_DB { label 
'noDocker' if (params.cloudProcess) { - publishDir "${params.cloudDatabase}/", mode: 'copy', pattern: "vogdb" + publishDir "${params.databases}/", mode: 'copy', pattern: "vogdb" } else { - storeDir "nextflow-autodownload-databases/vog" + storeDir "${params.databases}/vog" } output: diff --git a/modules/databases/phage_references_blastDB.nf b/modules/databases/phage_references_blastDB.nf index 280fedb..7fdca9b 100644 --- a/modules/databases/phage_references_blastDB.nf +++ b/modules/databases/phage_references_blastDB.nf @@ -1,9 +1,9 @@ process phage_references_blastDB { if (params.cloudProcess) { - publishDir "${params.cloudDatabase}/", mode: 'copy', pattern: "phage_db.*" + publishDir "${params.databases}/", mode: 'copy', pattern: "phage_db.*" } else { - storeDir "nextflow-autodownload-databases/blast_phage_DB" + storeDir "${params.databases}/blast_phage_DB" } label 'metaphinder' input: diff --git a/modules/databases/ppr_download_dependencies.nf b/modules/databases/ppr_download_dependencies.nf index 2a88730..fafa679 100644 --- a/modules/databases/ppr_download_dependencies.nf +++ b/modules/databases/ppr_download_dependencies.nf @@ -1,9 +1,9 @@ process ppr_download_dependencies { if (params.cloudProcess) { - publishDir "${params.cloudDatabase}/pprmeta/", mode: 'copy', pattern: "PPR-Meta" + publishDir "${params.databases}/pprmeta/", mode: 'copy', pattern: "PPR-Meta" } else { - storeDir "nextflow-autodownload-databases/pprmeta/" + storeDir "${params.databases}/pprmeta/" } label 'noDocker' output: diff --git a/modules/databases/sourmash_download_DB.nf b/modules/databases/sourmash_download_DB.nf index a1672fc..8a211f9 100644 --- a/modules/databases/sourmash_download_DB.nf +++ b/modules/databases/sourmash_download_DB.nf @@ -1,9 +1,9 @@ process sourmash_download_DB { if (params.cloudProcess) { - publishDir "${params.cloudDatabase}/sourmash/", mode: 'copy', pattern: "phages.sbt.json.tar.gz" + publishDir "${params.databases}/sourmash/", mode: 'copy', pattern: 
"phages.sbt.json.tar.gz" } else { - storeDir "nextflow-autodownload-databases/sourmash/" + storeDir "${params.databases}/sourmash/" } label 'sourmash' input: diff --git a/modules/databases/vibrant_download_DB.nf b/modules/databases/vibrant_download_DB.nf index de683be..add58cf 100644 --- a/modules/databases/vibrant_download_DB.nf +++ b/modules/databases/vibrant_download_DB.nf @@ -1,9 +1,9 @@ process vibrant_download_DB { if (params.cloudProcess) { - publishDir "${params.cloudDatabase}/Vibrant/", mode: 'copy', pattern: "database.tar.gz" + publishDir "${params.databases}/Vibrant/", mode: 'copy', pattern: "database.tar.gz" } else { - storeDir "nextflow-autodownload-databases/Vibrant" + storeDir "${params.databases}/Vibrant" } label 'vibrant' output: diff --git a/modules/databases/virnet_download_dependencies.nf b/modules/databases/virnet_download_dependencies.nf index ee82e66..f7305d1 100644 --- a/modules/databases/virnet_download_dependencies.nf +++ b/modules/databases/virnet_download_dependencies.nf @@ -1,9 +1,9 @@ process virnet_download_dependencies { if (params.cloudProcess) { - publishDir "${params.cloudDatabase}/virnet/", mode: 'copy', pattern: "virnet" + publishDir "${params.databases}/virnet/", mode: 'copy', pattern: "virnet" } else { - storeDir "nextflow-autodownload-databases/virnet/" + storeDir "${params.databases}/virnet/" } label 'noDocker' output: diff --git a/modules/databases/virsorter_download_DB.nf b/modules/databases/virsorter_download_DB.nf index 283f978..3328db4 100644 --- a/modules/databases/virsorter_download_DB.nf +++ b/modules/databases/virsorter_download_DB.nf @@ -1,9 +1,9 @@ process virsorter_download_DB { if (params.cloudProcess) { - publishDir "${params.cloudDatabase}/virsorter/", mode: 'copy', pattern: "virsorter-data" + publishDir "${params.databases}/virsorter/", mode: 'copy', pattern: "virsorter-data" } else { - storeDir "nextflow-autodownload-databases/virsorter/" + storeDir "${params.databases}/virsorter/" } label 'noDocker' output: 
diff --git a/modules/hmmscan.nf b/modules/hmmscan.nf index a34839a..992b2d1 100644 --- a/modules/hmmscan.nf +++ b/modules/hmmscan.nf @@ -1,22 +1,14 @@ process hmmscan { - publishDir "${params.output}/${name}/hmm/", mode: 'copy' - label 'hmmscan' - + publishDir "${params.output}/${name}/raw_data/hmm/", mode: 'copy' + label 'hmmscan' input: - tuple val(name), file(faa) - file(pvog_db) - // file(vog_db) - // file(rvdb_db) - + tuple val(name), file(faa) + file(pvog_db) output: - tuple val(name), file("${name}_${pvog_db}_hmmscan.tbl"), file(faa) - // tuple val(name), file("${name}_${vog_db}_hmmscan.tbl"), file(faa) - // tuple val(name), file("${name}_${rvdb_db}_hmmscan.tbl"), file(faa) - + tuple val(name), file("${name}_${pvog_db}_hmmscan.tbl"), file(faa) script: """ - hmmscan --cpu ${task.cpus} --noali --domtblout ${name}_${pvog_db}_hmmscan.tbl ${pvog_db}/${pvog_db}.hmm ${faa} - + hmmscan --cpu ${task.cpus} --noali --domtblout ${name}_${pvog_db}_hmmscan.tbl ${pvog_db}/${pvog_db}.hmm ${faa} """ } diff --git a/modules/parser/filter_tool_names.nf b/modules/parser/filter_tool_names.nf index bf4c761..106ded9 100644 --- a/modules/parser/filter_tool_names.nf +++ b/modules/parser/filter_tool_names.nf @@ -1,5 +1,5 @@ process filter_tool_names { - label 'upsetr' + label 'ubuntu' input: tuple val(name), file(files) output: diff --git a/modules/prodigal.nf b/modules/prodigal.nf index 6c3814e..b1d16a0 100644 --- a/modules/prodigal.nf +++ b/modules/prodigal.nf @@ -1,13 +1,10 @@ process prodigal { - publishDir "${params.output}/${name}/prodigal_out", mode: 'copy' - label 'prodigal' - + publishDir "${params.output}/${name}/raw_data/prodigal_out", mode: 'copy' + label 'prodigal' input: - tuple val(name), path(positive_contigs) - + tuple val(name), path(positive_contigs) output: - tuple val(name), path("${name}_prodigal.faa") - + tuple val(name), path("${name}_prodigal.faa") script: """ prodigal -p "meta" -a ${name}_prodigal.faa -i ${positive_contigs} diff --git 
a/modules/tools/deepvirfinder.nf b/modules/tools/deepvirfinder.nf index cb1a7aa..93cea81 100644 --- a/modules/tools/deepvirfinder.nf +++ b/modules/tools/deepvirfinder.nf @@ -7,7 +7,7 @@ process deepvirfinder { tuple val(name), file("${name}_*.list") script: """ - dvf.py -c ${params.cpus} -i ${fasta} -o ${name} + dvf.py -c ${task.cpus} -i ${fasta} -o ${name} cp ${name}/*.txt ${name}_\${PWD##*/}.list """ } \ No newline at end of file diff --git a/modules/tools/marvel.nf b/modules/tools/marvel.nf index 42282ba..1e146cf 100644 --- a/modules/tools/marvel.nf +++ b/modules/tools/marvel.nf @@ -8,7 +8,7 @@ process marvel { script: """ # Marvel - marvel_bins.py -i ${contig_bins} -t ${params.cpus} > results_\${PWD##*/}.txt + marvel_bins.py -i ${contig_bins} -t ${task.cpus} > results_\${PWD##*/}.txt """ } diff --git a/modules/tools/virfinder.nf b/modules/tools/virfinder.nf index 686c736..ab3738b 100644 --- a/modules/tools/virfinder.nf +++ b/modules/tools/virfinder.nf @@ -7,7 +7,6 @@ process virfinder { tuple val(name), file("${name}_*.list") script: """ - virfinder_execute.R ${fasta} cp results.txt ${name}_\${PWD##*/}.list """ diff --git a/modules/tools/virsorter.nf b/modules/tools/virsorter.nf index 71a97be..606d699 100644 --- a/modules/tools/virsorter.nf +++ b/modules/tools/virsorter.nf @@ -10,8 +10,7 @@ process virsorter { tuple val(name), file("virsorter_results_*.tar") script: """ - - wrapper_phage_contigs_sorter_iPlant.pl -f ${fasta} -db 1 --wdir virsorter --ncpu 8 --data-dir ${database} + wrapper_phage_contigs_sorter_iPlant.pl -f ${fasta} -db 1 --wdir virsorter --ncpu \$(( ${task.cpus} * 2 )) --data-dir ${database} cat virsorter/Predicted_viral_sequences/VIRSorter_cat-[1,2].fasta | grep ">" | sed -e s/\\>VIRSorter_//g | sed -e s/-cat_1//g | sed -e s/-cat_2//g | sed -e s/-circular//g > virsorter_\${PWD##*/}.list tar cf virsorter_results_\${PWD##*/}.tar virsorter diff --git a/nextflow.config b/nextflow.config index c56198f..2ebce32 100644 --- a/nextflow.config +++ 
b/nextflow.config @@ -2,74 +2,112 @@ manifest { mainScript = 'phage.nf' } -// default parameters params { - cores = "8" - mem = "12" - help = false - fasta = '' - fastq = '' - dir = '' - list = false - - workdir = "/tmp/nextflow-phages-$USER" - cloudDatabase = false - cachedir = false - - // folder structure - output = 'results' - - // tool deactivation - dv = false - ma = false - mp = false - vf = false - vs = false - pp = false - sm = false - vb = false - vn = false - anno = false -} + // default parameters + cores = "8" + mem = "12" + help = false + fasta = '' + fastq = '' + dir = '' + list = false + cloudDatabase = false + + + // folder structure + output = 'results' + databases = 'nextflow-autodownload-databases' + workdir = "/tmp/nextflow-phages-$USER" + cachedir = "singularity_images" + // Profiles + executor = 'local' + engine = 'docker' + + // tool deactivation + dv = false + ma = false + mp = false + vf = false + vs = false + pp = false + sm = false + vb = false + vn = false + anno = false +} profiles { - standard { + //executer + local { + workDir = params.workdir params.cloudProcess = false - params.cloudDatabase = false includeConfig 'configs/local.config' - } + } - lsf { + lsf { + executor { + name = "lsf" + queueSize = 200 + } + workDir = params.workdir params.cloudProcess = true - includeConfig 'configs/lsf.config' - } + process.cache = "lenient" + includeConfig 'configs/node.config' + } - ebi { - params.cloudProcess = true - params.workdir = "/hps/nobackup2/production/metagenomics/$USER/nextflow-work-$USER" - params.cloudDatabase = "/homes/$USER/data/nextflow-databases/" - params.cachedir = "/hps/nobackup2/singularity/$USER" - includeConfig 'configs/lsf.config' - } + //engines + docker { + docker { enabled = true } + includeConfig 'configs/container.config' + } - git_action { + singularity { + singularity { + enabled = true + autoMounts = true + cacheDir = params.cachedir + } + includeConfig 'configs/container.config' + } + + // gitaction + 
git_action { params.cloudProcess = false - params.cloudDatabase = false includeConfig 'configs/local.config' - params.dv = true - params.ma = true - params.mp = true - params.vf = false - params.vs = true - params.pp = true - params.sm = true - params.vb = true - params.vn = true - params.anno = true - params.cores = 2 - params.fasta = 'test-data/all_pos_phage.fa' - } + params { + ma = true + mp = true + vf = false + vs = true + pp = true + sm = true + vb = true + vn = true + dv = true + anno = true + fasta = 'test-data/all_pos_phage.fa' + } + } + + // cluster specific executer + ebi { + executor { + name = "lsf" + queueSize = 200 + } + process.cache = "lenient" + params.cloudProcess = true + params.workdir = "/hps/nobackup2/production/metagenomics/$USER/nextflow-work-$USER" + workDir = params.workdir + params.databases = "/homes/$USER/data/nextflow-databases/" + params.cachedir = "/hps/nobackup2/singularity/$USER" + includeConfig 'configs/node.config' + } } + +// PROFILES + + diff --git a/phage.nf b/phage.nf index 22f9d9b..670052f 100755 --- a/phage.nf +++ b/phage.nf @@ -6,6 +6,10 @@ nextflow.preview.dsl=2 * Author: christian.jena@gmail.com */ +if ( !nextflow.version.matches('20.+') ) { + println "This workflow requires Nextflow version 20.X or greater -- You are running version $nextflow.version" + exit 1 +} println "_____ _____ ____ ____ ___ ___ __ __ _ _ " println " __ _______________________ " @@ -17,10 +21,7 @@ println " \\/ " println "_____ _____ ____ ____ ___ ___ __ __ _ _ " if (params.help) { exit 0, helpMSG() } -if( !nextflow.version.matches('20.+') ) { - println "This workflow requires Nextflow version 20.X or greater -- You are running version $nextflow.version" - exit 1 -} + println " " println "\u001B[32mProfile: $workflow.profile\033[0m" println " " @@ -28,44 +29,75 @@ println "\033[2mCurrent User: $workflow.userName" println "Nextflow-version: $nextflow.version" println "WtP intended for Nextflow-version: 20.01.0" println "Starting time: 
$nextflow.timestamp" -println "Workdir location:" -println " $workflow.workDir\u001B[0m" +println "Workdir location [--workdir]:" +println " $workflow.workDir" +println "Output location [--output]:" +println " $params.output" +println "\033[2mDatabase location [--databases]:" +println " $params.databases\u001B[0m" +if (workflow.profile.contains('singularity')) { +println "\033[2mSingularity cache location [--cachedir]:" +println " $params.cachedir" +println "\u001B[33m WARNING: Singularity image building sometimes fails!" +println " Rerun WtP via -resume to retry the failed image build" +println " Manually remove faulty images in $params.cachedir for a rebuild\u001B[0m" +} +println " " +println "\033[2mCPUs to use: $params.cores\033[0m" println " " -if (workflow.profile == 'standard') { -println "\033[2mCPUs to use: $params.cores" -println "Output dir name: $params.output\u001B[0m" -println " "} + +/************* +* ERROR HANDLING +*************/ +// profiles +if ( workflow.profile == 'standard' ) { exit 1, "NO VALID EXECUTION PROFILE SELECTED, use e.g. [-profile local,docker]" } + +if ( + workflow.profile.contains('singularity') || + workflow.profile.contains('docker') + ) { "engine selected" } +else { exit 1, "No engine selected: -profile EXECUTER,ENGINE" } + +if ( + workflow.profile.contains('local') || + workflow.profile.contains('ebi') || + workflow.profile.contains('lsf') || + workflow.profile.contains('git_action') + ) { "executer selected" } +else { exit 1, "No executer selected: -profile EXECUTER,ENGINE" } + +// params tests +if ( !params.fasta && !params.fastq ) { + exit 1, "input missing, use [--fasta] or [--fastq]"} +if ( params.fasta && params.fastq ) { + exit 1, "please use either [--fasta] or [--fastq] as input"} +if ( params.ma && params.mp && params.vf && params.vs && params.pp && params.dv && params.sm && params.vn && params.vb ) { + exit 0, "What the... 
you deactivated all the tools"} /************* * INPUT HANDLING *************/ - if ( !params.fasta && !params.fastq ) { - exit 1, "input missing, use [--fasta] or [--fastq]"} - if ( params.fasta && params.fastq ) { - exit 1, "please use either [--fasta] or [--fastq] as input"} - if ( params.ma && params.mp && params.vf && params.vs && params.pp && params.dv && params.sm && params.vn && params.vb ) { - exit 0, "What the... you deactivated all the tools"} - - // fasta input or via csv file - if (params.fasta && params.list) { fasta_input_ch = Channel - .fromPath( params.fasta, checkIfExists: true ) - .splitCsv() - .map { row -> ["${row[0]}", file("${row[1]}", checkIfExists: true)] } - } - else if (params.fasta) { fasta_input_ch = Channel - .fromPath( params.fasta, checkIfExists: true) - .map { file -> tuple(file.baseName, file) } - } - // fastq input or via csv file - if (params.fastq && params.list) { fastq_input_ch = Channel - .fromPath( params.fastq, checkIfExists: true ) - .splitCsv() - .map { row -> ["${row[0]}", file("${row[1]}", checkIfExists: true)] } - } - else if (params.fastq) { fastq_input_ch = Channel - .fromPath( params.fastq, checkIfExists: true) - .map { file -> tuple(file.baseName, file) } - } + +// fasta input or via csv file + if (params.fasta && params.list) { fasta_input_ch = Channel + .fromPath( params.fasta, checkIfExists: true ) + .splitCsv() + .map { row -> ["${row[0]}", file("${row[1]}", checkIfExists: true)] } + } + else if (params.fasta) { fasta_input_ch = Channel + .fromPath( params.fasta, checkIfExists: true) + .map { file -> tuple(file.baseName, file) } + } +// fastq input or via csv file + if (params.fastq && params.list) { fastq_input_ch = Channel + .fromPath( params.fastq, checkIfExists: true ) + .splitCsv() + .map { row -> ["${row[0]}", file("${row[1]}", checkIfExists: true)] } + } + else if (params.fastq) { fastq_input_ch = Channel + .fromPath( params.fastq, checkIfExists: true) + .map { file -> tuple(file.baseName, file) } + } 
/************* * MODULES @@ -137,7 +169,7 @@ workflow ppr_dependecies { if (!params.cloudProcess) { ppr_download_dependencies(); db = ppr_download_dependencies.out } // cloud storage via db_preload.exists() if (params.cloudProcess) { - db_preload = file("${params.cloudDatabase}/pprmeta/PPR-Meta") + db_preload = file("${params.databases}/pprmeta/PPR-Meta") if (db_preload.exists()) { db = db_preload } else { ppr_download_dependencies(); db = ppr_download_dependencies.out } } @@ -150,7 +182,7 @@ workflow virsorter_database { if (!params.cloudProcess) { virsorter_download_DB(); db = virsorter_download_DB.out } // cloud storage via db_preload.exists() if (params.cloudProcess) { - db_preload = file("${params.cloudDatabase}/virsorter/virsorter-data") + db_preload = file("${params.databases}/virsorter/virsorter-data") if (db_preload.exists()) { db = db_preload } else { virsorter_download_DB(); db = virsorter_download_DB.out } } @@ -164,7 +196,7 @@ workflow sourmash_database { if (!params.cloudProcess) { sourmash_download_DB(references); db = sourmash_download_DB.out } // cloud storage via db_preload.exists() if (params.cloudProcess) { - db_preload = file("${params.cloudDatabase}/sourmash/phages.sbt.json.tar.gz") + db_preload = file("${params.databases}/sourmash/phages.sbt.json.tar.gz") if (db_preload.exists()) { db = db_preload } else { sourmash_download_DB(references); db = sourmash_download_DB.out } } @@ -177,7 +209,7 @@ workflow phage_references { if (!params.cloudProcess) { download_references(); db = download_references.out } // cloud storage via db_preload.exists() if (params.cloudProcess) { - db_preload = file("${params.cloudDatabase}/references/phage_references.fa") + db_preload = file("${params.databases}/references/phage_references.fa") if (db_preload.exists()) { db = db_preload } else { download_references(); db = download_references.out } } @@ -191,7 +223,7 @@ workflow phage_blast_DB { if (!params.cloudProcess) { phage_references_blastDB(references); db = 
phage_references_blastDB.out } // cloud storage via db_preload.exists() if (params.cloudProcess) { - db_preload = file("${params.cloudDatabase}/blast_phage_DB") + db_preload = file("${params.databases}/blast_phage_DB") if (db_preload.exists()) { db = db_preload } else { phage_references_blastDB(references); db = phage_references_blastDB.out } } @@ -204,7 +236,7 @@ workflow vibrant_database { if (!params.cloudProcess) { vibrant_download_DB(); db = vibrant_download_DB.out } //cloud storage via db_preload.exists() if (params.cloudProcess) { - db_preload = file("${params.cloudDatabase}/Vibrant/database.tar.gz") + db_preload = file("${params.databases}/Vibrant/database.tar.gz") if (db_preload.exists()) { db = db_preload } else { vibrant_download_DB(); db = vibrant_download_DB.out } } @@ -217,7 +249,7 @@ workflow virnet_dependecies { if (!params.cloudProcess) { virnet_download_dependencies(); db = virnet_download_dependencies.out } // cloud storage via db_preload.exists() if (params.cloudProcess) { - db_preload = file("${params.cloudDatabase}/virnet/virnet") + db_preload = file("${params.databases}/virnet/virnet") if (db_preload.exists()) { db = db_preload } else { virnet_download_dependencies(); db = virnet_download_dependencies.out } } @@ -234,7 +266,7 @@ workflow pvog_database { if (!params.cloudProcess) { pvog_DB(); db = pvog_DB.out } // cloud storage via db_preload.exists() if (params.cloudProcess) { - db_preload = file("${params.cloudDatabase}/pvogs/") + db_preload = file("${params.databases}/pvogs/") if (db_preload.exists()) { db = db_preload } else { pvog_DB(); db = pvog_DB.out } } @@ -247,7 +279,7 @@ workflow rvdb_database { if (!params.cloudProcess) { rvdb_DB(); db = rvdb_DB.out } // cloud storage via db_preload.exists() if (params.cloudProcess) { - db_preload = file("${params.cloudDatabase}/pvogs/") + db_preload = file("${params.databases}/pvogs/") if (db_preload.exists()) { db = db_preload } else { rvdb_DB(); db = rvdb_DB.out } } @@ -260,7 +292,7 @@ workflow 
vog_database { if (!params.cloudProcess) { vog_DB(); db = vog_DB.out } // cloud storage via db_preload.exists() if (params.cloudProcess) { - db_preload = file("${params.cloudDatabase}/pvogs/") + db_preload = file("${params.databases}/pvogs/") if (db_preload.exists()) { db = db_preload } else { vog_DB(); db = vog_DB.out } } @@ -269,32 +301,29 @@ workflow vog_database { - - - /************* * SUB WORKFLOWS *************/ workflow fasta_validation_wf { - take: fasta + take: fasta main: input_suffix_check(fasta) emit: input_suffix_check.out } workflow read_validation_wf { - take: fastq + take: fastq main: fastqTofasta(removeSmallReads(fastq.splitFastq(by: 1000, file: true))) emit: fastqTofasta.out } workflow read_shuffling_wf { - take: fastq + take: fastq main: fastqTofasta(shuffle_reads_nts(removeSmallReads(fastq.splitFastq(by: 10000, file: true)))) emit: fastqTofasta.out } workflow sourmash_wf { - take: fasta + take: fasta sourmash_database main: if (!params.sm) { @@ -310,7 +339,7 @@ workflow sourmash_wf { } workflow deepvirfinder_wf { - take: fasta + take: fasta main: if (!params.dv) { filter_deepvirfinder(deepvirfinder(fasta).groupTuple(remainder: true)) @@ -324,7 +353,7 @@ workflow deepvirfinder_wf { } workflow marvel_wf { - take: fasta + take: fasta main: if (!params.ma) { // filtering filter_marvel(marvel(split_multi_fasta(fasta)).groupTuple(remainder: true)) @@ -338,7 +367,7 @@ workflow marvel_wf { } workflow metaphinder_wf { - take: fasta + take: fasta main: if (!params.mp) { metaphinder(fasta) // filtering @@ -353,7 +382,7 @@ workflow metaphinder_wf { } workflow metaphinder_own_DB_wf { - take: fasta + take: fasta blast_db main: if (!params.mp) { metaphinder_own_DB(fasta, blast_db) @@ -369,7 +398,7 @@ workflow metaphinder_own_DB_wf { } workflow virfinder_wf { - take: fasta + take: fasta main: if (!params.vf) { filter_virfinder(virfinder(fasta).groupTuple(remainder: true)) // raw data collector @@ -382,7 +411,7 @@ workflow virfinder_wf { } workflow virsorter_wf 
{ - take: fasta + take: fasta virsorter_DB main: if (!params.vs) { virsorter(fasta, virsorter_DB) @@ -398,7 +427,7 @@ workflow virsorter_wf { } workflow pprmeta_wf { - take: fasta + take: fasta ppr_deps main: if (!params.pp) { filter_PPRmeta(pprmeta(fasta, ppr_deps).groupTuple(remainder: true)) @@ -412,7 +441,7 @@ workflow pprmeta_wf { } workflow vibrant_wf { - take: fasta + take: fasta vibrant_download_DB main: if (!params.vb) { vibrant(fasta, vibrant_download_DB) @@ -428,7 +457,7 @@ workflow vibrant_wf { } workflow virnet_wf { - take: fasta + take: fasta virnet_dependecies main: if (!params.vn) { filter_virnet(virnet(normalize_contig_size(fasta), virnet_dependecies).groupTuple(remainder: true)) @@ -497,9 +526,9 @@ workflow { if (params.vs) { virsorter_DB = Channel.from( [ 'deactivated', 'deactivated'] ) } else { virsorter_DB = virsorter_database() } // phage annotation DBs deactivation based on input flags - if (params.anno) { pvog_DB = Channel.from( [ 'deactivated', 'deactivated'] ) } else { pvog_DB = pvog_database() } - if (params.anno) { vog_DB = Channel.from( [ 'deactivated', 'deactivated'] ) } else { vog_DB = vog_database() } - if (params.anno) { rvdb_DB = Channel.from( [ 'deactivated', 'deactivated'] ) } else { rvdb_DB = rvdb_database() } + if (params.anno) { pvog_DB = Channel.from( [ 'deactivated', 'deactivated'] ) } else { pvog_DB = pvog_database() } + if (params.anno) { vog_DB = Channel.from( [ 'deactivated', 'deactivated'] ) } else { vog_DB = vog_database() } + if (params.anno) { rvdb_DB = Channel.from( [ 'deactivated', 'deactivated'] ) } else { rvdb_DB = rvdb_database() } // gather results results = virsorter_wf(fasta_validation_wf.out, virsorter_DB) @@ -521,9 +550,8 @@ workflow { r_plot(filter_tool_names.out) upsetr_plot(filter_tool_names.out) //samtools - samtools(fasta_validation_wf.out.join(filter_tool_names.out)) + samtools(fasta_validation_wf.out.join(filter_tool_names.out)) //annotation - phage_annotation_wf(samtools.out, pvog_DB, vog_DB, 
rvdb_DB) } @@ -567,20 +595,40 @@ def helpMSG() { c_dim = "\033[2m"; log.info """ . - ${c_yellow}Usage example:${c_reset} - nextflow run phage.nf --fasta '*/*.fasta' + ${c_yellow}Usage examples:${c_reset} + nextflow run phage.nf --fasta '*/*.fasta' --cores 20 \\ + --output results -profile local,docker + + nextflow run phage.nf --fasta '*/*.fasta' --cores 20 \\ + --output results -profile lsf,singularity \\ + --cachedir /images/singularity_images \\ + --databases /databases/WtP_databases/ ${c_yellow}Input:${c_reset} - ${c_green} --fasta ${c_reset} '*.fasta' -> assembly file(s) - ${c_green} --fastq ${c_reset} '*.fastq' -> long read file(s) - ${c_dim} ..change above input to csv:${c_reset} ${c_green}--list ${c_reset} + --fasta '*.fasta' -> assembly file(s) + --fastq '*.fastq' -> long read file(s) + ${c_dim} ..change above input to csv via --list ${c_reset} + ${c_dim} e.g. --fasta inputs.csv --list + the .csv contains per line: name,/path/to/file + + ${c_yellow}Execution/Engine profiles:${c_reset} + WtP supports profiles to run via different ${c_green}Executors${c_reset} and ${c_blue}Engines${c_reset} e.g.: + -profile ${c_green}local${c_reset},${c_blue}docker${c_reset} + + ${c_green}Executor${c_reset} (choose one): + local + lsf + ebi + ${c_blue}Engines${c_reset} (choose one): + docker + singularity ${c_yellow}Options:${c_reset} --cores max cores for local use [default: $params.cores] --output name of the result folder [default: $params.output] - ${c_yellow}Tool control (BETA feature - might break the plots):${c_reset} - All tools are activated by default, deactivate them by adding one or more flags + ${c_yellow}Tool control:${c_reset} + Deactivate tools individually by adding one or more of these flags --dv deactivates deepvirfinder --ma deactivates marvel --mp deactivates metaphinder @@ -590,25 +638,20 @@ def helpMSG() { --vf deactivates virfinder --vn deactivates virnet --vs deactivates virsorter + --anno skips annotation - ${c_yellow}Database behaviour:${c_reset} - 
This workflow will automatically download files to ./nextflow-autodownload-databases - It will skip this download if the files are present in ./nextflow-autodownload-databases + ${c_yellow}Databases and file behaviour:${c_reset} + --databases specify download location of databases + [default: ${params.databases}] + ${c_dim}This workflow skips the download if the files are present${c_reset} + + --workdir defines the path where nextflow writes temporary files + [default: $params.workdir] - ${c_yellow}HPC or cloud computing:${c_reset} - For execution of the workflow in the cloud or on a HPC (such as provided with LSF) - you might want to adjust the following parameters. - --databases defines the path where databases are stored [default: $params.cloudDatabase] - --workdir defines the path where nextflow writes tmp files [default: $params.workdir] - --cachedir defines the path where images (singularity) are cached [default: $params.cachedir] - - ${c_dim}Nextflow options: - -with-report rep.html cpu / ram usage (may cause errors) - -with-dag chart.html generates a flowchart for the process tree - -with-timeline time.html timeline (may cause errors) - - Profile: - -profile standard, lsf [default: standard] ${c_reset} + ${c_yellow}Singularity:${c_reset} + --cachedir defines the path where singularity images are cached + [default: $params.cachedir] + """.stripIndent() }