diff --git a/README.md b/README.md index dd50714..c54289f 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ * by Christian Brandt & Mike Marquet * **this tool is under active development,feel free to report issues and add suggestions** -* Use a release candidate for a stable experience via `-r` e.g. `-r v1.1.0` +* Use a release candidate for a stable experience via `-r` e.g. `-r v1.2.0` * These are extensively tested release versions of WtP * [releases of WtP are listed here](https://github.com/replikation/What_the_Phage/releases) diff --git a/bin/contig_by_tool_count.sh b/bin/contig_by_tool_count.sh index fd0b5f8..da9de11 100755 --- a/bin/contig_by_tool_count.sh +++ b/bin/contig_by_tool_count.sh @@ -8,7 +8,15 @@ done awk '{print $0"\t"FILENAME}' *.txt > tmp_result.tsv awk '{ gsub(/.txt/,"", $3); print }' OFS='\t' tmp_result.tsv > tmp_results2.tsv rm *.txt -sed -e '1i\contig_name\tp_value\ttoolname' tmp_results2.tsv > contig_tool_p-value_overview.tsv +sed -e '1i\contig_name\tp_value\ttoolname' tmp_results2.tsv > tmp_results3.tsv + +## if the R Markdown code breaks, these are the files that can generate the error +## error prevention +grep -E -v ".txt" tmp_results3.tsv > contig_tool_p-value_overview.tsv + + + + ## which tools were used? 
#cut -f3 tmp_results2.tsv | sort -u > tools_used_for_phage_prediction.tsv diff --git a/nextflow.config b/nextflow.config index 160039f..171a321 100644 --- a/nextflow.config +++ b/nextflow.config @@ -15,6 +15,8 @@ params { cloudDatabase = false filter = '1500' setup = '' + all_tools = false + annotation_db = false // folder structure output = 'results' @@ -46,18 +48,14 @@ params { // parameter hmm_params = '-E 1e-30' - // raw tool output filters - + // raw tool output filters dv_filter = '0.9' - // ma_filter = '75' mp_filter = '50' vf_filter = '0.9' sm_filter = '0.5' vn_filter = '0.5' vs2_filter = '0.9' sk_filter = '0.75' - // pp_filter = '' - // vb_filter = '' } // runinfo diff --git a/phage.nf b/phage.nf index ee9e722..b2c1441 100755 --- a/phage.nf +++ b/phage.nf @@ -171,11 +171,37 @@ workflow { else if (params.fasta && !params.identify && !params.annotate && !params.setup ) { prediction_channel = input_validation_wf(fasta_input_ch) } /************************** -* Prediction +* Prediction via benchmarked tools only **************************/ // run annotation if identify flag or no flag at all - if (params.fasta && params.identify && !params.annotate && !params.setup || params.fasta && !params.identify && !params.annotate && !params.setup ) { + if (params.fasta && params.identify && !params.annotate && !params.setup && !params.all_tools || params.fasta && !params.identify && !params.annotate && !params.setup && !params.all_tools ) { // actual tools + results = deepvirfinder_wf( prediction_channel) + .concat( seeker_wf(prediction_channel)) + .concat( virfinder_wf(prediction_channel)) + .concat( pprmeta_wf(prediction_channel)) + .concat( metaphinder_wf(prediction_channel)) + .concat( vibrant_wf(prediction_channel)) + .concat( vibrant_virome_wf(prediction_channel)) + .concat( virsorter_wf(prediction_channel)) + .concat( virsorter_virome_wf(prediction_channel)) + .concat( virsorter2_wf(prediction_channel)) + .filter { it != 'deactivated' } // removes deactivated 
tool channels + .groupTuple() + + prepare_results_wf(results, prediction_channel) + + // markdown report input + // map identify output for input of annotation tools + annotation_channel = input_validation_wf.out.join(results) + } + //&& !params.all_tools +/************************** +* Prediction via all tools +**************************/ + // run prediction with all tools if --all_tools is set, with identify flag or no flag at all + if (params.fasta && params.identify && !params.annotate && !params.setup && params.all_tools|| params.fasta && params.all_tools && !params.identify && !params.annotate && !params.setup ) { + // all integrated tools (benchmarked and non-benchmarked) results = deepvirfinder_wf( prediction_channel) .concat( phigaro_wf(prediction_channel)) .concat( seeker_wf(prediction_channel)) @@ -199,7 +225,7 @@ workflow { // map identify output for input of annotaion tools annotation_channel = input_validation_wf.out.join(results) } - + /************************** * Annotation **************************/ @@ -256,10 +282,11 @@ def helpMSG() { c_reset = "\033[0m"; c_yellow = "\033[0;33m"; c_blue = "\033[0;34m"; + c_purple = "\033[0;35m"; c_dim = "\033[2m"; log.info """ . 
- ${c_yellow}Usage examples:${c_reset} + ${c_purple}Usage examples:${c_reset} nextflow run replikation/What_the_Phage --fasta '*/*.fasta' --cores 20 --max_cores 40 \\ --output results -profile local,docker @@ -268,7 +295,7 @@ def helpMSG() { --cachedir /images/singularity_images \\ --databases /databases/WtP_databases/ - ${c_yellow}Input:${c_reset} + ${c_purple}Input:${c_reset} --fasta '*.fasta' -> assembly file(s) --fastq '*.fastq' -> long read file(s) ${c_dim} ..change above input to csv via --list ${c_reset} @@ -276,7 +303,7 @@ def helpMSG() { the .csv contains per line: name,/path/to/file${c_reset} --setup skips analysis and just downloads databases and containers - ${c_yellow}Execution/Engine profiles:${c_reset} + ${c_purple}Execution/Engine profiles:${c_reset} WtP supports profiles to run via different ${c_green}Executers${c_reset} and ${c_blue}Engines${c_reset} e.g.: -profile ${c_green}local${c_reset},${c_blue}docker${c_reset} @@ -291,13 +318,16 @@ def helpMSG() { For a test run (~ 1h), add "smalltest" to the profile, e.g. 
-profile smalltest,local,singularity - ${c_yellow}Options:${c_reset} + ${c_purple}Options:${c_reset} --filter min contig size [bp] to analyse [default: $params.filter] --cores max cores per process for local use [default: $params.cores] --max_cores max cores used on the machine for local use [default: $params.max_cores] --output name of the result folder [default: $params.output] - ${c_yellow}Tool control:${c_reset} + ${c_purple}Tool control:${c_reset} + Deploy all integrated phage prediction tools + --all_tools activate all phage prediction tools + Deactivate tools individually by adding one or more of these flags --dv deactivates deepvirfinder --mp deactivates metaphinder @@ -311,8 +341,12 @@ def helpMSG() { --vs2 deactivates virsorter2 --sk deactivates seeker + ${c_purple}Custom phage annotation Database:${c_reset} + --annotation_db /path/to/your/custom_phage_annotation_db.tar.gz + Please provide a custom_phage_annotation_db.tar.gz archive that contains the following file formats: + *.hmm *.hmm.h3f *.hmm.h3i *.hmm.h3m *.hmm.h3p - Workflow control: + ${c_yellow}Workflow control:${c_reset} --identify only phage identification, skips analysis --annotate only annotation, skips phage identification diff --git a/submodule_report/Heatmap_table.Rmd b/submodule_report/Heatmap_table.Rmd index ad1f88c..58812ed 100644 --- a/submodule_report/Heatmap_table.Rmd +++ b/submodule_report/Heatmap_table.Rmd @@ -60,6 +60,26 @@ The tool's output and what WtP assigns are shown in the table below. +#### **Explanation tool output** + +**Tab.2**: The output of each tool and the values WtP assigns in Tab.1 . + +Tool | Standard output | WtP displayed value | F1 scores by Ho et al. 
+|-|-|-|-| +|deepvirfinder | p-value: 0 to 1 | 0 to 1 | >0.83 +|metaphinder | string: phage | 1 | >0.83 +|metaphinder own| string: phage | 1 | N/A +|phigaro | score: 0 to 1 | 0 to 1 | N/A +|pprmeta | phage_score: 0 to 1 | 0 to 1 | 0.92 +|seeker | score: 0 to 1 | 0 to 1 | <0.5 +|sourmash | similarity: 0 to 1 | 0 to 1 | N/A +|vibrant | prediction: virus | 1 | >0.83 +|vibrant-virome | prediction: virus | 1 | N/A +|virfinder | p-value: 0 to 1 | 0 to 1 | >0.83 +|virnet | score: 0 to 1 | 0 to 1 | N/A +|virsorter | category 1, category 2, category 3 | 1, 0.5, 0 | >0.83 +|virsorter-virome|category 1, category 2, category 3 | 1, 0.5, 0 | N/A +|virsorter2 | dsDNAphage: 0 to 1 | 0 to 1 | 0.93 #### **Extract contigs of interest** @@ -89,25 +109,5 @@ seqkit grep --pattern-file contig_IDs_of_interest.txt your_input_fasta.fa.gz > c -#### **Explanation tool output** - -**Tab.2**: The output of each tool and the values WtP assigns in Tab.1 . - -Tool | Standard output | WtP displayed value | -|-|-|-| -|deepvirfinder |p-value: 0 to 1 | 0 to 1 | -|metaphinder |string: phage | 1 | -|metaphinder own| string: phage | 1 | -|phigaro | score: 0 to 1 | 0 to 1 | -|pprmeta |phage_score: 0 to 1 | 0 to 1 | -|seeker |score: 0 to 1 | 0 to 1 | -|sourmash |similarity: 0 to 1 | 0 to 1 | -|vibrant |prediction: virus | 1 | -|vibrant-virome |prediction: virus | 1 | -|virfinder |p-value: 0 to 1 | 0 to 1 | -|virnet |score: 0 to 1 | 0 to 1 | -|virsorter |category 1, category 2, category 3| 1, 0.5, 0 | -|virsorter-virome|category 1, category 2, category 3| 1, 0.5, 0 | -|virsorter2 |dsDNAphage: 0 to 1 | 0 to 1 | Back to top \ No newline at end of file diff --git a/submodule_report/UpsetR.Rmd b/submodule_report/UpsetR.Rmd index 8088703..444973f 100644 --- a/submodule_report/UpsetR.Rmd +++ b/submodule_report/UpsetR.Rmd @@ -24,10 +24,11 @@ div.blue { background-color:#e6f0ff; border-radius: 5px; padding: 20px;}