Skip to content

Commit

Permalink
Merge pull request #172 from replikation/revision_gigascience
Browse files Browse the repository at this point in the history
Revision gigascience
  • Loading branch information
mult1fractal authored Aug 17, 2022
2 parents 928cd60 + 3db94ce commit 79faf1a
Show file tree
Hide file tree
Showing 12 changed files with 118 additions and 74 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

* by Christian Brandt & Mike Marquet
* **this tool is under active development,feel free to report issues and add suggestions**
* Use a release candidate for a stable experience via `-r` e.g. `-r v1.1.0`
* Use a release candidate for a stable experience via `-r` e.g. `-r v1.2.0`
* These are extensively tested release versions of WtP
* [releases of WtP are listed here](https://github.com/replikation/What_the_Phage/releases)

Expand Down
10 changes: 9 additions & 1 deletion bin/contig_by_tool_count.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,15 @@ done
awk '{print $0"\t"FILENAME}' *.txt > tmp_result.tsv
awk '{ gsub(/.txt/,"", $3); print }' OFS='\t' tmp_result.tsv > tmp_results2.tsv
rm *.txt
sed -e '1i\contig_name\tp_value\ttoolname' tmp_results2.tsv > contig_tool_p-value_overview.tsv
sed -e '1i\contig_name\tp_value\ttoolname' tmp_results2.tsv > tmp_results3.tsv

## if r markdown code breaks .... these are the files can generate the error
## error prevention
grep -E -v ".txt" tmp_results3.tsv > contig_tool_p-value_overview.tsv





## which tools were used?
#cut -f3 tmp_results2.tsv | sort -u > tools_used_for_phage_prediction.tsv
Expand Down
8 changes: 3 additions & 5 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ params {
cloudDatabase = false
filter = '1500'
setup = ''
all_tools = false
annotation_db = false

// folder structure
output = 'results'
Expand Down Expand Up @@ -46,18 +48,14 @@ params {
// parameter
hmm_params = '-E 1e-30'

// raw tool output filters

// raw tool output filters
dv_filter = '0.9'
// ma_filter = '75'
mp_filter = '50'
vf_filter = '0.9'
sm_filter = '0.5'
vn_filter = '0.5'
vs2_filter = '0.9'
sk_filter = '0.75'
// pp_filter = ''
// vb_filter = ''

}
// runinfo
Expand Down
52 changes: 43 additions & 9 deletions phage.nf
Original file line number Diff line number Diff line change
Expand Up @@ -171,11 +171,37 @@ workflow {
else if (params.fasta && !params.identify && !params.annotate && !params.setup ) { prediction_channel = input_validation_wf(fasta_input_ch) }

/**************************
* Prediction
* Prediction via benchmarked tools only
**************************/
// run annotation if identify flag or no flag at all
if (params.fasta && params.identify && !params.annotate && !params.setup || params.fasta && !params.identify && !params.annotate && !params.setup ) {
if (params.fasta && params.identify && !params.annotate && !params.setup && !params.all_tools || params.fasta && !params.identify && !params.annotate && !params.setup && !params.all_tools ) {
// actual tools
results = deepvirfinder_wf( prediction_channel)
.concat( seeker_wf(prediction_channel))
.concat( virfinder_wf(prediction_channel))
.concat( pprmeta_wf(prediction_channel))
.concat( metaphinder_wf(prediction_channel))
.concat( vibrant_wf(prediction_channel))
.concat( vibrant_virome_wf(prediction_channel))
.concat( virsorter_wf(prediction_channel))
.concat( virsorter_virome_wf(prediction_channel))
.concat( virsorter2_wf(prediction_channel))
.filter { it != 'deactivated' } // removes deactivated tool channels
.groupTuple()

prepare_results_wf(results, prediction_channel)

// markdown report input
// map identify output for input of annotaion tools
annotation_channel = input_validation_wf.out.join(results)
}
//&& !params.all_tools
/**************************
* Prediction via all tools
**************************/
// run annotation if identify flag or no flag at all
if (params.fasta && params.identify && !params.annotate && !params.setup && params.all_tools|| params.fasta && params.all_tools && !params.identify && !params.annotate && !params.setup ) {
// benchmarked tools
results = deepvirfinder_wf( prediction_channel)
.concat( phigaro_wf(prediction_channel))
.concat( seeker_wf(prediction_channel))
Expand All @@ -199,7 +225,7 @@ workflow {
// map identify output for input of annotaion tools
annotation_channel = input_validation_wf.out.join(results)
}

/**************************
* Annotation
**************************/
Expand Down Expand Up @@ -256,10 +282,11 @@ def helpMSG() {
c_reset = "\033[0m";
c_yellow = "\033[0;33m";
c_blue = "\033[0;34m";
c_purple = "\033[0;35m";
c_dim = "\033[2m";
log.info """
.
${c_yellow}Usage examples:${c_reset}
${c_purple}Usage examples:${c_reset}
nextflow run replikation/What_the_Phage --fasta '*/*.fasta' --cores 20 --max_cores 40 \\
--output results -profile local,docker
Expand All @@ -268,15 +295,15 @@ def helpMSG() {
--cachedir /images/singularity_images \\
--databases /databases/WtP_databases/
${c_yellow}Input:${c_reset}
${c_purple}Input:${c_reset}
--fasta '*.fasta' -> assembly file(s)
--fastq '*.fastq' -> long read file(s)
${c_dim} ..change above input to csv via --list ${c_reset}
${c_dim} e.g. --fasta inputs.csv --list
the .csv contains per line: name,/path/to/file${c_reset}
--setup skips analysis and just downloads databases and containers
${c_yellow}Execution/Engine profiles:${c_reset}
${c_purple}Execution/Engine profiles:${c_reset}
WtP supports profiles to run via different ${c_green}Executers${c_reset} and ${c_blue}Engines${c_reset} e.g.:
-profile ${c_green}local${c_reset},${c_blue}docker${c_reset}
Expand All @@ -291,13 +318,16 @@ def helpMSG() {
For a test run (~ 1h), add "smalltest" to the profile, e.g. -profile smalltest,local,singularity
${c_yellow}Options:${c_reset}
${c_purple}Options:${c_reset}
--filter min contig size [bp] to analyse [default: $params.filter]
--cores max cores per process for local use [default: $params.cores]
--max_cores max cores used on the machine for local use [default: $params.max_cores]
--output name of the result folder [default: $params.output]
${c_yellow}Tool control:${c_reset}
${c_purple}Tool control:${c_reset}
Deploy all integrated phage prediction tools
--all_tools activate all phage prediction tools
Deactivate tools individually by adding one or more of these flags
--dv deactivates deepvirfinder
--mp deactivates metaphinder
Expand All @@ -311,8 +341,12 @@ def helpMSG() {
--vs2 deactivates virsorter2
--sk deactivates seeker
${c_purple}Custom phage annotation Database:${c_reset}
--annotation_db /path/to/your/custom_phage_annotation_db.tar.gz
Please provide a custom_phage_annotation_db.tar.gz archive that contains the following file formats:
*.hmm *.hmm.h3f *.hmm.h3i *.hmm.h3m *.hmm.h3p
Workflow control:
${c_yellow}Workflow control:${c_reset}
--identify only phage identification, skips analysis
--annotate only annotation, skips phage identification
Expand Down
40 changes: 20 additions & 20 deletions submodule_report/Heatmap_table.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,26 @@ The tool's output and what WtP assigns are shown in the table below.

</div>

#### **Explanation tool output**

**Tab.2**: The output of each tool and the values WtP assigns in Tab.1 .

Tool | Standard output | WtP displayed value | F1 scores by Ho et al.
|-|-|-|-|
|deepvirfinder | p-value: 0 to 1 | 0 to 1 | >0.83
|metaphinder | string: phage | 1 | >0.83
|metaphinder own| string: phage | 1 | N/A
|phigaro | score: 0 to 1 | 0 to 1 | N/A
|pprmeta | phage_score: 0 to 1 | 0 to 1 | 0.92
|seeker | score: 0 to 1 | 0 to 1 | <0.5
|sourmash | similarity: 0 to 1 | 0 to 1 | N/A
|vibrant | prediction: virus | 1 | >0.83
|vibrant-virome | prediction: virus | 1 | N/A
|virfinder | p-value: 0 to 1 | 0 to 1 | >0.83
|virnet | score: 0 to 1 | 0 to 1 | N/A
|virsorter | category 1, category 2, category 3 | 1, 0.5, 0 | >0.83
|virsorter-virome|category 1, category 2, category 3 | 1, 0.5, 0 | N/A
|virsorter2 | dsDNAphage: 0 to 1 | 0 to 1 | 0.93


#### **Extract contigs of interest**
Expand Down Expand Up @@ -89,25 +109,5 @@ seqkit grep --pattern-file contig_IDs_of_interest.txt your_input_fasta.fa.gz > c



#### **Explanation tool output**

**Tab.2**: The output of each tool and the values WtP assigns in Tab.1 .

Tool | Standard output | WtP displayed value |
|-|-|-|
|deepvirfinder |p-value: 0 to 1 | 0 to 1 |
|metaphinder |string: phage | 1 |
|metaphinder own| string: phage | 1 |
|phigaro | score: 0 to 1 | 0 to 1 |
|pprmeta |phage_score: 0 to 1 | 0 to 1 |
|seeker |score: 0 to 1 | 0 to 1 |
|sourmash |similarity: 0 to 1 | 0 to 1 |
|vibrant |prediction: virus | 1 |
|vibrant-virome |prediction: virus | 1 |
|virfinder |p-value: 0 to 1 | 0 to 1 |
|virnet |score: 0 to 1 | 0 to 1 |
|virsorter |category 1, category 2, category 3| 1, 0.5, 0 |
|virsorter-virome|category 1, category 2, category 3| 1, 0.5, 0 |
|virsorter2 |dsDNAphage: 0 to 1 | 0 to 1 |

<a href="#top">Back to top</a>
9 changes: 5 additions & 4 deletions submodule_report/UpsetR.Rmd
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,11 @@ div.blue { background-color:#e6f0ff; border-radius: 5px; padding: 20px;}
</style>
<div class = "blue">

The "UpSet" plot is an alternative but more detailed Venn diagram, and it summarizes the prediction performance of each tool for your sample.
The total amount of identified phage-contigs per tool is shown in blue bars on the left.
Black bars visualize the number of contigs that each tool or tool combination has uniquely identified, and each tool combination is shown below each black bar as a dot matrix.
E.g., a black bar with two dots below it means that these two tools identified the same contigs as phages.
The total amount of identified phage contigs per tool is shown in blue bars on the left.
Black, vertical bars visualize the number of contigs that each tool or tool combination has uniquely identified.
Each tool combination is shown below the vertical barplot as a dot matrix. How to read the diagram: For example, 53 phage contigs are found by six tools (DeepVirFinder, Metaphinder-own-DB, Metaphinder, PPRmeta, Seeker and VirFinder).
Another 42 contigs are found by these tools but also virnet.

</div>


Expand Down
9 changes: 7 additions & 2 deletions workflows/phage_annotation_wf.nf
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,11 @@ include { chromomap } from './process/phage_annotation/chromomap'
workflow phage_annotation_wf {
take: fasta_and_tool_results
main:

// Input for custom annotation database
if (params.annotation_db) { annotation_custom_db_ch = Channel
.fromPath( params.annotation_db, checkIfExists: true)
.view()}
// map input for prodigal
fasta = fasta_and_tool_results.map {it -> tuple(it[0],it[1])}
//Databases
//Database for hmmscan
Expand All @@ -31,7 +35,8 @@ workflow phage_annotation_wf {
}
//annotation-process
prodigal(fasta)
hmmscan(prodigal.out, pvog_DB.out)
if (!params.annotation_db) {hmmscan(prodigal.out, pvog_DB.out)}
else {hmmscan(prodigal.out, annotation_custom_db_ch)}
chromomap_parser(fasta.join(hmmscan.out), vogtable_DB.out)
chromomap(chromomap_parser.out[0].mix(chromomap_parser.out[1]))

Expand Down
27 changes: 1 addition & 26 deletions workflows/process/metaphinder/metaphinder.nf
Original file line number Diff line number Diff line change
Expand Up @@ -23,29 +23,4 @@ process metaphinder {
}


process metaphinder_own_DB {
label 'metaphinder'
errorStrategy 'ignore'
input:
tuple val(name), file(fasta)
file(database)
output:
tuple val(name), file("${name}_*.list")
// output collection stream
tuple val(name), file("${name}_*.list"), file("${name}_*_blast.out")
script:
"""
rnd=${Math.random()}
mkdir ${name}
tar xzf ${database}
MetaPhinder.py -i ${fasta} -o ${name} -d phage_blast_DB/phage_db
mv ${name}/output.txt ${name}_\${PWD##*/}.list
mv ${name}/blast.out ${name}_\${PWD##*/}_blast.out
"""
stub:
"""
echo "#contigID classification ANI [%] merged coverage [%] number of hits size[bp]" > ${name}_\${PWD##*/}.list
echo "pos_phage_0 phage 80.754 96.97 172 146647" >> ${name}_\${PWD##*/}.list
touch ${name}_\${PWD##*/}_blast.out
"""
}

Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ process phage_references_blastDB {
else { storeDir "${params.databases}/blast_DB_phage" }
label 'metaphinder'
errorStrategy = "retry"
maxRetries = 1
maxRetries = 2
input:
path(references)
output:
Expand Down
16 changes: 12 additions & 4 deletions workflows/process/phage_annotation/hmmscan.nf
Original file line number Diff line number Diff line change
Expand Up @@ -3,16 +3,24 @@ process hmmscan {
label 'hmmscan'
input:
tuple val(name), path(faa)
path(pvog_db)
path(annotation_db)
output:
tuple val(name), path("${name}_${pvog_db}_hmmscan.tbl"), path(faa)
tuple val(name), path("${name}_${annotation_db}_hmmscan.tbl"), path(faa)
script:
if (!params.annotation_db)
"""
hmmscan --cpu ${task.cpus} ${params.hmm_params} --noali --domtblout ${name}_${pvog_db}_hmmscan.tbl ${pvog_db}/${pvog_db}.hmm ${faa}
hmmscan --cpu ${task.cpus} ${params.hmm_params} --noali --domtblout ${name}_${annotation_db}_hmmscan.tbl ${annotation_db}/${annotation_db}.hmm ${faa}
"""
else
"""
mkdir custom_annotation_db
tar -xvzf ${annotation_db} -C custom_annotation_db
hmmscan --cpu ${task.cpus} ${params.hmm_params} --noali --domtblout ${name}_${annotation_db}_hmmscan.tbl custom_annotation_db/*.hmm ${faa}
"""

stub:
"""
touch ${name}_${pvog_db}_hmmscan.tbl
touch ${name}_${annotation_db}_hmmscan.tbl
"""
}

Expand Down
2 changes: 1 addition & 1 deletion workflows/process/prepare_results/upsetr.nf
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ process upsetr_plot {
#nsets = 20, nintersects = 40
upset(fromList(sets), sets = names(sets),
mainbar.y.label = "No. of shared viral contigs", sets.x.label = "Number of identified \\nviral contigs",
mainbar.y.label = "No. of shared phage contigs", sets.x.label = "Number of phage \\ncontigs per tool ",
order.by = "freq", sets.bar.color = "#56B4E9", keep.order = F,
text.scale = 1.4, point.size = 2.6, line.size = 0.8, set_size.show = FALSE)
Expand Down
15 changes: 15 additions & 0 deletions workflows/process/virsorter2/filter_virsorter2.nf
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ process filter_virsorter2 {
script:
"""
tail -n+2 *.tsv | awk '{ print \$1, \$2 }' OFS='\\t' | awk '{ print \$1, \$2 }' OFS='||' | cut -d "|" -f1,5 | awk -F"|" '{ print \$1, \$2 }' OFS='\t' > virsorter2_\${PWD##*/}.tsv
sed -i 's/nan/0/g' virsorter2_\${PWD##*/}.tsv
"""
}

Expand All @@ -19,3 +20,17 @@ ctg6_len=5303||full 0.967 0.733 0.967 dsDNAphage 3585 1

//awk '{ gsub(/||full/,"", $3); print }' OFS='\t' tmp_result.tsv > tmp_results2.tsv
//awk '{ print \$1, \$2 }' OFS='||' virsorter2_\${PWD##*/}.tsv

// nan if row contains nan make to 0
// GT1_27252 0.840 virsorter2
// GT1_27276 0.640 virsorter2
// GT1_27283 0.560 virsorter2
// GT1_27330 0.633 virsorter2
// GT1_27380 0.180 virsorter2
// GT1_1 0.840 virsorter2
// GT1_396 0.567 virsorter2
// GT1_2577 nan virsorter2
// GT1_5431 nan virsorter2
// GT1_5765 nan virsorter2
// GT1_7346 nan virsorter2
//sed -i 's/nan/0/g' virsorter2_test_nan.txt

0 comments on commit 79faf1a

Please sign in to comment.