Skip to content

Commit

Permalink
add version info for each classification tool
Browse files Browse the repository at this point in the history
  • Loading branch information
scanon committed Jan 30, 2023
1 parent 3defae7 commit f4bdd33
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 17 deletions.
21 changes: 9 additions & 12 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
FROM continuumio/miniconda3:4.8.2
FROM continuumio/miniconda3:latest

LABEL developer="Po-E Li"
LABEL email="[email protected]"
LABEL version="1.0.1"
LABEL version="1.0.4"
LABEL software="nmdc_taxa_profilers"
LABEL tags="metagenome, bioinformatics, NMDC, taxonomy"

Expand All @@ -18,26 +18,23 @@ RUN conda config --add channels conda-forge \

# install gottcha2
RUN conda install minimap2 pandas
RUN wget https://github.com/poeli/GOTTCHA2/archive/2.1.7.tar.gz \
&& tar -xzf 2.1.7.tar.gz \
&& cp GOTTCHA2-2.1.7/*.py /usr/local/bin \
&& rm -rf GOTTCHA2-2.1.7/ 2.1.7.zip
# Fetch the GOTTCHA2 2.1.8.1 release tarball, copy its Python scripts into
# /usr/local/bin, then delete both the extracted tree and the downloaded
# archive. The archive is saved as 2.1.8.1.tar.gz (the same name passed to
# tar), so that is the file that has to be removed to keep it out of the layer.
RUN wget https://github.com/poeli/GOTTCHA2/archive/2.1.8.1.tar.gz \
    && tar -xzf 2.1.8.1.tar.gz \
    && cp GOTTCHA2-2.1.8.1/*.py /usr/local/bin \
    && rm -rf GOTTCHA2-2.1.8.1/ 2.1.8.1.tar.gz

# install kraken2
RUN conda install kraken2=2.1.0
RUN conda install kraken2=2.1.2

# install centrifuge
RUN wget https://github.com/DaehwanKimLab/centrifuge/archive/v1.0.4-beta.tar.gz \
&& tar -xzf v1.0.4-beta.tar.gz \
&& cd centrifuge-1.0.4-beta \
&& make install prefix=/usr/local
RUN conda create -n centrifuge centrifuge=1.0.4_beta

# install krona
RUN conda install krona \
&& ktUpdateTaxonomy.sh

# install additional libs
RUN conda install click
RUN conda install pandas click
ADD *.py /opt/conda/bin/

CMD ["/bin/bash"]
76 changes: 74 additions & 2 deletions ReadbasedAnalysis.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ workflow ReadbasedAnalysis {
String? outdir
Boolean? paired = false
String bbtools_container="microbiomedata/bbtools:38.96"
String? docker = "microbiomedata/nmdc_taxa_profilers:1.0.2p1"
String? docker = "microbiomedata/nmdc_taxa_profilers:1.0.4"

call stage {
input:
Expand Down Expand Up @@ -53,6 +53,19 @@ workflow ReadbasedAnalysis {
}
}

# Hand the version file and report of every profiler, plus the tool switches
# and database locations, to make_info_file so it can write the summary of
# tools and database versions. Each input is bound exactly once and the list
# carries no trailing comma.
call make_info_file {
    input: enabled_tools = enabled_tools,
        db = db,
        docker = docker,
        gottcha2_report_tsv = profilerGottcha2.report_tsv,
        gottcha2_info = profilerGottcha2.info,
        centrifuge_report_tsv = profilerCentrifuge.report_tsv,
        centrifuge_info = profilerCentrifuge.info,
        kraken2_report_tsv = profilerKraken2.report_tsv,
        kraken2_info = profilerKraken2.info
}

call finish_reads {
input:
proj=proj,
Expand Down Expand Up @@ -85,12 +98,14 @@ workflow ReadbasedAnalysis {
File final_kraken2_report_tsv = finish_reads.kr_report_tsv
File final_kraken2_krona_html = finish_reads.kr_krona_html
File reads_objects = finish_reads.objects
File? info_file = make_info_file.profiler_info
String? info = make_info_file.profiler_info_text
}

meta {
author: "Po-E Li, B10, LANL"
email: "[email protected]"
version: "1.0.2"
version: "1.0.4"
}
}

Expand Down Expand Up @@ -245,3 +260,60 @@ task make_outputs{
}
}
# Write a small text file that lists, for every enabled profiler, the software
# version and the database version, and expose it both as a File and a String.
task make_info_file {
    # Per-tool on/off switches and database locations, keyed by
    # 'kraken2', 'centrifuge' and 'gottcha2' in the command below.
    Map[String, Boolean] enabled_tools
    Map[String, String] db
    # NOTE(review): docker is accepted as an input but is not referenced by the
    # runtime section of this task - confirm whether that is intended.
    String? docker
    # The *_report_tsv inputs are not read by the command.
    # NOTE(review): presumably they only make this task wait for the
    # profilers to finish - confirm.
    File? gottcha2_report_tsv
    File? gottcha2_info
    File? centrifuge_report_tsv
    File? centrifuge_info
    File? kraken2_report_tsv
    File? kraken2_info
    String info_filename = "profiler.info"

    command <<<
        set -euo pipefail

        # Header line; '>' starts the info file from scratch.
        header="Taxonomy profiling tools and databases used: "
        echo $header > ${info_filename}

        # Kraken2: db_ver.info is read from inside the database directory.
        # (An alternative that took the last component of the database path
        # as the version was left disabled in the original.)
        if [[ ${enabled_tools['kraken2']} == true ]]; then
            software_ver=$(cat ${kraken2_info})
            db_ver=$(cat ${db['kraken2']}/db_ver.info)
            entry="Kraken2 v$software_ver (database version: $db_ver)"
            echo $entry >> ${info_filename}
        fi

        # Centrifuge: db_ver.info is read from the parent directory of the
        # configured database path.
        if [[ ${enabled_tools['centrifuge']} == true ]]; then
            software_ver=$(cat ${centrifuge_info})
            db_ver=$(cat $(dirname ${db['centrifuge']})/db_ver.info)
            entry="Centrifuge v$software_ver (database version: $db_ver)"
            echo $entry >> ${info_filename}
        fi

        # GOTTCHA2: db_ver.info is read from inside the database directory.
        if [[ ${enabled_tools['gottcha2']} == true ]]; then
            software_ver=$(cat ${gottcha2_info})
            db_ver=$(cat ${db['gottcha2']}/db_ver.info)
            entry="Gottcha2 v$software_ver (database version: $db_ver)"
            echo $entry >> ${info_filename}
        fi
    >>>

    output {
        # The info file itself, and its contents as a single string.
        File profiler_info = "${info_filename}"
        String profiler_info_text = read_string("${info_filename}")
    }
    runtime {
        memory: "2G"
        cpu: 1
        maxRetries: 1
    }
}
16 changes: 16 additions & 0 deletions ReadbasedAnalysisTasks.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ task profilerGottcha2 {

command <<<
set -euo pipefail
. /opt/conda/etc/profile.d/conda.sh
conda activate gottcha2

gottcha2.py -r ${RELABD_COL} \
-i ${sep=' ' READS} \
Expand All @@ -17,11 +19,14 @@ task profilerGottcha2 {
--database ${DB}

grep "^species" ${PREFIX}.tsv | ktImportTaxonomy -t 3 -m 9 -o ${PREFIX}.krona.html - || true

gottcha2.py --version > ${PREFIX}.info
>>>
output {
File report_tsv = "${PREFIX}.tsv"
File full_tsv = "${PREFIX}.full.tsv"
File krona_html = "${PREFIX}.krona.html"
File info = "${PREFIX}.info"
}
runtime {
docker: DOCKER
Expand All @@ -46,6 +51,8 @@ task profilerCentrifuge {

command <<<
set -euo pipefail
. /opt/conda/etc/profile.d/conda.sh
conda activate centrifuge

centrifuge -x ${DB} \
-p ${CPU} \
Expand All @@ -54,11 +61,14 @@ task profilerCentrifuge {
--report-file ${PREFIX}.report.tsv

ktImportTaxonomy -m 5 -t 2 -o ${PREFIX}.krona.html ${PREFIX}.report.tsv

centrifuge --version | head -1 | cut -d ' ' -f3 > ${PREFIX}.info
>>>
output {
File classification_tsv="${PREFIX}.classification.tsv"
File report_tsv="${PREFIX}.report.tsv"
File krona_html="${PREFIX}.krona.html"
File info = "${PREFIX}.info"
}
runtime {
docker: DOCKER
Expand All @@ -84,20 +94,26 @@ task profilerKraken2 {

command <<<
set -euo pipefail
. /opt/conda/etc/profile.d/conda.sh
conda activate kraken2

kraken2 ${true="--paired" false='' PAIRED} \
--threads ${CPU} \
--db ${DB} \
--output ${PREFIX}.classification.tsv \
--report ${PREFIX}.report.tsv \
${sep=' ' READS}
conda deactivate

ktImportTaxonomy -m 3 -t 5 -o ${PREFIX}.krona.html ${PREFIX}.report.tsv

kraken2 --version | head -1 | cut -d ' ' -f3 > ${PREFIX}.info
>>>
output {
File classification_tsv = "${PREFIX}.classification.tsv"
File report_tsv = "${PREFIX}.report.tsv"
File krona_html = "${PREFIX}.krona.html"
File info = "${PREFIX}.info"
}
runtime {
docker: DOCKER
Expand Down
4 changes: 1 addition & 3 deletions outputTsv2json.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,13 @@
import os
import json
import pandas as pd
import numpy as np
import click

@click.command()
@click.option('--meta', type=click.File('r'), help='JSON of the metadata of output files')

def output2json(meta):
""" Simple converter that takes TSV files to generate a summary JSON. """
df = pd.DataFrame()
out_dict = {}

tsvfile_lod = json.load(meta)
Expand All @@ -21,6 +19,7 @@ def output2json(meta):
tool = tsvmeta['tool']
idx_col = 'taxRank'
read_cnt_col = 'numReads'
df = pd.DataFrame()

result = {
'classifiedReadCount': 0,
Expand All @@ -33,7 +32,6 @@ def output2json(meta):
def reduceDf(df, cols, ranks=['species','genus','family'], top=10):
"""
Report top # rows of ranks respectively and return a dict
df: results in dataframe
cols: (rnk_col, name_col, read_count_col, abu_col, taxid_col)
"""
Expand Down

0 comments on commit f4bdd33

Please sign in to comment.