Merge pull request #12 from microbiomedata/output_tweaks
WIP: adjust outputs
poeli authored Jun 2, 2021
2 parents d2e1b65 + a013ef1 commit 4a77c5d
Showing 2 changed files with 83 additions and 58 deletions.
ReadbasedAnalysis.wdl (78 changes: 64 additions & 14 deletions)
@@ -15,7 +15,6 @@ workflow ReadbasedAnalysis {
input: READS = reads,
DB = db["gottcha2"],
PREFIX = prefix,
OUTPATH = outdir+"/gottcha2",
CPU = cpu,
DOCKER = docker
}
@@ -27,7 +26,6 @@ workflow ReadbasedAnalysis {
PAIRED = paired,
DB = db["kraken2"],
PREFIX = prefix,
OUTPATH = outdir+"/kraken2",
CPU = cpu,
DOCKER = docker
}
@@ -38,26 +36,42 @@ workflow ReadbasedAnalysis {
input: READS = reads,
DB = db["centrifuge"],
PREFIX = prefix,
OUTPATH = outdir+"/centrifuge",
CPU = cpu,
DOCKER = docker
}
}

call tasks.generateSummaryJson {
input: TSV_META_JSON = [profilerGottcha2.results, profilerCentrifuge.results, profilerKraken2.results],
PREFIX = prefix,
OUTPATH = outdir,
DOCKER = docker
# call tasks.generateSummaryJson {
# input: TSV_META_JSON = [profilerGottcha2.results, profilerCentrifuge.results, profilerKraken2.results],
# PREFIX = prefix,
# OUTPATH = outdir,
# DOCKER = docker
# }
call make_outputs {
input: gottcha2_report_tsv = profilerGottcha2.report_tsv,
gottcha2_full_tsv = profilerGottcha2.full_tsv,
gottcha2_krona_html = profilerGottcha2.krona_html,
centrifuge_classification_tsv = profilerCentrifuge.classification_tsv,
centrifuge_report_tsv = profilerCentrifuge.report_tsv,
centrifuge_krona_html = profilerCentrifuge.krona_html,
kraken2_classification_tsv = profilerKraken2.classification_tsv,
kraken2_report_tsv = profilerKraken2.report_tsv,
kraken2_krona_html = profilerKraken2.krona_html,
outdir = outdir,
container = docker
}

output {
Map[String, Map[String, String]?] results = {
"gottcha2": profilerGottcha2.results,
"centrifuge": profilerCentrifuge.results,
"kraken2": profilerKraken2.results
}
File summary_json = generateSummaryJson.summary_json
File? gottcha2_report_tsv = profilerGottcha2.report_tsv
File? gottcha2_full_tsv = profilerGottcha2.full_tsv
File? gottcha2_krona_html = profilerGottcha2.krona_html
File? centrifuge_classification_tsv = profilerCentrifuge.classification_tsv
File? centrifuge_report_tsv = profilerCentrifuge.report_tsv
File? centrifuge_krona_html = profilerCentrifuge.krona_html
File? kraken2_classification_tsv = profilerKraken2.classification_tsv
File? kraken2_report_tsv = profilerKraken2.report_tsv
File? kraken2_krona_html = profilerKraken2.krona_html
# File summary_json = generateSummaryJson.summary_json
}

meta {
@@ -66,3 +80,39 @@ workflow ReadbasedAnalysis {
version: "1.0.2"
}
}


task make_outputs{
String outdir
File? gottcha2_report_tsv
File? gottcha2_full_tsv
File? gottcha2_krona_html
File? centrifuge_classification_tsv
File? centrifuge_report_tsv
File? centrifuge_krona_html
File? kraken2_classification_tsv
File? kraken2_report_tsv
File? kraken2_krona_html
String container

command<<<
mkdir -p ${outdir}/gottcha2
cp ${gottcha2_report_tsv} ${gottcha2_full_tsv} ${gottcha2_krona_html} \
${outdir}/gottcha2
mkdir -p ${outdir}/centrifuge
cp ${centrifuge_classification_tsv} ${centrifuge_report_tsv} ${centrifuge_krona_html} \
${outdir}/centrifuge
mkdir -p ${outdir}/kraken2
cp ${kraken2_classification_tsv} ${kraken2_report_tsv} ${kraken2_krona_html} \
${outdir}/kraken2
>>>
runtime {
docker: container
memory: "1 GiB"
cpu: 1
}
output{
Array[String] fastq_files = glob("${outdir}/*.fastq*")
}
}
ReadbasedAnalysisTasks.wdl (63 changes: 19 additions & 44 deletions)
@@ -1,43 +1,35 @@
task profilerGottcha2 {
Array[File] READS
String DB
String OUTPATH
String PREFIX
String? RELABD_COL = "ROLLUP_DOC"
String DOCKER
Int? CPU = 4

command <<<
set -euo pipefail
mkdir -p ${OUTPATH}
touch ${OUTPATH}/${PREFIX}.full.tsv

gottcha2.py -r ${RELABD_COL} \
-i ${sep=' ' READS} \
-t ${CPU} \
-o ${OUTPATH} \
-o . \
-p ${PREFIX} \
--database ${DB}

grep "^species" ${OUTPATH}/${PREFIX}.tsv | ktImportTaxonomy -t 3 -m 9 -o ${OUTPATH}/${PREFIX}.krona.html - || true
grep "^species" ${PREFIX}.tsv | ktImportTaxonomy -t 3 -m 9 -o ${PREFIX}.krona.html - || true
>>>
output {
Map[String, String] results = {
"tool": "gottcha2",
"orig_out_tsv": "${OUTPATH}/${PREFIX}.full.tsv",
"orig_rep_tsv": "${OUTPATH}/${PREFIX}.tsv",
"krona_html": "${OUTPATH}/${PREFIX}.krona.html"
}
File report_tsv = "${PREFIX}.tsv"
File full_tsv = "${PREFIX}.full.tsv"
File krona_html = "${PREFIX}.krona.html"
}
runtime {
docker: DOCKER
cpu: CPU
poolname: "readbaseanalysis-pool"
node: 1
nwpn: 1
mem: "45G"
time: "04:00:00"
shared: 0
}
meta {
author: "Po-E Li, B10, LANL"
@@ -48,40 +40,33 @@ task profilerGottcha2 {
task profilerCentrifuge {
Array[File] READS
String DB
String OUTPATH
String PREFIX
Int? CPU = 4
String DOCKER

command <<<
set -euo pipefail
mkdir -p ${OUTPATH}

centrifuge -x ${DB} \
-p ${CPU} \
-U ${sep=',' READS} \
-S ${OUTPATH}/${PREFIX}.classification.tsv \
--report-file ${OUTPATH}/${PREFIX}.report.tsv
-S ${PREFIX}.classification.tsv \
--report-file ${PREFIX}.report.tsv

ktImportTaxonomy -m 5 -t 2 -o ${OUTPATH}/${PREFIX}.krona.html ${OUTPATH}/${PREFIX}.report.tsv
ktImportTaxonomy -m 5 -t 2 -o ${PREFIX}.krona.html ${PREFIX}.report.tsv
>>>
output {
Map[String, String] results = {
"tool": "centrifuge",
"orig_out_tsv": "${OUTPATH}/${PREFIX}.classification.tsv",
"orig_rep_tsv": "${OUTPATH}/${PREFIX}.report.tsv",
"krona_html": "${OUTPATH}/${PREFIX}.krona.html"
}
File classification_tsv="${PREFIX}.classification.tsv"
File report_tsv="${PREFIX}.report.tsv"
File krona_html="${PREFIX}.krona.html"
}
runtime {
docker: DOCKER
cpu: CPU
poolname: "readbaseanalysis-pool"
node: 1
nwpn: 1
mem: "45G"
time: "04:00:00"
shared: 0
}
meta {
author: "Po-E Li, B10, LANL"
@@ -92,42 +77,35 @@ task profilerCentrifuge {
task profilerKraken2 {
Array[File] READS
String DB
String OUTPATH
String PREFIX
Boolean? PAIRED = false
Int? CPU = 4
String DOCKER

command <<<
set -euo pipefail
mkdir -p ${OUTPATH}

kraken2 ${true="--paired" false='' PAIRED} \
--threads ${CPU} \
--db ${DB} \
--output ${OUTPATH}/${PREFIX}.classification.tsv \
--report ${OUTPATH}/${PREFIX}.report.tsv \
--output ${PREFIX}.classification.tsv \
--report ${PREFIX}.report.tsv \
${sep=' ' READS}

ktImportTaxonomy -m 3 -t 5 -o ${OUTPATH}/${PREFIX}.krona.html ${OUTPATH}/${PREFIX}.report.tsv
ktImportTaxonomy -m 3 -t 5 -o ${PREFIX}.krona.html ${PREFIX}.report.tsv
>>>
output {
Map[String, String] results = {
"tool": "kraken2",
"orig_out_tsv": "${OUTPATH}/${PREFIX}.classification.tsv",
"orig_rep_tsv": "${OUTPATH}/${PREFIX}.report.tsv",
"krona_html": "${OUTPATH}/${PREFIX}.krona.html"
}
File classification_tsv = "${PREFIX}.classification.tsv"
File report_tsv = "${PREFIX}.report.tsv"
File krona_html = "${PREFIX}.krona.html"
}
runtime {
docker: DOCKER
cpu: CPU
poolname: "readbaseanalysis-pool"
node: 1
nwpn: 1
mem: "45G"
time: "04:00:00"
shared: 0
}
meta {
author: "Po-E Li, B10, LANL"
@@ -137,24 +115,21 @@ task profilerKraken2 {

task generateSummaryJson {
Array[Map[String, String]?] TSV_META_JSON
String OUTPATH
String PREFIX
String DOCKER

command {
outputTsv2json.py --meta ${write_json(TSV_META_JSON)} > ${OUTPATH}/${PREFIX}.json
outputTsv2json.py --meta ${write_json(TSV_META_JSON)} > ${PREFIX}.json
}
output {
File summary_json = "${OUTPATH}/${PREFIX}.json"
File summary_json = "${PREFIX}.json"
}
runtime {
docker: DOCKER
poolname: "readbaseanalysis-pool"
node: 1
nwpn: 1
mem: "45G"
time: "04:00:00"
shared: 0
}
meta {
author: "Po-E Li, B10, LANL"

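For context, a minimal sketch of how the reworked per-tool File? outputs could be consumed from a parent workflow. The parent workflow name and its inputs below are illustrative assumptions (the real workflow may require additional inputs not shown in this diff, and the docker tag is a placeholder); only the output names such as gottcha2_report_tsv, centrifuge_report_tsv, and kraken2_report_tsv come from this change.

import "ReadbasedAnalysis.wdl" as rba

workflow consumeProfiles {
    Array[File] reads
    Map[String, String] db   # expects keys "gottcha2", "kraken2", "centrifuge"

    call rba.ReadbasedAnalysis {
        input: reads = reads,
               db = db,
               paired = true,
               prefix = "sample1",
               outdir = "sample1_profiling",
               cpu = 4,
               docker = "profilers:placeholder-tag"   # hypothetical image tag
    }

    output {
        # Per-tool reports exposed directly as (optional) File outputs by this PR
        File? gottcha2_report = ReadbasedAnalysis.gottcha2_report_tsv
        File? centrifuge_report = ReadbasedAnalysis.centrifuge_report_tsv
        File? kraken2_report = ReadbasedAnalysis.kraken2_report_tsv
    }
}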