WIP: adjust outputs #12

Merged (1 commit, Jun 2, 2021)
ReadbasedAnalysis.wdl: 64 additions & 14 deletions (78 changes)
@@ -15,7 +15,6 @@ workflow ReadbasedAnalysis {
input: READS = reads,
DB = db["gottcha2"],
PREFIX = prefix,
OUTPATH = outdir+"/gottcha2",
CPU = cpu,
DOCKER = docker
}
@@ -27,7 +26,6 @@ workflow ReadbasedAnalysis {
PAIRED = paired,
DB = db["kraken2"],
PREFIX = prefix,
OUTPATH = outdir+"/kraken2",
CPU = cpu,
DOCKER = docker
}
@@ -38,26 +36,42 @@ workflow ReadbasedAnalysis {
input: READS = reads,
DB = db["centrifuge"],
PREFIX = prefix,
OUTPATH = outdir+"/centrifuge",
CPU = cpu,
DOCKER = docker
}
}

call tasks.generateSummaryJson {
input: TSV_META_JSON = [profilerGottcha2.results, profilerCentrifuge.results, profilerKraken2.results],
PREFIX = prefix,
OUTPATH = outdir,
DOCKER = docker
# call tasks.generateSummaryJson {
# input: TSV_META_JSON = [profilerGottcha2.results, profilerCentrifuge.results, profilerKraken2.results],
# PREFIX = prefix,
# OUTPATH = outdir,
# DOCKER = docker
# }
call make_outputs {
input: gottcha2_report_tsv = profilerGottcha2.report_tsv,
gottcha2_full_tsv = profilerGottcha2.full_tsv,
gottcha2_krona_html = profilerGottcha2.krona_html,
centrifuge_classification_tsv = profilerCentrifuge.classification_tsv,
centrifuge_report_tsv = profilerCentrifuge.report_tsv,
centrifuge_krona_html = profilerCentrifuge.krona_html,
kraken2_classification_tsv = profilerKraken2.classification_tsv,
kraken2_report_tsv = profilerKraken2.report_tsv,
kraken2_krona_html = profilerKraken2.krona_html,
outdir = outdir,
container = docker
}

output {
Map[String, Map[String, String]?] results = {
"gottcha2": profilerGottcha2.results,
"centrifuge": profilerCentrifuge.results,
"kraken2": profilerKraken2.results
}
File summary_json = generateSummaryJson.summary_json
File? gottcha2_report_tsv = profilerGottcha2.report_tsv
File? gottcha2_full_tsv = profilerGottcha2.full_tsv
File? gottcha2_krona_html = profilerGottcha2.krona_html
File? centrifuge_classification_tsv = profilerCentrifuge.classification_tsv
File? centrifuge_report_tsv = profilerCentrifuge.report_tsv
File? centrifuge_krona_html = profilerCentrifuge.krona_html
File? kraken2_classification_tsv = profilerKraken2.classification_tsv
File? kraken2_report_tsv = profilerKraken2.report_tsv
File? kraken2_krona_html = profilerKraken2.krona_html
# File summary_json = generateSummaryJson.summary_json
}

meta {
Expand All @@ -66,3 +80,39 @@ workflow ReadbasedAnalysis {
version: "1.0.2"
}
}


task make_outputs{
String outdir
File? gottcha2_report_tsv
File? gottcha2_full_tsv
File? gottcha2_krona_html
File? centrifuge_classification_tsv
File? centrifuge_report_tsv
File? centrifuge_krona_html
File? kraken2_classification_tsv
File? kraken2_report_tsv
File? kraken2_krona_html
String container

command<<<
mkdir -p ${outdir}/gottcha2
cp ${gottcha2_report_tsv} ${gottcha2_full_tsv} ${gottcha2_krona_html} \
${outdir}/gottcha2
mkdir -p ${outdir}/centrifuge
cp ${centrifuge_classification_tsv} ${centrifuge_report_tsv} ${centrifuge_krona_html} \
${outdir}/centrifuge
mkdir -p ${outdir}/kraken2
cp ${kraken2_classification_tsv} ${kraken2_report_tsv} ${kraken2_krona_html} \
${outdir}/kraken2
>>>
runtime {
docker: container
memory: "1 GiB"
cpu: 1
}
output{
Array[String] fastq_files = glob("${outdir}/*.fastq*")
}
}
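
The calls and the new make_outputs task above reference workflow-level inputs (reads, db, paired, prefix, outdir, cpu, docker) whose declarations sit above the first hunk and are not shown in this diff. A rough sketch of what those declarations imply, with names taken from the calls and types inferred from how each value is used rather than copied from the file:

    workflow ReadbasedAnalysis {
        # Sketch only: the actual declarations in the repository may differ.
        Array[File] reads         # FASTQ inputs passed to every profiler
        Map[String, String] db    # per-tool database paths, e.g. db["gottcha2"]
        Boolean paired            # forwarded to kraken2 as PAIRED
        String prefix             # file name prefix shared by all reports
        String outdir             # destination directory used by make_outputs
        Int cpu                   # threads given to each profiler
        String docker             # container image used by all tasks

        # ... conditional profiler calls and make_outputs as shown in the diff ...
    }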

ReadbasedAnalysisTasks.wdl: 19 additions & 44 deletions (63 changes)
@@ -1,43 +1,35 @@
task profilerGottcha2 {
Array[File] READS
String DB
String OUTPATH
String PREFIX
String? RELABD_COL = "ROLLUP_DOC"
String DOCKER
Int? CPU = 4

command <<<
set -euo pipefail
mkdir -p ${OUTPATH}
touch ${OUTPATH}/${PREFIX}.full.tsv

gottcha2.py -r ${RELABD_COL} \
-i ${sep=' ' READS} \
-t ${CPU} \
-o ${OUTPATH} \
-o . \
-p ${PREFIX} \
--database ${DB}

grep "^species" ${OUTPATH}/${PREFIX}.tsv | ktImportTaxonomy -t 3 -m 9 -o ${OUTPATH}/${PREFIX}.krona.html - || true
grep "^species" ${PREFIX}.tsv | ktImportTaxonomy -t 3 -m 9 -o ${PREFIX}.krona.html - || true
>>>
output {
Map[String, String] results = {
"tool": "gottcha2",
"orig_out_tsv": "${OUTPATH}/${PREFIX}.full.tsv",
"orig_rep_tsv": "${OUTPATH}/${PREFIX}.tsv",
"krona_html": "${OUTPATH}/${PREFIX}.krona.html"
}
File report_tsv = "${PREFIX}.tsv"
File full_tsv = "${PREFIX}.full.tsv"
File krona_html = "${PREFIX}.krona.html"
}
runtime {
docker: DOCKER
cpu: CPU
poolname: "readbaseanalysis-pool"
node: 1
nwpn: 1
mem: "45G"
time: "04:00:00"
shared: 0
}
meta {
author: "Po-E Li, B10, LANL"
@@ -48,40 +40,33 @@ task profilerGottcha2 {
task profilerCentrifuge {
Array[File] READS
String DB
String OUTPATH
String PREFIX
Int? CPU = 4
String DOCKER

command <<<
set -euo pipefail
mkdir -p ${OUTPATH}

centrifuge -x ${DB} \
-p ${CPU} \
-U ${sep=',' READS} \
-S ${OUTPATH}/${PREFIX}.classification.tsv \
--report-file ${OUTPATH}/${PREFIX}.report.tsv
-S ${PREFIX}.classification.tsv \
--report-file ${PREFIX}.report.tsv

ktImportTaxonomy -m 5 -t 2 -o ${OUTPATH}/${PREFIX}.krona.html ${OUTPATH}/${PREFIX}.report.tsv
ktImportTaxonomy -m 5 -t 2 -o ${PREFIX}.krona.html ${PREFIX}.report.tsv
>>>
output {
Map[String, String] results = {
"tool": "centrifuge",
"orig_out_tsv": "${OUTPATH}/${PREFIX}.classification.tsv",
"orig_rep_tsv": "${OUTPATH}/${PREFIX}.report.tsv",
"krona_html": "${OUTPATH}/${PREFIX}.krona.html"
}
File classification_tsv="${PREFIX}.classification.tsv"
File report_tsv="${PREFIX}.report.tsv"
File krona_html="${PREFIX}.krona.html"
}
runtime {
docker: DOCKER
cpu: CPU
poolname: "readbaseanalysis-pool"
node: 1
nwpn: 1
mem: "45G"
time: "04:00:00"
shared: 0
}
meta {
author: "Po-E Li, B10, LANL"
@@ -92,42 +77,35 @@ task profilerCentrifuge {
task profilerKraken2 {
Array[File] READS
String DB
String OUTPATH
String PREFIX
Boolean? PAIRED = false
Int? CPU = 4
String DOCKER

command <<<
set -euo pipefail
mkdir -p ${OUTPATH}

kraken2 ${true="--paired" false='' PAIRED} \
--threads ${CPU} \
--db ${DB} \
--output ${OUTPATH}/${PREFIX}.classification.tsv \
--report ${OUTPATH}/${PREFIX}.report.tsv \
--output ${PREFIX}.classification.tsv \
--report ${PREFIX}.report.tsv \
${sep=' ' READS}

ktImportTaxonomy -m 3 -t 5 -o ${OUTPATH}/${PREFIX}.krona.html ${OUTPATH}/${PREFIX}.report.tsv
ktImportTaxonomy -m 3 -t 5 -o ${PREFIX}.krona.html ${PREFIX}.report.tsv
>>>
output {
Map[String, String] results = {
"tool": "kraken2",
"orig_out_tsv": "${OUTPATH}/${PREFIX}.classification.tsv",
"orig_rep_tsv": "${OUTPATH}/${PREFIX}.report.tsv",
"krona_html": "${OUTPATH}/${PREFIX}.krona.html"
}
File classification_tsv = "${PREFIX}.classification.tsv"
File report_tsv = "${PREFIX}.report.tsv"
File krona_html = "${PREFIX}.krona.html"
}
runtime {
docker: DOCKER
cpu: CPU
poolname: "readbaseanalysis-pool"
node: 1
nwpn: 1
mem: "45G"
time: "04:00:00"
shared: 0
}
meta {
author: "Po-E Li, B10, LANL"
@@ -137,24 +115,21 @@ task profilerKraken2 {

task generateSummaryJson {
Array[Map[String, String]?] TSV_META_JSON
String OUTPATH
String PREFIX
String DOCKER

command {
outputTsv2json.py --meta ${write_json(TSV_META_JSON)} > ${OUTPATH}/${PREFIX}.json
outputTsv2json.py --meta ${write_json(TSV_META_JSON)} > ${PREFIX}.json
}
output {
File summary_json = "${OUTPATH}/${PREFIX}.json"
File summary_json = "${PREFIX}.json"
}
runtime {
docker: DOCKER
poolname: "readbaseanalysis-pool"
node: 1
nwpn: 1
mem: "45G"
time: "04:00:00"
shared: 0
}
meta {
author: "Po-E Li, B10, LANL"
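
Because every profiler call sits inside a conditional block, the workflow outputs introduced by this change are all optional (File?). A downstream workflow that needs plain Files has to unwrap them; a generic sketch of how a caller might do that (hypothetical wrapper, not part of this PR), using the standard-library select_all() to drop any report that was not produced:

    import "ReadbasedAnalysis.wdl" as rba

    workflow ConsumeProfiles {
        # Inputs mirror the sketch above; names and types are inferred, not copied.
        Array[File] reads
        Map[String, String] db
        Boolean paired
        String prefix
        String outdir
        Int cpu
        String docker

        call rba.ReadbasedAnalysis {
            input: reads = reads, db = db, paired = paired, prefix = prefix,
                   outdir = outdir, cpu = cpu, docker = docker
        }

        # File? values cannot be handed directly to a task expecting File;
        # select_all() keeps only the Krona reports that actually exist.
        Array[File] krona_html = select_all([
            ReadbasedAnalysis.gottcha2_krona_html,
            ReadbasedAnalysis.centrifuge_krona_html,
            ReadbasedAnalysis.kraken2_krona_html
        ])

        output {
            Array[File] all_krona_html = krona_html
        }
    }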