From a013ef1c2b1334b1ba36b7e71b3dfaf3db117cb3 Mon Sep 17 00:00:00 2001
From: Shane Canon
Date: Tue, 1 Jun 2021 17:52:48 -0700
Subject: [PATCH] adjust outputs

- Have outputs written in the execution area
- Temporarily disabled the JSON output.  Needs to be fixed.
---
Review notes (this section is not part of the commit message):
- make_outputs: added `set -euo pipefail` and guarded each `cp` so that a
  profiler whose optional outputs are undefined no longer leaves `cp`
  without source operands.
- make_outputs: the `fastq_files` output is kept as submitted but flagged;
  the command never writes *.fastq* files.
- profilerCentrifuge: output declarations spaced like the other tasks.
- The post-image blob id on the first `index` line predates these edits.

 ReadbasedAnalysis.wdl      | 92 +++++++++++++++++++++++++++++++-------
 ReadbasedAnalysisTasks.wdl | 63 ++++++++++--------------------
 2 files changed, 97 insertions(+), 58 deletions(-)

diff --git a/ReadbasedAnalysis.wdl b/ReadbasedAnalysis.wdl
index bca79f0..1de443e 100644
--- a/ReadbasedAnalysis.wdl
+++ b/ReadbasedAnalysis.wdl
@@ -15,7 +15,6 @@ workflow ReadbasedAnalysis {
             input: READS = reads,
                    DB = db["gottcha2"],
                    PREFIX = prefix,
-                   OUTPATH = outdir+"/gottcha2",
                    CPU = cpu,
                    DOCKER = docker
         }
@@ -27,7 +26,6 @@ workflow ReadbasedAnalysis {
                    PAIRED = paired,
                    DB = db["kraken2"],
                    PREFIX = prefix,
-                   OUTPATH = outdir+"/kraken2",
                    CPU = cpu,
                    DOCKER = docker
         }
@@ -38,26 +36,42 @@ workflow ReadbasedAnalysis {
             input: READS = reads,
                    DB = db["centrifuge"],
                    PREFIX = prefix,
-                   OUTPATH = outdir+"/centrifuge",
                    CPU = cpu,
                    DOCKER = docker
         }
     }
 
-    call tasks.generateSummaryJson {
-        input: TSV_META_JSON = [profilerGottcha2.results, profilerCentrifuge.results, profilerKraken2.results],
-               PREFIX = prefix,
-               OUTPATH = outdir,
-               DOCKER = docker
+#    call tasks.generateSummaryJson {
+#        input: TSV_META_JSON = [profilerGottcha2.results, profilerCentrifuge.results, profilerKraken2.results],
+#               PREFIX = prefix,
+#               OUTPATH = outdir,
+#               DOCKER = docker
+#    }
+    call make_outputs {
+        input: gottcha2_report_tsv = profilerGottcha2.report_tsv,
+               gottcha2_full_tsv = profilerGottcha2.full_tsv,
+               gottcha2_krona_html = profilerGottcha2.krona_html,
+               centrifuge_classification_tsv = profilerCentrifuge.classification_tsv,
+               centrifuge_report_tsv = profilerCentrifuge.report_tsv,
+               centrifuge_krona_html = profilerCentrifuge.krona_html,
+               kraken2_classification_tsv = profilerKraken2.classification_tsv,
+               kraken2_report_tsv = profilerKraken2.report_tsv,
+               kraken2_krona_html = profilerKraken2.krona_html,
+               outdir = outdir,
+               container = docker
     }
 
     output {
-        Map[String, Map[String, String]?] results = {
-            "gottcha2": profilerGottcha2.results,
-            "centrifuge": profilerCentrifuge.results,
-            "kraken2": profilerKraken2.results
-        }
-        File summary_json = generateSummaryJson.summary_json
+        File? gottcha2_report_tsv = profilerGottcha2.report_tsv
+        File? gottcha2_full_tsv = profilerGottcha2.full_tsv
+        File? gottcha2_krona_html = profilerGottcha2.krona_html
+        File? centrifuge_classification_tsv = profilerCentrifuge.classification_tsv
+        File? centrifuge_report_tsv = profilerCentrifuge.report_tsv
+        File? centrifuge_krona_html = profilerCentrifuge.krona_html
+        File? kraken2_classification_tsv = profilerKraken2.classification_tsv
+        File? kraken2_report_tsv = profilerKraken2.report_tsv
+        File? kraken2_krona_html = profilerKraken2.krona_html
+#        File summary_json = generateSummaryJson.summary_json
     }
 
     meta {
@@ -66,3 +80,53 @@ workflow ReadbasedAnalysis {
         version: "1.0.2"
     }
 }
+
+
+# Copies whichever profiler result files are defined into
+# <outdir>/gottcha2, <outdir>/centrifuge and <outdir>/kraken2.
+task make_outputs{
+    String outdir
+    File? gottcha2_report_tsv
+    File? gottcha2_full_tsv
+    File? gottcha2_krona_html
+    File? centrifuge_classification_tsv
+    File? centrifuge_report_tsv
+    File? centrifuge_krona_html
+    File? kraken2_classification_tsv
+    File? kraken2_report_tsv
+    File? kraken2_krona_html
+    String container
+
+    command<<<
+        set -euo pipefail
+        # An undefined optional input interpolates to an empty string, which
+        # would leave cp with a destination but no source; only copy the
+        # results of tools whose report input is defined.
+        mkdir -p ${outdir}/gottcha2
+        if [ -n "${gottcha2_report_tsv}" ]; then
+            cp ${gottcha2_report_tsv} ${gottcha2_full_tsv} ${gottcha2_krona_html} \
+               ${outdir}/gottcha2
+        fi
+        mkdir -p ${outdir}/centrifuge
+        if [ -n "${centrifuge_report_tsv}" ]; then
+            cp ${centrifuge_classification_tsv} ${centrifuge_report_tsv} ${centrifuge_krona_html} \
+               ${outdir}/centrifuge
+        fi
+        mkdir -p ${outdir}/kraken2
+        if [ -n "${kraken2_report_tsv}" ]; then
+            cp ${kraken2_classification_tsv} ${kraken2_report_tsv} ${kraken2_krona_html} \
+               ${outdir}/kraken2
+        fi
+    >>>
+    runtime {
+        docker: container
+        memory: "1 GiB"
+        cpu: 1
+    }
+    output{
+        # NOTE(review): nothing in the command above writes *.fastq* files, so
+        # this pattern looks like a copy/paste leftover -- confirm intended output.
+        Array[String] fastq_files = glob("${outdir}/*.fastq*")
+    }
+}
+
diff --git a/ReadbasedAnalysisTasks.wdl b/ReadbasedAnalysisTasks.wdl
index 10996dd..e745e17 100644
--- a/ReadbasedAnalysisTasks.wdl
+++ b/ReadbasedAnalysisTasks.wdl
@@ -1,7 +1,6 @@
 task profilerGottcha2 {
     Array[File] READS
     String DB
-    String OUTPATH
     String PREFIX
     String? RELABD_COL = "ROLLUP_DOC"
     String DOCKER
@@ -9,35 +8,28 @@ task profilerGottcha2 {
 
     command <<<
         set -euo pipefail
-        mkdir -p ${OUTPATH}
-        touch ${OUTPATH}/${PREFIX}.full.tsv
 
         gottcha2.py -r ${RELABD_COL} \
                     -i ${sep=' ' READS} \
                     -t ${CPU} \
-                    -o ${OUTPATH} \
+                    -o . \
                     -p ${PREFIX} \
                     --database ${DB}
 
-        grep "^species" ${OUTPATH}/${PREFIX}.tsv | ktImportTaxonomy -t 3 -m 9 -o ${OUTPATH}/${PREFIX}.krona.html - || true
+        grep "^species" ${PREFIX}.tsv | ktImportTaxonomy -t 3 -m 9 -o ${PREFIX}.krona.html - || true
     >>>
     output {
-        Map[String, String] results = {
-            "tool": "gottcha2",
-            "orig_out_tsv": "${OUTPATH}/${PREFIX}.full.tsv",
-            "orig_rep_tsv": "${OUTPATH}/${PREFIX}.tsv",
-            "krona_html": "${OUTPATH}/${PREFIX}.krona.html"
-        }
+        File report_tsv = "${PREFIX}.tsv"
+        File full_tsv = "${PREFIX}.full.tsv"
+        File krona_html = "${PREFIX}.krona.html"
     }
     runtime {
         docker: DOCKER
         cpu: CPU
-        poolname: "readbaseanalysis-pool"
         node: 1
         nwpn: 1
         mem: "45G"
         time: "04:00:00"
-        shared: 0
     }
     meta {
         author: "Po-E Li, B10, LANL"
@@ -48,40 +40,33 @@ task profilerGottcha2 {
 task profilerCentrifuge {
     Array[File] READS
     String DB
-    String OUTPATH
     String PREFIX
     Int? CPU = 4
     String DOCKER
 
     command <<<
         set -euo pipefail
-        mkdir -p ${OUTPATH}
 
         centrifuge -x ${DB} \
                    -p ${CPU} \
                    -U ${sep=',' READS} \
-                   -S ${OUTPATH}/${PREFIX}.classification.tsv \
-                   --report-file ${OUTPATH}/${PREFIX}.report.tsv
+                   -S ${PREFIX}.classification.tsv \
+                   --report-file ${PREFIX}.report.tsv
 
-        ktImportTaxonomy -m 5 -t 2 -o ${OUTPATH}/${PREFIX}.krona.html ${OUTPATH}/${PREFIX}.report.tsv
+        ktImportTaxonomy -m 5 -t 2 -o ${PREFIX}.krona.html ${PREFIX}.report.tsv
     >>>
     output {
-        Map[String, String] results = {
-            "tool": "centrifuge",
-            "orig_out_tsv": "${OUTPATH}/${PREFIX}.classification.tsv",
-            "orig_rep_tsv": "${OUTPATH}/${PREFIX}.report.tsv",
-            "krona_html": "${OUTPATH}/${PREFIX}.krona.html"
-        }
+        File classification_tsv = "${PREFIX}.classification.tsv"
+        File report_tsv = "${PREFIX}.report.tsv"
+        File krona_html = "${PREFIX}.krona.html"
     }
     runtime {
         docker: DOCKER
         cpu: CPU
-        poolname: "readbaseanalysis-pool"
         node: 1
         nwpn: 1
         mem: "45G"
         time: "04:00:00"
-        shared: 0
     }
     meta {
         author: "Po-E Li, B10, LANL"
@@ -92,7 +77,6 @@ task profilerCentrifuge {
 task profilerKraken2 {
     Array[File] READS
     String DB
-    String OUTPATH
     String PREFIX
     Boolean? PAIRED = false
     Int? CPU = 4
@@ -100,34 +84,28 @@ task profilerKraken2 {
 
     command <<<
         set -euo pipefail
-        mkdir -p ${OUTPATH}
 
         kraken2 ${true="--paired" false='' PAIRED} \
                 --threads ${CPU} \
                 --db ${DB} \
-                --output ${OUTPATH}/${PREFIX}.classification.tsv \
-                --report ${OUTPATH}/${PREFIX}.report.tsv \
+                --output ${PREFIX}.classification.tsv \
+                --report ${PREFIX}.report.tsv \
                 ${sep=' ' READS}
 
-        ktImportTaxonomy -m 3 -t 5 -o ${OUTPATH}/${PREFIX}.krona.html ${OUTPATH}/${PREFIX}.report.tsv
+        ktImportTaxonomy -m 3 -t 5 -o ${PREFIX}.krona.html ${PREFIX}.report.tsv
     >>>
     output {
-        Map[String, String] results = {
-            "tool": "kraken2",
-            "orig_out_tsv": "${OUTPATH}/${PREFIX}.classification.tsv",
-            "orig_rep_tsv": "${OUTPATH}/${PREFIX}.report.tsv",
-            "krona_html": "${OUTPATH}/${PREFIX}.krona.html"
-        }
+        File classification_tsv = "${PREFIX}.classification.tsv"
+        File report_tsv = "${PREFIX}.report.tsv"
+        File krona_html = "${PREFIX}.krona.html"
     }
     runtime {
         docker: DOCKER
         cpu: CPU
-        poolname: "readbaseanalysis-pool"
         node: 1
         nwpn: 1
         mem: "45G"
         time: "04:00:00"
-        shared: 0
     }
     meta {
         author: "Po-E Li, B10, LANL"
@@ -137,24 +115,21 @@ task profilerKraken2 {
 
 task generateSummaryJson {
     Array[Map[String, String]?] TSV_META_JSON
-    String OUTPATH
     String PREFIX
     String DOCKER
 
     command {
-        outputTsv2json.py --meta ${write_json(TSV_META_JSON)} > ${OUTPATH}/${PREFIX}.json
+        outputTsv2json.py --meta ${write_json(TSV_META_JSON)} > ${PREFIX}.json
     }
     output {
-        File summary_json = "${OUTPATH}/${PREFIX}.json"
+        File summary_json = "${PREFIX}.json"
     }
     runtime {
         docker: DOCKER
-        poolname: "readbaseanalysis-pool"
         node: 1
         nwpn: 1
         mem: "45G"
         time: "04:00:00"
-        shared: 0
     }
     meta {
         author: "Po-E Li, B10, LANL"