Merge pull request #12 from microbiomedata/output_tweaks
WIP: adjust outputs
poeli authored Jun 2, 2021
2 parents d2e1b65 + a013ef1 commit 4a77c5d
Showing 2 changed files with 83 additions and 58 deletions.
ReadbasedAnalysis.wdl (78 changes: 64 additions & 14 deletions)
@@ -15,7 +15,6 @@ workflow ReadbasedAnalysis {
input: READS = reads,
DB = db["gottcha2"],
PREFIX = prefix,
OUTPATH = outdir+"/gottcha2",
CPU = cpu,
DOCKER = docker
}
@@ -27,7 +26,6 @@ workflow ReadbasedAnalysis {
PAIRED = paired,
DB = db["kraken2"],
PREFIX = prefix,
OUTPATH = outdir+"/kraken2",
CPU = cpu,
DOCKER = docker
}
@@ -38,26 +36,42 @@ workflow ReadbasedAnalysis {
input: READS = reads,
DB = db["centrifuge"],
PREFIX = prefix,
OUTPATH = outdir+"/centrifuge",
CPU = cpu,
DOCKER = docker
}
}

call tasks.generateSummaryJson {
input: TSV_META_JSON = [profilerGottcha2.results, profilerCentrifuge.results, profilerKraken2.results],
PREFIX = prefix,
OUTPATH = outdir,
DOCKER = docker
# call tasks.generateSummaryJson {
# input: TSV_META_JSON = [profilerGottcha2.results, profilerCentrifuge.results, profilerKraken2.results],
# PREFIX = prefix,
# OUTPATH = outdir,
# DOCKER = docker
# }
call make_outputs {
input: gottcha2_report_tsv = profilerGottcha2.report_tsv,
gottcha2_full_tsv = profilerGottcha2.full_tsv,
gottcha2_krona_html = profilerGottcha2.krona_html,
centrifuge_classification_tsv = profilerCentrifuge.classification_tsv,
centrifuge_report_tsv = profilerCentrifuge.report_tsv,
centrifuge_krona_html = profilerCentrifuge.krona_html,
kraken2_classification_tsv = profilerKraken2.classification_tsv,
kraken2_report_tsv = profilerKraken2.report_tsv,
kraken2_krona_html = profilerKraken2.krona_html,
outdir = outdir,
container = docker
}

output {
Map[String, Map[String, String]?] results = {
"gottcha2": profilerGottcha2.results,
"centrifuge": profilerCentrifuge.results,
"kraken2": profilerKraken2.results
}
File summary_json = generateSummaryJson.summary_json
File? gottcha2_report_tsv = profilerGottcha2.report_tsv
File? gottcha2_full_tsv = profilerGottcha2.full_tsv
File? gottcha2_krona_html = profilerGottcha2.krona_html
File? centrifuge_classification_tsv = profilerCentrifuge.classification_tsv
File? centrifuge_report_tsv = profilerCentrifuge.report_tsv
File? centrifuge_krona_html = profilerCentrifuge.krona_html
File? kraken2_classification_tsv = profilerKraken2.classification_tsv
File? kraken2_report_tsv = profilerKraken2.report_tsv
File? kraken2_krona_html = profilerKraken2.krona_html
# File summary_json = generateSummaryJson.summary_json
}

meta {
@@ -66,3 +80,39 @@ workflow ReadbasedAnalysis {
version: "1.0.2"
}
}


task make_outputs{
String outdir
File? gottcha2_report_tsv
File? gottcha2_full_tsv
File? gottcha2_krona_html
File? centrifuge_classification_tsv
File? centrifuge_report_tsv
File? centrifuge_krona_html
File? kraken2_classification_tsv
File? kraken2_report_tsv
File? kraken2_krona_html
String container

command<<<
mkdir -p ${outdir}/gottcha2
cp ${gottcha2_report_tsv} ${gottcha2_full_tsv} ${gottcha2_krona_html} \
${outdir}/gottcha2
mkdir -p ${outdir}/centrifuge
cp ${centrifuge_classification_tsv} ${centrifuge_report_tsv} ${centrifuge_krona_html} \
${outdir}/centrifuge
mkdir -p ${outdir}/kraken2
cp ${kraken2_classification_tsv} ${kraken2_report_tsv} ${kraken2_krona_html} \
${outdir}/kraken2
>>>
runtime {
docker: container
memory: "1 GiB"
cpu: 1
}
output{
Array[String] fastq_files = glob("${outdir}/*.fastq*")
}
}
ReadbasedAnalysisTasks.wdl (63 changes: 19 additions & 44 deletions)
@@ -1,43 +1,35 @@
task profilerGottcha2 {
Array[File] READS
String DB
String OUTPATH
String PREFIX
String? RELABD_COL = "ROLLUP_DOC"
String DOCKER
Int? CPU = 4

command <<<
set -euo pipefail
mkdir -p ${OUTPATH}
touch ${OUTPATH}/${PREFIX}.full.tsv

gottcha2.py -r ${RELABD_COL} \
-i ${sep=' ' READS} \
-t ${CPU} \
-o ${OUTPATH} \
-o . \
-p ${PREFIX} \
--database ${DB}

grep "^species" ${OUTPATH}/${PREFIX}.tsv | ktImportTaxonomy -t 3 -m 9 -o ${OUTPATH}/${PREFIX}.krona.html - || true
grep "^species" ${PREFIX}.tsv | ktImportTaxonomy -t 3 -m 9 -o ${PREFIX}.krona.html - || true
>>>
output {
Map[String, String] results = {
"tool": "gottcha2",
"orig_out_tsv": "${OUTPATH}/${PREFIX}.full.tsv",
"orig_rep_tsv": "${OUTPATH}/${PREFIX}.tsv",
"krona_html": "${OUTPATH}/${PREFIX}.krona.html"
}
File report_tsv = "${PREFIX}.tsv"
File full_tsv = "${PREFIX}.full.tsv"
File krona_html = "${PREFIX}.krona.html"
}
runtime {
docker: DOCKER
cpu: CPU
poolname: "readbaseanalysis-pool"
node: 1
nwpn: 1
mem: "45G"
time: "04:00:00"
shared: 0
}
meta {
author: "Po-E Li, B10, LANL"
@@ -48,40 +40,33 @@ task profilerGottcha2 {
task profilerCentrifuge {
Array[File] READS
String DB
String OUTPATH
String PREFIX
Int? CPU = 4
String DOCKER

command <<<
set -euo pipefail
mkdir -p ${OUTPATH}

centrifuge -x ${DB} \
-p ${CPU} \
-U ${sep=',' READS} \
-S ${OUTPATH}/${PREFIX}.classification.tsv \
--report-file ${OUTPATH}/${PREFIX}.report.tsv
-S ${PREFIX}.classification.tsv \
--report-file ${PREFIX}.report.tsv

ktImportTaxonomy -m 5 -t 2 -o ${OUTPATH}/${PREFIX}.krona.html ${OUTPATH}/${PREFIX}.report.tsv
ktImportTaxonomy -m 5 -t 2 -o ${PREFIX}.krona.html ${PREFIX}.report.tsv
>>>
output {
Map[String, String] results = {
"tool": "centrifuge",
"orig_out_tsv": "${OUTPATH}/${PREFIX}.classification.tsv",
"orig_rep_tsv": "${OUTPATH}/${PREFIX}.report.tsv",
"krona_html": "${OUTPATH}/${PREFIX}.krona.html"
}
File classification_tsv="${PREFIX}.classification.tsv"
File report_tsv="${PREFIX}.report.tsv"
File krona_html="${PREFIX}.krona.html"
}
runtime {
docker: DOCKER
cpu: CPU
poolname: "readbaseanalysis-pool"
node: 1
nwpn: 1
mem: "45G"
time: "04:00:00"
shared: 0
}
meta {
author: "Po-E Li, B10, LANL"
@@ -92,42 +77,35 @@ task profilerCentrifuge {
task profilerKraken2 {
Array[File] READS
String DB
String OUTPATH
String PREFIX
Boolean? PAIRED = false
Int? CPU = 4
String DOCKER

command <<<
set -euo pipefail
mkdir -p ${OUTPATH}

kraken2 ${true="--paired" false='' PAIRED} \
--threads ${CPU} \
--db ${DB} \
--output ${OUTPATH}/${PREFIX}.classification.tsv \
--report ${OUTPATH}/${PREFIX}.report.tsv \
--output ${PREFIX}.classification.tsv \
--report ${PREFIX}.report.tsv \
${sep=' ' READS}

ktImportTaxonomy -m 3 -t 5 -o ${OUTPATH}/${PREFIX}.krona.html ${OUTPATH}/${PREFIX}.report.tsv
ktImportTaxonomy -m 3 -t 5 -o ${PREFIX}.krona.html ${PREFIX}.report.tsv
>>>
output {
Map[String, String] results = {
"tool": "kraken2",
"orig_out_tsv": "${OUTPATH}/${PREFIX}.classification.tsv",
"orig_rep_tsv": "${OUTPATH}/${PREFIX}.report.tsv",
"krona_html": "${OUTPATH}/${PREFIX}.krona.html"
}
File classification_tsv = "${PREFIX}.classification.tsv"
File report_tsv = "${PREFIX}.report.tsv"
File krona_html = "${PREFIX}.krona.html"
}
runtime {
docker: DOCKER
cpu: CPU
poolname: "readbaseanalysis-pool"
node: 1
nwpn: 1
mem: "45G"
time: "04:00:00"
shared: 0
}
meta {
author: "Po-E Li, B10, LANL"
@@ -137,24 +115,21 @@ task profilerKraken2 {

task generateSummaryJson {
Array[Map[String, String]?] TSV_META_JSON
String OUTPATH
String PREFIX
String DOCKER

command {
outputTsv2json.py --meta ${write_json(TSV_META_JSON)} > ${OUTPATH}/${PREFIX}.json
outputTsv2json.py --meta ${write_json(TSV_META_JSON)} > ${PREFIX}.json
}
output {
File summary_json = "${OUTPATH}/${PREFIX}.json"
File summary_json = "${PREFIX}.json"
}
runtime {
docker: DOCKER
poolname: "readbaseanalysis-pool"
node: 1
nwpn: 1
mem: "45G"
time: "04:00:00"
shared: 0
}
meta {
author: "Po-E Li, B10, LANL"

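For context, a minimal sketch of how the reworked per-tool File? outputs could be consumed from a parent workflow. The parent workflow name and its inputs below are illustrative assumptions (the real workflow may require additional inputs not shown in this diff, and the docker tag is a placeholder); only the output names such as gottcha2_report_tsv, centrifuge_report_tsv, and kraken2_report_tsv come from this change.

import "ReadbasedAnalysis.wdl" as rba

workflow consumeProfiles {
    Array[File] reads
    Map[String, String] db   # expects keys "gottcha2", "kraken2", "centrifuge"

    call rba.ReadbasedAnalysis {
        input: reads = reads,
               db = db,
               paired = true,
               prefix = "sample1",
               outdir = "sample1_profiling",
               cpu = 4,
               docker = "profilers:placeholder-tag"   # hypothetical image tag
    }

    output {
        # Per-tool reports exposed directly as (optional) File outputs by this PR
        File? gottcha2_report = ReadbasedAnalysis.gottcha2_report_tsv
        File? centrifuge_report = ReadbasedAnalysis.centrifuge_report_tsv
        File? kraken2_report = ReadbasedAnalysis.kraken2_report_tsv
    }
}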