From edd30276fe5b7575b294619702afcd8e5cfc05af Mon Sep 17 00:00:00 2001 From: Danny Park Date: Mon, 25 Nov 2024 16:29:01 -0500 Subject: [PATCH] initial downsample work --- pipes/WDL/tasks/tasks_assembly.wdl | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/pipes/WDL/tasks/tasks_assembly.wdl b/pipes/WDL/tasks/tasks_assembly.wdl index 7d577bd4b..060f0356d 100644 --- a/pipes/WDL/tasks/tasks_assembly.wdl +++ b/pipes/WDL/tasks/tasks_assembly.wdl @@ -718,6 +718,7 @@ task refine_assembly_with_aligned_reads { Boolean mark_duplicates = false Float major_cutoff = 0.5 Int min_coverage = 3 + Int? max_coverage Int machine_mem_gb = 15 String docker = "quay.io/broadinstitute/viral-assemble:2.3.6.1" @@ -757,9 +758,20 @@ task refine_assembly_with_aligned_reads { assembly.py --version | tee VERSION + if [[ ~{default="0" max_coverage} -gt 0 ]]; then + rasusa aln -O sam \ + --step-size 200 \ + --coverage ~{max_coverage} \ + "~{reads_aligned_bam}" \ + | samtools sort -o temp_maxcov.bam -@ $(nproc) -l 1 + else + ln -s "~{reads_aligned_bam}" temp_maxcov.bam + fi + samtools index -@ $(nproc) temp_maxcov.bam temp_maxcov.bai + if [ ~{true='true' false='false' mark_duplicates} == "true" ]; then read_utils.py mkdup_picard \ - ~{reads_aligned_bam} \ + temp_maxcov.bam \ temp_markdup.bam \ --JVMmemory "$mem_in_mb"m \ --loglevel=DEBUG