Skip to content

Commit

Permalink
make depletion databases optional for demux_deplete
Browse files (browse the repository at this point in the history)
  • Loading branch information
dpark01 committed Aug 19, 2024
1 parent bb3cbe6 commit 9417803
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 13 deletions.
9 changes: 8 additions & 1 deletion pipes/WDL/tasks/tasks_reports.wdl
Original file line number | Diff line number | Diff line change
Expand Up @@ -456,6 +456,8 @@ task align_and_count {
TOTAL_COUNT_OF_TOP_HIT=$(grep -E "^($TOP_HIT)" "~{reads_basename}.count.~{ref_basename}.txt" | cut -f3 | tee TOTAL_COUNT_OF_TOP_HIT)
TOTAL_COUNT_OF_LESSER_HITS=$((grep -vE "^(\*|$TOP_HIT)" "~{reads_basename}.count.~{ref_basename}.txt" || echo "0" ) | cut -f3 | paste -sd+ - | bc -l | tee TOTAL_COUNT_OF_LESSER_HITS)
echo $TOTAL_COUNT_OF_TOP_HIT | tee TOTAL_COUNT_OF_TOP_HIT
echo $TOTAL_COUNT_OF_LESSER_HITS | tee TOTAL_COUNT_OF_LESSER_HITS
if [ $TOTAL_COUNT_OF_LESSER_HITS -ne 0 -o $TOTAL_COUNT_OF_TOP_HIT -ne 0 ]; then
PCT_MAPPING_TO_LESSER_HITS=$( echo "scale=3; 100 * $TOTAL_COUNT_OF_LESSER_HITS / ($TOTAL_COUNT_OF_LESSER_HITS + $TOTAL_COUNT_OF_TOP_HIT)" | \
Expand All @@ -466,6 +468,7 @@ task align_and_count {
fi
TOTAL_READS_IN_INPUT=$(samtools view -c "~{reads_basename}.bam")
echo $TOTAL_READS_IN_INPUT | tee TOTAL_READS_IN_INPUT
if [ $TOTAL_READS_IN_INPUT -eq 0 ]; then
echo "no reads in input bam"
PCT_OF_INPUT_READS_MAPPED=$(echo "0" | tee "~{reads_basename}.count.~{ref_basename}.pct_total_reads_mapped.txt")
Expand All @@ -480,7 +483,11 @@ task align_and_count {
File report_top_hits = "~{reads_basename}.count.~{ref_basename}.top_~{topNHits}_hits.txt"
String top_hit_id = read_string("~{reads_basename}.count.~{ref_basename}.top.txt")
Int reads_total = read_int("TOTAL_READS_IN_INPUT")
Int reads_mapped_top_hit = read_int("TOTAL_COUNT_OF_TOP_HIT")
Int reads_mapped = read_int("TOTAL_COUNT_OF_LESSER_HITS") + read_int("TOTAL_COUNT_OF_TOP_HIT")
String pct_total_reads_mapped = read_string('~{reads_basename}.count.~{ref_basename}.pct_total_reads_mapped.txt')
String pct_lesser_hits_of_mapped = read_string('~{reads_basename}.count.~{ref_basename}.pct_lesser_hits_of_mapped.txt')
Expand Down
29 changes: 17 additions & 12 deletions pipes/WDL/workflows/demux_deplete.wdl
Original file line number | Diff line number | Diff line change
Expand Up @@ -158,17 +158,22 @@ workflow demux_deplete {
reads_bam = raw_reads,
ref_db = spikein_db
}
call taxon_filter.deplete_taxa as deplete {
input:
raw_reads_unmapped_bam = raw_reads,
bmtaggerDbs = bmtaggerDbs,
blastDbs = blastDbs,
bwaDbs = bwaDbs
if (length(flatten(select_all([bmtaggerDbs, blastDbs, bwaDbs]))) > 0) {
call taxon_filter.deplete_taxa as deplete {
input:
raw_reads_unmapped_bam = raw_reads,
bmtaggerDbs = bmtaggerDbs,
blastDbs = blastDbs,
bwaDbs = bwaDbs
}
}
if (deplete.depletion_read_count_post >= min_reads_per_bam) {
File cleaned_bam_passing = deplete.cleaned_bam

Int read_count_post_depletion = select_first([deplete.depletion_read_count_post, spikein.reads_total])
File cleaned_bam = select_first([deplete.cleaned_bam, raw_reads])
if (read_count_post_depletion >= min_reads_per_bam) {
File cleaned_bam_passing = cleaned_bam
}
if (deplete.depletion_read_count_post < min_reads_per_bam) {
if (read_count_post_depletion < min_reads_per_bam) {
File empty_bam = raw_reads
}
}
Expand Down Expand Up @@ -216,7 +221,7 @@ workflow demux_deplete {
}
call reports.MultiQC as multiqc_cleaned {
input:
input_files = deplete.cleaned_fastqc_zip,
input_files = cleaned_bam,
file_name = "multiqc-cleaned.html"
}
call reports.align_and_count_summary as spike_summary {
Expand All @@ -228,7 +233,7 @@ workflow demux_deplete {
output {
Array[File] raw_reads_unaligned_bams = flatten(illumina_demux.raw_reads_unaligned_bams)
Array[Int] read_counts_raw = deplete.depletion_read_count_pre
Array[Int] read_counts_raw = spikein.reads_total

Map[String,Map[String,String]] meta_by_filename = meta_filename.merged
Map[String,Map[String,String]] meta_by_sample = meta_sample.merged
Expand All @@ -237,7 +242,7 @@ workflow demux_deplete {
Array[File] cleaned_reads_unaligned_bams = select_all(cleaned_bam_passing)
Array[File] cleaned_bams_tiny = select_all(empty_bam)
Array[Int] read_counts_depleted = deplete.depletion_read_count_post
Array[Int] read_counts_depleted = read_count_post_depletion
File? sra_metadata = sra_meta_prep.sra_metadata
File? cleaned_bam_uris = sra_meta_prep.cleaned_bam_uris
Expand Down

0 comments on commit 9417803

Please sign in to comment.