Skip to content

Commit

Permalink
nio looks good too
Browse files Browse the repository at this point in the history
  • Loading branch information
takutosato committed Mar 19, 2019
1 parent 1da95f5 commit 7662a18
Showing 1 changed file with 141 additions and 57 deletions.
198 changes: 141 additions & 57 deletions scripts/mutect2_wdl/mutect2_nio.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,36 @@ workflow Mutect2 {
disk_space = ref_size + ceil(size(intervals, "GB") * small_input_to_output_multiplier) + disk_pad
}

if (run_ob_mm_filter) {
scatter (subintervals in SplitIntervals.interval_files ) {
call CollectF1R2Counts {
input:
gatk_docker = gatk_docker,
ref_fasta = ref_fasta,
ref_fai = ref_fai,
ref_dict = ref_dict,
preemptible_attempts = preemptible_attempts,
tumor_bam = tumor_bam,
gatk_override = gatk_override,
disk_space = tumor_bam_size + ref_size + disk_pad,
intervals = subintervals,
extra_intervals = ob_mm_filter_training_intervals,
max_retries = max_retries
}
}

call LearnReadOrientationModel {
input:
alt_tables = CollectF1R2Counts.alt_table,
ref_histograms = CollectF1R2Counts.ref_histograms,
alt_histograms = CollectF1R2Counts.alt_histograms,
gatk_override = gatk_override,
gatk_docker = gatk_docker,
preemptible_attempts = preemptible_attempts,
max_retries = max_retries
}
}

scatter (subintervals in SplitIntervals.interval_files ) {
call M2 {
input:
Expand All @@ -260,6 +290,7 @@ workflow Mutect2 {
max_retries = max_retries,
m2_extra_args = m2_extra_args,
artifact_prior_table = LearnReadOrientationModel.artifact_prior_table,
variants_for_contamination = variants_for_contamination,
make_bamout = make_bamout_or_default,
compress = compress,
gga_vcf = gga_vcf,
Expand Down Expand Up @@ -336,47 +367,41 @@ workflow Mutect2 {
}
}

if (run_ob_mm_filter) {
call CollectF1R2Counts {
input:
gatk_docker = gatk_docker,
ref_fasta = ref_fasta,
ref_fai = ref_fai,
ref_dict = ref_dict,
preemptible_attempts = preemptible_attempts,
tumor_bam = tumor_bam,
tumor_bai = tumor_bai,
gatk_override = gatk_override,
disk_space = tumor_bam_size + ref_size + disk_pad,
intervals = if defined(ob_mm_filter_training_intervals) then ob_mm_filter_training_intervals else intervals,
max_retries = max_retries
}
if (defined(variants_for_contamination)) {
call MergePileupSummaries as MergeTumorPileups {
input:
input_tables = M2.tumor_pileups,
output_name = output_basename,
ref_dict = ref_dict,
gatk_override = gatk_override,
gatk_docker = gatk_docker,
preemptible_attempts = preemptible_attempts,
max_retries = max_retries,
disk_space = ceil(SumSubVcfs.total_size * large_input_to_output_multiplier) + disk_pad
}

call LearnReadOrientationModel {
if (defined(normal_bam)){
call MergePileupSummaries as MergeNormalPileups {
input:
alt_table = CollectF1R2Counts.alt_table,
ref_histogram = CollectF1R2Counts.ref_histogram,
alt_histograms = CollectF1R2Counts.alt_histograms,
tumor_sample = CollectF1R2Counts.tumor_sample,
input_tables = M2.normal_pileups,
output_name = output_basename,
ref_dict = ref_dict,
gatk_override = gatk_override,
gatk_docker = gatk_docker,
preemptible_attempts = preemptible_attempts,
max_retries = max_retries
max_retries = max_retries,
disk_space = ceil(SumSubVcfs.total_size * large_input_to_output_multiplier) + disk_pad
}
}

if (defined(variants_for_contamination)) {
call CalculateContamination {
input:
gatk_override = gatk_override,
intervals = intervals,
ref_fasta = ref_fasta,
preemptible_attempts = preemptible_attempts,
max_retries = max_retries,
gatk_docker = gatk_docker,
tumor_bam = tumor_bam,
normal_bam = normal_bam,
variants_for_contamination = variants_for_contamination,
tumor_pileups = MergeTumorPileups.merged_table,
normal_pileups = MergeNormalPileups.merged_table,
disk_space = tumor_bam_size + normal_bam_size + ceil(size(variants_for_contamination, "GB") * small_input_to_output_multiplier) + disk_pad
}
}
Expand Down Expand Up @@ -501,6 +526,7 @@ workflow Mutect2 {
File? bamout = MergeBamOuts.merged_bam_out
File? bamout_index = MergeBamOuts.merged_bam_out_index
File? maf_segments = CalculateContamination.maf_segments
File? read_orientation_model_params = LearnReadOrientationModel.artifact_prior_table
}
}

Expand Down Expand Up @@ -608,6 +634,7 @@ task M2 {
String? gga_vcf
String? gga_vcf_idx
File? artifact_prior_table
String? variants_for_contamination

String output_vcf = "output" + if compress then ".vcf.gz" else ".vcf"
String output_vcf_index = output_vcf + if compress then ".tbi" else ".idx"
Expand Down Expand Up @@ -658,6 +685,21 @@ task M2 {
${"--orientation-bias-artifact-priors " + artifact_prior_table} \
--stats mutect2.stats \
${m2_extra_args}

### GetPileupSummaries
# These must be created, even if they remain empty, as cromwell doesn't support optional output
touch tumor-pileups.table
touch normal-pileups.table

if [[ ! -z "${variants_for_contamination}" ]]; then
gatk --java-options "-Xmx${command_mem}m" GetPileupSummaries -R ${ref_fasta} -I ${tumor_bam} ${"--interval-set-rule INTERSECTION -L " + intervals} \
-V ${variants_for_contamination} -L ${variants_for_contamination} -O tumor-pileups.table

if [[ ! -z "${normal_bam}" ]]; then
gatk --java-options "-Xmx${command_mem}m" GetPileupSummaries -R ${ref_fasta} -I ${normal_bam} ${"--interval-set-rule INTERSECTION -L " + intervals} \
-V ${variants_for_contamination} -L ${variants_for_contamination} -O normal-pileups.table
fi
fi
>>>

runtime {
Expand All @@ -677,6 +719,8 @@ task M2 {
String tumor_sample = read_string("tumor_name.txt")
String normal_sample = read_string("normal_name.txt")
File stats = "mutect2.stats"
File tumor_pileups = "tumor-pileups.table"
File normal_pileups = "normal-pileups.table"
}
}

Expand Down Expand Up @@ -829,6 +873,52 @@ task MergeStats {
}
}

task MergePileupSummaries {
# input_tables needs to be optional because GetPileupSummaries is in an if-block
Array[File?] input_tables
String output_name
File? gatk_override
File ref_dict

# runtime
String gatk_docker
Int? mem
Int? preemptible_attempts
Int? max_retries
Int? disk_space
Int? cpu
Boolean use_ssd = false

# Mem is in units of GB but our command and memory runtime values are in MB
Int machine_mem = if defined(mem) then mem * 1000 else 3500
Int command_mem = machine_mem - 1000

command {
set -e
export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override}

gatk --java-options "-Xmx${command_mem}m" GatherPileupSummaries \
--sequence-dictionary ${ref_dict} \
-I ${sep=' -I ' input_tables} \
-O ${output_name}.tsv
}

runtime {
docker: gatk_docker
bootDiskSizeGb: 12
memory: machine_mem + " MB"
disks: "local-disk " + select_first([disk_space, 100]) + if use_ssd then " SSD" else " HDD"
preemptible: select_first([preemptible_attempts, 10])
maxRetries: select_first([max_retries, 3])
cpu: select_first([cpu, 1])
}

output {
File merged_table = "${output_name}.tsv"
}
}


task CollectSequencingArtifactMetrics {
# inputs
File ref_fasta
Expand Down Expand Up @@ -880,11 +970,11 @@ task CollectF1R2Counts {
File ref_fasta
File ref_fai
File ref_dict
File tumor_bam
File tumor_bai
String tumor_bam

File? gatk_override
File? intervals
File? extra_intervals

# runtime
Int? max_retries
Expand All @@ -911,9 +1001,10 @@ task CollectF1R2Counts {
gatk --java-options "-Xmx${command_mem}m" CollectF1R2Counts \
-I ${tumor_bam} -R ${ref_fasta} \
${"-L " + intervals} \
-alt-table "$tumor_name-alt.tsv" \
-ref-hist "$tumor_name-ref.metrics" \
-alt-hist "$tumor_name-alt-depth1.metrics"
${"-isr INTERSECTION -L " + extra_intervals} \
--alt-table "$tumor_name-alt.tsv" \
--ref-hist "$tumor_name-ref.metrics" \
--alt-hist "$tumor_name-alt-depth1.metrics"
}

runtime {
Expand All @@ -928,21 +1019,20 @@ task CollectF1R2Counts {

output {
File alt_table = glob("*-alt.tsv")[0]
File ref_histogram = glob("*-ref.metrics")[0]
File ref_histograms = glob("*-ref.metrics")[0]
File alt_histograms = glob("*-alt-depth1.metrics")[0]
String tumor_sample = read_string("tumor_name.txt")
}
}

# Learning step of the orientation bias mixture model, which is the recommended orientation bias filter as of September 2018
task LearnReadOrientationModel {
File alt_table
File ref_histogram
File? alt_histograms
Array[File] alt_tables
Array[File] ref_histograms
Array[File?] alt_histograms
Int alt_hist_length = length(alt_histograms)

File? gatk_override
File? intervals
String tumor_sample

# runtime
Int? max_retries
Expand All @@ -961,11 +1051,15 @@ task LearnReadOrientationModel {
set -e
export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override}

if [[ "${alt_hist_length}" > 0 ]]; then
ah_flag="--alt-hist"
fi

gatk --java-options "-Xmx${command_mem}m" LearnReadOrientationModel \
-alt-table ${alt_table} \
-ref-hist ${ref_histogram} \
-alt-hist ${alt_histograms} \
-O "${tumor_sample}-artifact-prior-table.tsv"
--alt-table ${sep=" --alt-table " alt_tables} \
--ref-hist ${sep=" --ref-hist " ref_histograms} \
$ah_flag ${sep=" --alt-hist " alt_histograms} \
-O "artifact-prior-table.tsv"
}

runtime {
Expand All @@ -979,18 +1073,16 @@ task LearnReadOrientationModel {
}

output {
File artifact_prior_table = "${tumor_sample}-artifact-prior-table.tsv"
File artifact_prior_table = "artifact-prior-table.tsv"
}

}

task CalculateContamination {
# inputs
String? intervals
String ref_fasta
String tumor_bam
String? normal_bam
String? variants_for_contamination
File tumor_pileups
File? normal_pileups

File? gatk_override

Expand All @@ -1010,15 +1102,8 @@ task CalculateContamination {

export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override}

if [[ -f "${normal_bam}" ]]; then
gatk --java-options "-Xmx${command_mem}m" GetPileupSummaries -I ${normal_bam} ${"--interval-set-rule INTERSECTION -L " + intervals} \
-V ${variants_for_contamination} -L ${variants_for_contamination} -O normal_pileups.table
NORMAL_CMD="-matched normal_pileups.table"
fi

gatk --java-options "-Xmx${command_mem}m" GetPileupSummaries -R ${ref_fasta} -I ${tumor_bam} ${"--interval-set-rule INTERSECTION -L " + intervals} \
-V ${variants_for_contamination} -L ${variants_for_contamination} -O pileups.table
gatk --java-options "-Xmx${command_mem}m" CalculateContamination -I pileups.table -O contamination.table --tumor-segmentation segments.table $NORMAL_CMD
gatk --java-options "-Xmx${command_mem}m" CalculateContamination -I ${tumor_pileups} \
-O contamination.table --tumor-segmentation segments.table ${"-matched " + normal_pileups}
}

runtime {
Expand All @@ -1031,7 +1116,6 @@ task CalculateContamination {
}

output {
File pileups = "pileups.table"
File contamination_table = "contamination.table"
File maf_segments = "segments.table"
}
Expand Down

0 comments on commit 7662a18

Please sign in to comment.