Skip to content

Commit

Permalink
Standardized use of mem parameters in CNV WDLs. (#4193)
Browse files Browse the repository at this point in the history
  • Loading branch information
samuelklee authored Jan 22, 2018
1 parent a4bf560 commit 32f25b9
Show file tree
Hide file tree
Showing 6 changed files with 93 additions and 86 deletions.
44 changes: 27 additions & 17 deletions scripts/cnv_wdl/cnv_common_tasks.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,14 @@ task PreprocessIntervals {
File? gatk4_jar_override

# Runtime parameters
Int? mem
Int? mem_gb
String gatk_docker
Int? preemptible_attempts
Int? disk_space_gb

Int machine_mem_mb = select_first([mem_gb, 2]) * 1000
Int command_mem_mb = machine_mem_mb - 500

# Determine output filename
String filename = select_first([intervals, "wgs"])
String base_filename = basename(filename, ".interval_list")
Expand All @@ -21,7 +24,7 @@ task PreprocessIntervals {
set -e
export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override}

gatk --java-options "-Xmx${default="2" mem}g" PreprocessIntervals \
gatk --java-options "-Xmx${command_mem_mb}m" PreprocessIntervals \
${"-L " + intervals} \
--sequence-dictionary ${ref_fasta_dict} \
--reference ${ref_fasta} \
Expand All @@ -33,7 +36,7 @@ task PreprocessIntervals {

runtime {
docker: "${gatk_docker}"
memory: select_first([mem, 2]) + " GB"
memory: machine_mem_mb + " MB"
disks: "local-disk " + select_first([disk_space_gb, 40]) + " HDD"
preemptible: select_first([preemptible_attempts, 5])
}
Expand All @@ -51,16 +54,19 @@ task AnnotateIntervals {
File? gatk4_jar_override

# Runtime parameters
Int? mem
Int? mem_gb
String gatk_docker
Int? preemptible_attempts
Int? disk_space_gb

Int machine_mem_mb = select_first([mem_gb, 2]) * 1000
Int command_mem_mb = machine_mem_mb - 500

command <<<
set -e
export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override}

gatk --java-options "-Xmx${default="4" mem}g" AnnotateIntervals \
gatk --java-options "-Xmx${command_mem_mb}m" AnnotateIntervals \
-L ${intervals} \
--reference ${ref_fasta} \
--interval-merging-rule OVERLAPPING_ONLY \
Expand All @@ -69,7 +75,7 @@ task AnnotateIntervals {

runtime {
docker: "${gatk_docker}"
memory: select_first([mem, 5]) + " GB"
memory: machine_mem_mb + " MB"
disks: "local-disk " + select_first([disk_space_gb, ceil(size(ref_fasta, "GB")) + 50]) + " HDD"
preemptible: select_first([preemptible_attempts, 5])
}
Expand All @@ -87,11 +93,14 @@ task CollectCounts {
File? gatk4_jar_override

# Runtime parameters
Int? mem
Int? mem_gb
String gatk_docker
Int? preemptible_attempts
Int? disk_space_gb

Int machine_mem_mb = select_first([mem_gb, 7]) * 1000
Int command_mem_mb = machine_mem_mb - 1000

# Sample name is derived from the bam filename
String base_filename = basename(bam, ".bam")
String counts_filename = if !defined(format) then "${base_filename}.counts.hdf5" else "${base_filename}.counts.tsv"
Expand All @@ -100,7 +109,7 @@ task CollectCounts {
set -e
export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override}

gatk --java-options "-Xmx${default="8" mem}g" CollectFragmentCounts \
gatk --java-options "-Xmx${command_mem_mb}m" CollectFragmentCounts \
--input ${bam} \
-L ${intervals} \
--format ${default="HDF5" format} \
Expand All @@ -110,7 +119,7 @@ task CollectCounts {

runtime {
docker: "${gatk_docker}"
memory: select_first([mem, 8]) + " GB"
memory: machine_mem_mb + " MB"
disks: "local-disk " + select_first([disk_space_gb, ceil(size(bam, "GB")) + 50]) + " HDD"
preemptible: select_first([preemptible_attempts, 5])
}
Expand All @@ -132,14 +141,13 @@ task CollectAllelicCounts {
File? gatk4_jar_override

# Runtime parameters
Int? mem
Int? mem_gb
String gatk_docker
Int? preemptible_attempts
Int? disk_space_gb

# Mem is in units of GB but our command and memory runtime values are in MB
Int machine_mem = if defined(mem) then mem * 1000 else 13000
Int command_mem = machine_mem - 1000
Int machine_mem_mb = select_first([mem_gb, 13]) * 1000
Int command_mem_mb = machine_mem_mb - 1000

# Sample name is derived from the bam filename
String base_filename = basename(bam, ".bam")
Expand All @@ -150,7 +158,7 @@ task CollectAllelicCounts {
set -e
export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override}

gatk --java-options "-Xmx${command_mem}m" CollectAllelicCounts \
gatk --java-options "-Xmx${command_mem_mb}m" CollectAllelicCounts \
-L ${common_sites} \
--input ${bam} \
--reference ${ref_fasta} \
Expand All @@ -160,7 +168,7 @@ task CollectAllelicCounts {

runtime {
docker: "${gatk_docker}"
memory: machine_mem + " MB"
memory: machine_mem_mb + " MB"
disks: "local-disk " + select_first([disk_space_gb, ceil(size(bam, "GB")) + 50]) + " HDD"
preemptible: select_first([preemptible_attempts, 5])
}
Expand All @@ -176,11 +184,13 @@ task ScatterIntervals {
Int num_intervals_per_scatter

# Runtime parameters
Int? mem
Int? mem_gb
String gatk_docker
Int? preemptible_attempts
Int? disk_space_gb

Int machine_mem_mb = select_first([mem_gb, 2]) * 1000

String base_filename = basename(interval_list, ".interval_list")

command <<<
Expand All @@ -194,7 +204,7 @@ task ScatterIntervals {

runtime {
docker: "${gatk_docker}"
memory: select_first([mem, 2]) + " GB"
memory: machine_mem_mb + " MB"
disks: "local-disk " + select_first([disk_space_gb, 40]) + " HDD"
preemptible: select_first([preemptible_attempts, 5])
}
Expand Down
28 changes: 14 additions & 14 deletions scripts/cnv_wdl/germline/cnv_germline_case_workflow.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ workflow CNVGermlineCaseWorkflow {
#### optional basic arguments ####
##################################
File? gatk4_jar_override
Int? mem_for_determine_germline_contig_ploidy
Int? mem_for_germline_cnv_caller
Int? mem_gb_for_determine_germline_contig_ploidy
Int? mem_gb_for_germline_cnv_caller
Int? cpu_for_determine_germline_contig_ploidy
Int? cpu_for_germline_cnv_caller
Int? preemptible_attempts
Expand Down Expand Up @@ -116,7 +116,7 @@ workflow CNVGermlineCaseWorkflow {
contig_ploidy_model_tar = contig_ploidy_model_tar,
gatk4_jar_override = gatk4_jar_override,
gatk_docker = gatk_docker,
mem = mem_for_determine_germline_contig_ploidy,
mem_gb = mem_for_determine_germline_contig_ploidy,
cpu = cpu_for_determine_germline_contig_ploidy,
mapping_error_rate = ploidy_mapping_error_rate,
sample_psi_scale = ploidy_sample_psi_scale,
Expand All @@ -141,7 +141,7 @@ workflow CNVGermlineCaseWorkflow {
intervals = ScatterIntervals.scattered_interval_lists[scatter_index],
gatk4_jar_override = gatk4_jar_override,
gatk_docker = gatk_docker,
mem = mem_for_germline_cnv_caller,
mem_gb = mem_for_germline_cnv_caller,
cpu = cpu_for_germline_cnv_caller,
p_alt = gcnv_p_alt,
cnv_coherence_length = gcnv_cnv_coherence_length,
Expand Down Expand Up @@ -190,7 +190,7 @@ task DetermineGermlineContigPloidyCaseMode {
File? gatk4_jar_override

# Runtime parameters
Int? mem
Int? mem_gb
Int? cpu
String gatk_docker
Int? preemptible_attempts
Expand All @@ -202,8 +202,8 @@ task DetermineGermlineContigPloidyCaseMode {

# We do not expose Hybrid ADVI parameters -- the default values are decent
Int machine_mem = if defined(mem) then select_first([mem]) else 8
Float command_mem = machine_mem - 0.5
Int machine_mem_mb = select_first([mem_gb, 7]) * 1000
Int command_mem_mb = machine_mem_mb - 500

# If optional output_dir not specified, use "out"
String output_dir_ = select_first([output_dir, "out"])
Expand All @@ -218,7 +218,7 @@ task DetermineGermlineContigPloidyCaseMode {
mkdir input-contig-ploidy-model
tar xzf ${contig_ploidy_model_tar} -C input-contig-ploidy-model

gatk --java-options "-Xmx${machine_mem}g" DetermineGermlineContigPloidy \
gatk --java-options "-Xmx${command_mem_mb}m" DetermineGermlineContigPloidy \
--input ${sep=" --input " read_count_files} \
--model input-contig-ploidy-model \
--output ${output_dir_} \
Expand All @@ -232,7 +232,7 @@ task DetermineGermlineContigPloidyCaseMode {

runtime {
docker: "${gatk_docker}"
memory: command_mem + " GB"
memory: machine_mem_mb + " MB"
disks: "local-disk " + select_first([disk_space_gb, 150]) + " HDD"
preemptible: select_first([preemptible_attempts, 2])
cpu: select_first([cpu, 8])
Expand All @@ -254,7 +254,7 @@ task GermlineCNVCallerCaseMode {
File? gatk4_jar_override

# Runtime parameters
Int? mem
Int? mem_gb
Int? cpu
String gatk_docker
Int? preemptible_attempts
Expand Down Expand Up @@ -293,8 +293,8 @@ task GermlineCNVCallerCaseMode {
Float? caller_admixing_rate
Boolean? disable_annealing

Int machine_mem = if defined(mem) then select_first([mem]) else 8
Float command_mem = machine_mem - 0.5
Int machine_mem_mb = select_first([mem_gb, 7]) * 1000
Int command_mem_mb = machine_mem_mb - 500

# If optional output_dir not specified, use "out"
String output_dir_ = select_first([output_dir, "out"])
Expand All @@ -312,7 +312,7 @@ task GermlineCNVCallerCaseMode {
mkdir gcnv-model
tar xzf ${gcnv_model_tar} -C gcnv-model

gatk --java-options "-Xmx${machine_mem}g" GermlineCNVCaller \
gatk --java-options "-Xmx${command_mem_mb}m" GermlineCNVCaller \
--run-mode CASE \
--input ${sep=" --input " read_count_files} \
--contig-ploidy-calls contig-ploidy-calls-dir \
Expand Down Expand Up @@ -353,7 +353,7 @@ task GermlineCNVCallerCaseMode {

runtime {
docker: "${gatk_docker}"
memory: command_mem + " GB"
memory: machine_mem_mb + " MB"
disks: "local-disk " + select_first([disk_space_gb, 150]) + " HDD"
preemptible: select_first([preemptible_attempts, 2])
cpu: select_first([cpu, 8])
Expand Down
28 changes: 14 additions & 14 deletions scripts/cnv_wdl/germline/cnv_germline_cohort_workflow.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@ workflow CNVGermlineCohortWorkflow {
#### optional basic arguments ####
##################################
File? gatk4_jar_override
Int? mem_for_determine_germline_contig_ploidy
Int? mem_for_germline_cnv_caller
Int? mem_gb_for_determine_germline_contig_ploidy
Int? mem_gb_for_germline_cnv_caller
Int? cpu_for_determine_germline_contig_ploidy
Int? cpu_for_germline_cnv_caller
Int? preemptible_attempts
Expand Down Expand Up @@ -147,7 +147,7 @@ workflow CNVGermlineCohortWorkflow {
contig_ploidy_priors = contig_ploidy_priors,
gatk4_jar_override = gatk4_jar_override,
gatk_docker = gatk_docker,
mem = mem_for_determine_germline_contig_ploidy,
mem_gb = mem_for_determine_germline_contig_ploidy,
cpu = cpu_for_determine_germline_contig_ploidy,
mean_bias_standard_deviation = ploidy_mean_bias_standard_deviation,
mapping_error_rate = ploidy_mapping_error_rate,
Expand Down Expand Up @@ -175,7 +175,7 @@ workflow CNVGermlineCohortWorkflow {
annotated_intervals = AnnotateIntervals.annotated_intervals,
gatk4_jar_override = gatk4_jar_override,
gatk_docker = gatk_docker,
mem = mem_for_germline_cnv_caller,
mem_gb = mem_for_germline_cnv_caller,
cpu = cpu_for_germline_cnv_caller,
p_alt = gcnv_p_alt,
p_active = gcnv_p_active,
Expand Down Expand Up @@ -234,7 +234,7 @@ task DetermineGermlineContigPloidyCohortMode {
File? gatk4_jar_override

# Runtime parameters
Int? mem
Int? mem_gb
Int? cpu
String gatk_docker
Int? preemptible_attempts
Expand All @@ -248,8 +248,8 @@ task DetermineGermlineContigPloidyCohortMode {

# We do not expose Hybrid ADVI parameters -- the default values are decent
Int machine_mem = if defined(mem) then select_first([mem]) else 8
Float command_mem = machine_mem - 0.5
Int machine_mem_mb = select_first([mem_gb, 7]) * 1000
Int command_mem_mb = machine_mem_mb - 500

# If optional output_dir not specified, use "out"
String output_dir_ = select_first([output_dir, "out"])
Expand All @@ -261,7 +261,7 @@ task DetermineGermlineContigPloidyCohortMode {
export MKL_NUM_THREADS=${default=8 cpu}
export OMP_NUM_THREADS=${default=8 cpu}

gatk --java-options "-Xmx${machine_mem}g" DetermineGermlineContigPloidy \
gatk --java-options "-Xmx${command_mem_mb}m" DetermineGermlineContigPloidy \
--input ${sep=" --input " read_count_files} \
--contig-ploidy-priors ${contig_ploidy_priors} \
--output ${output_dir_} \
Expand All @@ -278,7 +278,7 @@ task DetermineGermlineContigPloidyCohortMode {

runtime {
docker: "${gatk_docker}"
memory: command_mem + " GB"
memory: machine_mem_mb + " MB"
disks: "local-disk " + select_first([disk_space_gb, 150]) + " HDD"
preemptible: select_first([preemptible_attempts, 2])
cpu: select_first([cpu, 8])
Expand All @@ -301,7 +301,7 @@ task GermlineCNVCallerCohortMode {
File? gatk4_jar_override

# Runtime parameters
Int? mem
Int? mem_gb
Int? cpu
String gatk_docker
Int? preemptible_attempts
Expand Down Expand Up @@ -349,8 +349,8 @@ task GermlineCNVCallerCohortMode {
Float? caller_admixing_rate
Boolean? disable_annealing

Int machine_mem = if defined(mem) then select_first([mem]) else 8
Float command_mem = machine_mem - 0.5
Int machine_mem_mb = select_first([mem_gb, 7]) * 1000
Int command_mem_mb = machine_mem_mb - 500

# If optional output_dir not specified, use "out"
String output_dir_ = select_first([output_dir, "out"])
Expand All @@ -365,7 +365,7 @@ task GermlineCNVCallerCohortMode {
mkdir contig-ploidy-calls-dir
tar xzf ${contig_ploidy_calls_tar} -C contig-ploidy-calls-dir

gatk --java-options "-Xmx${machine_mem}g" GermlineCNVCaller \
gatk --java-options "-Xmx${command_mem_mb}m" GermlineCNVCaller \
--run-mode COHORT \
-L ${intervals} \
--input ${sep=" --input " read_count_files} \
Expand Down Expand Up @@ -418,7 +418,7 @@ task GermlineCNVCallerCohortMode {

runtime {
docker: "${gatk_docker}"
memory: command_mem + " GB"
memory: machine_mem_mb + " MB"
disks: "local-disk " + select_first([disk_space_gb, 150]) + " HDD"
preemptible: select_first([preemptible_attempts, 2])
cpu: select_first([cpu, 8])
Expand Down
Loading

0 comments on commit 32f25b9

Please sign in to comment.