-
Notifications
You must be signed in to change notification settings - Fork 596
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Standardized use of mem parameters in CNV WDLs. #4193
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,11 +8,14 @@ task PreprocessIntervals { | |
File? gatk4_jar_override | ||
|
||
# Runtime parameters | ||
Int? mem | ||
Int? mem_gb | ||
String gatk_docker | ||
Int? preemptible_attempts | ||
Int? disk_space_gb | ||
|
||
Int machine_mem_mb = select_first([mem_gb, 2]) * 1000 | ||
Int command_mem_mb = machine_mem_mb - 500 | ||
|
||
# Determine output filename | ||
String filename = select_first([intervals, "wgs"]) | ||
String base_filename = basename(filename, ".interval_list") | ||
|
@@ -21,7 +24,7 @@ task PreprocessIntervals { | |
set -e | ||
export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override} | ||
|
||
gatk --java-options "-Xmx${default="2" mem}g" PreprocessIntervals \ | ||
gatk --java-options "-Xmx${command_mem_mb}m" PreprocessIntervals \ | ||
${"-L " + intervals} \ | ||
--sequence-dictionary ${ref_fasta_dict} \ | ||
--reference ${ref_fasta} \ | ||
|
@@ -33,7 +36,7 @@ task PreprocessIntervals { | |
|
||
runtime { | ||
docker: "${gatk_docker}" | ||
memory: select_first([mem, 2]) + " GB" | ||
memory: machine_mem_mb + " MB" | ||
disks: "local-disk " + select_first([disk_space_gb, 40]) + " HDD" | ||
preemptible: select_first([preemptible_attempts, 5]) | ||
} | ||
|
@@ -51,16 +54,19 @@ task AnnotateIntervals { | |
File? gatk4_jar_override | ||
|
||
# Runtime parameters | ||
Int? mem | ||
Int? mem_gb | ||
String gatk_docker | ||
Int? preemptible_attempts | ||
Int? disk_space_gb | ||
|
||
Int machine_mem_mb = select_first([mem_gb, 2]) * 1000 | ||
Int command_mem_mb = machine_mem_mb - 500 | ||
|
||
command <<< | ||
set -e | ||
export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override} | ||
|
||
gatk --java-options "-Xmx${default="4" mem}g" AnnotateIntervals \ | ||
gatk --java-options "-Xmx${command_mem_mb}m" AnnotateIntervals \ | ||
-L ${intervals} \ | ||
--reference ${ref_fasta} \ | ||
--interval-merging-rule OVERLAPPING_ONLY \ | ||
|
@@ -69,7 +75,7 @@ task AnnotateIntervals { | |
|
||
runtime { | ||
docker: "${gatk_docker}" | ||
memory: select_first([mem, 5]) + " GB" | ||
memory: machine_mem_mb + " MB" | ||
disks: "local-disk " + select_first([disk_space_gb, ceil(size(ref_fasta, "GB")) + 50]) + " HDD" | ||
preemptible: select_first([preemptible_attempts, 5]) | ||
} | ||
|
@@ -87,11 +93,14 @@ task CollectCounts { | |
File? gatk4_jar_override | ||
|
||
# Runtime parameters | ||
Int? mem | ||
Int? mem_gb | ||
String gatk_docker | ||
Int? preemptible_attempts | ||
Int? disk_space_gb | ||
|
||
Int machine_mem_mb = select_first([mem_gb, 7]) * 1000 | ||
Int command_mem_mb = machine_mem_mb - 1000 | ||
|
||
# Sample name is derived from the bam filename | ||
String base_filename = basename(bam, ".bam") | ||
String counts_filename = if !defined(format) then "${base_filename}.counts.hdf5" else "${base_filename}.counts.tsv" | ||
|
@@ -100,7 +109,7 @@ task CollectCounts { | |
set -e | ||
export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override} | ||
|
||
gatk --java-options "-Xmx${default="8" mem}g" CollectFragmentCounts \ | ||
gatk --java-options "-Xmx${command_mem_mb}m" CollectFragmentCounts \ | ||
--input ${bam} \ | ||
-L ${intervals} \ | ||
--format ${default="HDF5" format} \ | ||
|
@@ -110,7 +119,7 @@ task CollectCounts { | |
|
||
runtime { | ||
docker: "${gatk_docker}" | ||
memory: select_first([mem, 8]) + " GB" | ||
memory: machine_mem_mb + " MB" | ||
disks: "local-disk " + select_first([disk_space_gb, ceil(size(bam, "GB")) + 50]) + " HDD" | ||
preemptible: select_first([preemptible_attempts, 5]) | ||
} | ||
|
@@ -132,14 +141,13 @@ task CollectAllelicCounts { | |
File? gatk4_jar_override | ||
|
||
# Runtime parameters | ||
Int? mem | ||
Int? mem_gb | ||
String gatk_docker | ||
Int? preemptible_attempts | ||
Int? disk_space_gb | ||
|
||
# Mem is in units of GB but our command and memory runtime values are in MB | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Somewhere it should made clear that the user provided There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any objection to simply using There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @samuelklee Fine by me... There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Done. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
That is sufficiently self-documenting for my tastes. |
||
Int machine_mem = if defined(mem) then mem * 1000 else 13000 | ||
Int command_mem = machine_mem - 1000 | ||
Int machine_mem_mb = select_first([mem_gb, 13]) * 1000 | ||
Int command_mem_mb = machine_mem_mb - 1000 | ||
|
||
# Sample name is derived from the bam filename | ||
String base_filename = basename(bam, ".bam") | ||
|
@@ -150,7 +158,7 @@ task CollectAllelicCounts { | |
set -e | ||
export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk4_jar_override} | ||
|
||
gatk --java-options "-Xmx${command_mem}m" CollectAllelicCounts \ | ||
gatk --java-options "-Xmx${command_mem_mb}m" CollectAllelicCounts \ | ||
-L ${common_sites} \ | ||
--input ${bam} \ | ||
--reference ${ref_fasta} \ | ||
|
@@ -160,7 +168,7 @@ task CollectAllelicCounts { | |
|
||
runtime { | ||
docker: "${gatk_docker}" | ||
memory: machine_mem + " MB" | ||
memory: machine_mem_mb + " MB" | ||
disks: "local-disk " + select_first([disk_space_gb, ceil(size(bam, "GB")) + 50]) + " HDD" | ||
preemptible: select_first([preemptible_attempts, 5]) | ||
} | ||
|
@@ -176,11 +184,13 @@ task ScatterIntervals { | |
Int num_intervals_per_scatter | ||
|
||
# Runtime parameters | ||
Int? mem | ||
Int? mem_gb | ||
String gatk_docker | ||
Int? preemptible_attempts | ||
Int? disk_space_gb | ||
|
||
Int machine_mem_mb = select_first([mem_gb, 2]) * 1000 | ||
|
||
String base_filename = basename(interval_list, ".interval_list") | ||
|
||
command <<< | ||
|
@@ -194,7 +204,7 @@ task ScatterIntervals { | |
|
||
runtime { | ||
docker: "${gatk_docker}" | ||
memory: select_first([mem, 2]) + " GB" | ||
memory: machine_mem_mb + " MB" | ||
disks: "local-disk " + select_first([disk_space_gb, 40]) + " HDD" | ||
preemptible: select_first([preemptible_attempts, 5]) | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,8 +36,8 @@ workflow CNVGermlineCohortWorkflow { | |
#### optional basic arguments #### | ||
################################## | ||
File? gatk4_jar_override | ||
Int? mem_for_determine_germline_contig_ploidy | ||
Int? mem_for_germline_cnv_caller | ||
Int? mem_gb_for_determine_germline_contig_ploidy | ||
Int? mem_gb_for_germline_cnv_caller | ||
Int? cpu_for_determine_germline_contig_ploidy | ||
Int? cpu_for_germline_cnv_caller | ||
Int? preemptible_attempts | ||
|
@@ -147,7 +147,7 @@ workflow CNVGermlineCohortWorkflow { | |
contig_ploidy_priors = contig_ploidy_priors, | ||
gatk4_jar_override = gatk4_jar_override, | ||
gatk_docker = gatk_docker, | ||
mem = mem_for_determine_germline_contig_ploidy, | ||
mem_gb = mem_for_determine_germline_contig_ploidy, | ||
cpu = cpu_for_determine_germline_contig_ploidy, | ||
mean_bias_standard_deviation = ploidy_mean_bias_standard_deviation, | ||
mapping_error_rate = ploidy_mapping_error_rate, | ||
|
@@ -175,7 +175,7 @@ workflow CNVGermlineCohortWorkflow { | |
annotated_intervals = AnnotateIntervals.annotated_intervals, | ||
gatk4_jar_override = gatk4_jar_override, | ||
gatk_docker = gatk_docker, | ||
mem = mem_for_germline_cnv_caller, | ||
mem_gb = mem_for_germline_cnv_caller, | ||
cpu = cpu_for_germline_cnv_caller, | ||
p_alt = gcnv_p_alt, | ||
p_active = gcnv_p_active, | ||
|
@@ -234,7 +234,7 @@ task DetermineGermlineContigPloidyCohortMode { | |
File? gatk4_jar_override | ||
|
||
# Runtime parameters | ||
Int? mem | ||
Int? mem_gb | ||
Int? cpu | ||
String gatk_docker | ||
Int? preemptible_attempts | ||
|
@@ -248,8 +248,8 @@ task DetermineGermlineContigPloidyCohortMode { | |
|
||
# We do not expose Hybrid ADVI parameters -- the default values are decent | ||
|
||
Int machine_mem = if defined(mem) then select_first([mem]) else 8 | ||
Float command_mem = machine_mem - 0.5 | ||
Int machine_mem_mb = select_first([mem_gb, 7]) * 1000 | ||
Int command_mem_mb = machine_mem_mb - 500 | ||
|
||
# If optional output_dir not specified, use "out" | ||
String output_dir_ = select_first([output_dir, "out"]) | ||
|
@@ -261,7 +261,7 @@ task DetermineGermlineContigPloidyCohortMode { | |
export MKL_NUM_THREADS=${default=8 cpu} | ||
export OMP_NUM_THREADS=${default=8 cpu} | ||
|
||
gatk --java-options "-Xmx${machine_mem}g" DetermineGermlineContigPloidy \ | ||
gatk --java-options "-Xmx${command_mem_mb}m" DetermineGermlineContigPloidy \ | ||
--input ${sep=" --input " read_count_files} \ | ||
--contig-ploidy-priors ${contig_ploidy_priors} \ | ||
--output ${output_dir_} \ | ||
|
@@ -278,7 +278,7 @@ task DetermineGermlineContigPloidyCohortMode { | |
|
||
runtime { | ||
docker: "${gatk_docker}" | ||
memory: command_mem + " GB" | ||
memory: machine_mem_mb + " MB" | ||
disks: "local-disk " + select_first([disk_space_gb, 150]) + " HDD" | ||
preemptible: select_first([preemptible_attempts, 2]) | ||
cpu: select_first([cpu, 8]) | ||
|
@@ -301,7 +301,7 @@ task GermlineCNVCallerCohortMode { | |
File? gatk4_jar_override | ||
|
||
# Runtime parameters | ||
Int? mem | ||
Int? mem_gb | ||
Int? cpu | ||
String gatk_docker | ||
Int? preemptible_attempts | ||
|
@@ -349,8 +349,8 @@ task GermlineCNVCallerCohortMode { | |
Float? caller_admixing_rate | ||
Boolean? disable_annealing | ||
|
||
Int machine_mem = if defined(mem) then select_first([mem]) else 8 | ||
Float command_mem = machine_mem - 0.5 | ||
Int machine_mem_mb = select_first([mem_gb, 7]) * 1000 | ||
Int command_mem_mb = machine_mem_mb - 500 | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Elsewhere you have a 1GB difference. |
||
|
||
# If optional output_dir not specified, use "out" | ||
String output_dir_ = select_first([output_dir, "out"]) | ||
|
@@ -365,7 +365,7 @@ task GermlineCNVCallerCohortMode { | |
mkdir contig-ploidy-calls-dir | ||
tar xzf ${contig_ploidy_calls_tar} -C contig-ploidy-calls-dir | ||
|
||
gatk --java-options "-Xmx${machine_mem}g" GermlineCNVCaller \ | ||
gatk --java-options "-Xmx${command_mem_mb}m" GermlineCNVCaller \ | ||
--run-mode COHORT \ | ||
-L ${intervals} \ | ||
--input ${sep=" --input " read_count_files} \ | ||
|
@@ -418,7 +418,7 @@ task GermlineCNVCallerCohortMode { | |
|
||
runtime { | ||
docker: "${gatk_docker}" | ||
memory: command_mem + " GB" | ||
memory: machine_mem_mb + " MB" | ||
disks: "local-disk " + select_first([disk_space_gb, 150]) + " HDD" | ||
preemptible: select_first([preemptible_attempts, 2]) | ||
cpu: select_first([cpu, 8]) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As per @LeeTL1220's template, we should be putting in values for
cpu
even in the single threaded case.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I didn't check to make sure all of the tasks adhered to the template just yet, since I'm assuming that it might still change. (Actually, I don't think any of the somatic CNV tasks specify cpu.) I'll focus on the mem changes in this PR and overhaul the rest later (perhaps once tasks are automatically generated), if you don't mind!
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@samuelklee Fine by me.