Skip to content

Commit

Permalink
Wdlupdate (#7)
Browse files Browse the repository at this point in the history
* Updated Picard commands to use GATK4; updated GATK docker to 4.0.4.0

* Edit GATK tool requirement version
  • Loading branch information
bshifaw authored Jun 11, 2018
1 parent d490fe7 commit 6486075
Show file tree
Hide file tree
Showing 4 changed files with 73 additions and 80 deletions.
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,13 +21,13 @@ applied to Exomes and Targeted Panels.
- A clean BAM file and its index, suitable for variant discovery analyses.

### Software version requirements :
- GATK 4.beta.3 or later
- GATK 4 or later
- Picard 2.x
- Samtools (see gotc docker)
- Python 2.7

Cromwell version support
- Successfully tested on v28
- Successfully tested on v32
- Does not work on versions < v23 due to output syntax

Runtime parameters are optimized for Broad's Google Cloud Platform implementation.
7 changes: 2 additions & 5 deletions processing-for-variant-discovery-gatk4.b37.wgs.inputs.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
"PreProcessingForVariantDiscovery_GATK4.ref_dict": "gs://gatk-legacy-bundles/b37/human_g1k_v37_decoy.dict",
"PreProcessingForVariantDiscovery_GATK4.ref_fasta": "gs://gatk-legacy-bundles/b37/human_g1k_v37_decoy.fasta",
"PreProcessingForVariantDiscovery_GATK4.ref_fasta_index": "gs://gatk-legacy-bundles/b37/human_g1k_v37_decoy.fasta.fai",
"PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_alt": "",
"PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_sa": "gs://gatk-legacy-bundles/b37/human_g1k_v37_decoy.fasta.sa",
"PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_amb": "gs://gatk-legacy-bundles/b37/human_g1k_v37_decoy.fasta.amb",
"PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.ref_bwt": "gs://gatk-legacy-bundles/b37/human_g1k_v37_decoy.fasta.bwt",
Expand All @@ -32,14 +31,12 @@
"PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.num_cpu": "16",

"##_COMMENT5": "DOCKERS",
"PreProcessingForVariantDiscovery_GATK4.gotc_docker": "broadinstitute/genomes-in-the-cloud:2.3.0-1501082129",
"PreProcessingForVariantDiscovery_GATK4.gatk_docker": "broadinstitute/gatk:4.0.0.0",
"PreProcessingForVariantDiscovery_GATK4.picard_docker": "broadinstitute/genomes-in-the-cloud:2.3.0-1501082129",
"PreProcessingForVariantDiscovery_GATK4.gotc_docker": "broadinstitute/genomes-in-the-cloud:2.3.1-1512499786",
"PreProcessingForVariantDiscovery_GATK4.gatk_docker": "broadinstitute/gatk:4.0.4.0",
"PreProcessingForVariantDiscovery_GATK4.python_docker": "python:2.7",

"##_COMMENT6": "PATHS",
"PreProcessingForVariantDiscovery_GATK4.gotc_path": "/usr/gitc/",
"PreProcessingForVariantDiscovery_GATK4.picard_path": "/usr/gitc/",
"PreProcessingForVariantDiscovery_GATK4.gatk_path": "/gatk/gatk",

"##_COMMENT7": "JAVA OPTIONS",
Expand Down
6 changes: 2 additions & 4 deletions processing-for-variant-discovery-gatk4.hg38.wgs.inputs.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,14 +34,12 @@
"PreProcessingForVariantDiscovery_GATK4.SamToFastqAndBwaMem.num_cpu": "16",

"##_COMMENT5": "DOCKERS",
"PreProcessingForVariantDiscovery_GATK4.gotc_docker": "broadinstitute/genomes-in-the-cloud:2.3.0-1501082129",
"PreProcessingForVariantDiscovery_GATK4.gatk_docker": "broadinstitute/gatk:4.0.0.0",
"PreProcessingForVariantDiscovery_GATK4.picard_docker": "broadinstitute/genomes-in-the-cloud:2.3.0-1501082129",
"PreProcessingForVariantDiscovery_GATK4.gotc_docker": "broadinstitute/genomes-in-the-cloud:2.3.1-1512499786",
"PreProcessingForVariantDiscovery_GATK4.gatk_docker": "broadinstitute/gatk:4.0.4.0",
"PreProcessingForVariantDiscovery_GATK4.python_docker": "python:2.7",

"##_COMMENT6": "PATHS",
"PreProcessingForVariantDiscovery_GATK4.gotc_path": "/usr/gitc/",
"PreProcessingForVariantDiscovery_GATK4.picard_path": "/usr/gitc/",
"PreProcessingForVariantDiscovery_GATK4.gatk_path": "/gatk/gatk",

"##_COMMENT7": "JAVA OPTIONS",
Expand Down
136 changes: 67 additions & 69 deletions processing-for-variant-discovery-gatk4.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,13 @@
## - A clean BAM file and its index, suitable for variant discovery analyses.
##
## Software version requirements (see recommended dockers in inputs JSON)
## - GATK 4.beta.3 or later
## - Picard 2.x
## - GATK 4 or later
## - Picard (see gotc docker)
## - Samtools (see gotc docker)
## - Python 2.7
##
## Cromwell version support
## - Successfully tested on v28
## - Successfully tested on v32
## - Does not work on versions < v23 due to output syntax
##
## Runtime parameters are optimized for Broad's Google Cloud Platform implementation.
Expand Down Expand Up @@ -57,12 +57,10 @@ workflow PreProcessingForVariantDiscovery_GATK4 {
Array[File] known_indels_sites_indices

String gotc_docker
String picard_docker
String gatk_docker
String python_docker

String gotc_path
String picard_path
String gatk_path

Int flowcell_small_disk
Expand Down Expand Up @@ -104,7 +102,7 @@ workflow PreProcessingForVariantDiscovery_GATK4 {
ref_dict = ref_dict,
docker_image = gotc_docker,
bwa_path = gotc_path,
picard_path = gotc_path,
gotc_path = gotc_path,
disk_size = flowcell_medium_disk,
preemptible_tries = preemptible_tries,
compression_level = compression_level
Expand All @@ -121,8 +119,8 @@ workflow PreProcessingForVariantDiscovery_GATK4 {
ref_fasta = ref_fasta,
ref_fasta_index = ref_fasta_index,
ref_dict = ref_dict,
docker_image = picard_docker,
picard_path = picard_path,
docker_image = gatk_docker,
gatk_path = gatk_path,
disk_size = flowcell_medium_disk,
preemptible_tries = preemptible_tries,
compression_level = compression_level
Expand All @@ -137,8 +135,8 @@ workflow PreProcessingForVariantDiscovery_GATK4 {
input_bams = MergeBamAlignment.output_bam,
output_bam_basename = base_file_name + ".aligned.unsorted.duplicates_marked",
metrics_filename = base_file_name + ".duplicate_metrics",
docker_image = picard_docker,
picard_path = picard_path,
docker_image = gatk_docker,
gatk_path = gatk_path,
disk_size = agg_large_disk,
compression_level = compression_level,
preemptible_tries = agg_preemptible_tries
Expand All @@ -152,8 +150,8 @@ workflow PreProcessingForVariantDiscovery_GATK4 {
ref_dict = ref_dict,
ref_fasta = ref_fasta,
ref_fasta_index = ref_fasta_index,
docker_image = picard_docker,
picard_path = picard_path,
docker_image = gatk_docker,
gatk_path = gatk_path,
disk_size = agg_large_disk,
preemptible_tries = 0,
compression_level = compression_level
Expand Down Expand Up @@ -226,8 +224,8 @@ workflow PreProcessingForVariantDiscovery_GATK4 {
input:
input_bams = ApplyBQSR.recalibrated_bam,
output_bam_basename = base_file_name,
docker_image = picard_docker,
picard_path = picard_path,
docker_image = gatk_docker,
gatk_path = gatk_path,
disk_size = agg_large_disk,
preemptible_tries = agg_preemptible_tries,
compression_level = compression_level
Expand Down Expand Up @@ -298,7 +296,7 @@ task SamToFastqAndBwaMem {

String docker_image
String bwa_path
String picard_path
String gotc_path
String java_opt

command <<<
Expand All @@ -308,7 +306,7 @@ task SamToFastqAndBwaMem {
# set the bash variable needed for the command-line
bash_ref_fasta=${ref_fasta}

java -Dsamjdk.compression_level=${compression_level} ${java_opt} -jar ${picard_path}picard.jar \
java -Dsamjdk.compression_level=${compression_level} ${java_opt} -jar ${gotc_path}picard.jar \
SamToFastq \
INPUT=${input_bam} \
FASTQ=/dev/stdout \
Expand Down Expand Up @@ -350,37 +348,37 @@ task MergeBamAlignment {
String mem_size

String docker_image
String picard_path
String gatk_path
String java_opt

command {
# set the bash variable needed for the command-line
bash_ref_fasta=${ref_fasta}
java -Dsamjdk.compression_level=${compression_level} ${java_opt} -jar ${picard_path}picard.jar \
${gatk_path} --java-options "-Dsamjdk.compression_level=${compression_level} ${java_opt}" \
MergeBamAlignment \
VALIDATION_STRINGENCY=SILENT \
EXPECTED_ORIENTATIONS=FR \
ATTRIBUTES_TO_RETAIN=X0 \
ALIGNED_BAM=${aligned_bam} \
UNMAPPED_BAM=${unmapped_bam} \
OUTPUT=${output_bam_basename}.bam \
REFERENCE_SEQUENCE=${ref_fasta} \
PAIRED_RUN=true \
SORT_ORDER="unsorted" \
IS_BISULFITE_SEQUENCE=false \
ALIGNED_READS_ONLY=false \
CLIP_ADAPTERS=false \
MAX_RECORDS_IN_RAM=2000000 \
ADD_MATE_CIGAR=true \
MAX_INSERTIONS_OR_DELETIONS=-1 \
PRIMARY_ALIGNMENT_STRATEGY=MostDistant \
PROGRAM_RECORD_ID="bwamem" \
PROGRAM_GROUP_VERSION="${bwa_version}" \
PROGRAM_GROUP_COMMAND_LINE="${bwa_commandline}" \
PROGRAM_GROUP_NAME="bwamem" \
UNMAPPED_READ_STRATEGY=COPY_TO_TAG \
ALIGNER_PROPER_PAIR_FLAGS=true \
UNMAP_CONTAMINANT_READS=true
--VALIDATION_STRINGENCY SILENT \
--EXPECTED_ORIENTATIONS FR \
--ATTRIBUTES_TO_RETAIN X0 \
--ALIGNED_BAM ${aligned_bam} \
--UNMAPPED_BAM ${unmapped_bam} \
--OUTPUT ${output_bam_basename}.bam \
--REFERENCE_SEQUENCE ${ref_fasta} \
--PAIRED_RUN true \
--SORT_ORDER "unsorted" \
--IS_BISULFITE_SEQUENCE false \
--ALIGNED_READS_ONLY false \
--CLIP_ADAPTERS false \
--MAX_RECORDS_IN_RAM 2000000 \
--ADD_MATE_CIGAR true \
--MAX_INSERTIONS_OR_DELETIONS -1 \
--PRIMARY_ALIGNMENT_STRATEGY MostDistant \
--PROGRAM_RECORD_ID "bwamem" \
--PROGRAM_GROUP_VERSION "${bwa_version}" \
--PROGRAM_GROUP_COMMAND_LINE "${bwa_commandline}" \
--PROGRAM_GROUP_NAME "bwamem" \
--UNMAPPED_READ_STRATEGY COPY_TO_TAG \
--ALIGNER_PROPER_PAIR_FLAGS true \
--UNMAP_CONTAMINANT_READS true
}
runtime {
preemptible: preemptible_tries
Expand All @@ -407,28 +405,28 @@ task SortAndFixTags {
String mem_size

String docker_image
String picard_path
String gatk_path
String java_opt_sort
String java_opt_fix

command {
set -o pipefail

java -Dsamjdk.compression_level=${compression_level} ${java_opt_sort} -jar ${picard_path}picard.jar \
${gatk_path} --java-options "-Dsamjdk.compression_level=${compression_level} ${java_opt_sort}" \
SortSam \
INPUT=${input_bam} \
OUTPUT=/dev/stdout \
SORT_ORDER="coordinate" \
CREATE_INDEX=false \
CREATE_MD5_FILE=false \
--INPUT ${input_bam} \
--OUTPUT /dev/stdout \
--SORT_ORDER "coordinate" \
--CREATE_INDEX false \
--CREATE_MD5_FILE false \
| \
java -Dsamjdk.compression_level=${compression_level} ${java_opt_fix} -jar ${picard_path}picard.jar \
${gatk_path} --java-options "-Dsamjdk.compression_level=${compression_level} ${java_opt_fix}" \
SetNmAndUqTags \
INPUT=/dev/stdin \
OUTPUT=${output_bam_basename}.bam \
CREATE_INDEX=true \
CREATE_MD5_FILE=true \
REFERENCE_SEQUENCE=${ref_fasta}
--INPUT /dev/stdin \
--OUTPUT ${output_bam_basename}.bam \
--CREATE_INDEX true \
--CREATE_MD5_FILE true \
--REFERENCE_SEQUENCE ${ref_fasta}
}
runtime {
preemptible: preemptible_tries
Expand All @@ -455,22 +453,22 @@ task MarkDuplicates {
String mem_size

String docker_image
String picard_path
String gatk_path
String java_opt

# This task assumes query-sorted input so that secondary and supplementary reads are marked correctly.
# This works because the output of BWA is query-grouped and therefore so is the output of MergeBamAlignment.
# Although query-grouped is not strictly query-sorted, it is good enough for MarkDuplicates with ASSUME_SORT_ORDER="queryname".
command {
java -Dsamjdk.compression_level=${compression_level} ${java_opt} -jar ${picard_path}picard.jar \
${gatk_path} --java-options "-Dsamjdk.compression_level=${compression_level} ${java_opt}" \
MarkDuplicates \
INPUT=${sep=' INPUT=' input_bams} \
OUTPUT=${output_bam_basename}.bam \
METRICS_FILE=${metrics_filename} \
VALIDATION_STRINGENCY=SILENT \
OPTICAL_DUPLICATE_PIXEL_DISTANCE=2500 \
ASSUME_SORT_ORDER="queryname"
CREATE_MD5_FILE=true
--INPUT ${sep=' --INPUT ' input_bams} \
--OUTPUT ${output_bam_basename}.bam \
--METRICS_FILE ${metrics_filename} \
--VALIDATION_STRINGENCY SILENT \
--OPTICAL_DUPLICATE_PIXEL_DISTANCE 2500 \
--ASSUME_SORT_ORDER "queryname" \
--CREATE_MD5_FILE true
}
runtime {
preemptible: preemptible_tries
Expand Down Expand Up @@ -672,16 +670,16 @@ task GatherBamFiles {
String mem_size
String docker_image
String picard_path
String gatk_path
String java_opt
command {
java -Dsamjdk.compression_level=${compression_level} ${java_opt} -jar ${picard_path}picard.jar \
${gatk_path} --java-options "-Dsamjdk.compression_level=${compression_level} ${java_opt}" \
GatherBamFiles \
INPUT=${sep=' INPUT=' input_bams} \
OUTPUT=${output_bam_basename}.bam \
CREATE_INDEX=true \
CREATE_MD5_FILE=true
--INPUT ${sep=' --INPUT ' input_bams} \
--OUTPUT ${output_bam_basename}.bam \
--CREATE_INDEX true \
--CREATE_MD5_FILE true
}
runtime {
preemptible: preemptible_tries
Expand Down

0 comments on commit 6486075

Please sign in to comment.