diff --git a/scripts/mutect2_wdl/mutect2_nio.wdl b/scripts/mutect2_wdl/mutect2_nio.wdl index daad7574b45..cf1ec9b6e30 100755 --- a/scripts/mutect2_wdl/mutect2_nio.wdl +++ b/scripts/mutect2_wdl/mutect2_nio.wdl @@ -56,12 +56,17 @@ ## ## Funcotator parameters (see Funcotator help for more details). ## funco_reference_version: "hg19" for hg19 or b37. "hg38" for hg38. Default: "hg19" -## funco_transcript_selection_list: Transcripts (one GENCODE ID per line) to give priority during selection process. +## funco_output_format: "MAF" to produce a MAF file, "VCF" to produce a VCF file. Default: "MAF" +## funco_compress: (Only valid if funco_output_format == "VCF" ) If true, will compress the output of Funcotator. If false, produces an uncompressed output file. Default: false +## funco_use_gnomad_AF: If true, will include gnomAD allele frequency annotations in output by connecting to the internet to query gnomAD (this impacts performance). If false, will not annotate with gnomAD. Default: false ## funco_transcript_selection_mode: How to select transcripts in Funcotator. ALL, CANONICAL, or BEST_EFFECT +## funco_transcript_selection_list: Transcripts (one GENCODE ID per line) to give priority during selection process. ## funco_data_sources_tar_gz: Funcotator datasources tar gz file. Bucket location is recommended when running on the cloud. ## funco_annotation_defaults: Default values for annotations, when values are unspecified. Specified as <ANNOTATION>:<VALUE>. For example: "Center:Broad" ## funco_annotation_overrides: Values for annotations, even when values are unspecified. Specified as <ANNOTATION>:<VALUE>. For example: "Center:Broad" ## funcotator_excluded_fields: Annotations that should not appear in the output (VCF or MAF). Specified as <ANNOTATION>. For example: "ClinVar_ALLELEID" +## funco_filter_funcotations: If true, will only annotate variants that have passed filtering (. or PASS value in the FILTER column). If false, will annotate all variants in the input file. 
Default: true +## funcotator_extra_args: Any additional arguments to pass to Funcotator. Default: "" ## ## Outputs : ## - One VCF file and its index with primary filtering applied; secondary filtering and functional annotation if requested; a bamout.bam @@ -119,22 +124,28 @@ workflow Mutect2 { File? default_config_file String? oncotator_extra_args - # funcotator inputs + # Funcotator inputs Boolean? run_funcotator Boolean run_funcotator_or_default = select_first([run_funcotator, false]) String? funco_reference_version + String? funco_output_format + Boolean? funco_compress + Boolean? funco_use_gnomad_AF File? funco_data_sources_tar_gz String? funco_transcript_selection_mode File? funco_transcript_selection_list Array[String]? funco_annotation_defaults Array[String]? funco_annotation_overrides Array[String]? funcotator_excluded_fields + Boolean? funco_filter_funcotations String? funcotator_extra_args - File? gatk_override + String funco_default_output_format = "MAF" + # runtime String gatk_docker + File? gatk_override String basic_bash_docker = "ubuntu:16.04" String? 
oncotator_docker String oncotator_docker_or_default = select_first([oncotator_docker, "broadinstitute/oncotator:1.9.9.0"]) @@ -446,28 +457,33 @@ workflow Mutect2 { if (run_funcotator_or_default) { File funcotate_vcf_input = select_first([FilterAlignmentArtifacts.filtered_vcf, FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf]) File funcotate_vcf_input_index = select_first([FilterAlignmentArtifacts.filtered_vcf_index, FilterByOrientationBias.filtered_vcf_index, Filter.filtered_vcf_index]) - call FuncotateMaf { + call Funcotate { input: + ref_fasta = ref_fasta, input_vcf = funcotate_vcf_input, input_vcf_idx = funcotate_vcf_input_index, - ref_fasta = ref_fasta, reference_version = select_first([funco_reference_version, "hg19"]), + output_file_base_name = basename(funcotate_vcf_input, ".vcf") + ".annotated", + output_format = if defined(funco_output_format) then "" + funco_output_format else funco_default_output_format, + compress = if defined(funco_compress) then funco_compress else false, + use_gnomad = if defined(funco_use_gnomad_AF) then funco_use_gnomad_AF else false, data_sources_tar_gz = funco_data_sources_tar_gz, case_id = M2.tumor_sample[0], control_id = M2.normal_sample[0], + sequencing_center = sequencing_center, + sequence_source = sequence_source, transcript_selection_mode = funco_transcript_selection_mode, transcript_selection_list = funco_transcript_selection_list, annotation_defaults = funco_annotation_defaults, annotation_overrides = funco_annotation_overrides, + funcotator_excluded_fields = funcotator_excluded_fields, + filter_funcotations = filter_funcotations_or_default, + extra_args = funcotator_extra_args, gatk_docker = gatk_docker, gatk_override = gatk_override, - filter_funcotations = filter_funcotations_or_default, - funcotator_excluded_fields = funcotator_excluded_fields, - sequencing_center = sequencing_center, - sequence_source = sequence_source, - disk_space_gb = ceil(size(funcotate_vcf_input, "GB") * 
large_input_to_output_multiplier) + funco_tar_size + disk_pad, + preemptible_attempts = preemptible_attempts, max_retries = max_retries, - extra_args = funcotator_extra_args + disk_space_gb = ceil(size(funcotate_vcf_input, "GB") * large_input_to_output_multiplier) + onco_tar_size + disk_pad } } @@ -479,7 +495,8 @@ workflow Mutect2 { File? contamination_table = CalculateContamination.contamination_table File? oncotated_m2_maf = oncotate_m2.oncotated_m2_maf - File? funcotated_maf = FuncotateMaf.funcotated_output + File? funcotated_file = Funcotate.funcotated_output_file + File? funcotated_file_index = Funcotate.funcotated_output_file_index File? preadapter_detail_metrics = CollectSequencingArtifactMetrics.pre_adapter_metrics File? bamout = MergeBamOuts.merged_bam_out File? bamout_index = MergeBamOuts.merged_bam_out_index @@ -1289,40 +1306,52 @@ task SumFloats { } } -task FuncotateMaf { - # inputs +task Funcotate { + # ============== + # Inputs String ref_fasta String input_vcf String input_vcf_idx String reference_version - String output_format = "MAF" + String output_file_base_name + String output_format + Boolean compress + Boolean use_gnomad + # This should be updated when a new version of the data sources is released + # TODO: Make this dynamically chosen in the command. + File? data_sources_tar_gz = "gs://broad-public-datasets/funcotator/funcotator_dataSources.v1.6.20190124s.tar.gz" + String? control_id + String? case_id String? sequencing_center String? sequence_source - String case_id - String? control_id - - File? data_sources_tar_gz String? transcript_selection_mode File? transcript_selection_list Array[String]? annotation_defaults Array[String]? annotation_overrides Array[String]? funcotator_excluded_fields - Boolean filter_funcotations + Boolean? filter_funcotations File? interval_list String? 
extra_args # ============== # Process input args: + String output_maf = output_file_base_name + ".maf" + String output_maf_index = output_maf + ".idx" + String output_vcf = output_file_base_name + if compress then ".vcf.gz" else ".vcf" + String output_vcf_index = output_vcf + if compress then ".tbi" else ".idx" + String output_file = if output_format == "MAF" then output_maf else output_vcf + String output_file_index = if output_format == "MAF" then output_maf_index else output_vcf_index + String transcript_selection_arg = if defined(transcript_selection_list) then " --transcript-list " else "" String annotation_def_arg = if defined(annotation_defaults) then " --annotation-default " else "" String annotation_over_arg = if defined(annotation_overrides) then " --annotation-override " else "" - String filter_funcotations_args = if (filter_funcotations) then " --remove-filtered-variants " else "" + String filter_funcotations_args = if defined(filter_funcotations) && (filter_funcotations) then " --remove-filtered-variants " else "" String excluded_fields_args = if defined(funcotator_excluded_fields) then " --exclude-field " else "" - String final_output_filename = basename(input_vcf, ".vcf") + ".maf.annotated" - # ============== - - # runtime + String interval_list_arg = if defined(interval_list) then " -L " else "" + String extra_args_arg = select_first([extra_args, ""]) + # ============== + # Runtime options: String gatk_docker File? gatk_override Int? mem @@ -1333,56 +1362,66 @@ task FuncotateMaf { Boolean use_ssd = false - # This should be updated when a new version of the data sources is released - String default_datasources_version = "funcotator_dataSources.v1.4.20180615" - # You may have to change the following two parameter values depending on the task requirements Int default_ram_mb = 3000 - # WARNING: In the workflow, you should calculate the disk space as an input to this task (disk_space_gb). 
+ # WARNING: In the workflow, you should calculate the disk space as an input to this task (disk_space_gb). Please see [TODO: Link from Jose] for examples. Int default_disk_space_gb = 100 # Mem is in units of GB but our command and memory runtime values are in MB Int machine_mem = if defined(mem) then mem *1000 else default_ram_mb Int command_mem = machine_mem - 1000 + String dollar = "$" + command <<< set -e export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} - DATA_SOURCES_TAR_GZ=${data_sources_tar_gz} - if [[ ! -e $DATA_SOURCES_TAR_GZ ]] ; then - # We have to download the data sources: - echo "Data sources gzip does not exist: $DATA_SOURCES_TAR_GZ" - echo "Downloading default data sources..." - wget ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/funcotator/${default_datasources_version}.tar.gz - tar -zxf ${default_datasources_version}.tar.gz - DATA_SOURCES_FOLDER=${default_datasources_version} - else - # Extract the tar.gz: - mkdir datasources_dir - tar zxvf ${data_sources_tar_gz} -C datasources_dir --strip-components 1 - DATA_SOURCES_FOLDER="$PWD/datasources_dir" + # Extract our data sources: + echo "Extracting data sources zip file..." + mkdir datasources_dir + tar zxvf ${data_sources_tar_gz} -C datasources_dir --strip-components 1 + DATA_SOURCES_FOLDER="$PWD/datasources_dir" + + # Handle gnomAD: + if ${use_gnomad} ; then + echo "Enabling gnomAD..." 
+ for potential_gnomad_gz in gnomAD_exome.tar.gz gnomAD_genome.tar.gz ; do + if [[ -f ${dollar}{DATA_SOURCES_FOLDER}/${dollar}{potential_gnomad_gz} ]] ; then + cd ${dollar}{DATA_SOURCES_FOLDER} + tar -zvxf ${dollar}{potential_gnomad_gz} + cd - + else + echo "ERROR: Cannot find gnomAD folder: ${dollar}{potential_gnomad_gz}" 1>&2 + false + fi + done fi + # Run Funcotator: gatk --java-options "-Xmx${command_mem}m" Funcotator \ --data-sources-path $DATA_SOURCES_FOLDER \ --ref-version ${reference_version} \ --output-file-format ${output_format} \ -R ${ref_fasta} \ -V ${input_vcf} \ - -O ${final_output_filename} \ - ${"-L " + interval_list} \ + -O ${output_file} \ + ${interval_list_arg} ${default="" interval_list} \ + --annotation-default normal_barcode:${default="Unknown" control_id} \ + --annotation-default tumor_barcode:${default="Unknown" case_id} \ + --annotation-default Center:${default="Unknown" sequencing_center} \ + --annotation-default source:${default="Unknown" sequence_source} \ ${"--transcript-selection-mode " + transcript_selection_mode} \ - ${"--transcript-list " + transcript_selection_list} \ - --annotation-default normal_barcode:${control_id} \ - --annotation-default tumor_barcode:${case_id} \ - --annotation-default Center:${default="Unknown" sequencing_center} \ - --annotation-default source:${default="Unknown" sequence_source} \ + ${transcript_selection_arg}${default="" sep=" --transcript-list " transcript_selection_list} \ ${annotation_def_arg}${default="" sep=" --annotation-default " annotation_defaults} \ ${annotation_over_arg}${default="" sep=" --annotation-override " annotation_overrides} \ ${excluded_fields_args}${default="" sep=" --exclude-field " funcotator_excluded_fields} \ ${filter_funcotations_args} \ - ${extra_args} + ${extra_args_arg} + # Make sure we have a placeholder index for MAF files so this workflow doesn't fail: + if [[ "${output_format}" == "MAF" ]] ; then + touch ${output_maf_index} + fi >>> runtime { @@ -1396,6 +1435,7 @@ task 
FuncotateMaf { } output { - File funcotated_output = "${final_output_filename}" + File funcotated_output_file = "${output_file}" + File funcotated_output_file_index = "${output_file_index}" } } \ No newline at end of file