Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bring Funcotator changes to M2 NIO WDL #5742

Merged
merged 2 commits into from
Mar 13, 2019
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
142 changes: 91 additions & 51 deletions scripts/mutect2_wdl/mutect2_nio.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -56,12 +56,17 @@
##
## Funcotator parameters (see Funcotator help for more details).
## funco_reference_version: "hg19" for hg19 or b37. "hg38" for hg38. Default: "hg19"
## funco_transcript_selection_list: Transcripts (one GENCODE ID per line) to give priority during selection process.
## funco_output_format: "MAF" to produce a MAF file, "VCF" to produce a VCF file. Default: "MAF"
## funco_compress: (Only valid if funco_output_format == "VCF" ) If true, will compress the output of Funcotator. If false, produces an uncompressed output file. Default: false
## funco_use_gnomad_AF: If true, will include gnomAD allele frequency annotations in output by connecting to the internet to query gnomAD (this impacts performance). If false, will not annotate with gnomAD. Default: false
## funco_transcript_selection_mode: How to select transcripts in Funcotator. ALL, CANONICAL, or BEST_EFFECT
## funco_transcript_selection_list: Transcripts (one GENCODE ID per line) to give priority during selection process.
## funco_data_sources_tar_gz: Funcotator datasources tar gz file. Bucket location is recommended when running on the cloud.
## funco_annotation_defaults: Default values for annotations, when values are unspecified. Specified as <ANNOTATION>:<VALUE>. For example: "Center:Broad"
## funco_annotation_overrides: Values that override annotations, even when a value is already specified. Specified as <ANNOTATION>:<VALUE>. For example: "Center:Broad"
## funcotator_excluded_fields: Annotations that should not appear in the output (VCF or MAF). Specified as <ANNOTATION>. For example: "ClinVar_ALLELEID"
## funco_filter_funcotations: If true, will only annotate variants that have passed filtering (. or PASS value in the FILTER column). If false, will annotate all variants in the input file. Default: true
## funcotator_extra_args: Any additional arguments to pass to Funcotator. Default: ""
##
## Outputs :
## - One VCF file and its index with primary filtering applied; secondary filtering and functional annotation if requested; a bamout.bam
Expand Down Expand Up @@ -119,22 +124,28 @@ workflow Mutect2 {
File? default_config_file
String? oncotator_extra_args

# funcotator inputs
# Funcotator inputs
Boolean? run_funcotator
Boolean run_funcotator_or_default = select_first([run_funcotator, false])
String? funco_reference_version
String? funco_output_format
Boolean? funco_compress
Boolean? funco_use_gnomad_AF
File? funco_data_sources_tar_gz
String? funco_transcript_selection_mode
File? funco_transcript_selection_list
Array[String]? funco_annotation_defaults
Array[String]? funco_annotation_overrides
Array[String]? funcotator_excluded_fields
Boolean? funco_filter_funcotations
String? funcotator_extra_args

File? gatk_override
String funco_default_output_format = "MAF"


# runtime
String gatk_docker
File? gatk_override
String basic_bash_docker = "ubuntu:16.04"
String? oncotator_docker
String oncotator_docker_or_default = select_first([oncotator_docker, "broadinstitute/oncotator:1.9.9.0"])
Expand Down Expand Up @@ -446,28 +457,33 @@ workflow Mutect2 {
if (run_funcotator_or_default) {
File funcotate_vcf_input = select_first([FilterAlignmentArtifacts.filtered_vcf, FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf])
File funcotate_vcf_input_index = select_first([FilterAlignmentArtifacts.filtered_vcf_index, FilterByOrientationBias.filtered_vcf_index, Filter.filtered_vcf_index])
call FuncotateMaf {
call Funcotate {
input:
ref_fasta = ref_fasta,
input_vcf = funcotate_vcf_input,
input_vcf_idx = funcotate_vcf_input_index,
ref_fasta = ref_fasta,
reference_version = select_first([funco_reference_version, "hg19"]),
output_file_base_name = basename(funcotate_vcf_input, ".vcf") + ".annotated",
output_format = if defined(funco_output_format) then "" + funco_output_format else funco_default_output_format,
compress = if defined(funco_compress) then funco_compress else false,
use_gnomad = if defined(funco_use_gnomad_AF) then funco_use_gnomad_AF else false,
data_sources_tar_gz = funco_data_sources_tar_gz,
case_id = M2.tumor_sample[0],
control_id = M2.normal_sample[0],
sequencing_center = sequencing_center,
sequence_source = sequence_source,
transcript_selection_mode = funco_transcript_selection_mode,
transcript_selection_list = funco_transcript_selection_list,
annotation_defaults = funco_annotation_defaults,
annotation_overrides = funco_annotation_overrides,
funcotator_excluded_fields = funcotator_excluded_fields,
filter_funcotations = filter_funcotations_or_default,
extra_args = funcotator_extra_args,
gatk_docker = gatk_docker,
gatk_override = gatk_override,
filter_funcotations = filter_funcotations_or_default,
funcotator_excluded_fields = funcotator_excluded_fields,
sequencing_center = sequencing_center,
sequence_source = sequence_source,
disk_space_gb = ceil(size(funcotate_vcf_input, "GB") * large_input_to_output_multiplier) + funco_tar_size + disk_pad,
preemptible_attempts = preemptible_attempts,
max_retries = max_retries,
extra_args = funcotator_extra_args
disk_space_gb = ceil(size(funcotate_vcf_input, "GB") * large_input_to_output_multiplier) + onco_tar_size + disk_pad
}
}

Expand All @@ -479,7 +495,8 @@ workflow Mutect2 {
File? contamination_table = CalculateContamination.contamination_table

File? oncotated_m2_maf = oncotate_m2.oncotated_m2_maf
File? funcotated_maf = FuncotateMaf.funcotated_output
File? funcotated_file = Funcotate.funcotated_output_file
File? funcotated_file_index = Funcotate.funcotated_output_file_index
File? preadapter_detail_metrics = CollectSequencingArtifactMetrics.pre_adapter_metrics
File? bamout = MergeBamOuts.merged_bam_out
File? bamout_index = MergeBamOuts.merged_bam_out_index
Expand Down Expand Up @@ -1289,40 +1306,52 @@ task SumFloats {
}
}

task FuncotateMaf {
# inputs
task Funcotate {
# ==============
# Inputs
String ref_fasta
String input_vcf
String input_vcf_idx
String reference_version
String output_format = "MAF"
String output_file_base_name
String output_format
Boolean compress
Boolean use_gnomad
# This should be updated when a new version of the data sources is released
# TODO: Make this dynamically chosen in the command.
File? data_sources_tar_gz = "gs://broad-public-datasets/funcotator/funcotator_dataSources.v1.6.20190124s.tar.gz"
String? control_id
String? case_id
String? sequencing_center
String? sequence_source
String case_id
String? control_id

File? data_sources_tar_gz
String? transcript_selection_mode
File? transcript_selection_list
Array[String]? annotation_defaults
Array[String]? annotation_overrides
Array[String]? funcotator_excluded_fields
Boolean filter_funcotations
Boolean? filter_funcotations
File? interval_list

String? extra_args

# ==============
# Process input args:
String output_maf = output_file_base_name + ".maf"
String output_maf_index = output_maf + ".idx"
String output_vcf = output_file_base_name + if compress then ".vcf.gz" else ".vcf"
String output_vcf_index = output_vcf + if compress then ".tbi" else ".idx"
String output_file = if output_format == "MAF" then output_maf else output_vcf
String output_file_index = if output_format == "MAF" then output_maf_index else output_vcf_index
String transcript_selection_arg = if defined(transcript_selection_list) then " --transcript-list " else ""
String annotation_def_arg = if defined(annotation_defaults) then " --annotation-default " else ""
String annotation_over_arg = if defined(annotation_overrides) then " --annotation-override " else ""
String filter_funcotations_args = if (filter_funcotations) then " --remove-filtered-variants " else ""
String filter_funcotations_args = if defined(filter_funcotations) && (filter_funcotations) then " --remove-filtered-variants " else ""
String excluded_fields_args = if defined(funcotator_excluded_fields) then " --exclude-field " else ""
String final_output_filename = basename(input_vcf, ".vcf") + ".maf.annotated"
# ==============

# runtime
String interval_list_arg = if defined(interval_list) then " -L " else ""
String extra_args_arg = select_first([extra_args, ""])

# ==============
# Runtime options:
String gatk_docker
File? gatk_override
Int? mem
Expand All @@ -1333,56 +1362,66 @@ task FuncotateMaf {

Boolean use_ssd = false

# This should be updated when a new version of the data sources is released
String default_datasources_version = "funcotator_dataSources.v1.4.20180615"

# You may have to change the following two parameter values depending on the task requirements
Int default_ram_mb = 3000
# WARNING: In the workflow, you should calculate the disk space as an input to this task (disk_space_gb).
# WARNING: In the workflow, you should calculate the disk space as an input to this task (disk_space_gb). Please see [TODO: Link from Jose] for examples.
Int default_disk_space_gb = 100

# Mem is in units of GB but our command and memory runtime values are in MB
Int machine_mem = if defined(mem) then mem *1000 else default_ram_mb
Int command_mem = machine_mem - 1000

String dollar = "$"

command <<<
set -e
export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override}

DATA_SOURCES_TAR_GZ=${data_sources_tar_gz}
if [[ ! -e $DATA_SOURCES_TAR_GZ ]] ; then
# We have to download the data sources:
echo "Data sources gzip does not exist: $DATA_SOURCES_TAR_GZ"
echo "Downloading default data sources..."
wget ftp://[email protected]/bundle/funcotator/${default_datasources_version}.tar.gz
tar -zxf ${default_datasources_version}.tar.gz
DATA_SOURCES_FOLDER=${default_datasources_version}
else
# Extract the tar.gz:
mkdir datasources_dir
tar zxvf ${data_sources_tar_gz} -C datasources_dir --strip-components 1
DATA_SOURCES_FOLDER="$PWD/datasources_dir"
# Extract our data sources:
echo "Extracting data sources zip file..."
mkdir datasources_dir
tar zxvf ${data_sources_tar_gz} -C datasources_dir --strip-components 1
DATA_SOURCES_FOLDER="$PWD/datasources_dir"

# Handle gnomAD:
if ${use_gnomad} ; then
echo "Enabling gnomAD..."
for potential_gnomad_gz in gnomAD_exome.tar.gz gnomAD_genome.tar.gz ; do
if [[ -f ${dollar}{DATA_SOURCES_FOLDER}/${dollar}{potential_gnomad_gz} ]] ; then
cd ${dollar}{DATA_SOURCES_FOLDER}
tar -zvxf ${dollar}{potential_gnomad_gz}
cd -
else
echo "ERROR: Cannot find gnomAD folder: ${dollar}{potential_gnomad_gz}" 1>&2
false
fi
done
fi

# Run Funcotator:
gatk --java-options "-Xmx${command_mem}m" Funcotator \
--data-sources-path $DATA_SOURCES_FOLDER \
--ref-version ${reference_version} \
--output-file-format ${output_format} \
-R ${ref_fasta} \
-V ${input_vcf} \
-O ${final_output_filename} \
${"-L " + interval_list} \
-O ${output_file} \
${interval_list_arg} ${default="" interval_list} \
--annotation-default normal_barcode:${default="Unknown" control_id} \
--annotation-default tumor_barcode:${default="Unknown" case_id} \
--annotation-default Center:${default="Unknown" sequencing_center} \
--annotation-default source:${default="Unknown" sequence_source} \
${"--transcript-selection-mode " + transcript_selection_mode} \
${"--transcript-list " + transcript_selection_list} \
--annotation-default normal_barcode:${control_id} \
--annotation-default tumor_barcode:${case_id} \
--annotation-default Center:${default="Unknown" sequencing_center} \
--annotation-default source:${default="Unknown" sequence_source} \
${transcript_selection_arg}${default="" sep=" --transcript-list " transcript_selection_list} \
${annotation_def_arg}${default="" sep=" --annotation-default " annotation_defaults} \
${annotation_over_arg}${default="" sep=" --annotation-override " annotation_overrides} \
${excluded_fields_args}${default="" sep=" --exclude-field " funcotator_excluded_fields} \
${filter_funcotations_args} \
${extra_args}
${extra_args_arg}
# Make sure we have a placeholder index for MAF files so this workflow doesn't fail:
if [[ "${output_format}" == "MAF" ]] ; then
touch ${output_maf_index}
fi
>>>

runtime {
Expand All @@ -1396,6 +1435,7 @@ task FuncotateMaf {
}

output {
File funcotated_output = "${final_output_filename}"
File funcotated_output_file = "${output_file}"
File funcotated_output_file_index = "${output_file_index}"
}
}