-
Notifications
You must be signed in to change notification settings - Fork 594
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
bring Funcotator changes to M2 NIO WDL
- Loading branch information
1 parent
80ab76b
commit 342c15f
Showing
1 changed file
with
92 additions
and
51 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -56,12 +56,17 @@ | |
## | ||
## Funcotator parameters (see Funcotator help for more details). | ||
## funco_reference_version: "hg19" for hg19 or b37. "hg38" for hg38. Default: "hg19" | ||
## funco_transcript_selection_list: Transcripts (one GENCODE ID per line) to give priority during selection process. | ||
## funco_output_format: "MAF" to produce a MAF file, "VCF" to produce a VCF file. Default: "MAF" | ||
## funco_compress: (Only valid if funco_output_format == "VCF" ) If true, will compress the output of Funcotator. If false, produces an uncompressed output file. Default: false | ||
## funco_use_gnomad_AF: If true, will include gnomAD allele frequency annotations in output by connecting to the internet to query gnomAD (this impacts performance). If false, will not annotate with gnomAD. Default: false | ||
## funco_transcript_selection_mode: How to select transcripts in Funcotator. ALL, CANONICAL, or BEST_EFFECT | ||
## funco_transcript_selection_list: Transcripts (one GENCODE ID per line) to give priority during selection process. | ||
## funco_data_sources_tar_gz: Funcotator datasources tar gz file. Bucket location is recommended when running on the cloud. | ||
## funco_annotation_defaults: Default values for annotations, when values are unspecified. Specified as <ANNOTATION>:<VALUE>. For example: "Center:Broad" | ||
## funco_annotation_overrides: Values for annotations that replace any existing value, even when one is already specified. Specified as <ANNOTATION>:<VALUE>. For example: "Center:Broad" | ||
## funcotator_excluded_fields: Annotations that should not appear in the output (VCF or MAF). Specified as <ANNOTATION>. For example: "ClinVar_ALLELEID" | ||
## funco_filter_funcotations: If true, will only annotate variants that have passed filtering (. or PASS value in the FILTER column). If false, will annotate all variants in the input file. Default: true | ||
## funcotator_extra_args: Any additional arguments to pass to Funcotator. Default: "" | ||
## | ||
## Outputs : | ||
## - One VCF file and its index with primary filtering applied; secondary filtering and functional annotation if requested; a bamout.bam | ||
|
@@ -119,22 +124,29 @@ workflow Mutect2 { | |
File? default_config_file | ||
String? oncotator_extra_args | ||
|
||
# funcotator inputs | ||
# Funcotator inputs | ||
Boolean? run_funcotator | ||
Boolean run_funcotator_or_default = select_first([run_funcotator, false]) | ||
String? funco_reference_version | ||
String? funco_output_format | ||
Boolean? funco_compress | ||
Boolean? funco_use_gnomad_AF | ||
File? funco_data_sources_tar_gz | ||
String? funco_transcript_selection_mode | ||
File? funco_transcript_selection_list | ||
Array[String]? funco_annotation_defaults | ||
Array[String]? funco_annotation_overrides | ||
Array[String]? funcotator_excluded_fields | ||
Boolean? funco_filter_funcotations | ||
String? funcotator_extra_args | ||
|
||
File? gatk_override | ||
Boolean run_funcotator_or_default = select_first([run_funcotator, false]) | ||
String funco_default_output_format = "MAF" | ||
|
||
|
||
# runtime | ||
String gatk_docker | ||
File? gatk_override | ||
String basic_bash_docker = "ubuntu:16.04" | ||
String? oncotator_docker | ||
String oncotator_docker_or_default = select_first([oncotator_docker, "broadinstitute/oncotator:1.9.9.0"]) | ||
|
@@ -438,28 +450,33 @@ workflow Mutect2 { | |
if (run_funcotator_or_default) { | ||
File funcotate_vcf_input = select_first([FilterAlignmentArtifacts.filtered_vcf, FilterByOrientationBias.filtered_vcf, Filter.filtered_vcf]) | ||
File funcotate_vcf_input_index = select_first([FilterAlignmentArtifacts.filtered_vcf_index, FilterByOrientationBias.filtered_vcf_index, Filter.filtered_vcf_index]) | ||
call FuncotateMaf { | ||
call Funcotate { | ||
input: | ||
ref_fasta = ref_fasta, | ||
input_vcf = funcotate_vcf_input, | ||
input_vcf_idx = funcotate_vcf_input_index, | ||
ref_fasta = ref_fasta, | ||
reference_version = select_first([funco_reference_version, "hg19"]), | ||
output_file_base_name = basename(funcotate_vcf_input, ".vcf") + ".annotated", | ||
output_format = if defined(funco_output_format) then "" + funco_output_format else funco_default_output_format, | ||
compress = if defined(funco_compress) then funco_compress else false, | ||
use_gnomad = if defined(funco_use_gnomad_AF) then funco_use_gnomad_AF else false, | ||
data_sources_tar_gz = funco_data_sources_tar_gz, | ||
case_id = M2.tumor_sample[0], | ||
control_id = M2.normal_sample[0], | ||
sequencing_center = sequencing_center, | ||
sequence_source = sequence_source, | ||
transcript_selection_mode = funco_transcript_selection_mode, | ||
transcript_selection_list = funco_transcript_selection_list, | ||
annotation_defaults = funco_annotation_defaults, | ||
annotation_overrides = funco_annotation_overrides, | ||
funcotator_excluded_fields = funcotator_excluded_fields, | ||
filter_funcotations = filter_funcotations_or_default, | ||
extra_args = funcotator_extra_args, | ||
gatk_docker = gatk_docker, | ||
gatk_override = gatk_override, | ||
filter_funcotations = filter_funcotations_or_default, | ||
funcotator_excluded_fields = funcotator_excluded_fields, | ||
sequencing_center = sequencing_center, | ||
sequence_source = sequence_source, | ||
disk_space_gb = ceil(size(funcotate_vcf_input, "GB") * large_input_to_output_multiplier) + funco_tar_size + disk_pad, | ||
preemptible_attempts = preemptible_attempts, | ||
max_retries = max_retries, | ||
extra_args = funcotator_extra_args | ||
disk_space_gb = ceil(size(funcotate_vcf_input, "GB") * large_input_to_output_multiplier) + onco_tar_size + disk_pad | ||
} | ||
} | ||
|
||
|
@@ -469,7 +486,8 @@ workflow Mutect2 { | |
File? contamination_table = CalculateContamination.contamination_table | ||
|
||
File? oncotated_m2_maf = oncotate_m2.oncotated_m2_maf | ||
File? funcotated_maf = FuncotateMaf.funcotated_output | ||
File? funcotated_file = Funcotate.funcotated_output_file | ||
File? funcotated_file_index = Funcotate.funcotated_output_file_index | ||
File? preadapter_detail_metrics = CollectSequencingArtifactMetrics.pre_adapter_metrics | ||
File? bamout = MergeBamOuts.merged_bam_out | ||
File? bamout_index = MergeBamOuts.merged_bam_out_index | ||
|
@@ -1228,40 +1246,52 @@ task SumFloats { | |
} | ||
} | ||
|
||
task FuncotateMaf { | ||
# inputs | ||
task Funcotate { | ||
# ============== | ||
# Inputs | ||
String ref_fasta | ||
String input_vcf | ||
String input_vcf_idx | ||
String reference_version | ||
String output_format = "MAF" | ||
String output_file_base_name | ||
String output_format | ||
Boolean compress | ||
Boolean use_gnomad | ||
# This should be updated when a new version of the data sources is released | ||
# TODO: Make this dynamically chosen in the command. | ||
File? data_sources_tar_gz = "gs://broad-public-datasets/funcotator/funcotator_dataSources.v1.6.20190124s.tar.gz" | ||
String? control_id | ||
String? case_id | ||
String? sequencing_center | ||
String? sequence_source | ||
String case_id | ||
String? control_id | ||
|
||
File? data_sources_tar_gz | ||
String? transcript_selection_mode | ||
File? transcript_selection_list | ||
Array[String]? annotation_defaults | ||
Array[String]? annotation_overrides | ||
Array[String]? funcotator_excluded_fields | ||
Boolean filter_funcotations | ||
Boolean? filter_funcotations | ||
File? interval_list | ||
|
||
String? extra_args | ||
|
||
# ============== | ||
# Process input args: | ||
String output_maf = output_file_base_name + ".maf" | ||
String output_maf_index = output_maf + ".idx" | ||
String output_vcf = output_file_base_name + if compress then ".vcf.gz" else ".vcf" | ||
String output_vcf_index = output_vcf + if compress then ".tbi" else ".idx" | ||
String output_file = if output_format == "MAF" then output_maf else output_vcf | ||
String output_file_index = if output_format == "MAF" then output_maf_index else output_vcf_index | ||
String transcript_selection_arg = if defined(transcript_selection_list) then " --transcript-list " else "" | ||
String annotation_def_arg = if defined(annotation_defaults) then " --annotation-default " else "" | ||
String annotation_over_arg = if defined(annotation_overrides) then " --annotation-override " else "" | ||
String filter_funcotations_args = if (filter_funcotations) then " --remove-filtered-variants " else "" | ||
String filter_funcotations_args = if defined(filter_funcotations) && (filter_funcotations) then " --remove-filtered-variants " else "" | ||
String excluded_fields_args = if defined(funcotator_excluded_fields) then " --exclude-field " else "" | ||
String final_output_filename = basename(input_vcf, ".vcf") + ".maf.annotated" | ||
# ============== | ||
# runtime | ||
String interval_list_arg = if defined(interval_list) then " -L " else "" | ||
String extra_args_arg = select_first([extra_args, ""]) | ||
|
||
# ============== | ||
# Runtime options: | ||
String gatk_docker | ||
File? gatk_override | ||
Int? mem | ||
|
@@ -1272,56 +1302,66 @@ task FuncotateMaf { | |
|
||
Boolean use_ssd = false | ||
|
||
# This should be updated when a new version of the data sources is released | ||
String default_datasources_version = "funcotator_dataSources.v1.4.20180615" | ||
|
||
# You may have to change the following two parameter values depending on the task requirements | ||
Int default_ram_mb = 3000 | ||
# WARNING: In the workflow, you should calculate the disk space as an input to this task (disk_space_gb). | ||
# WARNING: In the workflow, you should calculate the disk space as an input to this task (disk_space_gb). Please see [TODO: Link from Jose] for examples. | ||
Int default_disk_space_gb = 100 | ||
|
||
# Mem is in units of GB but our command and memory runtime values are in MB | ||
Int machine_mem = if defined(mem) then mem *1000 else default_ram_mb | ||
Int command_mem = machine_mem - 1000 | ||
|
||
String dollar = "$" | ||
|
||
command <<< | ||
set -e | ||
export GATK_LOCAL_JAR=${default="/root/gatk.jar" gatk_override} | ||
|
||
DATA_SOURCES_TAR_GZ=${data_sources_tar_gz} | ||
if [[ ! -e $DATA_SOURCES_TAR_GZ ]] ; then | ||
# We have to download the data sources: | ||
echo "Data sources gzip does not exist: $DATA_SOURCES_TAR_GZ" | ||
echo "Downloading default data sources..." | ||
wget ftp://gsapubftp-anonymous@ftp.broadinstitute.org/bundle/funcotator/${default_datasources_version}.tar.gz | ||
tar -zxf ${default_datasources_version}.tar.gz | ||
DATA_SOURCES_FOLDER=${default_datasources_version} | ||
else | ||
# Extract the tar.gz: | ||
mkdir datasources_dir | ||
tar zxvf ${data_sources_tar_gz} -C datasources_dir --strip-components 1 | ||
DATA_SOURCES_FOLDER="$PWD/datasources_dir" | ||
# Extract our data sources: | ||
echo "Extracting data sources zip file..." | ||
mkdir datasources_dir | ||
tar zxvf ${data_sources_tar_gz} -C datasources_dir --strip-components 1 | ||
DATA_SOURCES_FOLDER="$PWD/datasources_dir" | ||
|
||
# Handle gnomAD: | ||
if ${use_gnomad} ; then | ||
echo "Enabling gnomAD..." | ||
for potential_gnomad_gz in gnomAD_exome.tar.gz gnomAD_genome.tar.gz ; do | ||
if [[ -f ${dollar}{DATA_SOURCES_FOLDER}/${dollar}{potential_gnomad_gz} ]] ; then | ||
cd ${dollar}{DATA_SOURCES_FOLDER} | ||
tar -zvxf ${dollar}{potential_gnomad_gz} | ||
cd - | ||
else | ||
echo "ERROR: Cannot find gnomAD folder: ${dollar}{potential_gnomad_gz}" 1>&2 | ||
false | ||
fi | ||
done | ||
fi | ||
|
||
# Run Funcotator: | ||
gatk --java-options "-Xmx${command_mem}m" Funcotator \ | ||
--data-sources-path $DATA_SOURCES_FOLDER \ | ||
--ref-version ${reference_version} \ | ||
--output-file-format ${output_format} \ | ||
-R ${ref_fasta} \ | ||
-V ${input_vcf} \ | ||
-O ${final_output_filename} \ | ||
${"-L " + interval_list} \ | ||
-O ${output_file} \ | ||
${interval_list_arg} ${default="" interval_list} \ | ||
--annotation-default normal_barcode:${default="Unknown" control_id} \ | ||
--annotation-default tumor_barcode:${default="Unknown" case_id} \ | ||
--annotation-default Center:${default="Unknown" sequencing_center} \ | ||
--annotation-default source:${default="Unknown" sequence_source} \ | ||
${"--transcript-selection-mode " + transcript_selection_mode} \ | ||
${"--transcript-list " + transcript_selection_list} \ | ||
--annotation-default normal_barcode:${control_id} \ | ||
--annotation-default tumor_barcode:${case_id} \ | ||
--annotation-default Center:${default="Unknown" sequencing_center} \ | ||
--annotation-default source:${default="Unknown" sequence_source} \ | ||
${transcript_selection_arg}${default="" sep=" --transcript-list " transcript_selection_list} \ | ||
${annotation_def_arg}${default="" sep=" --annotation-default " annotation_defaults} \ | ||
${annotation_over_arg}${default="" sep=" --annotation-override " annotation_overrides} \ | ||
${excluded_fields_args}${default="" sep=" --exclude-field " funcotator_excluded_fields} \ | ||
${filter_funcotations_args} \ | ||
${extra_args} | ||
${extra_args_arg} | ||
# Make sure we have a placeholder index for MAF files so this workflow doesn't fail: | ||
if [[ "${output_format}" == "MAF" ]] ; then | ||
touch ${output_maf_index} | ||
fi | ||
>>> | ||
|
||
runtime { | ||
|
@@ -1335,6 +1375,7 @@ task FuncotateMaf { | |
} | ||
|
||
output { | ||
File funcotated_output = "${final_output_filename}" | ||
File funcotated_output_file = "${output_file}" | ||
File funcotated_output_file_index = "${output_file_index}" | ||
} | ||
} |