Skip to content

Commit

Permalink
Make genome_tracks optional in FilterGenotypes (#593)
Browse files: browse the repository at this point in the history.
* Adds explicit parameters to FilterGenotypes
  • Loading branch information
MattWellie authored Sep 5, 2023
1 parent 98ec699 commit 2c97aad
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 10 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,32 +5,32 @@
"FilterGenotypes.gq_recalibrator_model_file": {{ reference_resources.aou_recalibrate_gq_model_file | tojson }},
"FilterGenotypes.sl_filter_args": "--small-del-threshold 93 --medium-del-threshold 150 --small-dup-threshold -51 --medium-dup-threshold -4 --ins-threshold -13 --inv-threshold -19",

"FilterGenotypes.RecalibrateGq.genome_tracks": {{ reference_resources.recalibrate_gq_genome_tracks | tojson }},
"FilterGenotypes.RecalibrateGq.recalibrate_gq_args": [
"FilterGenotypes.genome_tracks": {{ reference_resources.recalibrate_gq_genome_tracks | tojson }},
"FilterGenotypes.recalibrate_gq_args": [
"--keep-homvar false",
"--keep-homref true",
"--keep-multiallelic true",
"--skip-genotype-filtering true",
"--min-samples-to-estimate-allele-frequency -1"
],

"FilterGenotypes.MainVcfQc.ped_file": {{ test_batch.ped_file | tojson }},
"FilterGenotypes.MainVcfQc.primary_contigs_fai": {{ reference_resources.primary_contigs_fai | tojson }},
"FilterGenotypes.MainVcfQc.site_level_comparison_datasets": [
"FilterGenotypes.ped_file": {{ test_batch.ped_file | tojson }},
"FilterGenotypes.primary_contigs_fai": {{ reference_resources.primary_contigs_fai | tojson }},
"FilterGenotypes.site_level_comparison_datasets": [
{{ reference_resources.ccdg_abel_site_level_benchmarking_dataset | tojson }},
{{ reference_resources.gnomad_v2_collins_site_level_benchmarking_dataset | tojson }},
{{ reference_resources.hgsv_byrska_bishop_site_level_benchmarking_dataset | tojson }},
{{ reference_resources.thousand_genomes_site_level_benchmarking_dataset | tojson }}
],
"FilterGenotypes.MainVcfQc.sample_level_comparison_datasets": [
"FilterGenotypes.sample_level_comparison_datasets": [
{{ reference_resources.hgsv_byrska_bishop_sample_level_benchmarking_dataset | tojson }}
],
"FilterGenotypes.MainVcfQc.sample_renaming_tsv": {{ reference_resources.hgsv_byrska_bishop_sample_renaming_tsv | tojson }},
"FilterGenotypes.MainVcfQc.runtime_override_per_sample_benchmark_plot": {
"FilterGenotypes.sample_renaming_tsv": {{ reference_resources.hgsv_byrska_bishop_sample_renaming_tsv | tojson }},
"FilterGenotypes.runtime_override_per_sample_benchmark_plot": {
"mem_gb": 30,
"disk_gb": 50
},
"FilterGenotypes.MainVcfQc.runtime_override_plot_qc_per_family": {
"FilterGenotypes.runtime_override_plot_qc_per_family": {
"mem_gb": 15,
"disk_gb": 100
},
Expand Down
18 changes: 18 additions & 0 deletions wdl/FilterGenotypes.wdl
Original file line number | Diff line number | Diff line change
Expand Up @@ -13,6 +13,8 @@ workflow FilterGenotypes {
File ploidy_table

File gq_recalibrator_model_file
Array[String] recalibrate_gq_args = []
Array[File] genome_tracks = []
Float no_call_rate_cutoff = 0.05 # Set to 1 to disable NCR filtering
Float fmax_beta = 0.5 # Recommended range [0.3, 0.5] (use lower values for higher specificity)
Expand All @@ -24,10 +26,17 @@ workflow FilterGenotypes {
Int filter_vcf_records_per_shard = 20000

# For MainVcfQc
File primary_contigs_fai
File? ped_file
Array[Array[String]]? site_level_comparison_datasets # Array of two-element arrays, one per dataset, each of format [prefix, gs:// path to directory with one BED per population]
Array[Array[String]]? sample_level_comparison_datasets # Array of two-element arrays, one per dataset, each of format [prefix, gs:// path to per-sample tarballs]
File? sample_renaming_tsv # File with mapping to rename per-sample benchmark sample IDs for compatibility with cohort
Boolean run_qc = true
String qc_bcftools_preprocessing_options = "-e 'FILTER~\"UNRESOLVED\" || FILTER~\"HIGH_NCR\"'"
Int qc_sv_per_shard = 2500
Int qc_samples_per_shard = 600
RuntimeAttr? runtime_override_plot_qc_per_family
RuntimeAttr? runtime_override_per_sample_benchmark_plot
String linux_docker
String gatk_docker
Expand All @@ -43,6 +52,8 @@ workflow FilterGenotypes {
vcf = vcf,
vcf_index = vcf + ".tbi",
gq_recalibrator_model_file=gq_recalibrator_model_file,
recalibrate_gq_args=recalibrate_gq_args,
genome_tracks=genome_tracks,
gatk_docker=gatk_docker,
sv_base_mini_docker=sv_base_mini_docker,
sv_pipeline_docker=sv_pipeline_docker
Expand Down Expand Up @@ -113,8 +124,15 @@ workflow FilterGenotypes {
input:
vcfs=[ConcatVcfs.concat_vcf],
prefix="~{output_prefix_}.filter_genotypes",
ped_file=ped_file,
bcftools_preprocessing_options=qc_bcftools_preprocessing_options,
primary_contigs_fai=primary_contigs_fai,
site_level_comparison_datasets=site_level_comparison_datasets,
sample_level_comparison_datasets=sample_level_comparison_datasets,
sample_renaming_tsv=sample_renaming_tsv,
sv_per_shard=qc_sv_per_shard,
runtime_override_per_sample_benchmark_plot=runtime_override_per_sample_benchmark_plot,
runtime_override_plot_qc_per_family=runtime_override_plot_qc_per_family,
samples_per_shard=qc_samples_per_shard,
sv_pipeline_qc_docker=sv_pipeline_docker,
sv_base_mini_docker=sv_base_mini_docker,
Expand Down
2 changes: 1 addition & 1 deletion wdl/RecalibrateGq.wdl
Original file line number | Diff line number | Diff line change
Expand Up @@ -9,7 +9,7 @@ workflow RecalibrateGq {
File vcf
File vcf_index
Int? recalibrate_records_per_shard
Array[File] genome_tracks
Array[File] genome_tracks = []
File gq_recalibrator_model_file
Array[String] recalibrate_gq_args = []
String gatk_docker
Expand Down

0 comments on commit 2c97aad

Please sign in to comment.