diff --git a/inputs/templates/test/FilterGenotypes/FilterGenotypes.fixed_cutoffs.json.tmpl b/inputs/templates/test/FilterGenotypes/FilterGenotypes.fixed_cutoffs.json.tmpl index eeaa27037..19bce81e9 100644 --- a/inputs/templates/test/FilterGenotypes/FilterGenotypes.fixed_cutoffs.json.tmpl +++ b/inputs/templates/test/FilterGenotypes/FilterGenotypes.fixed_cutoffs.json.tmpl @@ -5,8 +5,8 @@ "FilterGenotypes.gq_recalibrator_model_file": {{ reference_resources.aou_recalibrate_gq_model_file | tojson }}, "FilterGenotypes.sl_filter_args": "--small-del-threshold 93 --medium-del-threshold 150 --small-dup-threshold -51 --medium-dup-threshold -4 --ins-threshold -13 --inv-threshold -19", - "FilterGenotypes.RecalibrateGq.genome_tracks": {{ reference_resources.recalibrate_gq_genome_tracks | tojson }}, - "FilterGenotypes.RecalibrateGq.recalibrate_gq_args": [ + "FilterGenotypes.genome_tracks": {{ reference_resources.recalibrate_gq_genome_tracks | tojson }}, + "FilterGenotypes.recalibrate_gq_args": [ "--keep-homvar false", "--keep-homref true", "--keep-multiallelic true", @@ -14,23 +14,23 @@ "--min-samples-to-estimate-allele-frequency -1" ], - "FilterGenotypes.MainVcfQc.ped_file": {{ test_batch.ped_file | tojson }}, - "FilterGenotypes.MainVcfQc.primary_contigs_fai": {{ reference_resources.primary_contigs_fai | tojson }}, - "FilterGenotypes.MainVcfQc.site_level_comparison_datasets": [ + "FilterGenotypes.ped_file": {{ test_batch.ped_file | tojson }}, + "FilterGenotypes.primary_contigs_fai": {{ reference_resources.primary_contigs_fai | tojson }}, + "FilterGenotypes.site_level_comparison_datasets": [ {{ reference_resources.ccdg_abel_site_level_benchmarking_dataset | tojson }}, {{ reference_resources.gnomad_v2_collins_site_level_benchmarking_dataset | tojson }}, {{ reference_resources.hgsv_byrska_bishop_site_level_benchmarking_dataset | tojson }}, {{ reference_resources.thousand_genomes_site_level_benchmarking_dataset | tojson }} ], - "FilterGenotypes.MainVcfQc.sample_level_comparison_datasets": [ + "FilterGenotypes.sample_level_comparison_datasets": [ {{ reference_resources.hgsv_byrska_bishop_sample_level_benchmarking_dataset | tojson }} ], - "FilterGenotypes.MainVcfQc.sample_renaming_tsv": {{ reference_resources.hgsv_byrska_bishop_sample_renaming_tsv | tojson }}, - "FilterGenotypes.MainVcfQc.runtime_override_per_sample_benchmark_plot": { + "FilterGenotypes.sample_renaming_tsv": {{ reference_resources.hgsv_byrska_bishop_sample_renaming_tsv | tojson }}, + "FilterGenotypes.runtime_override_per_sample_benchmark_plot": { "mem_gb": 30, "disk_gb": 50 }, - "FilterGenotypes.MainVcfQc.runtime_override_plot_qc_per_family": { + "FilterGenotypes.runtime_override_plot_qc_per_family": { "mem_gb": 15, "disk_gb": 100 }, diff --git a/wdl/FilterGenotypes.wdl b/wdl/FilterGenotypes.wdl index cab24cc78..c8a226bbb 100644 --- a/wdl/FilterGenotypes.wdl +++ b/wdl/FilterGenotypes.wdl @@ -13,6 +13,8 @@ workflow FilterGenotypes { File ploidy_table File gq_recalibrator_model_file + Array[String] recalibrate_gq_args = [] + Array[File] genome_tracks = [] Float no_call_rate_cutoff = 0.05 # Set to 1 to disable NCR filtering Float fmax_beta = 0.5 # Recommended range [0.3, 0.5] (use lower values for higher specificity) @@ -24,10 +26,17 @@ workflow FilterGenotypes { Int filter_vcf_records_per_shard = 20000 # For MainVcfQc + File primary_contigs_fai + File? ped_file + Array[Array[String]]? site_level_comparison_datasets # Array of two-element arrays, one per dataset, each of format [prefix, gs:// path to directory with one BED per population] + Array[Array[String]]? sample_level_comparison_datasets # Array of two-element arrays, one per dataset, each of format [prefix, gs:// path to per-sample tarballs] + File? sample_renaming_tsv # File with mapping to rename per-sample benchmark sample IDs for compatibility with cohort Boolean run_qc = true String qc_bcftools_preprocessing_options = "-e 'FILTER~\"UNRESOLVED\" || FILTER~\"HIGH_NCR\"'" Int qc_sv_per_shard = 2500 Int qc_samples_per_shard = 600 + RuntimeAttr? runtime_override_plot_qc_per_family + RuntimeAttr? runtime_override_per_sample_benchmark_plot String linux_docker String gatk_docker @@ -43,6 +52,8 @@ workflow FilterGenotypes { vcf = vcf, vcf_index = vcf + ".tbi", gq_recalibrator_model_file=gq_recalibrator_model_file, + recalibrate_gq_args=recalibrate_gq_args, + genome_tracks=genome_tracks, gatk_docker=gatk_docker, sv_base_mini_docker=sv_base_mini_docker, sv_pipeline_docker=sv_pipeline_docker @@ -113,8 +124,15 @@ workflow FilterGenotypes { input: vcfs=[ConcatVcfs.concat_vcf], prefix="~{output_prefix_}.filter_genotypes", + ped_file=ped_file, bcftools_preprocessing_options=qc_bcftools_preprocessing_options, + primary_contigs_fai=primary_contigs_fai, + site_level_comparison_datasets=site_level_comparison_datasets, + sample_level_comparison_datasets=sample_level_comparison_datasets, + sample_renaming_tsv=sample_renaming_tsv, sv_per_shard=qc_sv_per_shard, + runtime_override_per_sample_benchmark_plot=runtime_override_per_sample_benchmark_plot, + runtime_override_plot_qc_per_family=runtime_override_plot_qc_per_family, samples_per_shard=qc_samples_per_shard, sv_pipeline_qc_docker=sv_pipeline_docker, sv_base_mini_docker=sv_base_mini_docker, diff --git a/wdl/RecalibrateGq.wdl b/wdl/RecalibrateGq.wdl index f861fd8b0..1cab2a504 100644 --- a/wdl/RecalibrateGq.wdl +++ b/wdl/RecalibrateGq.wdl @@ -9,7 +9,7 @@ workflow RecalibrateGq { File vcf File vcf_index Int? recalibrate_records_per_shard - Array[File] genome_tracks + Array[File] genome_tracks = [] File gq_recalibrator_model_file Array[String] recalibrate_gq_args = [] String gatk_docker