From 517a88255e79af339fb625dd472f075ec6521d9e Mon Sep 17 00:00:00 2001
From: Mark Walker
Date: Mon, 19 Jun 2023 11:12:38 -0400
Subject: [PATCH] SVConcordance workflows update (#540)

---
 .../CleanVcf.SingleBatch.json.tmpl            |   2 +-
 .../CleanVcf.json.tmpl                        |   2 +-
 ...otypeComplexVariants.SingleBatch.json.tmpl |   2 +-
 .../GenotypeComplexVariants.json.tmpl         |   2 +-
 .../FormatVcfForGatk.json.tmpl                |  12 +
 .../test/JoinRawCalls/JoinRawCalls.json.tmpl  |  13 +-
 .../test/MakeCohortVcf/CleanVcf.json.tmpl     |   2 +-
 .../GenotypeComplexVariants.json.tmpl         |   2 +-
 .../SVConcordance/SVConcordance.json.tmpl     |  16 +-
 inputs/templates/test/Vapor/Vapor.json.tmpl   |  24 +-
 inputs/values/dockers.json                    |   5 +-
 inputs/values/dockers_azure.json              |   3 +-
 inputs/values/hgdp.json                       | 147 ++++++++
 inputs/values/ref_panel_1kg.json              |   6 +-
 inputs/values/resources_hg38.json             |   1 +
 scripts/inputs/build_default_inputs.sh        |   4 +
 scripts/inputs/build_inputs.py                |   8 +-
 scripts/test/validate.sh                      |   2 +-
 .../scripts/format_gatk_vcf_for_svtk.py       |  43 ++-
 .../scripts/format_svtk_vcf_for_gatk.py       | 326 +++++-------------
 wdl/CleanVcf.wdl                              |  23 +-
 wdl/CleanVcfChromosome.wdl                    |  24 +-
 wdl/ClusterBatch.wdl                          |   1 +
 wdl/FormatVcfForGatk.wdl                      | 145 ++++++++
 wdl/GATKSVPipelineSingleSample.wdl            |   4 +-
 wdl/GenotypeComplexVariants.wdl               |   6 +-
 wdl/GenotypeCpxCnvs.wdl                       |   4 +-
 wdl/JoinRawCalls.wdl                          |  70 +++-
 wdl/MakeCohortVcf.wdl                         |   8 +-
 wdl/PESRClustering.wdl                        |   9 +-
 wdl/SVConcordance.wdl                         | 183 +---------
 wdl/ScatterCpxGenotyping.wdl                  |   4 +-
 wdl/TasksClusterBatch.wdl                     |  21 +-
 wdl/TasksMakeCohortVcf.wdl                    |  83 -----
 34 files changed, 581 insertions(+), 626 deletions(-)
 create mode 100644 inputs/templates/test/FormatVcfForGatk/FormatVcfForGatk.json.tmpl
 create mode 100644 inputs/values/hgdp.json
 create mode 100644 wdl/FormatVcfForGatk.wdl

diff --git a/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/CleanVcf.SingleBatch.json.tmpl b/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/CleanVcf.SingleBatch.json.tmpl
index 4de4c554d..dc37d432f 100644
--- a/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/CleanVcf.SingleBatch.json.tmpl
+++ b/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/CleanVcf.SingleBatch.json.tmpl
@@ -20,7 +20,7 @@
   "CleanVcf.sv_base_mini_docker": "${workspace.sv_base_mini_docker}",
 
   "CleanVcf.cohort_name": "${this.sample_set_id}",
-  "CleanVcf.merged_ped_file": "${workspace.cohort_ped_file}",
+  "CleanVcf.ped_file": "${workspace.cohort_ped_file}",
   "CleanVcf.complex_genotype_vcfs": "${this.complex_genotype_vcfs}",
   "CleanVcf.complex_resolve_bothside_pass_lists": "${this.complex_resolve_bothside_pass_lists}",
   "CleanVcf.complex_resolve_background_fail_lists": "${this.complex_resolve_background_fail_lists}"
diff --git a/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/CleanVcf.json.tmpl b/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/CleanVcf.json.tmpl
index a71e75b17..d2c5d265e 100644
--- a/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/CleanVcf.json.tmpl
+++ b/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/CleanVcf.json.tmpl
@@ -20,7 +20,7 @@
   "CleanVcf.sv_base_mini_docker": "${workspace.sv_base_mini_docker}",
 
   "CleanVcf.cohort_name": "${this.sample_set_set_id}",
-  "CleanVcf.merged_ped_file": "${workspace.cohort_ped_file}",
+  "CleanVcf.ped_file": "${workspace.cohort_ped_file}",
   "CleanVcf.complex_genotype_vcfs": "${this.complex_genotype_vcfs}",
   "CleanVcf.complex_resolve_bothside_pass_lists": "${this.complex_resolve_bothside_pass_lists}",
   "CleanVcf.complex_resolve_background_fail_lists": "${this.complex_resolve_background_fail_lists}"
diff --git a/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/GenotypeComplexVariants.SingleBatch.json.tmpl b/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/GenotypeComplexVariants.SingleBatch.json.tmpl
index 2120cff6c..53f0c6d3d 100644
--- a/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/GenotypeComplexVariants.SingleBatch.json.tmpl
+++ b/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/GenotypeComplexVariants.SingleBatch.json.tmpl
@@ -15,7 +15,7 @@
   "GenotypeComplexVariants.depth_vcfs": "${this.regenotyped_depth_vcfs}",
   "GenotypeComplexVariants.complex_resolve_vcfs": "${this.complex_resolve_vcfs}",
   "GenotypeComplexVariants.complex_resolve_vcf_indexes": "${this.complex_resolve_vcf_indexes}",
-  "GenotypeComplexVariants.merged_ped_file": "${workspace.cohort_ped_file}",
+  "GenotypeComplexVariants.ped_file": "${workspace.cohort_ped_file}",
   "GenotypeComplexVariants.bincov_files": "${this.merged_bincov}",
   "GenotypeComplexVariants.depth_gt_rd_sep_files": "${this.trained_genotype_depth_depth_sepcutoff}",
   "GenotypeComplexVariants.median_coverage_files": "${this.median_cov}"
diff --git a/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/GenotypeComplexVariants.json.tmpl b/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/GenotypeComplexVariants.json.tmpl
index 91dec1548..0b8ca397a 100644
--- a/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/GenotypeComplexVariants.json.tmpl
+++ b/inputs/templates/terra_workspaces/cohort_mode/workflow_configurations/GenotypeComplexVariants.json.tmpl
@@ -15,7 +15,7 @@
   "GenotypeComplexVariants.depth_vcfs": "${this.regenotyped_depth_vcfs}",
   "GenotypeComplexVariants.complex_resolve_vcfs": "${this.complex_resolve_vcfs}",
   "GenotypeComplexVariants.complex_resolve_vcf_indexes": "${this.complex_resolve_vcf_indexes}",
-  "GenotypeComplexVariants.merged_ped_file": "${workspace.cohort_ped_file}",
+  "GenotypeComplexVariants.ped_file": "${workspace.cohort_ped_file}",
   "GenotypeComplexVariants.bincov_files": "${this.sample_sets.merged_bincov}",
   "GenotypeComplexVariants.depth_gt_rd_sep_files": "${this.sample_sets.trained_genotype_depth_depth_sepcutoff}",
   "GenotypeComplexVariants.median_coverage_files": "${this.sample_sets.median_cov}"
diff --git a/inputs/templates/test/FormatVcfForGatk/FormatVcfForGatk.json.tmpl b/inputs/templates/test/FormatVcfForGatk/FormatVcfForGatk.json.tmpl
new file mode 100644
index 000000000..e81f46244
--- /dev/null
+++ b/inputs/templates/test/FormatVcfForGatk/FormatVcfForGatk.json.tmpl
@@ -0,0 +1,12 @@
+{
+  "FormatVcfForGatk.vcf": {{ test_batch.clean_vcf | tojson }},
+  "FormatVcfForGatk.prefix": {{ test_batch.name | tojson }},
+  "FormatVcfForGatk.ped_file": {{ test_batch.ped_file | tojson }},
+  "FormatVcfForGatk.formatter_args": {{ test_batch.clean_vcf_gatk_formatter_args | tojson }},
+  "FormatVcfForGatk.contig_list": {{ reference_resources.primary_contigs_list | tojson }},
+  "FormatVcfForGatk.chr_x": {{ reference_resources.chr_x | tojson }},
+  "FormatVcfForGatk.chr_y": {{ reference_resources.chr_y | tojson }},
+  "FormatVcfForGatk.contigs_header": {{ reference_resources.contigs_header | tojson }},
+  "FormatVcfForGatk.sv_base_mini_docker": {{ dockers.sv_base_mini_docker | tojson }},
+  "FormatVcfForGatk.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }}
+}
diff --git a/inputs/templates/test/JoinRawCalls/JoinRawCalls.json.tmpl b/inputs/templates/test/JoinRawCalls/JoinRawCalls.json.tmpl
index 332634643..732d238ef 100644
--- a/inputs/templates/test/JoinRawCalls/JoinRawCalls.json.tmpl
+++ b/inputs/templates/test/JoinRawCalls/JoinRawCalls.json.tmpl
@@ -1,19 +1,26 @@
 {
-  "JoinRawCalls.gatk_docker":{{ dockers.gatk_docker | tojson }},
+  "JoinRawCalls.gatk_docker": {{ dockers.gatk_docker | tojson }},
   "JoinRawCalls.sv_base_mini_docker": {{ dockers.sv_base_mini_docker | tojson }},
   "JoinRawCalls.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }},
 
   "JoinRawCalls.clustered_depth_vcfs" : [{{ test_batch.merged_depth_vcf | tojson }}],
+  "JoinRawCalls.clustered_depth_vcf_indexes" : [{{ test_batch.merged_depth_vcf_index | tojson }}],
+  "JoinRawCalls.clustered_manta_vcfs" : [{{ test_batch.merged_manta_vcf | tojson }}],
+  "JoinRawCalls.clustered_manta_vcf_indexes" : [{{ test_batch.merged_manta_vcf_index | tojson }}],
+  "JoinRawCalls.clustered_wham_vcfs" : [{{ test_batch.merged_wham_vcf | tojson }}],
+  "JoinRawCalls.clustered_wham_vcf_indexes" : [{{ test_batch.merged_wham_vcf_index | tojson }}],
+  "JoinRawCalls.clustered_melt_vcfs" : [{{ test_batch.merged_melt_vcf | tojson }}],
+  "JoinRawCalls.clustered_melt_vcf_indexes" : [{{ test_batch.merged_melt_vcf_index | tojson }}],
 
-  "JoinRawCalls.ploidy_table": {{ test_batch.ploidy_table | tojson }},
+  "JoinRawCalls.ped_file": {{ test_batch.ped_file | tojson }},
 
   "JoinRawCalls.contig_list": {{ reference_resources.primary_contigs_list | tojson }},
   "JoinRawCalls.reference_fasta": {{ reference_resources.reference_fasta | tojson }},
   "JoinRawCalls.reference_fasta_fai": {{ reference_resources.reference_index | tojson }},
   "JoinRawCalls.reference_dict": {{ reference_resources.reference_dict | tojson }},
 
-  "JoinRawCalls.cohort": {{ test_batch.name | tojson }}
+  "JoinRawCalls.prefix": {{ test_batch.name | tojson }}
 }
diff --git a/inputs/templates/test/MakeCohortVcf/CleanVcf.json.tmpl b/inputs/templates/test/MakeCohortVcf/CleanVcf.json.tmpl
index 2b2f36962..f79f52fc6 100644
--- a/inputs/templates/test/MakeCohortVcf/CleanVcf.json.tmpl
+++ b/inputs/templates/test/MakeCohortVcf/CleanVcf.json.tmpl
@@ -20,7 +20,7 @@
   "CleanVcf.sv_base_mini_docker":{{ dockers.sv_base_mini_docker | tojson }},
 
   "CleanVcf.cohort_name": {{ test_batch.name | tojson }},
-  "CleanVcf.merged_ped_file": {{ test_batch.ped_file | tojson }},
+  "CleanVcf.ped_file": {{ test_batch.ped_file | tojson }},
   "CleanVcf.complex_genotype_vcfs": {{ test_batch.complex_genotype_vcfs | tojson }},
   "CleanVcf.complex_resolve_bothside_pass_lists": {{ test_batch.complex_resolve_bothside_pass_lists | tojson }},
   "CleanVcf.complex_resolve_background_fail_lists": {{ test_batch.complex_resolve_background_fail_lists | tojson }}
diff --git a/inputs/templates/test/MakeCohortVcf/GenotypeComplexVariants.json.tmpl b/inputs/templates/test/MakeCohortVcf/GenotypeComplexVariants.json.tmpl
index e4df9a9df..16eaeceb3 100644
--- a/inputs/templates/test/MakeCohortVcf/GenotypeComplexVariants.json.tmpl
+++ b/inputs/templates/test/MakeCohortVcf/GenotypeComplexVariants.json.tmpl
@@ -19,7 +19,7 @@
   ],
   "GenotypeComplexVariants.complex_resolve_vcfs": {{ test_batch.complex_resolve_vcfs | tojson }},
   "GenotypeComplexVariants.complex_resolve_vcf_indexes": {{ test_batch.complex_resolve_vcf_indexes | tojson }},
-  "GenotypeComplexVariants.merged_ped_file": {{ test_batch.ped_file | tojson }},
+  "GenotypeComplexVariants.ped_file": {{ test_batch.ped_file | tojson }},
   "GenotypeComplexVariants.bincov_files": [
     {{ test_batch.merged_coverage_file | tojson }}
   ],
diff --git a/inputs/templates/test/SVConcordance/SVConcordance.json.tmpl b/inputs/templates/test/SVConcordance/SVConcordance.json.tmpl
index 07e98a4c5..0540305cd 100644
--- a/inputs/templates/test/SVConcordance/SVConcordance.json.tmpl
+++ b/inputs/templates/test/SVConcordance/SVConcordance.json.tmpl
@@ -1,21 +1,11 @@
 {
-  "SVConcordance.gatk_docker":{{ dockers.gatk_docker_concordance | tojson }},
+  "SVConcordance.gatk_docker": {{ dockers.gatk_docker | tojson }},
   "SVConcordance.sv_base_mini_docker": {{ dockers.sv_base_mini_docker | tojson }},
-  "SVConcordance.sv_pipeline_docker": {{ dockers.sv_pipeline_docker | tojson }},
-  "SVConcordance.sv_utils_docker": {{ dockers.sv_utils_docker | tojson }},
 
-  "SVConcordance.eval_vcf" : {{ test_batch.clean_vcf | tojson }},
+  "SVConcordance.eval_vcf" : {{ test_batch.gatk_formatted_vcf | tojson }},
   "SVConcordance.truth_vcf" : {{ test_batch.joined_raw_calls_vcf | tojson }},
-  "SVConcordance.ploidy_table": {{ test_batch.ploidy_table | tojson }},
-  "SVConcordance.cohort": {{ test_batch.name | tojson }},
-
-  "SVConcordance.run_svutils_truth_vcf": "false",
-  "SVConcordance.run_formatter_truth_vcf": "false",
-
-  "SVConcordance.run_svutils_eval_vcf": "true",
-  "SVConcordance.run_formatter_eval_vcf": "true",
-  "SVConcordance.formatter_eval_args": "--only-add-cn-fields --replace-ev-format --filter-unsupported-types",
+  "SVConcordance.output_prefix": {{ test_batch.name | tojson }},
 
   "SVConcordance.contig_list": {{ reference_resources.primary_contigs_list | tojson }},
   "SVConcordance.reference_dict": {{ reference_resources.reference_dict | tojson }}
diff --git a/inputs/templates/test/Vapor/Vapor.json.tmpl b/inputs/templates/test/Vapor/Vapor.json.tmpl
index 582e1e9e7..0c7adf9c7 100644
--- a/inputs/templates/test/Vapor/Vapor.json.tmpl
+++ b/inputs/templates/test/Vapor/Vapor.json.tmpl
@@ -1,15 +1,15 @@
 {
-  "VaporBatch.sv_base_mini_docker" : {{ dockers.sv_base_mini_docker | tojson }},
-  "VaporBatch.sv_pipeline_docker" : {{ dockers.sv_pipeline_docker | tojson }},
-  "VaporBatch.vapor_docker": {{ dockers.vapor_docker | tojson }},
-  "VaporBatch.contigs": {{ reference_resources.primary_contigs_list | tojson }},
-  "VaporBatch.ref_fasta" : {{ reference_resources.reference_fasta | tojson }},
-  "VaporBatch.ref_fai" : {{ reference_resources.reference_index | tojson }},
-  "VaporBatch.ref_dict": {{ reference_resources.reference_dict | tojson }},
+  "Vapor.sv_base_mini_docker" : {{ dockers.sv_base_mini_docker | tojson }},
+  "Vapor.sv_pipeline_docker" : {{ dockers.sv_pipeline_docker | tojson }},
+  "Vapor.vapor_docker": {{ dockers.vapor_docker | tojson }},
+  "Vapor.contigs": {{ reference_resources.primary_contigs_list | tojson }},
+  "Vapor.ref_fasta" : {{ reference_resources.reference_fasta | tojson }},
+  "Vapor.ref_fai" : {{ reference_resources.reference_index | tojson }},
+  "Vapor.ref_dict": {{ reference_resources.reference_dict | tojson }},
 
-  "VaporBatch.prefix": {{ test_batch.example_pacbio_sample_id | tojson }},
-  "VaporBatch.sample_id": {{ test_batch.example_pacbio_sample_id | tojson }},
-  "VaporBatch.bam_or_cram_file": {{ test_batch.example_pacbio_cram | tojson }},
-  "VaporBatch.bam_or_cram_index": {{ test_batch.example_pacbio_cram_index | tojson }},
-  "VaporBatch.bed_file": {{ test_batch.clean_bed | tojson }}
+  "Vapor.prefix": {{ test_batch.example_pacbio_sample_id | tojson }},
+  "Vapor.sample_id": {{ test_batch.example_pacbio_sample_id | tojson }},
+  "Vapor.bam_or_cram_file": {{ test_batch.example_pacbio_cram | tojson }},
+  "Vapor.bam_or_cram_index": {{ test_batch.example_pacbio_cram_index | tojson }},
+  "Vapor.bed_file": {{ test_batch.clean_bed | tojson }}
 }
diff --git a/inputs/values/dockers.json b/inputs/values/dockers.json
index 4c3c2d5bf..4cffa8b4d 100644
--- a/inputs/values/dockers.json
+++ b/inputs/values/dockers.json
@@ -2,9 +2,8 @@
   "name": "dockers",
   "cnmops_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/cnmops:2023-02-01-v0.26.8-beta-9b25c72d",
   "condense_counts_docker": "us.gcr.io/broad-dsde-methods/tsharpe/gatk:4.2.6.1-57-g9e03432",
-  "gatk_docker": "us.gcr.io/broad-dsde-methods/tsharpe/gatk:4.2.6.1-57-g9e03432",
+  "gatk_docker": "us.gcr.io/broad-dsde-methods/markw/gatk:2023-05-16-4.4.0.0-17-g18edcd3e6-NIGHTLY-SNAPSHOT",
   "gatk_docker_pesr_override": "us.gcr.io/broad-dsde-methods/tsharpe/gatk:4.2.6.1-57-g9e03432",
-  "gatk_docker_concordance": "us.gcr.io/broad-dsde-methods/markw/gatk:mw-sv-concordance-937c81",
   "genomes_in_the_cloud_docker": "us.gcr.io/broad-gotc-prod/genomes-in-the-cloud:2.3.2-1510681135",
   "linux_docker": "marketplace.gcr.io/google/ubuntu1804",
   "manta_docker": "us.gcr.io/broad-dsde-methods/vjalili/manta:5994670",
@@ -33,4 +32,4 @@
   "sv_utils_docker": "us.gcr.io/broad-dsde-methods/gatk-sv/sv-utils:2023-03-16-v0.27-beta-906c6272",
   "gq_recalibrator_docker": "us.gcr.io/broad-dsde-methods/tbrookin/gatk:0a7e1d86f",
   "str": "us.gcr.io/broad-dsde-methods/gatk-sv/str:2023-05-23-v0.27.3-beta-e537bdd6"
-}
\ No newline at end of file
+}
diff --git a/inputs/values/dockers_azure.json b/inputs/values/dockers_azure.json
index c324d341d..4b7aed7a8 100644
--- a/inputs/values/dockers_azure.json
+++ b/inputs/values/dockers_azure.json
@@ -2,9 +2,8 @@
   "name": "dockers",
   "cnmops_docker": "vahid.azurecr.io/gatk-sv/cnmops:2023-02-01-v0.26.8-beta-9b25c72d",
   "condense_counts_docker": "vahid.azurecr.io/tsharpe/gatk:4.2.6.1-57-g9e03432",
-  "gatk_docker": "vahid.azurecr.io/tsharpe/gatk:4.2.6.1-57-g9e03432",
+  "gatk_docker": "vahid.azurecr.io/markw/gatk:2023-05-16-4.4.0.0-17-g18edcd3e6-NIGHTLY-SNAPSHOT",
   "gatk_docker_pesr_override": "vahid.azurecr.io/tsharpe/gatk:4.2.6.1-57-g9e03432",
-  "gatk_docker_concordance": "vahid.azurecr.io/markw/gatk:mw-sv-concordance-937c81",
   "genomes_in_the_cloud_docker": "vahid.azurecr.io/genomes-in-the-cloud:2.3.2-1510681135",
   "linux_docker": "vahid.azurecr.io/google/ubuntu1804",
   "manta_docker": "vahid.azurecr.io/vjalili/manta:5994670",
diff --git a/inputs/values/hgdp.json b/inputs/values/hgdp.json
new file mode 100644
index 000000000..5eae7726e
--- /dev/null
+++ b/inputs/values/hgdp.json
@@ -0,0 +1,147 @@
+{
+  "name": "hgdp",
+  "ped_file": "gs://gatk-sv-hgdp/mw-sv-concordance-update/HGDP_1KGP.ped",
+  "ploidy_table": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.ploidy_table.tsv",
+
+  "del_bed": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.DEL.bed.gz",
+  "dup_bed": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.DUP.bed.gz",
+  "std_wham_vcf_tar": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.wham.std_vcfs.37_missing.tar.gz",
+  "std_manta_vcf_tar": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.manta.std_vcfs.37_missing.tar.gz",
+  "std_melt_vcf_tar": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.melt.std_vcfs.37_missing.tar.gz",
+
+  "merged_depth_vcf": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.cluster_batch.depth.vcf.gz",
+  "merged_depth_vcf_index": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.cluster_batch.depth.vcf.gz.tbi",
+  "merged_manta_vcf": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.cluster_batch.manta.vcf.gz",
+  "merged_manta_vcf_index": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.cluster_batch.manta.vcf.gz.tbi",
+  "merged_melt_vcf": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.cluster_batch.melt.vcf.gz",
+  "merged_melt_vcf_index": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.cluster_batch.melt.vcf.gz.tbi",
+  "merged_wham_vcf": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.cluster_batch.wham.vcf.gz",
+  "merged_wham_vcf_index": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.cluster_batch.wham.vcf.gz.tbi",
+
+  "clean_vcf": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp_and_hgsv.cleaned.vcf.gz",
+  "clean_vcf_index": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp_and_hgsv.cleaned.vcf.gz.tbi",
+  "clean_vcf_qc": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp_SV_VCF_QC_output.tar.gz",
+  "clean_bed": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.vcf2bed.bed.gz",
+
+  "clean_vcf_gatk_formatter_args": "--scale-down-gq",
+  "gatk_formatted_vcf": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.gatk_formatted.vcf.gz",
+  "gatk_formatted_vcf_index": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.gatk_formatted.vcf.gz.tbi",
+
+  "joined_raw_calls_vcf": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.join_raw_calls.vcf.gz",
+  "joined_raw_calls_vcf_index": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.join_raw_calls.vcf.gz.tbi",
+  "concordance_vcf": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.concordance.vcf.gz",
+  "concordance_vcf_index": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.concordance.vcf.gz.tbi",
+
+  "pacbio_sample_concordance_vcf": "gs://gatk-sv-hgdp/mw-sv-concordance-update/training/hgdp.concordance.subset.vcf.gz",
+  "pacbio_sample_concordance_vcf_index": "gs://gatk-sv-hgdp/mw-sv-concordance-update/training/hgdp.concordance.subset.vcf.gz.tbi",
+  "recalibrate_gq_truth_json": "gs://gatk-sv-hgdp/mw-sv-concordance-update/training/hgdp.gq_training_labels.json",
+
+  "hgdp_recalibrate_gq_model_file": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.gq_recalibrator.model",
+  "hgdp_recalibrated_vcf": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.concordance.hgdp_gq_recalibrated.vcf.gz",
+  "hgdp_recalibrated_vcf_index": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.concordance.hgdp_gq_recalibrated.vcf.gz.tbi",
+  "hgdp_sl_filtered_vcf": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.hgdp_gq_recalibrated.filtered.vcf.gz",
+  "hgdp_sl_filtered_vcf_index": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.hgdp_gq_recalibrated.filtered.vcf.gz.tbi",
+
+  "aou_recalibrate_gq_model_file": "gs://broad-dsde-methods-markw/gq-filter/aou.gq_recalibrator.v2.model",
+  "aou_recalibrated_vcf": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.concordance.aou_gq_recalibrated.vcf.gz",
+  "aou_recalibrated_vcf_index": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.concordance.aou_gq_recalibrated.vcf.gz.tbi",
+  "aou_sl_filtered_vcf": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.aou_gq_recalibrated.filtered.vcf.gz",
+  "aou_sl_filtered_vcf_index": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.aou_gq_recalibrated.filtered.vcf.gz.tbi",
+
+  "example_pacbio_sample_id": "HG00512",
+  "example_pacbio_cram": "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/CompressLongreadsBam/HG00512/HG00512.cram",
+  "example_pacbio_cram_index": "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/CompressLongreadsBam/HG00512/HG00512.cram.crai",
+
+  "pacbio_samples_list": "gs://gatk-sv-hgdp/mw-sv-concordance-update/hgdp.pacbio_samples.list",
+  "pacbio_samples": [
+    "HG00512",
+    "HG00513",
+    "HG00514",
+    "HG00731",
+    "HG00732",
+    "HG00733",
+    "NA12878",
+    "NA19238",
+    "NA19239",
+    "NA19240",
+    "NA24385"
+  ],
+  "vapor_files": [
+    "gs://gatk-sv-hgdp/mw-sv-concordance-update/vapor/HG00512.bed.gz",
+    "gs://gatk-sv-hgdp/mw-sv-concordance-update/vapor/HG00513.bed.gz",
+    "gs://gatk-sv-hgdp/mw-sv-concordance-update/vapor/HG00514.bed.gz",
+    "gs://gatk-sv-hgdp/mw-sv-concordance-update/vapor/HG00731.bed.gz",
+    "gs://gatk-sv-hgdp/mw-sv-concordance-update/vapor/HG00732.bed.gz",
+    "gs://gatk-sv-hgdp/mw-sv-concordance-update/vapor/HG00733.bed.gz",
+    "gs://gatk-sv-hgdp/mw-sv-concordance-update/vapor/NA12878.bed.gz",
+    "gs://gatk-sv-hgdp/mw-sv-concordance-update/vapor/NA19238.bed.gz",
+    "gs://gatk-sv-hgdp/mw-sv-concordance-update/vapor/NA19239.bed.gz",
+    "gs://gatk-sv-hgdp/mw-sv-concordance-update/vapor/NA19240.bed.gz",
+    "gs://gatk-sv-hgdp/mw-sv-concordance-update/vapor/NA24385.bed.gz"
+  ],
+  "pacbio_crams": [
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/CompressLongreadsBam/HG00512/HG00512.cram",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/CompressLongreadsBam/HG00513/HG00513.cram",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/CompressLongreadsBam/HG00514/HG00514.cram",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/CompressLongreadsBam/HG00731/HG00731.cram",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/CompressLongreadsBam/HG00732/HG00732.cram",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/CompressLongreadsBam/HG00733/HG00733.cram",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/CompressLongreadsBam/NA12878/NA12878.cram",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/CompressLongreadsBam/NA19238/NA19238.cram",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/CompressLongreadsBam/NA19239/NA19239.cram",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/CompressLongreadsBam/NA19240/NA19240.cram",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/CompressLongreadsBam/NA24385/NA24385.cram"
+  ],
+  "pacbio_cram_indexes": [
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/CompressLongreadsBam/HG00512/HG00512.cram.crai",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/CompressLongreadsBam/HG00513/HG00513.cram.crai",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/CompressLongreadsBam/HG00514/HG00514.cram.crai",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/CompressLongreadsBam/HG00731/HG00731.cram.crai",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/CompressLongreadsBam/HG00732/HG00732.cram.crai",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/CompressLongreadsBam/HG00733/HG00733.cram.crai",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/CompressLongreadsBam/NA12878/NA12878.cram.crai",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/CompressLongreadsBam/NA19238/NA19238.cram.crai",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/CompressLongreadsBam/NA19239/NA19239.cram.crai",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/CompressLongreadsBam/NA19240/NA19240.cram.crai",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/CompressLongreadsBam/NA24385/NA24385.cram.crai"
+  ],
+  "pacbio_pav_vcfs": [
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PAV/HG00512/pav_HG00512.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PAV/HG00513/pav_HG00513.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PAV/HG00514/pav_HG00514.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PAV/HG00731/pav_HG00731.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PAV/HG00732/pav_HG00732.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PAV/HG00733/pav_HG00733.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PAV/NA12878/pav_NA12878.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PAV/NA19238/pav_NA19238.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PAV/NA19239/pav_NA19239.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PAV/NA19240/pav_NA19240.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PAV/NA24385/pav_NA24385.vcf.gz"
+  ],
+  "pacbio_pbsv_vcfs": [
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PBCCSWholeGenome/HG00512/variants/sv/HG00512.pbsv.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PBCCSWholeGenome/HG00513/variants/sv/HG00513.pbsv.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PBCCSWholeGenome/HG00514/variants/sv/HG00514.pbsv.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PBCCSWholeGenome/HG00731/variants/sv/HG00731.pbsv.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PBCCSWholeGenome/HG00732/variants/sv/HG00732.pbsv.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PBCCSWholeGenome/HG00733/variants/sv/HG00733.pbsv.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PBCCSWholeGenome/NA12878/variants/sv/NA12878.pbsv.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PBCCSWholeGenome/NA19238/variants/sv/NA19238.pbsv.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PBCCSWholeGenome/NA19239/variants/sv/NA19239.pbsv.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PBCCSWholeGenome/NA19240/variants/sv/NA19240.pbsv.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PBCCSWholeGenome/NA24385/variants/sv/NA24385.pbsv.vcf.gz"
+  ],
+  "pacbio_sniffles_vcfs": [
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PBCCSWholeGenome/HG00512/variants/sv/HG00512.sniffles.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PBCCSWholeGenome/HG00513/variants/sv/HG00513.sniffles.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PBCCSWholeGenome/HG00514/variants/sv/HG00514.sniffles.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PBCCSWholeGenome/HG00731/variants/sv/HG00731.sniffles.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PBCCSWholeGenome/HG00732/variants/sv/HG00732.sniffles.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PBCCSWholeGenome/HG00733/variants/sv/HG00733.sniffles.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PBCCSWholeGenome/NA12878/variants/sv/NA12878.sniffles.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PBCCSWholeGenome/NA19238/variants/sv/NA19238.sniffles.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PBCCSWholeGenome/NA19239/variants/sv/NA19239.sniffles.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PBCCSWholeGenome/NA19240/variants/sv/NA19240.sniffles.vcf.gz",
+    "gs://fc-7891e5cf-0a7a-4c2f-8a18-0d05b27c53ab/GRCh38/PBCCSWholeGenome/NA24385/variants/sv/NA24385.sniffles.vcf.gz"
+  ]
+}
"gs://gatk-sv-ref-panel-1kg/outputs/GATKSVPipelineBatch/38c65ca4-2a07-4805-86b6-214696075fef/call-GenotypeBatch/GenotypeBatch/ad17f522-0950-4f0a-9148-a13f689082ed/call-GenotypePESRPart2/GenotypePESRPart2/ce1f4075-1a3e-44b5-9cfe-bfb701327616/call-ConcatGenotypedVcfs/cacheCopy/ref_panel_1kg.pesr.vcf.gz", - "joined_raw_calls_vcf": "gs://gatk-sv-ref-panel-1kg/outputs/JoinRawCalls/a613865b-f7ec-4edb-8a2e-21508335249e/ref_panel_1kg.join_raw_calls.vcf.gz", + "joined_raw_calls_vcf": "gs://gatk-sv-ref-panel-1kg/outputs/mw_sv_concordance_update/ref_panel_1kg.join_raw_calls.vcf.gz", + "joined_raw_calls_vcf_index": "gs://gatk-sv-ref-panel-1kg/outputs/mw_sv_concordance_update/ref_panel_1kg.join_raw_calls.vcf.gz.tbi", "manta_vcfs": [ "gs://broad-dsde-methods-markw/tws-no-cram-conversion/GatherSampleEvidenceBatch/HG00096.manta.vcf.gz", "gs://broad-dsde-methods-markw/tws-no-cram-conversion/GatherSampleEvidenceBatch/HG00129.manta.vcf.gz", diff --git a/inputs/values/resources_hg38.json b/inputs/values/resources_hg38.json index 3ee4cf632..8713900c5 100644 --- a/inputs/values/resources_hg38.json +++ b/inputs/values/resources_hg38.json @@ -26,6 +26,7 @@ "preprocessed_intervals" : "gs://gatk-sv-resources-public/hg38/v0/sv-resources/resources/v1/preprocessed_intervals.interval_list", "primary_contigs_fai" : "gs://gcp-public-data--broad-references/hg38/v0/sv-resources/resources/v1/contig.fai", "primary_contigs_list" : "gs://gcp-public-data--broad-references/hg38/v0/sv-resources/resources/v1/primary_contigs.list", + "contigs_header": "gs://gatk-sv-resources-public/hg38/v0/sv-resources/resources/v1/hg38_contigs_header.vcf", "protein_coding_gtf" : "gs://gatk-sv-resources-public/hg38/v0/sv-resources/resources/v1/MANE.GRCh38.v0.95.select_ensembl_genomic.gtf", "reference_build" : "hg38", "reference_dict" : "gs://gcp-public-data--broad-references/hg38/v0/Homo_sapiens_assembly38.dict", diff --git a/scripts/inputs/build_default_inputs.sh b/scripts/inputs/build_default_inputs.sh index b8ca3e7d5..6397de18a 100755 --- a/scripts/inputs/build_default_inputs.sh +++ b/scripts/inputs/build_default_inputs.sh @@ -51,6 +51,10 @@ echo "########## Building ref_panel_1kg cohort Terra workspace ##########" scripts/inputs/build_inputs.py ${BASE_DIR}/inputs/values ${BASE_DIR}/inputs/templates/terra_workspaces/cohort_mode ${BASE_DIR}/inputs/build/ref_panel_1kg/terra \ -a '{ "test_batch" : "ref_panel_1kg", "cloud_env" : "'$CLOUD_ENV'" }' +echo "########## Building hgdp test ##########" +scripts/inputs/build_inputs.py ${BASE_DIR}/inputs/values ${BASE_DIR}/inputs/templates/test ${BASE_DIR}/inputs/build/hgdp/test \ + -a '{ "test_batch" : "hgdp", "cloud_env" : "'$CLOUD_ENV'" }' + # Note CLOUD_ENV is not currently required for the single-sample workflow echo "########## Building NA19240 single-sample test ##########" scripts/inputs/build_inputs.py ${BASE_DIR}/inputs/values ${BASE_DIR}/inputs/templates/test/GATKSVPipelineSingleSample ${BASE_DIR}/inputs/build/NA19240/test \ diff --git a/scripts/inputs/build_inputs.py b/scripts/inputs/build_inputs.py index 0dc9e1f06..e097f4d46 100755 --- a/scripts/inputs/build_inputs.py +++ b/scripts/inputs/build_inputs.py @@ -42,7 +42,7 @@ # # Will cause the "test_batch_small" input value set to be aliased to the "test_batch" resource bundle. # -# If a template refers to missing property from a resource bundle, it will be skipped, with a warning message listing which +# If a template refers to missing property from a resource bundle, it will be skipped, with an info message listing which # properties are missing. 
This feature can be used purposefully to generate different sets of input files from the same sets # of templates depending on which properties are present in the input value files. For example, the build_default_inputs.sh # script generates inputs three times from the test_input_templates directory, with the test_batch bundle aliased to the @@ -96,7 +96,7 @@ def main(): parser.add_argument('-a', '--aliases', type=json.loads, default={}, help="Aliases for input value bundles") parser.add_argument('--log-info', action='store_true', - help="Show INFO-level logging messages") + help="Show INFO-level logging messages. Use for troubleshooting.") args = parser.parse_args() # Set logger @@ -212,8 +212,8 @@ def process_file(input_dict, template_subdir, template_file, target_subdir): # Transpose the TSV data in processed_content processed_content = transpose_tsv(processed_content) if len(undefined_names) > 0: - logging.warning("skipping file " + template_file_path + - " due to missing values " + str(undefined_names)) + logging.info("skipping file " + template_file_path + + " due to missing values " + str(undefined_names)) else: os.makedirs(target_subdir, exist_ok=True) target_file = open(target_file_path, "w") diff --git a/scripts/test/validate.sh b/scripts/test/validate.sh index 89c8888ae..b40467c7f 100755 --- a/scripts/test/validate.sh +++ b/scripts/test/validate.sh @@ -63,7 +63,7 @@ COUNTER=0 for wdl in "${WDLS[@]}" do name=$(basename $wdl .wdl) - JSONS=(`find inputs/build/ref_panel_1kg/test -name "${name}.*json"` `find inputs/build/NA12878/test -name "${name}.*json"`) + JSONS=(`find inputs/build/ref_panel_1kg/test -name "${name}.*json"` `find inputs/build/hgdp/test -name "${name}.*json"` `find inputs/build/NA12878/test -name "${name}.*json"`) for json in "${JSONS[@]}" do cmd="java -jar ${WOMTOOL_JAR} validate ${wdl} -i ${json}" diff --git a/src/sv-pipeline/scripts/format_gatk_vcf_for_svtk.py b/src/sv-pipeline/scripts/format_gatk_vcf_for_svtk.py index ddae71da0..64e42b18e 100644 --- a/src/sv-pipeline/scripts/format_gatk_vcf_for_svtk.py +++ b/src/sv-pipeline/scripts/format_gatk_vcf_for_svtk.py @@ -5,6 +5,18 @@ import sys from typing import Optional, List, Text, Set +_gt_sum_map = dict() + + +def _cache_gt_sum(gt): + if gt is None: + return 0 + s = _gt_sum_map.get(gt, None) + if s is None: + s = sum([1 for a in gt if a is not None and a > 0]) + _gt_sum_map[gt] = s + return s + def create_header(header_in: pysam.VariantHeader, source: Text, @@ -91,25 +103,19 @@ def convert(record: pysam.VariantRecord, new_record.info[key] = record.info[key] # fix END, CHR2, SVLEN, STRANDS if svtype == 'INS': - new_record.info['CHR2'] = contig - if 'SVLEN' not in record.info: - new_record.info['SVLEN'] = -1 + new_record.info['SVLEN'] = record.info.get('SVLEN', -1) new_record.info['STRANDS'] = '+-' - elif svtype == 'BND': + elif svtype == 'BND' or svtype == 'CTX': new_record.stop = record.info['END2'] new_record.info['SVLEN'] = -1 + elif svtype == 'CPX': + new_record.info['SVLEN'] = record.info.get('SVLEN', -1) elif svtype == 'DEL': - new_record.info['CHR2'] = contig - new_record.info['SVLEN'] = record.stop - record.start new_record.info['STRANDS'] = '+-' elif svtype == 'DUP': - new_record.info['CHR2'] = contig - new_record.info['SVLEN'] = record.stop - record.start new_record.info['STRANDS'] = '-+' elif svtype == 'INV': - new_record.info['CHR2'] = contig - new_record.info['SVLEN'] = record.stop - record.start - new_record.info['STRANDS'] = record.info['STRANDS'] + new_record.info['STRANDS'] = 
diff --git a/src/sv-pipeline/scripts/format_gatk_vcf_for_svtk.py b/src/sv-pipeline/scripts/format_gatk_vcf_for_svtk.py
index ddae71da0..64e42b18e 100644
--- a/src/sv-pipeline/scripts/format_gatk_vcf_for_svtk.py
+++ b/src/sv-pipeline/scripts/format_gatk_vcf_for_svtk.py
@@ -5,6 +5,18 @@
 import sys
 from typing import Optional, List, Text, Set
 
+_gt_sum_map = dict()
+
+
+def _cache_gt_sum(gt):
+    if gt is None:
+        return 0
+    s = _gt_sum_map.get(gt, None)
+    if s is None:
+        s = sum([1 for a in gt if a is not None and a > 0])
+        _gt_sum_map[gt] = s
+    return s
+
 
 def create_header(header_in: pysam.VariantHeader,
                   source: Text,
@@ -91,25 +103,19 @@
             new_record.info[key] = record.info[key]
     # fix END, CHR2, SVLEN, STRANDS
     if svtype == 'INS':
-        new_record.info['CHR2'] = contig
-        if 'SVLEN' not in record.info:
-            new_record.info['SVLEN'] = -1
+        new_record.info['SVLEN'] = record.info.get('SVLEN', -1)
         new_record.info['STRANDS'] = '+-'
-    elif svtype == 'BND':
+    elif svtype == 'BND' or svtype == 'CTX':
         new_record.stop = record.info['END2']
         new_record.info['SVLEN'] = -1
+    elif svtype == 'CPX':
+        new_record.info['SVLEN'] = record.info.get('SVLEN', -1)
     elif svtype == 'DEL':
-        new_record.info['CHR2'] = contig
-        new_record.info['SVLEN'] = record.stop - record.start
         new_record.info['STRANDS'] = '+-'
     elif svtype == 'DUP':
-        new_record.info['CHR2'] = contig
-        new_record.info['SVLEN'] = record.stop - record.start
         new_record.info['STRANDS'] = '-+'
     elif svtype == 'INV':
-        new_record.info['CHR2'] = contig
-        new_record.info['SVLEN'] = record.stop - record.start
-        new_record.info['STRANDS'] = record.info['STRANDS']
+        new_record.info['STRANDS'] = record.info.get('STRANDS', None)
 
     for sample in record.samples:
         new_genotype = new_record.samples[sample]
@@ -119,17 +125,10 @@
             if key not in remove_formats:
                 new_genotype[key] = genotype[key]
         # fix GT, always assuming diploid
-        if svtype == 'DUP':
-            if genotype['ECN'] < genotype['CN']:
-                new_genotype['GT'] = (0, 1)
-            else:
-                new_genotype['GT'] = (0, 0)
+        if _cache_gt_sum(genotype.get('GT', None)) > 0:
+            new_genotype['GT'] = (0, 1)
         else:
-            called_gt = [g for g in genotype['GT'] if g is not None] if 'GT' in genotype else []
-            if sum(called_gt) > 0:
-                new_genotype['GT'] = (0, 1)
-            else:
-                new_genotype['GT'] = (0, 0)
+            new_genotype['GT'] = (0, 0)
     return new_record
 
 
@@ -148,7 +147,7 @@
 def __parse_arguments(argv: List[Text]) -> argparse.Namespace:
     # noinspection PyTypeChecker
     parser = argparse.ArgumentParser(
-        description="Convert a GATK-style SV VCF to SVTK-style",
+        description="Convert a GATK-style SV VCF from ClusterBatch for consumption by GenerateBatchMetrics.",
         formatter_class=argparse.ArgumentDefaultsHelpFormatter
     )
     parser.add_argument("--vcf", type=str, required=True,
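(The new _cache_gt_sum helper above memoizes the count of called non-reference alleles per distinct GT tuple, and the diploid GT fix collapses any non-ref genotype to (0, 1). A runnable sketch of that logic, with plain tuples standing in for pysam genotype values:)

_gt_sum_map = {}

def gt_sum(gt):
    # Number of called, non-reference alleles; memoized per GT tuple
    if gt is None:
        return 0
    s = _gt_sum_map.get(gt)
    if s is None:
        s = sum(1 for a in gt if a is not None and a > 0)
        _gt_sum_map[gt] = s
    return s

for gt in [(0, 0), (0, 1), (1, 1), (None, None), (1,), None]:
    print(gt, "->", (0, 1) if gt_sum(gt) > 0 else (0, 0))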
diff --git a/src/sv-pipeline/scripts/format_svtk_vcf_for_gatk.py b/src/sv-pipeline/scripts/format_svtk_vcf_for_gatk.py
index e2d32fbca..6bc550b37 100644
--- a/src/sv-pipeline/scripts/format_svtk_vcf_for_gatk.py
+++ b/src/sv-pipeline/scripts/format_svtk_vcf_for_gatk.py
@@ -4,9 +4,10 @@
 import pysam
 import sys
 import gzip
-from typing import Any, List, Text, Set, Dict, Optional
+from math import floor
+from typing import Any, List, Text, Dict, Optional
 
-_gt_sum_map = dict()
+GQ_FIELDS = ["GQ", "PE_GQ", "SR_GQ", "RD_GQ"]
 
 
 def _parse_bnd_ends(vcf_path: Text) -> Dict[Text, int]:
@@ -32,7 +33,7 @@
             columns = line.split('\t', 8)
             vid = columns[2]
             info = columns[7]
-            if 'SVTYPE=BND' not in info and 'SVTYPE=CTX' not in info and 'SVTYPE=CPX' not in info:
+            if 'SVTYPE=BND' not in info:
                 continue
             info_tokens = info.split(';')
             end_field_list = [x for x in info_tokens if x.startswith("END=")]
@@ -69,234 +70,103 @@
     return ploidy_dict
 
 
-def create_header(header_in: pysam.VariantHeader,
-                  replace_ev_format: bool,
-                  remove_infos: Set[Text],
-                  remove_formats: Set[Text]) -> pysam.VariantHeader:
+def update_header(header: pysam.VariantHeader) -> None:
     """
     Ingests the given header, removes specified fields, and adds necessary fields.
 
     Parameters
     ----------
-    header_in: pysam.VariantHeader
-        input header
-    remove_infos: Set[Text]
-        set of info fields to remove
-    remove_formats: Set[Text]
-        set of format fields to remove
-
-    Returns
-    -------
     header: pysam.VariantHeader
-        gatk-style header
+        input header
     """
-    header = pysam.VariantHeader()
-    for sample in header_in.samples:
-        header.add_sample(sample)
-    for line in header_in.records:
-        # remove fields
-        if len(line.attrs) > 0 and 'ID' in line.keys() and (line['ID'] in remove_infos or line['ID'] in remove_formats):
-            continue
-        line_str = str(line)
-        # remove source line
-        if line_str.startswith('##source='):
-            continue
-        header.add_line(line_str)
-    # new fields
-    header.add_line('##INFO=<ID=END2,Number=1,Type=Integer,Description="Second position">')
-    header.add_line('##INFO=<ID=CHR2,Number=1,Type=String,Description="Second contig">')
-    header.add_line('##FORMAT=<ID=CN,Number=1,Type=Integer,Description="Copy number">')
     header.add_line('##FORMAT=<ID=ECN,Number=1,Type=Integer,Description="Expected copy number for ref genotype">')
-    if replace_ev_format:
-        header.add_line('##FORMAT=<ID=EV,Number=.,Type=String,Description="Classes of evidence supporting final genotype">')
-    return header
+    # Add these just in case (no effect if they exist)
+    header.add_line('##INFO=<ID=END2,Number=1,Type=Integer,Description="Second position">')
+    header.add_line('##INFO=<ID=CHR2,Number=1,Type=String,Description="Second contig">')
+
+
+def rescale_gq(record):
+    for sample in record.samples:
+        for gq_field in GQ_FIELDS:
+            if gq_field in record.samples[sample] and record.samples[sample][gq_field] is not None:
+                record.samples[sample][gq_field] = floor(record.samples[sample][gq_field] / 10)
 
 
 def convert(record: pysam.VariantRecord,
-            vcf_out: pysam.VariantFile,
-            remove_infos: Set[Text],
-            remove_formats: Set[Text],
             bnd_end_dict: Optional[Dict[Text, int]],
-            ploidy_dict: Dict[Text, Dict[Text, int]]) -> pysam.VariantRecord:
+            ploidy_dict: Dict[Text, Dict[Text, int]],
+            scale_down_gq: bool) -> pysam.VariantRecord:
     """
-    Converts a record from svtk to gatk style. This includes updating all GT fields with proper ploidy, and adding
-    necessary fields such as ECN and CN.
+    Converts a record from svtk to gatk style. This includes updating END/END2 and adding
+    necessary fields such as ECN.
 
     Parameters
     ----------
     record: pysam.VariantRecord
         svtk-style record
-    vcf_out: pysam.VariantFile
-        new vcf, to which the converted record will be written
-    remove_infos: Set[Text]
-        info fields to remove
-    remove_formats: Set[Text]
-        format fields to remove
     bnd_end_dict: Optional[Dict[Text, int]]
         map from BND variant ID to END coordinate
     ploidy_dict: Dict[Text, Dict[Text, int]]
         map from sample to contig to ploidy
+    scale_down_gq: bool
+        scale GQs to 0-99 range
 
     Returns
     -------
     header: pysam.VariantRecord
         gatk-style record
     """
+
+    def is_null(val):
+        return val is None or val == "."
+
     svtype = record.info['SVTYPE']
-    # Force symbolic BND alleles
-    if svtype == 'BND':
-        alleles = (record.alleles[0], '<BND>')
-    else:
-        alleles = record.alleles
     contig = record.contig
-    new_record = vcf_out.new_record(id=record.id, contig=contig, start=record.start, stop=record.stop, alleles=alleles,
-                                    info={key: value for key, value in record.info.items() if key not in remove_infos})
     # Version of htsjdk currently in gatk only supports these base alleles
-    if new_record.ref in ['A', 'C', 'G', 'T', 'a', 'c', 'g', 't', 'N', 'n']:
-        new_record.ref = new_record.ref
+    if record.ref in ['A', 'C', 'G', 'T', 'a', 'c', 'g', 't', 'N', 'n']:
+        record.ref = record.ref
     else:
-        new_record.ref = 'N'
-    # fix SVLEN, STRANDS, CHR2, and END2 where needed
-    if svtype == 'INS':
-        new_record.info['SVLEN'] = record.info['SVLEN']
-    elif svtype == 'BND' or svtype == 'CTX':
-        if svtype == 'CTX':
-            svtype = 'BND'
-            new_record.info['OSVTYPE'] = record.info['SVTYPE']
-            new_record.info['SVTYPE'] = svtype
-            new_record.info['STRANDS'] = record.info.get('STRANDS', '++')
-        else:
-            new_record.info['STRANDS'] = record.info['STRANDS']
-        new_record.info['CHR2'] = record.info['CHR2']
-        new_record.info['END2'] = bnd_end_dict[record.id] if bnd_end_dict is not None else record.info.get('END2', record.pos + record.info['SVLEN'])
-        new_record.stop = record.start + 1
-    elif svtype == 'INV':
-        new_record.info['STRANDS'] = record.info.get('STRANDS', '++')
-    elif svtype == 'CTX':
-        new_record.info['STRANDS'] = record.info.get('STRANDS', '++')
-        new_record.info['OSVTYPE'] = record.info['SVTYPE']
-        new_record.info['SVTYPE'] = 'BND'
-    elif svtype == 'CPX':
-        svtype = 'INV'
-        new_record.info['STRANDS'] = record.info.get('STRANDS', '++')
-        new_record.info['OSVTYPE'] = record.info['SVTYPE']
-        new_record.info['SVTYPE'] = svtype
+        record.ref = 'N'
+    if svtype == 'BND' or svtype == 'CTX':
+        record.info['END2'] = bnd_end_dict[record.id] if bnd_end_dict is not None \
+            else record.info.get('END2', record.stop)
+    # Fix this weird edge case (may be from CPX review workflow)
+    if svtype == 'INV' and '<CPX>' in record.alleles[1]:
+        svtype = 'CPX'
+        record.info['SVTYPE'] = svtype
+    is_ddup = svtype == 'CPX' and 'dDUP' in record.info.get('CPX_TYPE', '')
+    if svtype == 'BND' or svtype == 'INS' or svtype == 'CTX' or is_ddup:
+        record.stop = record.start + 1
+    if is_ddup:
+        # e.g. SOURCE=DUP_chrX:49151588-49151850
+        source = record.info.get('SOURCE', None)
+        if source is not None:
+            tokens = source.split(':')
+            chr2 = tokens[0].split('_')[-1]
+            end2 = int(tokens[-1].split('-')[0])
+            record.info['CHR2'] = chr2
+            record.info['END2'] = end2
+        else:
+            # Sometimes SOURCE is not set (may be from CPX review workflow)
+            record.info['CHR2'] = record.chrom
+            record.info['END2'] = record.stop
+    # Delete empty INFO fields (GATK does not like "." for non-String types)
+    keys = record.info.keys()
+    for k in keys:
+        val = record.info[k]
+        if is_null(val) or (isinstance(val, tuple) and len(val) == 1 and is_null(val[0])):
+            del record.info[k]
     # copy FORMAT fields
-    for sample in record.samples:
-        genotype = record.samples[sample]
-        new_genotype = new_record.samples[sample]
-        for key in genotype.keys():
-            if key not in remove_formats:
-                new_genotype[key] = genotype[key]
-        new_genotype['ECN'] = ploidy_dict[sample][contig]
-        if new_genotype['ECN'] == 0:
-            new_genotype['GT'] = ()
-            new_genotype['CN'] = 0
-        elif new_genotype['ECN'] == 1:
-            if svtype == 'DUP':
-                new_genotype['CN'] = 1 + _cache_gt_sum(genotype['GT'])
-                new_genotype['GT'] = (None,)
-            elif _cache_gt_sum(genotype['GT']) == 0:
-                new_genotype['GT'] = (0,)
-                if svtype == 'DEL':
-                    new_genotype['CN'] = 1
-            else:
-                new_genotype['GT'] = (1,)
-                if svtype == 'DEL':
-                    new_genotype['CN'] = 0
-        else:
-            gt_sum = _cache_gt_sum(genotype['GT'])
-            if svtype == 'DUP':
-                new_genotype['CN'] = 2 + gt_sum
-                new_genotype['GT'] = (None, None)
-            elif gt_sum == 0:
-                new_genotype['GT'] = (0, 0)
-                if svtype == 'DEL':
-                    new_genotype['CN'] = 2
-            elif gt_sum == 1:
-                new_genotype['GT'] = (0, 1)
-                if svtype == 'DEL':
-                    new_genotype['CN'] = 1
-            else:
-                new_genotype['GT'] = (1, 1)
-                if svtype == 'DEL':
-                    new_genotype['CN'] = 0
-        if svtype == 'CNV':
-            new_genotype['GT'] = (None,) * new_genotype['ECN']
-    return new_record
-
-
-def _cache_gt_sum(gt):
-    s = _gt_sum_map.get(gt, None)
-    if s is None:
-        s = sum([1 for a in gt if a is not None and a > 0])
-        _gt_sum_map[gt] = s
-    return s
-
-
-def add_cn_ecn(record: pysam.VariantRecord,
-               vcf_out: pysam.VariantFile,
-               ploidy_dict: Dict[Text, Dict[Text, int]]) -> pysam.VariantRecord:
-    """"
-    Only modifies records by adding CN and ECN INFO fields, e.g. for 'fixed' VCFs that just need
-    this metadata for certain GATK tools such as SVCluster and SVConcordance
-
-    Parameters
-    ----------
-    record: pysam.VariantRecord
-        input record
-    vcf_out: pysam.VariantFile
-        new vcf, to which the converted record will be written
-    ploidy_dict: Dict[Text, Dict[Text, int]]
-        map from sample to contig to ploidy
-
-    Returns
-    -------
-    header: pysam.VariantRecord
-        record with CN and ECN fields added"""
-    svtype = record.info['SVTYPE']
-    contig = record.contig
-    new_record = vcf_out.new_record(id=record.id, contig=contig, start=record.start, stop=record.stop,
-                                    alleles=record.alleles, info=record.info)
-
-    # copy FORMAT fields
-    for sample in record.samples:
-        genotype = record.samples[sample]
-        new_genotype = new_record.samples[sample]
-        for key in genotype.keys():
-            new_genotype[key] = genotype[key]
-        ecn = ploidy_dict[sample][contig]
-        new_genotype['ECN'] = ecn
-        if svtype == 'DEL':
-            new_genotype['CN'] = max(0, ecn - _cache_gt_sum(genotype['GT']))
-        elif svtype == 'DUP':
-            new_genotype['CN'] = ecn + _cache_gt_sum(genotype['GT'])
-        elif svtype == 'CNV':
-            # Disambiguates non-existent and empty (i.e. ".") CN
-            cn = genotype.get('CN', None)
-            if cn is None:
-                cn = ecn
-            new_genotype['CN'] = cn
-    return new_record
-
-
-def filter_unsupported_type(record: pysam.VariantRecord) -> bool:
-    svtype = record.info['SVTYPE']
-    return svtype == 'CPX' or svtype == 'CTX'
-
-
-def _parse_arg_list(arg: Text) -> List[Text]:
-    if arg is None:
-        return set()
-    else:
-        return arg.split(',')
+    for sample, genotype in record.samples.items():
+        genotype['ECN'] = ploidy_dict[sample][contig]
+    if scale_down_gq:
+        rescale_gq(record)
+    return record
 
 
 def _process(vcf_in: pysam.VariantFile,
              vcf_out: pysam.VariantFile,
-             arguments: Dict[Text, Any],
-             vcf_filter: Optional[pysam.VariantFile] = None) -> None:
+             arguments: Dict[Text, Any]) -> None:
     """"
     Master function for processing the given input vcf and writing output
@@ -308,50 +178,27 @@
         output vcf
     arguments: Dict[Text, Any]
         commandline arguments
-    vcf_filter: Optional[pysam.VariantFile]
-        if provided, write filtered records to this vcf
 
     Returns
     -------
     header: pysam.VariantRecord
-        record with CN and ECN fields added"""
-    remove_formats = set(_parse_arg_list(arguments.remove_formats))
-    remove_infos = set(_parse_arg_list(arguments.remove_infos))
-    if not arguments.only_add_cn_fields and not arguments.use_end2:
+        record with ECN fields added"""
+    if arguments.fix_end:
        bnd_end_dict = _parse_bnd_ends(arguments.vcf)
     else:
         bnd_end_dict = None
     ploidy_dict = _parse_ploidy_table(arguments.ploidy_table)
-    # info fields we drop by default (unless needed for certain SV types)
-    default_remove_infos = set(["SVLEN", "STRANDS", "CHR2"])
-    if bnd_end_dict is not None:
-        default_remove_infos.add("END2")
-    remove_infos = remove_infos.union(default_remove_infos)
-    for record in vcf_in:
-        if arguments.filter_unsupported_types and filter_unsupported_type(record):
-            if vcf_filter is not None:
-                vcf_filter.write(record)
-        else:
-            if arguments.only_add_cn_fields:
-                out = add_cn_ecn(record=record, vcf_out=vcf_out, ploidy_dict=ploidy_dict)
-            else:
-                out = convert(
-                    record=record,
-                    vcf_out=vcf_out,
-                    remove_infos=remove_infos,
-                    remove_formats=remove_formats,
-                    bnd_end_dict=bnd_end_dict,
-                    ploidy_dict=ploidy_dict
-                )
-            vcf_out.write(out)
+    for record in vcf_in:
+        out = convert(record=record, bnd_end_dict=bnd_end_dict,
+                      ploidy_dict=ploidy_dict, scale_down_gq=arguments.scale_down_gq)
+        vcf_out.write(out)
 
 
 def _parse_arguments(argv: List[Text]) -> argparse.Namespace:
     # noinspection PyTypeChecker
     parser = argparse.ArgumentParser(
-        description="Convert a GATK-style SV VCF to SVTK-style",
+        description="Convert an SVTK-style SV VCF to GATK-style",
         formatter_class=argparse.ArgumentDefaultsHelpFormatter
     )
     parser.add_argument("--vcf", type=str, required=True,
@@ -363,20 +210,10 @@
                              "first column is SAMPLE, and the remaining columns are contig names. For each row "
                              "thereafter, the first column is the sample name, and remaining columns are the contig "
                              "ploidy values for that sample.")
-    parser.add_argument("--only-add-cn-fields", action='store_true',
-                        help="Only add CN and ECN info fields. All other corrections are skipped.")
-    parser.add_argument("--use-end2", action='store_true',
-                        help="Use existing END2 fields rather than getting them from END")
-    parser.add_argument("--filter-unsupported-types", action='store_true',
-                        help="Filter CPX and CTX types, which are not currently supported by GATK")
-    parser.add_argument("--filter-out", type=str,
-                        help="Write any filtered variants to the specified VCF")
-    parser.add_argument("--replace-ev-format", action='store_true',
-                        help="Adds EV FORMAT field with unbounded Number to header")
-    parser.add_argument("--remove-formats", type=str,
-                        help="Comma-delimited list of FORMAT fields to remove")
-    parser.add_argument("--remove-infos", type=str,
-                        help="Comma-delimited list of INFO fields to remove")
+    parser.add_argument("--fix-end", action='store_true',
+                        help="Fix END tags and assign END2 to END")
+    parser.add_argument("--scale-down-gq", action='store_true',
+                        help="Scales all GQs down from [0-999] to [0-99]")
     if len(argv) <= 1:
         parser.parse_args(["--help"])
         sys.exit(0)
@@ -388,23 +225,14 @@
     if argv is None:
         argv = sys.argv
     arguments = _parse_arguments(argv)
-    remove_formats = set(_parse_arg_list(arguments.remove_formats))
-    remove_infos = set(_parse_arg_list(arguments.remove_infos))
 
     # convert vcf header and records
     with pysam.VariantFile(arguments.vcf) as vcf_in:
-        header = create_header(
-            header_in=vcf_in.header,
-            replace_ev_format=arguments.replace_ev_format,
-            remove_infos=remove_infos,
-            remove_formats=remove_formats
+        update_header(
+            header=vcf_in.header
         )
-        with pysam.VariantFile(arguments.out, mode='w', header=header) as vcf_out:
-            vcf_filter = pysam.VariantFile(arguments.filter_out, mode='w', header=vcf_in.header) if \
-                arguments.filter_out is not None else None
-            _process(vcf_in, vcf_out, arguments, vcf_filter=vcf_filter)
-            if vcf_filter is not None:
-                vcf_filter.close()
+        with pysam.VariantFile(arguments.out, mode='w', header=vcf_in.header) as vcf_out:
+            _process(vcf_in, vcf_out, arguments)
 
 
 if __name__ == "__main__":
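(The --ploidy-table format described in the help text above is a plain TSV: a SAMPLE header column followed by contig names, then one row of per-contig ploidies per sample. A re-implementation sketch of that parse — the script's own _parse_ploidy_table is authoritative, and the file name below is invented:)

import gzip

def parse_ploidy_table(path):
    # Returns {sample: {contig: ploidy}} from a tab-delimited table whose
    # header row is: SAMPLE <contig1> <contig2> ...
    open_fn = gzip.open if path.endswith('.gz') else open
    ploidy = {}
    with open_fn(path, 'rt') as f:
        contigs = f.readline().rstrip('\n').split('\t')[1:]
        for line in f:
            tokens = line.rstrip('\n').split('\t')
            ploidy[tokens[0]] = {c: int(p) for c, p in zip(contigs, tokens[1:])}
    return ploidy

# e.g. parse_ploidy_table("hgdp.ploidy_table.tsv")["HG00512"]["chrX"] -> 1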
diff --git a/wdl/CleanVcf.wdl b/wdl/CleanVcf.wdl
index 257580fef..0694bc9a6 100644
--- a/wdl/CleanVcf.wdl
+++ b/wdl/CleanVcf.wdl
@@ -1,6 +1,7 @@
 version 1.0
 
 import "CleanVcfChromosome.wdl" as CleanVcfChromosome
+import "TasksClusterBatch.wdl" as TasksCluster
 import "TasksMakeCohortVcf.wdl" as MiniTasks
 import "HailMerge.wdl" as HailMerge
 import "MakeCohortVcfMetrics.wdl" as metrics
@@ -12,7 +13,7 @@
     Array[File] complex_genotype_vcfs
     Array[File] complex_resolve_bothside_pass_lists
     Array[File] complex_resolve_background_fail_lists
-    File merged_ped_file
+    File ped_file
 
     File contig_list
     File allosome_fai
@@ -55,7 +56,7 @@
     RuntimeAttr? runtime_override_hail_merge_clean_final
     RuntimeAttr? runtime_override_fix_header_clean_final
     RuntimeAttr? runtime_override_concat_cleaned_vcfs
-    RuntimeAttr? runtime_override_fix_bad_ends
+    RuntimeAttr? runtime_attr_create_ploidy
 
     # overrides for CleanVcfContig
     RuntimeAttr? runtime_override_clean_vcf_1a
@@ -68,6 +69,7 @@
     RuntimeAttr? runtime_override_clean_vcf_5_polish
     RuntimeAttr? runtime_override_stitch_fragmented_cnvs
     RuntimeAttr? runtime_override_final_cleanup
+    RuntimeAttr? runtime_attr_format
 
     # Clean vcf 1b
     RuntimeAttr? runtime_attr_override_subset_large_cnvs_1b
@@ -98,6 +100,18 @@
     RuntimeAttr? runtime_override_sort_drop_redundant_cnvs
   }
 
+  call TasksCluster.CreatePloidyTableFromPed {
+    input:
+      ped_file=ped_file,
+      contig_list=contig_list,
+      retain_female_chr_y=false,
+      chr_x=chr_x,
+      chr_y=chr_y,
+      output_prefix="~{cohort_name}.ploidy",
+      sv_pipeline_docker=sv_pipeline_docker,
+      runtime_attr_override=runtime_attr_create_ploidy
+  }
+
   #Scatter per chromosome
   Array[String] contigs = transpose(read_tsv(contig_list))[0]
   scatter ( i in range(length(contigs)) ) {
@@ -108,7 +122,7 @@
         vcf=complex_genotype_vcfs[i],
         contig=contig,
         background_list=complex_resolve_background_fail_lists[i],
-        ped_file=merged_ped_file,
+        ped_file=ped_file,
         bothsides_pass_list=complex_resolve_bothside_pass_lists[i],
         allosome_fai=allosome_fai,
         prefix="~{cohort_name}.~{contig}",
@@ -121,6 +135,7 @@
         gcs_project=gcs_project,
         clean_vcf1b_records_per_shard=clean_vcf1b_records_per_shard,
         clean_vcf5_records_per_shard=clean_vcf5_records_per_shard,
+        ploidy_table=CreatePloidyTableFromPed.out,
         chr_x=chr_x,
         chr_y=chr_y,
         linux_docker=linux_docker,
@@ -159,7 +174,7 @@
         runtime_attr_override_filter_vcf_1b=runtime_attr_override_filter_vcf_1b,
         runtime_override_concat_vcfs_1b=runtime_override_concat_vcfs_1b,
         runtime_override_cat_multi_cnvs_1b=runtime_override_cat_multi_cnvs_1b,
-        runtime_override_fix_bad_ends=runtime_override_fix_bad_ends
+        runtime_attr_format=runtime_attr_format,
     }
   }
 
diff --git a/wdl/CleanVcfChromosome.wdl b/wdl/CleanVcfChromosome.wdl
index 298ca5fd0..a3bbfcd37 100644
--- a/wdl/CleanVcfChromosome.wdl
+++ b/wdl/CleanVcfChromosome.wdl
@@ -2,6 +2,7 @@
 
 import "Structs.wdl"
 import "TasksMakeCohortVcf.wdl" as MiniTasks
+import "FormatVcfForGatk.wdl" as fvcf
 import "CleanVcf1b.wdl" as c1b
 import "CleanVcf5.wdl" as c5
 import "HailMerge.wdl" as HailMerge
@@ -24,9 +25,12 @@
     File? outlier_samples_list
     Int? max_samples_per_shard_step3
 
+    File ploidy_table
     String chr_x
     String chr_y
 
+    File? svtk_to_gatk_script  # For debugging
+
     Boolean use_hail
     String? gcs_project
 
@@ -76,7 +80,7 @@
     RuntimeAttr? runtime_override_drop_redundant_cnvs
     RuntimeAttr? runtime_override_combine_step_1_vcfs
     RuntimeAttr? runtime_override_sort_drop_redundant_cnvs
-    RuntimeAttr? runtime_override_fix_bad_ends
+    RuntimeAttr? runtime_attr_format
   }
 
@@ -293,20 +297,22 @@
       prefix="~{prefix}.final_cleanup",
       sv_pipeline_docker=sv_pipeline_docker,
       runtime_attr_override=runtime_override_final_cleanup
   }
 
-  call MiniTasks.FixEndsRescaleGQ {
+  call fvcf.FormatVcf {
     input:
-      vcf = FinalCleanup.final_cleaned_shard,
-      prefix = prefix + ".cleaned",
-      sv_pipeline_docker = sv_pipeline_docker,
-      runtime_attr_override = runtime_override_fix_bad_ends
+      vcf=FinalCleanup.final_cleaned_shard,
+      ploidy_table=ploidy_table,
+      args="--fix-end --scale-down-gq",
+      output_prefix="~{prefix}.final_format",
+      script=svtk_to_gatk_script,
+      sv_pipeline_docker=sv_pipeline_docker,
+      runtime_attr_override=runtime_attr_format
   }
 
   output {
-    File out=FixEndsRescaleGQ.out
-    File out_idx=FixEndsRescaleGQ.out_idx
+    File out = FormatVcf.out
+    File out_idx = FormatVcf.out_index
   }
 }
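(CleanVcfChromosome now delegates its final formatting to FormatVcf with args="--fix-end --scale-down-gq". The GQ flag maps CleanVcf-style qualities in [0, 999] onto GATK's [0, 99] range by integer division, per rescale_gq above. A self-contained sketch, with a plain dict standing in for a pysam genotype:)

from math import floor

GQ_FIELDS = ["GQ", "PE_GQ", "SR_GQ", "RD_GQ"]

def rescale(genotype):
    # Scale each present, non-null GQ-like field from [0, 999] down to [0, 99]
    for field in GQ_FIELDS:
        if genotype.get(field) is not None:
            genotype[field] = floor(genotype[field] / 10)
    return genotype

print(rescale({"GQ": 999, "SR_GQ": 45, "PE_GQ": None}))
# {'GQ': 99, 'SR_GQ': 4, 'PE_GQ': None}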
runtime_attr_format } @@ -293,20 +297,22 @@ workflow CleanVcfChromosome { prefix="~{prefix}.final_cleanup", sv_pipeline_docker=sv_pipeline_docker, runtime_attr_override=runtime_override_final_cleanup - } - call MiniTasks.FixEndsRescaleGQ { + call fvcf.FormatVcf { input: - vcf = FinalCleanup.final_cleaned_shard, - prefix = prefix + ".cleaned", - sv_pipeline_docker = sv_pipeline_docker, - runtime_attr_override = runtime_override_fix_bad_ends + vcf=FinalCleanup.final_cleaned_shard, + ploidy_table=ploidy_table, + args="--fix-end --scale-down-gq", + output_prefix="~{prefix}.final_format", + script=svtk_to_gatk_script, + sv_pipeline_docker=sv_pipeline_docker, + runtime_attr_override=runtime_attr_format } output { - File out=FixEndsRescaleGQ.out - File out_idx=FixEndsRescaleGQ.out_idx + File out = FormatVcf.out + File out_idx = FormatVcf.out_index } } diff --git a/wdl/ClusterBatch.wdl b/wdl/ClusterBatch.wdl index f340c4880..f1ce387c9 100644 --- a/wdl/ClusterBatch.wdl +++ b/wdl/ClusterBatch.wdl @@ -97,6 +97,7 @@ workflow ClusterBatch { ped_file=ped_file, script=ploidy_table_script, contig_list=contig_list, + retain_female_chr_y=true, chr_x=chr_x, chr_y=chr_y, output_prefix="~{batch}.ploidy", diff --git a/wdl/FormatVcfForGatk.wdl b/wdl/FormatVcfForGatk.wdl new file mode 100644 index 000000000..e6461b841 --- /dev/null +++ b/wdl/FormatVcfForGatk.wdl @@ -0,0 +1,145 @@ +version 1.0 + +import "Structs.wdl" +import "TasksClusterBatch.wdl" as tasks_cluster +import "TasksMakeCohortVcf.wdl" as tasks + +workflow FormatVcfForGatk { + input { + File vcf + String prefix + File ped_file + Int records_per_shard = 40000 + + File contig_list + File? contigs_header # Replaces vcf contig dictionary if provided + String? formatter_args + + String? chr_x + String? chr_y + + File? svtk_to_gatk_script # For debugging + + String sv_base_mini_docker + String sv_pipeline_docker + + RuntimeAttr? runtime_attr_create_ploidy + RuntimeAttr? runtime_attr_scatter + RuntimeAttr? runtime_attr_format + RuntimeAttr? runtime_override_concat + RuntimeAttr? runtime_override_preconcat_step1 + RuntimeAttr? runtime_override_hail_merge_step1 + RuntimeAttr? 
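On the --fix-end behavior that replaces the old FixEndsRescaleGQ task: the help text says it fixes END tags and assigns END2 from END. A loose sketch of what such a pass could look like; the SVTYPE handling and fallbacks here are guesses, not the script's actual logic:

    import sys
    import pysam

    # Loose sketch (assumptions, not the pipeline script): enforce
    # END = POS + SVLEN for symbolic interval records, and give
    # BND-like records an END2 when one is missing.
    INTERVAL_TYPES = {"DEL", "DUP", "INV"}

    with pysam.VariantFile(sys.argv[1]) as vcf_in, \
            pysam.VariantFile(sys.argv[2], mode="w", header=vcf_in.header) as vcf_out:
        for record in vcf_in:
            svtype = record.info.get("SVTYPE")
            svlen = record.info.get("SVLEN")
            if isinstance(svlen, tuple):  # Number=. INFO fields come back as tuples
                svlen = svlen[0]
            if svtype in INTERVAL_TYPES and isinstance(svlen, int) and svlen > 0:
                record.stop = record.pos + svlen  # rewrites END
            elif svtype in ("BND", "CTX") and "END2" not in record.info:
                # Fall back to END; assumes END2 is declared in the header.
                record.info["END2"] = record.stop
            vcf_out.write(record)
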
runtime_override_fix_header_step1 + } + + call tasks_cluster.CreatePloidyTableFromPed { + input: + ped_file=ped_file, + contig_list=contig_list, + retain_female_chr_y=false, + chr_x=chr_x, + chr_y=chr_y, + output_prefix="~{prefix}.ploidy", + sv_pipeline_docker=sv_pipeline_docker, + runtime_attr_override=runtime_attr_create_ploidy + } + + call tasks.ScatterVcf { + input: + vcf=vcf, + records_per_shard = records_per_shard, + prefix = "~{prefix}.scatter_vcf", + sv_pipeline_docker=sv_pipeline_docker, + runtime_attr_override=runtime_attr_scatter + } + + scatter ( i in range(length(ScatterVcf.shards)) ) { + call FormatVcf { + input: + vcf=ScatterVcf.shards[i], + ploidy_table=CreatePloidyTableFromPed.out, + args=formatter_args, + output_prefix="~{prefix}.format.shard_~{i}", + contigs_header=contigs_header, + script=svtk_to_gatk_script, + sv_pipeline_docker=sv_pipeline_docker, + runtime_attr_override=runtime_attr_format + } + } + + Boolean shards_unsorted = defined(contigs_header) + call tasks.ConcatVcfs { + input: + vcfs=FormatVcf.out, + vcfs_idx=FormatVcf.out_index, + naive=!shards_unsorted, + allow_overlaps=shards_unsorted, + outfile_prefix="~{prefix}.gatk_formatted", + sv_base_mini_docker=sv_base_mini_docker, + runtime_attr_override=runtime_override_concat + } + + output { + File gatk_formatted_vcf = ConcatVcfs.concat_vcf + File gatk_formatted_vcf_index = ConcatVcfs.concat_vcf_idx + } +} + +task FormatVcf { + input { + File vcf + File ploidy_table + File? script + String? args + File? contigs_header # Overwrites contig dictionary, in case they are out of order + String output_prefix + String sv_pipeline_docker + RuntimeAttr? runtime_attr_override + } + + RuntimeAttr default_attr = object { + cpu_cores: 1, + mem_gb: 3.75, + disk_gb: ceil(50 + size(vcf, "GB") * 3), + boot_disk_gb: 10, + preemptible_tries: 3, + max_retries: 1 + } + RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) + + output { + File out = "~{output_prefix}.vcf.gz" + File out_index = "~{output_prefix}.vcf.gz.tbi" + } + command <<< + set -euo pipefail + + # Convert format + python ~{default="/opt/sv-pipeline/scripts/format_svtk_vcf_for_gatk.py" script} \ + --vcf ~{vcf} \ + --out tmp.vcf.gz \ + --ploidy-table ~{ploidy_table} \ + ~{args} + + if ~{defined(contigs_header)}; then + bcftools view --no-version -h tmp.vcf.gz > original_header.vcf + grep -v "^##contig=" original_header.vcf | grep -v "^#CHROM" > header.vcf + cat ~{contigs_header} >> header.vcf + grep "^#CHROM" original_header.vcf >> header.vcf + bcftools reheader -h header.vcf tmp.vcf.gz | bcftools sort -Oz -o ~{output_prefix}.vcf.gz + else + mv tmp.vcf.gz ~{output_prefix}.vcf.gz + fi + + tabix ~{output_prefix}.vcf.gz + >>> + runtime { + cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) + memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" + disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" + bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) + docker: sv_pipeline_docker + preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) + maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) + } +} \ No newline at end of file diff --git a/wdl/GATKSVPipelineSingleSample.wdl b/wdl/GATKSVPipelineSingleSample.wdl index bf107c48c..464638389 100644 --- a/wdl/GATKSVPipelineSingleSample.wdl +++ b/wdl/GATKSVPipelineSingleSample.wdl @@ -487,7 +487,7 @@ workflow GATKSVPipelineSingleSample { 
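The contigs_header option in the FormatVcf task above swaps the VCF's ##contig records via bcftools reheader and then re-sorts, since a new contig dictionary can change record order; that is also why the downstream concat switches from --naive to --allow-overlaps when contigs_header is set. A rough pysam equivalent of the header swap, as an analogy rather than the task's command:

    import pysam

    # Rough pysam analogy (not the task's bcftools command): replace a
    # VCF's ##contig records with a supplied set, keeping every other
    # header line and the sample columns intact.
    def replace_contig_lines(vcf_path, contig_lines):
        """contig_lines: iterable of '##contig=<ID=...,length=...>' strings."""
        with pysam.VariantFile(vcf_path) as vcf:
            new_header = pysam.VariantHeader()
            for rec in vcf.header.records:
                if rec.key in ("contig", "fileformat"):
                    continue
                new_header.add_record(rec)
            for line in contig_lines:
                new_header.add_line(line)
            for sample in vcf.header.samples:
                new_header.add_sample(sample)
            return new_header
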
RuntimeAttr? runtime_override_preconcat_clean_final RuntimeAttr? runtime_override_hail_merge_clean_final RuntimeAttr? runtime_override_fix_header_clean_final - RuntimeAttr? runtime_override_fix_bad_ends + RuntimeAttr? runtime_attr_format_clean RuntimeAttr? runtime_override_clean_vcf_1a RuntimeAttr? runtime_override_clean_vcf_2 @@ -1296,7 +1296,7 @@ workflow GATKSVPipelineSingleSample { runtime_override_benchmark_samples=runtime_override_benchmark_samples, runtime_override_split_shuffled_list=runtime_override_split_shuffled_list, runtime_override_merge_and_tar_shard_benchmarks=runtime_override_merge_and_tar_shard_benchmarks, - runtime_override_fix_bad_ends=runtime_override_fix_bad_ends + runtime_attr_format_clean=runtime_attr_format_clean } diff --git a/wdl/GenotypeComplexVariants.wdl b/wdl/GenotypeComplexVariants.wdl index 22e89c68a..73e28415d 100644 --- a/wdl/GenotypeComplexVariants.wdl +++ b/wdl/GenotypeComplexVariants.wdl @@ -8,7 +8,7 @@ workflow GenotypeComplexVariants { input { String cohort_name Array[String] batches - File merged_ped_file + File ped_file Array[File] depth_vcfs Boolean merge_vcfs = false @@ -65,7 +65,7 @@ workflow GenotypeComplexVariants { } call util.SubsetPedFile { input: - ped_file = merged_ped_file, + ped_file = ped_file, sample_list = GetSampleIdsFromVcf.out_file, subset_name = batches[i], sv_base_mini_docker = sv_base_mini_docker, @@ -87,7 +87,7 @@ workflow GenotypeComplexVariants { batches=batches, coverage_files=bincov_files, rd_depth_sep_cutoff_files=depth_gt_rd_sep_files, - merged_ped_file=merged_ped_file, + ped_file=ped_file, median_coverage_files=median_coverage_files, n_per_split_small=2500, n_per_split_large=250, diff --git a/wdl/GenotypeCpxCnvs.wdl b/wdl/GenotypeCpxCnvs.wdl index ab9f69e9a..7a4c2331d 100644 --- a/wdl/GenotypeCpxCnvs.wdl +++ b/wdl/GenotypeCpxCnvs.wdl @@ -20,7 +20,7 @@ workflow GenotypeCpxCnvs { Int n_per_split_large Int n_rd_test_bins String prefix - File merged_ped_file + File ped_file String contig File ref_dict @@ -96,7 +96,7 @@ workflow GenotypeCpxCnvs { intervals=GetCpxCnvIntervals.cpx_cnv_bed, genotypes=MergeMeltedGts.outfile, prefix=contig_prefix, - ped_file=merged_ped_file, + ped_file=ped_file, contig=contig, sv_pipeline_docker=sv_pipeline_docker, runtime_attr_override=runtime_override_parse_genotypes diff --git a/wdl/JoinRawCalls.wdl b/wdl/JoinRawCalls.wdl index aae4f50e3..3bc70a766 100644 --- a/wdl/JoinRawCalls.wdl +++ b/wdl/JoinRawCalls.wdl @@ -1,25 +1,30 @@ version 1.0 import "Structs.wdl" +import "FormatVcfForGatk.wdl" as format import "TasksClusterBatch.wdl" as tasks_cluster import "TasksMakeCohortVcf.wdl" as tasks_cohort -import "SVConcordance.wdl" as svc # Clusters raw call VCFs across batches - to be used for preparing raw calls for SV concordance analysis workflow JoinRawCalls { input { - String cohort + String prefix # ClusterBatch outputs Array[File]? clustered_manta_vcfs + Array[File]? clustered_manta_vcf_indexes Array[File]? clustered_melt_vcfs + Array[File]? clustered_melt_vcf_indexes Array[File]? clustered_scramble_vcfs + Array[File]? clustered_scramble_vcf_indexes Array[File]? clustered_wham_vcfs + Array[File]? clustered_wham_vcf_indexes Array[File]? clustered_depth_vcfs + Array[File]? clustered_depth_vcf_indexes - File ploidy_table + File ped_file String? preprocess_args @@ -28,37 +33,66 @@ workflow JoinRawCalls { File reference_fasta_fai File reference_dict + String? chr_x + String? chr_y + String gatk_docker String sv_base_mini_docker String sv_pipeline_docker Float? java_mem_fraction + RuntimeAttr? 
runtime_attr_create_ploidy + RuntimeAttr? runtime_override_concat_input_vcfs RuntimeAttr? runtime_attr_prepare_truth RuntimeAttr? runtime_attr_svcluster RuntimeAttr? runtime_override_concat_vcfs_pesr } - Array[File] vcfs_ = flatten(select_all([clustered_manta_vcfs, clustered_melt_vcfs, clustered_scramble_vcfs, clustered_wham_vcfs, clustered_depth_vcfs])) - scatter (i in range(length(vcfs_))) { - call svc.PreprocessVcf { + call tasks_cluster.CreatePloidyTableFromPed { + input: + ped_file=ped_file, + contig_list=contig_list, + retain_female_chr_y=false, + chr_x=chr_x, + chr_y=chr_y, + output_prefix="~{prefix}.ploidy", + sv_pipeline_docker=sv_pipeline_docker, + runtime_attr_override=runtime_attr_create_ploidy + } + + Array[Array[File]] vcf_matrix = transpose(select_all([clustered_manta_vcfs, clustered_melt_vcfs, clustered_scramble_vcfs, clustered_wham_vcfs, clustered_depth_vcfs])) + Array[Array[File]] vcf_index_matrix = transpose(select_all([clustered_manta_vcf_indexes, clustered_melt_vcf_indexes, clustered_scramble_vcf_indexes, clustered_wham_vcf_indexes, clustered_depth_vcf_indexes])) + scatter (i in range(length(vcf_matrix))) { + call tasks_cohort.ConcatVcfs as ConcatInputVcfs { + input: + vcfs=vcf_matrix[i], + vcfs_idx=vcf_index_matrix[i], + allow_overlaps=true, + outfile_prefix="~{prefix}.join_raw_calls.concat_batch_~{i}", + sv_base_mini_docker=sv_base_mini_docker, + runtime_attr_override=runtime_override_concat_input_vcfs + } + } + + scatter (i in range(length(ConcatInputVcfs.concat_vcf))) { + call format.FormatVcfForGatk { input: - vcf=vcfs_[i], - ploidy_table=ploidy_table, - args=preprocess_args, - output_prefix="~{cohort}.join_raw_calls.preprocess_~{i}", + vcf=ConcatInputVcfs.concat_vcf[i], + ped_file=ped_file, + contig_list=contig_list, + prefix="~{prefix}.join_raw_calls.format_~{i}", sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_prepare_truth + sv_base_mini_docker=sv_base_mini_docker } } - Array[String] contigs = transpose(read_tsv(contig_list))[0] - scatter (contig in contigs) { + scatter (contig in read_lines(contig_list)) { call tasks_cluster.SVCluster { input: - vcfs=PreprocessVcf.out, - ploidy_table=ploidy_table, - output_prefix="~{cohort}.join_raw_calls.~{contig}", + vcfs=FormatVcfForGatk.gatk_formatted_vcf, + ploidy_table=CreatePloidyTableFromPed.out, + output_prefix="~{prefix}.join_raw_calls.~{contig}", contig=contig, fast_mode=true, algorithm="SINGLE_LINKAGE", @@ -69,7 +103,7 @@ workflow JoinRawCalls { reference_fasta_fai=reference_fasta_fai, reference_dict=reference_dict, java_mem_fraction=java_mem_fraction, - variant_prefix="~{cohort}_~{contig}_", + variant_prefix="~{prefix}_~{contig}_", gatk_docker=gatk_docker, runtime_attr_override=runtime_attr_svcluster } @@ -80,7 +114,7 @@ workflow JoinRawCalls { vcfs=SVCluster.out, vcfs_idx=SVCluster.out_index, naive=true, - outfile_prefix="~{cohort}.join_raw_calls", + outfile_prefix="~{prefix}.join_raw_calls", sv_base_mini_docker=sv_base_mini_docker, runtime_attr_override=runtime_override_concat_vcfs_pesr } diff --git a/wdl/MakeCohortVcf.wdl b/wdl/MakeCohortVcf.wdl index 7a8fd10b2..a3b40f8e2 100644 --- a/wdl/MakeCohortVcf.wdl +++ b/wdl/MakeCohortVcf.wdl @@ -170,7 +170,6 @@ workflow MakeCohortVcf { RuntimeAttr? runtime_override_preconcat_clean_final RuntimeAttr? runtime_override_hail_merge_clean_final RuntimeAttr? runtime_override_fix_header_clean_final - RuntimeAttr? runtime_override_fix_bad_ends RuntimeAttr? runtime_override_clean_vcf_1a RuntimeAttr? 
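A note on the regrouping in JoinRawCalls: each per-caller input array is indexed by batch, so transpose(select_all([...])) turns the caller-major arrays into one row per batch holding that batch's VCF from every caller that was run. Each row is concatenated, formatted for GATK, and only then clustered per contig. In toy Python terms:

    # Toy illustration of the WDL transpose/select_all regrouping:
    # caller-major arrays in, batch-major rows out. File names are
    # placeholders.
    manta = ["manta.batch0.vcf.gz", "manta.batch1.vcf.gz"]
    wham = ["wham.batch0.vcf.gz", "wham.batch1.vcf.gz"]
    depth = ["depth.batch0.vcf.gz", "depth.batch1.vcf.gz"]
    melt = None  # caller not run; select_all drops it

    per_caller = [v for v in (manta, melt, wham, depth) if v is not None]
    per_batch = [list(row) for row in zip(*per_caller)]  # WDL transpose()

    assert per_batch[0] == ["manta.batch0.vcf.gz",
                            "wham.batch0.vcf.gz",
                            "depth.batch0.vcf.gz"]
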
runtime_override_clean_vcf_2 @@ -182,6 +181,7 @@ workflow MakeCohortVcf { RuntimeAttr? runtime_override_clean_vcf_5_polish RuntimeAttr? runtime_override_stitch_fragmented_cnvs RuntimeAttr? runtime_override_final_cleanup + RuntimeAttr? runtime_attr_format_clean RuntimeAttr? runtime_attr_override_subset_large_cnvs_1b RuntimeAttr? runtime_attr_override_sort_bed_1b @@ -340,7 +340,7 @@ workflow MakeCohortVcf { complex_resolve_vcfs=ResolveComplexVariants.complex_resolve_vcfs, complex_resolve_vcf_indexes=ResolveComplexVariants.complex_resolve_vcf_indexes, depth_vcfs=depth_vcfs, - merged_ped_file=ped_file, + ped_file=ped_file, bincov_files=bincov_files, depth_gt_rd_sep_files=depth_gt_rd_sep_files, median_coverage_files=median_coverage_files, @@ -375,7 +375,7 @@ workflow MakeCohortVcf { complex_genotype_vcfs=GenotypeComplexVariants.complex_genotype_vcfs, complex_resolve_bothside_pass_lists=ResolveComplexVariants.complex_resolve_bothside_pass_lists, complex_resolve_background_fail_lists=ResolveComplexVariants.complex_resolve_background_fail_lists, - merged_ped_file=ped_file, + ped_file=ped_file, contig_list=contig_list, allosome_fai=allosome_fai, chr_x=chr_x, @@ -441,7 +441,7 @@ workflow MakeCohortVcf { runtime_override_drop_redundant_cnvs=runtime_override_drop_redundant_cnvs, runtime_override_combine_step_1_vcfs=runtime_override_combine_step_1_vcfs, runtime_override_sort_drop_redundant_cnvs=runtime_override_sort_drop_redundant_cnvs, - runtime_override_fix_bad_ends=runtime_override_fix_bad_ends + runtime_attr_format=runtime_attr_format_clean } call VcfQc.MainVcfQc { diff --git a/wdl/PESRClustering.wdl b/wdl/PESRClustering.wdl index 1405e4106..617ac0b00 100644 --- a/wdl/PESRClustering.wdl +++ b/wdl/PESRClustering.wdl @@ -128,8 +128,6 @@ task PreparePESRVcfs { File ploidy_table Int min_size File? script - String? remove_infos - String? remove_formats String output_prefix String sv_pipeline_docker RuntimeAttr? runtime_attr_override @@ -162,17 +160,16 @@ task PreparePESRVcfs { --vcf $VCF \ --out tmp.vcf.gz \ --ploidy-table ~{ploidy_table} \ - ~{"--remove-infos " + remove_infos} \ - ~{"--remove-formats " + remove_formats} + --fix-end # Interval, contig, and size filtering bcftools query -f '%CHROM\t%POS\t%POS\t%ID\t%SVTYPE\n%CHROM\t%END\t%END\t%ID\t%SVTYPE\n%CHR2\t%END2\t%END2\t%ID\t%SVTYPE\n' tmp.vcf.gz \ - | awk '$1!="."' \ + | awk '$1!="." && $2!="."' \ | sort -k1,1V -k2,2n -k3,3n \ > ends.bed bedtools intersect -sorted -u -wa -g genome.file -wa -a ends.bed -b ~{exclude_intervals} | cut -f4 | sort | uniq \ > excluded_vids.list - bcftools view -i 'ID!=@excluded_vids.list && (INFO/SVLEN="." || INFO/SVLEN>=~{min_size})' tmp.vcf.gz \ + bcftools view -i 'ID!=@excluded_vids.list && (INFO/SVLEN="." || INFO/SVLEN=-1 || INFO/SVLEN>=~{min_size})' tmp.vcf.gz \ -Oz -o out/$SAMPLE_NUM.$NAME.vcf.gz tabix out/$SAMPLE_NUM.$NAME.vcf.gz i=$((i+1)) diff --git a/wdl/SVConcordance.wdl b/wdl/SVConcordance.wdl index 3e144cc19..cd50b40f0 100644 --- a/wdl/SVConcordance.wdl +++ b/wdl/SVConcordance.wdl @@ -5,99 +5,32 @@ import "TasksMakeCohortVcf.wdl" as tasks_cohort workflow SVConcordance { input { + # Vcfs must be formatted using FormatVcfForGatk (if unsure, check for ECN FORMAT field) File eval_vcf File truth_vcf - - File ploidy_table - String cohort - - Boolean? run_svutils_truth_vcf - Boolean? run_formatter_truth_vcf - String? formatter_truth_args - - Boolean? run_svutils_eval_vcf - Boolean? run_formatter_eval_vcf - String? formatter_eval_args - - # For testing - File? 
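The updated filter in PreparePESRVcfs now also drops BED rows whose start coordinate is missing and admits SVLEN of -1 alongside ".". The size predicate, restated as plain Python for clarity (treating -1 as an unknown-length sentinel, e.g. for insertions, is an assumption about the encoding):

    # The bcftools expression 'INFO/SVLEN="." || INFO/SVLEN=-1 ||
    # INFO/SVLEN>=min_size' as a Python predicate.
    def passes_min_size(svlen, min_size):
        if svlen is None or svlen == -1:  # '.' or unknown-length sentinel
            return True
        return svlen >= min_size

    assert passes_min_size(None, 50)   # SVLEN="."
    assert passes_min_size(-1, 50)     # unknown length
    assert not passes_min_size(30, 50)
    assert passes_min_size(5000, 50)
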
svtk_to_gatk_script + String output_prefix File contig_list File reference_dict String gatk_docker String sv_base_mini_docker - String sv_pipeline_docker - String sv_utils_docker Float? java_mem_fraction - RuntimeAttr? runtime_attr_svutils_truth - RuntimeAttr? runtime_attr_format_truth - RuntimeAttr? runtime_attr_svutils_eval RuntimeAttr? runtime_attr_format_eval + RuntimeAttr? runtime_attr_format_truth RuntimeAttr? runtime_attr_sv_concordance RuntimeAttr? runtime_attr_postprocess RuntimeAttr? runtime_override_concat_shards } - Boolean run_svutils_truth_vcf_ = select_first([run_svutils_truth_vcf, true]) - Boolean run_formatter_truth_vcf_ = select_first([run_formatter_truth_vcf, true]) - - Boolean run_svutils_eval_vcf_ = select_first([run_svutils_eval_vcf, true]) - Boolean run_formatter_eval_vcf_ = select_first([run_formatter_eval_vcf, true]) - - if (run_svutils_truth_vcf_) { - call SvutilsFixVcf as SvutilsTruth { - input: - vcf=truth_vcf, - output_prefix="~{cohort}.svutils_truth", - sv_utils_docker=sv_utils_docker, - runtime_attr_override=runtime_attr_svutils_truth - } - } - if (run_formatter_truth_vcf_) { - call PreprocessVcf as FormatTruth { - input: - vcf=select_first([SvutilsTruth.out, truth_vcf]), - ploidy_table=ploidy_table, - args=formatter_truth_args, - output_prefix="~{cohort}.format_truth", - script=svtk_to_gatk_script, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_format_truth - } - } - - if (run_svutils_eval_vcf_) { - call SvutilsFixVcf as SvutilsEval { - input: - vcf=eval_vcf, - output_prefix="~{cohort}.svutils_eval", - sv_utils_docker=sv_utils_docker, - runtime_attr_override=runtime_attr_svutils_eval - } - } - if (run_formatter_eval_vcf_) { - call PreprocessVcf as FormatEval { - input: - vcf=select_first([SvutilsEval.out, eval_vcf]), - ploidy_table=ploidy_table, - args=formatter_eval_args, - output_prefix="~{cohort}.format_eval", - script=svtk_to_gatk_script, - sv_pipeline_docker=sv_pipeline_docker, - runtime_attr_override=runtime_attr_format_eval - } - } - - Array[String] contigs = transpose(read_tsv(contig_list))[0] - scatter (contig in contigs) { + scatter (contig in read_lines(contig_list)) { call SVConcordanceTask { input: - eval_vcf=select_first([FormatEval.out, SvutilsEval.out, eval_vcf]), - truth_vcf=select_first([FormatTruth.out, SvutilsTruth.out, truth_vcf]), - output_prefix="~{cohort}.concordance.~{contig}", + eval_vcf=eval_vcf, + truth_vcf=truth_vcf, + output_prefix="~{output_prefix}.concordance.~{contig}", contig=contig, reference_dict=reference_dict, java_mem_fraction=java_mem_fraction, @@ -111,7 +44,7 @@ workflow SVConcordance { vcfs=SVConcordanceTask.out, vcfs_idx=SVConcordanceTask.out_index, naive=true, - outfile_prefix="~{cohort}.concordance", + outfile_prefix="~{output_prefix}.concordance", sv_base_mini_docker=sv_base_mini_docker, runtime_attr_override=runtime_override_concat_shards } @@ -119,102 +52,6 @@ workflow SVConcordance { output { File concordance_vcf = ConcatVcfs.concat_vcf File concordance_vcf_index = ConcatVcfs.concat_vcf_idx - File? filtered_eval_records_vcf = FormatEval.filtered - File? filtered_eval_records_index =FormatEval.filtered_index - File? filtered_truth_records_vcf = FormatTruth.filtered - File? filtered_truth_records_index = FormatTruth.filtered_index - } -} - -task SvutilsFixVcf { - input { - File vcf - String output_prefix - String sv_utils_docker - RuntimeAttr? 
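Per the new comment at the top of SVConcordance, inputs are now expected to be pre-formatted with FormatVcfForGatk rather than formatted inside the workflow, and the ECN FORMAT field is the telltale. A quick check along those lines:

    import sys
    import pysam

    # Quick sanity check suggested by the workflow comment: a
    # GATK-formatted VCF should declare the ECN FORMAT field.
    def is_gatk_formatted(vcf_path):
        with pysam.VariantFile(vcf_path) as vcf:
            return "ECN" in vcf.header.formats

    if __name__ == "__main__":
        print(is_gatk_formatted(sys.argv[1]))
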
runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: ceil(10 + size(vcf, "GB") * 2), - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - output { - File out = "~{output_prefix}.vcf.gz" - File out_index = "~{output_prefix}.vcf.gz.tbi" - } - command <<< - set -euo pipefail - sv-utils fix-vcf ~{vcf} ~{output_prefix}.vcf.gz - >>> - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_utils_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -task PreprocessVcf { - input { - File vcf - File ploidy_table - File? script - String? args - String output_prefix - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: ceil(10 + size(vcf, "GB") * 2), - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - output { - File out = "~{output_prefix}.vcf.gz" - File out_index = "~{output_prefix}.vcf.gz.tbi" - File filtered = "~{output_prefix}.filtered_records.vcf.gz" - File filtered_index = "~{output_prefix}.filtered_records.vcf.gz.tbi" - } - command <<< - set -euo pipefail - - # Convert format - python ~{default="/opt/sv-pipeline/scripts/format_svtk_vcf_for_gatk.py" script} \ - --vcf ~{vcf} \ - --out tmp.vcf.gz \ - --filter-out ~{output_prefix}.filtered_records.vcf.gz \ - --ploidy-table ~{ploidy_table} \ - ~{args} - - # TODO Filter invalid records with SVLEN=0, only needed for legacy runs that used svtk cluster in ClusterBatch - bcftools view --no-version -i 'INFO/SVLEN="." || INFO/SVLEN>0' tmp.vcf.gz -Oz -o ~{output_prefix}.vcf.gz - - tabix ~{output_prefix}.vcf.gz - tabix ~{output_prefix}.filtered_records.vcf.gz - >>> - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) } } @@ -226,6 +63,7 @@ task SVConcordanceTask { File reference_dict String? contig String? additional_args + Float? java_mem_fraction String gatk_docker RuntimeAttr? 
runtime_attr_override @@ -242,7 +80,7 @@ task SVConcordanceTask { RuntimeAttr default_attr = object { cpu_cores: 1, - mem_gb: 3.75, + mem_gb: 16, disk_gb: ceil(10 + size(eval_vcf, "GB") * 2 + size(truth_vcf, "GB")), boot_disk_gb: 10, preemptible_tries: 3, @@ -276,7 +114,6 @@ task SVConcordanceTask { --eval ~{eval_vcf} \ --truth ~{truth_vcf} \ -O ~{output_prefix}.vcf.gz \ - --force-biallelic-dups \ ~{additional_args} >>> runtime { diff --git a/wdl/ScatterCpxGenotyping.wdl b/wdl/ScatterCpxGenotyping.wdl index 694059784..2315f79ec 100644 --- a/wdl/ScatterCpxGenotyping.wdl +++ b/wdl/ScatterCpxGenotyping.wdl @@ -22,7 +22,7 @@ workflow ScatterCpxGenotyping { Int n_per_split_large Int n_rd_test_bins String prefix - File merged_ped_file + File ped_file String contig File ref_dict @@ -82,7 +82,7 @@ workflow ScatterCpxGenotyping { n_per_split_small=n_per_split_small, n_rd_test_bins=n_rd_test_bins, prefix=prefix, - merged_ped_file=merged_ped_file, + ped_file=ped_file, contig=contig, ref_dict=ref_dict, linux_docker=linux_docker, diff --git a/wdl/TasksClusterBatch.wdl b/wdl/TasksClusterBatch.wdl index 3bf35e9de..cab695ca9 100644 --- a/wdl/TasksClusterBatch.wdl +++ b/wdl/TasksClusterBatch.wdl @@ -28,12 +28,15 @@ task SVCluster { Float? depth_sample_overlap Float? depth_interval_overlap + Float? depth_size_similarity Int? depth_breakend_window Float? mixed_sample_overlap Float? mixed_interval_overlap + Float? mixed_size_similarity Int? mixed_breakend_window Float? pesr_sample_overlap Float? pesr_interval_overlap + Float? pesr_size_similarity Int? pesr_breakend_window File reference_fasta @@ -98,24 +101,27 @@ task SVCluster { --arguments_file arguments.txt \ --output ~{output_prefix}.vcf.gz \ --ploidy-table ~{ploidy_table} \ - --variant-prefix ~{variant_prefix} \ --reference ~{reference_fasta} \ ~{"-L " + contig} \ ~{true="--fast-mode" false="" fast_mode} \ ~{true="--enable-cnv" false="" enable_cnv} \ ~{true="--omit-members" false="" omit_members} \ ~{true="--default-no-call" false="" default_no_call} \ + ~{"--variant-prefix " + variant_prefix} \ ~{"--algorithm " + algorithm} \ ~{"--defrag-padding-fraction " + defrag_padding_fraction} \ ~{"--defrag-sample-overlap " + defrag_sample_overlap} \ ~{"--depth-sample-overlap " + depth_sample_overlap} \ ~{"--depth-interval-overlap " + depth_interval_overlap} \ + ~{"--depth-size-similarity " + depth_size_similarity} \ ~{"--depth-breakend-window " + depth_breakend_window} \ ~{"--mixed-sample-overlap " + mixed_sample_overlap} \ ~{"--mixed-interval-overlap " + mixed_interval_overlap} \ + ~{"--mixed-size-similarity " + mixed_size_similarity} \ ~{"--mixed-breakend-window " + mixed_breakend_window} \ ~{"--pesr-sample-overlap " + pesr_sample_overlap} \ ~{"--pesr-interval-overlap " + pesr_interval_overlap} \ + ~{"--pesr-size-similarity " + pesr_size_similarity} \ ~{"--pesr-breakend-window " + pesr_breakend_window} \ ~{"--insertion-length-summary-strategy " + insertion_length_summary_strategy} \ ~{"--breakpoint-summary-strategy " + breakpoint_summary_strategy} \ @@ -333,6 +339,7 @@ task CreatePloidyTableFromPed { File ped_file File? script File contig_list + Boolean retain_female_chr_y = false String? chr_x String? 
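The new --depth/--mixed/--pesr-size-similarity parameters expose another SVCluster linkage criterion alongside interval overlap and breakend windows. A hedged sketch of the reciprocal size-similarity metric such thresholds typically gate on; the exact definition used by GATK's SVCluster is not shown in this patch:

    # Hedged sketch of a reciprocal size-similarity metric in [0, 1]:
    # min length over max length. The precise GATK definition may differ.
    def size_similarity(len_a, len_b):
        a, b = abs(len_a), abs(len_b)
        if a == 0 or b == 0:
            return 0.0
        return min(a, b) / max(a, b)

    assert size_similarity(100, 100) == 1.0
    assert size_similarity(50, 100) == 0.5
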
chr_y String output_prefix @@ -350,20 +357,26 @@ task CreatePloidyTableFromPed { } RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) + String output_file = if retain_female_chr_y then "~{output_prefix}.FEMALE_chrY_1.tsv" else "~{output_prefix}.tsv" + output { - File out = "~{output_prefix}.tsv" + File out = "~{output_file}" } command <<< set -euo pipefail python ~{default="/opt/sv-pipeline/scripts/ploidy_table_from_ped.py" script} \ --ped ~{ped_file} \ - --out ~{output_prefix}.tsv.tmp \ + --out tmp.tsv \ --contigs ~{contig_list} \ ~{"--chr-x " + chr_x} \ ~{"--chr-y " + chr_y} # TODO : For now we retain female Y genotypes for metric generation - sed -e 's/\t0/\t1/g' ~{output_prefix}.tsv.tmp > ~{output_prefix}.tsv + if ~{retain_female_chr_y}; then + sed -e 's/\t0/\t1/g' tmp.tsv > ~{output_file} + else + mv tmp.tsv ~{output_file} + fi >>> runtime { cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) diff --git a/wdl/TasksMakeCohortVcf.wdl b/wdl/TasksMakeCohortVcf.wdl index dba6b41e1..1cf9237d7 100644 --- a/wdl/TasksMakeCohortVcf.wdl +++ b/wdl/TasksMakeCohortVcf.wdl @@ -1006,86 +1006,3 @@ task ScatterVcf { Array[File] shards = glob("~{prefix}.shard_*.vcf.gz") } } - -task FixEndsRescaleGQ { - input { - File vcf - String prefix - - Boolean? fix_ends - Boolean? rescale_gq - - String sv_pipeline_docker - RuntimeAttr? runtime_attr_override - } - - RuntimeAttr default_attr = object { - cpu_cores: 1, - mem_gb: 3.75, - disk_gb: ceil(10 + size(vcf, "GB") * 2), - boot_disk_gb: 10, - preemptible_tries: 3, - max_retries: 1 - } - RuntimeAttr runtime_attr = select_first([runtime_attr_override, default_attr]) - - String outfile = "~{prefix}.vcf.gz" - Boolean fix_ends_ = select_first([fix_ends, true]) - Boolean rescale_gq_ = select_first([rescale_gq, true]) - - output { - File out = "~{outfile}" - File out_idx = "~{outfile}.tbi" - } - command <<< - - set -euo pipefail - - python <>> - runtime { - cpu: select_first([runtime_attr.cpu_cores, default_attr.cpu_cores]) - memory: select_first([runtime_attr.mem_gb, default_attr.mem_gb]) + " GiB" - disks: "local-disk " + select_first([runtime_attr.disk_gb, default_attr.disk_gb]) + " HDD" - bootDiskSizeGb: select_first([runtime_attr.boot_disk_gb, default_attr.boot_disk_gb]) - docker: sv_pipeline_docker - preemptible: select_first([runtime_attr.preemptible_tries, default_attr.preemptible_tries]) - maxRetries: select_first([runtime_attr.max_retries, default_attr.max_retries]) - } -} - -
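Finally, on the retain_female_chr_y branch in CreatePloidyTableFromPed above: the `sed -e 's/\t0/\t1/g'` lifts every ploidy of 0 in the table to 1, which in a standard table only affects female chrY entries, so those samples still receive chrY genotypes for metric generation. The same transform field-wise in Python (equivalent to the sed for single-digit ploidies):

    # Field-wise restatement of `sed -e 's/\t0/\t1/g'` on the ploidy
    # table: lift ploidy 0 (female chrY in a standard table) to 1.
    def retain_female_chr_y(table_text):
        lines = table_text.splitlines()
        fixed = [lines[0]]  # header row unchanged
        for line in lines[1:]:
            fields = line.split("\t")
            fixed.append("\t".join("1" if f == "0" else f for f in fields))
        return "\n".join(fixed)
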