From 35e8aaa9e34acf242a768f82178d2b9179750b1e Mon Sep 17 00:00:00 2001
From: gbggrant
Date: Tue, 22 Oct 2024 11:45:05 -0400
Subject: [PATCH] Fixed another bug

---
 .../wdl/GvsExtractAvroFilesForHail.wdl         |  4 +---
 scripts/variantstore/wdl/GvsUtils.wdl          |  2 +-
 scripts/variantstore/wdl/extract/import_gvs.py | 16 ++++++++--------
 .../wdl/test/GvsQuickstartIntegration.wdl      |  2 +-
 4 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/scripts/variantstore/wdl/GvsExtractAvroFilesForHail.wdl b/scripts/variantstore/wdl/GvsExtractAvroFilesForHail.wdl
index b539ef10542..2b366c22d86 100644
--- a/scripts/variantstore/wdl/GvsExtractAvroFilesForHail.wdl
+++ b/scripts/variantstore/wdl/GvsExtractAvroFilesForHail.wdl
@@ -196,8 +196,6 @@ task ExtractFromFilterTables {
         String variants_docker
     }
 
-    String vets_score_field = 'calibration_sensitivity'
-
     parameter_meta {
         avro_sibling: "Cloud path to a file that will be the sibling to the 'avro' 'directory' under which output Avro files will be written."
     }
@@ -212,7 +210,7 @@ task ExtractFromFilterTables {
        python3 /app/run_avro_query.py --sql "
            EXPORT DATA OPTIONS(
                uri='${avro_prefix}/vets_filtering_data/vets_filtering_data_*.avro', format='AVRO', compression='SNAPPY') AS
-           SELECT location, type as model, ref, alt, ~{vets_score_field}, yng_status
+           SELECT location, type as model, ref, alt, calibration_sensitivity, yng_status
            FROM \`~{project_id}.~{dataset_name}.filter_set_info\`
            WHERE filter_set_name = '~{filter_set_name}'
            ORDER BY location
diff --git a/scripts/variantstore/wdl/GvsUtils.wdl b/scripts/variantstore/wdl/GvsUtils.wdl
index e0666d1aa51..b2b7a712c95 100644
--- a/scripts/variantstore/wdl/GvsUtils.wdl
+++ b/scripts/variantstore/wdl/GvsUtils.wdl
@@ -72,7 +72,7 @@ task GetToolVersions {
         # GVS generally uses the smallest `alpine` version of the Google Cloud SDK as it suffices for most tasks, but
         # there are a handful of tasks that require the larger GNU libc-based `slim`.
         String cloud_sdk_slim_docker = "gcr.io/google.com/cloudsdktool/cloud-sdk:435.0.0-slim"
-        String variants_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/variants:2024-10-21-alpine-d931b2311c9e"
+        String variants_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/variants:2024-10-22-alpine-e7443149b8db"
         String variants_nirvana_docker = "us.gcr.io/broad-dsde-methods/variantstore:nirvana_2022_10_19"
         String gatk_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/gatk:2024_10_10-gatkbase-1cd1f9652cb9"
         String real_time_genomics_docker = "docker.io/realtimegenomics/rtg-tools:latest"
diff --git a/scripts/variantstore/wdl/extract/import_gvs.py b/scripts/variantstore/wdl/extract/import_gvs.py
index 8a096106724..2b2aeb7bf4d 100644
--- a/scripts/variantstore/wdl/extract/import_gvs.py
+++ b/scripts/variantstore/wdl/extract/import_gvs.py
@@ -168,7 +168,7 @@ def convert_array_with_id_keys_to_dense_array(arr, ids, drop=[]):
         return hl.rbind(sdict, lambda sdict: ids.map(lambda x: sdict.get(x)))
 
     site_path = os.path.join(tmp_dir, 'site_filters.ht')
-    vets_path = os.path.join(tmp_dir, 'vets.ht')
+    vets_filter_path = os.path.join(tmp_dir, 'vets_filters.ht')
 
     if intermediate_resume_point > 0:
         info('import_gvs: skipping site and VETS filter import')
@@ -183,12 +183,12 @@ def convert_array_with_id_keys_to_dense_array(arr, ids, drop=[]):
         site.write(site_path, overwrite=True)
 
         info('import_gvs: Importing and writing VETS filter data to temporary storage')
-        vets = hl.import_avro(vets_filtering_data)
-        vets = vets.transmute(
-            locus=translate_locus(vets.location)
+        vets_filter = hl.import_avro(vets_filtering_data)
+        vets_filter = vets_filter.transmute(
+            locus=translate_locus(vets_filter.location)
         )
-        vets = vets.key_by('locus')
-        vets.write(vets_path, overwrite=True)
+        vets_filter = vets_filter.key_by('locus')
+        vets_filter.write(vets_filter_path, overwrite=True)
 
     n_samples = 0
 
@@ -329,12 +329,12 @@ def convert_array_with_id_keys_to_dense_array(arr, ids, drop=[]):
 
     # read site and vets data with same intervals for efficient joins
     site = hl.read_table(site_path, _intervals=target_final_intervals)
-    vets = hl.read_table(vets_path, _intervals=target_final_intervals)
+    vets_filter = hl.read_table(vets_filter_path, _intervals=target_final_intervals)
 
     vd = vd.annotate_rows(filters=hl.coalesce(site[vd.locus].filters, hl.empty_set(hl.tstr)))
 
     # vets ref/alt come in normalized individually, so need to renormalize to the dataset ref allele
-    vd = vd.annotate_rows(as_vets = hl.dict(vets.index(vd.locus, all_matches=True)
+    vd = vd.annotate_rows(as_vets = hl.dict(vets_filter.index(vd.locus, all_matches=True)
                                    .map(lambda record: (record.alt + vd.alleles[0][hl.len(record.ref):],
                                                         record.drop('ref', 'alt')))))
     vd = vd.annotate_globals(truth_sensitivity_snp_threshold=truth_sensitivity_snp_threshold,
diff --git a/scripts/variantstore/wdl/test/GvsQuickstartIntegration.wdl b/scripts/variantstore/wdl/test/GvsQuickstartIntegration.wdl
index f1631eddfbe..7f018d38b8c 100644
--- a/scripts/variantstore/wdl/test/GvsQuickstartIntegration.wdl
+++ b/scripts/variantstore/wdl/test/GvsQuickstartIntegration.wdl
@@ -5,7 +5,7 @@
 import "GvsQuickstartHailIntegration.wdl" as QuickstartHailIntegration
 import "../GvsJointVariantCalling.wdl" as JointVariantCalling
 import "../GvsUtils.wdl" as Utils
 
-# comment that is still here!
+# comment that is still here!!
 workflow GvsQuickstartIntegration {
     input {