Skip to content

Commit

Permalink
Fixed another bug
Browse files Browse the repository at this point in the history
  • Loading branch information
gbggrant committed Oct 22, 2024
1 parent 7e866e6 commit 35e8aaa
Show file tree
Hide file tree
Showing 4 changed files with 11 additions and 13 deletions.
4 changes: 1 addition & 3 deletions scripts/variantstore/wdl/GvsExtractAvroFilesForHail.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -196,8 +196,6 @@ task ExtractFromFilterTables {
String variants_docker
}

String vets_score_field = 'calibration_sensitivity'

parameter_meta {
avro_sibling: "Cloud path to a file that will be the sibling to the 'avro' 'directory' under which output Avro files will be written."
}
Expand All @@ -212,7 +210,7 @@ task ExtractFromFilterTables {
python3 /app/run_avro_query.py --sql "
EXPORT DATA OPTIONS(
uri='${avro_prefix}/vets_filtering_data/vets_filtering_data_*.avro', format='AVRO', compression='SNAPPY') AS
SELECT location, type as model, ref, alt, ~{vets_score_field}, yng_status
SELECT location, type as model, ref, alt, calibration_sensitivity, yng_status
FROM \`~{project_id}.~{dataset_name}.filter_set_info\`
WHERE filter_set_name = '~{filter_set_name}'
ORDER BY location
Expand Down
2 changes: 1 addition & 1 deletion scripts/variantstore/wdl/GvsUtils.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ task GetToolVersions {
# GVS generally uses the smallest `alpine` version of the Google Cloud SDK as it suffices for most tasks, but
# there are a handful of tasks that require the larger GNU libc-based `slim`.
String cloud_sdk_slim_docker = "gcr.io/google.com/cloudsdktool/cloud-sdk:435.0.0-slim"
String variants_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/variants:2024-10-21-alpine-d931b2311c9e"
String variants_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/variants:2024-10-22-alpine-e7443149b8db"
String variants_nirvana_docker = "us.gcr.io/broad-dsde-methods/variantstore:nirvana_2022_10_19"
String gatk_docker = "us-central1-docker.pkg.dev/broad-dsde-methods/gvs/gatk:2024_10_10-gatkbase-1cd1f9652cb9"
String real_time_genomics_docker = "docker.io/realtimegenomics/rtg-tools:latest"
Expand Down
16 changes: 8 additions & 8 deletions scripts/variantstore/wdl/extract/import_gvs.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,7 @@ def convert_array_with_id_keys_to_dense_array(arr, ids, drop=[]):
return hl.rbind(sdict, lambda sdict: ids.map(lambda x: sdict.get(x)))

site_path = os.path.join(tmp_dir, 'site_filters.ht')
vets_path = os.path.join(tmp_dir, 'vets.ht')
vets_filter_path = os.path.join(tmp_dir, 'vets_filters.ht')

if intermediate_resume_point > 0:
info('import_gvs: skipping site and VETS filter import')
Expand All @@ -183,12 +183,12 @@ def convert_array_with_id_keys_to_dense_array(arr, ids, drop=[]):
site.write(site_path, overwrite=True)

info('import_gvs: Importing and writing VETS filter data to temporary storage')
vets = hl.import_avro(vets_filtering_data)
vets = vets.transmute(
locus=translate_locus(vets.location)
vets_filter = hl.import_avro(vets_filtering_data)
vets_filter = vets_filter.transmute(
locus=translate_locus(vets_filter.location)
)
vets = vets.key_by('locus')
vets.write(vets_path, overwrite=True)
vets_filter = vets_filter.key_by('locus')
vets_filter.write(vets_filter_path, overwrite=True)

n_samples = 0

Expand Down Expand Up @@ -329,12 +329,12 @@ def convert_array_with_id_keys_to_dense_array(arr, ids, drop=[]):

# read site and vets data with same intervals for efficient joins
site = hl.read_table(site_path, _intervals=target_final_intervals)
vets = hl.read_table(vets_path, _intervals=target_final_intervals)
vets_filter = hl.read_table(vets_filter_path, _intervals=target_final_intervals)

vd = vd.annotate_rows(filters=hl.coalesce(site[vd.locus].filters, hl.empty_set(hl.tstr)))

# vets ref/alt come in normalized individually, so need to renormalize to the dataset ref allele
vd = vd.annotate_rows(as_vets = hl.dict(vets.index(vd.locus, all_matches=True)
vd = vd.annotate_rows(as_vets = hl.dict(vets_filter.index(vd.locus, all_matches=True)
.map(lambda record: (record.alt + vd.alleles[0][hl.len(record.ref):], record.drop('ref', 'alt')))))

vd = vd.annotate_globals(truth_sensitivity_snp_threshold=truth_sensitivity_snp_threshold,
Expand Down
2 changes: 1 addition & 1 deletion scripts/variantstore/wdl/test/GvsQuickstartIntegration.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ import "GvsQuickstartHailIntegration.wdl" as QuickstartHailIntegration
import "../GvsJointVariantCalling.wdl" as JointVariantCalling
import "../GvsUtils.wdl" as Utils

# comment that is still here!
# comment that is still here!!
workflow GvsQuickstartIntegration {
input {
Expand Down

0 comments on commit 35e8aaa

Please sign in to comment.