Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[bio] Update biomed property embeddings. #4721

Merged
merged 9 commits into from
Nov 11, 2024
2 changes: 1 addition & 1 deletion deploy/nl/catalog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ indexes:
bio_ft:
store_type: MEMORY
source_path: ../../tools/nl/embeddings/input/bio
embeddings_path: gs://datcom-nl-models/bio_ft_2024_11_05_09_59_39/embeddings.csv
embeddings_path: gs://datcom-nl-models/bio_ft_2024_11_08_19_00_38/embeddings.csv
model: ft-final-v20230717230459-all-MiniLM-L6-v2
healthcheck_query: "Gene"
base_uae_lance:
Expand Down
7 changes: 5 additions & 2 deletions server/integration_tests/explore_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -761,14 +761,17 @@ def test_e2e_triple(self):
self.run_detect_and_fulfill(
'e2e_triple',
[
# ----- Context Based Queries -----
# Should have 'out' properties as answer
'What strand orientation does FGFR1 have?',
# Should use context for the entity
'what transcripts does it have',
'what genomic coordinates does it have',
# Should use context for the property
'how about for P53',
'how about for PQLC3',
# Should not use context because no entity or property found
'what animal is that found in',

# ----- Singleton Queries -----
# Should have 'in' properties as answer
'What is Betacoronavirus 1 the species of',
# Should have a chained property in the answer
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
0.314266,
0.312765,
0.312629,
0.312504,
0.312505,
0.311774,
0.310339,
0.310075,
Expand Down Expand Up @@ -75,24 +75,23 @@
0.31467,
0.308395,
0.307941,
0.30463,
0.282709,
0.281875,
0.280828,
0.280453,
0.274298,
0.271505,
0.270718,
0.269547,
0.259901,
0.259901,
0.248279,
0.247354,
0.24662,
0.246321,
0.246164,
0.242123,
0.246163,
0.238985,
0.231355,
0.229624
0.230288,
0.229624,
0.227736,
0.221732,
0.21856,
0.216932
],
"PROP": [
"phylum",
Expand All @@ -106,24 +105,23 @@
"ensemblID",
"simplifiedMolecularInputLineEntrySystem",
"geneticVariantFunctionalCategory",
"imageUrl",
"genomicCoordinates",
"chromosomeSize",
"observedAllele",
"hg38GenomicPosition",
"hg19GenomicPosition",
"virusHost",
"strandOrientation",
"ncbiDNASequenceName",
"<-geneID{typeOf:GeneGeneticVariantAssociation}->variantID",
"<-variantID{typeOf:GeneGeneticVariantAssociation}->geneID",
"ncbiProteinAccessionNumber",
"hgncID",
"alleleType",
"hg38GenomicLocation",
"ofVirusSpecies",
"hg19GenomicLocation",
"ncbiTaxonID",
"antigenType"
"hasGenomicCoordinates",
"ncbiTaxId",
"umlsConceptUniqueID",
"antigenType",
"alleleOrigin",
"<-diseaseID{typeOf:DiseaseGeneAssociation}->geneID",
"antibodyType",
"inChIKey"
]
},
"query_detection_debug_logs": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
0.400638,
0.398746,
0.398361,
0.390523,
0.390524,
0.389033,
0.388461,
0.388382,
Expand All @@ -35,7 +35,7 @@
0.385679,
0.385434,
0.385339,
0.381842,
0.381843,
0.379705
],
"MultiSV": {},
Expand Down Expand Up @@ -77,24 +77,23 @@
0.65902,
0.63579,
0.63404,
0.617156,
0.617157,
0.598812,
0.595879,
0.585015,
0.583271,
0.581079,
0.576904,
0.543242,
0.539973,
0.532047,
0.530959,
0.51427,
0.540855,
0.538974,
0.512528,
0.46106,
0.444108,
0.398229,
0.394443,
0.39229
0.39229,
0.384802,
0.341795,
0.324578,
0.32201,
0.320837
],
"PROP": [
"typeOfGene",
Expand All @@ -107,22 +106,21 @@
"<-diseaseID{typeOf:DiseaseGeneAssociation}->geneID",
"<-compoundID{typeOf:ChemicalCompoundGeneticVariantAssociation}->variantID",
"<-geneID{typeOf:DiseaseGeneAssociation}->diseaseID",
"genomicCoordinates",
"antigenType",
"<-variantID{typeOf:ChemicalCompoundGeneticVariantAssociation}->compoundID",
"alleleType",
"<-geneticVariantID{typeOf:DiseaseGeneticVariantAssociation}->diseaseID",
"hg19GenomicPosition",
"hasRNATranscript",
"hg19GenomicLocation",
"hg38GenomicPosition",
"hg38GenomicLocation",
"hgncID",
"hasGenomicCoordinates",
"antibodyType",
"chromosomeSize",
"referenceAlleleNCBI",
"observedAllele",
"ncbiDNASequenceName",
"alleleOrigin"
"referenceAllele",
"alleleOrigin",
"specializationOf",
"subClassificationOf",
"virusGenus",
"virusHost",
"ncbiProteinAccessionNumber"
]
},
"query_detection_debug_logs": {
Expand Down
Loading
Loading