Skip to content

Commit

Permalink
[bio] update multi-hop properties (datacommonsorg#4712)
Browse files Browse the repository at this point in the history
- Add multi-hop properties needed for the hero queries
- Update multi-hop properties where naming has changed from data cleanup
- Update the entity overview tile to link to the browser page for ease of testing
  • Loading branch information
chejennifer authored Nov 5, 2024
1 parent 454b21f commit cf2ed57
Show file tree
Hide file tree
Showing 13 changed files with 380 additions and 325 deletions.
2 changes: 1 addition & 1 deletion deploy/nl/catalog.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ indexes:
bio_ft:
store_type: MEMORY
source_path: ../../tools/nl/embeddings/input/bio
embeddings_path: gs://datcom-nl-models/bio_ft_2024_06_24_23_40_05/embeddings.csv
embeddings_path: gs://datcom-nl-models/bio_ft_2024_11_05_09_59_39/embeddings.csv
model: ft-final-v20230717230459-all-MiniLM-L6-v2
healthcheck_query: "Gene"
base_uae_lance:
Expand Down
24 changes: 20 additions & 4 deletions server/config/nl_page/prop_titles.json
Original file line number Diff line number Diff line change
@@ -1,19 +1,35 @@
{
"<-referenceSNPClusterID{typeOf:GeneticVariantGeneAssociation}->geneSymbol": {
"<-variantID{typeOf:GeneGeneticVariantAssociation}->geneID": {
"displayName": "associated gene",
"titleFormat": "The associated genes for {entity} are"
},
"<-geneSymbol{typeOf:GeneticVariantGeneAssociation}->referenceSNPClusterID": {
"<-geneID{typeOf:GeneGeneticVariantAssociation}->variantID": {
"displayName": "associated genetic variant",
"titleFormat": "The associated genetic variants for {entity} are"
},
"<-diseaseOntologyID{typeOf:DiseaseGeneAssociation}->geneID": {
"<-diseaseID{typeOf:DiseaseGeneAssociation}->geneID": {
"displayName": "associated gene",
"titleFormat": "The associated genes for {entity} are"
},
"<-geneID{typeOf:DiseaseGeneAssociation}->diseaseOntologyID": {
"<-geneID{typeOf:DiseaseGeneAssociation}->diseaseID": {
"displayName": "associated disease",
"titleFormat": "The associated diseases for {entity} are"
},
"<-diseaseID{typeOf:DiseaseGeneticVariantAssociation}->geneticVariantID": {
"displayName": "associated genetic variant",
"titleFormat": "The associated genetic variants for {entity} are"
},
"<-geneticVariantID{typeOf:DiseaseGeneticVariantAssociation}->diseaseID": {
"displayName": "associated disease",
"titleFormat": "The associated diseases for {entity} are"
},
"<-compoundID{typeOf:ChemicalCompoundGeneticVariantAssociation}->variantID": {
"displayName": "associated genetic variant",
"titleFormat": "The associated genetic variants for {entity} are"
},
"<-variantID{typeOf:ChemicalCompoundGeneticVariantAssociation}->compoundID": {
"displayName": "associated chemical compound",
"titleFormat": "The associated chemical compounds for {entity} are"
},
"->mRNA": {
"displayName": "mRNA"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,28 +11,28 @@
"sv_matching": {
"CosineScore": [
0.35510897636413574,
0.3460872769355774,
0.3460870683193207,
0.3452577292919159,
0.344845712184906,
0.34113651514053345,
0.3384983241558075,
0.3349902033805847,
0.3316609561443329,
0.3260059356689453,
0.32424432039260864,
0.3206062316894531,
0.34484589099884033,
0.3411366045475006,
0.3384982645511627,
0.3349902629852295,
0.3316609263420105,
0.32600605487823486,
0.3242444694042206,
0.32060620188713074,
0.31426557898521423,
0.3127653896808624,
0.31262922286987305,
0.31250447034835815,
0.31177398562431335,
0.31033921241760254,
0.31007489562034607,
0.30988338589668274,
0.3080539405345917,
0.30745869874954224,
0.30719631910324097,
0.3039420247077942
0.3127654492855072,
0.3126293420791626,
0.31250452995300293,
0.31177404522895813,
0.3103388845920563,
0.3100748658180237,
0.3098835051059723,
0.3080541491508484,
0.3074587285518646,
0.3071962296962738,
0.30394214391708374
],
"MultiSV": {},
"Query": "what is the phylum of",
Expand Down Expand Up @@ -64,39 +64,42 @@
},
"props_matching": {
"CosineScore": [
0.9999998807907104,
0.4752839505672455,
0.36154165863990784,
0.34200993180274963,
0.3293309211730957,
0.31845176219940186,
0.31466981768608093,
0.3083951473236084,
0.3079407513141632,
0.30463001132011414,
0.28270864486694336,
0.28187549114227295,
0.2808278203010559,
0.2804529666900635,
0.27429836988449097,
0.27150505781173706,
0.2707182466983795,
0.26954683661460876,
0.2599008083343506,
0.2599008083343506,
0.24827907979488373,
0.24662044644355774,
0.24632112681865692,
0.24616354703903198,
0.24212250113487244,
0.23135541379451752,
0.2296244502067566,
0.22773587703704834
1.0000001192092896,
0.47528401017189026,
0.3615417182445526,
0.34543079137802124,
0.34543079137802124,
0.3420097231864929,
0.3293308615684509,
0.318451464176178,
0.31466978788375854,
0.308395117521286,
0.30794060230255127,
0.3046301007270813,
0.2827085852622986,
0.281875342130661,
0.28082770109176636,
0.2804528772830963,
0.2742983400821686,
0.2715050280094147,
0.27071818709373474,
0.2695467472076416,
0.25990068912506104,
0.25990068912506104,
0.24827904999256134,
0.24662022292613983,
0.24632100760936737,
0.2461635172367096,
0.24212227761745453,
0.23135530948638916,
0.2296244353055954
],
"PROP": [
"phylum",
"chemblID",
"geneID",
"<-compoundID{typeOf:ChemicalCompoundGeneticVariantAssociation}->variantID",
"<-variantID{typeOf:ChemicalCompoundGeneticVariantAssociation}->compoundID",
"virusGenus",
"typeOfGene",
"fullName",
Expand All @@ -112,16 +115,15 @@
"virusHost",
"strandOrientation",
"ncbiDNASequenceName",
"<-geneSymbol{typeOf:GeneticVariantGeneAssociation}->referenceSNPClusterID",
"<-referenceSNPClusterID{typeOf:GeneticVariantGeneAssociation}->geneSymbol",
"<-geneID{typeOf:GeneGeneticVariantAssociation}->variantID",
"<-variantID{typeOf:GeneGeneticVariantAssociation}->geneID",
"ncbiProteinAccessionNumber",
"alleleType",
"hg38GenomicLocation",
"ofVirusSpecies",
"hg19GenomicLocation",
"ncbiTaxonID",
"antigenType",
"alleleOrigin"
"antigenType"
]
},
"query_detection_debug_logs": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,30 +12,30 @@
"query_with_places_removed": "what types of genes are and",
"sv_matching": {
"CosineScore": [
0.5237788558006287,
0.44518929719924927,
0.4198145568370819,
0.5237791538238525,
0.4451894760131836,
0.4198145866394043,
0.41922125220298767,
0.4136667251586914,
0.4096524715423584,
0.4066111147403717,
0.40137141942977905,
0.40101921558380127,
0.41366666555404663,
0.4096527099609375,
0.4066110849380493,
0.4013715386390686,
0.4010191559791565,
0.40063756704330444,
0.3987462520599365,
0.3983607292175293,
0.39052334427833557,
0.3890331983566284,
0.38846108317375183,
0.39874646067619324,
0.39836058020591736,
0.39052361249923706,
0.38903331756591797,
0.3884609639644623,
0.3883824050426483,
0.3873300850391388,
0.3864220678806305,
0.38589635491371155,
0.385678768157959,
0.3854341208934784,
0.38533875346183777,
0.3818424642086029,
0.37970471382141113
0.3873301148414612,
0.38642188906669617,
0.3858962655067444,
0.3856789469718933,
0.3854343295097351,
0.38533878326416016,
0.38184264302253723,
0.37970495223999023
],
"MultiSV": {},
"Query": "what types of genes are and",
Expand Down Expand Up @@ -68,42 +68,49 @@
},
"props_matching": {
"CosineScore": [
0.9086108803749084,
0.7445291876792908,
0.7330227494239807,
0.7175889611244202,
0.6834693551063538,
0.9086110591888428,
0.7445294260978699,
0.7330226898193359,
0.7175887823104858,
0.6834690570831299,
0.6590196490287781,
0.6340398192405701,
0.5988121628761292,
0.595878541469574,
0.5850147604942322,
0.5810789465904236,
0.6357895731925964,
0.6340402364730835,
0.6171568036079407,
0.5988123416900635,
0.5958784818649292,
0.5850145220756531,
0.5832706689834595,
0.5810792446136475,
0.576904296875,
0.5432419776916504,
0.5399730205535889,
0.5399729013442993,
0.5320467948913574,
0.5309585928916931,
0.5142701268196106,
0.5125278830528259,
0.4610598683357239,
0.4441079795360565,
0.3982292413711548,
0.3944430947303772,
0.3922898471355438,
0.38480237126350403
0.5309585332870483,
0.5142703056335449,
0.5125279426574707,
0.46105992794036865,
0.4441080689430237,
0.39822953939437866,
0.3944427967071533,
0.39229002594947815
],
"PROP": [
"typeOfGene",
"geneticVariantFunctionalCategory",
"<-referenceSNPClusterID{typeOf:GeneticVariantGeneAssociation}->geneSymbol",
"<-geneSymbol{typeOf:GeneticVariantGeneAssociation}->referenceSNPClusterID",
"<-variantID{typeOf:GeneGeneticVariantAssociation}->geneID",
"<-geneID{typeOf:GeneGeneticVariantAssociation}->variantID",
"fullName",
"geneID",
"<-diseaseOntologyID{typeOf:DiseaseGeneAssociation}->geneID",
"<-geneID{typeOf:DiseaseGeneAssociation}->diseaseOntologyID",
"<-diseaseID{typeOf:DiseaseGeneticVariantAssociation}->geneticVariantID",
"<-diseaseID{typeOf:DiseaseGeneAssociation}->geneID",
"<-compoundID{typeOf:ChemicalCompoundGeneticVariantAssociation}->variantID",
"<-geneID{typeOf:DiseaseGeneAssociation}->diseaseID",
"genomicCoordinates",
"antigenType",
"<-variantID{typeOf:ChemicalCompoundGeneticVariantAssociation}->compoundID",
"alleleType",
"<-geneticVariantID{typeOf:DiseaseGeneticVariantAssociation}->diseaseID",
"hg19GenomicPosition",
"hasRNATranscript",
"hg19GenomicLocation",
Expand All @@ -114,8 +121,7 @@
"referenceAlleleNCBI",
"observedAllele",
"ncbiDNASequenceName",
"alleleOrigin",
"specializationOf"
"alleleOrigin"
]
},
"query_detection_debug_logs": {
Expand Down
Loading

0 comments on commit cf2ed57

Please sign in to comment.