From 11c671d4abc5afea15b5aa319a5499fef56e574b Mon Sep 17 00:00:00 2001
From: Nayib Gloria <55710092+nayib-jose-gloria@users.noreply.github.com>
Date: Mon, 8 Jul 2024 14:52:44 -0400
Subject: [PATCH] feat: fetch term ID by term label (#212)

---
 .../ontology_parser.py                        | 46 +++++++++++++++++++
 api/python/tests/test_ontology_parser.py      | 26 +++++++++++
 2 files changed, 72 insertions(+)

diff --git a/api/python/src/cellxgene_ontology_guide/ontology_parser.py b/api/python/src/cellxgene_ontology_guide/ontology_parser.py
index e8c13d7e..eb3c65be 100644
--- a/api/python/src/cellxgene_ontology_guide/ontology_parser.py
+++ b/api/python/src/cellxgene_ontology_guide/ontology_parser.py
@@ -24,6 +24,32 @@ def __init__(self, schema_version: Optional[str] = None):
         is loaded.
         """
         self.cxg_schema = CXGSchema(version=schema_version) if schema_version else CXGSchema()
+        self.term_label_to_id_map: Dict[str, Dict[str, str]] = {
+            ontology_name: dict() for ontology_name in self.cxg_schema.supported_ontologies
+        }
+
+    def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]:
+        """
+        Fetch the mapping of term labels to term IDs for a given ontology. Caches generated maps by ontology_name.
+
+        Example
+        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
+        >>> ontology_parser = OntologyParser()
+        >>> ontology_parser.get_term_label_to_id_map("CL") # doctest: +SKIP
+        {'Label A': 'CL:0000000', ... }
+
+        :param ontology_name: str name of ontology to get map of term labels to term IDs
+        """
+        if ontology_name not in self.cxg_schema.supported_ontologies:
+            raise ValueError(f"{ontology_name} is not a supported ontology, its metadata cannot be fetched.")
+
+        if self.term_label_to_id_map[ontology_name]:
+            return self.term_label_to_id_map[ontology_name]
+
+        for term_id, term_metadata in self.cxg_schema.ontology(ontology_name).items():
+            self.term_label_to_id_map[ontology_name][term_metadata["label"]] = term_id
+
+        return self.term_label_to_id_map[ontology_name]
 
     def _parse_ontology_name(self, term_id: str) -> str:
         """
@@ -598,3 +624,23 @@ def map_term_synonyms(self, term_ids: List[str]) -> Dict[str, List[str]]:
         :return: Dict[str, List[str]] mapping term IDs to their respective synonym lists
         """
         return {term_id: self.get_term_synonyms(term_id) for term_id in term_ids}
+
+    def get_term_id_by_label(self, term_label: str, ontology_name: str) -> Optional[str]:
+        """
+        Fetch the ontology term ID from a given human-readable label. Filters by ontology_name. Raises ValueError if
+        ontology_name is not a supported ontology.
+
+        Returns None if term_label is not the label of any term in the given ontology.
+
+        Example
+        >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
+        >>> ontology_parser = OntologyParser()
+        >>> ontology_parser.get_term_id_by_label("neural crest derived fibroblast", "CL")
+        'CL:0000005'
+
+        :param term_label: str human-readable label to fetch term ID for
+        :param ontology_name: str name of ontology to search for term label in
+        :return: Optional[str] term ID with that label, or None if the label is not found in the ontology
+        """
+        ontology_term_label_to_id_map = self.get_term_label_to_id_map(ontology_name)
+        return ontology_term_label_to_id_map.get(term_label)
diff --git a/api/python/tests/test_ontology_parser.py b/api/python/tests/test_ontology_parser.py
index 13d5f33d..619ef24b 100644
--- a/api/python/tests/test_ontology_parser.py
+++ b/api/python/tests/test_ontology_parser.py
@@ -371,3 +371,29 @@ def test_get_term_graph(ontology_parser):
         "CL:0000006": 1,
         "CL:0000007": 1,
     }
+
+
+def test_get_term_label_to_id_map(ontology_parser):
+    term_label_to_id_map_expected = {
+        "cell A": "CL:0000000",
+        "cell B": "CL:0000001",
+        "cell C": "CL:0000002",
+        "obsolete cell": "CL:0000003",
+        "cell BC": "CL:0000004",
+        "cell BC2": "CL:0000005",
+        "cell B2": "CL:0000006",
+        "cell B3": "CL:0000007",
+        "cell unrelated": "CL:0000008",
+    }
+    assert ontology_parser.get_term_label_to_id_map("CL") == term_label_to_id_map_expected
+    assert ontology_parser.term_label_to_id_map["CL"] == term_label_to_id_map_expected
+
+
+def test_get_term_id_by_label(ontology_parser):
+    assert ontology_parser.get_term_id_by_label("cell A", "CL") == "CL:0000000"
+    assert ontology_parser.get_term_id_by_label("cell Z", "CL") is None
+
+
+def test_get_term_id_by_label__unsupported_ontology_name(ontology_parser):
+    with pytest.raises(ValueError):
+        ontology_parser.get_term_id_by_label("gene A", "GO")