Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: fetch ontology term descriptions, if available #181

Merged
merged 4 commits into from
Apr 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 34 additions & 0 deletions api/python/src/cellxgene_ontology_guide/ontology_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -473,3 +473,37 @@ def map_term_labels(self, term_ids: Iterable[str]) -> Dict[str, str]:
:return: Dict[str, str] mapping term IDs to their respective human-readable labels
"""
return {term_id: self.get_term_label(term_id) for term_id in term_ids}

def get_term_description(self, term_id: str) -> Optional[str]:
    """
    Look up the free-text description stored for an ontology term.

    Terms listed in VALID_NON_ONTOLOGY_TERMS are not looked up in any ontology; the term ID
    itself is returned unchanged. For every other term, the "description" entry of the term's
    metadata is returned, or None when the term carries no such entry.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.get_term_description("CL:0000005")
    'Any fibroblast that is deriived from the neural crest.'

    :param term_id: str ontology term to fetch description for
    :return: Optional[str] description for the term, the term ID itself for valid non-ontology
        terms, or None if the term has no description
    """
    if term_id not in VALID_NON_ONTOLOGY_TERMS:
        # Resolve which ontology the term belongs to, then read its metadata entry.
        ontology = self.cxg_schema.ontology(self._parse_ontology_name(term_id))
        term_metadata = ontology[term_id]
        # "description" is optional in the term metadata, so a missing key yields None.
        description: Optional[str] = term_metadata.get("description")
        return description
    return term_id

def map_term_descriptions(self, term_ids: Iterable[str]) -> Dict[str, Optional[str]]:
    """
    Fetch the descriptions for a given collection of ontology terms.

    Accepts any iterable of term IDs (list, tuple, set, generator, ...), matching the signature
    of map_term_labels. Each term is resolved with get_term_description, so a term without a
    description maps to None. Repeated term IDs collapse into a single key in the result.

    Example
    >>> from cellxgene_ontology_guide.ontology_parser import OntologyParser
    >>> ontology_parser = OntologyParser()
    >>> ontology_parser.map_term_descriptions(["CL:0000005", "CL:0000006"])
    {'CL:0000005': 'Any fibroblast that is deriived from the neural crest.', 'CL:0000006': None}

    :param term_ids: iterable of str ontology terms to fetch descriptions for
    :return: Dict[str, Optional[str]] mapping term IDs to their respective descriptions, with
        None for terms that have no description
    """
    return {term_id: self.get_term_description(term_id) for term_id in term_ids}
15 changes: 14 additions & 1 deletion api/python/tests/test_ontology_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
@pytest.fixture
def ontology_dict():
return {
"CL:0000000": {"ancestors": {}, "label": "cell A", "deprecated": False},
"CL:0000000": {"ancestors": {}, "label": "cell A", "description": "This is cell A.", "deprecated": False},
"CL:0000001": {
"ancestors": {"CL:0000000": 1},
"label": "cell B",
Expand Down Expand Up @@ -221,6 +221,19 @@ def test_map_term_labels(ontology_parser):
}


def test_get_term_description(ontology_parser):
    """get_term_description returns the stored description string for CL:0000000."""
    expected = "This is cell A."
    actual = ontology_parser.get_term_description("CL:0000000")
    assert actual == expected


def test_map_term_description(ontology_parser):
    """
    map_term_descriptions returns one entry per requested term: the description string for
    CL:0000000, None for CL:0000004, and the term itself for "unknown" and "na".
    """
    term_ids = ["CL:0000000", "CL:0000004", "unknown", "na"]
    expected = {
        "CL:0000000": "This is cell A.",
        "CL:0000004": None,
        "unknown": "unknown",
        "na": "na",
    }
    assert ontology_parser.map_term_descriptions(term_ids) == expected


def test_get_high_level_terms(ontology_parser):
high_level_terms = ["CL:0000000", "CL:0000001"]
assert ontology_parser.get_high_level_terms("CL:0000004", high_level_terms) == ["CL:0000000", "CL:0000001"]
Expand Down
4 changes: 4 additions & 0 deletions artifact-schemas/all_ontology_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,10 @@
"type": "string",
"description": "human-readable name for the ontology entry."
},
"description": {
"type": "string",
"description": "Optional description of the ontology entry."
},
"deprecated": {
"type": "boolean",
"description": "Indicates whether the ontology entry is deprecated."
Expand Down
Binary file modified ontology-assets/CL-ontology-v2024-01-04.json.gz
Binary file not shown.
Binary file modified ontology-assets/EFO-ontology-v3.62.0.json.gz
Binary file not shown.
Binary file modified ontology-assets/HANCESTRO-ontology-3.0.json.gz
Binary file not shown.
Binary file modified ontology-assets/HsapDv-ontology-11.json.gz
Binary file not shown.
Binary file modified ontology-assets/MONDO-ontology-v2024-01-03.json.gz
Binary file not shown.
Binary file modified ontology-assets/MmusDv-ontology-9.json.gz
Binary file not shown.
Binary file modified ontology-assets/PATO-ontology-v2023-05-18.json.gz
Binary file not shown.
Binary file modified ontology-assets/UBERON-ontology-v2024-01-18.json.gz
Binary file not shown.
2 changes: 1 addition & 1 deletion ontology-assets/ontology_info.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,4 @@
}
}
}
}
}
5 changes: 4 additions & 1 deletion tools/ontology-builder/src/all_ontology_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,9 +193,12 @@ def _extract_ontology_term_metadata(onto: owlready2.entity.ThingClass) -> Dict[s
# no current use-case for NCBITaxon
term_dict[term_id]["ancestors"] = {} if onto.name == "NCBITaxon" else ancestors

# Gets label
term_dict[term_id]["label"] = onto_term.label[0] if onto_term.label else ""

# optional description, if available
if getattr(onto_term, "IAO_0000115", None):
term_dict[term_id]["description"] = onto_term.IAO_0000115[0]

# Add the "deprecated" status and associated metadata if True
term_dict[term_id]["deprecated"] = False
if onto_term.deprecated and onto_term.deprecated.first():
Expand Down
Loading