Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add API ontology querying module #39

Merged
merged 15 commits into from
Feb 23, 2024
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/generate_all_ontology.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ name: Updates to Ontology Files
on:
push:
paths:
- "**/tools/ontology-builder/src/ontology-references/ontology_info.yml"
- "**/api/python/src/cellxgene_ontology_guide/artifacts/ontology_info.yml"
branches-ignore:
- main

Expand Down Expand Up @@ -36,7 +36,7 @@ jobs:
- name: ontology-processing
run: |
python3 ./tools/ontology-builder/src/all_ontology_generator.py
git add ./tools/ontology-builder/src/ontology-references/all_ontology.json.gz
git add ./api/python/src/cellxgene_ontology_guide/fixtures/all_ontology.json.gz
nayib-jose-gloria marked this conversation as resolved.
Show resolved Hide resolved
- name: Commit
run: |
git commit -m "AUTO: update ontologies"
Expand Down
Empty file removed api/python/__init__.py
nayib-jose-gloria marked this conversation as resolved.
Show resolved Hide resolved
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this file needed?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it was removed! unless you're saying you think it should be added back in? don't think its needed

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes remove. Sorry I thought it was just an empty file.

Empty file.
2 changes: 1 addition & 1 deletion api/python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ license = { text = "MIT" }
readme = "README.md"
requires-python = "~= 3.11"
dependencies = [
"owlready2"
"PyYAML"
]

[project.optional-dependencies]
Expand Down
Binary file not shown.
5 changes: 5 additions & 0 deletions api/python/src/cellxgene_ontology_guide/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
import os

# Directory containing this module, with symlinks resolved by realpath.
PACKAGE_ROOT = os.path.dirname(os.path.realpath(__file__))
# Location of the gzipped JSON ontology artifact inside PACKAGE_ROOT/artifacts.
ALL_ONTOLOGY_JSON = os.path.join(PACKAGE_ROOT, "artifacts/all_ontology.json.gz")
# Location of the ontology_info.yml artifact inside PACKAGE_ROOT/artifacts.
ONTOLOGY_INFO_YML = os.path.join(PACKAGE_ROOT, "artifacts/ontology_info.yml")
160 changes: 160 additions & 0 deletions api/python/src/cellxgene_ontology_guide/query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
import gzip
import json
import re
from typing import Any, Dict, List, Union

import yaml
from constants import ALL_ONTOLOGY_JSON, ONTOLOGY_INFO_YML

# Both artifacts are read once, at import time, into module-level globals.
# The encoding is pinned to UTF-8 so decoding does not depend on the host locale.

# ONTOLOGY_DICT is indexed below as ONTOLOGY_DICT[ontology_name][term_id][field].
with gzip.open(ALL_ONTOLOGY_JSON, "rt", encoding="utf-8") as f:
    ONTOLOGY_DICT = json.load(f)

# SUPPORTED_ONTOLOGIES is used below for membership tests on ontology names.
with open(ONTOLOGY_INFO_YML, "rt", encoding="utf-8") as f:
    SUPPORTED_ONTOLOGIES = yaml.safe_load(f)


def _parse_ontology_name(term_id: str) -> str:
"""
Parse the ontology name from a given term ID. If the term ID does not conform to the expected term format or is not
from an ontology supported by cellxgene-ontology-guide, raise a ValueError.

:param term_id: str ontology term to parse
:return: str name of ontology that term belongs to
"""
pattern = r"[A-Za-z]+:\d+"
if not re.match(pattern, term_id):
raise ValueError(f"{term_id} does not conform to expected regex pattern {pattern} and cannot be queried.")

ontology_name = term_id.split(":")[0]
if ontology_name not in SUPPORTED_ONTOLOGIES:
raise ValueError(f"{term_id} is not part of a supported ontology, its metadata cannot be fetched.")

return ontology_name


def get_term_ancestors(term_id: str, include_self: bool = False) -> List[str]:
    """
    Get the ancestor ontology terms for a given term. If include_self is True, the term itself is appended to the
    end of the returned list.

    Example: get_term_ancestors("CL:0000005") -> ["CL:0000000", ...]

    :param term_id: str ontology term to find ancestors for
    :param include_self: boolean flag to include the term itself as an ancestor
    :return: flattened List[str] of ancestor terms; always a new list, so callers may mutate it freely
    :raises ValueError: if term_id is malformed or belongs to an unsupported ontology
    :raises KeyError: if term_id is not present in its ontology
    """
    ontology_name = _parse_ontology_name(term_id)
    # Copy the stored list: returning it directly would let a caller's mutation corrupt ONTOLOGY_DICT.
    ancestors: List[str] = list(ONTOLOGY_DICT[ontology_name][term_id]["ancestors"])
    if include_self:
        ancestors.append(term_id)
    return ancestors


def get_term_list_ancestors(term_ids: List[str], include_self: bool = False) -> Dict[str, List[str]]:
    """
    Get the ancestor ontology terms for each term in a list. If include_self is True, the term itself will be included
    as an ancestor.

    Example: get_term_list_ancestors(["CL:0000003", "CL:0000005"], include_self=True) -> {
        "CL:0000003": ["CL:0000003"],
        "CL:0000005": ["CL:0000005", "CL:0000000", ...]
    }

    :param term_ids: list of str ontology terms to find ancestors for
    :param include_self: boolean flag to include the term itself as an ancestor
    :return: Dictionary mapping str term IDs to their respective flattened List[str] of ancestor terms. Maps to empty
    list if there are no ancestors. An empty input list yields an empty dictionary.
    """
    # Delegates per term to get_term_ancestors, so any error it raises for a single term propagates unchanged.
    return {term_id: get_term_ancestors(term_id, include_self) for term_id in term_ids}


def get_terms_descendants(term_ids: List[str], include_self: bool = False) -> Dict[str, List[str]]:
    """
    Collect, for every queried term, all terms that list it among their ancestors.

    Example: get_terms_descendants(["CL:0000003", "CL:0000005"], include_self=True) -> {
        "CL:0000003": ["CL:0000003", "CL:0000004", ...],
        "CL:0000005": ["CL:0000005", "CL:0002363", ...]
    }

    :param term_ids: list of str ontology terms to find descendants for
    :param include_self: boolean flag; when True each term is placed first in its own descendant list
    :return: Dictionary mapping each queried str term ID to a flattened List[str] of its descendant terms. The list
    is empty when a term has no descendants (and include_self is False).
    """
    result: Dict[str, List[str]] = {}
    ontologies_to_scan = set()
    for queried_id in term_ids:
        # Validate first so an invalid ID raises before any entry is recorded for it.
        ontologies_to_scan.add(_parse_ontology_name(queried_id))
        result[queried_id] = [queried_id] if include_self else []

    # One pass over every term of each involved ontology; a term is a descendant of each queried ID that appears
    # in its ancestor list.
    for ontology_name in ontologies_to_scan:
        for candidate_id, metadata in ONTOLOGY_DICT[ontology_name].items():
            candidate_ancestors = metadata["ancestors"]
            for queried_id, collected in result.items():
                if queried_id in candidate_ancestors:
                    collected.append(candidate_id)

    return result


def is_term_deprecated(term_id: str) -> bool:
    """
    Check if an ontology term is deprecated.

    Example: is_term_deprecated("CL:0000003") -> True

    :param term_id: str ontology term to check for deprecation
    :return: boolean flag indicating whether the term is deprecated; False when the term carries no "deprecated"
    entry at all
    :raises ValueError: if term_id is malformed or belongs to an unsupported ontology
    :raises KeyError: if term_id is not present in its ontology
    """
    ontology_name = _parse_ontology_name(term_id)
    # .get() yields None for a missing key; coerce so the declared bool return type always holds.
    return bool(ONTOLOGY_DICT[ontology_name][term_id].get("deprecated", False))


def get_term_replacement(term_id: str) -> Union[str, None]:
    """
    Look up the term that supersedes a deprecated ontology term.

    Example: get_term_replacement("CL:0000003") -> "CL:0000000"

    :param term_id: str ontology term to check a replacement term for
    :return: replacement str term ID if one is recorded, None when the "replaced_by" entry is missing or empty
    """
    term_entry = ONTOLOGY_DICT[_parse_ontology_name(term_id)][term_id]
    # Any falsy value (missing key, empty string) collapses to None.
    return term_entry.get("replaced_by") or None


def get_term_metadata(term_id: str) -> Dict[str, Any]:
    """
    Fetch metadata for a given ontology term. Returns a dict with format

    {"comments": ["...", ...], "term_tracker": "...", "consider": ["...", ...]}

    "comments" maps to List[str] of ontology curator comments
    "term_tracker" maps to a str url where there is discussion around this term's curation (or deprecation).
    "consider" maps to List[str] of alternate ontology terms to consider using instead of this term

    All keys map to None if no metadata of that type is present.

    :param term_id: str ontology term to fetch metadata for
    :return: Dict with keys "comments", "term_tracker", and "consider" (always in that order) containing associated
    metadata.
    :raises ValueError: if term_id is malformed or belongs to an unsupported ontology
    :raises KeyError: if term_id is not present in its ontology
    """
    ontology_name = _parse_ontology_name(term_id)
    term_entry = ONTOLOGY_DICT[ontology_name][term_id]
    # A tuple rather than a set literal keeps the key order of the result deterministic across runs.
    return {key: term_entry.get(key) for key in ("comments", "term_tracker", "consider")}


def get_term_label(term_id: str) -> str:
    """
    Return the human-readable label stored for an ontology term.

    Example: get_term_label("CL:0000005") -> "fibroblast neural crest derived"

    :param term_id: str ontology term to fetch label for
    :return: str human-readable label for the term
    """
    term_entry = ONTOLOGY_DICT[_parse_ontology_name(term_id)][term_id]
    term_label: str = term_entry["label"]
    return term_label
106 changes: 106 additions & 0 deletions api/python/tests/test_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import pytest
from cellxgene_ontology_guide import query


@pytest.fixture()
def ontology_dict():
    """Provide a five-term in-memory "CL" ontology to stand in for the real ontology artifact."""
    root_cell = {"ancestors": [], "label": "cell A", "deprecated": False}
    cell_b = {
        "ancestors": ["CL:0000000"],
        "label": "cell B",
        "deprecated": False,
        "consider": ["CL:0000004"],
    }
    cell_c = {"ancestors": ["CL:0000000"], "label": "cell C", "deprecated": False}
    obsolete_cell = {
        "ancestors": ["CL:0000000"],
        "label": "obsolete cell",
        "deprecated": True,
        "replaced_by": "CL:0000004",
        "comments": ["this term was deprecated in favor of a descendant term of CL:0000001"],
        "term_tracker": "http://example.com/issue/1234",
    }
    cell_b2 = {"ancestors": ["CL:0000001", "CL:0000000"], "label": "cell B2", "deprecated": False}

    # Insertion order is kept ascending by term ID; the descendant test compares ordered lists.
    cl_terms = {
        "CL:0000000": root_cell,
        "CL:0000001": cell_b,
        "CL:0000002": cell_c,
        "CL:0000003": obsolete_cell,
        "CL:0000004": cell_b2,
    }
    return {"CL": cl_terms}


@pytest.fixture()
def supported_ontologies():
    """Provide a supported-ontologies mapping that contains only "CL"."""
    cl_info = {"version": "2024-01-01", "source": "http://example.com", "filetype": "owl"}
    return {"CL": cl_info}


@pytest.fixture
def module_globals_override(monkeypatch, ontology_dict, supported_ontologies):
    """Swap the query module's loaded globals for the small test fixtures for the duration of a test."""
    replacements = (
        ("ONTOLOGY_DICT", ontology_dict),
        ("SUPPORTED_ONTOLOGIES", supported_ontologies),
    )
    for global_name, fixture_value in replacements:
        monkeypatch.setattr(query, global_name, fixture_value)


def test_parse_ontology_name(module_globals_override):
    """A well-formed ID from a supported ontology yields its prefix."""
    parsed_name = query._parse_ontology_name("CL:0000001")
    assert parsed_name == "CL"


def test_parse_ontology_name__wrong_format(module_globals_override):
    """An ID using "_" instead of ":" is rejected with ValueError."""
    malformed_id = "CL_0000001"
    with pytest.raises(ValueError):
        query._parse_ontology_name(malformed_id)


def test_parse_ontology_name__not_supported(module_globals_override):
    """A well-formed ID whose prefix is absent from the supported ontologies is rejected with ValueError."""
    unsupported_id = "GO:0000001"
    with pytest.raises(ValueError):
        query._parse_ontology_name(unsupported_id)


def test_get_term_ancestors(module_globals_override):
    """Ancestors come back in stored order; include_self appends the term at the end."""
    without_self = query.get_term_ancestors("CL:0000004")
    assert without_self == ["CL:0000001", "CL:0000000"]

    with_self = query.get_term_ancestors("CL:0000004", include_self=True)
    assert with_self == ["CL:0000001", "CL:0000000", "CL:0000004"]


def test_get_term_list_ancestors(module_globals_override):
    """Each queried term maps to its own ancestor list, with and without include_self."""
    queried_terms = ["CL:0000000", "CL:0000004"]

    expected_without_self = {
        "CL:0000000": [],
        "CL:0000004": ["CL:0000001", "CL:0000000"],
    }
    assert query.get_term_list_ancestors(queried_terms) == expected_without_self

    expected_with_self = {
        "CL:0000000": ["CL:0000000"],
        "CL:0000004": ["CL:0000001", "CL:0000000", "CL:0000004"],
    }
    assert query.get_term_list_ancestors(queried_terms, include_self=True) == expected_with_self


def test_get_terms_descendants(module_globals_override):
    """The root term collects every other term; a leaf term collects nothing (or only itself)."""
    queried_terms = ["CL:0000000", "CL:0000004"]

    expected_without_self = {
        "CL:0000000": ["CL:0000001", "CL:0000002", "CL:0000003", "CL:0000004"],
        "CL:0000004": [],
    }
    assert query.get_terms_descendants(queried_terms) == expected_without_self

    expected_with_self = {
        "CL:0000000": ["CL:0000000", "CL:0000001", "CL:0000002", "CL:0000003", "CL:0000004"],
        "CL:0000004": ["CL:0000004"],
    }
    assert query.get_terms_descendants(queried_terms, include_self=True) == expected_with_self


def test_is_term_deprecated(module_globals_override):
    """The obsolete fixture term reports deprecated; a live term does not."""
    obsolete_flag = query.is_term_deprecated("CL:0000003")
    live_flag = query.is_term_deprecated("CL:0000004")
    assert obsolete_flag
    assert not live_flag


def test_get_term_replacement(module_globals_override):
    """A term with "replaced_by" returns that ID; a term without it returns None."""
    replacement = query.get_term_replacement("CL:0000003")
    assert replacement == "CL:0000004"

    no_replacement = query.get_term_replacement("CL:0000004")
    assert no_replacement is None


def test_get_term_metadata(module_globals_override):
    """Metadata keys are always present, with None standing in for fields the term lacks."""
    expected_obsolete = {
        "comments": ["this term was deprecated in favor of a descendant term of CL:0000001"],
        "term_tracker": "http://example.com/issue/1234",
        "consider": None,
    }
    assert query.get_term_metadata("CL:0000003") == expected_obsolete

    expected_cell_b = {
        "comments": None,
        "term_tracker": None,
        "consider": ["CL:0000004"],
    }
    assert query.get_term_metadata("CL:0000001") == expected_cell_b


def test_get_term_label(module_globals_override):
    """The label stored for a term is returned verbatim."""
    fetched_label = query.get_term_label("CL:0000004")
    assert fetched_label == "cell B2"
6 changes: 4 additions & 2 deletions tools/ontology-builder/src/env.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import os

# Directory containing this module, with symlinks resolved by realpath.
PACKAGE_ROOT = os.path.dirname(os.path.realpath(__file__))
# Three directory levels above PACKAGE_ROOT.
ROOT_DIR = os.path.dirname(os.path.dirname(os.path.dirname(PACKAGE_ROOT)))
# Directory under ROOT_DIR holding ontology_info.yml and all_ontology.json.gz.
FIXTURES_ROOT = os.path.join(ROOT_DIR, "api/python/src/cellxgene_ontology_guide/artifacts")
ONTOLOGY_REF_DIR = os.path.join(PACKAGE_ROOT, "ontology-references")
RAW_ONTOLOGY_DIR = os.path.join(ONTOLOGY_REF_DIR, "raw-files")
# ONTO_INFO_YAML and PARSED_ONTOLOGIES_FILE were each assigned twice (first under ONTOLOGY_REF_DIR, then under
# FIXTURES_ROOT); only the later, effective assignments are kept.
ONTO_INFO_YAML = os.path.join(FIXTURES_ROOT, "ontology_info.yml")
PARSED_ONTOLOGIES_FILE = os.path.join(FIXTURES_ROOT, "all_ontology.json.gz")
# Built from ROOT_DIR instead of splitting the file path on a hard-coded "/", which breaks with other separators.
SCHEMA_DIR = os.path.join(ROOT_DIR, "artifact-schemas")
Loading