Skip to content

Commit

Permalink
feat: prototype to support multiple prefixes (#225)
Browse files Browse the repository at this point in the history
  • Loading branch information
Bento007 authored Oct 21, 2024
1 parent 133167c commit dbcdd29
Show file tree
Hide file tree
Showing 5 changed files with 149 additions and 10 deletions.
2 changes: 2 additions & 0 deletions .github/workflows/push-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ jobs:
name: coverage-builder
path: /home/runner/work/cellxgene-ontology-guide/cellxgene-ontology-guide/.coverage*
retention-days: 3
include-hidden-files: true

unit-test-python-api:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -92,6 +93,7 @@ jobs:
name: coverage-api
path: /home/runner/work/cellxgene-ontology-guide/cellxgene-ontology-guide/.coverage*
retention-days: 3
include-hidden-files: true

submit-codecoverage:
needs:
Expand Down
7 changes: 7 additions & 0 deletions asset-schemas/ontology_info_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,13 @@
"filename": {
"type": "string",
"description": "name of ontology file used to build generated artifacts for this ontology data release"
},
"additional_ontologies": {
"type": "array",
"items": {
"type": "string"
},
"description": "List of additional term id prefixes to be extracted from the source ontology file."
}
},
"required": [
Expand Down
2 changes: 1 addition & 1 deletion ontology-assets/ontology_info.json
Original file line number Diff line number Diff line change
Expand Up @@ -148,4 +148,4 @@
},
"deprecated_on": "2024-05-10"
}
}
}
19 changes: 10 additions & 9 deletions tools/ontology-builder/src/all_ontology_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,14 +116,14 @@ def _load_ontology_object(onto_file: str) -> owlready2.entity.ThingClass:
return onto


def _get_ancestors(onto_class: owlready2.entity.ThingClass, onto_name: str) -> Dict[str, int]:
def _get_ancestors(onto_class: owlready2.entity.ThingClass, allowed_ontologies: list[str]) -> Dict[str, int]:
"""
Returns a list of unique ancestor ontology term ids of the given onto class. Only returns those belonging to
ontology_name, it will format the id from the form CL_xxxx to CL:xxxx. Ancestors are returned in ascending order
of distance from the given term.
:param owlready2.entity.ThingClass onto_class: the class for which ancestors will be retrieved
:param str onto_name: only ancestors from this ontology will be kept
:param list[str] allowed_ontologies: only ancestors from these ontologies will be kept
:rtype List[str]
:return list of ancestors (term ids), it could be empty
Expand All @@ -144,7 +144,7 @@ def _get_ancestors(onto_class: owlready2.entity.ThingClass, onto_name: str) -> D
ancestors[branch_ancestor_name] = min(ancestors[branch_ancestor_name], distance)
else:
queue.append((parent.value, distance + 1))
if branch_ancestor_name.split(":")[0] == onto_name:
if branch_ancestor_name.split(":")[0] in allowed_ontologies:
ancestors[branch_ancestor_name] = distance
elif hasattr(parent, "name") and not hasattr(parent, "Classes"):
parent_name = parent.name.replace("_", ":")
Expand All @@ -158,26 +158,27 @@ def _get_ancestors(onto_class: owlready2.entity.ThingClass, onto_name: str) -> D
return {
ancestor: distance
for ancestor, distance in sorted(ancestors.items(), key=lambda item: item[1])
if ancestor.split(":")[0] == onto_name
if ancestor.split(":")[0] in allowed_ontologies
}


def _extract_ontology_term_metadata(onto: owlready2.entity.ThingClass) -> Dict[str, Any]:
def _extract_ontology_term_metadata(onto: owlready2.entity.ThingClass, allowed_ontologies: list[str]) -> Dict[str, Any]:
"""
Extract relevant metadata from ontology object and save into a dictionary following our JSON Schema
:param: onto: Ontology Object to Process
:param: allowed_ontologies: List of term id prefixes to keep; terms whose prefix is not in this list are skipped
:return: Dict[str, Any] map of ontology term IDs to pertinent metadata from ontology files
"""
term_dict: Dict[str, Any] = dict()
for onto_term in onto.classes():
term_id = onto_term.name.replace("_", ":")

# Skip terms that are not direct children from this ontology
if onto.name != term_id.split(":")[0]:
if term_id.split(":")[0] not in allowed_ontologies:
continue
# Gets ancestors
ancestors = _get_ancestors(onto_term, onto.name)
ancestors = _get_ancestors(onto_term, allowed_ontologies)

# Special Case: skip the current term if it is an NCBI Term, but not a descendant of 'NCBITaxon:33208'.
if onto.name == "NCBITaxon" and "NCBITaxon:33208" not in ancestors:
Expand Down Expand Up @@ -266,8 +267,8 @@ def _parse_ontologies(
version = ontology_info[onto.name]["version"]
output_file = os.path.join(output_path, get_ontology_file_name(onto.name, version))
logging.info(f"Processing {output_file}")

onto_dict = _extract_ontology_term_metadata(onto)
allowed_ontologies = [onto.name] + ontology_info[onto.name].get("additional_ontologies", [])
onto_dict = _extract_ontology_term_metadata(onto, allowed_ontologies)

with gzip.GzipFile(output_file, mode="wb", mtime=0) as fp:
fp.write(json.dumps(onto_dict, indent=2).encode("utf-8"))
Expand Down
129 changes: 129 additions & 0 deletions tools/ontology-builder/tests/test_all_ontology_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import pytest
from all_ontology_generator import (
_download_ontologies,
_extract_ontology_term_metadata,
_parse_ontologies,
deprecate_previous_cellxgene_schema_versions,
get_ontology_info_file,
Expand Down Expand Up @@ -45,6 +46,16 @@ def mock_raw_ontology_dir(tmpdir):
return str(sub_dir)


@pytest.fixture
def mock_owl(tmpdir):
    """Provide an owlready2 ontology object whose ``name`` is overridden to "FAKE"."""
    import owlready2

    fake_onto = owlready2.get_ontology("http://example.com/ontology_name.owl")
    fake_onto.name = "FAKE"
    return fake_onto


def test_get_ontology_info_file_default(mock_ontology_info_file):
# Call the function
ontology_info = get_ontology_info_file(ontology_info_file=mock_ontology_info_file)
Expand Down Expand Up @@ -224,3 +235,121 @@ def test_deprecate_previous_cellxgene_schema_versions(mock_datetime):
deprecate_previous_cellxgene_schema_versions(ontology_info, "v1")

assert ontology_info == expected_ontology_info


@pytest.fixture
def sample_ontology(tmp_path):
    """
    Build a small owlready2 ontology named "FOO" that mixes FOO_* and OOF_* classes, save it under tmp_path and
    return the ontology object.

    Class hierarchy declared below:
      FOO_000001                       root
      FOO_000002 (FOO_000001)          deprecated, carries description/synonym/comment/tracker/replacement values
      FOO_000003 (FOO_000001)
      OOF_000001                       root with a different prefix, unrelated to the FOO classes
      OOF_000002 (FOO_000001)          different prefix, descends from a FOO class
      FOO_000004 (OOF_000002, FOO_000003)  FOO class with both an OOF and a FOO parent
    """
    # Create a new ontology
    import owlready2

    onto = owlready2.get_ontology("http://test.org/onto.owl")
    # Override the name so it matches the "FOO" prefix of the classes declared below.
    onto.name = "FOO"

    with onto:

        class FOO_000001(owlready2.Thing):
            label = ["Test Root Term"]

        class FOO_000002(FOO_000001):
            label = ["Test Deprecated Descendant Term"]
            # The tests in this module expect this value under the "description" key.
            IAO_0000115 = ["Test description"]
            hasExactSynonym = ["Test synonym"]
            deprecated = [True]
            comment = ["Deprecated term", "See Links for more details"]
            # The tests in this module expect this value under the "term_tracker" key.
            IAO_0000233 = ["http://example.org/term_tracker"]
            # The tests in this module expect this to surface as "replaced_by": "FOO:000003".
            IAO_0100001 = ["http://ontology.org/FOO_000003"]

        class FOO_000003(FOO_000001):
            label = ["Test Non-Deprecated Descendant Term"]

        class OOF_000001(owlready2.Thing):
            label = ["Test Unrelated Different Ontology Term"]

        class OOF_000002(FOO_000001):
            label = ["Test Descendant Different Ontology Term"]

        class FOO_000004(OOF_000002, FOO_000003):
            label = ["Test Ontology Term With Different Ontology Ancestors"]

    # Persist the ontology into pytest's per-test temporary directory.
    onto.save(file=str(tmp_path.joinpath("test_ontology.owl")))
    return onto


def test_extract_ontology_term_metadata(sample_ontology):
    """With only "FOO" allowed, OOF terms appear neither as output keys nor inside any ancestor map."""
    root_term = {
        "ancestors": {},
        "label": "Test Root Term",
        "deprecated": False,
    }
    deprecated_term = {
        "ancestors": {"FOO:000001": 1},
        "label": "Test Deprecated Descendant Term",
        "description": "Test description",
        "synonyms": ["Test synonym"],
        "deprecated": True,
        "comments": ["Deprecated term", "See Links for more details"],
        "term_tracker": "http://example.org/term_tracker",
        "replaced_by": "FOO:000003",
    }
    live_descendant = {
        "ancestors": {"FOO:000001": 1},
        "label": "Test Non-Deprecated Descendant Term",
        "deprecated": False,
    }
    mixed_parent_term = {
        # The OOF:000002 parent is dropped because "OOF" is not an allowed prefix here.
        "ancestors": {"FOO:000001": 2, "FOO:000003": 1},
        "label": "Test Ontology Term With Different Ontology Ancestors",
        "deprecated": False,
    }
    expected = {
        "FOO:000001": root_term,
        "FOO:000002": deprecated_term,
        "FOO:000003": live_descendant,
        "FOO:000004": mixed_parent_term,
    }

    actual = _extract_ontology_term_metadata(sample_ontology, ["FOO"])

    assert actual == expected


def test_extract_ontology_term_metadata_multiple_allowed_ontologies(sample_ontology):
    """With "FOO" and "OOF" both allowed, OOF terms are emitted and OOF parents show up in ancestor maps."""
    foo_terms = {
        "FOO:000001": {
            "ancestors": {},
            "label": "Test Root Term",
            "deprecated": False,
        },
        "FOO:000002": {
            "ancestors": {"FOO:000001": 1},
            "label": "Test Deprecated Descendant Term",
            "description": "Test description",
            "synonyms": ["Test synonym"],
            "deprecated": True,
            "comments": ["Deprecated term", "See Links for more details"],
            "term_tracker": "http://example.org/term_tracker",
            "replaced_by": "FOO:000003",
        },
        "FOO:000003": {
            "ancestors": {"FOO:000001": 1},
            "label": "Test Non-Deprecated Descendant Term",
            "deprecated": False,
        },
        "FOO:000004": {
            # The OOF parent is now retained alongside the FOO ancestors.
            "ancestors": {"FOO:000001": 2, "FOO:000003": 1, "OOF:000002": 1},
            "label": "Test Ontology Term With Different Ontology Ancestors",
            "deprecated": False,
        },
    }
    oof_terms = {
        "OOF:000001": {
            "ancestors": {},
            "label": "Test Unrelated Different Ontology Term",
            "deprecated": False,
        },
        "OOF:000002": {
            "ancestors": {"FOO:000001": 1},
            "label": "Test Descendant Different Ontology Term",
            "deprecated": False,
        },
    }
    expected = {**foo_terms, **oof_terms}

    actual = _extract_ontology_term_metadata(sample_ontology, ["FOO", "OOF"])

    assert actual == expected

0 comments on commit dbcdd29

Please sign in to comment.