Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: prototype to support multiple prefixes #225

Merged
merged 10 commits into from
Oct 21, 2024
2 changes: 2 additions & 0 deletions .github/workflows/push-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ jobs:
name: coverage-builder
path: /home/runner/work/cellxgene-ontology-guide/cellxgene-ontology-guide/.coverage*
retention-days: 3
include-hidden-files: true

unit-test-python-api:
runs-on: ubuntu-latest
Expand Down Expand Up @@ -92,6 +93,7 @@ jobs:
name: coverage-api
path: /home/runner/work/cellxgene-ontology-guide/cellxgene-ontology-guide/.coverage*
retention-days: 3
include-hidden-files: true

submit-codecoverage:
needs:
Expand Down
7 changes: 7 additions & 0 deletions asset-schemas/ontology_info_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,13 @@
"filename": {
"type": "string",
"description": "name of ontology file used to build generated artifacts for this ontology data release"
},
"additional_ontologies": {
"type": "array",
"items": {
"type": "string"
},
"description": "List of additional term id prefixes to be extracted from the source ontology file."
}
},
"required": [
Expand Down
2 changes: 1 addition & 1 deletion ontology-assets/ontology_info.json
Original file line number Diff line number Diff line change
Expand Up @@ -148,4 +148,4 @@
},
"deprecated_on": "2024-05-10"
}
}
}
19 changes: 10 additions & 9 deletions tools/ontology-builder/src/all_ontology_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,14 +116,14 @@ def _load_ontology_object(onto_file: str) -> owlready2.entity.ThingClass:
return onto


def _get_ancestors(onto_class: owlready2.entity.ThingClass, onto_name: str) -> Dict[str, int]:
def _get_ancestors(onto_class: owlready2.entity.ThingClass, allowed_ontologies: list[str]) -> Dict[str, int]:
"""
Returns the unique ancestor ontology term ids of the given onto class, each mapped to its distance from that
class. Only returns those whose prefix is in allowed_ontologies; ids are reformatted from the form CL_xxxx to
CL:xxxx. Ancestors are returned in ascending order of distance from the given term.

:param owlready2.entity.ThingClass onto_class: the class for which ancestors will be retrieved
:param str onto_name: only ancestors from this ontology will be kept
:param list[str] allowed_ontologies: only ancestors from these ontologies will be kept

:rtype Dict[str, int]
:return dict mapping ancestor term ids to their distance from the given term, it could be empty
Expand All @@ -144,7 +144,7 @@ def _get_ancestors(onto_class: owlready2.entity.ThingClass, onto_name: str) -> D
ancestors[branch_ancestor_name] = min(ancestors[branch_ancestor_name], distance)
else:
queue.append((parent.value, distance + 1))
if branch_ancestor_name.split(":")[0] == onto_name:
if branch_ancestor_name.split(":")[0] in allowed_ontologies:
ancestors[branch_ancestor_name] = distance
elif hasattr(parent, "name") and not hasattr(parent, "Classes"):
parent_name = parent.name.replace("_", ":")
Expand All @@ -158,26 +158,27 @@ def _get_ancestors(onto_class: owlready2.entity.ThingClass, onto_name: str) -> D
return {
ancestor: distance
for ancestor, distance in sorted(ancestors.items(), key=lambda item: item[1])
if ancestor.split(":")[0] == onto_name
if ancestor.split(":")[0] in allowed_ontologies
}


def _extract_ontology_term_metadata(onto: owlready2.entity.ThingClass) -> Dict[str, Any]:
def _extract_ontology_term_metadata(onto: owlready2.entity.ThingClass, allowed_ontologies: list[str]) -> Dict[str, Any]:
"""
Extract relevant metadata from ontology object and save into a dictionary following our JSON Schema

:param: onto: Ontology Object to Process
:param: allowed_ontologies: List of term ID prefixes to keep; terms and ancestors whose prefix is not in this list are skipped
:return: Dict[str, Any] map of ontology term IDs to pertinent metadata from ontology files
"""
term_dict: Dict[str, Any] = dict()
for onto_term in onto.classes():
term_id = onto_term.name.replace("_", ":")

# Skip terms whose prefix is not one of the allowed ontologies
if onto.name != term_id.split(":")[0]:
if term_id.split(":")[0] not in allowed_ontologies:
continue
# Gets ancestors
ancestors = _get_ancestors(onto_term, onto.name)
ancestors = _get_ancestors(onto_term, allowed_ontologies)

# Special Case: skip the current term if it is an NCBI Term, but not a descendant of 'NCBITaxon:33208'.
if onto.name == "NCBITaxon" and "NCBITaxon:33208" not in ancestors:
Expand Down Expand Up @@ -266,8 +267,8 @@ def _parse_ontologies(
version = ontology_info[onto.name]["version"]
output_file = os.path.join(output_path, get_ontology_file_name(onto.name, version))
logging.info(f"Processing {output_file}")

onto_dict = _extract_ontology_term_metadata(onto)
allowed_ontologies = [onto.name] + ontology_info[onto.name].get("additional_ontologies", [])
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should allowed_ontologies agree with additional_ontologies in the schema?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think allowed_ontologies == main ontology + additional ontologies makes intuitive sense as-is

onto_dict = _extract_ontology_term_metadata(onto, allowed_ontologies)

with gzip.GzipFile(output_file, mode="wb", mtime=0) as fp:
fp.write(json.dumps(onto_dict, indent=2).encode("utf-8"))
Expand Down
129 changes: 129 additions & 0 deletions tools/ontology-builder/tests/test_all_ontology_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import pytest
from all_ontology_generator import (
_download_ontologies,
_extract_ontology_term_metadata,
_parse_ontologies,
deprecate_previous_cellxgene_schema_versions,
get_ontology_info_file,
Expand Down Expand Up @@ -45,6 +46,16 @@
return str(sub_dir)


@pytest.fixture
def mock_owl(tmpdir):
import owlready2

Check warning on line 51 in tools/ontology-builder/tests/test_all_ontology_generator.py

View check run for this annotation

Codecov / codecov/patch

tools/ontology-builder/tests/test_all_ontology_generator.py#L51

Added line #L51 was not covered by tests

onto = owlready2.get_ontology("http://example.com/ontology_name.owl")
onto.name = "FAKE"

Check warning on line 54 in tools/ontology-builder/tests/test_all_ontology_generator.py

View check run for this annotation

Codecov / codecov/patch

tools/ontology-builder/tests/test_all_ontology_generator.py#L53-L54

Added lines #L53 - L54 were not covered by tests

return onto

Check warning on line 56 in tools/ontology-builder/tests/test_all_ontology_generator.py

View check run for this annotation

Codecov / codecov/patch

tools/ontology-builder/tests/test_all_ontology_generator.py#L56

Added line #L56 was not covered by tests


def test_get_ontology_info_file_default(mock_ontology_info_file):
# Call the function
ontology_info = get_ontology_info_file(ontology_info_file=mock_ontology_info_file)
Expand Down Expand Up @@ -224,3 +235,121 @@
deprecate_previous_cellxgene_schema_versions(ontology_info, "v1")

assert ontology_info == expected_ontology_info


@pytest.fixture
def sample_ontology(tmp_path):
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nayib-jose-gloria thank you for fixing.

# Create a new ontology
import owlready2

onto = owlready2.get_ontology("http://test.org/onto.owl")
onto.name = "FOO"

with onto:

class FOO_000001(owlready2.Thing):
label = ["Test Root Term"]

class FOO_000002(FOO_000001):
label = ["Test Deprecated Descendant Term"]
IAO_0000115 = ["Test description"]
hasExactSynonym = ["Test synonym"]
deprecated = [True]
comment = ["Deprecated term", "See Links for more details"]
IAO_0000233 = ["http://example.org/term_tracker"]
IAO_0100001 = ["http://ontology.org/FOO_000003"]

class FOO_000003(FOO_000001):
label = ["Test Non-Deprecated Descendant Term"]

class OOF_000001(owlready2.Thing):
label = ["Test Unrelated Different Ontology Term"]

class OOF_000002(FOO_000001):
label = ["Test Descendant Different Ontology Term"]

class FOO_000004(OOF_000002, FOO_000003):
label = ["Test Ontology Term With Different Ontology Ancestors"]

onto.save(file=str(tmp_path.joinpath("test_ontology.owl")))
return onto


def test_extract_ontology_term_metadata(sample_ontology):
allowed_ontologies = ["FOO"]
nayib-jose-gloria marked this conversation as resolved.
Show resolved Hide resolved
result = _extract_ontology_term_metadata(sample_ontology, allowed_ontologies)

expected_result = {
"FOO:000001": {
"ancestors": {},
"label": "Test Root Term",
"deprecated": False,
},
"FOO:000002": {
"ancestors": {"FOO:000001": 1},
"label": "Test Deprecated Descendant Term",
"description": "Test description",
"synonyms": ["Test synonym"],
"deprecated": True,
"comments": ["Deprecated term", "See Links for more details"],
"term_tracker": "http://example.org/term_tracker",
"replaced_by": "FOO:000003",
},
"FOO:000003": {
"ancestors": {"FOO:000001": 1},
"label": "Test Non-Deprecated Descendant Term",
"deprecated": False,
},
"FOO:000004": {
"ancestors": {"FOO:000001": 2, "FOO:000003": 1},
"label": "Test Ontology Term With Different Ontology Ancestors",
"deprecated": False,
},
}

assert result == expected_result


def test_extract_ontology_term_metadata_multiple_allowed_ontologies(sample_ontology):
    """
    With both the FOO and OOF prefixes allowed, terms from either prefix are extracted and
    ancestors from either prefix are kept.
    """

    def plain_term(label, ancestors):
        # Shape shared by every non-deprecated term that carries no optional metadata.
        return {"ancestors": ancestors, "label": label, "deprecated": False}

    # The one deprecated term carries the full set of optional metadata fields.
    deprecated_term = {
        "ancestors": {"FOO:000001": 1},
        "label": "Test Deprecated Descendant Term",
        "description": "Test description",
        "synonyms": ["Test synonym"],
        "deprecated": True,
        "comments": ["Deprecated term", "See Links for more details"],
        "term_tracker": "http://example.org/term_tracker",
        "replaced_by": "FOO:000003",
    }

    expected = {
        "FOO:000001": plain_term("Test Root Term", {}),
        "FOO:000002": deprecated_term,
        "FOO:000003": plain_term("Test Non-Deprecated Descendant Term", {"FOO:000001": 1}),
        # FOO:000004 inherits from both OOF:000002 and FOO:000003, so both show up at distance 1.
        "FOO:000004": plain_term(
            "Test Ontology Term With Different Ontology Ancestors",
            {"FOO:000001": 2, "FOO:000003": 1, "OOF:000002": 1},
        ),
        "OOF:000001": plain_term("Test Unrelated Different Ontology Term", {}),
        "OOF:000002": plain_term("Test Descendant Different Ontology Term", {"FOO:000001": 1}),
    }

    actual = _extract_ontology_term_metadata(sample_ontology, ["FOO", "OOF"])

    assert actual == expected
Loading