From e35148d217495bb33252e7881919ff5b9ba543c4 Mon Sep 17 00:00:00 2001 From: Bento007 Date: Mon, 7 Oct 2024 17:03:08 -0700 Subject: [PATCH 01/10] prototype to support multiple suffixes --- asset-schemas/ontology_info_schema.json | 7 +++ ontology-assets/ontology_info.json | 7 ++- .../tests/test_all_ontology_generator.py | 60 +++++++++++++++++++ 3 files changed, 72 insertions(+), 2 deletions(-) diff --git a/asset-schemas/ontology_info_schema.json b/asset-schemas/ontology_info_schema.json index cded49f3..f52c75db 100644 --- a/asset-schemas/ontology_info_schema.json +++ b/asset-schemas/ontology_info_schema.json @@ -67,6 +67,13 @@ "filename": { "type": "string", "description": "name of ontology file used to build generated artifacts for this ontology data release" + }, + "additional_term_prefixes": { + "type": "array", + "items": { + "type": "string" + }, + "description": "List of additional term id prefixes to extract from the source ontology file." } }, "required": [ diff --git a/ontology-assets/ontology_info.json b/ontology-assets/ontology_info.json index 1031c2f2..adb56e8e 100644 --- a/ontology-assets/ontology_info.json +++ b/ontology-assets/ontology_info.json @@ -12,9 +12,12 @@ "filename": "efo.owl" }, "HANCESTRO": { - "version": "3.0", + "version": "v2024-08-19", "source": "https://github.com/EBISPOT/hancestro/raw", - "filename": "hancestro.owl" + "filename": "hancestro.owl", + "additional_term_prefixes": [ + "AfPO" + ] }, "HsapDv": { "version": "v2024-05-28", diff --git a/tools/ontology-builder/tests/test_all_ontology_generator.py b/tools/ontology-builder/tests/test_all_ontology_generator.py index f1b547b1..c327e958 100644 --- a/tools/ontology-builder/tests/test_all_ontology_generator.py +++ b/tools/ontology-builder/tests/test_all_ontology_generator.py @@ -7,6 +7,7 @@ import pytest from all_ontology_generator import ( _download_ontologies, + _extract_ontology_term_metadata, _parse_ontologies, deprecate_previous_cellxgene_schema_versions, get_ontology_info_file, 
@@ -45,6 +46,16 @@ def mock_raw_ontology_dir(tmpdir): return str(sub_dir) +@pytest.fixture +def mock_owl(tmpdir): + import owlready2 + + onto = owlready2.get_ontology("http://example.com/ontology_name.owl") + onto.name = "FAKE" + + return onto + + def test_get_ontology_info_file_default(mock_ontology_info_file): # Call the function ontology_info = get_ontology_info_file(ontology_info_file=mock_ontology_info_file) @@ -224,3 +235,52 @@ def test_deprecate_previous_cellxgene_schema_versions(mock_datetime): deprecate_previous_cellxgene_schema_versions(ontology_info, "v1") assert ontology_info == expected_ontology_info + + +@pytest.fixture +def sample_ontology(tmp_path): + # Create a new ontology + import owlready2 + + onto = owlready2.get_ontology("http://test.org/onto.owl") + onto.name = "FOO" + + with onto: + + class FOO(owlready2.Thing): + pass + + class FOO_33208(FOO): + pass + + class TestTerm(FOO_33208): + label = ["Test Term"] + IAO_0000115 = ["Test description"] + hasExactSynonym = ["Test synonym"] + deprecated = [True] + comment = ["Deprecated term"] + IAO_0000233 = ["http://example.org/term_tracker"] + IAO_0100001 = ["http://example.org/replaced_by"] + + onto.save(file=str(tmp_path.joinpath("test_ontology.owl"))) + return onto + + +def test_extract_ontology_term_metadata(sample_ontology): + term_prefixes = ["FOO"] + result = _extract_ontology_term_metadata(sample_ontology, term_prefixes) + + expected_result = { + "FOO:TestTerm": { + "ancestors": {"FOO:33208": 1}, + "label": "Test Term", + "description": "Test description", + "synonyms": ["Test synonym"], + "deprecated": True, + "comments": ["Deprecated term"], + "term_tracker": "http://example.org/term_tracker", + "replaced_by": "example:replaced_by", + } + } + + assert result == expected_result From 563093ec72d95fcf8a06cf8539c616f8cddba31e Mon Sep 17 00:00:00 2001 From: Bento007 Date: Mon, 7 Oct 2024 17:11:58 -0700 Subject: [PATCH 02/10] prototype to support multiple suffixes --- 
tools/ontology-builder/src/all_ontology_generator.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/ontology-builder/src/all_ontology_generator.py b/tools/ontology-builder/src/all_ontology_generator.py index 2882faa3..b00fa987 100755 --- a/tools/ontology-builder/src/all_ontology_generator.py +++ b/tools/ontology-builder/src/all_ontology_generator.py @@ -162,7 +162,7 @@ def _get_ancestors(onto_class: owlready2.entity.ThingClass, onto_name: str) -> D } -def _extract_ontology_term_metadata(onto: owlready2.entity.ThingClass) -> Dict[str, Any]: +def _extract_ontology_term_metadata(onto: owlready2.entity.ThingClass, term_prefixes: list[str]) -> Dict[str, Any]: """ Extract relevant metadata from ontology object and save into a dictionary following our JSON Schema @@ -174,7 +174,7 @@ def _extract_ontology_term_metadata(onto: owlready2.entity.ThingClass) -> Dict[s term_id = onto_term.name.replace("_", ":") # Skip terms that are not direct children from this ontology - if onto.name != term_id.split(":")[0]: + if term_id.split(":")[0] not in term_prefixes: continue # Gets ancestors ancestors = _get_ancestors(onto_term, onto.name) @@ -266,8 +266,8 @@ def _parse_ontologies( version = ontology_info[onto.name]["version"] output_file = os.path.join(output_path, get_ontology_file_name(onto.name, version)) logging.info(f"Processing {output_file}") - - onto_dict = _extract_ontology_term_metadata(onto) + term_prefixes = [onto.name] + ontology_info[onto.name].get("additional_term_prefixes", []) + onto_dict = _extract_ontology_term_metadata(onto, term_prefixes) with gzip.GzipFile(output_file, mode="wb", mtime=0) as fp: fp.write(json.dumps(onto_dict, indent=2).encode("utf-8")) From cdf869262f808f1f5e4888f584680baef85ee5c6 Mon Sep 17 00:00:00 2001 From: Bento007 Date: Mon, 7 Oct 2024 17:12:52 -0700 Subject: [PATCH 03/10] prototype to support multiple suffixes --- tools/ontology-builder/src/all_ontology_generator.py | 1 + 1 file changed, 1 insertion(+) diff 
--git a/tools/ontology-builder/src/all_ontology_generator.py b/tools/ontology-builder/src/all_ontology_generator.py index b00fa987..856aeacc 100755 --- a/tools/ontology-builder/src/all_ontology_generator.py +++ b/tools/ontology-builder/src/all_ontology_generator.py @@ -167,6 +167,7 @@ def _extract_ontology_term_metadata(onto: owlready2.entity.ThingClass, term_pref Extract relevant metadata from ontology object and save into a dictionary following our JSON Schema :param: onto: Ontology Object to Process + :param: term_prefixes: List of prefixes to filter out terms that are not direct children from this ontology :return: Dict[str, Any] map of ontology term IDs to pertinent metadata from ontology files """ term_dict: Dict[str, Any] = dict() From 7aa656d01e3f0e7f2517e850f82dabb81823179f Mon Sep 17 00:00:00 2001 From: Bento007 Date: Wed, 16 Oct 2024 16:43:13 -0700 Subject: [PATCH 04/10] update _get_ancestors to support allowed_ontologies change term_prefix to allowed_ontologies --- asset-schemas/ontology_info_schema.json | 2 +- ontology-assets/ontology_info.json | 4 ++-- .../src/all_ontology_generator.py | 20 +++++++++---------- .../tests/test_all_ontology_generator.py | 4 ++-- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/asset-schemas/ontology_info_schema.json b/asset-schemas/ontology_info_schema.json index f52c75db..10cf7818 100644 --- a/asset-schemas/ontology_info_schema.json +++ b/asset-schemas/ontology_info_schema.json @@ -68,7 +68,7 @@ "type": "string", "description": "name of ontology file used to build generated artifacts for this ontology data release" }, - "additional_term_prefixes": { + "additional_ontologies": { "type": "array", "items": { "type": "string" diff --git a/ontology-assets/ontology_info.json b/ontology-assets/ontology_info.json index adb56e8e..bffea035 100644 --- a/ontology-assets/ontology_info.json +++ b/ontology-assets/ontology_info.json @@ -15,7 +15,7 @@ "version": "v2024-08-19", "source": 
"https://github.com/EBISPOT/hancestro/raw", "filename": "hancestro.owl", - "additional_term_prefixes": [ + "additional_ontologies": [ "AfPO" ] }, @@ -151,4 +151,4 @@ }, "deprecated_on": "2024-05-10" } -} \ No newline at end of file +} diff --git a/tools/ontology-builder/src/all_ontology_generator.py b/tools/ontology-builder/src/all_ontology_generator.py index 856aeacc..064c65d6 100755 --- a/tools/ontology-builder/src/all_ontology_generator.py +++ b/tools/ontology-builder/src/all_ontology_generator.py @@ -116,14 +116,14 @@ def _load_ontology_object(onto_file: str) -> owlready2.entity.ThingClass: return onto -def _get_ancestors(onto_class: owlready2.entity.ThingClass, onto_name: str) -> Dict[str, int]: +def _get_ancestors(onto_class: owlready2.entity.ThingClass, allowed_ontologies: list[str]) -> Dict[str, int]: """ Returns a list of unique ancestor ontology term ids of the given onto class. Only returns those belonging to ontology_name, it will format the id from the form CL_xxxx to CL:xxxx. Ancestors are returned in ascending order of distance from the given term. 
:param owlready2.entity.ThingClass onto_class: the class for which ancestors will be retrieved - :param str onto_name: only ancestors from this ontology will be kept + :param list[str] allowed_ontologies: only ancestors from these ontologies will be kept :rtype List[str] :return list of ancestors (term ids), it could be empty @@ -144,7 +144,7 @@ def _get_ancestors(onto_class: owlready2.entity.ThingClass, onto_name: str) -> D ancestors[branch_ancestor_name] = min(ancestors[branch_ancestor_name], distance) else: queue.append((parent.value, distance + 1)) - if branch_ancestor_name.split(":")[0] == onto_name: + if branch_ancestor_name.split(":")[0] in allowed_ontologies: ancestors[branch_ancestor_name] = distance elif hasattr(parent, "name") and not hasattr(parent, "Classes"): parent_name = parent.name.replace("_", ":") @@ -158,16 +158,16 @@ def _get_ancestors(onto_class: owlready2.entity.ThingClass, onto_name: str) -> D return { ancestor: distance for ancestor, distance in sorted(ancestors.items(), key=lambda item: item[1]) - if ancestor.split(":")[0] == onto_name + if ancestor.split(":")[0] in allowed_ontologies } -def _extract_ontology_term_metadata(onto: owlready2.entity.ThingClass, term_prefixes: list[str]) -> Dict[str, Any]: +def _extract_ontology_term_metadata(onto: owlready2.entity.ThingClass, allowed_ontologies: list[str]) -> Dict[str, Any]: """ Extract relevant metadata from ontology object and save into a dictionary following our JSON Schema :param: onto: Ontology Object to Process - :param: term_prefixes: List of prefixes to filter out terms that are not direct children from this ontology + :param: allowed_ontologies: List of term prefixes to filter out terms that are not direct children from this ontology :return: Dict[str, Any] map of ontology term IDs to pertinent metadata from ontology files """ term_dict: Dict[str, Any] = dict() @@ -175,10 +175,10 @@ def _extract_ontology_term_metadata(onto: owlready2.entity.ThingClass, term_pref term_id = 
onto_term.name.replace("_", ":") # Skip terms that are not direct children from this ontology - if term_id.split(":")[0] not in term_prefixes: + if term_id.split(":")[0] not in allowed_ontologies: continue # Gets ancestors - ancestors = _get_ancestors(onto_term, onto.name) + ancestors = _get_ancestors(onto_term, allowed_ontologies) # Special Case: skip the current term if it is an NCBI Term, but not a descendant of 'NCBITaxon:33208'. if onto.name == "NCBITaxon" and "NCBITaxon:33208" not in ancestors: @@ -267,8 +267,8 @@ def _parse_ontologies( version = ontology_info[onto.name]["version"] output_file = os.path.join(output_path, get_ontology_file_name(onto.name, version)) logging.info(f"Processing {output_file}") - term_prefixes = [onto.name] + ontology_info[onto.name].get("additional_term_prefixes", []) - onto_dict = _extract_ontology_term_metadata(onto, term_prefixes) + allowed_ontologies = [onto.name] + ontology_info[onto.name].get("additional_ontologies", []) + onto_dict = _extract_ontology_term_metadata(onto, allowed_ontologies) with gzip.GzipFile(output_file, mode="wb", mtime=0) as fp: fp.write(json.dumps(onto_dict, indent=2).encode("utf-8")) diff --git a/tools/ontology-builder/tests/test_all_ontology_generator.py b/tools/ontology-builder/tests/test_all_ontology_generator.py index c327e958..b171b825 100644 --- a/tools/ontology-builder/tests/test_all_ontology_generator.py +++ b/tools/ontology-builder/tests/test_all_ontology_generator.py @@ -267,8 +267,8 @@ class TestTerm(FOO_33208): def test_extract_ontology_term_metadata(sample_ontology): - term_prefixes = ["FOO"] - result = _extract_ontology_term_metadata(sample_ontology, term_prefixes) + allowed_ontologies = ["FOO"] + result = _extract_ontology_term_metadata(sample_ontology, allowed_ontologies) expected_result = { "FOO:TestTerm": { From c0ce9b04fc69ad7b79a07ca8746d8f3856777fd5 Mon Sep 17 00:00:00 2001 From: Bento007 Date: Wed, 16 Oct 2024 16:49:06 -0700 Subject: [PATCH 05/10] update 
ontology_term_id_schema.json with AfPO --- asset-schemas/ontology_term_id_schema.json | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/asset-schemas/ontology_term_id_schema.json b/asset-schemas/ontology_term_id_schema.json index 1f6f0ef6..1c279d90 100644 --- a/asset-schemas/ontology_term_id_schema.json +++ b/asset-schemas/ontology_term_id_schema.json @@ -2,6 +2,10 @@ "$schema": "http://json-schema.org/draft-07/schema#", "title": "Ontology Version and Source Schema", "definitions": { + "AfPO_term_id": { + "type": "string", + "pattern": "^AfPO+:[0-9]+$" + }, "CL_term_id": { "type": "string", "pattern": "^CL+:[0-9]+$" @@ -40,6 +44,9 @@ }, "supported_term_id": { "anyOf": [ + { + "$ref": "#/definitions/AfPO_term_id" + }, { "$ref": "#/definitions/CL_term_id" }, From 777bb0e005f87fb6a0dc2e98db7528e672256f11 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Mon, 21 Oct 2024 12:07:08 -0400 Subject: [PATCH 06/10] fix ontology builder unit tests + add multi-ontology unit test --- .../tests/test_all_ontology_generator.py | 99 ++++++++++++++++--- 1 file changed, 84 insertions(+), 15 deletions(-) diff --git a/tools/ontology-builder/tests/test_all_ontology_generator.py b/tools/ontology-builder/tests/test_all_ontology_generator.py index b171b825..6b98820f 100644 --- a/tools/ontology-builder/tests/test_all_ontology_generator.py +++ b/tools/ontology-builder/tests/test_all_ontology_generator.py @@ -247,20 +247,29 @@ def sample_ontology(tmp_path): with onto: - class FOO(owlready2.Thing): - pass + class FOO_000001(owlready2.Thing): + label = ["Test Root Term"] - class FOO_33208(FOO): - pass - - class TestTerm(FOO_33208): - label = ["Test Term"] + class FOO_000002(FOO_000001): + label = ["Test Deprecated Descendant Term"] IAO_0000115 = ["Test description"] hasExactSynonym = ["Test synonym"] deprecated = [True] - comment = ["Deprecated term"] + comment = ["Deprecated term", "See Links for more details"] IAO_0000233 = ["http://example.org/term_tracker"] - IAO_0100001 = 
["http://example.org/replaced_by"] + IAO_0100001 = ["http://ontology.org/FOO_000003"] + + class FOO_000003(FOO_000001): + label = ["Test Non-Deprecated Descendant Term"] + + class OOF_000001(owlready2.Thing): + label = ["Test Unrelated Different Ontology Term"] + + class OOF_000002(FOO_000001): + label = ["Test Descendant Different Ontology Term"] + + class FOO_000004(OOF_000002, FOO_000003): + label = ["Test Ontology Term With Different Ontology Ancestors"] onto.save(file=str(tmp_path.joinpath("test_ontology.owl"))) return onto @@ -271,16 +280,76 @@ def test_extract_ontology_term_metadata(sample_ontology): result = _extract_ontology_term_metadata(sample_ontology, allowed_ontologies) expected_result = { - "FOO:TestTerm": { - "ancestors": {"FOO:33208": 1}, - "label": "Test Term", + "FOO:000001": { + "ancestors": {}, + "label": "Test Root Term", + "deprecated": False, + }, + "FOO:000002": { + "ancestors": {"FOO:000001": 1}, + "label": "Test Deprecated Descendant Term", "description": "Test description", "synonyms": ["Test synonym"], "deprecated": True, - "comments": ["Deprecated term"], + "comments": ["Deprecated term", "See Links for more details"], "term_tracker": "http://example.org/term_tracker", - "replaced_by": "example:replaced_by", - } + "replaced_by": "FOO:000003", + }, + "FOO:000003": { + "ancestors": {"FOO:000001": 1}, + "label": "Test Non-Deprecated Descendant Term", + "deprecated": False, + }, + "FOO:000004": { + "ancestors": {"FOO:000001": 2, "FOO:000003": 1}, + "label": "Test Ontology Term With Different Ontology Ancestors", + "deprecated": False, + }, } assert result == expected_result + + +def test_extract_ontology_term_metadata_multiple_allowed_ontologies(sample_ontology): + allowed_ontologies = ["FOO", "OOF"] + result = _extract_ontology_term_metadata(sample_ontology, allowed_ontologies) + + expected_result = { + "FOO:000001": { + "ancestors": {}, + "label": "Test Root Term", + "deprecated": False, + }, + "FOO:000002": { + "ancestors": 
{"FOO:000001": 1}, + "label": "Test Deprecated Descendant Term", + "description": "Test description", + "synonyms": ["Test synonym"], + "deprecated": True, + "comments": ["Deprecated term", "See Links for more details"], + "term_tracker": "http://example.org/term_tracker", + "replaced_by": "FOO:000003", + }, + "FOO:000003": { + "ancestors": {"FOO:000001": 1}, + "label": "Test Non-Deprecated Descendant Term", + "deprecated": False, + }, + "FOO:000004": { + "ancestors": {"FOO:000001": 2, "FOO:000003": 1, "OOF:000002": 1}, + "label": "Test Ontology Term With Different Ontology Ancestors", + "deprecated": False, + }, + "OOF:000001": { + "ancestors": {}, + "label": "Test Unrelated Different Ontology Term", + "deprecated": False, + }, + "OOF:000002": { + "ancestors": {"FOO:000001": 1}, + "label": "Test Descendant Different Ontology Term", + "deprecated": False, + }, + } + + assert result == expected_result \ No newline at end of file From e78dd343ff6ab12f1a98a4190186919448ca44e1 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Mon, 21 Oct 2024 12:07:47 -0400 Subject: [PATCH 07/10] lint --- tools/ontology-builder/tests/test_all_ontology_generator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ontology-builder/tests/test_all_ontology_generator.py b/tools/ontology-builder/tests/test_all_ontology_generator.py index 6b98820f..090cb4d9 100644 --- a/tools/ontology-builder/tests/test_all_ontology_generator.py +++ b/tools/ontology-builder/tests/test_all_ontology_generator.py @@ -352,4 +352,4 @@ def test_extract_ontology_term_metadata_multiple_allowed_ontologies(sample_ontol }, } - assert result == expected_result \ No newline at end of file + assert result == expected_result From 1a8f886e2d69ddd6e039a9c4b684166365ba3823 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Mon, 21 Oct 2024 12:28:51 -0400 Subject: [PATCH 08/10] fix codecov --- .github/workflows/push-tests.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/.github/workflows/push-tests.yml b/.github/workflows/push-tests.yml index 0cfda347..6cfd3f5e 100644 --- a/.github/workflows/push-tests.yml +++ b/.github/workflows/push-tests.yml @@ -59,6 +59,7 @@ jobs: name: coverage-builder path: /home/runner/work/cellxgene-ontology-guide/cellxgene-ontology-guide/.coverage* retention-days: 3 + include-hidden-files: true unit-test-python-api: runs-on: ubuntu-latest @@ -92,6 +93,7 @@ jobs: name: coverage-api path: /home/runner/work/cellxgene-ontology-guide/cellxgene-ontology-guide/.coverage* retention-days: 3 + include-hidden-files: true submit-codecoverage: needs: From aae57a7540b0d9c5cb2cc0535184315478761868 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Mon, 21 Oct 2024 13:24:53 -0400 Subject: [PATCH 09/10] revert HANCESTRO test bump --- ontology-assets/ontology_info.json | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/ontology-assets/ontology_info.json b/ontology-assets/ontology_info.json index bffea035..217658b4 100644 --- a/ontology-assets/ontology_info.json +++ b/ontology-assets/ontology_info.json @@ -12,12 +12,9 @@ "filename": "efo.owl" }, "HANCESTRO": { - "version": "v2024-08-19", + "version": "3.0", "source": "https://github.com/EBISPOT/hancestro/raw", - "filename": "hancestro.owl", - "additional_ontologies": [ - "AfPO" - ] + "filename": "hancestro.owl" }, "HsapDv": { "version": "v2024-05-28", From 3949116079a0edf6c53458262fb6ca257f1b0240 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Mon, 21 Oct 2024 13:26:57 -0400 Subject: [PATCH 10/10] revert AfPo term inclusion --- asset-schemas/ontology_term_id_schema.json | 7 ------- 1 file changed, 7 deletions(-) diff --git a/asset-schemas/ontology_term_id_schema.json b/asset-schemas/ontology_term_id_schema.json index 1c279d90..1f6f0ef6 100644 --- a/asset-schemas/ontology_term_id_schema.json +++ b/asset-schemas/ontology_term_id_schema.json @@ -2,10 +2,6 @@ "$schema": "http://json-schema.org/draft-07/schema#", "title": "Ontology Version and 
Source Schema", "definitions": { - "AfPO_term_id": { - "type": "string", - "pattern": "^AfPO+:[0-9]+$" - }, "CL_term_id": { "type": "string", "pattern": "^CL+:[0-9]+$" @@ -44,9 +40,6 @@ }, "supported_term_id": { "anyOf": [ - { - "$ref": "#/definitions/AfPO_term_id" - }, { "$ref": "#/definitions/CL_term_id" },