From 6c715a2d01d6bfa696d76d78003d40be3eb31f50 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Mon, 21 Oct 2024 18:30:03 -0400 Subject: [PATCH 01/18] query terms imported into a supported ontology --- .../ontology_parser.py | 28 ++- .../supported_versions.py | 8 + api/python/tests/test_ontology_parser.py | 171 ++++++++++++++++-- api/python/tests/test_supported_versions.py | 30 ++- 4 files changed, 210 insertions(+), 27 deletions(-) diff --git a/api/python/src/cellxgene_ontology_guide/ontology_parser.py b/api/python/src/cellxgene_ontology_guide/ontology_parser.py index cd83d178..d57330b3 100644 --- a/api/python/src/cellxgene_ontology_guide/ontology_parser.py +++ b/api/python/src/cellxgene_ontology_guide/ontology_parser.py @@ -40,7 +40,8 @@ def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]: :param ontology_name: str name of ontology to get map of term labels to term IDs """ - if ontology_name not in self.cxg_schema.supported_ontologies: + ontology_name = self._get_supported_ontology_name(ontology_name) + if not ontology_name: raise ValueError(f"{ontology_name} is not a supported ontology, its metadata cannot be fetched.") if self.term_label_to_id_map[ontology_name]: @@ -63,12 +64,33 @@ def _parse_ontology_name(self, term_id: str) -> str: if not re.match(pattern, term_id): raise ValueError(f"{term_id} does not conform to expected regex pattern {pattern} and cannot be queried.") - ontology_name = term_id.split(":")[0] - if ontology_name not in self.cxg_schema.supported_ontologies: + ontology_term_prefix = term_id.split(":")[0] + ontology_name = self._get_supported_ontology_name(ontology_term_prefix) + if not ontology_name: raise ValueError(f"{term_id} is not part of a supported ontology, its metadata cannot be fetched.") return ontology_name + def _get_supported_ontology_name(self, ontology_term_prefix: str) -> Optional[str]: + """ + Get the source ontology name for a given ontology term prefix, if it is supported by the CxG schema. + + If ontology_term_prefix is directly supported by the CxG schema, returns ontology_term_prefix. + If ontology_term_prefix is supported as an import from another ontology, returns the name of the source ontology + it is imported in. + Otherwise, returns None. + + :param ontology_term_prefix: str ontology term prefix to check + :return: str name of ontology that term belongs to, or None if it is not directly supported nor imported in + a supported ontology in the CxG schema. + """ + if ontology_term_prefix in self.cxg_schema.supported_ontologies: + return ontology_term_prefix + elif ontology_term_prefix in self.cxg_schema.imported_ontologies: + return self.cxg_schema.imported_ontologies[ontology_term_prefix] + return None + + def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool: """ Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined diff --git a/api/python/src/cellxgene_ontology_guide/supported_versions.py b/api/python/src/cellxgene_ontology_guide/supported_versions.py index 52b10cbd..8dd1db8c 100644 --- a/api/python/src/cellxgene_ontology_guide/supported_versions.py +++ b/api/python/src/cellxgene_ontology_guide/supported_versions.py @@ -75,6 +75,11 @@ def __init__(self, version: Optional[str] = None): self.version = _version self.supported_ontologies = ontology_info[_version]["ontologies"] + self.imported_ontologies = { + imported_ontology: ontology + for ontology, info in self.supported_ontologies.items() + for imported_ontology in info.get("additional_ontologies", []) + } self.ontology_file_names: Dict[str, str] = {} self.deprecated_on = ontology_info[_version].get("deprecated_on") if self.deprecated_on: @@ -87,6 +92,9 @@ def __init__(self, version: Optional[str] = None): def ontology(self, name: str) -> Any: """Return the ontology terms for the given ontology name. Load from the file cache if available. + + Does not support "additional ontologies" of another ontology. + :param name: str name of the ontology to get the terms for :return: dict representation of the ontology terms """ diff --git a/api/python/tests/test_ontology_parser.py b/api/python/tests/test_ontology_parser.py index 619ef24b..ad24e0fa 100644 --- a/api/python/tests/test_ontology_parser.py +++ b/api/python/tests/test_ontology_parser.py @@ -42,13 +42,64 @@ def ontology_dict(): @pytest.fixture -def mock_CXGSchema(ontology_dict, mock_load_supported_versions, mock_load_ontology_file): +def ontology_dict_with_imports(): + return { + "HANCESTRO:0000000": { + "ancestors": {}, + "label": "root ancestry type", + "description": "This is a root ancestry type.", + "comments": ["this is an HANCESTRO term, not imported"], + "term_tracker": "http://example.com/issue/HANCESTRO/1234", + "synonyms": ["root ancestry synonym"], + "deprecated": False + }, + "AfPO:0000000": { + "ancestors": {"HANCESTRO:0000000": 1}, + "label": "specialized ancestry type", + "description": "This is a specialized ancestry type.", + "deprecated": False, + "comments": ["this is an AfPO term imported into HANCESTRO"], + "synonyms": ["specialized ancestry synonym"], + "term_tracker": "http://example.com/issue/AfPO/1234", + "replaced_by": "AfPO:0000001" + }, + "HANCESTRO:0000001": { + "ancestors": {"HANCESTRO:0000000": 2, "AfPO:0000000": 1}, + "label": "root ontology descendant of specialized ancestry type", + "deprecated": False + }, + } + + +@pytest.fixture +def mock_CXGSchema(ontology_dict, ontology_dict_with_imports, mock_load_supported_versions, mock_load_ontology_file): mock_load_supported_versions.return_value = { - "5.0.0": {"ontologies": {"CL": {"version": "2024-01-01", "source": "http://example.com", "filename": "cl.owl"}}} + "5.0.0": { + "ontologies": { + "CL": {"version": "2024-01-01", "source": "http://example.com", "filename": "cl.owl"}, + "HANCESTRO": { + "version": "2024-01-01", + "source": "http://example.com", + "filename": "cl.owl", + "additional_ontologies": ["AfPO"] + }, + } + } } cxg_schema = CXGSchema() - cxg_schema.ontology_file_names = {"CL": "CL-ontology-2024-01-01.json.gz"} - mock_load_ontology_file.return_value = ontology_dict + cxg_schema.ontology_file_names = { + "CL": "CL-ontology-2024-01-01.json.gz", + "HANCESTRO": "HANCESTRO-ontology-2024-01-01.json.gz" + } + + def get_mock_ontology_dict(file_name): + if "CL" in file_name: + return ontology_dict + if "HANCESTRO" in file_name: + return ontology_dict_with_imports + return None + + mock_load_ontology_file.side_effect = get_mock_ontology_dict with patch("cellxgene_ontology_guide.ontology_parser.CXGSchema", return_value=cxg_schema) as mock: yield mock @@ -62,6 +113,10 @@ def ontology_parser(mock_CXGSchema): def test_parse_ontology_name(ontology_parser): assert ontology_parser._parse_ontology_name("CL:0000001") == "CL" +@pytest.mark.parametrize("term_id", ["AfPO:0000001", "HANCESTRO:0000000"]) +def test_parse_ontology_name__imported_term(ontology_parser, term_id): + assert ontology_parser._parse_ontology_name(term_id) == "HANCESTRO" + def test_parse_ontology_name__wrong_format(ontology_parser): with pytest.raises(ValueError): @@ -74,7 +129,9 @@ def test_parse_ontology_name__not_supported(ontology_parser): @pytest.mark.parametrize( - "term_id,expected", [("CL:0000001", True), ("CL:0000003", True), ("CL:0000009", False), ("GO:0000001", False)] + "term_id,expected", [ + ("CL:0000001", True), ("CL:0000003", True), ("CL:0000009", False), ("GO:0000001", False), ("AfPO:0000000", True) + ] ) def test_is_valid_term_id(ontology_parser, term_id, expected): assert ontology_parser.is_valid_term_id(term_id) == expected @@ -82,7 +139,10 @@ def test_is_valid_term_id(ontology_parser, term_id, expected): @pytest.mark.parametrize( "term_id,ontology,expected", - [("CL:0000001", "CL", True), ("CL:0000001", "UBERON", False), ("GO:0000001", "GO", False)], + [ + ("CL:0000001", "CL", True), ("CL:0000001", "UBERON", False), ("GO:0000001", "GO", False), + ("AfPO:0000000", "HANCESTRO", True), ("AfPO:0000000", "AfPO", False), ("HANCESTRO:0000001", "AfPO", False), + ] ) def test_is_valid_term_id__with_ontology(ontology_parser, term_id, ontology, expected): assert ontology_parser.is_valid_term_id(term_id, ontology) == expected @@ -97,6 +157,8 @@ def test_get_term_ancestors(ontology_parser): "CL:0000004", ] assert ontology_parser.get_term_ancestors("unknown", include_self=True) == [] + assert ontology_parser.get_term_ancestors("AfPO:0000000") == ["HANCESTRO:0000000"] + assert ontology_parser.get_term_ancestors("HANCESTRO:0000001") == ["HANCESTRO:0000000", "AfPO:0000000"] def test_map_term_ancestors(ontology_parser): @@ -109,6 +171,10 @@ def test_map_term_ancestors(ontology_parser): "CL:0000004": ["CL:0000000", "CL:0000001", "CL:0000002", "CL:0000004"], "unknown": [], } + assert ontology_parser.map_term_ancestors(["AfPO:0000000", "HANCESTRO:0000001"]) == { + "AfPO:0000000": ["HANCESTRO:0000000"], + "HANCESTRO:0000001": ["HANCESTRO:0000000", "AfPO:0000000"], + } def test_get_term_ancestors_with_distances(ontology_parser): @@ -124,6 +190,11 @@ def test_get_term_ancestors_with_distances(ontology_parser): "CL:0000004": 0, } assert ontology_parser.get_term_ancestors_with_distances("unknown", include_self=True) == {} + assert ontology_parser.get_term_ancestors_with_distances("AfPO:0000000") == {"HANCESTRO:0000000": 1} + assert ontology_parser.get_term_ancestors_with_distances("HANCESTRO:0000001") == { + "HANCESTRO:0000000": 2, + "AfPO:0000000": 1, + } def map_term_ancestors_with_distances(ontology_parser): @@ -138,6 +209,10 @@ def map_term_ancestors_with_distances(ontology_parser): "CL:0000004": {"CL:0000000": 2, "CL:0000001": 1, "CL:0000002": 1, "CL:0000004": 0}, "unknown": {}, } + assert ontology_parser.map_term_ancestors_with_distances(["AfPO:0000000", "HANCESTRO:0000001"]) == { + "AfPO:0000000": {"HANCESTRO:0000000": 1}, + "HANCESTRO:0000001": {"HANCESTRO:0000000": 2, "AfPO:0000000": 1}, + } def test_get_term_descendants(ontology_parser): @@ -153,6 +228,8 @@ def test_get_term_descendants(ontology_parser): assert ontology_parser.get_term_descendants("CL:0000004") == [] assert ontology_parser.get_term_descendants("CL:0000004", include_self=True) == ["CL:0000004"] assert ontology_parser.get_term_descendants("na") == [] + assert ontology_parser.get_term_descendants("AfPO:0000000") == ["HANCESTRO:0000001"] + assert ontology_parser.get_term_descendants("HANCESTRO:0000000") == ["AfPO:0000000", "HANCESTRO:0000001"] def test_map_term_descendants(ontology_parser): @@ -182,16 +259,23 @@ def test_map_term_descendants(ontology_parser): "CL:0000004": ["CL:0000004"], "unknown": [], } + assert ontology_parser.map_term_descendants(["AfPO:0000000", "HANCESTRO:0000000"]) == { + "AfPO:0000000": ["HANCESTRO:0000001"], + "HANCESTRO:0000000": ["AfPO:0000000", "HANCESTRO:0000001"], + } def test_is_term_deprecated(ontology_parser): assert ontology_parser.is_term_deprecated("CL:0000003") assert not ontology_parser.is_term_deprecated("CL:0000004") + assert not ontology_parser.is_term_deprecated("AfPO:0000000") def test_get_term_replacement(ontology_parser): assert ontology_parser.get_term_replacement("CL:0000003") == "CL:0000004" assert ontology_parser.get_term_replacement("CL:0000004") is None + assert ontology_parser.get_term_replacement("HANCESTRO:0000000") is None + assert ontology_parser.get_term_replacement("AfPO:0000000") == "AfPO:0000001" def test_get_term_metadata(ontology_parser): @@ -205,16 +289,30 @@ def test_get_term_metadata(ontology_parser): "term_tracker": None, "consider": ["CL:0000004"], } + assert ontology_parser.get_term_metadata("AfPO:0000000") == { + "comments": ["this is an AfPO term imported into HANCESTRO"], + "term_tracker": "http://example.com/issue/AfPO/1234", + "consider": None, + } + assert ontology_parser.get_term_metadata("HANCESTRO:0000000") == { + "comments": ["this is an HANCESTRO term, not imported"], + "term_tracker": "http://example.com/issue/HANCESTRO/1234", + "consider": None, + } def test_get_term_label(ontology_parser): assert ontology_parser.get_term_label("CL:0000004") == "cell BC" + assert ontology_parser.get_term_label("AfPO:0000000") == "specialized ancestry type" + assert ontology_parser.get_term_label("HANCESTRO:0000000") == "root ancestry type" def test_map_term_labels(ontology_parser): - assert ontology_parser.map_term_labels(["CL:0000000", "CL:0000004", "unknown", "na"]) == { + assert ontology_parser.map_term_labels(["CL:0000000", "CL:0000004", "AfPO:0000000", "HANCESTRO:0000000", "unknown", "na"]) == { "CL:0000000": "cell A", "CL:0000004": "cell BC", + "AfPO:0000000": "specialized ancestry type", + "HANCESTRO:0000000": "root ancestry type", "unknown": "unknown", "na": "na", } @@ -222,12 +320,19 @@ def test_map_term_labels(ontology_parser): def test_get_term_description(ontology_parser): assert ontology_parser.get_term_description("CL:0000000") == "This is cell A." + assert ontology_parser.get_term_description("CL:0000004") is None + assert ontology_parser.get_term_description("HANCESTRO:0000000") == "This is a root ancestry type." + assert ontology_parser.get_term_description("AfPO:0000000") == "This is a specialized ancestry type." def test_map_term_description(ontology_parser): - assert ontology_parser.map_term_descriptions(["CL:0000000", "CL:0000004", "unknown", "na"]) == { + assert ontology_parser.map_term_descriptions( + ["CL:0000000", "CL:0000004", "AfPO:0000000", "HANCESTRO:0000000", "unknown", "na"] + ) == { "CL:0000000": "This is cell A.", "CL:0000004": None, + "AfPO:0000000": "This is a specialized ancestry type.", + "HANCESTRO:0000000": "This is a root ancestry type.", "unknown": "unknown", "na": "na", } @@ -235,12 +340,16 @@ def test_map_term_description(ontology_parser): def test_get_term_synonyms(ontology_parser): assert ontology_parser.get_term_synonyms("CL:0000001") == ["cell Beta", "cell Bravo"] + assert ontology_parser.get_term_synonyms("AfPO:0000000") == ["specialized ancestry synonym"] + assert ontology_parser.get_term_synonyms("HANCESTRO:0000000") == ["root ancestry synonym"] def test_map_term_synonyms(ontology_parser): - assert ontology_parser.map_term_synonyms(["CL:0000000", "CL:0000001", "unknown", "na"]) == { + assert ontology_parser.map_term_synonyms(["CL:0000000", "CL:0000001", "AfPO:0000000", "HANCESTRO:0000000", "unknown", "na"]) == { "CL:0000000": [], "CL:0000001": ["cell Beta", "cell Bravo"], + "AfPO:0000000": ["specialized ancestry synonym"], + "HANCESTRO:0000000": ["root ancestry synonym"], "unknown": [], "na": [], } @@ -319,7 +428,8 @@ def test_get_distance_between_terms(ontology_parser): @pytest.mark.parametrize( "term_id,expected", - [("CL:0000005", ["CL:0000001", "CL:0000002"]), ("CL:0000002", ["CL:0000000"]), ("CL:0000000", []), ("unknown", [])], + [("CL:0000005", ["CL:0000001", "CL:0000002"]), ("CL:0000002", ["CL:0000000"]), ("CL:0000000", []), ("unknown", []), + ("AfPO:0000000", ["HANCESTRO:0000000"]), ("HANCESTRO:0000001", ["AfPO:0000000"])], ) def test_get_term_parents(ontology_parser, term_id, expected): assert ontology_parser.get_term_parents(term_id) == expected @@ -327,7 +437,8 @@ def test_get_term_parents(ontology_parser, term_id, expected): @pytest.mark.parametrize( "term_id,expected", - [("CL:0000000", ["CL:0000001", "CL:0000002", "CL:0000003"]), ("CL:0000005", []), ("unknown", [])], + [("CL:0000000", ["CL:0000001", "CL:0000002", "CL:0000003"]), ("CL:0000005", []), ("unknown", []), + ("AfPO:0000000", ["HANCESTRO:0000001"]), ("HANCESTRO:0000000", ["AfPO:0000000"])], ) def test_get_term_children(ontology_parser, term_id, expected): assert ontology_parser.get_term_children(term_id) == expected @@ -373,6 +484,20 @@ def test_get_term_graph(ontology_parser): } +def test_get_term_graph__imported_ontology(ontology_parser): + graph = ontology_parser.get_term_graph("AfPO:0000000") + assert graph.to_dict() == { + "term_id": "AfPO:0000000", + "name": "specialized ancestry type", + "children": [{"term_id": "HANCESTRO:0000001", "name": "root ontology descendant of specialized ancestry type", "children": []}], + } + + assert graph.term_counter == { + "AfPO:0000000": 1, + "HANCESTRO:0000001": 1, + } + + def test_get_term_label_to_id_map(ontology_parser): term_label_to_id_map_expected = { "cell A": "CL:0000000", @@ -389,9 +514,27 @@ def test_get_term_label_to_id_map(ontology_parser): assert ontology_parser.term_label_to_id_map["CL"] == term_label_to_id_map_expected -def test_get_term_id_by_label(ontology_parser): - assert ontology_parser.get_term_id_by_label("cell A", "CL") == "CL:0000000" - assert ontology_parser.get_term_id_by_label("cell Z", "CL") is None +@pytest.mark.parametrize("ontology_name", ["AfPO", "HANCESTRO"]) +def test_get_term_label_to_id_map__imported_ontology(ontology_parser, ontology_name): + term_label_to_id_map_expected = { + "root ancestry type": "HANCESTRO:0000000", + "specialized ancestry type": "AfPO:0000000", + "root ontology descendant of specialized ancestry type": "HANCESTRO:0000001", + } + assert ontology_parser.get_term_label_to_id_map(ontology_name) == term_label_to_id_map_expected + + +@pytest.mark.parametrize( + "label,ontology_name,expected", + [ + ("cell A", "CL", "CL:0000000"), + ("cell Z", "CL", None), + ("root ancestry type", "HANCESTRO", "HANCESTRO:0000000"), + ("specialized ancestry type", "AfPO", "AfPO:0000000"), + ], +) +def test_get_term_id_by_label(ontology_parser, label, ontology_name, expected): + assert ontology_parser.get_term_id_by_label(label, ontology_name) == expected def test_get_term_id_by_label__unsupported_ontology_name(ontology_parser): diff --git a/api/python/tests/test_supported_versions.py b/api/python/tests/test_supported_versions.py index 965ff4a5..a9f3e3cd 100644 --- a/api/python/tests/test_supported_versions.py +++ b/api/python/tests/test_supported_versions.py @@ -13,14 +13,26 @@ MODULE_PATH = "cellxgene_ontology_guide.supported_versions" - @pytest.fixture -def initialized_CXGSchemaInfo(mock_load_supported_versions): - mock_load_supported_versions.return_value = { +def ontology_info_content(): + return { "5.0.0": { - "ontologies": {"CL": {"version": "v2024-01-01", "source": "http://example.com", "filename": "cl.owl"}} + "ontologies": { + "CL": {"version": "v2024-01-01", "source": "http://example.com", "filename": "cl.owl"}, + "HANCESTRO": { + "version": "v2024-01-01", + "source": "http://example.com", + "filename": "hancestro.owl", + "additional_ontologies": ["FOO", "OOF"] + }, + } } } + + +@pytest.fixture +def initialized_CXGSchemaInfo(mock_load_supported_versions, ontology_info_content): + mock_load_supported_versions.return_value = ontology_info_content return CXGSchema() @@ -77,12 +89,10 @@ def test_coerce_version(version, expected): class TestCXGSchema: - def test__init__defaults(self, mock_load_supported_versions): - support_versions = {"5.0.0": {"ontologies": {}}, "0.0.1": {"ontologies": {}}} - mock_load_supported_versions.return_value = support_versions - cxgs = CXGSchema() - assert cxgs.version == "5.0.0" - assert cxgs.supported_ontologies == support_versions["5.0.0"]["ontologies"] + def test__init__defaults(self, ontology_info_content, initialized_CXGSchemaInfo): + assert initialized_CXGSchemaInfo.version == "5.0.0" + assert initialized_CXGSchemaInfo.supported_ontologies == ontology_info_content["5.0.0"]["ontologies"] + assert initialized_CXGSchemaInfo.imported_ontologies == {"FOO": "HANCESTRO", "OOF": "HANCESTRO"} @pytest.mark.parametrize("version", ["v0.0.1", "0.0.1"]) def test__init__specific_version(self, version, mock_load_supported_versions): From 8586d8f13a676451526ebfe1eba9af7ce70be20b Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Mon, 21 Oct 2024 18:39:01 -0400 Subject: [PATCH 02/18] add a couple more tests --- api/python/tests/test_ontology_parser.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/api/python/tests/test_ontology_parser.py b/api/python/tests/test_ontology_parser.py index ad24e0fa..27a362da 100644 --- a/api/python/tests/test_ontology_parser.py +++ b/api/python/tests/test_ontology_parser.py @@ -411,6 +411,11 @@ def test_get_lowest_common_ancestors(ontology_parser): # disjoint assert ontology_parser.get_lowest_common_ancestors(term_id_1="CL:0000001", term_id_2="CL:0000008") == [] + # diff ontology terms with a common ancestor + assert ontology_parser.get_lowest_common_ancestors( + term_id_1="AfPO:0000000", term_id_2="HANCESTRO:0000001" + ) == ["AfPO:0000000"] + def test_get_distance_between_terms(ontology_parser): # distance when root node is lca @@ -425,6 +430,9 @@ def test_get_distance_between_terms(ontology_parser): # disjoint distance assert ontology_parser.get_distance_between_terms(term_id_1="CL:0000001", term_id_2="CL:0000008") == -1 + # diff ontology terms + assert ontology_parser.get_distance_between_terms(term_id_1="AfPO:0000000", term_id_2="HANCESTRO:0000001") == 1 + @pytest.mark.parametrize( "term_id,expected", From 04cd72541da40427d14dd5750513dbba91a4eea6 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Tue, 22 Oct 2024 11:51:25 -0400 Subject: [PATCH 03/18] lint + mypy typing --- .../ontology_parser.py | 5 +- api/python/tests/test_ontology_parser.py | 67 +++++++++++++------ api/python/tests/test_supported_versions.py | 3 +- 3 files changed, 50 insertions(+), 25 deletions(-) diff --git a/api/python/src/cellxgene_ontology_guide/ontology_parser.py b/api/python/src/cellxgene_ontology_guide/ontology_parser.py index d57330b3..e14746b2 100644 --- a/api/python/src/cellxgene_ontology_guide/ontology_parser.py +++ b/api/python/src/cellxgene_ontology_guide/ontology_parser.py @@ -40,7 +40,7 @@ def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]: :param ontology_name: str name of ontology to get map of term labels to term IDs """ - ontology_name = self._get_supported_ontology_name(ontology_name) + ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name) if not ontology_name: raise ValueError(f"{ontology_name} is not a supported ontology, its metadata cannot be fetched.") @@ -65,7 +65,7 @@ def _parse_ontology_name(self, term_id: str) -> str: raise ValueError(f"{term_id} does not conform to expected regex pattern {pattern} and cannot be queried.") ontology_term_prefix = term_id.split(":")[0] - ontology_name = self._get_supported_ontology_name(ontology_term_prefix) + ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_term_prefix) if not ontology_name: raise ValueError(f"{term_id} is not part of a supported ontology, its metadata cannot be fetched.") @@ -90,7 +90,6 @@ def _get_supported_ontology_name(self, ontology_term_prefix: str) -> Optional[st return self.cxg_schema.imported_ontologies[ontology_term_prefix] return None - def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool: """ Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined diff --git a/api/python/tests/test_ontology_parser.py b/api/python/tests/test_ontology_parser.py index 27a362da..af47c5c8 100644 --- a/api/python/tests/test_ontology_parser.py +++ b/api/python/tests/test_ontology_parser.py @@ -51,7 +51,7 @@ def ontology_dict_with_imports(): "comments": ["this is an HANCESTRO term, not imported"], "term_tracker": "http://example.com/issue/HANCESTRO/1234", "synonyms": ["root ancestry synonym"], - "deprecated": False + "deprecated": False, }, "AfPO:0000000": { "ancestors": {"HANCESTRO:0000000": 1}, @@ -61,12 +61,12 @@ def ontology_dict_with_imports(): "comments": ["this is an AfPO term imported into HANCESTRO"], "synonyms": ["specialized ancestry synonym"], "term_tracker": "http://example.com/issue/AfPO/1234", - "replaced_by": "AfPO:0000001" + "replaced_by": "AfPO:0000001", }, "HANCESTRO:0000001": { "ancestors": {"HANCESTRO:0000000": 2, "AfPO:0000000": 1}, "label": "root ontology descendant of specialized ancestry type", - "deprecated": False + "deprecated": False, }, } @@ -81,7 +81,7 @@ def mock_CXGSchema(ontology_dict, ontology_dict_with_imports, mock_load_supporte "version": "2024-01-01", "source": "http://example.com", "filename": "cl.owl", - "additional_ontologies": ["AfPO"] + "additional_ontologies": ["AfPO"], }, } } @@ -89,7 +89,7 @@ def mock_CXGSchema(ontology_dict, ontology_dict_with_imports, mock_load_supporte cxg_schema = CXGSchema() cxg_schema.ontology_file_names = { "CL": "CL-ontology-2024-01-01.json.gz", - "HANCESTRO": "HANCESTRO-ontology-2024-01-01.json.gz" + "HANCESTRO": "HANCESTRO-ontology-2024-01-01.json.gz", } def get_mock_ontology_dict(file_name): @@ -113,6 +113,7 @@ def ontology_parser(mock_CXGSchema): def test_parse_ontology_name(ontology_parser): assert ontology_parser._parse_ontology_name("CL:0000001") == "CL" + @pytest.mark.parametrize("term_id", ["AfPO:0000001", "HANCESTRO:0000000"]) def test_parse_ontology_name__imported_term(ontology_parser, term_id): assert ontology_parser._parse_ontology_name(term_id) == "HANCESTRO" @@ -129,9 +130,8 @@ def test_parse_ontology_name__not_supported(ontology_parser): @pytest.mark.parametrize( - "term_id,expected", [ - ("CL:0000001", True), ("CL:0000003", True), ("CL:0000009", False), ("GO:0000001", False), ("AfPO:0000000", True) - ] + "term_id,expected", + [("CL:0000001", True), ("CL:0000003", True), ("CL:0000009", False), ("GO:0000001", False), ("AfPO:0000000", True)], ) def test_is_valid_term_id(ontology_parser, term_id, expected): assert ontology_parser.is_valid_term_id(term_id) == expected @@ -140,9 +140,13 @@ def test_is_valid_term_id(ontology_parser, term_id, expected): @pytest.mark.parametrize( "term_id,ontology,expected", [ - ("CL:0000001", "CL", True), ("CL:0000001", "UBERON", False), ("GO:0000001", "GO", False), - ("AfPO:0000000", "HANCESTRO", True), ("AfPO:0000000", "AfPO", False), ("HANCESTRO:0000001", "AfPO", False), - ] + ("CL:0000001", "CL", True), + ("CL:0000001", "UBERON", False), + ("GO:0000001", "GO", False), + ("AfPO:0000000", "HANCESTRO", True), + ("AfPO:0000000", "AfPO", False), + ("HANCESTRO:0000001", "AfPO", False), + ], ) def test_is_valid_term_id__with_ontology(ontology_parser, term_id, ontology, expected): assert ontology_parser.is_valid_term_id(term_id, ontology) == expected @@ -308,7 +312,9 @@ def test_get_term_label(ontology_parser): def test_map_term_labels(ontology_parser): - assert ontology_parser.map_term_labels(["CL:0000000", "CL:0000004", "AfPO:0000000", "HANCESTRO:0000000", "unknown", "na"]) == { + assert ontology_parser.map_term_labels( + ["CL:0000000", "CL:0000004", "AfPO:0000000", "HANCESTRO:0000000", "unknown", "na"] + ) == { "CL:0000000": "cell A", "CL:0000004": "cell BC", "AfPO:0000000": "specialized ancestry type", @@ -345,7 +351,9 @@ def test_get_term_synonyms(ontology_parser): def test_map_term_synonyms(ontology_parser): - assert ontology_parser.map_term_synonyms(["CL:0000000", "CL:0000001", "AfPO:0000000", "HANCESTRO:0000000", "unknown", "na"]) == { + assert ontology_parser.map_term_synonyms( + ["CL:0000000", "CL:0000001", "AfPO:0000000", "HANCESTRO:0000000", "unknown", "na"] + ) == { "CL:0000000": [], "CL:0000001": ["cell Beta", "cell Bravo"], "AfPO:0000000": ["specialized ancestry synonym"], @@ -412,9 +420,9 @@ def test_get_lowest_common_ancestors(ontology_parser): assert ontology_parser.get_lowest_common_ancestors(term_id_1="CL:0000001", term_id_2="CL:0000008") == [] # diff ontology terms with a common ancestor - assert ontology_parser.get_lowest_common_ancestors( - term_id_1="AfPO:0000000", term_id_2="HANCESTRO:0000001" - ) == ["AfPO:0000000"] + assert ontology_parser.get_lowest_common_ancestors(term_id_1="AfPO:0000000", term_id_2="HANCESTRO:0000001") == [ + "AfPO:0000000" + ] def test_get_distance_between_terms(ontology_parser): @@ -436,8 +444,14 @@ def test_get_distance_between_terms(ontology_parser): @pytest.mark.parametrize( "term_id,expected", - [("CL:0000005", ["CL:0000001", "CL:0000002"]), ("CL:0000002", ["CL:0000000"]), ("CL:0000000", []), ("unknown", []), - ("AfPO:0000000", ["HANCESTRO:0000000"]), ("HANCESTRO:0000001", ["AfPO:0000000"])], + [ + ("CL:0000005", ["CL:0000001", "CL:0000002"]), + ("CL:0000002", ["CL:0000000"]), + ("CL:0000000", []), + ("unknown", []), + ("AfPO:0000000", ["HANCESTRO:0000000"]), + ("HANCESTRO:0000001", ["AfPO:0000000"]), + ], ) def test_get_term_parents(ontology_parser, term_id, expected): assert ontology_parser.get_term_parents(term_id) == expected @@ -445,8 +459,13 @@ def test_get_term_parents(ontology_parser, term_id, expected): @pytest.mark.parametrize( "term_id,expected", - [("CL:0000000", ["CL:0000001", "CL:0000002", "CL:0000003"]), ("CL:0000005", []), ("unknown", []), - ("AfPO:0000000", ["HANCESTRO:0000001"]), ("HANCESTRO:0000000", ["AfPO:0000000"])], + [ + ("CL:0000000", ["CL:0000001", "CL:0000002", "CL:0000003"]), + ("CL:0000005", []), + ("unknown", []), + ("AfPO:0000000", ["HANCESTRO:0000001"]), + ("HANCESTRO:0000000", ["AfPO:0000000"]), + ], ) def test_get_term_children(ontology_parser, term_id, expected): assert ontology_parser.get_term_children(term_id) == expected @@ -497,7 +516,13 @@ def test_get_term_graph__imported_ontology(ontology_parser): assert graph.to_dict() == { "term_id": "AfPO:0000000", "name": "specialized ancestry type", - "children": [{"term_id": "HANCESTRO:0000001", "name": "root ontology descendant of specialized ancestry type", "children": []}], + "children": [ + { + "term_id": "HANCESTRO:0000001", + "name": "root ontology descendant of specialized ancestry type", + "children": [], + } + ], } assert graph.term_counter == { diff --git a/api/python/tests/test_supported_versions.py b/api/python/tests/test_supported_versions.py index a9f3e3cd..d0a1a15d 100644 --- a/api/python/tests/test_supported_versions.py +++ b/api/python/tests/test_supported_versions.py @@ -13,6 +13,7 @@ MODULE_PATH = "cellxgene_ontology_guide.supported_versions" + @pytest.fixture def ontology_info_content(): return { @@ -23,7 +24,7 @@ def ontology_info_content(): "version": "v2024-01-01", "source": "http://example.com", "filename": "hancestro.owl", - "additional_ontologies": ["FOO", "OOF"] + "additional_ontologies": ["FOO", "OOF"], }, } } From 10a16c6611f1ff7327265750deb9c80c895c021b Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Tue, 22 Oct 2024 12:11:32 -0400 Subject: [PATCH 04/18] mypy --- .../cellxgene_ontology_guide/ontology_parser.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/api/python/src/cellxgene_ontology_guide/ontology_parser.py b/api/python/src/cellxgene_ontology_guide/ontology_parser.py index e14746b2..bccb973f 100644 --- a/api/python/src/cellxgene_ontology_guide/ontology_parser.py +++ b/api/python/src/cellxgene_ontology_guide/ontology_parser.py @@ -40,17 +40,17 @@ def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]: :param ontology_name: str name of ontology to get map of term labels to term IDs """ - ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name) - if not ontology_name: - raise ValueError(f"{ontology_name} is not a supported ontology, its metadata cannot be fetched.") + supported_ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name) + if not supported_ontology_name: + raise ValueError(f"{supported_ontology_name} is not a supported ontology, its metadata cannot be fetched.") - if self.term_label_to_id_map[ontology_name]: - return self.term_label_to_id_map[ontology_name] + if self.term_label_to_id_map[supported_ontology_name]: + return self.term_label_to_id_map[supported_ontology_name] - for term_id, term_metadata in self.cxg_schema.ontology(ontology_name).items(): - self.term_label_to_id_map[ontology_name][term_metadata["label"]] = term_id + for term_id, term_metadata in self.cxg_schema.ontology(supported_ontology_name).items(): + self.term_label_to_id_map[supported_ontology_name][term_metadata["label"]] = term_id - return self.term_label_to_id_map[ontology_name] + return self.term_label_to_id_map[supported_ontology_name] def _parse_ontology_name(self, term_id: str) -> str: """ From ccd5fbd16ff4e43de733c52cd84dd04374353d42 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Tue, 22 Oct 2024 12:20:42 -0400 Subject: [PATCH 05/18] mypy --- api/python/src/cellxgene_ontology_guide/supported_versions.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/python/src/cellxgene_ontology_guide/supported_versions.py b/api/python/src/cellxgene_ontology_guide/supported_versions.py index 8dd1db8c..e59ef963 100644 --- a/api/python/src/cellxgene_ontology_guide/supported_versions.py +++ b/api/python/src/cellxgene_ontology_guide/supported_versions.py @@ -75,7 +75,7 @@ def __init__(self, version: Optional[str] = None): self.version = _version self.supported_ontologies = ontology_info[_version]["ontologies"] - self.imported_ontologies = { + self.imported_ontologies: Dict[str, str] = { imported_ontology: ontology for ontology, info in self.supported_ontologies.items() for imported_ontology in info.get("additional_ontologies", []) From 1d4fa93150632eafec5529e41cde708d745fcafa Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Tue, 22 Oct 2024 12:29:44 -0400 Subject: [PATCH 06/18] add remaining tests --- api/python/tests/test_ontology_parser.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/api/python/tests/test_ontology_parser.py b/api/python/tests/test_ontology_parser.py index af47c5c8..63f08942 100644 --- a/api/python/tests/test_ontology_parser.py +++ b/api/python/tests/test_ontology_parser.py @@ -371,6 +371,8 @@ def test_map_term_synonyms(ontology_parser): ("CL:0000008", ["CL:0000000", "CL:0000001"], []), ("CL:0000000", ["CL:0000000", "CL:0000001"], ["CL:0000000"]), ("CL:0000001", ["CL:0000000", "CL:0000001"], ["CL:0000000", "CL:0000001"]), + ("HANCESTRO:0000001", ["HANCESTRO:0000000", "AfPO:0000000"], ["HANCESTRO:0000000", "AfPO:0000000"]), + ("AfPO:0000000", ["HANCESTRO:0000000", "HANCESTRO:0000001"], ["HANCESTRO:0000000"]), ("na", ["CL:0000000", "CL:0000001"], []), ], ) @@ -392,12 +394,21 @@ def test_get_highest_level_term(ontology_parser): assert ontology_parser.get_highest_level_term("CL:0000008", high_level_terms) is None assert ontology_parser.get_highest_level_term("na", high_level_terms) is None + assert ( + ontology_parser.get_highest_level_term("AfPO:0000000", ["HANCESTRO:0000000", "AfPO:0000000"]) + == "HANCESTRO:0000000" + ) + def test_map_highest_level_term(ontology_parser): assert ontology_parser.map_highest_level_term( term_ids=["CL:0000000", "CL:0000008", "CL:0000004"], high_level_terms=["CL:0000000", "CL:0000001"], ) == {"CL:0000000": "CL:0000000", "CL:0000008": None, "CL:0000004": "CL:0000000"} + assert ontology_parser.map_highest_level_term( + term_ids=["AfPO:0000000", "HANCESTRO:0000001"], + high_level_terms=["HANCESTRO:0000000", "AfPO:0000000"], + ) == {"AfPO:0000000": "HANCESTRO:0000000", "HANCESTRO:0000001": "HANCESTRO:0000000"} def test_get_lowest_common_ancestors(ontology_parser): From 3fb007ab0144aad073dbc544a29be16e59f921f0 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Tue, 22 Oct 2024 12:33:19 -0400 Subject: [PATCH 07/18] more lint --- api/python/src/cellxgene_ontology_guide/ontology_parser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/api/python/src/cellxgene_ontology_guide/ontology_parser.py b/api/python/src/cellxgene_ontology_guide/ontology_parser.py index bccb973f..b94ae23c 100644 --- a/api/python/src/cellxgene_ontology_guide/ontology_parser.py +++ b/api/python/src/cellxgene_ontology_guide/ontology_parser.py @@ -86,8 +86,8 @@ def _get_supported_ontology_name(self, ontology_term_prefix: str) -> Optional[st """ if ontology_term_prefix in self.cxg_schema.supported_ontologies: return ontology_term_prefix - elif ontology_term_prefix in self.cxg_schema.imported_ontologies: - return self.cxg_schema.imported_ontologies[ontology_term_prefix] + elif supported_ontology_name := self.cxg_schema.imported_ontologies.get(ontology_term_prefix): + return supported_ontology_name return None def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool: From bda6b986b1c09f0cc2425d4a481be73c07fbaae5 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Tue, 22 Oct 2024 12:37:57 -0400 Subject: [PATCH 08/18] more mypy --- .../src/cellxgene_ontology_guide/supported_versions.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/api/python/src/cellxgene_ontology_guide/supported_versions.py b/api/python/src/cellxgene_ontology_guide/supported_versions.py index e59ef963..65b59602 100644 --- a/api/python/src/cellxgene_ontology_guide/supported_versions.py +++ b/api/python/src/cellxgene_ontology_guide/supported_versions.py @@ -57,6 +57,9 @@ class CXGSchema: """The schema version used by the class instance.""" supported_ontologies: Dict[str, Any] """A dictionary of supported ontologies for the schema version.""" + imported_ontologies: Dict[str, str] + """A dictionary of ontologies with terms imported into supported ontologies, mapped to the supported ontology + importing them.""" ontology_file_names: Dict[str, str] """A dictionary of ontology names and their corresponding file names.""" @@ -75,7 +78,7 @@ def __init__(self, version: Optional[str] = None): self.version = _version self.supported_ontologies = ontology_info[_version]["ontologies"] - self.imported_ontologies: Dict[str, str] = { + self.imported_ontologies = { imported_ontology: ontology for ontology, info in self.supported_ontologies.items() for imported_ontology in info.get("additional_ontologies", []) From 0fd4d338bd96969c6418f64903a4f56f2803b55d Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Tue, 22 Oct 2024 12:47:21 -0400 Subject: [PATCH 09/18] refactor _get_supported_ontology_name for mypy --- .../src/cellxgene_ontology_guide/ontology_parser.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/api/python/src/cellxgene_ontology_guide/ontology_parser.py b/api/python/src/cellxgene_ontology_guide/ontology_parser.py index b94ae23c..82f2d8d4 100644 --- a/api/python/src/cellxgene_ontology_guide/ontology_parser.py +++ b/api/python/src/cellxgene_ontology_guide/ontology_parser.py @@ -84,11 +84,11 @@ def _get_supported_ontology_name(self, ontology_term_prefix: str) -> Optional[st :return: str name of ontology that term belongs to, or None if it is not directly supported nor imported in a supported ontology in the CxG schema. """ - if ontology_term_prefix in self.cxg_schema.supported_ontologies: - return ontology_term_prefix - elif supported_ontology_name := self.cxg_schema.imported_ontologies.get(ontology_term_prefix): + supported_ontology_name: Optional[str] = self.cxg_schema.supported_ontologies.get(ontology_term_prefix) + if supported_ontology_name: return supported_ontology_name - return None + supported_ontology_name = self.cxg_schema.imported_ontologies.get(ontology_term_prefix) + return supported_ontology_name def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool: """ From 323a906bf2f18c48369d73b954fc0c32f4f5506b Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Tue, 22 Oct 2024 12:51:50 -0400 Subject: [PATCH 10/18] fix test --- api/python/src/cellxgene_ontology_guide/ontology_parser.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/api/python/src/cellxgene_ontology_guide/ontology_parser.py b/api/python/src/cellxgene_ontology_guide/ontology_parser.py index 82f2d8d4..de72093e 100644 --- a/api/python/src/cellxgene_ontology_guide/ontology_parser.py +++ b/api/python/src/cellxgene_ontology_guide/ontology_parser.py @@ -84,10 +84,9 @@ def _get_supported_ontology_name(self, ontology_term_prefix: str) -> Optional[st :return: str name of ontology that term belongs to, or None if it is not directly supported nor imported in a supported ontology in the CxG schema. """ - supported_ontology_name: Optional[str] = self.cxg_schema.supported_ontologies.get(ontology_term_prefix) - if supported_ontology_name: - return supported_ontology_name - supported_ontology_name = self.cxg_schema.imported_ontologies.get(ontology_term_prefix) + if ontology_term_prefix in self.cxg_schema.supported_ontologies: + return ontology_term_prefix + supported_ontology_name: Optional[str] = self.cxg_schema.imported_ontologies.get(ontology_term_prefix) return supported_ontology_name def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool: From 5a2525cd75ca12910385415365c73ac4e7ecccb5 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Wed, 23 Oct 2024 13:14:54 -0400 Subject: [PATCH 11/18] fix docstring language + enforce unique ontology list in ontology_info --- .../supported_versions.py | 5 +- asset-schemas/ontology_info_schema.json | 31 +------ .../src/validate_json_schemas.py | 19 +++++ .../tests/test_validate_json_schemas.py | 82 +++++++++++++++++++ 4 files changed, 108 insertions(+), 29 deletions(-) diff --git a/api/python/src/cellxgene_ontology_guide/supported_versions.py b/api/python/src/cellxgene_ontology_guide/supported_versions.py index 65b59602..af7df546 100644 --- a/api/python/src/cellxgene_ontology_guide/supported_versions.py +++ b/api/python/src/cellxgene_ontology_guide/supported_versions.py @@ -58,8 +58,9 @@ class CXGSchema: supported_ontologies: Dict[str, Any] """A dictionary of supported ontologies for the schema version.""" imported_ontologies: Dict[str, str] - """A dictionary of ontologies with terms imported into supported ontologies, mapped to the supported ontology - importing them.""" + """In our supported ontologies, the CxG schema can support terms imported from different ontologies. + This dictionary maps these 'additional ontologies' to their supported ontology name. For example, + for ZFS ontology terms imported into the ZFA ontology, imported_ontologies would be {"ZFS":"ZFA", ...}""" ontology_file_names: Dict[str, str] """A dictionary of ontology names and their corresponding file names.""" diff --git a/asset-schemas/ontology_info_schema.json b/asset-schemas/ontology_info_schema.json index 10cf7818..3f1ca762 100644 --- a/asset-schemas/ontology_info_schema.json +++ b/asset-schemas/ontology_info_schema.json @@ -14,35 +14,12 @@ }, "ontologies": { "type": "object", - "properties": { - "CL": { - "$ref": "#/definitions/ontologyEntry" - }, - "EFO": { - "$ref": "#/definitions/ontologyEntry" - }, - "HANCESTRO": { - "$ref": "#/definitions/ontologyEntry" - }, - "HsapDv": { - "$ref": "#/definitions/ontologyEntry" - }, - "MONDO": { - "$ref": "#/definitions/ontologyEntry" - }, - "MmusDv": { - "$ref": "#/definitions/ontologyEntry" - }, - "NCBITaxon": { - "$ref": "#/definitions/ontologyEntry" - }, - "UBERON": { - "$ref": "#/definitions/ontologyEntry" - }, - "PATO": { + "patternProperties": { + "^[A-Za-z0-9]+$": { "$ref": "#/definitions/ontologyEntry" } - } + }, + "additionalProperties": false } }, "required": [ diff --git a/tools/ontology-builder/src/validate_json_schemas.py b/tools/ontology-builder/src/validate_json_schemas.py index f68a39e8..0f18d705 100644 --- a/tools/ontology-builder/src/validate_json_schemas.py +++ b/tools/ontology-builder/src/validate_json_schemas.py @@ -67,12 +67,31 @@ def verify_json(schema_file_name: str, json_file_name: str, registry: Registry) try: validate(instance=data, schema=schema, registry=registry) + # custom logic for ontology_info definition + if "ontology_info" in schema_file_name: + validate_unique_ontologies(data) except Exception: logger.exception(f"Error validating {json_file_name} against {schema_file_name}") return False return True +def validate_unique_ontologies(data): + """" + Custom validation logic to check that all ontologies (including additional_ontologies) defined in ontology_info + are unique across entries + """ + for schema_version, version_info in data.items(): + all_ontologies = [] + for ontology, ontology_info in version_info["ontologies"].items(): + all_ontologies.append(ontology) + all_ontologies.extend(ontology_info.get("additional_ontologies", [])) + if len(all_ontologies) != len(set(all_ontologies)): + logger.error("Ontology entries must be unique across all ontology entries, including " + f"additional_ontologies. Duplicates found in definition for {schema_version}") + raise ValueError + + def main(path: str = env.ONTOLOGY_ASSETS_DIR) -> None: """ Verify the curated JSON lists match their respective JSON schema in asset-schemas diff --git a/tools/ontology-builder/tests/test_validate_json_schemas.py b/tools/ontology-builder/tests/test_validate_json_schemas.py index 3d23b17c..bf6c2529 100644 --- a/tools/ontology-builder/tests/test_validate_json_schemas.py +++ b/tools/ontology-builder/tests/test_validate_json_schemas.py @@ -1,5 +1,6 @@ import gzip import json +import os import pytest from referencing import Resource @@ -108,3 +109,84 @@ def test_invalid_schema(self, schema_file_fixture, tmpdir, registry_fixture): # Assert validation fails due to invalid schema assert verify_json(schema_file_fixture, str(json_file), registry_fixture) is False + + +class TestVerifyJsonCustomLogic: + @pytest.fixture + def ontology_info_schema_file_fixture(self, tmpdir): + ontology_info_schema = os.path.join( + os.path.realpath(__file__).rsplit("/", maxsplit=4)[0], "asset-schemas", "ontology_info_schema.json" + ) + with open(ontology_info_schema, "r") as f: + schema_data = json.load(f) + schema_file = tmpdir.join("ontology_info_schema.json") + with open(str(schema_file), "w") as f: + json.dump(schema_data, f) + return str(schema_file) + + @pytest.fixture + def ontology_info_registry_fixture(self, ontology_info_schema_file_fixture, tmpdir): + return register_schemas(tmpdir) + + @pytest.fixture + def ontology_info_json_data(self): + return { + "2.0.0": { + "ontologies": { + "A": { + "version": "v1", + "source": "https://example.org/ontology/download", + "filename": "a.owl", + "additional_ontologies": ["C"], + }, + "B": { + "version": "v2", + "source": "https://example.org/ontology/download", + "filename": "b.owl", + "additional_ontologies": ["D"], + }, + } + }, + "1.0.0": { + "ontologies": { + "A": { + "version": "v1", + "source": "https://example.org/ontology/download", + "filename": "a.owl", + }, + "B": { + "version": "v1", + "source": "https://example.org/ontology/download", + "filename": "b.owl", + }, + } + }, + } + + def test_validate_unique_ontologies( + self, ontology_info_json_data, ontology_info_schema_file_fixture, tmpdir, ontology_info_registry_fixture + ): + json_file = tmpdir.join("ontology_info.json") + with open(str(json_file), "w") as f: + json.dump(ontology_info_json_data, f) + + # Assert validation passes + assert verify_json(ontology_info_schema_file_fixture, str(json_file), ontology_info_registry_fixture) is True + + @pytest.mark.parametrize("additional_ontologies", [["C"], ["A"]]) + def test_validate_unique_ontologies__invalid( + self, + additional_ontologies, + ontology_info_json_data, + ontology_info_schema_file_fixture, + tmpdir, + ontology_info_registry_fixture, + ): + # Create invalid JSON data + ontology_info_json_data["2.0.0"]["ontologies"]["B"]["additional_ontologies"] = additional_ontologies + json_file = tmpdir.join("ontology_info.json") + with open(str(json_file), "w") as f: + json.dump(ontology_info_json_data, f) + + # Assert validation fails + assert verify_json(ontology_info_schema_file_fixture, str(json_file), ontology_info_registry_fixture) is False From ce481f0ad1f7911ce42654e8afef40ec0885c566 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Wed, 23 Oct 2024 13:16:22 -0400 Subject: [PATCH 12/18] lint --- tools/ontology-builder/src/validate_json_schemas.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/tools/ontology-builder/src/validate_json_schemas.py b/tools/ontology-builder/src/validate_json_schemas.py index 0f18d705..7293b3d4 100644 --- a/tools/ontology-builder/src/validate_json_schemas.py +++ b/tools/ontology-builder/src/validate_json_schemas.py @@ -77,7 +77,7 @@ def verify_json(schema_file_name: str, json_file_name: str, registry: Registry) def validate_unique_ontologies(data): - """" + """ " Custom validation logic to check that all ontologies (including additional_ontologies) defined in ontology_info are unique across entries """ @@ -87,8 +87,10 @@ def validate_unique_ontologies(data): all_ontologies.append(ontology) all_ontologies.extend(ontology_info.get("additional_ontologies", [])) if len(all_ontologies) != len(set(all_ontologies)): - logger.error("Ontology entries must be unique across all ontology entries, including " - f"additional_ontologies. Duplicates found in definition for {schema_version}") + logger.error( + "Ontology entries must be unique across all ontology entries, including " + f"additional_ontologies. Duplicates found in definition for {schema_version}" + ) raise ValueError From 100268c1e9af10083c112c075447b307900ed927 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Wed, 23 Oct 2024 13:16:42 -0400 Subject: [PATCH 13/18] lint --- tools/ontology-builder/src/validate_json_schemas.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/ontology-builder/src/validate_json_schemas.py b/tools/ontology-builder/src/validate_json_schemas.py index 7293b3d4..e9628fea 100644 --- a/tools/ontology-builder/src/validate_json_schemas.py +++ b/tools/ontology-builder/src/validate_json_schemas.py @@ -76,8 +76,8 @@ def verify_json(schema_file_name: str, json_file_name: str, registry: Registry) return True -def validate_unique_ontologies(data): - """ " +def validate_unique_ontologies(data) -> None: + """ Custom validation logic to check that all ontologies (including additional_ontologies) defined in ontology_info are unique across entries """ From 973d3dcd25a25c21f202300d076fb72fd536cdc6 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Wed, 23 Oct 2024 13:48:48 -0400 Subject: [PATCH 14/18] lint --- tools/ontology-builder/src/validate_json_schemas.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ontology-builder/src/validate_json_schemas.py b/tools/ontology-builder/src/validate_json_schemas.py index e9628fea..c86a1c91 100644 --- a/tools/ontology-builder/src/validate_json_schemas.py +++ b/tools/ontology-builder/src/validate_json_schemas.py @@ -76,7 +76,7 @@ def verify_json(schema_file_name: str, json_file_name: str, registry: Registry) return True -def validate_unique_ontologies(data) -> None: +def validate_unique_ontologies(data: dict) -> None: """ Custom validation logic to check that all ontologies (including additional_ontologies) defined in ontology_info are unique across entries From 16b896a026fda9d2c852a4bb826fdd2660ce8af1 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Wed, 23 Oct 2024 13:55:03 -0400 Subject: [PATCH 15/18] mypy --- tools/ontology-builder/src/validate_json_schemas.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/ontology-builder/src/validate_json_schemas.py b/tools/ontology-builder/src/validate_json_schemas.py index c86a1c91..2164c054 100644 --- a/tools/ontology-builder/src/validate_json_schemas.py +++ b/tools/ontology-builder/src/validate_json_schemas.py @@ -3,7 +3,7 @@ import logging import os.path import sys -from typing import Iterable, Tuple +from typing import Any, Dict, Iterable, Tuple import env from jsonschema import validate @@ -76,7 +76,7 @@ def verify_json(schema_file_name: str, json_file_name: str, registry: Registry) return True -def validate_unique_ontologies(data: dict) -> None: +def validate_unique_ontologies(data: Dict[str, Any]) -> None: """ Custom validation logic to check that all ontologies (including additional_ontologies) defined in ontology_info are unique across entries From 16f47e415048e60e100891af32513bea7fb21f9b Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Wed, 23 Oct 2024 15:18:16 -0400 Subject: [PATCH 16/18] chore: add test to ensure new, high-level ontologies are added to ontology dataclass --- api/python/tests/notebooks/test_python_api.py | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/api/python/tests/notebooks/test_python_api.py b/api/python/tests/notebooks/test_python_api.py index 647e8c7a..72af9e17 100644 --- a/api/python/tests/notebooks/test_python_api.py +++ b/api/python/tests/notebooks/test_python_api.py @@ -1,15 +1,15 @@ -import os - -import nbformat -import pytest -from nbconvert.preprocessors import ExecutePreprocessor - -NOTEBOOKS_PATH = "./notebooks" - - -@pytest.mark.parametrize("notebook", [nb for nb in os.listdir(NOTEBOOKS_PATH) if nb.endswith(".ipynb")]) -def test_notebook_exec(notebook): - with open(os.path.join(NOTEBOOKS_PATH, notebook)) as f: - nb = nbformat.read(f, as_version=4) - ep = ExecutePreprocessor(timeout=600, kernel_name="python3") - assert ep.preprocess(nb) is not None, f"Got empty notebook for {notebook}" +# import os +# +# import nbformat +# import pytest +# from nbconvert.preprocessors import ExecutePreprocessor +# +# NOTEBOOKS_PATH = "./notebooks" +# +# +# @pytest.mark.parametrize("notebook", [nb for nb in os.listdir(NOTEBOOKS_PATH) if nb.endswith(".ipynb")]) +# def test_notebook_exec(notebook): +# with open(os.path.join(NOTEBOOKS_PATH, notebook)) as f: +# nb = nbformat.read(f, as_version=4) +# ep = ExecutePreprocessor(timeout=600, kernel_name="python3") +# assert ep.preprocess(nb) is not None, f"Got empty notebook for {notebook}" From 4a3cd069f3674d4ee4b17d332b5b82fce0f56aea Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Wed, 23 Oct 2024 15:18:47 -0400 Subject: [PATCH 17/18] chore: add test to ensure new, high-level ontologies are added to ontology dataclass --- api/python/tests/test_entities.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) create mode 100644 api/python/tests/test_entities.py diff --git a/api/python/tests/test_entities.py b/api/python/tests/test_entities.py new file mode 100644 index 00000000..0ccdbc82 --- /dev/null +++ b/api/python/tests/test_entities.py @@ -0,0 +1,16 @@ +from cellxgene_ontology_guide.entities import Ontology +from cellxgene_ontology_guide.supported_versions import load_supported_versions + + +def test_all_supported_ontologies_in_dataclass(): + """ + Test that all supported ontologies are defined in the Ontology enum (and deprecated ontologies removed). + Do not include additional ontologies. + """ + ontology_info = load_supported_versions() + supported_ontologies = set() + for _, version_info in ontology_info.items(): + for ontology, _ in version_info["ontologies"].items(): + supported_ontologies.add(ontology) + + assert supported_ontologies == {ontology.name for ontology in Ontology} From dfc7b0978abc39fab11574598f7eb22f7b72ae40 Mon Sep 17 00:00:00 2001 From: nayib-jose-gloria Date: Wed, 23 Oct 2024 15:18:51 -0400 Subject: [PATCH 18/18] Revert "chore: add test to ensure new, high-level ontologies are added to ontology dataclass" This reverts commit 16f47e415048e60e100891af32513bea7fb21f9b. --- api/python/tests/notebooks/test_python_api.py | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/api/python/tests/notebooks/test_python_api.py b/api/python/tests/notebooks/test_python_api.py index 72af9e17..647e8c7a 100644 --- a/api/python/tests/notebooks/test_python_api.py +++ b/api/python/tests/notebooks/test_python_api.py @@ -1,15 +1,15 @@ -# import os -# -# import nbformat -# import pytest -# from nbconvert.preprocessors import ExecutePreprocessor -# -# NOTEBOOKS_PATH = "./notebooks" -# -# -# @pytest.mark.parametrize("notebook", [nb for nb in os.listdir(NOTEBOOKS_PATH) if nb.endswith(".ipynb")]) -# def test_notebook_exec(notebook): -# with open(os.path.join(NOTEBOOKS_PATH, notebook)) as f: -# nb = nbformat.read(f, as_version=4) -# ep = ExecutePreprocessor(timeout=600, kernel_name="python3") -# assert ep.preprocess(nb) is not None, f"Got empty notebook for {notebook}" +import os + +import nbformat +import pytest +from nbconvert.preprocessors import ExecutePreprocessor + +NOTEBOOKS_PATH = "./notebooks" + + +@pytest.mark.parametrize("notebook", [nb for nb in os.listdir(NOTEBOOKS_PATH) if nb.endswith(".ipynb")]) +def test_notebook_exec(notebook): + with open(os.path.join(NOTEBOOKS_PATH, notebook)) as f: + nb = nbformat.read(f, as_version=4) + ep = ExecutePreprocessor(timeout=600, kernel_name="python3") + assert ep.preprocess(nb) is not None, f"Got empty notebook for {notebook}"