diff --git a/api/python/src/cellxgene_ontology_guide/ontology_parser.py b/api/python/src/cellxgene_ontology_guide/ontology_parser.py index cd83d178..de72093e 100644 --- a/api/python/src/cellxgene_ontology_guide/ontology_parser.py +++ b/api/python/src/cellxgene_ontology_guide/ontology_parser.py @@ -40,16 +40,17 @@ def get_term_label_to_id_map(self, ontology_name: str) -> Dict[str, str]: :param ontology_name: str name of ontology to get map of term labels to term IDs """ - if ontology_name not in self.cxg_schema.supported_ontologies: - raise ValueError(f"{ontology_name} is not a supported ontology, its metadata cannot be fetched.") + supported_ontology_name: Optional[str] = self._get_supported_ontology_name(ontology_name) + if not supported_ontology_name: + raise ValueError(f"{ontology_name} is not a supported ontology, its metadata cannot be fetched.") - if self.term_label_to_id_map[ontology_name]: - return self.term_label_to_id_map[ontology_name] + if self.term_label_to_id_map[supported_ontology_name]: + return self.term_label_to_id_map[supported_ontology_name] - for term_id, term_metadata in self.cxg_schema.ontology(ontology_name).items(): - self.term_label_to_id_map[ontology_name][term_metadata["label"]] = term_id + for term_id, term_metadata in self.cxg_schema.ontology(supported_ontology_name).items(): + self.term_label_to_id_map[supported_ontology_name][term_metadata["label"]] = term_id - return self.term_label_to_id_map[ontology_name] + return self.term_label_to_id_map[supported_ontology_name] def _parse_ontology_name(self, term_id: str) -> str: """ @@ -63,12 +64,31 @@ def _parse_ontology_name(self, term_id: str) -> str: if not re.match(pattern, term_id): raise ValueError(f"{term_id} does not conform to expected regex pattern {pattern} and cannot be queried.") - ontology_name = term_id.split(":")[0] - if ontology_name not in self.cxg_schema.supported_ontologies: + ontology_term_prefix = term_id.split(":")[0] + ontology_name: Optional[str] = 
self._get_supported_ontology_name(ontology_term_prefix) + if not ontology_name: raise ValueError(f"{term_id} is not part of a supported ontology, its metadata cannot be fetched.") return ontology_name + def _get_supported_ontology_name(self, ontology_term_prefix: str) -> Optional[str]: + """ + Get the source ontology name for a given ontology term prefix, if it is supported by the CxG schema. + + If ontology_term_prefix is directly supported by the CxG schema, returns ontology_term_prefix. + If ontology_term_prefix is supported as an import from another ontology, returns the name of the source ontology + it is imported in. + Otherwise, returns None. + + :param ontology_term_prefix: str ontology term prefix to check + :return: str name of ontology that term belongs to, or None if it is not directly supported nor imported in + a supported ontology in the CxG schema. + """ + if ontology_term_prefix in self.cxg_schema.supported_ontologies: + return ontology_term_prefix + supported_ontology_name: Optional[str] = self.cxg_schema.imported_ontologies.get(ontology_term_prefix) + return supported_ontology_name + def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool: """ Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined diff --git a/api/python/src/cellxgene_ontology_guide/supported_versions.py b/api/python/src/cellxgene_ontology_guide/supported_versions.py index 52b10cbd..af7df546 100644 --- a/api/python/src/cellxgene_ontology_guide/supported_versions.py +++ b/api/python/src/cellxgene_ontology_guide/supported_versions.py @@ -57,6 +57,10 @@ class CXGSchema: """The schema version used by the class instance.""" supported_ontologies: Dict[str, Any] """A dictionary of supported ontologies for the schema version.""" + imported_ontologies: Dict[str, str] + """In our supported ontologies, the CxG schema can support terms imported from different ontologies. 
+ This dictionary maps these 'additional ontologies' to their supported ontology name. For example, + for ZFS ontology terms imported into the ZFA ontology, imported_ontologies would be {"ZFS":"ZFA", ...}""" ontology_file_names: Dict[str, str] """A dictionary of ontology names and their corresponding file names.""" @@ -75,6 +79,11 @@ def __init__(self, version: Optional[str] = None): self.version = _version self.supported_ontologies = ontology_info[_version]["ontologies"] + self.imported_ontologies = { + imported_ontology: ontology + for ontology, info in self.supported_ontologies.items() + for imported_ontology in info.get("additional_ontologies", []) + } self.ontology_file_names: Dict[str, str] = {} self.deprecated_on = ontology_info[_version].get("deprecated_on") if self.deprecated_on: @@ -87,6 +96,9 @@ def __init__(self, version: Optional[str] = None): def ontology(self, name: str) -> Any: """Return the ontology terms for the given ontology name. Load from the file cache if available. + + Does not support "additional ontologies" of another ontology. 
+ :param name: str name of the ontology to get the terms for :return: dict representation of the ontology terms """ diff --git a/api/python/tests/test_ontology_parser.py b/api/python/tests/test_ontology_parser.py index 619ef24b..63f08942 100644 --- a/api/python/tests/test_ontology_parser.py +++ b/api/python/tests/test_ontology_parser.py @@ -42,13 +42,64 @@ def ontology_dict(): @pytest.fixture -def mock_CXGSchema(ontology_dict, mock_load_supported_versions, mock_load_ontology_file): +def ontology_dict_with_imports(): + return { + "HANCESTRO:0000000": { + "ancestors": {}, + "label": "root ancestry type", + "description": "This is a root ancestry type.", + "comments": ["this is an HANCESTRO term, not imported"], + "term_tracker": "http://example.com/issue/HANCESTRO/1234", + "synonyms": ["root ancestry synonym"], + "deprecated": False, + }, + "AfPO:0000000": { + "ancestors": {"HANCESTRO:0000000": 1}, + "label": "specialized ancestry type", + "description": "This is a specialized ancestry type.", + "deprecated": False, + "comments": ["this is an AfPO term imported into HANCESTRO"], + "synonyms": ["specialized ancestry synonym"], + "term_tracker": "http://example.com/issue/AfPO/1234", + "replaced_by": "AfPO:0000001", + }, + "HANCESTRO:0000001": { + "ancestors": {"HANCESTRO:0000000": 2, "AfPO:0000000": 1}, + "label": "root ontology descendant of specialized ancestry type", + "deprecated": False, + }, + } + + +@pytest.fixture +def mock_CXGSchema(ontology_dict, ontology_dict_with_imports, mock_load_supported_versions, mock_load_ontology_file): mock_load_supported_versions.return_value = { - "5.0.0": {"ontologies": {"CL": {"version": "2024-01-01", "source": "http://example.com", "filename": "cl.owl"}}} + "5.0.0": { + "ontologies": { + "CL": {"version": "2024-01-01", "source": "http://example.com", "filename": "cl.owl"}, + "HANCESTRO": { + "version": "2024-01-01", + "source": "http://example.com", + "filename": "cl.owl", + "additional_ontologies": ["AfPO"], + }, + } + } } 
cxg_schema = CXGSchema() - cxg_schema.ontology_file_names = {"CL": "CL-ontology-2024-01-01.json.gz"} - mock_load_ontology_file.return_value = ontology_dict + cxg_schema.ontology_file_names = { + "CL": "CL-ontology-2024-01-01.json.gz", + "HANCESTRO": "HANCESTRO-ontology-2024-01-01.json.gz", + } + + def get_mock_ontology_dict(file_name): + if "CL" in file_name: + return ontology_dict + if "HANCESTRO" in file_name: + return ontology_dict_with_imports + return None + + mock_load_ontology_file.side_effect = get_mock_ontology_dict with patch("cellxgene_ontology_guide.ontology_parser.CXGSchema", return_value=cxg_schema) as mock: yield mock @@ -63,6 +114,11 @@ def test_parse_ontology_name(ontology_parser): assert ontology_parser._parse_ontology_name("CL:0000001") == "CL" +@pytest.mark.parametrize("term_id", ["AfPO:0000001", "HANCESTRO:0000000"]) +def test_parse_ontology_name__imported_term(ontology_parser, term_id): + assert ontology_parser._parse_ontology_name(term_id) == "HANCESTRO" + + def test_parse_ontology_name__wrong_format(ontology_parser): with pytest.raises(ValueError): ontology_parser._parse_ontology_name("CL_0000001") @@ -74,7 +130,8 @@ def test_parse_ontology_name__not_supported(ontology_parser): @pytest.mark.parametrize( - "term_id,expected", [("CL:0000001", True), ("CL:0000003", True), ("CL:0000009", False), ("GO:0000001", False)] + "term_id,expected", + [("CL:0000001", True), ("CL:0000003", True), ("CL:0000009", False), ("GO:0000001", False), ("AfPO:0000000", True)], ) def test_is_valid_term_id(ontology_parser, term_id, expected): assert ontology_parser.is_valid_term_id(term_id) == expected @@ -82,7 +139,14 @@ def test_is_valid_term_id(ontology_parser, term_id, expected): @pytest.mark.parametrize( "term_id,ontology,expected", - [("CL:0000001", "CL", True), ("CL:0000001", "UBERON", False), ("GO:0000001", "GO", False)], + [ + ("CL:0000001", "CL", True), + ("CL:0000001", "UBERON", False), + ("GO:0000001", "GO", False), + ("AfPO:0000000", "HANCESTRO", True), + 
("AfPO:0000000", "AfPO", False), + ("HANCESTRO:0000001", "AfPO", False), + ], ) def test_is_valid_term_id__with_ontology(ontology_parser, term_id, ontology, expected): assert ontology_parser.is_valid_term_id(term_id, ontology) == expected @@ -97,6 +161,8 @@ def test_get_term_ancestors(ontology_parser): "CL:0000004", ] assert ontology_parser.get_term_ancestors("unknown", include_self=True) == [] + assert ontology_parser.get_term_ancestors("AfPO:0000000") == ["HANCESTRO:0000000"] + assert ontology_parser.get_term_ancestors("HANCESTRO:0000001") == ["HANCESTRO:0000000", "AfPO:0000000"] def test_map_term_ancestors(ontology_parser): @@ -109,6 +175,10 @@ def test_map_term_ancestors(ontology_parser): "CL:0000004": ["CL:0000000", "CL:0000001", "CL:0000002", "CL:0000004"], "unknown": [], } + assert ontology_parser.map_term_ancestors(["AfPO:0000000", "HANCESTRO:0000001"]) == { + "AfPO:0000000": ["HANCESTRO:0000000"], + "HANCESTRO:0000001": ["HANCESTRO:0000000", "AfPO:0000000"], + } def test_get_term_ancestors_with_distances(ontology_parser): @@ -124,6 +194,11 @@ def test_get_term_ancestors_with_distances(ontology_parser): "CL:0000004": 0, } assert ontology_parser.get_term_ancestors_with_distances("unknown", include_self=True) == {} + assert ontology_parser.get_term_ancestors_with_distances("AfPO:0000000") == {"HANCESTRO:0000000": 1} + assert ontology_parser.get_term_ancestors_with_distances("HANCESTRO:0000001") == { + "HANCESTRO:0000000": 2, + "AfPO:0000000": 1, + } def map_term_ancestors_with_distances(ontology_parser): @@ -138,6 +213,10 @@ def map_term_ancestors_with_distances(ontology_parser): "CL:0000004": {"CL:0000000": 2, "CL:0000001": 1, "CL:0000002": 1, "CL:0000004": 0}, "unknown": {}, } + assert ontology_parser.map_term_ancestors_with_distances(["AfPO:0000000", "HANCESTRO:0000001"]) == { + "AfPO:0000000": {"HANCESTRO:0000000": 1}, + "HANCESTRO:0000001": {"HANCESTRO:0000000": 2, "AfPO:0000000": 1}, + } def test_get_term_descendants(ontology_parser): @@ -153,6 +232,8 
@@ def test_get_term_descendants(ontology_parser): assert ontology_parser.get_term_descendants("CL:0000004") == [] assert ontology_parser.get_term_descendants("CL:0000004", include_self=True) == ["CL:0000004"] assert ontology_parser.get_term_descendants("na") == [] + assert ontology_parser.get_term_descendants("AfPO:0000000") == ["HANCESTRO:0000001"] + assert ontology_parser.get_term_descendants("HANCESTRO:0000000") == ["AfPO:0000000", "HANCESTRO:0000001"] def test_map_term_descendants(ontology_parser): @@ -182,16 +263,23 @@ def test_map_term_descendants(ontology_parser): "CL:0000004": ["CL:0000004"], "unknown": [], } + assert ontology_parser.map_term_descendants(["AfPO:0000000", "HANCESTRO:0000000"]) == { + "AfPO:0000000": ["HANCESTRO:0000001"], + "HANCESTRO:0000000": ["AfPO:0000000", "HANCESTRO:0000001"], + } def test_is_term_deprecated(ontology_parser): assert ontology_parser.is_term_deprecated("CL:0000003") assert not ontology_parser.is_term_deprecated("CL:0000004") + assert not ontology_parser.is_term_deprecated("AfPO:0000000") def test_get_term_replacement(ontology_parser): assert ontology_parser.get_term_replacement("CL:0000003") == "CL:0000004" assert ontology_parser.get_term_replacement("CL:0000004") is None + assert ontology_parser.get_term_replacement("HANCESTRO:0000000") is None + assert ontology_parser.get_term_replacement("AfPO:0000000") == "AfPO:0000001" def test_get_term_metadata(ontology_parser): @@ -205,16 +293,32 @@ def test_get_term_metadata(ontology_parser): "term_tracker": None, "consider": ["CL:0000004"], } + assert ontology_parser.get_term_metadata("AfPO:0000000") == { + "comments": ["this is an AfPO term imported into HANCESTRO"], + "term_tracker": "http://example.com/issue/AfPO/1234", + "consider": None, + } + assert ontology_parser.get_term_metadata("HANCESTRO:0000000") == { + "comments": ["this is an HANCESTRO term, not imported"], + "term_tracker": "http://example.com/issue/HANCESTRO/1234", + "consider": None, + } def 
test_get_term_label(ontology_parser): assert ontology_parser.get_term_label("CL:0000004") == "cell BC" + assert ontology_parser.get_term_label("AfPO:0000000") == "specialized ancestry type" + assert ontology_parser.get_term_label("HANCESTRO:0000000") == "root ancestry type" def test_map_term_labels(ontology_parser): - assert ontology_parser.map_term_labels(["CL:0000000", "CL:0000004", "unknown", "na"]) == { + assert ontology_parser.map_term_labels( + ["CL:0000000", "CL:0000004", "AfPO:0000000", "HANCESTRO:0000000", "unknown", "na"] + ) == { "CL:0000000": "cell A", "CL:0000004": "cell BC", + "AfPO:0000000": "specialized ancestry type", + "HANCESTRO:0000000": "root ancestry type", "unknown": "unknown", "na": "na", } @@ -222,12 +326,19 @@ def test_map_term_labels(ontology_parser): def test_get_term_description(ontology_parser): assert ontology_parser.get_term_description("CL:0000000") == "This is cell A." + assert ontology_parser.get_term_description("CL:0000004") is None + assert ontology_parser.get_term_description("HANCESTRO:0000000") == "This is a root ancestry type." + assert ontology_parser.get_term_description("AfPO:0000000") == "This is a specialized ancestry type." 
def test_map_term_description(ontology_parser): - assert ontology_parser.map_term_descriptions(["CL:0000000", "CL:0000004", "unknown", "na"]) == { + assert ontology_parser.map_term_descriptions( + ["CL:0000000", "CL:0000004", "AfPO:0000000", "HANCESTRO:0000000", "unknown", "na"] + ) == { "CL:0000000": "This is cell A.", "CL:0000004": None, + "AfPO:0000000": "This is a specialized ancestry type.", + "HANCESTRO:0000000": "This is a root ancestry type.", "unknown": "unknown", "na": "na", } @@ -235,12 +346,18 @@ def test_map_term_description(ontology_parser): def test_get_term_synonyms(ontology_parser): assert ontology_parser.get_term_synonyms("CL:0000001") == ["cell Beta", "cell Bravo"] + assert ontology_parser.get_term_synonyms("AfPO:0000000") == ["specialized ancestry synonym"] + assert ontology_parser.get_term_synonyms("HANCESTRO:0000000") == ["root ancestry synonym"] def test_map_term_synonyms(ontology_parser): - assert ontology_parser.map_term_synonyms(["CL:0000000", "CL:0000001", "unknown", "na"]) == { + assert ontology_parser.map_term_synonyms( + ["CL:0000000", "CL:0000001", "AfPO:0000000", "HANCESTRO:0000000", "unknown", "na"] + ) == { "CL:0000000": [], "CL:0000001": ["cell Beta", "cell Bravo"], + "AfPO:0000000": ["specialized ancestry synonym"], + "HANCESTRO:0000000": ["root ancestry synonym"], "unknown": [], "na": [], } @@ -254,6 +371,8 @@ def test_map_term_synonyms(ontology_parser): ("CL:0000008", ["CL:0000000", "CL:0000001"], []), ("CL:0000000", ["CL:0000000", "CL:0000001"], ["CL:0000000"]), ("CL:0000001", ["CL:0000000", "CL:0000001"], ["CL:0000000", "CL:0000001"]), + ("HANCESTRO:0000001", ["HANCESTRO:0000000", "AfPO:0000000"], ["HANCESTRO:0000000", "AfPO:0000000"]), + ("AfPO:0000000", ["HANCESTRO:0000000", "HANCESTRO:0000001"], ["HANCESTRO:0000000"]), ("na", ["CL:0000000", "CL:0000001"], []), ], ) @@ -275,12 +394,21 @@ def test_get_highest_level_term(ontology_parser): assert ontology_parser.get_highest_level_term("CL:0000008", high_level_terms) is None 
assert ontology_parser.get_highest_level_term("na", high_level_terms) is None + assert ( + ontology_parser.get_highest_level_term("AfPO:0000000", ["HANCESTRO:0000000", "AfPO:0000000"]) + == "HANCESTRO:0000000" + ) + def test_map_highest_level_term(ontology_parser): assert ontology_parser.map_highest_level_term( term_ids=["CL:0000000", "CL:0000008", "CL:0000004"], high_level_terms=["CL:0000000", "CL:0000001"], ) == {"CL:0000000": "CL:0000000", "CL:0000008": None, "CL:0000004": "CL:0000000"} + assert ontology_parser.map_highest_level_term( + term_ids=["AfPO:0000000", "HANCESTRO:0000001"], + high_level_terms=["HANCESTRO:0000000", "AfPO:0000000"], + ) == {"AfPO:0000000": "HANCESTRO:0000000", "HANCESTRO:0000001": "HANCESTRO:0000000"} def test_get_lowest_common_ancestors(ontology_parser): @@ -302,6 +430,11 @@ def test_get_lowest_common_ancestors(ontology_parser): # disjoint assert ontology_parser.get_lowest_common_ancestors(term_id_1="CL:0000001", term_id_2="CL:0000008") == [] + # diff ontology terms with a common ancestor + assert ontology_parser.get_lowest_common_ancestors(term_id_1="AfPO:0000000", term_id_2="HANCESTRO:0000001") == [ + "AfPO:0000000" + ] + def test_get_distance_between_terms(ontology_parser): # distance when root node is lca @@ -316,10 +449,20 @@ def test_get_distance_between_terms(ontology_parser): # disjoint distance assert ontology_parser.get_distance_between_terms(term_id_1="CL:0000001", term_id_2="CL:0000008") == -1 + # diff ontology terms + assert ontology_parser.get_distance_between_terms(term_id_1="AfPO:0000000", term_id_2="HANCESTRO:0000001") == 1 + @pytest.mark.parametrize( "term_id,expected", - [("CL:0000005", ["CL:0000001", "CL:0000002"]), ("CL:0000002", ["CL:0000000"]), ("CL:0000000", []), ("unknown", [])], + [ + ("CL:0000005", ["CL:0000001", "CL:0000002"]), + ("CL:0000002", ["CL:0000000"]), + ("CL:0000000", []), + ("unknown", []), + ("AfPO:0000000", ["HANCESTRO:0000000"]), + ("HANCESTRO:0000001", ["AfPO:0000000"]), + ], ) def 
test_get_term_parents(ontology_parser, term_id, expected): assert ontology_parser.get_term_parents(term_id) == expected @@ -327,7 +470,13 @@ def test_get_term_parents(ontology_parser, term_id, expected): @pytest.mark.parametrize( "term_id,expected", - [("CL:0000000", ["CL:0000001", "CL:0000002", "CL:0000003"]), ("CL:0000005", []), ("unknown", [])], + [ + ("CL:0000000", ["CL:0000001", "CL:0000002", "CL:0000003"]), + ("CL:0000005", []), + ("unknown", []), + ("AfPO:0000000", ["HANCESTRO:0000001"]), + ("HANCESTRO:0000000", ["AfPO:0000000"]), + ], ) def test_get_term_children(ontology_parser, term_id, expected): assert ontology_parser.get_term_children(term_id) == expected @@ -373,6 +522,26 @@ def test_get_term_graph(ontology_parser): } +def test_get_term_graph__imported_ontology(ontology_parser): + graph = ontology_parser.get_term_graph("AfPO:0000000") + assert graph.to_dict() == { + "term_id": "AfPO:0000000", + "name": "specialized ancestry type", + "children": [ + { + "term_id": "HANCESTRO:0000001", + "name": "root ontology descendant of specialized ancestry type", + "children": [], + } + ], + } + + assert graph.term_counter == { + "AfPO:0000000": 1, + "HANCESTRO:0000001": 1, + } + + def test_get_term_label_to_id_map(ontology_parser): term_label_to_id_map_expected = { "cell A": "CL:0000000", @@ -389,9 +558,27 @@ def test_get_term_label_to_id_map(ontology_parser): assert ontology_parser.term_label_to_id_map["CL"] == term_label_to_id_map_expected -def test_get_term_id_by_label(ontology_parser): - assert ontology_parser.get_term_id_by_label("cell A", "CL") == "CL:0000000" - assert ontology_parser.get_term_id_by_label("cell Z", "CL") is None +@pytest.mark.parametrize("ontology_name", ["AfPO", "HANCESTRO"]) +def test_get_term_label_to_id_map__imported_ontology(ontology_parser, ontology_name): + term_label_to_id_map_expected = { + "root ancestry type": "HANCESTRO:0000000", + "specialized ancestry type": "AfPO:0000000", + "root ontology descendant of specialized ancestry 
type": "HANCESTRO:0000001", + } + assert ontology_parser.get_term_label_to_id_map(ontology_name) == term_label_to_id_map_expected + + +@pytest.mark.parametrize( + "label,ontology_name,expected", + [ + ("cell A", "CL", "CL:0000000"), + ("cell Z", "CL", None), + ("root ancestry type", "HANCESTRO", "HANCESTRO:0000000"), + ("specialized ancestry type", "AfPO", "AfPO:0000000"), + ], +) +def test_get_term_id_by_label(ontology_parser, label, ontology_name, expected): + assert ontology_parser.get_term_id_by_label(label, ontology_name) == expected def test_get_term_id_by_label__unsupported_ontology_name(ontology_parser): diff --git a/api/python/tests/test_supported_versions.py b/api/python/tests/test_supported_versions.py index 965ff4a5..d0a1a15d 100644 --- a/api/python/tests/test_supported_versions.py +++ b/api/python/tests/test_supported_versions.py @@ -15,12 +15,25 @@ @pytest.fixture -def initialized_CXGSchemaInfo(mock_load_supported_versions): - mock_load_supported_versions.return_value = { +def ontology_info_content(): + return { "5.0.0": { - "ontologies": {"CL": {"version": "v2024-01-01", "source": "http://example.com", "filename": "cl.owl"}} + "ontologies": { + "CL": {"version": "v2024-01-01", "source": "http://example.com", "filename": "cl.owl"}, + "HANCESTRO": { + "version": "v2024-01-01", + "source": "http://example.com", + "filename": "hancestro.owl", + "additional_ontologies": ["FOO", "OOF"], + }, + } } } + + +@pytest.fixture +def initialized_CXGSchemaInfo(mock_load_supported_versions, ontology_info_content): + mock_load_supported_versions.return_value = ontology_info_content return CXGSchema() @@ -77,12 +90,10 @@ def test_coerce_version(version, expected): class TestCXGSchema: - def test__init__defaults(self, mock_load_supported_versions): - support_versions = {"5.0.0": {"ontologies": {}}, "0.0.1": {"ontologies": {}}} - mock_load_supported_versions.return_value = support_versions - cxgs = CXGSchema() - assert cxgs.version == "5.0.0" - assert 
cxgs.supported_ontologies == support_versions["5.0.0"]["ontologies"] + def test__init__defaults(self, ontology_info_content, initialized_CXGSchemaInfo): + assert initialized_CXGSchemaInfo.version == "5.0.0" + assert initialized_CXGSchemaInfo.supported_ontologies == ontology_info_content["5.0.0"]["ontologies"] + assert initialized_CXGSchemaInfo.imported_ontologies == {"FOO": "HANCESTRO", "OOF": "HANCESTRO"} @pytest.mark.parametrize("version", ["v0.0.1", "0.0.1"]) def test__init__specific_version(self, version, mock_load_supported_versions): diff --git a/asset-schemas/ontology_info_schema.json b/asset-schemas/ontology_info_schema.json index 10cf7818..3f1ca762 100644 --- a/asset-schemas/ontology_info_schema.json +++ b/asset-schemas/ontology_info_schema.json @@ -14,35 +14,12 @@ }, "ontologies": { "type": "object", - "properties": { - "CL": { - "$ref": "#/definitions/ontologyEntry" - }, - "EFO": { - "$ref": "#/definitions/ontologyEntry" - }, - "HANCESTRO": { - "$ref": "#/definitions/ontologyEntry" - }, - "HsapDv": { - "$ref": "#/definitions/ontologyEntry" - }, - "MONDO": { - "$ref": "#/definitions/ontologyEntry" - }, - "MmusDv": { - "$ref": "#/definitions/ontologyEntry" - }, - "NCBITaxon": { - "$ref": "#/definitions/ontologyEntry" - }, - "UBERON": { - "$ref": "#/definitions/ontologyEntry" - }, - "PATO": { + "patternProperties": { + "^[A-Za-z0-9]+$": { "$ref": "#/definitions/ontologyEntry" } - } + }, + "additionalProperties": false } }, "required": [ diff --git a/tools/ontology-builder/src/validate_json_schemas.py b/tools/ontology-builder/src/validate_json_schemas.py index f68a39e8..2164c054 100644 --- a/tools/ontology-builder/src/validate_json_schemas.py +++ b/tools/ontology-builder/src/validate_json_schemas.py @@ -3,7 +3,7 @@ import logging import os.path import sys -from typing import Iterable, Tuple +from typing import Any, Dict, Iterable, Tuple import env from jsonschema import validate @@ -67,12 +67,33 @@ def verify_json(schema_file_name: str, json_file_name: str, 
registry: Registry) try: validate(instance=data, schema=schema, registry=registry) + # custom logic for ontology_info definition + if "ontology_info" in schema_file_name: + validate_unique_ontologies(data) except Exception: logger.exception(f"Error validating {json_file_name} against {schema_file_name}") return False return True +def validate_unique_ontologies(data: Dict[str, Any]) -> None: + """ + Custom validation logic to check that all ontologies (including additional_ontologies) defined in ontology_info + are unique across entries + """ + for schema_version, version_info in data.items(): + all_ontologies = [] + for ontology, ontology_info in version_info["ontologies"].items(): + all_ontologies.append(ontology) + all_ontologies.extend(ontology_info.get("additional_ontologies", [])) + if len(all_ontologies) != len(set(all_ontologies)): + logger.error( + "Ontology entries must be unique across all ontology entries, including " + f"additional_ontologies. Duplicates found in definition for {schema_version}" + ) + raise ValueError(f"Duplicate ontology entries found in definition for {schema_version}") + def main(path: str = env.ONTOLOGY_ASSETS_DIR) -> None: """ Verify the curated JSON lists match their respective JSON schema in asset-schemas diff --git a/tools/ontology-builder/tests/test_validate_json_schemas.py b/tools/ontology-builder/tests/test_validate_json_schemas.py index 3d23b17c..bf6c2529 100644 --- a/tools/ontology-builder/tests/test_validate_json_schemas.py +++ b/tools/ontology-builder/tests/test_validate_json_schemas.py @@ -1,5 +1,6 @@ import gzip import json +import os import pytest from referencing import Resource @@ -108,3 +109,84 @@ def test_invalid_schema(self, schema_file_fixture, tmpdir, registry_fixture): # Assert validation fails due to invalid schema assert verify_json(schema_file_fixture, str(json_file), registry_fixture) is False + + +class TestVerifyJsonCustomLogic: + @pytest.fixture + def ontology_info_schema_file_fixture(self, tmpdir): + ontology_info_schema = os.path.join( 
os.path.realpath(__file__).rsplit("/", maxsplit=4)[0], "asset-schemas", "ontology_info_schema.json" + ) + with open(ontology_info_schema, "r") as f: + schema_data = json.load(f) + schema_file = tmpdir.join("ontology_info_schema.json") + with open(str(schema_file), "w") as f: + json.dump(schema_data, f) + return str(schema_file) + + @pytest.fixture + def ontology_info_registry_fixture(self, ontology_info_schema_file_fixture, tmpdir): + return register_schemas(tmpdir) + + @pytest.fixture + def ontology_info_json_data(self): + return { + "2.0.0": { + "ontologies": { + "A": { + "version": "v1", + "source": "https://example.org/ontology/download", + "filename": "a.owl", + "additional_ontologies": ["C"], + }, + "B": { + "version": "v2", + "source": "https://example.org/ontology/download", + "filename": "b.owl", + "additional_ontologies": ["D"], + }, + } + }, + "1.0.0": { + "ontologies": { + "A": { + "version": "v1", + "source": "https://example.org/ontology/download", + "filename": "a.owl", + }, + "B": { + "version": "v1", + "source": "https://example.org/ontology/download", + "filename": "b.owl", + }, + } + }, + } + + def test_validate_unique_ontologies( + self, ontology_info_json_data, ontology_info_schema_file_fixture, tmpdir, ontology_info_registry_fixture + ): + json_file = tmpdir.join("ontology_info.json") + with open(str(json_file), "w") as f: + json.dump(ontology_info_json_data, f) + + # Assert validation passes + assert verify_json(ontology_info_schema_file_fixture, str(json_file), ontology_info_registry_fixture) is True + + @pytest.mark.parametrize("additional_ontologies", [["C"], ["A"]]) + def test_validate_unique_ontologies__invalid( + self, + additional_ontologies, + ontology_info_json_data, + ontology_info_schema_file_fixture, + tmpdir, + ontology_info_registry_fixture, + ): + # Create invalid JSON data + ontology_info_json_data["2.0.0"]["ontologies"]["B"]["additional_ontologies"] = additional_ontologies + json_file = tmpdir.join("ontology_info.json") + 
with open(str(json_file), "w") as f: + json.dump(ontology_info_json_data, f) + + # Assert validation fails + assert verify_json(ontology_info_schema_file_fixture, str(json_file), ontology_info_registry_fixture) is False