Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add function to fetch curated ontology term lists #141

Merged
merged 3 commits into from
Mar 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ jobs:
content_type: "application/json"
- file_name: "system_list.json"
content_type: "application/json"
# Must match the asset on disk: ontology-assets/uberon_development_stage_list.json
- file_name: "uberon_development_stage_list.json"
  content_type: "application/json"
steps:
- name: Checkout main branch
uses: actions/checkout@v4
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import functools
import json
import os
from typing import List

from cellxgene_ontology_guide._constants import DATA_ROOT
from cellxgene_ontology_guide.entities import CuratedOntologyTermList


@functools.cache
def get_curated_ontology_term_list(curated_ontology_term_list: CuratedOntologyTermList) -> List[str]:
    """
    Get the list of curated ontology terms for the given curated_ontology_term_list.

    The terms are read from the JSON file "<enum value>_list.json" located under DATA_ROOT. Results are
    memoized per enum member via functools.cache, so repeated calls return the very same list object;
    treat the returned list as read-only and copy it before mutating.

    :param curated_ontology_term_list: Enum attribute representing the curated ontology term list
    :return: List[str] of ontology term IDs
    :raises OSError: if the backing JSON file cannot be opened (e.g. FileNotFoundError)
    """
    filename = f"{curated_ontology_term_list.value}_list.json"
    # Be explicit about the encoding so decoding does not depend on the platform's locale default.
    with open(os.path.join(DATA_ROOT, filename), encoding="utf-8") as f:
        return json.load(f)  # type: ignore
13 changes: 13 additions & 0 deletions api/python/src/cellxgene_ontology_guide/entities.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,3 +15,16 @@ class Ontology(Enum):
MmusDv = "mmusdv"
PATO = "pato"
NCBITaxon = "ncbitaxon"


class CuratedOntologyTermList(Enum):
    """
    Enum for the set of curated ontology term lists supported by CZ CellXGene.

    Each member's value is the stem of the JSON asset holding that list:
    get_curated_ontology_term_list() loads the file named "<value>_list.json".
    """

    # value -> asset file "<value>_list.json"
    CELL_CLASS = "cell_class"
    CELL_SUBCLASS = "cell_subclass"
    ORGAN = "organ"
    SYSTEM = "system"
    TISSUE_GENERAL = "tissue_general"
    UBERON_DEVELOPMENT_STAGE = "uberon_development_stage"
10 changes: 7 additions & 3 deletions api/python/src/cellxgene_ontology_guide/ontology_parser.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import re
from typing import Any, Dict, Iterable, List, Union
from typing import Any, Dict, Iterable, List, Optional, Union

from cellxgene_ontology_guide._constants import VALID_NON_ONTOLOGY_TERMS
from cellxgene_ontology_guide.entities import Ontology
Expand Down Expand Up @@ -39,16 +39,20 @@ def _parse_ontology_name(self, term_id: str) -> str:

return ontology_name

def is_valid_term_id(self, term_id: str) -> bool:
def is_valid_term_id(self, term_id: str, ontology: Optional[str] = None) -> bool:
"""
Check if an ontology term ID is valid and defined in a supported ontology. If deprecated but defined
in the ontology, it is considered valid.
in the ontology, it is considered valid. Optionally, specify an ontology to check against, and determine
if the term is defined in that particular ontology. Otherwise, checks if term is valid in any supported ontology

:param term_id: str ontology term to check
:param ontology: str name of ontology to check against
:return: boolean flag indicating whether the term is supported
"""
try:
ontology_name = self._parse_ontology_name(term_id)
if ontology and ontology_name != ontology:
return False
if term_id in self.cxg_schema.ontology(ontology_name):
return True
except ValueError:
Expand Down
40 changes: 40 additions & 0 deletions api/python/tests/test_curated_ontology_term_lists.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import json
from unittest.mock import patch

import pytest
from cellxgene_ontology_guide.curated_ontology_term_lists import get_curated_ontology_term_list
from cellxgene_ontology_guide.entities import CuratedOntologyTermList

MODULE_PATH = "cellxgene_ontology_guide.curated_ontology_term_lists"


@pytest.fixture
def mock_curated_ontology_term_list_file(tmpdir):
    """
    Point the module's DATA_ROOT at a temporary directory containing a small "cell_class_list.json" file.

    The function under test is wrapped in functools.cache, whose state is process-global. The cache is
    cleared before and after each use of this fixture so that cache_info() counters always start from zero
    and no test is served a result memoized by a previous test, regardless of execution order.

    :return: yields a tuple of (CuratedOntologyTermList member, list of terms written to the file)
    """
    get_curated_ontology_term_list.cache_clear()
    with patch(f"{MODULE_PATH}.DATA_ROOT", tmpdir):
        test_file_name = "cell_class_list.json"
        test_enum = CuratedOntologyTermList.CELL_CLASS
        onto_file_path = tmpdir.join(test_file_name)
        file_contents = ["cell class 1", "cell class 2"]
        # Separate names for the path and the handle; the original rebound one name to both.
        with open(str(onto_file_path), "wt", encoding="utf-8") as onto_file:
            json.dump(file_contents, onto_file)
        try:
            yield test_enum, file_contents
        finally:
            # Do not leak entries that point at this test's (soon to be stale) tmpdir.
            get_curated_ontology_term_list.cache_clear()


def test_get_curated_ontology_term_list(mock_curated_ontology_term_list_file):
    """The first lookup loads the file (one miss); repeating it is answered from the cache (one hit)."""
    term_list_enum, expected_terms = mock_curated_ontology_term_list_file

    loaded_terms = get_curated_ontology_term_list(term_list_enum)
    assert loaded_terms == expected_terms
    stats_after_first = get_curated_ontology_term_list.cache_info()
    assert stats_after_first.hits == 0
    assert stats_after_first.misses == 1

    # Same argument again: the hit counter moves, the miss counter does not.
    get_curated_ontology_term_list(term_list_enum)
    stats_after_second = get_curated_ontology_term_list.cache_info()
    assert stats_after_second.hits == 1
    assert stats_after_second.misses == 1


def test__clear_curated_ontology_term_list_cache(mock_curated_ontology_term_list_file):
    """cache_clear() resets the miss counter, and the next lookup is counted as a fresh miss."""
    term_list_enum = mock_curated_ontology_term_list_file[0]
    cached_fn = get_curated_ontology_term_list

    cached_fn(term_list_enum)
    assert cached_fn.cache_info().misses == 1

    cached_fn.cache_clear()
    assert cached_fn.cache_info().misses == 0

    # After clearing, the same argument has to be loaded again.
    cached_fn(term_list_enum)
    assert cached_fn.cache_info().misses == 1
8 changes: 8 additions & 0 deletions api/python/tests/test_ontology_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,14 @@ def test_is_valid_term_id(ontology_parser, term_id, expected):
assert ontology_parser.is_valid_term_id(term_id) == expected


@pytest.mark.parametrize(
    "term_id,ontology,expected",
    [
        ("CL:0000001", "CL", True),
        ("CL:0000001", "UBERON", False),
        ("GO:0000001", "GO", False),
    ],
)
def test_is_valid_term_id__with_ontology(ontology_parser, term_id, ontology, expected):
    """Check is_valid_term_id when the caller also names the ontology to validate against."""
    is_valid = ontology_parser.is_valid_term_id(term_id, ontology)
    assert is_valid == expected


def test_get_term_ancestors(ontology_parser):
assert ontology_parser.get_term_ancestors("CL:0000004") == ["CL:0000000", "CL:0000001", "CL:0000002"]
assert ontology_parser.get_term_ancestors("CL:0000004", include_self=True) == [
Expand Down
9 changes: 9 additions & 0 deletions artifact-schemas/uberon_development_stage_list_schema.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Curated 'Development Stage' UBERON Ontology Terms Schema",
"description": "A schema for validating an array of high-level UBERON ontology term IDs representing 'development stage' ontology terms, curated for CZ CellxGene use.",
"type": "array",
"items": {"$ref": "ontology_term_id_schema.json#/definitions/UBERON_term_id"},
"minItems": 1,
"uniqueItems": true
}
50 changes: 50 additions & 0 deletions ontology-assets/uberon_development_stage_list.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
[
"UBERON:0007236",
"UBERON:0000106",
"UBERON:0014859",
"UBERON:0008264",
"UBERON:0007233",
"UBERON:0000112",
"UBERON:8000003",
"UBERON:0014857",
"UBERON:0009849",
"UBERON:0034920",
"UBERON:0000069",
"UBERON:0000109",
"UBERON:8000001",
"UBERON:0000068",
"UBERON:0018685",
"UBERON:0000107",
"UBERON:0007222",
"UBERON:0000092",
"UBERON:0018378",
"UBERON:0014864",
"UBERON:0004730",
"UBERON:0000111",
"UBERON:0007220",
"UBERON:0014405",
"UBERON:0014862",
"UBERON:8000000",
"UBERON:0000071",
"UBERON:0014860",
"UBERON:0012101",
"UBERON:0000113",
"UBERON:0014858",
"UBERON:0007232",
"UBERON:0000070",
"UBERON:0000110",
"UBERON:8000002",
"UBERON:0014856",
"UBERON:0004728",
"UBERON:0034919",
"UBERON:0000108",
"UBERON:0000066",
"UBERON:0004707",
"UBERON:0000105",
"UBERON:0018241",
"UBERON:0007221",
"UBERON:0014406",
"UBERON:0014863",
"UBERON:0004729",
"UBERON:0014861"
]
Loading