Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Support getting download link for ontology from source repo #86

Merged
merged 3 commits into from
Mar 4, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 10 additions & 2 deletions api/python/src/cellxgene_ontology_guide/artifact_download.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
import gzip
import json
from io import BytesIO
from typing import Any
from urllib.error import HTTPError, URLError
from urllib.request import urlopen

from constants import ONTOLOGY_ASSET_RELEASE_URL, SCHEMA_VERSION_TO_ONTOLOGY_ASSET_TAG


def load_artifact_by_schema(schema_version: str, filename: str) -> bytes:
def load_artifact_by_schema(schema_version: str, filename: str) -> Any:
"""
Load ontology files from GitHub Release Assets, based on the provided schema version.
Raises ValueError if the schema version is not supported in this package version or filename is not found for
Expand All @@ -25,7 +29,11 @@ def load_artifact_by_schema(schema_version: str, filename: str) -> bytes:
with urlopen(download_url) as response:
if response.status == 200:
content: bytes = response.read()
return content
if filename.endswith("json.gz"):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

update the return type in the docstring

with gzip.open(BytesIO(content), "rt") as f:
return json.load(f)
else:
return json.loads(content)
else:
raise ValueError(f"Server responded with status code: {response.status}")
except HTTPError as e:
Expand Down
42 changes: 42 additions & 0 deletions api/python/src/cellxgene_ontology_guide/entities.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from enum import Enum


class Ontology(Enum):
    """
    The ontologies that CZ CellXGene supports.

    Every member's value is the all-lowercase spelling of its name.
    """

    CL = "cl"
    EFO = "efo"
    MONDO = "mondo"
    UBERON = "uberon"
    HANCESTRO = "hancestro"
    HsapDv = "hsapdv"
    MmusDv = "mmusdv"
    PATO = "pato"
    NCBITaxon = "ncbitaxon"


class OntologyVariant(Enum):
    """
    The standard ontology variants, each curated with a particular purpose in mind.

    The differences between the variants, and when to use which, are described at
    https://oboacademy.github.io/obook/explanation/owl-format-variants/
    """

    FULL = "full"
    BASE = "base"
    SIMPLE = "simple"
    BASIC = "basic"


class OntologyFileType(Enum):
    """
    The standard ontology file types. Each one needs different parsing tools, but they all relay the same
    information.
    """

    OWL = "owl"
    OBO = "obo"
    JSON = "json"
53 changes: 27 additions & 26 deletions api/python/src/cellxgene_ontology_guide/ontology_parser.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,16 @@
import gzip
import json
import re
from io import BytesIO
from typing import Any, Dict, List, Union

from artifact_download import load_artifact_by_schema
from constants import ALL_ONTOLOGY_FILENAME, ONTOLOGY_INFO_FILENAME
from entities import Ontology, OntologyFileType, OntologyVariant


class OntologyParser:
"""
An object to parse ontology term metadata from ontologies corresponding to a given CellxGene Schema Version.
"""

# Private attribute to keep track of instances
_instances: Dict[str, Any] = {}

def __new__(cls, schema_version: str) -> Any:
"""
Ensure that only one instance per schema_version exists.
"""
if schema_version not in cls._instances:
instance = super(OntologyParser, cls).__new__(cls)
cls._instances[schema_version] = instance
return instance
return cls._instances[schema_version]

def __init__(self, schema_version: str):
"""
Initialize an OntologyParser object with the ontology metadata corresponding to the given CellxGene schema
Expand All @@ -34,16 +19,8 @@ def __init__(self, schema_version: str):

:param schema_version: str version of the schema to load ontology metadata for
"""
if not hasattr(self, "initialized"): # Prevents reinitialization
all_ontology = load_artifact_by_schema(schema_version, ALL_ONTOLOGY_FILENAME)
ontology_info = load_artifact_by_schema(schema_version, ONTOLOGY_INFO_FILENAME)

with gzip.open(BytesIO(all_ontology), "rt") as f:
self.ontology_dict = json.load(f)

self.supported_ontologies = json.loads(ontology_info)

self.initialized = True
self.ontology_dict = load_artifact_by_schema(schema_version, ALL_ONTOLOGY_FILENAME)
self.supported_ontologies = load_artifact_by_schema(schema_version, ONTOLOGY_INFO_FILENAME)

def _parse_ontology_name(self, term_id: str) -> str:
"""
Expand Down Expand Up @@ -184,3 +161,27 @@ def get_term_label(self, term_id: str) -> str:
ontology_name = self._parse_ontology_name(term_id)
label: str = self.ontology_dict[ontology_name][term_id]["label"]
return label

def get_ontology_download_url(
    self,
    ontology: Ontology,
    ontology_filetype: OntologyFileType,
    ontology_variant: Union[OntologyVariant, None] = None,
) -> str:
    """
    Get the download URL for a given ontology file. If the ontology_variant is not provided, the URL of the
    default (variant-less) ontology file is returned.

    The URL is assembled as "<source>/<version>/<ontology>[-<variant>].<filetype>", where source and version are
    read from this parser's supported_ontologies entry for the ontology.

    Examples:
        get_ontology_download_url(Ontology.CL, OntologyFileType.OWL)
            -> "http://example.com/2024-01-01/cl.owl"
        get_ontology_download_url(Ontology.CL, OntologyFileType.OBO, OntologyVariant.BASE)
            -> "http://example.com/2024-01-01/cl-base.obo"

    :param ontology: Ontology enum of the ontology to fetch
    :param ontology_filetype: OntologyFileType enum of the ontology file type to fetch
    :param ontology_variant: optional OntologyVariant enum of the ontology variant to fetch
    :return: str download URL for the requested ontology file
    :raises KeyError: if supported_ontologies has no entry named after the ontology, or the entry lacks a
        "source" or "version" key
    """
    # supported_ontologies is keyed by the enum member *name* (e.g. "CL"), while the file name on the
    # source server uses the member *value* (e.g. "cl").
    ontology_info = self.supported_ontologies[ontology.name]
    source_url = ontology_info["source"]
    version = ontology_info["version"]

    # Explicit None check: absence of a variant is the only case that selects the default file.
    if ontology_variant is None:
        file_stem = ontology.value
    else:
        file_stem = f"{ontology.value}-{ontology_variant.value}"

    return f"{source_url}/{version}/{file_stem}.{ontology_filetype.value}"
30 changes: 26 additions & 4 deletions api/python/tests/test_artifact_download.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import gzip
from unittest.mock import Mock, patch
from urllib.error import HTTPError, URLError

import pytest
from cellxgene_ontology_guide.artifact_download import load_artifact_by_schema
from cellxgene_ontology_guide.constants import ALL_ONTOLOGY_FILENAME, ONTOLOGY_ASSET_RELEASE_URL
from cellxgene_ontology_guide.constants import ALL_ONTOLOGY_FILENAME, ONTOLOGY_ASSET_RELEASE_URL, ONTOLOGY_INFO_FILENAME


@pytest.fixture
Expand All @@ -15,7 +16,16 @@ def get_mock_response(url):
mock_response = Mock()
mock_response.__enter__ = Mock(return_value=mock_response)
mock_response.__exit__ = Mock(return_value=None)
mock_response.read.return_value = b'{"key": "value"}'
mock_response.read.return_value = gzip.compress(b'{"key": "value"}')
mock_response.status = 200
return mock_response
elif url.endswith(ONTOLOGY_INFO_FILENAME):
mock_response = Mock()
mock_response.__enter__ = Mock(return_value=mock_response)
mock_response.__exit__ = Mock(return_value=None)
mock_response.read.return_value = (
b'{"CL": {"version": "2024-01-01", "source": "http://example.com", "filetype": "owl"}}'
)
mock_response.status = 200
return mock_response
else:
Expand All @@ -34,10 +44,10 @@ def mock_urlopen_url_error():
yield mock


def test_load_artifact_by_schema__success(mock_urlopen):
def test_load_artifact_by_schema__success_gzip(mock_urlopen):
schema_version = "5.0.0"
expected_tag = "ontology-assets-v0.0.1"
expected_resp_content = b'{"key": "value"}'
expected_resp_content = {"key": "value"}

result = load_artifact_by_schema(schema_version, ALL_ONTOLOGY_FILENAME)
expected_download_url = f"{ONTOLOGY_ASSET_RELEASE_URL}/{expected_tag}/{ALL_ONTOLOGY_FILENAME}"
Expand All @@ -46,6 +56,18 @@ def test_load_artifact_by_schema__success(mock_urlopen):
assert result == expected_resp_content


def test_load_artifact_by_schema__success_json(mock_urlopen):
    """A plain-JSON artifact is requested from the release URL for the schema's asset tag and returned parsed."""
    release_tag = "ontology-assets-v0.0.1"
    download_url = f"{ONTOLOGY_ASSET_RELEASE_URL}/{release_tag}/{ONTOLOGY_INFO_FILENAME}"

    parsed = load_artifact_by_schema("5.0.0", ONTOLOGY_INFO_FILENAME)

    mock_urlopen.assert_called_once_with(download_url)
    assert parsed == {"CL": {"version": "2024-01-01", "source": "http://example.com", "filetype": "owl"}}


def test_load_artifact_by_schema__unsupported_schema_version(mock_urlopen):
schema_version = "v0.0.0"
with pytest.raises(ValueError) as exc_info:
Expand Down
23 changes: 12 additions & 11 deletions api/python/tests/test_ontology_parser.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
import gzip
import json
from unittest.mock import patch

import pytest
from cellxgene_ontology_guide.constants import ALL_ONTOLOGY_FILENAME, ONTOLOGY_INFO_FILENAME
from cellxgene_ontology_guide.entities import Ontology, OntologyFileType, OntologyVariant
from cellxgene_ontology_guide.ontology_parser import OntologyParser


@pytest.fixture(scope="module")
def ontology_dict():
ontology_dict = {
return {
"CL": {
"CL:0000000": {"ancestors": [], "label": "cell A", "deprecated": False},
"CL:0000001": {
Expand All @@ -30,12 +29,11 @@ def ontology_dict():
"CL:0000004": {"ancestors": ["CL:0000001", "CL:0000000"], "label": "cell B2", "deprecated": False},
}
}
return gzip.compress(json.dumps(ontology_dict).encode("utf-8"))


@pytest.fixture(scope="module")
def supported_ontologies():
return b'{"CL": {"version": "2024-01-01", "source": "http://example.com", "filetype": "owl"}}'
return {"CL": {"version": "2024-01-01", "source": "http://example.com", "filetype": "owl"}}


@pytest.fixture(scope="module")
Expand Down Expand Up @@ -129,9 +127,12 @@ def test_get_term_label(ontology_parser):
assert ontology_parser.get_term_label("CL:0000004") == "cell B2"


def test__init__multiple_ontology_parsers(mock_load_artifact_by_schema, ontology_parser):
ontology_parser_duplicate = OntologyParser(schema_version="5.0.0")
ontology_parser_4 = OntologyParser(schema_version="4.0.0")

assert ontology_parser_duplicate is ontology_parser
assert ontology_parser_4 is not ontology_parser
def test_get_ontology_download_url(ontology_parser):
    """Download URLs are built both without a variant and with an explicit variant."""
    # No variant: the file name is just "<ontology>.<filetype>".
    default_url = ontology_parser.get_ontology_download_url(Ontology.CL, OntologyFileType.OWL)
    assert default_url == "http://example.com/2024-01-01/cl.owl"

    # With a variant: the file name becomes "<ontology>-<variant>.<filetype>".
    base_obo_url = ontology_parser.get_ontology_download_url(
        Ontology.CL, OntologyFileType.OBO, OntologyVariant.BASE
    )
    assert base_obo_url == "http://example.com/2024-01-01/cl-base.obo"
Loading