Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Some cleanup on the ensembldb code #67

Merged
merged 4 commits into from
Apr 4, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 12 additions & 8 deletions src/genomic_features/ensembl/ensembldb.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import warnings
from functools import cached_property
from itertools import product
import os
ivirshup marked this conversation as resolved.
Show resolved Hide resolved
from pathlib import Path
from typing import Final, Literal

Expand All @@ -20,10 +21,7 @@
PKG_CACHE_DIR = "genomic-features"

BIOC_ANNOTATION_HUB_URL = (
"https://bioconductorhubs.blob.core.windows.net/annotationhub/"
)
ENSEMBL_URL_TEMPLATE = (
BIOC_ANNOTATION_HUB_URL + "AHEnsDbs/v{version}/EnsDb.{species}.v{version}.sqlite"
"https://bioconductorhubs.blob.core.windows.net/annotationhub"
)
ANNOTATION_HUB_URL = (
"https://annotationhub.bioconductor.org/metadata/annotationhub.sqlite3"
Expand Down Expand Up @@ -56,7 +54,7 @@ def annotation(
"""
try:
sqlite_file_path = retrieve_annotation(
ENSEMBL_URL_TEMPLATE.format(species=species, version=version)
f'{BIOC_ANNOTATION_HUB_URL}/AHEnsDbs/v{version}/EnsDb.{species}.v{version}.sqlite'
)

if backend == "sqlite":
Expand All @@ -74,7 +72,8 @@ def annotation(
except HTTPError as err:
if err.response.status_code == 404:
raise ValueError(
f"No Ensembl database found for {species} v{version}. Check available versions with `genomic_features.ensembl.list_versions`."
f"No Ensembl database found for {species} v{version}. Check "
f"available versions with `genomic_features.ensembl.list_ensdb_annotations `."
) from err
else:
raise HTTPError from err
Expand Down Expand Up @@ -125,7 +124,8 @@ def list_ensdb_annotations(species: None | str | list[str] = None) -> DataFrame:
# check that species exist
if version_table.shape[0] == 0:
raise ValueError(
f"No Ensembl database found for {species}. Check species name."
f"No Ensembl database found for {species}. Available species can "
f"be found via: `list_ensdb_annotations()['Species'].unique()`."
)

version_table["Ensembl_version"] = version_table["rdatapath"].str.split(
Expand Down Expand Up @@ -153,7 +153,11 @@ def metadata(self) -> dict:

def __repr__(self) -> str:
d = self.metadata
return f"EnsemblDB(organism='{d['Organism']}', ensembl_release='{d['ensembl_version']}')"
return (
f"EnsemblDB(organism='{d['Organism']}', "
f"ensembl_release='{d['ensembl_version']}', "
f"genome_build='{d['genome_build']}')"
)

def genes(
self,
Expand Down
2 changes: 1 addition & 1 deletion tests/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_missing_version():

def test_repr():
result = repr(gf.ensembl.annotation("Hsapiens", 108))
expected = "EnsemblDB(organism='Homo sapiens', ensembl_release='108')"
expected = "EnsemblDB(organism='Homo sapiens', ensembl_release='108', genome_build='GRCh38')"

assert result == expected

Expand Down
Loading