Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add explicit typedefs for GeoNames and ICD10 #240

Merged
merged 1 commit into from
Nov 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 6 additions & 5 deletions src/pyobo/sources/geonames.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -9,7 +9,7 @@
from pystow.utils import read_zipfile_csv
from tqdm import tqdm

from pyobo import Obo, Term
from pyobo import Obo, Term, TypeDef
from pyobo.struct import Reference, part_of
from pyobo.utils.path import ensure_df, ensure_path

Expand All @@ -22,14 +22,15 @@
ADMIN1_URL = "https://download.geonames.org/export/dump/admin1CodesASCII.txt"
ADMIN2_URL = "https://download.geonames.org/export/dump/admin2Codes.txt"
CITIES_URL = "https://download.geonames.org/export/dump/cities15000.zip"
CODE_TYPEDEF = TypeDef.default(PREFIX, "code")


class GeonamesGetter(Obo):
"""An ontology representation of GeoNames."""

ontology = PREFIX
dynamic_version = True
typedefs = [part_of]
typedefs = [part_of, CODE_TYPEDEF]

def iter_terms(self, force: bool = False) -> Iterable[Term]:
"""Iterate over terms in the ontology."""
Expand Down Expand Up @@ -80,7 +81,7 @@ def get_code_to_country(*, force: bool = False) -> Mapping[str, Term]:
term.append_synonym(fips)
if pd.notna(iso3):
term.append_synonym(iso3)
term.annotate_literal("code", code)
term.annotate_literal(CODE_TYPEDEF, code)
code_to_country[code] = term
logger.info(f"got {len(code_to_country):,} country records")
return code_to_country
Expand All @@ -107,7 +108,7 @@ def get_code_to_admin1(
term = Term.from_triple(
"geonames", identifier, name if pd.notna(name) else None, type="Instance"
)
term.annotate_literal("code", code)
term.annotate_literal(CODE_TYPEDEF, code)
code_to_admin1[code] = term

country_code = code.split(".")[0]
Expand Down Expand Up @@ -135,7 +136,7 @@ def get_code_to_admin2(
term = Term.from_triple(
"geonames", identifier, name if pd.notna(name) else None, type="Instance"
)
term.annotate_literal("code", code)
term.annotate_literal(CODE_TYPEDEF, code)
code_to_admin2[code] = term
admin1_code = code.rsplit(".", 1)[0]
admin1_term = code_to_admin1.get(admin1_code)
Expand Down
6 changes: 3 additions & 3 deletions src/pyobo/sources/icd10.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -17,7 +17,7 @@
get_icd,
visiter,
)
from ..struct import Obo, Reference, Synonym, Term
from ..struct import Obo, Reference, Synonym, Term, has_category
from ..utils.path import prefix_directory_join

__all__ = [
Expand All @@ -35,6 +35,7 @@ class ICD10Getter(Obo):

ontology = PREFIX
dynamic_version = True
typedefs = [has_category]

def iter_terms(self, force: bool = False) -> Iterable[Term]:
"""Iterate over terms in the ontology."""
Expand Down Expand Up @@ -81,8 +82,7 @@ def _extract_icd10(res_json: Mapping[str, Any]) -> Term:
synonyms=synonyms,
parents=parents,
)

rv.annotate_literal("class_kind", res_json["classKind"])
rv.annotate_literal(has_category, res_json["classKind"])

return rv

Expand Down
2 changes: 2 additions & 0 deletions src/pyobo/struct/__init__.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -17,6 +17,7 @@
enables,
from_species,
gene_product_member_of,
has_category,
has_gene_product,
has_member,
has_part,
Expand Down Expand Up @@ -47,6 +48,7 @@
"enables",
"from_species",
"gene_product_member_of",
"has_category",
"has_gene_product",
"has_member",
"has_part",
Expand Down