Skip to content

Commit

Permalink
Add API for annotating year and update NLM catalog source (#268)
Browse files Browse the repository at this point in the history
  • Loading branch information
cthoyt authored Jan 1, 2025
1 parent a036f9c commit 8779368
Show file tree
Hide file tree
Showing 4 changed files with 70 additions and 10 deletions.
9 changes: 4 additions & 5 deletions src/pyobo/sources/nlm_catalog.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from xml.etree import ElementTree

from pyobo.struct import Obo, Reference, Term, TypeDef, default_reference
from pyobo.struct.typedef import exact_match, has_end_date, has_start_date
from pyobo.utils.path import ensure_df, ensure_path

__all__ = [
Expand All @@ -14,8 +15,6 @@
CATALOG_TO_PUBLISHER = "https://ftp.ncbi.nlm.nih.gov/pubmed/xmlprovidernames.txt"
JOURNAL_INFO_PATH = "https://ftp.ncbi.nlm.nih.gov/pubmed/jourcache.xml"
PUBLISHER = TypeDef.default(PREFIX, "has_publisher", name="has publisher")
START_YEAR = TypeDef.default(PREFIX, "has_start_year", name="has start year")
END_YEAR = TypeDef.default(PREFIX, "has_end_year", name="has end year")


# TODO enrich with context from https://ftp.ncbi.nlm.nih.gov/pubmed/J_Entrez.txt and https://ftp.ncbi.nlm.nih.gov/pubmed/J_Medline.txt
Expand All @@ -26,7 +25,7 @@ class NLMCatalogGetter(Obo):

bioversions_key = ontology = PREFIX
dynamic_version = True
typedefs = [PUBLISHER, START_YEAR, END_YEAR]
typedefs = [PUBLISHER, has_end_date, has_start_date, exact_match]
idspaces = {
PREFIX: "https://www.ncbi.nlm.nih.gov/nlmcatalog/",
}
Expand Down Expand Up @@ -70,9 +69,9 @@ def _process_journal(element, journal_id_to_publisher_key: dict[str, Reference])
# to determine a "canonical" one
term.append_xref(Reference(prefix="issn", identifier=issn))
if start_year := element.findtext("StartYear"):
term.annotate_integer(START_YEAR, start_year)
term.annotate_year(has_start_date, start_year)
if end_year := element.findtext("EndYear"):
term.annotate_integer(END_YEAR, end_year)
term.annotate_year(has_end_date, end_year)
if publisher_reference := journal_id_to_publisher_key.get(term.identifier):
term.annotate_object(PUBLISHER, publisher_reference)
return term
Expand Down
12 changes: 10 additions & 2 deletions src/pyobo/struct/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -649,9 +649,17 @@ def annotate_boolean(self, prop: ReferenceHint, value: bool) -> Self:
prop, str(value).lower(), Reference(prefix="xsd", identifier="boolean")
)

def annotate_integer(self, prop: ReferenceHint, value: str) -> Self:
def annotate_integer(self, prop: ReferenceHint, value: int | str) -> Self:
"""Append an integer annotation."""
return self.annotate_literal(prop, value, Reference(prefix="xsd", identifier="integer"))
return self.annotate_literal(
prop, str(int(value)), Reference(prefix="xsd", identifier="integer")
)

def annotate_year(self, prop: ReferenceHint, value: int | str) -> Self:
"""Append a year annotation."""
return self.annotate_literal(
prop, str(int(value)), Reference(prefix="xsd", identifier="gYear")
)

def _definition_fp(self) -> str:
definition = obo_escape_slim(self.definition) if self.definition else ""
Expand Down
11 changes: 11 additions & 0 deletions src/pyobo/struct/typedef.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
"gene_product_member_of",
"has_contributor",
"has_dbxref",
"has_end_date",
"has_gene_product",
"has_homepage",
"has_inchi",
Expand All @@ -36,6 +37,7 @@
"has_role",
"has_salt",
"has_smiles",
"has_start_date",
"has_successor",
"has_taxonomy_rank",
"is_a",
Expand Down Expand Up @@ -402,6 +404,15 @@ def default(cls, prefix: str, identifier: str, *, name: str | None = None) -> Se
is_metadata_tag=True,
)

has_start_date = TypeDef(
reference=Reference(prefix="dcat", identifier="startDate", name="has start date"),
is_metadata_tag=True,
)
has_end_date = TypeDef(
reference=Reference(prefix="dcat", identifier="endDate", name="has end date"),
is_metadata_tag=True,
)

default_typedefs: dict[ReferenceTuple, TypeDef] = {
v.pair: v for v in locals().values() if isinstance(v, TypeDef)
}
Expand Down
48 changes: 45 additions & 3 deletions tests/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,49 @@ def test_property_literal(self) -> None:
name: lysine dehydrogenase activity
property_value: RO:1234567 "value" xsd:string
""",
term.iterate_obo_lines(ontology_prefix="go", typedefs={}),
term.iterate_obo_lines(ontology_prefix="go", typedefs={RO_DUMMY.pair: RO_DUMMY}),
)

def test_property_integer(self) -> None:
"""Test emitting property literals that were annotated as an integer."""
term = Term(reference=LYSINE_DEHYDROGENASE_ACT)
term.annotate_integer(RO_DUMMY, 1234)
self.assert_lines(
"""\
[Term]
id: GO:0050069
name: lysine dehydrogenase activity
property_value: RO:1234567 "1234" xsd:integer
""",
term.iterate_obo_lines(ontology_prefix="go", typedefs={RO_DUMMY.pair: RO_DUMMY}),
)

def test_property_bool(self) -> None:
"""Test emitting property literals that were annotated as a boolean."""
term = Term(reference=LYSINE_DEHYDROGENASE_ACT)
term.annotate_boolean(RO_DUMMY, True)
self.assert_lines(
"""\
[Term]
id: GO:0050069
name: lysine dehydrogenase activity
property_value: RO:1234567 "true" xsd:boolean
""",
term.iterate_obo_lines(ontology_prefix="go", typedefs={RO_DUMMY.pair: RO_DUMMY}),
)

def test_property_year(self) -> None:
"""Test emitting property literals that were annotated as a year."""
term = Term(reference=LYSINE_DEHYDROGENASE_ACT)
term.annotate_year(RO_DUMMY, "1993")
self.assert_lines(
"""\
[Term]
id: GO:0050069
name: lysine dehydrogenase activity
property_value: RO:1234567 "1993" xsd:gYear
""",
term.iterate_obo_lines(ontology_prefix="go", typedefs={RO_DUMMY.pair: RO_DUMMY}),
)

def test_property_object(self) -> None:
Expand All @@ -179,7 +221,7 @@ def test_property_object(self) -> None:
name: lysine dehydrogenase activity
property_value: RO:1234567 hgnc:123
""",
term.iterate_obo_lines(ontology_prefix="go", typedefs={}),
term.iterate_obo_lines(ontology_prefix="go", typedefs={RO_DUMMY.pair: RO_DUMMY}),
)

def test_relation(self) -> None:
Expand Down Expand Up @@ -350,7 +392,7 @@ def test_property_default_reference(self) -> None:
name: lysine dehydrogenase activity
property_value: hey GO:1234569
""",
term.iterate_obo_lines(ontology_prefix="go", typedefs={RO_DUMMY.pair: RO_DUMMY}),
term.iterate_obo_lines(ontology_prefix="go", typedefs={r.pair: r}),
)

def test_alt(self) -> None:
Expand Down

0 comments on commit 8779368

Please sign in to comment.