Skip to content

Commit

Permalink
Update definition handling (#239)
Browse files Browse the repository at this point in the history
  • Loading branch information
cthoyt authored Nov 23, 2024
1 parent 56f0efc commit df343f4
Show file tree
Hide file tree
Showing 8 changed files with 45 additions and 31 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ dependencies = [
"pystow>=0.6.0",
"bioversions>=0.5.535",
"bioregistry>=0.11.23",
"bioontologies>=0.4.0",
"bioontologies>=0.5.2",
"zenodo-client>=0.0.5",
"class_resolver",
"psycopg2-binary",
Expand Down
4 changes: 1 addition & 3 deletions src/pyobo/obographs.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,9 +69,7 @@ def _rewire(r: curies.Reference | Referenced) -> curies.Reference:


def _get_class_node(term: Term) -> Node:
if term.provenance and not term.definition:
logger.warning("[%s] unhandled when provenance but no definition", term.curie)
elif term.definition:
if term.provenance or term.definition:
definition = Definition.from_parsed(
value=term.definition, references=[_rewire(p) for p in term.provenance or []]
)
Expand Down
19 changes: 10 additions & 9 deletions src/pyobo/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,8 +160,7 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> Obo:
n_xrefs += len(xrefs)

definition, definition_references = get_definition(data, node=reference)
if definition_references:
provenance.extend(definition_references)
provenance.extend(definition_references)

alt_ids = list(iterate_node_alt_ids(data, strict=strict))
n_alt_ids += len(alt_ids)
Expand Down Expand Up @@ -342,11 +341,11 @@ def iterate_graph_typedefs(
yield TypeDef(reference=reference, xrefs=xrefs)


def get_definition(data, *, node: Reference) -> tuple[None, None] | tuple[str, list[Reference]]:
def get_definition(data, *, node: Reference) -> tuple[None | str, list[Reference]]:
    """Extract the definition from the data.

    :param data: The obonet node data dictionary for a term.
    :param node: The reference for the current term (used downstream in log messages).
    :return: A pair of the definition text (or None) and a list of provenance references.
    """
    # The "def" tag is optional in OBO, so a missing/empty value is not an error.
    raw = data.get("def")
    if raw:
        return _extract_definition(raw, node=node)
    return None, []


Expand All @@ -355,17 +354,17 @@ def _extract_definition(
*,
node: Reference,
strict: bool = False,
) -> tuple[None, None] | tuple[str, list[Reference]]:
) -> tuple[None | str, list[Reference]]:
"""Extract the definitions."""
if not s.startswith('"'):
logger.warning(f"[{node.curie}] definition does not start with a quote")
return None, None
return None, []

try:
definition, rest = _quote_split(s)
except ValueError as e:
logger.warning("[%s] failed to parse definition quotes: %s", node.curie, str(e))
return None, None
return None, []

if not rest.startswith("[") or not rest.endswith("]"):
logger.warning(
Expand All @@ -374,7 +373,7 @@ def _extract_definition(
provenance = []
else:
provenance = _parse_trailing_ref_list(rest, strict=strict, node=node)
return definition, provenance
return definition or None, provenance


def get_first_nonescaped_quote(s: str) -> int | None:
Expand All @@ -389,7 +388,9 @@ def get_first_nonescaped_quote(s: str) -> int | None:


def _quote_split(s: str) -> tuple[str, str]:
s = s.lstrip('"')
if not s.startswith('"'):
raise ValueError(f"'{s}' does not start with a quote")
s = s.removeprefix('"')
i = get_first_nonescaped_quote(s)
if i is None:
raise ValueError(f"no closing quote found in `{s}`")
Expand Down
5 changes: 5 additions & 0 deletions src/pyobo/struct/reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,3 +218,8 @@ def reference_escape(predicate: Reference | Referenced, *, ontology_prefix: str)
return predicate.identifier.removeprefix(f"{ontology_prefix}#")
else:
return predicate.preferred_curie


def comma_separate_references(references: list[Reference]) -> str:
    """Render each reference as its preferred CURIE and join with a comma separator."""
    curies = [reference.preferred_curie for reference in references]
    return ", ".join(curies)
24 changes: 13 additions & 11 deletions src/pyobo/struct/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,13 @@
from tqdm.auto import tqdm
from typing_extensions import Self

from .reference import Reference, Referenced, default_reference, reference_escape
from .reference import (
Reference,
Referenced,
comma_separate_references,
default_reference,
reference_escape,
)
from .typedef import (
TypeDef,
comment,
Expand All @@ -40,7 +46,7 @@
see_also,
term_replaced_by,
)
from .utils import comma_separate, obo_escape_slim
from .utils import obo_escape_slim
from ..api.utils import get_version
from ..constants import (
DATE_FORMAT,
Expand Down Expand Up @@ -109,7 +115,7 @@ def _fp(self, ontology_prefix: str) -> str:
x = f'"{self._escape(self.name)}" {self.specificity}'
if self.type and self.type.pair != DEFAULT_SYNONYM_TYPE.pair:
x = f"{x} {reference_escape(self.type, ontology_prefix=ontology_prefix)}"
return f"{x} [{comma_separate(self.provenance)}]"
return f"{x} [{comma_separate_references(self.provenance)}]"

@staticmethod
def _escape(s: str) -> str:
Expand Down Expand Up @@ -454,9 +460,8 @@ def annotate_boolean(self, prop: ReferenceHint, value: bool) -> Self:
)

def _definition_fp(self) -> str:
if self.definition is None:
raise AssertionError
return f'"{obo_escape_slim(self.definition)}" [{comma_separate(self.provenance)}]'
definition = obo_escape_slim(self.definition) if self.definition else ""
return f'"{definition}" [{comma_separate_references(self.provenance)}]'

def iterate_relations(self) -> Iterable[tuple[Reference, Reference]]:
"""Iterate over pairs of typedefs and targets."""
Expand Down Expand Up @@ -498,11 +503,8 @@ def iterate_obo_lines(

xrefs = list(self.xrefs)

if self.definition:
if self.definition or self.provenance:
yield f"def: {self._definition_fp()}"
elif self.provenance:
# if no definition, just stick on xrefs
xrefs.extend(self.provenance)

for alt in sorted(self.alt_ids):
yield f"alt_id: {alt}" # __str__ bakes in the ! name
Expand Down Expand Up @@ -1144,7 +1146,7 @@ def to_obonet(self: Obo, *, use_tqdm: bool = False) -> nx.MultiDiGraph:
d = {
"id": term.curie,
"name": term.name,
"def": term.definition and term._definition_fp(),
"def": (term.definition or term.provenance) and term._definition_fp(),
"xref": [xref.curie for xref in term.xrefs],
"is_a": parents,
"relationship": relations,
Expand Down
6 changes: 0 additions & 6 deletions src/pyobo/struct/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
__all__ = [
"OBO_ESCAPE",
"OBO_ESCAPE_SLIM",
"comma_separate",
"obo_escape",
"obo_escape_slim",
]
Expand All @@ -24,8 +23,3 @@ def obo_escape_slim(string: str) -> str:
rv = "".join(OBO_ESCAPE_SLIM.get(character, character) for character in string)
rv = rv.replace("\n", "\\n")
return rv


def comma_separate(elements) -> str:
    """Stringify each element and join them with a comma-space separator."""
    rendered = (str(element) for element in elements)
    return ", ".join(rendered)
14 changes: 14 additions & 0 deletions tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -466,6 +466,20 @@ def test_definition_with_provenance(self) -> None:
self.assertEqual(1, len(term.provenance))
self.assertEqual(CHARLIE, term.provenance[0])

def test_provenance_no_definition(self) -> None:
    """Test parsing a term with provenance but no definition."""
    # An empty quoted definition followed by a bracketed reference list:
    # the reader should keep the provenance even with no definition text.
    ontology = _read(f"""\
ontology: chebi
[Term]
id: CHEBI:1234
def: "" [{CHARLIE.curie}]
""")
    term = self.get_only_term(ontology)
    # The empty definition string normalizes to None rather than "".
    self.assertIsNone(term.definition)
    # The bracketed reference is still captured as provenance.
    self.assertEqual(1, len(term.provenance))
    self.assertEqual(CHARLIE, term.provenance[0])

def test_synonym_minimal(self) -> None:
"""Test parsing a synonym just the text."""
ontology = _read("""\
Expand Down
2 changes: 1 addition & 1 deletion tests/test_struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -425,7 +425,7 @@ def test_provenance_no_definition(self) -> None:
[Term]
id: GO:0050069
name: lysine dehydrogenase activity
xref: orcid:0000-0003-4423-4370
def: "" [orcid:0000-0003-4423-4370]
""",
term.iterate_obo_lines(ontology_prefix="go", typedefs={}),
)
Expand Down

0 comments on commit df343f4

Please sign in to comment.