diff --git a/pyproject.toml b/pyproject.toml index 545bd287..df04fca7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -69,7 +69,7 @@ dependencies = [ "pystow>=0.6.0", "bioversions>=0.5.535", "bioregistry>=0.11.23", - "bioontologies>=0.4.0", + "bioontologies>=0.5.2", "zenodo-client>=0.0.5", "class_resolver", "psycopg2-binary", diff --git a/src/pyobo/obographs.py b/src/pyobo/obographs.py index 3329efe9..c3c21ddf 100644 --- a/src/pyobo/obographs.py +++ b/src/pyobo/obographs.py @@ -69,9 +69,7 @@ def _rewire(r: curies.Reference | Referenced) -> curies.Reference: def _get_class_node(term: Term) -> Node: - if term.provenance and not term.definition: - logger.warning("[%s] unhandled when provenance but no definition", term.curie) - elif term.definition: + if term.provenance or term.definition: definition = Definition.from_parsed( value=term.definition, references=[_rewire(p) for p in term.provenance or []] ) diff --git a/src/pyobo/reader.py b/src/pyobo/reader.py index d8390f76..65f3331f 100644 --- a/src/pyobo/reader.py +++ b/src/pyobo/reader.py @@ -160,8 +160,7 @@ def from_obonet(graph: nx.MultiDiGraph, *, strict: bool = True) -> Obo: n_xrefs += len(xrefs) definition, definition_references = get_definition(data, node=reference) - if definition_references: - provenance.extend(definition_references) + provenance.extend(definition_references) alt_ids = list(iterate_node_alt_ids(data, strict=strict)) n_alt_ids += len(alt_ids) @@ -342,11 +341,11 @@ def iterate_graph_typedefs( yield TypeDef(reference=reference, xrefs=xrefs) -def get_definition(data, *, node: Reference) -> tuple[None, None] | tuple[str, list[Reference]]: +def get_definition(data, *, node: Reference) -> tuple[None | str, list[Reference]]: """Extract the definition from the data.""" definition = data.get("def") # it's allowed not to have a definition if not definition: - return None, None + return None, [] return _extract_definition(definition, node=node) @@ -355,17 +354,17 @@ def _extract_definition( *, node: Reference, strict: bool = False, -) -> tuple[None, None] | tuple[str, list[Reference]]: +) -> tuple[None | str, list[Reference]]: """Extract the definitions.""" if not s.startswith('"'): logger.warning(f"[{node.curie}] definition does not start with a quote") - return None, None + return None, [] try: definition, rest = _quote_split(s) except ValueError as e: logger.warning("[%s] failed to parse definition quotes: %s", node.curie, str(e)) - return None, None + return None, [] if not rest.startswith("[") or not rest.endswith("]"): logger.warning( @@ -374,7 +373,7 @@ def _extract_definition( provenance = [] else: provenance = _parse_trailing_ref_list(rest, strict=strict, node=node) - return definition, provenance + return definition or None, provenance def get_first_nonescaped_quote(s: str) -> int | None: @@ -389,7 +388,9 @@ def get_first_nonescaped_quote(s: str) -> int | None: def _quote_split(s: str) -> tuple[str, str]: - s = s.lstrip('"') + if not s.startswith('"'): + raise ValueError(f"'{s}' does not start with a quote") + s = s.removeprefix('"') i = get_first_nonescaped_quote(s) if i is None: raise ValueError(f"no closing quote found in `{s}`") diff --git a/src/pyobo/struct/reference.py b/src/pyobo/struct/reference.py index 46222c9d..0d82ee73 100644 --- a/src/pyobo/struct/reference.py +++ b/src/pyobo/struct/reference.py @@ -218,3 +218,8 @@ def reference_escape(predicate: Reference | Referenced, *, ontology_prefix: str) return predicate.identifier.removeprefix(f"{ontology_prefix}#") else: return predicate.preferred_curie + + +def comma_separate_references(references: list[Reference]) -> str: + """Map a list to strings and make comma separated.""" + return ", ".join(r.preferred_curie for r in references) diff --git a/src/pyobo/struct/struct.py b/src/pyobo/struct/struct.py index 34230b0c..39114832 100644 --- a/src/pyobo/struct/struct.py +++ b/src/pyobo/struct/struct.py @@ -25,7 +25,13 @@ from tqdm.auto import tqdm from typing_extensions import Self -from .reference import Reference, Referenced, default_reference, reference_escape +from .reference import ( + Reference, + Referenced, + comma_separate_references, + default_reference, + reference_escape, +) from .typedef import ( TypeDef, comment, @@ -40,7 +46,7 @@ see_also, term_replaced_by, ) -from .utils import comma_separate, obo_escape_slim +from .utils import obo_escape_slim from ..api.utils import get_version from ..constants import ( DATE_FORMAT, @@ -109,7 +115,7 @@ def _fp(self, ontology_prefix: str) -> str: x = f'"{self._escape(self.name)}" {self.specificity}' if self.type and self.type.pair != DEFAULT_SYNONYM_TYPE.pair: x = f"{x} {reference_escape(self.type, ontology_prefix=ontology_prefix)}" - return f"{x} [{comma_separate(self.provenance)}]" + return f"{x} [{comma_separate_references(self.provenance)}]" @staticmethod def _escape(s: str) -> str: @@ -454,9 +460,8 @@ def annotate_boolean(self, prop: ReferenceHint, value: bool) -> Self: ) def _definition_fp(self) -> str: - if self.definition is None: - raise AssertionError - return f'"{obo_escape_slim(self.definition)}" [{comma_separate(self.provenance)}]' + definition = obo_escape_slim(self.definition) if self.definition else "" + return f'"{definition}" [{comma_separate_references(self.provenance)}]' def iterate_relations(self) -> Iterable[tuple[Reference, Reference]]: """Iterate over pairs of typedefs and targets.""" @@ -498,11 +503,8 @@ def iterate_obo_lines( xrefs = list(self.xrefs) - if self.definition: + if self.definition or self.provenance: yield f"def: {self._definition_fp()}" - elif self.provenance: - # if no definition, just stick on xrefs - xrefs.extend(self.provenance) for alt in sorted(self.alt_ids): yield f"alt_id: {alt}" # __str__ bakes in the ! name @@ -1144,7 +1146,7 @@ def to_obonet(self: Obo, *, use_tqdm: bool = False) -> nx.MultiDiGraph: d = { "id": term.curie, "name": term.name, - "def": term.definition and term._definition_fp(), + "def": (term.definition or term.provenance) and term._definition_fp(), "xref": [xref.curie for xref in term.xrefs], "is_a": parents, "relationship": relations, diff --git a/src/pyobo/struct/utils.py b/src/pyobo/struct/utils.py index 344fe514..d30dfb7b 100644 --- a/src/pyobo/struct/utils.py +++ b/src/pyobo/struct/utils.py @@ -5,7 +5,6 @@ __all__ = [ "OBO_ESCAPE", "OBO_ESCAPE_SLIM", - "comma_separate", "obo_escape", "obo_escape_slim", ] @@ -24,8 +23,3 @@ def obo_escape_slim(string: str) -> str: rv = "".join(OBO_ESCAPE_SLIM.get(character, character) for character in string) rv = rv.replace("\n", "\\n") return rv - - -def comma_separate(elements) -> str: - """Map a list to strings and make comma separated.""" - return ", ".join(map(str, elements)) diff --git a/tests/test_reader.py b/tests/test_reader.py index 8537e97c..01a9b767 100644 --- a/tests/test_reader.py +++ b/tests/test_reader.py @@ -466,6 +466,20 @@ def test_definition_with_provenance(self) -> None: self.assertEqual(1, len(term.provenance)) self.assertEqual(CHARLIE, term.provenance[0]) + def test_provenance_no_definition(self) -> None: + """Test parsing a term with provenance but no definition.""" + ontology = _read(f"""\ + ontology: chebi + + [Term] + id: CHEBI:1234 + def: "" [{CHARLIE.curie}] + """) + term = self.get_only_term(ontology) + self.assertIsNone(term.definition) + self.assertEqual(1, len(term.provenance)) + self.assertEqual(CHARLIE, term.provenance[0]) + def test_synonym_minimal(self) -> None: """Test parsing a synonym just the text.""" ontology = _read("""\ diff --git a/tests/test_struct.py b/tests/test_struct.py index 04f17820..233dd10e 100644 --- a/tests/test_struct.py +++ b/tests/test_struct.py @@ -425,7 +425,7 @@ def test_provenance_no_definition(self) -> None: [Term] id: GO:0050069 name: lysine dehydrogenase activity - xref: orcid:0000-0003-4423-4370 + def: "" [orcid:0000-0003-4423-4370] """, term.iterate_obo_lines(ontology_prefix="go", typedefs={}), )