Skip to content

Commit

Permalink
Add function to yield Terms from file
Browse files Browse the repository at this point in the history
  • Loading branch information
bgyori committed Feb 28, 2023
1 parent 3e5c4f7 commit f1bcc20
Showing 1 changed file with 32 additions and 13 deletions.
45 changes: 32 additions & 13 deletions gilda/grounder.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from pathlib import Path
from collections import defaultdict, Counter
from textwrap import dedent
from typing import List, Mapping, Optional, Set, Tuple, Union
from typing import Iterator, List, Mapping, Optional, Set, Tuple, Union
from adeft.disambiguate import load_disambiguator
from adeft.modeling.classify import load_model_info
from adeft import available_shortforms as available_adeft_models
Expand All @@ -23,6 +23,7 @@
"GrounderInput",
"ScoredMatch",
"load_terms_file",
"load_entries_from_terms_file",
"filter_for_organism",
"load_adeft_models",
"load_gilda_models",
Expand Down Expand Up @@ -542,20 +543,19 @@ def get_grounding_dict(self) -> Mapping[str, str]:
}


def load_terms_file(terms_file: Union[str, Path]) -> Mapping[str, List[Term]]:
"""Load a TSV file containing terms into a lookup dictionary.
def load_entries_from_terms_file(terms_file: Union[str, Path]) -> Iterator[Term]:
"""Yield Terms from a compressed terms TSV file path.
Parameters
----------
terms_file :
Path to a TSV terms file with columns corresponding to the serialized
elements of a Term.
Path to a compressed TSV terms file with columns corresponding to the
serialized elements of a Term.
Returns
-------
:
A lookup dictionary whose keys are normalized entity texts, and values
are lists of Terms with that normalized entity text.
Terms loaded from the file yielded by a generator.
"""
with gzip.open(terms_file, 'rt', encoding='utf-8') as fh:
entries = {}
Expand All @@ -564,12 +564,31 @@ def load_terms_file(terms_file: Union[str, Path]) -> Mapping[str, List[Term]]:
next(reader)
for row in reader:
row_nones = [r if r else None for r in row]
entry = Term(*row_nones)
if row[0] in entries:
entries[row[0]].append(entry)
else:
entries[row[0]] = [entry]
return entries
yield Term(*row_nones)


def load_terms_file(terms_file: Union[str, Path]) -> Mapping[str, List[Term]]:
"""Load a TSV file containing terms into a lookup dictionary.
Parameters
----------
terms_file :
Path to a compressed TSV terms file with columns corresponding to the
serialized elements of a Term.
Returns
-------
:
A lookup dictionary whose keys are normalized entity texts, and values
are lists of Terms with that normalized entity text.
"""
entries = {}
for term in load_entries_from_terms_file(terms_file):
if term.norm_text in entries:
entries[term.norm_text].append(term)
else:
entries[term.norm_text] = [term]
return entries


def filter_for_organism(terms, organisms):
Expand Down

0 comments on commit f1bcc20

Please sign in to comment.