Skip to content

Commit

Permalink
Fix data written by dictionary_pickler.py (#156)
Browse files Browse the repository at this point in the history
* Fix data written by dictionary_pickler.py

The changes to the return types of `DictionaryFactory.get_dictionary()`
in 63933fc broke the generation of dictionaries with
`training/dictionary_pickler.py`. This commit repairs it by reverting
the changes that 63933fc made to `training/dictionary_pickler.py`.

* Fix tests for dictionary_pickler
  • Loading branch information
Dunedan authored Nov 5, 2024
1 parent 10cc7fb commit db5f5d8
Show file tree
Hide file tree
Showing 2 changed files with 8 additions and 6 deletions.
4 changes: 2 additions & 2 deletions tests/test_dictionary_pickler.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@ def test_logic() -> None:
# different order
mydict = dictionary_pickler._read_dict(testfile, "es", silent=True)
assert len(mydict) == 5
assert mydict["closeones"] == "closeone"
assert mydict[b"closeones"] == b"closeone"
item = sorted(mydict.keys(), reverse=True)[0]
assert item == "valid-word"
assert item == b"valid-word"

# file I/O
assert dictionary_pickler._determine_path("lists", "de").endswith("de.txt")
Expand Down
10 changes: 6 additions & 4 deletions training/dictionary_pickler.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import re
from operator import itemgetter
from pathlib import Path
from typing import Dict, List, Optional
from typing import ByteString, Dict, List, Optional

import simplemma
from simplemma.strategies.defaultrules import DEFAULT_RULES
Expand Down Expand Up @@ -49,7 +49,9 @@ def _determine_path(listpath: str, langcode: str) -> str:
return str(Path(__file__).parent / filename)


def _read_dict(filepath: str, langcode: str, silent: bool) -> Dict[str, str]:
def _read_dict(
filepath: str, langcode: str, silent: bool
) -> Dict[ByteString, ByteString]:
mydict: Dict[str, str] = {}
myadditions: List[str] = []
i: int = 0
Expand Down Expand Up @@ -120,12 +122,12 @@ def _read_dict(filepath: str, langcode: str, silent: bool) -> Dict[str, str]:
mydict[word] = word
LOGGER.debug("%s %s", langcode, i)
# sort and convert to bytestrings
return dict(sorted(mydict.items()))
return {k.encode("utf-8"): v.encode("utf-8") for k, v in sorted(mydict.items())}


def _load_dict(
langcode: str, listpath: str = "lists", silent: bool = True
) -> Dict[str, str]:
) -> Dict[ByteString, ByteString]:
filepath = _determine_path(listpath, langcode)
return _read_dict(filepath, langcode, silent)

Expand Down

0 comments on commit db5f5d8

Please sign in to comment.