Skip to content

Commit

Permalink
alpha2digit keeps newlines. Fixes #94.
Browse files Browse the repository at this point in the history
  • Loading branch information
rtxm committed Aug 14, 2023
1 parent 497d461 commit bfcb54d
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 4 deletions.
3 changes: 3 additions & 0 deletions tests/test_text_to_num_fr.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,3 +201,6 @@ def test_article(self):
def test_un_pronoun(self):
source = "Je n'en veux qu'un. J'annonce: le un"
self.assertEqual(alpha2digit(source, "fr"), source)

def test_alpha2digit_newline(self):
self.assertEqual(alpha2digit("dix + deux\n= douze", "fr"), "10 + 2\n= 12")
7 changes: 3 additions & 4 deletions text_to_num/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@

omg = OrdinalsMerger()
USE_PT_ORDINALS_MERGER = True
WORD_SEP = re.compile(r"\s*[\.,;\(\)…\[\]:!\?]+\s*|\n")


def look_ahead(sequence: Sequence[Any]) -> Iterator[Tuple[Any, Any]]:
Expand Down Expand Up @@ -108,10 +109,8 @@ def alpha2digit(
raise Exception("Language not supported")

language = LANG[lang]
segments = re.split(
r"\s*[\.,;\(\)…\[\]:!\?]+\s*", text
)
punct = re.findall(r"\s*[\.,;\(\)…\[\]:!\?]+\s*", text)
segments = WORD_SEP.split(text)
punct = WORD_SEP.findall(text)
if len(punct) < len(segments):
punct.append("")

Expand Down

0 comments on commit bfcb54d

Please sign in to comment.