From 510f223e48c1da95d726e5a73708e4fefb207037 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Romuald=20Texier-Marcad=C3=A9?= Date: Tue, 19 Mar 2024 16:01:49 +0100 Subject: [PATCH] Refined handling of zero. Fixes #105, fixes #106. --- tests/test_text_to_num_en.py | 1 + tests/test_text_to_num_fr.py | 2 ++ text_to_num/lang/base.py | 2 +- text_to_num/lang/english.py | 2 +- text_to_num/parsers.py | 20 ++++++++++---------- 5 files changed, 15 insertions(+), 12 deletions(-) diff --git a/tests/test_text_to_num_en.py b/tests/test_text_to_num_en.py index 6d21928..881e040 100644 --- a/tests/test_text_to_num_en.py +++ b/tests/test_text_to_num_en.py @@ -126,6 +126,7 @@ def test_alpha2digit_zero(self): self.assertEqual(alpha2digit(source, "en"), expected) self.assertEqual(alpha2digit("zero", "en"), "0") + self.assertEqual(alpha2digit("zero love", "en"), "0 love") def test_alpha2digit_ordinals(self): source = ( diff --git a/tests/test_text_to_num_fr.py b/tests/test_text_to_num_fr.py index accd0ef..c9ba038 100644 --- a/tests/test_text_to_num_fr.py +++ b/tests/test_text_to_num_fr.py @@ -143,6 +143,8 @@ def test_alpha2digit_zero(self): # self.assertEqual(alpha2digit(source, "fr"), source) self.assertEqual(alpha2digit("zéro", "fr"), "0") + self.assertEqual(alpha2digit("a a un trois sept trois trois sept cinq quatre zéro c c", "fr"), "a a 1 3 7 3 3 7 5 4 0 c c") + self.assertEqual(alpha2digit("sept un zéro", "fr"), "7 1 0") def test_alpha2digit_ordinals(self): source = ( diff --git a/text_to_num/lang/base.py b/text_to_num/lang/base.py index b2a8cc9..e349c1c 100644 --- a/text_to_num/lang/base.py +++ b/text_to_num/lang/base.py @@ -70,7 +70,7 @@ def normalize(self, word: str) -> str: return NotImplemented def not_numeric_word(self, word: Optional[str]) -> bool: - return word is None or word != self.DECIMAL_SEP and word not in self.NUMBERS + return word is None or word != self.DECIMAL_SEP and word not in self.NUMBERS and word not in self.ZERO def split_number_word(self, word: str) -> str: # maybe use: List[str] """In some languages numbers are written as one word, e.g. German diff --git a/text_to_num/lang/english.py b/text_to_num/lang/english.py index e155a2e..4286476 100644 --- a/text_to_num/lang/english.py +++ b/text_to_num/lang/english.py @@ -117,7 +117,7 @@ class English(Language): AND_NUMS: Set[str] = set() AND = "and" - NEVER_IF_ALONE = {"one"} + NEVER_IF_ALONE = {"one", "o"} # Relaxed composed numbers (two-words only) # start => (next, target) diff --git a/text_to_num/parsers.py b/text_to_num/parsers.py index d90ebb7..57df1e9 100644 --- a/text_to_num/parsers.py +++ b/text_to_num/parsers.py @@ -652,21 +652,21 @@ def push(self, word: str, look_ahead: Optional[str] = None) -> bool: elif ( word in self.lang.ZERO and self.at_start_of_seq() - and ( - look_ahead is None - or look_ahead in self.lang.NUMBERS - or look_ahead in self.lang.ZERO - or look_ahead in self.lang.DECIMAL_SEP - ) + and look_ahead is not None + and look_ahead in self.lang.DECIMAL_SEP ): - self._value.append("0") + pass elif ( word in self.lang.ZERO and self.at_start_of_seq() - and look_ahead is not None - and look_ahead in self.lang.DECIMAL_SEP + # and ( + # look_ahead is None + # or look_ahead in self.lang.NUMBERS + # or look_ahead in self.lang.ZERO + # or look_ahead in self.lang.DECIMAL_SEP + # ) ): - pass + self._value.append("0") elif self._push(self.lang.ord2card(word) or "", look_ahead): self._value.append( self.lang.num_ord(