Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix several issues with num2words in Arabic #512

Merged
merged 17 commits into from
Apr 11, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 84 additions & 30 deletions num2words/lang_AR.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,14 @@
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
# MA 02110-1301 USA

import decimal
import math
import re
from decimal import Decimal
from math import floor

from .base import Num2Word_Base

CURRENCY_SR = [("ريال", "ريالان", "ريالات", "ريالاً"),
("هللة", "هللتان", "هللات", "هللة")]
CURRENCY_EGP = [("جنيه", "جنيهان", "جنيهات", "جنيهاً"),
Expand All @@ -37,11 +41,13 @@
]


class Num2Word_AR(object):
errmsg_too_big = "Too large"
max_num = 10 ** 36
class Num2Word_AR(Num2Word_Base):
errmsg_toobig = "abs(%s) must be less than %s."
MAXVAL = 10**51

def __init__(self):
super().__init__()

self.number = 0
self.arabicPrefixText = ""
self.arabicSuffixText = ""
Expand Down Expand Up @@ -75,34 +81,47 @@ def __init__(self):
"", "مائة", "مئتان", "ثلاثمائة", "أربعمائة", "خمسمائة", "ستمائة",
"سبعمائة", "ثمانمائة", "تسعمائة"
]

self.arabicAppendedTwos = [
"مئتا", "ألفا", "مليونا", "مليارا", "تريليونا", "كوادريليونا",
"كوينتليونا", "سكستيليونا"
"كوينتليونا", "سكستيليونا", "سبتيليونا", "أوكتيليونا ",
"نونيليونا", "ديسيليونا", "أندسيليونا", "دوديسيليونا",
"تريديسيليونا", "كوادريسيليونا", "كوينتينيليونا"
]
self.arabicTwos = [
"مئتان", "ألفان", "مليونان", "ملياران", "تريليونان",
"كوادريليونان", "كوينتليونان", "سكستيليونان"
"كوادريليونان", "كوينتليونان", "سكستيليونان", "سبتيليونان",
"أوكتيليونان ", "نونيليونان ", "ديسيليونان", "أندسيليونان",
"دوديسيليونان", "تريديسيليونان", "كوادريسيليونان", "كوينتينيليونان"
]
self.arabicGroup = [
"مائة", "ألف", "مليون", "مليار", "تريليون", "كوادريليون",
"كوينتليون", "سكستيليون"
"كوينتليون", "سكستيليون", "سبتيليون", "أوكتيليون", "نونيليون",
"ديسيليون", "أندسيليون", "دوديسيليون", "تريديسيليون",
"كوادريسيليون", "كوينتينيليون"
]
self.arabicAppendedGroup = [
"", "ألفاً", "مليوناً", "ملياراً", "تريليوناً", "كوادريليوناً",
"كوينتليوناً", "سكستيليوناً"
"كوينتليوناً", "سكستيليوناً", "سبتيليوناً", "أوكتيليوناً",
"نونيليوناً", "ديسيليوناً", "أندسيليوناً", "دوديسيليوناً",
"تريديسيليوناً", "كوادريسيليوناً", "كوينتينيليوناً"
]
self.arabicPluralGroups = [
"", "آلاف", "ملايين", "مليارات", "تريليونات", "كوادريليونات",
"كوينتليونات", "سكستيليونات"
"كوينتليونات", "سكستيليونات", "سبتيليونات", "أوكتيليونات",
"نونيليونات", "ديسيليونات", "أندسيليونات", "دوديسيليونات",
"تريديسيليونات", "كوادريسيليونات", "كوينتينيليونات"
]
assert len(self.arabicAppendedGroup) == len(self.arabicGroup)
assert len(self.arabicPluralGroups) == len(self.arabicGroup)
assert len(self.arabicAppendedTwos) == len(self.arabicTwos)

def number_to_arabic(self, arabic_prefix_text, arabic_suffix_text):
    """Store the prefix/suffix texts, then split the pending number.

    :param arabic_prefix_text: value saved to ``self.arabicPrefixText``.
    :param arabic_suffix_text: value saved to ``self.arabicSuffixText``.
    :return: ``None``; the work is done through instance attributes.
    """
    self.arabicPrefixText, self.arabicSuffixText = (
        arabic_prefix_text,
        arabic_suffix_text,
    )
    # Both texts are set before the number held by the instance is split.
    self.extract_integer_and_decimal_parts()

def extract_integer_and_decimal_parts(self):
re.split('\\.', str(self.number))
splits = re.split('\\.', str(self.number))

self.integer_value = int(splits[0])
Expand All @@ -129,22 +148,23 @@ def decimal_value(self, decimal_part):
else:
result = decimal_part

for i in range(len(result), self.partPrecision):
result += '0'
# The following is useless (never happens)
# for i in range(len(result), self.partPrecision):
# result += '0'
return result

def digit_feminine_status(self, digit, group_level):
    """Return the word for ``digit`` in the gender required by its group.

    :param digit: index into the ones tables; converted with ``int``.
    :param group_level: ``-1`` selects the gender flag
        ``isCurrencyPartNameFeminine``, ``0`` selects
        ``isCurrencyNameFeminine``; any other level always uses the
        default ``arabicOnes`` table.
    :return: the entry of ``arabicFeminineOnes`` or ``arabicOnes``.
    """
    if group_level == -1:
        use_feminine = self.isCurrencyPartNameFeminine
    elif group_level == 0:
        use_feminine = self.isCurrencyNameFeminine
    else:
        # Higher groups never take the feminine form.
        use_feminine = False

    words = self.arabicFeminineOnes if use_feminine else self.arabicOnes
    return words[int(digit)]

Expand All @@ -159,38 +179,44 @@ def process_arabic_group(self, group_number, group_level,
ret_val = "{}".format(self.arabicAppendedTwos[0])
else:
ret_val = "{}".format(self.arabicHundreds[int(hundreds)])
if ret_val != "" and tens != 0:
ret_val += " و "

if tens > 0:
if tens < 20:
# if int(group_level) >= len(self.arabicTwos):
# raise OverflowError(self.errmsg_toobig %
# (self.number, self.MAXVAL))
assert int(group_level) < len(self.arabicTwos)
if tens == 2 and int(hundreds) == 0 and group_level > 0:
if self.integer_value in [2000, 2000000, 2000000000,
2000000000000, 2000000000000000,
2000000000000000000]:
pow = int(math.log10(self.integer_value))
if self.integer_value > 10 and pow % 3 == 0 and \
self.integer_value == 2 * (10 ** pow):
ret_val = "{}".format(
self.arabicAppendedTwos[int(group_level)])
else:
ret_val = "{}".format(
self.arabicTwos[int(group_level)])
else:
if ret_val != "":
ret_val += " و "

if tens == 1 and group_level > 0 and hundreds == 0:
# Note: this never happens
# (hundreds == 0 only if group_number is 0)
ret_val += ""
elif (tens == 1 or tens == 2) and (
group_level == 0 or group_level == -1) and \
hundreds == 0 and remaining_number == 0:
# Note: this never happens (idem)
ret_val += ""
elif tens == 1 and group_level > 0:
ret_val += self.arabicGroup[int(group_level)]
else:
ret_val += self.digit_feminine_status(int(tens),
group_level)
else:
ones = tens % 10
tens = (tens / 10) - 2
if ones > 0:
if ret_val != "" and tens < 4:
ret_val += " و "

ret_val += self.digit_feminine_status(ones, group_level)
if ret_val != "" and ones != 0:
ret_val += " و "
Expand All @@ -199,8 +225,23 @@ def process_arabic_group(self, group_number, group_level,

return ret_val

def abs(self, number):
    """Return the absolute value of ``number`` without dropping digits.

    This replaces the built-in ``abs`` because, for a ``Decimal``, both
    the built-in ``abs`` and unary minus round the result to the current
    context precision, which corrupts values that carry more significant
    digits than that precision.

    :param number: an ``int``, ``float`` or ``Decimal``.
    :return: ``number`` unchanged when it is non-negative, otherwise its
        exact negation.
    """
    if number >= 0:
        return number
    if isinstance(number, Decimal):
        # copy_negate() only flips the sign; it is not affected by the
        # context, so no rounding takes place.
        return number.copy_negate()
    return -number

def to_str(self, number):
    """Render ``number`` as a plain decimal string with up to 9 decimals.

    This is used instead of ``"{:.9f}".format(number)`` because that
    string conversion loses precision for big numbers.

    The fractional part is rounded to 9 digits and trailing zeros are
    dropped. A fraction that rounds up to a whole unit is carried into
    the integer part, and a fraction that rounds to zero is omitted, so
    the result never ends with a bare ``"."``.

    :param number: an ``int``, ``float`` or ``Decimal``.
    :return: e.g. ``"12"``, ``"1.5"`` or ``"-0.25"``.
    """
    integer = int(number)
    if integer == number:
        return str(integer)

    scale = 10**9
    fraction = round((number - integer) * scale)

    # Work on magnitudes so the sign is emitted once, in front.
    negative = number < 0
    if negative:
        integer, fraction = -integer, -fraction

    # Rounding can push the fraction up to a full unit (e.g. 0.9999999999).
    if fraction >= scale:
        integer += 1
        fraction -= scale

    text = str(integer)
    if fraction:
        text += "." + "{:09d}".format(fraction).rstrip("0")
    if negative and (integer or fraction):
        text = "-" + text
    return text

def convert(self, value):
    """Convert ``value`` to words using the current prefix/suffix texts.

    The value is first serialised with ``self.to_str`` and stored in
    ``self.number``; ``number_to_arabic`` then splits it and
    ``convert_to_arabic`` produces the result.

    :param value: the number to convert.
    :return: whatever ``self.convert_to_arabic()`` returns.
    """
    # A single assignment: the former "{:.9f}" formatting was overwritten
    # immediately and has been dropped.
    self.number = self.to_str(value)
    self.number_to_arabic(self.arabicPrefixText, self.arabicSuffixText)
    return self.convert_to_arabic()

Expand All @@ -218,9 +259,16 @@ def convert_to_arabic(self):

while temp_number > Decimal(0):

number_to_process = int(
Decimal(str(temp_number)) % Decimal(str(1000)))
temp_number = int(Decimal(temp_number) / Decimal(1000))
temp_number_dec = Decimal(str(temp_number))
try:
number_to_process = int(temp_number_dec % Decimal(str(1000)))
except decimal.InvalidOperation:
decimal.getcontext().prec = len(
temp_number_dec.as_tuple().digits
)
number_to_process = int(temp_number_dec % Decimal(str(1000)))

temp_number = int(temp_number_dec / Decimal(1000))

group_description = \
self.process_arabic_group(number_to_process,
Expand All @@ -229,8 +277,13 @@ def convert_to_arabic(self):
if group_description != '':
if group > 0:
if ret_val != "":
ret_val = "{} و {}".format("", ret_val)
if number_to_process != 2:
ret_val = "{}و {}".format("", ret_val)
if number_to_process != 2 and number_to_process != 1:
# if group >= len(self.arabicGroup):
# raise OverflowError(self.errmsg_toobig %
# (self.number, self.MAXVAL)
# )
assert group < len(self.arabicGroup)
if number_to_process % 100 != 1:
if 3 <= number_to_process <= 10:
ret_val = "{} {}".format(
Expand Down Expand Up @@ -294,8 +347,8 @@ def convert_to_arabic(self):
return formatted_number

def validate_number(self, number):
    """Return ``number`` unchanged if its magnitude is below ``MAXVAL``.

    The bound is checked on both sides, matching the error message,
    which is phrased in terms of ``abs(number)``. The comparison avoids
    the built-in ``abs`` so that a ``Decimal`` is not rounded to the
    context precision before being compared.

    :param number: the value to check.
    :return: ``number`` itself.
    :raises OverflowError: if ``number >= MAXVAL`` or
        ``number <= -MAXVAL``; the message is ``self.errmsg_toobig``
        formatted with ``(number, self.MAXVAL)``.
    """
    if number >= self.MAXVAL or number <= -self.MAXVAL:
        raise OverflowError(self.errmsg_toobig % (number, self.MAXVAL))
    return number

def set_currency_prefer(self, currency):
Expand Down Expand Up @@ -329,7 +382,7 @@ def to_ordinal(self, number, prefix=''):
self.currency_unit = ('', '', '', '')
self.arabicPrefixText = prefix
self.arabicSuffixText = ""
return "{}".format(self.convert(abs(number)).strip())
return "{}".format(self.convert(self.abs(number)).strip())

def to_year(self, value):
value = self.validate_number(value)
Expand All @@ -339,6 +392,7 @@ def to_ordinal_num(self, value):
return self.to_ordinal(value).strip()

def to_cardinal(self, number):
self.isCurrencyNameFeminine = False
number = self.validate_number(number)
minus = ''
if number < 0:
Expand All @@ -349,4 +403,4 @@ def to_cardinal(self, number):
self.arabicPrefixText = ""
self.arabicSuffixText = ""
self.arabicOnes = ARABIC_ONES
return minus + self.convert(value=abs(number)).strip()
return minus + self.convert(value=self.abs(number)).strip()
2 changes: 1 addition & 1 deletion num2words/lang_EO.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def setup(self):
self.pointword = "komo"
self.errmsg_nonnum = u"Sole nombroj povas esti konvertita en vortojn."
self.errmsg_toobig = (
u"Tro granda nombro por esti konvertita en vortojn."
u"Tro granda nombro por esti konvertita en vortojn (abs(%s) > %s)."
)
self.exclude_title = ["kaj", "komo", "minus"]
self.mid_numwords = [(1000, "mil"), (100, "cent"), (90, "naŭdek"),
Expand Down
5 changes: 3 additions & 2 deletions num2words/lang_FA.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,8 +78,9 @@


class Num2Word_FA(object):
errmsg_too_big = "Too large"
max_num = 10 ** 36
# These attributes are unused
errmsg_toobig = "Too large"
MAXNUM = 10 ** 36

def __init__(self):
self.number = 0
Expand Down
4 changes: 3 additions & 1 deletion num2words/lang_FR.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@ def setup(self):
self.errmsg_nonnum = (
u"Seulement des nombres peuvent être convertis en mots."
)
self.errmsg_toobig = u"Nombre trop grand pour être converti en mots."
self.errmsg_toobig = (
u"Nombre trop grand pour être converti en mots (abs(%s) > %s)."
)
self.exclude_title = ["et", "virgule", "moins"]
self.mid_numwords = [(1000, "mille"), (100, "cent"),
(80, "quatre-vingts"), (60, "soixante"),
Expand Down
8 changes: 4 additions & 4 deletions num2words/lang_ID.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ class Num2Word_ID():

errmsg_floatord = "Cannot treat float number as ordinal"
errmsg_negord = "Cannot treat negative number as ordinal"
errmsg_toobig = "Too large"
max_num = 10 ** 36
errmsg_toobig = "Number is too large to convert to words (abs(%s) > %s)."
MAXVAL = 10 ** 36

def split_by_koma(self, number):
return str(number).split('.')
Expand Down Expand Up @@ -169,8 +169,8 @@ def join(self, word_blocks, float_part):
return ' '.join(word_list) + float_part

def to_cardinal(self, number):
if number >= self.max_num:
raise OverflowError(self.errmsg_toobig % (number, self.max_num))
if number >= self.MAXVAL:
raise OverflowError(self.errmsg_toobig % (number, self.MAXVAL))
minus = ''
if number < 0:
minus = 'min '
Expand Down
3 changes: 2 additions & 1 deletion num2words/lang_RO.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ def setup(self):
self.pointword = "virgulă"
self.exclude_title = ["și", "virgulă", "minus"]
self.errmsg_toobig = (
"Numărul e prea mare pentru a fi convertit în cuvinte."
"Numărul e prea mare pentru a \
fi convertit în cuvinte (abs(%s) > %s)."
)
self.mid_numwords = [(1000, "mie/i"), (100, "sută/e"),
(90, "nouăzeci"), (80, "optzeci"),
Expand Down
4 changes: 3 additions & 1 deletion num2words/lang_SL.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ def setup(self):
self.negword = "minus "
self.pointword = "celih"
self.errmsg_nonnum = "Only numbers may be converted to words."
self.errmsg_toobig = "Number is too large to convert to words."
self.errmsg_toobig = (
"Number is too large to convert to words (abs(%s) > %s)."
)
self.exclude_title = []

self.mid_numwords = [(1000, "tisoč"), (900, "devetsto"),
Expand Down
Loading