Skip to content

Commit

Permalink
feat/langcode_utils
Browse files Browse the repository at this point in the history
  • Loading branch information
JarbasAl committed Jan 26, 2022
1 parent 64e09cb commit 1451a2e
Show file tree
Hide file tree
Showing 2 changed files with 315 additions and 1 deletion.
157 changes: 157 additions & 0 deletions lingua_franca/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
# Names (as strings) of functions in this module; for example
# "langcode2name" and "nice_number" are both defined further down.
# NOTE(review): the code that consumes this tuple is outside this view --
# presumably the localized-function registration machinery; confirm before
# renaming or removing an entry.
_REGISTERED_FUNCTIONS = ("nice_number",
"nice_time",
"pronounce_number",
"langcode2name",
"nice_response",
"nice_duration")

Expand Down Expand Up @@ -242,6 +243,162 @@ def year_format(self, dt, lang, bc):
'res/text'))


@localized_function(run_own_code_on=[UnsupportedLanguageError])
def langcode2name(lang_code, lang=""):
    """Return the English name of the language identified by a language code.

    Only the primary subtag is looked up, so regional variants such as
    "pt-BR" or "en_US" resolve to the base language.

    Args:
        lang_code (str): language code, e.g. "en", "en-US" or "pt_BR".
            Matching is case-insensitive and ignores surrounding whitespace.
        lang (str): not referenced by this implementation.
            NOTE(review): presumably consumed by the ``localized_function``
            decorator to select a language-specific version -- confirm.

    Returns:
        str: the English language name, or ``lang_code`` unchanged when the
        code is not in the table.
    """
    # NOTE(review): a few names look truncated or dated ("Norwegian N",
    # "Siswant", "Laothian"); they are kept verbatim because callers (and the
    # reverse table used by extract_langcode) may rely on the exact strings.
    names = {
        'aa': 'Afar',
        'ab': 'Abkhazian',
        'af': 'Afrikaans',
        'ak': 'Akan',
        'am': 'Amharic',
        'ar': 'Arabic',
        'as': 'Assamese',
        'ay': 'Aymara',
        'az': 'Azerbaijani',
        'ba': 'Bashkir',
        'be': 'Belarusian',
        'bg': 'Bulgarian',
        'bh': 'Bihari',
        'bi': 'Bislama',
        'bn': 'Bengali',
        'bo': 'Tibetan',
        'br': 'Breton',
        'bs': 'Bosnian',
        'ca': 'Catalan',
        'co': 'Corsican',
        'cs': 'Czech',
        'cy': 'Welsh',
        'da': 'Danish',
        'de': 'German',
        'dv': 'Dhivehi',
        'dz': 'Dzongkha',
        'ee': 'Ewe',
        'el': 'Greek',
        'en': 'English',
        'eo': 'Esperanto',
        'es': 'Spanish',
        'et': 'Estonian',
        'eu': 'Basque',
        'fa': 'Persian',
        'fi': 'Finnish',
        'fj': 'Fijian',
        'fo': 'Faroese',
        'fr': 'French',
        'fy': 'Frisian',
        'ga': 'Irish',
        'gd': 'Scots Gaelic',
        'gl': 'Galician',
        'gn': 'Guarani',
        'gu': 'Gujarati',
        'gv': 'Manx',
        'ha': 'Hausa',
        'he': 'Hebrew',  # current ISO 639-1 code; 'iw' below is the old one
        'hi': 'Hindi',
        'hr': 'Croatian',
        'ht': 'Haitian Creole',
        'hu': 'Hungarian',
        'hy': 'Armenian',
        'ia': 'Interlingua',
        'id': 'Indonesian',
        'ie': 'Interlingue',
        'ig': 'Igbo',
        'ik': 'Inupiak',
        'is': 'Icelandic',
        'it': 'Italian',
        'iu': 'Inuktitut',
        'iw': 'Hebrew',
        'ja': 'Japanese',
        'jv': 'Javanese',  # current ISO 639-1 code; 'jw' below is the old one
        'jw': 'Javanese',
        'ka': 'Georgian',
        'kk': 'Kazakh',
        'kl': 'Greenlandic',
        'km': 'Khmer',
        'kn': 'Kannada',
        'ko': 'Korean',
        'ks': 'Kashmiri',
        'ku': 'Kurdish',
        'ky': 'Kyrgyz',
        'la': 'Latin',
        'lb': 'Luxembourgish',
        'lg': 'Ganda',
        'ln': 'Lingala',
        'lo': 'Laothian',
        'lt': 'Lithuanian',
        'lv': 'Latvian',
        'mg': 'Malagasy',
        'mi': 'Maori',
        'mk': 'Macedonian',
        'ml': 'Malayalam',
        'mn': 'Mongolian',
        'mr': 'Marathi',
        'ms': 'Malay',
        'mt': 'Maltese',
        'my': 'Burmese',
        'na': 'Nauru',
        'ne': 'Nepali',
        'nl': 'Dutch',
        'nn': 'Norwegian N',
        'no': 'Norwegian',
        'nr': 'Ndebele',
        'ny': 'Nyanja',
        'oc': 'Occitan',
        'om': 'Oromo',
        'or': 'Oriya',
        'os': 'Ossetian',
        'pa': 'Punjabi',
        'pl': 'Polish',
        'ps': 'Pashto',
        'pt': 'Portuguese',
        'qu': 'Quechua',
        'rm': 'Rhaeto Romance',
        'rn': 'Rundi',
        'ro': 'Romanian',
        'ru': 'Russian',
        'rw': 'Kinyarwanda',
        'sa': 'Sanskrit',
        'sd': 'Sindhi',
        'sg': 'Sango',
        'si': 'Sinhalese',
        'sk': 'Slovak',
        'sl': 'Slovenian',
        'sm': 'Samoan',
        'sn': 'Shona',
        'so': 'Somali',
        'sq': 'Albanian',
        'sr': 'Serbian',
        'ss': 'Siswant',
        'st': 'Sesotho',
        'su': 'Sundanese',
        'sv': 'Swedish',
        'sw': 'Swahili',
        'ta': 'Tamil',
        'te': 'Telugu',
        'tg': 'Tajik',
        'th': 'Thai',
        'ti': 'Tigrinya',
        'tk': 'Turkmen',
        'tl': 'Tagalog',
        'tn': 'Tswana',
        'to': 'Tonga',
        'tr': 'Turkish',
        'ts': 'Tsonga',
        'tt': 'Tatar',
        'tw': 'Twi',
        'ug': 'Uighur',
        'uk': 'Ukrainian',
        'ur': 'Urdu',
        'uz': 'Uzbek',
        've': 'Venda',
        'vi': 'Vietnamese',
        'vo': 'Volapuk',
        'wo': 'Wolof',
        'xh': 'Xhosa',
        'yi': 'Yiddish',
        'yo': 'Yoruba',
        'za': 'Zhuang',
        'zh': 'Chinese',
        'zu': 'Zulu',
    }
    # Accept both BCP 47 style ("en-US") and locale style ("en_US") codes by
    # normalising the separator before taking the primary subtag.
    primary = lang_code.strip().lower().replace("_", "-").split("-")[0]
    # Unknown codes are handed back exactly as received.
    return names.get(primary) or lang_code


@localized_function(run_own_code_on=[UnsupportedLanguageError])
def nice_number(number, lang='', speech=True, denominators=None):
"""Format a float to human readable functions
Expand Down
159 changes: 158 additions & 1 deletion lingua_franca/parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,12 +19,13 @@
from lingua_franca.time import now_local
from lingua_franca.internal import populate_localized_function_dict, \
get_active_langs, get_full_lang_code, get_primary_lang_code, \
get_default_lang, localized_function, _raise_unsupported_language
get_default_lang, localized_function, _raise_unsupported_language, UnsupportedLanguageError

_REGISTERED_FUNCTIONS = ("extract_numbers",
"extract_number",
"extract_duration",
"extract_datetime",
"extract_langcode",
"normalize",
"get_gender",
"is_fractional",
Expand Down Expand Up @@ -73,6 +74,162 @@ def match_one(query, choices):
return best


@localized_function(run_own_code_on=[UnsupportedLanguageError])
def extract_langcode(text, lang=""):
    """Look up a language code from text naming a language in English.

    The English-name -> language-code table below is passed, together with
    ``text``, to ``match_one``, which does the actual matching.

    Args:
        text (str): text to match against the English language names.
        lang (str): not referenced in this body.
            NOTE(review): presumably consumed by the ``localized_function``
            decorator to select a language-specific version -- confirm.

    Returns:
        Whatever ``match_one`` returns for a dict of choices.
        NOTE(review): presumably a (langcode, score) pair -- confirm against
        ``match_one``.
    """
    # Entry order is kept exactly as-is: the matcher may break ties by order.
    name_to_code = {
        'Abkhazian': 'ab',
        'Afar': 'aa',
        'Afrikaans': 'af',
        'Akan': 'ak',
        'Albanian': 'sq',
        'Amharic': 'am',
        'Arabic': 'ar',
        'Armenian': 'hy',
        'Assamese': 'as',
        'Aymara': 'ay',
        'Azerbaijani': 'az',
        'Bashkir': 'ba',
        'Basque': 'eu',
        'Belarusian': 'be',
        'Bengali': 'bn',
        'Bihari': 'bh',
        'Bislama': 'bi',
        'Bosnian': 'bs',
        'Breton': 'br',
        'Bulgarian': 'bg',
        'Burmese': 'my',
        'Catalan': 'ca',
        'Chinese': 'zh',
        'Corsican': 'co',
        'Croatian': 'hr',
        'Czech': 'cs',
        'Danish': 'da',
        'Dhivehi': 'dv',
        'Dutch': 'nl',
        'Dzongkha': 'dz',
        'English': 'en',
        'Esperanto': 'eo',
        'Estonian': 'et',
        'Ewe': 'ee',
        'Faroese': 'fo',
        'Fijian': 'fj',
        'Finnish': 'fi',
        'French': 'fr',
        'Frisian': 'fy',
        'Galician': 'gl',
        'Ganda': 'lg',
        'Georgian': 'ka',
        'German': 'de',
        'Greek': 'el',
        'Greenlandic': 'kl',
        'Guarani': 'gn',
        'Gujarati': 'gu',
        'Haitian Creole': 'ht',
        'Hausa': 'ha',
        'Hebrew': 'iw',
        'Hindi': 'hi',
        'Hungarian': 'hu',
        'Icelandic': 'is',
        'Igbo': 'ig',
        'Indonesian': 'id',
        'Interlingua': 'ia',
        'Interlingue': 'ie',
        'Inuktitut': 'iu',
        'Inupiak': 'ik',
        'Irish': 'ga',
        'Italian': 'it',
        'Japanese': 'ja',
        'Javanese': 'jw',
        'Kannada': 'kn',
        'Kashmiri': 'ks',
        'Kazakh': 'kk',
        'Khmer': 'km',
        'Kinyarwanda': 'rw',
        'Korean': 'ko',
        'Kurdish': 'ku',
        'Kyrgyz': 'ky',
        'Laothian': 'lo',
        'Latin': 'la',
        'Latvian': 'lv',
        'Lingala': 'ln',
        'Lithuanian': 'lt',
        'Luxembourgish': 'lb',
        'Macedonian': 'mk',
        'Malagasy': 'mg',
        'Malay': 'ms',
        'Malayalam': 'ml',
        'Maltese': 'mt',
        'Manx': 'gv',
        'Maori': 'mi',
        'Marathi': 'mr',
        'Mongolian': 'mn',
        'Nauru': 'na',
        'Ndebele': 'nr',
        'Nepali': 'ne',
        'Norwegian': 'no',
        'Norwegian N': 'nn',
        'Nyanja': 'ny',
        'Occitan': 'oc',
        'Oriya': 'or',
        'Oromo': 'om',
        'Ossetian': 'os',
        'Pashto': 'ps',
        'Persian': 'fa',
        'Polish': 'pl',
        'Portuguese': 'pt',
        'Punjabi': 'pa',
        'Quechua': 'qu',
        'Rhaeto Romance': 'rm',
        'Romanian': 'ro',
        'Rundi': 'rn',
        'Russian': 'ru',
        'Samoan': 'sm',
        'Sango': 'sg',
        'Sanskrit': 'sa',
        'Scots Gaelic': 'gd',
        'Serbian': 'sr',
        'Sesotho': 'st',
        'Shona': 'sn',
        'Sindhi': 'sd',
        'Sinhalese': 'si',
        'Siswant': 'ss',
        'Slovak': 'sk',
        'Slovenian': 'sl',
        'Somali': 'so',
        'Spanish': 'es',
        'Sundanese': 'su',
        'Swahili': 'sw',
        'Swedish': 'sv',
        'Tagalog': 'tl',
        'Tajik': 'tg',
        'Tamil': 'ta',
        'Tatar': 'tt',
        'Telugu': 'te',
        'Thai': 'th',
        'Tibetan': 'bo',
        'Tigrinya': 'ti',
        'Tonga': 'to',
        'Tsonga': 'ts',
        'Tswana': 'tn',
        'Turkish': 'tr',
        'Turkmen': 'tk',
        'Twi': 'tw',
        'Uighur': 'ug',
        'Ukrainian': 'uk',
        'Urdu': 'ur',
        'Uzbek': 'uz',
        'Venda': 've',
        'Vietnamese': 'vi',
        'Volapuk': 'vo',
        'Welsh': 'cy',
        'Wolof': 'wo',
        'Xhosa': 'xh',
        'Yiddish': 'yi',
        'Yoruba': 'yo',
        'Zhuang': 'za',
        'Zulu': 'zu',
    }
    best_match = match_one(text, name_to_code)
    return best_match


@localized_function()
def extract_numbers(text, short_scale=True, ordinals=False, lang=''):
"""
Expand Down

0 comments on commit 1451a2e

Please sign in to comment.