Skip to content

Commit

Permalink
#2, #4: started langcodescli.py
Browse files Browse the repository at this point in the history
  • Loading branch information
fititnt committed Nov 20, 2021
1 parent 360e780 commit 38c6bf5
Show file tree
Hide file tree
Showing 4 changed files with 171 additions and 1 deletion.
5 changes: 5 additions & 0 deletions scripts/data-external-prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ if [ ! -f scripts/data-external/cldr/aliases.json ]; then
head -n 15 scripts/data-external/cldr/likelySubtags.json > scripts/data-external/cldr/aliases.sample.json
fi

if [ ! -f scripts/data-external/cldr/territoryInfo.json ]; then
    curl https://raw.githubusercontent.com/unicode-org/cldr-json/main/cldr-json/cldr-core/supplemental/territoryInfo.json --output scripts/data-external/cldr/territoryInfo.json
    # The sample must be cut from the file just downloaded; it was previously
    # taken from likelySubtags.json, so the sample held the wrong dataset.
    head -n 15 scripts/data-external/cldr/territoryInfo.json > scripts/data-external/cldr/territoryInfo.sample.json
fi

# https://github.com/unicode-org/cldr-json/blob/main/cldr-json/cldr-core/scriptMetadata.json

# mlr --irs '|' --implicit-csv-header cat scripts/data-external/iso15924_no-comments-pipe.txt
Expand Down
15 changes: 15 additions & 0 deletions scripts/data-external/cldr/territoryInfo.sample.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
{
"supplemental": {
"version": {
"_unicodeVersion": "14.0.0",
"_cldrVersion": "40"
},
"likelySubtags": {
"aa": "aa-Latn-ET",
"aai": "aai-Latn-ZZ",
"aak": "aak-Latn-ZZ",
"aau": "aau-Latn-ZZ",
"ab": "ab-Cyrl-GE",
"abi": "abi-Latn-ZZ",
"abq": "abq-Cyrl-ZZ",
"abr": "abr-Latn-GH",
23 changes: 22 additions & 1 deletion scripts/fn/cldr_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
# CLDR_BASE="scripts/data-external/cldr" CLDR_CLI_DEBUG=1 ./scripts/fn/cldr_cli.py languageAlias por
# CLDR_BASE="scripts/data-external/cldr" CLDR_CLI_DEBUG=1 ./scripts/fn/cldr_cli.py territoryAlias 076
# CLDR_BASE="scripts/data-external/cldr" CLDR_CLI_DEBUG=1 ./scripts/fn/cldr_cli.py likelySubtags por
# CLDR_BASE="scripts/data-external/cldr" CLDR_CLI_DEBUG=1 ./scripts/fn/cldr_cli.py territoryInfo BR

if len(sys.argv) < 2 or sys.argv[1] == '-h' or sys.argv[1] == '--help':
print('usage: ' + sys.argv[0] + ' [command] [parameters]')
Expand All @@ -44,6 +45,9 @@
print(' ' + sys.argv[0] + ' likelySubtags zz')
print(' ' + sys.argv[0] + ' likelySubtags pt')
print('')
# Usage examples for the territoryInfo command (both lines must name the
# territoryInfo command; the second one used to advertise likelySubtags).
print(' ' + sys.argv[0] + ' territoryInfo zz')
print(' ' + sys.argv[0] + ' territoryInfo BR')
print('')
print(' CLDR_CLI_DEBUG=1 ' + sys.argv[0] + ' [command] [parameters]')
print('')
print('NOTE: ')
Expand All @@ -52,6 +56,8 @@
' must be already defined to run this script. For example: \n')
print(' CLDR_BASE="~/Downloads/cldr/" ' +
sys.argv[0] + ' languageAlias por')
print('')
print('CLDR version tested: v40 (may need changes for new versions)')

sys.exit()

Expand All @@ -61,6 +67,7 @@
# Debug output is opt-in through the CLDR_CLI_DEBUG environment variable.
# bool() on the raw string is not usable here: any non-empty string,
# including the default '0', is truthy, which left debug permanently enabled.
is_debug = os.environ.get('CLDR_CLI_DEBUG', '0').strip().lower() not in (
    '', '0', 'false', 'no', 'off')
cldr_alias_path = os.environ['CLDR_BASE'] + '/aliases.json'
cldr_likelySubtags_path = os.environ['CLDR_BASE'] + '/likelySubtags.json'
cldr_territoryInfo_path = os.environ['CLDR_BASE'] + '/territoryInfo.json'
repo_cldr_json_base = 'https://raw.githubusercontent.com/unicode-org/' + \
'cldr-json/main/cldr-json/'

Expand Down Expand Up @@ -90,7 +97,7 @@
with open(cldr_alias_path, 'r') as _file:
data = json.loads(_file.read())

if sys.argv[2] in data['supplemental']['metadata']['alias']['territoryAlias']:
if sys.argv[2] in data['supplemental']['metadata']['alias']['territoryAlias']: # noqa
print(str(data['supplemental']['metadata']
['alias']['territoryAlias'][sys.argv[2]]))
else:
Expand All @@ -113,6 +120,20 @@
cldr_likelySubtags_path + ']"}')
sys.exit()

if sys.argv[1] == 'territoryInfo':
    # Print the territoryInfo entry for the code given as second argument
    # (for example: BR). When the code is absent from the file, nothing is
    # printed unless debug is enabled.
    if len(sys.argv) < 3:
        # Fail with a readable message instead of an IndexError traceback.
        sys.exit('missing parameter for [territoryInfo] . See ' +
                 sys.argv[0] + ' --help')

    with open(cldr_territoryInfo_path, 'r') as _file:
        data = json.load(_file)

    territories = data['supplemental']['territoryInfo']

    if sys.argv[2] in territories:
        print(str(territories[sys.argv[2]]))
    elif is_debug:
        print('{"msg": "Not found [' + sys.argv[2] + '] on [' +
              cldr_territoryInfo_path + ']"}')
    # The command was handled; stop before the unknown-command fallback.
    sys.exit()


# Fallback reached only when no command branch above handled sys.argv[1]:
# exit with an error message that points to the help text.
_unknown_command_message = ''.join([
    'unknow command [', sys.argv[1],
    '] . See ', sys.argv[0], ' --help',
])
sys.exit(_unknown_command_message)
129 changes: 129 additions & 0 deletions scripts/fn/langcodescli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
#!/usr/bin/python3
# ==============================================================================
#
# FILE: langcodescli.py
#
# USAGE: ./scripts/fn/langcodescli.py
#
# DESCRIPTION: A command line wrapper to https://github.com/rspeer/langcodes.
# Install dependencies with
# pip install langcodes[data]
#
# OPTIONS: ---
#
# REQUIREMENTS: - python3
# - langcodes
# - https://github.com/rspeer/langcodes
# - argparse (Python standard library)
# - https://docs.python.org/3/library/argparse.html
# BUGS: ---
# NOTES: ---
# AUTHORS: Emerson Rocha <rocha[at]ieee.org>
# COLLABORATORS: <@TODO: put additional non-anonymous names here>
# COMPANY: EticaAI
# LICENSE: Public Domain dedication OR Zero-Clause BSD
# SPDX-License-Identifier: Unlicense OR 0BSD
# VERSION: v1.0
# CREATED: 2021-11-20 10:37 UTC
# ==============================================================================

import sys
import argparse
import langcodes
# https://realpython.com/comparing-python-command-line-parsing-libraries-argparse-docopt-click/

# Texts handed to argparse.ArgumentParser for the --help output.
# The {0} placeholders in the epilog are filled with sys.argv[0], so the
# examples show the script exactly as it was invoked.
description = "A command line wrapper to python langcodes"
epilog = """
EXAMPLES:
> Get BCP47 minimum tag
{0} standardize_tag por-Latn-BR
> Check if language tag is valid.
These ones have syntax errors (no language with these country codes)
{0} is_valid jp-JP
{0} is_valid us
These are syntax valid (but likely user error)
{0} is_valid ar-AR
{0} is_valid en-UK
""".format(sys.argv[0])


def info(args):
    """Print what langcodes knows about a language tag.

    Args:
        args: Parsed command line namespace; ``args.tag`` holds the tag.

    The tag is parsed with ``langcodes.Language.get`` and the value returned
    by its ``describe()`` method is printed to stdout.
    """
    language = langcodes.Language.get(args.tag)
    tag_description = language.describe()
    print(tag_description)


def is_valid(args):
    """Report whether ``args.tag`` passes ``langcodes.tag_is_valid``.

    Args:
        args: Parsed command line namespace; ``args.tag`` holds the tag.

    Prints ``1`` and exits with status 0 when the check passes; prints ``0``
    and exits with status 1 otherwise. This function never returns.
    """
    if not langcodes.tag_is_valid(args.tag):
        print(0)
        sys.exit(1)

    print(1)
    sys.exit(0)


def standardize_tag(args):
    """Print the result of ``langcodes.standardize_tag`` for ``args.tag``.

    Args:
        args: Parsed command line namespace; ``args.tag`` holds the tag.
    """
    standardized = langcodes.standardize_tag(args.tag)
    print(standardized)


def speaking_population(args):
    """Print the speaking population langcodes reports for ``args.tag``.

    Args:
        args: Parsed command line namespace; ``args.tag`` holds the tag.

    The tag is parsed with ``langcodes.Language.get`` and the value returned
    by its ``speaking_population()`` method is printed to stdout.
    """
    language = langcodes.Language.get(args.tag)
    population = language.speaking_population()
    print(population)

# def noargs():
# print(sys.argv[0] + ' --help')
# sys.exit(1)


# parser = argparse.ArgumentParser()
# Top-level parser. RawDescriptionHelpFormatter is used together with the
# multi-line EXAMPLES text passed as epilog.
parser = argparse.ArgumentParser(
    formatter_class=argparse.RawDescriptionHelpFormatter,
    description=description,
    epilog=epilog,
)
parser.add_argument('--version', version='1.0.0', action='version')
subparsers = parser.add_subparsers()

# Each subcommand takes one positional ``tag`` and stores its handler as the
# ``func`` default, so the entry point can dispatch with args.func(args).
standardize_tag_parser = subparsers.add_parser('standardize_tag')
standardize_tag_parser.set_defaults(func=standardize_tag)
standardize_tag_parser.add_argument(
    'tag',
    help='Tag value to normalize tags the minimum BCP 47',
)

info_parser = subparsers.add_parser('info')
info_parser.set_defaults(func=info)
info_parser.add_argument(
    'tag',
    help='Tag value to return information',
)

speaking_population_parser = subparsers.add_parser('speaking_population')
speaking_population_parser.set_defaults(func=speaking_population)
speaking_population_parser.add_argument(
    'tag',
    help='Tag value to return information',
)

is_valid_parser = subparsers.add_parser('is_valid')
is_valid_parser.set_defaults(func=is_valid)
is_valid_parser.add_argument(
    'tag',
    help='Tag value to return information',
)

# goodbye_parser = subparsers.add_parser('goodbye')
# goodbye_parser.add_argument('name', help='name of the person to greet')
# goodbye_parser.add_argument('--greeting', default='Hello', help='word to use for the greeting')
# goodbye_parser.add_argument('--caps', action='store_true', help='uppercase the output')
# goodbye_parser.set_defaults(func=greet)

if __name__ == '__main__':
    # Called without any argument: there is nothing to dispatch, so show the
    # help text and signal failure to the calling shell.
    if len(sys.argv) <= 1:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args()

    # argparse treats subcommands as optional, so input such as a lone `--`
    # parses successfully without selecting one and leaves `func` unset.
    # Handle that like the no-argument case instead of raising AttributeError.
    handler = getattr(args, 'func', None)
    if handler is None:
        parser.print_help()
        sys.exit(1)

    handler(args)

0 comments on commit 38c6bf5

Please sign in to comment.