Skip to content

Commit

Permalink
Update generate_emoji_keywords.py and created common_arg_parser.py fo…
Browse files Browse the repository at this point in the history
…r modularity
  • Loading branch information
Ekikereabasi-Nk committed Oct 17, 2024
1 parent 08a6986 commit 5a47465
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 64 deletions.
Original file line number Diff line number Diff line change
@@ -1,56 +1,43 @@
"""
* Copyright (C) 2024 Scribe
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
Generates keyword-emoji relationships from a selection of Hindustani words.
.. raw:: html
<!--
* Copyright (C) 2024 Scribe
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
-->.
"""

import argparse
from scribe_data.unicode.generate_emoji_keyword import generate_emoji_keyword
from scribe_data.unicode.common_arg_parser import setup_arg_parser

# Define the main language
LANGUAGE = "Hindustani" # Change to a grouped language if needed
emojis_per_keyword = 3

# Set up the argument parser
parser = argparse.ArgumentParser(
description="Generate emoji keywords for a specific language."
)
parser.add_argument(
"--file-path", required=True, help="Path to save the generated emoji keywords."
)
parser.add_argument(
"--sub-languages",
nargs="*",
help="List of specific sub-languages to process (e.g., Hindi Urdu). If omitted, all sub-languages will be processed.",
)
parser.add_argument(
"--gender",
choices=["male", "female", "neutral"],
help="Specify the gender for emoji customization.",
)
parser.add_argument(
"--region", help="Specify the region for emoji customization (e.g., US, IN)."
)
LANGUAGE = "Hindustani"


# Set up the argument parser by calling the imported function.
parser = setup_arg_parser()

# Parse the command-line arguments
# Parse the command-line arguments.
args = parser.parse_args()

# Call the generate_emoji_keyword function with optional parameters
# Call the generate_emoji_keyword function with optional parameters.
generate_emoji_keyword(
LANGUAGE,
emojis_per_keyword,
args.file_path,
emojis_per_keyword=args.emojis_per_keyword,
gender=args.gender,
region=args.region,
sub_languages=args.sub_languages,
Expand Down
51 changes: 51 additions & 0 deletions src/scribe_data/unicode/common_arg_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
"""
Common argument parser setup for the emoji keyword generation scripts.
.. raw:: html
<!--
* Copyright (C) 2024 Scribe
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
-->.
"""

import argparse


def setup_arg_parser():
    """
    Build the command-line argument parser for emoji keyword generation.

    Returns
    -------
    argparse.ArgumentParser
        A parser that defines the following options:

        --file-path (required): path to save the generated emoji keywords.
        --sub-languages: zero or more sub-language names; None when omitted.
        --gender: one of "male", "female" or "neutral"; None when omitted.
        --region: region code for emoji customization; None when omitted.
        --emojis-per-keyword: integer number of emojis per keyword; 3 when omitted.
    """
    parser = argparse.ArgumentParser(
        description="Generate emoji keywords for a specific language."
    )
    parser.add_argument(
        "--file-path", required=True, help="Path to save the generated emoji keywords."
    )
    parser.add_argument(
        "--sub-languages",
        nargs="*",
        help="List of specific sub-languages to process (e.g., Hindi Urdu). If omitted, all sub-languages will be processed.",
    )
    parser.add_argument(
        "--gender",
        choices=["male", "female", "neutral"],
        help="Specify the gender for emoji customization.",
    )
    parser.add_argument(
        "--region", help="Specify the region for emoji customization (e.g., US, IN)."
    )
    parser.add_argument(
        "--emojis-per-keyword",
        type=int,
        # An explicit default is needed: without it the attribute is None when
        # the flag is omitted, and callers that forward it as a keyword
        # argument would override the downstream default with None.
        default=3,
        help="Number of emojis to generate per keyword (default: %(default)s).",
    )
    return parser
56 changes: 33 additions & 23 deletions src/scribe_data/unicode/generate_emoji_keyword.py
Original file line number Diff line number Diff line change
@@ -1,53 +1,63 @@
"""
Centralized emoji keyword generation logic.
.. raw:: html
<!--
* Copyright (C) 2024 Scribe
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
-->.
"""

from .process_unicode import gen_emoji_lexicon
from scribe_data.utils import export_formatted_data


def generate_emoji_keyword(
language,
emojis_per_keyword,
file_path,
emojis_per_keyword=3, # default value for emojis_per_keyword
gender=None,
region=None,
sub_languages=None,
):
"""
Generate emoji keywords for a specified language, with optional support for grouped languages (e.g., Hindustani = Hindi + Urdu).
Parameters:
- language (str): The language or grouped language for which emoji keywords are generated (e.g., "Hindustani" for Hindi and Urdu).
- emojis_per_keyword (int): Number of emojis to associate with each keyword.
- file_path (str): The path to the file where the generated emoji keywords will be saved.
- gender (str): Gender-based customization for emojis (e.g., "male", "female").
- region (str): Regional customization for emojis (e.g., "US", "JP").
- sub_languages (list): A list of specific sub-languages for grouped languages (e.g., ["Hindi", "Urdu"]). If not provided, all sub-languages in the group will be processed.
"""

# Define grouped languages and their sub-languages
# Define grouped languages and their sub-languages.
grouped_languages = {
"Hindustani": ["Hindi", "Urdu"],
"Norwegian": ["Bokmål", "Nynorsk"],
# Add more grouped languages as needed
# Add more grouped languages as needed.
}

# If the language is a grouped language, handle its sub-languages
# If the language is a grouped language, handle its sub-languages.
if language in grouped_languages:
# If specific sub-languages are provided, only process those
# If specific sub-languages are provided, only process those.
sub_languages_to_process = sub_languages or grouped_languages[language]

for sub_lang in sub_languages_to_process:
print(f"Processing sub-language: {sub_lang}")

# Generate emoji keywords for the sub-language
# Generate emoji keywords for the sub-language.
emoji_keywords_dict = gen_emoji_lexicon(
language=sub_lang,
emojis_per_keyword=emojis_per_keyword,
gender=gender,
region=region,
)

# Export the generated emoji keywords for the sub-language
# Export the generated emoji keywords for the sub-language.
if emoji_keywords_dict:
# Save the file with the sub-language included in the file name
# Save the file with the sub-language included in the file name.
export_file_path = f"{file_path}_{sub_lang}.json"
export_formatted_data(
file_path=export_file_path,
Expand All @@ -57,17 +67,17 @@ def generate_emoji_keyword(
data_type="emoji-keywords",
)

# If it's not a grouped language, process it as a single language
# If it's not a grouped language, process it as a single language.
else:
# Generate emoji keywords for the given language
# generate emoji keywords for the given language.
emoji_keywords_dict = gen_emoji_lexicon(
language=language,
emojis_per_keyword=emojis_per_keyword,
gender=gender,
region=region,
)

# Export the generated emoji keywords for the language
# Export the generated emoji keywords for the language.
if emoji_keywords_dict:
export_formatted_data(
file_path=file_path,
Expand Down

0 comments on commit 5a47465

Please sign in to comment.