Skip to content

Commit

Permalink
#14 initial emoji keywords
Browse files Browse the repository at this point in the history
  • Loading branch information
wkyoshida committed Nov 8, 2022
1 parent 7a1271f commit 0381755
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 4 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
 {
   "dependencies": {
-    "cldr-annotations-derived-full": "latest"
+    "cldr-annotations-full": "latest"
   }
 }
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ beautifulsoup4==4.9.3
 black>=19.10b0
 certifi>=2020.12.5
 defusedxml==0.7.1
+emoji>=2.2.0
 mwparserfromhell==0.6
 packaging>=20.9
 pytest-cov>=3.0.0
Expand Down
19 changes: 16 additions & 3 deletions src/scribe_data/extract_transform/process_unicode.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
 gen_emoji_autosuggestions
 """

+import emoji
 import json

 from scribe_data.load.update_utils import get_language_iso
Expand Down Expand Up @@ -51,14 +52,26 @@ def gen_emoji_autosuggestions(

     iso = get_language_iso(language)

-    cldr_file_path = f"node_modules/cldr-annotations-derived-full/annotationsDerived/{iso}/annotations.json"
+    cldr_file_path = f"node_modules/cldr-annotations-full/annotations/{iso}/annotations.json"

     with open(cldr_file_path, "r") as file:
         cldr_data = json.load(file)

-    emoji_dict = cldr_data["annotationsDerived"]["annotations"]
+    cldr_dict = cldr_data["annotations"]["annotations"]

-    print("Number of emojis loaded:", len(emoji_dict))
+    print("Number of characters loaded:", len(cldr_dict))
+
+    for cldr_char in cldr_dict:
+        # Filter CLDR data for emoji characters
+        if cldr_char in emoji.EMOJI_DATA:
+            emoji_annotations = cldr_dict[cldr_char]
+
+            for emoji_keyword in emoji_annotations["default"]:
+                # Use single-word annotations as keywords
+                if len(emoji_keyword.split()) == 1:
+                    autosuggest_dict.setdefault(emoji_keyword, []).append(cldr_char)
+
+    print("Number of trigger keywords found:", len(autosuggest_dict))

###

Expand Down

0 comments on commit 0381755

Please sign in to comment.