From 11398e0e11eb5fad5fd77f0b3044e3454fe086c1 Mon Sep 17 00:00:00 2001 From: Martin Tournoij Date: Wed, 11 Sep 2024 00:56:03 +0100 Subject: [PATCH] Decode XML entities in CLDR data --- README.md | 70 +++++++++++++++++++++---------------------- unidata/gen/emojis.go | 8 +++-- unidata/gen_emojis.go | 6 ++-- 3 files changed, 44 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index 2ad9810..ccfff66 100644 --- a/README.md +++ b/README.md @@ -233,11 +233,11 @@ The `emoji` command (shortcut: `e`) is is the real reason I wrote this: % uni e cry Name CLDR - 🥹 face holding back tears [admiration, angry, aw, aww, cry, embarrassed, feelings, grateful, gratitude, please, proud, resist, sad, sadness, tears of joy] + 🥹 face holding back tears [admiration, aww, cry, embarrassed, feelings, grateful, gratitude, joy, please, proud, resist, sad] 😢 crying face [awful, feels, miss, sad, tear, triste, unhappy] 😭 loudly crying face [bawling, sad, sob, tear, tears, unhappy] - 😿 crying cat [animal, crying cat face, face, sad, tear] - 🔮 crystal ball [fairy tale, fairytale, fantasy, fortune, future, magic, tool] + 😿 crying cat [animal, face, sad, tear] + 🔮 crystal ball [fairy, fairytale, fantasy, fortune, future, magic, tale, tool] By default both the name and CLDR data are searched; the CLDR data is a list of keywords for an emoji; prefix with `name:` or `n:` to search on the name only: @@ -245,12 +245,12 @@ keywords for an emoji; prefix with `name:` or `n:` to search on the name only: % uni e smile Name CLDR 😀 grinning face [cheerful, cheery, happy, laugh, nice, smile, smiling, teeth] - 😃 grinning face with big eyes [awesome, happy, mouth, open, smile, smiling, smiling face with open mouth, teeth, yay] + 😃 grinning face with big eyes [awesome, happy, mouth, open, smile, smiling, teeth, yay] … % uni e name:smile Name CLDR - 😼 cat with wry smile [animal, cat face with wry smile, face, ironic] + 😼 cat with wry smile [animal, face, ironic] As you can see, the CLDR is pretty 
useful, as "smile" only gives one result as most emojis use "smiling". @@ -259,29 +259,29 @@ Prefix with `group:` to search by group: % uni e group:hands Name CLDR - 👏 clapping hands [applause, approval, awesome, congrats, congratulations, excited, good job, great, homie, nice, prayed, well done, yay] + 👏 clapping hands [applause, approval, awesome, congrats, congratulations, excited, good, great, homie, job, nice, prayed, well, yay] 🙌 raising hands [celebration, gesture, hooray, praise, raised] - 🫶 heart hands [<3, love, love you] - 👐 open hands [hug, jazz hands, swerve] - 🤲 palms up together [cupped hands, dua, pray, prayer, wish] + 🫶 heart hands [<3, love, you] + 👐 open hands [hug, jazz, swerve] + 🤲 palms up together [cupped, dua, hands, pray, prayer, wish] 🤝 handshake [agreement, deal, meeting] - 🙏 folded hands [appreciate, ask, beg, blessed, bow, cmon, five, gesture, high 5, high five, please, pray, thank, thank you, thanks, thx] + 🙏 folded hands [appreciate, ask, beg, blessed, bow, cmon, five, gesture, high, please, pray, thanks, thx] Group and search can be combined, and `group:` can be abbreviated to `g:`: % uni e g:cat-face grin Name CLDR - 😺 grinning cat [animal, face, mouth, open, smile, smiling cat face with open mouth] - 😸 grinning cat with smiling eyes [animal, face, grinning cat face with smiling eyes, smile] + 😺 grinning cat [animal, face, mouth, open, smile, smiling] + 😸 grinning cat with smiling eyes [animal, face, smile] Like with `search`, use `-or` to OR the parameters together instead of AND: % uni e -or g:face-glasses g:face-hat Name CLDR 🤠 cowboy hat face [cowgirl] - 🥳 partying face [birthday, celebrate, celebration, excited, happy bday, happy birthday, hat, hooray, horn] + 🥳 partying face [bday, birthday, celebrate, celebration, excited, happy, hat, hooray, horn] 🥸 disguised face [eyebrow, glasses, incognito, moustache, mustache, nose, person, spy, tache, tash] - 😎 smiling face with sunglasses [awesome, beach, bright, bro, chillin, cool, 
eye, eyewear, fly, rad, relaxed, shades, slay, smile, stunner, style, swag, swagger, win, winning, yeah] + 😎 smiling face with sunglasses [awesome, beach, bright, bro, chilling, cool, rad, relaxed, shades, slay, smile, style, swag, win] 🤓 nerd face [brainy, clever, expert, geek, gifted, glasses, intelligent, smart] 🧐 face with monocle [classy, fancy, rich, stuffy, wealthy] @@ -289,13 +289,13 @@ Apply skin tone modifiers with `-tone`: % uni e -tone dark g:hands Name CLDR - 👏🏿 clapping hands: dark skin tone [applause, approval, awesome, congrats, congratulations, excited, good job, great, homie, nice, prayed, well done, yay] + 👏🏿 clapping hands: dark skin tone [applause, approval, awesome, congrats, congratulations, excited, good, great, homie, job, nice, prayed, well, yay] 🙌🏿 raising hands: dark skin tone [celebration, gesture, hooray, praise, raised] - 🫶🏿 heart hands: dark skin tone [<3, love, love you] - 👐🏿 open hands: dark skin tone [hug, jazz hands, swerve] - 🤲🏿 palms up together: dark skin tone [cupped hands, dua, pray, prayer, wish] + 🫶🏿 heart hands: dark skin tone [<3, love, you] + 👐🏿 open hands: dark skin tone [hug, jazz, swerve] + 🤲🏿 palms up together: dark skin tone [cupped, dua, hands, pray, prayer, wish] 🤝🏿 handshake: dark skin tone [agreement, deal, meeting] - 🙏🏿 folded hands: dark skin tone [appreciate, ask, beg, blessed, bow, cmon, five, gesture, high 5, high five, please, pray, thank, thank you, thanks, thx] + 🙏🏿 folded hands: dark skin tone [appreciate, ask, beg, blessed, bow, cmon, five, gesture, high, please, pray, thanks, thx] The handshake emoji supports setting individual skin tones per hand since Unicode 14, but this isn't supported, mostly because I can't really really think @@ -308,16 +308,16 @@ changed with the `-gender` option: % uni e -gender man g:person-gesture Name CLDR - 🙍‍♂️ man frowning [annoyed, disappoint, disgruntled, disturbed, frustrated, gesture, irritated, not happy, person frowning, upset, woman frowning] - 🙎‍♂️ man pouting 
[disappoint, downtrodden, frown, gesture, grimace, person pouting, scowl, sulk, upset, whine, woman pouting] - 🙅‍♂️ man gesturing NO [exclude, forbidden, gesture, hand, no, nope, not, not a chance, person gesturing NO, prohibit, prohibited, woman gesturing NO] - 🙆‍♂️ man gesturing OK [exercise, gesture, hand, omg, person gesturing OK, woman gesturing OK] - 💁‍♂️ man tipping hand [fetch, gossip, hair flick, hair flip, help, information, person tipping hand, sarcasm, sarcastic, sassy, seriously, whatever, woman tipping hand] - 🙋‍♂️ man raising hand [gesture, hands, happy, I can help, i know, me, over here, person raising hand, pick me, question, raised, right here, woman raising hand] - 🧏‍♂️ deaf man [accessibility, deaf person, ear, hear] - 🙇‍♂️ man bowing [apology, beg, forgive, gesture, meditate, meditation, person bowing, pity, regret, sorry] - 🤦‍♂️ man facepalming [disbelief, exasperation, not again, oh no, omg, person, person facepalming, shock, smh] - 🤷‍♂️ man shrugging [doubt, dunno, i dunno, I guess, idk, ignorance, indifference, maybe, person, person shrugging, whatever, who knows] + 🙍‍♂️ man frowning [annoyed, disappointed, disgruntled, disturbed, frustrated, gesture, irritated, person, upset] + 🙎‍♂️ man pouting [disappointed, downtrodden, frown, grimace, person, scowl, sulk, upset, whine] + 🙅‍♂️ man gesturing NO [forbidden, gesture, hand, not, person, prohibit] + 🙆‍♂️ man gesturing OK [exercise, gesture, hand, omg, person] + 💁‍♂️ man tipping hand [fetch, flick, flip, gossip, person, sarcasm, sarcastic, sassy, seriously, whatever] + 🙋‍♂️ man raising hand [gesture, here, know, me, person, pick, question, raise] + 🧏‍♂️ deaf man [accessibility, ear, gesture, hear, person] + 🙇‍♂️ man bowing [apology, ask, beg, favor, forgive, gesture, meditate, meditation, person, pity, regret, sorry] + 🤦‍♂️ man facepalming [again, bewilder, disbelief, exasperation, no, not, oh, omg, person, shock, smh] + 🤷‍♂️ man shrugging [doubt, dunno, guess, idk, ignorance, indifference, 
knows, maybe, person, whatever, who] Both `-tone` and `-gender` accept multiple values. `-gender women,man` will display both the female and male variants, and `-tone light,dark` will display @@ -325,10 +325,10 @@ both a light and dark skin tone; use `all` to display all skin tones or genders: % uni e -tone light,dark -gender f,m shrug Name CLDR - 🤷🏻‍♂️ man shrugging: light skin tone [doubt, dunno, i dunno, I guess, idk, ignorance, indifference, maybe, person, person shrugging, whatever, who knows] - 🤷🏻‍♀️ woman shrugging: light skin tone [doubt, dunno, i dunno, I guess, idk, ignorance, indifference, maybe, person, person shrugging, whatever, who knows] - 🤷🏿‍♂️ man shrugging: dark skin tone [doubt, dunno, i dunno, I guess, idk, ignorance, indifference, maybe, person, person shrugging, whatever, who knows] - 🤷🏿‍♀️ woman shrugging: dark skin tone [doubt, dunno, i dunno, I guess, idk, ignorance, indifference, maybe, person, person shrugging, whatever, who knows] + 🤷🏻‍♂️ man shrugging: light skin tone [doubt, dunno, guess, idk, ignorance, indifference, knows, maybe, person, whatever, who] + 🤷🏻‍♀️ woman shrugging: light skin tone [doubt, dunno, guess, idk, ignorance, indifference, knows, maybe, person, whatever, who] + 🤷🏿‍♂️ man shrugging: dark skin tone [doubt, dunno, guess, idk, ignorance, indifference, knows, maybe, person, whatever, who] + 🤷🏿‍♀️ woman shrugging: dark skin tone [doubt, dunno, guess, idk, ignorance, indifference, knows, maybe, person, whatever, who] Like `print` and `identify`, you can use `-format`: @@ -464,8 +464,8 @@ This also works for the `emoji` command: % uni e -as json -f all 'kissing cat' [{ - "cldr": "animal, eye, face, kissing cat face with closed eyes", - "cldr_full": "animal, cat, eye, face, kiss, kissing cat, kissing cat face with closed eyes", + "cldr": "animal, closed, eye, eyes, face", + "cldr_full": "animal, cat, closed, eye, eyes, face, kiss, kissing", "cpoint": "U+1F63D", "emoji": "😽", "group": "Smileys & Emotion", diff --git 
a/unidata/gen/emojis.go b/unidata/gen/emojis.go index 0b38050..4d86474 100644 --- a/unidata/gen/emojis.go +++ b/unidata/gen/emojis.go @@ -49,11 +49,15 @@ func readCLDR(f string) map[string][]string { } zli.F(xml.Unmarshal(d, &cldr)) - out := make(map[string][]string) + var ( + // "Good enough" XML entity removal. + tr = strings.NewReplacer("&lt;", "<", "&gt;", ">", "&amp;", "&") + out = make(map[string][]string) + ) for _, a := range cldr.Annotations { if a.Type != "tts" { a.CP = strings.ReplaceAll(a.CP, "\u200d", "") - out[a.CP] = strings.Split(a.Names, " | ") + out[a.CP] = strings.Split(tr.Replace(a.Names), " | ") } } return out diff --git a/unidata/gen_emojis.go b/unidata/gen_emojis.go index ca69390..4a6b88d 100644 --- a/unidata/gen_emojis.go +++ b/unidata/gen_emojis.go @@ -453,7 +453,7 @@ var Emojis = []Emoji{ {[]rune{0x1f90f}, "pinching hand", 1, 17, []string{"amount", "bit", "fingers", "hand", "little", "pinching", "small", "sort"}, true, 0}, {[]rune{0x270c, 0xfe0f}, "victory hand", 1, 17, []string{"hand", "peace", "v", "victory"}, true, 0}, {[]rune{0x1f91e}, "crossed fingers", 1, 17, []string{"cross", "crossed", "finger", "fingers", "hand", "luck"}, true, 0}, - {[]rune{0x1faf0}, "hand with index finger and thumb crossed", 1, 17, []string{"&lt;3", "crossed", "expensive", "finger", "hand", "heart", "index", "love", "money", "snap", "thumb"}, true, 0}, + {[]rune{0x1faf0}, "hand with index finger and thumb crossed", 1, 17, []string{"<3", "crossed", "expensive", "finger", "hand", "heart", "index", "love", "money", "snap", "thumb"}, true, 0}, {[]rune{0x1f91f}, "love-you gesture", 1, 17, []string{"fingers", "gesture", "hand", "ILY", "love", "love-you", "three", "you"}, true, 0}, {[]rune{0x1f918}, "sign of the horns", 1, 17, []string{"finger", "hand", "horns", "rock-on", "sign"}, true, 0}, {[]rune{0x1f919}, "call me hand", 1, 17, []string{"call", "hand", "hang", "loose", "me", "Shaka"}, true, 0}, @@ -472,7 +472,7 @@ var Emojis = []Emoji{ {[]rune{0x1f91c}, "right-facing fist", 1,
19, []string{"fist", "right-facing", "rightwards"}, true, 0}, {[]rune{0x1f44f}, "clapping hands", 1, 20, []string{"applause", "approval", "awesome", "clap", "congrats", "congratulations", "excited", "good", "great", "hand", "homie", "job", "nice", "prayed", "well", "yay"}, true, 0}, {[]rune{0x1f64c}, "raising hands", 1, 20, []string{"celebration", "gesture", "hand", "hands", "hooray", "praise", "raised", "raising"}, true, 0}, - {[]rune{0x1faf6}, "heart hands", 1, 20, []string{"&lt;3", "hands", "heart", "love", "you"}, true, 0}, + {[]rune{0x1faf6}, "heart hands", 1, 20, []string{"<3", "hands", "heart", "love", "you"}, true, 0}, {[]rune{0x1f450}, "open hands", 1, 20, []string{"hand", "hands", "hug", "jazz", "open", "swerve"}, true, 0}, {[]rune{0x1f932}, "palms up together", 1, 20, []string{"cupped", "dua", "hands", "palms", "pray", "prayer", "together", "up", "wish"}, true, 0}, {[]rune{0x1f91d}, "handshake", 1, 20, []string{"agreement", "deal", "hand", "handshake", "meeting", "shake"}, true, 0}, @@ -1637,7 +1637,7 @@ var Emojis = []Emoji{ {[]rune{0x1f520}, "input latin uppercase", 8, 95, []string{"ABCD", "input", "latin", "letters", "uppercase"}, false, 0}, {[]rune{0x1f521}, "input latin lowercase", 8, 95, []string{"abcd", "input", "latin", "letters", "lowercase"}, false, 0}, {[]rune{0x1f522}, "input numbers", 8, 95, []string{"1234", "input", "numbers"}, false, 0}, - {[]rune{0x1f523}, "input symbols", 8, 95, []string{"&amp;", "%", "♪", "〒", "input", "symbols"}, false, 0}, + {[]rune{0x1f523}, "input symbols", 8, 95, []string{"&", "%", "♪", "〒", "input", "symbols"}, false, 0}, {[]rune{0x1f524}, "input latin letters", 8, 95, []string{"abc", "alphabet", "input", "latin", "letters"}, false, 0}, {[]rune{0x1f170, 0xfe0f}, "A button (blood type)", 8, 95, []string{"blood", "button", "type"}, false, 0}, {[]rune{0x1f18e}, "AB button (blood type)", 8, 95, []string{"AB", "blood", "button", "type"}, false, 0},