From 40b23d6aa4cb6ad465c4ec1c9c2b55bd2ebe3cc9 Mon Sep 17 00:00:00 2001 From: David Baker Date: Tue, 7 Jan 2020 18:48:55 +0000 Subject: [PATCH 1/3] Strip all variation selectors on emoji ...when inserting into or looking up in the unicode to emoji map. This broke with emojibase 4.2.0 which changed the type of a whole load of emojis to 'text' when previously they were 'emoji'. This caused them to get the 'text' variant of the unicode string which has the text variation selector (15) appended instead of the emoji variation selector (16). We were only stripping the emoji selector, so upgrading to 4.2.0 caused riot to fail to find the heart in the unicode map, which therefore prevented the app from starting. --- src/emoji.js | 39 ++++++++++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 11 deletions(-) diff --git a/src/emoji.js b/src/emoji.js index 7b7a9c1bfeb..8b3c4c9fe48 100644 --- a/src/emoji.js +++ b/src/emoji.js @@ -16,14 +16,12 @@ limitations under the License. import EMOJIBASE from 'emojibase-data/en/compact.json'; -export const VARIATION_SELECTOR = String.fromCharCode(0xFE0F); - // The unicode is stored without the variant selector const UNICODE_TO_EMOJI = new Map(); // not exported as gets for it are handled by getEmojiFromUnicode export const EMOTICON_TO_EMOJI = new Map(); export const SHORTCODE_TO_EMOJI = new Map(); -export const getEmojiFromUnicode = unicode => UNICODE_TO_EMOJI.get(unicode.replace(VARIATION_SELECTOR, "")); +export const getEmojiFromUnicode = unicode => UNICODE_TO_EMOJI.get(stripVariation(unicode)); const EMOJIBASE_GROUP_ID_TO_CATEGORY = [ "people", // smileys @@ -51,13 +49,6 @@ export const DATA_BY_CATEGORY = { // Store various mappings from unicode/emoticon/shortcode to the Emoji objects EMOJIBASE.forEach(emoji => { - if (emoji.unicode.includes(VARIATION_SELECTOR)) { - // Clone data into variation-less version - emoji = Object.assign({}, emoji, { - unicode: emoji.unicode.replace(VARIATION_SELECTOR, ""), - }); - } - const 
categoryId = EMOJIBASE_GROUP_ID_TO_CATEGORY[emoji.group]; if (DATA_BY_CATEGORY.hasOwnProperty(categoryId)) { DATA_BY_CATEGORY[categoryId].push(emoji); @@ -66,7 +57,13 @@ EMOJIBASE.forEach(emoji => { emoji.filterString = `${emoji.annotation}\n${emoji.shortcodes.join('\n')}}\n${emoji.emoticon || ''}`.toLowerCase(); // Add mapping from unicode to Emoji object - UNICODE_TO_EMOJI.set(emoji.unicode, emoji); + // The 'unicode' field that we use in emojibase has either + // VS15 or VS16 appended to any characters that can take + // variation selectors. Which one it appends depends + // on whether emojibase considers their type to be 'text' or + // 'emoji'. We therefore strip any variation chars from strings + // both when building the map and when looking up. + UNICODE_TO_EMOJI.set(stripVariation(emoji.unicode), emoji); if (emoji.emoticon) { // Add mapping from emoticon to Emoji object @@ -80,3 +77,23 @@ EMOJIBASE.forEach(emoji => { }); } }); + +/** + * Strips variation selectors from a string + * NB. Skin tone modifiers are not variation selectors: + * this function does not touch them. (Should it?) 
+ * + * @param {string} str string to strip + * @returns {string} stripped string + */ +function stripVariation(str) { + let ret = ''; + for (let i = 0; i < str.length; ++i) { + const charCode = str.charCodeAt(i); + // append to output only if it's outside the variation selector range + if (charCode < 0xFE00 && charCode > 0xFE0F) { + ret += str.charAt(i); + } + } + return ret; +} From 2d410c91acdc41fa7bcd985787429e40f8188055 Mon Sep 17 00:00:00 2001 From: David Baker Date: Tue, 7 Jan 2020 19:57:17 +0000 Subject: [PATCH 2/3] Use a regex because it's simpler and works and my loop did not because I meant 'or', not 'and' --- src/emoji.js | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/emoji.js b/src/emoji.js index 8b3c4c9fe48..d62630ae088 100644 --- a/src/emoji.js +++ b/src/emoji.js @@ -87,13 +87,5 @@ EMOJIBASE.forEach(emoji => { * @returns {string} stripped string */ function stripVariation(str) { - let ret = ''; - for (let i = 0; i < str.length; ++i) { - const charCode = str.charCodeAt(i); - // append to output only if it's outside the variation selector range - if (charCode < 0xFE00 && charCode > 0xFE0F) { - ret += str.charAt(i); - } - } - return ret; + return str.replace("\uFE00-\uFE0F", ""); } From 45ef57d86f9d2a5523468c2a8c7ee87fd158bcaa Mon Sep 17 00:00:00 2001 From: David Baker Date: Tue, 7 Jan 2020 20:41:19 +0000 Subject: [PATCH 3/3] Use a regex that actually works --- src/emoji.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/emoji.js b/src/emoji.js index d62630ae088..125864e381d 100644 --- a/src/emoji.js +++ b/src/emoji.js @@ -87,5 +87,5 @@ EMOJIBASE.forEach(emoji => { * @returns {string} stripped string */ function stripVariation(str) { - return str.replace("\uFE00-\uFE0F", ""); + return str.replace(/[\uFE00-\uFE0F]/g, ""); }