From 8b3228acd25cead2b68572c08976d2c66e015ec0 Mon Sep 17 00:00:00 2001 From: Robin Date: Wed, 5 Jun 2024 16:19:22 -0400 Subject: [PATCH] Don't consider textual characters to be emoji MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We were using emojibase-regex to match emoji within messages. However, the docs (https://emojibase.dev/docs/regex/) state that this regex matches both emoji and text presentation characters. This is not what we want, and will result in false positives for characters like '↔' that could turn into an emoji if paired with a variation selector. The emojibase-regex/emoji regex from the same package does what we want. --- src/HtmlUtils.tsx | 6 ++-- .../views/rooms/SendMessageComposer.tsx | 2 +- src/editor/parts.ts | 2 +- test/HtmlUtils-test.tsx | 12 +++++++ test/__snapshots__/HtmlUtils-test.tsx.snap | 34 +++++++++++++++++++ 5 files changed, 51 insertions(+), 5 deletions(-) diff --git a/src/HtmlUtils.tsx b/src/HtmlUtils.tsx index b63ed1dcf0c..379b2614661 100644 --- a/src/HtmlUtils.tsx +++ b/src/HtmlUtils.tsx @@ -20,7 +20,7 @@ limitations under the License. 
import React, { LegacyRef, ReactNode } from "react"; import sanitizeHtml from "sanitize-html"; import classNames from "classnames"; -import EMOJIBASE_REGEX from "emojibase-regex"; +import EMOJIBASE_REGEX from "emojibase-regex/emoji"; import katex from "katex"; import { decode } from "html-entities"; import { IContent } from "matrix-js-sdk/src/matrix"; @@ -46,8 +46,8 @@ const SURROGATE_PAIR_PATTERN = /([\ud800-\udbff])([\udc00-\udfff])/; const SYMBOL_PATTERN = /([\u2100-\u2bff])/; // Regex pattern for non-emoji characters that can appear in an "all-emoji" message -// (Zero-Width Joiner, Zero-Width Space, Emoji presentation character, other whitespace) -const EMOJI_SEPARATOR_REGEX = /[\u200D\u200B\s]|\uFE0F/g; +// (Zero-Width Space, other whitespace) +const EMOJI_SEPARATOR_REGEX = /[\u200B\s]/g; const BIGEMOJI_REGEX = new RegExp(`^(${EMOJIBASE_REGEX.source})+$`, "i"); diff --git a/src/components/views/rooms/SendMessageComposer.tsx b/src/components/views/rooms/SendMessageComposer.tsx index 0ea0bdf94c1..4e78942da61 100644 --- a/src/components/views/rooms/SendMessageComposer.tsx +++ b/src/components/views/rooms/SendMessageComposer.tsx @@ -15,7 +15,7 @@ limitations under the License. */ import React, { createRef, KeyboardEvent, SyntheticEvent } from "react"; -import EMOJI_REGEX from "emojibase-regex"; +import EMOJI_REGEX from "emojibase-regex/emoji"; import { IContent, MatrixEvent, diff --git a/src/editor/parts.ts b/src/editor/parts.ts index 12262280ae0..697d4223813 100644 --- a/src/editor/parts.ts +++ b/src/editor/parts.ts @@ -15,7 +15,7 @@ See the License for the specific language governing permissions and limitations under the License. 
*/ -import EMOJIBASE_REGEX from "emojibase-regex"; +import EMOJIBASE_REGEX from "emojibase-regex/emoji"; import { MatrixClient, RoomMember, Room } from "matrix-js-sdk/src/matrix"; import GraphemeSplitter from "graphemer"; diff --git a/test/HtmlUtils-test.tsx b/test/HtmlUtils-test.tsx index d9e75faaa99..3701e13e8d9 100644 --- a/test/HtmlUtils-test.tsx +++ b/test/HtmlUtils-test.tsx @@ -107,6 +107,12 @@ describe("bodyToHtml", () => { expect(html).toMatchInlineSnapshot(`"test foo <b>bar"`); }); + it("generates big emoji for emoji made of multiple characters", () => { + const { asFragment } = render(bodyToHtml({ body: "👨‍👩‍👧‍👦 ↔️", msgtype: "m.text" }, [], {}) as ReactElement); + + expect(asFragment()).toMatchSnapshot(); + }); + it("should generate big emoji for an emoji-only reply to a message", () => { const { asFragment } = render( bodyToHtml( @@ -132,6 +138,12 @@ describe("bodyToHtml", () => { expect(asFragment()).toMatchSnapshot(); }); + it("does not mistake characters in text presentation mode for emoji", () => { + const { asFragment } = render(bodyToHtml({ body: "↔", msgtype: "m.text" }, [], {}) as ReactElement); + + expect(asFragment()).toMatchSnapshot(); + }); + describe("feature_latex_maths", () => { beforeEach(() => { jest.spyOn(SettingsStore, "getValue").mockImplementation((feature) => feature === "feature_latex_maths"); diff --git a/test/__snapshots__/HtmlUtils-test.tsx.snap b/test/__snapshots__/HtmlUtils-test.tsx.snap index c33cc46433d..03de209307f 100644 --- a/test/__snapshots__/HtmlUtils-test.tsx.snap +++ b/test/__snapshots__/HtmlUtils-test.tsx.snap @@ -1,5 +1,16 @@ // Jest Snapshot v1, https://goo.gl/fbAQLP +exports[`bodyToHtml does not mistake characters in text presentation mode for emoji 1`] = ` + + + ↔ + + +`; + exports[`bodyToHtml feature_latex_maths should not mangle code blocks 1`] = `"

hello

$\\xi$

world

"`; exports[`bodyToHtml feature_latex_maths should not mangle divs 1`] = `"

hello

world
"`; @@ -8,6 +19,29 @@ exports[`bodyToHtml feature_latex_maths should render block katex 1`] = `"

hel exports[`bodyToHtml feature_latex_maths should render inline katex 1`] = `"hello ξ\\xi world"`; +exports[`bodyToHtml generates big emoji for emoji made of multiple characters 1`] = ` + + + + 👨‍👩‍👧‍👦 + + + + ↔️ + + + +`; + exports[`bodyToHtml should generate big emoji for an emoji-only reply to a message 1`] = `