From b47f0f1347f66d083d98ace05dbe338cd9714279 Mon Sep 17 00:00:00 2001 From: Bernhard Owen Josephus Date: Thu, 10 Oct 2024 12:11:57 +0800 Subject: [PATCH] fix nbsp is removed from string --- src/libs/StringUtils.ts | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/src/libs/StringUtils.ts b/src/libs/StringUtils.ts index b3fcd247284e..d13c38700e18 100644 --- a/src/libs/StringUtils.ts +++ b/src/libs/StringUtils.ts @@ -34,20 +34,19 @@ function removeInvisibleCharacters(value: string): string { // Remove spaces: // - \u200B: zero-width space - // - \u00A0: non-breaking space // - \u2060: word joiner - result = result.replace(/[\u200B\u00A0\u2060]/g, ''); - - // Temporarily replace all newlines with non-breaking spaces - // It is necessary because the next step removes all newlines because they are in the (Cc) category - result = result.replace(/\n/g, '\u00A0'); - - // Remove all characters from the 'Other' (C) category except for format characters (Cf) - // because some of them are used for emojis - result = result.replace(/[\p{Cc}\p{Cs}\p{Co}\p{Cn}]/gu, ''); - - // Replace all non-breaking spaces with newlines - result = result.replace(/\u00A0/g, '\n'); + result = result.replace(/[\u200B\u2060]/g, ''); + + // The control unicode (Cc) regex removes all newlines, + // so we first split the string by newline and rejoin it afterward to retain the original line breaks. + result = result + .split('\n') + .map((part) => + // Remove all characters from the 'Other' (C) category except for format characters (Cf) + // because some of them are used for emojis + part.replace(/[\p{Cc}\p{Cs}\p{Co}\p{Cn}]/gu, ''), + ) + .join('\n'); // Remove characters from the (Cf) category that are not used for emojis result = result.replace(/[\u200E-\u200F]/g, '');