From fca23f656c6eeea00b54b420a379fa3b2fd426dc Mon Sep 17 00:00:00 2001
From: Sunny Yang
Date: Sun, 24 Nov 2024 06:38:03 +0800
Subject: [PATCH] add support for emoji/full unicode range in string (fix #2)

---
Notes (ignored by git am):
  * nonAscii gains a third alternative, [\uD800-\uDBFF][\uDC00-\uDFFF], i.e. a
    high surrogate followed by a low surrogate. The regex has no `u` flag, so
    a code point above U+FFFF appears as two UTF-16 code units and is matched
    by this alternative.
  * The new tests load strings containing emoji, ZWJ sequences, variation
    selectors and CJK text, mixed with \uXXXX / \UXXXXXXXX escapes.
  * This file must be stored as UTF-8; the test fixtures contain characters
    outside the Basic Multilingual Plane.

 src/load/tokens/patterns.ts |  5 +++--
 test/string.ts              | 39 +++++++++++++++++++++++++++++++++++++
 2 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/src/load/tokens/patterns.ts b/src/load/tokens/patterns.ts
index 460d8f5..8f23a11 100644
--- a/src/load/tokens/patterns.ts
+++ b/src/load/tokens/patterns.ts
@@ -1,8 +1,9 @@
 import XRegExp from 'xregexp';
 
 export const whiteSpaceChar = /[ \t]/;
-// const nonAscii = /[\x80-\uD7FF]|[\uE000-\u{10FFFF}]/u;
-export const nonAscii = /[\x80-\uD7FF]|[\uE000-\uFFFF]/;
+// Handle full Unicode range including surrogate pairs for characters beyond BMP
+export const nonAscii =
+  /[\x80-\uD7FF]|[\uE000-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]/;
 
 export const newline = /\r\n|\n/;
 
diff --git a/test/string.ts b/test/string.ts
index 2453cbe..2c5d5db 100644
--- a/test/string.ts
+++ b/test/string.ts
@@ -232,3 +232,42 @@ it('should throw error when meeting non-scalar \\U character in string', () => {
 
   expect(() => load(input)).toThrow(SyntaxParseError);
 });
+
+it('should support simple emoji strings', () => {
+  const input = 'asdf = "🔖"';
+  const result = load(input);
+  expect(result).toEqual({ asdf: '🔖' });
+});
+
+it('should support comprehensive Unicode and emoji strings', () => {
+  const input = `# Mixed ASCII, Unicode and Emojis with Escape Sequences
+mixed = "Hello 世界 🌍! \\u00A9 2024"
+
+# Emojis with text modifiers and ZWJ sequences
+skin_tone = "👋🏽 Hi there 👨🏾‍💻"
+family = "👨‍👩‍👧‍👦 is my 👨‍👦 family"
+
+# Mixing escape sequences with emojis
+escaped_mix = "\\u0048\\u0069 🙋‍♂️ \\U0001F4BB"
+
+# Unicode characters mixed with emojis
+multilang = "Café ☕️ & Ramen 🍜 = 💖"
+
+# Special characters and emojis
+special = "🎵 La-la-la ♪ (⌐■_■) →★←"
+
+# Stress test string
+stress = "🏳️‍🌈 Hello\\t世界\\n☮️\\u0026\\u2764 Peace & Love ✌🏽 🌍"`;
+
+  const result = load(input);
+
+  expect(result).toEqual({
+    mixed: 'Hello 世界 🌍! © 2024',
+    skin_tone: '👋🏽 Hi there 👨🏾‍💻',
+    family: '👨‍👩‍👧‍👦 is my 👨‍👦 family',
+    escaped_mix: 'Hi 🙋‍♂️ 💻',
+    multilang: 'Café ☕️ & Ramen 🍜 = 💖',
+    special: '🎵 La-la-la ♪ (⌐■_■) →★←',
+    stress: '🏳️‍🌈 Hello\t世界\n☮️&❤ Peace & Love ✌🏽 🌍'
+  });
+});