Skip to content

Commit

Permalink
add support for emoji/full unicode range in string (fix #2)
Browse files Browse the repository at this point in the history
  • Loading branch information
sunnyadn committed Nov 23, 2024
1 parent 5d30dc9 commit fca23f6
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 2 deletions.
5 changes: 3 additions & 2 deletions src/load/tokens/patterns.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import XRegExp from 'xregexp';

// Matches a single space or horizontal tab character.
export const whiteSpaceChar = /[ \t]/;
// Code-point based alternative that needs the `u` flag (kept for reference):
// const nonAscii = /[\x80-\uD7FF]|[\uE000-\u{10FFFF}]/u;
// Matches one UTF-16 code unit in U+0080–U+D7FF or U+E000–U+FFFF; the
// surrogate range U+D800–U+DFFF is excluded by both character classes.
export const nonAscii = /[\x80-\uD7FF]|[\uE000-\uFFFF]/;
// Matches one non-ASCII character as it appears in a UTF-16 string:
//   - a single code unit in U+0080–U+D7FF or U+E000–U+FFFF, or
//   - a high surrogate (U+D800–U+DBFF) immediately followed by a low
//     surrogate (U+DC00–U+DFFF), i.e. a character beyond the BMP such as
//     an emoji.
// A lone (unpaired) surrogate matches none of the alternatives.
export const nonAscii =
/[\x80-\uD7FF]|[\uE000-\uFFFF]|[\uD800-\uDBFF][\uDC00-\uDFFF]/;

// Matches a line ending: CRLF or a bare LF. CRLF is listed first so that
// both characters are consumed as one match; a bare CR is not matched.
export const newline = /\r\n|\n/;

Expand Down
39 changes: 39 additions & 0 deletions test/string.ts
Original file line number Diff line number Diff line change
Expand Up @@ -232,3 +232,42 @@ it('should throw error when meeting non-scalar \\U character in string', () => {

expect(() => load(input)).toThrow(SyntaxParseError);
});

it('should support simple emoji strings', () => {
  // A string value holding one emoji must come back from the loader unchanged.
  expect(load('asdf = "🔖"')).toEqual({ asdf: '🔖' });
});

it('should support comprehensive Unicode and emoji strings', () => {
  // TOML document under test. Backslashes are doubled so the escape
  // sequences reach the loader verbatim instead of being decoded by the
  // JavaScript template literal.
  const document = `# Mixed ASCII, Unicode and Emojis with Escape Sequences
mixed = "Hello 世界 🌍! \\u00A9 2024"
# Emojis with text modifiers and ZWJ sequences
skin_tone = "👋🏽 Hi there 👨🏾‍💻"
family = "👨‍👩‍👧‍👦 is my 👨‍👦 family"
# Mixing escape sequences with emojis
escaped_mix = "\\u0048\\u0069 🙋‍♂️ \\U0001F4BB"
# Unicode characters mixed with emojis
multilang = "Café ☕️ & Ramen 🍜 = 💖"
# Special characters and emojis
special = "🎵 La-la-la ♪ (⌐■_■) →★←"
# Stress test string
stress = "🏳️‍🌈 Hello\\t世界\\n☮️\\u0026\\u2764 Peace & Love ✌🏽 🌏"`;

  // Expected values: literal characters are preserved as-is and every
  // escape sequence in the document is replaced by the character it names.
  const expected = {
    mixed: 'Hello 世界 🌍! © 2024',
    skin_tone: '👋🏽 Hi there 👨🏾‍💻',
    family: '👨‍👩‍👧‍👦 is my 👨‍👦 family',
    escaped_mix: 'Hi 🙋‍♂️ 💻',
    multilang: 'Café ☕️ & Ramen 🍜 = 💖',
    special: '🎵 La-la-la ♪ (⌐■_■) →★←',
    stress: '🏳️‍🌈 Hello\t世界\n☮️&❤ Peace & Love ✌🏽 🌏'
  };

  expect(load(document)).toEqual(expected);
});

0 comments on commit fca23f6

Please sign in to comment.