From 5d3030b9d9c1daf87d875819ad1bb0c2f4fbc5d4 Mon Sep 17 00:00:00 2001 From: Bruno Rocha Date: Fri, 13 Oct 2023 15:02:05 -0300 Subject: [PATCH] refactor: strLength support more edge cases --- packages/core/src/utils.ts | 66 +++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 22 deletions(-) diff --git a/packages/core/src/utils.ts b/packages/core/src/utils.ts index d3fd7c58..383d6051 100644 --- a/packages/core/src/utils.ts +++ b/packages/core/src/utils.ts @@ -48,31 +48,53 @@ export function block({ }; } -export function strLength(input: string) { +function ansiRegex(): RegExp { + const pattern = [ + '[\\u001B\\u009B][[\\]()#;?]*(?:(?:(?:[a-zA-Z\\d]*(?:;[-a-zA-Z\\d\\/#&.:=?%@~_]*)*)?\\u0007)', + '(?:(?:\\d{1,4}(?:;\\d{0,4})*)?[\\dA-PR-TZcf-ntqry=><~]))', + ].join('|'); + + return new RegExp(pattern, 'g'); +} + +function stripAnsi(str: string): string { + return str.replace(ansiRegex(), ''); +} + +function isControlCharacter(code: number): boolean { + return code <= 0x1f || (code >= 0x7f && code <= 0x9f); +} + +function isCombiningCharacter(code: number): boolean { + return code >= 0x300 && code <= 0x36f; +} + +function isSurrogatePair(code: number): boolean { + return code >= 0xd800 && code <= 0xdbff; +} + +export function strLength(str: string): number { + if (str === '') { + return 0; + } + + // Remove ANSI escape codes from the input string. + str = stripAnsi(str); + let length = 0; - let i = 0; - - while (i < input.length) { - if (input[i] === '\u001b') { - // Check for escape character (ANSI escape code) - const endIndex = input.indexOf('m', i + 1); // Find the end of ANSI code - if (endIndex === -1) { - i++; // Skip the escape character and continue - continue; - } else { - i = endIndex + 1; - continue; - } + + for (let i = 0; i < str.length; i++) { + const code = str.codePointAt(i); + + if (!code || isControlCharacter(code) || isCombiningCharacter(code)) { + continue; } - // Handle other control codes or regular characters - const code = input.charCodeAt(i); - - if (code >= 0xd800 && code <= 0xdbff) { - i += 2; - } else { - length++; - i++; + + if (isSurrogatePair(code)) { + i++; // Skip the next code unit. } + + length++; } return length;