From e2bb5ec1e640f0c902598701c77c66a4ea31656c Mon Sep 17 00:00:00 2001 From: zima Date: Wed, 23 Oct 2024 17:14:49 +0900 Subject: [PATCH] Fix binary detection for text files containing emoji --- src/index.ts | 14 ++++++++++---- test/fixtures/emoji.txt | 1 + test/index.test.ts | 6 ++++++ 3 files changed, 17 insertions(+), 4 deletions(-) create mode 100644 test/fixtures/emoji.txt diff --git a/src/index.ts b/src/index.ts index 36a8191..232b1dd 100644 --- a/src/index.ts +++ b/src/index.ts @@ -223,17 +223,23 @@ function isBinaryCheck(fileBuffer: Buffer, bytesRead: number): boolean { return true; } else if ((fileBuffer[i] < 7 || fileBuffer[i] > 14) && (fileBuffer[i] < 32 || fileBuffer[i] > 127)) { // UTF-8 detection - if (fileBuffer[i] > 193 && fileBuffer[i] < 224 && i + 1 < totalBytes) { + if (fileBuffer[i] >= 0xc0 && fileBuffer[i] <= 0xdf && i + 1 < totalBytes) { i++; - if (fileBuffer[i] > 127 && fileBuffer[i] < 192) { + if (fileBuffer[i] >= 0x80 && fileBuffer[i] <= 0xbf) { continue; } - } else if (fileBuffer[i] > 223 && fileBuffer[i] < 240 && i + 2 < totalBytes) { + } else if (fileBuffer[i] >= 0xe0 && fileBuffer[i] <= 0xef && i + 2 < totalBytes) { i++; - if (fileBuffer[i] > 127 && fileBuffer[i] < 192 && fileBuffer[i + 1] > 127 && fileBuffer[i + 1] < 192) { + if (fileBuffer[i] >= 0x80 && fileBuffer[i] <= 0xbf && fileBuffer[i + 1] >= 0x80 && fileBuffer[i + 1] <= 0xbf) { i++; continue; } + } else if (fileBuffer[i] >= 0xf0 && fileBuffer[i] <= 0xf7 && i + 3 < totalBytes) { + i++; + if (fileBuffer[i] >= 0x80 && fileBuffer[i] <= 0xbf && fileBuffer[i + 1] >= 0x80 && fileBuffer[i + 1] <= 0xbf && fileBuffer[i + 2] >= 0x80 && fileBuffer[i + 2] <= 0xbf) { + i += 2; + continue; + } } suspiciousBytes++; diff --git a/test/fixtures/emoji.txt b/test/fixtures/emoji.txt new file mode 100644 index 0000000..9eea5c2 --- /dev/null +++ b/test/fixtures/emoji.txt @@ -0,0 +1 @@ +UTF-8 emoji 📦 diff --git a/test/index.test.ts b/test/index.test.ts index 08c967a..a37a1fe 100644 --- a/test/index.test.ts +++ b/test/index.test.ts @@ -292,3 +292,9 @@ describe('sync', () => { }); }); }); + +it("should return false on a UTF-8 file with emoji", () => { + const file = path.join(FIXTURE_PATH, "emoji.txt"); + const result = isBinaryFileSync(file); + expect(result).toBe(false); +});