Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

improve Bun.stringWidth's algorithm #9022

Merged
merged 17 commits into from
Feb 23, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions packages/bun-types/test.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -731,6 +731,8 @@ declare module "bun:test" {
}

export interface MatchersBuiltin<T = unknown> {
[key: string]: any;
nektro marked this conversation as resolved.
Show resolved Hide resolved

/**
* Negates the result of a subsequent assertion.
* If you know how to test something, `.not` lets you test its opposite.
Expand All @@ -743,7 +745,7 @@ declare module "bun:test" {
* expect(42).toEqual(42); // will pass
* expect(42).not.toEqual(42); // will fail
*/
not: Matchers<unknown>;
not: Matchers<T>;

/**
* Expects the value to be a promise that resolves.
Expand All @@ -759,7 +761,7 @@ declare module "bun:test" {
* @example
* expect(Promise.reject("error")).rejects.toBe("error");
*/
rejects: Matchers<unknown>;
rejects: Matchers<T>;

/**
* Assertion which passes.
Expand Down
3 changes: 3 additions & 0 deletions src/bun.js/bindings/BunObject.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -675,6 +675,9 @@ class JSBunObject : public JSC::JSNonFinalObject {
auto structure = createStructure(vm, globalObject, globalObject->objectPrototype());
auto* object = new (NotNull, JSC::allocateCell<JSBunObject>(vm)) JSBunObject(vm, structure);
object->finishCreation(vm);
#ifndef NDEBUG // move this to table above when all the tests in test/js/bun/util/stringWidth.test.ts pass
nektro marked this conversation as resolved.
Show resolved Hide resolved
object->putDirectNativeFunction(vm, globalObject, Identifier::fromString(vm, "stringWidth"_s), 2, BunObject_callback_stringWidth, ImplementationVisibility::Public, NoIntrinsic, JSC::PropertyAttribute::DontDelete | JSC::PropertyAttribute::Function | 0);
#endif
return object;
}
};
Expand Down
45 changes: 26 additions & 19 deletions src/string_immutable.zig
Original file line number Diff line number Diff line change
Expand Up @@ -4404,7 +4404,6 @@ pub fn firstNonASCII16CheckMin(comptime Slice: type, slice: Slice, comptime chec
if (char > 127 or char < 0x20) {
return @as(u32, @truncate(i));
}

i += 1;
}
} else {
Expand All @@ -4413,7 +4412,6 @@ pub fn firstNonASCII16CheckMin(comptime Slice: type, slice: Slice, comptime chec
if (char > 127) {
return @as(u32, @truncate(i));
}

i += 1;
}
}
Expand Down Expand Up @@ -5799,11 +5797,11 @@ pub fn isFullWidthCodepointType(comptime T: type, cp: T) bool {
};
}

pub fn visibleCodepointWidth(cp: anytype) u3 {
return visibleCodepointWidthType(@TypeOf(cp), cp);
pub fn visibleCodepointWidth(cp: u21) u3 {
nektro marked this conversation as resolved.
Show resolved Hide resolved
return visibleCodepointWidthType(u21, cp);
}

pub fn visibleCodepointWidthType(comptime T: type, cp: T) usize {
pub fn visibleCodepointWidthType(comptime T: type, cp: T) u3 {
if (isZeroWidthCodepointType(T, cp)) {
return 0;
}
Expand Down Expand Up @@ -5905,20 +5903,29 @@ pub const visible = struct {
return len;
}

fn visibleUTF16WidthFn(input: []const u16, comptime asciiFn: anytype) usize {
var bytes = input;
fn visibleUTF16WidthFn(input: []const u16, exclude_ansi_colors: bool) usize {
var len: usize = 0;
while (bun.strings.firstNonASCII16CheckMin([]const u16, bytes, false)) |i| {
len += asciiFn(bytes[0..i]);
bytes = bytes[i..];

const utf8 = utf16CodepointWithFFFD([]const u16, bytes);
len += visibleCodepointWidthType(u32, utf8.code_point);
bytes = bytes[@min(@as(usize, utf8.len), bytes.len)..];
var iter = std.unicode.Utf16LeIterator.init(input);
nektro marked this conversation as resolved.
Show resolved Hide resolved
nektro marked this conversation as resolved.
Show resolved Hide resolved
while (iter.nextCodepoint() catch return 0) |cp| blk: {
if (!exclude_ansi_colors) {
len += visibleCodepointWidth(cp);
continue;
}
if (cp != 0x1b) {
len += visibleCodepointWidth(cp);
continue;
}
if ((iter.nextCodepoint() catch return 0) != '[') {
len += visibleCodepointWidth(cp);
continue;
}
var stretch_len: usize = 0;
while (iter.nextCodepoint() catch return 0) |cp2| {
if (cp2 == 'm') break :blk;
stretch_len += visibleCodepointWidth(cp2);
}
len += stretch_len;
}

len += asciiFn(bytes);

return len;
}

Expand All @@ -5936,7 +5943,7 @@ pub const visible = struct {
}

pub fn utf16(input: []const u16) usize {
return visibleUTF16WidthFn(input, visibleASCIIWidth);
return visibleUTF16WidthFn(input, false);
}

pub const exclude_ansi_colors = struct {
Expand All @@ -5949,7 +5956,7 @@ pub const visible = struct {
}

pub fn utf16(input: []const u16) usize {
return visibleUTF16WidthFn(input, visibleASCIIWidthExcludeANSIColors);
return visibleUTF16WidthFn(input, true);
}
};
};
Expand Down
69 changes: 61 additions & 8 deletions test/js/bun/util/stringWidth.test.ts
Original file line number Diff line number Diff line change
@@ -1,26 +1,26 @@
import { test, expect, describe } from "bun:test";

import npmStringWidth from "string-width";
import { stringWidth } from "bun";

const bun_has_stringwidth = "stringWidth" in Bun;

expect.extend({
toMatchNPMStringWidth(received: string) {
const width = npmStringWidth(received);
const bunWidth = stringWidth(received);
const width = npmStringWidth(received, { countAnsiEscapeCodes: true });
const bunWidth = Bun.stringWidth(received, { countAnsiEscapeCodes: true });
const pass = width === bunWidth;
const message = () => `expected ${received} to have npm string width ${width} but got ${bunWidth}`;
return { pass, message };
},
toMatchNPMStringWidthExcludeANSI(received: string) {
const width = npmStringWidth(received, { countAnsiEscapeCodes: false });
const bunWidth = stringWidth(received, { countAnsiEscapeCodes: false });
const bunWidth = Bun.stringWidth(received, { countAnsiEscapeCodes: false });
const pass = width === bunWidth;
const message = () => `expected ${received} to have npm string width ${width} but got ${bunWidth}`;
return { pass, message };
},
});

test.skipIf(!stringWidth)("stringWidth", () => {
test.skipIf(!bun_has_stringwidth)("stringWidth", () => {
expect(undefined).toMatchNPMStringWidth();
expect("").toMatchNPMStringWidth();
expect("a").toMatchNPMStringWidth();
Expand All @@ -40,7 +40,7 @@ test.skipIf(!stringWidth)("stringWidth", () => {

for (let matcher of ["toMatchNPMStringWidth", "toMatchNPMStringWidthExcludeANSI"]) {
describe(matcher, () => {
test.skipIf(!stringWidth)("ansi colors", () => {
test.skipIf(!bun_has_stringwidth)("ansi colors", () => {
expect("\u001b[31m")[matcher]();
expect("\u001b[31ma")[matcher]();
expect("\u001b[31mab")[matcher]();
Expand Down Expand Up @@ -85,9 +85,62 @@ for (let matcher of ["toMatchNPMStringWidth", "toMatchNPMStringWidthExcludeANSI"
}

for (let matcher of ["toMatchNPMStringWidth", "toMatchNPMStringWidthExcludeANSI"]) {
test.todo("leading non-ansi characters in UTF-16 string seems to fail", () => {
test.skipIf(!bun_has_stringwidth)("leading non-ansi characters in UTF-16 string seems to fail", () => {
expect("\x1b[31mhshh🌎")[matcher]();
expect("a\x1b[31mhshh🌎")[matcher]();
expect("a\x1b[31mhshh🌎a")[matcher]();
});
}

for (let matcher of ["toMatchNPMStringWidth", "toMatchNPMStringWidthExcludeANSI"]) {
test.skipIf(!bun_has_stringwidth)("upstream", () => {
expect("abcde")[matcher]();
expect("古池や")[matcher]();
expect("あいうabc")[matcher]();
expect("あいう★")[matcher]();
expect("±")[matcher]();
expect("ノード.js")[matcher]();
expect("你好")[matcher]();
expect("안녕하세요")[matcher]();
expect("A\uD83C\uDE00BC")[matcher]();
expect("\u001B[31m\u001B[39m")[matcher]();
// expect("\u001B]8;;https://github.com\u0007Click\u001B]8;;\u0007")[matcher]();
expect("\u{231A}")[matcher]();
// expect("\u{2194}\u{FE0F}")[matcher]();
expect("\u{1F469}")[matcher]();
// expect("\u{1F469}\u{1F3FF}")[matcher]();
expect("\u{845B}\u{E0100}")[matcher]();
// expect("ปฏัก")[matcher]();
// expect("_\u0E34")[matcher]();
expect("\u001B[31m\u001B[39m")[matcher]();
// expect(stringWidth("⛣", { ambiguousIsNarrow: false }), 2);
// expect(stringWidth("あいう★", { ambiguousIsNarrow: false }), 8);
// expect(stringWidth("“", { ambiguousIsNarrow: false }), 2);
});
}

for (let matcher of ["toMatchNPMStringWidth", "toMatchNPMStringWidthExcludeANSI"]) {
test.skipIf(!bun_has_stringwidth)("ignores control characters", () => {
expect(String.fromCodePoint(0))[matcher]();
expect(String.fromCodePoint(31))[matcher]();
// expect(String.fromCodePoint(127))[matcher]();
// expect(String.fromCodePoint(134))[matcher]();
// expect(String.fromCodePoint(159))[matcher]();
expect("\u001B")[matcher]();
});
}

for (let matcher of ["toMatchNPMStringWidth", "toMatchNPMStringWidthExcludeANSI"]) {
test.skipIf(!bun_has_stringwidth)("handles combining characters", () => {
expect("x\u0300")[matcher]();
});
}

for (let matcher of ["toMatchNPMStringWidth", "toMatchNPMStringWidthExcludeANSI"]) {
test.skipIf(!bun_has_stringwidth)("handles ZWJ characters", () => {
expect("👶")[matcher]();
// expect("👶🏽")[matcher]();
// expect("👩‍👩‍👦‍👦")[matcher]();
// expect("👨‍❤️‍💋‍👨")[matcher]();
});
}
Loading