Skip to content

Commit

Permalink
[Strings] Escape strings printed by fuzz-exec
Browse files Browse the repository at this point in the history
Previously we printed strings as WTF-8 in the output of fuzz-exec, but this
could produce invalid Unicode output and did not make unprintable characters
visible. Fix both of these problems by escaping the output, using the JSON
string escape procedure since the string to be escaped is WTF-16. Reimplement
the same escaping procedure in fuzz_shell.js so that the way we print strings
when running on a real JS engine matches the way we print them in our own
fuzz-exec interpreter.

Fixes #6435.
  • Loading branch information
tlively committed Mar 26, 2024
1 parent 47bcca6 commit f65a41a
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 12 deletions.
55 changes: 52 additions & 3 deletions scripts/fuzz_shell.js
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,58 @@ function printed(x, y) {
// 'object', below.
return 'null';
} else if (typeof x === 'string') {
// Emit a string in the same format as the binaryen interpreter.
return 'string("' + x + '")';
// Emit a string in the same format as the binaryen interpreter. This
// escaping routine must be kept in sync with String::printEscapedJSON.
var escaped = '';
for (u of x) {
switch (u) {
case '"':
escaped += '\\"';
continue;
case '\\':
escaped += '\\\\';
continue;
case '\b':
escaped += '\\b';
continue;
case '\f':
escaped += '\\f';
continue;
case '\n':
escaped += '\\n';
continue;
case '\r':
escaped += '\\r';
continue;
case '\t':
escaped += '\\t';
continue;
default:
break;
}

var codePoint = u.codePointAt(0);
if (32 <= codePoint && codePoint < 127) {
escaped += u;
continue
}

var printEscape = (codePoint) => {
escaped += '\\u'
escaped += ((codePoint & 0xF000) >> 12).toString(16);
escaped += ((codePoint & 0x0F00) >> 8).toString(16);
escaped += ((codePoint & 0x00F0) >> 4).toString(16);
escaped += (codePoint & 0x000F).toString(16);
};

if (codePoint < 0x10000) {
printEscape(codePoint);
} else {
printEscape(0xD800 + ((codePoint - 0x10000) >> 10));
printEscape(0xDC00 + ((codePoint - 0x10000) & 0x3FF));
}
}
return 'string("' + escaped + '")';
} else if (typeof x === 'bigint') {
// Print bigints in legalized form, which is two 32-bit numbers of the low
// and high bits.
Expand Down Expand Up @@ -146,4 +196,3 @@ for (var e in exports) {
console.log('exception thrown: ' + e);
}
}

11 changes: 5 additions & 6 deletions src/wasm/literal.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -639,7 +639,7 @@ std::ostream& operator<<(std::ostream& o, Literal literal) {
if (!data) {
o << "nullstring";
} else {
o << "string(\"";
o << "string(";
// Serialize the WTF-16 code unit literals into a byte string, low byte first.
std::stringstream wtf16;
for (auto c : data->values) {
Expand All @@ -648,12 +648,11 @@ std::ostream& operator<<(std::ostream& o, Literal literal) {
wtf16 << uint8_t(u & 0xFF);
wtf16 << uint8_t(u >> 8);
}
// Convert to WTF-8 for printing.
// Escape to ensure we have valid unicode output and to make
// unprintable characters visible.
// TODO: Use wtf16.view() once we have C++20.
[[maybe_unused]] bool valid =
String::convertWTF16ToWTF8(o, wtf16.str());
assert(valid);
o << "\")";
String::printEscapedJSON(o, wtf16.str());
o << ")";
}
break;
}
Expand Down
38 changes: 35 additions & 3 deletions test/lit/exec/strings.wast
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

(memory 1 1)

(import "fuzzing-support" "log" (func $log (param i32)))
(import "fuzzing-support" "log-i32" (func $log (param i32)))

;; CHECK: [fuzz-exec] calling new_wtf16_array
;; CHECK-NEXT: [fuzz-exec] note result: new_wtf16_array => string("ello")
Expand Down Expand Up @@ -280,7 +280,9 @@
(func $slice (export "slice") (result (ref string))
;; Slicing [3:6] here should definitely output "def".
(stringview_wtf16.slice
(string.const "abcdefgh")
(string.as_wtf16
(string.const "abcdefgh")
)
(i32.const 3)
(i32.const 6)
)
Expand All @@ -291,11 +293,33 @@
(func $slice-big (export "slice-big") (result (ref string))
;; Slicing [3:huge unsigned value] leads to slicing until the end: "defgh".
(stringview_wtf16.slice
(string.const "abcdefgh")
(string.as_wtf16
(string.const "abcdefgh")
)
(i32.const 3)
(i32.const -1)
)
)

;; CHECK: [fuzz-exec] calling slice-unicode
;; CHECK-NEXT: [fuzz-exec] note result: slice-unicode => string("d\u00a3f")
(func $slice-unicode (export "slice-unicode") (result (ref string))
;; Slice a string that contains a non-ASCII character. The literal spells
;; '£' (U+00A3) as its two-byte UTF-8 encoding \C2\A3, yet the expected
;; result "d\u00a3f" shows it occupies a single position for slicing: the
;; range [3:6] selects 'd', '£', and 'f'.
(stringview_wtf16.slice
;; abcd£fgh
(string.as_wtf16
(string.const "abcd\C2\A3fgh")
)
(i32.const 3)
(i32.const 6)
)
)

;; CHECK: [fuzz-exec] calling concat-surrogates
;; CHECK-NEXT: [fuzz-exec] note result: concat-surrogates => string("\ud800\udf48")
(func $concat-surrogates (export "concat-surrogates") (result (ref string))
;; Each operand is a lone surrogate written in WTF-8: \ED\A0\80 encodes the
;; high surrogate U+D800 and \ED\BD\88 encodes the low surrogate U+DF48.
;; Concatenating these surrogates creates '𐍈' (U+10348), which the expected
;; output prints as the escaped pair "\ud800\udf48".
(string.concat (string.const "\ED\A0\80") (string.const "\ED\BD\88"))
)
)
;; CHECK: [fuzz-exec] calling new_wtf16_array
;; CHECK-NEXT: [fuzz-exec] note result: new_wtf16_array => string("ello")
Expand Down Expand Up @@ -373,6 +397,12 @@

;; CHECK: [fuzz-exec] calling slice-big
;; CHECK-NEXT: [fuzz-exec] note result: slice-big => string("defgh")

;; CHECK: [fuzz-exec] calling slice-unicode
;; CHECK-NEXT: [fuzz-exec] note result: slice-unicode => string("d\u00a3f")

;; CHECK: [fuzz-exec] calling concat-surrogates
;; CHECK-NEXT: [fuzz-exec] note result: concat-surrogates => string("\ud800\udf48")
;; CHECK-NEXT: [fuzz-exec] comparing compare.1
;; CHECK-NEXT: [fuzz-exec] comparing compare.10
;; CHECK-NEXT: [fuzz-exec] comparing compare.2
Expand All @@ -383,6 +413,7 @@
;; CHECK-NEXT: [fuzz-exec] comparing compare.7
;; CHECK-NEXT: [fuzz-exec] comparing compare.8
;; CHECK-NEXT: [fuzz-exec] comparing compare.9
;; CHECK-NEXT: [fuzz-exec] comparing concat-surrogates
;; CHECK-NEXT: [fuzz-exec] comparing const
;; CHECK-NEXT: [fuzz-exec] comparing encode
;; CHECK-NEXT: [fuzz-exec] comparing encode-overflow
Expand All @@ -397,3 +428,4 @@
;; CHECK-NEXT: [fuzz-exec] comparing new_wtf16_array
;; CHECK-NEXT: [fuzz-exec] comparing slice
;; CHECK-NEXT: [fuzz-exec] comparing slice-big
;; CHECK-NEXT: [fuzz-exec] comparing slice-unicode

0 comments on commit f65a41a

Please sign in to comment.