diff --git a/scripts/fuzz_opt.py b/scripts/fuzz_opt.py index f839984c255..251b23d033a 100755 --- a/scripts/fuzz_opt.py +++ b/scripts/fuzz_opt.py @@ -286,6 +286,7 @@ def is_git_repo(): # TODO: fuzzer and interpreter support for strings 'strings.wast', 'simplify-locals-strings.wast', + 'string-lowering-instructions.wast', # TODO: fuzzer and interpreter support for extern conversions 'extern-conversions.wast', # ignore DWARF because it is incompatible with multivalue atm diff --git a/src/passes/StringLowering.cpp b/src/passes/StringLowering.cpp index 31e41b9e8c7..e4b3ed865d5 100644 --- a/src/passes/StringLowering.cpp +++ b/src/passes/StringLowering.cpp @@ -191,6 +191,9 @@ struct StringLowering : public StringGathering { // Remove all HeapType::string etc. in favor of externref. updateTypes(module); + // Replace string.* etc. operations with imported ones. + replaceInstructions(module); + // Disable the feature here after we lowered everything away. module->features.disable(FeatureSet::Strings); } @@ -225,9 +228,88 @@ struct StringLowering : public StringGathering { void updateTypes(Module* module) { TypeMapper::TypeUpdates updates; + // There is no difference between strings and views with imported strings: + // they are all just JS strings, so they all turn into externref. updates[HeapType::string] = HeapType::ext; + updates[HeapType::stringview_wtf8] = HeapType::ext; + updates[HeapType::stringview_wtf16] = HeapType::ext; + updates[HeapType::stringview_iter] = HeapType::ext; TypeMapper(*module, updates).map(); } + + // Imported string functions. + Name fromCharCodeArrayImport; + Name fromCodePointImport; + + // The name of the module to import string functions from. + Name WasmStringsModule = "wasm:js-string"; + + // Common types used in imports. 
+ Type nullArray16 = Type(Array(Field(Field::i16, Mutable)), Nullable); + Type nnExt = Type(HeapType::ext, NonNullable); + + // Creates an imported string function, returning its name (which is equal to + // the true name of the import, if there is no conflict). + Name addImport(Module* module, Name trueName, Type params, Type results) { + auto name = Names::getValidFunctionName(*module, trueName); + auto sig = Signature(params, results); + Builder builder(*module); + auto* func = module->addFunction(builder.makeFunction(name, sig, {})); + func->module = WasmStringsModule; + func->base = trueName; + return name; + } + + void replaceInstructions(Module* module) { + // Add all the possible imports up front, to avoid adding them during + // parallel work. Optimizations can remove unneeded ones later. + + // string.fromCharCodeArray: array, start, end -> ext + fromCharCodeArrayImport = addImport( + module, "fromCharCodeArray", {nullArray16, Type::i32, Type::i32}, nnExt); + // string.fromCodePoint: codepoint -> ext + fromCodePointImport = addImport(module, "fromCodePoint", Type::i32, nnExt); + + // Replace the string instructions in parallel. 
+ struct Replacer : public WalkerPass<PostWalker<Replacer>> { + bool isFunctionParallel() override { return true; } + + StringLowering& lowering; + + std::unique_ptr<Pass> create() override { + return std::make_unique<Replacer>(lowering); + } + + Replacer(StringLowering& lowering) : lowering(lowering) {} + + void visitStringNew(StringNew* curr) { + Builder builder(*getModule()); + switch (curr->op) { + case StringNewWTF16Array: + replaceCurrent(builder.makeCall(lowering.fromCharCodeArrayImport, + {curr->ptr, curr->start, curr->end}, + lowering.nnExt)); + return; + case StringNewFromCodePoint: + replaceCurrent(builder.makeCall( + lowering.fromCodePointImport, {curr->ptr}, lowering.nnExt)); + return; + default: + WASM_UNREACHABLE("TODO: all of string.new*"); + } + } + + void visitStringAs(StringAs* curr) { + // There is no difference between strings and views with imported + // strings: they are all just JS strings, so no conversion is needed. + replaceCurrent(curr->ref); + } + }; + + Replacer replacer(*this); + replacer.run(getPassRunner(), module); + replacer.walkModuleCode(module); + } }; Pass* createStringGatheringPass() { return new StringGathering(); } diff --git a/test/lit/passes/string-gathering.wast b/test/lit/passes/string-gathering.wast index 657858fc050..8c315ddc140 100644 --- a/test/lit/passes/string-gathering.wast +++ b/test/lit/passes/string-gathering.wast @@ -27,12 +27,22 @@ ;; CHECK: (global $global2 stringref (global.get $string.const_bar)) ;; LOWER: (type $0 (func)) + ;; LOWER: (type $1 (array (mut i16))) + + ;; LOWER: (type $2 (func (param (ref null $1) i32 i32) (result (ref extern)))) + + ;; LOWER: (type $3 (func (param i32) (result (ref extern)))) + ;; LOWER: (import "string.const" "0" (global $string.const_bar (ref extern))) ;; LOWER: (import "string.const" "1" (global $string.const_other (ref extern))) ;; LOWER: (import "string.const" "2" (global $global (ref extern))) + ;; LOWER: (import "wasm:js-string" "fromCharCodeArray" (func $fromCharCodeArray (type $2) (param (ref null $1) 
i32 i32) (result (ref extern)))) + + ;; LOWER: (import "wasm:js-string" "fromCodePoint" (func $fromCodePoint (type $3) (param i32) (result (ref extern)))) + ;; LOWER: (global $global2 externref (global.get $string.const_bar)) (global $global2 (ref null string) (string.const "bar")) @@ -111,6 +121,12 @@ ;; Multiple possible reusable globals. Also test ignoring of imports. (module ;; CHECK: (import "a" "b" (global $import (ref string))) + ;; LOWER: (type $0 (array (mut i16))) + + ;; LOWER: (type $1 (func (param (ref null $0) i32 i32) (result (ref extern)))) + + ;; LOWER: (type $2 (func (param i32) (result (ref extern)))) + ;; LOWER: (import "a" "b" (global $import (ref extern))) (import "a" "b" (global $import (ref string))) @@ -122,6 +138,10 @@ ;; LOWER: (import "string.const" "1" (global $global4 (ref extern))) + ;; LOWER: (import "wasm:js-string" "fromCharCodeArray" (func $fromCharCodeArray (type $1) (param (ref null $0) i32 i32) (result (ref extern)))) + + ;; LOWER: (import "wasm:js-string" "fromCodePoint" (func $fromCodePoint (type $2) (param i32) (result (ref extern)))) + ;; LOWER: (global $global2 (ref extern) (global.get $global1)) (global $global2 (ref string) (string.const "foo")) diff --git a/test/lit/passes/string-lowering-instructions.wast b/test/lit/passes/string-lowering-instructions.wast new file mode 100644 index 00000000000..05d555ef0df --- /dev/null +++ b/test/lit/passes/string-lowering-instructions.wast @@ -0,0 +1,95 @@ +;; NOTE: Assertions have been generated by update_lit_checks.py --all-items and should not be edited. 
+ +;; RUN: foreach %s %t wasm-opt --string-lowering -all -S -o - | filecheck %s + +(module + ;; CHECK: (type $0 (func)) + + ;; CHECK: (type $array16 (array (mut i16))) + (type $array16 (array (mut i16))) + + ;; CHECK: (rec + ;; CHECK-NEXT: (type $2 (func (param (ref $array16)))) + + ;; CHECK: (type $3 (func (param externref externref externref externref))) + + ;; CHECK: (type $4 (func (param (ref null $array16) i32 i32) (result (ref extern)))) + + ;; CHECK: (type $5 (func (param i32) (result (ref extern)))) + + ;; CHECK: (import "colliding" "name" (func $fromCodePoint (type $0))) + (import "colliding" "name" (func $fromCodePoint)) + + ;; CHECK: (import "wasm:js-string" "fromCharCodeArray" (func $fromCharCodeArray (type $4) (param (ref null $array16) i32 i32) (result (ref extern)))) + + ;; CHECK: (import "wasm:js-string" "fromCodePoint" (func $fromCodePoint_5 (type $5) (param i32) (result (ref extern)))) + + ;; CHECK: (func $string.as (type $3) (param $a externref) (param $b externref) (param $c externref) (param $d externref) + ;; CHECK-NEXT: (local.set $b + ;; CHECK-NEXT: (local.get $a) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $c + ;; CHECK-NEXT: (local.get $a) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: (local.set $d + ;; CHECK-NEXT: (local.get $a) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $string.as + (param $a stringref) + (param $b stringview_wtf8) + (param $c stringview_wtf16) + (param $d stringview_iter) + ;; These operations all vanish in the lowering, as they all become externref + ;; (JS strings). 
+ (local.set $b + (string.as_wtf8 + (local.get $a) + ) + ) + (local.set $c + (string.as_wtf16 + (local.get $a) + ) + ) + (local.set $d + (string.as_iter + (local.get $a) + ) + ) + ) + + ;; CHECK: (func $string.new.gc (type $2) (param $array16 (ref $array16)) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (call $fromCharCodeArray + ;; CHECK-NEXT: (local.get $array16) + ;; CHECK-NEXT: (i32.const 7) + ;; CHECK-NEXT: (i32.const 8) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $string.new.gc (param $array16 (ref $array16)) + (drop + (string.new_wtf16_array + (local.get $array16) + (i32.const 7) + (i32.const 8) + ) + ) + ) + + ;; CHECK: (func $string.from_code_point (type $0) + ;; CHECK-NEXT: (drop + ;; CHECK-NEXT: (call $fromCodePoint_5 + ;; CHECK-NEXT: (i32.const 1) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + ;; CHECK-NEXT: ) + (func $string.from_code_point + (drop + (string.from_code_point + (i32.const 1) + ) + ) + ) +)