Skip to content

Commit

Permalink
runtime: add wasm bulk memory operations
Browse files Browse the repository at this point in the history
The existing implementation uses hand-written loops for bulk memory
operations such as memcpy and memclr. Now that the WebAssembly bulk memory
operations have been standardized and are implemented in all major browsers
and engines (see https://webassembly.org/roadmap/), we should use the
corresponding instructions (memory.copy and memory.fill) to improve
performance.

Updates #28360

Change-Id: I28df0e0350287d5e7e1d1c09a4064ea1054e7575
Reviewed-on: https://go-review.googlesource.com/c/go/+/444935
Reviewed-by: Cherry Mui <[email protected]>
Reviewed-by: Keith Randall <[email protected]>
TryBot-Result: Gopher Robot <[email protected]>
Run-TryBot: Richard Musiol <[email protected]>
Reviewed-by: David Chase <[email protected]>
Auto-Submit: Richard Musiol <[email protected]>
Reviewed-by: Richard Musiol <[email protected]>
  • Loading branch information
garet90 authored and gopherbot committed Oct 27, 2022
1 parent 599a1e4 commit 50557ed
Show file tree
Hide file tree
Showing 15 changed files with 59 additions and 347 deletions.
4 changes: 0 additions & 4 deletions src/cmd/compile/internal/ir/symtab.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,10 +58,6 @@ var Syms struct {
// Wasm
WasmDiv *obj.LSym
// Wasm
WasmMove *obj.LSym
// Wasm
WasmZero *obj.LSym
// Wasm
WasmTruncS *obj.LSym
// Wasm
WasmTruncU *obj.LSym
Expand Down
25 changes: 5 additions & 20 deletions src/cmd/compile/internal/ssa/_gen/Wasm.rules
Original file line number Diff line number Diff line change
Expand Up @@ -234,24 +234,9 @@
(I64Store [s-8] dst (I64Load [s-8] src mem)
(I64Store dst (I64Load src mem) mem))

// Adjust moves to be a multiple of 16 bytes.
(Move [s] dst src mem)
&& s > 16 && s%16 != 0 && s%16 <= 8 =>
(Move [s-s%16]
(OffPtr <dst.Type> dst [s%16])
(OffPtr <src.Type> src [s%16])
(I64Store dst (I64Load src mem) mem))
(Move [s] dst src mem)
&& s > 16 && s%16 != 0 && s%16 > 8 =>
(Move [s-s%16]
(OffPtr <dst.Type> dst [s%16])
(OffPtr <src.Type> src [s%16])
(I64Store [8] dst (I64Load [8] src mem)
(I64Store dst (I64Load src mem) mem)))

// Large copying uses helper.
(Move [s] dst src mem) && s%8 == 0 && logLargeCopy(v, s) =>
(LoweredMove [s/8] dst src mem)
(Move [s] dst src mem) && logLargeCopy(v, s) =>
(LoweredMove [s] dst src mem)

// Lowering Zero instructions
(Zero [0] _ mem) => mem
Expand All @@ -274,7 +259,7 @@
(I64Store32 destptr (I64Const [0]) mem))

// Strip off any fractional word zeroing.
(Zero [s] destptr mem) && s%8 != 0 && s > 8 =>
(Zero [s] destptr mem) && s%8 != 0 && s > 8 && s < 32 =>
(Zero [s-s%8] (OffPtr <destptr.Type> destptr [s%8])
(I64Store destptr (I64Const [0]) mem))

Expand All @@ -293,8 +278,8 @@
(I64Store destptr (I64Const [0]) mem))))

// Large zeroing uses helper.
(Zero [s] destptr mem) && s%8 == 0 && s > 32 =>
(LoweredZero [s/8] destptr mem)
(Zero [s] destptr mem) =>
(LoweredZero [s] destptr mem)

// Lowering constants
(Const64 ...) => (I64Const ...)
Expand Down
4 changes: 2 additions & 2 deletions src/cmd/compile/internal/ssa/_gen/WasmOps.go
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,8 @@ func init() {
{name: "LoweredInterCall", argLength: 2, reg: regInfo{inputs: []regMask{gp}, clobbers: callerSave}, aux: "CallOff", call: true}, // call fn by pointer. arg0=codeptr, arg1=mem, auxint=argsize, returns mem

{name: "LoweredAddr", argLength: 1, reg: gp11, aux: "SymOff", rematerializeable: true, symEffect: "Addr"}, // returns base+aux+auxint, arg0=base
{name: "LoweredMove", argLength: 3, reg: regInfo{inputs: []regMask{gp, gp}}, aux: "Int64"}, // large move. arg0=dst, arg1=src, arg2=mem, auxint=len/8, returns mem
{name: "LoweredZero", argLength: 2, reg: regInfo{inputs: []regMask{gp}}, aux: "Int64"}, // large zeroing. arg0=start, arg1=mem, auxint=len/8, returns mem
{name: "LoweredMove", argLength: 3, reg: regInfo{inputs: []regMask{gp, gp}}, aux: "Int64"}, // large move. arg0=dst, arg1=src, arg2=mem, auxint=len, returns mem
{name: "LoweredZero", argLength: 2, reg: regInfo{inputs: []regMask{gp}}, aux: "Int64"}, // large zeroing. arg0=start, arg1=mem, auxint=len, returns mem

{name: "LoweredGetClosurePtr", reg: gp01}, // returns wasm.REG_CTXT, the closure pointer
{name: "LoweredGetCallerPC", reg: gp01, rematerializeable: true}, // returns the PC of the caller of the current function
Expand Down
79 changes: 8 additions & 71 deletions src/cmd/compile/internal/ssa/rewriteWasm.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 0 additions & 2 deletions src/cmd/compile/internal/ssagen/ssa.go
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,6 @@ func InitConfig() {
}

// Wasm (all asm funcs with special ABIs)
ir.Syms.WasmMove = typecheck.LookupRuntimeVar("wasmMove")
ir.Syms.WasmZero = typecheck.LookupRuntimeVar("wasmZero")
ir.Syms.WasmDiv = typecheck.LookupRuntimeVar("wasmDiv")
ir.Syms.WasmTruncS = typecheck.LookupRuntimeVar("wasmTruncS")
ir.Syms.WasmTruncU = typecheck.LookupRuntimeVar("wasmTruncU")
Expand Down
7 changes: 3 additions & 4 deletions src/cmd/compile/internal/wasm/ssa.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,14 +149,13 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
getValue32(s, v.Args[0])
getValue32(s, v.Args[1])
i32Const(s, int32(v.AuxInt))
p := s.Prog(wasm.ACall)
p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: ir.Syms.WasmMove}
s.Prog(wasm.AMemoryCopy)

case ssa.OpWasmLoweredZero:
getValue32(s, v.Args[0])
i32Const(s, 0)
i32Const(s, int32(v.AuxInt))
p := s.Prog(wasm.ACall)
p.To = obj.Addr{Type: obj.TYPE_MEM, Name: obj.NAME_EXTERN, Sym: ir.Syms.WasmZero}
s.Prog(wasm.AMemoryFill)

case ssa.OpWasmLoweredNilCheck:
getValue64(s, v.Args[0])
Expand Down
11 changes: 11 additions & 0 deletions src/cmd/internal/obj/wasm/a.out.go
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,17 @@ const (
AI64TruncSatF64S
AI64TruncSatF64U

AMemoryInit
ADataDrop
AMemoryCopy
AMemoryFill
ATableInit
AElemDrop
ATableCopy
ATableGrow
ATableSize
ATableFill

ALast // Sentinel: End of low-level WebAssembly instructions.

ARESUMEPOINT
Expand Down
10 changes: 10 additions & 0 deletions src/cmd/internal/obj/wasm/anames.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

10 changes: 6 additions & 4 deletions src/cmd/internal/obj/wasm/wasmobj.go
Original file line number Diff line number Diff line change
Expand Up @@ -799,8 +799,6 @@ var notUsePC_B = map[string]bool{
"wasm_export_resume": true,
"wasm_export_getsp": true,
"wasm_pc_f_loop": true,
"runtime.wasmMove": true,
"runtime.wasmZero": true,
"runtime.wasmDiv": true,
"runtime.wasmTruncS": true,
"runtime.wasmTruncU": true,
Expand Down Expand Up @@ -844,7 +842,7 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
// Some functions use a special calling convention.
switch s.Name {
case "_rt0_wasm_js", "wasm_export_run", "wasm_export_resume", "wasm_export_getsp", "wasm_pc_f_loop",
"runtime.wasmMove", "runtime.wasmZero", "runtime.wasmDiv", "runtime.wasmTruncS", "runtime.wasmTruncU", "memeqbody":
"runtime.wasmDiv", "runtime.wasmTruncS", "runtime.wasmTruncU", "memeqbody":
varDecls = []*varDecl{}
useAssemblyRegMap()
case "memchr", "memcmp":
Expand Down Expand Up @@ -1088,7 +1086,11 @@ func assemble(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
writeUleb128(w, align(p.As))
writeUleb128(w, uint64(p.To.Offset))

case ACurrentMemory, AGrowMemory:
case ACurrentMemory, AGrowMemory, AMemoryFill:
w.WriteByte(0x00)

case AMemoryCopy:
w.WriteByte(0x00)
w.WriteByte(0x00)

}
Expand Down
2 changes: 0 additions & 2 deletions src/cmd/link/internal/wasm/asm.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,6 @@ var wasmFuncTypes = map[string]*wasmFuncType{
"wasm_export_resume": {Params: []byte{}}, //
"wasm_export_getsp": {Results: []byte{I32}}, // sp
"wasm_pc_f_loop": {Params: []byte{}}, //
"runtime.wasmMove": {Params: []byte{I32, I32, I32}}, // dst, src, len
"runtime.wasmZero": {Params: []byte{I32, I32}}, // ptr, len
"runtime.wasmDiv": {Params: []byte{I64, I64}, Results: []byte{I64}}, // x, y -> x/y
"runtime.wasmTruncS": {Params: []byte{F64}, Results: []byte{I64}}, // x -> int(x)
"runtime.wasmTruncU": {Params: []byte{F64}, Results: []byte{I64}}, // x -> uint(x)
Expand Down
4 changes: 1 addition & 3 deletions src/runtime/asm_wasm.s
Original file line number Diff line number Diff line change
Expand Up @@ -320,10 +320,8 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-48; \
I64Load stackArgs+16(FP); \
I32WrapI64; \
I64Load stackArgsSize+24(FP); \
I64Const $3; \
I64ShrU; \
I32WrapI64; \
Call runtime·wasmMove(SB); \
MemoryCopy; \
End; \
\
MOVD f+8(FP), CTXT; \
Expand Down
33 changes: 7 additions & 26 deletions src/runtime/memclr_wasm.s
Original file line number Diff line number Diff line change
Expand Up @@ -11,29 +11,10 @@ TEXT runtime·memclrNoHeapPointers(SB), NOSPLIT, $0-16
MOVD ptr+0(FP), R0
MOVD n+8(FP), R1

loop:
Loop
Get R1
I64Eqz
If
RET
End

Get R0
I32WrapI64
I64Const $0
I64Store8 $0

Get R0
I64Const $1
I64Add
Set R0

Get R1
I64Const $1
I64Sub
Set R1

Br loop
End
UNDEF
Get R0
I32WrapI64
I32Const $0
Get R1
I32WrapI64
MemoryFill
RET
Loading

0 comments on commit 50557ed

Please sign in to comment.