Skip to content

Commit

Permalink
cmd/internal/obj/arm64: make function prologue more predictable
Browse files Browse the repository at this point in the history
Static branch prediction guesses that forward branches aren't taken.
Since stacks are rarely grown, make the forward branch mean grow.

While we're here, remove the debug-only instruction
saving the frame size in the temp register.

Sample disassembly for

func f() {
	_ = [128]byte{}
}

Before:

0x4008248         ldr        x1, [x28, #0x10]
0x400824c         sub        x2, sp, #0x90
0x4008250         cmp        x2, x1
0x4008254         b.hi       0x4008268
0x4008258         mov        x3, x30
0x400825c         movz       x27, #0x90
0x4008260         bl         runtime.morestack_noctxt
0x4008264         b          main.f
0x4008268         sub        sp, sp, #0x90
0x400826c         add        x16, sp, #0x10
0x4008270         str        xzr, [x16]
0x4008274         str        xzr, [x16, #0x8]
0x4008278         str        xzr, [x16, #0x10]
0x400827c         str        xzr, [x16, #0x18]
0x4008280         str        xzr, [x16, #0x20]
0x4008284         str        xzr, [x16, #0x28]
0x4008288         str        xzr, [x16, #0x30]
0x400828c         str        xzr, [x16, #0x38]
0x4008290         str        xzr, [x16, #0x40]
0x4008294         str        xzr, [x16, #0x48]
0x4008298         str        xzr, [x16, #0x50]
0x400829c         str        xzr, [x16, #0x58]
0x40082a0         str        xzr, [x16, #0x60]
0x40082a4         str        xzr, [x16, #0x68]
0x40082a8         str        xzr, [x16, #0x70]
0x40082ac         str        xzr, [x16, #0x78]
0x40082b0         add        sp, sp, #0x90
0x40082b4         ret

After:

0x4004bc8         ldr        x1, [x28, #0x10]
0x4004bcc         sub        x2, sp, #0x90
0x4004bd0         cmp        x2, x1
0x4004bd4         b.ls       0x4004c28
0x4004bd8         sub        sp, sp, #0x90
0x4004bdc         add        x16, sp, #0x10
0x4004be0         str        xzr, [x16]
0x4004be4         str        xzr, [x16, #0x8]
0x4004be8         str        xzr, [x16, #0x10]
0x4004bec         str        xzr, [x16, #0x18]
0x4004bf0         str        xzr, [x16, #0x20]
0x4004bf4         str        xzr, [x16, #0x28]
0x4004bf8         str        xzr, [x16, #0x30]
0x4004bfc         str        xzr, [x16, #0x38]
0x4004c00         str        xzr, [x16, #0x40]
0x4004c04         str        xzr, [x16, #0x48]
0x4004c08         str        xzr, [x16, #0x50]
0x4004c0c         str        xzr, [x16, #0x58]
0x4004c10         str        xzr, [x16, #0x60]
0x4004c14         str        xzr, [x16, #0x68]
0x4004c18         str        xzr, [x16, #0x70]
0x4004c1c         str        xzr, [x16, #0x78]
0x4004c20         add        sp, sp, #0x90
0x4004c24         ret
0x4004c28         mov        x3, x30
0x4004c2c         bl         runtime.morestack_noctxt
0x4004c30         b          main.f

Updates #10587.

Package sort benchmarks using an iPhone 6:

name            old time/op  new time/op  delta
SearchWrappers   355ns ± 1%   328ns ± 1%  -7.57%  (p=0.000 n=25+19)
SortString1K     580µs ± 1%   577µs ± 1%  -0.48%  (p=0.000 n=25+25)
StableString1K  1.04ms ± 0%  1.04ms ± 0%    ~     (p=0.851 n=24+25)
SortInt1K        251µs ± 1%   247µs ± 1%  -1.52%  (p=0.000 n=23+25)
StableInt1K      267µs ± 2%   261µs ± 2%  -2.02%  (p=0.000 n=25+25)
SortInt64K      23.8ms ± 1%  23.6ms ± 0%  -0.97%  (p=0.000 n=25+23)
StableInt64K    22.8ms ± 0%  22.4ms ± 1%  -1.76%  (p=0.000 n=24+25)
Sort1e2          123µs ± 1%   124µs ± 1%    ~     (p=0.256 n=23+23)
Stable1e2        248µs ± 1%   247µs ± 1%  -0.69%  (p=0.000 n=23+25)
Sort1e4         24.3ms ± 2%  24.6ms ± 5%  +1.36%  (p=0.017 n=22+25)
Stable1e4       77.2ms ± 6%  76.2ms ± 5%  -1.36%  (p=0.020 n=25+25)
Sort1e6          3.95s ± 8%   3.95s ± 8%    ~     (p=0.863 n=25+25)
Stable1e6        15.7s ± 1%   15.5s ± 1%  -1.11%  (p=0.000 n=22+23)

Change-Id: I377b3817af2ed27ddeecf24edef97fad91fc1afc
Reviewed-on: https://go-review.googlesource.com/10500
Reviewed-by: Russ Cox <[email protected]>
Run-TryBot: Josh Bleecher Snyder <[email protected]>
Reviewed-by: Dave Cheney <[email protected]>
  • Loading branch information
josharian committed Jun 4, 2015
1 parent 5353cde commit 54789ef
Showing 1 changed file with 43 additions and 42 deletions.
85 changes: 43 additions & 42 deletions src/cmd/internal/obj/arm64/obj7.go
Original file line number Diff line number Diff line change
Expand Up @@ -152,60 +152,61 @@ func stacksplit(ctxt *obj.Link, p *obj.Prog, framesize int32) *obj.Prog {
p.Reg = REG_R2
}

// BHI done
p = obj.Appendp(ctxt, p)
q1 := p
// BLS do-morestack
bls := obj.Appendp(ctxt, p)
bls.As = ABLS
bls.To.Type = obj.TYPE_BRANCH

p.As = ABHI
p.To.Type = obj.TYPE_BRANCH
var last *obj.Prog
for last = ctxt.Cursym.Text; last.Link != nil; last = last.Link {
}

// MOV LR, R3
p = obj.Appendp(ctxt, p)

p.As = AMOVD
p.From.Type = obj.TYPE_REG
p.From.Reg = REGLINK
p.To.Type = obj.TYPE_REG
p.To.Reg = REG_R3
movlr := obj.Appendp(ctxt, last)
movlr.As = AMOVD
movlr.From.Type = obj.TYPE_REG
movlr.From.Reg = REGLINK
movlr.To.Type = obj.TYPE_REG
movlr.To.Reg = REG_R3
if q != nil {
q.Pcond = p
q.Pcond = movlr
}
bls.Pcond = movlr

debug := movlr
if false {
debug = obj.Appendp(ctxt, debug)
debug.As = AMOVD
debug.From.Type = obj.TYPE_CONST
debug.From.Offset = int64(framesize)
debug.To.Type = obj.TYPE_REG
debug.To.Reg = REGTMP
}

// TODO(minux): only for debug
p = obj.Appendp(ctxt, p)
p.As = AMOVD
p.From.Type = obj.TYPE_CONST
p.From.Offset = int64(framesize)
p.To.Type = obj.TYPE_REG
p.To.Reg = REGTMP

// BL runtime.morestack(SB)
p = obj.Appendp(ctxt, p)

p.As = ABL
p.To.Type = obj.TYPE_BRANCH
if ctxt.Cursym.Cfunc != 0 {
p.To.Sym = obj.Linklookup(ctxt, "runtime.morestackc", 0)
} else if ctxt.Cursym.Text.From3.Offset&obj.NEEDCTXT == 0 {
p.To.Sym = obj.Linklookup(ctxt, "runtime.morestack_noctxt", 0)
} else {
p.To.Sym = obj.Linklookup(ctxt, "runtime.morestack", 0)
call := obj.Appendp(ctxt, debug)
call.As = ABL
call.To.Type = obj.TYPE_BRANCH
morestack := "runtime.morestack"
switch {
case ctxt.Cursym.Cfunc != 0:
morestack = "runtime.morestackc"
case ctxt.Cursym.Text.From3.Offset&obj.NEEDCTXT == 0:
morestack = "runtime.morestack_noctxt"
}
call.To.Sym = obj.Linklookup(ctxt, morestack, 0)

// B start
p = obj.Appendp(ctxt, p)

p.As = AB
p.To.Type = obj.TYPE_BRANCH
p.Pcond = ctxt.Cursym.Text.Link

// placeholder for q1's jump target
p = obj.Appendp(ctxt, p)
jmp := obj.Appendp(ctxt, call)
jmp.As = AB
jmp.To.Type = obj.TYPE_BRANCH
jmp.Pcond = ctxt.Cursym.Text.Link

p.As = obj.ANOP
q1.Pcond = p
// placeholder for bls's jump target
// p = obj.Appendp(ctxt, p)
// p.As = obj.ANOP

return p
return bls
}

func progedit(ctxt *obj.Link, p *obj.Prog) {
Expand Down

0 comments on commit 54789ef

Please sign in to comment.