Skip to content

Commit

Permalink
cmd/compile: use BMI1 instructions for GOAMD64=v3 and higher
Browse files Browse the repository at this point in the history
BMI1 includes four instructions (ANDN, BLSI, BLSMSK, BLSR) that are
easy to peephole optimize, and which GCC always seems to favor using
when available and applicable.

Updates #45453.

Change-Id: I0274184057058f5c579e5bc3ea9c414396d3cf46
Reviewed-on: https://go-review.googlesource.com/c/go/+/351130
Run-TryBot: Matthew Dempsky <[email protected]>
Trust: Matthew Dempsky <[email protected]>
TryBot-Result: Go Bot <[email protected]>
Reviewed-by: Keith Randall <[email protected]>
  • Loading branch information
mdempsky committed Sep 22, 2021
1 parent 30faf96 commit 04572fa
Show file tree
Hide file tree
Showing 6 changed files with 378 additions and 0 deletions.
17 changes: 17 additions & 0 deletions src/cmd/compile/internal/amd64/ssa.go
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,23 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Reg = lo
p.SetFrom3Reg(hi)

// BMI1 one-source bit-manipulation ops, in 64-bit (Q) and 32-bit (L) forms.
// Per the AMD64 op table they compute:
//   BLSI:   arg0 & -arg0       (keeps only the lowest set bit)
//   BLSMSK: arg0 ^ (arg0 - 1)  (mask up to and including the lowest set bit)
//   BLSR:   arg0 & (arg0 - 1)  (clears the lowest set bit)
// All six share the same encoding shape: one source register, one
// destination register, so a single emission path handles them.
case ssa.OpAMD64BLSIQ, ssa.OpAMD64BLSIL,
ssa.OpAMD64BLSMSKQ, ssa.OpAMD64BLSMSKL,
ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL:
// The concrete instruction is taken from the op itself.
p := s.Prog(v.Op.Asm())
// Source operand: the register holding arg0.
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[0].Reg()
// Destination operand: v's own result register (read via v.Reg(),
// not reused from arg0's register).
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()

// BMI1 and-not, in 64-bit (Q) and 32-bit (L) forms. Per the AMD64 op
// table, ANDN computes arg0 &^ arg1.
// It is a three-operand instruction: two source registers plus a
// destination, so the second source is attached as the extra (From3)
// operand.
case ssa.OpAMD64ANDNQ, ssa.OpAMD64ANDNL:
p := s.Prog(v.Op.Asm())
// First source: arg0, the value being masked.
p.From.Type = obj.TYPE_REG
p.From.Reg = v.Args[0].Reg()
// Destination: v's own result register.
p.To.Type = obj.TYPE_REG
p.To.Reg = v.Reg()
// Second source: arg1, the operand that is complemented in arg0 &^ arg1.
// NOTE(review): which assembler operand slot gets inverted is defined by
// the assembler's ANDN operand order — confirm against the obj/x86 docs
// if this wiring is ever changed.
p.SetFrom3Reg(v.Args[1].Reg())

case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
// Arg[0] (the dividend) is in AX.
// Arg[1] (the divisor) can be in any other register.
Expand Down
7 changes: 7 additions & 0 deletions src/cmd/compile/internal/ssa/gen/AMD64.rules
Original file line number Diff line number Diff line change
Expand Up @@ -639,6 +639,7 @@

// Recognize bit clearing: a &^= 1<<b
(AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y)
(ANDN(Q|L) x (SHL(Q|L) (MOV(Q|L)const [1]) y)) => (BTR(Q|L) x y)
(ANDQconst [c] x) && isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128
=> (BTRQconst [int8(log32(^c))] x)
(ANDLconst [c] x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
Expand Down Expand Up @@ -2204,3 +2205,9 @@
// Prefetch instructions
(PrefetchCache ...) => (PrefetchT0 ...)
(PrefetchCacheStreamed ...) => (PrefetchNTA ...)

// CPUID feature: BMI1.
(AND(Q|L) x (NOT(Q|L) y)) && buildcfg.GOAMD64 >= 3 => (ANDN(Q|L) x y)
(AND(Q|L) x (NEG(Q|L) x)) && buildcfg.GOAMD64 >= 3 => (BLSI(Q|L) x)
(XOR(Q|L) x (ADD(Q|L)const [-1] x)) && buildcfg.GOAMD64 >= 3 => (BLSMSK(Q|L) x)
(AND(Q|L) x (ADD(Q|L)const [-1] x)) && buildcfg.GOAMD64 >= 3 => (BLSR(Q|L) x)
10 changes: 10 additions & 0 deletions src/cmd/compile/internal/ssa/gen/AMD64Ops.go
Original file line number Diff line number Diff line change
Expand Up @@ -908,6 +908,16 @@ func init() {
// Do prefetch arg0 address. arg0=addr, arg1=memory. Instruction variant selects locality hint
{name: "PrefetchT0", argLength: 2, reg: prefreg, asm: "PREFETCHT0", hasSideEffects: true},
{name: "PrefetchNTA", argLength: 2, reg: prefreg, asm: "PREFETCHNTA", hasSideEffects: true},

// CPUID feature: BMI1.
{name: "ANDNQ", argLength: 2, reg: gp21, asm: "ANDNQ", clobberFlags: true}, // arg0 &^ arg1
{name: "ANDNL", argLength: 2, reg: gp21, asm: "ANDNL", clobberFlags: true}, // arg0 &^ arg1
{name: "BLSIQ", argLength: 1, reg: gp11, asm: "BLSIQ", clobberFlags: true}, // arg0 & -arg0
{name: "BLSIL", argLength: 1, reg: gp11, asm: "BLSIL", clobberFlags: true}, // arg0 & -arg0
{name: "BLSMSKQ", argLength: 1, reg: gp11, asm: "BLSMSKQ", clobberFlags: true}, // arg0 ^ (arg0 - 1)
{name: "BLSMSKL", argLength: 1, reg: gp11, asm: "BLSMSKL", clobberFlags: true}, // arg0 ^ (arg0 - 1)
{name: "BLSRQ", argLength: 1, reg: gp11, asm: "BLSRQ", clobberFlags: true}, // arg0 & (arg0 - 1)
{name: "BLSRL", argLength: 1, reg: gp11, asm: "BLSRL", clobberFlags: true}, // arg0 & (arg0 - 1)
}

var AMD64blocks = []blockData{
Expand Down
122 changes: 122 additions & 0 deletions src/cmd/compile/internal/ssa/opGen.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 04572fa

Please sign in to comment.