diff --git a/src/cmd/compile/internal/amd64/ssa.go b/src/cmd/compile/internal/amd64/ssa.go
index 30131bd5590ee8..68266d35d67a95 100644
--- a/src/cmd/compile/internal/amd64/ssa.go
+++ b/src/cmd/compile/internal/amd64/ssa.go
@@ -263,6 +263,23 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
 		p.To.Reg = lo
 		p.SetFrom3Reg(hi)
 
+	case ssa.OpAMD64BLSIQ, ssa.OpAMD64BLSIL,
+		ssa.OpAMD64BLSMSKQ, ssa.OpAMD64BLSMSKL,
+		ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL:
+		p := s.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = v.Args[0].Reg()
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = v.Reg()
+
+	case ssa.OpAMD64ANDNQ, ssa.OpAMD64ANDNL:
+		p := s.Prog(v.Op.Asm())
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = v.Args[0].Reg()
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = v.Reg()
+		p.SetFrom3Reg(v.Args[1].Reg())
+
 	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
 		// Arg[0] (the dividend) is in AX.
 		// Arg[1] (the divisor) can be in any other register.
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index bfed3bc7fdaa03..edb1a4869a34c5 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -639,6 +639,7 @@
 // Recognize bit clearing: a &^= 1<<b
 (AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y)
+(ANDN(Q|L) x (SHL(Q|L) (MOV(Q|L)const [1]) y)) => (BTR(Q|L) x y)
 (ANDQconst [c] x) && isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128
 	=> (BTRQconst [int8(log32(^c))] x)
 (ANDLconst [c] x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
@@ -2204,3 +2205,9 @@
 // Prefetch instructions
 (PrefetchCache ...) => (PrefetchT0 ...)
 (PrefetchCacheStreamed ...) => (PrefetchNTA ...)
+
+// CPUID feature: BMI1.
+(AND(Q|L) x (NOT(Q|L) y)) && buildcfg.GOAMD64 >= 3 => (ANDN(Q|L) x y)
+(AND(Q|L) x (NEG(Q|L) x)) && buildcfg.GOAMD64 >= 3 => (BLSI(Q|L) x)
+(XOR(Q|L) x (ADD(Q|L)const [-1] x)) && buildcfg.GOAMD64 >= 3 => (BLSMSK(Q|L) x)
+(AND(Q|L) x (ADD(Q|L)const [-1] x)) && buildcfg.GOAMD64 >= 3 => (BLSR(Q|L) x)
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index 51cbf5f78a215e..6e4c514bd02b84 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -908,6 +908,16 @@ func init() {
 		// Do prefetch arg0 address. arg0=addr, arg1=memory. Instruction variant selects locality hint
 		{name: "PrefetchT0", argLength: 2, reg: prefreg, asm: "PREFETCHT0", hasSideEffects: true},
 		{name: "PrefetchNTA", argLength: 2, reg: prefreg, asm: "PREFETCHNTA", hasSideEffects: true},
+
+		// CPUID feature: BMI1.
+ {name: "ANDNQ", argLength: 2, reg: gp21, asm: "ANDNQ", clobberFlags: true}, // arg0 &^ arg1 + {name: "ANDNL", argLength: 2, reg: gp21, asm: "ANDNL", clobberFlags: true}, // arg0 &^ arg1 + {name: "BLSIQ", argLength: 1, reg: gp11, asm: "BLSIQ", clobberFlags: true}, // arg0 & -arg0 + {name: "BLSIL", argLength: 1, reg: gp11, asm: "BLSIL", clobberFlags: true}, // arg0 & -arg0 + {name: "BLSMSKQ", argLength: 1, reg: gp11, asm: "BLSMSKQ", clobberFlags: true}, // arg0 ^ (arg0 - 1) + {name: "BLSMSKL", argLength: 1, reg: gp11, asm: "BLSMSKL", clobberFlags: true}, // arg0 ^ (arg0 - 1) + {name: "BLSRQ", argLength: 1, reg: gp11, asm: "BLSRQ", clobberFlags: true}, // arg0 & (arg0 - 1) + {name: "BLSRL", argLength: 1, reg: gp11, asm: "BLSRL", clobberFlags: true}, // arg0 & (arg0 - 1) } var AMD64blocks = []blockData{ diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index ceb0a24285e85a..fed3bc338644d4 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1033,6 +1033,14 @@ const ( OpAMD64ORLlock OpAMD64PrefetchT0 OpAMD64PrefetchNTA + OpAMD64ANDNQ + OpAMD64ANDNL + OpAMD64BLSIQ + OpAMD64BLSIL + OpAMD64BLSMSKQ + OpAMD64BLSMSKL + OpAMD64BLSRQ + OpAMD64BLSRL OpARMADD OpARMADDconst @@ -13628,6 +13636,120 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "ANDNQ", + argLen: 2, + clobberFlags: true, + asm: x86.AANDNQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + {1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + outputs: []outputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + }, + }, + { + name: "ANDNL", + argLen: 2, + clobberFlags: true, + asm: x86.AANDNL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + {1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + outputs: []outputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + }, + }, + { + name: "BLSIQ", + argLen: 1, + clobberFlags: true, + asm: x86.ABLSIQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + outputs: []outputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + }, + }, + { + name: "BLSIL", + argLen: 1, + clobberFlags: true, + asm: x86.ABLSIL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + outputs: []outputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + }, + }, + { + name: "BLSMSKQ", + argLen: 1, + clobberFlags: true, + asm: x86.ABLSMSKQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + outputs: []outputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + }, + }, + { + name: "BLSMSKL", + argLen: 1, + clobberFlags: true, + asm: x86.ABLSMSKL, + reg: regInfo{ + inputs: []inputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + outputs: []outputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + }, + }, + { + name: "BLSRQ", + argLen: 1, + clobberFlags: true, + asm: x86.ABLSRQ, + reg: regInfo{ + inputs: []inputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + outputs: []outputInfo{ + {0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15 + }, + }, + }, + { + name: "BLSRL", + argLen: 1, + clobberFlags: true, + asm: x86.ABLSRL, + 
+		reg: regInfo{
+			inputs: []inputInfo{
+				{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+			},
+			outputs: []outputInfo{
+				{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
+			},
+		},
+	},
 	{
 		name:        "ADD",
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index e20161c9209237..906260fb141291 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -3,6 +3,7 @@
 
 package ssa
 
+import "internal/buildcfg"
 import "math"
 import "cmd/internal/obj"
 import "cmd/compile/internal/types"
@@ -53,6 +54,10 @@ func rewriteValueAMD64(v *Value) bool {
 		return rewriteValueAMD64_OpAMD64ANDLload(v)
 	case OpAMD64ANDLmodify:
 		return rewriteValueAMD64_OpAMD64ANDLmodify(v)
+	case OpAMD64ANDNL:
+		return rewriteValueAMD64_OpAMD64ANDNL(v)
+	case OpAMD64ANDNQ:
+		return rewriteValueAMD64_OpAMD64ANDNQ(v)
 	case OpAMD64ANDQ:
 		return rewriteValueAMD64_OpAMD64ANDQ(v)
 	case OpAMD64ANDQconst:
 		return rewriteValueAMD64_OpAMD64ANDQconst(v)
@@ -2759,6 +2764,55 @@ func rewriteValueAMD64_OpAMD64ANDL(v *Value) bool {
 		}
 		break
 	}
+	// match: (ANDL x (NOTL y))
+	// cond: buildcfg.GOAMD64 >= 3
+	// result: (ANDNL x y)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			x := v_0
+			if v_1.Op != OpAMD64NOTL {
+				continue
+			}
+			y := v_1.Args[0]
+			if !(buildcfg.GOAMD64 >= 3) {
+				continue
+			}
+			v.reset(OpAMD64ANDNL)
+			v.AddArg2(x, y)
+			return true
+		}
+		break
+	}
+	// match: (ANDL x (NEGL x))
+	// cond: buildcfg.GOAMD64 >= 3
+	// result: (BLSIL x)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			x := v_0
+			if v_1.Op != OpAMD64NEGL || x != v_1.Args[0] || !(buildcfg.GOAMD64 >= 3) {
+				continue
+			}
+			v.reset(OpAMD64BLSIL)
+			v.AddArg(x)
+			return true
+		}
+		break
+	}
+	// match: (ANDL x (ADDLconst [-1] x))
+	// cond: buildcfg.GOAMD64 >= 3
+	// result: (BLSRL x)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			x := v_0
+			if v_1.Op != OpAMD64ADDLconst || auxIntToInt32(v_1.AuxInt) != -1 || x != v_1.Args[0] || !(buildcfg.GOAMD64 >= 3) {
+				continue
+			}
+			v.reset(OpAMD64BLSRL)
+			v.AddArg(x)
+			return true
+		}
+		break
+	}
 	return false
 }
 func rewriteValueAMD64_OpAMD64ANDLconst(v *Value) bool {
@@ -3037,6 +3091,48 @@ func rewriteValueAMD64_OpAMD64ANDLmodify(v *Value) bool {
 	}
 	return false
 }
+func rewriteValueAMD64_OpAMD64ANDNL(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	// match: (ANDNL x (SHLL (MOVLconst [1]) y))
+	// result: (BTRL x y)
+	for {
+		x := v_0
+		if v_1.Op != OpAMD64SHLL {
+			break
+		}
+		y := v_1.Args[1]
+		v_1_0 := v_1.Args[0]
+		if v_1_0.Op != OpAMD64MOVLconst || auxIntToInt32(v_1_0.AuxInt) != 1 {
+			break
+		}
+		v.reset(OpAMD64BTRL)
+		v.AddArg2(x, y)
+		return true
+	}
+	return false
+}
+func rewriteValueAMD64_OpAMD64ANDNQ(v *Value) bool {
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	// match: (ANDNQ x (SHLQ (MOVQconst [1]) y))
+	// result: (BTRQ x y)
+	for {
+		x := v_0
+		if v_1.Op != OpAMD64SHLQ {
+			break
+		}
+		y := v_1.Args[1]
+		v_1_0 := v_1.Args[0]
+		if v_1_0.Op != OpAMD64MOVQconst || auxIntToInt64(v_1_0.AuxInt) != 1 {
+			break
+		}
+		v.reset(OpAMD64BTRQ)
+		v.AddArg2(x, y)
+		return true
+	}
+	return false
+}
 func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
@@ -3138,6 +3234,55 @@ func rewriteValueAMD64_OpAMD64ANDQ(v *Value) bool {
 		}
 		break
 	}
+	// match: (ANDQ x (NOTQ y))
+	// cond: buildcfg.GOAMD64 >= 3
+	// result: (ANDNQ x y)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			x := v_0
+			if v_1.Op != OpAMD64NOTQ {
+				continue
+			}
+			y := v_1.Args[0]
+			if !(buildcfg.GOAMD64 >= 3) {
+				continue
+			}
+			v.reset(OpAMD64ANDNQ)
+			v.AddArg2(x, y)
+			return true
+		}
+		break
+	}
+	// match: (ANDQ x (NEGQ x))
+	// cond: buildcfg.GOAMD64 >= 3
+	// result: (BLSIQ x)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			x := v_0
+			if v_1.Op != OpAMD64NEGQ || x != v_1.Args[0] || !(buildcfg.GOAMD64 >= 3) {
+				continue
+			}
+			v.reset(OpAMD64BLSIQ)
+			v.AddArg(x)
+			return true
+		}
+		break
+	}
+	// match: (ANDQ x (ADDQconst [-1] x))
+	// cond: buildcfg.GOAMD64 >= 3
+	// result: (BLSRQ x)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			x := v_0
+			if v_1.Op != OpAMD64ADDQconst || auxIntToInt32(v_1.AuxInt) != -1 || x != v_1.Args[0] || !(buildcfg.GOAMD64 >= 3) {
+				continue
+			}
+			v.reset(OpAMD64BLSRQ)
+			v.AddArg(x)
+			return true
+		}
+		break
+	}
 	return false
 }
 func rewriteValueAMD64_OpAMD64ANDQconst(v *Value) bool {
@@ -26474,6 +26619,21 @@ func rewriteValueAMD64_OpAMD64XORL(v *Value) bool {
 		}
 		break
 	}
+	// match: (XORL x (ADDLconst [-1] x))
+	// cond: buildcfg.GOAMD64 >= 3
+	// result: (BLSMSKL x)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			x := v_0
+			if v_1.Op != OpAMD64ADDLconst || auxIntToInt32(v_1.AuxInt) != -1 || x != v_1.Args[0] || !(buildcfg.GOAMD64 >= 3) {
+				continue
+			}
+			v.reset(OpAMD64BLSMSKL)
+			v.AddArg(x)
+			return true
+		}
+		break
+	}
 	return false
 }
 func rewriteValueAMD64_OpAMD64XORLconst(v *Value) bool {
@@ -26950,6 +27110,21 @@ func rewriteValueAMD64_OpAMD64XORQ(v *Value) bool {
 		}
 		break
 	}
+	// match: (XORQ x (ADDQconst [-1] x))
+	// cond: buildcfg.GOAMD64 >= 3
+	// result: (BLSMSKQ x)
+	for {
+		for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
+			x := v_0
+			if v_1.Op != OpAMD64ADDQconst || auxIntToInt32(v_1.AuxInt) != -1 || x != v_1.Args[0] || !(buildcfg.GOAMD64 >= 3) {
+				continue
+			}
+			v.reset(OpAMD64BLSMSKQ)
+			v.AddArg(x)
+			return true
+		}
+		break
+	}
 	return false
 }
 func rewriteValueAMD64_OpAMD64XORQconst(v *Value) bool {
diff --git a/test/codegen/bmi.go b/test/codegen/bmi.go
new file mode 100644
index 00000000000000..0c25e0b7968dac
--- /dev/null
+++ b/test/codegen/bmi.go
@@ -0,0 +1,47 @@
+// asmcheck
+
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package codegen
+
+func andn64(x, y int64) int64 {
+	// amd64/v3:"ANDNQ"
+	return x &^ y
+}
+
+func andn32(x, y int32) int32 {
+	// amd64/v3:"ANDNL"
+	return x &^ y
+}
+
+func blsi64(x int64) int64 {
+	// amd64/v3:"BLSIQ"
+	return x & -x
+}
+
+func blsi32(x int32) int32 {
+	// amd64/v3:"BLSIL"
+	return x & -x
+}
+
+func blsmsk64(x int64) int64 {
+	// amd64/v3:"BLSMSKQ"
+	return x ^ (x - 1)
+}
+
+func blsmsk32(x int32) int32 {
+	// amd64/v3:"BLSMSKL"
+	return x ^ (x - 1)
+}
+
+func blsr64(x int64) int64 {
+	// amd64/v3:"BLSRQ"
+	return x & (x - 1)
+}
+
+func blsr32(x int32) int32 {
+	// amd64/v3:"BLSRL"
+	return x & (x - 1)
+}
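
Not part of the patch: a minimal standalone Go sketch (file and function names are illustrative only) of the bit-manipulation identities that the new SSA ops encode, mirroring the op comments in AMD64Ops.go: ANDN is arg0 &^ arg1, BLSI is arg0 & -arg0 (isolate lowest set bit), BLSMSK is arg0 ^ (arg0 - 1) (mask through lowest set bit), BLSR is arg0 & (arg0 - 1) (clear lowest set bit).

// bmi_identities.go (illustrative, not part of this change): demonstrates the
// identities that the new ANDN/BLSI/BLSMSK/BLSR ops implement.
package main

import "fmt"

func main() {
	x, y := uint64(0b1011_0100), uint64(0b0110_0110)

	fmt.Printf("andn:   %08b\n", x&^y)    // ANDN:   10010000 (bits of x cleared where y is set)
	fmt.Printf("blsi:   %08b\n", x&-x)    // BLSI:   00000100 (lowest set bit isolated)
	fmt.Printf("blsmsk: %08b\n", x^(x-1)) // BLSMSK: 00000111 (mask up to and including lowest set bit)
	fmt.Printf("blsr:   %08b\n", x&(x-1)) // BLSR:   10110000 (lowest set bit cleared)
}

When these expressions are written on int64/int32 values and the package is compiled with GOAMD64=v3 (so buildcfg.GOAMD64 >= 3 in the rules above), the compiler selects ANDNQ/ANDNL, BLSIQ/BLSIL, BLSMSKQ/BLSMSKL and BLSRQ/BLSRL, which is what the asmcheck comments in test/codegen/bmi.go assert.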