Skip to content

Commit

Permalink
cmd/internal/obj/arm64: reclassify 32-bit/64-bit constants
Browse files Browse the repository at this point in the history
Current assembler saves constants in Offset which type is int64,
causing 32-bit constants have a incorrect class. This CL reclassifies
constants when opcodes are 32-bit variant, like MOVW, ANDW and
ADDW, etc. Besides, this CL encodes some constants of ADDCON class
as MOVs instructions.

This CL changes the assembler behavior as follows.

1. go assembler ADDW $MOVCON, Rn, Rd
   previous version: MOVD $MOVCON, Rtmp; ADDW Rtmp, Rn, Rd
   current version: MOVW $MOVCON, Rtmp; ADDW Rtmp, Rn, Rd

2. go assembly MOVW $0xaaaaffff, R1
   previous version: treats $0xaaaaffff as VCON, encodes it as MOVW 0x994, R1 (loads it from pool).
   current version: treats $0xaaaaffff as MOVCON, and encodes it into MOVW instructions.

3. go assembly MOVD $0x210000, R1
   previous version: treats $0x210000 as ADDCON, loads it from pool
   current version: treats $0x210000 as MOVCON, and encodes it into MOVD instructions.

Add the test cases.

1. Binary size before/after.
binary                          size change
pkg/linux_arm64                 -1.534KB
pkg/tool/linux_arm64            -0.718KB
go                              -0.32KB
gofmt                           no change

2. go1 benchmark result.
name                     old time/op    new time/op    delta
BinaryTree17-8              6.26s ± 1%     6.28s ± 1%     ~     (p=0.105 n=10+10)
Fannkuch11-8                5.40s ± 0%     5.39s ± 0%   -0.29%  (p=0.028 n=9+10)
FmtFprintfEmpty-8          94.5ns ± 0%    95.0ns ± 0%   +0.51%  (p=0.000 n=10+9)
FmtFprintfString-8          163ns ± 1%     159ns ± 1%   -2.06%  (p=0.000 n=10+9)
FmtFprintfInt-8             200ns ± 1%     196ns ± 1%   -1.99%  (p=0.000 n=9+10)
FmtFprintfIntInt-8          292ns ± 3%     284ns ± 1%   -2.87%  (p=0.001 n=10+9)
FmtFprintfPrefixedInt-8     422ns ± 1%     420ns ± 1%   -0.59%  (p=0.015 n=10+10)
FmtFprintfFloat-8           458ns ± 0%     463ns ± 1%   +1.19%  (p=0.000 n=9+10)
FmtManyArgs-8              1.37µs ± 1%    1.35µs ± 1%   -1.85%  (p=0.000 n=10+10)
GobDecode-8                15.5ms ± 1%    15.3ms ± 1%   -1.82%  (p=0.000 n=10+10)
GobEncode-8                11.7ms ± 5%    11.7ms ± 2%     ~     (p=0.549 n=10+9)
Gzip-8                      622ms ± 0%     624ms ± 0%   +0.23%  (p=0.000 n=10+9)
Gunzip-8                   73.6ms ± 0%    73.8ms ± 1%     ~     (p=0.077 n=9+9)
HTTPClientServer-8          115µs ± 1%     115µs ± 1%     ~     (p=0.796 n=10+10)
JSONEncode-8               31.1ms ± 2%    28.7ms ± 1%   -7.98%  (p=0.000 n=10+9)
JSONDecode-8                145ms ± 0%     145ms ± 1%     ~     (p=0.447 n=9+10)
Mandelbrot200-8            9.67ms ± 0%    9.60ms ± 0%   -0.76%  (p=0.000 n=9+9)
GoParse-8                  7.56ms ± 1%    7.58ms ± 0%   +0.21%  (p=0.035 n=10+9)
RegexpMatchEasy0_32-8       208ns ±10%     222ns ± 0%     ~     (p=0.531 n=10+6)
RegexpMatchEasy0_1K-8       699ns ± 4%     694ns ± 4%     ~     (p=0.868 n=10+10)
RegexpMatchEasy1_32-8       186ns ± 8%     190ns ±12%     ~     (p=0.955 n=10+10)
RegexpMatchEasy1_1K-8      1.13µs ± 1%    1.05µs ± 2%   -6.64%  (p=0.000 n=10+10)
RegexpMatchMedium_32-8      316ns ± 7%     288ns ± 1%   -8.68%  (p=0.000 n=10+7)
RegexpMatchMedium_1K-8     90.2µs ± 0%    85.5µs ± 2%   -5.19%  (p=0.000 n=10+10)
RegexpMatchHard_32-8       5.53µs ± 0%    3.90µs ± 0%  -29.52%  (p=0.000 n=10+10)
RegexpMatchHard_1K-8        119µs ± 0%     124µs ± 0%   +4.29%  (p=0.000 n=9+10)
Revcomp-8                   1.07s ± 0%     1.07s ± 0%     ~     (p=0.094 n=9+9)
Template-8                  162ms ± 1%     160ms ± 2%     ~     (p=0.089 n=10+10)
TimeParse-8                 756ns ± 2%     763ns ± 1%     ~     (p=0.158 n=10+10)
TimeFormat-8                758ns ± 1%     746ns ± 1%   -1.52%  (p=0.000 n=10+10)

name                     old speed      new speed      delta
GobDecode-8              49.4MB/s ± 1%  50.3MB/s ± 1%   +1.84%  (p=0.000 n=10+10)
GobEncode-8              65.6MB/s ± 5%  65.4MB/s ± 2%     ~     (p=0.549 n=10+9)
Gzip-8                   31.2MB/s ± 0%  31.1MB/s ± 0%   -0.24%  (p=0.000 n=9+9)
Gunzip-8                  264MB/s ± 0%   263MB/s ± 1%     ~     (p=0.073 n=9+9)
JSONEncode-8             62.3MB/s ± 2%  67.7MB/s ± 1%   +8.67%  (p=0.000 n=10+9)
JSONDecode-8             13.4MB/s ± 0%  13.4MB/s ± 1%     ~     (p=0.508 n=9+10)
GoParse-8                7.66MB/s ± 1%  7.64MB/s ± 0%   -0.23%  (p=0.049 n=10+9)
RegexpMatchEasy0_32-8     154MB/s ± 9%   143MB/s ± 3%     ~     (p=0.303 n=10+7)
RegexpMatchEasy0_1K-8    1.46GB/s ± 4%  1.47GB/s ± 4%     ~     (p=0.912 n=10+10)
RegexpMatchEasy1_32-8     172MB/s ± 9%   170MB/s ±12%     ~     (p=0.971 n=10+10)
RegexpMatchEasy1_1K-8     908MB/s ± 1%   972MB/s ± 2%   +7.12%  (p=0.000 n=10+10)
RegexpMatchMedium_32-8   3.17MB/s ± 7%  3.46MB/s ± 1%   +9.14%  (p=0.000 n=10+7)
RegexpMatchMedium_1K-8   11.3MB/s ± 0%  12.0MB/s ± 2%   +5.51%  (p=0.000 n=10+10)
RegexpMatchHard_32-8     5.78MB/s ± 0%  8.21MB/s ± 0%  +41.93%  (p=0.000 n=9+10)
RegexpMatchHard_1K-8     8.62MB/s ± 0%  8.27MB/s ± 0%   -4.11%  (p=0.000 n=9+10)
Revcomp-8                 237MB/s ± 0%   237MB/s ± 0%     ~     (p=0.081 n=9+9)
Template-8               12.0MB/s ± 1%  12.1MB/s ± 2%     ~     (p=0.072 n=10+10)

Change-Id: I080801f520366b42d5f9699954bd33106976a81b
Reviewed-on: https://go-review.googlesource.com/c/120661
Run-TryBot: Ben Shi <[email protected]>
TryBot-Result: Gobot Gobot <[email protected]>
Reviewed-by: Cherry Zhang <[email protected]>
  • Loading branch information
zhangfannie authored and cherrymui committed Oct 22, 2018
1 parent 1e8ecef commit 86ce1cb
Show file tree
Hide file tree
Showing 4 changed files with 169 additions and 32 deletions.
30 changes: 29 additions & 1 deletion src/cmd/asm/internal/asm/testdata/arm64.s
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,11 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
FMOVD F4, (R2)(R6) // FMOVD F4, (R2)(R6*1) // 446826fc
FMOVD F4, (R2)(R6<<3) // 447826fc

CMPW $40960, R0 // 1f284071
CMPW $27745, R2 // 3b8c8d525f001b6b
CMNW $0x3fffffc0, R2 // CMNW $1073741760, R2 // fb5f1a325f001b2b
CMPW $0xffff0, R1 // CMPW $1048560, R1 // fb3f1c323f001b6b
ADD $0x3fffffffc000, R5 // ADD $70368744161280, R5 // fb7f72b2a5001b8b
// LTYPE1 imsr ',' spreg ','
// {
// outcode($1, &$2, $4, &nullgen);
Expand Down Expand Up @@ -226,13 +231,36 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
ANDS $0x22220000, R3, R4 // ANDS $572653568, R3, R4 // 5b44a4d264001bea
BICS $0x22220000, R3, R4 // BICS $572653568, R3, R4 // 5b44a4d264003bea

EOR $0xe03fffffffffffff, R20, R22 // EOR $-2287828610704211969, R20, R22 // 96e243d2
TSTW $0x600000006, R1 // TSTW $25769803782, R1 // 3f041f72
ANDS $0xffff, R2 // ANDS $65535, R2 // 423c40f2
AND $0x7fffffff, R3 // AND $2147483647, R3 // 63784092
ANDS $0x0ffffffff80000000, R2 // ANDS $-2147483648, R2 // 428061f2
AND $0xfffff, R2 // AND $1048575, R2 // 424c4092
ANDW $0xf00fffff, R1 // ANDW $4027580415, R1 // 215c0412
ANDSW $0xff00ffff, R1 // ANDSW $4278255615, R1 // 215c0872
TSTW $0xff00ff, R1 // TSTW $16711935, R1 // 3f9c0072

AND $8, R0, RSP // 1f007d92
ORR $8, R0, RSP // 1f007db2
EOR $8, R0, RSP // 1f007dd2
BIC $8, R0, RSP // 1ff87c92
ORN $8, R0, RSP // 1ff87cb2
EON $8, R0, RSP // 1ff87cd2

MOVD $0x3fffffffc000, R0 // MOVD $70368744161280, R0 // e07f72b2
MOVW $0xaaaa0000, R1 // MOVW $2863267840, R1 // 4155b552
MOVW $0xaaaaffff, R1 // MOVW $2863333375, R1 // a1aaaa12
MOVW $0xaaaa, R1 // MOVW $43690, R1 // 41559552
MOVW $0xffffaaaa, R1 // MOVW $4294945450, R1 // a1aa8a12
MOVW $0xffff0000, R1 // MOVW $4294901760, R1 // e1ffbf52
MOVD $0xffff00000000000, R1 // MOVD $1152903912420802560, R1 // e13f54b2
MOVD $0x11110000, R1 // MOVD $286326784, R1 // 2122a2d2
MOVD $0, R1 // 010080d2
MOVD $-1, R1 // 01008092
MOVD $0x210000, R0 // MOVD $2162688, R0 // 2004a0d2
MOVD $0xffffffffffffaaaa, R1 // MOVD $-21846, R1 // a1aa8a92

//
// CLS
//
Expand Down Expand Up @@ -428,7 +456,7 @@ TEXT foo(SB), DUPOK|NOSPLIT, $-8
CMP R22.SXTX, RSP // ffe336eb

CMP $0x22220000, RSP // CMP $572653568, RSP // 5b44a4d2ff633beb
CMPW $0x22220000, RSP // CMPW $572653568, RSP // 5b44a4d2ff633b6b
CMPW $0x22220000, RSP // CMPW $572653568, RSP // 5b44a452ff633b6b

// TST
TST $15, R2 // 5f0c40f2
Expand Down
1 change: 1 addition & 0 deletions src/cmd/internal/obj/arm64/a.out.go
Original file line number Diff line number Diff line change
Expand Up @@ -407,6 +407,7 @@ const (
C_ABCON0 // could be C_ADDCON0 or C_BITCON
C_ADDCON0 // 12-bit unsigned, unshifted
C_ABCON // could be C_ADDCON or C_BITCON
C_AMCON // could be C_ADDCON or C_MOVCON
C_ADDCON // 12-bit unsigned, shifted left by 0 or 12
C_MBCON // could be C_MOVCON or C_BITCON
C_MOVCON // generated by a 16-bit constant, optionally inverted and/or shifted by multiple of 16
Expand Down
1 change: 1 addition & 0 deletions src/cmd/internal/obj/arm64/anames7.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ var cnames7 = []string{
"ABCON0",
"ADDCON0",
"ABCON",
"AMCON",
"ADDCON",
"MBCON",
"MOVCON",
Expand Down
169 changes: 138 additions & 31 deletions src/cmd/internal/obj/arm64/asm7.go
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,10 @@ func OPBIT(x uint32) uint32 {
return 1<<30 | 0<<29 | 0xD6<<21 | 0<<16 | x<<10
}

func MOVCONST(d int64, s int, rt int) uint32 {
return uint32(((d>>uint(s*16))&0xFFFF)<<5) | uint32(s)&3<<21 | uint32(rt&31)
}

const (
LFROM = 1 << 0
LTO = 1 << 1
Expand Down Expand Up @@ -272,12 +276,10 @@ var optab = []Optab{
/* MOVs that become MOVK/MOVN/MOVZ/ADD/SUB/OR */
{AMOVW, C_MOVCON, C_NONE, C_NONE, C_REG, 32, 4, 0, 0, 0},
{AMOVD, C_MOVCON, C_NONE, C_NONE, C_REG, 32, 4, 0, 0, 0},

// TODO: these don't work properly.
// { AMOVW, C_ADDCON, C_NONE, C_REG, 2, 4, 0 , 0},
// { AMOVD, C_ADDCON, C_NONE, C_REG, 2, 4, 0 , 0},
{AMOVW, C_BITCON, C_NONE, C_NONE, C_REG, 32, 4, 0, 0, 0},
{AMOVD, C_BITCON, C_NONE, C_NONE, C_REG, 32, 4, 0, 0, 0},
{AMOVW, C_LCON, C_NONE, C_NONE, C_REG, 12, 4, 0, LFROM, 0},
{AMOVD, C_VCON, C_NONE, C_NONE, C_REG, 12, 4, 0, LFROM, 0},

{AMOVK, C_VCON, C_NONE, C_NONE, C_REG, 33, 4, 0, 0, 0},
{AMOVD, C_AACON, C_NONE, C_NONE, C_REG, 4, 4, REGFROM, 0, 0},
Expand Down Expand Up @@ -318,9 +320,7 @@ var optab = []Optab{
{AWORD, C_NONE, C_NONE, C_NONE, C_LCON, 14, 4, 0, 0, 0},
{AWORD, C_NONE, C_NONE, C_NONE, C_LEXT, 14, 4, 0, 0, 0},
{AWORD, C_NONE, C_NONE, C_NONE, C_ADDR, 14, 4, 0, 0, 0},
{AMOVW, C_VCON, C_NONE, C_NONE, C_REG, 12, 4, 0, LFROM, 0},
{AMOVW, C_VCONADDR, C_NONE, C_NONE, C_REG, 68, 8, 0, 0, 0},
{AMOVD, C_VCON, C_NONE, C_NONE, C_REG, 12, 4, 0, LFROM, 0},
{AMOVD, C_VCONADDR, C_NONE, C_NONE, C_REG, 68, 8, 0, 0, 0},
{AMOVB, C_REG, C_NONE, C_NONE, C_ADDR, 64, 12, 0, 0, 0},
{AMOVBU, C_REG, C_NONE, C_NONE, C_ADDR, 64, 12, 0, 0, 0},
Expand Down Expand Up @@ -1105,6 +1105,23 @@ func isSTXPop(op obj.As) bool {
return false
}

func isANDWop(op obj.As) bool {
switch op {
case AANDW, AORRW, AEORW, AANDSW, ATSTW,
ABICW, AEONW, AORNW, ABICSW:
return true
}
return false
}

func isADDWop(op obj.As) bool {
switch op {
case AADDW, AADDSW, ASUBW, ASUBSW, ACMNW, ACMPW:
return true
}
return false
}

func isRegShiftOrExt(a *obj.Addr) bool {
return (a.Index-obj.RBaseARM64)&REG_EXT != 0 || (a.Index-obj.RBaseARM64)&REG_LSL != 0
}
Expand Down Expand Up @@ -1411,6 +1428,52 @@ func rclass(r int16) int {
return C_GOK
}

// con32class reclassifies the constant of 32-bit instruction. Becuase the constant type is 32-bit,
// but saved in Offset which type is int64, con32class treats it as uint32 type and reclassifies it.
func (c *ctxt7) con32class(a *obj.Addr) int {
v := uint32(a.Offset)
if v == 0 {
return C_ZCON
}
if isaddcon(int64(v)) {
if v <= 0xFFF {
if isbitcon(uint64(v)) {
return C_ABCON0
}
return C_ADDCON0
}
if isbitcon(uint64(v)) {
return C_ABCON
}
return C_ADDCON
}

t := movcon(int64(v))
if t >= 0 {
if isbitcon(uint64(v)) {
return C_MBCON
}
return C_MOVCON
}

t = movcon(int64(^v))
if t >= 0 {
if isbitcon(uint64(v)) {
return C_MBCON
}
return C_MOVCON
}

if isbitcon(uint64(v)) {
return C_BITCON
}

if 0 <= v && v <= 0xffffff {
return C_ADDCON2
}
return C_LCON
}

func (c *ctxt7) aclass(a *obj.Addr) int {
switch a.Type {
case obj.TYPE_NONE:
Expand Down Expand Up @@ -1517,6 +1580,12 @@ func (c *ctxt7) aclass(a *obj.Addr) int {
if isbitcon(uint64(v)) {
return C_ABCON
}
if movcon(v) >= 0 {
return C_AMCON
}
if movcon(^v) >= 0 {
return C_AMCON
}
return C_ADDCON
}

Expand Down Expand Up @@ -1609,6 +1678,34 @@ func (c *ctxt7) oplook(p *obj.Prog) *Optab {
}
a1 = a0 + 1
p.From.Class = int8(a1)
// more specific classification of 32-bit integers
if p.From.Type == obj.TYPE_CONST && p.From.Name == obj.NAME_NONE {
if p.As == AMOVW || isADDWop(p.As) {
ra0 := c.con32class(&p.From)
// do not break C_ADDCON2 when S bit is set
if (p.As == AADDSW || p.As == ASUBSW) && ra0 == C_ADDCON2 {
ra0 = C_LCON
}
a1 = ra0 + 1
p.From.Class = int8(a1)
}
if isANDWop(p.As) {
switch p.As {
case AANDW, AORRW, AEORW, AANDSW, ATSTW:
// For 32-bit logical instruction with constant,
// rewrite the high 32-bit to be a copy of the low
// 32-bit, so that the BITCON test can be shared
// for both 32-bit and 64-bit.
if a0 == C_BITCON {
break
}
fallthrough
default:
a1 = c.con32class(&p.From) + 1
p.From.Class = int8(a1)
}
}
}
}

a1--
Expand Down Expand Up @@ -1679,7 +1776,7 @@ func cmp(a int, b int) bool {
}

case C_ADDCON:
if b == C_ZCON || b == C_ABCON0 || b == C_ADDCON0 || b == C_ABCON {
if b == C_ZCON || b == C_ABCON0 || b == C_ADDCON0 || b == C_ABCON || b == C_AMCON {
return true
}

Expand All @@ -1689,7 +1786,7 @@ func cmp(a int, b int) bool {
}

case C_MOVCON:
if b == C_MBCON || b == C_ZCON || b == C_ADDCON0 {
if b == C_MBCON || b == C_ZCON || b == C_ADDCON0 || b == C_AMCON {
return true
}

Expand All @@ -1699,7 +1796,7 @@ func cmp(a int, b int) bool {
}

case C_LCON:
if b == C_ZCON || b == C_BITCON || b == C_ADDCON || b == C_ADDCON0 || b == C_ABCON || b == C_ABCON0 || b == C_MBCON || b == C_MOVCON || b == C_ADDCON2 {
if b == C_ZCON || b == C_BITCON || b == C_ADDCON || b == C_ADDCON0 || b == C_ABCON || b == C_ABCON0 || b == C_MBCON || b == C_MOVCON || b == C_ADDCON2 || b == C_AMCON {
return true
}

Expand Down Expand Up @@ -3501,8 +3598,8 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
if r == 0 {
r = rt
}
o1 = c.oaddi(p, int32(op), int32(c.regoff(&p.From)) & 0x000fff, r, rt)
o2 = c.oaddi(p, int32(op), int32(c.regoff(&p.From)) & 0xfff000, rt, rt)
o1 = c.oaddi(p, int32(op), int32(c.regoff(&p.From))&0x000fff, r, rt)
o2 = c.oaddi(p, int32(op), int32(c.regoff(&p.From))&0xfff000, rt, rt)

case 50: /* sys/sysl */
o1 = c.opirr(p, p.As)
Expand Down Expand Up @@ -3670,7 +3767,11 @@ func (c *ctxt7) asmout(p *obj.Prog, o *Optab, out []uint32) {
if p.Reg == REGTMP {
c.ctxt.Diag("cannot use REGTMP as source: %v\n", p)
}
o1 = c.omovconst(AMOVD, p, &p.From, REGTMP)
if isADDWop(p.As) || isANDWop(p.As) {
o1 = c.omovconst(AMOVW, p, &p.From, REGTMP)
} else {
o1 = c.omovconst(AMOVD, p, &p.From, REGTMP)
}

rt := int(p.To.Reg)
if p.To.Type == obj.TYPE_NONE {
Expand Down Expand Up @@ -6195,31 +6296,37 @@ func (c *ctxt7) omovconst(as obj.As, p *obj.Prog, a *obj.Addr, rt int) (o1 uint3
return o1
}

r := 32
if as == AMOVD {
r = 64
if as == AMOVW {
d := uint32(a.Offset)
s := movcon(int64(d))
if s < 0 || 16*s >= 32 {
d = ^d
s = movcon(int64(d))
if s < 0 || 16*s >= 32 {
c.ctxt.Diag("impossible 32-bit move wide: %#x\n%v", uint32(a.Offset), p)
}
o1 = c.opirr(p, AMOVNW)
} else {
o1 = c.opirr(p, AMOVZW)
}
o1 |= MOVCONST(int64(d), s, rt)
}
d := a.Offset
s := movcon(d)
if s < 0 || s >= r {
d = ^d
s = movcon(d)
if s < 0 || s >= r {
c.ctxt.Diag("impossible move wide: %#x\n%v", uint64(a.Offset), p)
}
if as == AMOVD {
if as == AMOVD {
d := a.Offset
s := movcon(d)
if s < 0 || 16*s >= 64 {
d = ^d
s = movcon(d)
if s < 0 || 16*s >= 64 {
c.ctxt.Diag("impossible 64-bit move wide: %#x\n%v", uint64(a.Offset), p)
}
o1 = c.opirr(p, AMOVN)
} else {
o1 = c.opirr(p, AMOVNW)
}
} else {
if as == AMOVD {
o1 = c.opirr(p, AMOVZ)
} else {
o1 = c.opirr(p, AMOVZW)
}
o1 |= MOVCONST(d, s, rt)
}
o1 |= uint32((((d >> uint(s*16)) & 0xFFFF) << 5) | int64((uint32(s)&3)<<21) | int64(rt&31))

return o1
}

Expand Down

0 comments on commit 86ce1cb

Please sign in to comment.