Skip to content

Commit

Permalink
cmd/compile: split 3 operand LEA in late lower pass
Browse files Browse the repository at this point in the history
On newer amd64 cpus 3 operand LEA instructions are slow, CL 114655 split
them to 2 LEA instructions in genssa.

This CL make late lower pass run after addressing modes, and split 3
operand LEA in late lower pass so that we can do common-subexpression
elimination for splited LEAs.

Updates #21735

Change-Id: Ied49139c7abab655e1a14a6fd793bdf9f987d1f1
Reviewed-on: https://go-review.googlesource.com/c/go/+/440035
TryBot-Result: Gopher Robot <[email protected]>
Run-TryBot: Wayne Zuo <[email protected]>
Reviewed-by: Keith Randall <[email protected]>
Reviewed-by: Keith Randall <[email protected]>
Reviewed-by: Joedian Reid <[email protected]>
  • Loading branch information
wdvxdr1123 authored and randall77 committed Oct 17, 2022
1 parent 7ffc1e4 commit 1c783f7
Show file tree
Hide file tree
Showing 4 changed files with 419 additions and 1 deletion.
11 changes: 11 additions & 0 deletions src/cmd/compile/internal/ssa/_gen/AMD64latelower.rules
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Copyright 2022 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// split 3 operand LEA.
// Note: Don't split pointer computations in order to avoid invalid pointers.
(LEA(Q|L|W)1 <t> [c] {s} x y) && isPtr(x.Type) && c != 0 && s == nil => (ADD(Q|L|L) x (ADD(Q|L|L)const <y.Type> [c] y))
(LEA(Q|L|W)1 <t> [c] {s} x y) && !isPtr(x.Type) && c != 0 && s == nil => (ADD(Q|L|L) y (ADD(Q|L|L)const <x.Type> [c] x))
(LEA(Q|L|W)2 <t> [c] {s} x y) && !isPtr(t) && c != 0 && s == nil => (ADD(Q|L|L)const [c] (LEA(Q|L|W)2 <x.Type> x y))
(LEA(Q|L|W)4 <t> [c] {s} x y) && !isPtr(t) && c != 0 && s == nil => (ADD(Q|L|L)const [c] (LEA(Q|L|W)4 <x.Type> x y))
(LEA(Q|L|W)8 <t> [c] {s} x y) && !isPtr(t) && c != 0 && s == nil => (ADD(Q|L|L)const [c] (LEA(Q|L|W)8 <x.Type> x y))
2 changes: 1 addition & 1 deletion src/cmd/compile/internal/ssa/compile.go
Original file line number Diff line number Diff line change
Expand Up @@ -486,8 +486,8 @@ var passes = [...]pass{
{name: "insert resched checks", fn: insertLoopReschedChecks,
disabled: !buildcfg.Experiment.PreemptibleLoops}, // insert resched checks in loops.
{name: "lower", fn: lower, required: true},
{name: "late lower", fn: lateLower, required: true},
{name: "addressing modes", fn: addressingModes, required: false},
{name: "late lower", fn: lateLower, required: true},
{name: "lowered deadcode for cse", fn: deadcode}, // deadcode immediately before CSE avoids CSE making dead values live again
{name: "lowered cse", fn: cse},
{name: "elim unread autos", fn: elimUnreadAutos},
Expand Down
1 change: 1 addition & 0 deletions src/cmd/compile/internal/ssa/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,7 @@ func NewConfig(arch string, types Types, ctxt *obj.Link, optimize, softfloat boo
c.RegSize = 8
c.lowerBlock = rewriteBlockAMD64
c.lowerValue = rewriteValueAMD64
c.lateLowerValue = rewriteValueAMD64latelower
c.splitLoad = rewriteValueAMD64splitload
c.registers = registersAMD64[:]
c.gpRegMask = gpRegMaskAMD64
Expand Down
Loading

0 comments on commit 1c783f7

Please sign in to comment.