Skip to content

Commit

Permalink
[Hexagon] Generate absolute-set load/store instructions. (#82034)
Browse files Browse the repository at this point in the history
The optimization finds loads/stores of a specific form and translates
the first load/store to an absolute-set form, thereby optimizing out the
transfer and eliminating the constant extenders.
  • Loading branch information
Sumanth Gundapaneni authored Feb 22, 2024
1 parent f204aee commit 4c0fdcd
Show file tree
Hide file tree
Showing 5 changed files with 424 additions and 0 deletions.
1 change: 1 addition & 0 deletions llvm/lib/Target/Hexagon/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ add_llvm_target(HexagonCodeGen
HexagonFrameLowering.cpp
HexagonGenExtract.cpp
HexagonGenInsert.cpp
HexagonGenMemAbsolute.cpp
HexagonGenMux.cpp
HexagonGenPredicate.cpp
HexagonHardwareLoops.cpp
Expand Down
274 changes: 274 additions & 0 deletions llvm/lib/Target/Hexagon/HexagonGenMemAbsolute.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,274 @@
//===--- HexagonGenMemAbsolute.cpp - Generate Load/Store Set Absolute ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

// This pass traverses through all the basic blocks in a function and converts
// an indexed load/store with offset "0" to an absolute-set load/store
// instruction as long as the use of the register in the new instruction
// dominates the rest of the uses and there are more than 2 uses.

#include "HexagonTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "hexagon-abs"

using namespace llvm;

// Pass statistics: one counter per kind of memory instruction rewritten.
// Each counter is incremented in runOnMachineFunction when the corresponding
// absolute-set instruction is built.
STATISTIC(HexagonNumLoadAbsConversions,
"Number of Load instructions converted to absolute-set form");
STATISTIC(HexagonNumStoreAbsConversions,
"Number of Store instructions converted to absolute-set form");

// Forward declarations of this pass's registration hook and factory; both are
// used/defined later in this file.
namespace llvm {
/// Registers HexagonGenMemAbsolute with the given pass registry.
void initializeHexagonGenMemAbsolutePass(PassRegistry &Registry);
/// Creates a new instance of the HexagonGenMemAbsolute pass.
FunctionPass *createHexagonGenMemAbsolute();
} // namespace llvm

namespace {

/// Machine-function pass that converts an indexed load/store with offset 0
/// into an absolute-set load/store, folding the preceding constant/global
/// address transfer into the memory instruction.
class HexagonGenMemAbsolute : public MachineFunctionPass {
public:
  /// Pass identification token handed to the MachineFunctionPass base.
  static char ID;

  /// Registers the pass with the global pass registry on construction.
  HexagonGenMemAbsolute() : MachineFunctionPass(ID) {
    initializeHexagonGenMemAbsolutePass(*PassRegistry::getPassRegistry());
  }

  /// Human-readable pass name.
  StringRef getPassName() const override {
    return "Hexagon Generate Load/Store Set Absolute Address Instruction";
  }

  /// The pass needs the machine dominator tree and declares it preserved.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    MachineFunctionPass::getAnalysisUsage(AU);
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }

  /// Entry point; defined out of line below.
  bool runOnMachineFunction(MachineFunction &Fn) override;

private:
  /// If \p Opcode is a supported indexed load, store the matching
  /// absolute-set opcode in \p NewOpcode and return true.
  static bool isValidIndexedLoad(int &Opcode, int &NewOpcode);

  /// If \p Opcode is a supported indexed store, store the matching
  /// absolute-set opcode in \p NewOpcode and return true.
  static bool isValidIndexedStore(int &Opcode, int &NewOpcode);

  // Per-function state; (re)assigned at the start of runOnMachineFunction.
  const HexagonInstrInfo *TII = nullptr;
  MachineRegisterInfo *MRI = nullptr;
  const TargetRegisterInfo *TRI = nullptr;
};
} // namespace

// Definition of the pass identification token declared in the class; it is
// passed to the MachineFunctionPass base-class constructor.
char HexagonGenMemAbsolute::ID = 0;

// Generates initializeHexagonGenMemAbsolutePass(), which the constructor
// calls. The first string is the pass argument, the second its description.
// NOTE(review): the two trailing `false` flags are presumably "CFG-only" and
// "is analysis" per the INITIALIZE_PASS macro -- confirm in PassSupport.h.
INITIALIZE_PASS(HexagonGenMemAbsolute, "hexagon-gen-load-absolute",
"Hexagon Generate Load/Store Set Absolute Address Instruction",
false, false)

/// Fold constant/global address transfers into absolute-set loads/stores.
///
/// For every CONST32 / A2_tfrsi in \p Fn whose source operand is an immediate
/// or a global address, the first non-debug use of the defined register is
/// inspected. If that use is a supported indexed load/store that addresses
/// memory through the register with offset 0, dominates every other non-debug
/// use of the register, and the register has at least three such uses, the
/// transfer and the memory instruction are replaced by a single absolute-set
/// load/store that both performs the access and defines the register.
///
/// \returns true if at least one instruction pair was rewritten, false if the
/// function was skipped or left untouched.
bool HexagonGenMemAbsolute::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(Fn.getFunction()))
    return false;

  TII = Fn.getSubtarget<HexagonSubtarget>().getInstrInfo();
  MRI = &Fn.getRegInfo();
  TRI = Fn.getRegInfo().getTargetRegisterInfo();

  MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();

  // Report a modification only when something was actually rewritten.
  bool Changed = false;

  for (MachineBasicBlock &MBB : Fn) {
    for (MachineBasicBlock::iterator MII = MBB.begin(); MII != MBB.end();) {
      MachineInstr *MI = &*MII;
      // Advance before any erasure so the iterator never refers to a deleted
      // instruction and never has to be decremented past begin().
      ++MII;

      int Opc = MI->getOpcode();
      if (Opc != Hexagon::CONST32 && Opc != Hexagon::A2_tfrsi)
        continue;

      const MachineOperand &MO = MI->getOperand(0);
      if (!MO.isReg() || !MO.isDef())
        continue;

      Register DstReg = MO.getReg();
      if (MRI->use_nodbg_empty(DstReg))
        continue;

      // Candidate memory instruction: the parent of the first entry in the
      // register's non-debug use list.
      MachineInstr *NextMI = MRI->use_nodbg_begin(DstReg)->getParent();
      int NextOpc = NextMI->getOpcode();
      int NewOpc = 0;
      bool IsLoad = isValidIndexedLoad(NextOpc, NewOpc);

      if (!IsLoad && !isValidIndexedStore(NextOpc, NewOpc))
        continue;

      // Base and Offset positions for load and store instructions
      // Load R(dest), R(base), Imm -> R(dest) = mem(R(base) + Imm)
      // Store R(base), Imm, R (src) -> mem(R(base) + Imm) = R(src)
      // Only the success of the query is used; the value operand position
      // follows from the layouts above.
      unsigned BaseRegPos, ImmPos;
      if (!TII->getBaseAndOffsetPosition(*NextMI, BaseRegPos, ImmPos))
        continue;
      const unsigned RegPos = IsLoad ? 0 : 2;

      // The transferred value must be an immediate or a global address.
      const MachineOperand &AddrOp = MI->getOperand(1);
      const bool IsGlobal = AddrOp.isGlobal();
      if (!AddrOp.isImm() && !IsGlobal)
        continue;

      const MachineOperand *BaseOp = nullptr;
      int64_t Offset = 0;
      bool Scalable = false;
      if (!TII->getMemOperandWithOffset(*NextMI, BaseOp, Offset, Scalable,
                                        TRI))
        continue;

      // Ensure BaseOp is non-null and register type.
      if (!BaseOp || !BaseOp->isReg())
        continue;

      if (Scalable)
        continue;

      // The access must go through the transferred register with offset 0.
      Register BaseReg = BaseOp->getReg();
      if (DstReg != BaseReg || Offset != 0)
        continue;

      const MachineOperand &MO0 = NextMI->getOperand(RegPos);
      if (!MO0.isReg())
        continue;

      Register LoadStoreReg = MO0.getReg();

      // Store: Bail out if the src and base are same (def and use on same
      // register).
      if (LoadStoreReg == BaseReg)
        continue;

      // Insert the absolute-set instruction "I" only if the use of the
      // BaseReg in "I" dominates the rest of the uses of BaseReg and if
      // there are more than 2 uses of this BaseReg.
      bool Dominates = true;
      unsigned NumUses = 0;
      for (const MachineOperand &Use : MRI->use_nodbg_operands(DstReg)) {
        ++NumUses;
        if (!MDT.dominates(NextMI, Use.getParent())) {
          Dominates = false;
          break;
        }
      }

      if (!Dominates || NumUses < 3)
        continue;

      // If we reach here, we have met all the conditions required for the
      // replacement of the absolute instruction.
      LLVM_DEBUG({
        dbgs() << "Found a pair of instructions for absolute-set "
               << (IsLoad ? "load" : "store") << "\n";
        dbgs() << *MI;
        dbgs() << *NextMI;
      });

      MachineBasicBlock *ParentBlock = NextMI->getParent();
      MachineInstrBuilder MIB;
      if (IsLoad) { // Insert absolute-set load instruction
        ++HexagonNumLoadAbsConversions;
        MIB = BuildMI(*ParentBlock, NextMI, NextMI->getDebugLoc(),
                      TII->get(NewOpc), LoadStoreReg)
                  .addReg(DstReg, RegState::Define);
      } else { // Insert absolute-set store instruction
        ++HexagonNumStoreAbsConversions;
        MIB = BuildMI(*ParentBlock, NextMI, NextMI->getDebugLoc(),
                      TII->get(NewOpc), DstReg);
      }

      if (IsGlobal)
        MIB.addGlobalAddress(AddrOp.getGlobal(), AddrOp.getOffset(),
                             AddrOp.getTargetFlags());
      else
        MIB.addImm(AddrOp.getImm());

      // Carry over the sub-register index of the loaded/stored value.
      if (IsLoad)
        MIB->getOperand(0).setSubReg(MO0.getSubReg());
      else
        MIB.addReg(LoadStoreReg, 0, MO0.getSubReg());

      LLVM_DEBUG(dbgs() << "Replaced with " << *MIB << "\n");

      // MII already points past MI. If it points at the memory instruction
      // that is about to be erased, move it on first; the new instruction
      // was inserted before NextMI and is never a candidate itself.
      if (MII != MBB.end() && &*MII == NextMI)
        ++MII;

      // Erase the instructions that got replaced.
      MBB.erase(MI);
      NextMI->getParent()->erase(NextMI);
      Changed = true;
    }
  }

  return Changed;
}

/// Look up the absolute-set counterpart of an indexed load opcode.
///
/// \param Opc    Opcode to classify; only read.
/// \param NewOpc Receives the matching L4_*_ap opcode on success; left
///               untouched when \p Opc is not a supported load.
/// \returns true if \p Opc is one of the L2_*_io loads listed below.
bool HexagonGenMemAbsolute::isValidIndexedLoad(int &Opc, int &NewOpc) {
  static const struct {
    int Indexed;
    int AbsoluteSet;
  } LoadMap[] = {
      {Hexagon::L2_loadrb_io, Hexagon::L4_loadrb_ap},
      {Hexagon::L2_loadrh_io, Hexagon::L4_loadrh_ap},
      {Hexagon::L2_loadri_io, Hexagon::L4_loadri_ap},
      {Hexagon::L2_loadrd_io, Hexagon::L4_loadrd_ap},
      {Hexagon::L2_loadruh_io, Hexagon::L4_loadruh_ap},
      {Hexagon::L2_loadrub_io, Hexagon::L4_loadrub_ap},
  };

  for (const auto &Entry : LoadMap) {
    if (Entry.Indexed == Opc) {
      NewOpc = Entry.AbsoluteSet;
      return true;
    }
  }
  return false;
}

/// Look up the absolute-set counterpart of an indexed store opcode.
///
/// \param Opc    Opcode to classify; only read.
/// \param NewOpc Receives the matching S4_*_ap opcode on success; left
///               untouched when \p Opc is not a supported store.
/// \returns true if \p Opc is one of the S2_*_io stores listed below.
bool HexagonGenMemAbsolute::isValidIndexedStore(int &Opc, int &NewOpc) {
  static const struct {
    int Indexed;
    int AbsoluteSet;
  } StoreMap[] = {
      {Hexagon::S2_storerd_io, Hexagon::S4_storerd_ap},
      {Hexagon::S2_storeri_io, Hexagon::S4_storeri_ap},
      {Hexagon::S2_storerh_io, Hexagon::S4_storerh_ap},
      {Hexagon::S2_storerb_io, Hexagon::S4_storerb_ap},
  };

  for (const auto &Entry : StoreMap) {
    if (Entry.Indexed == Opc) {
      NewOpc = Entry.AbsoluteSet;
      return true;
    }
  }
  return false;
}

//===----------------------------------------------------------------------===//
// Public Constructor Functions
//===----------------------------------------------------------------------===//

/// Factory for the pass: heap-allocates a HexagonGenMemAbsolute and returns
/// it through the generic FunctionPass interface.
FunctionPass *llvm::createHexagonGenMemAbsolute() {
  FunctionPass *Pass = new HexagonGenMemAbsolute();
  return Pass;
}
9 changes: 9 additions & 0 deletions llvm/lib/Target/Hexagon/HexagonTargetMachine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,10 @@ static cl::opt<bool>
static cl::opt<bool> DisableHSDR("disable-hsdr", cl::init(false), cl::Hidden,
cl::desc("Disable splitting double registers"));

static cl::opt<bool>
EnableGenMemAbs("hexagon-mem-abs", cl::init(true), cl::Hidden,
cl::desc("Generate absolute set instructions"));

static cl::opt<bool> EnableBitSimplify("hexagon-bit", cl::init(true),
cl::Hidden, cl::desc("Bit simplification"));

Expand Down Expand Up @@ -151,6 +155,7 @@ namespace llvm {
void initializeHexagonCopyToCombinePass(PassRegistry&);
void initializeHexagonEarlyIfConversionPass(PassRegistry&);
void initializeHexagonExpandCondsetsPass(PassRegistry&);
void initializeHexagonGenMemAbsolutePass(PassRegistry &);
void initializeHexagonGenMuxPass(PassRegistry&);
void initializeHexagonHardwareLoopsPass(PassRegistry&);
void initializeHexagonLoopIdiomRecognizeLegacyPassPass(PassRegistry &);
Expand All @@ -177,6 +182,7 @@ namespace llvm {
FunctionPass *createHexagonFixupHwLoops();
FunctionPass *createHexagonGenExtract();
FunctionPass *createHexagonGenInsert();
FunctionPass *createHexagonGenMemAbsolute();
FunctionPass *createHexagonGenMux();
FunctionPass *createHexagonGenPredicate();
FunctionPass *createHexagonHardwareLoops();
Expand Down Expand Up @@ -211,6 +217,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeHexagonTarget() {
initializeHexagonConstPropagationPass(PR);
initializeHexagonCopyToCombinePass(PR);
initializeHexagonEarlyIfConversionPass(PR);
initializeHexagonGenMemAbsolutePass(PR);
initializeHexagonGenMuxPass(PR);
initializeHexagonHardwareLoopsPass(PR);
initializeHexagonLoopIdiomRecognizeLegacyPassPass(PR);
Expand Down Expand Up @@ -413,6 +420,8 @@ void HexagonPassConfig::addPreRegAlloc() {
insertPass(&RegisterCoalescerID, &HexagonExpandCondsetsID);
if (!DisableStoreWidening)
addPass(createHexagonStoreWidening());
if (EnableGenMemAbs)
addPass(createHexagonGenMemAbsolute());
if (!DisableHardwareLoops)
addPass(createHexagonHardwareLoops());
}
Expand Down
68 changes: 68 additions & 0 deletions llvm/test/CodeGen/Hexagon/load-const-extend-opt.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
; RUN: llc -march=hexagon -O3 -hexagon-small-data-threshold=0 < %s | FileCheck %s
; This test checks that when there are more than 2 uses of a constant address, the
; value is moved into a register and all instances of the constant are replaced with the register.
; The GenMemAbsolute pass generates an absolute-set instruction if there are more
; than 2 uses of this register.

; CHECK: loadi32_3
; CHECK-NOT: r{{[0-9]+}} = memw(##441652)
; CHECK-NOT: r{{[0-9]+}} = memw(r{{[0-9]+}}+#0)
; CHECK:r{{[0-9]+}} = memw(r[[REG:[0-9]+]]=##441652)
; CHECK-NOT: r{{[0-9]+}} = memw(##441652)
; CHECK:r{{[0-9]+}} = memw(r[[REG]]+#0)
; CHECK-NOT: r{{[0-9]+}} = memw(##441652)
; CHECK:r{{[0-9]+}} = memw(r[[REG]]+#0)
; CHECK-NOT: r{{[0-9]+}} = memw(##441652)

; Three volatile i32 loads from the same constant address (441652).
define void @loadi32_3() #0 {
entry:
%0 = load volatile i32, ptr inttoptr (i32 441652 to ptr), align 4
%1 = load volatile i32, ptr inttoptr (i32 441652 to ptr), align 4
%2 = load volatile i32, ptr inttoptr (i32 441652 to ptr), align 4
ret void
}

; CHECK: loadi32_2
; CHECK-NOT: r{{[0-9]+}} = ##441652
; CHECK: r{{[0-9]+}} = memw(##441652)
; CHECK: r{{[0-9]+}} = memw(##441652)

; Only two volatile i32 loads from the constant address (441652); the
; expectations above require both to keep the absolute addressing form.
define void @loadi32_2() #0 {
entry:
%0 = load volatile i32, ptr inttoptr (i32 441652 to ptr), align 4
%1 = load volatile i32, ptr inttoptr (i32 441652 to ptr), align 4
ret void
}

; CHECK: loadi32_abs_global_3
; CHECK-NOT: r{{[0-9]+}} = memw(##globalInt)
; CHECK-NOT: r{{[0-9]+}} = memw(r{{[0-9]+}}+#0)
; CHECK:r{{[0-9]+}} = memw(r[[REG:[0-9]+]]=##globalInt)
; CHECK-NOT: r{{[0-9]+}} = memw(##globalInt)
; CHECK:r{{[0-9]+}} = memw(r[[REG]]+#0)
; CHECK-NOT: r{{[0-9]+}} = memw(##globalInt)
; CHECK:r{{[0-9]+}} = memw(r[[REG]]+#0)
; CHECK-NOT: r{{[0-9]+}} = memw(##globalInt)

@globalInt = external global i32, align 8
; Three volatile i32 loads from the external global @globalInt.
define void @loadi32_abs_global_3() #0 {
entry:
%0 = load volatile i32, ptr @globalInt, align 4
%1 = load volatile i32, ptr @globalInt, align 4
%2 = load volatile i32, ptr @globalInt, align 4
ret void
}

; CHECK: loadi32_abs_global_2
; CHECK-NOT:r[[REG:[0-9]+]] = ##globalInt
; CHECK:r{{[0-9]+}} = memw(##globalInt)
; CHECK:r{{[0-9]+}} = memw(##globalInt)

; Only two volatile i32 loads from @globalInt; the expectations above require
; both to keep the absolute addressing form.
define void @loadi32_abs_global_2() #0 {
entry:
%0 = load volatile i32, ptr @globalInt, align 4
%1 = load volatile i32, ptr @globalInt, align 4
ret void
}

attributes #0 = { nounwind }
Loading

0 comments on commit 4c0fdcd

Please sign in to comment.