Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Moved the skip-branch insertion code earlier in the pipeline for correctness. #21

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 1 addition & 32 deletions llvm/lib/Target/AMDGPU/SIInsertSkips.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,19 +92,6 @@ INITIALIZE_PASS(SIInsertSkips, DEBUG_TYPE,

char &llvm::SIInsertSkipsPassID = SIInsertSkips::ID;

/// Return true if \p MI lowers to no machine code in the final output.
/// Covers generic meta instructions and target pseudos that are erased
/// before emission.
static bool opcodeEmitsNoInsts(const MachineInstr &MI) {
  // Generic meta instructions (debug values, implicit defs, ...) never
  // produce code.
  if (MI.isMetaInstruction())
    return true;

  // SI_MASK_BRANCH is a pseudo terminator that is removed before emission.
  return MI.getOpcode() == AMDGPU::SI_MASK_BRANCH;
}

bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,
const MachineBasicBlock &To) const {
unsigned NumInstr = 0;
Expand All @@ -116,27 +103,9 @@ bool SIInsertSkips::shouldSkip(const MachineBasicBlock &From,

for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
NumInstr < SkipThreshold && I != E; ++I) {
if (opcodeEmitsNoInsts(*I))
if (TII->opcodeEmitsNoInsts(*I))
continue;

// FIXME: Since this is required for correctness, this should be inserted
// during SILowerControlFlow.

// When a uniform loop is inside non-uniform control flow, the branch
// leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken
// when EXEC = 0. We should skip the loop lest it becomes infinite.
if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ ||
I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
return true;

if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
return true;

// These instructions are potentially expensive even if EXEC = 0.
if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
I->getOpcode() == AMDGPU::S_WAITCNT)
return true;

++NumInstr;
if (NumInstr >= SkipThreshold)
return true;
Expand Down
13 changes: 13 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2677,6 +2677,19 @@ bool SIInstrInfo::isAlwaysGDS(uint16_t Opcode) const {
Opcode == AMDGPU::DS_GWS_BARRIER;
}

/// Return true if \p MI lowers to no machine code in the final output.
/// Covers generic meta instructions and target pseudos that are erased
/// before emission.
bool SIInstrInfo::opcodeEmitsNoInsts(const MachineInstr &MI) const {
  // Generic meta instructions (debug values, implicit defs, ...) never
  // produce code.
  if (MI.isMetaInstruction())
    return true;

  // SI_MASK_BRANCH is the only target-specific opcode handled here; it is
  // a pseudo terminator removed before emission.
  return MI.getOpcode() == AMDGPU::SI_MASK_BRANCH;
}

bool SIInstrInfo::hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const {
unsigned Opcode = MI.getOpcode();

Expand Down
3 changes: 3 additions & 0 deletions llvm/lib/Target/AMDGPU/SIInstrInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -668,6 +668,9 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
return MO.isReg() && RI.isVGPR(MRI, MO.getReg());});
}

/// Returns true if \p MI will not emit any instruction in the final output.
bool opcodeEmitsNoInsts(const MachineInstr &MI) const;

/// Whether we must prevent this instruction from executing with EXEC = 0.
bool hasUnwantedEffectsWhenEXECEmpty(const MachineInstr &MI) const;

Expand Down
65 changes: 60 additions & 5 deletions llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ class SILowerControlFlow : public MachineFunctionPass {
SmallVectorImpl<MachineOperand> &Src) const;

void combineMasks(MachineInstr &MI);
MachineInstr *skipMaskBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);

bool shouldSkip(const MachineBasicBlock &From,
const MachineBasicBlock &To) const;

public:
static char ID;
Expand Down Expand Up @@ -230,8 +234,10 @@ void SILowerControlFlow::emitIf(MachineInstr &MI) {

// Insert a pseudo terminator to help keep the verifier happy. This will also
// be used later when inserting skips.
MachineInstr *NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
.add(MI.getOperand(2));
MachineInstr *NewBr = skipMaskBranch(MI, *MI.getParent());
if (!NewBr)
NewBr = BuildMI(MBB, I, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
.add(MI.getOperand(2));

if (!LIS) {
MI.eraseFromParent();
Expand Down Expand Up @@ -307,9 +313,11 @@ void SILowerControlFlow::emitElse(MachineInstr &MI) {
.addReg(Exec)
.addReg(DstReg);

MachineInstr *Branch =
BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
.addMBB(DestBB);
// Insert the skip branch if required.
MachineInstr *Branch = skipMaskBranch(MI, *MI.getParent());
if (!Branch)
Branch = BuildMI(MBB, ElsePt, DL, TII->get(AMDGPU::SI_MASK_BRANCH))
.addMBB(DestBB);

if (!LIS) {
MI.eraseFromParent();
Expand Down Expand Up @@ -473,6 +481,53 @@ void SILowerControlFlow::combineMasks(MachineInstr &MI) {
MRI->getUniqueVRegDef(Reg)->eraseFromParent();
}

bool SILowerControlFlow::shouldSkip(const MachineBasicBlock &From,
const MachineBasicBlock &To) const {
const MachineFunction *MF = From.getParent();

for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
MBBI != End && MBBI != ToI; ++MBBI) {
const MachineBasicBlock &MBB = *MBBI;

for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
I != E; ++I) {
if (TII->opcodeEmitsNoInsts(*I))
continue;

// When a uniform loop is inside non-uniform control flow, the branch
// leaving the loop might be an S_CBRANCH_VCCNZ, which is never taken
// when EXEC = 0. We should skip the loop lest it becomes infinite.
if (I->getOpcode() == AMDGPU::S_CBRANCH_VCCNZ ||
I->getOpcode() == AMDGPU::S_CBRANCH_VCCZ)
return true;

if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
return true;

// These instructions are potentially expensive even if EXEC = 0.
if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
I->getOpcode() == AMDGPU::S_WAITCNT)
return true;
}
}
return false;
}

// Insert an S_CBRANCH_EXECZ over the region ending at the destination block
// of \p MI, and return the inserted branch instruction — or nullptr when
// shouldSkip() decides the region is safe/cheap to fall through with
// EXEC = 0. (Note: this returns a MachineInstr*, not a bool.)
MachineInstr *SILowerControlFlow::skipMaskBranch(MachineInstr &MI,
                                                 MachineBasicBlock &SrcMBB) {
  // NOTE(review): assumes operand 2 of \p MI is the destination-MBB operand
  // for every caller (both the SI_IF path in emitIf and the SI_ELSE path in
  // emitElse) — confirm against the pseudo definitions.
  MachineBasicBlock *DestBB = MI.getOperand(2).getMBB();

  // Only scan from the fall-through successor up to the destination.
  // NOTE(review): dereferences succ_begin() unconditionally — assumes SrcMBB
  // has at least one successor at this point; verify for all call sites.
  if (!shouldSkip(**SrcMBB.succ_begin(), *DestBB))
    return nullptr;

  const DebugLoc &DL = MI.getDebugLoc();
  // Place the skip branch immediately after the control-flow pseudo.
  MachineBasicBlock::iterator InsPt = std::next(MI.getIterator());

  return BuildMI(SrcMBB, InsPt, DL, TII->get(AMDGPU::S_CBRANCH_EXECZ))
      .addMBB(DestBB);
}

bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) {
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
TII = ST.getInstrInfo();
Expand Down