-
Notifications
You must be signed in to change notification settings - Fork 12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Interleave loop pre-header with SWP prologue #236
base: aie-public
Are you sure you want to change the base?
Changes from all commits
ed1b571
9dcbff6
028d839
400acec
38e5be2
dfc8735
baafd49
171faec
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -275,27 +275,14 @@ class RegionEndEdges : public ScheduleDAGMutation { | |
assert(EdgeLatency < DelaySlots); | ||
EdgeLatency = DelaySlots + 1; | ||
} | ||
|
||
// Between writing Registers (lc, le, ls) and the end of the loop, | ||
// there must be a distance of 112 bytes in terms of PM addresses. | ||
// 112 bytes correspond to 7 fully-expanded 128-bit instructions and | ||
// hence adding a latency of 8 from LoopStart to the ExitSU. | ||
// We can subtract the number of bundles that interblock pushed into | ||
// BottomInsert | ||
// FIXME: this holds as long as we insert them unconditionally. If we | ||
// integrate them with the bottom region, we just need to keep 8 away | ||
// from ExitSU | ||
if (TII->isZeroOverheadLoopSetupInstr(MI)) { | ||
unsigned PatchCycles = 8; | ||
if (DAG->getBB()) { | ||
auto *Scheduler = | ||
static_cast<AIEScheduleDAGMI *>(DAG)->getSchedImpl(); | ||
auto &InterBlock = Scheduler->getInterBlock(); | ||
unsigned InsertedCycles = | ||
InterBlock.getBlockState(DAG->getBB()).BottomInsert.size(); | ||
PatchCycles = | ||
PatchCycles >= InsertedCycles ? PatchCycles - InsertedCycles : 0; | ||
} | ||
EdgeLatency = std::max(EdgeLatency, PatchCycles); | ||
const unsigned ZOLDistance = 8; | ||
EdgeLatency = std::max(EdgeLatency, ZOLDistance); | ||
} | ||
|
||
ExitDep.setLatency(EdgeLatency); | ||
|
@@ -318,6 +305,56 @@ class RegionEndEdges : public ScheduleDAGMutation { | |
}; | ||
}; | ||
|
||
/// This Mutator is responsible for emitting "fixed" SUnits at the top or bottom | ||
/// of the region. These special SUnits require a specific cycle and cannot be | ||
/// placed freely by the scheduler. | ||
/// | ||
/// Here, these special SUnits get created from Region::top_fixed_instrs() or | ||
/// Region::bot_fixed_instrs(), and dependencies are created between "free" and | ||
/// "fixed" SUnits. | ||
class EmitFixedSUnits : public ScheduleDAGMutation { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I could probably add a top-level comment for that mutator There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nit: for the future, we could have a high-level description of the mutator. |
||
public: | ||
void apply(ScheduleDAGInstrs *DAG) override { | ||
AIEPostRASchedStrategy *Scheduler = | ||
static_cast<AIEScheduleDAGMI *>(DAG)->getSchedImpl(); | ||
auto *TII = static_cast<const AIEBaseInstrInfo *>(DAG->TII); | ||
auto *ItinData = DAG->MF.getSubtarget().getInstrItineraryData(); | ||
const BlockState &BS = | ||
Scheduler->getInterBlock().getBlockState(DAG->getBB()); | ||
const Region &CurRegion = BS.getCurrentRegion(); | ||
|
||
// First, create SUnits for all "fixed" instructions | ||
unsigned DistToExitSU = 0; | ||
for (MachineInstr &MI : reverse(CurRegion.bot_fixed_instrs())) { | ||
Scheduler->addFixedSUnit(MI, /*IsTop=*/false, DistToExitSU); | ||
++DistToExitSU; | ||
} | ||
DAG->makeMaps(); | ||
|
||
// Then, create dependencies between "free" and "fixed" instructions | ||
auto IsFreeSU = [Scheduler](const SUnit &SU) { | ||
return Scheduler->isFreeSU(SU); | ||
}; | ||
ArrayRef<AIE::MachineBundle> BotFixedBundles = | ||
CurRegion.getBotFixedBundles(); | ||
for (SUnit &FreeSU : make_filter_range(DAG->SUnits, IsFreeSU)) { | ||
const MachineInstr &MI = *FreeSU.getInstr(); | ||
MachineInstr *FixedDepMI = | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. super nit: To help readability, we could extend with overloaded cast operators:
Then we can just use:
And also:
|
||
AIE::findEarliestRef(MI, BotFixedBundles, BotFixedBundles.size()).MI; | ||
if (!FixedDepMI) | ||
continue; | ||
|
||
SUnit *FixedDepSU = | ||
DAG->getSUnit(&*getBundleStart(FixedDepMI->getIterator())); | ||
assert(FixedDepSU && "Fixed Bundle has no corresponding SU."); | ||
SDep Dep(&FreeSU, SDep::Artificial); | ||
Dep.setLatency( | ||
AIE::maxLatency(&MI, *TII, *ItinData, /*IncludeStages=*/true)); | ||
FixedDepSU->addPred(Dep, /*Required=*/true); | ||
} | ||
} | ||
}; | ||
|
||
/// Collect all "weak" edges in a separate vector. This allows modifying | ||
/// \p SU.Preds without invalidating iterators. | ||
SmallVector<SDep, 4> getWeakPreds(SUnit &SU) { | ||
|
@@ -543,11 +580,10 @@ class WAWEdges : public ScheduleDAGMutation { | |
LiveRegs.init(*TRI); | ||
bool AddReservedRegs = true; | ||
if (Scheduler) { | ||
assert(!Scheduler->doMBBSchedRegionsTopDown()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. CHECK: this assert is specific for the prologue handling, and the code needs to be extended for epilogue handling. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. No, WAW handling could run on any region. Here I'm making sure that |
||
MachineBasicBlock *MBB = DAG->getBB(); | ||
const BlockState &BS = Scheduler->getInterBlock().getBlockState(MBB); | ||
auto Region = BS.getCurrentRegion(); | ||
auto BottomRegion = BS.getBottom(); | ||
if (*Region.begin() == *BottomRegion.begin()) { | ||
if (&BS.getCurrentRegion() == &BS.getBottom()) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. CHECK: this a relic of the pre-GatheringRegions era There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Not really, the DAGMutator can run for all types of regions. For non-loop blocks, we do not pre-gather regions. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hmm. now you have me worried about reallocation of the region array. pre-gather would imply pre-allocation, making the test 'obviously correct' |
||
// If the region is bottom region, liveouts of region are same as | ||
// liveouts of the MBB | ||
for (const MCPhysReg Reg : BS.LiveOuts) { | ||
|
@@ -665,6 +701,7 @@ AIEBaseSubtarget::getPostRAMutationsImpl(const Triple &TT) { | |
Mutations.emplace_back(std::make_unique<MemoryEdges>()); | ||
Mutations.emplace_back(std::make_unique<MachineSchedWAWEdges>()); | ||
Mutations.emplace_back(std::make_unique<BiasDepth>()); | ||
Mutations.emplace_back(std::make_unique<EmitFixedSUnits>()); | ||
gbossu marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
return Mutations; | ||
} | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,8 +27,6 @@ | |
|
||
#include "AIEBaseSubtarget.h" | ||
#include "MCTargetDesc/AIEMCFormats.h" | ||
#include "llvm-c/DebugInfo.h" | ||
#include "llvm/ADT/SmallVector.h" | ||
#include "llvm/CodeGen/TargetOpcodes.h" | ||
|
||
#include <unordered_map> | ||
|
@@ -78,6 +76,10 @@ template <class I> class Bundle { | |
return true; | ||
} | ||
|
||
if (InstOpCode == TargetOpcode::BUNDLE) { | ||
return !BundleRoot; | ||
} | ||
|
||
// if we have a standalone bundle, we can't add anything. | ||
if (isStandalone()) | ||
return false; | ||
|
@@ -103,6 +105,16 @@ template <class I> class Bundle { | |
MetaInstrs.push_back(Instr); | ||
return; | ||
} | ||
|
||
// Keep track of BUNDLE instructions. They need to be cleaned up when | ||
// de-bundling before re-bundling. See applyBundles() | ||
if (Instr->getOpcode() == TargetOpcode::BUNDLE) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. super nit: |
||
assert(!BundleRoot && | ||
"AIE::Bundle already has a root BUNDLE instruction"); | ||
BundleRoot = Instr; | ||
return; | ||
} | ||
|
||
// Check if the pre-condition is ensured | ||
assert((!ComputeSlots || !isStandalone()) && | ||
"Tried to add an instruction in a standalone Bundle"); | ||
|
@@ -142,6 +154,7 @@ template <class I> class Bundle { | |
Instrs.clear(); | ||
MetaInstrs.clear(); | ||
SlotMap.clear(); | ||
BundleRoot = nullptr; | ||
} | ||
|
||
/// Check if empty | ||
|
@@ -192,6 +205,15 @@ template <class I> class Bundle { | |
return false; | ||
} | ||
|
||
/// Erase the BUNDLE root instruction from its parent MBB. | ||
/// This does not remove the instructions within the BUNDLE, only the root. | ||
void eraseRootFromBlock() { | ||
if (BundleRoot) { | ||
BundleRoot->eraseFromBundle(); | ||
BundleRoot = nullptr; | ||
} | ||
} | ||
|
||
bool isNOPBundle() const { | ||
const VLIWFormat *Format = getFormatOrNull(); | ||
assert(Format); | ||
|
@@ -228,6 +250,10 @@ template <class I> class Bundle { | |
|
||
// Contained meta instructions (These will end up after the bundle) | ||
std::vector<I *> MetaInstrs; | ||
|
||
private: | ||
/// A root BUNDLE instruction if it exists. | ||
I *BundleRoot = nullptr; | ||
}; | ||
|
||
template <class I> bool operator==(const Bundle<I> &B1, const Bundle<I> &B2) { | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
For the future: this magic 8 could be retrieved from TII as well.