-
Notifications
You must be signed in to change notification settings - Fork 12.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[VPlan] Hook IR blocks into VPlan during skeleton creation (NFC) #114292
Changes from all commits
1b89761
4e5c743
b87cf14
4ddc87a
599e690
1a77b55
a2137b4
b4d0eac
2b4c71f
be2c3a6
466b393
10a675e
7996451
5ea6f7b
64909aa
382380a
333536a
e5b8af3
df6894a
927a66d
5468f61
3eef601
1d6db4a
22eeebe
886bcc2
65ac2d7
2f7d530
ba08c2e
4c76bec
43c9186
7f08758
3709f17
a72df24
16a6246
87f2815
b7b43a8
906603f
1e7cac7
4a51d29
f53cf1b
a0af583
968598b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2466,6 +2466,25 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) { | |
return VectorTripCount; | ||
} | ||
|
||
/// Introduces a new VPIRBasicBlock for \p CheckIRBB to \p Plan between the | ||
/// vector preheader and its predecessor, also connecting the new block to the | ||
/// scalar preheader. | ||
static void introduceCheckBlockInVPlan(VPlan &Plan, BasicBlock *CheckIRBB) { | ||
VPBlockBase *ScalarPH = Plan.getScalarPreheader(); | ||
VPBlockBase *VectorPH = Plan.getVectorPreheader(); | ||
VPBlockBase *PreVectorPH = VectorPH->getSinglePredecessor(); | ||
if (PreVectorPH->getNumSuccessors() != 1) { | ||
assert(PreVectorPH->getNumSuccessors() == 2 && "Expected 2 successors"); | ||
assert(PreVectorPH->getSuccessors()[0] == ScalarPH && | ||
"Unexpected successor"); | ||
VPIRBasicBlock *CheckVPIRBB = VPIRBasicBlock::fromBasicBlock(CheckIRBB); | ||
VPBlockUtils::insertOnEdge(PreVectorPH, VectorPH, CheckVPIRBB); | ||
PreVectorPH = CheckVPIRBB; | ||
} | ||
VPBlockUtils::connectBlocks(PreVectorPH, ScalarPH); | ||
PreVectorPH->swapSuccessors(); | ||
} | ||
|
||
void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. (Independent): LoopScalarPreHeader is passed as parameter |
||
Value *Count = getTripCount(); | ||
// Reuse existing vector loop preheader for TC checks. | ||
|
@@ -2540,14 +2559,15 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) { | |
DT->getNode(Bypass)->getIDom()) && | ||
"TC check is expected to dominate Bypass"); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This assert was probably added along with the lines below, but should remain, right? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, this is to check the DT updates from SplitBlock I think |
||
|
||
// Update dominator for Bypass & LoopExit (if needed). | ||
DT->changeImmediateDominator(Bypass, TCCheckBlock); | ||
BranchInst &BI = | ||
*BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters); | ||
if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator())) | ||
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false); | ||
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI); | ||
LoopBypassBlocks.push_back(TCCheckBlock); | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Scalar preheader can be connected in VPlan from the outset, rather than here? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There are a few places that assume the scalar PH has a single predecessor, which would need to be updated. Could pull this in here or adjust as follow-up? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The single predecessor of Scalar PH in VPlan being middle_block, right? Applying this additional connection earlier, is fine as a later follow-up, perhaps w/ a TODO. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Thanks, renamed and added TODO to connectScalarAsBypassLoopInVPlan |
||
// TODO: Wrap LoopVectorPreHeader in VPIRBasicBlock here. | ||
introduceCheckBlockInVPlan(Plan, TCCheckBlock); | ||
} | ||
|
||
BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) { | ||
|
@@ -2564,6 +2584,8 @@ BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) { | |
"Should already be a bypass block due to iteration count check"); | ||
LoopBypassBlocks.push_back(SCEVCheckBlock); | ||
AddedSafetyChecks = true; | ||
|
||
introduceCheckBlockInVPlan(Plan, SCEVCheckBlock); | ||
return SCEVCheckBlock; | ||
} | ||
|
||
|
@@ -2600,6 +2622,7 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) { | |
|
||
AddedSafetyChecks = true; | ||
|
||
introduceCheckBlockInVPlan(Plan, MemCheckBlock); | ||
return MemCheckBlock; | ||
} | ||
|
||
|
@@ -7980,8 +8003,6 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass, | |
DT->getNode(Bypass)->getIDom()) && | ||
"TC check is expected to dominate Bypass"); | ||
|
||
// Update dominator for Bypass. | ||
DT->changeImmediateDominator(Bypass, TCCheckBlock); | ||
LoopBypassBlocks.push_back(TCCheckBlock); | ||
|
||
// Save the trip count so we don't have to regenerate it in the | ||
|
@@ -7996,6 +8017,7 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass, | |
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false); | ||
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI); | ||
|
||
introduceCheckBlockInVPlan(Plan, TCCheckBlock); | ||
return TCCheckBlock; | ||
} | ||
|
||
|
@@ -8027,9 +8049,6 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton( | |
EPI.MainLoopIterationCountCheck->getTerminator()->replaceUsesOfWith( | ||
VecEpilogueIterationCountCheck, LoopVectorPreHeader); | ||
|
||
DT->changeImmediateDominator(LoopVectorPreHeader, | ||
EPI.MainLoopIterationCountCheck); | ||
|
||
EPI.EpilogueIterationCountCheck->getTerminator()->replaceUsesOfWith( | ||
VecEpilogueIterationCountCheck, LoopScalarPreHeader); | ||
|
||
|
@@ -8040,19 +8059,8 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton( | |
EPI.MemSafetyCheck->getTerminator()->replaceUsesOfWith( | ||
VecEpilogueIterationCountCheck, LoopScalarPreHeader); | ||
|
||
DT->changeImmediateDominator( | ||
VecEpilogueIterationCountCheck, | ||
VecEpilogueIterationCountCheck->getSinglePredecessor()); | ||
|
||
DT->changeImmediateDominator(LoopScalarPreHeader, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This one still needed? (Review to be continued from here) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep this one is still needed for now. |
||
EPI.EpilogueIterationCountCheck); | ||
if (!Cost->requiresScalarEpilogue(EPI.EpilogueVF.isVector())) | ||
// If there is an epilogue which must run, there's no edge from the | ||
// middle block to exit blocks and thus no need to update the immediate | ||
// dominator of the exit blocks. | ||
DT->changeImmediateDominator(OrigLoop->getUniqueLatchExitBlock(), | ||
EPI.EpilogueIterationCountCheck); | ||
|
||
// Keep track of bypass blocks, as they feed start values to the induction and | ||
// reduction phis in the scalar loop preheader. | ||
if (EPI.SCEVSafetyCheck) | ||
|
@@ -8143,6 +8151,16 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck( | |
} | ||
ReplaceInstWithInst(Insert->getTerminator(), &BI); | ||
LoopBypassBlocks.push_back(Insert); | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Entry is conceptually an immutable VPIRBB that wraps the original scalar preheader, where SCEVs can be safely expanded, and all runtime checks are added as successors starting with minimal trip count check that is added as its terminal. Does this change when executing the VPlan of an epilog loop, whose Entry is replicated and transformed from old to new? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I don't think it changes conceptually, but here we need to wrap the new entry to be the new node created here, after executing the plan. (There is existing logic to re-use expanded SCEVs from the first execution of the main plan, to avoid duplicated expansions) There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. All VPlans are initialized with the original scalar preheader as their Entry, but this works only when vectorizing the main loop (w/ or w/o vectorizing the epilog). When vectorizing the epilog loop, Entry no longer serves to host SCEV-expand recipes but should instead refer to the block between Middle and "Vector Epilog PreHeader" (called "Vector Epilogue Trip Count Check", but that's also how the first block is named), as depicted in https://llvm.org/docs/Vectorizers.html#epilogue-vectorization. The documentation of Entry in createInitialVPlan() deserves update? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added a comment, thanks! |
||
// A new entry block has been created for the epilogue VPlan. Hook it in, as | ||
// otherwise we would try to modify the entry to the main vector loop. | ||
VPIRBasicBlock *NewEntry = VPIRBasicBlock::fromBasicBlock(Insert); | ||
VPBasicBlock *OldEntry = Plan.getEntry(); | ||
VPBlockUtils::reassociateBlocks(OldEntry, NewEntry); | ||
Plan.setEntry(NewEntry); | ||
delete OldEntry; | ||
|
||
introduceCheckBlockInVPlan(Plan, Insert); | ||
return Insert; | ||
} | ||
|
||
|
@@ -10495,8 +10513,6 @@ bool LoopVectorizePass::processLoop(Loop *L) { | |
EpilogILV.setTripCount(MainILV.getTripCount()); | ||
preparePlanForEpilogueVectorLoop(BestEpiPlan, L, ExpandedSCEVs, EPI); | ||
|
||
assert(DT->verify(DominatorTree::VerificationLevel::Fast) && | ||
"DT not preserved correctly"); | ||
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV, | ||
DT, true, &ExpandedSCEVs); | ||
++LoopsEpilogueVectorized; | ||
|
@@ -10524,6 +10540,9 @@ bool LoopVectorizePass::processLoop(Loop *L) { | |
checkMixedPrecision(L, ORE); | ||
} | ||
|
||
assert(DT->verify(DominatorTree::VerificationLevel::Fast) && | ||
"DT not preserved correctly"); | ||
|
||
std::optional<MDNode *> RemainderLoopID = | ||
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll, | ||
LLVMLoopVectorizeFollowupEpilogue}); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -170,9 +170,7 @@ VPBasicBlock *VPBlockBase::getEntryBasicBlock() { | |
} | ||
|
||
void VPBlockBase::setPlan(VPlan *ParentPlan) { | ||
assert( | ||
(ParentPlan->getEntry() == this || ParentPlan->getPreheader() == this) && | ||
"Can only set plan on its entry or preheader block."); | ||
assert(ParentPlan->getEntry() == this && "Can only set plan on its entry."); | ||
Plan = ParentPlan; | ||
} | ||
|
||
|
@@ -823,16 +821,25 @@ void VPRegionBlock::print(raw_ostream &O, const Twine &Indent, | |
} | ||
#endif | ||
|
||
VPlan::VPlan(VPBasicBlock *OriginalPreheader, VPValue *TC, | ||
VPBasicBlock *EntryVectorPreHeader, VPIRBasicBlock *ScalarHeader) | ||
: VPlan(OriginalPreheader, TC, ScalarHeader) { | ||
VPBlockUtils::connectBlocks(OriginalPreheader, EntryVectorPreHeader); | ||
} | ||
|
||
VPlan::VPlan(VPBasicBlock *OriginalPreheader, | ||
VPBasicBlock *EntryVectorPreHeader, VPIRBasicBlock *ScalarHeader) | ||
: VPlan(OriginalPreheader, ScalarHeader) { | ||
VPBlockUtils::connectBlocks(OriginalPreheader, EntryVectorPreHeader); | ||
} | ||
|
||
VPlan::~VPlan() { | ||
if (Entry) { | ||
VPValue DummyValue; | ||
for (VPBlockBase *Block : vp_depth_first_shallow(Entry)) | ||
Block->dropAllReferences(&DummyValue); | ||
|
||
VPBlockBase::deleteCFG(Entry); | ||
|
||
Preheader->dropAllReferences(&DummyValue); | ||
delete Preheader; | ||
} | ||
for (VPValue *VPV : VPLiveInsToFree) | ||
delete VPV; | ||
|
@@ -855,9 +862,16 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, | |
VPIRBasicBlock *Entry = | ||
VPIRBasicBlock::fromBasicBlock(TheLoop->getLoopPreheader()); | ||
VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph"); | ||
// Connect entry only to vector preheader initially. Entry will also be | ||
// connected to the scalar preheader later, during skeleton creation when | ||
// runtime guards are added as needed. Note that when executing the VPlan for | ||
// an epilogue vector loop, the original entry block here will be replaced by | ||
// a new VPIRBasicBlock wrapping the entry to the epilogue vector loop after | ||
// generating code for the main vector loop. | ||
VPBlockUtils::connectBlocks(Entry, VecPreheader); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Worth noting that this connection is subject to insertion of runtime checks. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added a comment, thanks! |
||
VPIRBasicBlock *ScalarHeader = | ||
VPIRBasicBlock::fromBasicBlock(TheLoop->getHeader()); | ||
auto Plan = std::make_unique<VPlan>(Entry, VecPreheader, ScalarHeader); | ||
auto Plan = std::make_unique<VPlan>(Entry, ScalarHeader); | ||
|
||
// Create SCEV and VPValue for the trip count. | ||
// We use the symbolic max backedge-taken-count, which works also when | ||
|
@@ -981,15 +995,21 @@ void VPlan::execute(VPTransformState *State) { | |
State->CFG.DTU.applyUpdates( | ||
{{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}}); | ||
|
||
// Replace regular VPBB's for the middle and scalar preheader blocks with | ||
// VPIRBasicBlocks wrapping their IR blocks. The IR blocks are created during | ||
// skeleton creation, so we can only create the VPIRBasicBlocks now during | ||
// VPlan execution rather than earlier during VPlan construction. | ||
// Replace regular VPBB's for the vector preheader, middle and scalar | ||
// preheader blocks with VPIRBasicBlocks wrapping their IR blocks. The IR | ||
// blocks are created during skeleton creation, so we can only create the | ||
// VPIRBasicBlocks now during VPlan execution rather than earlier during VPlan | ||
// construction. | ||
BasicBlock *MiddleBB = State->CFG.ExitBB; | ||
VPBasicBlock *MiddleVPBB = getMiddleBlock(); | ||
BasicBlock *ScalarPh = MiddleBB->getSingleSuccessor(); | ||
replaceVPBBWithIRVPBB(getVectorPreheader(), VectorPreHeader); | ||
replaceVPBBWithIRVPBB(getMiddleBlock(), MiddleBB); | ||
replaceVPBBWithIRVPBB(getScalarPreheader(), ScalarPh); | ||
replaceVPBBWithIRVPBB(MiddleVPBB, MiddleBB); | ||
|
||
LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << State->VF | ||
<< ", UF=" << getUF() << '\n'); | ||
setName("Final VPlan"); | ||
LLVM_DEBUG(dump()); | ||
|
||
LLVM_DEBUG(dbgs() << "Executing best plan with VF=" << State->VF | ||
<< ", UF=" << getUF() << '\n'); | ||
|
@@ -1062,9 +1082,6 @@ void VPlan::execute(VPTransformState *State) { | |
} | ||
|
||
State->CFG.DTU.flush(); | ||
assert(State->CFG.DTU.getDomTree().verify( | ||
DominatorTree::VerificationLevel::Fast) && | ||
"DT not preserved correctly"); | ||
} | ||
|
||
InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) { | ||
|
@@ -1117,11 +1134,6 @@ void VPlan::print(raw_ostream &O) const { | |
|
||
printLiveIns(O); | ||
|
||
if (!getPreheader()->empty()) { | ||
O << "\n"; | ||
getPreheader()->print(O, "", SlotTracker); | ||
} | ||
|
||
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<const VPBlockBase *>> | ||
RPOT(getEntry()); | ||
for (const VPBlockBase *Block : RPOT) { | ||
|
@@ -1155,6 +1167,21 @@ std::string VPlan::getName() const { | |
return Out; | ||
} | ||
|
||
VPRegionBlock *VPlan::getVectorLoopRegion() { | ||
// TODO: Cache if possible. | ||
for (VPBlockBase *B : vp_depth_first_shallow(getEntry())) | ||
if (auto *R = dyn_cast<VPRegionBlock>(B)) | ||
return R; | ||
return nullptr; | ||
Comment on lines
+1172
to
+1175
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Better cache the result? Can be done as follow-up. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Leave a TODO? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added thanks! |
||
} | ||
|
||
const VPRegionBlock *VPlan::getVectorLoopRegion() const { | ||
for (const VPBlockBase *B : vp_depth_first_shallow(getEntry())) | ||
if (auto *R = dyn_cast<VPRegionBlock>(B)) | ||
return R; | ||
return nullptr; | ||
} | ||
|
||
LLVM_DUMP_METHOD | ||
void VPlan::printDOT(raw_ostream &O) const { | ||
VPlanPrinter Printer(O, *this); | ||
|
@@ -1205,7 +1232,6 @@ static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry, | |
|
||
VPlan *VPlan::duplicate() { | ||
// Clone blocks. | ||
VPBasicBlock *NewPreheader = Preheader->clone(); | ||
const auto &[NewEntry, __] = cloneFrom(Entry); | ||
|
||
BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock(); | ||
|
@@ -1215,8 +1241,7 @@ VPlan *VPlan::duplicate() { | |
return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB; | ||
})); | ||
// Create VPlan, clone live-ins and remap operands in the cloned blocks. | ||
auto *NewPlan = | ||
new VPlan(NewPreheader, cast<VPBasicBlock>(NewEntry), NewScalarHeader); | ||
auto *NewPlan = new VPlan(cast<VPBasicBlock>(NewEntry), NewScalarHeader); | ||
DenseMap<VPValue *, VPValue *> Old2NewVPValues; | ||
for (VPValue *OldLiveIn : VPLiveInsToFree) { | ||
Old2NewVPValues[OldLiveIn] = | ||
|
@@ -1236,7 +1261,6 @@ VPlan *VPlan::duplicate() { | |
// else NewTripCount will be created and inserted into Old2NewVPValues when | ||
// TripCount is cloned. In any case NewPlan->TripCount is updated below. | ||
|
||
remapOperands(Preheader, NewPreheader, Old2NewVPValues); | ||
remapOperands(Entry, NewEntry, Old2NewVPValues); | ||
|
||
// Initialize remaining fields of cloned VPlan. | ||
|
@@ -1288,8 +1312,6 @@ void VPlanPrinter::dump() { | |
OS << "edge [fontname=Courier, fontsize=30]\n"; | ||
OS << "compound=true\n"; | ||
|
||
dumpBlock(Plan.getPreheader()); | ||
|
||
for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry())) | ||
dumpBlock(Block); | ||
|
||
|
@@ -1550,7 +1572,6 @@ void VPSlotTracker::assignNames(const VPlan &Plan) { | |
assignName(Plan.BackedgeTakenCount); | ||
for (VPValue *LI : Plan.VPLiveInsToFree) | ||
assignName(LI); | ||
assignNames(Plan.getPreheader()); | ||
|
||
ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>> | ||
RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry())); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ScalarPH is expected to be the other successor of PreVectorPH, so could be asserted (or another way to retrieve ScalarPH), although more general w/o this assert?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Added an assert for now, than