Skip to content

Commit

Permalink
[VPlan] Hook IR blocks into VPlan during skeleton creation (NFC)
Browse files Browse the repository at this point in the history
As a first step to move towards modeling the full skeleton in VPlan,
start by wrapping IR blocks created during legacy skeleton creation in
VPIRBasicBlocks and hook them into the VPlan. As a result, the skeleton
CFG is represented in VPlan just before the plan is executed, which allows
gradually moving parts of skeleton creation into recipes in the VPBBs.

Note that this allows retiring some manual DT updates, as this will be
handled automatically during VPlan execution.
  • Loading branch information
fhahn committed Nov 4, 2024
1 parent 45ae7d1 commit 1b89761
Show file tree
Hide file tree
Showing 11 changed files with 167 additions and 123 deletions.
76 changes: 53 additions & 23 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2426,6 +2426,26 @@ InnerLoopVectorizer::getOrCreateVectorTripCount(BasicBlock *InsertBlock) {
return VectorTripCount;
}

/// Add an edge from the vector preheader's single predecessor in \p Plan to
/// the scalar preheader, so that the predecessor leads to both preheaders.
///
/// The existing edge to the vector preheader is removed and re-added after the
/// new edge, so the scalar preheader is connected before the vector preheader.
static void connectScalarPreheaderInVPlan(VPlan &Plan) {
  VPBlockBase *VectorPH = Plan.getVectorPreheader();
  VPBlockBase *ScalarPH = Plan.getScalarPreheader();
  VPBlockBase *PredVPB = VectorPH->getSinglePredecessor();
  assert(PredVPB && "vector preheader must have a single predecessor");
  // Detach from the very block that is re-attached below instead of from
  // Plan.getEntry(), so the helper does not silently depend on the
  // predecessor being the plan's entry.
  VPBlockUtils::disconnectBlocks(PredVPB, VectorPH);
  VPBlockUtils::connectBlocks(PredVPB, ScalarPH);
  VPBlockUtils::connectBlocks(PredVPB, VectorPH);
}

/// Wrap \p CheckIRBB in a VPIRBasicBlock and splice it into \p Plan between
/// the vector preheader and that preheader's single predecessor. The wrapped
/// block is then connected to the scalar preheader and to the vector
/// preheader, in that order.
static void connectCheckBlockInVPlan(VPlan &Plan, BasicBlock *CheckIRBB) {
  // Look up every affected block before the CFG is modified.
  VPBlockBase *ScalarPreheader = Plan.getScalarPreheader();
  VPBlockBase *VecPreheader = Plan.getVectorPreheader();
  VPBlockBase *OldPred = VecPreheader->getSinglePredecessor();

  // Cut the old edge and route the former predecessor to the check block.
  VPBlockUtils::disconnectBlocks(OldPred, VecPreheader);
  VPIRBasicBlock *CheckBlock = VPIRBasicBlock::fromBasicBlock(CheckIRBB);
  VPBlockUtils::connectBlocks(OldPred, CheckBlock);

  // Outgoing edges of the check block: scalar preheader first, then the
  // vector preheader.
  VPBlockUtils::connectBlocks(CheckBlock, ScalarPreheader);
  VPBlockUtils::connectBlocks(CheckBlock, VecPreheader);
}

void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
Value *Count = getTripCount();
// Reuse existing vector loop preheader for TC checks.
Expand Down Expand Up @@ -2511,13 +2531,14 @@ void InnerLoopVectorizer::emitIterationCountCheck(BasicBlock *Bypass) {
"TC check is expected to dominate Bypass");

// Update dominator for Bypass & LoopExit (if needed).
DT->changeImmediateDominator(Bypass, TCCheckBlock);
BranchInst &BI =
*BranchInst::Create(Bypass, LoopVectorPreHeader, CheckMinIters);
if (hasBranchWeightMD(*OrigLoop->getLoopLatch()->getTerminator()))
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);
LoopBypassBlocks.push_back(TCCheckBlock);

connectScalarPreheaderInVPlan(Plan);
}

BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
Expand All @@ -2534,6 +2555,8 @@ BasicBlock *InnerLoopVectorizer::emitSCEVChecks(BasicBlock *Bypass) {
"Should already be a bypass block due to iteration count check");
LoopBypassBlocks.push_back(SCEVCheckBlock);
AddedSafetyChecks = true;

connectCheckBlockInVPlan(Plan, SCEVCheckBlock);
return SCEVCheckBlock;
}

Expand Down Expand Up @@ -2570,6 +2593,7 @@ BasicBlock *InnerLoopVectorizer::emitMemRuntimeChecks(BasicBlock *Bypass) {

AddedSafetyChecks = true;

connectCheckBlockInVPlan(Plan, MemCheckBlock);
return MemCheckBlock;
}

Expand Down Expand Up @@ -7648,10 +7672,10 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(

// 0. Generate SCEV-dependent code into the preheader, including TripCount,
// before making any changes to the CFG.
if (!BestVPlan.getPreheader()->empty()) {
if (!BestVPlan.getEntry()->empty()) {
State.CFG.PrevBB = OrigLoop->getLoopPreheader();
State.Builder.SetInsertPoint(OrigLoop->getLoopPreheader()->getTerminator());
BestVPlan.getPreheader()->execute(&State);
BestVPlan.getEntry()->execute(&State);
}
if (!ILV.getTripCount())
ILV.setTripCount(State.get(BestVPlan.getTripCount(), VPLane(0)));
Expand Down Expand Up @@ -7859,8 +7883,6 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
DT->getNode(Bypass)->getIDom()) &&
"TC check is expected to dominate Bypass");

// Update dominator for Bypass.
DT->changeImmediateDominator(Bypass, TCCheckBlock);
LoopBypassBlocks.push_back(TCCheckBlock);

// Save the trip count so we don't have to regenerate it in the
Expand All @@ -7875,6 +7897,12 @@ EpilogueVectorizerMainLoop::emitIterationCountCheck(BasicBlock *Bypass,
setBranchWeights(BI, MinItersBypassWeights, /*IsExpected=*/false);
ReplaceInstWithInst(TCCheckBlock->getTerminator(), &BI);

VPBlockBase *VectorPH = Plan.getVectorPreheader();
VPBlockBase *PredVPB = VectorPH->getSinglePredecessor();
if (PredVPB->getNumSuccessors() == 1)
connectScalarPreheaderInVPlan(Plan);
else
connectCheckBlockInVPlan(Plan, TCCheckBlock);
return TCCheckBlock;
}

Expand Down Expand Up @@ -7905,32 +7933,19 @@ EpilogueVectorizerEpilogueLoop::createEpilogueVectorizedLoopSkeleton(
EPI.MainLoopIterationCountCheck->getTerminator()->replaceUsesOfWith(
VecEpilogueIterationCountCheck, LoopVectorPreHeader);

DT->changeImmediateDominator(LoopVectorPreHeader,
EPI.MainLoopIterationCountCheck);

EPI.EpilogueIterationCountCheck->getTerminator()->replaceUsesOfWith(
VecEpilogueIterationCountCheck, LoopScalarPreHeader);

if (EPI.SCEVSafetyCheck)
EPI.SCEVSafetyCheck->getTerminator()->replaceUsesOfWith(
VecEpilogueIterationCountCheck, LoopScalarPreHeader);
if (EPI.MemSafetyCheck)
if (EPI.MemSafetyCheck) {
EPI.MemSafetyCheck->getTerminator()->replaceUsesOfWith(
VecEpilogueIterationCountCheck, LoopScalarPreHeader);

DT->changeImmediateDominator(
VecEpilogueIterationCountCheck,
VecEpilogueIterationCountCheck->getSinglePredecessor());
}

DT->changeImmediateDominator(LoopScalarPreHeader,
EPI.EpilogueIterationCountCheck);
if (!Cost->requiresScalarEpilogue(EPI.EpilogueVF.isVector()))
// If there is an epilogue which must run, there's no edge from the
// middle block to exit blocks and thus no need to update the immediate
// dominator of the exit blocks.
DT->changeImmediateDominator(LoopExitBlock,
EPI.EpilogueIterationCountCheck);

// Keep track of bypass blocks, as they feed start values to the induction and
// reduction phis in the scalar loop preheader.
if (EPI.SCEVSafetyCheck)
Expand Down Expand Up @@ -8033,6 +8048,20 @@ EpilogueVectorizerEpilogueLoop::emitMinimumVectorEpilogueIterCountCheck(
}
ReplaceInstWithInst(Insert->getTerminator(), &BI);
LoopBypassBlocks.push_back(Insert);

// A new entry block has been created for the epilogue VPlan. Hook it in.
VPIRBasicBlock *NewEntry = VPIRBasicBlock::fromBasicBlock(Insert);
VPBasicBlock *OldEntry = Plan.getEntry();
VPBlockUtils::reassociateBlocks(OldEntry, NewEntry);
Plan.setEntry(NewEntry);
for (auto &R : make_early_inc_range(*NewEntry)) {
auto *VPIR = dyn_cast<VPIRInstruction>(&R);
if (!VPIR || !isa<PHINode>(VPIR->getInstruction()))
break;
VPIR->eraseFromParent();
}

connectScalarPreheaderInVPlan(Plan);
return Insert;
}

Expand Down Expand Up @@ -10256,7 +10285,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
// should be removed once induction resume value creation is done
// directly in VPlan.
EpilogILV.setTripCount(MainILV.getTripCount());
for (auto &R : make_early_inc_range(*BestEpiPlan.getPreheader())) {
for (auto &R : make_early_inc_range(*BestEpiPlan.getEntry())) {
auto *ExpandR = dyn_cast<VPExpandSCEVRecipe>(&R);
if (!ExpandR)
continue;
Expand Down Expand Up @@ -10316,8 +10345,6 @@ bool LoopVectorizePass::processLoop(Loop *L) {
cast<VPHeaderPHIRecipe>(&R)->setStartValue(StartVal);
}

assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
"DT not preserved correctly");
LVP.executePlan(EPI.EpilogueVF, EPI.EpilogueUF, BestEpiPlan, EpilogILV,
DT, true, &ExpandedSCEVs);
++LoopsEpilogueVectorized;
Expand Down Expand Up @@ -10345,6 +10372,9 @@ bool LoopVectorizePass::processLoop(Loop *L) {
checkMixedPrecision(L, ORE);
}

assert(DT->verify(DominatorTree::VerificationLevel::Fast) &&
"DT not preserved correctly");

std::optional<MDNode *> RemainderLoopID =
makeFollowupLoopID(OrigLoopID, {LLVMLoopVectorizeFollowupAll,
LLVMLoopVectorizeFollowupEpilogue});
Expand Down
67 changes: 41 additions & 26 deletions llvm/lib/Transforms/Vectorize/VPlan.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -170,9 +170,8 @@ VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
}

/// Record \p ParentPlan as the plan this block belongs to. Only valid for the
/// block that \p ParentPlan reports as its entry.
void VPBlockBase::setPlan(VPlan *ParentPlan) {
  assert(ParentPlan->getEntry() == this &&
         "Can only set plan on its entry block.");
  Plan = ParentPlan;
}

Expand Down Expand Up @@ -463,7 +462,6 @@ void VPIRBasicBlock::execute(VPTransformState *State) {
(getNumSuccessors() == 0 || isa<BranchInst>(IRBB->getTerminator())) &&
"other blocks must be terminated by a branch");
}

for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
VPBasicBlock *PredVPBB = PredVPBlock->getExitingBasicBlock();
BasicBlock *PredBB = State->CFG.VPBB2IRBB[PredVPBB];
Expand Down Expand Up @@ -851,9 +849,6 @@ VPlan::~VPlan() {
Block->dropAllReferences(&DummyValue);

VPBlockBase::deleteCFG(Entry);

Preheader->dropAllReferences(&DummyValue);
delete Preheader;
}
for (VPValue *VPV : VPLiveInsToFree)
delete VPV;
Expand All @@ -876,9 +871,10 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy,
VPIRBasicBlock *Entry =
VPIRBasicBlock::fromBasicBlock(TheLoop->getLoopPreheader());
VPBasicBlock *VecPreheader = new VPBasicBlock("vector.ph");
VPBlockUtils::connectBlocks(Entry, VecPreheader);
VPIRBasicBlock *ScalarHeader =
VPIRBasicBlock::fromBasicBlock(TheLoop->getHeader());
auto Plan = std::make_unique<VPlan>(Entry, VecPreheader, ScalarHeader);
auto Plan = std::make_unique<VPlan>(Entry, ScalarHeader);

// Create SCEV and VPValue for the trip count.

Expand Down Expand Up @@ -1021,8 +1017,9 @@ void VPlan::execute(VPTransformState *State) {
BasicBlock *VectorPreHeader = State->CFG.PrevBB;
State->Builder.SetInsertPoint(VectorPreHeader->getTerminator());

// Disconnect VectorPreHeader from ExitBB in both the CFG and DT.
cast<BranchInst>(VectorPreHeader->getTerminator())->setSuccessor(0, nullptr);
replaceVPBBWithIRVPBB(
cast<VPBasicBlock>(getVectorLoopRegion()->getSinglePredecessor()),
VectorPreHeader);
State->CFG.DTU.applyUpdates(
{{DominatorTree::Delete, VectorPreHeader, State->CFG.ExitBB}});

Expand All @@ -1049,8 +1046,10 @@ void VPlan::execute(VPTransformState *State) {
State->CFG.DTU.applyUpdates(
{{DominatorTree::Delete, ScalarPh, ScalarPh->getSingleSuccessor()}});

ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<VPBlockBase *>> RPOT(
Entry);
// Generate code in the loop pre-header and body.
for (VPBlockBase *Block : vp_depth_first_shallow(Entry))
for (VPBlockBase *Block : make_range(RPOT.begin(), RPOT.end()))
Block->execute(State);

VPBasicBlock *LatchVPBB = getVectorLoopRegion()->getExitingBasicBlock();
Expand Down Expand Up @@ -1101,9 +1100,6 @@ void VPlan::execute(VPTransformState *State) {
}

State->CFG.DTU.flush();
assert(State->CFG.DTU.getDomTree().verify(
DominatorTree::VerificationLevel::Fast) &&
"DT not preserved correctly");
}

InstructionCost VPlan::cost(ElementCount VF, VPCostContext &Ctx) {
Expand Down Expand Up @@ -1156,12 +1152,10 @@ void VPlan::print(raw_ostream &O) const {

printLiveIns(O);

if (!getPreheader()->empty()) {
O << "\n";
getPreheader()->print(O, "", SlotTracker);
}
ReversePostOrderTraversal<VPBlockShallowTraversalWrapper<const VPBlockBase *>>
RPOT(getEntry());

for (const VPBlockBase *Block : vp_depth_first_shallow(getEntry())) {
for (const VPBlockBase *Block : RPOT) {
O << '\n';
Block->print(O, "", SlotTracker);
}
Expand Down Expand Up @@ -1192,6 +1186,20 @@ std::string VPlan::getName() const {
return Out;
}

/// Return the first VPRegionBlock found by a shallow depth-first walk that
/// starts at the plan's entry, or nullptr if the walk encounters no region.
VPRegionBlock *VPlan::getVectorLoopRegion() {
  for (VPBlockBase *Block : vp_depth_first_shallow(getEntry())) {
    if (!isa<VPRegionBlock>(Block))
      continue;
    return cast<VPRegionBlock>(Block);
  }
  return nullptr;
}

/// Const overload: return the first VPRegionBlock found by a shallow
/// depth-first walk that starts at the plan's entry, or nullptr if the walk
/// encounters no region.
const VPRegionBlock *VPlan::getVectorLoopRegion() const {
  const VPRegionBlock *Region = nullptr;
  for (const VPBlockBase *Block : vp_depth_first_shallow(getEntry())) {
    Region = dyn_cast<VPRegionBlock>(Block);
    if (Region)
      break;
  }
  return Region;
}

LLVM_DUMP_METHOD
void VPlan::printDOT(raw_ostream &O) const {
VPlanPrinter Printer(O, *this);
Expand Down Expand Up @@ -1242,7 +1250,6 @@ static void remapOperands(VPBlockBase *Entry, VPBlockBase *NewEntry,

VPlan *VPlan::duplicate() {
// Clone blocks.
VPBasicBlock *NewPreheader = Preheader->clone();
const auto &[NewEntry, __] = cloneFrom(Entry);

BasicBlock *ScalarHeaderIRBB = getScalarHeader()->getIRBasicBlock();
Expand All @@ -1252,8 +1259,7 @@ VPlan *VPlan::duplicate() {
return VPIRBB && VPIRBB->getIRBasicBlock() == ScalarHeaderIRBB;
}));
// Create VPlan, clone live-ins and remap operands in the cloned blocks.
auto *NewPlan =
new VPlan(NewPreheader, cast<VPBasicBlock>(NewEntry), NewScalarHeader);
auto *NewPlan = new VPlan(cast<VPBasicBlock>(NewEntry), NewScalarHeader);
DenseMap<VPValue *, VPValue *> Old2NewVPValues;
for (VPValue *OldLiveIn : VPLiveInsToFree) {
Old2NewVPValues[OldLiveIn] =
Expand All @@ -1273,7 +1279,6 @@ VPlan *VPlan::duplicate() {
// else NewTripCount will be created and inserted into Old2NewVPValues when
// TripCount is cloned. In any case NewPlan->TripCount is updated below.

remapOperands(Preheader, NewPreheader, Old2NewVPValues);
remapOperands(Entry, NewEntry, Old2NewVPValues);

// Initialize remaining fields of cloned VPlan.
Expand All @@ -1287,6 +1292,19 @@ VPlan *VPlan::duplicate() {
return NewPlan;
}

/// Return the scalar preheader of this plan, located through the successors
/// of the block that follows the vector loop region (the "middle" block), or
/// nullptr if no such block is found.
VPBasicBlock *VPlan::getScalarPreheader() {
  auto *MiddleVPBB =
      cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor());
  // Order is strict: first is the exit block, second is the scalar preheader.
  if (MiddleVPBB->getNumSuccessors() == 2)
    return cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);

  // getSingleSuccessor() may return null (e.g. when the middle block has no
  // successor). dyn_cast_or_null accepts a null input, so that case yields
  // nullptr here instead of tripping dyn_cast's non-null assertion.
  return dyn_cast_or_null<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)

Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
Expand Down Expand Up @@ -1325,8 +1343,6 @@ void VPlanPrinter::dump() {
OS << "edge [fontname=Courier, fontsize=30]\n";
OS << "compound=true\n";

dumpBlock(Plan.getPreheader());

for (const VPBlockBase *Block : vp_depth_first_shallow(Plan.getEntry()))
dumpBlock(Block);

Expand Down Expand Up @@ -1587,7 +1603,6 @@ void VPSlotTracker::assignNames(const VPlan &Plan) {
assignName(Plan.BackedgeTakenCount);
for (VPValue *LI : Plan.VPLiveInsToFree)
assignName(LI);
assignNames(Plan.getPreheader());

ReversePostOrderTraversal<VPBlockDeepTraversalWrapper<const VPBlockBase *>>
RPOT(VPBlockDeepTraversalWrapper<const VPBlockBase *>(Plan.getEntry()));
Expand Down
Loading

0 comments on commit 1b89761

Please sign in to comment.