Skip to content

Commit

Permalink
[VPlan] Use ResumePhi to create reduction resume phis.
Browse files Browse the repository at this point in the history
Use VPInstruction::ResumePhi to create phi nodes for reduction resume
values.

This allows simplifying createAndCollectMergePhiForReduction so that it
only collects reduction resume phis when vectorizing epilogue loops and
adds extra incoming edges from the main vector loop.
  • Loading branch information
fhahn committed Oct 7, 2024
1 parent ee57a68 commit 10cc688
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 41 deletions.
82 changes: 41 additions & 41 deletions llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7435,23 +7435,31 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
}

// Check if \p RedResult is a ComputeReductionResult instruction, and if it is
// create a merge phi node for it.
static void createAndCollectMergePhiForReduction(
VPInstruction *RedResult,
VPTransformState &State, Loop *OrigLoop, BasicBlock *LoopMiddleBlock,
bool VectorizingEpilogue) {
// create a merge phi node for it and add incoming values from the main vector
// loop.
static void updateAndCollectMergePhiForReductionForEpilogueVectorization(
VPInstruction *RedResult, VPTransformState &State, Loop *OrigLoop,
BasicBlock *LoopMiddleBlock, bool VectorizingEpilogue) {
if (!RedResult ||
RedResult->getOpcode() != VPInstruction::ComputeReductionResult)
return;

using namespace VPlanPatternMatch;
VPValue *ResumePhiVPV =
cast<VPInstruction>(*find_if(RedResult->users(), [](VPUser *U) {
return match(U, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue(),
m_VPValue()));
}));
auto *BCBlockPhi = cast<PHINode>(State.get(ResumePhiVPV, true));
auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand(0));
const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
if (!VectorizingEpilogue)
return;

Value *FinalValue = State.get(RedResult, VPLane(VPLane::getFirstLane()));
auto *ResumePhi =
dyn_cast<PHINode>(PhiR->getStartValue()->getUnderlyingValue());
if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
RdxDesc.getRecurrenceKind())) {
auto *Cmp = cast<ICmpInst>(PhiR->getStartValue()->getUnderlyingValue());
assert(Cmp->getPredicate() == CmpInst::ICMP_NE);
assert(Cmp->getOperand(1) == RdxDesc.getRecurrenceStartValue());
Expand All @@ -7461,40 +7469,15 @@ static void createAndCollectMergePhiForReduction(
"when vectorizing the epilogue loop, we need a resume phi from main "
"vector loop");

// TODO: bc.merge.rdx should not be created here, instead it should be
// modeled in VPlan.
BasicBlock *LoopScalarPreHeader = OrigLoop->getLoopPreheader();
// Create a phi node that merges control-flow from the backedge-taken check
// block and the middle block.
auto *BCBlockPhi =
PHINode::Create(FinalValue->getType(), 2, "bc.merge.rdx",
LoopScalarPreHeader->getTerminator()->getIterator());

// If we are fixing reductions in the epilogue loop then we should already
// have created a bc.merge.rdx Phi after the main vector body. Ensure that
// we carry over the incoming values correctly.
for (auto *Incoming : predecessors(LoopScalarPreHeader)) {
if (Incoming == LoopMiddleBlock)
BCBlockPhi->addIncoming(FinalValue, Incoming);
else if (ResumePhi && is_contained(ResumePhi->blocks(), Incoming))
BCBlockPhi->addIncoming(ResumePhi->getIncomingValueForBlock(Incoming),
Incoming);
else
BCBlockPhi->addIncoming(RdxDesc.getRecurrenceStartValue(), Incoming);
if (ResumePhi && is_contained(ResumePhi->blocks(), Incoming))
BCBlockPhi->setIncomingValueForBlock(
Incoming, ResumePhi->getIncomingValueForBlock(Incoming));
}

auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
// TODO: This fixup should instead be modeled in VPlan.
// Fix the scalar loop reduction variable with the incoming reduction sum
// from the vector body and from the backedge value.
int IncomingEdgeBlockIdx =
OrigPhi->getBasicBlockIndex(OrigLoop->getLoopLatch());
assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
// Pick the other block.
int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
OrigPhi->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi);
Instruction *LoopExitInst = RdxDesc.getLoopExitInstr();
OrigPhi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
}

DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
Expand Down Expand Up @@ -7585,11 +7568,12 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
// 2.5 Collect reduction resume values.
auto *ExitVPBB =
cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
for (VPRecipeBase &R : *ExitVPBB) {
createAndCollectMergePhiForReduction(
dyn_cast<VPInstruction>(&R), State, OrigLoop,
State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs);
}
if (IsEpilogueVectorization)
for (VPRecipeBase &R : *ExitVPBB) {
updateAndCollectMergePhiForReductionForEpilogueVectorization(
dyn_cast<VPInstruction>(&R), State, OrigLoop,
State.CFG.VPBB2IRBB[ExitVPBB], ExpandedSCEVs);
}

// 2.6. Maintain Loop Hints
// Keep all loop hints from the original loop on the vector loop (we'll
Expand Down Expand Up @@ -9383,6 +9367,22 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
});
FinalReductionResult->insertBefore(*MiddleVPBB, IP);

VPBasicBlock *ScalarPHVPBB = nullptr;
if (MiddleVPBB->getNumSuccessors() == 2) {
// Order is strict: first is the exit block, second is the scalar
// preheader.
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSuccessors()[1]);
} else {
ScalarPHVPBB = cast<VPBasicBlock>(MiddleVPBB->getSingleSuccessor());
}

VPBuilder ScalarPHBuilder(ScalarPHVPBB);
auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
VPInstruction::ResumePhi, {FinalReductionResult, PhiR->getStartValue()},
{}, "bc.merge.rdx");
auto *RedPhi = cast<PHINode>(PhiR->getUnderlyingInstr());
Plan->addLiveOut(RedPhi, ResumePhiRecipe);

// Adjust AnyOf reductions; replace the reduction phi for the selected value
// with a boolean reduction phi node to check if the condition is true in
// any iteration. The final value is selected by the final
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; IF-EVL-INLOOP-NEXT: No successors
; IF-EVL-INLOOP-EMPTY:
; IF-EVL-INLOOP-NEXT: scalar.ph:
; IF-EVL-INLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start>
; IF-EVL-INLOOP-NEXT: No successors
; IF-EVL-INLOOP-EMPTY:
; IF-EVL-INLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]>
; IF-EVL-INLOOP-NEXT: }
;

Expand Down Expand Up @@ -104,7 +107,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; NO-VP-OUTLOOP-NEXT: No successors
; NO-VP-OUTLOOP-EMPTY:
; NO-VP-OUTLOOP-NEXT: scalar.ph:
; NO-VP-OUTLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start>
; NO-VP-OUTLOOP-NEXT: No successors
; NO-VP-OUTLOOP-EMPTY:
; NO-VP-OUTLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]>
; NO-VP-OUTLOOP-NEXT: }
;

Expand Down Expand Up @@ -143,7 +149,10 @@ define i32 @reduction(ptr %a, i64 %n, i32 %start) {
; NO-VP-INLOOP-NEXT: No successors
; NO-VP-INLOOP-EMPTY:
; NO-VP-INLOOP-NEXT: scalar.ph:
; NO-VP-INLOOP-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RDX]]>, ir<%start>
; NO-VP-INLOOP-NEXT: No successors
; NO-VP-INLOOP-EMPTY:
; NO-VP-INLOOP-NEXT: Live-out i32 %rdx = vp<[[RED_RESUME]]>
; NO-VP-INLOOP-NEXT: }
;
entry:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -232,9 +232,11 @@ define i32 @sink_replicate_region_3_reduction(i32 %x, i8 %y, ptr %ptr) optsize {
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
; CHECK-NEXT: EMIT vp<[[RESUME_1_P:%.*]]> = resume-phi vp<[[RESUME_1]]>, ir<0>
; CHECK-NEXT: EMIT vp<[[RESUME_RED:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<1234>
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: Live-out i32 %recur = vp<[[RESUME_1_P]]>
; CHECK-NEXT: Live-out i32 %and.red = vp<[[RESUME_RED]]>
; CHECK-NEXT: }
;
entry:
Expand Down
9 changes: 9 additions & 0 deletions llvm/test/Transforms/LoopVectorize/vplan-printing.ll
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,10 @@ define float @print_reduction(i64 %n, ptr noalias %y) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00>
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: Live-out float %red = vp<[[RED_RESUME]]>
; CHECK-NEXT: }
;
entry:
Expand Down Expand Up @@ -221,7 +224,10 @@ define void @print_reduction_with_invariant_store(i64 %n, ptr noalias %y, ptr no
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00>
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: Live-out float %red = vp<[[RED_RESUME]]>
; CHECK-NEXT: }
;
entry:
Expand Down Expand Up @@ -447,7 +453,10 @@ define float @print_fmuladd_strict(ptr %a, ptr %b, i64 %n) {
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: scalar.ph
; CHECK-NEXT: EMIT vp<[[RED_RESUME:%.+]]> = resume-phi vp<[[RED_RES]]>, ir<0.000000e+00>
; CHECK-NEXT: No successors
; CHECK-EMPTY:
; CHECK-NEXT: Live-out float %sum.07 = vp<[[RED_RESUME]]>
; CHECK-NEXT:}

entry:
Expand Down

0 comments on commit 10cc688

Please sign in to comment.