[VPlan] Use ResumePhi to create reduction resume phis. #110004
@@ -7562,67 +7562,62 @@ static void addRuntimeUnrollDisableMetaData(Loop *L) {
   }
 }

-// Check if \p RedResult is a ComputeReductionResult instruction, and if it is
-// create a merge phi node for it.
-static void createAndCollectMergePhiForReduction(
-    VPInstruction *RedResult,
-    VPTransformState &State, Loop *OrigLoop, BasicBlock *LoopMiddleBlock,
-    bool VectorizingEpilogue) {
-  if (!RedResult ||
-      RedResult->getOpcode() != VPInstruction::ComputeReductionResult)
+// If \p R is a ComputeReductionResult when vectorizing the epilog loop,
+// fix the reduction's scalar PHI node by adding the incoming value from the
+// main vector loop.
+static void fixReductionScalarResumeWhenVectorizingEpilog(
+    VPRecipeBase *R, VPTransformState &State, Loop *OrigLoop,
+    BasicBlock *LoopMiddleBlock) {
+  auto *EpiRedResult = dyn_cast<VPInstruction>(R);
+  if (!EpiRedResult ||
+      EpiRedResult->getOpcode() != VPInstruction::ComputeReductionResult)
     return;

-  auto *PhiR = cast<VPReductionPHIRecipe>(RedResult->getOperand(0));
-  const RecurrenceDescriptor &RdxDesc = PhiR->getRecurrenceDescriptor();
-
-  Value *FinalValue = State.get(RedResult, VPLane(VPLane::getFirstLane()));
-  auto *ResumePhi =
-      dyn_cast<PHINode>(PhiR->getStartValue()->getUnderlyingValue());
-  if (VectorizingEpilogue && RecurrenceDescriptor::isAnyOfRecurrenceKind(
-                                 RdxDesc.getRecurrenceKind())) {
-    auto *Cmp = cast<ICmpInst>(PhiR->getStartValue()->getUnderlyingValue());
-    assert(Cmp->getPredicate() == CmpInst::ICMP_NE);
-    assert(Cmp->getOperand(1) == RdxDesc.getRecurrenceStartValue());
-    ResumePhi = cast<PHINode>(Cmp->getOperand(0));
-  }
-  assert((!VectorizingEpilogue || ResumePhi) &&
-         "when vectorizing the epilogue loop, we need a resume phi from main "
-         "vector loop");
-
-  // TODO: bc.merge.rdx should not be created here, instead it should be
-  // modeled in VPlan.
+  auto *EpiRedHeaderPhi =
+      cast<VPReductionPHIRecipe>(EpiRedResult->getOperand(0));
+  const RecurrenceDescriptor &RdxDesc =
+      EpiRedHeaderPhi->getRecurrenceDescriptor();
+  Value *MainResumeValue =
+      EpiRedHeaderPhi->getStartValue()->getUnderlyingValue();
+  if (RecurrenceDescriptor::isAnyOfRecurrenceKind(
+          RdxDesc.getRecurrenceKind())) {
+    auto *Cmp = cast<ICmpInst>(MainResumeValue);
+    assert(Cmp->getPredicate() == CmpInst::ICMP_NE &&
+           "AnyOf expected to start with ICMP_NE");
+    assert(Cmp->getOperand(1) == RdxDesc.getRecurrenceStartValue() &&
+           "AnyOf expected to start by comparing main resume value to original "
+           "start value");
+    MainResumeValue = Cmp->getOperand(0);
+  }
+  PHINode *MainResumePhi = cast<PHINode>(MainResumeValue);
+
+  // When fixing reductions in the epilogue loop we should already have
+  // created a bc.merge.rdx Phi after the main vector body. Ensure that we carry
+  // over the incoming values correctly.
+  using namespace VPlanPatternMatch;
+  auto IsResumePhi = [](VPUser *U) {
+    return match(
+        U, m_VPInstruction<VPInstruction::ResumePhi>(m_VPValue(), m_VPValue()));
+  };
+  assert(count_if(EpiRedResult->users(), IsResumePhi) == 1 &&
+         "ResumePhi must have a single user");
+  auto *EpiResumePhiVPI =
+      cast<VPInstruction>(*find_if(EpiRedResult->users(), IsResumePhi));
+  auto *EpiResumePhi = cast<PHINode>(State.get(EpiResumePhiVPI, true));
   BasicBlock *LoopScalarPreHeader = OrigLoop->getLoopPreheader();
-  // Create a phi node that merges control-flow from the backedge-taken check
-  // block and the middle block.

Review comment: nit: what is "backedge-taken check block", abbreviated in "BCBlock"?
Reply: Not entirely sure, but I think this meant the iteration checks.
(A rough IR sketch of these blocks follows this hunk.)

-  auto *BCBlockPhi =
-      PHINode::Create(FinalValue->getType(), 2, "bc.merge.rdx",
-                      LoopScalarPreHeader->getTerminator()->getIterator());
-
-  // If we are fixing reductions in the epilogue loop then we should already
-  // have created a bc.merge.rdx Phi after the main vector body. Ensure that
-  // we carry over the incoming values correctly.
+  unsigned UpdateCnt = 0;
   for (auto *Incoming : predecessors(LoopScalarPreHeader)) {
-    if (Incoming == LoopMiddleBlock)
-      BCBlockPhi->addIncoming(FinalValue, Incoming);
-    else if (ResumePhi && is_contained(ResumePhi->blocks(), Incoming))
-      BCBlockPhi->addIncoming(ResumePhi->getIncomingValueForBlock(Incoming),
-                              Incoming);
-    else
-      BCBlockPhi->addIncoming(RdxDesc.getRecurrenceStartValue(), Incoming);
+    if (is_contained(MainResumePhi->blocks(), Incoming)) {

Review comment: If we expect at most one such Incoming, worth asserting?
Reply: Updated, thanks!

+      assert(EpiResumePhi->getIncomingValueForBlock(Incoming) ==
+                 RdxDesc.getRecurrenceStartValue() &&
+             "Trying to reset unexpected value");
+      EpiResumePhi->setIncomingValueForBlock(
+          Incoming, MainResumePhi->getIncomingValueForBlock(Incoming));
+      UpdateCnt++;
+    }
   }
-
-  auto *OrigPhi = cast<PHINode>(PhiR->getUnderlyingValue());
-  // TODO: This fixup should instead be modeled in VPlan.
-  // Fix the scalar loop reduction variable with the incoming reduction sum
-  // from the vector body and from the backedge value.
-  int IncomingEdgeBlockIdx =
-      OrigPhi->getBasicBlockIndex(OrigLoop->getLoopLatch());
-  assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
-  // Pick the other block.
-  int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
-  OrigPhi->setIncomingValue(SelfEdgeBlockIdx, BCBlockPhi);
-  Instruction *LoopExitInst = RdxDesc.getLoopExitInstr();
-  OrigPhi->setIncomingValue(IncomingEdgeBlockIdx, LoopExitInst);
+  assert(UpdateCnt <= 1 && "Only should update at most 1 incoming value");
+  (void)UpdateCnt;
 }
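
For readers of the review question above about the "backedge-taken check block": in the IR the vectorizer emits for a plain (non-epilogue) vector loop, the scalar preheader is reached either from the block containing the minimum-iteration-count check (when the vector loop is skipped entirely) or from the middle block (when the vector loop runs), and bc.merge.rdx merges the reduction value across those two edges. A rough, hand-written sketch with invented block and value names, assuming a sum reduction that starts at 0:

  ; Illustrative sketch only -- all block and value names are invented.
  min.iters.check.block:                       ; minimum-iteration-count ("backedge-taken") check
    %min.iters.check = icmp ult i64 %n, 4
    br i1 %min.iters.check, label %scalar.ph, label %vector.ph
  ; ... vector.ph, vector.body ...
  middle.block:                                ; reached after the vector loop
    %rdx = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> %vec.rdx)
    %cmp.n = icmp eq i64 %n, %n.vec
    br i1 %cmp.n, label %exit, label %scalar.ph
  scalar.ph:
    %bc.merge.rdx = phi i32 [ %rdx, %middle.block ], [ 0, %min.iters.check.block ]
    br label %loop                             ; scalar remainder loop
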
@@ -7713,11 +7708,11 @@ DenseMap<const SCEV *, Value *> LoopVectorizationPlanner::executePlan(
   // 2.5 Collect reduction resume values.
   auto *ExitVPBB =
       cast<VPBasicBlock>(BestVPlan.getVectorLoopRegion()->getSingleSuccessor());
-  for (VPRecipeBase &R : *ExitVPBB) {
-    createAndCollectMergePhiForReduction(
-        dyn_cast<VPInstruction>(&R), State, OrigLoop,
-        State.CFG.VPBB2IRBB[ExitVPBB], VectorizingEpilogue);
-  }
+  if (VectorizingEpilogue)
+    for (VPRecipeBase &R : *ExitVPBB) {
+      fixReductionScalarResumeWhenVectorizingEpilog(
+          &R, State, OrigLoop, State.CFG.VPBB2IRBB[ExitVPBB]);
+    }

   // 2.6. Maintain Loop Hints
   // Keep all loop hints from the original loop on the vector loop (we'll
@@ -9518,6 +9513,17 @@ void LoopVectorizationPlanner::adjustRecipesForReductions(
         });
     FinalReductionResult->insertBefore(*MiddleVPBB, IP);

Review comment: Can the above TODO also be taken care of, i.e. refrain from creating ComputeReductionResult for in-loop reductions?
Reply: Yep, but may be better as a follow-up?

+    // Order is strict: if there are multiple successors, the first is the exit
+    // block, second is the scalar preheader.
+    VPBasicBlock *ScalarPHVPBB =
+        cast<VPBasicBlock>(MiddleVPBB->getSuccessors().back());
+    VPBuilder ScalarPHBuilder(ScalarPHVPBB);
+    auto *ResumePhiRecipe = ScalarPHBuilder.createNaryOp(
+        VPInstruction::ResumePhi, {FinalReductionResult, PhiR->getStartValue()},

Review comment: This ResumePhi placed in the scalar preheader prepares for 2 values. When vectorizing an epilog loop, it should ideally prepare for 3: the ResumePhi after the main loop should feed the ResumePhi after the epilog loop (unless we are sure, based on the trip count, that running the main and epilog loops takes care of all iterations without leaving any remainder for the scalar loop). But neither recipes nor blocks of the two VPlans can currently be connected.
Reply: Yes, unfortunately there is no way to determine at this point whether the plan will be used for epilogue vectorization. Once we model it explicitly in VPlan, the update should be connected here.
(A rough IR sketch of the resulting resume phi follows this hunk.)

{}, "bc.merge.rdx"); | ||||||||||||||||
auto *RedPhi = cast<PHINode>(PhiR->getUnderlyingInstr()); | ||||||||||||||||
Plan->addLiveOut(RedPhi, ResumePhiRecipe); | ||||||||||||||||
|
||||||||||||||||
// Adjust AnyOf reductions; replace the reduction phi for the selected value | ||||||||||||||||
// with a boolean reduction phi node to check if the condition is true in | ||||||||||||||||
// any iteration. The final value is selected by the final | ||||||||||||||||
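
To make the "2 versus 3 values" discussion above concrete: with epilogue vectorization the scalar preheader of the remainder loop ends up with three predecessors, and the phi emitted from the epilogue VPlan's ResumePhi initially carries the original start value on the edge that reaches it from the main loop's side; fixReductionScalarResumeWhenVectorizingEpilog then rewrites that edge to carry the main loop's reduction result. A rough, hand-written sketch with invented block and value names, assuming a sum reduction that starts at 0:

  ; Illustrative sketch only -- all block and value names are invented.
  ; %rdx.main   - reduction result computed after the main vector loop
  ; %rdx.epilog - reduction result computed after the epilogue vector loop
  vec.epilog.scalar.ph:
    ; Before the fixup, the %vec.epilog.iter.check edge still carried the start
    ; value 0; the fixup replaces it with %rdx.main.
    %bc.merge.rdx = phi i32 [ %rdx.epilog, %vec.epilog.middle.block ], [ %rdx.main, %vec.epilog.iter.check ], [ 0, %iter.check ]
    br label %loop
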
Review comment: and avoid passing OrigLoop to fixReductionScalarResumeWhenVectorizingEpilog()?