-
Notifications
You must be signed in to change notification settings - Fork 12.5k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[VPlan] Dispatch to multiple exit blocks via middle blocks. #112138
Changes from all commits
245b56a
47258de
9265fb1
3831acb
e64888a
64db0ee
3259e66
0f8aedf
9212f96
5cb0851
e849195
7b98d34
c53eca6
43a8ef7
e26af8e
06c3d39
552bd91
2042a43
00dea4a
7b8866d
4d5608f
b9ee739
43d5590
cba7dce
95f4276
c3d3b39
a875249
65d0288
8d04383
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
digraph VPlan { | ||
graph [labelloc=t, fontsize=30; label=""] | ||
node [shape=rect, fontname=Courier, fontsize=30] | ||
edge [fontname=Courier, fontsize=30] | ||
compound=true | ||
N1 [label = | ||
"vector.ph" | ||
] | ||
N1 -> N2 [ label="" lhead=cluster_N3] | ||
subgraph cluster_N3 { | ||
fontname=Courier | ||
label="\<x1\> vector loop" | ||
N2 [label = | ||
"vector.body" | ||
] | ||
} | ||
N2 -> N4 [ label="" ltail=cluster_N3] | ||
N4 [label = | ||
"middle.split" | ||
] | ||
N4 -> N5 [ label=""] | ||
N4 -> N6 [ label=""] | ||
N5 [label = | ||
"early.exit" | ||
] | ||
N6 [label = | ||
"middle.block" | ||
] | ||
N6 -> N9 [ label=""] | ||
N6 -> N7 [ label=""] | ||
N7 [label = | ||
"scalar.ph" | ||
] | ||
N7 -> N8 [ label=""] | ||
N8 [label = | ||
"loop.header" | ||
] | ||
N9 [label = | ||
"latch.exit" | ||
] | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -861,14 +861,10 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, | |
auto Plan = std::make_unique<VPlan>(Entry, VecPreheader, ScalarHeader); | ||
|
||
// Create SCEV and VPValue for the trip count. | ||
|
||
// Currently only loops with countable exits are vectorized, but calling | ||
// getSymbolicMaxBackedgeTakenCount allows enablement work for loops with | ||
// uncountable exits whilst also ensuring the symbolic maximum and known | ||
// back-edge taken count remain identical for loops with countable exits. | ||
// We use the symbolic max backedge-taken-count, which works also when | ||
// vectorizing loops with uncountable early exits. | ||
const SCEV *BackedgeTakenCountSCEV = PSE.getSymbolicMaxBackedgeTakenCount(); | ||
assert((!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) && | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is it still worth at least having:
? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, added back, thanks |
||
BackedgeTakenCountSCEV == PSE.getBackedgeTakenCount()) && | ||
assert(!isa<SCEVCouldNotCompute>(BackedgeTakenCountSCEV) && | ||
"Invalid loop count"); | ||
ScalarEvolution &SE = *PSE.getSE(); | ||
const SCEV *TripCount = SE.getTripCountFromExitCount(BackedgeTakenCountSCEV, | ||
|
@@ -903,7 +899,7 @@ VPlanPtr VPlan::createInitialVPlan(Type *InductionTy, | |
// 2) If we require a scalar epilogue, there is no conditional branch as | ||
// we unconditionally branch to the scalar preheader. Do nothing. | ||
// 3) Otherwise, construct a runtime check. | ||
BasicBlock *IRExitBlock = TheLoop->getUniqueExitBlock(); | ||
BasicBlock *IRExitBlock = TheLoop->getUniqueLatchExitBlock(); | ||
auto *VPExitBlock = VPIRBasicBlock::fromBasicBlock(IRExitBlock); | ||
// The connection order corresponds to the operands of the conditional branch. | ||
VPBlockUtils::insertBlockAfter(VPExitBlock, MiddleVPBB); | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -621,6 +621,14 @@ class VPBlockBase { | |
/// Remove all the successors of this block. | ||
void clearSuccessors() { Successors.clear(); } | ||
|
||
/// Swap successors of the block. The block must have exactly 2 successors. | ||
// TODO: This should be part of introducing conditional branch recipes rather | ||
// than being independent. | ||
void swapSuccessors() { | ||
assert(Successors.size() == 2 && "must have 2 successors to swap"); | ||
std::swap(Successors[0], Successors[1]); | ||
} | ||
|
||
/// The method which generates the output IR that correspond to this | ||
/// VPBlockBase, thereby "executing" the VPlan. | ||
virtual void execute(VPTransformState *State) = 0; | ||
|
@@ -1232,6 +1240,9 @@ class VPInstruction : public VPRecipeWithIRFlags, | |
// operand). Only generates scalar values (either for the first lane only or | ||
// for all lanes, depending on its uses). | ||
PtrAdd, | ||
// Returns a scalar boolean value, which is true if any lane of its single | ||
// operand is true. | ||
AnyOf, | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps worth adding a simple comment here? Something along the lines of:
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added, thanks There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Some explanation how AnyOf relates (or should relate) to ComputeReductionResult? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think AnyOf also needs adding to the switch statement in VPRecipeBase::mayWriteToMemory and return false? |
||
}; | ||
|
||
private: | ||
|
@@ -3884,10 +3895,10 @@ class VPlan { | |
/// whether to execute the scalar tail loop or the exit block from the loop | ||
/// latch. | ||
const VPBasicBlock *getMiddleBlock() const { | ||
return cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor()); | ||
return cast<VPBasicBlock>(getScalarPreheader()->getSinglePredecessor()); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This restricts to use of getMiddleBlock() to before bypassing guards are introduced? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it doesn't really restrict the use of getMiddleBlock, it just updates the anchor point we use to identify it; the scalar preheader (and single predecessor ) can be more easily identified and works automatically with the changes to the skeleton. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Works as long as the scalar preheader has a single predecessor, i.e., until runtime guards are introduced as additional predecessors. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yep, this will need some extra work for #114292, which I plan to land after this PR. |
||
} | ||
VPBasicBlock *getMiddleBlock() { | ||
return cast<VPBasicBlock>(getVectorLoopRegion()->getSingleSuccessor()); | ||
return cast<VPBasicBlock>(getScalarPreheader()->getSinglePredecessor()); | ||
} | ||
|
||
/// Return the VPBasicBlock for the preheader of the scalar loop. | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Better have
collectUsersInExitBlocks()
indicate failure, thanaddUsersInExitBlocks()
, which followsaddExitUsersForFirstOrderRecurrences()
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
That would require
collectUsersInExitBlocks
to check ifaddUsersInExitBlocks
will be able to handle the user. Left as is for now