Skip to content

Commit

Permalink
JIT: maintain pred lists during loop unrolling (dotnet#80625)
Browse files Browse the repository at this point in the history
We now update pred lists during loop unrolling, rather than recomputing
them from scratch.

There are several parts to the fix: first, `optRedirectBlock' now has
a new ability to add pred references for the flow from a newly cloned
block, be it either to a remapped successor or a non-remapped successor.
Along with this we no longer copy over the block ref count in `CloneBlockState`.
These changes allow us to create the right pred links and ref counts in the
interior of a cloned subgraph.

Second, we now scrub block references from the original loop body blocks
instead of just setting their ref counts to zero.

Finally, we fix up references for exterior flow into and out of the unroll
complex.

Addresses one of the cases mentioned in dotnet#49030.
  • Loading branch information
AndyAyersMS authored Jan 18, 2023
1 parent 11d4c4c commit e2d192a
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 17 deletions.
7 changes: 4 additions & 3 deletions src/coreclr/jit/block.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -788,18 +788,19 @@ void* BasicBlock::MemoryPhiArg::operator new(size_t sz, Compiler* comp)
// IR nodes. If cloning of any statement fails, `false` will be returned and block `to` may be
// partially populated. If cloning of all statements succeeds, `true` will be returned and
// block `to` will be fully populated.

//
// Note:
// Leaves block ref count at zero, and pred edge list empty.
//
bool BasicBlock::CloneBlockState(
Compiler* compiler, BasicBlock* to, const BasicBlock* from, unsigned varNum, int varVal)
{
assert(to->bbStmtList == nullptr);

to->bbFlags = from->bbFlags;
to->bbWeight = from->bbWeight;
BlockSetOps::AssignAllowUninitRhs(compiler, to->bbReach, from->bbReach);
to->copyEHRegion(from);
to->bbCatchTyp = from->bbCatchTyp;
to->bbRefs = from->bbRefs;
to->bbStkTempsIn = from->bbStkTempsIn;
to->bbStkTempsOut = from->bbStkTempsOut;
to->bbStkDepth = from->bbStkDepth;
Expand Down
11 changes: 10 additions & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -6474,7 +6474,16 @@ class Compiler
// loop nested in "loopInd" that shares the same head as "loopInd".
void optUpdateLoopHead(unsigned loopInd, BasicBlock* from, BasicBlock* to);

void optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap, const bool updatePreds = false);
enum class RedirectBlockOption
{
DoNotChangePredLists, // do not modify pred lists
UpdatePredLists, // add/remove to pred lists
AddToPredLists, // only add to pred lists
};

void optRedirectBlock(BasicBlock* blk,
BlockToBlockMap* redirectMap,
const RedirectBlockOption = RedirectBlockOption::DoNotChangePredLists);

// Marks the containsCall information to "lnum" and any parent loops.
void AddContainsCallAllContainingLoops(unsigned lnum);
Expand Down
100 changes: 88 additions & 12 deletions src/coreclr/jit/optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2763,16 +2763,27 @@ void Compiler::optIdentifyLoopsForAlignment()
// Updates the successors of `blk`: if `blk2` is a branch successor of `blk`, and there is a mapping
// for `blk2->blk3` in `redirectMap`, change `blk` so that `blk3` is this branch successor.
//
// Note that fall-through successors are not modified, including predecessor lists.
//
// Arguments:
// blk - block to redirect
// redirectMap - block->block map specifying how the `blk` target will be redirected.
// updatePreds - if `true`, update the predecessor lists to match.
// predOption - specifies how to update the pred lists
//
// Notes:
// Fall-through successors are assumed correct and are not modified.
// Pred lists for successors of `blk` may be changed, depending on `predOption`.
//
void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap, const bool updatePreds)
void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap, RedirectBlockOption predOption)
{
const bool updatePreds = (predOption == RedirectBlockOption::UpdatePredLists);
const bool addPreds = (predOption == RedirectBlockOption::AddToPredLists);

if (addPreds && blk->bbFallsThrough())
{
fgAddRefPred(blk->bbNext, blk);
}

BasicBlock* newJumpDest = nullptr;

switch (blk->bbJumpKind)
{
case BBJ_NONE:
Expand All @@ -2794,10 +2805,17 @@ void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap, c
if (updatePreds)
{
fgRemoveRefPred(blk->bbJumpDest, blk);
}
if (updatePreds || addPreds)
{
fgAddRefPred(newJumpDest, blk);
}
blk->bbJumpDest = newJumpDest;
}
else if (addPreds)
{
fgAddRefPred(blk->bbJumpDest, blk);
}
break;

case BBJ_SWITCH:
Expand All @@ -2811,11 +2829,18 @@ void Compiler::optRedirectBlock(BasicBlock* blk, BlockToBlockMap* redirectMap, c
if (updatePreds)
{
fgRemoveRefPred(switchDest, blk);
}
if (updatePreds || addPreds)
{
fgAddRefPred(newJumpDest, blk);
}
blk->bbJumpSwt->bbsDstTab[i] = newJumpDest;
redirected = true;
}
else if (addPreds)
{
fgAddRefPred(switchDest, blk);
}
}
// If any redirections happened, invalidate the switch table map for the switch.
if (redirected)
Expand Down Expand Up @@ -4357,6 +4382,7 @@ PhaseStatus Compiler::optUnrollLoops()

BlockToBlockMap blockMap(getAllocator(CMK_LoopOpt));
BasicBlock* insertAfter = bottom;
BasicBlock* const tail = bottom->bbNext;
BasicBlock::loopNumber newLoopNum = loop.lpParent;
bool anyNestedLoopsUnrolledThisLoop = false;
int lval;
Expand All @@ -4379,9 +4405,8 @@ PhaseStatus Compiler::optUnrollLoops()
// to clone a block in the loop, splice out and forget all the blocks we cloned so far:
// put the loop blocks back to how they were before we started cloning blocks,
// and abort unrolling the loop.
BasicBlock* oldBottomNext = insertAfter->bbNext;
bottom->bbNext = oldBottomNext;
oldBottomNext->bbPrev = bottom;
bottom->bbNext = tail;
tail->bbPrev = bottom;
loop.lpFlags |= LPFLG_DONT_UNROLL; // Mark it so we don't try to unroll it again.
INDEBUG(++unrollFailures);
JITDUMP("Failed to unroll loop " FMT_LP ": block cloning failed on " FMT_BB "\n", lnum,
Expand Down Expand Up @@ -4436,9 +4461,16 @@ PhaseStatus Compiler::optUnrollLoops()
{
BasicBlock* newBlock = blockMap[block];
optCopyBlkDest(block, newBlock);
optRedirectBlock(newBlock, &blockMap);
optRedirectBlock(newBlock, &blockMap, RedirectBlockOption::AddToPredLists);
}

// We fall into to this unroll iteration from the bottom block (first iteration)
// or from the previous unroll clone of the bottom block (subsequent iterations).
// After doing this, all the newly cloned blocks now have proper flow and pred lists.
//
BasicBlock* const clonedTop = blockMap[loop.lpTop];
fgAddRefPred(clonedTop, clonedTop->bbPrev);

/* update the new value for the unrolled iterator */

switch (iterOper)
Expand All @@ -4463,8 +4495,10 @@ PhaseStatus Compiler::optUnrollLoops()
}

// If we get here, we successfully cloned all the blocks in the unrolled loop.
// Note we may not have done any cloning at all, if the loop iteration count was zero.

// Gut the old loop body
// Gut the old loop body.
//
for (BasicBlock* const block : loop.LoopBlocks())
{
// Check if the old loop body had any nested loops that got cloned. Note that we need to do this
Expand All @@ -4475,33 +4509,75 @@ PhaseStatus Compiler::optUnrollLoops()
anyNestedLoopsUnrolledThisLoop = true;
}

// Scrub all pred list references to block, except for bottom-> bottom->bbNext.
//
for (BasicBlock* succ : block->Succs())
{
if ((block == bottom) && (succ == bottom->bbNext))
{
continue;
}

fgRemoveAllRefPreds(succ, block);
}

block->bbStmtList = nullptr;
block->bbJumpKind = BBJ_NONE;
block->bbFlags &= ~BBF_LOOP_HEAD;
block->bbJumpDest = nullptr;
block->bbNatLoopNum = newLoopNum;
}

// The old loop blocks will form an emtpy linear chain.
// Add back a suitable pred list links.
//
BasicBlock* oldLoopPred = head;
for (BasicBlock* const block : loop.LoopBlocks())
{
if (block != top)
{
fgAddRefPred(block, oldLoopPred);
}
oldLoopPred = block;
}

if (anyNestedLoopsUnrolledThisLoop)
{
anyNestedLoopsUnrolled = true;
}

// Now fix up the exterior flow and pred list entries.
//
// Control will fall through from HEAD to its successor, which is either
// the now empty TOP (if totalIter == 0) or the first cloned top.
//
// If the HEAD is a BBJ_COND drop the condition (and make HEAD a BBJ_NONE block).

//
if (head->bbJumpKind == BBJ_COND)
{
testStmt = head->lastStmt();
noway_assert(testStmt->GetRootNode()->gtOper == GT_JTRUE);
fgRemoveStmt(head, testStmt);
fgRemoveRefPred(head->bbJumpDest, head);
head->bbJumpKind = BBJ_NONE;
}
else
{
/* the loop must execute */
assert(totalIter > 0);
noway_assert(head->bbJumpKind == BBJ_NONE);
}

// If we actually unrolled, tail is now reached
// by the last cloned bottom, and no longer
// reached by bottom.
//
if (totalIter > 0)
{
fgAddRefPred(tail, blockMap[bottom]);
fgRemoveRefPred(tail, bottom);
}

#ifdef DEBUG
if (verbose)
{
Expand Down Expand Up @@ -4564,7 +4640,7 @@ PhaseStatus Compiler::optUnrollLoops()
// If we unrolled any nested loops, we rebuild the loop table (including recomputing the
// return blocks list).

constexpr bool computePreds = true;
constexpr bool computePreds = false;
constexpr bool computeDoms = true;
const bool computeReturnBlocks = anyNestedLoopsUnrolled;
const bool computeLoops = anyNestedLoopsUnrolled;
Expand Down Expand Up @@ -5084,7 +5160,7 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block)
// Redirect the predecessor to the new block.
JITDUMP("Redirecting non-loop " FMT_BB " -> " FMT_BB " to " FMT_BB " -> " FMT_BB "\n", predBlock->bbNum,
bTest->bbNum, predBlock->bbNum, bNewCond->bbNum);
optRedirectBlock(predBlock, &blockMap, /*updatePreds*/ true);
optRedirectBlock(predBlock, &blockMap, RedirectBlockOption::UpdatePredLists);
}

// If we have profile data for all blocks and we know that we are cloning the
Expand Down
2 changes: 1 addition & 1 deletion src/coreclr/jit/ssabuilder.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1662,7 +1662,7 @@ void Compiler::DumpSsaSummary()

if (numDefs == 0)
{
printf("V%02u: in SSA but no defs\n");
printf("V%02u: in SSA but no defs\n", lclNum);
}
else
{
Expand Down

0 comments on commit e2d192a

Please sign in to comment.