Skip to content

Commit

Permalink
JIT: optimize redundant branches by looking through phis (#76283)
Browse files Browse the repository at this point in the history
In some cases the value of a block's branch predicate is correlated with the
predecessor of the block. Often this correlation is hinted at by the presence
of phis in the predicate's tree and/or phi VNs in in the predicate's VN graph.

For each predecessor of a block, we evaluate the predicate value number using
the values brought to the block by that predecessor. If we find correlations,
we use them to drive the existing jump threading optimization.

Make sure that when we search local PHIs we also match the ssa def
number to ensure we're looking at the right PHI.

Also, if we end up partially disambiguating such that there is just one
remaining predecessor, update the value number of the predicate to reflect
the values that flow in from that predecessor.

Fixes #75944.
Contributes to #48115.
  • Loading branch information
AndyAyersMS authored Sep 29, 2022
1 parent e69454c commit 35af395
Show file tree
Hide file tree
Showing 2 changed files with 298 additions and 6 deletions.
3 changes: 2 additions & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -6966,7 +6966,8 @@ class Compiler
PhaseStatus optRedundantBranches();
bool optRedundantRelop(BasicBlock* const block);
bool optRedundantBranch(BasicBlock* const block);
bool optJumpThread(BasicBlock* const block, BasicBlock* const domBlock, bool domIsSameRelop);
bool optJumpThreadDom(BasicBlock* const block, BasicBlock* const domBlock, bool domIsSameRelop);
bool optJumpThreadPhi(BasicBlock* const block, GenTree* tree, ValueNum treeNormVN);
bool optJumpThreadCheck(BasicBlock* const block, BasicBlock* const domBlock);
bool optJumpThreadCore(JumpThreadInfo& jti);
bool optReachable(BasicBlock* const fromBlock, BasicBlock* const toBlock, BasicBlock* const excludedBlock);
Expand Down
301 changes: 296 additions & 5 deletions src/coreclr/jit/redundantbranchopts.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -537,7 +537,7 @@ bool Compiler::optRedundantBranch(BasicBlock* const block)
if (++matchCount > matchLimit)
{
JITDUMP("Bailing out; %d matches found w/o optimizing\n", matchCount);
return false;
break;
}

// Was this an inference from an unrelated relop (GE => GT, say)?
Expand Down Expand Up @@ -589,7 +589,7 @@ bool Compiler::optRedundantBranch(BasicBlock* const block)
// However we may be able to update the flow from block's predecessors so they
// bypass block and instead transfer control to jump's successors (aka jump threading).
//
const bool wasThreaded = optJumpThread(block, domBlock, domIsSameRelop);
const bool wasThreaded = optJumpThreadDom(block, domBlock, domIsSameRelop);

if (wasThreaded)
{
Expand Down Expand Up @@ -654,7 +654,10 @@ bool Compiler::optRedundantBranch(BasicBlock* const block)
//
if (relopValue == -1)
{
return false;
// We were unable to determine the relop value via dominance checks.
// See if we can jump thread via phi disambiguation.
//
return optJumpThreadPhi(block, tree, treeNormVN);
}

// Be conservative if there is an exception effect and we're in an EH region
Expand Down Expand Up @@ -710,12 +713,14 @@ struct JumpThreadInfo
, m_trueTarget(block->bbJumpDest)
, m_falseTarget(block->bbNext)
, m_fallThroughPred(nullptr)
, m_ambiguousVNBlock(nullptr)
, m_truePreds(BlockSetOps::MakeEmpty(comp))
, m_ambiguousPreds(BlockSetOps::MakeEmpty(comp))
, m_numPreds(0)
, m_numAmbiguousPreds(0)
, m_numTruePreds(0)
, m_numFalsePreds(0)
, m_ambiguousVN(ValueNumStore::NoVN)
{
}

Expand All @@ -726,7 +731,9 @@ struct JumpThreadInfo
// Block successor if predicate is false
BasicBlock* const m_falseTarget;
// Unique pred that falls through to block, if any
BasicBlock* m_fallThroughPred = nullptr;
BasicBlock* m_fallThroughPred;
// Block that brings in the ambiguous VN
BasicBlock* m_ambiguousVNBlock;
// Pred blocks for which the predicate will be true
BlockSet m_truePreds;
// Pred blocks that can't be threaded or for which the predicate
Expand All @@ -741,6 +748,8 @@ struct JumpThreadInfo
int m_numTruePreds;
// Number of predecessors for which predicate is false
int m_numFalsePreds;
// Refined VN for ambiguous cases
ValueNum m_ambiguousVN;
};

//------------------------------------------------------------------------
Expand Down Expand Up @@ -892,7 +901,7 @@ bool Compiler::optJumpThreadCheck(BasicBlock* const block, BasicBlock* const dom
// / \ | |
// Tt Ft Tt Ft True/false target
//
bool Compiler::optJumpThread(BasicBlock* const block, BasicBlock* const domBlock, bool domIsSameRelop)
bool Compiler::optJumpThreadDom(BasicBlock* const block, BasicBlock* const domBlock, bool domIsSameRelop)
{
assert(block->bbJumpKind == BBJ_COND);
assert(domBlock->bbJumpKind == BBJ_COND);
Expand Down Expand Up @@ -1079,6 +1088,259 @@ bool Compiler::optJumpThread(BasicBlock* const block, BasicBlock* const domBlock
return optJumpThreadCore(jti);
}

//------------------------------------------------------------------------
// optJumpThreadPhi: attempt jump threading by disambiguating through phis.
//
// Arguments:
// block - block with relop we're trying to optimize
// tree - relop we're trying to optimize
// treeNormVN - liberal normal VN from the relop
//
// Returns:
// True if the branch was optimized.
//
bool Compiler::optJumpThreadPhi(BasicBlock* block, GenTree* tree, ValueNum treeNormVN)
{
// First see if block is eligible for threading.
//
const bool check = optJumpThreadCheck(block, /* domBlock*/ nullptr);
if (!check)
{
return false;
}

// We expect the controlling predicate to be a relop and so be a func app with two args.
//
// We should have screened out constants already. Might want to check if some other kind
// of leaf can meaningfully make it here.
//
VNFuncApp treeNormVNFuncApp;
if (!vnStore->GetVNFunc(treeNormVN, &treeNormVNFuncApp) || !(treeNormVNFuncApp.m_arity == 2))
{
return false;
}

// Find occurrences of phi def VNs in the relop VN.
// We currently just do one level of func destructuring.
//
unsigned funcArgToPhiLocalMap[] = {BAD_VAR_NUM, BAD_VAR_NUM};
GenTree* funcArgToPhiDefNodeMap[] = {nullptr, nullptr};
bool foundPhiDef = false;

for (int i = 0; i < 2; i++)
{
const ValueNum phiDefVN = treeNormVNFuncApp.m_args[i];
VNFuncApp phiDefFuncApp;
if (!vnStore->GetVNFunc(phiDefVN, &phiDefFuncApp) || (phiDefFuncApp.m_func != VNF_PhiDef))
{
// This input is not a phi def. If it's a func app it might depend on
// transitively on a phi def; consider a general search utility.
//
continue;
}

// The PhiDef args tell us which local and which SSA def of that local.
//
assert(phiDefFuncApp.m_arity == 3);
const unsigned lclNum = unsigned(phiDefFuncApp.m_args[0]);
const unsigned ssaDefNum = unsigned(phiDefFuncApp.m_args[1]);
const ValueNum phiVN = ValueNum(phiDefFuncApp.m_args[2]);
JITDUMP("... JT-PHI [interestingVN] in " FMT_BB " relop %s operand VN is PhiDef for V%02u:%u " FMT_VN "\n",
block->bbNum, i == 0 ? "first" : "second", lclNum, ssaDefNum, phiVN);
if (!foundPhiDef)
{
DISPTREE(tree);
}

// Find the PHI for lclNum local in the current block.
//
GenTree* phiNode = nullptr;
for (Statement* const stmt : block->Statements())
{
// If the tree is not an SSA def, break out of the loop: we're done.
if (!stmt->IsPhiDefnStmt())
{
break;
}

GenTree* const phiDefNode = stmt->GetRootNode();
assert(phiDefNode->IsPhiDefn());
GenTreeLclVarCommon* const phiDefLclNode = phiDefNode->AsOp()->gtOp1->AsLclVarCommon();
if (phiDefLclNode->GetLclNum() == lclNum)
{
if (phiDefLclNode->GetSsaNum() == ssaDefNum)
{
funcArgToPhiLocalMap[i] = lclNum;
funcArgToPhiDefNodeMap[i] = phiDefNode;
foundPhiDef = true;
JITDUMP("Found local PHI [%06u] for V%02u\n", dspTreeID(phiDefNode), lclNum);
}
else
{
// Relop input is phi def from some other block.
//
break;
}
}
}
}

if (!foundPhiDef)
{
// No usable PhiDef VNs in the relop's VN.
//
JITDUMP("No usable PhiDef VNs\n");
return false;
}

// At least one relop input depends on a local phi. Walk pred by pred and
// see if the relop value is correlated with the pred.
//
JumpThreadInfo jti(this, block);
for (BasicBlock* const predBlock : block->PredBlocks())
{
jti.m_numPreds++;

// Find VNs for the relevant phi inputs from this block.
//
ValueNum newRelopArgs[] = {treeNormVNFuncApp.m_args[0], treeNormVNFuncApp.m_args[1]};
bool updatedArg = false;

for (int i = 0; i < 2; i++)
{
if (funcArgToPhiLocalMap[i] == BAD_VAR_NUM)
{
// this relop VN arg not phi dependent
continue;
}

GenTree* const phiNode = funcArgToPhiDefNodeMap[i];
GenTreePhi* const phi = phiNode->gtGetOp2()->AsPhi();
for (GenTreePhi::Use& use : phi->Uses())
{
GenTreePhiArg* const phiArgNode = use.GetNode()->AsPhiArg();
assert(phiArgNode->GetLclNum() == funcArgToPhiLocalMap[i]);

if (phiArgNode->gtPredBB == predBlock)
{
ValueNum phiArgVN = phiArgNode->GetVN(VNK_Liberal);

// We sometimes see cases where phi args do not have VNs.
// (I recall seeing this before... track down why)
//
if (phiArgVN != ValueNumStore::NoVN)
{
newRelopArgs[i] = phiArgVN;
updatedArg = true;

// paranoia: keep walking uses to make sure no other
// comes from this pred
break;
}
}
}
}

// We may not find predBlock in the phi args, as we only have one phi
// arg per ssa num, not one per pred.
//
// See SsaBuilder::AddPhiArgsToSuccessors.
//
if (!updatedArg)
{
JITDUMP("Could not map phi inputs from pred " FMT_BB "\n", predBlock->bbNum);
JITDUMP(FMT_BB " is an ambiguous pred\n", predBlock->bbNum);
BlockSetOps::AddElemD(this, jti.m_ambiguousPreds, predBlock->bbNum);
jti.m_numAmbiguousPreds++;
continue;
}

// We have a refined set of args for the relop VN for this
// pred. See if that simplifies the relop.
//
const ValueNum substVN =
vnStore->VNForFunc(tree->TypeGet(), treeNormVNFuncApp.m_func, newRelopArgs[0], newRelopArgs[1]);

JITDUMP("... substituting (" FMT_VN "," FMT_VN ") for (" FMT_VN "," FMT_VN ") in " FMT_VN " gives " FMT_VN "\n",
newRelopArgs[0], newRelopArgs[1], treeNormVNFuncApp.m_args[0], treeNormVNFuncApp.m_args[1], treeNormVN,
substVN);

// If this VN is constant, we're all set!
//
// Note there are other cases we could possibly handle here, say if the substituted
// VN not constant but is related to some dominating relop VN.
//
if (vnStore->IsVNConstant(substVN))
{
const bool relopIsTrue = (substVN == vnStore->VNZeroForType(TYP_INT)) ? 0 : 1;
JITDUMP("... substituted VN implies relop is %d when coming from pred " FMT_BB "\n", relopIsTrue,
predBlock->bbNum);

if (relopIsTrue)
{
if (!BasicBlock::sameEHRegion(predBlock, jti.m_trueTarget))
{
JITDUMP(FMT_BB " is an eh constrained pred\n", predBlock->bbNum);
jti.m_numAmbiguousPreds++;
BlockSetOps::AddElemD(this, jti.m_ambiguousPreds, predBlock->bbNum);
continue;
}

jti.m_numTruePreds++;
BlockSetOps::AddElemD(this, jti.m_truePreds, predBlock->bbNum);
JITDUMP(FMT_BB " is a true pred\n", predBlock->bbNum);
}
else
{
if (!BasicBlock::sameEHRegion(predBlock, jti.m_falseTarget))
{
JITDUMP(FMT_BB " is an eh constrained pred\n", predBlock->bbNum);
BlockSetOps::AddElemD(this, jti.m_ambiguousPreds, predBlock->bbNum);
jti.m_numAmbiguousPreds++;
continue;
}

jti.m_numFalsePreds++;
JITDUMP(FMT_BB " is a false pred\n", predBlock->bbNum);
}
}
else
{
JITDUMP(FMT_BB " is an ambiguous pred\n", predBlock->bbNum);
BlockSetOps::AddElemD(this, jti.m_ambiguousPreds, predBlock->bbNum);
jti.m_numAmbiguousPreds++;

// If this was the first ambiguous pred, remember the substVN
// and the block that providced it, case we can use later to
// sharpen the predicate's liberal normal VN.
//
if ((jti.m_numAmbiguousPreds == 1) && (substVN != treeNormVN))
{
assert(jti.m_ambiguousVN == ValueNumStore::NoVN);
assert(jti.m_ambiguousVNBlock == nullptr);

jti.m_ambiguousVN = substVN;
jti.m_ambiguousVNBlock = predBlock;
}

continue;
}

// Note if the true or false pred is the fall through pred.
//
if (predBlock->bbNext == block)
{
JITDUMP(FMT_BB " is the fall-through pred\n", predBlock->bbNum);
assert(jti.m_fallThroughPred == nullptr);
jti.m_fallThroughPred = predBlock;
}
}

// Do the optimization.
//
return optJumpThreadCore(jti);
}

//------------------------------------------------------------------------
// optJumpThreadCore: restructure block flow based on jump thread information
//
Expand Down Expand Up @@ -1274,6 +1536,35 @@ bool Compiler::optJumpThreadCore(JumpThreadInfo& jti)
}
}

// If block didn't get fully optimized, and now has just one pred, see if
// we can sharpen the predicate's VN.
//
// (Todo, perhaps: revisit all the uses of the old SSA def, update to the
// surviving ssa input, and update all the value numbers...)
//
BasicBlock* const ambBlock = jti.m_ambiguousVNBlock;
if ((ambBlock != nullptr) && (jti.m_block->bbJumpKind == BBJ_COND) &&
(jti.m_block->GetUniquePred(this) == ambBlock))
{
JITDUMP(FMT_BB " has just one remaining predcessor " FMT_BB "\n", jti.m_block->bbNum, ambBlock->bbNum);

Statement* const stmt = jti.m_block->lastStmt();
assert(stmt != nullptr);
GenTree* const jumpTree = stmt->GetRootNode();
assert(jumpTree->OperIs(GT_JTRUE));
GenTree* const tree = jumpTree->AsOp()->gtOp1;
assert(tree->OperIsCompare());

ValueNum treeOldVN = tree->GetVN(VNK_Liberal);
ValueNum treeNormVN = ValueNumStore::NoVN;
ValueNum treeExcVN = ValueNumStore::NoVN;
vnStore->VNUnpackExc(treeOldVN, &treeNormVN, &treeExcVN);
ValueNum treeNewVN = vnStore->VNWithExc(jti.m_ambiguousVN, treeExcVN);
tree->SetVN(VNK_Liberal, treeNewVN);

JITDUMP("Updating [%06u] liberal VN from " FMT_VN " to " FMT_VN "\n", dspTreeID(tree), treeOldVN, treeNewVN);
}

// We optimized.
//
fgModified = true;
Expand Down

0 comments on commit 35af395

Please sign in to comment.