From d11da115c255c3ee51fbd45861a9c36e13038ed3 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Fri, 23 Sep 2022 20:09:15 -0700 Subject: [PATCH 1/3] JIT: optimize redundant branches by looking through phis In some cases the value of a block's branch predicate is correlated with the predecessor of the block. Often this correlation is hinted at by the presence of phis in the predicate's tree and/or phi VNs in in the predicate's VN graph. For each predecessor of a block, we evaluate the predicate value number using the values brought to the block by that predecessor. If we find correlations, we use them to drive the existing jump threading optimization. Also, if we end up partially disambiguating such that there is just one remaining predecessor, update the value number of the predicate to reflect the values that flow in from that predecessor. Fixes #75944. Contributes to #48115. --- src/coreclr/jit/compiler.h | 3 +- src/coreclr/jit/redundantbranchopts.cpp | 297 +++++++++++++++++++++++- 2 files changed, 294 insertions(+), 6 deletions(-) diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 806d049d0c463..04e2f848a0a4d 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -6966,7 +6966,8 @@ class Compiler PhaseStatus optRedundantBranches(); bool optRedundantRelop(BasicBlock* const block); bool optRedundantBranch(BasicBlock* const block); - bool optJumpThread(BasicBlock* const block, BasicBlock* const domBlock, bool domIsSameRelop); + bool optJumpThreadDom(BasicBlock* const block, BasicBlock* const domBlock, bool domIsSameRelop); + bool optJumpThreadPhi(BasicBlock* const block, GenTree* tree, ValueNum treeNormVN); bool optJumpThreadCheck(BasicBlock* const block, BasicBlock* const domBlock); bool optJumpThreadCore(JumpThreadInfo& jti); bool optReachable(BasicBlock* const fromBlock, BasicBlock* const toBlock, BasicBlock* const excludedBlock); diff --git a/src/coreclr/jit/redundantbranchopts.cpp b/src/coreclr/jit/redundantbranchopts.cpp index c7f29754b2b61..ec512cbf60244 100644 --- a/src/coreclr/jit/redundantbranchopts.cpp +++ b/src/coreclr/jit/redundantbranchopts.cpp @@ -537,7 +537,7 @@ bool Compiler::optRedundantBranch(BasicBlock* const block) if (++matchCount > matchLimit) { JITDUMP("Bailing out; %d matches found w/o optimizing\n", matchCount); - return false; + break; } // Was this an inference from an unrelated relop (GE => GT, say)? @@ -589,7 +589,7 @@ bool Compiler::optRedundantBranch(BasicBlock* const block) // However we may be able to update the flow from block's predecessors so they // bypass block and instead transfer control to jump's successors (aka jump threading). // - const bool wasThreaded = optJumpThread(block, domBlock, domIsSameRelop); + const bool wasThreaded = optJumpThreadDom(block, domBlock, domIsSameRelop); if (wasThreaded) { @@ -654,7 +654,10 @@ bool Compiler::optRedundantBranch(BasicBlock* const block) // if (relopValue == -1) { - return false; + // We were unable to determine the relop value via dominance checks. + // See if we can jump thread via phi disambiguation. + // + return optJumpThreadPhi(block, tree, treeNormVN); } // Be conservative if there is an exception effect and we're in an EH region @@ -710,12 +713,14 @@ struct JumpThreadInfo , m_trueTarget(block->bbJumpDest) , m_falseTarget(block->bbNext) , m_fallThroughPred(nullptr) + , m_ambiguousVNBlock(nullptr) , m_truePreds(BlockSetOps::MakeEmpty(comp)) , m_ambiguousPreds(BlockSetOps::MakeEmpty(comp)) , m_numPreds(0) , m_numAmbiguousPreds(0) , m_numTruePreds(0) , m_numFalsePreds(0) + , m_ambiguousVN(ValueNumStore::NoVN) { } @@ -726,7 +731,9 @@ struct JumpThreadInfo // Block successor if predicate is false BasicBlock* const m_falseTarget; // Unique pred that falls through to block, if any - BasicBlock* m_fallThroughPred = nullptr; + BasicBlock* m_fallThroughPred; + // Block that brings in the ambiguous VN + BasicBlock* m_ambiguousVNBlock; // Pred blocks for which the predicate will be true BlockSet m_truePreds; // Pred blocks that can't be threaded or for which the predicate @@ -741,6 +748,8 @@ struct JumpThreadInfo int m_numTruePreds; // Number of predecessors for which predicate is false int m_numFalsePreds; + // Refined VN for ambiguous cases + ValueNum m_ambiguousVN; }; //------------------------------------------------------------------------ @@ -892,7 +901,7 @@ bool Compiler::optJumpThreadCheck(BasicBlock* const block, BasicBlock* const dom // / \ | | // Tt Ft Tt Ft True/false target // -bool Compiler::optJumpThread(BasicBlock* const block, BasicBlock* const domBlock, bool domIsSameRelop) +bool Compiler::optJumpThreadDom(BasicBlock* const block, BasicBlock* const domBlock, bool domIsSameRelop) { assert(block->bbJumpKind == BBJ_COND); assert(domBlock->bbJumpKind == BBJ_COND); @@ -1079,6 +1088,255 @@ bool Compiler::optJumpThread(BasicBlock* const block, BasicBlock* const domBlock return optJumpThreadCore(jti); } +//------------------------------------------------------------------------ +// optJumpThreadPhi: attempt jump threading by disambiguating through phis. +// +// Arguments: +// block - block with relop we're trying to optimize +// tree - relop we're trying to optimize +// treeNormVN - liberal normal VN from the relop +// +// Returns: +// True if the branch was optimized. +// +bool Compiler::optJumpThreadPhi(BasicBlock* block, GenTree* tree, ValueNum treeNormVN) +{ + // First see if block is eligible for threading. + // + const bool check = optJumpThreadCheck(block, /* domBlock*/ nullptr); + if (!check) + { + return false; + } + + // We expect the controlling predicate to be a relop and so be a func app with two args. + // + // We should have screened out constants already. Might want to check if some other kind + // of leaf can meaningfully make it here. + // + VNFuncApp treeNormVNFuncApp; + if (!vnStore->GetVNFunc(treeNormVN, &treeNormVNFuncApp) || !(treeNormVNFuncApp.m_arity == 2)) + { + return false; + } + + // Find occurrences of phi def VNs in the relop VN. + // We currently just do one level of func destructuring. + // + unsigned funcArgToPhiLocalMap[] = {BAD_VAR_NUM, BAD_VAR_NUM}; + GenTree* funcArgToPhiDefNodeMap[] = {nullptr, nullptr}; + bool foundPhiDef = false; + + for (int i = 0; i < 2; i++) + { + const ValueNum phiDefVN = treeNormVNFuncApp.m_args[i]; + VNFuncApp phiDefFuncApp; + if (!vnStore->GetVNFunc(phiDefVN, &phiDefFuncApp) || (phiDefFuncApp.m_func != VNF_PhiDef)) + { + // This input is not a phi def. If it's a func app it might depend on + // transitively on a phi def; consider a general search utility. + // + continue; + } + + // Which local? + // + const unsigned lclNum = unsigned(phiDefFuncApp.m_args[0]); + JITDUMP("... JT-PHI [interestingVN] in " FMT_BB " relop %s operand VN is PhiDef for V%02u\n", block->bbNum, + i == 0 ? "first" : "second", lclNum); + if (!foundPhiDef) + { + DISPTREE(tree); + } + + foundPhiDef = true; + funcArgToPhiLocalMap[i] = lclNum; + + // Find the PHI for lclNum local in the current block. + // + GenTree* phiNode = nullptr; + for (Statement* const stmt : block->Statements()) + { + // If the tree is not an SSA def, break out of the loop: we're done. + if (!stmt->IsPhiDefnStmt()) + { + break; + } + + GenTree* const phiDefNode = stmt->GetRootNode(); + assert(phiDefNode->IsPhiDefn()); + GenTreeLclVarCommon* const phiDefLclNode = phiDefNode->AsOp()->gtOp1->AsLclVarCommon(); + if (phiDefLclNode->GetLclNum() == lclNum) + { + funcArgToPhiDefNodeMap[i] = phiDefNode; + break; + } + } + + if (funcArgToPhiDefNodeMap[i] == nullptr) + { + // This is unexpected + // + JITDUMP("Could not find phi def for V%02u in " FMT_BB "\n", lclNum, block->bbNum); + return false; + } + } + + if (!foundPhiDef) + { + // No phis in the relop's VN. + // + return false; + } + + // At least one relop input depends on a local phi. Walk pred by pred and + // see if the relop value is correlated with the pred. + // + JumpThreadInfo jti(this, block); + for (BasicBlock* const predBlock : block->PredBlocks()) + { + jti.m_numPreds++; + + // Find VNs for the relevant phi inputs from this block. + // + ValueNum newRelopArgs[] = {treeNormVNFuncApp.m_args[0], treeNormVNFuncApp.m_args[1]}; + bool updatedArg = false; + + for (int i = 0; i < 2; i++) + { + if (funcArgToPhiLocalMap[i] == BAD_VAR_NUM) + { + // this relop VN arg not phi dependent + continue; + } + + GenTree* const phiNode = funcArgToPhiDefNodeMap[i]; + GenTreePhi* const phi = phiNode->gtGetOp2()->AsPhi(); + for (GenTreePhi::Use& use : phi->Uses()) + { + GenTreePhiArg* const phiArgNode = use.GetNode()->AsPhiArg(); + assert(phiArgNode->GetLclNum() == funcArgToPhiLocalMap[i]); + + if (phiArgNode->gtPredBB == predBlock) + { + ValueNum phiArgVN = phiArgNode->GetVN(VNK_Liberal); + + // We sometimes see cases where phi args do not have VNs. + // (I recall seeing this before... track down why) + // + if (phiArgVN != ValueNumStore::NoVN) + { + newRelopArgs[i] = phiArgVN; + updatedArg = true; + + // paranoia: keep walking uses to make sure no other + // comes from this pred + break; + } + } + } + } + + // We may not find predBlock in the phi args, as we only have one phi + // arg per ssa num, not one per pred. + // + // See SsaBuilder::AddPhiArgsToSuccessors. + // + if (!updatedArg) + { + JITDUMP("Could not map phi inputs from pred " FMT_BB "\n", predBlock->bbNum); + JITDUMP(FMT_BB " is an ambiguous pred\n", predBlock->bbNum); + BlockSetOps::AddElemD(this, jti.m_ambiguousPreds, predBlock->bbNum); + jti.m_numAmbiguousPreds++; + continue; + } + + // We have a refined set of args for the relop VN for this + // pred. See if that simplifies the relop. + // + const ValueNum substVN = + vnStore->VNForFunc(tree->TypeGet(), treeNormVNFuncApp.m_func, newRelopArgs[0], newRelopArgs[1]); + + JITDUMP("... substituting (" FMT_VN "," FMT_VN ") for (" FMT_VN "," FMT_VN ") in " FMT_VN " gives " FMT_VN "\n", + newRelopArgs[0], newRelopArgs[1], treeNormVNFuncApp.m_args[0], treeNormVNFuncApp.m_args[1], treeNormVN, + substVN); + + // If this VN is constant, we're all set! + // + // Note there are other cases we could possibly handle here, say if the substituted + // VN not constant but is related to some dominating relop VN. + // + if (vnStore->IsVNConstant(substVN)) + { + const bool relopIsTrue = (substVN == vnStore->VNZeroForType(TYP_INT)) ? 0 : 1; + JITDUMP("... substituted VN implies relop is %d when coming from pred " FMT_BB "\n", relopIsTrue, + predBlock->bbNum); + + if (relopIsTrue) + { + if (!BasicBlock::sameEHRegion(predBlock, jti.m_trueTarget)) + { + JITDUMP(FMT_BB " is an eh constrained pred\n", predBlock->bbNum); + jti.m_numAmbiguousPreds++; + BlockSetOps::AddElemD(this, jti.m_ambiguousPreds, predBlock->bbNum); + continue; + } + + jti.m_numTruePreds++; + BlockSetOps::AddElemD(this, jti.m_truePreds, predBlock->bbNum); + JITDUMP(FMT_BB " is a true pred\n", predBlock->bbNum); + } + else + { + if (!BasicBlock::sameEHRegion(predBlock, jti.m_falseTarget)) + { + JITDUMP(FMT_BB " is an eh constrained pred\n", predBlock->bbNum); + BlockSetOps::AddElemD(this, jti.m_ambiguousPreds, predBlock->bbNum); + jti.m_numAmbiguousPreds++; + continue; + } + + jti.m_numFalsePreds++; + JITDUMP(FMT_BB " is a false pred\n", predBlock->bbNum); + } + } + else + { + JITDUMP(FMT_BB " is an ambiguous pred\n", predBlock->bbNum); + BlockSetOps::AddElemD(this, jti.m_ambiguousPreds, predBlock->bbNum); + jti.m_numAmbiguousPreds++; + + // If this was the first ambiguous pred, remember the substVN + // and the block that providced it, case we can use later to + // sharpen the predicate's liberal normal VN. + // + if ((jti.m_numAmbiguousPreds == 1) && (substVN != treeNormVN)) + { + assert(jti.m_ambiguousVN == ValueNumStore::NoVN); + assert(jti.m_ambiguousVNBlock == nullptr); + + jti.m_ambiguousVN = substVN; + jti.m_ambiguousVNBlock = predBlock; + } + + continue; + } + + // Note if the true or false pred is the fall through pred. + // + if (predBlock->bbNext == block) + { + JITDUMP(FMT_BB " is the fall-through pred\n", predBlock->bbNum); + assert(jti.m_fallThroughPred == nullptr); + jti.m_fallThroughPred = predBlock; + } + } + + // Do the optimization. + // + return optJumpThreadCore(jti); +} + //------------------------------------------------------------------------ // optJumpThreadCore: restructure block flow based on jump thread information // @@ -1274,6 +1532,35 @@ bool Compiler::optJumpThreadCore(JumpThreadInfo& jti) } } + // If block didn't get fully optimized, and now has just one pred, see if + // we can sharpen the predicate's VN. + // + // (Todo, perhaps: revisit all the uses of the old SSA def, update to the + // surviving ssa input, and update all the value numbers...) + // + BasicBlock* const ambBlock = jti.m_ambiguousVNBlock; + if ((ambBlock != nullptr) && (jti.m_block->bbJumpKind == BBJ_COND) && + (jti.m_block->GetUniquePred(this) == ambBlock)) + { + JITDUMP(FMT_BB " has just one remaining predcessor " FMT_BB "\n", jti.m_block->bbNum, ambBlock->bbNum); + + Statement* const stmt = jti.m_block->lastStmt(); + assert(stmt != nullptr); + GenTree* const jumpTree = stmt->GetRootNode(); + assert(jumpTree->OperIs(GT_JTRUE)); + GenTree* const tree = jumpTree->AsOp()->gtOp1; + assert(tree->OperIsCompare()); + + ValueNum treeOldVN = tree->GetVN(VNK_Liberal); + ValueNum treeNormVN = ValueNumStore::NoVN; + ValueNum treeExcVN = ValueNumStore::NoVN; + vnStore->VNUnpackExc(treeOldVN, &treeNormVN, &treeExcVN); + ValueNum treeNewVN = vnStore->VNWithExc(jti.m_ambiguousVN, treeExcVN); + tree->SetVN(VNK_Liberal, treeNewVN); + + JITDUMP("Updating [%06u] liberal VN from " FMT_VN " to " FMT_VN "\n", dspTreeID(tree), treeOldVN, treeNewVN); + } + // We optimized. // fgModified = true; From b7bc2918ee684f073f3428d1015d2dbd7d7b5cbf Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 28 Sep 2022 10:19:14 -0700 Subject: [PATCH 2/3] fix bug --- src/coreclr/jit/redundantbranchopts.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/coreclr/jit/redundantbranchopts.cpp b/src/coreclr/jit/redundantbranchopts.cpp index ec512cbf60244..c5487f8336299 100644 --- a/src/coreclr/jit/redundantbranchopts.cpp +++ b/src/coreclr/jit/redundantbranchopts.cpp @@ -1141,7 +1141,7 @@ bool Compiler::optJumpThreadPhi(BasicBlock* block, GenTree* tree, ValueNum treeN // Which local? // - const unsigned lclNum = unsigned(phiDefFuncApp.m_args[0]); + const unsigned lclNum = unsigned(phiDefFuncApp.m_args[i]); JITDUMP("... JT-PHI [interestingVN] in " FMT_BB " relop %s operand VN is PhiDef for V%02u\n", block->bbNum, i == 0 ? "first" : "second", lclNum); if (!foundPhiDef) From a01346ff0f6d13c802960007aff522d81ac90ae0 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 28 Sep 2022 17:06:17 -0700 Subject: [PATCH 3/3] Proper fix. Both relop VNs may be PhiDefs for the same local, but with different SSA def numbers, despite the fact that we don't allow overlapping SSA lifetimes. For example there may be a Phi in another block that gets copied to some other local and then reaches the current block via that local. So make sure that when we search local PHIs we also match the ssa def number to ensure we're looking at the right PHIs. --- src/coreclr/jit/redundantbranchopts.cpp | 40 ++++++++++++++----------- 1 file changed, 22 insertions(+), 18 deletions(-) diff --git a/src/coreclr/jit/redundantbranchopts.cpp b/src/coreclr/jit/redundantbranchopts.cpp index c5487f8336299..1eec7530b6b19 100644 --- a/src/coreclr/jit/redundantbranchopts.cpp +++ b/src/coreclr/jit/redundantbranchopts.cpp @@ -1139,19 +1139,19 @@ bool Compiler::optJumpThreadPhi(BasicBlock* block, GenTree* tree, ValueNum treeN continue; } - // Which local? + // The PhiDef args tell us which local and which SSA def of that local. // - const unsigned lclNum = unsigned(phiDefFuncApp.m_args[i]); - JITDUMP("... JT-PHI [interestingVN] in " FMT_BB " relop %s operand VN is PhiDef for V%02u\n", block->bbNum, - i == 0 ? "first" : "second", lclNum); + assert(phiDefFuncApp.m_arity == 3); + const unsigned lclNum = unsigned(phiDefFuncApp.m_args[0]); + const unsigned ssaDefNum = unsigned(phiDefFuncApp.m_args[1]); + const ValueNum phiVN = ValueNum(phiDefFuncApp.m_args[2]); + JITDUMP("... JT-PHI [interestingVN] in " FMT_BB " relop %s operand VN is PhiDef for V%02u:%u " FMT_VN "\n", + block->bbNum, i == 0 ? "first" : "second", lclNum, ssaDefNum, phiVN); if (!foundPhiDef) { DISPTREE(tree); } - foundPhiDef = true; - funcArgToPhiLocalMap[i] = lclNum; - // Find the PHI for lclNum local in the current block. // GenTree* phiNode = nullptr; @@ -1168,24 +1168,28 @@ bool Compiler::optJumpThreadPhi(BasicBlock* block, GenTree* tree, ValueNum treeN GenTreeLclVarCommon* const phiDefLclNode = phiDefNode->AsOp()->gtOp1->AsLclVarCommon(); if (phiDefLclNode->GetLclNum() == lclNum) { - funcArgToPhiDefNodeMap[i] = phiDefNode; - break; + if (phiDefLclNode->GetSsaNum() == ssaDefNum) + { + funcArgToPhiLocalMap[i] = lclNum; + funcArgToPhiDefNodeMap[i] = phiDefNode; + foundPhiDef = true; + JITDUMP("Found local PHI [%06u] for V%02u\n", dspTreeID(phiDefNode), lclNum); + } + else + { + // Relop input is phi def from some other block. + // + break; + } } } - - if (funcArgToPhiDefNodeMap[i] == nullptr) - { - // This is unexpected - // - JITDUMP("Could not find phi def for V%02u in " FMT_BB "\n", lclNum, block->bbNum); - return false; - } } if (!foundPhiDef) { - // No phis in the relop's VN. + // No usable PhiDef VNs in the relop's VN. // + JITDUMP("No usable PhiDef VNs\n"); return false; }