diff --git a/src/coreclr/jit/compiler.cpp b/src/coreclr/jit/compiler.cpp
index 6ef562922d5bc..14a8448c652d8 100644
--- a/src/coreclr/jit/compiler.cpp
+++ b/src/coreclr/jit/compiler.cpp
@@ -4814,11 +4814,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
     //
     DoPhase(this, PHASE_GS_COOKIE, &Compiler::gsPhase);
 
-    // Drop back to just checking profile likelihoods.
-    //
-    activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE;
-    activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS;
-
     if (opts.OptimizationEnabled())
     {
         // Compute the block weights
@@ -4855,8 +4850,7 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
         //
         DoPhase(this, PHASE_DFS_BLOCKS3, &Compiler::fgDfsBlocksAndRemove);
 
-        // Discover and classify natural loops (e.g. mark iterative loops as such). Also marks loop blocks
-        // and sets bbWeight to the loop nesting levels.
+        // Discover and classify natural loops (e.g. mark iterative loops as such).
         //
         DoPhase(this, PHASE_FIND_LOOPS, &Compiler::optFindLoopsPhase);
 
@@ -4877,6 +4871,11 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
         DoPhase(this, PHASE_COMPUTE_DOMINATORS, &Compiler::fgComputeDominators);
     }
 
+    // Drop back to just checking profile likelihoods.
+    //
+    activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE;
+    activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS;
+
 #ifdef DEBUG
     fgDebugCheckLinks();
 #endif
diff --git a/src/coreclr/jit/fgehopt.cpp b/src/coreclr/jit/fgehopt.cpp
index ca42d702c8c62..d9710994cf152 100644
--- a/src/coreclr/jit/fgehopt.cpp
+++ b/src/coreclr/jit/fgehopt.cpp
@@ -175,6 +175,12 @@ PhaseStatus Compiler::fgRemoveEmptyFinally()
             currentBlock->SetKind(BBJ_ALWAYS);
             currentBlock->RemoveFlags(BBF_RETLESS_CALL); // no longer a BBJ_CALLFINALLY
 
+            // Update profile data into postTryFinallyBlock
+            if (currentBlock->hasProfileWeight())
+            {
+                postTryFinallyBlock->increaseBBProfileWeight(currentBlock->bbWeight);
+            }
+
             // Cleanup the postTryFinallyBlock
             fgCleanupContinuation(postTryFinallyBlock);
 
@@ -672,12 +678,6 @@ PhaseStatus Compiler::fgRemoveEmptyTry()
         fgPrepareCallFinallyRetForRemoval(leave);
         fgRemoveBlock(leave, /* unreachable */ true);
 
-        // Remove profile weight into the continuation block
-        if (continuation->hasProfileWeight())
-        {
-            continuation->decreaseBBProfileWeight(leave->bbWeight);
-        }
-
         // (3) Convert the callfinally to a normal jump to the handler
         assert(callFinally->HasInitializedTarget());
         callFinally->SetKind(BBJ_ALWAYS);
@@ -1674,6 +1674,16 @@ PhaseStatus Compiler::fgCloneFinally()
             }
         }
 
+        // Update flow into normalCallFinallyReturn
+        if (normalCallFinallyReturn->hasProfileWeight())
+        {
+            normalCallFinallyReturn->bbWeight = BB_ZERO_WEIGHT;
+            for (FlowEdge* const predEdge : normalCallFinallyReturn->PredEdges())
+            {
+                normalCallFinallyReturn->increaseBBProfileWeight(predEdge->getLikelyWeight());
+            }
+        }
+
         // Done!
         JITDUMP("\nDone with EH#%u\n\n", XTnum);
         cloneCount++;
diff --git a/src/coreclr/jit/fgopt.cpp b/src/coreclr/jit/fgopt.cpp
index 6c588ed4b96f5..cc153b7ca5621 100644
--- a/src/coreclr/jit/fgopt.cpp
+++ b/src/coreclr/jit/fgopt.cpp
@@ -1259,8 +1259,25 @@ void Compiler::fgUnreachableBlock(BasicBlock* block)
     // Mark the block as removed
     block->SetFlags(BBF_REMOVED);
 
-    // Update bbRefs and bbPreds for the blocks reached by this block
-    fgRemoveBlockAsPred(block);
+    // Update bbRefs and bbPreds for this block's successors
+    bool profileInconsistent = false;
+    for (BasicBlock* const succBlock : block->Succs(this))
+    {
+        FlowEdge* const succEdge = fgRemoveAllRefPreds(succBlock, block);
+
+        if (block->hasProfileWeight() && succBlock->hasProfileWeight())
+        {
+            succBlock->decreaseBBProfileWeight(succEdge->getLikelyWeight());
+            profileInconsistent |= (succBlock->NumSucc() > 0);
+        }
+    }
+
+    if (profileInconsistent)
+    {
+        JITDUMP("Flow removal of " FMT_BB " needs to be propagated. Data %s inconsistent.\n", block->bbNum,
+                fgPgoConsistent ? "is now" : "was already");
+        fgPgoConsistent = false;
+    }
 }
 
 //-------------------------------------------------------------
@@ -2592,13 +2609,13 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump)
         estDupCostSz += expr->GetCostSz();
     }
 
-    bool     allProfileWeightsAreValid = false;
-    weight_t weightJump                = bJump->bbWeight;
-    weight_t weightDest                = bDest->bbWeight;
-    weight_t weightNext                = bJump->Next()->bbWeight;
-    bool     rareJump                  = bJump->isRunRarely();
-    bool     rareDest                  = bDest->isRunRarely();
-    bool     rareNext                  = bJump->Next()->isRunRarely();
+    bool     haveProfileWeights = false;
+    weight_t weightJump         = bJump->bbWeight;
+    weight_t weightDest         = bDest->bbWeight;
+    weight_t weightNext         = bJump->Next()->bbWeight;
+    bool     rareJump           = bJump->isRunRarely();
+    bool     rareDest           = bDest->isRunRarely();
+    bool     rareNext           = bJump->Next()->isRunRarely();
 
     // If we have profile data then we calculate the number of time
     // the loop will iterate into loopIterations
@@ -2611,7 +2628,7 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump)
         bDest->HasAnyFlag(BBF_PROF_WEIGHT | BBF_RUN_RARELY) &&
         bJump->Next()->HasAnyFlag(BBF_PROF_WEIGHT | BBF_RUN_RARELY))
     {
-        allProfileWeightsAreValid = true;
+        haveProfileWeights = true;
 
         if ((weightJump * 100) < weightDest)
         {
@@ -2668,9 +2685,9 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump)
     if (verbose)
     {
         printf("\nDuplication of the conditional block " FMT_BB " (always branch from " FMT_BB
-               ") %s, because the cost of duplication (%i) is %s than %i, validProfileWeights = %s\n",
+               ") %s, because the cost of duplication (%i) is %s than %i, haveProfileWeights = %s\n",
               bDest->bbNum, bJump->bbNum, costIsTooHigh ? "not done" : "performed", estDupCostSz,
-               costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, allProfileWeightsAreValid ? "true" : "false");
+               costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, dspBool(haveProfileWeights));
"greater" : "less or equal", maxDupCostSz, dspBool(haveProfileWeights)); } #endif // DEBUG @@ -2772,7 +2789,8 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) // bJump now falls through into the next block // - FlowEdge* const falseEdge = fgAddRefPred(bJump->Next(), bJump, destFalseEdge); + BasicBlock* const bDestFalseTarget = bJump->Next(); + FlowEdge* const falseEdge = fgAddRefPred(bDestFalseTarget, bJump, destFalseEdge); // bJump now jumps to bDest's normal jump target // @@ -2781,35 +2799,26 @@ bool Compiler::fgOptimizeBranch(BasicBlock* bJump) bJump->SetCond(bJump->GetTargetEdge(), falseEdge); - if (weightJump > 0) + // Update profile data + // + if (haveProfileWeights) { - if (allProfileWeightsAreValid) - { - if (weightDest > weightJump) - { - bDest->bbWeight = (weightDest - weightJump); - } - else if (!bDest->isRunRarely()) - { - bDest->bbWeight = BB_UNITY_WEIGHT; - } - } - else - { - weight_t newWeightDest = 0; + // bJump no longer flows into bDest + // + bDest->decreaseBBProfileWeight(bJump->bbWeight); + bDestNormalTarget->decreaseBBProfileWeight(bJump->bbWeight * destFalseEdge->getLikelihood()); + bDestFalseTarget->decreaseBBProfileWeight(bJump->bbWeight * destTrueEdge->getLikelihood()); - if (weightDest > weightJump) - { - newWeightDest = (weightDest - weightJump); - } - if (weightDest >= (BB_LOOP_WEIGHT_SCALE * BB_UNITY_WEIGHT) / 2) - { - newWeightDest = (weightDest * 2) / (BB_LOOP_WEIGHT_SCALE * BB_UNITY_WEIGHT); - } - if (newWeightDest > 0) - { - bDest->bbWeight = newWeightDest; - } + // Propagate bJump's weight into its new successors + // + bDestNormalTarget->increaseBBProfileWeight(bJump->GetTrueEdge()->getLikelyWeight()); + bDestFalseTarget->increaseBBProfileWeight(falseEdge->getLikelyWeight()); + + if ((bDestNormalTarget->NumSucc() > 0) || (bDestFalseTarget->NumSucc() > 0)) + { + JITDUMP("fgOptimizeBranch: New flow out of " FMT_BB " needs to be propagated. Data %s inconsistent.\n", + fgPgoConsistent ? "is now" : "was already"); + fgPgoConsistent = false; } } @@ -2939,6 +2948,10 @@ bool Compiler::fgOptimizeSwitchJumps() blockToTargetEdge->setLikelihood(fraction); blockToNewBlockEdge->setLikelihood(max(0.0, 1.0 - fraction)); + JITDUMP("fgOptimizeSwitchJumps: Updated flow into " FMT_BB " needs to be propagated. Data %s inconsistent.\n", + newBlock->bbNum, fgPgoConsistent ? "is now" : "was already"); + fgPgoConsistent = false; + // For now we leave the switch as is, since there's no way // to indicate that one of the cases is now unreachable. // @@ -3168,7 +3181,7 @@ bool Compiler::fgExpandRarelyRunBlocks() // If block is not run rarely, then check to make sure that it has // at least one non-rarely run block. - if (!block->isRunRarely()) + if (!block->isRunRarely() && !block->isBBCallFinallyPairTail()) { bool rare = true; diff --git a/src/coreclr/jit/fgprofile.cpp b/src/coreclr/jit/fgprofile.cpp index 830c8fe0ce777..9cf332bf16c99 100644 --- a/src/coreclr/jit/fgprofile.cpp +++ b/src/coreclr/jit/fgprofile.cpp @@ -4463,6 +4463,9 @@ bool Compiler::fgComputeCalledCount(weight_t returnWeight) { fgFirstBB->setBBProfileWeight(fgCalledCount); madeChanges = true; + JITDUMP("fgComputeCalledCount: Modified method entry weight. Data %s inconsistent.\n", + fgPgoConsistent ? 
"is now" : "was already"); + fgPgoConsistent = false; } #if DEBUG diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index b417622a0f38e..cb1070d57ba90 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -2011,21 +2011,8 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex bool cloneLoopsWithEH = false; INDEBUG(cloneLoopsWithEH = (JitConfig.JitCloneLoopsWithEH() > 0);) - // Determine the depth of the loop, so we can properly weight blocks added (outside the cloned loop blocks). - unsigned depth = loop->GetDepth(); - weight_t ambientWeight = 1; - for (unsigned j = 0; j < depth; j++) - { - weight_t lastWeight = ambientWeight; - ambientWeight *= BB_LOOP_WEIGHT_SCALE; - assert(ambientWeight > lastWeight); - } - assert(loop->EntryEdges().size() == 1); BasicBlock* preheader = loop->EntryEdge(0)->getSourceBlock(); - // The ambient weight might be higher than we computed above. Be safe by - // taking the max with the head block's weight. - ambientWeight = max(ambientWeight, preheader->bbWeight); // We're going to transform this loop: // @@ -2043,8 +2030,7 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex BasicBlock* fastPreheader = fgNewBBafter(BBJ_ALWAYS, preheader, /*extendRegion*/ true); JITDUMP("Adding " FMT_BB " after " FMT_BB "\n", fastPreheader->bbNum, preheader->bbNum); - fastPreheader->bbWeight = preheader->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight; - fastPreheader->CopyFlags(preheader, (BBF_PROF_WEIGHT | BBF_RUN_RARELY)); + fastPreheader->inheritWeight(preheader); assert(preheader->KindIs(BBJ_ALWAYS)); assert(preheader->TargetIs(loop->GetHeader())); @@ -2092,8 +2078,7 @@ void Compiler::optCloneLoop(FlowGraphNaturalLoop* loop, LoopCloneContext* contex const bool extendRegion = BasicBlock::sameEHRegion(beforeSlowPreheader, preheader); BasicBlock* slowPreheader = fgNewBBafter(BBJ_ALWAYS, beforeSlowPreheader, extendRegion); JITDUMP("Adding " FMT_BB " after " FMT_BB "\n", slowPreheader->bbNum, beforeSlowPreheader->bbNum); - slowPreheader->bbWeight = preheader->isRunRarely() ? BB_ZERO_WEIGHT : ambientWeight; - slowPreheader->CopyFlags(preheader, (BBF_PROF_WEIGHT | BBF_RUN_RARELY)); + slowPreheader->inheritWeight(preheader); slowPreheader->scaleBBWeight(LoopCloneContext::slowPathWeightScaleFactor); // If we didn't extend the region above (because the beforeSlowPreheader @@ -3091,6 +3076,13 @@ PhaseStatus Compiler::optCloneLoops() m_dfsTree = fgComputeDfs(); m_loops = FlowGraphNaturalLoops::Find(m_dfsTree); } + + if (fgIsUsingProfileWeights()) + { + JITDUMP("optCloneLoops: Profile data needs to be propagated through new loops. Data %s inconsistent.\n", + fgPgoConsistent ? 
"is now" : "was already"); + fgPgoConsistent = false; + } } #ifdef DEBUG diff --git a/src/coreclr/jit/optimizer.cpp b/src/coreclr/jit/optimizer.cpp index 2ed5909eab2aa..6adb49eed59eb 100644 --- a/src/coreclr/jit/optimizer.cpp +++ b/src/coreclr/jit/optimizer.cpp @@ -2019,11 +2019,11 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) estDupCostSz += tree->GetCostSz(); } - weight_t loopIterations = BB_LOOP_WEIGHT_SCALE; - bool allProfileWeightsAreValid = false; - weight_t const weightBlock = block->bbWeight; - weight_t const weightTest = bTest->bbWeight; - weight_t const weightTop = bTop->bbWeight; + weight_t loopIterations = BB_LOOP_WEIGHT_SCALE; + bool haveProfileWeights = false; + weight_t const weightBlock = block->bbWeight; + weight_t const weightTest = bTest->bbWeight; + weight_t const weightTop = bTop->bbWeight; // If we have profile data then we calculate the number of times // the loop will iterate into loopIterations @@ -2040,6 +2040,8 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) return true; } + haveProfileWeights = true; + // We generally expect weightTest > weightTop // // Tolerate small inconsistencies... @@ -2051,8 +2053,6 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) } else { - allProfileWeightsAreValid = true; - // Determine average iteration count // // weightTop is the number of time this loop executes @@ -2149,10 +2149,10 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) // tree walk to count them was not done. printf( "\nDuplication of loop condition [%06u] is %s, because the cost of duplication (%i) is %s than %i," - "\n loopIterations = %7.3f, optInvertTotalInfo.sharedStaticHelperCount >= %d, validProfileWeights = %s\n", + "\n loopIterations = %7.3f, optInvertTotalInfo.sharedStaticHelperCount >= %d, haveProfileWeights = %s\n", dspTreeID(condTree), costIsTooHigh ? "not done" : "performed", estDupCostSz, costIsTooHigh ? "greater" : "less or equal", maxDupCostSz, loopIterations, - optInvertTotalInfo.sharedStaticHelperCount, dspBool(allProfileWeightsAreValid)); + optInvertTotalInfo.sharedStaticHelperCount, dspBool(haveProfileWeights)); } #endif @@ -2203,25 +2203,6 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) // Flag the block that received the copy as potentially having various constructs. bNewCond->CopyFlags(bTest, BBF_COPY_PROPAGATE); - // Fix flow and profile - // - bNewCond->inheritWeight(block); - - if (allProfileWeightsAreValid) - { - weight_t const delta = weightTest - weightTop; - - // If there is just one outside edge incident on bTest, then ideally delta == block->bbWeight. - // But this might not be the case if profile data is inconsistent. - // - // And if bTest has multiple outside edges we want to account for the weight of them all. - // - if (delta > block->bbWeight) - { - bNewCond->setBBProfileWeight(delta); - } - } - // Update pred info // // For now we set the likelihood of the newCond branch to match @@ -2245,6 +2226,15 @@ bool Compiler::optInvertWhileLoop(BasicBlock* block) fgRedirectTargetEdge(block, bNewCond); assert(block->JumpsToNext()); + // Fix flow and profile + // + bNewCond->inheritWeight(block); + + if (haveProfileWeights) + { + bTest->decreaseBBProfileWeight(block->bbWeight); + } + // Move all predecessor edges that look like loop entry edges to point to the new cloned condition // block, not the existing condition block. 
     // `bNewCond`, but leave other `bTest` predecessors still pointing to `bTest`, when we eventually
@@ -2256,9 +2246,10 @@
     //
     unsigned const loopFirstNum  = bTop->bbNum;
     unsigned const loopBottomNum = bTest->bbNum;
-    for (BasicBlock* const predBlock : bTest->PredBlocksEditing())
+    for (FlowEdge* const predEdge : bTest->PredEdgesEditing())
     {
-        unsigned const bNum = predBlock->bbNum;
+        BasicBlock* const predBlock = predEdge->getSourceBlock();
+        unsigned const    bNum      = predBlock->bbNum;
         if ((loopFirstNum <= bNum) && (bNum <= loopBottomNum))
         {
             // Looks like the predecessor is from within the potential loop; skip it.
@@ -2278,6 +2269,14 @@
             case BBJ_SWITCH:
             case BBJ_EHFINALLYRET:
                 fgReplaceJumpTarget(predBlock, bTest, bNewCond);
+
+                // Redirect profile weight, too
+                if (haveProfileWeights)
+                {
+                    const weight_t edgeWeight = predEdge->getLikelyWeight();
+                    bNewCond->increaseBBProfileWeight(edgeWeight);
+                    bTest->decreaseBBProfileWeight(edgeWeight);
+                }
                 break;
 
             case BBJ_EHCATCHRET:
@@ -2291,41 +2290,6 @@
         }
     }
 
-    // If we have profile data for all blocks and we know that we are cloning the
-    // `bTest` block into `bNewCond` and thus changing the control flow from `block` so
-    // that it no longer goes directly to `bTest` anymore, we have to adjust
-    // various weights.
-    //
-    if (allProfileWeightsAreValid)
-    {
-        // Update the weight for bTest. Normally, this reduces the weight of the bTest, except in odd
-        // cases of stress modes with inconsistent weights.
-        //
-        JITDUMP("Reducing profile weight of " FMT_BB " from " FMT_WT " to " FMT_WT "\n", bTest->bbNum, weightTest,
-                weightTop);
-        bTest->inheritWeight(bTop);
-
-#ifdef DEBUG
-        // If we're checking profile data, see if profile for the two target blocks is consistent.
-        //
-        if ((activePhaseChecks & PhaseChecks::CHECK_PROFILE) == PhaseChecks::CHECK_PROFILE)
-        {
-            if (JitConfig.JitProfileChecks() > 0)
-            {
-                const ProfileChecks checks        = (ProfileChecks)JitConfig.JitProfileChecks();
-                const bool          nextProfileOk = fgDebugCheckIncomingProfileData(bNewCond->GetFalseTarget(), checks);
-                const bool          jumpProfileOk = fgDebugCheckIncomingProfileData(bNewCond->GetTrueTarget(), checks);
-
-                if (hasFlag(checks, ProfileChecks::RAISE_ASSERT))
-                {
-                    assert(nextProfileOk);
-                    assert(jumpProfileOk);
-                }
-            }
-        }
-#endif // DEBUG
-    }
-
 #ifdef DEBUG
     if (verbose)
     {
@@ -2941,6 +2905,14 @@ bool Compiler::optCreatePreheader(FlowGraphNaturalLoop* loop)
 
     optSetWeightForPreheaderOrExit(loop, preheader);
 
+    if (preheader->hasProfileWeight() && preheader->hasEHBoundaryIn())
+    {
+        JITDUMP("optCreatePreheader: " FMT_BB
+                " is not reachable via normal flow, so skip checking its entry weight. Data %s inconsistent.\n",
+                preheader->bbNum, fgPgoConsistent ? "is now" : "was already");
+        fgPgoConsistent = false;
+    }
+
     return true;
 }
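
Illustrative sketch (not part of the patch): every profile update in this diff follows the same recipe. When flow into or out of a block changes, the affected block weights are adjusted by the likely weight of the edge involved (source block weight times edge likelihood), and fgPgoConsistent is cleared whenever that adjustment would have to be propagated further through the graph. The small standalone C++ model below shows that bookkeeping under simplified assumptions; Block, Edge, likelyWeight, recomputeFromPreds, and removeOutgoingFlow are stand-ins invented for the example, not the JIT's real BasicBlock/FlowEdge API.

// profile_weight_sketch.cpp
// Simplified model of the profile bookkeeping used in this change.
#include <algorithm>
#include <cstdio>
#include <vector>

struct Block;

struct Edge
{
    Block* source;
    Block* target;
    double likelihood; // fraction of source's executions that take this edge
};

struct Block
{
    const char*        name;
    double             weight = 0.0; // profile-derived execution count
    std::vector<Edge*> preds;
    std::vector<Edge*> succs;
    bool               hasProfile = true;
};

// Weight the profile implies for an edge: source count scaled by branch probability,
// playing the role that getLikelyWeight plays in the patch.
double likelyWeight(const Edge* e)
{
    return e->source->weight * e->likelihood;
}

// Rebuild a join block's weight purely from its incoming edges, the way the
// fgCloneFinally hunk recomputes normalCallFinallyReturn.
void recomputeFromPreds(Block* b)
{
    if (!b->hasProfile)
    {
        return;
    }
    b->weight = 0.0;
    for (Edge* predEdge : b->preds)
    {
        b->weight += likelyWeight(predEdge);
    }
}

// Remove a dying block's outgoing flow, the way the fgUnreachableBlock hunk does:
// each successor loses the removed edge's likely weight (never dropping below zero),
// and if any successor has successors of its own the change still has to travel
// further, so the global consistency flag is cleared.
void removeOutgoingFlow(Block* dying, bool& pgoConsistent)
{
    bool needsPropagation = false;
    for (Edge* succEdge : dying->succs)
    {
        Block* succ = succEdge->target;
        if (dying->hasProfile && succ->hasProfile)
        {
            succ->weight = std::max(0.0, succ->weight - likelyWeight(succEdge));
            needsPropagation |= !succ->succs.empty();
        }
    }
    if (needsPropagation)
    {
        pgoConsistent = false;
    }
}

int main()
{
    // A executes 100 times and splits 60/40 between B and C; B always falls into C.
    Block a{"A", 100.0}, b{"B", 60.0}, c{"C"};
    Edge  ab{&a, &b, 0.6}, ac{&a, &c, 0.4}, bc{&b, &c, 1.0};
    a.succs = {&ab, &ac};
    b.preds = {&ab};
    b.succs = {&bc};
    c.preds = {&ac, &bc};

    bool pgoConsistent = true;
    recomputeFromPreds(&c);                // 100 * 0.4 + 60 * 1.0 = 100
    removeOutgoingFlow(&b, pgoConsistent); // C drops by 60; C has no successors
    std::printf("C weight = %.1f, consistent = %s\n", c.weight, pgoConsistent ? "yes" : "no");
    return 0;
}

In the example, removing B's outgoing flow subtracts the 60 counts that arrived along B->C from C's weight; since C has no successors of its own, nothing further needs repair and the consistency flag stays set.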