Skip to content

Commit

Permalink
JIT: profile checking through inlining (dotnet#101834)
Browse files Browse the repository at this point in the history
Advance profile consistency check through inlining. Turns out there are
five reasons why inlining may make profile data inconsistent. Account
for these and add metrics.

Also add separate metrics for consistency before and after inlining,
since pre-inline phases are run on inlinees and so don't give us good
insight into overall consistency rates. And add some metrics for
inlining itself.

Contributes to dotnet#93020.

Co-authored-by: Aman Khalid <[email protected]>
  • Loading branch information
2 people authored and michaelgsharp committed May 8, 2024
1 parent 9489177 commit c403a06
Show file tree
Hide file tree
Showing 9 changed files with 252 additions and 44 deletions.
10 changes: 5 additions & 5 deletions src/coreclr/jit/compiler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4697,11 +4697,6 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
return;
}

// Drop back to just checking profile likelihoods.
//
activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE;
activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS;

// At this point in the phase list, all the inlinee phases have
// been run, and inlinee compiles have exited, so we should only
// get this far if we are jitting the root method.
Expand All @@ -4718,6 +4713,11 @@ void Compiler::compCompile(void** methodCodePtr, uint32_t* methodCodeSize, JitFl
// Record "start" values for post-inlining cycles and elapsed time.
RecordStateAtEndOfInlining();

// Drop back to just checking profile likelihoods.
//
activePhaseChecks &= ~PhaseChecks::CHECK_PROFILE;
activePhaseChecks |= PhaseChecks::CHECK_LIKELIHOODS;

// Transform each GT_ALLOCOBJ node into either an allocation helper call or
// local variable allocation on the stack.
ObjectAllocator objectAllocator(this); // PHASE_ALLOCATE_OBJECTS
Expand Down
3 changes: 2 additions & 1 deletion src/coreclr/jit/compiler.h
Original file line number Diff line number Diff line change
Expand Up @@ -5284,6 +5284,7 @@ class Compiler

// The number of separate return points in the method.
unsigned fgReturnCount;
unsigned fgThrowCount;

PhaseStatus fgAddInternal();

Expand Down Expand Up @@ -6228,7 +6229,7 @@ class Compiler

void fgLinkBasicBlocks();

unsigned fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, FixedBitVect* jumpTarget);
void fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, FixedBitVect* jumpTarget);

void fgCheckBasicBlockControlFlow();

Expand Down
38 changes: 21 additions & 17 deletions src/coreclr/jit/fgbasic.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ void Compiler::fgInit()
fgDomBBcount = 0;
fgBBVarSetsInited = false;
fgReturnCount = 0;
fgThrowCount = 0;

m_dfsTree = nullptr;
m_loops = nullptr;
Expand Down Expand Up @@ -233,8 +234,9 @@ bool Compiler::fgEnsureFirstBBisScratch()
{
// If the result is clearly nonsensical, just inherit
//
JITDUMP("\fgEnsureFirstBBisScratch: Profile data could not be locally repaired. Data %s inconsisent.\n",
fgPgoConsistent ? "is now" : "was already");
JITDUMP(
"\fgEnsureFirstBBisScratch: Profile data could not be locally repaired. Data %s inconsistent.\n",
fgPgoConsistent ? "is now" : "was already");

if (fgPgoConsistent)
{
Expand Down Expand Up @@ -3099,19 +3101,18 @@ void Compiler::fgLinkBasicBlocks()
// codeSize -- length of the IL stream
// jumpTarget -- [in] bit vector of jump targets found by fgFindJumpTargets
//
// Returns:
// number of return blocks (BBJ_RETURN) in the method (may be zero)
//
// Notes:
// Invoked for prejited and jitted methods, and for all inlinees

unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, FixedBitVect* jumpTarget)
// Invoked for prejitted and jitted methods, and for all inlinees.
// Sets fgReturnCount and fgThrowCount
//
void Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, FixedBitVect* jumpTarget)
{
unsigned retBlocks = 0;
const BYTE* codeBegp = codeAddr;
const BYTE* codeEndp = codeAddr + codeSize;
bool tailCall = false;
unsigned curBBoffs = 0;
unsigned retBlocks = 0;
unsigned throwBlocks = 0;
const BYTE* codeBegp = codeAddr;
const BYTE* codeEndp = codeAddr + codeSize;
bool tailCall = false;
unsigned curBBoffs = 0;
BasicBlock* curBBdesc;

// Keep track of where we are in the scope lists, as we will also
Expand Down Expand Up @@ -3312,7 +3313,8 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F
// can be dispatched as tail calls from the caller.
compInlineResult->NoteFatal(InlineObservation::CALLEE_EXPLICIT_TAIL_PREFIX);
retBlocks++;
return retBlocks;
fgReturnCount = retBlocks;
return;
}

FALLTHROUGH;
Expand Down Expand Up @@ -3415,6 +3417,7 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F

case CEE_THROW:
case CEE_RETHROW:
throwBlocks++;
jmpKind = BBJ_THROW;
break;

Expand Down Expand Up @@ -3597,7 +3600,8 @@ unsigned Compiler::fgMakeBasicBlocks(const BYTE* codeAddr, IL_OFFSET codeSize, F

fgLinkBasicBlocks();

return retBlocks;
fgReturnCount = retBlocks;
fgThrowCount = throwBlocks;
}

/*****************************************************************************
Expand Down Expand Up @@ -3709,7 +3713,7 @@ void Compiler::fgFindBasicBlocks()

/* Now create the basic blocks */

fgReturnCount = fgMakeBasicBlocks(info.compCode, info.compILCodeSize, jumpTarget);
fgMakeBasicBlocks(info.compCode, info.compILCodeSize, jumpTarget);

if (compIsForInlining())
{
Expand Down Expand Up @@ -4269,7 +4273,7 @@ void Compiler::fgFixEntryFlowForOSR()
//
if ((fgEntryBB->bbPreds != nullptr) && (fgEntryBB != fgOSREntryBB))
{
JITDUMP("OSR: profile data could not be locally repaired. Data %s inconsisent.\n",
JITDUMP("OSR: profile data could not be locally repaired. Data %s inconsistent.\n",
fgPgoConsistent ? "is now" : "was already");
fgPgoConsistent = false;
}
Expand Down
150 changes: 147 additions & 3 deletions src/coreclr/jit/fginline.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -624,24 +624,86 @@ class SubstitutePlaceholdersAndDevirtualizeWalker : public GenTreeVisitor<Substi
{
JITDUMP(" ... found foldable jtrue at [%06u] in " FMT_BB "\n", m_compiler->dspTreeID(tree),
block->bbNum);
m_compiler->Metrics.InlinerBranchFold++;

// We have a constant operand, and should have the all clear to optimize.
// Update side effects on the tree, assert there aren't any, and bash to nop.
m_compiler->gtUpdateNodeSideEffects(tree);
assert((tree->gtFlags & GTF_SIDE_EFFECT) == 0);
tree->gtBashToNOP();
m_madeChanges = true;
m_madeChanges = true;
FlowEdge* removedEdge = nullptr;

if (condTree->IsIntegralConst(0))
{
m_compiler->fgRemoveRefPred(block->GetTrueEdge());
removedEdge = block->GetTrueEdge();
m_compiler->fgRemoveRefPred(removedEdge);
block->SetKindAndTargetEdge(BBJ_ALWAYS, block->GetFalseEdge());
}
else
{
m_compiler->fgRemoveRefPred(block->GetFalseEdge());
removedEdge = block->GetFalseEdge();
m_compiler->fgRemoveRefPred(removedEdge);
block->SetKindAndTargetEdge(BBJ_ALWAYS, block->GetTrueEdge());
}

// Update profile; make it consistent if possible
//
if (block->hasProfileWeight())
{
bool repairWasComplete = true;
weight_t const weight = removedEdge->getLikelyWeight();

if (weight > 0)
{
// Target block weight will increase.
//
BasicBlock* const target = block->GetTarget();
assert(target->hasProfileWeight());
target->setBBProfileWeight(target->bbWeight + weight);

// Alternate weight will decrease
//
BasicBlock* const alternate = removedEdge->getDestinationBlock();
assert(alternate->hasProfileWeight());
weight_t const alternateNewWeight = alternate->bbWeight - weight;

// If profile weights are consistent, expect at worst a slight underflow.
//
if (m_compiler->fgPgoConsistent && (alternateNewWeight < 0))
{
assert(m_compiler->fgProfileWeightsEqual(alternateNewWeight, 0));
}
alternate->setBBProfileWeight(max(0.0, alternateNewWeight));

// This will affect profile transitively, so in general
// the profile will become inconsistent.
//
repairWasComplete = false;

// But we can check for the special case where the
// block's postdominator is target's target (simple
// if/then/else/join).
//
if (target->KindIs(BBJ_ALWAYS))
{
repairWasComplete =
alternate->KindIs(BBJ_ALWAYS) && alternate->TargetIs(target->GetTarget());
}
}

if (!repairWasComplete)
{
JITDUMP("Profile data could not be locally repaired. Data %s inconsistent.\n",
m_compiler->fgPgoConsistent ? "is now" : "was already");

if (m_compiler->fgPgoConsistent)
{
m_compiler->Metrics.ProfileInconsistentInlinerBranchFold++;
m_compiler->fgPgoConsistent = false;
}
}
}
}
}
else
Expand Down Expand Up @@ -692,6 +754,11 @@ PhaseStatus Compiler::fgInline()
JitConfig.JitPrintInlinedMethods().contains(info.compMethodHnd, info.compClassHnd, &info.compMethodInfo->args);
#endif // DEBUG

if (fgPgoConsistent)
{
Metrics.ProfileConsistentBeforeInline++;
}

noway_assert(fgFirstBB != nullptr);

BasicBlock* block = fgFirstBB;
Expand Down Expand Up @@ -822,6 +889,14 @@ PhaseStatus Compiler::fgInline()
fgRenumberBlocks();
}

if (fgPgoConsistent)
{
Metrics.ProfileConsistentAfterInline++;
}

Metrics.InlineCount = m_inlineStrategy->GetInlineCount();
Metrics.InlineAttempt = m_inlineStrategy->GetImportCount();

return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
}

Expand Down Expand Up @@ -1611,6 +1686,75 @@ void Compiler::fgInsertInlineeBlocks(InlineInfo* pInlineInfo)
}
#endif

// Update profile consistency
//
// If inlinee is inconsistent, root method will be inconsistent too.
//
if (!InlineeCompiler->fgPgoConsistent)
{
if (fgPgoConsistent)
{
JITDUMP("INLINER: profile data in root now inconsistent -- inlinee had inconsistency\n");
Metrics.ProfileInconsistentInlinee++;
fgPgoConsistent = false;
}
}

// If we inline a no-return call at a site with profile weight,
// we will introduce inconsistency.
//
if (InlineeCompiler->fgReturnCount == 0)
{
JITDUMP("INLINER: no-return inlinee\n");

if (iciBlock->bbWeight > 0)
{
if (fgPgoConsistent)
{
JITDUMP("INLINER: profile data in root now inconsistent -- no-return inlinee at call site in " FMT_BB
" with weight " FMT_WT "\n",
iciBlock->bbNum, iciBlock->bbWeight);
Metrics.ProfileInconsistentNoReturnInlinee++;
fgPgoConsistent = false;
}
}
else
{
// Inlinee scaling should assure this is so.
//
assert(InlineeCompiler->fgFirstBB->bbWeight == 0);
}
}

// If the call site is not in a try and the callee has a throw,
// we may introduce inconsistency.
//
// Technically we should check if the callee has a throw not in a try, but since
// we can't inline methods with EH yet we don't see those.
//
if (InlineeCompiler->fgThrowCount > 0)
{
JITDUMP("INLINER: may-throw inlinee\n");

if (iciBlock->bbWeight > 0)
{
if (fgPgoConsistent)
{
JITDUMP("INLINER: profile data in root now inconsistent -- may-throw inlinee at call site in " FMT_BB
" with weight " FMT_WT "\n",
iciBlock->bbNum, iciBlock->bbWeight);
Metrics.ProfileInconsistentMayThrowInlinee++;
fgPgoConsistent = false;
}
}
else
{
// Inlinee scaling should assure this is so.
//
assert(InlineeCompiler->fgFirstBB->bbWeight == 0);
}
}

// If an inlinee needs GS cookie we need to make sure that the cookie will not be allocated at zero stack offset.
// Note that if the root method needs GS cookie then this has already been taken care of.
if (!getNeedsGSSecurityCookie() && InlineeCompiler->getNeedsGSSecurityCookie())
Expand Down
41 changes: 33 additions & 8 deletions src/coreclr/jit/fgprofile.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -141,22 +141,39 @@ void Compiler::fgApplyProfileScale()
JITDUMP(" ... no callee profile data, will use non-pgo weight to scale\n");
}

// Ostensibly this should be fgCalledCount for the callee, but that's not available
// as it requires some analysis.
// Determine the weight of the first block preds, if any.
// (only happens if the first block is a loop head).
//
// For most callees it will be the same as the entry block count.
//
// Note when/if we early do normalization this may need to change.
weight_t firstBlockPredWeight = 0;
for (FlowEdge* const firstBlockPred : fgFirstBB->PredEdges())
{
firstBlockPredWeight += firstBlockPred->getLikelyWeight();
}

// Determine the "input" weight for the callee
//
weight_t calleeWeight = fgFirstBB->bbWeight;

// Callee entry weight is nonzero?
// Callee entry weight is zero or negative (taking backedges into account)?
// If so, just choose the smallest plausible weight.
//
if (calleeWeight == BB_ZERO_WEIGHT)
if (calleeWeight <= firstBlockPredWeight)
{
calleeWeight = fgHaveProfileWeights() ? 1.0 : BB_UNITY_WEIGHT;
JITDUMP(" ... callee entry has weight zero, will use weight of " FMT_WT " to scale\n", calleeWeight);
JITDUMP(" ... callee entry has zero or negative weight, will use weight of " FMT_WT " to scale\n",
calleeWeight);
JITDUMP("Profile data could not be scaled consistently. Data %s inconsistent.\n",
fgPgoConsistent ? "is now" : "was already");

if (fgPgoConsistent)
{
Metrics.ProfileInconsistentInlineeScale++;
fgPgoConsistent = false;
}
}
else
{
calleeWeight -= firstBlockPredWeight;
}

// Call site has profile weight?
Expand Down Expand Up @@ -2829,6 +2846,14 @@ PhaseStatus Compiler::fgInstrumentMethod()
//
PhaseStatus Compiler::fgIncorporateProfileData()
{
// For now we only rely on profile data when optimizing.
//
if (!opts.OptimizationEnabled())
{
JITDUMP("not optimizing, so not incorporating any profile data\n");
return PhaseStatus::MODIFIED_NOTHING;
}

// Are we doing profile stress?
//
if (fgStressBBProf() > 0)
Expand Down
Loading

0 comments on commit c403a06

Please sign in to comment.