Skip to content

Commit

Permalink
Merge pull request #2348 from r30shah/AddRecompilationTestPR
Browse files Browse the repository at this point in the history
Add recompilation test in loop in jProfiling
  • Loading branch information
fjeremic authored Jul 17, 2018
2 parents 5958379 + d59c5e4 commit f497aa4
Show file tree
Hide file tree
Showing 12 changed files with 472 additions and 77 deletions.
1 change: 1 addition & 0 deletions runtime/compiler/build/files/common.mk
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ JIT_PRODUCT_BACKEND_SOURCES+=\
compiler/optimizer/JitProfiler.cpp \
compiler/optimizer/JProfilingBlock.cpp \
compiler/optimizer/JProfilingValue.cpp \
compiler/optimizer/JProfilingRecompLoopTest.cpp \
compiler/optimizer/LiveVariablesForGC.cpp \
compiler/optimizer/LoopAliasRefiner.cpp \
compiler/optimizer/MonitorElimination.cpp \
Expand Down
5 changes: 4 additions & 1 deletion runtime/compiler/optimizer/J9OptimizationManager.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*******************************************************************************
* Copyright (c) 2000, 2017 IBM Corp. and others
* Copyright (c) 2000, 2018 IBM Corp. and others
*
* This program and the accompanying materials are made available under
* the terms of the Eclipse Public License 2.0 which accompanies this
Expand Down Expand Up @@ -118,6 +118,9 @@ J9::OptimizationManager::OptimizationManager(TR::Optimizer *o, OptimizationFacto
case OMR::jProfilingBlock:
_flags.set(doesNotRequireAliasSets);
break;
case OMR::jProfilingRecompLoopTest:
_flags.set(requiresStructure);
break;
default:
// do nothing
break;
Expand Down
8 changes: 8 additions & 0 deletions runtime/compiler/optimizer/J9Optimizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
#include "optimizer/OSRGuardRemoval.hpp"
#include "optimizer/JProfilingBlock.hpp"
#include "optimizer/JProfilingValue.hpp"
#include "optimizer/JProfilingRecompLoopTest.hpp"
#include "runtime/J9Profiler.hpp"
#include "optimizer/UnsafeFastPath.hpp"
#include "optimizer/VarHandleTransformer.hpp"
Expand Down Expand Up @@ -279,6 +280,7 @@ static const OptimizationStrategy coldStrategyOpts[] =
{ OMR::rematerialization },
{ OMR::compactNullChecks, OMR::IfEnabled },
{ OMR::signExtendLoadsGroup, OMR::IfEnabled },
{ OMR::jProfilingRecompLoopTest, OMR::IfLoops },
{ OMR::jProfilingValue, OMR::MustBeDone },
{ OMR::trivialDeadTreeRemoval, },
{ OMR::cheapTacticalGlobalRegisterAllocatorGroup, OMR::IfAOTAndEnabled },
Expand Down Expand Up @@ -364,6 +366,7 @@ static const OptimizationStrategy warmStrategyOpts[] =
{ OMR::globalDeadStoreElimination, OMR::IfVoluntaryOSR },
{ OMR::arraysetStoreElimination },
{ OMR::checkcastAndProfiledGuardCoalescer },
{ OMR::jProfilingRecompLoopTest, OMR::IfLoops },
{ OMR::jProfilingValue, OMR::MustBeDone },
{ OMR::cheapTacticalGlobalRegisterAllocatorGroup, OMR::IfEnabled },
{ OMR::globalDeadStoreGroup, },
Expand Down Expand Up @@ -398,6 +401,7 @@ static const OptimizationStrategy reducedWarmStrategyOpts[] =
{ OMR::localCSE },
{ OMR::treeSimplification, OMR::MarkLastRun },
{ OMR::deadTreesElimination, OMR::IfEnabled }, // cleanup at the end
{ OMR::jProfilingRecompLoopTest, OMR::IfLoops },
{ OMR::jProfilingValue, OMR::MustBeDone },
{ OMR::cheapTacticalGlobalRegisterAllocatorGroup, OMR::IfEnabled },
{ OMR::endOpts }
Expand Down Expand Up @@ -459,6 +463,7 @@ const OptimizationStrategy hotStrategyOpts[] =
{ OMR::localValuePropagation, OMR::MarkLastRun },
{ OMR::arraycopyTransformation },
{ OMR::checkcastAndProfiledGuardCoalescer },
{ OMR::jProfilingRecompLoopTest, OMR::IfLoops },
{ OMR::jProfilingValue, OMR::MustBeDone },
{ OMR::tacticalGlobalRegisterAllocatorGroup, OMR::IfEnabled },
{ OMR::globalDeadStoreElimination, OMR::IfMoreThanOneBlock }, // global dead store removal
Expand Down Expand Up @@ -708,6 +713,7 @@ static const OptimizationStrategy cheapWarmStrategyOpts[] =
{ OMR::deadTreesElimination, OMR::IfEnabled }, // cleanup at the end
{ OMR::treeSimplification, OMR::IfEnabledMarkLastRun }, // Simplify non-normalized address computations introduced by prefetch insertion
{ OMR::trivialDeadTreeRemoval, OMR::IfEnabled }, // final cleanup before opcode expansion
{ OMR::jProfilingRecompLoopTest, OMR::IfLoops },
{ OMR::jProfilingValue, OMR::MustBeDone },
{ OMR::cheapTacticalGlobalRegisterAllocatorGroup, OMR::IfEnabled },
{ OMR::globalDeadStoreGroup, },
Expand Down Expand Up @@ -811,6 +817,8 @@ J9::Optimizer::Optimizer(TR::Compilation *comp, TR::ResolvedMethodSymbol *method
new (comp->allocator()) TR::OptimizationManager(self(), TR_OSRGuardRemoval::create, OMR::osrGuardRemoval);
_opts[OMR::jProfilingBlock] =
new (comp->allocator()) TR::OptimizationManager(self(), TR_JProfilingBlock::create, OMR::jProfilingBlock);
_opts[OMR::jProfilingRecompLoopTest] =
new (comp->allocator()) TR::OptimizationManager(self(), TR_JProfilingRecompLoopTest::create, OMR::jProfilingRecompLoopTest);
_opts[OMR::jProfilingValue] =
new (comp->allocator()) TR::OptimizationManager(self(), TR_JProfilingValue::create, OMR::jProfilingValue);
// NOTE: Please add new J9 optimizations here!
Expand Down
16 changes: 16 additions & 0 deletions runtime/compiler/optimizer/J9TransformUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,22 @@
#include "ras/DebugCounter.hpp"
#include "j9.h"
#include "optimizer/OMROptimization_inlines.hpp"
#include "optimizer/Structure.hpp"

/**
 * Computes the loop nesting depth of a block from the region structure.
 *
 * Starting from the region returned by getParentStructureIfExists() for the
 * block, this climbs through successive parent regions and counts each one
 * that reports itself as a natural loop. The climb stops at the first region
 * that is not a natural loop, or when no further parent region exists.
 *
 * @param comp  compilation whose flow graph is passed to the structure lookup
 * @param block block whose nesting depth is requested
 * @return number of consecutive natural-loop regions enclosing the block;
 *         0 when the lookup yields no region or the innermost region is not
 *         a natural loop
 */
int32_t J9::TransformUtil::getLoopNestingDepth(TR::Compilation *comp, TR::Block *block)
   {
   int32_t depth = 0;
   for (TR_RegionStructure *enclosing = block->getParentStructureIfExists(comp->getFlowGraph());
        enclosing && enclosing->isNaturalLoop();
        enclosing = enclosing->getParent())
      {
      ++depth;
      }
   return depth;
   }

/*
* Generate trees for call to jitRetranslateCallerWithPrep to trigger recompilation from JIT-Compiled code.
Expand Down
1 change: 1 addition & 0 deletions runtime/compiler/optimizer/J9TransformUtil.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ class OMR_EXTENSIBLE TransformUtil : public OMR::TransformUtilConnector
{
public:
static TR::TreeTop *generateRetranslateCallerWithPrepTrees(TR::Node *node, TR_PersistentMethodInfo::InfoBits reason, TR::Compilation *comp);
static int32_t getLoopNestingDepth(TR::Compilation *comp, TR::Block *block);
static bool foldFinalFieldsIn(TR_OpaqueClassBlock *clazz, char *className, int32_t classNameLength, bool isStatic, TR::Compilation *comp);

static TR::Node *generateArrayElementShiftAmountTrees(
Expand Down
90 changes: 19 additions & 71 deletions runtime/compiler/optimizer/JProfilingBlock.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,6 @@
int32_t TR_JProfilingBlock::nestedLoopRecompileThreshold = 10;
int32_t TR_JProfilingBlock::loopRecompileThreshold = 250;
int32_t TR_JProfilingBlock::recompileThreshold = 500;
int32_t TR_JProfilingBlock::profilingCompileThreshold = 2;

/**
* Prim's algorithm to compute a Minimum Spanning Tree traverses the edges of the tree
Expand Down Expand Up @@ -831,9 +830,9 @@ void TR_JProfilingBlock::dumpCounterDependencies(TR_BitVector **componentCounter
* appropriate number of method entries has occurred as determined by the raw block
* count of the first block of the method.
*/
void TR_JProfilingBlock::addRecompilationTests(TR_BlockFrequencyInfo *blockFrequencyInfo, TR_BitVector **componentCounters)
void TR_JProfilingBlock::addRecompilationTests(TR_BlockFrequencyInfo *blockFrequencyInfo)
{
// add invocation check to the top of the method
// add invocation check to the top of the method
int32_t *thresholdLocation = NULL;
if (comp()->getMethodSymbol()->mayHaveNestedLoops())
thresholdLocation = &nestedLoopRecompileThreshold;
Expand All @@ -842,97 +841,47 @@ void TR_JProfilingBlock::addRecompilationTests(TR_BlockFrequencyInfo *blockFrequ
else
thresholdLocation = &recompileThreshold;

// Profiling compilations have a lower threshold, so that less time is
// spent running the high overhead implementation
if (comp()->isProfilingCompilation())
thresholdLocation = &profilingCompileThreshold;

int32_t startBlockNumber = comp()->getStartBlock()->getNumber();
blockFrequencyInfo->setEntryBlockNumber(startBlockNumber);

TR::Node *node = comp()->getMethodSymbol()->getFirstTreeTop()->getNode();

if (componentCounters[startBlockNumber * 2] && (((uintptr_t)componentCounters[startBlockNumber * 2]) & 0x1 == 1 || !componentCounters[startBlockNumber * 2]->isEmpty()))
TR::Node *root = blockFrequencyInfo->generateBlockRawCountCalculationSubTree(comp(), startBlockNumber, node);
bool isProfilingCompilation = comp()->isProfilingCompilation();
if (root != NULL)
{
TR::DebugCounter::incStaticDebugCounter(comp(), TR::DebugCounter::debugCounterName(comp(), "jprofiling.instrument/success/(%s)", comp()->signature()));
comp()->getFlowGraph()->setStructure(NULL);
// add the positive counters
TR::Node *addRoot = NULL;
if (((uintptr_t)componentCounters[startBlockNumber * 2]) & 0x1 == 1)
{
TR::SymbolReference *symRef = comp()->getSymRefTab()->createKnownStaticDataSymbolRef(blockFrequencyInfo->getFrequencyForBlock(((uintptr_t)componentCounters[startBlockNumber * 2]) >> 1), TR::Int32);
addRoot = TR::Node::createWithSymRef(node, TR::iload, 0, symRef);
}
else
{
TR_BitVectorIterator addBVI(*(componentCounters[startBlockNumber * 2]));
while (addBVI.hasMoreElements())
{
TR::SymbolReference *symRef = comp()->getSymRefTab()->createKnownStaticDataSymbolRef(blockFrequencyInfo->getFrequencyForBlock(addBVI.getNextElement()), TR::Int32);
TR::Node *counterLoad = TR::Node::createWithSymRef(node, TR::iload, 0, symRef);
if (addRoot)
addRoot = TR::Node::create(node, TR::iadd, 2, addRoot, counterLoad);
else
addRoot = counterLoad;
}
}
TR::Node *subRoot = NULL;
if (componentCounters[startBlockNumber * 2 + 1] != NULL)
{
if (((uintptr_t)componentCounters[startBlockNumber * 2 + 1]) & 0x1 == 1)
{
TR::SymbolReference *symRef = comp()->getSymRefTab()->createKnownStaticDataSymbolRef(blockFrequencyInfo->getFrequencyForBlock(((uintptr_t)componentCounters[startBlockNumber * 2 + 1]) >> 1), TR::Int32);
subRoot = TR::Node::createWithSymRef(node, TR::iload, 0, symRef);
}
else
{
TR_BitVectorIterator subBVI(*(componentCounters[startBlockNumber * 2 + 1]));
while (subBVI.hasMoreElements())
{
TR::SymbolReference *symRef = comp()->getSymRefTab()->createKnownStaticDataSymbolRef(blockFrequencyInfo->getFrequencyForBlock(subBVI.getNextElement()), TR::Int32);
TR::Node *counterLoad = TR::Node::createWithSymRef(node, TR::iload, 0, symRef);
if (subRoot)
{
subRoot = TR::Node::create(node, TR::isub, 2, subRoot, counterLoad);
}
else
{
subRoot = counterLoad;
}
}
}
}
TR::Node *root = addRoot;
if (subRoot)
{
root = TR::Node::create(node, TR::isub, 2, root, subRoot);
}

TR::Block * originalFirstBlock = comp()->getStartBlock();

TR::Block *guardBlock1 = TR::Block::createEmptyBlock(node, comp(), originalFirstBlock->getFrequency());
{
TR::SymbolReference *symRef = comp()->getSymRefTab()->createKnownStaticDataSymbolRef(blockFrequencyInfo->getEnableJProfilingRecompilation(), TR::Int32);
// If this is a profiling compilation, we do not need to check at runtime whether jProfiling is enabled.
// In that case we only check whether the method has already been queued for recompilation before comparing against the method invocation count.
int32_t *loadAddress = isProfilingCompilation ? blockFrequencyInfo->getIsQueuedForRecompilation() : blockFrequencyInfo->getEnableJProfilingRecompilation();
TR::SymbolReference *symRef = comp()->getSymRefTab()->createKnownStaticDataSymbolRef(loadAddress, TR::Int32);
TR::Node *enableLoad = TR::Node::createWithSymRef(node, TR::iload, 0, symRef);
TR::Node *enableTest = TR::Node::createif(TR::ificmpeq, enableLoad, TR::Node::iconst(node, 0), originalFirstBlock->getEntry());
TR::Node *enableTest = TR::Node::createif(TR::ificmpeq, enableLoad, TR::Node::iconst(node, -1), originalFirstBlock->getEntry());
TR::TreeTop *enableTree = TR::TreeTop::create(comp(), enableTest);
enableTest->setIsProfilingCode();
guardBlock1->append(enableTree);
}

static int32_t jProfilingCompileThreshold = comp()->getOptions()->getJProfilingMethodRecompThreshold();
if (trace())
traceMsg(comp(),"Profiling Compile Threshold for method = %d\n",isProfilingCompilation ? jProfilingCompileThreshold : *thresholdLocation);
TR::Block *guardBlock2 = TR::Block::createEmptyBlock(node, comp(), originalFirstBlock->getFrequency());
TR::Node *recompThreshold = TR::Node::createWithSymRef(node, TR::iload, 0, comp()->getSymRefTab()->createKnownStaticDataSymbolRef(thresholdLocation, TR::Int32));
TR::Node *recompThreshold = isProfilingCompilation ? TR::Node::iconst(node, jProfilingCompileThreshold) : TR::Node::createWithSymRef(node, TR::iload, 0, comp()->getSymRefTab()->createKnownStaticDataSymbolRef(thresholdLocation, TR::Int32));
TR::Node *cmpFlagNode = TR::Node::createif(TR::ificmplt, root, recompThreshold, originalFirstBlock->getEntry());
TR::TreeTop *cmpFlag = TR::TreeTop::create(comp(), cmpFlagNode);
cmpFlagNode->setIsProfilingCode();
guardBlock2->append(cmpFlag);

// construct call block
const char * const dc1 = TR::DebugCounter::debugCounterName(comp(),
"methodRecomp/(%s)", comp()->signature());
TR::Block *callRecompileBlock = TR::Block::createEmptyBlock(node, comp(), UNKNOWN_COLD_BLOCK_COUNT);
callRecompileBlock->setIsCold(true);
TR::TreeTop *callTree = TR::TransformUtil::generateRetranslateCallerWithPrepTrees(node, TR_PersistentMethodInfo::RecompDueToJProfiling, comp());
callTree->getNode()->setIsProfilingCode();
callRecompileBlock->append(callTree);
TR::DebugCounter::prependDebugCounter(comp(), dc1, callTree);

comp()->getRecompilationInfo()->getJittedBodyInfo()->setUsesJProfiling();
TR::CFG *cfg = comp()->getFlowGraph();
Expand Down Expand Up @@ -1156,10 +1105,9 @@ int32_t TR_JProfilingBlock::perform()

// dump counter dependency information
if (trace())
dumpCounterDependencies(componentCounters);

dumpCounterDependencies(componentCounters);
// modify the method to add tests to trigger recompilation at runtime
addRecompilationTests(blockFrequencyInfo, componentCounters);
addRecompilationTests(blockFrequencyInfo);
return 1;
}

Expand Down
4 changes: 1 addition & 3 deletions runtime/compiler/optimizer/JProfilingBlock.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@ class TR_JProfilingBlock : public TR::Optimization
static int32_t nestedLoopRecompileThreshold;
static int32_t loopRecompileThreshold;
static int32_t recompileThreshold;
static int32_t profilingCompileThreshold;
TR_JProfilingBlock(TR::OptimizationManager *manager)
: TR::Optimization(manager)
{}
Expand All @@ -62,13 +61,12 @@ class TR_JProfilingBlock : public TR::Optimization

virtual int32_t perform();
virtual const char * optDetailString() const throw();

protected:
void computeMinimumSpanningTree(BlockParents &parents, BlockPriorityQueue &Q, TR::StackMemoryRegion &stackMemoryRegion);
int32_t processCFGForCounting(BlockParents &parent, TR::BlockChecklist &countedBlocks, TR::CFGEdge &loopBack);
TR_BlockFrequencyInfo *initRecompDataStructures();
void dumpCounterDependencies(TR_BitVector **componentCounters);
void addRecompilationTests(TR_BlockFrequencyInfo *blockFrequencyInfo, TR_BitVector **componentCounters);
void addRecompilationTests(TR_BlockFrequencyInfo *blockFrequencyInfo);
};

#endif
Loading

0 comments on commit f497aa4

Please sign in to comment.