Skip to content
This repository has been archived by the owner on Jan 23, 2023. It is now read-only.

Commit

Permalink
Fix lsra memory consumption (#11233)
Browse files Browse the repository at this point in the history
* Rename ClearD to OldStyleClearD

* create fast ClearD function.

The new ClearD reuses existing buffer and assumes that epoch wasn't
changed.

* Reuse existing buffer for the predSet in lsra.

* Use fast ClearD in LSRA.

* Use fast ClearD in liveness

* Fix the comment

* delete the test from issues
  • Loading branch information
Sergey Andreenko authored Apr 28, 2017
1 parent e60e06f commit 70014f8
Show file tree
Hide file tree
Showing 14 changed files with 89 additions and 30 deletions.
2 changes: 1 addition & 1 deletion src/jit/assertionprop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4556,7 +4556,7 @@ ASSERT_TP* Compiler::optInitAssertionDataflowFlags()
}
// Compute the data flow values for all tracked expressions
// IN and OUT never change for the initial basic block B1
BitVecOps::ClearD(apTraits, fgFirstBB->bbAssertionIn);
BitVecOps::OldStyleClearD(apTraits, fgFirstBB->bbAssertionIn);
return jumpDestOut;
}

Expand Down
11 changes: 10 additions & 1 deletion src/jit/bitset.h
Original file line number Diff line number Diff line change
Expand Up @@ -205,9 +205,13 @@ class BitSetOps
// Destructively set "bs" to be the empty set. This method is unique, in that it does *not*
// require "bs" to be a bitset of the current epoch. It ensures that it is after, however.
// (If the representation is indirect, this requires allocating a new, empty representation.
// If this is a performance issue, we could provide a new version of ClearD that assumes/asserts
// If this is a performance issue, we could provide a new version of OldStyleClearD that assumes/asserts
// that the rep is for the current epoch -- this would be useful if a given bitset were repeatedly
// cleared within an epoch.)
// TODO #11263: delete it.
static void OldStyleClearD(Env env, BitSetType& bs);

// Destructively set "bs" to be the empty set.
static void ClearD(Env env, BitSetType& bs);

// Returns a copy of "bs". If the representation of "bs" involves a level of indirection, the data
Expand Down Expand Up @@ -326,6 +330,11 @@ class BitSetOpsWithCounter
BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_AssignNocopy);
BSO::AssignNoCopy(env, lhs, rhs);
}
static void OldStyleClearD(Env env, BitSetType& bs)
{
BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_OldStyleClearD);
BSO::OldStyleClearD(env, bs);
}
static void ClearD(Env env, BitSetType& bs)
{
BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_ClearD);
Expand Down
32 changes: 30 additions & 2 deletions src/jit/bitsetasshortlong.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ class BitSetOps</*BitSetType*/ BitSetShortLongRep,
static void DiffDLong(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2);
static void AddElemDLong(Env env, BitSetShortLongRep& bs, unsigned i);
static void RemoveElemDLong(Env env, BitSetShortLongRep& bs, unsigned i);
static void OldStyleClearDLong(Env env, BitSetShortLongRep& bs);
static void ClearDLong(Env env, BitSetShortLongRep& bs);
static BitSetShortLongRep MakeUninitArrayBits(Env env);
static BitSetShortLongRep MakeEmptyArrayBits(Env env);
Expand Down Expand Up @@ -122,6 +123,19 @@ class BitSetOps</*BitSetType*/ BitSetShortLongRep,
lhs = rhs;
}

static void OldStyleClearD(Env env, BitSetShortLongRep& bs)
{
if (IsShort(env))
{
bs = (BitSetShortLongRep) nullptr;
}
else
{
assert(bs != UninitVal());
OldStyleClearDLong(env, bs);
}
}

static void ClearD(Env env, BitSetShortLongRep& bs)
{
if (IsShort(env))
Expand Down Expand Up @@ -661,14 +675,28 @@ template <typename Env, typename BitSetTraits>
void BitSetOps</*BitSetType*/ BitSetShortLongRep,
/*Brand*/ BSShortLong,
/*Env*/ Env,
/*BitSetTraits*/ BitSetTraits>::ClearDLong(Env env, BitSetShortLongRep& bs)
/*BitSetTraits*/ BitSetTraits>::OldStyleClearDLong(Env env, BitSetShortLongRep& bs)
{
assert(!IsShort(env));
// Recall that ClearD does *not* require "bs" to be of the current epoch.
// Recall that OldStyleClearD does *not* require "bs" to be of the current epoch.
// Therefore, we must allocate a new representation.
bs = MakeEmptyArrayBits(env);
}

template <typename Env, typename BitSetTraits>
void BitSetOps</*BitSetType*/ BitSetShortLongRep,
/*Brand*/ BSShortLong,
/*Env*/ Env,
/*BitSetTraits*/ BitSetTraits>::ClearDLong(Env env, BitSetShortLongRep& bs)
{
assert(!IsShort(env));
unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
for (unsigned i = 0; i < len; i++)
{
bs[i] = 0;
}
}

template <typename Env, typename BitSetTraits>
BitSetShortLongRep BitSetOps</*BitSetType*/ BitSetShortLongRep,
/*Brand*/ BSShortLong,
Expand Down
5 changes: 5 additions & 0 deletions src/jit/bitsetasuint64.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,11 @@ class BitSetOps</*BitSetType*/ UINT64,
lhs = rhs;
}

static void OldStyleClearD(Env env, UINT64& bs)
{
bs = 0;
}

static void ClearD(Env env, UINT64& bs)
{
bs = 0;
Expand Down
17 changes: 14 additions & 3 deletions src/jit/bitsetasuint64inclass.h
Original file line number Diff line number Diff line change
Expand Up @@ -178,16 +178,22 @@ class BitSetUint64
return res;
}

inline void ClearD(Env env)
inline void OldStyleClearD(Env env)
{
// Recall that ClearD does *not* require "*this" to be of the current epoch.
Uint64BitSetOps::ClearD(env, m_bits);
// Recall that OldStyleClearD does *not* require "*this" to be of the current epoch.
Uint64BitSetOps::OldStyleClearD(env, m_bits);
#ifdef DEBUG
// But it updates it to of the current epoch.
m_epoch = BitSetTraits::GetEpoch(env);
#endif
}

inline void ClearD(Env env)
{
assert(m_epoch == BitSetTraits::GetEpoch(env));
Uint64BitSetOps::ClearD(env, m_bits);
}

inline bool IsEmpty(Env env) const
{
CheckEpoch(env);
Expand Down Expand Up @@ -369,6 +375,11 @@ class BitSetOps</*BitSetType*/ BitSetUint64<Env, BitSetTraits>,
lhs = rhs;
}

static void OldStyleClearD(Env env, BST& bs)
{
bs.OldStyleClearD(env);
}

static void ClearD(Env env, BST& bs)
{
bs.ClearD(env);
Expand Down
1 change: 1 addition & 0 deletions src/jit/bitsetops.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
BSOPNAME(BSOP_Assign)
BSOPNAME(BSOP_AssignAllowUninitRhs)
BSOPNAME(BSOP_AssignNocopy)
BSOPNAME(BSOP_OldStyleClearD)
BSOPNAME(BSOP_ClearD)
BSOPNAME(BSOP_MakeSingleton)
BSOPNAME(BSOP_MakeEmpty)
Expand Down
2 changes: 1 addition & 1 deletion src/jit/copyprop.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,7 @@ void Compiler::optBlockCopyProp(BasicBlock* block, LclNumToGenTreePtrStack* curS
VarSetOps::Assign(this, compCurLife, block->bbLiveIn);
for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
{
VarSetOps::ClearD(this, optCopyPropKillSet);
VarSetOps::OldStyleClearD(this, optCopyPropKillSet);

// Walk the tree to find if any local variable can be replaced with current live definitions.
for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
Expand Down
6 changes: 3 additions & 3 deletions src/jit/emit.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1472,8 +1472,8 @@ void emitter::emitBegProlog()
/* Nothing is live on entry to the prolog */

// These were initialized to Empty at the start of compilation.
VarSetOps::ClearD(emitComp, emitInitGCrefVars);
VarSetOps::ClearD(emitComp, emitPrevGCrefVars);
VarSetOps::OldStyleClearD(emitComp, emitInitGCrefVars);
VarSetOps::OldStyleClearD(emitComp, emitPrevGCrefVars);
emitInitGCrefRegs = RBM_NONE;
emitPrevGCrefRegs = RBM_NONE;
emitInitByrefRegs = RBM_NONE;
Expand Down Expand Up @@ -4564,7 +4564,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,

/* Assume no live GC ref variables on entry */

VarSetOps::ClearD(emitComp, emitThisGCrefVars); // This is initialized to Empty at the start of codegen.
VarSetOps::OldStyleClearD(emitComp, emitThisGCrefVars); // This is initialized to Empty at the start of codegen.
emitThisGCrefRegs = emitThisByrefRegs = RBM_NONE;
emitThisGCrefVset = true;

Expand Down
8 changes: 4 additions & 4 deletions src/jit/flowgraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1815,9 +1815,9 @@ void Compiler::fgComputeReachabilitySets()

for (block = fgFirstBB; block != nullptr; block = block->bbNext)
{
// Initialize the per-block bbReach sets. (Note that we can't just call BlockSetOps::ClearD()
// when re-running this computation, because if the epoch changes, the size and representation of the
// sets might change).
// Initialize the per-block bbReach sets. It creates a new empty set,
// because the block epoch could change since the previous initialization
// and the old set could have wrong size.
block->bbReach = BlockSetOps::MakeEmpty(this);

/* Mark block as reaching itself */
Expand Down Expand Up @@ -10011,7 +10011,7 @@ void Compiler::fgCompactBlocks(BasicBlock* block, BasicBlock* bNext)
if (fgDomsComputed && block->bbNum > fgDomBBcount)
{
BlockSetOps::Assign(this, block->bbReach, bNext->bbReach);
BlockSetOps::ClearD(this, bNext->bbReach);
BlockSetOps::OldStyleClearD(this, bNext->bbReach);

block->bbIDom = bNext->bbIDom;
bNext->bbIDom = nullptr;
Expand Down
3 changes: 3 additions & 0 deletions src/jit/liveness.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -418,6 +418,8 @@ void Compiler::fgPerBlockLocalVarLiveness()
}
#endif // DEBUG

unsigned livenessVarEpoch = GetCurLVEpoch();

BasicBlock* block;

#if CAN_DISABLE_DFA
Expand Down Expand Up @@ -587,6 +589,7 @@ void Compiler::fgPerBlockLocalVarLiveness()
block->bbMemoryLiveIn = emptyMemoryKindSet;
}

noway_assert(livenessVarEpoch == GetCurLVEpoch());
#ifdef DEBUG
if (verbose)
{
Expand Down
20 changes: 14 additions & 6 deletions src/jit/lsra.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1317,6 +1317,8 @@ void LinearScan::setBlockSequence()
compiler->EnsureBasicBlockEpoch();
bbVisitedSet = BlockSetOps::MakeEmpty(compiler);
BlockSet BLOCKSET_INIT_NOCOPY(readySet, BlockSetOps::MakeEmpty(compiler));
BlockSet BLOCKSET_INIT_NOCOPY(predSet, BlockSetOps::MakeEmpty(compiler));

assert(blockSequence == nullptr && bbSeqCount == 0);
blockSequence = new (compiler, CMK_LSRA) BasicBlock*[compiler->fgBBcount];
bbNumMaxBeforeResolution = compiler->fgBBNumMax;
Expand Down Expand Up @@ -1400,7 +1402,7 @@ void LinearScan::setBlockSequence()
// (i.e. pred-first or random, since layout order is handled above).
if (!BlockSetOps::IsMember(compiler, readySet, succ->bbNum))
{
addToBlockSequenceWorkList(readySet, succ);
addToBlockSequenceWorkList(readySet, succ, predSet);
BlockSetOps::AddElemD(compiler, readySet, succ->bbNum);
}
}
Expand Down Expand Up @@ -1433,7 +1435,7 @@ void LinearScan::setBlockSequence()
{
if (!isBlockVisited(block))
{
addToBlockSequenceWorkList(readySet, block);
addToBlockSequenceWorkList(readySet, block, predSet);
BlockSetOps::AddElemD(compiler, readySet, block->bbNum);
}
}
Expand All @@ -1442,7 +1444,7 @@ void LinearScan::setBlockSequence()
{
if (!isBlockVisited(block))
{
addToBlockSequenceWorkList(readySet, block);
addToBlockSequenceWorkList(readySet, block, predSet);
BlockSetOps::AddElemD(compiler, readySet, block->bbNum);
}
}
Expand Down Expand Up @@ -1540,6 +1542,9 @@ int LinearScan::compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block
// Arguments:
// sequencedBlockSet - the set of blocks that are already sequenced
// block - the new block to be added
// predSet - the buffer to save predecessors set. A block set allocated by the caller used here as a
// temporary block set for constructing a predecessor set. Allocated by the caller to avoid reallocating a new block
// set with every call to this function
//
// Return Value:
// None.
Expand All @@ -1561,13 +1566,13 @@ int LinearScan::compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block
// Note also that, when random traversal order is implemented, this method
// should insert the blocks into the list in random order, so that we can always
// simply select the first block in the list.
void LinearScan::addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block)
void LinearScan::addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block, BlockSet& predSet)
{
// The block that is being added is not already sequenced
assert(!BlockSetOps::IsMember(compiler, sequencedBlockSet, block->bbNum));

// Get predSet of block
BlockSet BLOCKSET_INIT_NOCOPY(predSet, BlockSetOps::MakeEmpty(compiler));
BlockSetOps::ClearD(compiler, predSet);
flowList* pred;
for (pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
{
Expand Down Expand Up @@ -1723,6 +1728,8 @@ void LinearScan::doLinearScan()
}
#endif // DEBUG

unsigned lsraBlockEpoch = compiler->GetCurBasicBlockEpoch();

splitBBNumToTargetBBNumMap = nullptr;

// This is complicated by the fact that physical registers have refs associated
Expand All @@ -1738,7 +1745,7 @@ void LinearScan::doLinearScan()

DBEXEC(VERBOSE, lsraDumpIntervals("after buildIntervals"));

BlockSetOps::ClearD(compiler, bbVisitedSet);
clearVisitedBlocks();
initVarRegMaps();
allocateRegisters();
compiler->EndPhase(PHASE_LINEAR_SCAN_ALLOC);
Expand All @@ -1759,6 +1766,7 @@ void LinearScan::doLinearScan()
DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_POST));

compiler->compLSRADone = true;
noway_assert(lsraBlockEpoch = compiler->GetCurBasicBlockEpoch());
}

//------------------------------------------------------------------------
Expand Down
2 changes: 1 addition & 1 deletion src/jit/lsra.h
Original file line number Diff line number Diff line change
Expand Up @@ -1131,7 +1131,7 @@ class LinearScan : public LinearScanInterface
int compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block2, bool useBlockWeights);
BasicBlockList* blockSequenceWorkList;
bool blockSequencingDone;
void addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block);
void addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block, BlockSet& predSet);
void removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode);
BasicBlock* getNextCandidateFromWorkList();

Expand Down
4 changes: 2 additions & 2 deletions src/jit/regalloc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1340,7 +1340,7 @@ regMaskTP Compiler::rpPredictRegPick(var_types type, rpPredictReg predictReg, re
while (iter.NextElem(this, &varNum))
{
// We'll need this for one of the calls...
VarSetOps::ClearD(this, varAsSet);
VarSetOps::OldStyleClearD(this, varAsSet);
VarSetOps::AddElemD(this, varAsSet, varNum);

// If this varBit and lastUse?
Expand Down Expand Up @@ -6348,7 +6348,7 @@ void Compiler::rpPredictRegUse()
/* Zero the variable/register interference graph */
for (unsigned i = 0; i < REG_COUNT; i++)
{
VarSetOps::ClearD(this, raLclRegIntf[i]);
VarSetOps::OldStyleClearD(this, raLclRegIntf[i]);
}

// if there are PInvoke calls and compLvFrameListRoot is enregistered,
Expand Down
6 changes: 0 additions & 6 deletions tests/issues.targets
Original file line number Diff line number Diff line change
Expand Up @@ -210,9 +210,6 @@
<ExcludeList Include="$(XunitTestBinBase)\JIT\Regression\Dev11\External\dev11_239804\ShowLocallocAlignment\ShowLocallocAlignment.cmd">
<Issue>7163, fails on both legacy backend and RyuJIT</Issue>
</ExcludeList>
<ExcludeList Include="$(XunitTestBinBase)\JIT\Regression\JitBlue\DevDiv_255294\DevDiv_255294\DevDiv_255294.cmd">
<Issue>The test is too large for x86 and causes OutOfMemory exception.</Issue>
</ExcludeList>
</ItemGroup>

<!-- Tests that need to be triaged for vararg usage as that is not supported -->
Expand Down Expand Up @@ -1133,9 +1130,6 @@
<ExcludeList Include="$(XunitTestBinBase)\JIT\Regression\Dev11\dev10_865840\dev10_865840\dev10_865840.cmd">
<Issue>needs triage</Issue>
</ExcludeList>
<ExcludeList Include="$(XunitTestBinBase)\JIT\Regression\JITBlue\DevDiv_255294\DevDiv_255294\DevDiv_255294.cmd">
<Issue>11142</Issue>
</ExcludeList>
<ExcludeList Include="$(XunitTestBinBase)\JIT\Regression\VS-ia64-JIT\V1.2-Beta1\b302509\b302509\b302509.cmd">
<Issue>needs triage</Issue>
</ExcludeList>
Expand Down

0 comments on commit 70014f8

Please sign in to comment.