From 3be5238a376cd4bca32db1acb732dc7c15091cdf Mon Sep 17 00:00:00 2001
From: Carol Eidt
Date: Wed, 19 Feb 2020 15:52:22 -0800
Subject: [PATCH] Support Write-Thru of EH variables in LSRA (#543)

* Support Write-Thru of EH variables in LSRA

Mark EH variables (those that are live in or out of exception regions) only
as lvLiveInOutOfHndlr, not necessarily lvDoNotEnregister.
During register allocation, mark these as write-thru, and mark all defs as
write-thru, ensuring that the stack value is always valid.
Mark those defs with GTF_SPILLED (this is the "reload" flag, and is not
currently used for pure defs) to indicate that the value should be kept in
the register.
Mark blocks that enter EH regions as having no predecessor, and set the
location of all live-in vars to be on the stack.
Change genFnPrologCalleeRegArgs to store EH vars to the stack as well if
they have a register assignment.
Tuned throughput to compensate for the extra processing by rearranging some
fields and short-circuiting the physical register RefPositions during
allocation.

The feature is disabled by default.
---
 src/coreclr/src/jit/codegencommon.cpp   |  72 ++-
 src/coreclr/src/jit/codegenlinear.cpp   |  94 ++-
 src/coreclr/src/jit/compiler.cpp        |  31 +
 src/coreclr/src/jit/compiler.h          |   9 +-
 src/coreclr/src/jit/gentree.cpp         |   7 +
 src/coreclr/src/jit/instr.cpp           |   1 +
 src/coreclr/src/jit/jitconfigvalues.h   |   3 +
 src/coreclr/src/jit/lclvars.cpp         |  47 +-
 src/coreclr/src/jit/lsra.cpp            | 756 ++++++++++++++++--------
 src/coreclr/src/jit/lsra.h              |  75 ++-
 src/coreclr/src/jit/lsrabuild.cpp       | 361 +++++++----
 src/coreclr/src/jit/treelifeupdater.cpp |  16 +-
 12 files changed, 1010 insertions(+), 462 deletions(-)

diff --git a/src/coreclr/src/jit/codegencommon.cpp b/src/coreclr/src/jit/codegencommon.cpp
index f15259334eb72..1af9bc029c5be 100644
--- a/src/coreclr/src/jit/codegencommon.cpp
+++ b/src/coreclr/src/jit/codegencommon.cpp
@@ -504,7 +504,10 @@ void CodeGenInterface::genUpdateRegLife(const LclVarDsc* varDsc, bool isBorn, bo
     }
     else
     {
-        assert((regSet.GetMaskVars() & regMask) == 0);
+        // If this is going live, the register must not have a variable in it, except
+        // in the case of an exception variable, which may already be treated as live
+        // in the register.
+        assert(varDsc->lvLiveInOutOfHndlr || ((regSet.GetMaskVars() & regMask) == 0));
         regSet.AddMaskVars(regMask);
     }
 }
@@ -681,12 +684,14 @@ void Compiler::compChangeLife(VARSET_VALARG_TP newLife)
         unsigned deadVarIndex = 0;
         while (deadIter.NextElem(&deadVarIndex))
         {
-            unsigned   varNum  = lvaTrackedIndexToLclNum(deadVarIndex);
-            LclVarDsc* varDsc  = lvaGetDesc(varNum);
-            bool       isGCRef = (varDsc->TypeGet() == TYP_REF);
-            bool       isByRef = (varDsc->TypeGet() == TYP_BYREF);
+            unsigned   varNum     = lvaTrackedIndexToLclNum(deadVarIndex);
+            LclVarDsc* varDsc     = lvaGetDesc(varNum);
+            bool       isGCRef    = (varDsc->TypeGet() == TYP_REF);
+            bool       isByRef    = (varDsc->TypeGet() == TYP_BYREF);
+            bool       isInReg    = varDsc->lvIsInReg();
+            bool       isInMemory = !isInReg || varDsc->lvLiveInOutOfHndlr;

-            if (varDsc->lvIsInReg())
+            if (isInReg)
             {
                 // TODO-Cleanup: Move the code from compUpdateLifeVar to genUpdateRegLife that updates the
                 // gc sets
@@ -701,8 +706,8 @@ void Compiler::compChangeLife(VARSET_VALARG_TP newLife)
                 }
                 codeGen->genUpdateRegLife(varDsc, false /*isBorn*/, true /*isDying*/ DEBUGARG(nullptr));
             }
-            // This isn't in a register, so update the gcVarPtrSetCur.
-            else if (isGCRef || isByRef)
+            // Update the gcVarPtrSetCur if it is in memory.
+ if (isInMemory && (isGCRef || isByRef)) { VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, deadVarIndex); JITDUMP("\t\t\t\t\t\t\tV%02u becoming dead\n", varNum); @@ -724,13 +729,18 @@ void Compiler::compChangeLife(VARSET_VALARG_TP newLife) if (varDsc->lvIsInReg()) { -#ifdef DEBUG - if (VarSetOps::IsMember(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex)) + // If this variable is going live in a register, it is no longer live on the stack, + // unless it is an EH var, which always remains live on the stack. + if (!varDsc->lvLiveInOutOfHndlr) { - JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", varNum); - } +#ifdef DEBUG + if (VarSetOps::IsMember(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex)) + { + JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", varNum); + } #endif // DEBUG - VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex); + VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex); + } codeGen->genUpdateRegLife(varDsc, true /*isBorn*/, false /*isDying*/ DEBUGARG(nullptr)); regMaskTP regMask = varDsc->lvRegMask(); if (isGCRef) @@ -742,9 +752,9 @@ void Compiler::compChangeLife(VARSET_VALARG_TP newLife) codeGen->gcInfo.gcRegByrefSetCur |= regMask; } } - // This isn't in a register, so update the gcVarPtrSetCur else if (lvaIsGCTracked(varDsc)) { + // This isn't in a register, so update the gcVarPtrSetCur to show that it's live on the stack. VarSetOps::AddElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex); JITDUMP("\t\t\t\t\t\t\tV%02u becoming live\n", varNum); } @@ -3269,6 +3279,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere // 1 means the first part of a register argument // 2, 3 or 4 means the second,third or fourth part of a multireg argument bool stackArg; // true if the argument gets homed to the stack + bool writeThru; // true if the argument gets homed to both stack and register bool processed; // true after we've processed the argument (and it is in its final location) bool circular; // true if this register participates in a circular dependency loop. @@ -3605,6 +3616,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere } regArgTab[regArgNum + i].processed = false; + regArgTab[regArgNum + i].writeThru = (varDsc->lvIsInReg() && varDsc->lvLiveInOutOfHndlr); /* mark stack arguments since we will take care of those first */ regArgTab[regArgNum + i].stackArg = (varDsc->lvIsInReg()) ? false : true; @@ -3765,9 +3777,9 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere noway_assert(((regArgMaskLive & RBM_FLTARG_REGS) == 0) && "Homing of float argument registers with circular dependencies not implemented."); - /* Now move the arguments to their locations. - * First consider ones that go on the stack since they may - * free some registers. */ + // Now move the arguments to their locations. + // First consider ones that go on the stack since they may free some registers. + // Also home writeThru args, since they're also homed to the stack. regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn; // reset the live in to what it was at the start for (argNum = 0; argNum < argMax; argNum++) @@ -3805,7 +3817,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere // If this arg is never on the stack, go to the next one. 
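
The homing rule implemented above, and continued by the TYP_LONG guard just below, can be condensed into a pair of predicates. A minimal sketch, with a hypothetical RegArg struct standing in for a regArgTab entry (not code from this patch):

    // Hypothetical mirror of the regArgTab fields involved in homing.
    struct RegArg
    {
        bool stackArg;  // the arg lives only on the stack
        bool writeThru; // EH var: homed to both its stack slot and a register
    };

    // The prolog must store the incoming value to the stack home if the arg
    // is a pure stack arg, or an EH var that is kept write-thru.
    bool needsStackHome(const RegArg& arg)
    {
        return arg.stackArg || arg.writeThru;
    }

    // Only non-writeThru args are retired from the live-in mask after the
    // store; a writeThru arg's register stays live for use in the body.
    bool retireAfterStackStore(const RegArg& arg)
    {
        return !arg.writeThru;
    }
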
if (varDsc->lvType == TYP_LONG) { - if (regArgTab[argNum].slot == 1 && !regArgTab[argNum].stackArg) + if (regArgTab[argNum].slot == 1 && !regArgTab[argNum].stackArg && !regArgTab[argNum].writeThru) { continue; } @@ -3839,7 +3851,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere noway_assert(varDsc->lvIsParam); noway_assert(varDsc->lvIsRegArg); - noway_assert(varDsc->lvIsInReg() == false || + noway_assert(varDsc->lvIsInReg() == false || varDsc->lvLiveInOutOfHndlr || (varDsc->lvType == TYP_LONG && varDsc->GetOtherReg() == REG_STK && regArgTab[argNum].slot == 2)); var_types storeType = TYP_UNDEF; @@ -3906,13 +3918,17 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere #endif // USING_SCOPE_INFO } - /* mark the argument as processed */ - - regArgTab[argNum].processed = true; - regArgMaskLive &= ~genRegMask(srcRegNum); + // Mark the argument as processed, and set it as no longer live in srcRegNum, + // unless it is a writeThru var, in which case we home it to the stack, but + // don't mark it as processed until below. + if (!regArgTab[argNum].writeThru) + { + regArgTab[argNum].processed = true; + regArgMaskLive &= ~genRegMask(srcRegNum); + } #if defined(TARGET_ARM) - if (storeType == TYP_DOUBLE) + if ((storeType == TYP_DOUBLE) && !regArgTab[argNum].writeThru) { regArgTab[argNum + 1].processed = true; regArgMaskLive &= ~genRegMask(REG_NEXT(srcRegNum)); @@ -4618,7 +4634,7 @@ void CodeGen::genCheckUseBlockInit() { if (!varDsc->lvRegister) { - if (!varDsc->lvIsInReg()) + if (!varDsc->lvIsInReg() || varDsc->lvLiveInOutOfHndlr) { // Var is on the stack at entry. initStkLclCnt += @@ -7233,7 +7249,9 @@ void CodeGen::genFnProlog() continue; } - if (varDsc->lvIsInReg()) + bool isInReg = varDsc->lvIsInReg(); + bool isInMemory = !isInReg || varDsc->lvLiveInOutOfHndlr; + if (isInReg) { regMaskTP regMask = genRegMask(varDsc->GetRegNum()); if (!varDsc->IsFloatRegType()) @@ -7264,7 +7282,7 @@ void CodeGen::genFnProlog() initFltRegs |= regMask; } } - else + if (isInMemory) { INIT_STK: diff --git a/src/coreclr/src/jit/codegenlinear.cpp b/src/coreclr/src/jit/codegenlinear.cpp index c6f7b6f8c483c..e3973ba565119 100644 --- a/src/coreclr/src/jit/codegenlinear.cpp +++ b/src/coreclr/src/jit/codegenlinear.cpp @@ -239,15 +239,18 @@ void CodeGen::genCodeForBBlist() { newRegByrefSet |= varDsc->lvRegMask(); } -#ifdef DEBUG - if (verbose && VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex)) + if (!varDsc->lvLiveInOutOfHndlr) { - VarSetOps::AddElemD(compiler, removedGCVars, varIndex); - } +#ifdef DEBUG + if (verbose && VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex)) + { + VarSetOps::AddElemD(compiler, removedGCVars, varIndex); + } #endif // DEBUG - VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex); + VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex); + } } - else if (compiler->lvaIsGCTracked(varDsc)) + if ((!varDsc->lvIsInReg() || varDsc->lvLiveInOutOfHndlr) && compiler->lvaIsGCTracked(varDsc)) { #ifdef DEBUG if (verbose && !VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex)) @@ -823,10 +826,20 @@ void CodeGen::genSpillVar(GenTree* tree) var_types lclTyp = genActualType(varDsc->TypeGet()); emitAttr size = emitTypeSize(lclTyp); - instruction storeIns = ins_Store(lclTyp, compiler->isSIMDTypeLocalAligned(varNum)); - assert(varDsc->GetRegNum() == tree->GetRegNum()); - inst_TT_RV(storeIns, size, tree, tree->GetRegNum()); + // If this is a write-thru variable, we don't actually spill at a 
use, but we will kill the var in the reg + // (below). + if (!varDsc->lvLiveInOutOfHndlr) + { + instruction storeIns = ins_Store(lclTyp, compiler->isSIMDTypeLocalAligned(varNum)); + assert(varDsc->GetRegNum() == tree->GetRegNum()); + inst_TT_RV(storeIns, size, tree, tree->GetRegNum()); + } + // We should only have both GTF_SPILL (i.e. the flag causing this method to be called) and + // GTF_SPILLED on a write-thru def, for which we should not be calling this method. + assert((tree->gtFlags & GTF_SPILLED) == 0); + + // Remove the live var from the register. genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree)); gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask()); @@ -847,10 +860,19 @@ void CodeGen::genSpillVar(GenTree* tree) } tree->gtFlags &= ~GTF_SPILL; - varDsc->SetRegNum(REG_STK); - if (varTypeIsMultiReg(tree)) + // If this is NOT a write-thru, reset the var location. + if ((tree->gtFlags & GTF_SPILLED) == 0) { - varDsc->SetOtherReg(REG_STK); + varDsc->SetRegNum(REG_STK); + if (varTypeIsMultiReg(tree)) + { + varDsc->SetOtherReg(REG_STK); + } + } + else + { + // We only have 'GTF_SPILL' and 'GTF_SPILLED' on a def of a write-thru lclVar. + assert(varDsc->lvLiveInOutOfHndlr && ((tree->gtFlags & GTF_VAR_DEF) != 0)); } #ifdef USING_VARIABLE_LIVE_RANGE @@ -1030,13 +1052,16 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree) } #endif // USING_VARIABLE_LIVE_RANGE -#ifdef DEBUG - if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex)) + if (!varDsc->lvLiveInOutOfHndlr) { - JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->GetLclNum()); - } +#ifdef DEBUG + if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex)) + { + JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->GetLclNum()); + } #endif // DEBUG - VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); + VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); + } #ifdef DEBUG if (compiler->verbose) @@ -1316,15 +1341,15 @@ regNumber CodeGen::genConsumeReg(GenTree* tree) LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()]; assert(varDsc->lvLRACandidate); - if ((tree->gtFlags & GTF_VAR_DEATH) != 0) - { - gcInfo.gcMarkRegSetNpt(genRegMask(varDsc->GetRegNum())); - } - else if (varDsc->GetRegNum() == REG_STK) + if (varDsc->GetRegNum() == REG_STK) { // We have loaded this into a register only temporarily gcInfo.gcMarkRegSetNpt(genRegMask(tree->GetRegNum())); } + else if ((tree->gtFlags & GTF_VAR_DEATH) != 0) + { + gcInfo.gcMarkRegSetNpt(genRegMask(varDsc->GetRegNum())); + } } else { @@ -1852,13 +1877,24 @@ void CodeGen::genProduceReg(GenTree* tree) if (genIsRegCandidateLocal(tree)) { - // Store local variable to its home location. - // Ensure that lclVar stores are typed correctly. - unsigned varNum = tree->AsLclVarCommon()->GetLclNum(); - assert(!compiler->lvaTable[varNum].lvNormalizeOnStore() || - (tree->TypeGet() == genActualType(compiler->lvaTable[varNum].TypeGet()))); - inst_TT_RV(ins_Store(tree->gtType, compiler->isSIMDTypeLocalAligned(varNum)), emitTypeSize(tree->TypeGet()), - tree, tree->GetRegNum()); + unsigned varNum = tree->AsLclVarCommon()->GetLclNum(); + LclVarDsc* varDsc = compiler->lvaGetDesc(varNum); + assert(!varDsc->lvNormalizeOnStore() || (tree->TypeGet() == genActualType(varDsc->TypeGet()))); + + // If we reach here, we have a register candidate local that is marked with GTF_SPILL. + // This flag generally means that we need to spill this local. 
+        // The exception is the case of a use of an EH var that is being "spilled"
+        // to the stack, indicated by GTF_SPILL (note that all EH lclVar defs are always
+        // spilled, i.e. write-thru).
+        // An EH var use is always valid on the stack (so we don't need to actually spill it),
+        // but the GTF_SPILL flag records the fact that the register value is going dead.
+        if (((tree->gtFlags & GTF_VAR_DEF) != 0) || !varDsc->lvLiveInOutOfHndlr)
+        {
+            // Store local variable to its home location.
+            // Ensure that lclVar stores are typed correctly.
+            inst_TT_RV(ins_Store(tree->gtType, compiler->isSIMDTypeLocalAligned(varNum)),
+                       emitTypeSize(tree->TypeGet()), tree, tree->GetRegNum());
+        }
     }
     else
     {
diff --git a/src/coreclr/src/jit/compiler.cpp b/src/coreclr/src/jit/compiler.cpp
index c066d2b1be9f1..7eaf9f5cf3258 100644
--- a/src/coreclr/src/jit/compiler.cpp
+++ b/src/coreclr/src/jit/compiler.cpp
@@ -4522,6 +4522,37 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags

     EndPhase(PHASE_CLONE_FINALLY);

+#if DEBUG
+    if (lvaEnregEHVars)
+    {
+        unsigned methHash   = info.compMethodHash();
+        char*    lostr      = getenv("JitEHWTHashLo");
+        unsigned methHashLo = 0;
+        bool     dump       = false;
+        if (lostr != nullptr)
+        {
+            sscanf_s(lostr, "%x", &methHashLo);
+            dump = true;
+        }
+        char*    histr      = getenv("JitEHWTHashHi");
+        unsigned methHashHi = UINT32_MAX;
+        if (histr != nullptr)
+        {
+            sscanf_s(histr, "%x", &methHashHi);
+            dump = true;
+        }
+        if (methHash < methHashLo || methHash > methHashHi)
+        {
+            lvaEnregEHVars = false;
+        }
+        else if (dump)
+        {
+            printf("Enregistering EH Vars for method %s, hash = 0x%x.\n", info.compFullName, info.compMethodHash());
+            printf(""); // flush
+        }
+    }
+#endif
+
     // Compute bbNum, bbRefs and bbPreds
     //
     JITDUMP("\nRenumbering the basic blocks for fgComputePreds\n");
diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h
index d8aff229e8156..0e55e8172f69b 100644
--- a/src/coreclr/src/jit/compiler.h
+++ b/src/coreclr/src/jit/compiler.h
@@ -415,6 +415,8 @@ class LclVarDsc
     unsigned char lvDoNotEnregister : 1; // Do not enregister this variable.
     unsigned char lvFieldAccessed : 1;   // The var is a struct local, and a field of the variable is accessed. Affects
                                          // struct promotion.
+    unsigned char lvLiveInOutOfHndlr : 1; // The variable is live in or out of an exception handler, and therefore must
+                                          // be on the stack (at least at those boundaries).

     unsigned char lvInSsa : 1; // The variable is in SSA form (set by SsaBuilder)
@@ -424,9 +426,6 @@ class LclVarDsc
     // also, lvType == TYP_STRUCT prevents enregistration. At least one of the reasons should be true.
     unsigned char lvVMNeedsStackAddr : 1; // The VM may have access to a stack-relative address of the variable, and
                                           // read/write its value.
-    unsigned char lvLiveInOutOfHndlr : 1; // The variable was live in or out of an exception handler, and this required
-                                          // the variable to be
-                                          // in the stack (at least at those boundaries.)
     unsigned char lvLclFieldExpr : 1;  // The variable is not a struct, but was accessed like one (e.g., reading a
                                        // particular byte from an int).
     unsigned char lvLclBlockOpAddr : 1; // The variable was written to via a block operation that took its address.
@@ -3005,6 +3004,9 @@ class Compiler
     void lvaSetVarAddrExposed(unsigned varNum);
     void lvaSetVarLiveInOutOfHandler(unsigned varNum);
     bool lvaVarDoNotEnregister(unsigned varNum);
+
+    bool lvaEnregEHVars;
+
 #ifdef DEBUG
     // Reasons why we can't enregister. Some of these correspond to debug properties of local vars.
enum DoNotEnregisterReason @@ -3027,6 +3029,7 @@ class Compiler DNER_PinningRef, #endif }; + #endif void lvaSetVarDoNotEnregister(unsigned varNum DEBUGARG(DoNotEnregisterReason reason)); diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp index a9aac9d6d4c22..05c6e0e65f71e 100644 --- a/src/coreclr/src/jit/gentree.cpp +++ b/src/coreclr/src/jit/gentree.cpp @@ -2755,6 +2755,13 @@ bool Compiler::gtIsLikelyRegVar(GenTree* tree) return false; } + // If this is an EH-live var, return false if it is a def, + // as it will have to go to memory. + if (varDsc->lvLiveInOutOfHndlr && ((tree->gtFlags & GTF_VAR_DEF) != 0)) + { + return false; + } + // Be pessimistic if ref counts are not yet set up. // // Perhaps we should be optimistic though. diff --git a/src/coreclr/src/jit/instr.cpp b/src/coreclr/src/jit/instr.cpp index 7a0a6c63f37fd..2ee631fceeff8 100644 --- a/src/coreclr/src/jit/instr.cpp +++ b/src/coreclr/src/jit/instr.cpp @@ -662,6 +662,7 @@ void CodeGen::inst_TT_RV(instruction ins, emitAttr size, GenTree* tree, regNumbe #ifdef DEBUG // The tree must have a valid register value. assert(reg != REG_STK); + bool isValidInReg = ((tree->gtFlags & GTF_SPILLED) == 0); if (!isValidInReg) { diff --git a/src/coreclr/src/jit/jitconfigvalues.h b/src/coreclr/src/jit/jitconfigvalues.h index 688008f69717d..d3a888d79ad7f 100644 --- a/src/coreclr/src/jit/jitconfigvalues.h +++ b/src/coreclr/src/jit/jitconfigvalues.h @@ -244,6 +244,9 @@ CONFIG_INTEGER(EnablePOPCNT, W("EnablePOPCNT"), 1) // Enable POPCNT CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 0) #endif // !defined(TARGET_AMD64) && !defined(TARGET_X86) +CONFIG_INTEGER(EnableEHWriteThru, W("EnableEHWriteThru"), 0) // Enable the register allocator to support EH-write thru: + // partial enregistration of vars exposed on EH boundaries + // clang-format off #if defined(TARGET_ARM64) diff --git a/src/coreclr/src/jit/lclvars.cpp b/src/coreclr/src/jit/lclvars.cpp index 4a5b2f1132efb..f38437012ff2b 100644 --- a/src/coreclr/src/jit/lclvars.cpp +++ b/src/coreclr/src/jit/lclvars.cpp @@ -85,6 +85,8 @@ void Compiler::lvaInit() lvaCurEpoch = 0; structPromotionHelper = new (this, CMK_Generic) StructPromotionHelper(this); + + lvaEnregEHVars = (((opts.compFlags & CLFLG_REGVAR) != 0) && JitConfig.EnableEHWriteThru()); } /*****************************************************************************/ @@ -2377,9 +2379,11 @@ void Compiler::lvaSetVarAddrExposed(unsigned varNum) // void Compiler::lvaSetVarLiveInOutOfHandler(unsigned varNum) { - LclVarDsc* varDsc = lvaGetDesc(varNum); + noway_assert(varNum < lvaCount); - INDEBUG(varDsc->lvLiveInOutOfHndlr = 1); + LclVarDsc* varDsc = &lvaTable[varNum]; + + varDsc->lvLiveInOutOfHndlr = 1; if (varDsc->lvPromoted) { @@ -2388,12 +2392,27 @@ void Compiler::lvaSetVarLiveInOutOfHandler(unsigned varNum) for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i) { noway_assert(lvaTable[i].lvIsStructField); - INDEBUG(lvaTable[i].lvLiveInOutOfHndlr = 1); - lvaSetVarDoNotEnregister(i DEBUGARG(DNER_LiveInOutOfHandler)); + lvaTable[i].lvLiveInOutOfHndlr = 1; + if (!lvaEnregEHVars) + { + lvaSetVarDoNotEnregister(i DEBUGARG(DNER_LiveInOutOfHandler)); + } } } - lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LiveInOutOfHandler)); + if (!lvaEnregEHVars) + { + lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LiveInOutOfHandler)); + } +#ifdef JIT32_GCENCODER + else if (lvaKeepAliveAndReportThis() && (varNum == info.compThisArg)) + { + // For the JIT32_GCENCODER, when lvaKeepAliveAndReportThis is 
true, we must either keep the "this" pointer + // in the same register for the entire method, or keep it on the stack. If it is EH-exposed, we can't ever + // keep it in a register, since it must also be live on the stack. Therefore, we won't attempt to allocate it. + lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LiveInOutOfHandler)); + } +#endif // JIT32_GCENCODER } /***************************************************************************** @@ -4107,8 +4126,20 @@ void Compiler::lvaComputeRefCounts(bool isRecompute, bool setSlotNumbers) case GT_STORE_LCL_VAR: case GT_STORE_LCL_FLD: { - const unsigned lclNum = node->AsLclVarCommon()->GetLclNum(); - lvaTable[lclNum].incRefCnts(weight, this); + LclVarDsc* varDsc = lvaGetDesc(node->AsLclVarCommon()); + // If this is an EH var, use a zero weight for defs, so that we don't + // count those in our heuristic for register allocation, since they always + // must be stored, so there's no value in enregistering them at defs; only + // if there are enough uses to justify it. + if (varDsc->lvLiveInOutOfHndlr && !varDsc->lvDoNotEnregister && + ((node->gtFlags & GTF_VAR_DEF) != 0)) + { + varDsc->incRefCnts(0, this); + } + else + { + varDsc->incRefCnts(weight, this); + } break; } @@ -6826,7 +6857,7 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t r { printf("V"); } - if (varDsc->lvLiveInOutOfHndlr) + if (lvaEnregEHVars && varDsc->lvLiveInOutOfHndlr) { printf("H"); } diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp index c2b03ace694c4..c3f647e3561d3 100644 --- a/src/coreclr/src/jit/lsra.cpp +++ b/src/coreclr/src/jit/lsra.cpp @@ -186,7 +186,16 @@ unsigned LinearScan::getWeight(RefPosition* refPos) if (refPos->getInterval()->isSpilled) { // Decrease the weight if the interval has already been spilled. - weight -= BB_UNITY_WEIGHT; + if (varDsc->lvLiveInOutOfHndlr) + { + // An EH var is always spilled at defs, and we'll decrease the weight by half, + // since only the reload is needed. + weight = weight / 2; + } + else + { + weight -= BB_UNITY_WEIGHT; + } } } else @@ -613,6 +622,7 @@ LinearScan::LinearScan(Compiler* theCompiler) { #ifdef DEBUG maxNodeLocation = 0; + firstColdLoc = MaxLocation; activeRefPosition = nullptr; // Get the value of the environment variable that controls stress for register allocation @@ -823,6 +833,7 @@ void LinearScan::setBlockSequence() blockInfo[block->bbNum].weight = block->getBBWeight(compiler); blockInfo[block->bbNum].hasEHBoundaryIn = block->hasEHBoundaryIn(); blockInfo[block->bbNum].hasEHBoundaryOut = block->hasEHBoundaryOut(); + blockInfo[block->bbNum].hasEHPred = false; #if TRACK_LSRA_STATS blockInfo[block->bbNum].spillCount = 0; @@ -847,12 +858,25 @@ void LinearScan::setBlockSequence() assert(!"Switch with single successor"); } } - if (block->isBBCallAlwaysPairTail() || (hasUniquePred && predBlock->hasEHBoundaryOut())) + + // We treat BBCallAlwaysPairTail blocks as having EH flow, since we can't + // insert resolution moves into those blocks. + if (block->isBBCallAlwaysPairTail()) { - // Treat this as having incoming EH flow, since we can't insert resolution moves into - // the ALWAYS block of a BBCallAlwaysPair, and a unique pred with an EH out edge won't - // allow us to keep any variables enregistered. 
- blockInfo[block->bbNum].hasEHBoundaryIn = true; + blockInfo[block->bbNum].hasEHBoundaryIn = true; + blockInfo[block->bbNum].hasEHBoundaryOut = true; + } + else if (predBlock->hasEHBoundaryOut() || predBlock->isBBCallAlwaysPairTail()) + { + if (hasUniquePred) + { + // A unique pred with an EH out edge won't allow us to keep any variables enregistered. + blockInfo[block->bbNum].hasEHBoundaryIn = true; + } + else + { + blockInfo[block->bbNum].hasEHPred = true; + } } } @@ -974,6 +998,10 @@ void LinearScan::setBlockSequence() { JITDUMP(" EH-out"); } + if (blockInfo[block->bbNum].hasEHPred) + { + JITDUMP(" has EH pred"); + } JITDUMP("\n"); } JITDUMP("\n"); @@ -1351,9 +1379,6 @@ void Interval::setLocalNumber(Compiler* compiler, unsigned lclNum, LinearScan* l // void LinearScan::identifyCandidatesExceptionDataflow() { -#ifdef DEBUG - VARSET_TP finallyVars(VarSetOps::MakeEmpty(compiler)); -#endif BasicBlock* block; foreach_block(compiler, block) @@ -1367,15 +1392,13 @@ void LinearScan::identifyCandidatesExceptionDataflow() if (block->hasEHBoundaryOut()) { VarSetOps::UnionD(compiler, exceptVars, block->bbLiveOut); -#ifdef DEBUG if (block->bbJumpKind == BBJ_EHFINALLYRET) { - // live on exit from finally. + // Live on exit from finally. // We track these separately because, in addition to having EH live-out semantics, - // we want to verify that they are must-init. + // we need to mark them must-init. VarSetOps::UnionD(compiler, finallyVars, block->bbLiveOut); } -#endif } } @@ -1559,6 +1582,7 @@ void LinearScan::identifyCandidates() } VarSetOps::AssignNoCopy(compiler, exceptVars, VarSetOps::MakeEmpty(compiler)); + VarSetOps::AssignNoCopy(compiler, finallyVars, VarSetOps::MakeEmpty(compiler)); if (compiler->compHndBBtabCount > 0) { identifyCandidatesExceptionDataflow(); @@ -1719,6 +1743,12 @@ void LinearScan::identifyCandidates() newInt->isStructField = true; } + if (varDsc->lvLiveInOutOfHndlr) + { + newInt->isWriteThru = true; + setIntervalAsSpilled(newInt); + } + INTRACK_STATS(regCandidateVarCount++); // We maintain two sets of FP vars - those that meet the first threshold of weighted ref Count, @@ -2156,6 +2186,11 @@ void LinearScan::checkLastUses(BasicBlock* block) VARSET_TP liveInNotComputedLive(VarSetOps::Diff(compiler, block->bbLiveIn, computedLive)); + // We may have exception vars in the liveIn set of exception blocks that are not computed live. 
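
The subtraction performed below can be viewed as simple set algebra: a live-in variable is only a genuine mismatch if it is neither computed live nor merely kept alive for a potential exception edge. A standalone sketch under that assumption (using std::bitset in place of VARSET_TP):

    #include <bitset>
    #include <cstddef>

    constexpr std::size_t kMaxTracked = 64; // illustrative tracked-variable limit
    using VarSet = std::bitset<kMaxTracked>;

    // Vars that dataflow says are live-in but the allocator did not compute
    // as live are not an error when they are only live because control may
    // enter a handler: EH vars are always valid in their stack homes.
    VarSet liveInMismatch(const VarSet& bbLiveIn, const VarSet& computedLive, const VarSet& handlerLiveVars)
    {
        return (bbLiveIn & ~computedLive) & ~handlerLiveVars;
    }
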
+ if (compiler->ehBlockHasExnFlowDsc(block)) + { + VarSetOps::DiffD(compiler, liveInNotComputedLive, compiler->fgGetHandlerLiveVars(block)); + } VarSetOps::Iter liveInNotComputedLiveIter(compiler, liveInNotComputedLive); unsigned liveInNotComputedLiveIndex = 0; while (liveInNotComputedLiveIter.NextElem(&liveInNotComputedLiveIndex)) @@ -2287,13 +2322,20 @@ BasicBlock* LinearScan::findPredBlockForLiveIn(BasicBlock* block, // | // block // - for (flowList* pred = otherBlock->bbPreds; pred != nullptr; pred = pred->flNext) + if (blockInfo[otherBlock->bbNum].hasEHBoundaryIn) + { + return nullptr; + } + else { - BasicBlock* otherPred = pred->flBlock; - if (otherPred->bbNum == blockInfo[otherBlock->bbNum].predBBNum) + for (flowList* pred = otherBlock->bbPreds; pred != nullptr; pred = pred->flNext) { - predBlock = otherPred; - break; + BasicBlock* otherPred = pred->flBlock; + if (otherPred->bbNum == blockInfo[otherBlock->bbNum].predBBNum) + { + predBlock = otherPred; + break; + } } } } @@ -2498,9 +2540,7 @@ bool LinearScan::registerIsAvailable(RegRecord* physRegRecord, LsraLocation* nextRefLocationPtr, RegisterType regType) { - *nextRefLocationPtr = MaxLocation; LsraLocation nextRefLocation = MaxLocation; - regMaskTP regMask = genRegMask(physRegRecord->regNum); if (physRegRecord->isBusyUntilNextKill) { return false; @@ -2510,12 +2550,12 @@ bool LinearScan::registerIsAvailable(RegRecord* physRegRecord, if (nextPhysReference != nullptr) { nextRefLocation = nextPhysReference->nodeLocation; - // if (nextPhysReference->refType == RefTypeFixedReg) nextRefLocation--; } else if (!physRegRecord->isCalleeSave) { nextRefLocation = MaxLocation - 1; } + *nextRefLocationPtr = nextRefLocation; Interval* assignedInterval = physRegRecord->assignedInterval; @@ -2546,7 +2586,8 @@ bool LinearScan::registerIsAvailable(RegRecord* physRegRecord, // (Note that it is unlikely that we have a recent copy or move to a different register, // where this physRegRecord is still pointing at an earlier copy or move, but it is possible, // especially in stress modes.) 
- if ((recentReference->registerAssignment == regMask) && copyOrMoveRegInUse(recentReference, currentLoc)) + if ((recentReference->registerAssignment == genRegMask(physRegRecord->regNum)) && + copyOrMoveRegInUse(recentReference, currentLoc)) { return false; } @@ -2569,12 +2610,13 @@ bool LinearScan::registerIsAvailable(RegRecord* physRegRecord, { if (nextReference->nodeLocation < nextRefLocation) { - nextRefLocation = nextReference->nodeLocation; + *nextRefLocationPtr = nextReference->nodeLocation; } } else { - assert(recentReference->copyReg && recentReference->registerAssignment != regMask); + assert(recentReference->copyReg && + (recentReference->registerAssignment != genRegMask(physRegRecord->regNum))); } } else @@ -2582,10 +2624,6 @@ bool LinearScan::registerIsAvailable(RegRecord* physRegRecord, return false; } } - if (nextRefLocation < *nextRefLocationPtr) - { - *nextRefLocationPtr = nextRefLocation; - } #ifdef TARGET_ARM if (regType == TYP_DOUBLE) @@ -2593,11 +2631,10 @@ bool LinearScan::registerIsAvailable(RegRecord* physRegRecord, // Recurse, but check the other half this time (TYP_FLOAT) if (!registerIsAvailable(findAnotherHalfRegRec(physRegRecord), currentLoc, nextRefLocationPtr, TYP_FLOAT)) return false; - nextRefLocation = *nextRefLocationPtr; } #endif // TARGET_ARM - return (nextRefLocation >= currentLoc); + return true; } //------------------------------------------------------------------------ @@ -2794,8 +2831,11 @@ regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition* Interval* rangeEndInterval = relatedInterval; regMaskTP relatedPreferences = (relatedInterval == nullptr) ? RBM_NONE : relatedInterval->getCurrentPreferences(); LsraLocation rangeEndLocation = refPosition->getRangeEndLocation(); - bool preferCalleeSave = currentInterval->preferCalleeSave; - bool avoidByteRegs = false; + LsraLocation relatedLastLocation = rangeEndLocation; + + bool preferCalleeSave = currentInterval->preferCalleeSave; + + bool avoidByteRegs = false; #ifdef TARGET_X86 if ((relatedPreferences & ~RBM_BYTE_REGS) != RBM_NONE) { @@ -2863,6 +2903,11 @@ regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition* rangeEndRefPosition = refPosition; preferCalleeSave = currentInterval->preferCalleeSave; } + else if (currentInterval->isWriteThru && refPosition->spillAfter) + { + // This is treated as a last use of the register, as there is an upcoming EH boundary. + rangeEndRefPosition = refPosition; + } else { rangeEndRefPosition = refPosition->getRangeEndRef(); @@ -2870,11 +2915,37 @@ regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition* // is not currently occupying a register, and whose lifetime begins after this one, // we want to try to select a register that will cover its lifetime. if ((rangeEndInterval != nullptr) && (rangeEndInterval->assignedReg == nullptr) && + !rangeEndInterval->isWriteThru && (rangeEndInterval->getNextRefLocation() >= rangeEndRefPosition->nodeLocation)) { lastRefPosition = rangeEndInterval->lastRefPosition; } } + if ((relatedInterval != nullptr) && !relatedInterval->isWriteThru) + { + relatedLastLocation = relatedInterval->lastRefPosition->nodeLocation; + } + + regMaskTP callerCalleePrefs; + if (preferCalleeSave) + { + regMaskTP calleeSaveCandidates = calleeSaveRegs(currentInterval->registerType); + if (currentInterval->isWriteThru) + { + // We'll only prefer a callee-save register if it's already been used. 
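
Concretely, the mask computation that follows keeps only the callee-saves that the method has already modified. As a standalone illustration (plain 64-bit masks rather than the JIT's register types):

    #include <cstdint>

    using RegMask = uint64_t;

    // An untouched callee-save is a poor choice for a write-thru interval:
    // it would add a prolog save and epilog restore, while the value is
    // already always available from the stack home. Prefer only those
    // callee-saves the method has modified anyway.
    RegMask writeThruCalleeSavePrefs(RegMask calleeSaveCandidates, RegMask modifiedRegs)
    {
        RegMask unusedCalleeSaves = calleeSaveCandidates & ~modifiedRegs;
        return calleeSaveCandidates & ~unusedCalleeSaves; // == candidates & modifiedRegs
    }
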
+ regMaskTP unusedCalleeSaves = calleeSaveCandidates & ~(compiler->codeGen->regSet.rsGetModifiedRegsMask()); + callerCalleePrefs = calleeSaveCandidates & ~unusedCalleeSaves; + preferences &= ~unusedCalleeSaves; + } + else + { + callerCalleePrefs = calleeSaveCandidates; + } + } + else + { + callerCalleePrefs = callerSaveRegs(currentInterval->registerType); + } // If this has a delayed use (due to being used in a rmw position of a // non-commutative operator), its endLocation is delayed until the "def" @@ -3057,7 +3128,7 @@ regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition* if ((candidateBit & relatedPreferences) != RBM_NONE) { score |= RELATED_PREFERENCE; - if (nextPhysRefLocation > relatedInterval->lastRefPosition->nodeLocation) + if (nextPhysRefLocation > relatedLastLocation) { score |= COVERS_RELATED; } @@ -3071,7 +3142,7 @@ regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition* score |= RELATED_PREFERENCE; } - if (preferCalleeSave == physRegRecord->isCalleeSave) + if ((candidateBit & callerCalleePrefs) != RBM_NONE) { score |= CALLER_CALLEE; } @@ -4087,7 +4158,8 @@ void LinearScan::spillInterval(Interval* interval, RefPosition* fromRefPosition if (!fromRefPosition->lastUse) { // If not allocated a register, Lcl var def/use ref positions even if reg optional - // should be marked as spillAfter. + // should be marked as spillAfter. Note that if it is a WriteThru interval, the value is always + // written to the stack, but the WriteThru indicates that the register is no longer live. if (fromRefPosition->RegOptional() && !(interval->isLocalVar && fromRefPosition->IsActualRef())) { fromRefPosition->registerAssignment = RBM_NONE; @@ -4822,11 +4894,33 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) regNumber targetReg; Interval* interval = getIntervalForLocalVar(varIndex); RefPosition* nextRefPosition = interval->getNextRefPosition(); - assert(nextRefPosition != nullptr); + assert((nextRefPosition != nullptr) || (interval->isWriteThru)); + + bool leaveOnStack = false; + + // Special handling for variables live in/out of exception handlers. + if (interval->isWriteThru) + { + // There are 3 cases where we will leave writethru lclVars on the stack: + // 1) There is no predecessor. + // 2) It is conservatively or artificially live - that is, it has no next use, + // so there is no place for codegen to record that the register is no longer occupied. + // 3) This block has a predecessor with an outgoing EH edge. We won't be able to add "join" + // resolution to load the EH var into a register along that edge, so it must be on stack. + if ((predBBNum == 0) || (nextRefPosition == nullptr) || (RefTypeIsDef(nextRefPosition->refType)) || + blockInfo[currentBlock->bbNum].hasEHPred) + { + leaveOnStack = true; + } + } if (!allocationPassComplete) { targetReg = getVarReg(predVarToRegMap, varIndex); + if (leaveOnStack) + { + targetReg = REG_STK; + } #ifdef DEBUG regNumber newTargetReg = rotateBlockStartLocation(interval, targetReg, (~liveRegs | inactiveRegs)); if (newTargetReg != targetReg) @@ -4891,9 +4985,9 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) { // This can happen if we are using the locations from a basic block other than the // immediately preceding one - where the variable was in a different location. - if (targetReg != REG_STK) + if ((targetReg != REG_STK) || leaveOnStack) { - // Unassign it from the register (it will get a new register below). 
+ // Unassign it from the register (it may get a new register below). if (interval->assignedReg != nullptr && interval->assignedReg->assignedInterval == interval) { interval->isActive = false; @@ -5199,9 +5293,9 @@ void LinearScan::allocateRegisters() } } -#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE if (enregisterLocalVars) { +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE VarSetOps::Iter largeVectorVarsIter(compiler, largeVectorVars); unsigned largeVectorVarIndex = 0; while (largeVectorVarsIter.NextElem(&largeVectorVarIndex)) @@ -5209,13 +5303,12 @@ void LinearScan::allocateRegisters() Interval* lclVarInterval = getIntervalForLocalVar(largeVectorVarIndex); lclVarInterval->isPartiallySpilled = false; } - } #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE + } for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg)) { getRegisterRecord(reg)->recentRefPosition = nullptr; - getRegisterRecord(reg)->isActive = false; } #ifdef DEBUG @@ -5275,7 +5368,7 @@ void LinearScan::allocateRegisters() currentReferent = currentRefPosition->referent; - if (spillAlways() && lastAllocatedRefPosition != nullptr && !lastAllocatedRefPosition->isPhysRegRef && + if (spillAlways() && lastAllocatedRefPosition != nullptr && !lastAllocatedRefPosition->IsPhysRegRef() && !lastAllocatedRefPosition->getInterval()->isInternal && (RefTypeIsDef(lastAllocatedRefPosition->refType) || lastAllocatedRefPosition->getInterval()->isLocalVar)) { @@ -5407,6 +5500,19 @@ void LinearScan::allocateRegisters() INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition->assignedReg())); continue; } + if (refType == RefTypeKill) + { + RegRecord* currentReg = currentRefPosition->getReg(); + Interval* assignedInterval = currentReg->assignedInterval; + + if (assignedInterval != nullptr) + { + unassignPhysReg(currentReg, assignedInterval->recentRefPosition); + } + currentReg->isBusyUntilNextKill = false; + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, currentReg->regNum)); + continue; + } // If this is an exposed use, do nothing - this is merely a placeholder to attempt to // ensure that a register is allocated for the full lifetime. The resolution logic @@ -5420,171 +5526,170 @@ void LinearScan::allocateRegisters() regNumber assignedRegister = REG_NA; - if (currentRefPosition->isIntervalRef()) - { - currentInterval = currentRefPosition->getInterval(); - assignedRegister = currentInterval->physReg; + assert(currentRefPosition->isIntervalRef()); + currentInterval = currentRefPosition->getInterval(); + assert(currentInterval != nullptr); + assignedRegister = currentInterval->physReg; - // Identify the special cases where we decide up-front not to allocate - bool allocate = true; - bool didDump = false; + // Identify the special cases where we decide up-front not to allocate + bool allocate = true; + bool didDump = false; - if (refType == RefTypeParamDef || refType == RefTypeZeroInit) + if (refType == RefTypeParamDef || refType == RefTypeZeroInit) + { + if (nextRefPosition == nullptr) { - if (nextRefPosition == nullptr) - { - // If it has no actual references, mark it as "lastUse"; since they're not actually part - // of any flow they won't have been marked during dataflow. Otherwise, if we allocate a - // register we won't unassign it. 
- INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_ZERO_REF, currentInterval)); - currentRefPosition->lastUse = true; - } - if (refType == RefTypeParamDef) - { - LclVarDsc* varDsc = currentInterval->getLocalVar(compiler); - assert(varDsc != nullptr); - if (varDsc->lvRefCntWtd() <= BB_UNITY_WEIGHT) - { - // For a ParamDef with a weighted refCount less than unity, don't enregister it at entry. - // TODO-CQ: Consider doing this only for stack parameters, since otherwise we may be needlessly - // inserting a store. - allocate = false; - } - else if ((currentInterval->physReg == REG_STK) && nextRefPosition->treeNode->OperIs(GT_BITCAST)) - { - // In the case of ABI mismatches, avoid allocating a register only to have to immediately move - // it to a different register file. - allocate = false; - } - if (!allocate) - { - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_ENTRY_REG_ALLOCATED, currentInterval)); - didDump = true; - setIntervalAsSpilled(currentInterval); - } - } + // If it has no actual references, mark it as "lastUse"; since they're not actually part + // of any flow they won't have been marked during dataflow. Otherwise, if we allocate a + // register we won't unassign it. + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_ZERO_REF, currentInterval)); + currentRefPosition->lastUse = true; + } + LclVarDsc* varDsc = currentInterval->getLocalVar(compiler); + assert(varDsc != nullptr); + assert(!blockInfo[compiler->fgFirstBB->bbNum].hasEHBoundaryIn || currentInterval->isWriteThru); + if (blockInfo[compiler->fgFirstBB->bbNum].hasEHBoundaryIn || + blockInfo[compiler->fgFirstBB->bbNum].hasEHPred) + { + allocate = false; + } + else if (refType == RefTypeParamDef && varDsc->lvRefCntWtd() <= BB_UNITY_WEIGHT) + { + allocate = false; + } + else if ((currentInterval->physReg == REG_STK) && nextRefPosition->treeNode->OperIs(GT_BITCAST)) + { + // In the case of ABI mismatches, avoid allocating a register only to have to immediately move + // it to a different register file. + allocate = false; + } + if (!allocate) + { + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_ENTRY_REG_ALLOCATED, currentInterval)); + didDump = true; + setIntervalAsSpilled(currentInterval); } + } #ifdef FEATURE_SIMD #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - else if (currentInterval->isUpperVector) + else if (currentInterval->isUpperVector) + { + // This is a save or restore of the upper half of a large vector lclVar. + Interval* lclVarInterval = currentInterval->relatedInterval; + assert(lclVarInterval->isLocalVar); + if (refType == RefTypeUpperVectorSave) { - // This is a save or restore of the upper half of a large vector lclVar. 
- Interval* lclVarInterval = currentInterval->relatedInterval; - assert(lclVarInterval->isLocalVar); - if (refType == RefTypeUpperVectorSave) + if ((lclVarInterval->physReg == REG_NA) || + (lclVarInterval->isPartiallySpilled && (currentInterval->physReg == REG_STK))) { - if ((lclVarInterval->physReg == REG_NA) || - (lclVarInterval->isPartiallySpilled && (currentInterval->physReg == REG_STK))) - { - allocate = false; - } - else - { - lclVarInterval->isPartiallySpilled = true; - } + allocate = false; } - else if (refType == RefTypeUpperVectorRestore) + else { - assert(currentInterval->isUpperVector); - if (lclVarInterval->isPartiallySpilled) - { - lclVarInterval->isPartiallySpilled = false; - } - else - { - allocate = false; - } + lclVarInterval->isPartiallySpilled = true; } } - else if (refType == RefTypeUpperVectorSave) + else if (refType == RefTypeUpperVectorRestore) { - assert(!currentInterval->isLocalVar); - // Note that this case looks a lot like the case below, but in this case we need to spill - // at the previous RefPosition. - // We may want to consider allocating two callee-save registers for this case, but it happens rarely - // enough that it may not warrant the additional complexity. - if (assignedRegister != REG_NA) + assert(currentInterval->isUpperVector); + if (lclVarInterval->isPartiallySpilled) { - unassignPhysReg(getRegisterRecord(assignedRegister), currentInterval->firstRefPosition); - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval)); + lclVarInterval->isPartiallySpilled = false; + } + else + { + allocate = false; } - currentRefPosition->registerAssignment = RBM_NONE; - continue; } + } + else if (refType == RefTypeUpperVectorSave) + { + assert(!currentInterval->isLocalVar); + // Note that this case looks a lot like the case below, but in this case we need to spill + // at the previous RefPosition. + // We may want to consider allocating two callee-save registers for this case, but it happens rarely + // enough that it may not warrant the additional complexity. + if (assignedRegister != REG_NA) + { + unassignPhysReg(getRegisterRecord(assignedRegister), currentInterval->firstRefPosition); + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval)); + } + currentRefPosition->registerAssignment = RBM_NONE; + continue; + } #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE #endif // FEATURE_SIMD - if (allocate == false) + if (allocate == false) + { + if (assignedRegister != REG_NA) { - if (assignedRegister != REG_NA) - { - unassignPhysReg(getRegisterRecord(assignedRegister), currentRefPosition); - } - else if (!didDump) - { - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval)); - didDump = true; - } - currentRefPosition->registerAssignment = RBM_NONE; - continue; + unassignPhysReg(getRegisterRecord(assignedRegister), currentRefPosition); } - - if (currentInterval->isSpecialPutArg) + else if (!didDump) { - assert(!currentInterval->isLocalVar); - Interval* srcInterval = currentInterval->relatedInterval; - assert(srcInterval != nullptr && srcInterval->isLocalVar); - if (refType == RefTypeDef) - { - assert(srcInterval->recentRefPosition->nodeLocation == currentLocation - 1); - RegRecord* physRegRecord = srcInterval->assignedReg; - - // For a putarg_reg to be special, its next use location has to be the same - // as fixed reg's next kill location. 
Otherwise, if source lcl var's next use - // is after the kill of fixed reg but before putarg_reg's next use, fixed reg's - // kill would lead to spill of source but not the putarg_reg if it were treated - // as special. - if (srcInterval->isActive && - genRegMask(srcInterval->physReg) == currentRefPosition->registerAssignment && - currentInterval->getNextRefLocation() == physRegRecord->getNextRefLocation()) - { - assert(physRegRecord->regNum == srcInterval->physReg); - - // Special putarg_reg acts as a pass-thru since both source lcl var - // and putarg_reg have the same register allocated. Physical reg - // record of reg continue to point to source lcl var's interval - // instead of to putarg_reg's interval. So if a spill of reg - // allocated to source lcl var happens, to reallocate to another - // tree node, before its use at call node it will lead to spill of - // lcl var instead of putarg_reg since physical reg record is pointing - // to lcl var's interval. As a result, arg reg would get trashed leading - // to bad codegen. The assumption here is that source lcl var of a - // special putarg_reg doesn't get spilled and re-allocated prior to - // its use at the call node. This is ensured by marking physical reg - // record as busy until next kill. - physRegRecord->isBusyUntilNextKill = true; - } - else - { - currentInterval->isSpecialPutArg = false; - } + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval)); + didDump = true; + } + currentRefPosition->registerAssignment = RBM_NONE; + continue; + } + + if (currentInterval->isSpecialPutArg) + { + assert(!currentInterval->isLocalVar); + Interval* srcInterval = currentInterval->relatedInterval; + assert(srcInterval != nullptr && srcInterval->isLocalVar); + if (refType == RefTypeDef) + { + assert(srcInterval->recentRefPosition->nodeLocation == currentLocation - 1); + RegRecord* physRegRecord = srcInterval->assignedReg; + + // For a putarg_reg to be special, its next use location has to be the same + // as fixed reg's next kill location. Otherwise, if source lcl var's next use + // is after the kill of fixed reg but before putarg_reg's next use, fixed reg's + // kill would lead to spill of source but not the putarg_reg if it were treated + // as special. + if (srcInterval->isActive && + genRegMask(srcInterval->physReg) == currentRefPosition->registerAssignment && + currentInterval->getNextRefLocation() == physRegRecord->getNextRefLocation()) + { + assert(physRegRecord->regNum == srcInterval->physReg); + + // Special putarg_reg acts as a pass-thru since both source lcl var + // and putarg_reg have the same register allocated. Physical reg + // record of reg continue to point to source lcl var's interval + // instead of to putarg_reg's interval. So if a spill of reg + // allocated to source lcl var happens, to reallocate to another + // tree node, before its use at call node it will lead to spill of + // lcl var instead of putarg_reg since physical reg record is pointing + // to lcl var's interval. As a result, arg reg would get trashed leading + // to bad codegen. The assumption here is that source lcl var of a + // special putarg_reg doesn't get spilled and re-allocated prior to + // its use at the call node. This is ensured by marking physical reg + // record as busy until next kill. 
+ physRegRecord->isBusyUntilNextKill = true; } - // If this is still a SpecialPutArg, continue; - if (currentInterval->isSpecialPutArg) + else { - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_SPECIAL_PUTARG, currentInterval, - currentRefPosition->assignedReg())); - continue; + currentInterval->isSpecialPutArg = false; } } - - if (assignedRegister == REG_NA && RefTypeIsUse(refType)) + // If this is still a SpecialPutArg, continue; + if (currentInterval->isSpecialPutArg) { - currentRefPosition->reload = true; - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_RELOAD, currentInterval, assignedRegister)); + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_SPECIAL_PUTARG, currentInterval, + currentRefPosition->assignedReg())); + continue; } } + if (assignedRegister == REG_NA && RefTypeIsUse(refType)) + { + currentRefPosition->reload = true; + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_RELOAD, currentInterval, assignedRegister)); + } + regMaskTP assignedRegBit = RBM_NONE; bool isInRegister = false; if (assignedRegister != REG_NA) @@ -5612,25 +5717,7 @@ void LinearScan::allocateRegisters() currentInterval->assignedReg->assignedInterval == currentInterval); } - // If this is a physical register, we unconditionally assign it to itself! - if (currentRefPosition->isPhysRegRef) - { - RegRecord* currentReg = currentRefPosition->getReg(); - Interval* assignedInterval = currentReg->assignedInterval; - - if (assignedInterval != nullptr) - { - unassignPhysReg(currentReg, assignedInterval->recentRefPosition); - } - currentReg->isActive = true; - assignedRegister = currentReg->regNum; - assignedRegBit = genRegMask(assignedRegister); - if (refType == RefTypeKill) - { - currentReg->isBusyUntilNextKill = false; - } - } - else if (previousRefPosition != nullptr) + if (previousRefPosition != nullptr) { assert(previousRefPosition->nextRefPosition == currentRefPosition); assert(assignedRegister == REG_NA || assignedRegBit == previousRefPosition->registerAssignment || @@ -5726,14 +5813,14 @@ void LinearScan::allocateRegisters() else if ((genRegMask(assignedRegister) & currentRefPosition->registerAssignment) != 0) { currentRefPosition->registerAssignment = assignedRegBit; - if (!currentReferent->isActive) + if (!currentInterval->isActive) { // If we've got an exposed use at the top of a block, the // interval might not have been active. Otherwise if it's a use, // the interval must be active. if (refType == RefTypeDummyDef) { - currentReferent->isActive = true; + currentInterval->isActive = true; assert(getRegisterRecord(assignedRegister)->assignedInterval == currentInterval); } else @@ -5745,16 +5832,35 @@ void LinearScan::allocateRegisters() } else { - assert(currentInterval != nullptr); - // It's already in a register, but not one we need. 
if (!RefTypeIsDef(currentRefPosition->refType)) { regNumber copyReg = assignCopyReg(currentRefPosition); assert(copyReg != REG_NA); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_COPY_REG, currentInterval, copyReg)); - lastAllocatedRefPosition = currentRefPosition; + lastAllocatedRefPosition = currentRefPosition; + bool unassign = false; + RefPosition* nextRefPosition = currentRefPosition->nextRefPosition; + if (currentInterval->isWriteThru) + { + if (currentRefPosition->refType == RefTypeDef) + { + currentRefPosition->writeThru = true; + } + if (!currentRefPosition->lastUse) + { + if (currentRefPosition->spillAfter) + { + unassign = true; + } + } + } if (currentRefPosition->lastUse) + { + assert(currentRefPosition->isIntervalRef()); + unassign = true; + } + if (unassign) { if (currentRefPosition->delayRegFree) { @@ -5801,6 +5907,14 @@ void LinearScan::allocateRegisters() { allocateReg = false; } + else if (currentInterval->isWriteThru) + { + // Don't allocate if the next reference is in a cold block. + if (nextRefPosition == nullptr || (nextRefPosition->nodeLocation >= firstColdLoc)) + { + allocateReg = false; + } + } #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE && defined(TARGET_XARCH) // We can also avoid allocating a register (in fact we don't want to) if we have @@ -5921,37 +6035,58 @@ void LinearScan::allocateRegisters() // (it will be freed when it is used). if (!currentInterval->IsUpperVector()) { + bool unassign = false; + if (currentInterval->isWriteThru) + { + if (currentRefPosition->refType == RefTypeDef) + { + currentRefPosition->writeThru = true; + } + if (!currentRefPosition->lastUse) + { + if (currentRefPosition->spillAfter) + { + unassign = true; + } + } + } if (currentRefPosition->lastUse || currentRefPosition->nextRefPosition == nullptr) { assert(currentRefPosition->isIntervalRef()); if (refType != RefTypeExpUse && currentRefPosition->nextRefPosition == nullptr) { - if (currentRefPosition->delayRegFree) - { - delayRegsToFree |= assignedRegBit; - - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED)); - } - else - { - regsToFree |= assignedRegBit; - - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE)); - } + unassign = true; } else { currentInterval->isActive = false; } - // Update the register preferences for the relatedInterval, if this is 'preferencedToDef'. - // Don't propagate to subsequent relatedIntervals; that will happen as they are allocated, and we - // don't know yet whether the register will be retained. - if (currentInterval->relatedInterval != nullptr) + } + if (unassign) + { + if (currentRefPosition->delayRegFree) { - currentInterval->relatedInterval->updateRegisterPreferences(assignedRegBit); + delayRegsToFree |= assignedRegBit; + + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED)); + } + else + { + regsToFree |= assignedRegBit; + + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE)); } } + + // Update the register preferences for the relatedInterval, if this is 'preferencedToDef'. + // Don't propagate to subsequent relatedIntervals; that will happen as they are allocated, and we + // don't know yet whether the register will be retained. 
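
The write-thru handling above (and its second occurrence further down) reduces to one small decision table. A simplified standalone model (hypothetical types; the real code operates on RefPosition and Interval):

    // Hypothetical, condensed model of the write-thru unassign decision.
    struct RefPos
    {
        bool isDef;
        bool lastUse;
        bool spillAfter; // e.g. an EH boundary intervenes before the next use
    };

    struct Decision
    {
        bool writeThru; // the def also stores to the stack home
        bool unassign;  // the register is released at this position
    };

    Decision decideWriteThru(const RefPos& pos, bool intervalIsWriteThru)
    {
        Decision d{false, false};
        if (intervalIsWriteThru)
        {
            d.writeThru = pos.isDef;                      // every def stores through
            d.unassign  = !pos.lastUse && pos.spillAfter; // reg dies early; stack stays valid
        }
        if (pos.lastUse)
        {
            d.unassign = true;
        }
        return d;
    }

With that in place, the preference update below is gated on the interval's last reference, when it is known whether the register will be retained.
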
+ if ((currentRefPosition->lastUse || nextRefPosition == nullptr) && + (currentInterval->relatedInterval != nullptr)) + { + currentInterval->relatedInterval->updateRegisterPreferences(assignedRegBit); + } } lastAllocatedRefPosition = currentRefPosition; @@ -5975,8 +6110,16 @@ void LinearScan::allocateRegisters() } if (interval->isSpilled) { + unsigned prevBBNum = 0; for (RefPosition* ref = interval->firstRefPosition; ref != nullptr; ref = ref->nextRefPosition) { + // For the resolution phase, we need to ensure that any block with exposed uses has the + // incoming reg for 'this' as REG_STK. + if (RefTypeIsUse(ref->refType) && (ref->bbNum != prevBBNum)) + { + VarToRegMap inVarToRegMap = getInVarToRegMap(ref->bbNum); + setVarReg(inVarToRegMap, thisVarDsc->lvVarIndex, REG_STK); + } if (ref->RegOptional()) { ref->registerAssignment = RBM_NONE; @@ -6003,6 +6146,7 @@ void LinearScan::allocateRegisters() default: break; } + prevBBNum = ref->bbNum; } } } @@ -6203,7 +6347,8 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTree* treeNode, RefPositi } if ((currentRefPosition->registerAssignment != RBM_NONE) && (interval->physReg == REG_NA) && - currentRefPosition->RegOptional() && currentRefPosition->lastUse) + currentRefPosition->RegOptional() && currentRefPosition->lastUse && + (currentRefPosition->refType == RefTypeUse)) { // This can happen if the incoming location for the block was changed from a register to the stack // during resolution. In this case we're better off making it contained. @@ -6225,8 +6370,9 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTree* treeNode, RefPositi } interval->assignedReg = nullptr; interval->physReg = REG_NA; - if (treeNode != nullptr) + if (currentRefPosition->refType == RefTypeUse) { + assert(treeNode != nullptr); treeNode->SetContained(); } @@ -6266,6 +6412,7 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTree* treeNode, RefPositi bool reload = currentRefPosition->reload; bool spillAfter = currentRefPosition->spillAfter; + bool writeThru = currentRefPosition->writeThru; // In the reload case we either: // - Set the register to REG_STK if it will be referenced only from the home location, or @@ -6391,6 +6538,20 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTree* treeNode, RefPositi interval->physReg = REG_NA; varDsc->SetRegNum(REG_STK); } + if (writeThru && (treeNode != nullptr)) + { + // This is a def of a write-thru EH var (only defs are marked 'writeThru'). + treeNode->gtFlags |= GTF_SPILL; + // We also mark writeThru defs that are not last-use with GTF_SPILLED to indicate that they are conceptually + // spilled and immediately "reloaded", i.e. the register remains live. + // Note that we can have a "last use" write that has no exposed uses in the standard + // (non-eh) control flow, but that may be used on an exception path. Hence the need + // to retain these defs, and to ensure that they write. 
+ if (!currentRefPosition->lastUse) + { + treeNode->gtFlags |= GTF_SPILLED; + } + } } // Update the physRegRecord for the register, so that we know what vars are in @@ -7172,7 +7333,8 @@ void LinearScan::resolveRegisters() continue; } - if (currentRefPosition->isIntervalRef() && currentRefPosition->getInterval()->isInternal) + assert(currentRefPosition->isIntervalRef()); + if (currentRefPosition->getInterval()->isInternal) { treeNode->gtRsvdRegs |= currentRefPosition->registerAssignment; } @@ -7192,7 +7354,7 @@ void LinearScan::resolveRegisters() else if (currentRefPosition->spillAfter || (currentRefPosition->nextRefPosition != nullptr && currentRefPosition->nextRefPosition->moveReg)) { - if (treeNode != nullptr && currentRefPosition->isIntervalRef()) + if (treeNode != nullptr) { if (currentRefPosition->spillAfter) { @@ -7577,11 +7739,12 @@ void LinearScan::insertMove( else { // Put the copy at the bottom + GenTree* lastNode = blockRange.LastNode(); if (block->bbJumpKind == BBJ_COND || block->bbJumpKind == BBJ_SWITCH) { noway_assert(!blockRange.IsEmpty()); - GenTree* branch = blockRange.LastNode(); + GenTree* branch = lastNode; assert(branch->OperIsConditionalJump() || branch->OperGet() == GT_SWITCH_TABLE || branch->OperGet() == GT_SWITCH); @@ -7589,7 +7752,9 @@ void LinearScan::insertMove( } else { - assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS); + // These block kinds don't have a branch at the end. + assert((lastNode == nullptr) || (!lastNode->OperIsConditionalJump() && + !lastNode->OperIs(GT_SWITCH_TABLE, GT_SWITCH, GT_RETURN, GT_RETFILT))); blockRange.InsertAtEnd(std::move(treeRange)); } } @@ -7838,14 +8003,24 @@ void LinearScan::addResolution( BasicBlock* block, GenTree* insertionPoint, Interval* interval, regNumber toReg, regNumber fromReg) { #ifdef DEBUG - const char* insertionPointString = "top"; -#endif // DEBUG + const char* insertionPointString; if (insertionPoint == nullptr) { -#ifdef DEBUG + // We can't add resolution to a register at the bottom of a block that has an EHBoundaryOut, + // except in the case of the "EH Dummy" resolution from the stack. + assert((block->bbNum > bbNumMaxBeforeResolution) || (fromReg == REG_STK) || + !blockInfo[block->bbNum].hasEHBoundaryOut); insertionPointString = "bottom"; -#endif // DEBUG } + else + { + // We can't add resolution at the top of a block that has an EHBoundaryIn, + // except in the case of the "EH Dummy" resolution to the stack. + assert((block->bbNum > bbNumMaxBeforeResolution) || (toReg == REG_STK) || + !blockInfo[block->bbNum].hasEHBoundaryIn); + insertionPointString = "top"; + } +#endif // DEBUG JITDUMP(" " FMT_BB " %s: move V%02u from ", block->bbNum, insertionPointString, interval->varNum); JITDUMP("%s to %s", getRegName(fromReg), getRegName(toReg)); @@ -8120,7 +8295,30 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) } if (!VarSetOps::IsEmpty(compiler, edgeResolutionSet)) { - resolveEdge(block, succBlock, ResolveCritical, edgeResolutionSet); + // For EH vars, we can always safely load them from the stack into the target for this block, + // so if we have only EH vars, we'll do that instead of splitting the edge. 
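The choice the next hunk implements is binary, so a standalone sketch may help. The names below are hypothetical; the real code tests compHndBBtabCount and a VarSetOps::IsSubset query against exceptVars:

    enum class CriticalEdgeResolution
    {
        SplitEdge,            // insert a new block on the edge for the moves
        LoadFromStackAtTarget // just reload EH vars at the top of the successor
    };

    // EH vars always have a valid stack home, so reading them from the stack at the
    // top of the successor is correct on every path into that block; no split needed.
    CriticalEdgeResolution chooseResolution(bool methodHasEH, bool onlyEHVarsNeedResolution)
    {
        return (methodHasEH && onlyEHVarsNeedResolution) ? CriticalEdgeResolution::LoadFromStackAtTarget
                                                         : CriticalEdgeResolution::SplitEdge;
    }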
+ if ((compiler->compHndBBtabCount > 0) && VarSetOps::IsSubset(compiler, edgeResolutionSet, exceptVars)) + { + GenTree* insertionPoint = LIR::AsRange(succBlock).FirstNonPhiNode(); + VarSetOps::Iter edgeSetIter(compiler, edgeResolutionSet); + unsigned edgeVarIndex = 0; + while (edgeSetIter.NextElem(&edgeVarIndex)) + { + regNumber toReg = getVarReg(succInVarToRegMap, edgeVarIndex); + setVarReg(succInVarToRegMap, edgeVarIndex, REG_STK); + if (toReg != REG_STK) + { + Interval* interval = getIntervalForLocalVar(edgeVarIndex); + assert(interval->isWriteThru); + addResolution(succBlock, insertionPoint, interval, toReg, REG_STK); + JITDUMP(" (EHvar)\n"); + } + } + } + else + { + resolveEdge(block, succBlock, ResolveCritical, edgeResolutionSet); + } } } } @@ -8322,14 +8520,19 @@ void LinearScan::resolveEdges() regNumber toReg = getVarReg(toVarToRegMap, varIndex); if (fromReg != toReg) { - if (!foundMismatch) + Interval* interval = getIntervalForLocalVar(varIndex); + // The fromReg and toReg may not match for a write-thru interval where the toReg is + // REG_STK, since the stack value is always valid for that case (so no move is needed). + if (!interval->isWriteThru || (toReg != REG_STK)) { - foundMismatch = true; - printf("Found mismatched var locations after resolution!\n"); + if (!foundMismatch) + { + foundMismatch = true; + printf("Found mismatched var locations after resolution!\n"); + } + printf(" V%02u: " FMT_BB " to " FMT_BB ": %s to %s\n", interval->varNum, predBlock->bbNum, + block->bbNum, getRegName(fromReg), getRegName(toReg)); } - - printf(" V%02u: " FMT_BB " to " FMT_BB ": %s to %s\n", compiler->lvaTrackedIndexToLclNum(varIndex), - predBlock->bbNum, block->bbNum, getRegName(fromReg), getRegName(toReg)); } } } @@ -8473,6 +8676,29 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, insertionPoint = LIR::AsRange(block).FirstNonPhiNode(); } + // If this is an edge between EH regions, we may have "extra" live-out EH vars. + // If we are adding resolution at the end of the block, we need to create "virtual" moves + // for these so that their registers are freed and can be reused. 
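To make the "extra live-out" notion concrete: it is the set difference computed at the top of the hunk that follows. A sketch with std::set standing in for the JIT's VARSET_TP bit sets (an assumption made purely for readability):

    #include <set>

    using VarSet = std::set<unsigned>;

    // Vars live out of 'block' but not live into 'toBlock' (possible across EH region
    // boundaries) still occupy registers; a "virtual" move to the stack frees them.
    VarSet computeExtraLiveOut(const VarSet& blockLiveOut, const VarSet& toBlockLiveIn,
                               const VarSet& registerCandidates)
    {
        VarSet extra;
        for (unsigned varIndex : blockLiveOut)
        {
            if ((toBlockLiveIn.count(varIndex) == 0) && (registerCandidates.count(varIndex) != 0))
            {
                extra.insert(varIndex);
            }
        }
        return extra;
    }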
+ if ((resolveType == ResolveJoin) && (compiler->compHndBBtabCount > 0)) + { + VARSET_TP extraLiveSet(VarSetOps::Diff(compiler, block->bbLiveOut, toBlock->bbLiveIn)); + VarSetOps::IntersectionD(compiler, extraLiveSet, registerCandidateVars); + VarSetOps::Iter iter(compiler, extraLiveSet); + unsigned extraVarIndex = 0; + while (iter.NextElem(&extraVarIndex)) + { + Interval* interval = getIntervalForLocalVar(extraVarIndex); + assert(interval->isWriteThru); + regNumber fromReg = getVarReg(fromVarToRegMap, extraVarIndex); + if (fromReg != REG_STK) + { + addResolution(block, insertionPoint, interval, REG_STK, fromReg); + JITDUMP(" (EH DUMMY)\n"); + setVarReg(fromVarToRegMap, extraVarIndex, REG_STK); + } + } + } + // First: // - Perform all moves from reg to stack (no ordering needed on these) // - For reg to reg moves, record the current location, associating their @@ -8486,13 +8712,24 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, unsigned varIndex = 0; while (iter.NextElem(&varIndex)) { - regNumber fromReg = getVarReg(fromVarToRegMap, varIndex); - regNumber toReg = getVarReg(toVarToRegMap, varIndex); + Interval* interval = getIntervalForLocalVar(varIndex); + regNumber fromReg = getVarReg(fromVarToRegMap, varIndex); + regNumber toReg = getVarReg(toVarToRegMap, varIndex); if (fromReg == toReg) { continue; } - + if (interval->isWriteThru && (toReg == REG_STK)) + { + // We don't actually move a writeThru var back to the stack, as its stack value is always valid. + // However, if this is a Join edge (i.e. the move is happening at the bottom of the block), + // and it is a "normal" flow edge, we will go ahead and generate a mov instruction, which will be + // a NOP but will cause the variable to be removed from being live in the register. + if ((resolveType == ResolveSplit) || block->hasEHBoundaryOut()) + { + continue; + } + } // For Critical edges, the location will not change on either side of the edge, // since we'll add a new block to do the move. if (resolveType == ResolveSplit) @@ -8506,8 +8743,6 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, assert(fromReg < UCHAR_MAX && toReg < UCHAR_MAX); - Interval* interval = getIntervalForLocalVar(varIndex); - if (fromReg == REG_STK) { stackToRegIntervals[toReg] = interval; @@ -8517,7 +8752,8 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, { // Do the reg to stack moves now addResolution(block, insertionPoint, interval, REG_STK, fromReg); - JITDUMP(" (%s)\n", resolveTypeName[resolveType]); + JITDUMP(" (%s)\n", + (interval->isWriteThru && (toReg == REG_STK)) ? 
"EH DUMMY" : resolveTypeName[resolveType]); } else { @@ -8965,7 +9201,7 @@ void RefPosition::dump() printf(" %s ", getRefTypeName(refType)); - if (this->isPhysRegRef) + if (this->IsPhysRegRef()) { this->getReg()->tinyDump(); } @@ -8997,6 +9233,10 @@ void RefPosition::dump() { printf(" spillAfter"); } + if (this->writeThru) + { + printf(" writeThru"); + } if (this->moveReg) { printf(" move"); @@ -9084,6 +9324,10 @@ void Interval::dump() { printf(" (constant)"); } + if (isWriteThru) + { + printf(" (writeThru)"); + } printf(" RefPositions {"); for (RefPosition* refPosition = this->firstRefPosition; refPosition != nullptr; @@ -9559,7 +9803,7 @@ void LinearScan::TupleStyleDump(LsraTupleDumpMode mode) switch (currentRefPosition->refType) { case RefTypeUse: - if (currentRefPosition->isPhysRegRef) + if (currentRefPosition->IsPhysRegRef()) { printf("\n Use:R%d(#%d)", currentRefPosition->getReg()->regNum, currentRefPosition->rpNum); @@ -10187,7 +10431,7 @@ void LinearScan::dumpRefPositionShort(RefPosition* refPosition, BasicBlock* curr } printf(" %s%c%c ", getRefTypeShortName(refPosition->refType), lastUseChar, delayChar); } - else if (refPosition->isPhysRegRef) + else if (refPosition->IsPhysRegRef()) { RegRecord* regRecord = refPosition->getReg(); printf(regNameFormat, getRegName(regRecord->regNum)); @@ -10315,7 +10559,7 @@ void LinearScan::verifyFinalAllocation() } else { - if (currentRefPosition->isPhysRegRef) + if (currentRefPosition->IsPhysRegRef()) { regRecord = currentRefPosition->getReg(); regRecord->recentRefPosition = currentRefPosition; @@ -10398,7 +10642,11 @@ void LinearScan::verifyFinalAllocation() } regNumber regNum = getVarReg(outVarToRegMap, varIndex); interval = getIntervalForLocalVar(varIndex); - assert(interval->physReg == regNum || (interval->physReg == REG_NA && regNum == REG_STK)); + if (interval->physReg != regNum) + { + assert(regNum == REG_STK); + assert((interval->physReg == REG_NA) || interval->isWriteThru); + } interval->physReg = REG_NA; interval->assignedReg = nullptr; interval->isActive = false; @@ -10555,7 +10803,7 @@ void LinearScan::verifyFinalAllocation() { dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum, currentBlock); } - if (currentRefPosition->lastUse || currentRefPosition->spillAfter) + if (currentRefPosition->lastUse || (currentRefPosition->spillAfter && !currentRefPosition->writeThru)) { interval->isActive = false; } @@ -10576,7 +10824,14 @@ void LinearScan::verifyFinalAllocation() } dumpRegRecords(); dumpEmptyRefPosition(); - printf("Spill %-4s ", getRegName(spillReg)); + if (currentRefPosition->writeThru) + { + printf("WThru %-4s ", getRegName(spillReg)); + } + else + { + printf("Spill %-4s ", getRegName(spillReg)); + } } } else if (currentRefPosition->copyReg) @@ -10737,7 +10992,10 @@ void LinearScan::verifyFinalAllocation() } regNumber regNum = getVarReg(outVarToRegMap, varIndex); Interval* interval = getIntervalForLocalVar(varIndex); - assert(interval->physReg == regNum || (interval->physReg == REG_NA && regNum == REG_STK)); + // Either the register assignments match, or the outgoing assignment is on the stack + // and this is a write-thru interval. 
+ assert(interval->physReg == regNum || (interval->physReg == REG_NA && regNum == REG_STK) || + (interval->isWriteThru && regNum == REG_STK)); interval->physReg = REG_NA; interval->assignedReg = nullptr; interval->isActive = false; diff --git a/src/coreclr/src/jit/lsra.h b/src/coreclr/src/jit/lsra.h index f3ff58ed7bd27..9819e069af647 100644 --- a/src/coreclr/src/jit/lsra.h +++ b/src/coreclr/src/jit/lsra.h @@ -71,13 +71,21 @@ inline bool registerTypesEquivalent(RegisterType a, RegisterType b) } //------------------------------------------------------------------------ -// registerTypesEquivalent: Get the set of callee-save registers of the given RegisterType +// calleeSaveRegs: Get the set of callee-save registers of the given RegisterType // inline regMaskTP calleeSaveRegs(RegisterType rt) { return varTypeIsIntegralOrI(rt) ? RBM_INT_CALLEE_SAVED : RBM_FLT_CALLEE_SAVED; } +//------------------------------------------------------------------------ +// callerSaveRegs: Get the set of caller-save registers of the given RegisterType +// +inline regMaskTP callerSaveRegs(RegisterType rt) +{ + return varTypeIsIntegralOrI(rt) ? RBM_INT_CALLEE_TRASH : RBM_FLT_CALLEE_TRASH; +} + //------------------------------------------------------------------------ // RefInfo: Captures the necessary information for a definition that is "in-flight" // during `buildIntervals` (i.e. a tree-node definition has been encountered, @@ -380,10 +388,11 @@ struct LsraBlockInfo // 0 for fgFirstBB. unsigned int predBBNum; BasicBlock::weight_t weight; - bool hasCriticalInEdge; - bool hasCriticalOutEdge; - bool hasEHBoundaryIn; - bool hasEHBoundaryOut; + bool hasCriticalInEdge : 1; + bool hasCriticalOutEdge : 1; + bool hasEHBoundaryIn : 1; + bool hasEHBoundaryOut : 1; + bool hasEHPred : 1; #if TRACK_LSRA_STATS // Per block maintained LSRA statistics. @@ -447,7 +456,6 @@ class Referenceable firstRefPosition = nullptr; recentRefPosition = nullptr; lastRefPosition = nullptr; - isActive = false; } // A linked list of RefPositions. These are only traversed in the forward @@ -458,8 +466,6 @@ class Referenceable RefPosition* recentRefPosition; RefPosition* lastRefPosition; - bool isActive; - // Get the position of the next reference which is at or greater than // the current location (relies upon recentRefPosition being udpated // during traversal). @@ -1372,6 +1378,7 @@ class LinearScan : public LinearScanInterface // A map from bbNum to the block information used during register allocation. LsraBlockInfo* blockInfo; + BasicBlock* findPredBlockForLiveIn(BasicBlock* block, BasicBlock* prevBlock DEBUGARG(bool* pPredBlockIsAllocated)); // The order in which the blocks will be allocated. @@ -1399,6 +1406,8 @@ class LinearScan : public LinearScanInterface unsigned int curBBNum; // The current location LsraLocation currentLoc; + // The first location in a cold or funclet block. + LsraLocation firstColdLoc; // The ordinal of the block we're on (i.e. this is the curBBSeqNum-th block we've allocated). unsigned int curBBSeqNum; // The number of blocks that we've sequenced. @@ -1446,6 +1455,8 @@ class LinearScan : public LinearScanInterface VARSET_TP fpCalleeSaveCandidateVars; // Set of variables exposed on EH flow edges. VARSET_TP exceptVars; + // Set of variables exposed on finally edges. These must be zero-init if they are refs or if compInitMem is true. 
+    VARSET_TP finallyVars;

 #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE
 #if defined(TARGET_AMD64)
@@ -1624,7 +1635,10 @@ class Interval : public Referenceable
         : registerPreferences(registerPreferences)
         , relatedInterval(nullptr)
         , assignedReg(nullptr)
+        , varNum(0)
+        , physReg(REG_COUNT)
         , registerType(registerType)
+        , isActive(false)
         , isLocalVar(false)
         , isSplit(false)
         , isSpilled(false)
@@ -1640,11 +1654,10 @@ class Interval : public Referenceable
         , isUpperVector(false)
         , isPartiallySpilled(false)
 #endif
-        , physReg(REG_COUNT)
+        , isWriteThru(false)
 #ifdef DEBUG
         , intervalIndex(0)
 #endif
-        , varNum(0)
     {
     }

@@ -1672,11 +1685,17 @@ class Interval : public Referenceable
     // register it currently occupies.
     RegRecord* assignedReg;

-    // DECIDE : put this in a union or do something w/ inheritance?
-    // this is an interval for a physical register, not a allocatable entity
+    unsigned int varNum; // This is the "variable number": the index into the lvaTable array
+
+    // The register to which it is currently assigned.
+    regNumber physReg;

     RegisterType registerType;
-    bool isLocalVar : 1;
+
+    // Is this Interval currently in a register and live?
+    bool isActive;
+
+    bool isLocalVar : 1;
     // Indicates whether this interval has been assigned to different registers
     bool isSplit : 1;
     // Indicates whether this interval is ever spilled
@@ -1728,15 +1747,13 @@ class Interval : public Referenceable
     }
 #endif

-    // The register to which it is currently assigned.
-    regNumber physReg;
+    // True if this interval is associated with a lclVar that is written to memory at each definition.
+    bool isWriteThru : 1;

 #ifdef DEBUG
     unsigned int intervalIndex;
 #endif // DEBUG

-    unsigned int varNum; // This is the "variable number": the index into the lvaTable array
-
     LclVarDsc* getLocalVar(Compiler* comp)
     {
         assert(isLocalVar);
@@ -1886,8 +1903,8 @@ class RefPosition
 {
 public:
     // A RefPosition refers to either an Interval or a RegRecord. 'referent' points to one
-    // of these types. If it refers to a RegRecord, then 'isPhysRegRef' is true. If it
-    // refers to an Interval, then 'isPhysRegRef' is false.
+    // of these types. If it refers to a RegRecord, then 'IsPhysRegRef()' is true. If it
+    // refers to an Interval, then 'IsPhysRegRef()' is false.
     // referent can never be null.
     Referenceable* referent;

@@ -1951,6 +1968,9 @@ class RefPosition
     unsigned char reload : 1;
     unsigned char spillAfter : 1;
+    unsigned char writeThru : 1; // true if this var is defined in a register and also spilled. spillAfter must NOT be
+                                 // set.
+ unsigned char copyReg : 1; unsigned char moveReg : 1; // true if this var is moved to a new register @@ -1995,6 +2015,7 @@ class RefPosition , lastUse(false) , reload(false) , spillAfter(false) + , writeThru(false) , copyReg(false) , moveReg(false) , isPhysRegRef(false) @@ -2068,6 +2089,11 @@ class RefPosition } } + bool IsPhysRegRef() + { + return ((refType == RefTypeFixedReg) || (refType == RefTypeKill)); + } + void setRegOptional(bool val) { regOptional = val; @@ -2102,7 +2128,7 @@ class RefPosition RefPosition* getRangeEndRef() { - if (lastUse || nextRefPosition == nullptr) + if (lastUse || nextRefPosition == nullptr || spillAfter) { return this; } @@ -2120,14 +2146,7 @@ class RefPosition bool isIntervalRef() { - return (!isPhysRegRef && (referent != nullptr)); - } - - // isTrueDef indicates that the RefPosition is a non-update def of a non-internal - // interval - bool isTrueDef() - { - return (refType == RefTypeDef && isIntervalRef() && !getInterval()->isInternal); + return (!IsPhysRegRef() && (referent != nullptr)); } // isFixedRefOfRegMask indicates that the RefPosition has a fixed assignment to the register diff --git a/src/coreclr/src/jit/lsrabuild.cpp b/src/coreclr/src/jit/lsrabuild.cpp index fb4b8ff61be89..fba09a5286c07 100644 --- a/src/coreclr/src/jit/lsrabuild.cpp +++ b/src/coreclr/src/jit/lsrabuild.cpp @@ -1181,18 +1181,26 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo { interval->preferCalleeSave = true; } - regMaskTP newPreferences = allRegs(interval->registerType) & (~killMask); - if (newPreferences != RBM_NONE) + // We are more conservative about allocating callee-saves registers to write-thru vars, since + // a call only requires reloading after (not spilling before). So we record (above) the fact + // that we'd prefer a callee-save register, but we don't update the preferences at this point. + // See the "heuristics for writeThru intervals" in 'buildIntervals()'. + if (!interval->isWriteThru || !isCallKill) { - interval->updateRegisterPreferences(newPreferences); - } - else - { - // If there are no callee-saved registers, the call could kill all the registers. - // This is a valid state, so in that case assert should not trigger. The RA will spill in order to - // free a register later. - assert(compiler->opts.compDbgEnC || (calleeSaveRegs(varDsc->lvType)) == RBM_NONE); + regMaskTP newPreferences = allRegs(interval->registerType) & (~killMask); + + if (newPreferences != RBM_NONE) + { + interval->updateRegisterPreferences(newPreferences); + } + else + { + // If there are no callee-saved registers, the call could kill all the registers. + // This is a valid state, so in that case assert should not trigger. The RA will spill in order + // to free a register later. + assert(compiler->opts.compDbgEnC || (calleeSaveRegs(varDsc->lvType)) == RBM_NONE); + } } } } @@ -1791,6 +1799,31 @@ void LinearScan::insertZeroInitRefPositions() } } } + + // We must also insert zero-inits for any finallyVars if they are refs or if compInitMem is true. 
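The condition applied by the loop that follows can be pulled out as a tiny predicate. This is a hypothetical helper, simplified from the surrounding code; 'isGCRef' stands in for the varTypeIsGC test:

    // Params are initialized by the caller. Anything else that is a register candidate
    // must be zero-initialized if the method requires zero-initialized locals
    // (compInitMem), or if it is a GC ref that could otherwise be reported while stale.
    bool finallyVarNeedsZeroInit(bool isParam, bool isRegCandidate, bool compInitMem, bool isGCRef)
    {
        return !isParam && isRegCandidate && (compInitMem || isGCRef);
    }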
+    if (compiler->lvaEnregEHVars)
+    {
+        VarSetOps::Iter iter(compiler, finallyVars);
+        unsigned varIndex = 0;
+        while (iter.NextElem(&varIndex))
+        {
+            LclVarDsc* varDsc = compiler->lvaGetDescByTrackedIndex(varIndex);
+            if (!varDsc->lvIsParam && isCandidateVar(varDsc))
+            {
+                JITDUMP("V%02u is a finally var:", compiler->lvaTrackedIndexToLclNum(varIndex));
+                Interval* interval = getIntervalForLocalVar(varIndex);
+                if (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet()))
+                {
+                    JITDUMP(" creating ZeroInit\n");
+                    GenTree* firstNode = getNonEmptyBlock(compiler->fgFirstBB)->firstNode();
+                    RefPosition* pos = newRefPosition(interval, MinLocation, RefTypeZeroInit, firstNode,
+                                                      allRegs(interval->registerType));
+                    pos->setRegOptional(true);
+                    varDsc->lvMustInit = true;
+                }
+            }
+        }
+    }
 }

 #if defined(UNIX_AMD64_ABI)
@@ -2101,49 +2134,72 @@ void LinearScan::buildIntervals()
             currentLoc = 1;
         }

-        // Any lclVars live-in to a block are resolution candidates.
-        VarSetOps::UnionD(compiler, resolutionCandidateVars, currentLiveVars);
-
-        if (!blockInfo[block->bbNum].hasEHBoundaryIn)
+        // Handle special cases for live-in.
+        // If this block hasEHBoundaryIn, then we will mark the recentRefPosition of each EH var preemptively as
+        // spillAfter, since we don't want them to remain in registers.
+        // Otherwise, determine if we need any DummyDefs.
+        // We need DummyDefs for cases where "predBlock" isn't really a predecessor.
+        // Note that it's possible to have uses of uninitialized variables, in which case even the first
+        // block may require DummyDefs, which we are not currently adding - this means that these variables
+        // will always be considered to be in memory on entry (and reloaded when the use is encountered).
+        // TODO-CQ: Consider how best to tune this. Currently, if we create DummyDefs for uninitialized
+        // variables (which may actually be initialized along the dynamically executed paths, but not
+        // on all static paths), we wind up with excessive live ranges for some of these variables.
+
+        if (blockInfo[block->bbNum].hasEHBoundaryIn)
        {
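At this point the hunk forks on hasEHBoundaryIn. A minimal sketch of the EH-boundary path, using an assumed cut-down RefPosition (MiniRefPosition is illustrative, not a JIT type): forcing spillAfter on the most recent reference guarantees the EH var is on the stack when the handler is entered.

    struct MiniRefPosition
    {
        bool spillAfter = false;
    };

    // On entry to a block with an EH boundary, every EH var must be readable from its
    // stack home, so the value is spilled after its most recent reference.
    void forceOnStackAtEHBoundary(MiniRefPosition* recentRefPosition)
    {
        if (recentRefPosition != nullptr)
        {
            recentRefPosition->spillAfter = true;
        }
    }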
-
-            VARSET_TP newLiveIn(VarSetOps::MakeCopy(compiler, currentLiveVars));
-            if (predBlock != nullptr)
+            VARSET_TP liveInEHVars(VarSetOps::Intersection(compiler, currentLiveVars, exceptVars));
+            VarSetOps::Iter iter(compiler, liveInEHVars);
+            unsigned varIndex = 0;
+            while (iter.NextElem(&varIndex))
             {
-                // Compute set difference: newLiveIn = currentLiveVars - predBlock->bbLiveOut
-                VarSetOps::DiffD(compiler, newLiveIn, predBlock->bbLiveOut);
+                Interval* interval = getIntervalForLocalVar(varIndex);
+                if (interval->recentRefPosition != nullptr)
+                {
+                    JITDUMP(" Marking RP #%d of V%02u as spillAfter\n", interval->recentRefPosition->rpNum,
+                            interval->varNum);
+                    interval->recentRefPosition->spillAfter = true;
+                }
             }
-            bool needsDummyDefs = (!VarSetOps::IsEmpty(compiler, newLiveIn) && block != compiler->fgFirstBB);
-
-            // Create dummy def RefPositions
+        }
+        else
+        {
+            // Any lclVars live-in on a non-EH boundary edge are resolution candidates.
+            VarSetOps::UnionD(compiler, resolutionCandidateVars, currentLiveVars);

-            if (needsDummyDefs)
+            if (block != compiler->fgFirstBB)
             {
-                // If we are using locations from a predecessor, we should never require DummyDefs.
-                assert(!predBlockIsAllocated);
+                VARSET_TP newLiveIn(VarSetOps::MakeCopy(compiler, currentLiveVars));
+                if (predBlock != nullptr)
+                {
+                    // Compute set difference: newLiveIn = currentLiveVars - predBlock->bbLiveOut
+                    VarSetOps::DiffD(compiler, newLiveIn, predBlock->bbLiveOut);
+                }
+                // Don't create dummy defs for EH vars; we'll load them from the stack as/when needed.
+                VarSetOps::DiffD(compiler, newLiveIn, exceptVars);
+
+                // Create dummy def RefPositions

-                JITDUMP("Creating dummy definitions\n");
-                VarSetOps::Iter iter(compiler, newLiveIn);
-                unsigned varIndex = 0;
-                while (iter.NextElem(&varIndex))
+                if (!VarSetOps::IsEmpty(compiler, newLiveIn))
                 {
-                    // Add a dummyDef for any candidate vars that are in the "newLiveIn" set.
-                    LclVarDsc* varDsc = compiler->lvaGetDescByTrackedIndex(varIndex);
-                    assert(isCandidateVar(varDsc));
-                    Interval* interval = getIntervalForLocalVar(varIndex);
-                    RefPosition* pos = newRefPosition(interval, currentLoc, RefTypeDummyDef, nullptr,
-                                                      allRegs(interval->registerType));
-                    pos->setRegOptional(true);
+                    // If we are using locations from a predecessor, we should never require DummyDefs.
+                    assert(!predBlockIsAllocated);
+
+                    JITDUMP("Creating dummy definitions\n");
+                    VarSetOps::Iter iter(compiler, newLiveIn);
+                    unsigned varIndex = 0;
+                    while (iter.NextElem(&varIndex))
+                    {
+                        // Add a dummyDef for any candidate vars that are in the "newLiveIn" set.
+                        LclVarDsc* varDsc = compiler->lvaGetDescByTrackedIndex(varIndex);
+                        assert(isCandidateVar(varDsc));
+                        Interval* interval = getIntervalForLocalVar(varIndex);
+                        RefPosition* pos = newRefPosition(interval, currentLoc, RefTypeDummyDef, nullptr,
+                                                          allRegs(interval->registerType));
+                        pos->setRegOptional(true);
+                    }
+                    JITDUMP("Finished creating dummy definitions\n\n");
                 }
-                JITDUMP("Finished creating dummy definitions\n\n");
             }
         }
     }
@@ -2157,6 +2213,23 @@ void LinearScan::buildIntervals()
         currentLoc += 2;
         JITDUMP("\n");

+        if (firstColdLoc == MaxLocation)
+        {
+            if (block->isRunRarely())
+            {
+                firstColdLoc = currentLoc;
+                JITDUMP("firstColdLoc = %d\n", firstColdLoc);
+            }
+        }
+        else
+        {
+            // TODO: We'd like to assert the following, but we don't currently ensure that only
+            // "RunRarely" blocks are contiguous.
+            // (The funclets will generally be last, but we don't follow layout order, so we
+            // don't have to preserve that in the block sequence.)
+ // assert(block->isRunRarely()); + } + LIR::Range& blockRange = LIR::AsRange(block); for (GenTree* node : blockRange.NonPhiNodes()) { @@ -2211,85 +2284,80 @@ void LinearScan::buildIntervals() if (enregisterLocalVars) { - // We don't need exposed uses for an EH edge, because no lclVars will be kept in - // registers across such edges. - if (!blockInfo[block->bbNum].hasEHBoundaryOut) + // Insert exposed uses for a lclVar that is live-out of 'block' but not live-in to the + // next block, or any unvisited successors. + // This will address lclVars that are live on a backedge, as well as those that are kept + // live at a GT_JMP. + // + // Blocks ending with "jmp method" are marked as BBJ_HAS_JMP, + // and jmp call is represented using GT_JMP node which is a leaf node. + // Liveness phase keeps all the arguments of the method live till the end of + // block by adding them to liveout set of the block containing GT_JMP. + // + // The target of a GT_JMP implicitly uses all the current method arguments, however + // there are no actual references to them. This can cause LSRA to assert, because + // the variables are live but it sees no references. In order to correctly model the + // liveness of these arguments, we add dummy exposed uses, in the same manner as for + // backward branches. This will happen automatically via expUseSet. + // + // Note that a block ending with GT_JMP has no successors and hence the variables + // for which dummy use ref positions are added are arguments of the method. + + VARSET_TP expUseSet(VarSetOps::MakeCopy(compiler, block->bbLiveOut)); + VarSetOps::IntersectionD(compiler, expUseSet, registerCandidateVars); + BasicBlock* nextBlock = getNextBlock(); + if (nextBlock != nullptr) { - // Insert exposed uses for a lclVar that is live-out of 'block' but not live-in to the - // next block, or any unvisited successors. - // This will address lclVars that are live on a backedge, as well as those that are kept - // live at a GT_JMP. - // - // Blocks ending with "jmp method" are marked as BBJ_HAS_JMP, - // and jmp call is represented using GT_JMP node which is a leaf node. - // Liveness phase keeps all the arguments of the method live till the end of - // block by adding them to liveout set of the block containing GT_JMP. - // - // The target of a GT_JMP implicitly uses all the current method arguments, however - // there are no actual references to them. This can cause LSRA to assert, because - // the variables are live but it sees no references. In order to correctly model the - // liveness of these arguments, we add dummy exposed uses, in the same manner as for - // backward branches. This will happen automatically via expUseSet. - // - // Note that a block ending with GT_JMP has no successors and hence the variables - // for which dummy use ref positions are added are arguments of the method. 
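The set arithmetic behind expUseSet, as a sketch (std::set again standing in for VARSET_TP, an assumption for readability; the real code mutates the set in place with VarSetOps::DiffD):

    #include <set>
    #include <vector>

    using VarSet = std::set<unsigned>;

    // Start from the live-out register candidates; anything consumed by the next block
    // in allocation order, or by a not-yet-visited successor, will get a real reference,
    // so only the remainder (backedge- or GT_JMP-exposed vars) needs a dummy use.
    void filterExposedUses(VarSet& expUseSet, const VarSet* nextBlockLiveIn,
                           const std::vector<const VarSet*>& unvisitedSuccessorLiveIns)
    {
        auto subtract = [&expUseSet](const VarSet& other) {
            for (unsigned varIndex : other)
            {
                expUseSet.erase(varIndex);
            }
        };
        if (nextBlockLiveIn != nullptr)
        {
            subtract(*nextBlockLiveIn);
        }
        for (const VarSet* succLiveIn : unvisitedSuccessorLiveIns)
        {
            subtract(*succLiveIn);
        }
    }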
- - VARSET_TP expUseSet(VarSetOps::MakeCopy(compiler, block->bbLiveOut)); - VarSetOps::IntersectionD(compiler, expUseSet, registerCandidateVars); - BasicBlock* nextBlock = getNextBlock(); - if (nextBlock != nullptr) - { - VarSetOps::DiffD(compiler, expUseSet, nextBlock->bbLiveIn); - } - for (BasicBlock* succ : block->GetAllSuccs(compiler)) + VarSetOps::DiffD(compiler, expUseSet, nextBlock->bbLiveIn); + } + for (BasicBlock* succ : block->GetAllSuccs(compiler)) + { + if (VarSetOps::IsEmpty(compiler, expUseSet)) { - if (VarSetOps::IsEmpty(compiler, expUseSet)) - { - break; - } - - if (isBlockVisited(succ)) - { - continue; - } - VarSetOps::DiffD(compiler, expUseSet, succ->bbLiveIn); + break; } - if (!VarSetOps::IsEmpty(compiler, expUseSet)) + if (isBlockVisited(succ)) { - JITDUMP("Exposed uses:"); - VarSetOps::Iter iter(compiler, expUseSet); - unsigned varIndex = 0; - while (iter.NextElem(&varIndex)) - { - LclVarDsc* varDsc = compiler->lvaGetDescByTrackedIndex(varIndex); - assert(isCandidateVar(varDsc)); - Interval* interval = getIntervalForLocalVar(varIndex); - regMaskTP regMask = allRegs(interval->registerType); - RefPosition* pos = newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, regMask); - pos->setRegOptional(true); - JITDUMP(" V%02u", compiler->lvaTrackedIndexToLclNum(varIndex)); - } - JITDUMP("\n"); + continue; } + VarSetOps::DiffD(compiler, expUseSet, succ->bbLiveIn); } - // Clear the "last use" flag on any vars that are live-out from this block. + if (!VarSetOps::IsEmpty(compiler, expUseSet)) { - VARSET_TP bbLiveDefs(VarSetOps::Intersection(compiler, registerCandidateVars, block->bbLiveOut)); - VarSetOps::Iter iter(compiler, bbLiveDefs); + JITDUMP("Exposed uses:"); + VarSetOps::Iter iter(compiler, expUseSet); unsigned varIndex = 0; while (iter.NextElem(&varIndex)) { - LclVarDsc* const varDsc = compiler->lvaGetDescByTrackedIndex(varIndex); + unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; + LclVarDsc* varDsc = compiler->lvaTable + varNum; assert(isCandidateVar(varDsc)); - RefPosition* const lastRP = getIntervalForLocalVar(varIndex)->lastRefPosition; - // We should be able to assert that lastRP is non-null if it is live-out, but sometimes liveness - // lies. - if ((lastRP != nullptr) && (lastRP->bbNum == block->bbNum)) - { - lastRP->lastUse = false; - } + Interval* interval = getIntervalForLocalVar(varIndex); + RefPosition* pos = + newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType)); + pos->setRegOptional(true); + JITDUMP(" V%02u", varNum); + } + JITDUMP("\n"); + } + + // Clear the "last use" flag on any vars that are live-out from this block. + VARSET_TP bbLiveDefs(VarSetOps::Intersection(compiler, registerCandidateVars, block->bbLiveOut)); + VarSetOps::Iter iter(compiler, bbLiveDefs); + unsigned varIndex = 0; + while (iter.NextElem(&varIndex)) + { + unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; + LclVarDsc* const varDsc = &compiler->lvaTable[varNum]; + assert(isCandidateVar(varDsc)); + RefPosition* const lastRP = getIntervalForLocalVar(varIndex)->lastRefPosition; + // We should be able to assert that lastRP is non-null if it is live-out, but sometimes liveness + // lies. + if ((lastRP != nullptr) && (lastRP->bbNum == block->bbNum)) + { + lastRP->lastUse = false; } } @@ -2327,6 +2395,62 @@ void LinearScan::buildIntervals() pos->setRegOptional(true); } } + // Adjust heuristics for writeThru intervals. 
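Before the implementation that follows, a worked example of the cutoff may help; the numbers are invented, and BB_UNITY_WEIGHT is assumed to be 1.0 for the sake of the sketch:

    #include <cstdio>

    int main()
    {
        const float BB_UNITY_WEIGHT = 1.0f; // assumed scale for this example

        float weight        = 9.5f; // assumed lvRefCntWtd() of an EH var
        float initialWeight = 1.0f; // weight of the block holding its first def
        weight -= initialWeight;    // benefit left after paying for the def itself

        // A callee-save register only pays off if the remaining benefit exceeds the
        // save/restore cost; the code below uses 7 * BB_UNITY_WEIGHT as the cutoff.
        bool preferCalleeSave = weight > (BB_UNITY_WEIGHT * 7);
        std::printf("preferCalleeSave = %s\n", preferCalleeSave ? "true" : "false"); // prints "true"
        return 0;
    }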
+    if (compiler->compHndBBtabCount > 0)
+    {
+        VarSetOps::Iter iter(compiler, exceptVars);
+        unsigned varIndex = 0;
+        while (iter.NextElem(&varIndex))
+        {
+            unsigned varNum = compiler->lvaTrackedToVarNum[varIndex];
+            LclVarDsc* varDsc = compiler->lvaTable + varNum;
+            Interval* interval = getIntervalForLocalVar(varIndex);
+            assert(interval->isWriteThru);
+            BasicBlock::weight_t weight = varDsc->lvRefCntWtd();
+
+            // We'd like to only allocate registers for EH vars that have enough uses
+            // to compensate for the additional registers being live (and for the possibility
+            // that we may have to insert an additional copy).
+            // However, we don't currently have that information available. Instead, we'll
+            // aggressively assume that these vars are defined once, at their first RefPosition.
+            //
+            RefPosition* firstRefPosition = interval->firstRefPosition;
+
+            // Incoming reg args are given an initial weight of 2 * BB_UNITY_WEIGHT
+            // (see lvaComputeRefCounts(); this may be reviewed/changed in future).
+            //
+            BasicBlock::weight_t initialWeight = (firstRefPosition->refType == RefTypeParamDef)
+                                                     ? (2 * BB_UNITY_WEIGHT)
+                                                     : blockInfo[firstRefPosition->bbNum].weight;
+            weight -= initialWeight;
+
+            // If the remaining weight is less than the initial weight, we'd like to allocate it only
+            // opportunistically, but we don't currently have a mechanism to do so.
+            // For now, we'll just avoid using callee-save registers if the weight is too low.
+            if (interval->preferCalleeSave)
+            {
+                // The benefit of a callee-save register isn't as high as it would be for a normal arg.
+                // We'll have at least the cost of saving & restoring the callee-save register,
+                // so we won't break even until we have at least 4 * BB_UNITY_WEIGHT.
+                // Given that we also don't have a good way to tell whether the variable is live
+                // across a call in the non-EH code, we'll be extra conservative about this.
+                // Note that for writeThru intervals we don't update the preferences to be only callee-save.
+                unsigned calleeSaveCount =
+                    (varTypeIsFloating(interval->registerType)) ? CNT_CALLEE_SAVED_FLOAT : CNT_CALLEE_ENREG;
+                if ((weight <= (BB_UNITY_WEIGHT * 7)) || varDsc->lvVarIndex >= calleeSaveCount)
+                {
+                    // If this is relatively low weight, don't prefer callee-save at all.
+                    interval->preferCalleeSave = false;
+                }
+                else
+                {
+                    // In other cases, we'll add the callee-save regs to the preferences, but not clear
+                    // the non-callee-save regs. We also handle this case specially in tryAllocateFreeReg().
+                    interval->registerPreferences |= calleeSaveRegs(interval->registerType);
+                }
+            }
+        }
+    }

 #ifdef DEBUG
     if (getLsraExtendLifeTimes())
@@ -3023,7 +3147,20 @@ int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc)
                 srcInterval->assignRelatedInterval(varDefInterval);
             }
         }
-        newRefPosition(varDefInterval, currentLoc + 1, RefTypeDef, storeLoc, allRegs(storeLoc->TypeGet()));
+        RefPosition* def =
+            newRefPosition(varDefInterval, currentLoc + 1, RefTypeDef, storeLoc, allRegs(storeLoc->TypeGet()));
+        if (varDefInterval->isWriteThru)
+        {
+            // We always make write-thru defs reg-optional, as we can store them if they don't
+            // get a register.
+ def->regOptional = true; + } +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE + if (varTypeNeedsPartialCalleeSave(varDefInterval->registerType)) + { + varDefInterval->isPartiallySpilled = false; + } +#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE } return srcCount; diff --git a/src/coreclr/src/jit/treelifeupdater.cpp b/src/coreclr/src/jit/treelifeupdater.cpp index 3396948705aa9..f373e0c89a18e 100644 --- a/src/coreclr/src/jit/treelifeupdater.cpp +++ b/src/coreclr/src/jit/treelifeupdater.cpp @@ -96,11 +96,13 @@ void TreeLifeUpdater::UpdateLifeVar(GenTree* tree) { compiler->codeGen->genUpdateVarReg(varDsc, tree); } - if (varDsc->lvIsInReg() && tree->GetRegNum() != REG_NA) + bool isInReg = varDsc->lvIsInReg() && tree->GetRegNum() != REG_NA; + bool isInMemory = !isInReg || varDsc->lvLiveInOutOfHndlr; + if (isInReg) { compiler->codeGen->genUpdateRegLife(varDsc, isBorn, isDying DEBUGARG(tree)); } - else + if (isInMemory) { VarSetOps::AddElemD(compiler, stackVarDeltaSet, varDsc->lvVarIndex); } @@ -131,6 +133,8 @@ void TreeLifeUpdater::UpdateLifeVar(GenTree* tree) if (fldVarDsc->lvTracked) { unsigned fldVarIndex = fldVarDsc->lvVarIndex; + bool isInReg = fldVarDsc->lvIsInReg(); + bool isInMemory = !isInReg || fldVarDsc->lvLiveInOutOfHndlr; noway_assert(fldVarIndex < compiler->lvaTrackedCount); if (!hasDeadTrackedFieldVars) { @@ -139,7 +143,7 @@ void TreeLifeUpdater::UpdateLifeVar(GenTree* tree) { // We repeat this call here and below to avoid the VarSetOps::IsMember // test in this, the common case, where we have no deadTrackedFieldVars. - if (fldVarDsc->lvIsInReg()) + if (isInReg) { if (isBorn) { @@ -147,7 +151,7 @@ void TreeLifeUpdater::UpdateLifeVar(GenTree* tree) } compiler->codeGen->genUpdateRegLife(fldVarDsc, isBorn, isDying DEBUGARG(tree)); } - else + if (isInMemory) { VarSetOps::AddElemD(compiler, stackVarDeltaSet, fldVarIndex); } @@ -155,7 +159,7 @@ void TreeLifeUpdater::UpdateLifeVar(GenTree* tree) } else if (ForCodeGen && VarSetOps::IsMember(compiler, varDeltaSet, fldVarIndex)) { - if (compiler->lvaTable[i].lvIsInReg()) + if (isInReg) { if (isBorn) { @@ -163,7 +167,7 @@ void TreeLifeUpdater::UpdateLifeVar(GenTree* tree) } compiler->codeGen->genUpdateRegLife(fldVarDsc, isBorn, isDying DEBUGARG(tree)); } - else + if (isInMemory) { VarSetOps::AddElemD(compiler, stackVarDeltaSet, fldVarIndex); }
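Finally, a C++ analogue of the situation this whole patch targets may be useful. Here 'x' plays the role of an EH var: it is live into and out of the handler, so an enregistered copy must be written through to memory at each def or the handler could observe a stale value. This is illustrative only; the JIT of course operates on MSIL methods, not C++ source:

    #include <cstdio>
    #include <stdexcept>

    int useAcrossHandler(bool fail)
    {
        int x = 1; // def: with write-thru, the stack home is updated here too
        try
        {
            if (fail)
            {
                throw std::runtime_error("boom");
            }
            x = 2; // def inside the try: also written through
        }
        catch (const std::exception&)
        {
            std::printf("x = %d\n", x); // the handler reads the (always valid) stack home
        }
        return x;
    }

    int main()
    {
        return useAcrossHandler(true); // prints "x = 1"
    }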