diff --git a/src/coreclr/src/jit/codegencommon.cpp b/src/coreclr/src/jit/codegencommon.cpp index f15259334eb72..1af9bc029c5be 100644 --- a/src/coreclr/src/jit/codegencommon.cpp +++ b/src/coreclr/src/jit/codegencommon.cpp @@ -504,7 +504,10 @@ void CodeGenInterface::genUpdateRegLife(const LclVarDsc* varDsc, bool isBorn, bo } else { - assert((regSet.GetMaskVars() & regMask) == 0); + // If this is going live, the register must not have a variable in it, except + // in the case of an exception variable, which may be already treated as live + // in the register. + assert(varDsc->lvLiveInOutOfHndlr || ((regSet.GetMaskVars() & regMask) == 0)); regSet.AddMaskVars(regMask); } } @@ -681,12 +684,14 @@ void Compiler::compChangeLife(VARSET_VALARG_TP newLife) unsigned deadVarIndex = 0; while (deadIter.NextElem(&deadVarIndex)) { - unsigned varNum = lvaTrackedIndexToLclNum(deadVarIndex); - LclVarDsc* varDsc = lvaGetDesc(varNum); - bool isGCRef = (varDsc->TypeGet() == TYP_REF); - bool isByRef = (varDsc->TypeGet() == TYP_BYREF); + unsigned varNum = lvaTrackedIndexToLclNum(deadVarIndex); + LclVarDsc* varDsc = lvaGetDesc(varNum); + bool isGCRef = (varDsc->TypeGet() == TYP_REF); + bool isByRef = (varDsc->TypeGet() == TYP_BYREF); + bool isInReg = varDsc->lvIsInReg(); + bool isInMemory = !isInReg || varDsc->lvLiveInOutOfHndlr; - if (varDsc->lvIsInReg()) + if (isInReg) { // TODO-Cleanup: Move the code from compUpdateLifeVar to genUpdateRegLife that updates the // gc sets @@ -701,8 +706,8 @@ void Compiler::compChangeLife(VARSET_VALARG_TP newLife) } codeGen->genUpdateRegLife(varDsc, false /*isBorn*/, true /*isDying*/ DEBUGARG(nullptr)); } - // This isn't in a register, so update the gcVarPtrSetCur. - else if (isGCRef || isByRef) + // Update the gcVarPtrSetCur if it is in memory. + if (isInMemory && (isGCRef || isByRef)) { VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, deadVarIndex); JITDUMP("\t\t\t\t\t\t\tV%02u becoming dead\n", varNum); @@ -724,13 +729,18 @@ void Compiler::compChangeLife(VARSET_VALARG_TP newLife) if (varDsc->lvIsInReg()) { -#ifdef DEBUG - if (VarSetOps::IsMember(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex)) + // If this variable is going live in a register, it is no longer live on the stack, + // unless it is an EH var, which always remains live on the stack. + if (!varDsc->lvLiveInOutOfHndlr) { - JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", varNum); - } +#ifdef DEBUG + if (VarSetOps::IsMember(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex)) + { + JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", varNum); + } #endif // DEBUG - VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex); + VarSetOps::RemoveElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex); + } codeGen->genUpdateRegLife(varDsc, true /*isBorn*/, false /*isDying*/ DEBUGARG(nullptr)); regMaskTP regMask = varDsc->lvRegMask(); if (isGCRef) @@ -742,9 +752,9 @@ void Compiler::compChangeLife(VARSET_VALARG_TP newLife) codeGen->gcInfo.gcRegByrefSetCur |= regMask; } } - // This isn't in a register, so update the gcVarPtrSetCur else if (lvaIsGCTracked(varDsc)) { + // This isn't in a register, so update the gcVarPtrSetCur to show that it's live on the stack. 
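(Illustrative sketch, not part of the patch: the liveness rule this hunk encodes is that an EH-exposed, i.e. write-thru, variable that is in a register is also live on the stack, so its GC stack slot must stay reported. `Var` and `isInMemory` below are hypothetical stand-ins for the LclVarDsc fields used above.)

    #include <cstdio>

    struct Var
    {
        bool isInReg;          // stands in for LclVarDsc::lvIsInReg()
        bool liveInOutOfHndlr; // stands in for LclVarDsc::lvLiveInOutOfHndlr
    };

    // Mirrors the predicate above: the var is live in memory if it has no
    // register, or if it is EH-exposed (live in both places at once).
    bool isInMemory(const Var& v)
    {
        return !v.isInReg || v.liveInOutOfHndlr;
    }

    int main()
    {
        const Var cases[] = {{false, false}, {true, false}, {true, true}};
        for (const Var& v : cases)
        {
            printf("inReg=%d ehLive=%d -> inMemory=%d\n", v.isInReg, v.liveInOutOfHndlr, isInMemory(v));
        }
        return 0;
    }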
VarSetOps::AddElemD(this, codeGen->gcInfo.gcVarPtrSetCur, bornVarIndex); JITDUMP("\t\t\t\t\t\t\tV%02u becoming live\n", varNum); } @@ -3269,6 +3279,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere // 1 means the first part of a register argument // 2, 3 or 4 means the second,third or fourth part of a multireg argument bool stackArg; // true if the argument gets homed to the stack + bool writeThru; // true if the argument gets homed to both stack and register bool processed; // true after we've processed the argument (and it is in its final location) bool circular; // true if this register participates in a circular dependency loop. @@ -3605,6 +3616,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere } regArgTab[regArgNum + i].processed = false; + regArgTab[regArgNum + i].writeThru = (varDsc->lvIsInReg() && varDsc->lvLiveInOutOfHndlr); /* mark stack arguments since we will take care of those first */ regArgTab[regArgNum + i].stackArg = (varDsc->lvIsInReg()) ? false : true; @@ -3765,9 +3777,9 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere noway_assert(((regArgMaskLive & RBM_FLTARG_REGS) == 0) && "Homing of float argument registers with circular dependencies not implemented."); - /* Now move the arguments to their locations. - * First consider ones that go on the stack since they may - * free some registers. */ + // Now move the arguments to their locations. + // First consider ones that go on the stack since they may free some registers. + // Also home writeThru args, since they're also homed to the stack. regArgMaskLive = regState->rsCalleeRegArgMaskLiveIn; // reset the live in to what it was at the start for (argNum = 0; argNum < argMax; argNum++) @@ -3805,7 +3817,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere // If this arg is never on the stack, go to the next one. if (varDsc->lvType == TYP_LONG) { - if (regArgTab[argNum].slot == 1 && !regArgTab[argNum].stackArg) + if (regArgTab[argNum].slot == 1 && !regArgTab[argNum].stackArg && !regArgTab[argNum].writeThru) { continue; } @@ -3839,7 +3851,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere noway_assert(varDsc->lvIsParam); noway_assert(varDsc->lvIsRegArg); - noway_assert(varDsc->lvIsInReg() == false || + noway_assert(varDsc->lvIsInReg() == false || varDsc->lvLiveInOutOfHndlr || (varDsc->lvType == TYP_LONG && varDsc->GetOtherReg() == REG_STK && regArgTab[argNum].slot == 2)); var_types storeType = TYP_UNDEF; @@ -3906,13 +3918,17 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere #endif // USING_SCOPE_INFO } - /* mark the argument as processed */ - - regArgTab[argNum].processed = true; - regArgMaskLive &= ~genRegMask(srcRegNum); + // Mark the argument as processed, and set it as no longer live in srcRegNum, + // unless it is a writeThru var, in which case we home it to the stack, but + // don't mark it as processed until below. 
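(Illustrative sketch, not part of the patch: the prolog-homing rule just described, with hypothetical types. A write-thru argument gets the same stack store as a pure stack argument, but it is not retired from the live register mask, since it remains live in its incoming register.)

    #include <cstdint>

    struct RegArgEntry
    {
        bool stackArg;  // homed only to the stack
        bool writeThru; // homed to the stack and also kept live in its register
        bool processed; // true once the arg is in its final location
    };

    // Simplified model of the stack-homing pass above; the store emission is elided.
    void homeStackPortion(RegArgEntry& arg, uint64_t& regArgMaskLive, uint64_t srcRegBit)
    {
        // ... emit the store to the stack home here ...
        if (!arg.writeThru)
        {
            arg.processed = true; // pure stack arg: done
            regArgMaskLive &= ~srcRegBit;
        }
        // A write-thru arg is processed later, when registers are dealt with.
    }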
+ if (!regArgTab[argNum].writeThru) + { + regArgTab[argNum].processed = true; + regArgMaskLive &= ~genRegMask(srcRegNum); + } #if defined(TARGET_ARM) - if (storeType == TYP_DOUBLE) + if ((storeType == TYP_DOUBLE) && !regArgTab[argNum].writeThru) { regArgTab[argNum + 1].processed = true; regArgMaskLive &= ~genRegMask(REG_NEXT(srcRegNum)); @@ -4618,7 +4634,7 @@ void CodeGen::genCheckUseBlockInit() { if (!varDsc->lvRegister) { - if (!varDsc->lvIsInReg()) + if (!varDsc->lvIsInReg() || varDsc->lvLiveInOutOfHndlr) { // Var is on the stack at entry. initStkLclCnt += @@ -7233,7 +7249,9 @@ void CodeGen::genFnProlog() continue; } - if (varDsc->lvIsInReg()) + bool isInReg = varDsc->lvIsInReg(); + bool isInMemory = !isInReg || varDsc->lvLiveInOutOfHndlr; + if (isInReg) { regMaskTP regMask = genRegMask(varDsc->GetRegNum()); if (!varDsc->IsFloatRegType()) @@ -7264,7 +7282,7 @@ void CodeGen::genFnProlog() initFltRegs |= regMask; } } - else + if (isInMemory) { INIT_STK: diff --git a/src/coreclr/src/jit/codegenlinear.cpp b/src/coreclr/src/jit/codegenlinear.cpp index c6f7b6f8c483c..e3973ba565119 100644 --- a/src/coreclr/src/jit/codegenlinear.cpp +++ b/src/coreclr/src/jit/codegenlinear.cpp @@ -239,15 +239,18 @@ void CodeGen::genCodeForBBlist() { newRegByrefSet |= varDsc->lvRegMask(); } -#ifdef DEBUG - if (verbose && VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex)) + if (!varDsc->lvLiveInOutOfHndlr) { - VarSetOps::AddElemD(compiler, removedGCVars, varIndex); - } +#ifdef DEBUG + if (verbose && VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex)) + { + VarSetOps::AddElemD(compiler, removedGCVars, varIndex); + } #endif // DEBUG - VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex); + VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varIndex); + } } - else if (compiler->lvaIsGCTracked(varDsc)) + if ((!varDsc->lvIsInReg() || varDsc->lvLiveInOutOfHndlr) && compiler->lvaIsGCTracked(varDsc)) { #ifdef DEBUG if (verbose && !VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varIndex)) @@ -823,10 +826,20 @@ void CodeGen::genSpillVar(GenTree* tree) var_types lclTyp = genActualType(varDsc->TypeGet()); emitAttr size = emitTypeSize(lclTyp); - instruction storeIns = ins_Store(lclTyp, compiler->isSIMDTypeLocalAligned(varNum)); - assert(varDsc->GetRegNum() == tree->GetRegNum()); - inst_TT_RV(storeIns, size, tree, tree->GetRegNum()); + // If this is a write-thru variable, we don't actually spill at a use, but we will kill the var in the reg + // (below). + if (!varDsc->lvLiveInOutOfHndlr) + { + instruction storeIns = ins_Store(lclTyp, compiler->isSIMDTypeLocalAligned(varNum)); + assert(varDsc->GetRegNum() == tree->GetRegNum()); + inst_TT_RV(storeIns, size, tree, tree->GetRegNum()); + } + // We should only have both GTF_SPILL (i.e. the flag causing this method to be called) and + // GTF_SPILLED on a write-thru def, for which we should not be calling this method. + assert((tree->gtFlags & GTF_SPILLED) == 0); + + // Remove the live var from the register. genUpdateRegLife(varDsc, /*isBorn*/ false, /*isDying*/ true DEBUGARG(tree)); gcInfo.gcMarkRegSetNpt(varDsc->lvRegMask()); @@ -847,10 +860,19 @@ void CodeGen::genSpillVar(GenTree* tree) } tree->gtFlags &= ~GTF_SPILL; - varDsc->SetRegNum(REG_STK); - if (varTypeIsMultiReg(tree)) + // If this is NOT a write-thru, reset the var location. 
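(Illustrative note, not part of the patch: a hypothetical one-line summary of what "spilling" means for a write-thru variable at a use.)

    // For a write-thru (EH-exposed) var, every def has already written the stack
    // home, so a "spill" at a use only releases the register; a store is emitted
    // only for ordinary enregistered vars.
    bool spillNeedsStore(bool ehLive /* lvLiveInOutOfHndlr */)
    {
        return !ehLive;
    }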
+ if ((tree->gtFlags & GTF_SPILLED) == 0) { - varDsc->SetOtherReg(REG_STK); + varDsc->SetRegNum(REG_STK); + if (varTypeIsMultiReg(tree)) + { + varDsc->SetOtherReg(REG_STK); + } + } + else + { + // We only have 'GTF_SPILL' and 'GTF_SPILLED' on a def of a write-thru lclVar. + assert(varDsc->lvLiveInOutOfHndlr && ((tree->gtFlags & GTF_VAR_DEF) != 0)); } #ifdef USING_VARIABLE_LIVE_RANGE @@ -1030,13 +1052,16 @@ void CodeGen::genUnspillRegIfNeeded(GenTree* tree) } #endif // USING_VARIABLE_LIVE_RANGE -#ifdef DEBUG - if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex)) + if (!varDsc->lvLiveInOutOfHndlr) { - JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->GetLclNum()); - } +#ifdef DEBUG + if (VarSetOps::IsMember(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex)) + { + JITDUMP("\t\t\t\t\t\t\tRemoving V%02u from gcVarPtrSetCur\n", lcl->GetLclNum()); + } #endif // DEBUG - VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); + VarSetOps::RemoveElemD(compiler, gcInfo.gcVarPtrSetCur, varDsc->lvVarIndex); + } #ifdef DEBUG if (compiler->verbose) @@ -1316,15 +1341,15 @@ regNumber CodeGen::genConsumeReg(GenTree* tree) LclVarDsc* varDsc = &compiler->lvaTable[lcl->GetLclNum()]; assert(varDsc->lvLRACandidate); - if ((tree->gtFlags & GTF_VAR_DEATH) != 0) - { - gcInfo.gcMarkRegSetNpt(genRegMask(varDsc->GetRegNum())); - } - else if (varDsc->GetRegNum() == REG_STK) + if (varDsc->GetRegNum() == REG_STK) { // We have loaded this into a register only temporarily gcInfo.gcMarkRegSetNpt(genRegMask(tree->GetRegNum())); } + else if ((tree->gtFlags & GTF_VAR_DEATH) != 0) + { + gcInfo.gcMarkRegSetNpt(genRegMask(varDsc->GetRegNum())); + } } else { @@ -1852,13 +1877,24 @@ void CodeGen::genProduceReg(GenTree* tree) if (genIsRegCandidateLocal(tree)) { - // Store local variable to its home location. - // Ensure that lclVar stores are typed correctly. - unsigned varNum = tree->AsLclVarCommon()->GetLclNum(); - assert(!compiler->lvaTable[varNum].lvNormalizeOnStore() || - (tree->TypeGet() == genActualType(compiler->lvaTable[varNum].TypeGet()))); - inst_TT_RV(ins_Store(tree->gtType, compiler->isSIMDTypeLocalAligned(varNum)), emitTypeSize(tree->TypeGet()), - tree, tree->GetRegNum()); + unsigned varNum = tree->AsLclVarCommon()->GetLclNum(); + LclVarDsc* varDsc = compiler->lvaGetDesc(varNum); + assert(!varDsc->lvNormalizeOnStore() || (tree->TypeGet() == genActualType(varDsc->TypeGet()))); + + // If we reach here, we have a register candidate local that is marked with GTF_SPILL. + // This flag generally means that we need to spill this local. + // The exception is the case of a use of an EH var that is being "spilled" + // to the stack, indicated by GTF_SPILL (note that all EH lclVar defs are always + // spilled, i.e. write-thru). + // An EH var use is always valid on the stack (so we don't need to actually spill it), + // but the GTF_SPILL flag records the fact that the register value is going dead. + if (((tree->gtFlags & GTF_VAR_DEF) != 0) || !varDsc->lvLiveInOutOfHndlr) + { + // Store local variable to its home location. + // Ensure that lclVar stores are typed correctly. 
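(Illustrative note, not part of the patch: the def/use distinction described in the comment above, condensed into a hypothetical predicate.)

    // Defs of write-thru vars must always be written to the stack home; a use of
    // an EH var already has a valid stack copy, so GTF_SPILL at a use merely
    // records that the register value is going dead.
    bool storeAtSpill(bool isDef /* GTF_VAR_DEF */, bool ehLive /* lvLiveInOutOfHndlr */)
    {
        return isDef || !ehLive;
    }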
+ inst_TT_RV(ins_Store(tree->gtType, compiler->isSIMDTypeLocalAligned(varNum)), + emitTypeSize(tree->TypeGet()), tree, tree->GetRegNum()); + } } else { diff --git a/src/coreclr/src/jit/compiler.cpp b/src/coreclr/src/jit/compiler.cpp index c066d2b1be9f1..7eaf9f5cf3258 100644 --- a/src/coreclr/src/jit/compiler.cpp +++ b/src/coreclr/src/jit/compiler.cpp @@ -4522,6 +4522,37 @@ void Compiler::compCompile(void** methodCodePtr, ULONG* methodCodeSize, JitFlags EndPhase(PHASE_CLONE_FINALLY); +#if DEBUG + if (lvaEnregEHVars) + { + unsigned methHash = info.compMethodHash(); + char* lostr = getenv("JitEHWTHashLo"); + unsigned methHashLo = 0; + bool dump = false; + if (lostr != nullptr) + { + sscanf_s(lostr, "%x", &methHashLo); + dump = true; + } + char* histr = getenv("JitEHWTHashHi"); + unsigned methHashHi = UINT32_MAX; + if (histr != nullptr) + { + sscanf_s(histr, "%x", &methHashHi); + dump = true; + } + if (methHash < methHashLo || methHash > methHashHi) + { + lvaEnregEHVars = false; + } + else if (dump) + { + printf("Enregistering EH Vars for method %s, hash = 0x%x.\n", info.compFullName, info.compMethodHash()); + printf(""); // flush + } + } +#endif + // Compute bbNum, bbRefs and bbPreds // JITDUMP("\nRenumbering the basic blocks for fgComputePreds\n"); diff --git a/src/coreclr/src/jit/compiler.h b/src/coreclr/src/jit/compiler.h index d8aff229e8156..0e55e8172f69b 100644 --- a/src/coreclr/src/jit/compiler.h +++ b/src/coreclr/src/jit/compiler.h @@ -415,6 +415,8 @@ class LclVarDsc unsigned char lvDoNotEnregister : 1; // Do not enregister this variable. unsigned char lvFieldAccessed : 1; // The var is a struct local, and a field of the variable is accessed. Affects // struct promotion. + unsigned char lvLiveInOutOfHndlr : 1; // The variable is live in or out of an exception handler, and therefore must + // be on the stack (at least at those boundaries.) unsigned char lvInSsa : 1; // The variable is in SSA form (set by SsaBuilder) @@ -424,9 +426,6 @@ class LclVarDsc // also, lvType == TYP_STRUCT prevents enregistration. At least one of the reasons should be true. unsigned char lvVMNeedsStackAddr : 1; // The VM may have access to a stack-relative address of the variable, and // read/write its value. - unsigned char lvLiveInOutOfHndlr : 1; // The variable was live in or out of an exception handler, and this required - // the variable to be - // in the stack (at least at those boundaries.) unsigned char lvLclFieldExpr : 1; // The variable is not a struct, but was accessed like one (e.g., reading a // particular byte from an int). unsigned char lvLclBlockOpAddr : 1; // The variable was written to via a block operation that took its address. @@ -3005,6 +3004,9 @@ class Compiler void lvaSetVarAddrExposed(unsigned varNum); void lvaSetVarLiveInOutOfHandler(unsigned varNum); bool lvaVarDoNotEnregister(unsigned varNum); + + bool lvaEnregEHVars; + #ifdef DEBUG // Reasons why we can't enregister. Some of these correspond to debug properties of local vars. enum DoNotEnregisterReason @@ -3027,6 +3029,7 @@ class Compiler DNER_PinningRef, #endif }; + #endif void lvaSetVarDoNotEnregister(unsigned varNum DEBUGARG(DoNotEnregisterReason reason)); diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp index a9aac9d6d4c22..05c6e0e65f71e 100644 --- a/src/coreclr/src/jit/gentree.cpp +++ b/src/coreclr/src/jit/gentree.cpp @@ -2755,6 +2755,13 @@ bool Compiler::gtIsLikelyRegVar(GenTree* tree) return false; } + // If this is an EH-live var, return false if it is a def, + // as it will have to go to memory. 
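(Illustrative note, not part of the patch: the costing rule the comment above describes, as a hypothetical helper.)

    // A def of an EH-exposed var always writes memory, so it should not be
    // costed as a register access even when the var is enregistered.
    bool likelyRegVarForCosting(bool ehLive, bool isDef, bool passesOtherChecks)
    {
        if (ehLive && isDef)
        {
            return false; // the write-thru store happens regardless
        }
        return passesOtherChecks;
    }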
+ if (varDsc->lvLiveInOutOfHndlr && ((tree->gtFlags & GTF_VAR_DEF) != 0)) + { + return false; + } + // Be pessimistic if ref counts are not yet set up. // // Perhaps we should be optimistic though. diff --git a/src/coreclr/src/jit/instr.cpp b/src/coreclr/src/jit/instr.cpp index 7a0a6c63f37fd..2ee631fceeff8 100644 --- a/src/coreclr/src/jit/instr.cpp +++ b/src/coreclr/src/jit/instr.cpp @@ -662,6 +662,7 @@ void CodeGen::inst_TT_RV(instruction ins, emitAttr size, GenTree* tree, regNumbe #ifdef DEBUG // The tree must have a valid register value. assert(reg != REG_STK); + bool isValidInReg = ((tree->gtFlags & GTF_SPILLED) == 0); if (!isValidInReg) { diff --git a/src/coreclr/src/jit/jitconfigvalues.h b/src/coreclr/src/jit/jitconfigvalues.h index 688008f69717d..d3a888d79ad7f 100644 --- a/src/coreclr/src/jit/jitconfigvalues.h +++ b/src/coreclr/src/jit/jitconfigvalues.h @@ -244,6 +244,9 @@ CONFIG_INTEGER(EnablePOPCNT, W("EnablePOPCNT"), 1) // Enable POPCNT CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 0) #endif // !defined(TARGET_AMD64) && !defined(TARGET_X86) +CONFIG_INTEGER(EnableEHWriteThru, W("EnableEHWriteThru"), 0) // Enable the register allocator to support EH-write thru: + // partial enregistration of vars exposed on EH boundaries + // clang-format off #if defined(TARGET_ARM64) diff --git a/src/coreclr/src/jit/lclvars.cpp b/src/coreclr/src/jit/lclvars.cpp index 4a5b2f1132efb..f38437012ff2b 100644 --- a/src/coreclr/src/jit/lclvars.cpp +++ b/src/coreclr/src/jit/lclvars.cpp @@ -85,6 +85,8 @@ void Compiler::lvaInit() lvaCurEpoch = 0; structPromotionHelper = new (this, CMK_Generic) StructPromotionHelper(this); + + lvaEnregEHVars = (((opts.compFlags & CLFLG_REGVAR) != 0) && JitConfig.EnableEHWriteThru()); } /*****************************************************************************/ @@ -2377,9 +2379,11 @@ void Compiler::lvaSetVarAddrExposed(unsigned varNum) // void Compiler::lvaSetVarLiveInOutOfHandler(unsigned varNum) { - LclVarDsc* varDsc = lvaGetDesc(varNum); + noway_assert(varNum < lvaCount); - INDEBUG(varDsc->lvLiveInOutOfHndlr = 1); + LclVarDsc* varDsc = &lvaTable[varNum]; + + varDsc->lvLiveInOutOfHndlr = 1; if (varDsc->lvPromoted) { @@ -2388,12 +2392,27 @@ void Compiler::lvaSetVarLiveInOutOfHandler(unsigned varNum) for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i) { noway_assert(lvaTable[i].lvIsStructField); - INDEBUG(lvaTable[i].lvLiveInOutOfHndlr = 1); - lvaSetVarDoNotEnregister(i DEBUGARG(DNER_LiveInOutOfHandler)); + lvaTable[i].lvLiveInOutOfHndlr = 1; + if (!lvaEnregEHVars) + { + lvaSetVarDoNotEnregister(i DEBUGARG(DNER_LiveInOutOfHandler)); + } } } - lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LiveInOutOfHandler)); + if (!lvaEnregEHVars) + { + lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LiveInOutOfHandler)); + } +#ifdef JIT32_GCENCODER + else if (lvaKeepAliveAndReportThis() && (varNum == info.compThisArg)) + { + // For the JIT32_GCENCODER, when lvaKeepAliveAndReportThis is true, we must either keep the "this" pointer + // in the same register for the entire method, or keep it on the stack. If it is EH-exposed, we can't ever + // keep it in a register, since it must also be live on the stack. Therefore, we won't attempt to allocate it. 
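(Illustrative sketch, not part of the patch: the enregistration decision made by lvaSetVarLiveInOutOfHandler above, condensed; the booleans are stand-ins for lvaEnregEHVars and the JIT32_GCENCODER keep-alive-"this" condition.)

    bool mayStillEnregister(bool enregEHVars, bool isKeepAliveThisUnderJit32Gc)
    {
        if (!enregEHVars)
        {
            return false; // feature off: mark DNER_LiveInOutOfHandler
        }
        if (isKeepAliveThisUnderJit32Gc)
        {
            return false; // "this" must stay in one location for the whole method
        }
        return true; // remains a candidate for write-thru enregistration
    }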
+ lvaSetVarDoNotEnregister(varNum DEBUGARG(DNER_LiveInOutOfHandler)); + } +#endif // JIT32_GCENCODER } /***************************************************************************** @@ -4107,8 +4126,20 @@ void Compiler::lvaComputeRefCounts(bool isRecompute, bool setSlotNumbers) case GT_STORE_LCL_VAR: case GT_STORE_LCL_FLD: { - const unsigned lclNum = node->AsLclVarCommon()->GetLclNum(); - lvaTable[lclNum].incRefCnts(weight, this); + LclVarDsc* varDsc = lvaGetDesc(node->AsLclVarCommon()); + // If this is an EH var, use a zero weight for defs, so that we don't + // count those in our heuristic for register allocation, since they always + // must be stored, so there's no value in enregistering them at defs; only + // if there are enough uses to justify it. + if (varDsc->lvLiveInOutOfHndlr && !varDsc->lvDoNotEnregister && + ((node->gtFlags & GTF_VAR_DEF) != 0)) + { + varDsc->incRefCnts(0, this); + } + else + { + varDsc->incRefCnts(weight, this); + } break; } @@ -6826,7 +6857,7 @@ void Compiler::lvaDumpEntry(unsigned lclNum, FrameLayoutState curState, size_t r { printf("V"); } - if (varDsc->lvLiveInOutOfHndlr) + if (lvaEnregEHVars && varDsc->lvLiveInOutOfHndlr) { printf("H"); } diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp index c2b03ace694c4..c3f647e3561d3 100644 --- a/src/coreclr/src/jit/lsra.cpp +++ b/src/coreclr/src/jit/lsra.cpp @@ -186,7 +186,16 @@ unsigned LinearScan::getWeight(RefPosition* refPos) if (refPos->getInterval()->isSpilled) { // Decrease the weight if the interval has already been spilled. - weight -= BB_UNITY_WEIGHT; + if (varDsc->lvLiveInOutOfHndlr) + { + // An EH var is always spilled at defs, and we'll decrease the weight by half, + // since only the reload is needed. + weight = weight / 2; + } + else + { + weight -= BB_UNITY_WEIGHT; + } } } else @@ -613,6 +622,7 @@ LinearScan::LinearScan(Compiler* theCompiler) { #ifdef DEBUG maxNodeLocation = 0; + firstColdLoc = MaxLocation; activeRefPosition = nullptr; // Get the value of the environment variable that controls stress for register allocation @@ -823,6 +833,7 @@ void LinearScan::setBlockSequence() blockInfo[block->bbNum].weight = block->getBBWeight(compiler); blockInfo[block->bbNum].hasEHBoundaryIn = block->hasEHBoundaryIn(); blockInfo[block->bbNum].hasEHBoundaryOut = block->hasEHBoundaryOut(); + blockInfo[block->bbNum].hasEHPred = false; #if TRACK_LSRA_STATS blockInfo[block->bbNum].spillCount = 0; @@ -847,12 +858,25 @@ void LinearScan::setBlockSequence() assert(!"Switch with single successor"); } } - if (block->isBBCallAlwaysPairTail() || (hasUniquePred && predBlock->hasEHBoundaryOut())) + + // We treat BBCallAlwaysPairTail blocks as having EH flow, since we can't + // insert resolution moves into those blocks. + if (block->isBBCallAlwaysPairTail()) { - // Treat this as having incoming EH flow, since we can't insert resolution moves into - // the ALWAYS block of a BBCallAlwaysPair, and a unique pred with an EH out edge won't - // allow us to keep any variables enregistered. - blockInfo[block->bbNum].hasEHBoundaryIn = true; + blockInfo[block->bbNum].hasEHBoundaryIn = true; + blockInfo[block->bbNum].hasEHBoundaryOut = true; + } + else if (predBlock->hasEHBoundaryOut() || predBlock->isBBCallAlwaysPairTail()) + { + if (hasUniquePred) + { + // A unique pred with an EH out edge won't allow us to keep any variables enregistered. 
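(Illustrative sketch, not part of the patch: the block classification added to setBlockSequence above, with simplified types; `predHasEHOut` folds together `predBlock->hasEHBoundaryOut()` and `predBlock->isBBCallAlwaysPairTail()`.)

    struct BlockEHFlags
    {
        bool hasEHBoundaryIn  = false;
        bool hasEHBoundaryOut = false;
        bool hasEHPred        = false;
    };

    void classifyBlock(BlockEHFlags& info, bool isCallAlwaysPairTail, bool predHasEHOut, bool hasUniquePred)
    {
        if (isCallAlwaysPairTail)
        {
            // No resolution moves can be inserted here: treat both edges as EH boundaries.
            info.hasEHBoundaryIn  = true;
            info.hasEHBoundaryOut = true;
        }
        else if (predHasEHOut)
        {
            if (hasUniquePred)
            {
                info.hasEHBoundaryIn = true; // the only incoming edge is an EH edge
            }
            else
            {
                info.hasEHPred = true; // some, but not all, incoming flow is EH flow
            }
        }
    }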
+ blockInfo[block->bbNum].hasEHBoundaryIn = true; + } + else + { + blockInfo[block->bbNum].hasEHPred = true; + } } } @@ -974,6 +998,10 @@ void LinearScan::setBlockSequence() { JITDUMP(" EH-out"); } + if (blockInfo[block->bbNum].hasEHPred) + { + JITDUMP(" has EH pred"); + } JITDUMP("\n"); } JITDUMP("\n"); @@ -1351,9 +1379,6 @@ void Interval::setLocalNumber(Compiler* compiler, unsigned lclNum, LinearScan* l // void LinearScan::identifyCandidatesExceptionDataflow() { -#ifdef DEBUG - VARSET_TP finallyVars(VarSetOps::MakeEmpty(compiler)); -#endif BasicBlock* block; foreach_block(compiler, block) @@ -1367,15 +1392,13 @@ void LinearScan::identifyCandidatesExceptionDataflow() if (block->hasEHBoundaryOut()) { VarSetOps::UnionD(compiler, exceptVars, block->bbLiveOut); -#ifdef DEBUG if (block->bbJumpKind == BBJ_EHFINALLYRET) { - // live on exit from finally. + // Live on exit from finally. // We track these separately because, in addition to having EH live-out semantics, - // we want to verify that they are must-init. + // we need to mark them must-init. VarSetOps::UnionD(compiler, finallyVars, block->bbLiveOut); } -#endif } } @@ -1559,6 +1582,7 @@ void LinearScan::identifyCandidates() } VarSetOps::AssignNoCopy(compiler, exceptVars, VarSetOps::MakeEmpty(compiler)); + VarSetOps::AssignNoCopy(compiler, finallyVars, VarSetOps::MakeEmpty(compiler)); if (compiler->compHndBBtabCount > 0) { identifyCandidatesExceptionDataflow(); @@ -1719,6 +1743,12 @@ void LinearScan::identifyCandidates() newInt->isStructField = true; } + if (varDsc->lvLiveInOutOfHndlr) + { + newInt->isWriteThru = true; + setIntervalAsSpilled(newInt); + } + INTRACK_STATS(regCandidateVarCount++); // We maintain two sets of FP vars - those that meet the first threshold of weighted ref Count, @@ -2156,6 +2186,11 @@ void LinearScan::checkLastUses(BasicBlock* block) VARSET_TP liveInNotComputedLive(VarSetOps::Diff(compiler, block->bbLiveIn, computedLive)); + // We may have exception vars in the liveIn set of exception blocks that are not computed live. 
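(Illustrative sketch, not part of the patch: the verification tweak above with std::set standing in for VARSET_TP; EH-live vars are subtracted before reporting mismatches, since they may be live-in without being computed live on the normal flow path.)

    #include <set>

    std::set<unsigned> liveInNotComputedLive(std::set<unsigned> liveIn,
                                             const std::set<unsigned>& computedLive,
                                             const std::set<unsigned>& handlerLiveVars,
                                             bool blockHasExnFlow)
    {
        for (unsigned v : computedLive)
        {
            liveIn.erase(v);
        }
        if (blockHasExnFlow)
        {
            for (unsigned v : handlerLiveVars)
            {
                liveIn.erase(v); // exception vars are not genuine mismatches
            }
        }
        return liveIn; // anything left is a real liveness bug
    }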
+ if (compiler->ehBlockHasExnFlowDsc(block)) + { + VarSetOps::DiffD(compiler, liveInNotComputedLive, compiler->fgGetHandlerLiveVars(block)); + } VarSetOps::Iter liveInNotComputedLiveIter(compiler, liveInNotComputedLive); unsigned liveInNotComputedLiveIndex = 0; while (liveInNotComputedLiveIter.NextElem(&liveInNotComputedLiveIndex)) @@ -2287,13 +2322,20 @@ BasicBlock* LinearScan::findPredBlockForLiveIn(BasicBlock* block, // | // block // - for (flowList* pred = otherBlock->bbPreds; pred != nullptr; pred = pred->flNext) + if (blockInfo[otherBlock->bbNum].hasEHBoundaryIn) + { + return nullptr; + } + else { - BasicBlock* otherPred = pred->flBlock; - if (otherPred->bbNum == blockInfo[otherBlock->bbNum].predBBNum) + for (flowList* pred = otherBlock->bbPreds; pred != nullptr; pred = pred->flNext) { - predBlock = otherPred; - break; + BasicBlock* otherPred = pred->flBlock; + if (otherPred->bbNum == blockInfo[otherBlock->bbNum].predBBNum) + { + predBlock = otherPred; + break; + } } } } @@ -2498,9 +2540,7 @@ bool LinearScan::registerIsAvailable(RegRecord* physRegRecord, LsraLocation* nextRefLocationPtr, RegisterType regType) { - *nextRefLocationPtr = MaxLocation; LsraLocation nextRefLocation = MaxLocation; - regMaskTP regMask = genRegMask(physRegRecord->regNum); if (physRegRecord->isBusyUntilNextKill) { return false; @@ -2510,12 +2550,12 @@ bool LinearScan::registerIsAvailable(RegRecord* physRegRecord, if (nextPhysReference != nullptr) { nextRefLocation = nextPhysReference->nodeLocation; - // if (nextPhysReference->refType == RefTypeFixedReg) nextRefLocation--; } else if (!physRegRecord->isCalleeSave) { nextRefLocation = MaxLocation - 1; } + *nextRefLocationPtr = nextRefLocation; Interval* assignedInterval = physRegRecord->assignedInterval; @@ -2546,7 +2586,8 @@ bool LinearScan::registerIsAvailable(RegRecord* physRegRecord, // (Note that it is unlikely that we have a recent copy or move to a different register, // where this physRegRecord is still pointing at an earlier copy or move, but it is possible, // especially in stress modes.) 
- if ((recentReference->registerAssignment == regMask) && copyOrMoveRegInUse(recentReference, currentLoc)) + if ((recentReference->registerAssignment == genRegMask(physRegRecord->regNum)) && + copyOrMoveRegInUse(recentReference, currentLoc)) { return false; } @@ -2569,12 +2610,13 @@ bool LinearScan::registerIsAvailable(RegRecord* physRegRecord, { if (nextReference->nodeLocation < nextRefLocation) { - nextRefLocation = nextReference->nodeLocation; + *nextRefLocationPtr = nextReference->nodeLocation; } } else { - assert(recentReference->copyReg && recentReference->registerAssignment != regMask); + assert(recentReference->copyReg && + (recentReference->registerAssignment != genRegMask(physRegRecord->regNum))); } } else @@ -2582,10 +2624,6 @@ bool LinearScan::registerIsAvailable(RegRecord* physRegRecord, return false; } } - if (nextRefLocation < *nextRefLocationPtr) - { - *nextRefLocationPtr = nextRefLocation; - } #ifdef TARGET_ARM if (regType == TYP_DOUBLE) @@ -2593,11 +2631,10 @@ bool LinearScan::registerIsAvailable(RegRecord* physRegRecord, // Recurse, but check the other half this time (TYP_FLOAT) if (!registerIsAvailable(findAnotherHalfRegRec(physRegRecord), currentLoc, nextRefLocationPtr, TYP_FLOAT)) return false; - nextRefLocation = *nextRefLocationPtr; } #endif // TARGET_ARM - return (nextRefLocation >= currentLoc); + return true; } //------------------------------------------------------------------------ @@ -2794,8 +2831,11 @@ regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition* Interval* rangeEndInterval = relatedInterval; regMaskTP relatedPreferences = (relatedInterval == nullptr) ? RBM_NONE : relatedInterval->getCurrentPreferences(); LsraLocation rangeEndLocation = refPosition->getRangeEndLocation(); - bool preferCalleeSave = currentInterval->preferCalleeSave; - bool avoidByteRegs = false; + LsraLocation relatedLastLocation = rangeEndLocation; + + bool preferCalleeSave = currentInterval->preferCalleeSave; + + bool avoidByteRegs = false; #ifdef TARGET_X86 if ((relatedPreferences & ~RBM_BYTE_REGS) != RBM_NONE) { @@ -2863,6 +2903,11 @@ regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition* rangeEndRefPosition = refPosition; preferCalleeSave = currentInterval->preferCalleeSave; } + else if (currentInterval->isWriteThru && refPosition->spillAfter) + { + // This is treated as a last use of the register, as there is an upcoming EH boundary. + rangeEndRefPosition = refPosition; + } else { rangeEndRefPosition = refPosition->getRangeEndRef(); @@ -2870,11 +2915,37 @@ regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition* // is not currently occupying a register, and whose lifetime begins after this one, // we want to try to select a register that will cover its lifetime. if ((rangeEndInterval != nullptr) && (rangeEndInterval->assignedReg == nullptr) && + !rangeEndInterval->isWriteThru && (rangeEndInterval->getNextRefLocation() >= rangeEndRefPosition->nodeLocation)) { lastRefPosition = rangeEndInterval->lastRefPosition; } } + if ((relatedInterval != nullptr) && !relatedInterval->isWriteThru) + { + relatedLastLocation = relatedInterval->lastRefPosition->nodeLocation; + } + + regMaskTP callerCalleePrefs; + if (preferCalleeSave) + { + regMaskTP calleeSaveCandidates = calleeSaveRegs(currentInterval->registerType); + if (currentInterval->isWriteThru) + { + // We'll only prefer a callee-save register if it's already been used. 
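(Illustrative sketch, not part of the patch: the callerCalleePrefs computation above, with uint64_t standing in for regMaskTP and hypothetical parameter names.)

    #include <cstdint>
    using Mask = uint64_t;

    Mask computeCallerCalleePrefs(Mask calleeSaveCandidates, Mask callerSaveCandidates,
                                  Mask& preferences, bool preferCalleeSave,
                                  bool isWriteThru, Mask modifiedRegs)
    {
        if (!preferCalleeSave)
        {
            return callerSaveCandidates;
        }
        if (isWriteThru)
        {
            // Don't let a write-thru interval be the first user of a callee-save
            // register: that would add a prolog save (and epilog restore) just
            // for a value that is already always on the stack.
            Mask unusedCalleeSaves = calleeSaveCandidates & ~modifiedRegs;
            preferences &= ~unusedCalleeSaves;
            return calleeSaveCandidates & ~unusedCalleeSaves;
        }
        return calleeSaveCandidates;
    }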
+ regMaskTP unusedCalleeSaves = calleeSaveCandidates & ~(compiler->codeGen->regSet.rsGetModifiedRegsMask()); + callerCalleePrefs = calleeSaveCandidates & ~unusedCalleeSaves; + preferences &= ~unusedCalleeSaves; + } + else + { + callerCalleePrefs = calleeSaveCandidates; + } + } + else + { + callerCalleePrefs = callerSaveRegs(currentInterval->registerType); + } // If this has a delayed use (due to being used in a rmw position of a // non-commutative operator), its endLocation is delayed until the "def" @@ -3057,7 +3128,7 @@ regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition* if ((candidateBit & relatedPreferences) != RBM_NONE) { score |= RELATED_PREFERENCE; - if (nextPhysRefLocation > relatedInterval->lastRefPosition->nodeLocation) + if (nextPhysRefLocation > relatedLastLocation) { score |= COVERS_RELATED; } @@ -3071,7 +3142,7 @@ regNumber LinearScan::tryAllocateFreeReg(Interval* currentInterval, RefPosition* score |= RELATED_PREFERENCE; } - if (preferCalleeSave == physRegRecord->isCalleeSave) + if ((candidateBit & callerCalleePrefs) != RBM_NONE) { score |= CALLER_CALLEE; } @@ -4087,7 +4158,8 @@ void LinearScan::spillInterval(Interval* interval, RefPosition* fromRefPosition if (!fromRefPosition->lastUse) { // If not allocated a register, Lcl var def/use ref positions even if reg optional - // should be marked as spillAfter. + // should be marked as spillAfter. Note that if it is a WriteThru interval, the value is always + // written to the stack, but the WriteThru indicates that the register is no longer live. if (fromRefPosition->RegOptional() && !(interval->isLocalVar && fromRefPosition->IsActualRef())) { fromRefPosition->registerAssignment = RBM_NONE; @@ -4822,11 +4894,33 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) regNumber targetReg; Interval* interval = getIntervalForLocalVar(varIndex); RefPosition* nextRefPosition = interval->getNextRefPosition(); - assert(nextRefPosition != nullptr); + assert((nextRefPosition != nullptr) || (interval->isWriteThru)); + + bool leaveOnStack = false; + + // Special handling for variables live in/out of exception handlers. + if (interval->isWriteThru) + { + // There are 3 cases where we will leave writethru lclVars on the stack: + // 1) There is no predecessor. + // 2) It is conservatively or artificially live - that is, it has no next use, + // so there is no place for codegen to record that the register is no longer occupied. + // 3) This block has a predecessor with an outgoing EH edge. We won't be able to add "join" + // resolution to load the EH var into a register along that edge, so it must be on stack. + if ((predBBNum == 0) || (nextRefPosition == nullptr) || (RefTypeIsDef(nextRefPosition->refType)) || + blockInfo[currentBlock->bbNum].hasEHPred) + { + leaveOnStack = true; + } + } if (!allocationPassComplete) { targetReg = getVarReg(predVarToRegMap, varIndex); + if (leaveOnStack) + { + targetReg = REG_STK; + } #ifdef DEBUG regNumber newTargetReg = rotateBlockStartLocation(interval, targetReg, (~liveRegs | inactiveRegs)); if (newTargetReg != targetReg) @@ -4891,9 +4985,9 @@ void LinearScan::processBlockStartLocations(BasicBlock* currentBlock) { // This can happen if we are using the locations from a basic block other than the // immediately preceding one - where the variable was in a different location. - if (targetReg != REG_STK) + if ((targetReg != REG_STK) || leaveOnStack) { - // Unassign it from the register (it will get a new register below). 
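(Illustrative note, not part of the patch: the three "leave on stack" cases listed in the processBlockStartLocations hunk above, condensed into a hypothetical predicate.)

    bool leaveWriteThruOnStack(unsigned predBBNum, bool hasNextRef, bool nextRefIsDef, bool blockHasEHPred)
    {
        return (predBBNum == 0)   // 1) no predecessor to inherit a register location from
               || !hasNextRef     // 2) conservatively or artificially live: no next use at which
               || nextRefIsDef    //    codegen could record that the register goes dead
               || blockHasEHPred; // 3) an EH pred edge cannot carry register resolution
    }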
+ // Unassign it from the register (it may get a new register below). if (interval->assignedReg != nullptr && interval->assignedReg->assignedInterval == interval) { interval->isActive = false; @@ -5199,9 +5293,9 @@ void LinearScan::allocateRegisters() } } -#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE if (enregisterLocalVars) { +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE VarSetOps::Iter largeVectorVarsIter(compiler, largeVectorVars); unsigned largeVectorVarIndex = 0; while (largeVectorVarsIter.NextElem(&largeVectorVarIndex)) @@ -5209,13 +5303,12 @@ void LinearScan::allocateRegisters() Interval* lclVarInterval = getIntervalForLocalVar(largeVectorVarIndex); lclVarInterval->isPartiallySpilled = false; } - } #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE + } for (regNumber reg = REG_FIRST; reg < ACTUAL_REG_COUNT; reg = REG_NEXT(reg)) { getRegisterRecord(reg)->recentRefPosition = nullptr; - getRegisterRecord(reg)->isActive = false; } #ifdef DEBUG @@ -5275,7 +5368,7 @@ void LinearScan::allocateRegisters() currentReferent = currentRefPosition->referent; - if (spillAlways() && lastAllocatedRefPosition != nullptr && !lastAllocatedRefPosition->isPhysRegRef && + if (spillAlways() && lastAllocatedRefPosition != nullptr && !lastAllocatedRefPosition->IsPhysRegRef() && !lastAllocatedRefPosition->getInterval()->isInternal && (RefTypeIsDef(lastAllocatedRefPosition->refType) || lastAllocatedRefPosition->getInterval()->isLocalVar)) { @@ -5407,6 +5500,19 @@ void LinearScan::allocateRegisters() INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_FIXED_REG, nullptr, currentRefPosition->assignedReg())); continue; } + if (refType == RefTypeKill) + { + RegRecord* currentReg = currentRefPosition->getReg(); + Interval* assignedInterval = currentReg->assignedInterval; + + if (assignedInterval != nullptr) + { + unassignPhysReg(currentReg, assignedInterval->recentRefPosition); + } + currentReg->isBusyUntilNextKill = false; + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, currentReg->regNum)); + continue; + } // If this is an exposed use, do nothing - this is merely a placeholder to attempt to // ensure that a register is allocated for the full lifetime. The resolution logic @@ -5420,171 +5526,170 @@ void LinearScan::allocateRegisters() regNumber assignedRegister = REG_NA; - if (currentRefPosition->isIntervalRef()) - { - currentInterval = currentRefPosition->getInterval(); - assignedRegister = currentInterval->physReg; + assert(currentRefPosition->isIntervalRef()); + currentInterval = currentRefPosition->getInterval(); + assert(currentInterval != nullptr); + assignedRegister = currentInterval->physReg; - // Identify the special cases where we decide up-front not to allocate - bool allocate = true; - bool didDump = false; + // Identify the special cases where we decide up-front not to allocate + bool allocate = true; + bool didDump = false; - if (refType == RefTypeParamDef || refType == RefTypeZeroInit) + if (refType == RefTypeParamDef || refType == RefTypeZeroInit) + { + if (nextRefPosition == nullptr) { - if (nextRefPosition == nullptr) - { - // If it has no actual references, mark it as "lastUse"; since they're not actually part - // of any flow they won't have been marked during dataflow. Otherwise, if we allocate a - // register we won't unassign it. 
- INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_ZERO_REF, currentInterval)); - currentRefPosition->lastUse = true; - } - if (refType == RefTypeParamDef) - { - LclVarDsc* varDsc = currentInterval->getLocalVar(compiler); - assert(varDsc != nullptr); - if (varDsc->lvRefCntWtd() <= BB_UNITY_WEIGHT) - { - // For a ParamDef with a weighted refCount less than unity, don't enregister it at entry. - // TODO-CQ: Consider doing this only for stack parameters, since otherwise we may be needlessly - // inserting a store. - allocate = false; - } - else if ((currentInterval->physReg == REG_STK) && nextRefPosition->treeNode->OperIs(GT_BITCAST)) - { - // In the case of ABI mismatches, avoid allocating a register only to have to immediately move - // it to a different register file. - allocate = false; - } - if (!allocate) - { - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_ENTRY_REG_ALLOCATED, currentInterval)); - didDump = true; - setIntervalAsSpilled(currentInterval); - } - } + // If it has no actual references, mark it as "lastUse"; since they're not actually part + // of any flow they won't have been marked during dataflow. Otherwise, if we allocate a + // register we won't unassign it. + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_ZERO_REF, currentInterval)); + currentRefPosition->lastUse = true; + } + LclVarDsc* varDsc = currentInterval->getLocalVar(compiler); + assert(varDsc != nullptr); + assert(!blockInfo[compiler->fgFirstBB->bbNum].hasEHBoundaryIn || currentInterval->isWriteThru); + if (blockInfo[compiler->fgFirstBB->bbNum].hasEHBoundaryIn || + blockInfo[compiler->fgFirstBB->bbNum].hasEHPred) + { + allocate = false; + } + else if (refType == RefTypeParamDef && varDsc->lvRefCntWtd() <= BB_UNITY_WEIGHT) + { + allocate = false; + } + else if ((currentInterval->physReg == REG_STK) && nextRefPosition->treeNode->OperIs(GT_BITCAST)) + { + // In the case of ABI mismatches, avoid allocating a register only to have to immediately move + // it to a different register file. + allocate = false; + } + if (!allocate) + { + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_ENTRY_REG_ALLOCATED, currentInterval)); + didDump = true; + setIntervalAsSpilled(currentInterval); } + } #ifdef FEATURE_SIMD #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE - else if (currentInterval->isUpperVector) + else if (currentInterval->isUpperVector) + { + // This is a save or restore of the upper half of a large vector lclVar. + Interval* lclVarInterval = currentInterval->relatedInterval; + assert(lclVarInterval->isLocalVar); + if (refType == RefTypeUpperVectorSave) { - // This is a save or restore of the upper half of a large vector lclVar. 
- Interval* lclVarInterval = currentInterval->relatedInterval; - assert(lclVarInterval->isLocalVar); - if (refType == RefTypeUpperVectorSave) + if ((lclVarInterval->physReg == REG_NA) || + (lclVarInterval->isPartiallySpilled && (currentInterval->physReg == REG_STK))) { - if ((lclVarInterval->physReg == REG_NA) || - (lclVarInterval->isPartiallySpilled && (currentInterval->physReg == REG_STK))) - { - allocate = false; - } - else - { - lclVarInterval->isPartiallySpilled = true; - } + allocate = false; } - else if (refType == RefTypeUpperVectorRestore) + else { - assert(currentInterval->isUpperVector); - if (lclVarInterval->isPartiallySpilled) - { - lclVarInterval->isPartiallySpilled = false; - } - else - { - allocate = false; - } + lclVarInterval->isPartiallySpilled = true; } } - else if (refType == RefTypeUpperVectorSave) + else if (refType == RefTypeUpperVectorRestore) { - assert(!currentInterval->isLocalVar); - // Note that this case looks a lot like the case below, but in this case we need to spill - // at the previous RefPosition. - // We may want to consider allocating two callee-save registers for this case, but it happens rarely - // enough that it may not warrant the additional complexity. - if (assignedRegister != REG_NA) + assert(currentInterval->isUpperVector); + if (lclVarInterval->isPartiallySpilled) { - unassignPhysReg(getRegisterRecord(assignedRegister), currentInterval->firstRefPosition); - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval)); + lclVarInterval->isPartiallySpilled = false; + } + else + { + allocate = false; } - currentRefPosition->registerAssignment = RBM_NONE; - continue; } + } + else if (refType == RefTypeUpperVectorSave) + { + assert(!currentInterval->isLocalVar); + // Note that this case looks a lot like the case below, but in this case we need to spill + // at the previous RefPosition. + // We may want to consider allocating two callee-save registers for this case, but it happens rarely + // enough that it may not warrant the additional complexity. + if (assignedRegister != REG_NA) + { + unassignPhysReg(getRegisterRecord(assignedRegister), currentInterval->firstRefPosition); + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval)); + } + currentRefPosition->registerAssignment = RBM_NONE; + continue; + } #endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE #endif // FEATURE_SIMD - if (allocate == false) + if (allocate == false) + { + if (assignedRegister != REG_NA) { - if (assignedRegister != REG_NA) - { - unassignPhysReg(getRegisterRecord(assignedRegister), currentRefPosition); - } - else if (!didDump) - { - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval)); - didDump = true; - } - currentRefPosition->registerAssignment = RBM_NONE; - continue; + unassignPhysReg(getRegisterRecord(assignedRegister), currentRefPosition); } - - if (currentInterval->isSpecialPutArg) + else if (!didDump) { - assert(!currentInterval->isLocalVar); - Interval* srcInterval = currentInterval->relatedInterval; - assert(srcInterval != nullptr && srcInterval->isLocalVar); - if (refType == RefTypeDef) - { - assert(srcInterval->recentRefPosition->nodeLocation == currentLocation - 1); - RegRecord* physRegRecord = srcInterval->assignedReg; - - // For a putarg_reg to be special, its next use location has to be the same - // as fixed reg's next kill location. 
Otherwise, if source lcl var's next use - // is after the kill of fixed reg but before putarg_reg's next use, fixed reg's - // kill would lead to spill of source but not the putarg_reg if it were treated - // as special. - if (srcInterval->isActive && - genRegMask(srcInterval->physReg) == currentRefPosition->registerAssignment && - currentInterval->getNextRefLocation() == physRegRecord->getNextRefLocation()) - { - assert(physRegRecord->regNum == srcInterval->physReg); - - // Special putarg_reg acts as a pass-thru since both source lcl var - // and putarg_reg have the same register allocated. Physical reg - // record of reg continue to point to source lcl var's interval - // instead of to putarg_reg's interval. So if a spill of reg - // allocated to source lcl var happens, to reallocate to another - // tree node, before its use at call node it will lead to spill of - // lcl var instead of putarg_reg since physical reg record is pointing - // to lcl var's interval. As a result, arg reg would get trashed leading - // to bad codegen. The assumption here is that source lcl var of a - // special putarg_reg doesn't get spilled and re-allocated prior to - // its use at the call node. This is ensured by marking physical reg - // record as busy until next kill. - physRegRecord->isBusyUntilNextKill = true; - } - else - { - currentInterval->isSpecialPutArg = false; - } + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_NO_REG_ALLOCATED, currentInterval)); + didDump = true; + } + currentRefPosition->registerAssignment = RBM_NONE; + continue; + } + + if (currentInterval->isSpecialPutArg) + { + assert(!currentInterval->isLocalVar); + Interval* srcInterval = currentInterval->relatedInterval; + assert(srcInterval != nullptr && srcInterval->isLocalVar); + if (refType == RefTypeDef) + { + assert(srcInterval->recentRefPosition->nodeLocation == currentLocation - 1); + RegRecord* physRegRecord = srcInterval->assignedReg; + + // For a putarg_reg to be special, its next use location has to be the same + // as fixed reg's next kill location. Otherwise, if source lcl var's next use + // is after the kill of fixed reg but before putarg_reg's next use, fixed reg's + // kill would lead to spill of source but not the putarg_reg if it were treated + // as special. + if (srcInterval->isActive && + genRegMask(srcInterval->physReg) == currentRefPosition->registerAssignment && + currentInterval->getNextRefLocation() == physRegRecord->getNextRefLocation()) + { + assert(physRegRecord->regNum == srcInterval->physReg); + + // Special putarg_reg acts as a pass-thru since both source lcl var + // and putarg_reg have the same register allocated. Physical reg + // record of reg continue to point to source lcl var's interval + // instead of to putarg_reg's interval. So if a spill of reg + // allocated to source lcl var happens, to reallocate to another + // tree node, before its use at call node it will lead to spill of + // lcl var instead of putarg_reg since physical reg record is pointing + // to lcl var's interval. As a result, arg reg would get trashed leading + // to bad codegen. The assumption here is that source lcl var of a + // special putarg_reg doesn't get spilled and re-allocated prior to + // its use at the call node. This is ensured by marking physical reg + // record as busy until next kill. 
+ physRegRecord->isBusyUntilNextKill = true; } - // If this is still a SpecialPutArg, continue; - if (currentInterval->isSpecialPutArg) + else { - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_SPECIAL_PUTARG, currentInterval, - currentRefPosition->assignedReg())); - continue; + currentInterval->isSpecialPutArg = false; } } - - if (assignedRegister == REG_NA && RefTypeIsUse(refType)) + // If this is still a SpecialPutArg, continue; + if (currentInterval->isSpecialPutArg) { - currentRefPosition->reload = true; - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_RELOAD, currentInterval, assignedRegister)); + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_SPECIAL_PUTARG, currentInterval, + currentRefPosition->assignedReg())); + continue; } } + if (assignedRegister == REG_NA && RefTypeIsUse(refType)) + { + currentRefPosition->reload = true; + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_RELOAD, currentInterval, assignedRegister)); + } + regMaskTP assignedRegBit = RBM_NONE; bool isInRegister = false; if (assignedRegister != REG_NA) @@ -5612,25 +5717,7 @@ void LinearScan::allocateRegisters() currentInterval->assignedReg->assignedInterval == currentInterval); } - // If this is a physical register, we unconditionally assign it to itself! - if (currentRefPosition->isPhysRegRef) - { - RegRecord* currentReg = currentRefPosition->getReg(); - Interval* assignedInterval = currentReg->assignedInterval; - - if (assignedInterval != nullptr) - { - unassignPhysReg(currentReg, assignedInterval->recentRefPosition); - } - currentReg->isActive = true; - assignedRegister = currentReg->regNum; - assignedRegBit = genRegMask(assignedRegister); - if (refType == RefTypeKill) - { - currentReg->isBusyUntilNextKill = false; - } - } - else if (previousRefPosition != nullptr) + if (previousRefPosition != nullptr) { assert(previousRefPosition->nextRefPosition == currentRefPosition); assert(assignedRegister == REG_NA || assignedRegBit == previousRefPosition->registerAssignment || @@ -5726,14 +5813,14 @@ void LinearScan::allocateRegisters() else if ((genRegMask(assignedRegister) & currentRefPosition->registerAssignment) != 0) { currentRefPosition->registerAssignment = assignedRegBit; - if (!currentReferent->isActive) + if (!currentInterval->isActive) { // If we've got an exposed use at the top of a block, the // interval might not have been active. Otherwise if it's a use, // the interval must be active. if (refType == RefTypeDummyDef) { - currentReferent->isActive = true; + currentInterval->isActive = true; assert(getRegisterRecord(assignedRegister)->assignedInterval == currentInterval); } else @@ -5745,16 +5832,35 @@ void LinearScan::allocateRegisters() } else { - assert(currentInterval != nullptr); - // It's already in a register, but not one we need. 
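(Illustrative note, not part of the patch: the register-freeing rule this hunk introduces for write-thru intervals, in isolation.)

    // A write-thru RefPosition marked spillAfter sits at an EH boundary: the
    // stack copy is current, so the register can be freed even though the
    // variable itself stays live.
    bool freeRegAfterRef(bool isWriteThru, bool spillAfter, bool lastUse)
    {
        if (lastUse)
        {
            return true;
        }
        return isWriteThru && spillAfter;
    }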
if (!RefTypeIsDef(currentRefPosition->refType)) { regNumber copyReg = assignCopyReg(currentRefPosition); assert(copyReg != REG_NA); INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_COPY_REG, currentInterval, copyReg)); - lastAllocatedRefPosition = currentRefPosition; + lastAllocatedRefPosition = currentRefPosition; + bool unassign = false; + RefPosition* nextRefPosition = currentRefPosition->nextRefPosition; + if (currentInterval->isWriteThru) + { + if (currentRefPosition->refType == RefTypeDef) + { + currentRefPosition->writeThru = true; + } + if (!currentRefPosition->lastUse) + { + if (currentRefPosition->spillAfter) + { + unassign = true; + } + } + } if (currentRefPosition->lastUse) + { + assert(currentRefPosition->isIntervalRef()); + unassign = true; + } + if (unassign) { if (currentRefPosition->delayRegFree) { @@ -5801,6 +5907,14 @@ void LinearScan::allocateRegisters() { allocateReg = false; } + else if (currentInterval->isWriteThru) + { + // Don't allocate if the next reference is in a cold block. + if (nextRefPosition == nullptr || (nextRefPosition->nodeLocation >= firstColdLoc)) + { + allocateReg = false; + } + } #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE && defined(TARGET_XARCH) // We can also avoid allocating a register (in fact we don't want to) if we have @@ -5921,37 +6035,58 @@ void LinearScan::allocateRegisters() // (it will be freed when it is used). if (!currentInterval->IsUpperVector()) { + bool unassign = false; + if (currentInterval->isWriteThru) + { + if (currentRefPosition->refType == RefTypeDef) + { + currentRefPosition->writeThru = true; + } + if (!currentRefPosition->lastUse) + { + if (currentRefPosition->spillAfter) + { + unassign = true; + } + } + } if (currentRefPosition->lastUse || currentRefPosition->nextRefPosition == nullptr) { assert(currentRefPosition->isIntervalRef()); if (refType != RefTypeExpUse && currentRefPosition->nextRefPosition == nullptr) { - if (currentRefPosition->delayRegFree) - { - delayRegsToFree |= assignedRegBit; - - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED)); - } - else - { - regsToFree |= assignedRegBit; - - INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE)); - } + unassign = true; } else { currentInterval->isActive = false; } - // Update the register preferences for the relatedInterval, if this is 'preferencedToDef'. - // Don't propagate to subsequent relatedIntervals; that will happen as they are allocated, and we - // don't know yet whether the register will be retained. - if (currentInterval->relatedInterval != nullptr) + } + if (unassign) + { + if (currentRefPosition->delayRegFree) { - currentInterval->relatedInterval->updateRegisterPreferences(assignedRegBit); + delayRegsToFree |= assignedRegBit; + + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE_DELAYED)); + } + else + { + regsToFree |= assignedRegBit; + + INDEBUG(dumpLsraAllocationEvent(LSRA_EVENT_LAST_USE)); } } + + // Update the register preferences for the relatedInterval, if this is 'preferencedToDef'. + // Don't propagate to subsequent relatedIntervals; that will happen as they are allocated, and we + // don't know yet whether the register will be retained. 
+ if ((currentRefPosition->lastUse || nextRefPosition == nullptr) && + (currentInterval->relatedInterval != nullptr)) + { + currentInterval->relatedInterval->updateRegisterPreferences(assignedRegBit); + } } lastAllocatedRefPosition = currentRefPosition; @@ -5975,8 +6110,16 @@ void LinearScan::allocateRegisters() } if (interval->isSpilled) { + unsigned prevBBNum = 0; for (RefPosition* ref = interval->firstRefPosition; ref != nullptr; ref = ref->nextRefPosition) { + // For the resolution phase, we need to ensure that any block with exposed uses has the + // incoming reg for 'this' as REG_STK. + if (RefTypeIsUse(ref->refType) && (ref->bbNum != prevBBNum)) + { + VarToRegMap inVarToRegMap = getInVarToRegMap(ref->bbNum); + setVarReg(inVarToRegMap, thisVarDsc->lvVarIndex, REG_STK); + } if (ref->RegOptional()) { ref->registerAssignment = RBM_NONE; @@ -6003,6 +6146,7 @@ void LinearScan::allocateRegisters() default: break; } + prevBBNum = ref->bbNum; } } } @@ -6203,7 +6347,8 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTree* treeNode, RefPositi } if ((currentRefPosition->registerAssignment != RBM_NONE) && (interval->physReg == REG_NA) && - currentRefPosition->RegOptional() && currentRefPosition->lastUse) + currentRefPosition->RegOptional() && currentRefPosition->lastUse && + (currentRefPosition->refType == RefTypeUse)) { // This can happen if the incoming location for the block was changed from a register to the stack // during resolution. In this case we're better off making it contained. @@ -6225,8 +6370,9 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTree* treeNode, RefPositi } interval->assignedReg = nullptr; interval->physReg = REG_NA; - if (treeNode != nullptr) + if (currentRefPosition->refType == RefTypeUse) { + assert(treeNode != nullptr); treeNode->SetContained(); } @@ -6266,6 +6412,7 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTree* treeNode, RefPositi bool reload = currentRefPosition->reload; bool spillAfter = currentRefPosition->spillAfter; + bool writeThru = currentRefPosition->writeThru; // In the reload case we either: // - Set the register to REG_STK if it will be referenced only from the home location, or @@ -6391,6 +6538,20 @@ void LinearScan::resolveLocalRef(BasicBlock* block, GenTree* treeNode, RefPositi interval->physReg = REG_NA; varDsc->SetRegNum(REG_STK); } + if (writeThru && (treeNode != nullptr)) + { + // This is a def of a write-thru EH var (only defs are marked 'writeThru'). + treeNode->gtFlags |= GTF_SPILL; + // We also mark writeThru defs that are not last-use with GTF_SPILLED to indicate that they are conceptually + // spilled and immediately "reloaded", i.e. the register remains live. + // Note that we can have a "last use" write that has no exposed uses in the standard + // (non-eh) control flow, but that may be used on an exception path. Hence the need + // to retain these defs, and to ensure that they write. 
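(Illustrative note, not part of the patch: the flag protocol for write-thru defs described above, as a hypothetical helper; the flag constants are passed as parameters rather than using the real GenTree encodings.)

    // Every write-thru def gets GTF_SPILL (emit the store to the stack home);
    // a non-last-use write-thru def also gets GTF_SPILLED (the register stays
    // live across the store, i.e. it is "spilled" and immediately "reloaded").
    unsigned markWriteThruDef(unsigned gtFlags, bool lastUse, unsigned spillBit, unsigned spilledBit)
    {
        gtFlags |= spillBit;
        if (!lastUse)
        {
            gtFlags |= spilledBit;
        }
        return gtFlags;
    }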
+ if (!currentRefPosition->lastUse) + { + treeNode->gtFlags |= GTF_SPILLED; + } + } } // Update the physRegRecord for the register, so that we know what vars are in @@ -7172,7 +7333,8 @@ void LinearScan::resolveRegisters() continue; } - if (currentRefPosition->isIntervalRef() && currentRefPosition->getInterval()->isInternal) + assert(currentRefPosition->isIntervalRef()); + if (currentRefPosition->getInterval()->isInternal) { treeNode->gtRsvdRegs |= currentRefPosition->registerAssignment; } @@ -7192,7 +7354,7 @@ void LinearScan::resolveRegisters() else if (currentRefPosition->spillAfter || (currentRefPosition->nextRefPosition != nullptr && currentRefPosition->nextRefPosition->moveReg)) { - if (treeNode != nullptr && currentRefPosition->isIntervalRef()) + if (treeNode != nullptr) { if (currentRefPosition->spillAfter) { @@ -7577,11 +7739,12 @@ void LinearScan::insertMove( else { // Put the copy at the bottom + GenTree* lastNode = blockRange.LastNode(); if (block->bbJumpKind == BBJ_COND || block->bbJumpKind == BBJ_SWITCH) { noway_assert(!blockRange.IsEmpty()); - GenTree* branch = blockRange.LastNode(); + GenTree* branch = lastNode; assert(branch->OperIsConditionalJump() || branch->OperGet() == GT_SWITCH_TABLE || branch->OperGet() == GT_SWITCH); @@ -7589,7 +7752,9 @@ void LinearScan::insertMove( } else { - assert(block->bbJumpKind == BBJ_NONE || block->bbJumpKind == BBJ_ALWAYS); + // These block kinds don't have a branch at the end. + assert((lastNode == nullptr) || (!lastNode->OperIsConditionalJump() && + !lastNode->OperIs(GT_SWITCH_TABLE, GT_SWITCH, GT_RETURN, GT_RETFILT))); blockRange.InsertAtEnd(std::move(treeRange)); } } @@ -7838,14 +8003,24 @@ void LinearScan::addResolution( BasicBlock* block, GenTree* insertionPoint, Interval* interval, regNumber toReg, regNumber fromReg) { #ifdef DEBUG - const char* insertionPointString = "top"; -#endif // DEBUG + const char* insertionPointString; if (insertionPoint == nullptr) { -#ifdef DEBUG + // We can't add resolution to a register at the bottom of a block that has an EHBoundaryOut, + // except in the case of the "EH Dummy" resolution from the stack. + assert((block->bbNum > bbNumMaxBeforeResolution) || (fromReg == REG_STK) || + !blockInfo[block->bbNum].hasEHBoundaryOut); insertionPointString = "bottom"; -#endif // DEBUG } + else + { + // We can't add resolution at the top of a block that has an EHBoundaryIn, + // except in the case of the "EH Dummy" resolution to the stack. + assert((block->bbNum > bbNumMaxBeforeResolution) || (toReg == REG_STK) || + !blockInfo[block->bbNum].hasEHBoundaryIn); + insertionPointString = "top"; + } +#endif // DEBUG JITDUMP(" " FMT_BB " %s: move V%02u from ", block->bbNum, insertionPointString, interval->varNum); JITDUMP("%s to %s", getRegName(fromReg), getRegName(toReg)); @@ -8120,7 +8295,30 @@ void LinearScan::handleOutgoingCriticalEdges(BasicBlock* block) } if (!VarSetOps::IsEmpty(compiler, edgeResolutionSet)) { - resolveEdge(block, succBlock, ResolveCritical, edgeResolutionSet); + // For EH vars, we can always safely load them from the stack into the target for this block, + // so if we have only EH vars, we'll do that instead of splitting the edge. 
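(Illustrative sketch, not part of the patch: the shape of the EH-var shortcut above, with simplified containers; -1 stands in for REG_STK. When every var needing resolution on a critical edge is a write-thru EH var, each is reloaded from the stack at the top of the successor instead of splitting the edge.)

    #include <vector>

    struct EdgeVar
    {
        unsigned varIndex;
        int      toReg; // successor's expected register, or -1 for REG_STK
    };

    void resolveEHOnlyCriticalEdge(std::vector<EdgeVar>& vars)
    {
        for (EdgeVar& v : vars)
        {
            int toReg = v.toReg;
            v.toReg   = -1; // the successor's incoming location becomes the stack
            if (toReg != -1)
            {
                // ... record a stack-to-register reload for v.varIndex at the
                // top of the successor block (addResolution in the real code) ...
            }
        }
    }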
+ if ((compiler->compHndBBtabCount > 0) && VarSetOps::IsSubset(compiler, edgeResolutionSet, exceptVars)) + { + GenTree* insertionPoint = LIR::AsRange(succBlock).FirstNonPhiNode(); + VarSetOps::Iter edgeSetIter(compiler, edgeResolutionSet); + unsigned edgeVarIndex = 0; + while (edgeSetIter.NextElem(&edgeVarIndex)) + { + regNumber toReg = getVarReg(succInVarToRegMap, edgeVarIndex); + setVarReg(succInVarToRegMap, edgeVarIndex, REG_STK); + if (toReg != REG_STK) + { + Interval* interval = getIntervalForLocalVar(edgeVarIndex); + assert(interval->isWriteThru); + addResolution(succBlock, insertionPoint, interval, toReg, REG_STK); + JITDUMP(" (EHvar)\n"); + } + } + } + else + { + resolveEdge(block, succBlock, ResolveCritical, edgeResolutionSet); + } } } } @@ -8322,14 +8520,19 @@ void LinearScan::resolveEdges() regNumber toReg = getVarReg(toVarToRegMap, varIndex); if (fromReg != toReg) { - if (!foundMismatch) + Interval* interval = getIntervalForLocalVar(varIndex); + // The fromReg and toReg may not match for a write-thru interval where the toReg is + // REG_STK, since the stack value is always valid for that case (so no move is needed). + if (!interval->isWriteThru || (toReg != REG_STK)) { - foundMismatch = true; - printf("Found mismatched var locations after resolution!\n"); + if (!foundMismatch) + { + foundMismatch = true; + printf("Found mismatched var locations after resolution!\n"); + } + printf(" V%02u: " FMT_BB " to " FMT_BB ": %s to %s\n", interval->varNum, predBlock->bbNum, + block->bbNum, getRegName(fromReg), getRegName(toReg)); } - - printf(" V%02u: " FMT_BB " to " FMT_BB ": %s to %s\n", compiler->lvaTrackedIndexToLclNum(varIndex), - predBlock->bbNum, block->bbNum, getRegName(fromReg), getRegName(toReg)); } } } @@ -8473,6 +8676,29 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, insertionPoint = LIR::AsRange(block).FirstNonPhiNode(); } + // If this is an edge between EH regions, we may have "extra" live-out EH vars. + // If we are adding resolution at the end of the block, we need to create "virtual" moves + // for these so that their registers are freed and can be reused. 
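+    // (These are the "EH DUMMY" moves that appear in the dump: the EH var's stack home is already
+    // valid, so their main effect is to mark the register as no longer holding the variable.)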
+ if ((resolveType == ResolveJoin) && (compiler->compHndBBtabCount > 0)) + { + VARSET_TP extraLiveSet(VarSetOps::Diff(compiler, block->bbLiveOut, toBlock->bbLiveIn)); + VarSetOps::IntersectionD(compiler, extraLiveSet, registerCandidateVars); + VarSetOps::Iter iter(compiler, extraLiveSet); + unsigned extraVarIndex = 0; + while (iter.NextElem(&extraVarIndex)) + { + Interval* interval = getIntervalForLocalVar(extraVarIndex); + assert(interval->isWriteThru); + regNumber fromReg = getVarReg(fromVarToRegMap, extraVarIndex); + if (fromReg != REG_STK) + { + addResolution(block, insertionPoint, interval, REG_STK, fromReg); + JITDUMP(" (EH DUMMY)\n"); + setVarReg(fromVarToRegMap, extraVarIndex, REG_STK); + } + } + } + // First: // - Perform all moves from reg to stack (no ordering needed on these) // - For reg to reg moves, record the current location, associating their @@ -8486,13 +8712,24 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, unsigned varIndex = 0; while (iter.NextElem(&varIndex)) { - regNumber fromReg = getVarReg(fromVarToRegMap, varIndex); - regNumber toReg = getVarReg(toVarToRegMap, varIndex); + Interval* interval = getIntervalForLocalVar(varIndex); + regNumber fromReg = getVarReg(fromVarToRegMap, varIndex); + regNumber toReg = getVarReg(toVarToRegMap, varIndex); if (fromReg == toReg) { continue; } - + if (interval->isWriteThru && (toReg == REG_STK)) + { + // We don't actually move a writeThru var back to the stack, as its stack value is always valid. + // However, if this is a Join edge (i.e. the move is happening at the bottom of the block), + // and it is a "normal" flow edge, we will go ahead and generate a mov instruction, which will be + // a NOP but will cause the variable to be removed from being live in the register. + if ((resolveType == ResolveSplit) || block->hasEHBoundaryOut()) + { + continue; + } + } // For Critical edges, the location will not change on either side of the edge, // since we'll add a new block to do the move. if (resolveType == ResolveSplit) @@ -8506,8 +8743,6 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, assert(fromReg < UCHAR_MAX && toReg < UCHAR_MAX); - Interval* interval = getIntervalForLocalVar(varIndex); - if (fromReg == REG_STK) { stackToRegIntervals[toReg] = interval; @@ -8517,7 +8752,8 @@ void LinearScan::resolveEdge(BasicBlock* fromBlock, { // Do the reg to stack moves now addResolution(block, insertionPoint, interval, REG_STK, fromReg); - JITDUMP(" (%s)\n", resolveTypeName[resolveType]); + JITDUMP(" (%s)\n", + (interval->isWriteThru && (toReg == REG_STK)) ? 
"EH DUMMY" : resolveTypeName[resolveType]); } else { @@ -8965,7 +9201,7 @@ void RefPosition::dump() printf(" %s ", getRefTypeName(refType)); - if (this->isPhysRegRef) + if (this->IsPhysRegRef()) { this->getReg()->tinyDump(); } @@ -8997,6 +9233,10 @@ void RefPosition::dump() { printf(" spillAfter"); } + if (this->writeThru) + { + printf(" writeThru"); + } if (this->moveReg) { printf(" move"); @@ -9084,6 +9324,10 @@ void Interval::dump() { printf(" (constant)"); } + if (isWriteThru) + { + printf(" (writeThru)"); + } printf(" RefPositions {"); for (RefPosition* refPosition = this->firstRefPosition; refPosition != nullptr; @@ -9559,7 +9803,7 @@ void LinearScan::TupleStyleDump(LsraTupleDumpMode mode) switch (currentRefPosition->refType) { case RefTypeUse: - if (currentRefPosition->isPhysRegRef) + if (currentRefPosition->IsPhysRegRef()) { printf("\n Use:R%d(#%d)", currentRefPosition->getReg()->regNum, currentRefPosition->rpNum); @@ -10187,7 +10431,7 @@ void LinearScan::dumpRefPositionShort(RefPosition* refPosition, BasicBlock* curr } printf(" %s%c%c ", getRefTypeShortName(refPosition->refType), lastUseChar, delayChar); } - else if (refPosition->isPhysRegRef) + else if (refPosition->IsPhysRegRef()) { RegRecord* regRecord = refPosition->getReg(); printf(regNameFormat, getRegName(regRecord->regNum)); @@ -10315,7 +10559,7 @@ void LinearScan::verifyFinalAllocation() } else { - if (currentRefPosition->isPhysRegRef) + if (currentRefPosition->IsPhysRegRef()) { regRecord = currentRefPosition->getReg(); regRecord->recentRefPosition = currentRefPosition; @@ -10398,7 +10642,11 @@ void LinearScan::verifyFinalAllocation() } regNumber regNum = getVarReg(outVarToRegMap, varIndex); interval = getIntervalForLocalVar(varIndex); - assert(interval->physReg == regNum || (interval->physReg == REG_NA && regNum == REG_STK)); + if (interval->physReg != regNum) + { + assert(regNum == REG_STK); + assert((interval->physReg == REG_NA) || interval->isWriteThru); + } interval->physReg = REG_NA; interval->assignedReg = nullptr; interval->isActive = false; @@ -10555,7 +10803,7 @@ void LinearScan::verifyFinalAllocation() { dumpLsraAllocationEvent(LSRA_EVENT_KEPT_ALLOCATION, nullptr, regRecord->regNum, currentBlock); } - if (currentRefPosition->lastUse || currentRefPosition->spillAfter) + if (currentRefPosition->lastUse || (currentRefPosition->spillAfter && !currentRefPosition->writeThru)) { interval->isActive = false; } @@ -10576,7 +10824,14 @@ void LinearScan::verifyFinalAllocation() } dumpRegRecords(); dumpEmptyRefPosition(); - printf("Spill %-4s ", getRegName(spillReg)); + if (currentRefPosition->writeThru) + { + printf("WThru %-4s ", getRegName(spillReg)); + } + else + { + printf("Spill %-4s ", getRegName(spillReg)); + } } } else if (currentRefPosition->copyReg) @@ -10737,7 +10992,10 @@ void LinearScan::verifyFinalAllocation() } regNumber regNum = getVarReg(outVarToRegMap, varIndex); Interval* interval = getIntervalForLocalVar(varIndex); - assert(interval->physReg == regNum || (interval->physReg == REG_NA && regNum == REG_STK)); + // Either the register assignments match, or the outgoing assignment is on the stack + // and this is a write-thru interval. 
+ assert(interval->physReg == regNum || (interval->physReg == REG_NA && regNum == REG_STK) || + (interval->isWriteThru && regNum == REG_STK)); interval->physReg = REG_NA; interval->assignedReg = nullptr; interval->isActive = false; diff --git a/src/coreclr/src/jit/lsra.h b/src/coreclr/src/jit/lsra.h index f3ff58ed7bd27..9819e069af647 100644 --- a/src/coreclr/src/jit/lsra.h +++ b/src/coreclr/src/jit/lsra.h @@ -71,13 +71,21 @@ inline bool registerTypesEquivalent(RegisterType a, RegisterType b) } //------------------------------------------------------------------------ -// registerTypesEquivalent: Get the set of callee-save registers of the given RegisterType +// calleeSaveRegs: Get the set of callee-save registers of the given RegisterType // inline regMaskTP calleeSaveRegs(RegisterType rt) { return varTypeIsIntegralOrI(rt) ? RBM_INT_CALLEE_SAVED : RBM_FLT_CALLEE_SAVED; } +//------------------------------------------------------------------------ +// callerSaveRegs: Get the set of caller-save registers of the given RegisterType +// +inline regMaskTP callerSaveRegs(RegisterType rt) +{ + return varTypeIsIntegralOrI(rt) ? RBM_INT_CALLEE_TRASH : RBM_FLT_CALLEE_TRASH; +} + //------------------------------------------------------------------------ // RefInfo: Captures the necessary information for a definition that is "in-flight" // during `buildIntervals` (i.e. a tree-node definition has been encountered, @@ -380,10 +388,11 @@ struct LsraBlockInfo // 0 for fgFirstBB. unsigned int predBBNum; BasicBlock::weight_t weight; - bool hasCriticalInEdge; - bool hasCriticalOutEdge; - bool hasEHBoundaryIn; - bool hasEHBoundaryOut; + bool hasCriticalInEdge : 1; + bool hasCriticalOutEdge : 1; + bool hasEHBoundaryIn : 1; + bool hasEHBoundaryOut : 1; + bool hasEHPred : 1; #if TRACK_LSRA_STATS // Per block maintained LSRA statistics. @@ -447,7 +456,6 @@ class Referenceable firstRefPosition = nullptr; recentRefPosition = nullptr; lastRefPosition = nullptr; - isActive = false; } // A linked list of RefPositions. These are only traversed in the forward @@ -458,8 +466,6 @@ class Referenceable RefPosition* recentRefPosition; RefPosition* lastRefPosition; - bool isActive; - // Get the position of the next reference which is at or greater than // the current location (relies upon recentRefPosition being udpated // during traversal). @@ -1372,6 +1378,7 @@ class LinearScan : public LinearScanInterface // A map from bbNum to the block information used during register allocation. LsraBlockInfo* blockInfo; + BasicBlock* findPredBlockForLiveIn(BasicBlock* block, BasicBlock* prevBlock DEBUGARG(bool* pPredBlockIsAllocated)); // The order in which the blocks will be allocated. @@ -1399,6 +1406,8 @@ class LinearScan : public LinearScanInterface unsigned int curBBNum; // The current location LsraLocation currentLoc; + // The first location in a cold or funclet block. + LsraLocation firstColdLoc; // The ordinal of the block we're on (i.e. this is the curBBSeqNum-th block we've allocated). unsigned int curBBSeqNum; // The number of blocks that we've sequenced. @@ -1446,6 +1455,8 @@ class LinearScan : public LinearScanInterface VARSET_TP fpCalleeSaveCandidateVars; // Set of variables exposed on EH flow edges. VARSET_TP exceptVars; + // Set of variables exposed on finally edges. These must be zero-init if they are refs or if compInitMem is true. 
+ VARSET_TP finallyVars; #if FEATURE_PARTIAL_SIMD_CALLEE_SAVE #if defined(TARGET_AMD64) @@ -1624,7 +1635,10 @@ class Interval : public Referenceable : registerPreferences(registerPreferences) , relatedInterval(nullptr) , assignedReg(nullptr) + , varNum(0) + , physReg(REG_COUNT) , registerType(registerType) + , isActive(false) , isLocalVar(false) , isSplit(false) , isSpilled(false) @@ -1640,11 +1654,10 @@ class Interval : public Referenceable , isUpperVector(false) , isPartiallySpilled(false) #endif - , physReg(REG_COUNT) + , isWriteThru(false) #ifdef DEBUG , intervalIndex(0) #endif - , varNum(0) { } @@ -1672,11 +1685,17 @@ class Interval : public Referenceable // register it currently occupies. RegRecord* assignedReg; - // DECIDE : put this in a union or do something w/ inheritance? - // this is an interval for a physical register, not a allocatable entity + unsigned int varNum; // This is the "variable number": the index into the lvaTable array + + // The register to which it is currently assigned. + regNumber physReg; RegisterType registerType; - bool isLocalVar : 1; + + // Is this Interval currently in a register and live? + bool isActive; + + bool isLocalVar : 1; // Indicates whether this interval has been assigned to different registers bool isSplit : 1; // Indicates whether this interval is ever spilled @@ -1728,15 +1747,13 @@ class Interval : public Referenceable } #endif - // The register to which it is currently assigned. - regNumber physReg; + // True if this interval is associated with a lclVar that is written to memory at each definition. + bool isWriteThru : 1; #ifdef DEBUG unsigned int intervalIndex; #endif // DEBUG - unsigned int varNum; // This is the "variable number": the index into the lvaTable array - LclVarDsc* getLocalVar(Compiler* comp) { assert(isLocalVar); @@ -1886,8 +1903,8 @@ class RefPosition { public: // A RefPosition refers to either an Interval or a RegRecord. 'referent' points to one - // of these types. If it refers to a RegRecord, then 'isPhysRegRef' is true. If it - // refers to an Interval, then 'isPhysRegRef' is false. + // of these types. If it refers to a RegRecord, then 'isPhysRegRef()' is true. If it + // refers to an Interval, then 'isPhysRegRef()' is false. // referent can never be null. Referenceable* referent; @@ -1951,6 +1968,9 @@ class RefPosition unsigned char reload : 1; unsigned char spillAfter : 1; + unsigned char writeThru : 1; // true if this var is defined in a register and also spilled. spillAfter must NOT be + // set. 
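+                                 // (A writeThru def stores to the stack home as spillAfter does, but
+                                 // unlike spillAfter the register remains live after the def.)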
+ unsigned char copyReg : 1; unsigned char moveReg : 1; // true if this var is moved to a new register @@ -1995,6 +2015,7 @@ class RefPosition , lastUse(false) , reload(false) , spillAfter(false) + , writeThru(false) , copyReg(false) , moveReg(false) , isPhysRegRef(false) @@ -2068,6 +2089,11 @@ class RefPosition } } + bool IsPhysRegRef() + { + return ((refType == RefTypeFixedReg) || (refType == RefTypeKill)); + } + void setRegOptional(bool val) { regOptional = val; @@ -2102,7 +2128,7 @@ class RefPosition RefPosition* getRangeEndRef() { - if (lastUse || nextRefPosition == nullptr) + if (lastUse || nextRefPosition == nullptr || spillAfter) { return this; } @@ -2120,14 +2146,7 @@ class RefPosition bool isIntervalRef() { - return (!isPhysRegRef && (referent != nullptr)); - } - - // isTrueDef indicates that the RefPosition is a non-update def of a non-internal - // interval - bool isTrueDef() - { - return (refType == RefTypeDef && isIntervalRef() && !getInterval()->isInternal); + return (!IsPhysRegRef() && (referent != nullptr)); } // isFixedRefOfRegMask indicates that the RefPosition has a fixed assignment to the register diff --git a/src/coreclr/src/jit/lsrabuild.cpp b/src/coreclr/src/jit/lsrabuild.cpp index fb4b8ff61be89..fba09a5286c07 100644 --- a/src/coreclr/src/jit/lsrabuild.cpp +++ b/src/coreclr/src/jit/lsrabuild.cpp @@ -1181,18 +1181,26 @@ bool LinearScan::buildKillPositionsForNode(GenTree* tree, LsraLocation currentLo { interval->preferCalleeSave = true; } - regMaskTP newPreferences = allRegs(interval->registerType) & (~killMask); - if (newPreferences != RBM_NONE) + // We are more conservative about allocating callee-saves registers to write-thru vars, since + // a call only requires reloading after (not spilling before). So we record (above) the fact + // that we'd prefer a callee-save register, but we don't update the preferences at this point. + // See the "heuristics for writeThru intervals" in 'buildIntervals()'. + if (!interval->isWriteThru || !isCallKill) { - interval->updateRegisterPreferences(newPreferences); - } - else - { - // If there are no callee-saved registers, the call could kill all the registers. - // This is a valid state, so in that case assert should not trigger. The RA will spill in order to - // free a register later. - assert(compiler->opts.compDbgEnC || (calleeSaveRegs(varDsc->lvType)) == RBM_NONE); + regMaskTP newPreferences = allRegs(interval->registerType) & (~killMask); + + if (newPreferences != RBM_NONE) + { + interval->updateRegisterPreferences(newPreferences); + } + else + { + // If there are no callee-saved registers, the call could kill all the registers. + // This is a valid state, so in that case assert should not trigger. The RA will spill in order + // to free a register later. + assert(compiler->opts.compDbgEnC || (calleeSaveRegs(varDsc->lvType)) == RBM_NONE); + } } } } @@ -1791,6 +1799,31 @@ void LinearScan::insertZeroInitRefPositions() } } } + + // We must also insert zero-inits for any finallyVars if they are refs or if compInitMem is true. 
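+    // (A ref-typed finally var must hold a valid GC value if the handler runs before any def has
+    // executed on the non-exceptional path; the zero-init guarantees that.)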
+    if (compiler->lvaEnregEHVars)
+    {
+        VarSetOps::Iter iter(compiler, finallyVars);
+        unsigned        varIndex = 0;
+        while (iter.NextElem(&varIndex))
+        {
+            LclVarDsc* varDsc = compiler->lvaGetDescByTrackedIndex(varIndex);
+            if (!varDsc->lvIsParam && isCandidateVar(varDsc))
+            {
+                JITDUMP("V%02u is a finally var:", compiler->lvaTrackedIndexToLclNum(varIndex));
+                Interval* interval = getIntervalForLocalVar(varIndex);
+                if (compiler->info.compInitMem || varTypeIsGC(varDsc->TypeGet()))
+                {
+                    JITDUMP(" creating ZeroInit\n");
+                    GenTree*     firstNode = getNonEmptyBlock(compiler->fgFirstBB)->firstNode();
+                    RefPosition* pos       = newRefPosition(interval, MinLocation, RefTypeZeroInit, firstNode,
+                                                      allRegs(interval->registerType));
+                    pos->setRegOptional(true);
+                    varDsc->lvMustInit = true;
+                }
+            }
+        }
+    }
 }
 
 #if defined(UNIX_AMD64_ABI)
@@ -2101,49 +2134,72 @@ void LinearScan::buildIntervals()
             currentLoc = 1;
         }
 
-        // Any lclVars live-in to a block are resolution candidates.
-        VarSetOps::UnionD(compiler, resolutionCandidateVars, currentLiveVars);
-
-        if (!blockInfo[block->bbNum].hasEHBoundaryIn)
+        // Handle special cases for live-in.
+        // If this block hasEHBoundaryIn, then we will mark the recentRefPosition of each EH Var preemptively as
+        // spillAfter, since we don't want them to remain in registers.
+        // Otherwise, determine if we need any DummyDefs.
+        // We need DummyDefs for cases where "predBlock" isn't really a predecessor.
+        // Note that it's possible to have uses of uninitialized variables, in which case even the first
+        // block may require DummyDefs, which we are not currently adding - this means that these variables
+        // will always be considered to be in memory on entry (and reloaded when the use is encountered).
+        // TODO-CQ: Consider how best to tune this. Currently, if we create DummyDefs for uninitialized
+        // variables (which may actually be initialized along the dynamically executed paths, but not
+        // on all static paths), we wind up with excessive liveranges for some of these variables.
+
+        if (blockInfo[block->bbNum].hasEHBoundaryIn)
         {
-            // Determine if we need any DummyDefs.
-            // We need DummyDefs for cases where "predBlock" isn't really a predecessor.
-            // Note that it's possible to have uses of unitialized variables, in which case even the first
-            // block may require DummyDefs, which we are not currently adding - this means that these variables
-            // will always be considered to be in memory on entry (and reloaded when the use is encountered).
-            // TODO-CQ: Consider how best to tune this. Currently, if we create DummyDefs for uninitialized
-            // variables (which may actually be initialized along the dynamically executed paths, but not
-            // on all static paths), we wind up with excessive liveranges for some of these variables.
-
-            VARSET_TP newLiveIn(VarSetOps::MakeCopy(compiler, currentLiveVars));
-            if (predBlock != nullptr)
+            VARSET_TP       liveInEHVars(VarSetOps::Intersection(compiler, currentLiveVars, exceptVars));
+            VarSetOps::Iter iter(compiler, liveInEHVars);
+            unsigned        varIndex = 0;
+            while (iter.NextElem(&varIndex))
             {
-                // Compute set difference: newLiveIn = currentLiveVars - predBlock->bbLiveOut
-                VarSetOps::DiffD(compiler, newLiveIn, predBlock->bbLiveOut);
+                Interval* interval = getIntervalForLocalVar(varIndex);
+                if (interval->recentRefPosition != nullptr)
+                {
+                    JITDUMP("  Marking RP #%d of V%02u as spillAfter\n", interval->recentRefPosition->rpNum,
+                            interval->varNum);
+                    interval->recentRefPosition->spillAfter = true;
+                }
             }
-            bool needsDummyDefs = (!VarSetOps::IsEmpty(compiler, newLiveIn) && block != compiler->fgFirstBB);
-
-            // Create dummy def RefPositions
+        }
+        else
+        {
+            // Any lclVars live-in on a non-EH boundary edge are resolution candidates.
+            VarSetOps::UnionD(compiler, resolutionCandidateVars, currentLiveVars);
 
-            if (needsDummyDefs)
+            if (block != compiler->fgFirstBB)
             {
-                // If we are using locations from a predecessor, we should never require DummyDefs.
-                assert(!predBlockIsAllocated);
+                VARSET_TP newLiveIn(VarSetOps::MakeCopy(compiler, currentLiveVars));
+                if (predBlock != nullptr)
+                {
+                    // Compute set difference: newLiveIn = currentLiveVars - predBlock->bbLiveOut
+                    VarSetOps::DiffD(compiler, newLiveIn, predBlock->bbLiveOut);
+                }
+                // Don't create dummy defs for EH vars; we'll load them from the stack as/when needed.
+                VarSetOps::DiffD(compiler, newLiveIn, exceptVars);
+
+                // Create dummy def RefPositions
 
-                JITDUMP("Creating dummy definitions\n");
-                VarSetOps::Iter iter(compiler, newLiveIn);
-                unsigned varIndex = 0;
-                while (iter.NextElem(&varIndex))
+                if (!VarSetOps::IsEmpty(compiler, newLiveIn))
                 {
-                    // Add a dummyDef for any candidate vars that are in the "newLiveIn" set.
-                    LclVarDsc* varDsc = compiler->lvaGetDescByTrackedIndex(varIndex);
-                    assert(isCandidateVar(varDsc));
-                    Interval* interval = getIntervalForLocalVar(varIndex);
-                    RefPosition* pos = newRefPosition(interval, currentLoc, RefTypeDummyDef, nullptr,
-                                                      allRegs(interval->registerType));
-                    pos->setRegOptional(true);
+                    // If we are using locations from a predecessor, we should never require DummyDefs.
+                    assert(!predBlockIsAllocated);
+
+                    JITDUMP("Creating dummy definitions\n");
+                    VarSetOps::Iter iter(compiler, newLiveIn);
+                    unsigned        varIndex = 0;
+                    while (iter.NextElem(&varIndex))
+                    {
+                        // Add a dummyDef for any candidate vars that are in the "newLiveIn" set.
+                        LclVarDsc* varDsc = compiler->lvaGetDescByTrackedIndex(varIndex);
+                        assert(isCandidateVar(varDsc));
+                        Interval*    interval = getIntervalForLocalVar(varIndex);
+                        RefPosition* pos      = newRefPosition(interval, currentLoc, RefTypeDummyDef, nullptr,
+                                                          allRegs(interval->registerType));
+                        pos->setRegOptional(true);
+                    }
+                    JITDUMP("Finished creating dummy definitions\n\n");
                 }
-                JITDUMP("Finished creating dummy definitions\n\n");
             }
         }
     }
@@ -2157,6 +2213,23 @@ void LinearScan::buildIntervals()
         currentLoc += 2;
        JITDUMP("\n");
+        if (firstColdLoc == MaxLocation)
+        {
+            if (block->isRunRarely())
+            {
+                firstColdLoc = currentLoc;
+                JITDUMP("firstColdLoc = %d\n", firstColdLoc);
+            }
+        }
+        else
+        {
+            // TODO: We'd like to assert the following but we don't currently ensure that only
+            // "RunRarely" blocks are contiguous.
+            // (The funclets will generally be last, but we don't follow layout order, so we
+            // don't have to preserve that in the block sequence.)
+ // assert(block->isRunRarely()); + } + LIR::Range& blockRange = LIR::AsRange(block); for (GenTree* node : blockRange.NonPhiNodes()) { @@ -2211,85 +2284,80 @@ void LinearScan::buildIntervals() if (enregisterLocalVars) { - // We don't need exposed uses for an EH edge, because no lclVars will be kept in - // registers across such edges. - if (!blockInfo[block->bbNum].hasEHBoundaryOut) + // Insert exposed uses for a lclVar that is live-out of 'block' but not live-in to the + // next block, or any unvisited successors. + // This will address lclVars that are live on a backedge, as well as those that are kept + // live at a GT_JMP. + // + // Blocks ending with "jmp method" are marked as BBJ_HAS_JMP, + // and jmp call is represented using GT_JMP node which is a leaf node. + // Liveness phase keeps all the arguments of the method live till the end of + // block by adding them to liveout set of the block containing GT_JMP. + // + // The target of a GT_JMP implicitly uses all the current method arguments, however + // there are no actual references to them. This can cause LSRA to assert, because + // the variables are live but it sees no references. In order to correctly model the + // liveness of these arguments, we add dummy exposed uses, in the same manner as for + // backward branches. This will happen automatically via expUseSet. + // + // Note that a block ending with GT_JMP has no successors and hence the variables + // for which dummy use ref positions are added are arguments of the method. + + VARSET_TP expUseSet(VarSetOps::MakeCopy(compiler, block->bbLiveOut)); + VarSetOps::IntersectionD(compiler, expUseSet, registerCandidateVars); + BasicBlock* nextBlock = getNextBlock(); + if (nextBlock != nullptr) { - // Insert exposed uses for a lclVar that is live-out of 'block' but not live-in to the - // next block, or any unvisited successors. - // This will address lclVars that are live on a backedge, as well as those that are kept - // live at a GT_JMP. - // - // Blocks ending with "jmp method" are marked as BBJ_HAS_JMP, - // and jmp call is represented using GT_JMP node which is a leaf node. - // Liveness phase keeps all the arguments of the method live till the end of - // block by adding them to liveout set of the block containing GT_JMP. - // - // The target of a GT_JMP implicitly uses all the current method arguments, however - // there are no actual references to them. This can cause LSRA to assert, because - // the variables are live but it sees no references. In order to correctly model the - // liveness of these arguments, we add dummy exposed uses, in the same manner as for - // backward branches. This will happen automatically via expUseSet. - // - // Note that a block ending with GT_JMP has no successors and hence the variables - // for which dummy use ref positions are added are arguments of the method. 
- - VARSET_TP expUseSet(VarSetOps::MakeCopy(compiler, block->bbLiveOut)); - VarSetOps::IntersectionD(compiler, expUseSet, registerCandidateVars); - BasicBlock* nextBlock = getNextBlock(); - if (nextBlock != nullptr) - { - VarSetOps::DiffD(compiler, expUseSet, nextBlock->bbLiveIn); - } - for (BasicBlock* succ : block->GetAllSuccs(compiler)) + VarSetOps::DiffD(compiler, expUseSet, nextBlock->bbLiveIn); + } + for (BasicBlock* succ : block->GetAllSuccs(compiler)) + { + if (VarSetOps::IsEmpty(compiler, expUseSet)) { - if (VarSetOps::IsEmpty(compiler, expUseSet)) - { - break; - } - - if (isBlockVisited(succ)) - { - continue; - } - VarSetOps::DiffD(compiler, expUseSet, succ->bbLiveIn); + break; } - if (!VarSetOps::IsEmpty(compiler, expUseSet)) + if (isBlockVisited(succ)) { - JITDUMP("Exposed uses:"); - VarSetOps::Iter iter(compiler, expUseSet); - unsigned varIndex = 0; - while (iter.NextElem(&varIndex)) - { - LclVarDsc* varDsc = compiler->lvaGetDescByTrackedIndex(varIndex); - assert(isCandidateVar(varDsc)); - Interval* interval = getIntervalForLocalVar(varIndex); - regMaskTP regMask = allRegs(interval->registerType); - RefPosition* pos = newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, regMask); - pos->setRegOptional(true); - JITDUMP(" V%02u", compiler->lvaTrackedIndexToLclNum(varIndex)); - } - JITDUMP("\n"); + continue; } + VarSetOps::DiffD(compiler, expUseSet, succ->bbLiveIn); } - // Clear the "last use" flag on any vars that are live-out from this block. + if (!VarSetOps::IsEmpty(compiler, expUseSet)) { - VARSET_TP bbLiveDefs(VarSetOps::Intersection(compiler, registerCandidateVars, block->bbLiveOut)); - VarSetOps::Iter iter(compiler, bbLiveDefs); + JITDUMP("Exposed uses:"); + VarSetOps::Iter iter(compiler, expUseSet); unsigned varIndex = 0; while (iter.NextElem(&varIndex)) { - LclVarDsc* const varDsc = compiler->lvaGetDescByTrackedIndex(varIndex); + unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; + LclVarDsc* varDsc = compiler->lvaTable + varNum; assert(isCandidateVar(varDsc)); - RefPosition* const lastRP = getIntervalForLocalVar(varIndex)->lastRefPosition; - // We should be able to assert that lastRP is non-null if it is live-out, but sometimes liveness - // lies. - if ((lastRP != nullptr) && (lastRP->bbNum == block->bbNum)) - { - lastRP->lastUse = false; - } + Interval* interval = getIntervalForLocalVar(varIndex); + RefPosition* pos = + newRefPosition(interval, currentLoc, RefTypeExpUse, nullptr, allRegs(interval->registerType)); + pos->setRegOptional(true); + JITDUMP(" V%02u", varNum); + } + JITDUMP("\n"); + } + + // Clear the "last use" flag on any vars that are live-out from this block. + VARSET_TP bbLiveDefs(VarSetOps::Intersection(compiler, registerCandidateVars, block->bbLiveOut)); + VarSetOps::Iter iter(compiler, bbLiveDefs); + unsigned varIndex = 0; + while (iter.NextElem(&varIndex)) + { + unsigned varNum = compiler->lvaTrackedToVarNum[varIndex]; + LclVarDsc* const varDsc = &compiler->lvaTable[varNum]; + assert(isCandidateVar(varDsc)); + RefPosition* const lastRP = getIntervalForLocalVar(varIndex)->lastRefPosition; + // We should be able to assert that lastRP is non-null if it is live-out, but sometimes liveness + // lies. + if ((lastRP != nullptr) && (lastRP->bbNum == block->bbNum)) + { + lastRP->lastUse = false; } } @@ -2327,6 +2395,62 @@ void LinearScan::buildIntervals() pos->setRegOptional(true); } } + // Adjust heuristics for writeThru intervals. 
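+    // The aim is to keep an EH var in a callee-save register only when its weighted reference count
+    // justifies the prolog/epilog save and restore; otherwise leave its preferences unchanged.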
+    if (compiler->compHndBBtabCount > 0)
+    {
+        VarSetOps::Iter iter(compiler, exceptVars);
+        unsigned        varIndex = 0;
+        while (iter.NextElem(&varIndex))
+        {
+            unsigned   varNum   = compiler->lvaTrackedToVarNum[varIndex];
+            LclVarDsc* varDsc   = compiler->lvaTable + varNum;
+            Interval*  interval = getIntervalForLocalVar(varIndex);
+            assert(interval->isWriteThru);
+            BasicBlock::weight_t weight = varDsc->lvRefCntWtd();
+
+            // We'd like to only allocate registers for EH vars that have enough uses
+            // to compensate for the additional registers being live (and for the possibility
+            // that we may have to insert an additional copy).
+            // However, we don't currently have that information available. Instead, we'll
+            // aggressively assume that these vars are defined once, at their first RefPosition.
+            //
+            RefPosition* firstRefPosition = interval->firstRefPosition;
+
+            // Incoming reg args are given an initial weight of 2 * BB_UNITY_WEIGHT
+            // (see lvaComputeRefCounts(); this may be reviewed/changed in future).
+            //
+            BasicBlock::weight_t initialWeight = (firstRefPosition->refType == RefTypeParamDef)
+                                                     ? (2 * BB_UNITY_WEIGHT)
+                                                     : blockInfo[firstRefPosition->bbNum].weight;
+            weight -= initialWeight;
+
+            // If the remaining weight is less than the initial weight, we'd like to allocate it only
+            // opportunistically, but we don't currently have a mechanism to do so.
+            // For now, we'll just avoid using callee-save registers if the weight is too low.
+            if (interval->preferCalleeSave)
+            {
+                // The benefit of a callee-save register isn't as high as it would be for a normal arg.
+                // We'll have at least the cost of saving & restoring the callee-save register,
+                // so we won't break even until we have at least 4 * BB_UNITY_WEIGHT.
+                // Given that we also don't have a good way to tell whether the variable is live
+                // across a call in the non-EH code, we'll be extra conservative about this.
+                // Note that for writeThru intervals we don't update the preferences to be only callee-save.
+                unsigned calleeSaveCount =
+                    (varTypeIsFloating(interval->registerType)) ? CNT_CALLEE_SAVED_FLOAT : CNT_CALLEE_ENREG;
+                if ((weight <= (BB_UNITY_WEIGHT * 7)) || varDsc->lvVarIndex >= calleeSaveCount)
+                {
+                    // If this is relatively low weight, don't prefer callee-save at all.
+                    interval->preferCalleeSave = false;
+                }
+                else
+                {
+                    // In other cases, we'll add in the callee-save regs to the preferences, but not clear
+                    // the non-callee-save regs. We also handle this case specially in tryAllocateFreeReg().
+                    interval->registerPreferences |= calleeSaveRegs(interval->registerType);
+                }
+            }
+        }
+    }
 
 #ifdef DEBUG
     if (getLsraExtendLifeTimes())
@@ -3023,7 +3147,20 @@ int LinearScan::BuildStoreLoc(GenTreeLclVarCommon* storeLoc)
             srcInterval->assignRelatedInterval(varDefInterval);
         }
     }
-    newRefPosition(varDefInterval, currentLoc + 1, RefTypeDef, storeLoc, allRegs(storeLoc->TypeGet()));
+    RefPosition* def =
+        newRefPosition(varDefInterval, currentLoc + 1, RefTypeDef, storeLoc, allRegs(storeLoc->TypeGet()));
+    if (varDefInterval->isWriteThru)
+    {
+        // We always make write-thru defs reg-optional, as we can store them if they don't
+        // get a register.
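+        // (If no register happens to be free at the def, the value is stored directly to the stack
+        // home, which a write-thru def must do anyway.)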
+ def->regOptional = true; + } +#if FEATURE_PARTIAL_SIMD_CALLEE_SAVE + if (varTypeNeedsPartialCalleeSave(varDefInterval->registerType)) + { + varDefInterval->isPartiallySpilled = false; + } +#endif // FEATURE_PARTIAL_SIMD_CALLEE_SAVE } return srcCount; diff --git a/src/coreclr/src/jit/treelifeupdater.cpp b/src/coreclr/src/jit/treelifeupdater.cpp index 3396948705aa9..f373e0c89a18e 100644 --- a/src/coreclr/src/jit/treelifeupdater.cpp +++ b/src/coreclr/src/jit/treelifeupdater.cpp @@ -96,11 +96,13 @@ void TreeLifeUpdater::UpdateLifeVar(GenTree* tree) { compiler->codeGen->genUpdateVarReg(varDsc, tree); } - if (varDsc->lvIsInReg() && tree->GetRegNum() != REG_NA) + bool isInReg = varDsc->lvIsInReg() && tree->GetRegNum() != REG_NA; + bool isInMemory = !isInReg || varDsc->lvLiveInOutOfHndlr; + if (isInReg) { compiler->codeGen->genUpdateRegLife(varDsc, isBorn, isDying DEBUGARG(tree)); } - else + if (isInMemory) { VarSetOps::AddElemD(compiler, stackVarDeltaSet, varDsc->lvVarIndex); } @@ -131,6 +133,8 @@ void TreeLifeUpdater::UpdateLifeVar(GenTree* tree) if (fldVarDsc->lvTracked) { unsigned fldVarIndex = fldVarDsc->lvVarIndex; + bool isInReg = fldVarDsc->lvIsInReg(); + bool isInMemory = !isInReg || fldVarDsc->lvLiveInOutOfHndlr; noway_assert(fldVarIndex < compiler->lvaTrackedCount); if (!hasDeadTrackedFieldVars) { @@ -139,7 +143,7 @@ void TreeLifeUpdater::UpdateLifeVar(GenTree* tree) { // We repeat this call here and below to avoid the VarSetOps::IsMember // test in this, the common case, where we have no deadTrackedFieldVars. - if (fldVarDsc->lvIsInReg()) + if (isInReg) { if (isBorn) { @@ -147,7 +151,7 @@ void TreeLifeUpdater::UpdateLifeVar(GenTree* tree) } compiler->codeGen->genUpdateRegLife(fldVarDsc, isBorn, isDying DEBUGARG(tree)); } - else + if (isInMemory) { VarSetOps::AddElemD(compiler, stackVarDeltaSet, fldVarIndex); } @@ -155,7 +159,7 @@ void TreeLifeUpdater::UpdateLifeVar(GenTree* tree) } else if (ForCodeGen && VarSetOps::IsMember(compiler, varDeltaSet, fldVarIndex)) { - if (compiler->lvaTable[i].lvIsInReg()) + if (isInReg) { if (isBorn) { @@ -163,7 +167,7 @@ void TreeLifeUpdater::UpdateLifeVar(GenTree* tree) } compiler->codeGen->genUpdateRegLife(fldVarDsc, isBorn, isDying DEBUGARG(tree)); } - else + if (isInMemory) { VarSetOps::AddElemD(compiler, stackVarDeltaSet, fldVarIndex); }
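
A note on the underlying model: every change above enforces a single discipline for write-thru EH vars. The sketch below is a minimal, self-contained C++ illustration of that discipline. It is not JIT code, and the WriteThruVar type and all of its members are hypothetical names invented for this example: each def updates both the register copy and the stack home (the GTF_SPILL store), the register copy stays usable after the def (the GTF_SPILLED marking), an EH boundary invalidates only the register copy, and a later use falls back to the always-valid stack home.

#include <cassert>
#include <cstdio>

// Hypothetical model of a write-thru EH variable: the stack home is kept valid at all times,
// while the register copy is merely an optimization that EH boundaries may discard.
struct WriteThruVar
{
    int  stackHome = 0;     // always valid after any def (the "write-thru" part)
    int  regValue  = 0;     // register copy; not trusted across EH boundaries
    bool inReg     = false; // is the register copy currently live?

    void def(int value)
    {
        regValue  = value;
        stackHome = value; // store at every def, like GTF_SPILL on a write-thru def
        inReg     = true;  // register remains live, like GTF_SPILLED on a non-last-use def
    }
    int use() const
    {
        return inReg ? regValue : stackHome; // prefer the register; fall back to the home
    }
    void ehBoundary()
    {
        inReg = false; // e.g. entering a handler: only the stack home survives
    }
};

int main()
{
    WriteThruVar v;
    v.def(42);
    assert(v.use() == 42); // served from the register copy
    v.ehBoundary();        // register copy discarded, as on an EH edge
    assert(v.use() == 42); // reloaded from the always-valid stack home
    printf("%d\n", v.use());
    return 0;
}

This is also why resolution can always use REG_STK as the canonical location for these vars: no move is ever required to make the stack value correct.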