diff --git a/src/coreclr/jit/compiler.h b/src/coreclr/jit/compiler.h index 9c0ef938a9d55..ded8f6ddd3a66 100644 --- a/src/coreclr/jit/compiler.h +++ b/src/coreclr/jit/compiler.h @@ -7380,17 +7380,17 @@ class Compiler Statement* stmt; const unsigned loopNum; const bool cloneForArrayBounds; - const bool cloneForTypeTests; + const bool cloneForGDVTests; LoopCloneVisitorInfo(LoopCloneContext* context, unsigned loopNum, Statement* stmt, bool cloneForArrayBounds, - bool cloneForTypeTests) + bool cloneForGDVTests) : context(context) , stmt(nullptr) , loopNum(loopNum) , cloneForArrayBounds(cloneForArrayBounds) - , cloneForTypeTests(cloneForTypeTests) + , cloneForGDVTests(cloneForGDVTests) { } }; @@ -7404,6 +7404,8 @@ class Compiler fgWalkResult optCanOptimizeByLoopCloning(GenTree* tree, LoopCloneVisitorInfo* info); bool optObtainLoopCloningOpts(LoopCloneContext* context); bool optIsLoopClonable(unsigned loopInd); + bool optCheckLoopCloningGDVTestProfitable(GenTreeOp* guard, LoopCloneVisitorInfo* info); + bool optIsHandleOrIndirOfHandle(GenTree* tree, GenTreeFlags handleType); bool optLoopCloningEnabled(); diff --git a/src/coreclr/jit/indirectcalltransformer.cpp b/src/coreclr/jit/indirectcalltransformer.cpp index 6af4872473855..9879cd5e094be 100644 --- a/src/coreclr/jit/indirectcalltransformer.cpp +++ b/src/coreclr/jit/indirectcalltransformer.cpp @@ -450,11 +450,9 @@ class IndirectCallTransformer class GuardedDevirtualizationTransformer final : public Transformer { - unsigned m_targetLclNum; - public: GuardedDevirtualizationTransformer(Compiler* compiler, BasicBlock* block, Statement* stmt) - : Transformer(compiler, block, stmt), m_targetLclNum(BAD_VAR_NUM), returnTemp(BAD_VAR_NUM) + : Transformer(compiler, block, stmt), returnTemp(BAD_VAR_NUM) { } @@ -607,25 +605,10 @@ class IndirectCallTransformer // which case the check will be moved into the success case of // a previous GDV and thus may not execute when we hit the cold // path. - // TODO-GDV: Consider duplicating the store at the end of the - // cold case for the previous GDV. Then we can reuse the target - // if the second check of a chained GDV fails. - bool reuseTarget = (origCall->gtCallMoreFlags & GTF_CALL_M_GUARDED_DEVIRT_CHAIN) == 0; if (origCall->IsVirtualVtable()) { GenTree* tarTree = compiler->fgExpandVirtualVtableCallTarget(origCall); - if (reuseTarget) - { - m_targetLclNum = compiler->lvaGrabTemp(false DEBUGARG("guarded devirt call target temp")); - - GenTree* asgTree = compiler->gtNewTempAssign(m_targetLclNum, tarTree); - Statement* asgStmt = compiler->fgNewStmtFromTree(asgTree, stmt->GetDebugInfo()); - compiler->fgInsertStmtAtEnd(checkBlock, asgStmt); - - tarTree = compiler->gtNewLclvNode(m_targetLclNum, TYP_I_IMPL); - } - CORINFO_METHOD_HANDLE methHnd = guardedInfo->guardedMethodHandle; CORINFO_CONST_LOOKUP lookup; compiler->info.compCompHnd->getFunctionEntryPoint(methHnd, &lookup); @@ -635,35 +618,12 @@ class IndirectCallTransformer } else { - // Reusing the call target for delegates is more - // complicated. Essentially we need to do the - // transformation done in LowerDelegateInvoke by converting - // the call to CT_INDIRECT and reusing the target address. - // We will do that transformation in CreateElse, but here - // we need to stash the target. - CLANG_FORMAT_COMMENT_ANCHOR; -#ifdef TARGET_ARM - // Not impossible to support, but would additionally - // require us to load the wrapper delegate cell when - // expanding. 
- reuseTarget &= (origCall->gtCallMoreFlags & GTF_CALL_M_WRAPPER_DELEGATE_INV) == 0; -#endif - GenTree* offset = compiler->gtNewIconNode((ssize_t)compiler->eeGetEEInfo()->offsetOfDelegateFirstTarget, TYP_I_IMPL); GenTree* tarTree = compiler->gtNewOperNode(GT_ADD, TYP_BYREF, thisTree, offset); tarTree = compiler->gtNewIndir(TYP_I_IMPL, tarTree); - - if (reuseTarget) - { - m_targetLclNum = compiler->lvaGrabTemp(false DEBUGARG("guarded devirt call target temp")); - - GenTree* asgTree = compiler->gtNewTempAssign(m_targetLclNum, tarTree); - Statement* asgStmt = compiler->fgNewStmtFromTree(asgTree, stmt->GetDebugInfo()); - compiler->fgInsertStmtAtEnd(checkBlock, asgStmt); - tarTree = compiler->gtNewLclvNode(m_targetLclNum, TYP_I_IMPL); - } + tarTree->gtFlags |= GTF_IND_INVARIANT; CORINFO_METHOD_HANDLE methHnd = guardedInfo->guardedMethodHandle; CORINFO_CONST_LOOKUP lookup; @@ -970,38 +930,6 @@ class IndirectCallTransformer newStmt->SetRootNode(assign); } - if (m_targetLclNum != BAD_VAR_NUM) - { - if (call->IsVirtualVtable()) - { - // We already loaded the target once for the check, so reuse it from the temp. - call->gtControlExpr = compiler->gtNewLclvNode(m_targetLclNum, TYP_I_IMPL); - call->SetExpandedEarly(); - } - else if (call->IsDelegateInvoke()) - { - // Target was saved into a temp during check. We expand the - // delegate call to a CT_INDIRECT call that uses the target - // directly, somewhat similarly to LowerDelegateInvoke. - call->gtCallType = CT_INDIRECT; - call->gtCallAddr = compiler->gtNewLclvNode(m_targetLclNum, TYP_I_IMPL); - call->gtCallCookie = nullptr; - call->gtCallMoreFlags &= ~GTF_CALL_M_DELEGATE_INV; - - GenTree* thisOffset = - compiler->gtNewIconNode((ssize_t)compiler->eeGetEEInfo()->offsetOfDelegateInstance, TYP_I_IMPL); - CallArg* thisArg = call->gtArgs.GetThisArg(); - GenTree* delegateObj = thisArg->GetNode(); - - assert(delegateObj->OperIsLocal()); - GenTree* newThis = - compiler->gtNewOperNode(GT_ADD, TYP_BYREF, compiler->gtCloneExpr(delegateObj), thisOffset); - newThis = compiler->gtNewIndir(TYP_REF, newThis); - - thisArg->SetEarlyNode(newThis); - } - } - compiler->fgInsertStmtAtEnd(elseBlock, newStmt); // Set the original statement to a nop. diff --git a/src/coreclr/jit/jitconfigvalues.h b/src/coreclr/jit/jitconfigvalues.h index 23fb5ee3c9ff5..b4b45976c1c76 100644 --- a/src/coreclr/jit/jitconfigvalues.h +++ b/src/coreclr/jit/jitconfigvalues.h @@ -41,8 +41,8 @@ CONFIG_INTEGER(JitBreakOnBadCode, W("JitBreakOnBadCode"), 0) CONFIG_INTEGER(JitBreakOnMinOpts, W("JITBreakOnMinOpts"), 0) // Halt if jit switches to MinOpts CONFIG_INTEGER(JitBreakOnUnsafeCode, W("JitBreakOnUnsafeCode"), 0) CONFIG_INTEGER(JitCloneLoops, W("JitCloneLoops"), 1) // If 0, don't clone. Otherwise clone loops for optimizations. -CONFIG_INTEGER(JitCloneLoopsWithTypeTests, W("JitCloneLoopsWithTypeTests"), 1) // If 0, don't clone loops based on - // invariant type tests +CONFIG_INTEGER(JitCloneLoopsWithGdvTests, W("JitCloneLoopsWithGdvTests"), 1) // If 0, don't clone loops based on + // invariant type/method address tests CONFIG_INTEGER(JitDebugLogLoopCloning, W("JitDebugLogLoopCloning"), 0) // In debug builds log places where loop cloning // optimizations are performed on the fast path. 
CONFIG_INTEGER(JitDefaultFill, W("JitDefaultFill"), 0xdd) // In debug builds, initialize the memory allocated by the nra diff --git a/src/coreclr/jit/loopcloning.cpp b/src/coreclr/jit/loopcloning.cpp index 1eb74e95ff6a4..d99e25ac45c1a 100644 --- a/src/coreclr/jit/loopcloning.cpp +++ b/src/coreclr/jit/loopcloning.cpp @@ -131,18 +131,40 @@ GenTree* LC_Ident::ToGenTree(Compiler* comp, BasicBlock* bb) return comp->gtNewIconNode(constant); case Var: return comp->gtNewLclvNode(lclNum, comp->lvaTable[lclNum].lvType); - case ArrLen: - return arrLen.ToGenTree(comp, bb); + case ArrAccess: + return arrAccess.ToGenTree(comp, bb); case Null: return comp->gtNewIconNode(0, TYP_REF); case ClassHandle: return comp->gtNewIconHandleNode((size_t)clsHnd, GTF_ICON_CLASS_HDL); - case Indir: + case IndirOfLocal: { - GenTree* const indir = comp->gtNewIndir(TYP_I_IMPL, comp->gtNewLclvNode(lclNum, TYP_REF)); + GenTree* addr = comp->gtNewLclvNode(lclNum, TYP_REF); + if (indirOffs != 0) + { + addr = comp->gtNewOperNode(GT_ADD, TYP_BYREF, addr, + comp->gtNewIconNode(static_cast<ssize_t>(indirOffs), TYP_I_IMPL)); + } + + GenTree* const indir = comp->gtNewIndir(TYP_I_IMPL, addr); indir->gtFlags |= GTF_IND_INVARIANT; return indir; } + case MethodAddr: + { + GenTreeIntCon* methodAddrHandle = comp->gtNewIconHandleNode((size_t)methAddr, GTF_ICON_FTN_ADDR); + INDEBUG(methodAddrHandle->gtTargetHandle = (size_t)targetMethHnd); + return methodAddrHandle; + } + case IndirOfMethodAddrSlot: + { + GenTreeIntCon* slot = comp->gtNewIconHandleNode((size_t)methAddr, GTF_ICON_FTN_ADDR); + INDEBUG(slot->gtTargetHandle = (size_t)targetMethHnd); + GenTree* indir = comp->gtNewIndir(TYP_I_IMPL, slot); + indir->gtFlags |= GTF_IND_NONFAULTING | GTF_IND_INVARIANT; + indir->gtFlags &= ~GTF_EXCEPT; + return indir; + } default: assert(!"Could not convert LC_Ident to GenTree"); unreached(); @@ -937,7 +959,7 @@ void LC_ArrayDeref::DeriveLevelConditions(JitExpandArrayStack<JitExpandArrayStack<LC_Condition>*>* conds) { // For level 0, just push (a != null). (*conds)[level]->Push( - LC_Condition(GT_NE, LC_Expr(LC_Ident(Lcl(), LC_Ident::Var)), LC_Expr(LC_Ident(LC_Ident::Null)))); + LC_Condition(GT_NE, LC_Expr(LC_Ident::CreateVar(Lcl())), LC_Expr(LC_Ident::CreateNull()))); } else { @@ -946,12 +968,13 @@ void LC_ArrayDeref::DeriveLevelConditions(JitExpandArrayStack<JitExpandArrayStack<LC_Condition>*>* conds) arrLen.oper = LC_Array::ArrLen; arrLen.dim = level - 1; (*conds)[level * 2 - 1]->Push( - LC_Condition(GT_LT, LC_Expr(LC_Ident(Lcl(), LC_Ident::Var)), LC_Expr(LC_Ident(arrLen)))); + LC_Condition(GT_LT, LC_Expr(LC_Ident::CreateVar(Lcl())), LC_Expr(LC_Ident::CreateArrAccess(arrLen)))); // Push condition (a[i] != null) LC_Array arrTmp = array; arrTmp.dim = level; - (*conds)[level * 2]->Push(LC_Condition(GT_NE, LC_Expr(LC_Ident(arrTmp)), LC_Expr(LC_Ident(LC_Ident::Null)))); + (*conds)[level * 2]->Push( + LC_Condition(GT_NE, LC_Expr(LC_Ident::CreateArrAccess(arrTmp)), LC_Expr(LC_Ident::CreateNull()))); } // Invoke on the children recursively.
@@ -1073,14 +1096,36 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext case LcOptInfo::LcTypeTest: { LcTypeTestOptInfo* ttInfo = optInfo->AsLcTypeTestOptInfo(); - LC_Ident objDeref = LC_Ident(ttInfo->lclNum, LC_Ident::Indir); - LC_Ident methodTable = LC_Ident(ttInfo->clsHnd, LC_Ident::ClassHandle); + LC_Ident objDeref = LC_Ident::CreateIndirOfLocal(ttInfo->lclNum, 0); + LC_Ident methodTable = LC_Ident::CreateClassHandle(ttInfo->clsHnd); LC_Condition cond(GT_EQ, LC_Expr(objDeref), LC_Expr(methodTable)); context->EnsureObjDerefs(loopNum)->Push(objDeref); context->EnsureConditions(loopNum)->Push(cond); break; } + case LcOptInfo::LcMethodAddrTest: + { + LcMethodAddrTestOptInfo* test = optInfo->AsLcMethodAddrTestOptInfo(); + LC_Ident objDeref = + LC_Ident::CreateIndirOfLocal(test->delegateLclNum, eeGetEEInfo()->offsetOfDelegateFirstTarget); + LC_Ident methAddr; + if (test->isSlot) + { + methAddr = LC_Ident::CreateIndirMethodAddrSlot(test->methAddr DEBUG_ARG(test->targetMethHnd)); + } + else + { + methAddr = LC_Ident::CreateMethodAddr(test->methAddr DEBUG_ARG(test->targetMethHnd)); + } + + LC_Condition cond(GT_EQ, LC_Expr(objDeref), LC_Expr(methAddr)); + + context->EnsureObjDerefs(loopNum)->Push(objDeref); + context->EnsureConditions(loopNum)->Push(cond); + break; + } + default: JITDUMP("Unknown opt\n"); return false; @@ -1140,7 +1185,7 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext if (!isIncreasingLoop) { // For decreasing loop, the init value needs to be checked against the array length - ident = LC_Ident(static_cast<unsigned>(loop->lpConstInit), LC_Ident::Const); + ident = LC_Ident::CreateConst(static_cast<unsigned>(loop->lpConstInit)); } } else @@ -1156,14 +1201,13 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext LC_Condition geZero; if (isIncreasingLoop) { - geZero = - LC_Condition(GT_GE, LC_Expr(LC_Ident(initLcl, LC_Ident::Var)), LC_Expr(LC_Ident(0u, LC_Ident::Const))); + geZero = LC_Condition(GT_GE, LC_Expr(LC_Ident::CreateVar(initLcl)), LC_Expr(LC_Ident::CreateConst(0u))); } else { // For decreasing loop, the init value needs to be checked against the array length - ident = LC_Ident(initLcl, LC_Ident::Var); - geZero = LC_Condition(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0u, LC_Ident::Const))); + ident = LC_Ident::CreateVar(initLcl); + geZero = LC_Condition(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident::CreateConst(0u))); } context->EnsureConditions(loopNum)->Push(geZero); } @@ -1181,7 +1225,7 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext if (isIncreasingLoop) { // For increasing loop, the limit value needs to be checked against the array length - ident = LC_Ident(static_cast<unsigned>(limit), LC_Ident::Const); + ident = LC_Ident::CreateConst(static_cast<unsigned>(limit)); } } else if (loop->lpFlags & LPFLG_VAR_LIMIT) { @@ -1197,13 +1241,12 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext if (isIncreasingLoop) { // For increasing loop, the limit value needs to be checked against the array length - ident = LC_Ident(limitLcl, LC_Ident::Var); - geZero = LC_Condition(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident(0u, LC_Ident::Const))); + ident = LC_Ident::CreateVar(limitLcl); + geZero = LC_Condition(GT_GE, LC_Expr(ident), LC_Expr(LC_Ident::CreateConst(0u))); } else { - geZero = - LC_Condition(GT_GE, LC_Expr(LC_Ident(limitLcl, LC_Ident::Var)), LC_Expr(LC_Ident(0u, LC_Ident::Const))); + geZero = LC_Condition(GT_GE, LC_Expr(LC_Ident::CreateVar(limitLcl)),
LC_Expr(LC_Ident::CreateConst(0u))); } context->EnsureConditions(loopNum)->Push(geZero); @@ -1216,7 +1259,7 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext JITDUMP("> ArrLen not matching\n"); return false; } - ident = LC_Ident(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen)); + ident = LC_Ident::CreateArrAccess(LC_Array(LC_Array::Jagged, index, LC_Array::ArrLen)); // Ensure that this array must be dereference-able, before executing the actual condition. LC_Array array(LC_Array::Jagged, index, LC_Array::None); @@ -1259,7 +1302,7 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext { LcJaggedArrayOptInfo* arrIndexInfo = optInfo->AsLcJaggedArrayOptInfo(); LC_Array arrLen(LC_Array::Jagged, &arrIndexInfo->arrIndex, arrIndexInfo->dim, LC_Array::ArrLen); - LC_Ident arrLenIdent = LC_Ident(arrLen); + LC_Ident arrLenIdent = LC_Ident::CreateArrAccess(arrLen); LC_Condition cond(opLimitCondition, LC_Expr(ident), LC_Expr(arrLenIdent)); context->EnsureConditions(loopNum)->Push(cond); @@ -1273,7 +1316,7 @@ bool Compiler::optDeriveLoopCloningConditions(unsigned loopNum, LoopCloneContext LcMdArrayOptInfo* mdArrInfo = optInfo->AsLcMdArrayOptInfo(); LC_Array arrLen(LC_Array(LC_Array::MdArray, mdArrInfo->GetArrIndexForDim(getAllocator(CMK_LoopClone)), mdArrInfo->dim, LC_Array::None)); - LC_Ident arrLenIdent = LC_Ident(arrLen); + LC_Ident arrLenIdent = LC_Ident::CreateArrAccess(arrLen); LC_Condition cond(opLimitCondition, LC_Expr(ident), LC_Expr(arrLenIdent)); context->EnsureConditions(loopNum)->Push(cond); @@ -1513,8 +1556,8 @@ bool Compiler::optComputeDerefConditions(unsigned loopNum, LoopCloneContext* con // ObjDeref array has indir(lcl), we want lcl. // LC_Ident& mtIndirIdent = (*objDeref)[i]; - LC_Ident ident(mtIndirIdent.LclNum(), LC_Ident::Var); - (*levelCond)[0]->Push(LC_Condition(GT_NE, LC_Expr(ident), LC_Expr(LC_Ident(LC_Ident::Null)))); + LC_Ident ident = LC_Ident::CreateVar(mtIndirIdent.LclNum()); + (*levelCond)[0]->Push(LC_Condition(GT_NE, LC_Expr(ident), LC_Expr(LC_Ident::CreateNull()))); } } @@ -1634,8 +1677,37 @@ void Compiler::optPerformStaticOptimizations(unsigned loopNum, LoopCloneContext* // TODO-CQ: CLONE: Implement. break; case LcOptInfo::LcTypeTest: - // We could optimize here. For now, let downstream opts clean this up. + case LcOptInfo::LcMethodAddrTest: + { + Statement* stmt; + GenTreeIndir* indir; + + if (optInfo->GetOptType() == LcOptInfo::LcTypeTest) + { + LcTypeTestOptInfo* typeTestInfo = optInfo->AsLcTypeTestOptInfo(); + stmt = typeTestInfo->stmt; + indir = typeTestInfo->methodTableIndir; + } + else + { + LcMethodAddrTestOptInfo* methodTestInfo = optInfo->AsLcMethodAddrTestOptInfo(); + stmt = methodTestInfo->stmt; + indir = methodTestInfo->delegateAddressIndir; + } + + JITDUMP("Updating flags on GDV guard inside hot loop. Before:\n"); + DISPSTMT(stmt); + + indir->gtFlags |= GTF_ORDER_SIDEEFF | GTF_IND_NONFAULTING; + indir->gtFlags &= ~GTF_EXCEPT; + assert(!fgStmtListThreaded); + gtUpdateStmtSideEffects(stmt); + + JITDUMP("After:\n"); + DISPSTMT(stmt); + break; + } default: break; @@ -2718,10 +2790,11 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop return WALK_SKIP_SUBTREES; } - if (info->cloneForTypeTests && tree->OperIs(GT_JTRUE)) + if (info->cloneForGDVTests && tree->OperIs(GT_JTRUE)) { - JITDUMP("...TT considering [%06u]\n", dspTreeID(tree)); - // Look for invariant type tests. 
+ JITDUMP("...GDV considering [%06u]\n", dspTreeID(tree)); + assert(info->stmt->GetRootNode() == tree); + // Look for invariant type/method address tests. // GenTree* const relop = tree->AsOp()->gtGetOp1(); @@ -2735,141 +2808,257 @@ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloning(GenTree* tree, Loop GenTree* relopOp1 = relop->AsOp()->gtGetOp1(); GenTree* relopOp2 = relop->AsOp()->gtGetOp2(); - // One side or the other must be an indir - // The other must be a loop invariant. - // Currently, we'll just look for a constant. + // One side or the other must be an indir and the other must be loop + // invariant. Currently, we'll just look for a constant or indir of a + // constant. Start out by normalizing it to the right. // - bool match = false; - if (relopOp1->OperIs(GT_IND) && relopOp2->IsIntegralConst()) - { - match = true; - } - else if (relopOp2->OperIs(GT_IND) && relopOp1->IsIntegralConst()) + if (optIsHandleOrIndirOfHandle(relopOp1, GTF_ICON_CLASS_HDL) || + optIsHandleOrIndirOfHandle(relopOp1, GTF_ICON_FTN_ADDR)) { std::swap(relopOp1, relopOp2); - match = true; } - if (!match) + if (!relopOp1->OperIs(GT_IND) || !relopOp1->TypeIs(TYP_I_IMPL, TYP_REF, TYP_BYREF)) { return WALK_CONTINUE; } - // The indir addr must be loop invariant TYP_REF local - // - GenTree* const indirAddr = relopOp1->AsIndir()->Addr(); + GenTreeIndir* indir = relopOp1->AsIndir(); + GenTree* indirAddr = indir->Addr(); - if (!indirAddr->TypeIs(TYP_REF)) + if (relopOp2->IsIconHandle(GTF_ICON_CLASS_HDL)) { - return WALK_CONTINUE; - } + // The indir addr must be loop invariant TYP_REF local + // - if (!indirAddr->OperIs(GT_LCL_VAR)) - { - return WALK_CONTINUE; - } + if (!indirAddr->TypeIs(TYP_REF)) + { + return WALK_CONTINUE; + } - if (!relopOp2->IsIconHandle(GTF_ICON_CLASS_HDL)) - { - return WALK_CONTINUE; - } + if (!indirAddr->OperIs(GT_LCL_VAR)) + { + return WALK_CONTINUE; + } - GenTreeLclVarCommon* const indirAddrLcl = indirAddr->AsLclVarCommon(); - const unsigned lclNum = indirAddrLcl->GetLclNum(); + GenTreeLclVarCommon* const indirAddrLcl = indirAddr->AsLclVarCommon(); + const unsigned lclNum = indirAddrLcl->GetLclNum(); - JITDUMP("... right form, V%02u\n", lclNum); + JITDUMP("... right form for type test with local V%02u\n", lclNum); - if (!optIsStackLocalInvariant(info->loopNum, lclNum)) - { - JITDUMP("... but not invariant\n"); - return WALK_CONTINUE; - } + if (!optIsStackLocalInvariant(info->loopNum, lclNum)) + { + JITDUMP("... but not invariant\n"); + return WALK_CONTINUE; + } - // Looks like we found an invariant type test. - // - JITDUMP("Loop " FMT_LP " has invariant type test [%06u] on V%02u ... ", info->loopNum, dspTreeID(tree), lclNum); + // Looks like we found an invariant type test. 
+ // + JITDUMP("Loop " FMT_LP " has invariant type test [%06u] on V%02u\n", info->loopNum, dspTreeID(tree), + lclNum); - // We only want this type test to inspire cloning if - // - // (1) we have profile data - // (2) the loop iterates frequently each time the method is called - // (3) the type test is frequently hit during the loop iteration - // (4) the type test is biased and highly likely to succeed - // - const LoopDsc& loopDsc = optLoopTable[info->loopNum]; - BasicBlock* const loopEntry = loopDsc.lpEntry; - BasicBlock* const typeTestBlock = compCurBB; - double const loopFrequency = 0.50; - double const typeTestFrequency = 0.50; - double const typeTestBias = 0.05; - - // Check for (1) - // - if (!loopEntry->hasProfileWeight() || !typeTestBlock->hasProfileWeight()) - { - JITDUMP(" but loop does not have profile data.\n"); - return WALK_CONTINUE; - } + if (optCheckLoopCloningGDVTestProfitable(relop->AsOp(), info)) + { + // Update the loop context. + // + assert(relopOp2->IsIconHandle(GTF_ICON_CLASS_HDL)); + CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)relopOp2->AsIntConCommon()->IconValue(); - // Check for (2) - // - if (loopEntry->getBBWeight(this) < (loopFrequency * BB_UNITY_WEIGHT)) - { - JITDUMP(" but loop does not iterate often enough.\n"); - return WALK_CONTINUE; + assert(compCurBB->lastStmt() == info->stmt); + info->context->EnsureLoopOptInfo(info->loopNum) + ->Push(new (this, CMK_LoopOpt) LcTypeTestOptInfo(info->stmt, indir, lclNum, clsHnd)); + } } - - // Check for (3) - // - if (typeTestBlock->bbWeight < (typeTestFrequency * loopEntry->bbWeight)) + else if (optIsHandleOrIndirOfHandle(relopOp2, GTF_ICON_FTN_ADDR)) { - JITDUMP(" but type test does not execute often enough within the loop.\n"); - return WALK_CONTINUE; - } + // ▌ JTRUE void + // └──▌ NE int + // ├──▌ CNS_INT(h) long 0x7ffdb1fa4a08 ftn + // └──▌ IND long + // └──▌ ADD byref <- Matching this tree + // ├──▌ LCL_VAR ref V00 arg0 + // └──▌ CNS_INT long 24 - // Check for (4) - // - BasicBlock* const hotSuccessor = relop->OperIs(GT_EQ) ? typeTestBlock->bbJumpDest : typeTestBlock->bbNext; - BasicBlock* const coldSuccessor = relop->OperIs(GT_EQ) ? typeTestBlock->bbNext : typeTestBlock->bbJumpDest; + // We expect indirections of the 'target' fields. Currently we + // support only the simple cases (one target address, i.e. no + // shuffle thunk/unboxing stubs). 
- if (!hotSuccessor->hasProfileWeight() || !coldSuccessor->hasProfileWeight()) - { - JITDUMP(" but type test successor blocks were not profiled.\n"); - return WALK_CONTINUE; - } + ssize_t offset = 0; + if (indirAddr->OperIs(GT_ADD)) + { + if (!indirAddr->gtGetOp2()->IsCnsIntOrI() || !indirAddr->gtGetOp2()->TypeIs(TYP_I_IMPL) || + indirAddr->gtGetOp2()->IsIconHandle()) + { + return WALK_CONTINUE; + } - if (hotSuccessor->bbWeight == BB_ZERO_WEIGHT) - { - JITDUMP(" but hot successor block " FMT_BB " is rarely run.\n", hotSuccessor->bbNum); - return WALK_CONTINUE; - } + offset = indirAddr->gtGetOp2()->AsIntConCommon()->IconValue(); + indirAddr = indirAddr->gtGetOp1(); + } - if (coldSuccessor->bbWeight > BB_ZERO_WEIGHT) - { - const weight_t bias = coldSuccessor->bbWeight / (hotSuccessor->bbWeight + coldSuccessor->bbWeight); + if (!indirAddr->TypeIs(TYP_REF)) + { + return WALK_CONTINUE; + } - if (bias > typeTestBias) + if (!indirAddr->OperIs(GT_LCL_VAR)) { - JITDUMP(" but type test not sufficiently biased: failure likelihood is " FMT_WT " > " FMT_WT "\n", bias, - typeTestBias); return WALK_CONTINUE; } - } - JITDUMP(" passed profile screening\n"); + if (offset != static_cast(eeGetEEInfo()->offsetOfDelegateFirstTarget)) + { + return WALK_CONTINUE; + } - // Update the loop context. - // - assert(relopOp2->IsIconHandle(GTF_ICON_CLASS_HDL)); - CORINFO_CLASS_HANDLE clsHnd = (CORINFO_CLASS_HANDLE)relopOp2->AsIntConCommon()->IconValue(); + unsigned lclNum = indirAddr->AsLclVarCommon()->GetLclNum(); + + JITDUMP("... right form for method address test with local V%02u\n", lclNum); + + LclVarDsc* dsc = lvaGetDesc(lclNum); + if (dsc->lvClassHnd == NO_CLASS_HANDLE) + { + JITDUMP("... but no class handle available for local\n"); + return WALK_CONTINUE; + } + + unsigned attribs = this->info.compCompHnd->getClassAttribs(dsc->lvClassHnd); + if ((attribs & CORINFO_FLG_DELEGATE) == 0) + { + JITDUMP("... but not a delegate instance\n"); + return WALK_CONTINUE; + } + + if (!optIsStackLocalInvariant(info->loopNum, lclNum)) + { + JITDUMP("... but not invariant\n"); + return WALK_CONTINUE; + } + + JITDUMP("Loop " FMT_LP " has invariant method address test [%06u] on V%02u\n", info->loopNum, + dspTreeID(tree), lclNum); - info->context->EnsureLoopOptInfo(info->loopNum) - ->Push(new (this, CMK_LoopOpt) LcTypeTestOptInfo(lclNum, clsHnd)); + if (optCheckLoopCloningGDVTestProfitable(relop->AsOp(), info)) + { + // Update the loop context. + // + GenTreeIntCon* iconHandle = + relopOp2->IsIconHandle() ? relopOp2->AsIntCon() : relopOp2->AsIndir()->Addr()->AsIntCon(); + assert(iconHandle->IsIconHandle(GTF_ICON_FTN_ADDR)); + assert(compCurBB->lastStmt() == info->stmt); + LcMethodAddrTestOptInfo* optInfo = new (this, CMK_LoopOpt) + LcMethodAddrTestOptInfo(info->stmt, indir, lclNum, (void*)iconHandle->IconValue(), + relopOp2 != iconHandle DEBUG_ARG( + (CORINFO_METHOD_HANDLE)iconHandle->gtTargetHandle)); + info->context->EnsureLoopOptInfo(info->loopNum)->Push(optInfo); + } + } } return WALK_CONTINUE; } +//---------------------------------------------------------------------------- +// optIsHandleOrIndirOfHandle: +// Check if a tree is a specified handle type or indirection of that handle type. +// +// Arguments: +// tree - the tree +// handleType - the type of handle to check for +// +// Returns: +// True if the tree is such a handle. +// +bool Compiler::optIsHandleOrIndirOfHandle(GenTree* tree, GenTreeFlags handleType) +{ + return tree->OperIs(GT_IND) ? 
tree->AsIndir()->Addr()->IsIconHandle(handleType) : tree->IsIconHandle(handleType); +} + +//---------------------------------------------------------------------------- +// optCheckLoopCloningGDVTestProfitable: +// Check heuristically if doing loop cloning for a GDV test is profitable. +// +// Arguments: +// guard - the GDV test +// info - info for the cloning we are doing +// +// Returns: +// True if cloning is considered profitable. +// +bool Compiler::optCheckLoopCloningGDVTestProfitable(GenTreeOp* guard, LoopCloneVisitorInfo* info) +{ + JITDUMP("Checking whether cloning is profitable ...\n"); + // We only want GDV tests to inspire cloning if + // + // (1) we have profile data + // (2) the loop iterates frequently each time the method is called + // (3) the test is frequently hit during the loop iteration + // (4) the test is biased and highly likely to succeed + // + const LoopDsc& loopDsc = optLoopTable[info->loopNum]; + BasicBlock* const loopEntry = loopDsc.lpEntry; + BasicBlock* const typeTestBlock = compCurBB; + double const loopFrequency = 0.50; + double const typeTestFrequency = 0.50; + double const typeTestBias = 0.05; + + // Check for (1) + // + if (!loopEntry->hasProfileWeight() || !typeTestBlock->hasProfileWeight()) + { + JITDUMP(" No; loop does not have profile data.\n"); + return WALK_CONTINUE; + } + + // Check for (2) + // + if (loopEntry->getBBWeight(this) < (loopFrequency * BB_UNITY_WEIGHT)) + { + JITDUMP(" No; loop does not iterate often enough.\n"); + return WALK_CONTINUE; + } + + // Check for (3) + // + if (typeTestBlock->bbWeight < (typeTestFrequency * loopEntry->bbWeight)) + { + JITDUMP(" No; guard does not execute often enough within the loop.\n"); + return WALK_CONTINUE; + } + + // Check for (4) + // + BasicBlock* const hotSuccessor = guard->OperIs(GT_EQ) ? typeTestBlock->bbJumpDest : typeTestBlock->bbNext; + BasicBlock* const coldSuccessor = guard->OperIs(GT_EQ) ? typeTestBlock->bbNext : typeTestBlock->bbJumpDest; + + if (!hotSuccessor->hasProfileWeight() || !coldSuccessor->hasProfileWeight()) + { + JITDUMP(" No; guard successor blocks were not profiled.\n"); + return WALK_CONTINUE; + } + + if (hotSuccessor->bbWeight == BB_ZERO_WEIGHT) + { + JITDUMP(" No; guard hot successor block " FMT_BB " is rarely run.\n", hotSuccessor->bbNum); + return WALK_CONTINUE; + } + + if (coldSuccessor->bbWeight > BB_ZERO_WEIGHT) + { + const weight_t bias = coldSuccessor->bbWeight / (hotSuccessor->bbWeight + coldSuccessor->bbWeight); + + if (bias > typeTestBias) + { + JITDUMP(" No; guard not sufficiently biased: failure likelihood is " FMT_WT " > " FMT_WT "\n", bias, + typeTestBias); + return WALK_CONTINUE; + } + } + + JITDUMP(" Yes\n"); + return true; +} + /* static */ Compiler::fgWalkResult Compiler::optCanOptimizeByLoopCloningVisitor(GenTree** pTree, Compiler::fgWalkData* data) { @@ -2908,16 +3097,16 @@ bool Compiler::optIdentifyLoopOptInfo(unsigned loopNum, LoopCloneContext* contex } bool shouldCloneForArrayBounds = canCloneForArrayBounds; - bool shouldCloneForTypeTests = canCloneForTypeTests; + bool shouldCloneForGdvTests = canCloneForTypeTests; #ifdef DEBUG - shouldCloneForTypeTests &= JitConfig.JitCloneLoopsWithTypeTests() != 0; + shouldCloneForGdvTests &= JitConfig.JitCloneLoopsWithGdvTests() != 0; #endif JITDUMP("Checking loop " FMT_LP " for optimization candidates%s%s\n", loopNum, - shouldCloneForArrayBounds ? " (array bounds)" : "", shouldCloneForTypeTests ? " (type tests)" : ""); + shouldCloneForArrayBounds ? " (array bounds)" : "", shouldCloneForGdvTests ? 
" (GDV tests)" : ""); - LoopCloneVisitorInfo info(context, loopNum, nullptr, shouldCloneForArrayBounds, shouldCloneForTypeTests); + LoopCloneVisitorInfo info(context, loopNum, nullptr, shouldCloneForArrayBounds, shouldCloneForGdvTests); for (BasicBlock* const block : loop.LoopBlocks()) { compCurBB = block; diff --git a/src/coreclr/jit/loopcloning.h b/src/coreclr/jit/loopcloning.h index 103454a89ce95..9ab83531d0f52 100644 --- a/src/coreclr/jit/loopcloning.h +++ b/src/coreclr/jit/loopcloning.h @@ -320,13 +320,47 @@ struct LcJaggedArrayOptInfo : public LcOptInfo // struct LcTypeTestOptInfo : public LcOptInfo { + // statement where the opportunity occurs + Statement* stmt; + // indir for the method table + GenTreeIndir* methodTableIndir; // local whose method table is tested unsigned lclNum; // handle being tested for CORINFO_CLASS_HANDLE clsHnd; - LcTypeTestOptInfo(unsigned lclNum, CORINFO_CLASS_HANDLE clsHnd) - : LcOptInfo(LcTypeTest), lclNum(lclNum), clsHnd(clsHnd) + LcTypeTestOptInfo(Statement* stmt, GenTreeIndir* methodTableIndir, unsigned lclNum, CORINFO_CLASS_HANDLE clsHnd) + : LcOptInfo(LcTypeTest), stmt(stmt), methodTableIndir(methodTableIndir), lclNum(lclNum), clsHnd(clsHnd) + { + } +}; + +struct LcMethodAddrTestOptInfo : public LcOptInfo +{ + // statement where the opportunity occurs + Statement* stmt; + // indir on the delegate + GenTreeIndir* delegateAddressIndir; + // Invariant local whose target field(s) are tested + unsigned delegateLclNum; + // Invariant tree representing method address on the other side of the test + void* methAddr; + bool isSlot; +#ifdef DEBUG + CORINFO_METHOD_HANDLE targetMethHnd; +#endif + + LcMethodAddrTestOptInfo(Statement* stmt, + GenTreeIndir* delegateAddressIndir, + unsigned delegateLclNum, + void* methAddr, + bool isSlot DEBUG_ARG(CORINFO_METHOD_HANDLE targetMethHnd)) + : LcOptInfo(LcMethodAddrTest) + , stmt(stmt) + , delegateAddressIndir(delegateAddressIndir) + , delegateLclNum(delegateLclNum) + , methAddr(methAddr) + , isSlot(isSlot) DEBUG_ARG(targetMethHnd(targetMethHnd)) { } }; @@ -423,9 +457,7 @@ struct LC_Array }; //------------------------------------------------------------------------ -// LC_Ident: symbolic representation of either a constant like 1 or 2, -// or a variable like V02 or V03, or an "LC_Array", or the null constant, -// or a class handle, or an indir of a variable like *V02. 
+// LC_Ident: symbolic representation of "a value" // struct LC_Ident { @@ -434,24 +466,45 @@ struct LC_Ident Invalid, Const, Var, - ArrLen, + ArrAccess, Null, ClassHandle, - Indir, + IndirOfLocal, + MethodAddr, + IndirOfMethodAddrSlot, }; private: union { - unsigned constant; - unsigned lclNum; - LC_Array arrLen; + unsigned constant; + struct + { + unsigned lclNum; + unsigned indirOffs; + }; + LC_Array arrAccess; CORINFO_CLASS_HANDLE clsHnd; + struct + { + void* methAddr; +#ifdef DEBUG + CORINFO_METHOD_HANDLE targetMethHnd; // for nice disassembly +#endif + }; }; + LC_Ident(IdentType type) : type(type) + { + } + public: // The type of this object IdentType type; + LC_Ident() : type(Invalid) + { + } + // Equality operator bool operator==(const LC_Ident& that) const { @@ -467,12 +520,17 @@ struct LC_Ident case ClassHandle: return (clsHnd == that.clsHnd); case Var: - case Indir: return (lclNum == that.lclNum); - case ArrLen: - return (arrLen == that.arrLen); + case IndirOfLocal: + return (lclNum == that.lclNum) && (indirOffs == that.indirOffs); + case ArrAccess: + return (arrAccess == that.arrAccess); case Null: return true; + case MethodAddr: + return (methAddr == that.methAddr); + case IndirOfMethodAddrSlot: + return (methAddr == that.methAddr); default: assert(!"Unknown LC_Ident type"); unreached(); @@ -481,7 +539,7 @@ struct LC_Ident unsigned LclNum() const { - assert((type == Var) || (type == Indir)); + assert((type == Var) || (type == IndirOfLocal)); return lclNum; } @@ -496,18 +554,31 @@ struct LC_Ident case Var: printf("V%02u", lclNum); break; - case Indir: - printf("*V%02u", lclNum); + case IndirOfLocal: + if (indirOffs != 0) + { + printf("*(V%02u + %u)", lclNum, indirOffs); + } + else + { + printf("*V%02u", lclNum); + } break; case ClassHandle: printf("%p", clsHnd); break; - case ArrLen: - arrLen.Print(); + case ArrAccess: + arrAccess.Print(); break; case Null: printf("null"); break; + case MethodAddr: + printf("%p", methAddr); + break; + case IndirOfMethodAddrSlot: + printf("[%p]", methAddr); + break; default: printf("INVALID"); break; @@ -515,49 +586,65 @@ struct LC_Ident } #endif - LC_Ident() : type(Invalid) + // Convert this symbolic representation into a tree node. + GenTree* ToGenTree(Compiler* comp, BasicBlock* bb); + + static LC_Ident CreateVar(unsigned lclNum) { + LC_Ident id(Var); + id.lclNum = lclNum; + return id; } - explicit LC_Ident(unsigned val, IdentType type) : type(type) + static LC_Ident CreateIndirOfLocal(unsigned lclNum, unsigned offs) { - if (type == Const) - { - constant = val; - } - else if ((type == Var) || (type == Indir)) - { - lclNum = val; - } - else - { - unreached(); - } + LC_Ident id(IndirOfLocal); + id.lclNum = lclNum; + id.indirOffs = offs; + return id; } - explicit LC_Ident(CORINFO_CLASS_HANDLE val, IdentType type) : type(type) + static LC_Ident CreateConst(unsigned value) { - if (type == ClassHandle) - { - clsHnd = val; - } - else - { - unreached(); - } + LC_Ident id(Const); + id.constant = value; + return id; } - explicit LC_Ident(IdentType type) : type(type) + static LC_Ident CreateArrAccess(const LC_Array& arrLen) { - assert(type == Null); + LC_Ident id(ArrAccess); + id.arrAccess = arrLen; + return id; } - explicit LC_Ident(const LC_Array& arrLen) : arrLen(arrLen), type(ArrLen) + static LC_Ident CreateNull() { + return LC_Ident(Null); } - // Convert this symbolic representation into a tree node. 
- GenTree* ToGenTree(Compiler* comp, BasicBlock* bb); + static LC_Ident CreateClassHandle(CORINFO_CLASS_HANDLE clsHnd) + { + LC_Ident id(ClassHandle); + id.clsHnd = clsHnd; + return id; + } + + static LC_Ident CreateMethodAddr(void* methAddr DEBUG_ARG(CORINFO_METHOD_HANDLE methHnd)) + { + LC_Ident id(MethodAddr); + id.methAddr = methAddr; + INDEBUG(id.targetMethHnd = methHnd); + return id; + } + + static LC_Ident CreateIndirMethodAddrSlot(void* methAddrSlot DEBUG_ARG(CORINFO_METHOD_HANDLE methHnd)) + { + LC_Ident id(IndirOfMethodAddrSlot); + id.methAddr = methAddrSlot; + INDEBUG(id.targetMethHnd = methHnd); + return id; + } }; /** diff --git a/src/coreclr/jit/loopcloningopts.h b/src/coreclr/jit/loopcloningopts.h index 2df5e7baf63b5..2fb13937e2f86 100644 --- a/src/coreclr/jit/loopcloningopts.h +++ b/src/coreclr/jit/loopcloningopts.h @@ -12,5 +12,6 @@ LC_OPT(LcMdArray) LC_OPT(LcJaggedArray) LC_OPT(LcTypeTest) +LC_OPT(LcMethodAddrTest) #undef LC_OPT
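To show how the pieces above connect end to end, the sketch below paraphrases the new LcMethodAddrTest handling from optDeriveLoopCloningConditions as a standalone helper: optCanOptimizeByLoopCloning records the invariant delegate-target guard it found, and the cloner later turns that record into cloning conditions via the new LC_Ident factories. The helper name BuildDelegateGdvConditions is hypothetical and not part of this change; the types, factory methods, and LoopCloneContext calls are the ones introduced above.

// Sketch (assumed helper): turn a recorded delegate GDV guard into loop
// cloning conditions, mirroring the LcOptInfo::LcMethodAddrTest case above.
void BuildDelegateGdvConditions(Compiler* comp, LoopCloneContext* context, unsigned loopNum,
                                LcMethodAddrTestOptInfo* test)
{
    // "*(delegateLcl + offsetOfDelegateFirstTarget)" -- the invariant load of the
    // delegate's target field that the guard inspects.
    LC_Ident objDeref =
        LC_Ident::CreateIndirOfLocal(test->delegateLclNum, comp->eeGetEEInfo()->offsetOfDelegateFirstTarget);

    // The expected value: either the method address itself, or an indirection
    // through an address slot, depending on how the guard was emitted.
    LC_Ident methAddr;
    if (test->isSlot)
    {
        methAddr = LC_Ident::CreateIndirMethodAddrSlot(test->methAddr DEBUG_ARG(test->targetMethHnd));
    }
    else
    {
        methAddr = LC_Ident::CreateMethodAddr(test->methAddr DEBUG_ARG(test->targetMethHnd));
    }

    // Fast-path condition: the loaded target equals the expected method address.
    LC_Condition cond(GT_EQ, LC_Expr(objDeref), LC_Expr(methAddr));

    // Registering the indir also lets optComputeDerefConditions emit a null check
    // on the delegate local before the guard itself is evaluated.
    context->EnsureObjDerefs(loopNum)->Push(objDeref);
    context->EnsureConditions(loopNum)->Push(cond);
}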