From 13f0f6a29609ae3b2d84375f2695ae7ee2e7e723 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Thu, 27 Feb 2020 15:49:00 -0800 Subject: [PATCH 1/9] JIT: enable tail calls and copy omission for implicit byref structs Handle cases where a caller passes an implicit byref argument struct to a callee in tail position. There is no need to create a local copy or to block tail calling. This pattern comes up increasingly often as we write more layered code operating on spans and similar structs. --- src/coreclr/src/jit/gentree.cpp | 49 +++++ src/coreclr/src/jit/gentree.h | 4 + src/coreclr/src/jit/lclmorph.cpp | 61 +++++- src/coreclr/src/jit/morph.cpp | 152 +++++++++------ .../opt/Tailcall/ImplicitByrefTailCalls.cs | 175 ++++++++++++++++++ .../Tailcall/ImplicitByrefTailCalls.csproj | 12 ++ 6 files changed, 396 insertions(+), 57 deletions(-) create mode 100644 src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCalls.cs create mode 100644 src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCalls.csproj diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp index 9c78cc1bc1dea..94565b11873c5 100644 --- a/src/coreclr/src/jit/gentree.cpp +++ b/src/coreclr/src/jit/gentree.cpp @@ -16193,6 +16193,55 @@ bool GenTree::IsLocalAddrExpr(Compiler* comp, GenTreeLclVarCommon** pLclVarTree, return false; } +//------------------------------------------------------------------------ +// IsImplicitByrefParameterValue: determine if this tree is the entire +// value of a local implicit byref parameter +// +// Arguments: +// compiler -- compiler instance +// +// Return Value: +// GenTreeLclVarCommon node for the local, or nullptr. + +GenTreeLclVarCommon* GenTree::IsImplicitByrefParameterValue(Compiler* compiler) +{ +#if defined(TARGET_AMD64) || defined(TARGET_ARM64) + + GenTreeLclVarCommon* lcl = nullptr; + + if (OperIs(GT_LCL_VAR)) + { + lcl = AsLclVarCommon(); + } + else if (OperIs(GT_OBJ)) + { + GenTree* addr = AsIndir()->Addr(); + + if (addr->OperIs(GT_LCL_VAR)) + { + lcl = addr->AsLclVarCommon(); + } + else if (addr->OperIs(GT_ADDR)) + { + GenTree* base = addr->AsOp()->gtOp1; + + if (base->OperIs(GT_LCL_VAR)) + { + lcl = base->AsLclVarCommon(); + } + } + } + + if ((lcl != nullptr) && compiler->lvaIsImplicitByRefLocal(lcl->GetLclNum())) + { + return lcl; + } + +#endif // defined(TARGET_AMD64) || defined(TARGET_ARM64) + + return nullptr; +} + //------------------------------------------------------------------------ // IsLclVarUpdateTree: Determine whether this is an assignment tree of the // form Vn = Vn 'oper' 'otherTree' where Vn is a lclVar diff --git a/src/coreclr/src/jit/gentree.h b/src/coreclr/src/jit/gentree.h index 32c81b4630dc6..3dcf4148420a3 100644 --- a/src/coreclr/src/jit/gentree.h +++ b/src/coreclr/src/jit/gentree.h @@ -1841,6 +1841,10 @@ struct GenTree // yields an address into a local GenTreeLclVarCommon* IsLocalAddrExpr(); + // Determine if this tree represents the value of an entire implict byref parameter, + // and if so return the tree for the parameter. + GenTreeLclVarCommon* IsImplicitByrefParameterValue(Compiler* compiler); + // Determine if this is a LclVarCommon node and return some additional info about it in the // two out parameters. bool IsLocalExpr(Compiler* comp, GenTreeLclVarCommon** pLclVarTree, FieldSeqNode** pFldSeq); diff --git a/src/coreclr/src/jit/lclmorph.cpp b/src/coreclr/src/jit/lclmorph.cpp index 546a5d453ed6e..d24efccc7e234 100644 --- a/src/coreclr/src/jit/lclmorph.cpp +++ b/src/coreclr/src/jit/lclmorph.cpp @@ -1097,10 +1097,67 @@ class LocalAddressVisitor final : public GenTreeVisitor { return; } + LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum); - JITDUMP("LocalAddressVisitor incrementing ref count from %d to %d for V%02d\n", varDsc->lvRefCnt(RCS_EARLY), - varDsc->lvRefCnt(RCS_EARLY) + 1, lclNum); + JITDUMP("LocalAddressVisitor incrementing ref count from %d to %d for implict by-ref V%02d\n", + varDsc->lvRefCnt(RCS_EARLY), varDsc->lvRefCnt(RCS_EARLY) + 1, lclNum); varDsc->incLvRefCnt(1, RCS_EARLY); + + // See if this struct is an argument to a call. This information is recorded + // via the weighted early ref count for the local, and feeds the undo promition + // heuristic. + // + // It can be approximate, so the pattern match below need not be exhaustive. + // But the pattern should at least subset the implicit byref cases that are + // handed in fgCanFastTailCall and fgMakeOutgoingStructArgCopy. + // + // CALL(OBJ(ADDR(LCL_VAR...))) + bool isArgToCall = false; + bool keepSearching = true; + for (int i = 0; i < m_ancestors.Height() && keepSearching; i++) + { + GenTree* node = m_ancestors.Top(i); + switch (i) + { + case 0: + { + keepSearching = node->OperIs(GT_LCL_VAR); + } + break; + + case 1: + { + keepSearching = node->OperIs(GT_ADDR); + } + break; + + case 2: + { + keepSearching = node->OperIs(GT_OBJ); + } + break; + + case 3: + { + keepSearching = false; + isArgToCall = node->IsCall(); + } + break; + default: + { + keepSearching = false; + } + break; + } + } + + if (isArgToCall) + { + JITDUMP("LocalAddressVisitor incrementing weighted ref count from %d to %d" + " for implict by-ref V%02d arg passed to call\n", + varDsc->lvRefCntWtd(RCS_EARLY), varDsc->lvRefCntWtd(RCS_EARLY) + 1, lclNum); + varDsc->incLvRefCntWtd(1, RCS_EARLY); + } } }; diff --git a/src/coreclr/src/jit/morph.cpp b/src/coreclr/src/jit/morph.cpp index 2b56413f955de..c54835709e5b4 100644 --- a/src/coreclr/src/jit/morph.cpp +++ b/src/coreclr/src/jit/morph.cpp @@ -4796,46 +4796,49 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, // // We don't need a copy if this is the last use of an implicit by-ref local. // - // We can't determine that all of the time, but if there is only - // one use and the method has no loops, then this use must be the last. if (opts.OptimizationEnabled()) { - GenTreeLclVarCommon* lcl = nullptr; - - if (argx->OperIsLocal()) - { - lcl = argx->AsLclVarCommon(); - } - else if ((argx->OperGet() == GT_OBJ) && argx->AsIndir()->Addr()->OperIsLocal()) - { - lcl = argx->AsObj()->Addr()->AsLclVarCommon(); - } + GenTreeLclVarCommon* const lcl = argx->IsImplicitByrefParameterValue(this); if (lcl != nullptr) { - unsigned varNum = lcl->AsLclVarCommon()->GetLclNum(); - if (lvaIsImplicitByRefLocal(varNum)) - { - LclVarDsc* varDsc = &lvaTable[varNum]; - // JIT_TailCall helper has an implicit assumption that all tail call arguments live - // on the caller's frame. If an argument lives on the caller caller's frame, it may get - // overwritten if that frame is reused for the tail call. Therefore, we should always copy - // struct parameters if they are passed as arguments to a tail call. - if (!call->IsTailCallViaHelper() && (varDsc->lvRefCnt(RCS_EARLY) == 1) && !fgMightHaveLoop()) - { - assert(!call->IsTailCall()); + unsigned varNum = lcl->AsLclVarCommon()->GetLclNum(); + LclVarDsc* varDsc = lvaGetDesc(varNum); + const unsigned short totalAppearances = varDsc->lvRefCnt(RCS_EARLY); - varDsc->setLvRefCnt(0, RCS_EARLY); - args->SetNode(lcl); - assert(argEntry->GetNode() == lcl); + // We don't have liveness so we rely on other indications of last use. + // + // We handle these cases: + // + // * (must not copy) If the call is a tail call, the use is a last use. + // We must skip the copy if we have a fast tail call. + // + // * (must copy) However the current slow tail call helper always copies + // the tail call args from the current frame, so we must copy + // if the tail call is a slow tail call. + // + // * (may not copy) if the call is noreturn, the use is a last use. + // + // * (may not copy) if there is exactly one use of the local in the method, + // and the call is not in loop, this is a last use. + // + const bool isTailCallLastUse = call->IsTailCall(); + const bool isCallLastUse = (totalAppearances == 1) && !fgMightHaveLoop(); + const bool isNoReturnLastUse = call->IsNoReturn(); + if (!call->IsTailCallViaHelper() && (isTailCallLastUse || isCallLastUse || isNoReturnLastUse)) + { + varDsc->setLvRefCnt(0, RCS_EARLY); + args->SetNode(lcl); + assert(argEntry->GetNode() == lcl); - JITDUMP("did not have to make outgoing copy for V%2d", varNum); - return; - } + JITDUMP("did not need to make outgoing copy for last use of implicit byref V%2d\n", varNum); + return; } } } + JITDUMP("making an outgoing copy for struct arg\n"); + if (fgOutgoingArgTemps == nullptr) { fgOutgoingArgTemps = hashBv::Create(this); @@ -6550,7 +6553,7 @@ void Compiler::fgMorphCallInlineHelper(GenTreeCall* call, InlineResult* result) // // Windows Amd64: // A fast tail call can be made whenever the number of callee arguments -// is larger than or equal to the number of caller arguments, or we have four +// is less than or equal to the number of caller arguments, or we have four // or fewer callee arguments. This is because, on Windows AMD64, each // argument uses exactly one register or one 8-byte stack slot. Thus, we only // need to count arguments, and not be concerned with the size of each @@ -6688,12 +6691,30 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) if (arg->isStruct) { - // Byref struct arguments are not allowed to fast tail call as the information - // of the caller's stack is lost when the callee is compiled. if (arg->passedByRef) { - hasByrefParameter = true; - break; + // If we're optimizing, we may be able to pass our caller's byref to our callee, + // and so still be able to tail call. + GenTreeLclVarCommon* const lcl = arg->GetNode()->IsImplicitByrefParameterValue(this); + + // For this to work we must not have promoted the parameter; if we've promoted + // then we'll make a local struct at the call. + // + // We could handle this if there was some way to copy back to the original struct. + if (opts.OptimizationEnabled() && (lcl != nullptr) && !lvaGetDesc(lcl)->lvPromoted) + { + // We can still tail call in this case, provided we + // suppress the copy in fgMakeOutgoingStructArgCopy + JITDUMP("Arg [%06u] is unpromoted implicit byref V%02u\n", dspTreeID(arg->GetNode()), + lcl->GetLclNum()); + } + else + { + // We can't tail call in this case; note the reason why + // and skip looking at the rest of the args. + hasByrefParameter = true; + break; + } } } } @@ -6748,17 +6769,18 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) #endif // DEBUG }; -// Note on vararg methods: -// If the caller is vararg method, we don't know the number of arguments passed by caller's caller. -// But we can be sure that in-coming arg area of vararg caller would be sufficient to hold its -// fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as -// out-going area required for callee is bounded by caller's fixed argument space. -// -// Note that callee being a vararg method is not a problem since we can account the params being passed. -// -// We will currently decide to not fast tail call on Windows armarch if the caller or callee is a vararg -// method. This is due to the ABI differences for native vararg methods for these platforms. There is -// work required to shuffle arguments to the correct locations. + // Note on vararg methods: + // If the caller is vararg method, we don't know the number of arguments passed by caller's caller. + // But we can be sure that in-coming arg area of vararg caller would be sufficient to hold its + // fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as + // out-going area required for callee is bounded by caller's fixed argument space. + // + // Note that callee being a vararg method is not a problem since we can account the params being passed. + // + // We will currently decide to not fast tail call on Windows armarch if the caller or callee is a vararg + // method. This is due to the ABI differences for native vararg methods for these platforms. There is + // work required to shuffle arguments to the correct locations. + CLANG_FORMAT_COMMENT_ANCHOR; #if (defined(TARGET_WINDOWS) && defined(TARGET_ARM)) || (defined(TARGET_WINDOWS) && defined(TARGET_ARM64)) if (info.compIsVarArgs || callee->IsVarargs()) @@ -6836,13 +6858,18 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) // It cannot be an inline candidate assert(!call->IsInlineCandidate()); - auto failTailCall = [&](const char* reason) { + auto failTailCall = [&](const char* reason, unsigned lclNum = BAD_VAR_NUM) { #ifdef DEBUG if (verbose) { printf("\nRejecting tail call in morph for call "); printTreeID(call); - printf(": %s\n", reason); + printf(": %s", reason); + if (lclNum != BAD_VAR_NUM) + { + printf(" V%02u", lclNum); + } + printf("\n"); } #endif @@ -6946,9 +6973,9 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) // if (call->IsImplicitTailCall()) { - if (varDsc->lvHasLdAddrOp) + if (varDsc->lvHasLdAddrOp && !lvaIsImplicitByRefLocal(varNum)) { - failTailCall("Local address taken"); + failTailCall("Local address taken", varNum); return nullptr; } if (varDsc->lvAddrExposed) @@ -6971,20 +6998,20 @@ GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call) } else { - failTailCall("Local address taken"); + failTailCall("Local address taken", varNum); return nullptr; } } if (varDsc->lvPromoted && varDsc->lvIsParam && !lvaIsImplicitByRefLocal(varNum)) { - failTailCall("Has Struct Promoted Param"); + failTailCall("Has Struct Promoted Param", varNum); return nullptr; } if (varDsc->lvPinned) { // A tail call removes the method from the stack, which means the pinning // goes away for the callee. We can't allow that. - failTailCall("Has Pinned Vars"); + failTailCall("Has Pinned Vars", varNum); return nullptr; } } @@ -16828,8 +16855,23 @@ void Compiler::fgRetypeImplicitByRefArgs() // through the pointer parameter, the same as we'd do for this // parameter if it weren't promoted at all (otherwise the initialization // of the new temp would just be a needless memcpy at method entry). - bool undoPromotion = (lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) || - (varDsc->lvRefCnt(RCS_EARLY) <= varDsc->lvFieldCnt); + // + // Otherwise, see how many appearances there are. We keep two early ref counts: total + // number of references to the struct or some field, and how many of these are + // arguments to calls. We undo promotion unless we see enougn non-call uses. + // + const unsigned totalAppearances = varDsc->lvRefCnt(RCS_EARLY); + const unsigned callAppearances = varDsc->lvRefCntWtd(RCS_EARLY); + assert(totalAppearances >= callAppearances); + const unsigned nonCallAppearances = totalAppearances - callAppearances; + + bool undoPromotion = ((lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) || + (nonCallAppearances <= varDsc->lvFieldCnt)); + + JITDUMP("%s promotion of implicit by-ref V%02u: %s total: %u non-call: %u fields: %u\n", + undoPromotion ? "Undoing" : "Keeping", lclNum, + (lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) ? "dependent;" : "", + totalAppearances, nonCallAppearances, varDsc->lvFieldCnt); if (!undoPromotion) { @@ -16863,7 +16905,7 @@ void Compiler::fgRetypeImplicitByRefArgs() { // Set the new parent. fieldVarDsc->lvParentLcl = newLclNum; - // Clear the ref count field; it is used to communicate the nubmer of references + // Clear the ref count field; it is used to communicate the number of references // to the implicit byref parameter when morphing calls that pass the implicit byref // out as an outgoing argument value, but that doesn't pertain to this field local // which is now a field of a non-arg local. diff --git a/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCalls.cs b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCalls.cs new file mode 100644 index 0000000000000..6283f24098dc5 --- /dev/null +++ b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCalls.cs @@ -0,0 +1,175 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Runtime.CompilerServices; + +// Tail calls with implicit byref parameters as arguments. +// +// Generally, we can tail call provided we don't copy the +// argument to the local frame. + +public class ImplicitByrefTailCalls +{ + public static void Z() {} + public static bool Z(bool b) => b; + + public static int ZZ(Span x) + { + int result = 0; + for (int i = 0; i < x.Length; i++) + { + result += x[i] * x[i] + i; + } + return result; + } + + public static int ZZZ(Span x, int a, int b, int c, int d, int e, int f) + { + Z(); Z(); + Z(); Z(); + int result = 0; + for (int i = 0; i < x.Length; i++) + { + result += x[i] * x[i] + i; + } + return result; + } + + // Various methods that should fast tail call + + public static int A(Span x) + { + Z(); Z(); return ZZ(x); + } + + public static int B(Span x) + { + Z(); Z(); return A(x); + } + + public static int C(Span x, bool p) + { + Z(); Z(); + if (Z(p)) + { + return A(x); + } + else + { + return B(x); + } + } + + public static int D(Span x, bool p) + { + Z(); Z(); + return Z(p) ? A(x) : B(x); + } + + public static int E(Span x, Span y, bool p) + { + Z(); Z(); Z(); Z(); + if (Z(p)) + { + return A(x); + } + else + { + return B(y); + } + } + + public static int F(Span x, Span y, bool p) + { + Z(); Z(); Z(); Z(); + return Z(p) ? ZZ(x) : ZZ(y); + } + + // Methods with tail call sites and other uses + + public static int G(Span x, bool p) + { + Z(); Z(); + if (Z(p)) + { + return ZZ(x); + } + else + { + return x.Length; + } + } + + // Here there are enough actual uses to + // justify promotion, hence we can't tail + // call, as "undoing" promotion at the end + // entails making a local copy. + // + // We could handle this by writing the updated + // struct values back to the original byref + // before tail calling.... + public static int H(Span x, int y) + { + Z(); Z(); + if (y < x.Length) + { + int result = 0; + for (int i = 0; i < y; i++) + { + result += x[i]; + } + return result; + } + + return ZZ(x); + } + + // Here the call to ZZZ would need to be a slow tail call, + // so we won't tail call at all. But the only + // reference to x is at the call, and it's not in + // a loop, so we can still can avoid making a copy. + public static int S(Span x) + { + Z(); Z(); + return ZZZ(x, 1, 2, 3, 4, 5, 6); + } + + // Must copy at the normal call site + // but can avoid at tail call site + public static int T(Span x, ref int q) + { + Z(); Z(); + q = ZZ(x); + return ZZ(x); + } + + static bool p; + + public static int Main() + { + int[] a = new int[100]; + a[45] = 55; + a[55] = 45; + p = false; + + Span s = new Span(a); + int q = 0; + + int ra = A(s); + int rb = B(s); + int rc = C(s, p); + int rd = D(s, p); + int re = E(s, s, p); + int rf = F(s, s, p); + int rg = G(s, p); + int rh = H(s, 46); + int rs = S(s); + int rt = T(s, ref q); + + Console.WriteLine($"{ra},{rb},{rc},{rd},{re},{rf},{rg},{rh},{rs},{rt}"); + + return ra + rb + rc + rd + re + rf + rg + rh + rs + rt - 80055; + } +} diff --git a/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCalls.csproj b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCalls.csproj new file mode 100644 index 0000000000000..f3e1cbd44b404 --- /dev/null +++ b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCalls.csproj @@ -0,0 +1,12 @@ + + + Exe + + + None + True + + + + + From 364a302c4d212b40e1d9fe3be8152f5780c8af95 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sat, 29 Feb 2020 07:50:36 -0800 Subject: [PATCH 2/9] test case with potential aliasing --- .../ImplicitByrefTailCallsAliasing.cs | 79 +++++++++++++++++++ .../ImplicitByrefTailCallsAliasing.csproj | 12 +++ 2 files changed, 91 insertions(+) create mode 100644 src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs create mode 100644 src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.csproj diff --git a/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs new file mode 100644 index 0000000000000..874b6a2fd3f8f --- /dev/null +++ b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs @@ -0,0 +1,79 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. +// See the LICENSE file in the project root for more information. + +using System; +using System.Runtime.CompilerServices; + +public struct S +{ + public long x; + public long y; +} + +// Tail calls with implicit byref parameters as arguments. +// +// We need to ensure that we don't introduce aliased +// implicit byref parameters by optimizing away copies. + +public class ImplicitByrefTailCalls +{ + public static void Z() {} + + // Will return different answers if x and y refer to the same struct. + public static long Alias(S x, S y) + { + Z(); Z(); Z(); Z(); + y.x++; + long result = 0; + for (int i = 0; i < 100; i++) + { + x.x++; + result += x.x + x.y; + } + return result; + } + + // Will return different answers if y refers to some part of x. + public static unsafe long Alias2(S x, long* y) + { + Z(); Z(); Z(); Z(); + *y += 1; + long result = 0; + for (int i = 0; i < 100; i++) + { + x.x++; + result += x.x + x.y; + } + return result; + } + + // A must copy params locally when calling Alias + // and so can't tail call + public static long A(S x) + { + Z(); Z(); Z(); Z(); + return Alias(x, x); + } + + // B must copy params locally when calling Alias2 + // and so can't tail call + public static unsafe long B(S x) + { + Z(); Z(); Z(); Z(); + return Alias2(x, &x.y); + } + + public static int Main() + { + S s = new S(); + s.x = 1; + s.y = 2; + long ra = A(s); + long rb = B(s); + + Console.WriteLine($"{ra},{rb}"); + + return (ra == 5350) && (rb == 5350) ? 100 : -1; + } +} diff --git a/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.csproj b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.csproj new file mode 100644 index 0000000000000..f3e1cbd44b404 --- /dev/null +++ b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.csproj @@ -0,0 +1,12 @@ + + + Exe + + + None + True + + + + + From 5a34292ebbe5e699f97f11c29c8c1911059cbcf5 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sat, 29 Feb 2020 14:04:34 -0800 Subject: [PATCH 3/9] extend test case; add safety analysis --- src/coreclr/src/jit/morph.cpp | 183 ++++++++++++++++-- .../ImplicitByrefTailCallsAliasing.cs | 109 ++++++++++- .../ImplicitByrefTailCallsAliasing.csproj | 1 + 3 files changed, 268 insertions(+), 25 deletions(-) diff --git a/src/coreclr/src/jit/morph.cpp b/src/coreclr/src/jit/morph.cpp index c54835709e5b4..2068a8cc3ad25 100644 --- a/src/coreclr/src/jit/morph.cpp +++ b/src/coreclr/src/jit/morph.cpp @@ -6693,26 +6693,177 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) { if (arg->passedByRef) { + // Generally a byref arg will block tail calling, as we have to + // make a local copy of the struct for the callee. + hasByrefParameter = true; + // If we're optimizing, we may be able to pass our caller's byref to our callee, // and so still be able to tail call. - GenTreeLclVarCommon* const lcl = arg->GetNode()->IsImplicitByrefParameterValue(this); - - // For this to work we must not have promoted the parameter; if we've promoted - // then we'll make a local struct at the call. - // - // We could handle this if there was some way to copy back to the original struct. - if (opts.OptimizationEnabled() && (lcl != nullptr) && !lvaGetDesc(lcl)->lvPromoted) + if (opts.OptimizationEnabled()) { - // We can still tail call in this case, provided we - // suppress the copy in fgMakeOutgoingStructArgCopy - JITDUMP("Arg [%06u] is unpromoted implicit byref V%02u\n", dspTreeID(arg->GetNode()), - lcl->GetLclNum()); + // First, see if this arg is a byref param. + GenTreeLclVarCommon* const lcl = arg->GetNode()->IsImplicitByrefParameterValue(this); + + // If so, it must not be promoted; if we've promoted, then the arg will be + // a local struct assembled from the promoted fields. + if (lcl != nullptr) + { + JITDUMP("Arg [%06u] is unpromoted implicit byref V%02u, seeing if we can still tail call\n", + dspTreeID(arg->GetNode()), lcl->GetLclNum()); + + LclVarDsc* const varDsc = lvaGetDesc(lcl); + + if (varDsc->lvPromoted) + { + JITDUMP(" ... no, V%02u was promoted\n", lcl->GetLclNum()); + } + else + { + // We also have to worry about introducing aliases if we bypass copying + // the struct at the call. We'll do some limited analysis to see if we + // can rule this out. + const unsigned argLimit = 4; + + // If this is the only appearance of the byref in the method, then + // aliasing is not possible. + // + if (varDsc->lvRefCnt(RCS_EARLY) == 1) + { + JITDUMP("... yes, arg is the only appearance of V%02u\n", lcl->GetLclNum()); + hasByrefParameter = false; + } + // If no other call arg refers to this byref, and no other arg is + // a pointer which could refer to this byref, we can optimize. + // + // We only check this for calls with small numbers of arguments, + // as the analysis cost will be quadratic. + else if (argInfo->ArgCount() <= argLimit) + { + GenTree* interferingArg = nullptr; + for (unsigned index2 = 0; index2 < argInfo->ArgCount(); ++index2) + { + if (index2 == index) + { + continue; + } + + fgArgTabEntry* const arg2 = argInfo->GetArgEntry(index2, false); + JITDUMP("checking other arg [%06u]...\n", dspTreeID(arg2->GetNode())); + DISPTREE(arg2->GetNode()); + + // Do we pass 'lcl' more than once to the callee? + if (arg2->isStruct && arg2->passedByRef) + { + GenTreeLclVarCommon* const lcl2 = + arg2->GetNode()->IsImplicitByrefParameterValue(this); + + if ((lcl2 != nullptr) && (lcl->GetLclNum() == lcl2->GetLclNum())) + { + // not copying would introduce aliased implicit byref structs + // in the callee ... we can't optimize. + interferingArg = arg2->GetNode(); + break; + } + else + { + JITDUMP("... ok, different implicit byref V%02u\n", lcl2->GetLclNum()); + continue; + } + } + + // Do we pass a byref pointer which might point within 'lcl'? + // + // We can assume the 'lcl' is unaliased on entry to the + // method, so the only way we can have an aliasing byref pointer at + // the call is if 'lcl' is address taken/exposed in the method. + // + // Note even though 'lcl' is not promoted, we are in the middle + // of the promote->rewrite->undo->(morph)->demote cycle, and so + // might see references to promoted fields of 'lcl' that haven't yet + // been demoted (see fgMarkDemotedImplicitByRefArgs). + // + // So, we also need to scan all 'lcl's fields, if any, to see if they + // are exposed. + // + // TODO: we should not need to check for TYP_I_IMPL here. + if ((arg2->argType == TYP_BYREF) || (arg2->argType == TYP_I_IMPL)) + { + JITDUMP("... is byref arg...\n", dspTreeID(arg2->GetNode())); + bool hasExposure = false; + if (varDsc->lvHasLdAddrOp || varDsc->lvAddrExposed) + { + // Struct as a whole is exposed, can't optimize + JITDUMP(" ... V%02u exposed\n", lcl->GetLclNum()); + hasExposure = true; + } + else if (varDsc->lvFieldLclStart != 0) + { + // This the promoted/undone struct case. + // + // The field start is actually the local number of the promoted local, + // use it to enumerate the fields. + const unsigned promotedLcl = varDsc->lvFieldLclStart; + LclVarDsc* const promotedVarDsc = lvaGetDesc(promotedLcl); + JITDUMP("promoted-unpromoted case, checking fields of V%02u\n", + promotedLcl); + + for (unsigned fieldIndex = 0; fieldIndex < promotedVarDsc->lvFieldCnt; + fieldIndex++) + { + JITDUMP("checking field V%02u\n", + promotedVarDsc->lvFieldLclStart + fieldIndex); + LclVarDsc* fieldDsc = + lvaGetDesc(promotedVarDsc->lvFieldLclStart + fieldIndex); + + if (fieldDsc->lvHasLdAddrOp || fieldDsc->lvAddrExposed) + { + // Promoted and not yet demoted field is exposed, can't optimize + hasExposure = true; + break; + } + } + } + else + { + JITDUMP("...never promoted case, we're cool\n"); + } + + if (hasExposure) + { + interferingArg = arg2->GetNode(); + break; + } + } + else + { + JITDUMP("...not byref or implicit byref (%s), we're cool\n", + varTypeName(arg2->GetNode()->TypeGet())); + } + } + + if (interferingArg != nullptr) + { + JITDUMP("... no, arg [%06u] may alias with V%02u\n", dspTreeID(interferingArg), + lcl->GetLclNum()); + } + else + { + JITDUMP("... yes, no other arg in call can alias V%02u\n", lcl->GetLclNum()); + hasByrefParameter = false; + } + } + else + { + JITDUMP(" ... no, call has %u > %u args, alias analysis deemed too costly\n", + argInfo->ArgCount(), argLimit); + } + } + } } - else + + if (hasByrefParameter) { - // We can't tail call in this case; note the reason why - // and skip looking at the rest of the args. - hasByrefParameter = true; + // This arg blocks the tail call. No reason to keep scanning the remaining args. break; } } @@ -16858,7 +17009,7 @@ void Compiler::fgRetypeImplicitByRefArgs() // // Otherwise, see how many appearances there are. We keep two early ref counts: total // number of references to the struct or some field, and how many of these are - // arguments to calls. We undo promotion unless we see enougn non-call uses. + // arguments to calls. We undo promotion unless we see enough non-call uses. // const unsigned totalAppearances = varDsc->lvRefCnt(RCS_EARLY); const unsigned callAppearances = varDsc->lvRefCntWtd(RCS_EARLY); diff --git a/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs index 874b6a2fd3f8f..b5eb37d9a6bcf 100644 --- a/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs +++ b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs @@ -18,6 +18,7 @@ public struct S public class ImplicitByrefTailCalls { + // Helper method to make callees unattractive for inlining. public static void Z() {} // Will return different answers if x and y refer to the same struct. @@ -35,10 +36,38 @@ public static long Alias(S x, S y) } // Will return different answers if y refers to some part of x. - public static unsafe long Alias2(S x, long* y) + public static long Alias2(S x, ref long y) { Z(); Z(); Z(); Z(); - *y += 1; + y++; + long result = 0; + for (int i = 0; i < 100; i++) + { + x.x++; + result += x.x + x.y; + } + return result; + } + + // Will return different answers if x and y refer to same struct + public static long Alias3(int a, int b, int c, int d, int e, int f, S x, S y) + { + Z(); Z(); Z(); Z(); + y.x++; + long result = 0; + for (int i = 0; i < 100; i++) + { + x.x++; + result += x.x + x.y; + } + return result; + } + + // Will return different answers if x and y refer to same struct + public static long Alias4(ref long y, int b, int c, int d, int e, int f, S x) + { + Z(); Z(); Z(); Z(); + y++; long result = 0; for (int i = 0; i < 100; i++) { @@ -54,15 +83,72 @@ public static long A(S x) { Z(); Z(); Z(); Z(); return Alias(x, x); - } + } // B must copy params locally when calling Alias2 // and so can't tail call - public static unsafe long B(S x) + public static long B(S x) { Z(); Z(); Z(); Z(); - return Alias2(x, &x.y); - } + return Alias2(x, ref x.y); + } + + // C must copy params locally when calling Alias2 + // and so can't tail call. Here the problematic + // tree is not part of the call. + public static long C(S x) + { + ref long z = ref x.y; + Z(); Z(); Z(); Z(); + return Alias2(x, ref z); + } + + // D should not be able to tail call, as doing so + // means we are not making local copies of x, and + // so introducing aliasing. + public static long D(int a, int b, int c, int d, int e, int f, S x, S y) + { + Z(); Z(); Z(); Z(); + Z(); Z(); Z(); Z(); + Z(); Z(); Z(); Z(); + return Alias3(1, 2, 3, 4, 5, 6, x, x); + } + + // E should be able to tail call + public static long E(int a, int b, int c, int d, int e, int f, S x, S y) + { + Z(); Z(); Z(); Z(); + Z(); Z(); Z(); Z(); + Z(); Z(); Z(); Z(); + return Alias3(1, 2, 3, 4, 5, 6, x, y); + } + + // F should be able to tail call + public static long F(int a, int b, int c, int d, int e, int f, S x, S y) + { + Z(); Z(); Z(); Z(); + Z(); Z(); Z(); Z(); + Z(); Z(); Z(); Z(); + return Alias3(1, 2, 3, 4, 5, 6, y, x); + } + + // G should be able to tail call, but we + // might not want to pay the cost to prove it safe. + public static long G(int a, int b, int c, int d, int e, int f, S x, S y) + { + Z(); Z(); Z(); Z(); + Z(); Z(); Z(); Z(); + Z(); Z(); Z(); Z(); + + if (a != 0) + { + return Alias4(ref x.x, 2, 3, 4, 5, 6, y); + } + else + { + return Alias4(ref y.x, 2, 3, 4, 5, 6, x); + } + } public static int Main() { @@ -71,9 +157,14 @@ public static int Main() s.y = 2; long ra = A(s); long rb = B(s); + long rc = C(s); + long rd = D(0, 0, 0, 0, 0, 0, s, s); + long re = E(0, 0, 0, 0, 0, 0, s, s); + long rf = F(0, 0, 0, 0, 0, 0, s, s); + long rg = G(0, 0, 0, 0, 0, 0, s, s); + + Console.WriteLine($"{ra},{rb},{rc},{rd},{re},{rf},{rg}"); - Console.WriteLine($"{ra},{rb}"); - - return (ra == 5350) && (rb == 5350) ? 100 : -1; + return (ra == 5350) && (rb == 5350) && (rc == 5350) && (rd == 5350) && (re == 5350) && (rf == 5350) && (rg == 5350) ? 100 : -1; } } diff --git a/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.csproj b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.csproj index f3e1cbd44b404..bf6f589eb325b 100644 --- a/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.csproj +++ b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.csproj @@ -5,6 +5,7 @@ None True + True From a0b9591b513b6541011aead3b019da7fa42551a3 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Sat, 29 Feb 2020 16:39:49 -0800 Subject: [PATCH 4/9] refine alias analysis; add more test cases --- src/coreclr/src/jit/morph.cpp | 102 ++++++++++++------ .../Tailcall/ImplicitByrefTailCalls.csproj | 1 + .../ImplicitByrefTailCallsAliasing.cs | 81 +++++++++++++- 3 files changed, 148 insertions(+), 36 deletions(-) diff --git a/src/coreclr/src/jit/morph.cpp b/src/coreclr/src/jit/morph.cpp index 2068a8cc3ad25..2eb3d86f58e2e 100644 --- a/src/coreclr/src/jit/morph.cpp +++ b/src/coreclr/src/jit/morph.cpp @@ -4818,13 +4818,15 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, // if the tail call is a slow tail call. // // * (may not copy) if the call is noreturn, the use is a last use. + // We also check for just one reference here as we are not doing + // alias analysis of the call's parameters. // // * (may not copy) if there is exactly one use of the local in the method, // and the call is not in loop, this is a last use. // const bool isTailCallLastUse = call->IsTailCall(); const bool isCallLastUse = (totalAppearances == 1) && !fgMightHaveLoop(); - const bool isNoReturnLastUse = call->IsNoReturn(); + const bool isNoReturnLastUse = (totalAppearances == 1) && call->IsNoReturn(); if (!call->IsTailCallViaHelper() && (isTailCallLastUse || isCallLastUse || isNoReturnLastUse)) { varDsc->setLvRefCnt(0, RCS_EARLY); @@ -6722,7 +6724,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) // We also have to worry about introducing aliases if we bypass copying // the struct at the call. We'll do some limited analysis to see if we // can rule this out. - const unsigned argLimit = 4; + const unsigned argLimit = 6; // If this is the only appearance of the byref in the method, then // aliasing is not possible. @@ -6785,47 +6787,81 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) // So, we also need to scan all 'lcl's fields, if any, to see if they // are exposed. // - // TODO: we should not need to check for TYP_I_IMPL here. + // TODO: we should not need to check for TYP_I_IMPL here, any + // aliasing value should be TYP_BYREF. + // + // Note we don't need to check for native pointers (abstractly) + // because aliasing native pointers require pinning, and we + // reject all tail calls in methods with pinned locals. + // if ((arg2->argType == TYP_BYREF) || (arg2->argType == TYP_I_IMPL)) { JITDUMP("... is byref arg...\n", dspTreeID(arg2->GetNode())); - bool hasExposure = false; - if (varDsc->lvHasLdAddrOp || varDsc->lvAddrExposed) + bool checkExposure = true; + bool hasExposure = false; + + // See if there is any way arg could refer to a parameter struct. + GenTree* arg2Node = arg2->GetNode(); + if (arg2Node->OperIs(GT_LCL_VAR)) { - // Struct as a whole is exposed, can't optimize - JITDUMP(" ... V%02u exposed\n", lcl->GetLclNum()); - hasExposure = true; + GenTreeLclVarCommon* arg2LclNode = arg2Node->AsLclVarCommon(); + assert(arg2LclNode->GetLclNum() != lcl->GetLclNum()); + LclVarDsc* arg2Dsc = lvaGetDesc(arg2LclNode); + + // Other params can't alias implicit byref params + if (arg2Dsc->lvIsParam) + { + checkExposure = false; + } } - else if (varDsc->lvFieldLclStart != 0) + // Because we're checking TYP_I_IMPL above, at least + // screen out obvious things that can't cause aliases. + else if (arg2Node->IsIntegralConst()) { - // This the promoted/undone struct case. - // - // The field start is actually the local number of the promoted local, - // use it to enumerate the fields. - const unsigned promotedLcl = varDsc->lvFieldLclStart; - LclVarDsc* const promotedVarDsc = lvaGetDesc(promotedLcl); - JITDUMP("promoted-unpromoted case, checking fields of V%02u\n", - promotedLcl); - - for (unsigned fieldIndex = 0; fieldIndex < promotedVarDsc->lvFieldCnt; - fieldIndex++) - { - JITDUMP("checking field V%02u\n", - promotedVarDsc->lvFieldLclStart + fieldIndex); - LclVarDsc* fieldDsc = - lvaGetDesc(promotedVarDsc->lvFieldLclStart + fieldIndex); + checkExposure = false; + } - if (fieldDsc->lvHasLdAddrOp || fieldDsc->lvAddrExposed) + if (checkExposure) + { + // arg2 might alias arg, see if we've exposed + // arg somewhere in the method. + if (varDsc->lvHasLdAddrOp || varDsc->lvAddrExposed) + { + // Struct as a whole is exposed, can't optimize + JITDUMP(" ... V%02u exposed\n", lcl->GetLclNum()); + hasExposure = true; + } + else if (varDsc->lvFieldLclStart != 0) + { + // This the promoted/undone struct case. + // + // The field start is actually the local number of the promoted local, + // use it to enumerate the fields. + const unsigned promotedLcl = varDsc->lvFieldLclStart; + LclVarDsc* const promotedVarDsc = lvaGetDesc(promotedLcl); + JITDUMP("promoted-unpromoted case, checking fields of V%02u\n", + promotedLcl); + + for (unsigned fieldIndex = 0; fieldIndex < promotedVarDsc->lvFieldCnt; + fieldIndex++) { - // Promoted and not yet demoted field is exposed, can't optimize - hasExposure = true; - break; + JITDUMP("checking field V%02u\n", + promotedVarDsc->lvFieldLclStart + fieldIndex); + LclVarDsc* fieldDsc = + lvaGetDesc(promotedVarDsc->lvFieldLclStart + fieldIndex); + + if (fieldDsc->lvHasLdAddrOp || fieldDsc->lvAddrExposed) + { + // Promoted and not yet demoted field is exposed, can't optimize + hasExposure = true; + break; + } } } - } - else - { - JITDUMP("...never promoted case, we're cool\n"); + else + { + JITDUMP("...never promoted case, we're cool\n"); + } } if (hasExposure) diff --git a/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCalls.csproj b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCalls.csproj index f3e1cbd44b404..bf6f589eb325b 100644 --- a/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCalls.csproj +++ b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCalls.csproj @@ -5,6 +5,7 @@ None True + True diff --git a/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs index b5eb37d9a6bcf..021470ab12ce5 100644 --- a/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs +++ b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs @@ -11,6 +11,12 @@ public struct S public long y; } +public struct R +{ + public S s; + public S t; +} + // Tail calls with implicit byref parameters as arguments. // // We need to ensure that we don't introduce aliased @@ -19,7 +25,7 @@ public struct S public class ImplicitByrefTailCalls { // Helper method to make callees unattractive for inlining. - public static void Z() {} + public static void Z() { } // Will return different answers if x and y refer to the same struct. public static long Alias(S x, S y) @@ -77,6 +83,34 @@ public static long Alias4(ref long y, int b, int c, int d, int e, int f, S x) return result; } + // Will return different answers if x and r refer to the same struct. + public static long Alias5(S x, R r) + { + Z(); Z(); Z(); Z(); + r.s.x++; + long result = 0; + for (int i = 0; i < 100; i++) + { + x.x++; + result += x.x + x.y; + } + return result; + } + + // Will return different answers if ss and x refer to the same struct + public static long Alias6(Span ss, S x) + { + Z(); Z(); Z(); Z(); + ss[0].x++; + long result = 0; + for (int i = 0; i < 100; i++) + { + x.x++; + result += x.x + x.y; + } + return result; + } + // A must copy params locally when calling Alias // and so can't tail call public static long A(S x) @@ -150,11 +184,45 @@ public static long G(int a, int b, int c, int d, int e, int f, S x, S y) } } + // H must copy params locally when calling Alias + // and so can't tail call + public static long H(R r) + { + Z(); Z(); Z(); Z(); + return Alias(r.s, r.s); + } + + // I must copy params locally when calling Alias + // and so can't tail call + public static long I(R r) + { + Z(); Z(); Z(); Z(); + return Alias5(r.s, r); + } + + // J can tail call, but we might not recognize this + public static long J(R r) + { + Z(); Z(); Z(); Z(); + return Alias(r.s, r.t); + } + + // K cannot tail call + public static unsafe long K(S s) + { + Z(); Z(); Z(); Z(); + Span ss = new Span((void*)&s, 1); + return Alias6(ss, s); + } + public static int Main() { S s = new S(); s.x = 1; s.y = 2; + R r = new R(); + r.s = s; + r.t = s; long ra = A(s); long rb = B(s); long rc = C(s); @@ -162,9 +230,16 @@ public static int Main() long re = E(0, 0, 0, 0, 0, 0, s, s); long rf = F(0, 0, 0, 0, 0, 0, s, s); long rg = G(0, 0, 0, 0, 0, 0, s, s); + long rh = H(r); + long ri = I(r); + long rj = J(r); + long rk = K(s); - Console.WriteLine($"{ra},{rb},{rc},{rd},{re},{rf},{rg}"); + Console.WriteLine($"{ra},{rb},{rc},{rd},{re},{rf},{rg},{rh},{ri},{rj},{rk}"); - return (ra == 5350) && (rb == 5350) && (rc == 5350) && (rd == 5350) && (re == 5350) && (rf == 5350) && (rg == 5350) ? 100 : -1; + return + (ra == 5350) && (rb == 5350) && (rc == 5350) && (rd == 5350) && + (re == 5350) && (rf == 5350) && (rg == 5350) && (rh == 5350) && + (ri == 5350) && (rj == 5350) && (rk == 5350) ? 100 : -1; } } From c3cc4b018b045442f53f756413dbd9b9f67bbc90 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Mon, 2 Mar 2020 12:01:13 -0800 Subject: [PATCH 5/9] clarify comment about aliasing from other args --- src/coreclr/src/jit/morph.cpp | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/coreclr/src/jit/morph.cpp b/src/coreclr/src/jit/morph.cpp index 2eb3d86f58e2e..c6b1fc0879b54 100644 --- a/src/coreclr/src/jit/morph.cpp +++ b/src/coreclr/src/jit/morph.cpp @@ -6787,12 +6787,14 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) // So, we also need to scan all 'lcl's fields, if any, to see if they // are exposed. // - // TODO: we should not need to check for TYP_I_IMPL here, any - // aliasing value should be TYP_BYREF. - // - // Note we don't need to check for native pointers (abstractly) - // because aliasing native pointers require pinning, and we - // reject all tail calls in methods with pinned locals. + // When looking for aliases from other args, we check for both TYP_BYREF + // and TYP_I_IMPL typed args here. Conceptually anything that points into + // an implicit byref parameter should be TYP_BYREF, as these parameters could + // refer to boxed heap locations (say if the method is invoked by reflection) + // but there are some stack only structs (like typed references) where + // the importer/runtime code uses TYP_I_IMPL, and fgInitArgInfo will + // transiently retype all simple address-of implicit parameter args as + // TYP_I_IMPL. // if ((arg2->argType == TYP_BYREF) || (arg2->argType == TYP_I_IMPL)) { From cf98b3e9bb7b9f96b8737367449efedc0874f28f Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Tue, 10 Mar 2020 18:52:22 -0700 Subject: [PATCH 6/9] Apply suggestions from code review Co-Authored-By: Eugene Rozenfeld --- src/coreclr/src/jit/lclmorph.cpp | 4 ++-- src/coreclr/src/jit/morph.cpp | 4 ++-- .../src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/coreclr/src/jit/lclmorph.cpp b/src/coreclr/src/jit/lclmorph.cpp index d24efccc7e234..176f1994d85ad 100644 --- a/src/coreclr/src/jit/lclmorph.cpp +++ b/src/coreclr/src/jit/lclmorph.cpp @@ -1104,12 +1104,12 @@ class LocalAddressVisitor final : public GenTreeVisitor varDsc->incLvRefCnt(1, RCS_EARLY); // See if this struct is an argument to a call. This information is recorded - // via the weighted early ref count for the local, and feeds the undo promition + // via the weighted early ref count for the local, and feeds the undo promotion // heuristic. // // It can be approximate, so the pattern match below need not be exhaustive. // But the pattern should at least subset the implicit byref cases that are - // handed in fgCanFastTailCall and fgMakeOutgoingStructArgCopy. + // handled in fgCanFastTailCall and fgMakeOutgoingStructArgCopy. // // CALL(OBJ(ADDR(LCL_VAR...))) bool isArgToCall = false; diff --git a/src/coreclr/src/jit/morph.cpp b/src/coreclr/src/jit/morph.cpp index c6b1fc0879b54..74d08e4faf7a3 100644 --- a/src/coreclr/src/jit/morph.cpp +++ b/src/coreclr/src/jit/morph.cpp @@ -6703,7 +6703,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) // and so still be able to tail call. if (opts.OptimizationEnabled()) { - // First, see if this arg is a byref param. + // First, see if this arg is an implicit byref param. GenTreeLclVarCommon* const lcl = arg->GetNode()->IsImplicitByrefParameterValue(this); // If so, it must not be promoted; if we've promoted, then the arg will be @@ -6835,7 +6835,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) } else if (varDsc->lvFieldLclStart != 0) { - // This the promoted/undone struct case. + // This is the promoted/undone struct case. // // The field start is actually the local number of the promoted local, // use it to enumerate the fields. diff --git a/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs index 021470ab12ce5..18c577f2e5e17 100644 --- a/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs +++ b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs @@ -83,7 +83,7 @@ public static long Alias4(ref long y, int b, int c, int d, int e, int f, S x) return result; } - // Will return different answers if x and r refer to the same struct. + // Will return different answers if x and r.s refer to the same struct. public static long Alias5(S x, R r) { Z(); Z(); Z(); Z(); From 50da72039010b030289890caa39626420f1fd058 Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 11 Mar 2020 01:21:11 -0700 Subject: [PATCH 7/9] update test cases --- .../opt/Tailcall/ImplicitByrefTailCalls.cs | 75 +++++++++++++-- .../ImplicitByrefTailCallsAliasing.cs | 91 ++++++++++++------- 2 files changed, 124 insertions(+), 42 deletions(-) diff --git a/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCalls.cs b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCalls.cs index 6283f24098dc5..f960aed87554c 100644 --- a/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCalls.cs +++ b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCalls.cs @@ -12,7 +12,7 @@ public class ImplicitByrefTailCalls { - public static void Z() {} + public static void Z() { } public static bool Z(bool b) => b; public static int ZZ(Span x) @@ -27,8 +27,8 @@ public static int ZZ(Span x) public static int ZZZ(Span x, int a, int b, int c, int d, int e, int f) { - Z(); Z(); - Z(); Z(); + Z(); Z(); Z(); Z(); + Z(); Z(); Z(); Z(); int result = 0; for (int i = 0; i < x.Length; i++) { @@ -42,7 +42,7 @@ public static int ZZZ(Span x, int a, int b, int c, int d, int e, int f) public static int A(Span x) { Z(); Z(); return ZZ(x); - } + } public static int B(Span x) { @@ -134,7 +134,7 @@ public static int S(Span x) { Z(); Z(); return ZZZ(x, 1, 2, 3, 4, 5, 6); - } + } // Must copy at the normal call site // but can avoid at tail call site @@ -145,6 +145,62 @@ public static int T(Span x, ref int q) return ZZ(x); } + // Here ZZZ is called in a loop, so despite + // the call argument x being the only reference to + // x in the method, we must copy x. + // We can't consider it as last use. + public static int L(Span x) + { + Z(); Z(); + int result = 0; + int limit = 10; + for (int i = 0; i < limit; i++) + { + result += ZZZ(x, 1, 2, 3, 4, 5, 6); + } + + return result / limit; + } + + static void MustThrow(Span x) + { + x[0]++; + throw new Exception(); + } + + // Because MustThrow must throw + // and the argument is the only + // mention of x, we do not need to copy x. + public static int M(Span x) + { + Z(); Z(); Z(); Z(); + if (p) + { + MustThrow(x); + } + return 10000; + } + + // Although MustThrow must throw, + // the argument x is the not only + // mention of x, so we must copy x. + public static int N(Span x) + { + Z(); Z(); Z(); Z(); + try + { + if (p) + { + MustThrow(x); + } + } + catch (Exception) + { + } + + return 10000 + x[0]; + } + static bool p; public static int Main() @@ -167,9 +223,12 @@ public static int Main() int rh = H(s, 46); int rs = S(s); int rt = T(s, ref q); + int rl = L(s); + int rm = M(s); + int rn = N(s); + + Console.WriteLine($"{ra},{rb},{rc},{rd},{re},{rf},{rg},{rh},{rs},{rt},{rl},{rm},{rn}"); - Console.WriteLine($"{ra},{rb},{rc},{rd},{re},{rf},{rg},{rh},{rs},{rt}"); - - return ra + rb + rc + rd + re + rf + rg + rh + rs + rt - 80055; + return ra + rb + rc + rd + re + rf + rg + rh + rs + rt + rl + rm + rn - 110055; } } diff --git a/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs index 18c577f2e5e17..9999ae6316403 100644 --- a/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs +++ b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs @@ -7,14 +7,14 @@ public struct S { - public long x; - public long y; + public long s_x; + public long s_y; } public struct R { - public S s; - public S t; + public S r_s; + public S r_t; } // Tail calls with implicit byref parameters as arguments. @@ -31,12 +31,12 @@ public static void Z() { } public static long Alias(S x, S y) { Z(); Z(); Z(); Z(); - y.x++; + y.s_x++; long result = 0; for (int i = 0; i < 100; i++) { - x.x++; - result += x.x + x.y; + x.s_x++; + result += x.s_x + x.s_y; } return result; } @@ -49,8 +49,8 @@ public static long Alias2(S x, ref long y) long result = 0; for (int i = 0; i < 100; i++) { - x.x++; - result += x.x + x.y; + x.s_x++; + result += x.s_x + x.s_y; } return result; } @@ -59,17 +59,17 @@ public static long Alias2(S x, ref long y) public static long Alias3(int a, int b, int c, int d, int e, int f, S x, S y) { Z(); Z(); Z(); Z(); - y.x++; + y.s_x++; long result = 0; for (int i = 0; i < 100; i++) { - x.x++; - result += x.x + x.y; + x.s_x++; + result += x.s_x + x.s_y; } return result; } - // Will return different answers if x and y refer to same struct + // Will return different answers if y refers to some part of x public static long Alias4(ref long y, int b, int c, int d, int e, int f, S x) { Z(); Z(); Z(); Z(); @@ -77,8 +77,8 @@ public static long Alias4(ref long y, int b, int c, int d, int e, int f, S x) long result = 0; for (int i = 0; i < 100; i++) { - x.x++; - result += x.x + x.y; + x.s_x++; + result += x.s_x + x.s_y; } return result; } @@ -87,12 +87,12 @@ public static long Alias4(ref long y, int b, int c, int d, int e, int f, S x) public static long Alias5(S x, R r) { Z(); Z(); Z(); Z(); - r.s.x++; + r.r_s.s_x++; long result = 0; for (int i = 0; i < 100; i++) { - x.x++; - result += x.x + x.y; + x.s_x++; + result += x.s_x + x.s_y; } return result; } @@ -101,12 +101,26 @@ public static long Alias5(S x, R r) public static long Alias6(Span ss, S x) { Z(); Z(); Z(); Z(); - ss[0].x++; + ss[0].s_x++; long result = 0; for (int i = 0; i < 100; i++) { - x.x++; - result += x.x + x.y; + x.s_x++; + result += x.s_x + x.s_y; + } + return result; + } + + // Will return different answers if x and y refer to the same struct. + public static long Alias7(S x, ref S y) + { + Z(); Z(); Z(); Z(); + y.s_x++; + long result = 0; + for (int i = 0; i < 100; i++) + { + x.s_x++; + result += x.s_x + x.s_y; } return result; } @@ -124,7 +138,7 @@ public static long A(S x) public static long B(S x) { Z(); Z(); Z(); Z(); - return Alias2(x, ref x.y); + return Alias2(x, ref x.s_y); } // C must copy params locally when calling Alias2 @@ -132,7 +146,7 @@ public static long B(S x) // tree is not part of the call. public static long C(S x) { - ref long z = ref x.y; + ref long z = ref x.s_y; Z(); Z(); Z(); Z(); return Alias2(x, ref z); } @@ -176,11 +190,11 @@ public static long G(int a, int b, int c, int d, int e, int f, S x, S y) if (a != 0) { - return Alias4(ref x.x, 2, 3, 4, 5, 6, y); + return Alias4(ref x.s_x, 2, 3, 4, 5, 6, y); } else { - return Alias4(ref y.x, 2, 3, 4, 5, 6, x); + return Alias4(ref y.s_x, 2, 3, 4, 5, 6, x); } } @@ -189,7 +203,7 @@ public static long G(int a, int b, int c, int d, int e, int f, S x, S y) public static long H(R r) { Z(); Z(); Z(); Z(); - return Alias(r.s, r.s); + return Alias(r.r_s, r.r_s); } // I must copy params locally when calling Alias @@ -197,14 +211,14 @@ public static long H(R r) public static long I(R r) { Z(); Z(); Z(); Z(); - return Alias5(r.s, r); + return Alias5(r.r_s, r); } // J can tail call, but we might not recognize this public static long J(R r) { Z(); Z(); Z(); Z(); - return Alias(r.s, r.t); + return Alias(r.r_s, r.r_t); } // K cannot tail call @@ -215,14 +229,22 @@ public static unsafe long K(S s) return Alias6(ss, s); } + // L cannot tail call as the first arg to + // Alias7 must be copied locally + public static long L(S s) + { + Z(); Z(); Z(); Z(); + return Alias7(s, ref s); + } + public static int Main() { S s = new S(); - s.x = 1; - s.y = 2; + s.s_x = 1; + s.s_y = 2; R r = new R(); - r.s = s; - r.t = s; + r.r_s = s; + r.r_t = s; long ra = A(s); long rb = B(s); long rc = C(s); @@ -234,12 +256,13 @@ public static int Main() long ri = I(r); long rj = J(r); long rk = K(s); + long rl = L(s); - Console.WriteLine($"{ra},{rb},{rc},{rd},{re},{rf},{rg},{rh},{ri},{rj},{rk}"); + Console.WriteLine($"{ra},{rb},{rc},{rd},{re},{rf},{rg},{rh},{ri},{rj},{rk},{rl}"); return (ra == 5350) && (rb == 5350) && (rc == 5350) && (rd == 5350) && (re == 5350) && (rf == 5350) && (rg == 5350) && (rh == 5350) && - (ri == 5350) && (rj == 5350) && (rk == 5350) ? 100 : -1; + (ri == 5350) && (rj == 5350) && (rk == 5350) && (rl == 5350) ? 100 : -1; } } From 7084ebf0171616ce3587b435399aa96767bfee7b Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 11 Mar 2020 09:11:15 -0700 Subject: [PATCH 8/9] Tweak the tests a bit. Use no-opt to ensure tail called methods don't promote their struct args and so undo the aliasing checks. Note that one of the tests that could tail call doesn't because of the way morph reuses struct temps. --- .../src/JIT/opt/Tailcall/ImplicitByrefTailCalls.cs | 9 ++++++--- .../opt/Tailcall/ImplicitByrefTailCallsAliasing.cs | 14 +++++++------- 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCalls.cs b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCalls.cs index f960aed87554c..161b38ed480de 100644 --- a/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCalls.cs +++ b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCalls.cs @@ -15,6 +15,7 @@ public class ImplicitByrefTailCalls public static void Z() { } public static bool Z(bool b) => b; + [MethodImpl(MethodImplOptions.NoOptimization)] public static int ZZ(Span x) { int result = 0; @@ -25,10 +26,9 @@ public static int ZZ(Span x) return result; } + [MethodImpl(MethodImplOptions.NoOptimization)] public static int ZZZ(Span x, int a, int b, int c, int d, int e, int f) { - Z(); Z(); Z(); Z(); - Z(); Z(); Z(); Z(); int result = 0; for (int i = 0; i < x.Length; i++) { @@ -137,7 +137,10 @@ public static int S(Span x) } // Must copy at the normal call site - // but can avoid at tail call site + // but can avoid at tail call site. However + // we're currently blocked because we reuse + // the temp used to pass the argument and + // it is exposed in the first call. public static int T(Span x, ref int q) { Z(); Z(); diff --git a/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs index 9999ae6316403..bc9b53ab7569f 100644 --- a/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs +++ b/src/coreclr/tests/src/JIT/opt/Tailcall/ImplicitByrefTailCallsAliasing.cs @@ -28,9 +28,9 @@ public class ImplicitByrefTailCalls public static void Z() { } // Will return different answers if x and y refer to the same struct. + [MethodImpl(MethodImplOptions.NoOptimization)] public static long Alias(S x, S y) { - Z(); Z(); Z(); Z(); y.s_x++; long result = 0; for (int i = 0; i < 100; i++) @@ -42,9 +42,9 @@ public static long Alias(S x, S y) } // Will return different answers if y refers to some part of x. + [MethodImpl(MethodImplOptions.NoOptimization)] public static long Alias2(S x, ref long y) { - Z(); Z(); Z(); Z(); y++; long result = 0; for (int i = 0; i < 100; i++) @@ -56,9 +56,9 @@ public static long Alias2(S x, ref long y) } // Will return different answers if x and y refer to same struct + [MethodImpl(MethodImplOptions.NoOptimization)] public static long Alias3(int a, int b, int c, int d, int e, int f, S x, S y) { - Z(); Z(); Z(); Z(); y.s_x++; long result = 0; for (int i = 0; i < 100; i++) @@ -70,9 +70,9 @@ public static long Alias3(int a, int b, int c, int d, int e, int f, S x, S y) } // Will return different answers if y refers to some part of x + [MethodImpl(MethodImplOptions.NoOptimization)] public static long Alias4(ref long y, int b, int c, int d, int e, int f, S x) { - Z(); Z(); Z(); Z(); y++; long result = 0; for (int i = 0; i < 100; i++) @@ -84,9 +84,9 @@ public static long Alias4(ref long y, int b, int c, int d, int e, int f, S x) } // Will return different answers if x and r.s refer to the same struct. + [MethodImpl(MethodImplOptions.NoOptimization)] public static long Alias5(S x, R r) { - Z(); Z(); Z(); Z(); r.r_s.s_x++; long result = 0; for (int i = 0; i < 100; i++) @@ -98,9 +98,9 @@ public static long Alias5(S x, R r) } // Will return different answers if ss and x refer to the same struct + [MethodImpl(MethodImplOptions.NoOptimization)] public static long Alias6(Span ss, S x) { - Z(); Z(); Z(); Z(); ss[0].s_x++; long result = 0; for (int i = 0; i < 100; i++) @@ -112,9 +112,9 @@ public static long Alias6(Span ss, S x) } // Will return different answers if x and y refer to the same struct. + [MethodImpl(MethodImplOptions.NoOptimization)] public static long Alias7(S x, ref S y) { - Z(); Z(); Z(); Z(); y.s_x++; long result = 0; for (int i = 0; i < 100; i++) From a33348c70deb383109d0c87979cc02add6b98dac Mon Sep 17 00:00:00 2001 From: Andy Ayers Date: Wed, 11 Mar 2020 09:12:30 -0700 Subject: [PATCH 9/9] code changes per review feedback --- src/coreclr/src/jit/gentree.cpp | 14 +++--- src/coreclr/src/jit/gentree.h | 2 +- src/coreclr/src/jit/morph.cpp | 86 ++++++++++++++++++--------------- 3 files changed, 54 insertions(+), 48 deletions(-) diff --git a/src/coreclr/src/jit/gentree.cpp b/src/coreclr/src/jit/gentree.cpp index 94565b11873c5..6f82b452a4e8d 100644 --- a/src/coreclr/src/jit/gentree.cpp +++ b/src/coreclr/src/jit/gentree.cpp @@ -16201,17 +16201,17 @@ bool GenTree::IsLocalAddrExpr(Compiler* comp, GenTreeLclVarCommon** pLclVarTree, // compiler -- compiler instance // // Return Value: -// GenTreeLclVarCommon node for the local, or nullptr. - -GenTreeLclVarCommon* GenTree::IsImplicitByrefParameterValue(Compiler* compiler) +// GenTreeLclVar node for the local, or nullptr. +// +GenTreeLclVar* GenTree::IsImplicitByrefParameterValue(Compiler* compiler) { #if defined(TARGET_AMD64) || defined(TARGET_ARM64) - GenTreeLclVarCommon* lcl = nullptr; + GenTreeLclVar* lcl = nullptr; if (OperIs(GT_LCL_VAR)) { - lcl = AsLclVarCommon(); + lcl = AsLclVar(); } else if (OperIs(GT_OBJ)) { @@ -16219,7 +16219,7 @@ GenTreeLclVarCommon* GenTree::IsImplicitByrefParameterValue(Compiler* compiler) if (addr->OperIs(GT_LCL_VAR)) { - lcl = addr->AsLclVarCommon(); + lcl = addr->AsLclVar(); } else if (addr->OperIs(GT_ADDR)) { @@ -16227,7 +16227,7 @@ GenTreeLclVarCommon* GenTree::IsImplicitByrefParameterValue(Compiler* compiler) if (base->OperIs(GT_LCL_VAR)) { - lcl = base->AsLclVarCommon(); + lcl = base->AsLclVar(); } } } diff --git a/src/coreclr/src/jit/gentree.h b/src/coreclr/src/jit/gentree.h index 3dcf4148420a3..20a950a94ad2e 100644 --- a/src/coreclr/src/jit/gentree.h +++ b/src/coreclr/src/jit/gentree.h @@ -1843,7 +1843,7 @@ struct GenTree // Determine if this tree represents the value of an entire implict byref parameter, // and if so return the tree for the parameter. - GenTreeLclVarCommon* IsImplicitByrefParameterValue(Compiler* compiler); + GenTreeLclVar* IsImplicitByrefParameterValue(Compiler* compiler); // Determine if this is a LclVarCommon node and return some additional info about it in the // two out parameters. diff --git a/src/coreclr/src/jit/morph.cpp b/src/coreclr/src/jit/morph.cpp index 74d08e4faf7a3..9f75f8cc78184 100644 --- a/src/coreclr/src/jit/morph.cpp +++ b/src/coreclr/src/jit/morph.cpp @@ -4798,12 +4798,12 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, // if (opts.OptimizationEnabled()) { - GenTreeLclVarCommon* const lcl = argx->IsImplicitByrefParameterValue(this); + GenTreeLclVar* const lcl = argx->IsImplicitByrefParameterValue(this); if (lcl != nullptr) { - unsigned varNum = lcl->AsLclVarCommon()->GetLclNum(); - LclVarDsc* varDsc = lvaGetDesc(varNum); + const unsigned varNum = lcl->GetLclNum(); + LclVarDsc* const varDsc = lvaGetDesc(varNum); const unsigned short totalAppearances = varDsc->lvRefCnt(RCS_EARLY); // We don't have liveness so we rely on other indications of last use. @@ -4819,7 +4819,8 @@ void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, // // * (may not copy) if the call is noreturn, the use is a last use. // We also check for just one reference here as we are not doing - // alias analysis of the call's parameters. + // alias analysis of the call's parameters, or checking if the call + // site is not within some try region. // // * (may not copy) if there is exactly one use of the local in the method, // and the call is not in loop, this is a last use. @@ -6681,9 +6682,9 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) fgInitArgInfo(callee); fgArgInfo* argInfo = callee->fgArgInfo; - bool hasByrefParameter = false; - size_t calleeArgStackSize = 0; - size_t callerArgStackSize = info.compArgStackSize; + bool hasMustCopyByrefParameter = false; + size_t calleeArgStackSize = 0; + size_t callerArgStackSize = info.compArgStackSize; for (unsigned index = 0; index < argInfo->ArgCount(); ++index) { @@ -6697,31 +6698,34 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) { // Generally a byref arg will block tail calling, as we have to // make a local copy of the struct for the callee. - hasByrefParameter = true; + hasMustCopyByrefParameter = true; // If we're optimizing, we may be able to pass our caller's byref to our callee, // and so still be able to tail call. if (opts.OptimizationEnabled()) { // First, see if this arg is an implicit byref param. - GenTreeLclVarCommon* const lcl = arg->GetNode()->IsImplicitByrefParameterValue(this); + GenTreeLclVar* const lcl = arg->GetNode()->IsImplicitByrefParameterValue(this); - // If so, it must not be promoted; if we've promoted, then the arg will be - // a local struct assembled from the promoted fields. if (lcl != nullptr) { - JITDUMP("Arg [%06u] is unpromoted implicit byref V%02u, seeing if we can still tail call\n", - dspTreeID(arg->GetNode()), lcl->GetLclNum()); - + // Yes, the arg is an implicit byref param. + const unsigned lclNum = lcl->GetLclNum(); LclVarDsc* const varDsc = lvaGetDesc(lcl); + // The param must not be promoted; if we've promoted, then the arg will be + // a local struct assembled from the promoted fields. if (varDsc->lvPromoted) { - JITDUMP(" ... no, V%02u was promoted\n", lcl->GetLclNum()); + JITDUMP("Arg [%06u] is promoted implicit byref V%02u, so no tail call\n", + dspTreeID(arg->GetNode()), lclNum); } else { - // We also have to worry about introducing aliases if we bypass copying + JITDUMP("Arg [%06u] is unpromoted implicit byref V%02u, seeing if we can still tail call\n", + dspTreeID(arg->GetNode()), lclNum); + + // We have to worry about introducing aliases if we bypass copying // the struct at the call. We'll do some limited analysis to see if we // can rule this out. const unsigned argLimit = 6; @@ -6729,16 +6733,17 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) // If this is the only appearance of the byref in the method, then // aliasing is not possible. // - if (varDsc->lvRefCnt(RCS_EARLY) == 1) - { - JITDUMP("... yes, arg is the only appearance of V%02u\n", lcl->GetLclNum()); - hasByrefParameter = false; - } // If no other call arg refers to this byref, and no other arg is // a pointer which could refer to this byref, we can optimize. // // We only check this for calls with small numbers of arguments, // as the analysis cost will be quadratic. + // + if (varDsc->lvRefCnt(RCS_EARLY) == 1) + { + JITDUMP("... yes, arg is the only appearance of V%02u\n", lclNum); + hasMustCopyByrefParameter = false; + } else if (argInfo->ArgCount() <= argLimit) { GenTree* interferingArg = nullptr; @@ -6750,7 +6755,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) } fgArgTabEntry* const arg2 = argInfo->GetArgEntry(index2, false); - JITDUMP("checking other arg [%06u]...\n", dspTreeID(arg2->GetNode())); + JITDUMP("... checking other arg [%06u]...\n", dspTreeID(arg2->GetNode())); DISPTREE(arg2->GetNode()); // Do we pass 'lcl' more than once to the callee? @@ -6759,7 +6764,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) GenTreeLclVarCommon* const lcl2 = arg2->GetNode()->IsImplicitByrefParameterValue(this); - if ((lcl2 != nullptr) && (lcl->GetLclNum() == lcl2->GetLclNum())) + if ((lcl2 != nullptr) && (lclNum == lcl2->GetLclNum())) { // not copying would introduce aliased implicit byref structs // in the callee ... we can't optimize. @@ -6768,7 +6773,8 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) } else { - JITDUMP("... ok, different implicit byref V%02u\n", lcl2->GetLclNum()); + JITDUMP("... arg refers to different implicit byref V%02u\n", + lcl2->GetLclNum()); continue; } } @@ -6798,7 +6804,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) // if ((arg2->argType == TYP_BYREF) || (arg2->argType == TYP_I_IMPL)) { - JITDUMP("... is byref arg...\n", dspTreeID(arg2->GetNode())); + JITDUMP("...arg is a byref, must run an alias check\n"); bool checkExposure = true; bool hasExposure = false; @@ -6807,7 +6813,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) if (arg2Node->OperIs(GT_LCL_VAR)) { GenTreeLclVarCommon* arg2LclNode = arg2Node->AsLclVarCommon(); - assert(arg2LclNode->GetLclNum() != lcl->GetLclNum()); + assert(arg2LclNode->GetLclNum() != lclNum); LclVarDsc* arg2Dsc = lvaGetDesc(arg2LclNode); // Other params can't alias implicit byref params @@ -6825,12 +6831,15 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) if (checkExposure) { + JITDUMP( + "... not sure where byref arg points, checking if V%02u is exposed\n", + lclNum); // arg2 might alias arg, see if we've exposed // arg somewhere in the method. if (varDsc->lvHasLdAddrOp || varDsc->lvAddrExposed) { // Struct as a whole is exposed, can't optimize - JITDUMP(" ... V%02u exposed\n", lcl->GetLclNum()); + JITDUMP("... V%02u is exposed\n", lclNum); hasExposure = true; } else if (varDsc->lvFieldLclStart != 0) @@ -6841,29 +6850,26 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) // use it to enumerate the fields. const unsigned promotedLcl = varDsc->lvFieldLclStart; LclVarDsc* const promotedVarDsc = lvaGetDesc(promotedLcl); - JITDUMP("promoted-unpromoted case, checking fields of V%02u\n", + JITDUMP("...promoted-unpromoted case -- also checking exposure of " + "fields of V%02u\n", promotedLcl); for (unsigned fieldIndex = 0; fieldIndex < promotedVarDsc->lvFieldCnt; fieldIndex++) { - JITDUMP("checking field V%02u\n", - promotedVarDsc->lvFieldLclStart + fieldIndex); LclVarDsc* fieldDsc = lvaGetDesc(promotedVarDsc->lvFieldLclStart + fieldIndex); if (fieldDsc->lvHasLdAddrOp || fieldDsc->lvAddrExposed) { // Promoted and not yet demoted field is exposed, can't optimize + JITDUMP("... field V%02u is exposed\n", + promotedVarDsc->lvFieldLclStart + fieldIndex); hasExposure = true; break; } } } - else - { - JITDUMP("...never promoted case, we're cool\n"); - } } if (hasExposure) @@ -6874,7 +6880,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) } else { - JITDUMP("...not byref or implicit byref (%s), we're cool\n", + JITDUMP("...arg is not a byref or implicit byref (%s)\n", varTypeName(arg2->GetNode()->TypeGet())); } } @@ -6882,12 +6888,12 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) if (interferingArg != nullptr) { JITDUMP("... no, arg [%06u] may alias with V%02u\n", dspTreeID(interferingArg), - lcl->GetLclNum()); + lclNum); } else { - JITDUMP("... yes, no other arg in call can alias V%02u\n", lcl->GetLclNum()); - hasByrefParameter = false; + JITDUMP("... yes, no other arg in call can alias V%02u\n", lclNum); + hasMustCopyByrefParameter = false; } } else @@ -6899,7 +6905,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) } } - if (hasByrefParameter) + if (hasMustCopyByrefParameter) { // This arg blocks the tail call. No reason to keep scanning the remaining args. break; @@ -6993,7 +6999,7 @@ bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason) // For Windows some struct parameters are copied on the local frame // and then passed by reference. We cannot fast tail call in these situation // as we need to keep our frame around. - if (hasByrefParameter) + if (hasMustCopyByrefParameter) { reportFastTailCallDecision("Callee has a byref parameter"); return false;