Skip to content

Commit

Permalink
Avoid unnecessary return val copying in tailcall-via-help (#72720)
Browse files Browse the repository at this point in the history
The initial implementation of this did not handle the fact that retbuf
can point to GC heap during reflection invoke. It was fixed in #39815,
but the way it was fixed was by copying it into a local. This changes
the fix so that we simply report the return value pointer as a byref
throughout the mechanism, which simplifies the JIT's handling and is a
perf improvement as well.
  • Loading branch information
jakobbotsch authored Jul 24, 2022
1 parent c22f7f8 commit c9c27d4
Show file tree
Hide file tree
Showing 4 changed files with 30 additions and 54 deletions.
20 changes: 13 additions & 7 deletions docs/design/features/tailcalls-with-helpers.md
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,13 @@ certain tailcalls to generic methods.
The second IL stub extracts the arguments and calls the target function. For the
above case a function like the following will be generated:
```csharp
void IL_STUB_CallTailCallTarget(IntPtr argBuffer, IntPtr result, PortableTailCallFrame* pFrame)
void IL_STUB_CallTailCallTarget(IntPtr argBuffer, ref byte result, PortableTailCallFrame* pFrame)
{
pFrame->NextCall = null;
pFrame->TailCallAwareReturnAddress = StubHelpers.NextCallReturnAddress();
int arg1 = *(int*)(argBuffer + 4);
*argBuffer = TAILCALLARGBUFFER_ABANDONED;
*(bool*)result = IsOdd(arg1);
Unsafe.As<byte, bool>(ref result) = IsOdd(arg1);
}
```
It matches the function above by loading the argument that was written, and
Expand Down Expand Up @@ -141,7 +141,7 @@ live instance of the dispatcher. This structure looks like the following:
struct PortableTailCallFrame
{
public IntPtr TailCallAwareReturnAddress;
public IntPtr NextCall;
public delegate*<IntPtr, ref byte, PortableTailCallFrame*, void> NextCall;
}
```

Expand All @@ -166,8 +166,8 @@ Finally, the dispatcher follows:
```csharp
private static unsafe void DispatchTailCalls(
IntPtr callersRetAddrSlot,
delegate*<IntPtr, IntPtr, PortableTailCallFrame*, void> callTarget,
IntPtr retVal)
delegate*<IntPtr, ref byte, PortableTailCallFrame*, void> callTarget,
ref byte retVal)
{
IntPtr callersRetAddr;
TailCallTls* tls = GetTailCallInfo(callersRetAddrSlot, &callersRetAddr);
Expand All @@ -189,7 +189,7 @@ private static unsafe void DispatchTailCalls(

do
{
callTarget(tls->ArgBuffer, retVal, &newFrame);
callTarget(tls->ArgBuffer, ref retVal, &newFrame);
callTarget = newFrame.NextCall;
} while (callTarget != null);
}
Expand Down Expand Up @@ -249,14 +249,20 @@ transforms the code into the equivalent of
```csharp
IL_STUB_StoreTailCallArgs(x - 1);
bool result;
DispatchTailCalls(&IL_STUB_CallTailCallTarget, (IntPtr)&result, _AddressOfReturnAddress());
DispatchTailCalls(_AddressOfReturnAddress(), &IL_STUB_CallTailCallTarget, ref result);
return result;
```

Here `_AddressOfReturnAddress()` represents the stack slot containing the return
address. Note that .NET requires that the return address is always stored on the
stack, even on ARM architectures, due to its return address hijacking mechanism.

When the result is returned by value the JIT will introduce a local and pass a
pointer to it in the second argument. For ret bufs the JIT will typically
directly pass along its own return buffer parameter to DispatchTailCalls. It is
possible that this return buffer is pointing into GC heap, so the result is
always tracked as a byref in the mechanism.

In certain cases the target function pointer is also stored. For some targets
this might require the JIT to perform the equivalent of `ldvirtftn` or `ldftn`
to obtain a properly callable function pointer.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -331,8 +331,8 @@ public static IntPtr AllocateTypeAssociatedMemory(Type type, int size)
[StackTraceHidden]
private static unsafe void DispatchTailCalls(
IntPtr callersRetAddrSlot,
delegate*<IntPtr, IntPtr, PortableTailCallFrame*, void> callTarget,
IntPtr retVal)
delegate*<IntPtr, ref byte, PortableTailCallFrame*, void> callTarget,
ref byte retVal)
{
IntPtr callersRetAddr;
TailCallTls* tls = GetTailCallInfo(callersRetAddrSlot, &callersRetAddr);
Expand All @@ -354,7 +354,7 @@ private static unsafe void DispatchTailCalls(

do
{
callTarget(tls->ArgBuffer, retVal, &newFrame);
callTarget(tls->ArgBuffer, ref retVal, &newFrame);
callTarget = newFrame.NextCall;
} while (callTarget != null);
}
Expand Down Expand Up @@ -663,7 +663,7 @@ public static TypeHandle TypeHandleOf<T>()
internal unsafe struct PortableTailCallFrame
{
public IntPtr TailCallAwareReturnAddress;
public delegate*<IntPtr, IntPtr, PortableTailCallFrame*, void> NextCall;
public delegate*<IntPtr, ref byte, PortableTailCallFrame*, void> NextCall;
}

[StructLayout(LayoutKind.Sequential)]
Expand Down
48 changes: 9 additions & 39 deletions src/coreclr/jit/morph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7258,13 +7258,12 @@ GenTree* Compiler::fgCreateCallDispatcherAndGetResult(GenTreeCall* orig
{
GenTreeCall* callDispatcherNode = gtNewCallNode(CT_USER_FUNC, dispatcherHnd, TYP_VOID, fgMorphStmt->GetDebugInfo());
// The dispatcher has signature
// void DispatchTailCalls(void* callersRetAddrSlot, void* callTarget, void* retValue)
// void DispatchTailCalls(void* callersRetAddrSlot, void* callTarget, ref byte retValue)

// Add return value arg.
GenTree* retValArg;
GenTree* retVal = nullptr;
unsigned int newRetLcl = BAD_VAR_NUM;
GenTree* copyToRetBufNode = nullptr;
GenTree* retVal = nullptr;
unsigned int newRetLcl = BAD_VAR_NUM;

if (origCall->gtArgs.HasRetBuffer())
{
Expand All @@ -7275,29 +7274,7 @@ GenTree* Compiler::fgCreateCallDispatcherAndGetResult(GenTreeCall* orig
assert(retBufArg->OperIsLocal());
assert(retBufArg->AsLclVarCommon()->GetLclNum() == info.compRetBuffArg);

// Caller return buffer argument retBufArg can point to GC heap while the dispatcher expects
// the return value argument retValArg to point to the stack.
// We use a temporary stack allocated return buffer to hold the value during the dispatcher call
// and copy the value back to the caller return buffer after that.
unsigned int tmpRetBufNum = lvaGrabTemp(true DEBUGARG("substitute local for return buffer"));

constexpr bool unsafeValueClsCheck = false;
lvaSetStruct(tmpRetBufNum, origCall->gtRetClsHnd, unsafeValueClsCheck);
lvaSetVarAddrExposed(tmpRetBufNum DEBUGARG(AddressExposedReason::DISPATCH_RET_BUF));

var_types tmpRetBufType = lvaGetDesc(tmpRetBufNum)->TypeGet();

retValArg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, gtNewLclvNode(tmpRetBufNum, tmpRetBufType));

var_types callerRetBufType = lvaGetDesc(info.compRetBuffArg)->TypeGet();

GenTree* dstAddr = gtNewLclvNode(info.compRetBuffArg, callerRetBufType);
GenTree* dst = gtNewObjNode(info.compMethodInfo->args.retTypeClass, dstAddr);
GenTree* src = gtNewLclvNode(tmpRetBufNum, tmpRetBufType);

constexpr bool isVolatile = false;
constexpr bool isCopyBlock = true;
copyToRetBufNode = gtNewBlkOpNode(dst, src, isVolatile, isCopyBlock);
retValArg = retBufArg;

if (origCall->gtType != TYP_VOID)
{
Expand Down Expand Up @@ -7337,7 +7314,7 @@ GenTree* Compiler::fgCreateCallDispatcherAndGetResult(GenTreeCall* orig
retValArg = gtNewZeroConNode(TYP_I_IMPL);
}

// Args are (void** callersReturnAddressSlot, void* callTarget, void* retVal)
// Args are (void** callersReturnAddressSlot, void* callTarget, ref byte retVal)
GenTree* callTarget = new (this, GT_FTN_ADDR) GenTreeFptrVal(TYP_I_IMPL, callTargetStubHnd);

// Add the caller's return address slot.
Expand All @@ -7355,30 +7332,23 @@ GenTree* Compiler::fgCreateCallDispatcherAndGetResult(GenTreeCall* orig
NewCallArg retValCallArg = NewCallArg::Primitive(retValArg);
callDispatcherNode->gtArgs.PushFront(this, retAddrSlotArg, callTargetArg, retValCallArg);

GenTree* finalTree = callDispatcherNode;

if (copyToRetBufNode != nullptr)
{
finalTree = gtNewOperNode(GT_COMMA, TYP_VOID, callDispatcherNode, copyToRetBufNode);
}

if (origCall->gtType == TYP_VOID)
{
return finalTree;
return callDispatcherNode;
}

assert(retVal != nullptr);
finalTree = gtNewOperNode(GT_COMMA, origCall->TypeGet(), finalTree, retVal);
GenTree* comma = gtNewOperNode(GT_COMMA, origCall->TypeGet(), callDispatcherNode, retVal);

// The JIT seems to want to CSE this comma and messes up multi-reg ret
// values in the process. Just avoid CSE'ing this tree entirely in that
// case.
if (origCall->HasMultiRegRetVal())
{
finalTree->gtFlags |= GTF_DONT_CSE;
comma->gtFlags |= GTF_DONT_CSE;
}

return finalTree;
return comma;
}

//------------------------------------------------------------------------
Expand Down
8 changes: 4 additions & 4 deletions src/coreclr/vm/tailcallhelp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,7 @@ void TailCallHelp::LayOutArgBuffer(
bool thisParamByRef = (calleeMD != NULL) ? calleeMD->GetMethodTable()->IsValueType() : thisArgByRef;
if (thisParamByRef)
{
thisHnd = TypeHandle(CoreLibBinder::GetElementType(ELEMENT_TYPE_U1))
.MakeByRef();
thisHnd = TypeHandle(CoreLibBinder::GetElementType(ELEMENT_TYPE_U1)).MakeByRef();
}
else
{
Expand Down Expand Up @@ -463,7 +462,7 @@ MethodDesc* TailCallHelp::CreateCallTargetStub(const TailCallInfo& info)

ILCodeStream* pCode = sl.NewCodeStream(ILStubLinker::kDispatch);

// void CallTarget(void* argBuffer, void* retVal, PortableTailCallFrame* pFrame)
// void CallTarget(void* argBuffer, ref byte retVal, PortableTailCallFrame* pFrame)
const int ARG_ARG_BUFFER = 0;
const int ARG_RET_VAL = 1;
const int ARG_PTR_FRAME = 2;
Expand Down Expand Up @@ -615,7 +614,8 @@ void TailCallHelp::CreateCallTargetStubSig(const TailCallInfo& info, SigBuilder*
sig->AppendElementType(ELEMENT_TYPE_I);

// Return value
sig->AppendElementType(ELEMENT_TYPE_I);
sig->AppendElementType(ELEMENT_TYPE_BYREF);
sig->AppendElementType(ELEMENT_TYPE_U1);

// Pointer to tail call frame
sig->AppendElementType(ELEMENT_TYPE_I);
Expand Down

0 comments on commit c9c27d4

Please sign in to comment.