1
0
Fork 0
mirror of https://github.com/VSadov/Satori.git synced 2025-06-08 03:27:04 +09:00

Avoid unnecessary return val copying in tailcall-via-help (#72720)

The initial implementation of this did not handle the fact that retbuf
can point to GC heap during reflection invoke. It was fixed in #39815,
but the way it was fixed was by copying it into a local. This changes
the fix so that we simply report the return value pointer as a byref
throughout the mechanism, which simplifies the JIT's handling and is a
perf improvement as well.
This commit is contained in:
Jakob Botsch Nielsen 2022-07-24 19:37:50 +02:00 committed by GitHub
parent c22f7f8c73
commit c9c27d4640
Signed by: github
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 30 additions and 54 deletions

View file

@ -99,13 +99,13 @@ certain tailcalls to generic methods.
The second IL stub extracts the arguments and calls the target function. For the
above case a function like the following will be generated:
```csharp
void IL_STUB_CallTailCallTarget(IntPtr argBuffer, IntPtr result, PortableTailCallFrame* pFrame)
void IL_STUB_CallTailCallTarget(IntPtr argBuffer, ref byte result, PortableTailCallFrame* pFrame)
{
pFrame->NextCall = null;
pFrame->TailCallAwareReturnAddress = StubHelpers.NextCallReturnAddress();
int arg1 = *(int*)(argBuffer + 4);
*argBuffer = TAILCALLARGBUFFER_ABANDONED;
*(bool*)result = IsOdd(arg1);
Unsafe.As<byte, bool>(ref result) = IsOdd(arg1);
}
```
It matches the function above by loading the argument that was written, and
@ -141,7 +141,7 @@ live instance of the dispatcher. This structure looks like the following:
struct PortableTailCallFrame
{
public IntPtr TailCallAwareReturnAddress;
public IntPtr NextCall;
public delegate*<IntPtr, ref byte, PortableTailCallFrame*, void> NextCall;
}
```
@ -166,8 +166,8 @@ Finally, the dispatcher follows:
```csharp
private static unsafe void DispatchTailCalls(
IntPtr callersRetAddrSlot,
delegate*<IntPtr, IntPtr, PortableTailCallFrame*, void> callTarget,
IntPtr retVal)
delegate*<IntPtr, ref byte, PortableTailCallFrame*, void> callTarget,
ref byte retVal)
{
IntPtr callersRetAddr;
TailCallTls* tls = GetTailCallInfo(callersRetAddrSlot, &callersRetAddr);
@ -189,7 +189,7 @@ private static unsafe void DispatchTailCalls(
do
{
callTarget(tls->ArgBuffer, retVal, &newFrame);
callTarget(tls->ArgBuffer, ref retVal, &newFrame);
callTarget = newFrame.NextCall;
} while (callTarget != null);
}
@ -249,7 +249,7 @@ transforms the code into the equivalent of
```csharp
IL_STUB_StoreTailCallArgs(x - 1);
bool result;
DispatchTailCalls(&IL_STUB_CallTailCallTarget, (IntPtr)&result, _AddressOfReturnAddress());
DispatchTailCalls(_AddressOfReturnAddress(), &IL_STUB_CallTailCallTarget, ref result);
return result;
```
@ -257,6 +257,12 @@ Here `_AddressOfReturnAddress()` represents the stack slot containing the return
address. Note that .NET requires that the return address is always stored on the
stack, even on ARM architectures, due to its return address hijacking mechanism.
When the result is returned by value the JIT will introduce a local and pass a
pointer to it in the second argument. For ret bufs the JIT will typically
directly pass along its own return buffer parameter to DispatchTailCalls. It is
possible that this return buffer is pointing into GC heap, so the result is
always tracked as a byref in the mechanism.
In certain cases the target function pointer is also stored. For some targets
this might require the JIT to perform the equivalent of `ldvirtftn` or `ldftn`
to obtain a properly callable function pointer.

View file

@ -331,8 +331,8 @@ namespace System.Runtime.CompilerServices
[StackTraceHidden]
private static unsafe void DispatchTailCalls(
IntPtr callersRetAddrSlot,
delegate*<IntPtr, IntPtr, PortableTailCallFrame*, void> callTarget,
IntPtr retVal)
delegate*<IntPtr, ref byte, PortableTailCallFrame*, void> callTarget,
ref byte retVal)
{
IntPtr callersRetAddr;
TailCallTls* tls = GetTailCallInfo(callersRetAddrSlot, &callersRetAddr);
@ -354,7 +354,7 @@ namespace System.Runtime.CompilerServices
do
{
callTarget(tls->ArgBuffer, retVal, &newFrame);
callTarget(tls->ArgBuffer, ref retVal, &newFrame);
callTarget = newFrame.NextCall;
} while (callTarget != null);
}
@ -663,7 +663,7 @@ namespace System.Runtime.CompilerServices
internal unsafe struct PortableTailCallFrame
{
public IntPtr TailCallAwareReturnAddress;
public delegate*<IntPtr, IntPtr, PortableTailCallFrame*, void> NextCall;
public delegate*<IntPtr, ref byte, PortableTailCallFrame*, void> NextCall;
}
[StructLayout(LayoutKind.Sequential)]

View file

@ -7258,13 +7258,12 @@ GenTree* Compiler::fgCreateCallDispatcherAndGetResult(GenTreeCall* orig
{
GenTreeCall* callDispatcherNode = gtNewCallNode(CT_USER_FUNC, dispatcherHnd, TYP_VOID, fgMorphStmt->GetDebugInfo());
// The dispatcher has signature
// void DispatchTailCalls(void* callersRetAddrSlot, void* callTarget, void* retValue)
// void DispatchTailCalls(void* callersRetAddrSlot, void* callTarget, ref byte retValue)
// Add return value arg.
GenTree* retValArg;
GenTree* retVal = nullptr;
unsigned int newRetLcl = BAD_VAR_NUM;
GenTree* copyToRetBufNode = nullptr;
GenTree* retVal = nullptr;
unsigned int newRetLcl = BAD_VAR_NUM;
if (origCall->gtArgs.HasRetBuffer())
{
@ -7275,29 +7274,7 @@ GenTree* Compiler::fgCreateCallDispatcherAndGetResult(GenTreeCall* orig
assert(retBufArg->OperIsLocal());
assert(retBufArg->AsLclVarCommon()->GetLclNum() == info.compRetBuffArg);
// Caller return buffer argument retBufArg can point to GC heap while the dispatcher expects
// the return value argument retValArg to point to the stack.
// We use a temporary stack allocated return buffer to hold the value during the dispatcher call
// and copy the value back to the caller return buffer after that.
unsigned int tmpRetBufNum = lvaGrabTemp(true DEBUGARG("substitute local for return buffer"));
constexpr bool unsafeValueClsCheck = false;
lvaSetStruct(tmpRetBufNum, origCall->gtRetClsHnd, unsafeValueClsCheck);
lvaSetVarAddrExposed(tmpRetBufNum DEBUGARG(AddressExposedReason::DISPATCH_RET_BUF));
var_types tmpRetBufType = lvaGetDesc(tmpRetBufNum)->TypeGet();
retValArg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, gtNewLclvNode(tmpRetBufNum, tmpRetBufType));
var_types callerRetBufType = lvaGetDesc(info.compRetBuffArg)->TypeGet();
GenTree* dstAddr = gtNewLclvNode(info.compRetBuffArg, callerRetBufType);
GenTree* dst = gtNewObjNode(info.compMethodInfo->args.retTypeClass, dstAddr);
GenTree* src = gtNewLclvNode(tmpRetBufNum, tmpRetBufType);
constexpr bool isVolatile = false;
constexpr bool isCopyBlock = true;
copyToRetBufNode = gtNewBlkOpNode(dst, src, isVolatile, isCopyBlock);
retValArg = retBufArg;
if (origCall->gtType != TYP_VOID)
{
@ -7337,7 +7314,7 @@ GenTree* Compiler::fgCreateCallDispatcherAndGetResult(GenTreeCall* orig
retValArg = gtNewZeroConNode(TYP_I_IMPL);
}
// Args are (void** callersReturnAddressSlot, void* callTarget, void* retVal)
// Args are (void** callersReturnAddressSlot, void* callTarget, ref byte retVal)
GenTree* callTarget = new (this, GT_FTN_ADDR) GenTreeFptrVal(TYP_I_IMPL, callTargetStubHnd);
// Add the caller's return address slot.
@ -7355,30 +7332,23 @@ GenTree* Compiler::fgCreateCallDispatcherAndGetResult(GenTreeCall* orig
NewCallArg retValCallArg = NewCallArg::Primitive(retValArg);
callDispatcherNode->gtArgs.PushFront(this, retAddrSlotArg, callTargetArg, retValCallArg);
GenTree* finalTree = callDispatcherNode;
if (copyToRetBufNode != nullptr)
{
finalTree = gtNewOperNode(GT_COMMA, TYP_VOID, callDispatcherNode, copyToRetBufNode);
}
if (origCall->gtType == TYP_VOID)
{
return finalTree;
return callDispatcherNode;
}
assert(retVal != nullptr);
finalTree = gtNewOperNode(GT_COMMA, origCall->TypeGet(), finalTree, retVal);
GenTree* comma = gtNewOperNode(GT_COMMA, origCall->TypeGet(), callDispatcherNode, retVal);
// The JIT seems to want to CSE this comma and messes up multi-reg ret
// values in the process. Just avoid CSE'ing this tree entirely in that
// case.
if (origCall->HasMultiRegRetVal())
{
finalTree->gtFlags |= GTF_DONT_CSE;
comma->gtFlags |= GTF_DONT_CSE;
}
return finalTree;
return comma;
}
//------------------------------------------------------------------------

View file

@ -175,8 +175,7 @@ void TailCallHelp::LayOutArgBuffer(
bool thisParamByRef = (calleeMD != NULL) ? calleeMD->GetMethodTable()->IsValueType() : thisArgByRef;
if (thisParamByRef)
{
thisHnd = TypeHandle(CoreLibBinder::GetElementType(ELEMENT_TYPE_U1))
.MakeByRef();
thisHnd = TypeHandle(CoreLibBinder::GetElementType(ELEMENT_TYPE_U1)).MakeByRef();
}
else
{
@ -463,7 +462,7 @@ MethodDesc* TailCallHelp::CreateCallTargetStub(const TailCallInfo& info)
ILCodeStream* pCode = sl.NewCodeStream(ILStubLinker::kDispatch);
// void CallTarget(void* argBuffer, void* retVal, PortableTailCallFrame* pFrame)
// void CallTarget(void* argBuffer, ref byte retVal, PortableTailCallFrame* pFrame)
const int ARG_ARG_BUFFER = 0;
const int ARG_RET_VAL = 1;
const int ARG_PTR_FRAME = 2;
@ -615,7 +614,8 @@ void TailCallHelp::CreateCallTargetStubSig(const TailCallInfo& info, SigBuilder*
sig->AppendElementType(ELEMENT_TYPE_I);
// Return value
sig->AppendElementType(ELEMENT_TYPE_I);
sig->AppendElementType(ELEMENT_TYPE_BYREF);
sig->AppendElementType(ELEMENT_TYPE_U1);
// Pointer to tail call frame
sig->AppendElementType(ELEMENT_TYPE_I);