mirror of
https://github.com/VSadov/Satori.git
synced 2025-06-08 03:27:04 +09:00
Avoid unnecessary return val copying in tailcall-via-help (#72720)
The initial implementation of this did not handle the fact that retbuf can point to GC heap during reflection invoke. It was fixed in #39815, but the way it was fixed was by copying it into a local. This changes the fix so that we simply report the return value pointer as a byref throughout the mechanism, which simplifies the JIT's handling and is a perf improvement as well.
This commit is contained in:
parent
c22f7f8c73
commit
c9c27d4640
4 changed files with 30 additions and 54 deletions
|
@ -99,13 +99,13 @@ certain tailcalls to generic methods.
|
|||
The second IL stub extracts the arguments and calls the target function. For the
|
||||
above case a function like the following will be generated:
|
||||
```csharp
|
||||
void IL_STUB_CallTailCallTarget(IntPtr argBuffer, IntPtr result, PortableTailCallFrame* pFrame)
|
||||
void IL_STUB_CallTailCallTarget(IntPtr argBuffer, ref byte result, PortableTailCallFrame* pFrame)
|
||||
{
|
||||
pFrame->NextCall = null;
|
||||
pFrame->TailCallAwareReturnAddress = StubHelpers.NextCallReturnAddress();
|
||||
int arg1 = *(int*)(argBuffer + 4);
|
||||
*argBuffer = TAILCALLARGBUFFER_ABANDONED;
|
||||
*(bool*)result = IsOdd(arg1);
|
||||
Unsafe.As<byte, bool>(ref result) = IsOdd(arg1);
|
||||
}
|
||||
```
|
||||
It matches the function above by loading the argument that was written, and
|
||||
|
@ -141,7 +141,7 @@ live instance of the dispatcher. This structure looks like the following:
|
|||
struct PortableTailCallFrame
|
||||
{
|
||||
public IntPtr TailCallAwareReturnAddress;
|
||||
public IntPtr NextCall;
|
||||
public delegate*<IntPtr, ref byte, PortableTailCallFrame*, void> NextCall;
|
||||
}
|
||||
```
|
||||
|
||||
|
@ -166,8 +166,8 @@ Finally, the dispatcher follows:
|
|||
```csharp
|
||||
private static unsafe void DispatchTailCalls(
|
||||
IntPtr callersRetAddrSlot,
|
||||
delegate*<IntPtr, IntPtr, PortableTailCallFrame*, void> callTarget,
|
||||
IntPtr retVal)
|
||||
delegate*<IntPtr, ref byte, PortableTailCallFrame*, void> callTarget,
|
||||
ref byte retVal)
|
||||
{
|
||||
IntPtr callersRetAddr;
|
||||
TailCallTls* tls = GetTailCallInfo(callersRetAddrSlot, &callersRetAddr);
|
||||
|
@ -189,7 +189,7 @@ private static unsafe void DispatchTailCalls(
|
|||
|
||||
do
|
||||
{
|
||||
callTarget(tls->ArgBuffer, retVal, &newFrame);
|
||||
callTarget(tls->ArgBuffer, ref retVal, &newFrame);
|
||||
callTarget = newFrame.NextCall;
|
||||
} while (callTarget != null);
|
||||
}
|
||||
|
@ -249,7 +249,7 @@ transforms the code into the equivalent of
|
|||
```csharp
|
||||
IL_STUB_StoreTailCallArgs(x - 1);
|
||||
bool result;
|
||||
DispatchTailCalls(&IL_STUB_CallTailCallTarget, (IntPtr)&result, _AddressOfReturnAddress());
|
||||
DispatchTailCalls(_AddressOfReturnAddress(), &IL_STUB_CallTailCallTarget, ref result);
|
||||
return result;
|
||||
```
|
||||
|
||||
|
@ -257,6 +257,12 @@ Here `_AddressOfReturnAddress()` represents the stack slot containing the return
|
|||
address. Note that .NET requires that the return address is always stored on the
|
||||
stack, even on ARM architectures, due to its return address hijacking mechanism.
|
||||
|
||||
When the result is returned by value the JIT will introduce a local and pass a
|
||||
pointer to it in the second argument. For ret bufs the JIT will typically
|
||||
directly pass along its own return buffer parameter to DispatchTailCalls. It is
|
||||
possible that this return buffer is pointing into GC heap, so the result is
|
||||
always tracked as a byref in the mechanism.
|
||||
|
||||
In certain cases the target function pointer is also stored. For some targets
|
||||
this might require the JIT to perform the equivalent of `ldvirtftn` or `ldftn`
|
||||
to obtain a properly callable function pointer.
|
||||
|
|
|
@ -331,8 +331,8 @@ namespace System.Runtime.CompilerServices
|
|||
[StackTraceHidden]
|
||||
private static unsafe void DispatchTailCalls(
|
||||
IntPtr callersRetAddrSlot,
|
||||
delegate*<IntPtr, IntPtr, PortableTailCallFrame*, void> callTarget,
|
||||
IntPtr retVal)
|
||||
delegate*<IntPtr, ref byte, PortableTailCallFrame*, void> callTarget,
|
||||
ref byte retVal)
|
||||
{
|
||||
IntPtr callersRetAddr;
|
||||
TailCallTls* tls = GetTailCallInfo(callersRetAddrSlot, &callersRetAddr);
|
||||
|
@ -354,7 +354,7 @@ namespace System.Runtime.CompilerServices
|
|||
|
||||
do
|
||||
{
|
||||
callTarget(tls->ArgBuffer, retVal, &newFrame);
|
||||
callTarget(tls->ArgBuffer, ref retVal, &newFrame);
|
||||
callTarget = newFrame.NextCall;
|
||||
} while (callTarget != null);
|
||||
}
|
||||
|
@ -663,7 +663,7 @@ namespace System.Runtime.CompilerServices
|
|||
internal unsafe struct PortableTailCallFrame
|
||||
{
|
||||
public IntPtr TailCallAwareReturnAddress;
|
||||
public delegate*<IntPtr, IntPtr, PortableTailCallFrame*, void> NextCall;
|
||||
public delegate*<IntPtr, ref byte, PortableTailCallFrame*, void> NextCall;
|
||||
}
|
||||
|
||||
[StructLayout(LayoutKind.Sequential)]
|
||||
|
|
|
@ -7258,13 +7258,12 @@ GenTree* Compiler::fgCreateCallDispatcherAndGetResult(GenTreeCall* orig
|
|||
{
|
||||
GenTreeCall* callDispatcherNode = gtNewCallNode(CT_USER_FUNC, dispatcherHnd, TYP_VOID, fgMorphStmt->GetDebugInfo());
|
||||
// The dispatcher has signature
|
||||
// void DispatchTailCalls(void* callersRetAddrSlot, void* callTarget, void* retValue)
|
||||
// void DispatchTailCalls(void* callersRetAddrSlot, void* callTarget, ref byte retValue)
|
||||
|
||||
// Add return value arg.
|
||||
GenTree* retValArg;
|
||||
GenTree* retVal = nullptr;
|
||||
unsigned int newRetLcl = BAD_VAR_NUM;
|
||||
GenTree* copyToRetBufNode = nullptr;
|
||||
GenTree* retVal = nullptr;
|
||||
unsigned int newRetLcl = BAD_VAR_NUM;
|
||||
|
||||
if (origCall->gtArgs.HasRetBuffer())
|
||||
{
|
||||
|
@ -7275,29 +7274,7 @@ GenTree* Compiler::fgCreateCallDispatcherAndGetResult(GenTreeCall* orig
|
|||
assert(retBufArg->OperIsLocal());
|
||||
assert(retBufArg->AsLclVarCommon()->GetLclNum() == info.compRetBuffArg);
|
||||
|
||||
// Caller return buffer argument retBufArg can point to GC heap while the dispatcher expects
|
||||
// the return value argument retValArg to point to the stack.
|
||||
// We use a temporary stack allocated return buffer to hold the value during the dispatcher call
|
||||
// and copy the value back to the caller return buffer after that.
|
||||
unsigned int tmpRetBufNum = lvaGrabTemp(true DEBUGARG("substitute local for return buffer"));
|
||||
|
||||
constexpr bool unsafeValueClsCheck = false;
|
||||
lvaSetStruct(tmpRetBufNum, origCall->gtRetClsHnd, unsafeValueClsCheck);
|
||||
lvaSetVarAddrExposed(tmpRetBufNum DEBUGARG(AddressExposedReason::DISPATCH_RET_BUF));
|
||||
|
||||
var_types tmpRetBufType = lvaGetDesc(tmpRetBufNum)->TypeGet();
|
||||
|
||||
retValArg = gtNewOperNode(GT_ADDR, TYP_I_IMPL, gtNewLclvNode(tmpRetBufNum, tmpRetBufType));
|
||||
|
||||
var_types callerRetBufType = lvaGetDesc(info.compRetBuffArg)->TypeGet();
|
||||
|
||||
GenTree* dstAddr = gtNewLclvNode(info.compRetBuffArg, callerRetBufType);
|
||||
GenTree* dst = gtNewObjNode(info.compMethodInfo->args.retTypeClass, dstAddr);
|
||||
GenTree* src = gtNewLclvNode(tmpRetBufNum, tmpRetBufType);
|
||||
|
||||
constexpr bool isVolatile = false;
|
||||
constexpr bool isCopyBlock = true;
|
||||
copyToRetBufNode = gtNewBlkOpNode(dst, src, isVolatile, isCopyBlock);
|
||||
retValArg = retBufArg;
|
||||
|
||||
if (origCall->gtType != TYP_VOID)
|
||||
{
|
||||
|
@ -7337,7 +7314,7 @@ GenTree* Compiler::fgCreateCallDispatcherAndGetResult(GenTreeCall* orig
|
|||
retValArg = gtNewZeroConNode(TYP_I_IMPL);
|
||||
}
|
||||
|
||||
// Args are (void** callersReturnAddressSlot, void* callTarget, void* retVal)
|
||||
// Args are (void** callersReturnAddressSlot, void* callTarget, ref byte retVal)
|
||||
GenTree* callTarget = new (this, GT_FTN_ADDR) GenTreeFptrVal(TYP_I_IMPL, callTargetStubHnd);
|
||||
|
||||
// Add the caller's return address slot.
|
||||
|
@ -7355,30 +7332,23 @@ GenTree* Compiler::fgCreateCallDispatcherAndGetResult(GenTreeCall* orig
|
|||
NewCallArg retValCallArg = NewCallArg::Primitive(retValArg);
|
||||
callDispatcherNode->gtArgs.PushFront(this, retAddrSlotArg, callTargetArg, retValCallArg);
|
||||
|
||||
GenTree* finalTree = callDispatcherNode;
|
||||
|
||||
if (copyToRetBufNode != nullptr)
|
||||
{
|
||||
finalTree = gtNewOperNode(GT_COMMA, TYP_VOID, callDispatcherNode, copyToRetBufNode);
|
||||
}
|
||||
|
||||
if (origCall->gtType == TYP_VOID)
|
||||
{
|
||||
return finalTree;
|
||||
return callDispatcherNode;
|
||||
}
|
||||
|
||||
assert(retVal != nullptr);
|
||||
finalTree = gtNewOperNode(GT_COMMA, origCall->TypeGet(), finalTree, retVal);
|
||||
GenTree* comma = gtNewOperNode(GT_COMMA, origCall->TypeGet(), callDispatcherNode, retVal);
|
||||
|
||||
// The JIT seems to want to CSE this comma and messes up multi-reg ret
|
||||
// values in the process. Just avoid CSE'ing this tree entirely in that
|
||||
// case.
|
||||
if (origCall->HasMultiRegRetVal())
|
||||
{
|
||||
finalTree->gtFlags |= GTF_DONT_CSE;
|
||||
comma->gtFlags |= GTF_DONT_CSE;
|
||||
}
|
||||
|
||||
return finalTree;
|
||||
return comma;
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------
|
||||
|
|
|
@ -175,8 +175,7 @@ void TailCallHelp::LayOutArgBuffer(
|
|||
bool thisParamByRef = (calleeMD != NULL) ? calleeMD->GetMethodTable()->IsValueType() : thisArgByRef;
|
||||
if (thisParamByRef)
|
||||
{
|
||||
thisHnd = TypeHandle(CoreLibBinder::GetElementType(ELEMENT_TYPE_U1))
|
||||
.MakeByRef();
|
||||
thisHnd = TypeHandle(CoreLibBinder::GetElementType(ELEMENT_TYPE_U1)).MakeByRef();
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -463,7 +462,7 @@ MethodDesc* TailCallHelp::CreateCallTargetStub(const TailCallInfo& info)
|
|||
|
||||
ILCodeStream* pCode = sl.NewCodeStream(ILStubLinker::kDispatch);
|
||||
|
||||
// void CallTarget(void* argBuffer, void* retVal, PortableTailCallFrame* pFrame)
|
||||
// void CallTarget(void* argBuffer, ref byte retVal, PortableTailCallFrame* pFrame)
|
||||
const int ARG_ARG_BUFFER = 0;
|
||||
const int ARG_RET_VAL = 1;
|
||||
const int ARG_PTR_FRAME = 2;
|
||||
|
@ -615,7 +614,8 @@ void TailCallHelp::CreateCallTargetStubSig(const TailCallInfo& info, SigBuilder*
|
|||
sig->AppendElementType(ELEMENT_TYPE_I);
|
||||
|
||||
// Return value
|
||||
sig->AppendElementType(ELEMENT_TYPE_I);
|
||||
sig->AppendElementType(ELEMENT_TYPE_BYREF);
|
||||
sig->AppendElementType(ELEMENT_TYPE_U1);
|
||||
|
||||
// Pointer to tail call frame
|
||||
sig->AppendElementType(ELEMENT_TYPE_I);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue