// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

/*XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XX                                                                           XX
XX                                  Morph                                    XX
XX                                                                           XX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
*/

#include "jitpch.h"
#ifdef _MSC_VER
#pragma hdrstop
#endif

#include "allocacheck.h" // for alloca
//-------------------------------------------------------------
|
|
// fgMorphInit: prepare for running the morph phases
|
|
//
|
|
// Returns:
|
|
// suitable phase status
|
|
//
|
|
PhaseStatus Compiler::fgMorphInit()
|
|
{
|
|
bool madeChanges = false;
|
|
|
|
// We could allow ESP frames. Just need to reserve space for
// pushing EBP if the method becomes an EBP-frame after an edit.
// Note that requiring an EBP frame disallows double alignment. Thus if we change this
// we either have to disallow double alignment for E&C some other way or handle it in EETwain.
|
|
|
|
if (opts.compDbgEnC)
|
|
{
|
|
codeGen->setFramePointerRequired(true);
|
|
|
|
// We don't care about localloc right now. If we do support it,
|
|
// EECodeManager::FixContextForEnC() needs to handle it smartly
|
|
// in case the localloc was actually executed.
|
|
//
|
|
// compLocallocUsed = true;
|
|
}
|
|
|
|
// Initialize the BlockSet epoch
|
|
NewBasicBlockEpoch();
|
|
|
|
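// This set tracks shared temp locals that are free to be reused for outgoing arg
// copies; SharedTempsScope (defined below) returns temps to this set when a scope ends.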
fgAvailableOutgoingArgTemps = hashBv::Create(this);
|
|
|
|
// Insert call to class constructor as the first basic block if
|
|
// we were asked to do so.
|
|
if (info.compCompHnd->initClass(nullptr /* field */, nullptr /* method */,
|
|
impTokenLookupContextHandle /* context */) &
|
|
CORINFO_INITCLASS_USE_HELPER)
|
|
{
|
|
fgEnsureFirstBBisScratch();
|
|
fgNewStmtAtBeg(fgFirstBB, fgInitThisClass());
|
|
madeChanges = true;
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
if (opts.compGcChecks)
|
|
{
|
|
for (unsigned i = 0; i < info.compArgsCount; i++)
|
|
{
|
|
if (lvaGetDesc(i)->TypeGet() == TYP_REF)
|
|
{
|
|
// confirm that the argument is a GC pointer (for debugging (GC stress))
|
|
GenTree* op = gtNewLclvNode(i, TYP_REF);
|
|
op = gtNewHelperCallNode(CORINFO_HELP_CHECK_OBJ, TYP_VOID, op);
|
|
|
|
fgEnsureFirstBBisScratch();
|
|
fgNewStmtAtEnd(fgFirstBB, op);
|
|
madeChanges = true;
|
|
if (verbose)
|
|
{
|
|
printf("\ncompGcChecks tree:\n");
|
|
gtDispTree(op);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#endif // DEBUG
|
|
|
|
#if defined(DEBUG) && defined(TARGET_XARCH)
|
|
if (opts.compStackCheckOnRet)
|
|
{
|
|
lvaReturnSpCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("ReturnSpCheck"));
|
|
lvaSetVarDoNotEnregister(lvaReturnSpCheck, DoNotEnregisterReason::ReturnSpCheck);
|
|
lvaGetDesc(lvaReturnSpCheck)->lvType = TYP_I_IMPL;
|
|
madeChanges = true;
|
|
}
|
|
#endif // defined(DEBUG) && defined(TARGET_XARCH)
|
|
|
|
#if defined(DEBUG) && defined(TARGET_X86)
|
|
if (opts.compStackCheckOnCall)
|
|
{
|
|
lvaCallSpCheck = lvaGrabTempWithImplicitUse(false DEBUGARG("CallSpCheck"));
|
|
lvaSetVarDoNotEnregister(lvaCallSpCheck, DoNotEnregisterReason::CallSpCheck);
|
|
lvaGetDesc(lvaCallSpCheck)->lvType = TYP_I_IMPL;
|
|
madeChanges = true;
|
|
}
|
|
#endif // defined(DEBUG) && defined(TARGET_X86)
|
|
|
|
return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
// fgMorphCastIntoHelper: Convert the given cast node into a call to the
//    specified helper, passing the given operand as the call argument.
//
//    Tries to fold constants and also adds an edge for the overflow exception.
//
// Return Value:
//    The morphed tree.
//
GenTree* Compiler::fgMorphCastIntoHelper(GenTree* tree, int helper, GenTree* oper)
|
|
{
|
|
GenTree* result;
|
|
|
|
/* If the operand is a constant, we'll try to fold it */
|
|
if (oper->OperIsConst())
|
|
{
|
|
GenTree* oldTree = tree;
|
|
|
|
tree = gtFoldExprConst(tree); // This may not fold the constant (NaN ...)
|
|
|
|
if (tree != oldTree)
|
|
{
|
|
return fgMorphTree(tree);
|
|
}
|
|
else if (tree->OperIsConst())
|
|
{
|
|
return fgMorphConst(tree);
|
|
}
|
|
|
|
// assert that oper is unchanged and that it is still a GT_CAST node
|
|
noway_assert(tree->AsCast()->CastOp() == oper);
|
|
noway_assert(tree->gtOper == GT_CAST);
|
|
}
|
|
result = fgMorphIntoHelperCall(tree, helper, true /* morphArgs */, oper);
|
|
assert(result == tree);
|
|
return result;
|
|
}
|
|
|
|
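//------------------------------------------------------------------------
// SharedTempsScope: RAII helper that points Compiler::fgUsedSharedTemps at a
//    scope-local list for the duration of the scope. On destruction the previous
//    list is restored and every temp recorded while the scope was active is
//    returned to fgAvailableOutgoingArgTemps so it can be reused later.
//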
class SharedTempsScope
|
|
{
|
|
Compiler* m_comp;
|
|
ArrayStack<unsigned> m_usedSharedTemps;
|
|
ArrayStack<unsigned>* m_prevUsedSharedTemps;
|
|
|
|
public:
|
|
SharedTempsScope(Compiler* comp)
|
|
: m_comp(comp)
|
|
, m_usedSharedTemps(comp->getAllocator(CMK_CallArgs))
|
|
, m_prevUsedSharedTemps(comp->fgUsedSharedTemps)
|
|
{
|
|
comp->fgUsedSharedTemps = &m_usedSharedTemps;
|
|
}
|
|
|
|
~SharedTempsScope()
|
|
{
|
|
m_comp->fgUsedSharedTemps = m_prevUsedSharedTemps;
|
|
|
|
for (int i = 0; i < m_usedSharedTemps.Height(); i++)
|
|
{
|
|
m_comp->fgAvailableOutgoingArgTemps->setBit((indexType)m_usedSharedTemps.Top(i));
|
|
}
|
|
}
|
|
};
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgMorphIntoHelperCall:
|
|
// Morph a node into a helper call, specifying up to two args and whether to
|
|
// call fgMorphArgs after.
|
|
//
|
|
// Parameters:
|
|
// tree - The node that is changed. This must be a large node.
|
|
// helper - The helper.
|
|
// morphArgs - Whether to call fgMorphArgs after adding the args.
|
|
// arg1, arg2 - Optional arguments to add to the call.
|
|
//
|
|
// Return value:
|
|
// The call (which is the same as `tree`).
|
|
//
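// For example, fgMorphCastIntoHelper (above) uses this to turn a cast into a
// helper call:
//
//    result = fgMorphIntoHelperCall(tree, helper, true /* morphArgs */, oper);
//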
|
|
GenTree* Compiler::fgMorphIntoHelperCall(GenTree* tree, int helper, bool morphArgs, GenTree* arg1, GenTree* arg2)
|
|
{
|
|
// The helper call ought to be semantically equivalent to the original node, so preserve its VN.
|
|
tree->ChangeOper(GT_CALL, GenTree::PRESERVE_VN);
|
|
|
|
GenTreeCall* call = tree->AsCall();
|
|
// Args are cleared by ChangeOper above
|
|
call->gtCallType = CT_HELPER;
|
|
call->gtReturnType = tree->TypeGet();
|
|
call->gtCallMethHnd = eeFindHelper(helper);
|
|
call->gtRetClsHnd = nullptr;
|
|
call->gtCallMoreFlags = GTF_CALL_M_EMPTY;
|
|
INDEBUG(call->gtCallDebugFlags = GTF_CALL_MD_EMPTY);
|
|
call->gtControlExpr = nullptr;
|
|
call->ClearInlineInfo();
|
|
#ifdef UNIX_X86_ABI
|
|
call->gtFlags |= GTF_CALL_POP_ARGS;
|
|
#endif // UNIX_X86_ABI
|
|
|
|
#if DEBUG
|
|
// Helper calls are never candidates.
|
|
call->gtInlineObservation = InlineObservation::CALLSITE_IS_CALL_TO_HELPER;
|
|
|
|
call->callSig = nullptr;
|
|
#endif // DEBUG
|
|
|
|
#ifdef FEATURE_READYTORUN
|
|
call->gtEntryPoint.addr = nullptr;
|
|
call->gtEntryPoint.accessType = IAT_VALUE;
|
|
#endif
|
|
|
|
#if FEATURE_MULTIREG_RET
|
|
call->ResetReturnType();
|
|
call->ClearOtherRegs();
|
|
call->ClearOtherRegFlags();
|
|
#ifndef TARGET_64BIT
|
|
if (varTypeIsLong(tree))
|
|
{
|
|
call->InitializeLongReturnType();
|
|
}
|
|
#endif // !TARGET_64BIT
|
|
#endif // FEATURE_MULTIREG_RET
|
|
|
|
if (call->OperMayThrow(this))
|
|
{
|
|
call->gtFlags |= GTF_EXCEPT;
|
|
}
|
|
else
|
|
{
|
|
call->gtFlags &= ~GTF_EXCEPT;
|
|
}
|
|
call->gtFlags |= GTF_CALL;
|
|
|
|
if (arg2 != nullptr)
|
|
{
|
|
call->gtArgs.PushFront(this, NewCallArg::Primitive(arg2));
|
|
call->gtFlags |= arg2->gtFlags & GTF_ALL_EFFECT;
|
|
}
|
|
|
|
if (arg1 != nullptr)
|
|
{
|
|
call->gtArgs.PushFront(this, NewCallArg::Primitive(arg1));
|
|
call->gtFlags |= arg1->gtFlags & GTF_ALL_EFFECT;
|
|
}
|
|
|
|
// Perform the morphing
|
|
|
|
if (morphArgs)
|
|
{
|
|
SharedTempsScope scope(this);
|
|
tree = fgMorphArgs(call);
|
|
}
|
|
|
|
return tree;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
// fgMorphExpandCast: Performs the pre-order (required) morphing for a cast.
//
// Performs a rich variety of pre-order transformations (and some optimizations).
//
// Notably:
//  1. Splits long -> small type casts into long -> int -> small type
//     for 32 bit targets. Does the same for float/double -> small type
//     casts for all targets.
//  2. Morphs casts not supported by the target directly into helpers.
//     These mostly have to do with casts from and to floating point
//     types, especially checked ones. Refer to the implementation for
//     what specific casts need to be handled - it is a complex matrix.
//  3. "Casts away" the GC-ness of a tree (for CAST(nint <- byref)) via
//     storing the GC tree to an inline non-GC temporary.
//  4. "Pushes down" truncating long -> int casts for some operations:
//     CAST(int <- MUL(long, long)) => MUL(CAST(int <- long), CAST(int <- long)).
//     The purpose of this is to allow "optNarrowTree" in the post-order
//     traversal to fold the tree into a TYP_INT one, which helps 32 bit
//     targets (and AMD64 too since 32 bit instructions are more compact).
//     TODO-Arm64-CQ: Re-evaluate the value of this optimization for ARM64.
//
// Arguments:
//    tree - the cast tree to morph
//
// Return Value:
//    The fully morphed tree, or "nullptr" if it needs further morphing,
//    in which case the cast may be transformed into an unchecked one
//    and its operand changed (the cast "expanded" into two).
//
GenTree* Compiler::fgMorphExpandCast(GenTreeCast* tree)
|
|
{
|
|
GenTree* oper = tree->CastOp();
|
|
var_types srcType = genActualType(oper);
|
|
var_types dstType = tree->CastToType();
|
|
unsigned dstSize = genTypeSize(dstType);
|
|
|
|
#if defined(TARGET_AMD64)
|
|
// If AVX512 is present, we have intrinsic available to convert
|
|
// ulong directly to float. Hence, we need to combine the 2 nodes
|
|
// GT_CAST(GT_CAST(TYP_ULONG, TYP_DOUBLE), TYP_FLOAT) into a single
|
|
// node i.e. GT_CAST(TYP_ULONG, TYP_FLOAT). At this point, we already
|
|
// have the 2 GT_CAST nodes in the tree and we are combining them below.
|
|
if (oper->OperIs(GT_CAST))
|
|
{
|
|
GenTreeCast* innerCast = oper->AsCast();
|
|
|
|
if (innerCast->IsUnsigned())
|
|
{
|
|
GenTree* innerOper = innerCast->CastOp();
|
|
var_types innerSrcType = genActualType(innerOper);
|
|
var_types innerDstType = innerCast->CastToType();
|
|
unsigned innerDstSize = genTypeSize(innerDstType);
|
|
innerSrcType = varTypeToUnsigned(innerSrcType);
|
|
|
|
// Check if we are going from ulong->double->float
|
|
if ((innerSrcType == TYP_ULONG) && (innerDstType == TYP_DOUBLE) && (dstType == TYP_FLOAT))
|
|
{
|
|
if (canUseEvexEncoding())
|
|
{
|
|
// One optimized (combined) cast here
|
|
tree = gtNewCastNode(TYP_FLOAT, innerOper, true, TYP_FLOAT);
|
|
return fgMorphTree(tree);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#endif // TARGET_AMD64
|
|
|
|
// See if the cast has to be done in two steps. R -> I
|
|
if (varTypeIsFloating(srcType) && varTypeIsIntegral(dstType))
|
|
{
|
|
if (srcType == TYP_FLOAT
|
|
#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
|
|
// Arm64: src = float, dst is overflow conversion.
|
|
// This goes through helper and hence src needs to be converted to double.
|
|
&& tree->gtOverflow()
|
|
#elif defined(TARGET_AMD64)
|
|
// Amd64: src = float, dst = uint64 or overflow conversion.
|
|
// src needs to be converted to double except for the following cases
|
|
// dstType = int/uint/ulong for AVX512F
|
|
// dstType = int for SSE41
|
|
// For pre-SSE41, the src is always converted to TYP_DOUBLE
|
|
// and goes through helpers.
|
|
&& (tree->gtOverflow() || (dstType == TYP_LONG) ||
|
|
!(canUseEvexEncoding() || (dstType == TYP_INT && compOpportunisticallyDependsOn(InstructionSet_SSE41))))
|
|
#elif defined(TARGET_ARM)
|
|
// Arm: src = float, dst = int64/uint64 or overflow conversion.
|
|
&& (tree->gtOverflow() || varTypeIsLong(dstType))
|
|
#else
|
|
// x86: src = float, dst = uint32/int64/uint64 or overflow conversion.
|
|
&& (tree->gtOverflow() || varTypeIsIntegral(dstType))
|
|
#endif
|
|
)
|
|
{
|
|
oper = gtNewCastNode(TYP_DOUBLE, oper, false, TYP_DOUBLE);
|
|
}
|
|
|
|
// Do we need to do it in two steps R -> I -> smallType?
|
|
if (dstSize < genTypeSize(TYP_INT))
|
|
{
|
|
oper = gtNewCastNodeL(TYP_INT, oper, /* fromUnsigned */ false, TYP_INT);
|
|
oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
|
|
tree->AsCast()->CastOp() = oper;
|
|
// We must not mistreat the original cast, which was from a floating point type,
|
|
// as from an unsigned type, since we now have a TYP_INT node for the source and
|
|
// CAST_OVF(BYTE <- INT) != CAST_OVF(BYTE <- UINT).
|
|
assert(!tree->IsUnsigned());
|
|
}
|
|
else
|
|
{
|
|
if (!tree->gtOverflow())
|
|
{
|
|
// ARM64 and LoongArch64 optimize all non-overflow checking conversions
|
|
#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
|
|
return nullptr;
|
|
#else
|
|
#if defined(TARGET_AMD64)
|
|
// Following nodes are handled when lowering the nodes
|
|
// float -> ulong/uint/int for AVX512F
|
|
// double -> ulong/uint/long/int for AVX512F
|
|
// float -> int for SSE41
|
|
// double -> int/uint/long for SSE41
|
|
// For all other conversions, we use helper functions.
|
|
if (canUseEvexEncoding() ||
|
|
((dstType != TYP_ULONG) && compOpportunisticallyDependsOn(InstructionSet_SSE41)))
|
|
{
|
|
if (tree->CastOp() != oper)
|
|
{
|
|
tree->CastOp() = oper;
|
|
}
|
|
return nullptr;
|
|
}
|
|
#endif // TARGET_AMD64
|
|
switch (dstType)
|
|
{
|
|
case TYP_INT:
|
|
#ifdef TARGET_XARCH
|
|
return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT, oper);
|
|
#endif // TARGET_XARCH
|
|
return nullptr;
|
|
|
|
case TYP_UINT:
|
|
#if defined(TARGET_ARM)
|
|
return nullptr;
|
|
#endif
|
|
return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT, oper);
|
|
|
|
case TYP_LONG:
|
|
return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG, oper);
|
|
|
|
case TYP_ULONG:
|
|
return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG, oper);
|
|
default:
|
|
unreached();
|
|
}
|
|
#endif // TARGET_ARM64 || TARGET_LOONGARCH64 || TARGET_RISCV64
|
|
}
|
|
else
|
|
{
|
|
switch (dstType)
|
|
{
|
|
case TYP_INT:
|
|
return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2INT_OVF, oper);
|
|
case TYP_UINT:
|
|
return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2UINT_OVF, oper);
|
|
case TYP_LONG:
|
|
return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2LNG_OVF, oper);
|
|
case TYP_ULONG:
|
|
return fgMorphCastIntoHelper(tree, CORINFO_HELP_DBL2ULNG_OVF, oper);
|
|
default:
|
|
unreached();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#ifndef TARGET_64BIT
|
|
// The code generation phase (for x86 & ARM32) does not handle casts
|
|
// directly from [u]long to anything other than [u]int. Insert an
|
|
// intermediate cast to native int.
|
|
else if (varTypeIsLong(srcType) && varTypeIsSmall(dstType))
|
|
{
|
|
oper = gtNewCastNode(TYP_I_IMPL, oper, tree->IsUnsigned(), TYP_I_IMPL);
|
|
oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
|
|
tree->ClearUnsigned();
|
|
tree->AsCast()->CastOp() = oper;
|
|
}
|
|
#endif //! TARGET_64BIT
|
|
|
|
#ifdef TARGET_ARMARCH
|
|
// AArch, unlike x86/amd64, has instructions that can cast directly from
|
|
// all integers (except for longs on AArch32 of course) to floats.
|
|
// Because there is no IL instruction conv.r4.un, uint/ulong -> float
|
|
// casts are always imported as CAST(float <- CAST(double <- uint/ulong)).
|
|
// We can eliminate the redundant intermediate cast as an optimization.
|
|
else if ((dstType == TYP_FLOAT) && (srcType == TYP_DOUBLE) && oper->OperIs(GT_CAST)
|
|
#ifdef TARGET_ARM
|
|
&& !varTypeIsLong(oper->AsCast()->CastOp())
|
|
#endif
|
|
)
|
|
{
|
|
oper->gtType = TYP_FLOAT;
|
|
oper->CastToType() = TYP_FLOAT;
|
|
|
|
return fgMorphTree(oper);
|
|
}
|
|
#endif // TARGET_ARMARCH
|
|
|
|
#ifdef TARGET_ARM
|
|
// converts long/ulong --> float/double casts into helper calls.
|
|
else if (varTypeIsFloating(dstType) && varTypeIsLong(srcType))
|
|
{
|
|
if (dstType == TYP_FLOAT)
|
|
{
|
|
// there is only a double helper, so we
|
|
// - change the dstType to double
|
|
// - insert a cast from double to float
|
|
// - recurse into the resulting tree
|
|
tree->CastToType() = TYP_DOUBLE;
|
|
tree->gtType = TYP_DOUBLE;
|
|
|
|
tree = gtNewCastNode(TYP_FLOAT, tree, false, TYP_FLOAT);
|
|
|
|
return fgMorphTree(tree);
|
|
}
|
|
if (tree->gtFlags & GTF_UNSIGNED)
|
|
return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
|
|
return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
|
|
}
|
|
#endif // TARGET_ARM
|
|
|
|
#ifdef TARGET_AMD64
|
|
// Do we have to do two step U4/8 -> R4/8 ?
|
|
// Codegen supports the following conversion as one-step operation
|
|
// a) Long -> R4/R8
|
|
// b) U8 -> R8
|
|
//
|
|
// The following conversions are performed as two-step operations using above.
|
|
// U4 -> R4/8 = U4-> Long -> R4/8
|
|
// U8 -> R4 = U8 -> R8 -> R4
|
|
else if (tree->IsUnsigned() && varTypeIsFloating(dstType))
|
|
{
|
|
srcType = varTypeToUnsigned(srcType);
|
|
|
|
if (srcType == TYP_ULONG && !canUseEvexEncoding())
|
|
{
|
|
if (dstType == TYP_FLOAT)
|
|
{
|
|
// Codegen can handle U8 -> R8 conversion.
|
|
// U8 -> R4 = U8 -> R8 -> R4
|
|
// - change the dstType to double
|
|
// - insert a cast from double to float
|
|
// - recurse into the resulting tree
|
|
tree->CastToType() = TYP_DOUBLE;
|
|
tree->gtType = TYP_DOUBLE;
|
|
tree = gtNewCastNode(TYP_FLOAT, tree, false, TYP_FLOAT);
|
|
|
|
return fgMorphTree(tree);
|
|
}
|
|
}
|
|
else if (srcType == TYP_UINT)
|
|
{
|
|
oper = gtNewCastNode(TYP_LONG, oper, true, TYP_LONG);
|
|
oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
|
|
tree->ClearUnsigned();
|
|
tree->CastOp() = oper;
|
|
}
|
|
}
|
|
#endif // TARGET_AMD64
|
|
|
|
#ifdef TARGET_X86
|
|
// Do we have to do two step U4/8 -> R4/8 ?
|
|
else if (tree->IsUnsigned() && varTypeIsFloating(dstType))
|
|
{
|
|
srcType = varTypeToUnsigned(srcType);
|
|
|
|
if (srcType == TYP_ULONG)
|
|
{
|
|
return fgMorphCastIntoHelper(tree, CORINFO_HELP_ULNG2DBL, oper);
|
|
}
|
|
else if (srcType == TYP_UINT)
|
|
{
|
|
oper = gtNewCastNode(TYP_LONG, oper, true, TYP_LONG);
|
|
oper->gtFlags |= (tree->gtFlags & (GTF_OVERFLOW | GTF_EXCEPT));
|
|
tree->gtFlags &= ~GTF_UNSIGNED;
|
|
return fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
|
|
}
|
|
}
|
|
else if (((tree->gtFlags & GTF_UNSIGNED) == 0) && (srcType == TYP_LONG) && varTypeIsFloating(dstType))
|
|
{
|
|
oper = fgMorphCastIntoHelper(tree, CORINFO_HELP_LNG2DBL, oper);
|
|
|
|
// Since we don't have a Jit Helper that converts to a TYP_FLOAT
|
|
// we just use the one that converts to a TYP_DOUBLE
|
|
// and then add a cast to TYP_FLOAT
|
|
//
|
|
if ((dstType == TYP_FLOAT) && (oper->OperGet() == GT_CALL))
|
|
{
|
|
// Fix the return type to be TYP_DOUBLE
|
|
//
|
|
oper->gtType = TYP_DOUBLE;
|
|
|
|
// Add a Cast to TYP_FLOAT
|
|
//
|
|
tree = gtNewCastNode(TYP_FLOAT, oper, false, TYP_FLOAT);
|
|
INDEBUG(tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
|
|
|
|
return tree;
|
|
}
|
|
else
|
|
{
|
|
return oper;
|
|
}
|
|
}
|
|
#endif // TARGET_X86
|
|
else if (varTypeIsGC(srcType) != varTypeIsGC(dstType))
|
|
{
|
|
// We are casting away GC information. We would like to just
// change the type to int, however this gives the emitter fits because
// it believes the variable is a GC variable at the beginning of the
// instruction group, but it is not turned non-GC by the code generator.
// We fix this by copying the GC pointer to a non-GC pointer temp.
|
|
noway_assert(!varTypeIsGC(dstType) && "How can we have a cast to a GCRef here?");
|
|
|
|
// We generate a store to an int and then do the cast from an int. With this we avoid
|
|
// the gc problem and we allow casts to bytes, longs, etc...
|
|
unsigned lclNum = lvaGrabTemp(true DEBUGARG("Cast away GC"));
|
|
oper->gtType = TYP_I_IMPL;
|
|
GenTree* store = gtNewTempStore(lclNum, oper);
|
|
oper->gtType = srcType;
|
|
|
|
// do the real cast
|
|
GenTree* cast = gtNewCastNode(tree->TypeGet(), gtNewLclvNode(lclNum, TYP_I_IMPL), false, dstType);
|
|
|
|
// Generate the comma tree
|
|
oper = gtNewOperNode(GT_COMMA, tree->TypeGet(), store, cast);
|
|
|
|
return fgMorphTree(oper);
|
|
}
|
|
|
|
// Look for narrowing casts ([u]long -> [u]int) and try to push them
|
|
// down into the operand before morphing it.
|
|
//
|
|
// It doesn't matter if this cast is from ulong or long (i.e. if
|
|
// GTF_UNSIGNED is set) because the transformation is only applied to
|
|
// overflow-insensitive narrowing casts, which always silently truncate.
|
|
//
|
|
// Note that casts from [u]long to small integer types are handled above.
|
|
if ((srcType == TYP_LONG) && ((dstType == TYP_INT) || (dstType == TYP_UINT)))
|
|
{
|
|
// As a special case, look for overflow-sensitive casts of an AND
|
|
// expression, and see if the second operand is a small constant. Since
|
|
// the result of an AND is bound by its smaller operand, it may be
|
|
// possible to prove that the cast won't overflow, which will in turn
|
|
// allow the cast's operand to be transformed.
|
|
if (tree->gtOverflow() && (oper->OperGet() == GT_AND))
|
|
{
|
|
GenTree* andOp2 = oper->AsOp()->gtOp2;
|
|
|
|
// Look for a constant less than 2^{32} for a cast to uint, or less
|
|
// than 2^{31} for a cast to int.
|
|
int maxWidth = (dstType == TYP_UINT) ? 32 : 31;
|
|
|
|
if ((andOp2->OperGet() == GT_CNS_NATIVELONG) && ((andOp2->AsIntConCommon()->LngValue() >> maxWidth) == 0))
|
|
{
|
|
tree->ClearOverflow();
|
|
tree->SetAllEffectsFlags(oper);
|
|
}
|
|
}
|
|
|
|
// Only apply this transformation during global morph,
|
|
// when neither the cast node nor the oper node may throw an exception
|
|
// based on the upper 32 bits.
|
|
//
|
|
if (fgGlobalMorph && !tree->gtOverflow() && !oper->gtOverflowEx())
|
|
{
|
|
// For these operations the lower 32 bits of the result only depends
|
|
// upon the lower 32 bits of the operands.
|
|
//
|
|
bool canPushCast = oper->OperIs(GT_ADD, GT_SUB, GT_MUL, GT_AND, GT_OR, GT_XOR, GT_NOT, GT_NEG);
|
|
|
|
// For long LSH cast to int, there is a discontinuity in behavior
|
|
// when the shift amount is 32 or larger.
|
|
//
|
|
// CAST(INT, LSH(1LL, 31)) == LSH(1, 31)
|
|
// LSH(CAST(INT, 1LL), CAST(INT, 31)) == LSH(1, 31)
|
|
//
|
|
// CAST(INT, LSH(1LL, 32)) == 0
|
|
// LSH(CAST(INT, 1LL), CAST(INT, 32)) == LSH(1, 32) == LSH(1, 0) == 1
|
|
//
|
|
// So some extra validation is needed.
|
|
//
|
|
if (oper->OperIs(GT_LSH))
|
|
{
|
|
GenTree* shiftAmount = oper->AsOp()->gtOp2;
|
|
|
|
// Expose constant value for shift, if possible, to maximize the number
|
|
// of cases we can handle.
|
|
shiftAmount = gtFoldExpr(shiftAmount);
|
|
oper->AsOp()->gtOp2 = shiftAmount;
|
|
|
|
#if DEBUG
|
|
// We may remorph the shift amount tree again later, so clear any morphed flag.
|
|
shiftAmount->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
|
|
#endif // DEBUG
|
|
|
|
if (shiftAmount->IsIntegralConst())
|
|
{
|
|
const ssize_t shiftAmountValue = shiftAmount->AsIntCon()->IconValue();
|
|
|
|
if ((shiftAmountValue >= 64) || (shiftAmountValue < 0))
|
|
{
|
|
// Shift amount is large enough or negative so result is undefined.
|
|
// Don't try to optimize.
|
|
assert(!canPushCast);
|
|
}
|
|
else if (shiftAmountValue >= 32)
|
|
{
|
|
// We know that we have a narrowing cast ([u]long -> [u]int)
|
|
// and that we are casting to a 32-bit value, which will result in zero.
|
|
//
|
|
// Check to see if we have any side-effects that we must keep
|
|
//
|
|
if ((tree->gtFlags & GTF_ALL_EFFECT) == 0)
|
|
{
|
|
// Result of the shift is zero.
|
|
DEBUG_DESTROY_NODE(tree);
|
|
GenTree* zero = gtNewZeroConNode(TYP_INT);
|
|
return fgMorphTree(zero);
|
|
}
|
|
else // We do have a side-effect
|
|
{
|
|
// We could create a GT_COMMA node here to keep the side-effect and return a zero
|
|
// Instead we just don't try to optimize this case.
|
|
canPushCast = false;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// Shift amount is positive and small enough that we can push the cast through.
|
|
canPushCast = true;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// Shift amount is unknown. We can't optimize this case.
|
|
assert(!canPushCast);
|
|
}
|
|
}
|
|
|
|
if (canPushCast)
|
|
{
|
|
GenTree* op1 = oper->gtGetOp1();
|
|
GenTree* op2 = oper->gtGetOp2IfPresent();
|
|
|
|
canPushCast = !varTypeIsGC(op1) && ((op2 == nullptr) || !varTypeIsGC(op2));
|
|
}
|
|
|
|
if (canPushCast)
|
|
{
|
|
DEBUG_DESTROY_NODE(tree);
|
|
|
|
// Insert narrowing casts for op1 and op2.
|
|
oper->AsOp()->gtOp1 = gtNewCastNode(TYP_INT, oper->AsOp()->gtOp1, false, dstType);
|
|
if (oper->AsOp()->gtOp2 != nullptr)
|
|
{
|
|
oper->AsOp()->gtOp2 = gtNewCastNode(TYP_INT, oper->AsOp()->gtOp2, false, dstType);
|
|
}
|
|
|
|
// Clear the GT_MUL_64RSLT if it is set.
|
|
if (oper->gtOper == GT_MUL && (oper->gtFlags & GTF_MUL_64RSLT))
|
|
{
|
|
oper->gtFlags &= ~GTF_MUL_64RSLT;
|
|
}
|
|
|
|
// The operation now produces a 32-bit result.
|
|
oper->gtType = TYP_INT;
|
|
|
|
// Remorph the new tree as the casts that we added may be folded away.
|
|
return fgMorphTree(oper);
|
|
}
|
|
}
|
|
}
|
|
|
|
return nullptr;
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
|
|
//------------------------------------------------------------------------
|
|
// getWellKnownArgName: Get a string representation of a WellKnownArg.
|
|
//
|
|
const char* getWellKnownArgName(WellKnownArg arg)
|
|
{
|
|
switch (arg)
|
|
{
|
|
case WellKnownArg::None:
|
|
return "None";
|
|
case WellKnownArg::ThisPointer:
|
|
return "ThisPointer";
|
|
case WellKnownArg::VarArgsCookie:
|
|
return "VarArgsCookie";
|
|
case WellKnownArg::InstParam:
|
|
return "InstParam";
|
|
case WellKnownArg::RetBuffer:
|
|
return "RetBuffer";
|
|
case WellKnownArg::PInvokeFrame:
|
|
return "PInvokeFrame";
|
|
case WellKnownArg::WrapperDelegateCell:
|
|
return "WrapperDelegateCell";
|
|
case WellKnownArg::ShiftLow:
|
|
return "ShiftLow";
|
|
case WellKnownArg::ShiftHigh:
|
|
return "ShiftHigh";
|
|
case WellKnownArg::VirtualStubCell:
|
|
return "VirtualStubCell";
|
|
case WellKnownArg::PInvokeCookie:
|
|
return "PInvokeCookie";
|
|
case WellKnownArg::PInvokeTarget:
|
|
return "PInvokeTarget";
|
|
case WellKnownArg::R2RIndirectionCell:
|
|
return "R2RIndirectionCell";
|
|
case WellKnownArg::ValidateIndirectCallTarget:
|
|
return "ValidateIndirectCallTarget";
|
|
case WellKnownArg::DispatchIndirectCallTarget:
|
|
return "DispatchIndirectCallTarget";
|
|
case WellKnownArg::SwiftError:
|
|
return "SwiftError";
|
|
case WellKnownArg::SwiftSelf:
|
|
return "SwiftSelf";
|
|
case WellKnownArg::X86TailCallSpecialArg:
|
|
return "X86TailCallSpecialArg";
|
|
}
|
|
|
|
return "N/A";
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// Dump: Dump information about a CallArg to jitstdout.
|
|
//
|
|
void CallArg::Dump(Compiler* comp)
|
|
{
|
|
printf("CallArg[[%06u].%s", comp->dspTreeID(GetNode()), GenTree::OpName(GetNode()->OperGet()));
|
|
printf(" %s", varTypeName(m_signatureType));
|
|
printf(" (%s)", AbiInfo.PassedByRef ? "By ref" : "By value");
|
|
if (AbiInfo.GetRegNum() != REG_STK)
|
|
{
|
|
printf(", %u reg%s:", AbiInfo.NumRegs, AbiInfo.NumRegs == 1 ? "" : "s");
|
|
for (unsigned i = 0; i < AbiInfo.NumRegs; i++)
|
|
{
|
|
printf(" %s", getRegName(AbiInfo.GetRegNum(i)));
|
|
}
|
|
}
|
|
if (AbiInfo.GetStackByteSize() > 0)
|
|
{
|
|
printf(", byteSize=%u, byteOffset=%u", AbiInfo.ByteSize, AbiInfo.ByteOffset);
|
|
}
|
|
if (GetLateNode() != nullptr)
|
|
{
|
|
printf(", isLate");
|
|
}
|
|
if (AbiInfo.IsSplit())
|
|
{
|
|
printf(", isSplit");
|
|
}
|
|
if (m_needPlace)
|
|
{
|
|
printf(", needPlace");
|
|
}
|
|
if (m_processed)
|
|
{
|
|
printf(", processed");
|
|
}
|
|
if (AbiInfo.IsHfaRegArg())
|
|
{
|
|
printf(", isHfa(%s)", varTypeName(AbiInfo.GetHfaType()));
|
|
}
|
|
if (m_wellKnownArg != WellKnownArg::None)
|
|
{
|
|
printf(", wellKnown[%s]", getWellKnownArgName(m_wellKnownArg));
|
|
}
|
|
printf("]\n");
|
|
}
|
|
#endif
|
|
|
|
//------------------------------------------------------------------------
|
|
// ArgsComplete: Make final decisions on which arguments to evaluate into temporaries.
|
|
//
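// Remarks:
//    An argument is forced into a temp when evaluating it in place could observe
//    or be affected by a later argument: stores (GTF_ASG), nested calls
//    (GTF_CALL), trees whose exceptions could otherwise be reordered, expensive
//    or "mis-sized" multi-reg struct args, and, when CFG is enabled, the
//    side-effecting args of delegate/vtable calls whose target is computed early.
//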
|
|
void CallArgs::ArgsComplete(Compiler* comp, GenTreeCall* call)
|
|
{
|
|
unsigned argCount = CountArgs();
|
|
|
|
// Previous argument with GTF_EXCEPT
|
|
GenTree* prevExceptionTree = nullptr;
|
|
// Exceptions that the previous tree with GTF_EXCEPT may throw (computed lazily; may
// be empty)
|
|
ExceptionSetFlags prevExceptionFlags = ExceptionSetFlags::None;
|
|
|
|
for (CallArg& arg : Args())
|
|
{
|
|
GenTree* argx = arg.GetEarlyNode();
|
|
assert(argx != nullptr);
|
|
|
|
bool canEvalToTemp = true;
|
|
if (arg.AbiInfo.GetRegNum() == REG_STK)
|
|
{
|
|
assert(m_hasStackArgs);
|
|
#if !FEATURE_FIXED_OUT_ARGS
|
|
// Non-register arguments are evaluated and pushed in order; they
|
|
// should never go in the late arg list.
|
|
canEvalToTemp = false;
|
|
#endif
|
|
}
|
|
#if FEATURE_ARG_SPLIT
|
|
else if (arg.AbiInfo.IsSplit())
|
|
{
|
|
assert(m_hasStackArgs);
|
|
}
|
|
#endif // FEATURE_ARG_SPLIT
|
|
|
|
// If the argument tree contains a store (GTF_ASG) then the argument and
// every earlier argument (except constants) must be evaluated into temps
|
|
// since there may be other arguments that follow and they may use the value being defined.
|
|
//
|
|
// EXAMPLE: ArgTab is "a, a=5, a"
|
|
// -> when we see the second arg "a=5"
|
|
// we know the first two arguments "a, a=5" have to be evaluated into temps
|
|
//
|
|
if ((argx->gtFlags & GTF_ASG) != 0)
|
|
{
|
|
// fgMakeOutgoingStructArgCopy can have introduced a temp already,
|
|
// in which case it will have created a setup node in the early
|
|
// node.
|
|
if (!argx->IsValue())
|
|
{
|
|
assert(arg.m_needTmp);
|
|
}
|
|
else if (canEvalToTemp && (argCount > 1))
|
|
{
|
|
SetNeedsTemp(&arg);
|
|
}
|
|
|
|
// For all previous arguments that may interfere with the store we
|
|
// require that they be evaluated into temps.
|
|
for (CallArg& prevArg : Args())
|
|
{
|
|
if (&prevArg == &arg)
|
|
{
|
|
break;
|
|
}
|
|
|
|
#if !FEATURE_FIXED_OUT_ARGS
|
|
if (prevArg.AbiInfo.GetRegNum() == REG_STK)
|
|
{
|
|
// All stack args are already evaluated and placed in order
|
|
// in this case.
|
|
continue;
|
|
}
|
|
#endif
|
|
|
|
if ((prevArg.GetEarlyNode() == nullptr) || prevArg.m_needTmp)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
if (((prevArg.GetEarlyNode()->gtFlags & GTF_ALL_EFFECT) != 0) ||
|
|
comp->gtMayHaveStoreInterference(argx, prevArg.GetEarlyNode()))
|
|
{
|
|
SetNeedsTemp(&prevArg);
|
|
}
|
|
}
|
|
}
|
|
|
|
bool treatLikeCall = ((argx->gtFlags & GTF_CALL) != 0);
|
|
|
|
ExceptionSetFlags exceptionFlags = ExceptionSetFlags::None;
|
|
#if FEATURE_FIXED_OUT_ARGS
|
|
// Like calls, if this argument has a tree that will do an inline throw
// (a call to a jit helper), then we need to treat it like a call (but only
|
|
// if there are/were any stack args).
|
|
// This means unnesting, sorting, etc. Technically this is overly
|
|
// conservative, but I want to avoid as much special-case debug-only code
|
|
// as possible, so leveraging the GTF_CALL flag is the easiest.
|
|
//
|
|
if (!treatLikeCall && (argx->gtFlags & GTF_EXCEPT) && (argCount > 1) && comp->opts.compDbgCode)
|
|
{
|
|
exceptionFlags = comp->gtCollectExceptions(argx);
|
|
if ((exceptionFlags & (ExceptionSetFlags::IndexOutOfRangeException |
|
|
ExceptionSetFlags::OverflowException)) != ExceptionSetFlags::None)
|
|
{
|
|
for (CallArg& otherArg : Args())
|
|
{
|
|
if (&otherArg == &arg)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
if (otherArg.AbiInfo.GetRegNum() == REG_STK)
|
|
{
|
|
treatLikeCall = true;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#endif // FEATURE_FIXED_OUT_ARGS
|
|
|
|
// If it contains a call (GTF_CALL) then itself and everything before the call
|
|
// with a GLOB_EFFECT must eval to temp (this is because everything with SIDE_EFFECT
|
|
// has to be kept in the right order since we will move the call to the first position)
|
|
|
|
// For calls we don't have to be quite as conservative as we are with stores
|
|
// since the call won't be modifying any non-address taken LclVars.
|
|
|
|
if (treatLikeCall)
|
|
{
|
|
if (canEvalToTemp)
|
|
{
|
|
if (argCount > 1) // If this is not the only argument
|
|
{
|
|
SetNeedsTemp(&arg);
|
|
}
|
|
else if (varTypeIsFloating(argx->TypeGet()) && (argx->OperGet() == GT_CALL))
|
|
{
|
|
// Spill all arguments that are floating point calls
|
|
SetNeedsTemp(&arg);
|
|
}
|
|
}
|
|
|
|
// All previous arguments may need to be evaluated into temps
|
|
for (CallArg& prevArg : Args())
|
|
{
|
|
if (&prevArg == &arg)
|
|
{
|
|
break;
|
|
}
|
|
|
|
#if !FEATURE_FIXED_OUT_ARGS
|
|
if (prevArg.AbiInfo.GetRegNum() == REG_STK)
|
|
{
|
|
// All stack args are already evaluated and placed in order
|
|
// in this case.
|
|
continue;
|
|
}
|
|
#endif
|
|
|
|
// For all previous arguments, if they have any GTF_ALL_EFFECT
|
|
// we require that they be evaluated into a temp
|
|
if ((prevArg.GetEarlyNode() != nullptr) && ((prevArg.GetEarlyNode()->gtFlags & GTF_ALL_EFFECT) != 0))
|
|
{
|
|
SetNeedsTemp(&prevArg);
|
|
}
|
|
#if FEATURE_FIXED_OUT_ARGS
|
|
// Or, if they are stored into the FIXED_OUT_ARG area
|
|
// we require that they be moved to the late list
|
|
else if (prevArg.AbiInfo.GetRegNum() == REG_STK)
|
|
{
|
|
prevArg.m_needPlace = true;
|
|
}
|
|
#if FEATURE_ARG_SPLIT
|
|
else if (prevArg.AbiInfo.IsSplit())
|
|
{
|
|
prevArg.m_needPlace = true;
|
|
}
|
|
#endif // FEATURE_ARG_SPLIT
|
|
#endif
|
|
}
|
|
}
|
|
else if ((argx->gtFlags & GTF_EXCEPT) != 0)
|
|
{
|
|
// If a previous arg may throw a different exception than this arg
|
|
// then we evaluate all previous arguments with GTF_EXCEPT to temps
|
|
// to avoid reordering them in our sort later.
|
|
if (prevExceptionTree != nullptr)
|
|
{
|
|
if (prevExceptionFlags == ExceptionSetFlags::None)
|
|
{
|
|
prevExceptionFlags = comp->gtCollectExceptions(prevExceptionTree);
|
|
}
|
|
|
|
if (exceptionFlags == ExceptionSetFlags::None)
|
|
{
|
|
exceptionFlags = comp->gtCollectExceptions(argx);
|
|
}
|
|
|
|
bool exactlyOne = isPow2(static_cast<unsigned>(exceptionFlags));
|
|
bool throwsSameAsPrev = exactlyOne && (exceptionFlags == prevExceptionFlags);
|
|
if (!throwsSameAsPrev)
|
|
{
|
|
JITDUMP("Exception set for arg [%06u] interferes with previous tree [%06u]; must evaluate previous "
|
|
"trees with exceptions to temps\n",
|
|
Compiler::dspTreeID(argx), Compiler::dspTreeID(prevExceptionTree));
|
|
|
|
for (CallArg& prevArg : Args())
|
|
{
|
|
if (&prevArg == &arg)
|
|
{
|
|
break;
|
|
}
|
|
|
|
#if !FEATURE_FIXED_OUT_ARGS
|
|
if (prevArg.AbiInfo.GetRegNum() == REG_STK)
|
|
{
|
|
// All stack args are already evaluated and placed in order
|
|
// in this case.
|
|
continue;
|
|
}
|
|
#endif
|
|
// Invariant here is that all nodes that were not
|
|
// already evaluated into temps and that throw can only
|
|
// be throwing the same single exception as the
|
|
// previous tree, so all of them interfere in the same
|
|
// way with the current arg and must be evaluated
|
|
// early.
|
|
if ((prevArg.GetEarlyNode() != nullptr) &&
|
|
((prevArg.GetEarlyNode()->gtFlags & GTF_EXCEPT) != 0))
|
|
{
|
|
SetNeedsTemp(&prevArg);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
prevExceptionTree = argx;
|
|
prevExceptionFlags = exceptionFlags;
|
|
}
|
|
|
|
#if FEATURE_MULTIREG_ARGS
|
|
// In "fgMorphMultiRegStructArg" we will expand the arg into a GT_FIELD_LIST with multiple indirections, so
|
|
// here we consider spilling it into a local. We also need to spill it in case we have a node that we do not
|
|
// currently handle in multi-reg morphing.
|
|
//
|
|
if (varTypeIsStruct(argx) && !arg.m_needTmp)
|
|
{
|
|
if ((arg.AbiInfo.NumRegs > 0) && ((arg.AbiInfo.NumRegs + arg.AbiInfo.GetStackSlotsNumber()) > 1))
|
|
{
|
|
if ((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0)
|
|
{
|
|
// Spill multireg struct arguments that have stores or calls embedded in them.
|
|
SetNeedsTemp(&arg);
|
|
}
|
|
else if (!argx->OperIsLocalRead() && !argx->OperIsLoad())
|
|
{
|
|
// TODO-CQ: handle HWI/SIMD/COMMA nodes in multi-reg morphing.
|
|
SetNeedsTemp(&arg);
|
|
}
|
|
else
|
|
{
|
|
// Finally, we call gtPrepareCost to measure the cost of evaluating this tree.
|
|
comp->gtPrepareCost(argx);
|
|
|
|
if (argx->GetCostEx() > (6 * IND_COST_EX))
|
|
{
|
|
// Spill multireg struct arguments that are expensive to evaluate twice.
|
|
SetNeedsTemp(&arg);
|
|
}
|
|
}
|
|
}
|
|
|
|
// We are only able to expand certain "BLK"s into field lists, so here we spill all the
|
|
// "mis-sized" ones. We could in theory support them directly with some arithmetic and
|
|
// shifts, but these cases are rare enough that it is probably not worth the complexity.
|
|
// No need to do this for stack args as they are directly supported by codegen.
|
|
//
|
|
if (argx->OperIs(GT_BLK) && (arg.AbiInfo.GetRegNum() != REG_STK))
|
|
{
|
|
GenTreeBlk* argObj = argx->AsBlk();
|
|
unsigned structSize = argObj->Size();
|
|
unsigned lastLoadSize = structSize % TARGET_POINTER_SIZE;
|
|
|
|
if ((lastLoadSize != 0) && !isPow2(lastLoadSize))
|
|
{
|
|
#ifdef TARGET_ARM
|
|
// On ARM we don't expand split args larger than 16 bytes into field lists.
|
|
if (!arg.AbiInfo.IsSplit() || (structSize <= 16))
|
|
#endif // TARGET_ARM
|
|
{
|
|
SetNeedsTemp(&arg);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
#endif // FEATURE_MULTIREG_ARGS
|
|
}
|
|
|
|
#if FEATURE_FIXED_OUT_ARGS
|
|
|
|
// For Arm/x64 we only care about this when localloc has been used, since we
// can't reorder a register argument that uses GT_LCLHEAP. This is an
// optimization to save a check inside the loop below.
|
|
//
|
|
const bool hasStackArgsWeCareAbout = m_hasStackArgs && comp->compLocallocUsed;
|
|
|
|
#else
|
|
|
|
const bool hasStackArgsWeCareAbout = m_hasStackArgs;
|
|
|
|
#endif // FEATURE_FIXED_OUT_ARGS
|
|
|
|
// If we have any stack args we have to force the evaluation
|
|
// of any arguments passed in registers that might throw an exception
|
|
//
|
|
// Technically we are only required to handle the following two cases:
|
|
// a GT_IND with GTF_IND_RNGCHK (only on x86) or
|
|
// a GT_LCLHEAP node that allocates stuff on the stack
|
|
//
|
|
if (hasStackArgsWeCareAbout)
|
|
{
|
|
for (CallArg& arg : EarlyArgs())
|
|
{
|
|
GenTree* argx = arg.GetEarlyNode();
|
|
assert(!comp->gtTreeContainsOper(argx, GT_QMARK));
|
|
|
|
// Examine the register args that are currently not marked needTmp
|
|
//
|
|
if (!arg.m_needTmp && (arg.AbiInfo.GetRegNum() != REG_STK))
|
|
{
|
|
if (hasStackArgsWeCareAbout)
|
|
{
|
|
#if !FEATURE_FIXED_OUT_ARGS
|
|
// On x86 we previously recorded a stack depth of zero when
// morphing the register arguments of any GT_IND with a GTF_IND_RNGCHK flag.
// Thus we cannot reorder the argument after any stack-based argument.
|
|
// (Note that GT_LCLHEAP sets the GTF_EXCEPT flag so we don't need to
|
|
// check for it explicitly.)
|
|
//
|
|
if (argx->gtFlags & GTF_EXCEPT)
|
|
{
|
|
SetNeedsTemp(&arg);
|
|
continue;
|
|
}
|
|
#else
|
|
// For Arm/X64 we can't reorder a register argument that uses a GT_LCLHEAP
|
|
//
|
|
if (argx->gtFlags & GTF_EXCEPT)
|
|
{
|
|
assert(comp->compLocallocUsed);
|
|
|
|
if (comp->gtTreeContainsOper(argx, GT_LCLHEAP))
|
|
{
|
|
SetNeedsTemp(&arg);
|
|
continue;
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// When CFG is enabled and this is a delegate call or vtable call we must
|
|
// compute the call target before all late args. However this will
|
|
// effectively null-check 'this', which should happen only after all
|
|
// arguments are evaluated. Thus we must evaluate all args with side
|
|
// effects to a temp.
|
|
if (comp->opts.IsCFGEnabled() && (call->IsVirtualVtable() || call->IsDelegateInvoke()))
|
|
{
|
|
// Always evaluate 'this' to temp.
|
|
assert(HasThisPointer());
|
|
SetNeedsTemp(GetThisArg());
|
|
|
|
for (CallArg& arg : EarlyArgs())
|
|
{
|
|
if ((arg.GetEarlyNode()->gtFlags & GTF_ALL_EFFECT) != 0)
|
|
{
|
|
SetNeedsTemp(&arg);
|
|
}
|
|
}
|
|
}
|
|
|
|
m_argsComplete = true;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// SortArgs: Sort arguments into a better passing order.
|
|
//
|
|
// Parameters:
|
|
// comp - The compiler object.
|
|
// call - The call that contains this CallArgs instance.
|
|
// sortedArgs - A table of at least `CountArgs()` entries where the sorted
|
|
// arguments are written into.
|
|
//
|
|
void CallArgs::SortArgs(Compiler* comp, GenTreeCall* call, CallArg** sortedArgs)
|
|
{
|
|
assert(m_argsComplete);
|
|
|
|
JITDUMP("\nSorting the arguments:\n");
|
|
|
|
// Shuffle the arguments around before we build the late args list. The
|
|
// idea is to move all "simple" arguments like constants and local vars to
|
|
// the end, and move the complex arguments towards the beginning. This will
|
|
// help prevent registers from being spilled by allowing us to evaluate the
|
|
// more complex arguments before the simpler arguments. We use the late
|
|
// list to keep the sorted result at this point, and the ordering ends up
|
|
// looking like:
|
|
// +------------------------------------+ <--- end of sortedArgs
|
|
// | constants |
|
|
// +------------------------------------+
|
|
// | local var / local field |
|
|
// +------------------------------------+
|
|
// | remaining arguments sorted by cost |
|
|
// +------------------------------------+
|
|
// | temps (CallArg::m_needTmp == true) |
|
|
// +------------------------------------+
|
|
// | args with calls (GTF_CALL) |
|
|
// +------------------------------------+ <--- start of sortedArgs
|
|
//
|
|
|
|
unsigned argCount = 0;
|
|
for (CallArg& arg : Args())
|
|
{
|
|
sortedArgs[argCount++] = &arg;
|
|
}
|
|
|
|
// Set the beginning and end for the new argument table
|
|
unsigned curInx;
|
|
int regCount = 0;
|
|
unsigned begTab = 0;
|
|
unsigned endTab = argCount - 1;
|
|
unsigned argsRemaining = argCount;
|
|
|
|
// First take care of arguments that are constants.
|
|
// [We use a backward iterator pattern]
|
|
//
|
|
curInx = argCount;
|
|
do
|
|
{
|
|
curInx--;
|
|
|
|
CallArg* arg = sortedArgs[curInx];
|
|
|
|
if (arg->AbiInfo.GetRegNum() != REG_STK)
|
|
{
|
|
regCount++;
|
|
}
|
|
|
|
// Skip any already processed args
|
|
//
|
|
if (!arg->m_processed)
|
|
{
|
|
GenTree* argx = arg->GetEarlyNode();
|
|
|
|
assert(argx != nullptr);
|
|
// put constants at the end of the table
|
|
//
|
|
if (argx->gtOper == GT_CNS_INT)
|
|
{
|
|
noway_assert(curInx <= endTab);
|
|
|
|
arg->m_processed = true;
|
|
|
|
// place curArgTabEntry at the endTab position by performing a swap
|
|
//
|
|
if (curInx != endTab)
|
|
{
|
|
sortedArgs[curInx] = sortedArgs[endTab];
|
|
sortedArgs[endTab] = arg;
|
|
}
|
|
|
|
endTab--;
|
|
argsRemaining--;
|
|
}
|
|
}
|
|
} while (curInx > 0);
|
|
|
|
if (argsRemaining > 0)
|
|
{
|
|
// Next take care of arguments that are calls.
|
|
// [We use a forward iterator pattern]
|
|
//
|
|
for (curInx = begTab; curInx <= endTab; curInx++)
|
|
{
|
|
CallArg* arg = sortedArgs[curInx];
|
|
|
|
// Skip any already processed args
|
|
//
|
|
if (!arg->m_processed)
|
|
{
|
|
GenTree* argx = arg->GetEarlyNode();
|
|
assert(argx != nullptr);
|
|
|
|
// put calls at the beginning of the table
|
|
//
|
|
if (argx->gtFlags & GTF_CALL)
|
|
{
|
|
arg->m_processed = true;
|
|
|
|
// place curArgTabEntry at the begTab position by performing a swap
|
|
//
|
|
if (curInx != begTab)
|
|
{
|
|
sortedArgs[curInx] = sortedArgs[begTab];
|
|
sortedArgs[begTab] = arg;
|
|
}
|
|
|
|
begTab++;
|
|
argsRemaining--;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (argsRemaining > 0)
|
|
{
|
|
// Next take care of arguments that are temps.
|
|
// These temps come before the arguments that are
|
|
// ordinary local vars or local fields
|
|
// since this will give them a better chance to become
|
|
// enregistered into their actual argument register.
|
|
// [We use a forward iterator pattern]
|
|
//
|
|
for (curInx = begTab; curInx <= endTab; curInx++)
|
|
{
|
|
CallArg* arg = sortedArgs[curInx];
|
|
|
|
// Skip any already processed args
|
|
//
|
|
if (!arg->m_processed)
|
|
{
|
|
if (arg->m_needTmp)
|
|
{
|
|
arg->m_processed = true;
|
|
|
|
// place curArgTabEntry at the begTab position by performing a swap
|
|
//
|
|
if (curInx != begTab)
|
|
{
|
|
sortedArgs[curInx] = sortedArgs[begTab];
|
|
sortedArgs[begTab] = arg;
|
|
}
|
|
|
|
begTab++;
|
|
argsRemaining--;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (argsRemaining > 0)
|
|
{
|
|
// Next take care of local var and local field arguments.
|
|
// These are moved towards the end of the argument evaluation.
|
|
// [We use a backward iterator pattern]
|
|
//
|
|
curInx = endTab + 1;
|
|
do
|
|
{
|
|
curInx--;
|
|
|
|
CallArg* arg = sortedArgs[curInx];
|
|
|
|
// Skip any already processed args
|
|
//
|
|
if (!arg->m_processed)
|
|
{
|
|
GenTree* argx = arg->GetEarlyNode();
|
|
assert(argx != nullptr);
|
|
|
|
// As a CQ heuristic, sort TYP_STRUCT args using the cost estimation below.
|
|
if (!argx->TypeIs(TYP_STRUCT) && argx->OperIs(GT_LCL_VAR, GT_LCL_FLD))
|
|
{
|
|
noway_assert(curInx <= endTab);
|
|
|
|
arg->m_processed = true;
|
|
|
|
// place curArgTabEntry at the endTab position by performing a swap
|
|
//
|
|
if (curInx != endTab)
|
|
{
|
|
sortedArgs[curInx] = sortedArgs[endTab];
|
|
sortedArgs[endTab] = arg;
|
|
}
|
|
|
|
endTab--;
|
|
argsRemaining--;
|
|
}
|
|
}
|
|
} while (curInx > begTab);
|
|
}
|
|
|
|
// Finally, take care of all the remaining arguments.
|
|
// Note that we fill in one arg at a time using a while loop.
|
|
bool costsPrepared = false; // Only prepare tree costs once, the first time through this loop
|
|
while (argsRemaining > 0)
|
|
{
|
|
/* Find the most expensive arg remaining and evaluate it next */
|
|
|
|
CallArg* expensiveArg = nullptr;
|
|
unsigned expensiveArgIndex = UINT_MAX;
|
|
unsigned expensiveArgCost = 0;
|
|
|
|
// [We use a forward iterator pattern]
|
|
//
|
|
for (curInx = begTab; curInx <= endTab; curInx++)
|
|
{
|
|
CallArg* arg = sortedArgs[curInx];
|
|
|
|
// Skip any already processed args
|
|
//
|
|
if (!arg->m_processed)
|
|
{
|
|
GenTree* argx = arg->GetEarlyNode();
|
|
assert(argx != nullptr);
|
|
|
|
// We should have already handled these kinds of args
|
|
assert((!argx->OperIs(GT_LCL_VAR, GT_LCL_FLD) || argx->TypeIs(TYP_STRUCT)) &&
|
|
!argx->OperIs(GT_CNS_INT));
|
|
|
|
// This arg should either have no persistent side effects or be the last one in our table
|
|
// assert(((argx->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0) || (curInx == (argCount-1)));
|
|
|
|
if (argsRemaining == 1)
|
|
{
|
|
// This is the last arg to place
|
|
expensiveArgIndex = curInx;
|
|
expensiveArg = arg;
|
|
assert(begTab == endTab);
|
|
break;
|
|
}
|
|
else
|
|
{
|
|
if (!costsPrepared)
|
|
{
|
|
/* We call gtPrepareCost to measure the cost of evaluating this tree */
|
|
comp->gtPrepareCost(argx);
|
|
}
|
|
|
|
if (argx->GetCostEx() > expensiveArgCost)
|
|
{
|
|
// Remember this arg as the most expensive one that we have yet seen
|
|
expensiveArgCost = argx->GetCostEx();
|
|
expensiveArgIndex = curInx;
|
|
expensiveArg = arg;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
noway_assert(expensiveArgIndex != UINT_MAX);
|
|
|
|
// put the most expensive arg towards the beginning of the table
|
|
|
|
expensiveArg->m_processed = true;
|
|
|
|
// place expensiveArgTabEntry at the begTab position by performing a swap
|
|
//
|
|
if (expensiveArgIndex != begTab)
|
|
{
|
|
sortedArgs[expensiveArgIndex] = sortedArgs[begTab];
|
|
sortedArgs[begTab] = expensiveArg;
|
|
}
|
|
|
|
begTab++;
|
|
argsRemaining--;
|
|
|
|
costsPrepared = true; // If we have more expensive arguments, don't re-evaluate the tree cost on the next loop
|
|
}
|
|
|
|
// The table should now be completely filled and thus begTab should now be adjacent to endTab
|
|
// and regArgsRemaining should be zero
|
|
assert(begTab == (endTab + 1));
|
|
assert(argsRemaining == 0);
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// MakeTmpArgNode:
|
|
// Create a temp for an argument if needed. We usually need this to be done
|
|
// in order to enforce ordering of the evaluation of arguments.
|
|
//
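// Arguments:
//    comp   - The compiler instance
//    arg    - The call argument
//    lclNum - The temp local holding the argument's value
//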
|
|
// Return Value:
|
|
// the newly created temp var tree.
|
|
//
|
|
GenTree* CallArgs::MakeTmpArgNode(Compiler* comp, CallArg* arg, unsigned lclNum)
|
|
{
|
|
LclVarDsc* varDsc = comp->lvaGetDesc(lclNum);
|
|
var_types argType = varDsc->TypeGet();
|
|
assert(genActualType(argType) == genActualType(arg->GetSignatureType()));
|
|
|
|
GenTree* argNode = nullptr;
|
|
|
|
if (varTypeIsStruct(argType))
|
|
{
|
|
if (arg->AbiInfo.PassedByRef)
|
|
{
|
|
argNode = comp->gtNewLclVarAddrNode(lclNum);
|
|
comp->lvaSetVarAddrExposed(lclNum DEBUGARG(AddressExposedReason::ESCAPE_ADDRESS));
|
|
}
|
|
// TODO-CQ: currently this mirrors the logic in "fgMorphArgs", but actually we only need
|
|
// this retyping for args passed in a single register: "(NumRegs == 1) && !IsSplit()".
|
|
else if (arg->AbiInfo.ArgType != TYP_STRUCT)
|
|
{
|
|
argNode = comp->gtNewLclFldNode(lclNum, arg->AbiInfo.ArgType, 0);
|
|
comp->lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::SwizzleArg));
|
|
}
|
|
else
|
|
{
|
|
// We are passing this struct by value in multiple registers and/or on stack.
|
|
argNode = comp->gtNewLclvNode(lclNum, argType);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
assert(!arg->AbiInfo.PassedByRef);
|
|
argNode = comp->gtNewLclvNode(lclNum, argType);
|
|
}
|
|
|
|
return argNode;
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// EvalArgsToTemps: Handle arguments that were marked as requiring temps.
|
|
//
|
|
// Remarks:
|
|
// This is the main function responsible for assigning late nodes in arguments.
|
|
// After this function we may have the following shapes of early and late
|
|
// nodes in arguments:
|
|
// 1. Early: GT_STORE_LCL_VAR, Late: GT_LCL_VAR.
|
|
// When the argument needs to be evaluated early (e.g. because it has
|
|
// side effects, or because it is a struct copy that requires it) it
|
|
// will be assigned to a temp in the early node and passed as the local
|
|
// in the late node. This can happen for both register and stack args.
|
|
//
|
|
// 2. Early: nullptr, Late: <any node>
|
|
// All arguments that are placed in registers need to appear as a late
|
|
// node. Some stack arguments may also require this pattern, for example
|
|
// if a later argument trashes the outgoing arg area by requiring a
|
|
// call.
|
|
// If the argument does not otherwise need to be evaluated into a temp
|
|
// we just move it into the late list.
|
|
//
|
|
// 3. Early: <any node>, Late: nullptr
|
|
// Arguments that are passed on stack and that do not need an explicit
|
|
// temp store in the early node list do not require any late node.
|
|
//
|
|
void CallArgs::EvalArgsToTemps(Compiler* comp, GenTreeCall* call)
|
|
{
|
|
CallArg* inlineTable[32];
|
|
size_t numArgs = call->gtArgs.CountArgs();
|
|
CallArg** sortedArgs =
|
|
numArgs <= ARRAY_SIZE(inlineTable) ? inlineTable : new (comp, CMK_CallArgs) CallArg*[numArgs];
|
|
SortArgs(comp, call, sortedArgs);
|
|
|
|
unsigned regArgInx = 0;
|
|
// Now go through the sorted argument table and perform the necessary evaluation into temps.
|
|
CallArg** lateTail = &m_lateHead;
|
|
for (size_t i = 0; i < numArgs; i++)
|
|
{
|
|
CallArg& arg = *(sortedArgs[i]);
|
|
|
|
if (arg.GetLateNode() != nullptr)
|
|
{
|
|
// We may already have created the temp as part of
|
|
// fgMakeOutgoingStructArgCopy. In that case there is no work to be
|
|
// done.
|
|
*lateTail = &arg;
|
|
lateTail = &arg.LateNextRef();
|
|
continue;
|
|
}
|
|
|
|
GenTree* argx = arg.GetEarlyNode();
|
|
assert(argx != nullptr);
|
|
|
|
GenTree* setupArg = nullptr;
|
|
GenTree* defArg;
|
|
|
|
#if !FEATURE_FIXED_OUT_ARGS
|
|
// Only ever set for FEATURE_FIXED_OUT_ARGS
|
|
assert(!arg.m_needPlace);
|
|
|
|
// On x86 and other archs that use push instructions to pass arguments:
|
|
// Only the register arguments need to be replaced with placeholder nodes.
|
|
// Stacked arguments are evaluated and pushed (or stored into the stack) in order.
|
|
//
|
|
if (arg.AbiInfo.GetRegNum() == REG_STK)
|
|
continue;
|
|
#endif
|
|
|
|
if (arg.m_needTmp)
|
|
{
|
|
// Create a temp store for the argument
|
|
// Put the temp in the late arg list
|
|
|
|
#ifdef DEBUG
|
|
if (comp->verbose)
|
|
{
|
|
printf("Argument with 'side effect'...\n");
|
|
comp->gtDispTree(argx);
|
|
}
|
|
#endif
|
|
|
|
#if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)
|
|
noway_assert(argx->gtType != TYP_STRUCT);
|
|
#endif
|
|
|
|
unsigned tmpVarNum = comp->lvaGrabTemp(true DEBUGARG("argument with side effect"));
|
|
|
|
setupArg = comp->gtNewTempStore(tmpVarNum, argx);
|
|
|
|
LclVarDsc* varDsc = comp->lvaGetDesc(tmpVarNum);
|
|
var_types lclVarType = genActualType(argx->gtType);
|
|
var_types scalarType = TYP_UNKNOWN;
|
|
|
|
if (setupArg->OperIsCopyBlkOp())
|
|
{
|
|
setupArg = comp->fgMorphCopyBlock(setupArg);
|
|
#if defined(TARGET_ARMARCH) || defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
|
|
if ((lclVarType == TYP_STRUCT) && (arg.AbiInfo.ArgType != TYP_STRUCT))
|
|
{
|
|
scalarType = arg.AbiInfo.ArgType;
|
|
}
|
|
#endif // TARGET_ARMARCH || defined (UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
|
|
}
|
|
|
|
// scalarType can be set to a wider type for ARM or unix amd64 architectures: (3 => 4) or (5,6,7 => 8)
|
|
if ((scalarType != TYP_UNKNOWN) && (scalarType != lclVarType))
|
|
{
|
|
// Create a GT_LCL_FLD using the wider type to go to the late argument list
|
|
defArg = comp->gtNewLclFldNode(tmpVarNum, scalarType, 0);
|
|
}
|
|
else
|
|
{
|
|
// Create a copy of the temp to go to the late argument list
|
|
defArg = comp->gtNewLclvNode(tmpVarNum, lclVarType);
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
if (comp->verbose)
|
|
{
|
|
printf("\n Evaluate to a temp:\n");
|
|
comp->gtDispTree(setupArg);
|
|
}
|
|
#endif
|
|
}
|
|
else // curArgTabEntry->needTmp == false
|
|
{
|
|
// On x86 -
|
|
// Only register args are replaced with placeholder nodes
|
|
// and the stack based arguments are evaluated and pushed in order.
|
|
//
|
|
// On Arm/x64 - When needTmp is false and needPlace is false,
|
|
// the non-register arguments are evaluated and stored in order.
|
|
// When needPlace is true we have a nested call that comes after
|
|
// this argument so we have to replace it in the gtCallArgs list
|
|
// (the initial argument evaluation list) with a placeholder.
|
|
//
|
|
if ((arg.AbiInfo.GetRegNum() == REG_STK) && !arg.m_needPlace)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
// No temp needed - move the whole node to the late list
|
|
|
|
defArg = argx;
|
|
|
|
#if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)
|
|
|
|
// All structs are either passed (and retyped) as integral types, OR they
|
|
// are passed by reference.
|
|
noway_assert(argx->gtType != TYP_STRUCT);
|
|
|
|
#endif // defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)
|
|
|
|
#ifdef DEBUG
|
|
if (comp->verbose)
|
|
{
|
|
if (arg.AbiInfo.GetRegNum() == REG_STK)
|
|
{
|
|
printf("Deferred stack argument :\n");
|
|
}
|
|
else
|
|
{
|
|
printf("Deferred argument ('%s'):\n", getRegName(arg.AbiInfo.GetRegNum()));
|
|
}
|
|
|
|
comp->gtDispTree(argx);
|
|
printf("Moved to late list\n");
|
|
}
|
|
#endif
|
|
|
|
arg.SetEarlyNode(nullptr);
|
|
}
|
|
|
|
if (setupArg != nullptr)
|
|
{
|
|
arg.SetEarlyNode(setupArg);
|
|
call->gtFlags |= setupArg->gtFlags & GTF_SIDE_EFFECT;
|
|
|
|
// Make sure we do not break recognition of retbuf-as-local
|
|
// optimization here. If this is hit it indicates that we are
|
|
// unnecessarily creating temps for some ret buf addresses, and
|
|
// gtCallGetDefinedRetBufLclAddr relies on this not to happen.
|
|
noway_assert((arg.GetWellKnownArg() != WellKnownArg::RetBuffer) || !call->IsOptimizingRetBufAsLocal());
|
|
}
|
|
|
|
arg.SetLateNode(defArg);
|
|
*lateTail = &arg;
|
|
lateTail = &arg.LateNextRef();
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
if (comp->verbose)
|
|
{
|
|
printf("\nRegister placement order: ");
|
|
for (CallArg& arg : LateArgs())
|
|
{
|
|
if (arg.AbiInfo.GetRegNum() != REG_STK)
|
|
{
|
|
printf("%s ", getRegName(arg.AbiInfo.GetRegNum()));
|
|
}
|
|
}
|
|
printf("\n");
|
|
}
|
|
#endif
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
// SetNeedsTemp: Set the specified argument as requiring evaluation into a temp.
//
void CallArgs::SetNeedsTemp(CallArg* arg)
{
    arg->m_needTmp = true;
    m_needsTemps   = true;
}

//------------------------------------------------------------------------------
// fgMakeTemp: Make a temp variable and store 'value' into it.
//
// Arguments:
//    value - The expression to store to a temp.
//
// Return Value:
//    'TempInfo' data that contains the GT_STORE_LCL_VAR and GT_LCL_VAR nodes for
//    store and variable load respectively.
//
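// For illustration only (a hand-written sketch, not code from this file; 'expr'
// is a hypothetical node): a caller typically splices the two returned nodes
// back into the tree it is rewriting, e.g.
//
//    TempInfo ti   = fgMakeTemp(expr);
//    GenTree* defn = gtNewOperNode(GT_COMMA, expr->TypeGet(), ti.store, ti.load);
//    GenTree* use  = gtClone(ti.load); // an independent second use of the temp
//
// which is the pattern fgInsertCommaFormTemp below follows.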
TempInfo Compiler::fgMakeTemp(GenTree* value)
{
    unsigned lclNum = lvaGrabTemp(true DEBUGARG("fgMakeTemp is creating a new local variable"));
    GenTree* store  = gtNewTempStore(lclNum, value);
    GenTree* load   = gtNewLclvNode(lclNum, genActualType(value));

    TempInfo tempInfo{};
    tempInfo.store = store;
    tempInfo.load  = load;

    return tempInfo;
}

//------------------------------------------------------------------------------
// fgMakeMultiUse : If the node is an unaliased local or constant clone it,
// otherwise insert a comma form temp
//
// Arguments:
//    pOp - a pointer to the child node we will be replacing with the comma expression that
//          evaluates *pOp to a temp and returns the result
//
// Return Value:
//    A fresh GT_LCL_VAR node referencing the temp which has not been used
//
// Notes:
//    This function will clone invariant nodes and locals, so this function
//    should only be used in situations where no interference between the
//    original use and new use is possible. Otherwise, fgInsertCommaFormTemp
//    should be used directly.
//
GenTree* Compiler::fgMakeMultiUse(GenTree** pOp)
{
    GenTree* const tree = *pOp;

    if (tree->IsInvariant() || tree->OperIsLocal())
    {
        return gtCloneExpr(tree);
    }

    return fgInsertCommaFormTemp(pOp);
}

//------------------------------------------------------------------------------
// fgInsertCommaFormTemp: Create a new temporary variable to hold the result of *ppTree,
// and replace *ppTree with comma(store<newLcl>(*ppTree), newLcl)
//
// Arguments:
//    ppTree - a pointer to the child node we will be replacing with the comma expression that
//             evaluates ppTree to a temp and returns the result
//
// Return Value:
//    A fresh GT_LCL_VAR node referencing the temp which has not been used
//
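// For example (illustrative only; "Vnn" stands for the newly grabbed temp):
// given *ppTree == ADD(x, y), after this call
//
//    *ppTree      == COMMA(STORE_LCL_VAR<Vnn>(ADD(x, y)), LCL_VAR<Vnn>)
//    return value == LCL_VAR<Vnn>   (a fresh clone the caller can link in elsewhere)
//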
GenTree* Compiler::fgInsertCommaFormTemp(GenTree** ppTree)
{
    GenTree* subTree = *ppTree;

    TempInfo tempInfo = fgMakeTemp(subTree);
    GenTree* store    = tempInfo.store;
    GenTree* load     = tempInfo.load;

    *ppTree = gtNewOperNode(GT_COMMA, subTree->TypeGet(), store, load);

    return gtClone(load);
}

//------------------------------------------------------------------------
// AddFinalArgsAndDetermineABIInfo:
//   Add final arguments and determine the argument ABI information.
//
// Parameters:
//   comp - The compiler object.
//   call - The call to which the CallArgs belongs.
//
// Remarks:
//   This adds the final "non-standard" arguments to the call and categorizes
//   all the ABI information required for downstream JIT phases. This function
//   modifies IR by adding certain non-standard arguments. For more information
//   see CallArg::IsArgAddedLate and CallArgs::ResetFinalArgsAndABIInfo.
//
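//   For illustration (not an exhaustive list), the non-standard arguments added
//   or marked below include: the ARM wrapper-delegate cell, the virtual stub
//   dispatch cell, the PInvoke cookie and target for indirect calls carrying a
//   cookie, and the ReadyToRun indirection cell.
//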
void CallArgs::AddFinalArgsAndDetermineABIInfo(Compiler* comp, GenTreeCall* call)
|
|
{
|
|
assert(&call->gtArgs == this);
|
|
|
|
if (IsAbiInformationDetermined())
|
|
{
|
|
// We've already determined ABI information.
|
|
return;
|
|
}
|
|
JITDUMP("Initializing arg info for %d.%s:\n", call->gtTreeID, GenTree::OpName(call->gtOper));
|
|
|
|
m_hasRegArgs = false;
|
|
m_hasStackArgs = false;
|
|
// At this point, we should not have any late args, as this needs to be done before those are determined.
|
|
assert(m_lateHead == nullptr);
|
|
|
|
if (TargetOS::IsUnix && IsVarArgs())
|
|
{
|
|
// Currently native varargs is not implemented on non windows targets.
|
|
//
|
|
// Note that some targets like Arm64 Unix should not need much work as
|
|
// the ABI is the same. While other targets may only need small changes
|
|
// such as amd64 Unix, which just expects RAX to pass numFPArguments.
|
|
NYI("Morphing Vararg call not yet implemented on non Windows targets.");
|
|
}
|
|
|
|
// Insert or mark non-standard args. These are either outside the normal calling convention, or
|
|
// arguments registers that don't follow the normal progression of argument registers in the calling
|
|
// convention (such as for the ARM64 fixed return buffer argument x8).
|
|
//
|
|
// *********** NOTE *************
|
|
// The logic here must remain in sync with GetNonStandardAddedArgCount(), which is used to map arguments
|
|
// in the implementation of fast tail call.
|
|
// *********** END NOTE *********
|
|
|
|
#if defined(TARGET_ARM)
|
|
// A non-standard calling convention using wrapper delegate invoke is used on ARM, only, for wrapper
|
|
// delegates. It is used for VSD delegate calls where the VSD custom calling convention ABI requires passing
|
|
// R4, a callee-saved register, with a special value. Since R4 is a callee-saved register, its value needs
|
|
// to be preserved. Thus, the VM uses a wrapper delegate IL stub, which preserves R4 and also sets up R4
|
|
// correctly for the VSD call. The VM is simply reusing an existing mechanism (wrapper delegate IL stub)
|
|
// to achieve its goal for delegate VSD call. See COMDelegate::NeedsWrapperDelegate() in the VM for details.
|
|
if (call->gtCallMoreFlags & GTF_CALL_M_WRAPPER_DELEGATE_INV)
|
|
{
|
|
CallArg* thisArg = GetThisArg();
|
|
assert((thisArg != nullptr) && (thisArg->GetEarlyNode() != nullptr));
|
|
|
|
GenTree* cloned;
|
|
if (thisArg->GetEarlyNode()->OperIsLocal())
|
|
{
|
|
cloned = comp->gtClone(thisArg->GetEarlyNode(), true);
|
|
}
|
|
else
|
|
{
|
|
cloned = comp->fgInsertCommaFormTemp(&thisArg->EarlyNodeRef());
|
|
call->gtFlags |= GTF_ASG;
|
|
}
|
|
noway_assert(cloned != nullptr);
|
|
|
|
GenTree* offsetNode = comp->gtNewIconNode(comp->eeGetEEInfo()->offsetOfWrapperDelegateIndirectCell, TYP_I_IMPL);
|
|
GenTree* newArg = comp->gtNewOperNode(GT_ADD, TYP_BYREF, cloned, offsetNode);
|
|
|
|
// Append newArg as the last arg
|
|
PushBack(comp, NewCallArg::Primitive(newArg).WellKnown(WellKnownArg::WrapperDelegateCell));
|
|
}
|
|
#endif // defined(TARGET_ARM)
|
|
|
|
bool addStubCellArg = true;
|
|
|
|
#ifdef TARGET_X86
|
|
// TODO-X86-CQ: Currently RyuJIT/x86 passes args on the stack, so this is not needed.
|
|
// If/when we change that, the following code needs to be changed to correctly support the (TBD) managed calling
|
|
// convention for x86/SSE.
|
|
|
|
addStubCellArg = call->gtCallType != CT_INDIRECT && comp->IsTargetAbi(CORINFO_NATIVEAOT_ABI);
|
|
#endif
|
|
|
|
// We are allowed to have a ret buffer argument combined
|
|
// with any of the remaining non-standard arguments
|
|
|
|
if (call->IsVirtualStub() && addStubCellArg)
|
|
{
|
|
if (!call->IsTailCallViaJitHelper())
|
|
{
|
|
GenTree* stubAddrArg = comp->fgGetStubAddrArg(call);
|
|
// And push the stub address onto the list of arguments
|
|
NewCallArg stubAddrNewArg = NewCallArg::Primitive(stubAddrArg).WellKnown(WellKnownArg::VirtualStubCell);
|
|
InsertAfterThisOrFirst(comp, stubAddrNewArg);
|
|
}
|
|
else
|
|
{
|
|
// If it is a VSD call getting dispatched via tail call helper,
|
|
// fgMorphTailCallViaJitHelper() would materialize stub addr as an additional
|
|
// parameter added to the original arg list and hence no need to
|
|
// add as a non-standard arg.
|
|
}
|
|
}
|
|
else if (call->gtCallType == CT_INDIRECT && (call->gtCallCookie != nullptr))
|
|
{
|
|
assert(!call->IsUnmanaged());
|
|
|
|
GenTree* arg = call->gtCallCookie;
|
|
noway_assert(arg != nullptr);
|
|
call->gtCallCookie = nullptr;
|
|
|
|
// All architectures pass the cookie in a register.
|
|
InsertAfterThisOrFirst(comp, NewCallArg::Primitive(arg).WellKnown(WellKnownArg::PInvokeCookie));
|
|
// put destination into R10/EAX
|
|
arg = comp->gtClone(call->gtCallAddr, true);
|
|
InsertAfterThisOrFirst(comp, NewCallArg::Primitive(arg).WellKnown(WellKnownArg::PInvokeTarget));
|
|
|
|
// finally change this call to a helper call
|
|
call->gtCallType = CT_HELPER;
|
|
call->gtCallMethHnd = comp->eeFindHelper(CORINFO_HELP_PINVOKE_CALLI);
|
|
}
|
|
#if defined(FEATURE_READYTORUN)
|
|
// For arm/arm64, we dispatch code same as VSD using virtualStubParamInfo->GetReg()
|
|
// for indirection cell address, which ZapIndirectHelperThunk expects.
|
|
// For x64/x86 we use return address to get the indirection cell by disassembling the call site.
|
|
// That is not possible for fast tailcalls, so we only need this logic for fast tailcalls on xarch.
|
|
// Note that we call this before we know if something will be a fast tailcall or not.
|
|
// That's ok; after making something a tailcall, we will invalidate this information
|
|
// and reconstruct it if necessary. The tailcalling decision does not change since
|
|
// this is a non-standard arg in a register.
|
|
bool needsIndirectionCell = call->IsR2RRelativeIndir() && !call->IsDelegateInvoke();
|
|
#if defined(TARGET_XARCH)
|
|
needsIndirectionCell &= call->IsFastTailCall();
|
|
#endif
|
|
|
|
if (needsIndirectionCell)
|
|
{
|
|
assert(call->gtEntryPoint.addr != nullptr);
|
|
|
|
size_t addrValue = (size_t)call->gtEntryPoint.addr;
|
|
GenTree* indirectCellAddress = comp->gtNewIconHandleNode(addrValue, GTF_ICON_FTN_ADDR);
|
|
INDEBUG(indirectCellAddress->AsIntCon()->gtTargetHandle = (size_t)call->gtCallMethHnd);
|
|
|
|
#ifdef TARGET_ARM
|
|
// TODO-ARM: We currently do not properly kill this register in LSRA
|
|
// (see getKillSetForCall which does so only for VSD calls).
|
|
// We should be able to remove these two workarounds once we do so,
|
|
// however when this was tried there were significant regressions.
|
|
indirectCellAddress->SetRegNum(REG_R2R_INDIRECT_PARAM);
|
|
indirectCellAddress->SetDoNotCSE();
|
|
#endif
|
|
|
|
// Push the stub address onto the list of arguments.
|
|
NewCallArg indirCellAddrArg =
|
|
NewCallArg::Primitive(indirectCellAddress).WellKnown(WellKnownArg::R2RIndirectionCell);
|
|
InsertAfterThisOrFirst(comp, indirCellAddrArg);
|
|
}
|
|
#endif
|
|
|
|
ClassifierInfo info;
|
|
info.CallConv = call->GetUnmanagedCallConv();
|
|
// X86 tailcall helper is considered varargs, but not for ABI classification purposes.
|
|
info.IsVarArgs = call->IsVarargs() && !call->IsTailCallViaJitHelper();
|
|
info.HasThis = call->gtArgs.HasThisPointer();
|
|
info.HasRetBuff = call->gtArgs.HasRetBuffer();
|
|
PlatformClassifier classifier(info);
|
|
|
|
// Morph the user arguments
|
|
for (CallArg& arg : Args())
|
|
{
|
|
assert(arg.GetEarlyNode() != nullptr);
|
|
GenTree* argx = arg.GetEarlyNode();
|
|
|
|
// TODO-Cleanup: this is duplicative with the code in args morphing, however, also kicks in for
|
|
// "non-standard" (return buffer on ARM64) arguments. Fix args morphing and delete this code.
|
|
if (argx->OperIs(GT_LCL_ADDR))
|
|
{
|
|
argx->gtType = TYP_I_IMPL;
|
|
}
|
|
|
|
// Note we must use the signature types for making ABI decisions. This is especially important for structs,
|
|
// where the "argx" node can legally have a type that is not ABI-compatible with the one in the signature.
|
|
const var_types argSigType = arg.GetSignatureType();
|
|
const CORINFO_CLASS_HANDLE argSigClass = arg.GetSignatureClassHandle();
|
|
ClassLayout* argLayout = argSigClass == NO_CLASS_HANDLE ? nullptr : comp->typGetObjLayout(argSigClass);
|
|
|
|
ABIPassingInformation abiInfo;
|
|
|
|
// Some well known args have custom register assignment.
|
|
// These should not affect the placement of any other args or stack space required.
|
|
// Example: on AMD64 R10 and R11 are used for indirect VSD (generic interface) and cookie calls.
|
|
// TODO-Cleanup: Integrate this into the new style ABI classifiers.
|
|
regNumber nonStdRegNum = GetCustomRegister(comp, call->GetUnmanagedCallConv(), arg.GetWellKnownArg());
|
|
|
|
if (nonStdRegNum == REG_NA)
|
|
{
|
|
abiInfo = classifier.Classify(comp, argSigType, argLayout, arg.GetWellKnownArg());
|
|
}
|
|
else
|
|
{
|
|
ABIPassingSegment segment = ABIPassingSegment::InRegister(nonStdRegNum, 0, TARGET_POINTER_SIZE);
|
|
abiInfo = ABIPassingInformation::FromSegment(comp, segment);
|
|
}
|
|
|
|
JITDUMP("Argument %u ABI info: ", GetIndex(&arg));
|
|
DBEXEC(VERBOSE, abiInfo.Dump());
|
|
|
|
arg.NewAbiInfo = abiInfo;
|
|
arg.AbiInfo = CallArgABIInformation();
|
|
|
|
if (varTypeIsStruct(argSigType))
|
|
{
|
|
assert(argx == arg.GetEarlyNode());
|
|
|
|
Compiler::structPassingKind howToPassStruct;
|
|
var_types structBaseType =
|
|
comp->getArgTypeForStruct(argSigClass, &howToPassStruct, IsVarArgs(), argLayout->GetSize());
|
|
#if defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
|
|
if (arg.NewAbiInfo.HasAnyFloatingRegisterSegment())
|
|
{
|
|
// Struct passed according to hardware floating-point calling convention
|
|
assert(arg.NewAbiInfo.NumSegments <= 2);
|
|
assert(!arg.NewAbiInfo.HasAnyStackSegment());
|
|
if (arg.NewAbiInfo.NumSegments == 2)
|
|
{
|
|
// On LoongArch64, "getPrimitiveTypeForStruct" will incorrectly return "TYP_LONG"
|
|
// for "struct { float, float }", and retyping to a primitive here will cause the
|
|
// multi-reg morphing to not kick in (the struct in question needs to be passed in
|
|
// two FP registers). Here we just keep "structBaseType" as "TYP_STRUCT".
|
|
// TODO-LoongArch64: fix "getPrimitiveTypeForStruct".
|
|
structBaseType = TYP_STRUCT;
|
|
}
|
|
else
|
|
{
|
|
assert(arg.NewAbiInfo.NumSegments == 1);
|
|
structBaseType = arg.NewAbiInfo.Segment(0).GetRegisterType();
|
|
}
|
|
|
|
for (unsigned i = 0; i < arg.NewAbiInfo.NumSegments; ++i)
|
|
{
|
|
arg.AbiInfo.StructFloatFieldType[i] = arg.NewAbiInfo.Segment(i).GetRegisterType();
|
|
}
|
|
}
|
|
#endif // defined(TARGET_RISCV64) || defined(TARGET_LOONGARCH64)
|
|
arg.AbiInfo.PassedByRef = howToPassStruct == Compiler::SPK_ByReference;
|
|
arg.AbiInfo.ArgType = structBaseType == TYP_UNKNOWN ? argx->TypeGet() : structBaseType;
|
|
|
|
#ifdef UNIX_AMD64_ABI
|
|
comp->eeGetSystemVAmd64PassStructInRegisterDescriptor(argSigClass, &arg.AbiInfo.StructDesc);
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
arg.AbiInfo.ArgType = argx->TypeGet();
|
|
}
|
|
|
|
if (abiInfo.IsSplitAcrossRegistersAndStack())
|
|
{
|
|
m_hasStackArgs = true;
|
|
m_hasRegArgs = true;
|
|
|
|
arg.AbiInfo.SetSplit(true);
|
|
arg.AbiInfo.ByteOffset = 0;
|
|
unsigned regNumIndex = 0;
|
|
for (unsigned i = 0; i < abiInfo.NumSegments; i++)
|
|
{
|
|
const ABIPassingSegment& segment = abiInfo.Segment(i);
|
|
if (segment.IsPassedInRegister())
|
|
{
|
|
if (regNumIndex < MAX_ARG_REG_COUNT)
|
|
{
|
|
arg.AbiInfo.SetRegNum(regNumIndex, segment.GetRegister());
|
|
regNumIndex++;
|
|
}
|
|
|
|
arg.AbiInfo.NumRegs++;
|
|
}
|
|
else
|
|
{
|
|
assert(segment.GetStackOffset() == 0);
|
|
}
|
|
}
|
|
}
|
|
else if (abiInfo.HasAnyRegisterSegment())
|
|
{
|
|
// This is a register argument
|
|
m_hasRegArgs = true;
|
|
|
|
unsigned regNumIndex = 0;
|
|
for (unsigned i = 0; i < abiInfo.NumSegments; i++)
|
|
{
|
|
const ABIPassingSegment& segment = abiInfo.Segment(i);
|
|
|
|
if (regNumIndex < MAX_ARG_REG_COUNT)
|
|
{
|
|
arg.AbiInfo.SetRegNum(regNumIndex, segment.GetRegister());
|
|
regNumIndex++;
|
|
}
|
|
|
|
arg.AbiInfo.NumRegs++;
|
|
|
|
#ifdef TARGET_ARM
|
|
// Old style ABI info expects two registers counted for these segments.
|
|
if (segment.GetRegisterType() == TYP_DOUBLE)
|
|
{
|
|
arg.AbiInfo.NumRegs++;
|
|
|
|
if (argSigType == TYP_DOUBLE)
|
|
{
|
|
arg.AbiInfo.SetRegNum(regNumIndex, REG_NEXT(segment.GetRegister()));
|
|
regNumIndex++;
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
|
|
#if defined(UNIX_AMD64_ABI) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
|
|
INDEBUG(arg.CheckIsStruct());
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
assert(abiInfo.HasAnyStackSegment());
|
|
// We only expect to see one stack segment in these cases.
|
|
assert(abiInfo.NumSegments == 1);
|
|
// This is a stack argument
|
|
m_hasStackArgs = true;
|
|
const ABIPassingSegment& segment = abiInfo.Segment(0);
|
|
arg.AbiInfo.SetRegNum(0, REG_STK);
|
|
arg.AbiInfo.ByteOffset = segment.GetStackOffset();
|
|
}
|
|
|
|
// TODO-Cleanup: remove HFA information from VarDsc.
|
|
var_types hfaType = TYP_UNDEF;
|
|
bool isHfaArg = false;
|
|
unsigned hfaSlots = 0;
|
|
|
|
if (GlobalJitOptions::compFeatureHfa)
|
|
{
|
|
hfaType = comp->GetHfaType(argSigClass);
|
|
isHfaArg = varTypeIsValidHfaType(hfaType);
|
|
|
|
if (TargetOS::IsWindows && TargetArchitecture::IsArm64 && IsVarArgs())
|
|
{
|
|
// Make sure for vararg methods isHfaArg is not true.
|
|
isHfaArg = false;
|
|
}
|
|
|
|
if (isHfaArg)
|
|
{
|
|
hfaSlots = comp->GetHfaCount(argSigClass);
|
|
|
|
// If we have a HFA struct it's possible we transition from a method that originally
|
|
// only had integer types to now start having FP types. We have to communicate this
|
|
// through this flag since LSRA later on will use this flag to determine whether
|
|
// or not to track the FP register set.
|
|
//
|
|
comp->compFloatingPointUsed = true;
|
|
}
|
|
}
|
|
|
|
if (arg.AbiInfo.PassedByRef)
|
|
{
|
|
arg.AbiInfo.ByteSize = TARGET_POINTER_SIZE;
|
|
}
|
|
else
|
|
{
|
|
unsigned size = argLayout != nullptr ? argLayout->GetSize() : genTypeSize(argSigType);
|
|
|
|
// Apple arm64 reuses the same stack slot for multiple args in some
|
|
// cases; old ABI info reflects that in the size.
|
|
// Primitives and float HFAs do not necessarily take up full stack
|
|
// slots.
|
|
if (compAppleArm64Abi() && (!varTypeIsStruct(argSigType) || (isHfaArg && (hfaType == TYP_FLOAT))))
|
|
{
|
|
arg.AbiInfo.ByteSize = size;
|
|
}
|
|
else
|
|
{
|
|
arg.AbiInfo.ByteSize = roundUp(size, TARGET_POINTER_SIZE);
|
|
}
|
|
}
|
|
|
|
if (isHfaArg)
|
|
{
|
|
arg.AbiInfo.SetHfaType(hfaType, hfaSlots);
|
|
}
|
|
} // end foreach argument loop
|
|
|
|
m_argsStackSize = classifier.StackSize();
|
|
|
|
#ifdef DEBUG
|
|
if (VERBOSE)
|
|
{
|
|
JITDUMP("Args for call [%06u] %s after AddFinalArgsAndDetermineABIInfo:\n", comp->dspTreeID(call),
|
|
GenTree::OpName(call->gtOper));
|
|
for (CallArg& arg : Args())
|
|
{
|
|
arg.Dump(comp);
|
|
}
|
|
JITDUMP("\n");
|
|
}
|
|
#endif
|
|
|
|
m_abiInformationDetermined = true;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
// OutgoingArgsStackSize:
//   Compute the number of bytes allocated on the stack for arguments to this call.
//
// Remarks:
//   Note that even with no arguments, some ABIs may still allocate stack
//   space, which will be returned by this function.
//
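//   For example (as an illustration of the remark above), on Windows x64 the
//   calling convention reserves a 32-byte home area for the first four argument
//   registers, so MIN_ARG_AREA_FOR_CALL keeps the returned size at least that
//   large even for a call with no stack arguments.
//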
unsigned CallArgs::OutgoingArgsStackSize() const
{
    unsigned aligned = Compiler::GetOutgoingArgByteSize(m_argsStackSize);
    return max(aligned, (unsigned)MIN_ARG_AREA_FOR_CALL);
}

//------------------------------------------------------------------------
// CountArgs: Count the number of arguments.
//
unsigned CallArgs::CountArgs()
{
    unsigned numArgs = 0;
    for (CallArg& arg : Args())
    {
        numArgs++;
    }

    return numArgs;
}

//------------------------------------------------------------------------
// CountUserArgs: Count the number of arguments ignoring non-user ones, e.g.
//   the R2R indirection cell argument in a user function.
//
// Remarks:
//   See IsUserArg's comments.
//
unsigned CallArgs::CountUserArgs()
{
    unsigned numArgs = 0;
    for (CallArg& arg : Args())
    {
        if (arg.IsUserArg())
        {
            numArgs++;
        }
    }
    return numArgs;
}

//------------------------------------------------------------------------
// fgMorphArgs: Walk and transform (morph) the arguments of a call
//
// Arguments:
//    call - the call for which we are doing the argument morphing
//
// Return Value:
//    Like most morph methods, this method returns the morphed node,
//    though in this case there are currently no scenarios where the
//    node itself is re-created.
//
// Notes:
//    This calls CallArgs::AddFinalArgsAndDetermineABIInfo to determine ABI
//    information for the call. If it has already been determined, that method
//    will simply return.
//
//    This method changes the state of the call node. It may be called even
//    after it has already done the first round of morphing.
//
//    The first time it is called (i.e. during global morphing), this method
//    computes the "late arguments". This is when it determines which arguments
//    need to be evaluated to temps prior to the main argument setup, and which
//    can be directly evaluated into the argument location. It also creates a
//    second argument list (the late args) that does the final placement of the
//    arguments, e.g. into registers or onto the stack.
//
//    The "non-late arguments" perform the in-order evaluation of the arguments
//    that might have side effects, such as embedded stores, calls or possible
//    throws. In these cases, the argument and all earlier arguments must be
//    evaluated to temps.
//
//    On targets with a fixed outgoing argument area (FEATURE_FIXED_OUT_ARGS),
//    if we have any nested calls, we need to defer the copying of the argument
//    into the fixed argument area until after the call. If the argument did
//    not otherwise need to be computed into a temp, it is moved to the late
//    argument list and replaced in the "early" arg list with a placeholder node.
//    Also see `CallArgs::EvalArgsToTemps`.
//
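//    For illustration (a simplified sketch of the early/late split; f, g, x and
//    the temp names are hypothetical, and this is not actual dump output): for a
//    call such as f(x, g()) on a FEATURE_FIXED_OUT_ARGS target, the nested call
//    forces the earlier argument into a temp, roughly:
//
//       early: STORE_LCL_VAR<tmpX>(x), followed by the setup for g()
//       late:  LCL_VAR<tmpX>, LCL_VAR<tmpG> placed into their registers/slots
//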
#ifdef _PREFAST_
|
|
#pragma warning(push)
|
|
#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
|
|
#endif
|
|
GenTreeCall* Compiler::fgMorphArgs(GenTreeCall* call)
|
|
{
|
|
GenTreeFlags flagsSummary = GTF_EMPTY;
|
|
|
|
bool reMorphing = call->gtArgs.AreArgsComplete();
|
|
|
|
call->gtArgs.AddFinalArgsAndDetermineABIInfo(this, call);
|
|
JITDUMP("%sMorphing args for %d.%s:\n", (reMorphing) ? "Re" : "", call->gtTreeID, GenTree::OpName(call->gtOper));
|
|
|
|
// If we are remorphing, process the late arguments (which were determined by a previous caller).
|
|
if (reMorphing)
|
|
{
|
|
for (CallArg& arg : call->gtArgs.LateArgs())
|
|
{
|
|
arg.SetLateNode(fgMorphTree(arg.GetLateNode()));
|
|
flagsSummary |= arg.GetLateNode()->gtFlags;
|
|
}
|
|
}
|
|
|
|
// First we morph the argument subtrees ('this' pointer, arguments, etc.).
|
|
// During the first call to fgMorphArgs we also record the
|
|
// information about late arguments in CallArgs.
|
|
// This information is used later to construct the late args
|
|
|
|
// Note that this name is a misnomer - it indicates that there are struct args
|
|
// that are passed by value in more than one register or on stack.
|
|
bool hasMultiregStructArgs = false;
|
|
for (CallArg& arg : call->gtArgs.Args())
|
|
{
|
|
GenTree** parentArgx = &arg.EarlyNodeRef();
|
|
|
|
// Morph the arg node and update the node pointer.
|
|
GenTree* argx = *parentArgx;
|
|
if (argx == nullptr)
|
|
{
|
|
// Skip node that was moved to late args during remorphing, no work to be done.
|
|
assert(reMorphing);
|
|
continue;
|
|
}
|
|
|
|
argx = fgMorphTree(argx);
|
|
*parentArgx = argx;
|
|
|
|
if (arg.GetWellKnownArg() == WellKnownArg::ThisPointer)
|
|
{
|
|
// We may want to force 'this' into a temp because we want to use
|
|
// it to expand the call target in morph so that CSE can pick it
|
|
// up.
|
|
if (!reMorphing && call->IsExpandedEarly() && call->IsVirtualVtable() && !argx->OperIsLocal())
|
|
{
|
|
call->gtArgs.SetNeedsTemp(&arg);
|
|
}
|
|
}
|
|
|
|
// TODO-ARGS: Review this, is it really necessary to treat them specially here?
|
|
if (call->gtArgs.IsNonStandard(this, call, &arg) && arg.AbiInfo.IsPassedInRegisters())
|
|
{
|
|
flagsSummary |= argx->gtFlags;
|
|
continue;
|
|
}
|
|
assert(arg.AbiInfo.ByteSize > 0);
|
|
|
|
// For pointers to locals we can skip reporting GC info and also skip zero initialization.
|
|
// NOTE: We deferred this from the importer because of the inliner.
|
|
if (argx->OperIs(GT_LCL_ADDR))
|
|
{
|
|
argx->gtType = TYP_I_IMPL;
|
|
}
|
|
|
|
bool isStructArg = varTypeIsStruct(arg.GetSignatureType());
|
|
GenTree* argObj = argx->gtEffectiveVal();
|
|
bool makeOutArgCopy = false;
|
|
|
|
if (isStructArg && !reMorphing)
|
|
{
|
|
unsigned originalSize;
|
|
if (argObj->TypeGet() == TYP_STRUCT)
|
|
{
|
|
assert(argObj->OperIs(GT_BLK, GT_LCL_VAR, GT_LCL_FLD));
|
|
originalSize = argObj->GetLayout(this)->GetSize();
|
|
}
|
|
else
|
|
{
|
|
originalSize = genTypeSize(argx);
|
|
}
|
|
|
|
assert(argx->TypeGet() == arg.GetSignatureType());
|
|
assert(originalSize == info.compCompHnd->getClassSize(arg.GetSignatureClassHandle()));
|
|
|
|
// First, handle the case where the argument is passed by reference.
|
|
if (arg.AbiInfo.PassedByRef)
|
|
{
|
|
assert(arg.AbiInfo.ByteSize == TARGET_POINTER_SIZE);
|
|
makeOutArgCopy = true;
|
|
#ifdef UNIX_AMD64_ABI
|
|
assert(!"Structs are not passed by reference on x64/ux");
|
|
#endif // UNIX_AMD64_ABI
|
|
}
|
|
else // This is passed by value.
|
|
{
|
|
unsigned structSize = originalSize;
|
|
unsigned passingSize = originalSize;
|
|
|
|
// Check to see if we can transform this struct load (GT_BLK) into a GT_IND of the appropriate size.
|
|
// When it can do this is platform-dependent:
|
|
// - In general, it can be done for power of 2 structs that fit in a single register.
|
|
// - For ARM and ARM64 it must also be a non-HFA struct, or have a single field.
|
|
// - This is irrelevant for X86, since structs are always passed by value on the stack.
|
|
//
|
|
var_types structBaseType = arg.AbiInfo.ArgType;
|
|
bool argIsLocal = argObj->OperIsLocalRead();
|
|
bool canTransform = false;
|
|
|
|
if (structBaseType != TYP_STRUCT)
|
|
{
|
|
if (isPow2(passingSize))
|
|
{
|
|
canTransform =
|
|
(!arg.AbiInfo.IsHfaArg() || (passingSize == genTypeSize(arg.AbiInfo.GetHfaType())));
|
|
}
|
|
else
|
|
{
|
|
// We can pass non-power-of-2 structs in a register, but we can only transform in that
|
|
// case if the arg is a local.
|
|
canTransform = argIsLocal;
|
|
passingSize = genTypeSize(structBaseType);
|
|
}
|
|
}
|
|
#if !defined(TARGET_X86)
|
|
else
|
|
{
|
|
hasMultiregStructArgs = true;
|
|
}
|
|
#endif // !TARGET_X86
|
|
|
|
if (!canTransform)
|
|
{
|
|
#if defined(TARGET_AMD64)
|
|
#ifndef UNIX_AMD64_ABI
|
|
// On Windows structs are always copied and passed by reference (handled above) unless they are
|
|
// passed by value in a single register.
|
|
assert(arg.AbiInfo.GetStackSlotsNumber() == 1);
|
|
makeOutArgCopy = true;
|
|
#else // UNIX_AMD64_ABI
|
|
// On Unix, structs are always passed by value.
|
|
// We only need a copy if we have one of the following:
|
|
// - The sizes don't match for a non-lclVar argument.
|
|
// - We have a known struct type (e.g. SIMD) that requires multiple registers.
|
|
// TODO-Amd64-Unix-Throughput: We don't need to keep the structDesc in the argEntry if it's not
|
|
// actually passed in registers.
|
|
if (arg.AbiInfo.IsPassedInRegisters())
|
|
{
|
|
if (argObj->OperIs(GT_BLK))
|
|
{
|
|
if (passingSize != structSize)
|
|
{
|
|
makeOutArgCopy = true;
|
|
}
|
|
}
|
|
else if (!argIsLocal)
|
|
{
|
|
// This should only be the case of a value directly producing a known struct type.
|
|
assert(argObj->TypeGet() != TYP_STRUCT);
|
|
if (arg.AbiInfo.NumRegs > 1)
|
|
{
|
|
makeOutArgCopy = true;
|
|
}
|
|
}
|
|
}
|
|
#endif // UNIX_AMD64_ABI
|
|
#elif defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
|
|
if ((passingSize != structSize) && !argIsLocal)
|
|
{
|
|
makeOutArgCopy = true;
|
|
}
|
|
#endif // defined(TARGET_ARMARCH) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
|
|
}
|
|
else if (argObj->TypeGet() != structBaseType)
|
|
{
|
|
// We have a struct argument that fits into a register, and it is either a power of 2,
|
|
// or a local.
|
|
// Change our argument, as needed, into a value of the appropriate type.
|
|
assert((structBaseType != TYP_STRUCT) && (genTypeSize(structBaseType) >= originalSize));
|
|
|
|
if (argObj->OperIsLoad())
|
|
{
|
|
assert(argObj->AsIndir()->Size() == genTypeSize(structBaseType));
|
|
argObj->SetOper(GT_IND);
|
|
// Use ChangeType over argx to update types in COMMAs as well
|
|
argx->ChangeType(structBaseType);
|
|
}
|
|
else if (argObj->OperIsLocalRead())
|
|
{
|
|
unsigned lclNum = argObj->AsLclVarCommon()->GetLclNum();
|
|
LclVarDsc* varDsc = lvaGetDesc(lclNum);
|
|
unsigned lclOffset = argObj->AsLclVarCommon()->GetLclOffs();
|
|
unsigned argLclNum = BAD_VAR_NUM;
|
|
LclVarDsc* argVarDsc = nullptr;
|
|
|
|
if (varDsc->lvPromoted)
|
|
{
|
|
argLclNum = lvaGetFieldLocal(varDsc, lclOffset);
|
|
}
|
|
else if (lclOffset == 0)
|
|
{
|
|
argLclNum = lclNum;
|
|
}
|
|
|
|
// See if this local goes into the right register file.
|
|
// TODO-CQ: we could use a bitcast here, if it does not.
|
|
if (argLclNum != BAD_VAR_NUM)
|
|
{
|
|
argVarDsc = lvaGetDesc(argLclNum);
|
|
if ((genTypeSize(argVarDsc) != originalSize) ||
|
|
!varTypeUsesSameRegType(argVarDsc, structBaseType))
|
|
{
|
|
argLclNum = BAD_VAR_NUM;
|
|
}
|
|
}
|
|
|
|
if (argLclNum != BAD_VAR_NUM)
|
|
{
|
|
argx->ChangeType(argVarDsc->TypeGet());
|
|
argObj->SetOper(GT_LCL_VAR);
|
|
argObj->AsLclVar()->SetLclNum(argLclNum);
|
|
}
|
|
else if (varDsc->lvPromoted)
|
|
{
|
|
// Preserve independent promotion of "argObj" by decomposing the copy.
|
|
// TODO-CQ: condition this on the promotion actually being independent.
|
|
makeOutArgCopy = true;
|
|
}
|
|
#ifdef TARGET_AMD64
else if (!argObj->OperIs(GT_LCL_VAR) || !argObj->TypeIs(TYP_SIMD8)) // Handled by lowering.
#else  // !TARGET_AMD64
else
#endif // !TARGET_AMD64
|
|
{
|
|
// TODO-CQ: perform this transformation in lowering instead of here and
|
|
// avoid marking enregisterable structs DNER.
|
|
argx->ChangeType(structBaseType);
|
|
if (argObj->OperIs(GT_LCL_VAR))
|
|
{
|
|
argObj->SetOper(GT_LCL_FLD);
|
|
}
|
|
lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::SwizzleArg));
|
|
}
|
|
}
|
|
|
|
assert(varTypeIsEnregisterable(argObj) ||
|
|
(makeOutArgCopy && varTypeIsEnregisterable(structBaseType)));
|
|
}
|
|
else if (argObj->OperIs(GT_LCL_VAR) && lvaGetDesc(argObj->AsLclVar())->lvPromoted)
|
|
{
|
|
// Set DNER to block independent promotion.
|
|
lvaSetVarDoNotEnregister(argObj->AsLclVar()->GetLclNum()
|
|
DEBUGARG(DoNotEnregisterReason::IsStructArg));
|
|
}
|
|
}
|
|
}
|
|
|
|
if (makeOutArgCopy)
|
|
{
|
|
fgMakeOutgoingStructArgCopy(call, &arg);
|
|
|
|
if (arg.GetEarlyNode() != nullptr)
|
|
{
|
|
flagsSummary |= arg.GetEarlyNode()->gtFlags;
|
|
}
|
|
}
|
|
|
|
#if FEATURE_MULTIREG_ARGS
|
|
if (!isStructArg)
|
|
{
|
|
#ifdef TARGET_ARM
|
|
if ((arg.AbiInfo.ArgType == TYP_LONG) || (arg.AbiInfo.ArgType == TYP_DOUBLE))
|
|
{
|
|
assert((arg.AbiInfo.NumRegs == 2) || (arg.AbiInfo.GetStackSlotsNumber() == 2));
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
// We must have exactly one register or slot.
|
|
assert(((arg.AbiInfo.NumRegs == 1) && (arg.AbiInfo.GetStackSlotsNumber() == 0)) ||
|
|
((arg.AbiInfo.NumRegs == 0) && (arg.AbiInfo.GetStackSlotsNumber() == 1)));
|
|
}
|
|
}
|
|
#endif
|
|
|
|
#if defined(TARGET_X86)
|
|
if (isStructArg)
|
|
{
|
|
if (argx->OperIs(GT_LCL_VAR) &&
|
|
(lvaGetPromotionType(argx->AsLclVar()->GetLclNum()) == PROMOTION_TYPE_INDEPENDENT))
|
|
{
|
|
argx = fgMorphLclArgToFieldlist(argx->AsLclVar());
|
|
arg.SetEarlyNode(argx);
|
|
}
|
|
else if (argx->OperIs(GT_LCL_FLD))
|
|
{
|
|
lvaSetVarDoNotEnregister(argx->AsLclFld()->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField));
|
|
}
|
|
}
|
|
#endif // TARGET_X86
|
|
|
|
flagsSummary |= arg.GetEarlyNode()->gtFlags;
|
|
|
|
} // end foreach argument loop
|
|
|
|
if (!reMorphing)
|
|
{
|
|
call->gtArgs.ArgsComplete(this, call);
|
|
}
|
|
|
|
// Process the function address, if indirect call
|
|
|
|
if (call->gtCallType == CT_INDIRECT)
|
|
{
|
|
call->gtCallAddr = fgMorphTree(call->gtCallAddr);
|
|
// Const CSE may create a store node here
|
|
flagsSummary |= call->gtCallAddr->gtFlags;
|
|
}
|
|
|
|
#if FEATURE_FIXED_OUT_ARGS && defined(UNIX_AMD64_ABI)
|
|
if (!call->IsFastTailCall())
|
|
{
|
|
// This is currently required for the UNIX ABI to work correctly.
|
|
opts.compNeedToAlignFrame = true;
|
|
}
|
|
#endif // FEATURE_FIXED_OUT_ARGS && UNIX_AMD64_ABI
|
|
|
|
// Clear the ASG and EXCEPT (if possible) flags on the call node
|
|
call->gtFlags &= ~GTF_ASG;
|
|
if (!call->OperMayThrow(this))
|
|
{
|
|
call->gtFlags &= ~GTF_EXCEPT;
|
|
}
|
|
|
|
// Union in the side effect flags from the call's operands
|
|
call->gtFlags |= flagsSummary & GTF_ALL_EFFECT;
|
|
|
|
// If we are remorphing or don't have any register arguments or other arguments that need
|
|
// temps, then we don't need to call SortArgs() and EvalArgsToTemps().
|
|
//
|
|
if (!reMorphing && (call->gtArgs.HasRegArgs() || call->gtArgs.NeedsTemps()))
|
|
{
|
|
// Do the 'defer or eval to temp' analysis.
|
|
call->gtArgs.EvalArgsToTemps(this, call);
|
|
}
|
|
|
|
if (hasMultiregStructArgs)
|
|
{
|
|
fgMorphMultiregStructArgs(call);
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
if (verbose)
|
|
{
|
|
JITDUMP("Args for [%06u].%s after fgMorphArgs:\n", dspTreeID(call), GenTree::OpName(call->gtOper));
|
|
for (CallArg& arg : call->gtArgs.Args())
|
|
{
|
|
arg.Dump(this);
|
|
}
|
|
printf("OutgoingArgsStackSize is %u\n\n", call->gtArgs.OutgoingArgsStackSize());
|
|
}
|
|
#endif
|
|
return call;
|
|
}
|
|
#ifdef _PREFAST_
|
|
#pragma warning(pop)
|
|
#endif
|
|
|
|
//-----------------------------------------------------------------------------
|
|
// fgMorphMultiregStructArgs: Locate the TYP_STRUCT arguments and
|
|
// call fgMorphMultiregStructArg on each of them.
|
|
//
|
|
// Arguments:
|
|
// call : a GenTreeCall node that has one or more TYP_STRUCT arguments.
|
|
//
|
|
// Notes:
|
|
// We only call fgMorphMultiregStructArg for struct arguments that are not passed as simple types.
|
|
// It will ensure that the struct arguments are in the correct form.
|
|
// If this method fails to find any TYP_STRUCT arguments it will assert.
|
|
//
|
|
void Compiler::fgMorphMultiregStructArgs(GenTreeCall* call)
|
|
{
|
|
bool foundStructArg = false;
|
|
GenTreeFlags flagsSummary = GTF_EMPTY;
|
|
|
|
#ifdef TARGET_X86
|
|
assert(!"Logic error: no MultiregStructArgs for X86");
|
|
#endif
|
|
#if defined(TARGET_AMD64) && !defined(UNIX_AMD64_ABI)
|
|
assert(!"Logic error: no MultiregStructArgs for Windows X64 ABI");
|
|
#endif
|
|
|
|
for (CallArg& arg : call->gtArgs.Args())
|
|
{
|
|
if ((arg.AbiInfo.ArgType == TYP_STRUCT) && !arg.AbiInfo.PassedByRef)
|
|
{
|
|
GenTree*& argx = (arg.GetLateNode() != nullptr) ? arg.LateNodeRef() : arg.EarlyNodeRef();
|
|
|
|
if (!argx->OperIs(GT_FIELD_LIST))
|
|
{
|
|
argx = fgMorphMultiregStructArg(&arg);
|
|
foundStructArg = true;
|
|
}
|
|
}
|
|
}
|
|
|
|
// We should only call this method when we actually have one or more multireg struct args
|
|
assert(foundStructArg);
|
|
|
|
// Update the flags
|
|
call->gtFlags |= (flagsSummary & GTF_ALL_EFFECT);
|
|
}
|
|
|
|
//-----------------------------------------------------------------------------
// fgMorphMultiregStructArg: Given a TYP_STRUCT arg from a call argument list,
//   morph the argument as needed to be passed correctly.
//
// Arguments:
//    arg - The argument containing a struct node.
//
// Notes:
//    The arg node must be a GT_BLK or GT_LCL_VAR or GT_LCL_FLD of TYP_STRUCT.
//    If the arg node is a lclVar passed on the stack, we will ensure that any lclVars that must be on the
//    stack are marked as doNotEnregister, and then we return.
//
//    If it is passed by register, we mutate the argument into the GT_FIELD_LIST form
//    which is only used for struct arguments.
//
//    If arg is a LclVar we check if it is struct promoted and has the right number of fields,
//    and if they are at the appropriate offsets we will use the struct promoted fields
//    in the GT_FIELD_LIST nodes that we create.
//    If we have a GT_LCL_VAR that isn't struct promoted or doesn't meet the requirements,
//    we will use a set of GT_LCL_FLD nodes to access the various portions of the struct;
//    this also forces the struct to be stack allocated into the local frame.
//    For the GT_BLK case we will clone the address expression and generate two (or more)
//    indirections.
//
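//    For example (an illustrative sketch, not actual dump output; V03 is a
//    hypothetical local): a 16-byte struct local passed in two 8-byte registers
//    that cannot use its promoted fields ends up as
//
//       FIELD_LIST(LCL_FLD long V03 [+0], LCL_FLD long V03 [+8])
//
//    with V03 marked doNotEnregister as a consequence.
//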
GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg)
|
|
{
|
|
GenTree* argNode = arg->GetNode();
|
|
assert(varTypeIsStruct(argNode));
|
|
|
|
#if !defined(TARGET_ARMARCH) && !defined(UNIX_AMD64_ABI) && !defined(TARGET_LOONGARCH64) && !defined(TARGET_RISCV64)
|
|
NYI("fgMorphMultiregStructArg requires implementation for this target");
|
|
#endif
|
|
|
|
bool isSplit = arg->NewAbiInfo.IsSplitAcrossRegistersAndStack();
|
|
#ifdef TARGET_ARM
|
|
if ((isSplit && (arg->NewAbiInfo.CountRegsAndStackSlots() > 4)) ||
|
|
(!isSplit && arg->NewAbiInfo.HasAnyStackSegment()))
|
|
#else
|
|
if (!arg->NewAbiInfo.HasAnyRegisterSegment())
|
|
#endif
|
|
{
|
|
if (argNode->OperIs(GT_LCL_VAR) &&
|
|
(lvaGetPromotionType(argNode->AsLclVar()->GetLclNum()) == PROMOTION_TYPE_INDEPENDENT))
|
|
{
|
|
// TODO-Arm-CQ: support decomposing "large" promoted structs into field lists.
|
|
if (!isSplit)
|
|
{
|
|
argNode = fgMorphLclArgToFieldlist(argNode->AsLclVar());
|
|
}
|
|
else
|
|
{
|
|
// Set DNER to block independent promotion.
|
|
lvaSetVarDoNotEnregister(argNode->AsLclVar()->GetLclNum() DEBUGARG(DoNotEnregisterReason::IsStructArg));
|
|
}
|
|
}
|
|
else if (argNode->OperIs(GT_LCL_FLD))
|
|
{
|
|
lvaSetVarDoNotEnregister(argNode->AsLclFld()->GetLclNum() DEBUGARG(DoNotEnregisterReason::LocalField));
|
|
}
|
|
|
|
return argNode;
|
|
}
|
|
|
|
#if FEATURE_MULTIREG_ARGS
|
|
ClassLayout* layout = argNode->TypeIs(TYP_STRUCT) ? argNode->GetLayout(this) : nullptr;
|
|
unsigned structSize = argNode->TypeIs(TYP_STRUCT) ? layout->GetSize() : genTypeSize(argNode);
|
|
|
|
if (layout != nullptr)
|
|
{
|
|
assert(ClassLayout::AreCompatible(typGetObjLayout(arg->GetSignatureClassHandle()), layout));
|
|
}
|
|
else
|
|
{
|
|
assert(varTypeIsSIMD(argNode) && varTypeIsSIMD(arg->GetSignatureType()));
|
|
}
|
|
|
|
// We should still have a TYP_STRUCT
|
|
assert(varTypeIsStruct(argNode));
|
|
|
|
GenTreeFieldList* newArg = nullptr;
|
|
|
|
// Are we passing a struct LclVar?
|
|
//
|
|
if (argNode->OperIs(GT_LCL_VAR))
|
|
{
|
|
GenTreeLclVarCommon* lclNode = argNode->AsLclVarCommon();
|
|
unsigned lclNum = lclNode->GetLclNum();
|
|
LclVarDsc* varDsc = lvaGetDesc(lclNum);
|
|
|
|
varDsc->lvIsMultiRegArg = true;
|
|
|
|
JITDUMP("Multireg struct argument V%02u : ", lclNum);
|
|
JITDUMPEXEC(arg->Dump(this));
|
|
|
|
// Try to see if we can use the promoted fields to pass this argument.
|
|
//
|
|
if (varDsc->lvPromoted && (varDsc->lvFieldCnt == arg->NewAbiInfo.CountRegsAndStackSlots()))
|
|
{
|
|
bool fieldsMatch = true;
|
|
|
|
for (unsigned i = 0; i < arg->NewAbiInfo.NumSegments; i++)
|
|
{
|
|
const ABIPassingSegment& seg = arg->NewAbiInfo.Segment(i);
|
|
if (seg.IsPassedInRegister())
|
|
{
|
|
unsigned fieldLclNum = lvaGetFieldLocal(varDsc, seg.Offset);
|
|
if (fieldLclNum == BAD_VAR_NUM)
|
|
{
|
|
fieldsMatch = false;
|
|
break;
|
|
}
|
|
|
|
var_types fieldType = lvaGetDesc(fieldLclNum)->TypeGet();
|
|
var_types regType = genActualType(seg.GetRegisterType());
|
|
|
|
if (!varTypeUsesSameRegType(fieldType, regType))
|
|
{
|
|
// TODO-CQ: We should be able to tolerate mismatches by inserting GT_BITCAST in lowering.
|
|
//
|
|
JITDUMP("Multireg struct V%02u will be passed using GT_LCL_FLD because of type mismatch: "
|
|
"register type is %s, field local V%02u's type is %s\n",
|
|
lclNum, varTypeName(regType), fieldLclNum, varTypeName(fieldType));
|
|
fieldsMatch = false;
|
|
break;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
for (unsigned offset = 0; offset < seg.Size; offset += TARGET_POINTER_SIZE)
|
|
{
|
|
if (lvaGetFieldLocal(varDsc, seg.Offset + offset) == BAD_VAR_NUM)
|
|
{
|
|
fieldsMatch = false;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (fieldsMatch)
|
|
{
|
|
newArg = fgMorphLclArgToFieldlist(lclNode);
|
|
}
|
|
}
|
|
}
|
|
|
|
// If we were not able to use the promoted fields...
|
|
//
|
|
if (newArg == nullptr)
|
|
{
|
|
if (!arg->NewAbiInfo.HasAnyRegisterSegment())
|
|
{
|
|
// We leave this stack passed argument alone.
|
|
return argNode;
|
|
}
|
|
|
|
auto createSlotAccess = [=](unsigned offset, var_types type) -> GenTree* {
|
|
assert(offset < structSize);
|
|
|
|
if (type == TYP_UNDEF)
|
|
{
|
|
if ((structSize - offset) < TARGET_POINTER_SIZE)
|
|
{
|
|
// ArgsComplete has made it so that for loads from memory
|
|
// we will only see the easily handleable cases here, For
|
|
// locals we may see odd sizes, but for those we can load
|
|
// "too much" from the stack frame, and thus can just round
|
|
// up the size.
|
|
assert(isPow2(structSize - offset) || argNode->OperIsLocalRead());
|
|
|
|
switch (structSize - offset)
|
|
{
|
|
case 1:
|
|
type = TYP_UBYTE;
|
|
break;
|
|
case 2:
|
|
type = TYP_USHORT;
|
|
break;
|
|
case 3:
|
|
case 4:
|
|
type = TYP_INT;
|
|
break;
|
|
case 5:
|
|
case 6:
|
|
case 7:
|
|
case 8:
|
|
type = TYP_LONG;
|
|
break;
|
|
default:
|
|
unreached();
|
|
}
|
|
|
|
#ifdef TARGET_ARM64
|
|
if (argNode->OperIsLocalRead())
|
|
{
|
|
// For arm64 it's beneficial to consider all tails to
|
|
// be TYP_I_IMPL to allow more ldp's.
|
|
type = TYP_I_IMPL;
|
|
}
|
|
#endif
|
|
}
|
|
else if ((layout != nullptr) && ((offset % TARGET_POINTER_SIZE) == 0))
|
|
{
|
|
type = layout->GetGCPtrType(offset / TARGET_POINTER_SIZE);
|
|
}
|
|
else
|
|
{
|
|
type = TYP_I_IMPL;
|
|
}
|
|
}
|
|
|
|
if (argNode->OperIsLocalRead())
|
|
{
|
|
GenTreeLclFld* lclFld = gtNewLclFldNode(argNode->AsLclVarCommon()->GetLclNum(), genActualType(type),
|
|
argNode->AsLclVarCommon()->GetLclOffs() + offset);
|
|
return lclFld;
|
|
}
|
|
else
|
|
{
|
|
assert(argNode->OperIsLoad());
|
|
GenTree* indirAddr = argNode->AsIndir()->Addr();
|
|
GenTree* addr;
|
|
|
|
if (offset == 0)
|
|
{
|
|
addr = indirAddr;
|
|
}
|
|
else
|
|
{
|
|
assert((indirAddr->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) == 0);
|
|
|
|
GenTree* indirAddrDup = gtCloneExpr(indirAddr);
|
|
GenTree* offsetNode = gtNewIconNode(offset, TYP_I_IMPL);
|
|
addr = gtNewOperNode(GT_ADD, indirAddr->TypeGet(), indirAddrDup, offsetNode);
|
|
}
|
|
|
|
GenTree* indir = gtNewIndir(type, addr);
|
|
return indir;
|
|
}
|
|
};
|
|
|
|
newArg = new (this, GT_FIELD_LIST) GenTreeFieldList();
|
|
|
|
for (unsigned i = 0; i < arg->NewAbiInfo.NumSegments; i++)
|
|
{
|
|
const ABIPassingSegment& seg = arg->NewAbiInfo.Segment(i);
|
|
if (seg.IsPassedInRegister())
|
|
{
|
|
var_types regType = seg.GetRegisterType();
|
|
// If passed in a float reg then keep that type; otherwise let
|
|
// createSlotAccess get the type from the layout.
|
|
var_types slotType = varTypeUsesFloatReg(regType) ? regType : TYP_UNDEF;
|
|
GenTree* access = createSlotAccess(seg.Offset, slotType);
|
|
newArg->AddField(this, access, seg.Offset, access->TypeGet());
|
|
}
|
|
else
|
|
{
|
|
for (unsigned slotOffset = 0; slotOffset < seg.Size; slotOffset += TARGET_POINTER_SIZE)
|
|
{
|
|
unsigned layoutOffset = seg.Offset + slotOffset;
|
|
GenTree* access = createSlotAccess(layoutOffset, TYP_UNDEF);
|
|
newArg->AddField(this, access, layoutOffset, access->TypeGet());
|
|
}
|
|
}
|
|
}
|
|
|
|
if (argNode->OperIsLocalRead())
|
|
{
|
|
lvaSetVarDoNotEnregister(argNode->AsLclVarCommon()->GetLclNum()
|
|
DEBUGARG(DoNotEnregisterReason::LocalField));
|
|
}
|
|
}
|
|
|
|
// If we reach here we should have set newArg to something
|
|
noway_assert(newArg != nullptr);
|
|
|
|
JITDUMP("fgMorphMultiregStructArg created tree:\n");
|
|
DISPTREE(newArg);
|
|
|
|
argNode = newArg; // consider calling fgMorphTree(newArg);
|
|
|
|
#endif // FEATURE_MULTIREG_ARGS
|
|
|
|
return argNode;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
// fgMorphLclArgToFieldlist: Morph a GT_LCL_VAR node to a GT_FIELD_LIST of its promoted fields
//
// Arguments:
//    lcl - The GT_LCL_VAR node we will transform
//
// Return value:
//    The new GT_FIELD_LIST that we have created.
//
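// For example (illustrative only; the field locals V10/V11 are hypothetical):
// for a promoted struct { float x; float y; } whose fields live in V10 and V11,
// the result is
//
//    FIELD_LIST(LCL_VAR float V10 [+0], LCL_VAR float V11 [+4])
//
// where each field is added at its lvFldOffset with its own type.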
GenTreeFieldList* Compiler::fgMorphLclArgToFieldlist(GenTreeLclVarCommon* lcl)
{
    LclVarDsc* varDsc = lvaGetDesc(lcl);
    assert(varDsc->lvPromoted);
    unsigned fieldCount  = varDsc->lvFieldCnt;
    unsigned fieldLclNum = varDsc->lvFieldLclStart;

    GenTreeFieldList* fieldList = new (this, GT_FIELD_LIST) GenTreeFieldList();
    for (unsigned i = 0; i < fieldCount; i++)
    {
        LclVarDsc* fieldVarDsc = lvaGetDesc(fieldLclNum);
        GenTree*   lclVar      = gtNewLclvNode(fieldLclNum, fieldVarDsc->TypeGet());
        fieldList->AddField(this, lclVar, fieldVarDsc->lvFldOffset, fieldVarDsc->TypeGet());
        fieldLclNum++;
    }
    return fieldList;
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgMakeOutgoingStructArgCopy: make a copy of a struct variable if necessary,
|
|
// to pass to a callee.
|
|
//
|
|
// Arguments:
|
|
// call - call being processed
|
|
// arg - arg for the call
|
|
//
|
|
// The arg is updated if necessary with the copy.
|
|
//
|
|
void Compiler::fgMakeOutgoingStructArgCopy(GenTreeCall* call, CallArg* arg)
|
|
{
|
|
GenTree* argx = arg->GetEarlyNode();
|
|
|
|
#if FEATURE_IMPLICIT_BYREFS
|
|
// If we're optimizing, see if we can avoid making a copy.
|
|
//
|
|
// We don't need a copy if this is the last use of the local.
|
|
//
|
|
if (opts.OptimizationEnabled() && arg->AbiInfo.PassedByRef)
|
|
{
|
|
GenTree* implicitByRefLclAddr;
|
|
target_ssize_t implicitByRefLclOffs;
|
|
GenTreeLclVarCommon* implicitByRefLcl =
|
|
argx->IsImplicitByrefParameterValuePostMorph(this, &implicitByRefLclAddr, &implicitByRefLclOffs);
|
|
|
|
GenTreeLclVarCommon* lcl = implicitByRefLcl;
|
|
if ((lcl == nullptr) && argx->OperIsLocal())
|
|
{
|
|
lcl = argx->AsLclVarCommon();
|
|
implicitByRefLclOffs = lcl->GetLclOffs();
|
|
}
|
|
|
|
if (lcl != nullptr)
|
|
{
|
|
const unsigned varNum = lcl->GetLclNum();
|
|
LclVarDsc* const varDsc = lvaGetDesc(varNum);
|
|
|
|
// We generally use liveness to figure out if we can omit creating
|
|
// this copy. However, even without liveness (e.g. due to too many
|
|
// tracked locals), we also handle some other cases:
|
|
//
|
|
// * (must not copy) If the call is a tail call, the use is a last use.
|
|
// We must skip the copy if we have a fast tail call.
|
|
//
|
|
// * (may not copy) if the call is noreturn, the use is a last use.
|
|
// We also check for just one reference here as we are not doing
|
|
// alias analysis of the call's parameters, or checking if the call
|
|
// site is not within some try region.
|
|
//
|
|
bool omitCopy = call->IsTailCall();
|
|
|
|
if (!omitCopy && fgGlobalMorph)
|
|
{
|
|
omitCopy = (varDsc->lvIsLastUseCopyOmissionCandidate || (implicitByRefLcl != nullptr)) &&
|
|
!varDsc->lvPromoted && !varDsc->lvIsStructField && ((lcl->gtFlags & GTF_VAR_DEATH) != 0);
|
|
}
|
|
|
|
// Disallow the argument from potentially aliasing the return
|
|
// buffer.
|
|
if (omitCopy)
|
|
{
|
|
GenTreeLclVarCommon* retBuffer = gtCallGetDefinedRetBufLclAddr(call);
|
|
if ((retBuffer != nullptr) && (retBuffer->GetLclNum() == varNum))
|
|
{
|
|
unsigned retBufferSize = typGetObjLayout(call->gtRetClsHnd)->GetSize();
|
|
target_ssize_t retBufferStart = retBuffer->GetLclOffs();
|
|
target_ssize_t retBufferEnd = retBufferStart + static_cast<target_ssize_t>(retBufferSize);
|
|
|
|
unsigned argSize = arg->GetSignatureType() == TYP_STRUCT
|
|
? typGetObjLayout(arg->GetSignatureClassHandle())->GetSize()
|
|
: genTypeSize(arg->GetSignatureType());
|
|
target_ssize_t implByrefStart = implicitByRefLclOffs;
|
|
target_ssize_t implByrefEnd = implByrefStart + static_cast<target_ssize_t>(argSize);
|
|
|
|
bool disjoint = (retBufferEnd <= implByrefStart) || (implByrefEnd <= retBufferStart);
|
|
omitCopy = disjoint;
|
|
}
|
|
}
|
|
|
|
if (omitCopy)
|
|
{
|
|
if (implicitByRefLcl != nullptr)
|
|
{
|
|
arg->SetEarlyNode(implicitByRefLclAddr);
|
|
}
|
|
else
|
|
{
|
|
uint16_t offs = lcl->GetLclOffs();
|
|
lcl->ChangeOper(GT_LCL_ADDR);
|
|
lcl->AsLclFld()->SetLclOffs(offs);
|
|
lcl->gtType = TYP_I_IMPL;
|
|
lcl->gtFlags &= ~GTF_ALL_EFFECT;
|
|
lvaSetVarAddrExposed(varNum DEBUGARG(AddressExposedReason::ESCAPE_ADDRESS));
|
|
|
|
// Copy prop could allow creating another later use of lcl if there are live assertions about it.
|
|
fgKillDependentAssertions(varNum DEBUGARG(lcl));
|
|
}
|
|
|
|
JITDUMP("did not need to make outgoing copy for last use of V%02d\n", varNum);
|
|
return;
|
|
}
|
|
}
|
|
}
|
|
#endif
|
|
|
|
JITDUMP("making an outgoing copy for struct arg\n");
|
|
assert(!call->IsTailCall() || !arg->AbiInfo.PassedByRef);
|
|
|
|
CORINFO_CLASS_HANDLE copyBlkClass = arg->GetSignatureClassHandle();
|
|
unsigned tmp = 0;
|
|
bool found = false;
|
|
|
|
// Attempt to find a local we have already used for an outgoing struct and reuse it.
|
|
// We do not reuse within a statement.
|
|
if (!opts.MinOpts())
|
|
{
|
|
found = ForEachHbvBitSet(*fgAvailableOutgoingArgTemps, [&](indexType lclNum) {
|
|
LclVarDsc* varDsc = lvaGetDesc((unsigned)lclNum);
|
|
ClassLayout* layout = varDsc->GetLayout();
|
|
if (!layout->IsBlockLayout() && (layout->GetClassHandle() == copyBlkClass))
|
|
{
|
|
tmp = (unsigned)lclNum;
|
|
JITDUMP("reusing outgoing struct arg V%02u\n", tmp);
|
|
fgAvailableOutgoingArgTemps->clearBit(lclNum);
|
|
return HbvWalk::Abort;
|
|
}
|
|
|
|
return HbvWalk::Continue;
|
|
}) == HbvWalk::Abort;
|
|
}
|
|
|
|
// Create the CopyBlk tree and insert it.
|
|
if (!found)
|
|
{
|
|
// Get a new temp
|
|
// Here We don't need unsafe value cls check, since the addr of this temp is used only in copyblk.
|
|
tmp = lvaGrabTemp(true DEBUGARG("by-value struct argument"));
|
|
lvaSetStruct(tmp, copyBlkClass, false);
|
|
if (call->IsVarargs())
|
|
{
|
|
lvaSetStructUsedAsVarArg(tmp);
|
|
}
|
|
}
|
|
|
|
if (fgUsedSharedTemps != nullptr)
|
|
{
|
|
fgUsedSharedTemps->Push(tmp);
|
|
}
|
|
else
|
|
{
|
|
assert(!fgGlobalMorph);
|
|
}
|
|
|
|
call->gtArgs.SetNeedsTemp(arg);
|
|
|
|
// Copy the valuetype to the temp
|
|
GenTree* copyBlk = gtNewStoreLclVarNode(tmp, argx);
|
|
copyBlk = fgMorphCopyBlock(copyBlk);
|
|
|
|
#if FEATURE_FIXED_OUT_ARGS
|
|
|
|
// For fixed out args we create the setup node here; EvalArgsToTemps knows
|
|
// to handle the case of "already have a setup node" properly.
|
|
arg->SetEarlyNode(copyBlk);
|
|
arg->SetLateNode(call->gtArgs.MakeTmpArgNode(this, arg, tmp));
|
|
|
|
#else // !FEATURE_FIXED_OUT_ARGS
|
|
|
|
// Structs are always on the stack, and thus never need temps
|
|
// so we have to put the copy and temp all into one expression.
|
|
GenTree* argNode = call->gtArgs.MakeTmpArgNode(this, arg, tmp);
|
|
|
|
// Change the expression to "(tmp=val),tmp"
|
|
argNode = gtNewOperNode(GT_COMMA, argNode->TypeGet(), copyBlk, argNode);
|
|
|
|
arg->SetEarlyNode(argNode);
|
|
|
|
#endif // !FEATURE_FIXED_OUT_ARGS
|
|
}
|
|
|
|
/*****************************************************************************
|
|
*
|
|
* A little helper used to rearrange nested commutative operations. The
|
|
* effect is that nested associative, commutative operations are transformed
|
|
* into a 'left-deep' tree, i.e. into something like this:
|
|
*
|
|
* (((a op b) op c) op d) op...
|
|
*/
|
|
|
|
#if REARRANGE_ADDS
|
|
|
|
void Compiler::fgMoveOpsLeft(GenTree* tree)
|
|
{
|
|
GenTree* op1;
|
|
GenTree* op2;
|
|
genTreeOps oper;
|
|
|
|
do
|
|
{
|
|
op1 = tree->AsOp()->gtOp1;
|
|
op2 = tree->AsOp()->gtOp2;
|
|
oper = tree->OperGet();
|
|
|
|
noway_assert(GenTree::OperIsCommutative(oper));
|
|
noway_assert(oper == GT_ADD || oper == GT_XOR || oper == GT_OR || oper == GT_AND || oper == GT_MUL);
|
|
noway_assert(!varTypeIsFloating(tree->TypeGet()) || !opts.genFPorder);
|
|
noway_assert(oper == op2->gtOper);
|
|
|
|
// Commutativity doesn't hold if overflow checks are needed
|
|
|
|
if (tree->gtOverflowEx() || op2->gtOverflowEx())
|
|
{
|
|
return;
|
|
}
|
|
|
|
if (gtIsActiveCSE_Candidate(op2))
|
|
{
|
|
// If we have marked op2 as a CSE candidate,
|
|
// we can't perform a commutative reordering
|
|
// because any value numbers that we computed for op2
|
|
// will be incorrect after performing a commutative reordering
|
|
//
|
|
return;
|
|
}
|
|
|
|
if (oper == GT_MUL && (op2->gtFlags & GTF_MUL_64RSLT))
|
|
{
|
|
return;
|
|
}
|
|
|
|
// Check for GTF_ADDRMODE_NO_CSE flag on add/mul Binary Operators
|
|
if (((oper == GT_ADD) || (oper == GT_MUL)) && ((tree->gtFlags & GTF_ADDRMODE_NO_CSE) != 0))
|
|
{
|
|
return;
|
|
}
|
|
|
|
noway_assert(!tree->gtOverflowEx() && !op2->gtOverflowEx());
|
|
|
|
GenTree* ad1 = op2->AsOp()->gtOp1;
|
|
GenTree* ad2 = op2->AsOp()->gtOp2;
|
|
|
|
// Compiler::optOptimizeBools() can create GT_OR of two GC pointers yielding a GT_INT
|
|
// We can not reorder such GT_OR trees
|
|
//
|
|
if (varTypeIsGC(ad1->TypeGet()) != varTypeIsGC(op2->TypeGet()))
|
|
{
|
|
break;
|
|
}
|
|
|
|
// Don't split up a byref calculation and create a new byref. E.g.,
|
|
// [byref]+ (ref, [int]+ (int, int)) => [byref]+ ([byref]+ (ref, int), int).
|
|
// Doing this transformation could create a situation where the first
|
|
// addition (that is, [byref]+ (ref, int) ) creates a byref pointer that
|
|
// no longer points within the ref object. If a GC happens, the byref won't
|
|
// get updated. This can happen, for instance, if one of the int components
|
|
// is negative. It also requires the address generation be in a fully-interruptible
|
|
// code region.
|
|
//
|
|
if (varTypeIsGC(op1->TypeGet()) && op2->TypeGet() == TYP_I_IMPL)
|
|
{
|
|
assert(varTypeIsGC(tree->TypeGet()) && (oper == GT_ADD));
|
|
break;
|
|
}
|
|
|
|
/* Change "(x op (y op z))" to "(x op y) op z" */
|
|
/* ie. "(op1 op (ad1 op ad2))" to "(op1 op ad1) op ad2" */
|
|
|
|
GenTree* new_op1 = op2;
|
|
|
|
new_op1->AsOp()->gtOp1 = op1;
|
|
new_op1->AsOp()->gtOp2 = ad1;
|
|
|
|
/* Change the flags. */
|
|
|
|
// Make sure we aren't throwing away any flags
|
|
noway_assert((new_op1->gtFlags &
|
|
~(GTF_MAKE_CSE | GTF_DONT_CSE | // It is ok that new_op1->gtFlags contains GTF_DONT_CSE flag.
|
|
GTF_REVERSE_OPS | // The reverse ops flag also can be set, it will be re-calculated
|
|
GTF_NODE_MASK | GTF_ALL_EFFECT | GTF_UNSIGNED)) == 0);
|
|
|
|
new_op1->gtFlags =
|
|
(new_op1->gtFlags & (GTF_NODE_MASK | GTF_DONT_CSE)) | // Make sure we propagate GTF_DONT_CSE flag.
|
|
(op1->gtFlags & GTF_ALL_EFFECT) | (ad1->gtFlags & GTF_ALL_EFFECT);
|
|
|
|
/* Retype new_op1 if it has become (or is no longer) a GC ptr. */
|
|
|
|
if (varTypeIsGC(op1->TypeGet()))
|
|
{
|
|
noway_assert((varTypeIsGC(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
|
|
oper == GT_ADD) || // byref(ref + (int+int))
|
|
(varTypeIsI(tree->TypeGet()) && op2->TypeGet() == TYP_I_IMPL &&
|
|
oper == GT_OR)); // int(gcref | int(gcref|intval))
|
|
|
|
new_op1->gtType = tree->gtType;
|
|
}
|
|
else if (varTypeIsGC(ad2->TypeGet()))
|
|
{
|
|
// Neither ad1 nor op1 is GC, so new_op1 isn't either
|
|
noway_assert(op1->gtType == TYP_I_IMPL && ad1->gtType == TYP_I_IMPL);
|
|
new_op1->gtType = TYP_I_IMPL;
|
|
}
|
|
|
|
// If new_op1 is a new expression. Assign it a new unique value number.
|
|
// vnStore is null before the ValueNumber phase has run
|
|
if (vnStore != nullptr)
|
|
{
|
|
// We can only keep the old value number on new_op1 if both op1 and ad2
|
|
// have the same non-NoVN value numbers. Since op is commutative, comparing
|
|
// only ad2 and op1 is enough.
|
|
if ((op1->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
|
|
(ad2->gtVNPair.GetLiberal() == ValueNumStore::NoVN) ||
|
|
(ad2->gtVNPair.GetLiberal() != op1->gtVNPair.GetLiberal()))
|
|
{
|
|
new_op1->gtVNPair.SetBoth(vnStore->VNForExpr(nullptr, new_op1->TypeGet()));
|
|
}
|
|
}
|
|
|
|
tree->AsOp()->gtOp1 = new_op1;
|
|
tree->AsOp()->gtOp2 = ad2;
|
|
|
|
/* If 'new_op1' is now the same nested op, process it recursively */
|
|
|
|
if ((ad1->gtOper == oper) && !ad1->gtOverflowEx())
|
|
{
|
|
fgMoveOpsLeft(new_op1);
|
|
}
|
|
|
|
/* If 'ad2' is now the same nested op, process it
|
|
* Instead of recursion, we set up op1 and op2 for the next loop.
|
|
*/
|
|
|
|
op1 = new_op1;
|
|
op2 = ad2;
|
|
} while ((op2->gtOper == oper) && !op2->gtOverflowEx());
|
|
|
|
return;
|
|
}
|
|
|
|
#endif
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgMorphIndexAddr: Expand a GT_INDEX_ADDR node and fully morph the child operands.
|
|
//
|
|
// We expand the GT_INDEX_ADDR node into a larger tree that evaluates the array
|
|
// base and index. The simplest expansion is a GT_COMMA with a GT_BOUNDS_CHECK.
|
|
// For complex array or index expressions one or more GT_COMMA stores are inserted
|
|
// so that we only evaluate the array or index expressions once.
|
|
//
|
|
// The fully expanded tree is then morphed. This causes gtFoldExpr to
|
|
// perform local constant prop and reorder the constants in the tree and
|
|
// fold them.
|
|
//
|
|
// Arguments:
|
|
//    indexAddr - The INDEX_ADDR tree to morph
|
|
//
|
|
// Return Value:
|
|
// The resulting tree.
|
|
//
|
|
GenTree* Compiler::fgMorphIndexAddr(GenTreeIndexAddr* indexAddr)
|
|
{
|
|
const int MAX_ARR_COMPLEXITY = 4;
|
|
const int MAX_INDEX_COMPLEXITY = 4;
|
|
|
|
var_types elemTyp = indexAddr->gtElemType;
|
|
unsigned elemSize = indexAddr->gtElemSize;
|
|
uint8_t elemOffs = static_cast<uint8_t>(indexAddr->gtElemOffset);
|
|
CORINFO_CLASS_HANDLE elemStructType = indexAddr->gtStructElemClass;
|
|
|
|
noway_assert(!varTypeIsStruct(elemTyp) || (elemStructType != NO_CLASS_HANDLE));
|
|
|
|
// In minopts, we will not be expanding GT_INDEX_ADDR in order to minimize the size of the IR. As minopts
|
|
// compilation time is roughly proportional to the size of the IR, this helps keep compilation times down.
|
|
// Furthermore, this representation typically saves on code size in minopts w.r.t. the complete expansion
|
|
// performed when optimizing, as it does not require LclVar nodes (which are always stack loads/stores in
|
|
// minopts).
|
|
//
|
|
// When we *are* optimizing, we fully expand GT_INDEX_ADDR to:
|
|
// 1. Evaluate the array address expression and store the result in a temp if the expression is complex or
|
|
// side-effecting.
|
|
// 2. Evaluate the array index expression and store the result in a temp if the expression is complex or
|
|
// side-effecting.
|
|
// 3. Perform an explicit bounds check: GT_BOUNDS_CHECK(index, GT_ARR_LENGTH(array))
|
|
// 4. Compute the address of the element that will be accessed:
|
|
// GT_ADD(GT_ADD(array, firstElementOffset), GT_MUL(index, elementSize)) OR
|
|
// GT_ADD(GT_ADD(array, GT_ADD(GT_MUL(index, elementSize), firstElementOffset)))
|
|
// 5. Wrap the address in a GT_ADD_ADDR (the information saved there will later be used by VN).
|
|
//
|
|
// This expansion explicitly exposes the bounds check and the address calculation to the optimizer, which allows
|
|
// for more straightforward bounds-check removal, CSE, etc.
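    // As a rough source-level sketch (illustrative names, not the actual IR), for "a[i]" with element
    // size S and first-element offset F the optimized expansion behaves like:
    //
    //     tmpArr = a; tmpIdx = i;          // temps only if the expressions are complex or side-effecting
    //     BOUNDS_CHECK(tmpIdx, ARR_LENGTH(tmpArr));
    //     addr   = ARR_ADDR(tmpArr + F + tmpIdx * S);
    //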
|
|
if (opts.MinOpts())
|
|
{
|
|
indexAddr->Arr() = fgMorphTree(indexAddr->Arr());
|
|
indexAddr->Index() = fgMorphTree(indexAddr->Index());
|
|
indexAddr->AddAllEffectsFlags(indexAddr->Arr(), indexAddr->Index());
|
|
|
|
// Mark the indirection node as needing a range check if necessary.
|
|
// Note this will always be true unless JitSkipArrayBoundCheck() is used
|
|
if (indexAddr->IsBoundsChecked())
|
|
{
|
|
fgAddCodeRef(compCurBB, SCK_RNGCHK_FAIL);
|
|
}
|
|
|
|
return indexAddr;
|
|
}
|
|
|
|
#ifdef FEATURE_SIMD
|
|
if (varTypeIsStruct(elemTyp) && structSizeMightRepresentSIMDType(elemSize))
|
|
{
|
|
elemTyp = impNormStructType(elemStructType);
|
|
}
|
|
#endif // FEATURE_SIMD
|
|
|
|
// TODO-CQ: support precise equivalence classes for SIMD-typed arrays in VN.
|
|
if (elemTyp != TYP_STRUCT)
|
|
{
|
|
elemStructType = NO_CLASS_HANDLE;
|
|
}
|
|
|
|
GenTree* arrRef = indexAddr->Arr();
|
|
GenTree* index = indexAddr->Index();
|
|
GenTree* arrRefDefn = nullptr; // non-NULL if we need to allocate a temp for the arrRef expression
|
|
GenTree* indexDefn = nullptr; // non-NULL if we need to allocate a temp for the index expression
|
|
GenTreeBoundsChk* boundsCheck = nullptr;
|
|
|
|
// If we're doing range checking, introduce a GT_BOUNDS_CHECK node for the address.
|
|
if (indexAddr->IsBoundsChecked())
|
|
{
|
|
GenTree* arrRef2 = nullptr; // The second copy will be used in array address expression
|
|
GenTree* index2 = nullptr;
|
|
|
|
// If the arrRef or index expressions involves a store, a call, or reads from global memory,
|
|
// then we *must* allocate a temporary in which to "localize" those values, to ensure that the
|
|
// same values are used in the bounds check and the actual dereference.
|
|
// Also we allocate the temporary when the expression is sufficiently complex/expensive. We special
|
|
// case some simple nodes for which CQ analysis shows it is a little better to do that here than
|
|
// leaving them to CSE.
|
|
//
|
|
// TODO-Bug: GLOB_REF is not yet set for all trees in pre-order morph.
|
|
//
|
|
if (((arrRef->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) != 0) ||
|
|
gtComplexityExceeds(arrRef, MAX_ARR_COMPLEXITY) || arrRef->OperIs(GT_LCL_FLD) ||
|
|
(arrRef->OperIs(GT_LCL_VAR) && lvaIsLocalImplicitlyAccessedByRef(arrRef->AsLclVar()->GetLclNum())))
|
|
{
|
|
unsigned arrRefTmpNum = lvaGrabTemp(true DEBUGARG("arr expr"));
|
|
arrRefDefn = gtNewTempStore(arrRefTmpNum, arrRef);
|
|
arrRef = gtNewLclvNode(arrRefTmpNum, lvaGetDesc(arrRefTmpNum)->TypeGet());
|
|
arrRef2 = gtNewLclvNode(arrRefTmpNum, lvaGetDesc(arrRefTmpNum)->TypeGet());
|
|
}
|
|
else
|
|
{
|
|
arrRef2 = gtCloneExpr(arrRef);
|
|
noway_assert(arrRef2 != nullptr);
|
|
}
|
|
|
|
if (((index->gtFlags & (GTF_ASG | GTF_CALL | GTF_GLOB_REF)) != 0) ||
|
|
gtComplexityExceeds(index, MAX_ARR_COMPLEXITY) || index->OperIs(GT_LCL_FLD) ||
|
|
(index->OperIs(GT_LCL_VAR) && lvaIsLocalImplicitlyAccessedByRef(index->AsLclVar()->GetLclNum())))
|
|
{
|
|
unsigned indexTmpNum = lvaGrabTemp(true DEBUGARG("index expr"));
|
|
indexDefn = gtNewTempStore(indexTmpNum, index);
|
|
index = gtNewLclvNode(indexTmpNum, lvaGetDesc(indexTmpNum)->TypeGet());
|
|
index2 = gtNewLclvNode(indexTmpNum, lvaGetDesc(indexTmpNum)->TypeGet());
|
|
}
|
|
else
|
|
{
|
|
index2 = gtCloneExpr(index);
|
|
noway_assert(index2 != nullptr);
|
|
}
|
|
|
|
// Next introduce a GT_BOUNDS_CHECK node
|
|
var_types bndsChkType = TYP_INT; // By default, try to use 32-bit comparison for array bounds check.
|
|
|
|
#ifdef TARGET_64BIT
|
|
// The CLI Spec allows an array to be indexed by either an int32 or a native int. In the case
|
|
// of a 64 bit architecture this means the array index can potentially be a TYP_LONG, so for this case,
|
|
// the comparison will have to be widened to 64 bits.
|
|
if (index->TypeGet() == TYP_I_IMPL)
|
|
{
|
|
bndsChkType = TYP_I_IMPL;
|
|
}
|
|
#endif // TARGET_64BIT
|
|
|
|
GenTree* arrLen = gtNewArrLen(TYP_INT, arrRef, (int)indexAddr->gtLenOffset, compCurBB);
|
|
|
|
if (bndsChkType != TYP_INT)
|
|
{
|
|
arrLen = gtNewCastNode(bndsChkType, arrLen, true, bndsChkType);
|
|
}
|
|
|
|
boundsCheck = new (this, GT_BOUNDS_CHECK) GenTreeBoundsChk(index, arrLen, SCK_RNGCHK_FAIL);
|
|
boundsCheck->gtInxType = elemTyp;
|
|
|
|
// Now we'll switch to using the second copies for arrRef and index
|
|
// to compute the address expression
|
|
arrRef = arrRef2;
|
|
index = index2;
|
|
}
|
|
|
|
// Create the "addr" which is "*(arrRef + ((index * elemSize) + elemOffs))"
|
|
GenTree* addr;
|
|
|
|
#ifdef TARGET_64BIT
|
|
// Widen 'index' on 64-bit targets
|
|
if (index->TypeGet() != TYP_I_IMPL)
|
|
{
|
|
if (index->OperGet() == GT_CNS_INT)
|
|
{
|
|
index->gtType = TYP_I_IMPL;
|
|
}
|
|
else
|
|
{
|
|
index = gtNewCastNode(TYP_I_IMPL, index, true, TYP_I_IMPL);
|
|
}
|
|
}
|
|
#endif // TARGET_64BIT
|
|
|
|
/* Scale the index value if necessary */
|
|
if (elemSize > 1)
|
|
{
|
|
GenTree* size = gtNewIconNode(elemSize, TYP_I_IMPL);
|
|
|
|
// Fix 392756 WP7 Crossgen
|
|
//
|
|
// During codegen optGetArrayRefScaleAndIndex() makes the assumption that op2 of a GT_MUL node
|
|
// is a constant and is not capable of handling CSE'ing the elemSize constant into a lclvar.
|
|
// Hence to prevent the constant from becoming a CSE we mark it as NO_CSE.
|
|
//
|
|
size->gtFlags |= GTF_DONT_CSE;
|
|
|
|
/* Multiply by the array element size */
|
|
addr = gtNewOperNode(GT_MUL, TYP_I_IMPL, index, size);
|
|
}
|
|
else
|
|
{
|
|
addr = index;
|
|
}
|
|
|
|
// Be careful to only create the byref pointer when the full index expression is added to the array reference.
|
|
// We don't want to create a partial byref address expression that doesn't include the full index offset:
|
|
// a byref must point within the containing object. It is dangerous (especially when optimizations come into
|
|
// play) to create a "partial" byref that doesn't point exactly to the correct object; there is risk that
|
|
// the partial byref will not point within the object, and thus not get updated correctly during a GC.
|
|
// This is mostly a risk in fully-interruptible code regions.
|
|
|
|
// We can generate two types of trees for "addr":
|
|
//
|
|
// 1) "arrRef + (index + elemOffset)"
|
|
// 2) "(arrRef + elemOffset) + index"
|
|
//
|
|
// XArch has powerful addressing modes such as [base + index*scale + offset] so it's fine with 1),
|
|
    // while for Arm it is better to try to make an invariant sub-tree as large as possible, which is usually
|
|
// "(arrRef + elemOffset)" and is CSE/LoopHoisting friendly => produces better codegen.
|
|
// 2) should still be safe from GC's point of view since both ADD operations are byref and point to
|
|
// within the object so GC will be able to correctly track and update them.
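    // For example, with a 4-byte element and a first-element offset of 16, shape 1) is roughly
    //     ADD(byref) { arrRef, ADD(i_impl) { MUL(i_impl) { index, 4 }, 16 } }
    // while shape 2) is
    //     ADD(byref) { ADD(byref) { arrRef, 16 }, MUL(i_impl) { index, 4 } }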
|
|
|
|
bool groupArrayRefWithElemOffset = false;
|
|
#ifdef TARGET_ARMARCH
|
|
groupArrayRefWithElemOffset = true;
|
|
    // TODO: in some cases even on ARM it is better to use shape 1), because if "index" is invariant and "arrRef" is not,
|
|
// we at least will be able to hoist/CSE "index + elemOffset" in some cases.
|
|
// See https://github.com/dotnet/runtime/pull/61293#issuecomment-964146497
|
|
|
|
// Don't use 2) for structs to reduce number of size regressions
|
|
if (varTypeIsStruct(elemTyp))
|
|
{
|
|
groupArrayRefWithElemOffset = false;
|
|
}
|
|
#endif
|
|
|
|
// First element's offset
|
|
GenTree* elemOffset = gtNewIconNode(elemOffs, TYP_I_IMPL);
|
|
if (groupArrayRefWithElemOffset)
|
|
{
|
|
GenTree* basePlusOffset = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, elemOffset);
|
|
addr = gtNewOperNode(GT_ADD, TYP_BYREF, basePlusOffset, addr);
|
|
}
|
|
else
|
|
{
|
|
addr = gtNewOperNode(GT_ADD, TYP_I_IMPL, addr, elemOffset);
|
|
addr = gtNewOperNode(GT_ADD, TYP_BYREF, arrRef, addr);
|
|
}
|
|
|
|
// TODO-Throughput: bash the INDEX_ADDR to ARR_ADDR here instead of creating a new node.
|
|
addr = new (this, GT_ARR_ADDR) GenTreeArrAddr(addr, elemTyp, elemStructType, elemOffs);
|
|
|
|
if (indexAddr->IsNotNull())
|
|
{
|
|
addr->gtFlags |= GTF_ARR_ADDR_NONNULL;
|
|
}
|
|
|
|
GenTree* tree = addr;
|
|
|
|
// Prepend the bounds check and the store trees that were created (if any).
|
|
if (boundsCheck != nullptr)
|
|
{
|
|
// This is changing a value dependency (INDEX_ADDR node) into a flow
|
|
// dependency, so make sure this dependency remains visible. Also, the
|
|
// JIT is not allowed to create arbitrary byrefs, so we must make sure
|
|
// the address is not reordered with the bounds check.
|
|
boundsCheck->SetHasOrderingSideEffect();
|
|
addr->SetHasOrderingSideEffect();
|
|
|
|
tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), boundsCheck, tree);
|
|
fgAddCodeRef(compCurBB, boundsCheck->gtThrowKind);
|
|
}
|
|
|
|
if (indexDefn != nullptr)
|
|
{
|
|
tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), indexDefn, tree);
|
|
}
|
|
|
|
if (arrRefDefn != nullptr)
|
|
{
|
|
tree = gtNewOperNode(GT_COMMA, tree->TypeGet(), arrRefDefn, tree);
|
|
}
|
|
|
|
JITDUMP("fgMorphIndexAddr (before remorph):\n")
|
|
DISPTREE(tree)
|
|
|
|
tree = fgMorphTree(tree);
|
|
DBEXEC(tree == indexAddr, tree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED);
|
|
|
|
JITDUMP("fgMorphIndexAddr (after remorph):\n")
|
|
DISPTREE(tree)
|
|
|
|
return tree;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgMorphLeafLocal: Fully morph a leaf local node.
|
|
//
|
|
// Arguments:
|
|
// lclNode - The node to morph
|
|
//
|
|
// Return Value:
|
|
// The fully morphed tree.
|
|
//
|
|
GenTree* Compiler::fgMorphLeafLocal(GenTreeLclVarCommon* lclNode)
|
|
{
|
|
assert(lclNode->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_LCL_ADDR));
|
|
|
|
GenTree* expandedTree = fgMorphExpandLocal(lclNode);
|
|
if (expandedTree != nullptr)
|
|
{
|
|
expandedTree = fgMorphTree(expandedTree);
|
|
DBEXEC(expandedTree == lclNode, expandedTree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED);
|
|
return expandedTree;
|
|
}
|
|
|
|
if (lclNode->OperIs(GT_LCL_ADDR))
|
|
{
|
|
// No further morphing necessary.
|
|
return lclNode;
|
|
}
|
|
|
|
LclVarDsc* varDsc = lvaGetDesc(lclNode);
|
|
// For last-use copy omission candidates we will address expose them when
|
|
// we get to the call that passes their address, but they are not actually
|
|
// address exposed in the full sense, so we allow standard assertion prop
|
|
// on them until that point. However, we must still mark them with
|
|
// GTF_GLOB_REF to avoid illegal reordering with the call passing their
|
|
// address.
|
|
if (varDsc->IsAddressExposed()
|
|
#if FEATURE_IMPLICIT_BYREFS
|
|
|| varDsc->lvIsLastUseCopyOmissionCandidate
|
|
#endif
|
|
)
|
|
{
|
|
lclNode->gtFlags |= GTF_GLOB_REF;
|
|
}
|
|
|
|
// Small-typed arguments and aliased locals are normalized on load. Other small-typed locals are
|
|
// normalized on store. If this is one of the former, insert a narrowing cast on the load.
|
|
    // i.e. Convert: var-short --> cast-short(var-int)
|
|
//
|
|
if (fgGlobalMorph && lclNode->OperIs(GT_LCL_VAR) && varDsc->lvNormalizeOnLoad() &&
|
|
/* TODO-ASG: delete this zero-diff quirk */ lclNode->CanCSE())
|
|
{
|
|
var_types lclVarType = varDsc->TypeGet();
|
|
|
|
// Assertion prop can tell us to omit adding a cast here. This is useful when the local is a small-typed
|
|
// parameter that is passed in a register: in that case, the ABI specifies that the upper bits might be
|
|
// invalid, but the assertion guarantees us that we have normalized when we wrote it.
|
|
if (optLocalAssertionProp &&
|
|
optAssertionIsSubrange(lclNode, IntegralRange::ForType(lclVarType), apLocal) != NO_ASSERTION_INDEX)
|
|
{
|
|
// The previous assertion can guarantee us that if this node gets
|
|
// assigned a register, it will be normalized already. It is still
|
|
// possible that this node ends up being in memory, in which case
|
|
// normalization will still be needed, so we better have the right
|
|
// type.
|
|
assert(lclNode->TypeGet() == varDsc->TypeGet());
|
|
return lclNode;
|
|
}
|
|
|
|
lclNode->gtType = TYP_INT;
|
|
fgMorphTreeDone(lclNode);
|
|
GenTree* cast = gtNewCastNode(TYP_INT, lclNode, false, lclVarType);
|
|
fgMorphTreeDone(cast);
|
|
|
|
return cast;
|
|
}
|
|
|
|
return lclNode;
|
|
}
|
|
|
|
#ifdef TARGET_X86
|
|
//------------------------------------------------------------------------
|
|
// fgMorphExpandStackArgForVarArgs: Expand a stack arg node for varargs.
|
|
//
|
|
// Expands the node to use the varargs cookie as the base address, indirecting
|
|
// off of it if necessary, similar to how implicit by-ref parameters are morphed
|
|
// on non-x86 targets.
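//
// For example (illustrative): a load of such a parameter at stack offset 8 becomes roughly
// "IND(SUB(LCL_VAR<I_IMPL> lvaVarargsBaseOfStkArgs, 8 - lclOffs))", i.e. the parameter is addressed
// relative to the varargs cookie rather than via its own frame location.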
|
|
//
|
|
// Arguments:
|
|
// lclNode - The local node to (possibly) morph
|
|
//
|
|
// Return Value:
|
|
//    The new tree for "lclNode" if it was expanded, in which case the caller is
//    expected to morph it further; otherwise "nullptr".
|
|
//
|
|
GenTree* Compiler::fgMorphExpandStackArgForVarArgs(GenTreeLclVarCommon* lclNode)
|
|
{
|
|
if (!lvaIsArgAccessedViaVarArgsCookie(lclNode->GetLclNum()))
|
|
{
|
|
return nullptr;
|
|
}
|
|
|
|
LclVarDsc* varDsc = lvaGetDesc(lclNode);
|
|
const ABIPassingInformation& abiInfo = lvaGetParameterABIInfo(lclNode->GetLclNum());
|
|
assert(abiInfo.HasExactlyOneStackSegment());
|
|
|
|
GenTree* argsBaseAddr = gtNewLclvNode(lvaVarargsBaseOfStkArgs, TYP_I_IMPL);
|
|
ssize_t offset = (ssize_t)abiInfo.Segment(0).GetStackOffset() - lclNode->GetLclOffs();
|
|
assert(abiInfo.Segment(0).GetStackOffset() ==
|
|
(varDsc->GetStackOffset() - codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES));
|
|
GenTree* offsetNode = gtNewIconNode(offset, TYP_I_IMPL);
|
|
GenTree* argAddr = gtNewOperNode(GT_SUB, TYP_I_IMPL, argsBaseAddr, offsetNode);
|
|
|
|
GenTree* argNode;
|
|
if (lclNode->OperIsLocalStore())
|
|
{
|
|
GenTree* value = lclNode->Data();
|
|
argNode = lclNode->TypeIs(TYP_STRUCT) ? gtNewStoreBlkNode(lclNode->GetLayout(this), argAddr, value)
|
|
: gtNewStoreIndNode(lclNode->TypeGet(), argAddr, value)->AsIndir();
|
|
}
|
|
else if (lclNode->OperIsLocalRead())
|
|
{
|
|
argNode = lclNode->TypeIs(TYP_STRUCT) ? gtNewBlkIndir(lclNode->GetLayout(this), argAddr)
|
|
: gtNewIndir(lclNode->TypeGet(), argAddr);
|
|
}
|
|
else
|
|
{
|
|
argNode = argAddr;
|
|
}
|
|
|
|
return argNode;
|
|
}
|
|
#endif
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgMorphExpandImplicitByRefArg: Morph an implicit by-ref parameter.
|
|
//
|
|
// Arguments:
|
|
// lclNode - The local node to morph
|
|
//
|
|
// Return Value:
|
|
// The expanded tree for "lclNode", which the caller is expected to
|
|
// morph further.
|
|
//
|
|
GenTree* Compiler::fgMorphExpandImplicitByRefArg(GenTreeLclVarCommon* lclNode)
|
|
{
|
|
unsigned lclNum = lclNode->GetLclNum();
|
|
LclVarDsc* varDsc = lvaGetDesc(lclNum);
|
|
unsigned fieldOffset = 0;
|
|
unsigned newLclNum = BAD_VAR_NUM;
|
|
bool isStillLastUse = false;
|
|
|
|
assert(lvaIsImplicitByRefLocal(lclNum) ||
|
|
(varDsc->lvIsStructField && lvaIsImplicitByRefLocal(varDsc->lvParentLcl)));
|
|
|
|
if (lvaIsImplicitByRefLocal(lclNum))
|
|
{
|
|
// The SIMD transformation to coalesce contiguous references to SIMD vector fields will re-invoke
|
|
// the traversal to mark address-taken locals. So, we may encounter a tree that has already been
|
|
// transformed to TYP_BYREF. If we do, leave it as-is.
|
|
if (lclNode->OperIs(GT_LCL_VAR) && lclNode->TypeIs(TYP_BYREF))
|
|
{
|
|
return nullptr;
|
|
}
|
|
|
|
if (varDsc->lvPromoted)
|
|
{
|
|
// fgRetypeImplicitByRefArgs created a new promoted struct local to represent this arg.
|
|
// Rewrite the node to refer to it.
|
|
assert(varDsc->lvFieldLclStart != 0);
|
|
|
|
lclNode->SetLclNum(varDsc->lvFieldLclStart);
|
|
return lclNode;
|
|
}
|
|
|
|
newLclNum = lclNum;
|
|
|
|
// As a special case, for implicit byref args where we undid promotion we
|
|
// can still know whether the use of the implicit byref local is a last
|
|
// use, and whether we can omit a copy when passed as an argument (the
|
|
// common reason why promotion is undone).
|
|
//
|
|
// We skip this propagation for the fields of the promoted local. Those are
|
|
// going to be transformed into accesses off of the parent and we cannot
|
|
// know here if this is going to be the last use of the parent local (this
|
|
// would require tracking a full life set on the side, which we do not do
|
|
// in morph).
|
|
//
|
|
if (!varDsc->lvPromoted)
|
|
{
|
|
if (varDsc->lvFieldLclStart != 0)
|
|
{
|
|
// Reference to whole implicit byref parameter that was promoted
|
|
// but isn't anymore. Check if all fields are dying.
|
|
GenTreeFlags allFieldsDying = lvaGetDesc(varDsc->lvFieldLclStart)->AllFieldDeathFlags();
|
|
isStillLastUse = (lclNode->gtFlags & allFieldsDying) == allFieldsDying;
|
|
}
|
|
else
|
|
{
|
|
// Was never promoted, treated as single value.
|
|
isStillLastUse = (lclNode->gtFlags & GTF_VAR_DEATH) != 0;
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// This was a field reference to an implicit-by-reference struct parameter that was dependently promoted.
|
|
newLclNum = varDsc->lvParentLcl;
|
|
fieldOffset = varDsc->lvFldOffset;
|
|
}
|
|
|
|
// Add a level of indirection to this node. The "base" will be a local node referring to "newLclNum".
|
|
// We will also add an offset, and, if the original "lclNode" represents a location, a dereference.
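    // For example (illustrative), a read of a field at offset 8 of such a parameter becomes
    // "IND(ADD(LCL_VAR<byref> param, 8))", and a whole-struct read becomes "BLK(LCL_VAR<byref> param)".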
|
|
GenTree* data = lclNode->OperIsLocalStore() ? lclNode->Data() : nullptr;
|
|
bool isLoad = lclNode->OperIsLocalRead();
|
|
unsigned offset = lclNode->GetLclOffs() + fieldOffset;
|
|
var_types argNodeType = lclNode->TypeGet();
|
|
ClassLayout* argNodeLayout = (argNodeType == TYP_STRUCT) ? lclNode->GetLayout(this) : nullptr;
|
|
|
|
JITDUMP("\nRewriting an implicit by-ref parameter reference:\n");
|
|
DISPTREE(lclNode);
|
|
|
|
lclNode->ChangeType(TYP_BYREF);
|
|
lclNode->ChangeOper(GT_LCL_VAR);
|
|
lclNode->SetLclNum(newLclNum);
|
|
lclNode->SetAllEffectsFlags(GTF_EMPTY); // Implicit by-ref parameters cannot be address-exposed.
|
|
|
|
if (isStillLastUse)
|
|
{
|
|
lclNode->gtFlags |= GTF_VAR_DEATH;
|
|
}
|
|
|
|
GenTree* addrNode = lclNode;
|
|
if (offset != 0)
|
|
{
|
|
addrNode = gtNewOperNode(GT_ADD, TYP_BYREF, addrNode, gtNewIconNode(offset, TYP_I_IMPL));
|
|
}
|
|
|
|
// Note: currently, we have to conservatively treat all indirections off of implicit byrefs
|
|
// as global. This is because we lose the information on whether the original local's address
|
|
// was exposed when we retype it in "fgRetypeImplicitByRefArgs".
|
|
//
|
|
GenTree* newArgNode;
|
|
if (data != nullptr)
|
|
{
|
|
newArgNode = (argNodeType == TYP_STRUCT) ? gtNewStoreBlkNode(argNodeLayout, addrNode, data)
|
|
: gtNewStoreIndNode(argNodeType, addrNode, data)->AsIndir();
|
|
}
|
|
else if (isLoad)
|
|
{
|
|
newArgNode =
|
|
(argNodeType == TYP_STRUCT) ? gtNewBlkIndir(argNodeLayout, addrNode) : gtNewIndir(argNodeType, addrNode);
|
|
}
|
|
else
|
|
{
|
|
newArgNode = addrNode;
|
|
}
|
|
|
|
JITDUMP("Transformed into:\n");
|
|
DISPTREE(newArgNode);
|
|
JITDUMP("\n");
|
|
|
|
return newArgNode;
|
|
}
|
|
|
|
GenTree* Compiler::fgMorphExpandLocal(GenTreeLclVarCommon* lclNode)
|
|
{
|
|
GenTree* expandedTree = nullptr;
|
|
#ifdef TARGET_X86
|
|
expandedTree = fgMorphExpandStackArgForVarArgs(lclNode);
|
|
#else
|
|
#if FEATURE_IMPLICIT_BYREFS
|
|
if (fgGlobalMorph)
|
|
{
|
|
LclVarDsc* dsc = lvaGetDesc(lclNode);
|
|
if (dsc->lvIsImplicitByRef || (dsc->lvIsStructField && lvaIsImplicitByRefLocal(dsc->lvParentLcl)))
|
|
{
|
|
expandedTree = fgMorphExpandImplicitByRefArg(lclNode);
|
|
}
|
|
}
|
|
#endif
|
|
#endif
|
|
|
|
if (expandedTree != nullptr)
|
|
{
|
|
return expandedTree;
|
|
}
|
|
|
|
// Small-typed arguments and aliased locals are normalized on load. Other small-typed
|
|
// locals are normalized on store. If it is the latter case, insert the cast on source.
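    // For example (roughly): "STORE_LCL_VAR<short>(lcl, value)" becomes
    // "STORE_LCL_VAR<int>(lcl, CAST<short>(value))" when the local is normalize-on-store.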
|
|
if (fgGlobalMorph && lclNode->OperIs(GT_STORE_LCL_VAR) && genActualTypeIsInt(lclNode))
|
|
{
|
|
LclVarDsc* varDsc = lvaGetDesc(lclNode);
|
|
|
|
if (varDsc->lvNormalizeOnStore())
|
|
{
|
|
GenTree* value = lclNode->Data();
|
|
noway_assert(genActualTypeIsInt(value));
|
|
|
|
lclNode->gtType = TYP_INT;
|
|
|
|
if (fgCastNeeded(value, varDsc->TypeGet()))
|
|
{
|
|
lclNode->Data() = gtNewCastNode(TYP_INT, value, false, varDsc->TypeGet());
|
|
return lclNode;
|
|
}
|
|
}
|
|
}
|
|
|
|
return nullptr;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgGetFieldMorphingTemp: Get a local to use for field morphing.
|
|
//
|
|
// We will reuse locals created when morphing field addresses, as well as
|
|
// fields with large offsets.
|
|
//
|
|
// Arguments:
|
|
// fieldNode - The field node
|
|
//
|
|
// Return Value:
|
|
// The local number.
|
|
//
|
|
unsigned Compiler::fgGetFieldMorphingTemp(GenTreeFieldAddr* fieldNode)
|
|
{
|
|
assert(fieldNode->IsInstance());
|
|
|
|
unsigned lclNum = BAD_VAR_NUM;
|
|
|
|
if (fieldNode->IsOffsetKnown() && (fieldNode->gtFldOffset == 0))
|
|
{
|
|
// Quirk: always use a fresh temp for zero-offset fields. This is
|
|
// because temp reuse can create IR where some uses will be in
|
|
        // positions we do not support (i.e. [use...store...user]).
|
|
lclNum = lvaGrabTemp(true DEBUGARG("Zero offset field obj"));
|
|
}
|
|
else
|
|
{
|
|
var_types type = genActualType(fieldNode->GetFldObj());
|
|
lclNum = fgBigOffsetMorphingTemps[type];
|
|
|
|
if (lclNum == BAD_VAR_NUM)
|
|
{
|
|
// We haven't created a temp for this kind of type. Create one now.
|
|
lclNum = lvaGrabTemp(false DEBUGARG("Field obj"));
|
|
fgBigOffsetMorphingTemps[type] = lclNum;
|
|
}
|
|
else
|
|
{
|
|
            // Make sure we get the right type.
|
|
noway_assert(lvaTable[lclNum].TypeGet() == type);
|
|
}
|
|
}
|
|
|
|
assert(lclNum != BAD_VAR_NUM);
|
|
return lclNum;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgMorphFieldAddr: Fully morph a FIELD_ADDR tree.
|
|
//
|
|
// Expands the field node into explicit additions.
|
|
//
|
|
// Arguments:
|
|
// tree - The FIELD_ADDR tree
|
|
// mac - The morphing context, used to elide adding null checks
|
|
//
|
|
// Return Value:
|
|
// The fully morphed "tree".
|
|
//
|
|
GenTree* Compiler::fgMorphFieldAddr(GenTree* tree, MorphAddrContext* mac)
|
|
{
|
|
assert(tree->OperIs(GT_FIELD_ADDR));
|
|
|
|
GenTreeFieldAddr* fieldNode = tree->AsFieldAddr();
|
|
GenTree* objRef = fieldNode->GetFldObj();
|
|
bool isAddr = ((tree->gtFlags & GTF_FLD_DEREFERENCED) == 0);
|
|
|
|
if (fieldNode->IsInstance())
|
|
{
|
|
tree = fgMorphExpandInstanceField(tree, mac);
|
|
}
|
|
else if (fieldNode->IsTlsStatic())
|
|
{
|
|
tree = fgMorphExpandTlsFieldAddr(tree);
|
|
}
|
|
else
|
|
{
|
|
assert(!"Normal statics are expected to be handled in the importer");
|
|
}
|
|
|
|
// Pass down the current mac; if non null we are computing an address
|
|
GenTree* result;
|
|
if (tree->OperIsSimple())
|
|
{
|
|
result = fgMorphSmpOp(tree, mac);
|
|
DBEXEC(result != fieldNode, result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
|
|
|
|
// Quirk: preserve previous behavior with this NO_CSE.
|
|
if (isAddr && result->OperIs(GT_COMMA))
|
|
{
|
|
result->SetDoNotCSE();
|
|
}
|
|
}
|
|
else
|
|
{
|
|
result = fgMorphTree(tree, mac);
|
|
DBEXEC(result == fieldNode, result->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED);
|
|
}
|
|
|
|
JITDUMP("\nFinal value of Compiler::fgMorphFieldAddr after morphing:\n");
|
|
DISPTREE(result);
|
|
|
|
return result;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgMorphExpandInstanceField: Expand an instance field address.
|
|
//
|
|
// Expands the field node into explicit additions and nullchecks.
|
|
//
|
|
// Arguments:
|
|
// tree - The FIELD_ADDR tree
|
|
// mac - The morphing context, used to elide adding null checks
|
|
//
|
|
// Return Value:
|
|
// The expanded "tree" of an arbitrary shape.
|
|
//
|
|
GenTree* Compiler::fgMorphExpandInstanceField(GenTree* tree, MorphAddrContext* mac)
|
|
{
|
|
assert(tree->OperIs(GT_FIELD_ADDR) && tree->AsFieldAddr()->IsInstance());
|
|
|
|
GenTree* objRef = tree->AsFieldAddr()->GetFldObj();
|
|
CORINFO_FIELD_HANDLE fieldHandle = tree->AsFieldAddr()->gtFldHnd;
|
|
unsigned fieldOffset = tree->AsFieldAddr()->gtFldOffset;
|
|
|
|
noway_assert(varTypeIsI(genActualType(objRef)));
|
|
|
|
/* Now we have a tree like this:
|
|
|
|
+--------------------+
|
|
| GT_FIELD_ADDR | tree
|
|
+----------+---------+
|
|
|
|
|
+--------------+-------------+
|
|
| tree->GetFldObj() |
|
|
+--------------+-------------+
|
|
|
|
We want to make it like this (when fldOffset is <= MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):
|
|
|
|
+---------+----------+
|
|
| GT_ADD | addr
|
|
+---------+----------+
|
|
|
|
|
/ \
|
|
/ \
|
|
/ \
|
|
+-------------------+ +----------------------+
|
|
| objRef | | fldOffset |
|
|
| | | (when fldOffset !=0) |
|
|
+-------------------+ +----------------------+
|
|
|
|
|
|
or this (when fldOffset is > MAX_UNCHECKED_OFFSET_FOR_NULL_OBJECT):
|
|
|
|
|
|
+--------------------+
|
|
| GT_IND/GT_BLK | tree (for FIELD)
|
|
+----------+---------+
|
|
|
|
|
+----------+---------+
|
|
| GT_COMMA | comma2
|
|
+----------+---------+
|
|
|
|
|
/ \
|
|
/ \
|
|
/ \
|
|
/ \
|
|
+---------+----------+ +---------+----------+
|
|
comma | GT_COMMA | | "+" (i.e. GT_ADD) | addr
|
|
+---------+----------+ +---------+----------+
|
|
| |
|
|
/ \ / \
|
|
/ \ / \
|
|
/ \ / \
|
|
+------------+-----------| +-----+-----+ +---------+ +-----------+
|
|
| STORE_LCL_VAR tmpLcl | ind | GT_IND | | tmpLcl | | fldOffset |
|
|
+------------+-----------| +-----+-----+ +---------+ +-----------+
|
|
| |
|
|
| |
|
|
| |
|
|
| |
|
|
+-----------+ +-----------+
|
|
| objRef | | tmpLcl |
|
|
+-----------+ +-----------+
|
|
|
|
*/
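    // In source-level terms (illustrative only), when an explicit null check is required the result
    // behaves like:
    //
    //     tmp = objRef;                   // the store is elided if objRef is already a simple local
    //     NULLCHECK(tmp);
    //     addr = (char*)tmp + fldOffset;  // sequenced after the null check via the COMMAs above
    //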
|
|
|
|
var_types objRefType = objRef->TypeGet();
|
|
GenTree* addr = nullptr;
|
|
GenTree* comma = nullptr;
|
|
bool addExplicitNullCheck = false;
|
|
|
|
if (fgAddrCouldBeNull(objRef))
|
|
{
|
|
// A non-null context here implies our [+ some offset] parent is an indirection, one that
|
|
// will implicitly null-check the produced address.
|
|
addExplicitNullCheck = (mac == nullptr) || fgIsBigOffset(mac->m_totalOffset + fieldOffset);
|
|
|
|
// The transformation here turns a value dependency (FIELD_ADDR being a
|
|
// known non-null operand) into a control-flow dependency (introducing
|
|
// explicit COMMA(NULLCHECK, ...)). This effectively "disconnects" the
|
|
// null check from the parent of the FIELD_ADDR node. For the cases
|
|
// where we made use of non-nullness we need to make the dependency
|
|
// explicit now.
|
|
if (addExplicitNullCheck)
|
|
{
|
|
if (mac != nullptr)
|
|
{
|
|
mac->m_user->SetHasOrderingSideEffect();
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// We can elide the null check only by letting it happen as part of
|
|
// the consuming indirection, so it is no longer non-faulting.
|
|
mac->m_user->gtFlags &= ~GTF_IND_NONFAULTING;
|
|
}
|
|
}
|
|
|
|
if (addExplicitNullCheck)
|
|
{
|
|
JITDUMP("Before explicit null check morphing:\n");
|
|
DISPTREE(tree);
|
|
|
|
// Create the "comma" subtree.
|
|
GenTree* store = nullptr;
|
|
unsigned lclNum;
|
|
|
|
if (!objRef->OperIs(GT_LCL_VAR) || lvaIsLocalImplicitlyAccessedByRef(objRef->AsLclVar()->GetLclNum()))
|
|
{
|
|
lclNum = fgGetFieldMorphingTemp(tree->AsFieldAddr());
|
|
store = gtNewTempStore(lclNum, objRef);
|
|
}
|
|
else
|
|
{
|
|
lclNum = objRef->AsLclVarCommon()->GetLclNum();
|
|
}
|
|
|
|
GenTree* lclVar = gtNewLclvNode(lclNum, objRefType);
|
|
GenTree* nullchk = gtNewNullCheck(lclVar, compCurBB);
|
|
|
|
nullchk->SetHasOrderingSideEffect();
|
|
|
|
if (store != nullptr)
|
|
{
|
|
// Create the "comma" node.
|
|
comma = gtNewOperNode(GT_COMMA, TYP_VOID, store, nullchk);
|
|
}
|
|
else
|
|
{
|
|
comma = nullchk;
|
|
}
|
|
|
|
addr = gtNewLclvNode(lclNum, objRefType); // Use "tmpLcl" to create "addr" node.
|
|
}
|
|
else
|
|
{
|
|
addr = objRef;
|
|
}
|
|
|
|
#ifdef FEATURE_READYTORUN
|
|
if (tree->AsFieldAddr()->gtFieldLookup.addr != nullptr)
|
|
{
|
|
GenTree* offsetNode = nullptr;
|
|
if (tree->AsFieldAddr()->gtFieldLookup.accessType == IAT_PVALUE)
|
|
{
|
|
offsetNode = gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)tree->AsFieldAddr()->gtFieldLookup.addr,
|
|
GTF_ICON_CONST_PTR, true);
|
|
#ifdef DEBUG
|
|
offsetNode->gtGetOp1()->AsIntCon()->gtTargetHandle = (size_t)fieldHandle;
|
|
#endif
|
|
}
|
|
else
|
|
{
|
|
noway_assert(!"unexpected accessType for R2R field access");
|
|
}
|
|
|
|
addr = gtNewOperNode(GT_ADD, (objRefType == TYP_I_IMPL) ? TYP_I_IMPL : TYP_BYREF, addr, offsetNode);
|
|
|
|
// We cannot form and GC report an invalid byref, so this must preserve
|
|
// its ordering with the null check.
|
|
if (addExplicitNullCheck && addr->TypeIs(TYP_BYREF))
|
|
{
|
|
addr->SetHasOrderingSideEffect();
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// We only need to attach the field offset information for class fields.
|
|
FieldSeq* fieldSeq = nullptr;
|
|
if ((objRefType == TYP_REF) && !tree->AsFieldAddr()->gtFldMayOverlap)
|
|
{
|
|
fieldSeq = GetFieldSeqStore()->Create(fieldHandle, fieldOffset, FieldSeq::FieldKind::Instance);
|
|
}
|
|
|
|
// Add the member offset to the object's address.
|
|
if (fieldOffset != 0)
|
|
{
|
|
addr = gtNewOperNode(GT_ADD, (objRefType == TYP_I_IMPL) ? TYP_I_IMPL : TYP_BYREF, addr,
|
|
gtNewIconNode(fieldOffset, fieldSeq));
|
|
|
|
// We cannot form and GC report an invalid byref, so this must preserve
|
|
// its ordering with the null check.
|
|
if (addExplicitNullCheck && addr->TypeIs(TYP_BYREF))
|
|
{
|
|
addr->SetHasOrderingSideEffect();
|
|
}
|
|
|
|
if (addr->gtGetOp1()->OperIsConst() && addr->gtGetOp2()->OperIsConst())
|
|
{
|
|
// Fold it if we have const-handle + const-offset
|
|
addr = gtFoldExprConst(addr);
|
|
}
|
|
}
|
|
|
|
if (addExplicitNullCheck)
|
|
{
|
|
// Create the "comma2" tree.
|
|
addr = gtNewOperNode(GT_COMMA, addr->TypeGet(), comma, addr);
|
|
|
|
JITDUMP("After adding explicit null check:\n");
|
|
DISPTREE(addr);
|
|
}
|
|
|
|
return addr;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgMorphExpandTlsFieldAddr: Expand a TLS field address.
|
|
//
|
|
// Expands ".tls"-style statics, produced by the C++/CLI compiler for
|
|
// "__declspec(thread)" variables. An overview of the underlying native
|
|
// mechanism can be found here: http://www.nynaeve.net/?p=180.
|
|
//
|
|
// Arguments:
|
|
// tree - The GT_FIELD_ADDR tree
|
|
//
|
|
// Return Value:
|
|
// The expanded tree - a GT_ADD.
|
|
//
|
|
GenTree* Compiler::fgMorphExpandTlsFieldAddr(GenTree* tree)
|
|
{
|
|
assert(tree->OperIs(GT_FIELD_ADDR) && tree->AsFieldAddr()->IsTlsStatic());
|
|
|
|
CORINFO_FIELD_HANDLE fieldHandle = tree->AsFieldAddr()->gtFldHnd;
|
|
int fieldOffset = tree->AsFieldAddr()->gtFldOffset;
|
|
|
|
// Thread Local Storage static field reference
|
|
//
|
|
// Field ref is a TLS 'Thread-Local-Storage' reference
|
|
//
|
|
// Build this tree: ADD(I_IMPL) #
|
|
// / \.
|
|
// / CNS(fldOffset)
|
|
// /
|
|
// /
|
|
// /
|
|
// IND(I_IMPL) == [Base of this DLL's TLS]
|
|
// |
|
|
// ADD(I_IMPL)
|
|
// / \.
|
|
// / CNS(IdValue*4) or MUL
|
|
// / / \.
|
|
// IND(I_IMPL) / CNS(4)
|
|
// | /
|
|
// CNS(TLS_HDL,0x2C) IND
|
|
// |
|
|
// CNS(pIdAddr)
|
|
//
|
|
// # Denotes the original node
|
|
//
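    // In native terms (x86 Windows; illustrative only, not the generated code), the address computed
    // below is roughly:
    //
    //     void** slots   = (void**)__readfsdword(0x2C); // TEB ThreadLocalStoragePointer
    //     void*  tlsBase = slots[tlsIndex];             // base of this module's TLS block
    //     void*  fldAddr = (char*)tlsBase + fldOffset;
    //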
|
|
void** pIdAddr = nullptr;
|
|
unsigned IdValue = info.compCompHnd->getFieldThreadLocalStoreID(fieldHandle, (void**)&pIdAddr);
|
|
|
|
//
|
|
    // If we can access the TLS DLL index ID value directly,
    // then pIdAddr will be NULL and
    // IdValue will be the actual TLS DLL index ID.
|
|
//
|
|
GenTree* dllRef = nullptr;
|
|
if (pIdAddr == nullptr)
|
|
{
|
|
if (IdValue != 0)
|
|
{
|
|
dllRef = gtNewIconNode(IdValue * 4, TYP_I_IMPL);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
dllRef = gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)pIdAddr, GTF_ICON_CONST_PTR, true);
|
|
|
|
// Next we multiply by 4
|
|
dllRef = gtNewOperNode(GT_MUL, TYP_I_IMPL, dllRef, gtNewIconNode(4, TYP_I_IMPL));
|
|
}
|
|
|
|
#define WIN32_TLS_SLOTS (0x2C) // Offset from fs:[0] where the pointer to the slots resides
|
|
|
|
// Mark this ICON as a TLS_HDL, codegen will use FS:[cns]
|
|
GenTree* tlsRef = gtNewIconHandleNode(WIN32_TLS_SLOTS, GTF_ICON_TLS_HDL);
|
|
|
|
tlsRef = gtNewIndir(TYP_I_IMPL, tlsRef, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
|
|
|
|
if (dllRef != nullptr)
|
|
{
|
|
// Add the dllRef.
|
|
tlsRef = gtNewOperNode(GT_ADD, TYP_I_IMPL, tlsRef, dllRef);
|
|
}
|
|
|
|
// indirect to have tlsRef point at the base of the DLLs Thread Local Storage.
|
|
tlsRef = gtNewIndir(TYP_I_IMPL, tlsRef);
|
|
|
|
// Add the TLS static field offset to the address.
|
|
assert(!tree->AsFieldAddr()->gtFldMayOverlap);
|
|
FieldSeq* fieldSeq = GetFieldSeqStore()->Create(fieldHandle, fieldOffset, FieldSeq::FieldKind::SimpleStatic);
|
|
GenTree* offsetNode = gtNewIconNode(fieldOffset, fieldSeq);
|
|
|
|
tree->ChangeOper(GT_ADD);
|
|
tree->AsOp()->gtOp1 = tlsRef;
|
|
tree->AsOp()->gtOp2 = offsetNode;
|
|
|
|
return tree;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgCanFastTailCall: Check to see if this tail call can be optimized as epilog+jmp.
|
|
//
|
|
// Arguments:
|
|
// callee - The callee to check
|
|
// failReason - If this method returns false, the reason why. Can be nullptr.
|
|
//
|
|
// Return Value:
|
|
// Returns true or false based on whether the callee can be fastTailCalled
|
|
//
|
|
// Notes:
|
|
// This function is target specific and each target will make the fastTailCall
|
|
// decision differently. See the notes below.
|
|
//
|
|
// This function calls AddFinalArgsAndDetermineABIInfo to initialize the ABI
|
|
// info, which is used to analyze the argument. This function can alter the
|
|
// call arguments by adding argument IR nodes for non-standard arguments.
|
|
//
|
|
// Windows Amd64:
|
|
// A fast tail call can be made whenever the number of callee arguments
|
|
// is less than or equal to the number of caller arguments, or we have four
|
|
// or fewer callee arguments. This is because, on Windows AMD64, each
|
|
// argument uses exactly one register or one 8-byte stack slot. Thus, we only
|
|
// need to count arguments, and not be concerned with the size of each
|
|
// incoming or outgoing argument.
|
|
//
|
|
// Can fast tail call examples (amd64 Windows):
|
|
//
|
|
// -- Callee will have all register arguments --
|
|
// caller(int, int, int, int)
|
|
// callee(int, int, float, int)
|
|
//
|
|
// -- Callee requires stack space that is equal or less than the caller --
|
|
// caller(struct, struct, struct, struct, struct, struct)
|
|
// callee(int, int, int, int, int, int)
|
|
//
|
|
// -- Callee requires stack space that is less than the caller --
|
|
// caller(struct, double, struct, float, struct, struct)
|
|
// callee(int, int, int, int, int)
|
|
//
|
|
// -- Callee will have all register arguments --
|
|
// caller(int)
|
|
// callee(int, int, int, int)
|
|
//
|
|
// Cannot fast tail call examples (amd64 Windows):
|
|
//
|
|
// -- Callee requires stack space that is larger than the caller --
|
|
// caller(struct, double, struct, float, struct, struct)
|
|
// callee(int, int, int, int, int, double, double, double)
|
|
//
|
|
// -- Callee has a byref struct argument --
|
|
// caller(int, int, int)
|
|
// callee(struct(size 3 bytes))
|
|
//
|
|
// Unix Amd64 && Arm64:
|
|
// A fastTailCall decision can be made whenever the callee's stack space is
|
|
// less than or equal to the caller's stack space. There are many permutations
|
|
// of when the caller and callee have different stack sizes if there are
|
|
// structs being passed to either the caller or callee.
|
|
//
|
|
// Exceptions:
|
|
//    If the callee has a 9 to 16 byte struct argument and the callee has
//    stack arguments, the decision will be to not fast tail call. This is
//    because, before fgMorphArgs is done, it is not known whether the struct
//    will be placed on the stack or enregistered. Therefore, the conservative
//    decision not to fast tail call is taken. This limitation should be
//    removed if/when fgMorphArgs no longer depends on fgCanFastTailCall.
|
|
//
|
|
// Can fast tail call examples (amd64 Unix):
|
|
//
|
|
// -- Callee will have all register arguments --
|
|
// caller(int, int, int, int)
|
|
// callee(int, int, float, int)
|
|
//
|
|
// -- Callee requires stack space that is equal to the caller --
|
|
// caller({ long, long }, { int, int }, { int }, { int }, { int }, { int }) -- 6 int register arguments, 16 byte
|
|
// stack
|
|
// space
|
|
// callee(int, int, int, int, int, int, int, int) -- 6 int register arguments, 16 byte stack space
|
|
//
|
|
// -- Callee requires stack space that is less than the caller --
|
|
// caller({ long, long }, int, { long, long }, int, { long, long }, { long, long }) 6 int register arguments, 32 byte
|
|
// stack
|
|
// space
|
|
// callee(int, int, int, int, int, int, { long, long } ) // 6 int register arguments, 16 byte stack space
|
|
//
|
|
// -- Callee will have all register arguments --
|
|
// caller(int)
|
|
// callee(int, int, int, int)
|
|
//
|
|
// Cannot fast tail call examples (amd64 Unix):
|
|
//
|
|
// -- Callee requires stack space that is larger than the caller --
|
|
// caller(float, float, float, float, float, float, float, float) -- 8 float register arguments
|
|
// callee(int, int, int, int, int, int, int, int) -- 6 int register arguments, 16 byte stack space
|
|
//
|
|
// -- Callee has structs which cannot be enregistered (Implementation Limitation) --
|
|
// caller(float, float, float, float, float, float, float, float, { double, double, double }) -- 8 float register
|
|
// arguments, 24 byte stack space
|
|
// callee({ double, double, double }) -- 24 bytes stack space
|
|
//
|
|
// -- Callee requires stack space and has a struct argument >8 bytes and <16 bytes (Implementation Limitation) --
|
|
// caller(int, int, int, int, int, int, { double, double, double }) -- 6 int register arguments, 24 byte stack space
|
|
// callee(int, int, int, int, int, int, { int, int }) -- 6 int registers, 16 byte stack space
|
|
//
|
|
// -- Caller requires stack space and nCalleeArgs > nCallerArgs (Bug) --
|
|
// caller({ double, double, double, double, double, double }) // 48 byte stack
|
|
// callee(int, int) -- 2 int registers
|
|
//
|
|
bool Compiler::fgCanFastTailCall(GenTreeCall* callee, const char** failReason)
|
|
{
|
|
#if FEATURE_FASTTAILCALL
|
|
|
|
// To reach here means that the return types of the caller and callee are tail call compatible.
|
|
// In the case of structs that can be returned in a register, compRetNativeType is set to the actual return type.
|
|
|
|
#ifdef DEBUG
|
|
if (callee->IsTailPrefixedCall())
|
|
{
|
|
var_types retType = info.compRetType;
|
|
assert(impTailCallRetTypeCompatible(false, retType, info.compMethodInfo->args.retTypeClass, info.compCallConv,
|
|
(var_types)callee->gtReturnType, callee->gtRetClsHnd,
|
|
callee->GetUnmanagedCallConv()));
|
|
}
|
|
#endif
|
|
|
|
assert(!callee->gtArgs.AreArgsComplete());
|
|
|
|
callee->gtArgs.AddFinalArgsAndDetermineABIInfo(this, callee);
|
|
|
|
unsigned calleeArgStackSize = callee->gtArgs.OutgoingArgsStackSize();
|
|
unsigned callerArgStackSize = roundUp(lvaParameterStackSize, TARGET_POINTER_SIZE);
|
|
|
|
auto reportFastTailCallDecision = [&](const char* thisFailReason) {
|
|
if (failReason != nullptr)
|
|
{
|
|
*failReason = thisFailReason;
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
if ((JitConfig.JitReportFastTailCallDecisions()) == 1)
|
|
{
|
|
if (callee->gtCallType != CT_INDIRECT)
|
|
{
|
|
const char* methodName;
|
|
|
|
methodName = eeGetMethodFullName(callee->gtCallMethHnd);
|
|
|
|
printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: %s -- Decision: ",
|
|
info.compFullName, methodName);
|
|
}
|
|
else
|
|
{
|
|
printf("[Fast tailcall decision]: Caller: %s\n[Fast tailcall decision]: Callee: IndirectCall -- "
|
|
"Decision: ",
|
|
info.compFullName);
|
|
}
|
|
|
|
if (thisFailReason == nullptr)
|
|
{
|
|
printf("Will fast tailcall");
|
|
}
|
|
else
|
|
{
|
|
printf("Will not fast tailcall (%s)", thisFailReason);
|
|
}
|
|
|
|
printf(" (CallerArgStackSize: %d, CalleeArgStackSize: %d)\n\n", callerArgStackSize, calleeArgStackSize);
|
|
}
|
|
else
|
|
{
|
|
if (thisFailReason == nullptr)
|
|
{
|
|
JITDUMP("[Fast tailcall decision]: Will fast tailcall\n");
|
|
}
|
|
else
|
|
{
|
|
JITDUMP("[Fast tailcall decision]: Will not fast tailcall (%s)\n", thisFailReason);
|
|
}
|
|
}
|
|
#endif // DEBUG
|
|
};
|
|
|
|
#if defined(TARGET_ARM) || defined(TARGET_RISCV64)
|
|
for (CallArg& arg : callee->gtArgs.Args())
|
|
{
|
|
if (arg.NewAbiInfo.IsSplitAcrossRegistersAndStack())
|
|
{
|
|
reportFastTailCallDecision("Argument splitting in callee is not supported on " TARGET_READABLE_NAME);
|
|
return false;
|
|
}
|
|
}
|
|
#endif // TARGET_ARM || TARGET_RISCV64
|
|
|
|
#if defined(TARGET_ARM) || defined(TARGET_RISCV64)
|
|
if (compHasSplitParam)
|
|
{
|
|
reportFastTailCallDecision("Argument splitting in caller is not supported on " TARGET_READABLE_NAME);
|
|
return false;
|
|
}
|
|
#endif // TARGET_ARM || TARGET_RISCV64
|
|
|
|
#ifdef TARGET_ARM
|
|
if (compIsProfilerHookNeeded())
|
|
{
|
|
reportFastTailCallDecision("Profiler is not supported on ARM32");
|
|
return false;
|
|
}
|
|
|
|
// On ARM32 we have only one non-parameter volatile register and we need it
|
|
// for the GS security cookie check. We could technically still tailcall
|
|
// when the callee does not use all argument registers, but we keep the
|
|
// code simple here.
|
|
if (getNeedsGSSecurityCookie())
|
|
{
|
|
reportFastTailCallDecision("Not enough registers available due to the GS security cookie check");
|
|
return false;
|
|
}
|
|
#endif
|
|
|
|
if (!opts.compFastTailCalls)
|
|
{
|
|
reportFastTailCallDecision("Configuration doesn't allow fast tail calls");
|
|
return false;
|
|
}
|
|
|
|
if (callee->IsStressTailCall())
|
|
{
|
|
reportFastTailCallDecision("Fast tail calls are not performed under tail call stress");
|
|
return false;
|
|
}
|
|
|
|
#ifdef TARGET_ARM
|
|
if (callee->IsR2RRelativeIndir() || callee->HasNonStandardAddedArgs(this))
|
|
{
|
|
reportFastTailCallDecision(
|
|
"Method with non-standard args passed in callee saved register cannot be tail called");
|
|
return false;
|
|
}
|
|
#endif
|
|
|
|
// Note on vararg methods:
|
|
    // If the caller is a vararg method, we don't know the number of arguments passed by the caller's caller.
    // But we can be sure that the incoming arg area of the vararg caller is sufficient to hold its
    // fixed args. Therefore, we can allow a vararg method to fast tail call other methods as long as the
    // outgoing area required for the callee is bounded by the caller's fixed argument space.
    //
    // Note that the callee being a vararg method is not a problem since we can account for the params being passed.
|
|
//
|
|
// We will currently decide to not fast tail call on Windows armarch if the caller or callee is a vararg
|
|
// method. This is due to the ABI differences for native vararg methods for these platforms. There is
|
|
// work required to shuffle arguments to the correct locations.
|
|
|
|
if (TargetOS::IsWindows && TargetArchitecture::IsArmArch && (info.compIsVarArgs || callee->IsVarargs()))
|
|
{
|
|
reportFastTailCallDecision("Fast tail calls with varargs not supported on Windows ARM/ARM64");
|
|
return false;
|
|
}
|
|
|
|
if (compLocallocUsed)
|
|
{
|
|
reportFastTailCallDecision("Localloc used");
|
|
return false;
|
|
}
|
|
|
|
#ifdef TARGET_AMD64
|
|
// Needed for Jit64 compat.
|
|
// In future, enabling fast tail calls from methods that need GS cookie
|
|
// check would require codegen side work to emit GS cookie check before a
|
|
// tail call.
|
|
if (getNeedsGSSecurityCookie())
|
|
{
|
|
reportFastTailCallDecision("GS Security cookie check required");
|
|
return false;
|
|
}
|
|
#endif
|
|
|
|
// If the NextCallReturnAddress intrinsic is used we should do normal calls.
|
|
if (info.compHasNextCallRetAddr)
|
|
{
|
|
reportFastTailCallDecision("Uses NextCallReturnAddress intrinsic");
|
|
return false;
|
|
}
|
|
|
|
if (callee->gtArgs.HasRetBuffer())
|
|
{
|
|
// If callee has RetBuf param, caller too must have it.
|
|
// Otherwise go the slow route.
|
|
if (info.compRetBuffArg == BAD_VAR_NUM)
|
|
{
|
|
reportFastTailCallDecision("Callee has RetBuf but caller does not.");
|
|
return false;
|
|
}
|
|
}
|
|
|
|
// For a fast tail call the caller will use its incoming arg stack space to place
|
|
// arguments, so if the callee requires more arg stack space than is available here
|
|
// the fast tail call cannot be performed. This is common to all platforms.
|
|
// Note that the GC'ness of on stack args need not match since the arg setup area is marked
|
|
// as non-interruptible for fast tail calls.
|
|
if (calleeArgStackSize > callerArgStackSize)
|
|
{
|
|
reportFastTailCallDecision("Not enough incoming arg space");
|
|
return false;
|
|
}
|
|
|
|
// For Windows some struct parameters are copied on the local frame
|
|
    // and then passed by reference. We cannot fast tail call in these situations
|
|
// as we need to keep our frame around.
|
|
if (fgCallHasMustCopyByrefParameter(callee))
|
|
{
|
|
reportFastTailCallDecision("Callee has a byref parameter");
|
|
return false;
|
|
}
|
|
|
|
reportFastTailCallDecision(nullptr);
|
|
return true;
|
|
#else // FEATURE_FASTTAILCALL
|
|
if (failReason)
|
|
*failReason = "Fast tailcalls are not supported on this platform";
|
|
return false;
|
|
#endif
|
|
}
|
|
|
|
#if FEATURE_FASTTAILCALL
|
|
//------------------------------------------------------------------------
|
|
// fgCallHasMustCopyByrefParameter: Check to see if this call has a byref parameter that
|
|
// requires a struct copy in the caller.
|
|
//
|
|
// Arguments:
|
|
// call - The call to check
|
|
//
|
|
// Return Value:
|
|
// Returns true or false based on whether this call has a byref parameter that
|
|
// requires a struct copy in the caller.
|
|
//
|
|
bool Compiler::fgCallHasMustCopyByrefParameter(GenTreeCall* call)
|
|
{
|
|
#if FEATURE_IMPLICIT_BYREFS
|
|
for (CallArg& arg : call->gtArgs.Args())
|
|
{
|
|
if (fgCallArgWillPointIntoLocalFrame(call, arg))
|
|
{
|
|
return true;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
return false;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgCallArgWillPointIntoLocalFrame:
|
|
// Check to see if a call arg will end up pointing into the local frame after morph.
|
|
//
|
|
// Arguments:
|
|
// call - The call to check
|
|
//
|
|
// Return Value:
|
|
// True if the arg will be passed as an implicit byref pointing to a local
|
|
// on this function's frame; otherwise false.
|
|
//
|
|
// Remarks:
|
|
// The logic here runs before relevant nodes have been morphed.
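//
//    For example (illustrative C#): in "void Caller(LargeStruct s) { Callee(s); }", where LargeStruct
//    is passed by implicit reference, the call is the last use of "s", so the caller can forward its
//    own incoming byref instead of copying, and the argument does not point into the caller's frame.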
|
|
//
|
|
bool Compiler::fgCallArgWillPointIntoLocalFrame(GenTreeCall* call, CallArg& arg)
|
|
{
|
|
if (!arg.AbiInfo.PassedByRef)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
// If we're optimizing, we may be able to pass our caller's byref to our callee,
|
|
// and so still be able to avoid a struct copy.
|
|
if (opts.OptimizationDisabled())
|
|
{
|
|
return true;
|
|
}
|
|
|
|
// First, see if this arg is an implicit byref param.
|
|
GenTreeLclVarCommon* const lcl = arg.GetNode()->IsImplicitByrefParameterValuePreMorph(this);
|
|
|
|
if (lcl == nullptr)
|
|
{
|
|
return true;
|
|
}
|
|
|
|
// Yes, the arg is an implicit byref param.
|
|
const unsigned lclNum = lcl->GetLclNum();
|
|
LclVarDsc* const varDsc = lvaGetDesc(lcl);
|
|
|
|
// The param must not be promoted; if we've promoted, then the arg will be
|
|
// a local struct assembled from the promoted fields.
|
|
if (varDsc->lvPromoted)
|
|
{
|
|
JITDUMP("Arg [%06u] is promoted implicit byref V%02u, so no tail call\n", dspTreeID(arg.GetNode()), lclNum);
|
|
|
|
return true;
|
|
}
|
|
|
|
assert(!varDsc->lvIsStructField);
|
|
|
|
JITDUMP("Arg [%06u] is unpromoted implicit byref V%02u, seeing if we can still tail call\n",
|
|
dspTreeID(arg.GetNode()), lclNum);
|
|
|
|
GenTreeFlags deathFlags;
|
|
if (varDsc->lvFieldLclStart != 0)
|
|
{
|
|
// Undone promotion case.
|
|
deathFlags = lvaGetDesc(varDsc->lvFieldLclStart)->AllFieldDeathFlags();
|
|
}
|
|
else
|
|
{
|
|
deathFlags = GTF_VAR_DEATH;
|
|
}
|
|
|
|
if ((lcl->gtFlags & deathFlags) == deathFlags)
|
|
{
|
|
JITDUMP("... yes, arg is a last use\n");
|
|
return false;
|
|
}
|
|
|
|
JITDUMP("... no, arg is not a last use\n");
|
|
return true;
|
|
}
|
|
|
|
#endif
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgMorphPotentialTailCall: Attempt to morph a call that the importer has
|
|
// identified as a potential tailcall to an actual tailcall and return the
|
|
// placeholder node to use in this case.
|
|
//
|
|
// Arguments:
|
|
// call - The call to morph.
|
|
//
|
|
// Return Value:
|
|
// Returns a node to use if the call was morphed into a tailcall. If this
|
|
// function returns a node the call is done being morphed and the new node
|
|
// should be used. Otherwise the call will have been demoted to a regular call
|
|
// and should go through normal morph.
|
|
//
|
|
// Notes:
|
|
// This is called only for calls that the importer has already identified as
|
|
// potential tailcalls. It will do profitability and legality checks and
|
|
// classify which kind of tailcall we are able to (or should) do, along with
|
|
// modifying the trees to perform that kind of tailcall.
|
|
//
|
|
GenTree* Compiler::fgMorphPotentialTailCall(GenTreeCall* call)
|
|
{
|
|
// It should either be an explicit (i.e. tail prefixed) or an implicit tail call
|
|
assert(call->IsTailPrefixedCall() ^ call->IsImplicitTailCall());
|
|
|
|
// It cannot be an inline candidate
|
|
assert(!call->IsInlineCandidate());
|
|
|
|
auto failTailCall = [&](const char* reason, unsigned lclNum = BAD_VAR_NUM) {
|
|
#ifdef DEBUG
|
|
if (verbose)
|
|
{
|
|
printf("\nRejecting tail call in morph for call ");
|
|
printTreeID(call);
|
|
printf(": %s", reason);
|
|
if (lclNum != BAD_VAR_NUM)
|
|
{
|
|
printf(" V%02u", lclNum);
|
|
}
|
|
printf("\n");
|
|
}
|
|
#endif
|
|
|
|
// for non user funcs, we have no handles to report
|
|
info.compCompHnd->reportTailCallDecision(nullptr,
|
|
(call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
|
|
call->IsTailPrefixedCall(), TAILCALL_FAIL, reason);
|
|
|
|
// We have checked the candidate so demote.
|
|
call->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;
|
|
#if FEATURE_TAILCALL_OPT
|
|
call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL;
|
|
#endif
|
|
};
|
|
|
|
if (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC)
|
|
{
|
|
failTailCall("Might turn into an intrinsic");
|
|
return nullptr;
|
|
}
|
|
|
|
#ifdef TARGET_ARM
|
|
if (call->gtCallMoreFlags & GTF_CALL_M_WRAPPER_DELEGATE_INV)
|
|
{
|
|
failTailCall("Non-standard calling convention");
|
|
return nullptr;
|
|
}
|
|
#endif
|
|
|
|
if (call->IsNoReturn() && !call->IsTailPrefixedCall())
|
|
{
|
|
// Such tail calls always throw an exception and we won't be able to see current
|
|
// Caller() in the stacktrace.
|
|
failTailCall("Never returns");
|
|
return nullptr;
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
if (opts.compGcChecks && (info.compRetType == TYP_REF))
|
|
{
|
|
failTailCall("DOTNET_JitGCChecks or stress might have interposed a call to CORINFO_HELP_CHECK_OBJ, "
|
|
"invalidating tailcall opportunity");
|
|
return nullptr;
|
|
}
|
|
#endif
|
|
|
|
    // We have to ensure that we pass the incoming retValBuf as the
|
|
// outgoing one. Using a temp will not do as this function will
|
|
// not regain control to do the copy. This can happen when inlining
|
|
// a tailcall which also has a potential tailcall in it: the IL looks
|
|
// like we can do a tailcall, but the trees generated use a temp for the inlinee's
|
|
// result. TODO-CQ: Fix this.
|
|
if (info.compRetBuffArg != BAD_VAR_NUM)
|
|
{
|
|
noway_assert(call->TypeGet() == TYP_VOID);
|
|
noway_assert(call->gtArgs.HasRetBuffer());
|
|
GenTree* retValBuf = call->gtArgs.GetRetBufferArg()->GetNode();
|
|
if (retValBuf->gtOper != GT_LCL_VAR || retValBuf->AsLclVarCommon()->GetLclNum() != info.compRetBuffArg)
|
|
{
|
|
failTailCall("Need to copy return buffer");
|
|
return nullptr;
|
|
}
|
|
}
|
|
|
|
    // We are still not sure whether it can be a tail call, because when converting
|
|
// a call to an implicit tail call, we must check that there are no locals with
|
|
// their address taken. If this is the case, we have to assume that the address
|
|
// has been leaked and the current stack frame must live until after the final
|
|
// call.
|
|
|
|
    // Verify that none of the vars has the lvHasLdAddrOp or IsAddressExposed() bit set. Note
|
|
// that lvHasLdAddrOp is much more conservative. We cannot just base it on
|
|
// IsAddressExposed() alone since it is not guaranteed to be set on all VarDscs
|
|
// during morph stage. The reason for also checking IsAddressExposed() is that in case
|
|
// of vararg methods user args are marked as addr exposed but not lvHasLdAddrOp.
|
|
// The combination of lvHasLdAddrOp and IsAddressExposed() though conservative allows us
|
|
// never to be incorrect.
|
|
//
|
|
// TODO-Throughput: have a compiler level flag to indicate whether method has vars whose
|
|
// address is taken. Such a flag could be set whenever lvHasLdAddrOp or IsAddressExposed()
|
|
// is set. This avoids the need for iterating through all lcl vars of the current
|
|
// method. Right now throughout the code base we are not consistently using 'set'
|
|
// method to set lvHasLdAddrOp and IsAddressExposed() flags.
|
|
|
|
bool isImplicitOrStressTailCall = call->IsImplicitTailCall() || call->IsStressTailCall();
|
|
if (isImplicitOrStressTailCall && compLocallocUsed)
|
|
{
|
|
failTailCall("Localloc used");
|
|
return nullptr;
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
// For explicit tailcalls the importer will avoid inserting stress
|
|
// poisoning after them. However, implicit tailcalls are marked earlier and
|
|
// we must filter those out here if we ended up adding any poisoning IR
|
|
// after them.
|
|
if (isImplicitOrStressTailCall && compPoisoningAnyImplicitByrefs)
|
|
{
|
|
failTailCall("STRESS_POISON_IMPLICIT_BYREFS has introduced IR after tailcall opportunity, invalidating");
|
|
return nullptr;
|
|
}
|
|
#endif
|
|
|
|
bool hasStructParam = false;
|
|
for (unsigned varNum = 0; varNum < lvaCount; varNum++)
|
|
{
|
|
LclVarDsc* varDsc = lvaGetDesc(varNum);
|
|
|
|
// If the method is marked as an explicit tail call we will skip the
|
|
// following three hazard checks.
|
|
// We still must check for any struct parameters and set 'hasStructParam'
|
|
// so that we won't transform the recursive tail call into a loop.
|
|
//
|
|
if (isImplicitOrStressTailCall)
|
|
{
|
|
if (varDsc->lvHasLdAddrOp && !lvaIsImplicitByRefLocal(varNum))
|
|
{
|
|
failTailCall("Local address taken", varNum);
|
|
return nullptr;
|
|
}
|
|
if (varDsc->IsAddressExposed())
|
|
{
|
|
if (lvaIsImplicitByRefLocal(varNum))
|
|
{
|
|
// The address of the implicit-byref is a non-address use of the pointer parameter.
|
|
}
|
|
else if (varDsc->lvIsStructField && lvaIsImplicitByRefLocal(varDsc->lvParentLcl))
|
|
{
|
|
// The address of the implicit-byref's field is likewise a non-address use of the pointer
|
|
// parameter.
|
|
}
|
|
else if (varDsc->lvPromoted && (lvaTable[varDsc->lvFieldLclStart].lvParentLcl != varNum))
|
|
{
|
|
// This temp was used for struct promotion bookkeeping. It will not be used, and will have
|
|
// its ref count and address-taken flag reset in fgMarkDemotedImplicitByRefArgs.
|
|
assert(lvaIsImplicitByRefLocal(lvaTable[varDsc->lvFieldLclStart].lvParentLcl));
|
|
assert(fgGlobalMorph);
|
|
}
|
|
#if FEATURE_FIXED_OUT_ARGS
|
|
else if (varNum == lvaOutgoingArgSpaceVar)
|
|
{
|
|
// The outgoing arg space is exposed only at callees, which is ok for our purposes.
|
|
}
|
|
#endif
|
|
else
|
|
{
|
|
failTailCall("Local address taken", varNum);
|
|
return nullptr;
|
|
}
|
|
}
|
|
if (varDsc->lvPinned)
|
|
{
|
|
// A tail call removes the method from the stack, which means the pinning
|
|
// goes away for the callee. We can't allow that.
|
|
failTailCall("Has Pinned Vars", varNum);
|
|
return nullptr;
|
|
}
|
|
}
|
|
|
|
if (varTypeIsStruct(varDsc->TypeGet()) && varDsc->lvIsParam)
|
|
{
|
|
hasStructParam = true;
|
|
// This prevents transforming a recursive tail call into a loop
|
|
// but doesn't prevent tail call optimization so we need to
|
|
// look at the rest of parameters.
|
|
}
|
|
}
|
|
|
|
const char* failReason = nullptr;
|
|
bool canFastTailCall = fgCanFastTailCall(call, &failReason);
|
|
|
|
CORINFO_TAILCALL_HELPERS tailCallHelpers;
|
|
bool tailCallViaJitHelper = false;
|
|
if (!canFastTailCall)
|
|
{
|
|
if (call->IsImplicitTailCall())
|
|
{
|
|
            // Implicit or opportunistic tail calls are always dispatched via the fast tail call
            // mechanism and never via the tail call helper, for perf reasons.
failTailCall(failReason);
|
|
return nullptr;
|
|
}
|
|
|
|
assert(call->IsTailPrefixedCall());
|
|
assert(call->tailCallInfo != nullptr);
|
|
|
|
// We do not currently handle non-standard args except for VSD stubs.
|
|
if (!call->IsVirtualStub() && call->HasNonStandardAddedArgs(this))
|
|
{
|
|
failTailCall(
|
|
"Method with non-standard args passed in callee trash register cannot be tail called via helper");
|
|
return nullptr;
|
|
}
|
|
|
|
// On x86 we have a faster mechanism than the general one which we use
|
|
// in almost all cases. See fgCanTailCallViaJitHelper for more information.
|
|
if (fgCanTailCallViaJitHelper(call))
|
|
{
|
|
tailCallViaJitHelper = true;
|
|
}
|
|
else
|
|
{
|
|
// Make sure we can get the helpers. We do this last as the runtime
|
|
// will likely be required to generate these.
|
|
CORINFO_RESOLVED_TOKEN* token = nullptr;
|
|
CORINFO_SIG_INFO* sig = call->tailCallInfo->GetSig();
|
|
unsigned flags = 0;
|
|
if (!call->tailCallInfo->IsCalli())
|
|
{
|
|
token = call->tailCallInfo->GetToken();
|
|
if (call->tailCallInfo->IsCallvirt())
|
|
{
|
|
flags |= CORINFO_TAILCALL_IS_CALLVIRT;
|
|
}
|
|
}
|
|
|
|
if (call->gtArgs.HasThisPointer())
|
|
{
|
|
var_types thisArgType = call->gtArgs.GetThisArg()->GetNode()->TypeGet();
|
|
if (thisArgType != TYP_REF)
|
|
{
|
|
flags |= CORINFO_TAILCALL_THIS_ARG_IS_BYREF;
|
|
}
|
|
}
|
|
|
|
if (!info.compCompHnd->getTailCallHelpers(token, sig, (CORINFO_GET_TAILCALL_HELPERS_FLAGS)flags,
|
|
&tailCallHelpers))
|
|
{
|
|
failTailCall("Tail call help not available");
|
|
return nullptr;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Check if we can make the tailcall a loop.
|
|
bool fastTailCallToLoop = false;
|
|
#if FEATURE_TAILCALL_OPT
|
|
// TODO-CQ: enable the transformation when the method has a struct parameter that can be passed in a register
|
|
// or return type is a struct that can be passed in a register.
|
|
//
|
|
// TODO-CQ: if the method being compiled requires generic context reported in gc-info (either through
|
|
// hidden generic context param or through keep alive thisptr), then while transforming a recursive
|
|
// call to such a method requires that the generic context stored on stack slot be updated. Right now,
|
|
// fgMorphRecursiveFastTailCallIntoLoop() is not handling update of generic context while transforming
|
|
// a recursive call into a loop. Another option is to modify gtIsRecursiveCall() to check that the
|
|
// generic type parameters of both caller and callee generic method are the same.
|
|
if (opts.compTailCallLoopOpt && canFastTailCall && gtIsRecursiveCall(call) && !lvaReportParamTypeArg() &&
|
|
!lvaKeepAliveAndReportThis() && !call->IsVirtual() && !hasStructParam && !varTypeIsStruct(call->TypeGet()))
|
|
{
|
|
fastTailCallToLoop = true;
|
|
}
|
|
#endif
|
|
|
|
// Ok -- now we are committed to performing a tailcall. Report the decision.
|
|
CorInfoTailCall tailCallResult;
|
|
if (fastTailCallToLoop)
|
|
{
|
|
tailCallResult = TAILCALL_RECURSIVE;
|
|
}
|
|
else if (canFastTailCall)
|
|
{
|
|
tailCallResult = TAILCALL_OPTIMIZED;
|
|
}
|
|
else
|
|
{
|
|
tailCallResult = TAILCALL_HELPER;
|
|
}
|
|
|
|
info.compCompHnd->reportTailCallDecision(nullptr,
|
|
(call->gtCallType == CT_USER_FUNC) ? call->gtCallMethHnd : nullptr,
|
|
call->IsTailPrefixedCall(), tailCallResult, nullptr);
|
|
|
|
// Do some profitability checks for whether we should expand a vtable call
|
|
// target early. Note that we may already have expanded it due to GDV at
|
|
// this point, so make sure we do not undo that work.
|
|
//
|
|
if (call->IsExpandedEarly() && call->IsVirtualVtable() && (call->gtControlExpr == nullptr))
|
|
{
|
|
assert(call->gtArgs.HasThisPointer());
|
|
// It isn't always profitable to expand a virtual call early
|
|
//
|
|
// We always expand the TAILCALL_HELPER type late.
|
|
        // And we expand late when we have an optimized tail call
// and the this pointer needs to be evaluated into a temp.
|
|
//
|
|
if (tailCallResult == TAILCALL_HELPER)
|
|
{
|
|
// We will always expand this late in lower instead.
|
|
// (see LowerTailCallViaJitHelper as it needs some work
|
|
// for us to be able to expand this earlier in morph)
|
|
//
|
|
call->ClearExpandedEarly();
|
|
}
|
|
else if ((tailCallResult == TAILCALL_OPTIMIZED) &&
|
|
((call->gtArgs.GetThisArg()->GetNode()->gtFlags & GTF_SIDE_EFFECT) != 0))
|
|
{
|
|
// We generate better code when we expand this late in lower instead.
|
|
//
|
|
call->ClearExpandedEarly();
|
|
}
|
|
}
|
|
|
|
// Now actually morph the call.
|
|
compTailCallUsed = true;
|
|
// This will prevent inlining this call.
|
|
call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL;
|
|
if (tailCallViaJitHelper)
|
|
{
|
|
call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL_VIA_JIT_HELPER;
|
|
}
|
|
|
|
#if FEATURE_TAILCALL_OPT
|
|
if (fastTailCallToLoop)
|
|
{
|
|
call->gtCallMoreFlags |= GTF_CALL_M_TAILCALL_TO_LOOP;
|
|
}
|
|
#endif
|
|
|
|
// Mark that this is no longer a pending tailcall. We need to do this before
|
|
// we call fgMorphCall again (which happens in the fast tailcall case) to
|
|
// avoid recursing back into this method.
|
|
call->gtCallMoreFlags &= ~GTF_CALL_M_EXPLICIT_TAILCALL;
|
|
#if FEATURE_TAILCALL_OPT
|
|
call->gtCallMoreFlags &= ~GTF_CALL_M_IMPLICIT_TAILCALL;
|
|
#endif
|
|
|
|
#ifdef DEBUG
|
|
if (verbose)
|
|
{
|
|
printf("\nGTF_CALL_M_TAILCALL bit set for call ");
|
|
printTreeID(call);
|
|
printf("\n");
|
|
if (fastTailCallToLoop)
|
|
{
|
|
printf("\nGTF_CALL_M_TAILCALL_TO_LOOP bit set for call ");
|
|
printTreeID(call);
|
|
printf("\n");
|
|
}
|
|
}
|
|
#endif
|
|
|
|
// For R2R we might need a different entry point for this call if we are doing a tailcall.
|
|
// The reason is that the normal delay load helper uses the return address to find the indirection
|
|
    // cell in xarch, but now the JIT is expected to leave the indirection cell in REG_R2R_INDIRECT_PARAM.
// We optimize delegate invocations manually in the JIT so skip this for those.
|
|
if (call->IsR2RRelativeIndir() && canFastTailCall && !fastTailCallToLoop && !call->IsDelegateInvoke())
|
|
{
|
|
info.compCompHnd->updateEntryPointForTailCall(&call->gtEntryPoint);
|
|
|
|
#ifdef TARGET_XARCH
|
|
// We have already computed arg info to make the fast tailcall decision, but on X64 we now
|
|
// have to pass the indirection cell, so redo arg info.
|
|
call->gtArgs.ResetFinalArgsAndABIInfo();
|
|
#endif
|
|
}
|
|
|
|
fgValidateIRForTailCall(call);
|
|
|
|
// If this block has a flow successor, make suitable updates.
|
|
//
|
|
if (compCurBB->KindIs(BBJ_ALWAYS))
|
|
{
|
|
// Flow no longer reaches the target from here.
|
|
//
|
|
fgRemoveRefPred(compCurBB->GetTargetEdge());
|
|
|
|
// Adjust profile weights of the successor blocks.
|
|
//
|
|
// Note if this is a tail call to loop, further updates
|
|
// are needed once we install the loop edge.
|
|
//
|
|
BasicBlock* curBlock = compCurBB;
|
|
if (curBlock->hasProfileWeight())
|
|
{
|
|
weight_t weightLoss = curBlock->bbWeight;
|
|
BasicBlock* nextBlock = curBlock->GetTarget();
|
|
|
|
while (nextBlock->hasProfileWeight())
|
|
{
|
|
// Since we have linear flow we can update the next block weight.
|
|
//
|
|
weight_t const nextWeight = nextBlock->bbWeight;
|
|
weight_t const newNextWeight = nextWeight - weightLoss;
|
|
|
|
// If the math would result in a negative weight then there's
|
|
// no local repair we can do; just leave things inconsistent.
|
|
//
|
|
if (newNextWeight >= 0)
|
|
{
|
|
// Note if we'd already morphed the IR in nextblock we might
|
|
// have done something profile sensitive that we should arguably reconsider.
|
|
//
|
|
JITDUMP("Reducing profile weight of " FMT_BB " from " FMT_WT " to " FMT_WT "\n", nextBlock->bbNum,
|
|
nextWeight, newNextWeight);
|
|
|
|
nextBlock->setBBProfileWeight(newNextWeight);
|
|
}
|
|
else
|
|
{
|
|
JITDUMP("Not reducing profile weight of " FMT_BB " as its weight " FMT_WT
|
|
" is less than direct flow pred " FMT_BB " weight " FMT_WT "\n",
|
|
nextBlock->bbNum, nextWeight, compCurBB->bbNum, weightLoss);
|
|
}
|
|
|
|
if (!nextBlock->KindIs(BBJ_ALWAYS))
|
|
{
|
|
break;
|
|
}
|
|
|
|
curBlock = nextBlock;
|
|
nextBlock = curBlock->GetTarget();
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// No unique successor. compCurBB should be a return.
|
|
//
|
|
assert(compCurBB->KindIs(BBJ_RETURN));
|
|
}
|
|
|
|
#if !FEATURE_TAILCALL_OPT_SHARED_RETURN
|
|
// We enable shared-ret tail call optimization for recursive calls even if
|
|
// FEATURE_TAILCALL_OPT_SHARED_RETURN is not defined.
|
|
if (gtIsRecursiveCall(call))
|
|
#endif
|
|
{
|
|
// Many tailcalls will have call and ret in the same block, and thus be
|
|
// BBJ_RETURN, but if the call falls through to a ret, and we are doing a
|
|
// tailcall, change it here.
|
|
compCurBB->SetKindAndTargetEdge(BBJ_RETURN);
|
|
}
|
|
|
|
GenTree* stmtExpr = fgMorphStmt->GetRootNode();
|
|
|
|
#ifdef DEBUG
|
|
// Tail call needs to be in one of the following IR forms
|
|
// Either a call stmt or
|
|
// GT_RETURN(GT_CALL(..)) or GT_RETURN(GT_CAST(GT_CALL(..)))
|
|
// var = GT_CALL(..) or var = (GT_CAST(GT_CALL(..)))
|
|
// GT_COMMA(GT_CALL(..), GT_NOP) or GT_COMMA(GT_CAST(GT_CALL(..)), GT_NOP)
|
|
// In the above,
|
|
// GT_CASTS may be nested.
|
|
genTreeOps stmtOper = stmtExpr->gtOper;
|
|
if (stmtOper == GT_CALL)
|
|
{
|
|
assert(stmtExpr == call);
|
|
}
|
|
else
|
|
{
|
|
assert(stmtOper == GT_RETURN || stmtOper == GT_STORE_LCL_VAR || stmtOper == GT_COMMA);
|
|
GenTree* treeWithCall;
|
|
if (stmtOper == GT_RETURN)
|
|
{
|
|
treeWithCall = stmtExpr->gtGetOp1();
|
|
}
|
|
else if (stmtOper == GT_COMMA)
|
|
{
|
|
// Second operation must be nop.
|
|
assert(stmtExpr->gtGetOp2()->IsNothingNode());
|
|
treeWithCall = stmtExpr->gtGetOp1();
|
|
}
|
|
else
|
|
{
|
|
treeWithCall = stmtExpr->AsLclVar()->Data();
|
|
}
|
|
|
|
// Peel off casts
|
|
while (treeWithCall->gtOper == GT_CAST)
|
|
{
|
|
assert(!treeWithCall->gtOverflow());
|
|
treeWithCall = treeWithCall->gtGetOp1();
|
|
}
|
|
|
|
assert(treeWithCall == call);
|
|
}
|
|
#endif
|
|
// Store the call type for later to introduce the correct placeholder.
|
|
var_types origCallType = call->TypeGet();
|
|
|
|
GenTree* result;
|
|
if (!canFastTailCall && !tailCallViaJitHelper)
|
|
{
|
|
// For tailcall via CORINFO_TAILCALL_HELPERS we transform into regular
|
|
// calls with (to the JIT) regular control flow so we do not need to do
|
|
// much special handling.
|
|
result = fgMorphTailCallViaHelpers(call, tailCallHelpers);
|
|
}
|
|
else
|
|
{
|
|
// Otherwise we will transform into something that does not return. For
|
|
// fast tailcalls a "jump" and for tailcall via JIT helper a call to a
|
|
// JIT helper that does not return. So peel off everything after the
|
|
// call.
|
|
Statement* nextMorphStmt = fgMorphStmt->GetNextStmt();
|
|
JITDUMP("Remove all stmts after the call.\n");
|
|
while (nextMorphStmt != nullptr)
|
|
{
|
|
Statement* stmtToRemove = nextMorphStmt;
|
|
nextMorphStmt = stmtToRemove->GetNextStmt();
|
|
fgRemoveStmt(compCurBB, stmtToRemove);
|
|
}
|
|
|
|
bool isRootReplaced = false;
|
|
GenTree* root = fgMorphStmt->GetRootNode();
|
|
|
|
if (root != call)
|
|
{
|
|
JITDUMP("Replace root node [%06d] with [%06d] tail call node.\n", dspTreeID(root), dspTreeID(call));
|
|
isRootReplaced = true;
|
|
fgMorphStmt->SetRootNode(call);
|
|
}
|
|
|
|
// Avoid potential extra work for the return (for example, vzeroupper)
|
|
call->gtType = TYP_VOID;
|
|
|
|
// The runtime requires that we perform a null check on the `this` argument before
|
|
// tail calling to a virtual dispatch stub. This requirement is a consequence of limitations
|
|
// in the runtime's ability to map an AV to a NullReferenceException if
|
|
// the AV occurs in a dispatch stub that has unmanaged caller.
|
|
if (call->IsVirtualStub())
|
|
{
|
|
call->gtFlags |= GTF_CALL_NULLCHECK;
|
|
}
|
|
|
|
// Do some target-specific transformations (before we process the args,
|
|
// etc.) for the JIT helper case.
|
|
if (tailCallViaJitHelper)
|
|
{
|
|
fgMorphTailCallViaJitHelper(call);
|
|
|
|
// Force re-evaluating the argInfo. fgMorphTailCallViaJitHelper will modify the
|
|
// argument list, invalidating the argInfo.
|
|
call->gtArgs.ResetFinalArgsAndABIInfo();
|
|
}
|
|
|
|
// Tail call via JIT helper: The VM can't use return address hijacking
|
|
// if we're not going to return and the helper doesn't have enough info
|
|
// to safely poll, so we poll before the tail call, if the block isn't
|
|
// already safe. Since tail call via helper is a slow mechanism it
|
|
// doesn't matter whether we emit GC poll. This is done to be in parity
|
|
        // with Jit64. Also this avoids GC info size increase if almost all
// methods are expected to be tail calls (e.g. F#).
|
|
//
|
|
// Note that we can avoid emitting GC-poll if we know that the current
|
|
// BB is dominated by a Gc-SafePoint block. But we don't have dominator
|
|
// info at this point. One option is to just add a place holder node for
|
|
// GC-poll (e.g. GT_GCPOLL) here and remove it in lowering if the block
|
|
        // is dominated by a GC-SafePoint. For now it is not clear whether
// optimizing slow tail calls is worth the effort. As a low cost check,
|
|
// we check whether the first and current basic blocks are
|
|
// GC-SafePoints.
|
|
//
|
|
// Fast Tail call as epilog+jmp - No need to insert GC-poll. Instead,
|
|
// fgSetBlockOrder() is going to mark the method as fully interruptible
|
|
// if the block containing this tail call is reachable without executing
|
|
// any call.
|
|
if (canFastTailCall || fgFirstBB->HasFlag(BBF_GC_SAFE_POINT) || compCurBB->HasFlag(BBF_GC_SAFE_POINT))
|
|
{
|
|
// No gc poll needed
|
|
}
|
|
else
|
|
{
|
|
JITDUMP("Marking " FMT_BB " as needs gc poll\n", compCurBB->bbNum);
|
|
compCurBB->SetFlags(BBF_NEEDS_GCPOLL);
|
|
optMethodFlags |= OMF_NEEDS_GCPOLLS;
|
|
}
|
|
|
|
fgMorphCall(call);
|
|
|
|
// Fast tail call: in case of fast tail calls, we need a jmp epilog and
|
|
// hence mark it as BBJ_RETURN with BBF_JMP flag set.
|
|
noway_assert(compCurBB->KindIs(BBJ_RETURN));
|
|
if (canFastTailCall)
|
|
{
|
|
compCurBB->SetFlags(BBF_HAS_JMP);
|
|
}
|
|
else
|
|
{
|
|
// We call CORINFO_HELP_TAILCALL which does not return, so we will
|
|
// not need epilogue.
|
|
compCurBB->SetKindAndTargetEdge(BBJ_THROW);
|
|
}
|
|
|
|
if (isRootReplaced)
|
|
{
|
|
// We have replaced the root node of this stmt and deleted the rest,
|
|
// but we still have the deleted, dead nodes on the `fgMorph*` stack
|
|
// if the root node was a store, `RET` or `CAST`.
|
|
// Return a zero con node to exit morphing of the old trees without asserts
|
|
// and forbid POST_ORDER morphing doing something wrong with our call.
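            // (A struct return has no scalar zero to materialize, so an INT constant stands in as the
            // unused placeholder.)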
|
|
var_types zeroType = (origCallType == TYP_STRUCT) ? TYP_INT : genActualType(origCallType);
|
|
result = fgMorphTree(gtNewZeroConNode(zeroType));
|
|
}
|
|
else
|
|
{
|
|
result = call;
|
|
}
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgValidateIRForTailCall:
|
|
// Validate that the IR looks ok to perform a tailcall.
|
|
//
|
|
// Arguments:
|
|
// call - The call that we are dispatching as a tailcall.
|
|
//
|
|
// Notes:
|
|
// This function needs to handle somewhat complex IR that appears after
|
|
// tailcall candidates due to inlining.
|
|
// Does not support checking struct returns since physical promotion can
|
|
// create very hard to validate IR patterns.
|
|
//
|
|
void Compiler::fgValidateIRForTailCall(GenTreeCall* call)
|
|
{
|
|
#ifdef DEBUG
|
|
if (call->TypeIs(TYP_STRUCT))
|
|
{
|
|
// Due to struct fields it can be very hard to track valid return
|
|
// patterns; just give up on validating those.
|
|
return;
|
|
}
|
|
|
|
class TailCallIRValidatorVisitor final : public GenTreeVisitor<TailCallIRValidatorVisitor>
|
|
{
|
|
GenTreeCall* m_tailcall;
|
|
unsigned m_lclNum;
|
|
bool m_active;
|
|
|
|
public:
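        // Visit nodes post-order and in execution order so the tailcall is seen before any later
        // uses of its value.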
|
|
enum
|
|
{
|
|
DoPostOrder = true,
|
|
UseExecutionOrder = true,
|
|
};
|
|
|
|
TailCallIRValidatorVisitor(Compiler* comp, GenTreeCall* tailcall)
|
|
: GenTreeVisitor(comp)
|
|
, m_tailcall(tailcall)
|
|
, m_lclNum(BAD_VAR_NUM)
|
|
, m_active(false)
|
|
{
|
|
}
|
|
|
|
fgWalkResult PostOrderVisit(GenTree** use, GenTree* user)
|
|
{
|
|
GenTree* tree = *use;
|
|
|
|
// Wait until we get to the actual call...
|
|
if (!m_active)
|
|
{
|
|
if (tree == m_tailcall)
|
|
{
|
|
m_active = true;
|
|
}
|
|
|
|
return WALK_CONTINUE;
|
|
}
|
|
|
|
if (tree->OperIs(GT_RETURN))
|
|
{
|
|
assert((tree->TypeIs(TYP_VOID) || ValidateUse(tree->gtGetOp1())) &&
|
|
"Expected return to be result of tailcall");
|
|
return WALK_ABORT;
|
|
}
|
|
|
|
if (tree->OperIs(GT_NOP))
|
|
{
|
|
// GT_NOP might appear due to stores that end up as
|
|
// self-stores, which get morphed to GT_NOP.
|
|
}
|
|
// We might see arbitrary chains of stores that trivially
|
|
// propagate the result. Example:
|
|
//
|
|
// * STORE_LCL_VAR ref V05 tmp5
|
|
// \--* CALL ref CultureInfo.InitializeUserDefaultUICulture
|
|
// (in a new statement/BB)
|
|
// * STORE_LCL_VAR ref V02 tmp2
|
|
// \--* LCL_VAR ref V05 tmp5
|
|
// (in a new statement/BB)
|
|
// * RETURN ref
|
|
// \--* LCL_VAR ref V02 tmp2
|
|
//
|
|
else if (tree->OperIs(GT_STORE_LCL_VAR))
|
|
{
|
|
assert(ValidateUse(tree->AsLclVar()->Data()) && "Expected value of store to be result of tailcall");
|
|
m_lclNum = tree->AsLclVar()->GetLclNum();
|
|
}
|
|
else if (tree->OperIs(GT_LCL_VAR))
|
|
{
|
|
assert(ValidateUse(tree) && "Expected use of local to be tailcall value");
|
|
}
|
|
else
|
|
{
|
|
DISPTREE(tree);
|
|
assert(!"Unexpected tree op after call marked as tailcall");
|
|
}
|
|
|
|
return WALK_CONTINUE;
|
|
}
|
|
|
|
bool ValidateUse(GenTree* node)
|
|
{
|
|
if (m_lclNum != BAD_VAR_NUM)
|
|
{
|
|
return node->OperIs(GT_LCL_VAR) && (node->AsLclVar()->GetLclNum() == m_lclNum);
|
|
}
|
|
|
|
if (node == m_tailcall)
|
|
{
|
|
return true;
|
|
}
|
|
|
|
// If we do not use the call value directly we might have passed
|
|
// this function's ret buffer arg, so verify that is being used.
|
|
CallArg* retBufferArg = m_tailcall->gtArgs.GetRetBufferArg();
|
|
if (retBufferArg != nullptr)
|
|
{
|
|
GenTree* retBufferNode = retBufferArg->GetNode();
|
|
return retBufferNode->OperIs(GT_LCL_VAR) &&
|
|
(retBufferNode->AsLclVar()->GetLclNum() == m_compiler->info.compRetBuffArg) &&
|
|
node->OperIs(GT_LCL_VAR) && (node->AsLclVar()->GetLclNum() == m_compiler->info.compRetBuffArg);
|
|
}
|
|
|
|
return false;
|
|
}
|
|
};
|
|
|
|
TailCallIRValidatorVisitor visitor(this, call);
|
|
for (Statement* stmt = compCurStmt; stmt != nullptr; stmt = stmt->GetNextStmt())
|
|
{
|
|
visitor.WalkTree(stmt->GetRootNodePointer(), nullptr);
|
|
}
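    // Keep validating through any straight-line successor blocks until the BBJ_RETURN is reached.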
|
|
|
|
BasicBlock* bb = compCurBB;
|
|
while (!bb->KindIs(BBJ_RETURN))
|
|
{
|
|
bb = bb->GetUniqueSucc();
|
|
assert((bb != nullptr) && "Expected straight flow after tailcall");
|
|
|
|
for (Statement* stmt : bb->Statements())
|
|
{
|
|
visitor.WalkTree(stmt->GetRootNodePointer(), nullptr);
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgMorphTailCallViaHelpers: Transform the given GT_CALL tree for tailcall code
|
|
// generation.
|
|
//
|
|
// Arguments:
|
|
// call - The call to transform
|
|
// helpers - The tailcall helpers provided by the runtime.
|
|
//
|
|
// Return Value:
|
|
// Returns the transformed node.
|
|
//
|
|
// Notes:
|
|
// This transforms
|
|
// GT_CALL
|
|
// {callTarget}
|
|
// {this}
|
|
// {args}
|
|
// into
|
|
// GT_COMMA
|
|
// GT_CALL StoreArgsStub
|
|
// {callTarget} (depending on flags provided by the runtime)
|
|
// {this} (as a regular arg)
|
|
// {args}
|
|
// GT_COMMA
|
|
// GT_CALL Dispatcher
|
|
// GT_LCL_ADDR ReturnAddress
|
|
// {CallTargetStub}
|
|
// GT_LCL_ADDR ReturnValue
|
|
// GT_LCL ReturnValue
|
|
// whenever the call node returns a value. If the call node does not return a
|
|
// value the last comma will not be there.
|
|
//
|
|
GenTree* Compiler::fgMorphTailCallViaHelpers(GenTreeCall* call, CORINFO_TAILCALL_HELPERS& help)
|
|
{
|
|
// R2R requires different handling but we don't support tailcall via
|
|
// helpers in R2R yet, so just leave it for now.
|
|
// TODO: R2R: TailCallViaHelper
|
|
assert(!opts.IsReadyToRun());
|
|
|
|
JITDUMP("fgMorphTailCallViaHelpers (before):\n");
|
|
DISPTREE(call);
|
|
|
|
// Don't support tail calling helper methods
|
|
assert(!call->IsHelperCall());
|
|
|
|
    // We only come down this route for tail-prefixed calls that cannot be dispatched as
    // fast tail calls.
assert(!call->IsImplicitTailCall());
|
|
|
|
// We want to use the following assert, but it can modify the IR in some cases, so we
|
|
// can't do that in an assert.
|
|
// assert(!fgCanFastTailCall(call, nullptr));
|
|
|
|
// We might or might not have called AddFinalArgsAndDetermineABIInfo before
|
|
// this point: in builds with FEATURE_FASTTAILCALL we will have called it
|
|
// when checking if we could do a fast tailcall, so it is possible we have
|
|
// added extra IR for non-standard args that we must get rid of. Get rid of
|
|
// the extra arguments here.
|
|
call->gtArgs.ResetFinalArgsAndABIInfo();
|
|
|
|
GenTree* callDispatcherAndGetResult = fgCreateCallDispatcherAndGetResult(call, help.hCallTarget, help.hDispatcher);
|
|
|
|
// Change the call to a call to the StoreArgs stub.
|
|
if (call->gtArgs.HasRetBuffer())
|
|
{
|
|
JITDUMP("Removing retbuf");
|
|
|
|
call->gtArgs.Remove(call->gtArgs.GetRetBufferArg());
|
|
call->gtCallMoreFlags &= ~GTF_CALL_M_RETBUFFARG;
|
|
}
|
|
|
|
const bool stubNeedsTargetFnPtr = (help.flags & CORINFO_TAILCALL_STORE_TARGET) != 0;
|
|
|
|
GenTree* doBeforeStoreArgsStub = nullptr;
|
|
GenTree* thisPtrStubArg = nullptr;
|
|
|
|
// Put 'this' in normal param list
|
|
if (call->gtArgs.HasThisPointer())
|
|
{
|
|
JITDUMP("Moving this pointer into arg list\n");
|
|
CallArg* thisArg = call->gtArgs.GetThisArg();
|
|
GenTree* objp = thisArg->GetNode();
|
|
GenTree* thisPtr = nullptr;
|
|
|
|
// JIT will need one or two copies of "this" in the following cases:
|
|
// 1) the call needs null check;
|
|
// 2) StoreArgs stub needs the target function pointer address and if the call is virtual
|
|
// the stub also needs "this" in order to evaluate the target.
|
|
|
|
const bool callNeedsNullCheck = call->NeedsNullCheck();
|
|
const bool stubNeedsThisPtr = stubNeedsTargetFnPtr && call->IsVirtual();
|
|
|
|
if (callNeedsNullCheck || stubNeedsThisPtr)
|
|
{
|
|
// Clone "this" if "this" has no side effects.
|
|
if ((objp->gtFlags & GTF_SIDE_EFFECT) == 0)
|
|
{
|
|
thisPtr = gtClone(objp, true);
|
|
}
|
|
|
|
// Create a temp and spill "this" to the temp if "this" has side effects or "this" was too complex to clone.
|
|
if (thisPtr == nullptr)
|
|
{
|
|
const unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
|
|
|
|
// tmp = "this"
|
|
doBeforeStoreArgsStub = gtNewTempStore(lclNum, objp);
|
|
|
|
if (callNeedsNullCheck)
|
|
{
|
|
// COMMA(tmp = "this", deref(tmp))
|
|
GenTree* tmp = gtNewLclvNode(lclNum, objp->TypeGet());
|
|
GenTree* nullcheck = gtNewNullCheck(tmp, compCurBB);
|
|
doBeforeStoreArgsStub = gtNewOperNode(GT_COMMA, TYP_VOID, doBeforeStoreArgsStub, nullcheck);
|
|
}
|
|
|
|
thisPtr = gtNewLclvNode(lclNum, objp->TypeGet());
|
|
|
|
if (stubNeedsThisPtr)
|
|
{
|
|
thisPtrStubArg = gtNewLclvNode(lclNum, objp->TypeGet());
|
|
}
|
|
}
|
|
else
|
|
{
|
|
if (callNeedsNullCheck)
|
|
{
|
|
// deref("this")
|
|
doBeforeStoreArgsStub = gtNewNullCheck(objp, compCurBB);
|
|
|
|
if (stubNeedsThisPtr)
|
|
{
|
|
thisPtrStubArg = gtClone(objp, true);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
assert(stubNeedsThisPtr);
|
|
|
|
thisPtrStubArg = objp;
|
|
}
|
|
}
|
|
|
|
call->gtFlags &= ~GTF_CALL_NULLCHECK;
|
|
|
|
assert((thisPtrStubArg != nullptr) == stubNeedsThisPtr);
|
|
}
|
|
else
|
|
{
|
|
thisPtr = objp;
|
|
}
|
|
|
|
// During rationalization tmp="this" and null check will be materialized
|
|
// in the right execution order.
|
|
call->gtArgs.PushFront(this, NewCallArg::Primitive(thisPtr, thisArg->GetSignatureType()));
|
|
call->gtArgs.Remove(thisArg);
|
|
}
|
|
|
|
// We may need to pass the target, for instance for calli or generic methods
|
|
// where we pass instantiating stub.
|
|
if (stubNeedsTargetFnPtr)
|
|
{
|
|
JITDUMP("Adding target since VM requested it\n");
|
|
GenTree* target;
|
|
if (!call->IsVirtual())
|
|
{
|
|
if (call->gtCallType == CT_INDIRECT)
|
|
{
|
|
noway_assert(call->gtCallAddr != nullptr);
|
|
target = call->gtCallAddr;
|
|
}
|
|
else
|
|
{
|
|
CORINFO_CONST_LOOKUP addrInfo;
|
|
info.compCompHnd->getFunctionEntryPoint(call->gtCallMethHnd, &addrInfo);
|
|
|
|
CORINFO_GENERIC_HANDLE handle = nullptr;
|
|
void* pIndirection = nullptr;
|
|
assert(addrInfo.accessType != IAT_PPVALUE && addrInfo.accessType != IAT_RELPVALUE);
|
|
|
|
if (addrInfo.accessType == IAT_VALUE)
|
|
{
|
|
handle = addrInfo.handle;
|
|
}
|
|
else if (addrInfo.accessType == IAT_PVALUE)
|
|
{
|
|
pIndirection = addrInfo.addr;
|
|
}
|
|
target = gtNewIconEmbHndNode(handle, pIndirection, GTF_ICON_FTN_ADDR, call->gtCallMethHnd);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
assert(!call->tailCallInfo->GetSig()->hasTypeArg());
|
|
|
|
CORINFO_CALL_INFO callInfo;
|
|
unsigned flags = CORINFO_CALLINFO_LDFTN;
|
|
if (call->tailCallInfo->IsCallvirt())
|
|
{
|
|
flags |= CORINFO_CALLINFO_CALLVIRT;
|
|
}
|
|
|
|
eeGetCallInfo(call->tailCallInfo->GetToken(), nullptr, (CORINFO_CALLINFO_FLAGS)flags, &callInfo);
|
|
target = getVirtMethodPointerTree(thisPtrStubArg, call->tailCallInfo->GetToken(), &callInfo);
|
|
}
|
|
|
|
call->gtArgs.PushBack(this, NewCallArg::Primitive(target));
|
|
}
|
|
|
|
// This is now a direct call to the store args stub and not a tailcall.
|
|
call->gtCallType = CT_USER_FUNC;
|
|
call->gtCallMethHnd = help.hStoreArgs;
|
|
call->gtFlags &= ~GTF_CALL_VIRT_KIND_MASK;
|
|
call->gtCallMoreFlags &= ~(GTF_CALL_M_TAILCALL | GTF_CALL_M_DELEGATE_INV | GTF_CALL_M_WRAPPER_DELEGATE_INV);
|
|
|
|
// The store-args stub returns no value.
|
|
call->gtRetClsHnd = nullptr;
|
|
call->gtType = TYP_VOID;
|
|
call->gtReturnType = TYP_VOID;
|
|
|
|
GenTree* callStoreArgsStub = call;
|
|
|
|
if (doBeforeStoreArgsStub != nullptr)
|
|
{
|
|
callStoreArgsStub = gtNewOperNode(GT_COMMA, TYP_VOID, doBeforeStoreArgsStub, callStoreArgsStub);
|
|
}
|
|
|
|
GenTree* finalTree =
|
|
gtNewOperNode(GT_COMMA, callDispatcherAndGetResult->TypeGet(), callStoreArgsStub, callDispatcherAndGetResult);
|
|
|
|
finalTree = fgMorphTree(finalTree);
|
|
|
|
JITDUMP("fgMorphTailCallViaHelpers (after):\n");
|
|
DISPTREE(finalTree);
|
|
return finalTree;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgCreateCallDispatcherAndGetResult: Given a call
|
|
// CALL
|
|
// {callTarget}
|
|
// {retbuf}
|
|
// {this}
|
|
// {args}
|
|
// create a similarly typed node that calls the tailcall dispatcher and returns
|
|
// the result, as in the following:
|
|
// COMMA
|
|
// CALL TailCallDispatcher
|
|
// ADDR ReturnAddress
|
|
// &CallTargetFunc
|
|
// ADDR RetValue
|
|
// RetValue
|
|
// If the call has type TYP_VOID, only create the CALL node.
|
|
//
|
|
// Arguments:
|
|
// origCall - the call
|
|
// callTargetStubHnd - the handle of the CallTarget function (this is a special
|
|
// IL stub created by the runtime)
|
|
// dispatcherHnd - the handle of the tailcall dispatcher function
|
|
//
|
|
// Return Value:
|
|
// A node that can be used in place of the original call.
|
|
//
|
|
GenTree* Compiler::fgCreateCallDispatcherAndGetResult(GenTreeCall* origCall,
|
|
CORINFO_METHOD_HANDLE callTargetStubHnd,
|
|
CORINFO_METHOD_HANDLE dispatcherHnd)
|
|
{
|
|
GenTreeCall* callDispatcherNode = gtNewCallNode(CT_USER_FUNC, dispatcherHnd, TYP_VOID, fgMorphStmt->GetDebugInfo());
|
|
// The dispatcher has signature
|
|
// void DispatchTailCalls(void* callersRetAddrSlot, void* callTarget, ref byte retValue)
|
|
|
|
// Add return value arg.
|
|
GenTree* retValArg;
|
|
GenTree* retVal = nullptr;
|
|
unsigned int newRetLcl = BAD_VAR_NUM;
|
|
|
|
if (origCall->gtArgs.HasRetBuffer())
|
|
{
|
|
JITDUMP("Transferring retbuf\n");
|
|
GenTree* retBufArg = origCall->gtArgs.GetRetBufferArg()->GetNode();
|
|
|
|
assert(info.compRetBuffArg != BAD_VAR_NUM);
|
|
assert(retBufArg->OperIsLocal());
|
|
assert(retBufArg->AsLclVarCommon()->GetLclNum() == info.compRetBuffArg);
|
|
|
|
retValArg = retBufArg;
|
|
|
|
if (origCall->gtType != TYP_VOID)
|
|
{
|
|
retVal = gtClone(retBufArg);
|
|
}
|
|
}
|
|
else if (origCall->gtType != TYP_VOID)
|
|
{
|
|
JITDUMP("Creating a new temp for the return value\n");
|
|
newRetLcl = lvaGrabTemp(false DEBUGARG("Return value for tail call dispatcher"));
|
|
if (varTypeIsStruct(origCall->gtType))
|
|
{
|
|
lvaSetStruct(newRetLcl, origCall->gtRetClsHnd, false);
|
|
}
|
|
else
|
|
{
|
|
// Since we pass a reference to the return value to the dispatcher
|
|
// we need to use the real return type so we can normalize it on
|
|
// load when we return it.
|
|
lvaTable[newRetLcl].lvType = (var_types)origCall->gtReturnType;
|
|
}
|
|
|
|
lvaSetVarAddrExposed(newRetLcl DEBUGARG(AddressExposedReason::DISPATCH_RET_BUF));
|
|
|
|
if (varTypeIsStruct(origCall) && compMethodReturnsMultiRegRetType())
|
|
{
|
|
lvaGetDesc(newRetLcl)->lvIsMultiRegRet = true;
|
|
}
|
|
|
|
retValArg = gtNewLclVarAddrNode(newRetLcl);
|
|
retVal = gtNewLclvNode(newRetLcl, genActualType(lvaTable[newRetLcl].lvType));
|
|
}
|
|
else
|
|
{
|
|
JITDUMP("No return value so using null pointer as arg\n");
|
|
retValArg = gtNewZeroConNode(TYP_I_IMPL);
|
|
}
|
|
|
|
// Args are (void** callersReturnAddressSlot, void* callTarget, ref byte retVal)
|
|
GenTree* callTarget = new (this, GT_FTN_ADDR) GenTreeFptrVal(TYP_I_IMPL, callTargetStubHnd);
|
|
|
|
// Add the caller's return address slot.
|
|
if (lvaRetAddrVar == BAD_VAR_NUM)
|
|
{
|
|
lvaRetAddrVar = lvaGrabTemp(false DEBUGARG("Return address"));
|
|
lvaTable[lvaRetAddrVar].lvType = TYP_I_IMPL;
|
|
lvaSetVarAddrExposed(lvaRetAddrVar DEBUGARG(AddressExposedReason::DISPATCH_RET_BUF));
|
|
}
|
|
|
|
GenTree* retAddrSlot = gtNewLclVarAddrNode(lvaRetAddrVar);
|
|
|
|
NewCallArg retAddrSlotArg = NewCallArg::Primitive(retAddrSlot);
|
|
NewCallArg callTargetArg = NewCallArg::Primitive(callTarget);
|
|
NewCallArg retValCallArg = NewCallArg::Primitive(retValArg);
|
|
callDispatcherNode->gtArgs.PushFront(this, retAddrSlotArg, callTargetArg, retValCallArg);
|
|
|
|
if (origCall->gtType == TYP_VOID)
|
|
{
|
|
return callDispatcherNode;
|
|
}
|
|
|
|
assert(retVal != nullptr);
|
|
GenTree* comma = gtNewOperNode(GT_COMMA, origCall->TypeGet(), callDispatcherNode, retVal);
|
|
|
|
// The JIT seems to want to CSE this comma and messes up multi-reg ret
|
|
// values in the process. Just avoid CSE'ing this tree entirely in that
|
|
// case.
|
|
if (origCall->HasMultiRegRetVal())
|
|
{
|
|
comma->gtFlags |= GTF_DONT_CSE;
|
|
}
|
|
|
|
return comma;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// getLookupTree: get a lookup tree
|
|
//
|
|
// Arguments:
|
|
// pResolvedToken - resolved token of the call
|
|
// pLookup - the lookup to get the tree for
|
|
// handleFlags - flags to set on the result node
|
|
// compileTimeHandle - compile-time handle corresponding to the lookup
|
|
//
|
|
// Return Value:
|
|
// A node representing the lookup tree
|
|
//
|
|
GenTree* Compiler::getLookupTree(CORINFO_RESOLVED_TOKEN* pResolvedToken,
|
|
CORINFO_LOOKUP* pLookup,
|
|
GenTreeFlags handleFlags,
|
|
void* compileTimeHandle)
|
|
{
|
|
if (!pLookup->lookupKind.needsRuntimeLookup)
|
|
{
|
|
// No runtime lookup is required.
|
|
// Access is direct or memory-indirect (of a fixed address) reference
|
|
|
|
CORINFO_GENERIC_HANDLE handle = nullptr;
|
|
void* pIndirection = nullptr;
|
|
assert(pLookup->constLookup.accessType != IAT_PPVALUE && pLookup->constLookup.accessType != IAT_RELPVALUE);
|
|
|
|
if (pLookup->constLookup.accessType == IAT_VALUE)
|
|
{
|
|
handle = pLookup->constLookup.handle;
|
|
}
|
|
else if (pLookup->constLookup.accessType == IAT_PVALUE)
|
|
{
|
|
pIndirection = pLookup->constLookup.addr;
|
|
}
|
|
|
|
return gtNewIconEmbHndNode(handle, pIndirection, handleFlags, compileTimeHandle);
|
|
}
|
|
|
|
return getRuntimeLookupTree(pResolvedToken, pLookup, compileTimeHandle);
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// getRuntimeLookupTree: get a tree for a runtime lookup
|
|
//
|
|
// Arguments:
|
|
// pResolvedToken - resolved token of the call
|
|
// pLookup - the lookup to get the tree for
|
|
// compileTimeHandle - compile-time handle corresponding to the lookup
|
|
//
|
|
// Return Value:
|
|
// A node representing the runtime lookup tree
|
|
//
|
|
GenTree* Compiler::getRuntimeLookupTree(CORINFO_RESOLVED_TOKEN* pResolvedToken,
|
|
CORINFO_LOOKUP* pLookup,
|
|
void* compileTimeHandle)
|
|
{
|
|
assert(!compIsForInlining());
|
|
|
|
CORINFO_RUNTIME_LOOKUP* pRuntimeLookup = &pLookup->runtimeLookup;
|
|
|
|
// If pRuntimeLookup->indirections is equal to CORINFO_USEHELPER, it specifies that a run-time helper should be
|
|
// used; otherwise, it specifies the number of indirections via pRuntimeLookup->offsets array.
|
|
if ((pRuntimeLookup->indirections == CORINFO_USEHELPER) || (pRuntimeLookup->indirections == CORINFO_USENULL) ||
|
|
pRuntimeLookup->testForNull)
|
|
{
|
|
return gtNewRuntimeLookupHelperCallNode(pRuntimeLookup,
|
|
getRuntimeContextTree(pLookup->lookupKind.runtimeLookupKind),
|
|
compileTimeHandle);
|
|
}
|
|
|
|
GenTree* result = getRuntimeContextTree(pLookup->lookupKind.runtimeLookupKind);
|
|
|
|
ArrayStack<GenTree*> stmts(getAllocator(CMK_ArrayStack));
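    // Clones the tree when it has no global effects; otherwise spills it to a fresh temp (the store
    // is queued on 'stmts') and returns an extra use of that temp.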
|
|
|
|
auto cloneTree = [&](GenTree** tree DEBUGARG(const char* reason)) -> GenTree* {
|
|
if (!((*tree)->gtFlags & GTF_GLOB_EFFECT))
|
|
{
|
|
GenTree* clone = gtClone(*tree, true);
|
|
|
|
if (clone)
|
|
{
|
|
return clone;
|
|
}
|
|
}
|
|
|
|
unsigned temp = lvaGrabTemp(true DEBUGARG(reason));
|
|
stmts.Push(gtNewTempStore(temp, *tree));
|
|
*tree = gtNewLclvNode(temp, lvaGetActualType(temp));
|
|
return gtNewLclvNode(temp, lvaGetActualType(temp));
|
|
};
|
|
|
|
// Apply repeated indirections
|
|
for (WORD i = 0; i < pRuntimeLookup->indirections; i++)
|
|
{
|
|
GenTree* preInd = nullptr;
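        // For indirect offsets the slot at this level holds an offset that must be added back to the
        // pre-indirection pointer, so remember that pointer first.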
|
|
if ((i == 1 && pRuntimeLookup->indirectFirstOffset) || (i == 2 && pRuntimeLookup->indirectSecondOffset))
|
|
{
|
|
preInd = cloneTree(&result DEBUGARG("getRuntimeLookupTree indirectOffset"));
|
|
}
|
|
|
|
if (i != 0)
|
|
{
|
|
result = gtNewIndir(TYP_I_IMPL, result, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
|
|
}
|
|
|
|
if ((i == 1 && pRuntimeLookup->indirectFirstOffset) || (i == 2 && pRuntimeLookup->indirectSecondOffset))
|
|
{
|
|
result = gtNewOperNode(GT_ADD, TYP_I_IMPL, preInd, result);
|
|
}
|
|
|
|
if (pRuntimeLookup->offsets[i] != 0)
|
|
{
|
|
result = gtNewOperNode(GT_ADD, TYP_I_IMPL, result, gtNewIconNode(pRuntimeLookup->offsets[i], TYP_I_IMPL));
|
|
}
|
|
}
|
|
|
|
assert(!pRuntimeLookup->testForNull);
|
|
if (pRuntimeLookup->indirections > 0)
|
|
{
|
|
result = gtNewIndir(TYP_I_IMPL, result, GTF_IND_NONFAULTING);
|
|
}
|
|
|
|
// Produces GT_COMMA(stmt1, GT_COMMA(stmt2, ... GT_COMMA(stmtN, result)))
|
|
|
|
while (!stmts.Empty())
|
|
{
|
|
result = gtNewOperNode(GT_COMMA, TYP_I_IMPL, stmts.Pop(), result);
|
|
}
|
|
|
|
DISPTREE(result);
|
|
return result;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// getVirtMethodPointerTree: get a tree for a virtual method pointer
|
|
//
|
|
// Arguments:
|
|
// thisPtr - tree representing `this` pointer
|
|
// pResolvedToken - pointer to the resolved token of the method
|
|
// pCallInfo - pointer to call info
|
|
//
|
|
// Return Value:
|
|
// A node representing the virtual method pointer
|
|
|
|
GenTree* Compiler::getVirtMethodPointerTree(GenTree* thisPtr,
|
|
CORINFO_RESOLVED_TOKEN* pResolvedToken,
|
|
CORINFO_CALL_INFO* pCallInfo)
|
|
{
|
|
GenTree* exactTypeDesc = getTokenHandleTree(pResolvedToken, true);
|
|
GenTree* exactMethodDesc = getTokenHandleTree(pResolvedToken, false);
|
|
|
|
return gtNewHelperCallNode(CORINFO_HELP_VIRTUAL_FUNC_PTR, TYP_I_IMPL, thisPtr, exactTypeDesc, exactMethodDesc);
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// getTokenHandleTree: get a handle tree for a token. This method should never
|
|
// be called for tokens imported from inlinees.
|
|
//
|
|
// Arguments:
|
|
// pResolvedToken - token to get a handle for
|
|
// parent - whether parent should be imported
|
|
//
|
|
// Return Value:
|
|
// A node representing the virtual method pointer
|
|
|
|
GenTree* Compiler::getTokenHandleTree(CORINFO_RESOLVED_TOKEN* pResolvedToken, bool parent)
|
|
{
|
|
CORINFO_GENERICHANDLE_RESULT embedInfo;
|
|
|
|
// NOTE: inlining is done at this point, so we don't know which method contained this token.
|
|
// It's fine because currently this is never used for something that belongs to an inlinee.
|
|
    // Namely, we rely on the fact that:
// 1) Methods with EH are never inlined
|
|
// 2) Methods with explicit tail calls are never inlined
|
|
//
|
|
info.compCompHnd->embedGenericHandle(pResolvedToken, parent, info.compMethodHnd, &embedInfo);
|
|
|
|
GenTree* result = getLookupTree(pResolvedToken, &embedInfo.lookup, gtTokenToIconFlags(pResolvedToken->token),
|
|
embedInfo.compileTimeHandle);
|
|
|
|
// If we have a result and it requires runtime lookup, wrap it in a runtime lookup node.
|
|
if ((result != nullptr) && embedInfo.lookup.lookupKind.needsRuntimeLookup)
|
|
{
|
|
result = gtNewRuntimeLookup(embedInfo.compileTimeHandle, embedInfo.handleType, result);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/*****************************************************************************
|
|
*
|
|
* Transform the given GT_CALL tree for tail call via JIT helper.
|
|
*/
|
|
void Compiler::fgMorphTailCallViaJitHelper(GenTreeCall* call)
|
|
{
|
|
JITDUMP("fgMorphTailCallViaJitHelper (before):\n");
|
|
DISPTREE(call);
|
|
|
|
// For the helper-assisted tail calls, we need to push all the arguments
|
|
// into a single list, and then add a few extra at the beginning or end.
|
|
//
|
|
// For x86, the tailcall helper is defined as:
|
|
//
|
|
// JIT_TailCall(<function args>, int numberOfOldStackArgsWords, int numberOfNewStackArgsWords, int flags, void*
|
|
// callTarget)
|
|
//
|
|
// Note that the special arguments are on the stack, whereas the function arguments follow
|
|
// the normal convention: there might be register arguments in ECX and EDX. The stack will
|
|
// look like (highest address at the top):
|
|
// first normal stack argument
|
|
// ...
|
|
// last normal stack argument
|
|
// numberOfOldStackArgs
|
|
// numberOfNewStackArgs
|
|
// flags
|
|
// callTarget
|
|
//
|
|
// Each special arg is 4 bytes.
|
|
//
|
|
// 'flags' is a bitmask where:
|
|
// 1 == restore callee-save registers (EDI,ESI,EBX). The JIT always saves all
|
|
// callee-saved registers for tailcall functions. Note that the helper assumes
|
|
// that the callee-saved registers live immediately below EBP, and must have been
|
|
// pushed in this order: EDI, ESI, EBX.
|
|
// 2 == call target is a virtual stub dispatch.
|
|
//
|
|
// The x86 tail call helper lives in VM\i386\jithelp.asm. See that function for more details
|
|
// on the custom calling convention.
|
|
|
|
// Check for PInvoke call types that we don't handle in codegen yet.
|
|
assert(!call->IsUnmanaged());
|
|
assert(call->IsVirtual() || (call->gtCallType != CT_INDIRECT) || (call->gtCallCookie == nullptr));
|
|
|
|
// Don't support tail calling helper methods
|
|
assert(!call->IsHelperCall());
|
|
|
|
    // We only come down this route for tail-prefixed calls that cannot be dispatched as
    // fast tail calls.
assert(!call->IsImplicitTailCall());
|
|
|
|
// We want to use the following assert, but it can modify the IR in some cases, so we
|
|
// can't do that in an assert.
|
|
// assert(!fgCanFastTailCall(call, nullptr));
|
|
|
|
// First move the 'this' pointer (if any) onto the regular arg list. We do this because
|
|
// we are going to prepend special arguments onto the argument list (for non-x86 platforms),
|
|
// and thus shift where the 'this' pointer will be passed to a later argument slot. In
|
|
// addition, for all platforms, we are going to change the call into a helper call. Our code
|
|
// generation code for handling calls to helpers does not handle 'this' pointers. So, when we
|
|
// do this transformation, we must explicitly create a null 'this' pointer check, if required,
|
|
// since special 'this' pointer handling will no longer kick in.
|
|
//
|
|
// Some call types, such as virtual vtable calls, require creating a call address expression
|
|
// that involves the "this" pointer. Lowering will sometimes create an embedded statement
|
|
// to create a temporary that is assigned to the "this" pointer expression, and then use
|
|
// that temp to create the call address expression. This temp creation embedded statement
|
|
// will occur immediately before the "this" pointer argument, and then will be used for both
|
|
// the "this" pointer argument as well as the call address expression. In the normal ordering,
|
|
// the embedded statement establishing the "this" pointer temp will execute before both uses
|
|
// of the temp. However, for tail calls via a helper, we move the "this" pointer onto the
|
|
// normal call argument list, and insert a placeholder which will hold the call address
|
|
// expression. For non-x86, things are ok, because the order of execution of these is not
|
|
// altered. However, for x86, the call address expression is inserted as the *last* argument
|
|
// in the argument list, *after* the "this" pointer. It will be put on the stack, and be
|
|
// evaluated first. To ensure we don't end up with out-of-order temp definition and use,
|
|
// for those cases where call lowering creates an embedded form temp of "this", we will
|
|
// create a temp here, early, that will later get morphed correctly.
|
|
|
|
CallArg* thisArg = call->gtArgs.GetThisArg();
|
|
if (thisArg != nullptr)
|
|
{
|
|
GenTree* thisPtr = nullptr;
|
|
GenTree* objp = thisArg->GetNode();
|
|
|
|
if ((call->IsDelegateInvoke() || call->IsVirtualVtable()) && !objp->OperIs(GT_LCL_VAR))
|
|
{
|
|
// tmp = "this"
|
|
unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
|
|
GenTree* store = gtNewTempStore(lclNum, objp);
|
|
|
|
// COMMA(tmp = "this", tmp)
|
|
var_types vt = objp->TypeGet();
|
|
GenTree* tmp = gtNewLclvNode(lclNum, vt);
|
|
thisPtr = gtNewOperNode(GT_COMMA, vt, store, tmp);
|
|
|
|
objp = thisPtr;
|
|
}
|
|
|
|
if (call->NeedsNullCheck())
|
|
{
|
|
// clone "this" if "this" has no side effects.
|
|
if ((thisPtr == nullptr) && !(objp->gtFlags & GTF_SIDE_EFFECT))
|
|
{
|
|
thisPtr = gtClone(objp, true);
|
|
}
|
|
|
|
var_types vt = objp->TypeGet();
|
|
if (thisPtr == nullptr)
|
|
{
|
|
// create a temp if either "this" has side effects or "this" is too complex to clone.
|
|
|
|
// tmp = "this"
|
|
unsigned lclNum = lvaGrabTemp(true DEBUGARG("tail call thisptr"));
|
|
GenTree* store = gtNewTempStore(lclNum, objp);
|
|
|
|
// COMMA(tmp = "this", deref(tmp))
|
|
GenTree* tmp = gtNewLclvNode(lclNum, vt);
|
|
GenTree* nullcheck = gtNewNullCheck(tmp, compCurBB);
|
|
store = gtNewOperNode(GT_COMMA, TYP_VOID, store, nullcheck);
|
|
|
|
// COMMA(COMMA(tmp = "this", deref(tmp)), tmp)
|
|
thisPtr = gtNewOperNode(GT_COMMA, vt, store, gtNewLclvNode(lclNum, vt));
|
|
}
|
|
else
|
|
{
|
|
// thisPtr = COMMA(deref("this"), "this")
|
|
GenTree* nullcheck = gtNewNullCheck(thisPtr, compCurBB);
|
|
thisPtr = gtNewOperNode(GT_COMMA, vt, nullcheck, gtClone(objp, true));
|
|
}
|
|
|
|
call->gtFlags &= ~GTF_CALL_NULLCHECK;
|
|
}
|
|
else
|
|
{
|
|
thisPtr = objp;
|
|
}
|
|
|
|
// TODO-Cleanup: we leave it as a virtual stub call to
|
|
// use logic in `LowerVirtualStubCall`, clear GTF_CALL_VIRT_KIND_MASK here
|
|
// and change `LowerCall` to recognize it as a direct call.
|
|
|
|
// During rationalization tmp="this" and null check will
|
|
// materialize as embedded stmts in right execution order.
|
|
assert(thisPtr != nullptr);
|
|
call->gtArgs.PushFront(this, NewCallArg::Primitive(thisPtr, thisArg->GetSignatureType()));
|
|
call->gtArgs.Remove(thisArg);
|
|
}
|
|
|
|
unsigned nOldStkArgsWords =
|
|
(compArgSize - (codeGen->intRegState.rsCalleeRegArgCount * REGSIZE_BYTES)) / REGSIZE_BYTES;
|
|
GenTree* arg3Node = gtNewIconNode((ssize_t)nOldStkArgsWords, TYP_I_IMPL);
|
|
CallArg* arg3 =
|
|
call->gtArgs.PushBack(this, NewCallArg::Primitive(arg3Node).WellKnown(WellKnownArg::X86TailCallSpecialArg));
|
|
// Inject a placeholder for the count of outgoing stack arguments that the Lowering phase will generate.
|
|
// The constant will be replaced.
|
|
GenTree* arg2Node = gtNewIconNode(9, TYP_I_IMPL);
|
|
CallArg* arg2 =
|
|
call->gtArgs.InsertAfter(this, arg3,
|
|
NewCallArg::Primitive(arg2Node).WellKnown(WellKnownArg::X86TailCallSpecialArg));
|
|
// Inject a placeholder for the flags.
|
|
// The constant will be replaced.
|
|
GenTree* arg1Node = gtNewIconNode(8, TYP_I_IMPL);
|
|
CallArg* arg1 =
|
|
call->gtArgs.InsertAfter(this, arg2,
|
|
NewCallArg::Primitive(arg1Node).WellKnown(WellKnownArg::X86TailCallSpecialArg));
|
|
// Inject a placeholder for the real call target that the Lowering phase will generate.
|
|
// The constant will be replaced.
|
|
GenTree* arg0Node = gtNewIconNode(7, TYP_I_IMPL);
|
|
CallArg* arg0 =
|
|
call->gtArgs.InsertAfter(this, arg1,
|
|
NewCallArg::Primitive(arg0Node).WellKnown(WellKnownArg::X86TailCallSpecialArg));
|
|
|
|
// It is now a varargs tail call.
|
|
call->gtArgs.SetIsVarArgs();
|
|
call->gtFlags &= ~GTF_CALL_POP_ARGS;
|
|
|
|
// The function is responsible for doing explicit null check when it is necessary.
|
|
assert(!call->NeedsNullCheck());
|
|
|
|
JITDUMP("fgMorphTailCallViaJitHelper (after):\n");
|
|
DISPTREE(call);
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgGetStubAddrArg: Return the virtual stub address for the given call.
|
|
//
|
|
// Notes:
|
|
// the JIT must place the address of the stub used to load the call target,
|
|
// the "stub indirection cell", in special call argument with special register.
|
|
//
|
|
// Arguments:
|
|
// call - a call that needs virtual stub dispatching.
|
|
//
|
|
// Return Value:
|
|
// addr tree
|
|
//
|
|
GenTree* Compiler::fgGetStubAddrArg(GenTreeCall* call)
|
|
{
|
|
assert(call->IsVirtualStub());
|
|
GenTree* stubAddrArg;
|
|
if (call->gtCallType == CT_INDIRECT)
|
|
{
|
|
stubAddrArg = gtClone(call->gtCallAddr, true);
|
|
}
|
|
else
|
|
{
|
|
assert(call->gtCallMoreFlags & GTF_CALL_M_VIRTSTUB_REL_INDIRECT);
|
|
ssize_t addr = ssize_t(call->gtStubCallStubAddr);
|
|
stubAddrArg = gtNewIconHandleNode(addr, GTF_ICON_FTN_ADDR);
|
|
INDEBUG(stubAddrArg->AsIntCon()->gtTargetHandle = (size_t)call->gtCallMethHnd);
|
|
}
|
|
assert(stubAddrArg != nullptr);
|
|
return stubAddrArg;
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// fgGetArgParameterLclNum : Get the lcl num for the parameter that
// corresponds to the argument to a recursive call.
|
|
//
|
|
// Notes:
|
|
// Due to non-standard args this is not just the index of the argument in
|
|
// the arg list. For example, in R2R compilations we will have added a
|
|
// non-standard arg for the R2R indirection cell.
|
|
//
|
|
// Arguments:
|
|
// arg - the arg
|
|
//
|
|
unsigned Compiler::fgGetArgParameterLclNum(GenTreeCall* call, CallArg* arg)
|
|
{
|
|
unsigned num = 0;
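    // Count only the args that map to IL parameters, stopping when we reach 'arg'.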
|
|
|
|
for (CallArg& otherArg : call->gtArgs.Args())
|
|
{
|
|
if (&otherArg == arg)
|
|
{
|
|
break;
|
|
}
|
|
|
|
// Late added args add extra args that do not map to IL parameters and that we should not reassign.
|
|
if (!otherArg.IsArgAddedLate())
|
|
{
|
|
num++;
|
|
}
|
|
}
|
|
|
|
return num;
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// fgMorphRecursiveFastTailCallIntoLoop : Transform a recursive fast tail call into a loop.
|
|
//
|
|
//
|
|
// Arguments:
|
|
// block - basic block ending with a recursive fast tail call
|
|
// recursiveTailCall - recursive tail call to transform
|
|
//
|
|
// Notes:
|
|
// The legality of the transformation is ensured by the checks in endsWithTailCallConvertibleToLoop.
|
|
|
|
void Compiler::fgMorphRecursiveFastTailCallIntoLoop(BasicBlock* block, GenTreeCall* recursiveTailCall)
|
|
{
|
|
assert(recursiveTailCall->IsTailCallConvertibleToLoop());
|
|
Statement* lastStmt = block->lastStmt();
|
|
assert(recursiveTailCall == lastStmt->GetRootNode());
|
|
|
|
// Transform recursive tail call into a loop.
|
|
|
|
Statement* earlyArgInsertionPoint = lastStmt;
|
|
const DebugInfo& callDI = lastStmt->GetDebugInfo();
|
|
|
|
// All arguments whose trees may involve caller parameter local variables need to be assigned to temps first;
|
|
// then the temps need to be assigned to the method parameters. This is done so that the caller
|
|
// parameters are not re-assigned before call arguments depending on them are evaluated.
|
|
// tmpAssignmentInsertionPoint and paramAssignmentInsertionPoint keep track of
|
|
// where the next temp or parameter assignment should be inserted.
|
|
|
|
// In the example below the first call argument (arg1 - 1) needs to be assigned to a temp first
|
|
// while the second call argument (const 1) doesn't.
|
|
// Basic block before tail recursion elimination:
|
|
// ***** BB04, stmt 1 (top level)
|
|
// [000037] ------------ * stmtExpr void (top level) (IL 0x00A...0x013)
|
|
// [000033] --C - G------ - \--* call void RecursiveMethod
|
|
// [000030] ------------ | / --* const int - 1
|
|
// [000031] ------------arg0 in rcx + --* +int
|
|
// [000029] ------------ | \--* lclVar int V00 arg1
|
|
// [000032] ------------arg1 in rdx \--* const int 1
|
|
//
|
|
//
|
|
// Basic block after tail recursion elimination :
|
|
// ***** BB04, stmt 1 (top level)
|
|
// [000051] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
|
|
// [000030] ------------ | / --* const int - 1
|
|
// [000031] ------------ | / --* +int
|
|
// [000029] ------------ | | \--* lclVar int V00 arg1
|
|
// [000050] - A---------- \--* = int
|
|
// [000049] D------N---- \--* lclVar int V02 tmp0
|
|
//
|
|
// ***** BB04, stmt 2 (top level)
|
|
// [000055] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
|
|
// [000052] ------------ | / --* lclVar int V02 tmp0
|
|
// [000054] - A---------- \--* = int
|
|
// [000053] D------N---- \--* lclVar int V00 arg0
|
|
|
|
// ***** BB04, stmt 3 (top level)
|
|
// [000058] ------------ * stmtExpr void (top level) (IL 0x00A... ? ? ? )
|
|
// [000032] ------------ | / --* const int 1
|
|
// [000057] - A---------- \--* = int
|
|
// [000056] D------N---- \--* lclVar int V01 arg1
|
|
|
|
Statement* tmpAssignmentInsertionPoint = lastStmt;
|
|
Statement* paramAssignmentInsertionPoint = lastStmt;
|
|
|
|
// Process early args. They may contain both setup statements for late args and actual args.
|
|
for (CallArg& arg : recursiveTailCall->gtArgs.EarlyArgs())
|
|
{
|
|
GenTree* earlyArg = arg.GetEarlyNode();
|
|
if (arg.GetLateNode() != nullptr)
|
|
{
|
|
// This is a setup node so we need to hoist it.
|
|
Statement* earlyArgStmt = gtNewStmt(earlyArg, callDI);
|
|
fgInsertStmtBefore(block, earlyArgInsertionPoint, earlyArgStmt);
|
|
}
|
|
else
|
|
{
|
|
// This is an actual argument that needs to be assigned to the corresponding caller parameter.
|
|
// Late-added non-standard args are extra args that are not passed as locals, so skip those
|
|
if (!arg.IsArgAddedLate())
|
|
{
|
|
Statement* paramAssignStmt =
|
|
fgAssignRecursiveCallArgToCallerParam(earlyArg, &arg,
|
|
fgGetArgParameterLclNum(recursiveTailCall, &arg), block,
|
|
callDI, tmpAssignmentInsertionPoint,
|
|
paramAssignmentInsertionPoint);
|
|
if ((tmpAssignmentInsertionPoint == lastStmt) && (paramAssignStmt != nullptr))
|
|
{
|
|
// All temp assignments will happen before the first param assignment.
|
|
tmpAssignmentInsertionPoint = paramAssignStmt;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Process late args.
|
|
for (CallArg& arg : recursiveTailCall->gtArgs.LateArgs())
|
|
{
|
|
// A late argument is an actual argument that needs to be assigned to the corresponding caller's parameter.
|
|
GenTree* lateArg = arg.GetLateNode();
|
|
// Late-added non-standard args are extra args that are not passed as locals, so skip those
|
|
if (!arg.IsArgAddedLate())
|
|
{
|
|
Statement* paramAssignStmt =
|
|
fgAssignRecursiveCallArgToCallerParam(lateArg, &arg, fgGetArgParameterLclNum(recursiveTailCall, &arg),
|
|
block, callDI, tmpAssignmentInsertionPoint,
|
|
paramAssignmentInsertionPoint);
|
|
|
|
if ((tmpAssignmentInsertionPoint == lastStmt) && (paramAssignStmt != nullptr))
|
|
{
|
|
// All temp assignments will happen before the first param assignment.
|
|
tmpAssignmentInsertionPoint = paramAssignStmt;
|
|
}
|
|
}
|
|
}
|
|
|
|
// If the method has starg.s 0 or ldarga.s 0 a special local (lvaArg0Var) is created so that
|
|
// compThisArg stays immutable. Normally it's assigned in fgFirstBBScratch block. Since that
|
|
// block won't be in the loop (it's assumed to have no predecessors), we need to update the special local here.
|
|
if (!info.compIsStatic && (lvaArg0Var != info.compThisArg))
|
|
{
|
|
GenTree* arg0Store = gtNewStoreLclVarNode(lvaArg0Var, gtNewLclVarNode(info.compThisArg));
|
|
Statement* arg0StoreStmt = gtNewStmt(arg0Store, callDI);
|
|
fgInsertStmtBefore(block, paramAssignmentInsertionPoint, arg0StoreStmt);
|
|
}
|
|
|
|
// If compInitMem is set, we may need to zero-initialize some locals. Normally it's done in the prolog
|
|
// but this loop can't include the prolog. Since we don't have liveness information, we insert zero-initialization
|
|
// for all non-parameter IL locals as well as temp structs with GC fields.
|
|
// Liveness phase will remove unnecessary initializations.
|
|
if (info.compInitMem || compSuppressedZeroInit)
|
|
{
|
|
for (unsigned varNum = 0; varNum < lvaCount; varNum++)
|
|
{
|
|
#if FEATURE_FIXED_OUT_ARGS
|
|
if (varNum == lvaOutgoingArgSpaceVar)
|
|
{
|
|
continue;
|
|
}
|
|
#endif // FEATURE_FIXED_OUT_ARGS
|
|
|
|
LclVarDsc* varDsc = lvaGetDesc(varNum);
|
|
|
|
if (varDsc->lvIsParam)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
#if FEATURE_IMPLICIT_BYREFS
|
|
if (varDsc->lvPromoted)
|
|
{
|
|
LclVarDsc* firstField = lvaGetDesc(varDsc->lvFieldLclStart);
|
|
if (firstField->lvParentLcl != varNum)
|
|
{
|
|
// Local copy for implicit byref promotion that was undone. Do
|
|
                // not introduce new references to it; all uses have been
// morphed to access the parameter.
|
|
|
|
#ifdef DEBUG
|
|
LclVarDsc* param = lvaGetDesc(firstField->lvParentLcl);
|
|
assert(param->lvIsImplicitByRef && !param->lvPromoted);
|
|
assert(param->lvFieldLclStart == varNum);
|
|
#endif
|
|
continue;
|
|
}
|
|
}
|
|
#endif
|
|
|
|
var_types lclType = varDsc->TypeGet();
|
|
bool isUserLocal = (varNum < info.compLocalsCount);
|
|
bool structWithGCFields = ((lclType == TYP_STRUCT) && varDsc->GetLayout()->HasGCPtr());
|
|
bool hadSuppressedInit = varDsc->lvSuppressedZeroInit;
|
|
if ((info.compInitMem && (isUserLocal || structWithGCFields)) || hadSuppressedInit)
|
|
{
|
|
GenTree* zero = (lclType == TYP_STRUCT) ? gtNewIconNode(0) : gtNewZeroConNode(lclType);
|
|
GenTree* init = gtNewStoreLclVarNode(varNum, zero);
|
|
init->gtType = lclType; // TODO-ASG: delete this zero-diff quirk.
|
|
if (lclType == TYP_STRUCT)
|
|
{
|
|
init = fgMorphInitBlock(init);
|
|
}
|
|
|
|
Statement* initStmt = gtNewStmt(init, callDI);
|
|
fgInsertStmtBefore(block, lastStmt, initStmt);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Remove the call
|
|
fgRemoveStmt(block, lastStmt);
|
|
|
|
// Set the loop edge.
|
|
if (opts.IsOSR())
|
|
{
|
|
// Todo: this may not look like a viable loop header.
|
|
// Might need the moral equivalent of a scratch BB.
|
|
FlowEdge* const newEdge = fgAddRefPred(fgEntryBB, block);
|
|
block->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge);
|
|
}
|
|
else
|
|
{
|
|
// We should have ensured the first BB was scratch
|
|
// in morph init...
|
|
//
|
|
assert(doesMethodHaveRecursiveTailcall());
|
|
assert(fgFirstBBisScratch());
|
|
|
|
// Loop detection needs to see a pred out of the loop,
|
|
// so mark the scratch block BBF_DONT_REMOVE to prevent empty
|
|
// block removal on it.
|
|
//
|
|
fgFirstBB->SetFlags(BBF_DONT_REMOVE);
|
|
FlowEdge* const newEdge = fgAddRefPred(fgFirstBB->Next(), block);
|
|
block->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge);
|
|
}
|
|
|
|
// Finish hooking things up.
|
|
block->RemoveFlags(BBF_HAS_JMP);
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
// fgAssignRecursiveCallArgToCallerParam : Assign argument to a recursive call to the corresponding caller parameter.
//
// Arguments:
//    arg  -  argument to assign
//    callArg  -  the corresponding call argument
//    lclParamNum  -  the lcl num of the parameter
//    block  -  basic block the call is in
//    callDI  -  debug info (including IL offset) of the call
//    tmpAssignmentInsertionPoint  -  tree before which temp assignment should be inserted (if necessary)
//    paramAssignmentInsertionPoint  -  tree before which parameter assignment should be inserted
//
// Return Value:
//    parameter assignment statement if one was inserted; nullptr otherwise.
//
|
|
Statement* Compiler::fgAssignRecursiveCallArgToCallerParam(GenTree* arg,
|
|
CallArg* callArg,
|
|
unsigned lclParamNum,
|
|
BasicBlock* block,
|
|
const DebugInfo& callDI,
|
|
Statement* tmpAssignmentInsertionPoint,
|
|
Statement* paramAssignmentInsertionPoint)
|
|
{
|
|
    // Call arguments should be assigned to temps first and then the temps should be assigned to parameters because
    // some argument trees may reference parameters directly.
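    // Illustrative example (hypothetical method Foo): if Foo(a, b) makes the recursive tail call Foo(b, a),
    // storing directly into the parameters would clobber 'a' before it is read:
    //     a = b; b = a;                  // wrong: 'b' now receives the new value of 'a'
    // so each argument is first copied to a temp, and the temps then feed the parameter stores:
    //     t0 = b; t1 = a; a = t0; b = t1;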
|
|
|
|
GenTree* argInTemp = nullptr;
|
|
bool needToAssignParameter = true;
|
|
|
|
// TODO-CQ: enable calls with struct arguments passed in registers.
|
|
noway_assert(!varTypeIsStruct(arg->TypeGet()));
|
|
|
|
if (arg->IsCnsIntOrI() || arg->IsCnsFltOrDbl())
|
|
{
|
|
// The argument is already assigned to a temp or is a const.
|
|
argInTemp = arg;
|
|
}
|
|
else if (arg->OperIs(GT_LCL_VAR))
|
|
{
|
|
unsigned lclNum = arg->AsLclVar()->GetLclNum();
|
|
LclVarDsc* varDsc = lvaGetDesc(lclNum);
|
|
if (!varDsc->lvIsParam)
|
|
{
|
|
// The argument is a non-parameter local so it doesn't need to be assigned to a temp.
|
|
argInTemp = arg;
|
|
}
|
|
else if (lclNum == lclParamNum)
|
|
{
|
|
// The argument is the same parameter local that we were about to assign so
|
|
// we can skip the assignment.
|
|
needToAssignParameter = false;
|
|
}
|
|
}
|
|
|
|
// TODO: We don't need temp assignments if we can prove that the argument tree doesn't involve
|
|
// any caller parameters. Some common cases are handled above but we may be able to eliminate
|
|
// more temp assignments.
|
|
|
|
Statement* paramAssignStmt = nullptr;
|
|
if (needToAssignParameter)
|
|
{
|
|
if (argInTemp == nullptr)
|
|
{
|
|
// The argument is not assigned to a temp. We need to create a new temp and insert a store.
|
|
unsigned tmpNum = lvaGrabTemp(true DEBUGARG("arg temp"));
|
|
lvaTable[tmpNum].lvType = arg->gtType;
|
|
GenTree* tempSrc = arg;
|
|
GenTree* tmpStoreNode = gtNewStoreLclVarNode(tmpNum, tempSrc);
|
|
Statement* tmpStoreStmt = gtNewStmt(tmpStoreNode, callDI);
|
|
fgInsertStmtBefore(block, tmpAssignmentInsertionPoint, tmpStoreStmt);
|
|
argInTemp = gtNewLclvNode(tmpNum, tempSrc->gtType);
|
|
}
|
|
|
|
// Now assign the temp to the parameter.
|
|
assert(lvaGetDesc(lclParamNum)->lvIsParam);
|
|
GenTree* paramStoreNode = gtNewStoreLclVarNode(lclParamNum, argInTemp);
|
|
paramAssignStmt = gtNewStmt(paramStoreNode, callDI);
|
|
|
|
fgInsertStmtBefore(block, paramAssignmentInsertionPoint, paramAssignStmt);
|
|
}
|
|
return paramAssignStmt;
|
|
}
|
|
|
|
/*****************************************************************************
|
|
*
|
|
* Transform the given GT_CALL tree for code generation.
|
|
*/
|
|
|
|
GenTree* Compiler::fgMorphCall(GenTreeCall* call)
|
|
{
|
|
if (call->CanTailCall())
|
|
{
|
|
GenTree* newNode = fgMorphPotentialTailCall(call);
|
|
if (newNode != nullptr)
|
|
{
|
|
return newNode;
|
|
}
|
|
|
|
assert(!call->CanTailCall());
|
|
|
|
#if FEATURE_MULTIREG_RET
|
|
if (fgGlobalMorph && call->HasMultiRegRetVal() && varTypeIsStruct(call->TypeGet()))
|
|
{
|
|
// The tail call has been rejected so we must finish the work deferred
|
|
// by impFixupCallStructReturn for multi-reg-returning calls and transform
|
|
// ret call
|
|
// into
|
|
// temp = call
|
|
// ret temp
|
|
|
|
// Force re-evaluating the argInfo as the return argument has changed.
|
|
call->gtArgs.ResetFinalArgsAndABIInfo();
|
|
|
|
// Create a new temp.
|
|
unsigned tmpNum =
|
|
lvaGrabTemp(false DEBUGARG("Return value temp for multi-reg return (rejected tail call)."));
|
|
lvaTable[tmpNum].lvIsMultiRegRet = true;
|
|
|
|
CORINFO_CLASS_HANDLE structHandle = call->gtRetClsHnd;
|
|
assert(structHandle != NO_CLASS_HANDLE);
|
|
const bool unsafeValueClsCheck = false;
|
|
lvaSetStruct(tmpNum, structHandle, unsafeValueClsCheck);
|
|
GenTree* store = gtNewStoreLclVarNode(tmpNum, call);
|
|
store = fgMorphTree(store);
|
|
|
|
// Create the store statement and insert it before the current statement.
|
|
Statement* storeStmt = gtNewStmt(store, compCurStmt->GetDebugInfo());
|
|
fgInsertStmtBefore(compCurBB, compCurStmt, storeStmt);
|
|
|
|
// Return the temp.
|
|
GenTree* result = gtNewLclvNode(tmpNum, lvaTable[tmpNum].lvType);
|
|
result->gtFlags |= GTF_DONT_CSE;
|
|
|
|
compCurBB->SetFlags(BBF_HAS_CALL); // This block has a call
|
|
|
|
JITDUMP("\nInserting store of a multi-reg call result to a temp:\n");
|
|
DISPSTMT(storeStmt);
|
|
INDEBUG(result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
|
|
|
|
return result;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
if ((call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC) != 0)
|
|
{
|
|
if (lookupNamedIntrinsic(call->AsCall()->gtCallMethHnd) ==
|
|
NI_System_Text_UTF8Encoding_UTF8EncodingSealed_ReadUtf8)
|
|
{
|
|
// Expanded in fgVNBasedIntrinsicExpansion
|
|
setMethodHasSpecialIntrinsics();
|
|
}
|
|
}
|
|
|
|
if (((call->gtCallMoreFlags & (GTF_CALL_M_SPECIAL_INTRINSIC | GTF_CALL_M_LDVIRTFTN_INTERFACE)) == 0) &&
|
|
(call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_VIRTUAL_FUNC_PTR)
|
|
#ifdef FEATURE_READYTORUN
|
|
|| call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_READYTORUN_VIRTUAL_FUNC_PTR)
|
|
#endif
|
|
) &&
|
|
(call == fgMorphStmt->GetRootNode()))
|
|
{
|
|
        // This is a call to CORINFO_HELP_VIRTUAL_FUNC_PTR with an ignored result.
        // Transform it into a null check.
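        // For example (illustrative): an "ldvirtftn" whose result ends up unused still needs to
        // throw NullReferenceException for a null 'this', so only a null check of the object
        // operand is kept.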
|
|
|
|
assert(call->gtArgs.CountArgs() >= 1);
|
|
GenTree* objPtr = call->gtArgs.GetArgByIndex(0)->GetNode();
|
|
|
|
GenTree* nullCheck = gtNewNullCheck(objPtr, compCurBB);
|
|
|
|
return fgMorphTree(nullCheck);
|
|
}
|
|
|
|
noway_assert(call->gtOper == GT_CALL);
|
|
|
|
//
|
|
// Only count calls once (only in the global morph phase)
|
|
//
|
|
if (fgGlobalMorph)
|
|
{
|
|
if (call->gtCallType == CT_INDIRECT)
|
|
{
|
|
optCallCount++;
|
|
optIndirectCallCount++;
|
|
}
|
|
else if (call->gtCallType == CT_USER_FUNC)
|
|
{
|
|
optCallCount++;
|
|
if (call->IsVirtual())
|
|
{
|
|
optIndirectCallCount++;
|
|
}
|
|
}
|
|
}
|
|
|
|
// Couldn't inline - remember that this BB contains method calls
|
|
|
|
// Mark the block as a GC safe point for the call if possible.
|
|
// In the event the call indicates the block isn't a GC safe point
|
|
// and the call is unmanaged with a GC transition suppression request
|
|
// then insert a GC poll.
|
|
|
|
if (IsGcSafePoint(call))
|
|
{
|
|
compCurBB->SetFlags(BBF_GC_SAFE_POINT);
|
|
}
|
|
|
|
// Regardless of the state of the basic block with respect to GC safe point,
|
|
// we will always insert a GC Poll for scenarios involving a suppressed GC
|
|
// transition. Only mark the block for GC Poll insertion on the first morph.
|
|
if (fgGlobalMorph && call->IsUnmanaged() && call->IsSuppressGCTransition())
|
|
{
|
|
compCurBB->SetFlags(BBF_HAS_SUPPRESSGC_CALL | BBF_GC_SAFE_POINT);
|
|
optMethodFlags |= OMF_NEEDS_GCPOLLS;
|
|
}
|
|
|
|
if (fgGlobalMorph)
|
|
{
|
|
if (IsStaticHelperEligibleForExpansion(call))
|
|
{
|
|
// Current method has potential candidates for fgExpandStaticInit phase
|
|
setMethodHasStaticInit();
|
|
}
|
|
else if ((call->gtCallMoreFlags & GTF_CALL_M_CAST_CAN_BE_EXPANDED) != 0)
|
|
{
|
|
// Current method has potential candidates for fgLateCastExpansion phase
|
|
setMethodHasExpandableCasts();
|
|
}
|
|
}
|
|
|
|
// Morph Type.op_Equality, Type.op_Inequality, and Enum.HasFlag
|
|
//
|
|
// We need to do these before the arguments are morphed
|
|
if (!call->gtArgs.AreArgsComplete() && (call->gtCallMoreFlags & GTF_CALL_M_SPECIAL_INTRINSIC))
|
|
{
|
|
// See if this is foldable
|
|
GenTree* optTree = gtFoldExprCall(call);
|
|
|
|
// If we optimized, morph the result
|
|
if (optTree != call)
|
|
{
|
|
return fgMorphTree(optTree);
|
|
}
|
|
}
|
|
|
|
compCurBB->SetFlags(BBF_HAS_CALL); // This block has a call
|
|
|
|
// From this point on disallow shared temps to be reused until we are done
|
|
// processing the call.
|
|
SharedTempsScope sharedTemps(this);
|
|
|
|
// Process the "normal" argument list
|
|
call = fgMorphArgs(call);
|
|
noway_assert(call->gtOper == GT_CALL);
|
|
|
|
    // Try to replace CORINFO_HELP_TYPEHANDLE_TO_RUNTIMETYPE with a constant gc handle
    // pointing to a frozen segment.
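    // For example (illustrative): a "typeof(SomeClass)" whose RuntimeType object has been
    // allocated on a frozen segment can be replaced by a GTF_ICON_OBJ_HDL constant, removing
    // the helper call entirely.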
|
|
if (!gtIsActiveCSE_Candidate(call) && gtIsTypeHandleToRuntimeTypeHelper(call))
|
|
{
|
|
GenTree* argNode = call->AsCall()->gtArgs.GetArgByIndex(0)->GetNode();
|
|
CORINFO_CLASS_HANDLE hClass = gtGetHelperArgClassHandle(argNode);
|
|
if ((hClass != NO_CLASS_HANDLE) && !gtIsActiveCSE_Candidate(argNode))
|
|
{
|
|
CORINFO_OBJECT_HANDLE ptr = info.compCompHnd->getRuntimeTypePointer(hClass);
|
|
if (ptr != NULL)
|
|
{
|
|
setMethodHasFrozenObjects();
|
|
GenTree* retNode = gtNewIconEmbHndNode((void*)ptr, nullptr, GTF_ICON_OBJ_HDL, nullptr);
|
|
return fgMorphTree(retNode);
|
|
}
|
|
}
|
|
}
|
|
|
|
// Assign DEF flags if it produces a definition from "return buffer".
|
|
fgAssignSetVarDef(call);
|
|
if (call->OperRequiresAsgFlag())
|
|
{
|
|
call->gtFlags |= GTF_ASG;
|
|
}
|
|
|
|
// Should we expand this virtual method call target early here?
|
|
//
|
|
if (call->IsExpandedEarly() && call->IsVirtualVtable())
|
|
{
|
|
// We expand the Vtable Call target either in the global morph phase or
|
|
// in guarded devirt if we need it for the guard.
|
|
if (fgGlobalMorph && (call->gtControlExpr == nullptr))
|
|
{
|
|
call->gtControlExpr = fgExpandVirtualVtableCallTarget(call);
|
|
}
|
|
// We always have to morph or re-morph the control expr
|
|
//
|
|
call->gtControlExpr = fgMorphTree(call->gtControlExpr);
|
|
|
|
// Propagate any side effect flags into the call
|
|
call->gtFlags |= call->gtControlExpr->gtFlags & GTF_ALL_EFFECT;
|
|
}
|
|
|
|
    // Morph a stelem.ref helper call that stores a null value into a store into the array without the helper.
    // This needs to be done after the arguments are morphed to ensure constant propagation has already taken place.
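    // For example (illustrative): "arr[i] = null" imports as a CORINFO_HELP_ARRADDR_ST call;
    // since storing a null reference can never fail the array covariance check, it can be
    // rewritten here as a plain STOREIND(INDEX_ADDR(arr, i), null).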
|
|
if (opts.OptimizationEnabled() && call->IsHelperCall() &&
|
|
(call->gtCallMethHnd == eeFindHelper(CORINFO_HELP_ARRADDR_ST)))
|
|
{
|
|
assert(call->gtArgs.CountArgs() == 3);
|
|
GenTree* value = call->gtArgs.GetArgByIndex(2)->GetNode();
|
|
if (value->IsIntegralConst(0))
|
|
{
|
|
assert(value->OperGet() == GT_CNS_INT);
|
|
|
|
GenTree* arr = call->gtArgs.GetArgByIndex(0)->GetNode();
|
|
GenTree* index = call->gtArgs.GetArgByIndex(1)->GetNode();
|
|
|
|
// Either or both of the array and index arguments may have been spilled to temps by `fgMorphArgs`. Copy
|
|
// the spill trees as well if necessary.
|
|
GenTree* argSetup = nullptr;
|
|
for (CallArg& arg : call->gtArgs.EarlyArgs())
|
|
{
|
|
if (arg.GetLateNode() == nullptr)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
GenTree* const setupArgNode = arg.GetEarlyNode();
|
|
assert((setupArgNode != arr) && (setupArgNode != index));
|
|
|
|
if (argSetup == nullptr)
|
|
{
|
|
argSetup = setupArgNode;
|
|
}
|
|
else
|
|
{
|
|
argSetup = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, argSetup, setupArgNode);
|
|
#if DEBUG
|
|
argSetup->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
|
|
#endif // DEBUG
|
|
}
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
auto resetMorphedFlag = [](GenTree** slot, fgWalkData* data) -> fgWalkResult {
|
|
(*slot)->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
|
|
return WALK_CONTINUE;
|
|
};
|
|
|
|
fgWalkTreePost(&arr, resetMorphedFlag);
|
|
fgWalkTreePost(&index, resetMorphedFlag);
|
|
fgWalkTreePost(&value, resetMorphedFlag);
|
|
#endif // DEBUG
|
|
|
|
GenTree* indexAddr = gtNewArrayIndexAddr(arr, index, TYP_REF, NO_CLASS_HANDLE);
|
|
GenTree* store = gtNewStoreIndNode(TYP_REF, indexAddr, value);
|
|
GenTree* result = fgMorphTree(store);
|
|
if (argSetup != nullptr)
|
|
{
|
|
result = new (this, GT_COMMA) GenTreeOp(GT_COMMA, TYP_VOID, argSetup, result);
|
|
#if DEBUG
|
|
result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
|
|
#endif // DEBUG
|
|
}
|
|
|
|
return result;
|
|
}
|
|
}
|
|
|
|
if (call->IsNoReturn())
|
|
{
|
|
        //
        // If we know that the call does not return then we can set fgRemoveRestOfBlock
        // to remove all subsequent statements and change the call's basic block to BBJ_THROW.
        // As a result the compiler won't need to preserve live registers across the call.
        //
        // This isn't needed for tail calls as there shouldn't be any code after the call anyway.
        // Besides, the tail call code is part of the epilog and converting the block to
        // BBJ_THROW would result in the tail call being dropped as the epilog is generated
        // only for BBJ_RETURN blocks.
        //
|
|
|
|
if (!call->IsTailCall())
|
|
{
|
|
fgRemoveRestOfBlock = true;
|
|
}
|
|
}
|
|
|
|
return call;
|
|
}
|
|
|
|
/*****************************************************************************
|
|
*
|
|
* Expand and return the call target address for a VirtualCall
|
|
* The code here should match that generated by LowerVirtualVtableCall
|
|
*/
|
|
|
|
GenTree* Compiler::fgExpandVirtualVtableCallTarget(GenTreeCall* call)
|
|
{
|
|
GenTree* result;
|
|
|
|
JITDUMP("Expanding virtual call target for %d.%s:\n", call->gtTreeID, GenTree::OpName(call->gtOper));
|
|
|
|
noway_assert(call->gtCallType == CT_USER_FUNC);
|
|
|
|
assert(call->gtArgs.HasThisPointer());
|
|
// get a reference to the thisPtr being passed
|
|
GenTree* thisPtr = call->gtArgs.GetThisArg()->GetNode();
|
|
|
|
// fgMorphArgs must enforce this invariant by creating a temp
|
|
//
|
|
assert(thisPtr->OperIsLocal());
|
|
|
|
// Make a copy of the thisPtr by cloning
|
|
//
|
|
thisPtr = gtClone(thisPtr, true);
|
|
|
|
noway_assert(thisPtr != nullptr);
|
|
|
|
// Get hold of the vtable offset
|
|
unsigned vtabOffsOfIndirection;
|
|
unsigned vtabOffsAfterIndirection;
|
|
bool isRelative;
|
|
info.compCompHnd->getMethodVTableOffset(call->gtCallMethHnd, &vtabOffsOfIndirection, &vtabOffsAfterIndirection,
|
|
&isRelative);
|
|
|
|
    // Dereference the this pointer to obtain the method table; it is called vtab below
|
|
assert(VPTR_OFFS == 0); // We have to add this value to the thisPtr to get the methodTable
|
|
GenTree* vtab = gtNewIndir(TYP_I_IMPL, thisPtr, GTF_IND_INVARIANT);
|
|
|
|
if (fgGlobalMorph)
|
|
{
|
|
vtab->gtFlags &= ~GTF_EXCEPT; // TODO-Cleanup: delete this zero-diff quirk.
|
|
}
|
|
|
|
// Get the appropriate vtable chunk
|
|
if (vtabOffsOfIndirection != CORINFO_VIRTUALCALL_NO_CHUNK)
|
|
{
|
|
// Note this isRelative code path is currently never executed
|
|
// as the VM doesn't ever return: isRelative == true
|
|
//
|
|
if (isRelative)
|
|
{
|
|
// MethodTable offset is a relative pointer.
|
|
//
|
|
// Additional temporary variable is used to store virtual table pointer.
|
|
// Address of method is obtained by the next computations:
|
|
//
|
|
// Save relative offset to tmp (vtab is virtual table pointer, vtabOffsOfIndirection is offset of
|
|
// vtable-1st-level-indirection):
|
|
// tmp = vtab
|
|
//
|
|
// Save address of method to result (vtabOffsAfterIndirection is offset of vtable-2nd-level-indirection):
|
|
// result = [tmp + vtabOffsOfIndirection + vtabOffsAfterIndirection + [tmp + vtabOffsOfIndirection]]
|
|
//
|
|
//
|
|
// When isRelative is true we need to setup two temporary variables
|
|
// var1 = vtab
|
|
// var2 = var1 + vtabOffsOfIndirection + vtabOffsAfterIndirection + [var1 + vtabOffsOfIndirection]
|
|
// result = [var2] + var2
|
|
//
|
|
unsigned varNum1 = lvaGrabTemp(true DEBUGARG("var1 - vtab"));
|
|
unsigned varNum2 = lvaGrabTemp(true DEBUGARG("var2 - relative"));
|
|
GenTree* storeVar1 = gtNewTempStore(varNum1, vtab); // var1 = vtab
|
|
|
|
// [tmp + vtabOffsOfIndirection]
|
|
GenTree* tmpTree1 = gtNewOperNode(GT_ADD, TYP_I_IMPL, gtNewLclvNode(varNum1, TYP_I_IMPL),
|
|
gtNewIconNode(vtabOffsOfIndirection, TYP_I_IMPL));
|
|
tmpTree1 = gtNewIndir(TYP_I_IMPL, tmpTree1, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
|
|
|
|
// var1 + vtabOffsOfIndirection + vtabOffsAfterIndirection
|
|
GenTree* tmpTree2 =
|
|
gtNewOperNode(GT_ADD, TYP_I_IMPL, gtNewLclvNode(varNum1, TYP_I_IMPL),
|
|
gtNewIconNode(vtabOffsOfIndirection + vtabOffsAfterIndirection, TYP_I_IMPL));
|
|
|
|
// var1 + vtabOffsOfIndirection + vtabOffsAfterIndirection + [var1 + vtabOffsOfIndirection]
|
|
tmpTree2 = gtNewOperNode(GT_ADD, TYP_I_IMPL, tmpTree2, tmpTree1);
|
|
GenTree* storeVar2 = gtNewTempStore(varNum2, tmpTree2); // var2 = <expression>
|
|
|
|
// This last indirection is not invariant, but is non-faulting
|
|
result = gtNewIndir(TYP_I_IMPL, gtNewLclvNode(varNum2, TYP_I_IMPL), GTF_IND_NONFAULTING); // [var2]
|
|
|
|
result = gtNewOperNode(GT_ADD, TYP_I_IMPL, result, gtNewLclvNode(varNum2, TYP_I_IMPL)); // [var2] + var2
|
|
|
|
// Now stitch together the two stores and the calculation of result into a single tree
|
|
GenTree* commaTree = gtNewOperNode(GT_COMMA, TYP_I_IMPL, storeVar2, result);
|
|
result = gtNewOperNode(GT_COMMA, TYP_I_IMPL, storeVar1, commaTree);
|
|
}
|
|
else
|
|
{
|
|
// result = [vtab + vtabOffsOfIndirection]
|
|
result = gtNewOperNode(GT_ADD, TYP_I_IMPL, vtab, gtNewIconNode(vtabOffsOfIndirection, TYP_I_IMPL));
|
|
result = gtNewIndir(TYP_I_IMPL, result, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
result = vtab;
|
|
assert(!isRelative);
|
|
}
|
|
|
|
if (!isRelative)
|
|
{
|
|
// Load the function address
|
|
// result = [result + vtabOffsAfterIndirection]
|
|
result = gtNewOperNode(GT_ADD, TYP_I_IMPL, result, gtNewIconNode(vtabOffsAfterIndirection, TYP_I_IMPL));
|
|
// This last indirection is not invariant, but is non-faulting
|
|
result = gtNewIndir(TYP_I_IMPL, result, GTF_IND_NONFAULTING);
|
|
}
|
|
|
|
return result;
|
|
}
|
|
|
|
/*****************************************************************************
|
|
*
|
|
* Transform the given constant tree for code generation.
|
|
*/
|
|
|
|
GenTree* Compiler::fgMorphConst(GenTree* tree)
|
|
{
|
|
assert(tree->OperIsConst());
|
|
|
|
/* Clear any exception flags or other unnecessary flags
|
|
* that may have been set before folding this node to a constant */
|
|
|
|
tree->gtFlags &= ~(GTF_ALL_EFFECT | GTF_REVERSE_OPS);
|
|
|
|
if (!tree->OperIs(GT_CNS_STR))
|
|
{
|
|
return tree;
|
|
}
|
|
|
|
if (tree->AsStrCon()->IsStringEmptyField())
|
|
{
|
|
LPVOID pValue;
|
|
InfoAccessType iat = info.compCompHnd->emptyStringLiteral(&pValue);
|
|
return fgMorphTree(gtNewStringLiteralNode(iat, pValue));
|
|
}
|
|
|
|
    // TODO-CQ: Do this for compCurBB->isRunRarely(). Doing that currently will
    // guarantee slow performance for that block. Instead, cache the return value
    // of CORINFO_HELP_STRCNS and go to the cache first, giving reasonable perf.
|
|
|
|
bool useLazyStrCns = false;
|
|
if (compCurBB->KindIs(BBJ_THROW))
|
|
{
|
|
useLazyStrCns = true;
|
|
}
|
|
else if (fgGlobalMorph && compCurStmt->GetRootNode()->IsCall())
|
|
{
|
|
        // Quick check: see whether the root node of the current statement happens to be a noreturn call.
|
|
GenTreeCall* call = compCurStmt->GetRootNode()->AsCall();
|
|
useLazyStrCns = call->IsNoReturn() || fgIsThrow(call);
|
|
}
|
|
|
|
if (useLazyStrCns)
|
|
{
|
|
CorInfoHelpFunc helper = info.compCompHnd->getLazyStringLiteralHelper(tree->AsStrCon()->gtScpHnd);
|
|
if (helper != CORINFO_HELP_UNDEF)
|
|
{
|
|
// For un-important blocks, we want to construct the string lazily
|
|
|
|
tree =
|
|
gtNewHelperCallNode(helper, TYP_REF, gtNewIconNode(RidFromToken(tree->AsStrCon()->gtSconCPX), TYP_INT),
|
|
gtNewIconEmbScpHndNode(tree->AsStrCon()->gtScpHnd));
|
|
return fgMorphTree(tree);
|
|
}
|
|
}
|
|
|
|
assert(tree->AsStrCon()->gtScpHnd == info.compScopeHnd || !IsUninitialized(tree->AsStrCon()->gtScpHnd));
|
|
|
|
LPVOID pValue;
|
|
InfoAccessType iat =
|
|
info.compCompHnd->constructStringLiteral(tree->AsStrCon()->gtScpHnd, tree->AsStrCon()->gtSconCPX, &pValue);
|
|
|
|
tree = gtNewStringLiteralNode(iat, pValue);
|
|
|
|
return fgMorphTree(tree);
|
|
}
|
|
|
|
//------------------------------------------------------------------------
// fgMorphLeaf: Fully morph a tree with no operands.
//
// Arguments:
//    tree - The tree to morph
//
// Return Value:
//    The fully morphed "tree".
//
|
|
GenTree* Compiler::fgMorphLeaf(GenTree* tree)
|
|
{
|
|
assert(tree->OperIsLeaf());
|
|
|
|
if (tree->OperIs(GT_LCL_VAR, GT_LCL_FLD, GT_LCL_ADDR))
|
|
{
|
|
tree = fgMorphLeafLocal(tree->AsLclVarCommon());
|
|
}
|
|
else if (tree->OperIs(GT_FTN_ADDR))
|
|
{
|
|
GenTreeFptrVal* fptrValTree = tree->AsFptrVal();
|
|
|
|
// A function pointer address is being used. Let the VM know if this is the
|
|
// target of a Delegate or a raw function pointer.
|
|
bool isUnsafeFunctionPointer = !fptrValTree->gtFptrDelegateTarget;
|
|
|
|
CORINFO_CONST_LOOKUP addrInfo;
|
|
CORINFO_METHOD_HANDLE funcHandle = fptrValTree->gtFptrMethod;
|
|
|
|
#ifdef FEATURE_READYTORUN
|
|
if (fptrValTree->gtEntryPoint.addr != nullptr)
|
|
{
|
|
addrInfo = fptrValTree->gtEntryPoint;
|
|
}
|
|
else
|
|
#endif
|
|
{
|
|
info.compCompHnd->getFunctionFixedEntryPoint(funcHandle, isUnsafeFunctionPointer, &addrInfo);
|
|
}
|
|
|
|
GenTree* indNode = nullptr;
|
|
switch (addrInfo.accessType)
|
|
{
|
|
case IAT_PPVALUE:
|
|
indNode = gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)addrInfo.handle, GTF_ICON_CONST_PTR, true);
|
|
|
|
// Add the second indirection
|
|
indNode = gtNewIndir(TYP_I_IMPL, indNode, GTF_IND_NONFAULTING | GTF_IND_INVARIANT);
|
|
break;
|
|
|
|
case IAT_PVALUE:
|
|
indNode = gtNewIndOfIconHandleNode(TYP_I_IMPL, (size_t)addrInfo.handle, GTF_ICON_FTN_ADDR, true);
|
|
INDEBUG(indNode->gtGetOp1()->AsIntCon()->gtTargetHandle = reinterpret_cast<size_t>(funcHandle));
|
|
break;
|
|
|
|
case IAT_VALUE:
|
|
// Refer to gtNewIconHandleNode() as the template for constructing a constant handle
|
|
//
|
|
tree->SetOper(GT_CNS_INT);
|
|
tree->AsIntConCommon()->SetIconValue(ssize_t(addrInfo.handle));
|
|
tree->gtFlags |= GTF_ICON_FTN_ADDR;
|
|
INDEBUG(tree->AsIntCon()->gtTargetHandle = reinterpret_cast<size_t>(funcHandle));
|
|
break;
|
|
|
|
default:
|
|
noway_assert(!"Unknown addrInfo.accessType");
|
|
}
|
|
|
|
if (indNode != nullptr)
|
|
{
|
|
DEBUG_DESTROY_NODE(tree);
|
|
tree = fgMorphTree(indNode);
|
|
}
|
|
}
|
|
|
|
return tree;
|
|
}
|
|
|
|
void Compiler::fgAssignSetVarDef(GenTree* tree)
{
    GenTreeLclVarCommon* lclVarCmnTree;
    bool                 isEntire = false;
    if (tree->DefinesLocal(this, &lclVarCmnTree, &isEntire))
    {
        if (isEntire)
        {
            lclVarCmnTree->gtFlags |= GTF_VAR_DEF;
        }
        else
        {
            // We consider partial definitions to be modeled as uses followed by definitions.
            // This captures the idea that preceding defs are not necessarily made redundant
            // by this definition.
            lclVarCmnTree->gtFlags |= (GTF_VAR_DEF | GTF_VAR_USEASG);
        }
    }
}
|
|
|
|
#ifdef FEATURE_SIMD
|
|
|
|
//--------------------------------------------------------------------------------------------------------------
|
|
// getSIMDStructFromField:
//   Check whether the field belongs to a SIMD struct. If it does, return the GenTree* for
//   the struct node, along with the base type, field index and SIMD size. If it does not, return nullptr.
//   Usually, if the tree node is from a SIMD lclvar that is not used in any SIMD intrinsic, we
//   return nullptr, since in that case the SIMD struct should be treated as a regular struct.
//   However, if the SIMD struct node is wanted regardless, ignoreUsedInSIMDIntrinsic can be set
//   to true; the IsUsedInSIMDIntrinsic check is then skipped and the SIMD struct node is returned
//   whenever the struct is a SIMD struct.
//
// Arguments:
//   tree                      - GenTree*. This node is checked to see whether it is a field that belongs
//                               to a SIMD struct used for a SIMD intrinsic.
//   indexOut                  - unsigned pointer; if the tree is used for a SIMD intrinsic, *indexOut is
//                               set to the index number of this field.
//   simdSizeOut               - unsigned pointer; if the tree is used for a SIMD intrinsic, *simdSizeOut
//                               is set to the size of the SIMD struct this tree belongs to.
//   ignoreUsedInSIMDIntrinsic - bool. If set to true, the IsUsedInSIMDIntrinsic check is skipped.
//
// Return Value:
//   A GenTree* pointing to the SIMD lclvar the field belongs to. If the tree is not a SIMD
//   intrinsic related field, return nullptr.
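//
// For example (illustrative): for a "Vector4 v" local used in SIMD intrinsics, an access to "v.Z"
// (a float field at offset 8) yields the local node for "v" with *indexOut == 2 and *simdSizeOut == 16.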
|
|
//
|
|
GenTree* Compiler::getSIMDStructFromField(GenTree* tree,
|
|
unsigned* indexOut,
|
|
unsigned* simdSizeOut,
|
|
bool ignoreUsedInSIMDIntrinsic /*false*/)
|
|
{
|
|
if (tree->isIndir())
|
|
{
|
|
GenTree* addr = tree->AsIndir()->Addr();
|
|
if (!addr->OperIs(GT_FIELD_ADDR) || !addr->AsFieldAddr()->IsInstance())
|
|
{
|
|
return nullptr;
|
|
}
|
|
|
|
GenTree* objRef = addr->AsFieldAddr()->GetFldObj();
|
|
if (objRef->IsLclVarAddr())
|
|
{
|
|
LclVarDsc* varDsc = lvaGetDesc(objRef->AsLclVarCommon());
|
|
if (varTypeIsSIMD(varDsc) && (varDsc->lvIsUsedInSIMDIntrinsic() || ignoreUsedInSIMDIntrinsic))
|
|
{
|
|
var_types elementType = tree->TypeGet();
|
|
unsigned fieldOffset = addr->AsFieldAddr()->gtFldOffset;
|
|
unsigned elementSize = genTypeSize(elementType);
|
|
|
|
if (varTypeIsArithmetic(elementType) && ((fieldOffset % elementSize) == 0))
|
|
{
|
|
*simdSizeOut = varDsc->lvExactSize();
|
|
*indexOut = fieldOffset / elementSize;
|
|
|
|
return objRef;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return nullptr;
|
|
}
|
|
#endif // FEATURE_SIMD
|
|
|
|
//------------------------------------------------------------------------------
// fgMorphCommutative : Try to simplify "(X op C1) op C2" to "X op C3"
//                      for commutative operators.
//
// Arguments:
//    tree - node to fold
//
// Return value:
//    A folded GenTree* instance or nullptr if something prevents folding.
//
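// For example (illustrative): "(x + 3) + 5" folds to "x + 8", and "(x | 0xF0) | 0x0F" folds to
// "x | 0xFF", provided neither node is an active CSE candidate and no overflow-checked operator
// is involved.
//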
|
|
|
|
GenTreeOp* Compiler::fgMorphCommutative(GenTreeOp* tree)
|
|
{
|
|
assert(varTypeIsIntegralOrI(tree->TypeGet()));
|
|
assert(tree->OperIs(GT_ADD, GT_MUL, GT_OR, GT_AND, GT_XOR));
|
|
|
|
if (opts.OptimizationDisabled())
|
|
{
|
|
return nullptr;
|
|
}
|
|
|
|
// op1 can be GT_COMMA, in this case we're going to fold
|
|
// "(op (COMMA(... (op X C1))) C2)" to "(COMMA(... (op X C3)))"
|
|
GenTree* op1 = tree->gtGetOp1()->gtEffectiveVal();
|
|
genTreeOps oper = tree->OperGet();
|
|
|
|
if (!op1->OperIs(oper) || !tree->gtGetOp2()->IsCnsIntOrI() || !op1->gtGetOp2()->IsCnsIntOrI() ||
|
|
op1->gtGetOp1()->IsCnsIntOrI())
|
|
{
|
|
return nullptr;
|
|
}
|
|
|
|
if (!fgGlobalMorph && (op1 != tree->gtGetOp1()))
|
|
{
|
|
// Since 'tree->gtGetOp1()' can have complex structure (e.g. COMMA(..(COMMA(..,op1)))
|
|
// don't run the optimization for such trees outside of global morph.
|
|
// Otherwise, there is a chance of violating VNs invariants and/or modifying a tree
|
|
// that is an active CSE candidate.
|
|
return nullptr;
|
|
}
|
|
|
|
if (gtIsActiveCSE_Candidate(tree) || gtIsActiveCSE_Candidate(op1))
|
|
{
|
|
// The optimization removes 'tree' from IR and changes the value of 'op1'.
|
|
return nullptr;
|
|
}
|
|
|
|
if (tree->OperMayOverflow() && (tree->gtOverflow() || op1->gtOverflow()))
|
|
{
|
|
return nullptr;
|
|
}
|
|
|
|
GenTreeIntCon* cns1 = op1->gtGetOp2()->AsIntCon();
|
|
GenTreeIntCon* cns2 = tree->gtGetOp2()->AsIntCon();
|
|
|
|
if (!varTypeIsIntegralOrI(tree->TypeGet()) || cns1->TypeIs(TYP_REF) || !cns1->TypeIs(cns2->TypeGet()))
|
|
{
|
|
return nullptr;
|
|
}
|
|
|
|
if (gtIsActiveCSE_Candidate(cns1) || gtIsActiveCSE_Candidate(cns2))
|
|
{
|
|
// The optimization removes 'cns2' from IR and changes the value of 'cns1'.
|
|
return nullptr;
|
|
}
|
|
|
|
GenTree* folded = gtFoldExprConst(gtNewOperNode(oper, cns1->TypeGet(), cns1, cns2));
|
|
|
|
if (!folded->IsCnsIntOrI())
|
|
{
|
|
// Give up if we can't fold "C1 op C2"
|
|
return nullptr;
|
|
}
|
|
|
|
auto foldedCns = folded->AsIntCon();
|
|
|
|
cns1->SetIconValue(foldedCns->IconValue());
|
|
cns1->SetVNsFromNode(foldedCns);
|
|
cns1->gtFieldSeq = foldedCns->gtFieldSeq;
|
|
|
|
op1 = tree->gtGetOp1();
|
|
op1->SetVNsFromNode(tree);
|
|
|
|
DEBUG_DESTROY_NODE(tree);
|
|
DEBUG_DESTROY_NODE(cns2);
|
|
DEBUG_DESTROY_NODE(foldedCns);
|
|
INDEBUG(cns1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
|
|
|
|
return op1->AsOp();
|
|
}
|
|
|
|
//------------------------------------------------------------------------
// fgMorphSmpOp: morph a GTK_SMPOP tree
//
// Arguments:
//    tree - tree to morph
//    mac - address context for morphing
//    optAssertionPropDone - [out, optional] set true if local assertions
//       were killed/genned while morphing this tree
//
// Returns:
//    Tree, possibly updated
//
|
|
#ifdef _PREFAST_
|
|
#pragma warning(push)
|
|
#pragma warning(disable : 21000) // Suppress PREFast warning about overly large function
|
|
#endif
|
|
GenTree* Compiler::fgMorphSmpOp(GenTree* tree, MorphAddrContext* mac, bool* optAssertionPropDone)
|
|
{
|
|
assert(tree->OperKind() & GTK_SMPOP);
|
|
|
|
/* The steps in this function are :
|
|
o Perform required preorder processing
|
|
o Process the first, then second operand, if any
|
|
o Perform required postorder morphing
|
|
o Perform optional postorder morphing if optimizing
|
|
*/
|
|
|
|
bool isQmarkColon = false;
|
|
|
|
ASSERT_TP origAssertions = BitVecOps::UninitVal();
|
|
ASSERT_TP thenAssertions = BitVecOps::UninitVal();
|
|
|
|
genTreeOps oper = tree->OperGet();
|
|
var_types typ = tree->TypeGet();
|
|
GenTree* op1 = tree->AsOp()->gtOp1;
|
|
GenTree* op2 = tree->gtGetOp2IfPresent();
|
|
|
|
/*-------------------------------------------------------------------------
|
|
* First do any PRE-ORDER processing
|
|
*/
|
|
|
|
switch (oper)
|
|
{
|
|
// Some arithmetic operators need to use a helper call to the EE
|
|
int helper;
|
|
|
|
case GT_STORE_LCL_VAR:
|
|
case GT_STORE_LCL_FLD:
|
|
{
|
|
LclVarDsc* lclDsc = lvaGetDesc(tree->AsLclVarCommon());
|
|
if (lclDsc->IsAddressExposed()
|
|
#if FEATURE_IMPLICIT_BYREFS
|
|
|| lclDsc->lvIsLastUseCopyOmissionCandidate
|
|
#endif
|
|
)
|
|
{
|
|
tree->AddAllEffectsFlags(GTF_GLOB_REF);
|
|
}
|
|
|
|
GenTree* expandedTree = fgMorphExpandLocal(tree->AsLclVarCommon());
|
|
if (expandedTree != nullptr)
|
|
{
|
|
DBEXEC(tree != expandedTree, expandedTree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
|
|
tree = expandedTree;
|
|
oper = tree->OperGet();
|
|
op1 = tree->gtGetOp1();
|
|
op2 = tree->gtGetOp2IfPresent();
|
|
}
|
|
}
|
|
break;
|
|
|
|
case GT_QMARK:
|
|
case GT_JTRUE:
|
|
|
|
noway_assert(op1);
|
|
|
|
if (op1->OperIsCompare())
|
|
{
|
|
/* Mark the comparison node with GTF_RELOP_JMP_USED so it knows that it does
|
|
not need to materialize the result as a 0 or 1. */
|
|
|
|
/* We also mark it as DONT_CSE, as we don't handle QMARKs with nonRELOP op1s */
|
|
op1->gtFlags |= (GTF_RELOP_JMP_USED | GTF_DONT_CSE);
|
|
}
|
|
else
|
|
{
|
|
GenTree* effOp1 = op1->gtEffectiveVal();
|
|
noway_assert((effOp1->gtOper == GT_CNS_INT) &&
|
|
(effOp1->IsIntegralConst(0) || effOp1->IsIntegralConst(1)));
|
|
}
|
|
break;
|
|
|
|
case GT_COLON:
|
|
if (optLocalAssertionProp)
|
|
{
|
|
isQmarkColon = true;
|
|
}
|
|
break;
|
|
|
|
case GT_FIELD_ADDR:
|
|
return fgMorphFieldAddr(tree, mac);
|
|
|
|
case GT_INDEX_ADDR:
|
|
return fgMorphIndexAddr(tree->AsIndexAddr());
|
|
|
|
case GT_CAST:
|
|
{
|
|
GenTree* morphedCast = fgMorphExpandCast(tree->AsCast());
|
|
if (morphedCast != nullptr)
|
|
{
|
|
return morphedCast;
|
|
}
|
|
|
|
op1 = tree->AsCast()->CastOp();
|
|
}
|
|
break;
|
|
|
|
case GT_MUL:
|
|
noway_assert(op2 != nullptr);
|
|
|
|
#ifndef TARGET_64BIT
|
|
if (typ == TYP_LONG)
|
|
{
|
|
                // For (long)int1 * (long)int2, we don't actually do the
                // casts, and just multiply the 32 bit values, which will
                // give us the 64 bit result in edx:eax.
|
|
|
|
if (tree->Is64RsltMul())
|
|
{
|
|
// We are seeing this node again.
|
|
// Morph only the children of casts,
|
|
// so as to avoid losing them.
|
|
tree = fgMorphLongMul(tree->AsOp());
|
|
|
|
goto DONE_MORPHING_CHILDREN;
|
|
}
|
|
|
|
tree = fgRecognizeAndMorphLongMul(tree->AsOp());
|
|
op1 = tree->AsOp()->gtGetOp1();
|
|
op2 = tree->AsOp()->gtGetOp2();
|
|
|
|
if (tree->Is64RsltMul())
|
|
{
|
|
goto DONE_MORPHING_CHILDREN;
|
|
}
|
|
else
|
|
{
|
|
if (tree->gtOverflow())
|
|
helper = tree->IsUnsigned() ? CORINFO_HELP_ULMUL_OVF : CORINFO_HELP_LMUL_OVF;
|
|
else
|
|
helper = CORINFO_HELP_LMUL;
|
|
|
|
goto USE_HELPER_FOR_ARITH;
|
|
}
|
|
}
|
|
#endif // !TARGET_64BIT
|
|
break;
|
|
|
|
case GT_ARR_LENGTH:
|
|
if (op1->OperIs(GT_CNS_STR))
|
|
{
|
|
// Optimize `ldstr + String::get_Length()` to CNS_INT
|
|
// e.g. "Hello".Length => 5
|
|
GenTreeIntCon* iconNode = gtNewStringLiteralLength(op1->AsStrCon());
|
|
if (iconNode != nullptr)
|
|
{
|
|
INDEBUG(iconNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
|
|
return iconNode;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case GT_IND:
|
|
if (opts.OptimizationEnabled() && !optValnumCSE_phase)
|
|
{
|
|
GenTree* constNode = gtFoldIndirConst(tree->AsIndir());
|
|
if (constNode != nullptr)
|
|
{
|
|
assert(constNode->OperIsConst()); // No further morphing required.
|
|
INDEBUG(constNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
|
|
return constNode;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case GT_STOREIND:
|
|
if (op1->OperIs(GT_FIELD_ADDR) && varTypeIsGC(tree))
|
|
{
|
|
CORINFO_FIELD_HANDLE fieldHandle = op1->AsFieldAddr()->gtFldHnd;
|
|
if (eeIsByrefLike(info.compCompHnd->getFieldClass(fieldHandle)))
|
|
{
|
|
JITDUMP("Marking [%06u] STOREIND as GTF_IND_TGT_NOT_HEAP: field's owner is a byref-like struct\n",
|
|
dspTreeID(tree));
|
|
tree->gtFlags |= GTF_IND_TGT_NOT_HEAP;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case GT_DIV:
|
|
            // Replace "val / dcon" with "val * (1.0 / dcon)" if dcon is a power of two.
            // Powers of two within range are always exactly represented,
            // so multiplication by the reciprocal is safe in this scenario.
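            // For example (illustrative): "x / 4.0" becomes "x * 0.25"; both 4.0 and 0.25 are
            // exactly representable, so the result is bit-identical to the division.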
|
|
if (fgGlobalMorph && op2->IsCnsFltOrDbl())
|
|
{
|
|
double divisor = op2->AsDblCon()->DconValue();
|
|
if (((typ == TYP_DOUBLE) && FloatingPointUtils::hasPreciseReciprocal(divisor)) ||
|
|
((typ == TYP_FLOAT) && FloatingPointUtils::hasPreciseReciprocal(forceCastToFloat(divisor))))
|
|
{
|
|
oper = GT_MUL;
|
|
tree->ChangeOper(oper);
|
|
op2->AsDblCon()->SetDconValue(1.0 / divisor);
|
|
}
|
|
}
|
|
|
|
// Convert DIV to UDIV if both op1 and op2 are known to be never negative
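            // For example (illustrative): "arr.Length / 10" - an array length and a positive
            // constant are both known to be never negative, so the signed GT_DIV can be
            // changed to the cheaper GT_UDIV.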
|
|
if (!gtIsActiveCSE_Candidate(tree) && varTypeIsIntegral(tree) && op1->IsNeverNegative(this) &&
|
|
op2->IsNeverNegative(this))
|
|
{
|
|
assert(tree->OperIs(GT_DIV));
|
|
tree->ChangeOper(GT_UDIV, GenTree::PRESERVE_VN);
|
|
return fgMorphSmpOp(tree, mac);
|
|
}
|
|
|
|
#ifndef TARGET_64BIT
|
|
if (typ == TYP_LONG)
|
|
{
|
|
helper = CORINFO_HELP_LDIV;
|
|
goto USE_HELPER_FOR_ARITH;
|
|
}
|
|
|
|
#if USE_HELPERS_FOR_INT_DIV
|
|
if (typ == TYP_INT)
|
|
{
|
|
helper = CORINFO_HELP_DIV;
|
|
goto USE_HELPER_FOR_ARITH;
|
|
}
|
|
#endif
|
|
#endif // !TARGET_64BIT
|
|
break;
|
|
|
|
case GT_UDIV:
|
|
|
|
#ifndef TARGET_64BIT
|
|
if (typ == TYP_LONG)
|
|
{
|
|
helper = CORINFO_HELP_ULDIV;
|
|
goto USE_HELPER_FOR_ARITH;
|
|
}
|
|
#if USE_HELPERS_FOR_INT_DIV
|
|
if (typ == TYP_INT)
|
|
{
|
|
helper = CORINFO_HELP_UDIV;
|
|
goto USE_HELPER_FOR_ARITH;
|
|
}
|
|
#endif
|
|
#endif // TARGET_64BIT
|
|
break;
|
|
|
|
case GT_MOD:
|
|
|
|
if (varTypeIsFloating(typ))
|
|
{
|
|
helper = CORINFO_HELP_DBLREM;
|
|
noway_assert(op2);
|
|
if (op1->TypeGet() == TYP_FLOAT)
|
|
{
|
|
if (op2->TypeGet() == TYP_FLOAT)
|
|
{
|
|
helper = CORINFO_HELP_FLTREM;
|
|
}
|
|
else
|
|
{
|
|
tree->AsOp()->gtOp1 = op1 = gtNewCastNode(TYP_DOUBLE, op1, false, TYP_DOUBLE);
|
|
}
|
|
}
|
|
else if (op2->TypeGet() == TYP_FLOAT)
|
|
{
|
|
tree->AsOp()->gtOp2 = op2 = gtNewCastNode(TYP_DOUBLE, op2, false, TYP_DOUBLE);
|
|
}
|
|
goto USE_HELPER_FOR_ARITH;
|
|
}
|
|
|
|
// Convert MOD to UMOD if both op1 and op2 are known to be never negative
|
|
if (!gtIsActiveCSE_Candidate(tree) && varTypeIsIntegral(tree) && op1->IsNeverNegative(this) &&
|
|
op2->IsNeverNegative(this))
|
|
{
|
|
assert(tree->OperIs(GT_MOD));
|
|
tree->ChangeOper(GT_UMOD, GenTree::PRESERVE_VN);
|
|
return fgMorphSmpOp(tree, mac);
|
|
}
|
|
|
|
            // Do not use optimizations (unlike UMOD's idiv optimizing during codegen) for signed mod.
            // A similar optimization for signed mod will not work for a negative perfectly divisible
            // HI-word. To make it correct, we would need to divide without the sign and then flip the
            // result sign after mod. This requires 18 opcodes + flow, making it not worthwhile to inline.
|
|
goto ASSIGN_HELPER_FOR_MOD;
|
|
|
|
case GT_UMOD:
|
|
|
|
#ifdef TARGET_ARMARCH
|
|
//
|
|
// Note for TARGET_ARMARCH we don't have a remainder instruction, so we don't do this optimization
|
|
//
|
|
#else // TARGET_XARCH
|
|
// If this is an unsigned long mod with a constant divisor,
|
|
// then don't morph to a helper call - it can be done faster inline using idiv.
|
|
|
|
noway_assert(op2);
|
|
if ((typ == TYP_LONG) && opts.OptimizationEnabled())
|
|
{
|
|
if (op2->OperIs(GT_CNS_NATIVELONG) && op2->AsIntConCommon()->LngValue() >= 2 &&
|
|
op2->AsIntConCommon()->LngValue() <= 0x3fffffff)
|
|
{
|
|
tree->AsOp()->gtOp1 = op1 = fgMorphTree(op1);
|
|
noway_assert(op1->TypeIs(TYP_LONG));
|
|
|
|
// Update flags for op1 morph.
|
|
tree->gtFlags &= ~GTF_ALL_EFFECT;
|
|
|
|
// Only update with op1 as op2 is a constant.
|
|
tree->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT);
|
|
|
|
// If op1 is a constant, then do constant folding of the division operator.
|
|
if (op1->OperIs(GT_CNS_NATIVELONG))
|
|
{
|
|
tree = gtFoldExpr(tree);
|
|
}
|
|
|
|
if (!tree->OperIsConst())
|
|
{
|
|
tree->AsOp()->CheckDivideByConstOptimized(this);
|
|
}
|
|
|
|
return tree;
|
|
}
|
|
}
|
|
#endif // TARGET_XARCH
|
|
|
|
ASSIGN_HELPER_FOR_MOD:
|
|
|
|
if (!optValnumCSE_phase)
|
|
{
|
|
if (tree->OperIs(GT_MOD, GT_UMOD) && (op2->IsIntegralConst(1)))
|
|
{
|
|
// Transformation: a % 1 = 0
|
|
GenTree* optimizedTree = fgMorphModToZero(tree->AsOp());
|
|
if (optimizedTree != nullptr)
|
|
{
|
|
tree = optimizedTree;
|
|
|
|
if (tree->OperIs(GT_COMMA))
|
|
{
|
|
op1 = tree->gtGetOp1();
|
|
op2 = tree->gtGetOp2();
|
|
}
|
|
else
|
|
{
|
|
assert(tree->IsIntegralConst());
|
|
op1 = nullptr;
|
|
op2 = nullptr;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifndef TARGET_64BIT
|
|
if (typ == TYP_LONG)
|
|
{
|
|
helper = (oper == GT_UMOD) ? CORINFO_HELP_ULMOD : CORINFO_HELP_LMOD;
|
|
goto USE_HELPER_FOR_ARITH;
|
|
}
|
|
|
|
#if USE_HELPERS_FOR_INT_DIV
|
|
if (typ == TYP_INT)
|
|
{
|
|
if (oper == GT_UMOD)
|
|
{
|
|
helper = CORINFO_HELP_UMOD;
|
|
goto USE_HELPER_FOR_ARITH;
|
|
}
|
|
else if (oper == GT_MOD)
|
|
{
|
|
helper = CORINFO_HELP_MOD;
|
|
goto USE_HELPER_FOR_ARITH;
|
|
}
|
|
}
|
|
#endif
|
|
#endif // !TARGET_64BIT
|
|
|
|
if (!optValnumCSE_phase)
|
|
{
|
|
if (tree->OperIs(GT_UMOD) && op2->IsIntegralConstUnsignedPow2())
|
|
{
|
|
// Transformation: a % b = a & (b - 1);
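                    // For example (illustrative): an unsigned "x % 8" becomes "x & 7".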
|
|
tree = fgMorphUModToAndSub(tree->AsOp());
|
|
op1 = tree->AsOp()->gtOp1;
|
|
op2 = tree->AsOp()->gtOp2;
|
|
}
|
|
#ifdef TARGET_ARM64
|
|
// ARM64 architecture manual suggests this transformation
|
|
// for the mod operator.
|
|
else
|
|
#else
|
|
                // XARCH only applies this transformation if we know
                // that magic division will be used - which is determined
                // when 'b' is not a power of 2 constant and the mod operator is signed.
                // Lowering for XARCH does this optimization already,
                // but it is also done here to take advantage of CSE.
|
|
else if (tree->OperIs(GT_MOD) && op2->IsIntegralConst() && !op2->IsIntegralConstAbsPow2())
|
|
#endif
|
|
{
|
|
// Transformation: a % b = a - (a / b) * b;
|
|
tree = fgMorphModToSubMulDiv(tree->AsOp());
|
|
op1 = tree->AsOp()->gtOp1;
|
|
op2 = tree->AsOp()->gtOp2;
|
|
}
|
|
}
|
|
break;
|
|
|
|
USE_HELPER_FOR_ARITH:
|
|
{
|
|
// TODO: this comment is wrong now, do an appropriate fix.
|
|
/* We have to morph these arithmetic operations into helper calls
|
|
before morphing the arguments (preorder), else the arguments
|
|
won't get correct values of fgPtrArgCntCur.
|
|
However, try to fold the tree first in case we end up with a
|
|
simple node which won't need a helper call at all */
|
|
|
|
noway_assert(tree->OperIsBinary());
|
|
|
|
GenTree* oldTree = tree;
|
|
|
|
if (opts.OptimizationEnabled())
|
|
{
|
|
tree = gtFoldExpr(tree);
|
|
}
|
|
|
|
// Were we able to fold it ?
|
|
// Note that gtFoldExpr may return a non-leaf even if successful
|
|
// e.g. for something like "expr / 1" - see also bug #290853
|
|
if (tree->OperIsLeaf() || (oldTree != tree))
|
|
{
|
|
return (oldTree != tree) ? fgMorphTree(tree) : fgMorphLeaf(tree);
|
|
}
|
|
|
|
// Did we fold it into a comma node with throw?
|
|
if (tree->gtOper == GT_COMMA)
|
|
{
|
|
noway_assert(fgIsCommaThrow(tree));
|
|
return fgMorphTree(tree);
|
|
}
|
|
}
|
|
|
|
return fgMorphIntoHelperCall(tree, helper, true /* morphArgs */, op1, op2);
|
|
|
|
case GT_RETURN:
|
|
case GT_SWIFT_ERROR_RET:
|
|
{
|
|
GenTree* retVal = tree->AsOp()->GetReturnValue();
|
|
|
|
if (!tree->TypeIs(TYP_VOID))
|
|
{
|
|
if (retVal->OperIs(GT_LCL_FLD))
|
|
{
|
|
retVal = fgMorphRetInd(tree->AsOp());
|
|
}
|
|
|
|
fgTryReplaceStructLocalWithField(retVal);
|
|
}
|
|
|
|
// normalize small integer return values
|
|
if (fgGlobalMorph && varTypeIsSmall(info.compRetType) && (retVal != nullptr) && !retVal->TypeIs(TYP_VOID) &&
|
|
fgCastNeeded(retVal, info.compRetType))
|
|
{
|
|
#ifdef SWIFT_SUPPORT
|
|
// Morph error operand if tree is a GT_SWIFT_ERROR_RET node
|
|
if (tree->OperIs(GT_SWIFT_ERROR_RET))
|
|
{
|
|
GenTree* const errorVal = fgMorphTree(tree->gtGetOp1());
|
|
tree->AsOp()->gtOp1 = errorVal;
|
|
|
|
// Propagate side effect flags
|
|
tree->SetAllEffectsFlags(errorVal);
|
|
}
|
|
#endif // SWIFT_SUPPORT
|
|
|
|
// Small-typed return values are normalized by the callee
|
|
retVal = gtNewCastNode(TYP_INT, retVal, false, info.compRetType);
|
|
|
|
// Propagate GTF_COLON_COND
|
|
retVal->gtFlags |= (tree->gtFlags & GTF_COLON_COND);
|
|
|
|
retVal = fgMorphTree(retVal);
|
|
tree->AsOp()->SetReturnValue(retVal);
|
|
|
|
// Propagate side effect flags
|
|
tree->SetAllEffectsFlags(retVal);
|
|
|
|
return tree;
|
|
}
|
|
|
|
if (tree->OperIs(GT_RETURN))
|
|
{
|
|
op1 = retVal;
|
|
}
|
|
else
|
|
{
|
|
op2 = retVal;
|
|
}
|
|
break;
|
|
}
|
|
|
|
case GT_EQ:
|
|
case GT_NE:
|
|
{
|
|
if (opts.OptimizationEnabled())
|
|
{
|
|
GenTree* optimizedTree = gtFoldTypeCompare(tree);
|
|
if (optimizedTree != tree)
|
|
{
|
|
return fgMorphTree(optimizedTree);
|
|
}
|
|
}
|
|
|
|
            // Pattern-matching optimization:
            //    (a % c) ==/!= 0
            // for power-of-2 constant `c`
            // =>
            //    a & (c - 1) ==/!= 0
            // For integer `a`, even if negative.
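            // For example (illustrative): "(a % 8) == 0" becomes "(a & 7) == 0", which also holds
            // for negative 'a' because only the comparison with zero is being tested, and both
            // sides are zero exactly when 'a' is divisible by 8.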
|
|
if (opts.OptimizationEnabled() && !optValnumCSE_phase)
|
|
{
|
|
assert(tree->OperIs(GT_EQ, GT_NE));
|
|
if (op1->OperIs(GT_MOD) && varTypeIsIntegral(op1) && op2->IsIntegralConst(0))
|
|
{
|
|
GenTree* op1op2 = op1->AsOp()->gtOp2;
|
|
if (op1op2->IsCnsIntOrI())
|
|
{
|
|
const ssize_t modValue = op1op2->AsIntCon()->IconValue();
|
|
if (isPow2(modValue))
|
|
{
|
|
JITDUMP("\nTransforming:\n");
|
|
DISPTREE(tree);
|
|
|
|
op1->SetOper(GT_AND); // Change % => &
|
|
op1op2->AsIntConCommon()->SetIconValue(modValue - 1); // Change c => c - 1
|
|
fgUpdateConstTreeValueNumber(op1op2);
|
|
|
|
JITDUMP("\ninto:\n");
|
|
DISPTREE(tree);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
FALLTHROUGH;
|
|
|
|
case GT_GT:
|
|
{
|
|
// Try and optimize nullable boxes feeding compares
|
|
GenTree* optimizedTree = gtFoldBoxNullable(tree);
|
|
|
|
if (optimizedTree->OperGet() != tree->OperGet())
|
|
{
|
|
return optimizedTree;
|
|
}
|
|
else
|
|
{
|
|
tree = optimizedTree;
|
|
}
|
|
|
|
op1 = tree->AsOp()->gtOp1;
|
|
op2 = tree->gtGetOp2IfPresent();
|
|
|
|
break;
|
|
}
|
|
|
|
case GT_RUNTIMELOOKUP:
|
|
return fgMorphTree(op1);
|
|
|
|
case GT_COMMA:
|
|
if (op2->OperIsStore() || (op2->OperGet() == GT_COMMA && op2->TypeGet() == TYP_VOID) || fgIsThrow(op2))
|
|
{
|
|
typ = tree->gtType = TYP_VOID;
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
if (opts.OptimizationEnabled() && fgGlobalMorph)
|
|
{
|
|
GenTree* morphed = fgMorphReduceAddOps(tree);
|
|
if (morphed != tree)
|
|
return fgMorphTree(morphed);
|
|
}
|
|
|
|
/*-------------------------------------------------------------------------
|
|
* Process the first operand, if any
|
|
*/
|
|
|
|
if (op1 != nullptr)
|
|
{
|
|
// If we are entering the "then" part of a Qmark-Colon we must
|
|
// save the state of the current assertions table so that we can
|
|
// restore this state when entering the "else" part
|
|
if (isQmarkColon)
|
|
{
|
|
noway_assert(optLocalAssertionProp);
|
|
BitVecOps::Assign(apTraits, origAssertions, apLocal);
|
|
}
|
|
|
|
// TODO-Bug: Moving the null check to this indirection should nominally check for interference with
|
|
// the other operands in case this is a store. However, doing so unconditionally preserves previous
|
|
// behavior and "fixes up" field store importation that places the null check in the wrong location
|
|
// (before the 'value' operand is evaluated).
|
|
MorphAddrContext indMac;
|
|
if (tree->OperIsIndir() && !tree->OperIsAtomicOp())
|
|
{
|
|
// Communicate to FIELD_ADDR morphing that the parent is an indirection.
|
|
indMac.m_user = tree->AsIndir();
|
|
mac = &indMac;
|
|
}
|
|
// For additions, if we already have a context, keep track of whether all offsets added
|
|
// to the address are constant, and their sum does not overflow.
|
|
else if ((mac != nullptr) && tree->OperIs(GT_ADD) && op2->IsCnsIntOrI())
|
|
{
|
|
ClrSafeInt<size_t> offset(mac->m_totalOffset);
|
|
offset += op2->AsIntCon()->IconValue();
|
|
if (!offset.IsOverflow())
|
|
{
|
|
mac->m_totalOffset = offset.Value();
|
|
}
|
|
else
|
|
{
|
|
mac = nullptr;
|
|
}
|
|
}
|
|
else // Reset the context.
|
|
{
|
|
mac = nullptr;
|
|
}
|
|
|
|
tree->AsOp()->gtOp1 = op1 = fgMorphTree(op1, mac);
|
|
|
|
// If we are exiting the "then" part of a Qmark-Colon we must
|
|
// save the state of the current assertions table so that we
|
|
// can merge this state with the "else" part exit
|
|
if (isQmarkColon)
|
|
{
|
|
noway_assert(optLocalAssertionProp);
|
|
BitVecOps::Assign(apTraits, thenAssertions, apLocal);
|
|
}
|
|
}
|
|
|
|
/*-------------------------------------------------------------------------
|
|
* Process the second operand, if any
|
|
*/
|
|
|
|
if (op2 != nullptr)
|
|
{
|
|
// If we are entering the "else" part of a Qmark-Colon we must
|
|
// reset the state of the current assertions table
|
|
if (isQmarkColon)
|
|
{
|
|
noway_assert(optLocalAssertionProp);
|
|
BitVecOps::Assign(apTraits, apLocal, origAssertions);
|
|
}
|
|
|
|
tree->AsOp()->gtOp2 = op2 = fgMorphTree(op2);
|
|
|
|
// If we are exiting the "else" part of a Qmark-Colon we must
|
|
// merge the state of the current assertions table with that
|
|
// of the exit of the "then" part.
|
|
//
|
|
if (isQmarkColon)
|
|
{
|
|
noway_assert(optLocalAssertionProp);
|
|
|
|
// Merge then and else (current) assertion sets.
|
|
//
|
|
BitVecOps::IntersectionD(apTraits, apLocal, thenAssertions);
|
|
}
|
|
}
|
|
|
|
#ifndef TARGET_64BIT
|
|
DONE_MORPHING_CHILDREN:
|
|
#endif // !TARGET_64BIT
|
|
|
|
gtUpdateNodeOperSideEffects(tree);
|
|
|
|
if (op1 != nullptr)
|
|
{
|
|
tree->AddAllEffectsFlags(op1);
|
|
}
|
|
if (op2 != nullptr)
|
|
{
|
|
tree->AddAllEffectsFlags(op2);
|
|
}
|
|
|
|
/*-------------------------------------------------------------------------
|
|
* Now do POST-ORDER processing
|
|
*/
|
|
|
|
if (varTypeIsGC(tree->TypeGet()) && (op1 && !varTypeIsGC(op1->TypeGet())) && (op2 && !varTypeIsGC(op2->TypeGet())))
|
|
{
|
|
// The tree is really not GC but was marked as such. Now that the
|
|
// children have been unmarked, unmark the tree too.
|
|
|
|
        // Remember that GT_COMMA inherits its type only from op2
|
|
if (tree->gtOper == GT_COMMA)
|
|
{
|
|
tree->gtType = genActualType(op2->TypeGet());
|
|
}
|
|
else
|
|
{
|
|
tree->gtType = genActualType(op1->TypeGet());
|
|
}
|
|
}
|
|
|
|
GenTree* oldTree = tree;
|
|
|
|
GenTree* qmarkOp1 = nullptr;
|
|
GenTree* qmarkOp2 = nullptr;
|
|
|
|
if ((tree->OperGet() == GT_QMARK) && (tree->AsOp()->gtOp2->OperGet() == GT_COLON))
|
|
{
|
|
qmarkOp1 = oldTree->AsOp()->gtOp2->AsOp()->gtOp1;
|
|
qmarkOp2 = oldTree->AsOp()->gtOp2->AsOp()->gtOp2;
|
|
}
|
|
|
|
if (opts.OptimizationEnabled())
|
|
{
|
|
        // Try to fold it; maybe we get lucky.
|
|
tree = gtFoldExpr(tree);
|
|
}
|
|
|
|
if (oldTree != tree)
|
|
{
|
|
/* if gtFoldExpr returned op1 or op2 then we are done */
|
|
if ((tree == op1) || (tree == op2) || (tree == qmarkOp1) || (tree == qmarkOp2))
|
|
{
|
|
return tree;
|
|
}
|
|
|
|
/* If we created a comma-throw tree then we need to morph op1 */
|
|
if (fgIsCommaThrow(tree))
|
|
{
|
|
tree->AsOp()->gtOp1 = fgMorphTree(tree->AsOp()->gtOp1);
|
|
fgMorphTreeDone(tree);
|
|
return tree;
|
|
}
|
|
|
|
return tree;
|
|
}
|
|
else if (tree->OperIsConst())
|
|
{
|
|
return tree;
|
|
}
|
|
else if (tree->IsNothingNode())
|
|
{
|
|
return tree;
|
|
}
|
|
|
|
/* gtFoldExpr could have used setOper to change the oper */
|
|
oper = tree->OperGet();
|
|
typ = tree->TypeGet();
|
|
|
|
/* gtFoldExpr could have changed op1 and op2 */
|
|
op1 = tree->AsOp()->gtOp1;
|
|
op2 = tree->gtGetOp2IfPresent();
|
|
|
|
/*-------------------------------------------------------------------------
|
|
* Perform the required oper-specific postorder morphing
|
|
*/
|
|
|
|
switch (oper)
|
|
{
|
|
case GT_STORE_LCL_VAR:
|
|
case GT_STORE_LCL_FLD:
|
|
case GT_STOREIND:
|
|
tree = fgOptimizeCastOnStore(tree);
|
|
op1 = tree->gtGetOp1();
|
|
op2 = tree->gtGetOp2IfPresent();
|
|
|
|
if (tree->OperIs(GT_STOREIND))
|
|
{
|
|
GenTree* optimizedTree = fgMorphFinalizeIndir(tree->AsIndir());
|
|
if (optimizedTree != nullptr)
|
|
{
|
|
return optimizedTree;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case GT_CAST:
|
|
tree = fgOptimizeCast(tree->AsCast());
|
|
if (!tree->OperIsSimple())
|
|
{
|
|
return tree;
|
|
}
|
|
if (tree->OperIs(GT_CAST) && tree->gtOverflow())
|
|
{
|
|
fgAddCodeRef(compCurBB, SCK_OVERFLOW);
|
|
}
|
|
|
|
typ = tree->TypeGet();
|
|
oper = tree->OperGet();
|
|
op1 = tree->AsOp()->gtGetOp1();
|
|
op2 = tree->gtGetOp2IfPresent();
|
|
break;
|
|
|
|
case GT_BITCAST:
|
|
{
|
|
GenTree* optimizedTree = fgOptimizeBitCast(tree->AsUnOp());
|
|
if (optimizedTree != nullptr)
|
|
{
|
|
return optimizedTree;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case GT_EQ:
|
|
case GT_NE:
|
|
// It is not safe to reorder/delete CSE's
|
|
if (!optValnumCSE_phase && op2->IsIntegralConst())
|
|
{
|
|
tree = fgOptimizeEqualityComparisonWithConst(tree->AsOp());
|
|
assert(tree->OperIsCompare());
|
|
|
|
oper = tree->OperGet();
|
|
op1 = tree->gtGetOp1();
|
|
op2 = tree->gtGetOp2();
|
|
}
|
|
goto COMPARE;
|
|
|
|
case GT_LT:
|
|
case GT_LE:
|
|
case GT_GE:
|
|
case GT_GT:
|
|
// Change "CNS relop op2" to "op2 relop* CNS"
|
|
if (!optValnumCSE_phase && op1->IsIntegralConst() && tree->OperIsCompare() && gtCanSwapOrder(op1, op2))
|
|
{
|
|
std::swap(tree->AsOp()->gtOp1, tree->AsOp()->gtOp2);
|
|
tree->gtOper = GenTree::SwapRelop(tree->OperGet());
|
|
|
|
oper = tree->OperGet();
|
|
op1 = tree->gtGetOp1();
|
|
op2 = tree->gtGetOp2();
|
|
}
|
|
|
|
if (!optValnumCSE_phase && (op1->OperIs(GT_CAST) || op2->OperIs(GT_CAST)))
|
|
{
|
|
tree = fgOptimizeRelationalComparisonWithCasts(tree->AsOp());
|
|
oper = tree->OperGet();
|
|
op1 = tree->gtGetOp1();
|
|
op2 = tree->gtGetOp2();
|
|
}
|
|
|
|
// op2's value may be changed, so it cannot be a CSE candidate.
|
|
if (op2->IsIntegralConst() && !gtIsActiveCSE_Candidate(op2))
|
|
{
|
|
tree = fgOptimizeRelationalComparisonWithConst(tree->AsOp());
|
|
oper = tree->OperGet();
|
|
|
|
assert(op1 == tree->AsOp()->gtGetOp1());
|
|
assert(op2 == tree->AsOp()->gtGetOp2());
|
|
}
|
|
|
|
if (opts.OptimizationEnabled() && fgGlobalMorph)
|
|
{
|
|
if (op2->IsIntegralConst() || op1->IsIntegralConst())
|
|
{
|
|
if (tree->OperIs(GT_GT, GT_LT, GT_LE, GT_GE))
|
|
{
|
|
tree = fgOptimizeRelationalComparisonWithFullRangeConst(tree->AsOp());
|
|
if (tree->OperIs(GT_CNS_INT))
|
|
{
|
|
return tree;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
COMPARE:
|
|
|
|
noway_assert(tree->OperIsCompare());
|
|
break;
|
|
|
|
case GT_MUL:
|
|
|
|
#ifndef TARGET_64BIT
|
|
if (typ == TYP_LONG)
|
|
{
|
|
// This must be GTF_MUL_64RSLT
|
|
INDEBUG(tree->AsOp()->DebugCheckLongMul());
|
|
return tree;
|
|
}
|
|
#endif // TARGET_64BIT
|
|
goto CM_OVF_OP;
|
|
|
|
case GT_SUB:
|
|
|
|
if (tree->gtOverflow())
|
|
{
|
|
goto CM_OVF_OP;
|
|
}
|
|
|
|
// TODO #4104: there are a lot of other places where
|
|
// this condition is not checked before transformations.
|
|
noway_assert(op2);
|
|
if (fgGlobalMorph && !op2->TypeIs(TYP_BYREF))
|
|
{
|
|
/* Check for "op1 - cns2" , we change it to "op1 + (-cns2)" */
|
|
|
|
if (op2->IsCnsIntOrI() && !op2->IsIconHandle())
|
|
{
|
|
// Negate the constant and change the node to be "+",
|
|
// except when `op2` is a const byref.
|
|
|
|
op2->AsIntConCommon()->SetIconValue(-op2->AsIntConCommon()->IconValue());
|
|
op2->AsIntConRef().gtFieldSeq = nullptr;
|
|
oper = GT_ADD;
|
|
tree->ChangeOper(oper);
|
|
goto CM_ADD_OP;
|
|
}
|
|
|
|
/* Check for "cns1 - op2" , we change it to "(cns1 + (-op2))" */
|
|
|
|
noway_assert(op1);
|
|
if (op1->IsCnsIntOrI())
|
|
{
|
|
noway_assert(varTypeIsIntegralOrI(tree));
|
|
|
|
// The type of the new GT_NEG node cannot just be op2->TypeGet().
|
|
// Otherwise we may sign-extend incorrectly in cases where the GT_NEG
|
|
// node ends up feeding directly into a cast, for example in
|
|
// GT_CAST<ubyte>(GT_SUB(0, s_1.ubyte))
|
|
|
|
if (op1->IsIntegralConst(0))
|
|
{
|
|
tree->ChangeOper(GT_NEG);
|
|
tree->gtType = genActualType(op2->TypeGet());
|
|
|
|
tree->AsOp()->gtOp1 = op2;
|
|
tree->AsOp()->gtOp2 = nullptr;
|
|
|
|
DEBUG_DESTROY_NODE(op1);
|
|
return tree;
|
|
}
|
|
|
|
tree->AsOp()->gtOp2 = op2 = gtNewOperNode(GT_NEG, genActualType(op2->TypeGet()), op2);
|
|
fgMorphTreeDone(op2);
|
|
|
|
oper = GT_ADD;
|
|
tree->ChangeOper(oper);
|
|
goto CM_ADD_OP;
|
|
}
|
|
|
|
/* No match - exit */
|
|
}
|
|
|
|
            // Skip optimization if the non-NEG operand is constant.
            // Neither op1 nor op2 is a constant here, because that was already checked above.
|
|
if (opts.OptimizationEnabled() && fgGlobalMorph)
|
|
{
|
|
                // a - -b => a + b
                // SUB(a, NEG(b)) => ADD(a, b)
|
|
|
|
if (!op1->OperIs(GT_NEG) && op2->OperIs(GT_NEG))
|
|
{
|
|
// tree: SUB
|
|
// op1: a
|
|
// op2: NEG
|
|
// op2Child: b
|
|
|
|
GenTree* op2Child = op2->AsOp()->gtOp1; // b
|
|
oper = GT_ADD;
|
|
tree->SetOper(oper, GenTree::PRESERVE_VN);
|
|
tree->AsOp()->gtOp2 = op2Child;
|
|
|
|
DEBUG_DESTROY_NODE(op2);
|
|
|
|
op2 = op2Child;
|
|
}
|
|
// -a - -b = > b - a
|
|
// SUB(NEG(a), (NEG(b)) => SUB(b, a)
|
|
else if (op1->OperIs(GT_NEG) && op2->OperIs(GT_NEG) && gtCanSwapOrder(op1, op2))
|
|
{
|
|
// tree: SUB
|
|
// op1: NEG
|
|
// op1Child: a
|
|
// op2: NEG
|
|
// op2Child: b
|
|
|
|
GenTree* op1Child = op1->AsOp()->gtOp1; // a
|
|
GenTree* op2Child = op2->AsOp()->gtOp1; // b
|
|
tree->AsOp()->gtOp1 = op2Child;
|
|
tree->AsOp()->gtOp2 = op1Child;
|
|
|
|
DEBUG_DESTROY_NODE(op1);
|
|
DEBUG_DESTROY_NODE(op2);
|
|
|
|
op1 = op2Child;
|
|
op2 = op1Child;
|
|
}
|
|
}
|
|
|
|
break;
|
|
|
|
#if defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
|
|
case GT_DIV:
|
|
#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
|
|
case GT_MOD:
|
|
#endif
|
|
{
|
|
if (!varTypeIsFloating(tree->gtType))
|
|
{
|
|
ExceptionSetFlags exSetFlags = tree->OperExceptions(this);
|
|
|
|
if ((exSetFlags & ExceptionSetFlags::ArithmeticException) != ExceptionSetFlags::None)
|
|
{
|
|
fgAddCodeRef(compCurBB, SCK_OVERFLOW);
|
|
}
|
|
else
|
|
{
|
|
tree->gtFlags |= GTF_DIV_MOD_NO_OVERFLOW;
|
|
}
|
|
|
|
if ((exSetFlags & ExceptionSetFlags::DivideByZeroException) != ExceptionSetFlags::None)
|
|
{
|
|
fgAddCodeRef(compCurBB, SCK_DIV_BY_ZERO);
|
|
}
|
|
else
|
|
{
|
|
tree->gtFlags |= GTF_DIV_MOD_NO_BY_ZERO;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
case GT_UDIV:
|
|
#if defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
|
|
case GT_UMOD:
|
|
#endif
|
|
{
|
|
ExceptionSetFlags exSetFlags = tree->OperExceptions(this);
|
|
if ((exSetFlags & ExceptionSetFlags::DivideByZeroException) != ExceptionSetFlags::None)
|
|
{
|
|
fgAddCodeRef(compCurBB, SCK_DIV_BY_ZERO);
|
|
}
|
|
else
|
|
{
|
|
tree->gtFlags |= GTF_DIV_MOD_NO_BY_ZERO;
|
|
}
|
|
}
|
|
break;
|
|
#endif // defined(TARGET_ARM64) || defined(TARGET_LOONGARCH64) || defined(TARGET_RISCV64)
|
|
|
|
case GT_ADD:
|
|
|
|
CM_OVF_OP:
|
|
if (tree->gtOverflow())
|
|
{
|
|
// Add the exception-throwing basic block to jump to on overflow
|
|
|
|
fgAddCodeRef(compCurBB, SCK_OVERFLOW);
|
|
|
|
// We can't do any commutative morphing for overflow instructions
|
|
|
|
break;
|
|
}
|
|
|
|
CM_ADD_OP:
|
|
|
|
FALLTHROUGH;
|
|
|
|
case GT_OR:
|
|
case GT_XOR:
|
|
case GT_AND:
|
|
tree = fgOptimizeCommutativeArithmetic(tree->AsOp());
|
|
if (!tree->OperIsSimple())
|
|
{
|
|
return tree;
|
|
}
|
|
typ = tree->TypeGet();
|
|
oper = tree->OperGet();
|
|
op1 = tree->gtGetOp1();
|
|
op2 = tree->gtGetOp2IfPresent();
|
|
break;
|
|
|
|
case GT_NOT:
|
|
case GT_NEG:
|
|
// Remove double negation/not.
|
|
// Note: this is not a safe transformation if "tree" is a CSE candidate.
|
|
// Consider for example the following expression: NEG(NEG(OP)), where any
|
|
// NEG is a CSE candidate. Were we to morph this to just OP, CSE would fail to find
|
|
// the original NEG in the statement.
|
|
if (op1->OperIs(oper) && opts.OptimizationEnabled() && !gtIsActiveCSE_Candidate(tree) &&
|
|
!gtIsActiveCSE_Candidate(op1))
|
|
{
|
|
JITDUMP("Remove double negation/not\n")
|
|
GenTree* op1op1 = op1->gtGetOp1();
|
|
DEBUG_DESTROY_NODE(tree);
|
|
DEBUG_DESTROY_NODE(op1);
|
|
return op1op1;
|
|
}
|
|
|
|
// Distribute negation over simple multiplication/division expressions
|
|
if (opts.OptimizationEnabled() && !optValnumCSE_phase && tree->OperIs(GT_NEG) &&
|
|
op1->OperIs(GT_MUL, GT_DIV))
|
|
{
|
|
GenTreeOp* mulOrDiv = op1->AsOp();
|
|
GenTree* op1op1 = mulOrDiv->gtGetOp1();
|
|
GenTree* op1op2 = mulOrDiv->gtGetOp2();
|
|
|
|
if (!op1op1->IsCnsIntOrI() && op1op2->IsCnsIntOrI() && !op1op2->IsIconHandle())
|
|
{
|
|
// NEG(MUL(a, C)) => MUL(a, -C)
|
|
// NEG(DIV(a, C)) => DIV(a, -C), except when C = {-1, 1}
|
|
ssize_t constVal = op1op2->AsIntCon()->IconValue();
|
|
if ((mulOrDiv->OperIs(GT_DIV) && (constVal != -1) && (constVal != 1)) ||
|
|
(mulOrDiv->OperIs(GT_MUL) && !mulOrDiv->gtOverflow()))
|
|
{
|
|
GenTree* newOp1 = op1op1; // a
|
|
GenTree* newOp2 = gtNewIconNode(-constVal, op1op2->TypeGet()); // -C
|
|
mulOrDiv->gtOp1 = newOp1;
|
|
mulOrDiv->gtOp2 = newOp2;
|
|
mulOrDiv->SetVNsFromNode(tree);
|
|
|
|
DEBUG_DESTROY_NODE(tree);
|
|
DEBUG_DESTROY_NODE(op1op2);
|
|
|
|
return mulOrDiv;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Any constant cases should have been folded earlier */
|
|
noway_assert(!op1->OperIsConst() || op1->IsIconHandle() || opts.OptimizationDisabled() ||
|
|
optValnumCSE_phase);
|
|
break;
|
|
|
|
case GT_CKFINITE:
|
|
|
|
noway_assert(varTypeIsFloating(op1->TypeGet()));
|
|
|
|
fgAddCodeRef(compCurBB, SCK_ARITH_EXCPN);
|
|
break;
|
|
|
|
case GT_BOUNDS_CHECK:
|
|
|
|
fgAddCodeRef(compCurBB, tree->AsBoundsChk()->gtThrowKind);
|
|
break;
|
|
|
|
case GT_IND:
|
|
{
|
|
if (op1->IsIconHandle(GTF_ICON_OBJ_HDL))
|
|
{
|
|
tree->gtFlags |= (GTF_IND_INVARIANT | GTF_IND_NONFAULTING | GTF_IND_NONNULL);
|
|
}
|
|
|
|
GenTree* optimizedTree = fgMorphFinalizeIndir(tree->AsIndir());
|
|
if (optimizedTree != nullptr)
|
|
{
|
|
return optimizedTree;
|
|
}
|
|
|
|
// Only do this optimization when we are in the global optimizer. Doing this after value numbering
|
|
// could result in an invalid value number for the newly generated GT_IND node.
|
|
if (!varTypeIsStruct(tree) && op1->OperIs(GT_COMMA) && fgGlobalMorph)
|
|
{
|
|
// Perform the transform IND(COMMA(x, ..., z)) -> COMMA(x, ..., IND(z)).
|
|
GenTree* commaNode = op1;
|
|
GenTreeFlags treeFlags = tree->gtFlags;
|
|
commaNode->gtType = typ;
|
|
commaNode->gtFlags = (treeFlags & ~GTF_REVERSE_OPS); // Bashing the GT_COMMA flags here is
|
|
// dangerous, clear the GTF_REVERSE_OPS at
|
|
// least.
|
|
#ifdef DEBUG
|
|
commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
|
|
#endif
|
|
while (commaNode->AsOp()->gtOp2->gtOper == GT_COMMA)
|
|
{
|
|
commaNode = commaNode->AsOp()->gtOp2;
|
|
commaNode->gtType = typ;
|
|
commaNode->gtFlags =
|
|
(treeFlags & ~GTF_REVERSE_OPS & ~GTF_ASG & ~GTF_CALL); // Bashing the GT_COMMA flags here is
|
|
// dangerous, clear the GTF_REVERSE_OPS, GTF_ASG, and GTF_CALL at
|
|
// least.
|
|
commaNode->gtFlags |= ((commaNode->AsOp()->gtOp1->gtFlags | commaNode->AsOp()->gtOp2->gtFlags) &
|
|
(GTF_ASG | GTF_CALL));
|
|
#ifdef DEBUG
|
|
commaNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
|
|
#endif
|
|
}
|
|
|
|
tree = op1;
|
|
GenTree* addr = commaNode->AsOp()->gtOp2;
|
|
// TODO-1stClassStructs: we often create a struct IND without a handle, fix it.
|
|
op1 = gtNewIndir(typ, addr);
|
|
|
|
// GTF_GLOB_EFFECT flags can be recomputed from the child
|
|
// nodes. GTF_ORDER_SIDEEFF may be set already and indicate no
|
|
// reordering is allowed with sibling nodes, so we cannot
|
|
// recompute that.
|
|
//
|
|
op1->gtFlags |= treeFlags & ~GTF_GLOB_EFFECT;
|
|
op1->gtFlags |= (addr->gtFlags & GTF_ALL_EFFECT);
|
|
|
|
// if this was a non-faulting indir, clear GTF_EXCEPT,
|
|
// unless we inherit it from the addr.
|
|
//
|
|
if (((treeFlags & GTF_IND_NONFAULTING) != 0) && ((addr->gtFlags & GTF_EXCEPT) == 0))
|
|
{
|
|
op1->gtFlags &= ~GTF_EXCEPT;
|
|
}
|
|
|
|
op1->gtFlags |= treeFlags & GTF_GLOB_REF;
|
|
|
|
#ifdef DEBUG
|
|
op1->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
|
|
#endif
|
|
commaNode->AsOp()->gtOp2 = op1;
|
|
commaNode->gtFlags |= (op1->gtFlags & GTF_ALL_EFFECT);
|
|
return tree;
|
|
}
|
|
}
|
|
break;
|
|
|
|
case GT_NULLCHECK:
|
|
if (opts.OptimizationEnabled() && !optValnumCSE_phase && !tree->OperMayThrow(this))
|
|
{
|
|
JITDUMP("\nNULLCHECK on [%06u] will always succeed\n", dspTreeID(op1));
|
|
if ((op1->gtFlags & GTF_SIDE_EFFECT) != 0)
|
|
{
|
|
tree = gtUnusedValNode(op1);
|
|
INDEBUG(tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
|
|
}
|
|
else
|
|
{
|
|
tree->gtBashToNOP();
|
|
}
|
|
return tree;
|
|
}
|
|
break;
|
|
|
|
case GT_COLON:
|
|
if (fgGlobalMorph)
|
|
{
|
|
/* Mark the nodes that are conditionally executed */
|
|
fgWalkTreePre(&tree, gtMarkColonCond);
|
|
}
|
|
/* Since we're doing this postorder we clear this if it got set by a child */
|
|
fgRemoveRestOfBlock = false;
|
|
break;
|
|
|
|
case GT_COMMA:
|
|
|
|
/* Special case: trees that don't produce a value */
|
|
if (op2->OperIsStore() || (op2->OperGet() == GT_COMMA && op2->TypeGet() == TYP_VOID) || fgIsThrow(op2))
|
|
{
|
|
typ = tree->gtType = TYP_VOID;
|
|
}
|
|
|
|
// If we are in the Valuenum CSE phase then don't morph away anything as these
|
|
// nodes may have CSE defs/uses in them.
|
|
//
|
|
if (!optValnumCSE_phase)
|
|
{
|
|
// Extract the side effects from the left side of the comma. Since they don't "go" anywhere, this
|
|
// is all we need.
|
|
|
|
GenTree* op1SideEffects = nullptr;
|
|
// The addition of "GTF_MAKE_CSE" below prevents us from throwing away (for example)
|
|
// hoisted expressions in loops.
|
|
gtExtractSideEffList(op1, &op1SideEffects, (GTF_SIDE_EFFECT | GTF_MAKE_CSE));
|
|
if (op1SideEffects)
|
|
{
|
|
// Replace the left hand side with the side effect list.
|
|
op1 = op1SideEffects;
|
|
tree->AsOp()->gtOp1 = op1SideEffects;
|
|
gtUpdateNodeSideEffects(tree);
|
|
}
|
|
else
|
|
{
|
|
op2->gtFlags |= (tree->gtFlags & GTF_DONT_CSE);
|
|
DEBUG_DESTROY_NODE(tree);
|
|
DEBUG_DESTROY_NODE(op1);
|
|
return op2;
|
|
}
|
|
|
|
// If the right operand is just a void nop node, throw it away. Unless this is a
|
|
// comma throw, in which case we want the top-level morphing loop to recognize it.
|
|
if (op2->IsNothingNode() && op1->TypeIs(TYP_VOID) && !fgIsCommaThrow(tree))
|
|
{
|
|
op1->gtFlags |= (tree->gtFlags & GTF_DONT_CSE);
|
|
DEBUG_DESTROY_NODE(tree);
|
|
DEBUG_DESTROY_NODE(op2);
|
|
return op1;
|
|
}
|
|
}
|
|
|
|
break;
|
|
|
|
case GT_JTRUE:
|
|
|
|
/* Special case if fgRemoveRestOfBlock is set to true */
|
|
if (fgRemoveRestOfBlock)
|
|
{
|
|
if (fgIsCommaThrow(op1, true))
|
|
{
|
|
GenTree* throwNode = op1->AsOp()->gtOp1;
|
|
|
|
JITDUMP("Removing [%06d] GT_JTRUE as the block now unconditionally throws an exception.\n",
|
|
dspTreeID(tree));
|
|
DEBUG_DESTROY_NODE(tree);
|
|
|
|
return throwNode;
|
|
}
|
|
|
|
noway_assert(op1->OperIsCompare());
|
|
noway_assert(op1->gtFlags & GTF_EXCEPT);
|
|
|
|
// We need to keep op1 for the side-effects. Hang it off
|
|
// a GT_COMMA node
|
|
|
|
JITDUMP("Keeping side-effects by bashing [%06d] GT_JTRUE into a GT_COMMA.\n", dspTreeID(tree));
|
|
|
|
tree->ChangeOper(GT_COMMA);
|
|
tree->AsOp()->gtOp2 = op2 = gtNewNothingNode();
|
|
|
|
// Additionally since we're eliminating the JTRUE
|
|
// codegen won't like it if op1 is a RELOP of longs, floats or doubles.
|
|
// So we change it into a GT_COMMA as well.
|
|
JITDUMP("Also bashing [%06d] (a relop) into a GT_COMMA.\n", dspTreeID(op1));
|
|
op1->ChangeOper(GT_COMMA);
|
|
op1->gtFlags &= ~GTF_UNSIGNED; // Clear the unsigned flag if it was set on the relop
|
|
op1->gtType = op1->AsOp()->gtOp1->gtType;
|
|
|
|
return tree;
|
|
}
|
|
break;
|
|
|
|
case GT_INTRINSIC:
|
|
if (tree->AsIntrinsic()->gtIntrinsicName ==
|
|
NI_System_Runtime_CompilerServices_RuntimeHelpers_IsKnownConstant)
|
|
{
|
|
// Should be expanded by the time it reaches CSE phase
|
|
assert(!optValnumCSE_phase);
|
|
|
|
JITDUMP("\nExpanding RuntimeHelpers.IsKnownConstant to ");
|
|
if (op1->OperIsConst() || gtIsTypeof(op1))
|
|
{
|
|
// We're lucky to catch a constant here while importer was not
|
|
JITDUMP("true\n");
|
|
DEBUG_DESTROY_NODE(tree, op1);
|
|
tree = gtNewIconNode(1);
|
|
}
|
|
else
|
|
{
|
|
JITDUMP("false\n");
|
|
tree = gtWrapWithSideEffects(gtNewIconNode(0), op1, GTF_ALL_EFFECT);
|
|
}
|
|
INDEBUG(tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
|
|
return tree;
|
|
}
|
|
break;
|
|
|
|
case GT_RETURN:
|
|
case GT_SWIFT_ERROR_RET:
|
|
{
|
|
// Retry updating return operand to a field -- assertion
|
|
// prop done when morphing this operand changed the local.
|
|
//
|
|
GenTree* const retVal = tree->AsOp()->GetReturnValue();
|
|
if (retVal != nullptr)
|
|
{
|
|
fgTryReplaceStructLocalWithField(retVal);
|
|
}
|
|
break;
|
|
}
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
assert(oper == tree->gtOper);
|
|
|
|
// Propagate comma throws.
|
|
// If we are in the Valuenum CSE phase then don't morph away anything as these
|
|
// nodes may have CSE defs/uses in them.
|
|
if (fgGlobalMorph && (oper != GT_COLON) &&
|
|
/* TODO-ASG-Cleanup: delete this zero-diff quirk */ !GenTree::OperIsStore(oper))
|
|
{
|
|
if ((op1 != nullptr) && fgIsCommaThrow(op1, true))
|
|
{
|
|
GenTree* propagatedThrow = fgPropagateCommaThrow(tree, op1->AsOp(), GTF_EMPTY);
|
|
if (propagatedThrow != nullptr)
|
|
{
|
|
return propagatedThrow;
|
|
}
|
|
}
|
|
|
|
if ((op2 != nullptr) && fgIsCommaThrow(op2, true))
|
|
{
|
|
GenTree* propagatedThrow = fgPropagateCommaThrow(tree, op2->AsOp(), op1->gtFlags & GTF_ALL_EFFECT);
|
|
if (propagatedThrow != nullptr)
|
|
{
|
|
return propagatedThrow;
|
|
}
|
|
}
|
|
}
|
|
|
|
/*-------------------------------------------------------------------------
|
|
* Optional morphing is done if tree transformations are permitted
|
|
*/
|
|
|
|
if ((opts.compFlags & CLFLG_TREETRANS) == 0)
|
|
{
|
|
return tree;
|
|
}
|
|
|
|
tree = fgMorphSmpOpOptional(tree->AsOp(), optAssertionPropDone);
|
|
|
|
return tree;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgTryReplaceStructLocalWithField: see if a struct use can be replaced
|
|
// with an equivalent field use
|
|
//
|
|
// Arguments:
|
|
// tree - tree to examine and possibly modify
|
|
//
|
|
// Notes:
|
|
// Currently only called when the tree parent is a GT_RETURN/GT_SWIFT_ERROR_RET.
|
|
//
|
|
void Compiler::fgTryReplaceStructLocalWithField(GenTree* tree)
|
|
{
|
|
if (!tree->OperIs(GT_LCL_VAR))
|
|
{
|
|
return;
|
|
}
|
|
|
|
// With a `genReturnBB` this `RETURN(src)` tree will be replaced by a `STORE_LCL_VAR<genReturnLocal>(src)`
|
|
// and `STORE_LCL_VAR` will be transformed into field by field copy without parent local referencing if
|
|
// possible.
|
|
GenTreeLclVar* lclVar = tree->AsLclVar();
|
|
unsigned lclNum = lclVar->GetLclNum();
|
|
if ((genReturnLocal == BAD_VAR_NUM) || (genReturnLocal == lclNum))
|
|
{
|
|
LclVarDsc* const varDsc = lvaGetDesc(lclVar);
|
|
if (varDsc->CanBeReplacedWithItsField(this))
|
|
{
|
|
// We can replace the struct with its only field and allow copy propagation to replace
|
|
// return value that was written as a field.
|
|
unsigned const fieldLclNum = varDsc->lvFieldLclStart;
|
|
LclVarDsc* const fieldDsc = lvaGetDesc(fieldLclNum);
|
|
|
|
JITDUMP("Replacing an independently promoted local var V%02u with its only field "
|
|
"V%02u for "
|
|
"the return [%06u]\n",
|
|
lclVar->GetLclNum(), fieldLclNum, dspTreeID(tree));
|
|
lclVar->SetLclNum(fieldLclNum);
|
|
lclVar->ChangeType(fieldDsc->lvType);
|
|
}
|
|
}
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgMorphFinalizeIndir: Finalize morphing an indirection.
|
|
//
|
|
// Turns indirections off of local addresses into local field nodes.
|
|
// Adds UNALIGNED for some accesses on ARM for backwards compatibility.
|
|
//
|
|
// Arguments:
|
|
// indir - The indirection to morph (can be a store)
|
|
//
|
|
// Return Value:
|
|
// The optimized tree or "nullptr" if no transformations that would
|
|
// replace it were performed.
|
|
//
|
|
GenTree* Compiler::fgMorphFinalizeIndir(GenTreeIndir* indir)
|
|
{
|
|
assert(indir->isIndir());
|
|
GenTree* addr = indir->Addr();
|
|
|
|
#ifdef TARGET_ARM
|
|
if (varTypeIsFloating(indir))
|
|
{
|
|
// Check for a misaligned floating point indirection.
|
|
GenTree* effAddr = addr->gtEffectiveVal();
|
|
target_ssize_t offset;
|
|
gtPeelOffsets(&effAddr, &offset);
|
|
|
|
if (((offset % genTypeSize(TYP_FLOAT)) != 0) ||
|
|
(effAddr->IsCnsIntOrI() && ((effAddr->AsIntConCommon()->IconValue() % genTypeSize(TYP_FLOAT)) != 0)))
|
|
{
|
|
indir->gtFlags |= GTF_IND_UNALIGNED;
|
|
}
|
|
}
|
|
#endif // TARGET_ARM
|
|
|
|
if (!indir->IsVolatile() && !indir->TypeIs(TYP_STRUCT) && addr->OperIs(GT_LCL_ADDR) && !optValnumCSE_phase)
|
|
{
|
|
unsigned size = indir->Size();
|
|
unsigned offset = addr->AsLclVarCommon()->GetLclOffs();
|
|
unsigned extent = offset + size;
|
|
unsigned lclSize = lvaLclExactSize(addr->AsLclVarCommon()->GetLclNum());
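// Fold the indirection into a local field access only when the access is fully contained
// within the local and the resulting offset fits the 16-bit offset that LCL_FLD nodes can represent.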
|
|
|
|
if ((extent <= lclSize) && (extent < UINT16_MAX))
|
|
{
|
|
addr->ChangeType(indir->TypeGet());
|
|
if (indir->OperIs(GT_STOREIND))
|
|
{
|
|
GenTree* value = indir->Data();
|
|
addr->SetOper(GT_STORE_LCL_FLD);
|
|
addr->AsLclFld()->Data() = value;
|
|
addr->gtFlags |= (GTF_ASG | GTF_VAR_DEF);
|
|
addr->AddAllEffectsFlags(value);
|
|
}
|
|
else
|
|
{
|
|
assert(indir->OperIs(GT_IND));
|
|
addr->SetOper(GT_LCL_FLD);
|
|
}
|
|
addr->AsLclFld()->SetLclOffs(offset);
|
|
addr->SetVNsFromNode(indir);
|
|
addr->AddAllEffectsFlags(indir->gtFlags & GTF_GLOB_REF);
|
|
|
|
if (addr->OperIs(GT_STORE_LCL_FLD) && addr->IsPartialLclFld(this))
|
|
{
|
|
addr->gtFlags |= GTF_VAR_USEASG;
|
|
}
|
|
|
|
return addr;
|
|
}
|
|
}
|
|
|
|
return nullptr;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgOptimizeCast: Optimizes the supplied GT_CAST tree.
|
|
//
|
|
// Tries to get rid of the cast, its operand, the GTF_OVERFLOW flag, and
// calls "optNarrowTree". Called in post-order by "fgMorphSmpOp".
|
|
//
|
|
// Arguments:
|
|
// tree - the cast tree to optimize
|
|
//
|
|
// Return Value:
|
|
// The optimized tree (that can have any shape).
|
|
//
|
|
GenTree* Compiler::fgOptimizeCast(GenTreeCast* cast)
|
|
{
|
|
GenTree* src = cast->CastOp();
|
|
|
|
if (gtIsActiveCSE_Candidate(cast) || gtIsActiveCSE_Candidate(src))
|
|
{
|
|
return cast;
|
|
}
|
|
|
|
// See if we can discard the cast.
|
|
if (varTypeIsIntegral(cast) && varTypeIsIntegral(src))
|
|
{
|
|
IntegralRange srcRange = IntegralRange::ForNode(src, this);
|
|
IntegralRange noOvfRange = IntegralRange::ForCastInput(cast);
|
|
|
|
if (noOvfRange.Contains(srcRange))
|
|
{
|
|
// Casting between same-sized types is a no-op,
|
|
// given we have proven this cast cannot overflow.
|
|
if (genActualType(cast) == genActualType(src))
|
|
{
|
|
return src;
|
|
}
|
|
|
|
cast->ClearOverflow();
|
|
cast->SetAllEffectsFlags(src);
|
|
|
|
// Try and see if we can make this cast into a cheaper zero-extending version.
|
|
if (genActualTypeIsInt(src) && cast->TypeIs(TYP_LONG) && srcRange.IsNonNegative())
|
|
{
|
|
cast->SetUnsigned();
|
|
}
|
|
}
|
|
|
|
// For checked casts, we're done.
|
|
if (cast->gtOverflow())
|
|
{
|
|
return cast;
|
|
}
|
|
|
|
var_types castToType = cast->CastToType();
|
|
|
|
// For indir-like nodes, we may be able to change their type to satisfy (and discard) the cast.
|
|
if (varTypeIsSmall(castToType) && (genTypeSize(castToType) == genTypeSize(src)) &&
|
|
src->OperIs(GT_IND, GT_LCL_FLD))
|
|
{
|
|
// We're changing the type here so we need to update the VN;
|
|
// in other cases we discard the cast without modifying src
|
|
// so the VN doesn't change.
|
|
|
|
src->ChangeType(castToType);
|
|
src->SetVNsFromNode(cast);
|
|
|
|
return src;
|
|
}
|
|
|
|
// Try to narrow the operand of the cast and discard the cast.
|
|
if (opts.OptEnabled(CLFLG_TREETRANS) && (genTypeSize(src) > genTypeSize(castToType)) &&
|
|
optNarrowTree(src, src->TypeGet(), castToType, cast->gtVNPair, false))
|
|
{
|
|
optNarrowTree(src, src->TypeGet(), castToType, cast->gtVNPair, true);
|
|
|
|
// "optNarrowTree" may leave a redundant cast behind.
|
|
if (src->OperIs(GT_CAST) && (src->AsCast()->CastToType() == genActualType(src->AsCast()->CastOp())))
|
|
{
|
|
src = src->AsCast()->CastOp();
|
|
}
|
|
|
|
return src;
|
|
}
|
|
|
|
// Check for two consecutive casts, we may be able to discard the intermediate one.
|
|
if (opts.OptimizationEnabled() && src->OperIs(GT_CAST) && !src->gtOverflow())
|
|
{
|
|
var_types dstCastToType = castToType;
|
|
var_types srcCastToType = src->AsCast()->CastToType();
|
|
|
|
// CAST(ubyte <- CAST(short <- X)): CAST(ubyte <- X).
|
|
// CAST(ushort <- CAST(short <- X)): CAST(ushort <- X).
|
|
if (varTypeIsSmall(srcCastToType) && (genTypeSize(dstCastToType) <= genTypeSize(srcCastToType)))
|
|
{
|
|
cast->CastOp() = src->AsCast()->CastOp();
|
|
DEBUG_DESTROY_NODE(src);
|
|
}
|
|
}
|
|
}
|
|
|
|
return cast;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgOptimizeCastOnStore: Optimizes the supplied store tree with a GT_CAST node.
|
|
//
|
|
// Arguments:
|
|
// tree - the store to optimize
|
|
//
|
|
// Return Value:
|
|
// The optimized store tree.
|
|
//
|
|
GenTree* Compiler::fgOptimizeCastOnStore(GenTree* store)
|
|
{
|
|
assert(store->OperIsStore());
|
|
|
|
GenTree* const src = store->Data();
|
|
|
|
if (!src->OperIs(GT_CAST))
|
|
return store;
|
|
|
|
if (store->OperIs(GT_STORE_LCL_VAR))
|
|
{
|
|
LclVarDsc* varDsc = lvaGetDesc(store->AsLclVarCommon()->GetLclNum());
|
|
|
|
// We can make this transformation only under the assumption that NOL locals are always normalized before
// they are used. However, this is not always the case: the JIT will utilize subrange assertions for NOL
// locals to make normalization assumptions -- see fgMorphLeafLocal. Thus we can only do this for cases
// where we know for sure that subsequent uses will normalize, which we can only guarantee when the local
// is address exposed.
if (!varDsc->lvNormalizeOnLoad() || !varDsc->IsAddressExposed())
|
|
return store;
|
|
}
|
|
|
|
if (src->gtOverflow())
|
|
return store;
|
|
|
|
if (gtIsActiveCSE_Candidate(src))
|
|
return store;
|
|
|
|
GenTreeCast* cast = src->AsCast();
|
|
var_types castToType = cast->CastToType();
|
|
var_types castFromType = cast->CastFromType();
|
|
|
|
if (gtIsActiveCSE_Candidate(cast->CastOp()))
|
|
return store;
|
|
|
|
if (!varTypeIsSmall(store))
|
|
return store;
|
|
|
|
if (!varTypeIsSmall(castToType))
|
|
return store;
|
|
|
|
if (!varTypeIsIntegral(castFromType))
|
|
return store;
|
|
|
|
// If we are performing a narrowing cast and
// castToType is larger than or the same size as the store's type,
// then we can discard the cast.
|
|
if (genTypeSize(castToType) < genTypeSize(store))
|
|
return store;
|
|
|
|
if (genActualType(castFromType) == genActualType(castToType))
|
|
{
|
|
// Removes the cast.
|
|
store->Data() = cast->CastOp();
|
|
}
|
|
else
|
|
{
|
|
// This is a type-changing cast so we cannot remove it entirely.
|
|
cast->gtCastType = genActualType(castToType);
|
|
|
|
// See if we can optimize the new cast.
|
|
store->Data() = fgOptimizeCast(cast);
|
|
}
|
|
|
|
return store;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgOptimizeBitCast: Optimizes the supplied BITCAST node.
|
|
//
|
|
// Retypes the source node and removes the cast if possible.
|
|
//
|
|
// Arguments:
|
|
// bitCast - the BITCAST node
|
|
//
|
|
// Return Value:
|
|
// The optimized tree or "nullptr" if no transformations were performed.
|
|
//
|
|
GenTree* Compiler::fgOptimizeBitCast(GenTreeUnOp* bitCast)
|
|
{
|
|
if (opts.OptimizationDisabled() || optValnumCSE_phase)
|
|
{
|
|
return nullptr;
|
|
}
|
|
|
|
GenTree* op1 = bitCast->gtGetOp1();
|
|
if (op1->OperIs(GT_IND, GT_LCL_FLD) && (genTypeSize(op1) == genTypeSize(bitCast)))
|
|
{
|
|
op1->ChangeType(bitCast->TypeGet());
|
|
op1->SetVNsFromNode(bitCast);
|
|
return op1;
|
|
}
|
|
|
|
return nullptr;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgOptimizeEqualityComparisonWithConst: optimizes various EQ/NE(OP, CONST) patterns.
|
|
//
|
|
// Arguments:
|
|
// cmp - The GT_NE/GT_EQ tree the second operand of which is an integral constant
|
|
//
|
|
// Return Value:
|
|
// The optimized tree, "cmp" in case no optimizations were done.
|
|
// Currently only returns relop trees.
|
|
//
|
|
GenTree* Compiler::fgOptimizeEqualityComparisonWithConst(GenTreeOp* cmp)
|
|
{
|
|
assert(cmp->OperIs(GT_EQ, GT_NE));
|
|
assert(cmp->gtGetOp2()->IsIntegralConst());
|
|
assert(!optValnumCSE_phase);
|
|
|
|
GenTree* op1 = cmp->gtGetOp1();
|
|
GenTreeIntConCommon* op2 = cmp->gtGetOp2()->AsIntConCommon();
|
|
|
|
// Check for "(expr +/- icon1) ==/!= (non-zero-icon2)".
|
|
if (op2->IsCnsIntOrI() && (op2->IconValue() != 0))
|
|
{
|
|
// Since this can occur repeatedly we use a while loop.
|
|
while (op1->OperIs(GT_ADD, GT_SUB) && op1->AsOp()->gtGetOp2()->IsCnsIntOrI() && op1->TypeIs(TYP_INT) &&
|
|
!op1->gtOverflow())
|
|
{
|
|
// Got it; change "x + icon1 == icon2" to "x == icon2 - icon1".
|
|
ssize_t op1Value = op1->AsOp()->gtGetOp2()->AsIntCon()->IconValue();
|
|
ssize_t op2Value = op2->IconValue();
|
|
|
|
if (op1->OperIs(GT_ADD))
|
|
{
|
|
op2Value -= op1Value;
|
|
}
|
|
else
|
|
{
|
|
op2Value += op1Value;
|
|
}
|
|
|
|
op1 = op1->AsOp()->gtGetOp1();
|
|
op2->SetIconValue(static_cast<int32_t>(op2Value));
|
|
}
|
|
|
|
cmp->gtOp1 = op1;
|
|
fgUpdateConstTreeValueNumber(op2);
|
|
}
|
|
|
|
// Here we look for the following tree
|
|
//
|
|
//          EQ/NE
//         /     \.
//      op1      CNS 0/1
|
|
//
|
|
if (op2->IsIntegralConst(0) || op2->IsIntegralConst(1))
|
|
{
|
|
ssize_t op2Value = static_cast<ssize_t>(op2->IntegralValue());
|
|
|
|
if (op1->OperIsCompare())
|
|
{
|
|
// Here we look for the following tree
|
|
//
|
|
//          EQ/NE          ->       RELOP/!RELOP
//         /     \                  /          \.
//      RELOP   CNS 0/1
//       /  \.
|
|
//
|
|
// Note that we will remove/destroy the EQ/NE node and move
|
|
// the RELOP up into its location.
|
|
|
|
// Here we reverse the RELOP if necessary.
|
|
|
|
bool reverse = ((op2Value == 0) == (cmp->OperIs(GT_EQ)));
|
|
|
|
if (reverse)
|
|
{
|
|
gtReverseCond(op1);
|
|
}
|
|
|
|
noway_assert((op1->gtFlags & GTF_RELOP_JMP_USED) == 0);
|
|
op1->gtFlags |= cmp->gtFlags & (GTF_RELOP_JMP_USED | GTF_DONT_CSE);
|
|
op1->SetVNsFromNode(cmp);
|
|
|
|
DEBUG_DESTROY_NODE(cmp);
|
|
return op1;
|
|
}
|
|
|
|
//
|
|
// Now we check for a compare with the result of an '&' operator
|
|
//
|
|
// Here we look for the following transformation (canonicalization):
|
|
//
|
|
//            EQ/NE                         EQ/NE
//           /     \                       /     \.
//        AND      CNS 0/1    ->        AND      CNS 0
//       /   \                         /   \.
//  RSZ/RSH   CNS 1                   x    LSH  (folded if 'y' is constant)
//    /  \                                /  \.
//   x    y                              1    y
|
|
|
|
if (fgGlobalMorph && op1->OperIs(GT_AND) && op1->AsOp()->gtGetOp1()->OperIs(GT_RSZ, GT_RSH))
|
|
{
|
|
GenTreeOp* andOp = op1->AsOp();
|
|
GenTreeOp* rshiftOp = andOp->gtGetOp1()->AsOp();
|
|
|
|
if (!andOp->gtGetOp2()->IsIntegralConst(1))
|
|
{
|
|
goto SKIP;
|
|
}
|
|
|
|
// If the shift is constant, we can fold the mask and delete the shift node:
|
|
// -> AND(x, CNS(1 << y)) EQ/NE 0
|
|
if (rshiftOp->gtGetOp2()->IsCnsIntOrI())
|
|
{
|
|
ssize_t shiftAmount = rshiftOp->gtGetOp2()->AsIntCon()->IconValue();
|
|
|
|
if (shiftAmount < 0)
|
|
{
|
|
goto SKIP;
|
|
}
|
|
|
|
GenTreeIntConCommon* andMask = andOp->gtGetOp2()->AsIntConCommon();
|
|
|
|
if (andOp->TypeIs(TYP_INT) && shiftAmount < 32)
|
|
{
|
|
andMask->SetIconValue(static_cast<int32_t>(1 << shiftAmount));
|
|
}
|
|
else if (andOp->TypeIs(TYP_LONG) && shiftAmount < 64)
|
|
{
|
|
andMask->SetLngValue(1LL << shiftAmount);
|
|
}
|
|
else
|
|
{
|
|
goto SKIP; // Unsupported type or invalid shift amount.
|
|
}
|
|
andOp->gtOp1 = rshiftOp->gtGetOp1();
|
|
|
|
DEBUG_DESTROY_NODE(rshiftOp->gtGetOp2());
|
|
DEBUG_DESTROY_NODE(rshiftOp);
|
|
}
|
|
// Otherwise, if the shift is not constant, just rewire the nodes and reverse the shift op:
|
|
// AND(RSH(x, y), 1) -> AND(x, LSH(1, y))
|
|
//
|
|
// On ARM/BMI2 the original pattern should result in smaller code when comparing to non-zero;
// the other case where this transform is worthwhile is when the compare is being used by a jump.
|
|
//
|
|
else
|
|
{
|
|
if (!(cmp->gtFlags & GTF_RELOP_JMP_USED) &&
|
|
((op2Value == 0 && cmp->OperIs(GT_NE)) || (op2Value == 1 && cmp->OperIs(GT_EQ))))
|
|
{
|
|
goto SKIP;
|
|
}
|
|
|
|
andOp->gtOp1 = rshiftOp->gtGetOp1();
|
|
rshiftOp->gtOp1 = andOp->gtGetOp2();
|
|
andOp->gtOp2 = rshiftOp;
|
|
|
|
rshiftOp->SetOper(GT_LSH);
|
|
gtUpdateNodeSideEffects(rshiftOp);
|
|
}
|
|
|
|
// Reverse the condition if necessary.
|
|
if (op2Value == 1)
|
|
{
|
|
gtReverseCond(cmp);
|
|
op2->SetIntegralValue(0);
|
|
}
|
|
}
|
|
}
|
|
|
|
SKIP:
|
|
|
|
// Now check for compares with small constant longs that can be cast to int.
|
|
// Note that we filter out negative values here so that the transformations
|
|
// below are correct. E. g. "EQ(-1L, CAST_UN(int))" is always "false", but were
|
|
// we to make it into "EQ(-1, int)", "true" becomes possible for negative inputs.
|
|
if (!op2->TypeIs(TYP_LONG) || ((op2->LngValue() >> 31) != 0))
|
|
{
|
|
return cmp;
|
|
}
|
|
|
|
if (!op1->OperIs(GT_AND))
|
|
{
|
|
// Another interesting case: cast from int.
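// EQ/NE(CAST(long <- int), small_cns) can be done as a 32-bit compare: the constant was
// filtered above to be non-negative and to fit in 32 bits, and the cast does not overflow.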
|
|
if (op1->OperIs(GT_CAST) && op1->AsCast()->CastOp()->TypeIs(TYP_INT) && !op1->gtOverflow())
|
|
{
|
|
// Simply make this into an integer comparison.
|
|
cmp->gtOp1 = op1->AsCast()->CastOp();
|
|
|
|
op2->BashToConst(static_cast<int32_t>(op2->LngValue()));
|
|
fgUpdateConstTreeValueNumber(op2);
|
|
}
|
|
|
|
return cmp;
|
|
}
|
|
|
|
// Now we perform the following optimization:
|
|
// EQ/NE(AND(OP long, CNS_LNG), CNS_LNG) =>
|
|
// EQ/NE(AND(CAST(int <- OP), CNS_INT), CNS_INT)
|
|
// when the constants are sufficiently small.
|
|
// This transform cannot preserve VNs.
|
|
if (fgGlobalMorph)
|
|
{
|
|
assert(op1->TypeIs(TYP_LONG) && op1->OperIs(GT_AND));
|
|
|
|
// Is the result of the mask effectively an INT?
|
|
GenTreeOp* andOp = op1->AsOp();
|
|
if (!andOp->gtGetOp2()->OperIs(GT_CNS_NATIVELONG))
|
|
{
|
|
return cmp;
|
|
}
|
|
|
|
GenTreeIntConCommon* andMask = andOp->gtGetOp2()->AsIntConCommon();
|
|
if ((andMask->LngValue() >> 32) != 0)
|
|
{
|
|
return cmp;
|
|
}
|
|
|
|
GenTree* andOpOp1 = andOp->gtGetOp1();
|
|
// Now we narrow the first operand of AND to int.
|
|
if (optNarrowTree(andOpOp1, TYP_LONG, TYP_INT, ValueNumPair(), false))
|
|
{
|
|
optNarrowTree(andOpOp1, TYP_LONG, TYP_INT, ValueNumPair(), true);
|
|
|
|
// "optNarrowTree" may leave a redundant cast behind.
|
|
if (andOpOp1->OperIs(GT_CAST) &&
|
|
(andOpOp1->AsCast()->CastToType() == genActualType(andOpOp1->AsCast()->CastOp())))
|
|
{
|
|
andOp->gtOp1 = andOpOp1->AsCast()->CastOp();
|
|
}
|
|
}
|
|
else
|
|
{
|
|
andOp->gtOp1 = gtNewCastNode(TYP_INT, andOp->gtGetOp1(), false, TYP_INT);
|
|
}
|
|
|
|
assert(andMask == andOp->gtGetOp2());
|
|
|
|
// Now replace the mask node.
|
|
andMask->BashToConst(static_cast<int32_t>(andMask->LngValue()));
|
|
|
|
// Now change the type of the AND node.
|
|
andOp->ChangeType(TYP_INT);
|
|
|
|
// Finally we replace the comparand.
|
|
op2->BashToConst(static_cast<int32_t>(op2->LngValue()));
|
|
}
|
|
|
|
return cmp;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgOptimizeRelationalComparisonWithFullRangeConst: optimizes a comparison operation.
|
|
//
|
|
// Recognizes "Always false"/"Always true" comparisons against various full range constant operands and morphs
|
|
// them into zero/one.
|
|
//
|
|
// Arguments:
|
|
// cmp - the GT_LT/GT_GT tree to morph.
|
|
//
|
|
// Return Value:
|
|
// 1. The unmodified "cmp" tree.
|
|
// 2. A CNS_INT node containing zero.
|
|
// 3. A CNS_INT node containing one.
|
|
// Assumptions:
|
|
// The second operand is an integral constant or the first operand is an integral constant.
|
|
//
|
|
GenTree* Compiler::fgOptimizeRelationalComparisonWithFullRangeConst(GenTreeOp* cmp)
|
|
{
|
|
if (gtTreeHasSideEffects(cmp, GTF_SIDE_EFFECT))
|
|
{
|
|
return cmp;
|
|
}
|
|
|
|
int64_t lhsMin;
|
|
int64_t lhsMax;
|
|
if (cmp->gtGetOp1()->IsIntegralConst())
|
|
{
|
|
lhsMin = cmp->gtGetOp1()->AsIntConCommon()->IntegralValue();
|
|
lhsMax = lhsMin;
|
|
}
|
|
else
|
|
{
|
|
IntegralRange lhsRange = IntegralRange::ForNode(cmp->gtGetOp1(), this);
|
|
lhsMin = IntegralRange::SymbolicToRealValue(lhsRange.GetLowerBound());
|
|
lhsMax = IntegralRange::SymbolicToRealValue(lhsRange.GetUpperBound());
|
|
}
|
|
|
|
int64_t rhsMin;
|
|
int64_t rhsMax;
|
|
if (cmp->gtGetOp2()->IsIntegralConst())
|
|
{
|
|
rhsMin = cmp->gtGetOp2()->AsIntConCommon()->IntegralValue();
|
|
rhsMax = rhsMin;
|
|
}
|
|
else
|
|
{
|
|
IntegralRange rhsRange = IntegralRange::ForNode(cmp->gtGetOp2(), this);
|
|
rhsMin = IntegralRange::SymbolicToRealValue(rhsRange.GetLowerBound());
|
|
rhsMax = IntegralRange::SymbolicToRealValue(rhsRange.GetUpperBound());
|
|
}
|
|
|
|
genTreeOps op = cmp->gtOper;
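// Canonicalize to GT_LT/GT_LE by swapping the relop and the operand ranges, so that only
// the '<' and '<=' cases need to be handled below.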
|
|
if ((op != GT_LT) && (op != GT_LE))
|
|
{
|
|
op = GenTree::SwapRelop(op);
|
|
std::swap(lhsMin, rhsMin);
|
|
std::swap(lhsMax, rhsMax);
|
|
}
|
|
|
|
GenTree* ret = nullptr;
|
|
|
|
if (cmp->IsUnsigned())
|
|
{
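// The ranges above were computed as signed values. For an unsigned compare, a signed range
// that straddles zero maps to two disjoint unsigned intervals, so it is conservatively
// widened to the full unsigned range ([0, -1] here encodes [0, UINT64_MAX]).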
|
|
if ((lhsMin < 0) && (lhsMax >= 0))
|
|
{
|
|
// [0, (uint64_t)lhsMax] U [(uint64_t)lhsMin, MaxValue]
|
|
lhsMin = 0;
|
|
lhsMax = -1;
|
|
}
|
|
|
|
if ((rhsMin < 0) && (rhsMax >= 0))
|
|
{
|
|
// [0, (uint64_t)rhsMax] U [(uint64_t)rhsMin, MaxValue]
|
|
rhsMin = 0;
|
|
rhsMax = -1;
|
|
}
|
|
|
|
if (((op == GT_LT) && ((uint64_t)lhsMax < (uint64_t)rhsMin)) ||
|
|
((op == GT_LE) && ((uint64_t)lhsMax <= (uint64_t)rhsMin)))
|
|
{
|
|
ret = gtNewOneConNode(TYP_INT);
|
|
}
|
|
else if (((op == GT_LT) && ((uint64_t)lhsMin >= (uint64_t)rhsMax)) ||
|
|
((op == GT_LE) && ((uint64_t)lhsMin > (uint64_t)rhsMax)))
|
|
{
|
|
ret = gtNewZeroConNode(TYP_INT);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// [x0, x1] < [y0, y1] is false if x0 >= y1
|
|
// [x0, x1] <= [y0, y1] is false if x0 > y1
|
|
if (((op == GT_LT) && (lhsMin >= rhsMax)) || (((op == GT_LE) && (lhsMin > rhsMax))))
|
|
{
|
|
ret = gtNewZeroConNode(TYP_INT);
|
|
}
|
|
// [x0, x1] < [y0, y1] is true if x1 < y0
|
|
// [x0, x1] <= [y0, y1] is true if x1 <= y0
|
|
else if (((op == GT_LT) && (lhsMax < rhsMin)) || ((op == GT_LE) && (lhsMax <= rhsMin)))
|
|
{
|
|
ret = gtNewOneConNode(TYP_INT);
|
|
}
|
|
}
|
|
|
|
if (ret != nullptr)
|
|
{
|
|
fgUpdateConstTreeValueNumber(ret);
|
|
|
|
DEBUG_DESTROY_NODE(cmp);
|
|
|
|
INDEBUG(ret->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
|
|
|
|
return ret;
|
|
}
|
|
|
|
return cmp;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgOptimizeRelationalComparisonWithConst: optimizes a comparison operation.
|
|
//
|
|
// Recognizes comparisons against various constant operands and morphs
|
|
// them, if possible, into comparisons against zero.
|
|
//
|
|
// Arguments:
|
|
// cmp - the GT_LE/GT_LT/GT_GE/GT_GT tree to morph.
|
|
//
|
|
// Return Value:
|
|
// The "cmp" tree, possibly with a modified oper.
|
|
// The second operand's constant value may be modified as well.
|
|
//
|
|
// Assumptions:
|
|
// The operands have been swapped so that any constants are on the right.
|
|
// The second operand is an integral constant.
|
|
//
|
|
GenTree* Compiler::fgOptimizeRelationalComparisonWithConst(GenTreeOp* cmp)
|
|
{
|
|
assert(cmp->OperIs(GT_LE, GT_LT, GT_GE, GT_GT));
|
|
assert(cmp->gtGetOp2()->IsIntegralConst());
|
|
assert(!gtIsActiveCSE_Candidate(cmp->gtGetOp2()));
|
|
|
|
GenTree* op1 = cmp->gtGetOp1();
|
|
GenTreeIntConCommon* op2 = cmp->gtGetOp2()->AsIntConCommon();
|
|
|
|
assert(genActualType(op1) == genActualType(op2));
|
|
|
|
genTreeOps oper = cmp->OperGet();
|
|
int64_t op2Value = op2->IntegralValue();
|
|
|
|
if (op2Value == 1)
|
|
{
|
|
// Check for "expr >= 1".
|
|
if (oper == GT_GE)
|
|
{
|
|
// Change to "expr != 0" for unsigned and "expr > 0" for signed.
|
|
oper = cmp->IsUnsigned() ? GT_NE : GT_GT;
|
|
}
|
|
// Check for "expr < 1".
|
|
else if (oper == GT_LT)
|
|
{
|
|
// Change to "expr == 0" for unsigned and "expr <= 0".
|
|
oper = cmp->IsUnsigned() ? GT_EQ : GT_LE;
|
|
}
|
|
}
|
|
// Check for "expr relop -1".
|
|
else if (!cmp->IsUnsigned() && (op2Value == -1))
|
|
{
|
|
// Check for "expr <= -1".
|
|
if (oper == GT_LE)
|
|
{
|
|
// Change to "expr < 0".
|
|
oper = GT_LT;
|
|
}
|
|
// Check for "expr > -1".
|
|
else if (oper == GT_GT)
|
|
{
|
|
// Change to "expr >= 0".
|
|
oper = GT_GE;
|
|
}
|
|
}
|
|
else if (cmp->IsUnsigned())
|
|
{
|
|
if ((oper == GT_LE) || (oper == GT_GT))
|
|
{
|
|
if (op2Value == 0)
|
|
{
|
|
// IL doesn't have a cne instruction so compilers use cgt.un instead. The JIT
|
|
// recognizes certain patterns that involve GT_NE (e.g (x & 4) != 0) and fails
|
|
// if GT_GT is used instead. Transform (x GT_GT.unsigned 0) into (x GT_NE 0)
|
|
// and (x GT_LE.unsigned 0) into (x GT_EQ 0). The latter case is rare; it sometimes
|
|
// occurs as a result of branch inversion.
|
|
oper = (oper == GT_LE) ? GT_EQ : GT_NE;
|
|
cmp->gtFlags &= ~GTF_UNSIGNED;
|
|
}
|
|
// LE_UN/GT_UN(expr, int/long.MaxValue) => GE/LT(expr, 0).
|
|
else if (((op1->TypeIs(TYP_LONG) && (op2Value == INT64_MAX))) ||
|
|
((genActualType(op1) == TYP_INT) && (op2Value == INT32_MAX)))
|
|
{
|
|
oper = (oper == GT_LE) ? GT_GE : GT_LT;
|
|
cmp->gtFlags &= ~GTF_UNSIGNED;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!cmp->OperIs(oper))
|
|
{
|
|
// Keep the old ValueNumber for 'tree' as the new expr
|
|
// will still compute the same value as before.
|
|
cmp->SetOper(oper, GenTree::PRESERVE_VN);
|
|
op2->SetIntegralValue(0);
|
|
fgUpdateConstTreeValueNumber(op2);
|
|
}
|
|
|
|
return cmp;
|
|
}
|
|
|
|
#ifdef FEATURE_HW_INTRINSICS
|
|
//------------------------------------------------------------------------
|
|
// fgOptimizeHWIntrinsic: optimize a HW intrinsic node
|
|
//
|
|
// Arguments:
|
|
// node - HWIntrinsic node to examine
|
|
//
|
|
// Returns:
|
|
// The original node if no optimization happened or if tree bashing occurred.
|
|
// An alternative tree if an optimization happened.
|
|
//
|
|
// Notes:
|
|
// Checks for HWIntrinsic nodes such as Vector64.Create/Vector128.Create/Vector256.Create,
// and if the node is one of these, attempts to optimize.
|
|
// This is post-order, meaning that it will not morph the children.
|
|
//
|
|
GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node)
|
|
{
|
|
assert(opts.OptimizationEnabled());
|
|
|
|
GenTree* optimizedTree = fgOptimizeHWIntrinsicAssociative(node);
|
|
|
|
if (optimizedTree != nullptr)
|
|
{
|
|
if (optimizedTree != node)
|
|
{
|
|
assert(!fgIsCommaThrow(optimizedTree));
|
|
INDEBUG(optimizedTree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
|
|
return optimizedTree;
|
|
}
|
|
else if (!optimizedTree->OperIsHWIntrinsic())
|
|
{
|
|
INDEBUG(optimizedTree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
|
|
return optimizedTree;
|
|
}
|
|
}
|
|
|
|
if (optValnumCSE_phase)
|
|
{
|
|
return node;
|
|
}
|
|
|
|
NamedIntrinsic intrinsicId = node->GetHWIntrinsicId();
|
|
var_types retType = node->TypeGet();
|
|
CorInfoType simdBaseJitType = node->GetSimdBaseJitType();
|
|
var_types simdBaseType = node->GetSimdBaseType();
|
|
unsigned simdSize = node->GetSimdSize();
|
|
|
|
switch (intrinsicId)
|
|
{
|
|
#if defined(TARGET_ARM64)
|
|
case NI_Vector64_Create:
|
|
#endif // TARGET_ARM64
|
|
case NI_Vector128_Create:
|
|
{
|
|
// The managed `Dot` API returns a scalar. However, many common usages require
|
|
// it to be then immediately broadcast back to a vector so that it can be used
|
|
// in a subsequent operation. One of the most common is normalizing a vector
|
|
// which is effectively `value / value.Length` where `Length` is
|
|
// `Sqrt(Dot(value, value))`. Because of this, and because of how a lot of
|
|
// hardware works, we treat `NI_Vector_Dot` as returning a SIMD type and then
|
|
// also wrap it in `ToScalar` where required.
|
|
//
|
|
// In order to ensure that developers can still utilize this efficiently, we
|
|
// then look for four common patterns:
|
|
// * Create(Dot(..., ...))
|
|
// * Create(Sqrt(Dot(..., ...)))
|
|
// * Create(ToScalar(Dot(..., ...)))
|
|
// * Create(ToScalar(Sqrt(Dot(..., ...))))
|
|
//
|
|
// When these exist, we'll avoid converting to a scalar and hence, avoid broadcasting
|
|
// the value back into a vector. Instead we'll just keep everything as a vector.
|
|
//
|
|
// We only do this for Vector64/Vector128 today. We could expand this more in
|
|
// the future but it would require additional hand-written handling for Vector256
|
|
// (since a 256-bit result requires more work). We do some integer handling
|
|
// when the value is trivially replicated to all elements without extra work.
|
|
|
|
if (node->GetOperandCount() != 1)
|
|
{
|
|
break;
|
|
}
|
|
|
|
#if defined(TARGET_XARCH)
|
|
if ((simdSize == 8) && !compOpportunisticallyDependsOn(InstructionSet_SSE41))
|
|
{
|
|
// When SSE4.1 isn't supported then Vector2 only needs a single horizontal add
|
|
// which means the result isn't broadcast across the entire vector and we can't
|
|
// optimize
|
|
break;
|
|
}
|
|
#endif // TARGET_XARCH
|
|
|
|
GenTree* op1 = node->Op(1);
|
|
GenTree* sqrt = nullptr;
|
|
GenTree* toScalar = nullptr;
|
|
|
|
if (op1->OperIs(GT_INTRINSIC))
|
|
{
|
|
if (!varTypeIsFloating(simdBaseType))
|
|
{
|
|
break;
|
|
}
|
|
|
|
if (op1->AsIntrinsic()->gtIntrinsicName != NI_System_Math_Sqrt)
|
|
{
|
|
break;
|
|
}
|
|
|
|
sqrt = op1;
|
|
op1 = op1->gtGetOp1();
|
|
}
|
|
|
|
if (!op1->OperIs(GT_HWINTRINSIC))
|
|
{
|
|
break;
|
|
}
|
|
|
|
GenTreeHWIntrinsic* hwop1 = op1->AsHWIntrinsic();
|
|
|
|
#if defined(TARGET_ARM64)
|
|
if ((hwop1->GetHWIntrinsicId() == NI_Vector64_ToScalar) ||
|
|
(hwop1->GetHWIntrinsicId() == NI_Vector128_ToScalar))
|
|
#else
|
|
if (hwop1->GetHWIntrinsicId() == NI_Vector128_ToScalar)
|
|
#endif
|
|
{
|
|
op1 = hwop1->Op(1);
|
|
|
|
if (!op1->OperIs(GT_HWINTRINSIC))
|
|
{
|
|
break;
|
|
}
|
|
|
|
toScalar = hwop1;
|
|
hwop1 = op1->AsHWIntrinsic();
|
|
}
|
|
|
|
#if defined(TARGET_ARM64)
|
|
if ((hwop1->GetHWIntrinsicId() != NI_Vector64_Dot) && (hwop1->GetHWIntrinsicId() != NI_Vector128_Dot))
|
|
#else
|
|
if (hwop1->GetHWIntrinsicId() != NI_Vector128_Dot)
|
|
#endif
|
|
{
|
|
break;
|
|
}
|
|
|
|
// Must be working with the same types of vectors.
|
|
if (hwop1->TypeGet() != retType)
|
|
{
|
|
break;
|
|
}
|
|
|
|
if (toScalar != nullptr)
|
|
{
|
|
DEBUG_DESTROY_NODE(toScalar);
|
|
}
|
|
|
|
if (sqrt != nullptr)
|
|
{
|
|
var_types simdType = getSIMDTypeForSize(simdSize);
|
|
|
|
node = gtNewSimdSqrtNode(simdType, hwop1, simdBaseJitType, simdSize)->AsHWIntrinsic();
|
|
DEBUG_DESTROY_NODE(sqrt);
|
|
}
|
|
else
|
|
{
|
|
node = hwop1;
|
|
}
|
|
|
|
INDEBUG(node->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
|
|
return node;
|
|
}
|
|
|
|
default:
|
|
{
|
|
bool isScalar = false;
|
|
genTreeOps actualOper = node->GetOperForHWIntrinsicId(&isScalar);
|
|
genTreeOps oper = actualOper;
|
|
|
|
// We shouldn't find AND_NOT nodes since it should only be produced in lowering
|
|
assert(oper != GT_AND_NOT);
|
|
|
|
if (GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic(oper))
|
|
{
|
|
GenTree* op1 = node->Op(1);
|
|
|
|
GenTree* op2;
|
|
GenTree* actualOp2;
|
|
|
|
if (oper == GT_NOT)
|
|
{
|
|
op2 = op1;
|
|
actualOp2 = nullptr;
|
|
}
|
|
else
|
|
{
|
|
op2 = node->Op(2);
|
|
actualOp2 = op2;
|
|
}
|
|
|
|
// We need both operands to be ConvertMaskToVector in
|
|
// order to optimize this to a direct mask operation
|
|
|
|
if (!op1->OperIsConvertMaskToVector())
|
|
{
|
|
break;
|
|
}
|
|
|
|
if (!op2->OperIsHWIntrinsic())
|
|
{
|
|
#if defined(TARGET_XARCH)
|
|
if ((oper != GT_XOR) || !op2->IsVectorAllBitsSet())
|
|
{
|
|
break;
|
|
}
|
|
|
|
// We want to explicitly recognize op1 ^ AllBitsSet as
|
|
// some platforms don't have direct support for ~op1
|
|
|
|
oper = GT_NOT;
|
|
op2 = op1;
|
|
#else
|
|
break;
|
|
#endif
|
|
}
|
|
|
|
GenTreeHWIntrinsic* cvtOp1 = op1->AsHWIntrinsic();
|
|
GenTreeHWIntrinsic* cvtOp2 = op2->AsHWIntrinsic();
|
|
|
|
if (!cvtOp2->OperIsConvertMaskToVector())
|
|
{
|
|
break;
|
|
}
|
|
|
|
unsigned simdBaseTypeSize = genTypeSize(simdBaseType);
|
|
|
|
if ((genTypeSize(cvtOp1->GetSimdBaseType()) != simdBaseTypeSize) ||
|
|
(genTypeSize(cvtOp2->GetSimdBaseType()) != simdBaseTypeSize))
|
|
{
|
|
// We need both operands to be the same kind of mask; otherwise
|
|
// the bitwise operation can differ in how it performs
|
|
break;
|
|
}
|
|
|
|
NamedIntrinsic maskIntrinsicId = NI_Illegal;
|
|
|
|
#if defined(TARGET_XARCH)
|
|
switch (oper)
|
|
{
|
|
case GT_AND:
|
|
{
|
|
maskIntrinsicId = NI_EVEX_AndMask;
|
|
break;
|
|
}
|
|
|
|
case GT_NOT:
|
|
{
|
|
maskIntrinsicId = NI_EVEX_NotMask;
|
|
break;
|
|
}
|
|
|
|
case GT_OR:
|
|
{
|
|
maskIntrinsicId = NI_EVEX_OrMask;
|
|
break;
|
|
}
|
|
|
|
case GT_XOR:
|
|
{
|
|
maskIntrinsicId = NI_EVEX_XorMask;
|
|
break;
|
|
}
|
|
|
|
default:
|
|
{
|
|
unreached();
|
|
}
|
|
}
|
|
#elif defined(TARGET_ARM64)
|
|
// TODO-ARM64-CQ: Support transforming bitwise operations on masks
|
|
break;
|
|
#else
|
|
#error Unsupported platform
|
|
#endif // !TARGET_XARCH && !TARGET_ARM64
|
|
|
|
if (maskIntrinsicId == NI_Illegal)
|
|
{
|
|
break;
|
|
}
|
|
|
|
if (oper == actualOper)
|
|
{
|
|
node->ChangeHWIntrinsicId(maskIntrinsicId);
|
|
node->Op(1) = cvtOp1->Op(1);
|
|
}
|
|
else
|
|
{
|
|
assert(oper == GT_NOT);
|
|
node->ResetHWIntrinsicId(maskIntrinsicId, this, cvtOp1->Op(1));
|
|
}
|
|
|
|
node->gtType = TYP_MASK;
|
|
DEBUG_DESTROY_NODE(op1);
|
|
|
|
if (oper != GT_NOT)
|
|
{
|
|
assert(actualOp2 != nullptr);
|
|
node->Op(2) = cvtOp2->Op(1);
|
|
}
|
|
|
|
if (actualOp2 != nullptr)
|
|
{
|
|
DEBUG_DESTROY_NODE(actualOp2);
|
|
}
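// The node now produces a TYP_MASK value; wrap it back in a ConvertMaskToVector so that
// consumers which expect the original vector type still see one.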
|
|
|
|
node = gtNewSimdCvtMaskToVectorNode(retType, node, simdBaseJitType, simdSize)->AsHWIntrinsic();
|
|
|
|
INDEBUG(node->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
|
|
return node;
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
bool isScalar = false;
|
|
genTreeOps oper = node->GetOperForHWIntrinsicId(&isScalar);
|
|
|
|
if (isScalar)
|
|
{
|
|
return node;
|
|
}
|
|
|
|
switch (oper)
|
|
{
|
|
#if defined(TARGET_ARM64)
|
|
// Transforms:
|
|
// 1. -(-v1) to v1
|
|
case GT_NEG:
|
|
{
|
|
GenTree* op1 = node->Op(1);
|
|
|
|
if (op1->OperIsHWIntrinsic())
|
|
{
|
|
GenTreeHWIntrinsic* hw = op1->AsHWIntrinsic();
|
|
genTreeOps hwOper = hw->GetOperForHWIntrinsicId(&isScalar);
|
|
|
|
if (isScalar)
|
|
{
|
|
return node;
|
|
}
|
|
|
|
if (varTypeToSigned(simdBaseType) != varTypeToSigned(hw->GetSimdBaseType()))
|
|
{
|
|
// We need the base types to be of the same kind and size
|
|
// that is, we can't mix floating-point and integers or int and long
|
|
// but we can mix int and uint or long and ulong.
|
|
return node;
|
|
}
|
|
|
|
if (hwOper == GT_NEG)
|
|
{
|
|
GenTree* result = hw->Op(1);
|
|
DEBUG_DESTROY_NODE(hw);
|
|
DEBUG_DESTROY_NODE(node);
|
|
return result;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
#endif // TARGET_ARM64
|
|
|
|
#if defined(TARGET_ARM64)
|
|
// Transforms:
|
|
// 1. ~(~v1) to v1
|
|
case GT_NOT:
|
|
{
|
|
GenTree* op1 = node->Op(1);
|
|
|
|
if (op1->OperIsHWIntrinsic())
|
|
{
|
|
GenTreeHWIntrinsic* hw = op1->AsHWIntrinsic();
|
|
genTreeOps hwOper = hw->GetOperForHWIntrinsicId(&isScalar);
|
|
|
|
if (isScalar)
|
|
{
|
|
return node;
|
|
}
|
|
|
|
if (hwOper == GT_NOT)
|
|
{
|
|
GenTree* result = hw->Op(1);
|
|
DEBUG_DESTROY_NODE(hw);
|
|
DEBUG_DESTROY_NODE(node);
|
|
return result;
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
#endif // TARGET_ARM64
|
|
|
|
// Transforms:
|
|
// 1. (Zero - v1) to VectorXxx.Negate(v1); for integers
|
|
// 2. (Zero - (Zero - v1)) to v1; for integers
|
|
case GT_SUB:
|
|
{
|
|
GenTree* op1 = node->Op(1);
|
|
GenTree* op2 = node->Op(2);
|
|
|
|
if (varTypeIsIntegral(simdBaseType) && op1->IsVectorZero())
|
|
{
|
|
#if defined(TARGET_ARM64)
|
|
// xarch doesn't have a native GT_NEG representation for integers and itself uses (Zero - v1)
|
|
GenTree* negNode = gtNewSimdUnOpNode(GT_NEG, retType, op2, simdBaseJitType, simdSize);
|
|
|
|
DEBUG_DESTROY_NODE(op1);
|
|
DEBUG_DESTROY_NODE(node);
|
|
INDEBUG(negNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
|
|
|
|
return negNode;
|
|
#elif defined(TARGET_XARCH)
|
|
if (op2->OperIsHWIntrinsic())
|
|
{
|
|
GenTreeHWIntrinsic* hw = op2->AsHWIntrinsic();
|
|
genTreeOps hwOper = hw->GetOperForHWIntrinsicId(&isScalar);
|
|
var_types hwSimdBaseType = hw->GetSimdBaseType();
|
|
|
|
if (isScalar)
|
|
{
|
|
return node;
|
|
}
|
|
|
|
if (varTypeToSigned(simdBaseType) != varTypeToSigned(hwSimdBaseType))
|
|
{
|
|
// We need the base types to be of the same kind and size
|
|
// that is, we can't mix floating-point and integers or int and long
|
|
// but we can mix int and uint or long and ulong.
|
|
return node;
|
|
}
|
|
|
|
if (hwOper == GT_SUB)
|
|
{
|
|
if (varTypeIsIntegral(hwSimdBaseType) && hw->Op(1)->IsVectorZero())
|
|
{
|
|
GenTree* result = hw->Op(2);
|
|
DEBUG_DESTROY_NODE(hw->Op(1));
|
|
DEBUG_DESTROY_NODE(hw);
|
|
DEBUG_DESTROY_NODE(op1);
|
|
DEBUG_DESTROY_NODE(node);
|
|
return result;
|
|
}
|
|
}
|
|
}
|
|
#endif // !TARGET_ARM64 && !TARGET_XARCH
|
|
}
|
|
break;
|
|
}
|
|
|
|
// Transforms:
|
|
// 1. (v1 ^ AllBitsSet) to VectorXxx.OnesComplement(v1)
|
|
// 2. (v1 ^ -0.0) to VectorXxx.Negate(v1); for floating-point
|
|
case GT_XOR:
|
|
{
|
|
GenTree* op1 = node->Op(1);
|
|
GenTree* op2 = node->Op(2);
|
|
|
|
#if defined(TARGET_ARM64)
|
|
if (op2->IsVectorAllBitsSet())
|
|
{
|
|
// xarch doesn't have a native GT_NOT representation and itself uses (v1 ^ AllBitsSet)
|
|
GenTree* notNode = gtNewSimdUnOpNode(GT_NOT, retType, op1, simdBaseJitType, simdSize);
|
|
|
|
DEBUG_DESTROY_NODE(op2);
|
|
DEBUG_DESTROY_NODE(node);
|
|
INDEBUG(notNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
|
|
|
|
return notNode;
|
|
}
|
|
|
|
if (varTypeIsFloating(simdBaseType) && op2->IsVectorNegativeZero(simdBaseType))
|
|
{
|
|
// xarch doesn't have a native GT_NEG representation for floating-point and itself uses (v1 ^ -0.0)
|
|
GenTree* negNode = gtNewSimdUnOpNode(GT_NEG, retType, op1, simdBaseJitType, simdSize);
|
|
|
|
DEBUG_DESTROY_NODE(op2);
|
|
DEBUG_DESTROY_NODE(node);
|
|
INDEBUG(negNode->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
|
|
|
|
return negNode;
|
|
}
|
|
#endif // TARGET_ARM64
|
|
break;
|
|
}
|
|
|
|
default:
|
|
{
|
|
break;
|
|
}
|
|
}
|
|
|
|
return node;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgOptimizeHWIntrinsicAssociative: Morph an associative GenTreeHWIntrinsic tree.
|
|
//
|
|
// Arguments:
|
|
// tree - The tree to morph
|
|
//
|
|
// Return Value:
|
|
// The fully morphed tree.
|
|
//
|
|
GenTree* Compiler::fgOptimizeHWIntrinsicAssociative(GenTreeHWIntrinsic* tree)
|
|
{
|
|
// In general this tries to simplify `(v1 op c1) op c2` into `v1 op (c1 op c2)`
|
|
// so that we can fold it down to `v1 op c3`
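// For example, ((v + Vector128.Create(1)) + Vector128.Create(2)) becomes (v + Vector128.Create(3)).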
|
|
assert(opts.OptimizationEnabled());
|
|
|
|
NamedIntrinsic intrinsicId = tree->GetHWIntrinsicId();
|
|
var_types simdType = tree->TypeGet();
|
|
CorInfoType simdBaseJitType = tree->GetSimdBaseJitType();
|
|
var_types simdBaseType = tree->GetSimdBaseType();
|
|
unsigned simdSize = tree->GetSimdSize();
|
|
|
|
if (!varTypeIsSIMD(simdType))
|
|
{
|
|
return nullptr;
|
|
}
|
|
|
|
bool isScalar = false;
|
|
genTreeOps oper = tree->GetOperForHWIntrinsicId(&isScalar);
|
|
bool needsMatchingBaseType = false;
|
|
|
|
switch (oper)
|
|
{
|
|
case GT_ADD:
|
|
case GT_MUL:
|
|
{
|
|
if (varTypeIsIntegral(simdBaseType))
|
|
{
|
|
needsMatchingBaseType = true;
|
|
break;
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
case GT_AND:
|
|
case GT_OR:
|
|
case GT_XOR:
|
|
{
|
|
break;
|
|
}
|
|
|
|
default:
|
|
{
|
|
return nullptr;
|
|
}
|
|
}
|
|
|
|
// op1 can be GT_COMMA, in which case we're going to fold
|
|
// `(..., (v1 op c1)) op c2` to `(..., (v1 op c3))`
|
|
|
|
GenTree* op1 = tree->Op(1);
|
|
GenTree* effectiveOp1 = op1->gtEffectiveVal();
|
|
|
|
if (!effectiveOp1->OperIsHWIntrinsic())
|
|
{
|
|
return nullptr;
|
|
}
|
|
|
|
GenTreeHWIntrinsic* intrinOp1 = effectiveOp1->AsHWIntrinsic();
|
|
|
|
bool op1IsScalar = false;
|
|
genTreeOps op1Oper = intrinOp1->GetOperForHWIntrinsicId(&op1IsScalar);
|
|
|
|
if ((op1Oper != oper) || (op1IsScalar != isScalar))
|
|
{
|
|
return nullptr;
|
|
}
|
|
|
|
if (needsMatchingBaseType && (intrinOp1->GetSimdBaseType() != simdBaseType))
|
|
{
|
|
return nullptr;
|
|
}
|
|
|
|
if (!intrinOp1->Op(2)->IsCnsVec() || !tree->Op(2)->IsCnsVec())
|
|
{
|
|
return nullptr;
|
|
}
|
|
|
|
if (!fgGlobalMorph && (effectiveOp1 != op1))
|
|
{
|
|
// Since 'tree->Op(1)' can have a complex structure, e.g. `(.., (.., op1))`,
// don't run the optimization for such trees outside of global morph.
|
|
// Otherwise, there is a chance of violating VNs invariants and/or modifying a tree
|
|
// that is an active CSE candidate.
|
|
return nullptr;
|
|
}
|
|
|
|
if (gtIsActiveCSE_Candidate(tree) || gtIsActiveCSE_Candidate(effectiveOp1))
|
|
{
|
|
// If op1 is a comma, the optimization removes 'tree' from IR and changes the value
// of op1; otherwise, we're changing the value of 'tree' itself.
|
|
return nullptr;
|
|
}
|
|
|
|
GenTreeVecCon* cns1 = intrinOp1->Op(2)->AsVecCon();
|
|
GenTreeVecCon* cns2 = tree->Op(2)->AsVecCon();
|
|
|
|
assert(cns1->TypeIs(simdType));
|
|
assert(cns2->TypeIs(simdType));
|
|
|
|
if (gtIsActiveCSE_Candidate(cns1) || gtIsActiveCSE_Candidate(cns2))
|
|
{
|
|
// The optimization removes 'cns2' from IR and changes the value of 'cns1'.
|
|
return nullptr;
|
|
}
|
|
|
|
GenTree* res = gtNewSimdHWIntrinsicNode(simdType, cns1, cns2, intrinsicId, simdBaseJitType, simdSize);
|
|
res = gtFoldExprHWIntrinsic(res->AsHWIntrinsic());
|
|
|
|
assert(res == cns1);
|
|
assert(res->IsCnsVec());
|
|
|
|
if (effectiveOp1 != op1)
|
|
{
|
|
// We had a comma, so pull the VNs from node
|
|
op1->SetVNsFromNode(tree);
|
|
|
|
DEBUG_DESTROY_NODE(cns2);
|
|
DEBUG_DESTROY_NODE(tree);
|
|
|
|
return op1;
|
|
}
|
|
else
|
|
{
|
|
// We had a simple tree, so pull the value and new constant up
|
|
|
|
tree->Op(1) = intrinOp1->Op(1);
|
|
tree->Op(2) = intrinOp1->Op(2);
|
|
|
|
DEBUG_DESTROY_NODE(cns2);
|
|
DEBUG_DESTROY_NODE(intrinOp1);
|
|
|
|
assert(tree->Op(2) == cns1);
|
|
return tree;
|
|
}
|
|
}
|
|
#endif // FEATURE_HW_INTRINSICS
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgOptimizeCommutativeArithmetic: Optimizes commutative operations.
|
|
//
|
|
// Arguments:
|
|
// tree - the unchecked GT_ADD/GT_MUL/GT_OR/GT_XOR/GT_AND tree to optimize.
|
|
//
|
|
// Return Value:
|
|
// The optimized tree that can have any shape.
|
|
//
|
|
GenTree* Compiler::fgOptimizeCommutativeArithmetic(GenTreeOp* tree)
|
|
{
|
|
assert(tree->OperIs(GT_ADD, GT_MUL, GT_OR, GT_XOR, GT_AND));
|
|
assert(!tree->gtOverflowEx());
|
|
|
|
// Commute constants to the right.
|
|
if (tree->gtGetOp1()->OperIsConst() && !tree->gtGetOp1()->TypeIs(TYP_REF))
|
|
{
|
|
// TODO-Review: We used to assert here that "(!op2->OperIsConst() || !opts.OptEnabled(CLFLG_CONSTANTFOLD))".
|
|
// This may indicate a missed "remorph". Task is to re-enable this assertion and investigate.
|
|
std::swap(tree->gtOp1, tree->gtOp2);
|
|
}
|
|
|
|
if (fgOperIsBitwiseRotationRoot(tree->OperGet()))
|
|
{
|
|
GenTree* rotationTree = fgRecognizeAndMorphBitwiseRotation(tree);
|
|
if (rotationTree != nullptr)
|
|
{
|
|
return rotationTree;
|
|
}
|
|
}
|
|
|
|
if (varTypeIsIntegralOrI(tree))
|
|
{
|
|
genTreeOps oldTreeOper = tree->OperGet();
|
|
GenTreeOp* optimizedTree = fgMorphCommutative(tree->AsOp());
|
|
if (optimizedTree != nullptr)
|
|
{
|
|
if (!optimizedTree->OperIs(oldTreeOper))
|
|
{
|
|
// "optimizedTree" could end up being a COMMA.
|
|
return optimizedTree;
|
|
}
|
|
|
|
tree = optimizedTree;
|
|
}
|
|
}
|
|
|
|
if (!optValnumCSE_phase)
|
|
{
|
|
GenTree* optimizedTree = nullptr;
|
|
if (tree->OperIs(GT_ADD))
|
|
{
|
|
optimizedTree = fgOptimizeAddition(tree);
|
|
}
|
|
else if (tree->OperIs(GT_MUL))
|
|
{
|
|
optimizedTree = fgOptimizeMultiply(tree);
|
|
}
|
|
else if (tree->OperIs(GT_AND))
|
|
{
|
|
optimizedTree = fgOptimizeBitwiseAnd(tree);
|
|
}
|
|
else if (tree->OperIs(GT_XOR))
|
|
{
|
|
optimizedTree = fgOptimizeBitwiseXor(tree);
|
|
}
|
|
|
|
if (optimizedTree != nullptr)
|
|
{
|
|
return optimizedTree;
|
|
}
|
|
}
|
|
|
|
return tree;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgOptimizeAddition: optimizes addition.
|
|
//
|
|
// Arguments:
|
|
// add - the unchecked GT_ADD tree to optimize.
|
|
//
|
|
// Return Value:
|
|
// The optimized tree, that can have any shape, in case any transformations
|
|
// were performed. Otherwise, "nullptr", guaranteeing no state change.
|
|
//
|
|
GenTree* Compiler::fgOptimizeAddition(GenTreeOp* add)
|
|
{
|
|
assert(add->OperIs(GT_ADD) && !add->gtOverflow());
|
|
assert(!optValnumCSE_phase);
|
|
|
|
GenTree* op1 = add->gtGetOp1();
|
|
GenTree* op2 = add->gtGetOp2();
|
|
|
|
// Fold "((x + icon1) + (y + icon2))" to ((x + y) + (icon1 + icon2))".
|
|
// Be careful not to create a byref pointer that may point outside of the ref object.
|
|
// Only do this in global morph as we don't recompute the VN for "(x + y)", the new "op2".
|
|
if (op1->OperIs(GT_ADD) && op2->OperIs(GT_ADD) && !op1->gtOverflow() && !op2->gtOverflow() &&
|
|
op1->AsOp()->gtGetOp2()->IsCnsIntOrI() && op2->AsOp()->gtGetOp2()->IsCnsIntOrI() &&
|
|
!varTypeIsGC(op1->AsOp()->gtGetOp1()) && !varTypeIsGC(op2->AsOp()->gtGetOp1()) && fgGlobalMorph)
|
|
{
|
|
GenTreeOp* addOne = op1->AsOp();
|
|
GenTreeOp* addTwo = op2->AsOp();
|
|
GenTreeIntCon* constOne = addOne->gtGetOp2()->AsIntCon();
|
|
|
|
// addOne is now "x + y"
|
|
addOne->gtOp2 = addTwo->gtGetOp1();
|
|
addOne->SetAllEffectsFlags(addOne->gtGetOp1(), addOne->gtGetOp2());
|
|
|
|
// addTwo is now "icon1 + icon2" so we can fold it using gtFoldExprConst
|
|
addTwo->gtOp1 = constOne;
|
|
add->gtOp2 = gtFoldExprConst(add->gtOp2);
|
|
op2 = add->gtGetOp2();
|
|
}
|
|
|
|
// Fold (x + 0) - given it won't change the tree type.
|
|
if (op2->IsIntegralConst(0) && (genActualType(add) == genActualType(op1)))
|
|
{
|
|
// Keep the offset nodes with annotations for value numbering purposes.
|
|
if (!op2->IsCnsIntOrI() || (op2->AsIntCon()->gtFieldSeq == nullptr))
|
|
{
|
|
DEBUG_DESTROY_NODE(op2);
|
|
DEBUG_DESTROY_NODE(add);
|
|
|
|
return op1;
|
|
}
|
|
|
|
// Communicate to CSE that this addition is a no-op.
|
|
add->SetDoNotCSE();
|
|
}
|
|
|
|
if (opts.OptimizationEnabled())
|
|
{
|
|
// Reduce local addresses: "ADD(LCL_ADDR, OFFSET)" => "LCL_ADDR<offset>".
|
|
//
|
|
if (op1->OperIs(GT_LCL_ADDR) && op2->IsCnsIntOrI())
|
|
{
|
|
GenTreeLclVarCommon* lclAddrNode = op1->AsLclVarCommon();
|
|
GenTreeIntCon* offsetNode = op2->AsIntCon();
|
|
if (FitsIn<uint16_t>(offsetNode->IconValue()))
|
|
{
|
|
unsigned offset = lclAddrNode->GetLclOffs() + static_cast<uint16_t>(offsetNode->IconValue());
|
|
|
|
// Note: the emitter does not expect out-of-bounds access for LCL_FLD_ADDR.
|
|
if (FitsIn<uint16_t>(offset) && (offset < lvaLclExactSize(lclAddrNode->GetLclNum())))
|
|
{
|
|
lclAddrNode->SetOper(GT_LCL_ADDR);
|
|
lclAddrNode->AsLclFld()->SetLclOffs(offset);
|
|
assert(lvaGetDesc(lclAddrNode)->lvDoNotEnregister);
|
|
|
|
lclAddrNode->SetVNsFromNode(add);
|
|
|
|
DEBUG_DESTROY_NODE(offsetNode);
|
|
DEBUG_DESTROY_NODE(add);
|
|
|
|
return lclAddrNode;
|
|
}
|
|
}
|
|
}
|
|
|
|
// -a + b => b - a
|
|
// ADD(NEG(a), b) => SUB(b, a)
|
|
|
|
// Do not do this if "op2" is constant for canonicalization purposes.
|
|
if (op1->OperIs(GT_NEG) && !op2->OperIs(GT_NEG) && !op2->IsIntegralConst() && gtCanSwapOrder(op1, op2))
|
|
{
|
|
add->SetOper(GT_SUB);
|
|
add->gtOp1 = op2;
|
|
add->gtOp2 = op1->AsOp()->gtGetOp1();
|
|
|
|
DEBUG_DESTROY_NODE(op1);
|
|
|
|
return add;
|
|
}
|
|
|
|
// a + -b => a - b
|
|
// ADD(a, NEG(b)) => SUB(a, b)
|
|
if (!op1->OperIs(GT_NEG) && op2->OperIs(GT_NEG))
|
|
{
|
|
add->SetOper(GT_SUB);
|
|
add->gtOp2 = op2->AsOp()->gtGetOp1();
|
|
|
|
DEBUG_DESTROY_NODE(op2);
|
|
|
|
return add;
|
|
}
|
|
|
|
// Fold (~x + 1) to -x.
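// (Two's complement identity; e.g. with x == 5, ~5 + 1 == -5.)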
|
|
if (op1->OperIs(GT_NOT) && op2->IsIntegralConst(1))
|
|
{
|
|
op1->SetOper(GT_NEG);
|
|
op1->SetVNsFromNode(add);
|
|
DEBUG_DESTROY_NODE(op2);
|
|
DEBUG_DESTROY_NODE(add);
|
|
return op1;
|
|
}
|
|
}
|
|
|
|
return nullptr;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgOptimizeMultiply: optimizes multiplication.
|
|
//
|
|
// Arguments:
|
|
// mul - the unchecked TYP_I_IMPL/TYP_INT GT_MUL tree to optimize.
|
|
//
|
|
// Return Value:
|
|
// The optimized tree, that can have any shape, in case any transformations
|
|
// were performed. Otherwise, "nullptr", guaranteeing no state change.
|
|
//
|
|
GenTree* Compiler::fgOptimizeMultiply(GenTreeOp* mul)
|
|
{
|
|
assert(mul->OperIs(GT_MUL));
|
|
assert(varTypeIsIntOrI(mul) || varTypeIsFloating(mul));
|
|
assert(!mul->gtOverflow());
|
|
assert(!optValnumCSE_phase);
|
|
|
|
GenTree* op1 = mul->gtGetOp1();
|
|
GenTree* op2 = mul->gtGetOp2();
|
|
|
|
assert(mul->TypeGet() == genActualType(op1));
|
|
assert(mul->TypeGet() == genActualType(op2));
|
|
|
|
if (opts.OptimizationEnabled() && op2->IsCnsFltOrDbl())
|
|
{
|
|
double multiplierValue = op2->AsDblCon()->DconValue();
|
|
|
|
if (multiplierValue == 1.0)
|
|
{
|
|
// Fold "x * 1.0" to "x".
|
|
DEBUG_DESTROY_NODE(op2);
|
|
DEBUG_DESTROY_NODE(mul);
|
|
|
|
return op1;
|
|
}
|
|
|
|
// Fold "x * 2.0" to "x + x".
|
|
// If op1 is not a local we will have to introduce a temporary via GT_COMMA.
|
|
// Unfortunately, it's not optHoistLoopCode-friendly (yet), so we'll only do
|
|
// this for locals / after hoisting has run (when rationalization remorphs
|
|
// math intrinsics into calls...).
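// Illustrative effect: "y = x * 2.0" becomes "y = x + x", trading a constant multiply
// for a register-to-register add.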
|
|
if ((multiplierValue == 2.0) && (op1->IsLocal() || (fgOrder == FGOrderLinear)))
|
|
{
|
|
op2 = fgMakeMultiUse(&op1);
|
|
GenTree* add = gtNewOperNode(GT_ADD, mul->TypeGet(), op1, op2);
|
|
INDEBUG(add->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
|
|
|
|
return add;
|
|
}
|
|
}
|
|
|
|
if (op2->IsIntegralConst())
|
|
{
|
|
// We should not get here for 64-bit multiplications on 32-bit.
|
|
assert(op2->IsCnsIntOrI());
|
|
|
|
// MUL(NEG(a), C) => MUL(a, NEG(C))
|
|
if (opts.OptimizationEnabled() && op1->OperIs(GT_NEG) && !op2->IsIconHandle())
|
|
{
|
|
mul->gtOp1 = op1->AsUnOp()->gtGetOp1();
|
|
op2->AsIntCon()->gtIconVal = -op2->AsIntCon()->gtIconVal;
|
|
fgUpdateConstTreeValueNumber(op2);
|
|
DEBUG_DESTROY_NODE(op1);
|
|
|
|
op1 = mul->gtOp1;
|
|
}
|
|
|
|
ssize_t mult = op2->AsIntConCommon()->IconValue();
|
|
|
|
if (mult == 0)
|
|
{
|
|
// We may be able to throw away op1 (unless it has side-effects)
|
|
|
|
if ((op1->gtFlags & GTF_SIDE_EFFECT) == 0)
|
|
{
|
|
DEBUG_DESTROY_NODE(op1);
|
|
DEBUG_DESTROY_NODE(mul);
|
|
|
|
return op2; // Just return the "0" node
|
|
}
|
|
|
|
// We need to keep op1 for the side-effects. Hang it off a GT_COMMA node.
|
|
mul->ChangeOper(GT_COMMA, GenTree::PRESERVE_VN);
|
|
return mul;
|
|
}
|
|
|
|
#ifdef TARGET_XARCH
|
|
// Should we try to replace integer multiplication with lea/add/shift sequences?
|
|
bool mulShiftOpt = compCodeOpt() != SMALL_CODE;
|
|
#else // !TARGET_XARCH
|
|
bool mulShiftOpt = false;
|
|
#endif // !TARGET_XARCH
|
|
|
|
size_t abs_mult = (mult >= 0) ? mult : -mult;
|
|
size_t lowestBit = genFindLowestBit(abs_mult);
|
|
bool changeToShift = false;
|
|
|
|
// is it a power of two? (positive or negative)
|
|
if (abs_mult == lowestBit)
|
|
{
|
|
// if negative negate (min-int does not need negation)
|
|
if (mult < 0 && mult != SSIZE_T_MIN)
|
|
{
|
|
op1 = gtNewOperNode(GT_NEG, genActualType(op1), op1);
|
|
mul->gtOp1 = op1;
|
|
fgMorphTreeDone(op1);
|
|
}
|
|
|
|
if (abs_mult == 1)
|
|
{
|
|
DEBUG_DESTROY_NODE(op2);
|
|
DEBUG_DESTROY_NODE(mul);
|
|
return op1;
|
|
}
|
|
|
|
// Change the multiplication into a shift by log2(val) bits.
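// e.g. "x * 8" => "x << 3", and "x * -8" => "(-x) << 3" (illustrative).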
|
|
op2->AsIntConCommon()->SetIconValue(genLog2(abs_mult));
|
|
changeToShift = true;
|
|
}
|
|
else if (mulShiftOpt && (lowestBit > 1) && jitIsScaleIndexMul(lowestBit))
|
|
{
|
|
int shift = genLog2(lowestBit);
|
|
ssize_t factor = abs_mult >> shift;
|
|
|
|
if (factor == 3 || factor == 5 || factor == 9)
|
|
{
|
|
// if negative negate (min-int does not need negation)
|
|
if (mult < 0 && mult != SSIZE_T_MIN)
|
|
{
|
|
op1 = gtNewOperNode(GT_NEG, genActualType(op1), op1);
|
|
mul->gtOp1 = op1;
|
|
fgMorphTreeDone(op1);
|
|
}
|
|
|
|
// change the multiplication into a smaller multiplication (by 3, 5 or 9) and a shift
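// e.g. "x * 12" => "(x * 3) << 2" (illustrative); xarch can emit the inner multiply
// as a scaled-index lea.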
|
|
op1 = gtNewOperNode(GT_MUL, mul->TypeGet(), op1, gtNewIconNode(factor, mul->TypeGet()));
|
|
mul->gtOp1 = op1;
|
|
fgMorphTreeDone(op1);
|
|
|
|
op2->AsIntConCommon()->SetIconValue(shift);
|
|
changeToShift = true;
|
|
}
|
|
}
|
|
|
|
if (changeToShift)
|
|
{
|
|
fgUpdateConstTreeValueNumber(op2);
|
|
mul->ChangeOper(GT_LSH, GenTree::PRESERVE_VN);
|
|
|
|
return mul;
|
|
}
|
|
}
|
|
|
|
return nullptr;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgOptimizeBitwiseAnd: optimizes the "and" operation.
|
|
//
|
|
// Arguments:
|
|
// andOp - the GT_AND tree to optimize.
|
|
//
|
|
// Return Value:
|
|
// The optimized tree, currently always a relop, in case any transformations
|
|
// were performed. Otherwise, "nullptr", guaranteeing no state change.
|
|
//
|
|
GenTree* Compiler::fgOptimizeBitwiseAnd(GenTreeOp* andOp)
|
|
{
|
|
assert(andOp->OperIs(GT_AND));
|
|
assert(!optValnumCSE_phase);
|
|
|
|
GenTree* op1 = andOp->gtGetOp1();
|
|
GenTree* op2 = andOp->gtGetOp2();
|
|
|
|
// Fold "cmp & 1" to just "cmp".
|
|
if (andOp->TypeIs(TYP_INT) && op1->OperIsCompare() && op2->IsIntegralConst(1))
|
|
{
|
|
DEBUG_DESTROY_NODE(op2);
|
|
DEBUG_DESTROY_NODE(andOp);
|
|
|
|
return op1;
|
|
}
|
|
|
|
return nullptr;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgOptimizeRelationalComparisonWithCasts: Recognizes comparisons against
|
|
// various cast operands and tries to remove them. E.g.:
|
|
//
|
|
// * GE int
|
|
// +--* CAST long <- ulong <- uint
|
|
// | \--* X int
|
|
// \--* CNS_INT long
|
|
//
|
|
// to:
|
|
//
|
|
// * GE_un int
|
|
// +--* X int
|
|
// \--* CNS_INT int
|
|
//
|
|
// same for:
|
|
//
|
|
// * GE int
|
|
// +--* CAST long <- ulong <- uint
|
|
// | \--* X int
|
|
// \--* CAST long <- [u]long <- int
|
|
// \--* ARR_LEN int
|
|
//
|
|
// These patterns quite often show up along with index checks
|
|
//
|
|
// Arguments:
|
|
// cmp - the GT_LE/GT_LT/GT_GE/GT_GT tree to morph.
|
|
//
|
|
// Return Value:
|
|
// Returns the same tree where operands might have narrower types
|
|
//
|
|
// Notes:
|
|
// TODO-Casts: consider unifying this function with "optNarrowTree"
|
|
//
|
|
GenTree* Compiler::fgOptimizeRelationalComparisonWithCasts(GenTreeOp* cmp)
|
|
{
|
|
assert(cmp->OperIs(GT_LE, GT_LT, GT_GE, GT_GT));
|
|
assert(!optValnumCSE_phase);
|
|
|
|
GenTree* op1 = cmp->gtGetOp1();
|
|
GenTree* op2 = cmp->gtGetOp2();
|
|
|
|
// Caller is expected to call this function only if we have at least one CAST node
|
|
assert(op1->OperIs(GT_CAST) || op2->OperIs(GT_CAST));
|
|
|
|
assert(genActualType(op1) == genActualType(op2));
|
|
|
|
if (!op1->TypeIs(TYP_LONG))
|
|
{
|
|
return cmp;
|
|
}
|
|
|
|
auto supportedOp = [](GenTree* op) {
|
|
if (op->IsIntegralConst())
|
|
{
|
|
return true;
|
|
}
|
|
|
|
if (op->OperIs(GT_CAST))
|
|
{
|
|
if (op->gtOverflow())
|
|
{
|
|
return false;
|
|
}
|
|
|
|
if (genActualType(op->CastFromType()) != TYP_INT)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
assert(varTypeIsLong(op->CastToType()));
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
};
|
|
|
|
if (!supportedOp(op1) || !supportedOp(op2))
|
|
{
|
|
return cmp;
|
|
}
|
|
|
|
auto isUpperZero = [this](GenTree* op) {
|
|
if (op->IsIntegralConst())
|
|
{
|
|
int64_t lng = op->AsIntConCommon()->LngValue();
|
|
return (lng >= 0) && (lng <= UINT_MAX);
|
|
}
|
|
|
|
assert(op->OperIs(GT_CAST));
|
|
if (op->AsCast()->IsUnsigned())
|
|
{
|
|
return true;
|
|
}
|
|
|
|
return IntegralRange::ForNode(op->AsCast()->CastOp(), this).IsNonNegative();
|
|
};
|
|
|
|
// If both operands have zero as the upper half then any signed/unsigned
|
|
// 64-bit comparison is equivalent to the same unsigned 32-bit comparison.
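// For illustration: with a uint "u", "(long)u >= 10" gives the same answer as the
// 32-bit unsigned compare "u >= 10u", so the casts can be dropped.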
|
|
if (isUpperZero(op1) && isUpperZero(op2))
|
|
{
|
|
JITDUMP("Removing redundant cast(s) for:\n")
|
|
DISPTREE(cmp)
|
|
JITDUMP("\n\nto:\n\n")
|
|
|
|
cmp->SetUnsigned();
|
|
|
|
auto transform = [this](GenTree** use) {
|
|
if ((*use)->IsIntegralConst())
|
|
{
|
|
(*use)->BashToConst(static_cast<int>((*use)->AsIntConCommon()->LngValue()));
|
|
fgUpdateConstTreeValueNumber(*use);
|
|
}
|
|
else
|
|
{
|
|
assert((*use)->OperIs(GT_CAST));
|
|
GenTreeCast* cast = (*use)->AsCast();
|
|
*use = cast->CastOp();
|
|
DEBUG_DESTROY_NODE(cast);
|
|
}
|
|
};
|
|
|
|
transform(&cmp->gtOp1);
|
|
transform(&cmp->gtOp2);
|
|
|
|
assert((genActualType(cmp->gtOp1) == TYP_INT) && (genActualType(cmp->gtOp2) == TYP_INT));
|
|
|
|
DISPTREE(cmp)
|
|
JITDUMP("\n")
|
|
}
|
|
return cmp;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
// fgOptimizeBitwiseXor: optimizes the "xor" operation.
|
|
//
|
|
// Arguments:
|
|
// xorOp - the GT_XOR tree to optimize.
|
|
//
|
|
// Return Value:
|
|
// The optimized tree, in case any transformations
|
|
// were performed. Otherwise, "nullptr", guaranteeing no state change.
|
|
//
|
|
GenTree* Compiler::fgOptimizeBitwiseXor(GenTreeOp* xorOp)
|
|
{
|
|
assert(xorOp->OperIs(GT_XOR));
|
|
assert(!optValnumCSE_phase);
|
|
|
|
GenTree* op1 = xorOp->gtGetOp1();
|
|
GenTree* op2 = xorOp->gtGetOp2();
|
|
|
|
if (op2->IsIntegralConst(0))
|
|
{
|
|
/* "x ^ 0" is "x" */
|
|
DEBUG_DESTROY_NODE(xorOp, op2);
|
|
return op1;
|
|
}
|
|
else if (op2->IsIntegralConst(-1))
|
|
{
|
|
/* "x ^ -1" is "~x" */
|
|
xorOp->ChangeOper(GT_NOT);
|
|
xorOp->gtOp2 = nullptr;
|
|
DEBUG_DESTROY_NODE(op2);
|
|
|
|
return xorOp;
|
|
}
|
|
else if (op2->IsIntegralConst(1) && op1->OperIsCompare())
|
|
{
|
|
/* "binaryVal ^ 1" is "!binaryVal" */
|
|
gtReverseCond(op1);
|
|
DEBUG_DESTROY_NODE(op2);
|
|
DEBUG_DESTROY_NODE(xorOp);
|
|
|
|
return op1;
|
|
}
|
|
else if (varTypeIsFloating(xorOp) && op2->IsFloatNegativeZero())
|
|
{
|
|
// "x ^ -0.0" is "-x"
|
|
|
|
xorOp->ChangeOper(GT_NEG);
|
|
xorOp->gtOp2 = nullptr;
|
|
|
|
DEBUG_DESTROY_NODE(op2);
|
|
return xorOp;
|
|
}
|
|
|
|
return nullptr;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgPropagateCommaThrow: propagate a "comma throw" up the tree.
|
|
//
|
|
// "Comma throws" in the compiler represent the canonical form of an always
|
|
// throwing expression. They have the shape of COMMA(THROW, ZERO), to satisfy
|
|
// the semantic that the original expression produced some value and are
|
|
// generated by "gtFoldExprConst" when it encounters checked arithmetic that
|
|
// will determinably overflow.
|
|
//
|
|
// In the global morphing phase, "comma throws" are "propagated" up the tree,
|
|
// in post-order, to eliminate nodes that will never execute. This method,
|
|
// called by "fgMorphSmpOp", encapsulates this optimization.
|
|
//
|
|
// Arguments:
|
|
// parent - the node currently being processed.
|
|
// commaThrow - the comma throw in question, "parent"'s operand.
|
|
// precedingSideEffects - side effects of nodes preceding "comma" in execution order.
|
|
//
|
|
// Return Value:
|
|
// If "parent" is to be replaced with a comma throw, i. e. the propagation was successful,
|
|
// the new "parent", otherwise "nullptr", guaranteeing no state change, with one exception:
|
|
// the "fgRemoveRestOfBlock" "global" may be set. Note that the new returned tree does not
|
|
// have to be a "comma throw", it can be "bare" throw call if the "parent" node did not
|
|
// produce any value.
|
|
//
|
|
// Notes:
|
|
// "Comma throws" are very rare.
|
|
//
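// Illustrative sketch (not a quote from the sources): "checked(int.MaxValue + 1)" folds
// to COMMA(throw-helper CALL, CNS_INT 0); if that comma then feeds, say, an ADD with no
// preceding side effects, the ADD itself is replaced by the comma throw (or by the bare
// throw call when the parent is TYP_VOID).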
|
|
GenTree* Compiler::fgPropagateCommaThrow(GenTree* parent, GenTreeOp* commaThrow, GenTreeFlags precedingSideEffects)
|
|
{
|
|
// Comma throw propagation does not preserve VNs, and deletes nodes.
|
|
assert(fgGlobalMorph);
|
|
assert(fgIsCommaThrow(commaThrow));
|
|
|
|
if ((commaThrow->gtFlags & GTF_COLON_COND) == 0)
|
|
{
|
|
fgRemoveRestOfBlock = true;
|
|
}
|
|
|
|
if ((precedingSideEffects & GTF_ALL_EFFECT) == 0)
|
|
{
|
|
if (parent->TypeIs(TYP_VOID))
|
|
{
|
|
// Return the throw node as the new tree.
|
|
return commaThrow->gtGetOp1();
|
|
}
|
|
|
|
// Fix up the COMMA's type if needed.
|
|
if (genActualType(parent) != genActualType(commaThrow))
|
|
{
|
|
commaThrow->gtGetOp2()->BashToZeroConst(genActualType(parent));
|
|
commaThrow->ChangeType(genActualType(parent));
|
|
}
|
|
|
|
return commaThrow;
|
|
}
|
|
|
|
return nullptr;
|
|
}
|
|
|
|
//----------------------------------------------------------------------------------------------
|
|
// fgMorphRetInd: Try to get rid of extra local indirections in a return tree.
|
|
//
|
|
// Arguments:
|
|
// node - The return node that uses a local field.
|
|
//
|
|
// Return Value:
|
|
// the original return operand if there was no optimization, or an optimized new return operand.
|
|
//
|
|
GenTree* Compiler::fgMorphRetInd(GenTreeOp* ret)
|
|
{
|
|
assert(ret->OperIs(GT_RETURN, GT_SWIFT_ERROR_RET));
|
|
assert(ret->GetReturnValue()->OperIs(GT_LCL_FLD));
|
|
GenTreeLclFld* lclFld = ret->GetReturnValue()->AsLclFld();
|
|
unsigned lclNum = lclFld->GetLclNum();
|
|
|
|
if (fgGlobalMorph && varTypeIsStruct(lclFld) && !lvaIsImplicitByRefLocal(lclNum))
|
|
{
|
|
LclVarDsc* varDsc = lvaGetDesc(lclNum);
|
|
unsigned indSize = lclFld->GetSize();
|
|
unsigned lclVarSize = lvaLclExactSize(lclNum);
|
|
|
|
// TODO: change conditions in `canFold` to `indSize <= lclVarSize`, but currently do not support `BITCAST
|
|
// int<-SIMD16` etc. Note this will also require the offset of the field to be zero.
|
|
assert(indSize <= lclVarSize);
|
|
|
|
#if defined(TARGET_64BIT)
|
|
bool canFold = (indSize == lclVarSize);
|
|
#else // !TARGET_64BIT
|
|
// TODO: improve 32 bit targets handling for LONG returns if necessary, nowadays we do not support `BITCAST
|
|
// long<->double` there.
|
|
bool canFold = (indSize == lclVarSize) && (lclVarSize <= REGSIZE_BYTES);
|
|
#endif
|
|
|
|
// TODO: support `genReturnBB != nullptr`, it requires #11413 to avoid `Incompatible types for
|
|
// gtNewTempStore`.
|
|
if (canFold && (genReturnBB == nullptr))
|
|
{
|
|
// Fold even if types do not match, lowering will handle it. This allows the local
|
|
// to remain DNER-free and be enregistered.
|
|
assert(lclFld->GetLclOffs() == 0);
|
|
lclFld->ChangeType(varDsc->TypeGet());
|
|
lclFld->SetOper(GT_LCL_VAR);
|
|
}
|
|
else if (!varDsc->lvDoNotEnregister)
|
|
{
|
|
lvaSetVarDoNotEnregister(lclNum DEBUGARG(DoNotEnregisterReason::BlockOpRet));
|
|
}
|
|
}
|
|
|
|
return lclFld;
|
|
}
|
|
|
|
#ifdef _PREFAST_
|
|
#pragma warning(pop)
|
|
#endif
|
|
//-------------------------------------------------------------
|
|
// fgMorphSmpOpOptional: optional post-order morphing of some SMP trees
|
|
//
|
|
// Arguments:
|
|
// tree - tree to morph
|
|
// optAssertionPropDone - [out, optional] set true if local assertions were
|
|
// killed/genned by the optional morphing
|
|
//
|
|
// Returns:
|
|
// Tree, possibly updated
|
|
//
|
|
GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree, bool* optAssertionPropDone)
|
|
{
|
|
genTreeOps oper = tree->gtOper;
|
|
GenTree* op1 = tree->gtOp1;
|
|
GenTree* op2 = tree->gtOp2;
|
|
var_types typ = tree->TypeGet();
|
|
|
|
if (fgGlobalMorph && GenTree::OperIsCommutative(oper))
|
|
{
|
|
/* Swap the operands so that the more expensive one is 'op1' */
|
|
|
|
if (tree->gtFlags & GTF_REVERSE_OPS)
|
|
{
|
|
tree->gtOp1 = op2;
|
|
tree->gtOp2 = op1;
|
|
|
|
op2 = op1;
|
|
op1 = tree->gtOp1;
|
|
|
|
tree->gtFlags &= ~GTF_REVERSE_OPS;
|
|
}
|
|
|
|
if (oper == op2->gtOper)
|
|
{
|
|
/* Reorder nested operators at the same precedence level to be
|
|
left-recursive. For example, change "(a+(b+c))" to the
|
|
equivalent expression "((a+b)+c)".
|
|
*/
|
|
|
|
/* Things are handled differently for floating-point operators */
|
|
|
|
if (!varTypeIsFloating(tree->TypeGet()))
|
|
{
|
|
fgMoveOpsLeft(tree);
|
|
op1 = tree->gtOp1;
|
|
op2 = tree->gtOp2;
|
|
}
|
|
}
|
|
}
|
|
|
|
#if REARRANGE_ADDS
|
|
|
|
/* Change "((x+icon)+y)" to "((x+y)+icon)"
|
|
Don't reorder floating-point operations */
|
|
|
|
if (fgGlobalMorph && (oper == GT_ADD) && !tree->gtOverflow() && (op1->gtOper == GT_ADD) && !op1->gtOverflow() &&
|
|
varTypeIsIntegralOrI(typ))
|
|
{
|
|
GenTree* ad1 = op1->AsOp()->gtOp1;
|
|
GenTree* ad2 = op1->AsOp()->gtOp2;
|
|
|
|
if (!op2->OperIsConst() && ad2->OperIsConst())
|
|
{
|
|
// This takes
|
|
// + (tree)
|
|
// / \.
|
|
// / \.
|
|
// / \.
|
|
// + (op1) op2
|
|
// / \.
|
|
// / \.
|
|
// ad1 ad2
|
|
//
|
|
// and it swaps ad2 and op2.
|
|
|
|
// Don't create a byref pointer that may point outside of the ref object.
|
|
// If a GC happens, the byref won't get updated. This can happen if one
|
|
// of the int components is negative. It also requires the address generation
|
|
// be in a fully-interruptible code region.
|
|
if (!varTypeIsGC(ad1->TypeGet()) && !varTypeIsGC(op2->TypeGet()))
|
|
{
|
|
tree->gtOp2 = ad2;
|
|
|
|
op1->AsOp()->gtOp2 = op2;
|
|
op1->gtFlags |= op2->gtFlags & GTF_ALL_EFFECT;
|
|
|
|
op2 = tree->gtOp2;
|
|
}
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
/*-------------------------------------------------------------------------
|
|
* Perform optional oper-specific postorder morphing
|
|
*/
|
|
|
|
switch (oper)
|
|
{
|
|
case GT_STOREIND:
|
|
case GT_STORE_BLK:
|
|
case GT_STORE_LCL_VAR:
|
|
case GT_STORE_LCL_FLD:
|
|
// Make sure we're allowed to do this.
|
|
if (optValnumCSE_phase)
|
|
{
|
|
// It is not safe to reorder/delete CSE's
|
|
break;
|
|
}
|
|
|
|
if (varTypeIsStruct(typ) && !tree->IsPhiDefn())
|
|
{
|
|
// Block ops handle assertion kill/gen specially.
|
|
// See PrepareDst and PropagateAssertions
|
|
//
|
|
if (optAssertionPropDone != nullptr)
|
|
{
|
|
*optAssertionPropDone = true;
|
|
}
|
|
|
|
if (tree->OperIsCopyBlkOp())
|
|
{
|
|
return fgMorphCopyBlock(tree);
|
|
}
|
|
else
|
|
{
|
|
return fgMorphInitBlock(tree);
|
|
}
|
|
}
|
|
|
|
/* Special case: a cast that can be thrown away */
|
|
|
|
// TODO-Cleanup: fgMorphSmp does a similar optimization. However, it removes only
|
|
// one cast and sometimes there is another one after it that gets removed by this
|
|
// code. fgMorphSmp should be improved to remove all redundant casts so this code
|
|
// can be removed.
|
|
if (tree->OperIs(GT_STOREIND))
|
|
{
|
|
if (typ == TYP_LONG)
|
|
{
|
|
break;
|
|
}
|
|
|
|
if (op2->gtFlags & GTF_ASG)
|
|
{
|
|
break;
|
|
}
|
|
|
|
if (op2->gtFlags & GTF_CALL)
|
|
{
|
|
break;
|
|
}
|
|
|
|
if (op2->gtOper == GT_CAST && !op2->gtOverflow())
|
|
{
|
|
var_types srct;
|
|
var_types cast;
|
|
var_types dstt;
|
|
|
|
srct = op2->AsCast()->CastOp()->TypeGet();
|
|
cast = (var_types)op2->CastToType();
|
|
dstt = tree->TypeGet();
|
|
|
|
/* Make sure these are all ints and precision is not lost */
|
|
|
|
if (genTypeSize(cast) >= genTypeSize(dstt) && dstt <= TYP_INT && srct <= TYP_INT)
|
|
{
|
|
op2 = tree->gtOp2 = op2->AsCast()->CastOp();
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
case GT_MUL:
|
|
|
|
/* Check for the case "(val + icon) * icon" */
|
|
|
|
if (op2->gtOper == GT_CNS_INT && op1->gtOper == GT_ADD)
|
|
{
|
|
GenTree* add = op1->AsOp()->gtOp2;
|
|
|
|
if (add->IsCnsIntOrI() && (op2->GetScaleIndexMul() != 0))
|
|
{
|
|
if (tree->gtOverflow() || op1->gtOverflow())
|
|
{
|
|
break;
|
|
}
|
|
|
|
ssize_t imul = op2->AsIntCon()->gtIconVal;
|
|
ssize_t iadd = add->AsIntCon()->gtIconVal;
|
|
|
|
/* Change '(val + iadd) * imul' -> '(val * imul) + (iadd * imul)' */
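// e.g. "(x + 4) * 2" => "(x * 2) + 8" (illustrative), exposing the scaled form to
// address-mode generation.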
|
|
|
|
oper = GT_ADD;
|
|
tree->ChangeOper(oper);
|
|
|
|
op2->AsIntCon()->SetValueTruncating(iadd * imul);
|
|
|
|
op1->ChangeOper(GT_MUL);
|
|
|
|
add->AsIntCon()->SetIconValue(imul);
|
|
}
|
|
}
|
|
|
|
break;
|
|
|
|
case GT_DIV:
|
|
|
|
/* For "val / 1", just return "val" */
|
|
|
|
if (op2->IsIntegralConst(1))
|
|
{
|
|
DEBUG_DESTROY_NODE(tree);
|
|
return op1;
|
|
}
|
|
break;
|
|
|
|
case GT_UDIV:
|
|
case GT_UMOD:
|
|
tree->CheckDivideByConstOptimized(this);
|
|
break;
|
|
|
|
case GT_LSH:
|
|
|
|
/* Check for the case "(val + icon) << icon" */
|
|
|
|
if (!optValnumCSE_phase && op2->IsCnsIntOrI() && op1->gtOper == GT_ADD && !op1->gtOverflow())
|
|
{
|
|
GenTree* cns = op1->AsOp()->gtOp2;
|
|
|
|
if (cns->IsCnsIntOrI() && (op2->GetScaleIndexShf() != 0))
|
|
{
|
|
ssize_t ishf = op2->AsIntConCommon()->IconValue();
|
|
ssize_t iadd = cns->AsIntConCommon()->IconValue();
|
|
|
|
// printf("Changing '(val+icon1)<<icon2' into '(val<<icon2+icon1<<icon2)'\n");
|
|
|
|
/* Change "(val + iadd) << ishf" into "(val<<ishf + iadd<<ishf)" */
|
|
|
|
tree->ChangeOper(GT_ADD);
|
|
|
|
// we are reusing the shift amount node here, but the type we want is that of the shift result
|
|
op2->gtType = op1->gtType;
|
|
op2->AsIntConCommon()->SetValueTruncating(iadd << ishf);
|
|
op1->ChangeOper(GT_LSH);
|
|
cns->AsIntConCommon()->SetIconValue(ishf);
|
|
}
|
|
}
|
|
|
|
break;
|
|
|
|
case GT_INIT_VAL:
|
|
// Initialization values for initBlk have special semantics - their lower
|
|
// byte is used to fill the struct. However, we allow 0 as a "bare" value,
|
|
// which enables them to get a VNForZero, and be propagated.
|
|
if (op1->IsIntegralConst(0))
|
|
{
|
|
return op1;
|
|
}
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
return tree;
|
|
}
|
|
|
|
#if defined(FEATURE_HW_INTRINSICS)
|
|
//------------------------------------------------------------------------
|
|
// fgMorphHWIntrinsic: Morph a GenTreeHWIntrinsic tree.
|
|
//
|
|
// Arguments:
|
|
// tree - The tree to morph
|
|
//
|
|
// Return Value:
|
|
// The fully morphed tree.
|
|
//
|
|
GenTree* Compiler::fgMorphHWIntrinsic(GenTreeHWIntrinsic* tree)
|
|
{
|
|
bool allArgsAreConst = true;
|
|
bool canBenefitFromConstantProp = false;
|
|
bool hasImmediateOperand = false;
|
|
|
|
// Opportunistically, avoid unexpected CSE for hwintrinsics with certain const arguments
|
|
NamedIntrinsic intrinsicId = tree->GetHWIntrinsicId();
|
|
|
|
if (HWIntrinsicInfo::CanBenefitFromConstantProp(intrinsicId))
|
|
{
|
|
canBenefitFromConstantProp = true;
|
|
}
|
|
|
|
if (HWIntrinsicInfo::HasImmediateOperand(intrinsicId))
|
|
{
|
|
hasImmediateOperand = true;
|
|
}
|
|
|
|
for (GenTree** use : tree->UseEdges())
|
|
{
|
|
*use = fgMorphTree(*use);
|
|
GenTree* operand = *use;
|
|
|
|
if (operand->OperIsConst())
|
|
{
|
|
if (hasImmediateOperand && operand->IsCnsIntOrI())
|
|
{
|
|
operand->SetDoNotCSE();
|
|
}
|
|
else if (canBenefitFromConstantProp && operand->IsVectorConst())
|
|
{
|
|
if (tree->ShouldConstantProp(operand, operand->AsVecCon()))
|
|
{
|
|
operand->SetDoNotCSE();
|
|
}
|
|
}
|
|
}
|
|
else
|
|
{
|
|
allArgsAreConst = false;
|
|
}
|
|
|
|
// Promoted structs after morph must be in one of two states:
|
|
// a) Fully eliminated from the IR (independent promotion) OR only be
|
|
// used by "special" nodes (e. g. multi-reg stores).
|
|
// b) Marked as do-not-enregister (dependent promotion).
|
|
//
|
|
// So here we preserve this invariant and mark any promoted structs as do-not-enreg.
|
|
//
|
|
if (operand->OperIs(GT_LCL_VAR))
|
|
{
|
|
GenTreeLclVar* lclVar = operand->AsLclVar();
|
|
|
|
if (lvaGetDesc(lclVar)->lvPromoted)
|
|
{
|
|
lvaSetVarDoNotEnregister(lclVar->GetLclNum() DEBUGARG(DoNotEnregisterReason::SimdUserForcesDep));
|
|
}
|
|
}
|
|
}
|
|
|
|
gtUpdateNodeOperSideEffects(tree);
|
|
|
|
for (GenTree* operand : tree->Operands())
|
|
{
|
|
tree->AddAllEffectsFlags(operand);
|
|
}
|
|
|
|
if (opts.OptimizationEnabled())
|
|
{
|
|
if (tree->isCommutativeHWIntrinsic())
|
|
{
|
|
assert(tree->GetOperandCount() == 2);
|
|
GenTree*& op1 = tree->Op(1);
|
|
|
|
if (op1->IsVectorConst())
|
|
{
|
|
// Move constant vectors from op1 to op2 for commutative operations
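// e.g. "Vector128.Create(1.0f) + v" is reordered to "v + Vector128.Create(1.0f)"
// (illustrative), keeping the constant in the canonical second-operand position.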
|
|
std::swap(op1, tree->Op(2));
|
|
}
|
|
}
|
|
|
|
// Try to fold it, maybe we get lucky.
|
|
GenTree* foldedTree = gtFoldExpr(tree);
|
|
|
|
if (foldedTree != tree)
|
|
{
|
|
assert(!fgIsCommaThrow(foldedTree));
|
|
INDEBUG(foldedTree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
|
|
return foldedTree;
|
|
}
|
|
else if (!foldedTree->OperIsHWIntrinsic())
|
|
{
|
|
INDEBUG(foldedTree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
|
|
return foldedTree;
|
|
}
|
|
|
|
if (allArgsAreConst && tree->IsVectorCreate())
|
|
{
|
|
// Avoid unexpected CSE for constant arguments for Vector_.Create
|
|
// but only if all arguments are constants.
|
|
|
|
for (GenTree* arg : tree->Operands())
|
|
{
|
|
arg->SetDoNotCSE();
|
|
}
|
|
}
|
|
|
|
return fgOptimizeHWIntrinsic(tree->AsHWIntrinsic());
|
|
}
|
|
|
|
return tree;
|
|
}
|
|
#endif // FEATURE_HW_INTRINSICS
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgMorphModToZero: Transform 'a % 1' into the equivalent '0'.
|
|
//
|
|
// Arguments:
|
|
// tree - The GT_MOD/GT_UMOD tree to morph
|
|
//
|
|
// Returns:
|
|
// The morphed tree, will be a GT_COMMA or a zero constant node.
|
|
// Can return null if the transformation did not happen.
|
|
//
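// For illustration: "SomeCall() % 1" becomes "COMMA(SomeCall(), 0)" so the call still
// executes, while a side-effect-free "x % 1" becomes just "0". ("SomeCall" is a
// hypothetical example, not a name from the sources.)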
|
|
GenTree* Compiler::fgMorphModToZero(GenTreeOp* tree)
|
|
{
|
|
assert(tree->OperIs(GT_MOD, GT_UMOD));
|
|
assert(tree->gtOp2->IsIntegralConst(1));
|
|
|
|
if (opts.OptimizationDisabled())
|
|
return nullptr;
|
|
|
|
// Do not transform this if there are side effects and we are not in global morph.
|
|
// If we want to allow this, we need to update value numbers for the GT_COMMA.
|
|
if (!fgGlobalMorph && ((tree->gtGetOp1()->gtFlags & GTF_SIDE_EFFECT) != 0))
|
|
return nullptr;
|
|
|
|
JITDUMP("\nMorphing MOD/UMOD [%06u] to Zero\n", dspTreeID(tree));
|
|
|
|
GenTree* op1 = tree->gtGetOp1();
|
|
GenTree* op2 = tree->gtGetOp2();
|
|
|
|
op2->AsIntConCommon()->SetIntegralValue(0);
|
|
fgUpdateConstTreeValueNumber(op2);
|
|
|
|
GenTree* const zero = op2;
|
|
|
|
GenTree* op1SideEffects = nullptr;
|
|
gtExtractSideEffList(op1, &op1SideEffects, GTF_ALL_EFFECT);
|
|
if (op1SideEffects != nullptr)
|
|
{
|
|
GenTree* comma = gtNewOperNode(GT_COMMA, zero->TypeGet(), op1SideEffects, zero);
|
|
INDEBUG(comma->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
|
|
|
|
DEBUG_DESTROY_NODE(tree);
|
|
|
|
return comma;
|
|
}
|
|
else
|
|
{
|
|
INDEBUG(zero->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
|
|
|
|
DEBUG_DESTROY_NODE(tree->gtOp1);
|
|
DEBUG_DESTROY_NODE(tree);
|
|
|
|
return zero;
|
|
}
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgMorphModToSubMulDiv: Transform a % b into the equivalent a - (a / b) * b
|
|
// (see ECMA III.3.55 and III.3.56).
|
|
//
|
|
// Arguments:
|
|
// tree - The GT_MOD/GT_UMOD tree to morph
|
|
//
|
|
// Returns:
|
|
// The morphed tree
|
|
//
|
|
// Notes:
|
|
// For ARM64 we don't have a remainder instruction so this transform is
|
|
// always done. For XARCH this transform is done if we know that magic
|
|
// division will be used, in that case this transform allows CSE to
|
|
// eliminate the redundant div from code like "x = a / 3; y = a % 3;".
|
|
//
|
|
// Before:
|
|
// * RETURN int
|
|
// \--* MOD int
|
|
// +--* MUL int
|
|
// | +--* LCL_VAR int V00 arg0
|
|
// | \--* LCL_VAR int V00 arg0
|
|
// \--* LCL_VAR int V01 arg1
|
|
// After:
|
|
// * RETURN int
|
|
// \--* COMMA int
|
|
// +--* STORE_LCL_VAR int V03 tmp1
|
|
// | \--* MUL int
|
|
// | +--* LCL_VAR int V00 arg0
|
|
// | \--* LCL_VAR int V00 arg0
|
|
// \--* SUB int
|
|
// +--* LCL_VAR int V03 tmp1
|
|
// \--* MUL int
|
|
// +--* DIV int
|
|
// | +--* LCL_VAR int V03 tmp1
|
|
// | \--* LCL_VAR int V01 arg1
|
|
// \--* LCL_VAR int V01 arg1
|
|
GenTree* Compiler::fgMorphModToSubMulDiv(GenTreeOp* tree)
|
|
{
|
|
JITDUMP("\nMorphing MOD/UMOD [%06u] to Sub/Mul/Div\n", dspTreeID(tree));
|
|
|
|
if (tree->OperGet() == GT_MOD)
|
|
{
|
|
tree->SetOper(GT_DIV);
|
|
}
|
|
else if (tree->OperGet() == GT_UMOD)
|
|
{
|
|
tree->SetOper(GT_UDIV);
|
|
}
|
|
else
|
|
{
|
|
noway_assert(!"Illegal gtOper in fgMorphModToSubMulDiv");
|
|
}
|
|
|
|
GenTreeOp* const div = tree;
|
|
|
|
GenTree* opA = div->gtGetOp1();
|
|
GenTree* opB = div->gtGetOp2();
|
|
if (div->IsReverseOp())
|
|
{
|
|
std::swap(opA, opB);
|
|
}
|
|
|
|
TempInfo tempInfos[2];
|
|
int tempInfoCount = 0;
|
|
|
|
// This transform runs in pre-morph so we cannot rely on GTF_GLOB_REF.
|
|
// Furthermore, this logic is somewhat complicated since the divisor and
|
|
// dividend are arbitrary nodes. For instance, if we spill the divisor and
|
|
// the dividend is a local, we need to spill the dividend too unless the
|
|
// divisor could not cause it to be reassigned.
|
|
// There is even more complexity due to needing to handle GTF_REVERSE_OPS.
|
|
//
|
|
// This could be slightly better via GTF_CALL and GTF_ASG checks on the
|
|
// divisor but the diffs of this were minor and the extra complexity seemed
|
|
// not worth it.
|
|
bool spillA;
|
|
bool spillB;
|
|
if (opB->IsInvariant() || opB->OperIsLocal())
|
|
{
|
|
spillB = false;
|
|
spillA = !opA->IsInvariant() && !opA->OperIsLocal();
|
|
}
|
|
else
|
|
{
|
|
spillB = true;
|
|
spillA = !opA->IsInvariant();
|
|
}
|
|
|
|
if (spillA)
|
|
{
|
|
tempInfos[tempInfoCount] = fgMakeTemp(opA);
|
|
opA = tempInfos[tempInfoCount].load;
|
|
tempInfoCount++;
|
|
}
|
|
|
|
if (spillB)
|
|
{
|
|
tempInfos[tempInfoCount] = fgMakeTemp(opB);
|
|
opB = tempInfos[tempInfoCount].load;
|
|
tempInfoCount++;
|
|
}
|
|
|
|
GenTree* dividend = div->IsReverseOp() ? opB : opA;
|
|
GenTree* divisor = div->IsReverseOp() ? opA : opB;
|
|
|
|
div->gtOp1 = gtCloneExpr(dividend);
|
|
div->gtOp2 = gtCloneExpr(divisor);
|
|
|
|
var_types type = div->gtType;
|
|
GenTree* const mul = gtNewOperNode(GT_MUL, type, div, divisor);
|
|
GenTree* const sub = gtNewOperNode(GT_SUB, type, dividend, mul);
|
|
|
|
GenTree* result = sub;
|
|
// We loop backwards as it is easier to create new commas
|
|
// within one another for their sequence order.
|
|
for (int i = tempInfoCount - 1; i >= 0; i--)
|
|
{
|
|
result = gtNewOperNode(GT_COMMA, type, tempInfos[i].store, result);
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
result->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED;
|
|
#endif
|
|
|
|
optRecordSsaUses(result, compCurBB);
|
|
|
|
div->CheckDivideByConstOptimized(this);
|
|
|
|
return result;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgMorphUModToAndSub: Transform a % b into the equivalent a & (b - 1).
|
|
// '%' must be unsigned (GT_UMOD).
|
|
// 'a' and 'b' must be integers.
|
|
// 'b' must be a constant and a power of two.
|
|
//
|
|
// Arguments:
|
|
// tree - The GT_UMOD tree to morph
|
|
//
|
|
// Returns:
|
|
// The morphed tree
|
|
//
|
|
// Notes:
|
|
// This is more optimized than calling fgMorphModToSubMulDiv.
|
|
//
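// For illustration: with an unsigned "a", "a % 8" becomes "a & 7".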
|
|
GenTree* Compiler::fgMorphUModToAndSub(GenTreeOp* tree)
|
|
{
|
|
JITDUMP("\nMorphing UMOD [%06u] to And/Sub\n", dspTreeID(tree));
|
|
|
|
assert(tree->OperIs(GT_UMOD));
|
|
assert(tree->gtOp2->IsIntegralConstUnsignedPow2());
|
|
|
|
const var_types type = tree->TypeGet();
|
|
|
|
const size_t cnsValue = (static_cast<size_t>(tree->gtOp2->AsIntConCommon()->IntegralValue())) - 1;
|
|
GenTree* const newTree = gtNewOperNode(GT_AND, type, tree->gtOp1, gtNewIconNode(cnsValue, type));
|
|
|
|
INDEBUG(newTree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
|
|
|
|
DEBUG_DESTROY_NODE(tree->gtOp2);
|
|
DEBUG_DESTROY_NODE(tree);
|
|
|
|
return newTree;
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// fgOperIsBitwiseRotationRoot : Check if the operation can be a root of a bitwise rotation tree.
|
|
//
|
|
//
|
|
// Arguments:
|
|
// oper - Operation to check
|
|
//
|
|
// Return Value:
|
|
// True if the operation can be a root of a bitwise rotation tree; false otherwise.
|
|
|
|
bool Compiler::fgOperIsBitwiseRotationRoot(genTreeOps oper)
|
|
{
|
|
return (oper == GT_OR) || (oper == GT_XOR);
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// fgRecognizeAndMorphBitwiseRotation : Check if the tree represents a left or right rotation. If so, return
|
|
// an equivalent GT_ROL or GT_ROR tree; otherwise, return "nullptr".
|
|
//
|
|
// Arguments:
|
|
// tree - tree to check for a rotation pattern
|
|
//
|
|
// Return Value:
|
|
// An equivalent GT_ROL or GT_ROR tree if a pattern is found; "nullptr" otherwise.
|
|
//
|
|
// Assumption:
|
|
// The input is a GT_OR or a GT_XOR tree.
|
|
|
|
GenTree* Compiler::fgRecognizeAndMorphBitwiseRotation(GenTree* tree)
|
|
{
|
|
//
|
|
// Check for a rotation pattern, e.g.,
|
|
//
|
|
// OR ROL
|
|
// / \ / \.
|
|
// LSH RSZ -> x y
|
|
// / \ / \.
|
|
// x AND x AND
|
|
// / \ / \.
|
|
// y 31 ADD 31
|
|
// / \.
|
|
// NEG 32
|
|
// |
|
|
// y
|
|
// The patterns recognized:
|
|
// (x << (y & M)) op (x >>> ((-y + N) & M))
|
|
// (x >>> ((-y + N) & M)) op (x << (y & M))
|
|
//
|
|
// (x << y) op (x >>> (-y + N))
|
|
// (x >>> (-y + N)) op (x << y)
|
|
//
|
|
// (x >>> (y & M)) op (x << ((-y + N) & M))
|
|
// (x << ((-y + N) & M)) op (x >>> (y & M))
|
|
//
|
|
// (x >>> y) op (x << (-y + N))
|
|
// (x << (-y + N)) op (x >>> y)
|
|
//
|
|
// (x << c1) op (x >>> c2)
|
|
// (x >>> c1) op (x << c2)
|
|
//
|
|
// where
|
|
// c1 and c2 are const
|
|
// c1 + c2 == bitsize(x)
|
|
// N == bitsize(x)
|
|
// M is const
|
|
// M & (N - 1) == N - 1
|
|
// op is either | or ^
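// A C# shape that matches after earlier morphs (illustrative):
//   static uint Rol(uint x, int y) => (x << y) | (x >> (32 - y));
// "32 - y" is rewritten earlier in morph to "(-y) + 32", so the whole tree becomes GT_ROL(x, y).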
|
|
|
|
if (((tree->gtFlags & GTF_PERSISTENT_SIDE_EFFECTS) != 0) || ((tree->gtFlags & GTF_ORDER_SIDEEFF) != 0))
|
|
{
|
|
// We can't do anything if the tree has stores, calls, or volatile reads. Note that we allow
|
|
// GTF_EXCEPT side effect since any exceptions thrown by the original tree will be thrown by
|
|
// the transformed tree as well.
|
|
return nullptr;
|
|
}
|
|
|
|
genTreeOps oper = tree->OperGet();
|
|
assert(fgOperIsBitwiseRotationRoot(oper));
|
|
|
|
// Check if we have an LSH on one side of the OR and an RSZ on the other side.
|
|
GenTree* op1 = tree->gtGetOp1();
|
|
GenTree* op2 = tree->gtGetOp2();
|
|
GenTree* leftShiftTree = nullptr;
|
|
GenTree* rightShiftTree = nullptr;
|
|
if ((op1->OperGet() == GT_LSH) && (op2->OperGet() == GT_RSZ))
|
|
{
|
|
leftShiftTree = op1;
|
|
rightShiftTree = op2;
|
|
}
|
|
else if ((op1->OperGet() == GT_RSZ) && (op2->OperGet() == GT_LSH))
|
|
{
|
|
leftShiftTree = op2;
|
|
rightShiftTree = op1;
|
|
}
|
|
else
|
|
{
|
|
return nullptr;
|
|
}
|
|
|
|
// Check if the trees representing the value to shift are identical.
|
|
// We already checked that there are no side effects above.
|
|
if (GenTree::Compare(leftShiftTree->gtGetOp1(), rightShiftTree->gtGetOp1()))
|
|
{
|
|
GenTree* rotatedValue = leftShiftTree->gtGetOp1();
|
|
var_types rotatedValueActualType = genActualType(rotatedValue->gtType);
|
|
ssize_t rotatedValueBitSize = genTypeSize(rotatedValueActualType) * 8;
|
|
noway_assert((rotatedValueBitSize == 32) || (rotatedValueBitSize == 64));
|
|
GenTree* leftShiftIndex = leftShiftTree->gtGetOp2();
|
|
GenTree* rightShiftIndex = rightShiftTree->gtGetOp2();
|
|
|
|
// The shift index may be masked. At least (rotatedValueBitSize - 1) lower bits
|
|
// shouldn't be masked for the transformation to be valid. If additional
|
|
// higher bits are not masked, the transformation is still valid since the result
|
|
// of MSIL shift instructions is unspecified if the shift amount is greater or equal
|
|
// than the width of the value being shifted.
|
|
ssize_t minimalMask = rotatedValueBitSize - 1;
|
|
ssize_t leftShiftMask = -1;
|
|
ssize_t rightShiftMask = -1;
|
|
|
|
if ((leftShiftIndex->OperGet() == GT_AND))
|
|
{
|
|
if (leftShiftIndex->gtGetOp2()->IsCnsIntOrI())
|
|
{
|
|
leftShiftMask = leftShiftIndex->gtGetOp2()->AsIntCon()->gtIconVal;
|
|
leftShiftIndex = leftShiftIndex->gtGetOp1();
|
|
}
|
|
else
|
|
{
|
|
return nullptr;
|
|
}
|
|
}
|
|
|
|
if ((rightShiftIndex->OperGet() == GT_AND))
|
|
{
|
|
if (rightShiftIndex->gtGetOp2()->IsCnsIntOrI())
|
|
{
|
|
rightShiftMask = rightShiftIndex->gtGetOp2()->AsIntCon()->gtIconVal;
|
|
rightShiftIndex = rightShiftIndex->gtGetOp1();
|
|
}
|
|
else
|
|
{
|
|
return nullptr;
|
|
}
|
|
}
|
|
|
|
if (((minimalMask & leftShiftMask) != minimalMask) || ((minimalMask & rightShiftMask) != minimalMask))
|
|
{
|
|
// The shift index is overmasked, e.g., we have
|
|
// something like (x << y & 15) or
|
|
// (x >> (32 - y)) & 15 with 32-bit x.
|
|
// The transformation is not valid.
|
|
return nullptr;
|
|
}
|
|
|
|
GenTree* shiftIndexWithAdd = nullptr;
|
|
GenTree* shiftIndexWithoutAdd = nullptr;
|
|
genTreeOps rotateOp = GT_NONE;
|
|
GenTree* rotateIndex = nullptr;
|
|
|
|
if (leftShiftIndex->OperGet() == GT_ADD)
|
|
{
|
|
shiftIndexWithAdd = leftShiftIndex;
|
|
shiftIndexWithoutAdd = rightShiftIndex;
|
|
rotateOp = GT_ROR;
|
|
}
|
|
else if (rightShiftIndex->OperGet() == GT_ADD)
|
|
{
|
|
shiftIndexWithAdd = rightShiftIndex;
|
|
shiftIndexWithoutAdd = leftShiftIndex;
|
|
rotateOp = GT_ROL;
|
|
}
|
|
|
|
if (shiftIndexWithAdd != nullptr)
|
|
{
|
|
if (shiftIndexWithAdd->gtGetOp2()->IsCnsIntOrI())
|
|
{
|
|
if (shiftIndexWithAdd->gtGetOp2()->AsIntCon()->gtIconVal == rotatedValueBitSize)
|
|
{
|
|
if (shiftIndexWithAdd->gtGetOp1()->OperGet() == GT_NEG)
|
|
{
|
|
if (GenTree::Compare(shiftIndexWithAdd->gtGetOp1()->gtGetOp1(), shiftIndexWithoutAdd))
|
|
{
|
|
// We found one of these patterns:
|
|
// (x << (y & M)) | (x >>> ((-y + N) & M))
|
|
// (x << y) | (x >>> (-y + N))
|
|
// (x >>> (y & M)) | (x << ((-y + N) & M))
|
|
// (x >>> y) | (x << (-y + N))
|
|
// where N == bitsize(x), M is const, and
|
|
// M & (N - 1) == N - 1
|
|
|
|
#ifndef TARGET_64BIT
|
|
if (!shiftIndexWithoutAdd->IsCnsIntOrI() && (rotatedValueBitSize == 64))
|
|
{
|
|
// TODO-X86-CQ: we need to handle variable-sized long shifts specially on x86.
|
|
// GT_LSH, GT_RSH, and GT_RSZ have helpers for this case. We may need
|
|
// to add helpers for GT_ROL and GT_ROR.
|
|
return nullptr;
|
|
}
|
|
#endif
|
|
|
|
rotateIndex = shiftIndexWithoutAdd;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
else if ((leftShiftIndex->IsCnsIntOrI() && rightShiftIndex->IsCnsIntOrI()))
|
|
{
|
|
if (leftShiftIndex->AsIntCon()->gtIconVal + rightShiftIndex->AsIntCon()->gtIconVal == rotatedValueBitSize)
|
|
{
|
|
// We found this pattern:
|
|
// (x << c1) | (x >>> c2)
|
|
// where c1 and c2 are const and c1 + c2 == bitsize(x)
|
|
rotateOp = GT_ROL;
|
|
rotateIndex = leftShiftIndex;
|
|
}
|
|
}
|
|
|
|
if (rotateIndex != nullptr)
|
|
{
|
|
noway_assert(GenTree::OperIsRotate(rotateOp));
|
|
|
|
GenTreeFlags inputTreeEffects = tree->gtFlags & GTF_ALL_EFFECT;
|
|
|
|
// We can use the same tree only during global morph; reusing the tree in a later morph
|
|
// may invalidate value numbers.
|
|
if (fgGlobalMorph)
|
|
{
|
|
tree->AsOp()->gtOp1 = rotatedValue;
|
|
tree->AsOp()->gtOp2 = rotateIndex;
|
|
tree->ChangeOper(rotateOp);
|
|
|
|
unsigned childFlags = 0;
|
|
for (GenTree* op : tree->Operands())
|
|
{
|
|
childFlags |= (op->gtFlags & GTF_ALL_EFFECT);
|
|
}
|
|
|
|
// The parent's flags should be a superset of its operands' flags
|
|
noway_assert((inputTreeEffects & childFlags) == childFlags);
|
|
}
|
|
else
|
|
{
|
|
tree = gtNewOperNode(rotateOp, rotatedValueActualType, rotatedValue, rotateIndex);
|
|
noway_assert(inputTreeEffects == (tree->gtFlags & GTF_ALL_EFFECT));
|
|
}
|
|
|
|
return tree;
|
|
}
|
|
}
|
|
|
|
return nullptr;
|
|
}
|
|
|
|
#if !defined(TARGET_64BIT)
|
|
//------------------------------------------------------------------------------
|
|
// fgRecognizeAndMorphLongMul : Check for and morph long multiplication with 32 bit operands.
|
|
//
|
|
// Uses "GenTree::IsValidLongMul" to check for the long multiplication pattern. Will swap
|
|
// operands if the first one is a constant and the second one is not, even for trees which
|
|
// end up not being eligible for long multiplication.
|
|
//
|
|
// Arguments:
|
|
// mul - GT_MUL tree to check for a long multiplication opportunity
|
|
//
|
|
// Return Value:
|
|
// The original tree, with operands possibly swapped, if it is not eligible for long multiplication.
|
|
// Tree with GTF_MUL_64RSLT set, side effect flags propagated, and children morphed if it is.
|
|
//
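// Illustrative: on a 32-bit target "(long)i * (long)j" with int operands keeps both casts,
// and the MUL is marked GTF_MUL_64RSLT so codegen can use a single widening multiply
// instead of a helper call.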
|
|
GenTreeOp* Compiler::fgRecognizeAndMorphLongMul(GenTreeOp* mul)
|
|
{
|
|
assert(mul->OperIs(GT_MUL));
|
|
assert(mul->TypeIs(TYP_LONG));
|
|
|
|
GenTree* op1 = mul->gtGetOp1();
|
|
GenTree* op2 = mul->gtGetOp2();
|
|
|
|
// "IsValidLongMul" and decomposition do not handle constant op1.
|
|
if (op1->IsIntegralConst())
|
|
{
|
|
std::swap(op1, op2);
|
|
mul->gtOp1 = op1;
|
|
mul->gtOp2 = op2;
|
|
}
|
|
|
|
if (!mul->IsValidLongMul())
|
|
{
|
|
return mul;
|
|
}
|
|
|
|
// MUL_LONG needs to do the work the casts would have done.
|
|
mul->ClearUnsigned();
|
|
if (op1->IsUnsigned())
|
|
{
|
|
mul->SetUnsigned();
|
|
}
|
|
|
|
// "IsValidLongMul" returned "true", so this GT_MUL cannot overflow.
|
|
mul->ClearOverflow();
|
|
mul->Set64RsltMul();
|
|
|
|
return fgMorphLongMul(mul);
|
|
}
|
|
|
|
//------------------------------------------------------------------------------
|
|
// fgMorphLongMul : Morphs GT_MUL nodes marked with GTF_MUL_64RSLT.
|
|
//
|
|
// Morphs *only* the operands of casts that compose the long mul to
|
|
// avoid them being folded always.
|
|
//
|
|
// Arguments:
|
|
// mul - GT_MUL tree to morph operands of
|
|
//
|
|
// Return Value:
|
|
// The original tree, with operands morphed and flags propagated.
|
|
//
|
|
GenTreeOp* Compiler::fgMorphLongMul(GenTreeOp* mul)
|
|
{
|
|
INDEBUG(mul->DebugCheckLongMul());
|
|
|
|
GenTree* op1 = mul->gtGetOp1();
|
|
GenTree* op2 = mul->gtGetOp2();
|
|
|
|
// Morph the operands. We cannot allow the casts to go away, so we morph their operands directly.
|
|
op1->AsCast()->CastOp() = fgMorphTree(op1->AsCast()->CastOp());
|
|
op1->SetAllEffectsFlags(op1->AsCast()->CastOp());
|
|
|
|
if (op2->OperIs(GT_CAST))
|
|
{
|
|
op2->AsCast()->CastOp() = fgMorphTree(op2->AsCast()->CastOp());
|
|
op2->SetAllEffectsFlags(op2->AsCast()->CastOp());
|
|
}
|
|
|
|
mul->SetAllEffectsFlags(op1, op2);
|
|
op1->SetDoNotCSE();
|
|
op2->SetDoNotCSE();
|
|
|
|
return mul;
|
|
}
|
|
#endif // !defined(TARGET_64BIT)
|
|
|
|
/*****************************************************************************
|
|
*
|
|
* Transform the given tree for code generation and return an equivalent tree.
|
|
*/
|
|
|
|
GenTree* Compiler::fgMorphTree(GenTree* tree, MorphAddrContext* mac)
|
|
{
|
|
assert(tree);
|
|
|
|
#ifdef DEBUG
|
|
if (verbose)
|
|
{
|
|
if ((unsigned)JitConfig.JitBreakMorphTree() == tree->gtTreeID)
|
|
{
|
|
noway_assert(!"JitBreakMorphTree hit");
|
|
}
|
|
}
|
|
#endif
|
|
|
|
#ifdef DEBUG
|
|
int thisMorphNum = 0;
|
|
if (verbose && treesBeforeAfterMorph)
|
|
{
|
|
thisMorphNum = morphNum++;
|
|
printf("\nfgMorphTree (before %d):\n", thisMorphNum);
|
|
gtDispTree(tree);
|
|
}
|
|
#endif
|
|
|
|
bool optAssertionPropDone = false;
|
|
|
|
/*-------------------------------------------------------------------------
|
|
* fgMorphTree() can potentially replace a tree with another, and the
|
|
* caller has to store the return value correctly.
|
|
* Turn this on to always make copy of "tree" here to shake out
|
|
* hidden/unupdated references.
|
|
*/
|
|
|
|
#ifdef DEBUG
|
|
|
|
if (compStressCompile(STRESS_GENERIC_CHECK, 0))
|
|
{
|
|
GenTree* copy;
|
|
|
|
if (GenTree::s_gtNodeSizes[tree->gtOper] == TREE_NODE_SZ_SMALL)
|
|
{
|
|
copy = gtNewLargeOperNode(GT_ADD, TYP_INT);
|
|
}
|
|
else
|
|
{
|
|
copy = new (this, GT_CALL) GenTreeCall(TYP_INT);
|
|
}
|
|
|
|
copy->ReplaceWith(tree, this);
|
|
|
|
#if defined(LATE_DISASM)
|
|
// GT_CNS_INT is considered small, so ReplaceWith() won't copy all fields
|
|
if (tree->IsIconHandle())
|
|
{
|
|
copy->AsIntCon()->gtCompileTimeHandle = tree->AsIntCon()->gtCompileTimeHandle;
|
|
}
|
|
#endif
|
|
|
|
DEBUG_DESTROY_NODE(tree);
|
|
tree = copy;
|
|
}
|
|
#endif // DEBUG
|
|
|
|
if (fgGlobalMorph)
|
|
{
|
|
/* Ensure that we haven't morphed this node already */
|
|
assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
|
|
|
|
/* Before morphing the tree, we try to propagate any active assertions */
|
|
if (optLocalAssertionProp)
|
|
{
|
|
/* Do we have any active assertions? */
|
|
|
|
if (optAssertionCount > 0)
|
|
{
|
|
GenTree* newTree = tree;
|
|
while (newTree != nullptr)
|
|
{
|
|
tree = newTree;
|
|
/* newTree is non-Null if we propagated an assertion */
|
|
newTree = optAssertionProp(apLocal, tree, nullptr, nullptr);
|
|
}
|
|
assert(tree != nullptr);
|
|
}
|
|
}
|
|
PREFAST_ASSUME(tree != nullptr);
|
|
}
|
|
|
|
/* Save the original un-morphed tree for fgMorphTreeDone */
|
|
|
|
GenTree* const oldTree = tree;
|
|
|
|
/* Figure out what kind of a node we have */
|
|
|
|
unsigned const kind = tree->OperKind();
|
|
|
|
/* Is this a constant node? */
|
|
|
|
if (tree->OperIsConst())
|
|
{
|
|
tree = fgMorphConst(tree);
|
|
goto DONE;
|
|
}
|
|
|
|
/* Is this a leaf node? */
|
|
|
|
if (kind & GTK_LEAF)
|
|
{
|
|
tree = fgMorphLeaf(tree);
|
|
goto DONE;
|
|
}
|
|
|
|
/* Is it a 'simple' unary/binary operator? */
|
|
|
|
if (kind & GTK_SMPOP)
|
|
{
|
|
tree = fgMorphSmpOp(tree, mac, &optAssertionPropDone);
|
|
goto DONE;
|
|
}
|
|
|
|
/* See what kind of a special operator we have here */
|
|
|
|
switch (tree->OperGet())
|
|
{
|
|
case GT_CALL:
|
|
if (tree->OperMayThrow(this))
|
|
{
|
|
tree->gtFlags |= GTF_EXCEPT;
|
|
}
|
|
else
|
|
{
|
|
tree->gtFlags &= ~GTF_EXCEPT;
|
|
}
|
|
tree = fgMorphCall(tree->AsCall());
|
|
break;
|
|
|
|
#if defined(FEATURE_HW_INTRINSICS)
|
|
case GT_HWINTRINSIC:
|
|
tree = fgMorphHWIntrinsic(tree->AsHWIntrinsic());
|
|
break;
|
|
#endif // FEATURE_HW_INTRINSICS
|
|
|
|
case GT_ARR_ELEM:
|
|
tree->AsArrElem()->gtArrObj = fgMorphTree(tree->AsArrElem()->gtArrObj);
|
|
|
|
unsigned dim;
|
|
for (dim = 0; dim < tree->AsArrElem()->gtArrRank; dim++)
|
|
{
|
|
tree->AsArrElem()->gtArrInds[dim] = fgMorphTree(tree->AsArrElem()->gtArrInds[dim]);
|
|
}
|
|
|
|
tree->gtFlags &= ~GTF_CALL;
|
|
|
|
tree->gtFlags |= tree->AsArrElem()->gtArrObj->gtFlags & GTF_ALL_EFFECT;
|
|
|
|
for (dim = 0; dim < tree->AsArrElem()->gtArrRank; dim++)
|
|
{
|
|
tree->gtFlags |= tree->AsArrElem()->gtArrInds[dim]->gtFlags & GTF_ALL_EFFECT;
|
|
}
|
|
|
|
if (fgGlobalMorph)
|
|
{
|
|
fgAddCodeRef(compCurBB, SCK_RNGCHK_FAIL);
|
|
}
|
|
break;
|
|
|
|
case GT_PHI:
|
|
tree->gtFlags &= ~GTF_ALL_EFFECT;
|
|
for (GenTreePhi::Use& use : tree->AsPhi()->Uses())
|
|
{
|
|
use.SetNode(fgMorphTree(use.GetNode()));
|
|
tree->gtFlags |= use.GetNode()->gtFlags & GTF_ALL_EFFECT;
|
|
}
|
|
break;
|
|
|
|
case GT_FIELD_LIST:
|
|
tree->gtFlags &= ~GTF_ALL_EFFECT;
|
|
for (GenTreeFieldList::Use& use : tree->AsFieldList()->Uses())
|
|
{
|
|
use.SetNode(fgMorphTree(use.GetNode()));
|
|
tree->gtFlags |= (use.GetNode()->gtFlags & GTF_ALL_EFFECT);
|
|
}
|
|
break;
|
|
|
|
case GT_CMPXCHG:
|
|
tree->AsCmpXchg()->Addr() = fgMorphTree(tree->AsCmpXchg()->Addr());
|
|
tree->AsCmpXchg()->Data() = fgMorphTree(tree->AsCmpXchg()->Data());
|
|
tree->AsCmpXchg()->Comparand() = fgMorphTree(tree->AsCmpXchg()->Comparand());
|
|
gtUpdateNodeSideEffects(tree);
|
|
break;
|
|
|
|
case GT_SELECT:
|
|
tree->AsConditional()->gtCond = fgMorphTree(tree->AsConditional()->gtCond);
|
|
tree->AsConditional()->gtOp1 = fgMorphTree(tree->AsConditional()->gtOp1);
|
|
tree->AsConditional()->gtOp2 = fgMorphTree(tree->AsConditional()->gtOp2);
|
|
|
|
tree->gtFlags &= (~GTF_EXCEPT & ~GTF_CALL);
|
|
|
|
tree->gtFlags |= tree->AsConditional()->gtCond->gtFlags & GTF_ALL_EFFECT;
|
|
tree->gtFlags |= tree->AsConditional()->gtOp1->gtFlags & GTF_ALL_EFFECT;
|
|
tree->gtFlags |= tree->AsConditional()->gtOp2->gtFlags & GTF_ALL_EFFECT;
|
|
|
|
if (opts.OptimizationEnabled())
|
|
{
|
|
// Try to fold away any constants etc.
|
|
tree = gtFoldExpr(tree);
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
#ifdef DEBUG
|
|
gtDispTree(tree);
|
|
#endif
|
|
noway_assert(!"unexpected operator");
|
|
}
|
|
DONE:
|
|
|
|
const bool isNewTree = (oldTree != tree);
|
|
fgMorphTreeDone(tree, optAssertionPropDone, isNewTree DEBUGARG(thisMorphNum));
|
|
|
|
return tree;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgKillDependentAssertionsSingle: Kill all assertions specific to lclNum
|
|
//
|
|
// Arguments:
|
|
// lclNum - The varNum of the lclVar for which we're killing assertions.
|
|
// tree - (DEBUG only) the tree responsible for killing its assertions.
|
|
//
|
|
void Compiler::fgKillDependentAssertionsSingle(unsigned lclNum DEBUGARG(GenTree* tree))
|
|
{
|
|
// Active dependent assertions are killed here
|
|
//
|
|
ASSERT_TP killed = BitVecOps::MakeCopy(apTraits, GetAssertionDep(lclNum));
|
|
BitVecOps::IntersectionD(apTraits, killed, apLocal);
|
|
|
|
if (killed)
|
|
{
|
|
|
|
#ifdef DEBUG
|
|
AssertionIndex index = optAssertionCount;
|
|
while (killed && (index > 0))
|
|
{
|
|
if (BitVecOps::IsMember(apTraits, killed, index - 1))
|
|
{
|
|
AssertionDsc* curAssertion = optGetAssertion(index);
|
|
noway_assert((curAssertion->op1.lcl.lclNum == lclNum) ||
|
|
((curAssertion->op2.kind == O2K_LCLVAR_COPY) && (curAssertion->op2.lcl.lclNum == lclNum)));
|
|
if (verbose)
|
|
{
|
|
printf("\nThe store ");
|
|
printTreeID(tree);
|
|
printf(" using V%02u removes: ", curAssertion->op1.lcl.lclNum);
|
|
optPrintAssertion(curAssertion, index);
|
|
}
|
|
}
|
|
|
|
index--;
|
|
}
|
|
#endif
|
|
|
|
BitVecOps::DiffD(apTraits, apLocal, killed);
|
|
}
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgKillDependentAssertions: Kill all dependent assertions with regard to lclNum.
|
|
//
|
|
// Arguments:
|
|
// lclNum - The varNum of the lclVar for which we're killing assertions.
|
|
// tree - (DEBUG only) the tree responsible for killing its assertions.
|
|
//
|
|
// Notes:
|
|
// For structs and struct fields, it will invalidate the children and parent
|
|
// respectively.
|
|
// Calls fgKillDependentAssertionsSingle to kill the assertions for a single lclVar.
|
|
//
|
|
void Compiler::fgKillDependentAssertions(unsigned lclNum DEBUGARG(GenTree* tree))
|
|
{
|
|
if (BitVecOps::IsEmpty(apTraits, apLocal))
|
|
{
|
|
return;
|
|
}
|
|
|
|
LclVarDsc* const varDsc = lvaGetDesc(lclNum);
|
|
|
|
if (varDsc->lvPromoted)
|
|
{
|
|
noway_assert(varTypeIsStruct(varDsc));
|
|
|
|
// Kill the field locals.
|
|
for (unsigned i = varDsc->lvFieldLclStart; i < varDsc->lvFieldLclStart + varDsc->lvFieldCnt; ++i)
|
|
{
|
|
fgKillDependentAssertionsSingle(i DEBUGARG(tree));
|
|
}
|
|
|
|
// Kill the struct local itself.
|
|
fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
|
|
}
|
|
else if (varDsc->lvIsStructField)
|
|
{
|
|
// Kill the field local.
|
|
fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
|
|
|
|
// Kill the parent struct.
|
|
fgKillDependentAssertionsSingle(varDsc->lvParentLcl DEBUGARG(tree));
|
|
}
|
|
else
|
|
{
|
|
fgKillDependentAssertionsSingle(lclNum DEBUGARG(tree));
|
|
}
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgAssertionGen: generate local assertions for morphed tree
|
|
//
|
|
// Arguments:
|
|
// tree - tree to examine for local assertions
|
|
//
|
|
// Notes:
|
|
// wraps optAssertionGen to work with local assertion prop
|
|
//
|
|
void Compiler::fgAssertionGen(GenTree* tree)
|
|
{
|
|
INDEBUG(unsigned oldAssertionCount = optAssertionCount;);
|
|
optAssertionGen(tree);
|
|
|
|
// Helper to note when an existing assertion has been
|
|
// brought back to life.
|
|
//
|
|
auto announce = [&](AssertionIndex apIndex, const char* condition) {
|
|
#ifdef DEBUG
|
|
if (verbose)
|
|
{
|
|
if (oldAssertionCount == optAssertionCount)
|
|
{
|
|
if (!BitVecOps::IsMember(apTraits, apLocal, apIndex - 1))
|
|
{
|
|
// This tree resurrected an existing assertion.
|
|
// We call that out here since assertion prop won't.
|
|
//
|
|
printf("GenTreeNode creates %sassertion:\n", condition);
|
|
gtDispTree(tree, nullptr, nullptr, true);
|
|
printf("In " FMT_BB " New Local ", compCurBB->bbNum);
|
|
optPrintAssertion(optGetAssertion(apIndex), apIndex);
|
|
}
|
|
else
|
|
{
|
|
// This tree re-asserted an already live assertion
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// This tree has created a new assertion.
|
|
// Assertion prop will have already described it.
|
|
}
|
|
}
|
|
#endif
|
|
};
|
|
|
|
// For BBJ_COND nodes, we have two assertion out BVs.
|
|
// apLocal will be stored on bbAssertionOutIfFalse and be used for false successors.
|
|
// apLocalIfTrue will be stored on bbAssertionOutIfTrue and be used for true successors.
|
|
//
|
|
const bool makeCondAssertions =
|
|
tree->OperIs(GT_JTRUE) && compCurBB->KindIs(BBJ_COND) && (compCurBB->NumSucc() == 2);
|
|
|
|
// Initialize apLocalIfTrue if we might look for it later,
|
|
// even if it ends up identical to apLocal.
|
|
//
|
|
if (makeCondAssertions)
|
|
{
|
|
apLocalIfTrue = BitVecOps::MakeCopy(apTraits, apLocal);
|
|
}
|
|
|
|
if (!tree->GeneratesAssertion())
|
|
{
|
|
return;
|
|
}
|
|
|
|
AssertionInfo info = tree->GetAssertionInfo();
|
|
|
|
if (makeCondAssertions)
|
|
{
|
|
// Update apLocal and apIfTrue with suitable assertions
|
|
// from the JTRUE
|
|
//
|
|
assert(optCrossBlockLocalAssertionProp);
|
|
|
|
AssertionIndex ifFalseAssertionIndex;
|
|
AssertionIndex ifTrueAssertionIndex;
|
|
|
|
if (info.AssertionHoldsOnFalseEdge())
|
|
{
|
|
ifFalseAssertionIndex = info.GetAssertionIndex();
|
|
ifTrueAssertionIndex = optFindComplementary(ifFalseAssertionIndex);
|
|
}
|
|
else
|
|
{
|
|
ifTrueAssertionIndex = info.GetAssertionIndex();
|
|
ifFalseAssertionIndex = optFindComplementary(ifTrueAssertionIndex);
|
|
}
|
|
|
|
if (ifTrueAssertionIndex != NO_ASSERTION_INDEX)
|
|
{
|
|
announce(ifTrueAssertionIndex, "[if true] ");
|
|
unsigned const bvIndex = ifTrueAssertionIndex - 1;
|
|
BitVecOps::AddElemD(apTraits, apLocalIfTrue, bvIndex);
|
|
}
|
|
|
|
if (ifFalseAssertionIndex != NO_ASSERTION_INDEX)
|
|
{
|
|
announce(ifFalseAssertionIndex, "[if false] ");
|
|
unsigned const bvIndex = ifFalseAssertionIndex - 1;
|
|
BitVecOps::AddElemD(apTraits, apLocal, ifFalseAssertionIndex - 1);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
AssertionIndex const apIndex = tree->GetAssertionInfo().GetAssertionIndex();
|
|
announce(apIndex, "");
|
|
unsigned const bvIndex = apIndex - 1;
|
|
BitVecOps::AddElemD(apTraits, apLocal, bvIndex);
|
|
}
|
|
}
|
|
|
|
//------------------------------------------------------------------------
// fgMorphTreeDone: complete the morphing of a tree node
//
// Arguments:
//   tree - the tree after morphing
//
// Notes:
//   Simple version where the tree has not been marked
//   as morphed, and where assertion kill/gen has not yet been done.
//
void Compiler::fgMorphTreeDone(GenTree* tree)
{
    fgMorphTreeDone(tree, false, false);
}

//------------------------------------------------------------------------
// fgMorphTreeDone: complete the morphing of a tree node
//
// Arguments:
//   tree - the tree after morphing
//   optAssertionPropDone - true if local assertion prop was done already
//   isMorphedTree - true if caller should have marked tree as morphed
//   morphNum - counts invocations of fgMorphTree
//
// Notes:
//   This function is called to complete the morphing of a tree node
//   It should only be called once for each node.
//   If DEBUG is defined the flag GTF_DEBUG_NODE_MORPHED is checked and updated,
//   to enforce the invariant that each node is only morphed once.
//
//   When local assertion prop is active assertions are killed and generated
//   based on tree (unless optAssertionPropDone is true).
//
void Compiler::fgMorphTreeDone(GenTree* tree, bool optAssertionPropDone, bool isMorphedTree DEBUGARG(int morphNum))
{
#ifdef DEBUG
    if (verbose && treesBeforeAfterMorph)
    {
        printf("\nfgMorphTree (after %d):\n", morphNum);
        gtDispTree(tree);
        printf(""); // in our logic this causes a flush
    }
#endif

    if (!fgGlobalMorph)
    {
        return;
    }

    if (isMorphedTree)
    {
        // caller should have set the morphed flag
        //
        assert((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) && "ERROR: Did not morph this node!");
    }
    else
    {
        // caller should not have set the morphed flag
        //
        assert(((tree->gtDebugFlags & GTF_DEBUG_NODE_MORPHED) == 0) && "ERROR: Already morphed this node!");
        INDEBUG(tree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
    }

    // Note "tree" may generate new assertions that we
    // miss if we did them early... perhaps we should skip
    // kills but rerun gens.
    //
    if (tree->OperIsConst() || !optLocalAssertionProp || optAssertionPropDone)
    {
        return;
    }

    // Kill active assertions
    //
    GenTreeLclVarCommon* lclVarTree = nullptr;
    if ((optAssertionCount > 0) && tree->DefinesLocal(this, &lclVarTree))
    {
        fgKillDependentAssertions(lclVarTree->GetLclNum() DEBUGARG(tree));
    }

    // Generate assertions
    //
    fgAssertionGen(tree);
}

//------------------------------------------------------------------------
|
|
// fgFoldConditional: try and fold conditionals and optimize BBJ_COND or
|
|
// BBJ_SWITCH blocks.
|
|
//
|
|
// Arguments:
|
|
// block - block to examine
|
|
//
|
|
// Returns:
|
|
// FoldResult indicating what changes were made, if any
|
|
//
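// For example (illustrative sketch of the code below): a block ending in
//
//   JTRUE(1)  -->  trueTarget / falseTarget
//
// becomes a BBJ_ALWAYS to trueTarget, the edge to falseTarget is removed, and
// the JTRUE statement is removed (or reduced to just its side effects if the
// condition was wrapped in a COMMA).
//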
|
|
Compiler::FoldResult Compiler::fgFoldConditional(BasicBlock* block)
|
|
{
|
|
FoldResult result = FoldResult::FOLD_DID_NOTHING;
|
|
|
|
// We don't want to make any code unreachable
|
|
//
|
|
if (opts.OptimizationDisabled())
|
|
{
|
|
return result;
|
|
}
|
|
|
|
if (block->KindIs(BBJ_COND))
|
|
{
|
|
noway_assert(block->bbStmtList != nullptr && block->bbStmtList->GetPrevStmt() != nullptr);
|
|
|
|
Statement* lastStmt = block->lastStmt();
|
|
|
|
noway_assert(lastStmt->GetNextStmt() == nullptr);
|
|
|
|
if (lastStmt->GetRootNode()->gtOper == GT_CALL)
|
|
{
|
|
noway_assert(fgRemoveRestOfBlock);
|
|
|
|
// Unconditional throw - transform the basic block into a BBJ_THROW
|
|
//
|
|
fgConvertBBToThrowBB(block);
|
|
result = FoldResult::FOLD_CHANGED_CONTROL_FLOW;
|
|
JITDUMP("\nConditional folded at " FMT_BB "\n", block->bbNum);
|
|
JITDUMP(FMT_BB " becomes a BBJ_THROW\n", block->bbNum);
|
|
|
|
return result;
|
|
}
|
|
|
|
noway_assert(lastStmt->GetRootNode()->gtOper == GT_JTRUE);
|
|
|
|
/* Did we fold the conditional */
|
|
|
|
noway_assert(lastStmt->GetRootNode()->AsOp()->gtOp1);
|
|
GenTree* condTree;
|
|
condTree = lastStmt->GetRootNode()->AsOp()->gtOp1;
|
|
GenTree* cond;
|
|
cond = condTree->gtEffectiveVal();
|
|
|
|
if (cond->OperIsConst())
|
|
{
|
|
/* Yupee - we folded the conditional!
|
|
* Remove the conditional statement */
|
|
|
|
noway_assert(cond->gtOper == GT_CNS_INT);
|
|
noway_assert((block->GetFalseTarget()->countOfInEdges() > 0) &&
|
|
(block->GetTrueTarget()->countOfInEdges() > 0));
|
|
|
|
if (condTree != cond)
|
|
{
|
|
// Preserve any side effects
|
|
assert(condTree->OperIs(GT_COMMA));
|
|
lastStmt->SetRootNode(condTree);
|
|
result = FoldResult::FOLD_ALTERED_LAST_STMT;
|
|
}
|
|
else
|
|
{
|
|
// no side effects, remove the jump entirely
|
|
fgRemoveStmt(block, lastStmt);
|
|
result = FoldResult::FOLD_REMOVED_LAST_STMT;
|
|
}
|
|
// block is a BBJ_COND that we are folding the conditional for.
|
|
// bTaken is the path that will always be taken from block.
|
|
// bNotTaken is the path that will never be taken from block.
|
|
//
|
|
BasicBlock* bTaken;
|
|
BasicBlock* bNotTaken;
|
|
FlowEdge* edgeTaken;
|
|
|
|
if (cond->AsIntCon()->gtIconVal != 0)
|
|
{
|
|
// JTRUE 1 - transform the basic block into a BBJ_ALWAYS
|
|
bTaken = block->GetTrueTarget();
|
|
bNotTaken = block->GetFalseTarget();
|
|
|
|
                // Remove 'block' from the predecessor list of 'bNotTaken'
|
|
fgRemoveRefPred(block->GetFalseEdge());
|
|
|
|
edgeTaken = block->GetTrueEdge();
|
|
block->SetKindAndTargetEdge(BBJ_ALWAYS, edgeTaken);
|
|
}
|
|
else
|
|
{
|
|
// JTRUE 0 - transform the basic block into a BBJ_ALWAYS
|
|
bTaken = block->GetFalseTarget();
|
|
bNotTaken = block->GetTrueTarget();
|
|
|
|
                // Remove 'block' from the predecessor list of 'bNotTaken'
|
|
fgRemoveRefPred(block->GetTrueEdge());
|
|
|
|
edgeTaken = block->GetFalseEdge();
|
|
                block->SetKindAndTargetEdge(BBJ_ALWAYS, edgeTaken);
|
|
}
|
|
|
|
// We examine the taken edge (block -> bTaken)
|
|
// if block has valid profile weight and bTaken does not we try to adjust bTaken's weight
|
|
// else if bTaken has valid profile weight and block does not we try to adjust block's weight
|
|
// We can only adjust the block weights when (the edge block -> bTaken) is the only edge into bTaken
|
|
//
|
|
if (block->hasProfileWeight())
|
|
{
|
|
if (!bTaken->hasProfileWeight())
|
|
{
|
|
if ((bTaken->countOfInEdges() == 1) || (bTaken->bbWeight < block->bbWeight))
|
|
{
|
|
// Update the weight of bTaken
|
|
bTaken->inheritWeight(block);
|
|
}
|
|
}
|
|
}
|
|
else if (bTaken->hasProfileWeight())
|
|
{
|
|
if (bTaken->countOfInEdges() == 1)
|
|
{
|
|
// Update the weight of block
|
|
block->inheritWeight(bTaken);
|
|
}
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
if (verbose)
|
|
{
|
|
printf("\nConditional folded at " FMT_BB "\n", block->bbNum);
|
|
printf(FMT_BB " becomes a %s", block->bbNum, "BBJ_ALWAYS");
|
|
printf(" to " FMT_BB, block->GetTarget()->bbNum);
|
|
printf("\n");
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
else if (block->KindIs(BBJ_SWITCH))
|
|
{
|
|
noway_assert(block->bbStmtList != nullptr && block->bbStmtList->GetPrevStmt() != nullptr);
|
|
|
|
Statement* lastStmt = block->lastStmt();
|
|
|
|
noway_assert(lastStmt->GetNextStmt() == nullptr);
|
|
|
|
if (lastStmt->GetRootNode()->gtOper == GT_CALL)
|
|
{
|
|
noway_assert(fgRemoveRestOfBlock);
|
|
|
|
// Unconditional throw - transform the basic block into a BBJ_THROW
|
|
//
|
|
fgConvertBBToThrowBB(block);
|
|
result = FoldResult::FOLD_CHANGED_CONTROL_FLOW;
|
|
JITDUMP("\nConditional folded at " FMT_BB "\n", block->bbNum);
|
|
JITDUMP(FMT_BB " becomes a BBJ_THROW\n", block->bbNum);
|
|
|
|
return result;
|
|
}
|
|
|
|
noway_assert(lastStmt->GetRootNode()->gtOper == GT_SWITCH);
|
|
|
|
// Did we fold the conditional
|
|
|
|
noway_assert(lastStmt->GetRootNode()->AsOp()->gtOp1);
|
|
GenTree* condTree = lastStmt->GetRootNode()->AsOp()->gtOp1;
|
|
GenTree* cond = condTree->gtEffectiveVal();
|
|
|
|
if (cond->OperIsConst())
|
|
{
|
|
// Yupee - we folded the conditional!
|
|
// Remove the conditional statement
|
|
|
|
noway_assert(cond->gtOper == GT_CNS_INT);
|
|
|
|
if (condTree != cond)
|
|
{
|
|
// Preserve any side effects
|
|
assert(condTree->OperIs(GT_COMMA));
|
|
lastStmt->SetRootNode(condTree);
|
|
result = FoldResult::FOLD_ALTERED_LAST_STMT;
|
|
}
|
|
else
|
|
{
|
|
// no side effects, remove the switch entirely
|
|
fgRemoveStmt(block, lastStmt);
|
|
result = FoldResult::FOLD_REMOVED_LAST_STMT;
|
|
}
|
|
|
|
// modify the flow graph
|
|
|
|
// Find the actual jump target
|
|
size_t switchVal = (size_t)cond->AsIntCon()->gtIconVal;
|
|
unsigned jumpCnt = block->GetSwitchTargets()->bbsCount;
|
|
FlowEdge** jumpTab = block->GetSwitchTargets()->bbsDstTab;
|
|
bool foundVal = false;
|
|
|
|
for (unsigned val = 0; val < jumpCnt; val++, jumpTab++)
|
|
{
|
|
FlowEdge* curEdge = *jumpTab;
|
|
|
|
assert(curEdge->getDestinationBlock()->countOfInEdges() > 0);
|
|
|
|
// If val matches switchVal or we are at the last entry and
|
|
// we never found the switch value then set the new jump dest
|
|
|
|
if ((val == switchVal) || (!foundVal && (val == jumpCnt - 1)))
|
|
{
|
|
block->SetKindAndTargetEdge(BBJ_ALWAYS, curEdge);
|
|
foundVal = true;
|
|
}
|
|
else
|
|
{
|
|
// Remove 'curEdge'
|
|
fgRemoveRefPred(curEdge);
|
|
}
|
|
}
|
|
|
|
assert(foundVal);
|
|
#ifdef DEBUG
|
|
if (verbose)
|
|
{
|
|
printf("\nConditional folded at " FMT_BB "\n", block->bbNum);
|
|
printf(FMT_BB " becomes a %s", block->bbNum, "BBJ_ALWAYS");
|
|
printf(" to " FMT_BB, block->GetTarget()->bbNum);
|
|
printf("\n");
|
|
}
|
|
#endif
|
|
}
|
|
}
|
|
return result;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgMorphBlockStmt: morph a single statement in a block.
|
|
//
|
|
// Arguments:
|
|
// block - block containing the statement
|
|
// stmt - statement to morph
|
|
// msg - string to identify caller in a dump
|
|
//
|
|
// Returns:
|
|
// true if 'stmt' was removed from the block.
|
|
// false if 'stmt' is still in the block (even if other statements were removed).
|
|
//
|
|
// Notes:
|
|
// Can be called anytime, unlike fgMorphStmts() which should only be called once.
|
|
//
|
|
bool Compiler::fgMorphBlockStmt(BasicBlock* block, Statement* stmt DEBUGARG(const char* msg))
|
|
{
|
|
assert(block != nullptr);
|
|
assert(stmt != nullptr);
|
|
|
|
// Reset some ambient state
|
|
fgRemoveRestOfBlock = false;
|
|
compCurBB = block;
|
|
compCurStmt = stmt;
|
|
|
|
GenTree* morph = fgMorphTree(stmt->GetRootNode());
|
|
|
|
// Bug 1106830 - During the CSE phase we can't just remove
|
|
// morph->AsOp()->gtOp2 as it could contain CSE expressions.
|
|
// This leads to a noway_assert in OptCSE.cpp when
|
|
// searching for the removed CSE ref. (using gtFindLink)
|
|
//
|
|
if (!optValnumCSE_phase)
|
|
{
|
|
// Check for morph as a GT_COMMA with an unconditional throw
|
|
if (fgIsCommaThrow(morph, true))
|
|
{
|
|
#ifdef DEBUG
|
|
if (verbose)
|
|
{
|
|
printf("Folding a top-level fgIsCommaThrow stmt\n");
|
|
printf("Removing op2 as unreachable:\n");
|
|
gtDispTree(morph->AsOp()->gtOp2);
|
|
printf("\n");
|
|
}
|
|
#endif
|
|
// Use the call as the new stmt
|
|
morph = morph->AsOp()->gtOp1;
|
|
noway_assert(morph->gtOper == GT_CALL);
|
|
}
|
|
|
|
// we can get a throw as a statement root
|
|
if (fgIsThrow(morph))
|
|
{
|
|
#ifdef DEBUG
|
|
if (verbose)
|
|
{
|
|
printf("We have a top-level fgIsThrow stmt\n");
|
|
printf("Removing the rest of block as unreachable:\n");
|
|
}
|
|
#endif
|
|
noway_assert((morph->gtFlags & GTF_COLON_COND) == 0);
|
|
fgRemoveRestOfBlock = true;
|
|
}
|
|
}
|
|
|
|
stmt->SetRootNode(morph);
|
|
|
|
// Can the entire tree be removed?
|
|
bool removedStmt = false;
|
|
|
|
// Defer removing statements during CSE so we don't inadvertently remove any CSE defs.
|
|
if (!optValnumCSE_phase)
|
|
{
|
|
removedStmt = fgCheckRemoveStmt(block, stmt);
|
|
}
|
|
|
|
// Or this is the last statement of a conditional branch that was just folded?
|
|
if (!removedStmt && (stmt->GetNextStmt() == nullptr) && !fgRemoveRestOfBlock)
|
|
{
|
|
FoldResult const fr = fgFoldConditional(block);
|
|
removedStmt = (fr == FoldResult::FOLD_REMOVED_LAST_STMT);
|
|
}
|
|
|
|
if (!removedStmt)
|
|
{
|
|
// Have to re-do the evaluation order since for example some later code does not expect constants as op1
|
|
gtSetStmtInfo(stmt);
|
|
|
|
// This may be called both when the nodes are linked and when they aren't.
|
|
if (fgNodeThreading == NodeThreading::AllTrees)
|
|
{
|
|
fgSetStmtSeq(stmt);
|
|
}
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
if (verbose)
|
|
{
|
|
printf("%s %s tree:\n", msg, (removedStmt ? "removed" : "morphed"));
|
|
gtDispTree(morph);
|
|
printf("\n");
|
|
}
|
|
#endif
|
|
|
|
if (fgRemoveRestOfBlock)
|
|
{
|
|
// Remove the rest of the stmts in the block
|
|
for (Statement* removeStmt : StatementList(stmt->GetNextStmt()))
|
|
{
|
|
fgRemoveStmt(block, removeStmt);
|
|
}
|
|
|
|
// The rest of block has been removed and we will always throw an exception.
|
|
//
|
|
// For compDbgCode, we prepend an empty BB as the firstBB, it is BBJ_ALWAYS.
|
|
// We should not convert it to a ThrowBB.
|
|
if ((block != fgFirstBB) || !fgFirstBB->HasFlag(BBF_INTERNAL))
|
|
{
|
|
// Convert block to a throw bb
|
|
fgConvertBBToThrowBB(block);
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
if (verbose)
|
|
{
|
|
printf("\n%s Block " FMT_BB " becomes a throw block.\n", msg, block->bbNum);
|
|
}
|
|
#endif
|
|
fgRemoveRestOfBlock = false;
|
|
}
|
|
|
|
return removedStmt;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
// fgMorphStmtBlockOps: Morph all block ops in the specified statement.
//
// Arguments:
//   block - the block containing the statement
//   stmt - the statement
//
void Compiler::fgMorphStmtBlockOps(BasicBlock* block, Statement* stmt)
{
    struct Visitor : GenTreeVisitor<Visitor>
    {
        enum
        {
            DoPostOrder = true,
        };

        Visitor(Compiler* comp)
            : GenTreeVisitor(comp)
        {
        }

        fgWalkResult PostOrderVisit(GenTree** use, GenTree* user)
        {
            if ((*use)->OperIsBlkOp())
            {
                if ((*use)->OperIsInitBlkOp())
                {
                    *use = m_compiler->fgMorphInitBlock(*use);
                }
                else
                {
                    *use = m_compiler->fgMorphCopyBlock(*use);
                }
            }

            return WALK_CONTINUE;
        }
    };

    compCurBB   = block;
    compCurStmt = stmt;
    Visitor visitor(this);
    visitor.WalkTree(stmt->GetRootNodePointer(), nullptr);

    gtSetStmtInfo(stmt);

    if (fgNodeThreading == NodeThreading::AllTrees)
    {
        fgSetStmtSeq(stmt);
    }
}

//------------------------------------------------------------------------
|
|
// fgMorphStmts: Morph all statements in a block
|
|
//
|
|
// Arguments:
|
|
// block - block in question
|
|
//
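// In outline (summarizing the code below): each statement is morphed in turn;
// statements that become no-ops are removed, a trailing foldable conditional
// is folded, and a statement that morphs into an unconditional throw causes
// the rest of the block to be removed and the block converted to BBJ_THROW.
//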
|
|
void Compiler::fgMorphStmts(BasicBlock* block)
|
|
{
|
|
fgRemoveRestOfBlock = false;
|
|
|
|
for (Statement* const stmt : block->Statements())
|
|
{
|
|
if (fgRemoveRestOfBlock)
|
|
{
|
|
fgRemoveStmt(block, stmt);
|
|
continue;
|
|
}
|
|
|
|
fgMorphStmt = stmt;
|
|
compCurStmt = stmt;
|
|
GenTree* oldTree = stmt->GetRootNode();
|
|
|
|
#ifdef DEBUG
|
|
|
|
unsigned oldHash = verbose ? gtHashValue(oldTree) : DUMMY_INIT(~0);
|
|
|
|
if (verbose)
|
|
{
|
|
printf("\nfgMorphTree " FMT_BB ", " FMT_STMT " (before)\n", block->bbNum, stmt->GetID());
|
|
gtDispTree(oldTree);
|
|
}
|
|
#endif
|
|
|
|
/* Morph this statement tree */
|
|
|
|
GenTree* morphedTree = fgMorphTree(oldTree);
|
|
|
|
// Has fgMorphStmt been sneakily changed ?
|
|
|
|
if ((stmt->GetRootNode() != oldTree) || (block != compCurBB))
|
|
{
|
|
if (stmt->GetRootNode() != oldTree)
|
|
{
|
|
                /* This must be a tail call. Ignore 'morphedTree' and carry on with
                   the tail-call node */
|
|
|
|
morphedTree = stmt->GetRootNode();
|
|
}
|
|
else
|
|
{
|
|
/* This must be a tailcall that caused a GCPoll to get
|
|
injected. We haven't actually morphed the call yet
|
|
but the flag still got set, clear it here... */
|
|
|
|
#ifdef DEBUG
|
|
morphedTree->gtDebugFlags &= ~GTF_DEBUG_NODE_MORPHED;
|
|
#endif
|
|
}
|
|
|
|
noway_assert(compTailCallUsed);
|
|
noway_assert(morphedTree->gtOper == GT_CALL);
|
|
GenTreeCall* call = morphedTree->AsCall();
|
|
// Could be
|
|
// - a fast call made as jmp in which case block will be ending with
|
|
// BBJ_RETURN (as we need epilog) and marked as containing a jmp.
|
|
// - a tailcall dispatched via JIT helper, on x86, in which case
|
|
// block will be ending with BBJ_THROW.
|
|
// - a tail call dispatched via runtime help (IL stubs), in which
|
|
// case there will not be any tailcall and the block will be ending
|
|
// with BBJ_RETURN (as normal control flow)
|
|
noway_assert((call->IsFastTailCall() && compCurBB->KindIs(BBJ_RETURN) && compCurBB->HasFlag(BBF_HAS_JMP)) ||
|
|
(call->IsTailCallViaJitHelper() && compCurBB->KindIs(BBJ_THROW)) ||
|
|
(!call->IsTailCall() && compCurBB->KindIs(BBJ_RETURN)));
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
if (compStressCompile(STRESS_CLONE_EXPR, 30))
|
|
{
|
|
// Clone all the trees to stress gtCloneExpr()
|
|
|
|
if (verbose)
|
|
{
|
|
printf("\nfgMorphTree (stressClone from):\n");
|
|
gtDispTree(morphedTree);
|
|
}
|
|
|
|
morphedTree = gtCloneExpr(morphedTree);
|
|
noway_assert(morphedTree != nullptr);
|
|
|
|
if (verbose)
|
|
{
|
|
printf("\nfgMorphTree (stressClone to):\n");
|
|
gtDispTree(morphedTree);
|
|
}
|
|
}
|
|
|
|
        /* If the hash value changes, we modified the tree during morphing */
|
|
if (verbose)
|
|
{
|
|
unsigned newHash = gtHashValue(morphedTree);
|
|
if (newHash != oldHash)
|
|
{
|
|
printf("\nfgMorphTree " FMT_BB ", " FMT_STMT " (after)\n", block->bbNum, stmt->GetID());
|
|
gtDispTree(morphedTree);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
/* Check for morphedTree as a GT_COMMA with an unconditional throw */
|
|
if (!gtIsActiveCSE_Candidate(morphedTree) && fgIsCommaThrow(morphedTree, true))
|
|
{
|
|
/* Use the call as the new stmt */
|
|
morphedTree = morphedTree->AsOp()->gtOp1;
|
|
noway_assert(morphedTree->gtOper == GT_CALL);
|
|
noway_assert((morphedTree->gtFlags & GTF_COLON_COND) == 0);
|
|
|
|
fgRemoveRestOfBlock = true;
|
|
}
|
|
|
|
stmt->SetRootNode(morphedTree);
|
|
|
|
if (fgRemoveRestOfBlock)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
/* Has the statement been optimized away */
|
|
|
|
if (fgCheckRemoveStmt(block, stmt))
|
|
{
|
|
continue;
|
|
}
|
|
|
|
/* Check if this block ends with a conditional branch that can be folded */
|
|
|
|
if (fgFoldConditional(block) != FoldResult::FOLD_DID_NOTHING)
|
|
{
|
|
continue;
|
|
}
|
|
|
|
if (ehBlockHasExnFlowDsc(block))
|
|
{
|
|
continue;
|
|
}
|
|
}
|
|
|
|
if (fgRemoveRestOfBlock)
|
|
{
|
|
if (block->KindIs(BBJ_COND, BBJ_SWITCH))
|
|
{
|
|
Statement* first = block->firstStmt();
|
|
noway_assert(first);
|
|
Statement* lastStmt = block->lastStmt();
|
|
noway_assert(lastStmt && lastStmt->GetNextStmt() == nullptr);
|
|
GenTree* last = lastStmt->GetRootNode();
|
|
|
|
if ((block->KindIs(BBJ_COND) && (last->gtOper == GT_JTRUE)) ||
|
|
(block->KindIs(BBJ_SWITCH) && (last->gtOper == GT_SWITCH)))
|
|
{
|
|
GenTree* op1 = last->AsOp()->gtOp1;
|
|
|
|
if (op1->OperIsCompare())
|
|
{
|
|
/* Unmark the comparison node with GTF_RELOP_JMP_USED */
|
|
op1->gtFlags &= ~GTF_RELOP_JMP_USED;
|
|
}
|
|
|
|
lastStmt->SetRootNode(fgMorphTree(op1));
|
|
}
|
|
}
|
|
|
|
/* Mark block as a BBJ_THROW block */
|
|
fgConvertBBToThrowBB(block);
|
|
}
|
|
|
|
#if FEATURE_FASTTAILCALL
|
|
GenTree* recursiveTailCall = nullptr;
|
|
if (block->endsWithTailCallConvertibleToLoop(this, &recursiveTailCall))
|
|
{
|
|
fgMorphRecursiveFastTailCallIntoLoop(block, recursiveTailCall->AsCall());
|
|
}
|
|
#endif
|
|
|
|
// Reset this back so that it doesn't leak out impacting other blocks
|
|
fgRemoveRestOfBlock = false;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgMorphBlock: Morph a basic block
|
|
//
|
|
// Arguments:
|
|
// block - block in question
|
|
//
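// In outline (summarizing the code below): when cross-block local assertion
// prop is enabled, the incoming assertion set is the intersection of the
// already-computed outgoing sets of the block's preds (using the "if true" or
// "if false" set for BBJ_COND preds); after the block is morphed, its own
// outgoing set(s) are published for use by later blocks in the RPO.
//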
|
|
void Compiler::fgMorphBlock(BasicBlock* block)
|
|
{
|
|
JITDUMP("\nMorphing " FMT_BB "\n", block->bbNum);
|
|
|
|
if (optLocalAssertionProp)
|
|
{
|
|
if (!optCrossBlockLocalAssertionProp)
|
|
{
|
|
// Each block starts with an empty table, and no available assertions
|
|
//
|
|
optAssertionReset(0);
|
|
apLocal = BitVecOps::MakeEmpty(apTraits);
|
|
}
|
|
else
|
|
{
|
|
// Determine if this block can leverage assertions from its pred blocks.
|
|
//
|
|
// Some blocks are ineligible.
|
|
//
|
|
bool canUsePredAssertions = !block->HasFlag(BBF_CAN_ADD_PRED) && !bbIsHandlerBeg(block);
|
|
|
|
// Validate all preds have valid info
|
|
//
|
|
if (!canUsePredAssertions)
|
|
{
|
|
JITDUMP(FMT_BB " ineligible for cross-block\n", block->bbNum);
|
|
}
|
|
else
|
|
{
|
|
bool hasPredAssertions = false;
|
|
|
|
for (BasicBlock* const pred : block->PredBlocks())
|
|
{
|
|
assert(m_dfsTree->Contains(pred)); // We should have removed dead blocks before this.
|
|
|
|
// A smaller pred postorder number means the pred appears later in the reverse postorder.
|
|
// An equal number means pred == block (block is a self-loop).
|
|
// Either way the assertion info is not available, and we must assume the worst.
|
|
//
|
|
if (pred->bbPostorderNum <= block->bbPostorderNum)
|
|
{
|
|
JITDUMP(FMT_BB " pred " FMT_BB " not processed; clearing assertions in\n", block->bbNum,
|
|
pred->bbNum);
|
|
hasPredAssertions = false;
|
|
break;
|
|
}
|
|
|
|
// Yes, pred assertions are available.
|
|
// If the pred is (a non-degenerate) BBJ_COND, fetch the appropriate out set.
|
|
//
|
|
ASSERT_TP assertionsOut;
|
|
const bool useCondAssertions = pred->KindIs(BBJ_COND) && (pred->NumSucc() == 2);
|
|
|
|
if (useCondAssertions)
|
|
{
|
|
if (block == pred->GetTrueTarget())
|
|
{
|
|
JITDUMP("Using `if true` assertions from pred " FMT_BB "\n", pred->bbNum);
|
|
assertionsOut = pred->bbAssertionOutIfTrue;
|
|
}
|
|
else
|
|
{
|
|
assert(block == pred->GetFalseTarget());
|
|
JITDUMP("Using `if false` assertions from pred " FMT_BB "\n", pred->bbNum);
|
|
assertionsOut = pred->bbAssertionOutIfFalse;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
assertionsOut = pred->bbAssertionOut;
|
|
}
|
|
|
|
// If this is the first pred, copy (or share, when block is the only successor).
|
|
// If this is a subsequent pred, intersect.
|
|
//
|
|
if (!hasPredAssertions)
|
|
{
|
|
if (pred->NumSucc() == 1)
|
|
{
|
|
apLocal = assertionsOut;
|
|
}
|
|
else
|
|
{
|
|
apLocal = BitVecOps::MakeCopy(apTraits, assertionsOut);
|
|
}
|
|
hasPredAssertions = true;
|
|
}
|
|
else
|
|
{
|
|
BitVecOps::IntersectionD(apTraits, apLocal, assertionsOut);
|
|
}
|
|
}
|
|
|
|
if (!hasPredAssertions)
|
|
{
|
|
// Either no preds, or some preds w/o assertions.
|
|
//
|
|
canUsePredAssertions = false;
|
|
}
|
|
}
|
|
|
|
if (!canUsePredAssertions)
|
|
{
|
|
apLocal = BitVecOps::MakeEmpty(apTraits);
|
|
}
|
|
|
|
JITDUMPEXEC(optDumpAssertionIndices("Assertions in: ", apLocal));
|
|
}
|
|
}
|
|
|
|
// Make the current basic block address available globally.
|
|
compCurBB = block;
|
|
|
|
// Process all statement trees in the basic block.
|
|
fgMorphStmts(block);
|
|
|
|
// Do we need to merge the result of this block into a single return block?
|
|
if (block->KindIs(BBJ_RETURN) && !block->HasFlag(BBF_HAS_JMP))
|
|
{
|
|
if ((genReturnBB != nullptr) && (genReturnBB != block))
|
|
{
|
|
fgMergeBlockReturn(block);
|
|
}
|
|
}
|
|
|
|
// Publish the live out state.
|
|
//
|
|
if (optCrossBlockLocalAssertionProp && (block->NumSucc() > 0))
|
|
{
|
|
assert(optLocalAssertionProp);
|
|
|
|
if (block->KindIs(BBJ_COND))
|
|
{
|
|
// We don't need to make a copy of the if true set; this BV
|
|
// was freshly copied in fgAssertionGen
|
|
//
|
|
block->bbAssertionOutIfTrue = apLocalIfTrue;
|
|
block->bbAssertionOutIfFalse = BitVecOps::MakeCopy(apTraits, apLocal);
|
|
}
|
|
else
|
|
{
|
|
block->bbAssertionOut = BitVecOps::MakeCopy(apTraits, apLocal);
|
|
}
|
|
}
|
|
|
|
compCurBB = nullptr;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgMorphBlocks: Morph all blocks in the method
|
|
//
|
|
// Returns:
|
|
// Suitable phase status.
|
|
//
|
|
// Note:
|
|
// Morph almost always changes IR, so we don't actually bother to
|
|
// track if it made any changes.
|
|
//
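// In outline (summarizing the code below): when optimizing, local assertion
// prop is enabled and blocks are morphed in reverse post order so that pred
// assertion sets are available; otherwise blocks are simply morphed in
// bbNext order.
//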
|
|
PhaseStatus Compiler::fgMorphBlocks()
|
|
{
|
|
// This is the one and only global morph phase
|
|
//
|
|
fgGlobalMorph = true;
|
|
|
|
if (opts.OptimizationEnabled())
|
|
{
|
|
// Local assertion prop is enabled if we are optimizing.
|
|
//
|
|
optAssertionInit(/* isLocalProp*/ true);
|
|
}
|
|
else
|
|
{
|
|
// Not optimizing. No assertion prop.
|
|
//
|
|
optLocalAssertionProp = false;
|
|
optCrossBlockLocalAssertionProp = false;
|
|
}
|
|
|
|
if (!compEnregLocals())
|
|
{
|
|
// Morph is checking if lvDoNotEnregister is already set for some optimizations.
|
|
// If we are running without `CLFLG_REGVAR` flag set (`compEnregLocals() == false`)
|
|
// then we already know that we won't enregister any locals and it is better to set
|
|
// this flag before we start reading it.
|
|
// The main reason why this flag is not set is that we are running in minOpts.
|
|
lvSetMinOptsDoNotEnreg();
|
|
}
|
|
|
|
// Ensure the first BB is scratch if we might need it as a pred for
|
|
// the recursive tail call to loop optimization.
|
|
//
|
|
if (doesMethodHaveRecursiveTailcall())
|
|
{
|
|
fgEnsureFirstBBisScratch();
|
|
}
|
|
|
|
// Morph all blocks.
|
|
//
|
|
if (!optLocalAssertionProp)
|
|
{
|
|
// If we aren't optimizing, we just morph in normal bbNext order.
|
|
//
|
|
for (BasicBlock* block : Blocks())
|
|
{
|
|
fgMorphBlock(block);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// Disallow general creation of new blocks or edges as it
|
|
// would invalidate RPO.
|
|
//
|
|
// Removal of edges, or altering dup counts, is OK.
|
|
//
|
|
INDEBUG(fgSafeBasicBlockCreation = false;);
|
|
INDEBUG(fgSafeFlowEdgeCreation = false;);
|
|
|
|
// Allow edge creation to genReturnBB (target of return merging)
|
|
// and the scratch block successor (target for tail call to loop).
|
|
// This will also disallow dataflow into these blocks.
|
|
//
|
|
if (genReturnBB != nullptr)
|
|
{
|
|
genReturnBB->SetFlags(BBF_CAN_ADD_PRED);
|
|
}
|
|
if (fgFirstBBisScratch())
|
|
{
|
|
fgFirstBB->Next()->SetFlags(BBF_CAN_ADD_PRED);
|
|
}
|
|
|
|
// Remember this so we can sanity check that no new blocks will get created.
|
|
//
|
|
unsigned const bbNumMax = fgBBNumMax;
|
|
|
|
// Morph the blocks in RPO.
|
|
//
|
|
for (unsigned i = m_dfsTree->GetPostOrderCount(); i != 0; i--)
|
|
{
|
|
BasicBlock* const block = m_dfsTree->GetPostOrder(i - 1);
|
|
fgMorphBlock(block);
|
|
}
|
|
assert(bbNumMax == fgBBNumMax);
|
|
|
|
// Re-enable block and edge creation, and revoke
|
|
// special treatment of genReturnBB and the "first" bb
|
|
//
|
|
INDEBUG(fgSafeBasicBlockCreation = true;);
|
|
INDEBUG(fgSafeFlowEdgeCreation = true;);
|
|
|
|
if (genReturnBB != nullptr)
|
|
{
|
|
genReturnBB->RemoveFlags(BBF_CAN_ADD_PRED);
|
|
}
|
|
if (fgFirstBBisScratch())
|
|
{
|
|
fgFirstBB->Next()->RemoveFlags(BBF_CAN_ADD_PRED);
|
|
}
|
|
}
|
|
|
|
// Under OSR, we no longer need to specially protect the original method entry
|
|
//
|
|
if (opts.IsOSR() && (fgEntryBB != nullptr))
|
|
{
|
|
JITDUMP("OSR: un-protecting original method entry " FMT_BB "\n", fgEntryBB->bbNum);
|
|
assert(fgEntryBBExtraRefs == 1);
|
|
assert(fgEntryBB->bbRefs >= 1);
|
|
fgEntryBB->bbRefs--;
|
|
fgEntryBBExtraRefs = 0;
|
|
|
|
// We don't need to remember this block anymore.
|
|
fgEntryBB = nullptr;
|
|
}
|
|
|
|
// We don't maintain `genReturnBB` after this point.
|
|
if (genReturnBB != nullptr)
|
|
{
|
|
// It no longer needs special "keep" treatment.
|
|
genReturnBB->RemoveFlags(BBF_DONT_REMOVE);
|
|
genReturnBB = nullptr;
|
|
}
|
|
|
|
// We are done with the global morphing phase
|
|
//
|
|
fgInvalidateDfsTree();
|
|
fgGlobalMorph = false;
|
|
fgGlobalMorphDone = true;
|
|
compCurBB = nullptr;
|
|
|
|
#ifdef DEBUG
|
|
if (optLocalAssertionProp)
|
|
{
|
|
JITDUMP("morph assertion stats: %u table size, %u assertions, %u dropped\n", optMaxAssertionCount,
|
|
optAssertionCount, optAssertionOverflow);
|
|
}
|
|
#endif
|
|
|
|
return PhaseStatus::MODIFIED_EVERYTHING;
|
|
}
|
|
|
|
//------------------------------------------------------------------------
|
|
// fgMergeBlockReturn: assign the block return value (if any) into the single return temp
|
|
// and branch to the single return block.
|
|
//
|
|
// Arguments:
|
|
// block - the block to process.
|
|
//
|
|
// Notes:
|
|
// A block is not guaranteed to have a last stmt if its jump kind is BBJ_RETURN.
|
|
// For example a method returning void could have an empty block with jump kind BBJ_RETURN.
|
|
// Such blocks do materialize as part of in-lining.
|
|
//
|
|
// A block with jump kind BBJ_RETURN does not necessarily need to end with GT_RETURN.
|
|
// It could end with a tail call or rejected tail call or monitor.exit or a GT_INTRINSIC.
|
|
// For now it is safe to explicitly check whether last stmt is GT_RETURN if genReturnLocal
|
|
// is BAD_VAR_NUM.
|
|
//
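// In outline (summarizing the code below): the return value, if any, is stored
// into genReturnLocal, the block is retargeted to jump to genReturnBB (or
// converted to a "leave" for synchronized methods on non-x86), and the block's
// profile weight is folded into genReturnBB.
//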
|
|
void Compiler::fgMergeBlockReturn(BasicBlock* block)
|
|
{
|
|
assert(block->KindIs(BBJ_RETURN) && !block->HasFlag(BBF_HAS_JMP));
|
|
assert((genReturnBB != nullptr) && (genReturnBB != block));
|
|
|
|
// TODO: Need to characterize the last top level stmt of a block ending with BBJ_RETURN.
|
|
|
|
Statement* lastStmt = block->lastStmt();
|
|
GenTree* ret = (lastStmt != nullptr) ? lastStmt->GetRootNode() : nullptr;
|
|
|
|
if ((ret != nullptr) && ret->OperIs(GT_RETURN, GT_SWIFT_ERROR_RET) && ((ret->gtFlags & GTF_RET_MERGED) != 0))
|
|
{
|
|
// This return was generated during epilog merging, so leave it alone
|
|
}
|
|
else
|
|
{
|
|
// We'll jump to the genReturnBB.
|
|
|
|
#if !defined(TARGET_X86)
|
|
if (info.compFlags & CORINFO_FLG_SYNCH)
|
|
{
|
|
fgConvertSyncReturnToLeave(block);
|
|
}
|
|
else
|
|
#endif // !TARGET_X86
|
|
{
|
|
FlowEdge* const newEdge = fgAddRefPred(genReturnBB, block);
|
|
block->SetKindAndTargetEdge(BBJ_ALWAYS, newEdge);
|
|
fgReturnCount--;
|
|
}
|
|
|
|
#ifdef SWIFT_SUPPORT
|
|
// If merging GT_SWIFT_ERROR_RET nodes, ensure the error operand is stored to the merged return error local,
|
|
// so the correct error value is retrieved in the merged return block.
|
|
if ((ret != nullptr) && ret->OperIs(GT_SWIFT_ERROR_RET))
|
|
{
|
|
assert(genReturnErrorLocal != BAD_VAR_NUM);
|
|
const DebugInfo& di = lastStmt->GetDebugInfo();
|
|
GenTree* swiftErrorStore = gtNewTempStore(genReturnErrorLocal, ret->gtGetOp1());
|
|
Statement* const newStmt = gtNewStmt(swiftErrorStore, di);
|
|
fgInsertStmtBefore(block, lastStmt, newStmt);
|
|
}
|
|
#endif // SWIFT_SUPPORT
|
|
|
|
if (genReturnLocal != BAD_VAR_NUM)
|
|
{
|
|
            // Replace the GT_RETURN/GT_SWIFT_ERROR_RET node with a STORE_LCL_VAR that stores the return value into
            // genReturnLocal.
|
|
|
|
// Method must be returning a value other than TYP_VOID.
|
|
noway_assert(compMethodHasRetVal());
|
|
|
|
// This block must be ending with a GT_RETURN/GT_SWIFT_ERROR_RET
|
|
noway_assert(lastStmt != nullptr);
|
|
noway_assert(lastStmt->GetNextStmt() == nullptr);
|
|
noway_assert(ret != nullptr);
|
|
|
|
// Return node must have non-null operand as the method is returning the value assigned to
|
|
// genReturnLocal
|
|
GenTree* const retVal = ret->AsOp()->GetReturnValue();
|
|
noway_assert(retVal != nullptr);
|
|
|
|
Statement* pAfterStatement = lastStmt;
|
|
const DebugInfo& di = lastStmt->GetDebugInfo();
|
|
GenTree* tree = gtNewTempStore(genReturnLocal, retVal, CHECK_SPILL_NONE, &pAfterStatement, di, block);
|
|
if (tree->OperIsCopyBlkOp())
|
|
{
|
|
tree = fgMorphCopyBlock(tree);
|
|
}
|
|
else if (tree->OperIsInitBlkOp())
|
|
{
|
|
tree = fgMorphInitBlock(tree);
|
|
}
|
|
|
|
if (pAfterStatement == lastStmt)
|
|
{
|
|
lastStmt->SetRootNode(tree);
|
|
}
|
|
else
|
|
{
|
|
// gtNewTempStore inserted additional statements after last
|
|
fgRemoveStmt(block, lastStmt);
|
|
Statement* newStmt = gtNewStmt(tree, di);
|
|
fgInsertStmtAfter(block, pAfterStatement, newStmt);
|
|
lastStmt = newStmt;
|
|
}
|
|
}
|
|
else if ((ret != nullptr) && ret->OperIs(GT_RETURN, GT_SWIFT_ERROR_RET))
|
|
{
|
|
// This block ends with a GT_RETURN/GT_SWIFT_ERROR_RET
|
|
noway_assert(lastStmt != nullptr);
|
|
noway_assert(lastStmt->GetNextStmt() == nullptr);
|
|
|
|
// Must be a void return node with null operand; delete it as this block branches to
|
|
// oneReturn block
|
|
GenTree* const retVal = ret->AsOp()->GetReturnValue();
|
|
noway_assert(ret->TypeGet() == TYP_VOID);
|
|
noway_assert(retVal == nullptr);
|
|
|
|
if (opts.compDbgCode && lastStmt->GetDebugInfo().IsValid())
|
|
{
|
|
// We can't remove the return as it might remove a sequence point. Convert it to a NOP.
|
|
ret->gtBashToNOP();
|
|
}
|
|
else
|
|
{
|
|
fgRemoveStmt(block, lastStmt);
|
|
}
|
|
}
|
|
|
|
JITDUMP("\nUpdate " FMT_BB " to jump to common return block.\n", block->bbNum);
|
|
DISPBLOCK(block);
|
|
|
|
if (block->hasProfileWeight())
|
|
{
|
|
weight_t const oldWeight = genReturnBB->hasProfileWeight() ? genReturnBB->bbWeight : BB_ZERO_WEIGHT;
|
|
weight_t const newWeight = oldWeight + block->bbWeight;
|
|
|
|
JITDUMP("merging profile weight " FMT_WT " from " FMT_BB " to common return " FMT_BB "\n", block->bbWeight,
|
|
block->bbNum, genReturnBB->bbNum);
|
|
|
|
genReturnBB->setBBProfileWeight(newWeight);
|
|
DISPBLOCK(genReturnBB);
|
|
}
|
|
}
|
|
}
|
|
|
|
/*****************************************************************************
|
|
*
|
|
* Make some decisions about the kind of code to generate.
|
|
*/
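// In outline (summarizing the code below): this decides whether the method must
// be fully interruptible (debug code, stress, or funclet-based EH on x86) and
// whether it needs an EBP-style frame (e.g. localloc, EH, P/Invoke frames,
// profiler hooks) or must keep frame info for GC reporting (stub param,
// varargs, generics context).
//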
|
|
|
|
void Compiler::fgSetOptions()
|
|
{
|
|
#ifdef DEBUG
|
|
/* Should we force fully interruptible code ? */
|
|
if (JitConfig.JitFullyInt() || compStressCompile(STRESS_GENERIC_VARN, 30))
|
|
{
|
|
noway_assert(!codeGen->isGCTypeFixed());
|
|
SetInterruptible(true);
|
|
}
|
|
#endif
|
|
|
|
if (opts.compDbgCode)
|
|
{
|
|
assert(!codeGen->isGCTypeFixed());
|
|
SetInterruptible(true); // debugging is easier this way ...
|
|
}
|
|
|
|
/* Assume we won't need an explicit stack frame if this is allowed */
|
|
|
|
if (compLocallocUsed)
|
|
{
|
|
codeGen->setFramePointerRequired(true);
|
|
}
|
|
|
|
#ifdef TARGET_X86
|
|
|
|
if (compTailCallUsed)
|
|
codeGen->setFramePointerRequired(true);
|
|
|
|
#endif // TARGET_X86
|
|
|
|
if (!opts.genFPopt)
|
|
{
|
|
codeGen->setFramePointerRequired(true);
|
|
}
|
|
|
|
// Assert that the EH table has been initialized by now. Note that
|
|
// compHndBBtabAllocCount never decreases; it is a high-water mark
|
|
// of table allocation. In contrast, compHndBBtabCount does shrink
|
|
// if we delete a dead EH region, and if it shrinks to zero, the
|
|
// table pointer compHndBBtab is unreliable.
|
|
assert(compHndBBtabAllocCount >= info.compXcptnsCount);
|
|
|
|
#ifdef TARGET_X86
|
|
|
|
// Note: this case, and the !X86 case below, should both use the
|
|
// !X86 path. This would require a few more changes for X86 to use
|
|
// compHndBBtabCount (the current number of EH clauses) instead of
|
|
// info.compXcptnsCount (the number of EH clauses in IL), such as
|
|
// in ehNeedsShadowSPslots(). This is because sometimes the IL has
|
|
// an EH clause that we delete as statically dead code before we
|
|
// get here, leaving no EH clauses left, and thus no requirement
|
|
// to use a frame pointer because of EH. But until all the code uses
|
|
// the same test, leave info.compXcptnsCount here.
|
|
if (info.compXcptnsCount > 0)
|
|
{
|
|
codeGen->setFramePointerRequiredEH(true);
|
|
|
|
if (UsesFunclets())
|
|
{
|
|
assert(!codeGen->isGCTypeFixed());
|
|
// Enforce fully interruptible codegen for funclet unwinding
|
|
SetInterruptible(true);
|
|
}
|
|
}
|
|
|
|
#else // !TARGET_X86
|
|
|
|
if (compHndBBtabCount > 0)
|
|
{
|
|
codeGen->setFramePointerRequiredEH(true);
|
|
}
|
|
|
|
#endif // TARGET_X86
|
|
|
|
if (compMethodRequiresPInvokeFrame())
|
|
{
|
|
codeGen->setFramePointerRequired(true); // Setup of Pinvoke frame currently requires an EBP style frame
|
|
}
|
|
|
|
if (info.compPublishStubParam)
|
|
{
|
|
codeGen->setFramePointerRequiredGCInfo(true);
|
|
}
|
|
|
|
if (compIsProfilerHookNeeded())
|
|
{
|
|
codeGen->setFramePointerRequired(true);
|
|
}
|
|
|
|
if (info.compIsVarArgs)
|
|
{
|
|
// Code that initializes lvaVarargsBaseOfStkArgs requires this to be EBP relative.
|
|
codeGen->setFramePointerRequiredGCInfo(true);
|
|
}
|
|
|
|
if (lvaReportParamTypeArg())
|
|
{
|
|
codeGen->setFramePointerRequiredGCInfo(true);
|
|
}
|
|
|
|
// printf("method will %s be fully interruptible\n", GetInterruptible() ? " " : "not");
|
|
}
|
|
|
|
/*****************************************************************************/
|
|
|
|
GenTree* Compiler::fgInitThisClass()
|
|
{
|
|
noway_assert(!compIsForInlining());
|
|
|
|
CORINFO_LOOKUP_KIND kind;
|
|
info.compCompHnd->getLocationOfThisType(info.compMethodHnd, &kind);
|
|
|
|
if (!kind.needsRuntimeLookup)
|
|
{
|
|
return fgGetSharedCCtor(info.compClassHnd);
|
|
}
|
|
else
|
|
{
|
|
#ifdef FEATURE_READYTORUN
|
|
// Only NativeAOT understands CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE. Don't do this on CoreCLR.
|
|
if (opts.IsReadyToRun() && IsTargetAbi(CORINFO_NATIVEAOT_ABI))
|
|
{
|
|
CORINFO_RESOLVED_TOKEN resolvedToken;
|
|
memset(&resolvedToken, 0, sizeof(resolvedToken));
|
|
|
|
// We are in a shared method body, but maybe we don't need a runtime lookup after all.
|
|
// This covers the case of a generic method on a non-generic type.
|
|
if (!(info.compClassAttr & CORINFO_FLG_SHAREDINST))
|
|
{
|
|
resolvedToken.hClass = info.compClassHnd;
|
|
fgSetPreferredInitCctor();
|
|
return impReadyToRunHelperToTree(&resolvedToken, m_preferredInitCctor, TYP_BYREF);
|
|
}
|
|
|
|
// We need a runtime lookup.
|
|
GenTree* ctxTree = getRuntimeContextTree(kind.runtimeLookupKind);
|
|
|
|
// CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE with a zeroed out resolvedToken means "get the static
|
|
// base of the class that owns the method being compiled". If we're in this method, it means we're not
|
|
// inlining and there's no ambiguity.
|
|
return impReadyToRunHelperToTree(&resolvedToken, CORINFO_HELP_READYTORUN_GENERIC_STATIC_BASE, TYP_BYREF,
|
|
&kind, ctxTree);
|
|
}
|
|
#endif
|
|
|
|
        // Collectible types require that for shared generic code, if we use the generic context parameter
|
|
// that we report it. (This is a conservative approach, we could detect some cases particularly when the
|
|
// context parameter is this that we don't need the eager reporting logic.)
|
|
lvaGenericsContextInUse = true;
|
|
|
|
switch (kind.runtimeLookupKind)
|
|
{
|
|
case CORINFO_LOOKUP_THISOBJ:
|
|
{
|
|
// This code takes a this pointer; but we need to pass the static method desc to get the right point in
|
|
// the hierarchy
|
|
GenTree* vtTree = gtNewLclvNode(info.compThisArg, TYP_REF);
|
|
vtTree->gtFlags |= GTF_VAR_CONTEXT;
|
|
// Vtable pointer of this object
|
|
vtTree = gtNewMethodTableLookup(vtTree);
|
|
GenTree* methodHnd = gtNewIconEmbMethHndNode(info.compMethodHnd);
|
|
|
|
return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, vtTree, methodHnd);
|
|
}
|
|
|
|
case CORINFO_LOOKUP_CLASSPARAM:
|
|
{
|
|
GenTree* vtTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
|
|
vtTree->gtFlags |= GTF_VAR_CONTEXT;
|
|
return gtNewHelperCallNode(CORINFO_HELP_INITCLASS, TYP_VOID, vtTree);
|
|
}
|
|
|
|
case CORINFO_LOOKUP_METHODPARAM:
|
|
{
|
|
GenTree* methHndTree = gtNewLclvNode(info.compTypeCtxtArg, TYP_I_IMPL);
|
|
methHndTree->gtFlags |= GTF_VAR_CONTEXT;
|
|
return gtNewHelperCallNode(CORINFO_HELP_INITINSTCLASS, TYP_VOID, gtNewIconNode(0), methHndTree);
|
|
}
|
|
|
|
default:
|
|
noway_assert(!"Unknown LOOKUP_KIND");
|
|
UNREACHABLE();
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifdef DEBUG

//------------------------------------------------------------------------
// fgPreExpandQmarkChecks: Verify that the importer has created GT_QMARK nodes
//                         in a way we can process them.
//
// Arguments:
//   expr - the statement root to check
//
// Remarks:
//   The following is allowed:
//   1. A top level qmark. Top level qmark is of the form:
//      a) (bool) ? (void) : (void) OR
//      b) V0N = (bool) ? (type) : (type)
//
//   2. Recursion is allowed at the top level, i.e., a GT_QMARK can be a child
//      of either op1 of colon or op2 of colon but not a child of any other
//      operator.
//
void Compiler::fgPreExpandQmarkChecks(GenTree* expr)
{
    GenTree* topQmark = fgGetTopLevelQmark(expr);

    // If the top level Qmark is null, then scan the tree to make sure
    // there are no qmarks within it.
    if (topQmark == nullptr)
    {
        assert(!gtTreeContainsOper(expr, GT_QMARK) && "Illegal QMARK");
    }
    else
    {
        // We could probably expand the cond node also, but don't think the extra effort is necessary,
        // so let's just assert the cond node of a top level qmark doesn't have further top level qmarks.
        assert(!gtTreeContainsOper(topQmark->gtGetOp1(), GT_QMARK) && "Illegal QMARK");

        fgPreExpandQmarkChecks(topQmark->gtGetOp2()->gtGetOp1());
        fgPreExpandQmarkChecks(topQmark->gtGetOp2()->gtGetOp2());
    }
}

//------------------------------------------------------------------------
// fgPostExpandQmarkChecks: Make sure we don't have any more GT_QMARK nodes.
//
void Compiler::fgPostExpandQmarkChecks()
{
    for (BasicBlock* const block : Blocks())
    {
        for (Statement* const stmt : block->Statements())
        {
            GenTree* expr = stmt->GetRootNode();
            assert(!gtTreeContainsOper(expr, GT_QMARK) && "QMARKs are disallowed beyond morph");
        }
    }
}

#endif // DEBUG

//------------------------------------------------------------------------
// fgGetTopLevelQmark:
//    Get the top level GT_QMARK node in a given expression.
//
// Arguments:
//    expr  - the tree, a root node that may contain a top level qmark.
//    ppDst - [optional] if the top level GT_QMARK node is stored into
//            a local, then this is that store node. Otherwise nullptr.
//
// Returns:
//    The GT_QMARK node, or nullptr if there is no top level qmark.
//
GenTreeQmark* Compiler::fgGetTopLevelQmark(GenTree* expr, GenTree** ppDst /* = NULL */)
{
    if (ppDst != nullptr)
    {
        *ppDst = nullptr;
    }

    GenTreeQmark* topQmark = nullptr;

    if (expr->gtOper == GT_QMARK)
    {
        topQmark = expr->AsQmark();
    }
    else if (expr->OperIsLocalStore() && expr->AsLclVarCommon()->Data()->OperIs(GT_QMARK))
    {
        topQmark = expr->AsLclVarCommon()->Data()->AsQmark();

        if (ppDst != nullptr)
        {
            *ppDst = expr;
        }
    }

    return topQmark;
}

//------------------------------------------------------------------------
|
|
// fgExpandQmarkStmt: expand a qmark into control flow
|
|
//
|
|
// Arguments:
|
|
// block - block containing the qmark
|
|
// stmt - statement containing the qmark
|
|
//
|
|
// Returns:
|
|
// true if the expansion introduced a throwing block
|
|
//
|
|
// Notes:
|
|
//
|
|
// Expand a statement with a top level qmark node. There are three cases, based
|
|
// on whether the qmark has both "true" and "false" arms, or just one of them.
|
|
//
|
|
// S0;
|
|
// C ? T : F;
|
|
// S1;
|
|
//
|
|
// Generates ===>
|
|
//
|
|
// bbj_always
|
|
// +---->------+
|
|
// false | |
|
|
// S0 -->-- ~C -->-- T F -->-- S1
|
|
// | |
|
|
// +--->--------+
|
|
// bbj_cond(true)
|
|
//
|
|
// -----------------------------------------
|
|
//
|
|
// S0;
|
|
// C ? T : NOP;
|
|
// S1;
|
|
//
|
|
// Generates ===>
|
|
//
|
|
// false
|
|
// S0 -->-- ~C -->-- T -->-- S1
|
|
// | |
|
|
// +-->-------------+
|
|
// bbj_cond(true)
|
|
//
|
|
// -----------------------------------------
|
|
//
|
|
// S0;
|
|
// C ? NOP : F;
|
|
// S1;
|
|
//
|
|
// Generates ===>
|
|
//
|
|
// false
|
|
// S0 -->-- C -->-- F -->-- S1
|
|
// | |
|
|
// +-->------------+
|
|
// bbj_cond(true)
|
|
//
|
|
// If the qmark is the source of a store to a local, each arm stores its
// result directly into that local in its own block.
|
|
//
|
|
bool Compiler::fgExpandQmarkStmt(BasicBlock* block, Statement* stmt)
|
|
{
|
|
bool introducedThrow = false;
|
|
GenTree* expr = stmt->GetRootNode();
|
|
|
|
// Retrieve the Qmark node to be expanded.
|
|
GenTree* dst = nullptr;
|
|
GenTreeQmark* qmark = fgGetTopLevelQmark(expr, &dst);
|
|
if (qmark == nullptr)
|
|
{
|
|
return false;
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
if (verbose)
|
|
{
|
|
printf("\nExpanding top-level qmark in " FMT_BB " (before)\n", block->bbNum);
|
|
fgDispBasicBlocks(block, block, true);
|
|
}
|
|
#endif // DEBUG
|
|
|
|
// Retrieve the operands.
|
|
GenTree* condExpr = qmark->gtGetOp1();
|
|
GenTree* trueExpr = qmark->gtGetOp2()->AsColon()->ThenNode();
|
|
GenTree* falseExpr = qmark->gtGetOp2()->AsColon()->ElseNode();
|
|
|
|
assert(!varTypeIsFloating(condExpr->TypeGet()));
|
|
|
|
bool hasTrueExpr = (trueExpr->OperGet() != GT_NOP);
|
|
bool hasFalseExpr = (falseExpr->OperGet() != GT_NOP);
|
|
assert(hasTrueExpr || hasFalseExpr); // We expect to have at least one arm of the qmark!
|
|
|
|
// Create remainder, cond and "else" blocks. After this, the blocks are in this order:
|
|
// block ... condBlock ... elseBlock ... remainderBlock
|
|
//
|
|
// We need to remember flags that exist on 'block' that we want to propagate to 'remainderBlock',
|
|
    // if they are going to be cleared by fgSplitBlockAfterStatement(). We currently only do this
|
|
// for the GC safe point bit, the logic being that if 'block' was marked gcsafe, then surely
|
|
// remainderBlock will still be GC safe.
|
|
BasicBlockFlags propagateFlagsToRemainder = block->GetFlagsRaw() & BBF_GC_SAFE_POINT;
|
|
// Conservatively propagate BBF_COPY_PROPAGATE flags to all blocks
|
|
BasicBlockFlags propagateFlagsToAll = block->GetFlagsRaw() & BBF_COPY_PROPAGATE;
|
|
BasicBlock* remainderBlock = fgSplitBlockAfterStatement(block, stmt);
|
|
|
|
BasicBlock* condBlock = fgNewBBafter(BBJ_ALWAYS, block, true);
|
|
BasicBlock* elseBlock = fgNewBBafter(BBJ_ALWAYS, condBlock, true);
|
|
|
|
// These blocks are only internal if 'block' is (but they've been set as internal by fgNewBBafter).
|
|
// If they're not internal, mark them as imported to avoid asserts about un-imported blocks.
|
|
if (!block->HasFlag(BBF_INTERNAL))
|
|
{
|
|
condBlock->RemoveFlags(BBF_INTERNAL);
|
|
elseBlock->RemoveFlags(BBF_INTERNAL);
|
|
condBlock->SetFlags(BBF_IMPORTED);
|
|
elseBlock->SetFlags(BBF_IMPORTED);
|
|
}
|
|
|
|
block->RemoveFlags(BBF_NEEDS_GCPOLL);
|
|
remainderBlock->SetFlags(propagateFlagsToRemainder | propagateFlagsToAll);
|
|
|
|
condBlock->inheritWeight(block);
|
|
|
|
// Make sure remainderBlock gets exactly the same weight as block after split
|
|
assert(condBlock->bbWeight == remainderBlock->bbWeight);
|
|
|
|
assert(block->KindIs(BBJ_ALWAYS));
|
|
fgRedirectTargetEdge(block, condBlock);
|
|
|
|
{
|
|
FlowEdge* const newEdge = fgAddRefPred(elseBlock, condBlock);
|
|
condBlock->SetTargetEdge(newEdge);
|
|
}
|
|
|
|
{
|
|
FlowEdge* const newEdge = fgAddRefPred(remainderBlock, elseBlock);
|
|
elseBlock->SetTargetEdge(newEdge);
|
|
}
|
|
|
|
assert(condBlock->JumpsToNext());
|
|
assert(elseBlock->JumpsToNext());
|
|
|
|
condBlock->SetFlags(propagateFlagsToAll);
|
|
elseBlock->SetFlags(propagateFlagsToAll);
|
|
|
|
BasicBlock* thenBlock = nullptr;
|
|
if (hasTrueExpr && hasFalseExpr)
|
|
{
|
|
// bbj_always
|
|
// +---->------+
|
|
// false | |
|
|
// S0 -->-- ~C -->-- T F -->-- S1
|
|
// | |
|
|
// +--->--------+
|
|
// bbj_cond(true)
|
|
//
|
|
gtReverseCond(condExpr);
|
|
|
|
thenBlock = fgNewBBafter(BBJ_ALWAYS, condBlock, true);
|
|
thenBlock->SetFlags(propagateFlagsToAll);
|
|
if (!block->HasFlag(BBF_INTERNAL))
|
|
{
|
|
thenBlock->RemoveFlags(BBF_INTERNAL);
|
|
thenBlock->SetFlags(BBF_IMPORTED);
|
|
}
|
|
|
|
const unsigned thenLikelihood = qmark->ThenNodeLikelihood();
|
|
const unsigned elseLikelihood = qmark->ElseNodeLikelihood();
|
|
|
|
FlowEdge* const newEdge = fgAddRefPred(remainderBlock, thenBlock);
|
|
thenBlock->SetTargetEdge(newEdge);
|
|
|
|
assert(condBlock->TargetIs(elseBlock));
|
|
FlowEdge* const elseEdge = fgAddRefPred(thenBlock, condBlock);
|
|
FlowEdge* const thenEdge = condBlock->GetTargetEdge();
|
|
condBlock->SetCond(thenEdge, elseEdge);
|
|
thenBlock->inheritWeightPercentage(condBlock, thenLikelihood);
|
|
elseBlock->inheritWeightPercentage(condBlock, elseLikelihood);
|
|
thenEdge->setLikelihood(thenLikelihood / 100.0);
|
|
elseEdge->setLikelihood(elseLikelihood / 100.0);
|
|
}
|
|
else if (hasTrueExpr)
|
|
{
|
|
// false
|
|
// S0 -->-- ~C -->-- T -->-- S1
|
|
// | |
|
|
// +-->-------------+
|
|
// bbj_cond(true)
|
|
//
|
|
gtReverseCond(condExpr);
|
|
|
|
const unsigned thenLikelihood = qmark->ThenNodeLikelihood();
|
|
const unsigned elseLikelihood = qmark->ElseNodeLikelihood();
|
|
|
|
assert(condBlock->TargetIs(elseBlock));
|
|
FlowEdge* const thenEdge = fgAddRefPred(remainderBlock, condBlock);
|
|
FlowEdge* const elseEdge = condBlock->GetTargetEdge();
|
|
condBlock->SetCond(thenEdge, elseEdge);
|
|
|
|
// Since we have no false expr, use the one we'd already created.
|
|
thenBlock = elseBlock;
|
|
elseBlock = nullptr;
|
|
|
|
thenBlock->inheritWeightPercentage(condBlock, thenLikelihood);
|
|
thenEdge->setLikelihood(thenLikelihood / 100.0);
|
|
elseEdge->setLikelihood(elseLikelihood / 100.0);
|
|
}
|
|
else if (hasFalseExpr)
|
|
{
|
|
// false
|
|
// S0 -->-- C -->-- F -->-- S1
|
|
// | |
|
|
// +-->------------+
|
|
// bbj_cond(true)
|
|
//
|
|
const unsigned thenLikelihood = qmark->ThenNodeLikelihood();
|
|
const unsigned elseLikelihood = qmark->ElseNodeLikelihood();
|
|
|
|
assert(condBlock->TargetIs(elseBlock));
|
|
FlowEdge* const thenEdge = fgAddRefPred(remainderBlock, condBlock);
|
|
FlowEdge* const elseEdge = condBlock->GetTargetEdge();
|
|
condBlock->SetCond(thenEdge, elseEdge);
|
|
|
|
elseBlock->inheritWeightPercentage(condBlock, elseLikelihood);
|
|
thenEdge->setLikelihood(thenLikelihood / 100.0);
|
|
elseEdge->setLikelihood(elseLikelihood / 100.0);
|
|
}
|
|
|
|
assert(condBlock->KindIs(BBJ_COND));
|
|
|
|
GenTree* jmpTree = gtNewOperNode(GT_JTRUE, TYP_VOID, qmark->gtGetOp1());
|
|
Statement* jmpStmt = fgNewStmtFromTree(jmpTree, stmt->GetDebugInfo());
|
|
fgInsertStmtAtEnd(condBlock, jmpStmt);
|
|
|
|
// Remove the original qmark statement.
|
|
fgRemoveStmt(block, stmt);
|
|
|
|
    // Since we have a top level qmark, we either have a dst local, in which case
    // each arm stores its result into that local, or the qmark is void and the
    // arms are just evaluated for their side effects.
|
|
unsigned dstLclNum = BAD_VAR_NUM;
|
|
if (dst != nullptr)
|
|
{
|
|
dstLclNum = dst->AsLclVarCommon()->GetLclNum();
|
|
assert(dst->OperIsLocalStore());
|
|
}
|
|
else
|
|
{
|
|
assert(qmark->TypeGet() == TYP_VOID);
|
|
}
|
|
|
|
if (hasTrueExpr)
|
|
{
|
|
if (trueExpr->OperIs(GT_CALL) && trueExpr->AsCall()->IsNoReturn())
|
|
{
|
|
Statement* trueStmt = fgNewStmtFromTree(trueExpr, stmt->GetDebugInfo());
|
|
fgInsertStmtAtEnd(thenBlock, trueStmt);
|
|
fgConvertBBToThrowBB(thenBlock);
|
|
introducedThrow = true;
|
|
}
|
|
else
|
|
{
|
|
if (dst != nullptr)
|
|
{
|
|
trueExpr = dst->OperIs(GT_STORE_LCL_FLD) ? gtNewStoreLclFldNode(dstLclNum, dst->TypeGet(),
|
|
dst->AsLclFld()->GetLclOffs(), trueExpr)
|
|
: gtNewStoreLclVarNode(dstLclNum, trueExpr)->AsLclVarCommon();
|
|
}
|
|
Statement* trueStmt = fgNewStmtFromTree(trueExpr, stmt->GetDebugInfo());
|
|
fgInsertStmtAtEnd(thenBlock, trueStmt);
|
|
}
|
|
}
|
|
|
|
// Assign the falseExpr into the dst or tmp, insert in elseBlock
|
|
if (hasFalseExpr)
|
|
{
|
|
if (falseExpr->OperIs(GT_CALL) && falseExpr->AsCall()->IsNoReturn())
|
|
{
|
|
Statement* falseStmt = fgNewStmtFromTree(falseExpr, stmt->GetDebugInfo());
|
|
fgInsertStmtAtEnd(elseBlock, falseStmt);
|
|
fgConvertBBToThrowBB(elseBlock);
|
|
introducedThrow = true;
|
|
}
|
|
else
|
|
{
|
|
if (dst != nullptr)
|
|
{
|
|
falseExpr =
|
|
dst->OperIs(GT_STORE_LCL_FLD)
|
|
? gtNewStoreLclFldNode(dstLclNum, dst->TypeGet(), dst->AsLclFld()->GetLclOffs(), falseExpr)
|
|
: gtNewStoreLclVarNode(dstLclNum, falseExpr)->AsLclVarCommon();
|
|
}
|
|
Statement* falseStmt = fgNewStmtFromTree(falseExpr, stmt->GetDebugInfo());
|
|
fgInsertStmtAtEnd(elseBlock, falseStmt);
|
|
}
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
if (verbose)
|
|
{
|
|
printf("\nExpanding top-level qmark in " FMT_BB " (after)\n", block->bbNum);
|
|
fgDispBasicBlocks(block, remainderBlock, true);
|
|
}
|
|
#endif // DEBUG
|
|
|
|
return introducedThrow;
|
|
}
|
|
|
|
/*****************************************************************************
 *
 *  Expand GT_QMARK nodes from the flow graph into basic blocks.
 *
 */

void Compiler::fgExpandQmarkNodes()
{
    bool introducedThrows = false;

    if (compQmarkUsed)
    {
        for (BasicBlock* const block : Blocks())
        {
            for (Statement* const stmt : block->Statements())
            {
                GenTree* expr = stmt->GetRootNode();
#ifdef DEBUG
                fgPreExpandQmarkChecks(expr);
#endif
                introducedThrows |= fgExpandQmarkStmt(block, stmt);
            }
        }
#ifdef DEBUG
        fgPostExpandQmarkChecks();
#endif
    }
    compQmarkRationalized = true;

    // TODO: if qmark expansion created throw blocks, try and merge them
    //
    if (introducedThrows)
    {
        JITDUMP("Qmark expansion created new throw blocks\n");
    }
}

//------------------------------------------------------------------------
|
|
// fgPromoteStructs: promote structs to collections of per-field locals
|
|
//
|
|
// Returns:
|
|
// Suitable phase status.
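// In outline (summarizing the code below): promotion is skipped entirely when
// the struct promotion opt flag is off, JitNoStructPromotion is set, or the
// method uses varargs; otherwise each pre-existing struct local is considered,
// stopping once the local count gets too high, and SIMD locals used as whole
// vectors are kept unpromoted as "reg structs".
//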
|
|
//
|
|
PhaseStatus Compiler::fgPromoteStructs()
|
|
{
|
|
if (!opts.OptEnabled(CLFLG_STRUCTPROMOTE))
|
|
{
|
|
JITDUMP(" promotion opt flag not enabled\n");
|
|
return PhaseStatus::MODIFIED_NOTHING;
|
|
}
|
|
|
|
if (fgNoStructPromotion)
|
|
{
|
|
JITDUMP(" promotion disabled by JitNoStructPromotion\n");
|
|
return PhaseStatus::MODIFIED_NOTHING;
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
if (compStressCompile(STRESS_NO_OLD_PROMOTION, 10))
|
|
{
|
|
JITDUMP(" skipping due to stress\n");
|
|
return PhaseStatus::MODIFIED_NOTHING;
|
|
}
|
|
#endif
|
|
|
|
#if 0
|
|
// The code in this #if has been useful in debugging struct promotion issues, by
|
|
// enabling selective enablement of the struct promotion optimization according to
|
|
// method hash.
|
|
#ifdef DEBUG
|
|
unsigned methHash = info.compMethodHash();
|
|
char* lostr = getenv("structpromohashlo");
|
|
unsigned methHashLo = 0;
|
|
if (lostr != NULL)
|
|
{
|
|
sscanf_s(lostr, "%x", &methHashLo);
|
|
}
|
|
char* histr = getenv("structpromohashhi");
|
|
unsigned methHashHi = UINT32_MAX;
|
|
if (histr != NULL)
|
|
{
|
|
sscanf_s(histr, "%x", &methHashHi);
|
|
}
|
|
if (methHash < methHashLo || methHash > methHashHi)
|
|
{
|
|
return PhaseStatus::MODIFIED_NOTHING;
|
|
}
|
|
else
|
|
{
|
|
printf("Promoting structs for method %s, hash = 0x%x.\n",
|
|
info.compFullName, info.compMethodHash());
|
|
printf(""); // in our logic this causes a flush
|
|
}
|
|
#endif // DEBUG
|
|
#endif // 0
|
|
|
|
if (info.compIsVarArgs)
|
|
{
|
|
JITDUMP(" promotion disabled because of varargs\n");
|
|
return PhaseStatus::MODIFIED_NOTHING;
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
if (verbose)
|
|
{
|
|
printf("\nlvaTable before fgPromoteStructs\n");
|
|
lvaTableDump();
|
|
}
|
|
#endif // DEBUG
|
|
|
|
// The lvaTable might grow as we grab temps. Make a local copy here.
|
|
unsigned startLvaCount = lvaCount;
|
|
|
|
//
|
|
// Loop through the original lvaTable. Looking for struct locals to be promoted.
|
|
//
|
|
lvaStructPromotionInfo structPromotionInfo;
|
|
bool tooManyLocalsReported = false;
|
|
bool madeChanges = false;
|
|
|
|
// Clear the structPromotionHelper, since it is used during inlining, at which point it
|
|
// may be conservative about looking up SIMD info.
|
|
// We don't want to preserve those conservative decisions for the actual struct promotion.
|
|
structPromotionHelper->Clear();
|
|
|
|
for (unsigned lclNum = 0; lclNum < startLvaCount; lclNum++)
|
|
{
|
|
// Whether this var got promoted
|
|
bool promotedVar = false;
|
|
LclVarDsc* varDsc = lvaGetDesc(lclNum);
|
|
|
|
// If we have marked this as lvUsedInSIMDIntrinsic, then we do not want to promote
|
|
// its fields. Instead, we will attempt to enregister the entire struct.
|
|
if (varTypeIsSIMD(varDsc) && (varDsc->lvIsUsedInSIMDIntrinsic() || isOpaqueSIMDLclVar(varDsc)))
|
|
{
|
|
varDsc->lvRegStruct = true;
|
|
}
|
|
// Don't promote if we have reached the tracking limit.
|
|
else if (lvaHaveManyLocals())
|
|
{
|
|
            // Print the message the first time we detect this condition
|
|
if (!tooManyLocalsReported)
|
|
{
|
|
JITDUMP("Stopped promoting struct fields, due to too many locals.\n");
|
|
}
|
|
tooManyLocalsReported = true;
|
|
}
|
|
else if (varTypeIsStruct(varDsc))
|
|
{
|
|
assert(structPromotionHelper != nullptr);
|
|
promotedVar = structPromotionHelper->TryPromoteStructVar(lclNum);
|
|
}
|
|
|
|
madeChanges |= promotedVar;
|
|
|
|
if (!promotedVar && varTypeIsSIMD(varDsc) && !varDsc->lvFieldAccessed)
|
|
{
|
|
// Even if we have not used this in a SIMD intrinsic, if it is not being promoted,
|
|
// we will treat it as a reg struct.
|
|
varDsc->lvRegStruct = true;
|
|
}
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
if (verbose && madeChanges)
|
|
{
|
|
printf("\nlvaTable after fgPromoteStructs\n");
|
|
lvaTableDump();
|
|
}
|
|
#endif // DEBUG
|
|
|
|
return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
|
|
}
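
// Illustrative note (editor's sketch, not produced by this code): when TryPromoteStructVar succeeds
// for a local such as
//
//     struct Point { int x; int y; };   // V00: TYP_STRUCT local
//
// the struct local gets independent field locals (e.g. V01 for Point.x and V02 for Point.y, both
// TYP_INT, with lvParentLcl pointing back at V00), so later phases can track, enregister, and
// optimize the fields separately. The type, names, and local numbers above are hypothetical.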

//------------------------------------------------------------------------
// fgMarkImplicitByRefCopyOmissionCandidates:
//   Find and mark all locals that are passed as implicit byref args and are
//   candidates for last-use copy omission.
//
// Remarks:
//   We must mark these locals beforehand to avoid potential reordering with
//   the call that ends up getting the address of the local. For example, if we
//   waited until morph it would be possible for morph to reorder the two
//   occurrences of V00 in
//
//     [000015] --CXG------              ▌  CALL      void   Program:Foo(int,int)
//     [000010] -----------  arg0        ├──▌  LCL_FLD   int    V00 loc0         [+0]
//     [000012] --CXG------  arg1        └──▌  CALL      int    Program:Bar(S):int
//     [000011] -----------  arg0           └──▌  LCL_VAR   struct<S, 32> V00 loc0   (last use)
//
//   to end up with
//
//     [000015] --CXG+-----              ▌  CALL      void   Program:Foo(int,int)
//     [000037] DACXG------  arg1 setup  ├──▌  STORE_LCL_VAR int V04 tmp3
//     [000012] --CXG+-----              │  └──▌  CALL      int    Program:Bar(S):int
//     [000011] -----+-----  arg0 in rcx │     └──▌  LCL_ADDR  long   V00 loc0   [+0]
//     [000038] -----------  arg1 in rdx ├──▌  LCL_VAR   int    V04 tmp3
//     [000010] -----+-----  arg0 in rcx └──▌  LCL_FLD   int   (AX) V00 loc0     [+0]
//
//   If Bar mutates V00 then this is a problem.
//
// Returns:
//   Suitable phase status.
//
PhaseStatus Compiler::fgMarkImplicitByRefCopyOmissionCandidates()
{
#if FEATURE_IMPLICIT_BYREFS && !defined(UNIX_AMD64_ABI)
    if (!fgDidEarlyLiveness)
    {
        return PhaseStatus::MODIFIED_NOTHING;
    }

    struct Visitor : GenTreeVisitor<Visitor>
    {
        enum
        {
            DoPreOrder        = true,
            UseExecutionOrder = true,
        };

        Visitor(Compiler* comp)
            : GenTreeVisitor(comp)
        {
        }

        fgWalkResult PreOrderVisit(GenTree** use, GenTree* user)
        {
            GenTree* node = *use;
            if ((node->gtFlags & GTF_CALL) == 0)
            {
                return WALK_SKIP_SUBTREES;
            }

            if (!node->IsCall())
            {
                return WALK_CONTINUE;
            }

            GenTreeCall* call = node->AsCall();

            for (CallArg& arg : call->gtArgs.Args())
            {
                if (!varTypeIsStruct(arg.GetSignatureType()))
                {
                    continue;
                }

                GenTree* argNode = arg.GetNode()->gtEffectiveVal();
                if (!argNode->OperIsLocalRead())
                {
                    continue;
                }

                unsigned   lclNum = argNode->AsLclVarCommon()->GetLclNum();
                LclVarDsc* varDsc = m_compiler->lvaGetDesc(lclNum);

                if (varDsc->lvIsLastUseCopyOmissionCandidate)
                {
                    // Already a candidate.
                    continue;
                }

                if (varDsc->lvIsImplicitByRef)
                {
                    // While implicit byrefs are candidates, they are handled
                    // specially and do not need GTF_GLOB_REF (the indirections
                    // added on top already always get them). If we marked them
                    // as a candidate fgMorphLeafLocal would add GTF_GLOB_REF
                    // to the local containing the address, which is
                    // conservative.
                    continue;
                }

                if (varDsc->lvPromoted || varDsc->lvIsStructField || ((argNode->gtFlags & GTF_VAR_DEATH) == 0))
                {
                    // Not a candidate.
                    continue;
                }

                unsigned structSize =
                    argNode->TypeIs(TYP_STRUCT) ? argNode->GetLayout(m_compiler)->GetSize() : genTypeSize(argNode);

                Compiler::structPassingKind passKind;
                m_compiler->getArgTypeForStruct(arg.GetSignatureClassHandle(), &passKind, call->IsVarargs(),
                                                structSize);

                if (passKind != SPK_ByReference)
                {
                    continue;
                }

                JITDUMP("Marking V%02u as a candidate for last-use copy omission [%06u]\n", lclNum,
                        dspTreeID(argNode));
                varDsc->lvIsLastUseCopyOmissionCandidate = 1;
            }

            return WALK_CONTINUE;
        }
    };

    Visitor visitor(this);
    for (BasicBlock* bb : Blocks())
    {
        for (Statement* stmt : bb->Statements())
        {
            // Does this have any calls?
            if ((stmt->GetRootNode()->gtFlags & GTF_CALL) == 0)
            {
                continue;
            }

            // If so, check for any struct last use and only do the expensive
            // tree walk if one exists.
            for (GenTreeLclVarCommon* lcl : stmt->LocalsTreeList())
            {
                if (!varTypeIsStruct(lcl) || !lcl->OperIsLocalRead())
                {
                    continue;
                }

                if ((lcl->gtFlags & GTF_VAR_DEATH) != 0)
                {
                    visitor.WalkTree(stmt->GetRootNodePointer(), nullptr);
                    break;
                }
            }
        }
    }
#endif

    return PhaseStatus::MODIFIED_NOTHING;
}
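
// Illustrative note (editor's sketch, not JIT output): at the source level the candidates found above
// correspond to code like
//
//     static int Bar(S s) { ... }   // S is a large struct passed by implicit byref on this ABI
//     S local = ...;
//     int r = Bar(local);           // 'local' dies here
//
// Because 'local' is not referenced afterwards, morph may pass its address directly instead of making
// a defensive copy, which is why the last use must be identified before any reordering can occur.
// The method and variable names above are hypothetical.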

//------------------------------------------------------------------------
// fgRetypeImplicitByRefArgs: Update the types on implicit byref parameters' `LclVarDsc`s (from
//                            struct to pointer). Also choose (based on address-exposed analysis)
//                            which struct promotions of implicit byrefs to keep or discard.
//                            For those which are kept, insert the appropriate initialization code.
//                            For those which are to be discarded, annotate the promoted field locals
//                            so that fgMorphExpandImplicitByRefArg will know to rewrite their
//                            appearances using indirections off the pointer parameters.
//
// Returns:
//    Suitable phase status
//
PhaseStatus Compiler::fgRetypeImplicitByRefArgs()
{
    bool madeChanges = false;

#if FEATURE_IMPLICIT_BYREFS

    for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
    {
        LclVarDsc* varDsc = lvaGetDesc(lclNum);

        if (lvaIsImplicitByRefLocal(lclNum))
        {
            madeChanges = true;

            if (varDsc->lvPromoted)
            {
                // This implicit-by-ref was promoted; create a new temp to represent the
                // promoted struct before rewriting this parameter as a pointer.
                unsigned newLclNum = lvaGrabTemp(false DEBUGARG("Promoted implicit byref"));
                // Update varDsc since lvaGrabTemp might have re-allocated the var dsc array.
                varDsc = lvaGetDesc(lclNum);

                lvaSetStruct(newLclNum, varDsc->GetLayout(), true);
                if (info.compIsVarArgs)
                {
                    lvaSetStructUsedAsVarArg(newLclNum);
                }

                // Copy the struct promotion annotations to the new temp.
                LclVarDsc* newVarDsc       = lvaGetDesc(newLclNum);
                newVarDsc->lvPromoted      = true;
                newVarDsc->lvFieldLclStart = varDsc->lvFieldLclStart;
                newVarDsc->lvFieldCnt      = varDsc->lvFieldCnt;
                newVarDsc->lvContainsHoles = varDsc->lvContainsHoles;
#ifdef DEBUG
                newVarDsc->lvKeepType = true;
#endif // DEBUG

                // Propagate address-taken-ness and do-not-enregister-ness.
                newVarDsc->SetAddressExposed(varDsc->IsAddressExposed() DEBUGARG(varDsc->GetAddrExposedReason()));
                newVarDsc->lvDoNotEnregister       = varDsc->lvDoNotEnregister;
                newVarDsc->lvLiveInOutOfHndlr      = varDsc->lvLiveInOutOfHndlr;
                newVarDsc->lvSingleDef             = varDsc->lvSingleDef;
                newVarDsc->lvSingleDefRegCandidate = varDsc->lvSingleDefRegCandidate;
                newVarDsc->lvSpillAtSingleDef      = varDsc->lvSpillAtSingleDef;
#ifdef DEBUG
                newVarDsc->SetDoNotEnregReason(varDsc->GetDoNotEnregReason());
#endif // DEBUG

                // If the promotion is dependent, the promoted temp would just be committed
                // to memory anyway, so we'll rewrite its appearances to be indirections
                // through the pointer parameter, the same as we'd do for this
                // parameter if it weren't promoted at all (otherwise the initialization
                // of the new temp would just be a needless memcpy at method entry).
                //
                // Otherwise, see how many appearances there are. We keep two early ref counts: total
                // number of references to the struct or some field, and how many of these are
                // arguments to calls. We undo promotion unless we see enough non-call uses.
                //
                const unsigned totalAppearances = varDsc->lvRefCnt(RCS_EARLY);
                const unsigned callAppearances  = (unsigned)varDsc->lvRefCntWtd(RCS_EARLY);
                assert(totalAppearances >= callAppearances);
                const unsigned nonCallAppearances = totalAppearances - callAppearances;

                bool undoPromotion = ((lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) ||
                                      (nonCallAppearances <= varDsc->lvFieldCnt));

#ifdef DEBUG
                // Above is a profitability heuristic; either value of
                // undoPromotion should lead to correct code. So,
                // under stress, make different decisions at times.
                if (compStressCompile(STRESS_BYREF_PROMOTION, 25))
                {
                    undoPromotion = !undoPromotion;
                    JITDUMP("Stress -- changing byref undo promotion for V%02u to %s undo\n", lclNum,
                            undoPromotion ? "" : "NOT");
                }
#endif // DEBUG

                JITDUMP("%s promotion of implicit by-ref V%02u: %s total: %u non-call: %u fields: %u\n",
                        undoPromotion ? "Undoing" : "Keeping", lclNum,
                        (lvaGetPromotionType(newVarDsc) == PROMOTION_TYPE_DEPENDENT) ? "dependent;" : "",
                        totalAppearances, nonCallAppearances, varDsc->lvFieldCnt);

                if (!undoPromotion)
                {
                    // Insert IR that initializes the temp from the parameter.
                    // The first BB should already be a valid insertion point,
                    // which is a precondition for this phase when optimizing.
                    assert(fgFirstBB->bbPreds == nullptr);
                    GenTree* addr = gtNewLclvNode(lclNum, TYP_BYREF);
                    GenTree* data = (varDsc->TypeGet() == TYP_STRUCT) ? gtNewBlkIndir(varDsc->GetLayout(), addr)
                                                                      : gtNewIndir(varDsc->TypeGet(), addr);
                    GenTree* store = gtNewStoreLclVarNode(newLclNum, data);
                    fgNewStmtAtBeg(fgFirstBB, store);
                }

                // Update the locals corresponding to the promoted fields.
                unsigned fieldLclStart = varDsc->lvFieldLclStart;
                unsigned fieldCount    = varDsc->lvFieldCnt;
                unsigned fieldLclStop  = fieldLclStart + fieldCount;

                for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum)
                {
                    LclVarDsc* fieldVarDsc = lvaGetDesc(fieldLclNum);

                    if (undoPromotion)
                    {
                        // Leave lvParentLcl pointing to the parameter so that fgMorphExpandImplicitByRefArg
                        // will know to rewrite appearances of this local.
                        assert(fieldVarDsc->lvParentLcl == lclNum);
                    }
                    else
                    {
                        // Set the new parent.
                        fieldVarDsc->lvParentLcl = newLclNum;
                    }

                    fieldVarDsc->lvIsParam = false;
                    // The fields shouldn't inherit any register preferences from
                    // the parameter which is really a pointer to the struct.
                    fieldVarDsc->lvIsRegArg      = false;
                    fieldVarDsc->lvIsMultiRegArg = false;
                    fieldVarDsc->SetArgReg(REG_NA);
#if FEATURE_MULTIREG_ARGS
                    fieldVarDsc->SetOtherArgReg(REG_NA);
#endif
                    // Promoted fields of implicit byrefs can't be OSR locals.
                    //
                    if (fieldVarDsc->lvIsOSRLocal)
                    {
                        assert(opts.IsOSR());
                        fieldVarDsc->lvIsOSRLocal        = false;
                        fieldVarDsc->lvIsOSRExposedLocal = false;
                    }
                }

                // Hijack lvFieldLclStart to record the new temp number.
                // It will get fixed up in fgMarkDemotedImplicitByRefArgs.
                varDsc->lvFieldLclStart = newLclNum;
                // Go ahead and clear lvFieldCnt -- either we're promoting
                // a replacement temp or we're not promoting this arg, and
                // in either case the parameter is now a pointer that doesn't
                // have these fields.
                varDsc->lvFieldCnt = 0;

                // Hijack lvPromoted to communicate to fgMorphExpandImplicitByRefArg
                // whether references to the struct should be rewritten as
                // indirections off the pointer (not promoted) or references
                // to the new struct local (promoted).
                varDsc->lvPromoted = !undoPromotion;
            }
            else
            {
                // The "undo promotion" path above clears lvPromoted for args that struct
                // promotion wanted to promote but that aren't considered profitable to
                // rewrite. It hijacks lvFieldLclStart to communicate to
                // fgMarkDemotedImplicitByRefArgs that it needs to clean up annotations left
                // on such args for fgMorphExpandImplicitByRefArg to consult in the interim.
                // Here we have an arg that was simply never promoted, so make sure it doesn't
                // have nonzero lvFieldLclStart, since that would confuse the aforementioned
                // functions.
                assert(varDsc->lvFieldLclStart == 0);
            }

            // Since the parameter in this position is really a pointer, its type is TYP_BYREF.
            varDsc->lvType = TYP_BYREF;

            // The struct parameter may have had its address taken, but the pointer parameter
            // cannot -- any uses of the struct parameter's address are uses of the pointer
            // parameter's value, and there's no way for the MSIL to reference the pointer
            // parameter's address. So clear the address-taken bit for the parameter.
            varDsc->CleanAddressExposed();
            varDsc->lvDoNotEnregister = 0;

#ifdef DEBUG
            // This should not be converted to a double in stress mode,
            // because it is really a pointer
            varDsc->lvKeepType = 1;

            if (verbose)
            {
                printf("Changing the lvType for struct parameter V%02d to TYP_BYREF.\n", lclNum);
            }
#endif // DEBUG
        }
    }

#endif // FEATURE_IMPLICIT_BYREFS

    return madeChanges ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
}
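
// Worked example (editor's note, hypothetical counts): suppose an implicit-byref struct parameter V00
// was promoted into 4 field locals, and early ref counting recorded totalAppearances = 7 with
// callAppearances = 3, so nonCallAppearances = 4. Since nonCallAppearances (4) is not greater than
// lvFieldCnt (4), the promotion is undone and V00 is simply retyped to TYP_BYREF; with one more
// non-call use (5 > 4), and an independent promotion, the promotion would instead be kept and a
// struct temp initialized from the pointer at method entry. Either decision is correct; this is
// purely the profitability heuristic described above.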

//------------------------------------------------------------------------
// fgMarkDemotedImplicitByRefArgs: Clear annotations for any implicit byrefs that struct promotion
//                                 asked to promote. Appearances of these have now been rewritten
//                                 (by fgMorphExpandImplicitByRefArg) using indirections from
//                                 the pointer parameter or references to the promotion temp, as
//                                 appropriate.
//
void Compiler::fgMarkDemotedImplicitByRefArgs()
{
    JITDUMP("\n*************** In fgMarkDemotedImplicitByRefArgs()\n");

#if FEATURE_IMPLICIT_BYREFS

    for (unsigned lclNum = 0; lclNum < info.compArgsCount; lclNum++)
    {
        LclVarDsc* varDsc = lvaGetDesc(lclNum);

        if (lvaIsImplicitByRefLocal(lclNum))
        {
            JITDUMP("Clearing annotation for V%02d\n", lclNum);

            if (varDsc->lvPromoted)
            {
                // The parameter is simply a pointer now, so clear lvPromoted. It was left set by
                // fgRetypeImplicitByRefArgs to communicate to fgMorphExpandImplicitByRefArg that
                // appearances of this arg needed to be rewritten to a new promoted struct local.
                varDsc->lvPromoted = false;

                // Clear the lvFieldLclStart value that was set by fgRetypeImplicitByRefArgs
                // to tell fgMorphExpandImplicitByRefArg which local is the new promoted struct one.
                varDsc->lvFieldLclStart = 0;
            }
            else if (varDsc->lvFieldLclStart != 0)
            {
                // We created new temps to represent a promoted struct corresponding to this
                // parameter, but decided not to go through with the promotion and have
                // rewritten all uses as indirections off the pointer parameter.
                // We stashed the pointer to the new struct temp in lvFieldLclStart; make
                // note of that and clear the annotation.
                unsigned structLclNum   = varDsc->lvFieldLclStart;
                varDsc->lvFieldLclStart = 0;

                // The temp struct is now unused; set flags appropriately so that we
                // won't allocate space for it on the stack.
                LclVarDsc* structVarDsc = lvaGetDesc(structLclNum);
                structVarDsc->CleanAddressExposed();
#ifdef DEBUG
                structVarDsc->lvUnusedStruct          = true;
                structVarDsc->lvUndoneStructPromotion = true;
#endif // DEBUG

                unsigned fieldLclStart = structVarDsc->lvFieldLclStart;
                unsigned fieldCount    = structVarDsc->lvFieldCnt;
                unsigned fieldLclStop  = fieldLclStart + fieldCount;

                for (unsigned fieldLclNum = fieldLclStart; fieldLclNum < fieldLclStop; ++fieldLclNum)
                {
                    JITDUMP("Fixing pointer for field V%02d from V%02d to V%02d\n", fieldLclNum, lclNum, structLclNum);

                    // Fix the pointer to the parent local.
                    LclVarDsc* fieldVarDsc = lvaGetDesc(fieldLclNum);
                    assert(fieldVarDsc->lvParentLcl == lclNum);
                    fieldVarDsc->lvParentLcl = structLclNum;

                    // The field local is now unused; set flags appropriately so that
                    // we won't allocate stack space for it.
                    fieldVarDsc->CleanAddressExposed();
                }
            }
        }
    }

#endif // FEATURE_IMPLICIT_BYREFS
}
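
// Editor's summary (illustrative): between fgRetypeImplicitByRefArgs and this phase, the annotations
// carry temporary meanings: lvPromoted==true plus lvFieldLclStart==N means "uses were rewritten to the
// new struct temp VN (promotion kept)", while lvPromoted==false plus lvFieldLclStart==N means
// "promotion was undone; uses were rewritten as indirections off the TYP_BYREF parameter, and VN is
// the now-unused temp". This phase restores the normal meaning of both fields once
// fgMorphExpandImplicitByRefArg has finished consulting them.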

//------------------------------------------------------------------------
// fgCanTailCallViaJitHelper: check whether we can use the faster tailcall
// JIT helper on x86.
//
// Arguments:
//   call - the tailcall
//
// Return Value:
//    'true' if we can; or 'false' if we should use the generic tailcall mechanism.
//
bool Compiler::fgCanTailCallViaJitHelper(GenTreeCall* call)
{
#if !defined(TARGET_X86) || defined(UNIX_X86_ABI)
    // On anything except Windows x86 we have no faster mechanism available.
    return false;
#else
    // For R2R make sure we go through portable mechanism that the 'EE' side
    // will properly turn into a runtime JIT.
    if (opts.IsReadyToRun())
    {
        return false;
    }

    // The JIT helper does not properly handle the case where localloc was used.
    if (compLocallocUsed)
    {
        return false;
    }

    // Delegate calls may go through VSD stub in rare cases. Those look at the
    // call site so we cannot use the JIT helper.
    if (call->IsDelegateInvoke())
    {
        return false;
    }

    return true;
#endif
}

//------------------------------------------------------------------------
// fgMorphReduceAddOps: reduce successive variable adds into a single multiply,
// e.g., i + i + i + i => i * 4.
//
// Arguments:
//    tree - tree for reduction
//
// Return Value:
//    reduced tree if pattern matches, original tree otherwise
//
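// Editor's worked example (illustrative): for source `i + i + i + i` the importer produces the
// left-leaning tree ADD(ADD(ADD(V0, V0), V0), V0). Walking down the op1 chain below, each of the two
// outer ADD nodes contributes 1 to foldCount and the innermost ADD(V0, V0) contributes 2, giving
// foldCount == 4, so the tree is rewritten to MUL(V0, 4), which later transforms can optimize further
// (e.g., into shifts or LEAs).
//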
GenTree* Compiler::fgMorphReduceAddOps(GenTree* tree)
{
    // ADD(_, V0) starts the pattern match.
    if (!tree->OperIs(GT_ADD) || tree->gtOverflow())
    {
        return tree;
    }

#ifndef TARGET_64BIT
    // Transforming 64-bit ADD to 64-bit MUL on 32-bit system results in replacing
    // ADD ops with a helper function call. Don't apply optimization in that case.
    if (tree->TypeGet() == TYP_LONG)
    {
        return tree;
    }
#endif

    GenTree* lclVarTree = tree->AsOp()->gtOp2;
    GenTree* consTree   = tree->AsOp()->gtOp1;

    GenTree* op1 = consTree;
    GenTree* op2 = lclVarTree;

    if (!op2->OperIs(GT_LCL_VAR) || !varTypeIsIntegral(op2))
    {
        return tree;
    }

    int      foldCount = 0;
    unsigned lclNum    = op2->AsLclVarCommon()->GetLclNum();

    // Search for pattern of shape ADD(ADD(ADD(lclNum, lclNum), lclNum), lclNum).
    while (true)
    {
        // ADD(lclNum, lclNum), end of tree
        if (op1->OperIs(GT_LCL_VAR) && op1->AsLclVarCommon()->GetLclNum() == lclNum && op2->OperIs(GT_LCL_VAR) &&
            op2->AsLclVarCommon()->GetLclNum() == lclNum)
        {
            foldCount += 2;
            break;
        }
        // ADD(ADD(X, Y), lclNum), keep descending
        else if (op1->OperIs(GT_ADD) && !op1->gtOverflow() && op2->OperIs(GT_LCL_VAR) &&
                 op2->AsLclVarCommon()->GetLclNum() == lclNum)
        {
            foldCount++;
            op2 = op1->AsOp()->gtOp2;
            op1 = op1->AsOp()->gtOp1;
        }
        // Any other case is a pattern we won't attempt to fold for now.
        else
        {
            return tree;
        }
    }

    // V0 + V0 ... + V0 becomes V0 * foldCount, where postorder transform will optimize
    // accordingly
    consTree->BashToConst(foldCount, tree->TypeGet());

    GenTree* morphed = gtNewOperNode(GT_MUL, tree->TypeGet(), lclVarTree, consTree);
    DEBUG_DESTROY_NODE(tree);

    return morphed;
}

//------------------------------------------------------------------------
// Compiler::MorphMDArrayTempCache::TempList::GetTemp: return a local variable number to use as a temporary variable
// in multi-dimensional array operation expansion.
//
// A temp is either re-used from the cache, or allocated and added to the cache.
//
// Returns:
//    A local variable temp number.
//
unsigned Compiler::MorphMDArrayTempCache::TempList::GetTemp()
{
    if (m_nextAvail != nullptr)
    {
        unsigned tmp = m_nextAvail->tmp;
        JITDUMP("Reusing temp V%02u\n", tmp);
        m_nextAvail = m_nextAvail->next;
        return tmp;
    }
    else
    {
        unsigned newTmp  = m_compiler->lvaGrabTemp(true DEBUGARG("MD array shared temp"));
        Node*    newNode = new (m_compiler, CMK_Unknown) Node(newTmp);
        assert(m_insertPtr != nullptr);
        assert(*m_insertPtr == nullptr);
        *m_insertPtr = newNode;
        m_insertPtr  = &newNode->next;
        return newTmp;
    }
}

//------------------------------------------------------------------------
// Compiler::MorphMDArrayTempCache::GrabTemp: return a local variable number to use as a temporary variable
// in multi-dimensional array operation expansion.
//
// Arguments:
//    type - type of temp to get
//
// Returns:
//    A local variable temp number.
//
unsigned Compiler::MorphMDArrayTempCache::GrabTemp(var_types type)
{
    switch (genActualType(type))
    {
        case TYP_INT:
            return intTemps.GetTemp();
        case TYP_REF:
            return refTemps.GetTemp();
        default:
            unreached();
    }
}
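
// Editor's usage sketch (illustrative; see fgMorphArrayOps below for the real driver):
//
//     MorphMDArrayTempCache tempCache(this);
//     for (BasicBlock* const block : Blocks())
//     {
//         for (Statement* const stmt : block->Statements())
//         {
//             fgMorphArrayOpsStmt(&tempCache, block, stmt); // calls GrabTemp(TYP_INT)/GrabTemp(TYP_REF)
//         }
//         tempCache.Reset(); // previously grabbed temps become available for reuse
//     }
//
// GrabTemp() hands back a previously created temp when one is available after a Reset(), and only
// grabs a brand-new local otherwise, which bounds how many locals the MD-array expansion creates.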

//------------------------------------------------------------------------
// fgMorphArrayOpsStmt: Tree walk a statement to morph GT_ARR_ELEM.
//
// The nested `MorphMDArrayVisitor::PostOrderVisit()` does the morphing.
//
// See the comment for `fgMorphArrayOps()` for more details of the transformation.
//
// Arguments:
//    pTempCache - pointer to the temp locals cache
//    block - BasicBlock where the statement lives
//    stmt - statement to walk
//
// Returns:
//    True if anything changed, false if the IR was unchanged.
//
bool Compiler::fgMorphArrayOpsStmt(MorphMDArrayTempCache* pTempCache, BasicBlock* block, Statement* stmt)
{
    class MorphMDArrayVisitor final : public GenTreeVisitor<MorphMDArrayVisitor>
    {
    public:
        enum
        {
            DoPostOrder = true
        };

        MorphMDArrayVisitor(Compiler* compiler, BasicBlock* block, MorphMDArrayTempCache* pTempCache)
            : GenTreeVisitor<MorphMDArrayVisitor>(compiler)
            , m_changed(false)
            , m_block(block)
            , m_pTempCache(pTempCache)
        {
        }

        bool Changed() const
        {
            return m_changed;
        }

        fgWalkResult PostOrderVisit(GenTree** use, GenTree* user)
        {
            GenTree* const node = *use;

            if (!node->OperIs(GT_ARR_ELEM))
            {
                return Compiler::WALK_CONTINUE;
            }

            GenTreeArrElem* const arrElem = node->AsArrElem();

            JITDUMP("Morphing GT_ARR_ELEM [%06u] in " FMT_BB " of '%s'\n", dspTreeID(arrElem), m_block->bbNum,
                    m_compiler->info.compFullName);
            DISPTREE(arrElem);

            // impArrayAccessIntrinsic() ensures the following.
            assert((2 <= arrElem->gtArrRank) && (arrElem->gtArrRank <= GT_ARR_MAX_RANK));
            assert(arrElem->gtArrObj->TypeIs(TYP_REF));
            assert(arrElem->TypeIs(TYP_BYREF));

            for (unsigned i = 0; i < arrElem->gtArrRank; i++)
            {
                assert(arrElem->gtArrInds[i] != nullptr);

                // We cast the index operands to TYP_INT in the importer.
                // Note that the offset calculation needs to be TYP_I_IMPL, as multiplying the linearized index
                // by the array element scale might overflow (although does .NET support array objects larger than
                // 2GB in size?).
                assert(genActualType(arrElem->gtArrInds[i]->TypeGet()) == TYP_INT);
            }

            // The order of evaluation of a[i,j,k] is: a, i, j, k. That is, if any of the i, j, k throw an
            // exception, it needs to happen before accessing `a`. For example, `a` could be null, but `i`
            // could be an expression throwing an exception, and that exception needs to be thrown before
            // indirecting using `a` (such as reading a dimension length or lower bound).
            //
            // First, we need to make temp copies of the index expressions that have side-effects. We
            // always make a copy of the array object (below) so we can multi-use it.
            //
            GenTree* idxToUse[GT_ARR_MAX_RANK];
            unsigned idxToCopy[GT_ARR_MAX_RANK];
            bool     anyIdxWithSideEffects = false;
            for (unsigned i = 0; i < arrElem->gtArrRank; i++)
            {
                GenTree* idx = arrElem->gtArrInds[i];
                if ((idx->gtFlags & GTF_ALL_EFFECT) == 0)
                {
                    // No side-effect; just use it.
                    idxToUse[i]  = idx;
                    idxToCopy[i] = BAD_VAR_NUM;
                }
                else
                {
                    // Side-effect; create a temp.
                    // unsigned newIdxLcl = m_compiler->lvaGrabTemp(true DEBUGARG("MD array index copy"));
                    unsigned newIdxLcl    = m_pTempCache->GrabTemp(idx->TypeGet());
                    GenTree* newIdx       = m_compiler->gtNewLclvNode(newIdxLcl, genActualType(idx));
                    idxToUse[i]           = newIdx;
                    idxToCopy[i]          = newIdxLcl;
                    anyIdxWithSideEffects = true;
                }
            }

            // `newArrLcl` is set to the lclvar with a copy of the array object, if needed. The creation/copy of the
            // array object to this lcl is done as a top-level comma if needed.
            unsigned arrLcl    = BAD_VAR_NUM;
            unsigned newArrLcl = BAD_VAR_NUM;
            GenTree* arrObj    = arrElem->gtArrObj;
            unsigned rank      = arrElem->gtArrRank;

            // We are going to multiply reference the array object; create a new local var if necessary.
            if (arrObj->OperIs(GT_LCL_VAR))
            {
                arrLcl = arrObj->AsLclVar()->GetLclNum();
            }
            else
            {
                // arrLcl = newArrLcl = m_compiler->lvaGrabTemp(true DEBUGARG("MD array copy"));
                arrLcl = newArrLcl = m_pTempCache->GrabTemp(TYP_REF);
            }

            GenTree* fullTree = nullptr;

            // Work from outer-to-inner rank (i.e., slowest-changing to fastest-changing index), building up the
            // offset tree.
            for (unsigned i = 0; i < arrElem->gtArrRank; i++)
            {
                GenTree* idx = idxToUse[i];
                assert((idx->gtFlags & GTF_ALL_EFFECT) == 0); // We should have taken care of side effects earlier.

                GenTreeMDArr* const mdArrLowerBound =
                    m_compiler->gtNewMDArrLowerBound(m_compiler->gtNewLclvNode(arrLcl, TYP_REF), i, rank, m_block);
                // unsigned effIdxLcl = m_compiler->lvaGrabTemp(true DEBUGARG("MD array effective index"));
                unsigned effIdxLcl = m_pTempCache->GrabTemp(TYP_INT);
                GenTree* const effIndex     = m_compiler->gtNewOperNode(GT_SUB, TYP_INT, idx, mdArrLowerBound);
                GenTree* const effIdxLclDef = m_compiler->gtNewTempStore(effIdxLcl, effIndex);
                GenTreeMDArr* const mdArrLength =
                    m_compiler->gtNewMDArrLen(m_compiler->gtNewLclvNode(arrLcl, TYP_REF), i, rank, m_block);
                GenTreeBoundsChk* const arrBndsChk = new (m_compiler, GT_BOUNDS_CHECK)
                    GenTreeBoundsChk(m_compiler->gtNewLclvNode(effIdxLcl, TYP_INT), mdArrLength, SCK_RNGCHK_FAIL);
                GenTree* const boundsCheckComma =
                    m_compiler->gtNewOperNode(GT_COMMA, TYP_INT, arrBndsChk,
                                              m_compiler->gtNewLclvNode(effIdxLcl, TYP_INT));
                GenTree* const idxComma = m_compiler->gtNewOperNode(GT_COMMA, TYP_INT, effIdxLclDef, boundsCheckComma);

                // If it's not the first index, accumulate with the previously created calculation.
                if (i > 0)
                {
                    assert(fullTree != nullptr);

                    GenTreeMDArr* const mdArrLengthScale =
                        m_compiler->gtNewMDArrLen(m_compiler->gtNewLclvNode(arrLcl, TYP_REF), i, rank, m_block);
                    GenTree* const scale    = m_compiler->gtNewOperNode(GT_MUL, TYP_INT, fullTree, mdArrLengthScale);
                    GenTree* const effIndex = m_compiler->gtNewOperNode(GT_ADD, TYP_INT, scale, idxComma);

                    fullTree = effIndex;
                }
                else
                {
                    fullTree = idxComma;
                }
            }

#ifdef TARGET_64BIT
            // Widen the linearized index on 64-bit targets; subsequent math will be done in TYP_I_IMPL.
            assert(fullTree->TypeIs(TYP_INT));
            fullTree = m_compiler->gtNewCastNode(TYP_I_IMPL, fullTree, true, TYP_I_IMPL);
#endif // TARGET_64BIT

            // Now scale by element size and add offset from array object to array data base.

            unsigned elemScale  = arrElem->gtArrElemSize;
            unsigned dataOffset = m_compiler->eeGetMDArrayDataOffset(arrElem->gtArrRank);
            GenTree* const scale =
                m_compiler->gtNewOperNode(GT_MUL, TYP_I_IMPL, fullTree,
                                          m_compiler->gtNewIconNode(static_cast<ssize_t>(elemScale), TYP_I_IMPL));
            GenTree* const scalePlusOffset =
                m_compiler->gtNewOperNode(GT_ADD, TYP_I_IMPL, scale,
                                          m_compiler->gtNewIconNode(static_cast<ssize_t>(dataOffset), TYP_I_IMPL));
            GenTree* fullExpansion = m_compiler->gtNewOperNode(GT_ADD, TYP_BYREF, scalePlusOffset,
                                                               m_compiler->gtNewLclvNode(arrLcl, TYP_REF));

            // Add copies of the index expressions with side effects. Add them in reverse order, so the first index
            // ends up at the top of the tree (so, first in execution order).
            if (anyIdxWithSideEffects)
            {
                for (unsigned i = arrElem->gtArrRank; i > 0; i--)
                {
                    if (idxToCopy[i - 1] != BAD_VAR_NUM)
                    {
                        GenTree* const idxLclStore =
                            m_compiler->gtNewTempStore(idxToCopy[i - 1], arrElem->gtArrInds[i - 1]);
                        fullExpansion =
                            m_compiler->gtNewOperNode(GT_COMMA, fullExpansion->TypeGet(), idxLclStore, fullExpansion);
                    }
                }
            }

            // If we needed to create a new local for the array object, copy that before everything.
            if (newArrLcl != BAD_VAR_NUM)
            {
                GenTree* const arrLclStore = m_compiler->gtNewTempStore(newArrLcl, arrObj);
                fullExpansion =
                    m_compiler->gtNewOperNode(GT_COMMA, fullExpansion->TypeGet(), arrLclStore, fullExpansion);
            }

            JITDUMP("fgMorphArrayOpsStmt (before remorph):\n");
            DISPTREE(fullExpansion);

            *use      = fullExpansion;
            m_changed = true;

            // The GT_ARR_ELEM node is no longer needed.
            DEBUG_DESTROY_NODE(node);

            return fgWalkResult::WALK_CONTINUE;
        }

    private:
        bool                   m_changed;
        BasicBlock*            m_block;
        MorphMDArrayTempCache* m_pTempCache;
    };

    MorphMDArrayVisitor morphMDArrayVisitor(this, block, pTempCache);
    morphMDArrayVisitor.WalkTree(stmt->GetRootNodePointer(), nullptr);
    return morphMDArrayVisitor.Changed();
}
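
// Editor's note (illustrative): for an access like `a[Foo(), Bar()]` (hypothetical methods with side
// effects), the expansion above stores Foo() and Bar() into temps and prepends those stores, in
// reverse order, ahead of the address computation. The resulting COMMA chain therefore evaluates
// Foo() first, then Bar(), and only then reads the array's bounds and lengths, preserving the
// required evaluation and exception ordering described in the comments above.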

//------------------------------------------------------------------------
// fgMorphArrayOps: Morph multi-dimensional (MD) array operations in this method.
//
// GT_ARR_ELEM nodes are morphed to appropriate trees. Note that MD array `Get`, `Set`, or `Address`
// is imported as a call, and, if all required conditions are satisfied, is treated as an intrinsic
// and replaced by IR nodes, especially GT_ARR_ELEM nodes, in impArrayAccessIntrinsic().
//
// For example, a simple 2-dimensional array access like `a[i,j]` looks like:
//
// \--*  ARR_ELEM[,] byref
//    +--*  LCL_VAR   ref    V00 arg0
//    +--*  LCL_VAR   int    V01 arg1
//    \--*  LCL_VAR   int    V02 arg2
//
// This is replaced by:
//
// &a + offset + elemSize * ((i - a.GetLowerBound(0)) * a.GetLength(1) + (j - a.GetLowerBound(1)))
//
// plus the appropriate `i` and `j` bounds checks.
//
// In IR, this is:
//
// *  ADD       byref
// +--*  ADD       long
// |  +--*  MUL       long
// |  |  +--*  CAST      long <- uint
// |  |  |  \--*  ADD       int
// |  |  |     +--*  MUL       int
// |  |  |     |  +--*  COMMA     int
// |  |  |     |  |  +--*  STORE_LCL_VAR   int    V04 tmp1
// |  |  |     |  |  |  \--*  SUB       int
// |  |  |     |  |  |     +--*  LCL_VAR   int    V01 arg1
// |  |  |     |  |  |     \--*  MDARR_LOWER_BOUND int    (0)
// |  |  |     |  |  |        \--*  LCL_VAR   ref    V00 arg0
// |  |  |     |  |  \--*  COMMA     int
// |  |  |     |  |     +--*  BOUNDS_CHECK_Rng void
// |  |  |     |  |     |  +--*  LCL_VAR   int    V04 tmp1
// |  |  |     |  |     |  \--*  MDARR_LENGTH int    (0)
// |  |  |     |  |     |     \--*  LCL_VAR   ref    V00 arg0
// |  |  |     |  |     \--*  LCL_VAR   int    V04 tmp1
// |  |  |     |  \--*  MDARR_LENGTH int    (1)
// |  |  |     |     \--*  LCL_VAR   ref    V00 arg0
// |  |  |     \--*  COMMA     int
// |  |  |        +--*  STORE_LCL_VAR   int    V05 tmp2
// |  |  |        |  \--*  SUB       int
// |  |  |        |     +--*  LCL_VAR   int    V02 arg2
// |  |  |        |     \--*  MDARR_LOWER_BOUND int    (1)
// |  |  |        |        \--*  LCL_VAR   ref    V00 arg0
// |  |  |        \--*  COMMA     int
// |  |  |           +--*  BOUNDS_CHECK_Rng void
// |  |  |           |  +--*  LCL_VAR   int    V05 tmp2
// |  |  |           |  \--*  MDARR_LENGTH int    (1)
// |  |  |           |     \--*  LCL_VAR   ref    V00 arg0
// |  |  |           \--*  LCL_VAR   int    V05 tmp2
// |  |  \--*  CNS_INT   long   4
// |  \--*  CNS_INT   long   32
// \--*  LCL_VAR   ref    V00 arg0
//
// before being morphed by the usual morph transformations.
//
// Some things to consider:
// 1. MD arrays have both a lower bound and length for each dimension (even if very few MD arrays actually have a
//    lower bound)
// 2. GT_MDARR_LOWER_BOUND(dim) represents the lower-bound value for a particular array dimension. The "effective
//    index" for a dimension is the index minus the lower bound.
// 3. GT_MDARR_LENGTH(dim) represents the length value (number of elements in a dimension) for a particular
//    array dimension.
// 4. The effective index is bounds checked against the dimension length.
// 5. The lower bound and length values are 32-bit signed integers (TYP_INT).
// 6. After constructing a "linearized index", the index is scaled by the array element size, and the offset from
//    the array object to the beginning of the array data is added.
// 7. Much of the complexity above is simply to assign temps to the various values that are used subsequently.
// 8. The index expressions are used exactly once. However, if they have side effects, they need to be copied,
//    early, to preserve exception ordering.
// 9. Only the top-level operation adds the array object to the scaled, linearized index, to create the final
//    address `byref`. As usual, we need to be careful to not create an illegal byref by adding any partial index
//    calculation.
// 10. To avoid doing unnecessary work, the importer sets the global `OMF_HAS_MDARRAYREF` flag if there are any
//    MD array expressions to expand. Also, the block flag `BBF_HAS_MDARRAYREF` is set to blocks where these exist,
//    so only those blocks are processed.
//
// Returns:
//   suitable phase status
//
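// Editor's note (illustrative generalization of the rank-2 formula above): for a rank-3 access
// `a[i,j,k]` the linearized index built by fgMorphArrayOpsStmt is
//
//   ((i' * a.GetLength(1)) + j') * a.GetLength(2) + k'      where x' = x - a.GetLowerBound(dim)
//
// i.e., each iteration multiplies the accumulated index by the next dimension's length and adds that
// dimension's bounds-checked effective index, before the final scale by element size and the addition
// of the array data offset and the array object address.
//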
PhaseStatus Compiler::fgMorphArrayOps()
{
    if ((optMethodFlags & OMF_HAS_MDARRAYREF) == 0)
    {
        JITDUMP("No multi-dimensional array references in the function\n");
        return PhaseStatus::MODIFIED_NOTHING;
    }

    // Maintain a cache of temp locals to use when we need a temp for this transformation. After each statement,
    // reset the cache, meaning we can re-use any of the temps previously allocated. The idea here is to avoid
    // creating too many temporaries, since the JIT has a limit on the number of tracked locals. A temp created
    // here in one statement will have a distinct lifetime from a temp created in another statement, so register
    // allocation is not constrained.

    bool                  changed = false;
    MorphMDArrayTempCache mdArrayTempCache(this);

    for (BasicBlock* const block : Blocks())
    {
        if (!block->HasFlag(BBF_HAS_MDARRAYREF))
        {
            // No MD array references in this block
            continue;
        }

        // Publish current block (needed for various morphing functions).
        compCurBB = block;

        for (Statement* const stmt : block->Statements())
        {
            if (fgMorphArrayOpsStmt(&mdArrayTempCache, block, stmt))
            {
                changed = true;

                // Morph the statement if there have been changes.

                GenTree* tree        = stmt->GetRootNode();
                GenTree* morphedTree = fgMorphTree(tree);

                JITDUMP("fgMorphArrayOps (after remorph):\n");
                DISPTREE(morphedTree);

                stmt->SetRootNode(morphedTree);
            }
        }

        mdArrayTempCache.Reset();
    }

    return changed ? PhaseStatus::MODIFIED_EVERYTHING : PhaseStatus::MODIFIED_NOTHING;
}