
Add basic support for folding and normalizing hwintrinsic trees in morph (#103143)

* Add basic support for folding hwintrinsic trees in morph

* Reduce the amount of copying required to evaluate vector constants

* Have gtFoldExprHWIntrinsic handle side effects
Tanner Gooding 2024-06-13 14:45:50 -07:00, committed by GitHub
parent daf6cdcca8, commit d7ae8c61f0
Signed by: github (GPG key ID: B5690EEEBB952194)
18 changed files with 2909 additions and 879 deletions
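
For orientation, here is a minimal sketch of the folding idea the commit message describes, using names that appear in the hunks below (HWOperGet, EvaluateBinaryInPlace, GetSimdBaseType); the control flow is illustrative, not the commit's actual gtFoldExprHWIntrinsic body:

// Hedged sketch: when a two-operand hwintrinsic maps to a simple oper and
// both operands are vector constants, evaluate it at compile time and hand
// back the constant in place of the tree. The real implementation also
// covers unary ops, broadcasts, and operands with side effects.
GenTree* Compiler::gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree)
{
    bool       isScalar = false;
    genTreeOps oper     = tree->HWOperGet(&isScalar); // e.g. GT_ADD, GT_AND, GT_LSH

    if ((oper != GT_NONE) && (tree->GetOperandCount() == 2) && tree->Op(1)->IsCnsVec() && tree->Op(2)->IsCnsVec())
    {
        GenTreeVecCon* cns1 = tree->Op(1)->AsVecCon();
        GenTreeVecCon* cns2 = tree->Op(2)->AsVecCon();

        // Evaluate element-wise into cns1 and reuse it as the result node,
        // avoiding a fresh allocation and extra copies of the simd value.
        cns1->EvaluateBinaryInPlace(oper, isScalar, tree->GetSimdBaseType(), cns2);
        cns1->gtType = tree->TypeGet();
        return cns1;
    }

    return tree;
}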


@@ -3649,6 +3649,10 @@ public:
GenTree* gtFoldTypeCompare(GenTree* tree);
GenTree* gtFoldTypeEqualityCall(bool isEq, GenTree* op1, GenTree* op2);
#if defined(FEATURE_HW_INTRINSICS)
GenTree* gtFoldExprHWIntrinsic(GenTreeHWIntrinsic* tree);
#endif // FEATURE_HW_INTRINSICS
// Options to control behavior of gtTryRemoveBoxUpstreamEffects
enum BoxRemovalOptions
{

File diff suppressed because it is too large.


@@ -1763,8 +1763,11 @@ public:
inline bool IsFloatPositiveZero() const;
inline bool IsFloatNegativeZero() const;
inline bool IsVectorZero() const;
inline bool IsVectorNegativeZero(var_types simdBaseType) const;
inline bool IsVectorNaN(var_types simdBaseType) const;
inline bool IsVectorCreate() const;
inline bool IsVectorAllBitsSet() const;
inline bool IsVectorBroadcast(var_types simdBaseType) const;
inline bool IsMaskAllBitsSet() const;
inline bool IsVectorConst();
@@ -6634,7 +6637,7 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic
static bool Equals(GenTreeHWIntrinsic* op1, GenTreeHWIntrinsic* op2);
genTreeOps HWOperGet() const;
genTreeOps HWOperGet(bool* isScalar) const;
private:
void SetHWIntrinsicId(NamedIntrinsic intrinsicId);
@@ -6878,6 +6881,178 @@ struct GenTreeVecCon : public GenTree
#endif // FEATURE_HW_INTRINSICS
void EvaluateUnaryInPlace(genTreeOps oper, bool scalar, var_types baseType);
void EvaluateBinaryInPlace(genTreeOps oper, bool scalar, var_types baseType, GenTreeVecCon* other);
template <typename TBase>
void EvaluateBroadcastInPlace(TBase scalar)
{
switch (gtType)
{
#if defined(FEATURE_SIMD)
case TYP_SIMD8:
{
simd8_t result = {};
BroadcastConstantToSimd<simd8_t, TBase>(&result, scalar);
gtSimd8Val = result;
break;
}
case TYP_SIMD12:
{
simd12_t result = {};
BroadcastConstantToSimd<simd12_t, TBase>(&result, scalar);
gtSimd12Val = result;
break;
}
case TYP_SIMD16:
{
simd16_t result = {};
BroadcastConstantToSimd<simd16_t, TBase>(&result, scalar);
gtSimd16Val = result;
break;
}
#if defined(TARGET_XARCH)
case TYP_SIMD32:
{
simd32_t result = {};
BroadcastConstantToSimd<simd32_t, TBase>(&result, scalar);
gtSimd32Val = result;
break;
}
case TYP_SIMD64:
{
simd64_t result = {};
BroadcastConstantToSimd<simd64_t, TBase>(&result, scalar);
gtSimd64Val = result;
break;
}
#endif // TARGET_XARCH
#endif // FEATURE_SIMD
default:
{
unreached();
}
}
}
void EvaluateBroadcastInPlace(var_types baseType, double scalar);
void EvaluateBroadcastInPlace(var_types baseType, int64_t scalar);
void SetElementFloating(var_types simdBaseType, int32_t index, double value)
{
switch (gtType)
{
#if defined(FEATURE_SIMD)
case TYP_SIMD8:
{
simd8_t result = {};
EvaluateWithElementFloating<simd8_t>(simdBaseType, &result, gtSimd8Val, index, value);
gtSimd8Val = result;
break;
}
case TYP_SIMD12:
{
simd12_t result = {};
EvaluateWithElementFloating<simd12_t>(simdBaseType, &result, gtSimd12Val, index, value);
gtSimd12Val = result;
break;
}
case TYP_SIMD16:
{
simd16_t result = {};
EvaluateWithElementFloating<simd16_t>(simdBaseType, &result, gtSimd16Val, index, value);
gtSimd16Val = result;
break;
}
#if defined(TARGET_XARCH)
case TYP_SIMD32:
{
simd32_t result = {};
EvaluateWithElementFloating<simd32_t>(simdBaseType, &result, gtSimd32Val, index, value);
gtSimd32Val = result;
break;
}
case TYP_SIMD64:
{
simd64_t result = {};
EvaluateWithElementFloating<simd64_t>(simdBaseType, &result, gtSimd64Val, index, value);
gtSimd64Val = result;
break;
}
#endif // TARGET_XARCH
#endif // FEATURE_SIMD
default:
{
unreached();
}
}
}
void SetElementIntegral(var_types simdBaseType, int32_t index, int64_t value)
{
switch (gtType)
{
#if defined(FEATURE_SIMD)
case TYP_SIMD8:
{
simd8_t result = {};
EvaluateWithElementIntegral<simd8_t>(simdBaseType, &result, gtSimd8Val, index, value);
gtSimd8Val = result;
break;
}
case TYP_SIMD12:
{
simd12_t result = {};
EvaluateWithElementIntegral<simd12_t>(simdBaseType, &result, gtSimd12Val, index, value);
gtSimd12Val = result;
break;
}
case TYP_SIMD16:
{
simd16_t result = {};
EvaluateWithElementIntegral<simd16_t>(simdBaseType, &result, gtSimd16Val, index, value);
gtSimd16Val = result;
break;
}
#if defined(TARGET_XARCH)
case TYP_SIMD32:
{
simd32_t result = {};
EvaluateWithElementIntegral<simd32_t>(simdBaseType, &result, gtSimd32Val, index, value);
gtSimd32Val = result;
break;
}
case TYP_SIMD64:
{
simd64_t result = {};
EvaluateWithElementIntegral<simd64_t>(simdBaseType, &result, gtSimd64Val, index, value);
gtSimd64Val = result;
break;
}
#endif // TARGET_XARCH
#endif // FEATURE_SIMD
default:
{
unreached();
}
}
}
bool IsAllBitsSet() const
{
switch (gtType)
@@ -6923,6 +7098,8 @@ struct GenTreeVecCon : public GenTree
}
}
bool IsBroadcast(var_types simdBaseType) const;
static bool Equals(const GenTreeVecCon* left, const GenTreeVecCon* right)
{
var_types gtType = left->TypeGet();
@@ -6975,6 +7152,10 @@ struct GenTreeVecCon : public GenTree
}
}
bool IsNaN(var_types simdBaseType) const;
bool IsNegativeZero(var_types simdBaseType) const;
bool IsZero() const
{
switch (gtType)
@@ -7020,6 +7201,144 @@ struct GenTreeVecCon : public GenTree
}
}
double GetElementFloating(var_types simdBaseType, int32_t index) const
{
switch (gtType)
{
#if defined(FEATURE_SIMD)
case TYP_SIMD8:
{
return EvaluateGetElementFloating<simd8_t>(simdBaseType, gtSimd8Val, index);
}
case TYP_SIMD12:
{
return EvaluateGetElementFloating<simd12_t>(simdBaseType, gtSimd12Val, index);
}
case TYP_SIMD16:
{
return EvaluateGetElementFloating<simd16_t>(simdBaseType, gtSimd16Val, index);
}
#if defined(TARGET_XARCH)
case TYP_SIMD32:
{
return EvaluateGetElementFloating<simd32_t>(simdBaseType, gtSimd32Val, index);
}
case TYP_SIMD64:
{
return EvaluateGetElementFloating<simd64_t>(simdBaseType, gtSimd64Val, index);
}
#endif // TARGET_XARCH
#endif // FEATURE_SIMD
default:
{
unreached();
}
}
}
int64_t GetElementIntegral(var_types simdBaseType, int32_t index) const
{
switch (gtType)
{
#if defined(FEATURE_SIMD)
case TYP_SIMD8:
{
return EvaluateGetElementIntegral<simd8_t>(simdBaseType, gtSimd8Val, index);
}
case TYP_SIMD12:
{
return EvaluateGetElementIntegral<simd12_t>(simdBaseType, gtSimd12Val, index);
}
case TYP_SIMD16:
{
return EvaluateGetElementIntegral<simd16_t>(simdBaseType, gtSimd16Val, index);
}
#if defined(TARGET_XARCH)
case TYP_SIMD32:
{
return EvaluateGetElementIntegral<simd32_t>(simdBaseType, gtSimd32Val, index);
}
case TYP_SIMD64:
{
return EvaluateGetElementIntegral<simd64_t>(simdBaseType, gtSimd64Val, index);
}
#endif // TARGET_XARCH
#endif // FEATURE_SIMD
default:
{
unreached();
}
}
}
double ToScalarFloating(var_types simdBaseType) const
{
return GetElementFloating(simdBaseType, 0);
}
int64_t ToScalarIntegral(var_types simdBaseType) const
{
return GetElementIntegral(simdBaseType, 0);
}
bool IsElementZero(var_types simdBaseType, int32_t index) const
{
switch (simdBaseType)
{
case TYP_FLOAT:
{
return GetElementIntegral(TYP_INT, index) == 0;
}
case TYP_DOUBLE:
{
return GetElementIntegral(TYP_LONG, index) == 0;
}
default:
{
return GetElementIntegral(simdBaseType, index) == 0;
}
}
}
bool IsElementOne(var_types simdBaseType, int32_t index) const
{
switch (simdBaseType)
{
case TYP_FLOAT:
case TYP_DOUBLE:
{
return GetElementFloating(simdBaseType, index) == 1;
}
default:
{
return GetElementIntegral(simdBaseType, index) == 1;
}
}
}
bool IsScalarZero(var_types simdBaseType) const
{
return IsElementZero(simdBaseType, 0);
}
bool IsScalarOne(var_types simdBaseType) const
{
return IsElementOne(simdBaseType, 0);
}
GenTreeVecCon(var_types type)
: GenTree(GT_CNS_VEC, type)
{
@@ -9238,6 +9557,36 @@ inline bool GenTree::IsVectorZero() const
return IsCnsVec() && AsVecCon()->IsZero();
}
//-------------------------------------------------------------------
// IsVectorNegativeZero: returns true if this node is a vector constant with all elements negative zero.
//
// Arguments:
// simdBaseType - the base type of the constant being checked
//
// Returns:
// True if this node is a vector constant with all elements negative zero
//
inline bool GenTree::IsVectorNegativeZero(var_types simdBaseType) const
{
assert(varTypeIsFloating(simdBaseType));
return IsCnsVec() && AsVecCon()->IsNegativeZero(simdBaseType);
}
//-------------------------------------------------------------------
// IsVectorNaN: returns true if this node is a vector constant with all elements NaN.
//
// Arguments:
// simdBaseType - the base type of the constant being checked
//
// Returns:
// True if this node is a vector constant with all elements NaN
//
inline bool GenTree::IsVectorNaN(var_types simdBaseType) const
{
assert(varTypeIsFloating(simdBaseType));
return IsCnsVec() && AsVecCon()->IsNaN(simdBaseType);
}
//-------------------------------------------------------------------
// IsVectorCreate: returns true if this node is the creation of a vector.
// Does not include "Unsafe" method calls.
@@ -9288,6 +9637,24 @@ inline bool GenTree::IsVectorAllBitsSet() const
return false;
}
//-------------------------------------------------------------------
// IsVectorBroadcast: returns true if this node is a vector constant with the same value in all elements.
//
// Arguments:
// simdBaseType - the base type of the constant being checked
//
// Returns:
// True if this node is a vector constant with the same value in all elements.
//
inline bool GenTree::IsVectorBroadcast(var_types simdBaseType) const
{
#ifdef FEATURE_SIMD
if (IsCnsVec())
{
return AsVecCon()->IsBroadcast(simdBaseType);
}
#endif // FEATURE_SIMD
return false;
}
inline bool GenTree::IsMaskAllBitsSet() const
{
#ifdef TARGET_ARM64


@@ -664,6 +664,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
return vecCon;
}
op1 = gtNewSimdHWIntrinsicNode(retType, op1, NI_Vector64_ToVector128Unsafe, simdBaseJitType, 8);
GenTree* idx = gtNewIconNode(2, TYP_INT);
GenTree* zero = gtNewZeroConNode(TYP_FLOAT);
op1 = gtNewSimdWithElementNode(retType, op1, idx, zero, simdBaseJitType, 16);
@@ -690,6 +692,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
return vecCon;
}
op1 = gtNewSimdHWIntrinsicNode(retType, op1, NI_Vector128_AsVector128Unsafe, simdBaseJitType, 12);
GenTree* idx = gtNewIconNode(3, TYP_INT);
GenTree* zero = gtNewZeroConNode(TYP_FLOAT);
retNode = gtNewSimdWithElementNode(retType, op1, idx, zero, simdBaseJitType, 16);


@@ -1521,6 +1521,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
break;
case NI_Vector64_ToVector128Unsafe:
case NI_Vector128_AsVector128Unsafe:
case NI_Vector128_GetLower:
GetEmitter()->emitIns_Mov(ins, emitSize, targetReg, op1Reg, /* canSkip */ true);
break;


@@ -1766,6 +1766,7 @@ void CodeGen::genBaseIntrinsic(GenTreeHWIntrinsic* node, insOpts instOptions)
break;
}
case NI_Vector128_AsVector128Unsafe:
case NI_Vector128_AsVector2:
case NI_Vector128_AsVector3:
case NI_Vector128_ToScalar:


@@ -130,6 +130,7 @@ HARDWARE_INTRINSIC(Vector128, AsVector2,
HARDWARE_INTRINSIC(Vector128, AsVector3, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov, INS_invalid}, HW_Category_SIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport)
HARDWARE_INTRINSIC(Vector128, AsVector4, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, AsVector128, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, AsVector128Unsafe, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov, INS_invalid}, HW_Category_SIMD, HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector128, Ceiling, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, ConditionalSelect, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, ConvertToDouble, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)


@@ -49,6 +49,7 @@ HARDWARE_INTRINSIC(Vector128, AsVector2,
HARDWARE_INTRINSIC(Vector128, AsVector3, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(Vector128, AsVector4, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, AsVector128, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector128, AsVector128Unsafe, -1, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movups, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics|HW_Flag_NoContainment)
HARDWARE_INTRINSIC(Vector128, Ceiling, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
HARDWARE_INTRINSIC(Vector128, ConditionalSelect, 16, 3, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector128, ConvertToDouble, 16, 1, false, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId|HW_Flag_BaseTypeFromFirstArg)


@@ -1214,6 +1214,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
return vecCon;
}
op1 = gtNewSimdHWIntrinsicNode(retType, op1, NI_Vector128_AsVector128Unsafe, simdBaseJitType, 8);
GenTree* idx = gtNewIconNode(2, TYP_INT);
GenTree* zero = gtNewZeroConNode(TYP_FLOAT);
op1 = gtNewSimdWithElementNode(retType, op1, idx, zero, simdBaseJitType, 16);
@@ -1240,6 +1242,8 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic intrinsic,
return vecCon;
}
op1 = gtNewSimdHWIntrinsicNode(retType, op1, NI_Vector128_AsVector128Unsafe, simdBaseJitType, 12);
GenTree* idx = gtNewIconNode(3, TYP_INT);
GenTree* zero = gtNewZeroConNode(TYP_FLOAT);
retNode = gtNewSimdWithElementNode(retType, op1, idx, zero, simdBaseJitType, 16);


@@ -3070,12 +3070,17 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
GenTree* hwintrinsic = impHWIntrinsic(ni, clsHnd, method, sig R2RARG(entryPoint), mustExpand);
if (mustExpand && (hwintrinsic == nullptr))
if (hwintrinsic == nullptr)
{
return impUnsupportedNamedIntrinsic(CORINFO_HELP_THROW_NOT_IMPLEMENTED, method, sig, mustExpand);
if (mustExpand)
{
return impUnsupportedNamedIntrinsic(CORINFO_HELP_THROW_NOT_IMPLEMENTED, method, sig, mustExpand);
}
return nullptr;
}
return hwintrinsic;
// Fold result, if possible
return gtFoldExpr(hwintrinsic);
}
else
{
@@ -3083,7 +3088,16 @@ GenTree* Compiler::impIntrinsic(GenTree* newobjThis,
if (isIntrinsic)
{
return impSimdAsHWIntrinsic(ni, clsHnd, method, sig, newobjThis, mustExpand);
GenTree* hwintrinsic = impSimdAsHWIntrinsic(ni, clsHnd, method, sig, newobjThis, mustExpand);
if (hwintrinsic == nullptr)
{
assert(!mustExpand);
return nullptr;
}
// Fold result, if possible
return gtFoldExpr(hwintrinsic);
}
}
}


@@ -2202,7 +2202,8 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}
if (second->AsHWIntrinsic()->HWOperGet() == GT_AND_NOT)
bool isScalar = false;
if ((second->AsHWIntrinsic()->HWOperGet(&isScalar) == GT_AND_NOT) || isScalar)
{
// currently ANDNOT logic cannot be optimized by the ternary node.
break;
@@ -9266,50 +9267,22 @@ void Lowering::TryFoldCnsVecForEmbeddedBroadcast(GenTreeHWIntrinsic* parentNode,
{
simdType = Compiler::getSIMDTypeForSize(simdSize);
}
int elementCount = GenTreeVecCon::ElementCount(genTypeSize(simdType), simdBaseType);
switch (simdBaseType)
if (varTypeIsSmall(simdBaseType))
{
case TYP_FLOAT:
case TYP_INT:
case TYP_UINT:
{
uint32_t firstElement = static_cast<uint32_t>(childNode->gtSimdVal.u32[0]);
for (int i = 1; i < elementCount; i++)
{
uint32_t elementToCheck = static_cast<uint32_t>(childNode->gtSimdVal.u32[i]);
if (firstElement != elementToCheck)
{
isCreatedFromScalar = false;
break;
}
}
break;
}
case TYP_DOUBLE:
#if defined(TARGET_AMD64)
case TYP_LONG:
case TYP_ULONG:
#endif // TARGET_AMD64
{
uint64_t firstElement = static_cast<uint64_t>(childNode->gtSimdVal.u64[0]);
for (int i = 1; i < elementCount; i++)
{
uint64_t elementToCheck = static_cast<uint64_t>(childNode->gtSimdVal.u64[i]);
if (firstElement != elementToCheck)
{
isCreatedFromScalar = false;
break;
}
}
break;
}
default:
isCreatedFromScalar = false;
break;
isCreatedFromScalar = false;
}
#ifndef TARGET_64BIT
else if (varTypeIsLong(simdBaseType))
{
isCreatedFromScalar = false;
}
#endif // TARGET_64BIT
else
{
isCreatedFromScalar = childNode->IsBroadcast(simdBaseType);
}
if (isCreatedFromScalar)
{
NamedIntrinsic broadcastName = NI_AVX2_BroadcastScalarToVector128;


@@ -1542,6 +1542,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCount)
}
case NI_Vector64_ToVector128Unsafe:
case NI_Vector128_AsVector128Unsafe:
case NI_Vector128_AsVector3:
case NI_Vector128_GetLower:
{


@@ -2272,6 +2272,7 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree, int* pDstCount)
break;
}
case NI_Vector128_AsVector128Unsafe:
case NI_Vector128_AsVector2:
case NI_Vector128_AsVector3:
case NI_Vector128_ToVector256:


@@ -10679,10 +10679,16 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node)
default:
{
genTreeOps oper = node->HWOperGet();
bool isScalar = false;
genTreeOps oper = node->HWOperGet(&isScalar);
if (GenTreeHWIntrinsic::OperIsBitwiseHWIntrinsic(oper))
{
if (oper == GT_NOT)
{
break;
}
GenTree* op1 = node->Op(1);
GenTree* op2 = node->Op(2);
@@ -10861,11 +10867,19 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node)
}
}
// Transforms:
// 1.(~v1 & v2) to VectorXxx.AndNot(v1, v2)
// 2.(v1 & (~v2)) to VectorXxx.AndNot(v2, v1)
switch (node->HWOperGet())
bool isScalar = false;
genTreeOps oper = node->HWOperGet(&isScalar);
if (isScalar)
{
return node;
}
switch (oper)
{
// Transforms:
// 1.(~v1 & v2) to VectorXxx.AndNot(v1, v2)
// 2.(v1 & (~v2)) to VectorXxx.AndNot(v2, v1)
case GT_AND:
{
GenTree* op1 = node->Op(1);
@@ -10877,7 +10891,12 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node)
{
// Try handle: ~op1 & op2
GenTreeHWIntrinsic* hw = op1->AsHWIntrinsic();
genTreeOps hwOper = hw->HWOperGet();
genTreeOps hwOper = hw->HWOperGet(&isScalar);
if (isScalar)
{
return node;
}
if (hwOper == GT_NOT)
{
@@ -10906,7 +10925,12 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node)
{
// Try handle: op1 & ~op2
GenTreeHWIntrinsic* hw = op2->AsHWIntrinsic();
genTreeOps hwOper = hw->HWOperGet();
genTreeOps hwOper = hw->HWOperGet(&isScalar);
if (isScalar)
{
return node;
}
if (hwOper == GT_NOT)
{
@@ -11930,8 +11954,6 @@ GenTree* Compiler::fgMorphSmpOpOptional(GenTreeOp* tree, bool* optAssertionPropDone)
//
GenTree* Compiler::fgMorphMultiOp(GenTreeMultiOp* multiOp)
{
gtUpdateNodeOperSideEffects(multiOp);
bool dontCseConstArguments = false;
#if defined(FEATURE_HW_INTRINSICS)
// Opportunistically, avoid unexpected CSE for hw intrinsics with IMM arguments
@@ -11954,12 +11976,10 @@ GenTree* Compiler::fgMorphMultiOp(GenTreeMultiOp* multiOp)
for (GenTree** use : multiOp->UseEdges())
{
*use = fgMorphTree(*use);
GenTree* operand = *use;
multiOp->gtFlags |= (operand->gtFlags & GTF_ALL_EFFECT);
if (dontCseConstArguments && operand->OperIsConst())
if (dontCseConstArguments && operand->IsCnsIntOrI())
{
operand->SetDoNotCSE();
}
@@ -11978,10 +11998,33 @@ GenTree* Compiler::fgMorphMultiOp(GenTreeMultiOp* multiOp)
}
}
#if defined(FEATURE_HW_INTRINSICS)
if (opts.OptimizationEnabled() && multiOp->OperIs(GT_HWINTRINSIC))
gtUpdateNodeOperSideEffects(multiOp);
for (GenTree** use : multiOp->UseEdges())
{
GenTreeHWIntrinsic* hw = multiOp->AsHWIntrinsic();
GenTree* operand = *use;
multiOp->AddAllEffectsFlags(operand);
}
#if defined(FEATURE_HW_INTRINSICS)
if (opts.OptimizationEnabled() && multiOp->OperIsHWIntrinsic())
{
// Try to fold it; maybe we get lucky.
GenTree* foldedTree = gtFoldExpr(multiOp);
if (foldedTree != multiOp)
{
assert(!fgIsCommaThrow(foldedTree));
INDEBUG(foldedTree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
return foldedTree;
}
else if (!foldedTree->OperIsHWIntrinsic())
{
INDEBUG(foldedTree->gtDebugFlags |= GTF_DEBUG_NODE_MORPHED);
return foldedTree;
}
GenTreeHWIntrinsic* hw = foldedTree->AsHWIntrinsic();
// Move constant vectors from op1 to op2 for commutative and compare operations
if ((hw->GetOperandCount() == 2) && hw->Op(1)->IsVectorConst() &&


@@ -415,7 +415,7 @@ TBase EvaluateUnaryScalar(genTreeOps oper, TBase arg0)
}
template <typename TSimd, typename TBase>
void EvaluateUnarySimd(genTreeOps oper, bool scalar, TSimd* result, TSimd arg0)
void EvaluateUnarySimd(genTreeOps oper, bool scalar, TSimd* result, const TSimd& arg0)
{
uint32_t count = sizeof(TSimd) / sizeof(TBase);
@@ -445,7 +445,7 @@ void EvaluateUnarySimd(genTreeOps oper, bool scalar, TSimd* result, TSimd arg0)
}
template <typename TSimd>
void EvaluateUnarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd* result, TSimd arg0)
void EvaluateUnarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd* result, const TSimd& arg0)
{
switch (baseType)
{
@@ -532,7 +532,8 @@ TBase EvaluateBinaryScalarRSZ(TBase arg0, TBase arg1)
}
#else
// Other platforms enforce masking in their encoding
assert((arg1 >= 0) && (arg1 < (sizeof(TBase) * 8)));
unsigned shiftCountMask = (sizeof(TBase) * 8) - 1;
arg1 &= shiftCountMask;
#endif
return arg0 >> arg1;
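
For context on the masking change: hardware shift encodings generally use only the low bits of the count, so the constant evaluator now masks the count the same way instead of asserting it is in range. A small standalone illustration (not the commit's code):

#include <cstdint>

// Illustrative only: mask the shift count to the operand's bit width, the
// way the EvaluateBinaryScalar* helpers above now do, so that counts of
// bitwidth or more wrap around exactly as the hardware encoding would.
template <typename TBase>
TBase ShiftRightMasked(TBase arg0, TBase arg1)
{
    unsigned shiftCountMask = (sizeof(TBase) * 8) - 1;
    arg1 &= shiftCountMask;
    return arg0 >> arg1;
}

// ShiftRightMasked<uint8_t>(0x80, 9) == 0x40, matching a wrapped count of 1.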
@@ -608,7 +609,8 @@ TBase EvaluateBinaryScalarSpecialized(genTreeOps oper, TBase arg0, TBase arg1)
}
#else
// Other platforms enforce masking in their encoding
assert((arg1 >= 0) && (arg1 < (sizeof(TBase) * 8)));
unsigned shiftCountMask = (sizeof(TBase) * 8) - 1;
arg1 &= shiftCountMask;
#endif
return arg0 << arg1;
}
@@ -647,7 +649,8 @@ TBase EvaluateBinaryScalarSpecialized(genTreeOps oper, TBase arg0, TBase arg1)
}
#else
// Other platforms enforce masking in their encoding
assert((arg1 >= 0) && (arg1 < (sizeof(TBase) * 8)));
unsigned shiftCountMask = (sizeof(TBase) * 8) - 1;
arg1 &= shiftCountMask;
#endif
return arg0 >> arg1;
}
@@ -722,7 +725,7 @@ TBase EvaluateBinaryScalar(genTreeOps oper, TBase arg0, TBase arg1)
}
template <typename TSimd, typename TBase>
void EvaluateBinarySimd(genTreeOps oper, bool scalar, TSimd* result, TSimd arg0, TSimd arg1)
void EvaluateBinarySimd(genTreeOps oper, bool scalar, TSimd* result, const TSimd& arg0, const TSimd& arg1)
{
uint32_t count = sizeof(TSimd) / sizeof(TBase);
@@ -755,7 +758,8 @@ void EvaluateBinarySimd(genTreeOps oper, bool scalar, TSimd* result, TSimd arg0,
}
template <typename TSimd>
void EvaluateBinarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd* result, TSimd arg0, TSimd arg1)
void EvaluateBinarySimd(
genTreeOps oper, bool scalar, var_types baseType, TSimd* result, const TSimd& arg0, const TSimd& arg1)
{
switch (baseType)
{
@@ -826,6 +830,168 @@ void EvaluateBinarySimd(genTreeOps oper, bool scalar, var_types baseType, TSimd*
}
}
template <typename TSimd>
double EvaluateGetElementFloating(var_types simdBaseType, const TSimd& arg0, int32_t arg1)
{
switch (simdBaseType)
{
case TYP_FLOAT:
{
return arg0.f32[arg1];
}
case TYP_DOUBLE:
{
return arg0.f64[arg1];
}
default:
{
unreached();
}
}
}
template <typename TSimd>
int64_t EvaluateGetElementIntegral(var_types simdBaseType, const TSimd& arg0, int32_t arg1)
{
switch (simdBaseType)
{
case TYP_BYTE:
{
return arg0.i8[arg1];
}
case TYP_UBYTE:
{
return arg0.u8[arg1];
}
case TYP_SHORT:
{
return arg0.i16[arg1];
}
case TYP_USHORT:
{
return arg0.u16[arg1];
}
case TYP_INT:
{
return arg0.i32[arg1];
}
case TYP_UINT:
{
return arg0.u32[arg1];
}
case TYP_LONG:
{
return arg0.i64[arg1];
}
case TYP_ULONG:
{
return static_cast<int64_t>(arg0.u64[arg1]);
}
default:
{
unreached();
}
}
}
template <typename TSimd>
void EvaluateWithElementFloating(var_types simdBaseType, TSimd* result, const TSimd& arg0, int32_t arg1, double arg2)
{
*result = arg0;
switch (simdBaseType)
{
case TYP_FLOAT:
{
result->f32[arg1] = static_cast<float>(arg2);
break;
}
case TYP_DOUBLE:
{
result->f64[arg1] = arg2;
break;
}
default:
{
unreached();
}
}
}
template <typename TSimd>
void EvaluateWithElementIntegral(var_types simdBaseType, TSimd* result, const TSimd& arg0, int32_t arg1, int64_t arg2)
{
*result = arg0;
switch (simdBaseType)
{
case TYP_BYTE:
{
result->i8[arg1] = static_cast<int8_t>(arg2);
break;
}
case TYP_UBYTE:
{
result->u8[arg1] = static_cast<uint8_t>(arg2);
break;
}
case TYP_SHORT:
{
result->i16[arg1] = static_cast<int16_t>(arg2);
break;
}
case TYP_USHORT:
{
result->u16[arg1] = static_cast<uint16_t>(arg2);
break;
}
case TYP_INT:
{
result->i32[arg1] = static_cast<int32_t>(arg2);
break;
}
case TYP_UINT:
{
result->u32[arg1] = static_cast<uint32_t>(arg2);
break;
}
case TYP_LONG:
{
result->i64[arg1] = static_cast<int64_t>(arg2);
break;
}
case TYP_ULONG:
{
result->u64[arg1] = static_cast<uint64_t>(arg2);
break;
}
default:
{
unreached();
}
}
}
template <typename TSimd, typename TBase>
void BroadcastConstantToSimd(TSimd* result, TBase arg0)
{


@@ -1733,7 +1733,8 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic,
{
assert(retType == TYP_VOID);
assert(simdBaseType == TYP_FLOAT);
assert((simdSize == 12) || (simdSize == 16));
assert(simdSize == 12);
assert(simdType == TYP_SIMD12);
// TODO-CQ: We should be able to check for contiguous args here after
// the relevant methods are updated to support more than just float
@@ -1743,21 +1744,19 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic intrinsic,
GenTreeVecCon* vecCon = op2->AsVecCon();
vecCon->gtType = simdType;
if (simdSize == 12)
{
vecCon->gtSimdVal.f32[2] = static_cast<float>(op3->AsDblCon()->DconValue());
}
else
{
vecCon->gtSimdVal.f32[3] = static_cast<float>(op3->AsDblCon()->DconValue());
}
copyBlkSrc = vecCon;
vecCon->gtSimdVal.f32[2] = static_cast<float>(op3->AsDblCon()->DconValue());
copyBlkSrc = vecCon;
}
else
{
GenTree* idx = gtNewIconNode((simdSize == 12) ? 2 : 3, TYP_INT);
copyBlkSrc = gtNewSimdWithElementNode(simdType, op2, idx, op3, simdBaseJitType, simdSize);
GenTree* idx = gtNewIconNode(2, TYP_INT);
op2 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, NI_Vector128_AsVector128Unsafe, simdBaseJitType,
12);
op2 = gtNewSimdWithElementNode(TYP_SIMD16, op2, idx, op3, simdBaseJitType, 16);
copyBlkSrc =
gtNewSimdHWIntrinsicNode(TYP_SIMD12, op2, NI_Vector128_AsVector3, simdBaseJitType, 16);
}
copyBlkDst = op1;

File diff suppressed because it is too large.


@@ -447,13 +447,13 @@ public:
ValueNum VNForByrefCon(target_size_t byrefVal);
#if defined(FEATURE_SIMD)
ValueNum VNForSimd8Con(simd8_t cnsVal);
ValueNum VNForSimd12Con(simd12_t cnsVal);
ValueNum VNForSimd16Con(simd16_t cnsVal);
ValueNum VNForSimd8Con(const simd8_t& cnsVal);
ValueNum VNForSimd12Con(const simd12_t& cnsVal);
ValueNum VNForSimd16Con(const simd16_t& cnsVal);
#if defined(TARGET_XARCH)
ValueNum VNForSimd32Con(simd32_t cnsVal);
ValueNum VNForSimd64Con(simd64_t cnsVal);
ValueNum VNForSimdMaskCon(simdmask_t cnsVal);
ValueNum VNForSimd32Con(const simd32_t& cnsVal);
ValueNum VNForSimd64Con(const simd64_t& cnsVal);
ValueNum VNForSimdMaskCon(const simdmask_t& cnsVal);
#endif // TARGET_XARCH
#endif // FEATURE_SIMD
ValueNum VNForGenericCon(var_types typ, uint8_t* cnsVal);
@@ -553,11 +553,20 @@ public:
ValueNum VNAllBitsForType(var_types typ);
#ifdef FEATURE_SIMD
// Returns the value number broadcast of the given "simdType" and "simdBaseType".
ValueNum VNBroadcastForSimdType(var_types simdType, var_types simdBaseType, ValueNum valVN);
// Returns the value number for one of the given "simdType" and "simdBaseType".
ValueNum VNOneForSimdType(var_types simdType, var_types simdBaseType);
// A helper function for constructing VNF_SimdType VNs.
ValueNum VNForSimdType(unsigned simdSize, CorInfoType simdBaseJitType);
// Returns true if a value number represents NaN in all elements
bool VNIsVectorNaN(var_types simdType, var_types simdBaseType, ValueNum valVN);
// Returns true if a value number represents negative zero in all elements
bool VNIsVectorNegativeZero(var_types simdType, var_types simdBaseType, ValueNum valVN);
#endif // FEATURE_SIMD
// Create or return the existing value number representing a singleton exception set
@@ -1211,32 +1220,25 @@ public:
EvalMathFuncBinary(typ, mthFunc, arg0VNP.GetConservative(), arg1VNP.GetConservative()));
}
ValueNum EvalHWIntrinsicFunUnary(var_types type,
var_types baseType,
NamedIntrinsic ni,
VNFunc func,
ValueNum arg0VN,
bool encodeResultType,
ValueNum resultTypeVN);
#if defined(FEATURE_HW_INTRINSICS)
ValueNum EvalHWIntrinsicFunUnary(
GenTreeHWIntrinsic* tree, VNFunc func, ValueNum arg0VN, bool encodeResultType, ValueNum resultTypeVN);
ValueNum EvalHWIntrinsicFunBinary(var_types type,
var_types baseType,
NamedIntrinsic ni,
VNFunc func,
ValueNum arg0VN,
ValueNum arg1VN,
bool encodeResultType,
ValueNum resultTypeVN);
ValueNum EvalHWIntrinsicFunBinary(GenTreeHWIntrinsic* tree,
VNFunc func,
ValueNum arg0VN,
ValueNum arg1VN,
bool encodeResultType,
ValueNum resultTypeVN);
ValueNum EvalHWIntrinsicFunTernary(var_types type,
var_types baseType,
NamedIntrinsic ni,
VNFunc func,
ValueNum arg0VN,
ValueNum arg1VN,
ValueNum arg2VN,
bool encodeResultType,
ValueNum resultTypeVN);
ValueNum EvalHWIntrinsicFunTernary(GenTreeHWIntrinsic* tree,
VNFunc func,
ValueNum arg0VN,
ValueNum arg1VN,
ValueNum arg2VN,
bool encodeResultType,
ValueNum resultTypeVN);
#endif // FEATURE_HW_INTRINSICS
// Returns "true" iff "vn" represents a function application.
bool IsVNFunc(ValueNum vn);
@@ -1611,12 +1613,12 @@ private:
#if defined(FEATURE_SIMD)
struct Simd8PrimitiveKeyFuncs : public JitKeyFuncsDefEquals<simd8_t>
{
static bool Equals(simd8_t x, simd8_t y)
static bool Equals(const simd8_t& x, const simd8_t& y)
{
return x == y;
}
static unsigned GetHashCode(const simd8_t val)
static unsigned GetHashCode(const simd8_t& val)
{
unsigned hash = 0;
@@ -1640,12 +1642,12 @@ private:
struct Simd12PrimitiveKeyFuncs : public JitKeyFuncsDefEquals<simd12_t>
{
static bool Equals(simd12_t x, simd12_t y)
static bool Equals(const simd12_t& x, const simd12_t& y)
{
return x == y;
}
static unsigned GetHashCode(const simd12_t val)
static unsigned GetHashCode(const simd12_t& val)
{
unsigned hash = 0;
@@ -1670,12 +1672,12 @@ private:
struct Simd16PrimitiveKeyFuncs : public JitKeyFuncsDefEquals<simd16_t>
{
static bool Equals(simd16_t x, simd16_t y)
static bool Equals(const simd16_t& x, const simd16_t& y)
{
return x == y;
}
static unsigned GetHashCode(const simd16_t val)
static unsigned GetHashCode(const simd16_t& val)
{
unsigned hash = 0;
@@ -1702,12 +1704,12 @@ private:
#if defined(TARGET_XARCH)
struct Simd32PrimitiveKeyFuncs : public JitKeyFuncsDefEquals<simd32_t>
{
static bool Equals(simd32_t x, simd32_t y)
static bool Equals(const simd32_t& x, const simd32_t& y)
{
return x == y;
}
static unsigned GetHashCode(const simd32_t val)
static unsigned GetHashCode(const simd32_t& val)
{
unsigned hash = 0;
@@ -1737,12 +1739,12 @@ private:
struct Simd64PrimitiveKeyFuncs : public JitKeyFuncsDefEquals<simd64_t>
{
static bool Equals(simd64_t x, simd64_t y)
static bool Equals(const simd64_t& x, const simd64_t& y)
{
return x == y;
}
static unsigned GetHashCode(const simd64_t val)
static unsigned GetHashCode(const simd64_t& val)
{
unsigned hash = 0;
@@ -1780,12 +1782,12 @@ private:
struct SimdMaskPrimitiveKeyFuncs : public JitKeyFuncsDefEquals<simdmask_t>
{
static bool Equals(simdmask_t x, simdmask_t y)
static bool Equals(const simdmask_t& x, const simdmask_t& y)
{
return x == y;
}
static unsigned GetHashCode(const simdmask_t val)
static unsigned GetHashCode(const simdmask_t& val)
{
unsigned hash = 0;