mirror of
https://github.com/VSadov/Satori.git
synced 2025-06-08 03:27:04 +09:00
* fix #105969 * formatting * reset the base type of some intrinsics when the actual base type mismatches with the data type implied by the intrinsics. * Move the normalization to import. * clean up * bug fix * formatting. * bug fix * resolve comments. * Ensure that we're computing the correct memory operand size for disassembly * Ensure that we correctly handled rewriting PTESTM to account for a nested AND having an embedded broadcast --------- Co-authored-by: Tanner Gooding <tagoo@outlook.com>
This commit is contained in:
parent
0c67acb240
commit
ab3c7dae14
11 changed files with 205 additions and 73 deletions
|
@ -2249,8 +2249,7 @@ protected:
|
|||
ssize_t emitGetInsCIdisp(instrDesc* id);
|
||||
unsigned emitGetInsCIargs(instrDesc* id);
|
||||
|
||||
inline emitAttr emitGetMemOpSize(instrDesc* id) const;
|
||||
inline emitAttr emitGetBaseMemOpSize(instrDesc*) const;
|
||||
inline emitAttr emitGetMemOpSize(instrDesc* id, bool ignoreEmbeddedBroadcast = false) const;
|
||||
|
||||
// Return the argument count for a direct call "id".
|
||||
int emitGetInsCDinfo(instrDesc* id);
|
||||
|
@ -3962,51 +3961,11 @@ inline unsigned emitter::emitGetInsCIargs(instrDesc* id)
|
|||
//-----------------------------------------------------------------------------
|
||||
// emitGetMemOpSize: Get the memory operand size of instrDesc.
|
||||
//
|
||||
// Note: there are cases when embedded broadcast is enabled, so the memory operand
|
||||
// size is different from the intrinsic simd size, we will check here if emitter is
|
||||
// emitting an embedded-broadcast-enabled instruction.
|
||||
|
||||
// Arguments:
|
||||
// id - Instruction descriptor
|
||||
// id - Instruction descriptor
|
||||
// ignoreEmbeddedBroadcast - true to get the non-embedded operand size; otherwise false
|
||||
//
|
||||
emitAttr emitter::emitGetMemOpSize(instrDesc* id) const
{
    // Without the EVEX.b context the operand size is whatever the base computation reports.
    if (!id->idIsEvexbContextSet())
    {
        return emitGetBaseMemOpSize(id);
    }

    // should have the assumption that Evex.b now stands for the embedded broadcast context.
    // reference: Section 2.7.5 in Intel 64 and ia-32 architectures software developer's manual volume 2.
    const ssize_t broadcastElemSize = GetInputSizeInBytes(id);

    if (broadcastElemSize == 4)
    {
        return EA_4BYTE;
    }

    if (broadcastElemSize == 8)
    {
        return EA_8BYTE;
    }

    // Only 4-byte and 8-byte inputs are handled on the embedded broadcast path.
    unreached();
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// emitGetMemOpSize: Get the memory operand size of instrDesc.
|
||||
//
|
||||
// Note: vextractf128 has a 128-bit output (register or memory) but a 256-bit input (register).
|
||||
// vinsertf128 is the inverse with a 256-bit output (register), a 256-bit input(register),
|
||||
// and a 128-bit input (register or memory).
|
||||
// Similarly, vextractf64x4 has a 256-bit output and a 512-bit input, and vinsertf64x4 is the inverse.
|
||||
// This method is mainly used for such instructions to return the appropriate memory operand
|
||||
// size, otherwise returns the regular operand size of the instruction.
|
||||
|
||||
// Arguments:
|
||||
// id - Instruction descriptor
|
||||
//
|
||||
emitAttr emitter::emitGetBaseMemOpSize(instrDesc* id) const
|
||||
emitAttr emitter::emitGetMemOpSize(instrDesc* id, bool ignoreEmbeddedBroadcast) const
|
||||
{
|
||||
ssize_t memSize = 0;
|
||||
|
||||
|
@ -4022,7 +3981,7 @@ emitAttr emitter::emitGetBaseMemOpSize(instrDesc* id) const
|
|||
else if (tupleType == INS_TT_FULL)
|
||||
{
|
||||
// Embedded broadcast supported, so either loading scalar or full vector
|
||||
if (id->idIsEvexbContextSet())
|
||||
if (id->idIsEvexbContextSet() && !ignoreEmbeddedBroadcast)
|
||||
{
|
||||
memSize = GetInputSizeInBytes(id);
|
||||
}
|
||||
|
@ -4044,7 +4003,7 @@ emitAttr emitter::emitGetBaseMemOpSize(instrDesc* id) const
|
|||
{
|
||||
memSize = 16;
|
||||
}
|
||||
else if (id->idIsEvexbContextSet())
|
||||
else if (id->idIsEvexbContextSet() && !ignoreEmbeddedBroadcast)
|
||||
{
|
||||
memSize = GetInputSizeInBytes(id);
|
||||
}
|
||||
|
@ -4056,7 +4015,7 @@ emitAttr emitter::emitGetBaseMemOpSize(instrDesc* id) const
|
|||
else if (tupleType == INS_TT_HALF)
|
||||
{
|
||||
// Embedded broadcast supported, so either loading scalar or half vector
|
||||
if (id->idIsEvexbContextSet())
|
||||
if (id->idIsEvexbContextSet() && !ignoreEmbeddedBroadcast)
|
||||
{
|
||||
memSize = GetInputSizeInBytes(id);
|
||||
}
|
||||
|
|
|
@ -10987,7 +10987,7 @@ void emitter::emitDispEmbBroadcastCount(instrDesc* id) const
|
|||
return;
|
||||
}
|
||||
ssize_t baseSize = GetInputSizeInBytes(id);
|
||||
ssize_t vectorSize = (ssize_t)emitGetBaseMemOpSize(id);
|
||||
ssize_t vectorSize = (ssize_t)emitGetMemOpSize(id, /* ignoreEmbeddedBroadcast */ true);
|
||||
printf(" {1to%d}", vectorSize / baseSize);
|
||||
}
|
||||
|
||||
|
|
|
@ -20933,6 +20933,13 @@ GenTree* Compiler::gtNewSimdBinOpNode(
|
|||
std::swap(op1, op2);
|
||||
#endif // TARGET_XARCH
|
||||
}
|
||||
#ifdef TARGET_XARCH
|
||||
if (HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(intrinsic) && varTypeIsSmall(simdBaseType))
|
||||
{
|
||||
simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UINT : CORINFO_TYPE_INT;
|
||||
simdBaseType = JitType2PreciseVarType(simdBaseJitType);
|
||||
}
|
||||
#endif // TARGET_XARCH
|
||||
return gtNewSimdHWIntrinsicNode(type, op1, op2, intrinsic, simdBaseJitType, simdSize);
|
||||
}
|
||||
|
||||
|
@ -25691,6 +25698,15 @@ GenTree* Compiler::gtNewSimdTernaryLogicNode(var_types type,
|
|||
intrinsic = NI_AVX512F_VL_TernaryLogic;
|
||||
}
|
||||
|
||||
#ifdef TARGET_XARCH
|
||||
assert(HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(intrinsic));
|
||||
if (varTypeIsSmall(simdBaseType))
|
||||
{
|
||||
simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UINT : CORINFO_TYPE_INT;
|
||||
simdBaseType = JitType2PreciseVarType(simdBaseJitType);
|
||||
}
|
||||
#endif // TARGET_XARCH
|
||||
|
||||
return gtNewSimdHWIntrinsicNode(type, op1, op2, op3, op4, intrinsic, simdBaseJitType, simdSize);
|
||||
}
|
||||
#endif // TARGET_XARCH
|
||||
|
|
|
@ -6667,6 +6667,20 @@ struct GenTreeHWIntrinsic : public GenTreeJitIntrinsic
|
|||
|
||||
bool ShouldConstantProp(GenTree* operand, GenTreeVecCon* vecCon);
|
||||
|
||||
//------------------------------------------------------------------------
// NormalizeJitBaseTypeToInt: Replace a small SIMD base type on this node with
//    the 32-bit integer type of matching signedness.
//
// Arguments:
//    id           - The intrinsic the normalization is performed for (not read here)
//    simdBaseType - The current base type; asserted to be a small type
//
// Notes:
//    Unsigned small types become CORINFO_TYPE_UINT and signed small types become
//    CORINFO_TYPE_INT, the same mapping the import and gtNewSimd* paths apply for
//    intrinsics flagged HW_Flag_NormalizeSmallTypeToInt. Previously both branches
//    selected CORINFO_TYPE_UINT, so signed inputs were given an unsigned base type.
//
void NormalizeJitBaseTypeToInt(NamedIntrinsic id, var_types simdBaseType)
{
    assert(varTypeIsSmall(simdBaseType));

    // Preserve signedness while widening the element type to 32 bits.
    SetSimdBaseJitType(varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UINT : CORINFO_TYPE_INT);
}
|
||||
|
||||
private:
|
||||
void SetHWIntrinsicId(NamedIntrinsic intrinsicId);
|
||||
|
||||
|
|
|
@ -1818,6 +1818,13 @@ GenTree* Compiler::impHWIntrinsic(NamedIntrinsic intrinsic,
|
|||
if (simdBaseJitType != CORINFO_TYPE_UNDEF)
|
||||
{
|
||||
simdBaseType = JitType2PreciseVarType(simdBaseJitType);
|
||||
#ifdef TARGET_XARCH
|
||||
if (HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(intrinsic) && varTypeIsSmall(simdBaseType))
|
||||
{
|
||||
simdBaseJitType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UINT : CORINFO_TYPE_INT;
|
||||
simdBaseType = JitType2PreciseVarType(simdBaseJitType);
|
||||
}
|
||||
#endif // TARGET_XARCH
|
||||
}
|
||||
|
||||
const unsigned simdSize = HWIntrinsicInfo::lookupSimdSize(this, intrinsic, sig);
|
||||
|
|
|
@ -227,6 +227,9 @@ enum HWIntrinsicFlag : unsigned int
|
|||
|
||||
// The intrinsic is an embedded masking compatible intrinsic
|
||||
HW_Flag_EmbMaskingCompatible = 0x10000000,
|
||||
|
||||
// The base type of this intrinsic needs to be normalized to int/uint unless it is long/ulong.
|
||||
HW_Flag_NormalizeSmallTypeToInt = 0x20000000,
|
||||
#elif defined(TARGET_ARM64)
|
||||
|
||||
// The intrinsic has an enum operand. Using this implies HW_Flag_HasImmediateOperand.
|
||||
|
@ -755,6 +758,12 @@ struct HWIntrinsicInfo
|
|||
HWIntrinsicFlag flags = lookupFlags(id);
|
||||
return (flags & HW_Flag_MaybeMemoryStore) != 0;
|
||||
}
|
||||
|
||||
// Returns true when the flags looked up for "id" include HW_Flag_NormalizeSmallTypeToInt.
static bool NeedsNormalizeSmallTypeToInt(NamedIntrinsic id)
{
    return (lookupFlags(id) & HW_Flag_NormalizeSmallTypeToInt) != 0;
}
|
||||
#endif
|
||||
|
||||
static bool NoJmpTableImm(NamedIntrinsic id)
|
||||
|
|
|
@ -186,6 +186,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
|
|||
// We need to validate that other phases of the compiler haven't introduced unsupported intrinsics
|
||||
assert(compiler->compIsaSupportedDebugOnly(isa));
|
||||
assert(HWIntrinsicInfo::RequiresCodegen(intrinsicId));
|
||||
assert(!HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(intrinsicId) || !varTypeIsSmall(node->GetSimdBaseType()));
|
||||
|
||||
bool isTableDriven = genIsTableDrivenHWIntrinsic(intrinsicId, category);
|
||||
insOpts instOptions = INS_OPTS_NONE;
|
||||
|
|
|
@ -489,8 +489,8 @@ HARDWARE_INTRINSIC(SSE_X64, ConvertToInt64WithTruncation,
|
|||
HARDWARE_INTRINSIC(SSE2, Add, 16, 2, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_invalid, INS_addpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(SSE2, AddSaturate, 16, 2, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(SSE2, AddScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
|
||||
HARDWARE_INTRINSIC(SSE2, And, 16, 2, {INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_invalid, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(SSE2, AndNot, 16, 2, {INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_invalid, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(SSE2, And, 16, 2, {INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_invalid, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt)
|
||||
HARDWARE_INTRINSIC(SSE2, AndNot, 16, 2, {INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_invalid, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt)
|
||||
HARDWARE_INTRINSIC(SSE2, Average, 16, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(SSE2, CompareEqual, 16, 2, {INS_pcmpeqb, INS_pcmpeqb, INS_pcmpeqw, INS_pcmpeqw, INS_pcmpeqd, INS_pcmpeqd, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics)
|
||||
HARDWARE_INTRINSIC(SSE2, CompareGreaterThan, 16, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_ReturnsPerElementMask|HW_Flag_NoEvexSemantics)
|
||||
|
@ -562,7 +562,7 @@ HARDWARE_INTRINSIC(SSE2, MultiplyAddAdjacent,
|
|||
HARDWARE_INTRINSIC(SSE2, MultiplyHigh, 16, 2, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(SSE2, MultiplyLow, 16, 2, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(SSE2, MultiplyScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
|
||||
HARDWARE_INTRINSIC(SSE2, Or, 16, 2, {INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_invalid, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(SSE2, Or, 16, 2, {INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_invalid, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt)
|
||||
HARDWARE_INTRINSIC(SSE2, PackSignedSaturate, 16, 2, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(SSE2, PackUnsignedSaturate, 16, 2, {INS_invalid, INS_packuswb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(SSE2, ShiftLeftLogical, 16, 2, {INS_invalid, INS_invalid, INS_psllw, INS_psllw, INS_pslld, INS_pslld, INS_psllq, INS_psllq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
|
@ -588,7 +588,7 @@ HARDWARE_INTRINSIC(SSE2, SubtractScalar,
|
|||
HARDWARE_INTRINSIC(SSE2, SumAbsoluteDifferences, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
|
||||
HARDWARE_INTRINSIC(SSE2, UnpackHigh, 16, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_invalid, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(SSE2, UnpackLow, 16, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_invalid, INS_unpcklpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(SSE2, Xor, 16, 2, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_invalid, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_CanBenefitFromConstantProp)
|
||||
HARDWARE_INTRINSIC(SSE2, Xor, 16, 2, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_invalid, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_CanBenefitFromConstantProp|HW_Flag_NormalizeSmallTypeToInt)
|
||||
#define LAST_NI_SSE2 NI_SSE2_Xor
|
||||
|
||||
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
|
||||
|
@ -802,8 +802,8 @@ HARDWARE_INTRINSIC(AVX2, Abs,
|
|||
HARDWARE_INTRINSIC(AVX2, Add, 32, 2, {INS_paddb, INS_paddb, INS_paddw, INS_paddw, INS_paddd, INS_paddd, INS_paddq, INS_paddq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX2, AddSaturate, 32, 2, {INS_paddsb, INS_paddusb, INS_paddsw, INS_paddusw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX2, AlignRight, 32, 3, {INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_palignr, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX2, And, 32, 2, {INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX2, AndNot, 32, 2, {INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX2, And, 32, 2, {INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt)
|
||||
HARDWARE_INTRINSIC(AVX2, AndNot, 32, 2, {INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt)
|
||||
HARDWARE_INTRINSIC(AVX2, Average, 32, 2, {INS_invalid, INS_pavgb, INS_invalid, INS_pavgw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX2, Blend, -1, 3, {INS_invalid, INS_invalid, INS_pblendw, INS_pblendw, INS_vpblendd, INS_vpblendd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics)
|
||||
HARDWARE_INTRINSIC(AVX2, BlendVariable, 32, 3, {INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_vpblendvb, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoEvexSemantics)
|
||||
|
@ -840,7 +840,7 @@ HARDWARE_INTRINSIC(AVX2, MultiplyAddAdjacent,
|
|||
HARDWARE_INTRINSIC(AVX2, MultiplyHigh, 32, 2, {INS_invalid, INS_invalid, INS_pmulhw, INS_pmulhuw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX2, MultiplyHighRoundScale, 32, 2, {INS_invalid, INS_invalid, INS_pmulhrsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX2, MultiplyLow, 32, 2, {INS_invalid, INS_invalid, INS_pmullw, INS_pmullw, INS_pmulld, INS_pmulld, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX2, Or, 32, 2, {INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX2, Or, 32, 2, {INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt)
|
||||
HARDWARE_INTRINSIC(AVX2, PackSignedSaturate, 32, 2, {INS_packsswb, INS_invalid, INS_packssdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX2, PackUnsignedSaturate, 32, 2, {INS_invalid, INS_packuswb, INS_invalid, INS_packusdw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX2, Permute2x128, 32, 3, {INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_vperm2i128, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_NoEvexSemantics)
|
||||
|
@ -863,7 +863,7 @@ HARDWARE_INTRINSIC(AVX2, SubtractSaturate,
|
|||
HARDWARE_INTRINSIC(AVX2, SumAbsoluteDifferences, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_psadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
|
||||
HARDWARE_INTRINSIC(AVX2, UnpackHigh, 32, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX2, UnpackLow, 32, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX2, Xor, 32, 2, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_CanBenefitFromConstantProp)
|
||||
HARDWARE_INTRINSIC(AVX2, Xor, 32, 2, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_CanBenefitFromConstantProp|HW_Flag_NormalizeSmallTypeToInt)
|
||||
#define LAST_NI_AVX2 NI_AVX2_Xor
|
||||
|
||||
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
|
||||
|
@ -877,8 +877,8 @@ HARDWARE_INTRINSIC(AVX512F, Add,
|
|||
HARDWARE_INTRINSIC(AVX512F, AddScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_addss, INS_addsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX512F, AlignRight32, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignd, INS_valignd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX512F, AlignRight64, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_valignq, INS_valignq, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX512F, And, 64, 2, {INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_vpandq, INS_vpandq, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX512F, AndNot, 64, 2, {INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_vpandnq, INS_vpandnq, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX512F, And, 64, 2, {INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_pand, INS_vpandq, INS_vpandq, INS_andps, INS_andpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt)
|
||||
HARDWARE_INTRINSIC(AVX512F, AndNot, 64, 2, {INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_pandn, INS_vpandnq, INS_vpandnq, INS_andnps, INS_andnpd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt)
|
||||
HARDWARE_INTRINSIC(AVX512F, BlendVariable, 64, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_InvalidNodeId)
|
||||
HARDWARE_INTRINSIC(AVX512F, BroadcastScalarToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpbroadcastd, INS_vpbroadcastd, INS_vpbroadcastq, INS_vpbroadcastq, INS_vbroadcastss, INS_vbroadcastsd}, HW_Category_SIMDScalar, HW_Flag_NoFlag)
|
||||
HARDWARE_INTRINSIC(AVX512F, BroadcastVector128ToVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vbroadcasti128, INS_vbroadcasti128, INS_invalid, INS_invalid, INS_vbroadcastf128, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_MaybeMemoryLoad)
|
||||
|
@ -933,7 +933,7 @@ HARDWARE_INTRINSIC(AVX512F, DivideScalar,
|
|||
HARDWARE_INTRINSIC(AVX512F, DuplicateEvenIndexed, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movsldup, INS_movddup}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
|
||||
HARDWARE_INTRINSIC(AVX512F, DuplicateOddIndexed, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movshdup, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
|
||||
HARDWARE_INTRINSIC(AVX512F, ExtractVector128, 64, 2, {INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextractf128, INS_vextractf128}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX512F, ExtractVector256, 64, 2, {INS_vextracti64x4, INS_vextracti64x4, INS_vextracti64x4, INS_vextracti64x4, INS_vextracti64x4, INS_vextracti64x4, INS_vextracti64x4, INS_vextracti64x4, INS_vextractf64x4, INS_vextractf64x4}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX512F, ExtractVector256, 64, 2, {INS_vextracti32x8, INS_vextracti32x8, INS_vextracti32x8, INS_vextracti32x8, INS_vextracti32x8, INS_vextracti32x8, INS_vextracti64x4, INS_vextracti64x4, INS_vextractf64x4, INS_vextractf64x4}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt)
|
||||
HARDWARE_INTRINSIC(AVX512F, Fixup, 64, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmps, INS_vfixupimmpd}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX512F, FixupScalar, 16, 4, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfixupimmss, INS_vfixupimmsd}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_CopyUpperBits)
|
||||
HARDWARE_INTRINSIC(AVX512F, FusedMultiplyAdd, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vfmadd213ps, INS_vfmadd213pd}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_FmaIntrinsic|HW_Flag_RmwIntrinsic|HW_Flag_EmbRoundingCompatible|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
|
@ -951,7 +951,7 @@ HARDWARE_INTRINSIC(AVX512F, GetExponentScalar,
|
|||
HARDWARE_INTRINSIC(AVX512F, GetMantissa, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantps, INS_vgetmantpd}, HW_Category_IMM, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX512F, GetMantissaScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vgetmantss, INS_vgetmantsd}, HW_Category_IMM, HW_Flag_CopyUpperBits)
|
||||
HARDWARE_INTRINSIC(AVX512F, InsertVector128, 64, 3, {INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinserti128, INS_vinsertf128, INS_vinsertf128}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX512F, InsertVector256, 64, 3, {INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinserti64x4, INS_vinsertf64x4, INS_vinsertf64x4}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX512F, InsertVector256, 64, 3, {INS_vinserti32x8, INS_vinserti32x8, INS_vinserti32x8, INS_vinserti32x8, INS_vinserti32x8, INS_vinserti32x8, INS_vinserti64x4, INS_vinserti64x4, INS_vinsertf64x4, INS_vinsertf64x4}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt)
|
||||
HARDWARE_INTRINSIC(AVX512F, LoadAlignedVector512, 64, 1, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_vmovdqa64, INS_vmovdqa64, INS_movaps, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
|
||||
HARDWARE_INTRINSIC(AVX512F, LoadAlignedVector512NonTemporal, 64, 1, {INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_movntdqa, INS_invalid, INS_invalid}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
|
||||
HARDWARE_INTRINSIC(AVX512F, LoadVector512, 64, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_InvalidNodeId)
|
||||
|
@ -960,7 +960,7 @@ HARDWARE_INTRINSIC(AVX512F, Min,
|
|||
HARDWARE_INTRINSIC(AVX512F, Multiply, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuldq, INS_pmuludq, INS_mulps, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_MaybeCommutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX512F, MultiplyLow, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmulld, INS_pmulld, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX512F, MultiplyScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mulss, INS_mulsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX512F, Or, 64, 2, {INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_vporq, INS_vporq, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX512F, Or, 64, 2, {INS_por, INS_por, INS_por, INS_por, INS_por, INS_por, INS_vporq, INS_vporq, INS_orps, INS_orpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt)
|
||||
HARDWARE_INTRINSIC(AVX512F, Permute2x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilpd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX512F, Permute4x32, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermilps, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX512F, Permute4x64, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpermq, INS_vpermq, INS_invalid, INS_vpermpd}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
|
@ -997,10 +997,10 @@ HARDWARE_INTRINSIC(AVX512F, StoreAligned,
|
|||
HARDWARE_INTRINSIC(AVX512F, StoreAlignedNonTemporal, 64, 2, {INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntdq, INS_movntps, INS_movntpd}, HW_Category_MemoryStore, HW_Flag_BaseTypeFromSecondArg)
|
||||
HARDWARE_INTRINSIC(AVX512F, Subtract, 64, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_psubd, INS_psubd, INS_psubq, INS_psubq, INS_subps, INS_subpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_EmbRoundingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX512F, SubtractScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subss, INS_subsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX512F, TernaryLogic, 64, 4, {INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogq, INS_vpternlogq, INS_vpternlogd, INS_vpternlogq}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX512F, TernaryLogic, 64, 4, {INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogq, INS_vpternlogq, INS_vpternlogd, INS_vpternlogq}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt)
|
||||
HARDWARE_INTRINSIC(AVX512F, UnpackHigh, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_unpckhps, INS_unpckhpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX512F, UnpackLow, 64, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_unpcklps, INS_unpcklpd}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX512F, Xor, 64, 2, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_vpxorq, INS_vpxorq, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_CanBenefitFromConstantProp)
|
||||
HARDWARE_INTRINSIC(AVX512F, Xor, 64, 2, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_vpxorq, INS_vpxorq, INS_xorps, INS_xorpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_CanBenefitFromConstantProp|HW_Flag_NormalizeSmallTypeToInt)
|
||||
#define LAST_NI_AVX512F NI_AVX512F_Xor
|
||||
|
||||
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
|
||||
|
@ -1057,7 +1057,7 @@ HARDWARE_INTRINSIC(AVX512F_VL, Scale,
|
|||
HARDWARE_INTRINSIC(AVX512F_VL, ShiftRightArithmetic, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsraq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_MaybeIMM|HW_Flag_NoJmpTableIMM|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX512F_VL, ShiftRightArithmeticVariable, -1, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vpsravq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX512F_VL, Shuffle2x128, 32, 3, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_vshufi32x4, INS_vshufi32x4, INS_vshufi64x2, INS_vshufi64x2, INS_vshuff32x4, INS_vshuff64x2}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX512F_VL, TernaryLogic, -1, 4, {INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogq, INS_vpternlogq, INS_vpternlogd, INS_vpternlogq}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX512F_VL, TernaryLogic, -1, 4, {INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogq, INS_vpternlogq, INS_vpternlogd, INS_vpternlogq}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt)
|
||||
#define LAST_NI_AVX512F_VL NI_AVX512F_VL_TernaryLogic
|
||||
|
||||
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
|
||||
|
@ -1356,7 +1356,7 @@ HARDWARE_INTRINSIC(AVX10v1, Shuffle2x128,
|
|||
HARDWARE_INTRINSIC(AVX10v1, SqrtScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_sqrtss, INS_sqrtsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX10v1, SubtractScalar, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_subss, INS_subsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits|HW_Flag_EmbRoundingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX10v1, SumAbsoluteDifferencesInBlock32, -1, 3, {INS_invalid, INS_invalid, INS_invalid, INS_vdbpsadbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX10v1, TernaryLogic, -1, 4, {INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogq, INS_vpternlogq, INS_vpternlogd, INS_vpternlogq}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible)
|
||||
HARDWARE_INTRINSIC(AVX10v1, TernaryLogic, -1, 4, {INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogd, INS_vpternlogq, INS_vpternlogq, INS_vpternlogd, INS_vpternlogq}, HW_Category_IMM, HW_Flag_SpecialImport|HW_Flag_FullRangeIMM|HW_Flag_EmbBroadcastCompatible|HW_Flag_EmbMaskingCompatible|HW_Flag_NormalizeSmallTypeToInt)
|
||||
#define LAST_NI_AVX10v1 NI_AVX10v1_TernaryLogic
|
||||
|
||||
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
|
||||
|
|
|
@ -1672,6 +1672,11 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
|
|||
BlockRange().InsertBefore(userIntrin, op4);
|
||||
|
||||
userIntrin->ResetHWIntrinsicId(ternaryLogicId, comp, op1, op2, op3, op4);
|
||||
if (varTypeIsSmall(simdBaseType))
|
||||
{
|
||||
assert(HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(ternaryLogicId));
|
||||
userIntrin->NormalizeJitBaseTypeToInt(ternaryLogicId, simdBaseType);
|
||||
}
|
||||
return nextNode;
|
||||
}
|
||||
}
|
||||
|
@ -1737,6 +1742,11 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
|
|||
BlockRange().InsertBefore(node, control);
|
||||
|
||||
node->ResetHWIntrinsicId(ternaryLogicId, comp, op1, op2, op3, control);
|
||||
if (varTypeIsSmall(simdBaseType))
|
||||
{
|
||||
assert(HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(ternaryLogicId));
|
||||
node->NormalizeJitBaseTypeToInt(ternaryLogicId, simdBaseType);
|
||||
}
|
||||
return LowerNode(node);
|
||||
}
|
||||
}
|
||||
|
@ -1823,6 +1833,10 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
|
|||
LowerNode(op2);
|
||||
|
||||
node->ResetHWIntrinsicId(intrinsicId, comp, op1, op2);
|
||||
if (HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(intrinsicId) && varTypeIsSmall(simdBaseType))
|
||||
{
|
||||
node->NormalizeJitBaseTypeToInt(intrinsicId, simdBaseType);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1882,6 +1896,10 @@ GenTree* Lowering::LowerHWIntrinsic(GenTreeHWIntrinsic* node)
|
|||
LowerNode(op3);
|
||||
|
||||
node->ResetHWIntrinsicId(intrinsicId, comp, op1, op2, op3);
|
||||
if (HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(intrinsicId) && varTypeIsSmall(simdBaseType))
|
||||
{
|
||||
node->NormalizeJitBaseTypeToInt(intrinsicId, simdBaseType);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -3052,11 +3070,12 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm
|
|||
GenTreeHWIntrinsic* op1Intrinsic = op1->AsHWIntrinsic();
|
||||
NamedIntrinsic op1IntrinsicId = op1Intrinsic->GetHWIntrinsicId();
|
||||
|
||||
switch (op1IntrinsicId)
|
||||
bool isScalar = false;
|
||||
genTreeOps oper = op1Intrinsic->GetOperForHWIntrinsicId(&isScalar);
|
||||
|
||||
switch (oper)
|
||||
{
|
||||
case NI_AVX512F_And:
|
||||
case NI_AVX512DQ_And:
|
||||
case NI_AVX10v1_V512_And:
|
||||
case GT_AND:
|
||||
{
|
||||
// We have `(x & y) == 0` with GenCondition::EQ (jz, setz, cmovz)
|
||||
// or `(x & y) != 0` with GenCondition::NE (jnz, setnz, cmovnz)
|
||||
|
@ -3076,11 +3095,62 @@ GenTree* Lowering::LowerHWIntrinsicCmpOp(GenTreeHWIntrinsic* node, genTreeOps cm
|
|||
|
||||
assert(testIntrinsicId == NI_EVEX_PTESTM);
|
||||
|
||||
node->Op(1) = op1Intrinsic->Op(1);
|
||||
node->Op(2) = op1Intrinsic->Op(2);
|
||||
GenTree* nestedOp1 = op1Intrinsic->Op(1);
|
||||
GenTree* nestedOp2 = op1Intrinsic->Op(2);
|
||||
|
||||
if (nestedOp2->isContained() && nestedOp2->OperIsHWIntrinsic())
|
||||
{
|
||||
GenTreeHWIntrinsic* nestedIntrin = nestedOp2->AsHWIntrinsic();
|
||||
NamedIntrinsic nestedIntrinId = nestedIntrin->GetHWIntrinsicId();
|
||||
|
||||
if ((nestedIntrinId == NI_SSE3_MoveAndDuplicate) ||
|
||||
(nestedIntrinId == NI_AVX2_BroadcastScalarToVector128) ||
|
||||
(nestedIntrinId == NI_AVX2_BroadcastScalarToVector256) ||
|
||||
(nestedIntrinId == NI_AVX512F_BroadcastScalarToVector512))
|
||||
{
|
||||
// We need to rewrite the embedded broadcast back to a regular constant
|
||||
// so that the subsequent containment check for ptestm can determine
|
||||
// if the embedded broadcast is still relevant
|
||||
|
||||
GenTree* broadcastOp = nestedIntrin->Op(1);
|
||||
|
||||
if (broadcastOp->OperIsHWIntrinsic(NI_Vector128_CreateScalarUnsafe))
|
||||
{
|
||||
BlockRange().Remove(broadcastOp);
|
||||
broadcastOp = broadcastOp->AsHWIntrinsic()->Op(1);
|
||||
}
|
||||
|
||||
assert(broadcastOp->OperIsConst());
|
||||
|
||||
GenTree* vecCns =
|
||||
comp->gtNewSimdCreateBroadcastNode(simdType, broadcastOp,
|
||||
op1Intrinsic->GetSimdBaseJitType(), simdSize);
|
||||
|
||||
BlockRange().InsertAfter(broadcastOp, vecCns);
|
||||
nestedOp2 = vecCns;
|
||||
|
||||
BlockRange().Remove(broadcastOp);
|
||||
BlockRange().Remove(nestedIntrin);
|
||||
}
|
||||
}
|
||||
|
||||
node->Op(1) = nestedOp1;
|
||||
node->Op(2) = nestedOp2;
|
||||
|
||||
// Make sure we aren't contained since ptestm will do its own containment check
|
||||
node->Op(2)->ClearContained();
|
||||
nestedOp2->ClearContained();
|
||||
|
||||
if (varTypeIsSmall(simdBaseType))
|
||||
{
|
||||
// Fixup the base type so embedded broadcast and the mask size checks still work
|
||||
node->NormalizeJitBaseTypeToInt(testIntrinsicId, simdBaseType);
|
||||
|
||||
simdBaseJitType = node->GetSimdBaseJitType();
|
||||
simdBaseType = node->GetSimdBaseType();
|
||||
|
||||
maskBaseJitType = simdBaseJitType;
|
||||
maskBaseType = simdBaseType;
|
||||
}
|
||||
|
||||
BlockRange().Remove(op1);
|
||||
BlockRange().Remove(op2);
|
||||
|
@ -3458,6 +3528,11 @@ GenTree* Lowering::LowerHWIntrinsicCndSel(GenTreeHWIntrinsic* node)
|
|||
BlockRange().InsertBefore(node, control);
|
||||
|
||||
node->ResetHWIntrinsicId(ternaryLogicId, comp, op1, op2, op3, control);
|
||||
if (varTypeIsSmall(simdBaseType))
|
||||
{
|
||||
assert(HWIntrinsicInfo::NeedsNormalizeSmallTypeToInt(ternaryLogicId));
|
||||
node->NormalizeJitBaseTypeToInt(ternaryLogicId, simdBaseType);
|
||||
}
|
||||
return LowerNode(node);
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
// Licensed to the .NET Foundation under one or more agreements.
|
||||
// The .NET Foundation licenses this file to you under the MIT license.
|
||||
|
||||
using System;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Numerics;
|
||||
using System.Runtime.Intrinsics;
|
||||
using System.Runtime.Intrinsics.X86;
|
||||
using Xunit;
|
||||
|
||||
// Generated by Fuzzlyn v2.2 on 2024-08-04 15:50:11
|
||||
// Run on X64 Linux
|
||||
// Seed: 9513455124659677293-vectort,vector128,vector256,vector512,x86aes,x86avx,x86avx2,x86avx512bw,x86avx512bwvl,x86avx512cd,x86avx512cdvl,x86avx512dq,x86avx512dqvl,x86avx512f,x86avx512fvl,x86avx512fx64,x86avx512vbmi,x86avx512vbmivl,x86bmi1,x86bmi1x64,x86bmi2,x86bmi2x64,x86fma,x86lzcnt,x86lzcntx64,x86pclmulqdq,x86popcnt,x86popcntx64,x86sse,x86ssex64,x86sse2,x86sse2x64,x86sse3,x86sse41,x86sse41x64,x86sse42,x86sse42x64,x86ssse3,x86x86base
|
||||
// Reduced from 26.0 KiB to 1.1 KiB in 00:00:56
|
||||
// Debug: Outputs <0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>
|
||||
// Release: Outputs <0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0>
|
||||
|
||||
// Regression test for issue 105969 (per the class name). The file header records that the
// reduced Fuzzlyn program printed different vectors in Debug and Release builds.
public class Runtime_105969
|
||||
{
|
||||
// Scalar input for vr16 below; nothing in the visible test assigns it.
public static byte s_5;
|
||||
|
||||
[Fact]
|
||||
// Builds a Vector512<byte> through a chain of AVX-512 intrinsic calls and asserts that the
// final vector equals a fixed expected value.
public static void TestEntryPoint()
|
||||
{
|
||||
// The whole body is skipped unless Avx512BW reports support.
if (Avx512BW.IsSupported)
|
||||
{
|
||||
var vr16 = Vector512.CreateScalar(s_5);
|
||||
var vr17 = Vector512.Create<byte>(1);
|
||||
var vr18 = (byte)0;
|
||||
var vr19 = Vector512.CreateScalar(vr18);
|
||||
var vr20 = Vector128.Create<byte>(0);
|
||||
var vr21 = Avx512BW.BroadcastScalarToVector512(vr20);
|
||||
var vr22 = Vector256.Create<byte>(1);
|
||||
var vr23 = Avx512F.InsertVector256(vr21, vr22, 0);
|
||||
// 64-element constant passed as the third (selector) argument of BlendVariable below.
var vr24 = Vector512.Create(249, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0);
|
||||
var vr25 = Avx512BW.BlendVariable(vr19, vr23, vr24);
|
||||
var vr26 = Avx512BW.Min(vr17, vr25);
|
||||
Vector512<byte> vr27 = Avx512BW.UnpackLow(vr16, vr26);
|
||||
// Expected result: element 1 is 1 and every other element is 0, which matches the
// Debug output listed in the file header.
Vector512<byte> expected = Vector512.Create(0, (byte)1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
|
||||
Assert.Equal(expected, vr27);
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,8 @@
|
|||
<Project Sdk="Microsoft.NET.Sdk">
|
||||
<PropertyGroup>
|
||||
<!-- Build the test with optimizations enabled. NOTE(review): presumably required because the
     test's header comment shows the wrong output only for the Release run; confirm. -->
<Optimize>True</Optimize>
|
||||
</PropertyGroup>
|
||||
<ItemGroup>
|
||||
<!-- Compile the single source file that shares this project's name. -->
<Compile Include="$(MSBuildProjectName).cs" />
|
||||
</ItemGroup>
|
||||
</Project>
|
Loading…
Add table
Add a link
Reference in a new issue