1
0
Fork 0
mirror of https://github.com/VSadov/Satori.git synced 2025-06-09 17:44:48 +09:00

Ensure we consistently broadcast the result of simd dot product (#105888)

This commit is contained in:
Tanner Gooding 2024-08-06 09:58:19 -07:00 committed by GitHub
parent b6ab4637e6
commit c28adf1c26
Signed by: github
GPG key ID: B5690EEEBB952194
4 changed files with 65 additions and 15 deletions

View file

@ -6320,8 +6320,9 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node)
horizontalAdd = NI_SSE3_HorizontalAdd;
if (!comp->compOpportunisticallyDependsOn(InstructionSet_SSE3))
if ((simdSize == 8) || !comp->compOpportunisticallyDependsOn(InstructionSet_SSE3))
{
// We also do this for simdSize == 8 to ensure we broadcast the result as expected
shuffle = NI_SSE_Shuffle;
}
break;
@ -6372,10 +6373,8 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node)
horizontalAdd = NI_SSE3_HorizontalAdd;
if (!comp->compOpportunisticallyDependsOn(InstructionSet_SSE3))
{
shuffle = NI_SSE2_Shuffle;
}
// We need to ensure we broadcast the result as expected
shuffle = NI_SSE2_Shuffle;
break;
}

View file

@ -9916,16 +9916,6 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node)
break;
}
#if defined(TARGET_XARCH)
if ((simdSize == 8) && !compOpportunisticallyDependsOn(InstructionSet_SSE41))
{
// When SSE4.1 isn't supported then Vector2 only needs a single horizontal add
// which means the result isn't broadcast across the entire vector and we can't
// optimize
break;
}
#endif // TARGET_XARCH
GenTree* op1 = node->Op(1);
GenTree* sqrt = nullptr;
GenTree* toScalar = nullptr;

View file

@ -0,0 +1,50 @@
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
using System;
using System.Runtime.CompilerServices;
using System.Numerics;
using Xunit;
// Verifies that Vector2/Vector3/Vector4.Normalize yields the expected unit vector when
// only the last component of the input is non-zero. Every vector width is exercised with
// two input shapes: a value read from a non-inlined property getter, and a value that is
// constructed directly at the call site.
public class Runtime_99391
{
    // Input providers. The getters are marked NoInlining so that the value reaches
    // Normalize through a real call rather than being visible at the call site.
    private static Vector2 Value2
    {
        [MethodImpl(MethodImplOptions.NoInlining)]
        get { return new Vector2(0, 2); }
    }

    private static Vector3 Value3
    {
        [MethodImpl(MethodImplOptions.NoInlining)]
        get { return new Vector3(0, 0, 2); }
    }

    private static Vector4 Value4
    {
        [MethodImpl(MethodImplOptions.NoInlining)]
        get { return new Vector4(0, 0, 0, 2); }
    }

    [Fact]
    public static void TestEntryPoint()
    {
        // Vector2: (0, 2) normalizes to (0, 1).
        Vector2 unit2 = new Vector2(0, 1);

        Vector2 fromGetter2 = Vector2.Normalize(Value2);
        Assert.Equal(unit2, fromGetter2);

        Vector2 fromLiteral2 = Vector2.Normalize(new Vector2(0, 2));
        Assert.Equal(unit2, fromLiteral2);

        // Vector3: (0, 0, 2) normalizes to (0, 0, 1).
        Vector3 unit3 = new Vector3(0, 0, 1);

        Vector3 fromGetter3 = Vector3.Normalize(Value3);
        Assert.Equal(unit3, fromGetter3);

        Vector3 fromLiteral3 = Vector3.Normalize(new Vector3(0, 0, 2));
        Assert.Equal(unit3, fromLiteral3);

        // Vector4: (0, 0, 0, 2) normalizes to (0, 0, 0, 1).
        Vector4 unit4 = new Vector4(0, 0, 0, 1);

        Vector4 fromGetter4 = Vector4.Normalize(Value4);
        Assert.Equal(unit4, fromGetter4);

        Vector4 fromLiteral4 = Vector4.Normalize(new Vector4(0, 0, 0, 2));
        Assert.Equal(unit4, fromLiteral4);
    }
}

View file

@ -0,0 +1,11 @@
<!-- Test project: builds the single source file named after this project, with optimizations on. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<!-- Compile the test with optimizations enabled. -->
<Optimize>True</Optimize>
</PropertyGroup>
<ItemGroup>
<!-- The only source file is the .cs file whose name matches the project name. -->
<Compile Include="$(MSBuildProjectName).cs" />
</ItemGroup>
<ItemGroup>
<!-- Run the test with DOTNET_TieredCompilation=0 (the documented runtime setting that turns tiered compilation off). -->
<CLRTestEnvironmentVariable Include="DOTNET_TieredCompilation" Value="0" />
</ItemGroup>
</Project>