From c28adf1c268e69ebf9e077e330977addecf70d4c Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Tue, 6 Aug 2024 09:58:19 -0700 Subject: [PATCH] Ensure we consistently broadcast the result of simd dot product (#105888) --- src/coreclr/jit/lowerxarch.cpp | 9 ++-- src/coreclr/jit/morph.cpp | 10 ---- .../JitBlue/Runtime_99391/Runtime_99391.cs | 50 +++++++++++++++++++ .../Runtime_99391/Runtime_99391.csproj | 11 ++++ 4 files changed, 65 insertions(+), 15 deletions(-) create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_99391/Runtime_99391.cs create mode 100644 src/tests/JIT/Regression/JitBlue/Runtime_99391/Runtime_99391.csproj diff --git a/src/coreclr/jit/lowerxarch.cpp b/src/coreclr/jit/lowerxarch.cpp index 6ad03646f0b..c1167bdb493 100644 --- a/src/coreclr/jit/lowerxarch.cpp +++ b/src/coreclr/jit/lowerxarch.cpp @@ -6320,8 +6320,9 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) horizontalAdd = NI_SSE3_HorizontalAdd; - if (!comp->compOpportunisticallyDependsOn(InstructionSet_SSE3)) + if ((simdSize == 8) || !comp->compOpportunisticallyDependsOn(InstructionSet_SSE3)) { + // We also do this for simdSize == 8 to ensure we broadcast the result as expected shuffle = NI_SSE_Shuffle; } break; @@ -6372,10 +6373,8 @@ GenTree* Lowering::LowerHWIntrinsicDot(GenTreeHWIntrinsic* node) horizontalAdd = NI_SSE3_HorizontalAdd; - if (!comp->compOpportunisticallyDependsOn(InstructionSet_SSE3)) - { - shuffle = NI_SSE2_Shuffle; - } + // We need to ensure we broadcast the result as expected + shuffle = NI_SSE2_Shuffle; break; } diff --git a/src/coreclr/jit/morph.cpp b/src/coreclr/jit/morph.cpp index 8dcce0486c1..de471a36371 100644 --- a/src/coreclr/jit/morph.cpp +++ b/src/coreclr/jit/morph.cpp @@ -9916,16 +9916,6 @@ GenTree* Compiler::fgOptimizeHWIntrinsic(GenTreeHWIntrinsic* node) break; } -#if defined(TARGET_XARCH) - if ((simdSize == 8) && !compOpportunisticallyDependsOn(InstructionSet_SSE41)) - { - // When SSE4.1 isn't supported then Vector2 only needs a single horizontal add - // which means the result isn't broadcast across the entire vector and we can't - // optimize - break; - } -#endif // TARGET_XARCH - GenTree* op1 = node->Op(1); GenTree* sqrt = nullptr; GenTree* toScalar = nullptr; diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_99391/Runtime_99391.cs b/src/tests/JIT/Regression/JitBlue/Runtime_99391/Runtime_99391.cs new file mode 100644 index 00000000000..6033a74cdd6 --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_99391/Runtime_99391.cs @@ -0,0 +1,50 @@ +// Licensed to the .NET Foundation under one or more agreements. +// The .NET Foundation licenses this file to you under the MIT license. + +using System; +using System.Runtime.CompilerServices; +using System.Numerics; +using Xunit; + +public class Runtime_99391 +{ + [Fact] + public static void TestEntryPoint() + { + Vector2 result2a = Vector2.Normalize(Value2); + Assert.Equal(new Vector2(0, 1), result2a); + + Vector2 result2b = Vector2.Normalize(new Vector2(0, 2)); + Assert.Equal(new Vector2(0, 1), result2b); + + Vector3 result3a = Vector3.Normalize(Value3); + Assert.Equal(new Vector3(0, 0, 1), result3a); + + Vector3 result3b = Vector3.Normalize(new Vector3(0, 0, 2)); + Assert.Equal(new Vector3(0, 0, 1), result3b); + + Vector4 result4a = Vector4.Normalize(Value4); + Assert.Equal(new Vector4(0, 0, 0, 1), result4a); + + Vector4 result4b = Vector4.Normalize(new Vector4(0, 0, 0, 2)); + Assert.Equal(new Vector4(0, 0, 0, 1), result4b); + } + + private static Vector2 Value2 + { + [MethodImpl(MethodImplOptions.NoInlining)] + get => new Vector2(0, 2); + } + + private static Vector3 Value3 + { + [MethodImpl(MethodImplOptions.NoInlining)] + get => new Vector3(0, 0, 2); + } + + private static Vector4 Value4 + { + [MethodImpl(MethodImplOptions.NoInlining)] + get => new Vector4(0, 0, 0, 2); + } +} diff --git a/src/tests/JIT/Regression/JitBlue/Runtime_99391/Runtime_99391.csproj b/src/tests/JIT/Regression/JitBlue/Runtime_99391/Runtime_99391.csproj new file mode 100644 index 00000000000..efa9e9b0224 --- /dev/null +++ b/src/tests/JIT/Regression/JitBlue/Runtime_99391/Runtime_99391.csproj @@ -0,0 +1,11 @@ + + + True + + + + + + + +