diff --git a/THIRD-PARTY-NOTICES.TXT b/THIRD-PARTY-NOTICES.TXT index feb4d4fe851..f60a240e7ee 100644 --- a/THIRD-PARTY-NOTICES.TXT +++ b/THIRD-PARTY-NOTICES.TXT @@ -374,6 +374,36 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +License notice for vectorized hex parsing +-------------------------------------------------------- + +Copyright (c) 2022, Geoff Langdale +Copyright (c) 2022, Wojciech Mula +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +- Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
 License notice for RFC 3492
 ---------------------------
 
diff --git a/src/libraries/Common/src/System/HexConverter.cs b/src/libraries/Common/src/System/HexConverter.cs
index 81b56970be0..b80e404442b 100644
--- a/src/libraries/Common/src/System/HexConverter.cs
+++ b/src/libraries/Common/src/System/HexConverter.cs
@@ -4,10 +4,12 @@
 using System.Diagnostics;
 using System.Runtime.CompilerServices;
 #if SYSTEM_PRIVATE_CORELIB
+using System.Buffers.Binary;
 using System.Runtime.InteropServices;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.Arm;
 using System.Runtime.Intrinsics.X86;
+using System.Text.Unicode;
 #endif
 
 namespace System
@@ -223,9 +225,106 @@ namespace System
 
         public static bool TryDecodeFromUtf16(ReadOnlySpan<char> chars, Span<byte> bytes)
         {
+#if SYSTEM_PRIVATE_CORELIB
+            if (BitConverter.IsLittleEndian && (Ssse3.IsSupported || AdvSimd.Arm64.IsSupported) &&
+                chars.Length >= Vector128<ushort>.Count * 2)
+            {
+                return TryDecodeFromUtf16_Vector128(chars, bytes);
+            }
+#endif
             return TryDecodeFromUtf16(chars, bytes, out _);
         }
 
+#if SYSTEM_PRIVATE_CORELIB
+        /// <summary>
+        /// Vectorized hex decoding: each iteration reads 16 UTF-16 chars and writes 8 bytes. When a chunk
+        /// fails validation, decoding of the remaining input is handed over to the scalar routine.
+        /// </summary>
+        /// <param name="chars">Hex text; an even length of at least 16 chars is asserted.</param>
+        /// <param name="bytes">Destination; a capacity of at least <c>chars.Length / 2</c> bytes is asserted.</param>
+        /// <returns><see langword="true"/> when all chunks validated; otherwise the scalar routine's result.</returns>
+        public static bool TryDecodeFromUtf16_Vector128(ReadOnlySpan<char> chars, Span<byte> bytes)
+        {
+            Debug.Assert(Ssse3.IsSupported || AdvSimd.Arm64.IsSupported);
+            Debug.Assert(chars.Length <= bytes.Length * 2);
+            Debug.Assert(chars.Length % 2 == 0);
+            Debug.Assert(chars.Length >= Vector128<ushort>.Count * 2);
+
+            nuint offset = 0;
+            nuint lengthSubTwoVector128 = (nuint)chars.Length - ((nuint)Vector128<ushort>.Count * 2);
+
+            ref ushort srcRef = ref Unsafe.As<char, ushort>(ref MemoryMarshal.GetReference(chars));
+            ref byte destRef = ref MemoryMarshal.GetReference(bytes);
+
+            do
+            {
+                // The nibble math below works on bytes, so load two UTF-16 vectors and narrow them into a
+                // single ASCII byte vector - the implementation can be shared with UTF8 paths.
+                Vector128<ushort> vec1 = Vector128.LoadUnsafe(ref srcRef, offset);
+                Vector128<ushort> vec2 = Vector128.LoadUnsafe(ref srcRef, offset + (nuint)Vector128<ushort>.Count);
+                Vector128<byte> vec = Vector128.Narrow(vec1, vec2);
+
+                // Based on "Algorithm #3" https://github.com/WojciechMula/toys/blob/master/simd-parse-hex/geoff_algorithm.cpp
+                // by Geoff Langdale and Wojciech Mula
+                // Move digits '0'..'9' into range 0xf6..0xff.
+                Vector128<byte> t1 = vec + Vector128.Create((byte)(0xFF - '9'));
+                // And then correct the range to 0xf0..0xf9.
+                // All other bytes become less than 0xf0.
+                Vector128<byte> t2 = Vector128.SubtractSaturate(t1, Vector128.Create((byte)6));
+                // Convert to uppercase ('a'..'f' => 'A'..'F') and
+                // move hex letters 'A'..'F' into range 0..5.
+                Vector128<byte> t3 = (vec & Vector128.Create((byte)0xDF)) - Vector128.Create((byte)'A');
+                // And correct the range into 10..15.
+                // Bytes that are not hex letters become greater than 0x0f.
+                Vector128<byte> t4 = Vector128.AddSaturate(t3, Vector128.Create((byte)10));
+                // Convert '0'..'9' into nibbles 0..9. Non-digit bytes become
+                // greater than 0x0f. Finally choose the result: either a valid nibble (0..9/10..15)
+                // or some byte greater than 0x0f.
+                Vector128<byte> nibbles = Vector128.Min(t2 - Vector128.Create((byte)0xF0), t4);
+                // Any nibble above 0x0f gets its high bit set by the saturating add: the input is not valid hex data
+                if (!Utf16Utility.AllCharsInVector128AreAscii(vec1 | vec2) ||
+                    Vector128.AddSaturate(nibbles, Vector128.Create((byte)(127 - 15))).ExtractMostSignificantBits() != 0)
+                {
+                    // Input is either non-ASCII or invalid hex data
+                    break;
+                }
+                Vector128<byte> output;
+                if (Ssse3.IsSupported)
+                {
+                    output = Ssse3.MultiplyAddAdjacent(nibbles,
+                        Vector128.Create((short)0x0110).AsSByte()).AsByte();
+                }
+                else
+                {
+                    // Workaround for missing MultiplyAddAdjacent on ARM
+                    Vector128<short> even = AdvSimd.Arm64.TransposeEven(nibbles, Vector128<byte>.Zero).AsInt16();
+                    Vector128<short> odd = AdvSimd.Arm64.TransposeOdd(nibbles, Vector128<byte>.Zero).AsInt16();
+                    even = AdvSimd.ShiftLeftLogical(even, 4).AsInt16();
+                    output = AdvSimd.AddSaturate(even, odd).AsByte();
+                }
+                // Gather the even-indexed bytes into the lower 64-bit half (callers take this path only on little-endian)
+                output = Vector128.Shuffle(output, Vector128.Create((byte)0, 2, 4, 6, 8, 10, 12, 14, 0, 0, 0, 0, 0, 0, 0, 0));
+                // Store 8 bytes in dest at the matching offset
+                Unsafe.WriteUnaligned(ref Unsafe.Add(ref destRef, offset / 2), output.AsUInt64().ToScalar());
+
+                offset += (nuint)Vector128<ushort>.Count * 2;
+                if (offset == (nuint)chars.Length)
+                {
+                    return true;
+                }
+                // Overlap with the current chunk for trailing elements
+                if (offset > lengthSubTwoVector128)
+                {
+                    offset = lengthSubTwoVector128;
+                }
+            }
+            while (true);
+
+            // Fall back to the scalar routine in case of invalid input.
+            return TryDecodeFromUtf16(chars.Slice((int)offset), bytes.Slice((int)(offset / 2)), out _);
+        }
+#endif
+
         public static bool TryDecodeFromUtf16(ReadOnlySpan<char> chars, Span<byte> bytes, out int charsProcessed)
         {
             Debug.Assert(chars.Length % 2 == 0, "Un-even number of characters provided");
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
index 69426ca3b2b..d1c7d5ee8a7 100644
--- a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
+++ b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
@@ -3244,5 +3244,37 @@ namespace System.Runtime.Intrinsics
             }
             return AdvSimd.Arm64.ZipHigh(left, right);
         }
+
+        // TODO: Make generic versions of these public, see https://github.com/dotnet/runtime/issues/82559
+
+        /// <summary>Saturating byte addition via Sse2, else AdvSimd; calls ThrowHelper.ThrowNotSupportedException if neither is supported.</summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static Vector128<byte> AddSaturate(Vector128<byte> left, Vector128<byte> right)
+        {
+            if (Sse2.IsSupported)
+            {
+                return Sse2.AddSaturate(left, right);
+            }
+            else if (!AdvSimd.Arm64.IsSupported)
+            {
+                ThrowHelper.ThrowNotSupportedException();
+            }
+            return AdvSimd.AddSaturate(left, right);
+        }
+
+        /// <summary>Saturating byte subtraction via Sse2, else AdvSimd; calls ThrowHelper.ThrowNotSupportedException if neither is supported.</summary>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static Vector128<byte> SubtractSaturate(Vector128<byte> left, Vector128<byte> right)
+        {
+            if (Sse2.IsSupported)
+            {
+                return Sse2.SubtractSaturate(left, right);
+            }
+            else if (!AdvSimd.Arm64.IsSupported)
+            {
+                ThrowHelper.ThrowNotSupportedException();
+            }
+            return AdvSimd.SubtractSaturate(left, right);
+        }
     }
 }
diff --git a/src/libraries/System.Runtime.Extensions/tests/System/Convert.FromHexString.cs b/src/libraries/System.Runtime.Extensions/tests/System/Convert.FromHexString.cs
index da7a83aab1c..6ed2c2e7780 100644
--- a/src/libraries/System.Runtime.Extensions/tests/System/Convert.FromHexString.cs
+++ b/src/libraries/System.Runtime.Extensions/tests/System/Convert.FromHexString.cs
@@ -102,5 +102,27 @@ namespace System.Tests
         {
             Assert.Same(Array.Empty<byte>(), Convert.FromHexString(string.Empty));
         }
+
+        [Fact]
+        public static void ToHexFromHexRoundtrip()
+        {
+            // 1..49 random bytes yield 2..98 hex chars, i.e. inputs both shorter and longer than 16 chars.
+            for (int i = 1; i < 50; i++)
+            {
+                byte[] data = System.Security.Cryptography.RandomNumberGenerator.GetBytes(i);
+                string hex = Convert.ToHexString(data);
+                Assert.Equal(data, Convert.FromHexString(hex.ToLowerInvariant()));
+                Assert.Equal(data, Convert.FromHexString(hex.ToUpperInvariant()));
+                string mixedCase1 = hex.Substring(0, hex.Length / 2).ToUpperInvariant() +
+                                    hex.Substring(hex.Length / 2).ToLowerInvariant();
+                string mixedCase2 = hex.Substring(0, hex.Length / 2).ToLowerInvariant() +
+                                    hex.Substring(hex.Length / 2).ToUpperInvariant();
+                Assert.Equal(data, Convert.FromHexString(mixedCase1));
+                Assert.Equal(data, Convert.FromHexString(mixedCase2));
+                // Invalid input: a trailing space (odd length) and a leading non-ASCII char must both throw.
+                Assert.Throws<FormatException>(() => Convert.FromHexString(hex + " "));
+                Assert.Throws<FormatException>(() => Convert.FromHexString("\uAAAA" + hex));
+            }
+        }
     }
 }