From d074561fb17e778e6d7ff20a45d0700477a8cfaf Mon Sep 17 00:00:00 2001 From: Levi Broderick Date: Mon, 1 Aug 2022 18:17:56 -0700 Subject: [PATCH] Fix xxHash64 handling of large (> 4GB) inputs (#73093) --- .../src/System/IO/Hashing/XxHash64.State.cs | 2 +- .../src/System/IO/Hashing/XxHash64.cs | 8 +- .../tests/NonCryptoHashTestDriver.cs | 133 +++++++++++++----- .../tests/XxHash32Tests.007.cs | 33 +++++ .../System.IO.Hashing/tests/XxHash32Tests.cs | 33 +++++ .../tests/XxHash32Tests.f00d.cs | 33 +++++ .../tests/XxHash64Tests.007.cs | 33 +++++ .../System.IO.Hashing/tests/XxHash64Tests.cs | 33 +++++ .../tests/XxHash64Tests.f00d.cs | 33 +++++ 9 files changed, 302 insertions(+), 39 deletions(-) diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/XxHash64.State.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/XxHash64.State.cs index ecb289a945c95..9c1493c629b90 100644 --- a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/XxHash64.State.cs +++ b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/XxHash64.State.cs @@ -90,7 +90,7 @@ private static ulong ApplyRound(ulong acc, ulong lane) return acc; } - internal readonly ulong Complete(int length, ReadOnlySpan remaining) + internal readonly ulong Complete(long length, ReadOnlySpan remaining) { ulong acc = _hadFullStripe ? Converge() : _smallAcc; diff --git a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/XxHash64.cs b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/XxHash64.cs index ab20bdda634e0..32cc5ae9a5f52 100644 --- a/src/libraries/System.IO.Hashing/src/System/IO/Hashing/XxHash64.cs +++ b/src/libraries/System.IO.Hashing/src/System/IO/Hashing/XxHash64.cs @@ -19,7 +19,7 @@ public sealed partial class XxHash64 : NonCryptographicHashAlgorithm private readonly ulong _seed; private State _state; private byte[]? _holdback; - private int _length; + private long _length; /// /// Initializes a new instance of the class. @@ -67,7 +67,7 @@ public override void Append(ReadOnlySpan source) // Data that isn't perfectly mod-32 gets stored in a holdback // buffer. - int held = _length & 0x1F; + int held = (int)_length & 0x1F; if (held != 0) { @@ -110,7 +110,7 @@ public override void Append(ReadOnlySpan source) /// protected override void GetCurrentHashCore(Span destination) { - int remainingLength = _length & 0x1F; + int remainingLength = (int)_length & 0x1F; ReadOnlySpan remaining = ReadOnlySpan.Empty; if (remainingLength > 0) @@ -225,7 +225,7 @@ private static int StaticHash(ReadOnlySpan source, Span destination, source = source.Slice(StripeSize); } - ulong val = state.Complete(totalLength, source); + ulong val = state.Complete((uint)totalLength, source); BinaryPrimitives.WriteUInt64BigEndian(destination, val); return HashSize; } diff --git a/src/libraries/System.IO.Hashing/tests/NonCryptoHashTestDriver.cs b/src/libraries/System.IO.Hashing/tests/NonCryptoHashTestDriver.cs index 0d4d9be1db455..cb684c030b849 100644 --- a/src/libraries/System.IO.Hashing/tests/NonCryptoHashTestDriver.cs +++ b/src/libraries/System.IO.Hashing/tests/NonCryptoHashTestDriver.cs @@ -48,7 +48,7 @@ public void TestsDefined() targetMethodName, BindingFlags.Instance | BindingFlags.Public); - if (info2 is null) + if (info2 is null && !info.IsDefined(typeof(OverrideOptionalAttribute))) { missingMethods ??= new List(); missingMethods.Add(targetMethodName); @@ -116,6 +116,20 @@ protected void InstanceMultiAppendGetCurrentHashDriver(TestCase testCase) testCase.VerifyResponse(answer); } + [OverrideOptional] + protected void InstanceMultiAppendLargeInputDriver(LargeTestCase testCase) + { + NonCryptographicHashAlgorithm hash = CreateInstance(); + + foreach (ReadOnlyMemory chunk in testCase.EnumerateDataChunks()) + { + hash.Append(chunk.Span); + } + + byte[] answer = hash.GetHashAndReset(); + testCase.VerifyResponse(answer); + } + protected void InstanceVerifyEmptyStateDriver(TestCase testCase) { Span buf = stackalloc byte[256]; @@ -280,46 +294,23 @@ private void VerifyEmptyResult(ReadOnlySpan result) } } - public sealed class TestCase + public abstract class TestCaseBase { - private byte[] _input; - private byte[] _output; - + private readonly byte[] _output; public string Name { get; } - public ReadOnlySpan Input => new ReadOnlySpan(_input); + public ReadOnlySpan OutputBytes => _output; public string OutputHex { get; } - public TestCase(string name, byte[] input, byte[] output) - { - Name = name; - _input = input; - OutputHex = ToHexString(output); - _output = FromHexString(OutputHex); - } - - public TestCase(string name, byte[] input, string outputHex) - { - Name = name; - _input = input; - OutputHex = outputHex.ToUpperInvariant(); - _output = FromHexString(OutputHex); - } - - public TestCase(string name, string inputHex, string outputHex) + protected TestCaseBase(string name, byte[] output) { - Name = name; - _input = FromHexString(inputHex); - OutputHex = outputHex.ToUpperInvariant(); - _output = FromHexString(OutputHex); - } - - internal void VerifyResponse(ReadOnlySpan response) - { - if (!response.SequenceEqual(_output)) + if (output is null || output.Length == 0) { - // We know this will fail, but it gives a nice presentation. - Assert.Equal(OutputHex, ToHexString(response)); + throw new ArgumentException("Argument should not be null or empty.", nameof(output)); } + + Name = name; + _output = output; + OutputHex = ToHexString(output); } internal static string ToHexString(ReadOnlySpan input) @@ -356,6 +347,80 @@ internal static byte[] FromHexString(string hexString) } public override string ToString() => Name; + + internal void VerifyResponse(ReadOnlySpan response) + { + if (!response.SequenceEqual(OutputBytes)) + { + // We know this will fail, but it gives a nice presentation. + Assert.Equal(OutputHex, ToHexString(response)); + } + } + } + + public sealed class TestCase : TestCaseBase + { + private readonly byte[] _input; + public ReadOnlySpan Input => new ReadOnlySpan(_input); + + public TestCase(string name, byte[] input, byte[] output) + : base(name, output) + { + _input = input; + } + + public TestCase(string name, byte[] input, string outputHex) + : base(name, FromHexString(outputHex)) + { + _input = input; + } + + public TestCase(string name, string inputHex, string outputHex) + : base(name, FromHexString(outputHex)) + { + _input = FromHexString(inputHex); + } + } + + public sealed class LargeTestCase : TestCaseBase + { + private readonly byte _data; + private readonly long _repeatCount; + + public LargeTestCase(string name, byte data, long repeatCount, string outputHex) + : base(name, FromHexString(outputHex)) + { + if (repeatCount < 0) + { + throw new ArgumentOutOfRangeException(nameof(repeatCount)); + } + + _data = data; + _repeatCount = repeatCount; + } + + public IEnumerable> EnumerateDataChunks() + { +#if NET5_0_OR_GREATER + byte[] chunk = GC.AllocateUninitializedArray(1024 * 1024); +#else + byte[] chunk = new byte[1024 * 1024]; +#endif + chunk.AsSpan().Fill(_data); + + long remaining = _repeatCount; + while (remaining > 0) + { + int thisChunkLength = (int)Math.Min(remaining, chunk.Length); + yield return chunk.AsMemory(0, thisChunkLength); + remaining -= thisChunkLength; + } + } + } + + [AttributeUsage(AttributeTargets.Method, AllowMultiple = false, Inherited = false)] + private sealed class OverrideOptionalAttribute : Attribute + { } } } diff --git a/src/libraries/System.IO.Hashing/tests/XxHash32Tests.007.cs b/src/libraries/System.IO.Hashing/tests/XxHash32Tests.007.cs index 9d390438fd566..28d00ce94d029 100644 --- a/src/libraries/System.IO.Hashing/tests/XxHash32Tests.007.cs +++ b/src/libraries/System.IO.Hashing/tests/XxHash32Tests.007.cs @@ -92,6 +92,31 @@ public static IEnumerable TestCases "FC23CD03"), }; + public static IEnumerable LargeTestCases + { + get + { + object[] arr = new object[1]; + + foreach (LargeTestCase testCase in LargeTestCaseDefinitions) + { + arr[0] = testCase; + yield return arr; + } + } + } + + protected static IEnumerable LargeTestCaseDefinitions { get; } = + new[] + { + // Manually run against the xxHash32 reference implementation. + new LargeTestCase( + "EEEEE... (10GB)", + (byte)'E', + 10L * 1024 * 1024 * 1024, // 10 GB + "1C44F650"), + }; + protected override NonCryptographicHashAlgorithm CreateInstance() => new XxHash32(Seed); protected override byte[] StaticOneShot(byte[] source) => XxHash32.Hash(source, Seed); @@ -125,6 +150,14 @@ public void InstanceMultiAppendGetCurrentHash(TestCase testCase) InstanceMultiAppendGetCurrentHashDriver(testCase); } + [Theory] + [MemberData(nameof(LargeTestCases))] + [OuterLoop] + public void InstanceMultiAppendLargeInput(LargeTestCase testCase) + { + InstanceMultiAppendLargeInputDriver(testCase); + } + [Theory] [MemberData(nameof(TestCases))] public void InstanceVerifyEmptyState(TestCase testCase) diff --git a/src/libraries/System.IO.Hashing/tests/XxHash32Tests.cs b/src/libraries/System.IO.Hashing/tests/XxHash32Tests.cs index 9cbd5653059bf..803a10ddb5424 100644 --- a/src/libraries/System.IO.Hashing/tests/XxHash32Tests.cs +++ b/src/libraries/System.IO.Hashing/tests/XxHash32Tests.cs @@ -106,6 +106,31 @@ public static IEnumerable TestCases "5DF7D6C0"), }; + public static IEnumerable LargeTestCases + { + get + { + object[] arr = new object[1]; + + foreach (LargeTestCase testCase in LargeTestCaseDefinitions) + { + arr[0] = testCase; + yield return arr; + } + } + } + + protected static IEnumerable LargeTestCaseDefinitions { get; } = + new[] + { + // Manually run against the xxHash32 reference implementation. + new LargeTestCase( + "EEEEE... (10GB)", + (byte)'E', + 10L * 1024 * 1024 * 1024, // 10 GB + "22CBC3AA"), + }; + protected override NonCryptographicHashAlgorithm CreateInstance() => new XxHash32(); protected override byte[] StaticOneShot(byte[] source) => XxHash32.Hash(source); @@ -139,6 +164,14 @@ public void InstanceMultiAppendGetCurrentHash(TestCase testCase) InstanceMultiAppendGetCurrentHashDriver(testCase); } + [Theory] + [MemberData(nameof(LargeTestCases))] + [OuterLoop] + public void InstanceMultiAppendLargeInput(LargeTestCase testCase) + { + InstanceMultiAppendLargeInputDriver(testCase); + } + [Theory] [MemberData(nameof(TestCases))] public void InstanceVerifyEmptyState(TestCase testCase) diff --git a/src/libraries/System.IO.Hashing/tests/XxHash32Tests.f00d.cs b/src/libraries/System.IO.Hashing/tests/XxHash32Tests.f00d.cs index 8a01bc8cd1693..ac17869188ff1 100644 --- a/src/libraries/System.IO.Hashing/tests/XxHash32Tests.f00d.cs +++ b/src/libraries/System.IO.Hashing/tests/XxHash32Tests.f00d.cs @@ -92,6 +92,31 @@ public static IEnumerable TestCases "C7A3D1CB"), }; + public static IEnumerable LargeTestCases + { + get + { + object[] arr = new object[1]; + + foreach (LargeTestCase testCase in LargeTestCaseDefinitions) + { + arr[0] = testCase; + yield return arr; + } + } + } + + protected static IEnumerable LargeTestCaseDefinitions { get; } = + new[] + { + // Manually run against the xxHash32 reference implementation. + new LargeTestCase( + "EEEEE... (10GB)", + (byte)'E', + 10L * 1024 * 1024 * 1024, // 10 GB + "B19FAE15"), + }; + protected override NonCryptographicHashAlgorithm CreateInstance() => new XxHash32(Seed); protected override byte[] StaticOneShot(byte[] source) => XxHash32.Hash(source, Seed); @@ -125,6 +150,14 @@ public void InstanceMultiAppendGetCurrentHash(TestCase testCase) InstanceMultiAppendGetCurrentHashDriver(testCase); } + [Theory] + [MemberData(nameof(LargeTestCases))] + [OuterLoop] + public void InstanceMultiAppendLargeInput(LargeTestCase testCase) + { + InstanceMultiAppendLargeInputDriver(testCase); + } + [Theory] [MemberData(nameof(TestCases))] public void InstanceVerifyEmptyState(TestCase testCase) diff --git a/src/libraries/System.IO.Hashing/tests/XxHash64Tests.007.cs b/src/libraries/System.IO.Hashing/tests/XxHash64Tests.007.cs index fb3c514343a15..94c53f8f14d75 100644 --- a/src/libraries/System.IO.Hashing/tests/XxHash64Tests.007.cs +++ b/src/libraries/System.IO.Hashing/tests/XxHash64Tests.007.cs @@ -114,6 +114,31 @@ protected override int StaticOneShot(ReadOnlySpan source, Span desti protected override bool TryStaticOneShot(ReadOnlySpan source, Span destination, out int bytesWritten) => XxHash64.TryHash(source, destination, out bytesWritten, Seed); + public static IEnumerable LargeTestCases + { + get + { + object[] arr = new object[1]; + + foreach (LargeTestCase testCase in LargeTestCaseDefinitions) + { + arr[0] = testCase; + yield return arr; + } + } + } + + protected static IEnumerable LargeTestCaseDefinitions { get; } = + new[] + { + // Manually run against the xxHash64 reference implementation. + new LargeTestCase( + "EEEEE... (10GB)", + (byte)'E', + 10L * 1024 * 1024 * 1024, // 10 GB + "DFBE10B17366232C"), + }; + [Theory] [MemberData(nameof(TestCases))] public void InstanceAppendAllocate(TestCase testCase) @@ -135,6 +160,14 @@ public void InstanceMultiAppendGetCurrentHash(TestCase testCase) InstanceMultiAppendGetCurrentHashDriver(testCase); } + [Theory] + [MemberData(nameof(LargeTestCases))] + [OuterLoop] + public void InstanceMultiAppendLargeInput(LargeTestCase testCase) + { + InstanceMultiAppendLargeInputDriver(testCase); + } + [Theory] [MemberData(nameof(TestCases))] public void InstanceVerifyEmptyState(TestCase testCase) diff --git a/src/libraries/System.IO.Hashing/tests/XxHash64Tests.cs b/src/libraries/System.IO.Hashing/tests/XxHash64Tests.cs index 197446d912cea..7e8cb82fc6fe6 100644 --- a/src/libraries/System.IO.Hashing/tests/XxHash64Tests.cs +++ b/src/libraries/System.IO.Hashing/tests/XxHash64Tests.cs @@ -119,6 +119,31 @@ public static IEnumerable TestCases "BDD40F0FAC166EAA"), }; + public static IEnumerable LargeTestCases + { + get + { + object[] arr = new object[1]; + + foreach (LargeTestCase testCase in LargeTestCaseDefinitions) + { + arr[0] = testCase; + yield return arr; + } + } + } + + protected static IEnumerable LargeTestCaseDefinitions { get; } = + new[] + { + // Manually run against the xxHash64 reference implementation. + new LargeTestCase( + "EEEEE... (10GB)", + (byte)'E', + 10L * 1024 * 1024 * 1024, // 10 GB + "F3CB8D45A8B695EF"), + }; + protected override NonCryptographicHashAlgorithm CreateInstance() => new XxHash64(); protected override byte[] StaticOneShot(byte[] source) => XxHash64.Hash(source); @@ -152,6 +177,14 @@ public void InstanceMultiAppendGetCurrentHash(TestCase testCase) InstanceMultiAppendGetCurrentHashDriver(testCase); } + [Theory] + [MemberData(nameof(LargeTestCases))] + [OuterLoop] + public void InstanceMultiAppendLargeInput(LargeTestCase testCase) + { + InstanceMultiAppendLargeInputDriver(testCase); + } + [Theory] [MemberData(nameof(TestCases))] public void InstanceVerifyEmptyState(TestCase testCase) diff --git a/src/libraries/System.IO.Hashing/tests/XxHash64Tests.f00d.cs b/src/libraries/System.IO.Hashing/tests/XxHash64Tests.f00d.cs index 71457595c324c..2b3bfdf3b4b78 100644 --- a/src/libraries/System.IO.Hashing/tests/XxHash64Tests.f00d.cs +++ b/src/libraries/System.IO.Hashing/tests/XxHash64Tests.f00d.cs @@ -102,6 +102,31 @@ public static IEnumerable TestCases "C9B96062B49FEC42"), }; + public static IEnumerable LargeTestCases + { + get + { + object[] arr = new object[1]; + + foreach (LargeTestCase testCase in LargeTestCaseDefinitions) + { + arr[0] = testCase; + yield return arr; + } + } + } + + protected static IEnumerable LargeTestCaseDefinitions { get; } = + new[] + { + // Manually run against the xxHash64 reference implementation. + new LargeTestCase( + "EEEEE... (10GB)", + (byte)'E', + 10L * 1024 * 1024 * 1024, // 10 GB + "CD7B3A954E199AE8"), + }; + protected override NonCryptographicHashAlgorithm CreateInstance() => new XxHash64(Seed); protected override byte[] StaticOneShot(byte[] source) => XxHash64.Hash(source, Seed); @@ -135,6 +160,14 @@ public void InstanceMultiAppendGetCurrentHash(TestCase testCase) InstanceMultiAppendGetCurrentHashDriver(testCase); } + [Theory] + [MemberData(nameof(LargeTestCases))] + [OuterLoop] + public void InstanceMultiAppendLargeInput(LargeTestCase testCase) + { + InstanceMultiAppendLargeInputDriver(testCase); + } + [Theory] [MemberData(nameof(TestCases))] public void InstanceVerifyEmptyState(TestCase testCase)