Skip to content

Commit

Permalink
Fix xxHash64 handling of large (> 4GB) inputs (#73093)
Browse files Browse the repository at this point in the history
  • Loading branch information
GrabYourPitchforks authored Aug 2, 2022
1 parent 1a861d2 commit d074561
Show file tree
Hide file tree
Showing 9 changed files with 302 additions and 39 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -90,7 +90,7 @@ private static ulong ApplyRound(ulong acc, ulong lane)
return acc;
}

internal readonly ulong Complete(int length, ReadOnlySpan<byte> remaining)
internal readonly ulong Complete(long length, ReadOnlySpan<byte> remaining)
{
ulong acc = _hadFullStripe ? Converge() : _smallAcc;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ public sealed partial class XxHash64 : NonCryptographicHashAlgorithm
private readonly ulong _seed;
private State _state;
private byte[]? _holdback;
private int _length;
private long _length;

/// <summary>
/// Initializes a new instance of the <see cref="XxHash64"/> class.
Expand Down Expand Up @@ -67,7 +67,7 @@ public override void Append(ReadOnlySpan<byte> source)
// Data that isn't perfectly mod-32 gets stored in a holdback
// buffer.

int held = _length & 0x1F;
int held = (int)_length & 0x1F;

if (held != 0)
{
Expand Down Expand Up @@ -110,7 +110,7 @@ public override void Append(ReadOnlySpan<byte> source)
/// </summary>
protected override void GetCurrentHashCore(Span<byte> destination)
{
int remainingLength = _length & 0x1F;
int remainingLength = (int)_length & 0x1F;
ReadOnlySpan<byte> remaining = ReadOnlySpan<byte>.Empty;

if (remainingLength > 0)
Expand Down Expand Up @@ -225,7 +225,7 @@ private static int StaticHash(ReadOnlySpan<byte> source, Span<byte> destination,
source = source.Slice(StripeSize);
}

ulong val = state.Complete(totalLength, source);
ulong val = state.Complete((uint)totalLength, source);
BinaryPrimitives.WriteUInt64BigEndian(destination, val);
return HashSize;
}
Expand Down
133 changes: 99 additions & 34 deletions src/libraries/System.IO.Hashing/tests/NonCryptoHashTestDriver.cs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ public void TestsDefined()
targetMethodName,
BindingFlags.Instance | BindingFlags.Public);

if (info2 is null)
if (info2 is null && !info.IsDefined(typeof(OverrideOptionalAttribute)))
{
missingMethods ??= new List<string>();
missingMethods.Add(targetMethodName);
Expand Down Expand Up @@ -116,6 +116,20 @@ protected void InstanceMultiAppendGetCurrentHashDriver(TestCase testCase)
testCase.VerifyResponse(answer);
}

/// <summary>
/// Feeds every data chunk of <paramref name="testCase"/> into an instance obtained
/// from <c>CreateInstance()</c>, one <c>Append</c> call per chunk, and then verifies
/// the value returned by <c>GetHashAndReset()</c> against the test case.
/// </summary>
/// <param name="testCase">The large-input case that supplies the chunks and checks the result.</param>
[OverrideOptional]
protected void InstanceMultiAppendLargeInputDriver(LargeTestCase testCase)
{
    NonCryptographicHashAlgorithm hasher = CreateInstance();

    // The input is never materialized as a whole; it is streamed chunk by chunk.
    foreach (ReadOnlyMemory<byte> segment in testCase.EnumerateDataChunks())
    {
        ReadOnlySpan<byte> bytes = segment.Span;
        hasher.Append(bytes);
    }

    testCase.VerifyResponse(hasher.GetHashAndReset());
}

protected void InstanceVerifyEmptyStateDriver(TestCase testCase)
{
Span<byte> buf = stackalloc byte[256];
Expand Down Expand Up @@ -280,46 +294,23 @@ private void VerifyEmptyResult(ReadOnlySpan<byte> result)
}
}

public sealed class TestCase
public abstract class TestCaseBase
{
private byte[] _input;
private byte[] _output;

private readonly byte[] _output;
public string Name { get; }
public ReadOnlySpan<byte> Input => new ReadOnlySpan<byte>(_input);
public ReadOnlySpan<byte> OutputBytes => _output;
public string OutputHex { get; }

public TestCase(string name, byte[] input, byte[] output)
{
Name = name;
_input = input;
OutputHex = ToHexString(output);
_output = FromHexString(OutputHex);
}

public TestCase(string name, byte[] input, string outputHex)
{
Name = name;
_input = input;
OutputHex = outputHex.ToUpperInvariant();
_output = FromHexString(OutputHex);
}

public TestCase(string name, string inputHex, string outputHex)
protected TestCaseBase(string name, byte[] output)
{
Name = name;
_input = FromHexString(inputHex);
OutputHex = outputHex.ToUpperInvariant();
_output = FromHexString(OutputHex);
}

internal void VerifyResponse(ReadOnlySpan<byte> response)
{
if (!response.SequenceEqual(_output))
if (output is null || output.Length == 0)
{
// We know this will fail, but it gives a nice presentation.
Assert.Equal(OutputHex, ToHexString(response));
throw new ArgumentException("Argument should not be null or empty.", nameof(output));
}

Name = name;
_output = output;
OutputHex = ToHexString(output);
}

internal static string ToHexString(ReadOnlySpan<byte> input)
Expand Down Expand Up @@ -356,6 +347,80 @@ internal static byte[] FromHexString(string hexString)
}

public override string ToString() => Name;

internal void VerifyResponse(ReadOnlySpan<byte> response)
{
if (!response.SequenceEqual(OutputBytes))
{
// We know this will fail, but it gives a nice presentation.
Assert.Equal(OutputHex, ToHexString(response));
}
}
}

/// <summary>
/// A test case whose input is held in memory as a single byte array.
/// The name and expected output are handled by <see cref="TestCaseBase"/>.
/// </summary>
public sealed class TestCase : TestCaseBase
{
    private readonly byte[] _input;

    /// <summary>Creates a case from input bytes and expected output bytes.</summary>
    public TestCase(string name, byte[] input, byte[] output)
        : base(name, output)
    {
        _input = input;
    }

    /// <summary>Creates a case from input bytes and a hex-encoded expected output.</summary>
    public TestCase(string name, byte[] input, string outputHex)
        : this(name, input, FromHexString(outputHex))
    {
    }

    /// <summary>Creates a case from a hex-encoded input and a hex-encoded expected output.</summary>
    public TestCase(string name, string inputHex, string outputHex)
        : base(name, FromHexString(outputHex))
    {
        // The input hex is decoded only after the base constructor has accepted the output.
        _input = FromHexString(inputHex);
    }

    /// <summary>Gets a read-only view over the input bytes.</summary>
    public ReadOnlySpan<byte> Input => _input;
}

/// <summary>
/// A test case whose input is one byte value repeated a given number of times.
/// The input is produced lazily in chunks rather than being stored as one array.
/// </summary>
public sealed class LargeTestCase : TestCaseBase
{
    private readonly byte _data;
    private readonly long _repeatCount;

    /// <summary>Creates a case from a repeated byte value and a hex-encoded expected output.</summary>
    /// <param name="name">The test case name passed to the base class.</param>
    /// <param name="data">The byte value that makes up the entire input.</param>
    /// <param name="repeatCount">How many times <paramref name="data"/> is repeated; must not be negative.</param>
    /// <param name="outputHex">The expected hash, hex-encoded.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="repeatCount"/> is negative.</exception>
    public LargeTestCase(string name, byte data, long repeatCount, string outputHex)
        : base(name, FromHexString(outputHex))
    {
        if (repeatCount < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(repeatCount));
        }

        _repeatCount = repeatCount;
        _data = data;
    }

    /// <summary>
    /// Lazily yields the input as consecutive chunks of at most 1,048,576 bytes.
    /// Every yielded chunk is a view over the same pre-filled backing array.
    /// </summary>
    public IEnumerable<ReadOnlyMemory<byte>> EnumerateDataChunks()
    {
        const int ChunkSize = 1024 * 1024;

#if NET5_0_OR_GREATER
        byte[] buffer = GC.AllocateUninitializedArray<byte>(ChunkSize);
#else
        byte[] buffer = new byte[ChunkSize];
#endif
        // The buffer is fully overwritten here, so its initial contents do not matter.
        new Span<byte>(buffer).Fill(_data);

        for (long left = _repeatCount; left > 0;)
        {
            // The final chunk may be shorter than the buffer.
            int take = left < ChunkSize ? (int)left : ChunkSize;
            yield return new ReadOnlyMemory<byte>(buffer, 0, take);
            left -= take;
        }
    }
}

/// <summary>
/// Marker attribute for driver methods. When the lookup for a matching public
/// instance method finds nothing, a method carrying this attribute is not added
/// to the missing-methods list (see the
/// <c>info.IsDefined(typeof(OverrideOptionalAttribute))</c> check).
/// </summary>
[AttributeUsage(AttributeTargets.Method, Inherited = false, AllowMultiple = false)]
private sealed class OverrideOptionalAttribute : Attribute
{
    // Intentionally empty: only the presence of the attribute is inspected.
}
}
}
33 changes: 33 additions & 0 deletions src/libraries/System.IO.Hashing/tests/XxHash32Tests.007.cs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,31 @@ public static IEnumerable<object[]> TestCases
"FC23CD03"),
};

/// <summary>
/// Exposes each entry of <see cref="LargeTestCaseDefinitions"/> as a one-element
/// argument row, for use with <c>[MemberData]</c>.
/// </summary>
public static IEnumerable<object[]> LargeTestCases
{
    get
    {
        // A single row array is allocated once and refilled for every definition.
        object[] row = new object[1];

        foreach (LargeTestCase definition in LargeTestCaseDefinitions)
        {
            row[0] = definition;
            yield return row;
        }
    }
}

/// <summary>The large-input test cases defined for this test class.</summary>
protected static IEnumerable<LargeTestCase> LargeTestCaseDefinitions { get; } =
    new LargeTestCase[]
    {
        // Manually run against the xxHash32 reference implementation.
        new LargeTestCase(
            name: "EEEEE... (10GB)",
            data: (byte)'E',
            repeatCount: 10L << 30, // 10 GB (10 * 2^30 bytes)
            outputHex: "1C44F650"),
    };

protected override NonCryptographicHashAlgorithm CreateInstance() => new XxHash32(Seed);

protected override byte[] StaticOneShot(byte[] source) => XxHash32.Hash(source, Seed);
Expand Down Expand Up @@ -125,6 +150,14 @@ public void InstanceMultiAppendGetCurrentHash(TestCase testCase)
InstanceMultiAppendGetCurrentHashDriver(testCase);
}

/// <summary>
/// Runs the large-input multi-append driver once per row of <see cref="LargeTestCases"/>.
/// </summary>
[Theory]
[OuterLoop]
[MemberData(nameof(LargeTestCases))]
public void InstanceMultiAppendLargeInput(LargeTestCase testCase) =>
    InstanceMultiAppendLargeInputDriver(testCase);

[Theory]
[MemberData(nameof(TestCases))]
public void InstanceVerifyEmptyState(TestCase testCase)
Expand Down
33 changes: 33 additions & 0 deletions src/libraries/System.IO.Hashing/tests/XxHash32Tests.cs
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,31 @@ public static IEnumerable<object[]> TestCases
"5DF7D6C0"),
};

/// <summary>
/// Exposes each entry of <see cref="LargeTestCaseDefinitions"/> as a one-element
/// argument row, for use with <c>[MemberData]</c>.
/// </summary>
public static IEnumerable<object[]> LargeTestCases
{
    get
    {
        // A single row array is allocated once and refilled for every definition.
        object[] row = new object[1];

        foreach (LargeTestCase definition in LargeTestCaseDefinitions)
        {
            row[0] = definition;
            yield return row;
        }
    }
}

/// <summary>The large-input test cases defined for this test class.</summary>
protected static IEnumerable<LargeTestCase> LargeTestCaseDefinitions { get; } =
    new LargeTestCase[]
    {
        // Manually run against the xxHash32 reference implementation.
        new LargeTestCase(
            name: "EEEEE... (10GB)",
            data: (byte)'E',
            repeatCount: 10L << 30, // 10 GB (10 * 2^30 bytes)
            outputHex: "22CBC3AA"),
    };

protected override NonCryptographicHashAlgorithm CreateInstance() => new XxHash32();

protected override byte[] StaticOneShot(byte[] source) => XxHash32.Hash(source);
Expand Down Expand Up @@ -139,6 +164,14 @@ public void InstanceMultiAppendGetCurrentHash(TestCase testCase)
InstanceMultiAppendGetCurrentHashDriver(testCase);
}

/// <summary>
/// Runs the large-input multi-append driver once per row of <see cref="LargeTestCases"/>.
/// </summary>
[Theory]
[OuterLoop]
[MemberData(nameof(LargeTestCases))]
public void InstanceMultiAppendLargeInput(LargeTestCase testCase) =>
    InstanceMultiAppendLargeInputDriver(testCase);

[Theory]
[MemberData(nameof(TestCases))]
public void InstanceVerifyEmptyState(TestCase testCase)
Expand Down
33 changes: 33 additions & 0 deletions src/libraries/System.IO.Hashing/tests/XxHash32Tests.f00d.cs
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,31 @@ public static IEnumerable<object[]> TestCases
"C7A3D1CB"),
};

/// <summary>
/// Exposes each entry of <see cref="LargeTestCaseDefinitions"/> as a one-element
/// argument row, for use with <c>[MemberData]</c>.
/// </summary>
public static IEnumerable<object[]> LargeTestCases
{
    get
    {
        // A single row array is allocated once and refilled for every definition.
        object[] row = new object[1];

        foreach (LargeTestCase definition in LargeTestCaseDefinitions)
        {
            row[0] = definition;
            yield return row;
        }
    }
}

/// <summary>The large-input test cases defined for this test class.</summary>
protected static IEnumerable<LargeTestCase> LargeTestCaseDefinitions { get; } =
    new LargeTestCase[]
    {
        // Manually run against the xxHash32 reference implementation.
        new LargeTestCase(
            name: "EEEEE... (10GB)",
            data: (byte)'E',
            repeatCount: 10L << 30, // 10 GB (10 * 2^30 bytes)
            outputHex: "B19FAE15"),
    };

protected override NonCryptographicHashAlgorithm CreateInstance() => new XxHash32(Seed);

protected override byte[] StaticOneShot(byte[] source) => XxHash32.Hash(source, Seed);
Expand Down Expand Up @@ -125,6 +150,14 @@ public void InstanceMultiAppendGetCurrentHash(TestCase testCase)
InstanceMultiAppendGetCurrentHashDriver(testCase);
}

/// <summary>
/// Runs the large-input multi-append driver once per row of <see cref="LargeTestCases"/>.
/// </summary>
[Theory]
[OuterLoop]
[MemberData(nameof(LargeTestCases))]
public void InstanceMultiAppendLargeInput(LargeTestCase testCase) =>
    InstanceMultiAppendLargeInputDriver(testCase);

[Theory]
[MemberData(nameof(TestCases))]
public void InstanceVerifyEmptyState(TestCase testCase)
Expand Down
Loading

0 comments on commit d074561

Please sign in to comment.