Skip to content

Commit

Permalink
Optimize varint reading for Intel using intrinsics
Browse files Browse the repository at this point in the history
BenchmarkDotNet v0.14.0, Windows 11 (10.0.26100.2314)
Unknown processor
.NET SDK 9.0.100
  [Host]     : .NET 9.0.0 (9.0.24.52809), X64 RyuJIT AVX2
  DefaultJob : .NET 9.0.0 (9.0.24.52809), X64 RyuJIT AVX2

| Method   | Value | Mean     | Error     | StdDev    | Ratio | RatioSD | Rank |
|--------- |------ |---------:|----------:|----------:|------:|--------:|-----:|
| Previous | 0     | 3.881 ns | 0.0541 ns | 0.0506 ns |  1.00 |    0.02 |    2 |
| New      | 0     | 3.626 ns | 0.0312 ns | 0.0291 ns |  0.93 |    0.01 |    1 |
|          |       |          |           |           |       |         |      |
| Previous | 256   | 4.161 ns | 0.0096 ns | 0.0085 ns |  1.00 |    0.00 |    2 |
| New      | 256   | 3.596 ns | 0.0076 ns | 0.0067 ns |  0.86 |    0.00 |    1 |
|          |       |          |           |           |       |         |      |
| Previous | 65536 | 4.760 ns | 0.0103 ns | 0.0096 ns |  1.00 |    0.00 |    2 |
| New      | 65536 | 3.417 ns | 0.0087 ns | 0.0081 ns |  0.72 |    0.00 |    1 |
  • Loading branch information
brantburnett committed Nov 30, 2024
1 parent eb0af6e commit 242962b
Show file tree
Hide file tree
Showing 3 changed files with 234 additions and 0 deletions.
39 changes: 39 additions & 0 deletions Snappier.Benchmarks/VarIntEncodingRead.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
#if !PREVIOUS

using BenchmarkDotNet.Attributes;
using Snappier.Internal;

namespace Snappier.Benchmarks
{
/// <summary>
/// Benchmark comparing <c>VarIntEncoding.ReadSlow</c> (baseline) against
/// <c>VarIntEncoding.Read</c> for a handful of encoded values.
/// </summary>
public class VarIntEncodingRead
{
    /// <summary>The value that is encoded once during setup and then decoded by each benchmark.</summary>
    [Params(0u, 256u, 65536u)]
    public uint Value { get; set; }

    // Fixed 16-byte buffer holding the encoded value; bytes after the encoding remain zero.
    private readonly byte[] _buffer = new byte[16];

    /// <summary>Encodes <see cref="Value"/> into the shared buffer before the benchmarks run.</summary>
    [GlobalSetup]
    public void GlobalSetup() => VarIntEncoding.Write(_buffer, Value);

    /// <summary>Baseline: decodes the buffer with <c>ReadSlow</c>.</summary>
    /// <returns>The returned length and the decoded value, so the work cannot be discarded.</returns>
    [Benchmark(Baseline = true)]
    public (int, uint) Previous()
    {
        int consumed = VarIntEncoding.ReadSlow(_buffer, out uint decoded);
        return (consumed, decoded);
    }

    /// <summary>Candidate: decodes the buffer with <c>Read</c>.</summary>
    /// <returns>The returned length and the decoded value, so the work cannot be discarded.</returns>
    [Benchmark]
    public (int, uint) New()
    {
        int consumed = VarIntEncoding.Read(_buffer, out uint decoded);
        return (consumed, decoded);
    }
}
}

#endif
79 changes: 79 additions & 0 deletions Snappier.Tests/Internal/VarIntEncodingReadTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
using System;
using Snappier.Internal;
using Xunit;

namespace Snappier.Tests.Internal
{
/// <summary>
/// Tests for <c>VarIntEncoding.Read</c>, covering inputs that are exactly as long as the
/// encoding as well as inputs padded out to 16 bytes with either zeros or 0xFF bytes.
/// </summary>
public class VarIntEncodingReadTests
{
    // Length of the padded buffers used by the padding tests.
    private const int PaddedLength = 16;

    /// <summary>Pairs of (expected decoded value, encoded bytes).</summary>
    public static TheoryData<uint, byte[]> TestData() =>
        new()
        {
            // One-byte encodings
            { 0x00, [ 0x00 ] },
            { 0x01, [ 0x01 ] },
            { 0x7F, [ 0x7F ] },

            // Two-byte encodings
            { 0x80, [ 0x80, 0x01 ] },
            { 0x555, [ 0xD5, 0x0A ] },

            // Three-byte encodings
            { 0x7FFF, [ 0xFF, 0xFF, 0x01 ] },
            { 0xBFFF, [ 0xFF, 0xFF, 0x02 ] },
            { 0xFFFF, [ 0xFF, 0xFF, 0x03 ] },
            { 0x8000, [ 0x80, 0x80, 0x02 ] },
            { 0x5555, [ 0xD5, 0xAA, 0x01 ] },

            // Four- and five-byte encodings
            { 0xCAFEF00, [ 0x80, 0xDE, 0xBF, 0x65 ] },
            { 0xCAFEF00D, [ 0x8D, 0xE0, 0xFB, 0xD7, 0x0C ] },
            { 0xFFFFFFFF, [ 0xFF, 0xFF, 0xFF, 0xFF, 0x0F ] },
        };

    /// <summary>Decodes an input that contains nothing but the encoded value.</summary>
    [Theory]
    [MemberData(nameof(TestData))]
    public void Test_Read(uint expected, byte[] input) =>
        AssertDecodes(input, expected, input.Length);

    /// <summary>Decodes the encoded value when it is followed by zero bytes.</summary>
    [Theory]
    [MemberData(nameof(TestData))]
    public void Test_Read_ZeroPadding(uint expected, byte[] input)
    {
        byte[] padded = new byte[PaddedLength];
        input.CopyTo(padded, 0);

        AssertDecodes(padded, expected, input.Length);
    }

    /// <summary>Decodes the encoded value when it is followed by 0xFF bytes.</summary>
    [Theory]
    [MemberData(nameof(TestData))]
    public void Test_Read_OnePadding(uint expected, byte[] input)
    {
        byte[] padded = new byte[PaddedLength];
        Array.Fill(padded, (byte)0xff);
        input.CopyTo(padded, 0);

        AssertDecodes(padded, expected, input.Length);
    }

    // Shared assertion: reads from the buffer and checks both the reported length and the value.
    private static void AssertDecodes(ReadOnlySpan<byte> encoded, uint expectedValue, int expectedLength)
    {
        int consumed = VarIntEncoding.Read(encoded, out uint decoded);

        Assert.Equal(expectedLength, consumed);
        Assert.Equal(expectedValue, decoded);
    }
}
}

/* ************************************************************
*
* @author Couchbase <[email protected]>
* @copyright 2021 Couchbase, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* ************************************************************/
116 changes: 116 additions & 0 deletions Snappier/Internal/VarIntEncoding.Read.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
using System;

#if NET7_0_OR_GREATER
using System.Buffers.Binary;
using System.Diagnostics;
using System.Numerics;
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif

namespace Snappier.Internal
{
internal static partial class VarIntEncoding
{
/// <summary>
/// Decodes a variable-length encoded 32-bit unsigned integer from the start of
/// <paramref name="input"/>, choosing between the intrinsics-based and the scalar reader.
/// </summary>
/// <param name="input">Buffer that starts with the encoded value.</param>
/// <param name="result">Receives the decoded value.</param>
/// <returns>The number of bytes the encoded value occupied, as reported by the chosen reader.</returns>
public static int Read(ReadOnlySpan<byte> input, out uint result)
{
#if NET7_0_OR_GREATER
    // The vectorized reader loads a full Vector128 from the input, so it is only eligible when
    // SSE2 and BMI2 are available, the platform is little-endian, and the input is at least
    // that long. Anything else goes to the scalar reader.
    if (!Sse2.IsSupported || !Bmi2.IsSupported || !BitConverter.IsLittleEndian || input.Length < Vector128<byte>.Count)
    {
        return ReadSlow(input, out result);
    }

    return ReadFast(input, out result);
#else
    return ReadSlow(input, out result);
#endif
}

/// <summary>
/// Scalar decoder for a variable-length encoded 32-bit unsigned integer. Each input byte
/// contributes its low 7 bits, least-significant group first; a byte below 128 ends the value.
/// </summary>
/// <param name="input">Buffer that starts with the encoded value.</param>
/// <param name="result">Receives the decoded value.</param>
/// <returns>The number of bytes consumed from <paramref name="input"/>.</returns>
/// <remarks>
/// Reports failure through <c>ThrowHelper.ThrowInvalidDataException</c> when the input is empty,
/// ends before a terminating byte is found, or encodes more bits than fit in 32.
/// </remarks>
public static int ReadSlow(ReadOnlySpan<byte> input, out uint result)
{
    result = 0;
    int shift = 0;
    bool foundEnd = false;

    // Iterate over the bytes that are actually present. The previous loop condition
    // (input.Length > 0) never changed, so a truncated encoding indexed past the end of the
    // span and surfaced as IndexOutOfRangeException instead of reaching the check below.
    foreach (byte c in input)
    {
        int val = c & 0x7f;
        if (Helpers.LeftShiftOverflows((byte) val, shift))
        {
            ThrowHelper.ThrowInvalidDataException("Invalid stream length");
        }

        result |= (uint)(val << shift);
        shift += 7;

        if (c < 128)
        {
            // High bit clear: this was the last byte of the value.
            foundEnd = true;
            break;
        }

        if (shift >= 32)
        {
            // A continuation bit on the fifth byte would require more than 32 bits.
            ThrowHelper.ThrowInvalidDataException("Invalid stream length");
        }
    }

    if (!foundEnd)
    {
        // Empty input, or the input ended while a continuation bit was still set.
        ThrowHelper.ThrowInvalidDataException("Invalid stream length");
    }

    // shift advanced by 7 for every byte consumed.
    return shift / 7;
}

#if NET7_0_OR_GREATER

// Masks indexed by encoded length in bytes: entry N keeps the low 7*N bits of the extracted
// payload (capped at 32 bits for the five-byte case).
private static ReadOnlySpan<uint> ReadMasks =>
[
    0u,              // Not used, present for padding so the index equals the byte count
    (1u << 7) - 1,   // 1 byte  ->  7 bits
    (1u << 14) - 1,  // 2 bytes -> 14 bits
    (1u << 21) - 1,  // 3 bytes -> 21 bits
    (1u << 28) - 1,  // 4 bytes -> 28 bits
    uint.MaxValue    // 5 bytes -> all 32 bits
];

/// <summary>
/// Intrinsics-based decoder for a variable-length encoded 32-bit unsigned integer. Requires
/// SSE2, BMI2, a little-endian platform and at least <c>Vector128&lt;byte&gt;.Count</c> bytes of
/// input; callers are expected to have checked these (they are only asserted in debug builds).
/// </summary>
/// <param name="input">Buffer that starts with the encoded value.</param>
/// <param name="result">Receives the decoded value.</param>
/// <returns>The number of bytes the encoded value occupied.</returns>
/// <remarks>
/// Reports failure through <c>ThrowHelper.ThrowInvalidDataException</c> when no terminating byte
/// is found within the first five bytes, or when a five-byte encoding carries bits that do not
/// fit in 32 bits.
/// </remarks>
private static int ReadFast(ReadOnlySpan<byte> input, out uint result)
{
    Debug.Assert(Sse2.IsSupported);
    Debug.Assert(Bmi2.IsSupported);
    Debug.Assert(input.Length >= Vector128<byte>.Count);
    Debug.Assert(BitConverter.IsLittleEndian);

    // MoveMask collects the high (continuation) bit of each loaded byte. After inverting, the
    // lowest set bit marks the first byte without a continuation bit, i.e. the last byte of
    // the encoded value.
    var mask = ~Sse2.MoveMask(Vector128.LoadUnsafe(ref MemoryMarshal.GetReference(input)));
    var bytesRead = BitOperations.TrailingZeroCount(mask) + 1;

    // Pack the 7-bit payloads of the leading bytes together. The 64-bit form handles five
    // bytes at once; the 32-bit form packs four bytes and ORs in the low 4 bits of the fifth.
    uint shuffledBits = Bmi2.X64.IsSupported
        ? unchecked((uint)Bmi2.X64.ParallelBitExtract(BinaryPrimitives.ReadUInt64LittleEndian(input), 0x7F7F7F7F7Fu))
        : Bmi2.ParallelBitExtract(BinaryPrimitives.ReadUInt32LittleEndian(input), 0x7F7F7F7Fu) |
          ((BinaryPrimitives.ReadUInt32LittleEndian(input.Slice(4)) & 0xf) << 28);

    if (bytesRead < ReadMasks.Length)
    {
        // Only 4 payload bits of a fifth byte fit into a 32-bit result. Both extraction forms
        // above silently drop the remaining bits, so reject such input explicitly rather than
        // returning a truncated value. ReadSlow guards the same situation with
        // Helpers.LeftShiftOverflows (NOTE(review): confirm that helper rejects these bytes).
        // The terminating byte has its high bit clear, so "> 0x0f" means bits 4-6 are set.
        if (bytesRead == ReadMasks.Length - 1 && input[bytesRead - 1] > 0x0f)
        {
            ThrowHelper.ThrowInvalidDataException("Invalid stream length");
        }

        result = shuffledBits & ReadMasks[bytesRead];
    }
    else
    {
        // Currently, JIT doesn't optimize the bounds check away in the branch above,
        // but we'll leave it written this way in case JIT improves in the future to avoid
        // checking the bounds twice. We could just let it throw an IndexOutOfRangeException,
        // but that would be inconsistent with the other code paths.

        ThrowHelper.ThrowInvalidDataException("Invalid stream length");
        result = 0;
    }

    return bytesRead;
}

#endif
}
}

0 comments on commit 242962b

Please sign in to comment.