Skip to content

Commit

Permalink
Unroll sketch increment (#653)
Browse files Browse the repository at this point in the history
* unroll

* bench

* unroll freq

* comment

* rem extra file

---------
  • Loading branch information
bitfaster authored Jan 13, 2025
1 parent b34b12c commit 410dae2
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 28 deletions.
3 changes: 3 additions & 0 deletions BitFaster.Caching.Benchmarks/Lfu/CmSketchNoPin.cs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@

namespace BitFaster.Caching.Benchmarks.Lfu
{
// Block sketch implementation without:
// - Pinned buffer for vector code paths
// - Loop unroll for non-vector code paths
internal class CmSketchNoPin<T, I>
where T : notnull
where I : struct, IsaProbe
Expand Down
18 changes: 15 additions & 3 deletions BitFaster.Caching.Benchmarks/Lfu/SketchFrequency.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,8 @@ public class SketchFrequency
private CmSketchFlat<int, DisableHardwareIntrinsics> flatStd;
private CmSketchFlat<int, DetectIsa> flatAvx;

private CmSketchCore<int, DisableHardwareIntrinsics> blockStd;
private CmSketchNoPin<int, DisableHardwareIntrinsics> blockStdNoUnroll;
private CmSketchCore<int, DisableHardwareIntrinsics> blockStdUnroll;
private CmSketchNoPin<int, DetectIsa> blockAvxNoPin;
private CmSketchCore<int, DetectIsa> blockAvx;

Expand All @@ -37,7 +38,8 @@ public void Setup()
flatStd = new CmSketchFlat<int, DisableHardwareIntrinsics>(Size, EqualityComparer<int>.Default);
flatAvx = new CmSketchFlat<int, DetectIsa>(Size, EqualityComparer<int>.Default);

blockStd = new CmSketchCore<int, DisableHardwareIntrinsics>(Size, EqualityComparer<int>.Default);
blockStdNoUnroll = new CmSketchNoPin<int, DisableHardwareIntrinsics>(Size, EqualityComparer<int>.Default);
blockStdUnroll = new CmSketchCore<int, DisableHardwareIntrinsics>(Size, EqualityComparer<int>.Default);
blockAvxNoPin = new CmSketchNoPin<int, DetectIsa>(Size, EqualityComparer<int>.Default);
blockAvx = new CmSketchCore<int, DetectIsa>(Size, EqualityComparer<int>.Default);
}
Expand Down Expand Up @@ -67,7 +69,17 @@ public int FrequencyBlock()
{
int count = 0;
for (int i = 0; i < iterations; i++)
count += blockStd.EstimateFrequency(i) > blockStd.EstimateFrequency(i + 1) ? 1 : 0;
count += blockStdNoUnroll.EstimateFrequency(i) > blockStdNoUnroll.EstimateFrequency(i + 1) ? 1 : 0;

return count;
}

// Benchmark: frequency estimation against blockStdUnroll, the
// CmSketchCore<int, DisableHardwareIntrinsics> instance created in Setup.
// Each loop iteration makes two EstimateFrequency calls (keys i and i + 1), while
// OperationsPerInvoke is set to iterations, so one counted operation covers both calls.
// The loop has the same shape as FrequencyBlock; only the sketch instance differs.
[Benchmark(OperationsPerInvoke = iterations)]
public int FrequencyBlockUnroll()
{
int count = 0;
for (int i = 0; i < iterations; i++)
// Adds 1 when key i has a strictly higher estimate than key i + 1; the sum is returned,
// so every estimate contributes to the method's result.
count += blockStdUnroll.EstimateFrequency(i) > blockStdUnroll.EstimateFrequency(i + 1) ? 1 : 0;

return count;
}
Expand Down
17 changes: 14 additions & 3 deletions BitFaster.Caching.Benchmarks/Lfu/SketchIncrement.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ public class SketchIncrement
private CmSketchFlat<int, DisableHardwareIntrinsics> flatStd;
private CmSketchFlat<int, DetectIsa> flatAvx;

private CmSketchCore<int, DisableHardwareIntrinsics> blockStd;
private CmSketchNoPin<int, DisableHardwareIntrinsics> blockStdNoUnroll;
private CmSketchCore<int, DisableHardwareIntrinsics> blockStdUnroll;
private CmSketchNoPin<int, DetectIsa> blockAvxNoPin;
private CmSketchCore<int, DetectIsa> blockAvx;

Expand All @@ -37,7 +38,8 @@ public void Setup()
flatStd = new CmSketchFlat<int, DisableHardwareIntrinsics>(Size, EqualityComparer<int>.Default);
flatAvx = new CmSketchFlat<int, DetectIsa>(Size, EqualityComparer<int>.Default);

blockStd = new CmSketchCore<int, DisableHardwareIntrinsics>(Size, EqualityComparer<int>.Default);
blockStdNoUnroll = new CmSketchNoPin<int, DisableHardwareIntrinsics>(Size, EqualityComparer<int>.Default);
blockStdUnroll = new CmSketchCore<int, DisableHardwareIntrinsics>(Size, EqualityComparer<int>.Default);
blockAvxNoPin = new CmSketchNoPin<int, DetectIsa>(Size, EqualityComparer<int>.Default);
blockAvx = new CmSketchCore<int, DetectIsa>(Size, EqualityComparer<int>.Default);
}
Expand Down Expand Up @@ -65,7 +67,16 @@ public void IncBlock()
{
for (int i = 0; i < iterations; i++)
{
blockStd.Increment(i);
blockStdNoUnroll.Increment(i);
}
}

// Benchmark: increments keys 0 .. iterations - 1 on blockStdUnroll, the
// CmSketchCore<int, DisableHardwareIntrinsics> instance created in Setup.
// OperationsPerInvoke is set to iterations, matching the one Increment call per loop iteration.
// The loop has the same shape as IncBlock; only the sketch instance differs.
[Benchmark(OperationsPerInvoke = iterations)]
public void IncBlockUnroll()
{
for (int i = 0; i < iterations; i++)
{
blockStdUnroll.Increment(i);
}
}

Expand Down
2 changes: 1 addition & 1 deletion BitFaster.Caching/BitFaster.Caching.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

<PropertyGroup>
<TargetFrameworks>netstandard2.0;netcoreapp3.1;net6.0</TargetFrameworks>
<LangVersion>10.0</LangVersion>
<LangVersion>11.0</LangVersion>
<Authors>Alex Peck</Authors>
<Company />
<Product>BitFaster.Caching</Product>
Expand Down
63 changes: 42 additions & 21 deletions BitFaster.Caching/Lfu/CmSketchCore.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#if !NETSTANDARD2_0
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;

#endif

#if NET6_0_OR_GREATER
Expand Down Expand Up @@ -169,41 +170,61 @@ private void EnsureCapacity(long maximumSize)

// Estimates the frequency of value on the non-vector code path: reads four 4-bit counters
// from the 8-entry table block selected by the value's hash and returns the smallest of them.
//
// NOTE(review): this text comes from a diff view without +/- markers, so the body below shows
// two implementations back to back: a stackalloc/loop form first, then an unrolled form.
// Judging by the commit title and the "Loop unrolling" comment, the loop form is presumably
// the removed code and the unrolled form its replacement - confirm against the repository.
private unsafe int EstimateFrequencyStd(T value)
{
// Scratch buffer for the four counter values (loop form only).
var count = stackalloc int[4];
int blockHash = Spread(comparer.GetHashCode(value));
int counterHash = Rehash(blockHash);
// Masked block number times 8: index of the first of the 8 table entries in this block.
int block = (blockHash & blockMask) << 3;

// Loop form: iteration i works on counterHash shifted right by 8 * i bits.
for (int i = 0; i < 4; i++)
{
int h = (int)((uint)counterHash >> (i << 3));
// Bits 1-4 of h select one of 16 counters inside a table entry.
int index = (h >> 1) & 15;
// Bit 0 of h selects which entry of the pair (2i, 2i + 1) within the block is read.
int offset = h & 1;
// Shift the chosen 4-bit counter down to the low bits and mask it out.
count[i] = (int)(((ulong)table[block + offset + (i << 1)] >> (index << 2)) & 0xfL);
}
return Math.Min(Math.Min(count[0], count[1]), Math.Min(count[2], count[3]));
// Loop unrolling improves throughput
// h0..h3: counterHash shifted right by 0, 8, 16 and 24 bits using the unsigned shift operator.
int h0 = counterHash;
int h1 = counterHash >>> 8;
int h2 = counterHash >>> 16;
int h3 = counterHash >>> 24;

// Bits 1-4 of each h select one of 16 counters inside a table entry.
int index0 = (h0 >>> 1) & 15;
int index1 = (h1 >>> 1) & 15;
int index2 = (h2 >>> 1) & 15;
int index3 = (h3 >>> 1) & 15;

// Bit 0 of each h selects one entry from the pairs (0,1), (2,3), (4,5), (6,7) of the block.
int slot0 = block + (h0 & 1);
int slot1 = block + (h1 & 1) + 2;
int slot2 = block + (h2 & 1) + 4;
int slot3 = block + (h3 & 1) + 6;

// Shift each chosen 4-bit counter down to the low bits and mask it out.
int count0 = (int)((table[slot0] >>> (index0 << 2)) & 0xfL);
int count1 = (int)((table[slot1] >>> (index1 << 2)) & 0xfL);
int count2 = (int)((table[slot2] >>> (index2 << 2)) & 0xfL);
int count3 = (int)((table[slot3] >>> (index3 << 2)) & 0xfL);

// The estimate is the minimum of the four counters.
return Math.Min(Math.Min(count0, count1), Math.Min(count2, count3));
}

private unsafe void IncrementStd(T value)
{
var index = stackalloc int[8];
int blockHash = Spread(comparer.GetHashCode(value));
int counterHash = Rehash(blockHash);
int block = (blockHash & blockMask) << 3;

for (int i = 0; i < 4; i++)
{
int h = (int)((uint)counterHash >> (i << 3));
index[i] = (h >> 1) & 15;
int offset = h & 1;
index[i + 4] = block + offset + (i << 1);
}
// Loop unrolling improves throughput
int h0 = counterHash;
int h1 = counterHash >>> 8;
int h2 = counterHash >>> 16;
int h3 = counterHash >>> 24;

int index0 = (h0 >>> 1) & 15;
int index1 = (h1 >>> 1) & 15;
int index2 = (h2 >>> 1) & 15;
int index3 = (h3 >>> 1) & 15;

int slot0 = block + (h0 & 1);
int slot1 = block + (h1 & 1) + 2;
int slot2 = block + (h2 & 1) + 4;
int slot3 = block + (h3 & 1) + 6;

bool added =
IncrementAt(index[4], index[0])
| IncrementAt(index[5], index[1])
| IncrementAt(index[6], index[2])
| IncrementAt(index[7], index[3]);
IncrementAt(slot0, index0)
| IncrementAt(slot1, index1)
| IncrementAt(slot2, index2)
| IncrementAt(slot3, index3);

if (added && (++size == sampleSize))
{
Expand Down

0 comments on commit 410dae2

Please sign in to comment.