Skip to content

Commit

Permalink
colblk: add SeekUnsetBitGE/LE
Browse files Browse the repository at this point in the history
These variants will be useful for skipping blocks of set bits, for
example to skip obsolete keys.
  • Loading branch information
RaduBerinde committed Sep 26, 2024
1 parent 7fad086 commit 50864fa
Show file tree
Hide file tree
Showing 3 changed files with 140 additions and 5 deletions.
66 changes: 61 additions & 5 deletions sstable/colblk/bitmap.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ package colblk
import (
"fmt"
"io"
"math"
"math/bits"
"slices"
"strings"
Expand Down Expand Up @@ -85,7 +86,7 @@ func (b Bitmap) SeekSetBitGE(i int) int {
wordIdx := i >> 6 // i/64
// Fast path for common case of reasonably dense bitmaps; if the there's a
// bit ≥ i set in the same word, return it.
if next := nextBitInWord(b.data.At(wordIdx), uint(i%64)); next < 64 {
if next := nextBitInWord(b.data.At(wordIdx), uint(i)&63); next < 64 {
return wordIdx<<6 + next
}

Expand Down Expand Up @@ -120,18 +121,18 @@ func (b Bitmap) SeekSetBitGE(i int) int {
return (wordIdx << 6) + bits.TrailingZeros64(b.data.At(wordIdx))
}

// Predecessor returns the previous bit less than or equal to i set in the
// SeekSetBitLE returns the previous bit less than or equal to i set in the
// bitmap. The i parameter must be in [0, bitCount). Returns -1 if no previous
// bit is set.
func (b Bitmap) Predecessor(i int) int {
func (b Bitmap) SeekSetBitLE(i int) int {
if b.data.ptr == nil {
// Zero bitmap case.
return -1
}
wordIdx := i >> 6 // i/64
// Fast path for common case of reasonably dense bitmaps; if the there's a
// bit ≤ i set in the same word, return it.
if prev := prevBitInWord(b.data.At(wordIdx), uint(i%64)); prev >= 0 {
if prev := prevBitInWord(b.data.At(wordIdx), uint(i)&63); prev >= 0 {
return (wordIdx << 6) + prev
}

Expand Down Expand Up @@ -167,6 +168,61 @@ func (b Bitmap) Predecessor(i int) int {
return (wordIdx << 6) + 63 - bits.LeadingZeros64(b.data.At(wordIdx))
}

// SeekUnsetBitGE finds the first unset bit at index i or later and returns its
// index. The i parameter must be in [0, bitCount). When no unset bit is found
// at or after i, the number of bits represented by the bitmap is returned.
func (b Bitmap) SeekUnsetBitGE(i int) int {
	// Zero bitmap case: i is returned as-is.
	if b.data.ptr == nil {
		return i
	}

	w := i >> 6 // i/64
	// Search the complement of the word containing i, starting at i's offset
	// within that word. A result below 64 is a hit in this same word.
	if off := nextBitInWord(^b.data.At(w), uint(i)&63); off < 64 {
		return w<<6 + off
	}

	// No hit in the starting word; walk forward over the remaining words and
	// stop at the first one that is not entirely ones.
	wordCount := (b.bitCount + 63) >> 6
	for w++; w < wordCount; w++ {
		if cur := b.data.At(w); cur != math.MaxUint64 {
			// The lowest set bit of the complement is the lowest unset bit.
			return w<<6 + bits.TrailingZeros64(^cur)
		}
	}
	// Ran past the last word without finding an unset bit.
	return b.bitCount
}

// SeekUnsetBitLE returns the previous bit less than or equal to i that is
// unset in the bitmap. The i parameter must be in [0, bitCount). Returns -1 if
// no previous bit is unset. For the zero bitmap, i itself is returned.
func (b Bitmap) SeekUnsetBitLE(i int) int {
	if b.data.ptr == nil {
		// Zero bitmap case.
		return i
	}

	wordIdx := i >> 6 // i/64
	// If there's a bit ≤ i unset in the same word, return it. The word is
	// complemented so that the search for an unset bit becomes a search for a
	// set bit.
	if prev := prevBitInWord(^b.data.At(wordIdx), uint(i)&63); prev >= 0 {
		return (wordIdx << 6) + prev
	}

	// Walk backwards over the preceding words, skipping any that are entirely
	// ones, and stop at the first word containing an unset bit.
	for wordIdx--; wordIdx >= 0; wordIdx-- {
		if word := b.data.At(wordIdx); word != math.MaxUint64 {
			// The highest set bit of the complement is the highest unset bit.
			return (wordIdx << 6) + 63 - bits.LeadingZeros64(^word)
		}
	}
	// Every word before the starting one was all ones.
	return -1
}

func (b Bitmap) summaryTableBounds() (startOffset, endOffset int) {
startOffset = (b.bitCount + 63) >> 6
endOffset = startOffset + startOffset>>6
Expand Down Expand Up @@ -214,7 +270,7 @@ func (b *BitmapBuilder) Set(i int) {
for len(b.words) <= w {
b.words = append(b.words, 0)
}
b.words[w] |= 1 << uint(i%64)
b.words[w] |= 1 << uint(i&63)
}

// isZero returns true if no bits are set and Invert was not called.
Expand Down
39 changes: 39 additions & 0 deletions sstable/colblk/bitmap_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,20 @@ func TestBitmapFixed(t *testing.T) {
fmt.Fprintf(&buf, "bitmap.SeekSetBitLE(%d) = %d\n", idx, bitmap.SeekSetBitLE(idx))
}

case "seek-unset-ge":
var indexes []int
td.ScanArgs(t, "indexes", &indexes)
for _, idx := range indexes {
fmt.Fprintf(&buf, "bitmap.SeekUnsetBitGE(%d) = %d\n", idx, bitmap.SeekUnsetBitGE(idx))
}

case "seek-unset-le":
var indexes []int
td.ScanArgs(t, "indexes", &indexes)
for _, idx := range indexes {
fmt.Fprintf(&buf, "bitmap.SeekUnsetBitLE(%d) = %d\n", idx, bitmap.SeekUnsetBitLE(idx))
}

default:
panic(fmt.Sprintf("unknown command: %s", td.Cmd))
}
Expand Down Expand Up @@ -171,6 +185,31 @@ func TestBitmapRandom(t *testing.T) {
}
}
}
for i := 0; i < size; i++ {
succ := bitmap.SeekUnsetBitGE(i)
// Ensure that SeekUnsetBitGE always returns the index of an unset bit.
if succ != size && bitmap.At(succ) {
t.Fatalf("b.SeekUnsetBitGE(%d) = %d; bit at index %d is set", i, succ, succ)
}
pred := bitmap.SeekUnsetBitLE(i)
// Ensure that SeekUnsetBitLE always returns the index of an unset bit.
if pred >= 0 && bitmap.At(pred) {
t.Fatalf("b.SeekUnsetBitLE(%d) = %d; bit at index %d is set", i, pred, pred)
}

// Ensure there are only set bits between i and succ.
for j := i; j < succ; j++ {
if !bitmap.At(j) {
t.Fatalf("b.SeekUnsetBitGE(%d) = %d; bit at index %d is unset", i, succ, j)
}
}
// Ensure there are only set bits between pred and i.
for j := pred + 1; j < i; j++ {
if !bitmap.At(j) {
t.Fatalf("b.SeekUnsetBitLE(%d) = %d; bit at index %d is unset", i, pred, j)
}
}
}
}

fixedProbabilities := []float64{0.00001, 0.0001, 0.001, 0.1, 0.5, 0.9999}
Expand Down
40 changes: 40 additions & 0 deletions sstable/colblk/testdata/bitmap
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,46 @@ bitmap.SeekSetBitLE(14) = 14
bitmap.SeekSetBitLE(15) = 14
bitmap.SeekSetBitLE(16) = 14

seek-unset-ge indexes=(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)
----
bitmap.SeekUnsetBitGE(0) = 1
bitmap.SeekUnsetBitGE(1) = 1
bitmap.SeekUnsetBitGE(2) = 3
bitmap.SeekUnsetBitGE(3) = 3
bitmap.SeekUnsetBitGE(4) = 5
bitmap.SeekUnsetBitGE(5) = 5
bitmap.SeekUnsetBitGE(6) = 9
bitmap.SeekUnsetBitGE(7) = 9
bitmap.SeekUnsetBitGE(8) = 9
bitmap.SeekUnsetBitGE(9) = 9
bitmap.SeekUnsetBitGE(10) = 10
bitmap.SeekUnsetBitGE(11) = 11
bitmap.SeekUnsetBitGE(12) = 15
bitmap.SeekUnsetBitGE(13) = 15
bitmap.SeekUnsetBitGE(14) = 15
bitmap.SeekUnsetBitGE(15) = 15
bitmap.SeekUnsetBitGE(16) = 16

seek-unset-le indexes=(0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16)
----
bitmap.SeekUnsetBitLE(0) = -1
bitmap.SeekUnsetBitLE(1) = 1
bitmap.SeekUnsetBitLE(2) = 1
bitmap.SeekUnsetBitLE(3) = 3
bitmap.SeekUnsetBitLE(4) = 3
bitmap.SeekUnsetBitLE(5) = 5
bitmap.SeekUnsetBitLE(6) = 5
bitmap.SeekUnsetBitLE(7) = 5
bitmap.SeekUnsetBitLE(8) = 5
bitmap.SeekUnsetBitLE(9) = 9
bitmap.SeekUnsetBitLE(10) = 10
bitmap.SeekUnsetBitLE(11) = 11
bitmap.SeekUnsetBitLE(12) = 11
bitmap.SeekUnsetBitLE(13) = 11
bitmap.SeekUnsetBitLE(14) = 11
bitmap.SeekUnsetBitLE(15) = 15
bitmap.SeekUnsetBitLE(16) = 16

# Test calling Invert() before finishing.

build invert
Expand Down

0 comments on commit 50864fa

Please sign in to comment.