From b26e6326e40123d2e30be1c1216b809acc164a42 Mon Sep 17 00:00:00 2001 From: Jackson Owens Date: Thu, 18 Jul 2024 16:11:03 -0400 Subject: [PATCH] colblk: implement a common interface across column data accessors Introduce a new generic Array[T] type that allows indexed access to data and implement it across the various accessors. --- sstable/colblk/bitmap.go | 7 +++++-- sstable/colblk/bitmap_test.go | 12 ++++++------ sstable/colblk/block_test.go | 2 +- sstable/colblk/column.go | 6 ++++++ sstable/colblk/prefix_bytes.go | 11 +++++++++++ sstable/colblk/raw_bytes.go | 3 +++ sstable/colblk/unsafe_slice.go | 6 +++--- 7 files changed, 35 insertions(+), 12 deletions(-) diff --git a/sstable/colblk/bitmap.go b/sstable/colblk/bitmap.go index 1a542c4360..a89d256d6d 100644 --- a/sstable/colblk/bitmap.go +++ b/sstable/colblk/bitmap.go @@ -30,6 +30,9 @@ type Bitmap struct { bitCount int } +// Assert that Bitmap implements Array[bool]. +var _ Array[bool] = Bitmap{} + // DecodeBitmap decodes the structure of a Bitmap and returns a Bitmap that // reads from b supporting bitCount logical bits. No bounds checking is // performed, so the caller must guarantee the bitmap is appropriately sized and @@ -50,8 +53,8 @@ func DecodeBitmap(b []byte, off uint32, bitCount int) (bitmap Bitmap, endOffset // Assert that DecodeBitmap implements DecodeFunc. var _ DecodeFunc[Bitmap] = DecodeBitmap -// Get returns true if the bit at position i is set and false otherwise. -func (b Bitmap) Get(i int) bool { +// At returns true if the bit at position i is set and false otherwise. 
+func (b Bitmap) At(i int) bool { return (b.data.At(i>>6 /* i/64 */) & (1 << uint(i%64))) != 0 } diff --git a/sstable/colblk/bitmap_test.go b/sstable/colblk/bitmap_test.go index ff253e1065..58c5e320af 100644 --- a/sstable/colblk/bitmap_test.go +++ b/sstable/colblk/bitmap_test.go @@ -76,7 +76,7 @@ func dumpBitmap(w io.Writer, b Bitmap) { if i > 0 && i%64 == 0 { w.Write([]byte{'\n'}) } - if b.Get(i) { + if b.At(i) { w.Write([]byte{'1'}) } else { w.Write([]byte{'0'}) @@ -104,31 +104,31 @@ func TestBitmapRandom(t *testing.T) { bitmap, endOffset := DecodeBitmap(data, 0, size) require.Equal(t, uint32(len(data)), endOffset) for i := 0; i < size; i++ { - if got := bitmap.Get(i); got != v[i] { + if got := bitmap.At(i); got != v[i] { t.Fatalf("b.Get(%d) = %t; want %t", i, got, v[i]) } } for i := 0; i < size; i++ { succ := bitmap.Successor(i) // Ensure that Successor always returns the index of a set bit. - if succ != size && !bitmap.Get(succ) { + if succ != size && !bitmap.At(succ) { t.Fatalf("b.Successor(%d) = %d; bit at index %d is not set", i, succ, succ) } pred := bitmap.Predecessor(i) // Ensure that Predecessor always returns the index of a set bit. - if pred >= 0 && !bitmap.Get(pred) { + if pred >= 0 && !bitmap.At(pred) { t.Fatalf("b.Predecessor(%d) = %d; bit at index %d is not set", i, pred, pred) } // Ensure there are no set bits between i and succ. for j := i; j < succ; j++ { - if bitmap.Get(j) { + if bitmap.At(j) { t.Fatalf("b.Successor(%d) = %d; bit at index %d is set", i, succ, j) } } // Ensure there are no set bits between pred and i. 
for j := pred + 1; j < i; j++ { - if bitmap.Get(j) { + if bitmap.At(j) { t.Fatalf("b.Predecessor(%d) = %d; bit at index %d is set", i, pred, j) } } diff --git a/sstable/colblk/block_test.go b/sstable/colblk/block_test.go index eef8e92c58..b352625b75 100644 --- a/sstable/colblk/block_test.go +++ b/sstable/colblk/block_test.go @@ -303,7 +303,7 @@ func testRandomBlock(t *testing.T, rng *rand.Rand, rows int, schema []DataType) b := r.Bitmap(col) vals := make([]bool, r.header.Rows) for i := range vals { - vals[i] = b.Get(i) + vals[i] = b.At(i) } got = vals case DataTypeUint8: diff --git a/sstable/colblk/column.go b/sstable/colblk/column.go index 4e605169d4..bcbb3a9b03 100644 --- a/sstable/colblk/column.go +++ b/sstable/colblk/column.go @@ -114,3 +114,9 @@ type Encoder interface { // The rows argument must be number of logical rows encoded within the data // structure. type DecodeFunc[T any] func(buf []byte, offset uint32, rows int) (decoded T, nextOffset uint32) + +// An Array provides indexed access to an array of values. +type Array[V any] interface { + // At returns the i'th value in the array. + At(i int) V +} diff --git a/sstable/colblk/prefix_bytes.go b/sstable/colblk/prefix_bytes.go index 69a372ff50..7469cafa25 100644 --- a/sstable/colblk/prefix_bytes.go +++ b/sstable/colblk/prefix_bytes.go @@ -10,6 +10,7 @@ import ( "fmt" "io" "math/bits" + "slices" "strings" "unsafe" @@ -171,6 +172,9 @@ type PrefixBytes struct { rawBytes RawBytes } +// Assert that PrefixBytes implements Array[[]byte]. +var _ Array[[]byte] = PrefixBytes{} + // DecodePrefixBytes decodes the structure of a PrefixBytes, constructing an // accessor for an array of lexicographically sorted byte slices constructed by // PrefixBytesBuilder. Count must be the number of logical slices within the @@ -205,6 +209,13 @@ func DecodePrefixBytes( // Assert that DecodePrefixBytes implements DecodeFunc. var _ DecodeFunc[PrefixBytes] = DecodePrefixBytes +// At returns the i'th []byte slice in the PrefixBytes. 
At must allocate, so +// callers should prefer accessing a slice's constituent components through +// SharedPrefix, RowBundlePrefix and RowSuffix. +func (b PrefixBytes) At(i int) []byte { + return slices.Concat(b.SharedPrefix(), b.RowBundlePrefix(i), b.RowSuffix(i)) +} + // SharedPrefix return a []byte of the shared prefix that was extracted from // all of the values in the Bytes vector. The returned slice should not be // mutated. diff --git a/sstable/colblk/raw_bytes.go b/sstable/colblk/raw_bytes.go index 2f400c70af..c7eb8e6714 100644 --- a/sstable/colblk/raw_bytes.go +++ b/sstable/colblk/raw_bytes.go @@ -49,6 +49,9 @@ type RawBytes struct { data unsafe.Pointer } +// Assert that RawBytes implements Array[[]byte]. +var _ Array[[]byte] = RawBytes{} + // DecodeRawBytes decodes the structure of a RawBytes, constructing an accessor // for an array of byte slices constructed by RawBytesBuilder. Count must be the // number of byte slices within the array. diff --git a/sstable/colblk/unsafe_slice.go b/sstable/colblk/unsafe_slice.go index 3bf1830448..c857a760db 100644 --- a/sstable/colblk/unsafe_slice.go +++ b/sstable/colblk/unsafe_slice.go @@ -67,6 +67,9 @@ type UnsafeIntegerSlice[T constraints.Integer] struct { deltaWidth uintptr } +// Assert that UnsafeIntegerSlice implements Array. +var _ Array[uint8] = UnsafeIntegerSlice[uint8]{} + // DecodeUnsafeIntegerSlice decodes the structure of a slice of uints from a // byte slice. func DecodeUnsafeIntegerSlice[T constraints.Integer]( @@ -109,9 +112,6 @@ func makeUnsafeIntegerSlice[T constraints.Integer]( } } -// TODO(jackson): Remove when more of the read path is hooked up. -var _ = makeUnsafeIntegerSlice[uint64] - // At returns the `i`-th element of the slice. func (s UnsafeIntegerSlice[T]) At(i int) T { // TODO(jackson): Experiment with other alternatives that might be faster