diff --git a/sstable/colblk/bitmap.go b/sstable/colblk/bitmap.go index 2fd1429d07..1a542c4360 100644 --- a/sstable/colblk/bitmap.go +++ b/sstable/colblk/bitmap.go @@ -30,22 +30,26 @@ type Bitmap struct { bitCount int } -// MakeBitmap returns a Bitmap that reads from b supporting bitCount logical -// bits. No bounds checking is performed, so the caller must guarantee the -// bitmap is appropriately sized and the provided bitCount correctly identifies -// the number of bits in the bitmap. -func MakeBitmap(b []byte, off uint32, bitCount int) Bitmap { - if len(b) < int(off)+bitmapRequiredSize(bitCount) { +// DecodeBitmap decodes the structure of a Bitmap and returns a Bitmap that +// reads from b supporting bitCount logical bits. No bounds checking is +// performed, so the caller must guarantee the bitmap is appropriately sized and +// the provided bitCount correctly identifies the number of bits in the bitmap. +func DecodeBitmap(b []byte, off uint32, bitCount int) (bitmap Bitmap, endOffset uint32) { + sz := bitmapRequiredSize(bitCount) + off = align(off, align64) + if len(b) < int(off)+sz { panic(errors.AssertionFailedf("bitmap of %d bits requires at least %d bytes; provided with %d-byte slice", bitCount, bitmapRequiredSize(bitCount), len(b[off:]))) } - off = align(off, align64) return Bitmap{ data: makeUnsafeRawSlice[uint64](unsafe.Pointer(&b[off])), bitCount: bitCount, - } + }, off + uint32(sz) } +// Assert that DecodeBitmap implements DecodeFunc. +var _ DecodeFunc[Bitmap] = DecodeBitmap + // Get returns true if the bit at position i is set and false otherwise. 
func (b Bitmap) Get(i int) bool { return (b.data.At(i>>6 /* i/64 */) & (1 << uint(i%64))) != 0 diff --git a/sstable/colblk/bitmap_test.go b/sstable/colblk/bitmap_test.go index f3da1c3fc9..ff253e1065 100644 --- a/sstable/colblk/bitmap_test.go +++ b/sstable/colblk/bitmap_test.go @@ -14,6 +14,7 @@ import ( "github.com/cockroachdb/datadriven" "github.com/cockroachdb/pebble/internal/binfmt" + "github.com/stretchr/testify/require" "golang.org/x/exp/rand" ) @@ -43,7 +44,7 @@ func TestBitmapFixed(t *testing.T) { } _ = builder.Finish(0, n, 0, data) - bitmap = MakeBitmap(data, 0, n) + bitmap, _ = DecodeBitmap(data, 0, n) dumpBitmap(&buf, bitmap) fmt.Fprint(&buf, "\nBinary representation:\n") f := binfmt.New(data) @@ -100,7 +101,8 @@ func TestBitmapRandom(t *testing.T) { } data := make([]byte, builder.Size(size, 0)) _ = builder.Finish(0, size, 0, data) - bitmap := MakeBitmap(data, 0, size) + bitmap, endOffset := DecodeBitmap(data, 0, size) + require.Equal(t, uint32(len(data)), endOffset) for i := 0; i < size; i++ { if got := bitmap.Get(i); got != v[i] { t.Fatalf("b.Get(%d) = %t; want %t", i, got, v[i]) diff --git a/sstable/colblk/block.go b/sstable/colblk/block.go index 96526a6cfd..fb40480a57 100644 --- a/sstable/colblk/block.go +++ b/sstable/colblk/block.go @@ -165,6 +165,22 @@ func FinishBlock(rows int, writers []ColumnWriter) []byte { return buf } +// DecodeColumn decodes the col'th column of the provided reader's block as a +// column of dataType using decodeFunc. 
+func DecodeColumn[V any](r *BlockReader, col int, dataType DataType, decodeFunc DecodeFunc[V]) V { + if uint16(col) >= r.header.Columns { + panic(errors.AssertionFailedf("column %d is out of range [0, %d)", col, r.header.Columns)) + } + if dt := r.dataType(col); dt != dataType { + panic(errors.AssertionFailedf("column %d is type %s; not %s", col, dt, dataType)) + } + v, endOffset := decodeFunc(r.data, r.pageStart(col), int(r.header.Rows)) + if nextColumnOff := r.pageStart(col + 1); endOffset != nextColumnOff { + panic(errors.AssertionFailedf("column %d decoded to offset %d; expected %d", col, endOffset, nextColumnOff)) + } + return v +} + // A BlockReader holds metadata for accessing the columns of a columnar block. type BlockReader struct { data []byte @@ -196,74 +212,53 @@ func (r *BlockReader) DataType(col int) DataType { if uint16(col) >= r.header.Columns { panic(errors.AssertionFailedf("column %d is out of range [0, %d)", col, r.header.Columns)) } - return DataType(*(*uint8)(r.pointer(r.customHeaderSize + 7 + 5*uint32(col)))) + return r.dataType(col) +} + +func (r *BlockReader) dataType(col int) DataType { + return DataType(*(*uint8)(r.pointer(r.customHeaderSize + blockHeaderBaseSize + columnHeaderSize*uint32(col)))) } // Bitmap retrieves the col'th column as a bitmap. The column must be of type // DataTypeBool. func (r *BlockReader) Bitmap(col int) Bitmap { - if dt := r.DataType(col); dt != DataTypeBool { - panic(errors.AssertionFailedf("column %d is not a Bitmap; holds data type %s", dt)) - } - return MakeBitmap(r.data, r.pageStart(col), int(r.header.Rows)) + return DecodeColumn(r, col, DataTypeBool, DecodeBitmap) } // RawBytes retrieves the col'th column as a column of byte slices. The column // must be of type DataTypeBytes. 
func (r *BlockReader) RawBytes(col int) RawBytes { - if dt := r.DataType(col); dt != DataTypeBytes { - panic(errors.AssertionFailedf("column %d is not a RawBytes column; holds data type %s", dt)) - } - return MakeRawBytes(int(r.header.Rows), r.data, r.pageStart(col)) + return DecodeColumn(r, col, DataTypeBytes, DecodeRawBytes) } // PrefixBytes retrieves the col'th column as a prefix-compressed byte slice column. The column // must be of type DataTypePrefixBytes. func (r *BlockReader) PrefixBytes(col int) PrefixBytes { - if dt := r.DataType(col); dt != DataTypePrefixBytes { - panic(errors.AssertionFailedf("column %d is not a PrefixBytes column; holds data type %s", dt)) - } - return MakePrefixBytes(int(r.header.Rows), r.data, r.pageStart(col)) + return DecodeColumn(r, col, DataTypePrefixBytes, DecodePrefixBytes) } // Uint8s retrieves the col'th column as a column of uint8s. The column must be // of type DataTypeUint8. func (r *BlockReader) Uint8s(col int) UnsafeUint8s { - if dt := r.DataType(col); dt != DataTypeUint8 { - panic(errors.AssertionFailedf("column %d is not a Uint8 column; holds data type %s", col, dt)) - } - _, s := readUnsafeIntegerSlice[uint8](int(r.header.Rows), r.data, r.pageStart(col)) - return s + return DecodeColumn(r, col, DataTypeUint8, DecodeUnsafeIntegerSlice[uint8]) } // Uint16s retrieves the col'th column as a column of uint8s. The column must be // of type DataTypeUint16. func (r *BlockReader) Uint16s(col int) UnsafeUint16s { - if dt := r.DataType(col); dt != DataTypeUint16 { - panic(errors.AssertionFailedf("column %d is not a Uint16 column; holds data type %s", col, dt)) - } - _, s := readUnsafeIntegerSlice[uint16](int(r.header.Rows), r.data, r.pageStart(col)) - return s + return DecodeColumn(r, col, DataTypeUint16, DecodeUnsafeIntegerSlice[uint16]) } // Uint32s retrieves the col'th column as a column of uint32s. The column must be // of type DataTypeUint32. 
func (r *BlockReader) Uint32s(col int) UnsafeUint32s { - if dt := r.DataType(col); dt != DataTypeUint32 { - panic(errors.AssertionFailedf("column %d is not a Uint32 column; holds data type %s", col, dt)) - } - _, s := readUnsafeIntegerSlice[uint32](int(r.header.Rows), r.data, r.pageStart(col)) - return s + return DecodeColumn(r, col, DataTypeUint32, DecodeUnsafeIntegerSlice[uint32]) } // Uint64s retrieves the col'th column as a column of uint64s. The column must be // of type DataTypeUint64. func (r *BlockReader) Uint64s(col int) UnsafeUint64s { - if dt := r.DataType(col); dt != DataTypeUint64 { - panic(errors.AssertionFailedf("column %d is not a Uint64 column; holds data type %s", col, dt)) - } - _, s := readUnsafeIntegerSlice[uint64](int(r.header.Rows), r.data, r.pageStart(col)) - return s + return DecodeColumn(r, col, DataTypeUint64, DecodeUnsafeIntegerSlice[uint64]) } func (r *BlockReader) pageStart(col int) uint32 { diff --git a/sstable/colblk/column.go b/sstable/colblk/column.go index 9b1d6733f6..4e605169d4 100644 --- a/sstable/colblk/column.go +++ b/sstable/colblk/column.go @@ -108,3 +108,9 @@ type Encoder interface { // state to the provided writer. WriteDebug(w io.Writer, rows int) } + +// A DecodeFunc decodes a data structure from a byte slice, returning an +// accessor for the data and the offset of the first byte after the structure. +// The rows argument must be the number of logical rows encoded within the data +// structure. +type DecodeFunc[T any] func(buf []byte, offset uint32, rows int) (decoded T, nextOffset uint32) diff --git a/sstable/colblk/prefix_bytes.go b/sstable/colblk/prefix_bytes.go index fbb5c96134..69a372ff50 100644 --- a/sstable/colblk/prefix_bytes.go +++ b/sstable/colblk/prefix_bytes.go @@ -171,10 +171,13 @@ type PrefixBytes struct { rawBytes RawBytes } -// MakePrefixBytes constructs an accessor for an array of lexicographically -// sorted byte slices constructed by PrefixBytesBuilder.
Count must be the -// number of logical slices within the array. -func MakePrefixBytes(count int, b []byte, offset uint32) PrefixBytes { +// DecodePrefixBytes decodes the structure of a PrefixBytes, constructing an +// accessor for an array of lexicographically sorted byte slices constructed by +// PrefixBytesBuilder. Count must be the number of logical slices within the +// array. +func DecodePrefixBytes( + b []byte, offset uint32, count int, +) (prefixBytes PrefixBytes, endOffset uint32) { if count == 0 { panic(errors.AssertionFailedf("empty PrefixBytes")) } @@ -185,19 +188,23 @@ func MakePrefixBytes(count int, b []byte, offset uint32) PrefixBytes { calc := makeBundleCalc(bundleShift) nBundles := calc.bundleCount(count) + rb, endOffset := DecodeRawBytes(b, offset+1, count+nBundles) pb := PrefixBytes{ bundleCalc: calc, rows: count, - rawBytes: MakeRawBytes(count+nBundles, b, offset+1), + rawBytes: rb, } // We always set the base to zero. if pb.rawBytes.offsets.base != 0 { panic(errors.AssertionFailedf("unexpected non-zero base in offsets")) } pb.sharedPrefixLen = int(pb.rawBytes.offsets.At(0)) - return pb + return pb, endOffset } +// Assert that DecodePrefixBytes implements DecodeFunc. +var _ DecodeFunc[PrefixBytes] = DecodePrefixBytes + // SharedPrefix return a []byte of the shared prefix that was extracted from // all of the values in the Bytes vector. The returned slice should not be // mutated. @@ -419,7 +426,7 @@ func prefixBytesToBinFormatter(f *binfmt.Formatter, count int, sliceFormatter fu if sliceFormatter == nil { sliceFormatter = defaultSliceFormatter } - pb := MakePrefixBytes(count, f.Data(), uint32(f.Offset())) + pb, _ := DecodePrefixBytes(f.Data(), uint32(f.Offset()), count) f.CommentLine("PrefixBytes") f.HexBytesln(1, "bundleSize: %d", 1<