From bb1c2c45efb1a61bee0011117d200dbbf3ff06eb Mon Sep 17 00:00:00 2001 From: Jackson Owens Date: Thu, 29 Aug 2024 15:15:25 -0400 Subject: [PATCH] colblk: add IndexBlockWriter.UnsafeSeparator Add an API for retrieving a separator contained in an entry written to a pending index block. This will be used by the sstable writer when finishing an index block in a sstable with a two-level index. --- sstable/colblk/data_block.go | 6 ++++-- sstable/colblk/index_block.go | 10 ++++++++++ sstable/colblk/index_block_test.go | 4 +++- sstable/colblk/raw_bytes.go | 11 ++++++++--- sstable/colblk/testdata/index_block | 2 ++ sstable/colblk/unsafe_slice.go | 2 +- 6 files changed, 28 insertions(+), 7 deletions(-) diff --git a/sstable/colblk/data_block.go b/sstable/colblk/data_block.go index 1d252dbd2e..40c9d61005 100644 --- a/sstable/colblk/data_block.go +++ b/sstable/colblk/data_block.go @@ -181,7 +181,8 @@ func (w *defaultKeyWriter) ComparePrev(key []byte) KeyComparison { // CommonPrefixLen, it would sort after [key].) if cmpv.CommonPrefixLen == cmpv.PrefixLen { // The keys share the same MVCC prefix. Compare the suffixes. - cmpv.UserKeyComparison = int32(w.comparer.CompareSuffixes(key[cmpv.PrefixLen:], w.suffixes.LastSlice())) + cmpv.UserKeyComparison = int32(w.comparer.CompareSuffixes(key[cmpv.PrefixLen:], + w.suffixes.UnsafeGet(w.suffixes.rows-1))) if invariants.Enabled { if !w.comparer.Equal(lp, key[:cmpv.PrefixLen]) { panic(errors.AssertionFailedf("keys have different logical prefixes: %q != %q", lp, key[:cmpv.PrefixLen])) @@ -206,7 +207,8 @@ func (w *defaultKeyWriter) ComparePrev(key []byte) KeyComparison { // so the UserKeyComparison should be equal to the result of comparing // the prefixes and nonzero. if cmpv.UserKeyComparison == 0 { - panic(errors.AssertionFailedf("user keys should not be equal: %q+%q, %q", lp, w.suffixes.LastSlice(), key)) + panic(errors.AssertionFailedf("user keys should not be equal: %q+%q, %q", + lp, w.suffixes.UnsafeGet(w.suffixes.rows-1), key)) } if v := w.comparer.Compare(key, lp); v != int(cmpv.UserKeyComparison) { panic(errors.AssertionFailedf("user key comparison mismatch: Compare(%q, %q) = %d ≠ %d", diff --git a/sstable/colblk/index_block.go b/sstable/colblk/index_block.go index 71408989c9..861d1f6fec 100644 --- a/sstable/colblk/index_block.go +++ b/sstable/colblk/index_block.go @@ -69,6 +69,11 @@ func (w *IndexBlockWriter) Reset() { w.enc.reset() } +// Rows returns the number of entries in the index block so far. +func (w *IndexBlockWriter) Rows() int { + return w.rows +} + // AddBlockHandle adds a new separator and end offset of a data block to the // index block. Add returns the index of the row. // @@ -85,6 +90,11 @@ func (w *IndexBlockWriter) AddBlockHandle( return idx } +// UnsafeSeparator returns the separator of the i'th entry. +func (w *IndexBlockWriter) UnsafeSeparator(i int) []byte { + return w.separators.UnsafeGet(i) +} + // Size returns the size of the pending index block. func (w *IndexBlockWriter) Size() int { off := blockHeaderSize(indexBlockColumnCount, indexBlockCustomHeaderSize) diff --git a/sstable/colblk/index_block_test.go b/sstable/colblk/index_block_test.go index 7b06a1430f..fc47f5eec9 100644 --- a/sstable/colblk/index_block_test.go +++ b/sstable/colblk/index_block_test.go @@ -39,9 +39,11 @@ func TestIndexBlock(t *testing.T) { } w.AddBlockHandle([]byte(fields[0]), h, bp) } + fmt.Fprintf(&buf, "UnsafeSeparator(Rows()-1) = %q\n", w.UnsafeSeparator(w.Rows()-1)) data := w.Finish() r.Init(data) - return r.DebugString() + fmt.Fprint(&buf, r.DebugString()) + return buf.String() case "iter": var it IndexIter it.Init(&r) diff --git a/sstable/colblk/raw_bytes.go b/sstable/colblk/raw_bytes.go index 6d1f350878..75b1c2d2e7 100644 --- a/sstable/colblk/raw_bytes.go +++ b/sstable/colblk/raw_bytes.go @@ -165,13 +165,18 @@ func (b *RawBytesBuilder) PutConcat(s1, s2 []byte) { b.offsets.Set(b.rows, uint64(len(b.data))) } -// LastSlice returns the last slice added to the builder. The returned slice is +// Rows returns the count of slices that have been added to the builder. +func (b *RawBytesBuilder) Rows() int { + return b.rows +} + +// UnsafeGet returns the i'th slice added to the builder. The returned slice is // owned by the builder and must not be mutated. -func (b *RawBytesBuilder) LastSlice() []byte { +func (b *RawBytesBuilder) UnsafeGet(i int) []byte { if b.rows == 0 { return nil } - return b.data[b.offsets.array.elems.At(b.rows-1):b.offsets.array.elems.At(b.rows)] + return b.data[b.offsets.array.elems.At(i):b.offsets.array.elems.At(i+1)] } // Finish writes the serialized byte slices to buf starting at offset. The buf diff --git a/sstable/colblk/testdata/index_block b/sstable/colblk/testdata/index_block index eedd202f2f..0f1a3ed62a 100644 --- a/sstable/colblk/testdata/index_block +++ b/sstable/colblk/testdata/index_block @@ -6,6 +6,7 @@ bacitracin 412 212 banana 632 215 bp5 bonifide 963 326 bp6 ---- +UnsafeSeparator(Rows()-1) = "bonifide" # index block header # columnar block header 000-001: x 01 # version 1 @@ -130,6 +131,7 @@ catatonic 102422 20442 cephalopod 122864 9104 bp4 coat 293128 32104 ---- +UnsafeSeparator(Rows()-1) = "coat" # index block header # columnar block header 000-001: x 01 # version 1 diff --git a/sstable/colblk/unsafe_slice.go b/sstable/colblk/unsafe_slice.go index c8c63e8a69..956020ad93 100644 --- a/sstable/colblk/unsafe_slice.go +++ b/sstable/colblk/unsafe_slice.go @@ -59,7 +59,7 @@ var _ Array[uint64] = UnsafeUints{} func DecodeUnsafeUints(b []byte, off uint32, rows int) (_ UnsafeUints, endOffset uint32) { encoding := UintEncoding(b[off]) if !encoding.IsValid() { - panic(errors.AssertionFailedf("invalid encoding 0x%x", b)) + panic(errors.AssertionFailedf("invalid encoding 0x%x", b[off:off+1])) } off++ var base uint64