Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

sstable: order range keys consistently in colblk and rowblk encodings #4160

Merged
merged 1 commit into from
Nov 12, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions internal/keyspan/span.go
Original file line number Diff line number Diff line change
Expand Up @@ -470,9 +470,14 @@ func SortKeysByTrailer(keys []Key) {
})
}

// SortKeysBySuffix sorts a Keys slice by suffix.
func SortKeysBySuffix(suffixCmp base.CompareRangeSuffixes, keys []Key) {
// SortKeysByTrailerAndSuffix sorts a Keys slice by trailer, and among keys with
// equal trailers, by suffix.
func SortKeysByTrailerAndSuffix(suffixCmp base.CompareRangeSuffixes, keys []Key) {
slices.SortFunc(keys, func(a, b Key) int {
// Trailer are ordered in decreasing number order.
if v := cmp.Compare(b.Trailer, a.Trailer); v != 0 {
return v
}
return suffixCmp(a.Suffix, b.Suffix)
})
}
Expand Down
6 changes: 5 additions & 1 deletion sstable/rowblk/rowblk_fragment_iter.go
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,10 @@ func (i *fragmentIter) gatherForward(kv *base.InternalKV) (*keyspan.Span, error)
if err := i.applySpanTransforms(); err != nil {
return nil, err
}

// Apply a consistent ordering.
keyspan.SortKeysByTrailer(i.span.Keys)

// i.blockIter is positioned over the first internal key for the next span.
return &i.span, nil
}
Expand Down Expand Up @@ -264,7 +268,7 @@ func (i *fragmentIter) gatherBackward(kv *base.InternalKV) (*keyspan.Span, error
// i.blockIter is positioned over the last internal key for the previous
// span.

// Backwards iteration encounters internal keys in the wrong order.
// Apply a consistent ordering.
keyspan.SortKeysByTrailer(i.span.Keys)

i.applySpanTransforms()
Expand Down
16 changes: 12 additions & 4 deletions sstable/writer.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,18 @@ func (w *Writer) encodeFragmentedRangeKeySpan(s keyspan.Span) {
// NB: The span should only contain range keys and be internally consistent
// (eg, no duplicate suffixes, no additional keys after a RANGEKEYDEL).
//
// Sort the keys by suffix. Iteration doesn't *currently* depend on it, but
// we may want to in the future.
keyspan.SortKeysBySuffix(w.comparer.CompareRangeSuffixes, s.Keys)

// Sort the keys by trailer (descending).
//
// Note that sstables written in TableFormatPebblev4 and earlier (rowblk
// encoding) will always re-order the Keys in order to encode RANGEKEYSETs
// first, then RANGEKEYUNSETs and then RANGEKEYDELs. See rangekey.Encoder.
// SSTables written in TableFormatPebblev5 or later (colblk encoding) will
// encode the keys in this order.
//
// Iteration doesn't depend on this ordering, in particular because it's
// inconsistent between the two encodings, but we may want eventually begin
// to depend on this ordering for colblk-encoded sstables.
keyspan.SortKeysByTrailerAndSuffix(w.comparer.CompareRangeSuffixes, s.Keys)
if w.Error() == nil {
w.rw.EncodeSpan(s)
}
Expand Down
85 changes: 47 additions & 38 deletions sstable/writer_rangekey_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import (
"github.com/cockroachdb/errors"
"github.com/cockroachdb/pebble/internal/testkeys"
"github.com/cockroachdb/pebble/objstorage/objstorageprovider"
"github.com/cockroachdb/pebble/sstable/colblk"
"github.com/cockroachdb/pebble/vfs"
"github.com/stretchr/testify/require"
)
Expand All @@ -26,7 +27,9 @@ func TestWriter_RangeKeys(t *testing.T) {
}
}()

buildFn := func(td *datadriven.TestData) (*Reader, error) {
cmp := testkeys.Comparer
keySchema := colblk.DefaultKeySchema(cmp, 16)
buildFn := func(td *datadriven.TestData, format TableFormat) (*Reader, error) {
mem := vfs.NewMem()
f, err := mem.Create("test", vfs.WriteCategoryUnspecified)
if err != nil {
Expand All @@ -35,10 +38,10 @@ func TestWriter_RangeKeys(t *testing.T) {

// Use a "suffix-aware" Comparer, that will sort suffix-values in
// descending order of timestamp, rather than in lexical order.
cmp := testkeys.Comparer
w := NewWriter(objstorageprovider.NewFileWritable(f), WriterOptions{
Comparer: cmp,
TableFormat: TableFormatPebblev2,
TableFormat: format,
KeySchema: &keySchema,
})
defer func() {
if w != nil {
Expand Down Expand Up @@ -92,46 +95,52 @@ func TestWriter_RangeKeys(t *testing.T) {
return nil, err
}

r, err = newReader(f, ReaderOptions{Comparer: cmp})
r, err = newReader(f, ReaderOptions{
Comparer: cmp,
KeySchemas: MakeKeySchemas(&keySchema),
})
if err != nil {
return nil, err
}

return r, nil
}

datadriven.RunTest(t, "testdata/writer_range_keys", func(t *testing.T, td *datadriven.TestData) string {
switch td.Cmd {
case "build":
if r != nil {
_ = r.Close()
r = nil
}

var err error
r, err = buildFn(td)
if err != nil {
return err.Error()
}

iter, err := r.NewRawRangeKeyIter(context.Background(), NoFragmentTransforms)
if err != nil {
return err.Error()
}
defer iter.Close()

var buf bytes.Buffer
s, err := iter.First()
for ; s != nil; s, err = iter.Next() {
_, _ = fmt.Fprintf(&buf, "%s\n", s)
}
if err != nil {
return err.Error()
}
return buf.String()

default:
return fmt.Sprintf("unknown command: %s", td.Cmd)
}
})
for format := TableFormatPebblev2; format <= TableFormatMax; format++ {
t.Run(format.String(), func(t *testing.T) {
datadriven.RunTest(t, "testdata/writer_range_keys", func(t *testing.T, td *datadriven.TestData) string {
switch td.Cmd {
case "build":
if r != nil {
_ = r.Close()
r = nil
}
var err error
r, err = buildFn(td, format)
if err != nil {
return err.Error()
}

iter, err := r.NewRawRangeKeyIter(context.Background(), NoFragmentTransforms)
if err != nil {
return err.Error()
}
defer iter.Close()

var buf bytes.Buffer
s, err := iter.First()
for ; s != nil; s, err = iter.Next() {
_, _ = fmt.Fprintf(&buf, "%s\n", s)
}
if err != nil {
return err.Error()
}
return buf.String()

default:
return fmt.Sprintf("unknown command: %s", td.Cmd)
}
})
})
}
}
Loading