diff --git a/internal/keyspan/span.go b/internal/keyspan/span.go index a142248a68..6267728712 100644 --- a/internal/keyspan/span.go +++ b/internal/keyspan/span.go @@ -470,9 +470,14 @@ func SortKeysByTrailer(keys []Key) { }) } -// SortKeysBySuffix sorts a Keys slice by suffix. -func SortKeysBySuffix(suffixCmp base.CompareRangeSuffixes, keys []Key) { +// SortKeysByTrailerAndSuffix sorts a Keys slice by trailer, and among keys with +// equal trailers, by suffix. +func SortKeysByTrailerAndSuffix(suffixCmp base.CompareRangeSuffixes, keys []Key) { slices.SortFunc(keys, func(a, b Key) int { + // Trailer are ordered in decreasing number order. + if v := -cmp.Compare(a.Trailer, b.Trailer); v != 0 { + return v + } return suffixCmp(a.Suffix, b.Suffix) }) } diff --git a/sstable/rowblk/rowblk_fragment_iter.go b/sstable/rowblk/rowblk_fragment_iter.go index 0857f2d3b4..f2b20f2fa7 100644 --- a/sstable/rowblk/rowblk_fragment_iter.go +++ b/sstable/rowblk/rowblk_fragment_iter.go @@ -227,6 +227,10 @@ func (i *fragmentIter) gatherForward(kv *base.InternalKV) (*keyspan.Span, error) if err := i.applySpanTransforms(); err != nil { return nil, err } + + // Apply a consistent ordering. + keyspan.SortKeysByTrailer(i.span.Keys) + // i.blockIter is positioned over the first internal key for the next span. return &i.span, nil } @@ -264,7 +268,7 @@ func (i *fragmentIter) gatherBackward(kv *base.InternalKV) (*keyspan.Span, error // i.blockIter is positioned over the last internal key for the previous // span. - // Backwards iteration encounters internal keys in the wrong order. + // Apply a consistent ordering. keyspan.SortKeysByTrailer(i.span.Keys) i.applySpanTransforms() diff --git a/sstable/writer.go b/sstable/writer.go index 6ff28b8ab0..48ca8cb254 100644 --- a/sstable/writer.go +++ b/sstable/writer.go @@ -69,10 +69,18 @@ func (w *Writer) encodeFragmentedRangeKeySpan(s keyspan.Span) { // NB: The span should only contain range keys and be internally consistent // (eg, no duplicate suffixes, no additional keys after a RANGEKEYDEL). // - // Sort the keys by suffix. Iteration doesn't *currently* depend on it, but - // we may want to in the future. - keyspan.SortKeysBySuffix(w.comparer.CompareRangeSuffixes, s.Keys) - + // Sort the keys by trailer (descending). + // + // Note that sstables written in TableFormatPebblev4 and earlier (rowblk + // encoding) will always re-order the Keys in order to encode RANGEKEYSETs + // first, then RANGEKEYUNSETs and then RANGEKEYDELs. See rangekey.Encoder. + // SSTables written in TableFormatPebblev5 or later (colblk encoding) will + // encode the keys in this order. + // + // Iteration doesn't depend on this ordering, in particular because it's + // inconsistent between the two encodings, but we may want eventually begin + // to depend on this ordering for colblk-encoded sstables. + keyspan.SortKeysByTrailerAndSuffix(w.comparer.CompareRangeSuffixes, s.Keys) if w.Error() == nil { w.rw.EncodeSpan(s) } diff --git a/sstable/writer_rangekey_test.go b/sstable/writer_rangekey_test.go index 8833012221..232eec6121 100644 --- a/sstable/writer_rangekey_test.go +++ b/sstable/writer_rangekey_test.go @@ -13,6 +13,7 @@ import ( "github.com/cockroachdb/errors" "github.com/cockroachdb/pebble/internal/testkeys" "github.com/cockroachdb/pebble/objstorage/objstorageprovider" + "github.com/cockroachdb/pebble/sstable/colblk" "github.com/cockroachdb/pebble/vfs" "github.com/stretchr/testify/require" ) @@ -26,7 +27,9 @@ func TestWriter_RangeKeys(t *testing.T) { } }() - buildFn := func(td *datadriven.TestData) (*Reader, error) { + cmp := testkeys.Comparer + keySchema := colblk.DefaultKeySchema(cmp, 16) + buildFn := func(td *datadriven.TestData, format TableFormat) (*Reader, error) { mem := vfs.NewMem() f, err := mem.Create("test", vfs.WriteCategoryUnspecified) if err != nil { @@ -35,10 +38,10 @@ func TestWriter_RangeKeys(t *testing.T) { // Use a "suffix-aware" Comparer, that will sort suffix-values in // descending order of timestamp, rather than in lexical order. - cmp := testkeys.Comparer w := NewWriter(objstorageprovider.NewFileWritable(f), WriterOptions{ Comparer: cmp, - TableFormat: TableFormatPebblev2, + TableFormat: format, + KeySchema: &keySchema, }) defer func() { if w != nil { @@ -92,7 +95,10 @@ func TestWriter_RangeKeys(t *testing.T) { return nil, err } - r, err = newReader(f, ReaderOptions{Comparer: cmp}) + r, err = newReader(f, ReaderOptions{ + Comparer: cmp, + KeySchemas: MakeKeySchemas(&keySchema), + }) if err != nil { return nil, err } @@ -100,38 +106,41 @@ func TestWriter_RangeKeys(t *testing.T) { return r, nil } - datadriven.RunTest(t, "testdata/writer_range_keys", func(t *testing.T, td *datadriven.TestData) string { - switch td.Cmd { - case "build": - if r != nil { - _ = r.Close() - r = nil - } - - var err error - r, err = buildFn(td) - if err != nil { - return err.Error() - } - - iter, err := r.NewRawRangeKeyIter(context.Background(), NoFragmentTransforms) - if err != nil { - return err.Error() - } - defer iter.Close() - - var buf bytes.Buffer - s, err := iter.First() - for ; s != nil; s, err = iter.Next() { - _, _ = fmt.Fprintf(&buf, "%s\n", s) - } - if err != nil { - return err.Error() - } - return buf.String() - - default: - return fmt.Sprintf("unknown command: %s", td.Cmd) - } - }) + for format := TableFormatPebblev2; format <= TableFormatMax; format++ { + t.Run(format.String(), func(t *testing.T) { + datadriven.RunTest(t, "testdata/writer_range_keys", func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "build": + if r != nil { + _ = r.Close() + r = nil + } + var err error + r, err = buildFn(td, format) + if err != nil { + return err.Error() + } + + iter, err := r.NewRawRangeKeyIter(context.Background(), NoFragmentTransforms) + if err != nil { + return err.Error() + } + defer iter.Close() + + var buf bytes.Buffer + s, err := iter.First() + for ; s != nil; s, err = iter.Next() { + _, _ = fmt.Fprintf(&buf, "%s\n", s) + } + if err != nil { + return err.Error() + } + return buf.String() + + default: + return fmt.Sprintf("unknown command: %s", td.Cmd) + } + }) + }) + } }