Skip to content

Commit

Permalink
keyspan: use slices.Sort functions
Browse files Browse the repository at this point in the history
This commit replaces `sort.Interface` implementations with calls to
the newer slices.Sort functions. This is much cleaner and should be a
bit more efficient.
  • Loading branch information
RaduBerinde committed Jul 31, 2024
1 parent 8af904f commit 3bf84fc
Show file tree
Hide file tree
Showing 14 changed files with 86 additions and 143 deletions.
7 changes: 3 additions & 4 deletions internal/keyspan/defragment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@ import (
"bytes"
"fmt"
"math/rand"
"sort"
"strings"
"testing"
"time"
Expand All @@ -26,8 +25,8 @@ func TestDefragmentingIter(t *testing.T) {
alwaysEqual := DefragmentMethodFunc(func(_ base.Equal, _, _ *Span) bool { return true })
staticReducer := StaticDefragmentReducer
collectReducer := func(cur, next []Key) []Key {
c := keysBySeqNumKind(append(cur, next...))
sort.Sort(&c)
c := append(cur, next...)
SortKeysByTrailer(c)
return c
}

Expand Down Expand Up @@ -217,7 +216,7 @@ func testDefragmentingIteRandomizedOnce(t *testing.T, seed int64) {
}

func fragment(cmp base.Compare, formatKey base.FormatKey, spans []Span) []Span {
Sort(cmp, spans)
SortSpansByStartKey(cmp, spans)
var fragments []Span
f := Fragmenter{
Cmp: cmp,
Expand Down
59 changes: 3 additions & 56 deletions internal/keyspan/fragmenter.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,60 +6,11 @@ package keyspan

import (
"fmt"
"sort"

"github.com/cockroachdb/pebble/internal/base"
"github.com/cockroachdb/pebble/internal/invariants"
)

type spansByStartKey struct {
cmp base.Compare
buf []Span
}

func (v *spansByStartKey) Len() int { return len(v.buf) }
func (v *spansByStartKey) Less(i, j int) bool {
return v.cmp(v.buf[i].Start, v.buf[j].Start) < 0
}
func (v *spansByStartKey) Swap(i, j int) {
v.buf[i], v.buf[j] = v.buf[j], v.buf[i]
}

type spansByEndKey struct {
cmp base.Compare
buf []Span
}

func (v *spansByEndKey) Len() int { return len(v.buf) }
func (v *spansByEndKey) Less(i, j int) bool {
return v.cmp(v.buf[i].End, v.buf[j].End) < 0
}
func (v *spansByEndKey) Swap(i, j int) {
v.buf[i], v.buf[j] = v.buf[j], v.buf[i]
}

// keysBySeqNumKind sorts spans by the start key's sequence number in
// descending order. If two spans have equal sequence number, they're compared
// by key kind in descending order. This ordering matches the ordering of
// base.InternalCompare among keys with matching user keys.
type keysBySeqNumKind []Key

func (v *keysBySeqNumKind) Len() int { return len(*v) }
func (v *keysBySeqNumKind) Less(i, j int) bool { return (*v)[i].Trailer > (*v)[j].Trailer }
func (v *keysBySeqNumKind) Swap(i, j int) { (*v)[i], (*v)[j] = (*v)[j], (*v)[i] }

// Sort the spans by start key. This is the ordering required by the
// Fragmenter. Usually spans are naturally sorted by their start key,
// but that isn't true for range deletion tombstones in the legacy
// range-del-v1 block format.
func Sort(cmp base.Compare, spans []Span) {
sorter := spansByStartKey{
cmp: cmp,
buf: spans,
}
sort.Sort(&sorter)
}

// Fragmenter fragments a set of spans such that overlapping spans are
// split at their overlap points. The fragmented spans are output to the
// supplied Output function.
Expand All @@ -80,10 +31,8 @@ type Fragmenter struct {
// specific key (e.g. TruncateAndFlushTo). It is cached in the Fragmenter to
// allow reuse.
doneBuf []Span
// sortBuf is used to sort fragments by end key when flushing.
sortBuf spansByEndKey
// flushBuf is used to sort keys by (seqnum,kind) before emitting.
flushBuf keysBySeqNumKind
flushBuf []Key
// flushedKey is the key that fragments have been flushed up to. Any
// additional spans added to the fragmenter must have a start key >=
// flushedKey. A nil value indicates flushedKey has not been set.
Expand Down Expand Up @@ -305,9 +254,7 @@ func (f *Fragmenter) flush(buf []Span, lastKey []byte) {

// Sort the spans by end key. This will allow us to walk over the spans and
// easily determine the next split point (the smallest end-key).
f.sortBuf.cmp = f.Cmp
f.sortBuf.buf = buf
sort.Sort(&f.sortBuf)
SortSpansByEndKey(f.Cmp, buf)

// Loop over the spans, splitting by end key.
for len(buf) > 0 {
Expand All @@ -324,7 +271,7 @@ func (f *Fragmenter) flush(buf []Span, lastKey []byte) {
f.flushBuf = append(f.flushBuf, buf[i].Keys...)
}

sort.Sort(&f.flushBuf)
SortKeysByTrailer(f.flushBuf)

f.Emit(Span{
Start: buf[0].Start,
Expand Down
2 changes: 1 addition & 1 deletion internal/keyspan/keyspanimpl/merging_iter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ func testFragmenterEquivalenceOnce(t *testing.T, seed int64) {
allFragmented = append(allFragmented, span)
},
}
keyspan.Sort(f.Cmp, allSpans)
keyspan.SortSpansByStartKey(f.Cmp, allSpans)
for _, s := range allSpans {
f.Add(s)
}
Expand Down
44 changes: 29 additions & 15 deletions internal/keyspan/span.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ package keyspan // import "github.com/cockroachdb/pebble/internal/keyspan"

import (
"bytes"
"cmp"
"fmt"
"slices"
"sort"
"strings"
"unicode"

Expand Down Expand Up @@ -455,24 +455,38 @@ func (s prettySpan) Format(fs fmt.State, c rune) {
fmt.Fprintf(fs, "}")
}

// SortKeysByTrailer sorts a keys slice by trailer.
func SortKeysByTrailer(keys *[]Key) {
// NB: keys is a pointer to a slice instead of a slice to avoid `sorted`
// escaping to the heap.
sorted := (*keysBySeqNumKind)(keys)
sort.Sort(sorted)
// SortKeysByTrailer sorts a Keys slice by trailer.
func SortKeysByTrailer(keys []Key) {
slices.SortFunc(keys, func(a, b Key) int {
// Trailer are ordered in decreasing number order.
return -cmp.Compare(a.Trailer, b.Trailer)
})
}

// SortKeysBySuffix sorts a Keys slice by suffix.
func SortKeysBySuffix(cmp base.Compare, keys []Key) {
slices.SortFunc(keys, func(a, b Key) int {
return cmp(a.Suffix, b.Suffix)
})
}

// KeysBySuffix implements sort.Interface, sorting its member Keys slice to by
// Suffix in the order dictated by Cmp.
type KeysBySuffix struct {
Cmp base.Compare
Keys []Key
// SortSpansByStartKey sorts the spans by start key.
//
// This is the ordering required by the Fragmenter. Usually spans are naturally
// sorted by their start key, but that isn't true for range deletion tombstones
// in the legacy range-del-v1 block format.
func SortSpansByStartKey(cmp base.Compare, spans []Span) {
slices.SortFunc(spans, func(a, b Span) int {
return cmp(a.Start, b.Start)
})
}

func (s *KeysBySuffix) Len() int { return len(s.Keys) }
func (s *KeysBySuffix) Less(i, j int) bool { return s.Cmp(s.Keys[i].Suffix, s.Keys[j].Suffix) < 0 }
func (s *KeysBySuffix) Swap(i, j int) { s.Keys[i], s.Keys[j] = s.Keys[j], s.Keys[i] }
// SortSpansByEndKey sorts the spans by the end key.
func SortSpansByEndKey(cmp base.Compare, spans []Span) {
slices.SortFunc(spans, func(a, b Span) int {
return cmp(a.End, b.End)
})
}

// ParseSpan parses the string representation of a Span. It's intended for
// tests. ParseSpan panics if passed a malformed span representation.
Expand Down
60 changes: 28 additions & 32 deletions internal/rangekey/coalesce.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ package rangekey

import (
"math"
"sort"
"slices"

"github.com/cockroachdb/pebble/internal/base"
"github.com/cockroachdb/pebble/internal/invariants"
Expand Down Expand Up @@ -52,22 +52,18 @@ import (
func Coalesce(cmp base.Compare, eq base.Equal, keys []keyspan.Key, dst *[]keyspan.Key) {
// TODO(jackson): Currently, Coalesce doesn't actually perform the sequence
// number promotion described in the comment above.
keysBySuffix := keyspan.KeysBySuffix{
Cmp: cmp,
Keys: (*dst)[:0],
}
CoalesceIntoKeysBySuffix(eq, &keysBySuffix, math.MaxUint64, keys)
*dst = CoalesceInto(cmp, eq, (*dst)[:0], math.MaxUint64, keys)
// Update the span with the (potentially reduced) keys slice. coalesce left
// the keys in *dst sorted by suffix. Re-sort them by trailer.
*dst = keysBySuffix.Keys
keyspan.SortKeysByTrailer(dst)
keyspan.SortKeysByTrailer(*dst)
}

// CoalesceIntoKeysBySuffix is a variant of Coalesce which outputs the results into
// keyspan.KeysBySuffix without sorting them.
func CoalesceIntoKeysBySuffix(
equal base.Equal, keysBySuffix *keyspan.KeysBySuffix, snapshot base.SeqNum, keys []keyspan.Key,
) {
// CoalesceInto is a variant of Coalesce which outputs the results into dst
// without sorting them.
func CoalesceInto(
cmp base.Compare, equal base.Equal, dst []keyspan.Key, snapshot base.SeqNum, keys []keyspan.Key,
) []keyspan.Key {
dst = dst[:0]
// First, enforce visibility and RangeKeyDelete mechanics. We only need to
// consider the prefix of keys before and including the first
// RangeKeyDelete. We also must skip any keys that aren't visible at the
Expand All @@ -94,7 +90,7 @@ func CoalesceIntoKeysBySuffix(
deleteIdx = i
break
}
keysBySuffix.Keys = append(keysBySuffix.Keys, keys[i])
dst = append(dst, keys[i])
}

// Sort the accumulated keys by suffix. There may be duplicates within a
Expand All @@ -103,12 +99,14 @@ func CoalesceIntoKeysBySuffix(
// We use a stable sort so that the first key with a given suffix is the one
// that with the highest InternalKeyTrailer (because the input `keys` was sorted by
// trailer descending).
sort.Stable(keysBySuffix)
slices.SortStableFunc(dst, func(a, b keyspan.Key) int {
return cmp(a.Suffix, b.Suffix)
})

// Grab a handle of the full sorted slice, before reslicing
// keysBySuffix.keys to accumulate the final coalesced keys.
sorted := keysBySuffix.Keys
keysBySuffix.Keys = keysBySuffix.Keys[:0]
// dst to accumulate the final coalesced keys.
sorted := dst
dst = dst[:0]

var (
// prevSuffix is updated on each iteration of the below loop, and
Expand All @@ -129,25 +127,26 @@ func CoalesceIntoKeysBySuffix(
// and reslice keysBySuffix.keys to hold the entire unshadowed
// prefix.
if !shadowing {
keysBySuffix.Keys = keysBySuffix.Keys[:i]
dst = dst[:i]
shadowing = true
}
continue
}
prevSuffix = sorted[i].Suffix
if shadowing {
keysBySuffix.Keys = append(keysBySuffix.Keys, sorted[i])
dst = append(dst, sorted[i])
}
}
// If there was no shadowing, keysBySuffix.keys is untouched. We can simply
// set it to the existing `sorted` slice (also backed by keysBySuffix.keys).
// If there was no shadowing, dst.keys is untouched. We can simply set it to
// the existing `sorted` slice (also backed by dst).
if !shadowing {
keysBySuffix.Keys = sorted
dst = sorted
}
// If the original input `keys` slice contained a RangeKeyDelete, add it.
if deleteIdx >= 0 {
keysBySuffix.Keys = append(keysBySuffix.Keys, keys[deleteIdx])
dst = append(dst, keys[deleteIdx])
}
return dst
}

// ForeignSSTTransformer implements a keyspan.Transformer for range keys in
Expand All @@ -161,7 +160,7 @@ func CoalesceIntoKeysBySuffix(
type ForeignSSTTransformer struct {
Equal base.Equal
SeqNum base.SeqNum
sortBuf keyspan.KeysBySuffix
sortBuf []keyspan.Key
}

// Transform implements the Transformer interface.
Expand All @@ -171,12 +170,9 @@ func (f *ForeignSSTTransformer) Transform(
// Apply shadowing of keys.
dst.Start = s.Start
dst.End = s.End
f.sortBuf = keyspan.KeysBySuffix{
Cmp: cmp,
Keys: f.sortBuf.Keys[:0],
}
CoalesceIntoKeysBySuffix(f.Equal, &f.sortBuf, math.MaxUint64, s.Keys)
keys := f.sortBuf.Keys
f.sortBuf = f.sortBuf[:0]
f.sortBuf = CoalesceInto(cmp, f.Equal, f.sortBuf, math.MaxUint64, s.Keys)
keys := f.sortBuf
dst.Keys = dst.Keys[:0]
for i := range keys {
switch keys[i].Kind() {
Expand All @@ -201,7 +197,7 @@ func (f *ForeignSSTTransformer) Transform(
}
// coalesce results in dst.Keys being sorted by Suffix. Change it back to
// ByTrailerDesc, as that's what the iterator stack will expect.
keyspan.SortKeysByTrailer(&dst.Keys)
keyspan.SortKeysByTrailer(dst.Keys)
dst.KeysOrder = keyspan.ByTrailerDesc
return nil
}
14 changes: 5 additions & 9 deletions internal/rangekeystack/user_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ type UserIteratorConfig struct {
type Buffers struct {
merging keyspanimpl.MergingBuffers
defragmenting keyspan.DefragmentingBuffers
sortBuf keyspan.KeysBySuffix
sortBuf []keyspan.Key
}

// PrepareForReuse discards any excessively large buffers.
Expand Down Expand Up @@ -147,22 +147,18 @@ func (ui *UserIteratorConfig) Transform(cmp base.Compare, s keyspan.Span, dst *k
// Apply shadowing of keys.
dst.Start = s.Start
dst.End = s.End
ui.bufs.sortBuf = keyspan.KeysBySuffix{
Cmp: cmp,
Keys: ui.bufs.sortBuf.Keys[:0],
}
rangekey.CoalesceIntoKeysBySuffix(ui.comparer.Equal, &ui.bufs.sortBuf, ui.snapshot, s.Keys)
ui.bufs.sortBuf = rangekey.CoalesceInto(cmp, ui.comparer.Equal, ui.bufs.sortBuf[:0], ui.snapshot, s.Keys)
if ui.internalKeys {
if s.KeysOrder != keyspan.ByTrailerDesc {
panic("unexpected key ordering in UserIteratorTransform with internalKeys = true")
}
dst.Keys = ui.bufs.sortBuf.Keys
keyspan.SortKeysByTrailer(&dst.Keys)
dst.Keys = ui.bufs.sortBuf
keyspan.SortKeysByTrailer(dst.Keys)
return nil
}
// During user iteration over range keys, unsets and deletes don't matter. This
// step helps logical defragmentation during iteration.
keys := ui.bufs.sortBuf.Keys
keys := ui.bufs.sortBuf
dst.Keys = dst.Keys[:0]
for i := range keys {
switch keys[i].Kind() {
Expand Down
16 changes: 6 additions & 10 deletions internal/rangekeystack/user_iterator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,11 @@ func TestIter(t *testing.T) {
spans = append(spans, keyspan.ParseSpan(line))
}
transform := keyspan.TransformerFunc(func(cmp base.Compare, s keyspan.Span, dst *keyspan.Span) error {
keysBySuffix := keyspan.KeysBySuffix{
Cmp: cmp,
Keys: dst.Keys[:0],
}
rangekey.CoalesceIntoKeysBySuffix(eq, &keysBySuffix, visibleSeqNum, s.Keys)
// Update the span with the (potentially reduced) keys slice. coalesce left
// the keys in *dst sorted by suffix. Re-sort them by trailer.
dst.Keys = keysBySuffix.Keys
keyspan.SortKeysByTrailer(&dst.Keys)
dst.Keys = rangekey.CoalesceInto(cmp, eq, dst.Keys[:0], visibleSeqNum, s.Keys)
// Update the span with the (potentially reduced) keys slice.
// CoalesceInto() left the keys sorted by suffix. Re-sort them by
// trailer.
keyspan.SortKeysByTrailer(dst.Keys)
dst.Start = s.Start
dst.End = s.End
return nil
Expand Down Expand Up @@ -263,7 +259,7 @@ func testDefragmentingIteRandomizedOnce(t *testing.T, seed int64) {
}

func fragment(cmp base.Compare, formatKey base.FormatKey, spans []keyspan.Span) []keyspan.Span {
keyspan.Sort(cmp, spans)
keyspan.SortSpansByStartKey(cmp, spans)
var fragments []keyspan.Span
f := keyspan.Fragmenter{
Cmp: cmp,
Expand Down
Loading

0 comments on commit 3bf84fc

Please sign in to comment.