Skip to content

Commit

Permalink
base: add CompareSuffixes to Comparer
Browse files Browse the repository at this point in the history
We currently use `Compare` to compare both keys and bare suffixes.
This makes the implementation more complicated as it has to handle
both cases. It also prevents adding many useful assertions - for
example, we can't verify that `Compare` does byte-wise comparison of
prefixes because we can't unconditionally call `Split`.

We add a mandatory `CompareSuffixes` method that is used when
comparing bare suffixes. The result of `Compare` can be defined
completely in terms of `Split` and `CompareSuffixes`; `Compare` now
has a default implementation.

We also add a `CheckComparer` testing facility that can be used to
verify `Split` and `Compare` on combinations of prefixes and suffixes.
We use it to add a test to all `Comparer`s and it will be used from
CRDB code as well.

Informs cockroachdb/cockroach#127914
  • Loading branch information
RaduBerinde committed Jul 31, 2024
1 parent 4cc91d2 commit 4f6c198
Show file tree
Hide file tree
Showing 33 changed files with 386 additions and 209 deletions.
7 changes: 7 additions & 0 deletions comparer.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,3 +29,10 @@ type Comparer = base.Comparer

// DefaultComparer exports the base.DefaultComparer variable.
var DefaultComparer = base.DefaultComparer

// CheckComparer is a mini test suite that verifies a comparer implementation.
//
// It takes strictly ordered (according to the comparator) lists of prefixes and
// suffixes. Both lists must contain the empty slice. It is recommended that
// both lists have at least three elements.
var CheckComparer = base.CheckComparer
2 changes: 1 addition & 1 deletion db.go
Original file line number Diff line number Diff line change
Expand Up @@ -1152,7 +1152,7 @@ func finishInitializingIter(ctx context.Context, buf *iterAlloc) *Iterator {
}

if dbi.opts.rangeKeys() {
dbi.rangeKeyMasking.init(dbi, dbi.comparer.Compare, dbi.comparer.Split)
dbi.rangeKeyMasking.init(dbi, &dbi.comparer)

// When iterating over both point and range keys, don't create the
// range-key iterator stack immediately if we can avoid it. This
Expand Down
2 changes: 1 addition & 1 deletion external_iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ func finishInitializingExternal(ctx context.Context, it *Iterator) error {
it.iter = it.pointIter

if it.opts.rangeKeys() {
it.rangeKeyMasking.init(it, it.comparer.Compare, it.comparer.Split)
it.rangeKeyMasking.init(it, &it.comparer)
var rangeKeyIters []keyspan.FragmentIterator
if it.rangeKey == nil {
// We could take advantage of the lack of overlaps in range keys within
Expand Down
168 changes: 129 additions & 39 deletions internal/base/comparer.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,50 @@ import (
"bytes"
"encoding/binary"
"fmt"
"slices"
"strconv"
"unicode/utf8"

"github.com/cockroachdb/errors"
)

// CompareSuffixes compares two key suffixes and returns -1, 0, or +1.
//
// The empty slice suffix must be 'less than' any non-empty suffix.
//
// A full key k is composed of a prefix k[:Split(k)] and suffix k[Split(k):].
// Suffixes are compared to break ties between equal prefixes.
type CompareSuffixes func(a, b []byte) int

// Compare returns -1, 0, or +1 depending on whether a is 'less than', 'equal
// to' or 'greater than' b. The empty slice must be 'less than' any non-empty
// slice.
// to' or 'greater than' b.
//
// Compare is used to compare user keys, such as those passed as arguments to
// the various DB methods, as well as those returned from Separator, Successor,
// and Split. It is also used to compare key suffixes, i.e. the remainder of the
// key after Split.
// Both a and b must be valid keys.
//
// The comparison of the prefix parts must be a simple byte-wise compare. In
// other words, if a and be don't have suffixes, then
// Compare(a, b) = bytes.Compare(a, b).
// A key a is less than b if a's prefix is byte-wise less than b's prefix, or if
// the prefixes are equal and a's suffix is less than b's suffix (according to
// CompareSuffixes).
//
// In general, if prefix(a) = a[:Split(a)] and suffix(a) = a[Split(a):], then
// In other words, if prefix(a) = a[:Split(a)] and suffix(a) = a[Split(a):]:
//
// Compare(a, b) = bytes.Compare(prefix(a), prefix(b)) if not 0, or
// bytes.Compare(suffix(a), suffix(b)) otherwise.
// CompareSuffixes(suffix(a), suffix(b)) otherwise.
//
// Compare defaults to using the formula above but it can be customized if there
// is a (potentially faster) specialization.
type Compare func(a, b []byte) int

// defaultCompare implements Compare in terms of Split and CompareSuffixes, as
// mentioned above.
func defaultCompare(split Split, compareSuffixes CompareSuffixes, a, b []byte) int {
an := split(a)
bn := split(b)
if prefixCmp := bytes.Compare(a[:an], b[:bn]); prefixCmp != 0 {
return prefixCmp
}
return compareSuffixes(a[an:], b[bn:])
}

// Equal returns true if a and b are equivalent.
//
// For a given Compare, Equal(a,b)=true iff Compare(a,b)=0; that is, Equal is a
Expand Down Expand Up @@ -145,22 +166,28 @@ var DefaultSplit Split = func(key []byte) int { return len(key) }
// Comparer defines a total ordering over the space of []byte keys: a 'less
// than' relationship.
type Comparer struct {
// These fields must always be specified.
Compare Compare
// The following must always be specified.
AbbreviatedKey AbbreviatedKey
Separator Separator
Successor Successor

// ImmediateSuccessor must be specified if range keys are used.
ImmediateSuccessor ImmediateSuccessor

// Split defaults to a trivial implementation that returns the full key length
// if it is not specified.
Split Split

// CompareSuffixes defaults to bytes.Compare if it is not specified.
CompareSuffixes CompareSuffixes

// Compare defaults to a generic implementation that uses Split,
// bytes.Compare, and CompareSuffixes if it is not specified.
Compare Compare
// Equal defaults to using Compare() == 0 if it is not specified.
Equal Equal
// FormatKey defaults to the DefaultFormatter if it is not specified.
FormatKey FormatKey
// Split defaults to a trivial implementation that returns the full key length
// if it is not specified.
Split Split

// FormatValue is optional.
FormatValue FormatValue
Expand All @@ -182,23 +209,37 @@ func (c *Comparer) EnsureDefaults() *Comparer {
if c == nil {
return DefaultComparer
}
if c.Compare == nil || c.AbbreviatedKey == nil || c.Separator == nil || c.Successor == nil || c.Name == "" {
if c.AbbreviatedKey == nil || c.Separator == nil || c.Successor == nil || c.Name == "" {
panic("invalid Comparer: mandatory field not set")
}
if c.Equal != nil && c.Split != nil && c.FormatKey != nil {
if c.CompareSuffixes != nil && c.Compare != nil && c.Equal != nil && c.Split != nil && c.FormatKey != nil {
return c
}
n := &Comparer{}
*n = *c
if n.Equal == nil {
cmp := n.Compare
n.Equal = func(a, b []byte) bool {
return cmp(a, b) == 0
}
}

if n.Split == nil {
n.Split = DefaultSplit
}
if n.CompareSuffixes == nil && n.Compare == nil && n.Equal == nil {
n.CompareSuffixes = bytes.Compare
n.Compare = bytes.Compare
n.Equal = bytes.Equal
} else {
if n.CompareSuffixes == nil {
n.CompareSuffixes = bytes.Compare
}
if n.Compare == nil {
n.Compare = func(a, b []byte) int {
return defaultCompare(n.Split, n.CompareSuffixes, a, b)
}
}
if n.Equal == nil {
n.Equal = func(a, b []byte) bool {
return n.Compare(a, b) == 0
}
}
}
if n.FormatKey == nil {
n.FormatKey = DefaultFormatter
}
Expand All @@ -208,8 +249,9 @@ func (c *Comparer) EnsureDefaults() *Comparer {
// DefaultComparer is the default implementation of the Comparer interface.
// It uses the natural ordering, consistent with bytes.Compare.
var DefaultComparer = &Comparer{
Compare: bytes.Compare,
Equal: bytes.Equal,
CompareSuffixes: bytes.Compare,
Compare: bytes.Compare,
Equal: bytes.Equal,

AbbreviatedKey: func(key []byte) uint64 {
if len(key) >= 8 {
Expand Down Expand Up @@ -338,31 +380,25 @@ func MakeAssertComparer(c Comparer) Comparer {
return Comparer{
Compare: func(a []byte, b []byte) int {
res := c.Compare(a, b)
an := c.Split(a)
aPrefix, aSuffix := a[:an], a[an:]
bn := c.Split(b)
bPrefix, bSuffix := b[:bn], b[bn:]
if prefixCmp := bytes.Compare(aPrefix, bPrefix); prefixCmp == 0 {
if suffixCmp := c.Compare(aSuffix, bSuffix); suffixCmp != res {
panic(AssertionFailedf("%s: Compare with equal prefixes not consistent with Compare of suffixes: Compare(%q, %q)=%d, Compare(%q, %q)=%d",
c.Name, a, b, res, aSuffix, bSuffix, suffixCmp,
))
}
} else if prefixCmp != res {
panic(AssertionFailedf("%s: Compare did not perform byte-wise comparison of prefixes", c.Name))
// Verify that Compare is consistent with the default implementation.
if expected := defaultCompare(c.Split, c.CompareSuffixes, a, b); res != expected {
panic(AssertionFailedf("%s: Compare(%s, %s)=%d, expected %d",
c.Name, c.FormatKey(a), c.FormatKey(b), res, expected))
}
return res
},

Equal: func(a []byte, b []byte) bool {
eq := c.Equal(a, b)
if cmp := c.Compare(a, b); eq != (cmp == 0) {
// Verify that Equal is consistent with Compare.
if expected := c.Compare(a, b); eq != (expected == 0) {
panic("Compare and Equal are not consistent")
}
return eq
},

// TODO(radu): add more checks.
CompareSuffixes: c.CompareSuffixes,
AbbreviatedKey: c.AbbreviatedKey,
Separator: c.Separator,
Successor: c.Successor,
Expand All @@ -373,3 +409,57 @@ func MakeAssertComparer(c Comparer) Comparer {
Name: c.Name,
}
}

// CheckComparer is a mini test suite that verifies a comparer implementation.
//
// It takes lists of valid prefixes and suffixes. It is recommended that both
// lists have at least three elements.
func CheckComparer(c *Comparer, prefixes [][]byte, suffixes [][]byte) error {
// Empty slice is always a valid suffix.
suffixes = append(suffixes, nil)

// Verify the suffixes have a consistent ordering.
slices.SortFunc(suffixes, c.CompareSuffixes)
if !slices.IsSortedFunc(suffixes, c.CompareSuffixes) {
return errors.Errorf("CompareSuffixes is inconsistent")
}

// Check the split function.
for _, p := range prefixes {
for _, s := range suffixes {
key := slices.Concat(p, s)
if n := c.Split(key); n != len(p) {
return errors.Errorf("incorrect Split result %d on '%x' (prefix '%x' suffix '%x')", n, key, p, s)
}
}
}

// Check the Compare/Equals functions on all possible combinations.
for _, ap := range prefixes {
for _, as := range suffixes {
a := slices.Concat(ap, as)
for _, bp := range prefixes {
for _, bs := range suffixes {
b := slices.Concat(bp, bs)
result := c.Compare(a, b)

expected := bytes.Compare(ap, bp)
if expected == 0 {
expected = c.CompareSuffixes(as, bs)
}

if (result == 0) != c.Equal(a, b) {
return errors.Errorf("Equal(%s, %s) doesn't agree with Compare", c.FormatKey(a), c.FormatKey(b))
}

if result != expected {
return errors.Errorf("Compare(%s, %s)=%d, expected %d", c.FormatKey(a), c.FormatKey(b), result, expected)
}
}
}
}
}

// TODO(radu): check more methods.
return nil
}
6 changes: 6 additions & 0 deletions internal/base/comparer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ func TestDefAppendSeparator(t *testing.T) {
}
}

func TestDefaultComparer(t *testing.T) {
if err := CheckComparer(DefaultComparer, [][]byte{{}, []byte("abc"), []byte("d"), []byte("ef")}, [][]byte{{}}); err != nil {
t.Error(err)
}
}

func TestAbbreviatedKey(t *testing.T) {
rng := rand.New(rand.NewSource(uint64(time.Now().UnixNano())))
randBytes := func(size int) []byte {
Expand Down
10 changes: 6 additions & 4 deletions internal/compact/iterator.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,8 @@ import (
// exported function, and before a subsequent call to Next advances the iterator
// and mutates the contents of the returned key and value.
type Iter struct {
cmp base.Compare
cmp base.Compare
suffixCmp base.CompareSuffixes

cfg IterConfig

Expand Down Expand Up @@ -308,8 +309,9 @@ func NewIter(
) *Iter {
cfg.ensureDefaults()
i := &Iter{
cmp: cfg.Comparer.Compare,
cfg: cfg,
cmp: cfg.Comparer.Compare,
suffixCmp: cfg.Comparer.CompareSuffixes,
cfg: cfg,
// We don't want a nil keyBuf because if the first key we encounter is
// empty, it would become nil.
keyBuf: make([]byte, 8),
Expand All @@ -329,7 +331,7 @@ func NewIter(
i.frontiers.Init(i.cmp)
i.delElider.Init(i.cmp, cfg.TombstoneElision)
i.rangeDelCompactor = MakeRangeDelSpanCompactor(i.cmp, i.cfg.Comparer.Equal, cfg.Snapshots, cfg.TombstoneElision)
i.rangeKeyCompactor = MakeRangeKeySpanCompactor(i.cmp, i.cfg.Comparer.Equal, cfg.Snapshots, cfg.RangeKeyElision)
i.rangeKeyCompactor = MakeRangeKeySpanCompactor(i.cmp, i.suffixCmp, cfg.Snapshots, cfg.RangeKeyElision)
i.lastRangeDelSpanFrontier.Init(&i.frontiers, nil, i.lastRangeDelSpanFrontierReached)
return i
}
Expand Down
10 changes: 5 additions & 5 deletions internal/compact/spans.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,18 +83,18 @@ func (c *RangeDelSpanCompactor) Compact(span, output *keyspan.Span) {
// for at most one "compacted" span.
type RangeKeySpanCompactor struct {
cmp base.Compare
equal base.Equal
suffixCmp base.CompareSuffixes
snapshots Snapshots
elider rangeTombstoneElider
}

// MakeRangeKeySpanCompactor creates a new compactor for range key spans.
func MakeRangeKeySpanCompactor(
cmp base.Compare, equal base.Equal, snapshots Snapshots, elision TombstoneElision,
cmp base.Compare, suffixCmp base.CompareSuffixes, snapshots Snapshots, elision TombstoneElision,
) RangeKeySpanCompactor {
c := RangeKeySpanCompactor{
cmp: cmp,
equal: equal,
suffixCmp: suffixCmp,
snapshots: snapshots,
}
c.elider.Init(cmp, elision)
Expand Down Expand Up @@ -131,7 +131,7 @@ func (c *RangeKeySpanCompactor) Compact(span, output *keyspan.Span) {
}
if y > start {
keysDst := output.Keys[usedLen:cap(output.Keys)]
rangekey.Coalesce(c.cmp, c.equal, span.Keys[start:y], &keysDst)
rangekey.Coalesce(c.suffixCmp, span.Keys[start:y], &keysDst)
if y == len(span.Keys) {
// This is the last snapshot stripe. Unsets and deletes can be elided.
keysDst = c.elideInLastStripe(span.Start, span.End, keysDst)
Expand All @@ -143,7 +143,7 @@ func (c *RangeKeySpanCompactor) Compact(span, output *keyspan.Span) {
}
if y < len(span.Keys) {
keysDst := output.Keys[usedLen:cap(output.Keys)]
rangekey.Coalesce(c.cmp, c.equal, span.Keys[y:], &keysDst)
rangekey.Coalesce(c.suffixCmp, span.Keys[y:], &keysDst)
keysDst = c.elideInLastStripe(span.Start, span.End, keysDst)
usedLen += len(keysDst)
output.Keys = append(output.Keys, keysDst...)
Expand Down
2 changes: 1 addition & 1 deletion internal/compact/spans_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ func TestRangeKeySpanCompactor(t *testing.T) {

c = MakeRangeKeySpanCompactor(
base.DefaultComparer.Compare,
base.DefaultComparer.Equal,
base.DefaultComparer.CompareSuffixes,
s,
ElideTombstonesOutsideOf(keyRanges),
)
Expand Down
Loading

0 comments on commit 4f6c198

Please sign in to comment.