Skip to content

Commit

Permalink
ingest: require boundary keys without suffix for external ingest with…
Browse files Browse the repository at this point in the history
… suffix

Require that the smallest and largest keys for the external file have
no suffix when we are doing suffix replacement. This is sufficient for
our use case and makes things easier to reason about.

We also improve and correct a few comments around suffix replacement.
  • Loading branch information
RaduBerinde committed Feb 16, 2024
1 parent 4c68270 commit a634274
Show file tree
Hide file tree
Showing 4 changed files with 42 additions and 10 deletions.
17 changes: 16 additions & 1 deletion ingest.go
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,15 @@ func ingestLoad1External(
if e.HasRangeKey {
return nil, errors.New("pebble: range keys not supported in external files")
}
if len(e.SyntheticSuffix) > 0 {
if n := opts.Comparer.Split(e.SmallestUserKey); n != len(e.SmallestUserKey) {
return nil, errors.New("pebble: synthetic suffix is set but smallest key has suffix")
}
if n := opts.Comparer.Split(e.LargestUserKey); n != len(e.LargestUserKey) {
return nil, errors.New("pebble: synthetic suffix is set but largest key has suffix")
}
}

// Don't load table stats. Doing a round trip to shared storage, one SST
// at a time is not worth it as it slows down ingestion.
meta := &fileMetadata{
Expand Down Expand Up @@ -1119,8 +1128,14 @@ type ExternalFile struct {
// SyntheticPrefix must be a prefix of both SmallestUserKey and LargestUserKey.
ContentPrefix, SyntheticPrefix []byte
// SyntheticSuffix will replace the suffix of every key in the file during
// iteration. Note that the file itself is not modifed, rather, every key
// iteration. Note that the file itself is not modified, rather, every key
// returned by an iterator will have the synthetic suffix.
//
// The SyntheticSuffix must sort before any non-empty suffixes in the backing
// sstable.
//
// If SyntheticSuffix is set, then SmallestUserKey and LargestUserKey must not
// have suffixes.
SyntheticSuffix []byte
}

Expand Down
4 changes: 0 additions & 4 deletions sstable/block_property.go
Original file line number Diff line number Diff line change
Expand Up @@ -489,10 +489,6 @@ func (b *BlockIntervalFilter) SyntheticSuffixIntersects(prop []byte, suffix []by
if err != nil {
return false, err
}
if newLower < i.upper {
// This isn't an =< comparator because the original i.upper bound is exclusive.
return false, base.CorruptionErrorf(fmt.Sprintf("the synthetic suffix %d is the less than the property upper bound %d", newLower, i.upper))
}
newInterval := interval{lower: newLower, upper: newUpper}
return newInterval.intersects(b.filterInterval), nil
}
Expand Down
22 changes: 21 additions & 1 deletion sstable/block_property_test_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
package sstable

import (
"fmt"
"math"

"github.com/cockroachdb/pebble/internal/base"
Expand Down Expand Up @@ -35,7 +36,26 @@ func NewTestKeysBlockPropertyCollector() BlockPropertyCollector {
// and keys with suffixes within the range [filterMin, filterMax). For keys with
// suffixes outside the range, iteration is nondeterministic.
func NewTestKeysBlockPropertyFilter(filterMin, filterMax uint64) *BlockIntervalFilter {
return NewBlockIntervalFilter(testKeysBlockPropertyName, filterMin, filterMax, nil)
return NewBlockIntervalFilter(testKeysBlockPropertyName, filterMin, filterMax, testKeysBlockIntervalSyntheticReplacer{})
}

var _ BlockIntervalSyntheticReplacer = testKeysBlockIntervalSyntheticReplacer{}

type testKeysBlockIntervalSyntheticReplacer struct{}

// AdjustIntervalWithSyntheticSuffix is part of the
// BlockIntervalSyntheticReplacer interface.
func (sr testKeysBlockIntervalSyntheticReplacer) AdjustIntervalWithSyntheticSuffix(
lower uint64, upper uint64, suffix []byte,
) (adjustedLower uint64, adjustedUpper uint64, err error) {
decoded, err := testkeys.ParseSuffix(suffix)
if err != nil {
return 0, 0, err
}
if uint64(decoded) < upper {
panic(fmt.Sprintf("the synthetic suffix %d is less than the property upper bound %d", decoded, upper))
}
return uint64(decoded), uint64(decoded) + 1, nil
}

// NewTestKeysMaskingFilter constructs a TestKeysMaskingFilter that implements
Expand Down
9 changes: 5 additions & 4 deletions sstable/reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -158,10 +158,11 @@ func (c *cacheOpts) writerApply(w *Writer) {
}

// SyntheticSuffix will replace every suffix of every key surfaced during block
// iteration. The client should only initiate a reader with SuffixReplacement if:
//
// (1) no two keys in the sst share the same prefix
// (2) pebble.Compare(replacementSuffix,originalSuffix) > 0, for all keys
// iteration. A synthetic suffix can be used if:
// 1. no two keys in the sst share the same prefix; and
// 2. pebble.Compare(prefix + replacementSuffix, prefix + originalSuffix) < 0,
// for all keys in the backing sst which have a suffix (i.e. originalSuffix
// is not empty).
type SyntheticSuffix []byte

// rawTombstonesOpt is a Reader open option for specifying that range
Expand Down

0 comments on commit a634274

Please sign in to comment.