Skip to content

Commit

Permalink
remove support for prefix replacement
Browse files Browse the repository at this point in the history
Remove `PrefixReplacement` in favor of `SyntheticPrefix`. The general
prefix replacement turns out to be too difficult so we will instead
strip the prefix when writing out the backup files.
  • Loading branch information
RaduBerinde committed Mar 13, 2024
1 parent 760f3f5 commit f4855ad
Show file tree
Hide file tree
Showing 20 changed files with 124 additions and 805 deletions.
16 changes: 8 additions & 8 deletions compaction.go
Original file line number Diff line number Diff line change
Expand Up @@ -2444,14 +2444,14 @@ func (d *DB) runCopyCompaction(
// a new FileNum. This has the potential of making the block cache less
// effective, however.
newMeta := &fileMetadata{
Size: inputMeta.Size,
CreationTime: inputMeta.CreationTime,
SmallestSeqNum: inputMeta.SmallestSeqNum,
LargestSeqNum: inputMeta.LargestSeqNum,
Stats: inputMeta.Stats,
PrefixReplacement: inputMeta.PrefixReplacement,
Virtual: inputMeta.Virtual,
SyntheticSuffix: inputMeta.SyntheticSuffix,
Size: inputMeta.Size,
CreationTime: inputMeta.CreationTime,
SmallestSeqNum: inputMeta.SmallestSeqNum,
LargestSeqNum: inputMeta.LargestSeqNum,
Stats: inputMeta.Stats,
Virtual: inputMeta.Virtual,
SyntheticPrefix: inputMeta.SyntheticPrefix,
SyntheticSuffix: inputMeta.SyntheticSuffix,
}
if inputMeta.HasPointKeys {
newMeta.ExtendPointKeyBounds(c.cmp, inputMeta.SmallestPointKey, inputMeta.LargestPointKey)
Expand Down
7 changes: 1 addition & 6 deletions data_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1294,7 +1294,7 @@ func runIngestExternalCmd(
usageErr := func(info interface{}) {
t.Helper()
td.Fatalf(t, "error parsing %q: %v; "+
"usage: obj bounds=(smallest,largest) [size=x] [prefix-replace=(from,to)] [synthetic-prefix=prefix] [synthetic-suffix=suffix]",
"usage: obj bounds=(smallest,largest) [size=x] [synthetic-prefix=prefix] [synthetic-suffix=suffix]",
line, info,
)
}
Expand Down Expand Up @@ -1328,11 +1328,6 @@ func runIngestExternalCmd(
nArgs(1)
arg.Scan(t, 0, &ef.Size)

case "prefix-replace":
nArgs(2)
ef.ContentPrefix = []byte(arg.Vals[0])
ef.SyntheticPrefix = []byte(arg.Vals[1])

case "synthetic-prefix":
nArgs(1)
ef.SyntheticPrefix = []byte(arg.Vals[0])
Expand Down
17 changes: 4 additions & 13 deletions ingest.go
Original file line number Diff line number Diff line change
Expand Up @@ -232,12 +232,7 @@ func ingestLoad1External(
)
}

if len(e.SyntheticPrefix) != 0 {
meta.PrefixReplacement = &sstable.PrefixReplacement{
ContentPrefix: e.ContentPrefix,
SyntheticPrefix: e.SyntheticPrefix,
}
}
meta.SyntheticPrefix = e.SyntheticPrefix
meta.SyntheticSuffix = e.SyntheticSuffix

if err := meta.Validate(opts.Comparer.Compare, opts.Comparer.FormatKey); err != nil {
Expand Down Expand Up @@ -1153,15 +1148,11 @@ type ExternalFile struct {
// ingestion.
HasPointKey, HasRangeKey bool

// ContentPrefix and SyntheticPrefix denote a prefix replacement rule causing
// a file, in which all keys have prefix ContentPrefix, to appear whenever it
// is accessed as if those keys all instead have prefix SyntheticPrefix.
// SyntheticPrefix will prepend this prefix to all keys in the file during
// iteration. Note that the backing file itself is not modified.
//
// SyntheticPrefix must be a prefix of both Bounds.Start and Bounds.End.
//
// NB: If the SyntheticPrefix is non-empty and the ContentPrefix is empty,
// then the read path will conduct block level prefix synthesis.
ContentPrefix, SyntheticPrefix []byte
SyntheticPrefix []byte

// SyntheticSuffix will replace the suffix of every key in the file during
// iteration. Note that the file itself is not modified, rather, every key
Expand Down
32 changes: 14 additions & 18 deletions internal/manifest/version.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,10 +270,11 @@ type FileMetadata struct {
// Virtual is true if the FileMetadata belongs to a virtual sstable.
Virtual bool

// PrefixReplacement is used for virtual files where the backing file has a
// different prefix on its keys than the span in which it is being exposed.
PrefixReplacement *sstable.PrefixReplacement
// SyntheticPrefix is used to prepend a prefix to all keys; used for some virtual
// tables.
SyntheticPrefix sstable.SyntheticPrefix

// SyntheticSuffix overrides all suffixes in a table; used for some virtual tables.
SyntheticSuffix sstable.SyntheticSuffix
}

Expand All @@ -293,14 +294,10 @@ func (m *FileMetadata) SyntheticSeqNum() sstable.SyntheticSeqNum {

// IterTransforms returns an sstable.IterTransforms that has SyntheticSeqNum,
// SyntheticPrefix, and SyntheticSuffix set as needed.
func (m *FileMetadata) IterTransforms() sstable.IterTransforms {
var syntheticPrefix []byte
if m.PrefixReplacement != nil && !m.PrefixReplacement.UsePrefixReplacementIterator() {
syntheticPrefix = m.PrefixReplacement.SyntheticPrefix
}
return sstable.IterTransforms{
SyntheticSeqNum: m.SyntheticSeqNum(),
SyntheticSuffix: m.SyntheticSuffix,
SyntheticPrefix: syntheticPrefix,
SyntheticPrefix: m.SyntheticPrefix,
}
}

Expand Down Expand Up @@ -353,13 +350,12 @@ type VirtualFileMeta struct {
// sstable reader.
func (m VirtualFileMeta) VirtualReaderParams(isShared bool) sstable.VirtualReaderParams {
return sstable.VirtualReaderParams{
Lower: m.Smallest,
Upper: m.Largest,
FileNum: m.FileNum,
IsSharedIngested: isShared && m.SyntheticSeqNum() != 0,
Size: m.Size,
BackingSize: m.FileBacking.Size,
PrefixReplacement: m.PrefixReplacement,
Lower: m.Smallest,
Upper: m.Largest,
FileNum: m.FileNum,
IsSharedIngested: isShared && m.SyntheticSeqNum() != 0,
Size: m.Size,
BackingSize: m.FileBacking.Size,
}
}

Expand Down Expand Up @@ -865,14 +861,14 @@ func (m *FileMetadata) Validate(cmp Compare, formatKey base.FormatKey) error {
return base.CorruptionErrorf("file metadata FileBacking not set")
}

if m.PrefixReplacement != nil {
if m.SyntheticPrefix.IsSet() {
if !m.Virtual {
return base.CorruptionErrorf("prefix replacement rule set with non-virtual file")
}
if !bytes.HasPrefix(m.Smallest.UserKey, m.PrefixReplacement.SyntheticPrefix) {
if !bytes.HasPrefix(m.Smallest.UserKey, m.SyntheticPrefix) {
return base.CorruptionErrorf("virtual file with prefix replacement rules has smallest key with a different prefix: %s", m.Smallest.Pretty(formatKey))
}
if !bytes.HasPrefix(m.Largest.UserKey, m.PrefixReplacement.SyntheticPrefix) {
if !bytes.HasPrefix(m.Largest.UserKey, m.SyntheticPrefix) {
return base.CorruptionErrorf("virtual file with prefix replacement rules has largest key with a different prefix: %s", m.Largest.Pretty(formatKey))
}
}
Expand Down
22 changes: 12 additions & 10 deletions internal/manifest/version_edit.go
Original file line number Diff line number Diff line change
Expand Up @@ -342,8 +342,8 @@ func (v *VersionEdit) Decode(r io.Reader) error {
virtual bool
backingFileNum uint64
}{}
var virtualPrefix *sstable.PrefixReplacement
var syntheticSuffix []byte
var syntheticPrefix sstable.SyntheticPrefix
var syntheticSuffix sstable.SyntheticSuffix
if tag == tagNewFile4 || tag == tagNewFile5 {
for {
customTag, err := d.readUvarint()
Expand Down Expand Up @@ -385,18 +385,19 @@ func (v *VersionEdit) Decode(r io.Reader) error {
}

case customTagPrefixRewrite:
// We used to have a content prefix; we no longer use it.
content, err := d.readBytes()
if err != nil {
return err
}
if len(content) > 0 {
return base.CorruptionErrorf("content prefix not supported")
}
synthetic, err := d.readBytes()
if err != nil {
return err
}
virtualPrefix = &sstable.PrefixReplacement{
ContentPrefix: content,
SyntheticPrefix: synthetic,
}
syntheticPrefix = synthetic

case customTagSuffixRewrite:
if syntheticSuffix, err = d.readBytes(); err != nil {
Expand All @@ -421,7 +422,7 @@ func (v *VersionEdit) Decode(r io.Reader) error {
LargestSeqNum: largestSeqNum,
MarkedForCompaction: markedForCompaction,
Virtual: virtualState.virtual,
PrefixReplacement: virtualPrefix,
SyntheticPrefix: syntheticPrefix,
SyntheticSuffix: syntheticSuffix,
}
if tag != tagNewFile5 { // no range keys present
Expand Down Expand Up @@ -705,10 +706,11 @@ func (v *VersionEdit) Encode(w io.Writer) error {
e.writeUvarint(customTagVirtual)
e.writeUvarint(uint64(x.Meta.FileBacking.DiskFileNum))
}
if x.Meta.PrefixReplacement != nil {
if x.Meta.SyntheticPrefix != nil {
e.writeUvarint(customTagPrefixRewrite)
e.writeBytes(x.Meta.PrefixReplacement.ContentPrefix)
e.writeBytes(x.Meta.PrefixReplacement.SyntheticPrefix)
// We used to have a content prefix; we no longer use it.
e.writeBytes(nil)
e.writeBytes(x.Meta.SyntheticPrefix)
}
if x.Meta.SyntheticSuffix != nil {
e.writeUvarint(customTagSuffixRewrite)
Expand Down
16 changes: 6 additions & 10 deletions internal/manifest/version_edit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ import (
"github.com/cockroachdb/errors"
"github.com/cockroachdb/pebble/internal/base"
"github.com/cockroachdb/pebble/record"
"github.com/cockroachdb/pebble/sstable"
"github.com/kr/pretty"
"github.com/stretchr/testify/require"
)
Expand All @@ -45,15 +44,12 @@ func checkRoundTrip(e0 VersionEdit) error {
func TestVERoundTripAndAccumulate(t *testing.T) {
cmp := base.DefaultComparer.Compare
m1 := (&FileMetadata{
FileNum: 810,
Size: 8090,
CreationTime: 809060,
SmallestSeqNum: 9,
LargestSeqNum: 11,
PrefixReplacement: &sstable.PrefixReplacement{
ContentPrefix: []byte("before"),
SyntheticPrefix: []byte("after"),
},
FileNum: 810,
Size: 8090,
CreationTime: 809060,
SmallestSeqNum: 9,
LargestSeqNum: 11,
SyntheticPrefix: []byte("after"),
SyntheticSuffix: []byte("foo"),
}).ExtendPointKeyBounds(
cmp,
Expand Down
7 changes: 4 additions & 3 deletions lsm_view.go
Original file line number Diff line number Diff line change
Expand Up @@ -181,10 +181,11 @@ func (b *lsmViewBuilder) tableDetails(
outf("virtual; backed by %s (%ssize: %s)", m.FileBacking.DiskFileNum, backingInfo, humanize.Bytes.Uint64(m.FileBacking.Size))
}
outf("seqnums: %d - %d", m.SmallestSeqNum, m.LargestSeqNum)
if p := m.PrefixReplacement; p != nil {
outf("prefix replacement: %s -> %s", p.ContentPrefix, p.SyntheticPrefix)
if m.SyntheticPrefix.IsSet() {
// Note: we are abusing the key formatter by passing just the prefix.
outf("synthetic prefix: %s", b.fmtKey(m.SyntheticPrefix))
}
if len(m.SyntheticSuffix) > 0 {
if m.SyntheticSuffix.IsSet() {
// Note: we are abusing the key formatter by passing just the suffix.
outf("synthetic suffix: %s", b.fmtKey(m.SyntheticSuffix))
}
Expand Down
28 changes: 14 additions & 14 deletions metamorphic/build.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ func writeSSTForIngestion(
rangeKeyIter keyspan.FragmentIterator,
uniquePrefixes bool,
syntheticSuffix sstable.SyntheticSuffix,
prefixChange *sstable.PrefixReplacement,
syntheticPrefix sstable.SyntheticPrefix,
writable objstorage.Writable,
targetFMV pebble.FormatMajorVersion,
) (*sstable.WriterMetadata, error) {
Expand All @@ -47,11 +47,11 @@ func writeSSTForIngestion(
defer rangeKeyIterCloser.Close()

outputKey := func(key []byte) []byte {
if prefixChange == nil && !syntheticSuffix.IsSet() {
if !syntheticPrefix.IsSet() && !syntheticSuffix.IsSet() {
return slices.Clone(key)
}
if prefixChange != nil {
key = prefixChange.Apply(key)
if syntheticPrefix.IsSet() {
key = syntheticPrefix.Apply(key)
}
if syntheticSuffix.IsSet() {
n := t.opts.Comparer.Split(key)
Expand Down Expand Up @@ -179,7 +179,7 @@ func buildForIngest(
iter, rangeDelIter, rangeKeyIter,
false, /* uniquePrefixes */
nil, /* syntheticSuffix */
nil, /* prefixChange */
nil, /* syntheticPrefix */
writable,
db.FormatMajorVersion(),
)
Expand All @@ -195,14 +195,14 @@ func buildForIngestExternalEmulation(
externalObjID objID,
bounds pebble.KeyRange,
syntheticSuffix sstable.SyntheticSuffix,
prefixChange *sstable.PrefixReplacement,
syntheticPrefix sstable.SyntheticPrefix,
i int,
) (path string, _ *sstable.WriterMetadata) {
path = t.opts.FS.PathJoin(t.tmpDir, fmt.Sprintf("ext%d-%d", dbID.slot(), i))
f, err := t.opts.FS.Create(path)
panicIfErr(err)

reader, pointIter, rangeDelIter, rangeKeyIter := openExternalObj(t, externalObjID, bounds, prefixChange)
reader, pointIter, rangeDelIter, rangeKeyIter := openExternalObj(t, externalObjID, bounds, syntheticPrefix)
defer reader.Close()

writable := objstorageprovider.NewFileWritable(f)
Expand All @@ -215,7 +215,7 @@ func buildForIngestExternalEmulation(
pointIter, rangeDelIter, rangeKeyIter,
uniquePrefixes,
syntheticSuffix,
prefixChange,
syntheticPrefix,
writable,
t.minFMV(),
)
Expand All @@ -226,7 +226,7 @@ func buildForIngestExternalEmulation(
}

func openExternalObj(
t *Test, externalObjID objID, bounds pebble.KeyRange, prefixChange *sstable.PrefixReplacement,
t *Test, externalObjID objID, bounds pebble.KeyRange, syntheticPrefix sstable.SyntheticPrefix,
) (
reader *sstable.Reader,
pointIter base.InternalIterator,
Expand All @@ -243,9 +243,9 @@ func openExternalObj(

start := bounds.Start
end := bounds.End
if prefixChange != nil {
start = prefixChange.Invert(start)
end = prefixChange.Invert(end)
if syntheticPrefix.IsSet() {
start = syntheticPrefix.Invert(start)
end = syntheticPrefix.Invert(end)
}
pointIter, err = reader.NewIter(sstable.NoTransforms, start, end)
panicIfErr(err)
Expand Down Expand Up @@ -277,9 +277,9 @@ func openExternalObj(
// externalObjIsEmpty returns true if the given external object has no point or
// range keys within the given bounds.
func externalObjIsEmpty(
t *Test, externalObjID objID, bounds pebble.KeyRange, prefixChange *sstable.PrefixReplacement,
t *Test, externalObjID objID, bounds pebble.KeyRange, syntheticPrefix sstable.SyntheticPrefix,
) bool {
reader, pointIter, rangeDelIter, rangeKeyIter := openExternalObj(t, externalObjID, bounds, prefixChange)
reader, pointIter, rangeDelIter, rangeKeyIter := openExternalObj(t, externalObjID, bounds, syntheticPrefix)
defer reader.Close()
defer closeIters(pointIter, rangeDelIter, rangeKeyIter)

Expand Down
26 changes: 6 additions & 20 deletions metamorphic/generator.go
Original file line number Diff line number Diff line change
Expand Up @@ -1314,28 +1314,14 @@ func (g *generator) writerIngestExternalFiles() {
if g.cmp(start, end) == 0 {
end = objEnd
}
// Randomly set up prefix change.
var prefixChange *sstable.PrefixReplacement
// Randomly set up synthetic prefix.
var syntheticPrefix sstable.SyntheticPrefix
// We can only use a synthetic prefix if we don't have range dels.
// TODO(radu): we will want to support this at some point.
if !g.keyManager.objKeyMeta(id).hasRangeDels && g.rng.Intn(2) == 0 {
prefixChange = &sstable.PrefixReplacement{
SyntheticPrefix: randBytes(g.rng, 1, 5),
}
// TODO(radu): fix prefix replacement implementation or remove
// ContentPrefix altogether.
if false {
prefixLen := 0
limit := min(len(start), len(end))
for ; prefixLen < limit && start[prefixLen] == end[prefixLen]; prefixLen++ {
}
prefixLen = g.rng.Intn(prefixLen + 1)
if prefixLen > 0 {
prefixChange.ContentPrefix = start[:prefixLen]
}
}
start = prefixChange.Apply(start)
end = prefixChange.Apply(end)
syntheticPrefix = randBytes(g.rng, 1, 5)
start = syntheticPrefix.Apply(start)
end = syntheticPrefix.Apply(end)
}

objs[i] = externalObjWithBounds{
Expand All @@ -1344,7 +1330,7 @@ func (g *generator) writerIngestExternalFiles() {
Start: start,
End: end,
},
prefixChange: prefixChange,
syntheticPrefix: syntheticPrefix,
}
}

Expand Down
4 changes: 2 additions & 2 deletions metamorphic/key_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -302,8 +302,8 @@ func (k *keyManager) KeysForExternalIngest(obj externalObjWithBounds) []keyMeta
var res []keyMeta
for _, km := range k.SortedKeysForObj(obj.externalObjID) {
// Apply prefix and suffix changes, then check the bounds.
if obj.prefixChange != nil {
km.key = obj.prefixChange.Apply(km.key)
if obj.syntheticPrefix.IsSet() {
km.key = obj.syntheticPrefix.Apply(km.key)
}
if obj.syntheticSuffix.IsSet() {
n := k.comparer.Split(km.key)
Expand Down
Loading

0 comments on commit f4855ad

Please sign in to comment.