diff --git a/pkg/ccl/storageccl/engineccl/batch_repr.go b/pkg/ccl/storageccl/engineccl/batch_repr.go index bb2406d4b1a7..bb79bd74d3c4 100644 --- a/pkg/ccl/storageccl/engineccl/batch_repr.go +++ b/pkg/ccl/storageccl/engineccl/batch_repr.go @@ -84,5 +84,5 @@ func VerifyBatchRepr( } defer iter.Close() - return storage.ComputeStatsForRange(iter, start.Key, end.Key, nowNanos) + return storage.ComputeStatsGo(iter, start.Key, end.Key, nowNanos) } diff --git a/pkg/ccl/storageccl/import.go b/pkg/ccl/storageccl/import.go index 1a9e6466e79b..5577b7436812 100644 --- a/pkg/ccl/storageccl/import.go +++ b/pkg/ccl/storageccl/import.go @@ -142,8 +142,6 @@ func evalImport(ctx context.Context, cArgs batcheval.CommandArgs) (*roachpb.Impo } defer cArgs.EvalCtx.GetLimiters().ConcurrentImportRequests.Finish() - // The sstables only contain MVCC data and no intents, so using an MVCC - // iterator is sufficient. var iters []storage.SimpleMVCCIterator for _, file := range args.Files { log.VEventf(ctx, 2, "import file %s %s", file.Path, args.Key) diff --git a/pkg/ccl/workloadccl/format/sstable.go b/pkg/ccl/workloadccl/format/sstable.go index c36a753b2442..fefe05bd6ae3 100644 --- a/pkg/ccl/workloadccl/format/sstable.go +++ b/pkg/ccl/workloadccl/format/sstable.go @@ -83,7 +83,7 @@ func ToSSTable(t workload.Table, tableID descpb.ID, ts time.Time) ([]byte, error for kvBatch := range kvCh { for _, kv := range kvBatch.KVs { mvccKey := storage.MVCCKey{Timestamp: sstTS, Key: kv.Key} - if err := sw.PutMVCC(mvccKey, kv.Value.RawBytes); err != nil { + if err := sw.Put(mvccKey, kv.Value.RawBytes); err != nil { return err } } diff --git a/pkg/cli/debug.go b/pkg/cli/debug.go index 6d771595487a..a73eebdeddf2 100644 --- a/pkg/cli/debug.go +++ b/pkg/cli/debug.go @@ -316,7 +316,7 @@ func runDebugRangeData(cmd *cobra.Command, args []string) error { return err } - iter := rditer.NewReplicaEngineDataIterator(&desc, db, debugCtx.replicated) + iter := rditer.NewReplicaDataIterator(&desc, db, debugCtx.replicated, false /* seekEnd */) defer iter.Close() results := 0 for ; ; iter.Next() { @@ -325,7 +325,10 @@ func runDebugRangeData(cmd *cobra.Command, args []string) error { } else if !ok { break } - kvserver.PrintEngineKeyValue(iter.UnsafeKey(), iter.UnsafeValue()) + kvserver.PrintKeyValue(storage.MVCCKeyValue{ + Key: iter.Key(), + Value: iter.Value(), + }) results++ if results == debugCtx.maxResults { break @@ -458,14 +461,9 @@ Decode and print a hexadecimal-encoded key-value pair. isTS := bytes.HasPrefix(bs[0], keys.TimeseriesPrefix) k, err := storage.DecodeMVCCKey(bs[0]) if err != nil { - // - Could be an EngineKey. - // - Older versions of the consistency checker give you diffs with a raw_key that - // is already a roachpb.Key, so make a half-assed attempt to support both. + // Older versions of the consistency checker give you diffs with a raw_key that + // is already a roachpb.Key, so make a half-assed attempt to support both. if !isTS { - if k, ok := storage.DecodeEngineKey(bs[0]); ok { - kvserver.PrintEngineKeyValue(k, bs[1]) - return nil - } fmt.Printf("unable to decode key: %v, assuming it's a roachpb.Key with fake timestamp;\n"+ "if the result below looks like garbage, then it likely is:\n\n", err) } diff --git a/pkg/kv/bulk/sst_batcher.go b/pkg/kv/bulk/sst_batcher.go index 23d9ecbf732a..f027f944ee92 100644 --- a/pkg/kv/bulk/sst_batcher.go +++ b/pkg/kv/bulk/sst_batcher.go @@ -387,7 +387,7 @@ func AddSSTable( var stats enginepb.MVCCStats if (ms == enginepb.MVCCStats{}) { - stats, err = storage.ComputeStatsForRange(iter, start, end, now.UnixNano()) + stats, err = storage.ComputeStatsGo(iter, start, end, now.UnixNano()) if err != nil { return 0, errors.Wrapf(err, "computing stats for SST [%s, %s)", start, end) } @@ -437,7 +437,7 @@ func AddSSTable( return err } - right.stats, err = storage.ComputeStatsForRange( + right.stats, err = storage.ComputeStatsGo( iter, right.start, right.end, now.UnixNano(), ) if err != nil { diff --git a/pkg/kv/kvserver/batcheval/cmd_add_sstable.go b/pkg/kv/kvserver/batcheval/cmd_add_sstable.go index a8439dd853b7..77bad7472680 100644 --- a/pkg/kv/kvserver/batcheval/cmd_add_sstable.go +++ b/pkg/kv/kvserver/batcheval/cmd_add_sstable.go @@ -32,8 +32,6 @@ func init() { } // EvalAddSSTable evaluates an AddSSTable command. -// NB: These sstables do not contain intents/locks, so the code below only -// needs to deal with MVCCKeys. func EvalAddSSTable( ctx context.Context, readWriter storage.ReadWriter, cArgs CommandArgs, _ roachpb.Response, ) (result.Result, error) { @@ -91,7 +89,7 @@ func EvalAddSSTable( if args.MVCCStats == nil || verifyFastPath { log.VEventf(ctx, 2, "computing MVCCStats for SSTable [%s,%s)", mvccStartKey.Key, mvccEndKey.Key) - computed, err := storage.ComputeStatsForRange( + computed, err := storage.ComputeStatsGo( dataIter, mvccStartKey.Key, mvccEndKey.Key, h.Timestamp.WallTime) if err != nil { return result.Result{}, errors.Wrap(err, "computing SSTable MVCC stats") @@ -181,6 +179,7 @@ func EvalAddSSTable( ms.Add(stats) + // TODO(sumeer): use EngineIterator and replace the Put hack below. if args.IngestAsWrites { log.VEventf(ctx, 2, "ingesting SST (%d keys/%d bytes) via regular write batch", stats.KeyCount, len(args.Data)) dataIter.SeekGE(storage.MVCCKey{Key: keys.MinKey}) diff --git a/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go b/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go index e4f982a671de..41135cd23a75 100644 --- a/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go +++ b/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go @@ -392,7 +392,7 @@ func TestAddSSTableMVCCStats(t *testing.T) { beforeStats := func() enginepb.MVCCStats { iter := e.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{UpperBound: roachpb.KeyMax}) defer iter.Close() - beforeStats, err := storage.ComputeStatsForRange(iter, roachpb.KeyMin, roachpb.KeyMax, 10) + beforeStats, err := storage.ComputeStatsGo(iter, roachpb.KeyMin, roachpb.KeyMax, 10) if err != nil { t.Fatalf("%+v", err) } @@ -443,7 +443,7 @@ func TestAddSSTableMVCCStats(t *testing.T) { afterStats := func() enginepb.MVCCStats { iter := e.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{UpperBound: roachpb.KeyMax}) defer iter.Close() - afterStats, err := storage.ComputeStatsForRange(iter, roachpb.KeyMin, roachpb.KeyMax, 10) + afterStats, err := storage.ComputeStatsGo(iter, roachpb.KeyMin, roachpb.KeyMax, 10) if err != nil { t.Fatalf("%+v", err) } @@ -526,7 +526,7 @@ func TestAddSSTableDisallowShadowing(t *testing.T) { } defer dataIter.Close() - stats, err := storage.ComputeStatsForRange(dataIter, startKey, endKey, 0) + stats, err := storage.ComputeStatsGo(dataIter, startKey, endKey, 0) if err != nil { t.Fatalf("%+v", err) } diff --git a/pkg/kv/kvserver/batcheval/cmd_clear_range.go b/pkg/kv/kvserver/batcheval/cmd_clear_range.go index 1400ccfbb283..dba416b7d0ae 100644 --- a/pkg/kv/kvserver/batcheval/cmd_clear_range.go +++ b/pkg/kv/kvserver/batcheval/cmd_clear_range.go @@ -86,19 +86,15 @@ func ClearRange( // instead of using a range tombstone (inefficient for small ranges). if total := statsDelta.Total(); total < ClearRangeBytesThreshold { log.VEventf(ctx, 2, "delta=%d < threshold=%d; using non-range clear", total, ClearRangeBytesThreshold) - iter := readWriter.NewEngineIterator(storage.IterOptions{UpperBound: to}) - valid, err := iter.SeekEngineKeyGE(storage.EngineKey{Key: from}) - for ; valid; valid, err = iter.NextEngineKey() { - var k storage.EngineKey - if k, err = iter.UnsafeEngineKey(); err != nil { - break + if err := readWriter.MVCCIterate(from, to, storage.MVCCKeyAndIntentsIterKind, func(kv storage.MVCCKeyValue) error { + if kv.Key.Timestamp.IsEmpty() { + // It can be an intent or an inline MVCCMetadata -- we have no idea. + // TODO(sumeer): cannot clear separated intents in this manner. Write the iteration code + // here instead of using Reader.MVCCIterate. + return readWriter.ClearUnversioned(kv.Key.Key) } - if err = readWriter.ClearEngineKey(k); err != nil { - return result.Result{}, err - } - } - iter.Close() - if err != nil { + return readWriter.ClearMVCC(kv.Key) + }); err != nil { return result.Result{}, err } return pd, nil diff --git a/pkg/kv/kvserver/batcheval/cmd_clear_range_test.go b/pkg/kv/kvserver/batcheval/cmd_clear_range_test.go index 63689d85f3a6..8072bf3d5bea 100644 --- a/pkg/kv/kvserver/batcheval/cmd_clear_range_test.go +++ b/pkg/kv/kvserver/batcheval/cmd_clear_range_test.go @@ -33,9 +33,27 @@ type wrappedBatch struct { clearRangeCount int } -func (wb *wrappedBatch) ClearEngineKey(key storage.EngineKey) error { +// TODO(sbhola): narrow the calls where we increment counters to +// make this test stricter. + +func (wb *wrappedBatch) ClearMVCC(key storage.MVCCKey) error { + wb.clearCount++ + return wb.Batch.ClearMVCC(key) +} + +func (wb *wrappedBatch) ClearUnversioned(key roachpb.Key) error { + wb.clearCount++ + return wb.Batch.ClearUnversioned(key) +} + +func (wb *wrappedBatch) ClearIntent(key roachpb.Key) error { wb.clearCount++ - return wb.Batch.ClearEngineKey(key) + return wb.Batch.ClearIntent(key) +} + +func (wb *wrappedBatch) ClearRawRange(start, end roachpb.Key) error { + wb.clearRangeCount++ + return wb.Batch.ClearRawRange(start, end) } func (wb *wrappedBatch) ClearMVCCRangeAndIntents(start, end roachpb.Key) error { @@ -43,6 +61,11 @@ func (wb *wrappedBatch) ClearMVCCRangeAndIntents(start, end roachpb.Key) error { return wb.Batch.ClearMVCCRangeAndIntents(start, end) } +func (wb *wrappedBatch) ClearMVCCRange(start, end storage.MVCCKey) error { + wb.clearRangeCount++ + return wb.Batch.ClearMVCCRange(start, end) +} + // TestCmdClearRangeBytesThreshold verifies that clear range resorts to // clearing keys individually if under the bytes threshold and issues a // clear range command to the batch otherwise. diff --git a/pkg/kv/kvserver/batcheval/cmd_revert_range_test.go b/pkg/kv/kvserver/batcheval/cmd_revert_range_test.go index 9261d37c6c8a..e97319ac3b1f 100644 --- a/pkg/kv/kvserver/batcheval/cmd_revert_range_test.go +++ b/pkg/kv/kvserver/batcheval/cmd_revert_range_test.go @@ -44,7 +44,7 @@ func getStats(t *testing.T, reader storage.Reader) enginepb.MVCCStats { t.Helper() iter := reader.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{UpperBound: roachpb.KeyMax}) defer iter.Close() - s, err := storage.ComputeStatsForRange(iter, roachpb.KeyMin, roachpb.KeyMax, 1100) + s, err := storage.ComputeStatsGo(iter, roachpb.KeyMin, roachpb.KeyMax, 1100) if err != nil { t.Fatalf("%+v", err) } diff --git a/pkg/kv/kvserver/client_merge_test.go b/pkg/kv/kvserver/client_merge_test.go index 7d537c383efc..8dcd00c6efe7 100644 --- a/pkg/kv/kvserver/client_merge_test.go +++ b/pkg/kv/kvserver/client_merge_test.go @@ -3116,7 +3116,7 @@ func TestStoreRangeMergeRaftSnapshot(t *testing.T) { // Construct SST #1 through #3 as numbered above, but only ultimately // keep the 3rd one. keyRanges := rditer.MakeReplicatedKeyRanges(inSnap.State.Desc) - it := rditer.NewReplicaEngineDataIterator(inSnap.State.Desc, sendingEng, true /* replicatedOnly */) + it := rditer.NewReplicaDataIterator(inSnap.State.Desc, sendingEng, true /* replicatedOnly */, false /* seekEnd */) defer it.Close() // Write a range deletion tombstone to each of the SSTs then put in the // kv entries from the sender of the snapshot. @@ -3142,7 +3142,7 @@ func TestStoreRangeMergeRaftSnapshot(t *testing.T) { expectedSSTs = append(expectedSSTs, sstFile.Data()) break } - if err := sst.PutEngineKey(it.Key(), it.Value()); err != nil { + if err := sst.Put(it.Key(), it.Value()); err != nil { return err } } diff --git a/pkg/kv/kvserver/consistency_queue_test.go b/pkg/kv/kvserver/consistency_queue_test.go index fb087e4a053b..83dab00736e5 100644 --- a/pkg/kv/kvserver/consistency_queue_test.go +++ b/pkg/kv/kvserver/consistency_queue_test.go @@ -442,7 +442,7 @@ func TestCheckConsistencyInconsistent(t *testing.T) { iter := cpEng.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{UpperBound: []byte("\xff")}) defer iter.Close() - ms, err := storage.ComputeStatsForRange(iter, roachpb.KeyMin, roachpb.KeyMax, 0 /* nowNanos */) + ms, err := storage.ComputeStatsGo(iter, roachpb.KeyMin, roachpb.KeyMax, 0 /* nowNanos */) assert.NoError(t, err) assert.NotZero(t, ms.KeyBytes) diff --git a/pkg/kv/kvserver/debug_print.go b/pkg/kv/kvserver/debug_print.go index ff8246eb5052..c17939085e02 100644 --- a/pkg/kv/kvserver/debug_print.go +++ b/pkg/kv/kvserver/debug_print.go @@ -86,7 +86,6 @@ func tryRangeDescriptor(kv storage.MVCCKeyValue) (string, error) { return descStr(desc), nil } -// tryIntent does not look at the key. func tryIntent(kv storage.MVCCKeyValue) (string, error) { if len(kv.Value) == 0 { return "", errors.New("empty") @@ -375,20 +374,3 @@ func (s *stringifyWriteBatch) String() string { } return fmt.Sprintf("failed to stringify write batch (%x): %s", s.Data, err) } - -// PrintEngineKeyValue attempts to print the given key-value pair. -func PrintEngineKeyValue(k storage.EngineKey, v []byte) { - if k.IsMVCCKey() { - if key, err := k.ToMVCCKey(); err == nil { - PrintKeyValue(storage.MVCCKeyValue{Key: key, Value: v}) - return - } - } - var sb strings.Builder - fmt.Fprintf(&sb, "%s %x (%#x): ", k.Key, k.Version, k.Encode()) - if out, err := tryIntent(storage.MVCCKeyValue{Value: v}); err == nil { - sb.WriteString(out) - } else { - fmt.Fprintf(&sb, "%x", v) - } -} diff --git a/pkg/kv/kvserver/gc/gc_iterator.go b/pkg/kv/kvserver/gc/gc_iterator.go index efdf4c0215fa..2b7ed66292b1 100644 --- a/pkg/kv/kvserver/gc/gc_iterator.go +++ b/pkg/kv/kvserver/gc/gc_iterator.go @@ -17,10 +17,10 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/bufalloc" ) -// gcIterator wraps an rditer.ReplicaMVCCDataIterator which it reverse iterates for +// gcIterator wraps an rditer.ReplicaDataIterator which it reverse iterates for // the purpose of discovering gc-able replicated data. type gcIterator struct { - it *rditer.ReplicaMVCCDataIterator + it *rditer.ReplicaDataIterator done bool err error buf gcIteratorRingBuf @@ -28,7 +28,7 @@ type gcIterator struct { func makeGCIterator(desc *roachpb.RangeDescriptor, snap storage.Reader) gcIterator { return gcIterator{ - it: rditer.NewReplicaMVCCDataIterator(desc, snap, + it: rditer.NewReplicaDataIterator(desc, snap, true /* replicatedOnly */, true /* seekEnd */), } } diff --git a/pkg/kv/kvserver/gc/gc_old_test.go b/pkg/kv/kvserver/gc/gc_old_test.go index 085587a6bd2c..558036d27e80 100644 --- a/pkg/kv/kvserver/gc/gc_old_test.go +++ b/pkg/kv/kvserver/gc/gc_old_test.go @@ -50,7 +50,7 @@ func runGCOld( cleanupTxnIntentsAsyncFn CleanupTxnIntentsAsyncFunc, ) (Info, error) { - iter := rditer.NewReplicaMVCCDataIterator(desc, snap, + iter := rditer.NewReplicaDataIterator(desc, snap, true /* replicatedOnly */, false /* seekEnd */) defer iter.Close() diff --git a/pkg/kv/kvserver/rditer/replica_data_iter.go b/pkg/kv/kvserver/rditer/replica_data_iter.go index b3048f0c1f7b..89e45a47a732 100644 --- a/pkg/kv/kvserver/rditer/replica_data_iter.go +++ b/pkg/kv/kvserver/rditer/replica_data_iter.go @@ -17,58 +17,34 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/bufalloc" ) -// KeyRange is a helper struct for the ReplicaMVCCDataIterator and -// ReplicaEngineDataIterator. +// KeyRange is a helper struct for the ReplicaDataIterator. // TODO(sumeer): change these to roachpb.Key since the timestamp is -// always empty and the code below assumes that fact. +// always empty. type KeyRange struct { Start, End storage.MVCCKey } -// ReplicaMVCCDataIterator provides a complete iteration over MVCC or unversioned -// (which can be made to look like an MVCCKey) key / value -// rows in a range, including system-local metadata and user data. +// ReplicaDataIterator provides a complete iteration over all key / value +// rows in a range, including all system-local metadata and user data. // The ranges keyRange slice specifies the key ranges which comprise -// the range's data. This cannot be used to iterate over keys that are not -// representable as MVCCKeys, except when such non-MVCCKeys are limited to -// intents, which can be made to look like interleaved MVCCKeys. Most callers -// want the real keys, and should use ReplicaEngineDataIterator. +// all of the range's data. // -// A ReplicaMVCCDataIterator provides a subset of the engine.MVCCIterator interface. +// A ReplicaDataIterator provides a subset of the engine.MVCCIterator interface. // -// TODO(tschottdorf): the API is awkward. By default, ReplicaMVCCDataIterator uses +// TODO(tschottdorf): the API is awkward. By default, ReplicaDataIterator uses // a byte allocator which needs to be reset manually using `ResetAllocator`. // This is problematic as it requires of every user careful tracking of when // to call that method; some just never call it and pull the whole replica // into memory. Use of an allocator should be opt-in. // -// TODO(sumeer): merge with ReplicaEngineDataIterator. We can use an EngineIterator -// for MVCC key ranges and convert from EngineKey to MVCCKey. -type ReplicaMVCCDataIterator struct { +// TODO(sumeer): Should return EngineKeys, to handle separated lock table. +type ReplicaDataIterator struct { curIndex int ranges []KeyRange it storage.MVCCIterator a bufalloc.ByteAllocator } -// ReplicaEngineDataIterator is like ReplicaMVCCDataIterator, but iterates -// using the general EngineKeys. It provides a subset of the engine.EngineIterator -// interface. -// -// TODO(tschottdorf): the API is awkward. By default, ReplicaMVCCDataIterator uses -// a byte allocator which needs to be reset manually using `ResetAllocator`. -// This is problematic as it requires of every user careful tracking of when -// to call that method; some just never call it and pull the whole replica -// into memory. Use of an allocator should be opt-in. -type ReplicaEngineDataIterator struct { - curIndex int - ranges []KeyRange - it storage.EngineIterator - valid bool - err error - a bufalloc.ByteAllocator -} - // MakeAllKeyRanges returns all key ranges for the given Range. func MakeAllKeyRanges(d *roachpb.RangeDescriptor) []KeyRange { return []KeyRange{ @@ -138,19 +114,18 @@ func MakeUserKeyRange(d *roachpb.RangeDescriptor) KeyRange { } } -// NewReplicaMVCCDataIterator creates a ReplicaMVCCDataIterator for the given replica. -// TODO(sumeer): narrow this interface since only gcIterator uses -// ReplicaMVCCDataIterator. -func NewReplicaMVCCDataIterator( +// NewReplicaDataIterator creates a ReplicaDataIterator for the given replica. +func NewReplicaDataIterator( d *roachpb.RangeDescriptor, reader storage.Reader, replicatedOnly bool, seekEnd bool, -) *ReplicaMVCCDataIterator { +) *ReplicaDataIterator { + // TODO(sumeer): use an EngineIterator. it := reader.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{UpperBound: d.EndKey.AsRawKey()}) rangeFunc := MakeAllKeyRanges if replicatedOnly { rangeFunc = MakeReplicatedKeyRanges } - ri := &ReplicaMVCCDataIterator{ + ri := &ReplicaDataIterator{ ranges: rangeFunc(d), it: it, } @@ -163,27 +138,27 @@ func NewReplicaMVCCDataIterator( } // seekStart seeks the iterator to the start of its data range. -func (ri *ReplicaMVCCDataIterator) seekStart() { +func (ri *ReplicaDataIterator) seekStart() { ri.curIndex = 0 ri.it.SeekGE(ri.ranges[ri.curIndex].Start) ri.advance() } // seekEnd seeks the iterator to the end of its data range. -func (ri *ReplicaMVCCDataIterator) seekEnd() { +func (ri *ReplicaDataIterator) seekEnd() { ri.curIndex = len(ri.ranges) - 1 ri.it.SeekLT(ri.ranges[ri.curIndex].End) ri.retreat() } // Close the underlying iterator. -func (ri *ReplicaMVCCDataIterator) Close() { +func (ri *ReplicaDataIterator) Close() { ri.curIndex = len(ri.ranges) ri.it.Close() } // Next advances to the next key in the iteration. -func (ri *ReplicaMVCCDataIterator) Next() { +func (ri *ReplicaDataIterator) Next() { ri.it.Next() ri.advance() } @@ -191,7 +166,7 @@ func (ri *ReplicaMVCCDataIterator) Next() { // advance moves the iterator forward through the ranges until a valid // key is found or the iteration is done and the iterator becomes // invalid. -func (ri *ReplicaMVCCDataIterator) advance() { +func (ri *ReplicaDataIterator) advance() { for { if ok, _ := ri.Valid(); ok && ri.it.UnsafeKey().Less(ri.ranges[ri.curIndex].End) { return @@ -206,13 +181,13 @@ func (ri *ReplicaMVCCDataIterator) advance() { } // Prev advances the iterator one key backwards. -func (ri *ReplicaMVCCDataIterator) Prev() { +func (ri *ReplicaDataIterator) Prev() { ri.it.Prev() ri.retreat() } // retreat is the opposite of advance. -func (ri *ReplicaMVCCDataIterator) retreat() { +func (ri *ReplicaDataIterator) retreat() { for { if ok, _ := ri.Valid(); ok && ri.ranges[ri.curIndex].Start.Less(ri.it.UnsafeKey()) { return @@ -227,21 +202,21 @@ func (ri *ReplicaMVCCDataIterator) retreat() { } // Valid returns true if the iterator currently points to a valid value. -func (ri *ReplicaMVCCDataIterator) Valid() (bool, error) { +func (ri *ReplicaDataIterator) Valid() (bool, error) { ok, err := ri.it.Valid() ok = ok && ri.curIndex >= 0 && ri.curIndex < len(ri.ranges) return ok, err } // Key returns the current key. -func (ri *ReplicaMVCCDataIterator) Key() storage.MVCCKey { +func (ri *ReplicaDataIterator) Key() storage.MVCCKey { key := ri.it.UnsafeKey() ri.a, key.Key = ri.a.Copy(key.Key, 0) return key } // Value returns the current value. -func (ri *ReplicaMVCCDataIterator) Value() []byte { +func (ri *ReplicaDataIterator) Value() []byte { value := ri.it.UnsafeValue() ri.a, value = ri.a.Copy(value, 0) return value @@ -249,118 +224,17 @@ func (ri *ReplicaMVCCDataIterator) Value() []byte { // UnsafeKey returns the same value as Key, but the memory is invalidated on // the next call to {Next,Prev,Close}. -func (ri *ReplicaMVCCDataIterator) UnsafeKey() storage.MVCCKey { +func (ri *ReplicaDataIterator) UnsafeKey() storage.MVCCKey { return ri.it.UnsafeKey() } // UnsafeValue returns the same value as Value, but the memory is invalidated on // the next call to {Next,Prev,Close}. -func (ri *ReplicaMVCCDataIterator) UnsafeValue() []byte { +func (ri *ReplicaDataIterator) UnsafeValue() []byte { return ri.it.UnsafeValue() } -// ResetAllocator resets the ReplicaMVCCDataIterator's internal byte allocator. -func (ri *ReplicaMVCCDataIterator) ResetAllocator() { +// ResetAllocator resets the ReplicaDataIterator's internal byte allocator. +func (ri *ReplicaDataIterator) ResetAllocator() { ri.a = nil } - -// NewReplicaEngineDataIterator creates a ReplicaEngineDataIterator for the given replica. -func NewReplicaEngineDataIterator( - d *roachpb.RangeDescriptor, reader storage.Reader, replicatedOnly bool, -) *ReplicaEngineDataIterator { - it := reader.NewEngineIterator(storage.IterOptions{UpperBound: d.EndKey.AsRawKey()}) - - rangeFunc := MakeAllKeyRanges - if replicatedOnly { - rangeFunc = MakeReplicatedKeyRanges - } - ri := &ReplicaEngineDataIterator{ - ranges: rangeFunc(d), - it: it, - } - ri.seekStart() - return ri -} - -// seekStart seeks the iterator to the start of its data range. -func (ri *ReplicaEngineDataIterator) seekStart() { - ri.curIndex = 0 - ri.valid, ri.err = ri.it.SeekEngineKeyGE(storage.EngineKey{Key: ri.ranges[ri.curIndex].Start.Key}) - ri.advance() -} - -// Close the underlying iterator. -func (ri *ReplicaEngineDataIterator) Close() { - ri.valid = false - ri.it.Close() -} - -// Next advances to the next key in the iteration. -func (ri *ReplicaEngineDataIterator) Next() { - ri.valid, ri.err = ri.it.NextEngineKey() - ri.advance() -} - -// advance moves the iterator forward through the ranges until a valid -// key is found or the iteration is done and the iterator becomes -// invalid. -func (ri *ReplicaEngineDataIterator) advance() { - for ri.valid { - var k storage.EngineKey - k, ri.err = ri.it.UnsafeEngineKey() - if ri.err != nil { - ri.valid = false - return - } - if k.Key.Compare(ri.ranges[ri.curIndex].End.Key) < 0 { - return - } - ri.curIndex++ - if ri.curIndex < len(ri.ranges) { - ri.valid, ri.err = ri.it.SeekEngineKeyGE( - storage.EngineKey{Key: ri.ranges[ri.curIndex].Start.Key}) - } else { - ri.valid = false - return - } - } -} - -// Valid returns true if the iterator currently points to a valid value. -func (ri *ReplicaEngineDataIterator) Valid() (bool, error) { - return ri.valid, ri.err -} - -// Key returns the current key. -// TODO(sumeer): only used in tests. Remove method and allocator. -func (ri *ReplicaEngineDataIterator) Key() storage.EngineKey { - key := ri.UnsafeKey() - key, ri.a = key.CopyUsingAlloc(ri.a) - return key -} - -// Value returns the current value. -// TODO(sumeer): only used in tests. Remove method and allocator. -func (ri *ReplicaEngineDataIterator) Value() []byte { - value := ri.it.UnsafeValue() - ri.a, value = ri.a.Copy(value, 0) - return value -} - -// UnsafeKey returns the same value as Key, but the memory is invalidated on -// the next call to {Next,Close}. -func (ri *ReplicaEngineDataIterator) UnsafeKey() storage.EngineKey { - key, err := ri.it.UnsafeEngineKey() - if err != nil { - // If Valid(), we've already extracted an EngineKey earlier, - // when doing the key comparison, so this will not happen. - panic("method called on an invalid iter") - } - return key -} - -// UnsafeValue returns the same value as Value, but the memory is invalidated on -// the next call to {Next,Close}. -func (ri *ReplicaEngineDataIterator) UnsafeValue() []byte { - return ri.it.UnsafeValue() -} diff --git a/pkg/kv/kvserver/rditer/replica_data_iter_test.go b/pkg/kv/kvserver/rditer/replica_data_iter_test.go index 1ddacf3b3fe9..c7fb3da302c3 100644 --- a/pkg/kv/kvserver/rditer/replica_data_iter_test.go +++ b/pkg/kv/kvserver/rditer/replica_data_iter_test.go @@ -138,7 +138,7 @@ func verifyRDIter( }, hlc.Timestamp{WallTime: 42}) readWriter = spanset.NewReadWriterAt(readWriter, &spans, hlc.Timestamp{WallTime: 42}) } - iter := NewReplicaMVCCDataIterator(desc, readWriter, replicatedOnly, reverse /* seekEnd */) + iter := NewReplicaDataIterator(desc, readWriter, replicatedOnly, reverse /* seekEnd */) defer iter.Close() i := 0 if reverse { @@ -237,7 +237,7 @@ func TestReplicaDataIterator(t *testing.T) { // Verify that the replicated-only iterator ignores unreplicated keys. unreplicatedPrefix := keys.MakeRangeIDUnreplicatedPrefix(desc.RangeID) - iter := NewReplicaMVCCDataIterator(&desc, eng, + iter := NewReplicaDataIterator(&desc, eng, true /* replicatedOnly */, false /* seekEnd */) defer iter.Close() for ; ; iter.Next() { diff --git a/pkg/kv/kvserver/replica_consistency.go b/pkg/kv/kvserver/replica_consistency.go index ed700694c6ec..2c42f184d842 100644 --- a/pkg/kv/kvserver/replica_consistency.go +++ b/pkg/kv/kvserver/replica_consistency.go @@ -633,13 +633,13 @@ func (r *Replica) sha512( // all of the replicated key space. if !statsOnly { // TODO(sumeer): remember that this caller of MakeReplicatedKeyRanges does - // not want the lock table ranges since the iter has been constructed using - // MVCCKeyAndIntentsIterKind. By the time we have replicated locks + // not want the lock table ranges since it has already considered the + // intents earlier in this function. By the time we have replicated locks // other than exclusive locks, we will probably not have any interleaved - // intents so we could stop using MVCCKeyAndIntentsIterKind and + // intents so we could stop using MVCCKeyAndIntentsIterKind above and // consider all locks here. for _, span := range rditer.MakeReplicatedKeyRanges(&desc) { - spanMS, err := storage.ComputeStatsForRange( + spanMS, err := storage.ComputeStatsGo( iter, span.Start.Key, span.End.Key, 0 /* nowNanos */, visitor, ) if err != nil { diff --git a/pkg/kv/kvserver/replica_raftstorage.go b/pkg/kv/kvserver/replica_raftstorage.go index 71e32268f7e1..950a9de22756 100644 --- a/pkg/kv/kvserver/replica_raftstorage.go +++ b/pkg/kv/kvserver/replica_raftstorage.go @@ -464,7 +464,7 @@ type OutgoingSnapshot struct { // The RocksDB snapshot that will be streamed from. EngineSnap storage.Reader // The complete range iterator for the snapshot to stream. - Iter *rditer.ReplicaEngineDataIterator + Iter *rditer.ReplicaDataIterator // The replica state within the snapshot. State kvserverpb.ReplicaState // Allows access the the original Replica's sideloaded storage. Note that @@ -570,7 +570,8 @@ func snapshot( // Intentionally let this iterator and the snapshot escape so that the // streamer can send chunks from it bit by bit. - iter := rditer.NewReplicaEngineDataIterator(&desc, snap, true /* replicatedOnly */) + iter := rditer.NewReplicaDataIterator(&desc, snap, + true /* replicatedOnly */, false /* seekEnd */) return OutgoingSnapshot{ RaftEntryCache: eCache, @@ -725,6 +726,9 @@ func clearRangeData( return writer.ClearRawRange(start, end) } } else { + // TODO(sumeer): ClearRangeWithHeuristic uses MVCCKeyAndIntentsIterKind. But it is + // also going to be passed the lock table ranges explicitly. It should be using + // EngineIterator. clearRangeFn = storage.ClearRangeWithHeuristic } @@ -1100,6 +1104,7 @@ func (r *Replica) clearSubsumedReplicaDiskData( subsumedReplSSTFile := &storage.MemFile{} subsumedReplSST := storage.MakeIngestionSSTWriter(subsumedReplSSTFile) defer subsumedReplSST.Close() + // TODO(sumeer): ClearRangeWithHeuristic should use EngineIterator. if err := storage.ClearRangeWithHeuristic( r.store.Engine(), &subsumedReplSST, diff --git a/pkg/kv/kvserver/replica_test.go b/pkg/kv/kvserver/replica_test.go index 173c1af9dd19..e3693c985d2b 100644 --- a/pkg/kv/kvserver/replica_test.go +++ b/pkg/kv/kvserver/replica_test.go @@ -6990,8 +6990,8 @@ func TestReplicaDestroy(t *testing.T) { } }() - iter := rditer.NewReplicaEngineDataIterator(tc.repl.Desc(), tc.repl.store.Engine(), - false /* replicatedOnly */) + iter := rditer.NewReplicaDataIterator(tc.repl.Desc(), tc.repl.store.Engine(), + false /* replicatedOnly */, false /* seekEnd */) defer iter.Close() if ok, err := iter.Valid(); err != nil { t.Fatal(err) diff --git a/pkg/kv/kvserver/spanset/batch.go b/pkg/kv/kvserver/spanset/batch.go index 8a5e4b594824..3d98d7f35848 100644 --- a/pkg/kv/kvserver/spanset/batch.go +++ b/pkg/kv/kvserver/spanset/batch.go @@ -298,10 +298,6 @@ func (s spanSetReader) NewMVCCIterator( return NewIteratorAt(s.r.NewMVCCIterator(iterKind, opts), s.spans, s.ts) } -func (s spanSetReader) NewEngineIterator(opts storage.IterOptions) storage.EngineIterator { - panic("unsupported") -} - // GetDBEngine recursively searches for the underlying rocksDB engine. func GetDBEngine(reader storage.Reader, span roachpb.Span) storage.Reader { switch v := reader.(type) { @@ -373,10 +369,6 @@ func (s spanSetWriter) ClearIntent(key roachpb.Key) error { return s.w.ClearIntent(key) } -func (s spanSetWriter) ClearEngineKey(key storage.EngineKey) error { - panic("unsupported") -} - func (s spanSetWriter) checkAllowedRange(start, end roachpb.Key) error { if s.spansOnly { if err := s.spans.CheckAllowed(SpanReadWrite, roachpb.Span{Key: start, EndKey: end}); err != nil { @@ -452,10 +444,6 @@ func (s spanSetWriter) PutIntent(key roachpb.Key, value []byte) error { return s.w.PutIntent(key, value) } -func (s spanSetWriter) PutEngineKey(key storage.EngineKey, value []byte) error { - panic("unsupported") -} - func (s spanSetWriter) LogData(data []byte) error { return s.w.LogData(data) } @@ -512,8 +500,8 @@ type spanSetBatch struct { var _ storage.Batch = spanSetBatch{} -func (s spanSetBatch) SingleClearEngineKey(key storage.EngineKey) error { - return s.b.SingleClearEngineKey(key) +func (s spanSetBatch) SingleClearEngine(key storage.EngineKey) error { + return s.b.SingleClearEngine(key) } func (s spanSetBatch) Commit(sync bool) error { diff --git a/pkg/kv/kvserver/stateloader/stateloader.go b/pkg/kv/kvserver/stateloader/stateloader.go index 773b52791e98..d94ba874c523 100644 --- a/pkg/kv/kvserver/stateloader/stateloader.go +++ b/pkg/kv/kvserver/stateloader/stateloader.go @@ -290,7 +290,7 @@ func (rsl StateLoader) SetRangeAppliedState( RangeStats: newMS.ToPersistentStats(), } // The RangeAppliedStateKey is not included in stats. This is also reflected - // in C.MVCCComputeStats and ComputeStatsForRange. + // in C.MVCCComputeStats and ComputeStatsGo. ms := (*enginepb.MVCCStats)(nil) return storage.MVCCPutProto(ctx, readWriter, ms, rsl.RangeAppliedStateKey(), hlc.Timestamp{}, nil, &as) } @@ -400,7 +400,7 @@ func (rsl StateLoader) writeLegacyMVCCStatsInternal( // enlarges the size of the struct itself. This is mostly fine - we persist // MVCCStats under the RangeAppliedState key and don't account for the size of // the MVCCStats struct itself when doing so (we ignore the RangeAppliedState key - // in ComputeStatsForRange). This would not therefore not cause replica state divergence + // in ComputeStatsGo). This would not therefore not cause replica state divergence // in mixed version clusters (the enlarged struct does not contribute to a // persisted stats difference on disk because we're not accounting for the size of // the struct itself). diff --git a/pkg/kv/kvserver/store_snapshot.go b/pkg/kv/kvserver/store_snapshot.go index 4f3949f15ba6..6770080e0b69 100644 --- a/pkg/kv/kvserver/store_snapshot.go +++ b/pkg/kv/kvserver/store_snapshot.go @@ -162,7 +162,7 @@ func (msstw *multiSSTWriter) finalizeSST(ctx context.Context) error { return nil } -func (msstw *multiSSTWriter) Put(ctx context.Context, key storage.EngineKey, value []byte) error { +func (msstw *multiSSTWriter) Put(ctx context.Context, key storage.MVCCKey, value []byte) error { for msstw.keyRanges[msstw.currRange].End.Key.Compare(key.Key) <= 0 { // Finish the current SST, write to the file, and move to the next key // range. @@ -176,7 +176,7 @@ func (msstw *multiSSTWriter) Put(ctx context.Context, key storage.EngineKey, val if msstw.keyRanges[msstw.currRange].Start.Key.Compare(key.Key) > 0 { return crdberrors.AssertionFailedf("client error: expected %s to fall in one of %s", key.Key, msstw.keyRanges) } - if err := msstw.currSST.PutEngineKey(key, value); err != nil { + if err := msstw.currSST.Put(key, value); err != nil { return errors.Wrap(err, "failed to put in sst") } return nil @@ -246,7 +246,7 @@ func (kvSS *kvBatchSnapshotStrategy) Receive( if batchReader.BatchType() != storage.BatchTypeValue { return noSnap, crdberrors.AssertionFailedf("expected type %d, found type %d", storage.BatchTypeValue, batchReader.BatchType()) } - key, err := batchReader.EngineKey() + key, err := batchReader.MVCCKey() if err != nil { return noSnap, errors.Wrap(err, "failed to decode mvcc key") } @@ -334,13 +334,21 @@ func (kvSS *kvBatchSnapshotStrategy) Send( break } kvs++ + // TODO(sumeer): this is incorrect. We should be iterating using EngineIterator + // and Putting an EngineKey. unsafeKey := iter.UnsafeKey() unsafeValue := iter.UnsafeValue() if b == nil { b = kvSS.newBatch() } - if err := b.PutEngineKey(unsafeKey, unsafeValue); err != nil { - return 0, err + if unsafeKey.Timestamp.IsEmpty() { + if err := b.PutUnversioned(unsafeKey.Key, unsafeValue); err != nil { + return 0, err + } + } else { + if err := b.PutMVCC(unsafeKey, unsafeValue); err != nil { + return 0, err + } } if bLen := int64(b.Len()); bLen >= kvSS.batchSize { diff --git a/pkg/storage/batch.go b/pkg/storage/batch.go index 2c8069022109..e47f6e45e97e 100644 --- a/pkg/storage/batch.go +++ b/pkg/storage/batch.go @@ -275,15 +275,6 @@ func (r *RocksDBBatchReader) MVCCKey() (MVCCKey, error) { return decodeMVCCKey(r.Key()) } -// EngineKey returns the EngineKey for the current batch entry. -func (r *RocksDBBatchReader) EngineKey() (EngineKey, error) { - key, ok := DecodeEngineKey(r.Key()) - if !ok { - return key, errors.Errorf("invalid encoded engine key: %x", r.Key()) - } - return key, nil -} - // Value returns the value of the current batch entry. Value panics if the // BatchType is BatchTypeDeleted. func (r *RocksDBBatchReader) Value() []byte { diff --git a/pkg/storage/batch_test.go b/pkg/storage/batch_test.go index dc8cb5de8973..f387c4b0695b 100644 --- a/pkg/storage/batch_test.go +++ b/pkg/storage/batch_test.go @@ -98,7 +98,7 @@ func testBatchBasics(t *testing.T, writeOnly bool, commit func(e Engine, b Batch if err := e.PutUnversioned(mvccKey("d").Key, []byte("before")); err != nil { t.Fatal(err) } - if err := b.SingleClearEngineKey(EngineKey{Key: mvccKey("d").Key}); err != nil { + if err := b.SingleClearEngine(EngineKey{Key: mvccKey("d").Key}); err != nil { t.Fatal(err) } @@ -451,7 +451,7 @@ func TestBatchGet(t *testing.T) { if err := b.PutUnversioned(mvccKey("d").Key, []byte("before")); err != nil { t.Fatal(err) } - if err := b.SingleClearEngineKey(EngineKey{Key: mvccKey("d").Key}); err != nil { + if err := b.SingleClearEngine(EngineKey{Key: mvccKey("d").Key}); err != nil { t.Fatal(err) } if err := b.PutUnversioned(mvccKey("d").Key, []byte("after")); err != nil { @@ -1010,7 +1010,7 @@ func TestBatchDistinctPanics(t *testing.T) { func() { _ = batch.PutUnversioned(a.Key, nil) }, func() { _ = batch.Merge(a, nil) }, func() { _ = batch.ClearUnversioned(a.Key) }, - func() { _ = batch.SingleClearEngineKey(EngineKey{Key: a.Key}) }, + func() { _ = batch.SingleClearEngine(EngineKey{Key: a.Key}) }, func() { _ = batch.ApplyBatchRepr(nil, false) }, func() { _, _ = batch.MVCCGet(a) }, func() { _, _, _, _ = batch.MVCCGetProto(a, nil) }, diff --git a/pkg/storage/engine.go b/pkg/storage/engine.go index c4abbab3850a..2a2409184a9a 100644 --- a/pkg/storage/engine.go +++ b/pkg/storage/engine.go @@ -136,39 +136,35 @@ type MVCCIterator interface { // EngineIterator is an iterator over key-value pairs where the key is // an EngineKey. +//lint:ignore U1001 unused type EngineIterator interface { // Close frees up resources held by the iterator. Close() - // SeekEngineKeyGE advances the iterator to the first key in the engine - // which is >= the provided key. - SeekEngineKeyGE(key EngineKey) (valid bool, err error) - // SeekEngineKeyLT advances the iterator to the first key in the engine - // which is < the provided key. - SeekEngineKeyLT(key EngineKey) (valid bool, err error) - // NextEngineKey advances the iterator to the next key/value in the - // iteration. After this call, valid will be true if the iterator was not - // originally positioned at the last key. Note that unlike - // MVCCIterator.NextKey, this method does not skip other versions with the - // same EngineKey.Key. - // TODO(sumeer): change MVCCIterator.Next() to match the - // return values, change all its callers, and rename this - // to Next(). - NextEngineKey() (valid bool, err error) - // PrevEngineKey moves the iterator backward to the previous key/value in - // the iteration. After this call, valid will be true if the iterator was - // not originally positioned at the first key. - PrevEngineKey() (valid bool, err error) - // UnsafeEngineKey returns the same value as Key, but the memory is - // invalidated on the next call to {Next,NextKey,Prev,SeekGE,SeekLT,Close}. + // SeekGE advances the iterator to the first key in the engine which + // is >= the provided key. + SeekGE(key EngineKey) (valid bool, err error) + // SeekLT advances the iterator to the first key in the engine which + // is < the provided key. + SeekLT(key EngineKey) (valid bool, err error) + // Next advances the iterator to the next key/value in the + // iteration. After this call, valid will be true if the + // iterator was not originally positioned at the last key. + Next() (valid bool, err error) + // Prev moves the iterator backward to the previous key/value + // in the iteration. After this call, valid will be true if the + // iterator was not originally positioned at the first key. + Prev() (valid bool, err error) + // UnsafeKey returns the same value as Key, but the memory is invalidated on + // the next call to {Next,NextKey,Prev,SeekGE,SeekLT,Close}. // REQUIRES: latest positioning function returned valid=true. - UnsafeEngineKey() (EngineKey, error) + UnsafeKey() EngineKey // UnsafeValue returns the same value as Value, but the memory is // invalidated on the next call to {Next,NextKey,Prev,SeekGE,SeekLT,Close}. // REQUIRES: latest positioning function returned valid=true. UnsafeValue() []byte - // EngineKey returns the current key. + // Key returns the current key. // REQUIRES: latest positioning function returned valid=true. - EngineKey() (EngineKey, error) + Key() EngineKey // Value returns the current value as a byte slice. // REQUIRES: latest positioning function returned valid=true. Value() []byte @@ -292,10 +288,6 @@ type Reader interface { // engine. The caller must invoke MVCCIterator.Close() when finished // with the iterator to free resources. NewMVCCIterator(iterKind MVCCIterKind, opts IterOptions) MVCCIterator - // NewEngineIterator returns a new instance of an EngineIterator over this - // engine. The caller must invoke EngineIterator.Close() when finished - // with the iterator to free resources. - NewEngineIterator(opts IterOptions) EngineIterator } // Writer is the write interface to an engine's data. @@ -333,13 +325,6 @@ type Writer interface { // // It is safe to modify the contents of the arguments after it returns. ClearIntent(key roachpb.Key) error - // ClearEngineKey removes the item from the db with the given EngineKey. - // Note that clear actually removes entries from the storage engine. This is - // a general-purpose and low-level method that should be used sparingly, - // only when the other Clear* methods are not applicable. - // - // It is safe to modify the contents of the arguments after it returns. - ClearEngineKey(key EngineKey) error // ClearRawRange removes a set of entries, from start (inclusive) to end // (exclusive). It can be applied to a range consisting of MVCCKeys or the @@ -425,12 +410,6 @@ type Writer interface { // // It is safe to modify the contents of the arguments after Put returns. PutIntent(key roachpb.Key, value []byte) error - // PutEngineKey sets the given key to the value provided. This is a - // general-purpose and low-level method that should be used sparingly, - // only when the other Put* methods are not applicable. - // - // It is safe to modify the contents of the arguments after Put returns. - PutEngineKey(key EngineKey, value []byte) error // LogData adds the specified data to the RocksDB WAL. The data is // uninterpreted by RocksDB (i.e. not added to the memtable or sstables). @@ -556,7 +535,7 @@ type Engine interface { // Batch is the interface for batch specific operations. type Batch interface { ReadWriter - // SingleClearEngineKey removes the most recent write to the item from the db + // SingleClearEngine removes the most recent write to the item from the db // with the given key. Whether older writes of the item will come back // to life if not also removed with SingleClear is undefined. See the // following: @@ -570,7 +549,7 @@ type Batch interface { // TODO(sumeer): try to remove it from this exported interface. // // It is safe to modify the contents of the arguments after it returns. - SingleClearEngineKey(key EngineKey) error + SingleClearEngine(key EngineKey) error // Commit atomically applies any batched updates to the underlying // engine. This is a noop unless the batch was created via NewBatch(). If // sync is true, the batch is synchronously committed to disk. @@ -741,11 +720,10 @@ func WriteSyncNoop(ctx context.Context, eng Engine) error { } // ClearRangeWithHeuristic clears the keys from start (inclusive) to end -// (exclusive). Depending on the number of keys, it will either use ClearRawRange -// or clear individual keys. It works with EngineKeys, so don't expect it to -// find and clear separated intents if [start, end) refers to MVCC key space. +// (exclusive). Depending on the number of keys, it will either use ClearRange +// or ClearIterRange. func ClearRangeWithHeuristic(reader Reader, writer Writer, start, end roachpb.Key) error { - iter := reader.NewEngineIterator(IterOptions{UpperBound: end}) + iter := reader.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{UpperBound: end}) defer iter.Close() // It is expensive for there to be many range deletion tombstones in the same @@ -753,45 +731,43 @@ func ClearRangeWithHeuristic(reader Reader, writer Writer, start, end roachpb.Ke // sstable is accessed. So we avoid using range deletion unless there is some // minimum number of keys. The value here was pulled out of thin air. It might // be better to make this dependent on the size of the data being deleted. Or - // perhaps we should fix Pebble to handle large numbers of tombstones in an + // perhaps we should fix RocksDB to handle large numbers of tombstones in an // sstable better. Note that we are referring to storage-level tombstones here, // and not MVCC tombstones. const clearRangeMinKeys = 64 // Peek into the range to see whether it's large enough to justify - // ClearRawRange. Note that the work done here is bounded by + // ClearRange. Note that the work done here is bounded by // clearRangeMinKeys, so it will be fairly cheap even for large // ranges. // - // TODO(sumeer): Could add the iterated keys to the batch, so we don't have - // to do the scan again. If there are too many keys, this will mean a mix of - // point tombstones and range tombstone. + // TODO(bdarnell): Move this into ClearIterRange so we don't have + // to do this scan twice. count := 0 - valid, err := iter.SeekEngineKeyGE(EngineKey{Key: start}) - for valid { + iter.SeekGE(MakeMVCCMetadataKey(start)) + for { + valid, err := iter.Valid() + if err != nil { + return err + } + if !valid { + break + } count++ if count > clearRangeMinKeys { break } - valid, err = iter.NextEngineKey() - } - if err != nil { - return err + iter.Next() } + var err error if count > clearRangeMinKeys { - return writer.ClearRawRange(start, end) + err = writer.ClearRawRange(start, end) + } else { + err = writer.ClearIterRange(iter, start, end) } - valid, err = iter.SeekEngineKeyGE(EngineKey{Key: start}) - for valid { - var k EngineKey - if k, err = iter.UnsafeEngineKey(); err != nil { - break - } - if err = writer.ClearEngineKey(k); err != nil { - break - } - valid, err = iter.NextEngineKey() + if err != nil { + return err } - return err + return nil } var ingestDelayL0Threshold = settings.RegisterIntSetting( diff --git a/pkg/storage/engine_key.go b/pkg/storage/engine_key.go index b4207cb375af..1dc0a578dcfe 100644 --- a/pkg/storage/engine_key.go +++ b/pkg/storage/engine_key.go @@ -17,7 +17,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/lock" "github.com/cockroachdb/cockroach/pkg/roachpb" - "github.com/cockroachdb/cockroach/pkg/util/bufalloc" "github.com/cockroachdb/cockroach/pkg/util/uuid" "github.com/cockroachdb/errors" ) @@ -52,7 +51,7 @@ func (k EngineKey) Format(f fmt.State, c rune) { } // Encoding: -// Key + \x00 (sentinel) [+ Version + ] +// Key + \x00 (sentinel) [+ Version + ] // // The motivation for the sentinel is that we configure the underlying storage // engine (Pebble) with a Split function that can be used for constructing @@ -64,32 +63,6 @@ const ( suffixEncodedLengthLen = 1 ) -// Copy makes a copy of the key. -func (k EngineKey) Copy() EngineKey { - buf := make([]byte, len(k.Key)+len(k.Version)) - return k.copyUsingSizedBuf(buf) -} - -// CopyUsingAlloc makes a copy of the key using the given allocator. -func (k EngineKey) CopyUsingAlloc( - alloc bufalloc.ByteAllocator, -) (EngineKey, bufalloc.ByteAllocator) { - var buf []byte - alloc, buf = alloc.Alloc(len(k.Key)+len(k.Version), 0) - return k.copyUsingSizedBuf(buf), alloc -} - -func (k EngineKey) copyUsingSizedBuf(buf []byte) EngineKey { - copy(buf, k.Key) - k.Key = buf[:len(k.Key)] - if len(k.Version) > 0 { - versionCopy := buf[len(k.Key):] - copy(versionCopy, k.Version) - k.Version = versionCopy - } - return k -} - // EncodedLen returns the encoded length of k. func (k EngineKey) EncodedLen() int { n := len(k.Key) + suffixEncodedLengthLen @@ -124,11 +97,7 @@ func (k EngineKey) EncodeToBuf(buf []byte) []byte { func (k EngineKey) encodeToSizedBuf(buf []byte) { copy(buf, k.Key) pos := len(k.Key) - // The length of the suffix is the full encoded length (len(buf)) minus the - // length of the key minus the length of the sentinel. Note that the - // suffixLen is 0 when Version is empty, and when Version is non-empty, it - // is len(Version)+1. That is, it includes the length byte at the end. - suffixLen := len(buf) - pos - 1 + suffixLen := len(k.Version) if suffixLen > 0 { buf[pos] = 0 pos += sentinelLen @@ -194,11 +163,13 @@ func DecodeEngineKey(b []byte) (key EngineKey, ok bool) { if len(b) == 0 { return EngineKey{}, false } - // Last byte is the version length + 1 when there is a version, - // else it is 0. + // Last byte is the version length. versionLen := int(b[len(b)-1]) // keyPartEnd points to the sentinel byte. - keyPartEnd := len(b) - 1 - versionLen + keyPartEnd := len(b) - 1 + if versionLen > 0 { + keyPartEnd = len(b) - 1 - versionLen - 1 + } if keyPartEnd < 0 { return EngineKey{}, false } diff --git a/pkg/storage/engine_key_test.go b/pkg/storage/engine_key_test.go index 0630c7f27751..7ff10680c486 100644 --- a/pkg/storage/engine_key_test.go +++ b/pkg/storage/engine_key_test.go @@ -18,7 +18,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/lock" "github.com/cockroachdb/cockroach/pkg/roachpb" - "github.com/cockroachdb/cockroach/pkg/storage/enginepb" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/uuid" @@ -105,12 +104,6 @@ func TestMVCCAndEngineKeyEncodeDecode(t *testing.T) { keyDecoded, err := eKeyDecoded.ToMVCCKey() require.NoError(t, err) require.Equal(t, test.key, keyDecoded) - b3 := EncodeKey(test.key) - require.Equal(t, b3, b1) - k3, ts, ok := enginepb.SplitMVCCKey(b3) - require.True(t, ok) - require.Equal(t, k3, []byte(test.key.Key)) - require.Equal(t, ts, encodedTS) }) } } diff --git a/pkg/storage/mvcc.go b/pkg/storage/mvcc.go index eed099ae7353..4e714ab536bd 100644 --- a/pkg/storage/mvcc.go +++ b/pkg/storage/mvcc.go @@ -3360,7 +3360,7 @@ func MVCCGarbageCollect( } // For GCBytesAge, this requires keeping track of the previous key's - // timestamp (prevNanos). See ComputeStatsForRange for a more easily digested + // timestamp (prevNanos). See ComputeStatsGo for a more easily digested // and better commented version of this logic. The below block will set // prevNanos to the appropriate value and position the iterator at the // first garbage version. @@ -3584,7 +3584,7 @@ func willOverflow(a, b int64) bool { return math.MinInt64-b > a } -// ComputeStatsForRange scans the underlying engine from start to end keys and +// ComputeStatsGo scans the underlying engine from start to end keys and // computes stats counters based on the values. This method is used after a // range is split to recompute stats for each subrange. The start key is always // adjusted to avoid counting local keys in the event stats are being recomputed @@ -3592,6 +3592,12 @@ func willOverflow(a, b int64) bool { // specifies the wall time in nanoseconds since the epoch and is used to compute // the total age of all intents. // +// Most codepaths will be computing stats on a RocksDB iterator, which is +// implemented in c++, so iter.ComputeStats will save several cgo calls per kv +// processed. (Plus, on equal footing, the c++ implementation is slightly +// faster.) ComputeStatsGo is here for codepaths that have a pure-go +// implementation of SimpleMVCCIterator. +// // When optional callbacks are specified, they are invoked for each physical // key-value pair (i.e. not for implicit meta records), and iteration is aborted // on the first error returned from any of them. @@ -3599,7 +3605,7 @@ func willOverflow(a, b int64) bool { // Callbacks must copy any data they intend to hold on to. // // This implementation must match engine/db.cc:MVCCComputeStatsInternal. -func ComputeStatsForRange( +func ComputeStatsGo( iter SimpleMVCCIterator, start, end roachpb.Key, nowNanos int64, diff --git a/pkg/storage/mvcc_stats_test.go b/pkg/storage/mvcc_stats_test.go index 8ddbba84b6ff..b2bbb28c2d7b 100644 --- a/pkg/storage/mvcc_stats_test.go +++ b/pkg/storage/mvcc_stats_test.go @@ -1335,9 +1335,9 @@ var mvccStatsTests = []struct { }, }, { - name: "ComputeStatsForRange", + name: "ComputeStatsGo", fn: func(iter MVCCIterator, start, end roachpb.Key, nowNanos int64) (enginepb.MVCCStats, error) { - return ComputeStatsForRange(iter, start, end, nowNanos) + return ComputeStatsGo(iter, start, end, nowNanos) }, }, } diff --git a/pkg/storage/mvcc_test.go b/pkg/storage/mvcc_test.go index b79b267faf50..4aa9b5c8cdcf 100644 --- a/pkg/storage/mvcc_test.go +++ b/pkg/storage/mvcc_test.go @@ -2580,7 +2580,7 @@ func computeStats( t.Helper() iter := reader.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{UpperBound: to}) defer iter.Close() - s, err := ComputeStatsForRange(iter, from, to, nowNanos) + s, err := ComputeStatsGo(iter, from, to, nowNanos) if err != nil { t.Fatalf("%+v", err) } diff --git a/pkg/storage/pebble.go b/pkg/storage/pebble.go index 84c862809ab5..cc32e7be3333 100644 --- a/pkg/storage/pebble.go +++ b/pkg/storage/pebble.go @@ -724,15 +724,6 @@ func (p *Pebble) NewMVCCIterator(iterKind MVCCIterKind, opts IterOptions) MVCCIt return iter } -// NewEngineIterator implements the Engine interface. -func (p *Pebble) NewEngineIterator(opts IterOptions) EngineIterator { - iter := newPebbleIterator(p.db, opts) - if iter == nil { - panic("couldn't create a new iterator") - } - return iter -} - // ApplyBatchRepr implements the Engine interface. func (p *Pebble) ApplyBatchRepr(repr []byte, sync bool) error { // batch.SetRepr takes ownership of the underlying slice, so make a copy. @@ -769,14 +760,6 @@ func (p *Pebble) ClearIntent(key roachpb.Key) error { return p.clear(MVCCKey{Key: key}) } -// ClearEngineKey implements the Engine interface. -func (p *Pebble) ClearEngineKey(key EngineKey) error { - if len(key.Key) == 0 { - return emptyKeyError() - } - return p.db.Delete(key.Encode(), pebble.Sync) -} - func (p *Pebble) clear(key MVCCKey) error { if len(key.Key) == 0 { return emptyKeyError() @@ -843,14 +826,6 @@ func (p *Pebble) PutIntent(key roachpb.Key, value []byte) error { return p.put(MVCCKey{Key: key}, value) } -// PutEngineKey implements the Engine interface. -func (p *Pebble) PutEngineKey(key EngineKey, value []byte) error { - if len(key.Key) == 0 { - return emptyKeyError() - } - return p.db.Set(key.Encode(), value, pebble.Sync) -} - func (p *Pebble) put(key MVCCKey, value []byte) error { if len(key.Key) == 0 { return emptyKeyError() @@ -1175,19 +1150,10 @@ func (p *Pebble) CreateCheckpoint(dir string) error { } type pebbleReadOnly struct { - parent *Pebble - // The iterator reuse optimization in pebbleReadOnly is for servicing a - // BatchRequest, such that the iterators get reused across different - // requests in the batch. - // Reuse iterators for {normal,prefix} x {MVCCKey,EngineKey} iteration. We - // need separate iterators for EngineKey and MVCCKey iteration since - // iterators that make separated locks/intents look as interleaved need to - // use both simultaneously. - prefixIter pebbleIterator - normalIter pebbleIterator - prefixEngineIter pebbleIterator - normalEngineIter pebbleIterator - closed bool + parent *Pebble + prefixIter pebbleIterator + normalIter pebbleIterator + closed bool } var _ ReadWriter = &pebbleReadOnly{} @@ -1199,8 +1165,6 @@ func (p *pebbleReadOnly) Close() { p.closed = true p.prefixIter.destroy() p.normalIter.destroy() - p.prefixEngineIter.destroy() - p.normalEngineIter.destroy() } func (p *pebbleReadOnly) Closed() bool { @@ -1243,7 +1207,6 @@ func (p *pebbleReadOnly) MVCCIterate( return iterateOnReader(p, start, end, iterKind, f) } -// NewMVCCIterator implements the Engine interface. func (p *pebbleReadOnly) NewMVCCIterator(iterKind MVCCIterKind, opts IterOptions) MVCCIterator { if p.closed { panic("using a closed pebbleReadOnly") @@ -1273,31 +1236,6 @@ func (p *pebbleReadOnly) NewMVCCIterator(iterKind MVCCIterKind, opts IterOptions return iter } -// NewEngineIterator implements the Engine interface. -func (p *pebbleReadOnly) NewEngineIterator(opts IterOptions) EngineIterator { - if p.closed { - panic("using a closed pebbleReadOnly") - } - - iter := &p.normalEngineIter - if opts.Prefix { - iter = &p.prefixEngineIter - } - if iter.inuse { - panic("iterator already in use") - } - - if iter.iter != nil { - iter.setOptions(opts) - } else { - iter.init(p.parent.db, opts) - iter.reusable = true - } - - iter.inuse = true - return iter -} - // Writer methods are not implemented for pebbleReadOnly. Ideally, the code // could be refactored so that a Reader could be supplied to evaluateBatch @@ -1318,10 +1256,6 @@ func (p *pebbleReadOnly) ClearIntent(key roachpb.Key) error { panic("not implemented") } -func (p *pebbleReadOnly) ClearEngineKey(key EngineKey) error { - panic("not implemented") -} - func (p *pebbleReadOnly) ClearRawRange(start, end roachpb.Key) error { panic("not implemented") } @@ -1354,10 +1288,6 @@ func (p *pebbleReadOnly) PutIntent(key roachpb.Key, value []byte) error { panic("not implemented") } -func (p *pebbleReadOnly) PutEngineKey(key EngineKey, value []byte) error { - panic("not implemented") -} - func (p *pebbleReadOnly) LogData(data []byte) error { panic("not implemented") } @@ -1451,11 +1381,6 @@ func (p pebbleSnapshot) NewMVCCIterator(iterKind MVCCIterKind, opts IterOptions) return newPebbleIterator(p.snapshot, opts) } -// NewEngineIterator implements the Reader interface. -func (p pebbleSnapshot) NewEngineIterator(opts IterOptions) EngineIterator { - return newPebbleIterator(p.snapshot, opts) -} - func pebbleExportToSst( reader Reader, startKey, endKey roachpb.Key, diff --git a/pkg/storage/pebble_batch.go b/pkg/storage/pebble_batch.go index be8eba8fb79c..1a8c95b60362 100644 --- a/pkg/storage/pebble_batch.go +++ b/pkg/storage/pebble_batch.go @@ -22,24 +22,15 @@ import ( // Wrapper struct around a pebble.Batch. type pebbleBatch struct { - db *pebble.DB - batch *pebble.Batch - buf []byte - // The iterator reuse optimization in pebbleBatch is for servicing a - // BatchRequest, such that the iterators get reused across different - // requests in the batch. - // Reuse iterators for {normal,prefix} x {MVCCKey,EngineKey} iteration. We - // need separate iterators for EngineKey and MVCCKey iteration since - // iterators that make separated locks/intents look as interleaved need to - // use both simultaneously. - prefixIter pebbleIterator - normalIter pebbleIterator - prefixEngineIter pebbleIterator - normalEngineIter pebbleIterator - closed bool - isDistinct bool - distinctOpen bool - parentBatch *pebbleBatch + db *pebble.DB + batch *pebble.Batch + buf []byte + prefixIter pebbleIterator + normalIter pebbleIterator + closed bool + isDistinct bool + distinctOpen bool + parentBatch *pebbleBatch } var _ Batch = &pebbleBatch{} @@ -67,16 +58,6 @@ func newPebbleBatch(db *pebble.DB, batch *pebble.Batch) *pebbleBatch { upperBoundBuf: pb.normalIter.upperBoundBuf, reusable: true, }, - prefixEngineIter: pebbleIterator{ - lowerBoundBuf: pb.prefixIter.lowerBoundBuf, - upperBoundBuf: pb.prefixIter.upperBoundBuf, - reusable: true, - }, - normalEngineIter: pebbleIterator{ - lowerBoundBuf: pb.normalIter.lowerBoundBuf, - upperBoundBuf: pb.normalIter.upperBoundBuf, - reusable: true, - }, } return pb } @@ -91,8 +72,6 @@ func (p *pebbleBatch) Close() { // Destroy the iterators before closing the batch. p.prefixIter.destroy() p.normalIter.destroy() - p.prefixEngineIter.destroy() - p.normalEngineIter.destroy() if !p.isDistinct { _ = p.batch.Close() @@ -234,39 +213,6 @@ func (p *pebbleBatch) NewMVCCIterator(iterKind MVCCIterKind, opts IterOptions) M return iter } -// NewEngineIterator implements the Batch interface. -func (p *pebbleBatch) NewEngineIterator(opts IterOptions) EngineIterator { - if !opts.Prefix && len(opts.UpperBound) == 0 && len(opts.LowerBound) == 0 { - panic("iterator must set prefix or upper bound or lower bound") - } - - if !p.batch.Indexed() && !p.isDistinct { - panic("write-only batch") - } - if p.distinctOpen { - panic("distinct batch open") - } - - iter := &p.normalEngineIter - if opts.Prefix { - iter = &p.prefixEngineIter - } - if iter.inuse { - panic("iterator already in use") - } - - if iter.iter != nil { - iter.setOptions(opts) - } else if p.batch.Indexed() { - iter.init(p.batch, opts) - } else { - iter.init(p.db, opts) - } - - iter.inuse = true - return iter -} - // NewMVCCIterator implements the Batch interface. func (p *pebbleBatch) ApplyBatchRepr(repr []byte, sync bool) error { if p.distinctOpen { @@ -299,18 +245,6 @@ func (p *pebbleBatch) ClearIntent(key roachpb.Key) error { return p.clear(MVCCKey{Key: key}) } -// ClearEngineKey implements the Batch interface. -func (p *pebbleBatch) ClearEngineKey(key EngineKey) error { - if p.distinctOpen { - panic("distinct batch open") - } - if len(key.Key) == 0 { - return emptyKeyError() - } - p.buf = key.EncodeToBuf(p.buf[:0]) - return p.batch.Delete(p.buf, nil) -} - func (p *pebbleBatch) clear(key MVCCKey) error { if p.distinctOpen { panic("distinct batch open") @@ -323,8 +257,8 @@ func (p *pebbleBatch) clear(key MVCCKey) error { return p.batch.Delete(p.buf, nil) } -// SingleClearEngineKey implements the Batch interface. -func (p *pebbleBatch) SingleClearEngineKey(key EngineKey) error { +// SingleClearEngine implements the Batch interface. +func (p *pebbleBatch) SingleClearEngine(key EngineKey) error { if p.distinctOpen { panic("distinct batch open") } @@ -422,19 +356,6 @@ func (p *pebbleBatch) PutIntent(key roachpb.Key, value []byte) error { return p.put(MVCCKey{Key: key}, value) } -// PutEngineKey implements the Batch interface. -func (p *pebbleBatch) PutEngineKey(key EngineKey, value []byte) error { - if p.distinctOpen { - panic("distinct batch open") - } - if len(key.Key) == 0 { - return emptyKeyError() - } - - p.buf = key.EncodeToBuf(p.buf[:0]) - return p.batch.Set(p.buf, value, nil) -} - func (p *pebbleBatch) put(key MVCCKey, value []byte) error { if p.distinctOpen { panic("distinct batch open") diff --git a/pkg/storage/pebble_iterator.go b/pkg/storage/pebble_iterator.go index b93e673eb865..5f29bd78c1c4 100644 --- a/pkg/storage/pebble_iterator.go +++ b/pkg/storage/pebble_iterator.go @@ -20,18 +20,16 @@ import ( "github.com/cockroachdb/cockroach/pkg/storage/enginepb" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/protoutil" - "github.com/cockroachdb/errors" "github.com/cockroachdb/pebble" ) -// pebbleIterator is a wrapper around a pebble.Iterator that implements the -// MVCCIterator and EngineIterator interfaces. A single pebbleIterator -// should only be used in one of the two modes. +// pebbleIterator is a wrapper around a pebble.MVCCIterator that implements the +// MVCCIterator interface. type pebbleIterator struct { // Underlying iterator for the DB. iter *pebble.Iterator options pebble.IterOptions - // Reusable buffer for MVCCKey or EngineKey encoding. + // Reusable buffer for MVCC key encoding. keyBuf []byte // Buffers for copying iterator bounds to. Note that the underlying memory // is not GCed upon Close(), to reduce the number of overall allocations. We @@ -42,19 +40,18 @@ type pebbleIterator struct { upperBoundBuf [2][]byte curBuf int // Set to true to govern whether to call SeekPrefixGE or SeekGE. Skips - // SSTables based on MVCC/Engine key when true. + // SSTables based on MVCC key when true. prefix bool // If reusable is true, Close() does not actually close the underlying // iterator, but simply marks it as not inuse. Used by pebbleReadOnly. reusable bool inuse bool // Stat tracking the number of sstables encountered during time-bound - // iteration. Only used for MVCCIterator. + // iteration. timeBoundNumSSTables int } var _ MVCCIterator = &pebbleIterator{} -var _ EngineIterator = &pebbleIterator{} var pebbleIterPool = sync.Pool{ New: func() interface{} { @@ -63,7 +60,7 @@ var pebbleIterPool = sync.Pool{ } // Instantiates a new Pebble iterator, or gets one from the pool. -func newPebbleIterator(handle pebble.Reader, opts IterOptions) *pebbleIterator { +func newPebbleIterator(handle pebble.Reader, opts IterOptions) MVCCIterator { iter := pebbleIterPool.Get().(*pebbleIterator) iter.init(handle, opts) return iter @@ -87,11 +84,9 @@ func (p *pebbleIterator) init(handle pebble.Reader, opts IterOptions) { if opts.LowerBound != nil { // This is the same as - // p.options.LowerBound = EncodeKeyToBuf(p.lowerBoundBuf[0][:0], MVCCKey{Key: opts.LowerBound}) - // or EngineKey{Key: opts.LowerBound}.EncodeToBuf(...). - // Since we are encoding keys with an empty version anyway, we can just - // append the NUL byte instead of calling the above encode functions which - // will do the same thing. + // p.options.LowerBound = EncodeKeyToBuf(p.lowerBoundBuf[0][:0], MVCCKey{Key: opts.LowerBound}) . + // Since we are encoding zero-timestamp MVCC Keys anyway, we can just append + // the NUL byte instead of calling EncodeKey which will do the same thing. p.lowerBoundBuf[0] = append(p.lowerBoundBuf[0][:0], opts.LowerBound...) p.lowerBoundBuf[0] = append(p.lowerBoundBuf[0], 0x00) p.options.LowerBound = p.lowerBoundBuf[0] @@ -156,10 +151,8 @@ func (p *pebbleIterator) setOptions(opts IterOptions) { if opts.LowerBound != nil { // This is the same as // p.options.LowerBound = EncodeKeyToBuf(p.lowerBoundBuf[i][:0], MVCCKey{Key: opts.LowerBound}) . - // or EngineKey{Key: opts.LowerBound}.EncodeToBuf(...). - // Since we are encoding keys with an empty version anyway, we can just - // append the NUL byte instead of calling the above encode functions which - // will do the same thing. + // Since we are encoding zero-timestamp MVCC Keys anyway, we can just append + // the NUL byte instead of calling EncodeKey which will do the same thing. p.lowerBoundBuf[i] = append(p.lowerBoundBuf[i][:0], opts.LowerBound...) p.lowerBoundBuf[i] = append(p.lowerBoundBuf[i], 0x00) p.options.LowerBound = p.lowerBoundBuf[i] @@ -199,26 +192,9 @@ func (p *pebbleIterator) SeekGE(key MVCCKey) { } } -// SeekEngineKeyGE implements the EngineIterator interface. -func (p *pebbleIterator) SeekEngineKeyGE(key EngineKey) (valid bool, err error) { - p.keyBuf = key.EncodeToBuf(p.keyBuf[:0]) - var ok bool - if p.prefix { - ok = p.iter.SeekPrefixGE(p.keyBuf) - } else { - ok = p.iter.SeekGE(p.keyBuf) - } - // NB: A Pebble Iterator always returns ok==false when an error is - // present. - if ok { - return true, nil - } - return false, p.iter.Error() -} - // Valid implements the MVCCIterator interface. func (p *pebbleIterator) Valid() (bool, error) { - // NB: A Pebble Iterator always returns Valid()==false when an error is + // NB: A Pebble MVCCIterator always returns Valid()==false when an error is // present. If Valid() is true, there is no error. if ok := p.iter.Valid(); ok { return ok, nil @@ -231,17 +207,6 @@ func (p *pebbleIterator) Next() { p.iter.Next() } -// NextEngineKey implements the Engineterator interface. -func (p *pebbleIterator) NextEngineKey() (valid bool, err error) { - ok := p.iter.Next() - // NB: A Pebble Iterator always returns ok==false when an error is - // present. - if ok { - return true, nil - } - return false, p.iter.Error() -} - // NextKey implements the MVCCIterator interface. func (p *pebbleIterator) NextKey() { if valid, err := p.Valid(); err != nil || !valid { @@ -272,21 +237,12 @@ func (p *pebbleIterator) UnsafeKey() MVCCKey { return mvccKey } -// UnsafeEngineKey implements the EngineIterator interface. -func (p *pebbleIterator) UnsafeEngineKey() (EngineKey, error) { - engineKey, ok := DecodeEngineKey(p.iter.Key()) - if !ok { - return engineKey, errors.Errorf("invalid encoded engine key: %x", p.iter.Key()) - } - return engineKey, nil -} - -// UnsafeRawKey returns the raw key from the underlying pebble.Iterator. +// UnsafeRawKey returns the raw key from the underlying pebble.MVCCIterator. func (p *pebbleIterator) UnsafeRawKey() []byte { return p.iter.Key() } -// UnsafeValue implements the MVCCIterator and EngineIterator interfaces. +// UnsafeValue implements the MVCCIterator interface. func (p *pebbleIterator) UnsafeValue() []byte { if valid, err := p.Valid(); err != nil || !valid { return nil @@ -300,34 +256,11 @@ func (p *pebbleIterator) SeekLT(key MVCCKey) { p.iter.SeekLT(p.keyBuf) } -// SeekEngineKeyLT implements the EngineIterator interface. -func (p *pebbleIterator) SeekEngineKeyLT(key EngineKey) (valid bool, err error) { - p.keyBuf = key.EncodeToBuf(p.keyBuf[:0]) - ok := p.iter.SeekLT(p.keyBuf) - // NB: A Pebble Iterator always returns ok==false when an error is - // present. - if ok { - return true, nil - } - return false, p.iter.Error() -} - // Prev implements the MVCCIterator interface. func (p *pebbleIterator) Prev() { p.iter.Prev() } -// PrevEngineKey implements the EngineIterator interface. -func (p *pebbleIterator) PrevEngineKey() (valid bool, err error) { - ok := p.iter.Prev() - // NB: A Pebble Iterator always returns ok==false when an error is - // present. - if ok { - return true, nil - } - return false, p.iter.Error() -} - // Key implements the MVCCIterator interface. func (p *pebbleIterator) Key() MVCCKey { key := p.UnsafeKey() @@ -337,16 +270,7 @@ func (p *pebbleIterator) Key() MVCCKey { return key } -// EngineKey implements the EngineIterator interface. -func (p *pebbleIterator) EngineKey() (EngineKey, error) { - key, err := p.UnsafeEngineKey() - if err != nil { - return key, err - } - return key.Copy(), nil -} - -// Value implements the MVCCIterator and EngineIterator interfaces. +// Value implements the MVCCIterator interface. func (p *pebbleIterator) Value() []byte { value := p.UnsafeValue() valueCopy := make([]byte, len(value)) @@ -365,7 +289,7 @@ func (p *pebbleIterator) ValueProto(msg protoutil.Message) error { func (p *pebbleIterator) ComputeStats( start, end roachpb.Key, nowNanos int64, ) (enginepb.MVCCStats, error) { - return ComputeStatsForRange(p, start, end, nowNanos) + return ComputeStatsGo(p, start, end, nowNanos) } // Go-only version of IsValidSplitKey. Checks if the specified key is in diff --git a/pkg/storage/sst_writer.go b/pkg/storage/sst_writer.go index aeacf4be6413..1214bed251d5 100644 --- a/pkg/storage/sst_writer.go +++ b/pkg/storage/sst_writer.go @@ -139,19 +139,6 @@ func (fw *SSTWriter) PutIntent(key roachpb.Key, value []byte) error { return fw.put(MVCCKey{Key: key}, value) } -// PutEngineKey implements the Writer interface. -// An error is returned if it is not greater than any previously added entry -// (according to the comparator configured during writer creation). `Close` -// cannot have been called. -func (fw *SSTWriter) PutEngineKey(key EngineKey, value []byte) error { - if fw.fw == nil { - return errors.New("cannot call Put on a closed writer") - } - fw.DataSize += int64(len(key.Key)) + int64(len(value)) - fw.scratch = key.EncodeToBuf(fw.scratch[:0]) - return fw.fw.Set(fw.scratch, value) -} - // put puts a kv entry into the sstable being built. An error is returned if it // is not greater than any previously added entry (according to the comparator // configured during writer creation). `Close` cannot have been called. @@ -196,19 +183,6 @@ func (fw *SSTWriter) ClearIntent(key roachpb.Key) error { panic("ClearIntent is unsupported") } -// ClearEngineKey implements the Writer interface. An error is returned if it is -// not greater than any previous point key passed to this Writer (according to -// the comparator configured during writer creation). `Close` cannot have been -// called. -func (fw *SSTWriter) ClearEngineKey(key EngineKey) error { - if fw.fw == nil { - return errors.New("cannot call Clear on a closed writer") - } - fw.scratch = key.EncodeToBuf(fw.scratch[:0]) - fw.DataSize += int64(len(key.Key)) - return fw.fw.Delete(fw.scratch) -} - // An error is returned if it is not greater than any previous point key // passed to this Writer (according to the comparator configured during writer // creation). `Close` cannot have been called.