From 90c1ee697c8e92336ab0500b96c6b0b42a41f381 Mon Sep 17 00:00:00 2001 From: Jackson Owens Date: Wed, 28 Jun 2023 10:19:18 -0400 Subject: [PATCH] Revert "storage: use size-carrying point tombstones" This reverts commit 8edbf0f5b81a2ca1de69afc744f8af49d4e18d91. Epic: none Release note: none --- .../settings/settings-for-tenants.txt | 2 +- docs/generated/settings/settings.html | 2 +- pkg/clusterversion/cockroach_versions.go | 19 ----- pkg/cmd/roachtest/tests/tombstones.go | 10 ++- pkg/kv/kvserver/batch_spanset_test.go | 6 +- pkg/kv/kvserver/kvstorage/init.go | 2 +- pkg/kv/kvserver/loqrecovery/record.go | 2 +- pkg/kv/kvserver/replica_raft.go | 5 +- pkg/kv/kvserver/replica_test.go | 2 +- pkg/kv/kvserver/spanset/batch.go | 21 ++--- pkg/storage/batch_test.go | 49 +++-------- pkg/storage/engine.go | 55 ++---------- pkg/storage/engine_test.go | 19 ++--- pkg/storage/intent_interleaving_iter_test.go | 2 +- pkg/storage/intent_reader_writer.go | 4 +- pkg/storage/intent_reader_writer_test.go | 10 +-- pkg/storage/mvcc.go | 84 ++++--------------- pkg/storage/mvcc_history_test.go | 18 +--- pkg/storage/mvcc_test.go | 29 +------ pkg/storage/open.go | 12 --- pkg/storage/pebble.go | 52 ++++-------- pkg/storage/pebble_batch.go | 36 +++----- pkg/storage/pebble_test.go | 2 +- pkg/storage/read_as_of_iterator_test.go | 6 +- pkg/storage/sst_writer.go | 24 ++---- 25 files changed, 113 insertions(+), 360 deletions(-) diff --git a/docs/generated/settings/settings-for-tenants.txt b/docs/generated/settings/settings-for-tenants.txt index 31eae296ca4f..2c69b48d5879 100644 --- a/docs/generated/settings/settings-for-tenants.txt +++ b/docs/generated/settings/settings-for-tenants.txt @@ -303,4 +303,4 @@ trace.opentelemetry.collector string address of an OpenTelemetry trace collecto trace.snapshot.rate duration 0s if non-zero, interval at which background trace snapshots are captured tenant-rw trace.span_registry.enabled boolean true if set, ongoing traces can be seen at https:///#/debug/tracez tenant-rw trace.zipkin.collector string the address of a Zipkin instance to receive traces, as :. If no port is specified, 9411 will be used. tenant-rw -version version 1000023.1-14 set the active cluster version in the format '.' tenant-rw +version version 1000023.1-10 set the active cluster version in the format '.' tenant-rw diff --git a/docs/generated/settings/settings.html b/docs/generated/settings/settings.html index 1b7864866bae..02b7cf67da88 100644 --- a/docs/generated/settings/settings.html +++ b/docs/generated/settings/settings.html @@ -257,6 +257,6 @@
trace.snapshot.rate
duration0sif non-zero, interval at which background trace snapshots are capturedServerless/Dedicated/Self-Hosted
trace.span_registry.enabled
booleantrueif set, ongoing traces can be seen at https://<ui>/#/debug/tracezServerless/Dedicated/Self-Hosted
trace.zipkin.collector
stringthe address of a Zipkin instance to receive traces, as <host>:<port>. If no port is specified, 9411 will be used.Serverless/Dedicated/Self-Hosted -
version
version1000023.1-14set the active cluster version in the format '<major>.<minor>'Serverless/Dedicated/Self-Hosted +
version
version1000023.1-10set the active cluster version in the format '<major>.<minor>'Serverless/Dedicated/Self-Hosted diff --git a/pkg/clusterversion/cockroach_versions.go b/pkg/clusterversion/cockroach_versions.go index ed7c33cd58f3..6d0adb612248 100644 --- a/pkg/clusterversion/cockroach_versions.go +++ b/pkg/clusterversion/cockroach_versions.go @@ -542,17 +542,6 @@ const ( // that (optionally) embed below-raft admission data. V23_2_UseACRaftEntryEntryEncodings - // V23_2_PebbleFormatDeleteSizedAndObsolete upgrades Pebble's format major - // version to FormatDeleteSizedAndObsolete, allowing use of a new sstable - // format version Pebblev4. This version has two improvements: - // a) It allows the use of DELSIZED point tombstones. - // b) It encodes the obsolence of keys in a key-kind bit. - V23_2_PebbleFormatDeleteSizedAndObsolete - - // V23_2_UseSizedPebblePointTombstones enables the use of Pebble's new - // DeleteSized operations. - V23_2_UseSizedPebblePointTombstones - // ************************************************* // Step (1) Add new versions here. // Do not add new versions to a patch release. @@ -954,14 +943,6 @@ var rawVersionsSingleton = keyedVersions{ Key: V23_2_UseACRaftEntryEntryEncodings, Version: roachpb.Version{Major: 23, Minor: 1, Internal: 10}, }, - { - Key: V23_2_PebbleFormatDeleteSizedAndObsolete, - Version: roachpb.Version{Major: 23, Minor: 1, Internal: 12}, - }, - { - Key: V23_2_UseSizedPebblePointTombstones, - Version: roachpb.Version{Major: 23, Minor: 1, Internal: 14}, - }, // ************************************************* // Step (2): Add new versions here. diff --git a/pkg/cmd/roachtest/tests/tombstones.go b/pkg/cmd/roachtest/tests/tombstones.go index 65f979657dbc..a2eaa257ffad 100644 --- a/pkg/cmd/roachtest/tests/tombstones.go +++ b/pkg/cmd/roachtest/tests/tombstones.go @@ -29,6 +29,12 @@ import ( // registerPointTombstone registers the point tombstone test. func registerPointTombstone(r registry.Registry) { r.Add(registry.TestSpec{ + Skip: "pebble#2340", + SkipDetails: "This roachtest is implemented ahead of implementing and using " + + "pebble#2340 within Cockroach. Currently, this roachtest fails through " + + "a timeout because the disk space corresponding to the large KVs is " + + "never reclaimed. Once pebble#2340 is integrated into Cockroach, we " + + "expect this to begin passing, and we can un-skip it.", Name: "point-tombstone/heterogeneous-value-sizes", Owner: registry.OwnerStorage, Cluster: r.MakeClusterSpec(4), @@ -130,7 +136,7 @@ func registerPointTombstone(r registry.Registry) { require.LessOrEqual(t, statsAfterDeletes.livePercentage, 0.10) // Wait for garbage collection to delete the non-live data. - targetSize := uint64(3 << 30) /* 3 GiB */ + targetSize := uint64(2 << 30) /* 2 GB */ t.Status("waiting for garbage collection and compaction to reduce on-disk size to ", humanize.IBytes(targetSize)) m = c.NewMonitor(ctx, c.Range(1, 3)) m.Go(func(ctx context.Context) error { @@ -166,7 +172,7 @@ type tableSizeInfo struct { } func (info tableSizeInfo) String() string { - return fmt.Sprintf("databaseID: %d, tableID: %d, rangeCount: %d, approxDiskBytes: %s, liveBytes: %s, totalBytes: %s, livePercentage: %.2f", + return fmt.Sprintf("databaseID: %d, tableID: %d, rangeCount: %d, approxDiskBytes: %s, liveBytes: %s, totalBytes: %s, livePercentage: %.1f", info.databaseID, info.tableID, info.rangeCount, diff --git a/pkg/kv/kvserver/batch_spanset_test.go b/pkg/kv/kvserver/batch_spanset_test.go index 593c3ae41331..81e2559862ba 100644 --- a/pkg/kv/kvserver/batch_spanset_test.go +++ b/pkg/kv/kvserver/batch_spanset_test.go @@ -72,7 +72,7 @@ func TestSpanSetBatchBoundaries(t *testing.T) { } t.Run("writes before range", func(t *testing.T) { - if err := batch.ClearUnversioned(outsideKey.Key, storage.ClearOptions{}); !isWriteSpanErr(err) { + if err := batch.ClearUnversioned(outsideKey.Key); !isWriteSpanErr(err) { t.Errorf("ClearUnversioned: unexpected error %v", err) } if err := batch.ClearRawRange(outsideKey.Key, outsideKey2.Key, true, true); !isWriteSpanErr(err) { @@ -93,7 +93,7 @@ func TestSpanSetBatchBoundaries(t *testing.T) { }) t.Run("writes after range", func(t *testing.T) { - if err := batch.ClearUnversioned(outsideKey3.Key, storage.ClearOptions{}); !isWriteSpanErr(err) { + if err := batch.ClearUnversioned(outsideKey3.Key); !isWriteSpanErr(err) { t.Errorf("ClearUnversioned: unexpected error %v", err) } if err := batch.ClearRawRange(insideKey2.Key, outsideKey4.Key, true, true); !isWriteSpanErr(err) { @@ -303,7 +303,7 @@ func TestSpanSetBatchTimestamps(t *testing.T) { } for _, batch := range []storage.Batch{batchBefore, batchNonMVCC} { - if err := batch.ClearUnversioned(wkey.Key, storage.ClearOptions{}); !isWriteSpanErr(err) { + if err := batch.ClearUnversioned(wkey.Key); !isWriteSpanErr(err) { t.Errorf("ClearUnversioned: unexpected error %v", err) } { diff --git a/pkg/kv/kvserver/kvstorage/init.go b/pkg/kv/kvserver/kvstorage/init.go index fd65085c6ddc..ab0428dc212c 100644 --- a/pkg/kv/kvserver/kvstorage/init.go +++ b/pkg/kv/kvserver/kvstorage/init.go @@ -504,7 +504,7 @@ func LoadAndReconcileReplicas(ctx context.Context, eng storage.Engine) ([]Replic // TODO(tbg): if clearRangeData were in this package we could destroy more // effectively even if for some reason we had in the past written state // other than the HardState here (not supposed to happen, but still). - if err := eng.ClearUnversioned(logstore.NewStateLoader(repl.RangeID).RaftHardStateKey(), storage.ClearOptions{}); err != nil { + if err := eng.ClearUnversioned(logstore.NewStateLoader(repl.RangeID).RaftHardStateKey()); err != nil { return nil, errors.Wrapf(err, "removing HardState for r%d", repl.RangeID) } log.Eventf(ctx, "removed legacy uninitialized replica for r%s", repl.RangeID) diff --git a/pkg/kv/kvserver/loqrecovery/record.go b/pkg/kv/kvserver/loqrecovery/record.go index 0b980aa8b95a..874dfe2d3e3b 100644 --- a/pkg/kv/kvserver/loqrecovery/record.go +++ b/pkg/kv/kvserver/loqrecovery/record.go @@ -107,7 +107,7 @@ func RegisterOfflineRecoveryEvents( continue } if removeEvent { - if err := readWriter.ClearUnversioned(iter.UnsafeKey().Key, storage.ClearOptions{}); err != nil { + if err := readWriter.ClearUnversioned(iter.UnsafeKey().Key); err != nil { processingErrors = errors.CombineErrors(processingErrors, errors.Wrapf( err, "failed to delete replica recovery record at key %s", iter.UnsafeKey())) continue diff --git a/pkg/kv/kvserver/replica_raft.go b/pkg/kv/kvserver/replica_raft.go index 351302cbafc1..cfa6780a07ff 100644 --- a/pkg/kv/kvserver/replica_raft.go +++ b/pkg/kv/kvserver/replica_raft.go @@ -2601,10 +2601,7 @@ func handleTruncatedStateBelowRaftPreApply( // avoid allocating when constructing Raft log keys (16 bytes). prefix := prefixBuf.RaftLogPrefix() for idx := currentTruncatedState.Index + 1; idx <= suggestedTruncatedState.Index; idx++ { - if err := readWriter.ClearUnversioned( - keys.RaftLogKeyFromPrefix(prefix, idx), - storage.ClearOptions{}, - ); err != nil { + if err := readWriter.ClearUnversioned(keys.RaftLogKeyFromPrefix(prefix, idx)); err != nil { return false, errors.Wrapf(err, "unable to clear truncated Raft entries for %+v at index %d", suggestedTruncatedState, idx) } diff --git a/pkg/kv/kvserver/replica_test.go b/pkg/kv/kvserver/replica_test.go index 65c5fcf06f10..f311397cd8ad 100644 --- a/pkg/kv/kvserver/replica_test.go +++ b/pkg/kv/kvserver/replica_test.go @@ -1823,7 +1823,7 @@ func TestOptimizePuts(t *testing.T) { require.NoError(t, tc.engine.ClearMVCCRangeKey(storage.MVCCRangeKey{ StartKey: c.exKey, EndKey: c.exEndKey, Timestamp: hlc.MinTimestamp})) } else if c.exKey != nil { - require.NoError(t, tc.engine.ClearUnversioned(c.exKey, storage.ClearOptions{})) + require.NoError(t, tc.engine.ClearUnversioned(c.exKey)) } } } diff --git a/pkg/kv/kvserver/spanset/batch.go b/pkg/kv/kvserver/spanset/batch.go index 2b49b71c388e..5d2a773379ae 100644 --- a/pkg/kv/kvserver/spanset/batch.go +++ b/pkg/kv/kvserver/spanset/batch.go @@ -415,11 +415,6 @@ func (i *EngineIterator) Value() ([]byte, error) { return i.i.Value() } -// ValueLen is part of the storage.EngineIterator interface. -func (i *EngineIterator) ValueLen() int { - return i.i.ValueLen() -} - // UnsafeRawEngineKey is part of the storage.EngineIterator interface. func (i *EngineIterator) UnsafeRawEngineKey() []byte { return i.i.UnsafeRawEngineKey() @@ -527,34 +522,34 @@ func (s spanSetWriter) checkAllowed(key roachpb.Key) error { return nil } -func (s spanSetWriter) ClearMVCC(key storage.MVCCKey, opts storage.ClearOptions) error { +func (s spanSetWriter) ClearMVCC(key storage.MVCCKey) error { if err := s.checkAllowed(key.Key); err != nil { return err } - return s.w.ClearMVCC(key, opts) + return s.w.ClearMVCC(key) } -func (s spanSetWriter) ClearUnversioned(key roachpb.Key, opts storage.ClearOptions) error { +func (s spanSetWriter) ClearUnversioned(key roachpb.Key) error { if err := s.checkAllowed(key); err != nil { return err } - return s.w.ClearUnversioned(key, opts) + return s.w.ClearUnversioned(key) } func (s spanSetWriter) ClearIntent( - key roachpb.Key, txnDidNotUpdateMeta bool, txnUUID uuid.UUID, opts storage.ClearOptions, + key roachpb.Key, txnDidNotUpdateMeta bool, txnUUID uuid.UUID, ) error { if err := s.checkAllowed(key); err != nil { return err } - return s.w.ClearIntent(key, txnDidNotUpdateMeta, txnUUID, opts) + return s.w.ClearIntent(key, txnDidNotUpdateMeta, txnUUID) } -func (s spanSetWriter) ClearEngineKey(key storage.EngineKey, opts storage.ClearOptions) error { +func (s spanSetWriter) ClearEngineKey(key storage.EngineKey) error { if err := s.spans.CheckAllowed(SpanReadWrite, roachpb.Span{Key: key.Key}); err != nil { return err } - return s.w.ClearEngineKey(key, opts) + return s.w.ClearEngineKey(key) } func (s spanSetWriter) SingleClearEngineKey(key storage.EngineKey) error { diff --git a/pkg/storage/batch_test.go b/pkg/storage/batch_test.go index c9d8dc1d0d9b..c57617ff672d 100644 --- a/pkg/storage/batch_test.go +++ b/pkg/storage/batch_test.go @@ -13,7 +13,6 @@ package storage import ( "bytes" "context" - "encoding/binary" "fmt" "reflect" "strconv" @@ -79,7 +78,7 @@ func testBatchBasics(t *testing.T, writeOnly bool, commit func(e Engine, b Write // Write an engine value to be deleted. require.NoError(t, e.PutUnversioned(mvccKey("b").Key, []byte("value"))) - require.NoError(t, b.ClearUnversioned(mvccKey("b").Key, ClearOptions{})) + require.NoError(t, b.ClearUnversioned(mvccKey("b").Key)) // Write an engine value to be merged. require.NoError(t, e.PutUnversioned(mvccKey("c").Key, appender("foo"))) @@ -92,25 +91,12 @@ func testBatchBasics(t *testing.T, writeOnly bool, commit func(e Engine, b Write require.NoError(t, e.PutUnversioned(mvccKey("d").Key, []byte("before"))) require.NoError(t, b.SingleClearEngineKey(EngineKey{Key: mvccKey("d").Key})) - // Write a MVCC value to be deleted with a known value size. - keyF := mvccKey("f") - keyF.Timestamp.WallTime = 1 - valueF := MVCCValue{Value: roachpb.Value{RawBytes: []byte("fvalue")}} - encodedValueF, err := EncodeMVCCValue(valueF) - require.NoError(t, err) - require.NoError(t, e.PutMVCC(keyF, valueF)) - require.NoError(t, b.ClearMVCC(keyF, ClearOptions{ - ValueSizeKnown: true, - ValueSize: uint32(len(encodedValueF)), - })) - // Check all keys are in initial state (nothing from batch has gone // through to engine until commit). expValues := []MVCCKeyValue{ {Key: mvccKey("b"), Value: []byte("value")}, {Key: mvccKey("c"), Value: appender("foo")}, {Key: mvccKey("d"), Value: []byte("before")}, - {Key: keyF, Value: encodedValueF}, } kvs, err := Scan(e, localMax, roachpb.KeyMax, 0) require.NoError(t, err) @@ -213,7 +199,7 @@ func TestReadOnlyBasics(t *testing.T) { // For a read-only ReadWriter, all Writer methods should panic. failureTestCases := []func(){ func() { _ = ro.ApplyBatchRepr(nil, false) }, - func() { _ = ro.ClearUnversioned(a.Key, ClearOptions{}) }, + func() { _ = ro.ClearUnversioned(a.Key) }, func() { _ = ro.SingleClearEngineKey(EngineKey{Key: a.Key}) }, func() { _ = ro.ClearRawRange(a.Key, a.Key, true, true) }, func() { _ = ro.Merge(a, nil) }, @@ -229,7 +215,7 @@ func TestReadOnlyBasics(t *testing.T) { if err := e.PutUnversioned(mvccKey("b").Key, []byte("value")); err != nil { t.Fatal(err) } - if err := e.ClearUnversioned(mvccKey("b").Key, ClearOptions{}); err != nil { + if err := e.ClearUnversioned(mvccKey("b").Key); err != nil { t.Fatal(err) } if err := e.PutUnversioned(mvccKey("c").Key, appender("foo")); err != nil { @@ -263,17 +249,13 @@ func TestReadOnlyBasics(t *testing.T) { func TestBatchRepr(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) - // Disable metamorphism in the value-encoding; our asserts include the - // length of the encoded value to test delete-sized. - - DisableMetamorphicSimpleValueEncoding(t) testBatchBasics(t, false /* writeOnly */, func(e Engine, b WriteBatch) error { repr := b.Repr() r, err := NewBatchReader(repr) require.NoError(t, err) - const expectedCount = 6 + const expectedCount = 5 require.Equal(t, expectedCount, r.Count()) count, err := BatchCount(repr) require.NoError(t, err) @@ -285,9 +267,6 @@ func TestBatchRepr(t *testing.T) { switch r.KeyKind() { case pebble.InternalKeyKindDelete: ops = append(ops, fmt.Sprintf("delete(%s)", string(r.Key()))) - case pebble.InternalKeyKindDeleteSized: - v, _ := binary.Uvarint(r.Value()) - ops = append(ops, fmt.Sprintf("delete-sized(%s,%d)", string(r.Key()), v)) case pebble.InternalKeyKindSet: ops = append(ops, fmt.Sprintf("put(%s,%s)", string(r.Key()), string(r.Value()))) case pebble.InternalKeyKindMerge: @@ -308,7 +287,6 @@ func TestBatchRepr(t *testing.T) { "merge(c\x00)", "put(e\x00,)", "single_delete(d\x00)", - "delete-sized(f\x00\x00\x00\x00\x00\x00\x00\x00\x01\t,17)", } require.Equal(t, expOps, ops) @@ -405,7 +383,7 @@ func TestBatchGet(t *testing.T) { if err := b.PutUnversioned(mvccKey("a").Key, []byte("value")); err != nil { t.Fatal(err) } - if err := b.ClearUnversioned(mvccKey("b").Key, ClearOptions{}); err != nil { + if err := b.ClearUnversioned(mvccKey("b").Key); err != nil { t.Fatal(err) } if err := b.Merge(mvccKey("c"), appender("bar")); err != nil { @@ -457,7 +435,7 @@ func TestBatchMerge(t *testing.T) { if err := b.PutUnversioned(mvccKey("a").Key, appender("a-value")); err != nil { t.Fatal(err) } - if err := b.ClearUnversioned(mvccKey("b").Key, ClearOptions{}); err != nil { + if err := b.ClearUnversioned(mvccKey("b").Key); err != nil { t.Fatal(err) } if err := b.Merge(mvccKey("c"), appender("c-value")); err != nil { @@ -600,10 +578,7 @@ func TestBatchScanWithDelete(t *testing.T) { if err := e.PutUnversioned(mvccKey("a").Key, []byte("value")); err != nil { t.Fatal(err) } - if err := b.ClearUnversioned(mvccKey("a").Key, ClearOptions{ - ValueSizeKnown: true, - ValueSize: uint32(len("value")), - }); err != nil { + if err := b.ClearUnversioned(mvccKey("a").Key); err != nil { t.Fatal(err) } kvs, err := Scan(b, localMax, roachpb.KeyMax, 0) @@ -636,10 +611,7 @@ func TestBatchScanMaxWithDeleted(t *testing.T) { t.Fatal(err) } // Now, delete "a" in batch. - if err := b.ClearUnversioned(mvccKey("a").Key, ClearOptions{ - ValueSizeKnown: true, - ValueSize: uint32(len("value1")), - }); err != nil { + if err := b.ClearUnversioned(mvccKey("a").Key); err != nil { t.Fatal(err) } // A scan with max=1 should scan "b". @@ -890,8 +862,7 @@ func TestDecodeKey(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) - e, err := Open(context.Background(), InMemory(), - cluster.MakeTestingClusterSettings(), CacheSize(1<<20 /* 1 MiB */)) + e, err := Open(context.Background(), InMemory(), cluster.MakeClusterSettings(), CacheSize(1<<20 /* 1 MiB */)) assert.NoError(t, err) defer e.Close() @@ -953,7 +924,7 @@ func TestBatchReader(t *testing.T) { require.NoError(t, b.PutEngineRangeKey(roachpb.Key("rangeFrom"), roachpb.Key("rangeTo"), []byte{7}, []byte("engineRangeKey"))) // Clear some already empty keys. - require.NoError(t, b.ClearMVCC(pointKey("mvccKey", 9), ClearOptions{})) + require.NoError(t, b.ClearMVCC(pointKey("mvccKey", 9))) require.NoError(t, b.ClearMVCCRangeKey(rangeKey("rangeFrom", "rangeTo", 9))) require.NoError(t, b.ClearRawRange(roachpb.Key("clearFrom"), roachpb.Key("clearTo"), true, true)) diff --git a/pkg/storage/engine.go b/pkg/storage/engine.go index fbdfdd9320fe..0dc531e4014d 100644 --- a/pkg/storage/engine.go +++ b/pkg/storage/engine.go @@ -362,11 +362,6 @@ type EngineIterator interface { // Value returns the current value as a byte slice. // REQUIRES: latest positioning function returned valid=true. Value() ([]byte, error) - // ValueLen returns the length of the current value. ValueLen should be - // preferred when the actual value is not needed. In some circumstances, the - // storage engine may be able to avoid loading the value. - // REQUIRES: latest positioning function returned valid=true. - ValueLen() int // CloneContext is a low-level method only for use in the storage package, // that provides sufficient context that the iterator may be cloned. CloneContext() CloneContext @@ -636,22 +631,14 @@ type Writer interface { // actually removes entries from the storage engine, rather than inserting // MVCC tombstones. // - // If the caller knows the size of the value that is being cleared, they - // should set ClearOptions.{ValueSizeKnown, ValueSize} accordingly to - // improve the storage engine's ability to prioritize compactions. - // // It is safe to modify the contents of the arguments after it returns. - ClearMVCC(key MVCCKey, opts ClearOptions) error + ClearMVCC(key MVCCKey) error // ClearUnversioned removes an unversioned item from the db. It is for use // with inline metadata (not intents) and other unversioned keys (like // Range-ID local keys). It does not affect range keys. // - // If the caller knows the size of the value that is being cleared, they - // should set ClearOptions.{ValueSizeKnown, ValueSize} accordingly to - // improve the storage engine's ability to prioritize compactions. - // // It is safe to modify the contents of the arguments after it returns. - ClearUnversioned(key roachpb.Key, opts ClearOptions) error + ClearUnversioned(key roachpb.Key) error // ClearIntent removes an intent from the db. Unlike ClearMVCC and // ClearUnversioned, this is a higher-level method that may make changes in // parts of the key space that are not only a function of the input, and may @@ -666,18 +653,14 @@ type Writer interface { // that does a pair. If there isn't a performance // decrease, we can stop tracking txnDidNotUpdateMeta and still optimize // ClearIntent by always doing single-clear. - ClearIntent(key roachpb.Key, txnDidNotUpdateMeta bool, txnUUID uuid.UUID, opts ClearOptions) error + ClearIntent(key roachpb.Key, txnDidNotUpdateMeta bool, txnUUID uuid.UUID) error // ClearEngineKey removes the given point key from the engine. It does not // affect range keys. Note that clear actually removes entries from the // storage engine. This is a general-purpose and low-level method that should // be used sparingly, only when the other Clear* methods are not applicable. // - // If the caller knows the size of the value that is being cleared, they - // should set ClearOptions.{ValueSizeKnown, ValueSize} accordingly to - // improve the storage engine's ability to prioritize compactions. - // // It is safe to modify the contents of the arguments after it returns. - ClearEngineKey(key EngineKey, opts ClearOptions) error + ClearEngineKey(key EngineKey) error // ClearRawRange removes point and/or range keys from start (inclusive) to end // (exclusive) using Pebble range tombstones. It can be applied to a range @@ -854,31 +837,6 @@ type Writer interface { BufferedSize() int } -// ClearOptions holds optional parameters to methods that clear keys from the -// storage engine. -type ClearOptions struct { - // ValueSizeKnown indicates whether the ValueSize carries a meaningful - // value. If false, ValueSize is ignored. - ValueSizeKnown bool - // ValueSize may be provided to indicate the size of the existing KV - // record's value that is being removed. ValueSize should be the encoded - // value size that the storage engine observes. If the value is a - // MVCCMetadata, ValueSize should be the length of the encoded MVCCMetadata. - // If the value is a MVCCValue, ValueSize should be the length of the - // encoded MVCCValue. - // - // Setting ValueSize and ValueSizeKnown improves the storage engine's - // ability to estimate space amplification and prioritize compactions. - // Without it, compaction heuristics rely on average value sizes which are - // susceptible to over and under estimation. - // - // If the true value size is unknown, leave ValueSizeKnown false. - // Correctness is not compromised if ValueSize is incorrect; the underlying - // key will always be cleared regardless of whether its value size matches - // the provided value. - ValueSize uint32 -} - // ReadWriter is the read/write interface to an engine's data. type ReadWriter interface { Reader @@ -1516,10 +1474,7 @@ func ClearRangeWithHeuristic( if err != nil { return err } - if err = w.ClearEngineKey(key, ClearOptions{ - ValueSizeKnown: true, - ValueSize: uint32(iter.ValueLen()), - }); err != nil { + if err = w.ClearEngineKey(key); err != nil { return err } } diff --git a/pkg/storage/engine_test.go b/pkg/storage/engine_test.go index 2d38125772d8..8a9b28166028 100644 --- a/pkg/storage/engine_test.go +++ b/pkg/storage/engine_test.go @@ -151,7 +151,7 @@ func TestEngineBatchStaleCachedIterator(t *testing.T) { iter.SeekGE(key) - if err := batch.ClearUnversioned(key.Key, ClearOptions{}); err != nil { + if err := batch.ClearUnversioned(key.Key); err != nil { t.Fatal(err) } @@ -246,7 +246,7 @@ func TestEngineBatch(t *testing.T) { apply := func(rw ReadWriter, d data) error { if d.value == nil { - return rw.ClearUnversioned(d.key.Key, ClearOptions{}) + return rw.ClearUnversioned(d.key.Key) } else if d.merge { return rw.Merge(d.key, d.value) } @@ -277,7 +277,7 @@ func TestEngineBatch(t *testing.T) { currentBatch[k] = batch[shuffledIndices[k]] } // Reset the key - if err := engine.ClearUnversioned(key.Key, ClearOptions{}); err != nil { + if err := engine.ClearUnversioned(key.Key); err != nil { t.Fatal(err) } // Run it once with individual operations and remember the result. @@ -291,7 +291,7 @@ func TestEngineBatch(t *testing.T) { // Run the whole thing as a batch and compare. b := engine.NewBatch() defer b.Close() - if err := b.ClearUnversioned(key.Key, ClearOptions{}); err != nil { + if err := b.ClearUnversioned(key.Key); err != nil { t.Fatal(err) } for _, op := range currentBatch { @@ -350,9 +350,9 @@ func TestEnginePutGetDelete(t *testing.T) { for i, err := range []error{ engine.PutUnversioned(mvccKey("").Key, []byte("")), engine.PutUnversioned(NilKey.Key, []byte("")), - engine.ClearUnversioned(NilKey.Key, ClearOptions{}), - engine.ClearUnversioned(NilKey.Key, ClearOptions{}), - engine.ClearUnversioned(mvccKey("").Key, ClearOptions{}), + engine.ClearUnversioned(NilKey.Key), + engine.ClearUnversioned(NilKey.Key), + engine.ClearUnversioned(mvccKey("").Key), } { if err == nil { t.Fatalf("%d: illegal handling of empty key", i) @@ -382,10 +382,7 @@ func TestEnginePutGetDelete(t *testing.T) { if !bytes.Equal(val, c.value) { t.Errorf("expected key value %s to be %+v: got %+v", c.key, c.value, val) } - if err := engine.ClearUnversioned(c.key.Key, ClearOptions{ - ValueSizeKnown: true, - ValueSize: uint32(len(val)), - }); err != nil { + if err := engine.ClearUnversioned(c.key.Key); err != nil { t.Errorf("delete: expected no error, but got %s", err) } val = mvccGetRaw(t, engine, c.key) diff --git a/pkg/storage/intent_interleaving_iter_test.go b/pkg/storage/intent_interleaving_iter_test.go index 6bbcbde24be2..ab86814d12ea 100644 --- a/pkg/storage/intent_interleaving_iter_test.go +++ b/pkg/storage/intent_interleaving_iter_test.go @@ -550,7 +550,7 @@ func writeRandomData( if interleave { require.NoError(t, batch.PutUnversioned(kv.key.Key, kv.val)) if !kv.liveIntent { - require.NoError(t, batch.ClearUnversioned(kv.key.Key, ClearOptions{})) + require.NoError(t, batch.ClearUnversioned(kv.key.Key)) } } else { eKey, _ := kv.key.ToEngineKey(nil) diff --git a/pkg/storage/intent_reader_writer.go b/pkg/storage/intent_reader_writer.go index 8d3b74c6e555..c3968433485d 100644 --- a/pkg/storage/intent_reader_writer.go +++ b/pkg/storage/intent_reader_writer.go @@ -37,7 +37,7 @@ func wrapIntentWriter(w Writer) intentDemuxWriter { // scratch-space to avoid allocations -- its contents will be overwritten and // not appended to, and a possibly different buf returned. func (idw intentDemuxWriter) ClearIntent( - key roachpb.Key, txnDidNotUpdateMeta bool, txnUUID uuid.UUID, buf []byte, opts ClearOptions, + key roachpb.Key, txnDidNotUpdateMeta bool, txnUUID uuid.UUID, buf []byte, ) (_ []byte, _ error) { var engineKey EngineKey engineKey, buf = LockTableKey{ @@ -48,7 +48,7 @@ func (idw intentDemuxWriter) ClearIntent( if txnDidNotUpdateMeta { return buf, idw.w.SingleClearEngineKey(engineKey) } - return buf, idw.w.ClearEngineKey(engineKey, opts) + return buf, idw.w.ClearEngineKey(engineKey) } // PutIntent has the same behavior as Writer.PutIntent. buf is used as diff --git a/pkg/storage/intent_reader_writer_test.go b/pkg/storage/intent_reader_writer_test.go index 4c0e74ee10b9..55d20e8dcf08 100644 --- a/pkg/storage/intent_reader_writer_test.go +++ b/pkg/storage/intent_reader_writer_test.go @@ -114,12 +114,12 @@ func (p *printWriter) reset() { fmt.Fprintf(&p.b, "=== Calls ===\n") } -func (p *printWriter) ClearUnversioned(key roachpb.Key, opts ClearOptions) error { +func (p *printWriter) ClearUnversioned(key roachpb.Key) error { fmt.Fprintf(&p.b, "ClearUnversioned(%s)\n", string(key)) - return p.Writer.ClearUnversioned(key, opts) + return p.Writer.ClearUnversioned(key) } -func (p *printWriter) ClearEngineKey(key EngineKey, opts ClearOptions) error { +func (p *printWriter) ClearEngineKey(key EngineKey) error { ltKey, err := key.ToLockTableKey() var str string if err != nil { @@ -129,7 +129,7 @@ func (p *printWriter) ClearEngineKey(key EngineKey, opts ClearOptions) error { str = printLTKey(ltKey) } fmt.Fprintf(&p.b, "ClearEngineKey(%s)\n", str) - return p.Writer.ClearEngineKey(key, opts) + return p.Writer.ClearEngineKey(key) } func (p *printWriter) ClearRawRange(start, end roachpb.Key, pointKeys, rangeKeys bool) error { @@ -242,7 +242,7 @@ func TestIntentDemuxWriter(t *testing.T) { d.ScanArgs(t, "txn", &txn) txnUUID := uuid.FromUint128(uint128.FromInts(0, uint64(txn))) txnDidNotUpdateMeta := readTxnDidNotUpdateMeta(t, d) - scratch, err = w.ClearIntent(key, txnDidNotUpdateMeta, txnUUID, scratch, ClearOptions{}) + scratch, err = w.ClearIntent(key, txnDidNotUpdateMeta, txnUUID, scratch) if err != nil { return err.Error() } diff --git a/pkg/storage/mvcc.go b/pkg/storage/mvcc.go index dfbee01087c9..9e33a8179e13 100644 --- a/pkg/storage/mvcc.go +++ b/pkg/storage/mvcc.go @@ -910,8 +910,6 @@ func MVCCBlindPutInlineWithPrev( prev.IsPresent(), ok, origMetaKeySize, metaKeySize, origMetaValSize, metaValSize) } } - // TODO(jackson): Thread origMetaValSize through so that a resulting - // ClearUnversioned sets ClearOptions.ValueSize[Known]. return MVCCBlindPut(ctx, rw, ms, key, hlc.Timestamp{}, hlc.ClockTimestamp{}, value, nil) } @@ -1858,12 +1856,7 @@ func mvccPutInternal( return false, err } if !value.IsPresent() { - metaKeySize, metaValSize, err = 0, 0, writer.ClearUnversioned(metaKey.Key, ClearOptions{ - // NB: origMetaValSize is only populated by mvccGetMetadata if - // iter != nil. - ValueSizeKnown: iter != nil, - ValueSize: uint32(origMetaValSize), - }) + metaKeySize, metaValSize, err = 0, 0, writer.ClearUnversioned(metaKey.Key) } else { buf.meta = enginepb.MVCCMetadata{RawBytes: value.RawBytes} metaKeySize, metaValSize, err = buf.putInlineMeta(writer, metaKey, &buf.meta) @@ -2079,12 +2072,7 @@ func mvccPutInternal( iter = nil // prevent accidental use below } - // TODO(jackson): Do we know the encoded value size in the other - // cases? - if err := writer.ClearMVCC(oldVersionKey, ClearOptions{ - ValueSizeKnown: curProvValRaw != nil, - ValueSize: uint32(len(curProvValRaw)), - }); err != nil { + if err := writer.ClearMVCC(oldVersionKey); err != nil { return false, err } } else if writeTimestamp.Less(metaTimestamp) { @@ -2662,27 +2650,22 @@ func MVCCClearTimeRange( // This can be a big win for reverting bulk-ingestion of clustered data as the // entire span may likely match and thus could be cleared in one ClearRange // instead of hundreds of thousands of individual Clears. - type bufferedKey struct { - MVCCKey - valLen uint32 - } - buf := make([]bufferedKey, clearRangeThreshold) + buf := make([]MVCCKey, clearRangeThreshold) var bufSize int var clearRangeStart MVCCKey - clearMatchingKey := func(k MVCCKey, valLen uint32) { + clearMatchingKey := func(k MVCCKey) { if len(clearRangeStart.Key) == 0 { // Currently buffering keys to clear one-by-one. if bufSize < clearRangeThreshold { buf[bufSize].Key = append(buf[bufSize].Key[:0], k.Key...) buf[bufSize].Timestamp = k.Timestamp - buf[bufSize].valLen = valLen bufSize++ } else { // Buffer is now full -- switch to just tracking the start of the range // from which we will clear when we either see a non-matching key or if // we finish iterating. - clearRangeStart = buf[0].MVCCKey + clearRangeStart = buf[0] bufSize = 0 } } @@ -2699,13 +2682,13 @@ func MVCCClearTimeRange( } else if bufSize > 0 { var encodedBufSize int64 for i := 0; i < bufSize; i++ { - encodedBufSize += int64(buf[i].MVCCKey.EncodedSize()) + encodedBufSize += int64(buf[i].EncodedSize()) } // Even though we didn't get a large enough number of keys to switch to // clearrange, the byte size of the keys we did get is now too large to // encode them all within the byte size limit, so use clearrange anyway. if batchByteSize+encodedBufSize >= maxBatchByteSize { - if err := rw.ClearMVCCVersions(buf[0].MVCCKey, nonMatch); err != nil { + if err := rw.ClearMVCCVersions(buf[0], nonMatch); err != nil { return err } batchByteSize += int64(buf[0].EncodedSize() + nonMatch.EncodedSize()) @@ -2715,17 +2698,11 @@ func MVCCClearTimeRange( if buf[i].Timestamp.IsEmpty() { // Inline metadata. Not an intent because iteration below fails // if it sees an intent. - if err := rw.ClearUnversioned(buf[i].Key, ClearOptions{ - ValueSizeKnown: true, - ValueSize: buf[i].valLen, - }); err != nil { + if err := rw.ClearUnversioned(buf[i].Key); err != nil { return err } } else { - if err := rw.ClearMVCC(buf[i].MVCCKey, ClearOptions{ - ValueSizeKnown: true, - ValueSize: buf[i].valLen, - }); err != nil { + if err := rw.ClearMVCC(buf[i]); err != nil { return err } } @@ -2974,7 +2951,7 @@ func MVCCClearTimeRange( clearedMetaKey.Key = clearedMetaKey.Key[:0] if startTime.Less(k.Timestamp) && k.Timestamp.LessEq(endTime) { - clearMatchingKey(k, uint32(valueLen)) + clearMatchingKey(k) clearedMetaKey.Key = append(clearedMetaKey.Key[:0], k.Key...) clearedMeta.KeyBytes = MVCCVersionTimestampSize clearedMeta.ValBytes = int64(valueLen) @@ -4820,10 +4797,7 @@ func mvccResolveWriteIntent( if err = rw.PutMVCC(newKey, newValue); err != nil { return false, err } - if err = rw.ClearMVCC(oldKey, ClearOptions{ - ValueSizeKnown: true, - ValueSize: uint32(len(v)), - }); err != nil { + if err = rw.ClearMVCC(oldKey); err != nil { return false, err } @@ -4866,10 +4840,7 @@ func mvccResolveWriteIntent( ctx, rw, metaKey, newMeta, true /* alreadyExists */) } else { metaKeySize = int64(metaKey.EncodedSize()) - err = rw.ClearIntent(metaKey.Key, canSingleDelHelper.onCommitIntent(), meta.Txn.ID, ClearOptions{ - ValueSizeKnown: true, - ValueSize: uint32(origMetaValSize), - }) + err = rw.ClearIntent(metaKey.Key, canSingleDelHelper.onCommitIntent(), meta.Txn.ID) } if err != nil { return false, err @@ -4908,10 +4879,7 @@ func mvccResolveWriteIntent( // - ResolveIntent with epoch 0 aborts intent from epoch 1. // First clear the provisional value. - if err := rw.ClearMVCC(latestKey, ClearOptions{ - ValueSizeKnown: true, - ValueSize: uint32(meta.ValBytes), - }); err != nil { + if err := rw.ClearMVCC(latestKey); err != nil { return false, err } @@ -4977,10 +4945,7 @@ func mvccResolveWriteIntent( if !ok { // If there is no other version, we should just clean up the key entirely. - if err = rw.ClearIntent(metaKey.Key, canSingleDelHelper.onAbortIntent(), meta.Txn.ID, ClearOptions{ - ValueSizeKnown: true, - ValueSize: uint32(origMetaValSize), - }); err != nil { + if err = rw.ClearIntent(metaKey.Key, canSingleDelHelper.onAbortIntent(), meta.Txn.ID); err != nil { return false, err } // Clear stat counters attributable to the intent we're aborting. @@ -4997,10 +4962,7 @@ func mvccResolveWriteIntent( KeyBytes: MVCCVersionTimestampSize, ValBytes: int64(nextValueLen), } - if err = rw.ClearIntent(metaKey.Key, canSingleDelHelper.onAbortIntent(), meta.Txn.ID, ClearOptions{ - ValueSizeKnown: true, - ValueSize: uint32(origMetaValSize), - }); err != nil { + if err = rw.ClearIntent(metaKey.Key, canSingleDelHelper.onAbortIntent(), meta.Txn.ID); err != nil { return false, err } metaKeySize := int64(metaKey.EncodedSize()) @@ -5345,10 +5307,7 @@ func MVCCGarbageCollect( if !implicitMeta { // This must be an inline entry since we are not allowed to clear // intents, and we've confirmed that meta.Txn == nil earlier. - if err := rw.ClearUnversioned(iter.UnsafeKey().Key, ClearOptions{ - ValueSizeKnown: true, - ValueSize: uint32(iter.ValueLen()), - }); err != nil { + if err := rw.ClearUnversioned(iter.UnsafeKey().Key); err != nil { return err } count++ @@ -5474,15 +5433,11 @@ func MVCCGarbageCollect( if !unsafeIterKey.IsValue() { break } - - var clearOpts ClearOptions if ms != nil { valLen, valIsTombstone, err := iter.MVCCValueLenAndIsTombstone() if err != nil { return err } - clearOpts.ValueSizeKnown = true - clearOpts.ValueSize = uint32(valLen) keySize := MVCCVersionTimestampSize valSize := int64(valLen) @@ -5505,7 +5460,7 @@ func MVCCGarbageCollect( ms.Add(updateStatsOnGC(gcKey.Key, keySize, valSize, false /* metaKey */, fromNS)) } count++ - if err := rw.ClearMVCC(unsafeIterKey, clearOpts); err != nil { + if err := rw.ClearMVCC(unsafeIterKey); err != nil { return err } prevNanos = unsafeIterKey.Timestamp.WallTime @@ -7115,10 +7070,7 @@ func ReplacePointTombstonesWithRangeTombstones( clearedKey.Key = append(clearedKey.Key[:0], key.Key...) clearedKey.Timestamp = key.Timestamp clearedKeySize := int64(EncodedMVCCKeyPrefixLength(clearedKey.Key)) - if err := rw.ClearMVCC(key, ClearOptions{ - ValueSizeKnown: true, - ValueSize: uint32(valueLen), - }); err != nil { + if err := rw.ClearMVCC(key); err != nil { return err } diff --git a/pkg/storage/mvcc_history_test.go b/pkg/storage/mvcc_history_test.go index d7c1a9f13b4d..030af942c3fc 100644 --- a/pkg/storage/mvcc_history_test.go +++ b/pkg/storage/mvcc_history_test.go @@ -355,18 +355,6 @@ func TestMVCCHistories(t *testing.T) { } e := newEvalCtx(ctx, engine) - defer func() { - require.NoError(t, engine.Compact()) - m := engine.GetMetrics().Metrics - if m.Keys.MissizedTombstonesCount > 0 { - // A missized tombstone is a Pebble DELSIZED tombstone that encodes - // the wrong size of the value it deletes. This kind of tombstone is - // written when ClearOptions.ValueSizeKnown=true. If this assertion - // failed, something might be awry in the code clearing the key. Are - // we feeding the wrong value length to ValueSize? - t.Fatalf("expected to find 0 missized tombstones; found %d", m.Keys.MissizedTombstonesCount) - } - }() defer e.close() if strings.Contains(path, "_nometamorphiciter") { e.noMetamorphicIter = true @@ -890,11 +878,11 @@ func (rw intentPrintingReadWriter) PutIntent( } func (rw intentPrintingReadWriter) ClearIntent( - key roachpb.Key, txnDidNotUpdateMeta bool, txnUUID uuid.UUID, opts storage.ClearOptions, + key roachpb.Key, txnDidNotUpdateMeta bool, txnUUID uuid.UUID, ) error { rw.buf.Printf("called ClearIntent(%v, TDNUM(%t), %v)\n", key, txnDidNotUpdateMeta, txnUUID) - return rw.ReadWriter.ClearIntent(key, txnDidNotUpdateMeta, txnUUID, opts) + return rw.ReadWriter.ClearIntent(key, txnDidNotUpdateMeta, txnUUID) } func (e *evalCtx) tryWrapForIntentPrinting(rw storage.ReadWriter) storage.ReadWriter { @@ -1004,7 +992,7 @@ func cmdClear(e *evalCtx) error { key := e.getKey() ts := e.getTs(nil) return e.withWriter("clear", func(rw storage.ReadWriter) error { - return rw.ClearMVCC(storage.MVCCKey{Key: key, Timestamp: ts}, storage.ClearOptions{}) + return rw.ClearMVCC(storage.MVCCKey{Key: key, Timestamp: ts}) }) } diff --git a/pkg/storage/mvcc_test.go b/pkg/storage/mvcc_test.go index 7c09c4d5493e..8e650c5cbbfe 100644 --- a/pkg/storage/mvcc_test.go +++ b/pkg/storage/mvcc_test.go @@ -3685,8 +3685,7 @@ func generateBytes(rng *rand.Rand, min int, max int) []byte { } func createEngWithSeparatedIntents(t *testing.T) Engine { - eng, err := Open(context.Background(), InMemory(), - cluster.MakeTestingClusterSettings(), MaxSize(1<<20)) + eng, err := Open(context.Background(), InMemory(), cluster.MakeClusterSettings(), MaxSize(1<<20)) require.NoError(t, err) return eng } @@ -3925,7 +3924,7 @@ func TestRandomizedSavepointRollbackAndIntentResolution(t *testing.T) { rng := rand.New(rand.NewSource(seed)) ctx := context.Background() eng, err := Open( - context.Background(), InMemory(), cluster.MakeTestingClusterSettings(), + context.Background(), InMemory(), cluster.MakeClusterSettings(), func(cfg *engineConfig) error { cfg.Opts.LBaseMaxBytes = int64(100 + rng.Intn(16384)) log.Infof(ctx, "lbase: %d", cfg.Opts.LBaseMaxBytes) @@ -4872,9 +4871,6 @@ func TestMVCCGarbageCollect(t *testing.T) { assertEq(t, engine, "verification", ms, &expMS) }) } - // Compact the engine; the ForTesting() config option will assert that all - // DELSIZED tombstones were appropriately sized. - require.NoError(t, engine.Compact()) } // TestMVCCGarbageCollectNonDeleted verifies that the first value for @@ -4920,10 +4916,6 @@ func TestMVCCGarbageCollectNonDeleted(t *testing.T) { test.expError, err) } } - - // Compact the engine; the ForTesting() config option will assert that all - // DELSIZED tombstones were appropriately sized. - require.NoError(t, engine.Compact()) } // TestMVCCGarbageCollectIntent verifies that an intent cannot be GC'd. @@ -4958,9 +4950,6 @@ func TestMVCCGarbageCollectIntent(t *testing.T) { if err := MVCCGarbageCollect(ctx, engine, nil, keys, ts2); err == nil { t.Fatal("expected error garbage collecting an intent") } - // Compact the engine; the ForTesting() config option will assert that all - // DELSIZED tombstones were appropriately sized. - require.NoError(t, engine.Compact()) } // TestMVCCGarbageCollectPanicsWithMixOfLocalAndGlobalKeys verifies that @@ -5163,9 +5152,6 @@ func TestMVCCGarbageCollectUsesSeekLTAppropriately(t *testing.T) { engine := NewDefaultInMemForTesting() defer engine.Close() runTestCase(t, tc, engine) - // Compact the engine; the ForTesting() config option will assert - // that all DELSIZED tombstones were appropriately sized. - require.NoError(t, engine.Compact()) }) } } @@ -5716,10 +5702,6 @@ func TestMVCCGarbageCollectRanges(t *testing.T) { expMs, err := ComputeStats(engine, d.rangeStart, d.rangeEnd, tsMax.WallTime) require.NoError(t, err, "failed to compute stats for range") require.EqualValues(t, expMs, ms, "computed range stats vs gc'd") - - // Compact the engine; the ForTesting() config option will assert - // that all DELSIZED tombstones were appropriately sized. - require.NoError(t, engine.Compact()) }) } } @@ -5974,9 +5956,6 @@ func TestMVCCGarbageCollectClearRangeInlinedValue(t *testing.T) { require.Errorf(t, err, "expected error '%s' but found none", expectedError) require.True(t, testutils.IsError(err, expectedError), "expected error '%s' found '%s'", expectedError, err) - // Compact the engine; the ForTesting() config option will assert that all - // DELSIZED tombstones were appropriately sized. - require.NoError(t, engine.Compact()) } func TestMVCCGarbageCollectClearPointsInRange(t *testing.T) { @@ -6043,10 +6022,6 @@ func TestMVCCGarbageCollectClearPointsInRange(t *testing.T) { expMs, err := ComputeStats(engine, rangeStart, rangeEnd, tsMax.WallTime) require.NoError(t, err, "failed to compute stats for range") require.EqualValues(t, expMs, ms, "computed range stats vs gc'd") - - // Compact the engine; the ForTesting() config option will assert that all - // DELSIZED tombstones were appropriately sized. - require.NoError(t, engine.Compact()) } func TestMVCCGarbageCollectClearRangeFailure(t *testing.T) { diff --git a/pkg/storage/open.go b/pkg/storage/open.go index 044301b2f4c5..c59e4ea9cdfb 100644 --- a/pkg/storage/open.go +++ b/pkg/storage/open.go @@ -17,7 +17,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/cloud" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/settings/cluster" - "github.com/cockroachdb/errors" "github.com/cockroachdb/pebble" "github.com/cockroachdb/pebble/vfs" ) @@ -67,17 +66,6 @@ var ForceWriterParallelism ConfigOption = func(cfg *engineConfig) error { // ForTesting configures the engine for use in testing. It may randomize some // config options to improve test coverage. var ForTesting ConfigOption = func(cfg *engineConfig) error { - cfg.onClose = append(cfg.onClose, func(p *Pebble) { - m := p.db.Metrics() - if m.Keys.MissizedTombstonesCount > 0 { - // A missized tombstone is a Pebble DELSIZED tombstone that encodes - // the wrong size of the value it deletes. This kind of tombstone is - // written when ClearOptions.ValueSizeKnown=true. If this assertion - // failed, something might be awry in the code clearing the key. Are - // we feeding the wrong value length to ValueSize? - panic(errors.AssertionFailedf("expected to find 0 missized tombstones; found %d", m.Keys.MissizedTombstonesCount)) - } - }) return nil } diff --git a/pkg/storage/pebble.go b/pkg/storage/pebble.go index ec99e1cf4759..1a0e5be0c685 100644 --- a/pkg/storage/pebble.go +++ b/pkg/storage/pebble.go @@ -714,9 +714,6 @@ type PebbleConfig struct { // SharedStorage is a cloud.ExternalStorage that can be used by all Pebble // stores on this node and on other nodes to store sstables. SharedStorage cloud.ExternalStorage - - // onClose is a slice of functions to be invoked before the engine is closed. - onClose []func(*Pebble) } // EncryptionStatsHandler provides encryption related stats. @@ -799,8 +796,6 @@ type Pebble struct { // closer is populated when the database is opened. The closer is associated // with the filesystem. closer io.Closer - // onClose is a slice of functions to be invoked before the engine closes. - onClose []func(*Pebble) wrappedIntentWriter intentDemuxWriter @@ -1077,7 +1072,6 @@ func NewPebble(ctx context.Context, cfg PebbleConfig) (p *Pebble, err error) { logCtx: logCtx, storeIDPebbleLog: storeIDContainer, closer: filesystemCloser, - onClose: cfg.onClose, replayer: replay.NewWorkloadCollector(cfg.StorageConfig.Dir), } @@ -1334,10 +1328,6 @@ func (p *Pebble) Close() { p.logger.Infof("closing unopened pebble instance") return } - for _, closeFunc := range p.onClose { - closeFunc(p) - } - p.closed = true // Wait for any asynchronous goroutines to exit. @@ -1474,48 +1464,37 @@ func (p *Pebble) ApplyBatchRepr(repr []byte, sync bool) error { } // ClearMVCC implements the Engine interface. -func (p *Pebble) ClearMVCC(key MVCCKey, opts ClearOptions) error { +func (p *Pebble) ClearMVCC(key MVCCKey) error { if key.Timestamp.IsEmpty() { panic("ClearMVCC timestamp is empty") } - return p.clear(key, opts) + return p.clear(key) } // ClearUnversioned implements the Engine interface. -func (p *Pebble) ClearUnversioned(key roachpb.Key, opts ClearOptions) error { - return p.clear(MVCCKey{Key: key}, opts) +func (p *Pebble) ClearUnversioned(key roachpb.Key) error { + return p.clear(MVCCKey{Key: key}) } // ClearIntent implements the Engine interface. -func (p *Pebble) ClearIntent( - key roachpb.Key, txnDidNotUpdateMeta bool, txnUUID uuid.UUID, opts ClearOptions, -) error { - _, err := p.wrappedIntentWriter.ClearIntent(key, txnDidNotUpdateMeta, txnUUID, nil, opts) +func (p *Pebble) ClearIntent(key roachpb.Key, txnDidNotUpdateMeta bool, txnUUID uuid.UUID) error { + _, err := p.wrappedIntentWriter.ClearIntent(key, txnDidNotUpdateMeta, txnUUID, nil) return err } // ClearEngineKey implements the Engine interface. -func (p *Pebble) ClearEngineKey(key EngineKey, opts ClearOptions) error { +func (p *Pebble) ClearEngineKey(key EngineKey) error { if len(key.Key) == 0 { return emptyKeyError() } - if !opts.ValueSizeKnown || !p.settings.Version.ActiveVersionOrEmpty(context.TODO()). - IsActive(clusterversion.V23_2_UseSizedPebblePointTombstones) { - return p.db.Delete(key.Encode(), pebble.Sync) - } - return p.db.DeleteSized(key.Encode(), opts.ValueSize, pebble.Sync) + return p.db.Delete(key.Encode(), pebble.Sync) } -func (p *Pebble) clear(key MVCCKey, opts ClearOptions) error { +func (p *Pebble) clear(key MVCCKey) error { if len(key.Key) == 0 { return emptyKeyError() } - if !opts.ValueSizeKnown || !p.settings.Version.ActiveVersionOrEmpty(context.TODO()). - IsActive(clusterversion.V23_2_UseSizedPebblePointTombstones) { - return p.db.Delete(EncodeMVCCKey(key), pebble.Sync) - } - // Use DeleteSized to propagate the value size. - return p.db.DeleteSized(EncodeMVCCKey(key), opts.ValueSize, pebble.Sync) + return p.db.Delete(EncodeMVCCKey(key), pebble.Sync) } // SingleClearEngineKey implements the Engine interface. @@ -2134,9 +2113,6 @@ func (p *Pebble) SetMinVersion(version roachpb.Version) error { var formatVers pebble.FormatMajorVersion // Cases are ordered from newer to older versions. switch { - case !version.Less(clusterversion.ByKey(clusterversion.V23_2_PebbleFormatDeleteSizedAndObsolete)): - formatVers = pebble.ExperimentalFormatDeleteSizedAndObsolete - case !version.Less(clusterversion.ByKey(clusterversion.V23_1EnableFlushableIngest)): formatVers = pebble.FormatFlushableIngest @@ -2382,21 +2358,21 @@ func (p *pebbleReadOnly) ApplyBatchRepr(repr []byte, sync bool) error { panic("not implemented") } -func (p *pebbleReadOnly) ClearMVCC(key MVCCKey, opts ClearOptions) error { +func (p *pebbleReadOnly) ClearMVCC(key MVCCKey) error { panic("not implemented") } -func (p *pebbleReadOnly) ClearUnversioned(key roachpb.Key, opts ClearOptions) error { +func (p *pebbleReadOnly) ClearUnversioned(key roachpb.Key) error { panic("not implemented") } func (p *pebbleReadOnly) ClearIntent( - key roachpb.Key, txnDidNotUpdateMeta bool, txnUUID uuid.UUID, opts ClearOptions, + key roachpb.Key, txnDidNotUpdateMeta bool, txnUUID uuid.UUID, ) error { panic("not implemented") } -func (p *pebbleReadOnly) ClearEngineKey(key EngineKey, opts ClearOptions) error { +func (p *pebbleReadOnly) ClearEngineKey(key EngineKey) error { panic("not implemented") } diff --git a/pkg/storage/pebble_batch.go b/pkg/storage/pebble_batch.go index 36d45def93c1..fcfa7e7789f0 100644 --- a/pkg/storage/pebble_batch.go +++ b/pkg/storage/pebble_batch.go @@ -14,7 +14,6 @@ import ( "context" "sync" - "github.com/cockroachdb/cockroach/pkg/clusterversion" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/storage/pebbleiter" @@ -59,7 +58,6 @@ type pebbleBatch struct { iterStatsReporter iterStatsReporter batchStatsReporter batchStatsReporter settings *cluster.Settings - mayWriteSizedDeletes bool shouldWriteLocalTimestamps bool shouldWriteLocalTimestampsCached bool } @@ -114,15 +112,7 @@ func newPebbleBatch( iterStatsReporter: iterStatsReporter, batchStatsReporter: batchStatsReporter, settings: settings, - // NB: We do not use settings.Version.IsActive because we do not - // generally have a guarantee that the cluster version has been - // initialized. As a part of initializing a store, we use a Batch to - // write the store identifer key; this is written before any cluster - // version has been initialized. - mayWriteSizedDeletes: settings.Version.ActiveVersionOrEmpty(context.TODO()). - IsActive(clusterversion.V23_2_UseSizedPebblePointTombstones), } - pb.wrappedIntentWriter = wrapIntentWriter(pb) return pb } @@ -287,49 +277,43 @@ func (p *pebbleBatch) ApplyBatchRepr(repr []byte, sync bool) error { } // ClearMVCC implements the Batch interface. -func (p *pebbleBatch) ClearMVCC(key MVCCKey, opts ClearOptions) error { +func (p *pebbleBatch) ClearMVCC(key MVCCKey) error { if key.Timestamp.IsEmpty() { panic("ClearMVCC timestamp is empty") } - return p.clear(key, opts) + return p.clear(key) } // ClearUnversioned implements the Batch interface. -func (p *pebbleBatch) ClearUnversioned(key roachpb.Key, opts ClearOptions) error { - return p.clear(MVCCKey{Key: key}, opts) +func (p *pebbleBatch) ClearUnversioned(key roachpb.Key) error { + return p.clear(MVCCKey{Key: key}) } // ClearIntent implements the Batch interface. func (p *pebbleBatch) ClearIntent( - key roachpb.Key, txnDidNotUpdateMeta bool, txnUUID uuid.UUID, opts ClearOptions, + key roachpb.Key, txnDidNotUpdateMeta bool, txnUUID uuid.UUID, ) error { var err error - p.scratch, err = p.wrappedIntentWriter.ClearIntent(key, txnDidNotUpdateMeta, txnUUID, p.scratch, opts) + p.scratch, err = p.wrappedIntentWriter.ClearIntent(key, txnDidNotUpdateMeta, txnUUID, p.scratch) return err } // ClearEngineKey implements the Batch interface. -func (p *pebbleBatch) ClearEngineKey(key EngineKey, opts ClearOptions) error { +func (p *pebbleBatch) ClearEngineKey(key EngineKey) error { if len(key.Key) == 0 { return emptyKeyError() } p.buf = key.EncodeToBuf(p.buf[:0]) - if !opts.ValueSizeKnown || !p.mayWriteSizedDeletes { - return p.batch.Delete(p.buf, nil) - } - return p.batch.DeleteSized(p.buf, opts.ValueSize, nil) + return p.batch.Delete(p.buf, nil) } -func (p *pebbleBatch) clear(key MVCCKey, opts ClearOptions) error { +func (p *pebbleBatch) clear(key MVCCKey) error { if len(key.Key) == 0 { return emptyKeyError() } p.buf = EncodeMVCCKeyToBuf(p.buf[:0], key) - if !opts.ValueSizeKnown || !p.mayWriteSizedDeletes { - return p.batch.Delete(p.buf, nil) - } - return p.batch.DeleteSized(p.buf, opts.ValueSize, nil) + return p.batch.Delete(p.buf, nil) } // SingleClearEngineKey implements the Batch interface. diff --git a/pkg/storage/pebble_test.go b/pkg/storage/pebble_test.go index d686323ed69f..d7b33f14b8e8 100644 --- a/pkg/storage/pebble_test.go +++ b/pkg/storage/pebble_test.go @@ -801,7 +801,7 @@ func TestPebbleMVCCTimeIntervalWithClears(t *testing.T) { require.NoError(t, eng.Flush()) // Clear a@5 and [c-d)@7 in a separate SST. - require.NoError(t, eng.ClearMVCC(pointKey("a", 5), ClearOptions{})) + require.NoError(t, eng.ClearMVCC(pointKey("a", 5))) require.NoError(t, eng.ClearMVCCRangeKey(rangeKey("c", "d", 7))) require.NoError(t, eng.Flush()) diff --git a/pkg/storage/read_as_of_iterator_test.go b/pkg/storage/read_as_of_iterator_test.go index 64ddfdb6a8df..600d341d42b4 100644 --- a/pkg/storage/read_as_of_iterator_test.go +++ b/pkg/storage/read_as_of_iterator_test.go @@ -35,8 +35,7 @@ func TestReadAsOfIterator(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) - pebble, err := Open(context.Background(), InMemory(), - cluster.MakeTestingClusterSettings(), CacheSize(1<<20 /* 1 MiB */)) + pebble, err := Open(context.Background(), InMemory(), cluster.MakeClusterSettings(), CacheSize(1<<20 /* 1 MiB */)) require.NoError(t, err) defer pebble.Close() @@ -110,8 +109,7 @@ func TestReadAsOfIteratorSeek(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) - pebble, err := Open(context.Background(), InMemory(), - cluster.MakeTestingClusterSettings(), CacheSize(1<<20 /* 1 MiB */)) + pebble, err := Open(context.Background(), InMemory(), cluster.MakeClusterSettings(), CacheSize(1<<20 /* 1 MiB */)) require.NoError(t, err) defer pebble.Close() diff --git a/pkg/storage/sst_writer.go b/pkg/storage/sst_writer.go index ac812b6bd5cd..0d46ecae9701 100644 --- a/pkg/storage/sst_writer.go +++ b/pkg/storage/sst_writer.go @@ -331,19 +331,19 @@ func (fw *SSTWriter) ApplyBatchRepr(repr []byte, sync bool) error { // not greater than any previous point key passed to this Writer (according to // the comparator configured during writer creation). `Close` cannot have been // called. -func (fw *SSTWriter) ClearMVCC(key MVCCKey, opts ClearOptions) error { +func (fw *SSTWriter) ClearMVCC(key MVCCKey) error { if key.Timestamp.IsEmpty() { panic("ClearMVCC timestamp is empty") } - return fw.clear(key, opts) + return fw.clear(key) } // ClearUnversioned implements the Writer interface. An error is returned if // it is not greater than any previous point key passed to this Writer // (according to the comparator configured during writer creation). `Close` // cannot have been called. -func (fw *SSTWriter) ClearUnversioned(key roachpb.Key, opts ClearOptions) error { - return fw.clear(MVCCKey{Key: key}, opts) +func (fw *SSTWriter) ClearUnversioned(key roachpb.Key) error { + return fw.clear(MVCCKey{Key: key}) } // ClearIntent implements the Writer interface. An error is returned if it is @@ -351,7 +351,7 @@ func (fw *SSTWriter) ClearUnversioned(key roachpb.Key, opts ClearOptions) error // the comparator configured during writer creation). `Close` cannot have been // called. func (fw *SSTWriter) ClearIntent( - key roachpb.Key, txnDidNotUpdateMeta bool, txnUUID uuid.UUID, opts ClearOptions, + key roachpb.Key, txnDidNotUpdateMeta bool, txnUUID uuid.UUID, ) error { panic("ClearIntent is unsupported") } @@ -360,34 +360,24 @@ func (fw *SSTWriter) ClearIntent( // not greater than any previous point key passed to this Writer (according to // the comparator configured during writer creation). `Close` cannot have been // called. -func (fw *SSTWriter) ClearEngineKey(key EngineKey, opts ClearOptions) error { +func (fw *SSTWriter) ClearEngineKey(key EngineKey) error { if fw.fw == nil { return errors.New("cannot call Clear on a closed writer") } fw.scratch = key.EncodeToBuf(fw.scratch[:0]) fw.DataSize += int64(len(key.Key)) - // TODO(jackson): We could use opts.ValueSize if known, but it would require - // additional logic around ensuring the cluster version is at least - // V23_2_UseSizedPebblePointTombstones. It's probably not worth it until we - // can unconditionally use it; I don't believe we ever write point - // tombstones to sstables constructed within Cockroach. return fw.fw.Delete(fw.scratch) } // An error is returned if it is not greater than any previous point key // passed to this Writer (according to the comparator configured during writer // creation). `Close` cannot have been called. -func (fw *SSTWriter) clear(key MVCCKey, opts ClearOptions) error { +func (fw *SSTWriter) clear(key MVCCKey) error { if fw.fw == nil { return errors.New("cannot call Clear on a closed writer") } fw.scratch = EncodeMVCCKeyToBuf(fw.scratch[:0], key) fw.DataSize += int64(len(key.Key)) - // TODO(jackson): We could use opts.ValueSize if known, but it would require - // additional logic around ensuring the cluster version is at least - // V23_2_UseSizedPebblePointTombstones. It's probably not worth it until we - // can unconditionally use it; I don't believe we ever write point - // tombstones to sstables constructed within Cockroach. return fw.fw.Delete(fw.scratch) }