From ed6329d98ee799efbdbedf2d24f6c3248b58e804 Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Sun, 10 Sep 2023 22:05:04 -0400 Subject: [PATCH 1/2] storage: implement intent resolution using LockTableIterator Informs #109648. This commit implements intent resolution (point and ranged) using a `LockTableIterator`, configured to return all locks for the transaction being resolved and no locks from other transactions. This is the first step towards releasing replicated locks during intent resolution. While switching to a LockTableIterator, the commit is also able to remove separatedIntentAndVersionIter, iterForKeyVersions and mvccGetIntent, which were all used to avoid handing an MVCCMetadata directly to mvccResolveWriteIntent. Instead of continuing to treat intents as interleaved, we switch to handling intents entirely separately from their provisional value during intent resolution, which avoids jumping through these hoops and makes the code simpler. The change to `TestMVCCResolveTxnRangeResumeWithManyVersions` is immaterial and has to do with the transaction ID filter being applied before the key limit (inside LockTableIterator), instead of after. The new behavior is actually better. ---- One concern I have about this change is that it removes the call to `SeekIntentGE` in `MVCCResolveWriteIntent`, which was added in d1c91e0e to guard against the case where many pebble tombstones from prior intents on a key surround the intent being resolved. Conceptually, we'd like to push optimizations that avoid scanning over these tombstones into the `LockTableIterator` like we plan to do for skipping over non-conflicting locks. Doing so would benefit all lock strengths. It would also benefit the case where an intent is not found and the seek hits tombstones from prior intents on later versions. However, it's not clear how to do this with the current Pebble API. Pebble exposes a `SeekGEWithLimit` method, but this "limit" value is expressed as a key and not as a number of steps. How would we construct a limit key to bound the number of tombstones a seek observes before seeking directly to a specific (txn_id, lock_strength) version? One option would be to seek to specific versions in the `LockTableIterator` when advancing the iterator in cases where the iterator is configured to match a specific txn ID. For example, performing the following translations: ``` SeekGE({Key: k}) -> SeekGE({Key: k, Strength: Intent, TxnID: }) Next() -> SeekGE({Key: k, Strength: Exclusive, TxnID: }) Next() -> SeekGE({Key: k, Strength: Shared, TxnID: }) ``` Of course, this gets more complicated when some of these locks are not found and the iterator advances past them while seeking. In such cases, we're back to paying the cost of scanning over the tombstones. If we knew which lock strengths we had acquired on a key, we could avoid some of this cost, but that would require API changes and client buy-in to track lock spans on a per-strength basis. I'll capture the impact of this change on the following benchmarks and evaluate: * BenchmarkIntentResolution * BenchmarkIntentRangeResolution * BenchmarkIntentScan Release note: Nonet --- pkg/kv/kvserver/batch_spanset_test.go | 3 +- pkg/kv/kvserver/replica_test.go | 2 +- pkg/storage/mvcc.go | 371 +++++++++----------------- pkg/storage/mvcc_test.go | 6 +- 4 files changed, 137 insertions(+), 245 deletions(-) diff --git a/pkg/kv/kvserver/batch_spanset_test.go b/pkg/kv/kvserver/batch_spanset_test.go index 88fff8ebf652..fe50b20380e8 100644 --- a/pkg/kv/kvserver/batch_spanset_test.go +++ b/pkg/kv/kvserver/batch_spanset_test.go @@ -24,6 +24,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/leaktest" "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/uuid" "github.com/cockroachdb/errors" "github.com/stretchr/testify/require" ) @@ -581,7 +582,7 @@ func TestSpanSetMVCCResolveWriteIntentRange(t *testing.T) { defer batch.Close() intent := roachpb.LockUpdate{ Span: roachpb.Span{Key: roachpb.Key("a"), EndKey: roachpb.Key("b\x00")}, - Txn: enginepb.TxnMeta{}, // unused + Txn: enginepb.TxnMeta{ID: uuid.MakeV4()}, // unused Status: roachpb.PENDING, } if _, _, _, _, err := storage.MVCCResolveWriteIntentRange( diff --git a/pkg/kv/kvserver/replica_test.go b/pkg/kv/kvserver/replica_test.go index c7e312379762..0ed77816bd38 100644 --- a/pkg/kv/kvserver/replica_test.go +++ b/pkg/kv/kvserver/replica_test.go @@ -4722,7 +4722,7 @@ func TestEndTxnRollbackAbortedTransaction(t *testing.T) { if pErr := tc.store.intentResolver.ResolveIntents(ctx, []roachpb.LockUpdate{ - roachpb.MakeLockUpdate(&txnRecord, roachpb.Span{Key: key}), + roachpb.MakeLockUpdate(txn, roachpb.Span{Key: key}), }, intentresolver.ResolveOptions{Poison: true}); pErr != nil { t.Fatal(pErr) } diff --git a/pkg/storage/mvcc.go b/pkg/storage/mvcc.go index 83ae210dbeda..a35cfd9331df 100644 --- a/pkg/storage/mvcc.go +++ b/pkg/storage/mvcc.go @@ -4513,208 +4513,92 @@ func MVCCResolveWriteIntent( return false, 0, &roachpb.Span{Key: intent.Key}, nil } - iter, err := rw.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{ - KeyTypes: IterKeyTypePointsAndRanges, - Prefix: true, - }) - if err != nil { - return false, 0, nil, err - } - defer iter.Close() - iter.SeekIntentGE(intent.Key, intent.Txn.ID) - buf := newPutBuffer() - defer buf.release() // Production code will use a buffered writer, which makes the numBytes // calculation accurate. Note that an inaccurate numBytes (e.g. 0 in the // case of an unbuffered writer) does not affect any safety properties of // the database. beforeBytes := rw.BufferedSize() - ok, err = mvccResolveWriteIntent(ctx, rw, iter, ms, intent, buf) - numBytes = int64(rw.BufferedSize() - beforeBytes) - return ok, numBytes, nil, err -} - -// iterForKeyVersions provides a subset of the functionality of MVCCIterator. -// The expected use-case is when the iter is already positioned at the intent -// (if one exists) for a particular key, or some version, and positioning -// operators like SeekGE, Next are only being used to find other versions for -// that key, and never to find intents for other keys. A full-fledged -// MVCCIterator can be used here. The methods below have the same behavior as -// in MVCCIterator, but the caller should never call SeekGE with an empty -// MVCCKey.Timestamp. Additionally, Next must be preceded by at least one call -// to SeekGE. -type iterForKeyVersions interface { - Valid() (bool, error) - HasPointAndRange() (bool, bool) - SeekGE(key MVCCKey) - Next() - UnsafeKey() MVCCKey - UnsafeValue() ([]byte, error) - MVCCValueLenAndIsTombstone() (int, bool, error) - ValueProto(msg protoutil.Message) error - RangeKeys() MVCCRangeKeyStack -} - -// separatedIntentAndVersionIter is an implementation of iterForKeyVersions -// used for ranged intent resolution. The MVCCIterator used by it is of -// MVCCKeyIterKind. The caller attempting to do ranged intent resolution uses -// seekEngineKey, nextEngineKey to iterate over the lock table, and for each -// lock/intent that needs to be resolved passes this iterator to -// mvccResolveWriteIntent. The MVCCIterator is positioned lazily, only if -// needed -- the fast path for intent resolution when a transaction is -// committing and does not need to change the provisional value or timestamp, -// does not need to position the MVCCIterator. The other cases, which include -// transaction aborts and changing provisional value timestamps, or changing -// the provisional value due to savepoint rollback, will position the -// MVCCIterator, and are the slow path. -// Note that even this slow path is faster than when intents were interleaved, -// since it can avoid iterating over keys with no intents. -type separatedIntentAndVersionIter struct { - engineIter EngineIterator - mvccIter MVCCIterator - - // Already parsed meta, when the starting position is at an intent. - meta *enginepb.MVCCMetadata - atMVCCIter bool - engineIterValid bool - engineIterErr error - intentKey roachpb.Key -} - -var _ iterForKeyVersions = &separatedIntentAndVersionIter{} - -func (s *separatedIntentAndVersionIter) seekEngineKeyGE(key EngineKey) { - s.atMVCCIter = false - s.meta = nil - s.engineIterValid, s.engineIterErr = s.engineIter.SeekEngineKeyGE(key) - s.initIntentKey() -} - -func (s *separatedIntentAndVersionIter) nextEngineKey() { - s.atMVCCIter = false - s.meta = nil - s.engineIterValid, s.engineIterErr = s.engineIter.NextEngineKey() - s.initIntentKey() -} - -func (s *separatedIntentAndVersionIter) initIntentKey() { - if s.engineIterValid { - engineKey, err := s.engineIter.UnsafeEngineKey() - if err != nil { - s.engineIterErr = err - s.engineIterValid = false - return - } - if s.intentKey, err = keys.DecodeLockTableSingleKey(engineKey.Key); err != nil { - s.engineIterErr = err - s.engineIterValid = false - return - } - } -} - -func (s *separatedIntentAndVersionIter) Valid() (bool, error) { - if s.atMVCCIter { - return s.mvccIter.Valid() - } - return s.engineIterValid, s.engineIterErr -} - -func (s *separatedIntentAndVersionIter) HasPointAndRange() (bool, bool) { - hasPoint, hasRange := s.mvccIter.HasPointAndRange() - if !s.atMVCCIter { - hasPoint = s.engineIterValid - } - return hasPoint, hasRange -} -func (s *separatedIntentAndVersionIter) RangeKeys() MVCCRangeKeyStack { - return s.mvccIter.RangeKeys() -} - -func (s *separatedIntentAndVersionIter) SeekGE(key MVCCKey) { - if !key.IsValue() { - panic(errors.AssertionFailedf("SeekGE only permitted for values")) - } - s.mvccIter.SeekGE(key) - s.atMVCCIter = true -} - -func (s *separatedIntentAndVersionIter) Next() { - if !s.atMVCCIter { - panic(errors.AssertionFailedf("Next not preceded by SeekGE")) - } - s.mvccIter.Next() -} - -func (s *separatedIntentAndVersionIter) UnsafeKey() MVCCKey { - if s.atMVCCIter { - return s.mvccIter.UnsafeKey() - } - return MVCCKey{Key: s.intentKey} -} - -func (s *separatedIntentAndVersionIter) UnsafeValue() ([]byte, error) { - if s.atMVCCIter { - return s.mvccIter.UnsafeValue() - } - return s.engineIter.UnsafeValue() -} - -func (s *separatedIntentAndVersionIter) MVCCValueLenAndIsTombstone() (int, bool, error) { - if !s.atMVCCIter { - return 0, false, errors.AssertionFailedf("not at MVCC value") - } - return s.mvccIter.MVCCValueLenAndIsTombstone() -} - -func (s *separatedIntentAndVersionIter) ValueProto(msg protoutil.Message) error { - if s.atMVCCIter { - return s.mvccIter.ValueProto(msg) - } - meta, ok := msg.(*enginepb.MVCCMetadata) - if ok && meta == s.meta { - // Already parsed. - return nil - } - v, err := s.engineIter.UnsafeValue() + // Iterate over all locks held by intent.Txn on this key. + ltIter, err := NewLockTableIterator(rw, LockTableIteratorOptions{ + Prefix: true, + MatchTxnID: intent.Txn.ID, + }) if err != nil { - return err + return false, 0, nil, err } - return protoutil.Unmarshal(v, msg) -} + defer ltIter.Close() + buf := newPutBuffer() + defer buf.release() -// mvccGetIntent uses an iterForKeyVersions that has been seeked to -// metaKey.Key for a potential intent, and tries to retrieve an intent if it -// is present. ok returns true iff an intent for that key is found. In that -// case, keyBytes and valBytes are set to non-zero and the deserialized intent -// is placed in meta. -func mvccGetIntent( - iter iterForKeyVersions, metaKey MVCCKey, meta *enginepb.MVCCMetadata, -) (ok bool, keyBytes, valBytes int64, err error) { - if ok, err := iter.Valid(); !ok { - return false, 0, 0, err - } - if hasPoint, _ := iter.HasPointAndRange(); !hasPoint { - return false, 0, 0, nil - } - unsafeKey := iter.UnsafeKey() - if !unsafeKey.Key.Equal(metaKey.Key) { - return false, 0, 0, nil - } - if unsafeKey.IsValue() { - return false, 0, 0, nil - } - if err := iter.ValueProto(meta); err != nil { - return false, 0, 0, err - } - v, err := iter.UnsafeValue() - // iter.ValueProto returned without err, so we should not see an error now. - if err != nil { - return false, 0, 0, errors.HandleAsAssertionFailure(err) + var ltSeekKey EngineKey + ltSeekKey, buf.ltKeyBuf = LockTableKey{ + Key: intent.Key, + // lock.Intent is the first locking strength in the lock-table. As a + // minor performance optimization, we seek to this version and iterate + // instead of iterating from the beginning of the version prefix (i.e. + // keys.LockTableSingleKey(intent.Key)). This can seek past half of the + // LSM tombstones on this key in cases like those described in d1c91e0e + // where intents are repeatedly written and removed on a specific key so + // an intent is surrounded by a large number of tombstones during its + // resolution. + // + // This isn't a full solution to this problem, because we still end up + // iterating through the other half of the LSM tombstones while checking + // for Exclusive and Shared locks. For a full solution, we need to track + // the locking strengths that we intend to resolve on the client so that + // we can seek to just those versions. + // + // We could also seek to all three versions (Intent, Exclusive, Shared) + // with a limit, but that would require 3 seeks in all cases instead of + // a single seek and step in cases where only an intent is present. We + // chose not to pessimize the common case to optimize the uncommon case. + Strength: lock.Intent, + TxnUUID: intent.Txn.ID, + }.ToEngineKey(buf.ltKeyBuf) + + for valid, err := ltIter.SeekEngineKeyGE(ltSeekKey); ; valid, err = ltIter.NextEngineKey() { + if err != nil { + return false, 0, nil, errors.Wrap(err, "seeking lock table") + } else if !valid { + break + } + str, txnID, err := ltIter.LockTableKeyVersion() + if err != nil { + return false, 0, nil, errors.Wrap(err, "decoding lock table key version") + } + if txnID != intent.Txn.ID { + return false, 0, nil, errors.AssertionFailedf( + "unexpected txnID %v != %v while scanning lock table", txnID, intent.Txn.ID) + } + if err := ltIter.ValueProto(&buf.meta); err != nil { + return false, 0, nil, errors.Wrap(err, "unmarshaling lock table value") + } + if str == lock.Intent { + // Intent resolution requires an MVCC iterator to look up the MVCC + // version associated with the intent. Create one. + var iter MVCCIterator + { + iter, err = rw.NewMVCCIterator(MVCCKeyIterKind, IterOptions{ + Prefix: true, + KeyTypes: IterKeyTypePointsAndRanges, + }) + if err != nil { + return false, 0, nil, err + } + defer iter.Close() + } + ok, err = mvccResolveWriteIntent(ctx, rw, iter, ms, intent, &buf.meta, buf) + } else { + // TODO(nvanbenschoten): implement. + _ = str + } + if err != nil { + return false, 0, nil, err + } } - valLen := int64(len(v)) - return true, int64(unsafeKey.EncodedSize()), valLen, nil + numBytes = int64(rw.BufferedSize() - beforeBytes) + return ok, numBytes, nil, nil } // With the separated lock table, we are employing a performance optimization: @@ -4811,28 +4695,31 @@ func (h singleDelOptimizationHelper) onAbortIntent() bool { return false } -// mvccResolveWriteIntent is the core logic for resolving an intent. -// REQUIRES: iter is already seeked to intent.Key. +// mvccResolveWriteIntent is the core logic for resolving an intent. The +// function accepts instructions for how to resolve the intent (encoded in the +// LockUpdate), and the current value of the intent (meta). Returns whether the +// provided intent was resolved (true) or whether the resolution was a no-op +// (false). +// +// REQUIRES: intent and meta refer to the same intent on the same key. // REQUIRES: iter surfaces range keys via IterKeyTypePointsAndRanges. -// Returns whether an intent was found and resolved, false otherwise. func mvccResolveWriteIntent( ctx context.Context, writer Writer, - iter iterForKeyVersions, + iter MVCCIterator, ms *enginepb.MVCCStats, + // TODO(nvanbenschoten): rename this field to "resolution". intent roachpb.LockUpdate, + meta *enginepb.MVCCMetadata, buf *putBuffer, ) (bool, error) { - metaKey := MakeMVCCMetadataKey(intent.Key) - meta := &buf.meta - ok, origMetaKeySize, origMetaValSize, err := - mvccGetIntent(iter, metaKey, meta) - if err != nil { - return false, err - } - if !ok || meta.Txn == nil || intent.Txn.ID != meta.Txn.ID { - return false, nil + if meta.Txn == nil || meta.Txn.ID != intent.Txn.ID { + return false, errors.Errorf("txn does not match: %v != %v", meta.Txn, intent.Txn) } + + metaKey := MakeMVCCMetadataKey(intent.Key) + origMetaKeySize := int64(metaKey.EncodedSize()) + origMetaValSize := int64(meta.Size()) metaTimestamp := meta.Timestamp.ToTimestamp() canSingleDelHelper := singleDelOptimizationHelper{ _didNotUpdateMeta: meta.TxnDidNotUpdateMeta, @@ -4904,6 +4791,7 @@ func mvccResolveWriteIntent( // If only part of the intent history was rolled back, but the intent still // remains, the rolledBackVal is set to a non-nil value. var rolledBackVal *MVCCValue + var err error if len(intent.IgnoredSeqNums) > 0 { // NOTE: mvccMaybeRewriteIntentHistory mutates its meta argument. // TODO(nvanbenschoten): this is an awkward interface. We shouldn't @@ -5134,7 +5022,7 @@ func mvccResolveWriteIntent( Key: intent.Key, }) - ok = false + ok := false // These variables containing the next key-value information are initialized // in the following if-block when ok is set to true. These are only read @@ -5316,13 +5204,18 @@ func MVCCResolveWriteIntentRange( } return 0, 0, &resumeSpan, resumeReason, nil } + ltStart, _ := keys.LockTableSingleKey(intent.Key, nil) ltEnd, _ := keys.LockTableSingleKey(intent.EndKey, nil) - engineIter, err := rw.NewEngineIterator(IterOptions{LowerBound: ltStart, UpperBound: ltEnd}) + ltIter, err := NewLockTableIterator(rw, LockTableIteratorOptions{ + LowerBound: ltStart, + UpperBound: ltEnd, + MatchTxnID: intent.Txn.ID, + }) if err != nil { return 0, 0, nil, 0, err } - defer engineIter.Close() + defer ltIter.Close() var mvccIter MVCCIterator iterOpts := IterOptions{ KeyTypes: IterKeyTypePointsAndRanges, @@ -5337,26 +5230,19 @@ func MVCCResolveWriteIntentRange( } } else { // For correctness, we need mvccIter to be consistent with engineIter. - mvccIter = newPebbleIteratorByCloning(engineIter.CloneContext(), iterOpts, StandardDurability) + mvccIter = newPebbleIteratorByCloning(ltIter.CloneContext(), iterOpts, StandardDurability) } defer mvccIter.Close() buf := newPutBuffer() defer buf.release() - sepIter := &separatedIntentAndVersionIter{ - engineIter: engineIter, - mvccIter: mvccIter, - } - // Seek sepIter to position it for the loop below. The loop itself will - // only step the iterator and not seek. - sepIter.seekEngineKeyGE(EngineKey{Key: ltStart}) intentEndKey := intent.EndKey intent.EndKey = nil var lastResolvedKey roachpb.Key - for { - if valid, err := sepIter.Valid(); err != nil { - return 0, 0, nil, 0, err + for valid, err := ltIter.SeekEngineKeyGE(EngineKey{Key: ltStart}); ; valid, err = ltIter.NextEngineKey() { + if err != nil { + return 0, 0, nil, 0, errors.Wrap(err, "seeking lock table") } else if !valid { // No more intents in the given range. break @@ -5370,39 +5256,46 @@ func MVCCResolveWriteIntentRange( resumeReason = kvpb.RESUME_BYTE_LIMIT } // We could also compute a tighter nextKey here if we wanted to. + // TODO(nvanbenschoten): this resumeSpan won't be correct if there + // are multiple locks on the same key. What if only of the locks for + // a key are removed? Fix this by resolving zero or all locks on a + // given key. return numKeys, numBytes, &roachpb.Span{Key: lastResolvedKey.Next(), EndKey: intentEndKey}, resumeReason, nil } - // Parse the MVCCMetadata to see if it is a relevant intent. - meta := &buf.meta - if err := sepIter.ValueProto(meta); err != nil { - return 0, 0, nil, 0, err + ltEngineKey, err := ltIter.EngineKey() + if err != nil { + return 0, 0, nil, 0, errors.Wrap(err, "retrieving lock table key") } - if meta.Txn == nil { - return 0, 0, nil, 0, errors.Errorf("intent with no txn") + ltKey, err := ltEngineKey.ToLockTableKey() + if err != nil { + return 0, 0, nil, 0, errors.Wrap(err, "decoding lock table key") } - if intent.Txn.ID != meta.Txn.ID { - // Intent for a different txn, so ignore. - sepIter.nextEngineKey() - continue + if ltKey.TxnUUID != intent.Txn.ID { + return 0, 0, nil, 0, errors.AssertionFailedf( + "unexpected txnID %v != %v while scanning lock table", ltKey.TxnUUID, intent.Txn.ID) + } + if err := ltIter.ValueProto(&buf.meta); err != nil { + return 0, 0, nil, 0, errors.Wrap(err, "unmarshaling lock table value") } - // Stash the parsed meta so don't need to parse it again in - // mvccResolveWriteIntent. This parsing can be ~10% of the resolution cost - // in some benchmarks. - sepIter.meta = meta // Copy the underlying bytes of the unsafe key. This is needed for - // stability of the key passed to mvccResolveWriteIntent, and for the - // subsequent iteration to construct a resume span. - lastResolvedKey = append(lastResolvedKey[:0], sepIter.UnsafeKey().Key...) - intent.Key = lastResolvedKey + // stability of the key to construct a resume span on subsequent + // iteration. + intent.Key = ltKey.Key + lastResolvedKey = append(lastResolvedKey[:0], intent.Key...) beforeBytes := rw.BufferedSize() - ok, err := mvccResolveWriteIntent(ctx, rw, sepIter, ms, intent, buf) + var ok bool + if ltKey.Strength == lock.Intent { + ok, err = mvccResolveWriteIntent(ctx, rw, mvccIter, ms, intent, &buf.meta, buf) + } else { + // TODO(nvanbenschoten): implement. + _ = ltKey.Strength + } if err != nil { log.Warningf(ctx, "failed to resolve intent for key %q: %+v", lastResolvedKey, err) } else if ok { numKeys++ } numBytes += int64(rw.BufferedSize() - beforeBytes) - sepIter.nextEngineKey() } return numKeys, numBytes, nil, 0, nil } diff --git a/pkg/storage/mvcc_test.go b/pkg/storage/mvcc_test.go index 43daf2f6f50e..22f47936c0b5 100644 --- a/pkg/storage/mvcc_test.go +++ b/pkg/storage/mvcc_test.go @@ -3669,13 +3669,11 @@ func TestMVCCResolveTxnRangeResumeWithManyVersions(t *testing.T) { numKeys, _, resumeSpan, _, err := MVCCResolveWriteIntentRange(ctx, engine, nil, lockUpdate, MVCCResolveWriteIntentRangeOptions{MaxKeys: 20}) require.NoError(t, err) + require.Equal(t, int64(20), numKeys) + i++ if resumeSpan == nil { - // Last call resolves 0 intents. - require.Equal(t, int64(0), numKeys) break } - require.Equal(t, int64(20), numKeys) - i++ expResumeSpan := roachpb.Span{ Key: makeKey(nil, (i*20-1)*10).Next(), EndKey: lockUpdate.EndKey, From 4e4644352bb6bfdd7b4045e2a5dbbb89d3f68dfa Mon Sep 17 00:00:00 2001 From: Nathan VanBenschoten Date: Sun, 10 Sep 2023 21:29:35 -0400 Subject: [PATCH 2/2] storage: delete MVCCIterator.SeekIntentGE Now that the specialized method is no longer used in `MVCCResolveWriteIntent`, we can delete it. Epic: None Release note: None --- pkg/kv/kvserver/spanset/BUILD.bazel | 1 - pkg/kv/kvserver/spanset/batch.go | 7 -- pkg/storage/engine.go | 7 -- pkg/storage/intent_interleaving_iter.go | 39 ----------- .../mvcc_history_metamorphic_iterator_test.go | 11 ---- pkg/storage/mvcc_history_test.go | 12 ---- pkg/storage/pebble_iterator.go | 6 -- .../mvcc_histories/range_tombstone_iter | 57 ----------------- ...rse_intent_seek_rangekeychanged_regression | 64 ------------------- pkg/storage/verifying_iterator.go | 12 +--- 10 files changed, 1 insertion(+), 215 deletions(-) diff --git a/pkg/kv/kvserver/spanset/BUILD.bazel b/pkg/kv/kvserver/spanset/BUILD.bazel index 5afeede69aef..f02cd9a10d77 100644 --- a/pkg/kv/kvserver/spanset/BUILD.bazel +++ b/pkg/kv/kvserver/spanset/BUILD.bazel @@ -16,7 +16,6 @@ go_library( "//pkg/util/hlc", "//pkg/util/log", "//pkg/util/protoutil", - "//pkg/util/uuid", "@com_github_cockroachdb_errors//:errors", "@com_github_cockroachdb_pebble//:pebble", "@com_github_cockroachdb_pebble//rangekey", diff --git a/pkg/kv/kvserver/spanset/batch.go b/pkg/kv/kvserver/spanset/batch.go index 8275d6cf0ee9..3f5efb08d571 100644 --- a/pkg/kv/kvserver/spanset/batch.go +++ b/pkg/kv/kvserver/spanset/batch.go @@ -20,7 +20,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/storage" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/protoutil" - "github.com/cockroachdb/cockroach/pkg/util/uuid" "github.com/cockroachdb/pebble" "github.com/cockroachdb/pebble/rangekey" ) @@ -85,12 +84,6 @@ func (i *MVCCIterator) SeekGE(key storage.MVCCKey) { i.checkAllowed(roachpb.Span{Key: key.Key}, true) } -// SeekIntentGE is part of the storage.MVCCIterator interface. -func (i *MVCCIterator) SeekIntentGE(key roachpb.Key, txnUUID uuid.UUID) { - i.i.SeekIntentGE(key, txnUUID) - i.checkAllowed(roachpb.Span{Key: key}, true) -} - // SeekLT is part of the storage.MVCCIterator interface. func (i *MVCCIterator) SeekLT(key storage.MVCCKey) { i.i.SeekLT(key) diff --git a/pkg/storage/engine.go b/pkg/storage/engine.go index 9697d654bc2e..ebc6cd71a6b4 100644 --- a/pkg/storage/engine.go +++ b/pkg/storage/engine.go @@ -257,13 +257,6 @@ type MVCCIterator interface { // the first key. Prev() - // SeekIntentGE is a specialized version of SeekGE(MVCCKey{Key: key}), when - // the caller expects to find an intent, and additionally has the txnUUID - // for the intent it is looking for. When running with separated intents, - // this can optimize the behavior of the underlying Engine for write heavy - // keys by avoiding the need to iterate over many deleted intents. - SeekIntentGE(key roachpb.Key, txnUUID uuid.UUID) - // UnsafeRawKey returns the current raw key which could be an encoded // MVCCKey, or the more general EngineKey (for a lock table key). // This is a low-level and dangerous method since it will expose the diff --git a/pkg/storage/intent_interleaving_iter.go b/pkg/storage/intent_interleaving_iter.go index f9e9fdd2fa1e..2660d2c724b4 100644 --- a/pkg/storage/intent_interleaving_iter.go +++ b/pkg/storage/intent_interleaving_iter.go @@ -21,7 +21,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/util" "github.com/cockroachdb/cockroach/pkg/util/protoutil" - "github.com/cockroachdb/cockroach/pkg/util/uuid" "github.com/cockroachdb/errors" "github.com/cockroachdb/pebble" ) @@ -588,44 +587,6 @@ func (i *intentInterleavingIter) SeekGE(key MVCCKey) { i.computePos() } -func (i *intentInterleavingIter) SeekIntentGE(key roachpb.Key, txnUUID uuid.UUID) { - adjustRangeKeyChanged := i.shouldAdjustSeekRangeKeyChanged() - - i.dir = +1 - i.valid = true - i.err = nil - - if i.constraint != notConstrained { - i.checkConstraint(key, false) - } - i.iter.SeekGE(MVCCKey{Key: key}) - if err := i.tryDecodeKey(); err != nil { - return - } - i.rangeKeyChanged = i.iter.RangeKeyChanged() - if adjustRangeKeyChanged { - i.adjustSeekRangeKeyChanged() - } - var engineKey EngineKey - engineKey, i.intentKeyBuf = LockTableKey{ - Key: key, - Strength: lock.Intent, - TxnUUID: txnUUID, - }.ToEngineKey(i.intentKeyBuf) - var limitKey roachpb.Key - if i.iterValid && !i.prefix { - limitKey = i.makeUpperLimitKey() - } - iterState, err := i.intentIter.SeekEngineKeyGEWithLimit(engineKey, limitKey) - if err = i.tryDecodeLockKey(iterState, err); err != nil { - return - } - if err := i.maybeSkipIntentRangeKey(); err != nil { - return - } - i.computePos() -} - func (i *intentInterleavingIter) checkConstraint(k roachpb.Key, isExclusiveUpper bool) { kConstraint := constrainedToGlobal if isLocal(k) { diff --git a/pkg/storage/mvcc_history_metamorphic_iterator_test.go b/pkg/storage/mvcc_history_metamorphic_iterator_test.go index 10479293333f..8e09f1a25d8f 100644 --- a/pkg/storage/mvcc_history_metamorphic_iterator_test.go +++ b/pkg/storage/mvcc_history_metamorphic_iterator_test.go @@ -21,7 +21,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/storage" "github.com/cockroachdb/cockroach/pkg/util/protoutil" - "github.com/cockroachdb/cockroach/pkg/util/uuid" "github.com/cockroachdb/pebble" "github.com/stretchr/testify/require" ) @@ -146,11 +145,6 @@ func (m *metamorphicIterator) moveAround() { actions = append(actions, action{ "SeekLT(cur)", func() { mvccIt.SeekLT(cur) }, - }, action{ - "SeekIntentGE(cur, 00000)", - func() { - mvccIt.SeekIntentGE(cur.Key, uuid.Nil) - }, }, action{ "SeekLT(Max)", func() { mvccIt.SeekLT(storage.MVCCKeyMax) }, @@ -369,11 +363,6 @@ func (m *metamorphicMVCCIterator) UnsafeLazyValue() pebble.LazyValue { return m.it.(storage.MVCCIterator).UnsafeLazyValue() } -func (m *metamorphicMVCCIterator) SeekIntentGE(key roachpb.Key, txnUUID uuid.UUID) { - m.it.(storage.MVCCIterator).SeekIntentGE(key, txnUUID) - m.moveAround() -} - func (m *metamorphicMVCCIterator) UnsafeRawKey() []byte { return m.it.(storage.MVCCIterator).UnsafeRawKey() } diff --git a/pkg/storage/mvcc_history_test.go b/pkg/storage/mvcc_history_test.go index c31c4ea10a56..51044eb9e979 100644 --- a/pkg/storage/mvcc_history_test.go +++ b/pkg/storage/mvcc_history_test.go @@ -107,7 +107,6 @@ var ( // iter_new_incremental [k=] [end=] [startTs=[,]] [endTs=[,]] [types=pointsOnly|pointsWithRanges|pointsAndRanges|rangesOnly] [maskBelow=[,]] [intents=error|aggregate|emit] // iter_seek_ge k= [ts=[,]] // iter_seek_lt k= [ts=[,]] -// iter_seek_intent_ge k= txn= // iter_next // iter_next_ignoring_time // iter_next_key_ignoring_time @@ -805,7 +804,6 @@ var commands = map[string]cmd{ "iter_new_read_as_of": {typReadOnly, cmdIterNewReadAsOf}, // readAsOfIterator "iter_seek_ge": {typReadOnly, cmdIterSeekGE}, "iter_seek_lt": {typReadOnly, cmdIterSeekLT}, - "iter_seek_intent_ge": {typReadOnly, cmdIterSeekIntentGE}, "iter_next": {typReadOnly, cmdIterNext}, "iter_next_ignoring_time": {typReadOnly, cmdIterNextIgnoringTime}, // MVCCIncrementalIterator "iter_next_key_ignoring_time": {typReadOnly, cmdIterNextKeyIgnoringTime}, // MVCCIncrementalIterator @@ -1925,16 +1923,6 @@ func cmdIterSeekGE(e *evalCtx) error { return nil } -func cmdIterSeekIntentGE(e *evalCtx) error { - key := e.getKey() - var txnName string - e.scanArg("txn", &txnName) - txn := e.txns[txnName] - e.mvccIter().SeekIntentGE(key, txn.ID) - printIter(e) - return nil -} - func cmdIterSeekLT(e *evalCtx) error { key := e.getKey() ts := e.getTs(nil) diff --git a/pkg/storage/pebble_iterator.go b/pkg/storage/pebble_iterator.go index b702847c973a..1aacfd253685 100644 --- a/pkg/storage/pebble_iterator.go +++ b/pkg/storage/pebble_iterator.go @@ -21,7 +21,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/storage/pebbleiter" "github.com/cockroachdb/cockroach/pkg/util" "github.com/cockroachdb/cockroach/pkg/util/protoutil" - "github.com/cockroachdb/cockroach/pkg/util/uuid" "github.com/cockroachdb/errors" "github.com/cockroachdb/pebble" "github.com/cockroachdb/pebble/sstable" @@ -337,11 +336,6 @@ func (p *pebbleIterator) SeekGE(key MVCCKey) { } } -// SeekIntentGE implements the MVCCIterator interface. -func (p *pebbleIterator) SeekIntentGE(key roachpb.Key, _ uuid.UUID) { - p.SeekGE(MVCCKey{Key: key}) -} - // SeekEngineKeyGE implements the EngineIterator interface. func (p *pebbleIterator) SeekEngineKeyGE(key EngineKey) (valid bool, err error) { p.keyBuf = key.EncodeToBuf(p.keyBuf[:0]) diff --git a/pkg/storage/testdata/mvcc_histories/range_tombstone_iter b/pkg/storage/testdata/mvcc_histories/range_tombstone_iter index c9b888295bac..f81265182fc7 100644 --- a/pkg/storage/testdata/mvcc_histories/range_tombstone_iter +++ b/pkg/storage/testdata/mvcc_histories/range_tombstone_iter @@ -998,63 +998,6 @@ iter_next_key: {h-k}/[1.000000000,0=/] ! iter_seek_ge: "d"/0,0=txn={id=00000001 key=/Min iso=Serializable pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true {d-f}/[5.000000000,0=/ 1.000000000,0=/] ! iter_next_key: "e"/3.000000000,0=/BYTES/e3 {d-f}/[5.000000000,0=/ 1.000000000,0=/] -# Test SeekIntentGE both with and without intents and range keys. -run ok -iter_new types=pointsAndRanges -iter_seek_intent_ge k=b txn=A -iter_seek_intent_ge k=d txn=A -iter_seek_intent_ge k=i txn=A -iter_seek_intent_ge k=j txn=A -iter_seek_intent_ge k=k txn=A ----- -iter_seek_intent_ge: {b-c}/[3.000000000,0=/ 1.000000000,0=/] ! -iter_seek_intent_ge: "d"/0,0=txn={id=00000001 key=/Min iso=Serializable pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true {d-f}/[5.000000000,0=/ 1.000000000,0=/] ! -iter_seek_intent_ge: {h-k}/[1.000000000,0=/] ! -iter_seek_intent_ge: "j"/0,0=txn={id=00000001 key=/Min iso=Serializable pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true {h-k}/[1.000000000,0=/] -iter_seek_intent_ge: "k"/5.000000000,0=/BYTES/k5 ! - -run ok -iter_new kind=keys types=pointsAndRanges -iter_seek_intent_ge k=b txn=A -iter_seek_intent_ge k=d txn=A -iter_seek_intent_ge k=i txn=A -iter_seek_intent_ge k=j txn=A -iter_seek_intent_ge k=k txn=A ----- -iter_seek_intent_ge: {b-c}/[3.000000000,0=/ 1.000000000,0=/] ! -iter_seek_intent_ge: {d-f}/[5.000000000,0=/ 1.000000000,0=/] ! -iter_seek_intent_ge: {h-k}/[1.000000000,0=/] ! -iter_seek_intent_ge: {h-k}/[1.000000000,0=/] -iter_seek_intent_ge: "k"/5.000000000,0=/BYTES/k5 ! - -run ok -iter_new types=pointsOnly -iter_seek_intent_ge k=b txn=A -iter_seek_intent_ge k=d txn=A -iter_seek_intent_ge k=i txn=A -iter_seek_intent_ge k=j txn=A -iter_seek_intent_ge k=k txn=A ----- -iter_seek_intent_ge: "b"/4.000000000,0=/ -iter_seek_intent_ge: "d"/0,0=txn={id=00000001 key=/Min iso=Serializable pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true -iter_seek_intent_ge: "j"/0,0=txn={id=00000001 key=/Min iso=Serializable pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true -iter_seek_intent_ge: "j"/0,0=txn={id=00000001 key=/Min iso=Serializable pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true -iter_seek_intent_ge: "k"/5.000000000,0=/BYTES/k5 - -run ok -iter_new types=rangesOnly -iter_seek_intent_ge k=b txn=A -iter_seek_intent_ge k=d txn=A -iter_seek_intent_ge k=i txn=A -iter_seek_intent_ge k=j txn=A -iter_seek_intent_ge k=k txn=A ----- -iter_seek_intent_ge: {b-c}/[3.000000000,0=/ 1.000000000,0=/] ! -iter_seek_intent_ge: {d-f}/[5.000000000,0=/ 1.000000000,0=/] ! -iter_seek_intent_ge: {h-k}/[1.000000000,0=/] ! -iter_seek_intent_ge: {h-k}/[1.000000000,0=/] -iter_seek_intent_ge: {m-n}/[3.000000000,0={localTs=2.000000000,0}/] ! - # Try some masked scans at increasing timestamps. run ok iter_new types=pointsAndRanges maskBelow=1 diff --git a/pkg/storage/testdata/mvcc_histories/range_tombstone_iter_reverse_intent_seek_rangekeychanged_regression b/pkg/storage/testdata/mvcc_histories/range_tombstone_iter_reverse_intent_seek_rangekeychanged_regression index 43d9e718aac9..0e303c7ea67f 100644 --- a/pkg/storage/testdata/mvcc_histories/range_tombstone_iter_reverse_intent_seek_rangekeychanged_regression +++ b/pkg/storage/testdata/mvcc_histories/range_tombstone_iter_reverse_intent_seek_rangekeychanged_regression @@ -58,27 +58,6 @@ iter_seek_lt: "b"/3.000000000,0=/BYTES/b3 iter_prev: "b"/0,0=txn={id=00000001 key="b" iso=Serializable pri=0.00000000 epo=0 ts=3.000000000,0 min=0,0 seq=0} ts=3.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true iter_seek_ge: {a-b}/[1.000000000,0=/] ! -# Test the same for SeekIntentGE. -run ok -iter_new types=pointsAndRanges -iter_seek_lt k=b+ -iter_prev -iter_seek_ge txn=A k=b ----- -iter_seek_lt: "b"/3.000000000,0=/BYTES/b3 -iter_prev: "b"/0,0=txn={id=00000001 key="b" iso=Serializable pri=0.00000000 epo=0 ts=3.000000000,0 min=0,0 seq=0} ts=3.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true -iter_seek_ge: "b"/0,0=txn={id=00000001 key="b" iso=Serializable pri=0.00000000 epo=0 ts=3.000000000,0 min=0,0 seq=0} ts=3.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true - -run ok -iter_new types=pointsAndRanges -iter_seek_lt k=b+ -iter_prev -iter_seek_intent_ge txn=A k=a ----- -iter_seek_lt: "b"/3.000000000,0=/BYTES/b3 -iter_prev: "b"/0,0=txn={id=00000001 key="b" iso=Serializable pri=0.00000000 epo=0 ts=3.000000000,0 min=0,0 seq=0} ts=3.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true -iter_seek_intent_ge: {a-b}/[1.000000000,0=/] ! - # Test the same for SeekLT. run ok iter_new types=pointsAndRanges @@ -113,18 +92,6 @@ iter_prev: "b"/0,0=txn={id=00000001 key="b" iso=Serializable pri=0.00000000 epo= iter_prev: "a"/2.000000000,0=/BYTES/a2 {a-b}/[1.000000000,0=/] ! iter_seek_ge: "b"/0,0=txn={id=00000001 key="b" iso=Serializable pri=0.00000000 epo=0 ts=3.000000000,0 min=0,0 seq=0} ts=3.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true ! -run ok -iter_new types=pointsAndRanges -iter_seek_lt k=b+ -iter_prev -iter_prev -iter_seek_intent_ge txn=A k=b ----- -iter_seek_lt: "b"/3.000000000,0=/BYTES/b3 -iter_prev: "b"/0,0=txn={id=00000001 key="b" iso=Serializable pri=0.00000000 epo=0 ts=3.000000000,0 min=0,0 seq=0} ts=3.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true -iter_prev: "a"/2.000000000,0=/BYTES/a2 {a-b}/[1.000000000,0=/] ! -iter_seek_intent_ge: "b"/0,0=txn={id=00000001 key="b" iso=Serializable pri=0.00000000 epo=0 ts=3.000000000,0 min=0,0 seq=0} ts=3.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true ! - run ok iter_new types=pointsAndRanges iter_seek_lt k=b+ @@ -159,16 +126,6 @@ iter_seek_lt: "b"/3.000000000,0=/BYTES/b3 iter_prev: "b"/0,0=txn={id=00000001 key="b" iso=Serializable pri=0.00000000 epo=0 ts=3.000000000,0 min=0,0 seq=0} ts=3.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true iter_seek_ge: {c-d}/[1.000000000,0=/] ! -run ok -iter_new types=pointsAndRanges -iter_seek_lt k=b+ -iter_prev -iter_seek_intent_ge txn=A k=c ----- -iter_seek_lt: "b"/3.000000000,0=/BYTES/b3 -iter_prev: "b"/0,0=txn={id=00000001 key="b" iso=Serializable pri=0.00000000 epo=0 ts=3.000000000,0 min=0,0 seq=0} ts=3.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true -iter_seek_intent_ge: {c-d}/[1.000000000,0=/] ! - run ok iter_new types=pointsAndRanges iter_seek_lt k=b+ @@ -225,27 +182,6 @@ iter_seek_lt: "b"/3.000000000,0=/BYTES/b3 iter_prev: "b"/0,0=txn={id=00000002 key="b" iso=Serializable pri=0.00000000 epo=0 ts=3.000000000,0 min=0,0 seq=0} ts=3.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true iter_seek_ge: {a-b}/[2.000000000,0=/ 1.000000000,0=/] ! -# Test the same for SeekIntentGE. -run ok -iter_new types=pointsAndRanges -iter_seek_lt k=b+ -iter_prev -iter_seek_ge txn=A k=b ----- -iter_seek_lt: "b"/3.000000000,0=/BYTES/b3 -iter_prev: "b"/0,0=txn={id=00000002 key="b" iso=Serializable pri=0.00000000 epo=0 ts=3.000000000,0 min=0,0 seq=0} ts=3.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true -iter_seek_ge: "b"/0,0=txn={id=00000002 key="b" iso=Serializable pri=0.00000000 epo=0 ts=3.000000000,0 min=0,0 seq=0} ts=3.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true - -run ok -iter_new types=pointsAndRanges -iter_seek_lt k=b+ -iter_prev -iter_seek_intent_ge txn=A k=a ----- -iter_seek_lt: "b"/3.000000000,0=/BYTES/b3 -iter_prev: "b"/0,0=txn={id=00000002 key="b" iso=Serializable pri=0.00000000 epo=0 ts=3.000000000,0 min=0,0 seq=0} ts=3.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true -iter_seek_intent_ge: {a-b}/[2.000000000,0=/ 1.000000000,0=/] ! - # Test the same for SeekLT. run ok iter_new types=pointsAndRanges diff --git a/pkg/storage/verifying_iterator.go b/pkg/storage/verifying_iterator.go index 51c6b833f3d1..d1c8ac34a7f0 100644 --- a/pkg/storage/verifying_iterator.go +++ b/pkg/storage/verifying_iterator.go @@ -10,11 +10,7 @@ package storage -import ( - "github.com/cockroachdb/cockroach/pkg/roachpb" - "github.com/cockroachdb/cockroach/pkg/util/uuid" - "github.com/cockroachdb/pebble" -) +import "github.com/cockroachdb/pebble" // verifyingMVCCIterator is an MVCC iterator that wraps a pebbleIterator and // verifies roachpb.Value checksums for encountered values. @@ -85,12 +81,6 @@ func (i *verifyingMVCCIterator) SeekGE(key MVCCKey) { i.saveAndVerify() } -// SeekIntentGE implements MVCCIterator. -func (i *verifyingMVCCIterator) SeekIntentGE(key roachpb.Key, txnUUID uuid.UUID) { - i.pebbleIterator.SeekIntentGE(key, txnUUID) - i.saveAndVerify() -} - // SeekLT implements MVCCIterator. func (i *verifyingMVCCIterator) SeekLT(key MVCCKey) { i.pebbleIterator.SeekLT(key)