From df786dc6e098b46f4788713e6b67b8b9e311a322 Mon Sep 17 00:00:00 2001 From: Michael Butler Date: Tue, 28 Jun 2022 11:02:58 -0400 Subject: [PATCH] kvserver: add Time Bound Iteration to DeleteRange Previously, a kv client could only pass an AOST timestamp to a DelRange request. Now, the user can pass a lower bound timestamp, causing the kvserver to leverage time bound iteration while issuing delete requests. Specifically, the server uses an MVCCIncrementalIterator to iterate over the target span at the client-provided time bounds, track a continuous run of keys in that time bound, and flush the run via point or MVCC range tombstones, depending on the size of the run. In a future PR, this operation will replace the use of RevertRange during IMPORT INTO rollbacks to make them MVCC compatible. Informs #70428 Release note: none --- pkg/kv/kvserver/batcheval/cmd_delete_range.go | 33 ++- pkg/roachpb/api.proto | 18 ++ pkg/storage/mvcc.go | 251 ++++++++++++++++- pkg/storage/mvcc_history_test.go | 36 +++ .../mvcc_histories/delete_range_predicate | 255 ++++++++++++++++++ 5 files changed, 588 insertions(+), 5 deletions(-) create mode 100644 pkg/storage/testdata/mvcc_histories/delete_range_predicate diff --git a/pkg/kv/kvserver/batcheval/cmd_delete_range.go b/pkg/kv/kvserver/batcheval/cmd_delete_range.go index f0c0c52cd82e..6add8cb78601 100644 --- a/pkg/kv/kvserver/batcheval/cmd_delete_range.go +++ b/pkg/kv/kvserver/batcheval/cmd_delete_range.go @@ -12,6 +12,7 @@ package batcheval import ( "context" + "math" "time" "github.com/cockroachdb/cockroach/pkg/keys" @@ -67,8 +68,8 @@ func DeleteRange( h := cArgs.Header reply := resp.(*roachpb.DeleteRangeResponse) - // Use experimental MVCC range tombstone if requested. - if args.UseRangeTombstone { + // Use MVCC range tombstone if requested. 
+ if args.UseRangeTombstone || args.Predicates != nil { if cArgs.Header.Txn != nil { return result.Result{}, ErrTransactionUnsupported } @@ -85,8 +86,32 @@ func DeleteRange( args.Key, args.EndKey, desc.StartKey.AsRawKey(), desc.EndKey.AsRawKey()) maxIntents := storage.MaxIntentsPerWriteIntentError.Get(&cArgs.EvalCtx.ClusterSettings().SV) - err := storage.MVCCDeleteRangeUsingTombstone(ctx, readWriter, cArgs.Stats, - args.Key, args.EndKey, h.Timestamp, cArgs.Now, leftPeekBound, rightPeekBound, maxIntents) + if args.Predicates == nil { + err := storage.MVCCDeleteRangeUsingTombstone(ctx, readWriter, cArgs.Stats, + args.Key, args.EndKey, h.Timestamp, cArgs.Now, leftPeekBound, rightPeekBound, maxIntents) + return result.Result{}, err + } + maxBatchSize := h.MaxSpanRequestKeys + if h.MaxSpanRequestKeys == 0 { + maxBatchSize = math.MaxInt64 + } + + // The minimum number of keys required in a run to use a range tombstone + // + // TODO (msbutler): Tune the threshold once DeleteRange and DeleteRangeUsingTombstone have + // been further optimized. 
+ defaultRangeTombstoneThreshold := int64(64) + resumeSpan, err := storage.PredicateMVCCDeleteRange(ctx, readWriter, cArgs.Stats, + args.Key, args.EndKey, h.Timestamp, cArgs.Now, leftPeekBound, rightPeekBound, + args.Predicates, maxBatchSize, maxRevertRangeBatchBytes, defaultRangeTombstoneThreshold) + + // TODO (msbutler): plumb number of keys deleted into response, if needed + if resumeSpan != nil { + reply.ResumeSpan = resumeSpan + reply.ResumeReason = roachpb.RESUME_KEY_LIMIT + } + // Return result is always empty, since the reply is populated into the + // resp pointer that's passed into the function return result.Result{}, err } diff --git a/pkg/roachpb/api.proto b/pkg/roachpb/api.proto index 15595ee08306..47f534ec5f8e 100644 --- a/pkg/roachpb/api.proto +++ b/pkg/roachpb/api.proto @@ -356,6 +356,24 @@ message DeleteRangeRequest { // The caller must check the MVCCRangeTombstones version gate before using // this parameter, as it is new in 22.2. bool use_range_tombstone = 5; + + DeleteRangePredicates predicates = 6 [(gogoproto.nullable) = true]; +} + +// DeleteRangePredicates will conduct predicate based DeleteRange, if specified. +message DeleteRangePredicates { + // StartTime specifies an exclusive lower bound to surface keys + // for deletion. If specified, DeleteRange will issue tombstones to keys + // within the span [startKey, endKey) that also have MVCC versions with + // timestamps between (startTime, endTime]. + // + // The main application for this is a rollback of IMPORT INTO on a + // non-empty table. Here, the DeleteRange must only delete keys written by the + // import. In other words, older, pre-import, data cannot be touched. Because + // IMPORT INTO takes a table offline and does not allow masking an existing key, + // this operation will not issue tombstones to pre-import data that were + // written at or below startTime. 
+ util.hlc.Timestamp start_time = 6 [(gogoproto.nullable) = false]; } // A DeleteRangeResponse is the return value from the DeleteRange() diff --git a/pkg/storage/mvcc.go b/pkg/storage/mvcc.go index 5f21a3120180..f6eac7b30f12 100644 --- a/pkg/storage/mvcc.go +++ b/pkg/storage/mvcc.go @@ -2321,8 +2321,16 @@ func MVCCClearTimeRange( }) defer iter.Close() + // clearedMetaKey is the latest surfaced key that will get cleared var clearedMetaKey MVCCKey - var clearedMeta, restoredMeta enginepb.MVCCMetadata + + // clearedMeta contains metadata on the clearedMetaKey + var clearedMeta enginepb.MVCCMetadata + + // restoredMeta contains metadata on the previous version the clearedMetaKey. + // Once the key in clearedMetaKey is cleared, the key represented in + // restoredMeta becomes the latest version of this MVCC key. + var restoredMeta enginepb.MVCCMetadata iter.SeekGE(MVCCKey{Key: key}) for { if ok, err := iter.Valid(); err != nil { @@ -2466,6 +2474,247 @@ func MVCCDeleteRange( return keys, res.ResumeSpan, res.NumKeys, nil } +// PredicateMVCCDeleteRange issues MVCC tombstones at endTime to keys within the +// span [startKey, endKey) that also have MVCC versions that match the predicate +// filters. Long runs of keys will get deleted with a range Tombstone, while +// smaller runs will get deleted with point tombstones. +// +// This operation is non-transactional, but will check for existing intents in +// the target key span, regardless of timestamp, and return a WriteIntentError +// containing up to maxIntents intents. +// +// If an MVCC key surfaced has a timestamp at or above endTime, +// PredicateMVCCDeleteRange returns an error without a resumeSpan, even if +// tombstones were already written to disk. To resolve, manual intervention is necessary. +// +// Limiting the number of keys or ranges of keys processed, via maxBatchSize, +// can still cause a batch that is too large -- in number of bytes -- for raft +// to replicate if the keys are very large. 
So if the total length of the keys +// or key spans cleared exceeds maxBatchByteSize it will also stop and return a +// resume span. +func PredicateMVCCDeleteRange( + ctx context.Context, + rw ReadWriter, + ms *enginepb.MVCCStats, + startKey, endKey roachpb.Key, + endTime hlc.Timestamp, + localTimestamp hlc.ClockTimestamp, + leftPeekBound, rightPeekBound roachpb.Key, + predicates *roachpb.DeleteRangePredicates, + maxBatchSize, maxBatchByteSize int64, + rangeTombstoneThreshold int64, +) (*roachpb.Span, error) { + + var batchSize int64 + var batchByteSize int64 + + // runSize is the number of non-tombstone keys in the run. Since runSize is used to + // track the number of tombstones that will get written in a run and because + // new point tombstones are not written on top of current tombstones, surfaced + // tombstones are not counted in runSize. + var runSize int64 + + // runByteSize is the number of bytes from non-tombstone keys in the current run + var runByteSize int64 + var runStart, runEnd MVCCKey + + const maxIntents = 0 + + if ms == nil { + return nil, errors.AssertionFailedf( + "MVCCStats passed in to PredicateMVCCDeleteRange must be non-nil to ensure proper stats" + + " computation during Delete operations") + } + + // Check for any overlapping intents, and return them to be resolved. + if intents, err := ScanIntents(ctx, rw, startKey, endKey, maxIntents, 0); err != nil { + return nil, err + } else if len(intents) > 0 { + return nil, &roachpb.WriteIntentError{Intents: intents} + } + + // continueRun returns two bools: the first is true if the current run should + // continue; the second is true if the latest key is a tombstone. If a non-nil + // error is returned, the booleans are invalid. The run should continue if: + // + // 1) The latest version of the key is a point or range tombstone, with a timestamp below + // the client provided EndTime. Since the goal is to create long runs, + // any tombstoned key should continue the run. 
+ // + // 2) The latest key is not a tombstone, matches the predicates, + // and has a timestamp below EndTime. + continueRun := func(k MVCCKey, iter SimpleMVCCIterator) (bool, bool, error) { + vRaw := iter.UnsafeValue() + hasPointKey, hasRangeKey := iter.HasPointAndRange() + if hasRangeKey { + rangeKeys := iter.RangeKeys() + if endTime.LessEq(rangeKeys[0].RangeKey.Timestamp) { + return false, false, roachpb.NewWriteTooOldError(endTime, + rangeKeys[0].RangeKey.Timestamp.Next(), k.Key.Clone()) + } + if !hasPointKey { + // landed on bare range key. + return true, true, nil + } + if k.Timestamp.Less(rangeKeys[0].RangeKey.Timestamp) { + // The latest range tombstone shadows the point key; ok to continue run. + return true, true, nil + } + } + + // At this point, there exists a point key that shadows all range keys, + // if they exist. + if endTime.LessEq(k.Timestamp) { + return false, false, roachpb.NewWriteTooOldError(endTime, k.Timestamp.Next(), k.Key.Clone()) + } + if len(vRaw) == 0 { + // The latest version of the key is a point tombstone. + return true, true, nil + } + + // The latest key is a non-tombstoned point key. Conduct predicate filtering. + if k.Timestamp.LessEq(predicates.StartTime) { + return false, false, nil + } + + // TODO (msbutler): use MVCCValueHeader to match on job ID predicate + _, err := DecodeMVCCValue(vRaw) + if err != nil { + return false, false, err + } + return true, false, nil + } + + flushDeleteKeys := func(nonMatch MVCCKey) error { + if runSize == 0 { + return nil + } + if runSize >= rangeTombstoneThreshold || + // Even if we didn't get a large enough number of keys to switch to + // using range tombstones, the byte size of the keys we did get is now too large to + // encode them all within the byte size limit, so use a range tombstone anyway. 
+ batchByteSize+runByteSize >= maxBatchByteSize { + if err := MVCCDeleteRangeUsingTombstone(ctx, rw, ms, + runStart.Key, nonMatch.Key, endTime, localTimestamp, leftPeekBound, rightPeekBound, + maxIntents); err != nil { + return err + } + batchByteSize += int64(runStart.EncodedSize() + nonMatch.EncodedSize()) + batchSize++ + } else if runSize > 0 { + // Use Point tombstones + batchByteSize += runByteSize + batchSize += runSize + _, _, _, err := MVCCDeleteRange( + ctx, rw, ms, runStart.Key, nonMatch.Key, + 0, endTime, localTimestamp, nil, false) + if err != nil { + return err + } + } + runSize = 0 + runStart = MVCCKey{} + runEnd = MVCCKey{} + return nil + } + + // Using the IncrementalIterator with the time-bound iter optimization could + // potentially be a big win here -- the expected use-case for this is to run + // over an entire table's span with a very recent timestamp, issuing tombstones to + // writes of some failed IMPORT and that could very likely only have hit + // some small subset of the table's keyspace. + // + // The MVCCIncrementalIterator uses a non-time-bound iter as its source + // of truth, and only uses the TBI iterator as an optimization when finding + // the next KV to iterate over. This pattern allows us to quickly skip over + // swaths of uninteresting keys, but then iterates over the latest key of each MVCC key. + // + // Notice that the iterator's EndTime is set to hlc.MaxTimestamp, in order to + // detect and fail on any keys written at or after the client provided + // endTime. We don't _expect_ to hit intents or newer keys in the client + // provided span since the PredicateMVCCDeleteRange is only intended for + // non-live key spans, but there could be an intent leftover. 
+ iter := NewMVCCIncrementalIterator(rw, MVCCIncrementalIterOptions{ + EndKey: endKey, + StartTime: predicates.StartTime, + EndTime: hlc.MaxTimestamp, + RangeKeyMaskingBelow: endTime, + KeyTypes: IterKeyTypePointsAndRanges, + }) + defer iter.Close() + + iter.SeekGE(MVCCKey{Key: startKey}) + for { + if ok, err := iter.Valid(); err != nil { + return nil, err + } else if !ok { + break + } + k := iter.UnsafeKey() + toContinue, isTombstone, err := continueRun(k, iter) + if err != nil { + return nil, errors.CombineErrors(err, flushDeleteKeys(k)) + } + if isTombstone { + if hasPoint, hasRange := iter.HasPointAndRange(); hasRange && !hasPoint { + // Because range key information can be inferred at point keys, + // skip over the surfaced range key, and reason about shadowed keys at + // the surfaced point key. + // + // E.g. Scanning the keys below: + // 2 a2 + // 1 o---o + // a b + // + // would result in two surfaced keys: + // {a-b}@1; + // a2, {a-b}@1 + // + // Note that the range key gets surfaced before the point key, + // even though the point key shadows it. + iter.NextIgnoringTime() + } else { + iter.NextKeyIgnoringTime() + } + } else if toContinue { + if batchSize+runSize >= maxBatchSize || batchByteSize+runByteSize >= maxBatchByteSize { + // The matched key will be the start the resume span. + if err := flushDeleteKeys(MVCCKey{Key: k.Key}); err != nil { + return nil, err + } + return &roachpb.Span{Key: append([]byte{}, k.Key...), EndKey: endKey}, nil + } + if runSize == 0 { + runStart.Key = append(runStart.Key[:0], k.Key...) + runStart.Timestamp = k.Timestamp + } + + runEnd.Key = append(runEnd.Key[:0], k.Key...) + runEnd.Timestamp = k.Timestamp + + runSize++ + runByteSize += int64(k.EncodedSize()) + + // Move the iterator to the next key/value in linear iteration even if it + // lies outside (startTime, endTime), to see if there's a need to flush. + iter.NextKeyIgnoringTime() + } else { + // This key does not match. 
Flush the run of matching keys, + // to prevent issuing tombstones on keys that do not match the predicates. + if err := flushDeleteKeys(k); err != nil { + return nil, err + } + // Move the incremental iterator to the next valid MVCC key that can be + // deleted. If TBI was enabled when initializing the incremental iterator, + // this step could jump over large swaths of keys that do not qualify for + // clearing. + iter.NextKey() + } + } + + return nil, flushDeleteKeys(MVCCKey{Key: endKey}) +} + // MVCCDeleteRangeUsingTombstone deletes the given MVCC keyspan at the given // timestamp using an MVCC range tombstone (rather than MVCC point tombstones). // This operation is non-transactional, but will check for existing intents and diff --git a/pkg/storage/mvcc_history_test.go b/pkg/storage/mvcc_history_test.go index 17b04a8e8f02..468ace4cf3c3 100644 --- a/pkg/storage/mvcc_history_test.go +++ b/pkg/storage/mvcc_history_test.go @@ -13,6 +13,7 @@ package storage import ( "context" "fmt" + "math" "path/filepath" "regexp" "sort" @@ -73,6 +74,7 @@ var sstIterVerify = util.ConstantWithMetamorphicTestBool("mvcc-histories-sst-ite // del [t=] [ts=[,]] [localTs=[,]] [resolve [status=]] k= // del_range [t=] [ts=[,]] [localTs=[,]] [resolve [status=]] k= [end=] [max=] [returnKeys] // del_range_ts [ts=[,]] [localTs=[,]] k= end= +// del_range_pred [ts=[,]] [localTs=[,]] k= end= [predTs=,max=,maxBytes=,rangeThreshold=] // increment [t=] [ts=[,]] [localTs=[,]] [resolve [status=]] k= [inc=] // initput [t=] [ts=[,]] [resolve [status=]] k= v= [raw] [failOnTombstones] // merge [t=] [ts=[,]] [resolve [status=]] k= v= [raw] @@ -659,6 +661,7 @@ var commands = map[string]cmd{ "del": {typDataUpdate, cmdDelete}, "del_range": {typDataUpdate, cmdDeleteRange}, "del_range_ts": {typDataUpdate, cmdDeleteRangeTombstone}, + "del_range_pred": {typDataUpdate, cmdDeleteRangePredicate}, "export": {typReadOnly, cmdExport}, "get": {typReadOnly, cmdGet}, "increment": {typDataUpdate, cmdIncrement}, @@ -1018,6 
+1021,39 @@ func cmdDeleteRangeTombstone(e *evalCtx) error { }) } +func cmdDeleteRangePredicate(e *evalCtx) error { + key, endKey := e.getKeyRange() + ts := e.getTs(nil) + localTs := hlc.ClockTimestamp(e.getTsWithName("localTs")) + + max := math.MaxInt64 + if e.hasArg("max") { + e.scanArg("max", &max) + } + + maxBytes := math.MaxInt64 + if e.hasArg("maxBytes") { + e.scanArg("maxBytes", &maxBytes) + } + predicates := &roachpb.DeleteRangePredicates{ + StartTime: e.getTsWithName("predTs"), + } + rangeThreshold := 64 + if e.hasArg("rangeThreshold") { + e.scanArg("rangeThreshold", &rangeThreshold) + } + return e.withWriter("del_range_ts", func(rw ReadWriter) error { + resumeSpan, err := PredicateMVCCDeleteRange(e.ctx, rw, e.ms, key, endKey, ts, + localTs, nil, nil, predicates, int64(max), int64(maxBytes), int64(rangeThreshold)) + + if resumeSpan != nil { + e.results.buf.Printf("del_range: resume span [%s,%s)\n", resumeSpan.Key, resumeSpan.EndKey) + } + return err + }, + ) +} + func cmdGet(e *evalCtx) error { txn := e.getTxn(optional) key := e.getKey() diff --git a/pkg/storage/testdata/mvcc_histories/delete_range_predicate b/pkg/storage/testdata/mvcc_histories/delete_range_predicate new file mode 100644 index 000000000000..e55d8864216f --- /dev/null +++ b/pkg/storage/testdata/mvcc_histories/delete_range_predicate @@ -0,0 +1,255 @@ +# Tests MVCC Del Range with timestamp predicate. +# +# Set up some point keys, point tombstones x, range tombstones o--o, +# and intents []. 
+# +# 7 [i7] +# 6 +# 5 +# 4 x d4 f4 x h4 o-------------------o +# 3 b3 +# 2 a2 e2 g2 +# 1 d1 +# 0 +# a b c d e f g h i j k l m n o p +run ok +put k=a ts=2 v=a2 +del k=a ts=4 +put k=b ts=3 v=b3 +put k=d ts=1 v=d1 +put k=d ts=4 v=d4 +put k=e ts=2 v=e2 +put k=f ts=4 v=f4 +put k=g ts=2 v=g2 +del k=g ts=4 +put k=h ts=4 v=h4 +del_range_ts k=k end=p ts=4 +with t=A + txn_begin ts=7 + put k=i v=i7 +---- +>> at end: +txn: "A" meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} lock=true stat=PENDING rts=7.000000000,0 wto=false gul=0,0 +rangekey: {k-p}/[4.000000000,0=/] +data: "a"/4.000000000,0 -> / +data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 +data: "d"/4.000000000,0 -> /BYTES/d4 +data: "d"/1.000000000,0 -> /BYTES/d1 +data: "e"/2.000000000,0 -> /BYTES/e2 +data: "f"/4.000000000,0 -> /BYTES/f4 +data: "g"/4.000000000,0 -> / +data: "g"/2.000000000,0 -> /BYTES/g2 +data: "h"/4.000000000,0 -> /BYTES/h4 +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/7.000000000,0 -> /BYTES/i7 + +# Writing next to or above point keys and tombstones should work. 
+run ok +del_range_pred k=a end=i ts=5 predTs=3 rangeThreshold=2 +---- +>> at end: +rangekey: {f-i}/[5.000000000,0=/] +rangekey: {k-p}/[4.000000000,0=/] +data: "a"/4.000000000,0 -> / +data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 +data: "d"/5.000000000,0 -> / +data: "d"/4.000000000,0 -> /BYTES/d4 +data: "d"/1.000000000,0 -> /BYTES/d1 +data: "e"/2.000000000,0 -> /BYTES/e2 +data: "f"/4.000000000,0 -> /BYTES/f4 +data: "g"/4.000000000,0 -> / +data: "g"/2.000000000,0 -> /BYTES/g2 +data: "h"/4.000000000,0 -> /BYTES/h4 +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/7.000000000,0 -> /BYTES/i7 + +# error on intent, no tombstones should be written +run error +del_range_pred k=a end=p ts=6 predTs=1 +---- +>> at end: +rangekey: {f-i}/[5.000000000,0=/] +rangekey: {k-p}/[4.000000000,0=/] +data: "a"/4.000000000,0 -> / +data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 +data: "d"/5.000000000,0 -> / +data: "d"/4.000000000,0 -> /BYTES/d4 +data: "d"/1.000000000,0 -> /BYTES/d1 +data: "e"/2.000000000,0 -> /BYTES/e2 +data: "f"/4.000000000,0 -> /BYTES/f4 +data: "g"/4.000000000,0 -> / +data: "g"/2.000000000,0 -> /BYTES/g2 +data: "h"/4.000000000,0 -> /BYTES/h4 +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/7.000000000,0 -> /BYTES/i7 +error: (*roachpb.WriteIntentError:) conflicting intents on "i" + +# error encountering point key at d5. +# a tombstone should get written to c5, since we +# flush on errors once iteration has started. However, a tombstone should +# not get written to e5 as DeleteRange has been aborted at 'd'. 
+run error +put k=c ts=2 v=c2 +del_range_pred k=c end=f ts=5 predTs=1 +---- +>> at end: +rangekey: {f-i}/[5.000000000,0=/] +rangekey: {k-p}/[4.000000000,0=/] +data: "a"/4.000000000,0 -> / +data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 +data: "c"/5.000000000,0 -> / +data: "c"/2.000000000,0 -> /BYTES/c2 +data: "d"/5.000000000,0 -> / +data: "d"/4.000000000,0 -> /BYTES/d4 +data: "d"/1.000000000,0 -> /BYTES/d1 +data: "e"/2.000000000,0 -> /BYTES/e2 +data: "f"/4.000000000,0 -> /BYTES/f4 +data: "g"/4.000000000,0 -> / +data: "g"/2.000000000,0 -> /BYTES/g2 +data: "h"/4.000000000,0 -> /BYTES/h4 +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/7.000000000,0 -> /BYTES/i7 +error: (*roachpb.WriteTooOldError:) WriteTooOldError: write for key "d" at timestamp 5.000000000,0 too old; wrote at 5.000000000,1 + +# error encountering range key at k4. +# a tombstone should get written to j4, since we +# flush on errors once iteration has started. However, a tombstone should +# not get written to q4 as DeleteRange has been aborted at rangekey {k-p}4. 
+run error +put k=j ts=2 v=j2 +put k=q ts=2 v=q2 +del_range_pred k=j end=r ts=4 predTs=1 rangeThreshold=2 +---- +>> at end: +rangekey: {f-i}/[5.000000000,0=/] +rangekey: {k-p}/[4.000000000,0=/] +data: "a"/4.000000000,0 -> / +data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 +data: "c"/5.000000000,0 -> / +data: "c"/2.000000000,0 -> /BYTES/c2 +data: "d"/5.000000000,0 -> / +data: "d"/4.000000000,0 -> /BYTES/d4 +data: "d"/1.000000000,0 -> /BYTES/d1 +data: "e"/2.000000000,0 -> /BYTES/e2 +data: "f"/4.000000000,0 -> /BYTES/f4 +data: "g"/4.000000000,0 -> / +data: "g"/2.000000000,0 -> /BYTES/g2 +data: "h"/4.000000000,0 -> /BYTES/h4 +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/7.000000000,0 -> /BYTES/i7 +data: "j"/4.000000000,0 -> / +data: "j"/2.000000000,0 -> /BYTES/j2 +data: "q"/2.000000000,0 -> /BYTES/q2 +error: (*roachpb.WriteTooOldError:) WriteTooOldError: write for key "k" at timestamp 4.000000000,0 too old; wrote at 4.000000000,1 + +# At this point the keyspace looks like this: +# 7 [i7] +# 6 +# 5 x x o-----------o +# 4 x d4 f4 x h4 x o-------------------o +# 3 b3 +# 2 a2 c2 e2 g2 j2 +# 1 d1 +# 0 +# a b c d e f g h i j k l m n o p +# +# check that we flush with a range tombstone, if maxBatchByteSize is exceeded +# even though range tombstone threshold has not been met +# and return a resume span +run ok +del_range_pred k=a end=i ts=6 predTs=1 maxBytes=1 +---- +del_range: resume span ["e","i") +>> at end: +rangekey: {b-e}/[6.000000000,0=/] +rangekey: {f-i}/[5.000000000,0=/] +rangekey: {k-p}/[4.000000000,0=/] +data: "a"/4.000000000,0 -> / +data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 +data: "c"/5.000000000,0 -> / +data: "c"/2.000000000,0 -> /BYTES/c2 +data: "d"/5.000000000,0 -> / +data: "d"/4.000000000,0 -> /BYTES/d4 +data: "d"/1.000000000,0 -> /BYTES/d1 +data: "e"/2.000000000,0 -> 
/BYTES/e2 +data: "f"/4.000000000,0 -> /BYTES/f4 +data: "g"/4.000000000,0 -> / +data: "g"/2.000000000,0 -> /BYTES/g2 +data: "h"/4.000000000,0 -> /BYTES/h4 +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/7.000000000,0 -> /BYTES/i7 +data: "j"/4.000000000,0 -> / +data: "j"/2.000000000,0 -> /BYTES/j2 +data: "q"/2.000000000,0 -> /BYTES/q2 + +# check that we flush properly if maxBatchSize is exceeded. +# Since max is 1, write a tombstone to e, and as soon as it sees the +# next eligible key to delete (f), return a resume span. +# Note that we don't count shadowed tombstones in the batchSize. +run ok +put k=f ts=6 v=f6 +del_range_pred k=c end=i ts=7 predTs=1 max=1 +---- +del_range: resume span ["f","i") +>> at end: +rangekey: {b-e}/[6.000000000,0=/] +rangekey: {f-i}/[5.000000000,0=/] +rangekey: {k-p}/[4.000000000,0=/] +data: "a"/4.000000000,0 -> / +data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 +data: "c"/5.000000000,0 -> / +data: "c"/2.000000000,0 -> /BYTES/c2 +data: "d"/5.000000000,0 -> / +data: "d"/4.000000000,0 -> /BYTES/d4 +data: "d"/1.000000000,0 -> /BYTES/d1 +data: "e"/7.000000000,0 -> / +data: "e"/2.000000000,0 -> /BYTES/e2 +data: "f"/6.000000000,0 -> /BYTES/f6 +data: "f"/4.000000000,0 -> /BYTES/f4 +data: "g"/4.000000000,0 -> / +data: "g"/2.000000000,0 -> /BYTES/g2 +data: "h"/4.000000000,0 -> /BYTES/h4 +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/7.000000000,0 -> /BYTES/i7 +data: "j"/4.000000000,0 -> / +data: "j"/2.000000000,0 -> /BYTES/j2 +data: "q"/2.000000000,0 -> /BYTES/q2 + +# Run the same DeleteRange as above at ts 8, but with max=3. +# No resume span should get returned because the iterator goes through +# the whole span without encountering another eligible key to flush. +run ok 
+del_range_pred k=c end=i ts=8 predTs=1 max=3 +---- +>> at end: +rangekey: {b-e}/[6.000000000,0=/] +rangekey: {f-i}/[5.000000000,0=/] +rangekey: {k-p}/[4.000000000,0=/] +data: "a"/4.000000000,0 -> / +data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 +data: "c"/5.000000000,0 -> / +data: "c"/2.000000000,0 -> /BYTES/c2 +data: "d"/5.000000000,0 -> / +data: "d"/4.000000000,0 -> /BYTES/d4 +data: "d"/1.000000000,0 -> /BYTES/d1 +data: "e"/7.000000000,0 -> / +data: "e"/2.000000000,0 -> /BYTES/e2 +data: "f"/8.000000000,0 -> / +data: "f"/6.000000000,0 -> /BYTES/f6 +data: "f"/4.000000000,0 -> /BYTES/f4 +data: "g"/4.000000000,0 -> / +data: "g"/2.000000000,0 -> /BYTES/g2 +data: "h"/4.000000000,0 -> /BYTES/h4 +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/7.000000000,0 -> /BYTES/i7 +data: "j"/4.000000000,0 -> / +data: "j"/2.000000000,0 -> /BYTES/j2 +data: "q"/2.000000000,0 -> /BYTES/q2