diff --git a/pkg/kv/kvserver/batcheval/cmd_delete_range.go b/pkg/kv/kvserver/batcheval/cmd_delete_range.go index f0c0c52cd82e..6add8cb78601 100644 --- a/pkg/kv/kvserver/batcheval/cmd_delete_range.go +++ b/pkg/kv/kvserver/batcheval/cmd_delete_range.go @@ -12,6 +12,7 @@ package batcheval import ( "context" + "math" "time" "github.com/cockroachdb/cockroach/pkg/keys" @@ -67,8 +68,8 @@ func DeleteRange( h := cArgs.Header reply := resp.(*roachpb.DeleteRangeResponse) - // Use experimental MVCC range tombstone if requested. - if args.UseRangeTombstone { + // Use MVCC range tombstone if requested. + if args.UseRangeTombstone || args.Predicates != nil { if cArgs.Header.Txn != nil { return result.Result{}, ErrTransactionUnsupported } @@ -85,8 +86,32 @@ func DeleteRange( args.Key, args.EndKey, desc.StartKey.AsRawKey(), desc.EndKey.AsRawKey()) maxIntents := storage.MaxIntentsPerWriteIntentError.Get(&cArgs.EvalCtx.ClusterSettings().SV) - err := storage.MVCCDeleteRangeUsingTombstone(ctx, readWriter, cArgs.Stats, - args.Key, args.EndKey, h.Timestamp, cArgs.Now, leftPeekBound, rightPeekBound, maxIntents) + if args.Predicates == nil { + err := storage.MVCCDeleteRangeUsingTombstone(ctx, readWriter, cArgs.Stats, + args.Key, args.EndKey, h.Timestamp, cArgs.Now, leftPeekBound, rightPeekBound, maxIntents) + return result.Result{}, err + } + maxBatchSize := h.MaxSpanRequestKeys + if h.MaxSpanRequestKeys == 0 { + maxBatchSize = math.MaxInt64 + } + + // The minimum number of keys required in a run to use a range tombstone + // + // TODO (msbutler): Tune the threshold once DeleteRange and DeleteRangeUsingTombstone have + // been further optimized. 
+ defaultRangeTombstoneThreshold := int64(64) + resumeSpan, err := storage.PredicateMVCCDeleteRange(ctx, readWriter, cArgs.Stats, + args.Key, args.EndKey, h.Timestamp, cArgs.Now, leftPeekBound, rightPeekBound, + args.Predicates, maxBatchSize, maxRevertRangeBatchBytes, defaultRangeTombstoneThreshold) + + // TODO (msbutler): plumb number of keys deleted into response, if needed + if resumeSpan != nil { + reply.ResumeSpan = resumeSpan + reply.ResumeReason = roachpb.RESUME_KEY_LIMIT + } + // Return result is always empty, since the reply is populated into the + // resp pointer that's passed into the function return result.Result{}, err } diff --git a/pkg/roachpb/api.proto b/pkg/roachpb/api.proto index 15595ee08306..47f534ec5f8e 100644 --- a/pkg/roachpb/api.proto +++ b/pkg/roachpb/api.proto @@ -356,6 +356,24 @@ message DeleteRangeRequest { // The caller must check the MVCCRangeTombstones version gate before using // this parameter, as it is new in 22.2. bool use_range_tombstone = 5; + + DeleteRangePredicates predicates = 6 [(gogoproto.nullable) = true]; +} + +// DeleteRangePredicates will conduct predicate based DeleteRange, if specified. +message DeleteRangePredicates { + // StartTime specifies an exclusive lower bound to surface keys + // for deletion. If specified, DeleteRange will issue tombstones to keys + // within the span [startKey, endKey) that also have MVCC versions with + // timestamps between (startTime, endTime]. + // + // The main application for this is a rollback of IMPORT INTO on a + // non-empty table. Here, the DeleteRange must only delete keys written by the + // import. In other words, older, pre-import, data cannot be touched. Because + // IMPORT INTO takes a table offline and does not allow masking an existing key, + // this operation will not issue tombstones to pre-import data that were + // written at or below StartTime. 
+  util.hlc.Timestamp start_time = 6 [(gogoproto.nullable) = false];
 }
 
 // A DeleteRangeResponse is the return value from the DeleteRange()
diff --git a/pkg/storage/mvcc.go b/pkg/storage/mvcc.go
index 5f21a3120180..f6eac7b30f12 100644
--- a/pkg/storage/mvcc.go
+++ b/pkg/storage/mvcc.go
@@ -2321,8 +2321,16 @@ func MVCCClearTimeRange(
 	})
 	defer iter.Close()
 
+	// clearedMetaKey is the latest surfaced key that will get cleared
 	var clearedMetaKey MVCCKey
-	var clearedMeta, restoredMeta enginepb.MVCCMetadata
+
+	// clearedMeta contains metadata on the clearedMetaKey
+	var clearedMeta enginepb.MVCCMetadata
+
+	// restoredMeta contains metadata on the previous version of the clearedMetaKey.
+	// Once the key in clearedMetaKey is cleared, the key represented in
+	// restoredMeta becomes the latest version of this MVCC key.
+	var restoredMeta enginepb.MVCCMetadata
 	iter.SeekGE(MVCCKey{Key: key})
 	for {
 		if ok, err := iter.Valid(); err != nil {
@@ -2466,6 +2474,250 @@ func MVCCDeleteRange(
 	return keys, res.ResumeSpan, res.NumKeys, nil
 }
 
+// PredicateMVCCDeleteRange issues MVCC tombstones at endTime to keys within the
+// span [startKey, endKey) that also have MVCC versions that match the predicate
+// filters. Long runs of keys will get deleted with a range Tombstone, while
+// smaller runs will get deleted with point tombstones.
+//
+// This operation is non-transactional, but will check for existing intents in
+// the target key span, regardless of timestamp, and return a WriteIntentError
+// containing up to maxIntents intents.
+//
+// If an MVCC key surfaced has a timestamp at or above endTime,
+// PredicateMVCCDeleteRange returns an error without a resumeSpan, even if
+// tombstones were already written to disk. To resolve, manual intervention is necessary.
+//
+// Limiting the number of keys or ranges of keys processed, via maxBatchSize,
+// can still cause a batch that is too large -- in number of bytes -- for raft
+// to replicate if the keys are very large. So if the total length of the keys
+// or key spans cleared exceeds maxBatchByteSize it will also stop and return a
+// resume span.
+func PredicateMVCCDeleteRange(
+	ctx context.Context,
+	rw ReadWriter,
+	ms *enginepb.MVCCStats,
+	startKey, endKey roachpb.Key,
+	endTime hlc.Timestamp,
+	localTimestamp hlc.ClockTimestamp,
+	leftPeekBound, rightPeekBound roachpb.Key,
+	predicates *roachpb.DeleteRangePredicates,
+	maxBatchSize, maxBatchByteSize int64,
+	rangeTombstoneThreshold int64,
+) (*roachpb.Span, error) {
+
+	var batchSize int64
+	var batchByteSize int64
+
+	// runSize is the number of non-tombstone keys in the run. Since runSize is used to
+	// track the number of tombstones that will get written in a run and because
+	// new point tombstones are not written on top of current tombstones, surfaced
+	// tombstones are not counted in runSize.
+	var runSize int64
+
+	// runByteSize is the number of bytes from non-tombstone keys in the current run
+	var runByteSize int64
+	var runStart, runEnd MVCCKey
+
+	const maxIntents = 0
+
+	if ms == nil {
+		return nil, errors.AssertionFailedf(
+			"MVCCStats passed in to PredicateMVCCDeleteRange must be non-nil to ensure proper stats" +
+				" computation during Delete operations")
+	}
+
+	// Check for any overlapping intents, and return them to be resolved.
+	if intents, err := ScanIntents(ctx, rw, startKey, endKey, maxIntents, 0); err != nil {
+		return nil, err
+	} else if len(intents) > 0 {
+		return nil, &roachpb.WriteIntentError{Intents: intents}
+	}
+
+	// continueRun returns two bools: the first is true if the current run should
+	// continue; the second is true if the latest key is a tombstone. If a non-nil
+	// error is returned, the booleans are invalid. The run should continue if:
+	//
+	// 1) The latest version of the key is a point or range tombstone, with a timestamp below
+	// the client provided EndTime. Since the goal is to create long runs,
+	// any tombstoned key should continue the run.
+	//
+	// 2) The latest key is not a tombstone, matches the predicates,
+	// and has a timestamp below EndTime.
+	continueRun := func(k MVCCKey, iter SimpleMVCCIterator) (bool, bool, error) {
+		vRaw := iter.UnsafeValue()
+		hasPointKey, hasRangeKey := iter.HasPointAndRange()
+		if hasRangeKey {
+			rangeKeys := iter.RangeKeys()
+			if endTime.LessEq(rangeKeys[0].RangeKey.Timestamp) {
+				return false, false, roachpb.NewWriteTooOldError(endTime,
+					rangeKeys[0].RangeKey.Timestamp.Next(), k.Key.Clone())
+			}
+			if !hasPointKey {
+				// landed on bare range key.
+				return true, true, nil
+			}
+			if k.Timestamp.Less(rangeKeys[0].RangeKey.Timestamp) {
+				// The latest range tombstone shadows the point key; ok to continue run.
+				return true, true, nil
+			}
+		}
+
+		// At this point, there exists a point key that shadows all range keys,
+		// if they exist.
+		if endTime.LessEq(k.Timestamp) {
+			return false, false, roachpb.NewWriteTooOldError(endTime, k.Timestamp.Next(), k.Key.Clone())
+		}
+		if len(vRaw) == 0 {
+			// The latest version of the key is a point tombstone.
+			return true, true, nil
+		}
+
+		// The latest key is a non-tombstoned point key. Conduct predicate filtering.
+		if k.Timestamp.LessEq(predicates.StartTime) {
+			return false, false, nil
+		}
+
+		// TODO (msbutler): use MVCCValueHeader to match on job ID predicate
+		_, err := DecodeMVCCValue(vRaw)
+		if err != nil {
+			return false, false, err
+		}
+		return true, false, nil
+	}
+
+	flushDeleteKeys := func(nonMatch MVCCKey) error {
+		if runSize == 0 {
+			return nil
+		}
+		if runSize >= rangeTombstoneThreshold ||
+			// Even if we didn't get a large enough number of keys to switch to
+			// using range tombstones, the byte size of the keys we did get is now too large to
+			// encode them all within the byte size limit, so use a range tombstone anyway.
+			batchByteSize+runByteSize >= maxBatchByteSize {
+			if err := MVCCDeleteRangeUsingTombstone(ctx, rw, ms,
+				runStart.Key, nonMatch.Key, endTime, localTimestamp, leftPeekBound, rightPeekBound,
+				maxIntents); err != nil {
+				return err
+			}
+			batchByteSize += int64(runStart.EncodedSize() + nonMatch.EncodedSize())
+			batchSize++
+		} else {
+			// Use Point tombstones
+			batchByteSize += runByteSize
+			batchSize += runSize
+			_, _, _, err := MVCCDeleteRange(
+				ctx, rw, ms, runStart.Key, nonMatch.Key,
+				0, endTime, localTimestamp, nil, false)
+			if err != nil {
+				return err
+			}
+		}
+		runSize = 0
+		// Reset the byte count along with the key count; otherwise bytes from
+		// already-flushed runs are re-counted against maxBatchByteSize.
+		runByteSize = 0
+		runStart = MVCCKey{}
+		runEnd = MVCCKey{}
+		return nil
+	}
+
+	// Using the IncrementalIterator with the time-bound iter optimization could
+	// potentially be a big win here -- the expected use-case for this is to run
+	// over an entire table's span with a very recent timestamp, issuing tombstones to
+	// writes of some failed IMPORT and that could very likely only have hit
+	// some small subset of the table's keyspace.
+	//
+	// The MVCCIncrementalIterator uses a non-time-bound iter as its source
+	// of truth, and only uses the TBI iterator as an optimization when finding
+	// the next KV to iterate over. This pattern allows us to quickly skip over
+	// swaths of uninteresting keys, but then iterates over the latest key of each MVCC key.
+	//
+	// Notice that the iterator's EndTime is set to hlc.MaxTimestamp, in order to
+	// detect and fail on any keys written at or after the client provided
+	// endTime. We don't _expect_ to hit intents or newer keys in the client
+	// provided span since the PredicateMVCCDeleteRange is only intended for
+	// non-live key spans, but there could be an intent leftover.
+	iter := NewMVCCIncrementalIterator(rw, MVCCIncrementalIterOptions{
+		EndKey:               endKey,
+		StartTime:            predicates.StartTime,
+		EndTime:              hlc.MaxTimestamp,
+		RangeKeyMaskingBelow: endTime,
+		KeyTypes:             IterKeyTypePointsAndRanges,
+	})
+	defer iter.Close()
+
+	iter.SeekGE(MVCCKey{Key: startKey})
+	for {
+		if ok, err := iter.Valid(); err != nil {
+			return nil, err
+		} else if !ok {
+			break
+		}
+		k := iter.UnsafeKey()
+		toContinue, isTombstone, err := continueRun(k, iter)
+		if err != nil {
+			return nil, errors.CombineErrors(err, flushDeleteKeys(k))
+		}
+		if isTombstone {
+			if hasPoint, hasRange := iter.HasPointAndRange(); hasRange && !hasPoint {
+				// Because range key information can be inferred at point keys,
+				// skip over the surfaced range key, and reason about shadowed keys at
+				// the surfaced point key.
+				//
+				// E.g. Scanning the keys below:
+				//  2  a2
+				//  1  o---o
+				//     a   b
+				//
+				//  would result in two surfaced keys:
+				//   {a-b}@1;
+				//   a2, {a-b}@1
+				//
+				// Note that the range key gets surfaced before the point key,
+				// even though the point key shadows it.
+				iter.NextIgnoringTime()
+			} else {
+				iter.NextKeyIgnoringTime()
+			}
+		} else if toContinue {
+			if batchSize+runSize >= maxBatchSize || batchByteSize+runByteSize >= maxBatchByteSize {
+				// The matched key will be the start of the resume span.
+				if err := flushDeleteKeys(MVCCKey{Key: k.Key}); err != nil {
+					return nil, err
+				}
+				return &roachpb.Span{Key: append([]byte{}, k.Key...), EndKey: endKey}, nil
+			}
+			if runSize == 0 {
+				runStart.Key = append(runStart.Key[:0], k.Key...)
+				runStart.Timestamp = k.Timestamp
+			}
+
+			runEnd.Key = append(runEnd.Key[:0], k.Key...)
+			runEnd.Timestamp = k.Timestamp
+
+			runSize++
+			runByteSize += int64(k.EncodedSize())
+
+			// Move the iterator to the next key/value in linear iteration even if it
+			// lies outside (startTime, endTime), to see if there's a need to flush.
+			iter.NextKeyIgnoringTime()
+		} else {
+			// This key does not match. Flush the run of matching keys,
+			// to prevent issuing tombstones on keys that do not match the predicates.
+			if err := flushDeleteKeys(k); err != nil {
+				return nil, err
+			}
+			// Move the incremental iterator to the next valid MVCC key that can be
+			// deleted. If TBI was enabled when initializing the incremental iterator,
+			// this step could jump over large swaths of keys that do not qualify for
+			// clearing.
+			iter.NextKey()
+		}
+	}
+
+	return nil, flushDeleteKeys(MVCCKey{Key: endKey})
+}
+
 // MVCCDeleteRangeUsingTombstone deletes the given MVCC keyspan at the given
 // timestamp using an MVCC range tombstone (rather than MVCC point tombstones).
 // This operation is non-transactional, but will check for existing intents and
diff --git a/pkg/storage/mvcc_history_test.go b/pkg/storage/mvcc_history_test.go
index 17b04a8e8f02..468ace4cf3c3 100644
--- a/pkg/storage/mvcc_history_test.go
+++ b/pkg/storage/mvcc_history_test.go
@@ -13,6 +13,7 @@ package storage
 import (
 	"context"
 	"fmt"
+	"math"
 	"path/filepath"
 	"regexp"
 	"sort"
@@ -73,6 +74,7 @@ var sstIterVerify = util.ConstantWithMetamorphicTestBool("mvcc-histories-sst-ite
 // del [t=] [ts=[,]] [localTs=[,]] [resolve [status=]] k=
 // del_range [t=] [ts=[,]] [localTs=[,]] [resolve [status=]] k= [end=] [max=] [returnKeys]
 // del_range_ts [ts=[,]] [localTs=[,]] k= end=
+// del_range_pred [ts=[,]] [localTs=[,]] k= end= [predTs=,max=,maxBytes=,rangeThreshold=]
 // increment [t=] [ts=[,]] [localTs=[,]] [resolve [status=]] k= [inc=]
 // initput [t=] [ts=[,]] [resolve [status=]] k= v= [raw] [failOnTombstones]
 // merge [t=] [ts=[,]] [resolve [status=]] k= v= [raw]
@@ -659,6 +661,7 @@ var commands = map[string]cmd{
 	"del": {typDataUpdate, cmdDelete},
 	"del_range": {typDataUpdate, cmdDeleteRange},
 	"del_range_ts": {typDataUpdate, cmdDeleteRangeTombstone},
+	"del_range_pred": {typDataUpdate, cmdDeleteRangePredicate},
 	"export": {typReadOnly, cmdExport},
 	"get": {typReadOnly, cmdGet},
 	"increment": {typDataUpdate, cmdIncrement},
@@ -1018,6
+1021,45 @@ func cmdDeleteRangeTombstone(e *evalCtx) error {
 	})
 }
 
+// cmdDeleteRangePredicate implements the del_range_pred command. It runs
+// PredicateMVCCDeleteRange over the key range given by k/end at ts, with the
+// predTs argument supplying the predicate StartTime. The optional max,
+// maxBytes and rangeThreshold arguments default to math.MaxInt64,
+// math.MaxInt64 and 64. Any resume span returned is printed to the results.
+func cmdDeleteRangePredicate(e *evalCtx) error {
+	key, endKey := e.getKeyRange()
+	ts := e.getTs(nil)
+	localTs := hlc.ClockTimestamp(e.getTsWithName("localTs"))
+
+	max := math.MaxInt64
+	if e.hasArg("max") {
+		e.scanArg("max", &max)
+	}
+
+	maxBytes := math.MaxInt64
+	if e.hasArg("maxBytes") {
+		e.scanArg("maxBytes", &maxBytes)
+	}
+	predicates := &roachpb.DeleteRangePredicates{
+		StartTime: e.getTsWithName("predTs"),
+	}
+	rangeThreshold := 64
+	if e.hasArg("rangeThreshold") {
+		e.scanArg("rangeThreshold", &rangeThreshold)
+	}
+	return e.withWriter("del_range_pred", func(rw ReadWriter) error {
+		resumeSpan, err := PredicateMVCCDeleteRange(e.ctx, rw, e.ms, key, endKey, ts,
+			localTs, nil, nil, predicates, int64(max), int64(maxBytes), int64(rangeThreshold))
+
+		if resumeSpan != nil {
+			// The "del_range:" prefix is what the delete_range_predicate fixture expects.
+			e.results.buf.Printf("del_range: resume span [%s,%s)\n", resumeSpan.Key, resumeSpan.EndKey)
+		}
+		return err
+	},
+	)
+}
+
 func cmdGet(e *evalCtx) error {
 	txn := e.getTxn(optional)
 	key := e.getKey()
diff --git a/pkg/storage/testdata/mvcc_histories/delete_range_predicate b/pkg/storage/testdata/mvcc_histories/delete_range_predicate
new file mode 100644
index 000000000000..e55d8864216f
--- /dev/null
+++ b/pkg/storage/testdata/mvcc_histories/delete_range_predicate
@@ -0,0 +1,255 @@
+# Tests MVCC Del Range with timestamp predicate.
+#
+# Set up some point keys, point tombstones x, range tombstones o--o,
+# and intents [].
+# +# 7 [i7] +# 6 +# 5 +# 4 x d4 f4 x h4 o-------------------o +# 3 b3 +# 2 a2 e2 g2 +# 1 d1 +# 0 +# a b c d e f g h i j k l m n o p +run ok +put k=a ts=2 v=a2 +del k=a ts=4 +put k=b ts=3 v=b3 +put k=d ts=1 v=d1 +put k=d ts=4 v=d4 +put k=e ts=2 v=e2 +put k=f ts=4 v=f4 +put k=g ts=2 v=g2 +del k=g ts=4 +put k=h ts=4 v=h4 +del_range_ts k=k end=p ts=4 +with t=A + txn_begin ts=7 + put k=i v=i7 +---- +>> at end: +txn: "A" meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} lock=true stat=PENDING rts=7.000000000,0 wto=false gul=0,0 +rangekey: {k-p}/[4.000000000,0=/] +data: "a"/4.000000000,0 -> / +data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 +data: "d"/4.000000000,0 -> /BYTES/d4 +data: "d"/1.000000000,0 -> /BYTES/d1 +data: "e"/2.000000000,0 -> /BYTES/e2 +data: "f"/4.000000000,0 -> /BYTES/f4 +data: "g"/4.000000000,0 -> / +data: "g"/2.000000000,0 -> /BYTES/g2 +data: "h"/4.000000000,0 -> /BYTES/h4 +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/7.000000000,0 -> /BYTES/i7 + +# Writing next to or above point keys and tombstones should work. 
+run ok +del_range_pred k=a end=i ts=5 predTs=3 rangeThreshold=2 +---- +>> at end: +rangekey: {f-i}/[5.000000000,0=/] +rangekey: {k-p}/[4.000000000,0=/] +data: "a"/4.000000000,0 -> / +data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 +data: "d"/5.000000000,0 -> / +data: "d"/4.000000000,0 -> /BYTES/d4 +data: "d"/1.000000000,0 -> /BYTES/d1 +data: "e"/2.000000000,0 -> /BYTES/e2 +data: "f"/4.000000000,0 -> /BYTES/f4 +data: "g"/4.000000000,0 -> / +data: "g"/2.000000000,0 -> /BYTES/g2 +data: "h"/4.000000000,0 -> /BYTES/h4 +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/7.000000000,0 -> /BYTES/i7 + +# error on intent, no tombstones should be written +run error +del_range_pred k=a end=p ts=6 predTs=1 +---- +>> at end: +rangekey: {f-i}/[5.000000000,0=/] +rangekey: {k-p}/[4.000000000,0=/] +data: "a"/4.000000000,0 -> / +data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 +data: "d"/5.000000000,0 -> / +data: "d"/4.000000000,0 -> /BYTES/d4 +data: "d"/1.000000000,0 -> /BYTES/d1 +data: "e"/2.000000000,0 -> /BYTES/e2 +data: "f"/4.000000000,0 -> /BYTES/f4 +data: "g"/4.000000000,0 -> / +data: "g"/2.000000000,0 -> /BYTES/g2 +data: "h"/4.000000000,0 -> /BYTES/h4 +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/7.000000000,0 -> /BYTES/i7 +error: (*roachpb.WriteIntentError:) conflicting intents on "i" + +# error encountering point key at d5. +# a tombstone should get written to c5, since we +# flush on errors once iteration has started. However, a tombstone should +# not get written to e5 as DeleteRange has been aborted at 'd'. 
+run error +put k=c ts=2 v=c2 +del_range_pred k=c end=f ts=5 predTs=1 +---- +>> at end: +rangekey: {f-i}/[5.000000000,0=/] +rangekey: {k-p}/[4.000000000,0=/] +data: "a"/4.000000000,0 -> / +data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 +data: "c"/5.000000000,0 -> / +data: "c"/2.000000000,0 -> /BYTES/c2 +data: "d"/5.000000000,0 -> / +data: "d"/4.000000000,0 -> /BYTES/d4 +data: "d"/1.000000000,0 -> /BYTES/d1 +data: "e"/2.000000000,0 -> /BYTES/e2 +data: "f"/4.000000000,0 -> /BYTES/f4 +data: "g"/4.000000000,0 -> / +data: "g"/2.000000000,0 -> /BYTES/g2 +data: "h"/4.000000000,0 -> /BYTES/h4 +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/7.000000000,0 -> /BYTES/i7 +error: (*roachpb.WriteTooOldError:) WriteTooOldError: write for key "d" at timestamp 5.000000000,0 too old; wrote at 5.000000000,1 + +# error encountering range key at k4. +# a tombstone should get written to j4, since we +# flush on errors once iteration has started. However, a tombstone should +# not get written to q4 as DeleteRange has been aborted at rangekey {k-p}4. 
+run error +put k=j ts=2 v=j2 +put k=q ts=2 v=q2 +del_range_pred k=j end=r ts=4 predTs=1 rangeThreshold=2 +---- +>> at end: +rangekey: {f-i}/[5.000000000,0=/] +rangekey: {k-p}/[4.000000000,0=/] +data: "a"/4.000000000,0 -> / +data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 +data: "c"/5.000000000,0 -> / +data: "c"/2.000000000,0 -> /BYTES/c2 +data: "d"/5.000000000,0 -> / +data: "d"/4.000000000,0 -> /BYTES/d4 +data: "d"/1.000000000,0 -> /BYTES/d1 +data: "e"/2.000000000,0 -> /BYTES/e2 +data: "f"/4.000000000,0 -> /BYTES/f4 +data: "g"/4.000000000,0 -> / +data: "g"/2.000000000,0 -> /BYTES/g2 +data: "h"/4.000000000,0 -> /BYTES/h4 +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/7.000000000,0 -> /BYTES/i7 +data: "j"/4.000000000,0 -> / +data: "j"/2.000000000,0 -> /BYTES/j2 +data: "q"/2.000000000,0 -> /BYTES/q2 +error: (*roachpb.WriteTooOldError:) WriteTooOldError: write for key "k" at timestamp 4.000000000,0 too old; wrote at 4.000000000,1 + +# At this point the keyspace looks like this: +# 7 [i7] +# 6 +# 5 x x o-----------o +# 4 x d4 f4 x h4 o-------------------o +# 3 b3 c3 +# 2 a2 e2 g2 +# 1 d1 +# 0 +# a b c d e f g h i j k l m n o p +# +# check that we flush with a range tombstone, if maxBatchSize is exceeded +# even though range tombstone threshold has not been met +# and return a resume span +run ok +del_range_pred k=a end=i ts=6 predTs=1 maxBytes=1 +---- +del_range: resume span ["e","i") +>> at end: +rangekey: {b-e}/[6.000000000,0=/] +rangekey: {f-i}/[5.000000000,0=/] +rangekey: {k-p}/[4.000000000,0=/] +data: "a"/4.000000000,0 -> / +data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 +data: "c"/5.000000000,0 -> / +data: "c"/2.000000000,0 -> /BYTES/c2 +data: "d"/5.000000000,0 -> / +data: "d"/4.000000000,0 -> /BYTES/d4 +data: "d"/1.000000000,0 -> /BYTES/d1 +data: "e"/2.000000000,0 -> 
/BYTES/e2 +data: "f"/4.000000000,0 -> /BYTES/f4 +data: "g"/4.000000000,0 -> / +data: "g"/2.000000000,0 -> /BYTES/g2 +data: "h"/4.000000000,0 -> /BYTES/h4 +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/7.000000000,0 -> /BYTES/i7 +data: "j"/4.000000000,0 -> / +data: "j"/2.000000000,0 -> /BYTES/j2 +data: "q"/2.000000000,0 -> /BYTES/q2 + +# check that we flush properly if maxBatchSize is exceeded. +# Since max is 1, write a tombstone to e, and as soon as it sees the +# next eligible key to delete (f), return a resume span. +# Note that we dont count shadowed tombstones in the batchSize +run ok +put k=f ts=6 v=f6 +del_range_pred k=c end=i ts=7 predTs=1 max=1 +---- +del_range: resume span ["f","i") +>> at end: +rangekey: {b-e}/[6.000000000,0=/] +rangekey: {f-i}/[5.000000000,0=/] +rangekey: {k-p}/[4.000000000,0=/] +data: "a"/4.000000000,0 -> / +data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 +data: "c"/5.000000000,0 -> / +data: "c"/2.000000000,0 -> /BYTES/c2 +data: "d"/5.000000000,0 -> / +data: "d"/4.000000000,0 -> /BYTES/d4 +data: "d"/1.000000000,0 -> /BYTES/d1 +data: "e"/7.000000000,0 -> / +data: "e"/2.000000000,0 -> /BYTES/e2 +data: "f"/6.000000000,0 -> /BYTES/f6 +data: "f"/4.000000000,0 -> /BYTES/f4 +data: "g"/4.000000000,0 -> / +data: "g"/2.000000000,0 -> /BYTES/g2 +data: "h"/4.000000000,0 -> /BYTES/h4 +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/7.000000000,0 -> /BYTES/i7 +data: "j"/4.000000000,0 -> / +data: "j"/2.000000000,0 -> /BYTES/j2 +data: "q"/2.000000000,0 -> /BYTES/q2 + +# Run the same DeleteRange as above at ts 8 +# No resume span should get returned because the iterator goes through +# the whole span without encountering another eligible key to flush +run ok 
+del_range_pred k=c end=i ts=8 predTs=1 max=3 +---- +>> at end: +rangekey: {b-e}/[6.000000000,0=/] +rangekey: {f-i}/[5.000000000,0=/] +rangekey: {k-p}/[4.000000000,0=/] +data: "a"/4.000000000,0 -> / +data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 +data: "c"/5.000000000,0 -> / +data: "c"/2.000000000,0 -> /BYTES/c2 +data: "d"/5.000000000,0 -> / +data: "d"/4.000000000,0 -> /BYTES/d4 +data: "d"/1.000000000,0 -> /BYTES/d1 +data: "e"/7.000000000,0 -> / +data: "e"/2.000000000,0 -> /BYTES/e2 +data: "f"/8.000000000,0 -> / +data: "f"/6.000000000,0 -> /BYTES/f6 +data: "f"/4.000000000,0 -> /BYTES/f4 +data: "g"/4.000000000,0 -> / +data: "g"/2.000000000,0 -> /BYTES/g2 +data: "h"/4.000000000,0 -> /BYTES/h4 +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/7.000000000,0 -> /BYTES/i7 +data: "j"/4.000000000,0 -> / +data: "j"/2.000000000,0 -> /BYTES/j2 +data: "q"/2.000000000,0 -> /BYTES/q2