From 3a2d49ac68b8b49dc2274faf12cc2b700ba59532 Mon Sep 17 00:00:00 2001 From: Yahor Yuzefovich Date: Thu, 16 Jun 2022 19:19:57 -0700 Subject: [PATCH 1/4] sql: reuse the slice of RequestUnion objects between fetches This commit teaches `txnKVFetcher` and `txnKVStreamer` to reuse the same slice of `RequestUnion` objects between different fetches. It is now extremely easy to do given the recent refactor. We do perform memory accounting for this slice (against a memory account bound to an unlimited memory monitor). Additionally, a similar optimization is applied to how resume requests are populated by the Streamer. Release note: None --- .../kvclient/kvstreamer/requests_provider.go | 10 ++++- pkg/kv/kvclient/kvstreamer/streamer.go | 45 +++++++++++-------- pkg/sql/colfetcher/index_join.go | 1 + pkg/sql/row/kv_batch_fetcher.go | 44 +++++++++++++++--- pkg/sql/row/kv_batch_streamer.go | 24 +++++++--- pkg/sql/row/kv_fetcher.go | 3 +- pkg/sql/rowexec/joinreader.go | 4 ++ 7 files changed, 98 insertions(+), 33 deletions(-) diff --git a/pkg/kv/kvclient/kvstreamer/requests_provider.go b/pkg/kv/kvclient/kvstreamer/requests_provider.go index e416191487a8..fda9b48bec1e 100644 --- a/pkg/kv/kvclient/kvstreamer/requests_provider.go +++ b/pkg/kv/kvclient/kvstreamer/requests_provider.go @@ -79,8 +79,16 @@ type singleRangeBatch struct { // Hints.SingleRowLookup is false and some Scan requests were enqueued. subRequestIdx []int32 // reqsReservedBytes tracks the memory reservation against the budget for - // the memory usage of reqs. + // the memory usage of reqs, excluding the overhead. reqsReservedBytes int64 + // overheadAccountedFor tracks the memory reservation against the budget for + // the overhead of the reqs slice (i.e. of roachpb.RequestUnion objects). + // Since we reuse the same reqs slice for resume requests, this can be + // released only when the BatchResponse doesn't have any resume spans. 
+ // + // RequestUnion.Size() ignores the overhead of RequestUnion object, so we + // need to account for it separately. + overheadAccountedFor int64 // minTargetBytes, if positive, indicates the minimum TargetBytes limit that // this singleRangeBatch should be sent with in order for the response to // not be empty. Note that TargetBytes of at least minTargetBytes is diff --git a/pkg/kv/kvclient/kvstreamer/streamer.go b/pkg/kv/kvclient/kvstreamer/streamer.go index 6418c7e3964b..3a7dd8ddb366 100644 --- a/pkg/kv/kvclient/kvstreamer/streamer.go +++ b/pkg/kv/kvclient/kvstreamer/streamer.go @@ -501,12 +501,13 @@ func (s *Streamer) Enqueue(ctx context.Context, reqs []roachpb.RequestUnion) (re //} r := singleRangeBatch{ - reqs: singleRangeReqs, - positions: positions, - subRequestIdx: subRequestIdx, - reqsReservedBytes: requestsMemUsage(singleRangeReqs), + reqs: singleRangeReqs, + positions: positions, + subRequestIdx: subRequestIdx, + reqsReservedBytes: requestsMemUsage(singleRangeReqs), + overheadAccountedFor: requestUnionOverhead * int64(cap(singleRangeReqs)), } - totalReqsMemUsage += r.reqsReservedBytes + totalReqsMemUsage += r.reqsReservedBytes + r.overheadAccountedFor if s.mode == OutOfOrder { // Sort all single-range requests to be in the key order. @@ -1090,6 +1091,12 @@ func (w *workerCoordinator) performRequestAsync( // non-empty responses as well as resume spans, if any. respOverestimate := targetBytes - memoryFootprintBytes reqOveraccounted := req.reqsReservedBytes - resumeReqsMemUsage + if resumeReqsMemUsage == 0 { + // There will be no resume request, so we will lose the + // reference to the req.reqs slice and can release its memory + // reservation. 
+ reqOveraccounted += req.overheadAccountedFor + } overaccountedTotal := respOverestimate + reqOveraccounted if overaccountedTotal >= 0 { w.s.budget.release(ctx, overaccountedTotal) @@ -1213,9 +1220,6 @@ func calculateFootprint( } } } - // This addendum is the first step of requestsMemUsage() and we've already - // added the size of each resume request above. - resumeReqsMemUsage += requestUnionOverhead * int64(numIncompleteGets+numIncompleteScans) return memoryFootprintBytes, resumeReqsMemUsage, numIncompleteGets, numIncompleteScans } @@ -1223,7 +1227,7 @@ func calculateFootprint( // in the BatchResponse. The ResumeSpans, if found, are added into a new // singleRangeBatch request that is added to be picked up by the mainLoop of the // worker coordinator. This method assumes that req is no longer needed by the -// caller, so req.positions is reused for the ResumeSpans. +// caller, so the slices from req are reused for the ResumeSpans. // // It also assumes that the budget has already been reconciled with the // reservations for Results that will be created. @@ -1236,14 +1240,15 @@ func (w *workerCoordinator) processSingleRangeResults( ) error { numIncompleteRequests := numIncompleteGets + numIncompleteScans var resumeReq singleRangeBatch - // We have to allocate the new slice for requests, but we can reuse the - // positions slice. - resumeReq.reqs = make([]roachpb.RequestUnion, numIncompleteRequests) + // We have to allocate the new Get and Scan requests, but we can reuse the + // reqs and the positions slices. + resumeReq.reqs = req.reqs[:numIncompleteRequests] resumeReq.positions = req.positions[:0] resumeReq.subRequestIdx = req.subRequestIdx[:0] // We've already reconciled the budget with the actual reservation for the // requests with the ResumeSpans. 
resumeReq.reqsReservedBytes = resumeReqsMemUsage + resumeReq.overheadAccountedFor = req.overheadAccountedFor gets := make([]struct { req roachpb.GetRequest union roachpb.RequestUnion_Get @@ -1415,7 +1420,14 @@ func (w *workerCoordinator) processSingleRangeResults( // If we have any incomplete requests, add them back into the work // pool. - if len(resumeReq.reqs) > 0 { + if numIncompleteRequests > 0 { + // Make sure to nil out old requests that we didn't include into the + // resume request. We don't have to do this if there aren't any + // incomplete requests since req and resumeReq will be garbage collected + // on their own. + for i := numIncompleteRequests; i < len(req.reqs); i++ { + req.reqs[i] = roachpb.RequestUnion{} + } w.s.requestsToServe.add(resumeReq) } @@ -1504,12 +1516,7 @@ func init() { const requestUnionOverhead = int64(unsafe.Sizeof(roachpb.RequestUnion{})) -func requestsMemUsage(reqs []roachpb.RequestUnion) int64 { - // RequestUnion.Size() ignores the overhead of RequestUnion object, so we'll - // account for it separately first. - memUsage := requestUnionOverhead * int64(cap(reqs)) - // No need to account for elements past len(reqs) because those must be - // unset and we have already accounted for RequestUnion object above. 
+func requestsMemUsage(reqs []roachpb.RequestUnion) (memUsage int64) { for _, r := range reqs { memUsage += int64(r.Size()) } diff --git a/pkg/sql/colfetcher/index_join.go b/pkg/sql/colfetcher/index_join.go index 0b94d2b5b3ef..22c9c2426ab2 100644 --- a/pkg/sql/colfetcher/index_join.go +++ b/pkg/sql/colfetcher/index_join.go @@ -512,6 +512,7 @@ func NewColIndexJoin( rowcontainer.NewKVStreamerResultDiskBuffer( flowCtx.Cfg.TempStorage, diskMonitor, ), + kvFetcherMemAcc, ) } else { kvFetcher = row.NewKVFetcher( diff --git a/pkg/sql/row/kv_batch_fetcher.go b/pkg/sql/row/kv_batch_fetcher.go index 818c32afcefd..da161806f8f7 100644 --- a/pkg/sql/row/kv_batch_fetcher.go +++ b/pkg/sql/row/kv_batch_fetcher.go @@ -13,6 +13,7 @@ package row import ( "context" "time" + "unsafe" "github.com/cockroachdb/cockroach/pkg/kv" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/concurrency/lock" @@ -148,6 +149,7 @@ type txnKVFetcher struct { // least once. alreadyFetched bool batchIdx int + reqsScratch []roachpb.RequestUnion responses []roachpb.ResponseUnion remainingBatches [][]byte @@ -156,11 +158,12 @@ type txnKVFetcher struct { getResponseScratch [1]roachpb.KeyValue acc *mon.BoundAccount - // spansAccountedFor and batchResponseAccountedFor track the number of bytes - // that we've already registered with acc in regards to spans and the batch - // response, respectively. + // spansAccountedFor, batchResponseAccountedFor, and reqsScratchAccountedFor + // track the number of bytes that we've already registered with acc in + // regards to spans, the batch response, and reqsScratch, respectively. spansAccountedFor int64 batchResponseAccountedFor int64 + reqsScratchAccountedFor int64 // If set, we will use the production value for kvBatchSize. 
forceProductionKVBatchSize bool @@ -395,7 +398,7 @@ func (f *txnKVFetcher) fetch(ctx context.Context) error { ba.Header.TargetBytes = int64(f.batchBytesLimit) ba.Header.MaxSpanRequestKeys = int64(f.getBatchKeyLimit()) ba.AdmissionHeader = f.requestAdmissionHeader - ba.Requests = spansToRequests(f.spans.Spans, f.reverse, f.lockStrength) + ba.Requests = spansToRequests(f.spans.Spans, f.reverse, f.lockStrength, f.reqsScratch) if log.ExpensiveLogEnabled(ctx, 2) { log.VEventf(ctx, 2, "Scan %s", f.spans) @@ -472,6 +475,21 @@ func (f *txnKVFetcher) fetch(ctx context.Context) error { f.batchIdx++ f.scratchSpans.reset() f.alreadyFetched = true + // Keep the reference to the requests slice in order to reuse in the future + // after making sure to nil out the requests in order to lose references to + // the underlying Get and Scan requests which could keep large byte slices + // alive. + f.reqsScratch = ba.Requests + for i := range f.reqsScratch { + f.reqsScratch[i] = roachpb.RequestUnion{} + } + if monitoring { + reqsScratchMemUsage := requestUnionOverhead * int64(cap(f.reqsScratch)) + if err := f.acc.Resize(ctx, f.reqsScratchAccountedFor, reqsScratchMemUsage); err != nil { + return err + } + f.reqsScratchAccountedFor = reqsScratchMemUsage + } // TODO(radu): We should fetch the next chunk in the background instead of waiting for the next // call to fetch(). We can use a pool of workers to issue the KV ops which will also limit the @@ -623,7 +641,9 @@ func (f *txnKVFetcher) reset(ctx context.Context) { f.remainingBatches = nil f.spans = identifiableSpans{} f.scratchSpans = identifiableSpans{} - // Release only the allocations made by this fetcher. + // Release only the allocations made by this fetcher. Note that we're still + // keeping the reference to reqsScratch, so we don't release the allocation + // for it. 
f.acc.Shrink(ctx, f.batchResponseAccountedFor+f.spansAccountedFor) f.batchResponseAccountedFor, f.spansAccountedFor = 0, 0 } @@ -633,14 +653,24 @@ func (f *txnKVFetcher) close(ctx context.Context) { f.reset(ctx) } +const requestUnionOverhead = int64(unsafe.Sizeof(roachpb.RequestUnion{})) + // spansToRequests converts the provided spans to the corresponding requests. If // a span doesn't have the EndKey set, then a Get request is used for it; // otherwise, a Scan (or ReverseScan if reverse is true) request is used with // BATCH_RESPONSE format. +// +// The provided reqsScratch is reused if it has enough capacity for all spans, +// if not, a new slice is allocated. func spansToRequests( - spans roachpb.Spans, reverse bool, keyLocking lock.Strength, + spans roachpb.Spans, reverse bool, keyLocking lock.Strength, reqsScratch []roachpb.RequestUnion, ) []roachpb.RequestUnion { - reqs := make([]roachpb.RequestUnion, len(spans)) + var reqs []roachpb.RequestUnion + if cap(reqsScratch) >= len(spans) { + reqs = reqsScratch[:len(spans)] + } else { + reqs = make([]roachpb.RequestUnion, len(spans)) + } // Detect the number of gets vs scans, so we can batch allocate all of the // requests precisely. nGets := 0 diff --git a/pkg/sql/row/kv_batch_streamer.go b/pkg/sql/row/kv_batch_streamer.go index 20b948ba2888..d3451972d0fd 100644 --- a/pkg/sql/row/kv_batch_streamer.go +++ b/pkg/sql/row/kv_batch_streamer.go @@ -21,6 +21,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" "github.com/cockroachdb/cockroach/pkg/sql/rowinfra" "github.com/cockroachdb/cockroach/pkg/util/log" + "github.com/cockroachdb/cockroach/pkg/util/mon" "github.com/cockroachdb/errors" ) @@ -45,8 +46,11 @@ type txnKVStreamer struct { streamer *kvstreamer.Streamer keyLocking lock.Strength - spans roachpb.Spans - spanIDs []int + spans roachpb.Spans + spanIDs []int + reqsScratch []roachpb.RequestUnion + + acc *mon.BoundAccount // getResponseScratch is reused to return the result of Get requests. 
getResponseScratch [1]roachpb.KeyValue @@ -63,11 +67,12 @@ var _ KVBatchFetcher = &txnKVStreamer{} // newTxnKVStreamer creates a new txnKVStreamer. func newTxnKVStreamer( - streamer *kvstreamer.Streamer, lockStrength descpb.ScanLockingStrength, + streamer *kvstreamer.Streamer, lockStrength descpb.ScanLockingStrength, acc *mon.BoundAccount, ) KVBatchFetcher { return &txnKVStreamer{ streamer: streamer, keyLocking: getKeyLockingStrength(lockStrength), + acc: acc, } } @@ -86,13 +91,22 @@ func (f *txnKVStreamer) SetupNextFetch( if log.ExpensiveLogEnabled(ctx, 2) { log.VEventf(ctx, 2, "Scan %s", spans) } - reqs := spansToRequests(spans, false /* reverse */, f.keyLocking) + reqs := spansToRequests(spans, false /* reverse */, f.keyLocking, f.reqsScratch) if err := f.streamer.Enqueue(ctx, reqs); err != nil { return err } f.spans = spans f.spanIDs = spanIDs - return nil + // Keep the reference to the requests slice in order to reuse in the future + // after making sure to nil out the requests in order to lose references to + // the underlying Get and Scan requests which could keep large byte slices + // alive. 
+ f.reqsScratch = reqs + for i := range f.reqsScratch { + f.reqsScratch[i] = roachpb.RequestUnion{} + } + reqsScratchMemUsage := requestUnionOverhead * int64(cap(f.reqsScratch)) + return f.acc.ResizeTo(ctx, reqsScratchMemUsage) } func (f *txnKVStreamer) getSpanID(resultPosition int) int { diff --git a/pkg/sql/row/kv_fetcher.go b/pkg/sql/row/kv_fetcher.go index 6df5df898518..2f988a87c9c9 100644 --- a/pkg/sql/row/kv_fetcher.go +++ b/pkg/sql/row/kv_fetcher.go @@ -134,6 +134,7 @@ func NewStreamingKVFetcher( singleRowLookup bool, maxKeysPerRow int, diskBuffer kvstreamer.ResultDiskBuffer, + kvFetcherMemAcc *mon.BoundAccount, ) *KVFetcher { streamer := kvstreamer.NewStreamer( distSender, @@ -157,7 +158,7 @@ func NewStreamingKVFetcher( maxKeysPerRow, diskBuffer, ) - return newKVFetcher(newTxnKVStreamer(streamer, lockStrength)) + return newKVFetcher(newTxnKVStreamer(streamer, lockStrength, kvFetcherMemAcc)) } func newKVFetcher(batchFetcher KVBatchFetcher) *KVFetcher { diff --git a/pkg/sql/rowexec/joinreader.go b/pkg/sql/rowexec/joinreader.go index 65c56c2d1b6e..4d88690ac0bc 100644 --- a/pkg/sql/rowexec/joinreader.go +++ b/pkg/sql/rowexec/joinreader.go @@ -129,6 +129,7 @@ type joinReader struct { unlimitedMemMonitor *mon.BytesMonitor budgetAcc mon.BoundAccount diskMonitor *mon.BytesMonitor + txnKVStreamerMemAcc mon.BoundAccount } input execinfra.RowSource @@ -472,6 +473,7 @@ func newJoinReader( ) jr.streamerInfo.unlimitedMemMonitor.Start(flowCtx.EvalCtx.Ctx(), flowCtx.EvalCtx.Mon, mon.BoundAccount{}) jr.streamerInfo.budgetAcc = jr.streamerInfo.unlimitedMemMonitor.MakeBoundAccount() + jr.streamerInfo.txnKVStreamerMemAcc = jr.streamerInfo.unlimitedMemMonitor.MakeBoundAccount() var diskBuffer kvstreamer.ResultDiskBuffer if jr.maintainOrdering { @@ -496,6 +498,7 @@ func newJoinReader( singleRowLookup, int(spec.FetchSpec.MaxKeysPerRow), diskBuffer, + &jr.streamerInfo.txnKVStreamerMemAcc, ) } else { // When not using the Streamer API, we want to limit the batch size hint @@ 
-1090,6 +1093,7 @@ func (jr *joinReader) close() { } if jr.usesStreamer { jr.streamerInfo.budgetAcc.Close(jr.Ctx) + jr.streamerInfo.txnKVStreamerMemAcc.Close(jr.Ctx) jr.streamerInfo.unlimitedMemMonitor.Stop(jr.Ctx) if jr.streamerInfo.diskMonitor != nil { jr.streamerInfo.diskMonitor.Stop(jr.Ctx) From dbbc995468a66594491c982126eff491fe6e5f75 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Fri, 18 Mar 2022 12:32:15 +0000 Subject: [PATCH 2/4] storage: export some MVCC key encoding functions Release note: None --- pkg/storage/mvcc.go | 6 +++--- pkg/storage/mvcc_key.go | 12 ++++++------ pkg/storage/mvcc_key_test.go | 6 +++--- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/pkg/storage/mvcc.go b/pkg/storage/mvcc.go index ffa61a1b362a..e4120f64cb63 100644 --- a/pkg/storage/mvcc.go +++ b/pkg/storage/mvcc.go @@ -940,7 +940,7 @@ func mvccGetMetadata( if !hasPoint || !unsafeKey.Key.Equal(metaKey.Key) { meta.Deleted = true meta.Timestamp = rkTimestamp.ToLegacyTimestamp() - return true, int64(encodedMVCCKeyPrefixLength(metaKey.Key)), 0, nil + return true, int64(EncodedMVCCKeyPrefixLength(metaKey.Key)), 0, nil } } @@ -950,7 +950,7 @@ func mvccGetMetadata( if rkTS := iter.RangeKeys()[0].RangeKey.Timestamp; unsafeKey.Timestamp.LessEq(rkTS) { meta.Deleted = true meta.Timestamp = rkTS.ToLegacyTimestamp() - return true, int64(encodedMVCCKeyPrefixLength(metaKey.Key)), 0, nil + return true, int64(EncodedMVCCKeyPrefixLength(metaKey.Key)), 0, nil } } @@ -968,7 +968,7 @@ func mvccGetMetadata( meta.Deleted = unsafeVal.IsTombstone() meta.Timestamp = unsafeKey.Timestamp.ToLegacyTimestamp() - return true, int64(encodedMVCCKeyPrefixLength(metaKey.Key)), 0, nil + return true, int64(EncodedMVCCKeyPrefixLength(metaKey.Key)), 0, nil } // putBuffer holds pointer data needed by mvccPutInternal. 
Bundling diff --git a/pkg/storage/mvcc_key.go b/pkg/storage/mvcc_key.go index f128b69c77cd..8f3db550897b 100644 --- a/pkg/storage/mvcc_key.go +++ b/pkg/storage/mvcc_key.go @@ -263,8 +263,8 @@ func encodeMVCCTimestampToBuf(buf []byte, ts hlc.Timestamp) { // encodedMVCCKeyLength returns the encoded length of the given MVCCKey. func encodedMVCCKeyLength(key MVCCKey) int { - // NB: We don't call into encodedMVCCKeyPrefixLength() or - // encodedMVCCTimestampSuffixLength() here because the additional function + // NB: We don't call into EncodedMVCCKeyPrefixLength() or + // EncodedMVCCTimestampSuffixLength() here because the additional function // call overhead is significant. keyLen := len(key.Key) + mvccEncodedTimeSentinelLen if !key.Timestamp.IsEmpty() { @@ -280,9 +280,9 @@ func encodedMVCCKeyLength(key MVCCKey) int { return keyLen } -// encodedMVCCKeyPrefixLength returns the encoded length of a roachpb.Key prefix +// EncodedMVCCKeyPrefixLength returns the encoded length of a roachpb.Key prefix // including the sentinel byte. -func encodedMVCCKeyPrefixLength(key roachpb.Key) int { +func EncodedMVCCKeyPrefixLength(key roachpb.Key) int { return len(key) + mvccEncodedTimeSentinelLen } @@ -300,10 +300,10 @@ func encodedMVCCTimestampLength(ts hlc.Timestamp) int { return tsLen } -// encodedMVCCTimestampSuffixLength returns the encoded length of the +// EncodedMVCCTimestampSuffixLength returns the encoded length of the // given MVCC timestamp, including the length suffix. It returns 0 // if the timestamp is empty. -func encodedMVCCTimestampSuffixLength(ts hlc.Timestamp) int { +func EncodedMVCCTimestampSuffixLength(ts hlc.Timestamp) int { // This is backwards, see comment in encodedMVCCTimestampLength() for why. 
return encodedMVCCKeyLength(MVCCKey{Timestamp: ts}) - mvccEncodedTimeSentinelLen } diff --git a/pkg/storage/mvcc_key_test.go b/pkg/storage/mvcc_key_test.go index 51509ed3ba92..aac8f2847ae1 100644 --- a/pkg/storage/mvcc_key_test.go +++ b/pkg/storage/mvcc_key_test.go @@ -193,7 +193,7 @@ func TestEncodeDecodeMVCCKeyAndTimestampWithLength(t *testing.T) { require.Equal(t, expect, encoded) require.Equal(t, len(encoded), encodedMVCCKeyLength(mvccKey)) require.Equal(t, len(encoded), - encodedMVCCKeyPrefixLength(mvccKey.Key)+encodedMVCCTimestampSuffixLength(mvccKey.Timestamp)) + EncodedMVCCKeyPrefixLength(mvccKey.Key)+EncodedMVCCTimestampSuffixLength(mvccKey.Timestamp)) decoded, err := DecodeMVCCKey(encoded) require.NoError(t, err) @@ -203,7 +203,7 @@ func TestEncodeDecodeMVCCKeyAndTimestampWithLength(t *testing.T) { expectPrefix, err := hex.DecodeString(tc.encoded[:2*len(tc.key)+2]) require.NoError(t, err) require.Equal(t, expectPrefix, EncodeMVCCKeyPrefix(roachpb.Key(tc.key))) - require.Equal(t, len(expectPrefix), encodedMVCCKeyPrefixLength(roachpb.Key(tc.key))) + require.Equal(t, len(expectPrefix), EncodedMVCCKeyPrefixLength(roachpb.Key(tc.key))) // Test encode/decodeMVCCTimestampSuffix too, since we can trivially do so. expectTS, err := hex.DecodeString(tc.encoded[2*len(tc.key)+2:]) @@ -214,7 +214,7 @@ func TestEncodeDecodeMVCCKeyAndTimestampWithLength(t *testing.T) { encodedTS := EncodeMVCCTimestampSuffix(tc.ts) require.Equal(t, expectTS, encodedTS) - require.Equal(t, len(encodedTS), encodedMVCCTimestampSuffixLength(tc.ts)) + require.Equal(t, len(encodedTS), EncodedMVCCTimestampSuffixLength(tc.ts)) decodedTS, err := decodeMVCCTimestampSuffix(encodedTS) require.NoError(t, err) From 3ef1dbd57241e3b74a1f6dd87f5a85c176df5bae Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Tue, 15 Mar 2022 12:49:36 +0000 Subject: [PATCH 3/4] roachpb: add `Key.Prevish()` to find a previous key This patch adds `Key.Prevish()`, which returns a previous key in lexicographical sort order. 
This is needed to expand a latch span leftwards to peek at any left-adjacent range keys. It is impossible to find the exact immediate predecessor of a key, because it can have an infinite number of `0xff` bytes at the end, so this returns the nearest previous key right-padded with `0xff` up to the given length. It is still possible for an infinite number of keys to exist between `Key` and `Key.Prevish()`, as keys have unbounded length. Release note: None --- pkg/roachpb/data.go | 21 +++++++++++++++++++++ pkg/roachpb/data_test.go | 22 ++++++++++++++++++++++ pkg/util/encoding/encoding.go | 26 ++++++++++++++++++++++++++ 3 files changed, 69 insertions(+) diff --git a/pkg/roachpb/data.go b/pkg/roachpb/data.go index 14c55ad19d91..1814d1ee63c0 100644 --- a/pkg/roachpb/data.go +++ b/pkg/roachpb/data.go @@ -50,6 +50,12 @@ const ( localPrefixByte = '\x01' // LocalMaxByte is the end of the local key range. LocalMaxByte = '\x02' + // PrevishKeyLength is a reasonable key length to use for Key.Prevish(), + // typically when peeking to the left of a known key. We want this to be as + // tight as possible, since it can e.g. be used for latch spans. However, the + // exact previous key has infinite length, so we assume that most keys are + // less than 1024 bytes, or have a fairly unique 1024-byte prefix. + PrevishKeyLength = 1024 ) var ( @@ -157,6 +163,21 @@ func (k Key) Next() Key { return Key(encoding.BytesNext(k)) } +// Prevish returns a previous key in lexicographic sort order. It is impossible +// in general to find the exact immediate predecessor key, because it has an +// infinite number of 0xff bytes at the end, so this returns the nearest +// previous key right-padded with 0xff up to length bytes. An infinite number of +// keys may exist between Key and Key.Prevish(), as keys have unbounded length. +// This also implies that k.Prevish().IsPrev(k) will often be false. +// +// PrevishKeyLength can be used as a reasonable length in most situations. 
+// +// The method may only take a shallow copy of the Key, so both the receiver and +// the return value should be treated as immutable after. +func (k Key) Prevish(length int) Key { + return Key(encoding.BytesPrevish(k, length)) +} + // IsPrev is a more efficient version of k.Next().Equal(m). func (k Key) IsPrev(m Key) bool { l := len(m) - 1 diff --git a/pkg/roachpb/data_test.go b/pkg/roachpb/data_test.go index 2a3f51f10418..ab3a960d7f9f 100644 --- a/pkg/roachpb/data_test.go +++ b/pkg/roachpb/data_test.go @@ -12,6 +12,7 @@ package roachpb import ( "bytes" + "encoding/hex" "math" "math/rand" "reflect" @@ -221,6 +222,27 @@ func TestNextKey(t *testing.T) { } } +func TestPrevish(t *testing.T) { + const length = 4 + testcases := []struct { + key Key + expect Key + }{ + {nil, nil}, + {[]byte{}, []byte{}}, + {[]byte{0x00}, []byte{}}, + {[]byte{0x01, 0x00}, []byte{0x01}}, + {[]byte{0x01}, []byte{0x00, 0xff, 0xff, 0xff}}, + {[]byte{0x01, 0x01}, []byte{0x01, 0x00, 0xff, 0xff}}, + {[]byte{0xff, 0xff, 0xff, 0xff}, []byte{0xff, 0xff, 0xff, 0xfe}}, + } + for _, tc := range testcases { + t.Run(hex.EncodeToString(tc.key), func(t *testing.T) { + require.Equal(t, tc.expect, tc.key.Prevish(length)) + }) + } +} + func TestIsPrev(t *testing.T) { for i, tc := range []struct { k, m Key diff --git a/pkg/util/encoding/encoding.go b/pkg/util/encoding/encoding.go index 4a24429dc4dc..f3db0756247e 100644 --- a/pkg/util/encoding/encoding.go +++ b/pkg/util/encoding/encoding.go @@ -3215,3 +3215,29 @@ func BytesNext(b []byte) []byte { bn[len(bn)-1] = 0 return bn } + +// BytesPrevish returns a previous byte slice in lexicographical ordering. It is +// impossible in general to find the exact previous byte slice, because it has +// an infinite number of 0xff bytes at the end, so this returns the nearest +// previous slice right-padded with 0xff up to length bytes. It may reuse the +// given slice when possible. 
+func BytesPrevish(b []byte, length int) []byte { + bLen := len(b) + // An empty slice has no previous slice. + if bLen == 0 { + return b + } + // If the last byte is 0, just remove it. + if b[bLen-1] == 0 { + return b[:bLen-1] + } + // Otherwise, decrement the last byte and right-pad with 0xff. + if bLen > length { + length = bLen + } + buf := make([]byte, length) + copy(buf, b) + buf[bLen-1]-- + copy(buf[bLen:], bytes.Repeat([]byte{0xff}, length-bLen)) + return buf +} From 8fe23626965e7a93406f51f4726659ba8f1e7931 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Sun, 20 Feb 2022 14:15:07 +0000 Subject: [PATCH 4/4] storage: add `MVCCStats` for range keys This patch adds `MVCCStats` tracking for range keys. Four new fields are added to `MVCCStats`: * `RangeKeyCount`: the number of (fragmented) range keys, not counting historical versions. * `RangeKeyBytes`: the logical encoded byte size of all range keys. The latest version contributes the encoded key bounds, and all versions contribute encoded timestamps. Unlike point keys, which for historical reasons use a fixed-size timestamp contribution, this uses the actual variable-length timestamp size. * `RangeValCount`: the number of (fragmented) range key versions. * `RangeValBytes`: the encoded size of all range key values across all versions. The same value can be stored in multiple range keys due to fragmentation, which will be counted separately. Even though all range keys are currently MVCC range tombstones with no value, the `MVCCValueHeader` contribution can be non-zero due to e.g. a local timestamp. `ComputeStatsForRange()` has been extended to calculate the above quantities, and additionally account for range tombstones themselves in `GCBytesAge` along with their effect on point keys. All relevant call sites have been updated to surface range keys for the MVCC iterators passed to `ComputeStatsForRange()`. Most MVCC operations have been updated to correctly account for MVCC range tombstones, e.g. 
during point key writes and intent resolution. KV APIs are not yet updated, this will be addressed later. Range key stats are also adjusted during range splits and merges, which will split and merge any range keys that straddle the split key. This requires a single range key seek to the left and right of the split key during these operations. Release note: None --- .../batcheval/cmd_add_sstable_test.go | 1 + pkg/kv/kvserver/batcheval/cmd_clear_range.go | 8 +- pkg/kv/kvserver/batcheval/cmd_delete_range.go | 46 +- .../batcheval/cmd_delete_range_test.go | 156 +++-- .../kvserver/batcheval/cmd_end_transaction.go | 115 +++- .../batcheval/cmd_end_transaction_test.go | 109 +++ pkg/kv/kvserver/batcheval/cmd_truncate_log.go | 6 +- .../kvserver/batcheval/split_stats_helper.go | 19 +- pkg/kv/kvserver/below_raft_protos_test.go | 2 +- pkg/kv/kvserver/client_merge_test.go | 59 +- pkg/kv/kvserver/client_split_test.go | 161 ++--- pkg/kv/kvserver/client_test.go | 47 +- pkg/kv/kvserver/helpers_test.go | 50 +- pkg/kv/kvserver/rditer/stats.go | 7 +- pkg/kv/kvserver/replica_consistency.go | 15 +- pkg/kv/kvserver/replica_raft.go | 1 + pkg/kv/kvserver/replica_test.go | 2 +- .../logic_test/builtin_function_notenant | 4 +- pkg/storage/BUILD.bazel | 1 - pkg/storage/bench_test.go | 12 +- pkg/storage/engine.go | 19 +- pkg/storage/enginepb/mvcc.go | 18 +- pkg/storage/enginepb/mvcc.proto | 33 +- pkg/storage/enginepb/mvcc3.proto | 8 + pkg/storage/mvcc.go | 630 ++++++++++++++---- pkg/storage/mvcc_history_test.go | 21 +- pkg/storage/mvcc_incremental_iterator_test.go | 6 +- pkg/storage/mvcc_key.go | 30 + pkg/storage/mvcc_key_test.go | 62 ++ pkg/storage/mvcc_stats_test.go | 14 +- pkg/storage/mvcc_test.go | 78 +-- pkg/storage/sst_iterator.go | 12 +- .../testdata/mvcc_histories/delete_range | 5 +- .../mvcc_histories/range_tombstone_conflicts | 99 ++- .../range_tombstone_merge_fragment | 120 ++++ .../mvcc_histories/range_tombstone_stats | 145 ++++ ...range_tombstone_stats_intent_resolve_abort | 250 
+++++++ ...ange_tombstone_stats_intent_resolve_commit | 268 ++++++++ ...ange_tombstone_stats_intent_resolve_pushed | 270 ++++++++ ...tombstone_stats_intent_resolve_range_abort | 198 ++++++ ...ombstone_stats_intent_resolve_range_commit | 216 ++++++ ...ombstone_stats_intent_resolve_range_pushed | 218 ++++++ .../range_tombstone_stats_intent_rewrite | 385 +++++++++++ .../mvcc_histories/range_tombstone_writes | 113 +--- 44 files changed, 3509 insertions(+), 530 deletions(-) create mode 100644 pkg/storage/testdata/mvcc_histories/range_tombstone_merge_fragment create mode 100644 pkg/storage/testdata/mvcc_histories/range_tombstone_stats create mode 100644 pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_resolve_abort create mode 100644 pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_resolve_commit create mode 100644 pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_resolve_pushed create mode 100644 pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_resolve_range_abort create mode 100644 pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_resolve_range_commit create mode 100644 pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_resolve_range_pushed create mode 100644 pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_rewrite diff --git a/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go b/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go index bc1f9d96d251..4f04d14ea39e 100644 --- a/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go +++ b/pkg/kv/kvserver/batcheval/cmd_add_sstable_test.go @@ -1506,6 +1506,7 @@ func engineStats(t *testing.T, engine storage.Engine, nowNanos int64) *enginepb. 
t.Helper() iter := engine.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{ + KeyTypes: storage.IterKeyTypePointsAndRanges, LowerBound: keys.LocalMax, UpperBound: keys.MaxKey, }) diff --git a/pkg/kv/kvserver/batcheval/cmd_clear_range.go b/pkg/kv/kvserver/batcheval/cmd_clear_range.go index 9e76dc065d04..5d66f6f63741 100644 --- a/pkg/kv/kvserver/batcheval/cmd_clear_range.go +++ b/pkg/kv/kvserver/batcheval/cmd_clear_range.go @@ -157,8 +157,14 @@ func computeStatsDelta( // If we can't use the fast stats path, or race test is enabled, // compute stats across the key span to be cleared. + // + // TODO(erikgrinaker): This must handle range key stats adjustments. if !fast || util.RaceEnabled { - iter := readWriter.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{UpperBound: to}) + iter := readWriter.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{ + KeyTypes: storage.IterKeyTypePointsAndRanges, + LowerBound: from, + UpperBound: to, + }) computed, err := iter.ComputeStats(from, to, delta.LastUpdateNanos) iter.Close() if err != nil { diff --git a/pkg/kv/kvserver/batcheval/cmd_delete_range.go b/pkg/kv/kvserver/batcheval/cmd_delete_range.go index 51310294f327..b07432f9690f 100644 --- a/pkg/kv/kvserver/batcheval/cmd_delete_range.go +++ b/pkg/kv/kvserver/batcheval/cmd_delete_range.go @@ -14,6 +14,7 @@ import ( "context" "time" + "github.com/cockroachdb/cockroach/pkg/keys" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval/result" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset" "github.com/cockroachdb/cockroach/pkg/roachpb" @@ -39,6 +40,22 @@ func declareKeysDeleteRange( } else { DefaultDeclareIsolatedKeys(rs, header, req, latchSpans, lockSpans, maxOffset) } + + // When writing range tombstones, we must look for adjacent range tombstones + // that we merge with or fragment, to update MVCC stats accordingly. But we + // make sure to stay within the range bounds. 
+ if args.UseExperimentalRangeTombstone { + // NB: The range end key is not available, so this will pessimistically + // latch up to args.EndKey.Next(). If EndKey falls on the range end key, the + // span will be tightened during evaluation. + l, r := rangeTombstonePeekBounds(args.Key, args.EndKey, rs.GetStartKey().AsRawKey(), nil) + latchSpans.AddMVCC(spanset.SpanReadOnly, roachpb.Span{Key: l, EndKey: r}, header.Timestamp) + + // We need to read the range descriptor to determine the bounds during eval. + latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{ + Key: keys.RangeDescriptorKey(rs.GetStartKey()), + }) + } } // DeleteRange deletes the range of key/value pairs specified by @@ -62,9 +79,14 @@ func DeleteRange( return result.Result{}, errors.AssertionFailedf( "ReturnKeys can't be used with range tombstones") } + + desc := cArgs.EvalCtx.Desc() + leftPeekBound, rightPeekBound := rangeTombstonePeekBounds( + args.Key, args.EndKey, desc.StartKey.AsRawKey(), desc.EndKey.AsRawKey()) maxIntents := storage.MaxIntentsPerWriteIntentError.Get(&cArgs.EvalCtx.ClusterSettings().SV) - err := storage.ExperimentalMVCCDeleteRangeUsingTombstone( - ctx, readWriter, cArgs.Stats, args.Key, args.EndKey, h.Timestamp, cArgs.Now, maxIntents) + + err := storage.ExperimentalMVCCDeleteRangeUsingTombstone(ctx, readWriter, cArgs.Stats, + args.Key, args.EndKey, h.Timestamp, cArgs.Now, leftPeekBound, rightPeekBound, maxIntents) return result.Result{}, err } @@ -95,3 +117,23 @@ func DeleteRange( // error is not consumed by the caller because the result will be discarded. return result.FromAcquiredLocks(h.Txn, deleted...), err } + +// rangeTombstonePeekBounds returns the left and right bounds that +// ExperimentalMVCCDeleteRangeUsingTombstone can read in order to detect +// adjacent range tombstones to merge with or fragment. The bounds will be +// truncated to the Raft range bounds if given. 
+func rangeTombstonePeekBounds( + startKey, endKey, rangeStart, rangeEnd roachpb.Key, +) (roachpb.Key, roachpb.Key) { + leftPeekBound := startKey.Prevish(roachpb.PrevishKeyLength) + if len(rangeStart) > 0 && leftPeekBound.Compare(rangeStart) <= 0 { + leftPeekBound = rangeStart + } + + rightPeekBound := endKey.Next() + if len(rangeEnd) > 0 && rightPeekBound.Compare(rangeEnd) >= 0 { + rightPeekBound = rangeEnd + } + + return leftPeekBound.Clone(), rightPeekBound.Clone() +} diff --git a/pkg/kv/kvserver/batcheval/cmd_delete_range_test.go b/pkg/kv/kvserver/batcheval/cmd_delete_range_test.go index 7fe61ebf0e5b..2865eaa9f877 100644 --- a/pkg/kv/kvserver/batcheval/cmd_delete_range_test.go +++ b/pkg/kv/kvserver/batcheval/cmd_delete_range_test.go @@ -8,13 +8,14 @@ // by the Apache License, Version 2.0, included in the file // licenses/APL.txt. -package batcheval_test +package batcheval import ( "context" "testing" - "github.com/cockroachdb/cockroach/pkg/kv/kvserver/batcheval" + "github.com/cockroachdb/cockroach/pkg/keys" + "github.com/cockroachdb/cockroach/pkg/kv/kvserver/spanset" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/settings/cluster" "github.com/cockroachdb/cockroach/pkg/storage" @@ -35,6 +36,8 @@ func TestDeleteRangeTombstone(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) + storage.DisableMetamorphicSimpleValueEncoding(t) + // Initial data for each test. x is point tombstone, [] is intent, // o---o is range tombstone. // @@ -44,19 +47,26 @@ func TestDeleteRangeTombstone(t *testing.T) { // 2 b2 d2 o-------o // 1 // a b c d e f g h i + // + // We also write two range tombstones abutting the Raft range a-z at [Z-a)@100 + // and [z-|)@100. Writing a range tombstone should not merge with these. 
writeInitialData := func(t *testing.T, ctx context.Context, rw storage.ReadWriter) { t.Helper() var localTS hlc.ClockTimestamp - txn := roachpb.MakeTransaction("test", nil /* baseKey */, roachpb.NormalUserPriority, hlc.Timestamp{WallTime: 5}, 0, 0) - require.NoError(t, storage.MVCCPut(ctx, rw, nil, roachpb.Key("b"), hlc.Timestamp{WallTime: 2}, localTS, roachpb.MakeValueFromString("b2"), nil)) - require.NoError(t, storage.MVCCPut(ctx, rw, nil, roachpb.Key("c"), hlc.Timestamp{WallTime: 4}, localTS, roachpb.MakeValueFromString("c4"), nil)) - require.NoError(t, storage.MVCCPut(ctx, rw, nil, roachpb.Key("d"), hlc.Timestamp{WallTime: 2}, localTS, roachpb.MakeValueFromString("d2"), nil)) - require.NoError(t, storage.MVCCDelete(ctx, rw, nil, roachpb.Key("d"), hlc.Timestamp{WallTime: 3}, localTS, nil)) - require.NoError(t, storage.MVCCPut(ctx, rw, nil, roachpb.Key("i"), hlc.Timestamp{WallTime: 5}, localTS, roachpb.MakeValueFromString("i5"), &txn)) - require.NoError(t, storage.ExperimentalMVCCDeleteRangeUsingTombstone(ctx, rw, nil, roachpb.Key("f"), roachpb.Key("h"), hlc.Timestamp{WallTime: 3}, localTS, 0)) + + txn := roachpb.MakeTransaction("test", nil /* baseKey */, roachpb.NormalUserPriority, hlc.Timestamp{WallTime: 5e9}, 0, 0) + require.NoError(t, storage.MVCCPut(ctx, rw, nil, roachpb.Key("b"), hlc.Timestamp{WallTime: 2e9}, localTS, roachpb.MakeValueFromString("b2"), nil)) + require.NoError(t, storage.MVCCPut(ctx, rw, nil, roachpb.Key("c"), hlc.Timestamp{WallTime: 4e9}, localTS, roachpb.MakeValueFromString("c4"), nil)) + require.NoError(t, storage.MVCCPut(ctx, rw, nil, roachpb.Key("d"), hlc.Timestamp{WallTime: 2e9}, localTS, roachpb.MakeValueFromString("d2"), nil)) + require.NoError(t, storage.MVCCDelete(ctx, rw, nil, roachpb.Key("d"), hlc.Timestamp{WallTime: 3e9}, localTS, nil)) + require.NoError(t, storage.MVCCPut(ctx, rw, nil, roachpb.Key("i"), hlc.Timestamp{WallTime: 5e9}, localTS, roachpb.MakeValueFromString("i5"), &txn)) + require.NoError(t, 
storage.ExperimentalMVCCDeleteRangeUsingTombstone(ctx, rw, nil, roachpb.Key("f"), roachpb.Key("h"), hlc.Timestamp{WallTime: 3e9}, localTS, nil, nil, 0)) + require.NoError(t, storage.ExperimentalMVCCDeleteRangeUsingTombstone(ctx, rw, nil, roachpb.Key("Z"), roachpb.Key("a"), hlc.Timestamp{WallTime: 100e9}, localTS, nil, nil, 0)) + require.NoError(t, storage.ExperimentalMVCCDeleteRangeUsingTombstone(ctx, rw, nil, roachpb.Key("z"), roachpb.Key("|"), hlc.Timestamp{WallTime: 100e9}, localTS, nil, nil, 0)) } now := hlc.ClockTimestamp{Logical: 9} + rangeStart, rangeEnd := roachpb.Key("a"), roachpb.Key("z") testcases := map[string]struct { start string @@ -68,54 +78,68 @@ func TestDeleteRangeTombstone(t *testing.T) { expectErr interface{} // error type, substring, or true (any) }{ "above points succeed": { - start: "a", - end: "f", - ts: 10, - expectErr: nil, + start: "a", + end: "f", + ts: 10e9, }, "above range tombstone succeed": { start: "f", end: "h", - ts: 10, + ts: 10e9, expectErr: nil, }, + "merging succeeds": { + start: "e", + end: "f", + ts: 3e9, + }, + "adjacent to external LHS range key": { + start: "a", + end: "f", + ts: 100e9, + }, + "adjacent to external RHS range key": { + start: "q", + end: "z", + ts: 100e9, + }, "transaction errors": { start: "a", end: "f", - ts: 10, + ts: 10e9, txn: true, - expectErr: batcheval.ErrTransactionUnsupported, + expectErr: ErrTransactionUnsupported, }, "inline errors": { start: "a", end: "f", - ts: 10, + ts: 10e9, inline: true, expectErr: "Inline can't be used with range tombstones", }, "returnKeys errors": { start: "a", end: "f", - ts: 10, + ts: 10e9, returnKeys: true, expectErr: "ReturnKeys can't be used with range tombstones", }, "intent errors with WriteIntentError": { start: "i", end: "j", - ts: 10, + ts: 10e9, expectErr: &roachpb.WriteIntentError{}, }, "below point errors with WriteTooOldError": { start: "a", end: "d", - ts: 1, + ts: 1e9, expectErr: &roachpb.WriteTooOldError{}, }, "below range tombstone errors with 
WriteTooOldError": { start: "f", end: "h", - ts: 1, + ts: 1e9, expectErr: &roachpb.WriteTooOldError{}, }, } @@ -134,32 +158,52 @@ func TestDeleteRangeTombstone(t *testing.T) { Timestamp: hlc.Timestamp{WallTime: tc.ts}, } - var txn *roachpb.Transaction + // Prepare the request and environment. + evalCtx := &MockEvalCtx{ + ClusterSettings: st, + Desc: &roachpb.RangeDescriptor{ + StartKey: roachpb.RKey(rangeStart), + EndKey: roachpb.RKey(rangeEnd), + }, + } + + h := roachpb.Header{ + Timestamp: rangeKey.Timestamp, + } if tc.txn { - tx := roachpb.MakeTransaction("txn", nil /* baseKey */, roachpb.NormalUserPriority, rangeKey.Timestamp, 0, 0) - txn = &tx + txn := roachpb.MakeTransaction("txn", nil /* baseKey */, roachpb.NormalUserPriority, rangeKey.Timestamp, 0, 0) + h.Txn = &txn } + req := &roachpb.DeleteRangeRequest{ + RequestHeader: roachpb.RequestHeader{ + Key: rangeKey.StartKey, + EndKey: rangeKey.EndKey, + }, + UseExperimentalRangeTombstone: true, + Inline: tc.inline, + ReturnKeys: tc.returnKeys, + } + + ms := computeStats(t, engine, rangeStart, rangeEnd, rangeKey.Timestamp.WallTime) + + // Use a spanset batch to assert latching of all accesses. In particular, + // the additional seeks necessary to check for adjacent range keys that we + // may merge with (for stats purposes) which should not cross the range + // bounds. + var latchSpans, lockSpans spanset.SpanSet + declareKeysDeleteRange(evalCtx.Desc, &h, req, &latchSpans, &lockSpans, 0) + batch := spanset.NewBatchAt(engine.NewBatch(), &latchSpans, h.Timestamp) + defer batch.Close() + // Run the request. 
- var ms enginepb.MVCCStats resp := &roachpb.DeleteRangeResponse{} - _, err := batcheval.DeleteRange(ctx, engine, batcheval.CommandArgs{ - EvalCtx: (&batcheval.MockEvalCtx{ClusterSettings: st}).EvalContext(), + _, err := DeleteRange(ctx, batch, CommandArgs{ + EvalCtx: evalCtx.EvalContext(), Stats: &ms, Now: now, - Header: roachpb.Header{ - Timestamp: rangeKey.Timestamp, - Txn: txn, - }, - Args: &roachpb.DeleteRangeRequest{ - RequestHeader: roachpb.RequestHeader{ - Key: rangeKey.StartKey, - EndKey: rangeKey.EndKey, - }, - UseExperimentalRangeTombstone: true, - Inline: tc.inline, - ReturnKeys: tc.returnKeys, - }, + Header: h, + Args: req, }, resp) // Check the error. @@ -177,6 +221,7 @@ func TestDeleteRangeTombstone(t *testing.T) { return } require.NoError(t, err) + require.NoError(t, batch.Commit(true)) // Check that the range tombstone was written successfully. iter := engine.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{ @@ -215,7 +260,36 @@ func TestDeleteRangeTombstone(t *testing.T) { require.True(t, value.IsTombstone()) require.Equal(t, now, value.LocalTimestamp) - // TODO(erikgrinaker): This should test MVCC stats when implemented. + // Check that range tombstone stats were updated correctly. + require.Equal(t, computeStats(t, engine, rangeStart, rangeEnd, rangeKey.Timestamp.WallTime), ms) }) } } + +// computeStats computes MVCC stats for the given range. +// +// TODO(erikgrinaker): This, storage.computeStats(), and engineStats() should be +// moved into a testutils package, somehow avoiding import cycles with storage +// tests. 
+func computeStats( + t *testing.T, reader storage.Reader, from, to roachpb.Key, nowNanos int64, +) enginepb.MVCCStats { + t.Helper() + + if len(from) == 0 { + from = keys.LocalMax + } + if len(to) == 0 { + to = keys.MaxKey + } + + iter := reader.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{ + KeyTypes: storage.IterKeyTypePointsAndRanges, + LowerBound: from, + UpperBound: to, + }) + defer iter.Close() + ms, err := storage.ComputeStatsForRange(iter, from, to, nowNanos) + require.NoError(t, err) + return ms +} diff --git a/pkg/kv/kvserver/batcheval/cmd_end_transaction.go b/pkg/kv/kvserver/batcheval/cmd_end_transaction.go index 76f975c4b892..61e816d2fc30 100644 --- a/pkg/kv/kvserver/batcheval/cmd_end_transaction.go +++ b/pkg/kv/kvserver/batcheval/cmd_end_transaction.go @@ -169,6 +169,19 @@ func declareKeysEndTxn( latchSpans.AddNonMVCC(spanset.SpanReadWrite, roachpb.Span{ Key: keys.RangePriorReadSummaryKey(mt.LeftDesc.RangeID), }) + // Merges need to adjust MVCC stats for merged MVCC range tombstones + // that straddle the ranges, by peeking to the left and right of the RHS + // start key. Since Prevish() is imprecise, we must also ensure we don't + // go outside of the LHS bounds. 
+ leftPeekBound := mt.RightDesc.StartKey.AsRawKey().Prevish(roachpb.PrevishKeyLength) + rightPeekBound := mt.RightDesc.StartKey.AsRawKey().Next() + if leftPeekBound.Compare(mt.LeftDesc.StartKey.AsRawKey()) < 0 { + leftPeekBound = mt.LeftDesc.StartKey.AsRawKey() + } + latchSpans.AddNonMVCC(spanset.SpanReadOnly, roachpb.Span{ + Key: leftPeekBound, + EndKey: rightPeekBound, + }) } } } @@ -896,9 +909,17 @@ func splitTrigger( "unable to determine whether right hand side of split is empty") } + rangeKeyDeltaMS, err := computeSplitRangeKeyStatsDelta( + batch, split.LeftDesc, split.RightDesc, ts.WallTime) + if err != nil { + return enginepb.MVCCStats{}, result.Result{}, errors.Wrap(err, + "unable to compute range key stats delta for RHS") + } + h := splitStatsHelperInput{ AbsPreSplitBothEstimated: rec.GetMVCCStats(), DeltaBatchEstimated: bothDeltaMS, + DeltaRangeKey: rangeKeyDeltaMS, AbsPostSplitLeftFn: makeScanStatsFn(ctx, batch, ts, &split.LeftDesc, "left hand side"), AbsPostSplitRightFn: makeScanStatsFn(ctx, batch, ts, &split.RightDesc, "right hand side"), ScanRightFirst: splitScansRightForStatsFirst || emptyRHS, @@ -1172,15 +1193,27 @@ func mergeTrigger( } } - // The stats for the merged range are the sum of the LHS and RHS stats, less - // the RHS's replicated range ID stats. The only replicated range ID keys we - // copy from the RHS are the keys in the abort span, and we've already - // accounted for those stats above. + // The stats for the merged range are the sum of the LHS and RHS stats + // adjusted for range key merges (which is the inverse of the split + // adjustment). The RHS's replicated range ID stats are subtracted -- the only + // replicated range ID keys we copy from the RHS are the keys in the abort + // span, and we've already accounted for those stats above. 
ms.Add(merge.RightMVCCStats) + msRangeKeyDelta, err := computeSplitRangeKeyStatsDelta( + batch, merge.LeftDesc, merge.RightDesc, ts.WallTime) + if err != nil { + return result.Result{}, err + } + ms.Subtract(msRangeKeyDelta) + { ridPrefix := keys.MakeRangeIDReplicatedPrefix(merge.RightDesc.RangeID) // NB: Range-ID local keys have no versions and no intents. - iter := batch.NewMVCCIterator(storage.MVCCKeyIterKind, storage.IterOptions{UpperBound: ridPrefix.PrefixEnd()}) + iter := batch.NewMVCCIterator(storage.MVCCKeyIterKind, storage.IterOptions{ + KeyTypes: storage.IterKeyTypePointsAndRanges, + LowerBound: ridPrefix, + UpperBound: ridPrefix.PrefixEnd(), + }) defer iter.Close() sysMS, err := iter.ComputeStats(ridPrefix, ridPrefix.PrefixEnd(), 0 /* nowNanos */) if err != nil { @@ -1226,6 +1259,78 @@ func changeReplicasTrigger( return pd } +// computeSplitRangeKeyStatsDelta computes the delta in MVCCStats caused by +// the splitting of range keys that straddle the range split point. The inverse +// applies during range merges. Consider a range key [a-foo)@1 split at cc: +// +// Before: [a-foo)@1 RangeKeyCount=1 RangeKeyBytes=15 +// LHS: [a-cc)@1 RangeKeyCount=1 RangeKeyBytes=14 +// RHS: [cc-foo)@1 RangeKeyCount=1 RangeKeyBytes=16 +// +// If the LHS is computed directly then the RHS is calculated as: +// +// RHS = Before - LHS = RangeKeyCount=0 RangeKeyBytes=1 +// +// This is clearly incorrect. This function determines the delta such that: +// +// RHS = Before - LHS + Delta = RangeKeyCount=1 RangeKeyBytes=16 +// +// The same calculation can be used for merges, since Pebble will already have +// merged the range keys into one when appropriate. 
+func computeSplitRangeKeyStatsDelta( + r storage.Reader, lhs, rhs roachpb.RangeDescriptor, nowNanos int64, +) (enginepb.MVCCStats, error) { + var delta enginepb.MVCCStats + delta.AgeTo(nowNanos) + + // NB: When called during a merge trigger (for the inverse adjustment), lhs + // will contain the descriptor for the full, merged range. We therefore have + // to use the rhs start key as the reference split point. We also have to make + // sure the bounds fall within the ranges, since Prevish is imprecise. + splitKey := rhs.StartKey.AsRawKey() + lowerBound := splitKey.Prevish(roachpb.PrevishKeyLength) + if lowerBound.Compare(lhs.StartKey.AsRawKey()) < 0 { + lowerBound = lhs.StartKey.AsRawKey() + } + upperBound := splitKey.Next() + + // Check for range keys that straddle the split point. + iter := r.NewMVCCIterator(storage.MVCCKeyIterKind, storage.IterOptions{ + KeyTypes: storage.IterKeyTypeRangesOnly, + LowerBound: lowerBound, + UpperBound: upperBound, + }) + defer iter.Close() + + iter.SeekGE(storage.MVCCKey{Key: splitKey}) + if ok, err := iter.Valid(); err != nil { + return enginepb.MVCCStats{}, err + } else if !ok { + return delta, nil + } else if iter.RangeBounds().Key.Equal(splitKey) { + return delta, nil + } + + // Calculate the RHS adjustment, which turns out to be equivalent to the stats + // contribution of the range key fragmentation. The naïve calculation would be + // rhs.EncodedSize() - (keyLen(rhs.EndKey) - keyLen(lhs.EndKey)) + // which simplifies to 2 * keyLen(rhs.StartKey) + tsLen(rhs.Timestamp). 
+ for i, rkv := range iter.RangeKeys() { + keyBytes := int64(storage.EncodedMVCCTimestampSuffixLength(rkv.RangeKey.Timestamp)) + valBytes := int64(len(rkv.Value)) + if i == 0 { + delta.RangeKeyCount++ + keyBytes += 2 * int64(storage.EncodedMVCCKeyPrefixLength(splitKey)) + } + delta.RangeKeyBytes += keyBytes + delta.RangeValCount++ + delta.RangeValBytes += valBytes + delta.GCBytesAge += (keyBytes + valBytes) * (nowNanos/1e9 - rkv.RangeKey.Timestamp.WallTime/1e9) + } + + return delta, nil +} + // txnAutoGC controls whether Transaction entries are automatically gc'ed upon // EndTxn if they only have local locks (which can be resolved synchronously // with EndTxn). Certain tests become simpler with this being turned off. diff --git a/pkg/kv/kvserver/batcheval/cmd_end_transaction_test.go b/pkg/kv/kvserver/batcheval/cmd_end_transaction_test.go index 132a55b3daeb..fb9554aba031 100644 --- a/pkg/kv/kvserver/batcheval/cmd_end_transaction_test.go +++ b/pkg/kv/kvserver/batcheval/cmd_end_transaction_test.go @@ -1183,3 +1183,112 @@ func TestCommitWaitBeforeIntentResolutionIfCommitTrigger(t *testing.T) { } }) } + +func TestComputeSplitRangeKeyStatsDelta(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + storage.DisableMetamorphicSimpleValueEncoding(t) + + emptyValue := func() storage.MVCCValue { + return storage.MVCCValue{} + } + + localTSValue := func(ts int) storage.MVCCValue { + var v storage.MVCCValue + v.MVCCValueHeader.LocalTimestamp = hlc.ClockTimestamp{WallTime: int64(ts)} + return v + } + + rangeKV := func(start, end string, ts int, value storage.MVCCValue) storage.MVCCRangeKeyValue { + valueRaw, err := storage.EncodeMVCCValue(value) + require.NoError(t, err) + return storage.MVCCRangeKeyValue{ + RangeKey: storage.MVCCRangeKey{ + StartKey: roachpb.Key(start), + EndKey: roachpb.Key(end), + Timestamp: hlc.Timestamp{WallTime: int64(ts)}, + }, + Value: valueRaw, + } + } + + const nowNanos = 10e9 + lhsDesc := roachpb.RangeDescriptor{StartKey: 
roachpb.RKey("a"), EndKey: roachpb.RKey("l")} + rhsDesc := roachpb.RangeDescriptor{StartKey: roachpb.RKey("l"), EndKey: roachpb.RKey("z").PrefixEnd()} + + testcases := map[string]struct { + rangeKVs []storage.MVCCRangeKeyValue + expect enginepb.MVCCStats + }{ + // Empty stats shouldn't do anything. + "empty": {nil, enginepb.MVCCStats{}}, + // a-z splits into a-l and l-z: simple +1 range key + "full": {[]storage.MVCCRangeKeyValue{rangeKV("a", "z", 1e9, emptyValue())}, enginepb.MVCCStats{ + RangeKeyCount: 1, + RangeKeyBytes: 13, + RangeValCount: 1, + GCBytesAge: 117, + }}, + // a-z with local timestamp splits into a-l and l-z: simple +1 range key with value + "full value": {[]storage.MVCCRangeKeyValue{rangeKV("a", "z", 2e9, localTSValue(1))}, enginepb.MVCCStats{ + RangeKeyCount: 1, + RangeKeyBytes: 13, + RangeValCount: 1, + RangeValBytes: 9, + GCBytesAge: 176, + }}, + // foo-zzzz splits into foo-l and l-zzzz: contribution is same as for short + // keys, because we have to adjust for the change in LHS end key which ends + // up only depending on the split key, and that doesn't change. + "different key length": {[]storage.MVCCRangeKeyValue{rangeKV("foo", "zzzz", 1e9, emptyValue())}, enginepb.MVCCStats{ + RangeKeyCount: 1, + RangeKeyBytes: 13, + RangeValCount: 1, + GCBytesAge: 117, + }}, + // Two abutting keys at different timestamps at the split point should not + // require a delta. + "no straddling, timestamp": {[]storage.MVCCRangeKeyValue{ + rangeKV("a", "l", 1e9, emptyValue()), + rangeKV("l", "z", 2e9, emptyValue()), + }, enginepb.MVCCStats{}}, + // Two abutting keys at different local timestamps (values) at the split + // point should not require a delta. + "no straddling, value": {[]storage.MVCCRangeKeyValue{ + rangeKV("a", "l", 2e9, localTSValue(1)), + rangeKV("l", "z", 2e9, localTSValue(2)), + }, enginepb.MVCCStats{}}, + // Multiple straddling keys.
+ "multiple": { + []storage.MVCCRangeKeyValue{ + rangeKV("a", "z", 2e9, localTSValue(1)), + rangeKV("k", "p", 3e9, localTSValue(2)), + rangeKV("foo", "m", 4e9, emptyValue()), + }, enginepb.MVCCStats{ + RangeKeyCount: 1, + RangeKeyBytes: 31, + RangeValCount: 3, + RangeValBytes: 18, + GCBytesAge: 348, + }}, + } + for name, tc := range testcases { + t.Run(name, func(t *testing.T) { + engine := storage.NewDefaultInMemForTesting() + defer engine.Close() + + for _, rkv := range tc.rangeKVs { + value, err := storage.DecodeMVCCValue(rkv.Value) + require.NoError(t, err) + require.NoError(t, engine.ExperimentalPutMVCCRangeKey(rkv.RangeKey, value)) + } + + tc.expect.LastUpdateNanos = nowNanos + + msDelta, err := computeSplitRangeKeyStatsDelta(engine, lhsDesc, rhsDesc, nowNanos) + require.NoError(t, err) + require.Equal(t, tc.expect, msDelta) + }) + } +} diff --git a/pkg/kv/kvserver/batcheval/cmd_truncate_log.go b/pkg/kv/kvserver/batcheval/cmd_truncate_log.go index c7b7f1cf83cc..cdc3a06d320e 100644 --- a/pkg/kv/kvserver/batcheval/cmd_truncate_log.go +++ b/pkg/kv/kvserver/batcheval/cmd_truncate_log.go @@ -117,7 +117,11 @@ func TruncateLog( // Note that any sideloaded payloads that may be removed by this truncation // are not tracked in the raft log delta. The delta will be adjusted below // raft. - iter := readWriter.NewMVCCIterator(storage.MVCCKeyIterKind, storage.IterOptions{UpperBound: end}) + iter := readWriter.NewMVCCIterator(storage.MVCCKeyIterKind, storage.IterOptions{ + KeyTypes: storage.IterKeyTypePointsAndRanges, + LowerBound: start, + UpperBound: end, + }) defer iter.Close() // We can pass zero as nowNanos because we're only interested in SysBytes. 
ms, err := iter.ComputeStats(start, end, 0 /* nowNanos */) diff --git a/pkg/kv/kvserver/batcheval/split_stats_helper.go b/pkg/kv/kvserver/batcheval/split_stats_helper.go index d238d0704428..b8ab3301585b 100644 --- a/pkg/kv/kvserver/batcheval/split_stats_helper.go +++ b/pkg/kv/kvserver/batcheval/split_stats_helper.go @@ -31,6 +31,9 @@ import "github.com/cockroachdb/cockroach/pkg/storage/enginepb" // practice, we obtain this by recomputing the stats using the corresponding // AbsPostSplit{Left,Right}Fn, and so we don't expect ContainsEstimates to be // set in them. The choice of which side to scan is controlled by ScanRightFirst. +// - DeltaRangeKey: the stats delta that must be added to the non-computed +// half's stats to account for the splitting of range keys straddling the split +// point. See computeSplitRangeKeyStatsDelta() for details. // // We are interested in computing from this the quantities // @@ -60,7 +63,7 @@ import "github.com/cockroachdb/cockroach/pkg/storage/enginepb" // The two unknown quantities can be expressed in terms of the known quantities // because // -// (1) AbsPreSplitBoth + DeltaBatch +// (1) AbsPreSplitBoth + DeltaBatch + DeltaRangeKey // - CombinedErrorDelta = AbsPostSplitLeft + AbsPostSplitRight // // In words, this corresponds to "all bytes are accounted for": from the initial @@ -88,14 +91,16 @@ import "github.com/cockroachdb/cockroach/pkg/storage/enginepb" // // For AbsPostSplitRight(), there are two cases. First, due to the identity // -// CombinedErrorDelta = AbsPreSplitBothEstimated + DeltaBatchEstimated +// CombinedErrorDelta = AbsPreSplitBothEstimated + DeltaBatchEstimated // -(AbsPostSplitLeft + AbsPostSplitRight) +// + DeltaRangeKey. // -// and the fact that the second line contains no estimates, we know that -// CombinedErrorDelta is zero if the first line contains no estimates. 
Using -// this, we can rearrange as +// and the fact that the second and third lines contain no estimates, we know +// that CombinedErrorDelta is zero if the first line contains no estimates. +// Using this, we can rearrange as // -// AbsPostSplitRight() = AbsPreSplitBoth + DeltaBatch - AbsPostSplitLeft. +// AbsPostSplitRight() = AbsPreSplitBoth + DeltaBatch - AbsPostSplitLeft +// + DeltaRangeKey. // // where all quantities on the right are known. If CombinedErrorDelta is // nonzero, we effectively have one more unknown in our linear system and we @@ -116,6 +121,7 @@ type splitStatsScanFn func() (enginepb.MVCCStats, error) type splitStatsHelperInput struct { AbsPreSplitBothEstimated enginepb.MVCCStats DeltaBatchEstimated enginepb.MVCCStats + DeltaRangeKey enginepb.MVCCStats // AbsPostSplitLeftFn returns the stats for the left hand side of the // split. AbsPostSplitLeftFn splitStatsScanFn @@ -160,6 +166,7 @@ func makeSplitStatsHelper(input splitStatsHelperInput) (splitStatsHelper, error) ms := h.in.AbsPreSplitBothEstimated ms.Subtract(absPostSplitFirst) ms.Add(h.in.DeltaBatchEstimated) + ms.Add(h.in.DeltaRangeKey) if h.in.ScanRightFirst { h.absPostSplitLeft = &ms } else { diff --git a/pkg/kv/kvserver/below_raft_protos_test.go b/pkg/kv/kvserver/below_raft_protos_test.go index 02c5e8027863..89181b99f900 100644 --- a/pkg/kv/kvserver/below_raft_protos_test.go +++ b/pkg/kv/kvserver/below_raft_protos_test.go @@ -78,7 +78,7 @@ var belowRaftGoldenProtos = map[reflect.Type]fixture{ return enginepb.NewPopulatedRangeAppliedState(r, false) }, emptySum: 615555020845646359, - populatedSum: 12125419916111069931, + populatedSum: 4888917721712214316, }, reflect.TypeOf(&raftpb.HardState{}): { populatedConstructor: func(r *rand.Rand) protoutil.Message { diff --git a/pkg/kv/kvserver/client_merge_test.go b/pkg/kv/kvserver/client_merge_test.go index 92158b2becd8..84fb1060b850 100644 --- a/pkg/kv/kvserver/client_merge_test.go +++ b/pkg/kv/kvserver/client_merge_test.go @@ -1308,15 +1308,13 
@@ func TestStoreRangeMergeStats(t *testing.T) { base.TestClusterArgs{ ReplicationMode: base.ReplicationManual, }) - defer tc.Stopper().Stop(context.Background()) + defer tc.Stopper().Stop(ctx) scratch := tc.ScratchRange(t) store := tc.GetFirstStoreFromServer(t, 0) // Split the range. lhsDesc, rhsDesc, err := createSplitRanges(ctx, scratch, store) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) // Write some values left and right of the proposed split key. kvserver.WriteRandomDataToRange(t, store, lhsDesc.RangeID, scratchKey("aaa")) @@ -1327,30 +1325,18 @@ func TestStoreRangeMergeStats(t *testing.T) { // tests whether the merge code properly accounts for merging abort span // records for the same transaction. txn1 := kv.NewTxn(ctx, store.DB(), 0 /* gatewayNodeID */) - if err := txn1.Put(ctx, scratchKey("a-txn1"), "val"); err != nil { - t.Fatal(err) - } + require.NoError(t, txn1.Put(ctx, scratchKey("a-txn1"), "val")) txn2 := kv.NewTxn(ctx, store.DB(), 0 /* gatewayNodeID */) - if err := txn2.Put(ctx, scratchKey("c-txn2"), "val"); err != nil { - t.Fatal(err) - } + require.NoError(t, txn2.Put(ctx, scratchKey("c-txn2"), "val")) txn3 := kv.NewTxn(ctx, store.DB(), 0 /* gatewayNodeID */) - if err := txn3.Put(ctx, scratchKey("a-txn3"), "val"); err != nil { - t.Fatal(err) - } - if err := txn3.Put(ctx, scratchKey("c-txn3"), "val"); err != nil { - t.Fatal(err) - } + require.NoError(t, txn3.Put(ctx, scratchKey("a-txn3"), "val")) + require.NoError(t, txn3.Put(ctx, scratchKey("c-txn3"), "val")) hiPriTxn := kv.NewTxn(ctx, store.DB(), 0 /* gatewayNodeID */) hiPriTxn.TestingSetPriority(enginepb.MaxTxnPriority) for _, key := range []string{"a-txn1", "c-txn2", "a-txn3", "c-txn3"} { - if err := hiPriTxn.Put(ctx, scratchKey(key), "val"); err != nil { - t.Fatal(err) - } - } - if err := hiPriTxn.Commit(ctx); err != nil { - t.Fatal(err) + require.NoError(t, hiPriTxn.Put(ctx, scratchKey(key), "val")) } + require.NoError(t, hiPriTxn.Commit(ctx)) // Leave txn1-txn3 open so that 
their abort span records exist during the // merge below. @@ -1358,43 +1344,30 @@ func TestStoreRangeMergeStats(t *testing.T) { snap := store.Engine().NewSnapshot() defer snap.Close() msA, err := stateloader.Make(lhsDesc.RangeID).LoadMVCCStats(ctx, snap) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) msB, err := stateloader.Make(rhsDesc.RangeID).LoadMVCCStats(ctx, snap) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) // Stats should agree with recomputation. - if err := verifyRecomputedStats(snap, lhsDesc, msA, tc.Servers[0].Clock().Now().WallTime); err != nil { - t.Fatalf("failed to verify range A's stats before split: %+v", err) - } - if err := verifyRecomputedStats(snap, rhsDesc, msB, tc.Servers[0].Clock().Now().WallTime); err != nil { - t.Fatalf("failed to verify range B's stats before split: %+v", err) - } + assertRecomputedStats(t, "range A before split", snap, lhsDesc, msA, store.Clock().PhysicalNow()) + assertRecomputedStats(t, "range B before split", snap, rhsDesc, msB, store.Clock().PhysicalNow()) // Merge the b range back into the a range. args := adminMergeArgs(lhsDesc.StartKey.AsRawKey()) - if _, err := kv.SendWrapped(ctx, store.TestSender(), args); err != nil { - t.Fatal(err) - } + _, pErr := kv.SendWrapped(ctx, store.TestSender(), args) + require.NoError(t, pErr.GoError()) replMerged := store.LookupReplica(lhsDesc.StartKey) // Get the range stats for the merged range and verify. snap = store.Engine().NewSnapshot() defer snap.Close() msMerged, err := stateloader.Make(replMerged.RangeID).LoadMVCCStats(ctx, snap) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) // Merged stats should agree with recomputation. 
nowNanos := tc.Servers[0].Clock().Now().WallTime msMerged.AgeTo(nowNanos) - if err := verifyRecomputedStats(snap, replMerged.Desc(), msMerged, nowNanos); err != nil { - t.Errorf("failed to verify range's stats after merge: %+v", err) - } + assertRecomputedStats(t, "merged range", snap, replMerged.Desc(), msMerged, nowNanos) } func TestStoreRangeMergeInFlightTxns(t *testing.T) { diff --git a/pkg/kv/kvserver/client_split_test.go b/pkg/kv/kvserver/client_split_test.go index 266b6c2e42c9..f3f2b878feae 100644 --- a/pkg/kv/kvserver/client_split_test.go +++ b/pkg/kv/kvserver/client_split_test.go @@ -745,12 +745,13 @@ func TestStoreRangeSplitIdempotency(t *testing.T) { } } -// TestStoreRangeSplitStats starts by splitting the system keys from user-space -// keys and verifying that the user space side of the split (which is empty), -// has all zeros for stats. It then writes random data to the user space side, -// splits it halfway and verifies the two splits have stats exactly equaling -// the pre-split. -func TestStoreRangeSplitStats(t *testing.T) { +// TestStoreRangeSplitMergeStats starts by splitting the system keys from +// user-space keys and verifying that the user space side of the split (which is +// empty), has all zeros for stats. It then writes random data to the user space +// side, splits it halfway and verifies the two splits have appropriate stats. +// Finally, it merges the ranges back and asserts that the stats equal the +// original stats. +func TestStoreRangeSplitMergeStats(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) @@ -768,61 +769,54 @@ func TestStoreRangeSplitStats(t *testing.T) { store, err := s.Stores().GetStore(s.GetFirstStoreID()) require.NoError(t, err) - start := s.Clock().Now() - // Split the range after the last table data key. 
keyPrefix := keys.SystemSQLCodec.TablePrefix(bootstrap.TestingUserDescID(0)) args := adminSplitArgs(keyPrefix) - if _, pErr := kv.SendWrapped(ctx, store.TestSender(), args); pErr != nil { - t.Fatal(pErr) - } + _, pErr := kv.SendWrapped(ctx, store.TestSender(), args) + require.NoError(t, pErr.GoError()) + // Verify empty range has empty stats. repl := store.LookupReplica(roachpb.RKey(keyPrefix)) - // NOTE that this value is expected to change over time, depending on what - // we store in the sys-local keyspace. Update it accordingly for this test. - empty := enginepb.MVCCStats{LastUpdateNanos: start.WallTime} - if err := verifyRangeStats(store.Engine(), repl.RangeID, empty); err != nil { - t.Fatal(err) - } + assertRangeStats(t, "empty stats", store.Engine(), repl.RangeID, enginepb.MVCCStats{}) // Write random data. - midKey := kvserver.WriteRandomDataToRange(t, store, repl.RangeID, keyPrefix) + splitKey := kvserver.WriteRandomDataToRange(t, store, repl.RangeID, keyPrefix) + + start := s.Clock().Now() // Get the range stats now that we have data. snap := store.Engine().NewSnapshot() defer snap.Close() ms, err := stateloader.Make(repl.RangeID).LoadMVCCStats(ctx, snap) - if err != nil { - t.Fatal(err) - } - if err := verifyRecomputedStats(snap, repl.Desc(), ms, start.WallTime); err != nil { - t.Fatalf("failed to verify range's stats before split: %+v", err) - } - if inMemMS := repl.GetMVCCStats(); inMemMS != ms { - t.Fatalf("in-memory and on-disk diverged:\n%+v\n!=\n%+v", inMemMS, ms) - } + require.NoError(t, err) + assertRecomputedStats(t, "before split", snap, repl.Desc(), ms, start.WallTime) + require.Equal(t, repl.GetMVCCStats(), ms, "in-memory and on-disk stats diverge") // Split the range at approximate halfway point. 
- args = adminSplitArgs(midKey) - if _, pErr := kv.SendWrappedWith(ctx, store.TestSender(), roachpb.Header{ - RangeID: repl.RangeID, - }, args); pErr != nil { - t.Fatal(pErr) - } + _, pErr = kv.SendWrapped(ctx, store.TestSender(), adminSplitArgs(splitKey)) + require.NoError(t, pErr.GoError()) snap = store.Engine().NewSnapshot() defer snap.Close() msLeft, err := stateloader.Make(repl.RangeID).LoadMVCCStats(ctx, snap) - if err != nil { - t.Fatal(err) - } - replRight := store.LookupReplica(midKey) + require.NoError(t, err) + replRight := store.LookupReplica(splitKey) msRight, err := stateloader.Make(replRight.RangeID).LoadMVCCStats(ctx, snap) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) + + // Stats should both have the new timestamp. + require.Less(t, start.WallTime, msLeft.LastUpdateNanos, "LHS stats have old timestamp") + require.Less(t, start.WallTime, msRight.LastUpdateNanos, "RHS stats have old timestamp") + + // We don't care about system data. + ms.SysBytes, ms.SysCount, ms.AbortSpanBytes = 0, 0, 0 - // The stats should be exactly equal when added. + // The point key stats should be exactly equal when added. + pointMS := ms + pointMS.LastUpdateNanos = 0 + pointMS.RangeKeyCount, pointMS.RangeKeyBytes = 0, 0 + pointMS.RangeValCount, pointMS.RangeValBytes = 0, 0 + pointMS.GCBytesAge = 0 expMS := enginepb.MVCCStats{ LiveBytes: msLeft.LiveBytes + msRight.LiveBytes, KeyBytes: msLeft.KeyBytes + msRight.KeyBytes, @@ -833,27 +827,35 @@ func TestStoreRangeSplitStats(t *testing.T) { ValCount: msLeft.ValCount + msRight.ValCount, IntentCount: msLeft.IntentCount + msRight.IntentCount, } - ms.SysBytes, ms.SysCount, ms.AbortSpanBytes = 0, 0, 0 - ms.LastUpdateNanos = 0 - if expMS != ms { - t.Errorf("expected left plus right ranges to equal original, but\n %+v\n+\n %+v\n!=\n %+v", msLeft, msRight, ms) - } + require.Equal(t, expMS, pointMS, "left plus right point key stats does not match original") - // Stats should both have the new timestamp. 
- if lTs := msLeft.LastUpdateNanos; lTs < start.WallTime { - t.Errorf("expected left range stats to have new timestamp, want %d, got %d", start.WallTime, lTs) - } - if rTs := msRight.LastUpdateNanos; rTs < start.WallTime { - t.Errorf("expected right range stats to have new timestamp, want %d, got %d", start.WallTime, rTs) - } + // The range key stats should be equal or greater. + require.GreaterOrEqual(t, msLeft.RangeKeyCount+msRight.RangeKeyCount, ms.RangeKeyCount) + require.GreaterOrEqual(t, msLeft.RangeKeyBytes+msRight.RangeKeyBytes, ms.RangeKeyBytes) + require.GreaterOrEqual(t, msLeft.RangeValCount+msRight.RangeValCount, ms.RangeValCount) + require.GreaterOrEqual(t, msLeft.RangeValBytes+msRight.RangeValBytes, ms.RangeValBytes) + require.GreaterOrEqual(t, msLeft.GCBytesAge+msRight.GCBytesAge, ms.GCBytesAge) // Stats should agree with recomputation. - if err := verifyRecomputedStats(snap, repl.Desc(), msLeft, s.Clock().PhysicalNow()); err != nil { - t.Fatalf("failed to verify left range's stats after split: %+v", err) - } - if err := verifyRecomputedStats(snap, replRight.Desc(), msRight, s.Clock().PhysicalNow()); err != nil { - t.Fatalf("failed to verify right range's stats after split: %+v", err) - } + assertRecomputedStats(t, "LHS after split", snap, repl.Desc(), msLeft, s.Clock().PhysicalNow()) + assertRecomputedStats(t, "RHS after split", snap, replRight.Desc(), msRight, s.Clock().PhysicalNow()) + + // Merge the ranges back together, and assert that the merged stats + // agree with the pre-split stats. 
+ _, pErr = kv.SendWrapped(ctx, store.TestSender(), adminMergeArgs(repl.Desc().StartKey.AsRawKey())) + require.NoError(t, pErr.GoError()) + + repl = store.LookupReplica(roachpb.RKey(keyPrefix)) + snap = store.Engine().NewSnapshot() + defer snap.Close() + + msMerged, err := stateloader.Make(repl.RangeID).LoadMVCCStats(ctx, snap) + require.NoError(t, err) + assertRecomputedStats(t, "in-mem after merge", snap, repl.Desc(), msMerged, s.Clock().PhysicalNow()) + + msMerged.SysBytes, msMerged.SysCount, msMerged.AbortSpanBytes = 0, 0, 0 + ms.AgeTo(msMerged.LastUpdateNanos) + require.Equal(t, ms, msMerged, "post-merge stats differ from pre-split") } // RaftMessageHandlerInterceptor wraps a storage.RaftMessageHandler. It @@ -976,56 +978,41 @@ func TestStoreRangeSplitStatsWithMerges(t *testing.T) { // Split the range after the last table data key. keyPrefix := keys.SystemSQLCodec.TablePrefix(bootstrap.TestingUserDescID(0)) args := adminSplitArgs(keyPrefix) - if _, pErr := kv.SendWrapped(ctx, store.TestSender(), args); pErr != nil { - t.Fatal(pErr) - } + _, pErr := kv.SendWrapped(ctx, store.TestSender(), args) + require.NoError(t, pErr.GoError()) + // Verify empty range has empty stats. repl := store.LookupReplica(roachpb.RKey(keyPrefix)) // NOTE that this value is expected to change over time, depending on what // we store in the sys-local keyspace. Update it accordingly for this test. empty := enginepb.MVCCStats{LastUpdateNanos: start.WallTime} - if err := verifyRangeStats(store.Engine(), repl.RangeID, empty); err != nil { - t.Fatal(err) - } + assertRangeStats(t, "empty stats", store.Engine(), repl.RangeID, empty) // Write random TimeSeries data. midKey := writeRandomTimeSeriesDataToRange(t, store, repl.RangeID, keyPrefix) // Split the range at approximate halfway point. 
args = adminSplitArgs(midKey) - if _, pErr := kv.SendWrappedWith(ctx, store.TestSender(), roachpb.Header{ + _, pErr = kv.SendWrappedWith(ctx, store.TestSender(), roachpb.Header{ RangeID: repl.RangeID, - }, args); pErr != nil { - t.Fatal(pErr) - } + }, args) + require.NoError(t, pErr.GoError()) snap := store.Engine().NewSnapshot() defer snap.Close() msLeft, err := stateloader.Make(repl.RangeID).LoadMVCCStats(ctx, snap) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) replRight := store.LookupReplica(midKey) msRight, err := stateloader.Make(replRight.RangeID).LoadMVCCStats(ctx, snap) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) // Stats should both have the new timestamp. - if lTs := msLeft.LastUpdateNanos; lTs < start.WallTime { - t.Errorf("expected left range stats to have new timestamp, want %d, got %d", start.WallTime, lTs) - } - if rTs := msRight.LastUpdateNanos; rTs < start.WallTime { - t.Errorf("expected right range stats to have new timestamp, want %d, got %d", start.WallTime, rTs) - } + require.Less(t, start.WallTime, msLeft.LastUpdateNanos, "LHS stats have old timestamp") + require.Less(t, start.WallTime, msRight.LastUpdateNanos, "RHS stats have old timestamp") // Stats should agree with recomputation. 
- if err := verifyRecomputedStats(snap, repl.Desc(), msLeft, s.Clock().PhysicalNow()); err != nil { - t.Fatalf("failed to verify left range's stats after split: %+v", err) - } - if err := verifyRecomputedStats(snap, replRight.Desc(), msRight, s.Clock().PhysicalNow()); err != nil { - t.Fatalf("failed to verify right range's stats after split: %+v", err) - } + assertRecomputedStats(t, "LHS after split", snap, repl.Desc(), msLeft, s.Clock().PhysicalNow()) + assertRecomputedStats(t, "RHS after split", snap, replRight.Desc(), msRight, s.Clock().PhysicalNow()) } // fillRange writes keys with the given prefix and associated values diff --git a/pkg/kv/kvserver/client_test.go b/pkg/kv/kvserver/client_test.go index a300b9dcdc4b..e3060da7efb8 100644 --- a/pkg/kv/kvserver/client_test.go +++ b/pkg/kv/kvserver/client_test.go @@ -31,8 +31,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/storage/enginepb" "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/util/hlc" - "github.com/cockroachdb/errors" - "github.com/kr/pretty" + "github.com/stretchr/testify/require" ) // getArgs returns a GetRequest and GetResponse pair addressed to @@ -144,38 +143,38 @@ func adminTransferLeaseArgs(key roachpb.Key, target roachpb.StoreID) roachpb.Req } } -func verifyRangeStats( - reader storage.Reader, rangeID roachpb.RangeID, expMS enginepb.MVCCStats, -) error { - ms, err := stateloader.Make(rangeID).LoadMVCCStats(context.Background(), reader) - if err != nil { - return err - } +func assertRangeStats( + t *testing.T, name string, r storage.Reader, rangeID roachpb.RangeID, expMS enginepb.MVCCStats, +) { + t.Helper() + + ms, err := stateloader.Make(rangeID).LoadMVCCStats(context.Background(), r) + require.NoError(t, err) // When used with a real wall clock these will not be the same, since it // takes time to load stats. expMS.AgeTo(ms.LastUpdateNanos) // Clear system counts as these are expected to vary. 
ms.SysBytes, ms.SysCount, ms.AbortSpanBytes = 0, 0, 0 - if ms != expMS { - return errors.Errorf("expected and actual stats differ:\n%s", pretty.Diff(expMS, ms)) - } - return nil + require.Equal(t, expMS, ms, "%s: stats differ", name) } -func verifyRecomputedStats( - reader storage.Reader, d *roachpb.RangeDescriptor, expMS enginepb.MVCCStats, nowNanos int64, -) error { - ms, err := rditer.ComputeStatsForRange(d, reader, nowNanos) - if err != nil { - return err - } +func assertRecomputedStats( + t *testing.T, + name string, + r storage.Reader, + desc *roachpb.RangeDescriptor, + expMS enginepb.MVCCStats, + nowNanos int64, +) { + t.Helper() + + ms, err := rditer.ComputeStatsForRange(desc, r, nowNanos) + require.NoError(t, err) + // When used with a real wall clock these will not be the same, since it // takes time to load stats. expMS.AgeTo(ms.LastUpdateNanos) - if expMS != ms { - return fmt.Errorf("expected range's stats to agree with recomputation: got\n%+v\nrecomputed\n%+v", expMS, ms) - } - return nil + require.Equal(t, expMS, ms, "%s: recomputed stats diverge", name) } func waitForTombstone( diff --git a/pkg/kv/kvserver/helpers_test.go b/pkg/kv/kvserver/helpers_test.go index 4d5868f21b83..651c944260de 100644 --- a/pkg/kv/kvserver/helpers_test.go +++ b/pkg/kv/kvserver/helpers_test.go @@ -18,7 +18,6 @@ package kvserver import ( "context" "fmt" - "math/rand" "testing" "time" @@ -45,6 +44,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/util/randutil" "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/errors" + "github.com/stretchr/testify/require" "go.etcd.io/etcd/raft/v3" ) @@ -485,24 +485,40 @@ func (t *RaftTransport) GetCircuitBreaker( } func WriteRandomDataToRange( - t testing.TB, store *Store, rangeID roachpb.RangeID, keyPrefix []byte, -) (midpoint []byte) { - src := rand.New(rand.NewSource(0)) - for i := 0; i < 100; i++ { - key := append([]byte(nil), keyPrefix...) 
- key = append(key, randutil.RandBytes(src, int(src.Int31n(1<<7)))...) - val := randutil.RandBytes(src, int(src.Int31n(1<<8))) - pArgs := putArgs(key, val) - if _, pErr := kv.SendWrappedWith(context.Background(), store.TestSender(), roachpb.Header{ - RangeID: rangeID, - }, &pArgs); pErr != nil { - t.Fatal(pErr) + t testing.TB, store *Store, rangeID roachpb.RangeID, keyPrefix roachpb.Key, +) (splitKey []byte) { + t.Helper() + + ctx := context.Background() + src, _ := randutil.NewTestRand() + for i := 0; i < 1000; i++ { + var req roachpb.Request + if src.Float64() < 0.05 { + // Write some occasional range tombstones. + startKey := append(keyPrefix.Clone(), randutil.RandBytes(src, int(src.Int31n(1<<4)))...) + var endKey roachpb.Key + for startKey.Compare(endKey) >= 0 { + endKey = append(keyPrefix.Clone(), randutil.RandBytes(src, int(src.Int31n(1<<4)))...) + } + req = &roachpb.DeleteRangeRequest{ + RequestHeader: roachpb.RequestHeader{ + Key: startKey, + EndKey: endKey, + }, + UseExperimentalRangeTombstone: true, + } + } else { + // Write regular point keys. + key := append(keyPrefix.Clone(), randutil.RandBytes(src, int(src.Int31n(1<<4)))...) + val := randutil.RandBytes(src, int(src.Int31n(1<<8))) + pArgs := putArgs(key, val) + req = &pArgs } + _, pErr := kv.SendWrappedWith(ctx, store.TestSender(), roachpb.Header{RangeID: rangeID}, req) + require.NoError(t, pErr.GoError()) } - // Return approximate midway point ("Z" in string "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"). - midKey := append([]byte(nil), keyPrefix...) - midKey = append(midKey, []byte("Z")...) - return midKey + // Return a random non-empty split key. + return append(keyPrefix.Clone(), randutil.RandBytes(src, int(src.Int31n(1<<4))+1)...) 
} func WatchForDisappearingReplicas(t testing.TB, store *Store) { diff --git a/pkg/kv/kvserver/rditer/stats.go b/pkg/kv/kvserver/rditer/stats.go index 8cf5ce11c1b7..20ae426748bd 100644 --- a/pkg/kv/kvserver/rditer/stats.go +++ b/pkg/kv/kvserver/rditer/stats.go @@ -26,8 +26,11 @@ func ComputeStatsForRange( var err error for _, keyRange := range MakeReplicatedKeyRangesExceptLockTable(d) { func() { - iter := reader.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, - storage.IterOptions{UpperBound: keyRange.End}) + iter := reader.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{ + KeyTypes: storage.IterKeyTypePointsAndRanges, + LowerBound: keyRange.Start, + UpperBound: keyRange.End, + }) defer iter.Close() var msDelta enginepb.MVCCStats diff --git a/pkg/kv/kvserver/replica_consistency.go b/pkg/kv/kvserver/replica_consistency.go index 58fb8794839e..861696ab2edb 100644 --- a/pkg/kv/kvserver/replica_consistency.go +++ b/pkg/kv/kvserver/replica_consistency.go @@ -598,7 +598,8 @@ func (*Replica) sha512( var timestampBuf []byte hasher := sha512.New() - visitor := func(unsafeKey storage.MVCCKey, unsafeValue []byte) error { + // TODO(erikgrinaker): add a range key visitor to hash range keys. + pointKeyVisitor := func(unsafeKey storage.MVCCKey, unsafeValue []byte) error { // Rate Limit the scan through the range if err := limiter.WaitN(ctx, int64(len(unsafeKey.Key)+len(unsafeValue))); err != nil { return err @@ -653,11 +654,13 @@ func (*Replica) sha512( // we will probably not have any interleaved intents so we could stop // using MVCCKeyAndIntentsIterKind and consider all locks here. 
for _, span := range rditer.MakeReplicatedKeyRangesExceptLockTable(&desc) { - iter := snap.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, - storage.IterOptions{UpperBound: span.End}) - spanMS, err := storage.ComputeStatsForRange( - iter, span.Start, span.End, 0 /* nowNanos */, visitor, - ) + iter := snap.NewMVCCIterator(storage.MVCCKeyAndIntentsIterKind, storage.IterOptions{ + KeyTypes: storage.IterKeyTypePointsAndRanges, + LowerBound: span.Start, + UpperBound: span.End, + }) + spanMS, err := storage.ComputeStatsForRangeWithVisitors( + iter, span.Start, span.End, 0 /* nowNanos */, pointKeyVisitor, nil /* rangeKeyVisitor */) iter.Close() if err != nil { return nil, err diff --git a/pkg/kv/kvserver/replica_raft.go b/pkg/kv/kvserver/replica_raft.go index 8e0d981503ea..09bfa7a587da 100644 --- a/pkg/kv/kvserver/replica_raft.go +++ b/pkg/kv/kvserver/replica_raft.go @@ -2149,6 +2149,7 @@ func ComputeRaftLogSize( prefix := keys.RaftLogPrefix(rangeID) prefixEnd := prefix.PrefixEnd() iter := reader.NewMVCCIterator(storage.MVCCKeyIterKind, storage.IterOptions{ + KeyTypes: storage.IterKeyTypePointsAndRanges, LowerBound: prefix, UpperBound: prefixEnd, }) diff --git a/pkg/kv/kvserver/replica_test.go b/pkg/kv/kvserver/replica_test.go index 9390385c7afa..a6a0063110b4 100644 --- a/pkg/kv/kvserver/replica_test.go +++ b/pkg/kv/kvserver/replica_test.go @@ -11345,7 +11345,7 @@ func TestRangeStatsRequest(t *testing.T) { defer stopper.Stop(ctx) tc.Start(ctx, t, stopper) - keyPrefix := roachpb.RKey("dummy-prefix") + keyPrefix := roachpb.Key("dummy-prefix") // Write some random data to the range and verify that a RangeStatsRequest // returns the same MVCC stats as the replica's in-memory state. 
diff --git a/pkg/sql/logictest/testdata/logic_test/builtin_function_notenant b/pkg/sql/logictest/testdata/logic_test/builtin_function_notenant index a35c18a41267..479fbe2371ee 100644 --- a/pkg/sql/logictest/testdata/logic_test/builtin_function_notenant +++ b/pkg/sql/logictest/testdata/logic_test/builtin_function_notenant @@ -19,7 +19,9 @@ SELECT crdb_internal.check_consistency(true, '\x03', '\x02') query ITT SELECT range_id, status, regexp_replace(detail, '[0-9]+', '', 'g') FROM crdb_internal.check_consistency(true, '\x02', '\xffff') WHERE range_id = 1 ---- -1 RANGE_CONSISTENT stats: {ContainsEstimates: LastUpdateNanos: IntentAge: GCBytesAge: LiveBytes: LiveCount: KeyBytes: KeyCount: ValBytes: ValCount: IntentBytes: IntentCount: SeparatedIntentCount: SysBytes: SysCount: AbortSpanBytes:} +1 RANGE_CONSISTENT stats: {ContainsEstimates: LastUpdateNanos: IntentAge: GCBytesAge: LiveBytes: LiveCount: KeyBytes: KeyCount: ValBytes: ValCount: IntentBytes: IntentCount: SeparatedIntentCount: RangeKeyCount: RangeKeyBytes: RangeValCount: RangeValBytes: SysBytes: SysCount: AbortSpanBytes:} + + # Without explicit keys, scans all ranges (we don't test this too precisely to # avoid flaking the test when the range count changes, just want to know that diff --git a/pkg/storage/BUILD.bazel b/pkg/storage/BUILD.bazel index 9251142095c8..345bf5d44435 100644 --- a/pkg/storage/BUILD.bazel +++ b/pkg/storage/BUILD.bazel @@ -147,7 +147,6 @@ go_test( "//pkg/testutils/skip", "//pkg/testutils/zerofields", "//pkg/util", - "//pkg/util/caller", "//pkg/util/encoding", "//pkg/util/fileutil", "//pkg/util/hlc", diff --git a/pkg/storage/bench_test.go b/pkg/storage/bench_test.go index 99f051072eb1..0f060ca619ba 100644 --- a/pkg/storage/bench_test.go +++ b/pkg/storage/bench_test.go @@ -837,7 +837,11 @@ func runMVCCScan(ctx context.Context, b *testing.B, emk engineMaker, opts benchS // Pull all of the sstables into the RocksDB cache in order to make the // timings more stable. 
Otherwise, the first run will be penalized pulling // data into the cache while later runs will not. - iter := eng.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{UpperBound: roachpb.KeyMax}) + iter := eng.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{ + KeyTypes: IterKeyTypePointsAndRanges, + LowerBound: keys.LocalMax, + UpperBound: roachpb.KeyMax, + }) _, _ = iter.ComputeStats(keys.LocalMax, roachpb.KeyMax, 0) iter.Close() } @@ -1328,7 +1332,11 @@ func runMVCCComputeStats(ctx context.Context, b *testing.B, emk engineMaker, val var stats enginepb.MVCCStats var err error for i := 0; i < b.N; i++ { - iter := eng.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{UpperBound: roachpb.KeyMax}) + iter := eng.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{ + KeyTypes: IterKeyTypePointsAndRanges, + LowerBound: keys.LocalMax, + UpperBound: roachpb.KeyMax, + }) stats, err = iter.ComputeStats(keys.LocalMax, roachpb.KeyMax, 0) iter.Close() if err != nil { diff --git a/pkg/storage/engine.go b/pkg/storage/engine.go index 34363b4ee56f..ecac0cc248d8 100644 --- a/pkg/storage/engine.go +++ b/pkg/storage/engine.go @@ -252,11 +252,20 @@ type MVCCIterator interface { ValueProto(msg protoutil.Message) error // ComputeStats scans the underlying engine from start to end keys and // computes stats counters based on the values. This method is used after a - // range is split to recompute stats for each subrange. The start key is - // always adjusted to avoid counting local keys in the event stats are being - // recomputed for the first range (i.e. the one with start key == KeyMin). - // The nowNanos arg specifies the wall time in nanoseconds since the - // epoch and is used to compute the total age of all intents. + // range is split to recompute stats for each subrange. The nowNanos arg + // specifies the wall time in nanoseconds since the epoch and is used to + // compute the total age of intents and garbage. 
+ // + // To properly account for intents and range keys, the iterator must be + // created with MVCCKeyAndIntentsIterKind and IterKeyTypePointsAndRanges, + // and the LowerBound and UpperBound must be set equal to start and end + // in order for range keys to be truncated to the bounds. + // + // TODO(erikgrinaker): This should be replaced by ComputeStatsForRange + // instead, which should set up its own iterator with appropriate options. + // This isn't currently done in order to do spanset assertions on it, but this + // could be better solved by checking the iterator bounds in NewMVCCIterator + // and requiring callers to set them appropriately. ComputeStats(start, end roachpb.Key, nowNanos int64) (enginepb.MVCCStats, error) // FindSplitKey finds a key from the given span such that the left side of // the split is roughly targetSize bytes. The returned key will never be diff --git a/pkg/storage/enginepb/mvcc.go b/pkg/storage/enginepb/mvcc.go index c22ca362c951..96aad88a2a07 100644 --- a/pkg/storage/enginepb/mvcc.go +++ b/pkg/storage/enginepb/mvcc.go @@ -90,15 +90,17 @@ func (t TxnMeta) Short() redact.SafeString { } // Total returns the range size as the sum of the key and value -// bytes. This includes all non-live keys and all versioned values. +// bytes. This includes all non-live keys and all versioned values, +// both for point and range keys. func (ms MVCCStats) Total() int64 { - return ms.KeyBytes + ms.ValBytes + return ms.KeyBytes + ms.ValBytes + ms.RangeKeyBytes + ms.RangeValBytes } // GCBytes is a convenience function which returns the number of gc bytes, -// that is the key and value bytes excluding the live bytes. +// that is the key and value bytes excluding the live bytes, both for +// point keys and range keys. 
func (ms MVCCStats) GCBytes() int64 { - return ms.KeyBytes + ms.ValBytes - ms.LiveBytes + return ms.Total() - ms.LiveBytes } // AvgIntentAge returns the average age of outstanding intents, @@ -169,6 +171,10 @@ func (ms *MVCCStats) Add(oms MVCCStats) { ms.ValCount += oms.ValCount ms.IntentCount += oms.IntentCount ms.SeparatedIntentCount += oms.SeparatedIntentCount + ms.RangeKeyCount += oms.RangeKeyCount + ms.RangeKeyBytes += oms.RangeKeyBytes + ms.RangeValCount += oms.RangeValCount + ms.RangeValBytes += oms.RangeValBytes ms.SysBytes += oms.SysBytes ms.SysCount += oms.SysCount ms.AbortSpanBytes += oms.AbortSpanBytes @@ -196,6 +202,10 @@ func (ms *MVCCStats) Subtract(oms MVCCStats) { ms.ValCount -= oms.ValCount ms.IntentCount -= oms.IntentCount ms.SeparatedIntentCount -= oms.SeparatedIntentCount + ms.RangeKeyCount -= oms.RangeKeyCount + ms.RangeKeyBytes -= oms.RangeKeyBytes + ms.RangeValCount -= oms.RangeValCount + ms.RangeValBytes -= oms.RangeValBytes ms.SysBytes -= oms.SysBytes ms.SysCount -= oms.SysCount ms.AbortSpanBytes -= oms.AbortSpanBytes diff --git a/pkg/storage/enginepb/mvcc.proto b/pkg/storage/enginepb/mvcc.proto index 93bda348d0af..2d9cc1fbc4d1 100644 --- a/pkg/storage/enginepb/mvcc.proto +++ b/pkg/storage/enginepb/mvcc.proto @@ -163,9 +163,9 @@ message MVCCStats { // intent_age is the cumulative age of the tracked intents. // See the comment on MVCCStats. optional sfixed64 intent_age = 2 [(gogoproto.nullable) = false]; - // gc_bytes_age is the cumulative age of the non-live data (i.e. - // data included in key_bytes and val_bytes, but not live_bytes). - // See the comment on MVCCStats. + // gc_bytes_age is the cumulative age of the non-live data (i.e. data included + // in key_bytes, val_bytes, range_key_bytes, and range_val_bytes, but not + // live_bytes). See the comment on MVCCStats.
optional sfixed64 gc_bytes_age = 3 [(gogoproto.nullable) = false, (gogoproto.customname) = "GCBytesAge"]; // live_bytes is the number of bytes stored in keys and values which can in // principle be read by means of a Scan or Get in the far future, including @@ -177,7 +177,7 @@ message MVCCStats { // live_count is the number of meta keys tracked under live_bytes. optional sfixed64 live_count = 5 [(gogoproto.nullable) = false]; // key_bytes is the number of bytes stored in all non-system - // keys, including live, meta, old, and deleted keys. + // point keys, including live, meta, old, and deleted keys. // Only meta keys really account for the "full" key; value // keys only for the timestamp suffix. optional sfixed64 key_bytes = 6 [(gogoproto.nullable) = false]; @@ -201,6 +201,31 @@ message MVCCStats { // intents, so mixed-version clusters with nodes preceding this knowledge // will always have a 0 value for this field. optional sfixed64 separated_intent_count = 16 [(gogoproto.nullable) = false]; + // range_key_count is the number of range keys tracked under range_key_bytes. + // Overlapping range keys may fragment into version stacks with the same + // start/end bounds, thus writing a single range key may cause range_key_count + // to increase by more than 1 due to fragmentation. Multiple range key + // versions with the same bounds count as a single range key, but are + // tracked under range_val_count. + // + // Range keys that straddle range split boundaries will become two separate + // logical range keys (one in each range), and merge back to one range key + // when the ranges merge. + // + // NB: Currently, all range keys are MVCC range tombstones. Therefore, these + // do not contribute to live_count nor live_bytes. + optional sfixed64 range_key_count = 17 [(gogoproto.nullable) = false]; + // range_key_bytes is the encoded size of range keys. 
Only the latest + // version in a range key fragment stack contributes the key bounds, + // but each version contributes the version timestamp. + optional sfixed64 range_key_bytes = 18 [(gogoproto.nullable) = false]; + // range_val_count is the number of range key values tracked under + // range_val_bytes, i.e. the number of range key versions. + optional sfixed64 range_val_count = 19 [(gogoproto.nullable) = false]; + // range_val_bytes is the number of bytes stored in range keys. Even though + // all range keys are currently MVCC range tombstones with no value, the + // MVCCValueHeader contribution can be non-zero. + optional sfixed64 range_val_bytes = 20 [(gogoproto.nullable) = false]; // sys_bytes is the number of bytes stored in system-local kv-pairs. // This tracks the same quantity as (key_bytes + val_bytes), but diff --git a/pkg/storage/enginepb/mvcc3.proto b/pkg/storage/enginepb/mvcc3.proto index c10364435bc4..ec05dad30d65 100644 --- a/pkg/storage/enginepb/mvcc3.proto +++ b/pkg/storage/enginepb/mvcc3.proto @@ -191,6 +191,10 @@ message MVCCStatsDelta { sint64 intent_bytes = 10; sint64 intent_count = 11; sint64 separated_intent_count = 16; + sint64 range_key_count = 17; + sint64 range_key_bytes = 18; + sint64 range_val_count = 19; + sint64 range_val_bytes = 20; sint64 sys_bytes = 12; sint64 sys_count = 13; sint64 abort_span_bytes = 15; @@ -220,6 +224,10 @@ message MVCCPersistentStats { int64 intent_bytes = 10; int64 intent_count = 11; int64 separated_intent_count = 16; + int64 range_key_count = 17; + int64 range_key_bytes = 18; + int64 range_val_count = 19; + int64 range_val_bytes = 20; int64 sys_bytes = 12; int64 sys_count = 13; int64 abort_span_bytes = 15; diff --git a/pkg/storage/mvcc.go b/pkg/storage/mvcc.go index e4120f64cb63..deeeac6facec 100644 --- a/pkg/storage/mvcc.go +++ b/pkg/storage/mvcc.go @@ -167,7 +167,7 @@ func isAbortSpanKey(key roachpb.Key) bool { // updateStatsForInline updates stat counters for an inline value // (abort span entries for
example). These are simpler as they don't -// involve intents or multiple versions. +// involve intents, multiple versions, or MVCC range tombstones. func updateStatsForInline( ms *enginepb.MVCCStats, key roachpb.Key, @@ -875,6 +875,7 @@ func MVCCGetAsTxn( // mvccGetMetadata returns or reconstructs the meta key for the given key. // A prefix scan using the iterator is performed, resulting in one of the // following successful outcomes: +// // 1) iterator finds nothing; returns (false, 0, 0, nil). // 2) iterator finds an explicit meta key; unmarshals and returns its size. // ok is set to true. @@ -884,37 +885,47 @@ func MVCCGetAsTxn( // that is the usual contribution of the meta key). The value size returned // will be zero, as there is no stored MVCCMetadata. // ok is set to true. -// The passed in MVCCMetadata must not be nil. Any MVCC range tombstones will be -// treated like point tombstones. +// 4) iterator finds an MVCC range tombstone above a value. In this case, +// metadata for a synthetic point tombstone is returned. +// +// The timestamp where the real point key last changed is also returned, if a +// real point key was found. This may differ from the metadata timestamp when a +// point key is covered by multiple MVCC range tombstones (in which case the +// point key disappeared at the _lowest_ range tombstone above it), or when a +// point tombstone is covered by a range tombstone (in which case the point key +// disappeared at the point tombstone). It is needed to correctly account for +// the GCBytesAge contribution of the key prefix, which is not affected by MVCC +// range tombstones, and would be incorrect if we used the synthetic point +// tombstone of the newest MVCC range tombstone instead. // -// If the supplied iterator is nil, no seek operation is performed. This is -// used by the Blind{Put,ConditionalPut} operations to avoid seeking when the -// metadata is known not to exist. +// The passed in MVCCMetadata must not be nil. 
If the supplied iterator is nil, +// no seek operation is performed. This is used by the Blind{Put,ConditionalPut} +// operations to avoid seeking when the metadata is known not to exist. func mvccGetMetadata( iter MVCCIterator, metaKey MVCCKey, meta *enginepb.MVCCMetadata, -) (ok bool, keyBytes, valBytes int64, err error) { +) (ok bool, keyBytes, valBytes int64, realKeyChanged hlc.Timestamp, err error) { if iter == nil { - return false, 0, 0, nil + return false, 0, 0, hlc.Timestamp{}, nil } iter.SeekGE(metaKey) if ok, err = iter.Valid(); !ok { - return false, 0, 0, err + return false, 0, 0, hlc.Timestamp{}, err } unsafeKey := iter.UnsafeKey() if !unsafeKey.Key.Equal(metaKey.Key) { - return false, 0, 0, nil + return false, 0, 0, hlc.Timestamp{}, nil } - hasPoint, hasRange := iter.HasPointAndRange() // Check for existing intent metadata. Intents will be emitted colocated with - // a covering range key when seeking to it, so we don't need to handle range - // keys here. + // a covering range key when seeking to it, and always located above range + // keys, so we don't need to check for range keys here. if hasPoint && !unsafeKey.IsValue() { if err := iter.ValueProto(meta); err != nil { - return false, 0, 0, err + return false, 0, 0, hlc.Timestamp{}, err } - return true, int64(unsafeKey.EncodedSize()), int64(len(iter.UnsafeValue())), nil + return true, int64(unsafeKey.EncodedSize()), int64(len(iter.UnsafeValue())), + meta.Timestamp.ToTimestamp(), nil } // Synthesize point key metadata. For values, the size of keys is always @@ -929,46 +940,56 @@ func mvccGetMetadata( iter.Next() if ok, err = iter.Valid(); err != nil { - return false, 0, 0, err + return false, 0, 0, hlc.Timestamp{}, err } else if ok { // NB: For !ok, hasPoint is already false. hasPoint, hasRange = iter.HasPointAndRange() unsafeKey = iter.UnsafeKey() } // If only a bare range tombstone was found at the seek key, synthesize - // point tombstone metadata for it. + // point tombstone metadata for it. 
realKeyChanged is empty since there + // was no real point key here. if !hasPoint || !unsafeKey.Key.Equal(metaKey.Key) { meta.Deleted = true meta.Timestamp = rkTimestamp.ToLegacyTimestamp() - return true, int64(EncodedMVCCKeyPrefixLength(metaKey.Key)), 0, nil - } - } - - // We're now on a point key. Check if it's covered by an MVCC range tombstone, - // and synthesize point tombstone metadata for it in that case. - if hasRange { - if rkTS := iter.RangeKeys()[0].RangeKey.Timestamp; unsafeKey.Timestamp.LessEq(rkTS) { - meta.Deleted = true - meta.Timestamp = rkTS.ToLegacyTimestamp() - return true, int64(EncodedMVCCKeyPrefixLength(metaKey.Key)), 0, nil + return true, int64(EncodedMVCCKeyPrefixLength(metaKey.Key)), 0, hlc.Timestamp{}, nil } } - // Synthesize metadata for a regular point key. + // We're now on a point key. Decode its value. var unsafeVal MVCCValue unsafeValRaw := iter.UnsafeValue() if unsafeVal, ok, err = tryDecodeSimpleMVCCValue(unsafeValRaw); !ok && err == nil { unsafeVal, err = decodeExtendedMVCCValue(unsafeValRaw) } if err != nil { - return false, 0, 0, err + return false, 0, 0, hlc.Timestamp{}, err + } + + // Check if the point key is covered by an MVCC range tombstone, and + // synthesize point tombstone metadata for it in that case. realKeyChanged is + // set to the timestamp where the point key ceased to exist -- either the + // lowest range tombstone above the key (not the highest which is used for + // metadata), or the point version's timestamp if it was a tombstone. + if hasRange { + rangeKeys := iter.RangeKeys() + if rkv, ok := firstRangeKeyAbove(rangeKeys, unsafeKey.Timestamp); ok { + meta.Deleted = true + meta.Timestamp = rangeKeys[0].RangeKey.Timestamp.ToLegacyTimestamp() + keyLastSeen := rkv.RangeKey.Timestamp + if unsafeVal.IsTombstone() { + keyLastSeen = unsafeKey.Timestamp + } + return true, int64(EncodedMVCCKeyPrefixLength(metaKey.Key)), 0, keyLastSeen, nil + } } + // Synthesize metadata for a regular point key. 
meta.ValBytes = int64(len(unsafeValRaw)) meta.Deleted = unsafeVal.IsTombstone() meta.Timestamp = unsafeKey.Timestamp.ToLegacyTimestamp() - return true, int64(EncodedMVCCKeyPrefixLength(metaKey.Key)), 0, nil + return true, int64(EncodedMVCCKeyPrefixLength(metaKey.Key)), 0, unsafeKey.Timestamp, nil } // putBuffer holds pointer data needed by mvccPutInternal. Bundling @@ -1346,6 +1367,9 @@ func replayTransactionalWrite( // and vice versa. valueFn can delete by returning nil. Returning // []byte{} will write an empty value, not delete. // +// The given iter must surface range keys to correctly account for +// MVCC range tombstones in MVCC stats. +// // Note that, when writing transactionally, the txn's timestamps // dictate the timestamp of the operation, and the timestamp parameter // is redundant. Specifically, the intent is written at the txn's @@ -1395,7 +1419,8 @@ func mvccPutInternal( } metaKey := MakeMVCCMetadataKey(key) - ok, origMetaKeySize, origMetaValSize, err := mvccGetMetadata(iter, metaKey, &buf.meta) + ok, origMetaKeySize, origMetaValSize, origRealKeyChanged, err := + mvccGetMetadata(iter, metaKey, &buf.meta) if err != nil { return err } @@ -1406,8 +1431,9 @@ func mvccPutInternal( return errors.Errorf("%q: put is inline=%t, but existing value is inline=%t", metaKey, putIsInline, buf.meta.IsInline()) } - // Handle inline put. No IntentHistory is required for inline writes - // as they aren't allowed within transactions. + // Handle inline put. No IntentHistory is required for inline writes as they + // aren't allowed within transactions. MVCC range tombstones cannot exist + // across them either. if putIsInline { if txn != nil { return errors.Errorf("%q: inline writes not allowed within transactions", metaKey) @@ -1467,6 +1493,8 @@ func mvccPutInternal( meta = &buf.meta metaTimestamp := meta.Timestamp.ToTimestamp() + // Handle intents. MVCC range tombstones should not require any special + // handling, since they cannot be transactional. 
if meta.Txn != nil { // There is an uncommitted write intent. if txn == nil || meta.Txn.ID != txn.ID { @@ -1584,15 +1612,17 @@ func mvccPutInternal( // read its size because its GCBytesAge contribution may change as we // move the intent above it. A similar phenomenon occurs in // MVCCResolveWriteIntent. + // + // TODO(erikgrinaker): Consider using mvccGet() here instead, but + // needs benchmarking. prevKey := oldVersionKey.Next() iter.SeekGE(prevKey) valid, err := iter.Valid() if err != nil { return err } else if valid { - // TODO(erikgrinaker): We don't handle MVCC range tombstones in MVCC - // stats yet, so if we land on a bare range key just step onto the - // next point key (if any). + // If we land on a bare range key, step onto the next key. This may + // be a point key at the same key position, or a different key. if hasPoint, hasRange := iter.HasPointAndRange(); hasRange && !hasPoint { iter.Next() if valid, err = iter.Valid(); err != nil { @@ -1606,13 +1636,20 @@ func mvccPutInternal( return errors.Errorf("expected an MVCC value key: %s", prevUnsafeKey) } - prevValRaw := iter.UnsafeValue() - prevVal, err := DecodeMVCCValue(prevValRaw) - if err != nil { - return err + // We must now be on a point key, but it may be covered by an + // existing MVCC range tombstone. If it isn't, account for it. + _, hasRange := iter.HasPointAndRange() + if !hasRange || iter.RangeKeys()[0].RangeKey.Timestamp.Less(prevUnsafeKey.Timestamp) { + prevValRaw := iter.UnsafeValue() + prevVal, err := DecodeMVCCValue(prevValRaw) + if err != nil { + return err + } + if prevVal.Value.IsPresent() { + prevIsValue = prevVal.Value.IsPresent() + prevValSize = int64(len(prevValRaw)) + } } - prevIsValue = prevVal.Value.IsPresent() - prevValSize = int64(len(prevValRaw)) } iter = nil // prevent accidental use below } @@ -1778,6 +1815,21 @@ func mvccPutInternal( // Update MVCC stats. if ms != nil { + // Adjust the stats metadata for MVCC range tombstones. 
The MVCC stats + // update only cares about changes to real point keys, but the above logic + // needs to care about MVCC range tombstones for conflict purposes. + // + // Specifically, if a real point key was covered by a range tombstone, we + // must set meta.Timestamp to the timestamp where the real point key was + // deleted (either by a point tombstone or the lowest range tombstone). If + // there was no real point key, meta must be nil. In all other cases, + // meta.Timestamp will already equal origRealKeyChanged. + if origRealKeyChanged.IsEmpty() { + meta = nil // no real point key was found + } + if meta != nil { + meta.Timestamp = origRealKeyChanged.ToLegacyTimestamp() + } ms.Add(updateStatsOnPut(key, prevIsValue, prevValSize, origMetaKeySize, origMetaValSize, metaKeySize, metaValSize, meta, newMeta)) } @@ -2123,6 +2175,8 @@ func MVCCMerge( // // If the underlying iterator encounters an intent with a timestamp in the span // (startTime, endTime], or any inline meta, this method will return an error. +// +// TODO(erikgrinaker): This needs to handle MVCC range tombstones (stats too). func MVCCClearTimeRange( _ context.Context, rw ReadWriter, @@ -2392,11 +2446,14 @@ func MVCCDeleteRange( // existing intents and return a WriteIntentError containing up to maxIntents // intents. // +// The leftPeekBound and rightPeekBound parameters are used when looking for +// range tombstones that we'll merge or overlap with. These are provided to +// prevent the command from reading outside of the CRDB range bounds and latch +// bounds. nil means no bounds. +// // This method is EXPERIMENTAL: range keys are under active development, and // have severe limitations including being ignored by all KV and MVCC APIs and // only being stored in memory. -// -// TODO(erikgrinaker): This needs MVCC stats handling. 
func ExperimentalMVCCDeleteRangeUsingTombstone( ctx context.Context, rw ReadWriter, @@ -2404,6 +2461,7 @@ func ExperimentalMVCCDeleteRangeUsingTombstone( startKey, endKey roachpb.Key, timestamp hlc.Timestamp, localTimestamp hlc.ClockTimestamp, + leftPeekBound, rightPeekBound roachpb.Key, maxIntents int64, ) error { // Validate the range key. We must do this first, to catch e.g. any bound violations. @@ -2412,6 +2470,17 @@ func ExperimentalMVCCDeleteRangeUsingTombstone( return err } + // Encode the value. + var value MVCCValue + value.LocalTimestamp = localTimestamp + if !value.LocalTimestampNeeded(timestamp) || !rw.ShouldWriteLocalTimestamps(ctx) { + value.LocalTimestamp = hlc.ClockTimestamp{} + } + valueRaw, err := EncodeMVCCValue(value) + if err != nil { + return err + } + // Check for any overlapping intents, and return them to be resolved. if intents, err := ScanIntents(ctx, rw, startKey, endKey, maxIntents, 0); err != nil { return err @@ -2419,22 +2488,31 @@ func ExperimentalMVCCDeleteRangeUsingTombstone( return &roachpb.WriteIntentError{Intents: intents} } - // Check for any conflicts, i.e. newer values. We use a time-bounded scan, and - // ignore intents since we checked for those above already. + // Forward the (empty) stats time to the deletion timestamp first, making the + // range tombstone's own GCBytesAge contributions 0 at this timestamp. + if ms != nil { + ms.Forward(timestamp.WallTime) + } + + // First, set up an iterator covering only the range key span itself, and scan + // it to find conflicts and update MVCC stats within it. // - // TODO(erikgrinaker): This will probably need optimization, particularly when - // we're deleting an entire CRDB range. + // TODO(erikgrinaker): This introduces an O(n) read penalty. We should + // optimize it, in particular by making this optional in cases where we're + // deleting an entire range and the stats can be computed without the scan. 
+ // However, in that case we'll still have to do a time-bounded scan to check + // for conflicts. iter := rw.NewMVCCIterator(MVCCKeyIterKind, IterOptions{ - KeyTypes: IterKeyTypePointsAndRanges, - LowerBound: startKey, - UpperBound: endKey, - MinTimestampHint: timestamp, - MaxTimestampHint: hlc.MaxTimestamp, + KeyTypes: IterKeyTypePointsAndRanges, + LowerBound: startKey, + UpperBound: endKey, + RangeKeyMaskingBelow: timestamp, // lower point keys have already been accounted for }) defer iter.Close() - var prevRangeStart roachpb.Key - for iter.SeekGE(MVCCKey{Key: startKey}); ; iter.NextKey() { + iter.SeekGE(MVCCKey{Key: startKey}) + prevRangeEnd := startKey.Clone() + for { if ok, err := iter.Valid(); err != nil { return err } else if !ok { @@ -2442,7 +2520,9 @@ func ExperimentalMVCCDeleteRangeUsingTombstone( } hasPoint, hasRange := iter.HasPointAndRange() + if hasPoint { + // Check for conflict with newer point key. key := iter.UnsafeKey() if timestamp.LessEq(key.Timestamp) { return roachpb.NewWriteTooOldError(timestamp, key.Timestamp.Next(), key.Key.Clone()) @@ -2450,24 +2530,212 @@ func ExperimentalMVCCDeleteRangeUsingTombstone( if key.Timestamp.IsEmpty() { return errors.Errorf("can't write range tombstone across inline key %s", key) } + + // Update stats for the covered point key, if it was a live key. + vRaw := iter.UnsafeValue() + v, ok, err := tryDecodeSimpleMVCCValue(vRaw) + if !ok && err == nil { + v, err = decodeExtendedMVCCValue(vRaw) + } + if err != nil { + return err + } + + if ms != nil && !v.IsTombstone() { + ms.LiveCount-- + ms.LiveBytes -= int64(key.EncodedSize()) + int64(len(vRaw)) + } } + if hasRange { - if rangeStart := iter.RangeBounds().Key; !rangeStart.Equal(prevRangeStart) { - prevRangeStart = append(prevRangeStart[:0], rangeStart...) 
- if newest := iter.RangeKeys()[0].RangeKey; timestamp.LessEq(newest.Timestamp) { - return roachpb.NewWriteTooOldError(timestamp, newest.Timestamp.Next(), newest.StartKey) + // Check if we've encountered a new range key stack. + if rangeBounds := iter.RangeBounds(); !rangeBounds.EndKey.Equal(prevRangeEnd) { + newest := iter.RangeKeys()[0].RangeKey + + // Check for conflict with newer range key. + if timestamp.LessEq(newest.Timestamp) { + return roachpb.NewWriteTooOldError( + timestamp, newest.Timestamp.Next(), newest.StartKey.Clone()) + } + + if ms != nil { + // If the encountered range key does not abut the previous range key, + // we'll write a new range key fragment in the gap between them. It + // has no GCBytesAge contribution because it's written at now. + if !rangeBounds.Key.Equal(prevRangeEnd) { + ms.RangeKeyCount++ + ms.RangeKeyBytes += int64(EncodedMVCCTimestampSuffixLength(timestamp) + + EncodedMVCCKeyPrefixLength(prevRangeEnd) + + EncodedMVCCKeyPrefixLength(rangeBounds.Key)) + ms.RangeValCount++ + ms.RangeValBytes += int64(len(valueRaw)) + } + // This range key will create a new version in the current fragment + // stack. It will also move the GCBytesAge contribution of the key + // bounds up from the latest existing range key to this one. It has no + // GCBytesAge contribution of its own because it's written at now. + ms.RangeKeyBytes += int64(EncodedMVCCTimestampSuffixLength(timestamp)) + ms.RangeValCount++ + ms.RangeValBytes += int64(len(valueRaw)) + ms.GCBytesAge -= (timestamp.WallTime/1e9 - newest.Timestamp.WallTime/1e9) * + int64(EncodedMVCCKeyPrefixLength(rangeBounds.Key)+ + EncodedMVCCKeyPrefixLength(rangeBounds.EndKey)) } + + prevRangeEnd = append(prevRangeEnd[:0], rangeBounds.EndKey...) } } + + // If we hit a bare range key, it's possible that there's a point key on the + // same key as its start key. NextKey() would skip over this, so we take a + // normal step to look for it. 
+ if hasRange && !hasPoint { + iter.Next() + } else { + iter.NextKey() + } } - // Write the tombstone. - var value MVCCValue - value.LocalTimestamp = localTimestamp - if !value.LocalTimestampNeeded(timestamp) || !rw.ShouldWriteLocalTimestamps(ctx) { - value.LocalTimestamp = hlc.ClockTimestamp{} + // Once we've iterated across the range key span, fill in the final gap + // between the previous existing range key fragment and the end of the range + // key if any. If no existing fragments were found during iteration above, + // this will be the entire new range key. + if ms != nil && !prevRangeEnd.Equal(endKey) { + ms.RangeKeyCount++ + ms.RangeKeyBytes += int64(EncodedMVCCTimestampSuffixLength(timestamp) + + EncodedMVCCKeyPrefixLength(prevRangeEnd) + EncodedMVCCKeyPrefixLength(endKey)) + ms.RangeValCount++ + ms.RangeValBytes += int64(len(valueRaw)) } + // Check if the range key will merge with or fragment any existing range keys + // at the bounds, and adjust stats accordingly. + // + // TODO(erikgrinaker): This code is a bit ugly. Also, similar logic will be + // needed elsewhere, e.g. in AddSSTable, ClearRange, RevertRange, MVCC garbage + // collection, CRDB range splits/merges, etc. This should eventually be + // cleaned up and consolidated, but it'll do for now. + // + // TODO(erikgrinaker): This could be merged into the scan above to avoid the + // additional seeks. But we do the simple and correct thing for now and leave + // optimizations for later. + if ms != nil { + // fragmentRangeKeys adjusts ms to fragment an existing range key stack + // at the given split point. 
+ fragmentRangeKeys := func(rangeKeys []MVCCRangeKeyValue, splitKey roachpb.Key) { + for i, rkv := range rangeKeys { + keyBytes := int64(EncodedMVCCTimestampSuffixLength(rkv.RangeKey.Timestamp)) + valBytes := int64(len(rkv.Value)) + if i == 0 { + ms.RangeKeyCount++ + keyBytes += 2 * int64(EncodedMVCCKeyPrefixLength(splitKey)) + } + ms.RangeKeyBytes += keyBytes + ms.RangeValCount++ + ms.RangeValBytes += valBytes + ms.GCBytesAge += (keyBytes + valBytes) * (timestamp.WallTime/1e9 - rkv.RangeKey.Timestamp.WallTime/1e9) + } + } + + // maybeMergeRangeKeys adjusts ms to merge two abutting range key stacks if + // they have the same timestamps and values. It assumes the lhs end key + // equals the rhs start key, and that they are in descending order. + maybeMergeRangeKeys := func(lhs, rhs []MVCCRangeKeyValue) { + if len(lhs) != len(rhs) || len(lhs) == 0 { + return + } + for i, l := range lhs { + if !l.RangeKey.Timestamp.Equal(rhs[i].RangeKey.Timestamp) { + return + } else if !bytes.Equal(l.Value, rhs[i].Value) { + return + } + } + mergeKey := rhs[0].RangeKey.StartKey + for i, rkv := range lhs { + keyBytes := int64(EncodedMVCCTimestampSuffixLength(rkv.RangeKey.Timestamp)) + valBytes := int64(len(rkv.Value)) + if i == 0 { + ms.RangeKeyCount-- + keyBytes += 2 * int64(EncodedMVCCKeyPrefixLength(mergeKey)) + } + ms.RangeKeyBytes -= keyBytes + ms.RangeValCount-- + ms.RangeValBytes -= valBytes + ms.GCBytesAge -= (keyBytes + valBytes) * + (timestamp.WallTime/1e9 - rkv.RangeKey.Timestamp.WallTime/1e9) + } + } + + // Peek to the left. 
+ if !leftPeekBound.Equal(startKey) { + iter := rw.NewMVCCIterator(MVCCKeyIterKind, IterOptions{ + KeyTypes: IterKeyTypeRangesOnly, + LowerBound: leftPeekBound, + UpperBound: startKey.Next(), + }) + defer iter.Close() + iter.SeekLT(MVCCKey{Key: startKey}) + if ok, err := iter.Valid(); err != nil { + return err + } else if ok { + switch iter.RangeBounds().EndKey.Compare(startKey) { + case 1: // fragment + fragmentRangeKeys(iter.RangeKeys(), startKey) + case 0: // merge + lhs := iter.RangeKeys() + for i := range lhs { + lhs[i] = lhs[i].Clone() + } + rhs := []MVCCRangeKeyValue{{RangeKey: rangeKey, Value: valueRaw}} + iter.SeekGE(MVCCKey{Key: startKey}) + if ok, err := iter.Valid(); err != nil { + return err + } else if ok { + rhs = append(rhs, iter.RangeKeys()...) + } + maybeMergeRangeKeys(lhs, rhs) + } + } + } + + // Peek to the right. + if rightPeekBound == nil { + rightPeekBound = keys.MaxKey + } + if !rightPeekBound.Equal(endKey) { + iter := rw.NewMVCCIterator(MVCCKeyIterKind, IterOptions{ + KeyTypes: IterKeyTypeRangesOnly, + LowerBound: endKey.Prevish(roachpb.PrevishKeyLength), + UpperBound: rightPeekBound, + }) + defer iter.Close() + iter.SeekGE(MVCCKey{Key: endKey}) + if ok, err := iter.Valid(); err != nil { + return err + } else if ok { + switch iter.RangeBounds().Key.Compare(endKey) { + case -1: // fragment + fragmentRangeKeys(iter.RangeKeys(), endKey) + case 0: // merge + lhs := []MVCCRangeKeyValue{{RangeKey: rangeKey, Value: valueRaw}} + rhs := iter.RangeKeys() + for i := range rhs { + rhs[i] = rhs[i].Clone() + } + iter.SeekLT(MVCCKey{Key: endKey}) + if ok, err := iter.Valid(); err != nil { + return err + } else if ok { + lhs = append(lhs, iter.RangeKeys()...) + } + maybeMergeRangeKeys(lhs, rhs) + } + } + } + } + + // Write the tombstone. 
return rw.ExperimentalPutMVCCRangeKey(rangeKey, value) } @@ -2913,7 +3181,10 @@ func MVCCResolveWriteIntent( return false, errors.Errorf("can't resolve range intent as point intent") } - iterAndBuf := GetBufUsingIter(rw.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{Prefix: true})) + iterAndBuf := GetBufUsingIter(rw.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{ + KeyTypes: IterKeyTypePointsAndRanges, + Prefix: true, + })) iterAndBuf.iter.SeekIntentGE(intent.Key, intent.Txn.ID) ok, err := mvccResolveWriteIntent(ctx, rw, iterAndBuf.iter, ms, intent, iterAndBuf.buf) // Using defer would be more convenient, but it is measurably slower. @@ -2932,11 +3203,13 @@ func MVCCResolveWriteIntent( // to SeekGE. type iterForKeyVersions interface { Valid() (bool, error) + HasPointAndRange() (bool, bool) SeekGE(key MVCCKey) Next() UnsafeKey() MVCCKey UnsafeValue() []byte ValueProto(msg protoutil.Message) error + RangeKeys() []MVCCRangeKeyValue } // separatedIntentAndVersionIter is an implementation of iterForKeyVersions @@ -3004,6 +3277,18 @@ func (s *separatedIntentAndVersionIter) Valid() (bool, error) { return s.engineIterValid, s.engineIterErr } +func (s *separatedIntentAndVersionIter) HasPointAndRange() (bool, bool) { + hasPoint, hasRange := s.mvccIter.HasPointAndRange() + if !s.atMVCCIter { + hasPoint = s.engineIterValid + } + return hasPoint, hasRange +} + +func (s *separatedIntentAndVersionIter) RangeKeys() []MVCCRangeKeyValue { + return s.mvccIter.RangeKeys() +} + func (s *separatedIntentAndVersionIter) SeekGE(key MVCCKey) { if !key.IsValue() { panic(errors.AssertionFailedf("SeekGE only permitted for values")) @@ -3057,6 +3342,9 @@ func mvccGetIntent( if ok, err := iter.Valid(); !ok { return false, 0, 0, err } + if hasPoint, _ := iter.HasPointAndRange(); !hasPoint { + return false, 0, 0, nil + } unsafeKey := iter.UnsafeKey() if !unsafeKey.Key.Equal(metaKey.Key) { return false, 0, 0, nil @@ -3167,6 +3455,7 @@ func (h singleDelOptimizationHelper) 
onAbortIntent() bool { // mvccResolveWriteIntent is the core logic for resolving an intent. // REQUIRES: iter is already seeked to intent.Key. +// REQUIRES: iter surfaces range keys via IterKeyTypePointsAndRanges. // Returns whether an intent was found and resolved, false otherwise. func mvccResolveWriteIntent( ctx context.Context, @@ -3330,9 +3619,20 @@ func mvccResolveWriteIntent( // Rewrite the versioned value at the new timestamp. iter.SeekGE(oldKey) - if valid, err := iter.Valid(); err != nil { + valid, err := iter.Valid() + if err != nil { return false, err - } else if !valid || !iter.UnsafeKey().Equal(oldKey) { + } + if hasPoint, hasRange := iter.HasPointAndRange(); hasRange && !hasPoint { + // If the seek lands on a bare range key, attempt to step to a point. + iter.Next() + if valid, err = iter.Valid(); err != nil { + return false, err + } else if valid { + valid, _ = iter.HasPointAndRange() + } + } + if !valid || !iter.UnsafeKey().Equal(oldKey) { return false, errors.Errorf("existing intent value missing: %s", oldKey) } oldValue, err := DecodeMVCCValue(iter.UnsafeValue()) @@ -3381,21 +3681,27 @@ func mvccResolveWriteIntent( // have to read that version's size. // // Look for the first real versioned key, i.e. the key just below - // the (old) meta's timestamp. + // the (old) meta's timestamp, and for any MVCC range tombstones. 
iter.Next() if valid, err := iter.Valid(); err != nil { return false, err - } else if valid && iter.UnsafeKey().Key.Equal(oldKey.Key) { - unsafeValRaw := iter.UnsafeValue() - prevVal, prevValOK, err := tryDecodeSimpleMVCCValue(unsafeValRaw) - if !prevValOK && err == nil { - prevVal, err = decodeExtendedMVCCValue(unsafeValRaw) - } - if err != nil { - return false, err + } else if valid { + if hasPoint, hasRange := iter.HasPointAndRange(); hasPoint { + if unsafeKey := iter.UnsafeKey(); unsafeKey.Key.Equal(oldKey.Key) { + if !hasRange || iter.RangeKeys()[0].RangeKey.Timestamp.Less(unsafeKey.Timestamp) { + unsafeValRaw := iter.UnsafeValue() + prevVal, prevValOK, err := tryDecodeSimpleMVCCValue(unsafeValRaw) + if !prevValOK && err == nil { + prevVal, err = decodeExtendedMVCCValue(unsafeValRaw) + } + if err != nil { + return false, err + } + prevIsValue = prevVal.Value.IsPresent() + prevValSize = int64(len(iter.UnsafeValue())) + } + } } - prevIsValue = prevVal.Value.IsPresent() - prevValSize = int64(len(iter.UnsafeValue())) } } @@ -3460,20 +3766,27 @@ func mvccResolveWriteIntent( Key: intent.Key, }) - nextKey := latestKey.Next() ok = false var unsafeNextKey MVCCKey var unsafeNextValueRaw []byte - if nextKey.IsValue() { + if nextKey := latestKey.Next(); nextKey.IsValue() { // The latestKey was not the smallest possible timestamp {WallTime: 0, // Logical: 1}. Practically, this is the only case that will occur in // production. iter.SeekGE(nextKey) - ok, err = iter.Valid() - if err != nil { + if ok, err = iter.Valid(); err != nil { return false, err } - if ok && iter.UnsafeKey().Key.Equal(latestKey.Key) { + // If the seek lands on a bare range key, attempt to step to a point. 
+ if hasPoint, hasRange := iter.HasPointAndRange(); hasRange && !hasPoint { + iter.Next() + if ok, err = iter.Valid(); err != nil { + return false, err + } else if ok { + ok, _ = iter.HasPointAndRange() + } + } + if ok = ok && iter.UnsafeKey().Key.Equal(latestKey.Key); ok { unsafeNextKey = iter.UnsafeKey() if !unsafeNextKey.IsValue() { // Should never see an intent for this key since we seeked to a @@ -3481,8 +3794,15 @@ func mvccResolveWriteIntent( return false, errors.Errorf("expected an MVCC value key: %s", unsafeNextKey) } unsafeNextValueRaw = iter.UnsafeValue() - } else { - ok = false + // If a non-tombstone point key is covered by a range tombstone, then + // synthesize a point tombstone at the lowest range tombstone covering it. + // This is where the point key ceases to exist, contributing to GCBytesAge. + if len(unsafeNextValueRaw) > 0 { + if rk, found := firstRangeKeyAbove(iter.RangeKeys(), unsafeNextKey.Timestamp); found { + unsafeNextKey.Timestamp = rk.RangeKey.Timestamp + unsafeNextValueRaw = []byte{} + } + } } iter = nil // prevent accidental use below } @@ -3620,7 +3940,11 @@ func MVCCResolveWriteIntentRange( ltEnd, _ := keys.LockTableSingleKey(intent.EndKey, nil) engineIter := rw.NewEngineIterator(IterOptions{LowerBound: ltStart, UpperBound: ltEnd}) var mvccIter MVCCIterator - iterOpts := IterOptions{UpperBound: intent.EndKey} + iterOpts := IterOptions{ + KeyTypes: IterKeyTypePointsAndRanges, + LowerBound: intent.Key, + UpperBound: intent.EndKey, + } if rw.ConsistentIterators() { // Production code should always have consistent iterators. mvccIter = rw.NewMVCCIterator(MVCCKeyIterKind, iterOpts) @@ -3704,6 +4028,8 @@ func MVCCResolveWriteIntentRange( // not a mix of the two. This is to accommodate the implementation below // that creates an iterator with bounds that span from the first to last // key (in sorted order). +// +// TODO(erikgrinaker): This must handle MVCC range tombstones. 
func MVCCGarbageCollect( ctx context.Context, rw ReadWriter, @@ -3744,7 +4070,7 @@ func MVCCGarbageCollect( meta := &enginepb.MVCCMetadata{} for _, gcKey := range keys { encKey := MakeMVCCMetadataKey(gcKey.Key) - ok, metaKeySize, metaValSize, err := mvccGetMetadata(iter, encKey, meta) + ok, metaKeySize, metaValSize, _, err := mvccGetMetadata(iter, encKey, meta) if err != nil { return err } @@ -4025,28 +4351,43 @@ func willOverflow(a, b int64) bool { return math.MinInt64-b > a } -// ComputeStatsForRange scans the underlying engine from start to end keys and -// computes stats counters based on the values. This method is used after a -// range is split to recompute stats for each subrange. The nowNanos arg -// specifies the wall time in nanoseconds since the epoch and is used to compute -// the total age of all intents. +// ComputeStatsForRange scans the iterator from start to end keys and computes +// stats counters based on the values. This method is used after a range is +// split to recompute stats for each subrange. The nowNanos arg specifies the +// wall time in nanoseconds since the epoch and is used to compute the total age +// of all intents. +// +// To account for intents and range keys, the iterator must be created with +// MVCCKeyAndIntentsIterKind and IterKeyTypePointsAndRanges. To correctly +// account for range key truncation bounds, the iterator must have an +// appropriate UpperBound and LowerBound. // -// When optional callbacks are specified, they are invoked for each physical +// TODO(erikgrinaker): Consider removing the start,end parameters, forcing the +// caller to set appropriate bounds on the iterator instead. 
+func ComputeStatsForRange( + iter SimpleMVCCIterator, start, end roachpb.Key, nowNanos int64, +) (enginepb.MVCCStats, error) { + return ComputeStatsForRangeWithVisitors(iter, start, end, nowNanos, nil, nil) +} + +// ComputeStatsForRangeWithVisitors is like ComputeStatsForRange, but also +// takes a point and/or range key callback that is invoked for each physical // key-value pair (i.e. not for implicit meta records), and iteration is aborted -// on the first error returned from any of them. +// on the first error returned from either of them. // // Callbacks must copy any data they intend to hold on to. -func ComputeStatsForRange( +func ComputeStatsForRangeWithVisitors( iter SimpleMVCCIterator, start, end roachpb.Key, nowNanos int64, - callbacks ...func(MVCCKey, []byte) error, + pointKeyVisitor func(MVCCKey, []byte) error, + rangeKeyVisitor func(MVCCRangeKeyValue) error, ) (enginepb.MVCCStats, error) { var ms enginepb.MVCCStats // Only some callers are providing an MVCCIterator. The others don't have // any intents. var meta enginepb.MVCCMetadata - var prevKey []byte + var prevKey, prevRangeStart []byte first := false // Values start accruing GCBytesAge at the timestamp at which they @@ -4056,22 +4397,63 @@ func ComputeStatsForRange( // of the point in time at which the current key begins to age. var accrueGCAgeNanos int64 mvccEndKey := MakeMVCCMetadataKey(end) + rangeKeys := []MVCCRangeKeyValue{} - iter.SeekGE(MakeMVCCMetadataKey(start)) - for ; ; iter.Next() { - ok, err := iter.Valid() - if err != nil { + for iter.SeekGE(MakeMVCCMetadataKey(start)); ; iter.Next() { + if ok, err := iter.Valid(); err != nil { return ms, err - } - if !ok || !iter.UnsafeKey().Less(mvccEndKey) { + } else if !ok || !iter.UnsafeKey().Less(mvccEndKey) { break } + hasPoint, hasRange := iter.HasPointAndRange() + + if hasRange { + if rangeStart := iter.RangeBounds().Key; !rangeStart.Equal(prevRangeStart) { + prevRangeStart = append(prevRangeStart[:0], rangeStart...) 
+ rangeKeys = iter.RangeKeys() + + for i, rkv := range rangeKeys { + // Only the top-most fragment contributes the key and its bounds, but + // all versions contribute timestamps and values. + // + // NB: Point keys always use 12 bytes for the key timestamp, even + // though it is actually variable-length, likely for historical + // reasons. But for range keys we may as well use the actual + // variable-length encoded size. + keyBytes := int64(EncodedMVCCTimestampSuffixLength(rkv.RangeKey.Timestamp)) + valBytes := int64(len(rkv.Value)) + if i == 0 { + ms.RangeKeyCount++ + keyBytes += int64(EncodedMVCCKeyPrefixLength(rkv.RangeKey.StartKey) + + EncodedMVCCKeyPrefixLength(rkv.RangeKey.EndKey)) + } + ms.RangeKeyBytes += keyBytes + ms.RangeValCount++ + ms.RangeValBytes += valBytes + ms.GCBytesAge += (keyBytes + valBytes) * + (nowNanos/1e9 - rkv.RangeKey.Timestamp.WallTime/1e9) + + if rangeKeyVisitor != nil { + if err := rangeKeyVisitor(rkv); err != nil { + return enginepb.MVCCStats{}, err + } + } + } + } + } else if len(rangeKeys) > 0 { + rangeKeys = rangeKeys[:0] + } + + if !hasPoint { + continue + } + unsafeKey := iter.UnsafeKey() unsafeValue := iter.UnsafeValue() - for _, f := range callbacks { - if err := f(unsafeKey, unsafeValue); err != nil { + if pointKeyVisitor != nil { + if err := pointKeyVisitor(unsafeKey, unsafeValue); err != nil { return enginepb.MVCCStats{}, err } } @@ -4098,6 +4480,16 @@ func ComputeStatsForRange( implicitMeta := isValue && !bytes.Equal(unsafeKey.Key, prevKey) prevKey = append(prevKey[:0], unsafeKey.Key...) + // Find the closest range tombstone above the point key. Range tombstones + // cannot exist above intents, and are undefined across inline values, so we + // only take them into account for versioned values. 
+ var nextRangeTombstone hlc.Timestamp + if isValue { + if rkv, ok := firstRangeKeyAbove(rangeKeys, unsafeKey.Timestamp); ok { + nextRangeTombstone = rkv.RangeKey.Timestamp + } + } + if implicitMeta { // No MVCCMetadata entry for this series of keys. var isTombstone bool @@ -4141,12 +4533,16 @@ func ComputeStatsForRange( ms.AbortSpanBytes += totalBytes } } else { - if !meta.Deleted { - ms.LiveBytes += totalBytes - ms.LiveCount++ - } else { + if meta.Deleted { // First value is deleted, so it's GC'able; add meta key & value bytes to age stat. ms.GCBytesAge += totalBytes * (nowNanos/1e9 - meta.Timestamp.WallTime/1e9) + } else if nextRangeTombstone.IsSet() { + // First value was deleted by a range tombstone, so it accumulates GC age from + // the range tombstone's timestamp. + ms.GCBytesAge += totalBytes * (nowNanos/1e9 - nextRangeTombstone.WallTime/1e9) + } else { + ms.LiveBytes += totalBytes + ms.LiveCount++ } ms.KeyBytes += metaKeySize ms.ValBytes += metaValSize @@ -4166,11 +4562,15 @@ func ComputeStatsForRange( } else { if first { first = false - if !meta.Deleted { - ms.LiveBytes += totalBytes - } else { + if meta.Deleted { // First value is deleted, so it's GC'able; add key & value bytes to age stat. ms.GCBytesAge += totalBytes * (nowNanos/1e9 - meta.Timestamp.WallTime/1e9) + } else if nextRangeTombstone.IsSet() { + // First value was deleted by a range tombstone; add key & value bytes to + // age stat from range tombstone onwards. + ms.GCBytesAge += totalBytes * (nowNanos/1e9 - nextRangeTombstone.WallTime/1e9) + } else { + ms.LiveBytes += totalBytes } if meta.Txn != nil { ms.IntentBytes += totalBytes @@ -4204,6 +4604,10 @@ func ComputeStatsForRange( if isTombstone { // The contribution of the tombstone picks up GCByteAge from its own timestamp on. 
ms.GCBytesAge += totalBytes * (nowNanos/1e9 - unsafeKey.Timestamp.WallTime/1e9) + } else if nextRangeTombstone.IsSet() && nextRangeTombstone.WallTime < accrueGCAgeNanos { + // The kv pair was deleted by a range tombstone below the next + // version, so it accumulates garbage from the range tombstone. + ms.GCBytesAge += totalBytes * (nowNanos/1e9 - nextRangeTombstone.WallTime/1e9) } else { // The kv pair is an overwritten value, so it became non-live when the closest more // recent value was written. diff --git a/pkg/storage/mvcc_history_test.go b/pkg/storage/mvcc_history_test.go index 1847ca64cf46..c81642aab599 100644 --- a/pkg/storage/mvcc_history_test.go +++ b/pkg/storage/mvcc_history_test.go @@ -64,6 +64,7 @@ var sstIterVerify = util.ConstantWithMetamorphicTestBool("mvcc-histories-sst-ite // txn_ignore_seqs t= seqs=[-[,-...]] // // resolve_intent t= k= [status=] [clockWhilePending=[,]] +// resolve_intent_range t= k= end= [status=] // check_intent k= [none] // // cput [t=] [ts=[,]] [localTs=[,]] [resolve [status=]] k= v= [raw] [cond=] @@ -614,9 +615,9 @@ var commands = map[string]cmd{ "txn_step": {typTxnUpdate, cmdTxnStep}, "txn_update": {typTxnUpdate, cmdTxnUpdate}, - "resolve_intent": {typDataUpdate, cmdResolveIntent}, - // TODO(nvanbenschoten): test "resolve_intent_range". 
- "check_intent": {typReadOnly, cmdCheckIntent}, + "resolve_intent": {typDataUpdate, cmdResolveIntent}, + "resolve_intent_range": {typDataUpdate, cmdResolveIntentRange}, + "check_intent": {typReadOnly, cmdCheckIntent}, "clear": {typDataUpdate, cmdClear}, "clear_range": {typDataUpdate, cmdClearRange}, @@ -794,6 +795,17 @@ func cmdResolveIntent(e *evalCtx) error { return e.resolveIntent(e.tryWrapForIntentPrinting(e.engine), key, txn, status, clockWhilePending) } +func cmdResolveIntentRange(e *evalCtx) error { + txn := e.getTxn(mandatory) + start, end := e.getKeyRange() + status := e.getTxnStatus() + + intent := roachpb.MakeLockUpdate(txn, roachpb.Span{Key: start, EndKey: end}) + intent.Status = status + _, _, err := MVCCResolveWriteIntentRange(e.ctx, e.tryWrapForIntentPrinting(e.engine), e.ms, intent, 0) + return err +} + func (e *evalCtx) resolveIntent( rw ReadWriter, key roachpb.Key, @@ -962,7 +974,7 @@ func cmdDeleteRangeTombstone(e *evalCtx) error { localTs := hlc.ClockTimestamp(e.getTsWithName("localTs")) return e.withWriter("del_range_ts", func(rw ReadWriter) error { - return ExperimentalMVCCDeleteRangeUsingTombstone(e.ctx, rw, nil, key, endKey, ts, localTs, 0) + return ExperimentalMVCCDeleteRangeUsingTombstone(e.ctx, rw, e.ms, key, endKey, ts, localTs, nil, nil, 0) }) } @@ -1525,6 +1537,7 @@ func formatStats(ms enginepb.MVCCStats, delta bool) string { // TODO(erikgrinaker): Consider just reordering the MVCCStats struct fields // instead, which determines the order of MVCCStats.String(). 
order := []string{"key_count", "key_bytes", "val_count", "val_bytes", + "range_key_count", "range_key_bytes", "range_val_count", "range_val_bytes", "live_count", "live_bytes", "gc_bytes_age", "intent_count", "intent_bytes", "separated_intent_count", "intent_age"} sort.SliceStable(fields, func(i, j int) bool { diff --git a/pkg/storage/mvcc_incremental_iterator_test.go b/pkg/storage/mvcc_incremental_iterator_test.go index d8a8666c536d..0d8903492c1e 100644 --- a/pkg/storage/mvcc_incremental_iterator_test.go +++ b/pkg/storage/mvcc_incremental_iterator_test.go @@ -1348,7 +1348,11 @@ func runIncrementalBenchmark( // Pull all of the sstables into the cache. This // probably defeats a lot of the benefits of the // time-based optimization. - iter := eng.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{UpperBound: roachpb.KeyMax}) + iter := eng.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{ + KeyTypes: IterKeyTypePointsAndRanges, + LowerBound: roachpb.LocalMax, + UpperBound: roachpb.KeyMax, + }) _, _ = iter.ComputeStats(keys.LocalMax, roachpb.KeyMax, 0) iter.Close() } diff --git a/pkg/storage/mvcc_key.go b/pkg/storage/mvcc_key.go index 8f3db550897b..8bf9b39abb9f 100644 --- a/pkg/storage/mvcc_key.go +++ b/pkg/storage/mvcc_key.go @@ -13,6 +13,7 @@ package storage import ( "encoding/binary" "fmt" + "sort" "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/storage/enginepb" @@ -389,6 +390,19 @@ func (k MVCCRangeKey) Compare(o MVCCRangeKey) int { return k.EndKey.Compare(o.EndKey) } +// EncodedSize returns the encoded size of this range key. This does not +// accurately reflect the on-disk size of the key, due to Pebble range key +// stacking and fragmentation. +// +// NB: This calculation differs from MVCCKey in that MVCCKey.EncodedSize() +// incorrectly always uses 13 bytes for the timestamp while this method +// calculates the actual encoded size. 
+func (k MVCCRangeKey) EncodedSize() int { + return EncodedMVCCKeyPrefixLength(k.StartKey) + + EncodedMVCCKeyPrefixLength(k.EndKey) + + EncodedMVCCTimestampSuffixLength(k.Timestamp) +} + // String formats the range key. func (k MVCCRangeKey) String() string { s := roachpb.Span{Key: k.StartKey, EndKey: k.EndKey}.String() @@ -423,3 +437,19 @@ func (k MVCCRangeKey) Validate() (err error) { return nil } } + +// firstRangeKeyAbove does a binary search for the first range key at or above +// the given timestamp. It assumes the range keys are ordered in descending +// timestamp order, as returned by SimpleMVCCIterator.RangeKeys(). Returns false +// if no matching range key was found. +func firstRangeKeyAbove(rangeKeys []MVCCRangeKeyValue, ts hlc.Timestamp) (MVCCRangeKeyValue, bool) { + // This is kind of odd due to sort.Search() semantics: we do a binary search + // for the first range tombstone that's below the timestamp, then return the + // previous range tombstone if any. + if i := sort.Search(len(rangeKeys), func(i int) bool { + return rangeKeys[i].RangeKey.Timestamp.Less(ts) + }); i > 0 { + return rangeKeys[i-1], true + } + return MVCCRangeKeyValue{}, false +} diff --git a/pkg/storage/mvcc_key_test.go b/pkg/storage/mvcc_key_test.go index aac8f2847ae1..f99b1b5f1fa2 100644 --- a/pkg/storage/mvcc_key_test.go +++ b/pkg/storage/mvcc_key_test.go @@ -439,6 +439,31 @@ func TestMVCCRangeKeyCompare(t *testing.T) { } } +func TestMVCCRangeKeyEncodedSize(t *testing.T) { + defer leaktest.AfterTest(t)() + + testcases := map[string]struct { + rk MVCCRangeKey + expect int + }{ + "empty": {MVCCRangeKey{}, 2}, // sentinel byte for start and end + "only start": {MVCCRangeKey{StartKey: roachpb.Key("foo")}, 5}, + "only end": {MVCCRangeKey{EndKey: roachpb.Key("foo")}, 5}, + "only walltime": {MVCCRangeKey{Timestamp: hlc.Timestamp{WallTime: 1}}, 11}, + "only logical": {MVCCRangeKey{Timestamp: hlc.Timestamp{Logical: 1}}, 15}, + "all": {MVCCRangeKey{ + StartKey: roachpb.Key("start"), + 
EndKey: roachpb.Key("end"), + Timestamp: hlc.Timestamp{WallTime: 1, Logical: 1, Synthetic: true}, + }, 24}, + } + for name, tc := range testcases { + t.Run(name, func(t *testing.T) { + require.Equal(t, tc.expect, tc.rk.EncodedSize()) + }) + } +} + func TestMVCCRangeKeyValidate(t *testing.T) { defer leaktest.AfterTest(t)() @@ -473,6 +498,43 @@ func TestMVCCRangeKeyValidate(t *testing.T) { } } +func TestFirstRangeKeyAbove(t *testing.T) { + defer leaktest.AfterTest(t)() + + rangeKVs := []MVCCRangeKeyValue{ + rangeKV("a", "f", 6, MVCCValue{}), + rangeKV("a", "f", 4, MVCCValue{}), + rangeKV("a", "f", 3, MVCCValue{}), + rangeKV("a", "f", 1, MVCCValue{}), + } + + testcases := []struct { + ts int64 + expect int64 + }{ + {0, 1}, + {1, 1}, + {2, 3}, + {3, 3}, + {4, 4}, + {5, 6}, + {6, 6}, + {7, 0}, + } + for _, tc := range testcases { + t.Run(fmt.Sprintf("%d", tc.ts), func(t *testing.T) { + rkv, ok := firstRangeKeyAbove(rangeKVs, hlc.Timestamp{WallTime: tc.ts}) + if tc.expect == 0 { + require.False(t, ok) + require.Empty(t, rkv) + } else { + require.True(t, ok) + require.Equal(t, rangeKV("a", "f", int(tc.expect), MVCCValue{}), rkv) + } + }) + } +} + func pointKey(key string, ts int) MVCCKey { return MVCCKey{Key: roachpb.Key(key), Timestamp: wallTS(ts)} } diff --git a/pkg/storage/mvcc_stats_test.go b/pkg/storage/mvcc_stats_test.go index 57c98d66e61f..b6d311a7970b 100644 --- a/pkg/storage/mvcc_stats_test.go +++ b/pkg/storage/mvcc_stats_test.go @@ -58,7 +58,11 @@ func assertEqImpl( keyMin = keys.LocalMax keyMax = roachpb.KeyMax } - it := rw.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{UpperBound: keyMax}) + it := rw.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{ + KeyTypes: IterKeyTypePointsAndRanges, + LowerBound: keyMin, + UpperBound: keyMax, + }) defer it.Close() for _, mvccStatsTest := range mvccStatsTests { @@ -1624,6 +1628,8 @@ func (s *randomTest) step(t *testing.T) { } } +// TODO(erikgrinaker): Add ExperimentalMVCCDeleteRangeUsingTombstone operations 
+// once they are fully integrated with other MVCC operations. func TestMVCCStatsRandomized(t *testing.T) { defer leaktest.AfterTest(t)() defer log.Scope(t).Close(t) @@ -1797,7 +1803,11 @@ func TestMVCCComputeStatsError(t *testing.T) { t.Fatal(err) } - iter := engine.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{UpperBound: roachpb.KeyMax}) + iter := engine.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{ + KeyTypes: IterKeyTypePointsAndRanges, + LowerBound: roachpb.LocalMax, + UpperBound: roachpb.KeyMax, + }) defer iter.Close() for _, mvccStatsTest := range mvccStatsTests { t.Run(mvccStatsTest.name, func(t *testing.T) { diff --git a/pkg/storage/mvcc_test.go b/pkg/storage/mvcc_test.go index cca71031351f..9a75ef158b6d 100644 --- a/pkg/storage/mvcc_test.go +++ b/pkg/storage/mvcc_test.go @@ -31,7 +31,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/testutils/skip" "github.com/cockroachdb/cockroach/pkg/testutils/zerofields" - "github.com/cockroachdb/cockroach/pkg/util/caller" "github.com/cockroachdb/cockroach/pkg/util/encoding" "github.com/cockroachdb/cockroach/pkg/util/hlc" "github.com/cockroachdb/cockroach/pkg/util/leaktest" @@ -126,6 +125,10 @@ func TestMVCCStatsAddSubForward(t *testing.T) { ValCount: 1, IntentBytes: 1, IntentCount: 1, + RangeKeyCount: 1, + RangeKeyBytes: 1, + RangeValCount: 1, + RangeValBytes: 1, SeparatedIntentCount: 1, IntentAge: 1, GCBytesAge: 1, @@ -136,17 +139,7 @@ func TestMVCCStatsAddSubForward(t *testing.T) { LastUpdateNanos: 1, AbortSpanBytes: 1, } - if err := zerofields.NoZeroField(&goldMS); err != nil { - t.Fatal(err) // prevent rot as fields are added - } - - cmp := func(act, exp enginepb.MVCCStats) { - t.Helper() - f, l, _ := caller.Lookup(1) - if !reflect.DeepEqual(act, exp) { - t.Fatalf("%s:%d: wanted %+v back, got %+v", f, l, exp, act) - } - } + require.NoError(t, zerofields.NoZeroField(&goldMS)) ms := goldMS zeroWithLU := enginepb.MVCCStats{ @@ -155,10 +148,10 @@ 
func TestMVCCStatsAddSubForward(t *testing.T) { } ms.Subtract(goldMS) - cmp(ms, zeroWithLU) + require.Equal(t, zeroWithLU, ms) ms.Add(goldMS) - cmp(ms, goldMS) + require.Equal(t, goldMS, ms) // Double-add double-sub guards against mistaking `+=` for `=`. ms = zeroWithLU @@ -166,7 +159,7 @@ func TestMVCCStatsAddSubForward(t *testing.T) { ms.Add(goldMS) ms.Subtract(goldMS) ms.Subtract(goldMS) - cmp(ms, zeroWithLU) + require.Equal(t, zeroWithLU, ms) // Run some checks for Forward. goldDelta := enginepb.MVCCStats{ @@ -176,31 +169,27 @@ func TestMVCCStatsAddSubForward(t *testing.T) { } delta := goldDelta - for i, ns := range []int64{1, 1e9 - 1001, 1e9 - 1000, 1e9 - 1, 1e9, 1e9 + 1, 2e9 - 1} { + for _, ns := range []int64{1, 1e9 - 1001, 1e9 - 1000, 1e9 - 1, 1e9, 1e9 + 1, 2e9 - 1} { oldDelta := delta delta.AgeTo(ns) - if delta.LastUpdateNanos < ns { - t.Fatalf("%d: expected LastUpdateNanos < %d, got %d", i, ns, delta.LastUpdateNanos) - } + require.GreaterOrEqual(t, delta.LastUpdateNanos, ns, "LastUpdateNanos") shouldAge := ns/1e9-oldDelta.LastUpdateNanos/1e9 > 0 didAge := delta.IntentAge != oldDelta.IntentAge && delta.GCBytesAge != oldDelta.GCBytesAge - if shouldAge != didAge { - t.Fatalf("%d: should age: %t, but had\n%+v\nand now\n%+v", i, shouldAge, oldDelta, delta) - } + require.Equal(t, shouldAge, didAge) } expDelta := goldDelta expDelta.LastUpdateNanos = 2e9 - 1 expDelta.GCBytesAge = 42 expDelta.IntentAge = 11 - cmp(delta, expDelta) + require.Equal(t, expDelta, delta) delta.AgeTo(2e9) expDelta.LastUpdateNanos = 2e9 expDelta.GCBytesAge += 42 expDelta.IntentAge += 11 - cmp(delta, expDelta) + require.Equal(t, expDelta, delta) { // Verify that AgeTo can go backwards in time. 
@@ -212,13 +201,13 @@ func TestMVCCStatsAddSubForward(t *testing.T) { expDelta.LastUpdateNanos = 2e9 - 1 expDelta.GCBytesAge -= 42 expDelta.IntentAge -= 11 - cmp(tmpDelta, expDelta) + require.Equal(t, expDelta, tmpDelta) } delta.AgeTo(3e9 - 1) delta.Forward(5) // should be noop expDelta.LastUpdateNanos = 3e9 - 1 - cmp(delta, expDelta) + require.Equal(t, expDelta, delta) // Check that Add calls Forward appropriately. mss := []enginepb.MVCCStats{goldMS, goldMS} @@ -229,13 +218,13 @@ func TestMVCCStatsAddSubForward(t *testing.T) { expMS := goldMS expMS.Add(goldMS) expMS.LastUpdateNanos = 10e9 + 1 - expMS.IntentAge += 9 // from aging 9 ticks from 2E9-1 to 10E9+1 - expMS.GCBytesAge += 9 // ditto + expMS.IntentAge += 9 // from aging 9 ticks from 2E9-1 to 10E9+1 + expMS.GCBytesAge += 3 * 9 // ditto for i := range mss[:1] { ms := mss[(1+i)%2] ms.Add(mss[i]) - cmp(ms, expMS) + require.Equal(t, expMS, ms) } // Finally, check Forward with negative counts (can happen). @@ -246,9 +235,9 @@ func TestMVCCStatsAddSubForward(t *testing.T) { neg.AgeTo(2e9) exp.LastUpdateNanos = 2e9 - exp.GCBytesAge = -3 + exp.GCBytesAge = -7 exp.IntentAge = -3 - cmp(neg, exp) + require.Equal(t, exp, neg) } func TestMVCCGetNotExist(t *testing.T) { @@ -909,7 +898,10 @@ func TestMVCCInvalidateIterator(t *testing.T) { switch which { case "get": iterOptions.Prefix = true - case "scan", "findSplitKey", "computeStats": + case "computeStats": + iterOptions.KeyTypes = IterKeyTypePointsAndRanges + iterOptions.UpperBound = roachpb.KeyMax + case "scan", "findSplitKey": iterOptions.UpperBound = roachpb.KeyMax } @@ -2199,13 +2191,23 @@ func computeStats( t *testing.T, reader Reader, from, to roachpb.Key, nowNanos int64, ) enginepb.MVCCStats { t.Helper() - iter := reader.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{UpperBound: to}) - defer iter.Close() - s, err := ComputeStatsForRange(iter, from, to, nowNanos) - if err != nil { - t.Fatalf("%+v", err) + + if len(from) == 0 { + from = keys.LocalMax + } + 
if len(to) == 0 { + to = keys.MaxKey } - return s + + iter := reader.NewMVCCIterator(MVCCKeyAndIntentsIterKind, IterOptions{ + KeyTypes: IterKeyTypePointsAndRanges, + LowerBound: from, + UpperBound: to, + }) + defer iter.Close() + ms, err := ComputeStatsForRange(iter, from, to, nowNanos) + require.NoError(t, err) + return ms } // TestMVCCClearTimeRangeOnRandomData sets up mostly random KVs and then picks diff --git a/pkg/storage/sst_iterator.go b/pkg/storage/sst_iterator.go index afa737041672..a70ac7351556 100644 --- a/pkg/storage/sst_iterator.go +++ b/pkg/storage/sst_iterator.go @@ -96,6 +96,10 @@ func NewSSTIterator(file sstable.ReadableFile) (SimpleMVCCIterator, error) { // It's compatible with sstables written by `RocksDBSstFileWriter` and // Pebble's `sstable.Writer`, and assumes the keys use Cockroach's MVCC // format. +// +// TODO(erikgrinaker): When this gets support for iterating over range keys, all +// call sites that use ComputeStatsForRange() must be updated to enable range +// keys for the iterators. func NewMemSSTIterator(data []byte, verify bool) (SimpleMVCCIterator, error) { sst, err := sstable.NewReader(vfs.NewMemFile(data), sstable.ReaderOptions{ Comparer: EngineComparer, @@ -213,16 +217,18 @@ func (r *sstIterator) verifyValue() { } // HasPointAndRange implements SimpleMVCCIterator. +// +// TODO(erikgrinaker): implement range key support. func (r *sstIterator) HasPointAndRange() (bool, bool) { - panic("not implemented") + return true, false } // RangeBounds implements SimpleMVCCIterator. func (r *sstIterator) RangeBounds() roachpb.Span { - panic("not implemented") + return roachpb.Span{} } // RangeKeys implements SimpleMVCCIterator. 
func (r *sstIterator) RangeKeys() []MVCCRangeKeyValue { - panic("not implemented") + return []MVCCRangeKeyValue{} } diff --git a/pkg/storage/testdata/mvcc_histories/delete_range b/pkg/storage/testdata/mvcc_histories/delete_range index 4073e71af5ad..5687f627ea46 100644 --- a/pkg/storage/testdata/mvcc_histories/delete_range +++ b/pkg/storage/testdata/mvcc_histories/delete_range @@ -49,15 +49,17 @@ stats: key_count=8 key_bytes=152 val_count=10 val_bytes=64 live_count=6 live_byt ## The delete tombstone is placed alongside each of the previous values, at the newer timestamp. ## The deleted keys are returned. -run stat +run stats ok with t=A txn_begin ts=46 del_range k=b end=c returnKeys txn_remove ---- +>> del_range k=b end=c returnKeys t=A del_range: "b"-"c" -> deleted 2 key(s) del_range: returned "b" del_range: returned "b/123" +stats: key_bytes=+24 val_count=+2 val_bytes=+100 live_count=-2 live_bytes=-48 gc_bytes_age=+9288 intent_count=+2 intent_bytes=+24 separated_intent_count=+2 intent_age=+108 >> at end: data: "a"/45.000000000,0 -> / data: "a"/44.000000000,0 -> /BYTES/abc @@ -73,6 +75,7 @@ data: "c"/44.000000000,0 -> /BYTES/abc data: "c/123"/44.000000000,0 -> /BYTES/abc data: "d"/44.000000000,0 -> /BYTES/abc data: "d/123"/44.000000000,0 -> /BYTES/abc +stats: key_count=8 key_bytes=176 val_count=12 val_bytes=164 live_count=4 live_bytes=96 gc_bytes_age=13248 intent_count=2 intent_bytes=24 separated_intent_count=2 intent_age=108 # A limited non-txn that deletes a range of keys. 
diff --git a/pkg/storage/testdata/mvcc_histories/range_tombstone_conflicts b/pkg/storage/testdata/mvcc_histories/range_tombstone_conflicts index 2c8a397dcfd3..ddc288deea55 100644 --- a/pkg/storage/testdata/mvcc_histories/range_tombstone_conflicts +++ b/pkg/storage/testdata/mvcc_histories/range_tombstone_conflicts @@ -9,7 +9,7 @@ # 2 x # 1 d1 e1 f1 g1 1 1 # a b c d e f g h i j k -run ok +run stats ok put k=d ts=1 v=d1 put k=e ts=1 v=e1 del k=e ts=2 @@ -23,8 +23,28 @@ with t=A ts=7 txn_begin put k=g v=7 ---- +>> put k=d ts=1 v=d1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> put k=e ts=1 v=e1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=e ts=2 +stats: key_bytes=+12 val_count=+1 live_count=-1 live_bytes=-21 gc_bytes_age=+3234 +>> put k=f ts=1 v=f1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> put k=g ts=1 v=g1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> increment k=i ts=1 inc: current value = 1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+6 live_count=+1 live_bytes=+20 +>> increment k=j ts=1 inc: current value = 1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+6 live_count=+1 live_bytes=+20 +>> del_range_ts k=a end=k ts=3 +stats: range_key_count=+1 range_key_bytes=+13 range_val_count=+1 live_count=-5 live_bytes=-103 gc_bytes_age=+11252 +>> del_range_ts k=c end=k ts=5 +stats: range_key_count=+1 range_key_bytes=+22 range_val_count=+2 gc_bytes_age=+2108 +>> put k=g v=7 t=A ts=7 +stats: key_bytes=+12 val_count=+1 val_bytes=+54 live_count=+1 live_bytes=+68 gc_bytes_age=-194 intent_count=+1 intent_bytes=+18 separated_intent_count=+1 intent_age=+93 >> at end: txn: "A" meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} lock=true stat=PENDING rts=7.000000000,0 wto=false gul=0,0 rangekey: {a-c}/[3.000000000,0=/] @@ -38,11 
+58,14 @@ data: "g"/7.000000000,0 -> /BYTES/7 data: "g"/1.000000000,0 -> /BYTES/g1 data: "i"/1.000000000,0 -> /INT/1 data: "j"/1.000000000,0 -> /INT/1 +stats: key_count=6 key_bytes=108 val_count=8 val_bytes=94 range_key_count=2 range_key_bytes=35 range_val_count=3 live_count=1 live_bytes=68 gc_bytes_age=16400 intent_count=1 intent_bytes=18 separated_intent_count=1 intent_age=93 # Inline value or tombstone below range tombstone should error. -run error +run stats error put k=b ts=0 v=b0 ---- +>> put k=b ts=0 v=b0 +stats: no change >> at end: rangekey: {a-c}/[3.000000000,0=/] rangekey: {c-k}/[5.000000000,0=/ 3.000000000,0=/] @@ -55,11 +78,14 @@ data: "g"/7.000000000,0 -> /BYTES/7 data: "g"/1.000000000,0 -> /BYTES/g1 data: "i"/1.000000000,0 -> /INT/1 data: "j"/1.000000000,0 -> /INT/1 +stats: key_count=6 key_bytes=108 val_count=8 val_bytes=94 range_key_count=2 range_key_bytes=35 range_val_count=3 live_count=1 live_bytes=68 gc_bytes_age=16400 intent_count=1 intent_bytes=18 separated_intent_count=1 intent_age=93 error: (*withstack.withStack:) "b"/0,0: put is inline=true, but existing value is inline=false -run error +run stats error del k=b ts=0 ---- +>> del k=b ts=0 +stats: no change >> at end: rangekey: {a-c}/[3.000000000,0=/] rangekey: {c-k}/[5.000000000,0=/ 3.000000000,0=/] @@ -72,12 +98,15 @@ data: "g"/7.000000000,0 -> /BYTES/7 data: "g"/1.000000000,0 -> /BYTES/g1 data: "i"/1.000000000,0 -> /INT/1 data: "j"/1.000000000,0 -> /INT/1 +stats: key_count=6 key_bytes=108 val_count=8 val_bytes=94 range_key_count=2 range_key_bytes=35 range_val_count=3 live_count=1 live_bytes=68 gc_bytes_age=16400 intent_count=1 intent_bytes=18 separated_intent_count=1 intent_age=93 error: (*withstack.withStack:) "b"/0,0: put is inline=true, but existing value is inline=false # DeleteRange at ts=5 should error with WriteTooOldError. 
-run error +run stats error del_range k=a end=f ts=5 ---- +>> del_range k=a end=f ts=5 +stats: no change >> at end: rangekey: {a-c}/[3.000000000,0=/] rangekey: {c-k}/[5.000000000,0=/ 3.000000000,0=/] @@ -90,17 +119,17 @@ data: "g"/7.000000000,0 -> /BYTES/7 data: "g"/1.000000000,0 -> /BYTES/g1 data: "i"/1.000000000,0 -> /INT/1 data: "j"/1.000000000,0 -> /INT/1 +stats: key_count=6 key_bytes=108 val_count=8 val_bytes=94 range_key_count=2 range_key_bytes=35 range_val_count=3 live_count=1 live_bytes=68 gc_bytes_age=16400 intent_count=1 intent_bytes=18 separated_intent_count=1 intent_age=93 error: (*roachpb.WriteTooOldError:) WriteTooOldError: write for key "c" at timestamp 5.000000000,0 too old; wrote at 5.000000000,1 # Point key below range tombstones should error, but is written anyway at a -# higher timestamp. -# -# TODO(erikgrinaker): These should test stats too, once range tombstones are -# correctly accounted for: "Stats are updated correctly, even when there are -# existing point values and tombstones below the range tombstones". -run error +# higher timestamp. Stats are updated correctly, even when there are +# existing point values and tombstones below the range tombstones. 
+run stats error put k=c ts=3 v=c3 ---- +>> put k=c ts=3 v=c3 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 >> at end: rangekey: {a-c}/[3.000000000,0=/] rangekey: {c-k}/[5.000000000,0=/ 3.000000000,0=/] @@ -114,11 +143,14 @@ data: "g"/7.000000000,0 -> /BYTES/7 data: "g"/1.000000000,0 -> /BYTES/g1 data: "i"/1.000000000,0 -> /INT/1 data: "j"/1.000000000,0 -> /INT/1 +stats: key_count=7 key_bytes=122 val_count=9 val_bytes=101 range_key_count=2 range_key_bytes=35 range_val_count=3 live_count=2 live_bytes=89 gc_bytes_age=16400 intent_count=1 intent_bytes=18 separated_intent_count=1 intent_age=93 error: (*roachpb.WriteTooOldError:) WriteTooOldError: write for key "c" at timestamp 3.000000000,0 too old; wrote at 5.000000000,1 -run error +run stats error put k=d ts=3 v=d3 ---- +>> put k=d ts=3 v=d3 +stats: key_bytes=+12 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 gc_bytes_age=-194 >> at end: rangekey: {a-c}/[3.000000000,0=/] rangekey: {c-k}/[5.000000000,0=/ 3.000000000,0=/] @@ -133,11 +165,14 @@ data: "g"/7.000000000,0 -> /BYTES/7 data: "g"/1.000000000,0 -> /BYTES/g1 data: "i"/1.000000000,0 -> /INT/1 data: "j"/1.000000000,0 -> /INT/1 +stats: key_count=7 key_bytes=134 val_count=10 val_bytes=108 range_key_count=2 range_key_bytes=35 range_val_count=3 live_count=3 live_bytes=110 gc_bytes_age=16206 intent_count=1 intent_bytes=18 separated_intent_count=1 intent_age=93 error: (*roachpb.WriteTooOldError:) WriteTooOldError: write for key "d" at timestamp 3.000000000,0 too old; wrote at 5.000000000,1 -run error +run stats error put k=e ts=3 v=e3 ---- +>> put k=e ts=3 v=e3 +stats: key_bytes=+12 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 gc_bytes_age=-196 >> at end: rangekey: {a-c}/[3.000000000,0=/] rangekey: {c-k}/[5.000000000,0=/ 3.000000000,0=/] @@ -153,12 +188,15 @@ data: "g"/7.000000000,0 -> /BYTES/7 data: "g"/1.000000000,0 -> /BYTES/g1 data: "i"/1.000000000,0 -> /INT/1 data: "j"/1.000000000,0 -> /INT/1 +stats: 
key_count=7 key_bytes=146 val_count=11 val_bytes=115 range_key_count=2 range_key_bytes=35 range_val_count=3 live_count=4 live_bytes=131 gc_bytes_age=16010 intent_count=1 intent_bytes=18 separated_intent_count=1 intent_age=93 error: (*roachpb.WriteTooOldError:) WriteTooOldError: write for key "e" at timestamp 3.000000000,0 too old; wrote at 5.000000000,1 # CPuts expecting a value covered by a range tombstone should error. -run error +run stats error cput k=f ts=7 v=f7 cond=f1 ---- +>> cput k=f ts=7 v=f7 cond=f1 +stats: no change >> at end: rangekey: {a-c}/[3.000000000,0=/] rangekey: {c-k}/[5.000000000,0=/ 3.000000000,0=/] @@ -174,14 +212,17 @@ data: "g"/7.000000000,0 -> /BYTES/7 data: "g"/1.000000000,0 -> /BYTES/g1 data: "i"/1.000000000,0 -> /INT/1 data: "j"/1.000000000,0 -> /INT/1 +stats: key_count=7 key_bytes=146 val_count=11 val_bytes=115 range_key_count=2 range_key_bytes=35 range_val_count=3 live_count=4 live_bytes=131 gc_bytes_age=16010 intent_count=1 intent_bytes=18 separated_intent_count=1 intent_age=93 error: (*roachpb.ConditionFailedError:) unexpected value: timestamp: # A CPut replay of an intent expecting a value covered by a range tombstone # should error because of the range tombstone covering it. 
-run error +run stats error with t=A ts=7 cput k=g v=g7 cond=g1 ---- +>> cput k=g v=g7 cond=g1 t=A ts=7 +stats: no change >> at end: rangekey: {a-c}/[3.000000000,0=/] rangekey: {c-k}/[5.000000000,0=/ 3.000000000,0=/] @@ -197,16 +238,19 @@ data: "g"/7.000000000,0 -> /BYTES/7 data: "g"/1.000000000,0 -> /BYTES/g1 data: "i"/1.000000000,0 -> /INT/1 data: "j"/1.000000000,0 -> /INT/1 +stats: key_count=7 key_bytes=146 val_count=11 val_bytes=115 range_key_count=2 range_key_bytes=35 range_val_count=3 live_count=4 live_bytes=131 gc_bytes_age=16010 intent_count=1 intent_bytes=18 separated_intent_count=1 intent_age=93 error: (*roachpb.ConditionFailedError:) unexpected value: timestamp: # A CPut replacing an existing but ignored intent expecting a value covered # by a range tombstone should error because of the range tombstone covering it. -run error +run stats error with t=A ts=7 txn_step txn_ignore_seqs seqs=0-1 cput k=g v=g7 cond=g1 ---- +>> cput k=g v=g7 cond=g1 t=A ts=7 +stats: no change >> at end: txn: "A" meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=1} lock=true stat=PENDING rts=7.000000000,0 wto=false gul=0,0 isn=1 rangekey: {a-c}/[3.000000000,0=/] @@ -223,12 +267,15 @@ data: "g"/7.000000000,0 -> /BYTES/7 data: "g"/1.000000000,0 -> /BYTES/g1 data: "i"/1.000000000,0 -> /INT/1 data: "j"/1.000000000,0 -> /INT/1 +stats: key_count=7 key_bytes=146 val_count=11 val_bytes=115 range_key_count=2 range_key_bytes=35 range_val_count=3 live_count=4 live_bytes=131 gc_bytes_age=16010 intent_count=1 intent_bytes=18 separated_intent_count=1 intent_age=93 error: (*roachpb.ConditionFailedError:) unexpected value: timestamp: # An InitPut with failOnTombstones above a range tombstone should error. 
-run error +run stats error initput k=f ts=7 v=f7 failOnTombstones ---- +>> initput k=f ts=7 v=f7 failOnTombstones +stats: no change >> at end: rangekey: {a-c}/[3.000000000,0=/] rangekey: {c-k}/[5.000000000,0=/ 3.000000000,0=/] @@ -244,15 +291,16 @@ data: "g"/7.000000000,0 -> /BYTES/7 data: "g"/1.000000000,0 -> /BYTES/g1 data: "i"/1.000000000,0 -> /INT/1 data: "j"/1.000000000,0 -> /INT/1 +stats: key_count=7 key_bytes=146 val_count=11 val_bytes=115 range_key_count=2 range_key_bytes=35 range_val_count=3 live_count=4 live_bytes=131 gc_bytes_age=16010 intent_count=1 intent_bytes=18 separated_intent_count=1 intent_age=93 error: (*roachpb.ConditionFailedError:) unexpected value: timestamp: # An InitPut with a different value as an existing key should succeed when there's # a range tombstone covering the existing value. -# -# TODO(erikgrinaker): This should test stats too. -run ok +run stats ok initput k=f ts=7 v=f7 ---- +>> initput k=f ts=7 v=f7 +stats: key_bytes=+12 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 gc_bytes_age=-194 >> at end: rangekey: {a-c}/[3.000000000,0=/] rangekey: {c-k}/[5.000000000,0=/ 3.000000000,0=/] @@ -269,12 +317,15 @@ data: "g"/7.000000000,0 -> /BYTES/7 data: "g"/1.000000000,0 -> /BYTES/g1 data: "i"/1.000000000,0 -> /INT/1 data: "j"/1.000000000,0 -> /INT/1 +stats: key_count=7 key_bytes=158 val_count=12 val_bytes=122 range_key_count=2 range_key_bytes=35 range_val_count=3 live_count=5 live_bytes=152 gc_bytes_age=15816 intent_count=1 intent_bytes=18 separated_intent_count=1 intent_age=93 # An increment below a range tombstone should reset to 1 and write above it with -# a WriteTooOldError. -run error +# a WriteTooOldError. This should update stats correctly. 
+run stats error increment k=i ts=2 ---- +>> increment k=i ts=2 +stats: key_bytes=+12 val_count=+1 val_bytes=+6 live_count=+1 live_bytes=+20 gc_bytes_age=-194 >> at end: rangekey: {a-c}/[3.000000000,0=/] rangekey: {c-k}/[5.000000000,0=/ 3.000000000,0=/] @@ -292,13 +343,16 @@ data: "g"/1.000000000,0 -> /BYTES/g1 data: "i"/5.000000000,1 -> /INT/1 data: "i"/1.000000000,0 -> /INT/1 data: "j"/1.000000000,0 -> /INT/1 +stats: key_count=7 key_bytes=170 val_count=13 val_bytes=128 range_key_count=2 range_key_bytes=35 range_val_count=3 live_count=6 live_bytes=172 gc_bytes_age=15622 intent_count=1 intent_bytes=18 separated_intent_count=1 intent_age=93 error: (*roachpb.WriteTooOldError:) WriteTooOldError: write for key "i" at timestamp 2.000000000,0 too old; wrote at 5.000000000,1 # An increment above a range tombstone should reset to 1. -run ok +run stats ok increment k=j ts=7 ---- +>> increment k=j ts=7 inc: current value = 1 +stats: key_bytes=+12 val_count=+1 val_bytes=+6 live_count=+1 live_bytes=+20 gc_bytes_age=-194 >> at end: rangekey: {a-c}/[3.000000000,0=/] rangekey: {c-k}/[5.000000000,0=/ 3.000000000,0=/] @@ -317,3 +371,4 @@ data: "i"/5.000000000,1 -> /INT/1 data: "i"/1.000000000,0 -> /INT/1 data: "j"/7.000000000,0 -> /INT/1 data: "j"/1.000000000,0 -> /INT/1 +stats: key_count=7 key_bytes=182 val_count=14 val_bytes=134 range_key_count=2 range_key_bytes=35 range_val_count=3 live_count=7 live_bytes=192 gc_bytes_age=15428 intent_count=1 intent_bytes=18 separated_intent_count=1 intent_age=93 diff --git a/pkg/storage/testdata/mvcc_histories/range_tombstone_merge_fragment b/pkg/storage/testdata/mvcc_histories/range_tombstone_merge_fragment new file mode 100644 index 000000000000..80984c156a86 --- /dev/null +++ b/pkg/storage/testdata/mvcc_histories/range_tombstone_merge_fragment @@ -0,0 +1,120 @@ +# Tests MVCC range tombstone merging and fragmentation. + +# Writing range tombstones next to others will merge them. 
First write +# two separate tombstones, then merge them in the middle, then extend +# left and right. +run stats trace ok +del_range_ts k=b end=c ts=1 +del_range_ts k=d end=e ts=1 +del_range_ts k=c end=d ts=1 +del_range_ts k=a end=b ts=1 +del_range_ts k=e end=f ts=1 +---- +>> del_range_ts k=b end=c ts=1 +rangekey: {b-c}/[1.000000000,0=/] +stats: range_key_count=+1 range_key_bytes=+13 range_val_count=+1 gc_bytes_age=+1287 +>> del_range_ts k=d end=e ts=1 +rangekey: {b-c}/[1.000000000,0=/] +rangekey: {d-e}/[1.000000000,0=/] +stats: range_key_count=+1 range_key_bytes=+13 range_val_count=+1 gc_bytes_age=+1287 +>> del_range_ts k=c end=d ts=1 +rangekey: {b-e}/[1.000000000,0=/] +stats: range_key_count=-1 range_key_bytes=-13 range_val_count=-1 gc_bytes_age=-1287 +>> del_range_ts k=a end=b ts=1 +rangekey: {a-e}/[1.000000000,0=/] +stats: no change +>> del_range_ts k=e end=f ts=1 +rangekey: {a-f}/[1.000000000,0=/] +stats: no change +stats: range_key_count=1 range_key_bytes=13 range_val_count=1 gc_bytes_age=1287 + +# Writing the same sequence above these should fragment and merge the existing +# ones too. We use different-length key bounds to test the stats contributions +# of those, and also a lower localTs to test the value handling. 
+run stats trace ok +del_range_ts k=bbb end=cc ts=2 localTs=1 +del_range_ts k=d end=eee ts=2 localTs=1 +del_range_ts k=cc end=d ts=2 localTs=1 +del_range_ts k=a end=bbb ts=2 localTs=1 +del_range_ts k=eee end=f ts=2 localTs=1 +---- +>> del_range_ts k=bbb end=cc ts=2 localTs=1 +rangekey: {a-bbb}/[1.000000000,0=/] +rangekey: {bbb-cc}/[2.000000000,0={localTs=1.000000000,0}/ 1.000000000,0=/] +rangekey: {cc-f}/[1.000000000,0=/] +stats: range_key_count=+2 range_key_bytes=+41 range_val_count=+3 range_val_bytes=+13 gc_bytes_age=+5317 +>> del_range_ts k=d end=eee ts=2 localTs=1 +rangekey: {a-bbb}/[1.000000000,0=/] +rangekey: {bbb-cc}/[2.000000000,0={localTs=1.000000000,0}/ 1.000000000,0=/] +rangekey: {cc-d}/[1.000000000,0=/] +rangekey: {d-eee}/[2.000000000,0={localTs=1.000000000,0}/ 1.000000000,0=/] +rangekey: {eee-f}/[1.000000000,0=/] +stats: range_key_count=+2 range_key_bytes=+39 range_val_count=+3 range_val_bytes=+13 gc_bytes_age=+5120 +>> del_range_ts k=cc end=d ts=2 localTs=1 +rangekey: {a-bbb}/[1.000000000,0=/] +rangekey: {bbb-eee}/[2.000000000,0={localTs=1.000000000,0}/ 1.000000000,0=/] +rangekey: {eee-f}/[1.000000000,0=/] +stats: range_key_count=-2 range_key_bytes=-37 range_val_count=-3 range_val_bytes=-13 gc_bytes_age=-4923 +>> del_range_ts k=a end=bbb ts=2 localTs=1 +rangekey: {a-eee}/[2.000000000,0={localTs=1.000000000,0}/ 1.000000000,0=/] +rangekey: {eee-f}/[1.000000000,0=/] +stats: range_key_count=-1 range_key_bytes=-17 range_val_count=-1 gc_bytes_age=-1681 +>> del_range_ts k=eee end=f ts=2 localTs=1 +rangekey: {a-f}/[2.000000000,0={localTs=1.000000000,0}/ 1.000000000,0=/] +stats: range_key_count=-1 range_key_bytes=-17 range_val_count=-1 gc_bytes_age=-1681 +stats: range_key_count=1 range_key_bytes=22 range_val_count=2 range_val_bytes=13 gc_bytes_age=3439 + +# Writing a key from the middle of the existing stack to outside the bounds +# should fragment appropriately. 
+run stats trace ok +del_range_ts k=d end=h ts=3 +---- +>> del_range_ts k=d end=h ts=3 +rangekey: {a-d}/[2.000000000,0={localTs=1.000000000,0}/ 1.000000000,0=/] +rangekey: {d-f}/[3.000000000,0=/ 2.000000000,0={localTs=1.000000000,0}/ 1.000000000,0=/] +rangekey: {f-h}/[3.000000000,0=/] +stats: range_key_count=+2 range_key_bytes=+44 range_val_count=+4 range_val_bytes=+13 gc_bytes_age=+5569 +stats: range_key_count=3 range_key_bytes=66 range_val_count=6 range_val_bytes=26 gc_bytes_age=9008 + +# Writing range tombstones at the same or higher local timestamp should +# still merge, since the values are normalized to empty. However, writing +# at a lower timestamp should not merge, since the values are different. +# Extending a range tombstone with the same local timestamp should +# merge with it, but different local timestamp should not merge. +run stats trace ok +del_range_ts k=h end=i ts=3 localTs=5 +del_range_ts k=i end=j ts=3 localTs=3 +del_range_ts k=j end=k ts=3 localTs=2 +del_range_ts k=k end=l ts=3 localTs=2 +del_range_ts k=l end=m ts=3 localTs=1 +---- +>> del_range_ts k=h end=i ts=3 localTs=5 +rangekey: {a-d}/[2.000000000,0={localTs=1.000000000,0}/ 1.000000000,0=/] +rangekey: {d-f}/[3.000000000,0=/ 2.000000000,0={localTs=1.000000000,0}/ 1.000000000,0=/] +rangekey: {f-i}/[3.000000000,0=/] +stats: no change +>> del_range_ts k=i end=j ts=3 localTs=3 +rangekey: {a-d}/[2.000000000,0={localTs=1.000000000,0}/ 1.000000000,0=/] +rangekey: {d-f}/[3.000000000,0=/ 2.000000000,0={localTs=1.000000000,0}/ 1.000000000,0=/] +rangekey: {f-j}/[3.000000000,0=/] +stats: no change +>> del_range_ts k=j end=k ts=3 localTs=2 +rangekey: {a-d}/[2.000000000,0={localTs=1.000000000,0}/ 1.000000000,0=/] +rangekey: {d-f}/[3.000000000,0=/ 2.000000000,0={localTs=1.000000000,0}/ 1.000000000,0=/] +rangekey: {f-j}/[3.000000000,0=/] +rangekey: {j-k}/[3.000000000,0={localTs=2.000000000,0}/] +stats: range_key_count=+1 range_key_bytes=+13 range_val_count=+1 range_val_bytes=+13 gc_bytes_age=+2522 +>> 
del_range_ts k=k end=l ts=3 localTs=2 +rangekey: {a-d}/[2.000000000,0={localTs=1.000000000,0}/ 1.000000000,0=/] +rangekey: {d-f}/[3.000000000,0=/ 2.000000000,0={localTs=1.000000000,0}/ 1.000000000,0=/] +rangekey: {f-j}/[3.000000000,0=/] +rangekey: {j-l}/[3.000000000,0={localTs=2.000000000,0}/] +stats: no change +>> del_range_ts k=l end=m ts=3 localTs=1 +rangekey: {a-d}/[2.000000000,0={localTs=1.000000000,0}/ 1.000000000,0=/] +rangekey: {d-f}/[3.000000000,0=/ 2.000000000,0={localTs=1.000000000,0}/ 1.000000000,0=/] +rangekey: {f-j}/[3.000000000,0=/] +rangekey: {j-l}/[3.000000000,0={localTs=2.000000000,0}/] +rangekey: {l-m}/[3.000000000,0={localTs=1.000000000,0}/] +stats: range_key_count=+1 range_key_bytes=+13 range_val_count=+1 range_val_bytes=+13 gc_bytes_age=+2522 +stats: range_key_count=5 range_key_bytes=92 range_val_count=8 range_val_bytes=52 gc_bytes_age=14052 diff --git a/pkg/storage/testdata/mvcc_histories/range_tombstone_stats b/pkg/storage/testdata/mvcc_histories/range_tombstone_stats new file mode 100644 index 000000000000..f2ab5278deb4 --- /dev/null +++ b/pkg/storage/testdata/mvcc_histories/range_tombstone_stats @@ -0,0 +1,145 @@ +# Tests basic MVCC stats for put, del, and del_range_ts. +# +# Final state, where x is tombstone, o---o is range tombstone, [] is intent: +# +# 6 d6 e6 f6 +# 5 x x x +# 4 b4 x o-----------------------o Two range tombstones: the lowest is the one +# 3 x c3 o-----------------------o that matters for point key GCBytesAge. +# 2 x +# 1 b1 x e1 x h1 x +# 0 a0 +# a b c d e f g h i j + +run stats ok +put k=a ts=0 v=a0 +put k=b ts=1 v=b1 +del k=b ts=2 +del k=b ts=3 +put k=b ts=4 v=b4 +del k=c ts=1 +put k=c ts=3 v=c3 +del k=c ts=4 +put k=e ts=1 v=e1 +del k=f ts=1 +put k=h ts=1 v=h1 +del k=i ts=1 +del_range_ts k=d end=j ts=3 +del_range_ts k=d end=f ts=4 localTs=3 # Write as two to test split/merge. 
+del_range_ts k=f end=j ts=4 localTs=3 +put k=d ts=6 v=d6 +put k=e ts=6 v=e6 +put k=f ts=6 v=f6 +del k=g ts=5 +del k=h ts=5 +del k=i ts=5 +---- +>> put k=a ts=0 v=a0 +stats: key_count=+1 key_bytes=+2 val_count=+1 val_bytes=+21 live_count=+1 live_bytes=+23 +>> put k=b ts=1 v=b1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=b ts=2 +stats: key_bytes=+12 val_count=+1 live_count=-1 live_bytes=-21 gc_bytes_age=+3234 +>> del k=b ts=3 +stats: key_bytes=+12 val_count=+1 gc_bytes_age=+1162 +>> put k=b ts=4 v=b4 +stats: key_bytes=+12 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 gc_bytes_age=-194 +>> del k=c ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> put k=c ts=3 v=c3 +stats: key_bytes=+12 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 gc_bytes_age=-198 +>> del k=c ts=4 +stats: key_bytes=+12 val_count=+1 live_count=-1 live_bytes=-21 gc_bytes_age=+3168 +>> put k=e ts=1 v=e1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=f ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> put k=h ts=1 v=h1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=i ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> del_range_ts k=d end=j ts=3 +stats: range_key_count=+1 range_key_bytes=+13 range_val_count=+1 live_count=-2 live_bytes=-42 gc_bytes_age=+5335 +>> del_range_ts k=d end=f ts=4 localTs=3 # Write as two to test split/merge. 
+stats: range_key_count=+1 range_key_bytes=+22 range_val_count=+2 range_val_bytes=+13 gc_bytes_age=+3369 +>> del_range_ts k=f end=j ts=4 localTs=3 +stats: range_key_count=-1 range_key_bytes=-13 range_val_count=-1 gc_bytes_age=-1261 +>> put k=d ts=6 v=d6 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> put k=e ts=6 v=e6 +stats: key_bytes=+12 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 gc_bytes_age=-194 +>> put k=f ts=6 v=f6 +stats: key_bytes=+12 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 gc_bytes_age=-198 +>> del k=g ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1330 +>> del k=h ts=5 +stats: key_bytes=+12 val_count=+1 gc_bytes_age=+1136 +>> del k=i ts=5 +stats: key_bytes=+12 val_count=+1 gc_bytes_age=+1132 +>> at end: +rangekey: {d-j}/[4.000000000,0={localTs=3.000000000,0}/ 3.000000000,0=/] +meta: "a"/0,0 -> txn={} ts=0,0 del=false klen=0 vlen=0 raw=/BYTES/a0 mergeTs= txnDidNotUpdateMeta=false +data: "b"/4.000000000,0 -> /BYTES/b4 +data: "b"/3.000000000,0 -> / +data: "b"/2.000000000,0 -> / +data: "b"/1.000000000,0 -> /BYTES/b1 +data: "c"/4.000000000,0 -> / +data: "c"/3.000000000,0 -> /BYTES/c3 +data: "c"/1.000000000,0 -> / +data: "d"/6.000000000,0 -> /BYTES/d6 +data: "e"/6.000000000,0 -> /BYTES/e6 +data: "e"/1.000000000,0 -> /BYTES/e1 +data: "f"/6.000000000,0 -> /BYTES/f6 +data: "f"/1.000000000,0 -> / +data: "g"/5.000000000,0 -> / +data: "h"/5.000000000,0 -> / +data: "h"/1.000000000,0 -> /BYTES/h1 +data: "i"/5.000000000,0 -> / +data: "i"/1.000000000,0 -> / +stats: key_count=9 key_bytes=222 val_count=18 val_bytes=77 range_key_count=1 range_key_bytes=22 range_val_count=2 range_val_bytes=13 live_count=5 live_bytes=107 gc_bytes_age=21979 + +# Finally, let's delete everything -- first in parts, then all again. We can't +# delete the inline value at "a" though. 
+run stats ok +del_range_ts k=c end=d ts=8 localTs=7 +del_range_ts k=e end=f ts=8 localTs=7 +del_range_ts k=d end=e ts=8 localTs=7 +del_range_ts k=b end=c ts=8 localTs=7 +del_range_ts k=f end=j ts=8 +del_range_ts k=b end=j ts=9 +---- +>> del_range_ts k=c end=d ts=8 localTs=7 +stats: range_key_count=+1 range_key_bytes=+13 range_val_count=+1 range_val_bytes=+13 gc_bytes_age=+2392 +>> del_range_ts k=e end=f ts=8 localTs=7 +stats: range_key_count=+2 range_key_bytes=+53 range_val_count=+5 range_val_bytes=+39 live_count=-1 live_bytes=-21 gc_bytes_age=+10678 +>> del_range_ts k=d end=e ts=8 localTs=7 +stats: range_key_count=-1 range_key_bytes=-22 range_val_count=-2 range_val_bytes=-13 live_count=-1 live_bytes=-21 gc_bytes_age=-1437 +>> del_range_ts k=b end=c ts=8 localTs=7 +stats: live_count=-1 live_bytes=-21 gc_bytes_age=+1932 +>> del_range_ts k=f end=j ts=8 +stats: range_key_bytes=+9 range_val_count=+1 live_count=-1 live_bytes=-21 gc_bytes_age=+2744 +>> del_range_ts k=b end=j ts=9 +stats: range_key_bytes=+27 range_val_count=+3 gc_bytes_age=+2445 +>> at end: +rangekey: {b-d}/[9.000000000,0=/ 8.000000000,0={localTs=7.000000000,0}/] +rangekey: {d-f}/[9.000000000,0=/ 8.000000000,0={localTs=7.000000000,0}/ 4.000000000,0={localTs=3.000000000,0}/ 3.000000000,0=/] +rangekey: {f-j}/[9.000000000,0=/ 8.000000000,0=/ 4.000000000,0={localTs=3.000000000,0}/ 3.000000000,0=/] +meta: "a"/0,0 -> txn={} ts=0,0 del=false klen=0 vlen=0 raw=/BYTES/a0 mergeTs= txnDidNotUpdateMeta=false +data: "b"/4.000000000,0 -> /BYTES/b4 +data: "b"/3.000000000,0 -> / +data: "b"/2.000000000,0 -> / +data: "b"/1.000000000,0 -> /BYTES/b1 +data: "c"/4.000000000,0 -> / +data: "c"/3.000000000,0 -> /BYTES/c3 +data: "c"/1.000000000,0 -> / +data: "d"/6.000000000,0 -> /BYTES/d6 +data: "e"/6.000000000,0 -> /BYTES/e6 +data: "e"/1.000000000,0 -> /BYTES/e1 +data: "f"/6.000000000,0 -> /BYTES/f6 +data: "f"/1.000000000,0 -> / +data: "g"/5.000000000,0 -> / +data: "h"/5.000000000,0 -> / +data: "h"/1.000000000,0 -> /BYTES/h1 
+data: "i"/5.000000000,0 -> / +data: "i"/1.000000000,0 -> / +stats: key_count=9 key_bytes=222 val_count=18 val_bytes=77 range_key_count=3 range_key_bytes=102 range_val_count=10 range_val_bytes=52 live_count=1 live_bytes=23 gc_bytes_age=40733 diff --git a/pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_resolve_abort b/pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_resolve_abort new file mode 100644 index 000000000000..518ee3a2ee8c --- /dev/null +++ b/pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_resolve_abort @@ -0,0 +1,250 @@ +# Tests MVCC stats calculations when resolving intents. Intermediate states are +# tested through stats traces. Initial state: +# +# (x is tombstone, o---o is range tombstone, [] is intent) +# +# 7 +# 6 [a6][b6][c6][x] [x] [x] [g6][h6][i6][x] [x] [x] [m6][n6][o6] x x x +# 5 n5 x q5 x +# 4 o-----------------------------------------------o +# 3 o-----------------------------------------------o +# 2 +# 1 b1 x e1 x h1 x k1 x +# a b c d e f g h i j k l m n o p q r s +# +# This uses two range tombstones, since the lowest is the one that matters for +# point key GCBytesAge. It also uses points below/above range tombstones, +# because iterators surface range keys separately from point keys, which can +# cause bugs if callers don't step onto the point key. +# +# TODO(erikgrinaker): This is probably better handled by randomized or +# generative testing, since the combinations are getting unwieldy. But it'll do +# for now. 
+ +run stats ok +with ts=1 + put k=b v=b1 + del k=c + put k=e v=e1 + del k=f + put k=g v=g1 + del k=h + put k=i v=i1 + del k=j +del_range_ts k=g end=s ts=3 +del_range_ts k=g end=s ts=4 +with ts=5 + put k=n v=n5 + del k=o + put k=q v=q5 + del k=r +with t=A + txn_begin ts=6 + put k=a v=a6 + put k=b v=b6 + put k=c v=c6 + del k=d + del k=e + del k=f + put k=g v=g6 + put k=h v=h6 + put k=i v=i6 + del k=j + del k=k + del k=l + put k=m v=m6 + put k=n v=n6 + put k=o v=o6 + del k=p + del k=q + del k=r +---- +>> put k=b v=b1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=c ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> put k=e v=e1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=f ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> put k=g v=g1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=h ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> put k=i v=i1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=j ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> del_range_ts k=g end=s ts=3 +stats: range_key_count=+1 range_key_bytes=+13 range_val_count=+1 live_count=-2 live_bytes=-42 gc_bytes_age=+5335 +>> del_range_ts k=g end=s ts=4 +stats: range_key_bytes=+9 range_val_count=+1 gc_bytes_age=+860 +>> put k=n v=n5 ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=o ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1330 +>> put k=q v=q5 ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=r ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1330 +>> put k=a v=a6 t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 
val_bytes=+55 live_count=+1 live_bytes=+69 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=b v=b6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_bytes=+48 gc_bytes_age=+1786 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=c v=c6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-198 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> del k=d t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=e t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 live_count=-1 live_bytes=-21 gc_bytes_age=+7614 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=f t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 gc_bytes_age=+5630 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> put k=g v=g6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-194 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=h v=h6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-198 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=i v=i6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-194 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> del k=j t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 gc_bytes_age=+5630 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=k t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=l t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 
gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> put k=m v=m6 t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=n v=n6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_bytes=+48 gc_bytes_age=+1786 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=o v=o6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-190 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> del k=p t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=q t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 live_count=-1 live_bytes=-21 gc_bytes_age=+7614 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=r t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 gc_bytes_age=+5638 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> at end: +txn: "A" meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} lock=true stat=PENDING rts=6.000000000,0 wto=false gul=0,0 +rangekey: {g-s}/[4.000000000,0=/ 3.000000000,0=/] +meta: "a"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "a"/6.000000000,0 -> /BYTES/a6 +meta: "b"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "b"/6.000000000,0 -> /BYTES/b6 +data: "b"/1.000000000,0 -> /BYTES/b1 +meta: "c"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= 
txnDidNotUpdateMeta=true +data: "c"/6.000000000,0 -> /BYTES/c6 +data: "c"/1.000000000,0 -> / +meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "d"/6.000000000,0 -> / +meta: "e"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "e"/6.000000000,0 -> / +data: "e"/1.000000000,0 -> /BYTES/e1 +meta: "f"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "f"/6.000000000,0 -> / +data: "f"/1.000000000,0 -> / +meta: "g"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "g"/6.000000000,0 -> /BYTES/g6 +data: "g"/1.000000000,0 -> /BYTES/g1 +meta: "h"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "h"/6.000000000,0 -> /BYTES/h6 +data: "h"/1.000000000,0 -> / +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/6.000000000,0 -> /BYTES/i6 +data: "i"/1.000000000,0 -> /BYTES/i1 +meta: "j"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "j"/6.000000000,0 -> / +data: "j"/1.000000000,0 -> / +meta: "k"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "k"/6.000000000,0 -> / +meta: "l"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 
epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "l"/6.000000000,0 -> / +meta: "m"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "m"/6.000000000,0 -> /BYTES/m6 +meta: "n"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "n"/6.000000000,0 -> /BYTES/n6 +data: "n"/5.000000000,0 -> /BYTES/n5 +meta: "o"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "o"/6.000000000,0 -> /BYTES/o6 +data: "o"/5.000000000,0 -> / +meta: "p"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "p"/6.000000000,0 -> / +meta: "q"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "q"/6.000000000,0 -> / +data: "q"/5.000000000,0 -> /BYTES/q5 +meta: "r"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "r"/6.000000000,0 -> / +data: "r"/5.000000000,0 -> / +stats: key_count=18 key_bytes=396 val_count=30 val_bytes=969 range_key_count=1 range_key_bytes=22 range_val_count=2 live_count=9 live_bytes=621 gc_bytes_age=72435 intent_count=18 intent_bytes=279 separated_intent_count=18 intent_age=1692 + +run stats ok +with t=A status=ABORTED + resolve_intent k=a + resolve_intent k=b + resolve_intent k=c + resolve_intent k=d + resolve_intent k=e + resolve_intent k=f + resolve_intent k=g + resolve_intent k=h + resolve_intent k=i + 
resolve_intent k=j + resolve_intent k=k + resolve_intent k=l + resolve_intent k=m + resolve_intent k=n + resolve_intent k=o + resolve_intent k=p + resolve_intent k=q + resolve_intent k=r +---- +>> resolve_intent k=a t=A status=ABORTED +stats: key_count=-1 key_bytes=-14 val_count=-1 val_bytes=-55 live_count=-1 live_bytes=-69 intent_count=-1 intent_bytes=-19 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=b t=A status=ABORTED +stats: key_bytes=-12 val_count=-1 val_bytes=-55 live_bytes=-48 gc_bytes_age=-1786 intent_count=-1 intent_bytes=-19 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=c t=A status=ABORTED +stats: key_bytes=-12 val_count=-1 val_bytes=-55 live_count=-1 live_bytes=-69 gc_bytes_age=+198 intent_count=-1 intent_bytes=-19 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=d t=A status=ABORTED +stats: key_count=-1 key_bytes=-14 val_count=-1 val_bytes=-48 gc_bytes_age=-5828 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=e t=A status=ABORTED +stats: key_bytes=-12 val_count=-1 val_bytes=-48 live_count=+1 live_bytes=+21 gc_bytes_age=-7614 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=f t=A status=ABORTED +stats: key_bytes=-12 val_count=-1 val_bytes=-48 gc_bytes_age=-5630 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=g t=A status=ABORTED +stats: key_bytes=-12 val_count=-1 val_bytes=-55 live_count=-1 live_bytes=-69 gc_bytes_age=+194 intent_count=-1 intent_bytes=-19 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=h t=A status=ABORTED +stats: key_bytes=-12 val_count=-1 val_bytes=-55 live_count=-1 live_bytes=-69 gc_bytes_age=+198 intent_count=-1 intent_bytes=-19 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=i t=A status=ABORTED +stats: key_bytes=-12 val_count=-1 val_bytes=-55 live_count=-1 live_bytes=-69 gc_bytes_age=+194 intent_count=-1 intent_bytes=-19 
separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=j t=A status=ABORTED +stats: key_bytes=-12 val_count=-1 val_bytes=-48 gc_bytes_age=-5630 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=k t=A status=ABORTED +stats: key_count=-1 key_bytes=-14 val_count=-1 val_bytes=-48 gc_bytes_age=-5828 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=l t=A status=ABORTED +stats: key_count=-1 key_bytes=-14 val_count=-1 val_bytes=-48 gc_bytes_age=-5828 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=m t=A status=ABORTED +stats: key_count=-1 key_bytes=-14 val_count=-1 val_bytes=-55 live_count=-1 live_bytes=-69 intent_count=-1 intent_bytes=-19 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=n t=A status=ABORTED +stats: key_bytes=-12 val_count=-1 val_bytes=-55 live_bytes=-48 gc_bytes_age=-1786 intent_count=-1 intent_bytes=-19 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=o t=A status=ABORTED +stats: key_bytes=-12 val_count=-1 val_bytes=-55 live_count=-1 live_bytes=-69 gc_bytes_age=+190 intent_count=-1 intent_bytes=-19 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=p t=A status=ABORTED +stats: key_count=-1 key_bytes=-14 val_count=-1 val_bytes=-48 gc_bytes_age=-5828 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=q t=A status=ABORTED +stats: key_bytes=-12 val_count=-1 val_bytes=-48 live_count=+1 live_bytes=+21 gc_bytes_age=-7614 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=r t=A status=ABORTED +stats: key_bytes=-12 val_count=-1 val_bytes=-48 gc_bytes_age=-5638 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> at end: +rangekey: {g-s}/[4.000000000,0=/ 3.000000000,0=/] +data: "b"/1.000000000,0 -> /BYTES/b1 +data: "c"/1.000000000,0 -> / +data: "e"/1.000000000,0 -> /BYTES/e1 
+data: "f"/1.000000000,0 -> / +data: "g"/1.000000000,0 -> /BYTES/g1 +data: "h"/1.000000000,0 -> / +data: "i"/1.000000000,0 -> /BYTES/i1 +data: "j"/1.000000000,0 -> / +data: "n"/5.000000000,0 -> /BYTES/n5 +data: "o"/5.000000000,0 -> / +data: "q"/5.000000000,0 -> /BYTES/q5 +data: "r"/5.000000000,0 -> / +stats: key_count=12 key_bytes=168 val_count=12 val_bytes=42 range_key_count=1 range_key_bytes=22 range_val_count=2 live_count=4 live_bytes=84 gc_bytes_age=14399 diff --git a/pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_resolve_commit b/pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_resolve_commit new file mode 100644 index 000000000000..d1ac8745f512 --- /dev/null +++ b/pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_resolve_commit @@ -0,0 +1,268 @@ +# Tests MVCC stats calculations when resolving intents. Intermediate states are +# tested through stats traces. Initial state: +# +# (x is tombstone, o---o is range tombstone, [] is intent) +# +# 7 +# 6 [a6][b6][c6][x] [x] [x] [g6][h6][i6][x] [x] [x] [m6][n6][o6] x x x +# 5 n5 x q5 x +# 4 o-----------------------------------------------o +# 3 o-----------------------------------------------o +# 2 +# 1 b1 x e1 x h1 x k1 x +# a b c d e f g h i j k l m n o p q r s +# +# This uses two range tombstones, since the lowest is the one that matters for +# point key GCBytesAge. It also uses points below/above range tombstones, +# because iterators surface range keys separately from point keys, which can +# cause bugs if callers don't step onto the point key. +# +# TODO(erikgrinaker): This is probably better handled by randomized or +# generative testing, since the combinations are getting unwieldy. But it'll do +# for now. 
+ +run stats ok +with ts=1 + put k=b v=b1 + del k=c + put k=e v=e1 + del k=f + put k=g v=g1 + del k=h + put k=i v=i1 + del k=j +del_range_ts k=g end=s ts=3 +del_range_ts k=g end=s ts=4 +with ts=5 + put k=n v=n5 + del k=o + put k=q v=q5 + del k=r +with t=A + txn_begin ts=6 + put k=a v=a6 + put k=b v=b6 + put k=c v=c6 + del k=d + del k=e + del k=f + put k=g v=g6 + put k=h v=h6 + put k=i v=i6 + del k=j + del k=k + del k=l + put k=m v=m6 + put k=n v=n6 + put k=o v=o6 + del k=p + del k=q + del k=r +---- +>> put k=b v=b1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=c ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> put k=e v=e1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=f ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> put k=g v=g1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=h ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> put k=i v=i1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=j ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> del_range_ts k=g end=s ts=3 +stats: range_key_count=+1 range_key_bytes=+13 range_val_count=+1 live_count=-2 live_bytes=-42 gc_bytes_age=+5335 +>> del_range_ts k=g end=s ts=4 +stats: range_key_bytes=+9 range_val_count=+1 gc_bytes_age=+860 +>> put k=n v=n5 ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=o ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1330 +>> put k=q v=q5 ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=r ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1330 +>> put k=a v=a6 t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 
val_bytes=+55 live_count=+1 live_bytes=+69 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=b v=b6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_bytes=+48 gc_bytes_age=+1786 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=c v=c6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-198 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> del k=d t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=e t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 live_count=-1 live_bytes=-21 gc_bytes_age=+7614 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=f t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 gc_bytes_age=+5630 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> put k=g v=g6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-194 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=h v=h6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-198 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=i v=i6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-194 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> del k=j t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 gc_bytes_age=+5630 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=k t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=l t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 
gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> put k=m v=m6 t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=n v=n6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_bytes=+48 gc_bytes_age=+1786 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=o v=o6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-190 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> del k=p t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=q t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 live_count=-1 live_bytes=-21 gc_bytes_age=+7614 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=r t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 gc_bytes_age=+5638 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> at end: +txn: "A" meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} lock=true stat=PENDING rts=6.000000000,0 wto=false gul=0,0 +rangekey: {g-s}/[4.000000000,0=/ 3.000000000,0=/] +meta: "a"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "a"/6.000000000,0 -> /BYTES/a6 +meta: "b"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "b"/6.000000000,0 -> /BYTES/b6 +data: "b"/1.000000000,0 -> /BYTES/b1 +meta: "c"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= 
txnDidNotUpdateMeta=true +data: "c"/6.000000000,0 -> /BYTES/c6 +data: "c"/1.000000000,0 -> / +meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "d"/6.000000000,0 -> / +meta: "e"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "e"/6.000000000,0 -> / +data: "e"/1.000000000,0 -> /BYTES/e1 +meta: "f"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "f"/6.000000000,0 -> / +data: "f"/1.000000000,0 -> / +meta: "g"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "g"/6.000000000,0 -> /BYTES/g6 +data: "g"/1.000000000,0 -> /BYTES/g1 +meta: "h"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "h"/6.000000000,0 -> /BYTES/h6 +data: "h"/1.000000000,0 -> / +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/6.000000000,0 -> /BYTES/i6 +data: "i"/1.000000000,0 -> /BYTES/i1 +meta: "j"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "j"/6.000000000,0 -> / +data: "j"/1.000000000,0 -> / +meta: "k"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "k"/6.000000000,0 -> / +meta: "l"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 
epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "l"/6.000000000,0 -> / +meta: "m"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "m"/6.000000000,0 -> /BYTES/m6 +meta: "n"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "n"/6.000000000,0 -> /BYTES/n6 +data: "n"/5.000000000,0 -> /BYTES/n5 +meta: "o"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "o"/6.000000000,0 -> /BYTES/o6 +data: "o"/5.000000000,0 -> / +meta: "p"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "p"/6.000000000,0 -> / +meta: "q"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "q"/6.000000000,0 -> / +data: "q"/5.000000000,0 -> /BYTES/q5 +meta: "r"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "r"/6.000000000,0 -> / +data: "r"/5.000000000,0 -> / +stats: key_count=18 key_bytes=396 val_count=30 val_bytes=969 range_key_count=1 range_key_bytes=22 range_val_count=2 live_count=9 live_bytes=621 gc_bytes_age=72435 intent_count=18 intent_bytes=279 separated_intent_count=18 intent_age=1692 + +run stats ok +with t=A status=COMMITTED + resolve_intent k=a + resolve_intent k=b + resolve_intent k=c + resolve_intent k=d + resolve_intent k=e + resolve_intent k=f + resolve_intent k=g + resolve_intent k=h + resolve_intent k=i + 
resolve_intent k=j + resolve_intent k=k + resolve_intent k=l + resolve_intent k=m + resolve_intent k=n + resolve_intent k=o + resolve_intent k=p + resolve_intent k=q + resolve_intent k=r +---- +>> resolve_intent k=a t=A status=COMMITTED +stats: val_bytes=-48 live_bytes=-48 intent_count=-1 intent_bytes=-19 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=b t=A status=COMMITTED +stats: val_bytes=-48 live_bytes=-48 intent_count=-1 intent_bytes=-19 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=c t=A status=COMMITTED +stats: val_bytes=-48 live_bytes=-48 intent_count=-1 intent_bytes=-19 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=d t=A status=COMMITTED +stats: val_bytes=-48 gc_bytes_age=-4512 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=e t=A status=COMMITTED +stats: val_bytes=-48 gc_bytes_age=-4512 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=f t=A status=COMMITTED +stats: val_bytes=-48 gc_bytes_age=-4512 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=g t=A status=COMMITTED +stats: val_bytes=-48 live_bytes=-48 intent_count=-1 intent_bytes=-19 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=h t=A status=COMMITTED +stats: val_bytes=-48 live_bytes=-48 intent_count=-1 intent_bytes=-19 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=i t=A status=COMMITTED +stats: val_bytes=-48 live_bytes=-48 intent_count=-1 intent_bytes=-19 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=j t=A status=COMMITTED +stats: val_bytes=-48 gc_bytes_age=-4512 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=k t=A status=COMMITTED +stats: val_bytes=-48 gc_bytes_age=-4512 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=l t=A status=COMMITTED +stats: val_bytes=-48 
gc_bytes_age=-4512 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=m t=A status=COMMITTED +stats: val_bytes=-48 live_bytes=-48 intent_count=-1 intent_bytes=-19 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=n t=A status=COMMITTED +stats: val_bytes=-48 live_bytes=-48 intent_count=-1 intent_bytes=-19 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=o t=A status=COMMITTED +stats: val_bytes=-48 live_bytes=-48 intent_count=-1 intent_bytes=-19 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=p t=A status=COMMITTED +stats: val_bytes=-48 gc_bytes_age=-4512 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=q t=A status=COMMITTED +stats: val_bytes=-48 gc_bytes_age=-4512 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=r t=A status=COMMITTED +stats: val_bytes=-48 gc_bytes_age=-4512 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> at end: +rangekey: {g-s}/[4.000000000,0=/ 3.000000000,0=/] +data: "a"/6.000000000,0 -> /BYTES/a6 +data: "b"/6.000000000,0 -> /BYTES/b6 +data: "b"/1.000000000,0 -> /BYTES/b1 +data: "c"/6.000000000,0 -> /BYTES/c6 +data: "c"/1.000000000,0 -> / +data: "d"/6.000000000,0 -> / +data: "e"/6.000000000,0 -> / +data: "e"/1.000000000,0 -> /BYTES/e1 +data: "f"/6.000000000,0 -> / +data: "f"/1.000000000,0 -> / +data: "g"/6.000000000,0 -> /BYTES/g6 +data: "g"/1.000000000,0 -> /BYTES/g1 +data: "h"/6.000000000,0 -> /BYTES/h6 +data: "h"/1.000000000,0 -> / +data: "i"/6.000000000,0 -> /BYTES/i6 +data: "i"/1.000000000,0 -> /BYTES/i1 +data: "j"/6.000000000,0 -> / +data: "j"/1.000000000,0 -> / +data: "k"/6.000000000,0 -> / +data: "l"/6.000000000,0 -> / +data: "m"/6.000000000,0 -> /BYTES/m6 +data: "n"/6.000000000,0 -> /BYTES/n6 +data: "n"/5.000000000,0 -> /BYTES/n5 +data: "o"/6.000000000,0 -> /BYTES/o6 +data: "o"/5.000000000,0 -> / +data: "p"/6.000000000,0 -> / +data: 
"q"/6.000000000,0 -> / +data: "q"/5.000000000,0 -> /BYTES/q5 +data: "r"/6.000000000,0 -> / +data: "r"/5.000000000,0 -> / +stats: key_count=18 key_bytes=396 val_count=30 val_bytes=105 range_key_count=1 range_key_bytes=22 range_val_count=2 live_count=9 live_bytes=189 gc_bytes_age=31827 diff --git a/pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_resolve_pushed b/pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_resolve_pushed new file mode 100644 index 000000000000..d927491cf8cb --- /dev/null +++ b/pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_resolve_pushed @@ -0,0 +1,270 @@ +# Tests MVCC stats calculations when resolving intents. Intermediate states are +# tested through stats traces. Initial state: +# +# (x is tombstone, o---o is range tombstone, [] is intent) +# +# 7 +# 6 [a6][b6][c6][x] [x] [x] [g6][h6][i6][x] [x] [x] [m6][n6][o6] x x x +# 5 n5 x q5 x +# 4 o-----------------------------------------------o +# 3 o-----------------------------------------------o +# 2 +# 1 b1 x e1 x h1 x k1 x +# a b c d e f g h i j k l m n o p q r s +# +# This uses two range tombstones, since the lowest is the one that matters for +# point key GCBytesAge. It also uses points below/above range tombstones, +# because iterators surface range keys separately from point keys, which can +# cause bugs if callers don't step onto the point key. +# +# TODO(erikgrinaker): This is probably better handled by randomized or +# generative testing, since the combinations are getting unwieldy. But it'll do +# for now. 
+ +run stats ok +with ts=1 + put k=b v=b1 + del k=c + put k=e v=e1 + del k=f + put k=g v=g1 + del k=h + put k=i v=i1 + del k=j +del_range_ts k=g end=s ts=3 +del_range_ts k=g end=s ts=4 +with ts=5 + put k=n v=n5 + del k=o + put k=q v=q5 + del k=r +with t=A + txn_begin ts=6 + put k=a v=a6 + put k=b v=b6 + put k=c v=c6 + del k=d + del k=e + del k=f + put k=g v=g6 + put k=h v=h6 + put k=i v=i6 + del k=j + del k=k + del k=l + put k=m v=m6 + put k=n v=n6 + put k=o v=o6 + del k=p + del k=q + del k=r +---- +>> put k=b v=b1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=c ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> put k=e v=e1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=f ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> put k=g v=g1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=h ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> put k=i v=i1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=j ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> del_range_ts k=g end=s ts=3 +stats: range_key_count=+1 range_key_bytes=+13 range_val_count=+1 live_count=-2 live_bytes=-42 gc_bytes_age=+5335 +>> del_range_ts k=g end=s ts=4 +stats: range_key_bytes=+9 range_val_count=+1 gc_bytes_age=+860 +>> put k=n v=n5 ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=o ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1330 +>> put k=q v=q5 ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=r ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1330 +>> put k=a v=a6 t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 
val_bytes=+55 live_count=+1 live_bytes=+69 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=b v=b6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_bytes=+48 gc_bytes_age=+1786 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=c v=c6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-198 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> del k=d t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=e t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 live_count=-1 live_bytes=-21 gc_bytes_age=+7614 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=f t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 gc_bytes_age=+5630 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> put k=g v=g6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-194 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=h v=h6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-198 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=i v=i6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-194 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> del k=j t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 gc_bytes_age=+5630 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=k t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=l t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 
gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> put k=m v=m6 t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=n v=n6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_bytes=+48 gc_bytes_age=+1786 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=o v=o6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-190 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> del k=p t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=q t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 live_count=-1 live_bytes=-21 gc_bytes_age=+7614 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=r t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 gc_bytes_age=+5638 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> at end: +txn: "A" meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} lock=true stat=PENDING rts=6.000000000,0 wto=false gul=0,0 +rangekey: {g-s}/[4.000000000,0=/ 3.000000000,0=/] +meta: "a"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "a"/6.000000000,0 -> /BYTES/a6 +meta: "b"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "b"/6.000000000,0 -> /BYTES/b6 +data: "b"/1.000000000,0 -> /BYTES/b1 +meta: "c"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= 
txnDidNotUpdateMeta=true +data: "c"/6.000000000,0 -> /BYTES/c6 +data: "c"/1.000000000,0 -> / +meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "d"/6.000000000,0 -> / +meta: "e"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "e"/6.000000000,0 -> / +data: "e"/1.000000000,0 -> /BYTES/e1 +meta: "f"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "f"/6.000000000,0 -> / +data: "f"/1.000000000,0 -> / +meta: "g"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "g"/6.000000000,0 -> /BYTES/g6 +data: "g"/1.000000000,0 -> /BYTES/g1 +meta: "h"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "h"/6.000000000,0 -> /BYTES/h6 +data: "h"/1.000000000,0 -> / +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/6.000000000,0 -> /BYTES/i6 +data: "i"/1.000000000,0 -> /BYTES/i1 +meta: "j"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "j"/6.000000000,0 -> / +data: "j"/1.000000000,0 -> / +meta: "k"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "k"/6.000000000,0 -> / +meta: "l"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 
epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "l"/6.000000000,0 -> / +meta: "m"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "m"/6.000000000,0 -> /BYTES/m6 +meta: "n"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "n"/6.000000000,0 -> /BYTES/n6 +data: "n"/5.000000000,0 -> /BYTES/n5 +meta: "o"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "o"/6.000000000,0 -> /BYTES/o6 +data: "o"/5.000000000,0 -> / +meta: "p"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "p"/6.000000000,0 -> / +meta: "q"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "q"/6.000000000,0 -> / +data: "q"/5.000000000,0 -> /BYTES/q5 +meta: "r"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "r"/6.000000000,0 -> / +data: "r"/5.000000000,0 -> / +stats: key_count=18 key_bytes=396 val_count=30 val_bytes=969 range_key_count=1 range_key_bytes=22 range_val_count=2 live_count=9 live_bytes=621 gc_bytes_age=72435 intent_count=18 intent_bytes=279 separated_intent_count=18 intent_age=1692 + +run stats ok +with t=A status=COMMITTED + txn_advance ts=7 + resolve_intent k=a + resolve_intent k=b + resolve_intent k=c + resolve_intent k=d + resolve_intent k=e + resolve_intent k=f + resolve_intent k=g + resolve_intent k=h + 
resolve_intent k=i + resolve_intent k=j + resolve_intent k=k + resolve_intent k=l + resolve_intent k=m + resolve_intent k=n + resolve_intent k=o + resolve_intent k=p + resolve_intent k=q + resolve_intent k=r +---- +>> resolve_intent k=a t=A status=COMMITTED +stats: val_bytes=-35 live_bytes=-35 intent_count=-1 intent_bytes=-19 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=b t=A status=COMMITTED +stats: val_bytes=-35 live_bytes=-35 gc_bytes_age=-19 intent_count=-1 intent_bytes=-19 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=c t=A status=COMMITTED +stats: val_bytes=-35 live_bytes=-35 intent_count=-1 intent_bytes=-19 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=d t=A status=COMMITTED +stats: val_bytes=-35 gc_bytes_age=-3317 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=e t=A status=COMMITTED +stats: val_bytes=-35 gc_bytes_age=-3336 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=f t=A status=COMMITTED +stats: val_bytes=-35 gc_bytes_age=-3317 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=g t=A status=COMMITTED +stats: val_bytes=-35 live_bytes=-35 intent_count=-1 intent_bytes=-19 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=h t=A status=COMMITTED +stats: val_bytes=-35 live_bytes=-35 intent_count=-1 intent_bytes=-19 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=i t=A status=COMMITTED +stats: val_bytes=-35 live_bytes=-35 intent_count=-1 intent_bytes=-19 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=j t=A status=COMMITTED +stats: val_bytes=-35 gc_bytes_age=-3317 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=k t=A status=COMMITTED +stats: val_bytes=-35 gc_bytes_age=-3317 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=l t=A status=COMMITTED 
+stats: val_bytes=-35 gc_bytes_age=-3317 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=m t=A status=COMMITTED +stats: val_bytes=-35 live_bytes=-35 intent_count=-1 intent_bytes=-19 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=n t=A status=COMMITTED +stats: val_bytes=-35 live_bytes=-35 gc_bytes_age=-19 intent_count=-1 intent_bytes=-19 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=o t=A status=COMMITTED +stats: val_bytes=-35 live_bytes=-35 intent_count=-1 intent_bytes=-19 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=p t=A status=COMMITTED +stats: val_bytes=-35 gc_bytes_age=-3317 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=q t=A status=COMMITTED +stats: val_bytes=-35 gc_bytes_age=-3336 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> resolve_intent k=r t=A status=COMMITTED +stats: val_bytes=-35 gc_bytes_age=-3317 intent_count=-1 intent_bytes=-12 separated_intent_count=-1 intent_age=-94 +>> at end: +txn: "A" meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} lock=true stat=PENDING rts=6.000000000,0 wto=false gul=0,0 +rangekey: {g-s}/[4.000000000,0=/ 3.000000000,0=/] +data: "a"/7.000000000,0 -> {localTs=6.000000000,0}/BYTES/a6 +data: "b"/7.000000000,0 -> {localTs=6.000000000,0}/BYTES/b6 +data: "b"/1.000000000,0 -> /BYTES/b1 +data: "c"/7.000000000,0 -> {localTs=6.000000000,0}/BYTES/c6 +data: "c"/1.000000000,0 -> / +data: "d"/7.000000000,0 -> {localTs=6.000000000,0}/ +data: "e"/7.000000000,0 -> {localTs=6.000000000,0}/ +data: "e"/1.000000000,0 -> /BYTES/e1 +data: "f"/7.000000000,0 -> {localTs=6.000000000,0}/ +data: "f"/1.000000000,0 -> / +data: "g"/7.000000000,0 -> {localTs=6.000000000,0}/BYTES/g6 +data: "g"/1.000000000,0 -> /BYTES/g1 +data: "h"/7.000000000,0 -> {localTs=6.000000000,0}/BYTES/h6 +data: "h"/1.000000000,0 -> / +data: "i"/7.000000000,0 -> 
{localTs=6.000000000,0}/BYTES/i6 +data: "i"/1.000000000,0 -> /BYTES/i1 +data: "j"/7.000000000,0 -> {localTs=6.000000000,0}/ +data: "j"/1.000000000,0 -> / +data: "k"/7.000000000,0 -> {localTs=6.000000000,0}/ +data: "l"/7.000000000,0 -> {localTs=6.000000000,0}/ +data: "m"/7.000000000,0 -> {localTs=6.000000000,0}/BYTES/m6 +data: "n"/7.000000000,0 -> {localTs=6.000000000,0}/BYTES/n6 +data: "n"/5.000000000,0 -> /BYTES/n5 +data: "o"/7.000000000,0 -> {localTs=6.000000000,0}/BYTES/o6 +data: "o"/5.000000000,0 -> / +data: "p"/7.000000000,0 -> {localTs=6.000000000,0}/ +data: "q"/7.000000000,0 -> {localTs=6.000000000,0}/ +data: "q"/5.000000000,0 -> /BYTES/q5 +data: "r"/7.000000000,0 -> {localTs=6.000000000,0}/ +data: "r"/5.000000000,0 -> / +stats: key_count=18 key_bytes=396 val_count=30 val_bytes=339 range_key_count=1 range_key_bytes=22 range_val_count=2 live_count=9 live_bytes=306 gc_bytes_age=42506 diff --git a/pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_resolve_range_abort b/pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_resolve_range_abort new file mode 100644 index 000000000000..0bab1efcbfba --- /dev/null +++ b/pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_resolve_range_abort @@ -0,0 +1,198 @@ +# Tests MVCC stats calculations when resolving intents. Intermediate states are +# tested through stats traces. Initial state: +# +# (x is tombstone, o---o is range tombstone, [] is intent) +# +# 7 +# 6 [a6][b6][c6][x] [x] [x] [g6][h6][i6][x] [x] [x] [m6][n6][o6] x x x +# 5 n5 x q5 x +# 4 o-----------------------------------------------o +# 3 o-----------------------------------------------o +# 2 +# 1 b1 x e1 x h1 x k1 x +# a b c d e f g h i j k l m n o p q r s +# +# This uses two range tombstones, since the lowest is the one that matters for +# point key GCBytesAge. 
It also uses points below/above range tombstones, +# because iterators surface range keys separately from point keys, which can +# cause bugs if callers don't step onto the point key. +# +# TODO(erikgrinaker): This is probably better handled by randomized or +# generative testing, since the combinations are getting unwieldy. But it'll do +# for now. + +run stats ok +with ts=1 + put k=b v=b1 + del k=c + put k=e v=e1 + del k=f + put k=g v=g1 + del k=h + put k=i v=i1 + del k=j +del_range_ts k=g end=s ts=3 +del_range_ts k=g end=s ts=4 +with ts=5 + put k=n v=n5 + del k=o + put k=q v=q5 + del k=r +with t=A + txn_begin ts=6 + put k=a v=a6 + put k=b v=b6 + put k=c v=c6 + del k=d + del k=e + del k=f + put k=g v=g6 + put k=h v=h6 + put k=i v=i6 + del k=j + del k=k + del k=l + put k=m v=m6 + put k=n v=n6 + put k=o v=o6 + del k=p + del k=q + del k=r +---- +>> put k=b v=b1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=c ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> put k=e v=e1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=f ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> put k=g v=g1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=h ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> put k=i v=i1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=j ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> del_range_ts k=g end=s ts=3 +stats: range_key_count=+1 range_key_bytes=+13 range_val_count=+1 live_count=-2 live_bytes=-42 gc_bytes_age=+5335 +>> del_range_ts k=g end=s ts=4 +stats: range_key_bytes=+9 range_val_count=+1 gc_bytes_age=+860 +>> put k=n v=n5 ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> 
del k=o ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1330 +>> put k=q v=q5 ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=r ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1330 +>> put k=a v=a6 t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=b v=b6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_bytes=+48 gc_bytes_age=+1786 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=c v=c6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-198 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> del k=d t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=e t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 live_count=-1 live_bytes=-21 gc_bytes_age=+7614 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=f t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 gc_bytes_age=+5630 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> put k=g v=g6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-194 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=h v=h6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-198 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=i v=i6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-194 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> del k=j t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 
gc_bytes_age=+5630 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=k t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=l t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> put k=m v=m6 t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=n v=n6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_bytes=+48 gc_bytes_age=+1786 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=o v=o6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-190 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> del k=p t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=q t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 live_count=-1 live_bytes=-21 gc_bytes_age=+7614 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=r t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 gc_bytes_age=+5638 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> at end: +txn: "A" meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} lock=true stat=PENDING rts=6.000000000,0 wto=false gul=0,0 +rangekey: {g-s}/[4.000000000,0=/ 3.000000000,0=/] +meta: "a"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "a"/6.000000000,0 -> /BYTES/a6 +meta: "b"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 
ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "b"/6.000000000,0 -> /BYTES/b6 +data: "b"/1.000000000,0 -> /BYTES/b1 +meta: "c"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "c"/6.000000000,0 -> /BYTES/c6 +data: "c"/1.000000000,0 -> / +meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "d"/6.000000000,0 -> / +meta: "e"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "e"/6.000000000,0 -> / +data: "e"/1.000000000,0 -> /BYTES/e1 +meta: "f"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "f"/6.000000000,0 -> / +data: "f"/1.000000000,0 -> / +meta: "g"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "g"/6.000000000,0 -> /BYTES/g6 +data: "g"/1.000000000,0 -> /BYTES/g1 +meta: "h"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "h"/6.000000000,0 -> /BYTES/h6 +data: "h"/1.000000000,0 -> / +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/6.000000000,0 -> /BYTES/i6 +data: "i"/1.000000000,0 -> /BYTES/i1 +meta: "j"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= 
txnDidNotUpdateMeta=true +data: "j"/6.000000000,0 -> / +data: "j"/1.000000000,0 -> / +meta: "k"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "k"/6.000000000,0 -> / +meta: "l"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "l"/6.000000000,0 -> / +meta: "m"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "m"/6.000000000,0 -> /BYTES/m6 +meta: "n"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "n"/6.000000000,0 -> /BYTES/n6 +data: "n"/5.000000000,0 -> /BYTES/n5 +meta: "o"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "o"/6.000000000,0 -> /BYTES/o6 +data: "o"/5.000000000,0 -> / +meta: "p"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "p"/6.000000000,0 -> / +meta: "q"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "q"/6.000000000,0 -> / +data: "q"/5.000000000,0 -> /BYTES/q5 +meta: "r"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "r"/6.000000000,0 -> / +data: "r"/5.000000000,0 -> / +stats: key_count=18 key_bytes=396 val_count=30 val_bytes=969 range_key_count=1 range_key_bytes=22 range_val_count=2 live_count=9 
live_bytes=621 gc_bytes_age=72435 intent_count=18 intent_bytes=279 separated_intent_count=18 intent_age=1692 + +run stats ok +resolve_intent_range t=A k=a end=z status=ABORTED +---- +>> resolve_intent_range t=A k=a end=z status=ABORTED +stats: key_count=-6 key_bytes=-228 val_count=-18 val_bytes=-927 live_count=-5 live_bytes=-537 gc_bytes_age=-58036 intent_count=-18 intent_bytes=-279 separated_intent_count=-18 intent_age=-1692 +>> at end: +rangekey: {g-s}/[4.000000000,0=/ 3.000000000,0=/] +data: "b"/1.000000000,0 -> /BYTES/b1 +data: "c"/1.000000000,0 -> / +data: "e"/1.000000000,0 -> /BYTES/e1 +data: "f"/1.000000000,0 -> / +data: "g"/1.000000000,0 -> /BYTES/g1 +data: "h"/1.000000000,0 -> / +data: "i"/1.000000000,0 -> /BYTES/i1 +data: "j"/1.000000000,0 -> / +data: "n"/5.000000000,0 -> /BYTES/n5 +data: "o"/5.000000000,0 -> / +data: "q"/5.000000000,0 -> /BYTES/q5 +data: "r"/5.000000000,0 -> / +stats: key_count=12 key_bytes=168 val_count=12 val_bytes=42 range_key_count=1 range_key_bytes=22 range_val_count=2 live_count=4 live_bytes=84 gc_bytes_age=14399 diff --git a/pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_resolve_range_commit b/pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_resolve_range_commit new file mode 100644 index 000000000000..5bc127d55963 --- /dev/null +++ b/pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_resolve_range_commit @@ -0,0 +1,216 @@ +# Tests MVCC stats calculations when resolving intents. Intermediate states are +# tested through stats traces. 
Initial state: +# +# (x is tombstone, o---o is range tombstone, [] is intent) +# +# 7 +# 6 [a6][b6][c6][x] [x] [x] [g6][h6][i6][x] [x] [x] [m6][n6][o6] x x x +# 5 n5 x q5 x +# 4 o-----------------------------------------------o +# 3 o-----------------------------------------------o +# 2 +# 1 b1 x e1 x h1 x k1 x +# a b c d e f g h i j k l m n o p q r s +# +# This uses two range tombstones, since the lowest is the one that matters for +# point key GCBytesAge. It also uses points below/above range tombstones, +# because iterators surface range keys separately from point keys, which can +# cause bugs if callers don't step onto the point key. +# +# TODO(erikgrinaker): This is probably better handled by randomized or +# generative testing, since the combinations are getting unwieldy. But it'll do +# for now. + +run stats ok +with ts=1 + put k=b v=b1 + del k=c + put k=e v=e1 + del k=f + put k=g v=g1 + del k=h + put k=i v=i1 + del k=j +del_range_ts k=g end=s ts=3 +del_range_ts k=g end=s ts=4 +with ts=5 + put k=n v=n5 + del k=o + put k=q v=q5 + del k=r +with t=A + txn_begin ts=6 + put k=a v=a6 + put k=b v=b6 + put k=c v=c6 + del k=d + del k=e + del k=f + put k=g v=g6 + put k=h v=h6 + put k=i v=i6 + del k=j + del k=k + del k=l + put k=m v=m6 + put k=n v=n6 + put k=o v=o6 + del k=p + del k=q + del k=r +---- +>> put k=b v=b1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=c ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> put k=e v=e1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=f ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> put k=g v=g1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=h ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> put k=i v=i1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 
live_count=+1 live_bytes=+21 +>> del k=j ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> del_range_ts k=g end=s ts=3 +stats: range_key_count=+1 range_key_bytes=+13 range_val_count=+1 live_count=-2 live_bytes=-42 gc_bytes_age=+5335 +>> del_range_ts k=g end=s ts=4 +stats: range_key_bytes=+9 range_val_count=+1 gc_bytes_age=+860 +>> put k=n v=n5 ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=o ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1330 +>> put k=q v=q5 ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=r ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1330 +>> put k=a v=a6 t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=b v=b6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_bytes=+48 gc_bytes_age=+1786 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=c v=c6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-198 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> del k=d t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=e t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 live_count=-1 live_bytes=-21 gc_bytes_age=+7614 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=f t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 gc_bytes_age=+5630 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> put k=g v=g6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-194 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 
intent_age=+94 +>> put k=h v=h6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-198 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=i v=i6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-194 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> del k=j t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 gc_bytes_age=+5630 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=k t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=l t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> put k=m v=m6 t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=n v=n6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_bytes=+48 gc_bytes_age=+1786 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=o v=o6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-190 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> del k=p t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=q t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 live_count=-1 live_bytes=-21 gc_bytes_age=+7614 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=r t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 gc_bytes_age=+5638 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> at end: +txn: "A" 
meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} lock=true stat=PENDING rts=6.000000000,0 wto=false gul=0,0 +rangekey: {g-s}/[4.000000000,0=/ 3.000000000,0=/] +meta: "a"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "a"/6.000000000,0 -> /BYTES/a6 +meta: "b"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "b"/6.000000000,0 -> /BYTES/b6 +data: "b"/1.000000000,0 -> /BYTES/b1 +meta: "c"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "c"/6.000000000,0 -> /BYTES/c6 +data: "c"/1.000000000,0 -> / +meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "d"/6.000000000,0 -> / +meta: "e"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "e"/6.000000000,0 -> / +data: "e"/1.000000000,0 -> /BYTES/e1 +meta: "f"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "f"/6.000000000,0 -> / +data: "f"/1.000000000,0 -> / +meta: "g"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "g"/6.000000000,0 -> /BYTES/g6 +data: "g"/1.000000000,0 -> /BYTES/g1 +meta: "h"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: 
"h"/6.000000000,0 -> /BYTES/h6 +data: "h"/1.000000000,0 -> / +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/6.000000000,0 -> /BYTES/i6 +data: "i"/1.000000000,0 -> /BYTES/i1 +meta: "j"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "j"/6.000000000,0 -> / +data: "j"/1.000000000,0 -> / +meta: "k"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "k"/6.000000000,0 -> / +meta: "l"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "l"/6.000000000,0 -> / +meta: "m"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "m"/6.000000000,0 -> /BYTES/m6 +meta: "n"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "n"/6.000000000,0 -> /BYTES/n6 +data: "n"/5.000000000,0 -> /BYTES/n5 +meta: "o"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "o"/6.000000000,0 -> /BYTES/o6 +data: "o"/5.000000000,0 -> / +meta: "p"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "p"/6.000000000,0 -> / +meta: "q"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= 
txnDidNotUpdateMeta=true +data: "q"/6.000000000,0 -> / +data: "q"/5.000000000,0 -> /BYTES/q5 +meta: "r"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "r"/6.000000000,0 -> / +data: "r"/5.000000000,0 -> / +stats: key_count=18 key_bytes=396 val_count=30 val_bytes=969 range_key_count=1 range_key_bytes=22 range_val_count=2 live_count=9 live_bytes=621 gc_bytes_age=72435 intent_count=18 intent_bytes=279 separated_intent_count=18 intent_age=1692 + +run stats ok +resolve_intent_range t=A k=a end=z status=COMMITTED +---- +>> resolve_intent_range t=A k=a end=z status=COMMITTED +stats: val_bytes=-864 live_bytes=-432 gc_bytes_age=-40608 intent_count=-18 intent_bytes=-279 separated_intent_count=-18 intent_age=-1692 +>> at end: +rangekey: {g-s}/[4.000000000,0=/ 3.000000000,0=/] +data: "a"/6.000000000,0 -> /BYTES/a6 +data: "b"/6.000000000,0 -> /BYTES/b6 +data: "b"/1.000000000,0 -> /BYTES/b1 +data: "c"/6.000000000,0 -> /BYTES/c6 +data: "c"/1.000000000,0 -> / +data: "d"/6.000000000,0 -> / +data: "e"/6.000000000,0 -> / +data: "e"/1.000000000,0 -> /BYTES/e1 +data: "f"/6.000000000,0 -> / +data: "f"/1.000000000,0 -> / +data: "g"/6.000000000,0 -> /BYTES/g6 +data: "g"/1.000000000,0 -> /BYTES/g1 +data: "h"/6.000000000,0 -> /BYTES/h6 +data: "h"/1.000000000,0 -> / +data: "i"/6.000000000,0 -> /BYTES/i6 +data: "i"/1.000000000,0 -> /BYTES/i1 +data: "j"/6.000000000,0 -> / +data: "j"/1.000000000,0 -> / +data: "k"/6.000000000,0 -> / +data: "l"/6.000000000,0 -> / +data: "m"/6.000000000,0 -> /BYTES/m6 +data: "n"/6.000000000,0 -> /BYTES/n6 +data: "n"/5.000000000,0 -> /BYTES/n5 +data: "o"/6.000000000,0 -> /BYTES/o6 +data: "o"/5.000000000,0 -> / +data: "p"/6.000000000,0 -> / +data: "q"/6.000000000,0 -> / +data: "q"/5.000000000,0 -> /BYTES/q5 +data: "r"/6.000000000,0 -> / +data: "r"/5.000000000,0 -> / +stats: key_count=18 key_bytes=396 val_count=30 val_bytes=105 range_key_count=1 
range_key_bytes=22 range_val_count=2 live_count=9 live_bytes=189 gc_bytes_age=31827 diff --git a/pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_resolve_range_pushed b/pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_resolve_range_pushed new file mode 100644 index 000000000000..454076c6037c --- /dev/null +++ b/pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_resolve_range_pushed @@ -0,0 +1,218 @@ +# Tests MVCC stats calculations when resolving intents. Intermediate states are +# tested through stats traces. Initial state: +# +# (x is tombstone, o---o is range tombstone, [] is intent) +# +# 7 +# 6 [a6][b6][c6][x] [x] [x] [g6][h6][i6][x] [x] [x] [m6][n6][o6] x x x +# 5 n5 x q5 x +# 4 o-----------------------------------------------o +# 3 o-----------------------------------------------o +# 2 +# 1 b1 x e1 x h1 x k1 x +# a b c d e f g h i j k l m n o p q r s +# +# This uses two range tombstones, since the lowest is the one that matters for +# point key GCBytesAge. It also uses points below/above range tombstones, +# because iterators surface range keys separately from point keys, which can +# cause bugs if callers don't step onto the point key. +# +# TODO(erikgrinaker): This is probably better handled by randomized or +# generative testing, since the combinations are getting unwieldy. But it'll do +# for now. 
+ +run stats ok +with ts=1 + put k=b v=b1 + del k=c + put k=e v=e1 + del k=f + put k=g v=g1 + del k=h + put k=i v=i1 + del k=j +del_range_ts k=g end=s ts=3 +del_range_ts k=g end=s ts=4 +with ts=5 + put k=n v=n5 + del k=o + put k=q v=q5 + del k=r +with t=A + txn_begin ts=6 + put k=a v=a6 + put k=b v=b6 + put k=c v=c6 + del k=d + del k=e + del k=f + put k=g v=g6 + put k=h v=h6 + put k=i v=i6 + del k=j + del k=k + del k=l + put k=m v=m6 + put k=n v=n6 + put k=o v=o6 + del k=p + del k=q + del k=r +---- +>> put k=b v=b1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=c ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> put k=e v=e1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=f ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> put k=g v=g1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=h ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> put k=i v=i1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=j ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> del_range_ts k=g end=s ts=3 +stats: range_key_count=+1 range_key_bytes=+13 range_val_count=+1 live_count=-2 live_bytes=-42 gc_bytes_age=+5335 +>> del_range_ts k=g end=s ts=4 +stats: range_key_bytes=+9 range_val_count=+1 gc_bytes_age=+860 +>> put k=n v=n5 ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=o ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1330 +>> put k=q v=q5 ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=r ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1330 +>> put k=a v=a6 t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 
val_bytes=+55 live_count=+1 live_bytes=+69 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=b v=b6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_bytes=+48 gc_bytes_age=+1786 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=c v=c6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-198 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> del k=d t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=e t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 live_count=-1 live_bytes=-21 gc_bytes_age=+7614 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=f t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 gc_bytes_age=+5630 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> put k=g v=g6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-194 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=h v=h6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-198 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=i v=i6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-194 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> del k=j t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 gc_bytes_age=+5630 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=k t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=l t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 
gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> put k=m v=m6 t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=n v=n6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_bytes=+48 gc_bytes_age=+1786 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=o v=o6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-190 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> del k=p t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=q t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 live_count=-1 live_bytes=-21 gc_bytes_age=+7614 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=r t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 gc_bytes_age=+5638 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> at end: +txn: "A" meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} lock=true stat=PENDING rts=6.000000000,0 wto=false gul=0,0 +rangekey: {g-s}/[4.000000000,0=/ 3.000000000,0=/] +meta: "a"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "a"/6.000000000,0 -> /BYTES/a6 +meta: "b"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "b"/6.000000000,0 -> /BYTES/b6 +data: "b"/1.000000000,0 -> /BYTES/b1 +meta: "c"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= 
txnDidNotUpdateMeta=true +data: "c"/6.000000000,0 -> /BYTES/c6 +data: "c"/1.000000000,0 -> / +meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "d"/6.000000000,0 -> / +meta: "e"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "e"/6.000000000,0 -> / +data: "e"/1.000000000,0 -> /BYTES/e1 +meta: "f"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "f"/6.000000000,0 -> / +data: "f"/1.000000000,0 -> / +meta: "g"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "g"/6.000000000,0 -> /BYTES/g6 +data: "g"/1.000000000,0 -> /BYTES/g1 +meta: "h"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "h"/6.000000000,0 -> /BYTES/h6 +data: "h"/1.000000000,0 -> / +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/6.000000000,0 -> /BYTES/i6 +data: "i"/1.000000000,0 -> /BYTES/i1 +meta: "j"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "j"/6.000000000,0 -> / +data: "j"/1.000000000,0 -> / +meta: "k"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "k"/6.000000000,0 -> / +meta: "l"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 
epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "l"/6.000000000,0 -> / +meta: "m"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "m"/6.000000000,0 -> /BYTES/m6 +meta: "n"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "n"/6.000000000,0 -> /BYTES/n6 +data: "n"/5.000000000,0 -> /BYTES/n5 +meta: "o"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "o"/6.000000000,0 -> /BYTES/o6 +data: "o"/5.000000000,0 -> / +meta: "p"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "p"/6.000000000,0 -> / +meta: "q"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "q"/6.000000000,0 -> / +data: "q"/5.000000000,0 -> /BYTES/q5 +meta: "r"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "r"/6.000000000,0 -> / +data: "r"/5.000000000,0 -> / +stats: key_count=18 key_bytes=396 val_count=30 val_bytes=969 range_key_count=1 range_key_bytes=22 range_val_count=2 live_count=9 live_bytes=621 gc_bytes_age=72435 intent_count=18 intent_bytes=279 separated_intent_count=18 intent_age=1692 + +run stats ok +txn_advance t=A ts=7 +resolve_intent_range t=A k=a end=z status=COMMITTED +---- +>> resolve_intent_range t=A k=a end=z status=COMMITTED +stats: val_bytes=-630 live_bytes=-315 gc_bytes_age=-29929 intent_count=-18 
intent_bytes=-279 separated_intent_count=-18 intent_age=-1692 +>> at end: +txn: "A" meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} lock=true stat=PENDING rts=6.000000000,0 wto=false gul=0,0 +rangekey: {g-s}/[4.000000000,0=/ 3.000000000,0=/] +data: "a"/7.000000000,0 -> {localTs=6.000000000,0}/BYTES/a6 +data: "b"/7.000000000,0 -> {localTs=6.000000000,0}/BYTES/b6 +data: "b"/1.000000000,0 -> /BYTES/b1 +data: "c"/7.000000000,0 -> {localTs=6.000000000,0}/BYTES/c6 +data: "c"/1.000000000,0 -> / +data: "d"/7.000000000,0 -> {localTs=6.000000000,0}/ +data: "e"/7.000000000,0 -> {localTs=6.000000000,0}/ +data: "e"/1.000000000,0 -> /BYTES/e1 +data: "f"/7.000000000,0 -> {localTs=6.000000000,0}/ +data: "f"/1.000000000,0 -> / +data: "g"/7.000000000,0 -> {localTs=6.000000000,0}/BYTES/g6 +data: "g"/1.000000000,0 -> /BYTES/g1 +data: "h"/7.000000000,0 -> {localTs=6.000000000,0}/BYTES/h6 +data: "h"/1.000000000,0 -> / +data: "i"/7.000000000,0 -> {localTs=6.000000000,0}/BYTES/i6 +data: "i"/1.000000000,0 -> /BYTES/i1 +data: "j"/7.000000000,0 -> {localTs=6.000000000,0}/ +data: "j"/1.000000000,0 -> / +data: "k"/7.000000000,0 -> {localTs=6.000000000,0}/ +data: "l"/7.000000000,0 -> {localTs=6.000000000,0}/ +data: "m"/7.000000000,0 -> {localTs=6.000000000,0}/BYTES/m6 +data: "n"/7.000000000,0 -> {localTs=6.000000000,0}/BYTES/n6 +data: "n"/5.000000000,0 -> /BYTES/n5 +data: "o"/7.000000000,0 -> {localTs=6.000000000,0}/BYTES/o6 +data: "o"/5.000000000,0 -> / +data: "p"/7.000000000,0 -> {localTs=6.000000000,0}/ +data: "q"/7.000000000,0 -> {localTs=6.000000000,0}/ +data: "q"/5.000000000,0 -> /BYTES/q5 +data: "r"/7.000000000,0 -> {localTs=6.000000000,0}/ +data: "r"/5.000000000,0 -> / +stats: key_count=18 key_bytes=396 val_count=30 val_bytes=339 range_key_count=1 range_key_bytes=22 range_val_count=2 live_count=9 live_bytes=306 gc_bytes_age=42506 diff --git a/pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_rewrite 
b/pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_rewrite new file mode 100644 index 000000000000..07f5a75714d9 --- /dev/null +++ b/pkg/storage/testdata/mvcc_histories/range_tombstone_stats_intent_rewrite @@ -0,0 +1,385 @@ +# Tests MVCC stats calculations when rewriting intents. Intermediate states are +# tested through stats traces. Initial state: +# +# (x is tombstone, o---o is range tombstone, [] is intent) +# +# 7 +# 6 [a6][b6][c6][x] [x] [x] [g6][h6][i6][x] [x] [x] [m6][n6][o6] x x x +# 5 n5 x q5 x +# 4 o-----------------------------------------------o +# 3 o-----------------------------------------------o +# 2 +# 1 b1 x e1 x h1 x k1 x +# a b c d e f g h i j k l m n o p q r s +# +# This uses two range tombstones, since the lowest is the one that matters for +# point key GCBytesAge. It also uses points below/above range tombstones, +# because iterators surface range keys separately from point keys, which can +# cause bugs if callers don't step onto the point key. +# +# TODO(erikgrinaker): This is probably better handled by randomized or +# generative testing, since the combinations are getting unwieldy. But it'll do +# for now. 
+ +run stats ok +with ts=1 + put k=b v=b1 + del k=c + put k=e v=e1 + del k=f + put k=g v=g1 + del k=h + put k=i v=i1 + del k=j +del_range_ts k=g end=s ts=3 +del_range_ts k=g end=s ts=4 +with ts=5 + put k=n v=n5 + del k=o + put k=q v=q5 + del k=r +with t=A + txn_begin ts=6 + put k=a v=a6 + put k=b v=b6 + put k=c v=c6 + del k=d + del k=e + del k=f + put k=g v=g6 + put k=h v=h6 + put k=i v=i6 + del k=j + del k=k + del k=l + put k=m v=m6 + put k=n v=n6 + put k=o v=o6 + del k=p + del k=q + del k=r +---- +>> put k=b v=b1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=c ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> put k=e v=e1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=f ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> put k=g v=g1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=h ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> put k=i v=i1 ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=j ts=1 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1386 +>> del_range_ts k=g end=s ts=3 +stats: range_key_count=+1 range_key_bytes=+13 range_val_count=+1 live_count=-2 live_bytes=-42 gc_bytes_age=+5335 +>> del_range_ts k=g end=s ts=4 +stats: range_key_bytes=+9 range_val_count=+1 gc_bytes_age=+860 +>> put k=n v=n5 ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=o ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1330 +>> put k=q v=q5 ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+7 live_count=+1 live_bytes=+21 +>> del k=r ts=5 +stats: key_count=+1 key_bytes=+14 val_count=+1 gc_bytes_age=+1330 +>> put k=a v=a6 t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 
val_bytes=+55 live_count=+1 live_bytes=+69 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=b v=b6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_bytes=+48 gc_bytes_age=+1786 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=c v=c6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-198 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> del k=d t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=e t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 live_count=-1 live_bytes=-21 gc_bytes_age=+7614 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=f t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 gc_bytes_age=+5630 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> put k=g v=g6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-194 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=h v=h6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-198 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=i v=i6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-194 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> del k=j t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 gc_bytes_age=+5630 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=k t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=l t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 
gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> put k=m v=m6 t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=n v=n6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_bytes=+48 gc_bytes_age=+1786 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> put k=o v=o6 t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+55 live_count=+1 live_bytes=+69 gc_bytes_age=-190 intent_count=+1 intent_bytes=+19 separated_intent_count=+1 intent_age=+94 +>> del k=p t=A +stats: key_count=+1 key_bytes=+14 val_count=+1 val_bytes=+48 gc_bytes_age=+5828 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=q t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 live_count=-1 live_bytes=-21 gc_bytes_age=+7614 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> del k=r t=A +stats: key_bytes=+12 val_count=+1 val_bytes=+48 gc_bytes_age=+5638 intent_count=+1 intent_bytes=+12 separated_intent_count=+1 intent_age=+94 +>> at end: +txn: "A" meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} lock=true stat=PENDING rts=6.000000000,0 wto=false gul=0,0 +rangekey: {g-s}/[4.000000000,0=/ 3.000000000,0=/] +meta: "a"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "a"/6.000000000,0 -> /BYTES/a6 +meta: "b"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "b"/6.000000000,0 -> /BYTES/b6 +data: "b"/1.000000000,0 -> /BYTES/b1 +meta: "c"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= 
txnDidNotUpdateMeta=true +data: "c"/6.000000000,0 -> /BYTES/c6 +data: "c"/1.000000000,0 -> / +meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "d"/6.000000000,0 -> / +meta: "e"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "e"/6.000000000,0 -> / +data: "e"/1.000000000,0 -> /BYTES/e1 +meta: "f"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "f"/6.000000000,0 -> / +data: "f"/1.000000000,0 -> / +meta: "g"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "g"/6.000000000,0 -> /BYTES/g6 +data: "g"/1.000000000,0 -> /BYTES/g1 +meta: "h"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "h"/6.000000000,0 -> /BYTES/h6 +data: "h"/1.000000000,0 -> / +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "i"/6.000000000,0 -> /BYTES/i6 +data: "i"/1.000000000,0 -> /BYTES/i1 +meta: "j"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "j"/6.000000000,0 -> / +data: "j"/1.000000000,0 -> / +meta: "k"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "k"/6.000000000,0 -> / +meta: "l"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 
epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "l"/6.000000000,0 -> / +meta: "m"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "m"/6.000000000,0 -> /BYTES/m6 +meta: "n"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "n"/6.000000000,0 -> /BYTES/n6 +data: "n"/5.000000000,0 -> /BYTES/n5 +meta: "o"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true +data: "o"/6.000000000,0 -> /BYTES/o6 +data: "o"/5.000000000,0 -> / +meta: "p"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "p"/6.000000000,0 -> / +meta: "q"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "q"/6.000000000,0 -> / +data: "q"/5.000000000,0 -> /BYTES/q5 +meta: "r"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=6.000000000,0 min=0,0 seq=0} ts=6.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=true +data: "r"/6.000000000,0 -> / +data: "r"/5.000000000,0 -> / +stats: key_count=18 key_bytes=396 val_count=30 val_bytes=969 range_key_count=1 range_key_bytes=22 range_val_count=2 live_count=9 live_bytes=621 gc_bytes_age=72435 intent_count=18 intent_bytes=279 separated_intent_count=18 intent_age=1692 + +# Rewrite the same keys at a higher timestamp. 
+run stats ok +with t=A ts=7 + txn_advance + txn_restart + put k=a v=a7 + put k=b v=b7 + put k=c v=c7 + del k=d + del k=e + del k=f + put k=g v=g7 + put k=h v=h7 + put k=i v=i7 + del k=j + del k=k + del k=l + put k=m v=m7 + put k=n v=n7 + put k=o v=o7 + del k=p + del k=q + del k=r +---- +>> put k=a v=a7 t=A ts=7 +stats: intent_age=-1 +>> put k=b v=b7 t=A ts=7 +stats: gc_bytes_age=-19 intent_age=-1 +>> put k=c v=c7 t=A ts=7 +stats: intent_age=-1 +>> del k=d t=A ts=7 +stats: gc_bytes_age=-62 intent_age=-1 +>> del k=e t=A ts=7 +stats: gc_bytes_age=-81 intent_age=-1 +>> del k=f t=A ts=7 +stats: gc_bytes_age=-62 intent_age=-1 +>> put k=g v=g7 t=A ts=7 +stats: intent_age=-1 +>> put k=h v=h7 t=A ts=7 +stats: intent_age=-1 +>> put k=i v=i7 t=A ts=7 +stats: intent_age=-1 +>> del k=j t=A ts=7 +stats: gc_bytes_age=-62 intent_age=-1 +>> del k=k t=A ts=7 +stats: gc_bytes_age=-62 intent_age=-1 +>> del k=l t=A ts=7 +stats: gc_bytes_age=-62 intent_age=-1 +>> put k=m v=m7 t=A ts=7 +stats: intent_age=-1 +>> put k=n v=n7 t=A ts=7 +stats: gc_bytes_age=-19 intent_age=-1 +>> put k=o v=o7 t=A ts=7 +stats: intent_age=-1 +>> del k=p t=A ts=7 +stats: gc_bytes_age=-62 intent_age=-1 +>> del k=q t=A ts=7 +stats: gc_bytes_age=-81 intent_age=-1 +>> del k=r t=A ts=7 +stats: gc_bytes_age=-62 intent_age=-1 +>> at end: +txn: "A" meta={id=00000000 key=/Min pri=0.00000000 epo=1 ts=7.000000000,0 min=0,0 seq=0} lock=true stat=PENDING rts=7.000000000,0 wto=false gul=0,0 +rangekey: {g-s}/[4.000000000,0=/ 3.000000000,0=/] +meta: "a"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=1 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=false +data: "a"/7.000000000,0 -> /BYTES/a7 +meta: "b"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=1 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=false +data: "b"/7.000000000,0 -> /BYTES/b7 +data: "b"/1.000000000,0 -> /BYTES/b1 +meta: "c"/0,0 -> 
txn={id=00000000 key=/Min pri=0.00000000 epo=1 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=false +data: "c"/7.000000000,0 -> /BYTES/c7 +data: "c"/1.000000000,0 -> / +meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=1 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=false +data: "d"/7.000000000,0 -> / +meta: "e"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=1 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=false +data: "e"/7.000000000,0 -> / +data: "e"/1.000000000,0 -> /BYTES/e1 +meta: "f"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=1 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=false +data: "f"/7.000000000,0 -> / +data: "f"/1.000000000,0 -> / +meta: "g"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=1 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=false +data: "g"/7.000000000,0 -> /BYTES/g7 +data: "g"/1.000000000,0 -> /BYTES/g1 +meta: "h"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=1 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=false +data: "h"/7.000000000,0 -> /BYTES/h7 +data: "h"/1.000000000,0 -> / +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=1 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=false +data: "i"/7.000000000,0 -> /BYTES/i7 +data: "i"/1.000000000,0 -> /BYTES/i1 +meta: "j"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=1 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=false +data: "j"/7.000000000,0 -> / +data: "j"/1.000000000,0 -> / +meta: "k"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=1 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=true 
klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=false +data: "k"/7.000000000,0 -> / +meta: "l"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=1 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=false +data: "l"/7.000000000,0 -> / +meta: "m"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=1 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=false +data: "m"/7.000000000,0 -> /BYTES/m7 +meta: "n"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=1 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=false +data: "n"/7.000000000,0 -> /BYTES/n7 +data: "n"/5.000000000,0 -> /BYTES/n5 +meta: "o"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=1 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=false +data: "o"/7.000000000,0 -> /BYTES/o7 +data: "o"/5.000000000,0 -> / +meta: "p"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=1 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=false +data: "p"/7.000000000,0 -> / +meta: "q"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=1 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=false +data: "q"/7.000000000,0 -> / +data: "q"/5.000000000,0 -> /BYTES/q5 +meta: "r"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=1 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=false +data: "r"/7.000000000,0 -> / +data: "r"/5.000000000,0 -> / +stats: key_count=18 key_bytes=396 val_count=30 val_bytes=969 range_key_count=1 range_key_bytes=22 range_val_count=2 live_count=9 live_bytes=621 gc_bytes_age=71801 intent_count=18 intent_bytes=279 separated_intent_count=18 intent_age=1674 + +# Rewrite keys<->tombstones at a higher timestamp. 
+run stats ok +with t=A ts=8 + txn_advance + txn_restart + del k=a + del k=b + del k=c + put k=d v=d8 + put k=e v=e8 + put k=f v=f8 + del k=g + del k=h + del k=i + put k=j v=j8 + put k=k v=k8 + put k=l v=l8 +---- +>> del k=a t=A ts=8 +stats: val_bytes=-7 live_count=-1 live_bytes=-69 gc_bytes_age=+5704 intent_bytes=-7 intent_age=-1 +>> del k=b t=A ts=8 +stats: val_bytes=-7 live_count=-1 live_bytes=-69 gc_bytes_age=+5685 intent_bytes=-7 intent_age=-1 +>> del k=c t=A ts=8 +stats: val_bytes=-7 live_count=-1 live_bytes=-69 gc_bytes_age=+5704 intent_bytes=-7 intent_age=-1 +>> put k=d v=d8 t=A ts=8 +stats: val_bytes=+7 live_count=+1 live_bytes=+69 gc_bytes_age=-5766 intent_bytes=+7 intent_age=-1 +>> put k=e v=e8 t=A ts=8 +stats: val_bytes=+7 live_count=+1 live_bytes=+69 gc_bytes_age=-5785 intent_bytes=+7 intent_age=-1 +>> put k=f v=f8 t=A ts=8 +stats: val_bytes=+7 live_count=+1 live_bytes=+69 gc_bytes_age=-5766 intent_bytes=+7 intent_age=-1 +>> del k=g t=A ts=8 +stats: val_bytes=-7 live_count=-1 live_bytes=-69 gc_bytes_age=+5704 intent_bytes=-7 intent_age=-1 +>> del k=h t=A ts=8 +stats: val_bytes=-7 live_count=-1 live_bytes=-69 gc_bytes_age=+5704 intent_bytes=-7 intent_age=-1 +>> del k=i t=A ts=8 +stats: val_bytes=-7 live_count=-1 live_bytes=-69 gc_bytes_age=+5704 intent_bytes=-7 intent_age=-1 +>> put k=j v=j8 t=A ts=8 +stats: val_bytes=+7 live_count=+1 live_bytes=+69 gc_bytes_age=-5766 intent_bytes=+7 intent_age=-1 +>> put k=k v=k8 t=A ts=8 +stats: val_bytes=+7 live_count=+1 live_bytes=+69 gc_bytes_age=-5766 intent_bytes=+7 intent_age=-1 +>> put k=l v=l8 t=A ts=8 +stats: val_bytes=+7 live_count=+1 live_bytes=+69 gc_bytes_age=-5766 intent_bytes=+7 intent_age=-1 +>> at end: +txn: "A" meta={id=00000000 key=/Min pri=0.00000000 epo=2 ts=8.000000000,0 min=0,0 seq=0} lock=true stat=PENDING rts=8.000000000,0 wto=false gul=0,0 +rangekey: {g-s}/[4.000000000,0=/ 3.000000000,0=/] +meta: "a"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=2 ts=8.000000000,0 min=0,0 seq=0} 
ts=8.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=false +data: "a"/8.000000000,0 -> / +meta: "b"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=2 ts=8.000000000,0 min=0,0 seq=0} ts=8.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=false +data: "b"/8.000000000,0 -> / +data: "b"/1.000000000,0 -> /BYTES/b1 +meta: "c"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=2 ts=8.000000000,0 min=0,0 seq=0} ts=8.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=false +data: "c"/8.000000000,0 -> / +data: "c"/1.000000000,0 -> / +meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=2 ts=8.000000000,0 min=0,0 seq=0} ts=8.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=false +data: "d"/8.000000000,0 -> /BYTES/d8 +meta: "e"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=2 ts=8.000000000,0 min=0,0 seq=0} ts=8.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=false +data: "e"/8.000000000,0 -> /BYTES/e8 +data: "e"/1.000000000,0 -> /BYTES/e1 +meta: "f"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=2 ts=8.000000000,0 min=0,0 seq=0} ts=8.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=false +data: "f"/8.000000000,0 -> /BYTES/f8 +data: "f"/1.000000000,0 -> / +meta: "g"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=2 ts=8.000000000,0 min=0,0 seq=0} ts=8.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=false +data: "g"/8.000000000,0 -> / +data: "g"/1.000000000,0 -> /BYTES/g1 +meta: "h"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=2 ts=8.000000000,0 min=0,0 seq=0} ts=8.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=false +data: "h"/8.000000000,0 -> / +data: "h"/1.000000000,0 -> / +meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=2 ts=8.000000000,0 min=0,0 seq=0} ts=8.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=false +data: "i"/8.000000000,0 -> / +data: "i"/1.000000000,0 -> /BYTES/i1 
+meta: "j"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=2 ts=8.000000000,0 min=0,0 seq=0} ts=8.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=false +data: "j"/8.000000000,0 -> /BYTES/j8 +data: "j"/1.000000000,0 -> / +meta: "k"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=2 ts=8.000000000,0 min=0,0 seq=0} ts=8.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=false +data: "k"/8.000000000,0 -> /BYTES/k8 +meta: "l"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=2 ts=8.000000000,0 min=0,0 seq=0} ts=8.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=false +data: "l"/8.000000000,0 -> /BYTES/l8 +meta: "m"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=1 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=false +data: "m"/7.000000000,0 -> /BYTES/m7 +meta: "n"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=1 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=false +data: "n"/7.000000000,0 -> /BYTES/n7 +data: "n"/5.000000000,0 -> /BYTES/n5 +meta: "o"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=1 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=false +data: "o"/7.000000000,0 -> /BYTES/o7 +data: "o"/5.000000000,0 -> / +meta: "p"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=1 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=false +data: "p"/7.000000000,0 -> / +meta: "q"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=1 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=false +data: "q"/7.000000000,0 -> / +data: "q"/5.000000000,0 -> /BYTES/q5 +meta: "r"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=1 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=true klen=12 vlen=0 mergeTs= txnDidNotUpdateMeta=false +data: 
"r"/7.000000000,0 -> / +data: "r"/5.000000000,0 -> / +stats: key_count=18 key_bytes=396 val_count=30 val_bytes=969 range_key_count=1 range_key_bytes=22 range_val_count=2 live_count=9 live_bytes=621 gc_bytes_age=71391 intent_count=18 intent_bytes=279 separated_intent_count=18 intent_age=1662 diff --git a/pkg/storage/testdata/mvcc_histories/range_tombstone_writes b/pkg/storage/testdata/mvcc_histories/range_tombstone_writes index 3064611f7171..1a5d13cc649b 100644 --- a/pkg/storage/testdata/mvcc_histories/range_tombstone_writes +++ b/pkg/storage/testdata/mvcc_histories/range_tombstone_writes @@ -15,6 +15,7 @@ run ok put k=a ts=2 v=a2 del k=a ts=4 +put k=b ts=3 v=b3 put k=d ts=4 v=d4 put k=f ts=4 v=f4 put k=g ts=2 v=g2 @@ -31,6 +32,7 @@ txn: "A" meta={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0, rangekey: {k-p}/[4.000000000,0=/] data: "a"/4.000000000,0 -> / data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true data: "d"/7.000000000,0 -> /BYTES/d7 data: "d"/4.000000000,0 -> /BYTES/d4 @@ -49,6 +51,7 @@ del_range_ts k=z end=x ts=3 rangekey: {k-p}/[4.000000000,0=/] data: "a"/4.000000000,0 -> / data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true data: "d"/7.000000000,0 -> /BYTES/d7 data: "d"/4.000000000,0 -> /BYTES/d4 @@ -67,6 +70,7 @@ del_range_ts k=x end=z ts=0 rangekey: {k-p}/[4.000000000,0=/] data: "a"/4.000000000,0 -> / data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true data: 
"d"/7.000000000,0 -> /BYTES/d7 data: "d"/4.000000000,0 -> /BYTES/d4 @@ -85,6 +89,7 @@ del_range_ts k=x end=x ts=3 rangekey: {k-p}/[4.000000000,0=/] data: "a"/4.000000000,0 -> / data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true data: "d"/7.000000000,0 -> /BYTES/d7 data: "d"/4.000000000,0 -> /BYTES/d4 @@ -105,6 +110,7 @@ del_range_ts k=a end=b ts=3 rangekey: {k-p}/[4.000000000,0=/] data: "a"/4.000000000,0 -> / data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true data: "d"/7.000000000,0 -> /BYTES/d7 data: "d"/4.000000000,0 -> /BYTES/d4 @@ -123,6 +129,7 @@ del_range_ts k=a end=b ts=4 rangekey: {k-p}/[4.000000000,0=/] data: "a"/4.000000000,0 -> / data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true data: "d"/7.000000000,0 -> /BYTES/d7 data: "d"/4.000000000,0 -> /BYTES/d4 @@ -141,6 +148,7 @@ del_range_ts k=e end=g ts=3 rangekey: {k-p}/[4.000000000,0=/] data: "a"/4.000000000,0 -> / data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true data: "d"/7.000000000,0 -> /BYTES/d7 data: "d"/4.000000000,0 -> /BYTES/d4 @@ -161,6 +169,7 @@ del_range_ts k=k end=p ts=3 rangekey: {k-p}/[4.000000000,0=/] data: "a"/4.000000000,0 -> / data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 meta: "d"/0,0 -> 
txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true data: "d"/7.000000000,0 -> /BYTES/d7 data: "d"/4.000000000,0 -> /BYTES/d4 @@ -179,6 +188,7 @@ del_range_ts k=k end=p ts=4 rangekey: {k-p}/[4.000000000,0=/] data: "a"/4.000000000,0 -> / data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true data: "d"/7.000000000,0 -> /BYTES/d7 data: "d"/4.000000000,0 -> /BYTES/d4 @@ -197,6 +207,7 @@ del_range_ts k=j end=m ts=3 rangekey: {k-p}/[4.000000000,0=/] data: "a"/4.000000000,0 -> / data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true data: "d"/7.000000000,0 -> /BYTES/d7 data: "d"/4.000000000,0 -> /BYTES/d4 @@ -215,6 +226,7 @@ del_range_ts k=o end=q ts=3 rangekey: {k-p}/[4.000000000,0=/] data: "a"/4.000000000,0 -> / data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true data: "d"/7.000000000,0 -> /BYTES/d7 data: "d"/4.000000000,0 -> /BYTES/d4 @@ -233,6 +245,7 @@ del_range_ts k=j end=q ts=3 rangekey: {k-p}/[4.000000000,0=/] data: "a"/4.000000000,0 -> / data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true data: "d"/7.000000000,0 -> /BYTES/d7 data: "d"/4.000000000,0 -> /BYTES/d4 @@ -251,6 +264,7 @@ del_range_ts k=k end=n 
ts=3 rangekey: {k-p}/[4.000000000,0=/] data: "a"/4.000000000,0 -> / data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true data: "d"/7.000000000,0 -> /BYTES/d7 data: "d"/4.000000000,0 -> /BYTES/d4 @@ -271,6 +285,7 @@ del_range_ts k=d end=e ts=3 rangekey: {k-p}/[4.000000000,0=/] data: "a"/4.000000000,0 -> / data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true data: "d"/7.000000000,0 -> /BYTES/d7 data: "d"/4.000000000,0 -> /BYTES/d4 @@ -289,6 +304,7 @@ del_range_ts k=d end=e ts=5 rangekey: {k-p}/[4.000000000,0=/] data: "a"/4.000000000,0 -> / data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true data: "d"/7.000000000,0 -> /BYTES/d7 data: "d"/4.000000000,0 -> /BYTES/d4 @@ -307,6 +323,7 @@ del_range_ts k=i end=j ts=5 rangekey: {k-p}/[4.000000000,0=/] data: "a"/4.000000000,0 -> / data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true data: "d"/7.000000000,0 -> /BYTES/d7 data: "d"/4.000000000,0 -> /BYTES/d4 @@ -325,6 +342,7 @@ del_range_ts k=i end=j ts=7 rangekey: {k-p}/[4.000000000,0=/] data: "a"/4.000000000,0 -> / data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false 
klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true data: "d"/7.000000000,0 -> /BYTES/d7 data: "d"/4.000000000,0 -> /BYTES/d4 @@ -343,6 +361,7 @@ del_range_ts k=i end=j ts=10 rangekey: {k-p}/[4.000000000,0=/] data: "a"/4.000000000,0 -> / data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true data: "d"/7.000000000,0 -> /BYTES/d7 data: "d"/4.000000000,0 -> /BYTES/d4 @@ -362,6 +381,7 @@ del_range_ts k=h end=i ts=3 rangekey: {k-p}/[4.000000000,0=/] data: "a"/4.000000000,0 -> / data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true data: "d"/7.000000000,0 -> /BYTES/d7 data: "d"/4.000000000,0 -> /BYTES/d4 @@ -384,98 +404,7 @@ rangekey: {b-d}/[4.000000000,0=/] rangekey: {k-p}/[4.000000000,0=/] data: "a"/4.000000000,0 -> / data: "a"/2.000000000,0 -> /BYTES/a2 -meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true -data: "d"/7.000000000,0 -> /BYTES/d7 -data: "d"/4.000000000,0 -> /BYTES/d4 -data: "f"/4.000000000,0 -> /BYTES/f4 -data: "g"/4.000000000,0 -> / -data: "g"/2.000000000,0 -> /BYTES/g2 -meta: "h"/0,0 -> txn={} ts=0,0 del=false klen=0 vlen=0 raw=/BYTES/inline mergeTs= txnDidNotUpdateMeta=false -meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true -data: "i"/7.000000000,0 -> /BYTES/i7 - -# Writing range tombstones next to other range tombstones will merge them, but -# only at same timestamp. 
-run ok -del_range_ts k=s end=x ts=4 -del_range_ts k=p end=s ts=3 ----- ->> at end: -rangekey: {a-b}/[10.000000000,0=/] -rangekey: {b-d}/[4.000000000,0=/] -rangekey: {k-p}/[4.000000000,0=/] -rangekey: {p-s}/[3.000000000,0=/] -rangekey: {s-x}/[4.000000000,0=/] -data: "a"/4.000000000,0 -> / -data: "a"/2.000000000,0 -> /BYTES/a2 -meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true -data: "d"/7.000000000,0 -> /BYTES/d7 -data: "d"/4.000000000,0 -> /BYTES/d4 -data: "f"/4.000000000,0 -> /BYTES/f4 -data: "g"/4.000000000,0 -> / -data: "g"/2.000000000,0 -> /BYTES/g2 -meta: "h"/0,0 -> txn={} ts=0,0 del=false klen=0 vlen=0 raw=/BYTES/inline mergeTs= txnDidNotUpdateMeta=false -meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true -data: "i"/7.000000000,0 -> /BYTES/i7 - -run ok -del_range_ts k=p end=s ts=4 ----- ->> at end: -rangekey: {a-b}/[10.000000000,0=/] -rangekey: {b-d}/[4.000000000,0=/] -rangekey: {k-p}/[4.000000000,0=/] -rangekey: {p-s}/[4.000000000,0=/ 3.000000000,0=/] -rangekey: {s-x}/[4.000000000,0=/] -data: "a"/4.000000000,0 -> / -data: "a"/2.000000000,0 -> /BYTES/a2 -meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true -data: "d"/7.000000000,0 -> /BYTES/d7 -data: "d"/4.000000000,0 -> /BYTES/d4 -data: "f"/4.000000000,0 -> /BYTES/f4 -data: "g"/4.000000000,0 -> / -data: "g"/2.000000000,0 -> /BYTES/g2 -meta: "h"/0,0 -> txn={} ts=0,0 del=false klen=0 vlen=0 raw=/BYTES/inline mergeTs= txnDidNotUpdateMeta=false -meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true -data: "i"/7.000000000,0 -> /BYTES/i7 - -# 
Writing a local timestamp above the key timestamp should be rounded down, -# and merge with an adjacent range tombstone. -run ok -del_range_ts k=x end=y ts=4 localTs=5 ----- ->> at end: -rangekey: {a-b}/[10.000000000,0=/] -rangekey: {b-d}/[4.000000000,0=/] -rangekey: {k-p}/[4.000000000,0=/] -rangekey: {p-s}/[4.000000000,0=/ 3.000000000,0=/] -rangekey: {s-y}/[4.000000000,0=/] -data: "a"/4.000000000,0 -> / -data: "a"/2.000000000,0 -> /BYTES/a2 -meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true -data: "d"/7.000000000,0 -> /BYTES/d7 -data: "d"/4.000000000,0 -> /BYTES/d4 -data: "f"/4.000000000,0 -> /BYTES/f4 -data: "g"/4.000000000,0 -> / -data: "g"/2.000000000,0 -> /BYTES/g2 -meta: "h"/0,0 -> txn={} ts=0,0 del=false klen=0 vlen=0 raw=/BYTES/inline mergeTs= txnDidNotUpdateMeta=false -meta: "i"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true -data: "i"/7.000000000,0 -> /BYTES/i7 - -# Writing range tombstones next to others with a different local timestamp -# (and thus value) should not merge. -run ok -del_range_ts k=y end=z ts=4 localTs=3 ----- ->> at end: -rangekey: {a-b}/[10.000000000,0=/] -rangekey: {b-d}/[4.000000000,0=/] -rangekey: {k-p}/[4.000000000,0=/] -rangekey: {p-s}/[4.000000000,0=/ 3.000000000,0=/] -rangekey: {s-y}/[4.000000000,0=/] -rangekey: {y-z}/[4.000000000,0={localTs=3.000000000,0}/] -data: "a"/4.000000000,0 -> / -data: "a"/2.000000000,0 -> /BYTES/a2 +data: "b"/3.000000000,0 -> /BYTES/b3 meta: "d"/0,0 -> txn={id=00000000 key=/Min pri=0.00000000 epo=0 ts=7.000000000,0 min=0,0 seq=0} ts=7.000000000,0 del=false klen=12 vlen=7 mergeTs= txnDidNotUpdateMeta=true data: "d"/7.000000000,0 -> /BYTES/d7 data: "d"/4.000000000,0 -> /BYTES/d4