From a748dd747ecd96dae168bb4bc665c83698dbbcb7 Mon Sep 17 00:00:00 2001 From: Oleg Afanasyev Date: Mon, 8 Aug 2022 10:37:34 +0100 Subject: [PATCH] gc: add metrics for clear range requests in GC This commit adds metrics for ClearRange requests in GC. Metrics are shared with the existing GC ClearRangeKey full range deletions since they perform a similar operation, and are exposed as queue.gc.info.clearrangesuccess/queue.gc.info.clearrangefailed. Release note (ops change): Metrics queue.gc.info.clearrangesuccess and queue.gc.info.clearrangefailed were updated to include statistics about GC operations that perform ClearRange on parts of range keyspace. Previously those metrics only included requests to remove range data completely when performing a schema change. --- pkg/kv/kvserver/gc/gc.go | 18 ++++++++++-------- pkg/kv/kvserver/gc/gc_random_test.go | 5 +++++ pkg/kv/kvserver/metrics.go | 4 ++-- pkg/kv/kvserver/mvcc_gc_queue.go | 4 ++-- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/pkg/kv/kvserver/gc/gc.go b/pkg/kv/kvserver/gc/gc.go index 48c721e6fb66..40182aad27df 100644 --- a/pkg/kv/kvserver/gc/gc.go +++ b/pkg/kv/kvserver/gc/gc.go @@ -261,12 +261,12 @@ type Info struct { // AffectedVersionsRangeValBytes is the number of (fully encoded) bytes deleted from values that // belong to removed range keys. AffectedVersionsRangeValBytes int64 - // ClearRangeKeyOperations number of GCClearRange requests sent by GC to remove - // old data. Includes full range clear as well as requests to remove large - // number of consecutive keys and versions. - ClearRangeKeyOperations int - // ClearRangeKeyFailures number of failed requests to perform GC ClearRange. - ClearRangeKeyFailures int + // ClearRangeSpanOperations number of ClearRange requests performed by GC. This + // number includes full range clear requests as well as requests covering + // multiple keys or versions of the same key. 
+ ClearRangeSpanOperations int + // ClearRangeSpanFailures number of ClearRange requests GC failed to perform. + ClearRangeSpanFailures int } // RunOptions contains collection of limits that GC run applies when performing operations @@ -444,11 +444,11 @@ func processReplicatedKeyRange( EndKey: end, }); err == nil { excludeUserKeySpan = true - info.ClearRangeKeyOperations++ + info.ClearRangeSpanOperations++ } else { log.Warningf(ctx, "failed to perform GC clear range operation on range %s: %s", desc.String(), err) - info.ClearRangeKeyFailures++ + info.ClearRangeSpanFailures++ } } } @@ -887,8 +887,10 @@ func (b *gcKeyBatcher) maybeFlushPendingBatches(ctx context.Context) (err error) // thresholds. We may leave some inconsistent history // behind, but nobody can read it. log.Warningf(ctx, "failed to GC keys with clear range: %v", err) + b.info.ClearRangeSpanFailures++ } b.clearRangeCounters.updateGcInfo(b.info) + b.info.ClearRangeSpanOperations++ b.totalMemUsed = 0 } else if flushTo := len(b.pointsBatches) - 1; flushTo > 0 { err := b.flushOldestPointBatches(ctx, flushTo) diff --git a/pkg/kv/kvserver/gc/gc_random_test.go b/pkg/kv/kvserver/gc/gc_random_test.go index 4bf9333422f6..24bbab213339 100644 --- a/pkg/kv/kvserver/gc/gc_random_test.go +++ b/pkg/kv/kvserver/gc/gc_random_test.go @@ -333,6 +333,11 @@ func TestNewVsInvariants(t *testing.T) { storage.MVCCGarbageCollectRangeKeys(ctx, eng, &stats, rangeKeys)) } + // For the sake of assertion we need to reset this counter as it signals + // counter for specific feature rather than processed data. Data and number + // of cleared keys and versions should be the same regardless of operations + // used to clear it. 
+ gcInfoNew.ClearRangeSpanOperations = 0 assertLiveData(t, eng, beforeGC, *desc, tc.now, gcThreshold, intentThreshold, ttl, gcInfoNew) }) diff --git a/pkg/kv/kvserver/metrics.go b/pkg/kv/kvserver/metrics.go index eabb5ccd811b..af9fe5f7d06a 100644 --- a/pkg/kv/kvserver/metrics.go +++ b/pkg/kv/kvserver/metrics.go @@ -1416,13 +1416,13 @@ The messages are dropped to help these replicas to recover from I/O overload.`, } metaGCUsedClearRange = metric.Metadata{ Name: "queue.gc.info.clearrangesuccess", - Help: "Number of successful ClearRange operation during GC", + Help: "Number of successful ClearRange operations during GC", Measurement: "Requests", Unit: metric.Unit_COUNT, } metaGCFailedClearRange = metric.Metadata{ Name: "queue.gc.info.clearrangefailed", - Help: "Number of failed ClearRange operation during GC", + Help: "Number of failed ClearRange operations during GC", Measurement: "Requests", Unit: metric.Unit_COUNT, } diff --git a/pkg/kv/kvserver/mvcc_gc_queue.go b/pkg/kv/kvserver/mvcc_gc_queue.go index 7663ea77f295..7d5ad0106964 100644 --- a/pkg/kv/kvserver/mvcc_gc_queue.go +++ b/pkg/kv/kvserver/mvcc_gc_queue.go @@ -817,8 +817,8 @@ func updateStoreMetricsWithGCInfo(metrics *StoreMetrics, info gc.Info) { metrics.GCAbortSpanGCNum.Inc(int64(info.AbortSpanGCNum)) metrics.GCPushTxn.Inc(int64(info.PushTxn)) metrics.GCResolveTotal.Inc(int64(info.ResolveTotal)) - metrics.GCUsedClearRange.Inc(int64(info.ClearRangeKeyOperations)) - metrics.GCFailedClearRange.Inc(int64(info.ClearRangeKeyFailures)) + metrics.GCUsedClearRange.Inc(int64(info.ClearRangeSpanOperations)) + metrics.GCFailedClearRange.Inc(int64(info.ClearRangeSpanFailures)) } func (mgcq *mvccGCQueue) postProcessScheduled(