Skip to content

Commit

Permalink
gc: add metrics for clear range requests in GC
Browse files Browse the repository at this point in the history
This commit adds metrics for ClearRange requests issued by GC. The metrics
are shared with the existing GC ClearRangeKey full-range deletions, since
they perform a similar operation, and are exposed as
queue.gc.info.clearrangesuccess/queue.gc.info.clearrangefailed.

Release note (ops change): The metrics queue.gc.info.clearrangesuccess and
queue.gc.info.clearrangefailed now include statistics about
GC operations that perform ClearRange on parts of a range's keyspace.
Previously, these metrics only included requests to remove range data
completely when performing a schema change.
  • Loading branch information
aliher1911 committed Dec 19, 2022
1 parent bce5877 commit a748dd7
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 12 deletions.
18 changes: 10 additions & 8 deletions pkg/kv/kvserver/gc/gc.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,12 +261,12 @@ type Info struct {
// AffectedVersionsRangeValBytes is the number of (fully encoded) bytes deleted from values that
// belong to removed range keys.
AffectedVersionsRangeValBytes int64
// ClearRangeKeyOperations number of GCClearRange requests sent by GC to remove
// old data. Includes full range clear as well as requests to remove large
// number of consecutive keys and versions.
ClearRangeKeyOperations int
// ClearRangeKeyFailures number of failed requests to perform GC ClearRange.
ClearRangeKeyFailures int
// ClearRangeSpanOperations number of ClearRange requests performed by GC. This
// number includes full range clear requests as well as requests covering
// multiple keys or versions of the same key.
ClearRangeSpanOperations int
// ClearRangeSpanFailures number of ClearRange requests GC failed to perform.
ClearRangeSpanFailures int
}

// RunOptions contains collection of limits that GC run applies when performing operations
Expand Down Expand Up @@ -444,11 +444,11 @@ func processReplicatedKeyRange(
EndKey: end,
}); err == nil {
excludeUserKeySpan = true
info.ClearRangeKeyOperations++
info.ClearRangeSpanOperations++
} else {
log.Warningf(ctx, "failed to perform GC clear range operation on range %s: %s",
desc.String(), err)
info.ClearRangeKeyFailures++
info.ClearRangeSpanFailures++
}
}
}
Expand Down Expand Up @@ -887,8 +887,10 @@ func (b *gcKeyBatcher) maybeFlushPendingBatches(ctx context.Context) (err error)
// thresholds. We may leave some inconsistent history
// behind, but nobody can read it.
log.Warningf(ctx, "failed to GC keys with clear range: %v", err)
b.info.ClearRangeSpanFailures++
}
b.clearRangeCounters.updateGcInfo(b.info)
b.info.ClearRangeSpanOperations++
b.totalMemUsed = 0
} else if flushTo := len(b.pointsBatches) - 1; flushTo > 0 {
err := b.flushOldestPointBatches(ctx, flushTo)
Expand Down
5 changes: 5 additions & 0 deletions pkg/kv/kvserver/gc/gc_random_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -333,6 +333,11 @@ func TestNewVsInvariants(t *testing.T) {
storage.MVCCGarbageCollectRangeKeys(ctx, eng, &stats, rangeKeys))
}

// For the sake of assertion we need to reset this counter as it signals
// counter for specific feature rather than processed data. Data and number
// of cleared keys and versions should be the same regardless of operations
// used to clear it.
gcInfoNew.ClearRangeSpanOperations = 0
assertLiveData(t, eng, beforeGC, *desc, tc.now, gcThreshold, intentThreshold, ttl,
gcInfoNew)
})
Expand Down
4 changes: 2 additions & 2 deletions pkg/kv/kvserver/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -1416,13 +1416,13 @@ The messages are dropped to help these replicas to recover from I/O overload.`,
}
metaGCUsedClearRange = metric.Metadata{
Name: "queue.gc.info.clearrangesuccess",
Help: "Number of successful ClearRange operation during GC",
Help: "Number of successful ClearRange operations during GC",
Measurement: "Requests",
Unit: metric.Unit_COUNT,
}
metaGCFailedClearRange = metric.Metadata{
Name: "queue.gc.info.clearrangefailed",
Help: "Number of failed ClearRange operation during GC",
Help: "Number of failed ClearRange operations during GC",
Measurement: "Requests",
Unit: metric.Unit_COUNT,
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/kv/kvserver/mvcc_gc_queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -817,8 +817,8 @@ func updateStoreMetricsWithGCInfo(metrics *StoreMetrics, info gc.Info) {
metrics.GCAbortSpanGCNum.Inc(int64(info.AbortSpanGCNum))
metrics.GCPushTxn.Inc(int64(info.PushTxn))
metrics.GCResolveTotal.Inc(int64(info.ResolveTotal))
metrics.GCUsedClearRange.Inc(int64(info.ClearRangeKeyOperations))
metrics.GCFailedClearRange.Inc(int64(info.ClearRangeKeyFailures))
metrics.GCUsedClearRange.Inc(int64(info.ClearRangeSpanOperations))
metrics.GCFailedClearRange.Inc(int64(info.ClearRangeSpanFailures))
}

func (mgcq *mvccGCQueue) postProcessScheduled(
Expand Down

0 comments on commit a748dd7

Please sign in to comment.