kvserver: add leases.requests.latency metric
This patch adds a histogram of lease request latencies. It includes all
request types (acquisitions, transfers, and extensions) and all
outcomes (successes and errors), but it only measures the coalesced lease
requests themselves, regardless of how many waiters each request has or
how long those waiters have been waiting.

Epic: none
Release note (ops change): Added a metric `leases.requests.latency`
recording a histogram of lease request latencies.
erikgrinaker committed Apr 2, 2023
1 parent 1e27c65 commit 0c41a7e
Showing 4 changed files with 25 additions and 2 deletions.
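
The key design point is the coalescing mentioned in the commit message: concurrent callers waiting for a lease on the same range share a single in-flight lease request, and the new histogram times that one request rather than each waiter's wait. The sketch below illustrates the general coalescing pattern with `golang.org/x/sync/singleflight`; it is not kvserver's actual `pendingLeaseRequest` machinery, and the key format and sleep are stand-ins.

```go
// Illustrative only: concurrent callers for the same range share one
// in-flight request, so the latency of the underlying request is
// observed once no matter how many waiters piled up behind it.
package main

import (
	"fmt"
	"time"

	"golang.org/x/sync/singleflight"
)

var group singleflight.Group

func requestLease(rangeID int) (string, error) {
	v, err, shared := group.Do(fmt.Sprintf("lease-%d", rangeID), func() (interface{}, error) {
		time.Sleep(5 * time.Millisecond) // stand-in for the real lease request
		return "lease-granted", nil
	})
	_ = shared // true for callers that piggybacked on another caller's request
	if err != nil {
		return "", err
	}
	return v.(string), nil
}

func main() {
	result, err := requestLease(42)
	fmt.Println(result, err)
}
```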
pkg/kv/kvserver/metrics.go (15 additions & 2 deletions)

```diff
@@ -127,6 +127,12 @@ var (
 		Measurement: "Lease Requests",
 		Unit:        metric.Unit_COUNT,
 	}
+	metaLeaseRequestLatency = metric.Metadata{
+		Name:        "leases.requests.latency",
+		Help:        "Lease request latency (all types and outcomes, coalesced)",
+		Measurement: "Latency",
+		Unit:        metric.Unit_NANOSECONDS,
+	}
 	metaLeaseTransferSuccessCount = metric.Metadata{
 		Name: "leases.transfers.success",
 		Help: "Number of successful lease transfers",
@@ -1797,6 +1803,7 @@ type StoreMetrics struct {
 	// lease).
 	LeaseRequestSuccessCount  *metric.Counter
 	LeaseRequestErrorCount    *metric.Counter
+	LeaseRequestLatency       metric.IHistogram
 	LeaseTransferSuccessCount *metric.Counter
 	LeaseTransferErrorCount   *metric.Counter
 	LeaseExpirationCount      *metric.Gauge
@@ -2355,8 +2362,14 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
 		OverReplicatedRangeCount: metric.NewGauge(metaOverReplicatedRangeCount),
 
 		// Lease request metrics.
-		LeaseRequestSuccessCount:  metric.NewCounter(metaLeaseRequestSuccessCount),
-		LeaseRequestErrorCount:    metric.NewCounter(metaLeaseRequestErrorCount),
+		LeaseRequestSuccessCount: metric.NewCounter(metaLeaseRequestSuccessCount),
+		LeaseRequestErrorCount:   metric.NewCounter(metaLeaseRequestErrorCount),
+		LeaseRequestLatency: metric.NewHistogram(metric.HistogramOptions{
+			Mode:     metric.HistogramModePreferHdrLatency,
+			Metadata: metaLeaseRequestLatency,
+			Duration: histogramWindow,
+			Buckets:  metric.NetworkLatencyBuckets,
+		}),
 		LeaseTransferSuccessCount: metric.NewCounter(metaLeaseTransferSuccessCount),
 		LeaseTransferErrorCount:   metric.NewCounter(metaLeaseTransferErrorCount),
 		LeaseExpirationCount:      metric.NewGauge(metaLeaseExpirationCount),
```
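
The histogram is windowed over `histogramWindow`, prefers an HDR-style latency encoding (`HistogramModePreferHdrLatency`), and uses the package's network-latency bucket boundaries. For readers who know the Prometheus client library better, here is a rough, hedged analogue of the same declaration; the metric name spelling and the exponential bucket bounds are illustrative, not what `metric.NetworkLatencyBuckets` actually contains.

```go
// A rough Prometheus-style analogue of the new histogram (sketch only;
// CockroachDB uses its own metric package, not client_golang).
package main

import (
	"time"

	"github.com/prometheus/client_golang/prometheus"
)

var leaseRequestLatency = prometheus.NewHistogram(prometheus.HistogramOpts{
	Name:    "leases_requests_latency",
	Help:    "Lease request latency (all types and outcomes, coalesced)",
	Buckets: prometheus.ExponentialBuckets(100e-6, 2, 16), // 100µs up to ~3.3s, illustrative
})

func init() {
	prometheus.MustRegister(leaseRequestLatency)
}

// observeLeaseRequest records one coalesced lease request's duration.
func observeLeaseRequest(d time.Duration) {
	leaseRequestLatency.Observe(d.Seconds())
}
```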
pkg/kv/kvserver/replica_range_lease.go (5 additions & 0 deletions)

```diff
@@ -429,6 +429,11 @@ func (p *pendingLeaseRequest) requestLease(
 	status kvserverpb.LeaseStatus,
 	leaseReq kvpb.Request,
 ) error {
+	started := timeutil.Now()
+	defer func() {
+		p.repl.store.metrics.LeaseRequestLatency.RecordValue(timeutil.Since(started).Nanoseconds())
+	}()
+
 	// If requesting an epoch-based lease & current state is expired,
 	// potentially heartbeat our own liveness or increment epoch of
 	// prior owner. Note we only do this if the previous lease was
```
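
The instrumentation is the standard record-on-defer pattern: capture a start time on entry and let a deferred closure record the elapsed nanoseconds on every return path, so successes and errors alike land in the histogram. A self-contained sketch with only the standard library (here `recordValue` is a stand-in for `LeaseRequestLatency.RecordValue`):

```go
package main

import (
	"fmt"
	"time"
)

// recordValue stands in for StoreMetrics.LeaseRequestLatency.RecordValue.
func recordValue(nanos int64) {
	fmt.Printf("lease request latency: %s\n", time.Duration(nanos))
}

func requestLease() error {
	started := time.Now()
	defer func() {
		// Runs whether requestLease returns nil or an error, so the
		// histogram covers all outcomes.
		recordValue(time.Since(started).Nanoseconds())
	}()

	time.Sleep(10 * time.Millisecond) // stand-in for the actual lease request
	return nil
}

func main() {
	_ = requestLease()
}
```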
pkg/ts/catalog/chart_catalog.go (4 additions & 0 deletions)

```diff
@@ -1757,6 +1757,10 @@ var charts = []sectionDescription{
 			Title:   "Stuck Acquisition Count",
 			Metrics: []string{"requests.slow.lease"},
 		},
+		{
+			Title:   "Lease Request Latency",
+			Metrics: []string{"leases.requests.latency"},
+		},
 		{
 			Title: "Succcess Rate",
 			Metrics: []string{
```
pkg/ts/catalog/metrics.go (1 addition & 0 deletions)

```diff
@@ -103,6 +103,7 @@ var histogramMetricsNames = map[string]struct{}{
 	"replication.flush_hist_nanos":            {},
 	"kv.replica_read_batch_evaluate.latency":  {},
 	"kv.replica_write_batch_evaluate.latency": {},
+	"leases.requests.latency":                 {},
 }
 
 func allInternalTSMetricsNames() []string {
```
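
`histogramMetricsNames` is a plain string set; adding the new name tells the internal timeseries catalog to treat `leases.requests.latency` as a histogram rather than a scalar. A generic sketch of the `map[string]struct{}` set-membership idiom (names abbreviated):

```go
package main

import "fmt"

// A string set via map[string]struct{}: the empty struct carries no
// payload, so membership costs only the key storage.
var histogramMetricsNames = map[string]struct{}{
	"kv.replica_read_batch_evaluate.latency": {},
	"leases.requests.latency":                {},
}

func isHistogram(name string) bool {
	_, ok := histogramMetricsNames[name]
	return ok
}

func main() {
	fmt.Println(isHistogram("leases.requests.latency"))  // true
	fmt.Println(isHistogram("leases.transfers.success")) // false
}
```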
