Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

kvserver: Add a metric for in-progress snapshots #99843

Merged
merged 1 commit into from
Mar 30, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 13 additions & 5 deletions pkg/kv/kvserver/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -825,15 +825,15 @@ evaluating the network savings of not sending cross region traffic.
Unit: metric.Unit_BYTES,
}
metaDelegateSnapshotSuccesses = metric.Metadata{
Name: "range.snapshot.delegate.successes",
Name: "range.snapshots.delegate.successes",
Help: `Number of snapshots that were delegated to a different node and
resulted in success on that delegate. This does not count self delegated snapshots.
`,
Measurement: "Snapshots",
Unit: metric.Unit_COUNT,
}
metaDelegateSnapshotFailures = metric.Metadata{
Name: "range.snapshot.delegate.failures",
Name: "range.snapshots.delegate.failures",
Help: `Number of snapshots that were delegated to a different node and
resulted in failure on that delegate. There are numerous reasons a failure can
occur on a delegate such as timeout, the delegate Raft log being too far behind
Expand All @@ -842,6 +842,12 @@ or the delegate being too busy to send.
Measurement: "Snapshots",
Unit: metric.Unit_COUNT,
}
metaDelegateSnapshotInProgress = metric.Metadata{
Name: "range.snapshots.delegate.in-progress",
Help: `Number of delegated snapshots that are currently in-flight.`,
Measurement: "Snapshots",
Unit: metric.Unit_COUNT,
}

// Quota pool metrics.
metaRaftQuotaPoolPercentUsed = metric.Metadata{
Expand Down Expand Up @@ -1922,9 +1928,10 @@ type StoreMetrics struct {
RangeSnapshotRecvTotalInProgress *metric.Gauge

// Delegate snapshot metrics. These don't count self-delegated snapshots.
DelegateSnapshotSendBytes *metric.Counter
DelegateSnapshotSuccesses *metric.Counter
DelegateSnapshotFailures *metric.Counter
DelegateSnapshotSendBytes *metric.Counter
DelegateSnapshotSuccesses *metric.Counter
DelegateSnapshotFailures *metric.Counter
DelegateSnapshotInProgress *metric.Gauge

// Raft processing metrics.
RaftTicks *metric.Counter
Expand Down Expand Up @@ -2461,6 +2468,7 @@ func newStoreMetrics(histogramWindow time.Duration) *StoreMetrics {
DelegateSnapshotSendBytes: metric.NewCounter(metaDelegateSnapshotSendBytes),
DelegateSnapshotSuccesses: metric.NewCounter(metaDelegateSnapshotSuccesses),
DelegateSnapshotFailures: metric.NewCounter(metaDelegateSnapshotFailures),
DelegateSnapshotInProgress: metric.NewGauge(metaDelegateSnapshotInProgress),

// Raft processing metrics.
RaftTicks: metric.NewCounter(metaRaftTicks),
Expand Down
9 changes: 8 additions & 1 deletion pkg/kv/kvserver/replica_command.go
Original file line number Diff line number Diff line change
Expand Up @@ -2845,13 +2845,20 @@ func (r *Replica) sendSnapshotUsingDelegate(
if selfDelegate {
delegateRequest.QueueOnDelegateLen = -1
}
if !selfDelegate {
r.store.Metrics().DelegateSnapshotInProgress.Inc(1)
}

retErr = contextutil.RunWithTimeout(
ctx, "send-snapshot", sendSnapshotTimeout, func(ctx context.Context) error {
// Hand the delegate snapshot request to the transport.
return r.store.cfg.Transport.DelegateSnapshot(ctx, delegateRequest)
},
)
if !selfDelegate {
r.store.Metrics().DelegateSnapshotInProgress.Dec(1)
}

// Return once we have success.
if retErr == nil {
if !selfDelegate {
Expand All @@ -2862,7 +2869,7 @@ func (r *Replica) sendSnapshotUsingDelegate(
if !selfDelegate {
r.store.Metrics().DelegateSnapshotFailures.Inc(1)
}
log.Warningf(ctx, "attempt %d: delegate snapshot %+v request failed %v", n+1, delegateRequest, retErr)
log.KvDistribution.Warningf(ctx, "attempt %d: delegate snapshot %+v request failed %v", n+1, delegateRequest, retErr)
}
}
return
Expand Down
5 changes: 3 additions & 2 deletions pkg/ts/catalog/chart_catalog.go
Original file line number Diff line number Diff line change
Expand Up @@ -636,8 +636,8 @@ var charts = []sectionDescription{
"range.snapshots.applied-voter",
"range.snapshots.applied-initial",
"range.snapshots.applied-non-voter",
"range.snapshot.delegate.successes",
"range.snapshot.delegate.failures",
"range.snapshots.delegate.successes",
"range.snapshots.delegate.failures",
},
},
{
Expand All @@ -649,6 +649,7 @@ var charts = []sectionDescription{
"range.snapshots.recv-in-progress",
"range.snapshots.send-total-in-progress",
"range.snapshots.recv-total-in-progress",
"range.snapshots.delegate.in-progress",
},
},
{
Expand Down