Skip to content

Commit

Permalink
Merge #110060
Browse files Browse the repository at this point in the history
110060: admission: fix wait queue histograms r=irfansharif a=irfansharif

We previously did not record anything into {IO,CPU} wait queue histograms when work either bypassed admission control (because of the nature of the work, or when certain admission queues were disabled through cluster settings). This meant that our histogram percentiles were not accurate.

This problem didn't exist at the flow control level where work may not be subject to flow control depending on the mode selected ('apply_to_elastic', 'apply_to_all'). We'd still record a measured wait duration (~0ms), so we had accurate waiting-for-flow-tokens histograms.

Part of #82743.

Release note: None

Co-authored-by: irfan sharif <[email protected]>
  • Loading branch information
craig[bot] and irfansharif committed Sep 6, 2023
2 parents c18029b + 4da2627 commit 4ad7b7b
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 1 deletion.
1 change: 0 additions & 1 deletion pkg/kv/kvserver/kvadmission/kvadmission.go
Original file line number Diff line number Diff line change
Expand Up @@ -334,7 +334,6 @@ func (n *controllerImpl) AdmitKVWork(
AdmissionOriginNode: n.nodeID.Get(),
}
}

}
// If flow control is disabled or if work bypasses flow control, we still
// subject it above-raft, leaseholder-only IO admission control.
Expand Down
23 changes: 23 additions & 0 deletions pkg/util/admission/work_queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,7 @@ func (q *WorkQueue) Admit(ctx context.Context, info WorkInfo) (enabled bool, err
if !info.ReplicatedWorkInfo.Enabled {
enabledSetting := admissionControlEnabledSettings[q.workKind]
if enabledSetting != nil && !enabledSetting.Get(&q.settings.SV) {
q.metrics.recordBypassedAdmission(info.Priority)
return false, nil
}
}
Expand Down Expand Up @@ -618,6 +619,7 @@ func (q *WorkQueue) Admit(ctx context.Context, info WorkInfo) (enabled bool, err
q.admitMu.Unlock()
q.granter.tookWithoutPermission(info.RequestedCount)
q.metrics.incAdmitted(info.Priority)
q.metrics.recordBypassedAdmission(info.Priority)
return true, nil
}
// Work is subject to admission control.
Expand Down Expand Up @@ -663,6 +665,7 @@ func (q *WorkQueue) Admit(ctx context.Context, info WorkInfo) (enabled bool, err
false, /* coordMuLocked */
)
}
q.metrics.recordFastPathAdmission(info.Priority)
return true, nil
}
// Did not get token/slot.
Expand Down Expand Up @@ -1757,6 +1760,26 @@ func (m *WorkQueueMetrics) recordFinishWait(priority admissionpb.WorkPriority, d
priorityStats.WaitDurations.RecordValue(dur.Nanoseconds())
}

func (m *WorkQueueMetrics) recordBypassedAdmission(priority admissionpb.WorkPriority) {
// For work that either bypasses admission queues (because of the nature of
// the work itself or because certain queues are disabled), we'll explicit
// record a zero wait duration so that the histogram percentiles remain
// accurate.
m.total.WaitDurations.RecordValue(0)
priorityStats := m.getOrCreate(priority)
priorityStats.WaitDurations.RecordValue(0)
}

func (m *WorkQueueMetrics) recordFastPathAdmission(priority admissionpb.WorkPriority) {
// Explicitly record a zero wait queue duration when we're able to acquire
// tokens/slots without needing to add ourselves to tenant heaps. Explicitly
// recording zeros ensure that our histograms are accurate with respect to
// all work going through admission control.
m.total.WaitDurations.RecordValue(0)
priorityStats := m.getOrCreate(priority)
priorityStats.WaitDurations.RecordValue(0)
}

// MetricStruct implements the metric.Struct interface.
func (*WorkQueueMetrics) MetricStruct() {}

Expand Down

0 comments on commit 4ad7b7b

Please sign in to comment.