Skip to content

Commit

Permalink
UPSTREAM: <carry>: 🐛(metrics) Initialize metrics for autoscaler error…
Browse files Browse the repository at this point in the history
…s, scale events, and pod evictions

- Set initial count to zero for various autoscaler error types (e.g., CloudProviderError, ApiCallError)
- Define failed scale-up reasons and initialize metrics (e.g., CloudProviderError, APIError)
- Initialize pod eviction result counters for success and failure cases
- Initialize skipped scale events for CPU and memory resource limits in both scale-up and scale-down directions

Signed-off-by: Thiha Min Thant <[email protected]>
  • Loading branch information
thiha-min-thant authored and openshift-cherrypick-robot committed Dec 13, 2024
1 parent 44d929e commit 2b41465
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 0 deletions.
3 changes: 3 additions & 0 deletions cluster-autoscaler/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,9 @@ func buildAutoscaler(debuggingSnapshotter debuggingsnapshot.DebuggingSnapshotter
metrics.UpdateCPULimitsCores(autoscalingOptions.MinCoresTotal, autoscalingOptions.MaxCoresTotal)
metrics.UpdateMemoryLimitsBytes(autoscalingOptions.MinMemoryTotal, autoscalingOptions.MaxMemoryTotal)

// Initialize metrics.
metrics.InitMetrics()

// Create autoscaler.
autoscaler, err := core.NewAutoscaler(opts, informerFactory)
if err != nil {
Expand Down
22 changes: 22 additions & 0 deletions cluster-autoscaler/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,28 @@ func RegisterAll(emitPerNodeGroupMetrics bool) {
}
}

// InitMetrics seeds a fixed set of counter series by calling Add(0) on each
// one, so that every label combination listed below exists with a zero value
// before the first real event is recorded. It covers:
//   - errorsCount, per autoscaler error type;
//   - scaleDownCount and failedScaleUpCount, per failed scale-up reason;
//   - evictionsCount, per pod eviction result;
//   - skippedScaleEventsCount, per (direction, resource limit) pair.
//
// Counters and label values not listed here are left untouched.
func InitMetrics() {
	autoscalerErrorTypes := []errors.AutoscalerErrorType{
		errors.CloudProviderError,
		errors.ApiCallError,
		errors.InternalError,
		errors.TransientError,
		errors.ConfigurationError,
		errors.NodeGroupDoesNotExistError,
		errors.UnexpectedScaleDownStateError,
	}
	for i := range autoscalerErrorTypes {
		errorsCount.WithLabelValues(string(autoscalerErrorTypes[i])).Add(0)
	}

	// NOTE(review): scaleDownCount is seeded with failed scale-up reasons here,
	// exactly as the original code does. That looks unintended for a scale-down
	// counter — confirm against the metric's definition before changing it.
	scaleUpFailureReasons := []FailedScaleUpReason{CloudProviderError, APIError, Timeout}
	for i := range scaleUpFailureReasons {
		label := string(scaleUpFailureReasons[i])
		scaleDownCount.WithLabelValues(label).Add(0)
		failedScaleUpCount.WithLabelValues(label).Add(0)
	}

	evictionResults := []PodEvictionResult{PodEvictionSucceed, PodEvictionFailed}
	for i := range evictionResults {
		evictionsCount.WithLabelValues(string(evictionResults[i])).Add(0)
	}

	// Every direction is paired with every resource limit; scale-down pairs
	// are seeded first, then scale-up pairs.
	directions := []string{DirectionScaleDown, DirectionScaleUp}
	resourceLimits := []string{CpuResourceLimit, MemoryResourceLimit}
	for _, direction := range directions {
		for _, limit := range resourceLimits {
			skippedScaleEventsCount.WithLabelValues(direction, limit).Add(0)
		}
	}
}

// UpdateDurationFromStart records the duration of the step identified by the
// label using start time
func UpdateDurationFromStart(label FunctionLabel, start time.Time) {
Expand Down

0 comments on commit 2b41465

Please sign in to comment.