From 013fa19be3417c08ca350df472b0b752585fa59b Mon Sep 17 00:00:00 2001 From: Brett Elliott Date: Tue, 23 Mar 2021 15:16:07 +0100 Subject: [PATCH] Log failed scale up metric based on string of AutoscalerErrorType. --- cluster-autoscaler/core/scale_up.go | 6 +----- cluster-autoscaler/core/scale_up_test.go | 24 +++++++++++++++++++++-- cluster-autoscaler/metrics/metrics.go | 2 -- cluster-autoscaler/utils/errors/errors.go | 2 -- 4 files changed, 23 insertions(+), 11 deletions(-) diff --git a/cluster-autoscaler/core/scale_up.go b/cluster-autoscaler/core/scale_up.go index 567ff064ba52..8539084d656b 100644 --- a/cluster-autoscaler/core/scale_up.go +++ b/cluster-autoscaler/core/scale_up.go @@ -666,12 +666,8 @@ func executeScaleUp(context *context.AutoscalingContext, clusterStateRegistry *c increase := info.NewSize - info.CurrentSize if err := info.Group.IncreaseSize(increase); err != nil { context.LogRecorder.Eventf(apiv1.EventTypeWarning, "FailedToScaleUpGroup", "Scale-up failed for group %s: %v", info.Group.Id(), err) - reason := metrics.CloudProviderError aerr := errors.ToAutoscalerError(errors.CloudProviderError, err).AddPrefix("failed to increase node group size: %v", err) - if aerr.Type() == errors.AuthorizationError { - reason = metrics.AuthorizationError - } - clusterStateRegistry.RegisterFailedScaleUp(info.Group, reason, now) + clusterStateRegistry.RegisterFailedScaleUp(info.Group, metrics.FailedScaleUpReason(string(aerr.Type())), now) return aerr } clusterStateRegistry.RegisterOrUpdateScaleUp( diff --git a/cluster-autoscaler/core/scale_up_test.go b/cluster-autoscaler/core/scale_up_test.go index 69cb35e67e1c..6ff0902bbe33 100644 --- a/cluster-autoscaler/core/scale_up_test.go +++ b/cluster-autoscaler/core/scale_up_test.go @@ -18,6 +18,8 @@ package core import ( "fmt" + "net/http" + "net/http/httptest" "regexp" "strings" "testing" @@ -30,12 +32,14 @@ import ( "k8s.io/autoscaler/cluster-autoscaler/config" "k8s.io/autoscaler/cluster-autoscaler/core/utils" "k8s.io/autoscaler/cluster-autoscaler/estimator" + "k8s.io/autoscaler/cluster-autoscaler/metrics" "k8s.io/autoscaler/cluster-autoscaler/processors/nodegroupset" "k8s.io/autoscaler/cluster-autoscaler/utils/errors" kube_util "k8s.io/autoscaler/cluster-autoscaler/utils/kubernetes" . "k8s.io/autoscaler/cluster-autoscaler/utils/test" "k8s.io/autoscaler/cluster-autoscaler/utils/units" kube_record "k8s.io/client-go/tools/record" + "k8s.io/component-base/metrics/legacyregistry" appsv1 "k8s.io/api/apps/v1" apiv1 "k8s.io/api/core/v1" @@ -974,17 +978,33 @@ func TestCheckScaleUpDeltaWithinLimits(t *testing.T) { } func TestAuthError(t *testing.T) { + metrics.RegisterAll() context, err := NewScaleTestAutoscalingContext(config.AutoscalingOptions{}, &fake.Clientset{}, nil, nil, nil) assert.NoError(t, err) nodeGroup := &mockprovider.NodeGroup{} info := nodegroupset.ScaleUpInfo{Group: nodeGroup} nodeGroup.On("Id").Return("A") - nodeGroup.On("IncreaseSize", 0).Return(errors.NewAutoscalerError(errors.AuthorizationError, "")) + nodeGroup.On("IncreaseSize", 0).Return(errors.NewAutoscalerError(errors.AutoscalerErrorType("abcd"), "")) clusterStateRegistry := clusterstate.NewClusterStateRegistry(nil, clusterstate.ClusterStateRegistryConfig{}, context.LogRecorder, newBackoff()) aerr := executeScaleUp(&context, clusterStateRegistry, info, "", time.Now()) assert.Error(t, aerr) - assert.Equal(t, errors.AuthorizationError, aerr.Type()) + + req, err := http.NewRequest("GET", "/", nil) + if err != nil { + t.Fatal(err) + } + rr := httptest.NewRecorder() + handler := http.HandlerFunc(legacyregistry.Handler().ServeHTTP) + handler.ServeHTTP(rr, req) + + // Check that the status code is what we expect. + if status := rr.Code; status != http.StatusOK { + t.Errorf("handler returned wrong status code: got %v want %v", + status, http.StatusOK) + } + // Check that the failed scale up reason is set correctly. + assert.Contains(t, rr.Body.String(), "cluster_autoscaler_failed_scale_ups_total{reason=\"abcd\"} 1") } diff --git a/cluster-autoscaler/metrics/metrics.go b/cluster-autoscaler/metrics/metrics.go index efcfc18540a8..e3b5ed22edbd 100644 --- a/cluster-autoscaler/metrics/metrics.go +++ b/cluster-autoscaler/metrics/metrics.go @@ -65,8 +65,6 @@ const ( APIError FailedScaleUpReason = "apiCallError" // Timeout was encountered when trying to scale-up Timeout FailedScaleUpReason = "timeout" - // AuthorizationError is an authorization error. - AuthorizationError FailedScaleUpReason = "authorizationError" // autoscaledGroup is managed by CA autoscaledGroup NodeGroupType = "autoscaled" diff --git a/cluster-autoscaler/utils/errors/errors.go b/cluster-autoscaler/utils/errors/errors.go index 22216e5f867b..fbdd6e22a674 100644 --- a/cluster-autoscaler/utils/errors/errors.go +++ b/cluster-autoscaler/utils/errors/errors.go @@ -61,8 +61,6 @@ const ( // NodeGroupDoesNotExistError signifies that a NodeGroup // does not exist. NodeGroupDoesNotExistError AutoscalerErrorType = "nodeGroupDoesNotExistError" - // AuthorizationError signifies that an authorization error occurred. - AuthorizationError AutoscalerErrorType = "authorizationError" ) // NewAutoscalerError returns new autoscaler error with a message constructed from format string