Skip to content

Commit

Permalink
Merge pull request #5557 from kisieland/gce-autopilot-client
Browse files Browse the repository at this point in the history
Simplify the GCE client FetchMigInstances function.
  • Loading branch information
k8s-ci-robot authored Mar 14, 2023
2 parents 63b334f + ee08e5b commit 1931ea6
Show file tree
Hide file tree
Showing 2 changed files with 184 additions and 43 deletions.
112 changes: 69 additions & 43 deletions cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -290,63 +290,38 @@ func (client *autoscalingGceClientV1) FetchMigInstances(migRef GceRef) ([]cloudp
for _, gceInstance := range gceInstances.ManagedInstances {
ref, err := ParseInstanceUrlRef(gceInstance.Instance)
if err != nil {
return nil, err
klog.Errorf("Received error while parsing of the instance url: %v", err)
continue
}

instance := cloudprovider.Instance{
Id: ref.ToProviderId(),
Status: &cloudprovider.InstanceStatus{},
}

switch gceInstance.CurrentAction {
case "CREATING", "RECREATING", "CREATING_WITHOUT_RETRIES":
instance.Status.State = cloudprovider.InstanceCreating
case "ABANDONING", "DELETING":
instance.Status.State = cloudprovider.InstanceDeleting
default:
instance.Status.State = cloudprovider.InstanceRunning
Id: ref.ToProviderId(),
Status: &cloudprovider.InstanceStatus{
State: getInstanceState(gceInstance.CurrentAction),
},
}

if instance.Status.State == cloudprovider.InstanceCreating {
var errorInfo cloudprovider.InstanceErrorInfo
var errorInfo *cloudprovider.InstanceErrorInfo
errorMessages := []string{}
errorFound := false
lastAttemptErrors := getLastAttemptErrors(gceInstance)
for _, instanceError := range lastAttemptErrors {
errorCodeCounts[instanceError.Code]++
if isResourcePoolExhaustedErrorCode(instanceError.Code) {
errorInfo.ErrorClass = cloudprovider.OutOfResourcesErrorClass
errorInfo.ErrorCode = ErrorCodeResourcePoolExhausted
} else if isQuotaExceededErrorCode(instanceError.Code) {
errorInfo.ErrorClass = cloudprovider.OutOfResourcesErrorClass
errorInfo.ErrorCode = ErrorCodeQuotaExceeded
} else if isIPSpaceExhaustedErrorCode(instanceError.Code) {
errorInfo.ErrorClass = cloudprovider.OtherErrorClass
errorInfo.ErrorCode = ErrorIPSpaceExhausted
} else if isPermissionsError(instanceError.Code) {
errorInfo.ErrorClass = cloudprovider.OtherErrorClass
errorInfo.ErrorCode = ErrorCodePermissions
} else if isVmExternalIpAccessPolicyConstraintError(instanceError) {
errorInfo.ErrorClass = cloudprovider.OtherErrorClass
errorInfo.ErrorCode = ErrorCodeVmExternalIpAccessPolicyConstraint
} else if isInstanceNotRunningYet(gceInstance) {
if !errorFound {
// do not override error code with OTHER
errorInfo.ErrorClass = cloudprovider.OtherErrorClass
errorInfo.ErrorCode = ErrorCodeOther
}
if newErrorInfo := GetErrorInfo(instanceError.Code, instanceError.Message, gceInstance.InstanceStatus, errorInfo); newErrorInfo != nil {
// override older error
errorInfo = newErrorInfo
} else {
// no error
continue
}
errorFound = true

if instanceError.Message != "" {
errorMessages = append(errorMessages, instanceError.Message)
}
}
errorInfo.ErrorMessage = strings.Join(errorMessages, "; ")
if errorFound {
instance.Status.ErrorInfo = &errorInfo
if errorInfo != nil {
errorInfo.ErrorMessage = strings.Join(errorMessages, "; ")
instance.Status.ErrorInfo = errorInfo
}

if len(lastAttemptErrors) > 0 {
Expand All @@ -369,6 +344,57 @@ func (client *autoscalingGceClientV1) FetchMigInstances(migRef GceRef) ([]cloudp
return infos, nil
}

// GetErrorInfo maps the error code, error message and instance status to CA instance error info
func GetErrorInfo(errorCode, errorMessage, instanceStatus string, previousErrorInfo *cloudprovider.InstanceErrorInfo) *cloudprovider.InstanceErrorInfo {
if isResourcePoolExhaustedErrorCode(errorCode) {
return &cloudprovider.InstanceErrorInfo{
ErrorClass: cloudprovider.OutOfResourcesErrorClass,
ErrorCode: ErrorCodeResourcePoolExhausted,
}
} else if isQuotaExceededErrorCode(errorCode) {
return &cloudprovider.InstanceErrorInfo{
ErrorClass: cloudprovider.OutOfResourcesErrorClass,
ErrorCode: ErrorCodeQuotaExceeded,
}
} else if isIPSpaceExhaustedErrorCode(errorCode) {
return &cloudprovider.InstanceErrorInfo{
ErrorClass: cloudprovider.OtherErrorClass,
ErrorCode: ErrorIPSpaceExhausted,
}
} else if isPermissionsError(errorCode) {
return &cloudprovider.InstanceErrorInfo{
ErrorClass: cloudprovider.OtherErrorClass,
ErrorCode: ErrorCodePermissions,
}
} else if isVmExternalIpAccessPolicyConstraintError(errorCode, errorMessage) {
return &cloudprovider.InstanceErrorInfo{
ErrorClass: cloudprovider.OtherErrorClass,
ErrorCode: ErrorCodeVmExternalIpAccessPolicyConstraint,
}
} else if isInstanceStatusNotRunningYet(instanceStatus) {
if previousErrorInfo != nil {
// keep the current error
return previousErrorInfo
}
return &cloudprovider.InstanceErrorInfo{
ErrorClass: cloudprovider.OtherErrorClass,
ErrorCode: ErrorCodeOther,
}
}
return nil
}

func getInstanceState(currentAction string) cloudprovider.InstanceState {
switch currentAction {
case "CREATING", "RECREATING", "CREATING_WITHOUT_RETRIES":
return cloudprovider.InstanceCreating
case "ABANDONING", "DELETING":
return cloudprovider.InstanceDeleting
default:
return cloudprovider.InstanceRunning
}
}

func getLastAttemptErrors(instance *gce.ManagedInstance) []*gce.ManagedInstanceLastAttemptErrorsErrors {
if instance.LastAttempt != nil && instance.LastAttempt.Errors != nil {
return instance.LastAttempt.Errors.Errors
Expand All @@ -392,13 +418,13 @@ func isPermissionsError(errorCode string) bool {
return strings.Contains(errorCode, "PERMISSIONS_ERROR")
}

func isVmExternalIpAccessPolicyConstraintError(err *gce.ManagedInstanceLastAttemptErrorsErrors) bool {
func isVmExternalIpAccessPolicyConstraintError(errorCode, errorMessage string) bool {
regexProjectPolicyConstraint := regexp.MustCompile(`Constraint constraints/compute.vmExternalIpAccess violated for project`)
return strings.Contains(err.Code, "CONDITION_NOT_MET") && regexProjectPolicyConstraint.MatchString(err.Message)
return strings.Contains(errorCode, "CONDITION_NOT_MET") && regexProjectPolicyConstraint.MatchString(errorMessage)
}

func isInstanceNotRunningYet(gceInstance *gce.ManagedInstance) bool {
return gceInstance.InstanceStatus == "" || gceInstance.InstanceStatus == "PROVISIONING" || gceInstance.InstanceStatus == "STAGING"
func isInstanceStatusNotRunningYet(instanceStatus string) bool {
return instanceStatus == "" || instanceStatus == "PROVISIONING" || instanceStatus == "STAGING"
}

func generateInstanceName(baseName string, existingNames map[string]bool) string {
Expand Down
115 changes: 115 additions & 0 deletions cluster-autoscaler/cloudprovider/gce/autoscaling_gce_client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,16 @@ package gce

import (
"encoding/json"
"fmt"
"net/http"
"testing"
"time"

"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
test_util "k8s.io/autoscaler/cluster-autoscaler/utils/test"

"github.com/google/go-cmp/cmp"
"github.com/google/go-cmp/cmp/cmpopts"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/mock"
gce_api "google.golang.org/api/compute/v1"
Expand Down Expand Up @@ -211,6 +214,118 @@ func TestErrors(t *testing.T) {
mock.AssertExpectationsForObjects(t, server)
}

func TestFetchMigInstancesInstanceUrlHandling(t *testing.T) {
const goodInstanceUrlTempl = "https://content.googleapis.com/compute/v1/projects/myprojid/zones/myzone/instances/myinst_%d"
const badInstanceUrl = "https://badurl.com/compute/v1/projects/myprojid/zones/myzone/instances/myinst"
server := test_util.NewHttpServerMock()
defer server.Close()
g := newTestAutoscalingGceClient(t, "project1", server.URL, "")

testCases := []struct {
name string
lmiResponse gce_api.InstanceGroupManagersListManagedInstancesResponse
wantInstances []cloudprovider.Instance
}{
{
name: "all instances good",
lmiResponse: gce_api.InstanceGroupManagersListManagedInstancesResponse{
ManagedInstances: []*gce_api.ManagedInstance{
{
Instance: fmt.Sprintf(goodInstanceUrlTempl, 2),
CurrentAction: "CREATING",
LastAttempt: &gce_api.ManagedInstanceLastAttempt{
Errors: &gce_api.ManagedInstanceLastAttemptErrors{},
},
},
{
Instance: fmt.Sprintf(goodInstanceUrlTempl, 42),
CurrentAction: "CREATING",
LastAttempt: &gce_api.ManagedInstanceLastAttempt{
Errors: &gce_api.ManagedInstanceLastAttemptErrors{},
},
},
},
},
wantInstances: []cloudprovider.Instance{
{
Id: "gce://myprojid/myzone/myinst_2",
Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceCreating},
},
{
Id: "gce://myprojid/myzone/myinst_42",
Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceCreating},
},
},
},
{
name: "instances with bad url",
lmiResponse: gce_api.InstanceGroupManagersListManagedInstancesResponse{
ManagedInstances: []*gce_api.ManagedInstance{
{
Instance: badInstanceUrl,
CurrentAction: "CREATING",
LastAttempt: &gce_api.ManagedInstanceLastAttempt{
Errors: &gce_api.ManagedInstanceLastAttemptErrors{},
},
},
{
Instance: fmt.Sprintf(goodInstanceUrlTempl, 42),
CurrentAction: "CREATING",
LastAttempt: &gce_api.ManagedInstanceLastAttempt{
Errors: &gce_api.ManagedInstanceLastAttemptErrors{},
},
},
},
},
wantInstances: []cloudprovider.Instance{
{
Id: "gce://myprojid/myzone/myinst_42",
Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceCreating},
},
},
},
{
name: "instance with empty url",
lmiResponse: gce_api.InstanceGroupManagersListManagedInstancesResponse{
ManagedInstances: []*gce_api.ManagedInstance{
{
Instance: "",
CurrentAction: "CREATING",
LastAttempt: &gce_api.ManagedInstanceLastAttempt{
Errors: &gce_api.ManagedInstanceLastAttemptErrors{},
},
},
{
Instance: fmt.Sprintf(goodInstanceUrlTempl, 42),
CurrentAction: "CREATING",
LastAttempt: &gce_api.ManagedInstanceLastAttempt{
Errors: &gce_api.ManagedInstanceLastAttemptErrors{},
},
},
},
},
wantInstances: []cloudprovider.Instance{
{
Id: "gce://myprojid/myzone/myinst_42",
Status: &cloudprovider.InstanceStatus{State: cloudprovider.InstanceCreating},
},
},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
b, err := json.Marshal(tc.lmiResponse)
assert.NoError(t, err)
server.On("handle", "/projects/zones/instanceGroupManagers/listManagedInstances").Return(string(b)).Times(1)
gotInstances, err := g.FetchMigInstances(GceRef{})
assert.NoError(t, err)
if diff := cmp.Diff(tc.wantInstances, gotInstances, cmpopts.EquateErrors()); diff != "" {
t.Errorf("FetchMigInstances(...): err diff (-want +got):\n%s", diff)
}
})
}
}

func TestUserAgent(t *testing.T) {
server := test_util.NewHttpServerMock(test_util.MockFieldUserAgent, test_util.MockFieldResponse)
defer server.Close()
Expand Down

0 comments on commit 1931ea6

Please sign in to comment.