From b73c4eaf0399ea28210c48fc553405846019c46d Mon Sep 17 00:00:00 2001 From: Todd Neal Date: Mon, 9 May 2022 14:01:22 -0500 Subject: [PATCH] fix instance type fallback Instead of logging a fatal error, log a standard error and fall back to loading instance types from the static list. --- .../cloudprovider/aws/aws_cloud_provider.go | 5 ++++- .../aws/aws_cloud_provider_test.go | 19 +++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go index 821b124ee698..e2aee7a36048 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go @@ -362,7 +362,10 @@ func BuildAWS(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscover generatedInstanceTypes, err := GenerateEC2InstanceTypes(region) if err != nil { - klog.Fatalf("Failed to generate AWS EC2 Instance Types: %v", err) + klog.Errorf("Failed to generate AWS EC2 Instance Types: %v, falling back to static list with last update time: %s", err, lastUpdateTime) + } + if generatedInstanceTypes == nil { + generatedInstanceTypes = map[string]*InstanceType{} } // fallback on the static list if we miss any instance types in the generated output // credits to: https://github.com/lyft/cni-ipvlan-vpc-k8s/pull/80 diff --git a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go index 587833779335..ad93facf0755 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go @@ -17,6 +17,7 @@ limitations under the License. package aws import ( + "os" "testing" "github.com/aws/aws-sdk-go/aws" @@ -26,6 +27,7 @@ import ( "github.com/stretchr/testify/mock" apiv1 "k8s.io/api/core/v1" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "k8s.io/autoscaler/cluster-autoscaler/config" ) type AutoScalingMock struct { @@ -148,6 +150,23 @@ func TestBuildAwsCloudProvider(t *testing.T) { assert.NoError(t, err) } +func TestInstanceTypeFallback(t *testing.T) { + resourceLimiter := cloudprovider.NewResourceLimiter( + map[string]int64{cloudprovider.ResourceNameCores: 1, cloudprovider.ResourceNameMemory: 10000000}, + map[string]int64{cloudprovider.ResourceNameCores: 10, cloudprovider.ResourceNameMemory: 100000000}) + + do := cloudprovider.NodeGroupDiscoveryOptions{} + opts := config.AutoscalingOptions{} + + os.Setenv("AWS_REGION", "non-existent-region") + defer os.Unsetenv("AWS_REGION") + + // This test ensures that no klog.Fatalf calls occur when constructing the AWS cloud provider. Specifically it is + // intended to ensure that instance type fallback works correctly in the event of an error enumerating instance + // types. + _ = BuildAWS(opts, do, resourceLimiter) +} + func TestName(t *testing.T) { provider := testProvider(t, testAwsManager) assert.Equal(t, provider.Name(), cloudprovider.AwsProviderName)