// Patch summary. This preamble sits before the first "diff --git" header and is not part of any hunk.
//
// cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go
//   BuildAWS: an error from GenerateEC2InstanceTypes(region) is now reported with klog.Errorf
//   (the message also prints lastUpdateTime) instead of klog.Fatalf. A nil generatedInstanceTypes
//   is then replaced with an empty map[string]*InstanceType ahead of the code introduced by the
//   "fallback on the static list" comment.
//
// cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go
//   Adds the "os" and cluster-autoscaler "config" imports and a new TestInstanceTypeFallback.
//   The test sets AWS_REGION to "non-existent-region", defers os.Unsetenv, calls BuildAWS and
//   discards the result; it contains no assertions.
//   NOTE(review): os.Setenv + defer os.Unsetenv does not restore a value AWS_REGION may already
//   have had, and the Setenv error is ignored. Confirm whether that matters for this suite.
//
// cluster-autoscaler/cloudprovider/aws/aws_manager.go
//   GetAsgOptions: four klog.Warning calls that pass a format string plus arguments become
//   klog.Warningf. The message text and arguments are unchanged.
diff --git a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go index 52df29f6e5d9..339c51be90d4 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go @@ -384,7 +384,10 @@ func BuildAWS(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscover generatedInstanceTypes, err := GenerateEC2InstanceTypes(region) if err != nil { - klog.Fatalf("Failed to generate AWS EC2 Instance Types: %v", err) + klog.Errorf("Failed to generate AWS EC2 Instance Types: %v, falling back to static list with last update time: %s", err, lastUpdateTime) + } + if generatedInstanceTypes == nil { + generatedInstanceTypes = map[string]*InstanceType{} } // fallback on the static list if we miss any instance types in the generated output // credits to: https://github.com/lyft/cni-ipvlan-vpc-k8s/pull/80 diff --git a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go index e1d6a92a1ef9..474a599603d8 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go @@ -17,6 +17,7 @@ limitations under the License. 
package aws import ( + "os" "testing" "github.com/aws/aws-sdk-go/aws" @@ -25,6 +26,7 @@ import ( "github.com/stretchr/testify/mock" apiv1 "k8s.io/api/core/v1" "k8s.io/autoscaler/cluster-autoscaler/cloudprovider" + "k8s.io/autoscaler/cluster-autoscaler/config" ) var testAwsManager = &AwsManager{ @@ -109,6 +111,23 @@ func TestBuildAwsCloudProvider(t *testing.T) { assert.NoError(t, err) } +func TestInstanceTypeFallback(t *testing.T) { + resourceLimiter := cloudprovider.NewResourceLimiter( + map[string]int64{cloudprovider.ResourceNameCores: 1, cloudprovider.ResourceNameMemory: 10000000}, + map[string]int64{cloudprovider.ResourceNameCores: 10, cloudprovider.ResourceNameMemory: 100000000}) + + do := cloudprovider.NodeGroupDiscoveryOptions{} + opts := config.AutoscalingOptions{} + + os.Setenv("AWS_REGION", "non-existent-region") + defer os.Unsetenv("AWS_REGION") + + // This test ensures that no klog.Fatalf calls occur when constructing the AWS cloud provider. Specifically it is + // intended to ensure that instance type fallback works correctly in the event of an error enumerating instance + // types. 
+ _ = BuildAWS(opts, do, resourceLimiter) +} + func TestName(t *testing.T) { provider := testProvider(t, testAwsManager) assert.Equal(t, provider.Name(), cloudprovider.AwsProviderName) diff --git a/cluster-autoscaler/cloudprovider/aws/aws_manager.go b/cluster-autoscaler/cloudprovider/aws/aws_manager.go index f4e46be80290..8517987efaef 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_manager.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_manager.go @@ -347,7 +347,7 @@ func (m *AwsManager) GetAsgOptions(asg asg, defaults config.NodeGroupAutoscaling if stringOpt, found := options[config.DefaultScaleDownUtilizationThresholdKey]; found { if opt, err := strconv.ParseFloat(stringOpt, 64); err != nil { - klog.Warning("failed to convert asg %s %s tag to float: %v", + klog.Warningf("failed to convert asg %s %s tag to float: %v", asg.Name, config.DefaultScaleDownUtilizationThresholdKey, err) } else { defaults.ScaleDownUtilizationThreshold = opt @@ -356,7 +356,7 @@ func (m *AwsManager) GetAsgOptions(asg asg, defaults config.NodeGroupAutoscaling if stringOpt, found := options[config.DefaultScaleDownGpuUtilizationThresholdKey]; found { if opt, err := strconv.ParseFloat(stringOpt, 64); err != nil { - klog.Warning("failed to convert asg %s %s tag to float: %v", + klog.Warningf("failed to convert asg %s %s tag to float: %v", asg.Name, config.DefaultScaleDownGpuUtilizationThresholdKey, err) } else { defaults.ScaleDownGpuUtilizationThreshold = opt @@ -365,7 +365,7 @@ func (m *AwsManager) GetAsgOptions(asg asg, defaults config.NodeGroupAutoscaling if stringOpt, found := options[config.DefaultScaleDownUnneededTimeKey]; found { if opt, err := time.ParseDuration(stringOpt); err != nil { - klog.Warning("failed to convert asg %s %s tag to duration: %v", + klog.Warningf("failed to convert asg %s %s tag to duration: %v", asg.Name, config.DefaultScaleDownUnneededTimeKey, err) } else { defaults.ScaleDownUnneededTime = opt @@ -374,7 +374,7 @@ func (m *AwsManager) GetAsgOptions(asg asg, 
defaults config.NodeGroupAutoscaling if stringOpt, found := options[config.DefaultScaleDownUnreadyTimeKey]; found { if opt, err := time.ParseDuration(stringOpt); err != nil { - klog.Warning("failed to convert asg %s %s tag to duration: %v", + klog.Warningf("failed to convert asg %s %s tag to duration: %v", asg.Name, config.DefaultScaleDownUnreadyTimeKey, err) } else { defaults.ScaleDownUnreadyTime = opt