Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix fallback for aws cloudprovider #4873

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -383,7 +383,10 @@ func BuildAWS(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscover

generatedInstanceTypes, err := GenerateEC2InstanceTypes(region)
if err != nil {
klog.Fatalf("Failed to generate AWS EC2 Instance Types: %v", err)
klog.Errorf("Failed to generate AWS EC2 Instance Types: %v, falling back to static list with last update time: %s", err, lastUpdateTime)
}
if generatedInstanceTypes == nil {
generatedInstanceTypes = map[string]*InstanceType{}
}
// fallback on the static list if we miss any instance types in the generated output
// credits to: https://github.com/lyft/cni-ipvlan-vpc-k8s/pull/80
Expand Down
19 changes: 19 additions & 0 deletions cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ limitations under the License.
package aws

import (
"os"
"testing"

"github.com/aws/aws-sdk-go/aws"
Expand All @@ -25,6 +26,7 @@ import (
"github.com/stretchr/testify/mock"
apiv1 "k8s.io/api/core/v1"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/config"
)

var testAwsManager = &AwsManager{
Expand Down Expand Up @@ -109,6 +111,23 @@ func TestBuildAwsCloudProvider(t *testing.T) {
assert.NoError(t, err)
}

func TestInstanceTypeFallback(t *testing.T) {
resourceLimiter := cloudprovider.NewResourceLimiter(
map[string]int64{cloudprovider.ResourceNameCores: 1, cloudprovider.ResourceNameMemory: 10000000},
map[string]int64{cloudprovider.ResourceNameCores: 10, cloudprovider.ResourceNameMemory: 100000000})

do := cloudprovider.NodeGroupDiscoveryOptions{}
opts := config.AutoscalingOptions{}

os.Setenv("AWS_REGION", "non-existent-region")
defer os.Unsetenv("AWS_REGION")

// This test ensures that no klog.Fatalf calls occur when constructing the AWS cloud provider. Specifically, it is
// intended to ensure that instance type fallback works correctly in the event of an error enumerating instance
// types.
_ = BuildAWS(opts, do, resourceLimiter)
Comment on lines +125 to +128
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

AFAICT, this test only exercises the happy path, right? If we passed in a non-existent AWS region such as "us-south-42", would GenerateEC2InstanceTypes return an error and therefore trigger the fallback mechanism?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good catch. I tested with no credentials set locally and assumed there were no credentials in the test environment either. I'll fix it.

}

func TestName(t *testing.T) {
provider := testProvider(t, testAwsManager)
assert.Equal(t, provider.Name(), cloudprovider.AwsProviderName)
Expand Down
8 changes: 4 additions & 4 deletions cluster-autoscaler/cloudprovider/aws/aws_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ func (m *AwsManager) GetAsgOptions(asg asg, defaults config.NodeGroupAutoscaling

if stringOpt, found := options[config.DefaultScaleDownUtilizationThresholdKey]; found {
if opt, err := strconv.ParseFloat(stringOpt, 64); err != nil {
klog.Warning("failed to convert asg %s %s tag to float: %v",
klog.Warningf("failed to convert asg %s %s tag to float: %v",
asg.Name, config.DefaultScaleDownUtilizationThresholdKey, err)
} else {
defaults.ScaleDownUtilizationThreshold = opt
Expand All @@ -356,7 +356,7 @@ func (m *AwsManager) GetAsgOptions(asg asg, defaults config.NodeGroupAutoscaling

if stringOpt, found := options[config.DefaultScaleDownGpuUtilizationThresholdKey]; found {
if opt, err := strconv.ParseFloat(stringOpt, 64); err != nil {
klog.Warning("failed to convert asg %s %s tag to float: %v",
klog.Warningf("failed to convert asg %s %s tag to float: %v",
asg.Name, config.DefaultScaleDownGpuUtilizationThresholdKey, err)
} else {
defaults.ScaleDownGpuUtilizationThreshold = opt
Expand All @@ -365,7 +365,7 @@ func (m *AwsManager) GetAsgOptions(asg asg, defaults config.NodeGroupAutoscaling

if stringOpt, found := options[config.DefaultScaleDownUnneededTimeKey]; found {
if opt, err := time.ParseDuration(stringOpt); err != nil {
klog.Warning("failed to convert asg %s %s tag to duration: %v",
klog.Warningf("failed to convert asg %s %s tag to duration: %v",
asg.Name, config.DefaultScaleDownUnneededTimeKey, err)
} else {
defaults.ScaleDownUnneededTime = opt
Expand All @@ -374,7 +374,7 @@ func (m *AwsManager) GetAsgOptions(asg asg, defaults config.NodeGroupAutoscaling

if stringOpt, found := options[config.DefaultScaleDownUnreadyTimeKey]; found {
if opt, err := time.ParseDuration(stringOpt); err != nil {
klog.Warning("failed to convert asg %s %s tag to duration: %v",
klog.Warningf("failed to convert asg %s %s tag to duration: %v",
asg.Name, config.DefaultScaleDownUnreadyTimeKey, err)
} else {
defaults.ScaleDownUnreadyTime = opt
Expand Down