diff --git a/cluster-autoscaler/FAQ.md b/cluster-autoscaler/FAQ.md index ddae61f4cdf3..1fdb1c4a1327 100644 --- a/cluster-autoscaler/FAQ.md +++ b/cluster-autoscaler/FAQ.md @@ -674,6 +674,7 @@ The following startup parameters are supported for cluster autoscaler: | `leader-elect-renew-deadline` | The interval between attempts by the acting master to renew a leadership slot before it stops leading.
This must be less than or equal to the lease duration.
This is only applicable if leader election is enabled | 10 seconds | `leader-elect-retry-period` | The duration the clients should wait between attempting acquisition and renewal of a leadership.
This is only applicable if leader election is enabled | 2 seconds | `leader-elect-resource-lock` | The type of resource object that is used for locking during leader election.
Supported options are `endpoints` (default) and `configmaps` | "endpoints" +| `static-instance-list` | Should CA fetch instance types in runtime or use a static list. AWS only | false # Troubleshooting: diff --git a/cluster-autoscaler/cloudprovider/aws/README.md b/cluster-autoscaler/cloudprovider/aws/README.md index 8058bfc216b5..970ae74b09c9 100644 --- a/cluster-autoscaler/cloudprovider/aws/README.md +++ b/cluster-autoscaler/cloudprovider/aws/README.md @@ -204,6 +204,11 @@ spec: - r5ad.2xlarge ``` +## Use Static Instance List +The set of the latest supported EC2 instance types will be fetched by the CA at run time. You can find all the available instance types in the CA logs. +If your network access is restricted such that fetching this set is infeasible, you can specify the command-line flag `--static-instance-list=true` to switch the CA back to its original use of a statically defined set. + + ### Example usage: * Create a [Launch Template](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-autoscaling-autoscalinggroup-launchtemplate.html) (LT) with an instance type, for example, r5.2xlarge. Consider this the 'base' instance type. Do not define any spot purchase options here. diff --git a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go index 1dc9774058ac..2fff0de7d651 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go @@ -49,13 +49,16 @@ var ( type awsCloudProvider struct { awsManager *AwsManager resourceLimiter *cloudprovider.ResourceLimiter + // InstanceTypes is a map of ec2 resources + instanceTypes map[string]*InstanceType } // BuildAwsCloudProvider builds CloudProvider implementation for AWS. 
-func BuildAwsCloudProvider(awsManager *AwsManager, resourceLimiter *cloudprovider.ResourceLimiter) (cloudprovider.CloudProvider, error) { +func BuildAwsCloudProvider(awsManager *AwsManager, instanceTypes map[string]*InstanceType, resourceLimiter *cloudprovider.ResourceLimiter) (cloudprovider.CloudProvider, error) { aws := &awsCloudProvider{ awsManager: awsManager, resourceLimiter: resourceLimiter, + instanceTypes: instanceTypes, } return aws, nil } @@ -343,12 +346,36 @@ func BuildAWS(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscover defer config.Close() } + // Generate EC2 list + var instanceTypes map[string]*InstanceType + if opts.StaticInstanceList { + klog.Warning("Use static EC2 Instance Types, list could be outdated") + instanceTypes = GetStaticEC2InstanceTypes() + } else { + region, err := GetCurrentAwsRegion() + if err != nil { + klog.Fatalf("Failed to get AWS Region: %v", err) + } + + instanceTypes, err = GenerateEC2InstanceTypes(region) + if err != nil { + klog.Fatalf("Failed to generate AWS EC2 Instance Types: %v", err) + } + + keys := make([]string, 0, len(instanceTypes)) + for key := range instanceTypes { + keys = append(keys, key) + } + + klog.Infof("Successfully load %d EC2 Instance Types %s", len(keys), keys) + } + manager, err := CreateAwsManager(config, do) if err != nil { klog.Fatalf("Failed to create AWS Manager: %v", err) } - provider, err := BuildAwsCloudProvider(manager, rl) + provider, err := BuildAwsCloudProvider(manager, instanceTypes, rl) if err != nil { klog.Fatalf("Failed to create AWS cloud provider: %v", err) } diff --git a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go index e64377f624a8..ece92296efbb 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider_test.go @@ -134,7 +134,7 @@ func testProvider(t *testing.T, m *AwsManager) *awsCloudProvider { 
map[string]int64{cloudprovider.ResourceNameCores: 1, cloudprovider.ResourceNameMemory: 10000000}, map[string]int64{cloudprovider.ResourceNameCores: 10, cloudprovider.ResourceNameMemory: 100000000}) - provider, err := BuildAwsCloudProvider(m, resourceLimiter) + provider, err := BuildAwsCloudProvider(m, GetStaticEC2InstanceTypes(), resourceLimiter) assert.NoError(t, err) return provider.(*awsCloudProvider) } @@ -144,7 +144,7 @@ func TestBuildAwsCloudProvider(t *testing.T) { map[string]int64{cloudprovider.ResourceNameCores: 1, cloudprovider.ResourceNameMemory: 10000000}, map[string]int64{cloudprovider.ResourceNameCores: 10, cloudprovider.ResourceNameMemory: 100000000}) - _, err := BuildAwsCloudProvider(testAwsManager, resourceLimiter) + _, err := BuildAwsCloudProvider(testAwsManager, GetStaticEC2InstanceTypes(), resourceLimiter) assert.NoError(t, err) } diff --git a/cluster-autoscaler/cloudprovider/aws/aws_manager.go b/cluster-autoscaler/cloudprovider/aws/aws_manager.go index 336644c1ab98..8deab85194ab 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_manager.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_manager.go @@ -62,7 +62,7 @@ type AwsManager struct { } type asgTemplate struct { - InstanceType *instanceType + InstanceType *InstanceType Region string Zone string Tags []*autoscaling.TagDescription diff --git a/cluster-autoscaler/cloudprovider/aws/aws_manager_test.go b/cluster-autoscaler/cloudprovider/aws/aws_manager_test.go index 4e1c5bbac00b..afb6edda9e6c 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_manager_test.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_manager_test.go @@ -70,7 +70,7 @@ func TestGetRegion(t *testing.T) { func TestBuildGenericLabels(t *testing.T) { labels := buildGenericLabels(&asgTemplate{ - InstanceType: &instanceType{ + InstanceType: &InstanceType{ InstanceType: "c4.large", VCPU: 2, MemoryMb: 3840, diff --git a/cluster-autoscaler/cloudprovider/aws/aws_util.go b/cluster-autoscaler/cloudprovider/aws/aws_util.go new file 
mode 100644
index 000000000000..49ac54ecd011
--- /dev/null
+++ b/cluster-autoscaler/cloudprovider/aws/aws_util.go
@@ -0,0 +1,209 @@
+/*
+Copyright 2019 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package aws
+
+import (
+	"encoding/json"
+	"errors"
+	"fmt"
+	"io/ioutil"
+	"net/http"
+	"os"
+	"regexp"
+	"strconv"
+	"strings"
+	"time"
+
+	"github.com/aws/aws-sdk-go/aws/endpoints"
+	"k8s.io/klog"
+)
+
+// ec2MetaDataServiceTimeout bounds the instance metadata request so that a
+// host without a reachable metadata endpoint fails instead of hanging.
+const ec2MetaDataServiceTimeout = 10 * time.Second
+
+var (
+	ec2MetaDataServiceUrl        = "http://169.254.169.254/latest/dynamic/instance-identity/document"
+	ec2PricingServiceUrlTemplate = "https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonEC2/current/%s/index.json"
+
+	// nonNumericRegexp matches everything that is not a digit or a dot. It is
+	// compiled once here instead of on every parseMemory call.
+	nonNumericRegexp = regexp.MustCompile("[^0-9\\.]+")
+)
+
+type response struct {
+	Products map[string]product `json:"products"`
+}
+
+type product struct {
+	Attributes productAttributes `json:"attributes"`
+}
+
+type productAttributes struct {
+	InstanceType string `json:"instanceType"`
+	VCPU         string `json:"vcpu"`
+	Memory       string `json:"memory"`
+	GPU          string `json:"gpu"`
+}
+
+// GenerateEC2InstanceTypes returns a map of ec2 resources keyed by instance
+// type name, built from the public EC2 pricing index. An empty region loads
+// every region of every partition known to the AWS SDK; otherwise only the
+// matching region is loaded. A region whose index cannot be fetched or decoded
+// is skipped with a warning, and an error is returned only when no instance
+// type could be loaded at all.
+func GenerateEC2InstanceTypes(region string) (map[string]*InstanceType, error) {
+	instanceTypes := make(map[string]*InstanceType)
+
+	resolver := endpoints.DefaultResolver()
+	partitions := resolver.(endpoints.EnumPartitions).Partitions()
+
+	for _, p := range partitions {
+		for _, r := range p.Regions() {
+			if region != "" && region != r.ID() {
+				continue
+			}
+
+			url := fmt.Sprintf(ec2PricingServiceUrlTemplate, r.ID())
+			klog.V(1).Infof("fetching %s\n", url)
+			if err := loadEC2InstanceTypes(url, instanceTypes); err != nil {
+				klog.Warningf("Error loading %s, skipping: %v\n", url, err)
+			}
+		}
+	}
+
+	if len(instanceTypes) == 0 {
+		return nil, errors.New("unable to load EC2 Instance Type list")
+	}
+
+	return instanceTypes, nil
+}
+
+// loadEC2InstanceTypes downloads one pricing index and merges the instance
+// types it lists into instanceTypes. It is a separate function so that the
+// deferred Close runs after each region instead of piling up until the whole
+// multi-region loop in GenerateEC2InstanceTypes has finished.
+func loadEC2InstanceTypes(url string, instanceTypes map[string]*InstanceType) error {
+	res, err := http.Get(url)
+	if err != nil {
+		return fmt.Errorf("fetch failed: %v", err)
+	}
+	defer res.Body.Close()
+
+	if res.StatusCode != http.StatusOK {
+		return fmt.Errorf("unexpected HTTP status %q", res.Status)
+	}
+
+	// Decode straight from the response stream so the raw body is never held
+	// in memory alongside the decoded products.
+	var unmarshalled response
+	if err := json.NewDecoder(res.Body).Decode(&unmarshalled); err != nil {
+		return fmt.Errorf("decode failed: %v", err)
+	}
+
+	for _, product := range unmarshalled.Products {
+		attr := product.Attributes
+		if attr.InstanceType == "" {
+			continue
+		}
+
+		instanceType := &InstanceType{InstanceType: attr.InstanceType}
+		if attr.Memory != "" && attr.Memory != "NA" {
+			instanceType.MemoryMb = parseMemory(attr.Memory)
+		}
+		if attr.VCPU != "" {
+			instanceType.VCPU = parseCPU(attr.VCPU)
+		}
+		if attr.GPU != "" {
+			instanceType.GPU = parseCPU(attr.GPU)
+		}
+		instanceTypes[attr.InstanceType] = instanceType
+	}
+
+	return nil
+}
+
+// GetStaticEC2InstanceTypes return pregenerated ec2 instance type list
+func GetStaticEC2InstanceTypes() map[string]*InstanceType {
+	return InstanceTypes
+}
+
+// parseMemory converts a memory string such as "3.75 GiB" into MiB. Every
+// character that is not a digit or a dot is dropped first, so the unit suffix
+// is ignored and the number is always interpreted as GiB. A string without a
+// parsable number terminates the process through klog.Fatal.
+func parseMemory(memory string) int64 {
+	parsed := strings.TrimSpace(nonNumericRegexp.ReplaceAllString(memory, ""))
+	mem, err := strconv.ParseFloat(parsed, 64)
+	if err != nil {
+		klog.Fatal(err)
+	}
+
+	return int64(mem * float64(1024))
+}
+
+// parseCPU parses a base-10 integer count such as a vCPU or GPU number. A
+// string that is not an integer terminates the process through klog.Fatal.
+func parseCPU(cpu string) int64 {
+	i, err := strconv.ParseInt(cpu, 10, 64)
+	if err != nil {
+		klog.Fatal(err)
+	}
+	return i
+}
+
+// GetCurrentAwsRegion return region of current cluster without building awsManager.
+// A non-empty AWS_REGION environment variable takes precedence; otherwise the
+// region is read from the document served at ec2MetaDataServiceUrl. An error is
+// returned when neither source yields a region, so a nil error always comes
+// with a non-empty region.
+func GetCurrentAwsRegion() (string, error) {
+	if region := os.Getenv("AWS_REGION"); region != "" {
+		return region, nil
+	}
+
+	klog.V(1).Infof("fetching %s\n", ec2MetaDataServiceUrl)
+	client := http.Client{Timeout: ec2MetaDataServiceTimeout}
+	res, err := client.Get(ec2MetaDataServiceUrl)
+	if err != nil {
+		return "", fmt.Errorf("error fetching %s: %v", ec2MetaDataServiceUrl, err)
+	}
+	defer res.Body.Close()
+
+	if res.StatusCode != http.StatusOK {
+		return "", fmt.Errorf("error fetching %s: unexpected HTTP status %q", ec2MetaDataServiceUrl, res.Status)
+	}
+
+	body, err := ioutil.ReadAll(res.Body)
+	if err != nil {
+		return "", fmt.Errorf("error reading %s: %v", ec2MetaDataServiceUrl, err)
+	}
+
+	// Only the region is needed, so decode into a narrow struct: a value of
+	// any other type elsewhere in the document cannot fail the decode.
+	var document struct {
+		Region string `json:"region"`
+	}
+	if err := json.Unmarshal(body, &document); err != nil {
+		return "", fmt.Errorf("error unmarshalling %s: %v", ec2MetaDataServiceUrl, err)
+	}
+	if document.Region == "" {
+		return "", fmt.Errorf("no region found in %s", ec2MetaDataServiceUrl)
+	}
+
+	return document.Region, nil
+}
diff --git a/cluster-autoscaler/cloudprovider/aws/aws_util_test.go b/cluster-autoscaler/cloudprovider/aws/aws_util_test.go
new file mode 100644
index 000000000000..48b36f7c8607
--- /dev/null
+++ b/cluster-autoscaler/cloudprovider/aws/aws_util_test.go
@@ -0,0 +1,157 @@
+/*
+Copyright 2019 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package aws
+
+import (
+	"net/http"
+	"net/http/httptest"
+	"os"
+	"strconv"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+// saveRegionState snapshots the AWS_REGION environment variable and the
+// metadata service URL and returns a function that restores both, so tests
+// that change them do not leak state into the rest of the package's tests.
+func saveRegionState() func() {
+	oldURL := ec2MetaDataServiceUrl
+	oldRegion, hadRegion := os.LookupEnv("AWS_REGION")
+	return func() {
+		ec2MetaDataServiceUrl = oldURL
+		if hadRegion {
+			os.Setenv("AWS_REGION", oldRegion)
+		} else {
+			os.Unsetenv("AWS_REGION")
+		}
+	}
+}
+
+func TestGetStaticEC2InstanceTypes(t *testing.T) {
+	result := GetStaticEC2InstanceTypes()
+	assert.True(t, len(result) != 0)
+}
+
+func TestParseMemory(t *testing.T) {
+	expectedResultInMiB := int64(3.75 * 1024)
+	tests := []struct {
+		input  string
+		expect int64
+	}{
+		{
+			input:  "3.75 GiB",
+			expect: expectedResultInMiB,
+		},
+		{
+			input:  "3.75 Gib",
+			expect: expectedResultInMiB,
+		},
+		{
+			input:  "3.75GiB",
+			expect: expectedResultInMiB,
+		},
+		{
+			input:  "3.75",
+			expect: expectedResultInMiB,
+		},
+	}
+
+	for _, test := range tests {
+		got := parseMemory(test.input)
+		assert.Equal(t, test.expect, got)
+	}
+}
+
+func TestParseCPU(t *testing.T) {
+	tests := []struct {
+		input  string
+		expect int64
+	}{
+		{
+			input:  strconv.FormatInt(8, 10),
+			expect: int64(8),
+		},
+	}
+
+	for _, test := range tests {
+		got := parseCPU(test.input)
+		assert.Equal(t, test.expect, got)
+	}
+}
+
+func TestGenerateEC2InstanceTypes(t *testing.T) {
+	server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
+		rw.Write([]byte(`{"products": {"sku": {"attributes": {"instanceType": "m5.large", "vcpu": "2", "memory": "8 GiB"}}}}`))
+	}))
+	defer server.Close()
+
+	oldTemplate := ec2PricingServiceUrlTemplate
+	ec2PricingServiceUrlTemplate = server.URL + "/%s"
+	defer func() { ec2PricingServiceUrlTemplate = oldTemplate }()
+
+	result, err := GenerateEC2InstanceTypes("us-west-2")
+
+	assert.Nil(t, err)
+	assert.Equal(t, map[string]*InstanceType{
+		"m5.large": {InstanceType: "m5.large", VCPU: 2, MemoryMb: 8192},
+	}, result)
+}
+
+func TestGetCurrentAwsRegion(t *testing.T) {
+	defer saveRegionState()()
+	region := "us-west-2"
+	os.Unsetenv("AWS_REGION")
+
+	server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
+		rw.Write([]byte("{\"region\" : \"" + region + "\"}"))
+	}))
+	// Close the server when test finishes
+	defer server.Close()
+
+	ec2MetaDataServiceUrl = server.URL
+	result, err := GetCurrentAwsRegion()
+
+	assert.Nil(t, err)
+	assert.Equal(t, region, result)
+}
+
+func TestGetCurrentAwsRegionWithoutRegion(t *testing.T) {
+	defer saveRegionState()()
+	os.Unsetenv("AWS_REGION")
+
+	server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
+		rw.Write([]byte("{}"))
+	}))
+	defer server.Close()
+
+	ec2MetaDataServiceUrl = server.URL
+	result, err := GetCurrentAwsRegion()
+
+	assert.NotNil(t, err)
+	assert.Equal(t, "", result)
+}
+
+func TestGetCurrentAwsRegionWithRegionEnv(t *testing.T) {
+	defer saveRegionState()()
+	region := "us-west-2"
+	os.Setenv("AWS_REGION", region)
+
+	result, err := GetCurrentAwsRegion()
+	assert.Nil(t, err)
+	assert.Equal(t, region, result)
+}
diff --git a/cluster-autoscaler/cloudprovider/aws/ec2_instance_types.go b/cluster-autoscaler/cloudprovider/aws/ec2_instance_types.go
index cfedc70db2e9..9e9c08ca1ed2 100644
--- a/cluster-autoscaler/cloudprovider/aws/ec2_instance_types.go
+++ 
b/cluster-autoscaler/cloudprovider/aws/ec2_instance_types.go @@ -18,7 +18,8 @@ limitations under the License. package aws -type instanceType struct { +// InstanceType is sepc of EC2 instance +type InstanceType struct { InstanceType string VCPU int64 MemoryMb int64 @@ -26,7 +27,7 @@ type instanceType struct { } // InstanceTypes is a map of ec2 resources -var InstanceTypes = map[string]*instanceType{ +var InstanceTypes = map[string]*InstanceType{ "a1": { InstanceType: "a1", VCPU: 16, @@ -57,6 +58,12 @@ var InstanceTypes = map[string]*instanceType{ MemoryMb: 2048, GPU: 0, }, + "a1.metal": { + InstanceType: "a1.metal", + VCPU: 16, + MemoryMb: 32768, + GPU: 0, + }, "a1.xlarge": { InstanceType: "a1.xlarge", VCPU: 4, @@ -213,12 +220,24 @@ var InstanceTypes = map[string]*instanceType{ MemoryMb: 0, GPU: 0, }, + "c5d.12xlarge": { + InstanceType: "c5d.12xlarge", + VCPU: 48, + MemoryMb: 131072, + GPU: 0, + }, "c5d.18xlarge": { InstanceType: "c5d.18xlarge", VCPU: 72, MemoryMb: 147456, GPU: 0, }, + "c5d.24xlarge": { + InstanceType: "c5d.24xlarge", + VCPU: 96, + MemoryMb: 262144, + GPU: 0, + }, "c5d.2xlarge": { InstanceType: "c5d.2xlarge", VCPU: 8, @@ -243,6 +262,12 @@ var InstanceTypes = map[string]*instanceType{ MemoryMb: 4096, GPU: 0, }, + "c5d.metal": { + InstanceType: "c5d.metal", + VCPU: 96, + MemoryMb: 262144, + GPU: 0, + }, "c5d.xlarge": { InstanceType: "c5d.xlarge", VCPU: 4, @@ -285,6 +310,12 @@ var InstanceTypes = map[string]*instanceType{ MemoryMb: 5376, GPU: 0, }, + "c5n.metal": { + InstanceType: "c5n.metal", + VCPU: 72, + MemoryMb: 196608, + GPU: 0, + }, "c5n.xlarge": { InstanceType: "c5n.xlarge", VCPU: 4, @@ -405,6 +436,42 @@ var InstanceTypes = map[string]*instanceType{ MemoryMb: 31232, GPU: 1, }, + "g4dn.12xlarge": { + InstanceType: "g4dn.12xlarge", + VCPU: 48, + MemoryMb: 196608, + GPU: 4, + }, + "g4dn.16xlarge": { + InstanceType: "g4dn.16xlarge", + VCPU: 64, + MemoryMb: 262144, + GPU: 1, + }, + "g4dn.2xlarge": { + InstanceType: "g4dn.2xlarge", + VCPU: 8, + 
MemoryMb: 32768, + GPU: 1, + }, + "g4dn.4xlarge": { + InstanceType: "g4dn.4xlarge", + VCPU: 16, + MemoryMb: 65536, + GPU: 1, + }, + "g4dn.8xlarge": { + InstanceType: "g4dn.8xlarge", + VCPU: 32, + MemoryMb: 131072, + GPU: 1, + }, + "g4dn.xlarge": { + InstanceType: "g4dn.xlarge", + VCPU: 4, + MemoryMb: 16384, + GPU: 1, + }, "h1": { InstanceType: "h1", VCPU: 64, @@ -561,6 +628,12 @@ var InstanceTypes = map[string]*instanceType{ MemoryMb: 16384, GPU: 0, }, + "i3en.metal": { + InstanceType: "i3en.metal", + VCPU: 96, + MemoryMb: 786432, + GPU: 0, + }, "i3en.xlarge": { InstanceType: "i3en.xlarge", VCPU: 4, @@ -795,6 +868,12 @@ var InstanceTypes = map[string]*instanceType{ MemoryMb: 196608, GPU: 0, }, + "m5ad.16xlarge": { + InstanceType: "m5ad.16xlarge", + VCPU: 64, + MemoryMb: 262144, + GPU: 0, + }, "m5ad.24xlarge": { InstanceType: "m5ad.24xlarge", VCPU: 96, @@ -813,6 +892,12 @@ var InstanceTypes = map[string]*instanceType{ MemoryMb: 65536, GPU: 0, }, + "m5ad.8xlarge": { + InstanceType: "m5ad.8xlarge", + VCPU: 32, + MemoryMb: 131072, + GPU: 0, + }, "m5ad.large": { InstanceType: "m5ad.large", VCPU: 2, @@ -885,6 +970,126 @@ var InstanceTypes = map[string]*instanceType{ MemoryMb: 16384, GPU: 0, }, + "m5dn": { + InstanceType: "m5dn", + VCPU: 96, + MemoryMb: 0, + GPU: 0, + }, + "m5dn.12xlarge": { + InstanceType: "m5dn.12xlarge", + VCPU: 48, + MemoryMb: 196608, + GPU: 0, + }, + "m5dn.16xlarge": { + InstanceType: "m5dn.16xlarge", + VCPU: 64, + MemoryMb: 262144, + GPU: 0, + }, + "m5dn.24xlarge": { + InstanceType: "m5dn.24xlarge", + VCPU: 96, + MemoryMb: 393216, + GPU: 0, + }, + "m5dn.2xlarge": { + InstanceType: "m5dn.2xlarge", + VCPU: 8, + MemoryMb: 32768, + GPU: 0, + }, + "m5dn.4xlarge": { + InstanceType: "m5dn.4xlarge", + VCPU: 16, + MemoryMb: 65536, + GPU: 0, + }, + "m5dn.8xlarge": { + InstanceType: "m5dn.8xlarge", + VCPU: 32, + MemoryMb: 131072, + GPU: 0, + }, + "m5dn.large": { + InstanceType: "m5dn.large", + VCPU: 2, + MemoryMb: 8192, + GPU: 0, + }, + "m5dn.metal": { + 
InstanceType: "m5dn.metal", + VCPU: 96, + MemoryMb: 393216, + GPU: 0, + }, + "m5dn.xlarge": { + InstanceType: "m5dn.xlarge", + VCPU: 4, + MemoryMb: 16384, + GPU: 0, + }, + "m5n": { + InstanceType: "m5n", + VCPU: 96, + MemoryMb: 0, + GPU: 0, + }, + "m5n.12xlarge": { + InstanceType: "m5n.12xlarge", + VCPU: 48, + MemoryMb: 196608, + GPU: 0, + }, + "m5n.16xlarge": { + InstanceType: "m5n.16xlarge", + VCPU: 64, + MemoryMb: 262144, + GPU: 0, + }, + "m5n.24xlarge": { + InstanceType: "m5n.24xlarge", + VCPU: 96, + MemoryMb: 393216, + GPU: 0, + }, + "m5n.2xlarge": { + InstanceType: "m5n.2xlarge", + VCPU: 8, + MemoryMb: 32768, + GPU: 0, + }, + "m5n.4xlarge": { + InstanceType: "m5n.4xlarge", + VCPU: 16, + MemoryMb: 65536, + GPU: 0, + }, + "m5n.8xlarge": { + InstanceType: "m5n.8xlarge", + VCPU: 32, + MemoryMb: 131072, + GPU: 0, + }, + "m5n.large": { + InstanceType: "m5n.large", + VCPU: 2, + MemoryMb: 8192, + GPU: 0, + }, + "m5n.metal": { + InstanceType: "m5n.metal", + VCPU: 96, + MemoryMb: 393216, + GPU: 0, + }, + "m5n.xlarge": { + InstanceType: "m5n.xlarge", + VCPU: 4, + MemoryMb: 16384, + GPU: 0, + }, "p2": { InstanceType: "p2", VCPU: 64, @@ -1137,6 +1342,12 @@ var InstanceTypes = map[string]*instanceType{ MemoryMb: 393216, GPU: 0, }, + "r5ad.16xlarge": { + InstanceType: "r5ad.16xlarge", + VCPU: 64, + MemoryMb: 524288, + GPU: 0, + }, "r5ad.24xlarge": { InstanceType: "r5ad.24xlarge", VCPU: 96, @@ -1155,6 +1366,12 @@ var InstanceTypes = map[string]*instanceType{ MemoryMb: 131072, GPU: 0, }, + "r5ad.8xlarge": { + InstanceType: "r5ad.8xlarge", + VCPU: 32, + MemoryMb: 262144, + GPU: 0, + }, "r5ad.large": { InstanceType: "r5ad.large", VCPU: 2, @@ -1227,6 +1444,126 @@ var InstanceTypes = map[string]*instanceType{ MemoryMb: 32768, GPU: 0, }, + "r5dn": { + InstanceType: "r5dn", + VCPU: 96, + MemoryMb: 0, + GPU: 0, + }, + "r5dn.12xlarge": { + InstanceType: "r5dn.12xlarge", + VCPU: 48, + MemoryMb: 393216, + GPU: 0, + }, + "r5dn.16xlarge": { + InstanceType: "r5dn.16xlarge", + VCPU: 64, + 
MemoryMb: 524288, + GPU: 0, + }, + "r5dn.24xlarge": { + InstanceType: "r5dn.24xlarge", + VCPU: 96, + MemoryMb: 786432, + GPU: 0, + }, + "r5dn.2xlarge": { + InstanceType: "r5dn.2xlarge", + VCPU: 8, + MemoryMb: 65536, + GPU: 0, + }, + "r5dn.4xlarge": { + InstanceType: "r5dn.4xlarge", + VCPU: 16, + MemoryMb: 131072, + GPU: 0, + }, + "r5dn.8xlarge": { + InstanceType: "r5dn.8xlarge", + VCPU: 32, + MemoryMb: 262144, + GPU: 0, + }, + "r5dn.large": { + InstanceType: "r5dn.large", + VCPU: 2, + MemoryMb: 16384, + GPU: 0, + }, + "r5dn.metal": { + InstanceType: "r5dn.metal", + VCPU: 96, + MemoryMb: 786432, + GPU: 0, + }, + "r5dn.xlarge": { + InstanceType: "r5dn.xlarge", + VCPU: 4, + MemoryMb: 32768, + GPU: 0, + }, + "r5n": { + InstanceType: "r5n", + VCPU: 96, + MemoryMb: 0, + GPU: 0, + }, + "r5n.12xlarge": { + InstanceType: "r5n.12xlarge", + VCPU: 48, + MemoryMb: 393216, + GPU: 0, + }, + "r5n.16xlarge": { + InstanceType: "r5n.16xlarge", + VCPU: 64, + MemoryMb: 524288, + GPU: 0, + }, + "r5n.24xlarge": { + InstanceType: "r5n.24xlarge", + VCPU: 96, + MemoryMb: 786432, + GPU: 0, + }, + "r5n.2xlarge": { + InstanceType: "r5n.2xlarge", + VCPU: 8, + MemoryMb: 65536, + GPU: 0, + }, + "r5n.4xlarge": { + InstanceType: "r5n.4xlarge", + VCPU: 16, + MemoryMb: 131072, + GPU: 0, + }, + "r5n.8xlarge": { + InstanceType: "r5n.8xlarge", + VCPU: 32, + MemoryMb: 262144, + GPU: 0, + }, + "r5n.large": { + InstanceType: "r5n.large", + VCPU: 2, + MemoryMb: 16384, + GPU: 0, + }, + "r5n.metal": { + InstanceType: "r5n.metal", + VCPU: 96, + MemoryMb: 786432, + GPU: 0, + }, + "r5n.xlarge": { + InstanceType: "r5n.xlarge", + VCPU: 4, + MemoryMb: 32768, + GPU: 0, + }, "t1.micro": { InstanceType: "t1.micro", VCPU: 1, @@ -1485,46 +1822,4 @@ var InstanceTypes = map[string]*instanceType{ MemoryMb: 32768, GPU: 0, }, - "g4dn.xlarge": { - InstanceType: "g4dn.xlarge", - VCPU: 4, - MemoryMb: 16384, - GPU: 1, - }, - "g4dn.2xlarge": { - InstanceType: "g4dn.2xlarge", - VCPU: 8, - MemoryMb: 32768, - GPU: 1, - }, - 
"g4dn.4xlarge": { - InstanceType: "g4dn.4xlarge", - VCPU: 16, - MemoryMb: 65536, - GPU: 1, - }, - "g4dn.8xlarge": { - InstanceType: "g4dn.8xlarge", - VCPU: 32, - MemoryMb: 131072, - GPU: 1, - }, - "g4dn.16xlarge": { - InstanceType: "g4dn.16xlarge", - VCPU: 64, - MemoryMb: 262144, - GPU: 1, - }, - "g4dn.12xlarge": { - InstanceType: "g4dn.12xlarge", - VCPU: 48, - MemoryMb: 196608, - GPU: 4, - }, - "g4dn.metal": { //coming soon - InstanceType: "g4dn.metal", - VCPU: 96, - MemoryMb: 393216, - GPU: 8, - }, } diff --git a/cluster-autoscaler/cloudprovider/aws/ec2_instance_types/gen.go b/cluster-autoscaler/cloudprovider/aws/ec2_instance_types/gen.go index e6861a2f5ad2..54678621b1a2 100644 --- a/cluster-autoscaler/cloudprovider/aws/ec2_instance_types/gen.go +++ b/cluster-autoscaler/cloudprovider/aws/ec2_instance_types/gen.go @@ -19,42 +19,13 @@ limitations under the License. package main import ( - "encoding/json" "flag" "html/template" - "io/ioutil" - "net/http" - "os" - "regexp" - "strconv" - "strings" - - "github.com/aws/aws-sdk-go/aws/endpoints" + "k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws" "k8s.io/klog" + "os" ) -type response struct { - Products map[string]product `json:"products"` -} - -type product struct { - Attributes productAttributes `json:"attributes"` -} - -type productAttributes struct { - InstanceType string `json:"instanceType"` - VCPU string `json:"vcpu"` - Memory string `json:"memory"` - GPU string `json:"gpu"` -} - -type instanceType struct { - InstanceType string - VCPU int64 - Memory int64 - GPU int64 -} - var packageTemplate = template.Must(template.New("").Parse(`/* Copyright The Kubernetes Authors. @@ -75,7 +46,7 @@ limitations under the License. 
package aws -type instanceType struct { +type InstanceType struct { InstanceType string VCPU int64 MemoryMb int64 @@ -83,12 +54,12 @@ type instanceType struct { } // InstanceTypes is a map of ec2 resources -var InstanceTypes = map[string]*instanceType{ +var InstanceTypes = map[string]*InstanceType{ {{- range .InstanceTypes }} "{{ .InstanceType }}": { InstanceType: "{{ .InstanceType }}", VCPU: {{ .VCPU }}, - MemoryMb: {{ .Memory }}, + MemoryMb: {{ .MemoryMb }}, GPU: {{ .GPU }}, }, {{- end }} @@ -96,57 +67,14 @@ var InstanceTypes = map[string]*instanceType{ `)) func main() { + var region = flag.String("region", "", "aws region you'd like to generate instances from."+ + "It will populate list from all regions if region is not specified.") flag.Parse() defer klog.Flush() - instanceTypes := make(map[string]*instanceType) - - resolver := endpoints.DefaultResolver() - partitions := resolver.(endpoints.EnumPartitions).Partitions() - - for _, p := range partitions { - for _, r := range p.Regions() { - url := "https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonEC2/current/" + r.ID() + "/index.json" - klog.V(1).Infof("fetching %s\n", url) - res, err := http.Get(url) - if err != nil { - klog.Warningf("Error fetching %s skipping...\n", url) - continue - } - - defer res.Body.Close() - - body, err := ioutil.ReadAll(res.Body) - if err != nil { - klog.Warningf("Error parsing %s skipping...\n", url) - continue - } - - var unmarshalled = response{} - err = json.Unmarshal(body, &unmarshalled) - if err != nil { - klog.Warningf("Error unmarshalling %s skipping...\n", url) - continue - } - - for _, product := range unmarshalled.Products { - attr := product.Attributes - if attr.InstanceType != "" { - instanceTypes[attr.InstanceType] = &instanceType{ - InstanceType: attr.InstanceType, - } - if attr.Memory != "" && attr.Memory != "NA" { - instanceTypes[attr.InstanceType].Memory = parseMemory(attr.Memory) - } - if attr.VCPU != "" { - instanceTypes[attr.InstanceType].VCPU = 
parseCPU(attr.VCPU) - } - if attr.GPU != "" { - instanceTypes[attr.InstanceType].GPU = parseCPU(attr.GPU) - } - } - } - } + instanceTypes, err := aws.GenerateEC2InstanceTypes(*region) + if err != nil { + klog.Fatal(err) } f, err := os.Create("ec2_instance_types.go") @@ -157,7 +85,7 @@ func main() { defer f.Close() err = packageTemplate.Execute(f, struct { - InstanceTypes map[string]*instanceType + InstanceTypes map[string]*aws.InstanceType }{ InstanceTypes: instanceTypes, }) @@ -166,26 +94,3 @@ func main() { klog.Fatal(err) } } - -func parseMemory(memory string) int64 { - reg, err := regexp.Compile("[^0-9\\.]+") - if err != nil { - klog.Fatal(err) - } - - parsed := strings.TrimSpace(reg.ReplaceAllString(memory, "")) - mem, err := strconv.ParseFloat(parsed, 64) - if err != nil { - klog.Fatal(err) - } - - return int64(mem * float64(1024)) -} - -func parseCPU(cpu string) int64 { - i, err := strconv.ParseInt(cpu, 10, 64) - if err != nil { - klog.Fatal(err) - } - return i -} diff --git a/cluster-autoscaler/config/autoscaling_options.go b/cluster-autoscaler/config/autoscaling_options.go index e72caa0cb0e9..da97cf5cc5ba 100644 --- a/cluster-autoscaler/config/autoscaling_options.go +++ b/cluster-autoscaler/config/autoscaling_options.go @@ -139,4 +139,6 @@ type AutoscalingOptions struct { FilterOutSchedulablePodsUsesPacking bool // IgnoredTaints is a list of taints to ignore when considering a node template for scheduling. IgnoredTaints []string + // StaticInstanceList tells if cloud provider use static instance type list or dynamically fetch from remote APIs. Only aws uses it now. 
+ StaticInstanceList bool } diff --git a/cluster-autoscaler/main.go b/cluster-autoscaler/main.go index 5bc70f2605a8..3589caac0c06 100644 --- a/cluster-autoscaler/main.go +++ b/cluster-autoscaler/main.go @@ -170,7 +170,8 @@ var ( "Setting it to false employs a more lenient filtering approach that does not try to pack the pods on the nodes."+ "Pods with nominatedNodeName set are always filtered out.") - ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group") + ignoreTaintsFlag = multiStringFlag("ignore-taint", "Specifies a taint to ignore in node templates when considering to scale a node group") + staticInstanceList = flag.Bool("static-instance-list", false, "Should CA fetch instance types in runtime or use a static list. AWS only") ) func createAutoscalingOptions() config.AutoscalingOptions { @@ -237,6 +238,7 @@ func createAutoscalingOptions() config.AutoscalingOptions { FilterOutSchedulablePodsUsesPacking: *filterOutSchedulablePodsUsesPacking, IgnoredTaints: *ignoreTaintsFlag, NodeDeletionDelayTimeout: *nodeDeletionDelayTimeout, + StaticInstanceList: *staticInstanceList, } }