Skip to content

Commit

Permalink
Load AWS EC2 Instance Types dynamically
Browse files Browse the repository at this point in the history
  • Loading branch information
Jeffwan committed Oct 10, 2019
1 parent a258103 commit f52678e
Show file tree
Hide file tree
Showing 12 changed files with 671 additions and 157 deletions.
1 change: 1 addition & 0 deletions cluster-autoscaler/FAQ.md
Original file line number Diff line number Diff line change
Expand Up @@ -674,6 +674,7 @@ The following startup parameters are supported for cluster autoscaler:
| `leader-elect-renew-deadline` | The interval between attempts by the acting master to renew a leadership slot before it stops leading.<br>This must be less than or equal to the lease duration.<br>This is only applicable if leader election is enabled | 10 seconds
| `leader-elect-retry-period` | The duration the clients should wait between attempting acquisition and renewal of a leadership.<br>This is only applicable if leader election is enabled | 2 seconds
| `leader-elect-resource-lock` | The type of resource object that is used for locking during leader election.<br>Supported options are `endpoints` (default) and `configmaps` | "endpoints"
| `static-instance-list` | Should CA fetch instance types in runtime or use a static list. AWS only | false

# Troubleshooting:

Expand Down
5 changes: 5 additions & 0 deletions cluster-autoscaler/cloudprovider/aws/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,11 @@ spec:
- r5ad.2xlarge
```
## Use Static Instance List
The set of the latest supported EC2 instance types will be fetched by the CA at run time. You can find all the available instance types in the CA logs.
If your network access is restricted such that fetching this set is infeasible, you can specify the command-line flag `--static-instance-list=true` to switch the CA back to its original use of a statically defined set.


### Example usage:

* Create a [Launch Template](https://docs.aws.amazon.com/AWSCloudFormation/latest/UserGuide/aws-properties-autoscaling-autoscalinggroup-launchtemplate.html) (LT) with an instance type, for example, r5.2xlarge. Consider this the 'base' instance type. Do not define any spot purchase options here.
Expand Down
31 changes: 29 additions & 2 deletions cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,16 @@ var (
type awsCloudProvider struct {
awsManager *AwsManager
resourceLimiter *cloudprovider.ResourceLimiter
// InstanceTypes is a map of ec2 resources
instanceTypes map[string]*InstanceType
}

// BuildAwsCloudProvider builds CloudProvider implementation for AWS.
func BuildAwsCloudProvider(awsManager *AwsManager, resourceLimiter *cloudprovider.ResourceLimiter) (cloudprovider.CloudProvider, error) {
func BuildAwsCloudProvider(awsManager *AwsManager, instanceTypes map[string]*InstanceType, resourceLimiter *cloudprovider.ResourceLimiter) (cloudprovider.CloudProvider, error) {
aws := &awsCloudProvider{
awsManager: awsManager,
resourceLimiter: resourceLimiter,
instanceTypes: instanceTypes,
}
return aws, nil
}
Expand Down Expand Up @@ -343,12 +346,36 @@ func BuildAWS(opts config.AutoscalingOptions, do cloudprovider.NodeGroupDiscover
defer config.Close()
}

// Generate EC2 list
var instanceTypes map[string]*InstanceType
if opts.StaticInstanceList {
klog.Warning("Use static EC2 Instance Types, list could be outdated")
instanceTypes = GetStaticEC2InstanceTypes()
} else {
region, err := GetCurrentAwsRegion()
if err != nil {
klog.Fatalf("Failed to get AWS Region: %v", err)
}

instanceTypes, err = GenerateEC2InstanceTypes(region)
if err != nil {
klog.Fatalf("Failed to generate AWS EC2 Instance Types: %v", err)
}

keys := make([]string, 0, len(instanceTypes))
for key := range instanceTypes {
keys = append(keys, key)
}

klog.Infof("Successfully load %d EC2 Instance Types %s", len(keys), keys)
}

manager, err := CreateAwsManager(config, do)
if err != nil {
klog.Fatalf("Failed to create AWS Manager: %v", err)
}

provider, err := BuildAwsCloudProvider(manager, rl)
provider, err := BuildAwsCloudProvider(manager, instanceTypes, rl)
if err != nil {
klog.Fatalf("Failed to create AWS cloud provider: %v", err)
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ func testProvider(t *testing.T, m *AwsManager) *awsCloudProvider {
map[string]int64{cloudprovider.ResourceNameCores: 1, cloudprovider.ResourceNameMemory: 10000000},
map[string]int64{cloudprovider.ResourceNameCores: 10, cloudprovider.ResourceNameMemory: 100000000})

provider, err := BuildAwsCloudProvider(m, resourceLimiter)
provider, err := BuildAwsCloudProvider(m, GetStaticEC2InstanceTypes(), resourceLimiter)
assert.NoError(t, err)
return provider.(*awsCloudProvider)
}
Expand All @@ -144,7 +144,7 @@ func TestBuildAwsCloudProvider(t *testing.T) {
map[string]int64{cloudprovider.ResourceNameCores: 1, cloudprovider.ResourceNameMemory: 10000000},
map[string]int64{cloudprovider.ResourceNameCores: 10, cloudprovider.ResourceNameMemory: 100000000})

_, err := BuildAwsCloudProvider(testAwsManager, resourceLimiter)
_, err := BuildAwsCloudProvider(testAwsManager, GetStaticEC2InstanceTypes(), resourceLimiter)
assert.NoError(t, err)
}

Expand Down
2 changes: 1 addition & 1 deletion cluster-autoscaler/cloudprovider/aws/aws_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ type AwsManager struct {
}

type asgTemplate struct {
InstanceType *instanceType
InstanceType *InstanceType
Region string
Zone string
Tags []*autoscaling.TagDescription
Expand Down
2 changes: 1 addition & 1 deletion cluster-autoscaler/cloudprovider/aws/aws_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func TestGetRegion(t *testing.T) {

func TestBuildGenericLabels(t *testing.T) {
labels := buildGenericLabels(&asgTemplate{
InstanceType: &instanceType{
InstanceType: &InstanceType{
InstanceType: "c4.large",
VCPU: 2,
MemoryMb: 3840,
Expand Down
172 changes: 172 additions & 0 deletions cluster-autoscaler/cloudprovider/aws/aws_util.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package aws

import (
"encoding/json"
"errors"
"fmt"
"github.com/aws/aws-sdk-go/aws/endpoints"
"io/ioutil"
"k8s.io/klog"
"net/http"
"os"
"regexp"
"strconv"
"strings"
)

var (
ec2MetaDataServiceUrl = "http://169.254.169.254/latest/dynamic/instance-identity/document"
ec2PricingServiceUrlTemplate = "https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonEC2/current/%s/index.json"
)

type response struct {
Products map[string]product `json:"products"`
}

type product struct {
Attributes productAttributes `json:"attributes"`
}

type productAttributes struct {
InstanceType string `json:"instanceType"`
VCPU string `json:"vcpu"`
Memory string `json:"memory"`
GPU string `json:"gpu"`
}

// GenerateEC2InstanceTypes returns a map of ec2 resources
func GenerateEC2InstanceTypes(region string) (map[string]*InstanceType, error) {
instanceTypes := make(map[string]*InstanceType)

resolver := endpoints.DefaultResolver()
partitions := resolver.(endpoints.EnumPartitions).Partitions()

for _, p := range partitions {
for _, r := range p.Regions() {
if region != "" && region != r.ID() {
continue
}

url := fmt.Sprintf(ec2PricingServiceUrlTemplate, r.ID())
klog.V(1).Infof("fetching %s\n", url)
res, err := http.Get(url)
if err != nil {
klog.Warningf("Error fetching %s skipping...\n", url)
continue
}

defer res.Body.Close()

body, err := ioutil.ReadAll(res.Body)
if err != nil {
klog.Warningf("Error parsing %s skipping...\n", url)
continue
}

var unmarshalled = response{}
err = json.Unmarshal(body, &unmarshalled)
if err != nil {
klog.Warningf("Error unmarshalling %s, skip...\n", url)
continue
}

for _, product := range unmarshalled.Products {
attr := product.Attributes
if attr.InstanceType != "" {
instanceTypes[attr.InstanceType] = &InstanceType{
InstanceType: attr.InstanceType,
}
if attr.Memory != "" && attr.Memory != "NA" {
instanceTypes[attr.InstanceType].MemoryMb = parseMemory(attr.Memory)
}
if attr.VCPU != "" {
instanceTypes[attr.InstanceType].VCPU = parseCPU(attr.VCPU)
}
if attr.GPU != "" {
instanceTypes[attr.InstanceType].GPU = parseCPU(attr.GPU)
}
}
}
}
}

if len(instanceTypes) == 0 {
return nil, errors.New("unable to load EC2 Instance Type list")
}

return instanceTypes, nil
}

// GetStaticEC2InstanceTypes return pregenerated ec2 instance type list
func GetStaticEC2InstanceTypes() map[string]*InstanceType {
return InstanceTypes
}

func parseMemory(memory string) int64 {
reg, err := regexp.Compile("[^0-9\\.]+")
if err != nil {
klog.Fatal(err)
}

parsed := strings.TrimSpace(reg.ReplaceAllString(memory, ""))
mem, err := strconv.ParseFloat(parsed, 64)
if err != nil {
klog.Fatal(err)
}

return int64(mem * float64(1024))
}

func parseCPU(cpu string) int64 {
i, err := strconv.ParseInt(cpu, 10, 64)
if err != nil {
klog.Fatal(err)
}
return i
}

// GetCurrentAwsRegion return region of current cluster without building awsManager
func GetCurrentAwsRegion() (string, error) {
region, present := os.LookupEnv("AWS_REGION")

if !present {
klog.V(1).Infof("fetching %s\n", ec2MetaDataServiceUrl)
res, err := http.Get(ec2MetaDataServiceUrl)
if err != nil {
return "", fmt.Errorf("Error fetching %s", ec2MetaDataServiceUrl)
}

defer res.Body.Close()

body, err := ioutil.ReadAll(res.Body)
if err != nil {
return "", fmt.Errorf("Error parsing %s", ec2MetaDataServiceUrl)
}

var unmarshalled = map[string]string{}
err = json.Unmarshal(body, &unmarshalled)
if err != nil {
klog.Warningf("Error unmarshalling %s, skip...\n", ec2MetaDataServiceUrl)
}

region = unmarshalled["region"]
}

return region, nil
}
105 changes: 105 additions & 0 deletions cluster-autoscaler/cloudprovider/aws/aws_util_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package aws

import (
"github.com/stretchr/testify/assert"
"net/http"
"net/http/httptest"
"os"
"strconv"
"testing"
)

func TestGetStaticEC2InstanceTypes(t *testing.T) {
result := GetStaticEC2InstanceTypes()
assert.True(t, len(result) != 0)
}

func TestParseMemory(t *testing.T) {
expectedResultInMiB := int64(3.75 * 1024)
tests := []struct {
input string
expect int64
}{
{
input: "3.75 GiB",
expect: expectedResultInMiB,
},
{
input: "3.75 Gib",
expect: expectedResultInMiB,
},
{
input: "3.75GiB",
expect: expectedResultInMiB,
},
{
input: "3.75",
expect: expectedResultInMiB,
},
}

for _, test := range tests {
got := parseMemory(test.input)
assert.Equal(t, test.expect, got)
}
}

func TestParseCPU(t *testing.T) {
tests := []struct {
input string
expect int64
}{
{
input: strconv.FormatInt(8, 10),
expect: int64(8),
},
}

for _, test := range tests {
got := parseCPU(test.input)
assert.Equal(t, test.expect, got)
}
}

func TestGetCurrentAwsRegion(t *testing.T) {
region := "us-west-2"
os.Unsetenv("AWS_REGION")

server := httptest.NewServer(http.HandlerFunc(func(rw http.ResponseWriter, req *http.Request) {
rw.Write([]byte("{\"region\" : \"" + region + "\"}"))
}))
// Close the server when test finishes
defer server.Close()

ec2MetaDataServiceUrl = server.URL
result, err := GetCurrentAwsRegion()

assert.Nil(t, err)
assert.NotNil(t, result)
assert.Equal(t, region, result)
}

func TestGetCurrentAwsRegionWithRegionEnv(t *testing.T) {
region := "us-west-2"
os.Setenv("AWS_REGION", region)

result, err := GetCurrentAwsRegion()
assert.Nil(t, err)
assert.Equal(t, region, result)
}
Loading

0 comments on commit f52678e

Please sign in to comment.