From 955eaef32239722fbf22a500c8b2a339737acfb5 Mon Sep 17 00:00:00 2001 From: Austin Siu Date: Wed, 10 Nov 2021 01:14:32 -0600 Subject: [PATCH] Use DescribeInstanceTypes API to get EC2 instance type details --- .../cloudprovider/aws/README.md | 3 +- .../cloudprovider/aws/auto_scaling_groups.go | 2 +- .../cloudprovider/aws/aws_cloud_provider.go | 2 +- .../cloudprovider/aws/aws_util.go | 195 +++++------------ .../cloudprovider/aws/aws_util_test.go | 203 ++++-------------- 5 files changed, 94 insertions(+), 311 deletions(-) diff --git a/cluster-autoscaler/cloudprovider/aws/README.md b/cluster-autoscaler/cloudprovider/aws/README.md index cbcb4439fb91..1aca53cff96f 100644 --- a/cluster-autoscaler/cloudprovider/aws/README.md +++ b/cluster-autoscaler/cloudprovider/aws/README.md @@ -32,7 +32,8 @@ The following policy provides the minimum privileges necessary for Cluster Autos "autoscaling:DescribeAutoScalingInstances", "autoscaling:DescribeLaunchConfigurations", "autoscaling:SetDesiredCapacity", - "autoscaling:TerminateInstanceInAutoScalingGroup" + "autoscaling:TerminateInstanceInAutoScalingGroup", + "ec2:DescribeInstanceTypes" ], "Resource": ["*"] } diff --git a/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go b/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go index e7075e50d7c9..74db9f8e9743 100644 --- a/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go +++ b/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go @@ -99,7 +99,7 @@ var getInstanceTypeForAsg = func(m *asgCache, group *asg) (string, error) { return result[group.AwsRef.Name], nil } - return "", fmt.Errorf("Could not find instance type for %s", group.AwsRef.Name) + return "", fmt.Errorf("could not find instance type for %s", group.AwsRef.Name) } // Fetch explicitly configured ASGs. These ASGs should never be unregistered diff --git a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go index 3f2ff7b429bf..3ed8ea68e865 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go @@ -285,7 +285,7 @@ func (ng *AwsNodeGroup) DeleteNodes(nodes []*apiv1.Node) error { if err != nil { return err } - if belongs != true { + if !belongs { return fmt.Errorf("%s belongs to a different asg than %s", node.Name, ng.Id()) } awsref, err := AwsRefFromProviderId(node.Spec.ProviderID) diff --git a/cluster-autoscaler/cloudprovider/aws/aws_util.go b/cluster-autoscaler/cloudprovider/aws/aws_util.go index 47b7cdc45c52..7a076ef0b168 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_util.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_util.go @@ -17,104 +17,40 @@ limitations under the License. package aws import ( - "encoding/json" "errors" "fmt" - "io" - "net/http" "os" - "regexp" - "strconv" - "strings" "github.com/aws/aws-sdk-go/aws" "github.com/aws/aws-sdk-go/aws/ec2metadata" - "github.com/aws/aws-sdk-go/aws/endpoints" "github.com/aws/aws-sdk-go/aws/session" - - klog "k8s.io/klog/v2" + "github.com/aws/aws-sdk-go/service/ec2" ) var ( ec2MetaDataServiceUrl = "http://169.254.169.254" - ec2PricingServiceUrlTemplate = "https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonEC2/current/%s/index.json" - ec2PricingServiceUrlTemplateCN = "https://pricing.cn-north-1.amazonaws.com.cn/offers/v1.0/cn/AmazonEC2/current/%s/index.json" - ec2Arm64Processors = []string{"AWS Graviton Processor", "AWS Graviton2 Processor"} ) -type response struct { - Products map[string]product `json:"products"` -} - -type product struct { - Attributes productAttributes `json:"attributes"` -} - -type productAttributes struct { - InstanceType string `json:"instanceType"` - VCPU string `json:"vcpu"` - Memory string `json:"memory"` - GPU string `json:"gpu"` - Architecture string `json:"physicalProcessor"` -} - // GenerateEC2InstanceTypes returns a map of ec2 resources func GenerateEC2InstanceTypes(region string) (map[string]*InstanceType, error) { - var pricingUrlTemplate string - if strings.HasPrefix(region, "cn-") { - pricingUrlTemplate = ec2PricingServiceUrlTemplateCN - } else { - pricingUrlTemplate = ec2PricingServiceUrlTemplate + sess, err := session.NewSession(&aws.Config{ + Region: aws.String(region)}, + ) + if err != nil { + return nil, err } + ec2Client := ec2.New(sess) + input := ec2.DescribeInstanceTypesInput{} instanceTypes := make(map[string]*InstanceType) - resolver := endpoints.DefaultResolver() - partitions := resolver.(endpoints.EnumPartitions).Partitions() - - for _, p := range partitions { - for _, r := range p.Regions() { - if region != "" && region != r.ID() { - continue - } - - url := fmt.Sprintf(pricingUrlTemplate, r.ID()) - klog.V(1).Infof("fetching %s\n", url) - res, err := http.Get(url) - if err != nil { - klog.Warningf("Error fetching %s skipping...\n%s\n", url, err) - continue - } - - defer res.Body.Close() - - unmarshalled, err := unmarshalProductsResponse(res.Body) - if err != nil { - klog.Warningf("Error parsing %s skipping...\n%s\n", url, err) - continue - } - - for _, product := range unmarshalled.Products { - attr := product.Attributes - if attr.InstanceType != "" { - instanceTypes[attr.InstanceType] = &InstanceType{ - InstanceType: attr.InstanceType, - } - if attr.Memory != "" && attr.Memory != "NA" { - instanceTypes[attr.InstanceType].MemoryMb = parseMemory(attr.Memory) - } - if attr.VCPU != "" { - instanceTypes[attr.InstanceType].VCPU = parseCPU(attr.VCPU) - } - if attr.GPU != "" { - instanceTypes[attr.InstanceType].GPU = parseCPU(attr.GPU) - } - if attr.Architecture != "" { - instanceTypes[attr.InstanceType].Architecture = parseArchitecture(attr.Architecture) - } - } - } + if err = ec2Client.DescribeInstanceTypesPages(&input, func(page *ec2.DescribeInstanceTypesOutput, isLastPage bool) bool { + for _, rawInstanceType := range page.InstanceTypes { + instanceTypes[*rawInstanceType.InstanceType] = transformInstanceType(rawInstanceType) } + return !isLastPage + }); err != nil { + return nil, err } if len(instanceTypes) == 0 { @@ -129,88 +65,53 @@ func GetStaticEC2InstanceTypes() (map[string]*InstanceType, string) { return InstanceTypes, StaticListLastUpdateTime } -func unmarshalProductsResponse(r io.Reader) (*response, error) { - dec := json.NewDecoder(r) - t, err := dec.Token() - if err != nil { - return nil, err +func transformInstanceType(rawInstanceType *ec2.InstanceTypeInfo) *InstanceType { + instanceType := &InstanceType{ + InstanceType: *rawInstanceType.InstanceType, } - if delim, ok := t.(json.Delim); !ok || delim.String() != "{" { - return nil, errors.New("Invalid products json") + if rawInstanceType.MemoryInfo != nil && rawInstanceType.MemoryInfo.SizeInMiB != nil { + instanceType.MemoryMb = *rawInstanceType.MemoryInfo.SizeInMiB } - - unmarshalled := response{map[string]product{}} - - for dec.More() { - t, err = dec.Token() - if err != nil { - return nil, err - } - - if t == "products" { - tt, err := dec.Token() - if err != nil { - return nil, err - } - if delim, ok := tt.(json.Delim); !ok || delim.String() != "{" { - return nil, errors.New("Invalid products json") - } - for dec.More() { - productCode, err := dec.Token() - if err != nil { - return nil, err - } - - prod := product{} - if err = dec.Decode(&prod); err != nil { - return nil, err - } - unmarshalled.Products[productCode.(string)] = prod - } - } + if rawInstanceType.VCpuInfo != nil && rawInstanceType.VCpuInfo.DefaultVCpus != nil { + instanceType.VCPU = *rawInstanceType.VCpuInfo.DefaultVCpus } - - t, err = dec.Token() - if err != nil { - return nil, err + if rawInstanceType.GpuInfo != nil && len(rawInstanceType.GpuInfo.Gpus) > 0 { + instanceType.GPU = getGpuCount(rawInstanceType.GpuInfo) } - if delim, ok := t.(json.Delim); !ok || delim.String() != "}" { - return nil, errors.New("Invalid products json") + if rawInstanceType.ProcessorInfo != nil && len(rawInstanceType.ProcessorInfo.SupportedArchitectures) > 0 { + instanceType.Architecture = interpretEc2SupportedArchitecure(*rawInstanceType.ProcessorInfo.SupportedArchitectures[0]) } - - return &unmarshalled, nil + return instanceType } -func parseMemory(memory string) int64 { - reg, err := regexp.Compile("[^0-9\\.]+") - if err != nil { - klog.Fatal(err) - } - - parsed := strings.TrimSpace(reg.ReplaceAllString(memory, "")) - mem, err := strconv.ParseFloat(parsed, 64) - if err != nil { - klog.Fatal(err) - } - - return int64(mem * float64(1024)) +// GetStaticEC2InstanceTypes return pregenerated ec2 instance type list +func GetStaticEC2InstanceTypes() (map[string]*InstanceType, string) { + return InstanceTypes, staticListLastUpdateTime } -func parseCPU(cpu string) int64 { - i, err := strconv.ParseInt(cpu, 10, 64) - if err != nil { - klog.Fatal(err) +func getGpuCount(gpuInfo *ec2.GpuInfo) int64 { + var gpuCountSum int64 + for _, gpu := range gpuInfo.Gpus { + if gpu.Count != nil { + gpuCountSum += *gpu.Count + } } - return i + return gpuCountSum } -func parseArchitecture(archName string) string { - for _, processor := range ec2Arm64Processors { - if archName == processor { - return "arm64" - } +func interpretEc2SupportedArchitecure(archName string) string { + switch archName { + case "arm64": + return "arm64" + case "i386": + return "amd64" + case "x86_64": + return "amd64" + case "x86_64_mac": + return "amd64" + default: + return "amd64" } - return "amd64" } // GetCurrentAwsRegion return region of current cluster without building awsManager diff --git a/cluster-autoscaler/cloudprovider/aws/aws_util_test.go b/cluster-autoscaler/cloudprovider/aws/aws_util_test.go index 243f96043a14..09462770609d 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_util_test.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_util_test.go @@ -20,10 +20,10 @@ import ( "net/http" "net/http/httptest" "os" - "strconv" - "strings" "testing" + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/service/ec2" "github.com/stretchr/testify/assert" ) @@ -32,65 +32,49 @@ func TestGetStaticEC2InstanceTypes(t *testing.T) { assert.True(t, len(result) != 0) } -func TestParseMemory(t *testing.T) { - expectedResultInMiB := int64(3.75 * 1024) - tests := []struct { - input string - expect int64 - }{ - { - input: "3.75 GiB", - expect: expectedResultInMiB, +func TestInstanceTypeTransform(t *testing.T) { + rawInstanceType := ec2.InstanceTypeInfo{ + InstanceType: aws.String("c4.xlarge"), + ProcessorInfo: &ec2.ProcessorInfo{ + SupportedArchitectures: []*string{aws.String("x86_64")}, }, - { - input: "3.75 Gib", - expect: expectedResultInMiB, + VCpuInfo: &ec2.VCpuInfo{ + DefaultVCpus: aws.Int64(4), }, - { - input: "3.75GiB", - expect: expectedResultInMiB, - }, - { - input: "3.75", - expect: expectedResultInMiB, + MemoryInfo: &ec2.MemoryInfo{ + SizeInMiB: aws.Int64(7680), }, } - for _, test := range tests { - got := parseMemory(test.input) - assert.Equal(t, test.expect, got) - } -} - -func TestParseCPU(t *testing.T) { - tests := []struct { - input string - expect int64 - }{ - { - input: strconv.FormatInt(8, 10), - expect: int64(8), - }, - } + instanceType := transformInstanceType(&rawInstanceType) - for _, test := range tests { - got := parseCPU(test.input) - assert.Equal(t, test.expect, got) - } + assert.Equal(t, "c4.xlarge", instanceType.InstanceType) + assert.Equal(t, int64(4), instanceType.VCPU) + assert.Equal(t, int64(7680), instanceType.MemoryMb) + assert.Equal(t, int64(0), instanceType.GPU) + assert.Equal(t, "amd64", instanceType.Architecture) } -func TestParseArchitecture(t *testing.T) { +func TestInterpretEc2SupportedArchitecure(t *testing.T) { tests := []struct { input string expect string }{ { - input: "Intel Xeon Platinum 8259 (Cascade Lake)", + input: "arm64", + expect: "arm64", + }, + { + input: "i386", expect: "amd64", }, { - input: "AWS Graviton2 Processor", - expect: "arm64", + input: "x86_64", + expect: "amd64", + }, + { + input: "x86_64_mac", + expect: "amd64", }, { input: "anything default", @@ -99,11 +83,23 @@ func TestParseArchitecture(t *testing.T) { } for _, test := range tests { - got := parseArchitecture(test.input) + got := interpretEc2SupportedArchitecure(test.input) assert.Equal(t, test.expect, got) } } +func TestGetGpuCount(t *testing.T) { + gpuDeviceInfos := []*ec2.GpuDeviceInfo{ + {Count: aws.Int64(8)}, + {Count: aws.Int64(4)}, + {Count: aws.Int64(0)}, + } + + gpuInfo := ec2.GpuInfo{Gpus: gpuDeviceInfos} + + assert.Equal(t, int64(12), getGpuCount(&gpuInfo)) +} + func TestGetCurrentAwsRegion(t *testing.T) { region := "us-west-2" if oldRegion, found := os.LookupEnv("AWS_REGION"); found { @@ -138,118 +134,3 @@ func TestGetCurrentAwsRegionWithRegionEnv(t *testing.T) { assert.Nil(t, err) assert.Equal(t, region, result) } - -func TestUnmarshalProductsResponse(t *testing.T) { - body := ` -{ - "products": { - "VVD8BG8WWFD3DAZN" : { - "sku" : "VVD8BG8WWFD3DAZN", - "productFamily" : "Compute Instance", - "attributes" : { - "servicecode" : "AmazonEC2", - "location" : "US East (N. Virginia)", - "locationType" : "AWS Region", - "instanceType" : "r5b.4xlarge", - "currentGeneration" : "Yes", - "instanceFamily" : "Memory optimized", - "vcpu" : "16", - "physicalProcessor" : "Intel Xeon Platinum 8259 (Cascade Lake)", - "clockSpeed" : "3.1 GHz", - "memory" : "128 GiB", - "storage" : "EBS only", - "networkPerformance" : "Up to 10 Gigabit", - "processorArchitecture" : "64-bit", - "tenancy" : "Shared", - "operatingSystem" : "Linux", - "licenseModel" : "No License required", - "usagetype" : "UnusedBox:r5b.4xlarge", - "operation" : "RunInstances:0004", - "availabilityzone" : "NA", - "capacitystatus" : "UnusedCapacityReservation", - "classicnetworkingsupport" : "false", - "dedicatedEbsThroughput" : "10 Gbps", - "ecu" : "NA", - "enhancedNetworkingSupported" : "Yes", - "instancesku" : "G4NFAXD9TGJM3RY8", - "intelAvxAvailable" : "Yes", - "intelAvx2Available" : "No", - "intelTurboAvailable" : "No", - "marketoption" : "OnDemand", - "normalizationSizeFactor" : "32", - "preInstalledSw" : "SQL Std", - "servicename" : "Amazon Elastic Compute Cloud", - "vpcnetworkingsupport" : "true" - } - }, - "C36QEQQQJ8ZR7N32" : { - "sku" : "C36QEQQQJ8ZR7N32", - "productFamily" : "Compute Instance", - "attributes" : { - "servicecode" : "AmazonEC2", - "location" : "US East (N. Virginia)", - "locationType" : "AWS Region", - "instanceType" : "d3en.8xlarge", - "currentGeneration" : "Yes", - "instanceFamily" : "Storage optimized", - "vcpu" : "32", - "physicalProcessor" : "Intel Xeon Platinum 8259 (Cascade Lake)", - "clockSpeed" : "3.1 GHz", - "memory" : "128 GiB", - "storage" : "16 x 14000 HDD", - "networkPerformance" : "50 Gigabit", - "processorArchitecture" : "64-bit", - "tenancy" : "Dedicated", - "operatingSystem" : "SUSE", - "licenseModel" : "No License required", - "usagetype" : "DedicatedRes:d3en.8xlarge", - "operation" : "RunInstances:000g", - "availabilityzone" : "NA", - "capacitystatus" : "AllocatedCapacityReservation", - "classicnetworkingsupport" : "false", - "dedicatedEbsThroughput" : "5000 Mbps", - "ecu" : "NA", - "enhancedNetworkingSupported" : "Yes", - "instancesku" : "2XW3BCEZ83WMGFJY", - "intelAvxAvailable" : "Yes", - "intelAvx2Available" : "Yes", - "intelTurboAvailable" : "Yes", - "marketoption" : "OnDemand", - "normalizationSizeFactor" : "64", - "preInstalledSw" : "NA", - "processorFeatures" : "AVX; AVX2; Intel AVX; Intel AVX2; Intel AVX512; Intel Turbo", - "servicename" : "Amazon Elastic Compute Cloud", - "vpcnetworkingsupport" : "true" - } - } - } -} -` - r := strings.NewReader(body) - resp, err := unmarshalProductsResponse(r) - assert.Nil(t, err) - assert.Len(t, resp.Products, 2) - assert.NotNil(t, resp.Products["VVD8BG8WWFD3DAZN"]) - assert.NotNil(t, resp.Products["C36QEQQQJ8ZR7N32"]) - assert.Equal(t, resp.Products["VVD8BG8WWFD3DAZN"].Attributes.InstanceType, "r5b.4xlarge") - assert.Equal(t, resp.Products["C36QEQQQJ8ZR7N32"].Attributes.InstanceType, "d3en.8xlarge") - - invalidJsonTests := map[string]string{ - "[": "[", - "]": "]", - "}": "}", - "{": "{", - "Plain text": "invalid", - "List": "[]", - "Invalid products ([])": `{"products":[]}`, - "Invalid product ([])": `{"products":{"zz":[]}}`, - } - for name, body := range invalidJsonTests { - t.Run(name, func(t *testing.T) { - r := strings.NewReader(body) - resp, err := unmarshalProductsResponse(r) - assert.NotNil(t, err) - assert.Nil(t, resp) - }) - } -}