From 955eaef32239722fbf22a500c8b2a339737acfb5 Mon Sep 17 00:00:00 2001
From: Austin Siu <austinsi@amazon.com>
Date: Wed, 10 Nov 2021 01:14:32 -0600
Subject: [PATCH] Use DescribeInstanceTypes API to get EC2 instance type
 details

---
 .../cloudprovider/aws/README.md               |   3 +-
 .../cloudprovider/aws/auto_scaling_groups.go  |   2 +-
 .../cloudprovider/aws/aws_cloud_provider.go   |   2 +-
 .../cloudprovider/aws/aws_util.go             | 195 +++++------------
 .../cloudprovider/aws/aws_util_test.go        | 203 ++++--------------
 5 files changed, 94 insertions(+), 311 deletions(-)

diff --git a/cluster-autoscaler/cloudprovider/aws/README.md b/cluster-autoscaler/cloudprovider/aws/README.md
index cbcb4439fb91..1aca53cff96f 100644
--- a/cluster-autoscaler/cloudprovider/aws/README.md
+++ b/cluster-autoscaler/cloudprovider/aws/README.md
@@ -32,7 +32,8 @@ The following policy provides the minimum privileges necessary for Cluster Autos
         "autoscaling:DescribeAutoScalingInstances",
         "autoscaling:DescribeLaunchConfigurations",
         "autoscaling:SetDesiredCapacity",
-        "autoscaling:TerminateInstanceInAutoScalingGroup"
+        "autoscaling:TerminateInstanceInAutoScalingGroup",
+        "ec2:DescribeInstanceTypes"
       ],
       "Resource": ["*"]
     }
diff --git a/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go b/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go
index e7075e50d7c9..74db9f8e9743 100644
--- a/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go
+++ b/cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go
@@ -99,7 +99,7 @@ var getInstanceTypeForAsg = func(m *asgCache, group *asg) (string, error) {
 		return result[group.AwsRef.Name], nil
 	}
 
-	return "", fmt.Errorf("Could not find instance type for %s", group.AwsRef.Name)
+	return "", fmt.Errorf("could not find instance type for %s", group.AwsRef.Name)
 }
 
 // Fetch explicitly configured ASGs. These ASGs should never be unregistered
diff --git a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go
index 3f2ff7b429bf..3ed8ea68e865 100644
--- a/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go
+++ b/cluster-autoscaler/cloudprovider/aws/aws_cloud_provider.go
@@ -285,7 +285,7 @@ func (ng *AwsNodeGroup) DeleteNodes(nodes []*apiv1.Node) error {
 		if err != nil {
 			return err
 		}
-		if belongs != true {
+		if !belongs {
 			return fmt.Errorf("%s belongs to a different asg than %s", node.Name, ng.Id())
 		}
 		awsref, err := AwsRefFromProviderId(node.Spec.ProviderID)
diff --git a/cluster-autoscaler/cloudprovider/aws/aws_util.go b/cluster-autoscaler/cloudprovider/aws/aws_util.go
index 47b7cdc45c52..7a076ef0b168 100644
--- a/cluster-autoscaler/cloudprovider/aws/aws_util.go
+++ b/cluster-autoscaler/cloudprovider/aws/aws_util.go
@@ -17,104 +17,40 @@ limitations under the License.
 package aws
 
 import (
-	"encoding/json"
 	"errors"
 	"fmt"
-	"io"
-	"net/http"
 	"os"
-	"regexp"
-	"strconv"
-	"strings"
 
 	"github.com/aws/aws-sdk-go/aws"
 	"github.com/aws/aws-sdk-go/aws/ec2metadata"
-	"github.com/aws/aws-sdk-go/aws/endpoints"
 	"github.com/aws/aws-sdk-go/aws/session"
-
-	klog "k8s.io/klog/v2"
+	"github.com/aws/aws-sdk-go/service/ec2"
 )
 
 var (
 	ec2MetaDataServiceUrl          = "http://169.254.169.254"
-	ec2PricingServiceUrlTemplate   = "https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonEC2/current/%s/index.json"
-	ec2PricingServiceUrlTemplateCN = "https://pricing.cn-north-1.amazonaws.com.cn/offers/v1.0/cn/AmazonEC2/current/%s/index.json"
-	ec2Arm64Processors             = []string{"AWS Graviton Processor", "AWS Graviton2 Processor"}
 )
 
-type response struct {
-	Products map[string]product `json:"products"`
-}
-
-type product struct {
-	Attributes productAttributes `json:"attributes"`
-}
-
-type productAttributes struct {
-	InstanceType string `json:"instanceType"`
-	VCPU         string `json:"vcpu"`
-	Memory       string `json:"memory"`
-	GPU          string `json:"gpu"`
-	Architecture string `json:"physicalProcessor"`
-}
-
 // GenerateEC2InstanceTypes returns a map of ec2 resources
 func GenerateEC2InstanceTypes(region string) (map[string]*InstanceType, error) {
-	var pricingUrlTemplate string
-	if strings.HasPrefix(region, "cn-") {
-		pricingUrlTemplate = ec2PricingServiceUrlTemplateCN
-	} else {
-		pricingUrlTemplate = ec2PricingServiceUrlTemplate
+	sess, err := session.NewSession(&aws.Config{
+		Region: aws.String(region)},
+	)
+	if err != nil {
+		return nil, err
 	}
 
+	ec2Client := ec2.New(sess)
+	input := ec2.DescribeInstanceTypesInput{}
 	instanceTypes := make(map[string]*InstanceType)
 
-	resolver := endpoints.DefaultResolver()
-	partitions := resolver.(endpoints.EnumPartitions).Partitions()
-
-	for _, p := range partitions {
-		for _, r := range p.Regions() {
-			if region != "" && region != r.ID() {
-				continue
-			}
-
-			url := fmt.Sprintf(pricingUrlTemplate, r.ID())
-			klog.V(1).Infof("fetching %s\n", url)
-			res, err := http.Get(url)
-			if err != nil {
-				klog.Warningf("Error fetching %s skipping...\n%s\n", url, err)
-				continue
-			}
-
-			defer res.Body.Close()
-
-			unmarshalled, err := unmarshalProductsResponse(res.Body)
-			if err != nil {
-				klog.Warningf("Error parsing %s skipping...\n%s\n", url, err)
-				continue
-			}
-
-			for _, product := range unmarshalled.Products {
-				attr := product.Attributes
-				if attr.InstanceType != "" {
-					instanceTypes[attr.InstanceType] = &InstanceType{
-						InstanceType: attr.InstanceType,
-					}
-					if attr.Memory != "" && attr.Memory != "NA" {
-						instanceTypes[attr.InstanceType].MemoryMb = parseMemory(attr.Memory)
-					}
-					if attr.VCPU != "" {
-						instanceTypes[attr.InstanceType].VCPU = parseCPU(attr.VCPU)
-					}
-					if attr.GPU != "" {
-						instanceTypes[attr.InstanceType].GPU = parseCPU(attr.GPU)
-					}
-					if attr.Architecture != "" {
-						instanceTypes[attr.InstanceType].Architecture = parseArchitecture(attr.Architecture)
-					}
-				}
-			}
+	if err = ec2Client.DescribeInstanceTypesPages(&input, func(page *ec2.DescribeInstanceTypesOutput, isLastPage bool) bool {
+		for _, rawInstanceType := range page.InstanceTypes {
+			instanceTypes[*rawInstanceType.InstanceType] = transformInstanceType(rawInstanceType)
 		}
+		return !isLastPage
+	}); err != nil {
+		return nil, err
 	}
 
 	if len(instanceTypes) == 0 {
@@ -129,88 +65,53 @@ func GetStaticEC2InstanceTypes() (map[string]*InstanceType, string) {
 	return InstanceTypes, StaticListLastUpdateTime
 }
 
-func unmarshalProductsResponse(r io.Reader) (*response, error) {
-	dec := json.NewDecoder(r)
-	t, err := dec.Token()
-	if err != nil {
-		return nil, err
+func transformInstanceType(rawInstanceType *ec2.InstanceTypeInfo) *InstanceType {
+	instanceType := &InstanceType{
+		InstanceType: *rawInstanceType.InstanceType,
 	}
-	if delim, ok := t.(json.Delim); !ok || delim.String() != "{" {
-		return nil, errors.New("Invalid products json")
+	if rawInstanceType.MemoryInfo != nil && rawInstanceType.MemoryInfo.SizeInMiB != nil {
+		instanceType.MemoryMb = *rawInstanceType.MemoryInfo.SizeInMiB
 	}
-
-	unmarshalled := response{map[string]product{}}
-
-	for dec.More() {
-		t, err = dec.Token()
-		if err != nil {
-			return nil, err
-		}
-
-		if t == "products" {
-			tt, err := dec.Token()
-			if err != nil {
-				return nil, err
-			}
-			if delim, ok := tt.(json.Delim); !ok || delim.String() != "{" {
-				return nil, errors.New("Invalid products json")
-			}
-			for dec.More() {
-				productCode, err := dec.Token()
-				if err != nil {
-					return nil, err
-				}
-
-				prod := product{}
-				if err = dec.Decode(&prod); err != nil {
-					return nil, err
-				}
-				unmarshalled.Products[productCode.(string)] = prod
-			}
-		}
+	if rawInstanceType.VCpuInfo != nil && rawInstanceType.VCpuInfo.DefaultVCpus != nil {
+		instanceType.VCPU = *rawInstanceType.VCpuInfo.DefaultVCpus
 	}
-
-	t, err = dec.Token()
-	if err != nil {
-		return nil, err
+	if rawInstanceType.GpuInfo != nil && len(rawInstanceType.GpuInfo.Gpus) > 0 {
+		instanceType.GPU = getGpuCount(rawInstanceType.GpuInfo)
 	}
-	if delim, ok := t.(json.Delim); !ok || delim.String() != "}" {
-		return nil, errors.New("Invalid products json")
+	if rawInstanceType.ProcessorInfo != nil && len(rawInstanceType.ProcessorInfo.SupportedArchitectures) > 0 {
+		instanceType.Architecture = interpretEc2SupportedArchitecure(*rawInstanceType.ProcessorInfo.SupportedArchitectures[0])
 	}
-
-	return &unmarshalled, nil
+	return instanceType
 }
 
-func parseMemory(memory string) int64 {
-	reg, err := regexp.Compile("[^0-9\\.]+")
-	if err != nil {
-		klog.Fatal(err)
-	}
-
-	parsed := strings.TrimSpace(reg.ReplaceAllString(memory, ""))
-	mem, err := strconv.ParseFloat(parsed, 64)
-	if err != nil {
-		klog.Fatal(err)
-	}
-
-	return int64(mem * float64(1024))
-}
-
-func parseCPU(cpu string) int64 {
-	i, err := strconv.ParseInt(cpu, 10, 64)
-	if err != nil {
-		klog.Fatal(err)
+// getGpuCount returns the total number of GPUs described by gpuInfo, summing
+// the Count of every entry in gpuInfo.Gpus. Entries with a nil Count add
+// nothing to the total.
+//
+// gpuInfo must be non-nil; transformInstanceType checks this before calling.
+func getGpuCount(gpuInfo *ec2.GpuInfo) int64 {
+	var gpuCountSum int64
+	for _, gpu := range gpuInfo.Gpus {
+		if gpu.Count != nil {
+			gpuCountSum += *gpu.Count
+		}
 	}
-	return i
+	return gpuCountSum
 }
 
-func parseArchitecture(archName string) string {
-	for _, processor := range ec2Arm64Processors {
-		if archName == processor {
-			return "arm64"
-		}
+// interpretEc2SupportedArchitecure maps an EC2 supported-architecture value to
+// the architecture name recorded on InstanceType: the ARM values "arm64" and
+// "arm64_mac" yield "arm64"; everything else (including unrecognised values)
+// yields "amd64".
+func interpretEc2SupportedArchitecure(archName string) string {
+	switch archName {
+	case "arm64", "arm64_mac":
+		return "arm64"
+	case "i386", "x86_64", "x86_64_mac":
+		return "amd64"
+	default:
+		return "amd64"
 	}
-	return "amd64"
 }
 
 // GetCurrentAwsRegion return region of current cluster without building awsManager
diff --git a/cluster-autoscaler/cloudprovider/aws/aws_util_test.go b/cluster-autoscaler/cloudprovider/aws/aws_util_test.go
index 243f96043a14..09462770609d 100644
--- a/cluster-autoscaler/cloudprovider/aws/aws_util_test.go
+++ b/cluster-autoscaler/cloudprovider/aws/aws_util_test.go
@@ -20,10 +20,10 @@ import (
 	"net/http"
 	"net/http/httptest"
 	"os"
-	"strconv"
-	"strings"
 	"testing"
 
+	"github.com/aws/aws-sdk-go/aws"
+	"github.com/aws/aws-sdk-go/service/ec2"
 	"github.com/stretchr/testify/assert"
 )
 
@@ -32,65 +32,49 @@ func TestGetStaticEC2InstanceTypes(t *testing.T) {
 	assert.True(t, len(result) != 0)
 }
 
-func TestParseMemory(t *testing.T) {
-	expectedResultInMiB := int64(3.75 * 1024)
-	tests := []struct {
-		input  string
-		expect int64
-	}{
-		{
-			input:  "3.75 GiB",
-			expect: expectedResultInMiB,
+func TestInstanceTypeTransform(t *testing.T) {
+	rawInstanceType := ec2.InstanceTypeInfo{
+		InstanceType: aws.String("c4.xlarge"),
+		ProcessorInfo: &ec2.ProcessorInfo{
+			SupportedArchitectures: []*string{aws.String("x86_64")},
 		},
-		{
-			input:  "3.75 Gib",
-			expect: expectedResultInMiB,
+		VCpuInfo: &ec2.VCpuInfo{
+			DefaultVCpus: aws.Int64(4),
 		},
-		{
-			input:  "3.75GiB",
-			expect: expectedResultInMiB,
-		},
-		{
-			input:  "3.75",
-			expect: expectedResultInMiB,
+		MemoryInfo: &ec2.MemoryInfo{
+			SizeInMiB: aws.Int64(7680),
 		},
 	}
 
-	for _, test := range tests {
-		got := parseMemory(test.input)
-		assert.Equal(t, test.expect, got)
-	}
-}
-
-func TestParseCPU(t *testing.T) {
-	tests := []struct {
-		input  string
-		expect int64
-	}{
-		{
-			input:  strconv.FormatInt(8, 10),
-			expect: int64(8),
-		},
-	}
+	instanceType := transformInstanceType(&rawInstanceType)
 
-	for _, test := range tests {
-		got := parseCPU(test.input)
-		assert.Equal(t, test.expect, got)
-	}
+	assert.Equal(t, "c4.xlarge", instanceType.InstanceType)
+	assert.Equal(t, int64(4), instanceType.VCPU)
+	assert.Equal(t, int64(7680), instanceType.MemoryMb)
+	assert.Equal(t, int64(0), instanceType.GPU)
+	assert.Equal(t, "amd64", instanceType.Architecture)
 }
 
-func TestParseArchitecture(t *testing.T) {
+func TestInterpretEc2SupportedArchitecure(t *testing.T) {
 	tests := []struct {
 		input  string
 		expect string
 	}{
 		{
-			input:  "Intel Xeon Platinum 8259 (Cascade Lake)",
+			input:  "arm64",
+			expect: "arm64",
+		},
+		{
+			input:  "i386",
 			expect: "amd64",
 		},
 		{
-			input:  "AWS Graviton2 Processor",
-			expect: "arm64",
+			input:  "x86_64",
+			expect: "amd64",
+		},
+		{
+			input:  "x86_64_mac",
+			expect: "amd64",
 		},
 		{
 			input:  "anything default",
@@ -99,11 +83,23 @@ func TestParseArchitecture(t *testing.T) {
 	}
 
 	for _, test := range tests {
-		got := parseArchitecture(test.input)
+		got := interpretEc2SupportedArchitecure(test.input)
 		assert.Equal(t, test.expect, got)
 	}
 }
 
+func TestGetGpuCount(t *testing.T) {
+	gpuDeviceInfos := []*ec2.GpuDeviceInfo{
+		{Count: aws.Int64(8)},
+		{Count: aws.Int64(4)},
+		{Count: aws.Int64(0)},
+	}
+
+	gpuInfo := ec2.GpuInfo{Gpus: gpuDeviceInfos}
+
+	assert.Equal(t, int64(12), getGpuCount(&gpuInfo))
+}
+
 func TestGetCurrentAwsRegion(t *testing.T) {
 	region := "us-west-2"
 	if oldRegion, found := os.LookupEnv("AWS_REGION"); found {
@@ -138,118 +134,3 @@ func TestGetCurrentAwsRegionWithRegionEnv(t *testing.T) {
 	assert.Nil(t, err)
 	assert.Equal(t, region, result)
 }
-
-func TestUnmarshalProductsResponse(t *testing.T) {
-	body := `
-{
-  "products": {
-	"VVD8BG8WWFD3DAZN" : {
-      "sku" : "VVD8BG8WWFD3DAZN",
-      "productFamily" : "Compute Instance",
-      "attributes" : {
-        "servicecode" : "AmazonEC2",
-        "location" : "US East (N. Virginia)",
-        "locationType" : "AWS Region",
-        "instanceType" : "r5b.4xlarge",
-        "currentGeneration" : "Yes",
-        "instanceFamily" : "Memory optimized",
-        "vcpu" : "16",
-        "physicalProcessor" : "Intel Xeon Platinum 8259 (Cascade Lake)",
-        "clockSpeed" : "3.1 GHz",
-        "memory" : "128 GiB",
-        "storage" : "EBS only",
-        "networkPerformance" : "Up to 10 Gigabit",
-        "processorArchitecture" : "64-bit",
-        "tenancy" : "Shared",
-        "operatingSystem" : "Linux",
-        "licenseModel" : "No License required",
-        "usagetype" : "UnusedBox:r5b.4xlarge",
-        "operation" : "RunInstances:0004",
-        "availabilityzone" : "NA",
-        "capacitystatus" : "UnusedCapacityReservation",
-        "classicnetworkingsupport" : "false",
-        "dedicatedEbsThroughput" : "10 Gbps",
-        "ecu" : "NA",
-        "enhancedNetworkingSupported" : "Yes",
-        "instancesku" : "G4NFAXD9TGJM3RY8",
-        "intelAvxAvailable" : "Yes",
-        "intelAvx2Available" : "No",
-        "intelTurboAvailable" : "No",
-        "marketoption" : "OnDemand",
-        "normalizationSizeFactor" : "32",
-        "preInstalledSw" : "SQL Std",
-        "servicename" : "Amazon Elastic Compute Cloud",
-        "vpcnetworkingsupport" : "true"
-      }
-    },
-    "C36QEQQQJ8ZR7N32" : {
-      "sku" : "C36QEQQQJ8ZR7N32",
-      "productFamily" : "Compute Instance",
-      "attributes" : {
-        "servicecode" : "AmazonEC2",
-        "location" : "US East (N. Virginia)",
-        "locationType" : "AWS Region",
-        "instanceType" : "d3en.8xlarge",
-        "currentGeneration" : "Yes",
-        "instanceFamily" : "Storage optimized",
-        "vcpu" : "32",
-        "physicalProcessor" : "Intel Xeon Platinum 8259 (Cascade Lake)",
-        "clockSpeed" : "3.1 GHz",
-        "memory" : "128 GiB",
-        "storage" : "16 x 14000 HDD",
-        "networkPerformance" : "50 Gigabit",
-        "processorArchitecture" : "64-bit",
-        "tenancy" : "Dedicated",
-        "operatingSystem" : "SUSE",
-        "licenseModel" : "No License required",
-        "usagetype" : "DedicatedRes:d3en.8xlarge",
-        "operation" : "RunInstances:000g",
-        "availabilityzone" : "NA",
-        "capacitystatus" : "AllocatedCapacityReservation",
-        "classicnetworkingsupport" : "false",
-        "dedicatedEbsThroughput" : "5000 Mbps",
-        "ecu" : "NA",
-        "enhancedNetworkingSupported" : "Yes",
-        "instancesku" : "2XW3BCEZ83WMGFJY",
-        "intelAvxAvailable" : "Yes",
-        "intelAvx2Available" : "Yes",
-        "intelTurboAvailable" : "Yes",
-        "marketoption" : "OnDemand",
-        "normalizationSizeFactor" : "64",
-        "preInstalledSw" : "NA",
-        "processorFeatures" : "AVX; AVX2; Intel AVX; Intel AVX2; Intel AVX512; Intel Turbo",
-        "servicename" : "Amazon Elastic Compute Cloud",
-        "vpcnetworkingsupport" : "true"
-      }
-    }
-  }
-}
-`
-	r := strings.NewReader(body)
-	resp, err := unmarshalProductsResponse(r)
-	assert.Nil(t, err)
-	assert.Len(t, resp.Products, 2)
-	assert.NotNil(t, resp.Products["VVD8BG8WWFD3DAZN"])
-	assert.NotNil(t, resp.Products["C36QEQQQJ8ZR7N32"])
-	assert.Equal(t, resp.Products["VVD8BG8WWFD3DAZN"].Attributes.InstanceType, "r5b.4xlarge")
-	assert.Equal(t, resp.Products["C36QEQQQJ8ZR7N32"].Attributes.InstanceType, "d3en.8xlarge")
-
-	invalidJsonTests := map[string]string{
-		"[":                     "[",
-		"]":                     "]",
-		"}":                     "}",
-		"{":                     "{",
-		"Plain text":            "invalid",
-		"List":                  "[]",
-		"Invalid products ([])": `{"products":[]}`,
-		"Invalid product ([])":  `{"products":{"zz":[]}}`,
-	}
-	for name, body := range invalidJsonTests {
-		t.Run(name, func(t *testing.T) {
-			r := strings.NewReader(body)
-			resp, err := unmarshalProductsResponse(r)
-			assert.NotNil(t, err)
-			assert.Nil(t, resp)
-		})
-	}
-}