diff --git a/cluster-autoscaler/cloudprovider/aws/README.md b/cluster-autoscaler/cloudprovider/aws/README.md index 51f7e49264ee..94df148bdb2f 100644 --- a/cluster-autoscaler/cloudprovider/aws/README.md +++ b/cluster-autoscaler/cloudprovider/aws/README.md @@ -351,3 +351,6 @@ To refresh static list, please run `go run ec2_instance_types/gen.go` under `aws:///us-east-1a/i-01234abcdef`. * If you want to use regional STS endpoints (e.g. when using VPC endpoint for STS) the env `AWS_STS_REGIONAL_ENDPOINTS=regional` should be set. +* If you want to run it on instances with IMDSv1 disabled make sure your + EC2 launch configuration has the setting `Metadata response hop limit` set to `2`. + Otherwise, the `/latest/api/token` call will timeout and result in an error. See [AWS docs here](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/configuring-instance-metadata-service.html#configuring-instance-metadata-options) for further information. diff --git a/cluster-autoscaler/cloudprovider/aws/auto_scaling_test.go b/cluster-autoscaler/cloudprovider/aws/auto_scaling_test.go index 90e5dba7e261..8b85887574e8 100644 --- a/cluster-autoscaler/cloudprovider/aws/auto_scaling_test.go +++ b/cluster-autoscaler/cloudprovider/aws/auto_scaling_test.go @@ -27,22 +27,22 @@ import ( "github.com/stretchr/testify/require" ) -func TestMoreThen50Groups(t *testing.T) { +func TestMoreThen100Groups(t *testing.T) { service := &AutoScalingMock{} autoScalingWrapper := &autoScalingWrapper{ autoScaling: service, } - // Generate 51 ASG names - names := make([]string, 51) + // Generate 101 ASG names + names := make([]string, 101) for i := 0; i < len(names); i++ { names[i] = fmt.Sprintf("asg-%d", i) } - // First batch, first 50 elements + // First batch, first 100 elements service.On("DescribeAutoScalingGroupsPages", &autoscaling.DescribeAutoScalingGroupsInput{ - AutoScalingGroupNames: aws.StringSlice(names[:50]), + AutoScalingGroupNames: aws.StringSlice(names[:100]), MaxRecords: aws.Int64(maxRecordsReturnedByAPI), }, mock.AnythingOfType("func(*autoscaling.DescribeAutoScalingGroupsOutput, bool) bool"), @@ -51,10 +51,10 @@ func TestMoreThen50Groups(t *testing.T) { fn(testNamedDescribeAutoScalingGroupsOutput("asg-1", 1, "test-instance-id"), false) }).Return(nil) - // Second batch, element 51 + // Second batch, element 101 service.On("DescribeAutoScalingGroupsPages", &autoscaling.DescribeAutoScalingGroupsInput{ - AutoScalingGroupNames: aws.StringSlice([]string{"asg-50"}), + AutoScalingGroupNames: aws.StringSlice([]string{"asg-100"}), MaxRecords: aws.Int64(maxRecordsReturnedByAPI), }, mock.AnythingOfType("func(*autoscaling.DescribeAutoScalingGroupsOutput, bool) bool"), diff --git a/cluster-autoscaler/cloudprovider/aws/aws_manager.go b/cluster-autoscaler/cloudprovider/aws/aws_manager.go index d9f71bec8281..33de44085f37 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_manager.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_manager.go @@ -49,7 +49,7 @@ const ( operationWaitTimeout = 5 * time.Second operationPollInterval = 100 * time.Millisecond maxRecordsReturnedByAPI = 100 - maxAsgNamesPerDescribe = 50 + maxAsgNamesPerDescribe = 100 refreshInterval = 1 * time.Minute autoDiscovererTypeASG = "asg" asgAutoDiscovererKeyTag = "tag" diff --git a/cluster-autoscaler/cloudprovider/aws/aws_util.go b/cluster-autoscaler/cloudprovider/aws/aws_util.go index 3ace0a6fd9b3..62e0d31eff0e 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_util.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_util.go @@ -20,18 +20,23 @@ import ( "encoding/json" "errors" "fmt" - "github.com/aws/aws-sdk-go/aws/endpoints" - "io/ioutil" - klog "k8s.io/klog/v2" + "io" "net/http" "os" "regexp" "strconv" "strings" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/ec2metadata" + "github.com/aws/aws-sdk-go/aws/endpoints" + "github.com/aws/aws-sdk-go/aws/session" + + klog "k8s.io/klog/v2" ) var ( - ec2MetaDataServiceUrl = "http://169.254.169.254/latest/dynamic/instance-identity/document" + ec2MetaDataServiceUrl = "http://169.254.169.254" ec2PricingServiceUrlTemplate = "https://pricing.us-east-1.amazonaws.com/offers/v1.0/aws/AmazonEC2/current/%s/index.json" ec2PricingServiceUrlTemplateCN = "https://pricing.cn-north-1.amazonaws.com.cn/offers/v1.0/cn/AmazonEC2/current/%s/index.json" staticListLastUpdateTime = "2019-10-14" @@ -82,16 +87,9 @@ func GenerateEC2InstanceTypes(region string) (map[string]*InstanceType, error) { defer res.Body.Close() - body, err := ioutil.ReadAll(res.Body) + unmarshalled, err := unmarshalProductsResponse(res.Body) if err != nil { - klog.Warningf("Error parsing %s skipping...\n", url) - continue - } - - var unmarshalled = response{} - err = json.Unmarshal(body, &unmarshalled) - if err != nil { - klog.Warningf("Error unmarshalling %s, skip...\n", url) + klog.Warningf("Error parsing %s skipping...\n%s\n", url, err) continue } @@ -127,6 +125,58 @@ func GetStaticEC2InstanceTypes() (map[string]*InstanceType, string) { return InstanceTypes, staticListLastUpdateTime } +func unmarshalProductsResponse(r io.Reader) (*response, error) { + dec := json.NewDecoder(r) + t, err := dec.Token() + if err != nil { + return nil, err + } + if delim, ok := t.(json.Delim); !ok || delim.String() != "{" { + return nil, errors.New("Invalid products json") + } + + unmarshalled := response{map[string]product{}} + + for dec.More() { + t, err = dec.Token() + if err != nil { + return nil, err + } + + if t == "products" { + tt, err := dec.Token() + if err != nil { + return nil, err + } + if delim, ok := tt.(json.Delim); !ok || delim.String() != "{" { + return nil, errors.New("Invalid products json") + } + for dec.More() { + productCode, err := dec.Token() + if err != nil { + return nil, err + } + + prod := product{} + if err = dec.Decode(&prod); err != nil { + return nil, err + } + unmarshalled.Products[productCode.(string)] = prod + } + } + } + + t, err = dec.Token() + if err != nil { + return nil, err + } + if delim, ok := t.(json.Delim); !ok || delim.String() != "}" { + return nil, errors.New("Invalid products json") + } + + return &unmarshalled, nil +} + func parseMemory(memory string) int64 { reg, err := regexp.Compile("[^0-9\\.]+") if err != nil { @@ -155,26 +205,13 @@ func GetCurrentAwsRegion() (string, error) { region, present := os.LookupEnv("AWS_REGION") if !present { - klog.V(1).Infof("fetching %s\n", ec2MetaDataServiceUrl) - res, err := http.Get(ec2MetaDataServiceUrl) - if err != nil { - return "", fmt.Errorf("Error fetching %s", ec2MetaDataServiceUrl) - } - - defer res.Body.Close() - - body, err := ioutil.ReadAll(res.Body) + c := aws.NewConfig(). + WithEndpoint(ec2MetaDataServiceUrl) + sess, err := session.NewSession() if err != nil { - return "", fmt.Errorf("Error parsing %s", ec2MetaDataServiceUrl) + return "", fmt.Errorf("failed to create session") } - - var unmarshalled = map[string]string{} - err = json.Unmarshal(body, &unmarshalled) - if err != nil { - klog.Warningf("Error unmarshalling %s, skip...\n", ec2MetaDataServiceUrl) - } - - region = unmarshalled["region"] + return ec2metadata.New(sess, c).Region() } return region, nil diff --git a/cluster-autoscaler/cloudprovider/aws/aws_util_test.go b/cluster-autoscaler/cloudprovider/aws/aws_util_test.go index 6027babd8900..e29860b41f6c 100644 --- a/cluster-autoscaler/cloudprovider/aws/aws_util_test.go +++ b/cluster-autoscaler/cloudprovider/aws/aws_util_test.go @@ -17,12 +17,14 @@ limitations under the License. package aws import ( - "github.com/stretchr/testify/assert" "net/http" "net/http/httptest" "os" "strconv" + "strings" "testing" + + "github.com/stretchr/testify/assert" ) func TestGetStaticEC2InstanceTypes(t *testing.T) { @@ -111,3 +113,118 @@ func TestGetCurrentAwsRegionWithRegionEnv(t *testing.T) { assert.Nil(t, err) assert.Equal(t, region, result) } + +func TestUnmarshalProductsResponse(t *testing.T) { + body := ` +{ + "products": { + "VVD8BG8WWFD3DAZN" : { + "sku" : "VVD8BG8WWFD3DAZN", + "productFamily" : "Compute Instance", + "attributes" : { + "servicecode" : "AmazonEC2", + "location" : "US East (N. Virginia)", + "locationType" : "AWS Region", + "instanceType" : "r5b.4xlarge", + "currentGeneration" : "Yes", + "instanceFamily" : "Memory optimized", + "vcpu" : "16", + "physicalProcessor" : "Intel Xeon Platinum 8259 (Cascade Lake)", + "clockSpeed" : "3.1 GHz", + "memory" : "128 GiB", + "storage" : "EBS only", + "networkPerformance" : "Up to 10 Gigabit", + "processorArchitecture" : "64-bit", + "tenancy" : "Shared", + "operatingSystem" : "Linux", + "licenseModel" : "No License required", + "usagetype" : "UnusedBox:r5b.4xlarge", + "operation" : "RunInstances:0004", + "availabilityzone" : "NA", + "capacitystatus" : "UnusedCapacityReservation", + "classicnetworkingsupport" : "false", + "dedicatedEbsThroughput" : "10 Gbps", + "ecu" : "NA", + "enhancedNetworkingSupported" : "Yes", + "instancesku" : "G4NFAXD9TGJM3RY8", + "intelAvxAvailable" : "Yes", + "intelAvx2Available" : "No", + "intelTurboAvailable" : "No", + "marketoption" : "OnDemand", + "normalizationSizeFactor" : "32", + "preInstalledSw" : "SQL Std", + "servicename" : "Amazon Elastic Compute Cloud", + "vpcnetworkingsupport" : "true" + } + }, + "C36QEQQQJ8ZR7N32" : { + "sku" : "C36QEQQQJ8ZR7N32", + "productFamily" : "Compute Instance", + "attributes" : { + "servicecode" : "AmazonEC2", + "location" : "US East (N. Virginia)", + "locationType" : "AWS Region", + "instanceType" : "d3en.8xlarge", + "currentGeneration" : "Yes", + "instanceFamily" : "Storage optimized", + "vcpu" : "32", + "physicalProcessor" : "Intel Xeon Platinum 8259 (Cascade Lake)", + "clockSpeed" : "3.1 GHz", + "memory" : "128 GiB", + "storage" : "16 x 14000 HDD", + "networkPerformance" : "50 Gigabit", + "processorArchitecture" : "64-bit", + "tenancy" : "Dedicated", + "operatingSystem" : "SUSE", + "licenseModel" : "No License required", + "usagetype" : "DedicatedRes:d3en.8xlarge", + "operation" : "RunInstances:000g", + "availabilityzone" : "NA", + "capacitystatus" : "AllocatedCapacityReservation", + "classicnetworkingsupport" : "false", + "dedicatedEbsThroughput" : "5000 Mbps", + "ecu" : "NA", + "enhancedNetworkingSupported" : "Yes", + "instancesku" : "2XW3BCEZ83WMGFJY", + "intelAvxAvailable" : "Yes", + "intelAvx2Available" : "Yes", + "intelTurboAvailable" : "Yes", + "marketoption" : "OnDemand", + "normalizationSizeFactor" : "64", + "preInstalledSw" : "NA", + "processorFeatures" : "AVX; AVX2; Intel AVX; Intel AVX2; Intel AVX512; Intel Turbo", + "servicename" : "Amazon Elastic Compute Cloud", + "vpcnetworkingsupport" : "true" + } + } + } +} +` + r := strings.NewReader(body) + resp, err := unmarshalProductsResponse(r) + assert.Nil(t, err) + assert.Len(t, resp.Products, 2) + assert.NotNil(t, resp.Products["VVD8BG8WWFD3DAZN"]) + assert.NotNil(t, resp.Products["C36QEQQQJ8ZR7N32"]) + assert.Equal(t, resp.Products["VVD8BG8WWFD3DAZN"].Attributes.InstanceType, "r5b.4xlarge") + assert.Equal(t, resp.Products["C36QEQQQJ8ZR7N32"].Attributes.InstanceType, "d3en.8xlarge") + + invalidJsonTests := map[string]string{ + "[": "[", + "]": "]", + "}": "}", + "{": "{", + "Plain text": "invalid", + "List": "[]", + "Invalid products ([])": `{"products":[]}`, + "Invalid product ([])": `{"products":{"zz":[]}}`, + } + for name, body := range invalidJsonTests { + t.Run(name, func(t *testing.T) { + r := strings.NewReader(body) + resp, err := unmarshalProductsResponse(r) + assert.NotNil(t, err) + assert.Nil(t, resp) + }) + } +}