Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Backport #6245 [CA] AWS: cache instance requirements into CA 1.26 #6623

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions cluster-autoscaler/cloudprovider/aws/auto_scaling_groups.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (

"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/aws"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/service/autoscaling"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider/aws/aws-sdk-go/service/ec2"
"k8s.io/autoscaler/cluster-autoscaler/config/dynamic"
klog "k8s.io/klog/v2"
)
Expand Down Expand Up @@ -59,6 +60,7 @@ type mixedInstancesPolicy struct {
launchTemplate *launchTemplate
instanceTypesOverrides []string
instanceRequirementsOverrides *autoscaling.InstanceRequirements
instanceRequirements *ec2.InstanceRequirements
}

type asg struct {
Expand Down Expand Up @@ -539,6 +541,12 @@ func (m *asgCache) buildAsgFromAWS(g *autoscaling.Group) (*asg, error) {
instanceRequirementsOverrides: getInstanceTypeRequirements(g.MixedInstancesPolicy.LaunchTemplate.Overrides),
}

instanceRequirements, err := m.getInstanceRequirementsFromMixedInstancesPolicy(asg.MixedInstancesPolicy)
if err != nil {
return nil, fmt.Errorf("unable to retrieve instance requirements from mixed instance policy, err: %v", err)
}
asg.MixedInstancesPolicy.instanceRequirements = instanceRequirements

if len(asg.MixedInstancesPolicy.instanceTypesOverrides) != 0 && asg.MixedInstancesPolicy.instanceRequirementsOverrides != nil {
return nil, fmt.Errorf("invalid setup of both instance type and instance requirements overrides configured")
}
Expand All @@ -547,6 +555,27 @@ func (m *asgCache) buildAsgFromAWS(g *autoscaling.Group) (*asg, error) {
return asg, nil
}

// getInstanceRequirementsFromMixedInstancesPolicy resolves the EC2 instance
// requirements that apply to the given mixed instances policy.
//
// Requirements set as overrides on the policy take precedence: they are
// converted from their autoscaling representation through the AWS service
// wrapper. Only when no such overrides exist is the policy's launch template
// consulted, and its InstanceRequirements are used if present.
//
// An empty (non-nil) InstanceRequirements is returned when the policy has
// neither overrides nor a launch template, or when the launch template data
// carries no instance requirements. Any error from the AWS service wrapper is
// returned unchanged together with a nil result.
func (m *asgCache) getInstanceRequirementsFromMixedInstancesPolicy(policy *mixedInstancesPolicy) (*ec2.InstanceRequirements, error) {
	switch {
	case policy.instanceRequirementsOverrides != nil:
		converted, err := m.awsService.getEC2RequirementsFromAutoscaling(policy.instanceRequirementsOverrides)
		if err != nil {
			return nil, err
		}
		return converted, nil

	case policy.launchTemplate != nil:
		lt := policy.launchTemplate
		data, err := m.awsService.getLaunchTemplateData(lt.name, lt.version)
		if err != nil {
			return nil, err
		}
		if data.InstanceRequirements != nil {
			return data.InstanceRequirements, nil
		}
	}

	// Nothing configured anywhere: hand back an empty set of requirements.
	return &ec2.InstanceRequirements{}, nil
}

func (m *asgCache) buildInstanceRefFromAWS(instance *autoscaling.Instance) AwsInstanceRef {
providerID := fmt.Sprintf("aws:///%s/%s", aws.StringValue(instance.AvailabilityZone), aws.StringValue(instance.InstanceId))
return AwsInstanceRef{
Expand Down
38 changes: 5 additions & 33 deletions cluster-autoscaler/cloudprovider/aws/aws_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -271,9 +271,7 @@ func (m *AwsManager) buildNodeFromTemplate(asg *asg, template *asgTemplate) (*ap
node.Status.Capacity[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(template.InstanceType.GPU, resource.DecimalSI)
node.Status.Capacity[apiv1.ResourceMemory] = *resource.NewQuantity(template.InstanceType.MemoryMb*1024*1024, resource.DecimalSI)

if err := m.updateCapacityWithRequirementsOverrides(&node.Status.Capacity, asg.MixedInstancesPolicy); err != nil {
return nil, err
}
m.updateCapacityWithRequirementsOverrides(&node.Status.Capacity, asg.MixedInstancesPolicy)

resourcesFromTags := extractAllocatableResourcesFromAsg(template.Tags)
klog.V(5).Infof("Extracted resources from ASG tags %v", resourcesFromTags)
Expand Down Expand Up @@ -348,15 +346,12 @@ func joinNodeLabelsChoosingUserValuesOverAPIValues(extractedLabels map[string]st
return result
}

func (m *AwsManager) updateCapacityWithRequirementsOverrides(capacity *apiv1.ResourceList, policy *mixedInstancesPolicy) error {
if policy == nil {
return nil
func (m *AwsManager) updateCapacityWithRequirementsOverrides(capacity *apiv1.ResourceList, policy *mixedInstancesPolicy) {
if policy == nil || policy.instanceRequirements == nil {
return
}

instanceRequirements, err := m.getInstanceRequirementsFromMixedInstancesPolicy(policy)
if err != nil {
return fmt.Errorf("error while building node template using instance requirements: (%s)", err)
}
instanceRequirements := policy.instanceRequirements

if instanceRequirements.VCpuCount != nil && instanceRequirements.VCpuCount.Min != nil {
(*capacity)[apiv1.ResourceCPU] = *resource.NewQuantity(*instanceRequirements.VCpuCount.Min, resource.DecimalSI)
Expand All @@ -375,29 +370,6 @@ func (m *AwsManager) updateCapacityWithRequirementsOverrides(capacity *apiv1.Res
}
}
}

return nil
}

// getInstanceRequirementsFromMixedInstancesPolicy determines the EC2 instance
// requirements for a mixed instances policy.
//
// If the policy has instance requirements overrides, they are translated from
// the autoscaling type to the EC2 type via the AWS service wrapper and
// returned. Otherwise, if the policy references a launch template, the
// template data is fetched and its InstanceRequirements are returned when set.
// In every remaining case an empty, non-nil InstanceRequirements is returned.
//
// Errors from the AWS service wrapper are passed through as-is with a nil
// result.
func (m *AwsManager) getInstanceRequirementsFromMixedInstancesPolicy(policy *mixedInstancesPolicy) (*ec2.InstanceRequirements, error) {
	// Overrides win over anything declared in the launch template.
	if overrides := policy.instanceRequirementsOverrides; overrides != nil {
		reqs, err := m.awsService.getEC2RequirementsFromAutoscaling(overrides)
		if err != nil {
			return nil, err
		}
		return reqs, nil
	}

	fallback := &ec2.InstanceRequirements{}
	if policy.launchTemplate == nil {
		return fallback, nil
	}

	data, err := m.awsService.getLaunchTemplateData(policy.launchTemplate.name, policy.launchTemplate.version)
	if err != nil {
		return nil, err
	}
	if data.InstanceRequirements == nil {
		return fallback, nil
	}
	return data.InstanceRequirements, nil
}

func buildGenericLabels(template *asgTemplate, nodeName string) map[string]string {
Expand Down
8 changes: 4 additions & 4 deletions cluster-autoscaler/cloudprovider/aws/aws_manager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -483,18 +483,18 @@ func TestBuildNodeFromTemplate(t *testing.T) {

// Node with instance requirements
asg.MixedInstancesPolicy = &mixedInstancesPolicy{
instanceRequirementsOverrides: &autoscaling.InstanceRequirements{
VCpuCount: &autoscaling.VCpuCountRequest{
instanceRequirements: &ec2.InstanceRequirements{
VCpuCount: &ec2.VCpuCountRange{
Min: aws.Int64(4),
Max: aws.Int64(8),
},
MemoryMiB: &autoscaling.MemoryMiBRequest{
MemoryMiB: &ec2.MemoryMiB{
Min: aws.Int64(4),
Max: aws.Int64(8),
},
AcceleratorTypes: []*string{aws.String(autoscaling.AcceleratorTypeGpu)},
AcceleratorManufacturers: []*string{aws.String(autoscaling.AcceleratorManufacturerNvidia)},
AcceleratorCount: &autoscaling.AcceleratorCountRequest{
AcceleratorCount: &ec2.AcceleratorCount{
Min: aws.Int64(4),
Max: aws.Int64(8),
},
Expand Down
Loading