Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[cluster-autoscaler-release-1.31] chore: defork labels and taints #7240

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion cluster-autoscaler/cloudprovider/azure/azure_scale_set.go
Original file line number Diff line number Diff line change
Expand Up @@ -633,7 +633,9 @@ func (scaleSet *ScaleSet) TemplateNodeInfo() (*schedulerframework.NodeInfo, erro
return nil, err
}

node, err := buildNodeFromTemplate(scaleSet.Name, template, scaleSet.manager, scaleSet.enableDynamicInstanceList)
inputLabels := map[string]string{}
inputTaints := ""
node, err := buildNodeFromTemplate(scaleSet.Name, inputLabels, inputTaints, template, scaleSet.manager, scaleSet.enableDynamicInstanceList)

if err != nil {
return nil, err
Expand Down
156 changes: 151 additions & 5 deletions cluster-autoscaler/cloudprovider/azure/azure_template.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,17 +31,61 @@ import (
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
"k8s.io/klog/v2"
kubeletapis "k8s.io/kubelet/pkg/apis"
)

const (
azureDiskTopologyKey string = "topology.disk.csi.azure.com/zone"
// AKSLabelPrefixValue represents the constant prefix for AKSLabelKeyPrefixValue
AKSLabelPrefixValue = "kubernetes.azure.com"
// AKSLabelKeyPrefixValue represents prefix for AKS Labels
AKSLabelKeyPrefixValue = AKSLabelPrefixValue + "/"

azureDiskTopologyKey = "topology.disk.csi.azure.com/zone"
// For NP-series SKU, the xilinx device plugin uses that resource name
// https://github.com/Xilinx/FPGA_as_a_Service/tree/master/k8s-fpga-device-plugin
xilinxFpgaResourceName = "xilinx.com/fpga-xilinx_u250_gen3x16_xdma_shell_2_1-0"

// legacyPoolNameTag is the legacy tag that AKS adds to the VMSS with its value
// being the agentpool name
legacyPoolNameTag = "poolName"
// poolNameTag is the new tag that replaces the above one
// Newly created pools and clusters will have this one on the VMSS
// instead of the legacy one. We'll have to live with both tags for a while.
poolNameTag = "aks-managed-poolName"

// This is the legacy label is added by agentbaker, agentpool={poolName} and we want to predict that
// a node added to this agentpool will have this as a node label. The value is fetched
// from the VMSS tag with key poolNameTag/legacyPoolNameTag
legacyAgentPoolNodeLabelKey = "agentpool"
// New label that replaces the above
agentPoolNodeLabelKey = AKSLabelKeyPrefixValue + "agentpool"

// Storage profile node labels
legacyStorageProfileNodeLabelKey = "storageprofile"
storageProfileNodeLabelKey = AKSLabelKeyPrefixValue + "storageprofile"

// Storage tier node labels
legacyStorageTierNodeLabelKey = "storagetier"
storageTierNodeLabelKey = AKSLabelKeyPrefixValue + "storagetier"

// Fips node label
fipsNodeLabelKey = AKSLabelKeyPrefixValue + "fips_enabled"

// OS Sku node Label
osSkuLabelKey = AKSLabelKeyPrefixValue + "os-sku"

// Security node label
securityTypeLabelKey = AKSLabelKeyPrefixValue + "security-type"

customCATrustEnabledLabelKey = AKSLabelKeyPrefixValue + "custom-ca-trust-enabled"
kataMshvVMIsolationLabelKey = AKSLabelKeyPrefixValue + "kata-mshv-vm-isolation"

// Cluster node label
clusterLabelKey = AKSLabelKeyPrefixValue + "cluster"
)

func buildNodeFromTemplate(nodeGroupName string, template compute.VirtualMachineScaleSet, manager *AzureManager, enableDynamicInstanceList bool) (*apiv1.Node, error) {
func buildNodeFromTemplate(nodeGroupName string, inputLabels map[string]string, inputTaints string,
template compute.VirtualMachineScaleSet, manager *AzureManager, enableDynamicInstanceList bool) (*apiv1.Node, error) {
node := apiv1.Node{}
nodeName := fmt.Sprintf("%s-asg-%d", nodeGroupName, rand.Int63())

Expand Down Expand Up @@ -90,7 +134,9 @@ func buildNodeFromTemplate(nodeGroupName string, template compute.VirtualMachine
node.Status.Capacity[apiv1.ResourceCPU] = *resource.NewQuantity(vcpu, resource.DecimalSI)
// isNPSeries returns if a SKU is an NP-series SKU
// SKU API reports GPUs for NP-series but it's actually FPGAs
if !isNPSeries(*template.Sku.Name) {
if isNPSeries(*template.Sku.Name) {
node.Status.Capacity[xilinxFpgaResourceName] = *resource.NewQuantity(gpuCount, resource.DecimalSI)
} else {
node.Status.Capacity[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(gpuCount, resource.DecimalSI)
}

Expand All @@ -113,16 +159,78 @@ func buildNodeFromTemplate(nodeGroupName string, template compute.VirtualMachine

// GenericLabels
node.Labels = cloudprovider.JoinStringMaps(node.Labels, buildGenericLabels(template, nodeName))

// Labels from the Scale Set's Tags
node.Labels = cloudprovider.JoinStringMaps(node.Labels, extractLabelsFromScaleSet(template.Tags))
labels := make(map[string]string)

// Prefer the explicit labels in spec coming from RP over the VMSS template
if len(inputLabels) > 0 {
labels = inputLabels
} else {
labels = extractLabelsFromScaleSet(template.Tags)
}

// Add the agentpool label, its value should come from the VMSS poolName tag
// NOTE: The plan is for agentpool label to be deprecated in favor of the aks-prefixed one
// We will have to live with both labels for a while
if node.Labels[legacyPoolNameTag] != "" {
labels[legacyAgentPoolNodeLabelKey] = node.Labels[legacyPoolNameTag]
labels[agentPoolNodeLabelKey] = node.Labels[legacyPoolNameTag]
}
if node.Labels[poolNameTag] != "" {
labels[legacyAgentPoolNodeLabelKey] = node.Labels[poolNameTag]
labels[agentPoolNodeLabelKey] = node.Labels[poolNameTag]
}

// Add the storage profile and storage tier labels
if template.VirtualMachineProfile != nil && template.VirtualMachineProfile.StorageProfile != nil && template.VirtualMachineProfile.StorageProfile.OsDisk != nil {
// ephemeral
if template.VirtualMachineProfile.StorageProfile.OsDisk.DiffDiskSettings != nil && template.VirtualMachineProfile.StorageProfile.OsDisk.DiffDiskSettings.Option == compute.Local {
labels[legacyStorageProfileNodeLabelKey] = "ephemeral"
labels[storageProfileNodeLabelKey] = "ephemeral"
} else {
labels[legacyStorageProfileNodeLabelKey] = "managed"
labels[storageProfileNodeLabelKey] = "managed"
}
if template.VirtualMachineProfile.StorageProfile.OsDisk.ManagedDisk != nil {
labels[legacyStorageTierNodeLabelKey] = string(template.VirtualMachineProfile.StorageProfile.OsDisk.ManagedDisk.StorageAccountType)
labels[storageTierNodeLabelKey] = string(template.VirtualMachineProfile.StorageProfile.OsDisk.ManagedDisk.StorageAccountType)
}
// Add ephemeral-storage value
if template.VirtualMachineProfile.StorageProfile.OsDisk.DiskSizeGB != nil {
node.Status.Capacity[apiv1.ResourceEphemeralStorage] = *resource.NewQuantity(int64(int(*template.VirtualMachineProfile.StorageProfile.OsDisk.DiskSizeGB)*1024*1024*1024), resource.DecimalSI)
klog.V(4).Infof("OS Disk Size from template is: %d", *template.VirtualMachineProfile.StorageProfile.OsDisk.DiskSizeGB)
klog.V(4).Infof("Setting ephemeral storage to: %v", node.Status.Capacity[apiv1.ResourceEphemeralStorage])
}
}

// If we are on GPU-enabled SKUs, append the accelerator
// label so that CA makes better decision when scaling from zero for GPU pools
if isNvidiaEnabledSKU(*template.Sku.Name) {
labels[GPULabel] = "nvidia"
labels[legacyGPULabel] = "nvidia"
}

// Extract allocatables from tags
resourcesFromTags := extractAllocatableResourcesFromScaleSet(template.Tags)
for resourceName, val := range resourcesFromTags {
node.Status.Capacity[apiv1.ResourceName(resourceName)] = *val
}

node.Labels = cloudprovider.JoinStringMaps(node.Labels, labels)
klog.V(4).Infof("Setting node %s labels to: %s", nodeName, node.Labels)

var taints []apiv1.Taint
// Prefer the explicit taints in spec over the VMSS template
if inputTaints != "" {
taints = extractTaintsFromSpecString(inputTaints)
} else {
taints = extractTaintsFromScaleSet(template.Tags)
}

// Taints from the Scale Set's Tags
node.Spec.Taints = extractTaintsFromScaleSet(template.Tags)
node.Spec.Taints = taints
klog.V(4).Infof("Setting node %s taints to: %s", nodeName, node.Spec.Taints)

node.Status.Conditions = cloudprovider.BuildReadyConditions()
return &node, nil
Expand All @@ -140,10 +248,15 @@ func buildInstanceOS(template compute.VirtualMachineScaleSet) string {
func buildGenericLabels(template compute.VirtualMachineScaleSet, nodeName string) map[string]string {
result := make(map[string]string)

result[kubeletapis.LabelArch] = cloudprovider.DefaultArch
result[apiv1.LabelArchStable] = cloudprovider.DefaultArch

result[kubeletapis.LabelOS] = buildInstanceOS(template)
result[apiv1.LabelOSStable] = buildInstanceOS(template)

result[apiv1.LabelInstanceType] = *template.Sku.Name
result[apiv1.LabelInstanceTypeStable] = *template.Sku.Name
result[apiv1.LabelZoneRegion] = strings.ToLower(*template.Location)
result[apiv1.LabelTopologyRegion] = strings.ToLower(*template.Location)

if template.Zones != nil && len(*template.Zones) > 0 {
Expand All @@ -157,9 +270,11 @@ func buildGenericLabels(template compute.VirtualMachineScaleSet, nodeName string
//Note that the if the customer is to have some pod affinity picking exact zone, this logic won't work.
//For now, discourage the customers from using podAffinity to pick the availability zones.
randomZone := failureDomains[rand.Intn(len(failureDomains))]
result[apiv1.LabelZoneFailureDomain] = randomZone
result[apiv1.LabelTopologyZone] = randomZone
result[azureDiskTopologyKey] = randomZone
} else {
result[apiv1.LabelZoneFailureDomain] = "0"
result[apiv1.LabelTopologyZone] = "0"
result[azureDiskTopologyKey] = ""
}
Expand Down Expand Up @@ -212,6 +327,37 @@ func extractTaintsFromScaleSet(tags map[string]*string) []apiv1.Taint {
return taints
}

// Example of a valid taints string, is the same argument to kubelet's `--register-with-taints`
// "dedicated=foo:NoSchedule,group=bar:NoExecute,app=fizz:PreferNoSchedule"
func extractTaintsFromSpecString(taintsString string) []apiv1.Taint {
taints := make([]apiv1.Taint, 0)
// First split the taints at the separator
splits := strings.Split(taintsString, ",")
for _, split := range splits {
taintSplit := strings.Split(split, "=")
if len(taintSplit) != 2 {
continue
}

taintKey := taintSplit[0]
taintValue := taintSplit[1]

r, _ := regexp.Compile("(.*):(?:NoSchedule|NoExecute|PreferNoSchedule)")
if !r.MatchString(taintValue) {
continue
}

values := strings.SplitN(taintValue, ":", 2)
taints = append(taints, apiv1.Taint{
Key: taintKey,
Value: values[0],
Effect: apiv1.TaintEffect(values[1]),
})
}

return taints
}

func extractAutoscalingOptionsFromScaleSetTags(tags map[string]*string) map[string]string {
options := make(map[string]string)
for tagName, tagValue := range tags {
Expand Down
66 changes: 58 additions & 8 deletions cluster-autoscaler/cloudprovider/azure/azure_template_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package azure

import (
"fmt"
"strings"
"testing"

"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2022-08-01/compute"
Expand Down Expand Up @@ -104,6 +105,45 @@ func TestExtractTaintsFromScaleSet(t *testing.T) {
assert.Equal(t, makeTaintSet(expectedTaints), makeTaintSet(taints))
}

func TestExtractTaintsFromSpecString(t *testing.T) {
taintsString := []string{
"dedicated=foo:NoSchedule",
"group=bar:NoExecute",
"app=fizz:PreferNoSchedule",
"k8s.io/testing/underscore/to/slash=fizz:PreferNoSchedule",
"bar=baz",
"blank=",
"nosplit=some_value",
}

expectedTaints := []apiv1.Taint{
{
Key: "dedicated",
Value: "foo",
Effect: apiv1.TaintEffectNoSchedule,
},
{
Key: "group",
Value: "bar",
Effect: apiv1.TaintEffectNoExecute,
},
{
Key: "app",
Value: "fizz",
Effect: apiv1.TaintEffectPreferNoSchedule,
},
{
Key: "k8s.io/testing/underscore/to/slash",
Value: "fizz",
Effect: apiv1.TaintEffectPreferNoSchedule,
},
}

taints := extractTaintsFromSpecString(strings.Join(taintsString, ","))
assert.Len(t, taints, 4)
assert.Equal(t, makeTaintSet(expectedTaints), makeTaintSet(taints))
}

func TestExtractAllocatableResourcesFromScaleSet(t *testing.T) {
tags := map[string]*string{
fmt.Sprintf("%s%s", nodeResourcesTagName, "cpu"): to.StringPtr("100m"),
Expand All @@ -123,14 +163,6 @@ func TestExtractAllocatableResourcesFromScaleSet(t *testing.T) {
assert.Equal(t, (&exepectedCustomAllocatable).String(), labels["nvidia.com/Tesla-P100-PCIE"].String())
}

func makeTaintSet(taints []apiv1.Taint) map[apiv1.Taint]bool {
set := make(map[apiv1.Taint]bool)
for _, taint := range taints {
set[taint] = true
}
return set
}

func TestTopologyFromScaleSet(t *testing.T) {
testNodeName := "test-node"
testSkuName := "test-sku"
Expand All @@ -144,12 +176,16 @@ func TestTopologyFromScaleSet(t *testing.T) {
Location: to.StringPtr("westus"),
}
expectedZoneValues := []string{"westus-1", "westus-2", "westus-3"}

labels := buildGenericLabels(testVmss, testNodeName)
failureDomain, ok := labels[apiv1.LabelZoneFailureDomain]
assert.True(t, ok)
topologyZone, ok := labels[apiv1.LabelTopologyZone]
assert.True(t, ok)
azureDiskTopology, ok := labels[azureDiskTopologyKey]
assert.True(t, ok)

assert.Contains(t, expectedZoneValues, failureDomain)
assert.Contains(t, expectedZoneValues, topologyZone)
assert.Contains(t, expectedZoneValues, azureDiskTopology)
}
Expand All @@ -165,10 +201,16 @@ func TestEmptyTopologyFromScaleSet(t *testing.T) {
VirtualMachineProfile: &compute.VirtualMachineScaleSetVMProfile{OsProfile: nil}},
Location: to.StringPtr("westus"),
}

expectedFailureDomain := "0"
expectedTopologyZone := "0"
expectedAzureDiskTopology := ""
labels := buildGenericLabels(testVmss, testNodeName)

failureDomain, ok := labels[apiv1.LabelZoneFailureDomain]
assert.True(t, ok)
assert.Equal(t, expectedFailureDomain, failureDomain)

topologyZone, ok := labels[apiv1.LabelTopologyZone]
assert.True(t, ok)
assert.Equal(t, expectedTopologyZone, topologyZone)
Expand All @@ -177,3 +219,11 @@ func TestEmptyTopologyFromScaleSet(t *testing.T) {
assert.True(t, ok)
assert.Equal(t, expectedAzureDiskTopology, azureDiskTopology)
}

func makeTaintSet(taints []apiv1.Taint) map[apiv1.Taint]bool {
set := make(map[apiv1.Taint]bool)
for _, taint := range taints {
set[taint] = true
}
return set
}
Loading