Skip to content

Commit

Permalink
chore: retreat scale from zero labels change from this PR
Browse files Browse the repository at this point in the history
  • Loading branch information
comtalyst committed Jul 9, 2024
1 parent 968669d commit 8d93fdc
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 246 deletions.
194 changes: 7 additions & 187 deletions cluster-autoscaler/cloudprovider/azure/azure_template.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,72 +24,25 @@ import (
"strings"
"time"

"sigs.k8s.io/cloud-provider-azure/pkg/consts"

"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2022-08-01/compute"
apiv1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/autoscaler/cluster-autoscaler/cloudprovider"
"k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
cloudvolume "k8s.io/cloud-provider/volume"
"k8s.io/klog/v2"
kubeletapis "k8s.io/kubelet/pkg/apis"
)

const (
azureDiskTopologyKey string = "topology.disk.csi.azure.com/zone"
// AKSLabelPrefixValue represents the constant prefix for AKSLabelKeyPrefixValue
AKSLabelPrefixValue = "kubernetes.azure.com"
// AKSLabelKeyPrefixValue represents prefix for AKS Labels
AKSLabelKeyPrefixValue = AKSLabelPrefixValue + "/"

azureDiskTopologyKey = "topology.disk.csi.azure.com/zone"
// For NP-series SKU, the xilinx device plugin uses that resource name
// https://github.com/Xilinx/FPGA_as_a_Service/tree/master/k8s-fpga-device-plugin
xilinxFpgaResourceName = "xilinx.com/fpga-xilinx_u250_gen3x16_xdma_shell_2_1-0"

// legacyPoolNameTag is the legacy tag that AKS adds to the VMSS with its value
// being the agentpool name
legacyPoolNameTag = "poolName"
// poolNameTag is the new tag that replaces the above one
// Newly created pools and clusters will have this one on the VMSS
// instead of the legacy one. We'll have to live with both tags for a while.
poolNameTag = "aks-managed-poolName"

// This is the legacy label is added by agentbaker, agentpool={poolName} and we want to predict that
// a node added to this agentpool will have this as a node label. The value is fetched
// from the VMSS tag with key poolNameTag/legacyPoolNameTag
legacyAgentPoolNodeLabelKey = "agentpool"
// New label that replaces the above
agentPoolNodeLabelKey = AKSLabelKeyPrefixValue + "agentpool"

// Storage profile node labels
legacyStorageProfileNodeLabelKey = "storageprofile"
storageProfileNodeLabelKey = AKSLabelKeyPrefixValue + "storageprofile"

// Storage tier node labels
legacyStorageTierNodeLabelKey = "storagetier"
storageTierNodeLabelKey = AKSLabelKeyPrefixValue + "storagetier"

// Fips node label
fipsNodeLabelKey = AKSLabelKeyPrefixValue + "fips_enabled"

// OS Sku node Label
osSkuLabelKey = AKSLabelKeyPrefixValue + "os-sku"

// Security node label
securityTypeLabelKey = AKSLabelKeyPrefixValue + "security-type"

// Labels defined in RP
// Since Cluster autoscaler cannot import RP, it is defined here.
customCATrustEnabledLabelKey = AKSLabelKeyPrefixValue + "custom-ca-trust-enabled"
kataMshvVMIsolationLabelKey = AKSLabelKeyPrefixValue + "kata-mshv-vm-isolation"

// Cluster node label
clusterLabelKey = AKSLabelKeyPrefixValue + "cluster"
)

func buildNodeFromTemplate(nodeGroupName string, template compute.VirtualMachineScaleSet, manager *AzureManager, enableDynamicInstanceList bool) (*apiv1.Node, error) {

node := apiv1.Node{}
nodeName := fmt.Sprintf("%s-asg-%d", nodeGroupName, rand.Int63())

Expand Down Expand Up @@ -138,9 +91,7 @@ func buildNodeFromTemplate(nodeGroupName string, template compute.VirtualMachine
node.Status.Capacity[apiv1.ResourceCPU] = *resource.NewQuantity(vcpu, resource.DecimalSI)
// isNPSeries returns if a SKU is an NP-series SKU
// SKU API reports GPUs for NP-series but it's actually FPGAs
if isNPSeries(*template.Sku.Name) {
node.Status.Capacity[xilinxFpgaResourceName] = *resource.NewQuantity(gpuCount, resource.DecimalSI)
} else {
if !isNPSeries(*template.Sku.Name) {
node.Status.Capacity[gpu.ResourceNvidiaGPU] = *resource.NewQuantity(gpuCount, resource.DecimalSI)
}

Expand All @@ -165,61 +116,15 @@ func buildNodeFromTemplate(nodeGroupName string, template compute.VirtualMachine
node.Labels = cloudprovider.JoinStringMaps(node.Labels, buildGenericLabels(template, nodeName))

// Labels from the Scale Set's Tags
labels := extractLabelsFromScaleSet(template.Tags)

// Add the agentpool label, its value should come from the VMSS poolName tag
// NOTE: The plan is for agentpool label to be deprecated in favor of the aks-prefixed one
// We will have to live with both labels for a while
if node.Labels[legacyPoolNameTag] != "" {
labels[legacyAgentPoolNodeLabelKey] = node.Labels[legacyPoolNameTag]
labels[agentPoolNodeLabelKey] = node.Labels[legacyPoolNameTag]
}
if node.Labels[poolNameTag] != "" {
labels[legacyAgentPoolNodeLabelKey] = node.Labels[poolNameTag]
labels[agentPoolNodeLabelKey] = node.Labels[poolNameTag]
}

// Add the storage profile and storage tier labels
if template.VirtualMachineProfile != nil && template.VirtualMachineProfile.StorageProfile != nil && template.VirtualMachineProfile.StorageProfile.OsDisk != nil {
// ephemeral
if template.VirtualMachineProfile.StorageProfile.OsDisk.DiffDiskSettings != nil && template.VirtualMachineProfile.StorageProfile.OsDisk.DiffDiskSettings.Option == compute.Local {
labels[legacyStorageProfileNodeLabelKey] = "ephemeral"
labels[storageProfileNodeLabelKey] = "ephemeral"
} else {
labels[legacyStorageProfileNodeLabelKey] = "managed"
labels[storageProfileNodeLabelKey] = "managed"
}
if template.VirtualMachineProfile.StorageProfile.OsDisk.ManagedDisk != nil {
labels[legacyStorageTierNodeLabelKey] = string(template.VirtualMachineProfile.StorageProfile.OsDisk.ManagedDisk.StorageAccountType)
labels[storageTierNodeLabelKey] = string(template.VirtualMachineProfile.StorageProfile.OsDisk.ManagedDisk.StorageAccountType)
}
// Add ephemeral-storage value
if template.VirtualMachineProfile.StorageProfile.OsDisk.DiskSizeGB != nil {
node.Status.Capacity[apiv1.ResourceEphemeralStorage] = *resource.NewQuantity(int64(int(*template.VirtualMachineProfile.StorageProfile.OsDisk.DiskSizeGB)*1024*1024*1024), resource.DecimalSI)
klog.V(4).Infof("OS Disk Size from template is: %d", *template.VirtualMachineProfile.StorageProfile.OsDisk.DiskSizeGB)
klog.V(4).Infof("Setting ephemeral storage to: %v", node.Status.Capacity[apiv1.ResourceEphemeralStorage])
}
}

// If we are on GPU-enabled SKUs, append the accelerator
// label so that CA makes better decision when scaling from zero for GPU pools
if isNvidiaEnabledSKU(*template.Sku.Name) {
labels[GPULabel] = "nvidia"
labels[legacyGPULabel] = "nvidia"
}
node.Labels = cloudprovider.JoinStringMaps(node.Labels, extractLabelsFromScaleSet(template.Tags))

// Extract allocatables from tags
resourcesFromTags := extractAllocatableResourcesFromScaleSet(template.Tags)
for resourceName, val := range resourcesFromTags {
node.Status.Capacity[apiv1.ResourceName(resourceName)] = *val
}

node.Labels = cloudprovider.JoinStringMaps(node.Labels, labels)
klog.V(4).Infof("Setting node %s labels to: %s", nodeName, node.Labels)

// Taints from the Scale Set's Tags
node.Spec.Taints = extractTaintsFromScaleSet(template.Tags)
klog.V(4).Infof("Setting node %s taints to: %s", nodeName, node.Spec.Taints)

node.Status.Conditions = cloudprovider.BuildReadyConditions()
return &node, nil
Expand All @@ -237,33 +142,21 @@ func buildInstanceOS(template compute.VirtualMachineScaleSet) string {
func buildGenericLabels(template compute.VirtualMachineScaleSet, nodeName string) map[string]string {
result := make(map[string]string)

result[kubeletapis.LabelArch] = cloudprovider.DefaultArch
result[apiv1.LabelArchStable] = cloudprovider.DefaultArch

result[kubeletapis.LabelOS] = buildInstanceOS(template)
result[apiv1.LabelOSStable] = buildInstanceOS(template)

result[apiv1.LabelInstanceType] = *template.Sku.Name
result[apiv1.LabelInstanceTypeStable] = *template.Sku.Name
result[apiv1.LabelZoneRegion] = strings.ToLower(*template.Location)
result[apiv1.LabelTopologyRegion] = strings.ToLower(*template.Location)

if template.Zones != nil && len(*template.Zones) > 0 {
failureDomains := make([]string, len(*template.Zones))
for k, v := range *template.Zones {
failureDomains[k] = strings.ToLower(*template.Location) + "-" + v
}
//Picks random zones for Multi-zone nodepool when scaling from zero.
//This random zone will not be the same as the zone of the VMSS that is being created, the purpose of creating
//the node template with random zone is to initiate scaling from zero on the multi-zone nodepool.
//Note that the if the customer is to have some pod affinity picking exact zone, this logic won't work.
//For now, discourage the customers from using podAffinity to pick the availability zones.
randomZone := failureDomains[rand.Intn(len(failureDomains))]
result[apiv1.LabelZoneFailureDomain] = randomZone
result[apiv1.LabelTopologyZone] = randomZone
result[azureDiskTopologyKey] = randomZone

result[apiv1.LabelTopologyZone] = strings.Join(failureDomains[:], cloudvolume.LabelMultiZoneDelimiter)
result[azureDiskTopologyKey] = strings.Join(failureDomains[:], cloudvolume.LabelMultiZoneDelimiter)
} else {
result[apiv1.LabelZoneFailureDomain] = "0"
result[apiv1.LabelTopologyZone] = "0"
result[azureDiskTopologyKey] = ""
}
Expand All @@ -272,79 +165,6 @@ func buildGenericLabels(template compute.VirtualMachineScaleSet, nodeName string
return result
}

func fetchLabel(template *compute.VirtualMachineScaleSet, nodeLabels map[string]string) map[string]string {
// Labels from the Scale Set's Tags
var labels = extractLabelsFromScaleSet(template.Tags)

// Add the agentpool label, its value should come from the VMSS poolName tag
// NOTE: The plan is for agentpool label to be deprecated in favor of the aks-prefixed one
// We will have to live with both labels for a while
if nodeLabels[legacyPoolNameTag] != "" {
labels[legacyAgentPoolNodeLabelKey] = nodeLabels[legacyPoolNameTag]
labels[agentPoolNodeLabelKey] = nodeLabels[legacyPoolNameTag]
}
if nodeLabels[poolNameTag] != "" {
labels[legacyAgentPoolNodeLabelKey] = nodeLabels[poolNameTag]
labels[agentPoolNodeLabelKey] = nodeLabels[poolNameTag]
}

// Add node-role label
if nodeLabels[consts.NodeLabelRole] != "" {
labels[consts.NodeLabelRole] = nodeLabels[consts.NodeLabelRole]
}

if nodeLabels[fipsNodeLabelKey] != "" {
labels[fipsNodeLabelKey] = nodeLabels[fipsNodeLabelKey]
}

if nodeLabels[osSkuLabelKey] != "" {
labels[osSkuLabelKey] = nodeLabels[osSkuLabelKey]
}

if nodeLabels[securityTypeLabelKey] != "" {
labels[securityTypeLabelKey] = nodeLabels[securityTypeLabelKey]
}

if nodeLabels[customCATrustEnabledLabelKey] != "" {
labels[customCATrustEnabledLabelKey] = nodeLabels[customCATrustEnabledLabelKey]
}

if nodeLabels[kataMshvVMIsolationLabelKey] != "" {
labels[kataMshvVMIsolationLabelKey] = nodeLabels[kataMshvVMIsolationLabelKey]
}

if nodeLabels[clusterLabelKey] != "" {
labels[clusterLabelKey] = nodeLabels[clusterLabelKey]
}

// Add the storage tier labels
if template.VirtualMachineProfile != nil && template.VirtualMachineProfile.StorageProfile != nil &&
template.VirtualMachineProfile.StorageProfile.OsDisk != nil {
// ephemeral
if template.VirtualMachineProfile.StorageProfile.OsDisk.DiffDiskSettings != nil &&
template.VirtualMachineProfile.StorageProfile.OsDisk.DiffDiskSettings.Option == compute.Local {
labels[legacyStorageProfileNodeLabelKey] = "ephemeral"
labels[storageProfileNodeLabelKey] = "ephemeral"
} else {
labels[legacyStorageProfileNodeLabelKey] = "managed"
labels[storageProfileNodeLabelKey] = "managed"
}
if template.VirtualMachineProfile.StorageProfile.OsDisk.ManagedDisk != nil {
labels[legacyStorageTierNodeLabelKey] = string(template.VirtualMachineProfile.StorageProfile.OsDisk.ManagedDisk.StorageAccountType)
labels[storageTierNodeLabelKey] = string(template.VirtualMachineProfile.StorageProfile.OsDisk.ManagedDisk.StorageAccountType)
}
}

// If we are on GPU-enabled SKUs, append the accelerator
// label so that CA makes better decision when scaling from zero for GPU pools
if isNvidiaEnabledSKU(*template.Sku.Name) {
labels[GPULabel] = "nvidia"
labels[legacyGPULabel] = "nvidia"
}

return labels
}

func extractLabelsFromScaleSet(tags map[string]*string) map[string]string {
result := make(map[string]string)

Expand Down
59 changes: 0 additions & 59 deletions cluster-autoscaler/cloudprovider/azure/azure_template_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@ import (
"fmt"
"testing"

"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2022-08-01/compute"
"github.com/Azure/go-autorest/autorest"
"github.com/Azure/go-autorest/autorest/to"
"github.com/stretchr/testify/assert"
apiv1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -122,63 +120,6 @@ func TestExtractAllocatableResourcesFromScaleSet(t *testing.T) {
assert.Equal(t, (&exepectedCustomAllocatable).String(), labels["nvidia.com/Tesla-P100-PCIE"].String())
}

func TestTopologyFromScaleSet(t *testing.T) {
testNodeName := "test-node"
testSkuName := "test-sku"
testVmss := compute.VirtualMachineScaleSet{
Response: autorest.Response{},
Sku: &compute.Sku{Name: &testSkuName},
Plan: nil,
VirtualMachineScaleSetProperties: &compute.VirtualMachineScaleSetProperties{
VirtualMachineProfile: &compute.VirtualMachineScaleSetVMProfile{OsProfile: nil}},
Zones: &[]string{"1", "2", "3"},
Location: to.StringPtr("westus"),
}
expectedZoneValues := []string{"westus-1", "westus-2", "westus-3"}

labels := buildGenericLabels(testVmss, testNodeName)
failureDomain, ok := labels[apiv1.LabelZoneFailureDomain]
assert.True(t, ok)
topologyZone, ok := labels[apiv1.LabelTopologyZone]
assert.True(t, ok)
azureDiskTopology, ok := labels[azureDiskTopologyKey]
assert.True(t, ok)

assert.Contains(t, expectedZoneValues, failureDomain)
assert.Contains(t, expectedZoneValues, topologyZone)
assert.Contains(t, expectedZoneValues, azureDiskTopology)
}

func TestEmptyTopologyFromScaleSet(t *testing.T) {
testNodeName := "test-node"
testSkuName := "test-sku"
testVmss := compute.VirtualMachineScaleSet{
Response: autorest.Response{},
Sku: &compute.Sku{Name: &testSkuName},
Plan: nil,
VirtualMachineScaleSetProperties: &compute.VirtualMachineScaleSetProperties{
VirtualMachineProfile: &compute.VirtualMachineScaleSetVMProfile{OsProfile: nil}},
Location: to.StringPtr("westus"),
}

expectedFailureDomain := "0"
expectedTopologyZone := "0"
expectedAzureDiskTopology := ""
labels := buildGenericLabels(testVmss, testNodeName)

failureDomain, ok := labels[apiv1.LabelZoneFailureDomain]
assert.True(t, ok)
assert.Equal(t, expectedFailureDomain, failureDomain)

topologyZone, ok := labels[apiv1.LabelTopologyZone]
assert.True(t, ok)
assert.Equal(t, expectedTopologyZone, topologyZone)

azureDiskTopology, ok := labels[azureDiskTopologyKey]
assert.True(t, ok)
assert.Equal(t, expectedAzureDiskTopology, azureDiskTopology)
}

func makeTaintSet(taints []apiv1.Taint) map[apiv1.Taint]bool {
set := make(map[apiv1.Taint]bool)
for _, taint := range taints {
Expand Down

0 comments on commit 8d93fdc

Please sign in to comment.