UPSTREAM: <carry>: revert capacity annotations

the upstream annotations for the scale from zero capacity resources is slighty different than the openshift implementation. the largest difference is the addition of a gpu type annotation. openshift does not yet utilize this annotation and thus this patch should be carried until the machineset controllers for the various providers on openshift have been modified to use the new annotations. another important change is the modification of the memory annotation. previously in openshift we expected this value to be a count of memory in Mebibytes. the conversion function and tests have been modified to allow continued openshift operation. this change can be dropped when the annotations in openshift have been updated, the progress for this effort can be followed at https://issues.redhat.com/browse/OCPCLOUD-944
openshift · Oct 14, 2024 · 43224a9 · 43224a9
1 parent 6462123
commit 43224a9
Show file tree

Hide file tree

Showing 5 changed files with 81 additions and 25 deletions.
diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go
@@ -1330,7 +1330,7 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
 		{
 			name: "When the NodeGroup can scale from zero",
 			nodeGroupAnnotations: map[string]string{
-				memoryKey:   "2048Mi",
+				memoryKey:   "2048",
 				cpuKey:      "2",
 				gpuTypeKey:  gpuapis.ResourceNvidiaGPU,
 				gpuCountKey: "1",
@@ -1354,10 +1354,36 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
 				},
 			},
 		},
+		{
+			name: "When the NodeGroup can scale from zero and the nodegroup adds labels to the Node",
+			nodeGroupAnnotations: map[string]string{
+				memoryKey: "2048",
+				cpuKey:    "2",
+			},
+			config: testCaseConfig{
+				expectedErr: nil,
+				nodegroupLabels: map[string]string{
+					"nodeGroupLabel": "value",
+					"anotherLabel":   "anotherValue",
+				},
+				expectedCapacity: map[corev1.ResourceName]int64{
+					corev1.ResourceCPU:    2,
+					corev1.ResourceMemory: 2048 * 1024 * 1024,
+					corev1.ResourcePods:   110,
+				},
+				expectedNodeLabels: map[string]string{
+					"kubernetes.io/os":       "linux",
+					"kubernetes.io/arch":     "amd64",
+					"kubernetes.io/hostname": "random value",
+					"nodeGroupLabel":         "value",
+					"anotherLabel":           "anotherValue",
+				},
+			},
+		},
 		{
 			name: "When the NodeGroup can scale from zero, the label capacity annotations merge with the pre-built node labels and take precedence if the same key is defined in both",
 			nodeGroupAnnotations: map[string]string{
-				memoryKey:   "2048Mi",
+				memoryKey:   "2048",
 				cpuKey:      "2",
 				gpuTypeKey:  gpuapis.ResourceNvidiaGPU,
 				gpuCountKey: "1",
@@ -1366,8 +1392,9 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
 			config: testCaseConfig{
 				expectedErr: nil,
 				nodegroupLabels: map[string]string{
-					"nodeGroupLabel": "value",
-					"anotherLabel":   "anotherValue",
+					"nodeGroupLabel":  "value",
+					"anotherLabel":    "anotherValue",
+					"my-custom-label": "not-what-i-want",
 				},
 				expectedCapacity: map[corev1.ResourceName]int64{
 					corev1.ResourceCPU:        2,
@@ -1388,7 +1415,7 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
 		{
 			name: "When the NodeGroup can scale from zero and the Node still exists, it includes the known node labels",
 			nodeGroupAnnotations: map[string]string{
-				memoryKey: "2048Mi",
+				memoryKey: "2048",
 				cpuKey:    "2",
 			},
 			config: testCaseConfig{
@@ -1449,6 +1476,9 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
 			}
 			return
 		}
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
 
 		nodeAllocatable := nodeInfo.Node().Status.Allocatable
 		nodeCapacity := nodeInfo.Node().Status.Capacity

diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go
@@ -31,6 +31,7 @@ import (
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 	"k8s.io/apimachinery/pkg/runtime/schema"
 	"k8s.io/apimachinery/pkg/util/validation"
+	gpuapis "k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
 	klog "k8s.io/klog/v2"
 )
 
@@ -285,9 +286,9 @@ func (r unstructuredScalableResource) InstanceCapacity() (map[corev1.ResourceNam
 	if err != nil {
 		return nil, err
 	}
-	gpuType := r.InstanceGPUTypeAnnotation()
-	if !gpuCount.IsZero() && gpuType != "" {
-		capacityAnnotations[corev1.ResourceName(gpuType)] = gpuCount
+	if !gpuCount.IsZero() {
+		// OpenShift does not yet use the gpu-type annotation, and assumes nvidia gpu
+		capacityAnnotations[gpuapis.ResourceNvidiaGPU] = gpuCount
 	}
 
 	maxPods, err := r.InstanceMaxPodsCapacityAnnotation()

diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured_test.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured_test.go
@@ -27,6 +27,7 @@ import (
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/autoscaler/cluster-autoscaler/utils/units"
 	"k8s.io/client-go/tools/cache"
 )
 
@@ -292,8 +293,8 @@ func TestSetSizeAndReplicas(t *testing.T) {
 
 func TestAnnotations(t *testing.T) {
 	cpuQuantity := resource.MustParse("2")
-	memQuantity := resource.MustParse("1024Mi")
 	diskQuantity := resource.MustParse("100Gi")
+	memQuantity := resource.MustParse("1024")
 	gpuQuantity := resource.MustParse("1")
 	maxPodsQuantity := resource.MustParse("42")
 	expectedTaints := []v1.Taint{{Key: "key1", Effect: v1.TaintEffectNoSchedule, Value: "value1"}, {Key: "key2", Effect: v1.TaintEffectNoExecute, Value: "value2"}}
@@ -307,6 +308,11 @@ func TestAnnotations(t *testing.T) {
 		labelsKey:       "key3=value3,key4=value4,key5=value5",
 	}
 
+	// convert the initial memory value from Mebibytes to bytes as this conversion happens internally
+	// when we use InstanceMemoryCapacity()
+	memVal, _ := memQuantity.AsInt64()
+	memQuantityAsBytes := resource.NewQuantity(memVal*units.MiB, resource.DecimalSI)
+
 	test := func(t *testing.T, testConfig *testConfig, testResource *unstructured.Unstructured) {
 		controller, stop := mustCreateTestController(t, testConfig)
 		defer stop()
@@ -324,7 +330,7 @@ func TestAnnotations(t *testing.T) {
 
 		if mem, err := sr.InstanceMemoryCapacityAnnotation(); err != nil {
 			t.Fatal(err)
-		} else if memQuantity.Cmp(mem) != 0 {
+		} else if memQuantityAsBytes.Cmp(mem) != 0 {
 			t.Errorf("expected %v, got %v", memQuantity, mem)
 		}
 
@@ -378,15 +384,15 @@ func TestCanScaleFromZero(t *testing.T) {
 			"can scale from zero",
 			map[string]string{
 				cpuKey:    "1",
-				memoryKey: "1024Mi",
+				memoryKey: "1024",
 			},
 			nil,
 			true,
 		},
 		{
 			"with missing CPU info cannot scale from zero",
 			map[string]string{
-				memoryKey: "1024Mi",
+				memoryKey: "1024",
 			},
 			nil,
 			false,
@@ -434,7 +440,7 @@ func TestCanScaleFromZero(t *testing.T) {
 			"with both annotations and capacity in machine template can scale from zero",
 			map[string]string{
 				cpuKey:    "1",
-				memoryKey: "1024Mi",
+				memoryKey: "1024",
 			},
 			map[string]string{
 				cpuStatusKey:    "1",

diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go
@@ -29,17 +29,25 @@ import (
 	"k8s.io/apimachinery/pkg/api/resource"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/autoscaler/cluster-autoscaler/utils/units"
 )
 
 const (
-	cpuKey          = "capacity.cluster-autoscaler.kubernetes.io/cpu"
-	memoryKey       = "capacity.cluster-autoscaler.kubernetes.io/memory"
+	// the following constants are used for scaling from zero
+	// they are split into two sections to represent the values which have been historically used
+	// by openshift, and the values which have been added in the upstream.
+	// we are keeping the historical prefixes "machine.openshift.io" while we develop a solution
+	// which will allow the usage of either prefix, while preferring the upstream prefix "capacity.clsuter-autoscaler.kuberenetes.io".
+	cpuKey      = "machine.openshift.io/vCPU"
+	memoryKey   = "machine.openshift.io/memoryMb"
+	gpuCountKey = "machine.openshift.io/GPU"
+	maxPodsKey  = "machine.openshift.io/maxPods"
+	// the following constants keep the upstream prefix so that we do not introduce separate values into the openshift api
 	diskCapacityKey = "capacity.cluster-autoscaler.kubernetes.io/ephemeral-disk"
-	gpuTypeKey      = "capacity.cluster-autoscaler.kubernetes.io/gpu-type"
-	gpuCountKey     = "capacity.cluster-autoscaler.kubernetes.io/gpu-count"
-	maxPodsKey      = "capacity.cluster-autoscaler.kubernetes.io/maxPods"
-	taintsKey       = "capacity.cluster-autoscaler.kubernetes.io/taints"
 	labelsKey       = "capacity.cluster-autoscaler.kubernetes.io/labels"
+	gpuTypeKey      = "capacity.cluster-autoscaler.kubernetes.io/gpu-type" // not currently used on OpenShift
+	taintsKey       = "capacity.cluster-autoscaler.kubernetes.io/taints"   // not currently used on OpenShift
+
 	// UnknownArch is used if the Architecture is Unknown
 	UnknownArch SystemArchitecture = ""
 	// Amd64 is used if the Architecture is x86_64
@@ -260,7 +268,18 @@ func parseCPUCapacity(annotations map[string]string) (resource.Quantity, error)
 }
 
 func parseMemoryCapacity(annotations map[string]string) (resource.Quantity, error) {
-	return parseKey(annotations, memoryKey)
+	// The value for the memoryKey is expected to be an integer representing Mebibytes. e.g. "1024".
+	// https://www.iec.ch/si/binary.htm
+	val, exists := annotations[memoryKey]
+	if exists && val != "" {
+		valInt, err := strconv.ParseInt(val, 10, 0)
+		if err != nil {
+			return zeroQuantity.DeepCopy(), fmt.Errorf("value %q from annotation %q expected to be an integer: %v", val, memoryKey, err)
+		}
+		// Convert from Mebibytes to bytes
+		return *resource.NewQuantity(valInt*units.MiB, resource.DecimalSI), nil
+	}
+	return zeroQuantity.DeepCopy(), nil
 }
 
 func parseEphemeralDiskCapacity(annotations map[string]string) (resource.Quantity, error) {

diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils_test.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils_test.go
@@ -512,18 +512,18 @@ func TestParseMemoryCapacity(t *testing.T) {
 	}, {
 		description:      "quantity as with no unit type",
 		annotations:      map[string]string{memoryKey: "1024"},
-		expectedQuantity: *resource.NewQuantity(1024, resource.DecimalSI),
+		expectedQuantity: *resource.NewQuantity(1024*units.MiB, resource.DecimalSI),
 		expectedError:    false,
 	}, {
 		description:      "quantity with unit type (Mi)",
 		annotations:      map[string]string{memoryKey: "456Mi"},
-		expectedError:    false,
-		expectedQuantity: *resource.NewQuantity(456*units.MiB, resource.DecimalSI),
+		expectedQuantity: zeroQuantity.DeepCopy(),
+		expectedError:    true,
 	}, {
 		description:      "quantity with unit type (Gi)",
 		annotations:      map[string]string{memoryKey: "8Gi"},
-		expectedError:    false,
-		expectedQuantity: *resource.NewQuantity(8*units.GiB, resource.DecimalSI),
+		expectedQuantity: zeroQuantity.DeepCopy(),
+		expectedError:    true,
 	}} {
 		t.Run(tc.description, func(t *testing.T) {
 			got, err := parseMemoryCapacity(tc.annotations)