diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go index 7ef6440f7532..039b71d498fd 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go @@ -1330,7 +1330,7 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) { { name: "When the NodeGroup can scale from zero", nodeGroupAnnotations: map[string]string{ - memoryKey: "2048Mi", + memoryKey: "2048", cpuKey: "2", gpuTypeKey: gpuapis.ResourceNvidiaGPU, gpuCountKey: "1", @@ -1354,10 +1354,36 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) { }, }, }, + { + name: "When the NodeGroup can scale from zero and the nodegroup adds labels to the Node", + nodeGroupAnnotations: map[string]string{ + memoryKey: "2048", + cpuKey: "2", + }, + config: testCaseConfig{ + expectedErr: nil, + nodegroupLabels: map[string]string{ + "nodeGroupLabel": "value", + "anotherLabel": "anotherValue", + }, + expectedCapacity: map[corev1.ResourceName]int64{ + corev1.ResourceCPU: 2, + corev1.ResourceMemory: 2048 * 1024 * 1024, + corev1.ResourcePods: 110, + }, + expectedNodeLabels: map[string]string{ + "kubernetes.io/os": "linux", + "kubernetes.io/arch": "amd64", + "kubernetes.io/hostname": "random value", + "nodeGroupLabel": "value", + "anotherLabel": "anotherValue", + }, + }, + }, { name: "When the NodeGroup can scale from zero, the label capacity annotations merge with the pre-built node labels and take precedence if the same key is defined in both", nodeGroupAnnotations: map[string]string{ - memoryKey: "2048Mi", + memoryKey: "2048", cpuKey: "2", gpuTypeKey: gpuapis.ResourceNvidiaGPU, gpuCountKey: "1", @@ -1366,8 +1392,9 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) { config: testCaseConfig{ expectedErr: nil, nodegroupLabels: map[string]string{ - "nodeGroupLabel": "value", - "anotherLabel": "anotherValue", + "nodeGroupLabel": "value", + "anotherLabel": "anotherValue", + "my-custom-label": "not-what-i-want", }, expectedCapacity: map[corev1.ResourceName]int64{ corev1.ResourceCPU: 2, @@ -1388,7 +1415,7 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) { { name: "When the NodeGroup can scale from zero and the Node still exists, it includes the known node labels", nodeGroupAnnotations: map[string]string{ - memoryKey: "2048Mi", + memoryKey: "2048", cpuKey: "2", }, config: testCaseConfig{ @@ -1449,6 +1476,9 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) { } return } + if err != nil { + t.Fatalf("unexpected error: %v", err) + } nodeAllocatable := nodeInfo.Node().Status.Allocatable nodeCapacity := nodeInfo.Node().Status.Capacity diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go index ccf049bfc941..142ebc606750 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go @@ -31,6 +31,7 @@ import ( "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/validation" + gpuapis "k8s.io/autoscaler/cluster-autoscaler/utils/gpu" klog "k8s.io/klog/v2" ) @@ -285,9 +286,9 @@ func (r unstructuredScalableResource) InstanceCapacity() (map[corev1.ResourceNam if err != nil { return nil, err } - gpuType := r.InstanceGPUTypeAnnotation() - if !gpuCount.IsZero() && gpuType != "" { - capacityAnnotations[corev1.ResourceName(gpuType)] = gpuCount + if !gpuCount.IsZero() { + // OpenShift does not yet use the gpu-type annotation, and assumes nvidia gpu + capacityAnnotations[gpuapis.ResourceNvidiaGPU] = gpuCount } maxPods, err := r.InstanceMaxPodsCapacityAnnotation() diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured_test.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured_test.go index dd95096799e4..f620f7ec78e2 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured_test.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured_test.go @@ -27,6 +27,7 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/autoscaler/cluster-autoscaler/utils/units" "k8s.io/client-go/tools/cache" ) @@ -292,8 +293,8 @@ func TestSetSizeAndReplicas(t *testing.T) { func TestAnnotations(t *testing.T) { cpuQuantity := resource.MustParse("2") - memQuantity := resource.MustParse("1024Mi") diskQuantity := resource.MustParse("100Gi") + memQuantity := resource.MustParse("1024") gpuQuantity := resource.MustParse("1") maxPodsQuantity := resource.MustParse("42") expectedTaints := []v1.Taint{{Key: "key1", Effect: v1.TaintEffectNoSchedule, Value: "value1"}, {Key: "key2", Effect: v1.TaintEffectNoExecute, Value: "value2"}} @@ -307,6 +308,11 @@ func TestAnnotations(t *testing.T) { labelsKey: "key3=value3,key4=value4,key5=value5", } + // convert the initial memory value from Mebibytes to bytes as this conversion happens internally + // when we use InstanceMemoryCapacity() + memVal, _ := memQuantity.AsInt64() + memQuantityAsBytes := resource.NewQuantity(memVal*units.MiB, resource.DecimalSI) + test := func(t *testing.T, testConfig *testConfig, testResource *unstructured.Unstructured) { controller, stop := mustCreateTestController(t, testConfig) defer stop() @@ -324,7 +330,7 @@ func TestAnnotations(t *testing.T) { if mem, err := sr.InstanceMemoryCapacityAnnotation(); err != nil { t.Fatal(err) - } else if memQuantity.Cmp(mem) != 0 { + } else if memQuantityAsBytes.Cmp(mem) != 0 { t.Errorf("expected %v, got %v", memQuantity, mem) } @@ -378,7 +384,7 @@ func TestCanScaleFromZero(t *testing.T) { "can scale from zero", map[string]string{ cpuKey: "1", - memoryKey: "1024Mi", + memoryKey: "1024", }, nil, true, @@ -386,7 +392,7 @@ func TestCanScaleFromZero(t *testing.T) { { "with missing CPU info cannot scale from zero", map[string]string{ - memoryKey: "1024Mi", + memoryKey: "1024", }, nil, false, @@ -434,7 +440,7 @@ func TestCanScaleFromZero(t *testing.T) { "with both annotations and capacity in machine template can scale from zero", map[string]string{ cpuKey: "1", - memoryKey: "1024Mi", + memoryKey: "1024", }, map[string]string{ cpuStatusKey: "1", diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go index 44bccb45f3ab..f8855636d734 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go @@ -29,17 +29,25 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/autoscaler/cluster-autoscaler/utils/units" ) const ( - cpuKey = "capacity.cluster-autoscaler.kubernetes.io/cpu" - memoryKey = "capacity.cluster-autoscaler.kubernetes.io/memory" + // the following constants are used for scaling from zero + // they are split into two sections to represent the values which have been historically used + // by openshift, and the values which have been added in the upstream. + // we are keeping the historical prefixes "machine.openshift.io" while we develop a solution + // which will allow the usage of either prefix, while preferring the upstream prefix "capacity.clsuter-autoscaler.kuberenetes.io". + cpuKey = "machine.openshift.io/vCPU" + memoryKey = "machine.openshift.io/memoryMb" + gpuCountKey = "machine.openshift.io/GPU" + maxPodsKey = "machine.openshift.io/maxPods" + // the following constants keep the upstream prefix so that we do not introduce separate values into the openshift api diskCapacityKey = "capacity.cluster-autoscaler.kubernetes.io/ephemeral-disk" - gpuTypeKey = "capacity.cluster-autoscaler.kubernetes.io/gpu-type" - gpuCountKey = "capacity.cluster-autoscaler.kubernetes.io/gpu-count" - maxPodsKey = "capacity.cluster-autoscaler.kubernetes.io/maxPods" - taintsKey = "capacity.cluster-autoscaler.kubernetes.io/taints" labelsKey = "capacity.cluster-autoscaler.kubernetes.io/labels" + gpuTypeKey = "capacity.cluster-autoscaler.kubernetes.io/gpu-type" // not currently used on OpenShift + taintsKey = "capacity.cluster-autoscaler.kubernetes.io/taints" // not currently used on OpenShift + // UnknownArch is used if the Architecture is Unknown UnknownArch SystemArchitecture = "" // Amd64 is used if the Architecture is x86_64 @@ -260,7 +268,18 @@ func parseCPUCapacity(annotations map[string]string) (resource.Quantity, error) } func parseMemoryCapacity(annotations map[string]string) (resource.Quantity, error) { - return parseKey(annotations, memoryKey) + // The value for the memoryKey is expected to be an integer representing Mebibytes. e.g. "1024". + // https://www.iec.ch/si/binary.htm + val, exists := annotations[memoryKey] + if exists && val != "" { + valInt, err := strconv.ParseInt(val, 10, 0) + if err != nil { + return zeroQuantity.DeepCopy(), fmt.Errorf("value %q from annotation %q expected to be an integer: %v", val, memoryKey, err) + } + // Convert from Mebibytes to bytes + return *resource.NewQuantity(valInt*units.MiB, resource.DecimalSI), nil + } + return zeroQuantity.DeepCopy(), nil } func parseEphemeralDiskCapacity(annotations map[string]string) (resource.Quantity, error) { diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils_test.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils_test.go index 7f9d2a7843ce..5eaee9338265 100644 --- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils_test.go +++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils_test.go @@ -512,18 +512,18 @@ func TestParseMemoryCapacity(t *testing.T) { }, { description: "quantity as with no unit type", annotations: map[string]string{memoryKey: "1024"}, - expectedQuantity: *resource.NewQuantity(1024, resource.DecimalSI), + expectedQuantity: *resource.NewQuantity(1024*units.MiB, resource.DecimalSI), expectedError: false, }, { description: "quantity with unit type (Mi)", annotations: map[string]string{memoryKey: "456Mi"}, - expectedError: false, - expectedQuantity: *resource.NewQuantity(456*units.MiB, resource.DecimalSI), + expectedQuantity: zeroQuantity.DeepCopy(), + expectedError: true, }, { description: "quantity with unit type (Gi)", annotations: map[string]string{memoryKey: "8Gi"}, - expectedError: false, - expectedQuantity: *resource.NewQuantity(8*units.GiB, resource.DecimalSI), + expectedQuantity: zeroQuantity.DeepCopy(), + expectedError: true, }} { t.Run(tc.description, func(t *testing.T) { got, err := parseMemoryCapacity(tc.annotations)