Skip to content

Commit

Permalink
UPSTREAM: <carry>: revert capacity annotations
Browse files Browse the repository at this point in the history
the upstream annotations for the scale from zero capacity resources is
slighty different than the openshift implementation. the largest
difference is the addition of a gpu type annotation. openshift does not
yet utilize this annotation and thus this patch should be carried until
the machineset controllers for the various providers on openshift have
been modified to use the new annotations.

another important change is the modification of the memory annotation.
previously in openshift we expected this value to be a count of memory
in Mebibytes. the conversion function and tests have been modified to
allow continued openshift operation.

this change can be dropped when the annotations in openshift have been
updated, the progress for this effort can be followed at
https://issues.redhat.com/browse/OCPCLOUD-944
  • Loading branch information
elmiko committed Oct 14, 2024
1 parent 6462123 commit 43224a9
Show file tree
Hide file tree
Showing 5 changed files with 81 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1330,7 +1330,7 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
{
name: "When the NodeGroup can scale from zero",
nodeGroupAnnotations: map[string]string{
memoryKey: "2048Mi",
memoryKey: "2048",
cpuKey: "2",
gpuTypeKey: gpuapis.ResourceNvidiaGPU,
gpuCountKey: "1",
Expand All @@ -1354,10 +1354,36 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
},
},
},
{
name: "When the NodeGroup can scale from zero and the nodegroup adds labels to the Node",
nodeGroupAnnotations: map[string]string{
memoryKey: "2048",
cpuKey: "2",
},
config: testCaseConfig{
expectedErr: nil,
nodegroupLabels: map[string]string{
"nodeGroupLabel": "value",
"anotherLabel": "anotherValue",
},
expectedCapacity: map[corev1.ResourceName]int64{
corev1.ResourceCPU: 2,
corev1.ResourceMemory: 2048 * 1024 * 1024,
corev1.ResourcePods: 110,
},
expectedNodeLabels: map[string]string{
"kubernetes.io/os": "linux",
"kubernetes.io/arch": "amd64",
"kubernetes.io/hostname": "random value",
"nodeGroupLabel": "value",
"anotherLabel": "anotherValue",
},
},
},
{
name: "When the NodeGroup can scale from zero, the label capacity annotations merge with the pre-built node labels and take precedence if the same key is defined in both",
nodeGroupAnnotations: map[string]string{
memoryKey: "2048Mi",
memoryKey: "2048",
cpuKey: "2",
gpuTypeKey: gpuapis.ResourceNvidiaGPU,
gpuCountKey: "1",
Expand All @@ -1366,8 +1392,9 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
config: testCaseConfig{
expectedErr: nil,
nodegroupLabels: map[string]string{
"nodeGroupLabel": "value",
"anotherLabel": "anotherValue",
"nodeGroupLabel": "value",
"anotherLabel": "anotherValue",
"my-custom-label": "not-what-i-want",
},
expectedCapacity: map[corev1.ResourceName]int64{
corev1.ResourceCPU: 2,
Expand All @@ -1388,7 +1415,7 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
{
name: "When the NodeGroup can scale from zero and the Node still exists, it includes the known node labels",
nodeGroupAnnotations: map[string]string{
memoryKey: "2048Mi",
memoryKey: "2048",
cpuKey: "2",
},
config: testCaseConfig{
Expand Down Expand Up @@ -1449,6 +1476,9 @@ func TestNodeGroupTemplateNodeInfo(t *testing.T) {
}
return
}
if err != nil {
t.Fatalf("unexpected error: %v", err)
}

nodeAllocatable := nodeInfo.Node().Status.Allocatable
nodeCapacity := nodeInfo.Node().Status.Capacity
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import (
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/validation"
gpuapis "k8s.io/autoscaler/cluster-autoscaler/utils/gpu"
klog "k8s.io/klog/v2"
)

Expand Down Expand Up @@ -285,9 +286,9 @@ func (r unstructuredScalableResource) InstanceCapacity() (map[corev1.ResourceNam
if err != nil {
return nil, err
}
gpuType := r.InstanceGPUTypeAnnotation()
if !gpuCount.IsZero() && gpuType != "" {
capacityAnnotations[corev1.ResourceName(gpuType)] = gpuCount
if !gpuCount.IsZero() {
// OpenShift does not yet use the gpu-type annotation, and assumes nvidia gpu
capacityAnnotations[gpuapis.ResourceNvidiaGPU] = gpuCount
}

maxPods, err := r.InstanceMaxPodsCapacityAnnotation()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/autoscaler/cluster-autoscaler/utils/units"
"k8s.io/client-go/tools/cache"
)

Expand Down Expand Up @@ -292,8 +293,8 @@ func TestSetSizeAndReplicas(t *testing.T) {

func TestAnnotations(t *testing.T) {
cpuQuantity := resource.MustParse("2")
memQuantity := resource.MustParse("1024Mi")
diskQuantity := resource.MustParse("100Gi")
memQuantity := resource.MustParse("1024")
gpuQuantity := resource.MustParse("1")
maxPodsQuantity := resource.MustParse("42")
expectedTaints := []v1.Taint{{Key: "key1", Effect: v1.TaintEffectNoSchedule, Value: "value1"}, {Key: "key2", Effect: v1.TaintEffectNoExecute, Value: "value2"}}
Expand All @@ -307,6 +308,11 @@ func TestAnnotations(t *testing.T) {
labelsKey: "key3=value3,key4=value4,key5=value5",
}

// convert the initial memory value from Mebibytes to bytes as this conversion happens internally
// when we use InstanceMemoryCapacity()
memVal, _ := memQuantity.AsInt64()
memQuantityAsBytes := resource.NewQuantity(memVal*units.MiB, resource.DecimalSI)

test := func(t *testing.T, testConfig *testConfig, testResource *unstructured.Unstructured) {
controller, stop := mustCreateTestController(t, testConfig)
defer stop()
Expand All @@ -324,7 +330,7 @@ func TestAnnotations(t *testing.T) {

if mem, err := sr.InstanceMemoryCapacityAnnotation(); err != nil {
t.Fatal(err)
} else if memQuantity.Cmp(mem) != 0 {
} else if memQuantityAsBytes.Cmp(mem) != 0 {
t.Errorf("expected %v, got %v", memQuantity, mem)
}

Expand Down Expand Up @@ -378,15 +384,15 @@ func TestCanScaleFromZero(t *testing.T) {
"can scale from zero",
map[string]string{
cpuKey: "1",
memoryKey: "1024Mi",
memoryKey: "1024",
},
nil,
true,
},
{
"with missing CPU info cannot scale from zero",
map[string]string{
memoryKey: "1024Mi",
memoryKey: "1024",
},
nil,
false,
Expand Down Expand Up @@ -434,7 +440,7 @@ func TestCanScaleFromZero(t *testing.T) {
"with both annotations and capacity in machine template can scale from zero",
map[string]string{
cpuKey: "1",
memoryKey: "1024Mi",
memoryKey: "1024",
},
map[string]string{
cpuStatusKey: "1",
Expand Down
33 changes: 26 additions & 7 deletions cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,25 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/autoscaler/cluster-autoscaler/utils/units"
)

const (
cpuKey = "capacity.cluster-autoscaler.kubernetes.io/cpu"
memoryKey = "capacity.cluster-autoscaler.kubernetes.io/memory"
// the following constants are used for scaling from zero
// they are split into two sections to represent the values which have been historically used
// by openshift, and the values which have been added in the upstream.
// we are keeping the historical prefixes "machine.openshift.io" while we develop a solution
// which will allow the usage of either prefix, while preferring the upstream prefix "capacity.clsuter-autoscaler.kuberenetes.io".
cpuKey = "machine.openshift.io/vCPU"
memoryKey = "machine.openshift.io/memoryMb"
gpuCountKey = "machine.openshift.io/GPU"
maxPodsKey = "machine.openshift.io/maxPods"
// the following constants keep the upstream prefix so that we do not introduce separate values into the openshift api
diskCapacityKey = "capacity.cluster-autoscaler.kubernetes.io/ephemeral-disk"
gpuTypeKey = "capacity.cluster-autoscaler.kubernetes.io/gpu-type"
gpuCountKey = "capacity.cluster-autoscaler.kubernetes.io/gpu-count"
maxPodsKey = "capacity.cluster-autoscaler.kubernetes.io/maxPods"
taintsKey = "capacity.cluster-autoscaler.kubernetes.io/taints"
labelsKey = "capacity.cluster-autoscaler.kubernetes.io/labels"
gpuTypeKey = "capacity.cluster-autoscaler.kubernetes.io/gpu-type" // not currently used on OpenShift
taintsKey = "capacity.cluster-autoscaler.kubernetes.io/taints" // not currently used on OpenShift

// UnknownArch is used if the Architecture is Unknown
UnknownArch SystemArchitecture = ""
// Amd64 is used if the Architecture is x86_64
Expand Down Expand Up @@ -260,7 +268,18 @@ func parseCPUCapacity(annotations map[string]string) (resource.Quantity, error)
}

func parseMemoryCapacity(annotations map[string]string) (resource.Quantity, error) {
return parseKey(annotations, memoryKey)
// The value for the memoryKey is expected to be an integer representing Mebibytes. e.g. "1024".
// https://www.iec.ch/si/binary.htm
val, exists := annotations[memoryKey]
if exists && val != "" {
valInt, err := strconv.ParseInt(val, 10, 0)
if err != nil {
return zeroQuantity.DeepCopy(), fmt.Errorf("value %q from annotation %q expected to be an integer: %v", val, memoryKey, err)
}
// Convert from Mebibytes to bytes
return *resource.NewQuantity(valInt*units.MiB, resource.DecimalSI), nil
}
return zeroQuantity.DeepCopy(), nil
}

func parseEphemeralDiskCapacity(annotations map[string]string) (resource.Quantity, error) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -512,18 +512,18 @@ func TestParseMemoryCapacity(t *testing.T) {
}, {
description: "quantity as with no unit type",
annotations: map[string]string{memoryKey: "1024"},
expectedQuantity: *resource.NewQuantity(1024, resource.DecimalSI),
expectedQuantity: *resource.NewQuantity(1024*units.MiB, resource.DecimalSI),
expectedError: false,
}, {
description: "quantity with unit type (Mi)",
annotations: map[string]string{memoryKey: "456Mi"},
expectedError: false,
expectedQuantity: *resource.NewQuantity(456*units.MiB, resource.DecimalSI),
expectedQuantity: zeroQuantity.DeepCopy(),
expectedError: true,
}, {
description: "quantity with unit type (Gi)",
annotations: map[string]string{memoryKey: "8Gi"},
expectedError: false,
expectedQuantity: *resource.NewQuantity(8*units.GiB, resource.DecimalSI),
expectedQuantity: zeroQuantity.DeepCopy(),
expectedError: true,
}} {
t.Run(tc.description, func(t *testing.T) {
got, err := parseMemoryCapacity(tc.annotations)
Expand Down

0 comments on commit 43224a9

Please sign in to comment.