Skip to content

Commit

Permalink
Merge pull request #6066 from aleskandro/fix-default-label-arch
Browse files Browse the repository at this point in the history
The autoscaler does not scale node groups on non-amd64 clusters when pods explicitly require non-amd64 nodes in node affinity
  • Loading branch information
k8s-ci-robot authored Oct 19, 2023
2 parents 4103407 + 54d3a4c commit 8c35e0a
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 1 deletion.
11 changes: 11 additions & 0 deletions cluster-autoscaler/cloudprovider/clusterapi/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ cluster.
* [Scale from zero support](#scale-from-zero-support)
* [RBAC changes for scaling from zero](#rbac-changes-for-scaling-from-zero)
* [Pre-defined labels and taints on nodes scaled from zero](#pre-defined-labels-and-taints-on-nodes-scaled-from-zero)
* [CPU Architecture awareness for single-arch clusters](#cpu-architecture-awareness-for-single-arch-clusters)
* [Specifying a Custom Resource Group](#specifying-a-custom-resource-group)
* [Specifying a Custom Resource Version](#specifying-a-custom-resource-version)
* [Sample manifest](#sample-manifest)
Expand Down Expand Up @@ -276,6 +277,16 @@ metadata:
capacity.cluster-autoscaler.kubernetes.io/taints: "key1=value1:NoSchedule,key2=value2:NoExecute"
```

#### CPU Architecture awareness for single-arch clusters

Users of single-arch non-amd64 clusters who are using scale from zero
support should also set the `CAPI_SCALE_ZERO_DEFAULT_ARCH` environment variable
to set the architecture of the nodes they want to default the node group templates to.
The autoscaler will default to `amd64` if it is not set, and the node
group templates may not match the nodes' architecture, specifically when
the workload triggering the scale-up uses a node affinity predicate checking
for the node's architecture.

## Specifying a Custom Resource Group

By default all Kubernetes resources consumed by the Cluster API provider will
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ func buildGenericLabels(nodeName string) map[string]string {
// TODO revisit this function and add an explanation about what these
// labels are used for, or remove them if not necessary
m := make(map[string]string)
m[corev1.LabelArchStable] = cloudprovider.DefaultArch
m[corev1.LabelArchStable] = GetDefaultScaleFromZeroArchitecture().Name()

m[corev1.LabelOSStable] = cloudprovider.DefaultOS

Expand Down
66 changes: 66 additions & 0 deletions cluster-autoscaler/cloudprovider/clusterapi/clusterapi_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,11 @@ package clusterapi

import (
"fmt"
"k8s.io/klog/v2"
"os"
"strconv"
"strings"
"sync"

"github.com/pkg/errors"
"k8s.io/apimachinery/pkg/api/resource"
Expand All @@ -36,6 +39,20 @@ const (
maxPodsKey = "capacity.cluster-autoscaler.kubernetes.io/maxPods"
taintsKey = "capacity.cluster-autoscaler.kubernetes.io/taints"
labelsKey = "capacity.cluster-autoscaler.kubernetes.io/labels"
// UnknownArch is used if the Architecture is Unknown
UnknownArch SystemArchitecture = ""
// Amd64 is used if the Architecture is x86_64
Amd64 SystemArchitecture = "amd64"
// Arm64 is used if the Architecture is ARM64
Arm64 SystemArchitecture = "arm64"
// Ppc64le is used if the Architecture is ppc64le
Ppc64le SystemArchitecture = "ppc64le"
// S390x is used if the Architecture is s390x
S390x SystemArchitecture = "s390x"
// DefaultArch should be used as a fallback if not passed by the environment via the --scale-up-from-zero-default-arch
DefaultArch = Amd64
// scaleUpFromZeroDefaultEnvVar is the name of the env var for the default architecture
scaleUpFromZeroDefaultArchEnvVar = "CAPI_SCALE_ZERO_DEFAULT_ARCH"
)

var (
Expand Down Expand Up @@ -79,10 +96,25 @@ var (
nodeGroupMinSizeAnnotationKey = getNodeGroupMinSizeAnnotationKey()
nodeGroupMaxSizeAnnotationKey = getNodeGroupMaxSizeAnnotationKey()
zeroQuantity = resource.MustParse("0")

systemArchitecture *SystemArchitecture
once sync.Once
)

type normalizedProviderID string

// SystemArchitecture represents a CPU architecture (e.g., amd64, arm64, ppc64le, s390x).
// It is used to determine the default architecture to use when building the nodes templates for scaling up from zero
// by some cloud providers. This code is the same as the GCE implementation at
// https://github.com/kubernetes/autoscaler/blob/3852f352d96b8763292a9122163c1152dfedec55/cluster-autoscaler/cloudprovider/gce/templates.go#L611-L657
// which is kept to allow for a smooth transition to this package, once the GCE team is ready to use it.
type SystemArchitecture string

// Name returns the string value for SystemArchitecture
func (s SystemArchitecture) Name() string {
return string(s)
}

// minSize returns the minimum value encoded in the annotations keyed
// by nodeGroupMinSizeAnnotationKey. Returns errMissingMinAnnotation
// if the annotation doesn't exist or errInvalidMinAnnotation if the
Expand Down Expand Up @@ -279,3 +311,37 @@ func getClusterNameLabel() string {
key := fmt.Sprintf("%s/cluster-name", getCAPIGroup())
return key
}

// SystemArchitectureFromString parses a string to SystemArchitecture. Returns UnknownArch if the string doesn't represent a
// valid architecture.
func SystemArchitectureFromString(arch string) SystemArchitecture {
switch arch {
case string(Arm64):
return Arm64
case string(Amd64):
return Amd64
case string(Ppc64le):
return Ppc64le
case string(S390x):
return S390x
default:
return UnknownArch
}
}

// GetDefaultScaleFromZeroArchitecture returns the SystemArchitecture from the environment variable
// CAPI_SCALE_ZERO_DEFAULT_ARCH or DefaultArch if the variable is set to an invalid value.
func GetDefaultScaleFromZeroArchitecture() SystemArchitecture {
once.Do(func() {
archStr := os.Getenv(scaleUpFromZeroDefaultArchEnvVar)
arch := SystemArchitectureFromString(archStr)
klog.V(5).Infof("the default scale from zero architecture value is set to %s (%s)", scaleUpFromZeroDefaultArchEnvVar, archStr, arch.Name())
if arch == UnknownArch {
arch = DefaultArch
klog.Errorf("Unrecognized architecture '%s', falling back to %s",
scaleUpFromZeroDefaultArchEnvVar, DefaultArch.Name())
}
systemArchitecture = &arch
})
return *systemArchitecture
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@ package clusterapi

import (
"fmt"
"github.com/google/go-cmp/cmp"
"reflect"
"strings"
"sync"
"testing"

"k8s.io/apimachinery/pkg/api/resource"
Expand Down Expand Up @@ -853,3 +855,77 @@ func Test_getKeyHelpers(t *testing.T) {
})
}
}

func TestSystemArchitectureFromString(t *testing.T) {
tcs := []struct {
name string
archName string
wantArch SystemArchitecture
}{
{
name: "valid architecture is converted",
archName: "amd64",
wantArch: Amd64,
},
{
name: "invalid architecture results in UnknownArchitecture",
archName: "some-arch",
wantArch: UnknownArch,
},
}
for _, tc := range tcs {
t.Run(tc.name, func(t *testing.T) {
gotArch := SystemArchitectureFromString(tc.archName)
if diff := cmp.Diff(tc.wantArch, gotArch); diff != "" {
t.Errorf("ToSystemArchitecture diff (-want +got):\n%s", diff)
}
})
}
}

func TestGetSystemArchitectureFromEnvOrDefault(t *testing.T) {
amd64 := Amd64.Name()
arm64 := Arm64.Name()
wrongValue := "wrong"

tcs := []struct {
name string
envValue *string
want SystemArchitecture
}{
{
name: fmt.Sprintf("%s is set to arm64", scaleUpFromZeroDefaultArchEnvVar),
envValue: &arm64,
want: Arm64,
},
{
name: fmt.Sprintf("%s is set to amd64", scaleUpFromZeroDefaultArchEnvVar),
envValue: &amd64,
want: Amd64,
},
{
name: fmt.Sprintf("%s is not set", scaleUpFromZeroDefaultArchEnvVar),
envValue: nil,
want: DefaultArch,
},
{
name: fmt.Sprintf("%s is set to a wrong value", scaleUpFromZeroDefaultArchEnvVar),
envValue: &wrongValue,
want: DefaultArch,
},
}
for _, tc := range tcs {
t.Run(tc.name, func(t *testing.T) {
// Reset the systemArchitecture variable to nil before each test due to the lazy initialization of the variable.
systemArchitecture = nil
// Reset the once variable to its initial state before each test.
once = sync.Once{}
if tc.envValue != nil {
t.Setenv(scaleUpFromZeroDefaultArchEnvVar, *tc.envValue)
}
if got := GetDefaultScaleFromZeroArchitecture(); got != tc.want {
t.Errorf("GetDefaultScaleFromZeroArchitecture() = %v, want %v", got, tc.want)
}
})
}
}

0 comments on commit 8c35e0a

Please sign in to comment.