UPSTREAM: <carry>: improve replica counting on openshift
This change adds logic to count the machines owned by each machineset
when calculating the replica count reported to the core autoscaler. It
is needed because the machine-api controllers do not include machines
in the deleting phase when updating the replicas field. Without it, the
core autoscaler's node count will not match the resources reported by
the cloud provider.

This can be removed when the machine-api controllers have been fully
removed from openshift.
elmiko committed Jan 8, 2024
1 parent af43b28 commit 32770e2
Showing 2 changed files with 63 additions and 0 deletions.
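
The counting approach described in the commit message can be illustrated with a small standalone sketch (the helper name and object wiring here are illustrative, not part of the patch): a machine counts toward a machineset's replicas when it has no controller owner reference, or when that reference points at the machineset, regardless of the machine's phase.

package main

import (
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
)

// countOwnedMachines mirrors the ownership filter used by the patch below;
// the real code first lists machines by the machineset's spec.selector.
func countOwnedMachines(machineSet *unstructured.Unstructured, machines []unstructured.Unstructured) int {
	count := 0
	for i := range machines {
		machine := &machines[i]
		// skip machines whose controller is some other object
		if metav1.GetControllerOf(machine) != nil && !metav1.IsControlledBy(machine, machineSet) {
			continue
		}
		count++
	}
	return count
}

func main() {
	isController := true

	machineSet := &unstructured.Unstructured{Object: map[string]interface{}{}}
	machineSet.SetUID("machineset-uid")

	owned := unstructured.Unstructured{Object: map[string]interface{}{}}
	owned.SetOwnerReferences([]metav1.OwnerReference{{UID: "machineset-uid", Controller: &isController}})

	foreign := unstructured.Unstructured{Object: map[string]interface{}{}}
	foreign.SetOwnerReferences([]metav1.OwnerReference{{UID: "other-uid", Controller: &isController}})

	// A machine in the deleting phase still exists as an API object, so it
	// is counted here even though the machine-api controllers would already
	// have dropped it from the machineset's replicas field.
	fmt.Println(countOwnedMachines(machineSet, []unstructured.Unstructured{owned, foreign})) // prints 1
}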
@@ -46,6 +46,7 @@ const (
 	machinePoolProviderIDIndex = "machinePoolProviderIDIndex"
 	nodeProviderIDIndex        = "nodeProviderIDIndex"
 	defaultCAPIGroup           = "cluster.x-k8s.io"
+	openshiftMAPIGroup         = "machine.openshift.io"
 	// CAPIGroupEnvVar contains the environment variable name which allows overriding defaultCAPIGroup.
 	CAPIGroupEnvVar = "CAPI_GROUP"
 	// CAPIVersionEnvVar contains the environment variable name which allows overriding the Cluster API group version.
@@ -18,6 +18,7 @@ package clusterapi
 
 import (
 	"context"
+	"encoding/json"
 	"fmt"
 	"path"
 	"strings"
@@ -94,6 +95,13 @@ func (r unstructuredScalableResource) Replicas() (int, error) {
 		return 0, err
 	}
 
+	// this function needs to differentiate between machine-api and cluster-api
+	// due to the fact that the machine-api controllers exclude machines in
+	// deleting phase when calculating replicas.
+	if gvr.Group == openshiftMAPIGroup {
+		return r.replicasOpenshift()
+	}
+
 	s, err := r.controller.managementScaleClient.Scales(r.Namespace()).Get(context.TODO(), gvr.GroupResource(), r.Name(), metav1.GetOptions{})
 	if err != nil {
 		return 0, err
@@ -105,6 +113,60 @@ func (r unstructuredScalableResource) Replicas() (int, error) {
 	return int(s.Spec.Replicas), nil
 }
 
+func (r unstructuredScalableResource) replicasOpenshift() (int, error) {
+	gvr, err := r.GroupVersionResource()
+	if err != nil {
+		return 0, err
+	}
+
+	if gvr.Group != openshiftMAPIGroup {
+		return 0, fmt.Errorf("incorrect group for replica count on %s %s/%s", r.Kind(), r.Namespace(), r.Name())
+	}
+
+	// get the selector labels from the scalable resource to find the machines
+	rawSelector, found, err := unstructured.NestedMap(r.unstructured.Object, "spec", "selector")
+	if !found || err != nil {
+		return 0, err
+	}
+
+	// we want to massage the unstructured selector data into a LabelSelector struct
+	// so that we can more easily create the necessary string for the ListOptions struct,
+	// the following code helps with that.
+	data, err := json.Marshal(rawSelector)
+	if err != nil {
+		return 0, err
+	}
+
+	var labelSelector metav1.LabelSelector
+	err = json.Unmarshal(data, &labelSelector)
+	if err != nil {
+		return 0, err
+	}
+
+	selector, err := metav1.LabelSelectorAsSelector(&labelSelector)
+	if err != nil {
+		return 0, err
+	}
+
+	// get a list of machines filtered by the namespace and the selector labels from the scalable resource
+	machinesList, err := r.controller.managementClient.Resource(r.controller.machineResource).Namespace(r.Namespace()).List(context.TODO(), metav1.ListOptions{LabelSelector: selector.String()})
+	if err != nil {
+		return 0, err
+	}
+
+	// filter out inactive machines
+	var activeMachines []unstructured.Unstructured
+	for _, item := range machinesList.Items {
+		if metav1.GetControllerOf(&item) != nil && !metav1.IsControlledBy(&item, r.unstructured) {
+			continue
+		}
+
+		activeMachines = append(activeMachines, item)
+	}
+
+	return len(activeMachines), nil
+}
+
 func (r unstructuredScalableResource) SetSize(nreplicas int) error {
 	switch {
 	case nreplicas > r.maxSize:
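
The selector handling in replicasOpenshift can be exercised on its own. Below is a minimal sketch, assuming a hypothetical machineset object carrying only a spec.selector field (the label key is illustrative): the unstructured selector map is round-tripped through JSON into a metav1.LabelSelector and then rendered as the string form a ListOptions filter expects.

package main

import (
	"encoding/json"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
)

func main() {
	// a hypothetical machineset reduced to the one field this sketch reads
	machineSet := &unstructured.Unstructured{Object: map[string]interface{}{
		"spec": map[string]interface{}{
			"selector": map[string]interface{}{
				"matchLabels": map[string]interface{}{
					"machine.openshift.io/cluster-api-machineset": "worker-a",
				},
			},
		},
	}}

	rawSelector, found, err := unstructured.NestedMap(machineSet.Object, "spec", "selector")
	if err != nil || !found {
		panic("spec.selector not found")
	}

	// massage the unstructured map into a typed LabelSelector via JSON
	data, err := json.Marshal(rawSelector)
	if err != nil {
		panic(err)
	}
	var labelSelector metav1.LabelSelector
	if err := json.Unmarshal(data, &labelSelector); err != nil {
		panic(err)
	}

	selector, err := metav1.LabelSelectorAsSelector(&labelSelector)
	if err != nil {
		panic(err)
	}

	// prints: machine.openshift.io/cluster-api-machineset=worker-a
	fmt.Println(selector.String())
}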
