From 9f670d4ea8174b24979efa818adec6a1e09e848f Mon Sep 17 00:00:00 2001
From: Joel Speed
Date: Fri, 21 Jan 2022 13:36:36 +0000
Subject: [PATCH] Ensure ClusterAPI DeleteNodes accounts for out of band
 changes to scale

Because the autoscaler assumes it can delete nodes in parallel, it
fetches a nodegroup for each node in a separate goroutine and then
instructs each nodegroup to delete a single node. Because the nodegroup
is not shared across goroutines, the cached replica count in the
scalable resource can become stale; if the autoscaler attempts to scale
down multiple nodes at a time, the Cluster API provider only actually
removes a single node. To prevent this, we must ensure we have a fresh
replica count for every scale-down attempt.
---
 .../clusterapi/clusterapi_nodegroup_test.go   | 132 +++++++++++++++++++++
 .../clusterapi/clusterapi_unstructured.go     |  16 ++-
 2 files changed, 143 insertions(+), 5 deletions(-)

diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go
index 53721371581d..37059450fc52 100644
--- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go
+++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_nodegroup_test.go
@@ -985,6 +985,138 @@ func TestNodeGroupDeleteNodesTwice(t *testing.T) {
 	})
 }
 
+func TestNodeGroupDeleteNodesSequential(t *testing.T) {
+	// This is the size we expect the NodeGroup to be after we have called DeleteNodes.
+	// We need at least 8 nodes for this test to be valid.
+	expectedSize := 7
+
+	test := func(t *testing.T, testConfig *testConfig) {
+		controller, stop := mustCreateTestController(t, testConfig)
+		defer stop()
+
+		nodegroups, err := controller.nodeGroups()
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+
+		if l := len(nodegroups); l != 1 {
+			t.Fatalf("expected 1 nodegroup, got %d", l)
+		}
+
+		ng := nodegroups[0]
+		nodeNames, err := ng.Nodes()
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+
+		// Check that the test case is valid before executing DeleteNodes:
+		// 1. We must have at least 1 more node than the expected size, otherwise DeleteNodes is a no-op.
+		// 2. MinSize must be less than the expected size, otherwise a second call to DeleteNodes may
+		//    not make the nodegroup size less than the expected size.
+		if len(nodeNames) <= expectedSize {
+			t.Fatalf("expected more nodes than the expected size: %d <= %d", len(nodeNames), expectedSize)
+		}
+		if ng.MinSize() >= expectedSize {
+			t.Fatalf("expected min size to be less than expected size: %d >= %d", ng.MinSize(), expectedSize)
+		}
+
+		if len(nodeNames) != len(testConfig.nodes) {
+			t.Fatalf("expected len=%v, got len=%v", len(testConfig.nodes), len(nodeNames))
+		}
+
+		sort.SliceStable(nodeNames, func(i, j int) bool {
+			return nodeNames[i].Id < nodeNames[j].Id
+		})
+
+		for i := 0; i < len(nodeNames); i++ {
+			if nodeNames[i].Id != testConfig.nodes[i].Spec.ProviderID {
+				t.Fatalf("expected %q, got %q", testConfig.nodes[i].Spec.ProviderID, nodeNames[i].Id)
+			}
+		}
+
+		// These are the nodes which are over the final expectedSize.
+		nodesToBeDeleted := testConfig.nodes[expectedSize:]
+
+		// Assert that we have no DeletionTimestamp.
+		for i := expectedSize; i < len(testConfig.machines); i++ {
+			if !testConfig.machines[i].GetDeletionTimestamp().IsZero() {
+				t.Fatalf("unexpected DeletionTimestamp")
+			}
+		}
+
+		// When the core autoscaler scales down nodes, it fetches the node group for each node in separate
+		// goroutines and then scales them down individually. We use a lock to ensure the scale down is
+		// performed sequentially, but this means that the cached scalable resource may not be up to date.
+		// We replicate this out-of-date nature by constructing the node groups first, then deleting.
+		nodeToNodeGroup := make(map[*corev1.Node]*nodegroup)
+
+		for _, node := range nodesToBeDeleted {
+			nodeGroup, err := controller.nodeGroupForNode(node)
+			if err != nil {
+				t.Fatalf("unexpected error: %v", err)
+			}
+			nodeToNodeGroup[node] = nodeGroup
+		}
+
+		for node, nodeGroup := range nodeToNodeGroup {
+			if err := nodeGroup.DeleteNodes([]*corev1.Node{node}); err != nil {
+				t.Fatalf("unexpected error deleting node: %v", err)
+			}
+		}
+
+		nodegroups, err = controller.nodeGroups()
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+
+		ng = nodegroups[0]
+
+		// Check the nodegroup is at the expected size.
+		actualSize, err := ng.TargetSize()
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+
+		if actualSize != expectedSize {
+			t.Fatalf("expected %d nodes, got %d", expectedSize, actualSize)
+		}
+
+		gvr, err := ng.scalableResource.GroupVersionResource()
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+
+		scalableResource, err := ng.machineController.managementScaleClient.Scales(testConfig.spec.namespace).
+			Get(context.TODO(), gvr.GroupResource(), ng.scalableResource.Name(), metav1.GetOptions{})
+		if err != nil {
+			t.Fatalf("unexpected error: %v", err)
+		}
+
+		if scalableResource.Spec.Replicas != int32(expectedSize) {
+			t.Errorf("expected %v, got %v", expectedSize, scalableResource.Spec.Replicas)
+		}
+	}
+
+	// Note: 10 is an upper bound for the number of nodes/replicas.
+	// Going beyond 10 will break the sorting that happens in the
+	// test() function because provider IDs are compared as strings,
+	// not with natural sorting, and the expected semantics will fail.
+
+	t.Run("MachineSet", func(t *testing.T) {
+		test(t, createMachineSetTestConfig(RandomString(6), RandomString(6), RandomString(6), 10, map[string]string{
+			nodeGroupMinSizeAnnotationKey: "1",
+			nodeGroupMaxSizeAnnotationKey: "10",
+		}))
+	})
+
+	t.Run("MachineDeployment", func(t *testing.T) {
+		test(t, createMachineDeploymentTestConfig(RandomString(6), RandomString(6), RandomString(6), 10, map[string]string{
+			nodeGroupMinSizeAnnotationKey: "1",
+			nodeGroupMaxSizeAnnotationKey: "10",
+		}))
+	})
+}
+
 func TestNodeGroupWithFailedMachine(t *testing.T) {
 	test := func(t *testing.T, testConfig *testConfig) {
 		controller, stop := mustCreateTestController(t, testConfig)
diff --git a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go
index c9b88854d385..488e85f67e40 100644
--- a/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go
+++ b/cluster-autoscaler/cloudprovider/clusterapi/clusterapi_unstructured.go
@@ -80,14 +80,20 @@ func (r unstructuredScalableResource) ProviderIDs() ([]string, error) {
 }
 
 func (r unstructuredScalableResource) Replicas() (int, error) {
-	replicas, found, err := unstructured.NestedInt64(r.unstructured.UnstructuredContent(), "spec", "replicas")
+	gvr, err := r.GroupVersionResource()
 	if err != nil {
-		return 0, errors.Wrap(err, "error getting replica count")
+		return 0, err
 	}
-	if !found {
-		replicas = 0
+
+	s, err := r.controller.managementScaleClient.Scales(r.Namespace()).Get(context.TODO(), gvr.GroupResource(), r.Name(), metav1.GetOptions{})
+	if err != nil {
+		return 0, err
+	}
+
+	if s == nil {
+		return 0, fmt.Errorf("failed to fetch resource scale: unknown %s %s/%s", r.Kind(), r.Namespace(), r.Name())
 	}
-	return int(replicas), nil
+	return int(s.Spec.Replicas), nil
 }
 
 func (r unstructuredScalableResource) SetSize(nreplicas int) error {
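
For illustration (not part of the patch): the failure mode described in the commit message can be reduced to a minimal, self-contained Go sketch. The types below (scaleTarget, cachedScalable, freshScalable) are hypothetical stand-ins for the provider's scalable resource, not real autoscaler APIs. It shows why snapshotting spec.replicas at nodegroup construction collapses a three-node scale-down into a single decrement, while re-reading the live count on every call, as the reworked Replicas() does via the scale client, accumulates correctly.

package main

import "fmt"

// scaleTarget stands in for the scale subresource of a MachineSet or
// MachineDeployment on the management cluster.
type scaleTarget struct{ replicas int }

// cachedScalable mimics the old behaviour: spec.replicas is read from a
// snapshot of the unstructured object taken when the nodegroup is built.
type cachedScalable struct {
	cached int
	target *scaleTarget
}

func (c *cachedScalable) deleteOneNode() {
	// Stale read-modify-write: every nodegroup saw the same snapshot, so
	// each write is snapshot-1 and the decrements do not accumulate.
	c.target.replicas = c.cached - 1
}

// freshScalable mimics the fix: the live replica count is re-read on
// every call.
type freshScalable struct{ target *scaleTarget }

func (f *freshScalable) deleteOneNode() {
	f.target.replicas = f.target.replicas - 1
}

func main() {
	// Like the core autoscaler, build one nodegroup per node up front,
	// then ask each nodegroup to delete its node.
	target := &scaleTarget{replicas: 10}
	var buggy []*cachedScalable
	for i := 0; i < 3; i++ {
		buggy = append(buggy, &cachedScalable{cached: target.replicas, target: target})
	}
	for _, ng := range buggy {
		ng.deleteOneNode()
	}
	fmt.Printf("cached: replicas=%d, want 7\n", target.replicas) // prints 9

	target = &scaleTarget{replicas: 10}
	var fixed []*freshScalable
	for i := 0; i < 3; i++ {
		fixed = append(fixed, &freshScalable{target: target})
	}
	for _, ng := range fixed {
		ng.deleteOneNode()
	}
	fmt.Printf("fresh: replicas=%d, want 7\n", target.replicas) // prints 7
}

This mirrors the structure of TestNodeGroupDeleteNodesSequential above: node groups are constructed first, deletions run afterwards, and only the fresh read reaches the expected size of 7.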