diff --git a/cluster-autoscaler/core/scaledown/actuation/actuator_test.go b/cluster-autoscaler/core/scaledown/actuation/actuator_test.go index 587ce02b9535..1c7b7bc0f50f 100644 --- a/cluster-autoscaler/core/scaledown/actuation/actuator_test.go +++ b/cluster-autoscaler/core/scaledown/actuation/actuator_test.go @@ -538,12 +538,14 @@ func TestStartDeletion(t *testing.T) { wantTaintUpdates: map[string][][]apiv1.Taint{ "drain-node-0": { {toBeDeletedTaint}, + {}, }, "drain-node-1": { {toBeDeletedTaint}, }, "drain-node-2": { {toBeDeletedTaint}, + {}, }, "drain-node-3": { {toBeDeletedTaint}, @@ -623,12 +625,14 @@ func TestStartDeletion(t *testing.T) { }, "empty-node-1": { {toBeDeletedTaint}, + {}, }, "drain-node-0": { {toBeDeletedTaint}, }, "drain-node-1": { {toBeDeletedTaint}, + {}, }, }, wantNodeDeleteResults: map[string]status.NodeDeleteResult{ @@ -911,6 +915,15 @@ func TestStartDeletion(t *testing.T) { t.Errorf("taintUpdates diff (-want +got):\n%s", diff) } + // Wait for all expected deletions to be reported in NodeDeletionTracker. Reporting happens shortly after the deletion + // in cloud provider we sync to above and so this will usually not wait at all. However, it can still happen + // that there is a delay between cloud provider deletion and reporting, in which case the results are not there yet + // and we need to wait for them before asserting. + err = waitForDeletionResultsCount(actuator.nodeDeletionTracker, len(tc.wantNodeDeleteResults), 3*time.Second, 200*time.Millisecond) + if err != nil { + t.Errorf("Timeout while waiting for node deletion results") + } + // Run StartDeletion again to gather node deletion results for deletions started in the previous call, and verify // that they look as expected. gotNextStatus, gotNextErr := actuator.StartDeletion(nil, nil, time.Now()) @@ -1002,3 +1015,16 @@ func generateUtilInfo(cpuUtil, memUtil float64) utilization.Info { Utilization: higherUtilVal, } } + +func waitForDeletionResultsCount(ndt *deletiontracker.NodeDeletionTracker, resultsCount int, timeout, retryTime time.Duration) error { + // This is quite ugly, but shouldn't matter much since in most cases there shouldn't be a need to wait at all, and + // the function should return quickly after the first if check. + // An alternative could be to turn NodeDeletionTracker into an interface, and use an implementation which allows + // synchronizing calls to EndDeletion in the test code. + for retryUntil := time.Now().Add(timeout); time.Now().Before(retryUntil); time.Sleep(retryTime) { + if results, _ := ndt.DeletionResults(); len(results) == resultsCount { + return nil + } + } + return fmt.Errorf("timed out while waiting for node deletion results") +}