Skip to content

Commit

Permalink
Merge pull request kubernetes#4948 from towca/jtuznik/act-fix-4
Browse files Browse the repository at this point in the history
CA: fix flakiness in actuation.TestStartDeletion
k8s-ci-robot authored Jun 6, 2022

Verified

This commit was signed with the committer’s verified signature.
gsmet Guillaume Smet
2 parents 82bc463 + c707c53 commit 0e89350
Showing 1 changed file with 26 additions and 0 deletions.
26 changes: 26 additions & 0 deletions cluster-autoscaler/core/scaledown/actuation/actuator_test.go
Original file line number Diff line number Diff line change
@@ -538,12 +538,14 @@ func TestStartDeletion(t *testing.T) {
wantTaintUpdates: map[string][][]apiv1.Taint{
"drain-node-0": {
{toBeDeletedTaint},
{},
},
"drain-node-1": {
{toBeDeletedTaint},
},
"drain-node-2": {
{toBeDeletedTaint},
{},
},
"drain-node-3": {
{toBeDeletedTaint},
@@ -623,12 +625,14 @@ func TestStartDeletion(t *testing.T) {
},
"empty-node-1": {
{toBeDeletedTaint},
{},
},
"drain-node-0": {
{toBeDeletedTaint},
},
"drain-node-1": {
{toBeDeletedTaint},
{},
},
},
wantNodeDeleteResults: map[string]status.NodeDeleteResult{
@@ -911,6 +915,15 @@ func TestStartDeletion(t *testing.T) {
t.Errorf("taintUpdates diff (-want +got):\n%s", diff)
}

// Wait for all expected deletions to be reported in NodeDeletionTracker. Reporting happens shortly after the deletion
// in the cloud provider, which we already synced on above, so this will usually not wait at all. However, there can
// still be a delay between the cloud provider deletion and the reporting, in which case the results are not there yet
// and we need to wait for them before asserting.
err = waitForDeletionResultsCount(actuator.nodeDeletionTracker, len(tc.wantNodeDeleteResults), 3*time.Second, 200*time.Millisecond)
if err != nil {
t.Errorf("Timeout while waiting for node deletion results")
}

// Run StartDeletion again to gather node deletion results for deletions started in the previous call, and verify
// that they look as expected.
gotNextStatus, gotNextErr := actuator.StartDeletion(nil, nil, time.Now())
@@ -1002,3 +1015,16 @@ func generateUtilInfo(cpuUtil, memUtil float64) utilization.Info {
Utilization: higherUtilVal,
}
}

// waitForDeletionResultsCount polls ndt.DeletionResults() until it reports exactly resultsCount results, sleeping
// retryTime between attempts. It returns nil as soon as the count matches, and an error once timeout has elapsed
// without a match.
//
// The tracker is always polled at least once, even for a non-positive timeout, and is polled again after every sleep,
// so results that show up during the final sleep are still observed before giving up.
func waitForDeletionResultsCount(ndt *deletiontracker.NodeDeletionTracker, resultsCount int, timeout, retryTime time.Duration) error {
	// Polling is quite ugly, but shouldn't matter much since in most cases there shouldn't be a need to wait at all,
	// and the function should return right after the first check.
	// An alternative could be to turn NodeDeletionTracker into an interface, and use an implementation which allows
	// synchronizing calls to EndDeletion in the test code.
	deadline := time.Now().Add(timeout)
	for {
		// Only the number of results matters here; the second return value of DeletionResults is not needed.
		results, _ := ndt.DeletionResults()
		if len(results) == resultsCount {
			return nil
		}
		// Check the deadline only after polling, so that the latest state is always inspected before timing out.
		if !time.Now().Before(deadline) {
			return fmt.Errorf("timed out while waiting for node deletion results: got %d, want %d", len(results), resultsCount)
		}
		time.Sleep(retryTime)
	}
}

0 comments on commit 0e89350

Please sign in to comment.