From 72d2d83484436de0729cbd303d9afda851863625 Mon Sep 17 00:00:00 2001 From: "mingzhou.swx" Date: Tue, 13 Jun 2023 17:46:17 +0800 Subject: [PATCH] advanced deployment scale down old unhealthy pods first Signed-off-by: mingzhou.swx --- pkg/controller/deployment/sync.go | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pkg/controller/deployment/sync.go b/pkg/controller/deployment/sync.go index 9b821e96..cefb74f4 100644 --- a/pkg/controller/deployment/sync.go +++ b/pkg/controller/deployment/sync.go @@ -318,6 +318,27 @@ func (dc *DeploymentController) scale(ctx context.Context, deployment *apps.Depl scalingOperation = "down" } + // Scale down the unhealthy replicas in old replica sets first to avoid some bad cases. + // For example: + // _______________________________________________________________________ + // | ReplicaSet | oldRS-1 | oldRS-2 | newRS | + // | --------------| -------------------|----------------------|-----------| + // | Replicas | 5 healthy Pods | 1 unhealthy Pod | 0 | + // ------------------------------------------------------------------------ + // If we want to scale down these replica sets from 6 to 5, we expect oldRS-2 to be scaled down + // from 1 to 0 first. + var err error + var cleanupCount int32 + if deploymentReplicasToAdd < 0 { + oldRSs, cleanupCount, err = dc.cleanupUnhealthyReplicas(ctx, oldRSs, deployment, -deploymentReplicasToAdd) + if err != nil { + return err + } + klog.V(4).Infof("Cleaned up unhealthy replicas from all RSes by %d during scaling", cleanupCount) + deploymentReplicasToAdd += cleanupCount + allRSs = deploymentutil.FilterActiveReplicaSets(append(oldRSs, newRS)) + } + // Iterate over all active replica sets and estimate proportions for each of them. // The absolute value of deploymentReplicasAdded should never exceed the absolute // value of deploymentReplicasToAdd.