Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix!: improve basic canary approximation accuracy and honor maxSurge #1759

Merged
merged 1 commit into from
Jan 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions rollout/analysis_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -735,7 +735,7 @@ func TestDoNothingWithAnalysisRunsWhileBackgroundAnalysisRunRunning(t *testing.T
SetWeight: pointer.Int32Ptr(10),
}}

r1 := newCanaryRollout("foo", 1, nil, steps, pointer.Int32Ptr(0), intstr.FromInt(0), intstr.FromInt(1))
r1 := newCanaryRollout("foo", 1, nil, steps, pointer.Int32Ptr(0), intstr.FromInt(1), intstr.FromInt(1))
r2 := bumpVersion(r1)
r2.Spec.Strategy.Canary.Analysis = &v1alpha1.RolloutAnalysisBackground{
RolloutAnalysis: v1alpha1.RolloutAnalysis{
Expand Down Expand Up @@ -792,7 +792,7 @@ func TestDoNothingWhileStepBasedAnalysisRunRunning(t *testing.T) {
},
}}

r1 := newCanaryRollout("foo", 1, nil, steps, pointer.Int32Ptr(0), intstr.FromInt(0), intstr.FromInt(1))
r1 := newCanaryRollout("foo", 1, nil, steps, pointer.Int32Ptr(0), intstr.FromInt(1), intstr.FromInt(1))
r2 := bumpVersion(r1)
ar := analysisRun(at, v1alpha1.RolloutTypeStepLabel, r2)
ar.Status.Phase = v1alpha1.AnalysisPhaseRunning
Expand Down Expand Up @@ -1067,7 +1067,7 @@ func TestPausedOnInconclusiveBackgroundAnalysisRun(t *testing.T) {
{SetWeight: pointer.Int32Ptr(30)},
}

r1 := newCanaryRollout("foo", 1, nil, steps, pointer.Int32Ptr(0), intstr.FromInt(0), intstr.FromInt(1))
r1 := newCanaryRollout("foo", 1, nil, steps, pointer.Int32Ptr(0), intstr.FromInt(1), intstr.FromInt(1))
r2 := bumpVersion(r1)
ar := analysisRun(at, v1alpha1.RolloutTypeBackgroundRunLabel, r2)
r2.Spec.Strategy.Canary.Analysis = &v1alpha1.RolloutAnalysisBackground{
Expand Down Expand Up @@ -1447,7 +1447,7 @@ func TestDoNotCreateBackgroundAnalysisRunAfterInconclusiveRun(t *testing.T) {
{SetWeight: pointer.Int32Ptr(10)},
}

r1 := newCanaryRollout("foo", 1, nil, steps, pointer.Int32Ptr(0), intstr.FromInt(0), intstr.FromInt(1))
r1 := newCanaryRollout("foo", 1, nil, steps, pointer.Int32Ptr(0), intstr.FromInt(1), intstr.FromInt(1))
r2 := bumpVersion(r1)
r2.Spec.Strategy.Canary.Analysis = &v1alpha1.RolloutAnalysisBackground{
RolloutAnalysis: v1alpha1.RolloutAnalysis{
Expand Down
4 changes: 2 additions & 2 deletions rollout/experiment_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,7 +322,7 @@ func TestPauseRolloutAfterInconclusiveExperiment(t *testing.T) {
Experiment: &v1alpha1.RolloutExperimentStep{},
}}

r1 := newCanaryRollout("foo", 1, nil, steps, pointer.Int32Ptr(0), intstr.FromInt(0), intstr.FromInt(1))
r1 := newCanaryRollout("foo", 1, nil, steps, pointer.Int32Ptr(0), intstr.FromInt(1), intstr.FromInt(1))
r2 := bumpVersion(r1)

rs1 := newReplicaSetWithStatus(r1, 1, 1)
Expand Down Expand Up @@ -373,7 +373,7 @@ func TestRolloutExperimentScaleDownExperimentFromPreviousStep(t *testing.T) {
{SetWeight: pointer.Int32Ptr(1)},
}

r1 := newCanaryRollout("foo", 1, nil, steps, pointer.Int32Ptr(1), intstr.FromInt(0), intstr.FromInt(1))
r1 := newCanaryRollout("foo", 1, nil, steps, pointer.Int32Ptr(1), intstr.FromInt(1), intstr.FromInt(1))
r2 := bumpVersion(r1)

rs1 := newReplicaSetWithStatus(r1, 1, 1)
Expand Down
147 changes: 80 additions & 67 deletions utils/replicaset/canary.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,56 +43,6 @@ func AtDesiredReplicaCountsForCanary(ro *v1alpha1.Rollout, newRS, stableRS *apps
return true
}

/*
// AtDesiredReplicaCountsForCanary indicates if the rollout is at the desired state for the current step
func AtDesiredReplicaCountsForCanary(rollout *v1alpha1.Rollout, newRS, stableRS *appsv1.ReplicaSet, olderRSs []*appsv1.ReplicaSet) bool {
desiredNewRSReplicaCount, desiredStableRSReplicaCount := DesiredReplicaCountsForCanary(rollout, newRS, stableRS)
if newRS == nil || desiredNewRSReplicaCount != *newRS.Spec.Replicas || desiredNewRSReplicaCount != newRS.Status.AvailableReplicas {
return false
}
if stableRS == nil || desiredStableRSReplicaCount != *stableRS.Spec.Replicas || desiredStableRSReplicaCount != stableRS.Status.AvailableReplicas {
return false
}
if GetAvailableReplicaCountForReplicaSets(olderRSs) != int32(0) {
return false
}
return true
}
*/

/*
//DesiredReplicaCountsForCanary calculates the desired endstate replica count for the new and stable replicasets
func DesiredReplicaCountsForCanary(rollout *v1alpha1.Rollout, newRS, stableRS *appsv1.ReplicaSet) (int32, int32) {
rolloutSpecReplica := defaults.GetReplicasOrDefault(rollout.Spec.Replicas)
replicas, weight := GetCanaryReplicasOrWeight(rollout)

desiredNewRSReplicaCount := int32(0)
desiredStableRSReplicaCount := int32(0)
if replicas != nil {
desiredNewRSReplicaCount = *replicas
desiredStableRSReplicaCount = rolloutSpecReplica
} else {
desiredNewRSReplicaCount = int32(math.Ceil(float64(rolloutSpecReplica) * (float64(weight) / 100)))
desiredStableRSReplicaCount = int32(math.Ceil(float64(rolloutSpecReplica) * (1 - (float64(weight) / 100))))
}

if !CheckStableRSExists(newRS, stableRS) {
// If there is no stableRS or it is the same as the newRS, then the rollout does not follow the canary steps.
// Instead the controller tries to get the newRS to 100% traffic.
desiredNewRSReplicaCount = rolloutSpecReplica
desiredStableRSReplicaCount = 0
}
// Unlike the ReplicaSet based weighted canary, a service mesh/ingress
// based canary leaves the stable as 100% scaled until the rollout completes.
if rollout.Spec.Strategy.Canary.TrafficRouting != nil {
desiredStableRSReplicaCount = rolloutSpecReplica
}

return desiredNewRSReplicaCount, desiredStableRSReplicaCount

}
*/

// CalculateReplicaCountsForBasicCanary calculates the number of replicas for the newRS and the stableRS
// when using the basic canary strategy. The function calculates the desired number of replicas for
// the new and stable RS using the following equations:
Expand Down Expand Up @@ -131,9 +81,9 @@ func DesiredReplicaCountsForCanary(rollout *v1alpha1.Rollout, newRS, stableRS *a
func CalculateReplicaCountsForBasicCanary(rollout *v1alpha1.Rollout, newRS *appsv1.ReplicaSet, stableRS *appsv1.ReplicaSet, oldRSs []*appsv1.ReplicaSet) (int32, int32) {
rolloutSpecReplica := defaults.GetReplicasOrDefault(rollout.Spec.Replicas)
_, desiredWeight := GetCanaryReplicasOrWeight(rollout)
maxSurge := MaxSurge(rollout)

desiredStableRSReplicaCount := int32(math.Ceil(float64(rolloutSpecReplica) * (1 - (float64(desiredWeight) / 100))))
desiredNewRSReplicaCount := int32(math.Ceil(float64(rolloutSpecReplica) * (float64(desiredWeight) / 100)))
desiredNewRSReplicaCount, desiredStableRSReplicaCount := approximateWeightedCanaryStableReplicaCounts(rolloutSpecReplica, desiredWeight, maxSurge)

stableRSReplicaCount := int32(0)
newRSReplicaCount := int32(0)
Expand All @@ -151,13 +101,6 @@ func CalculateReplicaCountsForBasicCanary(rollout *v1alpha1.Rollout, newRS *apps
desiredStableRSReplicaCount = 0
}

maxSurge := MaxSurge(rollout)

if extraReplicaAdded(rolloutSpecReplica, desiredWeight) {
// In the case where the weight of the stable and canary replica counts cannot be divided evenly,
// the controller needs to surges by one to account for both replica counts being rounded up.
maxSurge = maxSurge + 1
}
maxReplicaCountAllowed := rolloutSpecReplica + maxSurge

allRSs := append(oldRSs, newRS)
Expand Down Expand Up @@ -223,6 +166,84 @@ func CalculateReplicaCountsForBasicCanary(rollout *v1alpha1.Rollout, newRS *apps
return newRSReplicaCount, stableRSReplicaCount
}

// approximateWeightedCanaryStableReplicaCounts approximates the desired canary weight and returns
// the closest replica count values for the canary and stable to reach the desired weight. The
// canary/stable replica counts might sum to either spec.replicas or spec.replicas + 1 but will not
// exceed spec.replicas if maxSurge is 0. If the canary weight is between 1-99, and spec.replicas is > 1,
// we will always return a minimum of 1 for stable and canary as to not return 0.
func approximateWeightedCanaryStableReplicaCounts(specReplicas, desiredWeight, maxSurge int32) (int32, int32) {
if specReplicas == 0 {
return 0, 0
}
// canaryOption is one potential return value of this function. We will evaluate multiple options
// for the canary count in order to best approximate the desired weight.
type canaryOption struct {
canary int32
total int32
}
var options []canaryOption

ceilWeightedCanaryCount := int32(math.Ceil(float64(specReplicas*desiredWeight) / 100.0))
floorWeightedCanaryCount := int32(math.Floor(float64(specReplicas*desiredWeight) / 100.0))

tied := floorCeilingTied(desiredWeight, specReplicas)

// zeroAllowed indicates if are allowed to return the floored value if it is zero. We don't allow
// the value to be zero if when user has a weight from 1-99, and they run 2+ replicas (surge included)
zeroAllowed := desiredWeight == 100 || desiredWeight == 0 || (specReplicas == 1 && maxSurge == 0)

if ceilWeightedCanaryCount < specReplicas || zeroAllowed {
options = append(options, canaryOption{ceilWeightedCanaryCount, specReplicas})
}

if !tied && (floorWeightedCanaryCount != 0 || zeroAllowed) {
options = append(options, canaryOption{floorWeightedCanaryCount, specReplicas})
}

// check if we are allowed to surge. if we are, we can also consider rounding up to spec.replicas + 1
// in order to achieve a closer canary weight
if maxSurge > 0 {
options = append(options, canaryOption{ceilWeightedCanaryCount, specReplicas + 1})
surgeIsTied := floorCeilingTied(desiredWeight, specReplicas+1)
if !surgeIsTied && (floorWeightedCanaryCount != 0 || zeroAllowed) {
options = append(options, canaryOption{floorWeightedCanaryCount, specReplicas + 1})
}
}

if len(options) == 0 {
// should not get here
return 0, specReplicas
}

bestOption := options[0]
bestDelta := weightDelta(desiredWeight, bestOption.canary, bestOption.total)
for i := 1; i < len(options); i++ {
currOption := options[i]
currDelta := weightDelta(desiredWeight, currOption.canary, currOption.total)
if currDelta < bestDelta {
bestOption = currOption
bestDelta = currDelta
}
}
return bestOption.canary, bestOption.total - bestOption.canary
}

// floorCeilingTied indicates if the ceiling and floor values are equidistant from the desired weight
// For example: replicas: 3, desiredWeight: 50%
// A canary count of 1 (33.33%) or 2 (66.66%) are both equidistant from desired weight of 50%.
// When this happens, we will pick the larger canary count
func floorCeilingTied(desiredWeight, totalReplicas int32) bool {
_, frac := math.Modf(float64(totalReplicas) * (float64(desiredWeight) / 100))
return frac == 0.5
}

// weightDelta calculates the difference that the canary replicas will be from the desired weight
// This is used to pick the closest approximation of canary counts.
func weightDelta(desiredWeight, canaryReplicas, totalReplicas int32) float64 {
actualWeight := float64(canaryReplicas*100) / float64(totalReplicas)
return math.Abs(actualWeight - float64(desiredWeight))
}

// calculateScaleDownReplicaCount calculates drainRSReplicaCount
// drainRSReplicaCount can be either stableRS count or canaryRS count
// drainRSReplicaCount corresponds to RS whose availability is not considered in calculating replicasToScaleDown
Expand Down Expand Up @@ -397,14 +418,6 @@ func GetReplicasForScaleDown(rs *appsv1.ReplicaSet, ignoreAvailability bool) int
return rs.Status.AvailableReplicas
}

// extraReplicaAdded checks if an extra replica is added because the stable and canary replicas count are both
// rounded up. The controller rounds both of the replica counts when the setWeight does not distribute evenly
// in order to prevent either from having a 0 replica count.
func extraReplicaAdded(replicas int32, setWeight int32) bool {
_, frac := math.Modf(float64(replicas) * (float64(setWeight) / 100))
return frac != 0.0
}

// GetCurrentCanaryStep returns the current canary step. If there are no steps or the rollout
// has already executed the last step, the func returns nil
func GetCurrentCanaryStep(rollout *v1alpha1.Rollout) (*v1alpha1.CanaryStep, *int32) {
Expand Down
Loading