From 81efa420b40282f3dd2e41adbe44956376426bd1 Mon Sep 17 00:00:00 2001 From: Jennifer Chen <32009013+jennchenn@users.noreply.github.com> Date: Fri, 20 Dec 2024 11:08:00 -0500 Subject: [PATCH] [clusteragent/autoscaling] Implement stabilization for horizontal recommendations (#31547) --- go.mod | 2 +- go.sum | 4 +- .../workload/controller_horizontal.go | 62 +++ .../workload/controller_horizontal_test.go | 366 ++++++++++++++++++ .../workload/model/pod_autoscaler.go | 10 +- .../workload/model/pod_autoscaler_test.go | 190 +++++++++ 6 files changed, 625 insertions(+), 9 deletions(-) diff --git a/go.mod b/go.mod index df93e4e9392a3..744c460efe7c6 100644 --- a/go.mod +++ b/go.mod @@ -159,7 +159,7 @@ require ( github.com/DataDog/datadog-agent/pkg/util/pointer v0.59.0 github.com/DataDog/datadog-agent/pkg/util/scrubber v0.59.1 github.com/DataDog/datadog-go/v5 v5.6.0 - github.com/DataDog/datadog-operator v1.11.0-rc.2 + github.com/DataDog/datadog-operator v0.7.1-0.20241219210556-f517775059d1 github.com/DataDog/ebpf-manager v0.7.4 github.com/DataDog/gopsutil v1.2.2 github.com/DataDog/nikos v1.12.8 diff --git a/go.sum b/go.sum index e837e7c86b5b5..60055b97b4ad9 100644 --- a/go.sum +++ b/go.sum @@ -130,8 +130,8 @@ github.com/DataDog/datadog-api-client-go/v2 v2.33.0/go.mod h1:d3tOEgUd2kfsr9uuHQ github.com/DataDog/datadog-go v3.2.0+incompatible/go.mod h1:LButxg5PwREeZtORoXG3tL4fMGNddJ+vMq1mwgfaqoQ= github.com/DataDog/datadog-go/v5 v5.6.0 h1:2oCLxjF/4htd55piM75baflj/KoE6VYS7alEUqFvRDw= github.com/DataDog/datadog-go/v5 v5.6.0/go.mod h1:K9kcYBlxkcPP8tvvjZZKs/m1edNAUFzBbdpTUKfCsuw= -github.com/DataDog/datadog-operator v1.11.0-rc.2 h1:4tMZlxbYE0WEpRYAhoqEe8nLP67C/PDq7utJJcD8RM8= -github.com/DataDog/datadog-operator v1.11.0-rc.2/go.mod h1:mD+3PWR0wOSVJGaXjkpzsYEK/7PhqjOipx2usgfsxM0= +github.com/DataDog/datadog-operator v0.7.1-0.20241219210556-f517775059d1 h1:EFGXjo7iSZO1f0ZpLE2H2UIcPH2x4yhxQnKd8UKrs3k= +github.com/DataDog/datadog-operator v0.7.1-0.20241219210556-f517775059d1/go.mod h1:mD+3PWR0wOSVJGaXjkpzsYEK/7PhqjOipx2usgfsxM0= github.com/DataDog/dd-sensitive-data-scanner/sds-go/go v0.0.0-20240816154533-f7f9beb53a42 h1:RoH7VLzTnxHEugRPIgnGlxwDFszFGI7b3WZZUtWuPRM= github.com/DataDog/dd-sensitive-data-scanner/sds-go/go v0.0.0-20240816154533-f7f9beb53a42/go.mod h1:TX7CTOQ3LbQjfAi4SwqUoR5gY1zfUk7VRBDTuArjaDc= github.com/DataDog/dd-trace-go/v2 v2.0.0-beta.11 h1:6vwU//TjBIghQKMgIP9UyIRhN/LWS1y8tYzvRnu8JZw= diff --git a/pkg/clusteragent/autoscaling/workload/controller_horizontal.go b/pkg/clusteragent/autoscaling/workload/controller_horizontal.go index 555cf50b181d3..5bd044a18885c 100644 --- a/pkg/clusteragent/autoscaling/workload/controller_horizontal.go +++ b/pkg/clusteragent/autoscaling/workload/controller_horizontal.go @@ -246,6 +246,27 @@ func (hr *horizontalController) computeScaleAction( evalAfter = rulesNextEvalAfter.Truncate(time.Second) + time.Second } + // Stabilize recommendation + var stabilizationLimitReason string + var stabilizationLimitedReplicas int32 + upscaleStabilizationSeconds := int32(0) + downscaleStabilizationSeconds := int32(0) + + if policy := autoscalerInternal.Spec().Policy; policy != nil { + if upscalePolicy := policy.Upscale; upscalePolicy != nil { + upscaleStabilizationSeconds = int32(upscalePolicy.StabilizationWindowSeconds) + } + if downscalePolicy := policy.Downscale; downscalePolicy != nil { + downscaleStabilizationSeconds = int32(downscalePolicy.StabilizationWindowSeconds) + } + } + + stabilizationLimitedReplicas, stabilizationLimitReason = stabilizeRecommendations(scalingTimestamp, 
autoscalerInternal.HorizontalLastActions(), currentDesiredReplicas, targetDesiredReplicas, upscaleStabilizationSeconds, downscaleStabilizationSeconds, scaleDirection) + if stabilizationLimitReason != "" { + limitReason = stabilizationLimitReason + targetDesiredReplicas = stabilizationLimitedReplicas + } + horizontalAction := &datadoghq.DatadogPodAutoscalerHorizontalAction{ FromReplicas: currentDesiredReplicas, ToReplicas: targetDesiredReplicas, @@ -457,3 +478,44 @@ func accumulateReplicasChange(currentTime time.Time, events []datadoghq.DatadogP } return } + +func stabilizeRecommendations(currentTime time.Time, pastActions []datadoghq.DatadogPodAutoscalerHorizontalAction, currentReplicas int32, originalTargetDesiredReplicas int32, stabilizationWindowUpscaleSeconds int32, stabilizationWindowDownscaleSeconds int32, scaleDirection scaleDirection) (int32, string) { + limitReason := "" + + if len(pastActions) == 0 { + return originalTargetDesiredReplicas, limitReason + } + + upRecommendation := originalTargetDesiredReplicas + upCutoff := currentTime.Add(-time.Duration(stabilizationWindowUpscaleSeconds) * time.Second) + + downRecommendation := originalTargetDesiredReplicas + downCutoff := currentTime.Add(-time.Duration(stabilizationWindowDownscaleSeconds) * time.Second) + + for _, a := range pastActions { + if scaleDirection == scaleUp && a.Time.Time.After(upCutoff) { + upRecommendation = min(upRecommendation, *a.RecommendedReplicas) + } + + if scaleDirection == scaleDown && a.Time.Time.After(downCutoff) { + downRecommendation = max(downRecommendation, *a.RecommendedReplicas) + } + + if (scaleDirection == scaleUp && a.Time.Time.Before(upCutoff)) || (scaleDirection == scaleDown && a.Time.Time.Before(downCutoff)) { + break + } + } + + recommendation := currentReplicas + if recommendation < upRecommendation { + recommendation = upRecommendation + } + if recommendation > downRecommendation { + recommendation = downRecommendation + } + if recommendation != originalTargetDesiredReplicas { + limitReason = fmt.Sprintf("desired replica count limited to %d (originally %d) due to stabilization window", recommendation, originalTargetDesiredReplicas) + } + + return recommendation, limitReason +} diff --git a/pkg/clusteragent/autoscaling/workload/controller_horizontal_test.go b/pkg/clusteragent/autoscaling/workload/controller_horizontal_test.go index fc2eb1b1fe88c..89014b196a066 100644 --- a/pkg/clusteragent/autoscaling/workload/controller_horizontal_test.go +++ b/pkg/clusteragent/autoscaling/workload/controller_horizontal_test.go @@ -77,6 +77,15 @@ func (f *horizontalControllerFixture) runSync(fakePai *model.FakePodAutoscalerIn return autoscalerInternal, res, err } +func newHorizontalAction(time time.Time, fromReplicas, toReplicas, recommendedReplicas int32) datadoghq.DatadogPodAutoscalerHorizontalAction { + return datadoghq.DatadogPodAutoscalerHorizontalAction{ + Time: metav1.NewTime(time), + FromReplicas: fromReplicas, + ToReplicas: toReplicas, + RecommendedReplicas: pointer.Ptr[int32](recommendedReplicas), + } +} + type horizontalScalingTestArgs struct { fakePai *model.FakePodAutoscalerInternal dataSource datadoghq.DatadogPodAutoscalerValueSource @@ -181,6 +190,14 @@ func TestHorizontalControllerSyncPrerequisites(t *testing.T) { Kind: expectedGVK.Kind, APIVersion: expectedGVK.Group + "/" + expectedGVK.Version, }, + Policy: &datadoghq.DatadogPodAutoscalerPolicy{ + Upscale: &datadoghq.DatadogPodAutoscalerScalingPolicy{ + StabilizationWindowSeconds: 0, + }, + Downscale: 
&datadoghq.DatadogPodAutoscalerScalingPolicy{ + StabilizationWindowSeconds: 0, + }, + }, } fakePai.Error = nil f.scaler.On("get", mock.Anything, autoscalerNamespace, autoscalerName, expectedGVK).Return( @@ -1119,3 +1136,352 @@ func TestHorizontalControllerSyncScaleDecisionsWithRules(t *testing.T) { assert.Equal(t, autoscaling.NoRequeue, result) assert.NoError(t, err) } + +func TestStabilizeRecommendations(t *testing.T) { + currentTime := time.Now() + + tests := []struct { + name string + actions []datadoghq.DatadogPodAutoscalerHorizontalAction + currentReplicas int32 + recReplicas int32 + expected int32 + expectedReason string + upscaleWindow int32 + downscaleWindow int32 + scaleDirection scaleDirection + }{ + { + name: "no downscale stabilization - constant upscale", + actions: []datadoghq.DatadogPodAutoscalerHorizontalAction{ + newHorizontalAction(currentTime.Add(-60*time.Second), 4, 5, 6), + newHorizontalAction(currentTime.Add(-30*time.Second), 6, 4, 4), + }, + currentReplicas: 4, + recReplicas: 8, + expected: 8, + expectedReason: "", + upscaleWindow: 0, + downscaleWindow: 300, + scaleDirection: scaleUp, + }, + { + name: "downscale stabilization", + actions: []datadoghq.DatadogPodAutoscalerHorizontalAction{ + newHorizontalAction(currentTime.Add(-60*time.Second), 8, 6, 6), + newHorizontalAction(currentTime.Add(-30*time.Second), 6, 5, 5), + }, + currentReplicas: 5, + recReplicas: 4, + expected: 5, + expectedReason: "desired replica count limited to 5 (originally 4) due to stabilization window", + upscaleWindow: 0, + downscaleWindow: 300, + scaleDirection: scaleDown, + }, + { + name: "downscale stabilization, recommendation flapping", + actions: []datadoghq.DatadogPodAutoscalerHorizontalAction{ + newHorizontalAction(currentTime.Add(-90*time.Second), 8, 6, 6), + newHorizontalAction(currentTime.Add(-60*time.Second), 6, 9, 9), + newHorizontalAction(currentTime.Add(-30*time.Second), 9, 7, 7), + }, + currentReplicas: 7, + recReplicas: 5, + expected: 7, + expectedReason: "desired replica count limited to 7 (originally 5) due to stabilization window", + upscaleWindow: 0, + downscaleWindow: 300, + scaleDirection: scaleDown, + }, + { + name: "upscale stabilization", + actions: []datadoghq.DatadogPodAutoscalerHorizontalAction{ + newHorizontalAction(currentTime.Add(-60*time.Second), 5, 6, 6), + newHorizontalAction(currentTime.Add(-30*time.Second), 6, 8, 8), + }, + currentReplicas: 8, + recReplicas: 12, + expected: 8, + expectedReason: "desired replica count limited to 8 (originally 12) due to stabilization window", + upscaleWindow: 300, + downscaleWindow: 0, + scaleDirection: scaleUp, + }, + { + name: "upscale stabilization, recommendation flapping", + actions: []datadoghq.DatadogPodAutoscalerHorizontalAction{ + newHorizontalAction(currentTime.Add(-90*time.Second), 6, 8, 8), + newHorizontalAction(currentTime.Add(-60*time.Second), 8, 7, 7), + newHorizontalAction(currentTime.Add(-30*time.Second), 7, 9, 9), + }, + currentReplicas: 9, + recReplicas: 12, + expected: 9, + expectedReason: "desired replica count limited to 9 (originally 12) due to stabilization window", + upscaleWindow: 300, + downscaleWindow: 0, + scaleDirection: scaleUp, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + recommendedReplicas, limitReason := stabilizeRecommendations(currentTime, tt.actions, tt.currentReplicas, tt.recReplicas, tt.upscaleWindow, tt.downscaleWindow, tt.scaleDirection) + assert.Equal(t, tt.expected, recommendedReplicas) + assert.Equal(t, tt.expectedReason, limitReason) + }) + } +} + 
+func TestHorizontalControllerSyncDownscaleWithStabilization(t *testing.T) { + testTime := time.Now() + startTime := testTime.Add(-time.Hour) + defaultStepDuration := 30 * time.Second + + f := newHorizontalControllerFixture(t, startTime) + autoscalerNamespace := "default" + autoscalerName := "test" + + expectedGVK := schema.GroupVersionKind{ + Group: "apps", + Version: "v1", + Kind: "Deployment", + } + + fakePai := &model.FakePodAutoscalerInternal{ + Namespace: autoscalerNamespace, + Name: autoscalerName, + Spec: &datadoghq.DatadogPodAutoscalerSpec{ + TargetRef: v2.CrossVersionObjectReference{ + Name: autoscalerName, + Kind: expectedGVK.Kind, + APIVersion: expectedGVK.Group + "/" + expectedGVK.Version, + }, + Constraints: &datadoghq.DatadogPodAutoscalerConstraints{ + MinReplicas: pointer.Ptr[int32](90), + MaxReplicas: 120, + }, + Policy: &datadoghq.DatadogPodAutoscalerPolicy{ + Upscale: &datadoghq.DatadogPodAutoscalerScalingPolicy{ + StabilizationWindowSeconds: 0, + }, + Downscale: &datadoghq.DatadogPodAutoscalerScalingPolicy{ + StabilizationWindowSeconds: 300, + }, + }, + }, + HorizontalLastActions: []datadoghq.DatadogPodAutoscalerHorizontalAction{ + newHorizontalAction(f.clock.Now().Add(-60*time.Second), 90, 94, 94), + newHorizontalAction(f.clock.Now().Add(-30*time.Second), 94, 97, 97), + }, + ScalingValues: model.ScalingValues{ + Horizontal: &model.HorizontalScalingValues{ + Source: datadoghq.DatadogPodAutoscalerAutoscalingValueSource, + Timestamp: f.clock.Now().Add(-defaultStepDuration), + Replicas: 100, + }, + }, + TargetGVK: expectedGVK, + HorizontalEventsRetention: 5 * time.Minute, + } + + // Test upscale to 100 replicas (not limited) + f.clock.Step(defaultStepDuration) + result, err := f.testScalingDecision(horizontalScalingTestArgs{ + fakePai: fakePai, + dataSource: datadoghq.DatadogPodAutoscalerAutoscalingValueSource, + dataOffset: defaultStepDuration, + currentReplicas: 97, + statusReplicas: 97, + recReplicas: 100, + scaleReplicas: 100, + }) + assert.Equal(t, autoscaling.NoRequeue, result) + assert.NoError(t, err) + + // Test downscale to 97 replicas, limited to 100 + f.clock.Step(defaultStepDuration) + result, err = f.testScalingDecision(horizontalScalingTestArgs{ + fakePai: fakePai, + dataSource: datadoghq.DatadogPodAutoscalerAutoscalingValueSource, + dataOffset: defaultStepDuration, + currentReplicas: 100, + statusReplicas: 100, + recReplicas: 97, + scaleReplicas: 100, + scaleLimitReason: "desired replica count limited to 100 (originally 97) due to stabilization window", + }) + assert.Equal(t, autoscaling.NoRequeue, result) + assert.NoError(t, err) + + // Test downscale to 95 replicas, still limited to 100 + f.clock.Step(defaultStepDuration) + result, err = f.testScalingDecision(horizontalScalingTestArgs{ + fakePai: fakePai, + dataSource: datadoghq.DatadogPodAutoscalerAutoscalingValueSource, + dataOffset: defaultStepDuration, + currentReplicas: 100, + statusReplicas: 100, + recReplicas: 95, + scaleReplicas: 100, + scaleLimitReason: "desired replica count limited to 100 (originally 95) due to stabilization window", + }) + assert.Equal(t, autoscaling.NoRequeue, result) + assert.NoError(t, err) + + // Test downscale to 92 replicas (not limited) + // Moving clock 4 minutes forward to get past stabilization window + f.clock.Step(4 * time.Minute) + result, err = f.testScalingDecision(horizontalScalingTestArgs{ + fakePai: fakePai, + dataSource: datadoghq.DatadogPodAutoscalerAutoscalingValueSource, + dataOffset: defaultStepDuration, + currentReplicas: 100, + statusReplicas: 100, + 
recReplicas: 92, + scaleReplicas: 92, + }) + assert.Equal(t, autoscaling.NoRequeue, result) + assert.NoError(t, err) + + // Test upscale to 100 replicas (not limited) + f.clock.Step(defaultStepDuration) + result, err = f.testScalingDecision(horizontalScalingTestArgs{ + fakePai: fakePai, + dataSource: datadoghq.DatadogPodAutoscalerAutoscalingValueSource, + dataOffset: defaultStepDuration, + currentReplicas: 92, + statusReplicas: 92, + recReplicas: 100, + scaleReplicas: 100, + }) + assert.Equal(t, autoscaling.NoRequeue, result) + assert.NoError(t, err) +} + +func TestHorizontalControllerSyncUpscaleWithStabilization(t *testing.T) { + testTime := time.Now() + startTime := testTime.Add(-time.Hour) + defaultStepDuration := 30 * time.Second + + f := newHorizontalControllerFixture(t, startTime) + autoscalerNamespace := "default" + autoscalerName := "test" + + expectedGVK := schema.GroupVersionKind{ + Group: "apps", + Version: "v1", + Kind: "Deployment", + } + + fakePai := &model.FakePodAutoscalerInternal{ + Namespace: autoscalerNamespace, + Name: autoscalerName, + Spec: &datadoghq.DatadogPodAutoscalerSpec{ + TargetRef: v2.CrossVersionObjectReference{ + Name: autoscalerName, + Kind: expectedGVK.Kind, + APIVersion: expectedGVK.Group + "/" + expectedGVK.Version, + }, + Constraints: &datadoghq.DatadogPodAutoscalerConstraints{ + MinReplicas: pointer.Ptr[int32](90), + MaxReplicas: 120, + }, + Policy: &datadoghq.DatadogPodAutoscalerPolicy{ + Upscale: &datadoghq.DatadogPodAutoscalerScalingPolicy{ + StabilizationWindowSeconds: 300, + }, + Downscale: &datadoghq.DatadogPodAutoscalerScalingPolicy{ + StabilizationWindowSeconds: 0, + }, + }, + }, + HorizontalLastActions: []datadoghq.DatadogPodAutoscalerHorizontalAction{ + newHorizontalAction(f.clock.Now().Add(-60*time.Second), 120, 110, 110), + newHorizontalAction(f.clock.Now().Add(-30*time.Second), 110, 104, 104), + }, + ScalingValues: model.ScalingValues{ + Horizontal: &model.HorizontalScalingValues{ + Source: datadoghq.DatadogPodAutoscalerAutoscalingValueSource, + Timestamp: f.clock.Now().Add(-defaultStepDuration), + Replicas: 100, + }, + }, + TargetGVK: expectedGVK, + HorizontalEventsRetention: 5 * time.Minute, + } + + // Test downscale to 100 replicas (not limited) + f.clock.Step(defaultStepDuration) + result, err := f.testScalingDecision(horizontalScalingTestArgs{ + fakePai: fakePai, + dataSource: datadoghq.DatadogPodAutoscalerAutoscalingValueSource, + dataOffset: defaultStepDuration, + currentReplicas: 104, + statusReplicas: 104, + recReplicas: 100, + scaleReplicas: 100, + }) + assert.Equal(t, autoscaling.NoRequeue, result) + assert.NoError(t, err) + + // Test upscale to 102 replicas, limited to 100 + f.clock.Step(defaultStepDuration) + result, err = f.testScalingDecision(horizontalScalingTestArgs{ + fakePai: fakePai, + dataSource: datadoghq.DatadogPodAutoscalerAutoscalingValueSource, + dataOffset: defaultStepDuration, + currentReplicas: 100, + statusReplicas: 100, + recReplicas: 102, + scaleReplicas: 100, + scaleLimitReason: "desired replica count limited to 100 (originally 102) due to stabilization window", + }) + assert.Equal(t, autoscaling.NoRequeue, result) + assert.NoError(t, err) + + // Test upscale to 105 replicas, still limited to 100 + f.clock.Step(defaultStepDuration) + result, err = f.testScalingDecision(horizontalScalingTestArgs{ + fakePai: fakePai, + dataSource: datadoghq.DatadogPodAutoscalerAutoscalingValueSource, + dataOffset: defaultStepDuration, + currentReplicas: 100, + statusReplicas: 100, + recReplicas: 105, + scaleReplicas: 100, + 
scaleLimitReason: "desired replica count limited to 100 (originally 105) due to stabilization window", + }) + assert.Equal(t, autoscaling.NoRequeue, result) + assert.NoError(t, err) + + // Test upscale to 102 replicas (not limited) + // Moving clock 4 minutes forward to get past stabilization window + f.clock.Step(4 * time.Minute) + result, err = f.testScalingDecision(horizontalScalingTestArgs{ + fakePai: fakePai, + dataSource: datadoghq.DatadogPodAutoscalerAutoscalingValueSource, + dataOffset: defaultStepDuration, + currentReplicas: 100, + statusReplicas: 100, + recReplicas: 102, + scaleReplicas: 102, + }) + assert.Equal(t, autoscaling.NoRequeue, result) + assert.NoError(t, err) + + // Test downscale to 100 replicas (not limited) + f.clock.Step(defaultStepDuration) + result, err = f.testScalingDecision(horizontalScalingTestArgs{ + fakePai: fakePai, + dataSource: datadoghq.DatadogPodAutoscalerAutoscalingValueSource, + dataOffset: defaultStepDuration, + currentReplicas: 102, + statusReplicas: 102, + recReplicas: 100, + scaleReplicas: 100, + }) + assert.Equal(t, autoscaling.NoRequeue, result) + assert.NoError(t, err) +} diff --git a/pkg/clusteragent/autoscaling/workload/model/pod_autoscaler.go b/pkg/clusteragent/autoscaling/workload/model/pod_autoscaler.go index a1dc2f722beb1..0b36ab17ae2ef 100644 --- a/pkg/clusteragent/autoscaling/workload/model/pod_autoscaler.go +++ b/pkg/clusteragent/autoscaling/workload/model/pod_autoscaler.go @@ -589,16 +589,14 @@ func getHorizontalEventsRetention(policy *datadoghq.DatadogPodAutoscalerPolicy, if policy.Upscale != nil { upscaleRetention := getLongestScalingRulesPeriod(policy.Upscale.Rules) - if upscaleRetention > longestRetention { - longestRetention = upscaleRetention - } + upscaleStabilizationWindow := time.Second * time.Duration(policy.Upscale.StabilizationWindowSeconds) + longestRetention = max(longestRetention, upscaleRetention, upscaleStabilizationWindow) } if policy.Downscale != nil { downscaleRetention := getLongestScalingRulesPeriod(policy.Downscale.Rules) - if downscaleRetention > longestRetention { - longestRetention = downscaleRetention - } + downscaleStabilizationWindow := time.Second * time.Duration(policy.Downscale.StabilizationWindowSeconds) + longestRetention = max(longestRetention, downscaleRetention, downscaleStabilizationWindow) } if longestRetention > longestLookbackAllowed { diff --git a/pkg/clusteragent/autoscaling/workload/model/pod_autoscaler_test.go b/pkg/clusteragent/autoscaling/workload/model/pod_autoscaler_test.go index 15dc2a99bea8d..966f23b46c641 100644 --- a/pkg/clusteragent/autoscaling/workload/model/pod_autoscaler_test.go +++ b/pkg/clusteragent/autoscaling/workload/model/pod_autoscaler_test.go @@ -76,3 +76,193 @@ func TestAddHorizontalAction(t *testing.T) { *addedAction2, }, horizontalLastActions) } + +func TestGetHorizontalEventsRetention(t *testing.T) { + tests := []struct { + name string + policy *datadoghq.DatadogPodAutoscalerPolicy + longestLookbackAllowed time.Duration + expectedRetention time.Duration + }{ + { + name: "No policy, no retention", + policy: nil, + longestLookbackAllowed: 0, + expectedRetention: 0, + }, + { + name: "No policy, 15 minutes retention", + policy: nil, + longestLookbackAllowed: 15 * time.Minute, + expectedRetention: 0, + }, + { + name: "Upscale policy with rules, 30 minutes retention", + policy: &datadoghq.DatadogPodAutoscalerPolicy{ + Upscale: &datadoghq.DatadogPodAutoscalerScalingPolicy{ + Rules: []datadoghq.DatadogPodAutoscalerScalingRule{ + { + Type: "Pods", + PeriodSeconds: 900, + 
Value: 2, + }, + }, + }, + }, + longestLookbackAllowed: 30 * time.Minute, + expectedRetention: 15 * time.Minute, + }, + { + name: "Upscale policy with rules, 10 minutes max retention", + policy: &datadoghq.DatadogPodAutoscalerPolicy{ + Upscale: &datadoghq.DatadogPodAutoscalerScalingPolicy{ + Rules: []datadoghq.DatadogPodAutoscalerScalingRule{ + { + Type: "Pods", + PeriodSeconds: 900, + Value: 2, + }, + }, + }, + }, + longestLookbackAllowed: 10 * time.Minute, + expectedRetention: 10 * time.Minute, + }, + { + name: "Upscale and downscale policy with rules, 30 minutes retention", + policy: &datadoghq.DatadogPodAutoscalerPolicy{ + Upscale: &datadoghq.DatadogPodAutoscalerScalingPolicy{ + Rules: []datadoghq.DatadogPodAutoscalerScalingRule{ + { + Type: "Pods", + PeriodSeconds: 900, + Value: 2, + }, + }, + }, + Downscale: &datadoghq.DatadogPodAutoscalerScalingPolicy{ + Rules: []datadoghq.DatadogPodAutoscalerScalingRule{ + { + Type: "Pods", + PeriodSeconds: 960, + Value: 2, + }, + }, + }, + }, + longestLookbackAllowed: 30 * time.Minute, + expectedRetention: 16 * time.Minute, + }, + { + name: "Upscale and downscale policy with rules, 10 minutes max retention", + policy: &datadoghq.DatadogPodAutoscalerPolicy{ + Upscale: &datadoghq.DatadogPodAutoscalerScalingPolicy{ + Rules: []datadoghq.DatadogPodAutoscalerScalingRule{ + { + Type: "Pods", + PeriodSeconds: 900, + Value: 2, + }, + }, + }, + Downscale: &datadoghq.DatadogPodAutoscalerScalingPolicy{ + Rules: []datadoghq.DatadogPodAutoscalerScalingRule{ + { + Type: "Pods", + PeriodSeconds: 960, + Value: 2, + }, + }, + }, + }, + longestLookbackAllowed: 10 * time.Minute, + expectedRetention: 10 * time.Minute, + }, + { + name: "Upscale stabilization window 5 minutes", + policy: &datadoghq.DatadogPodAutoscalerPolicy{ + Upscale: &datadoghq.DatadogPodAutoscalerScalingPolicy{ + Rules: []datadoghq.DatadogPodAutoscalerScalingRule{ + { + Type: "Pods", + PeriodSeconds: 180, + Value: 2, + }, + }, + StabilizationWindowSeconds: 300, + }, + Downscale: &datadoghq.DatadogPodAutoscalerScalingPolicy{ + Rules: []datadoghq.DatadogPodAutoscalerScalingRule{ + { + Type: "Pods", + PeriodSeconds: 180, + Value: 2, + }, + }, + }, + }, + longestLookbackAllowed: 30 * time.Minute, + expectedRetention: 5 * time.Minute, + }, + { + name: "Downscale stabilization window 5 minutes", + policy: &datadoghq.DatadogPodAutoscalerPolicy{ + Upscale: &datadoghq.DatadogPodAutoscalerScalingPolicy{ + Rules: []datadoghq.DatadogPodAutoscalerScalingRule{ + { + Type: "Pods", + PeriodSeconds: 180, + Value: 2, + }, + }, + }, + Downscale: &datadoghq.DatadogPodAutoscalerScalingPolicy{ + Rules: []datadoghq.DatadogPodAutoscalerScalingRule{ + { + Type: "Pods", + PeriodSeconds: 180, + Value: 2, + }, + }, + StabilizationWindowSeconds: 300, + }, + }, + longestLookbackAllowed: 30 * time.Minute, + expectedRetention: 5 * time.Minute, + }, + { + name: "Stabilization, rules, max retention", + policy: &datadoghq.DatadogPodAutoscalerPolicy{ + Upscale: &datadoghq.DatadogPodAutoscalerScalingPolicy{ + Rules: []datadoghq.DatadogPodAutoscalerScalingRule{ + { + Type: "Pods", + PeriodSeconds: 360, + Value: 2, + }, + }, + StabilizationWindowSeconds: 300, + }, + Downscale: &datadoghq.DatadogPodAutoscalerScalingPolicy{ + Rules: []datadoghq.DatadogPodAutoscalerScalingRule{ + { + Type: "Pods", + PeriodSeconds: 420, + Value: 2, + }, + }, + StabilizationWindowSeconds: 180, + }, + }, + longestLookbackAllowed: 30 * time.Minute, + expectedRetention: 7 * time.Minute, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) 
{ + horizontalEventsRetention := getHorizontalEventsRetention(tt.policy, tt.longestLookbackAllowed) + assert.Equal(t, tt.expectedRetention, horizontalEventsRetention) + }) + } +}
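
To make the behaviour of the new stabilizeRecommendations helper easier to follow outside the patch context, here is a minimal standalone sketch of the same clamp. It is illustrative only: the pastAction and direction types and the stabilize signature are simplified stand-ins invented for this note (the real helper works on DatadogPodAutoscalerHorizontalAction values, also returns the limit-reason string, and stops scanning once actions fall outside both windows). The numbers reproduce the flapping-downscale case from TestStabilizeRecommendations.

// Standalone sketch of the stabilization clamp, with simplified types so it
// compiles outside the agent tree. Requires Go 1.21+ for the min/max builtins.
package main

import (
	"fmt"
	"time"
)

type pastAction struct {
	at          time.Time
	recommended int32
}

type direction int

const (
	scaleUp direction = iota
	scaleDown
)

// stabilize applies the stabilization windows: an upscale may not exceed the
// smallest recommendation seen within the upscale window, and a downscale may
// not go below the largest recommendation seen within the downscale window.
func stabilize(now time.Time, past []pastAction, current, target int32, upWindow, downWindow time.Duration, dir direction) int32 {
	upRec, downRec := target, target
	upCutoff := now.Add(-upWindow)
	downCutoff := now.Add(-downWindow)

	for _, a := range past {
		if dir == scaleUp && a.at.After(upCutoff) {
			upRec = min(upRec, a.recommended)
		}
		if dir == scaleDown && a.at.After(downCutoff) {
			downRec = max(downRec, a.recommended)
		}
	}

	// Clamp the current replica count between the two stabilized bounds.
	rec := current
	if rec < upRec {
		rec = upRec
	}
	if rec > downRec {
		rec = downRec
	}
	return rec
}

func main() {
	now := time.Now()
	// Flapping downscale: recent recommendations were 6, 9 and 7 replicas;
	// a new recommendation of 5 arrives while the workload is running 7.
	past := []pastAction{
		{now.Add(-90 * time.Second), 6},
		{now.Add(-60 * time.Second), 9},
		{now.Add(-30 * time.Second), 7},
	}
	// With a 300s downscale window the downscale is held at 7 replicas.
	fmt.Println(stabilize(now, past, 7, 5, 0, 300*time.Second, scaleDown)) // 7
}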