diff --git a/analysis/analysis.go b/analysis/analysis.go index 7d9793182d..3afc481635 100644 --- a/analysis/analysis.go +++ b/analysis/analysis.go @@ -80,6 +80,16 @@ func (c *Controller) reconcileAnalysisRun(origRun *v1alpha1.AnalysisRun) *v1alph return run } + measurementRetentionMetricsMap, err := analysisutil.GetMeasurementRetentionMetrics(run.Spec.MeasurementRetention, resolvedMetrics) + if err != nil { + message := fmt.Sprintf("Analysis spec invalid: %v", err) + logger.Warn(message) + run.Status.Phase = v1alpha1.AnalysisPhaseError + run.Status.Message = message + c.recordAnalysisRunCompletionEvent(run) + return run + } + tasks := generateMetricTasks(run, resolvedMetrics) logger.Infof("Taking %d Measurement(s)...", len(tasks)) err = c.runMeasurements(run, tasks, dryRunMetricsMap) @@ -101,7 +111,7 @@ func (c *Controller) reconcileAnalysisRun(origRun *v1alpha1.AnalysisRun) *v1alph } } - err = c.garbageCollectMeasurements(run, DefaultMeasurementHistoryLimit) + err = c.garbageCollectMeasurements(run, measurementRetentionMetricsMap, DefaultMeasurementHistoryLimit) if err != nil { // TODO(jessesuen): surface errors to controller so they can be retried logger.Warnf("Failed to garbage collect measurements: %v", err) @@ -693,7 +703,7 @@ func calculateNextReconcileTime(run *v1alpha1.AnalysisRun, metrics []v1alpha1.Me } // garbageCollectMeasurements trims the measurement history to the specified limit and GCs old measurements -func (c *Controller) garbageCollectMeasurements(run *v1alpha1.AnalysisRun, limit int) error { +func (c *Controller) garbageCollectMeasurements(run *v1alpha1.AnalysisRun, measurementRetentionMetricNamesMap map[string]*v1alpha1.MeasurementRetention, limit int) error { var errors []error metricsByName := make(map[string]v1alpha1.Metric) @@ -703,7 +713,12 @@ func (c *Controller) garbageCollectMeasurements(run *v1alpha1.AnalysisRun, limit for i, result := range run.Status.MetricResults { length := len(result.Measurements) - if length > limit { + measurementRetentionObject := measurementRetentionMetricNamesMap[result.Name] + measurementsLimit := limit + if measurementRetentionObject != nil && measurementRetentionObject.Limit > 0 { + measurementsLimit = measurementRetentionObject.Limit + } + if length > measurementsLimit { metric, ok := metricsByName[result.Name] if !ok { continue @@ -714,11 +729,11 @@ func (c *Controller) garbageCollectMeasurements(run *v1alpha1.AnalysisRun, limit errors = append(errors, err) continue } - err = provider.GarbageCollect(run, metric, limit) + err = provider.GarbageCollect(run, metric, measurementsLimit) if err != nil { return err } - result.Measurements = result.Measurements[length-limit : length] + result.Measurements = result.Measurements[length-measurementsLimit : length] } run.Status.MetricResults[i] = result } diff --git a/analysis/analysis_test.go b/analysis/analysis_test.go index 6533565e47..2eebd46dc7 100644 --- a/analysis/analysis_test.go +++ b/analysis/analysis_test.go @@ -1055,7 +1055,8 @@ func TestTrimMeasurementHistory(t *testing.T) { { run := newRun() - c.garbageCollectMeasurements(run, 2) + err := c.garbageCollectMeasurements(run, map[string]*v1alpha1.MeasurementRetention{}, 2) + assert.Nil(t, err) assert.Len(t, run.Status.MetricResults[0].Measurements, 1) assert.Equal(t, "1", run.Status.MetricResults[0].Measurements[0].Value) assert.Len(t, run.Status.MetricResults[1].Measurements, 2) @@ -1064,12 +1065,37 @@ func TestTrimMeasurementHistory(t *testing.T) { } { run := newRun() - c.garbageCollectMeasurements(run, 1) + err := c.garbageCollectMeasurements(run, map[string]*v1alpha1.MeasurementRetention{}, 1) + assert.Nil(t, err) assert.Len(t, run.Status.MetricResults[0].Measurements, 1) assert.Equal(t, "1", run.Status.MetricResults[0].Measurements[0].Value) assert.Len(t, run.Status.MetricResults[1].Measurements, 1) assert.Equal(t, "3", run.Status.MetricResults[1].Measurements[0].Value) } + { + run := newRun() + var measurementRetentionMetricsMap = map[string]*v1alpha1.MeasurementRetention{} + measurementRetentionMetricsMap["metric2"] = &v1alpha1.MeasurementRetention{MetricName: "*", Limit: 2} + err := c.garbageCollectMeasurements(run, measurementRetentionMetricsMap, 1) + assert.Nil(t, err) + assert.Len(t, run.Status.MetricResults[0].Measurements, 1) + assert.Equal(t, "1", run.Status.MetricResults[0].Measurements[0].Value) + assert.Len(t, run.Status.MetricResults[1].Measurements, 2) + assert.Equal(t, "2", run.Status.MetricResults[1].Measurements[0].Value) + assert.Equal(t, "3", run.Status.MetricResults[1].Measurements[1].Value) + } + { + run := newRun() + var measurementRetentionMetricsMap = map[string]*v1alpha1.MeasurementRetention{} + measurementRetentionMetricsMap["metric2"] = &v1alpha1.MeasurementRetention{MetricName: "metric2", Limit: 2} + err := c.garbageCollectMeasurements(run, measurementRetentionMetricsMap, 1) + assert.Nil(t, err) + assert.Len(t, run.Status.MetricResults[0].Measurements, 1) + assert.Equal(t, "1", run.Status.MetricResults[0].Measurements[0].Value) + assert.Len(t, run.Status.MetricResults[1].Measurements, 2) + assert.Equal(t, "2", run.Status.MetricResults[1].Measurements[0].Value) + assert.Equal(t, "3", run.Status.MetricResults[1].Measurements[1].Value) + } } func TestResolveMetricArgsUnableToSubstitute(t *testing.T) { diff --git a/manifests/crds/analysis-run-crd.yaml b/manifests/crds/analysis-run-crd.yaml index 6d399679a3..51d86d0e51 100644 --- a/manifests/crds/analysis-run-crd.yaml +++ b/manifests/crds/analysis-run-crd.yaml @@ -77,6 +77,19 @@ spec: - metricName type: object type: array + measurementRetention: + items: + properties: + limit: + format: int32 + type: integer + metricName: + type: string + required: + - limit + - metricName + type: object + type: array metrics: items: properties: diff --git a/manifests/crds/analysis-template-crd.yaml b/manifests/crds/analysis-template-crd.yaml index 0d61a67203..0677555741 100644 --- a/manifests/crds/analysis-template-crd.yaml +++ b/manifests/crds/analysis-template-crd.yaml @@ -73,6 +73,19 @@ spec: - metricName type: object type: array + measurementRetention: + items: + properties: + limit: + format: int32 + type: integer + metricName: + type: string + required: + - limit + - metricName + type: object + type: array metrics: items: properties: diff --git a/manifests/crds/cluster-analysis-template-crd.yaml b/manifests/crds/cluster-analysis-template-crd.yaml index e5cd0d29ba..6a25750124 100644 --- a/manifests/crds/cluster-analysis-template-crd.yaml +++ b/manifests/crds/cluster-analysis-template-crd.yaml @@ -73,6 +73,19 @@ spec: - metricName type: object type: array + measurementRetention: + items: + properties: + limit: + format: int32 + type: integer + metricName: + type: string + required: + - limit + - metricName + type: object + type: array metrics: items: properties: diff --git a/manifests/install.yaml b/manifests/install.yaml index ac1f78a792..2ffd3b6ac9 100644 --- a/manifests/install.yaml +++ b/manifests/install.yaml @@ -78,6 +78,19 @@ spec: - metricName type: object type: array + measurementRetention: + items: + properties: + limit: + format: int32 + type: integer + metricName: + type: string + required: + - limit + - metricName + type: object + type: array metrics: items: properties: @@ -2815,6 +2828,19 @@ spec: - metricName type: object type: array + measurementRetention: + items: + properties: + limit: + format: int32 + type: integer + metricName: + type: string + required: + - limit + - metricName + type: object + type: array metrics: items: properties: @@ -5442,6 +5468,19 @@ spec: - metricName type: object type: array + measurementRetention: + items: + properties: + limit: + format: int32 + type: integer + metricName: + type: string + required: + - limit + - metricName + type: object + type: array metrics: items: properties: diff --git a/manifests/namespace-install.yaml b/manifests/namespace-install.yaml index 05ff7bc8d7..1963a41218 100644 --- a/manifests/namespace-install.yaml +++ b/manifests/namespace-install.yaml @@ -78,6 +78,19 @@ spec: - metricName type: object type: array + measurementRetention: + items: + properties: + limit: + format: int32 + type: integer + metricName: + type: string + required: + - limit + - metricName + type: object + type: array metrics: items: properties: @@ -2815,6 +2828,19 @@ spec: - metricName type: object type: array + measurementRetention: + items: + properties: + limit: + format: int32 + type: integer + metricName: + type: string + required: + - limit + - metricName + type: object + type: array metrics: items: properties: @@ -5442,6 +5468,19 @@ spec: - metricName type: object type: array + measurementRetention: + items: + properties: + limit: + format: int32 + type: integer + metricName: + type: string + required: + - limit + - metricName + type: object + type: array metrics: items: properties: diff --git a/pkg/apis/api-rules/violation_exceptions.list b/pkg/apis/api-rules/violation_exceptions.list index a761bbfaf0..a4930858ab 100644 --- a/pkg/apis/api-rules/violation_exceptions.list +++ b/pkg/apis/api-rules/violation_exceptions.list @@ -1,10 +1,12 @@ API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,AmbassadorTrafficRouting,Mappings API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,AnalysisRunSpec,Args API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,AnalysisRunSpec,DryRun +API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,AnalysisRunSpec,MeasurementRetention API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,AnalysisRunSpec,Metrics API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,AnalysisRunStatus,MetricResults API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,AnalysisTemplateSpec,Args API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,AnalysisTemplateSpec,DryRun +API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,AnalysisTemplateSpec,MeasurementRetention API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,AnalysisTemplateSpec,Metrics API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,CanaryStrategy,Steps API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,CloudWatchMetric,MetricDataQueries diff --git a/pkg/apis/rollouts/v1alpha1/analysis_types.go b/pkg/apis/rollouts/v1alpha1/analysis_types.go index 12dd37970d..587ee90963 100644 --- a/pkg/apis/rollouts/v1alpha1/analysis_types.go +++ b/pkg/apis/rollouts/v1alpha1/analysis_types.go @@ -66,6 +66,11 @@ type AnalysisTemplateSpec struct { // +patchStrategy=merge // +optional DryRun []DryRun `json:"dryRun,omitempty" patchStrategy:"merge" patchMergeKey:"metricName" protobuf:"bytes,3,rep,name=dryRun"` + // MeasurementRetention object contains the settings for retaining the number of measurements during the analysis + // +patchMergeKey=metricName + // +patchStrategy=merge + // +optional + MeasurementRetention []MeasurementRetention `json:"measurementRetention,omitempty" patchStrategy:"merge" patchMergeKey:"metricName" protobuf:"bytes,4,rep,name=measurementRetention"` } // DurationString is a string representing a duration (e.g. 30s, 5m, 1h) @@ -120,6 +125,14 @@ type DryRun struct { MetricName string `json:"metricName" protobuf:"bytes,1,opt,name=metricName"` } +// MeasurementRetention defines the settings for retaining the number of measurements during the analysis. +type MeasurementRetention struct { + // MetricName is the name of the metric on which this retention policy should be applied. + MetricName string `json:"metricName" protobuf:"bytes,1,opt,name=metricName"` + // Limit is the maximum number of measurements to be retained for this given metric. + Limit int `json:"limit" protobuf:"varint,2,opt,name=limit"` +} + // EffectiveCount is the effective count based on whether or not count/interval is specified // If neither count or interval is specified, the effective count is 1 // If only interval is specified, metric runs indefinitely and there is no effective count (nil) @@ -292,6 +305,11 @@ type AnalysisRunSpec struct { // +patchStrategy=merge // +optional DryRun []DryRun `json:"dryRun,omitempty" patchStrategy:"merge" patchMergeKey:"metricName" protobuf:"bytes,4,rep,name=dryRun"` + // MeasurementRetention object contains the settings for retaining the number of measurements during the analysis + // +patchMergeKey=metricName + // +patchStrategy=merge + // +optional + MeasurementRetention []MeasurementRetention `json:"measurementRetention,omitempty" patchStrategy:"merge" patchMergeKey:"metricName" protobuf:"bytes,4,rep,name=measurementRetention"` } // Argument is an argument to an AnalysisRun diff --git a/pkg/apis/rollouts/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/rollouts/v1alpha1/zz_generated.deepcopy.go index 0a048cbd5c..e7ff0ab8d5 100644 --- a/pkg/apis/rollouts/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/rollouts/v1alpha1/zz_generated.deepcopy.go @@ -191,6 +191,11 @@ func (in *AnalysisRunSpec) DeepCopyInto(out *AnalysisRunSpec) { *out = make([]DryRun, len(*in)) copy(*out, *in) } + if in.MeasurementRetention != nil { + in, out := &in.MeasurementRetention, &out.MeasurementRetention + *out = make([]MeasurementRetention, len(*in)) + copy(*out, *in) + } return } @@ -345,6 +350,11 @@ func (in *AnalysisTemplateSpec) DeepCopyInto(out *AnalysisTemplateSpec) { *out = make([]DryRun, len(*in)) copy(*out, *in) } + if in.MeasurementRetention != nil { + in, out := &in.MeasurementRetention, &out.MeasurementRetention + *out = make([]MeasurementRetention, len(*in)) + copy(*out, *in) + } return } @@ -1330,6 +1340,22 @@ func (in *Measurement) DeepCopy() *Measurement { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *MeasurementRetention) DeepCopyInto(out *MeasurementRetention) { + *out = *in + return +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new MeasurementRetention. +func (in *MeasurementRetention) DeepCopy() *MeasurementRetention { + if in == nil { + return nil + } + out := new(MeasurementRetention) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *Metric) DeepCopyInto(out *Metric) { *out = *in diff --git a/utils/analysis/helpers.go b/utils/analysis/helpers.go index bf9a70e70f..cb25dabcac 100644 --- a/utils/analysis/helpers.go +++ b/utils/analysis/helpers.go @@ -93,6 +93,28 @@ func IsTerminating(run *v1alpha1.AnalysisRun) bool { return false } +// GetMeasurementRetentionMetrics returns an array of metric names matching the RegEx rules from the MeasurementRetention rules. +func GetMeasurementRetentionMetrics(measurementRetentionMetrics []v1alpha1.MeasurementRetention, metrics []v1alpha1.Metric) (map[string]*v1alpha1.MeasurementRetention, error) { + metricsMap := make(map[string]*v1alpha1.MeasurementRetention) + if len(measurementRetentionMetrics) == 0 { + return metricsMap, nil + } + // Iterate all the rules in `measurementRetentionMetrics` and try to match the `metrics` one by one + for index, measurementRetentionObject := range measurementRetentionMetrics { + matchCount := 0 + for _, metric := range metrics { + if matched, _ := regexp.MatchString(measurementRetentionObject.MetricName, metric.Name); matched { + metricsMap[metric.Name] = &measurementRetentionObject + matchCount++ + } + } + if matchCount < 1 { + return metricsMap, fmt.Errorf("measurementRetention[%d]: Rule didn't match any metric name(s)", index) + } + } + return metricsMap, nil +} + // GetDryRunMetrics returns an array of metric names matching the RegEx rules from the Dry-Run metrics. func GetDryRunMetrics(dryRunMetrics []v1alpha1.DryRun, metrics []v1alpha1.Metric) (map[string]bool, error) { metricsMap := make(map[string]bool) diff --git a/utils/analysis/helpers_test.go b/utils/analysis/helpers_test.go index 5a517afb90..3d73dcb061 100644 --- a/utils/analysis/helpers_test.go +++ b/utils/analysis/helpers_test.go @@ -530,7 +530,7 @@ func TestNewAnalysisRunFromTemplates(t *testing.T) { }, }} - clustertemplates := []*v1alpha1.ClusterAnalysisTemplate{} + var clusterTemplates []*v1alpha1.ClusterAnalysisTemplate arg := v1alpha1.Argument{ Name: "my-arg", @@ -547,7 +547,7 @@ func TestNewAnalysisRunFromTemplates(t *testing.T) { } args := []v1alpha1.Argument{arg, secretArg} - run, err := NewAnalysisRunFromTemplates(templates, clustertemplates, args, []v1alpha1.DryRun{}, "foo-run", "foo-run-generate-", "my-ns") + run, err := NewAnalysisRunFromTemplates(templates, clusterTemplates, args, []v1alpha1.DryRun{}, "foo-run", "foo-run-generate-", "my-ns") assert.NoError(t, err) assert.Equal(t, "foo-run", run.Name) assert.Equal(t, "foo-run-generate-", run.GenerateName) @@ -560,7 +560,7 @@ func TestNewAnalysisRunFromTemplates(t *testing.T) { // Fail Merge Args unresolvedArg := v1alpha1.Argument{Name: "unresolved"} templates[0].Spec.Args = append(templates[0].Spec.Args, unresolvedArg) - run, err = NewAnalysisRunFromTemplates(templates, clustertemplates, args, []v1alpha1.DryRun{}, "foo-run", "foo-run-generate-", "my-ns") + run, err = NewAnalysisRunFromTemplates(templates, clusterTemplates, args, []v1alpha1.DryRun{}, "foo-run", "foo-run-generate-", "my-ns") assert.Nil(t, run) assert.Equal(t, fmt.Errorf("args.unresolved was not resolved"), err) // Fail flatten metric @@ -573,7 +573,7 @@ func TestNewAnalysisRunFromTemplates(t *testing.T) { } // Fail Flatten Templates templates = append(templates, matchingMetric) - run, err = NewAnalysisRunFromTemplates(templates, clustertemplates, args, []v1alpha1.DryRun{}, "foo-run", "foo-run-generate-", "my-ns") + run, err = NewAnalysisRunFromTemplates(templates, clusterTemplates, args, []v1alpha1.DryRun{}, "foo-run", "foo-run-generate-", "my-ns") assert.Nil(t, run) assert.Equal(t, fmt.Errorf("two metrics have the same name 'success-rate'"), err) } @@ -897,3 +897,89 @@ func TestGetDryRunMetrics(t *testing.T) { assert.Equal(t, len(dryRunMetricNamesMap), 0) }) } + +func TestGetMeasurementRetentionMetrics(t *testing.T) { + t.Run("GetMeasurementRetentionMetrics returns the metric names map", func(t *testing.T) { + failureLimit := intstr.FromInt(2) + count := intstr.FromInt(1) + spec := v1alpha1.AnalysisTemplateSpec{ + Metrics: []v1alpha1.Metric{ + { + Name: "success-rate", + Count: &count, + FailureLimit: &failureLimit, + Provider: v1alpha1.MetricProvider{ + Prometheus: &v1alpha1.PrometheusMetric{}, + }, + }, + }, + MeasurementRetention: []v1alpha1.MeasurementRetention{ + { + MetricName: "success-rate", + Limit: 10, + }, + }, + } + measurementRetentionMetricNamesMap, err := GetMeasurementRetentionMetrics(spec.MeasurementRetention, spec.Metrics) + assert.Nil(t, err) + assert.NotNil(t, measurementRetentionMetricNamesMap["success-rate"]) + }) + t.Run("GetMeasurementRetentionMetrics handles the RegEx rules", func(t *testing.T) { + failureLimit := intstr.FromInt(2) + count := intstr.FromInt(1) + spec := v1alpha1.AnalysisTemplateSpec{ + Metrics: []v1alpha1.Metric{ + { + Name: "success-rate", + Count: &count, + FailureLimit: &failureLimit, + Provider: v1alpha1.MetricProvider{ + Prometheus: &v1alpha1.PrometheusMetric{}, + }, + }, + { + Name: "error-rate", + Count: &count, + FailureLimit: &failureLimit, + Provider: v1alpha1.MetricProvider{ + Prometheus: &v1alpha1.PrometheusMetric{}, + }, + }, + }, + MeasurementRetention: []v1alpha1.MeasurementRetention{ + { + MetricName: ".*", + Limit: 15, + }, + }, + } + measurementRetentionMetricNamesMap, err := GetMeasurementRetentionMetrics(spec.MeasurementRetention, spec.Metrics) + assert.Nil(t, err) + assert.Equal(t, len(measurementRetentionMetricNamesMap), 2) + }) + t.Run("GetMeasurementRetentionMetrics throw error when a rule doesn't get matched", func(t *testing.T) { + failureLimit := intstr.FromInt(2) + count := intstr.FromInt(1) + spec := v1alpha1.AnalysisTemplateSpec{ + Metrics: []v1alpha1.Metric{ + { + Name: "success-rate", + Count: &count, + FailureLimit: &failureLimit, + Provider: v1alpha1.MetricProvider{ + Prometheus: &v1alpha1.PrometheusMetric{}, + }, + }, + }, + MeasurementRetention: []v1alpha1.MeasurementRetention{ + { + MetricName: "error-rate", + Limit: 11, + }, + }, + } + measurementRetentionMetricNamesMap, err := GetMeasurementRetentionMetrics(spec.MeasurementRetention, spec.Metrics) + assert.EqualError(t, err, "measurementRetention[0]: Rule didn't match any metric name(s)") + assert.Equal(t, len(measurementRetentionMetricNamesMap), 0) + }) +}