Skip to content

Commit

Permalink
feat(analysis): Add Measurements Retention Limit Option for Metrics
Browse files Browse the repository at this point in the history
Signed-off-by: Rohit Agrawal <[email protected]>
  • Loading branch information
agrawroh committed Dec 23, 2021
1 parent a3477cf commit 2c7071c
Show file tree
Hide file tree
Showing 12 changed files with 323 additions and 11 deletions.
25 changes: 20 additions & 5 deletions analysis/analysis.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,16 @@ func (c *Controller) reconcileAnalysisRun(origRun *v1alpha1.AnalysisRun) *v1alph
return run
}

measurementRetentionMetricsMap, err := analysisutil.GetMeasurementRetentionMetrics(run.Spec.MeasurementRetention, resolvedMetrics)
if err != nil {
message := fmt.Sprintf("Analysis spec invalid: %v", err)
logger.Warn(message)
run.Status.Phase = v1alpha1.AnalysisPhaseError
run.Status.Message = message
c.recordAnalysisRunCompletionEvent(run)
return run
}

tasks := generateMetricTasks(run, resolvedMetrics)
logger.Infof("Taking %d Measurement(s)...", len(tasks))
err = c.runMeasurements(run, tasks, dryRunMetricsMap)
Expand All @@ -101,7 +111,7 @@ func (c *Controller) reconcileAnalysisRun(origRun *v1alpha1.AnalysisRun) *v1alph
}
}

err = c.garbageCollectMeasurements(run, DefaultMeasurementHistoryLimit)
err = c.garbageCollectMeasurements(run, measurementRetentionMetricsMap, DefaultMeasurementHistoryLimit)
if err != nil {
// TODO(jessesuen): surface errors to controller so they can be retried
logger.Warnf("Failed to garbage collect measurements: %v", err)
Expand Down Expand Up @@ -693,7 +703,7 @@ func calculateNextReconcileTime(run *v1alpha1.AnalysisRun, metrics []v1alpha1.Me
}

// garbageCollectMeasurements trims the measurement history to the specified limit and GCs old measurements
func (c *Controller) garbageCollectMeasurements(run *v1alpha1.AnalysisRun, limit int) error {
func (c *Controller) garbageCollectMeasurements(run *v1alpha1.AnalysisRun, measurementRetentionMetricNamesMap map[string]*v1alpha1.MeasurementRetention, limit int) error {
var errors []error

metricsByName := make(map[string]v1alpha1.Metric)
Expand All @@ -703,7 +713,12 @@ func (c *Controller) garbageCollectMeasurements(run *v1alpha1.AnalysisRun, limit

for i, result := range run.Status.MetricResults {
length := len(result.Measurements)
if length > limit {
measurementRetentionObject := measurementRetentionMetricNamesMap[result.Name]
measurementsLimit := limit
if measurementRetentionObject != nil && measurementRetentionObject.Limit > 0 {
measurementsLimit = measurementRetentionObject.Limit
}
if length > measurementsLimit {
metric, ok := metricsByName[result.Name]
if !ok {
continue
Expand All @@ -714,11 +729,11 @@ func (c *Controller) garbageCollectMeasurements(run *v1alpha1.AnalysisRun, limit
errors = append(errors, err)
continue
}
err = provider.GarbageCollect(run, metric, limit)
err = provider.GarbageCollect(run, metric, measurementsLimit)
if err != nil {
return err
}
result.Measurements = result.Measurements[length-limit : length]
result.Measurements = result.Measurements[length-measurementsLimit : length]
}
run.Status.MetricResults[i] = result
}
Expand Down
30 changes: 28 additions & 2 deletions analysis/analysis_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1055,7 +1055,8 @@ func TestTrimMeasurementHistory(t *testing.T) {

{
run := newRun()
c.garbageCollectMeasurements(run, 2)
err := c.garbageCollectMeasurements(run, map[string]*v1alpha1.MeasurementRetention{}, 2)
assert.Nil(t, err)
assert.Len(t, run.Status.MetricResults[0].Measurements, 1)
assert.Equal(t, "1", run.Status.MetricResults[0].Measurements[0].Value)
assert.Len(t, run.Status.MetricResults[1].Measurements, 2)
Expand All @@ -1064,12 +1065,37 @@ func TestTrimMeasurementHistory(t *testing.T) {
}
{
run := newRun()
c.garbageCollectMeasurements(run, 1)
err := c.garbageCollectMeasurements(run, map[string]*v1alpha1.MeasurementRetention{}, 1)
assert.Nil(t, err)
assert.Len(t, run.Status.MetricResults[0].Measurements, 1)
assert.Equal(t, "1", run.Status.MetricResults[0].Measurements[0].Value)
assert.Len(t, run.Status.MetricResults[1].Measurements, 1)
assert.Equal(t, "3", run.Status.MetricResults[1].Measurements[0].Value)
}
{
run := newRun()
var measurementRetentionMetricsMap = map[string]*v1alpha1.MeasurementRetention{}
measurementRetentionMetricsMap["metric2"] = &v1alpha1.MeasurementRetention{MetricName: "*", Limit: 2}
err := c.garbageCollectMeasurements(run, measurementRetentionMetricsMap, 1)
assert.Nil(t, err)
assert.Len(t, run.Status.MetricResults[0].Measurements, 1)
assert.Equal(t, "1", run.Status.MetricResults[0].Measurements[0].Value)
assert.Len(t, run.Status.MetricResults[1].Measurements, 2)
assert.Equal(t, "2", run.Status.MetricResults[1].Measurements[0].Value)
assert.Equal(t, "3", run.Status.MetricResults[1].Measurements[1].Value)
}
{
run := newRun()
var measurementRetentionMetricsMap = map[string]*v1alpha1.MeasurementRetention{}
measurementRetentionMetricsMap["metric2"] = &v1alpha1.MeasurementRetention{MetricName: "metric2", Limit: 2}
err := c.garbageCollectMeasurements(run, measurementRetentionMetricsMap, 1)
assert.Nil(t, err)
assert.Len(t, run.Status.MetricResults[0].Measurements, 1)
assert.Equal(t, "1", run.Status.MetricResults[0].Measurements[0].Value)
assert.Len(t, run.Status.MetricResults[1].Measurements, 2)
assert.Equal(t, "2", run.Status.MetricResults[1].Measurements[0].Value)
assert.Equal(t, "3", run.Status.MetricResults[1].Measurements[1].Value)
}
}

func TestResolveMetricArgsUnableToSubstitute(t *testing.T) {
Expand Down
13 changes: 13 additions & 0 deletions manifests/crds/analysis-run-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,19 @@ spec:
- metricName
type: object
type: array
measurementRetention:
items:
properties:
limit:
format: int32
type: integer
metricName:
type: string
required:
- limit
- metricName
type: object
type: array
metrics:
items:
properties:
Expand Down
13 changes: 13 additions & 0 deletions manifests/crds/analysis-template-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,19 @@ spec:
- metricName
type: object
type: array
measurementRetention:
items:
properties:
limit:
format: int32
type: integer
metricName:
type: string
required:
- limit
- metricName
type: object
type: array
metrics:
items:
properties:
Expand Down
13 changes: 13 additions & 0 deletions manifests/crds/cluster-analysis-template-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,19 @@ spec:
- metricName
type: object
type: array
measurementRetention:
items:
properties:
limit:
format: int32
type: integer
metricName:
type: string
required:
- limit
- metricName
type: object
type: array
metrics:
items:
properties:
Expand Down
39 changes: 39 additions & 0 deletions manifests/install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,19 @@ spec:
- metricName
type: object
type: array
measurementRetention:
items:
properties:
limit:
format: int32
type: integer
metricName:
type: string
required:
- limit
- metricName
type: object
type: array
metrics:
items:
properties:
Expand Down Expand Up @@ -2815,6 +2828,19 @@ spec:
- metricName
type: object
type: array
measurementRetention:
items:
properties:
limit:
format: int32
type: integer
metricName:
type: string
required:
- limit
- metricName
type: object
type: array
metrics:
items:
properties:
Expand Down Expand Up @@ -5442,6 +5468,19 @@ spec:
- metricName
type: object
type: array
measurementRetention:
items:
properties:
limit:
format: int32
type: integer
metricName:
type: string
required:
- limit
- metricName
type: object
type: array
metrics:
items:
properties:
Expand Down
39 changes: 39 additions & 0 deletions manifests/namespace-install.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,19 @@ spec:
- metricName
type: object
type: array
measurementRetention:
items:
properties:
limit:
format: int32
type: integer
metricName:
type: string
required:
- limit
- metricName
type: object
type: array
metrics:
items:
properties:
Expand Down Expand Up @@ -2815,6 +2828,19 @@ spec:
- metricName
type: object
type: array
measurementRetention:
items:
properties:
limit:
format: int32
type: integer
metricName:
type: string
required:
- limit
- metricName
type: object
type: array
metrics:
items:
properties:
Expand Down Expand Up @@ -5442,6 +5468,19 @@ spec:
- metricName
type: object
type: array
measurementRetention:
items:
properties:
limit:
format: int32
type: integer
metricName:
type: string
required:
- limit
- metricName
type: object
type: array
metrics:
items:
properties:
Expand Down
2 changes: 2 additions & 0 deletions pkg/apis/api-rules/violation_exceptions.list
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,AmbassadorTrafficRouting,Mappings
API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,AnalysisRunSpec,Args
API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,AnalysisRunSpec,DryRun
API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,AnalysisRunSpec,MeasurementRetention
API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,AnalysisRunSpec,Metrics
API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,AnalysisRunStatus,MetricResults
API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,AnalysisTemplateSpec,Args
API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,AnalysisTemplateSpec,DryRun
API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,AnalysisTemplateSpec,MeasurementRetention
API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,AnalysisTemplateSpec,Metrics
API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,CanaryStrategy,Steps
API rule violation: list_type_missing,github.com/argoproj/argo-rollouts/pkg/apis/rollouts/v1alpha1,CloudWatchMetric,MetricDataQueries
Expand Down
18 changes: 18 additions & 0 deletions pkg/apis/rollouts/v1alpha1/analysis_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,11 @@ type AnalysisTemplateSpec struct {
// +patchStrategy=merge
// +optional
DryRun []DryRun `json:"dryRun,omitempty" patchStrategy:"merge" patchMergeKey:"metricName" protobuf:"bytes,3,rep,name=dryRun"`
// MeasurementRetention object contains the settings for retaining the number of measurements during the analysis
// +patchMergeKey=metricName
// +patchStrategy=merge
// +optional
MeasurementRetention []MeasurementRetention `json:"measurementRetention,omitempty" patchStrategy:"merge" patchMergeKey:"metricName" protobuf:"bytes,4,rep,name=measurementRetention"`
}

// DurationString is a string representing a duration (e.g. 30s, 5m, 1h)
Expand Down Expand Up @@ -120,6 +125,14 @@ type DryRun struct {
MetricName string `json:"metricName" protobuf:"bytes,1,opt,name=metricName"`
}

// MeasurementRetention defines the settings for retaining the number of measurements during the analysis.
type MeasurementRetention struct {
// MetricName is the name of the metric on which this retention policy should be applied.
MetricName string `json:"metricName" protobuf:"bytes,1,opt,name=metricName"`
// Limit is the maximum number of measurements to be retained for this given metric.
Limit int `json:"limit" protobuf:"varint,2,opt,name=limit"`
}

// EffectiveCount is the effective count based on whether or not count/interval is specified
// If neither count or interval is specified, the effective count is 1
// If only interval is specified, metric runs indefinitely and there is no effective count (nil)
Expand Down Expand Up @@ -292,6 +305,11 @@ type AnalysisRunSpec struct {
// +patchStrategy=merge
// +optional
DryRun []DryRun `json:"dryRun,omitempty" patchStrategy:"merge" patchMergeKey:"metricName" protobuf:"bytes,4,rep,name=dryRun"`
// MeasurementRetention object contains the settings for retaining the number of measurements during the analysis
// +patchMergeKey=metricName
// +patchStrategy=merge
// +optional
MeasurementRetention []MeasurementRetention `json:"measurementRetention,omitempty" patchStrategy:"merge" patchMergeKey:"metricName" protobuf:"bytes,4,rep,name=measurementRetention"`
}

// Argument is an argument to an AnalysisRun
Expand Down
26 changes: 26 additions & 0 deletions pkg/apis/rollouts/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit 2c7071c

Please sign in to comment.