From fba91bb1240c3b239910a75eed781f4d044d8d5b Mon Sep 17 00:00:00 2001 From: awgreene Date: Wed, 30 Oct 2019 12:47:00 -0400 Subject: [PATCH] feat(metrics) Limit Cardinality of CSV metrics This commit introduces a change that limits the number of metrics that an OLM cluster reports at any given time for a CSV. The first metric introduced is called csv_up, which reports whether a CSV has reached the succeeded phase. The following information is provided about the CSV via labels: name, version. The value of this metric will always be 0 or 1: 1 once the CSV is in the succeeded phase, 0 otherwise. The second metric introduced is called csv_abnormal, which is reported whenever the CSV is updated and has not reached the succeeded phase. The following information is provided about the CSV via labels: name, version, phase, reason. Whenever a CSV is updated, its existing csv_abnormal time series is deleted and, if the CSV has still not reached the succeeded phase, replaced by one carrying the updated phase and reason. --- pkg/controller/operators/olm/operator.go | 4 +-- pkg/metrics/metrics.go | 39 ++++++++++++++++++------ 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/pkg/controller/operators/olm/operator.go b/pkg/controller/operators/olm/operator.go index d1c0b43dabc..7165bb5b3ad 100644 --- a/pkg/controller/operators/olm/operator.go +++ b/pkg/controller/operators/olm/operator.go @@ -930,8 +930,6 @@ func (a *Operator) syncClusterServiceVersion(obj interface{}) (syncError error) }) logger.Debug("syncing CSV") - metrics.EmitCSVMetric(clusterServiceVersion) - if a.csvNotification != nil { a.csvNotification.OnAddOrUpdate(clusterServiceVersion) } @@ -964,6 +962,8 @@ func (a *Operator) syncClusterServiceVersion(obj interface{}) (syncError error) } else { syncError = fmt.Errorf("error transitioning ClusterServiceVersion: %s and error updating CSV status: %s", syncError, updateErr) } + } else { + metrics.EmitCSVMetric(clusterServiceVersion, outCSV) } } diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index e401cf7c751..b5a0037da0a 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ 
-5,17 +5,16 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" + olmv1alpha1 "github.com/operator-framework/operator-lifecycle-manager/pkg/api/apis/operators/v1alpha1" "github.com/operator-framework/operator-lifecycle-manager/pkg/api/client/clientset/versioned" v1alpha1 "github.com/operator-framework/operator-lifecycle-manager/pkg/api/client/listers/operators/v1alpha1" - olmv1alpha1 "github.com/operator-framework/operator-lifecycle-manager/pkg/api/apis/operators/v1alpha1" - ) const ( NAME_LABEL = "name" INSTALLED_LABEL = "installed" VERSION_LABEL = "version" - PHASE_LABEL = "phase" + PHASE_LABEL = "phase" REASON_LABEL = "reason" ) @@ -151,10 +150,18 @@ var ( []string{NAME_LABEL, INSTALLED_LABEL}, ) - csvSyncCounter = prometheus.NewCounterVec( - prometheus.CounterOpts{ - Name: "csv_sync_total", - Help: "Monotonic count of CSV syncs", + csvSucceeded = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "csv_up", + Help: "Successful CSV Install", + }, + []string{NAME_LABEL, VERSION_LABEL}, + ) + + csvAbnormal = prometheus.NewGaugeVec( + prometheus.GaugeOpts{ + Name: "csv_abnormal", + Help: "The current state of a CSV not in the succeeded phase", }, []string{NAME_LABEL, VERSION_LABEL, PHASE_LABEL, REASON_LABEL}, ) @@ -162,7 +169,8 @@ var ( func RegisterOLM() { prometheus.MustRegister(csvCount) - prometheus.MustRegister(csvSyncCounter) + prometheus.MustRegister(csvSucceeded) + prometheus.MustRegister(csvAbnormal) prometheus.MustRegister(CSVUpgradeCount) } @@ -177,6 +185,17 @@ func CounterForSubscription(name, installedCSV string) prometheus.Counter { return SubscriptionSyncCount.WithLabelValues(name, installedCSV) } -func EmitCSVMetric(csv *olmv1alpha1.ClusterServiceVersion){ - csvSyncCounter.WithLabelValues(csv.Name, csv.Spec.Version.String(), string(csv.Status.Phase), string(csv.Status.Reason)).Inc() +func EmitCSVMetric(oldCSV *olmv1alpha1.ClusterServiceVersion, newCSV *olmv1alpha1.ClusterServiceVersion) { + // Delete the 
old CSV metrics + csvAbnormal.DeleteLabelValues(oldCSV.Name, oldCSV.Spec.Version.String(), string(oldCSV.Status.Phase), string(oldCSV.Status.Reason)) + + // Get the phase of the new CSV + newCSVPhase := string(newCSV.Status.Phase) + csvSucceededGauge := csvSucceeded.WithLabelValues(newCSV.Name, newCSV.Spec.Version.String()) + if newCSVPhase == string(olmv1alpha1.CSVPhaseSucceeded) { + csvSucceededGauge.Set(1) + } else { + csvSucceededGauge.Set(0) + csvAbnormal.WithLabelValues(newCSV.Name, newCSV.Spec.Version.String(), string(newCSV.Status.Phase), string(newCSV.Status.Reason)).Set(1) + } }