diff --git a/cmd/olm/main.go b/cmd/olm/main.go index 0577d99e0d..8ef1a06fdc 100644 --- a/cmd/olm/main.go +++ b/cmd/olm/main.go @@ -8,6 +8,7 @@ import ( "strings" "time" + "github.com/prometheus/client_golang/prometheus" log "github.com/sirupsen/logrus" "github.com/operator-framework/operator-lifecycle-manager/pkg/api/client" @@ -15,6 +16,7 @@ import ( "github.com/operator-framework/operator-lifecycle-manager/pkg/controller/operators/olm" "github.com/operator-framework/operator-lifecycle-manager/pkg/lib/operatorclient" "github.com/operator-framework/operator-lifecycle-manager/pkg/lib/signals" + "github.com/operator-framework/operator-lifecycle-manager/pkg/metrics" olmversion "github.com/operator-framework/operator-lifecycle-manager/pkg/version" ) @@ -54,6 +56,10 @@ var ( version = flag.Bool("version", false, "displays olm version") ) +func init() { + metrics.Register() +} + // main function - entrypoint to ALM operator func main() { stopCh := signals.SetupSignalHandler() @@ -109,6 +115,10 @@ func main() { http.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(http.StatusOK) }) + // TODO: both of the following require vendor updates (add k8s.io/apiserver and update prometheus) + //healthz.InstallHandler(mux) //(less code) + //mux.Handle("/metrics", promhttp.Handler()) //other form is deprecated + http.Handle("/metrics", prometheus.Handler()) go http.ListenAndServe(":8080", nil) operator.Run(stopCh) diff --git a/pkg/controller/operators/catalog/operator.go b/pkg/controller/operators/catalog/operator.go index a432772f8f..40f1e38f7d 100644 --- a/pkg/controller/operators/catalog/operator.go +++ b/pkg/controller/operators/catalog/operator.go @@ -9,6 +9,7 @@ import ( "time" olmerrors "github.com/operator-framework/operator-lifecycle-manager/pkg/controller/errors" + "github.com/operator-framework/operator-lifecycle-manager/pkg/metrics" log "github.com/sirupsen/logrus" "k8s.io/api/core/v1" rbacv1 "k8s.io/api/rbac/v1" @@ -102,6 +103,7 @@ func NewOperator(kubeconfigPath string, wakeupInterval time.Duration, operatorNa op.syncCatalogSources, nil, "catsrc", + metrics.NewMetricsCatalogSource(op.Operator.OpClient), ) for _, informer := range catsrcQueueInformer { op.RegisterQueueInformer(informer) @@ -115,6 +117,7 @@ func NewOperator(kubeconfigPath string, wakeupInterval time.Duration, operatorNa op.syncInstallPlans, nil, "installplan", + metrics.NewMetricsInstallPlan(op.Operator.OpClient), ) for _, informer := range ipQueueInformers { op.RegisterQueueInformer(informer) @@ -128,6 +131,7 @@ func NewOperator(kubeconfigPath string, wakeupInterval time.Duration, operatorNa op.syncSubscriptions, nil, "subscription", + metrics.NewMetricsSubscription(op.Operator.OpClient), ) op.subQueue = subscriptionQueue for _, informer := range subscriptionQueueInformers { diff --git a/pkg/controller/operators/olm/operator.go b/pkg/controller/operators/olm/operator.go index 110a64cd40..6e4b2b4e07 100644 --- a/pkg/controller/operators/olm/operator.go +++ b/pkg/controller/operators/olm/operator.go @@ -21,6 +21,7 @@ import ( "github.com/operator-framework/operator-lifecycle-manager/pkg/lib/operatorclient" "github.com/operator-framework/operator-lifecycle-manager/pkg/lib/ownerutil" "github.com/operator-framework/operator-lifecycle-manager/pkg/lib/queueinformer" + "github.com/operator-framework/operator-lifecycle-manager/pkg/metrics" ) var ErrRequirementsNotMet = errors.New("requirements were not met") @@ -73,6 +74,7 @@ func NewOperator(crClient versioned.Interface, opClient operatorclient.ClientInt op.annotateNamespace, nil, "namespace", + metrics.NewMetricsNil(), ) op.RegisterQueueInformer(queueInformer) } @@ -100,6 +102,7 @@ func NewOperator(crClient versioned.Interface, opClient operatorclient.ClientInt op.syncClusterServiceVersion, nil, "csv", + metrics.NewMetricsCSV(op.Operator.OpClient), ) for _, informer := range queueInformers { op.RegisterQueueInformer(informer) @@ -121,6 +124,7 @@ func NewOperator(crClient versioned.Interface, opClient operatorclient.ClientInt op.syncDeployment, nil, "deployment", + metrics.NewMetricsNil(), ) for _, informer := range depQueueInformers { op.RegisterQueueInformer(informer) @@ -345,6 +349,7 @@ func (a *Operator) checkReplacementsAndUpdateStatus(csv *v1alpha1.ClusterService log.Infof("newer ClusterServiceVersion replacing %s, no-op", csv.SelfLink) msg := fmt.Sprintf("being replaced by csv: %s", replacement.SelfLink) csv.SetPhase(v1alpha1.CSVPhaseReplacing, v1alpha1.CSVReasonBeingReplaced, msg) + metrics.CSVUpgradeCount.Inc() return fmt.Errorf("replacing") } diff --git a/pkg/lib/queueinformer/queueinformer.go b/pkg/lib/queueinformer/queueinformer.go index 504b53da6c..6b17be906a 100644 --- a/pkg/lib/queueinformer/queueinformer.go +++ b/pkg/lib/queueinformer/queueinformer.go @@ -1,6 +1,7 @@ package queueinformer import ( + "github.com/operator-framework/operator-lifecycle-manager/pkg/metrics" log "github.com/sirupsen/logrus" "k8s.io/client-go/tools/cache" "k8s.io/client-go/util/workqueue" @@ -18,6 +19,7 @@ type QueueInformer struct { syncHandler SyncHandler resourceEventHandlerFuncs *cache.ResourceEventHandlerFuncs name string + metrics.MetricsProvider } // enqueue adds a key to the queue. If obj is a key already it gets added directly. @@ -85,22 +87,23 @@ func (q *QueueInformer) defaultResourceEventHandlerFuncs() *cache.ResourceEventH // New creates a set of new queueinformers given a name, a set of informers, and a sync handler to handle the objects // that the operator is managing. Optionally, custom event handler funcs can be passed in (defaults will be provided) -func New(queue workqueue.RateLimitingInterface, informers []cache.SharedIndexInformer, handler SyncHandler, funcs *cache.ResourceEventHandlerFuncs, name string) []*QueueInformer { +func New(queue workqueue.RateLimitingInterface, informers []cache.SharedIndexInformer, handler SyncHandler, funcs *cache.ResourceEventHandlerFuncs, name string, metrics metrics.MetricsProvider) []*QueueInformer { queueInformers := []*QueueInformer{} for _, informer := range informers { - queueInformers = append(queueInformers, NewInformer(queue, informer, handler, funcs, name)) + queueInformers = append(queueInformers, NewInformer(queue, informer, handler, funcs, name, metrics)) } return queueInformers } // NewInformer creates a new queueinformer given a name, an informer, and a sync handler to handle the objects // that the operator is managing. Optionally, custom event handler funcs can be passed in (defaults will be provided) -func NewInformer(queue workqueue.RateLimitingInterface, informer cache.SharedIndexInformer, handler SyncHandler, funcs *cache.ResourceEventHandlerFuncs, name string) *QueueInformer { +func NewInformer(queue workqueue.RateLimitingInterface, informer cache.SharedIndexInformer, handler SyncHandler, funcs *cache.ResourceEventHandlerFuncs, name string, metrics metrics.MetricsProvider) *QueueInformer { queueInformer := &QueueInformer{ - queue: queue, - informer: informer, - syncHandler: handler, - name: name, + queue: queue, + informer: informer, + syncHandler: handler, + name: name, + MetricsProvider: metrics, } if funcs == nil { queueInformer.resourceEventHandlerFuncs = queueInformer.defaultResourceEventHandlerFuncs() diff --git a/pkg/lib/queueinformer/queueinformer_operator.go b/pkg/lib/queueinformer/queueinformer_operator.go index fa08ba7e9f..3baf59a78b 100644 --- a/pkg/lib/queueinformer/queueinformer_operator.go +++ b/pkg/lib/queueinformer/queueinformer_operator.go @@ -123,6 +123,9 @@ func (o *Operator) processNextWorkItem(loop *QueueInformer) bool { return true } queue.Forget(key) + if err := loop.HandleMetrics(); err != nil { + log.Error(err) + } return true } @@ -133,7 +136,6 @@ func (o *Operator) sync(loop *QueueInformer, key string) error { if err != nil { return err } - if !exists { // For now, we ignore the case where an object used to exist but no longer does logger.Info("couldn't get from queue") diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go new file mode 100644 index 0000000000..cd0d577003 --- /dev/null +++ b/pkg/metrics/metrics.go @@ -0,0 +1,140 @@ +package metrics + +import ( + "github.com/operator-framework/operator-lifecycle-manager/pkg/api/apis/operators/v1alpha1" + "github.com/operator-framework/operator-lifecycle-manager/pkg/lib/operatorclient" + "github.com/prometheus/client_golang/prometheus" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +type MetricsProvider interface { + HandleMetrics() error +} + +type metricsCSV struct { + opClient operatorclient.ClientInterface +} + +func NewMetricsCSV(opClient operatorclient.ClientInterface) MetricsProvider { + return &metricsCSV{opClient} +} + +func (m *metricsCSV) HandleMetrics() error { + cList, err := m.opClient.ListCustomResource(v1alpha1.GroupName, v1alpha1.GroupVersion, metav1.NamespaceAll, v1alpha1.ClusterServiceVersionKind) + if err != nil { + return err + } + csvCount.Set(float64(len(cList.Items))) + return nil +} + +type metricsInstallPlan struct { + opClient operatorclient.ClientInterface +} + +func NewMetricsInstallPlan(opClient operatorclient.ClientInterface) MetricsProvider { + return &metricsInstallPlan{opClient} +} + +func (m *metricsInstallPlan) HandleMetrics() error { + cList, err := m.opClient.ListCustomResource(v1alpha1.GroupName, v1alpha1.GroupVersion, metav1.NamespaceAll, v1alpha1.InstallPlanKind) + if err != nil { + return err + } + installPlanCount.Set(float64(len(cList.Items))) + return nil +} + +type metricsSubscription struct { + opClient operatorclient.ClientInterface +} + +func NewMetricsSubscription(opClient operatorclient.ClientInterface) MetricsProvider { + return &metricsSubscription{opClient} +} + +func (m *metricsSubscription) HandleMetrics() error { + cList, err := m.opClient.ListCustomResource(v1alpha1.GroupName, v1alpha1.GroupVersion, metav1.NamespaceAll, v1alpha1.SubscriptionKind) + if err != nil { + return err + } + subscriptionCount.Set(float64(len(cList.Items))) + return nil +} + +type metricsCatalogSource struct { + opClient operatorclient.ClientInterface +} + +func NewMetricsCatalogSource(opClient operatorclient.ClientInterface) MetricsProvider { + return &metricsCatalogSource{opClient} + +} + +func (m *metricsCatalogSource) HandleMetrics() error { + cList, err := m.opClient.ListCustomResource(v1alpha1.GroupName, v1alpha1.GroupVersion, metav1.NamespaceAll, v1alpha1.CatalogSourceKind) + if err != nil { + return err + } + catalogSourceCount.Set(float64(len(cList.Items))) + return nil +} + +type MetricsNil struct{} + +func NewMetricsNil() MetricsProvider { + return &MetricsNil{} +} + +func (*MetricsNil) HandleMetrics() error { + return nil +} + +// To add new metrics: +// 1. Register new metrics in Register() below. +// 2. Add appropriate metric updates in HandleMetrics (or elsewhere instead). +var ( + csvCount = prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "csv_count", + Help: "Number of CSVs successfully registered", + }, + ) + + installPlanCount = prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "install_plan_count", + Help: "Number of install plans", + }, + ) + + subscriptionCount = prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "subscription_count", + Help: "Number of subscriptions", + }, + ) + + catalogSourceCount = prometheus.NewGauge( + prometheus.GaugeOpts{ + Name: "catalog_source_count", + Help: "Number of catalog sources", + }, + ) + + // exported since it's not handled by HandleMetrics + CSVUpgradeCount = prometheus.NewCounter( + prometheus.CounterOpts{ + Name: "csv_upgrade_count", + Help: "Monotonic count of catalog sources", + }, + ) +) + +func Register() { + prometheus.MustRegister(csvCount) + prometheus.MustRegister(installPlanCount) + prometheus.MustRegister(subscriptionCount) + prometheus.MustRegister(catalogSourceCount) + prometheus.MustRegister(CSVUpgradeCount) +} diff --git a/pkg/package-server/provider/inmem.go b/pkg/package-server/provider/inmem.go index 6156f5778e..78716565c9 100644 --- a/pkg/package-server/provider/inmem.go +++ b/pkg/package-server/provider/inmem.go @@ -15,6 +15,7 @@ import ( operatorsv1alpha1 "github.com/operator-framework/operator-lifecycle-manager/pkg/api/apis/operators/v1alpha1" "github.com/operator-framework/operator-lifecycle-manager/pkg/lib/queueinformer" + "github.com/operator-framework/operator-lifecycle-manager/pkg/metrics" packagev1alpha1 "github.com/operator-framework/operator-lifecycle-manager/pkg/package-server/apis/packagemanifest/v1alpha1" ) @@ -56,6 +57,7 @@ func NewInMemoryProvider(informers []cache.SharedIndexInformer, queueOperator *q prov.syncCatalogSource, nil, "catsrc", + metrics.NewMetricsNil(), ) for _, informer := range queueInformers { prov.RegisterQueueInformer(informer) diff --git a/test/e2e/metrics_e2e_test.go b/test/e2e/metrics_e2e_test.go new file mode 100644 index 0000000000..6a924b3508 --- /dev/null +++ b/test/e2e/metrics_e2e_test.go @@ -0,0 +1,48 @@ +package e2e + +import ( + "fmt" + "testing" + + "github.com/operator-framework/operator-lifecycle-manager/pkg/lib/operatorclient" + log "github.com/sirupsen/logrus" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +// TestMetrics tests the metrics endpoint of the OLM pod. +func TestMetricsEndpoint(t *testing.T) { + c := newKubeClient(t) + + listOptions := metav1.ListOptions{LabelSelector: "app=olm-operator"} + podList, err := c.KubernetesInterface().CoreV1().Pods(e2eNamespace).List(listOptions) + if err != nil { + log.Infof("Error %v\n", err) + t.Fatalf("Listing pods failed: %v\n", err) + } + if len(podList.Items) > 1 { + t.Fatalf("Expected only 1 olm-operator pod, got %v", len(podList.Items)) + } + + podName := podList.Items[0].GetName() + + rawOutput, err := getMetricsFromPod(t, c, podName, e2eNamespace, 8080) + if err != nil { + t.Fatalf("Metrics test failed: %v\n", err) + } + + log.Debugf("Metrics:\n%v", rawOutput) +} + +func getMetricsFromPod(t *testing.T, client operatorclient.ClientInterface, podName string, namespace string, port int) (string, error) { + rawOutput, err := client.KubernetesInterface().CoreV1().RESTClient().Get(). + Namespace(namespace). + Resource("pods"). + SubResource("proxy"). + Name(fmt.Sprintf("%v:%v", podName, port)). + Suffix("metrics"). + Do().Raw() + if err != nil { + return "", err + } + return string(rawOutput), nil +} diff --git a/test/e2e/util_test.go b/test/e2e/util_test.go index 964cd694ea..f905467cff 100644 --- a/test/e2e/util_test.go +++ b/test/e2e/util_test.go @@ -45,6 +45,7 @@ var ( cleaner *namespaceCleaner testNamespace = metav1.NamespaceDefault genName = names.SimpleNameGenerator.GenerateName + e2eNamespace string persistentCatalogNames = []string{ocsConfigMap} nonPersistentCatalogsFieldSelector = createFieldNotEqualSelector("metadata.name", persistentCatalogNames...) @@ -53,7 +54,7 @@ var ( ) func init() { - e2eNamespace := os.Getenv("NAMESPACE") + e2eNamespace = os.Getenv("NAMESPACE") if e2eNamespace != "" { testNamespace = e2eNamespace }