Skip to content

Commit

Permalink
feat(olm): add metrics
Browse files Browse the repository at this point in the history
This adds a /metrics endpoint on the OLM container that exposes counts for
various OLM specific resources, which currently are:
CSVs
InstallPlans
Subscriptions
CatalogSources

And also a count for CSV upgrades.

Some of the e2e code (getMetricsFromPod) was copied from the kubernetes e2e
metrics framework.
  • Loading branch information
Jeff Peeler committed Sep 18, 2018
1 parent 248ac74 commit 2c4f3bf
Show file tree
Hide file tree
Showing 9 changed files with 224 additions and 9 deletions.
10 changes: 10 additions & 0 deletions cmd/olm/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@ import (
"strings"
"time"

"github.com/prometheus/client_golang/prometheus"
log "github.com/sirupsen/logrus"

"github.com/operator-framework/operator-lifecycle-manager/pkg/api/client"
"github.com/operator-framework/operator-lifecycle-manager/pkg/controller/install"
"github.com/operator-framework/operator-lifecycle-manager/pkg/controller/operators/olm"
"github.com/operator-framework/operator-lifecycle-manager/pkg/lib/operatorclient"
"github.com/operator-framework/operator-lifecycle-manager/pkg/lib/signals"
"github.com/operator-framework/operator-lifecycle-manager/pkg/metrics"
olmversion "github.com/operator-framework/operator-lifecycle-manager/pkg/version"
)

Expand Down Expand Up @@ -54,6 +56,10 @@ var (
version = flag.Bool("version", false, "displays olm version")
)

func init() {
metrics.Register()
}

// main function - entrypoint to ALM operator
func main() {
stopCh := signals.SetupSignalHandler()
Expand Down Expand Up @@ -109,6 +115,10 @@ func main() {
http.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusOK)
})
// TODO: both of the following require vendor updates (add k8s.io/apiserver and update prometheus)
//healthz.InstallHandler(mux) //(less code)
//mux.Handle("/metrics", promhttp.Handler()) //other form is deprecated
http.Handle("/metrics", prometheus.Handler())
go http.ListenAndServe(":8080", nil)

operator.Run(stopCh)
Expand Down
4 changes: 4 additions & 0 deletions pkg/controller/operators/catalog/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"time"

olmerrors "github.com/operator-framework/operator-lifecycle-manager/pkg/controller/errors"
"github.com/operator-framework/operator-lifecycle-manager/pkg/metrics"
log "github.com/sirupsen/logrus"
"k8s.io/api/core/v1"
rbacv1 "k8s.io/api/rbac/v1"
Expand Down Expand Up @@ -102,6 +103,7 @@ func NewOperator(kubeconfigPath string, wakeupInterval time.Duration, operatorNa
op.syncCatalogSources,
nil,
"catsrc",
metrics.NewMetricsCatalogSource(op.Operator.OpClient),
)
for _, informer := range catsrcQueueInformer {
op.RegisterQueueInformer(informer)
Expand All @@ -115,6 +117,7 @@ func NewOperator(kubeconfigPath string, wakeupInterval time.Duration, operatorNa
op.syncInstallPlans,
nil,
"installplan",
metrics.NewMetricsInstallPlan(op.Operator.OpClient),
)
for _, informer := range ipQueueInformers {
op.RegisterQueueInformer(informer)
Expand All @@ -128,6 +131,7 @@ func NewOperator(kubeconfigPath string, wakeupInterval time.Duration, operatorNa
op.syncSubscriptions,
nil,
"subscription",
metrics.NewMetricsSubscription(op.Operator.OpClient),
)
op.subQueue = subscriptionQueue
for _, informer := range subscriptionQueueInformers {
Expand Down
5 changes: 5 additions & 0 deletions pkg/controller/operators/olm/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (
"github.com/operator-framework/operator-lifecycle-manager/pkg/lib/operatorclient"
"github.com/operator-framework/operator-lifecycle-manager/pkg/lib/ownerutil"
"github.com/operator-framework/operator-lifecycle-manager/pkg/lib/queueinformer"
"github.com/operator-framework/operator-lifecycle-manager/pkg/metrics"
)

var ErrRequirementsNotMet = errors.New("requirements were not met")
Expand Down Expand Up @@ -73,6 +74,7 @@ func NewOperator(crClient versioned.Interface, opClient operatorclient.ClientInt
op.annotateNamespace,
nil,
"namespace",
metrics.NewMetricsNil(),
)
op.RegisterQueueInformer(queueInformer)
}
Expand Down Expand Up @@ -100,6 +102,7 @@ func NewOperator(crClient versioned.Interface, opClient operatorclient.ClientInt
op.syncClusterServiceVersion,
nil,
"csv",
metrics.NewMetricsCSV(op.Operator.OpClient),
)
for _, informer := range queueInformers {
op.RegisterQueueInformer(informer)
Expand All @@ -121,6 +124,7 @@ func NewOperator(crClient versioned.Interface, opClient operatorclient.ClientInt
op.syncDeployment,
nil,
"deployment",
metrics.NewMetricsNil(),
)
for _, informer := range depQueueInformers {
op.RegisterQueueInformer(informer)
Expand Down Expand Up @@ -345,6 +349,7 @@ func (a *Operator) checkReplacementsAndUpdateStatus(csv *v1alpha1.ClusterService
log.Infof("newer ClusterServiceVersion replacing %s, no-op", csv.SelfLink)
msg := fmt.Sprintf("being replaced by csv: %s", replacement.SelfLink)
csv.SetPhase(v1alpha1.CSVPhaseReplacing, v1alpha1.CSVReasonBeingReplaced, msg)
metrics.CSVUpgradeCount.Inc()

return fmt.Errorf("replacing")
}
Expand Down
17 changes: 10 additions & 7 deletions pkg/lib/queueinformer/queueinformer.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package queueinformer

import (
"github.com/operator-framework/operator-lifecycle-manager/pkg/metrics"
log "github.com/sirupsen/logrus"
"k8s.io/client-go/tools/cache"
"k8s.io/client-go/util/workqueue"
Expand All @@ -18,6 +19,7 @@ type QueueInformer struct {
syncHandler SyncHandler
resourceEventHandlerFuncs *cache.ResourceEventHandlerFuncs
name string
metrics.MetricsProvider
}

// enqueue adds a key to the queue. If obj is a key already it gets added directly.
Expand Down Expand Up @@ -85,22 +87,23 @@ func (q *QueueInformer) defaultResourceEventHandlerFuncs() *cache.ResourceEventH

// New creates a set of new queueinformers given a name, a set of informers, and a sync handler to handle the objects
// that the operator is managing. Optionally, custom event handler funcs can be passed in (defaults will be provided)
func New(queue workqueue.RateLimitingInterface, informers []cache.SharedIndexInformer, handler SyncHandler, funcs *cache.ResourceEventHandlerFuncs, name string) []*QueueInformer {
func New(queue workqueue.RateLimitingInterface, informers []cache.SharedIndexInformer, handler SyncHandler, funcs *cache.ResourceEventHandlerFuncs, name string, metrics metrics.MetricsProvider) []*QueueInformer {
queueInformers := []*QueueInformer{}
for _, informer := range informers {
queueInformers = append(queueInformers, NewInformer(queue, informer, handler, funcs, name))
queueInformers = append(queueInformers, NewInformer(queue, informer, handler, funcs, name, metrics))
}
return queueInformers
}

// NewInformer creates a new queueinformer given a name, an informer, and a sync handler to handle the objects
// that the operator is managing. Optionally, custom event handler funcs can be passed in (defaults will be provided)
func NewInformer(queue workqueue.RateLimitingInterface, informer cache.SharedIndexInformer, handler SyncHandler, funcs *cache.ResourceEventHandlerFuncs, name string) *QueueInformer {
func NewInformer(queue workqueue.RateLimitingInterface, informer cache.SharedIndexInformer, handler SyncHandler, funcs *cache.ResourceEventHandlerFuncs, name string, metrics metrics.MetricsProvider) *QueueInformer {
queueInformer := &QueueInformer{
queue: queue,
informer: informer,
syncHandler: handler,
name: name,
queue: queue,
informer: informer,
syncHandler: handler,
name: name,
MetricsProvider: metrics,
}
if funcs == nil {
queueInformer.resourceEventHandlerFuncs = queueInformer.defaultResourceEventHandlerFuncs()
Expand Down
4 changes: 3 additions & 1 deletion pkg/lib/queueinformer/queueinformer_operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,9 @@ func (o *Operator) processNextWorkItem(loop *QueueInformer) bool {
return true
}
queue.Forget(key)
if err := loop.HandleMetrics(); err != nil {
log.Error(err)
}
return true
}

Expand All @@ -133,7 +136,6 @@ func (o *Operator) sync(loop *QueueInformer, key string) error {
if err != nil {
return err
}

if !exists {
// For now, we ignore the case where an object used to exist but no longer does
logger.Info("couldn't get from queue")
Expand Down
140 changes: 140 additions & 0 deletions pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
package metrics

import (
"github.com/operator-framework/operator-lifecycle-manager/pkg/api/apis/operators/v1alpha1"
"github.com/operator-framework/operator-lifecycle-manager/pkg/lib/operatorclient"
"github.com/prometheus/client_golang/prometheus"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

type MetricsProvider interface {
HandleMetrics() error
}

type metricsCSV struct {
opClient operatorclient.ClientInterface
}

func NewMetricsCSV(opClient operatorclient.ClientInterface) MetricsProvider {
return &metricsCSV{opClient}
}

func (m *metricsCSV) HandleMetrics() error {
cList, err := m.opClient.ListCustomResource(v1alpha1.GroupName, v1alpha1.GroupVersion, metav1.NamespaceAll, v1alpha1.ClusterServiceVersionKind)
if err != nil {
return err
}
csvCount.Set(float64(len(cList.Items)))
return nil
}

type metricsInstallPlan struct {
opClient operatorclient.ClientInterface
}

func NewMetricsInstallPlan(opClient operatorclient.ClientInterface) MetricsProvider {
return &metricsInstallPlan{opClient}
}

func (m *metricsInstallPlan) HandleMetrics() error {
cList, err := m.opClient.ListCustomResource(v1alpha1.GroupName, v1alpha1.GroupVersion, metav1.NamespaceAll, v1alpha1.InstallPlanKind)
if err != nil {
return err
}
installPlanCount.Set(float64(len(cList.Items)))
return nil
}

type metricsSubscription struct {
opClient operatorclient.ClientInterface
}

func NewMetricsSubscription(opClient operatorclient.ClientInterface) MetricsProvider {
return &metricsSubscription{opClient}
}

func (m *metricsSubscription) HandleMetrics() error {
cList, err := m.opClient.ListCustomResource(v1alpha1.GroupName, v1alpha1.GroupVersion, metav1.NamespaceAll, v1alpha1.SubscriptionKind)
if err != nil {
return err
}
subscriptionCount.Set(float64(len(cList.Items)))
return nil
}

type metricsCatalogSource struct {
opClient operatorclient.ClientInterface
}

func NewMetricsCatalogSource(opClient operatorclient.ClientInterface) MetricsProvider {
return &metricsCatalogSource{opClient}

}

func (m *metricsCatalogSource) HandleMetrics() error {
cList, err := m.opClient.ListCustomResource(v1alpha1.GroupName, v1alpha1.GroupVersion, metav1.NamespaceAll, v1alpha1.CatalogSourceKind)
if err != nil {
return err
}
catalogSourceCount.Set(float64(len(cList.Items)))
return nil
}

type MetricsNil struct{}

func NewMetricsNil() MetricsProvider {
return &MetricsNil{}
}

func (*MetricsNil) HandleMetrics() error {
return nil
}

// To add new metrics:
// 1. Register new metrics in Register() below.
// 2. Add appropriate metric updates in HandleMetrics (or elsewhere instead).
var (
csvCount = prometheus.NewGauge(
prometheus.GaugeOpts{
Name: "csv_count",
Help: "Number of CSVs successfully registered",
},
)

installPlanCount = prometheus.NewGauge(
prometheus.GaugeOpts{
Name: "install_plan_count",
Help: "Number of install plans",
},
)

subscriptionCount = prometheus.NewGauge(
prometheus.GaugeOpts{
Name: "subscription_count",
Help: "Number of subscriptions",
},
)

catalogSourceCount = prometheus.NewGauge(
prometheus.GaugeOpts{
Name: "catalog_source_count",
Help: "Number of catalog sources",
},
)

// exported since it's not handled by HandleMetrics
CSVUpgradeCount = prometheus.NewCounter(
prometheus.CounterOpts{
Name: "csv_upgrade_count",
Help: "Monotonic count of catalog sources",
},
)
)

func Register() {
prometheus.MustRegister(csvCount)
prometheus.MustRegister(installPlanCount)
prometheus.MustRegister(subscriptionCount)
prometheus.MustRegister(catalogSourceCount)
prometheus.MustRegister(CSVUpgradeCount)
}
2 changes: 2 additions & 0 deletions pkg/package-server/provider/inmem.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ import (

operatorsv1alpha1 "github.com/operator-framework/operator-lifecycle-manager/pkg/api/apis/operators/v1alpha1"
"github.com/operator-framework/operator-lifecycle-manager/pkg/lib/queueinformer"
"github.com/operator-framework/operator-lifecycle-manager/pkg/metrics"
packagev1alpha1 "github.com/operator-framework/operator-lifecycle-manager/pkg/package-server/apis/packagemanifest/v1alpha1"
)

Expand Down Expand Up @@ -56,6 +57,7 @@ func NewInMemoryProvider(informers []cache.SharedIndexInformer, queueOperator *q
prov.syncCatalogSource,
nil,
"catsrc",
metrics.NewMetricsNil(),
)
for _, informer := range queueInformers {
prov.RegisterQueueInformer(informer)
Expand Down
48 changes: 48 additions & 0 deletions test/e2e/metrics_e2e_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
package e2e

import (
"fmt"
"testing"

"github.com/operator-framework/operator-lifecycle-manager/pkg/lib/operatorclient"
log "github.com/sirupsen/logrus"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// TestMetrics tests the metrics endpoint of the OLM pod.
func TestMetricsEndpoint(t *testing.T) {
c := newKubeClient(t)

listOptions := metav1.ListOptions{LabelSelector: "app=olm-operator"}
podList, err := c.KubernetesInterface().CoreV1().Pods(e2eNamespace).List(listOptions)
if err != nil {
log.Infof("Error %v\n", err)
t.Fatalf("Listing pods failed: %v\n", err)
}
if len(podList.Items) > 1 {
t.Fatalf("Expected only 1 olm-operator pod, got %v", len(podList.Items))
}

podName := podList.Items[0].GetName()

rawOutput, err := getMetricsFromPod(t, c, podName, e2eNamespace, 8080)
if err != nil {
t.Fatalf("Metrics test failed: %v\n", err)
}

log.Debugf("Metrics:\n%v", rawOutput)
}

func getMetricsFromPod(t *testing.T, client operatorclient.ClientInterface, podName string, namespace string, port int) (string, error) {
rawOutput, err := client.KubernetesInterface().CoreV1().RESTClient().Get().
Namespace(namespace).
Resource("pods").
SubResource("proxy").
Name(fmt.Sprintf("%v:%v", podName, port)).
Suffix("metrics").
Do().Raw()
if err != nil {
return "", err
}
return string(rawOutput), nil
}
3 changes: 2 additions & 1 deletion test/e2e/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ var (
cleaner *namespaceCleaner
testNamespace = metav1.NamespaceDefault
genName = names.SimpleNameGenerator.GenerateName
e2eNamespace string

persistentCatalogNames = []string{ocsConfigMap}
nonPersistentCatalogsFieldSelector = createFieldNotEqualSelector("metadata.name", persistentCatalogNames...)
Expand All @@ -53,7 +54,7 @@ var (
)

func init() {
e2eNamespace := os.Getenv("NAMESPACE")
e2eNamespace = os.Getenv("NAMESPACE")
if e2eNamespace != "" {
testNamespace = e2eNamespace
}
Expand Down

0 comments on commit 2c4f3bf

Please sign in to comment.