From de9510b9408d5c2ac914775ec81a1bf4c1745825 Mon Sep 17 00:00:00 2001
From: Bill Maxwell
Date: Fri, 27 Jul 2018 11:37:59 -0700
Subject: [PATCH] Default to noop metric provider

With the built-in Prometheus metrics provider, the k8s machinery doesn't
deregister metrics when controllers are removed. So over time as things
like clusters are created or removed the metrics are not cleaned up. The
metrics types for the cache and queue are also very large. They can take
~1GB of RAM in a 100 cluster setup. Also, Rancher is not exposing these
stats so they are unobservable.
---
 controller/generic_controller.go |  14 +++++
 controller/noop_metrics.go       | 103 ++++++++++++++++++++++++++++++++
 2 files changed, 117 insertions(+)
 create mode 100644 controller/noop_metrics.go

diff --git a/controller/generic_controller.go b/controller/generic_controller.go
index 076dab9b5..60fd3a100 100644
--- a/controller/generic_controller.go
+++ b/controller/generic_controller.go
@@ -3,6 +3,7 @@ package controller
 import (
 	"context"
 	"fmt"
+	"os"
 	"strings"
 	"sync"
 	"time"
@@ -22,10 +23,23 @@ import (
 	"k8s.io/client-go/util/workqueue"
 )
 
+// MetricsEnv is the environment variable that, when set to "true", leaves the
+// existing workqueue and reflector metrics providers in place.
+const MetricsEnv = "NORMAN_QUEUE_METRICS"
+
 var (
 	resyncPeriod = 2 * time.Hour
 )
 
+// init installs the no-op metrics providers unless MetricsEnv is set to
+// "true". The built-in provider does not deregister metrics when controllers
+// are removed, so they pile up as clusters are created and removed.
+func init() {
+	if os.Getenv(MetricsEnv) != "true" {
+		DisableAllControllerMetrics()
+	}
+}
+
 type HandlerFunc func(key string) error
 
 type GenericController interface {
diff --git a/controller/noop_metrics.go b/controller/noop_metrics.go
new file mode 100644
index 000000000..93ef91a56
--- /dev/null
+++ b/controller/noop_metrics.go
@@ -0,0 +1,103 @@
+package controller
+
+import (
+	"k8s.io/client-go/tools/cache"
+	"k8s.io/client-go/util/workqueue"
+)
+
+// noopMetric is a metric whose every operation does nothing. Its method set
+// (Inc, Dec, Observe, Set) lets it stand in for each counter, gauge and
+// summary metric returned by the providers in this file.
+type noopMetric struct{}
+
+func (noopMetric) Inc()            {}
+func (noopMetric) Dec()            {}
+func (noopMetric) Observe(float64) {}
+func (noopMetric) Set(float64)     {}
+
+// noopWorkqueueMetricsProvider returns a noopMetric for every workqueue metric
+// it is asked to create.
+type noopWorkqueueMetricsProvider struct{}
+
+func (noopWorkqueueMetricsProvider) NewDepthMetric(name string) workqueue.GaugeMetric {
+	return noopMetric{}
+}
+
+func (noopWorkqueueMetricsProvider) NewAddsMetric(name string) workqueue.CounterMetric {
+	return noopMetric{}
+}
+
+func (noopWorkqueueMetricsProvider) NewLatencyMetric(name string) workqueue.SummaryMetric {
+	return noopMetric{}
+}
+
+func (noopWorkqueueMetricsProvider) NewWorkDurationMetric(name string) workqueue.SummaryMetric {
+	return noopMetric{}
+}
+
+func (noopWorkqueueMetricsProvider) NewRetriesMetric(name string) workqueue.CounterMetric {
+	return noopMetric{}
+}
+
+// noopCacheMetricsProvider returns a noopMetric for every reflector metric it
+// is asked to create.
+type noopCacheMetricsProvider struct{}
+
+func (noopCacheMetricsProvider) NewListsMetric(name string) cache.CounterMetric {
+	return noopMetric{}
+}
+
+func (noopCacheMetricsProvider) NewListDurationMetric(name string) cache.SummaryMetric {
+	return noopMetric{}
+}
+
+func (noopCacheMetricsProvider) NewItemsInListMetric(name string) cache.SummaryMetric {
+	return noopMetric{}
+}
+
+func (noopCacheMetricsProvider) NewWatchesMetric(name string) cache.CounterMetric {
+	return noopMetric{}
+}
+
+func (noopCacheMetricsProvider) NewShortWatchesMetric(name string) cache.CounterMetric {
+	return noopMetric{}
+}
+
+func (noopCacheMetricsProvider) NewWatchDurationMetric(name string) cache.SummaryMetric {
+	return noopMetric{}
+}
+
+func (noopCacheMetricsProvider) NewItemsInWatchMetric(name string) cache.SummaryMetric {
+	return noopMetric{}
+}
+
+func (noopCacheMetricsProvider) NewLastResourceVersionMetric(name string) cache.GaugeMetric {
+	return noopMetric{}
+}
+
+// DisableAllControllerMetrics installs the no-op providers for both the
+// reflector (cache) metrics and the workqueue metrics.
+func DisableAllControllerMetrics() {
+	DisableControllerReflectorMetrics()
+	DisableControllerWorkqueueMetrics()
+}
+
+// DisableControllerWorkqueueMetrics installs a workqueue metrics provider
+// whose metrics ignore every update.
+func DisableControllerWorkqueueMetrics() {
+	workqueue.SetProvider(noopWorkqueueMetricsProvider{})
+}
+
+// DisableControllerWorkqueuMetrics is the original, misspelled name of
+// DisableControllerWorkqueueMetrics, kept so existing callers keep compiling.
+//
+// Deprecated: use DisableControllerWorkqueueMetrics instead.
+func DisableControllerWorkqueuMetrics() {
+	DisableControllerWorkqueueMetrics()
+}
+
+// DisableControllerReflectorMetrics installs a reflector metrics provider
+// whose metrics ignore every update.
+func DisableControllerReflectorMetrics() {
+	cache.SetReflectorMetricsProvider(noopCacheMetricsProvider{})
+}