diff --git a/cmd/shipper/main.go b/cmd/shipper/main.go index cae1d406e..8a6ce4ef7 100644 --- a/cmd/shipper/main.go +++ b/cmd/shipper/main.go @@ -88,7 +88,6 @@ type metricsCfg struct { restLatency *shippermetrics.RESTLatencyMetric restResult *shippermetrics.RESTResultMetric certExpire *shippermetrics.WebhookMetric - stateMetrics statemetrics.Metrics metricsBundle *metrics.MetricsBundle } @@ -219,7 +218,6 @@ func main() { restLatency: shippermetrics.NewRESTLatencyMetric(), restResult: shippermetrics.NewRESTResultMetric(), certExpire: shippermetrics.NewTLSCertExpireMetric(), - stateMetrics: ssm, metricsBundle: metrics.NewMetricsBundle(), }, } @@ -250,7 +248,6 @@ func runMetrics(cfg *metricsCfg) { prometheus.MustRegister(cfg.restLatency.Summary, cfg.restResult.Counter) prometheus.MustRegister(cfg.certExpire.GetMetrics()...) prometheus.MustRegister(instrumentedclient.GetMetrics()...) - prometheus.MustRegister(cfg.stateMetrics) prometheus.MustRegister(cfg.metricsBundle.TimeToInstallation) srv := http.Server{ @@ -528,9 +525,8 @@ func startMetricsController(cfg *cfg) (bool, error) { } c := metrics.NewController( - client.NewShipperClientOrDie(metrics.AgentName, cfg.restCfg), + client.NewShipperClientOrDie(cfg.restCfg, metrics.AgentName, cfg.restTimeout), cfg.shipperInformerFactory, - cfg.recorder(metrics.AgentName), cfg.metrics.metricsBundle, ) diff --git a/go.mod b/go.mod index 3bba64fb9..55481ee5f 100644 --- a/go.mod +++ b/go.mod @@ -16,6 +16,7 @@ require ( github.com/mitchellh/go-homedir v1.1.0 github.com/pmezard/go-difflib v1.0.0 github.com/prometheus/client_golang v1.7.1 + github.com/prometheus/client_model v0.2.0 github.com/rodaine/table v1.0.1 github.com/satori/go.uuid v1.2.0 // indirect github.com/spaolacci/murmur3 v1.1.0 // indirect diff --git a/pkg/controller/metrics/metrics_bundle.go b/pkg/controller/metrics/metrics_bundle.go index a08d145f6..13948b916 100644 --- a/pkg/controller/metrics/metrics_bundle.go +++ b/pkg/controller/metrics/metrics_bundle.go @@ -9,8 +9,10 @@ type MetricsBundle struct { func NewMetricsBundle() *MetricsBundle { timeToInstallation := prometheus.NewHistogram(prometheus.HistogramOpts{ - Name: "Time to installation", - Help: "The time it takes for a Release to be installed on all of the clusters it's scheduled on", + Namespace: "shipper", + Subsystem: "metrics_controller", + Name: "time_to_installation", + Help: "The time it takes for a Release to be installed on all of the clusters it's scheduled on", }) return &MetricsBundle{ diff --git a/pkg/controller/metrics/metrics_controller.go b/pkg/controller/metrics/metrics_controller.go index 7b4395fb8..b65cb9fa2 100644 --- a/pkg/controller/metrics/metrics_controller.go +++ b/pkg/controller/metrics/metrics_controller.go @@ -6,23 +6,21 @@ import ( "sync" "time" - objectutil "github.com/bookingcom/shipper/pkg/util/object" - corev1 "k8s.io/api/core/v1" - releaseconditions "github.com/bookingcom/shipper/pkg/util/release" + "github.com/bookingcom/shipper/pkg/util/application" + "github.com/bookingcom/shipper/pkg/util/release" + shipperclientset "github.com/bookingcom/shipper/pkg/client/clientset/versioned" listers "github.com/bookingcom/shipper/pkg/client/listers/shipper/v1alpha1" shipper "github.com/bookingcom/shipper/pkg/apis/shipper/v1alpha1" - clientset "github.com/bookingcom/shipper/pkg/client/clientset/versioned" informers "github.com/bookingcom/shipper/pkg/client/informers/externalversions" "k8s.io/apimachinery/pkg/util/runtime" "k8s.io/apimachinery/pkg/util/wait" "k8s.io/client-go/tools/cache" - "k8s.io/client-go/tools/record" "k8s.io/klog" ) @@ -40,12 +38,10 @@ type Controller struct { releaseLister listers.ReleaseLister releasesSynced cache.InformerSynced - recorder record.EventRecorder - - shipperClientset clientset.Interface + shipperClientset shipperclientset.Interface appLastModifiedTimes map[string]*appLastModifiedTimeEntry - appLastModifiedTimesLock sync.Mutex + appLastModifiedTimesLock sync.RWMutex metricsBundle *MetricsBundle } @@ -60,9 +56,8 @@ type appLastModifiedTimeEntry struct { } func NewController( - shipperClientset clientset.Interface, + shipperClientset shipperclientset.Interface, shipperInformerFactory informers.SharedInformerFactory, - recorder record.EventRecorder, metricsBundle *MetricsBundle, ) *Controller { appInformer := shipperInformerFactory.Shipper().V1alpha1().Applications() @@ -77,8 +72,9 @@ func NewController( releaseLister: relInformer.Lister(), releasesSynced: relInformer.Informer().HasSynced, - recorder: recorder, metricsBundle: metricsBundle, + + appLastModifiedTimes: make(map[string]*appLastModifiedTimeEntry), } appInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{ @@ -150,7 +146,7 @@ func (c *Controller) handleAppUpdates(old, new interface{}) { // If the specs are the same, this is not a user modification // and should be ignored - if !reflect.DeepEqual(oldApp.Spec, newApp.Spec) { + if reflect.DeepEqual(oldApp.Spec, newApp.Spec) { return } @@ -179,24 +175,43 @@ func (c *Controller) handleReleaseUpdates(old, new interface{}) { return } - // If the chart wasn't installed and now it is, calculate how - // long it took. The chart is always installed on step 0, so - // return if we've passed that step + // metrics-controller cares only when the following conditions apply: + // 1. We are at the step 0, because the chart is always installed at step 0 + // 2. The release that generates the event is a contender + // 3. The old release has not achieved installation and the new release has + // achived installation. In other words, we want to keep track of the event + // that updates the "contender achived installation" status to true. + + // See condition (1) if newRelease.Spec.TargetStep > 0 { return } - oldInstallationCondition := releaseconditions.GetReleaseStrategyConditionByType(oldRelease.Status.Strategy, shipper.StrategyConditionContenderAchievedInstallation) - newInstallationCondition := releaseconditions.GetReleaseStrategyConditionByType(newRelease.Status.Strategy, shipper.StrategyConditionContenderAchievedInstallation) - if newInstallationCondition != nil { + // See condition (2) + if !c.isContender(newRelease) { + return + } + + // See condition (3). If Status.Strategy.Conditions is nil just ignore + if oldRelease.Status.Strategy == nil || + oldRelease.Status.Strategy.Conditions == nil || + newRelease.Status.Strategy == nil || + newRelease.Status.Strategy.Conditions == nil { return } - if oldInstallationCondition != nil && oldInstallationCondition.Status == corev1.ConditionTrue { - // This release has already achieved installation, so ignore + oldInstallationCondition := release.GetReleaseStrategyConditionByType(oldRelease.Status.Strategy, shipper.StrategyConditionContenderAchievedInstallation) + newInstallationCondition := release.GetReleaseStrategyConditionByType(newRelease.Status.Strategy, shipper.StrategyConditionContenderAchievedInstallation) + + if oldInstallationCondition == nil || newInstallationCondition == nil { return } + // See condition (3). This release has already achieved installation, so ignore + if oldInstallationCondition.Status == corev1.ConditionTrue { + return + } + // See condition (3). The new release will not update the installation status, so ignore if newInstallationCondition.Status == corev1.ConditionFalse { return } @@ -207,14 +222,16 @@ func (c *Controller) handleReleaseUpdates(old, new interface{}) { klog.Errorf("Failed to get release key: %q", err) } - appName, err := objectutil.GetApplicationLabel(newRelease) - if err != nil { + appName, ok := newRelease.GetLabels()[shipper.AppLabel] + if !ok || len(appName) == 0 { klog.Errorf("Could not find application name for Release %q", releaseName) } + c.appLastModifiedTimesLock.RLock() lastModifiedTime, ok := c.appLastModifiedTimes[appName] + c.appLastModifiedTimesLock.RUnlock() if !ok { - // This probably means the 8informer didn't + // This probably means the informer didn't // run our application create/update callback, // so silently ignore the error return @@ -239,3 +256,39 @@ func (c *Controller) cleanCache() { } } } + +func (c *Controller) isContender(rel *shipper.Release) bool { + // Find the application of this release + appName, err := release.ApplicationNameForRelease(rel) + if err != nil { + klog.Errorf("Failed get application name for release %q: %v", rel.Name, err) + return false + } + + // Find all the releases of the application and sort them by generation + rels, err := c.findReleasesForApplication(appName, rel.Namespace) + if err != nil { + klog.Errorf("Error getting the releases list for app %q: %v", appName, err) + return false + } + + // Find the contender + contender, err := application.GetContender(appName, rels) + if err != nil { + klog.Errorf("Error finding the contender for app %q: %v", appName, err) + return false + } + + if contender.Name == rel.Name { + return true + } + return false +} + +func (c *Controller) findReleasesForApplication(appName, namespace string) ([]*shipper.Release, error) { + releases, err := c.releaseLister.Releases(namespace).ReleasesForApplication(appName) + if err != nil { + return nil, err + } + return release.SortByGenerationDescending(releases), nil +} diff --git a/pkg/controller/metrics/metrics_controller_test.go b/pkg/controller/metrics/metrics_controller_test.go new file mode 100644 index 000000000..680e5e55f --- /dev/null +++ b/pkg/controller/metrics/metrics_controller_test.go @@ -0,0 +1,196 @@ +package metrics + +import ( + "testing" + "time" + + shipper "github.com/bookingcom/shipper/pkg/apis/shipper/v1alpha1" + shippertesting "github.com/bookingcom/shipper/pkg/testing" + "github.com/prometheus/client_golang/prometheus" + dto "github.com/prometheus/client_model/go" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestMetricsControllerCreateAndUpdatesCache(t *testing.T) { + f := shippertesting.NewControllerTestFixture() + c := runController(f) + + // adds the app to the cache + creationTime := parseTime("Jan 22 2021 08:38:22") + creationTimeWrapped := metav1.NewTime(creationTime) + app := &shipper.Application{ + ObjectMeta: metav1.ObjectMeta{ + CreationTimestamp: creationTimeWrapped, + Name: "unit-test-app", + }, + } + + c.handleAppCreates(app) + entry, ok := c.appLastModifiedTimes[app.Name] + if !ok { + t.Fatalf("expected %q to be in cache, got %v", app.Name, c.appLastModifiedTimes) + } + if !entry.time.Equal(creationTime) { + t.Fatalf("expected time to be the same as creation time\nentry.time = %s\ncreationTime = %s\n", entry.time, creationTime) + } + + // if the spec doesn't change than ignore the update + appWithLabels := &shipper.Application{ + ObjectMeta: metav1.ObjectMeta{ + CreationTimestamp: app.ObjectMeta.CreationTimestamp, + Name: "unit-test-app", + Labels: map[string]string{ + "random": "label", + }, + }, + } + c.handleAppUpdates(app, appWithLabels) + entry, ok = c.appLastModifiedTimes[app.Name] + if !ok { + t.Fatalf("expected %q to be in cache, got %v", app.Name, c.appLastModifiedTimes) + } + if !entry.time.Equal(creationTime) { + t.Fatalf("expected time to be the same as creation time\nentry.time = %s\ncreationTime = %s\n", entry.time, creationTime) + } + + // if the spec does change update the cache entry + var one int32 = 1 + appWithSpec := &shipper.Application{ + ObjectMeta: metav1.ObjectMeta{ + CreationTimestamp: app.ObjectMeta.CreationTimestamp, + Name: "unit-test-app", + }, + Spec: shipper.ApplicationSpec{ + RevisionHistoryLimit: &one, + }, + } + c.handleAppUpdates(app, appWithSpec) + entry, ok = c.appLastModifiedTimes[app.Name] + if !ok { + t.Fatalf("expected %q to be in cache, got %v", app.Name, c.appLastModifiedTimes) + } + if entry.time.Equal(creationTime) { + t.Fatalf("expected time to be the differt from the creation time\nentry.time = %s\ncreationTime = %s\n", entry.time, creationTime) + } +} + +func TestMetricsControllerCalculateTimeToInstall(t *testing.T) { + app := &shipper.Application{ + ObjectMeta: metav1.ObjectMeta{ + CreationTimestamp: metav1.NewTime(parseTime("Jan 22 2021 08:38:22")), + Name: "unit-test-app", + Namespace: "unit-test-ns", + }, + } + oldRelease := &shipper.Release{ + ObjectMeta: metav1.ObjectMeta{ + Name: "unit-test", + Namespace: "unit-test-ns", + Labels: map[string]string{ + shipper.AppLabel: "unit-test-app", + }, + Annotations: map[string]string{ + shipper.ReleaseGenerationAnnotation: "1", + }, + }, + Status: shipper.ReleaseStatus{ + Strategy: &shipper.ReleaseStrategyStatus{ + Conditions: []shipper.ReleaseStrategyCondition{ + { + Type: shipper.StrategyConditionContenderAchievedInstallation, + Status: corev1.ConditionFalse, + }, + }, + }, + }, + } + newRelease := &shipper.Release{ + ObjectMeta: metav1.ObjectMeta{ + Name: "unit-test", + Namespace: "unit-test-ns", + Labels: map[string]string{ + shipper.AppLabel: "unit-test-app", + }, + OwnerReferences: []metav1.OwnerReference{ + { + Name: "unit-test-app", + }, + }, + }, + Spec: shipper.ReleaseSpec{ + TargetStep: 0, + }, + Status: shipper.ReleaseStatus{ + Strategy: &shipper.ReleaseStrategyStatus{ + Conditions: []shipper.ReleaseStrategyCondition{ + { + Type: shipper.StrategyConditionContenderAchievedInstallation, + Status: corev1.ConditionTrue, + LastTransitionTime: metav1.NewTime(parseTime("Jan 22 2021 09:38:22")), + }, + }, + }, + }, + } + + f := shippertesting.NewControllerTestFixture(app, oldRelease) + c := runController(f) + + // adds the app to the cache + c.handleAppCreates(app) + + // calculate time to installation + c.handleReleaseUpdates(oldRelease, newRelease) + + // get the metrics + fakeHistogram := c.metricsBundle.TimeToInstallation.(*fakeHistogram) + if len(fakeHistogram.data) != 1 { + t.Fatalf("expected exactly one metric, got %v", fakeHistogram.data) + } + if fakeHistogram.data[0] != 3600.0 { + t.Fatalf("expected installation time to be 3600s but got %f", fakeHistogram.data[0]) + } +} + +func runController(f *shippertesting.ControllerTestFixture) *Controller { + c := NewController( + f.ShipperClient, + f.ShipperInformerFactory, + fakeMetricsBundle(), + ) + + stopCh := make(chan struct{}) + defer close(stopCh) + + f.Run(stopCh) + + return c +} + +func parseTime(str string) time.Time { + layout := "Jan 02 2006 15:04:05" + t, _ := time.Parse(layout, str) + return t +} + +func fakeMetricsBundle() *MetricsBundle { + return &MetricsBundle{ + TimeToInstallation: &fakeHistogram{ + data: make([]float64, 0), + }, + } +} + +type fakeHistogram struct { + data []float64 +} + +func (f *fakeHistogram) Observe(m float64) { + f.data = append(f.data, m) +} + +func (f *fakeHistogram) Describe(chan<- *prometheus.Desc) {} +func (f *fakeHistogram) Collect(chan<- prometheus.Metric) {} +func (f *fakeHistogram) Desc() *prometheus.Desc { return nil } +func (f *fakeHistogram) Write(*dto.Metric) error { return nil } diff --git a/pkg/testing/controller_test_fixture.go b/pkg/testing/controller_test_fixture.go index 077112ebe..9677006e4 100644 --- a/pkg/testing/controller_test_fixture.go +++ b/pkg/testing/controller_test_fixture.go @@ -3,6 +3,7 @@ package testing import ( "fmt" + "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/client-go/dynamic" "k8s.io/client-go/rest" @@ -23,10 +24,10 @@ type ControllerTestFixture struct { Recorder *record.FakeRecorder } -func NewControllerTestFixture() *ControllerTestFixture { +func NewControllerTestFixture(objects ...runtime.Object) *ControllerTestFixture { const recorderBufSize = 42 - shipperClient := shipperfake.NewSimpleClientset() + shipperClient := shipperfake.NewSimpleClientset(objects...) shipperInformerFactory := shipperinformers.NewSharedInformerFactory( shipperClient, NoResyncPeriod)