From f9c93c4468664c95818658fc53204d0afdc34482 Mon Sep 17 00:00:00 2001 From: Pavel Tumik <18602811+sagor999@users.noreply.github.com> Date: Fri, 15 Apr 2022 19:05:10 +0000 Subject: [PATCH] [ws-manager] add metrics to track initialize and finalize of workspaces --- components/ws-manager/pkg/manager/metrics.go | 18 ++++++++++++++++++ components/ws-manager/pkg/manager/monitor.go | 15 ++++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/components/ws-manager/pkg/manager/metrics.go b/components/ws-manager/pkg/manager/metrics.go index 6de8bd97b26dfc..0d430ef57c11a7 100644 --- a/components/ws-manager/pkg/manager/metrics.go +++ b/components/ws-manager/pkg/manager/metrics.go @@ -38,6 +38,8 @@ type metrics struct { manager *Manager startupTimeHistVec *prometheus.HistogramVec + initializeTimeHistVec *prometheus.HistogramVec + finalizeTimeHistVec *prometheus.HistogramVec totalStartsCounterVec *prometheus.CounterVec totalStopsCounterVec *prometheus.CounterVec totalOpenPortGauge prometheus.GaugeFunc @@ -58,6 +60,20 @@ func newMetrics(m *Manager) *metrics { // same as components/ws-manager-bridge/src/prometheus-metrics-exporter.ts#L15 Buckets: prometheus.ExponentialBuckets(2, 2, 10), }, []string{"type"}), + initializeTimeHistVec: prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: metricsNamespace, + Subsystem: metricsWorkspaceSubsystem, + Name: "workspace_initialize_seconds", + Help: "time it took to initialize workspace", + Buckets: prometheus.ExponentialBuckets(2, 2, 10), + }, []string{"type"}), + finalizeTimeHistVec: prometheus.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: metricsNamespace, + Subsystem: metricsWorkspaceSubsystem, + Name: "workspace_finalize_seconds", + Help: "time it took to finalize workspace", + Buckets: prometheus.ExponentialBuckets(2, 2, 10), + }, []string{"type"}), totalStartsCounterVec: prometheus.NewCounterVec(prometheus.CounterOpts{ Namespace: metricsNamespace, Subsystem: metricsWorkspaceSubsystem, @@ -118,6 +134,8 @@ func newTotalOpenPortGaugeHandler(m *Manager) func() float64 { func (m *metrics) Register(reg prometheus.Registerer) error { collectors := []prometheus.Collector{ m.startupTimeHistVec, + m.initializeTimeHistVec, + m.finalizeTimeHistVec, newPhaseTotalVec(m.manager), newWorkspaceActivityVec(m.manager), newTimeoutSettingsVec(m.manager), diff --git a/components/ws-manager/pkg/manager/monitor.go b/components/ws-manager/pkg/manager/monitor.go index c2e0ee9ba59c56..9e5d80ba65f60e 100644 --- a/components/ws-manager/pkg/manager/monitor.go +++ b/components/ws-manager/pkg/manager/monitor.go @@ -734,7 +734,7 @@ func (m *Monitor) initializeWorkspaceContent(ctx context.Context, pod *corev1.Po // we are already initialising return nil } - + t := time.Now() err = retryIfUnavailable(ctx, func(ctx context.Context) error { _, err = snc.InitWorkspace(ctx, &wsdaemon.InitWorkspaceRequest{ Id: workspaceID, @@ -755,6 +755,12 @@ func (m *Monitor) initializeWorkspaceContent(ctx context.Context, pod *corev1.Po } else { err = handleGRPCError(ctx, err) } + wsType := pod.Labels[wsk8s.TypeLabel] + hist, errHist := m.manager.metrics.initializeTimeHistVec.GetMetricWithLabelValues(wsType) + if errHist != nil { + log.WithError(errHist).WithField("type", wsType).Warn("cannot get initialize time histogram metric") + } + hist.Observe(time.Since(t).Seconds()) if err != nil { return xerrors.Errorf("cannot initialize workspace: %w", err) } @@ -916,6 +922,7 @@ func (m *Monitor) finalizeWorkspaceContent(ctx context.Context, wso *workspaceOb backupError error gitStatus *csapi.GitStatus ) + t := time.Now() for i := 0; i < wsdaemonMaxAttempts; i++ { span.LogKV("attempt", i) didSometing, gs, err := doFinalize() @@ -959,6 +966,12 @@ func (m *Monitor) finalizeWorkspaceContent(ctx context.Context, wso *workspaceOb } break } + wsType := api.WorkspaceType_name[int32(tpe)] + hist, err := m.manager.metrics.finalizeTimeHistVec.GetMetricWithLabelValues(wsType) + if err != nil { + log.WithError(err).WithField("type", wsType).Warn("cannot get finalize time histogram metric") + } + hist.Observe(time.Since(t).Seconds()) disposalStatus = &workspaceDisposalStatus{ BackupComplete: true,