diff --git a/emailsender/README.md b/emailsender/README.md index 74dc1dc54e..9d05975770 100644 --- a/emailsender/README.md +++ b/emailsender/README.md @@ -11,6 +11,7 @@ from ACS Central instance without configuring any additional integration. CLUSTER_ID=test go run cmd/app/main.go ... -main.go:49] Creating api server... -main.go:65] Application started. Will shut down gracefully on [interrupt terminated]. +main.go:65] Creating metrics server... +main.go:51] Creating api server... +main.go:75] Application started. Will shut down gracefully on [interrupt terminated]. ``` diff --git a/emailsender/cmd/app/main.go b/emailsender/cmd/app/main.go index 0f657381da..0e1cb6b728 100644 --- a/emailsender/cmd/app/main.go +++ b/emailsender/cmd/app/main.go @@ -59,6 +59,14 @@ func main() { } }() + metricServer := metrics.NewMetricsServer(cfg) + go func() { + glog.Info("Creating metrics server...") + if err := metricServer.ListenAndServe(); err != nil { + glog.Errorf("serving metrics server error: %v", err) + } + }() + sigs := make(chan os.Signal, 1) notifySignals := []os.Signal{os.Interrupt, unix.SIGTERM} signal.Notify(sigs, notifySignals...) @@ -68,6 +76,9 @@ func main() { if err := server.Shutdown(ctx); err != nil { glog.Errorf("API Shutdown error: %v", err) } + if err := metricServer.Close(); err != nil { + glog.Errorf("closing metric server error: %v", err) + } glog.Infof("Caught %s signal", sig) glog.Info("Email sender application has been stopped") diff --git a/emailsender/config/config.go b/emailsender/config/config.go index 9a39677cda..1acc6dc30d 100644 --- a/emailsender/config/config.go +++ b/emailsender/config/config.go @@ -18,6 +18,7 @@ type Config struct { EnableHTTPS bool `env:"ENABLE_HTTPS" envDefault:"false"` HTTPSCertFile string `env:"HTTPS_CERT_FILE" envDefault:""` HTTPSKeyFile string `env:"HTTPS_KEY_FILE" envDefault:""` + MetricsAddress string `env:"METRICS_ADDRESS" envDefault:":9090"` } // GetConfig retrieves the current runtime configuration from the environment and returns it. diff --git a/emailsender/config/config_test.go b/emailsender/config/config_test.go index d58c25acd2..e525255639 100644 --- a/emailsender/config/config_test.go +++ b/emailsender/config/config_test.go @@ -15,6 +15,7 @@ func TestGetConfigSuccess(t *testing.T) { t.Setenv("ENABLE_HTTPS", "true") t.Setenv("HTTPS_CERT_FILE", "/some/tls.crt") t.Setenv("HTTPS_KEY_FILE", "/some/tls.key") + t.Setenv("METRICS_ADDRESS", ":9999") cfg, err := GetConfig() @@ -25,6 +26,7 @@ func TestGetConfigSuccess(t *testing.T) { assert.Equal(t, cfg.EnableHTTPS, true) assert.Equal(t, cfg.HTTPSCertFile, "/some/tls.crt") assert.Equal(t, cfg.HTTPSKeyFile, "/some/tls.key") + assert.Equal(t, cfg.MetricsAddress, ":9999") } func TestGetConfigFailureParsingWrongTimeDuration(t *testing.T) { diff --git a/emailsender/pkg/metrics/metrics.go b/emailsender/pkg/metrics/metrics.go new file mode 100644 index 0000000000..3b5d51a50a --- /dev/null +++ b/emailsender/pkg/metrics/metrics.go @@ -0,0 +1,54 @@ +// Package metrics implements Prometheus metrics for email sender service +package metrics + +import ( + "sync" + + "github.com/prometheus/client_golang/prometheus" +) + +const ( + prometheusNamespace = "acs" + prometheusSubsystem = "emailsender" + clusterIDLabelName = "cluster_id" +) + +var ( + metrics *Metrics + once sync.Once +) + +// Metrics holds the prometheus.Collector instances +type Metrics struct { + emailsSent *prometheus.CounterVec +} + +// Register registers the metrics with the given prometheus.Registerer. +func (m *Metrics) Register(r prometheus.Registerer) { + r.MustRegister(m.emailsSent) +} + +// IncEmailsSent increments the metric counter for started probe runs. +func (m *Metrics) IncEmailsSent(clusterID string) { + m.emailsSent.With(prometheus.Labels{clusterIDLabelName: clusterID}).Inc() +} + +// NewInstance returns the global Singleton instance for Metrics. +func NewInstance() *Metrics { + once.Do(func() { + metrics = newMetrics() + }) + return metrics +} + +func newMetrics() *Metrics { + return &Metrics{ + emailsSent: prometheus.NewCounterVec(prometheus.CounterOpts{ + Namespace: prometheusNamespace, + Subsystem: prometheusSubsystem, + Name: "email_sent_total", + Help: "The number of sent emails.", + }, []string{clusterIDLabelName}, + ), + } +} diff --git a/emailsender/pkg/metrics/metrics_test.go b/emailsender/pkg/metrics/metrics_test.go new file mode 100644 index 0000000000..6224d9ae8c --- /dev/null +++ b/emailsender/pkg/metrics/metrics_test.go @@ -0,0 +1,74 @@ +package metrics + +import ( + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" + io_prometheus_client "github.com/prometheus/client_model/go" + "github.com/stackrox/acs-fleet-manager/emailsender/config" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +var ( + clusterID = "test-1" + cfg = &config.Config{ + ClusterID: clusterID, + MetricsAddress: ":9999", + } +) + +func getMetricSeries(t *testing.T, registry *prometheus.Registry, name string) *io_prometheus_client.Metric { + metrics := serveMetrics(t, registry) + require.Contains(t, metrics, name) + targetMetric := metrics[name] + require.NotEmpty(t, targetMetric.Metric) + return targetMetric.Metric[0] +} + +func TestCounterIncrements(t *testing.T) { + const expectedIncrement = 1.0 + + tt := []struct { + metricName string + callIncrementFunc func(m *Metrics) + }{ + { + metricName: "acs_emailsender_email_sent_total", + callIncrementFunc: func(m *Metrics) { + m.IncEmailsSent(cfg.ClusterID) + }, + }, + } + + for _, tc := range tt { + tc := tc + t.Run(tc.metricName, func(t *testing.T) { + m := newMetrics() + registry := initPrometheus(m) + tc.callIncrementFunc(m) + + targetSeries := getMetricSeries(t, registry, tc.metricName) + + // Test that the metrics value is 1 after calling the incrementFunc. + value := targetSeries.GetCounter().GetValue() + assert.Equalf(t, expectedIncrement, value, "metric %s has unexpected value", tc.metricName) + label := targetSeries.GetLabel()[0] + assert.Containsf(t, label.GetName(), clusterIDLabelName, "metric %s has unexpected label", tc.metricName) + assert.Containsf(t, label.GetValue(), clusterID, "metric %s has unexpected label", tc.metricName) + }) + } +} + +func TestMetricsConformity(t *testing.T) { + metrics := newMetrics() + + for _, metric := range []prometheus.Collector{ + metrics.emailsSent, + } { + problems, err := testutil.CollectAndLint(metric) + assert.NoError(t, err) + assert.Empty(t, problems) + } +} diff --git a/emailsender/pkg/metrics/server.go b/emailsender/pkg/metrics/server.go new file mode 100644 index 0000000000..e47b491340 --- /dev/null +++ b/emailsender/pkg/metrics/server.go @@ -0,0 +1,49 @@ +package metrics + +import ( + "net/http" + + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/collectors" + "github.com/prometheus/client_golang/prometheus/promhttp" + "github.com/stackrox/acs-fleet-manager/emailsender/config" + "github.com/stackrox/rox/pkg/utils" +) + +// NewMetricsServer returns the metrics server. +func NewMetricsServer(config *config.Config) *http.Server { + registry := initPrometheus(NewInstance()) + return newMetricsServer(config.MetricsAddress, registry) +} + +// ListenAndServe listens for incoming requests and serves the metrics. +func ListenAndServe(server *http.Server) { + if err := server.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) { + utils.Should(errors.Wrap(err, "failed to serve metrics")) + } +} + +// CloseMetricsServer closes the metrics server. +func CloseMetricsServer(server *http.Server) { + if err := server.Close(); err != nil { + utils.Should(errors.Wrap(err, "failed to close metrics server")) + } +} + +func initPrometheus(customMetrics *Metrics) *prometheus.Registry { + registry := prometheus.NewRegistry() + // Register default metrics to use a dedicated registry instead of prometheus.DefaultRegistry. + // This makes it easier to isolate metric state when unit testing this package. + registry.MustRegister(collectors.NewProcessCollector(collectors.ProcessCollectorOpts{})) + registry.MustRegister(collectors.NewGoCollector()) + customMetrics.Register(registry) + return registry +} + +func newMetricsServer(address string, registry *prometheus.Registry) *http.Server { + mux := http.NewServeMux() + mux.Handle("/metrics", promhttp.HandlerFor(registry, promhttp.HandlerOpts{})) + + return &http.Server{Addr: address, Handler: mux} +} diff --git a/emailsender/pkg/metrics/server_test.go b/emailsender/pkg/metrics/server_test.go new file mode 100644 index 0000000000..d85e92de9f --- /dev/null +++ b/emailsender/pkg/metrics/server_test.go @@ -0,0 +1,49 @@ +package metrics + +import ( + "net/http" + "net/http/httptest" + "testing" + + "github.com/prometheus/client_golang/prometheus" + io_prometheus_client "github.com/prometheus/client_model/go" + "github.com/prometheus/common/expfmt" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +type metricResponse map[string]*io_prometheus_client.MetricFamily + +func TestMetricsServerCorrectAddress(t *testing.T) { + server := NewMetricsServer(cfg) + defer func(server *http.Server) { + err := server.Close() + require.NoError(t, err) + }(server) + assert.Equal(t, ":9999", server.Addr) +} + +func TestMetricsServerServesDefaultMetrics(t *testing.T) { + registry := initPrometheus(NewInstance()) + metrics := serveMetrics(t, registry) + assert.Contains(t, metrics, "go_memstats_alloc_bytes", "not found default metrics") +} + +func serveMetrics(t *testing.T, registry *prometheus.Registry) metricResponse { + rec := httptest.NewRecorder() + req, err := http.NewRequest(http.MethodGet, "/metrics", nil) + require.NoError(t, err, "failed creating metrics requests") + + server := newMetricsServer(":9999", registry) + defer func(server *http.Server) { + err := server.Close() + require.NoError(t, err) + }(server) + server.Handler.ServeHTTP(rec, req) + require.Equal(t, http.StatusOK, rec.Code, "status code should be OK") + + promParser := expfmt.TextParser{} + metrics, err := promParser.TextToMetricFamilies(rec.Body) + require.NoError(t, err, "failed parsing metrics response") + return metrics +}