From 02aad5a85026b84ce9a017c975143153b38cae69 Mon Sep 17 00:00:00 2001 From: bitliu Date: Wed, 25 Oct 2023 10:37:06 +0800 Subject: [PATCH] feat: add control plane metrics library Signed-off-by: bitliu --- api/v1alpha1/envoygateway_helpers.go | 1 - .../validation/envoyproxy_validate_test.go | 52 +++++ go.mod | 14 +- go.sum | 26 +++ internal/admin/server.go | 8 +- internal/cmd/server.go | 6 + internal/metrics/api.go | 51 +++++ internal/metrics/doc.go | 6 + internal/metrics/metadata.go | 102 +++++++++ internal/metrics/metric.go | 74 +++++++ internal/metrics/options.go | 43 ++++ internal/metrics/otel_label.go | 37 ++++ internal/metrics/otel_metric_counter.go | 43 ++++ internal/metrics/otel_metric_gauge.go | 60 ++++++ internal/metrics/otel_metric_histogram.go | 43 ++++ internal/metrics/otel_metric_sink.go | 119 +++++++++++ internal/metrics/register.go | 200 ++++++++++++++++++ internal/metrics/sample_counter_test.go | 23 ++ internal/metrics/sample_gauge_test.go | 27 +++ internal/metrics/sample_histogram_test.go | 23 ++ internal/metrics/units.go | 18 ++ site/content/en/latest/design/eg-metrics.md | 2 +- 22 files changed, 971 insertions(+), 7 deletions(-) create mode 100644 internal/metrics/api.go create mode 100644 internal/metrics/doc.go create mode 100644 internal/metrics/metadata.go create mode 100644 internal/metrics/metric.go create mode 100644 internal/metrics/options.go create mode 100644 internal/metrics/otel_label.go create mode 100644 internal/metrics/otel_metric_counter.go create mode 100644 internal/metrics/otel_metric_gauge.go create mode 100644 internal/metrics/otel_metric_histogram.go create mode 100644 internal/metrics/otel_metric_sink.go create mode 100644 internal/metrics/register.go create mode 100644 internal/metrics/sample_counter_test.go create mode 100644 internal/metrics/sample_gauge_test.go create mode 100644 internal/metrics/sample_histogram_test.go create mode 100644 internal/metrics/units.go diff --git a/api/v1alpha1/envoygateway_helpers.go 
b/api/v1alpha1/envoygateway_helpers.go index e9e369f4e74f..ee49141fa605 100644 --- a/api/v1alpha1/envoygateway_helpers.go +++ b/api/v1alpha1/envoygateway_helpers.go @@ -98,7 +98,6 @@ func (e *EnvoyGateway) GetEnvoyGatewayTelemetry() *EnvoyGatewayTelemetry { if e.Telemetry.Metrics.Prometheus == nil { e.Telemetry.Metrics.Prometheus = DefaultEnvoyGatewayPrometheus() } - if e.Telemetry.Metrics == nil { e.Telemetry.Metrics = DefaultEnvoyGatewayMetrics() } diff --git a/api/v1alpha1/validation/envoyproxy_validate_test.go b/api/v1alpha1/validation/envoyproxy_validate_test.go index 0f60a6fa593b..21ed2452d3c1 100644 --- a/api/v1alpha1/validation/envoyproxy_validate_test.go +++ b/api/v1alpha1/validation/envoyproxy_validate_test.go @@ -675,6 +675,58 @@ func TestEnvoyGatewayAdmin(t *testing.T) { assert.True(t, eg.Admin.EnablePprof == false) } +func TestEnvoyGatewayTelemetry(t *testing.T) { + // default envoygateway config telemetry should not be nil + eg := egv1a1.DefaultEnvoyGateway() + assert.True(t, eg.Telemetry != nil) + + // get default telemetry config from envoygateway + // values should be set in default + egTelemetry := eg.GetEnvoyGatewayTelemetry() + assert.True(t, egTelemetry != nil) + assert.True(t, egTelemetry.Metrics != nil) + assert.True(t, egTelemetry.Metrics.Prometheus.Disable == false) + assert.True(t, egTelemetry.Metrics.Sinks == nil) + + // override the telemetry config + // values should be updated + eg.Telemetry.Metrics = &egv1a1.EnvoyGatewayMetrics{ + Prometheus: &egv1a1.EnvoyGatewayPrometheusProvider{ + Disable: true, + }, + Sinks: []egv1a1.EnvoyGatewayMetricSink{ + { + Type: egv1a1.MetricSinkTypeOpenTelemetry, + OpenTelemetry: &egv1a1.EnvoyGatewayOpenTelemetrySink{ + Host: "otel-collector.monitoring.svc.cluster.local", + Protocol: "grpc", + Port: 4317, + }, + }, { + Type: egv1a1.MetricSinkTypeOpenTelemetry, + OpenTelemetry: &egv1a1.EnvoyGatewayOpenTelemetrySink{ + Host: "otel-collector.monitoring.svc.cluster.local", + Protocol: "http", + Port: 4318, + 
}, + }, + }, + } + + assert.True(t, eg.GetEnvoyGatewayTelemetry().Metrics.Prometheus.Disable == true) + assert.True(t, len(eg.GetEnvoyGatewayTelemetry().Metrics.Sinks) == 2) + assert.True(t, eg.GetEnvoyGatewayTelemetry().Metrics.Sinks[0].Type == egv1a1.MetricSinkTypeOpenTelemetry) + + // set eg defaults when telemetry is nil + // the telemetry should not be nil + eg.Telemetry = nil + eg.SetEnvoyGatewayDefaults() + assert.True(t, eg.Telemetry != nil) + assert.True(t, eg.Telemetry.Metrics != nil) + assert.True(t, eg.Telemetry.Metrics.Prometheus.Disable == false) + assert.True(t, eg.Telemetry.Metrics.Sinks == nil) +} + func TestGetEnvoyProxyDefaultComponentLevel(t *testing.T) { cases := []struct { logging egv1a1.ProxyLogging diff --git a/go.mod b/go.mod index 168664d61cad..361460889c83 100644 --- a/go.mod +++ b/go.mod @@ -23,6 +23,12 @@ require ( github.com/telepresenceio/watchable v0.0.0-20220726211108-9bb86f92afa7 github.com/tetratelabs/multierror v1.1.1 github.com/tsaarni/certyaml v0.9.2 + go.opentelemetry.io/otel v1.19.0 + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.42.0 + go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.42.0 + go.opentelemetry.io/otel/exporters/prometheus v0.42.0 + go.opentelemetry.io/otel/metric v1.19.0 + go.opentelemetry.io/otel/sdk/metric v1.19.0 go.opentelemetry.io/proto/otlp v1.0.0 go.uber.org/zap v1.26.0 golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e @@ -42,7 +48,13 @@ require ( ) require ( + github.com/cenkalti/backoff/v4 v4.2.1 // indirect + github.com/go-logr/stdr v1.2.2 // indirect + github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 // indirect github.com/matttproud/golang_protobuf_extensions/v2 v2.0.0 // indirect + go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.42.0 // indirect + go.opentelemetry.io/otel/sdk v1.19.0 // indirect + go.opentelemetry.io/otel/trace v1.19.0 // indirect golang.org/x/sync v0.3.0 // indirect ) @@ -88,7 +100,7 @@ require ( github.com/munnerz/goautoneg 
v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/peterbourgon/diskv v2.0.1+incompatible // indirect github.com/pmezard/go-difflib v1.0.0 // indirect - github.com/prometheus/client_golang v1.17.0 // indirect + github.com/prometheus/client_golang v1.17.0 github.com/prometheus/client_model v0.5.0 // indirect github.com/prometheus/procfs v0.11.1 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect diff --git a/go.sum b/go.sum index 33cd1b9f6535..741b189a1786 100644 --- a/go.sum +++ b/go.sum @@ -37,6 +37,8 @@ github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs= github.com/blang/semver v3.5.0+incompatible/go.mod h1:kRBLl5iJ+tD4TcOOxsy/0fnwebNt5EWlYSAyrTnjyyk= +github.com/cenkalti/backoff/v4 v4.2.1 h1:y4OZtCnogmCPw98Zjyt5a6+QwPLGkiQsYW5oUqylYbM= +github.com/cenkalti/backoff/v4 v4.2.1/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyYozVcomhLiZE= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/census-instrumentation/opencensus-proto v0.4.1 h1:iKLQ0xPNFxR/2hzXZMrBo8f1j86j5WHzznCCQxV/b8g= github.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw= @@ -125,8 +127,11 @@ github.com/go-logfmt/logfmt v0.6.0 h1:wGYYu3uicYdqXVgoYbvnkrPVXkuLM1p1ifugDMEdRi github.com/go-logfmt/logfmt v0.6.0/go.mod h1:WYhtIu8zTZfxdn5+rREduYbwxfcBr/Vr6KEVveWlfTs= github.com/go-logr/logr v0.1.0/go.mod h1:ixOQHD9gLJUVQQ2ZOR7zLEifBX6tGkNJF4QyIY7sIas= github.com/go-logr/logr v1.2.0/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.2.4 h1:g01GSCwiDw2xSZfjJ2/T9M+S6pFdcNtFYsp+Y43HYDQ= github.com/go-logr/logr v1.2.4/go.mod 
h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= +github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= +github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= github.com/go-logr/zapr v0.1.0/go.mod h1:tabnROwaDl0UNxkVeFRbY8bwB37GwRv0P8lg6aAiEnk= github.com/go-logr/zapr v1.2.4 h1:QHVo+6stLbfJmYGkQ7uGHUCu5hnAFAj6mDe6Ea0SeOo= github.com/go-logr/zapr v1.2.4/go.mod h1:FyHWQIzQORZ0QVE1BtVHv3cKtNLuXsbNLtpuhNapBOA= @@ -188,6 +193,7 @@ github.com/gogo/protobuf v1.3.1/go.mod h1:SlYgWuQ5SjCEi6WLHjHCa1yvBfUnHcTbrrZtXP github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= +github.com/golang/glog v1.1.2 h1:DVjP2PbBOzHyzA+dn3WhHIq4NdVu3Q+pvivFICf/7fo= github.com/golang/groupcache v0.0.0-20160516000752-02826c3e7903/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20190129154638-5b532d6fd5ef/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= @@ -253,6 +259,8 @@ github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0 h1:YBftPWNWd4WwGqtY2yeZL2ef8rHAxPBD8KFhJpmcqms= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0/go.mod h1:YN5jB8ie0yfIUg6VvR9Kz84aCaG7AsGZnLjhHbUqwPg= github.com/hashicorp/errwrap v1.1.0 h1:OxrOeh75EUXMY8TBjag2fzXGZ40LB6IKw45YeGUDY2I= 
github.com/hashicorp/errwrap v1.1.0/go.mod h1:YH+1FKiLXxHSkmPseP+kNlulaMuP3n2brvKWEqk/Jc4= github.com/hashicorp/golang-lru v0.5.0/go.mod h1:/m3WP610KZHVQ1SGc6re/UDhFvYD7pJ4Ao+sR/qLZy8= @@ -466,6 +474,24 @@ go.mongodb.org/mongo-driver v1.0.3/go.mod h1:u7ryQJ+DOzQmeO7zB6MHyr8jkEQvC8vH7qL go.mongodb.org/mongo-driver v1.1.1/go.mod h1:u7ryQJ+DOzQmeO7zB6MHyr8jkEQvC8vH7qLUO4lqsUM= go.mongodb.org/mongo-driver v1.1.2/go.mod h1:u7ryQJ+DOzQmeO7zB6MHyr8jkEQvC8vH7qLUO4lqsUM= go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU= +go.opentelemetry.io/otel v1.19.0 h1:MuS/TNf4/j4IXsZuJegVzI1cwut7Qc00344rgH7p8bs= +go.opentelemetry.io/otel v1.19.0/go.mod h1:i0QyjOq3UPoTzff0PJB2N66fb4S0+rSbSB15/oyH9fY= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.42.0 h1:ZtfnDL+tUrs1F0Pzfwbg2d59Gru9NCH3bgSHBM6LDwU= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric v0.42.0/go.mod h1:hG4Fj/y8TR/tlEDREo8tWstl9fO9gcFkn4xrx0Io8xU= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.42.0 h1:NmnYCiR0qNufkldjVvyQfZTHSdzeHoZ41zggMsdMcLM= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v0.42.0/go.mod h1:UVAO61+umUsHLtYb8KXXRoHtxUkdOPkYidzW3gipRLQ= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.42.0 h1:wNMDy/LVGLj2h3p6zg4d0gypKfWKSWI14E1C4smOgl8= +go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v0.42.0/go.mod h1:YfbDdXAAkemWJK3H/DshvlrxqFB2rtW4rY6ky/3x/H0= +go.opentelemetry.io/otel/exporters/prometheus v0.42.0 h1:jwV9iQdvp38fxXi8ZC+lNpxjK16MRcZlpDYvbuO1FiA= +go.opentelemetry.io/otel/exporters/prometheus v0.42.0/go.mod h1:f3bYiqNqhoPxkvI2LrXqQVC546K7BuRDL/kKuxkujhA= +go.opentelemetry.io/otel/metric v1.19.0 h1:aTzpGtV0ar9wlV4Sna9sdJyII5jTVJEvKETPiOKwvpE= +go.opentelemetry.io/otel/metric v1.19.0/go.mod h1:L5rUsV9kM1IxCj1MmSdS+JQAcVm319EUrDVLrt7jqt8= +go.opentelemetry.io/otel/sdk v1.19.0 h1:6USY6zH+L8uMH8L3t1enZPR3WFEmSTADlqldyHtJi3o= +go.opentelemetry.io/otel/sdk v1.19.0/go.mod 
h1:NedEbbS4w3C6zElbLdPJKOpJQOrGUJ+GfzpjUvI0v1A= +go.opentelemetry.io/otel/sdk/metric v1.19.0 h1:EJoTO5qysMsYCa+w4UghwFV/ptQgqSL/8Ni+hx+8i1k= +go.opentelemetry.io/otel/sdk/metric v1.19.0/go.mod h1:XjG0jQyFJrv2PbMvwND7LwCEhsJzCzV5210euduKcKY= +go.opentelemetry.io/otel/trace v1.19.0 h1:DFVQmlVbfVeOuBRrwdtaehRrWiL1JoVs9CPIQ1Dzxpg= +go.opentelemetry.io/otel/trace v1.19.0/go.mod h1:mfaSyvGyEJEI0nyV2I4qhNQnbBOUUmYZpYojqMnX2vo= go.opentelemetry.io/proto/otlp v1.0.0 h1:T0TX0tmXU8a3CbNXzEKGeU5mIVOdf0oykP+u2lIVU/I= go.opentelemetry.io/proto/otlp v1.0.0/go.mod h1:Sy6pihPLfYHkr3NkUbEhGHFhINUSI/v80hjKIs5JXpM= go.starlark.net v0.0.0-20230525235612-a134d8f9ddca h1:VdD38733bfYv5tUZwEIskMM93VanwNIi5bIKnDrJdEY= diff --git a/internal/admin/server.go b/internal/admin/server.go index be25bec5be3e..9c035b43816f 100644 --- a/internal/admin/server.go +++ b/internal/admin/server.go @@ -18,7 +18,7 @@ import ( ) var ( - debugLogger = logging.DefaultLogger(v1alpha1.LogLevelInfo).WithName("admin") + adminLogger = logging.DefaultLogger(v1alpha1.LogLevelInfo).WithName("admin") ) func Init(cfg *config.Server) error { @@ -36,7 +36,7 @@ func start(cfg *config.Server) error { address := cfg.EnvoyGateway.GetEnvoyGatewayAdminAddress() enablePprof := cfg.EnvoyGateway.GetEnvoyGatewayAdmin().EnablePprof - debugLogger.Info("starting admin server", "address", address, "enablePprof", enablePprof) + adminLogger.Info("starting admin server", "address", address, "enablePprof", enablePprof) if enablePprof { // Serve pprof endpoints to aid in live debugging. @@ -47,7 +47,7 @@ func start(cfg *config.Server) error { handlers.HandleFunc("/debug/pprof/cmdline", pprof.Cmdline) } - debugServer := &http.Server{ + adminServer := &http.Server{ Handler: handlers, Addr: address, ReadTimeout: 5 * time.Second, @@ -58,7 +58,7 @@ func start(cfg *config.Server) error { // Listen And Serve Admin Server. 
go func() { - if err := debugServer.ListenAndServe(); err != nil { + if err := adminServer.ListenAndServe(); err != nil { cfg.Logger.Error(err, "start admin server failed") } }() diff --git a/internal/cmd/server.go b/internal/cmd/server.go index 017ca9a6c423..6dc25a19946a 100644 --- a/internal/cmd/server.go +++ b/internal/cmd/server.go @@ -17,6 +17,7 @@ import ( infrarunner "github.com/envoyproxy/gateway/internal/infrastructure/runner" "github.com/envoyproxy/gateway/internal/logging" "github.com/envoyproxy/gateway/internal/message" + "github.com/envoyproxy/gateway/internal/metrics" providerrunner "github.com/envoyproxy/gateway/internal/provider/runner" xdsserverrunner "github.com/envoyproxy/gateway/internal/xds/server/runner" xdstranslatorrunner "github.com/envoyproxy/gateway/internal/xds/translator/runner" @@ -54,6 +55,11 @@ func server() error { if err := admin.Init(cfg); err != nil { return err } + // Init eg metrics servers. + if err := metrics.Init(cfg); err != nil { + return err + } + // init eg runners. if err := setupRunners(cfg); err != nil { return err diff --git a/internal/metrics/api.go b/internal/metrics/api.go new file mode 100644 index 000000000000..0747568a8223 --- /dev/null +++ b/internal/metrics/api.go @@ -0,0 +1,51 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package metrics + +// A Metric collects numerical observations. +type Metric interface { + // Name returns the name value of a Metric. + Name() string + + // Record makes an observation of the provided value for the given measure. + Record(value float64) + + // RecordInt makes an observation of the provided value for the measure. + RecordInt(value int64) + + // Increment records a value of 1 for the current measure. + // For Counters, this is equivalent to adding 1 to the current value. + // For Gauges, this is equivalent to setting the value to 1. 
+ // For Histograms, this is equivalent to making an observation of value 1. + Increment() + + // Decrement records a value of -1 for the current measure. + // For Counters, this is equivalent to subtracting 1 from the current value. + // For Gauges, this is equivalent to setting the value to -1. + // For Histograms, this is equivalent to making an observation of value -1. + Decrement() + + // With creates a new Metric, with the LabelValues provided. + // This allows creating a set of pre-dimensioned data for recording purposes. + // This is primarily used for documentation and convenience. + // Metrics created with this method do not need to be registered (they share the registration of their parent Metric). + With(labelValues ...LabelValue) Metric +} + +// Label holds a metric dimension which can be operated on using the interface +// methods. +type Label interface { + // Value will set the provided value for the Label. + Value(value string) LabelValue +} + +// LabelValue holds an action to take on a metric dimension's value. +type LabelValue interface { + // Key will get the key of the Label. + Key() Label + // Value will get the value of the Label. + Value() string +} diff --git a/internal/metrics/doc.go b/internal/metrics/doc.go new file mode 100644 index 000000000000..b601b082be9c --- /dev/null +++ b/internal/metrics/doc.go @@ -0,0 +1,6 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package metrics diff --git a/internal/metrics/metadata.go b/internal/metrics/metadata.go new file mode 100644 index 000000000000..59d8d37323ca --- /dev/null +++ b/internal/metrics/metadata.go @@ -0,0 +1,102 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. 
+ +package metrics + +import ( + "errors" + "sync" + + "go.opentelemetry.io/otel" + api "go.opentelemetry.io/otel/metric" + "go.opentelemetry.io/otel/sdk/metric" + + "github.com/envoyproxy/gateway/api/v1alpha1" + log "github.com/envoyproxy/gateway/internal/logging" +) + +var ( + meter = func() api.Meter { + return otel.GetMeterProvider().Meter("envoy-gateway") + } + + metricsLogger = log.DefaultLogger(v1alpha1.LogLevelInfo).WithName("metrics") +) + +func init() { + otel.SetLogger(metricsLogger.Logger) +} + +// MetricType is the type of a metric. +type MetricType string + +// Metric type supports: +// * Counter: A Counter is a simple metric that only goes up (increments). +// +// * Gauge: A Gauge is a metric that represents +// a single numerical value that can arbitrarily go up and down. +// +// * Histogram: A Histogram samples observations and counts them in configurable buckets. +// It also provides a sum of all observed values. +// It's used to visualize the statistical distribution of these observations. + +const ( + CounterType MetricType = "Counter" + GaugeType MetricType = "Gauge" + HistogramType MetricType = "Histogram" +) + +// Metadata records a metric's metadata. +type Metadata struct { + Name string + Type MetricType + Description string + Bounds []float64 +} + +// metricstore stores the metadata of registered metrics +type metricstore struct { + started bool + mu sync.Mutex + stores map[string]Metadata +} + +// stores is a global that stores all registered metrics +var stores = metricstore{ + stores: map[string]Metadata{}, +} + +// register records a newly defined metric. Only valid before an exporter is set. +func (d *metricstore) register(metricstore Metadata) { + d.mu.Lock() + defer d.mu.Unlock() + if d.started { + metricsLogger.Error(errors.New("cannot initialize metric after metric has started"), "metric", metricstore.Name) + } + d.stores[metricstore.Name] = metricstore +} + +// preAddOptions runs pre-run steps before adding to meter provider. 
+func (d *metricstore) preAddOptions() []metric.Option { + d.mu.Lock() + defer d.mu.Unlock() + d.started = true + opts := []metric.Option{} + for name, metricstore := range d.stores { + if metricstore.Bounds == nil { + continue + } + // for each histogram metric (i.e. those with bounds), set up a view explicitly defining those buckets. + v := metric.WithView(metric.NewView( + metric.Instrument{Name: name}, + metric.Stream{ + Aggregation: metric.AggregationExplicitBucketHistogram{ + Boundaries: metricstore.Bounds, + }}, + )) + opts = append(opts, v) + } + return opts +} diff --git a/internal/metrics/metric.go b/internal/metrics/metric.go new file mode 100644 index 000000000000..34a74b12dea6 --- /dev/null +++ b/internal/metrics/metric.go @@ -0,0 +1,74 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package metrics + +import ( + "go.opentelemetry.io/otel/attribute" +) + +// embed metric implementation. +type embed struct { + name string + attrs []attribute.KeyValue + m Metric +} + +func (f embed) Name() string { + return f.name +} + +func (f embed) Increment() { + f.m.Record(1) +} + +func (f embed) Decrement() { + f.m.Record(-1) +} + +func (f embed) RecordInt(value int64) { + f.m.Record(float64(value)) +} + +// disabled metric implementation. 
+type disabled struct { + name string +} + +// Decrement implements Metric +func (dm *disabled) Decrement() {} + +// Increment implements Metric +func (dm *disabled) Increment() {} + +// Name implements Metric +func (dm *disabled) Name() string { + return dm.name +} + +// Record implements Metric +func (dm *disabled) Record(value float64) {} + +// RecordInt implements Metric +func (dm *disabled) RecordInt(value int64) {} + +// With implements Metric +func (dm *disabled) With(labelValues ...LabelValue) Metric { + return dm +} + +var _ Metric = &disabled{} + +func mergeLabelValues(bm embed, labelValues []LabelValue) ([]attribute.KeyValue, attribute.Set) { + attrs := make([]attribute.KeyValue, 0, len(bm.attrs)+len(labelValues)) + attrs = append(attrs, bm.attrs...) + for _, v := range labelValues { + kv := v.(otelLabelValue) + attrs = append(attrs, kv.keyValue) + } + + set := attribute.NewSet(attrs...) + return attrs, set +} diff --git a/internal/metrics/options.go b/internal/metrics/options.go new file mode 100644 index 000000000000..076349708a8c --- /dev/null +++ b/internal/metrics/options.go @@ -0,0 +1,43 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package metrics + +// MetricOption encodes changes to the options passed to a Metric at creation time. +type MetricOption func(*MetricOptions) + +type MetricOptions struct { + EnabledCondition func() bool + Unit Unit + Name string + Description string +} + +// WithUnit provides configuration options for a new Metric, providing unit of measure +// information for a new Metric. +func WithUnit(unit Unit) MetricOption { + return func(opts *MetricOptions) { + opts.Unit = unit + } +} + +// WithEnabled allows a metric to be conditionally enabled if the provided function returns true. +// If disabled, metric operations will do nothing. 
+func WithEnabled(enabled func() bool) MetricOption { + return func(opts *MetricOptions) { + opts.EnabledCondition = enabled + } +} + +func metricOptions(name, description string, opts ...MetricOption) (MetricOptions, Metric) { + o := MetricOptions{Unit: None, Name: name, Description: description} + for _, opt := range opts { + opt(&o) + } + if o.EnabledCondition != nil && !o.EnabledCondition() { + return o, &disabled{name: name} + } + return o, nil +} diff --git a/internal/metrics/otel_label.go b/internal/metrics/otel_label.go new file mode 100644 index 000000000000..bf0ca75d908c --- /dev/null +++ b/internal/metrics/otel_label.go @@ -0,0 +1,37 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package metrics + +import "go.opentelemetry.io/otel/attribute" + +// NewLabel creates a new Label for the given key. +func NewLabel(key string) Label { + return otelLabel{attribute.Key(key)} +} + +// An otelLabel provides a named dimension for a Metric. +type otelLabel struct { + key attribute.Key +} + +// Value creates a new LabelValue for the Label. +func (l otelLabel) Value(value string) LabelValue { + return otelLabelValue{l.key.String(value)} +} + +// An otelLabelValue represents a Label with a specific value. It is used to record +// values for a Metric. 
+type otelLabelValue struct { + keyValue attribute.KeyValue +} + +func (l otelLabelValue) Key() Label { + return otelLabel{l.keyValue.Key} +} + +func (l otelLabelValue) Value() string { + return l.keyValue.Value.AsString() +} diff --git a/internal/metrics/otel_metric_counter.go b/internal/metrics/otel_metric_counter.go new file mode 100644 index 000000000000..8d221cef65aa --- /dev/null +++ b/internal/metrics/otel_metric_counter.go @@ -0,0 +1,43 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package metrics + +import ( + "context" + + api "go.opentelemetry.io/otel/metric" +) + +var _ Metric = &otelCounter{} + +type otelCounter struct { + embed + + c api.Float64Counter + preRecordOptions []api.AddOption +} + +func (f *otelCounter) Record(value float64) { + if f.preRecordOptions != nil { + f.c.Add(context.Background(), value, f.preRecordOptions...) + } else { + f.c.Add(context.Background(), value) + } +} + +func (f *otelCounter) With(labelValues ...LabelValue) Metric { + attrs, set := mergeLabelValues(f.embed, labelValues) + m := &otelCounter{ + c: f.c, + preRecordOptions: []api.AddOption{api.WithAttributeSet(set)}, + } + m.embed = embed{ + name: f.name, + attrs: attrs, + m: m, + } + return m +} diff --git a/internal/metrics/otel_metric_gauge.go b/internal/metrics/otel_metric_gauge.go new file mode 100644 index 000000000000..b9cb07b094bd --- /dev/null +++ b/internal/metrics/otel_metric_gauge.go @@ -0,0 +1,60 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. 
+ +package metrics + +import ( + "sync" + + "go.opentelemetry.io/otel/attribute" + api "go.opentelemetry.io/otel/metric" +) + +var _ Metric = &otelGauge{} + +type otelGauge struct { + embed + + g api.Float64ObservableGauge + mutex *sync.RWMutex + stores map[attribute.Set]*otelGaugeValues + current *otelGaugeValues +} + +type otelGaugeValues struct { + val float64 + opt []api.ObserveOption +} + +func (f *otelGauge) Record(value float64) { + f.mutex.Lock() + if f.current == nil { + f.current = &otelGaugeValues{} + f.stores[attribute.NewSet()] = f.current + } + f.current.val = value + f.mutex.Unlock() +} + +func (f *otelGauge) With(labelValues ...LabelValue) Metric { + attrs, set := mergeLabelValues(f.embed, labelValues) + m := &otelGauge{ + g: f.g, + mutex: f.mutex, + stores: f.stores, + } + if _, f := m.stores[set]; !f { + m.stores[set] = &otelGaugeValues{ + opt: []api.ObserveOption{api.WithAttributeSet(set)}, + } + } + m.current = m.stores[set] + m.embed = embed{ + name: f.name, + attrs: attrs, + m: m, + } + return m +} diff --git a/internal/metrics/otel_metric_histogram.go b/internal/metrics/otel_metric_histogram.go new file mode 100644 index 000000000000..0aa13dbe6bdd --- /dev/null +++ b/internal/metrics/otel_metric_histogram.go @@ -0,0 +1,43 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package metrics + +import ( + "context" + + api "go.opentelemetry.io/otel/metric" +) + +var _ Metric = &otelHistogram{} + +type otelHistogram struct { + embed + + d api.Float64Histogram + preRecordOptions []api.RecordOption +} + +func (f *otelHistogram) Record(value float64) { + if f.preRecordOptions != nil { + f.d.Record(context.Background(), value, f.preRecordOptions...) 
+ } else { + f.d.Record(context.Background(), value) + } +} + +func (f *otelHistogram) With(labelValues ...LabelValue) Metric { + attrs, set := mergeLabelValues(f.embed, labelValues) + m := &otelHistogram{ + d: f.d, + preRecordOptions: []api.RecordOption{api.WithAttributeSet(set)}, + } + m.embed = embed{ + name: f.name, + attrs: attrs, + m: m, + } + return m +} diff --git a/internal/metrics/otel_metric_sink.go b/internal/metrics/otel_metric_sink.go new file mode 100644 index 000000000000..e5eb019dbe86 --- /dev/null +++ b/internal/metrics/otel_metric_sink.go @@ -0,0 +1,119 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package metrics + +import ( + "context" + "sync" + + "go.opentelemetry.io/otel/attribute" + api "go.opentelemetry.io/otel/metric" +) + +// NewCounter creates a new Counter Metric (the values will be cumulative). +// That means that data collected by the new Metric will be summed before export. +func NewCounter(name, description string, opts ...MetricOption) Metric { + stores.register(Metadata{ + Name: name, + Type: CounterType, + Description: description, + }) + o, disabled := metricOptions(name, description, opts...) + if disabled != nil { + return disabled + } + return newCounter(o) +} + +// NewGauge creates a new Gauge Metric. That means that data collected by the new +// Metric will export only the last recorded value. +func NewGauge(name, description string, opts ...MetricOption) Metric { + stores.register(Metadata{ + Name: name, + Type: GaugeType, + Description: description, + }) + o, disabled := metricOptions(name, description, opts...) + if disabled != nil { + return disabled + } + + return newGauge(o) +} + +// NewHistogram creates a new Metric with an aggregation type of Histogram. +// This means that the data collected by the Metric will be collected and exported as a histogram, with the specified bounds. 
+func NewHistogram(name, description string, bounds []float64, opts ...MetricOption) Metric { + stores.register(Metadata{ + Name: name, + Type: HistogramType, + Description: description, + Bounds: bounds, + }) + o, disabled := metricOptions(name, description, opts...) + if disabled != nil { + return disabled + } + return newHistogram(o) +} + +func newCounter(o MetricOptions) *otelCounter { + c, err := meter().Float64Counter(o.Name, + api.WithDescription(o.Description), + api.WithUnit(string(o.Unit))) + if err != nil { + metricsLogger.Error(err, "failed to create otel Counter") + } + m := &otelCounter{c: c} + m.embed = embed{ + name: o.Name, + m: m, + } + return m +} + +func newGauge(o MetricOptions) *otelGauge { + r := &otelGauge{ + mutex: &sync.RWMutex{}, + } + r.stores = map[attribute.Set]*otelGaugeValues{} + g, err := meter().Float64ObservableGauge(o.Name, + api.WithFloat64Callback(func(ctx context.Context, observer api.Float64Observer) error { + r.mutex.Lock() + defer r.mutex.Unlock() + for _, gv := range r.stores { + observer.Observe(gv.val, gv.opt...) 
+ } + return nil + }), + api.WithDescription(o.Description), + api.WithUnit(string(o.Unit))) + if err != nil { + metricsLogger.Error(err, "failed to create otel Gauge") + } + r.g = g + r.embed = embed{ + name: o.Name, + m: r, + } + + return r +} + +func newHistogram(o MetricOptions) *otelHistogram { + d, err := meter().Float64Histogram(o.Name, + api.WithDescription(o.Description), + api.WithUnit(string(o.Unit))) + if err != nil { + metricsLogger.Error(err, "failed to create otel Histogram") + } + m := &otelHistogram{d: d} + m.embed = embed{ + name: o.Name, + m: m, + } + return m +} diff --git a/internal/metrics/register.go b/internal/metrics/register.go new file mode 100644 index 000000000000..7cf0fd59bf9f --- /dev/null +++ b/internal/metrics/register.go @@ -0,0 +1,200 @@ +// Copyright Envoy Gateway Authors +// SPDX-License-Identifier: Apache-2.0 +// The full text of the Apache license is available in the LICENSE file at +// the root of the repo. + +package metrics + +import ( + "context" + "fmt" + "net/http" + "time" + + "go.opentelemetry.io/otel" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" + "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp" + otelprom "go.opentelemetry.io/otel/exporters/prometheus" + "go.opentelemetry.io/otel/sdk/metric" + metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promhttp" + + "github.com/envoyproxy/gateway/api/v1alpha1" + "github.com/envoyproxy/gateway/internal/envoygateway/config" +) + +const ( + defaultEndpoint = "/metrics" +) + +// Init initializes and registers the global metrics server. 
+func Init(cfg *config.Server) error {
+	options := newOptions(cfg)
+	handler, err := registerForHandler(options)
+	if err != nil {
+		return err
+	}
+
+	return start(options.address, handler)
+}
+
+// start serves the metrics endpoint on address from a background goroutine.
+// handler may be nil, in which case nothing is mounted on defaultEndpoint.
+// It always returns nil: listen/serve failures happen asynchronously and are
+// only logged.
+func start(address string, handler http.Handler) error {
+	handlers := http.NewServeMux()
+
+	metricsLogger.Info("starting metrics server", "address", address)
+	if handler != nil {
+		handlers.Handle(defaultEndpoint, handler)
+	}
+
+	metricsServer := &http.Server{
+		Handler:           handlers,
+		Addr:              address,
+		ReadTimeout:       5 * time.Second,
+		ReadHeaderTimeout: 5 * time.Second,
+		WriteTimeout:      10 * time.Second,
+		IdleTimeout:       15 * time.Second,
+	}
+
+	// Listen And Serve Metrics Server.
+	go func() {
+		// ListenAndServe always returns a non-nil error; ErrServerClosed only
+		// reports an orderly Shutdown/Close and must not be logged as a
+		// startup failure.
+		if err := metricsServer.ListenAndServe(); err != nil && err != http.ErrServerClosed {
+			metricsLogger.Error(err, "start metrics server failed")
+		}
+	}()
+
+	return nil
+}
+
+// newOptions translates the server configuration into registerOptions: the
+// listen address, the Prometheus pull settings (unless Prometheus is disabled)
+// and one push sink per configured OpenTelemetry metrics sink.
+func newOptions(svr *config.Server) registerOptions {
+	newOpts := registerOptions{}
+	newOpts.address = fmt.Sprintf("%s:%d", v1alpha1.GatewayMetricsHost, v1alpha1.GatewayMetricsPort)
+
+	if !svr.EnvoyGateway.IfDisablePrometheus() {
+		newOpts.pullOptions.enable = true
+		newOpts.pullOptions.registry = metricsserver.Registry
+		newOpts.pullOptions.gatherer = metricsserver.Registry
+	}
+
+	// The loop variable is named sink (not config) so it does not shadow the
+	// imported config package.
+	for _, sink := range svr.EnvoyGateway.GetEnvoyGatewayTelemetry().Metrics.Sinks {
+		// OpenTelemetry is a pointer and may be left unset in the
+		// configuration; skip such sinks instead of dereferencing nil.
+		if sink.OpenTelemetry == nil {
+			metricsLogger.Info("skipping metrics sink without OpenTelemetry settings", "type", sink.Type)
+			continue
+		}
+		newOpts.pushOptions.sinks = append(newOpts.pushOptions.sinks, metricsSink{
+			host:     sink.OpenTelemetry.Host,
+			port:     sink.OpenTelemetry.Port,
+			protocol: sink.OpenTelemetry.Protocol,
+		})
+	}
+
+	return newOpts
+}
+
+// registerForHandler builds the OTel meter provider from the configured pull
+// and push exporters and installs it as the global provider.
+// If Prometheus is enabled it returns the Prometheus HTTP handler, otherwise
+// the returned handler is nil.
+func registerForHandler(opts registerOptions) (http.Handler, error) {
+	var otelOpts []metric.Option
+
+	// Readers are registered in a fixed order: the Prometheus pull reader
+	// first, then the OTLP HTTP and gRPC push readers.
+	registrars := []func(*[]metric.Option, registerOptions) error{
+		registerOTELPromExporter,
+		registerOTELHTTPexporter,
+		registerOTELgRPCexporter,
+	}
+	for _, register := range registrars {
+		if err := register(&otelOpts, opts); err != nil {
+			return nil, err
+		}
+	}
+	otelOpts = append(otelOpts, stores.preAddOptions()...)
+
+	// Install the provider globally so meters obtained through otel use it.
+	otel.SetMeterProvider(metric.NewMeterProvider(otelOpts...))
+
+	if !opts.pullOptions.enable {
+		// No pull endpoint configured: there is no handler to serve.
+		return nil, nil
+	}
+	return promhttp.HandlerFor(opts.pullOptions.gatherer, promhttp.HandlerOpts{}), nil
+}
+
+// registerOTELPromExporter appends a Prometheus reader (PULL mode) to otelOpts
+// when the pull endpoint is enabled; otherwise it does nothing.
+func registerOTELPromExporter(otelOpts *[]metric.Option, opts registerOptions) error {
+	if !opts.pullOptions.enable {
+		return nil
+	}
+
+	reader, err := otelprom.New(
+		otelprom.WithoutScopeInfo(),
+		otelprom.WithoutTargetInfo(),
+		otelprom.WithoutUnits(),
+		otelprom.WithRegisterer(opts.pullOptions.registry),
+		otelprom.WithoutCounterSuffixes(),
+	)
+	if err != nil {
+		return err
+	}
+
+	*otelOpts = append(*otelOpts, metric.WithReader(reader))
+	metricsLogger.Info("initialized metrics pull endpoint", "address", opts.address, "endpoint", defaultEndpoint)
+	return nil
+}
+
+// registerOTELHTTPexporter registers OTEL HTTP metrics exporter (PUSH mode).
+func registerOTELHTTPexporter(otelOpts *[]metric.Option, opts registerOptions) error {
+	for _, sink := range opts.pushOptions.sinks {
+		// Only sinks configured for the "http" protocol are handled here.
+		if sink.protocol != "http" {
+			continue
+		}
+
+		address := fmt.Sprintf("%s:%d", sink.host, sink.port)
+		httpExporter, err := otlpmetrichttp.New(
+			context.Background(),
+			otlpmetrichttp.WithEndpoint(address),
+			otlpmetrichttp.WithInsecure(),
+		)
+		if err != nil {
+			// Name the sink so a failure can be traced back to its config.
+			return fmt.Errorf("creating otel http metrics exporter for %q: %w", address, err)
+		}
+
+		otelreader := metric.NewPeriodicReader(httpExporter)
+		*otelOpts = append(*otelOpts, metric.WithReader(otelreader))
+		metricsLogger.Info("initialized otel http metrics push endpoint", "address", address)
+	}
+
+	return nil
+}
+
+// registerOTELgRPCexporter registers OTEL gRPC metrics exporter (PUSH mode).
+// It appends one periodic reader to otelOpts for every sink whose protocol is
+// "grpc" and returns the first exporter-creation error, wrapped with the sink
+// address.
+func registerOTELgRPCexporter(otelOpts *[]metric.Option, opts registerOptions) error {
+	for _, sink := range opts.pushOptions.sinks {
+		// Only sinks configured for the "grpc" protocol are handled here.
+		if sink.protocol != "grpc" {
+			continue
+		}
+
+		address := fmt.Sprintf("%s:%d", sink.host, sink.port)
+		grpcExporter, err := otlpmetricgrpc.New(
+			context.Background(),
+			otlpmetricgrpc.WithEndpoint(address),
+			otlpmetricgrpc.WithInsecure(),
+		)
+		if err != nil {
+			// Name the sink so a failure can be traced back to its config.
+			return fmt.Errorf("creating otel grpc metrics exporter for %q: %w", address, err)
+		}
+
+		otelreader := metric.NewPeriodicReader(grpcExporter)
+		*otelOpts = append(*otelOpts, metric.WithReader(otelreader))
+		metricsLogger.Info("initialized otel grpc metrics push endpoint", "address", address)
+	}
+
+	return nil
+}
+
+// registerOptions carries the settings used to wire up the metrics exporters.
+type registerOptions struct {
+	// address is the host:port string built for the metrics endpoint.
+	address     string
+	pullOptions struct {
+		registry prometheus.Registerer
+		gatherer prometheus.Gatherer
+		enable   bool
+	}
+	pushOptions struct {
+		sinks []metricsSink
+	}
+}
+
+// metricsSink describes one push destination; protocol is matched against
+// "http" and "grpc" by the exporter registration functions.
+type metricsSink struct {
+	protocol string
+	host     string
+	port     int32
+}
diff --git a/internal/metrics/sample_counter_test.go b/internal/metrics/sample_counter_test.go
new file mode 100644
index 000000000000..050cc557c519
--- /dev/null
+++ b/internal/metrics/sample_counter_test.go
@@ -0,0 +1,23 @@
+// Copyright Envoy Gateway Authors
+// SPDX-License-Identifier: Apache-2.0
+// The full text of the Apache license is available in the LICENSE file at
+// the root of the repo.
+
+package metrics_test
+
+import "github.com/envoyproxy/gateway/internal/metrics"
+
+var (
+	irUpdates = metrics.NewCounter(
+		"ir_updates_total",
+		"Number of IR updates, by ir type",
+	)
+)
+
+func NewCounter() {
+	// increment on every xds ir update
+	irUpdates.With(irType.Value("xds")).Increment()
+
+	// xds ir updates double
+	irUpdates.With(irType.Value("xds")).Record(2)
+}
diff --git a/internal/metrics/sample_gauge_test.go b/internal/metrics/sample_gauge_test.go
new file mode 100644
index 000000000000..6b287ed9ca1a
--- /dev/null
+++ b/internal/metrics/sample_gauge_test.go
@@ -0,0 +1,27 @@
+// Copyright Envoy Gateway Authors
+// SPDX-License-Identifier: Apache-2.0
+// The full text of the Apache license is available in the LICENSE file at
+// the root of the repo.
+
+package metrics_test
+
+import "github.com/envoyproxy/gateway/internal/metrics"
+
+var (
+	irType        = metrics.NewLabel("ir-type")
+	currentIRsNum = metrics.NewGauge(
+		"current_irs_queue_num",
+		"current number of ir in queue, by ir type",
+	)
+)
+
+func NewGauge() {
+	// only the last recorded value (2) will be exported for this gauge
+	currentIRsNum.With(irType.Value("xds")).Record(1)
+	currentIRsNum.With(irType.Value("xds")).Record(3)
+	currentIRsNum.With(irType.Value("xds")).Record(2)
+
+	currentIRsNum.With(irType.Value("infra")).Record(1)
+	currentIRsNum.With(irType.Value("infra")).Record(3)
+	currentIRsNum.With(irType.Value("infra")).Record(2)
+}
diff --git a/internal/metrics/sample_histogram_test.go b/internal/metrics/sample_histogram_test.go
new file mode 100644
index 000000000000..b34658fcbe54
--- /dev/null
+++ b/internal/metrics/sample_histogram_test.go
@@ -0,0 +1,23 @@
+// Copyright Envoy Gateway Authors
+// SPDX-License-Identifier: Apache-2.0
+// The full text of the Apache license is available in the LICENSE file at
+// the root of the repo.
+
+package metrics_test
+
+import "github.com/envoyproxy/gateway/internal/metrics"
+
+var (
+	method = metrics.NewLabel("method")
+
+	sentBytes = metrics.NewHistogram(
+		"sent_bytes", // no "_total" suffix: by Prometheus convention it marks counters, not histograms
+		"Histogram of sent bytes by method",
+		[]float64{10, 50, 100, 1000, 10000},
+		metrics.WithUnit(metrics.Bytes),
+	)
+)
+
+func NewHistogram() {
+	sentBytes.With(method.Value("/request/path/1")).Record(458)
+}
diff --git a/internal/metrics/units.go b/internal/metrics/units.go
new file mode 100644
index 000000000000..1c7b5ff13c20
--- /dev/null
+++ b/internal/metrics/units.go
@@ -0,0 +1,18 @@
+// Copyright Envoy Gateway Authors
+// SPDX-License-Identifier: Apache-2.0
+// The full text of the Apache license is available in the LICENSE file at
+// the root of the repo.
+
+package metrics
+
+// Unit encodes the standard name for describing the quantity
+// measured by a Metric (if applicable).
+type Unit string
+
+// Predefined units for use with the metrics package.
+const (
+	None         Unit = "1"
+	Bytes        Unit = "By"
+	Seconds      Unit = "s"
+	Milliseconds Unit = "ms"
+)
diff --git a/site/content/en/latest/design/eg-metrics.md b/site/content/en/latest/design/eg-metrics.md
index f43af77be12b..60b938408525 100644
--- a/site/content/en/latest/design/eg-metrics.md
+++ b/site/content/en/latest/design/eg-metrics.md
@@ -6,7 +6,7 @@ title: "Control Plane Observability: Metrics"
 This document aims to cover all aspects of envoy gateway control plane metrics observability.
 
 {{% alert title="Note" color="secondary" %}}
-**Data plane** observability (while important) is outside of scope for this document. For dataplane observability, refer to [here](./metrics).
+**Data plane** observability (while important) is outside of scope for this document. For dataplane observability, refer to [here](../metrics).
 {{% /alert %}}
 
 ## Current State