Feature: Add new metric request_throughput #619

Open · wants to merge 4 commits into main
Changes from 1 commit
CHANGELOG.md (2 changes: 1 addition & 1 deletion)
@@ -1,6 +1,6 @@
## Changelog

* [CHANGE] Add new metric `slow_request_server_throughput` to track the throughput of slow queries.
* [CHANGE] Add new metric `slow_request_server_throughput` to track the throughput of slow queries. #619
* [CHANGE] Log middleware updated to honor `logRequestHeaders` in all logging scenarios. #615
* [CHANGE] Roll back the gRPC dependency to v1.65.0 to allow downstream projects to avoid a performance regression and maybe a bug in v1.66.0. #581
* [CHANGE] Update the gRPC dependency to v1.66.0 and deprecate the `grpc_server_recv_buffer_pools_enabled` option that is no longer supported by it. #580
middleware/instrument.go (32 changes: 17 additions & 15 deletions)
@@ -46,15 +46,15 @@ func (f PerTenantCallback) shouldInstrument(ctx context.Context) (string, bool)

// Instrument is a Middleware which records timings for every HTTP request
type Instrument struct {
	Duration *prometheus.HistogramVec
	PerTenantDuration *prometheus.HistogramVec
	PerTenantCallback PerTenantCallback
	RequestBodySize *prometheus.HistogramVec
	ResponseBodySize *prometheus.HistogramVec
	InflightRequests *prometheus.GaugeVec
	SlowRequestCutoff time.Duration
	ServerThroughputUnit string
	SlowRequestServerThroughput *prometheus.HistogramVec
	Duration *prometheus.HistogramVec
	PerTenantDuration *prometheus.HistogramVec
	PerTenantCallback PerTenantCallback
	RequestBodySize *prometheus.HistogramVec
	ResponseBodySize *prometheus.HistogramVec
	InflightRequests *prometheus.GaugeVec
	SlowRequestCutoff time.Duration
	ThroughputUnit string
	SlowRequestThroughput *prometheus.HistogramVec
}

// IsWSHandshakeRequest returns true if the given request is a websocket handshake request.
@@ -112,14 +112,16 @@ func (i Instrument) Wrap(next http.Handler) http.Handler {
		}
		if i.SlowRequestCutoff > 0 && respMetrics.Duration > i.SlowRequestCutoff {
			parts := strings.Split(w.Header().Get("Server-Timing"), ", ")
			volume := int64(0)
			for _, part := range parts {
				if strings.HasPrefix(part, i.ServerThroughputUnit) {
					_, _ = fmt.Sscanf(part, i.ServerThroughputUnit+"=%d", &volume)
					break
			if len(parts) == 0 {
				volume := int64(0)
				for _, part := range parts {
					if strings.HasPrefix(part, i.ThroughputUnit) {
						_, _ = fmt.Sscanf(part, i.ThroughputUnit+"=%d", &volume)
						instrument.ObserveWithExemplar(r.Context(), i.SlowRequestThroughput.WithLabelValues(r.Method, route), float64(volume)/respMetrics.Duration.Seconds())
						break
					}
				}
			}
			i.SlowRequestServerThroughput.WithLabelValues(r.Method, route).Observe(float64(volume) / respMetrics.Duration.Seconds())
		}
	})
}
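For context on where the new observation gets its data: Instrument.Wrap only inspects the response's Server-Timing header, splits it on ", ", and scans the entry matching the configured unit with the format "<unit>=<integer>". Below is a minimal sketch of a handler feeding it, assuming the unit is configured as total_samples; the handler name, route, and sample count are made up for illustration.

	package main

	import (
		"fmt"
		"net/http"
	)

	// queryHandler pretends to be a slow endpoint that reports how much data
	// it processed through the Server-Timing response header.
	func queryHandler(w http.ResponseWriter, r *http.Request) {
		processedSamples := int64(5000) // hypothetical volume handled by this request

		// The format matches what Instrument.Wrap parses: "<unit>=<int>",
		// with multiple entries separated by ", ". Set it before the first
		// Write, otherwise the header is never sent.
		w.Header().Set("Server-Timing", fmt.Sprintf("total_samples=%d", processedSamples))

		w.WriteHeader(http.StatusOK)
		_, _ = w.Write([]byte("ok"))
	}

	func main() {
		http.Handle("/query", http.HandlerFunc(queryHandler))
		_ = http.ListenAndServe(":8080", nil)
	}

When such a request runs longer than SlowRequestCutoff, the middleware is meant to observe 5000 divided by the request duration in seconds into SlowRequestThroughput.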
server/metrics.go (23 changes: 12 additions & 11 deletions)
@@ -16,14 +16,14 @@ import (
)

type Metrics struct {
	TCPConnections *prometheus.GaugeVec
	TCPConnectionsLimit *prometheus.GaugeVec
	RequestDuration *prometheus.HistogramVec
	PerTenantRequestDuration *prometheus.HistogramVec
	ReceivedMessageSize *prometheus.HistogramVec
	SentMessageSize *prometheus.HistogramVec
	InflightRequests *prometheus.GaugeVec
	SlowRequestServerThroughput *prometheus.HistogramVec
	TCPConnections *prometheus.GaugeVec
	TCPConnectionsLimit *prometheus.GaugeVec
	RequestDuration *prometheus.HistogramVec
	PerTenantRequestDuration *prometheus.HistogramVec
	ReceivedMessageSize *prometheus.HistogramVec
	SentMessageSize *prometheus.HistogramVec
	InflightRequests *prometheus.GaugeVec
	SlowRequestThroughput *prometheus.HistogramVec
}

func NewServerMetrics(cfg Config) *Metrics {
@@ -75,11 +75,12 @@ func NewServerMetrics(cfg Config) *Metrics {
			Name: "inflight_requests",
			Help: "Current number of inflight requests.",
		}, []string{"method", "route"}),
		SlowRequestServerThroughput: reg.NewHistogramVec(prometheus.HistogramOpts{
		SlowRequestThroughput: reg.NewHistogramVec(prometheus.HistogramOpts{
			Namespace: cfg.MetricsNamespace,
			Name: "slow_request_server_throughput_" + cfg.ThroughputConfig.Unit,
			Name: "slow_request_throughput_" + cfg.Throughput.Unit,
Contributor: You suggested we change this to request_throughput_samples_total, are you still making the change?

			Help: "Server throughput of long running requests.",
			ConstLabels: prometheus.Labels{"cutoff_ms": strconv.FormatInt(cfg.ThroughputConfig.SlowRequestCutoff.Milliseconds(), 10)},
			ConstLabels: prometheus.Labels{"cutoff_ms": strconv.FormatInt(cfg.Throughput.SlowRequestCutoff.Milliseconds(), 10)},
			Buckets: instrument.DefBuckets,
			NativeHistogramBucketFactor: cfg.MetricsNativeHistogramFactor,
			NativeHistogramMaxBucketNumber: 100,
			NativeHistogramMinResetDuration: time.Hour,
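Putting the naming pieces from this hunk together: the histogram name is assembled from the configured namespace and throughput unit, and the cutoff becomes a constant label. A rough sketch of what that yields, assuming this is dskit's server package (the import path is not stated in the diff) and hypothetical values:

	package main

	import (
		"time"

		"github.com/grafana/dskit/server"
	)

	func main() {
		var cfg server.Config
		cfg.MetricsNamespace = "cortex" // hypothetical namespace
		cfg.Throughput.Unit = "total_samples"
		cfg.Throughput.SlowRequestCutoff = 10 * time.Second

		// The histogram is registered as
		//   cortex_slow_request_throughput_total_samples
		// with the constant label cutoff_ms="10000" and, per Instrument.Wrap,
		// the labels method and route on each observation.
		_ = server.NewServerMetrics(cfg)
	}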
server/server.go (26 changes: 13 additions & 13 deletions)
@@ -154,10 +154,10 @@ type Config struct {
	// This limiter is called for every started and finished gRPC request.
	GrpcMethodLimiter GrpcInflightMethodLimiter `yaml:"-"`

	ThroughputConfig ThroughputConfig `yaml:"throughput_config"`
	Throughput Throughput `yaml:"throughput"`
Contributor: I also prefer nested config, but I noticed that most of the other configs in server.Config are flattened (LogLevel, LogFormat instead of Log.Level, Log.Format). I think it's better to stay consistent because this is a shared library. So can you just move the two fields and prefix them with Throughput?

}

type ThroughputConfig struct {
type Throughput struct {
	SlowRequestCutoff time.Duration `yaml:"slow_request_cutoff"`
	Unit string `yaml:"unit"`
}
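For reference, a sketch of the flattened layout the reviewer is asking for, mirroring how LogLevel and LogFormat avoid nesting; the field names and yaml keys below are guesses and not part of this PR:

	package server

	import "time"

	// Hypothetical flattened variant of the two Throughput fields,
	// prefixed as the reviewer suggests instead of using a nested struct.
	type Config struct {
		ThroughputSlowRequestCutoff time.Duration `yaml:"throughput_slow_request_cutoff"`
		ThroughputUnit              string        `yaml:"throughput_unit"`
	}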
@@ -216,8 +216,8 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
	f.StringVar(&cfg.LogRequestExcludeHeadersList, "server.log-request-headers-exclude-list", "", "Comma separated list of headers to exclude from logging. Only used if server.log-request-headers is true.")
	f.BoolVar(&cfg.LogRequestAtInfoLevel, "server.log-request-at-info-level-enabled", false, "Optionally log requests at info level instead of debug level. Applies to request headers as well if server.log-request-headers is enabled.")
	f.BoolVar(&cfg.ProxyProtocolEnabled, "server.proxy-protocol-enabled", false, "Enables PROXY protocol.")
	f.DurationVar(&cfg.ThroughputConfig.SlowRequestCutoff, "server.throughput-config.slow-request-cutoff", 0, "Duration after which a request is considered slow. For requests taking longer than this duration to finish, the throughput will be calculated. If set to 0, the throughput will not be calculated.")
	f.StringVar(&cfg.ThroughputConfig.Unit, "server.throughput-config.unit", "total_samples", "Unit of the server throughput metric, for example 'processed_bytes' or 'total_samples'. If set, it is appended to the server_throughput metric name.")
	f.DurationVar(&cfg.Throughput.SlowRequestCutoff, "server.throughput.slow-request-cutoff", 0, "Duration after which a request is considered slow. For requests taking longer than this duration to finish, the throughput will be calculated. If set to 0, the throughput will not be calculated.")
	f.StringVar(&cfg.Throughput.Unit, "server.throughput.unit", "total_samples", "Unit of the server throughput metric, for example 'processed_bytes' or 'total_samples'. If set, it is appended to the slow_request_server_throughput metric name.")
}

func (cfg *Config) registererOrDefault() prometheus.Registerer {
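The two new flags registered above map one-to-one onto the nested Throughput fields. A small usage sketch, again assuming dskit's server package and illustrative values:

	package main

	import (
		"flag"
		"fmt"

		"github.com/grafana/dskit/server"
	)

	func main() {
		fs := flag.NewFlagSet("example", flag.ExitOnError)

		var cfg server.Config
		cfg.RegisterFlags(fs)

		// Illustrative values: only requests slower than 10s get a
		// throughput observation, reported in total_samples per second.
		_ = fs.Parse([]string{
			"-server.throughput.slow-request-cutoff=10s",
			"-server.throughput.unit=total_samples",
		})

		fmt.Println(cfg.Throughput.SlowRequestCutoff, cfg.Throughput.Unit) // 10s total_samples
	}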
@@ -530,15 +530,15 @@ func BuildHTTPMiddleware(cfg Config, router *mux.Router, metrics *Metrics, logge
		},
		defaultLogMiddleware,
		middleware.Instrument{
			Duration: metrics.RequestDuration,
			PerTenantDuration: metrics.PerTenantRequestDuration,
			PerTenantCallback: cfg.PerTenantDurationInstrumentation,
			RequestBodySize: metrics.ReceivedMessageSize,
			ResponseBodySize: metrics.SentMessageSize,
			InflightRequests: metrics.InflightRequests,
			SlowRequestCutoff: cfg.ThroughputConfig.SlowRequestCutoff,
			ServerThroughputUnit: cfg.ThroughputConfig.Unit,
			SlowRequestServerThroughput: metrics.SlowRequestServerThroughput,
			Duration: metrics.RequestDuration,
			PerTenantDuration: metrics.PerTenantRequestDuration,
			PerTenantCallback: cfg.PerTenantDurationInstrumentation,
			RequestBodySize: metrics.ReceivedMessageSize,
			ResponseBodySize: metrics.SentMessageSize,
			InflightRequests: metrics.InflightRequests,
			SlowRequestCutoff: cfg.Throughput.SlowRequestCutoff,
			ThroughputUnit: cfg.Throughput.Unit,
			SlowRequestThroughput: metrics.SlowRequestThroughput,
		},
	}
	var httpMiddleware []middleware.Interface