Feature: Add new metric request_throughput #619

Open · wants to merge 4 commits into main
Changes from 1 commit
CHANGELOG.md (2 changes: 1 addition & 1 deletion)
@@ -1,6 +1,6 @@
## Changelog

* [CHANGE] Add new metric `slow_request_server_throughput` to track the throughput of slow queries.
* [CHANGE] Add new metric `slow_request_server_throughput` to track the throughput of slow queries. #619
* [CHANGE] Log middleware updated to honor `logRequestHeaders` in all logging scenarios. #615
* [CHANGE] Roll back the gRPC dependency to v1.65.0 to allow downstream projects to avoid a performance regression and maybe a bug in v1.66.0. #581
* [CHANGE] Update the gRPC dependency to v1.66.0 and deprecate the `grpc_server_recv_buffer_pools_enabled` option that is no longer supported by it. #580
middleware/instrument.go (32 changes: 17 additions & 15 deletions)
@@ -46,15 +46,15 @@ func (f PerTenantCallback) shouldInstrument(ctx context.Context) (string, bool)

// Instrument is a Middleware which records timings for every HTTP request
type Instrument struct {
	Duration *prometheus.HistogramVec
	PerTenantDuration *prometheus.HistogramVec
	PerTenantCallback PerTenantCallback
	RequestBodySize *prometheus.HistogramVec
	ResponseBodySize *prometheus.HistogramVec
	InflightRequests *prometheus.GaugeVec
	SlowRequestCutoff time.Duration
	ServerThroughputUnit string
	SlowRequestServerThroughput *prometheus.HistogramVec
	Duration *prometheus.HistogramVec
	PerTenantDuration *prometheus.HistogramVec
	PerTenantCallback PerTenantCallback
	RequestBodySize *prometheus.HistogramVec
	ResponseBodySize *prometheus.HistogramVec
	InflightRequests *prometheus.GaugeVec
	SlowRequestCutoff time.Duration
	ThroughputUnit string
	SlowRequestThroughput *prometheus.HistogramVec
}

// IsWSHandshakeRequest returns true if the given request is a websocket handshake request.
@@ -112,14 +112,16 @@ func (i Instrument) Wrap(next http.Handler) http.Handler {
		}
		if i.SlowRequestCutoff > 0 && respMetrics.Duration > i.SlowRequestCutoff {
			parts := strings.Split(w.Header().Get("Server-Timing"), ", ")
			volume := int64(0)
			for _, part := range parts {
				if strings.HasPrefix(part, i.ServerThroughputUnit) {
					_, _ = fmt.Sscanf(part, i.ServerThroughputUnit+"=%d", &volume)
					break
			if len(parts) == 0 {
				volume := int64(0)
				for _, part := range parts {
					if strings.HasPrefix(part, i.ThroughputUnit) {
						_, _ = fmt.Sscanf(part, i.ThroughputUnit+"=%d", &volume)
						instrument.ObserveWithExemplar(r.Context(), i.SlowRequestThroughput.WithLabelValues(r.Method, route), float64(volume)/respMetrics.Duration.Seconds())
						break
					}
				}
			}
			i.SlowRequestServerThroughput.WithLabelValues(r.Method, route).Observe(float64(volume) / respMetrics.Duration.Seconds())
		}
	})
}
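For context on where the new observation gets its data: Instrument.Wrap only inspects the response's Server-Timing header, splits it on ", ", and scans the entry matching the configured unit with the format "<unit>=<integer>". Below is a minimal sketch of a handler feeding it, assuming the unit is configured as total_samples; the handler name, route, and sample count are made up for illustration.

	package main

	import (
		"fmt"
		"net/http"
	)

	// queryHandler pretends to be a slow endpoint that reports how much data
	// it processed through the Server-Timing response header.
	func queryHandler(w http.ResponseWriter, r *http.Request) {
		processedSamples := int64(5000) // hypothetical volume handled by this request

		// The format matches what Instrument.Wrap parses: "<unit>=<int>",
		// with multiple entries separated by ", ". Set it before the first
		// Write, otherwise the header is never sent.
		w.Header().Set("Server-Timing", fmt.Sprintf("total_samples=%d", processedSamples))

		w.WriteHeader(http.StatusOK)
		_, _ = w.Write([]byte("ok"))
	}

	func main() {
		http.Handle("/query", http.HandlerFunc(queryHandler))
		_ = http.ListenAndServe(":8080", nil)
	}

When such a request runs longer than SlowRequestCutoff, the middleware is meant to observe 5000 divided by the request duration in seconds into SlowRequestThroughput.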
server/metrics.go (23 changes: 12 additions & 11 deletions)
@@ -16,14 +16,14 @@ import (
)

type Metrics struct {
	TCPConnections *prometheus.GaugeVec
	TCPConnectionsLimit *prometheus.GaugeVec
	RequestDuration *prometheus.HistogramVec
	PerTenantRequestDuration *prometheus.HistogramVec
	ReceivedMessageSize *prometheus.HistogramVec
	SentMessageSize *prometheus.HistogramVec
	InflightRequests *prometheus.GaugeVec
	SlowRequestServerThroughput *prometheus.HistogramVec
	TCPConnections *prometheus.GaugeVec
	TCPConnectionsLimit *prometheus.GaugeVec
	RequestDuration *prometheus.HistogramVec
	PerTenantRequestDuration *prometheus.HistogramVec
	ReceivedMessageSize *prometheus.HistogramVec
	SentMessageSize *prometheus.HistogramVec
	InflightRequests *prometheus.GaugeVec
	SlowRequestThroughput *prometheus.HistogramVec
}

func NewServerMetrics(cfg Config) *Metrics {
@@ -75,11 +75,12 @@ func NewServerMetrics(cfg Config) *Metrics {
			Name: "inflight_requests",
			Help: "Current number of inflight requests.",
		}, []string{"method", "route"}),
		SlowRequestServerThroughput: reg.NewHistogramVec(prometheus.HistogramOpts{
		SlowRequestThroughput: reg.NewHistogramVec(prometheus.HistogramOpts{
			Namespace: cfg.MetricsNamespace,
			Name: "slow_request_server_throughput_" + cfg.ThroughputConfig.Unit,
			Name: "slow_request_throughput_" + cfg.Throughput.Unit,
Contributor: You suggested we change this to request_throughput_samples_total, are you still making the change?

			Help: "Server throughput of long running requests.",
			ConstLabels: prometheus.Labels{"cutoff_ms": strconv.FormatInt(cfg.ThroughputConfig.SlowRequestCutoff.Milliseconds(), 10)},
			ConstLabels: prometheus.Labels{"cutoff_ms": strconv.FormatInt(cfg.Throughput.SlowRequestCutoff.Milliseconds(), 10)},
			Buckets: instrument.DefBuckets,
			NativeHistogramBucketFactor: cfg.MetricsNativeHistogramFactor,
			NativeHistogramMaxBucketNumber: 100,
			NativeHistogramMinResetDuration: time.Hour,
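Putting the naming pieces from this hunk together: the histogram name is assembled from the configured namespace and throughput unit, and the cutoff becomes a constant label. A rough sketch of what that yields, assuming this is dskit's server package (the import path is not stated in the diff) and hypothetical values:

	package main

	import (
		"time"

		"github.com/grafana/dskit/server"
	)

	func main() {
		var cfg server.Config
		cfg.MetricsNamespace = "cortex" // hypothetical namespace
		cfg.Throughput.Unit = "total_samples"
		cfg.Throughput.SlowRequestCutoff = 10 * time.Second

		// The histogram is registered as
		//   cortex_slow_request_throughput_total_samples
		// with the constant label cutoff_ms="10000" and, per Instrument.Wrap,
		// the labels method and route on each observation.
		_ = server.NewServerMetrics(cfg)
	}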
server/server.go (26 changes: 13 additions & 13 deletions)
@@ -154,10 +154,10 @@ type Config struct {
	// This limiter is called for every started and finished gRPC request.
	GrpcMethodLimiter GrpcInflightMethodLimiter `yaml:"-"`

	ThroughputConfig ThroughputConfig `yaml:"throughput_config"`
	Throughput Throughput `yaml:"throughput"`
Contributor: I also prefer nested config, but I noticed that most of the other configs in server.Config are flattened (LogLevel, LogFormat instead of Log.Level, Log.Format). I think it's better to stay consistent because this is a shared library. So can you just move the two fields and prefix them with Throughput?

}

type ThroughputConfig struct {
type Throughput struct {
	SlowRequestCutoff time.Duration `yaml:"slow_request_cutoff"`
	Unit string `yaml:"unit"`
}
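For reference, a sketch of the flattened layout the reviewer is asking for, mirroring how LogLevel and LogFormat avoid nesting; the field names and yaml keys below are guesses and not part of this PR:

	package server

	import "time"

	// Hypothetical flattened variant of the two Throughput fields,
	// prefixed as the reviewer suggests instead of using a nested struct.
	type Config struct {
		ThroughputSlowRequestCutoff time.Duration `yaml:"throughput_slow_request_cutoff"`
		ThroughputUnit              string        `yaml:"throughput_unit"`
	}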
@@ -216,8 +216,8 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
	f.StringVar(&cfg.LogRequestExcludeHeadersList, "server.log-request-headers-exclude-list", "", "Comma separated list of headers to exclude from logging. Only used if server.log-request-headers is true.")
	f.BoolVar(&cfg.LogRequestAtInfoLevel, "server.log-request-at-info-level-enabled", false, "Optionally log requests at info level instead of debug level. Applies to request headers as well if server.log-request-headers is enabled.")
	f.BoolVar(&cfg.ProxyProtocolEnabled, "server.proxy-protocol-enabled", false, "Enables PROXY protocol.")
	f.DurationVar(&cfg.ThroughputConfig.SlowRequestCutoff, "server.throughput-config.slow-request-cutoff", 0, "Duration after which a request is considered slow. For requests taking longer than this duration to finish, the throughput will be calculated. If set to 0, the throughput will not be calculated.")
	f.StringVar(&cfg.ThroughputConfig.Unit, "server.throughput-config.unit", "total_samples", "Unit of the server throughput metric, for example 'processed_bytes' or 'total_samples'. If set, it is appended to the server_throughput metric name.")
	f.DurationVar(&cfg.Throughput.SlowRequestCutoff, "server.throughput.slow-request-cutoff", 0, "Duration after which a request is considered slow. For requests taking longer than this duration to finish, the throughput will be calculated. If set to 0, the throughput will not be calculated.")
	f.StringVar(&cfg.Throughput.Unit, "server.throughput.unit", "total_samples", "Unit of the server throughput metric, for example 'processed_bytes' or 'total_samples'. If set, it is appended to the slow_request_server_throughput metric name.")
}

func (cfg *Config) registererOrDefault() prometheus.Registerer {
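The two new flags registered above map one-to-one onto the nested Throughput fields. A small usage sketch, again assuming dskit's server package and illustrative values:

	package main

	import (
		"flag"
		"fmt"

		"github.com/grafana/dskit/server"
	)

	func main() {
		fs := flag.NewFlagSet("example", flag.ExitOnError)

		var cfg server.Config
		cfg.RegisterFlags(fs)

		// Illustrative values: only requests slower than 10s get a
		// throughput observation, reported in total_samples per second.
		_ = fs.Parse([]string{
			"-server.throughput.slow-request-cutoff=10s",
			"-server.throughput.unit=total_samples",
		})

		fmt.Println(cfg.Throughput.SlowRequestCutoff, cfg.Throughput.Unit) // 10s total_samples
	}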
@@ -530,15 +530,15 @@ func BuildHTTPMiddleware(cfg Config, router *mux.Router, metrics *Metrics, logge
		},
		defaultLogMiddleware,
		middleware.Instrument{
			Duration: metrics.RequestDuration,
			PerTenantDuration: metrics.PerTenantRequestDuration,
			PerTenantCallback: cfg.PerTenantDurationInstrumentation,
			RequestBodySize: metrics.ReceivedMessageSize,
			ResponseBodySize: metrics.SentMessageSize,
			InflightRequests: metrics.InflightRequests,
			SlowRequestCutoff: cfg.ThroughputConfig.SlowRequestCutoff,
			ServerThroughputUnit: cfg.ThroughputConfig.Unit,
			SlowRequestServerThroughput: metrics.SlowRequestServerThroughput,
			Duration: metrics.RequestDuration,
			PerTenantDuration: metrics.PerTenantRequestDuration,
			PerTenantCallback: cfg.PerTenantDurationInstrumentation,
			RequestBodySize: metrics.ReceivedMessageSize,
			ResponseBodySize: metrics.SentMessageSize,
			InflightRequests: metrics.InflightRequests,
			SlowRequestCutoff: cfg.Throughput.SlowRequestCutoff,
			ThroughputUnit: cfg.Throughput.Unit,
			SlowRequestThroughput: metrics.SlowRequestThroughput,
		},
	}
	var httpMiddleware []middleware.Interface