Skip to content

Commit

Permalink
ccl/sqlproxyccl: add metric that measures connection migration latency
Browse files Browse the repository at this point in the history
Previously, we added support for connection migration in cockroachdb#76805. This commit
adds a new `proxy.conn_migration.attempted.latency` metric that tracks latency
for attempted connection migrations. Having this metric would be beneficial
as we can now know how long users were blocked during connection migrations.

Release justification: Low-risk metric-only change within sqlproxy.

Release note: None
  • Loading branch information
jaylim-crl committed Mar 14, 2022
1 parent 9cdc505 commit 93d685b
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 9 deletions.
1 change: 1 addition & 0 deletions pkg/ccl/sqlproxyccl/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ go_library(
importpath = "github.com/cockroachdb/cockroach/pkg/ccl/sqlproxyccl",
visibility = ["//visibility:public"],
deps = [
"//pkg/base",
"//pkg/ccl/sqlproxyccl/denylist",
"//pkg/ccl/sqlproxyccl/idle",
"//pkg/ccl/sqlproxyccl/interceptor",
Expand Down
11 changes: 8 additions & 3 deletions pkg/ccl/sqlproxyccl/conn_migration.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgwirebase"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/syncutil"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/cockroach/pkg/util/uuid"
"github.com/cockroachdb/errors"
"github.com/cockroachdb/logtags"
Expand Down Expand Up @@ -133,18 +134,22 @@ func (f *forwarder) runTransfer() (retErr error) {
// the context (with the span) gets cleaned up. Some ideas to fix this:
// (1) errgroup (?), (2) use the stopper instead of the go keyword - that
// should fork a new span, and avoid this issue.
tBegin := timeutil.Now()
logCtx := logtags.WithTags(context.Background(), logtags.FromContext(f.ctx))
defer func() {
latencyDur := timeutil.Since(tBegin)
f.metrics.ConnMigrationAttemptedLatency.RecordValue(latencyDur.Nanoseconds())

if !ctx.isRecoverable() {
log.Infof(logCtx, "transfer failed: connection closed, err=%v", retErr)
log.Infof(logCtx, "transfer failed: connection closed, latency=%v, err=%v", latencyDur, retErr)
f.metrics.ConnMigrationErrorFatalCount.Inc(1)
} else {
// Transfer was successful.
if retErr == nil {
log.Infof(logCtx, "transfer successful")
log.Infof(logCtx, "transfer successful, latency=%v", latencyDur)
f.metrics.ConnMigrationSuccessCount.Inc(1)
} else {
log.Infof(logCtx, "transfer failed: connection recovered, err=%v", retErr)
log.Infof(logCtx, "transfer failed: connection recovered, latency=%v, err=%v", latencyDur, retErr)
f.metrics.ConnMigrationErrorRecoverableCount.Inc(1)
}
if err := f.resumeProcessors(); err != nil {
Expand Down
24 changes: 18 additions & 6 deletions pkg/ccl/sqlproxyccl/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
package sqlproxyccl

import (
"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/util/metric"
"github.com/cockroachdb/errors"
)
Expand All @@ -29,6 +30,7 @@ type metrics struct {
ConnMigrationErrorFatalCount *metric.Counter
ConnMigrationErrorRecoverableCount *metric.Counter
ConnMigrationAttemptedCount *metric.Counter
ConnMigrationAttemptedLatency *metric.Histogram
}

// MetricStruct implements the metrics.Struct interface.
Expand Down Expand Up @@ -100,12 +102,6 @@ var (
// Connection migration metrics.
//
// attempted = success + error_fatal + error_recoverable
metaConnMigrationAttemptedCount = metric.Metadata{
Name: "proxy.conn_migration.attempted",
Help: "Number of attempted connection migrations",
Measurement: "Connection Migrations",
Unit: metric.Unit_COUNT,
}
metaConnMigrationSuccessCount = metric.Metadata{
Name: "proxy.conn_migration.success",
Help: "Number of successful connection migrations",
Expand All @@ -126,6 +122,18 @@ var (
Measurement: "Connection Migrations",
Unit: metric.Unit_COUNT,
}
metaConnMigrationAttemptedCount = metric.Metadata{
Name: "proxy.conn_migration.attempted",
Help: "Number of attempted connection migrations",
Measurement: "Connection Migrations",
Unit: metric.Unit_COUNT,
}
metaConnMigrationAttemptedLatency = metric.Metadata{
Name: "proxy.conn_migration.attempted.latency",
Help: "Latency histogram for attempted connection migrations",
Measurement: "Latency",
Unit: metric.Unit_NANOSECONDS,
}
)

// makeProxyMetrics instantiates the metrics holder for proxy monitoring.
Expand All @@ -146,6 +154,10 @@ func makeProxyMetrics() metrics {
ConnMigrationErrorFatalCount: metric.NewCounter(metaConnMigrationErrorFatalCount),
ConnMigrationErrorRecoverableCount: metric.NewCounter(metaConnMigrationErrorRecoverableCount),
ConnMigrationAttemptedCount: metric.NewCounter(metaConnMigrationAttemptedCount),
ConnMigrationAttemptedLatency: metric.NewLatency(
metaConnMigrationAttemptedLatency,
base.DefaultHistogramWindowInterval(),
),
}
}

Expand Down
8 changes: 8 additions & 0 deletions pkg/ccl/sqlproxyccl/proxy_handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -948,6 +948,8 @@ func TestConnectionMigration(t *testing.T) {
require.Equal(t, f.metrics.ConnMigrationAttemptedCount.Count(),
f.metrics.ConnMigrationSuccessCount.Count(),
)
require.Equal(t, f.metrics.ConnMigrationAttemptedCount.Count(),
f.metrics.ConnMigrationAttemptedLatency.TotalCount())
})

// Transfers should fail if there is an open transaction. These failed
Expand Down Expand Up @@ -995,6 +997,8 @@ func TestConnectionMigration(t *testing.T) {
require.NotEqual(t, initAddr, queryAddr(t, tCtx, db))
require.Equal(t, prevErrorRecoverableCount, f.metrics.ConnMigrationErrorRecoverableCount.Count())
require.Equal(t, int64(0), f.metrics.ConnMigrationErrorFatalCount.Count())
require.Equal(t, f.metrics.ConnMigrationAttemptedCount.Count(),
f.metrics.ConnMigrationAttemptedLatency.TotalCount())

// We have already asserted metrics above, so transfer must have
// been completed.
Expand Down Expand Up @@ -1022,6 +1026,8 @@ func TestConnectionMigration(t *testing.T) {
require.Equal(t, initAddr, queryAddr(t, tCtx, db))
require.Equal(t, initSuccessCount, f.metrics.ConnMigrationSuccessCount.Count())
require.Equal(t, int64(0), f.metrics.ConnMigrationErrorFatalCount.Count())
require.Equal(t, f.metrics.ConnMigrationAttemptedCount.Count(),
f.metrics.ConnMigrationAttemptedLatency.TotalCount())

// We have already asserted metrics above, so transfer must have
// been completed.
Expand Down Expand Up @@ -1125,6 +1131,8 @@ func TestConnectionMigration(t *testing.T) {
return f.metrics.ConnMigrationErrorFatalCount.Count() == 1
}, 30*time.Second, 100*time.Millisecond)
require.NotNil(t, f.ctx.Err())
require.Equal(t, f.metrics.ConnMigrationAttemptedCount.Count(),
f.metrics.ConnMigrationAttemptedLatency.TotalCount())
})
}

Expand Down

0 comments on commit 93d685b

Please sign in to comment.