From 93d685b20ccfc7c92edf72ccd92357336ab34b66 Mon Sep 17 00:00:00 2001 From: Jay Date: Fri, 11 Mar 2022 14:56:12 -0500 Subject: [PATCH] ccl/sqlproxyccl: add metric that measures connection migration latency Previously, we added support for connection migration in #76805. This commit adds a new `proxy.conn_migration.attempted.latency` metric that tracks latency for attempted connection migrations. Having this metric would be beneficial as we can now know how long users were blocked during connection migrations. Release justification: Low-risk metric-only change within sqlproxy. Release note: None --- pkg/ccl/sqlproxyccl/BUILD.bazel | 1 + pkg/ccl/sqlproxyccl/conn_migration.go | 11 ++++++++--- pkg/ccl/sqlproxyccl/metrics.go | 24 +++++++++++++++++------ pkg/ccl/sqlproxyccl/proxy_handler_test.go | 8 ++++++++ 4 files changed, 35 insertions(+), 9 deletions(-) diff --git a/pkg/ccl/sqlproxyccl/BUILD.bazel b/pkg/ccl/sqlproxyccl/BUILD.bazel index a59a6e0df844..85a6d2f6d7d9 100644 --- a/pkg/ccl/sqlproxyccl/BUILD.bazel +++ b/pkg/ccl/sqlproxyccl/BUILD.bazel @@ -20,6 +20,7 @@ go_library( importpath = "github.com/cockroachdb/cockroach/pkg/ccl/sqlproxyccl", visibility = ["//visibility:public"], deps = [ + "//pkg/base", "//pkg/ccl/sqlproxyccl/denylist", "//pkg/ccl/sqlproxyccl/idle", "//pkg/ccl/sqlproxyccl/interceptor", diff --git a/pkg/ccl/sqlproxyccl/conn_migration.go b/pkg/ccl/sqlproxyccl/conn_migration.go index 209d8dad4907..1bf8361dd7c5 100644 --- a/pkg/ccl/sqlproxyccl/conn_migration.go +++ b/pkg/ccl/sqlproxyccl/conn_migration.go @@ -20,6 +20,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/sql/pgwire/pgwirebase" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/syncutil" + "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/cockroach/pkg/util/uuid" "github.com/cockroachdb/errors" "github.com/cockroachdb/logtags" @@ -133,18 +134,22 @@ func (f *forwarder) runTransfer() (retErr error) { // the context (with the span) gets cleaned up. Some ideas to fix this: // (1) errgroup (?), (2) use the stopper instead of the go keyword - that // should fork a new span, and avoid this issue. + tBegin := timeutil.Now() logCtx := logtags.WithTags(context.Background(), logtags.FromContext(f.ctx)) defer func() { + latencyDur := timeutil.Since(tBegin) + f.metrics.ConnMigrationAttemptedLatency.RecordValue(latencyDur.Nanoseconds()) + if !ctx.isRecoverable() { - log.Infof(logCtx, "transfer failed: connection closed, err=%v", retErr) + log.Infof(logCtx, "transfer failed: connection closed, latency=%v, err=%v", latencyDur, retErr) f.metrics.ConnMigrationErrorFatalCount.Inc(1) } else { // Transfer was successful. if retErr == nil { - log.Infof(logCtx, "transfer successful") + log.Infof(logCtx, "transfer successful, latency=%v", latencyDur) f.metrics.ConnMigrationSuccessCount.Inc(1) } else { - log.Infof(logCtx, "transfer failed: connection recovered, err=%v", retErr) + log.Infof(logCtx, "transfer failed: connection recovered, latency=%v, err=%v", latencyDur, retErr) f.metrics.ConnMigrationErrorRecoverableCount.Inc(1) } if err := f.resumeProcessors(); err != nil { diff --git a/pkg/ccl/sqlproxyccl/metrics.go b/pkg/ccl/sqlproxyccl/metrics.go index dd12e852001a..9fd3299c82ce 100644 --- a/pkg/ccl/sqlproxyccl/metrics.go +++ b/pkg/ccl/sqlproxyccl/metrics.go @@ -9,6 +9,7 @@ package sqlproxyccl import ( + "github.com/cockroachdb/cockroach/pkg/base" "github.com/cockroachdb/cockroach/pkg/util/metric" "github.com/cockroachdb/errors" ) @@ -29,6 +30,7 @@ type metrics struct { ConnMigrationErrorFatalCount *metric.Counter ConnMigrationErrorRecoverableCount *metric.Counter ConnMigrationAttemptedCount *metric.Counter + ConnMigrationAttemptedLatency *metric.Histogram } // MetricStruct implements the metrics.Struct interface. @@ -100,12 +102,6 @@ var ( // Connection migration metrics. // // attempted = success + error_fatal + error_recoverable - metaConnMigrationAttemptedCount = metric.Metadata{ - Name: "proxy.conn_migration.attempted", - Help: "Number of attempted connection migrations", - Measurement: "Connection Migrations", - Unit: metric.Unit_COUNT, - } metaConnMigrationSuccessCount = metric.Metadata{ Name: "proxy.conn_migration.success", Help: "Number of successful connection migrations", @@ -126,6 +122,18 @@ var ( Measurement: "Connection Migrations", Unit: metric.Unit_COUNT, } + metaConnMigrationAttemptedCount = metric.Metadata{ + Name: "proxy.conn_migration.attempted", + Help: "Number of attempted connection migrations", + Measurement: "Connection Migrations", + Unit: metric.Unit_COUNT, + } + metaConnMigrationAttemptedLatency = metric.Metadata{ + Name: "proxy.conn_migration.attempted.latency", + Help: "Latency histogram for attempted connection migrations", + Measurement: "Latency", + Unit: metric.Unit_NANOSECONDS, + } ) // makeProxyMetrics instantiates the metrics holder for proxy monitoring. @@ -146,6 +154,10 @@ func makeProxyMetrics() metrics { ConnMigrationErrorFatalCount: metric.NewCounter(metaConnMigrationErrorFatalCount), ConnMigrationErrorRecoverableCount: metric.NewCounter(metaConnMigrationErrorRecoverableCount), ConnMigrationAttemptedCount: metric.NewCounter(metaConnMigrationAttemptedCount), + ConnMigrationAttemptedLatency: metric.NewLatency( + metaConnMigrationAttemptedLatency, + base.DefaultHistogramWindowInterval(), + ), } } diff --git a/pkg/ccl/sqlproxyccl/proxy_handler_test.go b/pkg/ccl/sqlproxyccl/proxy_handler_test.go index c12689a97829..001f18a34de3 100644 --- a/pkg/ccl/sqlproxyccl/proxy_handler_test.go +++ b/pkg/ccl/sqlproxyccl/proxy_handler_test.go @@ -948,6 +948,8 @@ func TestConnectionMigration(t *testing.T) { require.Equal(t, f.metrics.ConnMigrationAttemptedCount.Count(), f.metrics.ConnMigrationSuccessCount.Count(), ) + require.Equal(t, f.metrics.ConnMigrationAttemptedCount.Count(), + f.metrics.ConnMigrationAttemptedLatency.TotalCount()) }) // Transfers should fail if there is an open transaction. These failed @@ -995,6 +997,8 @@ func TestConnectionMigration(t *testing.T) { require.NotEqual(t, initAddr, queryAddr(t, tCtx, db)) require.Equal(t, prevErrorRecoverableCount, f.metrics.ConnMigrationErrorRecoverableCount.Count()) require.Equal(t, int64(0), f.metrics.ConnMigrationErrorFatalCount.Count()) + require.Equal(t, f.metrics.ConnMigrationAttemptedCount.Count(), + f.metrics.ConnMigrationAttemptedLatency.TotalCount()) // We have already asserted metrics above, so transfer must have // been completed. @@ -1022,6 +1026,8 @@ func TestConnectionMigration(t *testing.T) { require.Equal(t, initAddr, queryAddr(t, tCtx, db)) require.Equal(t, initSuccessCount, f.metrics.ConnMigrationSuccessCount.Count()) require.Equal(t, int64(0), f.metrics.ConnMigrationErrorFatalCount.Count()) + require.Equal(t, f.metrics.ConnMigrationAttemptedCount.Count(), + f.metrics.ConnMigrationAttemptedLatency.TotalCount()) // We have already asserted metrics above, so transfer must have // been completed. @@ -1125,6 +1131,8 @@ func TestConnectionMigration(t *testing.T) { return f.metrics.ConnMigrationErrorFatalCount.Count() == 1 }, 30*time.Second, 100*time.Millisecond) require.NotNil(t, f.ctx.Err()) + require.Equal(t, f.metrics.ConnMigrationAttemptedCount.Count(), + f.metrics.ConnMigrationAttemptedLatency.TotalCount()) }) }