Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ccl/sqlproxyccl: add proxy.sql.routing_method_count metric #130892

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions pkg/ccl/sqlproxyccl/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ go_library(
"//pkg/util/httputil",
"//pkg/util/log",
"//pkg/util/metric",
"//pkg/util/metric/aggmetric",
"//pkg/util/netutil/addr",
"//pkg/util/randutil",
"//pkg/util/retry",
Expand Down
25 changes: 21 additions & 4 deletions pkg/ccl/sqlproxyccl/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ package sqlproxyccl
import (
"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/util/metric"
"github.com/cockroachdb/cockroach/pkg/util/metric/aggmetric"
)

// metrics contains pointers to the metrics for monitoring proxy operations.
Expand Down Expand Up @@ -45,6 +46,11 @@ type metrics struct {
QueryCancelSuccessful *metric.Counter

AccessControlFileErrorCount *metric.Gauge

RoutingMethodCount *aggmetric.AggCounter
SNIRoutingMethodCount *aggmetric.Counter
DatabaseRoutingMethodCount *aggmetric.Counter
ClusterOptionRoutingMethodCount *aggmetric.Counter
}

// MetricStruct implements the metrics.Struct interface.
Expand Down Expand Up @@ -212,18 +218,23 @@ var (
Measurement: "Query Cancel Requests",
Unit: metric.Unit_COUNT,
}
accessControlFileErrorCount = metric.Metadata{
metaAccessControlFileErrorCount = metric.Metadata{
Name: "proxy.access_control.errors",
Help: "Numbers of access control list files that are currently having errors",
Measurement: "Access Control File Errors",
Unit: metric.Unit_COUNT,
}
metaRoutingMethodCount = metric.Metadata{
Name: "proxy.sql.routing_method_count",
Help: "Number of occurrences of each proxy routing method",
Measurement: "Number of occurrences",
Unit: metric.Unit_COUNT,
}
)

// makeProxyMetrics instantiates the metrics holder for proxy monitoring.
func makeProxyMetrics() metrics {

return metrics{
m := &metrics{
BackendDisconnectCount: metric.NewCounter(metaBackendDisconnectCount),
IdleDisconnectCount: metric.NewCounter(metaIdleDisconnectCount),
BackendDownCount: metric.NewCounter(metaBackendDownCount),
Expand Down Expand Up @@ -273,8 +284,14 @@ func makeProxyMetrics() metrics {
QueryCancelForwarded: metric.NewCounter(metaQueryCancelForwarded),
QueryCancelSuccessful: metric.NewCounter(metaQueryCancelSuccessful),

AccessControlFileErrorCount: metric.NewGauge(accessControlFileErrorCount),
AccessControlFileErrorCount: metric.NewGauge(metaAccessControlFileErrorCount),

RoutingMethodCount: aggmetric.NewCounter(metaRoutingMethodCount, "method"),
}
m.SNIRoutingMethodCount = m.RoutingMethodCount.AddChild("sni")
m.DatabaseRoutingMethodCount = m.RoutingMethodCount.AddChild("database")
m.ClusterOptionRoutingMethodCount = m.RoutingMethodCount.AddChild("cluster_option")
return *m
}

// updateForError updates the metrics relevant for the type of the error
Expand Down
16 changes: 13 additions & 3 deletions pkg/ccl/sqlproxyccl/proxy_handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -375,7 +375,7 @@ func (handler *proxyHandler) handle(

// NOTE: Errors returned from this function are user-facing errors so we
// should be careful with the details that we want to expose.
backendStartupMsg, clusterName, tenID, err := clusterNameAndTenantFromParams(ctx, fe)
backendStartupMsg, clusterName, tenID, err := clusterNameAndTenantFromParams(ctx, fe, handler.metrics)
if err != nil {
clientErr := withCode(err, codeParamsRoutingFailed)
log.Errorf(ctx, "unable to extract cluster name and tenant id: %s", err.Error())
Expand Down Expand Up @@ -715,7 +715,7 @@ func (handler *proxyHandler) setupIncomingCert(ctx context.Context) error {
// through its command-line options, i.e. "-c NAME=VALUE", "-cNAME=VALUE", and
// "--NAME=VALUE".
func clusterNameAndTenantFromParams(
ctx context.Context, fe *FrontendAdmitInfo,
ctx context.Context, fe *FrontendAdmitInfo, metrics *metrics,
) (*pgproto3.StartupMessage, string, roachpb.TenantID, error) {
clusterIdentifierDB, databaseName, err := parseDatabaseParam(fe.Msg.Parameters["database"])
if err != nil {
Expand Down Expand Up @@ -748,6 +748,7 @@ func clusterNameAndTenantFromParams(
// are blank. If SNI doesn't parse as a valid cluster id, then we assume
// that the end user didn't intend to pass cluster info through SNI and
// show missing cluster identifier instead of invalid cluster identifier.
metrics.SNIRoutingMethodCount.Inc(1)
return fe.Msg, clusterName, tenID, nil
}
}
Expand Down Expand Up @@ -790,7 +791,16 @@ func clusterNameAndTenantFromParams(
paramsOut[key] = value
}
}

// If both are provided, they must be the same, and we will track both.
if clusterIdentifierDB != "" {
metrics.DatabaseRoutingMethodCount.Inc(1)
}
// This metric is specific to the --cluster option. If we introduce a new
// --routing-id in the future, the existing metrics will still work, and
// we should introduce a new RoutingIDOptionReads metric.
if clusterIdentifierOpt != "" {
metrics.ClusterOptionRoutingMethodCount.Inc(1)
}
outMsg := &pgproto3.StartupMessage{
ProtocolVersion: fe.Msg.ProtocolVersion,
Parameters: paramsOut,
Expand Down
77 changes: 74 additions & 3 deletions pkg/ccl/sqlproxyccl/proxy_handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2517,15 +2517,17 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {

testCases := []struct {
name string
sniServerName string
params map[string]string
expectedClusterName string
expectedTenantID uint64
expectedParams map[string]string
expectedError string
expectedHint string
expectedMetrics func(t *testing.T, m *metrics)
}{
{
name: "empty params",
name: "empty params and server name",
params: map[string]string{},
expectedError: "missing cluster identifier",
expectedHint: clusterIdentifierHint,
Expand Down Expand Up @@ -2640,6 +2642,32 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {
expectedError: "invalid cluster identifier 'happy-koala-0'",
expectedHint: "Tenant ID 0 is invalid.",
},
{
name: "invalid sni format",
sniServerName: "foo-bar-baz",
params: map[string]string{},
expectedError: "missing cluster identifier",
expectedHint: clusterIdentifierHint,
},
{
name: "invalid sni value",
sniServerName: "happy2koala-.abc.aws-ap-south-1.cockroachlabs.cloud",
params: map[string]string{},
expectedError: "missing cluster identifier",
expectedHint: clusterIdentifierHint,
},
{
name: "valid sni value",
sniServerName: "happy-seal-10.abc.gcp-us-central1.cockroachlabs.cloud",
params: map[string]string{},
expectedClusterName: "happy-seal",
expectedTenantID: 10,
expectedParams: map[string]string{},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(1), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.SNIRoutingMethodCount.Value())
},
},
{
name: "multiple similar cluster identifiers",
params: map[string]string{
Expand All @@ -2649,6 +2677,11 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {
expectedClusterName: "happy-koala",
expectedTenantID: 7,
expectedParams: map[string]string{"database": "defaultdb"},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(2), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.DatabaseRoutingMethodCount.Value())
require.Equal(t, int64(1), m.ClusterOptionRoutingMethodCount.Value())
},
},
{
name: "cluster identifier in database param",
Expand All @@ -2659,6 +2692,10 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {
expectedClusterName: strings.Repeat("a", 100),
expectedTenantID: 7,
expectedParams: map[string]string{"database": "defaultdb", "foo": "bar"},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(1), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.DatabaseRoutingMethodCount.Value())
},
},
{
name: "valid cluster identifier with invalid arrangements",
Expand All @@ -2672,6 +2709,10 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {
"database": "defaultdb",
"options": "-c -c -c -c",
},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(1), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.ClusterOptionRoutingMethodCount.Value())
},
},
{
name: "short option: cluster identifier in options param",
Expand All @@ -2682,6 +2723,10 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {
expectedClusterName: "happy-koala",
expectedTenantID: 7,
expectedParams: map[string]string{"database": "defaultdb"},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(1), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.ClusterOptionRoutingMethodCount.Value())
},
},
{
name: "short option with spaces: cluster identifier in options param",
Expand All @@ -2692,6 +2737,10 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {
expectedClusterName: "happy-koala",
expectedTenantID: 7,
expectedParams: map[string]string{"database": "defaultdb"},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(1), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.ClusterOptionRoutingMethodCount.Value())
},
},
{
name: "long option: cluster identifier in options param",
Expand All @@ -2705,6 +2754,10 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {
"database": "defaultdb",
"options": "--foo=test",
},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(1), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.ClusterOptionRoutingMethodCount.Value())
},
},
{
name: "long option: cluster identifier in options param with other options",
Expand All @@ -2718,26 +2771,35 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {
"database": "defaultdb",
"options": "-csearch_path=public \t--foo=test",
},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(1), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.ClusterOptionRoutingMethodCount.Value())
},
},
{
name: "leading 0s are ok",
params: map[string]string{"database": "happy-koala-0-07.defaultdb"},
expectedClusterName: "happy-koala-0",
expectedTenantID: 7,
expectedParams: map[string]string{"database": "defaultdb"},
expectedMetrics: func(t *testing.T, m *metrics) {
require.Equal(t, int64(1), m.RoutingMethodCount.Count())
require.Equal(t, int64(1), m.DatabaseRoutingMethodCount.Value())
},
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
msg := &pgproto3.StartupMessage{Parameters: tc.params}
m := makeProxyMetrics()

originalParams := make(map[string]string)
for k, v := range msg.Parameters {
originalParams[k] = v
}

fe := &FrontendAdmitInfo{Msg: msg}
outMsg, clusterName, tenantID, err := clusterNameAndTenantFromParams(ctx, fe)
fe := &FrontendAdmitInfo{Msg: msg, SniServerName: tc.sniServerName}
outMsg, clusterName, tenantID, err := clusterNameAndTenantFromParams(ctx, fe, &m)
if tc.expectedError == "" {
require.NoErrorf(t, err, "failed test case\n%+v", tc)

Expand All @@ -2755,6 +2817,15 @@ func TestClusterNameAndTenantFromParams(t *testing.T) {

// Check that the original parameters were not modified.
require.Equal(t, originalParams, msg.Parameters)

if tc.expectedMetrics != nil {
tc.expectedMetrics(t, &m)
} else {
require.Zero(t, m.RoutingMethodCount.Count())
require.Zero(t, m.SNIRoutingMethodCount.Value())
require.Zero(t, m.DatabaseRoutingMethodCount.Value())
require.Zero(t, m.ClusterOptionRoutingMethodCount.Value())
}
})
}
}
Expand Down
Loading