Skip to content

Commit

Permalink
Merge pull request #108959 from jaylim-crl/backport23.1-108914
Browse files Browse the repository at this point in the history
release-23.1: sqlproxyccl: do not report BackendDown metrics on throttle and routing errors
  • Loading branch information
jaylim-crl authored Aug 18, 2023
2 parents 7d5edc2 + e69ae42 commit 581d897
Show file tree
Hide file tree
Showing 10 changed files with 35 additions and 40 deletions.
10 changes: 5 additions & 5 deletions pkg/ccl/sqlproxyccl/backend_dialer.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ var BackendDial = func(
if err != nil {
return nil, withCode(
errors.Wrap(err, "unable to reach backend SQL server"),
codeBackendDown)
codeBackendDialFailed)
}

// Ensure that conn is closed whenever BackendDial returns an error.
Expand All @@ -54,18 +54,18 @@ var BackendDial = func(
if err := binary.Write(conn, binary.BigEndian, pgSSLRequest); err != nil {
return nil, withCode(
errors.Wrap(err, "sending SSLRequest to target server"),
codeBackendDown)
codeBackendDialFailed)
}
response := make([]byte, 1)
if _, err = io.ReadFull(conn, response); err != nil {
return nil, withCode(
errors.New("reading response to SSLRequest"),
codeBackendDown)
codeBackendDialFailed)
}
if response[0] != pgAcceptSSLRequest {
return nil, withCode(
errors.New("target server refused TLS connection"),
codeBackendRefusedTLS)
codeBackendDialFailed)
}
conn = tls.Client(conn, tlsConfig.Clone())
}
Expand All @@ -74,7 +74,7 @@ var BackendDial = func(
if _, err := conn.Write(msg.Encode(nil)); err != nil {
return nil, withCode(
errors.Wrapf(err, "relaying StartupMessage to target server %v", serverAddress),
codeBackendDown)
codeBackendDialFailed)
}
return conn, nil
}
8 changes: 4 additions & 4 deletions pkg/ccl/sqlproxyccl/backend_dialer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -94,22 +94,22 @@ func TestBackendDialTLS(t *testing.T) {
name: "tenant10To11",
addr: sql11.SQLAddr(),
tenantID: 10,
errCode: codeBackendDown,
errCode: codeBackendDialFailed,
}, {
name: "tenant11To10",
addr: sql10.SQLAddr(),
tenantID: 11,
errCode: codeBackendDown,
errCode: codeBackendDialFailed,
}, {
name: "tenant10ToStorage",
addr: storageServer.ServingSQLAddr(),
tenantID: 10,
errCode: codeBackendDown,
errCode: codeBackendDialFailed,
}, {
name: "tenantWithNodeIDToStoage",
addr: storageServer.ServingSQLAddr(),
tenantID: uint64(storageServer.NodeID()),
errCode: codeBackendDown,
errCode: codeBackendDialFailed,
}}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
Expand Down
2 changes: 1 addition & 1 deletion pkg/ccl/sqlproxyccl/connector.go
Original file line number Diff line number Diff line change
Expand Up @@ -365,7 +365,7 @@ func (c *connector) dialSQLServer(

conn, err := BackendDial(c.StartupMsg, serverAssignment.Addr(), tlsConf)
if err != nil {
if getErrorCode(err) == codeBackendDown {
if getErrorCode(err) == codeBackendDialFailed {
return nil, markAsRetriableConnectorError(err)
}
return nil, err
Expand Down
4 changes: 2 additions & 2 deletions pkg/ccl/sqlproxyccl/connector_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -888,14 +888,14 @@ func TestConnector_dialSQLServer(t *testing.T) {
require.Equal(t, c.StartupMsg, msg)
require.Equal(t, "127.0.0.2:4567", serverAddress)
require.Nil(t, tlsConfig)
return nil, withCode(errors.New("bar"), codeBackendDown)
return nil, withCode(errors.New("bar"), codeBackendDialFailed)
},
)()
sa := balancer.NewServerAssignment(tenantID, tracker, nil, "127.0.0.2:4567")
defer sa.Close()

conn, err := c.dialSQLServer(sa)
require.EqualError(t, err, "codeBackendDown: bar")
require.EqualError(t, err, "codeBackendDialFailed: bar")
require.True(t, isRetriableConnectorError(err))
require.Nil(t, conn)
})
Expand Down
10 changes: 3 additions & 7 deletions pkg/ccl/sqlproxyccl/error.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,9 @@ const (
// on the client's session parameters.
codeParamsRoutingFailed

// codeBackendDown indicates an error establishing or maintaining a connection
// to the backend SQL server.
codeBackendDown

// codeBackendRefusedTLS indicates that the backend SQL server refused a TLS-
// enabled SQL connection.
codeBackendRefusedTLS
// codeBackendDialFailed indicates an error establishing a connection
// between the proxy and the backend SQL server.
codeBackendDialFailed

// codeBackendDisconnected indicates that the backend disconnected (with a
// connection error) while serving client traffic.
Expand Down
19 changes: 8 additions & 11 deletions pkg/ccl/sqlproxyccl/errorcode_string.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 5 additions & 3 deletions pkg/ccl/sqlproxyccl/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -292,11 +292,13 @@ func (metrics *metrics) updateForError(err error) {
metrics.ClientDisconnectCount.Inc(1)
case codeProxyRefusedConnection:
metrics.RefusedConnCount.Inc(1)
metrics.BackendDownCount.Inc(1)
case codeParamsRoutingFailed, codeUnavailable:
metrics.RoutingErrCount.Inc(1)
metrics.BackendDownCount.Inc(1)
case codeBackendDown:
case codeBackendDialFailed:
// NOTE: Historically, we had the code named codeBackendDown instead of
// codeBackendDialFailed. This has been renamed to codeBackendDialFailed
// for accuracy, and to prevent confusion by developers. We don't rename
// the metrics here as that may break downstream consumers.
metrics.BackendDownCount.Inc(1)
case codeAuthFailed:
metrics.AuthFailedCount.Inc(1)
Expand Down
8 changes: 4 additions & 4 deletions pkg/ccl/sqlproxyccl/metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,12 @@ func TestMetricsUpdateForError(t *testing.T) {

{codeExpiredClientConnection, []*metric.Counter{m.ExpiredClientConnCount}},

{codeProxyRefusedConnection, []*metric.Counter{m.RefusedConnCount, m.BackendDownCount}},
{codeProxyRefusedConnection, []*metric.Counter{m.RefusedConnCount}},

{codeParamsRoutingFailed, []*metric.Counter{m.RoutingErrCount, m.BackendDownCount}},
{codeUnavailable, []*metric.Counter{m.RoutingErrCount, m.BackendDownCount}},
{codeParamsRoutingFailed, []*metric.Counter{m.RoutingErrCount}},
{codeUnavailable, []*metric.Counter{m.RoutingErrCount}},

{codeBackendDown, []*metric.Counter{m.BackendDownCount}},
{codeBackendDialFailed, []*metric.Counter{m.BackendDownCount}},

{codeAuthFailed, []*metric.Counter{m.AuthFailedCount}},
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/ccl/sqlproxyccl/proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ func toPgError(err error) *pgproto3.ErrorResponse {
switch getErrorCode(err) {
// These are send as is.
case codeExpiredClientConnection,
codeBackendDown,
codeBackendDialFailed,
codeParamsRoutingFailed,
codeClientDisconnected,
codeBackendDisconnected,
Expand Down
4 changes: 2 additions & 2 deletions pkg/ccl/sqlproxyccl/proxy_handler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -595,7 +595,7 @@ func TestBackendDownRetry(t *testing.T) {
if callCount >= 3 {
directoryServer.DeleteTenant(roachpb.MustMakeTenantID(28))
}
return nil, withCode(errors.New("SQL pod is down"), codeBackendDown)
return nil, withCode(errors.New("SQL pod is down"), codeBackendDialFailed)
})()

// Valid connection, but no backend server running.
Expand Down Expand Up @@ -1358,7 +1358,7 @@ func TestDirectoryConnect(t *testing.T) {
if countFailures >= 3 {
return nil, withCode(errors.New("backend disconnected"), codeBackendDisconnected)
}
return nil, withCode(errors.New("backend down"), codeBackendDown)
return nil, withCode(errors.New("backend down"), codeBackendDialFailed)
})()

// Ensure that Directory.ReportFailure is being called correctly.
Expand Down

0 comments on commit 581d897

Please sign in to comment.