diff --git a/docs/generated/settings/settings-for-tenants.txt b/docs/generated/settings/settings-for-tenants.txt
index aa30dc4b924c..cb35d9ab57cb 100644
--- a/docs/generated/settings/settings-for-tenants.txt
+++ b/docs/generated/settings/settings-for-tenants.txt
@@ -94,6 +94,8 @@ server.shutdown.connections.timeout duration 0s the maximum amount of time a ser
server.shutdown.initial_wait duration 0s the amount of time a server waits in an unready state before proceeding with a drain (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting. --drain-wait is to specify the duration of the whole draining process, while server.shutdown.initial_wait is to set the wait time for health probes to notice that the node is not ready.) application
server.shutdown.jobs.timeout duration 10s the maximum amount of time a server waits for all currently executing jobs to notice drain request and to perform orderly shutdown application
server.shutdown.transactions.timeout duration 10s the timeout for waiting for active transactions to finish during a drain (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting) application
+server.sql_tcp_keep_alive.count integer 3 maximum number of probes that will be sent out before a connection is dropped because it's unresponsive (Linux and Darwin only) application
+server.sql_tcp_keep_alive.interval duration 10s time between keep alive probes and idle time before probes are sent out application
server.time_until_store_dead duration 5m0s the time after which if there is no new gossiped information about a store, it is considered dead application
server.user_login.cert_password_method.auto_scram_promotion.enabled boolean true whether to automatically promote cert-password authentication to use SCRAM application
server.user_login.downgrade_scram_stored_passwords_to_bcrypt.enabled boolean true if server.user_login.password_encryption=crdb-bcrypt, this controls whether to automatically re-encode stored passwords using scram-sha-256 to crdb-bcrypt application
diff --git a/docs/generated/settings/settings.html b/docs/generated/settings/settings.html
index e910fcaf7ae0..6fea58918a14 100644
--- a/docs/generated/settings/settings.html
+++ b/docs/generated/settings/settings.html
@@ -122,6 +122,8 @@
server.shutdown.jobs.timeout
| duration | 10s | the maximum amount of time a server waits for all currently executing jobs to notice drain request and to perform orderly shutdown | Serverless/Dedicated/Self-Hosted |
server.shutdown.lease_transfer_iteration.timeout
| duration | 5s | the timeout for a single iteration of the range lease transfer phase of draining (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting) | Dedicated/Self-Hosted |
server.shutdown.transactions.timeout
| duration | 10s | the timeout for waiting for active transactions to finish during a drain (note that the --drain-wait parameter for cockroach node drain may need adjustment after changing this setting) | Serverless/Dedicated/Self-Hosted |
+server.sql_tcp_keep_alive.count
| integer | 3 | maximum number of probes that will be sent out before a connection is dropped because it's unresponsive (Linux and Darwin only) | Serverless/Dedicated/Self-Hosted |
+server.sql_tcp_keep_alive.interval
| duration | 10s | time between keep alive probes and idle time before probes are sent out | Serverless/Dedicated/Self-Hosted |
server.time_until_store_dead
| duration | 5m0s | the time after which if there is no new gossiped information about a store, it is considered dead | Serverless/Dedicated/Self-Hosted |
server.user_login.cert_password_method.auto_scram_promotion.enabled
| boolean | true | whether to automatically promote cert-password authentication to use SCRAM | Serverless/Dedicated/Self-Hosted |
server.user_login.downgrade_scram_stored_passwords_to_bcrypt.enabled
| boolean | true | if server.user_login.password_encryption=crdb-bcrypt, this controls whether to automatically re-encode stored passwords using scram-sha-256 to crdb-bcrypt | Serverless/Dedicated/Self-Hosted |
diff --git a/pkg/cmd/roachtest/tests/network.go b/pkg/cmd/roachtest/tests/network.go
index 331cd4bfe0fd..976b20d9c656 100644
--- a/pkg/cmd/roachtest/tests/network.go
+++ b/pkg/cmd/roachtest/tests/network.go
@@ -12,7 +12,6 @@ package tests
import (
"context"
- "errors"
"fmt"
"os"
"path/filepath"
@@ -25,7 +24,9 @@ import (
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/test"
"github.com/cockroachdb/cockroach/pkg/roachprod/install"
"github.com/cockroachdb/cockroach/pkg/testutils"
+ "github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
+ errors "github.com/cockroachdb/errors"
_ "github.com/lib/pq" // register postgres driver
"github.com/stretchr/testify/require"
)
@@ -293,6 +294,101 @@ sudo iptables-save
m.Wait()
}
+// runClientNetworkConnectionTimeout simulates a scenario where the client and
+// server loose connectivity with a connection that is idle. The purpose of this
+// test is to confirm that the keep alive settings are enforced.
+func runClientNetworkConnectionTimeout(ctx context.Context, t test.Test, c cluster.Cluster) {
+ n := c.Spec().NodeCount
+ serverNodes, clientNode := c.Range(1, n-1), c.Nodes(n)
+ settings := install.MakeClusterSettings(install.SecureOption(true))
+ c.Start(ctx, t.L(), option.DefaultStartOpts(), settings, serverNodes)
+ certsDir := "/home/ubuntu/certs"
+ t.L().Printf("connecting to cluster from roachtest...")
+ db, err := c.ConnE(ctx, t.L(), 1)
+ require.NoError(t, err)
+ defer db.Close()
+
+ grp := ctxgroup.WithContext(ctx)
+ // Startup a connection on the client server, which will be running a
+ // long transaction (i.e. just the sleep builtin).
+ var runOutput install.RunResultDetails
+ grp.GoCtx(func(ctx context.Context) error {
+ ips, err := c.ExternalIP(ctx, t.L(), c.Node(1))
+ if err != nil {
+ return err
+ }
+ commandThatWillDisconnect := fmt.Sprintf(`./cockroach sql --certs-dir %s --url "postgres://root@%s:26257" -e "SELECT pg_sleep(600)"`, certsDir, ips[0])
+ t.L().Printf("Executing long running query: %s", commandThatWillDisconnect)
+ output, err := c.RunWithDetails(ctx, t.L(), clientNode, commandThatWillDisconnect)
+ runOutput = output[0]
+ return err
+ })
+ // Confirm that the connection was started.
+ testutils.SucceedsSoon(t, func() error {
+ row := db.QueryRow("SELECT count(*) FROM [SHOW CLUSTER SESSIONS] WHERE active_queries='SELECT pg_sleep(600)'")
+ var count int
+ if err := row.Scan(&count); err != nil {
+ return err
+ }
+ // Wait for the query to start up.
+ if count != 1 {
+ return errors.AssertionFailedf("unexepcted count :%v", count)
+ }
+ return nil
+ })
+
+ const netConfigCmd = `
+# ensure any failure fails the entire script.
+set -e;
+
+# Setting default filter policy
+sudo iptables -P INPUT ACCEPT;
+sudo iptables -P OUTPUT ACCEPT;
+
+# Drop any client traffic to CRDB.
+sudo iptables -A INPUT -p tcp --sport 26257 -j DROP;
+sudo iptables -A OUTPUT -p tcp --dport 26257 -j DROP;
+`
+ t.L().Printf("blocking networking on client; config cmd:\n%s", netConfigCmd)
+ blockStartTime := timeutil.Now()
+ require.NoError(t, c.RunE(ctx, clientNode, netConfigCmd))
+
+ // (attempt to) restore iptables when test end, so that the client
+ // can be investigated afterward.
+ defer func() {
+ const restoreNet = `
+set -e;
+sudo iptables -F INPUT;
+sudo iptables -F OUTPUT;
+`
+ t.L().Printf("restoring iptables; config cmd:\n%s", restoreNet)
+ require.NoError(t, c.RunE(ctx, clientNode, restoreNet))
+ }()
+
+ // We expect the connection to timeout within 30 seconds based on
+ // the default settings. We will wait for up to 1 minutes for the
+ // connection to drop.
+ testutils.SucceedsWithin(t, func() error {
+ row := db.QueryRow("SELECT count(*) FROM [SHOW CLUSTER SESSIONS] WHERE active_queries='SELECT pg_sleep(600)'")
+ var count int
+ if err := row.Scan(&count); err != nil {
+ return err
+ }
+ if count != 0 {
+ return errors.AssertionFailedf("unexepcted count :%d", count)
+ }
+ return nil
+ },
+ time.Minute)
+ // Confirm it took at least a minute for the connection to clear out.
+ require.Greaterf(t, timeutil.Since(blockStartTime), time.Second*30, "connection dropped earlier than expected")
+ t.L().Printf("Connection was dropped after %s", timeutil.Since(blockStartTime))
+ // We expect the connection to be dropped with the lower keep alive settings.
+ require.NoError(t, grp.Wait())
+ require.Contains(t, runOutput.Stderr, "If the server is running, check --host client-side and --advertise server-side",
+ "Did not detect connection failure %s %d", runOutput.Stderr, runOutput.RemoteExitStatus)
+}
+
func registerNetwork(r registry.Registry) {
const numNodes = 4
r.Add(registry.TestSpec{
@@ -306,4 +402,14 @@ func registerNetwork(r registry.Registry) {
runNetworkAuthentication(ctx, t, c)
},
})
+
+ r.Add(registry.TestSpec{
+ Name: "network/client-connection-timeout",
+ Owner: registry.OwnerSQLFoundations,
+ Cluster: r.MakeClusterSpec(2), // One server and client
+ CompatibleClouds: registry.AllExceptAWS,
+ Suites: registry.Suites(registry.Nightly),
+ Leases: registry.MetamorphicLeases,
+ Run: runClientNetworkConnectionTimeout,
+ })
}
diff --git a/pkg/server/BUILD.bazel b/pkg/server/BUILD.bazel
index 18e2eb828a03..3d0b6a569e8c 100644
--- a/pkg/server/BUILD.bazel
+++ b/pkg/server/BUILD.bazel
@@ -467,6 +467,7 @@ go_test(
"statements_test.go",
"status_ext_test.go",
"sticky_vfs_test.go",
+ "tcp_keepalive_manager_test.go",
"tenant_delayed_id_set_test.go",
"tenant_range_lookup_test.go",
"testserver_test.go",
@@ -582,5 +583,97 @@ go_test(
"@org_golang_google_grpc//codes",
"@org_golang_google_grpc//metadata",
"@org_golang_google_grpc//status",
- ],
+ ] + select({
+ "@io_bazel_rules_go//go/platform:android_386": [
+ "//pkg/util/ctxgroup",
+ "//pkg/util/sysutil",
+ "@com_github_cockroachdb_cmux//:cmux",
+ ],
+ "@io_bazel_rules_go//go/platform:android_amd64": [
+ "//pkg/util/ctxgroup",
+ "//pkg/util/sysutil",
+ "@com_github_cockroachdb_cmux//:cmux",
+ ],
+ "@io_bazel_rules_go//go/platform:android_arm": [
+ "//pkg/util/ctxgroup",
+ "//pkg/util/sysutil",
+ "@com_github_cockroachdb_cmux//:cmux",
+ ],
+ "@io_bazel_rules_go//go/platform:android_arm64": [
+ "//pkg/util/ctxgroup",
+ "//pkg/util/sysutil",
+ "@com_github_cockroachdb_cmux//:cmux",
+ ],
+ "@io_bazel_rules_go//go/platform:darwin_arm64": [
+ "//pkg/util/ctxgroup",
+ "//pkg/util/sysutil",
+ "@com_github_cockroachdb_cmux//:cmux",
+ ],
+ "@io_bazel_rules_go//go/platform:ios_arm64": [
+ "//pkg/util/ctxgroup",
+ "//pkg/util/sysutil",
+ "@com_github_cockroachdb_cmux//:cmux",
+ ],
+ "@io_bazel_rules_go//go/platform:linux_386": [
+ "//pkg/util/ctxgroup",
+ "//pkg/util/sysutil",
+ "@com_github_cockroachdb_cmux//:cmux",
+ ],
+ "@io_bazel_rules_go//go/platform:linux_amd64": [
+ "//pkg/util/ctxgroup",
+ "//pkg/util/sysutil",
+ "@com_github_cockroachdb_cmux//:cmux",
+ ],
+ "@io_bazel_rules_go//go/platform:linux_arm": [
+ "//pkg/util/ctxgroup",
+ "//pkg/util/sysutil",
+ "@com_github_cockroachdb_cmux//:cmux",
+ ],
+ "@io_bazel_rules_go//go/platform:linux_arm64": [
+ "//pkg/util/ctxgroup",
+ "//pkg/util/sysutil",
+ "@com_github_cockroachdb_cmux//:cmux",
+ ],
+ "@io_bazel_rules_go//go/platform:linux_mips": [
+ "//pkg/util/ctxgroup",
+ "//pkg/util/sysutil",
+ "@com_github_cockroachdb_cmux//:cmux",
+ ],
+ "@io_bazel_rules_go//go/platform:linux_mips64": [
+ "//pkg/util/ctxgroup",
+ "//pkg/util/sysutil",
+ "@com_github_cockroachdb_cmux//:cmux",
+ ],
+ "@io_bazel_rules_go//go/platform:linux_mips64le": [
+ "//pkg/util/ctxgroup",
+ "//pkg/util/sysutil",
+ "@com_github_cockroachdb_cmux//:cmux",
+ ],
+ "@io_bazel_rules_go//go/platform:linux_mipsle": [
+ "//pkg/util/ctxgroup",
+ "//pkg/util/sysutil",
+ "@com_github_cockroachdb_cmux//:cmux",
+ ],
+ "@io_bazel_rules_go//go/platform:linux_ppc64": [
+ "//pkg/util/ctxgroup",
+ "//pkg/util/sysutil",
+ "@com_github_cockroachdb_cmux//:cmux",
+ ],
+ "@io_bazel_rules_go//go/platform:linux_ppc64le": [
+ "//pkg/util/ctxgroup",
+ "//pkg/util/sysutil",
+ "@com_github_cockroachdb_cmux//:cmux",
+ ],
+ "@io_bazel_rules_go//go/platform:linux_riscv64": [
+ "//pkg/util/ctxgroup",
+ "//pkg/util/sysutil",
+ "@com_github_cockroachdb_cmux//:cmux",
+ ],
+ "@io_bazel_rules_go//go/platform:linux_s390x": [
+ "//pkg/util/ctxgroup",
+ "//pkg/util/sysutil",
+ "@com_github_cockroachdb_cmux//:cmux",
+ ],
+ "//conditions:default": [],
+ }),
)
diff --git a/pkg/server/server.go b/pkg/server/server.go
index 4ee3fbb3a19b..a6267f2b61bb 100644
--- a/pkg/server/server.go
+++ b/pkg/server/server.go
@@ -2243,6 +2243,7 @@ func (s *topLevelServer) AcceptClients(ctx context.Context) error {
s.pgPreServer,
s.serverController.sqlMux,
s.pgL,
+ s.ClusterSettings(),
&s.cfg.SocketFile,
); err != nil {
return err
diff --git a/pkg/server/server_sql.go b/pkg/server/server_sql.go
index 48df8d84aa45..e2f36f2ad72b 100644
--- a/pkg/server/server_sql.go
+++ b/pkg/server/server_sql.go
@@ -1895,12 +1895,13 @@ func startServeSQL(
pgPreServer *pgwire.PreServeConnHandler,
serveConn func(ctx context.Context, conn net.Conn, preServeStatus pgwire.PreServeStatus) error,
pgL net.Listener,
+ st *cluster.Settings,
socketFileCfg *string,
) error {
log.Ops.Info(ctx, "serving sql connections")
// Start servicing SQL connections.
- tcpKeepAlive := makeTCPKeepAliveManager()
+ tcpKeepAlive := makeTCPKeepAliveManager(st)
// The connManager is responsible for tearing down the net.Conn
// objects when the stopper tells us to shut down.
diff --git a/pkg/server/tcp_keepalive_manager.go b/pkg/server/tcp_keepalive_manager.go
index d3ff30f3c5b3..4c7d794e4037 100644
--- a/pkg/server/tcp_keepalive_manager.go
+++ b/pkg/server/tcp_keepalive_manager.go
@@ -17,31 +17,45 @@ import (
"time"
"github.com/cockroachdb/cmux"
- "github.com/cockroachdb/cockroach/pkg/util/envutil"
+ "github.com/cockroachdb/cockroach/pkg/settings"
+ "github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/util/log"
+ "github.com/cockroachdb/cockroach/pkg/util/sysutil"
+)
+
+var KeepAliveProbeCount = settings.RegisterIntSetting(
+ settings.ApplicationLevel,
+ "server.sql_tcp_keep_alive.count",
+ "maximum number of probes that will be sent out before a connection is dropped because "+
+ "it's unresponsive (Linux and Darwin only)",
+ 3,
+ settings.WithPublic,
+)
+
+var KeepAliveProbeFrequency = settings.RegisterDurationSetting(
+ settings.ApplicationLevel,
+ "server.sql_tcp_keep_alive.interval",
+ "time between keep alive probes and idle time before probes are sent out",
+ time.Second*10,
+ settings.WithPublic,
)
type tcpKeepAliveManager struct {
- // The keepalive duration.
- tcpKeepAlive time.Duration
// loggedKeepAliveStatus ensures that errors about setting the TCP
// keepalive status are only reported once.
loggedKeepAliveStatus int32
+ settings *cluster.Settings
}
-func makeTCPKeepAliveManager() tcpKeepAliveManager {
+func makeTCPKeepAliveManager(settings *cluster.Settings) tcpKeepAliveManager {
return tcpKeepAliveManager{
- tcpKeepAlive: envutil.EnvOrDefaultDuration("COCKROACH_SQL_TCP_KEEP_ALIVE", time.Minute),
+ settings: settings,
}
}
// configure attempts to set TCP keep-alive on
// connection. Does not fail on errors.
func (k *tcpKeepAliveManager) configure(ctx context.Context, conn net.Conn) {
- if k.tcpKeepAlive == 0 {
- return
- }
-
muxConn, ok := conn.(*cmux.MuxConn)
if !ok {
return
@@ -60,7 +74,18 @@ func (k *tcpKeepAliveManager) configure(ctx context.Context, conn net.Conn) {
return
}
- if err := tcpConn.SetKeepAlivePeriod(k.tcpKeepAlive); err != nil {
+ // Based on the maximum connection life span and probe interval, pick a maximum
+ // probe count.
+ probeCount := KeepAliveProbeCount.Get(&k.settings.SV)
+ probeFrequency := KeepAliveProbeFrequency.Get(&k.settings.SV)
+
+ if err := sysutil.SetKeepAliveCount(tcpConn, int(probeCount)); err != nil {
+ if doLog {
+ log.Ops.Warningf(ctx, "failed to set TCP keep-alive probe count for pgwire: %v", err)
+ }
+ }
+
+ if err := tcpConn.SetKeepAlivePeriod(probeFrequency); err != nil {
if doLog {
log.Ops.Warningf(ctx, "failed to set TCP keep-alive duration for pgwire: %v", err)
}
@@ -68,6 +93,6 @@ func (k *tcpKeepAliveManager) configure(ctx context.Context, conn net.Conn) {
}
if doLog {
- log.VEventf(ctx, 2, "setting TCP keep-alive to %s for pgwire", k.tcpKeepAlive)
+ log.VEventf(ctx, 2, "setting TCP keep-alive interval %d and probe count to %d for pgwire", probeFrequency, probeCount)
}
}
diff --git a/pkg/server/tcp_keepalive_manager_test.go b/pkg/server/tcp_keepalive_manager_test.go
new file mode 100644
index 000000000000..ff3dd1c3f54c
--- /dev/null
+++ b/pkg/server/tcp_keepalive_manager_test.go
@@ -0,0 +1,105 @@
+// Copyright 2023 The Cockroach Authors.
+//
+// Use of this software is governed by the Business Source License
+// included in the file licenses/BSL.txt.
+//
+// As of the Change Date specified in that file, in accordance with
+// the Business Source License, use of this software will be governed
+// by the Apache License, Version 2.0, included in the file
+// licenses/APL.txt.
+
+//go:build linux || (arm64 && darwin)
+
+package server
+
+import (
+ "context"
+ "net"
+ "testing"
+ "time"
+
+ "github.com/cockroachdb/cmux"
+ "github.com/cockroachdb/cockroach/pkg/settings/cluster"
+ "github.com/cockroachdb/cockroach/pkg/util/ctxgroup"
+ "github.com/cockroachdb/cockroach/pkg/util/leaktest"
+ "github.com/cockroachdb/cockroach/pkg/util/log"
+ "github.com/cockroachdb/cockroach/pkg/util/netutil"
+ "github.com/cockroachdb/cockroach/pkg/util/sysutil"
+ "github.com/stretchr/testify/require"
+)
+
+func TestKeepAliveManager(t *testing.T) {
+ defer leaktest.AfterTest(t)()
+ defer log.Scope(t).Close(t)
+ ctx := context.Background()
+
+ grp := ctxgroup.WithContext(ctx)
+ clusterSettings := cluster.MakeTestingClusterSettings()
+ KeepAliveProbeFrequency.Override(ctx, &clusterSettings.SV, time.Second*1)
+ KeepAliveProbeCount.Override(ctx, &clusterSettings.SV, 5)
+ keepAliveMgr := makeTCPKeepAliveManager(clusterSettings)
+
+ l, err := net.Listen("tcp", ":0")
+ require.NoError(t, err)
+ mux := cmux.New(l)
+ mux.HandleError(func(err error) bool {
+ return false
+ })
+
+ listener := mux.Match(cmux.Any())
+ grp.Go(func() error {
+ netutil.FatalIfUnexpected(mux.Serve())
+ return nil
+ })
+ connStr := listener.Addr()
+
+ grp.GoCtx(func(ctx context.Context) error {
+ conn, err := net.Dial(connStr.Network(), connStr.String())
+ if err != nil {
+ return err
+ }
+ reply := make([]byte, 1)
+ _, err = conn.Read(reply)
+ return err
+ })
+
+ conn, err := listener.Accept()
+ require.NoError(t, err)
+
+ // Configure this new connection with keep alive settings.
+ keepAliveMgr.configure(ctx, conn)
+ _, err = conn.Write([]byte("1"))
+ require.NoError(t, err)
+ // Confirm the settings are set on any TCP connection that we
+ // process.
+ muxConn, ok := conn.(*cmux.MuxConn)
+ if !ok {
+ return
+ }
+ tcpConn, ok := muxConn.Conn.(*net.TCPConn)
+ if !ok {
+ return
+ }
+ idleTime, probeInterval, probeCount, err := sysutil.GetKeepAliveSettings(tcpConn)
+ require.NoError(t, err)
+
+ require.Equal(t,
+ idleTime,
+ KeepAliveProbeFrequency.Get(&clusterSettings.SV),
+ "keep alive probe frequency not set")
+ require.Equal(t,
+ probeInterval,
+ KeepAliveProbeFrequency.Get(&clusterSettings.SV),
+ "keep alive probe frequency not set")
+
+ require.Equal(t,
+ probeCount,
+ int(KeepAliveProbeCount.Get(&clusterSettings.SV)),
+ "Computed wait time doesn't match our target timeout")
+
+ // Validate we didn't hit any errors using the sockets.
+ require.NoError(t, err)
+ require.NoError(t, listener.Close())
+ require.NoError(t, grp.Wait())
+ require.NoError(t, conn.Close())
+}
diff --git a/pkg/server/tenant.go b/pkg/server/tenant.go
index b8f7bd5905f2..9a8e5469d2fb 100644
--- a/pkg/server/tenant.go
+++ b/pkg/server/tenant.go
@@ -966,6 +966,7 @@ func (s *SQLServerWrapper) AcceptClients(ctx context.Context) error {
s.pgPreServer,
s.serveConn,
s.pgL,
+ s.ClusterSettings(),
&s.sqlServer.cfg.SocketFile,
); err != nil {
return err