diff --git a/pkg/acceptance/cluster/http.go b/pkg/acceptance/cluster/http.go index a55e72c066ed..5ba327f5e36d 100644 --- a/pkg/acceptance/cluster/http.go +++ b/pkg/acceptance/cluster/http.go @@ -13,8 +13,7 @@ package cluster import ( "crypto/tls" "net/http" - - "github.com/cockroachdb/cockroach/pkg/base" + "time" ) // HTTPClient is an http.Client configured for querying a cluster. We need to @@ -22,7 +21,7 @@ import ( // cannot use a fixed hostname to reach the cluster. This in turn means that we // do not have a verified server name in the certs. var HTTPClient = http.Client{ - Timeout: base.NetworkTimeout, + Timeout: 3 * time.Second, Transport: &http.Transport{ TLSClientConfig: &tls.Config{ InsecureSkipVerify: true, diff --git a/pkg/base/config.go b/pkg/base/config.go index d49b5930e87a..462e6824ba26 100644 --- a/pkg/base/config.go +++ b/pkg/base/config.go @@ -47,9 +47,6 @@ const ( defaultSQLAddr = ":" + DefaultPort defaultHTTPAddr = ":" + DefaultHTTPPort - // NetworkTimeout is the timeout used for network operations. - NetworkTimeout = 3 * time.Second - // defaultRaftTickInterval is the default resolution of the Raft timer. defaultRaftTickInterval = 200 * time.Millisecond @@ -66,10 +63,6 @@ const ( // each heartbeat. defaultRaftHeartbeatIntervalTicks = 5 - // defaultRPCHeartbeatInterval is the default value of RPCHeartbeatIntervalAndHalfTimeout - // used by the rpc context. - defaultRPCHeartbeatInterval = 3 * time.Second - // defaultRangeLeaseRenewalFraction specifies what fraction the range lease // renewal duration should be of the range lease active time. For example, // with a value of 0.2 and a lease duration of 10 seconds, leases would be @@ -118,6 +111,44 @@ func DefaultHistogramWindowInterval() time.Duration { } var ( + // NetworkTimeout is the timeout used for network operations that require a + // single network round trip. It is conservatively defined as one maximum + // network round trip time (RTT) plus one TCP packet retransmit (RTO), then + // multiplied by 2 as a safety margin. + // + // The maximum RTT between cloud regions is roughly 350ms both in GCP + // (asia-south2 to southamerica-west1) and AWS (af-south-1 to sa-east-1). It + // can occasionally be up to 500ms, but 400ms is a reasonable upper bound + // under nominal conditions. + // https://datastudio.google.com/reporting/fc733b10-9744-4a72-a502-92290f608571/page/70YCB + // https://www.cloudping.co/grid/p_99/timeframe/1W + // + // Linux has an RTT-dependant retransmission timeout (RTO) which we can + // approximate as 1.5x RTT (smoothed RTT + 4x RTT variance), with a lower + // bound of 200ms. Under nominal conditions, this is approximately 600ms. + // + // The maximum p99 RPC heartbeat latency in any Cockroach Cloud cluster over a + // 90-day period was 557ms. This was a single-region US cluster, where the + // high latency appeared to be due to CPU overload or throttling: the cluster + // had 2 vCPU nodes running at 100%. + // + // The NetworkTimeout is thus set to 2 * (400ms + 600ms) = 2s. + // + // TODO(erikgrinaker): Consider reducing this to 1 second, which should be + // sufficient but may be fragile under latency fluctuations. + NetworkTimeout = envutil.EnvOrDefaultDuration("COCKROACH_NETWORK_TIMEOUT", 2*time.Second) + + // DialTimeout is the timeout used when dialing a node. gRPC connections take + // up to 3 roundtrips for the TCP + TLS handshakes. Because NetworkTimeout + // allows for both a network roundtrip (RTT) and a TCP retransmit (RTO), with + // the RTO being greater than the RTT, and we don't need to tolerate more than + // 1 retransmit per connection attempt, 2 * NetworkTimeout is sufficient. + DialTimeout = 2 * NetworkTimeout + + // defaultRPCHeartbeatIntervalAndTimeout is the default value of + // RPCHeartbeatIntervalAndTimeout used by the RPC context. + defaultRPCHeartbeatIntervalAndTimeout = NetworkTimeout + // defaultRaftElectionTimeoutTicks specifies the number of Raft Tick // invocations that must pass between elections. defaultRaftElectionTimeoutTicks = envutil.EnvOrDefaultInt( @@ -221,13 +252,11 @@ type Config struct { // This is computed from HTTPAddr if specified otherwise Addr. HTTPAdvertiseAddr string - // RPCHeartbeatIntervalAndHalfTimeout controls how often a Ping request is - // sent on peer connections to determine connection health and update the - // local view of remote clocks. - // - // Twice this value is used as a timeout for heartbeats, so don't set this too - // low. - RPCHeartbeatIntervalAndHalfTimeout time.Duration + // RPCHeartbeatIntervalAndTimeout controls how often a Ping request is sent on + // peer connections to determine connection health and update the local view + // of remote clocks. This is also used as a timeout for heartbeats, so don't + // set this too low. + RPCHeartbeatIntervalAndTimeout time.Duration // SecondaryTenantPortOffset is the increment to add to the various // addresses to generate the network configuration for the in-memory @@ -285,7 +314,7 @@ func (cfg *Config) InitDefaults() { cfg.SQLAdvertiseAddr = cfg.SQLAddr cfg.SocketFile = "" cfg.SSLCertsDir = DefaultCertsDirectory - cfg.RPCHeartbeatIntervalAndHalfTimeout = defaultRPCHeartbeatInterval + cfg.RPCHeartbeatIntervalAndTimeout = defaultRPCHeartbeatIntervalAndTimeout cfg.ClusterName = "" cfg.DisableClusterNameVerification = false cfg.ClockDevicePath = "" diff --git a/pkg/kv/kvserver/replicate_queue.go b/pkg/kv/kvserver/replicate_queue.go index 74c165f1268b..c0cb51d5374d 100644 --- a/pkg/kv/kvserver/replicate_queue.go +++ b/pkg/kv/kvserver/replicate_queue.go @@ -17,7 +17,6 @@ import ( "sync/atomic" "time" - "github.com/cockroachdb/cockroach/pkg/base" "github.com/cockroachdb/cockroach/pkg/gossip" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator" "github.com/cockroachdb/cockroach/pkg/kv/kvserver/allocator/allocatorimpl" @@ -1426,9 +1425,10 @@ func (rq *replicateQueue) findRemoveVoter( MaxBackoff: 200 * time.Millisecond, Multiplier: 2, } + timeout := 5 * time.Second var candidates []roachpb.ReplicaDescriptor - deadline := timeutil.Now().Add(2 * base.NetworkTimeout) + deadline := timeutil.Now().Add(timeout) for r := retry.StartWithCtx(ctx, retryOpts); r.Next() && timeutil.Now().Before(deadline); { lastReplAdded, lastAddedTime := repl.LastReplicaAdded() if timeutil.Since(lastAddedTime) > newReplicaGracePeriod { diff --git a/pkg/rpc/addjoin.go b/pkg/rpc/addjoin.go index 0d5b2c6b4c4f..701d9fe54db9 100644 --- a/pkg/rpc/addjoin.go +++ b/pkg/rpc/addjoin.go @@ -16,6 +16,7 @@ import ( "math" "time" + "github.com/cockroachdb/cockroach/pkg/base" "google.golang.org/grpc" "google.golang.org/grpc/backoff" "google.golang.org/grpc/credentials" @@ -38,7 +39,7 @@ func GetAddJoinDialOptions(certPool *x509.CertPool) []grpc.DialOption { backoffConfig.MaxDelay = time.Second dialOpts = append(dialOpts, grpc.WithConnectParams(grpc.ConnectParams{ Backoff: backoffConfig, - MinConnectTimeout: minConnectionTimeout})) + MinConnectTimeout: base.DialTimeout})) dialOpts = append(dialOpts, grpc.WithKeepaliveParams(clientKeepalive)) dialOpts = append(dialOpts, grpc.WithInitialWindowSize(initialWindowSize), diff --git a/pkg/rpc/context.go b/pkg/rpc/context.go index d5b24eca5125..d1a31a69d480 100644 --- a/pkg/rpc/context.go +++ b/pkg/rpc/context.go @@ -101,9 +101,6 @@ var ( "COCKROACH_RANGEFEED_RPC_INITIAL_WINDOW_SIZE", RangefeedClass, 2*defaultWindowSize /* 128K */) ) -// GRPC Dialer connection timeout. -var minConnectionTimeout = 5 * time.Second - // errDialRejected is returned from client interceptors when the server's // stopper is quiescing. The error is constructed to return true in // `grpcutil.IsConnectionRejected` which prevents infinite retry loops during @@ -574,7 +571,7 @@ func NewContext(ctx context.Context, opts ContextOptions) *Context { rpcCompression: enableRPCCompression, MasterCtx: masterCtx, metrics: makeMetrics(), - heartbeatTimeout: 2 * opts.Config.RPCHeartbeatIntervalAndHalfTimeout, + heartbeatTimeout: opts.Config.RPCHeartbeatIntervalAndTimeout, logClosingConnEvery: log.Every(time.Second), } @@ -582,7 +579,7 @@ func NewContext(ctx context.Context, opts ContextOptions) *Context { // CLI commands are exempted. if !opts.ClientOnly { rpcCtx.RemoteClocks = newRemoteClockMonitor( - opts.Clock, opts.MaxOffset, 10*opts.Config.RPCHeartbeatIntervalAndHalfTimeout, opts.Config.HistogramWindowInterval()) + opts.Clock, opts.MaxOffset, 10*opts.Config.RPCHeartbeatIntervalAndTimeout, opts.Config.HistogramWindowInterval()) } if id := opts.Knobs.StorageClusterID; id != nil { @@ -1751,18 +1748,17 @@ func (rpcCtx *Context) grpcDialRaw( // our setup with onlyOnceDialer below. So note that our choice here is // inconsequential assuming all works as designed. backoff := time.Second - if backoff > minConnectionTimeout { - // This is for testing where we set a small minConnectionTimeout. - // gRPC will internally round up the min connection timeout to the max - // backoff. This can be unintuitive and so we opt out of it by lowering the - // max backoff. - backoff = minConnectionTimeout + if backoff > base.DialTimeout { + // This is for testing where we set a small DialTimeout. gRPC will + // internally round up the min connection timeout to the max backoff. This + // can be unintuitive and so we opt out of it by lowering the max backoff. + backoff = base.DialTimeout } backoffConfig.BaseDelay = backoff backoffConfig.MaxDelay = backoff dialOpts = append(dialOpts, grpc.WithConnectParams(grpc.ConnectParams{ Backoff: backoffConfig, - MinConnectTimeout: minConnectionTimeout})) + MinConnectTimeout: base.DialTimeout})) dialOpts = append(dialOpts, grpc.WithKeepaliveParams(clientKeepalive)) dialOpts = append(dialOpts, grpc.WithInitialConnWindowSize(initialConnWindowSize)) if class == RangefeedClass { @@ -2197,7 +2193,7 @@ func (rpcCtx *Context) runHeartbeat( }) } - heartbeatTimer.Reset(rpcCtx.Config.RPCHeartbeatIntervalAndHalfTimeout) + heartbeatTimer.Reset(rpcCtx.Config.RPCHeartbeatIntervalAndTimeout) } } diff --git a/pkg/rpc/context_test.go b/pkg/rpc/context_test.go index eed1d567e47d..e0f60381142d 100644 --- a/pkg/rpc/context_test.go +++ b/pkg/rpc/context_test.go @@ -265,7 +265,7 @@ func testClockOffsetInPingRequestInternal(t *testing.T, clientOnly bool) { clientOpts := opts clientOpts.Config = testutils.NewNodeTestBaseContext() // Experimentally, values below 50ms seem to incur flakiness. - clientOpts.Config.RPCHeartbeatIntervalAndHalfTimeout = 100 * time.Millisecond + clientOpts.Config.RPCHeartbeatIntervalAndTimeout = 100 * time.Millisecond clientOpts.ClientOnly = clientOnly clientOpts.OnOutgoingPing = func(ctx context.Context, req *PingRequest) error { select { @@ -897,7 +897,7 @@ func TestHeartbeatHealth(t *testing.T) { clientCtx.Config.AdvertiseAddr = lisLocalServer.Addr().String() // Make the interval shorter to speed up the test. - clientCtx.Config.RPCHeartbeatIntervalAndHalfTimeout = 1 * time.Millisecond + clientCtx.Config.RPCHeartbeatIntervalAndTimeout = 1 * time.Millisecond m := clientCtx.Metrics() @@ -1142,7 +1142,7 @@ func TestHeartbeatHealthTransport(t *testing.T) { clientCtx := newTestContext(clusterID, clock, maxOffset, stopper) // Make the interval shorter to speed up the test. - clientCtx.Config.RPCHeartbeatIntervalAndHalfTimeout = 1 * time.Millisecond + clientCtx.Config.RPCHeartbeatIntervalAndTimeout = 1 * time.Millisecond if _, err := clientCtx.GRPCDialNode(remoteAddr, serverNodeID, DefaultClass).Connect(context.Background()); err != nil { t.Fatal(err) } @@ -1253,7 +1253,7 @@ func TestHeartbeatHealthTransport(t *testing.T) { }) // Should stay unhealthy despite reconnection attempts. - for then := timeutil.Now(); timeutil.Since(then) < 50*clientCtx.Config.RPCHeartbeatIntervalAndHalfTimeout; { + for then := timeutil.Now(); timeutil.Since(then) < 50*clientCtx.Config.RPCHeartbeatIntervalAndTimeout; { err := clientCtx.TestingConnHealth(remoteAddr, serverNodeID) if !isUnhealthy(err) { t.Fatal(err) @@ -1297,7 +1297,7 @@ func TestOffsetMeasurement(t *testing.T) { clientMaxOffset := time.Duration(0) clientCtx := newTestContext(clusterID, clientClock, clientMaxOffset, stopper) // Make the interval shorter to speed up the test. - clientCtx.Config.RPCHeartbeatIntervalAndHalfTimeout = 1 * time.Millisecond + clientCtx.Config.RPCHeartbeatIntervalAndTimeout = 1 * time.Millisecond clientCtx.RemoteClocks.offsetTTL = 5 * clientClock.getAdvancementInterval() if _, err := clientCtx.GRPCDialNode(remoteAddr, serverNodeID, DefaultClass).Connect(ctx); err != nil { t.Fatal(err) @@ -1459,7 +1459,7 @@ func TestRemoteOffsetUnhealthy(t *testing.T) { clock := timeutil.NewManualTime(timeutil.Unix(0, start.Add(nodeCtxs[i].offset).UnixNano())) nodeCtxs[i].errChan = make(chan error, 1) nodeCtxs[i].ctx = newTestContext(clusterID, clock, maxOffset, stopper) - nodeCtxs[i].ctx.Config.RPCHeartbeatIntervalAndHalfTimeout = maxOffset + nodeCtxs[i].ctx.Config.RPCHeartbeatIntervalAndTimeout = maxOffset nodeCtxs[i].ctx.NodeID.Set(context.Background(), roachpb.NodeID(i+1)) s := newTestServer(t, nodeCtxs[i].ctx) @@ -1675,7 +1675,7 @@ func grpcRunKeepaliveTestCase(testCtx context.Context, c grpcKeepaliveTestCase) log.Infof(ctx, "setting up client") clientCtx := newTestContext(clusterID, clock, maxOffset, stopper) // Disable automatic heartbeats. We'll send them by hand. - clientCtx.Config.RPCHeartbeatIntervalAndHalfTimeout = math.MaxInt64 + clientCtx.Config.RPCHeartbeatIntervalAndTimeout = math.MaxInt64 var firstConn int32 = 1 diff --git a/pkg/rpc/down_node_test.go b/pkg/rpc/down_node_test.go index 4d2e8536689b..7cac162e83f0 100644 --- a/pkg/rpc/down_node_test.go +++ b/pkg/rpc/down_node_test.go @@ -17,6 +17,7 @@ import ( "testing" "time" + "github.com/cockroachdb/cockroach/pkg/base" "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/util" "github.com/cockroachdb/cockroach/pkg/util/leaktest" @@ -33,9 +34,9 @@ func TestConnectingToDownNode(t *testing.T) { { defer func(prev time.Duration) { - minConnectionTimeout = prev - }(minConnectionTimeout) - minConnectionTimeout = time.Millisecond + base.DialTimeout = prev + }(base.DialTimeout) + base.DialTimeout = time.Millisecond } testutils.RunTrueAndFalse(t, "refused", func(t *testing.T, refused bool) { diff --git a/pkg/rpc/nodedialer/nodedialer_test.go b/pkg/rpc/nodedialer/nodedialer_test.go index f27907f2fced..0d21cad9f833 100644 --- a/pkg/rpc/nodedialer/nodedialer_test.go +++ b/pkg/rpc/nodedialer/nodedialer_test.go @@ -454,7 +454,7 @@ func newTestContext( ) *rpc.Context { cfg := testutils.NewNodeTestBaseContext() cfg.Insecure = true - cfg.RPCHeartbeatIntervalAndHalfTimeout = 100 * time.Millisecond + cfg.RPCHeartbeatIntervalAndTimeout = 100 * time.Millisecond ctx := context.Background() rctx := rpc.NewContext(ctx, rpc.ContextOptions{ TenantID: roachpb.SystemTenantID, diff --git a/pkg/security/tls_settings.go b/pkg/security/tls_settings.go index f5a82191c4a8..767b89b9bdc4 100644 --- a/pkg/security/tls_settings.go +++ b/pkg/security/tls_settings.go @@ -40,9 +40,6 @@ var ocspMode = settings.RegisterEnumSetting( "and in lax mode all certificates will be accepted.", "off", map[int64]string{ocspOff: "off", ocspLax: "lax", ocspStrict: "strict"}).WithPublic() -// TODO(bdarnell): 3 seconds is the same as base.NetworkTimeout, but -// we can't use it here due to import cycles. We need a real -// no-dependencies base package for constants like this. var ocspTimeout = settings.RegisterDurationSetting( settings.TenantWritable, "security.ocsp.timeout", "timeout before considering the OCSP server unreachable", diff --git a/pkg/server/admin.go b/pkg/server/admin.go index f99aad26d6b5..0dea6b3d7a2c 100644 --- a/pkg/server/admin.go +++ b/pkg/server/admin.go @@ -1280,7 +1280,7 @@ func (s *adminServer) statsForSpan( func(ctx context.Context) { // Set a generous timeout on the context for each individual query. var spanResponse *serverpb.SpanStatsResponse - err := contextutil.RunWithTimeout(ctx, "request remote stats", 5*base.NetworkTimeout, + err := contextutil.RunWithTimeout(ctx, "request remote stats", 20*time.Second, func(ctx context.Context) error { client, err := s.server.status.dialNode(ctx, nodeID) if err == nil { diff --git a/pkg/server/pagination.go b/pkg/server/pagination.go index 7bb3689e65a0..a6bd22bef326 100644 --- a/pkg/server/pagination.go +++ b/pkg/server/pagination.go @@ -367,7 +367,7 @@ func (r *rpcNodePaginator) queryNode(ctx context.Context, nodeID roachpb.NodeID, r.mu.currentIdx++ r.mu.turnCond.Broadcast() } - if err := contextutil.RunWithTimeout(ctx, "dial node", base.NetworkTimeout, func(ctx context.Context) error { + if err := contextutil.RunWithTimeout(ctx, "dial node", base.DialTimeout, func(ctx context.Context) error { var err error client, err = r.dialFn(ctx, nodeID) return err diff --git a/pkg/server/server_controller.go b/pkg/server/server_controller.go index fafe8bf6a776..a4fc00cad6a5 100644 --- a/pkg/server/server_controller.go +++ b/pkg/server/server_controller.go @@ -464,7 +464,7 @@ func makeInMemoryTenantServerConfig( baseCfg.Config.User = kvServerCfg.Config.User baseCfg.Config.DisableTLSForHTTP = kvServerCfg.Config.DisableTLSForHTTP baseCfg.Config.AcceptSQLWithoutTLS = kvServerCfg.Config.AcceptSQLWithoutTLS - baseCfg.Config.RPCHeartbeatIntervalAndHalfTimeout = kvServerCfg.Config.RPCHeartbeatIntervalAndHalfTimeout + baseCfg.Config.RPCHeartbeatIntervalAndTimeout = kvServerCfg.Config.RPCHeartbeatIntervalAndTimeout baseCfg.Config.ClockDevicePath = kvServerCfg.Config.ClockDevicePath baseCfg.Config.ClusterName = kvServerCfg.Config.ClusterName baseCfg.Config.DisableClusterNameVerification = kvServerCfg.Config.DisableClusterNameVerification diff --git a/pkg/server/status.go b/pkg/server/status.go index 6c663d943f13..0668c0273752 100644 --- a/pkg/server/status.go +++ b/pkg/server/status.go @@ -835,7 +835,7 @@ func (s *statusServer) AllocatorRange( ctx, "server.statusServer: requesting remote Allocator simulation", func(ctx context.Context) { - _ = contextutil.RunWithTimeout(ctx, "allocator range", base.NetworkTimeout, func(ctx context.Context) error { + _ = contextutil.RunWithTimeout(ctx, "allocator range", 3*time.Second, func(ctx context.Context) error { status, err := s.dialNode(ctx, nodeID) var allocatorResponse *serverpb.AllocatorResponse if err == nil { @@ -2681,7 +2681,7 @@ func (s *statusServer) iterateNodes( nodeQuery := func(ctx context.Context, nodeID roachpb.NodeID) { var client interface{} - err := contextutil.RunWithTimeout(ctx, "dial node", base.NetworkTimeout, func(ctx context.Context) error { + err := contextutil.RunWithTimeout(ctx, "dial node", base.DialTimeout, func(ctx context.Context) error { var err error client, err = dialFn(ctx, nodeID) return err diff --git a/pkg/server/tenant_status.go b/pkg/server/tenant_status.go index 26328ffe023f..5c98040b39a1 100644 --- a/pkg/server/tenant_status.go +++ b/pkg/server/tenant_status.go @@ -747,7 +747,7 @@ func (t *tenantStatusServer) iteratePods( instanceQuery := func(ctx context.Context, instance sqlinstance.InstanceInfo) { var client interface{} - err := contextutil.RunWithTimeout(ctx, "dial instance", base.NetworkTimeout, func(ctx context.Context) error { + err := contextutil.RunWithTimeout(ctx, "dial instance", base.DialTimeout, func(ctx context.Context) error { var err error client, err = dialFn(ctx, instance.InstanceID, instance.InstanceAddr) return err