From c993304696e8d87835354daee2f35363b2f635b9 Mon Sep 17 00:00:00 2001 From: Erik Grinaker Date: Wed, 8 Feb 2023 14:52:39 +0000 Subject: [PATCH] cli: add `--disable-max-offset-check` flag This patch adds a `--disable-max-offset-check` flag, which disables the clock offset check that will self-terminate the node if its offset to the rest of the cluster (as measured via RPC heartbeats) exceeds `--max-offset`. This is motivated by deployments with reliable, high-precision clock infrastructure where `--max-offset` may be set very low (e.g. 10 ms) to reduce undertainty restarts and write latency for global tables. With such low offset limits, RPC latency spikes could cause spurious node restarts. This flag allows operators to assume responsibility for ensuring real clock skew never exceeds `--max-offset`. Epic: none Release note (ops change): The flag `--disable-max-offset-check` has been added to disable node self-termination when it detects clock skew with the rest of the cluster beyond `--max-offset`. The operator assumes responsibility for ensuring that real clock skew never exceeds `--max-offset`. --- pkg/cli/cliflags/flags.go | 26 +++++++++++++++++++++----- pkg/cli/flags.go | 1 + pkg/cli/flags_test.go | 28 +++++++++++++++------------- pkg/server/config.go | 25 ++++++++++++++++++------- 4 files changed, 55 insertions(+), 25 deletions(-) diff --git a/pkg/cli/cliflags/flags.go b/pkg/cli/cliflags/flags.go index b5e844466748..47c05a24d6a1 100644 --- a/pkg/cli/cliflags/flags.go +++ b/pkg/cli/cliflags/flags.go @@ -894,20 +894,36 @@ only tested and supported on Linux. MaxOffset = FlagInfo{ Name: "max-offset", Description: ` -Maximum allowed clock offset for the cluster. If observed clock offsets exceed -this limit, servers will crash to minimize the likelihood of reading -inconsistent data. Increasing this value will increase the time to recovery of -failures as well as the frequency of uncertainty-based read restarts. +Maximum clock offset for the cluster. If real clock skew exceeds this value, +consistency guarantees can no longer be upheld, possibly resulting in stale +reads and other anomalies. This value affects the frequency of uncertainty-based +read restarts and write latencies for global tables. +
+
+
+If a node detects that its clock offset from other nodes is too large, it will +self-terminate to protect consistency guarantees. This check can be disabled +via --disable-max-offset-check.
 
 
This value should be the same on all nodes in the cluster. It is allowed to differ such that the max-offset value can be changed via a rolling restart of -the cluster, in which case the real clock offset between nodes must be below the +the cluster, in which case the real clock skew between nodes must be below the smallest max-offset value of any node. `, } + DisableMaxOffsetCheck = FlagInfo{ + Name: "disable-max-offset-check", + Description: ` +Normally, a node will self-terminate if it finds that its clock offset with the +rest of the cluster exceeds --max-offset. This flag disables this check. The +operator is responsible for ensuring that real clock skew never exceeds +max-offset, to avoid read inconsistencies and other correctness anomalies. +`, + } + Store = FlagInfo{ Name: "store", Shorthand: "s", diff --git a/pkg/cli/flags.go b/pkg/cli/flags.go index 14a69a4841a8..fa8dbc23a29a 100644 --- a/pkg/cli/flags.go +++ b/pkg/cli/flags.go @@ -478,6 +478,7 @@ func init() { cliflagcfg.VarFlag(f, &serverCfg.StorageEngine, cliflags.StorageEngine) cliflagcfg.StringFlag(f, &serverCfg.SharedStorage, cliflags.SharedStorage) cliflagcfg.VarFlag(f, &serverCfg.MaxOffset, cliflags.MaxOffset) + cliflagcfg.BoolFlag(f, &serverCfg.DisableMaxOffsetCheck, cliflags.DisableMaxOffsetCheck) cliflagcfg.StringFlag(f, &serverCfg.ClockDevicePath, cliflags.ClockDevice) cliflagcfg.StringFlag(f, &startCtx.listeningURLFile, cliflags.ListeningURLFile) diff --git a/pkg/cli/flags_test.go b/pkg/cli/flags_test.go index b0d05a45ac05..a1c6fe5a5020 100644 --- a/pkg/cli/flags_test.go +++ b/pkg/cli/flags_test.go @@ -202,23 +202,25 @@ func TestClockOffsetFlagValue(t *testing.T) { defer initCLIDefaults() f := startCmd.Flags() - testData := []struct { - args []string - expected time.Duration + testCases := []struct { + args []string + expectMax time.Duration + expectTolerated time.Duration }{ - {nil, base.DefaultMaxClockOffset}, - {[]string{"--max-offset", "200ms"}, 200 * time.Millisecond}, + {nil, 500 * time.Millisecond, 400 * time.Millisecond}, + {[]string{"--max-offset", "100ms"}, 100 * time.Millisecond, 80 * time.Millisecond}, + {[]string{"--disable-max-offset-check"}, base.DefaultMaxClockOffset, 0}, + {[]string{"--max-offset", "100ms", "--disable-max-offset-check"}, 100 * time.Millisecond, 0}, } - for i, td := range testData { - initCLIDefaults() + for _, tc := range testCases { + t.Run(strings.Join(tc.args, " "), func(t *testing.T) { + initCLIDefaults() - if err := f.Parse(td.args); err != nil { - t.Fatal(err) - } - if td.expected != time.Duration(serverCfg.MaxOffset) { - t.Errorf("%d. MaxOffset expected %v, but got %v", i, td.expected, serverCfg.MaxOffset) - } + require.NoError(t, f.Parse(tc.args)) + require.Equal(t, tc.expectMax, time.Duration(serverCfg.MaxOffset)) + require.Equal(t, tc.expectTolerated, serverCfg.ToleratedOffset()) + }) } } diff --git a/pkg/server/config.go b/pkg/server/config.go index c74bb506dfb8..0390fb1fc6a7 100644 --- a/pkg/server/config.go +++ b/pkg/server/config.go @@ -144,14 +144,20 @@ type BaseConfig struct { // AmbientCtx is used to annotate contexts used inside the server. AmbientCtx log.AmbientContext - // Maximum allowed clock offset for the cluster. If observed clock - // offsets exceed this limit, inconsistency may result, and servers - // will panic to minimize the likelihood of inconsistent data. - // Increasing this value will increase time to recovery after - // failures, and increase the frequency and impact of - // ReadWithinUncertaintyIntervalError. + // MaxOffset is the maximum clock offset for the cluster. If real clock skew + // exceeds this limit, it may result in linearizability violations. Increasing + // this will increase the frequency of ReadWithinUncertaintyIntervalError and + // the write latency of global tables. + // + // Nodes will self-terminate if they detect that their clock skew with other + // nodes is too large, see ToleratedOffset(). MaxOffset MaxOffsetType + // DisableMaxOffsetCheck disables the MaxOffset check with other cluster nodes. + // The operator assumes responsibility for ensuring real clock skew never + // exceeds MaxOffset. See also ToleratedOffset(). + DisableMaxOffsetCheck bool + // DisableRuntimeStatsMonitor prevents this server from starting the // async task that collects runtime stats and triggers // heap/goroutine dumps under high load. @@ -282,6 +288,7 @@ func (cfg *BaseConfig) SetDefaults( cfg.ClusterIDContainer = idsProvider.clusterID cfg.AmbientCtx = log.MakeServerAmbientContext(tr, idsProvider) cfg.MaxOffset = MaxOffsetType(base.DefaultMaxClockOffset) + cfg.DisableMaxOffsetCheck = false cfg.DefaultZoneConfig = zonepb.DefaultZoneConfig() cfg.StorageEngine = storage.DefaultStorageEngine cfg.TestingInsecureWebAccess = disableWebLogin @@ -332,8 +339,12 @@ func (cfg *BaseConfig) InitTestingKnobs() { } } -// ToleratedOffset returns the tolerated offset, or 0 if disabled. +// ToleratedOffset returns the tolerated clock offset with other cluster nodes +// as measured via RPC heartbeats, or 0 to disable clock offset checks. func (cfg *BaseConfig) ToleratedOffset() time.Duration { + if cfg.DisableMaxOffsetCheck { + return 0 + } return time.Duration(toleratedOffsetMultiplier * float64(cfg.MaxOffset)) }