diff --git a/pkg/cli/cliflags/flags.go b/pkg/cli/cliflags/flags.go index b5e844466748..47c05a24d6a1 100644 --- a/pkg/cli/cliflags/flags.go +++ b/pkg/cli/cliflags/flags.go @@ -894,20 +894,36 @@ only tested and supported on Linux. MaxOffset = FlagInfo{ Name: "max-offset", Description: ` -Maximum allowed clock offset for the cluster. If observed clock offsets exceed -this limit, servers will crash to minimize the likelihood of reading -inconsistent data. Increasing this value will increase the time to recovery of -failures as well as the frequency of uncertainty-based read restarts. +Maximum clock offset for the cluster. If real clock skew exceeds this value, +consistency guarantees can no longer be upheld, possibly resulting in stale +reads and other anomalies. This value affects the frequency of uncertainty-based +read restarts and write latencies for global tables. +
+ ++If a node detects that its clock offset from other nodes is too large, it will +self-terminate to protect consistency guarantees. This check can be disabled +via --disable-max-offset-check.
This value should be the same on all nodes in the cluster. It is allowed to differ such that the max-offset value can be changed via a rolling restart of -the cluster, in which case the real clock offset between nodes must be below the +the cluster, in which case the real clock skew between nodes must be below the smallest max-offset value of any node. `, } + DisableMaxOffsetCheck = FlagInfo{ + Name: "disable-max-offset-check", + Description: ` +Normally, a node will self-terminate if it finds that its clock offset with the +rest of the cluster exceeds --max-offset. This flag disables this check. The +operator is responsible for ensuring that real clock skew never exceeds +max-offset, to avoid read inconsistencies and other correctness anomalies. +`, + } + Store = FlagInfo{ Name: "store", Shorthand: "s", diff --git a/pkg/cli/flags.go b/pkg/cli/flags.go index 14a69a4841a8..fa8dbc23a29a 100644 --- a/pkg/cli/flags.go +++ b/pkg/cli/flags.go @@ -478,6 +478,7 @@ func init() { cliflagcfg.VarFlag(f, &serverCfg.StorageEngine, cliflags.StorageEngine) cliflagcfg.StringFlag(f, &serverCfg.SharedStorage, cliflags.SharedStorage) cliflagcfg.VarFlag(f, &serverCfg.MaxOffset, cliflags.MaxOffset) + cliflagcfg.BoolFlag(f, &serverCfg.DisableMaxOffsetCheck, cliflags.DisableMaxOffsetCheck) cliflagcfg.StringFlag(f, &serverCfg.ClockDevicePath, cliflags.ClockDevice) cliflagcfg.StringFlag(f, &startCtx.listeningURLFile, cliflags.ListeningURLFile) diff --git a/pkg/cli/flags_test.go b/pkg/cli/flags_test.go index b0d05a45ac05..a1c6fe5a5020 100644 --- a/pkg/cli/flags_test.go +++ b/pkg/cli/flags_test.go @@ -202,23 +202,25 @@ func TestClockOffsetFlagValue(t *testing.T) { defer initCLIDefaults() f := startCmd.Flags() - testData := []struct { - args []string - expected time.Duration + testCases := []struct { + args []string + expectMax time.Duration + expectTolerated time.Duration }{ - {nil, base.DefaultMaxClockOffset}, - {[]string{"--max-offset", "200ms"}, 200 * time.Millisecond}, + {nil, 500 * time.Millisecond, 400 * time.Millisecond}, + {[]string{"--max-offset", "100ms"}, 100 * time.Millisecond, 80 * time.Millisecond}, + {[]string{"--disable-max-offset-check"}, base.DefaultMaxClockOffset, 0}, + {[]string{"--max-offset", "100ms", "--disable-max-offset-check"}, 100 * time.Millisecond, 0}, } - for i, td := range testData { - initCLIDefaults() + for _, tc := range testCases { + t.Run(strings.Join(tc.args, " "), func(t *testing.T) { + initCLIDefaults() - if err := f.Parse(td.args); err != nil { - t.Fatal(err) - } - if td.expected != time.Duration(serverCfg.MaxOffset) { - t.Errorf("%d. MaxOffset expected %v, but got %v", i, td.expected, serverCfg.MaxOffset) - } + require.NoError(t, f.Parse(tc.args)) + require.Equal(t, tc.expectMax, time.Duration(serverCfg.MaxOffset)) + require.Equal(t, tc.expectTolerated, serverCfg.ToleratedOffset()) + }) } } diff --git a/pkg/server/config.go b/pkg/server/config.go index c74bb506dfb8..0390fb1fc6a7 100644 --- a/pkg/server/config.go +++ b/pkg/server/config.go @@ -144,14 +144,20 @@ type BaseConfig struct { // AmbientCtx is used to annotate contexts used inside the server. AmbientCtx log.AmbientContext - // Maximum allowed clock offset for the cluster. If observed clock - // offsets exceed this limit, inconsistency may result, and servers - // will panic to minimize the likelihood of inconsistent data. - // Increasing this value will increase time to recovery after - // failures, and increase the frequency and impact of - // ReadWithinUncertaintyIntervalError. + // MaxOffset is the maximum clock offset for the cluster. If real clock skew + // exceeds this limit, it may result in linearizability violations. Increasing + // this will increase the frequency of ReadWithinUncertaintyIntervalError and + // the write latency of global tables. + // + // Nodes will self-terminate if they detect that their clock skew with other + // nodes is too large, see ToleratedOffset(). MaxOffset MaxOffsetType + // DisableMaxOffsetCheck disables the MaxOffset check with other cluster nodes. + // The operator assumes responsibility for ensuring real clock skew never + // exceeds MaxOffset. See also ToleratedOffset(). + DisableMaxOffsetCheck bool + // DisableRuntimeStatsMonitor prevents this server from starting the // async task that collects runtime stats and triggers // heap/goroutine dumps under high load. @@ -282,6 +288,7 @@ func (cfg *BaseConfig) SetDefaults( cfg.ClusterIDContainer = idsProvider.clusterID cfg.AmbientCtx = log.MakeServerAmbientContext(tr, idsProvider) cfg.MaxOffset = MaxOffsetType(base.DefaultMaxClockOffset) + cfg.DisableMaxOffsetCheck = false cfg.DefaultZoneConfig = zonepb.DefaultZoneConfig() cfg.StorageEngine = storage.DefaultStorageEngine cfg.TestingInsecureWebAccess = disableWebLogin @@ -332,8 +339,12 @@ func (cfg *BaseConfig) InitTestingKnobs() { } } -// ToleratedOffset returns the tolerated offset, or 0 if disabled. +// ToleratedOffset returns the tolerated clock offset with other cluster nodes +// as measured via RPC heartbeats, or 0 to disable clock offset checks. func (cfg *BaseConfig) ToleratedOffset() time.Duration { + if cfg.DisableMaxOffsetCheck { + return 0 + } return time.Duration(toleratedOffsetMultiplier * float64(cfg.MaxOffset)) }