Skip to content

Commit

Permalink
cli: add --disable-max-offset-check flag
Browse files Browse the repository at this point in the history
This patch adds a `--disable-max-offset-check` flag, which disables the
clock offset check that will self-terminate the node if its offset to
the rest of the cluster (as measured via RPC heartbeats) exceeds
`--max-offset`.

This is motivated by deployments with reliable, high-precision clock
infrastructure where `--max-offset` may be set very low (e.g. 10 ms) to
reduce undertainty restarts and write latency for global tables. With
such low offset limits, RPC latency spikes could cause spurious node
restarts. This flag allows operators to assume responsibility for
ensuring real clock skew never exceeds `--max-offset`.

Epic: none
Release note (ops change): The flag `--disable-max-offset-check` has
been added to disable node self-termination when it detects clock skew
with the rest of the cluster beyond `--max-offset`. The operator assumes
responsibility for ensuring that real clock skew never exceeds
`--max-offset`.
  • Loading branch information
erikgrinaker committed Feb 13, 2023
1 parent 0a1b2ae commit c993304
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 25 deletions.
26 changes: 21 additions & 5 deletions pkg/cli/cliflags/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -894,20 +894,36 @@ only tested and supported on Linux.
MaxOffset = FlagInfo{
Name: "max-offset",
Description: `
Maximum allowed clock offset for the cluster. If observed clock offsets exceed
this limit, servers will crash to minimize the likelihood of reading
inconsistent data. Increasing this value will increase the time to recovery of
failures as well as the frequency of uncertainty-based read restarts.
Maximum clock offset for the cluster. If real clock skew exceeds this value,
consistency guarantees can no longer be upheld, possibly resulting in stale
reads and other anomalies. This value affects the frequency of uncertainty-based
read restarts and write latencies for global tables.
<PRE>
</PRE>
If a node detects that its clock offset from other nodes is too large, it will
self-terminate to protect consistency guarantees. This check can be disabled
via --disable-max-offset-check.
<PRE>
</PRE>
This value should be the same on all nodes in the cluster. It is allowed to
differ such that the max-offset value can be changed via a rolling restart of
the cluster, in which case the real clock offset between nodes must be below the
the cluster, in which case the real clock skew between nodes must be below the
smallest max-offset value of any node.
`,
}

DisableMaxOffsetCheck = FlagInfo{
Name: "disable-max-offset-check",
Description: `
Normally, a node will self-terminate if it finds that its clock offset with the
rest of the cluster exceeds --max-offset. This flag disables this check. The
operator is responsible for ensuring that real clock skew never exceeds
max-offset, to avoid read inconsistencies and other correctness anomalies.
`,
}

Store = FlagInfo{
Name: "store",
Shorthand: "s",
Expand Down
1 change: 1 addition & 0 deletions pkg/cli/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -478,6 +478,7 @@ func init() {
cliflagcfg.VarFlag(f, &serverCfg.StorageEngine, cliflags.StorageEngine)
cliflagcfg.StringFlag(f, &serverCfg.SharedStorage, cliflags.SharedStorage)
cliflagcfg.VarFlag(f, &serverCfg.MaxOffset, cliflags.MaxOffset)
cliflagcfg.BoolFlag(f, &serverCfg.DisableMaxOffsetCheck, cliflags.DisableMaxOffsetCheck)
cliflagcfg.StringFlag(f, &serverCfg.ClockDevicePath, cliflags.ClockDevice)

cliflagcfg.StringFlag(f, &startCtx.listeningURLFile, cliflags.ListeningURLFile)
Expand Down
28 changes: 15 additions & 13 deletions pkg/cli/flags_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -202,23 +202,25 @@ func TestClockOffsetFlagValue(t *testing.T) {
defer initCLIDefaults()

f := startCmd.Flags()
testData := []struct {
args []string
expected time.Duration
testCases := []struct {
args []string
expectMax time.Duration
expectTolerated time.Duration
}{
{nil, base.DefaultMaxClockOffset},
{[]string{"--max-offset", "200ms"}, 200 * time.Millisecond},
{nil, 500 * time.Millisecond, 400 * time.Millisecond},
{[]string{"--max-offset", "100ms"}, 100 * time.Millisecond, 80 * time.Millisecond},
{[]string{"--disable-max-offset-check"}, base.DefaultMaxClockOffset, 0},
{[]string{"--max-offset", "100ms", "--disable-max-offset-check"}, 100 * time.Millisecond, 0},
}

for i, td := range testData {
initCLIDefaults()
for _, tc := range testCases {
t.Run(strings.Join(tc.args, " "), func(t *testing.T) {
initCLIDefaults()

if err := f.Parse(td.args); err != nil {
t.Fatal(err)
}
if td.expected != time.Duration(serverCfg.MaxOffset) {
t.Errorf("%d. MaxOffset expected %v, but got %v", i, td.expected, serverCfg.MaxOffset)
}
require.NoError(t, f.Parse(tc.args))
require.Equal(t, tc.expectMax, time.Duration(serverCfg.MaxOffset))
require.Equal(t, tc.expectTolerated, serverCfg.ToleratedOffset())
})
}
}

Expand Down
25 changes: 18 additions & 7 deletions pkg/server/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,14 +144,20 @@ type BaseConfig struct {
// AmbientCtx is used to annotate contexts used inside the server.
AmbientCtx log.AmbientContext

// Maximum allowed clock offset for the cluster. If observed clock
// offsets exceed this limit, inconsistency may result, and servers
// will panic to minimize the likelihood of inconsistent data.
// Increasing this value will increase time to recovery after
// failures, and increase the frequency and impact of
// ReadWithinUncertaintyIntervalError.
// MaxOffset is the maximum clock offset for the cluster. If real clock skew
// exceeds this limit, it may result in linearizability violations. Increasing
// this will increase the frequency of ReadWithinUncertaintyIntervalError and
// the write latency of global tables.
//
// Nodes will self-terminate if they detect that their clock skew with other
// nodes is too large, see ToleratedOffset().
MaxOffset MaxOffsetType

// DisableMaxOffsetCheck disables the MaxOffset check with other cluster nodes.
// The operator assumes responsibility for ensuring real clock skew never
// exceeds MaxOffset. See also ToleratedOffset().
DisableMaxOffsetCheck bool

// DisableRuntimeStatsMonitor prevents this server from starting the
// async task that collects runtime stats and triggers
// heap/goroutine dumps under high load.
Expand Down Expand Up @@ -282,6 +288,7 @@ func (cfg *BaseConfig) SetDefaults(
cfg.ClusterIDContainer = idsProvider.clusterID
cfg.AmbientCtx = log.MakeServerAmbientContext(tr, idsProvider)
cfg.MaxOffset = MaxOffsetType(base.DefaultMaxClockOffset)
cfg.DisableMaxOffsetCheck = false
cfg.DefaultZoneConfig = zonepb.DefaultZoneConfig()
cfg.StorageEngine = storage.DefaultStorageEngine
cfg.TestingInsecureWebAccess = disableWebLogin
Expand Down Expand Up @@ -332,8 +339,12 @@ func (cfg *BaseConfig) InitTestingKnobs() {
}
}

// ToleratedOffset returns the tolerated offset, or 0 if disabled.
// ToleratedOffset returns the tolerated clock offset with other cluster nodes
// as measured via RPC heartbeats, or 0 to disable clock offset checks.
func (cfg *BaseConfig) ToleratedOffset() time.Duration {
if cfg.DisableMaxOffsetCheck {
return 0
}
return time.Duration(toleratedOffsetMultiplier * float64(cfg.MaxOffset))
}

Expand Down

0 comments on commit c993304

Please sign in to comment.