From fb5bd273f623083ddf3dc0e28977b368c17081cb Mon Sep 17 00:00:00 2001 From: irfan sharif Date: Tue, 5 Sep 2023 11:51:40 -0400 Subject: [PATCH] kvflowcontrol: use 'apply_to_elastic' mode by default We've been exercising the flow control machinery in the 'apply_to_all' mode for the last few months, to shake out latent bugs and surface performance regressions. Since flow control shapes quorum writes to the rate of IO admission by the slowest live replica, it's not necessarily the scheme we want by default for latency-sensitive foreground writes. It does however make sense for elastic writes, which by definition is not latency sensitive, and write shaping is ok. With 'apply_to_elastic', regular writes are still subject to admission control. But it happens only on the leaseholder where the write originates. On nodes observing follower writes from regular work, we'll deduct IO tokens without waiting. Crucially, with 'apply_to_elastic', this deduction-without-waiting will not happen for elastic writes, which tend to be bulkier and by definition, lower priority than regular writes. It might still make sense for users to opt into 'apply_to_all'. Either in this universal cluster setting form, or perhaps when exposed more selectively through zone configs, so it's done on a schema-by-schema basis. Consider setups with heterogenous regions where nodes in particular regions only contain follower replicas and also have lower IO admission rates (by virtue of there being fewer nodes in aggregate, or with hardware provisioned with lesser throughput). Without any write shaping, if the leader-only regions drive at higher throughput, the follower region will fall permanently behind, or will have uncontrolled LSM growth, making it inappropriate for fail overs. In such cases, users may want to in fact shape their quorum writes based on the IO admission rates of the slowest follower replica. Release note: None --- pkg/kv/kvserver/flow_control_integration_test.go | 1 + pkg/kv/kvserver/kvflowcontrol/kvflowcontrol.go | 2 +- .../kvflowcontrol/kvflowcontroller/kvflowcontroller_test.go | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/kv/kvserver/flow_control_integration_test.go b/pkg/kv/kvserver/flow_control_integration_test.go index 8d658d14872a..84ca617f4a16 100644 --- a/pkg/kv/kvserver/flow_control_integration_test.go +++ b/pkg/kv/kvserver/flow_control_integration_test.go @@ -774,6 +774,7 @@ func TestFlowControlRaftSnapshot(t *testing.T) { ctx := context.Background() st := cluster.MakeTestingClusterSettings() kvflowcontrol.Enabled.Override(ctx, &st.SV, true) + kvflowcontrol.Mode.Override(ctx, &st.SV, int64(kvflowcontrol.ApplyToAll)) for i := 0; i < numServers; i++ { stickyServerArgs[i] = base.TestServerArgs{ diff --git a/pkg/kv/kvserver/kvflowcontrol/kvflowcontrol.go b/pkg/kv/kvserver/kvflowcontrol/kvflowcontrol.go index 0046a858f26a..a6e1609d4296 100644 --- a/pkg/kv/kvserver/kvflowcontrol/kvflowcontrol.go +++ b/pkg/kv/kvserver/kvflowcontrol/kvflowcontrol.go @@ -39,7 +39,7 @@ var Mode = settings.RegisterEnumSetting( settings.SystemOnly, "kvadmission.flow_control.mode", "determines the 'mode' of flow control we use for replication traffic in KV, if enabled", - ApplyToAll.String(), + ApplyToElastic.String(), map[int64]string{ int64(ApplyToElastic): modeDict[ApplyToElastic], int64(ApplyToAll): modeDict[ApplyToAll], diff --git a/pkg/kv/kvserver/kvflowcontrol/kvflowcontroller/kvflowcontroller_test.go b/pkg/kv/kvserver/kvflowcontrol/kvflowcontroller/kvflowcontroller_test.go index 56f05ce9fe80..651fb361dab7 100644 --- a/pkg/kv/kvserver/kvflowcontrol/kvflowcontroller/kvflowcontroller_test.go +++ b/pkg/kv/kvserver/kvflowcontrol/kvflowcontroller/kvflowcontroller_test.go @@ -189,6 +189,7 @@ func TestInspectController(t *testing.T) { st := cluster.MakeTestingClusterSettings() elasticTokensPerStream.Override(ctx, &st.SV, 8<<20 /* 8 MiB */) regularTokensPerStream.Override(ctx, &st.SV, 16<<20 /* 16 MiB */) + kvflowcontrol.Mode.Override(ctx, &st.SV, int64(kvflowcontrol.ApplyToAll)) controller := New(metric.NewRegistry(), st, hlc.NewClockForTesting(nil)) // No streams connected -- inspect state should be empty.