From 769ba1c46d5bfbdcfc14069f545809926f9f35c5 Mon Sep 17 00:00:00 2001 From: Josh Imhoff Date: Fri, 14 Jul 2023 10:39:11 -0400 Subject: [PATCH] kvprober: metamorphically enable / configure kvprober This commit metamorphically enables & configures kvprober. Though kvprober is off by default & not documented publicly, we run with kvprober enabled in CC. So we should test CRDB with kvprober enabled. This commit is inspired by the crdb_internal.probe_ranges corruption bug, though it targets kvprober proper, not crdb_internal.probe_ranges. This commit also adjusts the kvprober default intervals to what they are set at in CC. This is mostly done to improve the quality of tests with kvprober enabled. Release note: None. --- pkg/kv/kvprober/BUILD.bazel | 1 + pkg/kv/kvprober/settings.go | 16 +++++++++------- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/pkg/kv/kvprober/BUILD.bazel b/pkg/kv/kvprober/BUILD.bazel index 83c099d35cc6..735724bbb7a7 100644 --- a/pkg/kv/kvprober/BUILD.bazel +++ b/pkg/kv/kvprober/BUILD.bazel @@ -17,6 +17,7 @@ go_library( "//pkg/roachpb", "//pkg/settings", "//pkg/settings/cluster", + "//pkg/util", "//pkg/util/log", "//pkg/util/log/logcrash", "//pkg/util/metric", diff --git a/pkg/kv/kvprober/settings.go b/pkg/kv/kvprober/settings.go index 0e0c9e60f622..238693ffa752 100644 --- a/pkg/kv/kvprober/settings.go +++ b/pkg/kv/kvprober/settings.go @@ -14,9 +14,12 @@ import ( "time" "github.com/cockroachdb/cockroach/pkg/settings" + "github.com/cockroachdb/cockroach/pkg/util" "github.com/cockroachdb/errors" ) +var enabled = util.ConstantWithMetamorphicTestBool("kv.prober.*.enabled", false) + // kv.prober.bypass_admission_control controls whether kvprober's requests // should bypass kv layer's admission control. 
Setting this value to true // ensures that kvprober will not be significantly affected if the cluster is @@ -27,14 +30,13 @@ var bypassAdmissionControl = settings.RegisterBoolSetting( "set to bypass admission control queue for kvprober requests; "+ "note that dedicated clusters should have this set as users own capacity planning "+ "but serverless clusters should not have this set as SREs own capacity planning", - true, -) + util.ConstantWithMetamorphicTestBool("kv.prober.bypass_admission_control.enabled", true)) var readEnabled = settings.RegisterBoolSetting( settings.TenantWritable, "kv.prober.read.enabled", "whether the KV read prober is enabled", - false) + enabled) // TODO(josh): Another option is for the cluster setting to be a QPS target // for the cluster as a whole. @@ -44,7 +46,7 @@ var readInterval = settings.RegisterDurationSetting( "how often each node sends a read probe to the KV layer on average (jitter is added); "+ "note that a very slow read can block kvprober from sending additional probes; "+ "kv.prober.read.timeout controls the max time kvprober can be blocked", - 1*time.Minute, func(duration time.Duration) error { + 1*time.Second, func(duration time.Duration) error { if duration <= 0 { return errors.New("param must be >0") } @@ -70,7 +72,7 @@ var writeEnabled = settings.RegisterBoolSetting( settings.TenantWritable, "kv.prober.write.enabled", "whether the KV write prober is enabled", - false) + enabled) var writeInterval = settings.RegisterDurationSetting( settings.TenantWritable, @@ -78,7 +80,7 @@ var writeInterval = settings.RegisterDurationSetting( "how often each node sends a write probe to the KV layer on average (jitter is added); "+ "note that a very slow read can block kvprober from sending additional probes; "+ "kv.prober.write.timeout controls the max time kvprober can be blocked", - 10*time.Second, func(duration time.Duration) error { + 5*time.Second, func(duration time.Duration) error { if duration <= 0 { return errors.New("param 
must be >0") } @@ -148,7 +150,7 @@ var quarantineWriteEnabled = settings.RegisterBoolSetting( "quarantine pool holds a separate group of ranges that have previously failed "+ "a probe which are continually probed. This helps determine outages for ranges "+ " with a high level of confidence", - false) + enabled) var quarantineWriteInterval = settings.RegisterDurationSetting( settings.TenantWritable,