From ce9e3b73463cfdff8f3eefbfe686ac951e6d356e Mon Sep 17 00:00:00 2001 From: irfan sharif Date: Tue, 31 Aug 2021 13:50:43 -0400 Subject: [PATCH] spanconfig: disable infrastructure unless envvar is set Cluster settings are too easy a switch to reach for to enable the new span configs machinery. Let's gate it behind a necessary envvar as well and use cluster settings to selectively toggle individual components. This commit also fixes a mighty silly bug introduced in #69047; for the two methods we intended to gate use `spanconfig.experimental_kvaccessor.enabled`, we were checking the opposite condition or not checking it at all. Oops. Release note: None Release justification: non-production code changes --- pkg/base/test_server_args.go | 3 +++ pkg/kv/kvserver/store.go | 4 ++++ pkg/server/config.go | 9 +++++++++ pkg/server/config_test.go | 8 ++++++++ pkg/server/node.go | 7 +++++++ pkg/server/server.go | 16 +++++++++++----- pkg/server/server_sql.go | 10 +++++----- pkg/server/testserver.go | 3 +++ .../spanconfigkvaccessor/kvaccessor.go | 14 +++++++++++--- .../spanconfigkvaccessor/kvaccessor_test.go | 7 ++++++- pkg/spanconfig/spanconfigmanager/manager_test.go | 2 ++ 11 files changed, 69 insertions(+), 14 deletions(-) diff --git a/pkg/base/test_server_args.go b/pkg/base/test_server_args.go index b6bb30c0d641..60f06901f702 100644 --- a/pkg/base/test_server_args.go +++ b/pkg/base/test_server_args.go @@ -132,6 +132,9 @@ type TestServerArgs struct { // If set, a TraceDir is initialized at the provided path. TraceDir string + + // If set, the span configs infrastructure will be enabled. + EnableSpanConfigs bool } // TestClusterArgs contains the parameters one can set when creating a test diff --git a/pkg/kv/kvserver/store.go b/pkg/kv/kvserver/store.go index 2e146af1b171..5be9177c3ed3 100644 --- a/pkg/kv/kvserver/store.go +++ b/pkg/kv/kvserver/store.go @@ -729,6 +729,10 @@ type StoreConfig struct { // KV Memory Monitor. Must be non-nil for production, and can be nil in some // tests. KVMemoryMonitor *mon.BytesMonitor + + // SpanConfigsEnabled determines whether we're able to use the span configs + // infrastructure. + SpanConfigsEnabled bool } // ConsistencyTestingKnobs is a BatchEvalTestingKnobs struct used to control the diff --git a/pkg/server/config.go b/pkg/server/config.go index 3d99f8c1eea1..9fefc0e47a67 100644 --- a/pkg/server/config.go +++ b/pkg/server/config.go @@ -222,6 +222,11 @@ type KVConfig struct { // The following values can only be set via environment variables and are // for testing only. They are not meant to be set by the end user. + // Enables the use of the (experimental) span configs infrastructure. + // + // Environment Variable: COCKROACH_EXPERIMENTAL_SPAN_CONFIGS + SpanConfigsEnabled bool + // Enables linearizable behavior of operations on this node by making sure // that no commit timestamp is reported back to the client until all other // node clocks have necessarily passed it. @@ -417,6 +422,9 @@ func (cfg *Config) String() string { if cfg.Linearizable { fmt.Fprintln(w, "linearizable\t", cfg.Linearizable) } + if cfg.SpanConfigsEnabled { + fmt.Fprintln(w, "span configs enabled\t", cfg.SpanConfigsEnabled) + } _ = w.Flush() return buf.String() @@ -647,6 +655,7 @@ func (cfg *Config) RequireWebSession() bool { // variable based. Note that this only happens when initializing a node and not // when NewContext is called. func (cfg *Config) readEnvironmentVariables() { + cfg.SpanConfigsEnabled = envutil.EnvOrDefaultBool("COCKROACH_EXPERIMENTAL_SPAN_CONFIGS", cfg.SpanConfigsEnabled) cfg.Linearizable = envutil.EnvOrDefaultBool("COCKROACH_EXPERIMENTAL_LINEARIZABLE", cfg.Linearizable) cfg.ScanInterval = envutil.EnvOrDefaultDuration("COCKROACH_SCAN_INTERVAL", cfg.ScanInterval) cfg.ScanMinIdleTime = envutil.EnvOrDefaultDuration("COCKROACH_SCAN_MIN_IDLE_TIME", cfg.ScanMinIdleTime) diff --git a/pkg/server/config_test.go b/pkg/server/config_test.go index 728a40b74c82..8a136da441e4 100644 --- a/pkg/server/config_test.go +++ b/pkg/server/config_test.go @@ -98,6 +98,9 @@ func TestReadEnvironmentVariables(t *testing.T) { if err := os.Unsetenv("COCKROACH_EXPERIMENTAL_LINEARIZABLE"); err != nil { t.Fatal(err) } + if err := os.Unsetenv("COCKROACH_EXPERIMENTAL_SPAN_CONFIGS"); err != nil { + t.Fatal(err) + } if err := os.Unsetenv("COCKROACH_SCAN_INTERVAL"); err != nil { t.Fatal(err) } @@ -125,6 +128,10 @@ func TestReadEnvironmentVariables(t *testing.T) { // Set all the environment variables to valid values and ensure they are set // correctly. + if err := os.Setenv("COCKROACH_EXPERIMENTAL_SPAN_CONFIGS", "true"); err != nil { + t.Fatal(err) + } + cfgExpected.SpanConfigsEnabled = true if err := os.Setenv("COCKROACH_EXPERIMENTAL_LINEARIZABLE", "true"); err != nil { t.Fatal(err) } @@ -149,6 +156,7 @@ func TestReadEnvironmentVariables(t *testing.T) { } for _, envVar := range []string{ + "COCKROACH_EXPERIMENTAL_SPAN_CONFIGS", "COCKROACH_EXPERIMENTAL_LINEARIZABLE", "COCKROACH_SCAN_INTERVAL", "COCKROACH_SCAN_MIN_IDLE_TIME", diff --git a/pkg/server/node.go b/pkg/server/node.go index 59ac2182aefb..bb44118ee370 100644 --- a/pkg/server/node.go +++ b/pkg/server/node.go @@ -1447,6 +1447,10 @@ func (emptyMetricStruct) MetricStruct() {} func (n *Node) GetSpanConfigs( ctx context.Context, req *roachpb.GetSpanConfigsRequest, ) (*roachpb.GetSpanConfigsResponse, error) { + if !n.storeCfg.SpanConfigsEnabled { + return nil, errors.New("use of span configs disabled") + } + entries, err := n.spanConfigAccessor.GetSpanConfigEntriesFor(ctx, req.Spans) if err != nil { return nil, err @@ -1459,6 +1463,9 @@ func (n *Node) GetSpanConfigs( func (n *Node) UpdateSpanConfigs( ctx context.Context, req *roachpb.UpdateSpanConfigsRequest, ) (*roachpb.UpdateSpanConfigsResponse, error) { + if !n.storeCfg.SpanConfigsEnabled { + return nil, errors.New("use of span configs disabled") + } // TODO(irfansharif): We want to protect ourselves from tenants creating // outlandishly large string buffers here and OOM-ing the host cluster. Is // the maximum protobuf message size enough of a safeguard? diff --git a/pkg/server/server.go b/pkg/server/server.go index 42bd132d59eb..572868ccfde9 100644 --- a/pkg/server/server.go +++ b/pkg/server/server.go @@ -68,6 +68,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/server/telemetry" "github.com/cockroachdb/cockroach/pkg/settings" "github.com/cockroachdb/cockroach/pkg/settings/cluster" + "github.com/cockroachdb/cockroach/pkg/spanconfig" _ "github.com/cockroachdb/cockroach/pkg/spanconfig/spanconfigjob" // register jobs declared outside of pkg/sql "github.com/cockroachdb/cockroach/pkg/spanconfig/spanconfigkvaccessor" "github.com/cockroachdb/cockroach/pkg/sql" @@ -627,6 +628,16 @@ func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) { ProtectedTimestampCache: protectedtsProvider, KVMemoryMonitor: kvMemoryMonitor, } + + var spanConfigAccessor spanconfig.KVAccessor + if cfg.SpanConfigsEnabled { + storeCfg.SpanConfigsEnabled = true + spanConfigAccessor = spanconfigkvaccessor.New( + db, internalExecutor, cfg.Settings, + systemschema.SpanConfigurationsTableName.FQString(), + ) + } + if storeTestingKnobs := cfg.TestingKnobs.Store; storeTestingKnobs != nil { storeCfg.TestingKnobs = *storeTestingKnobs.(*kvserver.StoreTestingKnobs) } @@ -655,11 +666,6 @@ func NewServer(cfg Config, stopper *stop.Stopper) (*Server, error) { tenantUsage := NewTenantUsageServer(db, internalExecutor) registry.AddMetricStruct(tenantUsage.Metrics()) - spanConfigAccessor := spanconfigkvaccessor.New( - db, internalExecutor, cfg.Settings, - systemschema.SpanConfigurationsTableName.FQString(), - ) - node := NewNode( storeCfg, recorder, registry, stopper, txnMetrics, stores, nil /* execCfg */, &rpcContext.ClusterID, diff --git a/pkg/server/server_sql.go b/pkg/server/server_sql.go index 36ef94c07197..6b747ff05b08 100644 --- a/pkg/server/server_sql.go +++ b/pkg/server/server_sql.go @@ -832,17 +832,17 @@ func newSQLServer(ctx context.Context, cfg sqlServerArgs) (*SQLServer, error) { // Instantiate a span config manager; it exposes a hook to idempotently // create the span config reconciliation job and captures all relevant job // dependencies. - knobs, _ := cfg.TestingKnobs.SpanConfig.(*spanconfig.TestingKnobs) - spanconfigMgr := spanconfigmanager.New( + spanConfigKnobs, _ := cfg.TestingKnobs.SpanConfig.(*spanconfig.TestingKnobs) + spanConfigMgr := spanconfigmanager.New( cfg.db, jobRegistry, cfg.circularInternalExecutor, cfg.stopper, cfg.Settings, cfg.spanConfigAccessor, - knobs, + spanConfigKnobs, ) - execCfg.SpanConfigReconciliationJobDeps = spanconfigMgr + execCfg.SpanConfigReconciliationJobDeps = spanConfigMgr temporaryObjectCleaner := sql.NewTemporaryObjectCleaner( cfg.Settings, @@ -904,7 +904,7 @@ func newSQLServer(ctx context.Context, cfg sqlServerArgs) (*SQLServer, error) { sqlInstanceProvider: cfg.sqlInstanceProvider, metricsRegistry: cfg.registry, diagnosticsReporter: reporter, - spanconfigMgr: spanconfigMgr, + spanconfigMgr: spanConfigMgr, settingsWatcher: settingsWatcher, }, nil } diff --git a/pkg/server/testserver.go b/pkg/server/testserver.go index 2903e6f10f90..7c3aa51d4063 100644 --- a/pkg/server/testserver.go +++ b/pkg/server/testserver.go @@ -221,6 +221,9 @@ func makeTestConfigFromParams(params base.TestServerArgs) Config { if params.EnableDemoLoginEndpoint { cfg.EnableDemoLoginEndpoint = true } + if params.EnableSpanConfigs { + cfg.SpanConfigsEnabled = true + } // Ensure we have the correct number of engines. Add in-memory ones where // needed. There must be at least one store/engine. diff --git a/pkg/spanconfig/spanconfigkvaccessor/kvaccessor.go b/pkg/spanconfig/spanconfigkvaccessor/kvaccessor.go index ac2b25ae6ac7..c340083de24c 100644 --- a/pkg/spanconfig/spanconfigkvaccessor/kvaccessor.go +++ b/pkg/spanconfig/spanconfigkvaccessor/kvaccessor.go @@ -52,17 +52,21 @@ func New( } } -var kvAccessorEnabled = settings.RegisterBoolSetting( - "spanconfig.kvaccessor_experimental.enabled", +// enabledSetting is a hidden cluster setting that gates usage of the +// KVAccessor. It has no effect unless COCKROACH_EXPERIMENTAL_SPAN_CONFIGS is +// also set. +var enabledSetting = settings.RegisterBoolSetting( + "spanconfig.experimental_kvaccessor.enabled", "enable the use of the kv accessor", false).WithSystemOnly() // GetSpanConfigEntriesFor is part of the KVAccessor interface. func (k *KVAccessor) GetSpanConfigEntriesFor( ctx context.Context, spans []roachpb.Span, ) (resp []roachpb.SpanConfigEntry, retErr error) { - if kvAccessorEnabled.Get(&k.settings.SV) { + if !enabledSetting.Get(&k.settings.SV) { return nil, errors.New("use of span configs disabled") } + if len(spans) == 0 { return resp, nil } @@ -111,6 +115,10 @@ func (k *KVAccessor) GetSpanConfigEntriesFor( func (k *KVAccessor) UpdateSpanConfigEntries( ctx context.Context, toDelete []roachpb.Span, toUpsert []roachpb.SpanConfigEntry, ) error { + if !enabledSetting.Get(&k.settings.SV) { + return errors.New("use of span configs disabled") + } + if err := validateUpdateArgs(toDelete, toUpsert); err != nil { return err } diff --git a/pkg/spanconfig/spanconfigkvaccessor/kvaccessor_test.go b/pkg/spanconfig/spanconfigkvaccessor/kvaccessor_test.go index 9b35890d564f..be5ebcff848f 100644 --- a/pkg/spanconfig/spanconfigkvaccessor/kvaccessor_test.go +++ b/pkg/spanconfig/spanconfigkvaccessor/kvaccessor_test.go @@ -33,7 +33,11 @@ func TestKVAccessor(t *testing.T) { defer leaktest.AfterTest(t)() ctx := context.Background() - tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{}) + tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{ + ServerArgs: base.TestServerArgs{ + EnableSpanConfigs: true, + }, + }) defer tc.Stopper().Stop(ctx) span := func(start, end string) roachpb.Span { @@ -72,6 +76,7 @@ func TestKVAccessor(t *testing.T) { const dummySpanConfigurationsFQN = "defaultdb.public.dummy_span_configurations" tdb := sqlutils.MakeSQLRunner(tc.ServerConn(0)) + tdb.Exec(t, `SET CLUSTER SETTING spanconfig.experimental_kvaccessor.enabled = true`) tdb.Exec(t, fmt.Sprintf("CREATE TABLE %s (LIKE system.span_configurations INCLUDING ALL)", dummySpanConfigurationsFQN)) accessor := spanconfigkvaccessor.New( tc.Server(0).DB(), diff --git a/pkg/spanconfig/spanconfigmanager/manager_test.go b/pkg/spanconfig/spanconfigmanager/manager_test.go index 2a365e4ebc61..4c684c4b356a 100644 --- a/pkg/spanconfig/spanconfigmanager/manager_test.go +++ b/pkg/spanconfig/spanconfigmanager/manager_test.go @@ -46,6 +46,7 @@ func TestManagerConcurrentJobCreation(t *testing.T) { ctx := context.Background() tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{ ServerArgs: base.TestServerArgs{ + EnableSpanConfigs: true, Knobs: base.TestingKnobs{ SpanConfig: &spanconfig.TestingKnobs{ ManagerDisableJobCreation: true, // disable the automatic job creation @@ -136,6 +137,7 @@ func TestManagerStartsJobIfFailed(t *testing.T) { ctx := context.Background() tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{ ServerArgs: base.TestServerArgs{ + EnableSpanConfigs: true, Knobs: base.TestingKnobs{ SpanConfig: &spanconfig.TestingKnobs{ ManagerDisableJobCreation: true, // disable the automatic job creation