Skip to content

Commit

Permalink
multitenant: add SQL server startup guardrails
Browse files Browse the repository at this point in the history
This code change prevents a SQL server from starting if its
binary version is less than the tenant's active version.

It makes `RowDecoder.DecodeRow` thread safe by making it
accept a `*tree.DatumAlloc` and use a new alloc if its nil.

The check runs after we run permanent upgrades because
that's when the version setting is written to the settings table
of the system tenant (and we want to apply this check to both system
and secondary tenants). To guard against future changes to where we
set the version, if GetClusterVersionFromStorage doesn't find a value for
the version setting it will return an error because this means the
version is not set yet.

Release note: None
Epic: CRDB-10829
  • Loading branch information
healthy-pod committed Jan 30, 2023
1 parent 69dd453 commit d418a4c
Show file tree
Hide file tree
Showing 13 changed files with 257 additions and 20 deletions.
3 changes: 3 additions & 0 deletions pkg/ccl/serverccl/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ go_test(
"role_authentication_test.go",
"server_controller_test.go",
"server_sql_test.go",
"server_startup_guardrails_test.go",
"tenant_decommissioned_host_test.go",
"tenant_vars_test.go",
],
Expand All @@ -50,6 +51,7 @@ go_test(
"//pkg/ccl",
"//pkg/ccl/kvccl",
"//pkg/ccl/utilccl/licenseccl",
"//pkg/clusterversion",
"//pkg/kv/kvserver/liveness",
"//pkg/kv/kvserver/liveness/livenesspb",
"//pkg/roachpb",
Expand Down Expand Up @@ -88,6 +90,7 @@ go_test(
"@com_github_lib_pq//:pq",
"@com_github_prometheus_client_model//go",
"@com_github_prometheus_common//expfmt",
"@com_github_stretchr_testify//assert",
"@com_github_stretchr_testify//require",
"@org_golang_x_crypto//bcrypt",
],
Expand Down
149 changes: 149 additions & 0 deletions pkg/ccl/serverccl/server_startup_guardrails_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
// Copyright 2023 The Cockroach Authors.
//
// Licensed as a CockroachDB Enterprise file under the Cockroach Community
// License (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// https://github.com/cockroachdb/cockroach/blob/master/licenses/CCL.txt

package serverccl

import (
"context"
"fmt"
"testing"

"github.com/cockroachdb/cockroach/pkg/base"
"github.com/cockroachdb/cockroach/pkg/clusterversion"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/server"
"github.com/cockroachdb/cockroach/pkg/settings/cluster"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/serverutils"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
"github.com/stretchr/testify/assert"
)

// TestServerStartupGuardrails ensures that a SQL server will fail to start if its binary
// version (TBV) is less than the tenant's logical version (TLV). The following will be used
// to reference the different versions hereafter:
//
// SBV: Storage Binary Version
// SLV: Storage Logical Version
// TBV: Tenant Binary Version
// TLV: Tenant Logical Version
func TestServerStartupGuardrails(t *testing.T) {
defer leaktest.AfterTest(t)()

v := func(major, minor int32) roachpb.Version {
return roachpb.Version{Major: major, Minor: minor}
}

tests := []struct {
storageBinaryVersion roachpb.Version
storageBinaryMinSupportedVersion roachpb.Version
tenantSecondServerBinaryVersion roachpb.Version
tenantSecondServerBinaryMinSupportedVersion roachpb.Version
expErrMatch string // empty if expecting a nil error
}{
// First test case ensures that a tenant server can start if the server binary version is
// not too low for the tenant logical version.
{
storageBinaryVersion: clusterversion.ByKey(clusterversion.BinaryVersionKey),
storageBinaryMinSupportedVersion: v(21+clusterversion.DevOffset, 1),
tenantSecondServerBinaryVersion: clusterversion.ByKey(clusterversion.BinaryVersionKey),
tenantSecondServerBinaryMinSupportedVersion: v(21+clusterversion.DevOffset, 1),
expErrMatch: "",
},
// Second test case ensures that a tenant server is prevented from starting if its binary
// version is too low for the current tenant logical version.
{
storageBinaryVersion: clusterversion.ByKey(clusterversion.BinaryVersionKey),
storageBinaryMinSupportedVersion: v(21+clusterversion.DevOffset, 1),
tenantSecondServerBinaryVersion: v(21+clusterversion.DevOffset, 1),
tenantSecondServerBinaryMinSupportedVersion: v(20+clusterversion.DevOffset, 2),
expErrMatch: fmt.Sprintf("preventing SQL server from starting because its binary version is too low for the tenant active version: "+
"server binary version = %v, tenant active version = %v",
v(21+clusterversion.DevOffset, 1).PrettyPrint(), clusterversion.ByKey(clusterversion.BinaryVersionKey).PrettyPrint()),
},
}

for i, test := range tests {
storageSettings := cluster.MakeTestingClusterSettingsWithVersions(
test.storageBinaryVersion,
test.storageBinaryMinSupportedVersion,
false, /* initializeVersion */
)

// The active version of this server will be equal to its binary version. We ensure this is
// true with an assertion below. This is needed because in some test cases we want to ensure
// the active version of this server is greater than the binary version of the tenant. By knowing
// that the Storage Binary Version is higher than the Tenant Binary Version and the Storage Logical
// Version is equal to the Storage Binary Version we can be sure that the Storage Logical Version
// is higher than the Tenant Binary Version.
s, _, _ := serverutils.StartServer(t, base.TestServerArgs{
// Disable the default test tenant, since we create one explicitly
// below.
DisableDefaultTestTenant: true,
Settings: storageSettings,
Knobs: base.TestingKnobs{
Server: &server.TestingKnobs{
DisableAutomaticVersionUpgrade: make(chan struct{}),
},
},
})

// Ensure that the Storage Logical Version is equal to the Storage Binary Version.
assert.True(t, s.ClusterSettings().Version.ActiveVersion(context.Background()).Version == test.storageBinaryVersion,
"invalid test state: SLV not equal to SBV")

tenantID := serverutils.TestTenantID()
// The TLV will be equal to the SBV because in this case we already confirmed that the SBV
// is equal to the SLV. Note that if the SLV is less than the SBV, the TLV will be equal
// to the storage minimum supported version. The storage minimum supported version referenced
// here depends on this binary actual minimum supported version as set in `sql.tenantCreationMinSupportedVersionKey`
// and is not the override value we use above. The override value for the storage minimum supported version
// is only passed to let this test possible with versions lower than the actual min supported version.
tenantFirstServer, err := s.StartTenant(context.Background(),
base.TestTenantArgs{
Settings: cluster.MakeTestingClusterSettings(),
TenantID: tenantID,
TestingKnobs: base.TestingKnobs{
Server: &server.TestingKnobs{
DisableAutomaticVersionUpgrade: make(chan struct{}),
},
},
})
if err != nil {
t.Fatal(err)
}

// Attempt to start a second server with version overrides.
tenantSecondServerSettings := cluster.MakeTestingClusterSettingsWithVersions(
test.tenantSecondServerBinaryVersion,
test.tenantSecondServerBinaryMinSupportedVersion,
true, /* initializeVersion */
)
tenantSecondServer, err := s.StartTenant(context.Background(),
base.TestTenantArgs{
Settings: tenantSecondServerSettings,
TenantID: tenantID,
TestingKnobs: base.TestingKnobs{
Server: &server.TestingKnobs{
DisableAutomaticVersionUpgrade: make(chan struct{}),
},
},
})

if !testutils.IsError(err, test.expErrMatch) {
t.Fatalf("test %d: got error %s, wanted error matching '%s'", i, err, test.expErrMatch)
}

tenantFirstServer.Stopper().Stop(context.Background())
// Only attempt to stop the second server if it was started successfully.
if err == nil {
tenantSecondServer.Stopper().Stop(context.Background())
}
s.Stopper().Stop(context.Background())
}
}
15 changes: 11 additions & 4 deletions pkg/clusterversion/cockroach_versions.go
Original file line number Diff line number Diff line change
Expand Up @@ -422,6 +422,9 @@ func (k Key) String() string {
// previously referenced a < 22.1 version until that check/gate can be removed.
const TODOPreV22_1 = V22_1

// Offset every version +1M major versions into the future if this is a dev branch.
const DevOffset = 1000000

// rawVersionsSingleton lists all historical versions here in chronological
// order, with comments describing what backwards-incompatible features were
// introduced.
Expand Down Expand Up @@ -750,7 +753,6 @@ var versionsSingleton = func() keyedVersions {
// which conceptually is actually back down to 2 -- then back to to 1000003,
// then on to 1000004, etc.
skipFirst := allowUpgradeToDev
const devOffset = 1000000
first := true
for i := range rawVersionsSingleton {
// VPrimordial versions are not offset; they don't matter for the logic
Expand All @@ -763,7 +765,7 @@ var versionsSingleton = func() keyedVersions {
first = false
continue
}
rawVersionsSingleton[i].Major += devOffset
rawVersionsSingleton[i].Major += DevOffset
}
}
return rawVersionsSingleton
Expand All @@ -777,6 +779,10 @@ var versionsSingleton = func() keyedVersions {
// simply need to check is that the cluster has upgraded to 23.1.
var V23_1 = versionsSingleton[len(versionsSingleton)-1].Key

const (
BinaryMinSupportedVersionKey = V22_1
)

// TODO(irfansharif): clusterversion.binary{,MinimumSupported}Version
// feels out of place. A "cluster version" and a "binary version" are two
// separate concepts.
Expand All @@ -786,12 +792,13 @@ var (
// version than binaryMinSupportedVersion, then the binary will exit with
// an error. This typically trails the current release by one (see top-level
// comment).
binaryMinSupportedVersion = ByKey(V22_1)
binaryMinSupportedVersion = ByKey(BinaryMinSupportedVersionKey)

BinaryVersionKey = V23_1
// binaryVersion is the version of this binary.
//
// This is the version that a new cluster will use when created.
binaryVersion = versionsSingleton[len(versionsSingleton)-1].Version
binaryVersion = ByKey(BinaryVersionKey)
)

func init() {
Expand Down
16 changes: 16 additions & 0 deletions pkg/server/server_sql.go
Original file line number Diff line number Diff line change
Expand Up @@ -1520,6 +1520,22 @@ func (s *SQLServer) preStart(

log.Infof(ctx, "done ensuring all necessary startup migrations have run")

// Prevent the server from starting if its binary version is too low
// for the current tenant cluster version.
// This check needs to run after the "version" setting is set in the
// "system.settings" table of this tenant. This includes both system
// and secondary tenants.
tenantActiveVersion, err := s.settingsWatcher.GetClusterVersionFromStorage(ctx, s.execCfg.DB.Get)
if err != nil {
return err
}
if s.execCfg.Settings.Version.BinaryVersion().Less(tenantActiveVersion.Version) {
return errors.WithHintf(errors.Newf("preventing SQL server from starting because its binary version "+
"is too low for the tenant active version: server binary version = %v, tenant active version = %v",
s.execCfg.Settings.Version.BinaryVersion(), tenantActiveVersion.Version),
"use a tenant binary whose version is at least %v", tenantActiveVersion.Version)
}

// Delete all orphaned table leases created by a prior instance of this
// node. This also uses SQL.
s.leaseMgr.DeleteOrphanedLeases(ctx, orphanedLeasesTimeThresholdNanos)
Expand Down
2 changes: 1 addition & 1 deletion pkg/server/settings_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ func initializeCachedSettings(
) error {
dec := settingswatcher.MakeRowDecoder(codec)
for _, kv := range kvs {
settings, val, _, err := dec.DecodeRow(kv)
settings, val, _, err := dec.DecodeRow(kv, nil)
if err != nil {
return errors.Wrap(err, `while decoding settings data
-this likely indicates the settings table structure or encoding has been altered;
Expand Down
3 changes: 3 additions & 0 deletions pkg/server/settingswatcher/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ go_library(
deps = [
"//pkg/clusterversion",
"//pkg/keys",
"//pkg/kv",
"//pkg/kv/kvclient/rangefeed",
"//pkg/kv/kvclient/rangefeed/rangefeedbuffer",
"//pkg/kv/kvclient/rangefeed/rangefeedcache",
Expand All @@ -25,6 +26,7 @@ go_library(
"//pkg/sql/rowenc/valueside",
"//pkg/sql/sem/tree",
"//pkg/sql/types",
"//pkg/util/encoding",
"//pkg/util/hlc",
"//pkg/util/log",
"//pkg/util/protoutil",
Expand Down Expand Up @@ -58,6 +60,7 @@ go_test(
"//pkg/settings",
"//pkg/settings/cluster",
"//pkg/sql",
"//pkg/sql/sem/tree",
"//pkg/testutils",
"//pkg/testutils/serverutils",
"//pkg/testutils/sqlutils",
Expand Down
11 changes: 7 additions & 4 deletions pkg/server/settingswatcher/row_decoder.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ import (
)

// RowDecoder decodes rows from the settings table.
// It is safe for concurrent use.
type RowDecoder struct {
codec keys.SQLCodec
alloc tree.DatumAlloc
columns []catalog.Column
decoder valueside.Decoder
}
Expand All @@ -45,17 +45,20 @@ func MakeRowDecoder(codec keys.SQLCodec) RowDecoder {
// present, the setting key will be returned but the value will be zero and the
// tombstone bool will be set.
func (d *RowDecoder) DecodeRow(
kv roachpb.KeyValue,
kv roachpb.KeyValue, alloc *tree.DatumAlloc,
) (setting string, val settings.EncodedValue, tombstone bool, _ error) {
// First we need to decode the setting name field from the index key.
if alloc == nil {
alloc = &tree.DatumAlloc{}
}
{
types := []*types.T{d.columns[0].GetType()}
nameRow := make([]rowenc.EncDatum, 1)
_, _, err := rowenc.DecodeIndexKey(d.codec, types, nameRow, nil, kv.Key)
if err != nil {
return "", settings.EncodedValue{}, false, errors.Wrap(err, "failed to decode key")
}
if err := nameRow[0].EnsureDecoded(types[0], &d.alloc); err != nil {
if err := nameRow[0].EnsureDecoded(types[0], alloc); err != nil {
return "", settings.EncodedValue{}, false, err
}
setting = string(tree.MustBeDString(nameRow[0].Datum))
Expand All @@ -70,7 +73,7 @@ func (d *RowDecoder) DecodeRow(
return "", settings.EncodedValue{}, false, err
}

datums, err := d.decoder.Decode(&d.alloc, bytes)
datums, err := d.decoder.Decode(alloc, bytes)
if err != nil {
return "", settings.EncodedValue{}, false, err
}
Expand Down
6 changes: 4 additions & 2 deletions pkg/server/settingswatcher/row_decoder_external_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/server/settingswatcher"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
"github.com/cockroachdb/cockroach/pkg/testutils/testcluster"
"github.com/cockroachdb/cockroach/pkg/util/leaktest"
Expand Down Expand Up @@ -70,13 +71,14 @@ func TestRowDecoder(t *testing.T) {
rows, err := tc.Server(0).DB().Scan(ctx, k, k.PrefixEnd(), 0 /* maxRows */)
require.NoError(t, err)
dec := settingswatcher.MakeRowDecoder(keys.SystemSQLCodec)
var alloc *tree.DatumAlloc
for _, row := range rows {
kv := roachpb.KeyValue{
Key: row.Key,
Value: *row.Value,
}

k, val, tombstone, err := dec.DecodeRow(kv)
k, val, tombstone, err := dec.DecodeRow(kv, alloc)
require.NoError(t, err)
require.False(t, tombstone)
if exp, ok := toSet[k]; ok {
Expand All @@ -88,7 +90,7 @@ func TestRowDecoder(t *testing.T) {
// Test the tombstone logic while we're here.
{
kv.Value.Reset()
tombstoneK, val, tombstone, err := dec.DecodeRow(kv)
tombstoneK, val, tombstone, err := dec.DecodeRow(kv, alloc)
require.NoError(t, err)
require.True(t, tombstone)
require.Equal(t, k, tombstoneK)
Expand Down
Loading

0 comments on commit d418a4c

Please sign in to comment.