From ae3184dba5ca593a4abf0080fdd46800a1571bd1 Mon Sep 17 00:00:00 2001 From: Faizan Qazi Date: Tue, 19 Nov 2024 19:46:08 +0000 Subject: [PATCH] upgrade: adding is_draining to system.sql_instance can fail Previously, the logic to add the is_draining column to the system.sql_instance descriptor would copy the bootstrap descriptor directly. While this works fine for non-multiregion system databases, this approach breaks for multi-region system databases. This is because bootstrap descriptors do not have multi-region modifications applied on top. To address this, this change modifies the upgrade to use ALTER TABLE ADD COLUMN. Fixes: #135736 Release note: None --- pkg/ccl/multiregionccl/BUILD.bazel | 1 + .../multiregion_system_table_test.go | 77 +++++++++++++++++++ .../v24_3_sql_instances_add_draining.go | 23 ++---- 3 files changed, 84 insertions(+), 17 deletions(-) diff --git a/pkg/ccl/multiregionccl/BUILD.bazel b/pkg/ccl/multiregionccl/BUILD.bazel index 8adf68475480..6d387ee2290f 100644 --- a/pkg/ccl/multiregionccl/BUILD.bazel +++ b/pkg/ccl/multiregionccl/BUILD.bazel @@ -51,6 +51,7 @@ go_test( "//pkg/ccl/multiregionccl/multiregionccltestutils", "//pkg/ccl/partitionccl", "//pkg/ccl/testutilsccl", + "//pkg/clusterversion", "//pkg/jobs", "//pkg/jobs/jobspb", "//pkg/keys", diff --git a/pkg/ccl/multiregionccl/multiregion_system_table_test.go b/pkg/ccl/multiregionccl/multiregion_system_table_test.go index 9a3f724f2e76..2a9600b93edf 100644 --- a/pkg/ccl/multiregionccl/multiregion_system_table_test.go +++ b/pkg/ccl/multiregionccl/multiregion_system_table_test.go @@ -15,8 +15,11 @@ import ( apd "github.com/cockroachdb/apd/v3" "github.com/cockroachdb/cockroach/pkg/base" "github.com/cockroachdb/cockroach/pkg/ccl/multiregionccl/multiregionccltestutils" + "github.com/cockroachdb/cockroach/pkg/clusterversion" "github.com/cockroachdb/cockroach/pkg/roachpb" + "github.com/cockroachdb/cockroach/pkg/server" "github.com/cockroachdb/cockroach/pkg/settings/cluster" + "github.com/cockroachdb/cockroach/pkg/sql" "github.com/cockroachdb/cockroach/pkg/sql/enum" "github.com/cockroachdb/cockroach/pkg/sql/sqlinstance/instancestorage" "github.com/cockroachdb/cockroach/pkg/sql/sqlliveness" @@ -551,3 +554,77 @@ func TestTenantStartupWithMultiRegionEnum(t *testing.T) { {"ALTER PARTITION \"us-east3\" OF INDEX system.public.lease@primary CONFIGURE ZONE USING\n\tnum_voters = 3,\n\tvoter_constraints = '[+region=us-east3]',\n\tlease_preferences = '[[+region=us-east3]]'"}, }) } + +func TestMrSystemDatabaseUpgrade(t *testing.T) { + defer leaktest.AfterTest(t)() + defer log.Scope(t).Close(t) + + ctx := context.Background() + + // Enable settings required for configuring a tenant's system database as multi-region. + makeSettings := func() *cluster.Settings { + cs := cluster.MakeTestingClusterSettingsWithVersions(clusterversion.Latest.Version(), + clusterversion.MinSupported.Version(), + false) + instancestorage.ReclaimLoopInterval.Override(ctx, &cs.SV, 150*time.Millisecond) + return cs + } + + cluster, _, cleanup := multiregionccltestutils.TestingCreateMultiRegionCluster(t, 3, + base.TestingKnobs{ + Server: &server.TestingKnobs{ + DisableAutomaticVersionUpgrade: make(chan struct{}), + ClusterVersionOverride: clusterversion.MinSupported.Version(), + }, + }, + multiregionccltestutils.WithSettings(makeSettings())) + defer cleanup() + id, err := roachpb.MakeTenantID(11) + require.NoError(t, err) + + // Disable license enforcement for this test. + for _, s := range cluster.Servers { + s.ExecutorConfig().(sql.ExecutorConfig).LicenseEnforcer.Disable(ctx) + } + + tenantArgs := base.TestTenantArgs{ + Settings: makeSettings(), + TestingKnobs: base.TestingKnobs{ + Server: &server.TestingKnobs{ + DisableAutomaticVersionUpgrade: make(chan struct{}), + ClusterVersionOverride: clusterversion.MinSupported.Version(), + }, + }, + TenantID: id, + Locality: cluster.Servers[0].Locality(), + } + appLayer, tenantSQL := serverutils.StartTenant(t, cluster.Servers[0], tenantArgs) + appLayer.ExecutorConfig().(sql.ExecutorConfig).LicenseEnforcer.Disable(ctx) + + tDB := sqlutils.MakeSQLRunner(tenantSQL) + + tDB.Exec(t, `ALTER DATABASE system SET PRIMARY REGION "us-east1"`) + tDB.Exec(t, `ALTER DATABASE system ADD REGION "us-east2"`) + tDB.Exec(t, `ALTER DATABASE system ADD REGION "us-east3"`) + tDB.Exec(t, `ALTER DATABASE defaultdb SET PRIMARY REGION "us-east1"`) + tDB.Exec(t, `ALTER DATABASE defaultdb ADD REGION "us-east2"`) + tDB.Exec(t, `ALTER DATABASE defaultdb ADD REGION "us-east3"`) + tDB.Exec(t, "ALTER DATABASE defaultdb SURVIVE REGION FAILURE") + + tDB.CheckQueryResults(t, "SELECT create_statement FROM [SHOW CREATE DATABASE system]", [][]string{ + {"CREATE DATABASE system PRIMARY REGION \"us-east1\" REGIONS = \"us-east1\", \"us-east2\", \"us-east3\" SURVIVE REGION FAILURE"}, + }) + + _, err = cluster.Conns[0].Exec("SET CLUSTER SETTING version = crdb_internal.node_executable_version();") + require.NoError(t, err) + tDB.Exec(t, "SET CLUSTER SETTING version = crdb_internal.node_executable_version();") + + tDB.CheckQueryResults(t, "SELECT create_statement FROM [SHOW CREATE DATABASE system]", [][]string{ + {"CREATE DATABASE system PRIMARY REGION \"us-east1\" REGIONS = \"us-east1\", \"us-east2\", \"us-east3\" SURVIVE REGION FAILURE"}, + }) + tDB.CheckQueryResults(t, "SELECT raw_config_sql FROM [SHOW ZONE CONFIGURATIONS] WHERE target LIKE 'PARTITION %lease%' ORDER BY target;", [][]string{ + {"ALTER PARTITION \"us-east1\" OF INDEX system.public.lease@primary CONFIGURE ZONE USING\n\tnum_voters = 3,\n\tvoter_constraints = '[+region=us-east1]',\n\tlease_preferences = '[[+region=us-east1]]'"}, + {"ALTER PARTITION \"us-east2\" OF INDEX system.public.lease@primary CONFIGURE ZONE USING\n\tnum_voters = 3,\n\tvoter_constraints = '[+region=us-east2]',\n\tlease_preferences = '[[+region=us-east2]]'"}, + {"ALTER PARTITION \"us-east3\" OF INDEX system.public.lease@primary CONFIGURE ZONE USING\n\tnum_voters = 3,\n\tvoter_constraints = '[+region=us-east3]',\n\tlease_preferences = '[[+region=us-east3]]'"}, + }) +} diff --git a/pkg/upgrade/upgrades/v24_3_sql_instances_add_draining.go b/pkg/upgrade/upgrades/v24_3_sql_instances_add_draining.go index d3a184468aa5..ba1fb2815b78 100644 --- a/pkg/upgrade/upgrades/v24_3_sql_instances_add_draining.go +++ b/pkg/upgrade/upgrades/v24_3_sql_instances_add_draining.go @@ -9,35 +9,24 @@ import ( "context" "github.com/cockroachdb/cockroach/pkg/clusterversion" - "github.com/cockroachdb/cockroach/pkg/sql/catalog/descpb" "github.com/cockroachdb/cockroach/pkg/sql/catalog/descs" - "github.com/cockroachdb/cockroach/pkg/sql/catalog/systemschema" "github.com/cockroachdb/cockroach/pkg/upgrade" - "github.com/cockroachdb/cockroach/pkg/util/protoutil" "github.com/cockroachdb/errors" ) // sqlInstancesAddDrainingMigration adds a new column `is_draining` to the // system.sql_instances table. func sqlInstancesAddDrainingMigration( - ctx context.Context, cs clusterversion.ClusterVersion, deps upgrade.TenantDeps, + ctx context.Context, _ clusterversion.ClusterVersion, deps upgrade.TenantDeps, ) error { - finalDescriptor := systemschema.SQLInstancesTable() - // Replace the stored descriptor with the bootstrap descriptor. + // Update the sql_instance table to add the is_draining column. err := deps.DB.DescsTxn(ctx, func(ctx context.Context, txn descs.Txn) error { - expectedDesc := finalDescriptor.TableDesc() - mutableDesc, err := txn.Descriptors().MutableByID(txn.KV()).Table(ctx, expectedDesc.GetID()) - if err != nil { - return err - } - version := mutableDesc.Version - mutableDesc.TableDescriptor = *protoutil.Clone(expectedDesc).(*descpb.TableDescriptor) - mutableDesc.Version = version - return txn.Descriptors().WriteDesc(ctx, false, mutableDesc, txn.KV()) + _, err := txn.Exec(ctx, "add-draining-column", txn.KV(), + `ALTER TABLE system.sql_instances ADD COLUMN IF NOT EXISTS is_draining BOOL NULL FAMILY "primary"`) + return err }) if err != nil { - return errors.Wrapf(err, "unable to replace system descriptor for system.%s (%+v)", - finalDescriptor.GetName(), finalDescriptor) + return errors.Wrapf(err, "unable to add column to system.sql_instances") } return err }