Skip to content

Commit

Permalink
multitenant: randomly run tests with tenants
Browse files Browse the repository at this point in the history
Previously, testServer would run by default in single-tenant mode.  This PR
changes testServer to run by default in multi-tenant mode, and runs all
statements through the default test tenant. Since we want the bulk of our
testing to remain in single-tenant mode, we only probabilistically run with
tenants (with probability 0.25). This commit also contains a few pieces of
cleanup which were necessary to make the testing changes (and were difficult to
split into a separate commit):

  - Change the "Existing" flag on tenant start to "DisableCreateTenant" and
    make the tenant start code more tolerant of existing tenants by checking
    to see if the tenant exists before trying to create it. The
    DisableCreateTenant flag is required for the cases where we want a test to
    fail due to the lack of a created tenant.
  - Changed the multi-region backup tests to use node 0 instead of node 1, to
    allow them to work within a tenant. Tenants aren't able to access remote
    nodes of nodelocal stoarge.
  - Cleaned up a couple of cases where our error handling was passing nil
    structs to be printed out.
  - Had to special case the setting of kv.range_merge.queue_enabled because
    it is only available to the system SQL server. As a result, all tests which
    check for its setting had to be modified to no longer check for it (since
    it will only be set in the case where we have no SQL server).

Additionally, we've created a tracking issue for all tests which currently
don't work under the default test tenant (cockroachdb#76378).

Release note: None
  • Loading branch information
ajstorm committed Feb 14, 2022
1 parent 5d50f4f commit bec90b3
Show file tree
Hide file tree
Showing 74 changed files with 775 additions and 174 deletions.
12 changes: 9 additions & 3 deletions pkg/base/test_server_args.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,11 @@ type TestServerArgs struct {
// TODO(irfansharif): Remove all uses of this when we rip out the system
// config span.
DisableSpanConfigs bool

// TestServer will probabilistically start a single SQL server on each
// node for multi-tenant testing, and default all connections to that
// SQL server. Use this flag to disable that behavior.
DisableDefaultSQLServer bool
}

// TestClusterArgs contains the parameters one can set when creating a test
Expand Down Expand Up @@ -244,9 +249,10 @@ const (
type TestTenantArgs struct {
TenantID roachpb.TenantID

// Existing, if true, indicates an existing tenant, rather than a new tenant
// to be created by StartTenant.
Existing bool
// DisableCreateTenant disables the explicit creation of a tenant when
// StartTenant is attempted. It's used in cases where we want to validate
// that a tenant doesn't start if it isn't existing.
DisableCreateTenant bool

// Settings allows the caller to control the settings object used for the
// tenant cluster.
Expand Down
78 changes: 64 additions & 14 deletions pkg/ccl/backupccl/backup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,10 @@ func (d *datadrivenTestState) addServer(
var cleanup func()
params := base.TestClusterArgs{}
params.ServerArgs.ExternalIODirConfig = ioConf
// Unfortunately, we have to disable the default SQL server here because
// of numerous failures in the data driven test suite when it's enabled.
// This should be investigated. Tracked with #76378.
params.ServerArgs.DisableDefaultSQLServer = true
if tempCleanupFrequency != "" {
duration, err := time.ParseDuration(tempCleanupFrequency)
if err != nil {
Expand Down Expand Up @@ -1179,7 +1183,12 @@ func backupAndRestore(
t.Fatal(err)
}
}
args := base.TestServerArgs{ExternalIODir: tc.Servers[backupNodeID].ClusterSettings().ExternalIODir}
args := base.TestServerArgs{
ExternalIODir: tc.Servers[backupNodeID].ClusterSettings().ExternalIODir,
// Test fails when run under a SQL server. More investigation is
// required. Tracked with #76378.
DisableDefaultSQLServer: tc.Servers[backupNodeID].Cfg.DisableDefaultSQLServer,
}
tcRestore := testcluster.StartTestCluster(t, singleNode, base.TestClusterArgs{ServerArgs: args})
defer tcRestore.Stopper().Stop(ctx)
sqlDBRestore := sqlutils.MakeSQLRunner(tcRestore.Conns[0])
Expand Down Expand Up @@ -5776,7 +5785,9 @@ func TestBackupRestoreSequenceOwnership(t *testing.T) {
const numAccounts = 1
_, origDB, dir, cleanupFn := BackupRestoreTestSetup(t, singleNode, numAccounts, InitManualReplication)
defer cleanupFn()
args := base.TestServerArgs{ExternalIODir: dir}
// Test fails when run under a SQL server. More investigation is
// required. Tracked with #76378.
args := base.TestServerArgs{ExternalIODir: dir, DisableDefaultSQLServer: true}

// Setup for sequence ownership backup/restore tests in the same database.
backupLoc := LocalFoo + `/d`
Expand Down Expand Up @@ -6254,6 +6265,9 @@ func TestProtectedTimestampsDuringBackup(t *testing.T) {
defer dirCleanupFn()
params := base.TestClusterArgs{}
params.ServerArgs.ExternalIODir = dir
// This test hangs when run from a SQL server. More investigation is
// required. Tracked with #76378.
params.ServerArgs.DisableDefaultSQLServer = true
params.ServerArgs.Knobs.Store = &kvserver.StoreTestingKnobs{
TestingRequestFilter: func(ctx context.Context, ba roachpb.BatchRequest) *roachpb.Error {
for _, ru := range ba.Requests {
Expand Down Expand Up @@ -6721,6 +6735,9 @@ func TestRestoreErrorPropagates(t *testing.T) {
defer dirCleanupFn()
params := base.TestClusterArgs{}
params.ServerArgs.ExternalIODir = dir
// This test fails when run from a SQL server. More investigation is
// required. Tracked with #76378.
params.ServerArgs.DisableDefaultSQLServer = true
jobsTableKey := keys.SystemSQLCodec.TablePrefix(uint32(systemschema.JobsTable.GetID()))
var shouldFail, failures int64
params.ServerArgs.Knobs.Store = &kvserver.StoreTestingKnobs{
Expand Down Expand Up @@ -6772,6 +6789,9 @@ func TestProtectedTimestampsFailDueToLimits(t *testing.T) {
defer dirCleanupFn()
params := base.TestClusterArgs{}
params.ServerArgs.ExternalIODir = dir
// This test fails when run from a SQL server. More investigation is
// required. Tracked with #76378.
params.ServerArgs.DisableDefaultSQLServer = true
tc := testcluster.StartTestCluster(t, 1, params)
defer tc.Stopper().Stop(ctx)
db := tc.ServerConn(0)
Expand Down Expand Up @@ -7177,7 +7197,10 @@ func TestBackupRestoreTenant(t *testing.T) {
restoreTC := testcluster.StartTestCluster(
t, singleNode, base.TestClusterArgs{ServerArgs: base.TestServerArgs{
ExternalIODir: dir,
Knobs: base.TestingKnobs{JobsTestingKnobs: jobs.NewTestingKnobsWithShortIntervals()},
// This test fails when run from a SQL server as it assumes that
// it's not running in SQL server. Tracked with #76378.
DisableDefaultSQLServer: true,
Knobs: base.TestingKnobs{JobsTestingKnobs: jobs.NewTestingKnobsWithShortIntervals()},
}},
)
defer restoreTC.Stopper().Stop(ctx)
Expand All @@ -7198,7 +7221,7 @@ func TestBackupRestoreTenant(t *testing.T) {
ten10Stopper := stop.NewStopper()
_, restoreConn10 := serverutils.StartTenant(
t, restoreTC.Server(0), base.TestTenantArgs{
TenantID: roachpb.MakeTenantID(10), Existing: true, Stopper: ten10Stopper,
TenantID: roachpb.MakeTenantID(10), Stopper: ten10Stopper,
},
)
restoreTenant10 := sqlutils.MakeSQLRunner(restoreConn10)
Expand Down Expand Up @@ -7240,7 +7263,7 @@ func TestBackupRestoreTenant(t *testing.T) {
)

_, restoreConn10 = serverutils.StartTenant(
t, restoreTC.Server(0), base.TestTenantArgs{TenantID: roachpb.MakeTenantID(10), Existing: true},
t, restoreTC.Server(0), base.TestTenantArgs{TenantID: roachpb.MakeTenantID(10)},
)
defer restoreConn10.Close()
restoreTenant10 = sqlutils.MakeSQLRunner(restoreConn10)
Expand All @@ -7266,7 +7289,12 @@ func TestBackupRestoreTenant(t *testing.T) {

t.Run("restore-t10-from-cluster-backup", func(t *testing.T) {
restoreTC := testcluster.StartTestCluster(
t, singleNode, base.TestClusterArgs{ServerArgs: base.TestServerArgs{ExternalIODir: dir}},
t, singleNode, base.TestClusterArgs{ServerArgs: base.TestServerArgs{
ExternalIODir: dir,
// This test fails when run from a SQL server as it assumes that
// it's not running in SQL server. Tracked with #76378.
DisableDefaultSQLServer: true,
}},
)
defer restoreTC.Stopper().Stop(ctx)
restoreDB := sqlutils.MakeSQLRunner(restoreTC.Conns[0])
Expand All @@ -7279,7 +7307,7 @@ func TestBackupRestoreTenant(t *testing.T) {
)

_, restoreConn10 := serverutils.StartTenant(
t, restoreTC.Server(0), base.TestTenantArgs{TenantID: roachpb.MakeTenantID(10), Existing: true},
t, restoreTC.Server(0), base.TestTenantArgs{TenantID: roachpb.MakeTenantID(10)},
)
defer restoreConn10.Close()
restoreTenant10 := sqlutils.MakeSQLRunner(restoreConn10)
Expand All @@ -7290,7 +7318,12 @@ func TestBackupRestoreTenant(t *testing.T) {

t.Run("restore-all-from-cluster-backup", func(t *testing.T) {
restoreTC := testcluster.StartTestCluster(
t, singleNode, base.TestClusterArgs{ServerArgs: base.TestServerArgs{ExternalIODir: dir}},
t, singleNode, base.TestClusterArgs{ServerArgs: base.TestServerArgs{
ExternalIODir: dir,
// This test fails when run from a SQL server as it assumes that
// it's not running in SQL server. Tracked with #76378.
DisableDefaultSQLServer: true,
}},
)

defer restoreTC.Stopper().Stop(ctx)
Expand All @@ -7308,7 +7341,7 @@ func TestBackupRestoreTenant(t *testing.T) {
)

_, restoreConn10 := serverutils.StartTenant(
t, restoreTC.Server(0), base.TestTenantArgs{TenantID: roachpb.MakeTenantID(10), Existing: true},
t, restoreTC.Server(0), base.TestTenantArgs{TenantID: roachpb.MakeTenantID(10)},
)
defer restoreConn10.Close()
restoreTenant10 := sqlutils.MakeSQLRunner(restoreConn10)
Expand All @@ -7317,7 +7350,7 @@ func TestBackupRestoreTenant(t *testing.T) {
restoreTenant10.CheckQueryResults(t, `select * from foo.bar2`, tenant10.QueryStr(t, `select * from foo.bar2`))

_, restoreConn11 := serverutils.StartTenant(
t, restoreTC.Server(0), base.TestTenantArgs{TenantID: roachpb.MakeTenantID(11), Existing: true},
t, restoreTC.Server(0), base.TestTenantArgs{TenantID: roachpb.MakeTenantID(11)},
)
defer restoreConn11.Close()
restoreTenant11 := sqlutils.MakeSQLRunner(restoreConn11)
Expand All @@ -7327,15 +7360,20 @@ func TestBackupRestoreTenant(t *testing.T) {

t.Run("restore-tenant10-to-ts1", func(t *testing.T) {
restoreTC := testcluster.StartTestCluster(
t, singleNode, base.TestClusterArgs{ServerArgs: base.TestServerArgs{ExternalIODir: dir}},
t, singleNode, base.TestClusterArgs{ServerArgs: base.TestServerArgs{
ExternalIODir: dir,
// This test fails when run from a SQL server as it assumes that
// it's not running in SQL server. Tracked with #76378.
DisableDefaultSQLServer: true,
}},
)
defer restoreTC.Stopper().Stop(ctx)
restoreDB := sqlutils.MakeSQLRunner(restoreTC.Conns[0])

restoreDB.Exec(t, `RESTORE TENANT 10 FROM 'nodelocal://1/t10' AS OF SYSTEM TIME `+ts1)

_, restoreConn10 := serverutils.StartTenant(
t, restoreTC.Server(0), base.TestTenantArgs{TenantID: roachpb.MakeTenantID(10), Existing: true},
t, restoreTC.Server(0), base.TestTenantArgs{TenantID: roachpb.MakeTenantID(10)},
)
defer restoreConn10.Close()
restoreTenant10 := sqlutils.MakeSQLRunner(restoreConn10)
Expand All @@ -7345,15 +7383,20 @@ func TestBackupRestoreTenant(t *testing.T) {

t.Run("restore-tenant20-to-latest", func(t *testing.T) {
restoreTC := testcluster.StartTestCluster(
t, singleNode, base.TestClusterArgs{ServerArgs: base.TestServerArgs{ExternalIODir: dir}},
t, singleNode, base.TestClusterArgs{ServerArgs: base.TestServerArgs{
ExternalIODir: dir,
// This test fails when run from a SQL server as it assumes that
// it's not running in SQL server. Tracked with #76378.
DisableDefaultSQLServer: true,
}},
)
defer restoreTC.Stopper().Stop(ctx)
restoreDB := sqlutils.MakeSQLRunner(restoreTC.Conns[0])

restoreDB.Exec(t, `RESTORE TENANT 20 FROM 'nodelocal://1/t20'`)

_, restoreConn20 := serverutils.StartTenant(
t, restoreTC.Server(0), base.TestTenantArgs{TenantID: roachpb.MakeTenantID(20), Existing: true},
t, restoreTC.Server(0), base.TestTenantArgs{TenantID: roachpb.MakeTenantID(20)},
)
defer restoreConn20.Close()
restoreTenant20 := sqlutils.MakeSQLRunner(restoreConn20)
Expand Down Expand Up @@ -8262,6 +8305,8 @@ func TestFullClusterTemporaryBackupAndRestore(t *testing.T) {
defer dirCleanupFn()
params := base.TestClusterArgs{}
params.ServerArgs.ExternalIODir = dir
// This test fails when run from a SQL server. Tracked with #76378.
params.ServerArgs.DisableDefaultSQLServer = true
params.ServerArgs.UseDatabase = "defaultdb"
params.ServerArgs.Settings = settings
knobs := base.TestingKnobs{
Expand Down Expand Up @@ -9024,6 +9069,11 @@ func TestGCDropIndexSpanExpansion(t *testing.T) {
ctx := context.Background()
tc := testcluster.StartTestCluster(t, 1, base.TestClusterArgs{ServerArgs: base.TestServerArgs{
ExternalIODir: baseDir,
// This test hangs when run from a SQL server. It's likely that
// the cause of the hang is the fact that we're waiting on the GC to
// complete, and we don't have visibility into the GC completing from
// the tenant. More investigation is required. Tracked with #76378.
DisableDefaultSQLServer: true,
Knobs: base.TestingKnobs{
GCJob: &sql.GCJobTestingKnobs{RunBeforePerformGC: func(id jobspb.JobID) error {
gcJobID = id
Expand Down
3 changes: 3 additions & 0 deletions pkg/ccl/backupccl/create_scheduled_backup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ func newTestHelper(t *testing.T) (*testHelper, func()) {

args := base.TestServerArgs{
ExternalIODir: dir,
// Some scheduled backup tests fail when run from a SQL server. More
// investigation is required. Tracked with #76378.
DisableDefaultSQLServer: true,
Knobs: base.TestingKnobs{
JobsTestingKnobs: knobs,
},
Expand Down
10 changes: 8 additions & 2 deletions pkg/ccl/backupccl/full_cluster_backup_restore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -1056,7 +1056,10 @@ func TestRestoreWithRecreatedDefaultDB(t *testing.T) {
defer log.Scope(t).Close(t)

sqlDB, tempDir, cleanupFn := createEmptyCluster(t, singleNode)
_, sqlDBRestore, cleanupEmptyCluster := backupRestoreTestSetupEmpty(t, singleNode, tempDir, InitManualReplication, base.TestClusterArgs{})
_, sqlDBRestore, cleanupEmptyCluster := backupRestoreTestSetupEmpty(t, singleNode, tempDir, InitManualReplication,
// Disabling the default SQL server due to test failures. More
// investigation is required. Tracked with #76378.
base.TestClusterArgs{ServerArgs: base.TestServerArgs{DisableDefaultSQLServer: true}})
defer cleanupFn()
defer cleanupEmptyCluster()

Expand All @@ -1081,7 +1084,10 @@ func TestRestoreWithDroppedDefaultDB(t *testing.T) {
defer log.Scope(t).Close(t)

sqlDB, tempDir, cleanupFn := createEmptyCluster(t, singleNode)
_, sqlDBRestore, cleanupEmptyCluster := backupRestoreTestSetupEmpty(t, singleNode, tempDir, InitManualReplication, base.TestClusterArgs{})
_, sqlDBRestore, cleanupEmptyCluster := backupRestoreTestSetupEmpty(t, singleNode, tempDir, InitManualReplication,
// Disabling the default SQL server due to test failures. More
// investigation is required. Tracked with #76378.
base.TestClusterArgs{ServerArgs: base.TestServerArgs{DisableDefaultSQLServer: true}})
defer cleanupFn()
defer cleanupEmptyCluster()

Expand Down
17 changes: 17 additions & 0 deletions pkg/ccl/backupccl/helpers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,20 @@ func backupRestoreTestSetupWithParams(
dir, dirCleanupFn := testutils.TempDir(t)
params.ServerArgs.ExternalIODir = dir
params.ServerArgs.UseDatabase = "data"
// Need to disable the SQL server here. Below we're creating a database
// which gets used in various ways in different tests. One way it's used
// is to fetch the database's descriptor using TestingGetTableDescriptor
// which currently isn't multi-tenant enabled. The end result is that we
// can't find the created database and the test fails. Long term we should
// change TestingGetTableDescriptor so that it's multi-tenant enabled.
// Tracked with #76378.
params.ServerArgs.DisableDefaultSQLServer = true
if len(params.ServerArgsPerNode) > 0 {
for i := range params.ServerArgsPerNode {
param := params.ServerArgsPerNode[i]
param.ExternalIODir = dir
param.UseDatabase = "data"
param.DisableDefaultSQLServer = true
params.ServerArgsPerNode[i] = param
}
}
Expand Down Expand Up @@ -184,11 +193,16 @@ func backupRestoreTestSetupEmptyWithParams(
) (tc *testcluster.TestCluster, sqlDB *sqlutils.SQLRunner, cleanup func()) {
ctx := logtags.AddTag(context.Background(), "backup-restore-test-setup-empty", nil)

// Need to disable SQL server here. Much of the backup/restore tests
// perform validation of the restore by checking in the ranges directly.
// This is not supported from within a SQL server. Tracked with #76378.
params.ServerArgs.DisableDefaultSQLServer = true
params.ServerArgs.ExternalIODir = dir
if len(params.ServerArgsPerNode) > 0 {
for i := range params.ServerArgsPerNode {
param := params.ServerArgsPerNode[i]
param.ExternalIODir = dir
param.DisableDefaultSQLServer = true
params.ServerArgsPerNode[i] = param
}
}
Expand All @@ -212,6 +226,9 @@ func createEmptyCluster(
dir, dirCleanupFn := testutils.TempDir(t)
params := base.TestClusterArgs{}
params.ServerArgs.ExternalIODir = dir
// Disabling SQL server due to test failures. More investigation is
// required. Tracked with #76378.
params.ServerArgs.DisableDefaultSQLServer = true
tc := testcluster.StartTestCluster(t, clusterSize, params)

sqlDB = sqlutils.MakeSQLRunner(tc.Conns[0])
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,10 @@ func TestInsertMissingPublicSchemaNamespaceEntry(t *testing.T) {
defer cleanup()
tc := testcluster.StartTestCluster(t, 3, base.TestClusterArgs{
ServerArgs: base.TestServerArgs{
ExternalIODir: dir,
// Disabling the default SQL server due to test failures. More
// investigation is required. Tracked with #76378.
DisableDefaultSQLServer: true,
ExternalIODir: dir,
Knobs: base.TestingKnobs{
Server: &server.TestingKnobs{
DisableAutomaticVersionUpgrade: 1,
Expand Down
7 changes: 6 additions & 1 deletion pkg/ccl/backupccl/restore_mid_schema_change_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,12 @@ func restoreMidSchemaChange(
params := base.TestClusterArgs{
ServerArgs: base.TestServerArgs{
ExternalIODir: dir,
Knobs: base.TestingKnobs{JobsTestingKnobs: jobs.NewTestingKnobsWithShortIntervals()},
// This test fails when run with the default SQL server because
// it relies on TestingGetTableDescriptor which isn't supported
// in multi-tenancy. More work is required here. Tracked with
// #76378.
DisableDefaultSQLServer: true,
Knobs: base.TestingKnobs{JobsTestingKnobs: jobs.NewTestingKnobsWithShortIntervals()},
},
}
tc := testcluster.StartTestCluster(t, singleNode, params)
Expand Down
Loading

0 comments on commit bec90b3

Please sign in to comment.