Skip to content

Commit

Permalink
roachtest: add backup-restore/small-ranges
Browse files Browse the repository at this point in the history
This patch adds a new backup-restore roachtest variant that reduces the default
range size for user databases in the backup-restore/round-trip roachtest to
simulate a larger cluster, in terms of range count, at smaller data sizes. In
addition, the roachtest scales down a few cluster settings such that the ratio
of rangeSize/clusterSetting remains constant.

This patch should allow us to recreate a roachtest workload that can simulate
the conditions that lead to cockroachdb#109483 but at smaller data sizes.

Informs cockroachdb#109483

Release note: None
  • Loading branch information
msbutler committed Oct 19, 2023
1 parent 2ea8659 commit 1155107
Show file tree
Hide file tree
Showing 2 changed files with 126 additions and 25 deletions.
85 changes: 63 additions & 22 deletions pkg/cmd/roachtest/tests/backup_restore_roundtrip.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,29 +30,66 @@ import (
"github.com/cockroachdb/errors"
)

// maxRangeSizeBytes lists the non-default maximum range sizes (the default is
// 512 MiB) that may be applied to all user databases.
var maxRangeSizeBytes = []int64{
	4 << 20,   // 4 MiB
	32 << 20,  // 32 MiB
	128 << 20, // 128 MiB
}

// systemSettingsScaledOnRangeSize names the cluster settings whose values
// should move in proportion to the range size: when the range size is halved,
// each of these settings should be halved as well.
var systemSettingsScaledOnRangeSize = []string{
	"backup.restore_span.target_size",
	"bulkio.backup.file_size",
	"kv.bulk_sst.target_size",
}

// numFullBackups is the number of iterations of the backup loop in
// backupRestoreRoundTrip.
const numFullBackups = 5

// roundTripSpecs describes one variant of the backup/restore round-trip
// roachtest registered by registerBackupRestoreRoundTrip.
type roundTripSpecs struct {
// name is used as the registered test name.
name string
// metamorphicRangeSize is forwarded to backupRestoreRoundTrip; when true the
// test calls setMaxRangeSizeAndDependentSettings before starting the
// background workload.
metamorphicRangeSize bool
}

// registerBackupRestoreRoundTrip registers the backup/restore round-trip
// roachtests, which check that creating a backup and restoring it creates the
// same objects. Each entry of the spec list below is registered exactly once:
//
//   - backup-restore/round-trip runs without touching the range size;
//   - backup-restore/small-ranges passes metamorphicRangeSize=true so the test
//     also changes the maximum range size of the user databases.
//
// Both variants share every other TestSpec field.
func registerBackupRestoreRoundTrip(r registry.Registry) {
	for _, sp := range []roundTripSpecs{
		{
			name:                 "backup-restore/round-trip",
			metamorphicRangeSize: false,
		},
		{
			name:                 "backup-restore/small-ranges",
			metamorphicRangeSize: true,
		},
	} {
		// Per-iteration copy so the Run closure below captures this spec rather
		// than the shared loop variable.
		sp := sp
		r.Add(registry.TestSpec{
			Name:              sp.name,
			Timeout:           8 * time.Hour,
			Owner:             registry.OwnerDisasterRecovery,
			Cluster:           r.MakeClusterSpec(4),
			EncryptionSupport: registry.EncryptionMetamorphic,
			RequiresLicense:   true,
			CompatibleClouds:  registry.AllExceptAWS,
			Suites:            registry.Suites(registry.Nightly),
			Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
				backupRestoreRoundTrip(ctx, t, c, sp.metamorphicRangeSize)
			},
		})
	}
}

func backupRestoreRoundTrip(ctx context.Context, t test.Test, c cluster.Cluster) {
// backup-restore/round-trip tests that a round trip of creating a backup and
// restoring the created backup create the same objects.
func backupRestoreRoundTrip(
ctx context.Context, t test.Test, c cluster.Cluster, metamorphicRangeSize bool,
) {
if c.Spec().Cloud != spec.GCE {
t.Skip("uses gs://cockroachdb-backup-testing; see https://github.com/cockroachdb/cockroach/issues/105968")
}

pauseProbability := 0.2
roachNodes := c.Range(1, c.Spec().NodeCount-1)
workloadNode := c.Node(c.Spec().NodeCount)
Expand All @@ -62,7 +99,11 @@ func backupRestoreRoundTrip(ctx context.Context, t test.Test, c cluster.Cluster)
// Upload binaries and start cluster.
uploadVersion(ctx, t, c, c.All(), clusterupgrade.MainVersion)

c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings(install.SecureOption(true)), roachNodes)
envOption := install.EnvOption([]string{
"COCKROACH_MIN_RANGE_MAX_BYTES=1",
})

c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings(install.SecureOption(true), envOption), roachNodes)
m := c.NewMonitor(ctx, roachNodes)

m.Go(func(ctx context.Context) error {
Expand All @@ -77,28 +118,30 @@ func backupRestoreRoundTrip(ctx context.Context, t test.Test, c cluster.Cluster)
if err != nil {
return err
}

tables, err := testUtils.loadTablesForDBs(ctx, t.L(), testRNG, dbs...)
if err != nil {
return err
}

d, err := newBackupRestoreTestDriver(ctx, t, c, testUtils, roachNodes, dbs, tables)
if err != nil {
return err
}

if err := testUtils.setShortJobIntervals(ctx, testRNG); err != nil {
return err
}
if err := testUtils.setClusterSettings(ctx, t.L(), testRNG); err != nil {
return err
}

if metamorphicRangeSize {
if err := testUtils.setMaxRangeSizeAndDependentSettings(ctx, t, testRNG, dbs); err != nil {
return err
}
}
stopBackgroundCommands, err := runBackgroundWorkload()
if err != nil {
return err
}
defer stopBackgroundCommands()

for i := 0; i < numFullBackups; i++ {
allNodes := labeledNodes{Nodes: roachNodes, Version: clusterupgrade.MainVersion}
Expand Down Expand Up @@ -146,8 +189,6 @@ func backupRestoreRoundTrip(ctx context.Context, t test.Test, c cluster.Cluster)
}
}
}

stopBackgroundCommands()
return nil
})

Expand Down
66 changes: 63 additions & 3 deletions pkg/cmd/roachtest/tests/mixed_version_backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/jobutils"
"github.com/cockroachdb/cockroach/pkg/util/humanizeutil"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
"github.com/cockroachdb/cockroach/pkg/util/retry"
Expand Down Expand Up @@ -143,6 +144,14 @@ var (
"kv.bulk_io_write.max_rate": {"250MiB", "500MiB", "2TiB"},
"kv.bulk_sst.max_allowed_overage": {"16MiB", "256MiB"},
"kv.bulk_sst.target_size": {"4MiB", "64MiB", "128MiB"},
// The default is currently 384 MB, which was set to be about 75% of a
// range's worth of data. This configuration will reduce the size of this
// setting to test restore_span_covering correctness, at the cost of a
// performance dip.
//
// Note that a size of 0 indicates that target_size will not be used while
// constructing restore span entries.
"backup.restore_span.target_size": {"0 B", "4 MiB", "32 MiB", "128 MiB"},
}

systemSettingNames = func() []string {
Expand Down Expand Up @@ -175,8 +184,8 @@ var (

possibleNumIncrementalBackups = []int{
1,
3,
5,
//3,
//5,
}
)

Expand Down Expand Up @@ -1285,6 +1294,57 @@ func (u *CommonTestUtils) loadTablesForDBs(
return allTables, nil
}

// setMaxRangeSizeAndDependentSettings chooses a random maximum range size from
// maxRangeSizeBytes, applies it (with range_min_bytes=1024) to the zone
// configuration of every database in dbs, and then rescales each cluster
// setting in systemSettingsScaledOnRangeSize by rangeSize/defaultRangeSize so
// that the ratio rangeSize/settingValue remains the same as it was relative to
// the 512 MiB default range size. It finishes by calling WaitFor3XReplication.
//
// The first error from any statement, query, or byte-size parse is returned
// unchanged.
func (u *CommonTestUtils) setMaxRangeSizeAndDependentSettings(
	ctx context.Context, t test.Test, rng *rand.Rand, dbs []string,
) error {
	const defaultRangeMinBytes = 1024
	const defaultRangeSize int64 = 512 << 20

	rangeSize := maxRangeSizeBytes[rng.Intn(len(maxRangeSizeBytes))]
	t.L().Printf("Set max range rangeSize to %s", humanizeutil.IBytes(rangeSize))

	// scale returns floor(current * rangeSize / defaultRangeSize).
	//
	// The quotient and remainder of current are scaled separately so that the
	// result is exact for any rangeSize (not only exact divisors or multiples
	// of defaultRangeSize) and so that the intermediate product stays small:
	// rem is below defaultRangeSize, hence rem*rangeSize cannot overflow for
	// any realistic range size.
	scale := func(current int64) int64 {
		whole, rem := current/defaultRangeSize, current%defaultRangeSize
		return whole*rangeSize + rem*rangeSize/defaultRangeSize
	}

	for _, dbName := range dbs {
		query := fmt.Sprintf("ALTER DATABASE %s CONFIGURE ZONE USING range_max_bytes=%d, range_min_bytes=%d",
			dbName, rangeSize, defaultRangeMinBytes)
		if err := u.Exec(ctx, rng, query); err != nil {
			return err
		}
	}

	for _, setting := range systemSettingsScaledOnRangeSize {
		// The setting is read back as a humanized string (e.g. with a unit
		// suffix) and parsed into a byte count before scaling.
		var humanizedCurrentValue string
		if err := u.QueryRow(ctx, rng, fmt.Sprintf("SHOW CLUSTER SETTING %s", setting)).Scan(&humanizedCurrentValue); err != nil {
			return err
		}
		currentValue, err := humanizeutil.ParseBytes(humanizedCurrentValue)
		if err != nil {
			return err
		}
		newValue := scale(currentValue)
		t.L().Printf("setting cluster setting %s from %s to %s ", setting, humanizedCurrentValue, humanizeutil.IBytes(newValue))
		stmt := fmt.Sprintf("SET CLUSTER SETTING %s = '%d'", setting, newValue)
		if err := u.Exec(ctx, rng, stmt); err != nil {
			return err
		}
	}

	// Ensure ranges have been properly replicated.
	_, dbConn := u.RandomDB(rng, u.roachNodes)
	return WaitFor3XReplication(ctx, t, dbConn)
}

// setClusterSettings may set up to numCustomSettings cluster settings
// as defined in `systemSettingValues`. The system settings changed
// are logged. This function should be called *before* the upgrade
Expand Down Expand Up @@ -2409,7 +2469,7 @@ func tpccWorkloadCmd(
testRNG *rand.Rand, numWarehouses int, roachNodes option.NodeListOption,
) (init *roachtestutil.Command, run *roachtestutil.Command) {
init = roachtestutil.NewCommand("./cockroach workload init tpcc").
MaybeOption(testRNG.Intn(2) == 0, "families").
Option("families").
Arg("{pgurl%s}", roachNodes).
Flag("warehouses", numWarehouses)
run = roachtestutil.NewCommand("./cockroach workload run tpcc").
Expand Down

0 comments on commit 1155107

Please sign in to comment.