Merge #112356
112356: roachtest: add backup-restore/small-ranges  r=renatolabs a=msbutler

This patch adds a new backup-restore roachtest variant that reduces the default
range size for user databases in the backup-restore/round-trip roachtest to
simulate a larger cluster, in terms of range count, at smaller data sizes. In
addition, the roachtest scales down a few cluster settings so that the ratio
of rangeSize/clusterSetting remains constant.

This patch should allow us to create a roachtest workload that simulates the
conditions that led to #109483, but at smaller data sizes.

Informs #109483

Release note: None

Co-authored-by: Michael Butler <[email protected]>
craig[bot] and msbutler committed Oct 26, 2023
2 parents 40bcea4 + e8bd197 commit b13be53
Showing 2 changed files with 120 additions and 21 deletions.
83 changes: 62 additions & 21 deletions pkg/cmd/roachtest/tests/backup_restore_roundtrip.go
@@ -30,29 +30,66 @@ import (
"github.com/cockroachdb/errors"
)

var (
// maxRangeSizeBytes defines the possible non-default maximum range sizes
// (the default is 512 MiB) that may be set for all user databases.
maxRangeSizeBytes = []int64{4 << 20 /* 4 MiB */, 32 << 20 /* 32 MiB */, 128 << 20 /* 128 MiB */}

// systemSettingsScaledOnRangeSize defines the cluster settings that
// should scale in proportion to the range size. For example, if the range
// size is halved, the values of these cluster settings should also be
// halved.
systemSettingsScaledOnRangeSize = []string{
"backup.restore_span.target_size",
"bulkio.backup.file_size",
"kv.bulk_sst.target_size",
}
)

const numFullBackups = 5

// roundTripSpecs parameterizes the variants of the backup-restore round-trip
// test; metamorphicRangeSize enables the reduced, randomly chosen range sizes.
type roundTripSpecs struct {
name string
metamorphicRangeSize bool
}

func registerBackupRestoreRoundTrip(r registry.Registry) {
// backup-restore/round-trip tests that a round trip of creating a backup and
// restoring the created backup create the same objects.
r.Add(registry.TestSpec{
Name: "backup-restore/round-trip",
Timeout: 8 * time.Hour,
Owner: registry.OwnerDisasterRecovery,
Cluster: r.MakeClusterSpec(4),
EncryptionSupport: registry.EncryptionMetamorphic,
RequiresLicense: true,
CompatibleClouds: registry.AllExceptAWS,
Suites: registry.Suites(registry.Nightly),
Run: backupRestoreRoundTrip,
})

for _, sp := range []roundTripSpecs{
{
name: "backup-restore/round-trip",
metamorphicRangeSize: false,
},
{
name: "backup-restore/small-ranges",
metamorphicRangeSize: true,
},
} {
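// Copy the loop variable so the Run closure below captures this iteration's spec.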
sp := sp
r.Add(registry.TestSpec{
Name: sp.name,
Timeout: 4 * time.Hour,
Owner: registry.OwnerDisasterRecovery,
Cluster: r.MakeClusterSpec(4),
EncryptionSupport: registry.EncryptionMetamorphic,
RequiresLicense: true,
CompatibleClouds: registry.AllExceptAWS,
Suites: registry.Suites(registry.Nightly),
Run: func(ctx context.Context, t test.Test, c cluster.Cluster) {
backupRestoreRoundTrip(ctx, t, c, sp.metamorphicRangeSize)
},
})
}
}

func backupRestoreRoundTrip(ctx context.Context, t test.Test, c cluster.Cluster) {
// backup-restore/round-trip tests that a round trip of creating a backup and
// restoring the created backup creates the same objects.
func backupRestoreRoundTrip(
ctx context.Context, t test.Test, c cluster.Cluster, metamorphicRangeSize bool,
) {
if c.Spec().Cloud != spec.GCE {
t.Skip("uses gs://cockroachdb-backup-testing; see https://github.com/cockroachdb/cockroach/issues/105968")
}

pauseProbability := 0.2
roachNodes := c.Range(1, c.Spec().NodeCount-1)
workloadNode := c.Node(c.Spec().NodeCount)
@@ -62,7 +99,11 @@ func backupRestoreRoundTrip(ctx context.Context, t test.Test, c cluster.Cluster)
// Upload binaries and start cluster.
uploadVersion(ctx, t, c, c.All(), clusterupgrade.CurrentVersion())

c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings(install.SecureOption(true)), roachNodes)
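// Presumably, COCKROACH_MIN_RANGE_MAX_BYTES=1 lowers the floor on
// range_max_bytes so the small-ranges variant can configure ranges as small
// as 4 MiB.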
envOption := install.EnvOption([]string{
"COCKROACH_MIN_RANGE_MAX_BYTES=1",
})

c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), install.MakeClusterSettings(install.SecureOption(true), envOption), roachNodes)
m := c.NewMonitor(ctx, roachNodes)

m.Go(func(ctx context.Context) error {
@@ -77,24 +118,25 @@ func backupRestoreRoundTrip(ctx context.Context, t test.Test, c cluster.Cluster)
if err != nil {
return err
}

tables, err := testUtils.loadTablesForDBs(ctx, t.L(), testRNG, dbs...)
if err != nil {
return err
}

d, err := newBackupRestoreTestDriver(ctx, t, c, testUtils, roachNodes, dbs, tables)
if err != nil {
return err
}

if err := testUtils.setShortJobIntervals(ctx, testRNG); err != nil {
return err
}
if err := testUtils.setClusterSettings(ctx, t.L(), testRNG); err != nil {
return err
}

if metamorphicRangeSize {
if err := testUtils.setMaxRangeSizeAndDependentSettings(ctx, t, testRNG, dbs); err != nil {
return err
}
}
stopBackgroundCommands, err := runBackgroundWorkload()
if err != nil {
return err
@@ -146,7 +188,6 @@ func backupRestoreRoundTrip(ctx context.Context, t test.Test, c cluster.Cluster)
}
}
}

stopBackgroundCommands()
return nil
})
58 changes: 58 additions & 0 deletions pkg/cmd/roachtest/tests/mixed_version_backup.go
@@ -41,6 +41,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/jobutils"
"github.com/cockroachdb/cockroach/pkg/util/humanizeutil"
"github.com/cockroachdb/cockroach/pkg/util/protoutil"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
"github.com/cockroachdb/cockroach/pkg/util/retry"
@@ -143,6 +144,14 @@
"kv.bulk_io_write.max_rate": {"250MiB", "500MiB", "2TiB"},
"kv.bulk_sst.max_allowed_overage": {"16MiB", "256MiB"},
"kv.bulk_sst.target_size": {"4MiB", "64MiB", "128MiB"},
// The default is currently 384 MB, which was set to be about 75% of a
// range's worth of data. This configuration will reduce the size of this
// setting to test restore_span_covering correctness, at the cost of a
// performance dip.
//
// Note that a size of 0 indicates that target_size will not be used while
// constructing restore span entries.
"backup.restore_span.target_size": {"0 B", "4 MiB", "32 MiB", "128 MiB"},
}

systemSettingNames = func() []string {
@@ -1285,6 +1294,53 @@ func (u *CommonTestUtils) loadTablesForDBs(
return allTables, nil
}

// setMaxRangeSizeAndDependentSettings chooses a random maximum range size from
// maxRangeSizeBytes and scales the cluster settings in
// systemSettingsScaledOnRangeSize such that rangeSize/settingValue remains the
// same.
func (u *CommonTestUtils) setMaxRangeSizeAndDependentSettings(
ctx context.Context, t test.Test, rng *rand.Rand, dbs []string,
) error {
const defaultRangeMinBytes = 1024
const defaultRangeSize int64 = 512 << 20

rangeSize := maxRangeSizeBytes[rng.Intn(len(maxRangeSizeBytes))]
t.L().Printf("Set max range rangeSize to %s", humanizeutil.IBytes(rangeSize))

scale := func(current int64) int64 {
currentF := float64(current)
ratio := float64(rangeSize) / float64(defaultRangeSize)
return int64(currentF * ratio)
}
for _, dbName := range dbs {
query := fmt.Sprintf("ALTER DATABASE %s CONFIGURE ZONE USING range_max_bytes=%d, range_min_bytes=%d",
dbName, rangeSize, defaultRangeMinBytes)
if err := u.Exec(ctx, rng, query); err != nil {
return err
}
}

for _, setting := range systemSettingsScaledOnRangeSize {
var humanizedCurrentValue string
if err := u.QueryRow(ctx, rng, fmt.Sprintf("SHOW CLUSTER SETTING %s", setting)).Scan(&humanizedCurrentValue); err != nil {
return err
}
currentValue, err := humanizeutil.ParseBytes(humanizedCurrentValue)
if err != nil {
return err
}
newValue := scale(currentValue)
t.L().Printf("changing cluster setting %s from %s to %s", setting, humanizedCurrentValue, humanizeutil.IBytes(newValue))
stmt := fmt.Sprintf("SET CLUSTER SETTING %s = '%d'", setting, newValue)
if err := u.Exec(ctx, rng, stmt); err != nil {
return err
}
}
// Ensure ranges have been properly replicated.
_, dbConn := u.RandomDB(rng, u.roachNodes)
return WaitFor3XReplication(ctx, t, dbConn)
}

// setClusterSettings may set up to numCustomSettings cluster settings
// as defined in `systemSettingValues`. The system settings changed
// are logged. This function should be called *before* the upgrade
@@ -1573,6 +1629,7 @@ func (d *BackupRestoreTestDriver) computeTableContents(
return err
}
result[j] = contents
l.Printf("loaded contents for %s", table)
return nil
})
}
@@ -2131,6 +2188,7 @@ func (bc *backupCollection) verifyBackupCollection(
restoredContents, err := d.computeTableContents(
ctx, l, rng, restoredTables, bc.contents, "", /* timestamp */
)

if err != nil {
return fmt.Errorf("backup %s: error loading restored contents: %w", bc.name, err)
}
