Skip to content

Commit

Permalink
Merge #105455
Browse files Browse the repository at this point in the history
105455: roachtest: validate cluster when using fixtures and provide opt-out r=srosenberg a=renatolabs

This commit updates the `mixedversion` API with two main changes:

* cluster validation: previously, mixed-version tests would always start from checked-in fixtures. Those fixtures have an implied cluster topology and require the cluster to have 4 cockroach nodes -- if that's not the case, the cluster fails to start up with non-obvious error messages. To make this situation clearer, we now validate that the test is being run with the correct number of nodes.

* to allow for larger-scale mixed-version tests (more than 4 nodes), we now expose a `DisableFixtures` option that can be passed to `NewTest`. With this option, nodes start with an empty store directory, eliminating any restrictions on cluster size.

Epic: CRDB-19321

Release note: None

Co-authored-by: Renato Costa <[email protected]>
  • Loading branch information
craig[bot] and renatolabs committed Jul 4, 2023
2 parents 83bc207 + 2c3e868 commit c6a7828
Show file tree
Hide file tree
Showing 9 changed files with 276 additions and 117 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ require (
github.com/pierrre/geohash v1.0.0
github.com/pires/go-proxyproto v0.7.0
github.com/pkg/browser v0.0.0-20210115035449-ce105d075bb4
github.com/pkg/errors v0.9.1
github.com/pmezard/go-difflib v1.0.0
github.com/pressly/goose/v3 v3.5.3
github.com/prometheus/client_golang v1.12.1
Expand Down Expand Up @@ -361,7 +362,6 @@ require (
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/openzipkin/zipkin-go v0.2.5 // indirect
github.com/pelletier/go-toml v1.9.3 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pkg/profile v1.6.0 // indirect
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
github.com/pquerna/cachecontrol v0.0.0-20200921180117-858c6e7e6b7e // indirect
Expand Down
10 changes: 8 additions & 2 deletions pkg/cmd/roachtest/roachtestutil/clusterupgrade/clusterupgrade.go
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,10 @@ func UploadVersion(
func InstallFixtures(
ctx context.Context, l *logger.Logger, c cluster.Cluster, nodes option.NodeListOption, v string,
) error {
c.Run(ctx, nodes, "mkdir -p {store-dir}")
if err := c.RunE(ctx, nodes, "mkdir -p {store-dir}"); err != nil {
return fmt.Errorf("creating store-dir: %w", err)
}

vv := version.MustParse("v" + v)
// The fixtures use cluster version (major.minor) but the input might be
// a patch release.
Expand All @@ -133,7 +136,10 @@ func InstallFixtures(
}
}
// Extract fixture. Fail if there's already an LSM in the store dir.
c.Run(ctx, nodes, "ls {store-dir}/marker.* 1> /dev/null 2>&1 && exit 1 || (cd {store-dir} && tar -xf fixture.tgz)")
if err := c.RunE(ctx, nodes, "ls {store-dir}/marker.* 1> /dev/null 2>&1 && exit 1 || (cd {store-dir} && tar -xf fixture.tgz)"); err != nil {
return fmt.Errorf("extracting fixtures: %w", err)
}

return nil
}

Expand Down
2 changes: 2 additions & 0 deletions pkg/cmd/roachtest/roachtestutil/mixedversion/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,14 @@ go_library(
"//pkg/util/syncutil",
"//pkg/util/timeutil",
"//pkg/util/version",
"@com_github_pkg_errors//:errors",
],
)

go_test(
name = "mixedversion_test",
srcs = [
"mixedversion_test.go",
"planner_test.go",
"runner_test.go",
],
Expand Down
168 changes: 123 additions & 45 deletions pkg/cmd/roachtest/roachtestutil/mixedversion/mixedversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,22 +35,26 @@
//
// Typical usage:
//
// mvt, err := mixedversion.NewTest(...)
// mvt.InMixedVersion("test my feature", func(l *logger.Logger, rng *rand.Rand, h *mixedversion.Helper) error {
// l.Printf("testing feature X")
// node, db := h.RandomDB(rng, c.All())
// l.Printf("running query on node %d", node)
// _, err := db.ExecContext(ctx, "SELECT * FROM test")
// return err
// })
// mvt.InMixedVersion("test another feature", func(l *logger.Logger, rng *rand.Rand, h *mixedversion.Helper) error {
// l.Printf("testing feature Y")
// node, db := h.RandomDB(rng, c.All())
// l.Printf("running query on node %d", node)
// _, err := db.ExecContext(ctx, "SELECT * FROM test2")
// return err
// })
// mvt.Run()
// mvt, err := mixedversion.NewTest(...)
// mvt.InMixedVersion("test my feature", func(
// ctx context.Context, l *logger.Logger, rng *rand.Rand, h *mixedversion.Helper,
// ) error {
// l.Printf("testing feature X")
// node, db := h.RandomDB(rng, c.All())
// l.Printf("running query on node %d", node)
// _, err := db.ExecContext(ctx, "SELECT * FROM test")
// return err
// })
// mvt.InMixedVersion("test another feature", func(
// ctx context.Context, l *logger.Logger, rng *rand.Rand, h *mixedversion.Helper,
// ) error {
// l.Printf("testing feature Y")
// node, db := h.RandomDB(rng, c.All())
// l.Printf("running query on node %d", node)
// _, err := db.ExecContext(ctx, "SELECT * FROM test2")
// return err
// })
// mvt.Run()
//
// Functions passed to `InMixedVersion` will be called at arbitrary
// points during an upgrade/downgrade process. They may also be called
Expand Down Expand Up @@ -84,6 +88,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
"github.com/cockroachdb/cockroach/pkg/util/version"
"github.com/pkg/errors"
)

const (
Expand All @@ -101,6 +106,11 @@ const (
// finalized.
runWhileMigratingProbability = 0.5

// numNodesInFixtures is the number of nodes expected to exist in a
// cluster that can use the test fixtures in
// `pkg/cmd/roachtest/fixtures`.
numNodesInFixtures = 4

// CurrentCockroachPath is the path to the binary where the current
// version of cockroach being tested is located. This file is
// uploaded before any user functions are run. The primary use case
Expand All @@ -122,6 +132,12 @@ var (
defaultClusterSettings = []install.ClusterSettingOption{
install.SecureOption(true),
}

defaultTestOptions = testOptions{
// We use fixtures more often than not as they are more likely to
// detect bugs, especially in migrations.
useFixturesProbability: 0.7,
}
)

type (
Expand Down Expand Up @@ -214,6 +230,14 @@ type (
crdbNodes option.NodeListOption
}

// testOptions contains some options that can be changed by the user
// that expose some control over the generated test plan.
testOptions struct {
useFixturesProbability float64
}

customOption func(*testOptions)

// Test is the main struct callers of this package interact with.
Test struct {
ctx context.Context
Expand All @@ -222,6 +246,8 @@ type (
logger *logger.Logger
crdbNodes option.NodeListOption

options testOptions

rt test.Test
prng *rand.Rand
seed int64
Expand Down Expand Up @@ -252,6 +278,22 @@ type (
StopFunc func()
)

// NeverUseFixtures is an option that can be passed to `NewTest` to
// disable the use of fixtures in the test. Necessary if the test
// wants to use a number of cockroach nodes other than 4.
func NeverUseFixtures(opts *testOptions) {
opts.useFixturesProbability = 0
}

// AlwaysUseFixtures is an option that can be passed to `NewTest` to
// force the test to always start the cluster from the fixtures in
// `pkg/cmd/roachtest/fixtures`. Necessary if the test makes
// assertions that rely on the existence of data present in the
// fixtures.
func AlwaysUseFixtures(opts *testOptions) {
opts.useFixturesProbability = 1
}

// NewTest creates a Test struct that users can use to create and run
// a mixed-version roachtest.
func NewTest(
Expand All @@ -260,27 +302,37 @@ func NewTest(
l *logger.Logger,
c cluster.Cluster,
crdbNodes option.NodeListOption,
options ...customOption,
) *Test {
testLogger, err := prefixedLogger(l, logPrefix)
if err != nil {
t.Fatal(err)
}

opts := defaultTestOptions
for _, fn := range options {
fn(&opts)
}

prng, seed := randutil.NewLockedPseudoRand()
testLogger.Printf("mixed-version random seed: %d", seed)

testCtx, cancel := context.WithCancel(ctx)
return &Test{
test := &Test{
ctx: testCtx,
cancel: cancel,
cluster: c,
logger: testLogger,
crdbNodes: crdbNodes,
options: opts,
rt: t,
prng: prng,
seed: seed,
hooks: &testHooks{prng: prng, crdbNodes: crdbNodes},
}

assertValidTest(test, t.Fatal)
return test
}

// RNG returns the underlying random number generator used by the
Expand Down Expand Up @@ -433,6 +485,7 @@ func (t *Test) plan() (*TestPlan, error) {

planner := testPlanner{
initialVersion: previousRelease,
options: t.options,
rt: t.rt,
crdbNodes: t.crdbNodes,
hooks: t.hooks,
Expand All @@ -458,32 +511,47 @@ func (t *Test) runCommandFunc(nodes option.NodeListOption, cmd string) userFunc
}
}

// startFromCheckpointStep is the step that starts the cluster from a
// specific `version`, using checked-in fixtures.
type startFromCheckpointStep struct {
// installFixturesStep is the step that copies the fixtures from
// `pkg/cmd/roachtest/fixtures` for a specific version into the nodes'
// store dir.
type installFixturesStep struct {
id int
rt test.Test
version string
crdbNodes option.NodeListOption
}

func (s startFromCheckpointStep) ID() int { return s.id }
func (s startFromCheckpointStep) Background() shouldStop { return nil }
func (s installFixturesStep) ID() int { return s.id }
func (s installFixturesStep) Background() shouldStop { return nil }

func (s startFromCheckpointStep) Description() string {
return fmt.Sprintf("starting cluster from fixtures for version %q", s.version)
func (s installFixturesStep) Description() string {
return fmt.Sprintf("installing fixtures for version %q", s.version)
}

// Run will copy the fixtures to all database nodes in the cluster,
// upload the binary associated with that given version, and finally
// start the cockroach binary on these nodes.
func (s startFromCheckpointStep) Run(
ctx context.Context, l *logger.Logger, c cluster.Cluster, helper *Helper,
func (s installFixturesStep) Run(
ctx context.Context, l *logger.Logger, c cluster.Cluster, h *Helper,
) error {
if err := clusterupgrade.InstallFixtures(ctx, l, c, s.crdbNodes, s.version); err != nil {
return err
}
return clusterupgrade.InstallFixtures(ctx, l, c, s.crdbNodes, s.version)
}

// startStep is the step that starts the cluster from a specific
// `version`.
type startStep struct {
id int
rt test.Test
version string
crdbNodes option.NodeListOption
}

func (s startStep) ID() int { return s.id }
func (s startStep) Background() shouldStop { return nil }

func (s startStep) Description() string {
return fmt.Sprintf("starting cluster at version %q", s.version)
}

// Run uploads the binary associated with the given version and starts
// the cockroach binary on the nodes.
func (s startStep) Run(ctx context.Context, l *logger.Logger, c cluster.Cluster, h *Helper) error {
binaryPath, err := clusterupgrade.UploadVersion(ctx, s.rt, l, c, s.crdbNodes, s.version)
if err != nil {
return err
Expand Down Expand Up @@ -517,7 +585,7 @@ func (s uploadCurrentVersionStep) Description() string {
}

func (s uploadCurrentVersionStep) Run(
ctx context.Context, l *logger.Logger, c cluster.Cluster, helper *Helper,
ctx context.Context, l *logger.Logger, c cluster.Cluster, h *Helper,
) error {
_, err := clusterupgrade.UploadVersion(ctx, s.rt, l, c, s.crdbNodes, clusterupgrade.MainVersion)
if err != nil {
Expand Down Expand Up @@ -547,9 +615,9 @@ func (s waitForStableClusterVersionStep) Description() string {
}

func (s waitForStableClusterVersionStep) Run(
ctx context.Context, l *logger.Logger, c cluster.Cluster, helper *Helper,
ctx context.Context, l *logger.Logger, c cluster.Cluster, h *Helper,
) error {
return clusterupgrade.WaitForClusterUpgrade(ctx, l, s.nodes, helper.Connect)
return clusterupgrade.WaitForClusterUpgrade(ctx, l, s.nodes, h.Connect)
}

// preserveDowngradeOptionStep sets the `preserve_downgrade_option`
Expand All @@ -568,16 +636,16 @@ func (s preserveDowngradeOptionStep) Description() string {
}

func (s preserveDowngradeOptionStep) Run(
ctx context.Context, l *logger.Logger, c cluster.Cluster, helper *Helper,
ctx context.Context, l *logger.Logger, c cluster.Cluster, h *Helper,
) error {
node, db := helper.RandomDB(s.prng, s.crdbNodes)
node, db := h.RandomDB(s.prng, s.crdbNodes)
l.Printf("checking binary version (via node %d)", node)
bv, err := clusterupgrade.BinaryVersion(db)
if err != nil {
return err
}

node, db = helper.RandomDB(s.prng, s.crdbNodes)
node, db = h.RandomDB(s.prng, s.crdbNodes)
downgradeOption := bv.String()
l.Printf("setting `preserve_downgrade_option` to %s (via node %d)", downgradeOption, node)
_, err = db.ExecContext(ctx, "SET CLUSTER SETTING cluster.preserve_downgrade_option = $1", downgradeOption)
Expand All @@ -603,7 +671,7 @@ func (s restartWithNewBinaryStep) Description() string {
}

func (s restartWithNewBinaryStep) Run(
ctx context.Context, l *logger.Logger, c cluster.Cluster, helper *Helper,
ctx context.Context, l *logger.Logger, c cluster.Cluster, h *Helper,
) error {
return clusterupgrade.RestartNodesWithNewBinary(
ctx,
Expand Down Expand Up @@ -639,9 +707,9 @@ func (s finalizeUpgradeStep) Description() string {
}

func (s finalizeUpgradeStep) Run(
ctx context.Context, l *logger.Logger, c cluster.Cluster, helper *Helper,
ctx context.Context, l *logger.Logger, c cluster.Cluster, h *Helper,
) error {
node, db := helper.RandomDB(s.prng, s.crdbNodes)
node, db := h.RandomDB(s.prng, s.crdbNodes)
l.Printf("resetting preserve_downgrade_option (via node %d)", node)
_, err := db.ExecContext(ctx, "RESET CLUSTER SETTING cluster.preserve_downgrade_option")
return err
Expand All @@ -665,10 +733,10 @@ func (s runHookStep) Description() string {
}

func (s runHookStep) Run(
ctx context.Context, l *logger.Logger, c cluster.Cluster, helper *Helper,
ctx context.Context, l *logger.Logger, c cluster.Cluster, h *Helper,
) error {
helper.SetContext(&s.testContext)
return s.hook.fn(ctx, l, s.prng, helper)
h.SetContext(&s.testContext)
return s.hook.fn(ctx, l, s.prng, h)
}

// sequentialRunStep is a "meta-step" that indicates that a sequence
Expand Down Expand Up @@ -821,3 +889,13 @@ func rngFromRNG(rng *rand.Rand) *rand.Rand {
func versionMsg(version string) string {
return clusterupgrade.VersionMsg(version)
}

func assertValidTest(test *Test, fatalFunc func(...interface{})) {
if test.options.useFixturesProbability > 0 && len(test.crdbNodes) != numNodesInFixtures {
err := fmt.Errorf(
"invalid cluster: use of fixtures requires %d cockroach nodes, got %d (%v)",
numNodesInFixtures, len(test.crdbNodes), test.crdbNodes,
)
fatalFunc(errors.Wrap(err, "mixedversion.NewTest"))
}
}
Loading

0 comments on commit c6a7828

Please sign in to comment.