Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

roachtest: validate cluster when using fixtures and provide opt-out #105455

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,7 @@ require (
github.com/pierrre/geohash v1.0.0
github.com/pires/go-proxyproto v0.7.0
github.com/pkg/browser v0.0.0-20210115035449-ce105d075bb4
github.com/pkg/errors v0.9.1
github.com/pmezard/go-difflib v1.0.0
github.com/pressly/goose/v3 v3.5.3
github.com/prometheus/client_golang v1.12.1
Expand Down Expand Up @@ -361,7 +362,6 @@ require (
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/openzipkin/zipkin-go v0.2.5 // indirect
github.com/pelletier/go-toml v1.9.3 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pkg/profile v1.6.0 // indirect
github.com/power-devops/perfstat v0.0.0-20210106213030-5aafc221ea8c // indirect
github.com/pquerna/cachecontrol v0.0.0-20200921180117-858c6e7e6b7e // indirect
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,10 @@ func UploadVersion(
func InstallFixtures(
ctx context.Context, l *logger.Logger, c cluster.Cluster, nodes option.NodeListOption, v string,
) error {
c.Run(ctx, nodes, "mkdir -p {store-dir}")
if err := c.RunE(ctx, nodes, "mkdir -p {store-dir}"); err != nil {
return fmt.Errorf("creating store-dir: %w", err)
}

vv := version.MustParse("v" + v)
// The fixtures use cluster version (major.minor) but the input might be
// a patch release.
Expand All @@ -133,7 +136,10 @@ func InstallFixtures(
}
}
// Extract fixture. Fail if there's already an LSM in the store dir.
c.Run(ctx, nodes, "ls {store-dir}/marker.* 1> /dev/null 2>&1 && exit 1 || (cd {store-dir} && tar -xf fixture.tgz)")
if err := c.RunE(ctx, nodes, "ls {store-dir}/marker.* 1> /dev/null 2>&1 && exit 1 || (cd {store-dir} && tar -xf fixture.tgz)"); err != nil {
return fmt.Errorf("extracting fixtures: %w", err)
}

return nil
}

Expand Down
2 changes: 2 additions & 0 deletions pkg/cmd/roachtest/roachtestutil/mixedversion/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -23,12 +23,14 @@ go_library(
"//pkg/util/syncutil",
"//pkg/util/timeutil",
"//pkg/util/version",
"@com_github_pkg_errors//:errors",
],
)

go_test(
name = "mixedversion_test",
srcs = [
"mixedversion_test.go",
"planner_test.go",
"runner_test.go",
],
Expand Down
168 changes: 123 additions & 45 deletions pkg/cmd/roachtest/roachtestutil/mixedversion/mixedversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,22 +35,26 @@
//
// Typical usage:
//
// mvt, err := mixedversion.NewTest(...)
// mvt.InMixedVersion("test my feature", func(l *logger.Logger, rng *rand.Rand, h *mixedversion.Helper) error {
// l.Printf("testing feature X")
// node, db := h.RandomDB(rng, c.All())
// l.Printf("running query on node %d", node)
// _, err := db.ExecContext(ctx, "SELECT * FROM test")
// return err
// })
// mvt.InMixedVersion("test another feature", func(l *logger.Logger, rng *rand.Rand, h *mixedversion.Helper) error {
// l.Printf("testing feature Y")
// node, db := h.RandomDB(rng, c.All())
// l.Printf("running query on node %d", node)
// _, err := db.ExecContext(ctx, "SELECT * FROM test2")
// return err
// })
// mvt.Run()
// mvt, err := mixedversion.NewTest(...)
// mvt.InMixedVersion("test my feature", func(
// ctx context.Context, l *logger.Logger, rng *rand.Rand, h *mixedversion.Helper,
// ) error {
// l.Printf("testing feature X")
// node, db := h.RandomDB(rng, c.All())
// l.Printf("running query on node %d", node)
// _, err := db.ExecContext(ctx, "SELECT * FROM test")
// return err
// })
// mvt.InMixedVersion("test another feature", func(
// ctx context.Context, l *logger.Logger, rng *rand.Rand, h *mixedversion.Helper,
// ) error {
// l.Printf("testing feature Y")
// node, db := h.RandomDB(rng, c.All())
// l.Printf("running query on node %d", node)
// _, err := db.ExecContext(ctx, "SELECT * FROM test2")
// return err
// })
// mvt.Run()
//
// Functions passed to `InMixedVersion` will be called at arbitrary
// points during an upgrade/downgrade process. They may also be called
Expand Down Expand Up @@ -84,6 +88,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
"github.com/cockroachdb/cockroach/pkg/util/randutil"
"github.com/cockroachdb/cockroach/pkg/util/version"
"github.com/pkg/errors"
)

const (
Expand All @@ -101,6 +106,11 @@ const (
// finalized.
runWhileMigratingProbability = 0.5

// numNodesInFixtures is the number of nodes expected to exist in a
// cluster that can use the test fixtures in
// `pkg/cmd/roachtest/fixtures`.
numNodesInFixtures = 4

// CurrentCockroachPath is the path to the binary where the current
// version of cockroach being tested is located. This file is
// uploaded before any user functions are run. The primary use case
Expand All @@ -122,6 +132,12 @@ var (
defaultClusterSettings = []install.ClusterSettingOption{
install.SecureOption(true),
}

defaultTestOptions = testOptions{
// We use fixtures more often than not as they are more likely to
// detect bugs, especially in migrations.
useFixturesProbability: 0.7,
}
)

type (
Expand Down Expand Up @@ -214,6 +230,14 @@ type (
crdbNodes option.NodeListOption
}

// testOptions contains some options that can be changed by the user
// that expose some control over the generated test plan.
testOptions struct {
useFixturesProbability float64
}

customOption func(*testOptions)

// Test is the main struct callers of this package interact with.
Test struct {
ctx context.Context
Expand All @@ -222,6 +246,8 @@ type (
logger *logger.Logger
crdbNodes option.NodeListOption

options testOptions

rt test.Test
prng *rand.Rand
seed int64
Expand Down Expand Up @@ -252,6 +278,22 @@ type (
StopFunc func()
)

// NeverUseFixtures is an option that can be passed to `NewTest` to
// disable the use of fixtures in the test. Necessary if the test
// wants to use a number of cockroach nodes other than 4.
func NeverUseFixtures(opts *testOptions) {
opts.useFixturesProbability = 0
}

// AlwaysUseFixtures is an option that can be passed to `NewTest` to
// force the test to always start the cluster from the fixtures in
// `pkg/cmd/roachtest/fixtures`. Necessary if the test makes
// assertions that rely on the existence of data present in the
// fixtures.
func AlwaysUseFixtures(opts *testOptions) {
opts.useFixturesProbability = 1
}

// NewTest creates a Test struct that users can use to create and run
// a mixed-version roachtest.
func NewTest(
Expand All @@ -260,27 +302,37 @@ func NewTest(
l *logger.Logger,
c cluster.Cluster,
crdbNodes option.NodeListOption,
options ...customOption,
) *Test {
testLogger, err := prefixedLogger(l, logPrefix)
if err != nil {
t.Fatal(err)
}

opts := defaultTestOptions
for _, fn := range options {
fn(&opts)
}

prng, seed := randutil.NewLockedPseudoRand()
testLogger.Printf("mixed-version random seed: %d", seed)

testCtx, cancel := context.WithCancel(ctx)
return &Test{
test := &Test{
ctx: testCtx,
cancel: cancel,
cluster: c,
logger: testLogger,
crdbNodes: crdbNodes,
options: opts,
rt: t,
prng: prng,
seed: seed,
hooks: &testHooks{prng: prng, crdbNodes: crdbNodes},
}

assertValidTest(test, t.Fatal)
return test
}

// RNG returns the underlying random number generator used by the
Expand Down Expand Up @@ -433,6 +485,7 @@ func (t *Test) plan() (*TestPlan, error) {

planner := testPlanner{
initialVersion: previousRelease,
options: t.options,
rt: t.rt,
crdbNodes: t.crdbNodes,
hooks: t.hooks,
Expand All @@ -458,32 +511,47 @@ func (t *Test) runCommandFunc(nodes option.NodeListOption, cmd string) userFunc
}
}

// startFromCheckpointStep is the step that starts the cluster from a
// specific `version`, using checked-in fixtures.
type startFromCheckpointStep struct {
// installFixturesStep is the step that copies the fixtures from
// `pkg/cmd/roachtest/fixtures` for a specific version into the nodes'
// store dir.
type installFixturesStep struct {
id int
rt test.Test
version string
crdbNodes option.NodeListOption
}

func (s startFromCheckpointStep) ID() int { return s.id }
func (s startFromCheckpointStep) Background() shouldStop { return nil }
func (s installFixturesStep) ID() int { return s.id }
func (s installFixturesStep) Background() shouldStop { return nil }

func (s startFromCheckpointStep) Description() string {
return fmt.Sprintf("starting cluster from fixtures for version %q", s.version)
func (s installFixturesStep) Description() string {
return fmt.Sprintf("installing fixtures for version %q", s.version)
}

// Run will copy the fixtures to all database nodes in the cluster,
// upload the binary associated with that given version, and finally
// start the cockroach binary on these nodes.
func (s startFromCheckpointStep) Run(
ctx context.Context, l *logger.Logger, c cluster.Cluster, helper *Helper,
func (s installFixturesStep) Run(
ctx context.Context, l *logger.Logger, c cluster.Cluster, h *Helper,
) error {
if err := clusterupgrade.InstallFixtures(ctx, l, c, s.crdbNodes, s.version); err != nil {
return err
}
return clusterupgrade.InstallFixtures(ctx, l, c, s.crdbNodes, s.version)
}

// startStep is the step that starts the cluster from a specific
// `version`.
type startStep struct {
id int
rt test.Test
version string
crdbNodes option.NodeListOption
}

func (s startStep) ID() int { return s.id }
func (s startStep) Background() shouldStop { return nil }

func (s startStep) Description() string {
return fmt.Sprintf("starting cluster at version %q", s.version)
}

// Run uploads the binary associated with the given version and starts
// the cockroach binary on the nodes.
func (s startStep) Run(ctx context.Context, l *logger.Logger, c cluster.Cluster, h *Helper) error {
binaryPath, err := clusterupgrade.UploadVersion(ctx, s.rt, l, c, s.crdbNodes, s.version)
if err != nil {
return err
Expand Down Expand Up @@ -517,7 +585,7 @@ func (s uploadCurrentVersionStep) Description() string {
}

func (s uploadCurrentVersionStep) Run(
ctx context.Context, l *logger.Logger, c cluster.Cluster, helper *Helper,
ctx context.Context, l *logger.Logger, c cluster.Cluster, h *Helper,
) error {
_, err := clusterupgrade.UploadVersion(ctx, s.rt, l, c, s.crdbNodes, clusterupgrade.MainVersion)
if err != nil {
Expand Down Expand Up @@ -547,9 +615,9 @@ func (s waitForStableClusterVersionStep) Description() string {
}

func (s waitForStableClusterVersionStep) Run(
ctx context.Context, l *logger.Logger, c cluster.Cluster, helper *Helper,
ctx context.Context, l *logger.Logger, c cluster.Cluster, h *Helper,
) error {
return clusterupgrade.WaitForClusterUpgrade(ctx, l, s.nodes, helper.Connect)
return clusterupgrade.WaitForClusterUpgrade(ctx, l, s.nodes, h.Connect)
}

// preserveDowngradeOptionStep sets the `preserve_downgrade_option`
Expand All @@ -568,16 +636,16 @@ func (s preserveDowngradeOptionStep) Description() string {
}

func (s preserveDowngradeOptionStep) Run(
ctx context.Context, l *logger.Logger, c cluster.Cluster, helper *Helper,
ctx context.Context, l *logger.Logger, c cluster.Cluster, h *Helper,
) error {
node, db := helper.RandomDB(s.prng, s.crdbNodes)
node, db := h.RandomDB(s.prng, s.crdbNodes)
l.Printf("checking binary version (via node %d)", node)
bv, err := clusterupgrade.BinaryVersion(db)
if err != nil {
return err
}

node, db = helper.RandomDB(s.prng, s.crdbNodes)
node, db = h.RandomDB(s.prng, s.crdbNodes)
downgradeOption := bv.String()
l.Printf("setting `preserve_downgrade_option` to %s (via node %d)", downgradeOption, node)
_, err = db.ExecContext(ctx, "SET CLUSTER SETTING cluster.preserve_downgrade_option = $1", downgradeOption)
Expand All @@ -603,7 +671,7 @@ func (s restartWithNewBinaryStep) Description() string {
}

func (s restartWithNewBinaryStep) Run(
ctx context.Context, l *logger.Logger, c cluster.Cluster, helper *Helper,
ctx context.Context, l *logger.Logger, c cluster.Cluster, h *Helper,
) error {
return clusterupgrade.RestartNodesWithNewBinary(
ctx,
Expand Down Expand Up @@ -639,9 +707,9 @@ func (s finalizeUpgradeStep) Description() string {
}

func (s finalizeUpgradeStep) Run(
ctx context.Context, l *logger.Logger, c cluster.Cluster, helper *Helper,
ctx context.Context, l *logger.Logger, c cluster.Cluster, h *Helper,
) error {
node, db := helper.RandomDB(s.prng, s.crdbNodes)
node, db := h.RandomDB(s.prng, s.crdbNodes)
l.Printf("resetting preserve_downgrade_option (via node %d)", node)
_, err := db.ExecContext(ctx, "RESET CLUSTER SETTING cluster.preserve_downgrade_option")
return err
Expand All @@ -665,10 +733,10 @@ func (s runHookStep) Description() string {
}

func (s runHookStep) Run(
ctx context.Context, l *logger.Logger, c cluster.Cluster, helper *Helper,
ctx context.Context, l *logger.Logger, c cluster.Cluster, h *Helper,
) error {
helper.SetContext(&s.testContext)
return s.hook.fn(ctx, l, s.prng, helper)
h.SetContext(&s.testContext)
return s.hook.fn(ctx, l, s.prng, h)
}

// sequentialRunStep is a "meta-step" that indicates that a sequence
Expand Down Expand Up @@ -821,3 +889,13 @@ func rngFromRNG(rng *rand.Rand) *rand.Rand {
func versionMsg(version string) string {
return clusterupgrade.VersionMsg(version)
}

func assertValidTest(test *Test, fatalFunc func(...interface{})) {
if test.options.useFixturesProbability > 0 && len(test.crdbNodes) != numNodesInFixtures {
err := fmt.Errorf(
"invalid cluster: use of fixtures requires %d cockroach nodes, got %d (%v)",
numNodesInFixtures, len(test.crdbNodes), test.crdbNodes,
)
fatalFunc(errors.Wrap(err, "mixedversion.NewTest"))
}
}
Loading