Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
107756: roachtest: make path to default cockroach public r=srosenberg a=renatolabs

The roachtest test runner already uploads the `cockroach` binary passed with `--cockroach` to every node in the cluster. By making the path to that file public, we allow tests to use that binary, stopping avoidable uploads.

Concretely, this commit removes a step from `mixedversion` tests where the same logic of uploading the current binary took place. Tests can use the new constant in the `test` package instead.

Epic: none

Release note: None

108082: sql: remove slow functions from TestRandomSyntaxFunctions r=fqazi a=fqazi

Previously, we could easily timeout running slower,
functions in the random syntax functions test. We
attempted to minimize this risk with resettable timeouts
which helped, but libpq has limited support for cancellation,
so we need to fully pull these out. To address this,
this patch will remove:
crdb_internal.revalidate_unique_constraints_in_all_tables
and crdb_internal.validate_ttl_scheduled_jobs from testing.

Fixes: #107929
Release note: None

108118: kv: deflake TestFollowerReadsWithStaleDescriptor r=nvanbenschoten a=nvanbenschoten

Fixes #108087.

This fix avoids a data race in the test. The race was harmless, but could cause the test to fail when run with the race detector.

Release note: None

Co-authored-by: Renato Costa <[email protected]>
Co-authored-by: Faizan Qazi <[email protected]>
Co-authored-by: Nathan VanBenschoten <[email protected]>
  • Loading branch information
4 people committed Aug 3, 2023
4 parents 76b5348 + 97dba4f + 7a97af5 + 6db50f7 commit dc28e19
Show file tree
Hide file tree
Showing 8 changed files with 66 additions and 86 deletions.
14 changes: 8 additions & 6 deletions pkg/ccl/kvccl/kvfollowerreadsccl/followerreads_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"fmt"
"math"
"strings"
"sync/atomic"
"testing"
"time"

Expand Down Expand Up @@ -697,7 +698,8 @@ func TestFollowerReadsWithStaleDescriptor(t *testing.T) {
// The test uses follower_read_timestamp().
defer utilccl.TestingEnableEnterprise()()

historicalQuery := `SELECT * FROM test AS OF SYSTEM TIME follower_read_timestamp() WHERE k=2`
var historicalQuery atomic.Value
historicalQuery.Store(`SELECT * FROM test AS OF SYSTEM TIME follower_read_timestamp() WHERE k=2`)
recCh := make(chan tracingpb.Recording, 1)

tc := testcluster.StartTestCluster(t, 4,
Expand Down Expand Up @@ -732,7 +734,7 @@ func TestFollowerReadsWithStaleDescriptor(t *testing.T) {
},
SQLExecutor: &sql.ExecutorTestingKnobs{
WithStatementTrace: func(trace tracingpb.Recording, stmt string) {
if stmt == historicalQuery {
if stmt == historicalQuery.Load().(string) {
recCh <- trace
}
},
Expand Down Expand Up @@ -786,7 +788,7 @@ func TestFollowerReadsWithStaleDescriptor(t *testing.T) {
// not be executed as a follower read since it attempts to use n2 which
// doesn't have a replica any more and then it tries n1 which returns an
// updated descriptor.
n4.Exec(t, historicalQuery)
n4.Exec(t, historicalQuery.Load().(string))
// As a sanity check, verify that this was not a follower read.
rec := <-recCh
require.False(t, kv.OnlyFollowerReads(rec), "query was served through follower reads: %s", rec)
Expand All @@ -812,7 +814,7 @@ func TestFollowerReadsWithStaleDescriptor(t *testing.T) {
// Run a historical query and assert that it's served from the follower (n3).
// n4 should attempt to route to n3 because we pretend n3 has a lower latency
// (see testing knob).
n4.Exec(t, historicalQuery)
n4.Exec(t, historicalQuery.Load().(string))
rec = <-recCh

// Look at the trace and check that we've served a follower read.
Expand Down Expand Up @@ -855,8 +857,8 @@ func TestFollowerReadsWithStaleDescriptor(t *testing.T) {
// the ReplicaInfo twice for the same range. This allows us to verify that
// the cached - in the spanResolverIterator - information is correctly
// preserved.
historicalQuery = `SELECT * FROM [SELECT * FROM test WHERE k=2 UNION ALL SELECT * FROM test WHERE k=3] AS OF SYSTEM TIME follower_read_timestamp()`
n4.Exec(t, historicalQuery)
historicalQuery.Store(`SELECT * FROM [SELECT * FROM test WHERE k=2 UNION ALL SELECT * FROM test WHERE k=3] AS OF SYSTEM TIME follower_read_timestamp()`)
n4.Exec(t, historicalQuery.Load().(string))
rec = <-recCh

// Sanity check that the plan was distributed.
Expand Down
13 changes: 6 additions & 7 deletions pkg/cmd/roachtest/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ const (
defaultEncryptionProbability = 1
defaultFIPSProbability = 0
defaultARM64Probability = 0
defaultCockroachPath = "./cockroach-default"
)

type errBinaryOrLibraryNotFound struct {
Expand Down Expand Up @@ -1347,7 +1346,7 @@ func (c *clusterImpl) FetchLogs(ctx context.Context, l *logger.Logger) error {
}
}

if err := c.RunE(ctx, c.All(), fmt.Sprintf("mkdir -p logs/redacted && %s debug merge-logs --redact logs/*.log > logs/redacted/combined.log", defaultCockroachPath)); err != nil {
if err := c.RunE(ctx, c.All(), fmt.Sprintf("mkdir -p logs/redacted && %s debug merge-logs --redact logs/*.log > logs/redacted/combined.log", test.DefaultCockroachPath)); err != nil {
l.Printf("failed to redact logs: %v", err)
if ctx.Err() != nil {
return err
Expand Down Expand Up @@ -1428,7 +1427,7 @@ func (c *clusterImpl) FetchTimeseriesData(ctx context.Context, l *logger.Logger)
sec = fmt.Sprintf("--certs-dir=%s", certs)
}
if err := c.RunE(
ctx, c.Node(node), fmt.Sprintf("%s debug tsdump %s --format=raw > tsdump.gob", defaultCockroachPath, sec),
ctx, c.Node(node), fmt.Sprintf("%s debug tsdump %s --format=raw > tsdump.gob", test.DefaultCockroachPath, sec),
); err != nil {
return err
}
Expand Down Expand Up @@ -1499,15 +1498,15 @@ func (c *clusterImpl) FetchDebugZip(ctx context.Context, l *logger.Logger, dest
// Ignore the files in the the log directory; we pull the logs separately anyway
// so this would only cause duplication.
excludeFiles := "*.log,*.txt,*.pprof"
cmd := roachtestutil.NewCommand("%s debug zip", defaultCockroachPath).
cmd := roachtestutil.NewCommand("%s debug zip", test.DefaultCockroachPath).
Option("include-range-info").
Flag("exclude-files", fmt.Sprintf("'%s'", excludeFiles)).
Flag("url", fmt.Sprintf("{pgurl:%d}", i)).
MaybeFlag(c.IsSecure(), "certs-dir", "certs").
Arg(zipName).
String()
if err := c.RunE(ctx, c.Node(i), cmd); err != nil {
l.Printf("%s debug zip failed on node %d: %v", defaultCockroachPath, i, err)
l.Printf("%s debug zip failed on node %d: %v", test.DefaultCockroachPath, i, err)
if i < c.spec.NodeCount {
continue
}
Expand Down Expand Up @@ -1928,15 +1927,15 @@ func (c *clusterImpl) PutE(
}

// PutDefaultCockroach uploads the cockroach binary passed in the
// command line to `defaultCockroachPath` in every node in the
// command line to `test.DefaultCockroachPath` in every node in the
// cluster. This binary is used by the test runner to collect failure
// artifacts since tests are free to upload the cockroach binary they
// use to any location they desire.
func (c *clusterImpl) PutDefaultCockroach(
ctx context.Context, l *logger.Logger, cockroachPath string,
) error {
c.status("uploading default cockroach binary to nodes")
return c.PutE(ctx, l, cockroachPath, defaultCockroachPath, c.All())
return c.PutE(ctx, l, cockroachPath, test.DefaultCockroachPath, c.All())
}

// PutLibraries inserts the specified libraries, by name, into all nodes on the cluster
Expand Down
36 changes: 0 additions & 36 deletions pkg/cmd/roachtest/roachtestutil/mixedversion/mixedversion.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,13 +111,6 @@ const (
// cluster that can use the test fixtures in
// `pkg/cmd/roachtest/fixtures`.
numNodesInFixtures = 4

// CurrentCockroachPath is the path to the binary where the current
// version of cockroach being tested is located. This file is
// uploaded before any user functions are run. The primary use case
// are tests that need long runnig background functions on startup
// (such as running a workload).
CurrentCockroachPath = "./cockroach-current"
)

var (
Expand Down Expand Up @@ -581,35 +574,6 @@ func (s startStep) Run(ctx context.Context, l *logger.Logger, c cluster.Cluster,
return clusterupgrade.StartWithSettings(ctx, l, c, s.crdbNodes, startOpts, clusterSettings...)
}

// uploadCurrentVersionStep uploads the current cockroach binary to
// all DB nodes in the test. This is so that startup steps can use
// them (if, for instance, they need to run a workload). The binary
// will be located in `dest`.
type uploadCurrentVersionStep struct {
id int
rt test.Test
crdbNodes option.NodeListOption
dest string
}

func (s uploadCurrentVersionStep) ID() int { return s.id }
func (s uploadCurrentVersionStep) Background() shouldStop { return nil }

func (s uploadCurrentVersionStep) Description() string {
return fmt.Sprintf("upload current binary to all cockroach nodes (%v)", s.crdbNodes)
}

func (s uploadCurrentVersionStep) Run(
ctx context.Context, l *logger.Logger, c cluster.Cluster, h *Helper,
) error {
_, err := clusterupgrade.UploadVersion(ctx, s.rt, l, c, s.crdbNodes, clusterupgrade.MainVersion)
if err != nil {
return err
}

return c.RunE(ctx, s.crdbNodes, fmt.Sprintf("mv ./cockroach %s", s.dest))
}

// waitForStableClusterVersionStep implements the process of waiting
// for the `version` cluster setting being the same on all nodes of
// the cluster and equal to the binary version of the first node in
Expand Down
1 change: 0 additions & 1 deletion pkg/cmd/roachtest/roachtestutil/mixedversion/planner.go
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,6 @@ func (p *testPlanner) initSteps() []testStep {

return append(
append(steps,
uploadCurrentVersionStep{id: p.nextID(), rt: p.rt, crdbNodes: p.crdbNodes, dest: CurrentCockroachPath},
waitForStableClusterVersionStep{id: p.nextID(), nodes: p.crdbNodes, timeout: p.options.upgradeTimeout},
preserveDowngradeOptionStep{id: p.nextID(), prng: p.newRNG(), crdbNodes: p.crdbNodes},
),
Expand Down
67 changes: 33 additions & 34 deletions pkg/cmd/roachtest/roachtestutil/mixedversion/planner_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,45 +69,44 @@ func TestTestPlanner(t *testing.T) {

plan, err := mvt.plan()
require.NoError(t, err)
require.Len(t, plan.steps, 11)
require.Len(t, plan.steps, 10)

// Assert on the pretty-printed version of the test plan as that
// asserts the ordering of the steps we want to take, and as a bonus
// tests the printing function itself.
expectedPrettyPlan := fmt.Sprintf(`
mixed-version test plan for upgrading from %[1]s to <current>:
├── starting cluster at version "%[1]s" (1)
├── upload current binary to all cockroach nodes (:1-4) (2)
├── wait for nodes :1-4 to all have the same cluster version (same as binary version of node 1) (3)
├── preventing auto-upgrades by setting `+"`preserve_downgrade_option`"+` (4)
├── run "initialize bank workload" (5)
├── wait for nodes :1-4 to all have the same cluster version (same as binary version of node 1) (2)
├── preventing auto-upgrades by setting `+"`preserve_downgrade_option`"+` (3)
├── run "initialize bank workload" (4)
├── start background hooks concurrently
│ ├── run "bank workload", after 50ms delay (6)
│ ├── run "rand workload", after 200ms delay (7)
│ └── run "csv server", after 500ms delay (8)
│ ├── run "bank workload", after 50ms delay (5)
│ ├── run "rand workload", after 200ms delay (6)
│ └── run "csv server", after 500ms delay (7)
├── upgrade nodes :1-4 from "%[1]s" to "<current>"
│ ├── restart node 1 with binary version <current> (9)
│ ├── run "mixed-version 1" (10)
│ ├── restart node 4 with binary version <current> (11)
│ ├── restart node 3 with binary version <current> (12)
│ ├── run "mixed-version 2" (13)
│ └── restart node 2 with binary version <current> (14)
│ ├── restart node 1 with binary version <current> (8)
│ ├── run "mixed-version 1" (9)
│ ├── restart node 4 with binary version <current> (10)
│ ├── restart node 3 with binary version <current> (11)
│ ├── run "mixed-version 2" (12)
│ └── restart node 2 with binary version <current> (13)
├── downgrade nodes :1-4 from "<current>" to "%[1]s"
│ ├── restart node 4 with binary version %[1]s (15)
│ ├── run "mixed-version 2" (16)
│ ├── restart node 2 with binary version %[1]s (17)
│ ├── restart node 3 with binary version %[1]s (18)
│ ├── restart node 1 with binary version %[1]s (19)
│ └── run "mixed-version 1" (20)
│ ├── restart node 4 with binary version %[1]s (14)
│ ├── run "mixed-version 2" (15)
│ ├── restart node 2 with binary version %[1]s (16)
│ ├── restart node 3 with binary version %[1]s (17)
│ ├── restart node 1 with binary version %[1]s (18)
│ └── run "mixed-version 1" (19)
├── upgrade nodes :1-4 from "%[1]s" to "<current>"
│ ├── restart node 4 with binary version <current> (21)
│ ├── run "mixed-version 1" (22)
│ ├── restart node 1 with binary version <current> (23)
│ ├── restart node 2 with binary version <current> (24)
│ ├── run "mixed-version 2" (25)
│ └── restart node 3 with binary version <current> (26)
├── finalize upgrade by resetting `+"`preserve_downgrade_option`"+` (27)
└── wait for nodes :1-4 to all have the same cluster version (same as binary version of node 1) (28)
│ ├── restart node 4 with binary version <current> (20)
│ ├── run "mixed-version 1" (21)
│ ├── restart node 1 with binary version <current> (22)
│ ├── restart node 2 with binary version <current> (23)
│ ├── run "mixed-version 2" (24)
│ └── restart node 3 with binary version <current> (25)
├── finalize upgrade by resetting `+"`preserve_downgrade_option`"+` (26)
└── wait for nodes :1-4 to all have the same cluster version (same as binary version of node 1) (27)
`, predecessorVersion)

expectedPrettyPlan = expectedPrettyPlan[1:] // remove leading newline
Expand All @@ -121,7 +120,7 @@ mixed-version test plan for upgrading from %[1]s to <current>:
mvt.OnStartup("startup 2", dummyHook)
plan, err = mvt.plan()
require.NoError(t, err)
requireConcurrentHooks(t, plan.steps[4], "startup 1", "startup 2")
requireConcurrentHooks(t, plan.steps[3], "startup 1", "startup 2")

// Assert that AfterUpgradeFinalized hooks are scheduled to run in
// the last step of the test.
Expand All @@ -131,8 +130,8 @@ mixed-version test plan for upgrading from %[1]s to <current>:
mvt.AfterUpgradeFinalized("finalizer 3", dummyHook)
plan, err = mvt.plan()
require.NoError(t, err)
require.Len(t, plan.steps, 10)
requireConcurrentHooks(t, plan.steps[9], "finalizer 1", "finalizer 2", "finalizer 3")
require.Len(t, plan.steps, 9)
requireConcurrentHooks(t, plan.steps[8], "finalizer 1", "finalizer 2", "finalizer 3")
}

// TestDeterministicTestPlan tests that generating a test plan with
Expand Down Expand Up @@ -199,15 +198,15 @@ func TestDeterministicHookSeeds(t *testing.T) {

// We can hardcode these paths since we are using a fixed seed in
// these tests.
firstRun := plan.steps[4].(sequentialRunStep).steps[4].(runHookStep)
firstRun := plan.steps[3].(sequentialRunStep).steps[4].(runHookStep)
require.Equal(t, "do something", firstRun.hook.name)
require.NoError(t, firstRun.Run(ctx, nilLogger, nilCluster, emptyHelper))

secondRun := plan.steps[5].(sequentialRunStep).steps[1].(runHookStep)
secondRun := plan.steps[4].(sequentialRunStep).steps[1].(runHookStep)
require.Equal(t, "do something", secondRun.hook.name)
require.NoError(t, secondRun.Run(ctx, nilLogger, nilCluster, emptyHelper))

thirdRun := plan.steps[6].(sequentialRunStep).steps[3].(runHookStep)
thirdRun := plan.steps[5].(sequentialRunStep).steps[3].(runHookStep)
require.Equal(t, "do something", thirdRun.hook.name)
require.NoError(t, thirdRun.Run(ctx, nilLogger, nilCluster, emptyHelper))

Expand Down
5 changes: 5 additions & 0 deletions pkg/cmd/roachtest/test/test_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ import (
"github.com/cockroachdb/cockroach/pkg/util/version"
)

// DefaultCockroachPath is the path where the binary passed to the
// `--cockroach` flag will be made available in every node in the
// cluster.
const DefaultCockroachPath = "./cockroach-default"

// Test is the interface through which roachtests interact with the
// test harness.
type Test interface {
Expand Down
2 changes: 1 addition & 1 deletion pkg/cmd/roachtest/tests/mixed_version_backup.go
Original file line number Diff line number Diff line change
Expand Up @@ -813,7 +813,7 @@ func (sc *systemTableContents) loadShowResults(
}

query := fmt.Sprintf("SELECT * FROM [%s]%s", showStmt, aostFor(timestamp))
showCmd := roachtestutil.NewCommand("%s sql", mixedversion.CurrentCockroachPath).
showCmd := roachtestutil.NewCommand("%s sql", test.DefaultCockroachPath).
Flag("certs-dir", "certs").
Flag("e", fmt.Sprintf("%q", query)).
String()
Expand Down
14 changes: 13 additions & 1 deletion pkg/sql/tests/rsg_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,16 @@ func (db *verifyFormatDB) execWithResettableTimeout(
return &nonCrasher{sql: sql, err: err}
}
return nil
case <-ctx.Done():
// Sanity: The context is cancelled when the test is about to
// timeout. We will log whatever statement we're waiting on for
// debugging purposes. Sometimes queries won't respect
// cancellation due to lib/pq limitations.
t.Logf("Context cancelled while executing: %q", sql)
// We will intentionally retry, which will us to wait for the
// go routine to complete above to avoid leaking it.
retry = true
return nil
case <-time.After(targetDuration):
db.mu.Lock()
defer db.mu.Unlock()
Expand Down Expand Up @@ -357,7 +367,9 @@ func TestRandomSyntaxFunctions(t *testing.T) {
case "crdb_internal.reset_sql_stats",
"crdb_internal.check_consistency",
"crdb_internal.request_statement_bundle",
"crdb_internal.reset_activity_tables":
"crdb_internal.reset_activity_tables",
"crdb_internal.revalidate_unique_constraints_in_all_tables",
"crdb_internal.validate_ttl_scheduled_jobs":
// Skipped due to long execution time.
continue
}
Expand Down

0 comments on commit dc28e19

Please sign in to comment.