From 28ab430b0be1336a4a8d783f9e6acd8010dc28d0 Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Fri, 24 May 2024 13:17:56 -0400 Subject: [PATCH 1/2] roachtest: improve some comments Epic: None Release note: None --- pkg/cmd/roachtest/test_runner.go | 2 +- pkg/roachprod/roachprod.go | 9 +++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/pkg/cmd/roachtest/test_runner.go b/pkg/cmd/roachtest/test_runner.go index 6d1e14eadc4b..d6bd91761e1f 100644 --- a/pkg/cmd/roachtest/test_runner.go +++ b/pkg/cmd/roachtest/test_runner.go @@ -1277,7 +1277,7 @@ func (r *testRunner) runTest( // We still want to run the post-test assertions even if the test timed out as it // might provide useful information about the health of the nodes. Any assertion failures - // will will be recorded against, and eventually fail, the test. + // will be recorded against, and eventually fail, the test. if err := r.postTestAssertions(ctx, t, c, 10*time.Minute); err != nil { l.Printf("error during post test assertions: %v; see test-post-assertions.log for details", err) } diff --git a/pkg/roachprod/roachprod.go b/pkg/roachprod/roachprod.go index 8f6f00569c2a..947d50db391e 100644 --- a/pkg/roachprod/roachprod.go +++ b/pkg/roachprod/roachprod.go @@ -164,7 +164,8 @@ func sortedClusters() []string { // newCluster initializes a SyncedCluster for the given cluster name. // -// The cluster name can include a node selector (e.g. "foo:1-3"). +// The cluster name can include a node selector (e.g. "foo:1-3"). If the +// selector is missing, the returned cluster includes all the machines. func newCluster( l *logger.Logger, name string, opts ...install.ClusterSettingOption, ) (*install.SyncedCluster, error) { @@ -969,6 +970,8 @@ sudo chmod 777 /mnt/data1 } // Install installs third party software. +// +// The cluster name can include a node selector (e.g. "foo:1-3"). func Install(ctx context.Context, l *logger.Logger, clusterName string, software []string) error { c, err := getClusterFromCache(l, clusterName) if err != nil { @@ -2312,7 +2315,7 @@ func StartFluentBit( return fluentbit.Install(ctx, l, c, config) } -// Stop stops Fluent Bit on the cluster identified by clusterName. +// StopFluentBit stops Fluent Bit on the cluster identified by clusterName. func StopFluentBit(ctx context.Context, l *logger.Logger, clusterName string) error { if err := LoadClusters(); err != nil { return err @@ -2671,6 +2674,8 @@ func Deploy( // getClusterFromCache finds and returns a SyncedCluster from // the local cluster cache. +// +// The cluster name can include a node selector (e.g. "foo:1-3"). func getClusterFromCache( l *logger.Logger, clusterName string, opts ...install.ClusterSettingOption, ) (*install.SyncedCluster, error) { From bcd73496f92fef46b2fe042e9d94d44fab90200e Mon Sep 17 00:00:00 2001 From: Andrei Matei Date: Tue, 28 May 2024 18:25:45 -0400 Subject: [PATCH 2/2] roachtest: register a cluster with a worker earlier The roachtest runner consists of a number of workers running tests in parallel. Each worker is linked to the cluster that it is currently using. Before this patch, a cluster was only registered with the test once a test started. This was unnecessarily late; the patch moves the registration to happen earlier, closer to when the cluster is created or picked up by the worker, which better matches the intended semnatics of this link. In particular, the linking now happens before the cockroach binary is uploaded to the cluster (a potentially slow operation), whereas before it happened after. These links between workers and clusters are used by the roachtest's web UI, which will now more accurately show who's using a cluster. Epic: none Release note: None --- pkg/cmd/roachtest/test_impl.go | 5 ++++- pkg/cmd/roachtest/test_runner.go | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/pkg/cmd/roachtest/test_impl.go b/pkg/cmd/roachtest/test_impl.go index a7bf3d5d79a6..bbe98fcf744e 100644 --- a/pkg/cmd/roachtest/test_impl.go +++ b/pkg/cmd/roachtest/test_impl.go @@ -667,7 +667,10 @@ type workerStatus struct { ttr testToRunRes t *testImpl - c *clusterImpl + // The cluster that the worker is currently operating on. If the worker is + // currently running a test, the test is using this cluster. Nil if the + // worker does not currently have a cluster. + c *clusterImpl } } diff --git a/pkg/cmd/roachtest/test_runner.go b/pkg/cmd/roachtest/test_runner.go index d6bd91761e1f..47ab22424be8 100644 --- a/pkg/cmd/roachtest/test_runner.go +++ b/pkg/cmd/roachtest/test_runner.go @@ -752,6 +752,8 @@ func (r *testRunner) runWorker( c.Save(ctx, "cluster saved since --debug-always set", l) } + wStatus.SetCluster(c) + // Prepare the test's logger. Always set this up with real files, using a // temp dir if necessary. This simplifies testing. artifactsRootDir := lopt.artifactsDir @@ -874,7 +876,6 @@ func (r *testRunner) runWorker( c.goCoverDir = t.GoCoverArtifactsDir() - wStatus.SetCluster(c) wStatus.SetTest(t, testToRun) wStatus.SetStatus("running test")