From c5ef3842eff354d3311522fd6294f18b96a23018 Mon Sep 17 00:00:00 2001 From: Michael Butler Date: Thu, 23 Feb 2023 11:57:30 -0500 Subject: [PATCH] roachtest: always use active node in backup schedule injection Previously, the schedule backup cmd injection on Start() would always run on the first node on the cluster, but if that node were not available, the cmd would fail. This patch ensures the injection runs on a node that was just started. Fixes #97558 Release note: None --- .../roachtest/tests/multitenant_upgrade.go | 4 +++- pkg/roachprod/install/cockroach.go | 19 +++++++++++++++---- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/pkg/cmd/roachtest/tests/multitenant_upgrade.go b/pkg/cmd/roachtest/tests/multitenant_upgrade.go index 3a55e5ead85b..ec18e96018de 100644 --- a/pkg/cmd/roachtest/tests/multitenant_upgrade.go +++ b/pkg/cmd/roachtest/tests/multitenant_upgrade.go @@ -130,7 +130,9 @@ func runMultiTenantUpgrade(ctx context.Context, t test.Test, c cluster.Cluster, t.Status("upgrading system tenant binary") c.Stop(ctx, t.L(), option.DefaultStopOpts(), kvNodes) settings.Binary = currentBinary - c.Start(ctx, t.L(), option.DefaultStartOpts(), settings, kvNodes) + // TODO (msbutler): investigate why the scheduled backup command fails due to a `Is the Server + // running?` error. + c.Start(ctx, t.L(), option.DefaultStartOptsNoBackups(), settings, kvNodes) time.Sleep(time.Second) t.Status("checking the pre-upgrade sql server still works after the system tenant binary upgrade") diff --git a/pkg/roachprod/install/cockroach.go b/pkg/roachprod/install/cockroach.go index 73466f94bc93..7c8d1c14b4b4 100644 --- a/pkg/roachprod/install/cockroach.go +++ b/pkg/roachprod/install/cockroach.go @@ -826,15 +826,26 @@ func (c *SyncedCluster) createFixedBackupSchedule( createScheduleCmd := fmt.Sprintf(`CREATE SCHEDULE IF NOT EXISTS test_only_backup FOR BACKUP INTO '%s' %s`, collectionPath, scheduleArgs) - node := Node(1) + node := c.Nodes[0] binary := cockroachNodeBinary(c, node) url := c.NodeURL("localhost", c.NodePort(node), "" /* tenantName */) fullCmd := fmt.Sprintf(`COCKROACH_CONNECT_TIMEOUT=0 %s sql --url %s -e %q`, binary, url, createScheduleCmd) - // Instead of using `c.ExecSQL()`, use the more flexible c.Run(), which allows us to + // Instead of using `c.ExecSQL()`, use the more flexible c.newSession(), which allows us to // 1) prefix the schedule backup cmd with COCKROACH_CONNECT_TIMEOUT=0. - // 2) run the command against the first node on the cluster. - return c.Run(ctx, l, l.Stdout, l.Stderr, Nodes{node}, "init scheduled backup", fullCmd) + // 2) run the command against the first node in the cluster target. + sess := c.newSession(l, node, fullCmd, withDebugName("init-backup-schedule")) + defer sess.Close() + + out, err := sess.CombinedOutput(ctx) + if err != nil { + return errors.Wrapf(err, "~ %s\n%s", fullCmd, out) + } + + if out := strings.TrimSpace(string(out)); out != "" { + l.Printf(out) + } + return nil } // getEnvVars returns all COCKROACH_* environment variables, in the form