Skip to content

Commit

Permalink
roachtest: fix bugs related to secure clusters
Browse files Browse the repository at this point in the history
This commit fixes a number of small bugs related to using secure
clusters in roachtests, which probably stayed dormant up to this point
because we don't use secure clusters enough, especially in tests where
nodes are restarted.

In this commit, we avoid refetching certs when restarting, fix the
checks for certificate existence on nodes, and fix the command to wipe
a cluster when certs are to be preserved.

Epic: none

Release note: None
  • Loading branch information
renatolabs committed Jun 28, 2023
1 parent 4477505 commit 16bb892
Show file tree
Hide file tree
Showing 3 changed files with 17 additions and 25 deletions.
8 changes: 3 additions & 5 deletions pkg/cmd/roachtest/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -2096,10 +2096,6 @@ func (c *clusterImpl) StartE(
if ctx.Err() != nil {
return errors.Wrap(ctx.Err(), "cluster.StartE")
}
// If the test failed (indicated by a canceled ctx), short-circuit.
if ctx.Err() != nil {
return ctx.Err()
}
c.setStatusForClusterOpt("starting", startOpts.RoachtestOpts.Worker, opts...)
defer c.clearStatusForClusterOpt(startOpts.RoachtestOpts.Worker)

Expand Down Expand Up @@ -2134,7 +2130,9 @@ func (c *clusterImpl) StartE(
return err
}

if settings.Secure {
// Do not refetch certs if that step already happened once (i.e., we
// are restarting a node).
if settings.Secure && c.localCertsDir == "" {
if err := c.RefetchCertsFromNode(ctx, 1); err != nil {
return err
}
Expand Down
29 changes: 10 additions & 19 deletions pkg/roachprod/install/cluster_synced.go
Original file line number Diff line number Diff line change
Expand Up @@ -514,14 +514,16 @@ func (c *SyncedCluster) Wipe(ctx context.Context, l *logger.Logger, preserveCert
cmd += fmt.Sprintf(`rm -fr %s/%s ;`, c.localVMDir(c.Nodes[i]), dir)
}
} else {
cmd = `sudo find /mnt/data* -maxdepth 1 -type f -exec rm -f {} \; &&
sudo rm -fr /mnt/data*/{auxiliary,local,tmp,cassandra,cockroach,cockroach-temp*,mongo-data} &&
sudo rm -fr logs &&
`
rmCmds := []string{
`sudo find /mnt/data* -maxdepth 1 -type f -exec rm -f {} \;`,
`sudo rm -fr /mnt/data*/{auxiliary,local,tmp,cassandra,cockroach,cockroach-temp*,mongo-data}`,
`sudo rm -fr logs`,
}
if !preserveCerts {
cmd += "sudo rm -fr certs* ;\n"
cmd += "sudo rm -fr tenant-certs* ;\n"
rmCmds = append(rmCmds, "sudo rm -fr certs*", "sudo rm -fr tenant-certs*")
}

cmd = strings.Join(rmCmds, " && ")
}
sess := c.newSession(l, node, cmd, withDebugName("node-wipe"))
defer sess.Close()
Expand Down Expand Up @@ -1531,19 +1533,8 @@ func (c *SyncedCluster) fileExistsOnFirstNode(
ctx context.Context, l *logger.Logger, path string,
) (bool, error) {
l.Printf("%s: checking %s", c.Name, path)
testCmd := `$(test -e ` + path + `);`
// Do not log output to stdout/stderr because in some cases this call will be expected to exit 1.
result, err := c.runCmdOnSingleNode(ctx, l, c.Nodes[0], testCmd, true, nil, nil)
if (result.RemoteExitStatus != 0 && result.RemoteExitStatus != 1) || err != nil {
// Unexpected exit status (neither 0 nor 1) or non-nil error. Return combined output along with err returned
// from the call if it's not nil.
if err != nil {
return false, errors.Wrapf(err, "running '%s' failed with exit code=%d: got %s", testCmd, result.RemoteExitStatus, string(result.CombinedOut))
} else {
return false, errors.Newf("running '%s' failed with exit code=%d: got %s", testCmd, result.RemoteExitStatus, string(result.CombinedOut))
}
}
return result.RemoteExitStatus == 0, nil
result, err := c.runCmdOnSingleNode(ctx, l, 1, `$(test -e `+path+`); echo $?`, false, l.Stdout, l.Stderr)
return strings.TrimSpace(result.Stdout) == "0", err
}

// createNodeCertArguments returns a list of strings appropriate for use as
Expand Down
5 changes: 4 additions & 1 deletion pkg/roachprod/install/cockroach.go
Original file line number Diff line number Diff line change
Expand Up @@ -777,8 +777,11 @@ func (c *SyncedCluster) useStartSingleNode() bool {
// distributeCerts distributes certs if it's a secure cluster and we're
// starting n1.
func (c *SyncedCluster) distributeCerts(ctx context.Context, l *logger.Logger) error {
if !c.Secure {
return nil
}
for _, node := range c.TargetNodes() {
if node == 1 && c.Secure {
if node == 1 {
return c.DistributeCerts(ctx, l)
}
}
Expand Down

0 comments on commit 16bb892

Please sign in to comment.