Skip to content

Commit

Permalink
Merge #113320
Browse files Browse the repository at this point in the history
113320: roachprod: support config profiles r=herkolategan a=renatolabs

This commit adds a few changes with the goal of supporting starting cockroach processes with "config-profiles".

Most importantly, roachprod will now wait for the default target cluster if a config profile is detected: this causes subsequent initialization tasks to make use of the application tenant instead of the system one.

We also stop assuming that the system tenant is the default tenant; now, when a virtual cluster is not specified, we do not attemtp to connect to it automatically, falling back to the configured target cluster on crdb.

Epic: none

Release note: None

Co-authored-by: Renato Costa <[email protected]>
  • Loading branch information
craig[bot] and renatolabs committed Nov 1, 2023
2 parents 974b43d + 56a7916 commit 028d1fc
Showing 1 changed file with 75 additions and 4 deletions.
79 changes: 75 additions & 4 deletions pkg/roachprod/install/cockroach.go
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,9 @@ func (c *SyncedCluster) Start(ctx context.Context, l *logger.Logger, startOpts S
}

if startOpts.GetInitTarget() == node {
if err := c.waitForDefaultTargetCluster(ctx, l, startOpts); err != nil {
return res, errors.Wrap(err, "failed to wait for default target cluster")
}
c.createAdminUserForSecureCluster(ctx, l, startOpts)
return c.setClusterSettings(ctx, l, node, startOpts.VirtualClusterName)
}
Expand Down Expand Up @@ -501,7 +504,13 @@ func (c *SyncedCluster) NodeURL(
} else {
v.Add("sslmode", "disable")
}
if serviceMode == ServiceModeShared && virtualClusterName != "" && virtualClusterName != "system" {

// Add the virtual cluster name option explicitly for shared-process
// tenants or for the system tenant. This is to make sure we connect
// to the system tenant in case we have previously changed the
// default virtual cluster.
if (serviceMode == ServiceModeShared && virtualClusterName != "") ||
virtualClusterName == SystemInterfaceName {
v.Add("options", fmt.Sprintf("-ccluster=%s", virtualClusterName))
}
u.RawQuery = v.Encode()
Expand Down Expand Up @@ -542,9 +551,6 @@ func (c *SyncedCluster) ExecOrInteractiveSQL(
if err != nil {
return err
}
if virtualClusterName == "" {
virtualClusterName = SystemInterfaceName
}
url := c.NodeURL("localhost", desc.Port, virtualClusterName, desc.ServiceMode)
binary := cockroachNodeBinary(c, c.Nodes[0])
allArgs := []string{binary, "sql", "--url", url}
Expand Down Expand Up @@ -948,6 +954,71 @@ func (c *SyncedCluster) initializeCluster(
return res, err
}

// waitForDefaultTargetCluster checks for the existence of a
// config-profile flag that leads to the use of an application tenant
// as 'default target cluster'; if that is the case, we wait for all
// nodes to be aware of the cluster setting before proceding. Without
// this logic, follow-up tasks in the process of creating the cluster
// could run before the cluster setting is propagated, and they would
// apply to the system tenant instead.
func (c *SyncedCluster) waitForDefaultTargetCluster(
ctx context.Context, l *logger.Logger, startOpts StartOpts,
) error {
var hasCustomTargetCluster bool
for _, arg := range startOpts.ExtraArgs {
// If there is a config profile and that is set to either a '+app'
// profile or 'replication-source', we know that the default
// target cluster setting will be set to the application tenant.
if strings.Contains(arg, "config-profile") &&
(strings.Contains(arg, "+app") || strings.Contains(arg, "replication-source")) {
hasCustomTargetCluster = true
break
}
}

if !hasCustomTargetCluster {
return nil
}

l.Printf("waiting for default target cluster")
retryOpts := retry.Options{MaxRetries: 20}
return retryOpts.Do(ctx, func(ctx context.Context) error {
// TODO(renato): use server.controller.default_target_cluster once
// 23.1 is no longer supported.
const stmt = "SHOW CLUSTER SETTING server.controller.default_tenant"
res, err := c.ExecSQL(ctx, l, Nodes{startOpts.GetInitTarget()}, SystemInterfaceName, 0, []string{"-e", stmt})
if err != nil {
return errors.Wrap(err, "error reading cluster setting")
}

if len(res) > 0 {
if res[0].Err != nil {
return errors.Wrapf(res[0].Err, "node %d", res[0].Node)
}

if strings.Contains(res[0].CombinedOut, "system") {
return errors.Newf("target cluster on n%d is still system", res[0].Node)
}
}

// Once we know the cluster setting points to the default target
// cluster, we attempt to run a dummy SQL statement until that
// succeeds (i.e., until the target cluster is able to handle
// requests.)
const pingStmt = "SELECT 1;"
res, err = c.ExecSQL(ctx, l, Nodes{startOpts.GetInitTarget()}, "", 0, []string{"-e", pingStmt})
if err != nil {
return errors.Wrap(err, "error connecting to default target cluster")
}

if res[0] != nil && res[0].Err != nil {
err = errors.CombineErrors(err, res[0].Err)
}

return err
})
}

// createAdminUserForSecureCluster creates a `roach` user with admin
// privileges. The password used matches the virtual cluster name
// ('system' for the storage cluster). If it cannot be created, this
Expand Down

0 comments on commit 028d1fc

Please sign in to comment.