Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

roachprod: fixup roachprod --sequential #51893

Merged
merged 1 commit into from
Jul 25, 2020
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 48 additions & 67 deletions pkg/cmd/roachprod/install/cockroach.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,87 +119,65 @@ func argExists(args []string, target string) int {
// `start-single-node` (this was written to provide a short hand to start a
// single node cluster with a replication factor of one).
func (r Cockroach) Start(c *SyncedCluster, extraArgs []string) {
// Check to see if node 1 was started, indicating the cluster is to be
// bootstrapped.
var bootstrap bool
for _, node := range c.ServerNodes() {
if node == 1 {
bootstrap = true
break
}
}

h := &crdbStartHelper{c: c, r: r}
h.distributeCerts()

display := fmt.Sprintf("%s: starting", c.Name)
nodes := c.ServerNodes()

p := 0
var parallelism = 0
if StartOpts.Sequential {
p = 1
parallelism = 1
}
c.Parallel(display, len(nodes), p, func(nodeIdx int) ([]byte, error) {

fmt.Printf("%s: starting nodes\n", c.Name)
c.Parallel("", len(nodes), parallelism, func(nodeIdx int) ([]byte, error) {
vers, err := getCockroachVersion(c, nodes[nodeIdx])
if err != nil {
return nil, err
}

if h.useStartSingleNode(vers) {
// `cockroach start-single-node` auto-bootstraps, so we skip doing
// so ourselves.
//
// TODO(irfansharif): We don't seem to be setting cluster settings
// for clusters started using `start-single-node`, we should.
//
// TODO(irfansharif): We shouldn't explicitly bootstrap clusters
// running versions <20.1, as the join flags are constructed in a
// way such that node 1 is started with an empty join flag, and thus
// auto-bootstraps.
//
// TODO(irfansharif): `roachprod start --sequential` is broken.
// Given we bootstrap the cluster after having started all the
// servers in parallel, there's no longer any guarantee that node
// IDs will have been allocated in sequential order. What we should
// do here instead is initialize the cluster as soon as we start
// node 1, that way subsequent node additions will be allocated the
// right node IDs.
bootstrap = false
}

// NB: if cockroach started successfully, we ignore the output as it is
// some harmless start messaging.
if _, err := h.startNode(nodeIdx, extraArgs, vers); err != nil {
return nil, err
}

// NB: if cockroach started successfully, we ignore the output as it is
// some harmless start messaging.
return nil, nil
})

if !bootstrap {
return
}
// We reserve a few special operations (bootstrapping, and setting
// cluster settings) for node 1.
if node := nodes[nodeIdx]; node != 1 {
return nil, nil
}

// ServerNodes returns an ordered list, and given we're cleared to bootstrap
// this cluster, we expect to be doing it through node 1.
nodeIdx := 0
if node := nodes[nodeIdx]; node != 1 {
log.Fatalf("programming error: expecting to initialization/set cluster settings through node 1, found node %d", node)
}
// NB: The code blocks below are not parallelized, so it's safe for us
// to use fmt.Printf style logging.

// 1. We don't init when invoking with `start-single-node`.
// 2. For nodes running <20.1, the --join flags are constructed in a manner
// such that the first node doesn't have any (see `generateStartArgs`),
// which prompts CRDB to auto-initialize. For nodes running >=20.1, we
// need to explicitly initialize.
shouldInit := !h.useStartSingleNode(vers) && vers.AtLeast(version.MustParse("v20.1.0"))
if shouldInit {
fmt.Printf("%s: initializing cluster", h.c.Name)
initOut, err := h.initializeCluster(nodeIdx)
if err != nil {
log.Fatalf("unable to initialize cluster: %v", err)
}

fmt.Printf("%s: bootstrapping cluster", h.c.Name)
initOut, err := h.initializeCluster(nodeIdx)
if err != nil {
log.Fatalf("unable to bootstrap cluster: %v", err)
}
fmt.Println(initOut)
if initOut != "" {
fmt.Println(initOut)
}
}

fmt.Printf("%s: initializing cluster settings", h.c.Name)
clusterSettingsOut, err := h.setClusterSettings(nodeIdx)
if err != nil {
log.Fatalf("unable to set cluster settings: %v", err)
}
fmt.Println(clusterSettingsOut)
fmt.Printf("%s: setting cluster settings", h.c.Name)
clusterSettingsOut, err := h.setClusterSettings(nodeIdx)
if err != nil {
log.Fatalf("unable to set cluster settings: %v", err)
}
if clusterSettingsOut != "" {
fmt.Println(clusterSettingsOut)
}
return nil, nil
})
}

// NodeDir implements the ClusterImpl.NodeDir interface.
Expand Down Expand Up @@ -445,10 +423,13 @@ func (h *crdbStartHelper) generateStartArgs(
}

if !h.useStartSingleNode(vers) {
// Every node points to node 1. For clusters <20.1, node 1 does not
// point to anything (which itself is used to trigger bootstrap). For
// clusters >20.1, node 1 also points to itself, and an explicit
// `cockroach init` is needed.
// --join flags are unsupported/unnecessary in `cockroach
// start-single-node`. That aside, setting up --join flags is a bit
// precise. We have every node point to node 1. For clusters running
// <20.1, we have node 1 not point to anything (which in turn is used to
// trigger auto-initialization node 1). For clusters running >=20.1,
// node 1 also points to itself, and an explicit `cockroach init` is
// needed.
if nodes[nodeIdx] != 1 || vers.AtLeast(version.MustParse("v20.1.0")) {
args = append(args, fmt.Sprintf("--join=%s:%d", h.c.host(1), h.r.NodePort(h.c, 1)))
}
Expand Down