Skip to content

Commit

Permalink
roachprod: explicitly bootstrap cluster instead of relying on auto-init
Browse files Browse the repository at this point in the history
`cockroach start` without `--join` auto-initializes the cluster. This
is deprecated behavior as of 19.2 and will be removed soon. `roachprod
start` inherited similar semantics, auto-initializing a cluster on
`roachprod start`. It did so by omitting the `--join` flag on node 1 and relying on
the auto-init behavior of CockroachDB. We keep the same external API of
`roachprod start`, but we now explicitly `cockroach init` the managed
clusters (using a dedicated `cluster-bootstrapped` signal file to avoid
double initialization).

Release note: None
  • Loading branch information
irfansharif committed Jul 14, 2020
1 parent 492cde2 commit ad380a4
Show file tree
Hide file tree
Showing 2 changed files with 93 additions and 43 deletions.
136 changes: 93 additions & 43 deletions pkg/cmd/roachprod/install/cockroach.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,17 +112,17 @@ func argExists(args []string, target string) int {

// Start implements the ClusterImpl.Start interface.
func (r Cockroach) Start(c *SyncedCluster, extraArgs []string) {
// Check to see if node 1 was started indicating the cluster was
// Check to see if node 1 was started, indicating the cluster is to be
// bootstrapped.
var bootstrapped bool
var bootstrappable bool
for _, i := range c.ServerNodes() {
if i == 1 {
bootstrapped = true
bootstrappable = true
break
}
}

if c.Secure && bootstrapped {
if c.Secure && bootstrappable {
c.DistributeCerts()
}

Expand Down Expand Up @@ -217,7 +217,8 @@ func (r Cockroach) Start(c *SyncedCluster, extraArgs []string) {
args = append(args, "--locality="+locality)
}
}
if nodes[i] != 1 {
// `cockroach start` without `--join` is no longer supported as of v20.1.
if nodes[i] != 1 || vers.AtLeast(version.MustParse("v20.1.0")) {
args = append(args, fmt.Sprintf("--join=%s:%d", host1, r.NodePort(c, 1)))
}
if advertisePublicIP {
Expand Down Expand Up @@ -271,11 +272,13 @@ func (r Cockroach) Start(c *SyncedCluster, extraArgs []string) {
// unhelpful empty error (since everything has been redirected away). This is
// unfortunately equally awkward to address.
cmd := "ulimit -c unlimited; mkdir -p " + logDir + "; "

// TODO(peter): The ps and lslocks stuff is intended to debug why killing
// of a cockroach process sometimes doesn't release file locks immediately.
cmd += `echo ">>> roachprod start: $(date)" >> ` + logDir + "/roachprod.log; " +
`ps axeww -o pid -o command >> ` + logDir + "/roachprod.log; " +
`[ -x /usr/bin/lslocks ] && /usr/bin/lslocks >> ` + logDir + "/roachprod.log; "

cmd += keyCmd +
fmt.Sprintf(" export ROACHPROD=%d%s && ", nodes[i], c.Tag) +
"GOTRACEBACK=crash " +
Expand All @@ -297,49 +300,96 @@ func (r Cockroach) Start(c *SyncedCluster, extraArgs []string) {
return nil, nil
})

if bootstrapped {
license := envutil.EnvOrDefaultString("COCKROACH_DEV_LICENSE", "")
if license == "" {
fmt.Printf("%s: COCKROACH_DEV_LICENSE unset: enterprise features will be unavailable\n",
c.Name)
if !bootstrappable {
return
}

var initOut string
display = fmt.Sprintf("%s: bootstrapping cluster", c.Name)
c.Parallel(display, 1, 0, func(i int) ([]byte, error) {
vers, err := getCockroachVersion(c, nodes[i])
if err != nil {
return nil, err
}
if !vers.AtLeast(version.MustParse("v20.1.0")) {
// `cockroach start` without `--join` is no longer supported as of v20.1;
// older versions auto-initialize on start, so no explicit `init` is needed.
return nil, nil
}
sess, err := c.newSession(1)
if err != nil {
return nil, err
}
defer sess.Close()

var msg string
display = fmt.Sprintf("%s: initializing cluster settings", c.Name)
c.Parallel(display, 1, 0, func(i int) ([]byte, error) {
sess, err := c.newSession(1)
if err != nil {
return nil, err
}
defer sess.Close()
var cmd string
if c.IsLocal() {
cmd = `cd ${HOME}/local/1 ; `
}

var cmd string
if c.IsLocal() {
cmd = `cd ${HOME}/local/1 ; `
}
dir := c.Impl.NodeDir(c, nodes[i])
cmd += `
if ! test -e ` + dir + `/settings-initialized ; then
COCKROACH_CONNECT_TIMEOUT=0 ` + cockroachNodeBinary(c, 1) + " sql --url " +
r.NodeURL(c, "localhost", r.NodePort(c, 1)) + " -e " +
fmt.Sprintf(`"
SET CLUSTER SETTING server.remote_debugging.mode = 'any';
SET CLUSTER SETTING cluster.organization = 'Cockroach Labs - Production Testing';
SET CLUSTER SETTING enterprise.license = '%s';"`, license) + ` &&
touch ` + dir + `/settings-initialized
fi
`
out, err := sess.CombinedOutput(cmd)
if err != nil {
return nil, errors.Wrapf(err, "~ %s\n%s", cmd, out)
}
msg = strings.TrimSpace(string(out))
return nil, nil
})
binary := cockroachNodeBinary(c, 1)
path := fmt.Sprintf("%s/%s", c.Impl.NodeDir(c, nodes[i]), "cluster-bootstrapped")
url := r.NodeURL(c, "localhost", r.NodePort(c, 1))

if msg != "" {
fmt.Println(msg)
cmd += fmt.Sprintf(`
if ! test -e %s ; then
COCKROACH_CONNECT_TIMEOUT=0 %s init --url %s && touch %s
fi`, path, binary, url, path)

out, err := sess.CombinedOutput(cmd)
if err != nil {
return nil, errors.Wrapf(err, "~ %s\n%s", cmd, out)
}
initOut = strings.TrimSpace(string(out))
return nil, nil
})

if initOut != "" {
fmt.Println(initOut)
}

license := envutil.EnvOrDefaultString("COCKROACH_DEV_LICENSE", "")
if license == "" {
fmt.Printf("%s: COCKROACH_DEV_LICENSE unset: enterprise features will be unavailable\n",
c.Name)
}

var clusterSettingsOut string
display = fmt.Sprintf("%s: initializing cluster settings", c.Name)
c.Parallel(display, 1, 0, func(i int) ([]byte, error) {
sess, err := c.newSession(1)
if err != nil {
return nil, err
}
defer sess.Close()

var cmd string
if c.IsLocal() {
cmd = `cd ${HOME}/local/1 ; `
}

binary := cockroachNodeBinary(c, 1)
path := fmt.Sprintf("%s/%s", c.Impl.NodeDir(c, nodes[i]), "settings-initialized")
url := r.NodeURL(c, "localhost", r.NodePort(c, 1))

cmd += fmt.Sprintf(`
if ! test -e %s ; then
COCKROACH_CONNECT_TIMEOUT=0 %s sql --url %s -e "
SET CLUSTER SETTING server.remote_debugging.mode = 'any';
SET CLUSTER SETTING cluster.organization = 'Cockroach Labs - Production Testing';
SET CLUSTER SETTING enterprise.license = '%s';" \
&& touch %s
fi`, path, binary, url, license, path)

out, err := sess.CombinedOutput(cmd)
if err != nil {
return nil, errors.Wrapf(err, "~ %s\n%s", cmd, out)
}
clusterSettingsOut = strings.TrimSpace(string(out))
return nil, nil
})

if clusterSettingsOut != "" {
fmt.Println(clusterSettingsOut)
}
}

Expand Down
Binary file modified pkg/cmd/roachtest/fixtures/1/checkpoint-v20.1.tgz
Binary file not shown.

0 comments on commit ad380a4

Please sign in to comment.