diff --git a/pkg/BUILD.bazel b/pkg/BUILD.bazel index 26728b28f236..c08e5540dfcb 100644 --- a/pkg/BUILD.bazel +++ b/pkg/BUILD.bazel @@ -158,6 +158,7 @@ ALL_TESTS = [ "//pkg/roachpb:roachpb_test", "//pkg/roachpb:string_test", "//pkg/roachprod/cloud:cloud_test", + "//pkg/roachprod/config:config_test", "//pkg/roachprod/install:install_test", "//pkg/roachprod/ssh:ssh_test", "//pkg/roachprod/vm:vm_test", diff --git a/pkg/cmd/roachprod/main.go b/pkg/cmd/roachprod/main.go index 0feb0288eacf..17ae94f3cf21 100644 --- a/pkg/cmd/roachprod/main.go +++ b/pkg/cmd/roachprod/main.go @@ -68,6 +68,7 @@ var ( username string dryrun bool destroyAllMine bool + destroyAllLocal bool extendLifetime time.Duration wipePreserveCerts bool listDetails bool @@ -230,18 +231,19 @@ if the user would like to update the keys on the remote hosts. } var destroyCmd = &cobra.Command{ - Use: "destroy [ --all-mine | [ ...] ]", + Use: "destroy [ --all-mine | --all-local | [ ...] ]", Short: "destroy clusters", Long: `Destroy one or more local or cloud-based clusters. The destroy command accepts the names of the clusters to destroy. Alternatively, -the --all-mine flag can be provided to destroy all clusters that are owned by the -current user. +the --all-mine flag can be provided to destroy all (non-local) clusters that are +owned by the current user, or the --all-local flag can be provided to destroy +all local clusters. Destroying a cluster releases the resources for a cluster. For a cloud-based cluster the machine and associated disk resources are freed. For a local -cluster, any processes started by roachprod are stopped, and the ${HOME}/local -directory is removed. +cluster, any processes started by roachprod are stopped, and the node +directories inside ${HOME}/local directory are removed. `, Args: cobra.ArbitraryArgs, Run: wrap(func(cmd *cobra.Command, args []string) error { @@ -249,7 +251,7 @@ directory is removed. for _, clusterName := range args { clusters = append(clusters, clusterOpts(clusterName)) } - return roachprod.Destroy(clusters, destroyAllMine, username) + return roachprod.Destroy(clusters, destroyAllMine, destroyAllLocal, username) }), } @@ -479,17 +481,16 @@ var stopCmd = &cobra.Command{ Stop roachprod created processes running on the nodes in a cluster, including processes started by the "start", "run" and "ssh" commands. Every process -started by roachprod is tagged with a ROACHPROD= environment variable -which is used by "stop" to locate the processes and terminate them. By default -processes are killed with signal 9 (SIGKILL) giving them no chance for a graceful -exit. +started by roachprod is tagged with a ROACHPROD environment variable which is +used by "stop" to locate the processes and terminate them. By default processes +are killed with signal 9 (SIGKILL) giving them no chance for a graceful exit. The --sig flag will pass a signal to kill to allow us finer control over how we shutdown cockroach. The --wait flag causes stop to loop waiting for all -processes with the ROACHPROD= environment variable to exit. Note that -stop will wait forever if you specify --wait with a non-terminating signal -(e.g. SIGHUP). --wait defaults to true for signal 9 (SIGKILL) and false for all -other signals. +processes with the right ROACHPROD environment variable to exit. Note that stop +will wait forever if you specify --wait with a non-terminating signal (e.g. +SIGHUP). --wait defaults to true for signal 9 (SIGKILL) and false for all other +signals. ` + tagHelp + ` `, Args: cobra.ExactArgs(1), @@ -1005,7 +1006,9 @@ func main() { } destroyCmd.Flags().BoolVarP(&destroyAllMine, - "all-mine", "m", false, "Destroy all clusters belonging to the current user") + "all-mine", "m", false, "Destroy all non-local clusters belonging to the current user") + destroyCmd.Flags().BoolVarP(&destroyAllLocal, + "all-local", "l", false, "Destroy all local clusters") extendCmd.Flags().DurationVarP(&extendLifetime, "lifetime", "l", 12*time.Hour, "Lifetime of the cluster") diff --git a/pkg/roachprod/cloud/cluster_cloud.go b/pkg/roachprod/cloud/cluster_cloud.go index 74278f81d150..c769ff4adbc3 100644 --- a/pkg/roachprod/cloud/cluster_cloud.go +++ b/pkg/roachprod/cloud/cluster_cloud.go @@ -150,16 +150,17 @@ func (c *Cluster) PrintDetails() { } } -// IsLocal TODO(peter): document +// IsLocal returns true if c is a local cluster. func (c *Cluster) IsLocal() bool { - return c.Name == config.Local + return config.IsLocalClusterName(c.Name) } const vmNameFormat = "user--" -func namesFromVM(v vm.VM) (string, string, error) { +// namesFromVM determines the user name and the cluster name from a VM. +func namesFromVM(v vm.VM) (userName string, clusterName string, _ error) { if v.IsLocal() { - return config.Local, config.Local, nil + return config.Local, v.LocalClusterName, nil } name := v.Name parts := strings.Split(name, "-") diff --git a/pkg/roachprod/cloud/gc.go b/pkg/roachprod/cloud/gc.go index a4e2d453a821..e40c37dead75 100644 --- a/pkg/roachprod/cloud/gc.go +++ b/pkg/roachprod/cloud/gc.go @@ -277,7 +277,7 @@ func GCClusters(cloud *Cloud, dryrun bool) error { var names []string for name := range cloud.Clusters { - if name != config.Local { + if !config.IsLocalClusterName(name) { names = append(names, name) } } diff --git a/pkg/roachprod/clusters_cache.go b/pkg/roachprod/clusters_cache.go index 2d6264a98904..41306d230fbe 100644 --- a/pkg/roachprod/clusters_cache.go +++ b/pkg/roachprod/clusters_cache.go @@ -115,7 +115,7 @@ func LoadClusters() error { install.Clusters[sc.Name] = sc - if local.IsLocal(c.Name) { + if config.IsLocalClusterName(c.Name) { // Add the local cluster to the local provider. local.AddCluster(c) } diff --git a/pkg/roachprod/config/BUILD.bazel b/pkg/roachprod/config/BUILD.bazel index b1103d103fda..a131fbbed1db 100644 --- a/pkg/roachprod/config/BUILD.bazel +++ b/pkg/roachprod/config/BUILD.bazel @@ -1,4 +1,4 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") +load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test") go_library( name = "config", @@ -7,3 +7,9 @@ go_library( visibility = ["//visibility:public"], deps = ["//pkg/util/log"], ) + +go_test( + name = "config_test", + srcs = ["config_test.go"], + embed = [":config"], +) diff --git a/pkg/roachprod/config/config.go b/pkg/roachprod/config/config.go index 5f6a4c828c50..cda44fbfcc42 100644 --- a/pkg/roachprod/config/config.go +++ b/pkg/roachprod/config/config.go @@ -13,6 +13,7 @@ package config import ( "context" "os/user" + "regexp" "github.com/cockroachdb/cockroach/pkg/util/log" ) @@ -41,7 +42,8 @@ const ( // EmailDomain is used to form the full account name for GCE and Slack. EmailDomain = "@cockroachlabs.com" - // Local is the name of the local cluster. + // Local is the prefix used to identify local clusters. + // It is also used as the zone for local clusters. Local = "local" // ClustersDir is the directory where we cache information about clusters. @@ -62,3 +64,13 @@ const ( // listening for HTTP connections for the Admin UI. DefaultAdminUIPort = 26258 ) + +// IsLocalClusterName returns true if the given name is a valid name for a local +// cluster. +// +// Local cluster names are either "local" or start with a "local-" prefix. +func IsLocalClusterName(clusterName string) bool { + return localClusterRegex.MatchString(clusterName) +} + +var localClusterRegex = regexp.MustCompile(`^local(|-[a-zA-Z0-9\-]+)$`) diff --git a/pkg/roachprod/config/config_test.go b/pkg/roachprod/config/config_test.go new file mode 100644 index 000000000000..0d1af6046786 --- /dev/null +++ b/pkg/roachprod/config/config_test.go @@ -0,0 +1,42 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package config + +import "testing" + +func TestIsLocalClusterName(t *testing.T) { + yes := []string{ + "local", + "local-1", + "local-foo", + "local-foo-bar-123-aZy", + } + no := []string{ + "loca", + "locall", + "local1", + "local-", + "local-foo?", + "local-foo/", + } + + for _, s := range yes { + if !IsLocalClusterName(s) { + t.Errorf("expected '%s' to be a valid local cluster name", s) + } + } + + for _, s := range no { + if IsLocalClusterName(s) { + t.Errorf("expected '%s' to not be a valid local cluster name", s) + } + } +} diff --git a/pkg/roachprod/install/BUILD.bazel b/pkg/roachprod/install/BUILD.bazel index 502c705964c6..3e25985cac52 100644 --- a/pkg/roachprod/install/BUILD.bazel +++ b/pkg/roachprod/install/BUILD.bazel @@ -27,6 +27,7 @@ go_library( "//pkg/roachprod/ssh", "//pkg/roachprod/ui", "//pkg/roachprod/vm/aws", + "//pkg/roachprod/vm/local", "//pkg/util/envutil", "//pkg/util/httputil", "//pkg/util/log", @@ -41,7 +42,10 @@ go_library( go_test( name = "install_test", - srcs = ["start_template_test.go"], + srcs = [ + "cluster_synced_test.go", + "start_template_test.go", + ], data = glob(["testdata/**"]), embed = [":install"], deps = [ diff --git a/pkg/roachprod/install/cluster_synced.go b/pkg/roachprod/install/cluster_synced.go index 7f0acd47e265..704295910c25 100644 --- a/pkg/roachprod/install/cluster_synced.go +++ b/pkg/roachprod/install/cluster_synced.go @@ -36,6 +36,7 @@ import ( "github.com/cockroachdb/cockroach/pkg/roachprod/ssh" "github.com/cockroachdb/cockroach/pkg/roachprod/ui" "github.com/cockroachdb/cockroach/pkg/roachprod/vm/aws" + "github.com/cockroachdb/cockroach/pkg/roachprod/vm/local" "github.com/cockroachdb/cockroach/pkg/util/log" "github.com/cockroachdb/cockroach/pkg/util/syncutil" "github.com/cockroachdb/cockroach/pkg/util/timeutil" @@ -105,7 +106,11 @@ func (c *SyncedCluster) locality(index int) string { // TODO(tschottdorf): roachprod should cleanly encapsulate the home directory // which is currently the biggest culprit for awkward one-offs. func (c *SyncedCluster) IsLocal() bool { - return c.Name == config.Local + return config.IsLocalClusterName(c.Name) +} + +func (c *SyncedCluster) localVMDir(nodeIdx int) string { + return local.VMDir(c.Name, nodeIdx) } // ServerNodes is the fully expanded, ordered list of nodes that any given @@ -150,6 +155,51 @@ func (c *SyncedCluster) GetInternalIP(index int) (string, error) { return ip, nil } +// roachprodEnvValue returns the value of the ROACHPROD environment variable +// that is set when starting a process. This value is used to recognize the +// correct process, when monitoring or stopping. +// +// Normally, the value is of the form: +// [/][/tag] +// +// Examples: +// +// - non-local cluster without tags: +// ROACHPROD=1 +// +// - non-local cluster with tag foo: +// ROACHPROD=1/foo +// +// - non-local cluster with hierarchical tag foo/bar: +// ROACHPROD=1/foo/bar +// +// - local cluster: +// ROACHPROD=local-foo/1 +// +// - local cluster with tag bar: +// ROACHPROD=local-foo/1/bar +// +func (c *SyncedCluster) roachprodEnvValue(node int) string { + var parts []string + if c.IsLocal() { + parts = append(parts, c.Name) + } + parts = append(parts, fmt.Sprintf("%d", node)) + if c.Tag != "" { + parts = append(parts, c.Tag) + } + return strings.Join(parts, "/") +} + +// roachprodEnvRegex returns a regexp that matches the ROACHPROD value for the +// given node. +func (c *SyncedCluster) roachprodEnvRegex(node int) string { + escaped := strings.Replace(c.roachprodEnvValue(node), "/", "\\/", -1) + // We look for either a trailing space or a slash (in which case, we tolerate + // any remaining tag suffix). + return fmt.Sprintf(`ROACHPROD=%s[ \/]`, escaped) +} + // Start TODO(peter): document func (c *SyncedCluster) Start() { c.Impl.Start(c, c.Args) @@ -206,16 +256,15 @@ echo ">>> roachprod stop: $(date)" >> %[1]s/roachprod.log ps axeww -o pid -o command >> %[1]s/roachprod.log pids=$(ps axeww -o pid -o command | \ sed 's/export ROACHPROD=//g' | \ - awk '/ROACHPROD=(%[2]d%[3]s)[ \/]/ { print $1 }') + awk '/%[2]s/ { print $1 }') if [ -n "${pids}" ]; then - kill -%[4]d ${pids} -%[5]s + kill -%[3]d ${pids} +%[4]s fi`, - c.Impl.LogDir(c, c.Nodes[i]), // [1] - c.Nodes[i], // [2] - c.escapedTag(), // [3] - sig, // [4] - waitCmd, // [5] + c.Impl.LogDir(c, c.Nodes[i]), // [1] + c.roachprodEnvRegex(c.Nodes[i]), // [2] + sig, // [3] + waitCmd, // [4] ) return sess.CombinedOutput(cmd) }) @@ -240,7 +289,7 @@ func (c *SyncedCluster) Wipe(preserveCerts bool) { dirs = append(dirs, "certs*") } for _, dir := range dirs { - cmd += fmt.Sprintf(`rm -fr ${HOME}/local/%d/%s ;`, c.Nodes[i], dir) + cmd += fmt.Sprintf(`rm -fr %s/%s ;`, c.localVMDir(c.Nodes[i]), dir) } } else { cmd = `sudo find /mnt/data* -maxdepth 1 -type f -exec rm -f {} \; && @@ -270,8 +319,8 @@ func (c *SyncedCluster) Status() { binary := cockroachNodeBinary(c, c.Nodes[i]) cmd := fmt.Sprintf(`out=$(ps axeww -o pid -o ucomm -o command | \ sed 's/export ROACHPROD=//g' | \ - awk '/ROACHPROD=(%d%s)[ \/]/ {print $2, $1}'`, - c.Nodes[i], c.escapedTag()) + awk '/%s/ {print $2, $1}'`, + c.roachprodEnvRegex(c.Nodes[i])) cmd += ` | sort | uniq); vers=$(` + binary + ` version 2>/dev/null | awk '/Build Tag:/ {print $NF}') if [ -n "${out}" -a -n "${vers}" ]; then @@ -522,10 +571,10 @@ func (c *SyncedCluster) Run(stdout, stderr io.Writer, nodes []int, title, cmd st // // That command should return immediately. And a "roachprod status" should // reveal that the sleep command is running on the cluster. - nodeCmd := fmt.Sprintf(`export ROACHPROD=%d%s GOTRACEBACK=crash && bash -c %s`, - nodes[i], c.Tag, ssh.Escape1(expandedCmd)) + nodeCmd := fmt.Sprintf(`export ROACHPROD=%s GOTRACEBACK=crash && bash -c %s`, + c.roachprodEnvValue(nodes[i]), ssh.Escape1(expandedCmd)) if c.IsLocal() { - nodeCmd = fmt.Sprintf("cd ${HOME}/local/%d ; %s", nodes[i], nodeCmd) + nodeCmd = fmt.Sprintf("cd %s; %s", c.localVMDir(nodes[i]), nodeCmd) } if stream { @@ -827,7 +876,7 @@ fi func (c *SyncedCluster) DistributeCerts() { dir := "" if c.IsLocal() { - dir = `${HOME}/local/1` + dir = c.localVMDir(1) } // Check to see if the certs have already been initialized. @@ -895,7 +944,7 @@ func (c *SyncedCluster) DistributeCerts() { var cmd string if c.IsLocal() { - cmd = `cd ${HOME}/local/1 ; ` + cmd = fmt.Sprintf(`cd %s ; `, c.localVMDir(1)) } cmd += fmt.Sprintf(` rm -fr certs @@ -963,7 +1012,7 @@ tar cvf certs.tar certs sess.SetStdin(bytes.NewReader(certsTar)) var cmd string if c.IsLocal() { - cmd = fmt.Sprintf(`cd ${HOME}/local/%d ; `, nodes[i]) + cmd = fmt.Sprintf(`cd %s ; `, c.localVMDir(nodes[i])) } cmd += `tar xf -` if out, err := sess.CombinedOutput(cmd); err != nil { @@ -1086,7 +1135,7 @@ func (c *SyncedCluster) Put(src, dest string) { if filepath.IsAbs(dest) { to = dest } else { - to = fmt.Sprintf(os.ExpandEnv("${HOME}/local/%d/%s"), c.Nodes[i], dest) + to = filepath.Join(c.localVMDir(c.Nodes[i]), dest) } // Remove the destination if it exists, ignoring errors which we'll // handle via the os.Symlink() call. @@ -1394,7 +1443,7 @@ func (c *SyncedCluster) Get(src, dest string) { if c.IsLocal() { if !filepath.IsAbs(src) { - src = filepath.Join(fmt.Sprintf(os.ExpandEnv("${HOME}/local/%d"), c.Nodes[i]), src) + src = filepath.Join(c.localVMDir(c.Nodes[i]), src) } var copy func(src, dest string, info os.FileInfo) error @@ -1613,12 +1662,12 @@ func (c *SyncedCluster) SSH(sshArgs, args []string) error { allArgs = []string{ "/bin/bash", "-c", } - cmd := fmt.Sprintf("cd ${HOME}/local/%d ; ", c.Nodes[0]) + cmd := fmt.Sprintf("cd %s ; ", c.localVMDir(c.Nodes[0])) if len(args) == 0 /* interactive */ { cmd += "/bin/bash " } if len(args) > 0 { - cmd += fmt.Sprintf("export ROACHPROD=%d%s ; ", c.Nodes[0], c.Tag) + cmd += fmt.Sprintf("export ROACHPROD=%s ; ", c.roachprodEnvValue(c.Nodes[0])) cmd += strings.Join(expandedArgs, " ") } allArgs = append(allArgs, cmd) @@ -1632,7 +1681,9 @@ func (c *SyncedCluster) SSH(sshArgs, args []string) error { allArgs = append(allArgs, sshAuthArgs()...) allArgs = append(allArgs, sshArgs...) if len(args) > 0 { - allArgs = append(allArgs, fmt.Sprintf("export ROACHPROD=%d%s ;", c.Nodes[0], c.Tag)) + allArgs = append(allArgs, fmt.Sprintf( + "export ROACHPROD=%s ;", c.roachprodEnvValue(c.Nodes[0]), + )) } allArgs = append(allArgs, expandedArgs...) } @@ -1795,10 +1846,6 @@ func (c *SyncedCluster) ParallelE( return nil, nil } -func (c *SyncedCluster) escapedTag() string { - return strings.Replace(c.Tag, "/", "\\/", -1) -} - // Init initializes the cluster. It does it through node 1 (as per ServerNodes) // to maintain parity with auto-init behavior of `roachprod start` (when // --skip-init) is not specified. The implementation should be kept in diff --git a/pkg/roachprod/install/cluster_synced_test.go b/pkg/roachprod/install/cluster_synced_test.go new file mode 100644 index 000000000000..ce5dc7c0962f --- /dev/null +++ b/pkg/roachprod/install/cluster_synced_test.go @@ -0,0 +1,77 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package install + +import ( + "fmt" + "testing" +) + +// TestRoachprodEnv tests the roachprodEnvRegex and roachprodEnvValue methods. +func TestRoachprodEnv(t *testing.T) { + cases := []struct { + clusterName string + node int + tag string + value string + regex string + }{ + { + clusterName: "a", + node: 1, + tag: "", + value: "1", + regex: `ROACHPROD=1[ \/]`, + }, + { + clusterName: "local-foo", + node: 2, + tag: "", + value: "local-foo/2", + regex: `ROACHPROD=local-foo\/2[ \/]`, + }, + { + clusterName: "a", + node: 3, + tag: "foo", + value: "3/foo", + regex: `ROACHPROD=3\/foo[ \/]`, + }, + { + clusterName: "a", + node: 4, + tag: "foo/bar", + value: "4/foo/bar", + regex: `ROACHPROD=4\/foo\/bar[ \/]`, + }, + { + clusterName: "local-foo", + node: 5, + tag: "tag", + value: "local-foo/5/tag", + regex: `ROACHPROD=local-foo\/5\/tag[ \/]`, + }, + } + + for idx, tc := range cases { + t.Run(fmt.Sprintf("%d", idx+1), func(t *testing.T) { + var c SyncedCluster + c.Name = tc.clusterName + c.Tag = tc.tag + if value := c.roachprodEnvValue(tc.node); value != tc.value { + t.Errorf("expected value `%s`, got `%s`", tc.value, value) + } + if regex := c.roachprodEnvRegex(tc.node); regex != tc.regex { + t.Errorf("expected regex `%s`, got `%s`", tc.regex, regex) + } + }) + } +} diff --git a/pkg/roachprod/install/cockroach.go b/pkg/roachprod/install/cockroach.go index 64271b46ba9d..e54411fae109 100644 --- a/pkg/roachprod/install/cockroach.go +++ b/pkg/roachprod/install/cockroach.go @@ -57,7 +57,7 @@ func cockroachNodeBinary(c *SyncedCluster, node int) string { return "./" + config.Binary } - path := filepath.Join(fmt.Sprintf(os.ExpandEnv("${HOME}/local/%d"), node), config.Binary) + path := filepath.Join(c.localVMDir(node), config.Binary) if _, err := os.Stat(path); err == nil { return path } @@ -240,27 +240,25 @@ func (Cockroach) NodeDir(c *SyncedCluster, index, storeIndex int) string { if storeIndex != 1 { panic("Cockroach.NodeDir only supports one store for local deployments") } - return os.ExpandEnv(fmt.Sprintf("${HOME}/local/%d/data", index)) + return filepath.Join(c.localVMDir(index), "data") } return fmt.Sprintf("/mnt/data%d/cockroach", storeIndex) } // LogDir implements the ClusterImpl.NodeDir interface. func (Cockroach) LogDir(c *SyncedCluster, index int) string { - dir := "logs" if c.IsLocal() { - dir = os.ExpandEnv(fmt.Sprintf("${HOME}/local/%d/logs", index)) + return filepath.Join(c.localVMDir(index), "logs") } - return dir + return "logs" } // CertsDir implements the ClusterImpl.NodeDir interface. func (Cockroach) CertsDir(c *SyncedCluster, index int) string { - dir := "certs" if c.IsLocal() { - dir = os.ExpandEnv(fmt.Sprintf("${HOME}/local/%d/certs", index)) + return filepath.Join(c.localVMDir(index), "certs") } - return dir + return "certs" } // NodeURL implements the ClusterImpl.NodeDir interface. @@ -325,7 +323,7 @@ func (r Cockroach) SQL(c *SyncedCluster, args []string) error { var cmd string if c.IsLocal() { - cmd = fmt.Sprintf(`cd ${HOME}/local/%d ; `, c.Nodes[nodeIdx]) + cmd = fmt.Sprintf(`cd %s ; `, c.localVMDir(c.Nodes[nodeIdx])) } cmd += cockroachNodeBinary(c, c.Nodes[nodeIdx]) + " sql --url " + r.NodeURL(c, "localhost", r.NodePort(c, c.Nodes[nodeIdx])) + " " + @@ -378,7 +376,7 @@ func (h *crdbInstallHelper) startNode( sess.SetStdin(strings.NewReader(startCmd)) var cmd string if h.c.IsLocal() { - cmd = fmt.Sprintf(`cd ${HOME}/local/%d ; `, nodes[nodeIdx]) + cmd = fmt.Sprintf(`cd %s ; `, h.c.localVMDir(nodes[nodeIdx])) } cmd += `cat > cockroach.sh && chmod +x cockroach.sh` if out, err := sess.CombinedOutput(cmd); err != nil { @@ -398,7 +396,7 @@ func (h *crdbInstallHelper) startNode( var cmd string if h.c.IsLocal() { - cmd = fmt.Sprintf(`cd ${HOME}/local/%d ; `, nodes[nodeIdx]) + cmd = fmt.Sprintf(`cd %s ; `, h.c.localVMDir(nodes[nodeIdx])) } cmd += "./cockroach.sh" out, err := sess.CombinedOutput(cmd) @@ -429,8 +427,8 @@ func (h *crdbInstallHelper) generateStartCmd( return execStartTemplate(startTemplateData{ LogDir: h.c.Impl.LogDir(h.c, nodes[nodeIdx]), KeyCmd: h.generateKeyCmd(nodeIdx, extraArgs), - Tag: h.c.Tag, EnvVars: append(append([]string{ + fmt.Sprintf("ROACHPROD=%s", h.c.roachprodEnvValue(nodes[nodeIdx])), "GOTRACEBACK=crash", "COCKROACH_SKIP_ENABLING_DIAGNOSTIC_REPORTING=1", }, h.c.Env...), h.getEnvVars()...), @@ -438,17 +436,21 @@ func (h *crdbInstallHelper) generateStartCmd( StartCmd: startCmd, Args: args, MemoryMax: config.MemoryMax, - NodeNum: nodes[nodeIdx], Local: h.c.IsLocal(), AdvertiseFirstIP: advertiseFirstIP, }) } type startTemplateData struct { - LogDir, KeyCmd, Tag, Binary, StartCmd, MemoryMax string - EnvVars, Args []string - NodeNum int - Local, AdvertiseFirstIP bool + Local bool + AdvertiseFirstIP bool + LogDir string + Binary string + StartCmd string + KeyCmd string + MemoryMax string + Args []string + EnvVars []string } func execStartTemplate(data startTemplateData) (string, error) { @@ -627,7 +629,7 @@ func (h *crdbInstallHelper) generateClusterSettingCmd(nodeIdx int) string { var clusterSettingCmd string if h.c.IsLocal() { - clusterSettingCmd = `cd ${HOME}/local/1 ; ` + clusterSettingCmd = fmt.Sprintf(`cd %s ; `, h.c.localVMDir(1)) } binary := cockroachNodeBinary(h.c, nodes[nodeIdx]) @@ -652,7 +654,7 @@ func (h *crdbInstallHelper) generateInitCmd(nodeIdx int) string { var initCmd string if h.c.IsLocal() { - initCmd = `cd ${HOME}/local/1 ; ` + initCmd = fmt.Sprintf(`cd %s ; `, h.c.localVMDir(1)) } path := fmt.Sprintf("%s/%s", h.c.Impl.NodeDir(h.c, nodes[nodeIdx], 1 /* storeIndex */), "cluster-bootstrapped") diff --git a/pkg/roachprod/install/download.go b/pkg/roachprod/install/download.go index 43f5373b9cca..e7809a3af23f 100644 --- a/pkg/roachprod/install/download.go +++ b/pkg/roachprod/install/download.go @@ -17,6 +17,8 @@ import ( "os" "path" "path/filepath" + + "github.com/cockroachdb/cockroach/pkg/roachprod/vm/local" ) const ( @@ -71,11 +73,10 @@ func Download(c *SyncedCluster, sourceURLStr string, sha string, dest string) er return err } - // If we are local and the destination is relative, then copy - // the file from the download node to the other nodes + // If we are local and the destination is relative, then copy the file from + // the download node to the other nodes. if c.IsLocal() && !filepath.IsAbs(dest) { - // ~/local/1/./bar.txt - src := fmt.Sprintf(os.ExpandEnv("${HOME}/local/%d/%s"), downloadNodes[0], dest) + src := filepath.Join(local.VMDir(c.Name, downloadNodes[0]), dest) cpCmd := fmt.Sprintf(`cp "%s" "%s"`, src, dest) return c.Run(os.Stdout, os.Stderr, c.Nodes[1:], "copying to remaining nodes", cpCmd) } diff --git a/pkg/roachprod/install/scripts/start.sh b/pkg/roachprod/install/scripts/start.sh index 76e6d76f5255..1166ff854c5d 100644 --- a/pkg/roachprod/install/scripts/start.sh +++ b/pkg/roachprod/install/scripts/start.sh @@ -17,8 +17,6 @@ set -euo pipefail LOCAL=#{if .Local#}true#{end#} ADVERTISE_FIRST_IP=#{if .AdvertiseFirstIP#}true#{end#} LOG_DIR=#{shesc .LogDir#} -NODE_NUM=#{shesc .NodeNum#} -TAG=#{shesc .Tag#} BINARY=#{shesc .Binary#} START_CMD=#{shesc .StartCmd#} KEY_CMD=#{.KeyCmd#} @@ -36,7 +34,6 @@ ENV_VARS=( # End of templated code. -ENV_VARS+=("ROACHPROD=${NODE_NUM}${TAG}") if [[ -n "${ADVERTISE_FIRST_IP}" ]]; then ARGS+=("--advertise-host" "$(hostname -I | awk '{print $1}')") fi diff --git a/pkg/roachprod/install/start_template_test.go b/pkg/roachprod/install/start_template_test.go index c4471b096e05..ae6ce8758bd3 100644 --- a/pkg/roachprod/install/start_template_test.go +++ b/pkg/roachprod/install/start_template_test.go @@ -23,13 +23,11 @@ func TestExecStartTemplate(t *testing.T) { LogDir: "./path with spaces/logs/$THIS_DOES_NOT_EVER_GET_EXPANDED", KeyCmd: `echo foo && \ echo bar $HOME`, - Tag: "tigtag", - EnvVars: []string{"COCKROACH=foo", "ROCKCOACH=17%"}, + EnvVars: []string{"ROACHPROD=1/tigtag", "COCKROACH=foo", "ROCKCOACH=17%"}, Binary: "./cockroach", StartCmd: "start-single-node", Args: []string{`--log "file-defaults: {dir: '/path with spaces/logs', exit-on-error: false}"`}, MemoryMax: "81%", - NodeNum: 1, Local: true, } datadriven.Walk(t, filepath.Join("testdata", "start"), func(t *testing.T, path string) { diff --git a/pkg/roachprod/install/testdata/start/start.txt b/pkg/roachprod/install/testdata/start/start.txt index c2aba756b160..1db454a021ea 100644 --- a/pkg/roachprod/install/testdata/start/start.txt +++ b/pkg/roachprod/install/testdata/start/start.txt @@ -20,8 +20,6 @@ set -euo pipefail LOCAL=true ADVERTISE_FIRST_IP= LOG_DIR='./path with spaces/logs/$THIS_DOES_NOT_EVER_GET_EXPANDED' -NODE_NUM=1 -TAG=tigtag BINARY=./cockroach START_CMD=start-single-node KEY_CMD=echo foo && \ @@ -31,13 +29,13 @@ ARGS=( '--log "file-defaults: {dir: '"'"'/path with spaces/logs'"'"', exit-on-error: false}"' ) ENV_VARS=( +ROACHPROD=1/tigtag COCKROACH=foo ROCKCOACH=17% ) # End of templated code. -ENV_VARS+=("ROACHPROD=${NODE_NUM}${TAG}") if [[ -n "${ADVERTISE_FIRST_IP}" ]]; then ARGS+=("--advertise-host" "$(hostname -I | awk '{print $1}')") fi diff --git a/pkg/roachprod/roachprod.go b/pkg/roachprod/roachprod.go index 9bccd417ffdb..782cd8dd20e2 100644 --- a/pkg/roachprod/roachprod.go +++ b/pkg/roachprod/roachprod.go @@ -56,9 +56,6 @@ func verifyClusterName(clusterName, username string) error { if clusterName == "" { return fmt.Errorf("cluster name cannot be blank") } - if clusterName == config.Local { - return nil - } alphaNum, err := regexp.Compile(`^[a-zA-Z0-9\-]+$`) if err != nil { @@ -68,6 +65,10 @@ func verifyClusterName(clusterName, username string) error { return errors.Errorf("cluster name must match %s", alphaNum.String()) } + if config.IsLocalClusterName(clusterName) { + return nil + } + // Use the vm.Provider account names, or --username. var accounts []string if len(username) > 0 { @@ -207,9 +208,7 @@ Available clusters: c.CertsDir = opts.CertsDir c.Env = opts.Env c.Args = opts.Args - if opts.Tag != "" { - c.Tag = "/" + opts.Tag - } + c.Tag = opts.Tag c.UseTreeDist = opts.UseTreeDist c.Quiet = opts.Quiet || !term.IsTerminal(int(os.Stdout.Fd())) c.MaxConcurrency = opts.MaxConcurrency @@ -513,7 +512,7 @@ func Reset(clusterOpts install.SyncedCluster, numNodes int, username string) err return err } - if clusterName == config.Local { + if config.IsLocalClusterName(clusterName) { return nil } @@ -969,67 +968,70 @@ func Pprof( } // Destroy TODO -func Destroy(clusters []install.SyncedCluster, destroyAllMine bool, username string) error { - type cloudAndName struct { - name string - cld *cloud.Cloud - } - var cns []cloudAndName - switch len(clusters) { - case 0: - if !destroyAllMine { - return errors.New("no cluster name provided") +func Destroy( + clusters []install.SyncedCluster, destroyAllMine bool, destroyAllLocal bool, username string, +) error { + var clusterNames []string + // We want to avoid running ListCloud() if we are only trying to destroy a + // local cluster. + var cld *cloud.Cloud + + switch { + case destroyAllMine: + if len(clusters) != 0 { + return errors.New("--all-mine cannot be combined with cluster names") + } + if destroyAllLocal { + return errors.New("--all-mine cannot be combined with --all-local") } - destroyPattern, err := userClusterNameRegexp() if err != nil { return err } - - cld, err := cloud.ListCloud() + cld, err = cloud.ListCloud() if err != nil { return err } + clusters := cld.Clusters.FilterByName(destroyPattern) + clusterNames = clusters.Names() - for name := range cld.Clusters { - if destroyPattern.MatchString(name) { - cns = append(cns, cloudAndName{name: name, cld: cld}) - } + case destroyAllLocal: + if len(clusters) != 0 { + return errors.New("--all-local cannot be combined with cluster names") } + clusterNames = local.Clusters() + default: - if destroyAllMine { - return errors.New("--all-mine cannot be combined with cluster names") + if len(clusters) == 0 { + return errors.New("no cluster name provided") } - var cld *cloud.Cloud for _, cluster := range clusters { - clusterName := cluster.Name - if err := verifyClusterName(clusterName, username); err != nil { + if err := verifyClusterName(cluster.Name, username); err != nil { return err } + clusterNames = append(clusterNames, cluster.Name) + } + } - if !local.IsLocal(clusterName) { - if cld == nil { - var err error - cld, err = cloud.ListCloud() - if err != nil { - return err - } - } - - cns = append(cns, cloudAndName{name: clusterName, cld: cld}) - } else { - if err := destroyLocalCluster(cluster); err != nil { + if err := ctxgroup.GroupWorkers( + context.TODO(), + len(clusterNames), + func(ctx context.Context, idx int) error { + name := clusterNames[idx] + if config.IsLocalClusterName(name) { + return destroyLocalCluster(name) + } + if cld == nil { + var err error + cld, err = cloud.ListCloud() + if err != nil { return err } } - } - } - - if err := ctxgroup.GroupWorkers(context.TODO(), len(cns), func(ctx context.Context, idx int) error { - return destroyCluster(cns[idx].cld, cns[idx].name) - }); err != nil { + return destroyCluster(cld, name) + }); err != nil { return err } fmt.Println("OK") @@ -1045,16 +1047,17 @@ func destroyCluster(cld *cloud.Cloud, clusterName string) error { return cloud.DestroyCluster(c) } -func destroyLocalCluster(clusterOpts install.SyncedCluster) error { - if _, ok := install.Clusters[clusterOpts.Name]; !ok { - return fmt.Errorf("cluster %s does not exist", clusterOpts.Name) +func destroyLocalCluster(clusterName string) error { + cluster, ok := install.Clusters[clusterName] + if !ok { + return fmt.Errorf("cluster %s does not exist", clusterName) } - c, err := newCluster(clusterOpts) + c, err := newCluster(*cluster) if err != nil { return err } c.Wipe(false) - return local.DeleteCluster(clusterOpts.Name) + return local.DeleteCluster(clusterName) } type clusterAlreadyExistsError struct { @@ -1099,7 +1102,7 @@ func Create( createVMOpts.ClusterName = clusterName defer func() { - if retErr == nil || clusterName == config.Local { + if retErr == nil || config.IsLocalClusterName(clusterName) { return } if errors.HasType(retErr, (*clusterAlreadyExistsError)(nil)) { @@ -1113,7 +1116,7 @@ func Create( } }() - if clusterName != config.Local { + if !config.IsLocalClusterName(clusterName) { cld, err := cloud.ListCloud() if err != nil { return err @@ -1145,14 +1148,8 @@ func Create( return createErr } - // Just create directories for the local cluster as there's no need for ssh. - if clusterName == config.Local { - for i := 0; i < numNodes; i++ { - err := os.MkdirAll(fmt.Sprintf(os.ExpandEnv("${HOME}/local/%d"), i+1), 0755) - if err != nil { - return err - } - } + if config.IsLocalClusterName(clusterName) { + // No need for ssh for local clusters. return nil } return SetupSSH(clusterOpts, username) diff --git a/pkg/roachprod/vm/local/BUILD.bazel b/pkg/roachprod/vm/local/BUILD.bazel index 370d700618f3..cc5b12b9d871 100644 --- a/pkg/roachprod/vm/local/BUILD.bazel +++ b/pkg/roachprod/vm/local/BUILD.bazel @@ -9,6 +9,7 @@ go_library( "//pkg/roachprod/cloud", "//pkg/roachprod/config", "//pkg/roachprod/vm", + "//pkg/util", "//pkg/util/timeutil", "@com_github_cockroachdb_errors//:errors", "@com_github_spf13_pflag//:pflag", diff --git a/pkg/roachprod/vm/local/local.go b/pkg/roachprod/vm/local/local.go index 54848686f091..a7be86ce42cc 100644 --- a/pkg/roachprod/vm/local/local.go +++ b/pkg/roachprod/vm/local/local.go @@ -13,11 +13,14 @@ package local import ( "fmt" "os" + "path/filepath" + "strings" "time" "github.com/cockroachdb/cockroach/pkg/roachprod/cloud" "github.com/cockroachdb/cockroach/pkg/roachprod/config" "github.com/cockroachdb/cockroach/pkg/roachprod/vm" + "github.com/cockroachdb/cockroach/pkg/util" "github.com/cockroachdb/cockroach/pkg/util/timeutil" "github.com/cockroachdb/errors" "github.com/spf13/pflag" @@ -26,6 +29,29 @@ import ( // ProviderName is config.Local. const ProviderName = config.Local +// VMDir returns the local directory for a given node in a cluster. +// Node indexes start at 1. +// +// If the cluster name is "local", node 1 directory is: +// ${HOME}/local/1 +// +// If the cluster name is "local-foo", node 1 directory is: +// ${HOME}/local/foo-1 +// +// WARNING: when we destroy a local cluster, we remove these directories so it's +// important that this function never returns things like "" or "/". +func VMDir(clusterName string, nodeIdx int) string { + if nodeIdx < 1 { + panic("invalid nodeIdx") + } + localDir := os.ExpandEnv("${HOME}/local") + if clusterName == config.Local { + return filepath.Join(localDir, fmt.Sprintf("%d", nodeIdx)) + } + name := strings.TrimPrefix(clusterName, config.Local+"-") + return filepath.Join(localDir, fmt.Sprintf("%s-%d", name, nodeIdx)) +} + // Init initializes the Local provider and registers it into vm.Providers. func Init(storage VMStorage) { vm.Providers[ProviderName] = &Provider{ @@ -34,11 +60,6 @@ func Init(storage VMStorage) { } } -// IsLocal returns true if the given cluster name is a local cluster. -func IsLocal(clusterName string) bool { - return clusterName == config.Local -} - // AddCluster adds the metadata of a local cluster; used when loading the saved // metadata for local clusters. func AddCluster(cluster *cloud.Cluster) { @@ -54,10 +75,10 @@ func DeleteCluster(name string) error { if c == nil { return fmt.Errorf("local cluster %s does not exist", name) } + fmt.Printf("Deleting local cluster %s\n", name) for i := range c.VMs { - err := os.RemoveAll(fmt.Sprintf(os.ExpandEnv("${HOME}/local/%d"), i+1)) - if err != nil { + if err := os.RemoveAll(VMDir(c.Name, i+1)); err != nil { return err } } @@ -70,6 +91,12 @@ func DeleteCluster(name string) error { return nil } +// Clusters returns a list of all known local clusters. +func Clusters() []string { + p := vm.Providers[ProviderName].(*Provider) + return p.clusters.Names() +} + // VMStorage is the interface for saving metadata for local clusters. type VMStorage interface { // SaveCluster saves the metadata for a local cluster. It is expected that @@ -122,21 +149,56 @@ func (p *Provider) Create(names []string, opts vm.CreateOpts) error { Lifetime: time.Hour, VMs: make(vm.List, len(names)), } + + if !config.IsLocalClusterName(c.Name) { + return errors.Errorf("'%s' is not a valid local cluster name", c.Name) + } + + // We will need to assign ports to the nodes, and they must not conflict with + // any other local clusters. + var portsTaken util.FastIntSet + for _, c := range p.clusters { + for i := range c.VMs { + portsTaken.Add(c.VMs[i].SQLPort) + portsTaken.Add(c.VMs[i].AdminUIPort) + } + } + sqlPort := config.DefaultSQLPort + adminUIPort := config.DefaultAdminUIPort + + // getPort returns the first available port (starting at *port), and modifies + // (*port) to be the following value. + getPort := func(port *int) int { + for portsTaken.Contains(*port) { + (*port)++ + } + result := *port + portsTaken.Add(result) + (*port)++ + return result + } + for i := range names { c.VMs[i] = vm.VM{ - Name: "localhost", - CreatedAt: now, - Lifetime: time.Hour, - PrivateIP: "127.0.0.1", - Provider: ProviderName, - ProviderID: ProviderName, - PublicIP: "127.0.0.1", - RemoteUser: config.OSUser.Username, - VPC: ProviderName, - MachineType: ProviderName, - Zone: ProviderName, - SQLPort: config.DefaultSQLPort + 2*i, - AdminUIPort: config.DefaultAdminUIPort + 2*i, + Name: "localhost", + CreatedAt: now, + Lifetime: time.Hour, + PrivateIP: "127.0.0.1", + Provider: ProviderName, + ProviderID: ProviderName, + PublicIP: "127.0.0.1", + RemoteUser: config.OSUser.Username, + VPC: ProviderName, + MachineType: ProviderName, + Zone: ProviderName, + SQLPort: getPort(&sqlPort), + AdminUIPort: getPort(&adminUIPort), + LocalClusterName: c.Name, + } + + err := os.MkdirAll(VMDir(c.Name, i+1), 0755) + if err != nil { + return err } } if err := p.storage.SaveCluster(c); err != nil { diff --git a/pkg/roachprod/vm/vm.go b/pkg/roachprod/vm/vm.go index e37cb7e5815b..1a61104759c0 100644 --- a/pkg/roachprod/vm/vm.go +++ b/pkg/roachprod/vm/vm.go @@ -66,6 +66,9 @@ type VM struct { // HTTP traffic for the Admin UI. // Usually config.DefaultAdminUIPort, except for local clusters. AdminUIPort int `json:"adminui_port"` + + // LocalClusterName is only set for VMs in a local cluster. + LocalClusterName string `json:"local_cluster_name,omitempty"` } // Name generates the name for the i'th node in a cluster.