Skip to content

Commit

Permalink
roachprod: support multiple local clusters
Browse files Browse the repository at this point in the history
This change adds support for multiple local clusters. Local cluster
names must be either "local" or of the form "local-foo".

When the cluster is named "local", the node directories stay in the
same place, e.g. `~/local/1`. If the cluster is named "local-foo", the
node directories are named like `~/local/foo-1`.

For local clusters we include the cluster name in the ROACHPROD
variable; this is necessary to distinguish between processes of
different local clusters. The relevant code is cleaned up to
centralize the logic related to the ROACHPROD variable.

Fixes cockroachdb#71945.

Release note: None

meh
  • Loading branch information
RaduBerinde committed Nov 3, 2021
1 parent f3453be commit 31c09fd
Show file tree
Hide file tree
Showing 20 changed files with 417 additions and 165 deletions.
1 change: 1 addition & 0 deletions pkg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ ALL_TESTS = [
"//pkg/roachpb:roachpb_test",
"//pkg/roachpb:string_test",
"//pkg/roachprod/cloud:cloud_test",
"//pkg/roachprod/config:config_test",
"//pkg/roachprod/install:install_test",
"//pkg/roachprod/ssh:ssh_test",
"//pkg/roachprod/vm:vm_test",
Expand Down
33 changes: 18 additions & 15 deletions pkg/cmd/roachprod/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ var (
username string
dryrun bool
destroyAllMine bool
destroyAllLocal bool
extendLifetime time.Duration
wipePreserveCerts bool
listDetails bool
Expand Down Expand Up @@ -230,26 +231,27 @@ if the user would like to update the keys on the remote hosts.
}

var destroyCmd = &cobra.Command{
Use: "destroy [ --all-mine | <cluster 1> [<cluster 2> ...] ]",
Use: "destroy [ --all-mine | --all-local | <cluster 1> [<cluster 2> ...] ]",
Short: "destroy clusters",
Long: `Destroy one or more local or cloud-based clusters.
The destroy command accepts the names of the clusters to destroy. Alternatively,
the --all-mine flag can be provided to destroy all clusters that are owned by the
current user.
the --all-mine flag can be provided to destroy all (non-local) clusters that are
owned by the current user, or the --all-local flag can be provided to destroy
all local clusters.
Destroying a cluster releases the resources for a cluster. For a cloud-based
cluster the machine and associated disk resources are freed. For a local
cluster, any processes started by roachprod are stopped, and the ${HOME}/local
directory is removed.
cluster, any processes started by roachprod are stopped, and the node
directories inside ${HOME}/local directory are removed.
`,
Args: cobra.ArbitraryArgs,
Run: wrap(func(cmd *cobra.Command, args []string) error {
var clusters []install.SyncedCluster
for _, clusterName := range args {
clusters = append(clusters, clusterOpts(clusterName))
}
return roachprod.Destroy(clusters, destroyAllMine, username)
return roachprod.Destroy(clusters, destroyAllMine, destroyAllLocal, username)
}),
}

Expand Down Expand Up @@ -479,17 +481,16 @@ var stopCmd = &cobra.Command{
Stop roachprod created processes running on the nodes in a cluster, including
processes started by the "start", "run" and "ssh" commands. Every process
started by roachprod is tagged with a ROACHPROD=<node> environment variable
which is used by "stop" to locate the processes and terminate them. By default
processes are killed with signal 9 (SIGKILL) giving them no chance for a graceful
exit.
started by roachprod is tagged with a ROACHPROD environment variable which is
used by "stop" to locate the processes and terminate them. By default processes
are killed with signal 9 (SIGKILL) giving them no chance for a graceful exit.
The --sig flag will pass a signal to kill to allow us finer control over how we
shutdown cockroach. The --wait flag causes stop to loop waiting for all
processes with the ROACHPROD=<node> environment variable to exit. Note that
stop will wait forever if you specify --wait with a non-terminating signal
(e.g. SIGHUP). --wait defaults to true for signal 9 (SIGKILL) and false for all
other signals.
processes with the right ROACHPROD environment variable to exit. Note that stop
will wait forever if you specify --wait with a non-terminating signal (e.g.
SIGHUP). --wait defaults to true for signal 9 (SIGKILL) and false for all other
signals.
` + tagHelp + `
`,
Args: cobra.ExactArgs(1),
Expand Down Expand Up @@ -1005,7 +1006,9 @@ func main() {
}

destroyCmd.Flags().BoolVarP(&destroyAllMine,
"all-mine", "m", false, "Destroy all clusters belonging to the current user")
"all-mine", "m", false, "Destroy all non-local clusters belonging to the current user")
destroyCmd.Flags().BoolVarP(&destroyAllLocal,
"all-local", "l", false, "Destroy all local clusters")

extendCmd.Flags().DurationVarP(&extendLifetime,
"lifetime", "l", 12*time.Hour, "Lifetime of the cluster")
Expand Down
9 changes: 5 additions & 4 deletions pkg/roachprod/cloud/cluster_cloud.go
Original file line number Diff line number Diff line change
Expand Up @@ -150,16 +150,17 @@ func (c *Cluster) PrintDetails() {
}
}

// IsLocal TODO(peter): document
// IsLocal returns true if c is a local cluster.
func (c *Cluster) IsLocal() bool {
return c.Name == config.Local
return config.IsLocalClusterName(c.Name)
}

const vmNameFormat = "user-<clusterid>-<nodeid>"

func namesFromVM(v vm.VM) (string, string, error) {
// namesFromVM determines the user name and the cluster name from a VM.
func namesFromVM(v vm.VM) (userName string, clusterName string, _ error) {
if v.IsLocal() {
return config.Local, config.Local, nil
return config.Local, v.LocalClusterName, nil
}
name := v.Name
parts := strings.Split(name, "-")
Expand Down
2 changes: 1 addition & 1 deletion pkg/roachprod/cloud/gc.go
Original file line number Diff line number Diff line change
Expand Up @@ -277,7 +277,7 @@ func GCClusters(cloud *Cloud, dryrun bool) error {

var names []string
for name := range cloud.Clusters {
if name != config.Local {
if !config.IsLocalClusterName(name) {
names = append(names, name)
}
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/roachprod/clusters_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ func LoadClusters() error {

install.Clusters[sc.Name] = sc

if local.IsLocal(c.Name) {
if config.IsLocalClusterName(c.Name) {
// Add the local cluster to the local provider.
local.AddCluster(c)
}
Expand Down
8 changes: 7 additions & 1 deletion pkg/roachprod/config/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")

go_library(
name = "config",
Expand All @@ -7,3 +7,9 @@ go_library(
visibility = ["//visibility:public"],
deps = ["//pkg/util/log"],
)

go_test(
name = "config_test",
srcs = ["config_test.go"],
embed = [":config"],
)
14 changes: 13 additions & 1 deletion pkg/roachprod/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ package config
import (
"context"
"os/user"
"regexp"

"github.com/cockroachdb/cockroach/pkg/util/log"
)
Expand Down Expand Up @@ -41,7 +42,8 @@ const (
// EmailDomain is used to form the full account name for GCE and Slack.
EmailDomain = "@cockroachlabs.com"

// Local is the name of the local cluster.
// Local is the prefix used to identify local clusters.
// It is also used as the zone for local clusters.
Local = "local"

// ClustersDir is the directory where we cache information about clusters.
Expand All @@ -62,3 +64,13 @@ const (
// listening for HTTP connections for the Admin UI.
DefaultAdminUIPort = 26258
)

// localClusterRegex matches the names accepted for local clusters: the bare
// word "local", or "local-" followed by one or more ASCII letters, digits or
// dashes.
var localClusterRegex = regexp.MustCompile(`^local(|-[a-zA-Z0-9\-]+)$`)

// IsLocalClusterName reports whether clusterName is a valid name for a local
// cluster.
//
// A valid name is either exactly "local" or "local-" followed by a non-empty
// suffix (for example "local-foo").
func IsLocalClusterName(clusterName string) bool {
	matched := localClusterRegex.MatchString(clusterName)
	return matched
}
42 changes: 42 additions & 0 deletions pkg/roachprod/config/config_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
// Copyright 2021 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package config

import "testing"

// TestIsLocalClusterName checks IsLocalClusterName against a table of names
// that must be accepted and names that must be rejected.
func TestIsLocalClusterName(t *testing.T) {
	testCases := []struct {
		name  string
		valid bool
	}{
		// Names that must be accepted.
		{name: "local", valid: true},
		{name: "local-1", valid: true},
		{name: "local-foo", valid: true},
		{name: "local-foo-bar-123-aZy", valid: true},
		// Names that must be rejected.
		{name: "loca", valid: false},
		{name: "locall", valid: false},
		{name: "local1", valid: false},
		{name: "local-", valid: false},
		{name: "local-foo?", valid: false},
		{name: "local-foo/", valid: false},
	}

	for _, tc := range testCases {
		got := IsLocalClusterName(tc.name)
		switch {
		case tc.valid && !got:
			t.Errorf("expected '%s' to be a valid local cluster name", tc.name)
		case !tc.valid && got:
			t.Errorf("expected '%s' to not be a valid local cluster name", tc.name)
		}
	}
}
6 changes: 5 additions & 1 deletion pkg/roachprod/install/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ go_library(
"//pkg/roachprod/ssh",
"//pkg/roachprod/ui",
"//pkg/roachprod/vm/aws",
"//pkg/roachprod/vm/local",
"//pkg/util/envutil",
"//pkg/util/httputil",
"//pkg/util/log",
Expand All @@ -41,7 +42,10 @@ go_library(

go_test(
name = "install_test",
srcs = ["start_template_test.go"],
srcs = [
"cluster_synced_test.go",
"start_template_test.go",
],
data = glob(["testdata/**"]),
embed = [":install"],
deps = [
Expand Down
Loading

0 comments on commit 31c09fd

Please sign in to comment.