From 0aa7db1b75f052aef354308bae0cd2800be9adf9 Mon Sep 17 00:00:00 2001 From: Bhaskarjyoti Bora Date: Fri, 13 Dec 2024 17:25:36 +0530 Subject: [PATCH] roachprod: move command init out of main This PR refactors roachprod to move the commands out from the main.go to a separate package. Along with the commands, the flags are also moved. Separating the sub commands to a separate package will help us in better code maintenance and easier integration of these commands with other flows like drtprod. Also, initialisation outside of main can help us in making the commands unit testable. Note that, with this PR, there is no change in functionality of roachprod. The code will be further enhanced for maintainability by separating the commands from commands.go into different categories (like cluster provisioning, datadog, prometheus) and making them separate go files. Epic: none Release note: None --- pkg/BUILD.bazel | 2 +- pkg/cmd/roachprod/BUILD.bazel | 31 +- pkg/cmd/roachprod/cli/BUILD.bazel | 45 + .../roachprod/{ => cli}/bash_completion.go | 12 +- pkg/cmd/roachprod/cli/commands.go | 2204 +++++++++++++++++ pkg/cmd/roachprod/{ => cli}/flags.go | 389 ++- pkg/cmd/roachprod/cli/handlers.go | 90 + pkg/cmd/roachprod/cli/resgistry.go | 74 + pkg/cmd/roachprod/{update => cli}/update.go | 4 +- pkg/cmd/roachprod/cli/util.go | 240 ++ pkg/cmd/roachprod/main.go | 2100 +--------------- pkg/cmd/roachprod/update/BUILD.bazel | 17 - pkg/cmd/roachprod/update/util.go | 93 - 13 files changed, 2860 insertions(+), 2441 deletions(-) create mode 100644 pkg/cmd/roachprod/cli/BUILD.bazel rename pkg/cmd/roachprod/{ => cli}/bash_completion.go (77%) create mode 100644 pkg/cmd/roachprod/cli/commands.go rename pkg/cmd/roachprod/{ => cli}/flags.go (68%) create mode 100644 pkg/cmd/roachprod/cli/handlers.go create mode 100644 pkg/cmd/roachprod/cli/resgistry.go rename pkg/cmd/roachprod/{update => cli}/update.go (96%) create mode 100644 pkg/cmd/roachprod/cli/util.go delete mode 100644 
pkg/cmd/roachprod/update/BUILD.bazel delete mode 100644 pkg/cmd/roachprod/update/util.go diff --git a/pkg/BUILD.bazel b/pkg/BUILD.bazel index 17e3ea1eaf08..53af31cb50a8 100644 --- a/pkg/BUILD.bazel +++ b/pkg/BUILD.bazel @@ -1182,8 +1182,8 @@ GO_TARGETS = [ "//pkg/cmd/roachprod-microbench:roachprod-microbench_test", "//pkg/cmd/roachprod-stress:roachprod-stress", "//pkg/cmd/roachprod-stress:roachprod-stress_lib", + "//pkg/cmd/roachprod/cli:cli", "//pkg/cmd/roachprod/grafana:grafana", - "//pkg/cmd/roachprod/update:update", "//pkg/cmd/roachprod:roachprod", "//pkg/cmd/roachprod:roachprod_lib", "//pkg/cmd/roachtest/cluster:cluster", diff --git a/pkg/cmd/roachprod/BUILD.bazel b/pkg/cmd/roachprod/BUILD.bazel index 97c0b800c898..e0f7222cb621 100644 --- a/pkg/cmd/roachprod/BUILD.bazel +++ b/pkg/cmd/roachprod/BUILD.bazel @@ -2,40 +2,13 @@ load("@io_bazel_rules_go//go:def.bzl", "go_binary", "go_library") go_library( name = "roachprod_lib", - srcs = [ - "bash_completion.go", - "flags.go", - "main.go", - ], + srcs = ["main.go"], importpath = "github.com/cockroachdb/cockroach/pkg/cmd/roachprod", visibility = ["//visibility:private"], deps = [ "//pkg/build", - "//pkg/cmd/roachprod/grafana", - "//pkg/cmd/roachprod/update", - "//pkg/roachprod", - "//pkg/roachprod/cloud", - "//pkg/roachprod/config", - "//pkg/roachprod/errors", - "//pkg/roachprod/fluentbit", - "//pkg/roachprod/install", - "//pkg/roachprod/opentelemetry", - "//pkg/roachprod/roachprodutil", - "//pkg/roachprod/ssh", - "//pkg/roachprod/ui", - "//pkg/roachprod/vm", - "//pkg/roachprod/vm/gce", - "//pkg/util/envutil", - "//pkg/util/flagutil", - "//pkg/util/timeutil", - "@com_github_cockroachdb_errors//:errors", - "@com_github_fatih_color//:color", + "//pkg/cmd/roachprod/cli", "@com_github_spf13_cobra//:cobra", - "@org_golang_x_crypto//ssh", - "@org_golang_x_exp//maps", - "@org_golang_x_term//:term", - "@org_golang_x_text//language", - "@org_golang_x_text//message", ], ) diff --git a/pkg/cmd/roachprod/cli/BUILD.bazel 
b/pkg/cmd/roachprod/cli/BUILD.bazel new file mode 100644 index 000000000000..fb5fd85e709a --- /dev/null +++ b/pkg/cmd/roachprod/cli/BUILD.bazel @@ -0,0 +1,45 @@ +load("@io_bazel_rules_go//go:def.bzl", "go_library") + +go_library( + name = "cli", + srcs = [ + "bash_completion.go", + "commands.go", + "flags.go", + "handlers.go", + "resgistry.go", + "update.go", + "util.go", + ], + importpath = "github.com/cockroachdb/cockroach/pkg/cmd/roachprod/cli", + visibility = ["//visibility:public"], + deps = [ + "//pkg/cmd/roachprod/grafana", + "//pkg/roachprod", + "//pkg/roachprod/cloud", + "//pkg/roachprod/config", + "//pkg/roachprod/errors", + "//pkg/roachprod/fluentbit", + "//pkg/roachprod/install", + "//pkg/roachprod/opentelemetry", + "//pkg/roachprod/roachprodutil", + "//pkg/roachprod/ssh", + "//pkg/roachprod/ui", + "//pkg/roachprod/vm", + "//pkg/roachprod/vm/gce", + "//pkg/util/envutil", + "//pkg/util/flagutil", + "//pkg/util/timeutil", + "@com_github_cockroachdb_errors//:errors", + "@com_github_cockroachdb_errors//oserror", + "@com_github_fatih_color//:color", + "@com_github_spf13_cobra//:cobra", + "@com_google_cloud_go_storage//:storage", + "@org_golang_google_api//option", + "@org_golang_x_crypto//ssh", + "@org_golang_x_exp//maps", + "@org_golang_x_term//:term", + "@org_golang_x_text//language", + "@org_golang_x_text//message", + ], +) diff --git a/pkg/cmd/roachprod/bash_completion.go b/pkg/cmd/roachprod/cli/bash_completion.go similarity index 77% rename from pkg/cmd/roachprod/bash_completion.go rename to pkg/cmd/roachprod/cli/bash_completion.go index 44bf050fc491..f99cceb4c619 100644 --- a/pkg/cmd/roachprod/bash_completion.go +++ b/pkg/cmd/roachprod/cli/bash_completion.go @@ -3,26 +3,24 @@ // Use of this software is governed by the CockroachDB Software License // included in the /LICENSE file. 
-package main +package cli import ( "fmt" "strings" - - "github.com/spf13/cobra" ) // setBashCompletionFunction sets up a custom bash completion function to // autocomplete cluster names in various commands. -func setBashCompletionFunction() { +func (cr *commandRegistry) setBashCompletionFunction() { // Generate a list of commands that DON'T take a cluster argument. var s []string - for _, cmd := range []*cobra.Command{createCmd, listCmd, syncCmd, gcCmd} { - s = append(s, fmt.Sprintf("%s_%s", rootCmd.Name(), cmd.Name())) + for _, cmd := range cr.excludeFromBashCompletion { + s = append(s, fmt.Sprintf("%s_%s", cr.rootCmd.Name(), cmd.Name())) } excluded := strings.Join(s, " | ") - rootCmd.BashCompletionFunction = fmt.Sprintf( + cr.rootCmd.BashCompletionFunction = fmt.Sprintf( `__custom_func() { # only complete the 2nd arg, e.g. adminurl diff --git a/pkg/cmd/roachprod/cli/commands.go b/pkg/cmd/roachprod/cli/commands.go new file mode 100644 index 000000000000..9e7d26ad3b69 --- /dev/null +++ b/pkg/cmd/roachprod/cli/commands.go @@ -0,0 +1,2204 @@ +// Copyright 2024 The Cockroach Authors. +// +// Use of this software is governed by the CockroachDB Software License +// included in the /LICENSE file. 
+ +package cli + +import ( + "context" + "encoding/json" + "fmt" + "net/url" + "os" + "path" + "runtime" + "sort" + "strconv" + "strings" + "text/tabwriter" + "time" + + "github.com/cockroachdb/cockroach/pkg/cmd/roachprod/grafana" + "github.com/cockroachdb/cockroach/pkg/roachprod" + "github.com/cockroachdb/cockroach/pkg/roachprod/cloud" + "github.com/cockroachdb/cockroach/pkg/roachprod/config" + "github.com/cockroachdb/cockroach/pkg/roachprod/install" + "github.com/cockroachdb/cockroach/pkg/roachprod/roachprodutil" + "github.com/cockroachdb/cockroach/pkg/roachprod/ui" + "github.com/cockroachdb/cockroach/pkg/roachprod/vm" + "github.com/cockroachdb/cockroach/pkg/roachprod/vm/gce" + "github.com/cockroachdb/cockroach/pkg/util/timeutil" + "github.com/cockroachdb/errors" + "github.com/fatih/color" + "github.com/spf13/cobra" + "golang.org/x/crypto/ssh" + "golang.org/x/exp/maps" + "golang.org/x/text/language" + "golang.org/x/text/message" +) + +const ( + AuthModeHelp = ` +--auth-mode specifies the method of authentication unless --insecure is passed. +Defaults to root if not passed. Available auth-modes are: + + root: authenticates with the root user and root certificates + + user-password: authenticates with the default roachprod user and password + + user-cert: authenticates with the default roachprod user and certificates` + + tagHelp = ` +The --tag flag can be used to to associate a tag with the process. This tag can +then be used to restrict the processes which are operated on by the status and +stop commands. Tags can have a hierarchical component by utilizing a slash +separated string similar to a filesystem path. A tag matches if a prefix of the +components match. For example, the tag "a/b" will match both "a/b" and +"a/b/c/d". +` + cockroachApp = ` + cockroach - Cockroach nightly builds. Can provide an optional SHA, otherwise + latest build version is used.` + releaseApp = ` + release - Official CockroachDB Release. 
Must provide a specific release + version.` + customizedApp = ` + customized - Cockroach customized builds, usually generated by running + ./scripts/tag-custom-build.sh. Must provide a specific tag.` + localApp = ` + local - Use a provided local binary, must provide the path to the binary.` + workloadApp = ` + workload - Cockroach workload application.` +) + +var bashCompletion = os.ExpandEnv("$HOME/.roachprod/bash-completion.sh") + +func (cr *commandRegistry) buildCreateCmd() *cobra.Command { + createCmd := &cobra.Command{ + Use: "create ", + Short: "create a cluster", + Long: `Create a local or cloud-based cluster. + +A cluster is composed of a set of nodes, configured during cluster creation via +the --nodes flag. Creating a cluster does not start any processes on the nodes +other than the base system processes (e.g. sshd). See "roachprod start" for +starting cockroach nodes and "roachprod {run,ssh}" for running arbitrary +commands on the nodes of a cluster. + +Cloud Clusters + + Cloud-based clusters are ephemeral and come with a lifetime (specified by the + --lifetime flag) after which they will be automatically + destroyed. Cloud-based clusters require the associated command line tool for + the cloud to be installed and configured (e.g. "gcloud auth login"). + + Clusters names are required to be prefixed by the authenticated user of the + cloud service. The suffix is an arbitrary string used to distinguish + clusters. For example, "marc-test" is a valid cluster name for the user + "marc". The authenticated user for the cloud service is automatically + detected and can be override by the ROACHPROD_USER environment variable or + the --username flag. + + The machine type and the use of local SSD storage can be specified during + cluster creation via the --{cloud}-machine-type and --local-ssd flags. The + machine-type is cloud specified. For example, --gce-machine-type=n1-highcpu-8 + requests the "n1-highcpu-8" machine type for a GCE-based cluster. 
No attempt + is made (or desired) to abstract machine types across cloud providers. See + the cloud provider's documentation for details on the machine types + available. + + The underlying filesystem can be provided using the --filesystem flag. + Use --filesystem=zfs, for zfs, and --filesystem=ext4, for ext4. The default + file system is ext4. The filesystem flag only works on gce currently. + +Local Clusters + + A local cluster stores the per-node data in ${HOME}/local on the machine + roachprod is being run on. Whether a cluster is local is specified on creation + by using the name 'local' or 'local-'. Local clusters have no expiration. +`, + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) (retErr error) { + createVMOpts.ClusterName = args[0] + opts := cloud.ClusterCreateOpts{Nodes: numNodes, CreateOpts: createVMOpts, ProviderOptsContainer: providerOptsContainer} + return roachprod.Create(context.Background(), config.Logger, username, &opts) + }), + } + cr.addToExcludeFromBashCompletion(createCmd) + cr.addToExcludeFromClusterFlagsMulti(createCmd) + initCreateCmdFlags(createCmd) + initFlagUsernameForCmd(createCmd) + return createCmd +} + +func (cr *commandRegistry) buildGrowCmd() *cobra.Command { + growCmd := &cobra.Command{ + Use: `grow `, + Short: `grow a cluster by adding nodes`, + Long: `grow a cluster by adding the specified number of nodes to it. + +Only Google Cloud and local clusters currently support adding nodes. The Google +Cloud cluster has to be a managed cluster (i.e., a cluster created with the +gce-managed flag). The new nodes will use the instance template that was used to +create the cluster originally (Nodes will be created in the same zone as the +existing nodes, or if the cluster is geographically distributed, the nodes will +be fairly distributed across the zones of the cluster). 
+`, + Args: cobra.ExactArgs(2), + Run: wrap(func(cmd *cobra.Command, args []string) error { + count, err := strconv.ParseInt(args[1], 10, 8) + if err != nil || count < 1 { + return errors.Wrapf(err, "invalid num-nodes argument") + } + return roachprod.Grow(context.Background(), config.Logger, args[0], isSecure, int(count)) + }), + } + initFlagInsecureForCmd(growCmd) + return growCmd +} + +func (cr *commandRegistry) buildShrinkCmd() *cobra.Command { + return &cobra.Command{ + Use: `shrink `, + Short: `shrink a cluster by removing nodes`, + Long: `shrink a cluster by removing the specified number of nodes. + +Only Google Cloud and local clusters currently support removing nodes. The +Google Cloud cluster has to be a managed cluster (i.e., a cluster created with +the gce-managed flag). Nodes are removed from the tail end of the cluster. +Removing nodes from the middle of the cluster is not supported yet. +`, + Args: cobra.ExactArgs(2), + Run: wrap(func(cmd *cobra.Command, args []string) error { + count, err := strconv.ParseInt(args[1], 10, 8) + if err != nil || count < 1 { + return errors.Wrapf(err, "invalid num-nodes argument") + } + return roachprod.Shrink(context.Background(), config.Logger, args[0], int(count)) + }), + } +} + +func (cr *commandRegistry) buildResetCmd() *cobra.Command { + return &cobra.Command{ + Use: "reset ", + Short: "reset *all* VMs in a cluster", + Long: `Reset a cloud VM.`, + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) (retErr error) { + return roachprod.Reset(config.Logger, args[0]) + }), + } +} + +func (cr *commandRegistry) buildDestroyCmd() *cobra.Command { + destroyCmd := &cobra.Command{ + Use: "destroy [ --all-mine | --all-local | [ ...] ]", + Short: "destroy clusters", + Long: `Destroy one or more local or cloud-based clusters. + +The destroy command accepts the names of the clusters to destroy. 
Alternatively, +the --all-mine flag can be provided to destroy all (non-local) clusters that are +owned by the current user, or the --all-local flag can be provided to destroy +all local clusters. + +Destroying a cluster releases the resources for a cluster. For a cloud-based +cluster the machine and associated disk resources are freed. For a local +cluster, any processes started by roachprod are stopped, and the node +directories inside ${HOME}/local directory are removed. +`, + Args: cobra.ArbitraryArgs, + Run: wrap(func(cmd *cobra.Command, args []string) error { + return roachprod.Destroy(config.Logger, username, destroyAllMine, destroyAllLocal, args...) + }), + } + initDestroyCmdFlags(destroyCmd) + initFlagUsernameForCmd(destroyCmd) + return destroyCmd +} + +func (cr *commandRegistry) buildExtendCmd() *cobra.Command { + extendCmd := &cobra.Command{ + Use: "extend ", + Short: "extend the lifetime of a cluster", + Long: `Extend the lifetime of the specified cluster to prevent it from being +destroyed: + + roachprod extend marc-test --lifetime=6h +`, + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + return roachprod.Extend(config.Logger, args[0], extendLifetime) + }), + } + extendCmd.Flags().DurationVarP(&extendLifetime, + "lifetime", "l", 12*time.Hour, "Lifetime of the cluster") + return extendCmd +} + +func (cr *commandRegistry) buildLoadBalancerCmd() *cobra.Command { + loadBalancerCmd := &cobra.Command{ + Use: "load-balancer [command]", + Short: "manage and query load balancers", + Long: `create load balancers for specific services, query the IP or postgres URL of a load balancer`, + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + return roachprod.CreateLoadBalancer(context.Background(), config.Logger, + args[0], isSecure, virtualClusterName, sqlInstance, + ) + }), + } + loadBalancerCmd.AddCommand( + buildCreateLoadBalancerCmd(), + buildLoadBalancerPGUrl(), + buildLoadBalancerIP(), 
+ ) + return loadBalancerCmd +} + +func buildCreateLoadBalancerCmd() *cobra.Command { + createLoadBalancerCmd := &cobra.Command{ + Use: "create ", + Short: "create a load balancer for a cluster", + Long: `Create a load balancer for a specific service (port), system by default, for the given cluster. + +The load balancer is created using the cloud provider's load balancer service. +Currently only Google Cloud is supported, and the cluster must have been created +with the --gce-managed flag. On Google Cloud a load balancer consists of various +components that include backend services, health checks and forwarding rules. +These resources will automatically be destroyed when the cluster is destroyed. +`, + + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + return roachprod.CreateLoadBalancer(context.Background(), config.Logger, + args[0], isSecure, virtualClusterName, sqlInstance, + ) + }), + } + initFlagInsecureForCmd(createLoadBalancerCmd) + initFlagsClusterNSQLForCmd(createLoadBalancerCmd) + return createLoadBalancerCmd +} + +func buildLoadBalancerPGUrl() *cobra.Command { + loadBalancerPGUrl := &cobra.Command{ + Use: "pgurl ", + Short: "get the postgres URL of a load balancer", + Long: fmt.Sprintf(`Get the postgres URL of a load balancer. 
+%[1]s`, strings.TrimSpace(AuthModeHelp)), + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + auth, err := install.ResolveAuthMode(authMode) + if err != nil { + return err + } + url, err := roachprod.LoadBalancerPgURL(context.Background(), config.Logger, args[0], pgurlCertsDir, roachprod.PGURLOptions{ + Database: database, + External: external, + Secure: isSecure, + VirtualClusterName: virtualClusterName, + SQLInstance: sqlInstance, + Auth: auth, + }) + if err != nil { + return err + } + fmt.Println(url) + return nil + }), + } + initFlagPgurlCertsDirForCmd(loadBalancerPGUrl) + initFlagAuthModeNDatabaseForCmd(loadBalancerPGUrl) + initFlagInsecureForCmd(loadBalancerPGUrl) + initFlagsClusterNSQLForCmd(loadBalancerPGUrl) + return loadBalancerPGUrl +} + +func buildLoadBalancerIP() *cobra.Command { + loadBalancerIP := &cobra.Command{ + Use: "ip ", + Short: "get the IP address of a load balancer", + Long: "Get the IP address of a load balancer.", + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + ip, err := roachprod.LoadBalancerIP(context.Background(), config.Logger, args[0], virtualClusterName, sqlInstance) + if err != nil { + return err + } + fmt.Println(ip) + return nil + }), + } + initFlagsClusterNSQLForCmd(loadBalancerIP) + return loadBalancerIP +} + +func (cr *commandRegistry) buildListCmd() *cobra.Command { + listCmd := &cobra.Command{ + Use: "list [--details | --json] [ --mine | --pattern ]", + Short: "list all clusters", + Long: `List all clusters. + +The list command accepts a flag --pattern which is a regular +expression that will be matched against the cluster name pattern. Alternatively, +the --mine flag can be provided to list the clusters that are owned by the current +user. + +The default output shows one line per cluster, including the local cluster if +it exists: + + ~ roachprod list + local: [local] 1 (-) + marc-test: [aws gce] 4 (5h34m35s) + Syncing... 
+ +The second column lists the cloud providers that host VMs for the cluster. + +The third and fourth columns are the number of nodes in the cluster and the +time remaining before the cluster will be automatically destroyed. Note that +local clusters do not have an expiration. + +The --details flag adjusts the output format to include per-node details: + + ~ roachprod list --details + local [local]: (no expiration) + localhost 127.0.0.1 127.0.0.1 + marc-test: [aws gce] 5h33m57s remaining + marc-test-0001 marc-test-0001.us-east1-b.cockroach-ephemeral 10.142.0.18 35.229.60.91 + marc-test-0002 marc-test-0002.us-east1-b.cockroach-ephemeral 10.142.0.17 35.231.0.44 + marc-test-0003 marc-test-0003.us-east1-b.cockroach-ephemeral 10.142.0.19 35.229.111.100 + marc-test-0004 marc-test-0004.us-east1-b.cockroach-ephemeral 10.142.0.20 35.231.102.125 + Syncing... + +The first and second column are the node hostname and fully qualified name +respectively. The third and fourth column are the private and public IP +addresses. + +The --json flag sets the format of the command output to json. + +Listing clusters has the side-effect of syncing ssh keys/configs and the local +hosts file. +`, + Args: cobra.NoArgs, + Run: wrap(func(cmd *cobra.Command, args []string) error { + if listJSON && listDetails { + return errors.New("'json' option cannot be combined with 'details' option") + } + filteredCloud, err := roachprod.List(config.Logger, listMine, listPattern, + vm.ListOptions{ + Username: username, + ComputeEstimatedCost: listCost, + }) + + if err != nil { + return err + } + + // sort by cluster names for stable output. 
+ names := make([]string, len(filteredCloud.Clusters)) + maxClusterName := 0 + i := 0 + for name := range filteredCloud.Clusters { + names[i] = name + if len(name) > maxClusterName { + maxClusterName = len(name) + } + i++ + } + sort.Strings(names) + + p := message.NewPrinter(language.English) + if listJSON { + enc := json.NewEncoder(os.Stdout) + enc.SetIndent("", " ") + if err := enc.Encode(filteredCloud); err != nil { + return err + } + } else { + machineType := func(clusterVMs vm.List) string { + return clusterVMs[0].MachineType + } + cpuArch := func(clusterVMs vm.List) string { + // Display CPU architecture and family. + if clusterVMs[0].CPUArch == "" { + // N.B. Either a local cluster or unsupported cloud provider. + return "" + } + if clusterVMs[0].CPUFamily != "" { + return clusterVMs[0].CPUFamily + } + if clusterVMs[0].CPUArch != vm.ArchAMD64 { + return string(clusterVMs[0].CPUArch) + } + // AMD64 is the default, so don't display it. + return "" + } + // Align columns right and separate with at least two spaces. + tw := tabwriter.NewWriter(os.Stdout, 0, 8, 2, ' ', tabwriter.AlignRight) + // N.B. colors use escape codes which don't play nice with tabwriter [1]. + // We use a hacky workaround below to color the empty string. + // [1] https://github.com/golang/go/issues/12073 + + if !listDetails { + // Print header only if we are not printing cluster details. + fmt.Fprintf(tw, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t\n", + "Cluster", "Clouds", "Size", "VM", "Arch", + color.HiWhiteString("$/hour"), color.HiWhiteString("$ Spent"), + color.HiWhiteString("Uptime"), color.HiWhiteString("TTL"), + color.HiWhiteString("$/TTL")) + // Print separator. 
+ fmt.Fprintf(tw, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t\n", + "", "", "", "", + color.HiWhiteString(""), color.HiWhiteString(""), + color.HiWhiteString(""), color.HiWhiteString(""), + color.HiWhiteString("")) + } + totalCostPerHour := 0.0 + for _, name := range names { + c := filteredCloud.Clusters[name] + if listDetails { + if err = c.PrintDetails(config.Logger); err != nil { + return err + } + } else { + // N.B. Tabwriter doesn't support per-column alignment. It looks odd to have the cluster names right-aligned, + // so we make it left-aligned. + fmt.Fprintf(tw, "%s\t%s\t%d\t%s\t%s", name+strings.Repeat(" ", maxClusterName-len(name)), c.Clouds(), + len(c.VMs), machineType(c.VMs), cpuArch(c.VMs)) + if !c.IsLocal() { + colorByCostBucket := func(cost float64) func(string, ...interface{}) string { + switch { + case cost <= 100: + return color.HiGreenString + case cost <= 1000: + return color.HiBlueString + default: + return color.HiRedString + } + } + timeRemaining := c.LifetimeRemaining().Round(time.Second) + formatTTL := func(ttl time.Duration) string { + if c.VMs[0].Preemptible { + return color.HiMagentaString(ttl.String()) + } else { + return color.HiBlueString(ttl.String()) + } + } + cost := c.CostPerHour + totalCostPerHour += cost + alive := timeutil.Since(c.CreatedAt).Round(time.Minute) + costSinceCreation := cost * float64(alive) / float64(time.Hour) + costRemaining := cost * float64(timeRemaining) / float64(time.Hour) + if cost > 0 { + fmt.Fprintf(tw, "\t%s\t%s\t%s\t%s\t%s\t", + color.HiGreenString(p.Sprintf("$%.2f", cost)), + colorByCostBucket(costSinceCreation)(p.Sprintf("$%.2f", costSinceCreation)), + color.HiWhiteString(alive.String()), + formatTTL(timeRemaining), + colorByCostBucket(costRemaining)(p.Sprintf("$%.2f", costRemaining))) + } else { + fmt.Fprintf(tw, "\t%s\t%s\t%s\t%s\t%s\t", + color.HiGreenString(""), + color.HiGreenString(""), + color.HiWhiteString(alive.String()), + formatTTL(timeRemaining), + color.HiGreenString("")) + } + } else { + 
fmt.Fprintf(tw, "\t(-)") + } + fmt.Fprintf(tw, "\n") + } + } + if err := tw.Flush(); err != nil { + return err + } + + if totalCostPerHour > 0 { + _, _ = p.Printf("\nTotal cost per hour: $%.2f\n", totalCostPerHour) + } + + // Optionally print any dangling instances with errors + if listDetails { + collated := filteredCloud.BadInstanceErrors() + + // Sort by Error() value for stable output + var errors ui.ErrorsByError + for err := range collated { + errors = append(errors, err) + } + sort.Sort(errors) + + for _, e := range errors { + fmt.Printf("%s: %s\n", e, collated[e].Names()) + } + } + } + return nil + }), + } + cr.addToExcludeFromBashCompletion(listCmd) + initListCmdFlags(listCmd) + initFlagUsernameForCmd(listCmd) + initFlagDNSRequiredProvidersForCmd(listCmd) + return listCmd +} + +// TODO(peter): Do we need this command given that the "list" command syncs as +// a side-effect. If you don't care about the list output, just "roachprod list +// &>/dev/null". +func (cr *commandRegistry) buildSyncCmd() *cobra.Command { + syncCmd := &cobra.Command{ + Use: "sync [flags]", + Short: "sync ssh keys/config and hosts files", + Long: ``, + Args: cobra.NoArgs, + Run: wrap(func(cmd *cobra.Command, args []string) error { + _, err := roachprod.Sync(config.Logger, listOpts) + _ = cr.rootCmd.GenBashCompletionFile(bashCompletion) + return err + }), + } + cr.addToExcludeFromBashCompletion(syncCmd) + initSyncCmdFlags(syncCmd) + initFlagDNSRequiredProvidersForCmd(syncCmd) + return syncCmd +} + +func (cr *commandRegistry) buildGCCmd() *cobra.Command { + gcCmd := &cobra.Command{ + Use: "gc", + Short: "GC expired clusters and unused AWS keypairs\n", + Long: `Garbage collect expired clusters and unused SSH keypairs in AWS. + +Destroys expired clusters, sending email if properly configured. Usually run +hourly by a cronjob so it is not necessary to run manually. 
+`, + Args: cobra.NoArgs, + Run: wrap(func(cmd *cobra.Command, args []string) error { + return roachprod.GC(config.Logger, dryrun) + }), + } + cr.addToExcludeFromBashCompletion(gcCmd) + initGCCmdFlags(gcCmd) + return gcCmd +} + +func (cr *commandRegistry) buildSetupSSHCmd() *cobra.Command { + return &cobra.Command{ + Use: "setup-ssh ", + Short: "set up ssh for a cluster", + Long: `Sets up the keys and host keys for the vms in the cluster. + +It first resets the machine credentials as though the cluster were newly created +using the cloud provider APIs and then proceeds to ensure that the hosts can +SSH into eachother and lastly adds additional public keys to AWS hosts as read +from the GCP project. This operation is performed as the last step of creating +a new cluster but can be useful to re-run if the operation failed previously or +if the user would like to update the keys on the remote hosts. +`, + + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) (retErr error) { + return roachprod.SetupSSH(context.Background(), config.Logger, args[0]) + }), + } +} + +func (cr *commandRegistry) buildStatusCmd() *cobra.Command { + statusCmd := &cobra.Command{ + Use: "status ", + Short: "retrieve the status of nodes in a cluster", + Long: `Retrieve the status of nodes in a cluster. + +The "status" command outputs the binary and PID for the specified nodes: + + ~ roachprod status local + local: status 3/3 + 1: cockroach 29688 + 2: cockroach 29687 + 3: cockroach 29689 +` + tagHelp + ` +`, + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + statuses, err := roachprod.Status(context.Background(), config.Logger, args[0], tag) + if err != nil { + return err + } + for _, status := range statuses { + if status.Err != nil { + config.Logger.Printf(" %2d: %s %s\n", status.NodeID, status.Err.Error()) + } else if !status.Running { + // TODO(irfansharif): Surface the staged version here? 
+ config.Logger.Printf(" %2d: not running\n", status.NodeID) + } else { + config.Logger.Printf(" %2d: %s %s\n", status.NodeID, status.Version, status.Pid) + } + } + return nil + }), + } + addHelpAboutNodes(statusCmd) + initFlagInsecureIgnoreHostKeyForCmd(statusCmd) + initFlagTagForCmd(statusCmd) + return statusCmd +} + +func (cr *commandRegistry) buildMonitorCmd() *cobra.Command { + monitorCmd := &cobra.Command{ + Use: "monitor", + Short: "monitor the status of nodes in a cluster", + Long: `Monitor the status of cockroach nodes in a cluster. + +The "monitor" command runs until terminated. At startup it outputs a line for +each specified node indicating the status of the node (either the PID of the +node if alive, or "dead" otherwise). It then watches for changes in the status +of nodes, outputting a line whenever a change is detected: + + ~ roachprod monitor local + 1: 29688 + 3: 29689 + 2: 29687 + 3: dead + 3: 30718 +`, + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + eventChan, err := roachprod.Monitor(context.Background(), config.Logger, args[0], monitorOpts) + if err != nil { + return err + } + for info := range eventChan { + fmt.Println(info.String()) + } + + return nil + }), + } + initMonitorCmdFlags(monitorCmd) + initFlagInsecureIgnoreHostKeyForCmd(monitorCmd) + return monitorCmd +} + +func (cr *commandRegistry) buildStartCmd() *cobra.Command { + startCmd := &cobra.Command{ + Use: "start ", + Short: "start nodes on a cluster", + Long: `Start nodes on a cluster. + +Nodes are started in secure mode by default and there is a one time +initialization for the cluster to create and distribute the certs. +Note that running some modes in secure mode and others in insecure +mode is not a supported Cockroach configuration. To start nodes in +insecure mode, use the --insecure flag. + +The --binary flag specifies the remote binary to run. It is up to the roachprod +user to ensure this binary exists, usually via "roachprod put". 
Note that no +cockroach software is installed by default on a newly created cluster. + +The --args and --env flags can be used to pass arbitrary command line flags and +environment variables to the cockroach process. +` + tagHelp + ` +The "start" command takes care of setting up the --join address and specifying +reasonable defaults for other flags. One side-effect of this convenience is +that node 1 is special and if started, is used to auto-initialize the cluster. +The --skip-init flag can be used to avoid auto-initialization (which can then +separately be done using the "init" command). + +If the COCKROACH_DEV_LICENSE environment variable is set the enterprise.license +cluster setting will be set to its value. +`, + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + clusterSettingsOpts := []install.ClusterSettingOption{ + install.TagOption(tag), + install.PGUrlCertsDirOption(pgurlCertsDir), + install.SecureOption(isSecure), + install.UseTreeDistOption(useTreeDist), + install.EnvOption(nodeEnv), + install.NumRacksOption(numRacks), + } + return roachprod.Start(context.Background(), config.Logger, args[0], startOpts, clusterSettingsOpts...) + }), + } + addHelpAboutNodes(startCmd) + initStartCmdFlags(startCmd) + initFlagsStartOpsForCmd(startCmd) + initFlagInsecureIgnoreHostKeyForCmd(startCmd) + initFlagTagForCmd(startCmd) + initFlagSCPForCmd(startCmd) + initFlagBinaryForCmd(startCmd) + initFlagInsecureForCmd(startCmd) + initFlagDNSRequiredProvidersForCmd(startCmd) + return startCmd +} + +func (cr *commandRegistry) buildUpdateTargetsCmd() *cobra.Command { + updateTargetsCmd := &cobra.Command{ + Use: "update-targets ", + Short: "update prometheus target configurations for a cluster", + Long: `Update prometheus target configurations of each node of a cluster. + +The "start" command updates the prometheus target configuration every time. But, in case of any +failure, this command can be used to update the configurations. 
+ +The default prometheus url is https://grafana.testeng.crdb.io/. This can be overwritten by using the +environment variable COCKROACH_PROM_HOST_URL + +Note that if the cluster is started in insecure mode, set the insecure mode here as well by using the --insecure flag. +`, + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + clusterSettingsOpts := []install.ClusterSettingOption{ + install.SecureOption(isSecure), + } + return roachprod.UpdateTargets(context.Background(), config.Logger, args[0], clusterSettingsOpts...) + }), + } + initFlagInsecureForCmd(updateTargetsCmd) + return updateTargetsCmd +} + +func (cr *commandRegistry) buildStopCmd() *cobra.Command { + stopCmd := &cobra.Command{ + Use: "stop [--sig] [--wait]", + Short: "stop nodes on a cluster", + Long: `Stop nodes on a cluster. + +Stop roachprod created processes running on the nodes in a cluster, including +processes started by the "start", "run" and "ssh" commands. Every process +started by roachprod is tagged with a ROACHPROD environment variable which is +used by "stop" to locate the processes and terminate them. By default processes +are killed with signal 9 (SIGKILL) giving them no chance for a graceful exit. + +The --sig flag will pass a signal to kill to allow us finer control over how we +shutdown cockroach. The --wait flag causes stop to loop waiting for all +processes with the right ROACHPROD environment variable to exit. Note that stop +will wait forever if you specify --wait with a non-terminating signal (e.g. +SIGHUP), unless you also configure --max-wait. + +--wait defaults to true for signal 9 (SIGKILL) and false for all other signals. 
+` + tagHelp + ` +`, + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + wait := waitFlag + if sig == 9 /* SIGKILL */ && !cmd.Flags().Changed("wait") { + wait = true + } + stopOpts := roachprod.StopOpts{Wait: wait, GracePeriod: gracePeriod, ProcessTag: tag, Sig: sig} + return roachprod.Stop(context.Background(), config.Logger, args[0], stopOpts) + }), + } + addHelpAboutNodes(stopCmd) + initFlagsStopProcessForCmd(stopCmd, &sig, &waitFlag, &gracePeriod) + initFlagInsecureIgnoreHostKeyForCmd(stopCmd) + initFlagTagForCmd(stopCmd) + return stopCmd +} + +func (cr *commandRegistry) buildStartInstanceCmd() *cobra.Command { + startInstanceCmd := &cobra.Command{ + Use: "start-sql --storage-cluster [--external-nodes ]", + Short: "start the SQL/HTTP service for a virtual cluster as a separate process", + Long: `Start SQL/HTTP instances for a virtual cluster as separate processes. + +The --storage-cluster flag must be used to specify a storage cluster +(with optional node selector) which is already running. The command +will create the virtual cluster on the storage cluster if it does not +exist already. If creating multiple virtual clusters on the same +node, the --sql-instance flag must be passed to differentiate them. + +The instance is started in shared process (in memory) mode by +default. To start an external process instance, pass the +--external-cluster flag indicating where the SQL server processes +should be started. + +Nodes are started in secure mode by default and there is a one time +initialization for the cluster to create and distribute the certs. +Note that running some modes in secure mode and others in insecure +mode is not a supported Cockroach configuration. To start nodes in +insecure mode, use the --insecure flag. + +The --binary flag specifies the remote binary to run, if starting +external services. It is up to the roachprod user to ensure this +binary exists, usually via "roachprod put". 
Note that no cockroach +software is installed by default on a newly created cluster. + +The --args and --env flags can be used to pass arbitrary command line flags and +environment variables to the cockroach process. +` + tagHelp + ` +`, + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + clusterSettingsOpts := []install.ClusterSettingOption{ + install.TagOption(tag), + install.PGUrlCertsDirOption(pgurlCertsDir), + install.SecureOption(isSecure), + install.UseTreeDistOption(useTreeDist), + install.EnvOption(nodeEnv), + install.NumRacksOption(numRacks), + } + + startOpts.Target = install.StartSharedProcessForVirtualCluster + // If the user passed an `--external-nodes` option, we are + // starting a separate process virtual cluster. + if startOpts.VirtualClusterLocation != "" { + startOpts.Target = install.StartServiceForVirtualCluster + } + + startOpts.VirtualClusterName = args[0] + return roachprod.StartServiceForVirtualCluster( + context.Background(), config.Logger, storageCluster, startOpts, clusterSettingsOpts..., + ) + }), + } + initStartInstanceCmdFlags(startInstanceCmd) + initFlagsStartOpsForCmd(startInstanceCmd) + initFlagTagForCmd(startInstanceCmd) + initFlagBinaryForCmd(startInstanceCmd) + initFlagInsecureForCmd(startInstanceCmd) + return startInstanceCmd +} + +func (cr *commandRegistry) buildStopInstanceCmd() *cobra.Command { + stopInstanceCmd := &cobra.Command{ + Use: "stop-sql --cluster --sql-instance [--sig] [--wait]", + Short: "stop sql instances on a cluster", + Long: `Stop sql instances on a cluster. + +Stop roachprod created virtual clusters (shared or separate process). By default, +separate processes are killed with signal 9 (SIGKILL) giving them no chance for a +graceful exit. + +The --sig flag will pass a signal to kill to allow us finer control over how we +shutdown processes. The --wait flag causes stop to loop waiting for all +processes to exit. 
Note that stop will wait forever if you specify --wait with a +non-terminating signal (e.g. SIGHUP), unless you also configure --max-wait. + +--wait defaults to true for signal 9 (SIGKILL) and false for all other signals. +`, + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + wait := waitFlag + if sig == 9 /* SIGKILL */ && !cmd.Flags().Changed("wait") { + wait = true + } + stopOpts := roachprod.StopOpts{ + Wait: wait, + GracePeriod: gracePeriod, + Sig: sig, + VirtualClusterName: virtualClusterName, + SQLInstance: sqlInstance, + } + clusterName := args[0] + return roachprod.StopServiceForVirtualCluster(context.Background(), config.Logger, clusterName, isSecure, stopOpts) + }), + } + initFlagsStopProcessForCmd(stopInstanceCmd, &sig, &waitFlag, &gracePeriod) + initFlagInsecureForCmd(stopInstanceCmd) + initFlagsClusterNSQLForCmd(stopInstanceCmd) + return stopInstanceCmd +} + +func (cr *commandRegistry) buildDeployCmd() *cobra.Command { + deployCmd := &cobra.Command{ + Use: "deploy |", + Short: "deploy a new version of cockroach", + Long: fmt.Sprintf(`Performs a rolling upgrade of cockroach. + +The deploy command currently only supports redeploying the storage cluster +(system tenant). It should be run on a cluster that is already running +cockroach. The command will download the specified version of cockroach and +stage it on the cluster. It will then perform a rolling upgrade of the cluster, +one node at a time, to the new version. 
+ +Currently available application options are: + %s`, strings.TrimSpace(cockroachApp+releaseApp+customizedApp+localApp)), + Args: cobra.RangeArgs(2, 3), + Run: wrap(func(cmd *cobra.Command, args []string) error { + versionArg := "" + pathToBinary := "" + if args[1] == "local" { + if len(args) < 3 { + return errors.New("local application requires a path to the binary: deploy local ") + } + pathToBinary = args[2] + } else if len(args) == 3 { + versionArg = args[2] + } + return roachprod.Deploy(context.Background(), config.Logger, args[0], args[1], + versionArg, pathToBinary, pause, deploySig, deployWaitFlag, deployGracePeriod, secure) + }), + } + deployCmd.Flags().DurationVar(&pause, "pause", pause, "duration to pause between node restarts") + initFlagsStopProcessForCmd(deployCmd, &deploySig, &deployWaitFlag, &deployGracePeriod) + return deployCmd +} + +func (cr *commandRegistry) buildInitCmd() *cobra.Command { + initCmd := &cobra.Command{ + Use: "init ", + Short: "initialize the cluster", + Long: `Initialize the cluster. + +The "init" command bootstraps the cluster (using "cockroach init"). It also sets +default cluster settings. It's intended to be used in conjunction with +'roachprod start --skip-init'. +`, + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + return roachprod.Init(context.Background(), config.Logger, args[0], startOpts) + }), + } + initCmd.Flags().IntVar(&startOpts.InitTarget, + "init-target", startOpts.InitTarget, "node on which to run initialization") + return initCmd +} + +func (cr *commandRegistry) buildRunCmd() *cobra.Command { + runCmd := &cobra.Command{ + Use: "run [args]", + Aliases: []string{"ssh"}, + Short: "run a command on the nodes in a cluster", + Long: `Run a command on the nodes in a cluster. 
+`, + Args: cobra.MinimumNArgs(1), + Run: wrap(func(_ *cobra.Command, args []string) error { + return roachprod.Run(context.Background(), config.Logger, args[0], extraSSHOptions, tag, + isSecure, os.Stdout, os.Stderr, args[1:], install.RunOptions{FailOption: install.FailSlow}) + }), + } + addHelpAboutNodes(runCmd) + runCmd.Flags().StringVarP(&extraSSHOptions, + "ssh-options", "O", "", "extra args to pass to ssh") + initFlagInsecureIgnoreHostKeyForCmd(runCmd) + initFlagTagForCmd(runCmd) + initFlagInsecureForCmd(runCmd) + return runCmd +} + +func (cr *commandRegistry) buildSignalCmd() *cobra.Command { + signalCmd := &cobra.Command{ + Use: "signal ", + Short: "send signal to cluster", + Long: "Send a POSIX signal, specified by its integer code, to every process started via roachprod in a cluster.", + Args: cobra.ExactArgs(2), + Run: wrap(func(cmd *cobra.Command, args []string) error { + sig, err := strconv.ParseInt(args[1], 10, 8) + if err != nil { + return errors.Wrapf(err, "invalid signal argument") + } + return roachprod.Signal(context.Background(), config.Logger, args[0], int(sig)) + }), + } + addHelpAboutNodes(signalCmd) + return signalCmd +} + +func (cr *commandRegistry) buildWipeCmd() *cobra.Command { + wipeCmd := &cobra.Command{ + Use: "wipe ", + Short: "wipe a cluster", + Long: `Wipe the nodes in a cluster. + +The "wipe" command first stops any processes running on the nodes in a cluster +(via the "stop" command) and then deletes the data directories used by the +nodes. 
+`, + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + return roachprod.Wipe(context.Background(), config.Logger, args[0], wipePreserveCerts) + }), + } + addHelpAboutNodes(wipeCmd) + wipeCmd.Flags().BoolVar(&wipePreserveCerts, "preserve-certs", false, "do not wipe certificates") + initFlagInsecureIgnoreHostKeyForCmd(wipeCmd) + return wipeCmd +} + +func (cr *commandRegistry) buildDestroyDNSCmd() *cobra.Command { + return &cobra.Command{ + Use: `destroy-dns `, + Short: `cleans up DNS entries for the cluster`, + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + return roachprod.DestroyDNS(context.Background(), config.Logger, args[0]) + }), + } +} + +func (cr *commandRegistry) buildReformatCmd() *cobra.Command { + reformatCmd := &cobra.Command{ + Use: "reformat ", + Short: "reformat disks in a cluster\n", + Long: ` +Reformat disks in a cluster to use the specified filesystem. + +WARNING: Reformatting will delete all existing data in the cluster. + +Filesystem options: + ext4 + zfs + +When running with ZFS, you can create a snapshot of the filesystem's current +state using the 'zfs snapshot' command: + + $ roachprod run 'sudo zfs snapshot data1@pristine' + +You can then nearly instantaneously restore the filesystem to this state with +the 'zfs rollback' command: + + $ roachprod run 'sudo zfs rollback data1@pristine' + +`, + + Args: cobra.ExactArgs(2), + Run: wrap(func(cmd *cobra.Command, args []string) error { + return roachprod.Reformat(context.Background(), config.Logger, args[0], args[1]) + }), + } + initFlagInsecureIgnoreHostKeyForCmd(reformatCmd) + return reformatCmd +} + +func (cr *commandRegistry) buildInstallCmd() *cobra.Command { + installCmd := &cobra.Command{ + Use: "install ", + Short: "install 3rd party software", + Long: `Install third party software. 
Currently available installation options are: + + ` + strings.Join(install.SortedCmds(), "\n ") + ` +`, + Args: cobra.MinimumNArgs(2), + Run: wrap(func(cmd *cobra.Command, args []string) error { + return roachprod.Install(context.Background(), config.Logger, args[0], args[1:]) + }), + } + addHelpAboutNodes(installCmd) + initFlagInsecureIgnoreHostKeyForCmd(installCmd) + return installCmd +} + +func (cr *commandRegistry) buildDistributeCertsCmd() *cobra.Command { + return &cobra.Command{ + Use: "distribute-certs ", + Short: "distribute certificates to the nodes in a cluster", + Long: `Distribute certificates to the nodes in a cluster. +If the certificates already exist, no action is taken. Note that this command is +invoked automatically when a secure cluster is bootstrapped by "roachprod +start." +`, + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + return roachprod.DistributeCerts(context.Background(), config.Logger, args[0]) + }), + } +} + +func (cr *commandRegistry) buildSshKeysCmd() *cobra.Command { + sshKeysCmd := &cobra.Command{ + Use: "ssh-keys", + Short: "manage SSH public keys added to clusters created by roachprod", + } + sshKeysCmd.AddCommand( + buildSSHKeysListCmd(), + buildSSHKeysAddCmd(), + buildSSHKeysRemoveCmd(), + ) + return sshKeysCmd +} + +func buildSSHKeysListCmd() *cobra.Command { + return &cobra.Command{ + Use: "list", + Short: "list every SSH public key installed on clusters managed by roachprod", + Run: wrap(func(cmd *cobra.Command, args []string) error { + authorizedKeys, err := gce.GetUserAuthorizedKeys() + if err != nil { + return err + } + + return printPublicKeyTable(authorizedKeys, true /* includeSize */) + }), + } +} + +func buildSSHKeysAddCmd() *cobra.Command { + sshKeysAddCmd := &cobra.Command{ + Use: "add [--user user]", + Short: "add a new SSH public key to the set of keys installed on clusters managed by roachprod", + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args 
[]string) error { + sshKeyPath := args[0] + pkBytes, err := os.ReadFile(sshKeyPath) + if err != nil { + return fmt.Errorf("error reading public key file: %w", err) + } + + pubkey, comment, _, _, err := ssh.ParseAuthorizedKey(pkBytes) + if err != nil { + return fmt.Errorf("error parsing public key: %w", err) + } + + ak := gce.AuthorizedKey{ + User: sshKeyUser, + Key: pubkey, + Comment: comment, + } + + fmt.Printf("Adding new public key for user %s...\n", ak.User) + return gce.AddUserAuthorizedKey(ak) + }), + } + sshKeysAddCmd.Flags().StringVar(&sshKeyUser, "user", config.OSUser.Username, + "the user to be associated with the new key", + ) + return sshKeysAddCmd +} + +func buildSSHKeysRemoveCmd() *cobra.Command { + return &cobra.Command{ + Use: "remove ", + Short: "remove public keys belonging to a user from the set of keys installed on clusters managed by roachprod", + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + user := args[0] + + existingKeys, err := gce.GetUserAuthorizedKeys() + if err != nil { + return fmt.Errorf("failed to fetch existing keys: %w", err) + } + + var toBeDeleted gce.AuthorizedKeys + var newKeys gce.AuthorizedKeys + for _, existing := range existingKeys { + if existing.User == user { + toBeDeleted = append(toBeDeleted, existing) + } else { + newKeys = append(newKeys, existing) + } + } + + if len(toBeDeleted) == 0 { + fmt.Printf("No keys deleted.\n") + return nil + } + + fmt.Printf("The following keys are going to be deleted:\n") + if err := printPublicKeyTable(toBeDeleted, false /* includeSize */); err != nil { + return err + } + + if PromptYesNo("Are you sure?", false /* defaultYes */) { + fmt.Printf("Deleting %d keys belonging to %s...\n", len(toBeDeleted), user) + return gce.SetUserAuthorizedKeys(newKeys) + } else { + fmt.Printf("Aborted.\n") + return nil + } + }), + } +} + +func (cr *commandRegistry) buildPutCmd() *cobra.Command { + putCmd := &cobra.Command{ + Use: "put []", + Short: "copy a local 
file to the nodes in a cluster", + Long: `Copy a local file to the nodes in a cluster. +`, + Args: cobra.RangeArgs(2, 3), + Run: wrap(func(cmd *cobra.Command, args []string) error { + src := args[1] + dest := path.Base(src) + if len(args) == 3 { + dest = args[2] + } + return roachprod.Put(context.Background(), config.Logger, args[0], src, dest, useTreeDist) + }), + } + addHelpAboutNodes(putCmd) + putCmd.Flags().BoolVar(&useTreeDist, "treedist", useTreeDist, "use treedist copy algorithm") + initFlagInsecureIgnoreHostKeyForCmd(putCmd) + initFlagSCPForCmd(putCmd) + return putCmd +} + +func (cr *commandRegistry) buildGetCmd() *cobra.Command { + getCmd := &cobra.Command{ + Use: "get []", + Short: "copy a remote file from the nodes in a cluster", + Long: `Copy a remote file from the nodes in a cluster. If the file is retrieved from +multiple nodes the destination file name will be prefixed with the node number. +`, + Args: cobra.RangeArgs(2, 3), + Run: wrap(func(cmd *cobra.Command, args []string) error { + src := args[1] + dest := path.Base(src) + if len(args) == 3 { + dest = args[2] + } + return roachprod.Get(context.Background(), config.Logger, args[0], src, dest) + }), + } + addHelpAboutNodes(getCmd) + initFlagInsecureIgnoreHostKeyForCmd(getCmd) + initFlagSCPForCmd(getCmd) + return getCmd +} + +func (cr *commandRegistry) buildStageCmd() *cobra.Command { + stageCmd := &cobra.Command{ + Use: "stage []", + Short: "stage cockroach binaries", + Long: fmt.Sprintf(`Stages release and edge binaries to the cluster. 
+ +Currently available application options are: + %s + +Some examples of usage: + -- stage edge build of cockroach build at a specific SHA: + roachprod stage my-cluster cockroach e90e6903fee7dd0f88e20e345c2ddfe1af1e5a97 + + -- Stage the most recent edge build of the workload tool: + roachprod stage my-cluster workload + + -- Stage the official release binary of CockroachDB at version 2.0.5 + roachprod stage my-cluster release v2.0.5 + + -- Stage customized binary of CockroachDB at version v23.2.0-alpha.2-4375-g7cd2b76ed00 + roachprod stage my-cluster customized v23.2.0-alpha.2-4375-g7cd2b76ed00 +`, strings.TrimSpace(cockroachApp+workloadApp+releaseApp+customizedApp)), + Args: cobra.RangeArgs(2, 3), + Run: wrap(func(cmd *cobra.Command, args []string) error { + versionArg := "" + if len(args) == 3 { + versionArg = args[2] + } + return roachprod.Stage(context.Background(), config.Logger, args[0], stageOS, stageArch, stageDir, args[1], versionArg) + }), + } + initStageCmdFlags(stageCmd) + return stageCmd +} + +func (cr *commandRegistry) buildDownloadCmd() *cobra.Command { + return &cobra.Command{ + Use: "download [DESTINATION]", + Short: "download 3rd party tools", + Long: "Downloads 3rd party tools, using a GCS cache if possible.", + Args: cobra.RangeArgs(3, 4), + Run: wrap(func(cmd *cobra.Command, args []string) error { + src, sha := args[1], args[2] + var dest string + if len(args) == 4 { + dest = args[3] + } + return roachprod.Download(context.Background(), config.Logger, args[0], src, sha, dest) + }), + } +} + +func (cr *commandRegistry) buildStageURLCmd() *cobra.Command { + stageURLCmd := &cobra.Command{ + Use: "stageurl []", + Short: "print URL to cockroach binaries", + Long: `Prints URL for release and edge binaries. + +Currently available application options are: + cockroach - Cockroach nightly builds. Can provide an optional SHA, otherwise + latest build version is used. + workload - Cockroach workload application. + release - Official CockroachDB Release. 
Must provide a specific release + version. + customized - Cockroach customized builds, usually generated by running + ./scripts/tag-custom-build.sh. Must provide a specific tag. +`, + Args: cobra.RangeArgs(1, 2), + Run: wrap(func(cmd *cobra.Command, args []string) error { + versionArg := "" + if len(args) == 2 { + versionArg = args[1] + } + urls, err := roachprod.StageURL(config.Logger, args[0], versionArg, stageOS, stageArch) + if err != nil { + return err + } + for _, u := range urls { + fmt.Println(u) + } + return nil + }), + } + initStageURLCmdFlags(stageURLCmd) + return stageURLCmd +} + +func (cr *commandRegistry) buildSQLCmd() *cobra.Command { + sqlCmd := &cobra.Command{ + Use: "sql -- [args]", + Short: "run `cockroach sql` on a remote cluster", + Long: "Run `cockroach sql` on a remote cluster.\n", + Args: cobra.MinimumNArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + auth, ok := install.PGAuthModes[authMode] + if !ok { + return errors.Newf("unsupported auth-mode %s, valid auth-modes: %v", authMode, maps.Keys(install.PGAuthModes)) + } + + return roachprod.SQL(context.Background(), config.Logger, args[0], isSecure, virtualClusterName, sqlInstance, auth, database, args[1:]) + }), + } + addHelpAboutNodes(sqlCmd) + initFlagAuthModeNDatabaseForCmd(sqlCmd) + initFlagInsecureIgnoreHostKeyForCmd(sqlCmd) + initFlagBinaryForCmd(sqlCmd) + initFlagInsecureForCmd(sqlCmd) + initFlagsClusterNSQLForCmd(sqlCmd) + return sqlCmd +} + +func (cr *commandRegistry) buildIPCmd() *cobra.Command { + ipCmd := &cobra.Command{ + Use: "ip ", + Short: "get the IP addresses of the nodes in a cluster", + Long: `Get the IP addresses of the nodes in a cluster. 
+`, + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + ips, err := roachprod.IP(config.Logger, args[0], external) + if err != nil { + return err + } + for _, ip := range ips { + fmt.Println(ip) + } + return nil + }), + } + ipCmd.Flags().BoolVar(&external, + "external", false, "return external IP addresses") + initFlagInsecureIgnoreHostKeyForCmd(ipCmd) + return ipCmd +} + +func (cr *commandRegistry) buildPGUrlCmd() *cobra.Command { + pgurlCmd := &cobra.Command{ + Use: "pgurl --auth-mode ", + Short: "generate pgurls for the nodes in a cluster", + Long: fmt.Sprintf(`Generate pgurls for the nodes in a cluster. + +%[1]s +`, strings.TrimSpace(AuthModeHelp)), + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + auth, err := install.ResolveAuthMode(authMode) + if err != nil { + return err + } + urls, err := roachprod.PgURL(context.Background(), config.Logger, args[0], pgurlCertsDir, roachprod.PGURLOptions{ + Database: database, + External: external, + Secure: isSecure, + VirtualClusterName: virtualClusterName, + SQLInstance: sqlInstance, + Auth: auth, + }) + if err != nil { + return err + } + fmt.Println(strings.Join(urls, " ")) + return nil + }), + } + addHelpAboutNodes(pgurlCmd) + initFlagPgurlCertsDirForCmd(pgurlCmd) + initFlagAuthModeNDatabaseForCmd(pgurlCmd) + pgurlCmd.Flags().BoolVar(&external, + "external", false, "return pgurls for external connections") + initFlagInsecureIgnoreHostKeyForCmd(pgurlCmd) + initFlagInsecureForCmd(pgurlCmd) + initFlagsClusterNSQLForCmd(pgurlCmd) + return pgurlCmd +} + +func (cr *commandRegistry) buildAdminurlCmd() *cobra.Command { + adminurlCmd := &cobra.Command{ + Use: "adminurl ", + Aliases: []string{"admin", "adminui"}, + Short: "generate admin UI URLs for the nodes in a cluster\n", + Long: `Generate admin UI URLs for the nodes in a cluster. 
+`, + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + urls, err := roachprod.AdminURL( + context.Background(), config.Logger, args[0], virtualClusterName, sqlInstance, adminurlPath, adminurlIPs, urlOpen, isSecure, + ) + if err != nil { + return err + } + for _, url := range urls { + fmt.Println(url) + } + return nil + }), + } + addHelpAboutNodes(adminurlCmd) + initAdminurlCmdFlags(adminurlCmd) + initFlagOpenForCmd(adminurlCmd) + initFlagInsecureIgnoreHostKeyForCmd(adminurlCmd) + initFlagInsecureForCmd(adminurlCmd) + initFlagsClusterNSQLForCmd(adminurlCmd) + return adminurlCmd +} + +func (cr *commandRegistry) buildLogsCmd() *cobra.Command { + logsCmd := &cobra.Command{ + Use: "logs", + Short: "retrieve and merge logs in a cluster", + Long: `Retrieve and merge logs in a cluster. + +The "logs" command runs until terminated. It works similarly to get but is +specifically focused on retrieving logs periodically and then merging them +into a single stream. +`, + Args: cobra.RangeArgs(1, 2), + Run: wrap(func(cmd *cobra.Command, args []string) error { + logsOpts := roachprod.LogsOpts{ + Dir: logsDir, Filter: logsFilter, ProgramFilter: logsProgramFilter, + Interval: logsInterval, From: logsFrom, To: logsTo, Out: cmd.OutOrStdout(), + } + var dest string + if len(args) == 2 { + dest = args[1] + } else { + dest = args[0] + ".logs" + } + return roachprod.Logs(config.Logger, args[0], dest, logsOpts) + }), + } + initLogsCmdFlags(logsCmd) + return logsCmd +} + +func (cr *commandRegistry) buildPprofCmd() *cobra.Command { + pprofCmd := &cobra.Command{ + Use: "pprof ", + Args: cobra.ExactArgs(1), + Aliases: []string{"pprof-heap"}, + Short: "capture a pprof profile from the specified nodes", + Long: `Capture a pprof profile from the specified nodes. 
+ +Examples: + + # Capture CPU profile for all nodes in the cluster + roachprod pprof CLUSTERNAME + # Capture CPU profile for the first node in the cluster for 60 seconds + roachprod pprof CLUSTERNAME:1 --duration 60s + # Capture a Heap profile for the first node in the cluster + roachprod pprof CLUSTERNAME:1 --heap + # Same as above + roachprod pprof-heap CLUSTERNAME:1 +`, + Run: wrap(func(cmd *cobra.Command, args []string) error { + if cmd.CalledAs() == "pprof-heap" { + pprofOpts.Heap = true + } + return roachprod.Pprof(context.Background(), config.Logger, args[0], pprofOpts) + }), + } + initPprofCmdFlags(pprofCmd) + return pprofCmd +} + +func (cr *commandRegistry) buildCachedHostsCmd() *cobra.Command { + cachedHostsCmd := &cobra.Command{ + Use: "cached-hosts", + Short: "list all clusters (and optionally their host numbers) from local cache", + Args: cobra.NoArgs, + Run: wrap(func(cmd *cobra.Command, args []string) error { + roachprod.CachedClusters(func(clusterName string, numVMs int) { + if strings.HasPrefix(clusterName, "teamcity") { + return + } + fmt.Printf("%s", clusterName) + // When invoked by bash-completion, cachedHostsCluster is what the user + // has currently typed -- if this cluster matches that, expand its hosts. 
+ if strings.HasPrefix(cachedHostsCluster, clusterName) { + for i := 1; i <= numVMs; i++ { + fmt.Printf(" %s:%d", clusterName, i) + } + } + fmt.Printf("\n") + }) + return nil + }), + } + cachedHostsCmd.Flags().StringVar(&cachedHostsCluster, + "cluster", "", "print hosts matching cluster") + return cachedHostsCmd +} + +func (cr *commandRegistry) buildVersionCmd() *cobra.Command { + return &cobra.Command{ + Use: `version`, + Short: `print version information`, + RunE: func(cmd *cobra.Command, args []string) error { + fmt.Println(roachprod.Version(config.Logger)) + return nil + }, + } +} + +func (cr *commandRegistry) buildGetProvidersCmd() *cobra.Command { + return &cobra.Command{ + Use: `get-providers`, + Short: `print providers state (active/inactive)`, + RunE: func(cmd *cobra.Command, args []string) error { + providers := roachprod.InitProviders() + for provider, state := range providers { + fmt.Printf("%s: %s\n", provider, state) + } + return nil + }, + } +} + +func (cr *commandRegistry) buildGrafanaStartCmd() *cobra.Command { + grafanaStartCmd := &cobra.Command{ + Use: `grafana-start `, + Short: `spins up a prometheus and grafana instance on the last node in the cluster; NOTE: for arm64 clusters, use --arch arm64`, + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + var grafanaDashboardJSONs []string + var grafanaConfigURL string + if grafanaConfig != "" { + url, err := url.Parse(grafanaConfig) + if err != nil { + return err + } + switch url.Scheme { + case "http", "https": + grafanaConfigURL = grafanaConfig + case "file", "": + if data, err := grafana.GetDashboardJSONFromFile(url.Path); err != nil { + return err + } else { + grafanaDashboardJSONs = []string{data} + } + default: + return errors.Newf("unsupported scheme %s", url.Scheme) + } + } else { + var err error + if grafanaDashboardJSONs, err = grafana.GetDefaultDashboardJSONs(); err != nil { + return err + } + } + arch := vm.ArchAMD64 + if grafanaArch == "arm64" { + 
arch = vm.ArchARM64 + } + return roachprod.StartGrafana(context.Background(), config.Logger, args[0], arch, + grafanaConfigURL, grafanaDashboardJSONs, nil) + }), + } + initGrafanaStartCmdFlags(grafanaStartCmd) + return grafanaStartCmd +} + +func (cr *commandRegistry) buildGrafanaDumpCmd() *cobra.Command { + grafanaDumpCmd := &cobra.Command{ + Use: `grafana-dump `, + Short: `dump prometheus data to the specified directory`, + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + if grafanaDumpDir == "" { + return errors.New("--dump-dir unspecified") + } + return roachprod.PrometheusSnapshot(context.Background(), config.Logger, args[0], grafanaDumpDir) + }), + } + grafanaDumpCmd.Flags().StringVar(&grafanaDumpDir, "dump-dir", "", + "the absolute path to dump prometheus data to (use the contained 'prometheus-docker-run.sh' to visualize") + return grafanaDumpCmd +} + +func (cr *commandRegistry) buildGrafanaStopCmd() *cobra.Command { + return &cobra.Command{ + Use: `grafana-stop `, + Short: `spins down prometheus and grafana instances on the last node in the cluster`, + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + return roachprod.StopGrafana(context.Background(), config.Logger, args[0], "") + }), + } +} + +func (cr *commandRegistry) buildGrafanaURLCmd() *cobra.Command { + grafanaURLCmd := &cobra.Command{ + Use: `grafanaurl `, + Short: `returns a url to the grafana dashboard`, + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + url, err := roachprod.GrafanaURL(context.Background(), config.Logger, args[0], + urlOpen) + if err != nil { + return err + } + fmt.Println(url) + return nil + }), + } + initFlagOpenForCmd(grafanaURLCmd) + return grafanaURLCmd +} + +func (cr *commandRegistry) buildGrafanaAnnotationCmd() *cobra.Command { + grafanaAnnotationCmd := &cobra.Command{ + Use: `grafana-annotation --tags [, ...] 
--dashboard-uid --time-range [, ]`, + Short: `adds an annotation to the specified grafana instance`, + Long: fmt.Sprintf(`Adds an annotation to the specified grafana instance + +By default, we assume the grafana instance needs an authentication token to connect +to. A service account json and audience will be read in from the environment +variables %s and %s to attempt authentication through google IDP. Use the --insecure +option when a token is not necessary. + +--tags specifies the tags the annotation should have. + +--dashboard-uid specifies the dashboard you want the annotation to be created in. If +left empty, creates the annotation in the organization instead. + +--time-range can be used to specify in epoch millisecond time the annotation's timestamp. +If left empty, creates the annotation at the current time. If only start-time is specified, +creates an annotation at start-time. If both start-time and end-time are specified, +creates an annotation over time range. + +Example: +# Create an annotation over time range 1-100 on the centralized grafana instance, which needs authentication. +roachprod grafana-annotation grafana.testeng.crdb.io example-annotation-event --tags my-cluster --tags test-run-1 --dashboard-uid overview --time-range 1,100 +`, roachprodutil.ServiceAccountJson, roachprodutil.ServiceAccountAudience), + Args: cobra.ExactArgs(2), + Run: wrap(func(cmd *cobra.Command, args []string) error { + req := grafana.AddAnnotationRequest{ + Text: args[1], + Tags: grafanaTags, + DashboardUID: grafanaDashboardUID, + } + + switch len(grafanaTimeRange) { + case 0: + // Grafana API will default to adding annotation at current time. + case 1: + // Okay to only specify the start time. 
+ req.StartTime = grafanaTimeRange[0] + case 2: + req.StartTime = grafanaTimeRange[0] + req.EndTime = grafanaTimeRange[1] + default: + return errors.Newf("Too many arguments for --time-range, expected 1 or 2, got: %d", len(grafanaTimeRange)) + } + + return roachprod.AddGrafanaAnnotation(context.Background(), args[0] /* host */, isSecure, req) + }), + } + initGrafanaAnnotationCmdFlags(grafanaAnnotationCmd) + initFlagInsecureForCmd(grafanaAnnotationCmd) + return grafanaAnnotationCmd +} + +func (cr *commandRegistry) buildRootStorageCmd() *cobra.Command { + rootStorageCmd := &cobra.Command{ + Use: `storage`, + Short: "storage enables administering storage related commands and configurations", + Args: cobra.MinimumNArgs(1), + } + rootStorageCollectionCmd := &cobra.Command{ + Use: `collection`, + Short: "the collection command allows for enable or disabling the storage workload " + + "collector for a provided cluster (including a subset of nodes). The storage workload " + + "collection is defined in pebble replay/workload_capture.go.", + Args: cobra.MinimumNArgs(1), + } + rootStorageCmd.AddCommand(rootStorageCollectionCmd) + rootStorageCollectionCmd.AddCommand( + buildCollectionStartCmd(), + buildCollectionStopCmd(), + buildStorageSnapshotCmd(), + buildCollectionListVolumes(), + ) + return rootStorageCmd +} + +func buildCollectionStartCmd() *cobra.Command { + collectionStartCmd := &cobra.Command{ + Use: `start `, + Short: "start the workload collector for a provided cluster (including a subset of nodes)", + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + cluster := args[0] + return roachprod.StorageCollectionPerformAction( + context.Background(), + config.Logger, + cluster, + "start", + volumeCreateOpts, + ) + }), + } + initCollectionStartCmdFlags(collectionStartCmd) + return collectionStartCmd +} + +func buildCollectionStopCmd() *cobra.Command { + return &cobra.Command{ + Use: `stop `, + Short: "stop the workload collector for a 
provided cluster (including a subset of nodes)", + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + cluster := args[0] + return roachprod.StorageCollectionPerformAction( + context.Background(), + config.Logger, + cluster, + "stop", + volumeCreateOpts, + ) + }), + } +} + +func buildCollectionListVolumes() *cobra.Command { + return &cobra.Command{ + Use: `list-volumes `, + Short: "list the nodes and their attached collector volumes", + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + cluster := args[0] + return roachprod.StorageCollectionPerformAction( + context.Background(), + config.Logger, + cluster, + "list-volumes", + volumeCreateOpts, + ) + }), + } +} + +func buildStorageSnapshotCmd() *cobra.Command { + return &cobra.Command{ + Use: `snapshot `, + Short: "snapshot a clusters workload collector volume", + Args: cobra.ExactArgs(3), + Run: wrap(func(cmd *cobra.Command, args []string) error { + cluster := args[0] + name := args[1] + desc := args[2] + _, err := roachprod.CreateSnapshot(context.Background(), config.Logger, cluster, vm.VolumeSnapshotCreateOpts{ + Name: name, + Description: desc, + }) + return err + }), + } +} + +func (cr *commandRegistry) buildSnapshotCmd() *cobra.Command { + snapshotCmd := &cobra.Command{ + Use: `snapshot`, + Short: "snapshot enables creating/listing/deleting/applying cluster snapshots", + Args: cobra.MinimumNArgs(1), + } + snapshotCmd.AddCommand( + buildSnapshotCreateCmd(), + buildSnapshotListCmd(), + buildSnapshotDeleteCmd(), + buildSnapshotApplyCmd(), + ) + return snapshotCmd +} + +func buildSnapshotCreateCmd() *cobra.Command { + return &cobra.Command{ + Use: `create `, + Short: "snapshot a named cluster, using the given snapshot name and description", + Args: cobra.ExactArgs(3), + Run: wrap(func(cmd *cobra.Command, args []string) error { + cluster := args[0] + name := args[1] + desc := args[2] + snapshots, err := 
roachprod.CreateSnapshot(context.Background(), config.Logger, cluster, vm.VolumeSnapshotCreateOpts{ + Name: name, + Description: desc, + }) + if err != nil { + return err + } + for _, snapshot := range snapshots { + config.Logger.Printf("created snapshot %s (id: %s)", snapshot.Name, snapshot.ID) + } + return nil + }), + } +} + +func buildSnapshotListCmd() *cobra.Command { + return &cobra.Command{ + Use: `list []`, + Short: "list all snapshots for the given cloud provider, optionally filtering by the given name", + Args: cobra.RangeArgs(1, 2), + Run: wrap(func(cmd *cobra.Command, args []string) error { + provider := args[0] + var name string + if len(args) == 2 { + name = args[1] + } + snapshots, err := roachprod.ListSnapshots(context.Background(), config.Logger, provider, + vm.VolumeSnapshotListOpts{ + NamePrefix: name, + }, + ) + if err != nil { + return err + } + for _, snapshot := range snapshots { + config.Logger.Printf("found snapshot %s (id: %s)", snapshot.Name, snapshot.ID) + } + return nil + }), + } +} + +func buildSnapshotDeleteCmd() *cobra.Command { + snapshotDeleteCmd := &cobra.Command{ + Use: `delete `, + Short: "delete all snapshots for the given cloud provider optionally filtering by the given name", + Args: cobra.ExactArgs(2), + Run: wrap(func(cmd *cobra.Command, args []string) error { + ctx := context.Background() + provider, name := args[0], args[1] + snapshots, err := roachprod.ListSnapshots(ctx, config.Logger, provider, + vm.VolumeSnapshotListOpts{ + NamePrefix: name, + }, + ) + if err != nil { + return err + } + + for _, snapshot := range snapshots { + config.Logger.Printf("deleting snapshot %s (id: %s)", snapshot.Name, snapshot.ID) + } + if !dryrun { + if err := roachprod.DeleteSnapshots(ctx, config.Logger, provider, snapshots...); err != nil { + return err + } + } + config.Logger.Printf("done") + return nil + }), + } + snapshotDeleteCmd.Flags().BoolVar(&dryrun, + "dry-run", false, "dry run (don't perform any actions)") + return 
snapshotDeleteCmd +} + +func buildSnapshotApplyCmd() *cobra.Command { + return &cobra.Command{ + Use: `apply `, + Short: "apply the named snapshots from the given cloud provider to the named cluster", + Args: cobra.ExactArgs(3), + Run: wrap(func(cmd *cobra.Command, args []string) error { + ctx := context.Background() + provider, name, cluster := args[0], args[1], args[2] + snapshots, err := roachprod.ListSnapshots(ctx, config.Logger, provider, + vm.VolumeSnapshotListOpts{ + NamePrefix: name, + }, + ) + if err != nil { + return err + } + + return roachprod.ApplySnapshots(ctx, config.Logger, cluster, snapshots, vm.VolumeCreateOpts{ + Size: 500, // TODO(irfansharif): Make this configurable? + Labels: map[string]string{ + vm.TagUsage: "roachprod", + }, + }) + }), + } +} + +func (cr *commandRegistry) buildUpdateCmd() *cobra.Command { + updateCmd := &cobra.Command{ + Use: "update", + Short: "check gs://cockroach-nightly for a new roachprod binary; update if available", + Long: "Attempts to download the latest roachprod binary (on master) from gs://cockroach-nightly. " + + " Swaps the current binary with it. The current roachprod binary will be backed up" + + " and can be restored via `roachprod update --revert`.", + Run: wrap(func(cmd *cobra.Command, args []string) error { + // We only have prebuilt binaries for Linux. See #120750. + if runtime.GOOS != "linux" { + return errors.New("this command is only available on Linux at this time") + } + + currentBinary, err := os.Executable() + if err != nil { + return err + } + + if roachprodUpdateRevert { + if PromptYesNo("Revert to previous version? 
Note: this will replace the"+ + " current roachprod binary with a previous roachprod.bak binary.", true /* defaultYes */) { + if err := SwapBinary(currentBinary, currentBinary+".bak"); err != nil { + return err + } + fmt.Println("roachprod successfully reverted, run `roachprod -v` to confirm.") + } + return nil + } + + newBinary := currentBinary + ".new" + if err := + DownloadLatestRoachprod(newBinary, roachprodUpdateBranch, roachprodUpdateOS, roachprodUpdateArch); err != nil { + return err + } + + if PromptYesNo("Continue with update? This will overwrite any existing roachprod.bak binary.", true /* defaultYes */) { + if err := SwapBinary(currentBinary, newBinary); err != nil { + return errors.WithDetail(err, "unable to update binary") + } + + fmt.Println("Update successful: run `roachprod -v` to confirm.") + } + return nil + }), + } + initUpdateCmdFlags(updateCmd) + return updateCmd +} + +func (cr *commandRegistry) buildJaegerStartCmd() *cobra.Command { + jaegerStartCmd := &cobra.Command{ + Use: `jaeger-start `, + Short: `starts a jaeger container on the last node in the cluster`, + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + return roachprod.StartJaeger(context.Background(), config.Logger, args[0], + virtualClusterName, isSecure, jaegerConfigNodes) + }), + } + jaegerStartCmd.Flags().StringVar(&jaegerConfigNodes, "configure-nodes", "", + "the nodes on which to set the relevant CRDB cluster settings") + initFlagInsecureForCmd(jaegerStartCmd) + initFlagsClusterNSQLForCmd(jaegerStartCmd) + return jaegerStartCmd +} + +func (cr *commandRegistry) buildJaegerStopCmd() *cobra.Command { + return &cobra.Command{ + Use: `jaeger-stop `, + Short: `stops a running jaeger container on the last node in the cluster`, + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + return roachprod.StopJaeger(context.Background(), config.Logger, args[0]) + }), + } +} + +func (cr *commandRegistry) 
buildJaegerURLCmd() *cobra.Command { + jaegerURLCmd := &cobra.Command{ + Use: `jaegerurl `, + Short: `returns the URL of the cluster's jaeger UI`, + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + url, err := roachprod.JaegerURL(context.Background(), config.Logger, args[0], + urlOpen) + if err != nil { + return err + } + fmt.Println(url) + return nil + }), + } + initFlagOpenForCmd(jaegerURLCmd) + return jaegerURLCmd +} + +func (cr *commandRegistry) buildSideEyeRootCmd() *cobra.Command { + sideEyeRootCmd := &cobra.Command{ + Use: "side-eye", + Short: "interact with side-eye.io functionality", + Long: `Interact with side-eye.io functionality + +Side-Eye (app.side-eye.io) is a distributed debugger that can be used to capture +snapshots of a CockroachDB cluster. +`, + Args: cobra.MinimumNArgs(1), + } + sideEyeRootCmd.AddCommand(buildSideEyeInstallCmd()) + sideEyeRootCmd.AddCommand(buildSideEyeSnapCmd()) + return sideEyeRootCmd +} + +func buildSideEyeInstallCmd() *cobra.Command { + return &cobra.Command{ + Use: "install ", + Short: "install and start the Side-Eye agents on all nodes in the cluster", + Long: `Install and start the Side-Eye agents on all nodes in the cluster + +` + "`roachprod side-eye snapshot `" + ` can then be used to capture cluster snapshots. 
+`, + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + cluster := args[0] + + ctx := context.Background() + l := config.Logger + sideEyeToken, ok := roachprod.GetSideEyeTokenFromEnv() + if !ok { + return errors.New("Side-Eye token is not configured via SIDE_EYE_API_TOKEN or gcloud secret") + } + + return roachprod.StartSideEyeAgents(ctx, l, cluster, cluster /* envName */, sideEyeToken) + }), + } +} + +func buildSideEyeSnapCmd() *cobra.Command { + return &cobra.Command{ + Use: "snapshot ", + Aliases: []string{"snap"}, + Short: "capture a cluster snapshot", + Long: `Capture a cluster snapshot using Side-Eye + +The command will print an app.side-eye.io URL where the snapshot can be viewed. +`, + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + cluster := args[0] + ctx := context.Background() + l := config.Logger + l.PrintfCtx(ctx, "capturing snapshot of the cluster with Side-Eye...") + snapURL, ok := roachprod.CaptureSideEyeSnapshot(context.Background(), config.Logger, cluster, nil /* client */) + if ok { + l.PrintfCtx(ctx, "captured Side-Eye snapshot: %s", snapURL) + } + return nil + }), + } +} + +func (cr *commandRegistry) buildFluentBitStartCmd() *cobra.Command { + fluentBitStartCmd := &cobra.Command{ + Use: "fluent-bit-start ", + Short: "Install and start Fluent Bit", + Long: "Install and start Fluent Bit", + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + return roachprod.StartFluentBit(context.Background(), config.Logger, args[0], fluentBitConfig) + }), + } + initFluentBitStartCmdFlags(fluentBitStartCmd) + return fluentBitStartCmd +} + +func (cr *commandRegistry) buildFluentBitStopCmd() *cobra.Command { + return &cobra.Command{ + Use: "fluent-bit-stop ", + Short: "Stop Fluent Bit", + Long: "Stop Fluent Bit", + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + return 
roachprod.StopFluentBit(context.Background(), config.Logger, args[0]) + }), + } +} + +func (cr *commandRegistry) buildOpentelemetryStartCmd() *cobra.Command { + opentelemetryStartCmd := &cobra.Command{ + Use: "opentelemetry-start ", + Short: "Install and start the OpenTelemetry Collector", + Long: "Install and start the OpenTelemetry Collector", + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + return roachprod.StartOpenTelemetry(context.Background(), config.Logger, args[0], opentelemetryConfig) + }), + } + initOpentelemetryStartCmdFlags(opentelemetryStartCmd) + return opentelemetryStartCmd +} + +func (cr *commandRegistry) buildOpentelemetryStopCmd() *cobra.Command { + return &cobra.Command{ + Use: "opentelemetry-stop ", + Short: "Stop the OpenTelemetry Collector", + Long: "Stop the OpenTelemetry Collector", + Args: cobra.ExactArgs(1), + Run: wrap(func(cmd *cobra.Command, args []string) error { + return roachprod.StopOpenTelemetry(context.Background(), config.Logger, args[0]) + }), + } +} + +func (cr *commandRegistry) buildFetchLogsCmd() *cobra.Command { + fetchLogsCmd := &cobra.Command{ + Use: "fetchlogs [flags]", + Aliases: []string{"getlogs"}, + Short: "download the logs from the cluster", + Long: `Download the logs from the cluster using "roachprod get". + +The logs will be placed in the directory if specified or in the directory named as _logs. 
+`, + Args: cobra.RangeArgs(1, 2), + Run: wrap(func(cmd *cobra.Command, args []string) error { + cluster := args[0] + ctx := context.Background() + var dest string + if len(args) == 2 { + dest = args[1] + } else { + // trim the node number and keep only the cluster name as prefix of the directory + dest = fmt.Sprintf("%s_logs", strings.Split(args[0], ":")[0]) + fmt.Printf("Placing logs at %s\n", dest) + } + if err := os.Mkdir(dest, 0755); err != nil { + return err + } + return roachprod.FetchLogs(ctx, config.Logger, cluster, dest, + fetchLogsTimeout) + }), + } + fetchLogsCmd.Flags().DurationVarP(&fetchLogsTimeout, + "timeout", "t", 5*time.Minute, "Timeout for fetching the logs from the cluster nodes") + return fetchLogsCmd +} + +func (cr *commandRegistry) buildGetLatestPProfCmd() *cobra.Command { + return &cobra.Command{ + Use: "get-latest-pprof [time-before]", + Short: "downloads the latest pprof file which is created on or before the provided time-before.", + Long: `Downloads the latest pprof file which is created on or before the provided time-before. +The time should be of the format 2022-08-31T15:23:22Z for UTC or 2022-08-31T15:23:22+05:30 for time zone. +If the time is not provided, it downloads the latest pprof file across all clusters. 
+`, + Args: cobra.MinimumNArgs(1), + // Wraps the command execution with additional error handling + Run: wrap(func(cmd *cobra.Command, args []string) (retErr error) { + cluster := args[0] + pprofTimeBefore := "" + if len(args) == 2 { + // time-before is optional + pprofTimeBefore = args[1] + } + ctx := context.Background() + return roachprod.DownloadLatestPProfFile(ctx, config.Logger, cluster, pprofTimeBefore) + }), + } +} diff --git a/pkg/cmd/roachprod/flags.go b/pkg/cmd/roachprod/cli/flags.go similarity index 68% rename from pkg/cmd/roachprod/flags.go rename to pkg/cmd/roachprod/cli/flags.go index 94fd671ab67a..0fd3883f39c1 100644 --- a/pkg/cmd/roachprod/flags.go +++ b/pkg/cmd/roachprod/cli/flags.go @@ -3,7 +3,7 @@ // Use of this software is governed by the CockroachDB Software License // included in the /LICENSE file. -package main +package cli import ( "fmt" @@ -107,9 +107,9 @@ var ( fetchLogsTimeout time.Duration ) -func initFlags() { +func initRootCmdFlags(rootCmd *cobra.Command) { rootCmd.PersistentFlags().BoolVarP(&config.Quiet, "quiet", "q", - false || !term.IsTerminal(int(os.Stdout.Fd())), "disable fancy progress output") + !term.IsTerminal(int(os.Stdout.Fd())), "disable fancy progress output") rootCmd.PersistentFlags().IntVarP(&config.MaxConcurrency, "max-concurrency", "", 32, "maximum number of operations to execute on nodes concurrently, set to zero for infinite", ) @@ -121,7 +121,9 @@ func initFlags() { "use-shared-user", true, fmt.Sprintf("use the shared user %q for ssh rather than your user %q", config.SharedUser, config.OSUser.Username)) +} +func initCreateCmdFlags(createCmd *cobra.Command) { createCmd.Flags().DurationVarP(&createVMOpts.Lifetime, "lifetime", "l", 12*time.Hour, "Lifetime of the cluster") createCmd.Flags().BoolVar(&createVMOpts.SSDOpts.UseLocalSSD, @@ -157,32 +159,43 @@ func initFlags() { for _, providerName := range vm.AllProviderNames() { if vm.Providers[providerName].Active() { 
providerOptsContainer[providerName].ConfigureCreateFlags(createCmd.Flags()) + // createCmd only accepts a single GCE project, as opposed to all the other + // commands. + providerOptsContainer[providerName].ConfigureClusterFlags(createCmd.Flags(), vm.SingleProject) + } + } +} +func initClusterFlagsForMultiProjects( + rootCmd *cobra.Command, excludeFromClusterFlagsMulti []*cobra.Command, +) { + for _, providerName := range vm.AllProviderNames() { + if vm.Providers[providerName].Active() { for _, cmd := range rootCmd.Commands() { - if cmd == createCmd { - // createCmd is handled below + excludeCmd := false + for _, c := range excludeFromClusterFlagsMulti { + if cmd == c { + excludeCmd = true + break + } + } + if excludeCmd { continue } providerOptsContainer[providerName].ConfigureClusterFlags(cmd.Flags(), vm.AcceptMultipleProjects) } - - // createCmd only accepts a single GCE project, as opposed to all the other - // commands. - providerOptsContainer[providerName].ConfigureClusterFlags(createCmd.Flags(), vm.SingleProject) - - // set up cluster cleanup flag for gcCmd - providerOptsContainer[providerName].ConfigureClusterCleanupFlags(gcCmd.Flags()) } } +} +func initDestroyCmdFlags(destroyCmd *cobra.Command) { destroyCmd.Flags().BoolVarP(&destroyAllMine, "all-mine", "m", false, "Destroy all non-local clusters belonging to the current user") destroyCmd.Flags().BoolVarP(&destroyAllLocal, "all-local", "l", false, "Destroy all local clusters") +} - extendCmd.Flags().DurationVarP(&extendLifetime, - "lifetime", "l", 12*time.Hour, "Lifetime of the cluster") - +func initListCmdFlags(listCmd *cobra.Command) { listCmd.Flags().BoolVarP(&listCost, "cost", "c", os.Getenv("ROACHPROD_COST_ESTIMATES") == "true", "Show cost estimates", @@ -195,29 +208,16 @@ func initFlags() { "mine", "m", false, "Show only clusters belonging to the current user") listCmd.Flags().StringVar(&listPattern, "pattern", "", "Show only clusters matching the regex pattern. 
Empty string matches everything.") +} +func initAdminurlCmdFlags(adminurlCmd *cobra.Command) { adminurlCmd.Flags().StringVar(&adminurlPath, "path", "/", "Path to add to URL (e.g. to open a same page on each node)") adminurlCmd.Flags().BoolVar(&adminurlIPs, "ips", false, `Use Public IPs instead of DNS names in URL`) +} - gcCmd.Flags().BoolVarP(&dryrun, - "dry-run", "n", dryrun, "dry run (don't perform any actions)") - gcCmd.Flags().StringVar(&config.SlackToken, "slack-token", "", "Slack bot token") - - pgurlCmd.Flags().BoolVar(&external, - "external", false, "return pgurls for external connections") - for _, cmd := range []*cobra.Command{pgurlCmd, loadBalancerPGUrl} { - cmd.Flags().StringVar(&pgurlCertsDir, - "certs-dir", install.CockroachNodeCertsDir, "cert dir to use for secure connections") - } - - for _, cmd := range []*cobra.Command{pgurlCmd, sqlCmd, loadBalancerPGUrl} { - cmd.Flags().StringVar(&authMode, - "auth-mode", install.DefaultAuthMode().String(), fmt.Sprintf("form of authentication to use, valid auth-modes: %v", maps.Keys(install.PGAuthModes))) - cmd.Flags().StringVar(&database, "database", "", "database to use") - } - +func initPprofCmdFlags(pprofCmd *cobra.Command) { pprofCmd.Flags().DurationVar(&pprofOpts.Duration, "duration", 30*time.Second, "Duration of profile to capture") pprofCmd.Flags().BoolVar(&pprofOpts.Heap, @@ -227,12 +227,9 @@ func initFlags() { pprofCmd.Flags().IntVar(&pprofOpts.StartingPort, "starting-port", 9000, "Initial port to use when opening pprof's HTTP interface") - ipCmd.Flags().BoolVar(&external, - "external", false, "return external IP addresses") - - runCmd.Flags().StringVarP(&extraSSHOptions, - "ssh-options", "O", "", "extra args to pass to ssh") +} +func initStartCmdFlags(startCmd *cobra.Command) { startCmd.Flags().IntVarP(&numRacks, "racks", "r", 0, "the number of racks to partition the nodes into") startCmd.Flags().StringArrayVarP(&startOpts.ExtraArgs, @@ -251,53 +248,33 @@ func initFlags() { "store-count", 
startOpts.StoreCount, "number of stores to start each node with") startCmd.Flags().IntVar(&startOpts.AdminUIPort, "admin-ui-port", startOpts.AdminUIPort, "port to serve the admin UI on") +} +func initStartInstanceCmdFlags(startInstanceCmd *cobra.Command) { startInstanceCmd.Flags().StringVarP(&storageCluster, "storage-cluster", "S", "", "storage cluster") _ = startInstanceCmd.MarkFlagRequired("storage-cluster") startInstanceCmd.Flags().IntVar(&startOpts.SQLInstance, "sql-instance", 0, "specific SQL/HTTP instance to connect to (this is a roachprod abstraction for separate-process deployments distinct from the internal instance ID)") startInstanceCmd.Flags().StringVar(&startOpts.VirtualClusterLocation, "external-nodes", startOpts.VirtualClusterLocation, "if set, starts service in external mode, as a separate process in the given nodes") +} - // Flags for processes that stop (kill) processes. - for _, stopProcessesCmd := range []*cobra.Command{stopCmd, stopInstanceCmd, deployCmd} { - // Cobra does not support reusing flags across multiple commands, especially - // if the defaults differ, so we need to supply different flags for the case - // where the defaults are different. - // See: https://github.com/spf13/cobra/issues/1398 - sigPtr := &sig - waitPtr := &waitFlag - gracePeriodPtr := &gracePeriod - // deployCmd is a special case, because it is used to stop processes in a - // rolling restart, and we want to drain the nodes by default. 
- if stopProcessesCmd == deployCmd { - sigPtr = &deploySig - waitPtr = &deployWaitFlag - gracePeriodPtr = &deployGracePeriod - } - stopProcessesCmd.Flags().IntVar(sigPtr, "sig", *sigPtr, "signal to pass to kill") - stopProcessesCmd.Flags().BoolVar(waitPtr, "wait", *waitPtr, "wait for processes to exit") - stopProcessesCmd.Flags().IntVar(gracePeriodPtr, "grace-period", *gracePeriodPtr, "approx number of seconds to wait for processes to exit, before a forceful shutdown (SIGKILL) is performed") - } - deployCmd.Flags().DurationVar(&pause, "pause", pause, "duration to pause between node restarts") - +func initSyncCmdFlags(syncCmd *cobra.Command) { syncCmd.Flags().BoolVar(&listOpts.IncludeVolumes, "include-volumes", false, "Include volumes when syncing") syncCmd.Flags().StringArrayVarP(&listOpts.IncludeProviders, "clouds", "c", make([]string, 0), "Specify the cloud providers when syncing. Important: Use this flag only if you are certain that you want to sync with a specific cloud. All DNS host entries for other clouds will be erased from the DNS zone.") - wipeCmd.Flags().BoolVar(&wipePreserveCerts, "preserve-certs", false, "do not wipe certificates") - - putCmd.Flags().BoolVar(&useTreeDist, "treedist", useTreeDist, "use treedist copy algorithm") +} +func initStageCmdFlags(stageCmd *cobra.Command) { stageCmd.Flags().StringVar(&stageOS, "os", "", "operating system override for staged binaries") stageCmd.Flags().StringVar(&stageArch, "arch", "", "architecture override for staged binaries [amd64, arm64, fips]; N.B. fips implies amd64 with openssl") stageCmd.Flags().StringVar(&stageDir, "dir", "", "destination for staged binaries") - // N.B. stageURLCmd just prints the URL that stageCmd would use. - stageURLCmd.Flags().StringVar(&stageOS, "os", "", "operating system override for staged binaries") - stageURLCmd.Flags().StringVar(&stageArch, "arch", "", - "architecture override for staged binaries [amd64, arm64, fips]; N.B. 
fips implies amd64 with openssl") +} + +func initLogsCmdFlags(logsCmd *cobra.Command) { logsCmd.Flags().StringVar(&logsFilter, "filter", "", "re to filter log messages") logsCmd.Flags().Var(flagutil.Time(&logsFrom), @@ -311,28 +288,79 @@ func initFlags() { logsCmd.Flags().StringVar(&logsProgramFilter, "logs-program", "^cockroach$", "regular expression of the name of program in log files to search") +} + +func initStageURLCmdFlags(stageURLCmd *cobra.Command) { + // N.B. stageURLCmd just prints the URL that stageCmd would use. + stageURLCmd.Flags().StringVar(&stageOS, "os", "", "operating system override for staged binaries") + stageURLCmd.Flags().StringVar(&stageArch, "arch", "", + "architecture override for staged binaries [amd64, arm64, fips]; N.B. fips implies amd64 with openssl") +} + +func initMonitorCmdFlags(monitorCmd *cobra.Command) { monitorCmd.Flags().BoolVar(&monitorOpts.IgnoreEmptyNodes, "ignore-empty-nodes", false, "Automatically detect the (subset of the given) nodes which to monitor "+ "based on the presence of a nontrivial data directory.") - monitorCmd.Flags().BoolVar(&monitorOpts.OneShot, "oneshot", false, "Report the status of all targeted nodes once, then exit. 
The exit "+ "status is nonzero if (and only if) any node was found not running.") +} + +func initUpdateCmdFlags(updateCmd *cobra.Command) { + updateCmd.Flags().BoolVar(&roachprodUpdateRevert, "revert", false, "restore roachprod to the previous version "+ + "which would have been renamed to roachprod.bak during the update process") + updateCmd.Flags().StringVarP(&roachprodUpdateBranch, "branch", "b", "master", "git branch") + updateCmd.Flags().StringVarP(&roachprodUpdateOS, "os", "o", "linux", "OS") + updateCmd.Flags().StringVarP(&roachprodUpdateArch, "arch", "a", "amd64", "CPU architecture") +} + +func initGrafanaAnnotationCmdFlags(grafanaAnnotationCmd *cobra.Command) { + grafanaAnnotationCmd.Flags().StringArrayVar(&grafanaTags, + "tags", []string{}, "grafana annotation tags") + grafanaAnnotationCmd.Flags().StringVar(&grafanaDashboardUID, + "dashboard-uid", "", "grafana dashboard UID") + grafanaAnnotationCmd.Flags().Int64SliceVar(&grafanaTimeRange, + "time-range", []int64{}, "grafana annotation time range in epoch time") + +} + +func initCollectionStartCmdFlags(collectionStartCmd *cobra.Command) { + collectionStartCmd.Flags().IntVarP(&volumeCreateOpts.Size, + "volume-size", "s", 10, + "the size of the volume in Gigabytes (GB) to create for each store. Note: This volume will be deleted "+ + "once the VM is deleted.") + + collectionStartCmd.Flags().BoolVar(&volumeCreateOpts.Encrypted, + "volume-encrypted", false, + "determines if the volume will be encrypted. Note: This volume will be deleted once the VM is deleted.") + + collectionStartCmd.Flags().StringVar(&volumeCreateOpts.Architecture, + "volume-arch", "", + "the architecture the volume should target. This flag is only relevant for gcp or azure. It is ignored "+ + "if supplied for other providers. 
Note: This volume will be deleted once the VM is deleted.") - cachedHostsCmd.Flags().StringVar(&cachedHostsCluster, - "cluster", "", "print hosts matching cluster") + collectionStartCmd.Flags().IntVarP(&volumeCreateOpts.IOPS, + "volume-iops", "i", 0, + "the iops to provision for the volume. Note: This volume will be deleted once the VM is deleted.") + collectionStartCmd.Flags().StringVarP(&volumeCreateOpts.Type, + "volume-type", "t", "", + "the volume type that should be created. Provide a volume type that is connected to"+ + " the provider chosen for the cluster. If no volume type is provided the provider default will be used. "+ + "Note: This volume will be deleted once the VM is deleted.") +} + +func initGrafanaStartCmdFlags(grafanaStartCmd *cobra.Command) { grafanaStartCmd.Flags().StringVar(&grafanaConfig, "grafana-config", "", "URI to grafana json config, supports local and http(s) schemes") grafanaStartCmd.Flags().StringVar(&grafanaArch, "arch", "", "binary architecture override [amd64, arm64]") +} - grafanaDumpCmd.Flags().StringVar(&grafanaDumpDir, "dump-dir", "", - "the absolute path to dump prometheus data to (use the contained 'prometheus-docker-run.sh' to visualize") - +func initFluentBitStartCmdFlags(fluentBitStartCmd *cobra.Command) { fluentBitStartCmd.Flags().StringVar(&fluentBitConfig.DatadogSite, "datadog-site", "us5.datadoghq.com", "Datadog site to send telemetry data to (e.g., us5.datadoghq.com)") @@ -345,6 +373,9 @@ func initFlags() { fluentBitStartCmd.Flags().StringSliceVar(&fluentBitConfig.DatadogTags, "datadog-tags", []string{}, "Datadog tags as a comma-separated list in the format KEY1:VAL1,KEY2:VAL2") +} + +func initOpentelemetryStartCmdFlags(opentelemetryStartCmd *cobra.Command) { opentelemetryStartCmd.Flags().StringVar(&opentelemetryConfig.DatadogSite, "datadog-site", "us5.datadoghq.com", "Datadog site to send telemetry data to (e.g., us5.datadoghq.com)") @@ -353,141 +384,109 @@ func initFlags() { 
opentelemetryStartCmd.Flags().StringSliceVar(&opentelemetryConfig.DatadogTags, "datadog-tags", []string{}, "Datadog tags as a comma-separated list in the format KEY1:VAL1,KEY2:VAL2") +} - sshKeysAddCmd.Flags().StringVar(&sshKeyUser, "user", config.OSUser.Username, - "the user to be associated with the new key", - ) - - jaegerStartCmd.Flags().StringVar(&jaegerConfigNodes, "configure-nodes", "", - "the nodes on which to set the relevant CRDB cluster settings") - - sideEyeRootCmd.AddCommand(sideEyeInstallCmd) - sideEyeRootCmd.AddCommand(sideEyeSnapCmd) - - initCmd.Flags().IntVar(&startOpts.InitTarget, - "init-target", startOpts.InitTarget, "node on which to run initialization") - - snapshotDeleteCmd.Flags().BoolVar(&dryrun, - "dry-run", false, "dry run (don't perform any actions)") - snapshotCmd.AddCommand(snapshotCreateCmd) - snapshotCmd.AddCommand(snapshotListCmd) - snapshotCmd.AddCommand(snapshotDeleteCmd) - snapshotCmd.AddCommand(snapshotApplyCmd) - - rootStorageCmd.AddCommand(rootStorageCollectionCmd) - rootStorageCollectionCmd.AddCommand(collectionStartCmd) - rootStorageCollectionCmd.AddCommand(collectionStopCmd) - rootStorageCollectionCmd.AddCommand(storageSnapshotCmd) - rootStorageCollectionCmd.AddCommand(collectionListVolumes) - collectionStartCmd.Flags().IntVarP(&volumeCreateOpts.Size, - "volume-size", "s", 10, - "the size of the volume in Gigabytes (GB) to create for each store. Note: This volume will be deleted "+ - "once the VM is deleted.") - - collectionStartCmd.Flags().BoolVar(&volumeCreateOpts.Encrypted, - "volume-encrypted", false, - "determines if the volume will be encrypted. 
Note: This volume will be deleted once the VM is deleted.") +func initGCCmdFlags(gcCmd *cobra.Command) { + gcCmd.Flags().BoolVarP(&dryrun, + "dry-run", "n", dryrun, "dry run (don't perform any actions)") + gcCmd.Flags().StringVar(&config.SlackToken, "slack-token", "", "Slack bot token") + // Allow each Provider to inject additional configuration flags + for _, providerName := range vm.AllProviderNames() { + if vm.Providers[providerName].Active() { + // set up cluster cleanup flag for gcCmd + providerOptsContainer[providerName].ConfigureClusterCleanupFlags(gcCmd.Flags()) + } + } +} - collectionStartCmd.Flags().StringVar(&volumeCreateOpts.Architecture, - "volume-arch", "", - "the architecture the volume should target. This flag is only relevant for gcp or azure. It is ignored "+ - "if supplied for other providers. Note: This volume will be deleted once the VM is deleted.") +func initFlagPgurlCertsDirForCmd(cmd *cobra.Command) { + cmd.Flags().StringVar(&pgurlCertsDir, + "certs-dir", install.CockroachNodeCertsDir, "cert dir to use for secure connections") +} - collectionStartCmd.Flags().IntVarP(&volumeCreateOpts.IOPS, - "volume-iops", "i", 0, - "the iops to provision for the volume. Note: This volume will be deleted once the VM is deleted.") +func initFlagAuthModeNDatabaseForCmd(cmd *cobra.Command) { + cmd.Flags().StringVar(&authMode, + "auth-mode", install.DefaultAuthMode().String(), fmt.Sprintf("form of authentication to use, valid auth-modes: %v", maps.Keys(install.PGAuthModes))) + cmd.Flags().StringVar(&database, "database", "", "database to use") +} - collectionStartCmd.Flags().StringVarP(&volumeCreateOpts.Type, - "volume-type", "t", "", - "the volume type that should be created. Provide a volume type that is connected to"+ - " the provider chosen for the cluster. If no volume type is provided the provider default will be used. 
"+ - "Note: This volume will be deleted once the VM is deleted.") +func initFlagOpenForCmd(cmd *cobra.Command) { + cmd.Flags().BoolVar(&urlOpen, "open", false, "Open the url in a browser") +} - updateCmd.Flags().BoolVar(&roachprodUpdateRevert, "revert", false, "restore roachprod to the previous version "+ - "which would have been renamed to roachprod.bak during the update process") - updateCmd.Flags().StringVarP(&roachprodUpdateBranch, "branch", "b", "master", "git branch") - updateCmd.Flags().StringVarP(&roachprodUpdateOS, "os", "o", "linux", "OS") - updateCmd.Flags().StringVarP(&roachprodUpdateArch, "arch", "a", "amd64", "CPU architecture") +func initFlagUsernameForCmd(cmd *cobra.Command) { + cmd.Flags().StringVarP(&username, "username", "u", os.Getenv("ROACHPROD_USER"), + "Username to run under, detect if blank") +} - for _, cmd := range []*cobra.Command{adminurlCmd, grafanaURLCmd, jaegerURLCmd} { - cmd.Flags().BoolVar(&urlOpen, "open", false, "Open the url in a browser") - } +// initFlagsStopProcessForCmd initializes Flags for processes that stop (kill) processes. +func initFlagsStopProcessForCmd( + stopProcessesCmd *cobra.Command, sigPtr *int, waitPtr *bool, gracePeriodPtr *int, +) { + // Cobra does not support reusing flags across multiple commands, especially + // if the defaults differ, so we need to supply different flags for the case + // where the defaults are different. 
+ // See: https://github.com/spf13/cobra/issues/1398 + stopProcessesCmd.Flags().IntVar(sigPtr, "sig", *sigPtr, "signal to pass to kill") + stopProcessesCmd.Flags().BoolVar(waitPtr, "wait", *waitPtr, "wait for processes to exit") + stopProcessesCmd.Flags().IntVar(gracePeriodPtr, "grace-period", *gracePeriodPtr, "approx number of seconds to wait for processes to exit, before a forceful shutdown (SIGKILL) is performed") +} - for _, cmd := range []*cobra.Command{createCmd, listCmd, destroyCmd} { - cmd.Flags().StringVarP(&username, "username", "u", os.Getenv("ROACHPROD_USER"), - "Username to run under, detect if blank") - } +func initFlagsStartOpsForCmd(cmd *cobra.Command) { + cmd.Flags().BoolVar(&startOpts.ScheduleBackups, + "schedule-backups", startOpts.ScheduleBackups, + "create a cluster backup schedule once the cluster has started (by default, "+ + "full backup hourly and incremental every 15 minutes)") + cmd.Flags().StringVar(&startOpts.ScheduleBackupArgs, + "schedule-backup-args", startOpts.ScheduleBackupArgs, + "Recurrence and scheduled backup options specification") + cmd.Flags().Int64Var(&startOpts.NumFilesLimit, "num-files-limit", startOpts.NumFilesLimit, + "limit the number of files that can be created by the cockroach process") + cmd.Flags().IntVar(&startOpts.SQLPort, + "sql-port", startOpts.SQLPort, "port on which to listen for SQL clients") + cmd.Flags().BoolVar(&startOpts.EnableFluentSink, + "enable-fluent-sink", startOpts.EnableFluentSink, + "whether to enable the fluent-servers attribute in the CockroachDB logging configuration") +} - for _, cmd := range []*cobra.Command{statusCmd, monitorCmd, startCmd, - stopCmd, runCmd, wipeCmd, reformatCmd, installCmd, putCmd, getCmd, - sqlCmd, pgurlCmd, adminurlCmd, ipCmd, - } { - cmd.Flags().BoolVar( - &ssh.InsecureIgnoreHostKey, "insecure-ignore-host-key", true, "don't check ssh host keys") - } +func initFlagInsecureIgnoreHostKeyForCmd(cmd *cobra.Command) { + cmd.Flags().BoolVar( + &ssh.InsecureIgnoreHostKey, 
"insecure-ignore-host-key", true, "don't check ssh host keys") +} - for _, cmd := range []*cobra.Command{startCmd, startInstanceCmd} { - cmd.Flags().BoolVar(&startOpts.ScheduleBackups, - "schedule-backups", startOpts.ScheduleBackups, - "create a cluster backup schedule once the cluster has started (by default, "+ - "full backup hourly and incremental every 15 minutes)") - cmd.Flags().StringVar(&startOpts.ScheduleBackupArgs, - "schedule-backup-args", startOpts.ScheduleBackupArgs, - "Recurrence and scheduled backup options specification") - cmd.Flags().Int64Var(&startOpts.NumFilesLimit, "num-files-limit", startOpts.NumFilesLimit, - "limit the number of files that can be created by the cockroach process") - cmd.Flags().IntVar(&startOpts.SQLPort, - "sql-port", startOpts.SQLPort, "port on which to listen for SQL clients") - cmd.Flags().BoolVar(&startOpts.EnableFluentSink, - "enable-fluent-sink", startOpts.EnableFluentSink, - "whether to enable the fluent-servers attribute in the CockroachDB logging configuration") - } +func initFlagTagForCmd(cmd *cobra.Command) { + cmd.Flags().StringVar(&tag, "tag", "", "the process tag") +} - for _, cmd := range []*cobra.Command{ - startCmd, startInstanceCmd, statusCmd, stopCmd, runCmd, - } { - cmd.Flags().StringVar(&tag, "tag", "", "the process tag") - } +func initFlagSCPForCmd(cmd *cobra.Command) { + cmd.Flags().BoolVar(new(bool), "scp", false, "DEPRECATED") + _ = cmd.Flags().MarkDeprecated("scp", "always true") +} - for _, cmd := range []*cobra.Command{ - startCmd, putCmd, getCmd, - } { - cmd.Flags().BoolVar(new(bool), "scp", false, "DEPRECATED") - _ = cmd.Flags().MarkDeprecated("scp", "always true") - } +func initFlagBinaryForCmd(cmd *cobra.Command) { + cmd.Flags().StringVarP(&config.Binary, + "binary", "b", config.Binary, "the remote cockroach binary to use") +} - for _, cmd := range []*cobra.Command{startCmd, startInstanceCmd, sqlCmd} { - cmd.Flags().StringVarP(&config.Binary, - "binary", "b", config.Binary, "the remote cockroach 
binary to use") - } - for _, cmd := range []*cobra.Command{startCmd, startInstanceCmd, stopInstanceCmd, createLoadBalancerCmd, sqlCmd, pgurlCmd, loadBalancerPGUrl, adminurlCmd, runCmd, jaegerStartCmd, grafanaAnnotationCmd, updateTargetsCmd, growCmd} { - // TODO(renato): remove --secure once the default of secure - // clusters has existed in roachprod long enough. - cmd.Flags().BoolVar(&secure, - "secure", secure, "use a secure cluster (DEPRECATED: clusters are secure by default; use --insecure to create insecure clusters.)") - cmd.Flags().BoolVar(&insecure, - "insecure", insecure, "use an insecure cluster") - } - for _, cmd := range []*cobra.Command{pgurlCmd, sqlCmd, adminurlCmd, stopInstanceCmd, createLoadBalancerCmd, loadBalancerPGUrl, loadBalancerIP, jaegerStartCmd} { - cmd.Flags().StringVar(&virtualClusterName, - "cluster", "", "specific virtual cluster to connect to") - cmd.Flags().IntVar(&sqlInstance, - "sql-instance", 0, "specific SQL/HTTP instance to connect to (this is a roachprod abstraction distinct from the internal instance ID)") - } +func initFlagInsecureForCmd(cmd *cobra.Command) { + // TODO(renato): remove --secure once the default of secure + // clusters has existed in roachprod long enough. 
+ cmd.Flags().BoolVar(&secure, + "secure", secure, "use a secure cluster (DEPRECATED: clusters are secure by default; use --insecure to create insecure clusters.)") + cmd.Flags().BoolVar(&insecure, + "insecure", insecure, "use an insecure cluster") +} - for _, cmd := range []*cobra.Command{startCmd, listCmd, syncCmd} { - cmd.Flags().StringSliceVar(&config.DNSRequiredProviders, - "dns-required-providers", config.DefaultDNSRequiredProviders, - "the cloud providers that must be active to refresh DNS entries", - ) - } +func initFlagsClusterNSQLForCmd(cmd *cobra.Command) { + cmd.Flags().StringVar(&virtualClusterName, + "cluster", "", "specific virtual cluster to connect to") + cmd.Flags().IntVar(&sqlInstance, + "sql-instance", 0, "specific SQL/HTTP instance to connect to (this is a roachprod abstraction distinct from the internal instance ID)") +} - grafanaAnnotationCmd.Flags().StringArrayVar(&grafanaTags, - "tags", []string{}, "grafana annotation tags") - grafanaAnnotationCmd.Flags().StringVar(&grafanaDashboardUID, - "dashboard-uid", "", "grafana dashboard UID") - grafanaAnnotationCmd.Flags().Int64SliceVar(&grafanaTimeRange, - "time-range", []int64{}, "grafana annotation time range in epoch time") - fetchLogsCmd.Flags().DurationVarP(&fetchLogsTimeout, - "timeout", "t", 5*time.Minute, "Timeout for fetching the logs from the cluster nodes") +func initFlagDNSRequiredProvidersForCmd(cmd *cobra.Command) { + cmd.Flags().StringSliceVar(&config.DNSRequiredProviders, + "dns-required-providers", config.DefaultDNSRequiredProviders, + "the cloud providers that must be active to refresh DNS entries", + ) } diff --git a/pkg/cmd/roachprod/cli/handlers.go b/pkg/cmd/roachprod/cli/handlers.go new file mode 100644 index 000000000000..152e00fe00ff --- /dev/null +++ b/pkg/cmd/roachprod/cli/handlers.go @@ -0,0 +1,90 @@ +// Copyright 2024 The Cockroach Authors. +// +// Use of this software is governed by the CockroachDB Software License +// included in the /LICENSE file. 
+package cli + +import ( + "fmt" + "os" + "os/user" + + "github.com/cockroachdb/cockroach/pkg/roachprod" + "github.com/cockroachdb/cockroach/pkg/roachprod/config" + "github.com/cockroachdb/cockroach/pkg/roachprod/vm" + "github.com/spf13/cobra" +) + +// commandRegistry maintains the registry of the commands registered to the root command +type commandRegistry struct { + rootCmd *cobra.Command + excludeFromBashCompletion []*cobra.Command + excludeFromClusterFlagsMulti []*cobra.Command +} + +// addCommand adds the list of commands to the root command +func (cr *commandRegistry) addCommand(cmds []*cobra.Command) { + cr.rootCmd.AddCommand(cmds...) +} + +// addToExcludeFromBashCompletion adds the commands to be excluded from the bash completion script +func (cr *commandRegistry) addToExcludeFromBashCompletion(cmd *cobra.Command) { + cr.excludeFromBashCompletion = append(cr.excludeFromBashCompletion, cmd) +} + +// addToExcludeFromClusterFlagsMulti adds the command to the exclusion list passed to initClusterFlagsForMultiProjects +func (cr *commandRegistry) addToExcludeFromClusterFlagsMulti(cmd *cobra.Command) { + cr.excludeFromClusterFlagsMulti = append(cr.excludeFromClusterFlagsMulti, cmd) +} + +// newCommandRegistry returns a new commandRegistry +func newCommandRegistry(rootCmd *cobra.Command) *commandRegistry { + return &commandRegistry{ + rootCmd: rootCmd, + excludeFromBashCompletion: make([]*cobra.Command, 0), + } +} + +// Initialize sets up and initializes the command-line interface. +func Initialize(rootCmd *cobra.Command) { + _ = roachprod.InitProviders() + providerOptsContainer = vm.CreateProviderOptionsContainer() + // The commands are displayed in the order they are added to rootCmd. Note + // that gcCmd and adminurlCmd contain a trailing \n in their Short help in + // order to separate the commands into logical groups.
+ cobra.EnableCommandSorting = false + cr := newCommandRegistry(rootCmd) + cr.register() + initRootCmdFlags(rootCmd) + initClusterFlagsForMultiProjects(rootCmd, cr.excludeFromClusterFlagsMulti) + cr.setBashCompletionFunction() + + var err error + config.OSUser, err = user.Current() + if err != nil { + fmt.Fprintf(os.Stderr, "unable to lookup current user: %s\n", err) + os.Exit(1) + } + + if err := roachprod.InitDirs(); err != nil { + fmt.Fprintf(os.Stderr, "%s\n", err) + os.Exit(1) + } + + if err := roachprod.LoadClusters(); err != nil { + // We don't want to exit as we may be looking at the help message. + fmt.Printf("problem loading clusters: %s\n", err) + } + + updateTime, sha, err := CheckLatest(roachprodUpdateBranch, roachprodUpdateOS, roachprodUpdateArch) + if err != nil { + fmt.Fprintf(os.Stderr, "WARN: failed to check if a more recent 'roachprod' binary exists: %s\n", err) + } else { + age, err := TimeSinceUpdate(updateTime) + if err != nil { + fmt.Fprintf(os.Stderr, "WARN: unable to check mtime of 'roachprod' binary: %s\n", err) + } else if age.Hours() >= 14*24 { + fmt.Fprintf(os.Stderr, "WARN: roachprod binary is >= 2 weeks old (%s); latest sha: %q\nWARN: Consider updating the binary: `roachprod update`\n\n", age, sha) + } + } +} diff --git a/pkg/cmd/roachprod/cli/resgistry.go b/pkg/cmd/roachprod/cli/resgistry.go new file mode 100644 index 000000000000..3829600135df --- /dev/null +++ b/pkg/cmd/roachprod/cli/resgistry.go @@ -0,0 +1,74 @@ +// Copyright 2024 The Cockroach Authors. +// +// Use of this software is governed by the CockroachDB Software License +// included in the /LICENSE file. +package cli + +import "github.com/spf13/cobra" + +// register registers all roachprod subcommands. 
+// Add your commands here +func (cr *commandRegistry) register() { + cr.addCommand([]*cobra.Command{ + cr.buildCreateCmd(), + cr.buildGrowCmd(), + cr.buildShrinkCmd(), + cr.buildResetCmd(), + cr.buildDestroyCmd(), + cr.buildExtendCmd(), + cr.buildLoadBalancerCmd(), + cr.buildListCmd(), + cr.buildSyncCmd(), + cr.buildGCCmd(), + cr.buildSetupSSHCmd(), + cr.buildStatusCmd(), + cr.buildMonitorCmd(), + cr.buildStartCmd(), + cr.buildUpdateTargetsCmd(), + cr.buildStopCmd(), + cr.buildStartInstanceCmd(), + cr.buildStopInstanceCmd(), + cr.buildDeployCmd(), + cr.buildInitCmd(), + cr.buildRunCmd(), + cr.buildSignalCmd(), + cr.buildWipeCmd(), + cr.buildDestroyDNSCmd(), + cr.buildReformatCmd(), + cr.buildInstallCmd(), + cr.buildDistributeCertsCmd(), + cr.buildSshKeysCmd(), + cr.buildPutCmd(), + cr.buildGetCmd(), + cr.buildStageCmd(), + cr.buildStageURLCmd(), + cr.buildDownloadCmd(), + cr.buildSQLCmd(), + cr.buildIPCmd(), + cr.buildPGUrlCmd(), + cr.buildAdminurlCmd(), + cr.buildLogsCmd(), + cr.buildPprofCmd(), + cr.buildCachedHostsCmd(), + cr.buildVersionCmd(), + cr.buildGetProvidersCmd(), + cr.buildGrafanaStartCmd(), + cr.buildGrafanaStopCmd(), + cr.buildGrafanaDumpCmd(), + cr.buildGrafanaURLCmd(), + cr.buildGrafanaAnnotationCmd(), + cr.buildRootStorageCmd(), + cr.buildSnapshotCmd(), + cr.buildUpdateCmd(), + cr.buildJaegerStartCmd(), + cr.buildJaegerStopCmd(), + cr.buildJaegerURLCmd(), + cr.buildSideEyeRootCmd(), + cr.buildFluentBitStartCmd(), + cr.buildFluentBitStopCmd(), + cr.buildOpentelemetryStartCmd(), + cr.buildOpentelemetryStopCmd(), + cr.buildFetchLogsCmd(), + cr.buildGetLatestPProfCmd(), + }) +} diff --git a/pkg/cmd/roachprod/update/update.go b/pkg/cmd/roachprod/cli/update.go similarity index 96% rename from pkg/cmd/roachprod/update/update.go rename to pkg/cmd/roachprod/cli/update.go index 8a4ed8952d48..5e0bb763589a 100644 --- a/pkg/cmd/roachprod/update/update.go +++ b/pkg/cmd/roachprod/cli/update.go @@ -3,7 +3,7 @@ // Use of this software is governed by the 
CockroachDB Software License // included in the /LICENSE file. -package update +package cli import ( "context" @@ -58,7 +58,7 @@ func CheckLatest(branch string, os string, arch string) (time.Time, string, erro return checkLatest(client, ctx, branch, os, arch) } -// Downloads the latest binary into the file specified by `toFile`. +// DownloadLatestRoachprod downloads the latest binary into the file specified by `toFile`. func DownloadLatestRoachprod(toFile string, branch string, osName string, arch string) error { ctx := context.Background() client, err := storage.NewClient(ctx, option.WithScopes(storage.ScopeReadOnly)) diff --git a/pkg/cmd/roachprod/cli/util.go b/pkg/cmd/roachprod/cli/util.go new file mode 100644 index 000000000000..a62e31a8bac3 --- /dev/null +++ b/pkg/cmd/roachprod/cli/util.go @@ -0,0 +1,240 @@ +// Copyright 2023 The Cockroach Authors. +// +// Use of this software is governed by the CockroachDB Software License +// included in the /LICENSE file. + +package cli + +import ( + "fmt" + "os" + "strings" + "text/tabwriter" + "time" + + rperrors "github.com/cockroachdb/cockroach/pkg/roachprod/errors" + "github.com/cockroachdb/cockroach/pkg/roachprod/vm" + "github.com/cockroachdb/cockroach/pkg/roachprod/vm/gce" + "github.com/cockroachdb/errors" + "github.com/cockroachdb/errors/oserror" + "github.com/spf13/cobra" +) + +func PromptYesNo(msg string, defaultYes bool) bool { + if defaultYes { + fmt.Printf("%s y[default]/n: ", msg) + } else { + fmt.Printf("%s y/n[default]: ", msg) + } + + var answer string + _, _ = fmt.Scanln(&answer) + answer = strings.TrimSpace(answer) + + isYes := answer == "y" || answer == "Y" + isEmpty := answer == "" + + if defaultYes { + return isYes || isEmpty + } + + return isYes +} + +// SwapBinary attempts to swap the `old` file with the `new` file. Used to +// update a running roachprod binary. +// Note: there is special handling if `new` points to a file ending in `.bak`. 
+// In this case, it is assumed to be a `revert` operation, in which case we +// do *not* backup the old/current file. +func SwapBinary(old, new string) error { + destInfo, err := os.Stat(new) + + if err != nil { + if oserror.IsNotExist(err) { + return errors.WithDetail(err, "binary does not exist: "+new) + } + return err + } + + if destInfo.IsDir() { + return errors.Newf("binary path is a directory, not a file: %s", new) + } + + oldInfo, err := os.Stat(old) + if err != nil { + return err + } + + // Copy the current file permissions to the new binary and ensure it is executable. + err = os.Chmod(new, oldInfo.Mode()) + if err != nil { + return err + } + + // Backup only for upgrading, not when reverting which is assumed if the new binary ends in `.bak`. + if !strings.HasSuffix(new, ".bak") { + // Backup the current binary, so that it may be restored via `roachprod update --revert`. + err = os.Rename(old, old+".bak") + if err != nil { + return errors.WithDetail(err, "unable to backup current binary") + } + } + + // Move the new binary into place. + return os.Rename(new, old) +} + +// TimeSinceUpdate computes the age of the current binary, relative to the given update time. +func TimeSinceUpdate(updateTime time.Time) (time.Duration, error) { + currentBinary, err := os.Executable() + if err != nil { + return -1, err + } + statInfo, err := os.Stat(currentBinary) + if err != nil { + return -1, err + } + return updateTime.Sub(statInfo.ModTime()), nil +} + +// wrap provides `cobra.Command` functions with a standard return code handler. +// Exit codes come from rperrors.Error.ExitCode(). +// +// If the wrapped error tree of an error does not contain an instance of +// rperrors.Error, the error will automatically be wrapped with +// rperrors.Unclassified.
+func wrap(f func(cmd *cobra.Command, args []string) error) func(cmd *cobra.Command, args []string) { + return func(cmd *cobra.Command, args []string) { + var err error + isSecure, err = isSecureCluster(cmd) + if err != nil { + cmd.Printf("Error: %v\n", err) + os.Exit(1) + } + + err = f(cmd, args) + if err != nil { + roachprodError, ok := rperrors.AsError(err) + if !ok { + roachprodError = rperrors.Unclassified{Err: err} + err = roachprodError + } + + cmd.Printf("Error: %+v\n", err) + + os.Exit(roachprodError.ExitCode()) + } + } +} + +func isSecureCluster(cmd *cobra.Command) (bool, error) { + hasSecureFlag := cmd.Flags().Changed("secure") + hasInsecureFlag := cmd.Flags().Changed("insecure") + + switch { + case hasSecureFlag && hasInsecureFlag: + // Disallow passing both flags, even if they are consistent. + return false, fmt.Errorf("cannot pass both --secure and --insecure flags") + + case hasSecureFlag: + desc := "Clusters are secure by default" + if !secure { + desc = "Use the --insecure flag to create insecure clusters" + } + + fmt.Printf("WARNING: --secure flag is deprecated. %s.\n", desc) + return secure, nil + + default: + return !insecure, nil + } +} + +func printPublicKeyTable(keys gce.AuthorizedKeys, includeSize bool) error { + // Align columns left and separate with at least two spaces. 
+ tw := tabwriter.NewWriter(os.Stdout, 0, 8, 2, ' ', 0) + + fmt.Fprintf(tw, "%s\t%s\n", "User", "Key") + for _, ak := range keys { + fmt.Fprintf(tw, "%s\t%s\n", ak.User, ak.Format(64 /* maxLen */)) + } + + err := tw.Flush() + if !includeSize { + return err + } + + const maxProjectMetadataBytes = 262144 /* 256 KiB */ + metadataLen := len(keys.AsProjectMetadata()) + + usage := int(float64(metadataLen*100) / float64(maxProjectMetadataBytes)) + _, err = fmt.Printf("\nTOTAL: %d bytes (usage: %d%%)\n", metadataLen, usage) + return err +} + +// addHelpAboutNodes adds help about nodes to each of the commands +func addHelpAboutNodes(cmd *cobra.Command) { + // Add help about specifying nodes + if cmd.Long == "" { + cmd.Long = cmd.Short + } + cmd.Long += fmt.Sprintf(` +Node specification + + By default the operation is performed on all nodes in . A subset of + nodes can be specified by appending : to the cluster name. The syntax + of is a comma separated list of specific node IDs or range of + IDs. For example: + + roachprod %[1]s marc-test:1-3,8-9 + + will perform %[1]s on: + + marc-test-1 + marc-test-2 + marc-test-3 + marc-test-8 + marc-test-9 +`, cmd.Name()) +} + +// ValidateAndConfigure validates and canonicalizes args before executing any command. +func ValidateAndConfigure(cmd *cobra.Command, args []string) { + // Skip validation for commands that are self-sufficient. + switch cmd.Name() { + case "help", "version", "list": + return + } + + printErrAndExit := func(err error) { + if err != nil { + fmt.Fprintf(os.Stderr, "%s\n", err) + os.Exit(1) + } + } + + // Validate architecture flag, if set. + if archOpt := cmd.Flags().Lookup("arch"); archOpt != nil && archOpt.Changed { + arch := vm.CPUArch(strings.ToLower(archOpt.Value.String())) + + if arch != vm.ArchAMD64 && arch != vm.ArchARM64 && arch != vm.ArchFIPS { + printErrAndExit(fmt.Errorf("unsupported architecture %q", arch)) + } + if string(arch) != archOpt.Value.String() { + // Set the canonical value.
+ _ = cmd.Flags().Set("arch", string(arch)) + } + } + + // Validate cloud providers, if set. + providersSet := make(map[string]struct{}) + for _, p := range createVMOpts.VMProviders { + if _, ok := vm.Providers[p]; !ok { + printErrAndExit(fmt.Errorf("unknown cloud provider %q", p)) + } + if _, ok := providersSet[p]; ok { + printErrAndExit(fmt.Errorf("duplicate cloud provider specified %q", p)) + } + providersSet[p] = struct{}{} + } +} diff --git a/pkg/cmd/roachprod/main.go b/pkg/cmd/roachprod/main.go index 2558f29cab1a..83131ad629de 100644 --- a/pkg/cmd/roachprod/main.go +++ b/pkg/cmd/roachprod/main.go @@ -6,40 +6,11 @@ package main import ( - "context" - "encoding/json" - "fmt" - "net/url" "os" - "os/user" - "path" - "runtime" - "sort" - "strconv" - "strings" - "text/tabwriter" - "time" "github.com/cockroachdb/cockroach/pkg/build" - "github.com/cockroachdb/cockroach/pkg/cmd/roachprod/grafana" - "github.com/cockroachdb/cockroach/pkg/cmd/roachprod/update" - "github.com/cockroachdb/cockroach/pkg/roachprod" - "github.com/cockroachdb/cockroach/pkg/roachprod/cloud" - "github.com/cockroachdb/cockroach/pkg/roachprod/config" - rperrors "github.com/cockroachdb/cockroach/pkg/roachprod/errors" - "github.com/cockroachdb/cockroach/pkg/roachprod/install" - "github.com/cockroachdb/cockroach/pkg/roachprod/roachprodutil" - "github.com/cockroachdb/cockroach/pkg/roachprod/ui" - "github.com/cockroachdb/cockroach/pkg/roachprod/vm" - "github.com/cockroachdb/cockroach/pkg/roachprod/vm/gce" - "github.com/cockroachdb/cockroach/pkg/util/timeutil" - "github.com/cockroachdb/errors" - "github.com/fatih/color" + "github.com/cockroachdb/cockroach/pkg/cmd/roachprod/cli" "github.com/spf13/cobra" - "golang.org/x/crypto/ssh" - "golang.org/x/exp/maps" - "golang.org/x/text/language" - "golang.org/x/text/message" ) var rootCmd = &cobra.Command{ @@ -63,2079 +34,14 @@ cluster on these nodes, run a sql command on the 2nd node, stop, wipe and destroy the cluster. 
`, Version: "details:\n" + build.GetInfo().Long(), - PersistentPreRun: validateAndConfigure, -} - -// Provide `cobra.Command` functions with a standard return code handler. -// Exit codes come from rperrors.Error.ExitCode(). -// -// If the wrapped error tree of an error does not contain an instance of -// rperrors.Error, the error will automatically be wrapped with -// rperrors.Unclassified. -func wrap(f func(cmd *cobra.Command, args []string) error) func(cmd *cobra.Command, args []string) { - return func(cmd *cobra.Command, args []string) { - var err error - isSecure, err = isSecureCluster(cmd) - if err != nil { - cmd.Printf("Error: %v\n", err) - os.Exit(1) - } - - err = f(cmd, args) - if err != nil { - roachprodError, ok := rperrors.AsError(err) - if !ok { - roachprodError = rperrors.Unclassified{Err: err} - err = roachprodError - } - - cmd.Printf("Error: %+v\n", err) - - os.Exit(roachprodError.ExitCode()) - } - } -} - -var createCmd = &cobra.Command{ - Use: "create ", - Short: "create a cluster", - Long: `Create a local or cloud-based cluster. - -A cluster is composed of a set of nodes, configured during cluster creation via -the --nodes flag. Creating a cluster does not start any processes on the nodes -other than the base system processes (e.g. sshd). See "roachprod start" for -starting cockroach nodes and "roachprod {run,ssh}" for running arbitrary -commands on the nodes of a cluster. - -Cloud Clusters - - Cloud-based clusters are ephemeral and come with a lifetime (specified by the - --lifetime flag) after which they will be automatically - destroyed. Cloud-based clusters require the associated command line tool for - the cloud to be installed and configured (e.g. "gcloud auth login"). - - Clusters names are required to be prefixed by the authenticated user of the - cloud service. The suffix is an arbitrary string used to distinguish - clusters. For example, "marc-test" is a valid cluster name for the user - "marc". 
The authenticated user for the cloud service is automatically - detected and can be override by the ROACHPROD_USER environment variable or - the --username flag. - - The machine type and the use of local SSD storage can be specified during - cluster creation via the --{cloud}-machine-type and --local-ssd flags. The - machine-type is cloud specified. For example, --gce-machine-type=n1-highcpu-8 - requests the "n1-highcpu-8" machine type for a GCE-based cluster. No attempt - is made (or desired) to abstract machine types across cloud providers. See - the cloud provider's documentation for details on the machine types - available. - - The underlying filesystem can be provided using the --filesystem flag. - Use --filesystem=zfs, for zfs, and --filesystem=ext4, for ext4. The default - file system is ext4. The filesystem flag only works on gce currently. - -Local Clusters - - A local cluster stores the per-node data in ${HOME}/local on the machine - roachprod is being run on. Whether a cluster is local is specified on creation - by using the name 'local' or 'local-'. Local clusters have no expiration. -`, - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) (retErr error) { - createVMOpts.ClusterName = args[0] - opts := cloud.ClusterCreateOpts{Nodes: numNodes, CreateOpts: createVMOpts, ProviderOptsContainer: providerOptsContainer} - return roachprod.Create(context.Background(), config.Logger, username, &opts) - }), -} - -var growCmd = &cobra.Command{ - Use: `grow `, - Short: `grow a cluster by adding nodes`, - Long: `grow a cluster by adding the specified number of nodes to it. - -Only Google Cloud and local clusters currently support adding nodes. The Google -Cloud cluster has to be a managed cluster (i.e., a cluster created with the -gce-managed flag). 
The new nodes will use the instance template that was used to -create the cluster originally (Nodes will be created in the same zone as the -existing nodes, or if the cluster is geographically distributed, the nodes will -be fairly distributed across the zones of the cluster). -`, - Args: cobra.ExactArgs(2), - Run: wrap(func(cmd *cobra.Command, args []string) error { - count, err := strconv.ParseInt(args[1], 10, 8) - if err != nil || count < 1 { - return errors.Wrapf(err, "invalid num-nodes argument") - } - return roachprod.Grow(context.Background(), config.Logger, args[0], isSecure, int(count)) - }), -} - -var shrinkCmd = &cobra.Command{ - Use: `shrink `, - Short: `shrink a cluster by removing nodes`, - Long: `shrink a cluster by removing the specified number of nodes. - -Only Google Cloud and local clusters currently support removing nodes. The -Google Cloud cluster has to be a managed cluster (i.e., a cluster created with -the gce-managed flag). Nodes are removed from the tail end of the cluster. -Removing nodes from the middle of the cluster is not supported yet. -`, - Args: cobra.ExactArgs(2), - Run: wrap(func(cmd *cobra.Command, args []string) error { - count, err := strconv.ParseInt(args[1], 10, 8) - if err != nil || count < 1 { - return errors.Wrapf(err, "invalid num-nodes argument") - } - return roachprod.Shrink(context.Background(), config.Logger, args[0], int(count)) - }), -} - -var setupSSHCmd = &cobra.Command{ - Use: "setup-ssh ", - Short: "set up ssh for a cluster", - Long: `Sets up the keys and host keys for the vms in the cluster. - -It first resets the machine credentials as though the cluster were newly created -using the cloud provider APIs and then proceeds to ensure that the hosts can -SSH into eachother and lastly adds additional public keys to AWS hosts as read -from the GCP project. 
This operation is performed as the last step of creating -a new cluster but can be useful to re-run if the operation failed previously or -if the user would like to update the keys on the remote hosts. -`, - - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) (retErr error) { - return roachprod.SetupSSH(context.Background(), config.Logger, args[0]) - }), -} - -var destroyCmd = &cobra.Command{ - Use: "destroy [ --all-mine | --all-local | [ ...] ]", - Short: "destroy clusters", - Long: `Destroy one or more local or cloud-based clusters. - -The destroy command accepts the names of the clusters to destroy. Alternatively, -the --all-mine flag can be provided to destroy all (non-local) clusters that are -owned by the current user, or the --all-local flag can be provided to destroy -all local clusters. - -Destroying a cluster releases the resources for a cluster. For a cloud-based -cluster the machine and associated disk resources are freed. For a local -cluster, any processes started by roachprod are stopped, and the node -directories inside ${HOME}/local directory are removed. -`, - Args: cobra.ArbitraryArgs, - Run: wrap(func(cmd *cobra.Command, args []string) error { - return roachprod.Destroy(config.Logger, username, destroyAllMine, destroyAllLocal, args...) - }), -} - -var cachedHostsCmd = &cobra.Command{ - Use: "cached-hosts", - Short: "list all clusters (and optionally their host numbers) from local cache", - Args: cobra.NoArgs, - Run: wrap(func(cmd *cobra.Command, args []string) error { - roachprod.CachedClusters(func(clusterName string, numVMs int) { - if strings.HasPrefix(clusterName, "teamcity") { - return - } - fmt.Printf("%s", clusterName) - // When invoked by bash-completion, cachedHostsCluster is what the user - // has currently typed -- if this cluster matches that, expand its hosts. 
- if strings.HasPrefix(cachedHostsCluster, clusterName) { - for i := 1; i <= numVMs; i++ { - fmt.Printf(" %s:%d", clusterName, i) - } - } - fmt.Printf("\n") - }) - return nil - }), -} - -var listCmd = &cobra.Command{ - Use: "list [--details | --json] [ --mine | --pattern ]", - Short: "list all clusters", - Long: `List all clusters. - -The list command accepts a flag --pattern which is a regular -expression that will be matched against the cluster name pattern. Alternatively, -the --mine flag can be provided to list the clusters that are owned by the current -user. - -The default output shows one line per cluster, including the local cluster if -it exists: - - ~ roachprod list - local: [local] 1 (-) - marc-test: [aws gce] 4 (5h34m35s) - Syncing... - -The second column lists the cloud providers that host VMs for the cluster. - -The third and fourth columns are the number of nodes in the cluster and the -time remaining before the cluster will be automatically destroyed. Note that -local clusters do not have an expiration. - -The --details flag adjusts the output format to include per-node details: - - ~ roachprod list --details - local [local]: (no expiration) - localhost 127.0.0.1 127.0.0.1 - marc-test: [aws gce] 5h33m57s remaining - marc-test-0001 marc-test-0001.us-east1-b.cockroach-ephemeral 10.142.0.18 35.229.60.91 - marc-test-0002 marc-test-0002.us-east1-b.cockroach-ephemeral 10.142.0.17 35.231.0.44 - marc-test-0003 marc-test-0003.us-east1-b.cockroach-ephemeral 10.142.0.19 35.229.111.100 - marc-test-0004 marc-test-0004.us-east1-b.cockroach-ephemeral 10.142.0.20 35.231.102.125 - Syncing... - -The first and second column are the node hostname and fully qualified name -respectively. The third and fourth column are the private and public IP -addresses. - -The --json flag sets the format of the command output to json. - -Listing clusters has the side-effect of syncing ssh keys/configs and the local -hosts file. 
-`, - Args: cobra.NoArgs, - Run: wrap(func(cmd *cobra.Command, args []string) error { - if listJSON && listDetails { - return errors.New("'json' option cannot be combined with 'details' option") - } - filteredCloud, err := roachprod.List(config.Logger, listMine, listPattern, - vm.ListOptions{ - Username: username, - ComputeEstimatedCost: listCost, - }) - - if err != nil { - return err - } - - // sort by cluster names for stable output. - names := make([]string, len(filteredCloud.Clusters)) - maxClusterName := 0 - i := 0 - for name := range filteredCloud.Clusters { - names[i] = name - if len(name) > maxClusterName { - maxClusterName = len(name) - } - i++ - } - sort.Strings(names) - - p := message.NewPrinter(language.English) - if listJSON { - enc := json.NewEncoder(os.Stdout) - enc.SetIndent("", " ") - if err := enc.Encode(filteredCloud); err != nil { - return err - } - } else { - machineType := func(clusterVMs vm.List) string { - return clusterVMs[0].MachineType - } - cpuArch := func(clusterVMs vm.List) string { - // Display CPU architecture and family. - if clusterVMs[0].CPUArch == "" { - // N.B. Either a local cluster or unsupported cloud provider. - return "" - } - if clusterVMs[0].CPUFamily != "" { - return clusterVMs[0].CPUFamily - } - if clusterVMs[0].CPUArch != vm.ArchAMD64 { - return string(clusterVMs[0].CPUArch) - } - // AMD64 is the default, so don't display it. - return "" - } - // Align columns right and separate with at least two spaces. - tw := tabwriter.NewWriter(os.Stdout, 0, 8, 2, ' ', tabwriter.AlignRight) - // N.B. colors use escape codes which don't play nice with tabwriter [1]. - // We use a hacky workaround below to color the empty string. - // [1] https://github.com/golang/go/issues/12073 - - if !listDetails { - // Print header only if we are not printing cluster details. 
- fmt.Fprintf(tw, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t\n", - "Cluster", "Clouds", "Size", "VM", "Arch", - color.HiWhiteString("$/hour"), color.HiWhiteString("$ Spent"), - color.HiWhiteString("Uptime"), color.HiWhiteString("TTL"), - color.HiWhiteString("$/TTL")) - // Print separator. - fmt.Fprintf(tw, "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t\n", - "", "", "", "", - color.HiWhiteString(""), color.HiWhiteString(""), - color.HiWhiteString(""), color.HiWhiteString(""), - color.HiWhiteString("")) - } - totalCostPerHour := 0.0 - for _, name := range names { - c := filteredCloud.Clusters[name] - if listDetails { - if err = c.PrintDetails(config.Logger); err != nil { - return err - } - } else { - // N.B. Tabwriter doesn't support per-column alignment. It looks odd to have the cluster names right-aligned, - // so we make it left-aligned. - fmt.Fprintf(tw, "%s\t%s\t%d\t%s\t%s", name+strings.Repeat(" ", maxClusterName-len(name)), c.Clouds(), - len(c.VMs), machineType(c.VMs), cpuArch(c.VMs)) - if !c.IsLocal() { - colorByCostBucket := func(cost float64) func(string, ...interface{}) string { - switch { - case cost <= 100: - return color.HiGreenString - case cost <= 1000: - return color.HiBlueString - default: - return color.HiRedString - } - } - timeRemaining := c.LifetimeRemaining().Round(time.Second) - formatTTL := func(ttl time.Duration) string { - if c.VMs[0].Preemptible { - return color.HiMagentaString(ttl.String()) - } else { - return color.HiBlueString(ttl.String()) - } - } - cost := c.CostPerHour - totalCostPerHour += cost - alive := timeutil.Since(c.CreatedAt).Round(time.Minute) - costSinceCreation := cost * float64(alive) / float64(time.Hour) - costRemaining := cost * float64(timeRemaining) / float64(time.Hour) - if cost > 0 { - fmt.Fprintf(tw, "\t%s\t%s\t%s\t%s\t%s\t", - color.HiGreenString(p.Sprintf("$%.2f", cost)), - colorByCostBucket(costSinceCreation)(p.Sprintf("$%.2f", costSinceCreation)), - color.HiWhiteString(alive.String()), - formatTTL(timeRemaining), - 
colorByCostBucket(costRemaining)(p.Sprintf("$%.2f", costRemaining))) - } else { - fmt.Fprintf(tw, "\t%s\t%s\t%s\t%s\t%s\t", - color.HiGreenString(""), - color.HiGreenString(""), - color.HiWhiteString(alive.String()), - formatTTL(timeRemaining), - color.HiGreenString("")) - } - } else { - fmt.Fprintf(tw, "\t(-)") - } - fmt.Fprintf(tw, "\n") - } - } - if err := tw.Flush(); err != nil { - return err - } - - if totalCostPerHour > 0 { - _, _ = p.Printf("\nTotal cost per hour: $%.2f\n", totalCostPerHour) - } - - // Optionally print any dangling instances with errors - if listDetails { - collated := filteredCloud.BadInstanceErrors() - - // Sort by Error() value for stable output - var errors ui.ErrorsByError - for err := range collated { - errors = append(errors, err) - } - sort.Sort(errors) - - for _, e := range errors { - fmt.Printf("%s: %s\n", e, collated[e].Names()) - } - } - } - return nil - }), -} - -var bashCompletion = os.ExpandEnv("$HOME/.roachprod/bash-completion.sh") - -// TODO(peter): Do we need this command given that the "list" command syncs as -// a side-effect. If you don't care about the list output, just "roachprod list -// &>/dev/null". -var syncCmd = &cobra.Command{ - Use: "sync [flags]", - Short: "sync ssh keys/config and hosts files", - Long: ``, - Args: cobra.NoArgs, - Run: wrap(func(cmd *cobra.Command, args []string) error { - _, err := roachprod.Sync(config.Logger, listOpts) - _ = rootCmd.GenBashCompletionFile(bashCompletion) - return err - }), -} - -var gcCmd = &cobra.Command{ - Use: "gc", - Short: "GC expired clusters and unused AWS keypairs\n", - Long: `Garbage collect expired clusters and unused SSH keypairs in AWS. - -Destroys expired clusters, sending email if properly configured. Usually run -hourly by a cronjob so it is not necessary to run manually. 
-`, - Args: cobra.NoArgs, - Run: wrap(func(cmd *cobra.Command, args []string) error { - return roachprod.GC(config.Logger, dryrun) - }), -} - -var extendCmd = &cobra.Command{ - Use: "extend ", - Short: "extend the lifetime of a cluster", - Long: `Extend the lifetime of the specified cluster to prevent it from being -destroyed: - - roachprod extend marc-test --lifetime=6h -`, - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - return roachprod.Extend(config.Logger, args[0], extendLifetime) - }), -} - -var loadBalancerCmd = &cobra.Command{ - Use: "load-balancer [command]", - Short: "manage and query load balancers", - Long: `create load balancers for specific services, query the IP or postgres URL of a load balancer`, - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - return roachprod.CreateLoadBalancer(context.Background(), config.Logger, - args[0], isSecure, virtualClusterName, sqlInstance, - ) - }), -} - -var createLoadBalancerCmd = &cobra.Command{ - Use: "create ", - Short: "create a load balancer for a cluster", - Long: `Create a load balancer for a specific service (port), system by default, for the given cluster. - -The load balancer is created using the cloud provider's load balancer service. -Currently only Google Cloud is supported, and the cluster must have been created -with the --gce-managed flag. On Google Cloud a load balancer consists of various -components that include backend services, health checks and forwarding rules. -These resources will automatically be destroyed when the cluster is destroyed. 
-`, - - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - return roachprod.CreateLoadBalancer(context.Background(), config.Logger, - args[0], isSecure, virtualClusterName, sqlInstance, - ) - }), -} - -var loadBalancerPGUrl = &cobra.Command{ - Use: "pgurl ", - Short: "get the postgres URL of a load balancer", - Long: fmt.Sprintf(`Get the postgres URL of a load balancer. -%[1]s`, strings.TrimSpace(AuthModeHelp)), - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - auth, err := install.ResolveAuthMode(authMode) - if err != nil { - return err - } - url, err := roachprod.LoadBalancerPgURL(context.Background(), config.Logger, args[0], pgurlCertsDir, roachprod.PGURLOptions{ - Database: database, - External: external, - Secure: isSecure, - VirtualClusterName: virtualClusterName, - SQLInstance: sqlInstance, - Auth: auth, - }) - if err != nil { - return err - } - fmt.Println(url) - return nil - }), -} - -var loadBalancerIP = &cobra.Command{ - Use: "ip ", - Short: "get the IP address of a load balancer", - Long: "Get the IP address of a load balancer.", - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - ip, err := roachprod.LoadBalancerIP(context.Background(), config.Logger, args[0], virtualClusterName, sqlInstance) - if err != nil { - return err - } - fmt.Println(ip) - return nil - }), -} - -const tagHelp = ` -The --tag flag can be used to to associate a tag with the process. This tag can -then be used to restrict the processes which are operated on by the status and -stop commands. Tags can have a hierarchical component by utilizing a slash -separated string similar to a filesystem path. A tag matches if a prefix of the -components match. For example, the tag "a/b" will match both "a/b" and -"a/b/c/d". -` - -var startCmd = &cobra.Command{ - Use: "start ", - Short: "start nodes on a cluster", - Long: `Start nodes on a cluster. 
- -Nodes are started in secure mode by default and there is a one time -initialization for the cluster to create and distribute the certs. -Note that running some modes in secure mode and others in insecure -mode is not a supported Cockroach configuration. To start nodes in -insecure mode, use the --insecure flag. - -The --binary flag specifies the remote binary to run. It is up to the roachprod -user to ensure this binary exists, usually via "roachprod put". Note that no -cockroach software is installed by default on a newly created cluster. - -The --args and --env flags can be used to pass arbitrary command line flags and -environment variables to the cockroach process. -` + tagHelp + ` -The "start" command takes care of setting up the --join address and specifying -reasonable defaults for other flags. One side-effect of this convenience is -that node 1 is special and if started, is used to auto-initialize the cluster. -The --skip-init flag can be used to avoid auto-initialization (which can then -separately be done using the "init" command). - -If the COCKROACH_DEV_LICENSE environment variable is set the enterprise.license -cluster setting will be set to its value. -`, - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - clusterSettingsOpts := []install.ClusterSettingOption{ - install.TagOption(tag), - install.PGUrlCertsDirOption(pgurlCertsDir), - install.SecureOption(isSecure), - install.UseTreeDistOption(useTreeDist), - install.EnvOption(nodeEnv), - install.NumRacksOption(numRacks), - } - return roachprod.Start(context.Background(), config.Logger, args[0], startOpts, clusterSettingsOpts...) - }), -} - -var updateTargetsCmd = &cobra.Command{ - Use: "update-targets ", - Short: "update prometheus target configurations for a cluster", - Long: `Update prometheus target configurations of each node of a cluster. - -The "start" command updates the prometheus target configuration every time. 
But, in case of any -failure, this command can be used to update the configurations. - -The default prometheus url is https://grafana.testeng.crdb.io/. This can be overwritten by using the -environment variable COCKROACH_PROM_HOST_URL - -Note that if the cluster is started in insecure mode, set the insecure mode here as well by using the --insecure flag. -`, - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - clusterSettingsOpts := []install.ClusterSettingOption{ - install.SecureOption(isSecure), - } - return roachprod.UpdateTargets(context.Background(), config.Logger, args[0], clusterSettingsOpts...) - }), -} - -var stopCmd = &cobra.Command{ - Use: "stop [--sig] [--wait]", - Short: "stop nodes on a cluster", - Long: `Stop nodes on a cluster. - -Stop roachprod created processes running on the nodes in a cluster, including -processes started by the "start", "run" and "ssh" commands. Every process -started by roachprod is tagged with a ROACHPROD environment variable which is -used by "stop" to locate the processes and terminate them. By default processes -are killed with signal 9 (SIGKILL) giving them no chance for a graceful exit. - -The --sig flag will pass a signal to kill to allow us finer control over how we -shutdown cockroach. The --wait flag causes stop to loop waiting for all -processes with the right ROACHPROD environment variable to exit. Note that stop -will wait forever if you specify --wait with a non-terminating signal (e.g. -SIGHUP), unless you also configure --max-wait. - ---wait defaults to true for signal 9 (SIGKILL) and false for all other signals. 
-` + tagHelp + ` -`, - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - wait := waitFlag - if sig == 9 /* SIGKILL */ && !cmd.Flags().Changed("wait") { - wait = true - } - stopOpts := roachprod.StopOpts{Wait: wait, GracePeriod: gracePeriod, ProcessTag: tag, Sig: sig} - return roachprod.Stop(context.Background(), config.Logger, args[0], stopOpts) - }), -} - -var startInstanceCmd = &cobra.Command{ - Use: "start-sql --storage-cluster [--external-nodes ]", - Short: "start the SQL/HTTP service for a virtual cluster as a separate process", - Long: `Start SQL/HTTP instances for a virtual cluster as separate processes. - -The --storage-cluster flag must be used to specify a storage cluster -(with optional node selector) which is already running. The command -will create the virtual cluster on the storage cluster if it does not -exist already. If creating multiple virtual clusters on the same -node, the --sql-instance flag must be passed to differentiate them. - -The instance is started in shared process (in memory) mode by -default. To start an external process instance, pass the ---external-cluster flag indicating where the SQL server processes -should be started. - -Nodes are started in secure mode by default and there is a one time -initialization for the cluster to create and distribute the certs. -Note that running some modes in secure mode and others in insecure -mode is not a supported Cockroach configuration. To start nodes in -insecure mode, use the --insecure flag. - -The --binary flag specifies the remote binary to run, if starting -external services. It is up to the roachprod user to ensure this -binary exists, usually via "roachprod put". Note that no cockroach -software is installed by default on a newly created cluster. - -The --args and --env flags can be used to pass arbitrary command line flags and -environment variables to the cockroach process. 
-` + tagHelp + ` -`, - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - clusterSettingsOpts := []install.ClusterSettingOption{ - install.TagOption(tag), - install.PGUrlCertsDirOption(pgurlCertsDir), - install.SecureOption(isSecure), - install.UseTreeDistOption(useTreeDist), - install.EnvOption(nodeEnv), - install.NumRacksOption(numRacks), - } - - startOpts.Target = install.StartSharedProcessForVirtualCluster - // If the user passed an `--external-nodes` option, we are - // starting a separate process virtual cluster. - if startOpts.VirtualClusterLocation != "" { - startOpts.Target = install.StartServiceForVirtualCluster - } - - startOpts.VirtualClusterName = args[0] - return roachprod.StartServiceForVirtualCluster( - context.Background(), config.Logger, storageCluster, startOpts, clusterSettingsOpts..., - ) - }), -} - -var stopInstanceCmd = &cobra.Command{ - Use: "stop-sql --cluster --sql-instance [--sig] [--wait]", - Short: "stop sql instances on a cluster", - Long: `Stop sql instances on a cluster. - -Stop roachprod created virtual clusters (shared or separate process). By default, -separate processes are killed with signal 9 (SIGKILL) giving them no chance for a -graceful exit. - -The --sig flag will pass a signal to kill to allow us finer control over how we -shutdown processes. The --wait flag causes stop to loop waiting for all -processes to exit. Note that stop will wait forever if you specify --wait with a -non-terminating signal (e.g. SIGHUP), unless you also configure --max-wait. - ---wait defaults to true for signal 9 (SIGKILL) and false for all other signals. 
-`, - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - wait := waitFlag - if sig == 9 /* SIGKILL */ && !cmd.Flags().Changed("wait") { - wait = true - } - stopOpts := roachprod.StopOpts{ - Wait: wait, - GracePeriod: gracePeriod, - Sig: sig, - VirtualClusterName: virtualClusterName, - SQLInstance: sqlInstance, - } - clusterName := args[0] - return roachprod.StopServiceForVirtualCluster(context.Background(), config.Logger, clusterName, isSecure, stopOpts) - }), -} - -const ( - cockroachApp = ` - cockroach - Cockroach nightly builds. Can provide an optional SHA, otherwise - latest build version is used.` - releaseApp = ` - release - Official CockroachDB Release. Must provide a specific release - version.` - customizedApp = ` - customized - Cockroach customized builds, usually generated by running - ./scripts/tag-custom-build.sh. Must provide a specific tag.` - localApp = ` - local - Use a provided local binary, must provide the path to the binary.` - workloadApp = ` - workload - Cockroach workload application.` -) - -var deployCmd = &cobra.Command{ - Use: "deploy |", - Short: "deploy a new version of cockroach", - Long: fmt.Sprintf(`Performs a rolling upgrade of cockroach. - -The deploy command currently only supports redeploying the storage cluster -(system tenant). It should be run on a cluster that is already running -cockroach. The command will download the specified version of cockroach and -stage it on the cluster. It will then perform a rolling upgrade of the cluster, -one node at a time, to the new version. 
- -Currently available application options are: - %s`, strings.TrimSpace(cockroachApp+releaseApp+customizedApp+localApp)), - Args: cobra.RangeArgs(2, 3), - Run: wrap(func(cmd *cobra.Command, args []string) error { - versionArg := "" - pathToBinary := "" - if args[1] == "local" { - if len(args) < 3 { - return errors.New("local application requires a path to the binary: deploy local ") - } - pathToBinary = args[2] - } else if len(args) == 3 { - versionArg = args[2] - } - return roachprod.Deploy(context.Background(), config.Logger, args[0], args[1], - versionArg, pathToBinary, pause, deploySig, deployWaitFlag, deployGracePeriod, secure) - }), -} - -var initCmd = &cobra.Command{ - Use: "init ", - Short: "initialize the cluster", - Long: `Initialize the cluster. - -The "init" command bootstraps the cluster (using "cockroach init"). It also sets -default cluster settings. It's intended to be used in conjunction with -'roachprod start --skip-init'. -`, - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - return roachprod.Init(context.Background(), config.Logger, args[0], startOpts) - }), -} - -var statusCmd = &cobra.Command{ - Use: "status ", - Short: "retrieve the status of nodes in a cluster", - Long: `Retrieve the status of nodes in a cluster. - -The "status" command outputs the binary and PID for the specified nodes: - - ~ roachprod status local - local: status 3/3 - 1: cockroach 29688 - 2: cockroach 29687 - 3: cockroach 29689 -` + tagHelp + ` -`, - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - statuses, err := roachprod.Status(context.Background(), config.Logger, args[0], tag) - if err != nil { - return err - } - for _, status := range statuses { - if status.Err != nil { - config.Logger.Printf(" %2d: %s %s\n", status.NodeID, status.Err.Error()) - } else if !status.Running { - // TODO(irfansharif): Surface the staged version here? 
- config.Logger.Printf(" %2d: not running\n", status.NodeID) - } else { - config.Logger.Printf(" %2d: %s %s\n", status.NodeID, status.Version, status.Pid) - } - } - return nil - }), -} - -var logsCmd = &cobra.Command{ - Use: "logs", - Short: "retrieve and merge logs in a cluster", - Long: `Retrieve and merge logs in a cluster. - -The "logs" command runs until terminated. It works similarly to get but is -specifically focused on retrieving logs periodically and then merging them -into a single stream. -`, - Args: cobra.RangeArgs(1, 2), - Run: wrap(func(cmd *cobra.Command, args []string) error { - logsOpts := roachprod.LogsOpts{ - Dir: logsDir, Filter: logsFilter, ProgramFilter: logsProgramFilter, - Interval: logsInterval, From: logsFrom, To: logsTo, Out: cmd.OutOrStdout(), - } - var dest string - if len(args) == 2 { - dest = args[1] - } else { - dest = args[0] + ".logs" - } - return roachprod.Logs(config.Logger, args[0], dest, logsOpts) - }), -} - -var monitorCmd = &cobra.Command{ - Use: "monitor", - Short: "monitor the status of nodes in a cluster", - Long: `Monitor the status of cockroach nodes in a cluster. - -The "monitor" command runs until terminated. At startup it outputs a line for -each specified node indicating the status of the node (either the PID of the -node if alive, or "dead" otherwise). 
It then watches for changes in the status -of nodes, outputting a line whenever a change is detected: - - ~ roachprod monitor local - 1: 29688 - 3: 29689 - 2: 29687 - 3: dead - 3: 30718 -`, - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - eventChan, err := roachprod.Monitor(context.Background(), config.Logger, args[0], monitorOpts) - if err != nil { - return err - } - for info := range eventChan { - fmt.Println(info.String()) - } - - return nil - }), -} - -var signalCmd = &cobra.Command{ - Use: "signal ", - Short: "send signal to cluster", - Long: "Send a POSIX signal, specified by its integer code, to every process started via roachprod in a cluster.", - Args: cobra.ExactArgs(2), - Run: wrap(func(cmd *cobra.Command, args []string) error { - sig, err := strconv.ParseInt(args[1], 10, 8) - if err != nil { - return errors.Wrapf(err, "invalid signal argument") - } - return roachprod.Signal(context.Background(), config.Logger, args[0], int(sig)) - }), -} - -var wipeCmd = &cobra.Command{ - Use: "wipe ", - Short: "wipe a cluster", - Long: `Wipe the nodes in a cluster. - -The "wipe" command first stops any processes running on the nodes in a cluster -(via the "stop" command) and then deletes the data directories used by the -nodes. -`, - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - return roachprod.Wipe(context.Background(), config.Logger, args[0], wipePreserveCerts) - }), -} - -var reformatCmd = &cobra.Command{ - Use: "reformat ", - Short: "reformat disks in a cluster\n", - Long: ` -Reformat disks in a cluster to use the specified filesystem. - -WARNING: Reformatting will delete all existing data in the cluster. 
- -Filesystem options: - ext4 - zfs - -When running with ZFS, you can create a snapshot of the filesystem's current -state using the 'zfs snapshot' command: - - $ roachprod run 'sudo zfs snapshot data1@pristine' - -You can then nearly instantaneously restore the filesystem to this state with -the 'zfs rollback' command: - - $ roachprod run 'sudo zfs rollback data1@pristine' - -`, - - Args: cobra.ExactArgs(2), - Run: wrap(func(cmd *cobra.Command, args []string) error { - return roachprod.Reformat(context.Background(), config.Logger, args[0], args[1]) - }), -} - -var runCmd = &cobra.Command{ - Use: "run [args]", - Aliases: []string{"ssh"}, - Short: "run a command on the nodes in a cluster", - Long: `Run a command on the nodes in a cluster. -`, - Args: cobra.MinimumNArgs(1), - Run: wrap(func(_ *cobra.Command, args []string) error { - return roachprod.Run(context.Background(), config.Logger, args[0], extraSSHOptions, tag, - isSecure, os.Stdout, os.Stderr, args[1:], install.RunOptions{FailOption: install.FailSlow}) - }), -} - -var resetCmd = &cobra.Command{ - Use: "reset ", - Short: "reset *all* VMs in a cluster", - Long: `Reset a cloud VM.`, - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) (retErr error) { - return roachprod.Reset(config.Logger, args[0]) - }), -} - -var installCmd = &cobra.Command{ - Use: "install ", - Short: "install 3rd party software", - Long: `Install third party software. 
Currently available installation options are: - - ` + strings.Join(install.SortedCmds(), "\n ") + ` -`, - Args: cobra.MinimumNArgs(2), - Run: wrap(func(cmd *cobra.Command, args []string) error { - return roachprod.Install(context.Background(), config.Logger, args[0], args[1:]) - }), -} - -var downloadCmd = &cobra.Command{ - Use: "download [DESTINATION]", - Short: "download 3rd party tools", - Long: "Downloads 3rd party tools, using a GCS cache if possible.", - Args: cobra.RangeArgs(3, 4), - Run: wrap(func(cmd *cobra.Command, args []string) error { - src, sha := args[1], args[2] - var dest string - if len(args) == 4 { - dest = args[3] - } - return roachprod.Download(context.Background(), config.Logger, args[0], src, sha, dest) - }), -} - -var stageURLCmd = &cobra.Command{ - Use: "stageurl []", - Short: "print URL to cockroach binaries", - Long: `Prints URL for release and edge binaries. - -Currently available application options are: - cockroach - Cockroach nightly builds. Can provide an optional SHA, otherwise - latest build version is used. - workload - Cockroach workload application. - release - Official CockroachDB Release. Must provide a specific release - version. - customized - Cockroach customized builds, usually generated by running - ./scripts/tag-custom-build.sh. Must provide a specific tag. -`, - Args: cobra.RangeArgs(1, 2), - Run: wrap(func(cmd *cobra.Command, args []string) error { - versionArg := "" - if len(args) == 2 { - versionArg = args[1] - } - urls, err := roachprod.StageURL(config.Logger, args[0], versionArg, stageOS, stageArch) - if err != nil { - return err - } - for _, u := range urls { - fmt.Println(u) - } - return nil - }), -} - -var stageCmd = &cobra.Command{ - Use: "stage []", - Short: "stage cockroach binaries", - Long: fmt.Sprintf(`Stages release and edge binaries to the cluster. 
- -Currently available application options are: - %s - -Some examples of usage: - -- stage edge build of cockroach build at a specific SHA: - roachprod stage my-cluster cockroach e90e6903fee7dd0f88e20e345c2ddfe1af1e5a97 - - -- Stage the most recent edge build of the workload tool: - roachprod stage my-cluster workload - - -- Stage the official release binary of CockroachDB at version 2.0.5 - roachprod stage my-cluster release v2.0.5 - - -- Stage customized binary of CockroachDB at version v23.2.0-alpha.2-4375-g7cd2b76ed00 - roachprod stage my-cluster customized v23.2.0-alpha.2-4375-g7cd2b76ed00 -`, strings.TrimSpace(cockroachApp+workloadApp+releaseApp+customizedApp)), - Args: cobra.RangeArgs(2, 3), - Run: wrap(func(cmd *cobra.Command, args []string) error { - versionArg := "" - if len(args) == 3 { - versionArg = args[2] - } - return roachprod.Stage(context.Background(), config.Logger, args[0], stageOS, stageArch, stageDir, args[1], versionArg) - }), -} - -var distributeCertsCmd = &cobra.Command{ - Use: "distribute-certs ", - Short: "distribute certificates to the nodes in a cluster", - Long: `Distribute certificates to the nodes in a cluster. -If the certificates already exist, no action is taken. Note that this command is -invoked automatically when a secure cluster is bootstrapped by "roachprod -start." -`, - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - return roachprod.DistributeCerts(context.Background(), config.Logger, args[0]) - }), -} - -var putCmd = &cobra.Command{ - Use: "put []", - Short: "copy a local file to the nodes in a cluster", - Long: `Copy a local file to the nodes in a cluster. 
-`, - Args: cobra.RangeArgs(2, 3), - Run: wrap(func(cmd *cobra.Command, args []string) error { - src := args[1] - dest := path.Base(src) - if len(args) == 3 { - dest = args[2] - } - return roachprod.Put(context.Background(), config.Logger, args[0], src, dest, useTreeDist) - }), -} - -var getCmd = &cobra.Command{ - Use: "get []", - Short: "copy a remote file from the nodes in a cluster", - Long: `Copy a remote file from the nodes in a cluster. If the file is retrieved from -multiple nodes the destination file name will be prefixed with the node number. -`, - Args: cobra.RangeArgs(2, 3), - Run: wrap(func(cmd *cobra.Command, args []string) error { - src := args[1] - dest := path.Base(src) - if len(args) == 3 { - dest = args[2] - } - return roachprod.Get(context.Background(), config.Logger, args[0], src, dest) - }), -} - -var sqlCmd = &cobra.Command{ - Use: "sql -- [args]", - Short: "run `cockroach sql` on a remote cluster", - Long: "Run `cockroach sql` on a remote cluster.\n", - Args: cobra.MinimumNArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - auth, ok := install.PGAuthModes[authMode] - if !ok { - return errors.Newf("unsupported auth-mode %s, valid auth-modes: %v", authMode, maps.Keys(install.PGAuthModes)) - } - - return roachprod.SQL(context.Background(), config.Logger, args[0], isSecure, virtualClusterName, sqlInstance, auth, database, args[1:]) - }), -} - -var pgurlCmd = &cobra.Command{ - Use: "pgurl --auth-mode ", - Short: "generate pgurls for the nodes in a cluster", - Long: fmt.Sprintf(`Generate pgurls for the nodes in a cluster. 
- -%[1]s -`, strings.TrimSpace(AuthModeHelp)), - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - auth, err := install.ResolveAuthMode(authMode) - if err != nil { - return err - } - urls, err := roachprod.PgURL(context.Background(), config.Logger, args[0], pgurlCertsDir, roachprod.PGURLOptions{ - Database: database, - External: external, - Secure: isSecure, - VirtualClusterName: virtualClusterName, - SQLInstance: sqlInstance, - Auth: auth, - }) - if err != nil { - return err - } - fmt.Println(strings.Join(urls, " ")) - return nil - }), -} - -const AuthModeHelp = ` ---auth-mode specifies the method of authentication unless --insecure is passed. -Defaults to root if not passed. Available auth-modes are: - - root: authenticates with the root user and root certificates - - user-password: authenticates with the default roachprod user and password - - user-cert: authenticates with the default roachprod user and certificates` - -var pprofCmd = &cobra.Command{ - Use: "pprof ", - Args: cobra.ExactArgs(1), - Aliases: []string{"pprof-heap"}, - Short: "capture a pprof profile from the specified nodes", - Long: `Capture a pprof profile from the specified nodes. 
- -Examples: - - # Capture CPU profile for all nodes in the cluster - roachprod pprof CLUSTERNAME - # Capture CPU profile for the first node in the cluster for 60 seconds - roachprod pprof CLUSTERNAME:1 --duration 60s - # Capture a Heap profile for the first node in the cluster - roachprod pprof CLUSTERNAME:1 --heap - # Same as above - roachprod pprof-heap CLUSTERNAME:1 -`, - Run: wrap(func(cmd *cobra.Command, args []string) error { - if cmd.CalledAs() == "pprof-heap" { - pprofOpts.Heap = true - } - return roachprod.Pprof(context.Background(), config.Logger, args[0], pprofOpts) - }), -} - -var adminurlCmd = &cobra.Command{ - Use: "adminurl ", - Aliases: []string{"admin", "adminui"}, - Short: "generate admin UI URLs for the nodes in a cluster\n", - Long: `Generate admin UI URLs for the nodes in a cluster. -`, - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - urls, err := roachprod.AdminURL( - context.Background(), config.Logger, args[0], virtualClusterName, sqlInstance, adminurlPath, adminurlIPs, urlOpen, isSecure, - ) - if err != nil { - return err - } - for _, url := range urls { - fmt.Println(url) - } - return nil - }), -} - -var ipCmd = &cobra.Command{ - Use: "ip ", - Short: "get the IP addresses of the nodes in a cluster", - Long: `Get the IP addresses of the nodes in a cluster. 
-`, - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - ips, err := roachprod.IP(config.Logger, args[0], external) - if err != nil { - return err - } - for _, ip := range ips { - fmt.Println(ip) - } - return nil - }), -} - -var versionCmd = &cobra.Command{ - Use: `version`, - Short: `print version information`, - RunE: func(cmd *cobra.Command, args []string) error { - fmt.Println(roachprod.Version(config.Logger)) - return nil - }, -} - -var getProvidersCmd = &cobra.Command{ - Use: `get-providers`, - Short: `print providers state (active/inactive)`, - RunE: func(cmd *cobra.Command, args []string) error { - providers := roachprod.InitProviders() - for provider, state := range providers { - fmt.Printf("%s: %s\n", provider, state) - } - return nil - }, -} - -var grafanaStartCmd = &cobra.Command{ - Use: `grafana-start `, - Short: `spins up a prometheus and grafana instance on the last node in the cluster; NOTE: for arm64 clusters, use --arch arm64`, - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - var grafanaDashboardJSONs []string - var grafanaConfigURL string - if grafanaConfig != "" { - url, err := url.Parse(grafanaConfig) - if err != nil { - return err - } - switch url.Scheme { - case "http", "https": - grafanaConfigURL = grafanaConfig - case "file", "": - if data, err := grafana.GetDashboardJSONFromFile(url.Path); err != nil { - return err - } else { - grafanaDashboardJSONs = []string{data} - } - default: - return errors.Newf("unsupported scheme %s", url.Scheme) - } - } else { - var err error - if grafanaDashboardJSONs, err = grafana.GetDefaultDashboardJSONs(); err != nil { - return err - } - } - arch := vm.ArchAMD64 - if grafanaArch == "arm64" { - arch = vm.ArchARM64 - } - return roachprod.StartGrafana(context.Background(), config.Logger, args[0], arch, - grafanaConfigURL, grafanaDashboardJSONs, nil) - }), -} - -var grafanaStopCmd = &cobra.Command{ - Use: `grafana-stop `, - Short: 
`spins down prometheus and grafana instances on the last node in the cluster`, - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - return roachprod.StopGrafana(context.Background(), config.Logger, args[0], "") - }), -} - -var grafanaDumpCmd = &cobra.Command{ - Use: `grafana-dump `, - Short: `dump prometheus data to the specified directory`, - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - if grafanaDumpDir == "" { - return errors.New("--dump-dir unspecified") - } - return roachprod.PrometheusSnapshot(context.Background(), config.Logger, args[0], grafanaDumpDir) - }), -} - -var grafanaURLCmd = &cobra.Command{ - Use: `grafanaurl `, - Short: `returns a url to the grafana dashboard`, - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - url, err := roachprod.GrafanaURL(context.Background(), config.Logger, args[0], - urlOpen) - if err != nil { - return err - } - fmt.Println(url) - return nil - }), -} - -var grafanaAnnotationCmd = &cobra.Command{ - Use: `grafana-annotation --tags [, ...] --dashboard-uid --time-range [, ]`, - Short: `adds an annotation to the specified grafana instance`, - Long: fmt.Sprintf(`Adds an annotation to the specified grafana instance - -By default, we assume the grafana instance needs an authentication token to connect -to. A service account json and audience will be read in from the environment -variables %s and %s to attempt authentication through google IDP. Use the --insecure -option when a token is not necessary. - ---tags specifies the tags the annotation should have. - ---dashboard-uid specifies the dashboard you want the annotation to be created in. If -left empty, creates the annotation in the organization instead. - ---time-range can be used to specify in epoch millisecond time the annotation's timestamp. -If left empty, creates the annotation at the current time. 
If only start-time is specified, -creates an annotation at start-time. If both start-time and end-time are specified, -creates an annotation over time range. - -Example: -# Create an annotation over time range 1-100 on the centralized grafana instance, which needs authentication. -roachprod grafana-annotation grafana.testeng.crdb.io example-annotation-event --tags my-cluster --tags test-run-1 --dashboard-uid overview --time-range 1,100 -`, roachprodutil.ServiceAccountJson, roachprodutil.ServiceAccountAudience), - Args: cobra.ExactArgs(2), - Run: wrap(func(cmd *cobra.Command, args []string) error { - req := grafana.AddAnnotationRequest{ - Text: args[1], - Tags: grafanaTags, - DashboardUID: grafanaDashboardUID, - } - - switch len(grafanaTimeRange) { - case 0: - // Grafana API will default to adding annotation at current time. - case 1: - // Okay to only specify the start time. - req.StartTime = grafanaTimeRange[0] - case 2: - req.StartTime = grafanaTimeRange[0] - req.EndTime = grafanaTimeRange[1] - default: - return errors.Newf("Too many arguments for --time-range, expected 1 or 2, got: %d", len(grafanaTimeRange)) - } - - return roachprod.AddGrafanaAnnotation(context.Background(), args[0] /* host */, isSecure, req) - }), -} - -var jaegerStartCmd = &cobra.Command{ - Use: `jaeger-start `, - Short: `starts a jaeger container on the last node in the cluster`, - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - return roachprod.StartJaeger(context.Background(), config.Logger, args[0], - virtualClusterName, isSecure, jaegerConfigNodes) - }), -} - -var jaegerStopCmd = &cobra.Command{ - Use: `jaeger-stop `, - Short: `stops a running jaeger container on the last node in the cluster`, - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - return roachprod.StopJaeger(context.Background(), config.Logger, args[0]) - }), -} - -var jaegerURLCmd = &cobra.Command{ - Use: `jaegerurl `, - Short: `returns the URL 
of the cluster's jaeger UI`, - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - url, err := roachprod.JaegerURL(context.Background(), config.Logger, args[0], - urlOpen) - if err != nil { - return err - } - fmt.Println(url) - return nil - }), -} - -var destroyDNSCmd = &cobra.Command{ - Use: `destroy-dns `, - Short: `cleans up DNS entries for the cluster`, - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - return roachprod.DestroyDNS(context.Background(), config.Logger, args[0]) - }), -} - -var snapshotCmd = &cobra.Command{ - Use: `snapshot`, - Short: "snapshot enables creating/listing/deleting/applying cluster snapshots", - Args: cobra.MinimumNArgs(1), -} - -var snapshotCreateCmd = &cobra.Command{ - Use: `create `, - Short: "snapshot a named cluster, using the given snapshot name and description", - Args: cobra.ExactArgs(3), - Run: wrap(func(cmd *cobra.Command, args []string) error { - cluster := args[0] - name := args[1] - desc := args[2] - snapshots, err := roachprod.CreateSnapshot(context.Background(), config.Logger, cluster, vm.VolumeSnapshotCreateOpts{ - Name: name, - Description: desc, - }) - if err != nil { - return err - } - for _, snapshot := range snapshots { - config.Logger.Printf("created snapshot %s (id: %s)", snapshot.Name, snapshot.ID) - } - return nil - }), -} - -var snapshotListCmd = &cobra.Command{ - Use: `list []`, - Short: "list all snapshots for the given cloud provider, optionally filtering by the given name", - Args: cobra.RangeArgs(1, 2), - Run: wrap(func(cmd *cobra.Command, args []string) error { - provider := args[0] - var name string - if len(args) == 2 { - name = args[1] - } - snapshots, err := roachprod.ListSnapshots(context.Background(), config.Logger, provider, - vm.VolumeSnapshotListOpts{ - NamePrefix: name, - }, - ) - if err != nil { - return err - } - for _, snapshot := range snapshots { - config.Logger.Printf("found snapshot %s (id: %s)", snapshot.Name, 
snapshot.ID) - } - return nil - }), -} - -var snapshotDeleteCmd = &cobra.Command{ - Use: `delete `, - Short: "delete all snapshots for the given cloud provider optionally filtering by the given name", - Args: cobra.ExactArgs(2), - Run: wrap(func(cmd *cobra.Command, args []string) error { - ctx := context.Background() - provider, name := args[0], args[1] - snapshots, err := roachprod.ListSnapshots(ctx, config.Logger, provider, - vm.VolumeSnapshotListOpts{ - NamePrefix: name, - }, - ) - if err != nil { - return err - } - - for _, snapshot := range snapshots { - config.Logger.Printf("deleting snapshot %s (id: %s)", snapshot.Name, snapshot.ID) - } - if !dryrun { - if err := roachprod.DeleteSnapshots(ctx, config.Logger, provider, snapshots...); err != nil { - return err - } - } - config.Logger.Printf("done") - return nil - }), -} - -var snapshotApplyCmd = &cobra.Command{ - Use: `apply `, - Short: "apply the named snapshots from the given cloud provider to the named cluster", - Args: cobra.ExactArgs(3), - Run: wrap(func(cmd *cobra.Command, args []string) error { - ctx := context.Background() - provider, name, cluster := args[0], args[1], args[2] - snapshots, err := roachprod.ListSnapshots(ctx, config.Logger, provider, - vm.VolumeSnapshotListOpts{ - NamePrefix: name, - }, - ) - if err != nil { - return err - } - - return roachprod.ApplySnapshots(ctx, config.Logger, cluster, snapshots, vm.VolumeCreateOpts{ - Size: 500, // TODO(irfansharif): Make this configurable? - Labels: map[string]string{ - vm.TagUsage: "roachprod", - }, - }) - }), -} - -var rootStorageCmd = &cobra.Command{ - Use: `storage`, - Short: "storage enables administering storage related commands and configurations", - Args: cobra.MinimumNArgs(1), -} - -var rootStorageCollectionCmd = &cobra.Command{ - Use: `collection`, - Short: "the collection command allows for enable or disabling the storage workload " + - "collector for a provided cluster (including a subset of nodes). 
The storage workload " + - "collection is defined in pebble replay/workload_capture.go.", - Args: cobra.MinimumNArgs(1), -} - -var collectionStartCmd = &cobra.Command{ - Use: `start `, - Short: "start the workload collector for a provided cluster (including a subset of nodes)", - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - cluster := args[0] - return roachprod.StorageCollectionPerformAction( - context.Background(), - config.Logger, - cluster, - "start", - volumeCreateOpts, - ) - }), -} - -var collectionStopCmd = &cobra.Command{ - Use: `stop `, - Short: "stop the workload collector for a provided cluster (including a subset of nodes)", - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - cluster := args[0] - return roachprod.StorageCollectionPerformAction( - context.Background(), - config.Logger, - cluster, - "stop", - volumeCreateOpts, - ) - }), -} - -var collectionListVolumes = &cobra.Command{ - Use: `list-volumes `, - Short: "list the nodes and their attached collector volumes", - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - cluster := args[0] - return roachprod.StorageCollectionPerformAction( - context.Background(), - config.Logger, - cluster, - "list-volumes", - volumeCreateOpts, - ) - }), -} - -var storageSnapshotCmd = &cobra.Command{ - Use: `snapshot `, - Short: "snapshot a clusters workload collector volume", - Args: cobra.ExactArgs(3), - Run: wrap(func(cmd *cobra.Command, args []string) error { - cluster := args[0] - name := args[1] - desc := args[2] - _, err := roachprod.CreateSnapshot(context.Background(), config.Logger, cluster, vm.VolumeSnapshotCreateOpts{ - Name: name, - Description: desc, - }) - return err - }), -} - -var sideEyeRootCmd = &cobra.Command{ - Use: "side-eye", - Short: "interact with side-eye.io functionality", - Long: `Interact with side-eye.io functionality - -Side-Eye (app.side-eye.io) is a distributed debugger 
that can be used to capture -snapshots of a CockroachDB cluster. -`, - Args: cobra.MinimumNArgs(1), -} - -var sideEyeInstallCmd = &cobra.Command{ - Use: "install ", - Short: "install and start the Side-Eye agents on all nodes in the cluster", - Long: `Install and start the Side-Eye agents on all nodes in the cluster - -` + "`roachprod side-eye snapshot `" + ` can then be used to capture cluster snapshots. -`, - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - cluster := args[0] - - ctx := context.Background() - l := config.Logger - sideEyeToken, ok := roachprod.GetSideEyeTokenFromEnv() - if !ok { - return errors.New("Side-Eye token is not configured via SIDE_EYE_API_TOKEN or gcloud secret") - } - - return roachprod.StartSideEyeAgents(ctx, l, cluster, cluster /* envName */, sideEyeToken) - }), -} - -var sideEyeSnapCmd = &cobra.Command{ - Use: "snapshot ", - Aliases: []string{"snap"}, - Short: "capture a cluster snapshot", - Long: `Capture a cluster snapshot using Side-Eye - -The command will print an app.side-eye.io URL where the snapshot can be viewed. -`, - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - cluster := args[0] - ctx := context.Background() - l := config.Logger - l.PrintfCtx(ctx, "capturing snapshot of the cluster with Side-Eye...") - snapURL, ok := roachprod.CaptureSideEyeSnapshot(context.Background(), config.Logger, cluster, nil /* client */) - if ok { - l.PrintfCtx(ctx, "captured Side-Eye snapshot: %s", snapURL) - } - return nil - }), -} - -// Before executing any command, validate and canonicalize args. -func validateAndConfigure(cmd *cobra.Command, args []string) { - // Skip validation for commands that are self-sufficient. - switch cmd.Name() { - case "help", "version", "list": - return - } - - printErrAndExit := func(err error) { - if err != nil { - fmt.Fprintf(os.Stderr, "%s\n", err) - os.Exit(1) - } - } - - // Validate architecture flag, if set. 
- if archOpt := cmd.Flags().Lookup("arch"); archOpt != nil && archOpt.Changed { - arch := vm.CPUArch(strings.ToLower(archOpt.Value.String())) - - if arch != vm.ArchAMD64 && arch != vm.ArchARM64 && arch != vm.ArchFIPS { - printErrAndExit(fmt.Errorf("unsupported architecture %q", arch)) - } - if string(arch) != archOpt.Value.String() { - // Set the canonical value. - _ = cmd.Flags().Set("arch", string(arch)) - } - } - - // Validate cloud providers, if set. - providersSet := make(map[string]struct{}) - for _, p := range createVMOpts.VMProviders { - if _, ok := vm.Providers[p]; !ok { - printErrAndExit(fmt.Errorf("unknown cloud provider %q", p)) - } - if _, ok := providersSet[p]; ok { - printErrAndExit(fmt.Errorf("duplicate cloud provider specified %q", p)) - } - providersSet[p] = struct{}{} - } -} - -var updateCmd = &cobra.Command{ - Use: "update", - Short: "check gs://cockroach-nightly for a new roachprod binary; update if available", - Long: "Attempts to download the latest roachprod binary (on master) from gs://cockroach-nightly. " + - " Swaps the current binary with it. The current roachprod binary will be backed up" + - " and can be restored via `roachprod update --revert`.", - Run: wrap(func(cmd *cobra.Command, args []string) error { - // We only have prebuilt binaries for Linux. See #120750. - if runtime.GOOS != "linux" { - return errors.New("this command is only available on Linux at this time") - } - - currentBinary, err := os.Executable() - if err != nil { - return err - } - - if roachprodUpdateRevert { - if update.PromptYesNo("Revert to previous version? 
Note: this will replace the"+ - " current roachprod binary with a previous roachprod.bak binary.", true /* defaultYes */) { - if err := update.SwapBinary(currentBinary, currentBinary+".bak"); err != nil { - return err - } - fmt.Println("roachprod successfully reverted, run `roachprod -v` to confirm.") - } - return nil - } - - newBinary := currentBinary + ".new" - if err := update.DownloadLatestRoachprod(newBinary, roachprodUpdateBranch, roachprodUpdateOS, roachprodUpdateArch); err != nil { - return err - } - - if update.PromptYesNo("Continue with update? This will overwrite any existing roachprod.bak binary.", true /* defaultYes */) { - if err := update.SwapBinary(currentBinary, newBinary); err != nil { - return errors.WithDetail(err, "unable to update binary") - } - - fmt.Println("Update successful: run `roachprod -v` to confirm.") - } - return nil - }), -} - -func printPublicKeyTable(keys gce.AuthorizedKeys, includeSize bool) error { - // Align columns left and separate with at least two spaces. 
- tw := tabwriter.NewWriter(os.Stdout, 0, 8, 2, ' ', 0) - - fmt.Fprintf(tw, "%s\t%s\n", "User", "Key") - for _, ak := range keys { - fmt.Fprintf(tw, "%s\t%s\n", ak.User, ak.Format(64 /* maxLen */)) - } - - err := tw.Flush() - if !includeSize { - return err - } - - const maxProjectMetadataBytes = 262144 /* 256 KiB */ - metadataLen := len(keys.AsProjectMetadata()) - - usage := int(float64(metadataLen*100) / float64(maxProjectMetadataBytes)) - _, err = fmt.Printf("\nTOTAL: %d bytes (usage: %d%%)\n", metadataLen, usage) - return err -} - -var sshKeysCmd = &cobra.Command{ - Use: "ssh-keys", - Short: "manage SSH public keys added to clusters created by roachprod", -} - -var sshKeysListCmd = &cobra.Command{ - Use: "list", - Short: "list every SSH public key installed on clusters managed by roachprod", - Run: wrap(func(cmd *cobra.Command, args []string) error { - authorizedKeys, err := gce.GetUserAuthorizedKeys() - if err != nil { - return err - } - - return printPublicKeyTable(authorizedKeys, true /* includeSize */) - }), -} - -var sshKeysAddCmd = &cobra.Command{ - Use: "add [--user user]", - Short: "add a new SSH public key to the set of keys installed on clusters managed by roachprod", - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - sshKeyPath := args[0] - pkBytes, err := os.ReadFile(sshKeyPath) - if err != nil { - return fmt.Errorf("error reading public key file: %w", err) - } - - pubkey, comment, _, _, err := ssh.ParseAuthorizedKey(pkBytes) - if err != nil { - return fmt.Errorf("error parsing public key: %w", err) - } - - ak := gce.AuthorizedKey{ - User: sshKeyUser, - Key: pubkey, - Comment: comment, - } - - fmt.Printf("Adding new public key for user %s...\n", ak.User) - return gce.AddUserAuthorizedKey(ak) - }), -} - -var sshKeysRemoveCmd = &cobra.Command{ - Use: "remove ", - Short: "remove public keys belonging to a user from the set of keys installed on clusters managed by roachprod", - Args: cobra.ExactArgs(1), - Run: 
wrap(func(cmd *cobra.Command, args []string) error { - user := args[0] - - existingKeys, err := gce.GetUserAuthorizedKeys() - if err != nil { - return fmt.Errorf("failed to fetch existing keys: %w", err) - } - - var toBeDeleted gce.AuthorizedKeys - var newKeys gce.AuthorizedKeys - for _, existing := range existingKeys { - if existing.User == user { - toBeDeleted = append(toBeDeleted, existing) - } else { - newKeys = append(newKeys, existing) - } - } - - if len(toBeDeleted) == 0 { - fmt.Printf("No keys deleted.\n") - return nil - } - - fmt.Printf("The following keys are going to be deleted:\n") - if err := printPublicKeyTable(toBeDeleted, false /* includeSize */); err != nil { - return err - } - - if update.PromptYesNo("Are you sure?", false /* defaultYes */) { - fmt.Printf("Deleting %d keys belonging to %s...\n", len(toBeDeleted), user) - return gce.SetUserAuthorizedKeys(newKeys) - } else { - fmt.Printf("Aborted.\n") - return nil - } - }), -} - -func isSecureCluster(cmd *cobra.Command) (bool, error) { - hasSecureFlag := cmd.Flags().Changed("secure") - hasInsecureFlag := cmd.Flags().Changed("insecure") - - switch { - case hasSecureFlag && hasInsecureFlag: - // Disallow passing both flags, even if they are consistent. - return false, fmt.Errorf("cannot pass both --secure and --insecure flags") - - case hasSecureFlag: - desc := "Clusters are secure by default" - if !secure { - desc = "Use the --insecure flag to create insecure clusters" - } - - fmt.Printf("WARNING: --secure flag is deprecated. 
%s.\n", desc) - return secure, nil - - default: - return !insecure, nil - } -} - -var _ = func() struct{} { - sshKeysCmd.AddCommand( - sshKeysListCmd, - sshKeysAddCmd, - sshKeysRemoveCmd, - ) - - return struct{}{} -}() - -var fluentBitStartCmd = &cobra.Command{ - Use: "fluent-bit-start ", - Short: "Install and start Fluent Bit", - Long: "Install and start Fluent Bit", - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - return roachprod.StartFluentBit(context.Background(), config.Logger, args[0], fluentBitConfig) - }), -} - -var fluentBitStopCmd = &cobra.Command{ - Use: "fluent-bit-stop ", - Short: "Stop Fluent Bit", - Long: "Stop Fluent Bit", - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - return roachprod.StopFluentBit(context.Background(), config.Logger, args[0]) - }), -} - -var opentelemetryStartCmd = &cobra.Command{ - Use: "opentelemetry-start ", - Short: "Install and start the OpenTelemetry Collector", - Long: "Install and start the OpenTelemetry Collector", - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - return roachprod.StartOpenTelemetry(context.Background(), config.Logger, args[0], opentelemetryConfig) - }), -} - -var opentelemetryStopCmd = &cobra.Command{ - Use: "opentelemetry-stop ", - Short: "Stop the OpenTelemetry Collector", - Long: "Stop the OpenTelemetry Collector", - Args: cobra.ExactArgs(1), - Run: wrap(func(cmd *cobra.Command, args []string) error { - return roachprod.StopOpenTelemetry(context.Background(), config.Logger, args[0]) - }), + PersistentPreRun: cli.ValidateAndConfigure, } func main() { - _ = roachprod.InitProviders() - providerOptsContainer = vm.CreateProviderOptionsContainer() - // The commands are displayed in the order they are added to rootCmd. Note - // that gcCmd and adminurlCmd contain a trailing \n in their Short help in - // order to separate the commands into logical groups. 
- cobra.EnableCommandSorting = false - rootCmd.AddCommand( - createCmd, - growCmd, - shrinkCmd, - resetCmd, - destroyCmd, - extendCmd, - loadBalancerCmd, - listCmd, - syncCmd, - gcCmd, - setupSSHCmd, - statusCmd, - monitorCmd, - startCmd, - updateTargetsCmd, - stopCmd, - startInstanceCmd, - stopInstanceCmd, - deployCmd, - initCmd, - runCmd, - signalCmd, - wipeCmd, - destroyDNSCmd, - reformatCmd, - installCmd, - distributeCertsCmd, - sshKeysCmd, - putCmd, - getCmd, - stageCmd, - stageURLCmd, - downloadCmd, - sqlCmd, - ipCmd, - pgurlCmd, - adminurlCmd, - logsCmd, - pprofCmd, - cachedHostsCmd, - versionCmd, - getProvidersCmd, - grafanaStartCmd, - grafanaStopCmd, - grafanaDumpCmd, - grafanaURLCmd, - grafanaAnnotationCmd, - rootStorageCmd, - snapshotCmd, - updateCmd, - jaegerStartCmd, - jaegerStopCmd, - jaegerURLCmd, - sideEyeRootCmd, - fluentBitStartCmd, - fluentBitStopCmd, - opentelemetryStartCmd, - opentelemetryStopCmd, - fetchLogsCmd, - getLatestPProf, - ) - loadBalancerCmd.AddCommand(createLoadBalancerCmd) - loadBalancerCmd.AddCommand(loadBalancerPGUrl) - loadBalancerCmd.AddCommand(loadBalancerIP) - setBashCompletionFunction() - - // Add help about specifying nodes - for _, cmd := range []*cobra.Command{ - getCmd, putCmd, runCmd, startCmd, statusCmd, stopCmd, signalCmd, - wipeCmd, pgurlCmd, adminurlCmd, sqlCmd, installCmd, - } { - if cmd.Long == "" { - cmd.Long = cmd.Short - } - cmd.Long += fmt.Sprintf(` -Node specification - - By default the operation is performed on all nodes in . A subset of - nodes can be specified by appending : to the cluster name. The syntax - of is a comma separated list of specific node IDs or range of - IDs. 
For example: - - roachprod %[1]s marc-test:1-3,8-9 - - will perform %[1]s on: - - marc-test-1 - marc-test-2 - marc-test-3 - marc-test-8 - marc-test-9 -`, cmd.Name()) - } - - initFlags() - - var err error - config.OSUser, err = user.Current() - if err != nil { - fmt.Fprintf(os.Stderr, "unable to lookup current user: %s\n", err) - os.Exit(1) - } - - if err := roachprod.InitDirs(); err != nil { - fmt.Fprintf(os.Stderr, "%s\n", err) - os.Exit(1) - } - - if err := roachprod.LoadClusters(); err != nil { - // We don't want to exit as we may be looking at the help message. - fmt.Printf("problem loading clusters: %s\n", err) - } - - updateTime, sha, err := update.CheckLatest(roachprodUpdateBranch, roachprodUpdateOS, roachprodUpdateArch) - if err != nil { - fmt.Fprintf(os.Stderr, "WARN: failed to check if a more recent 'roachprod' binary exists: %s\n", err) - } else { - age, err := update.TimeSinceUpdate(updateTime) - if err != nil { - fmt.Fprintf(os.Stderr, "WARN: unable to check mtime of 'roachprod' binary: %s\n", err) - } else if age.Hours() >= 14*24 { - fmt.Fprintf(os.Stderr, "WARN: roachprod binary is >= 2 weeks old (%s); latest sha: %q\nWARN: Consider updating the binary: `roachprod update`\n\n", age, sha) - } - } + cli.Initialize(rootCmd) if err := rootCmd.Execute(); err != nil { // Cobra has already printed the error message. os.Exit(1) } } - -var fetchLogsCmd = &cobra.Command{ - Use: "fetchlogs [flags]", - Aliases: []string{"getlogs"}, - Short: "download the logs from the cluster", - Long: `Download the logs from the cluster using "roachprod get". - -The logs will be placed in the directory if specified or in the directory named as _logs. 
-`, - Args: cobra.RangeArgs(1, 2), - Run: wrap(func(cmd *cobra.Command, args []string) error { - cluster := args[0] - ctx := context.Background() - var dest string - if len(args) == 2 { - dest = args[1] - } else { - // trim the node number and keep only the cluster name as prefix of the directory - dest = fmt.Sprintf("%s_logs", strings.Split(args[0], ":")[0]) - fmt.Printf("Placing logs at %s\n", dest) - } - if err := os.Mkdir(dest, 0755); err != nil { - return err - } - return roachprod.FetchLogs(ctx, config.Logger, cluster, dest, - fetchLogsTimeout) - }), -} - -var getLatestPProf = &cobra.Command{ - Use: "get-latest-pprof [time-before]", - Short: "downloads the latest pprof file which is created on or before the provided time-before.", - Long: `Downloads the latest pprof file which is created on or before the provided time-before. -The time should be of the format 2022-08-31T15:23:22Z for UTC or 2022-08-31T15:23:22+05:30 for time zone. -If the time is not provided, it downloads the latest pprof file across all clusters. 
-`, - Args: cobra.MinimumNArgs(1), - // Wraps the command execution with additional error handling - Run: wrap(func(cmd *cobra.Command, args []string) (retErr error) { - cluster := args[0] - pprofTimeBefore := "" - if len(args) == 2 { - // time-before is optional - pprofTimeBefore = args[1] - } - ctx := context.Background() - return roachprod.DownloadLatestPProfFile(ctx, config.Logger, cluster, pprofTimeBefore) - }), -} diff --git a/pkg/cmd/roachprod/update/BUILD.bazel b/pkg/cmd/roachprod/update/BUILD.bazel deleted file mode 100644 index dcbbc14916e7..000000000000 --- a/pkg/cmd/roachprod/update/BUILD.bazel +++ /dev/null @@ -1,17 +0,0 @@ -load("@io_bazel_rules_go//go:def.bzl", "go_library") - -go_library( - name = "update", - srcs = [ - "update.go", - "util.go", - ], - importpath = "github.com/cockroachdb/cockroach/pkg/cmd/roachprod/update", - visibility = ["//visibility:public"], - deps = [ - "@com_github_cockroachdb_errors//:errors", - "@com_github_cockroachdb_errors//oserror", - "@com_google_cloud_go_storage//:storage", - "@org_golang_google_api//option", - ], -) diff --git a/pkg/cmd/roachprod/update/util.go b/pkg/cmd/roachprod/update/util.go deleted file mode 100644 index 14211ad131fb..000000000000 --- a/pkg/cmd/roachprod/update/util.go +++ /dev/null @@ -1,93 +0,0 @@ -// Copyright 2023 The Cockroach Authors. -// -// Use of this software is governed by the CockroachDB Software License -// included in the /LICENSE file. 
- -package update - -import ( - "fmt" - "os" - "strings" - "time" - - "github.com/cockroachdb/errors" - "github.com/cockroachdb/errors/oserror" -) - -func PromptYesNo(msg string, defaultYes bool) bool { - if defaultYes { - fmt.Printf("%s y[default]/n: ", msg) - } else { - fmt.Printf("%s y/n[default]: ", msg) - } - - var answer string - _, _ = fmt.Scanln(&answer) - answer = strings.TrimSpace(answer) - - isYes := answer == "y" || answer == "Y" - isEmpty := answer == "" - - if defaultYes { - return isYes || isEmpty - } - - return isYes -} - -// SwapBinary attempts to swap the `old` file with the `new` file. Used to -// update a running roachprod binary. -// Note: there is special handling if `new` points to a file ending in `.bak`. -// In this case, it is assumed to be a `revert` operation, in which case we -// do *not* backup the old/current file. -func SwapBinary(old, new string) error { - destInfo, err := os.Stat(new) - - if err != nil { - if oserror.IsNotExist(err) { - return errors.WithDetail(err, "binary does not exist: "+new) - } - return err - } - - if destInfo.IsDir() { - return errors.Newf("binary path is a directory, not a file: %s", new) - } - - oldInfo, err := os.Stat(old) - if err != nil { - return err - } - - // Copy the current file permissions to the new binary and ensure it is executable. - err = os.Chmod(new, oldInfo.Mode()) - if err != nil { - return err - } - - // Backup only for upgrading, not when reverting which is assumed if the new binary ends in `.bak`. - if !strings.HasSuffix(new, ".bak") { - // Backup the current binary, so that it may be restored via `roachprod update --revert`. - err = os.Rename(old, old+".bak") - if err != nil { - return errors.WithDetail(err, "unable to backup current binary") - } - } - - // Move the new binary into place. - return os.Rename(new, old) -} - -// Computes the age of the current binary, relative to the given update time. 
-func TimeSinceUpdate(updateTime time.Time) (time.Duration, error) { - currentBinary, err := os.Executable() - if err != nil { - return -1, err - } - statInfo, err := os.Stat(currentBinary) - if err != nil { - return -1, err - } - return updateTime.Sub(statInfo.ModTime()), nil -}