Skip to content

Commit

Permalink
roachprod: add jaeger[-start|-stop|url] command
Browse files Browse the repository at this point in the history
This basically just does what we've been copying and pasting around. In
the future we should probably manage these processes ourselves rather
than using docker.

Release note: None
  • Loading branch information
stevendanna committed Nov 15, 2023
1 parent 2b85c39 commit 31af6c9
Show file tree
Hide file tree
Showing 4 changed files with 188 additions and 26 deletions.
19 changes: 11 additions & 8 deletions pkg/cmd/roachprod/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ var (
wipePreserveCerts bool
grafanaConfig string
grafanaArch string
grafanaurlOpen bool
grafanaDumpDir string
jaegerConfigNodes string
listDetails bool
listJSON bool
listMine bool
Expand All @@ -54,9 +54,9 @@ var (
tag string
external = false
pgurlCertsDir string
adminurlOpen = false
adminurlPath = ""
adminurlIPs = false
urlOpen = false
useTreeDist = true
sig = 9
waitFlag = false
Expand Down Expand Up @@ -159,7 +159,6 @@ func initFlags() {
listCmd.Flags().StringVar(&listPattern,
"pattern", "", "Show only clusters matching the regex pattern. Empty string matches everything.")

adminurlCmd.Flags().BoolVar(&adminurlOpen, "open", false, "Open the url in a browser")
adminurlCmd.Flags().StringVar(&adminurlPath,
"path", "/", "Path to add to URL (e.g. to open a same page on each node)")
adminurlCmd.Flags().BoolVar(&adminurlIPs,
Expand Down Expand Up @@ -272,12 +271,12 @@ Default is "RECURRING '*/15 * * * *' FULL BACKUP '@hourly' WITH SCHEDULE OPTIONS
grafanaStartCmd.Flags().StringVar(&grafanaArch, "arch", "",
"binary architecture override [amd64, arm64]")

grafanaURLCmd.Flags().BoolVar(&grafanaurlOpen,
"open", false, "open the grafana dashboard url on the browser")

grafanaDumpCmd.Flags().StringVar(&grafanaDumpDir, "dump-dir", "",
"the absolute path to dump prometheus data to (use the contained 'prometheus-docker-run.sh' to visualize")

jaegerStartCmd.Flags().StringVar(&jaegerConfigNodes, "configure-nodes", "",
"the nodes on which to set the relevant CRDB cluster settings")

initCmd.Flags().IntVar(&startOpts.InitTarget,
"init-target", startOpts.InitTarget, "node on which to run initialization")

Expand Down Expand Up @@ -320,6 +319,10 @@ Default is "RECURRING '*/15 * * * *' FULL BACKUP '@hourly' WITH SCHEDULE OPTIONS
updateCmd.Flags().BoolVar(&revertUpdate, "revert", false, "restore roachprod to the previous version "+
"which would have been renamed to roachprod.bak during the update process")

for _, cmd := range []*cobra.Command{adminurlCmd, grafanaURLCmd, jaegerURLCmd} {
cmd.Flags().BoolVar(&urlOpen, "open", false, "Open the url in a browser")
}

for _, cmd := range []*cobra.Command{createCmd, destroyCmd, extendCmd, logsCmd} {
cmd.Flags().StringVarP(&username, "username", "u", os.Getenv("ROACHPROD_USER"),
"Username to run under, detect if blank")
Expand Down Expand Up @@ -355,11 +358,11 @@ Default is "RECURRING '*/15 * * * *' FULL BACKUP '@hourly' WITH SCHEDULE OPTIONS
cmd.Flags().StringVarP(&config.Binary,
"binary", "b", config.Binary, "the remote cockroach binary to use")
}
for _, cmd := range []*cobra.Command{startCmd, startInstanceCmd, stopInstanceCmd, sqlCmd, pgurlCmd, adminurlCmd, runCmd} {
for _, cmd := range []*cobra.Command{startCmd, startInstanceCmd, stopInstanceCmd, sqlCmd, pgurlCmd, adminurlCmd, runCmd, jaegerStartCmd} {
cmd.Flags().BoolVar(&secure,
"secure", false, "use a secure cluster")
}
for _, cmd := range []*cobra.Command{pgurlCmd, sqlCmd, adminurlCmd, stopInstanceCmd} {
for _, cmd := range []*cobra.Command{pgurlCmd, sqlCmd, adminurlCmd, stopInstanceCmd, jaegerStartCmd} {
cmd.Flags().StringVar(&virtualClusterName,
"cluster", "", "specific virtual cluster to connect to")
cmd.Flags().IntVar(&sqlInstance,
Expand Down
41 changes: 39 additions & 2 deletions pkg/cmd/roachprod/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -1014,7 +1014,7 @@ var adminurlCmd = &cobra.Command{
Args: cobra.ExactArgs(1),
Run: wrap(func(cmd *cobra.Command, args []string) error {
urls, err := roachprod.AdminURL(
context.Background(), config.Logger, args[0], virtualClusterName, sqlInstance, adminurlPath, adminurlIPs, adminurlOpen, secure,
context.Background(), config.Logger, args[0], virtualClusterName, sqlInstance, adminurlPath, adminurlIPs, urlOpen, secure,
)
if err != nil {
return err
Expand Down Expand Up @@ -1131,7 +1131,41 @@ var grafanaURLCmd = &cobra.Command{
Args: cobra.ExactArgs(1),
Run: wrap(func(cmd *cobra.Command, args []string) error {
url, err := roachprod.GrafanaURL(context.Background(), config.Logger, args[0],
grafanaurlOpen)
urlOpen)
if err != nil {
return err
}
fmt.Println(url)
return nil
}),
}

// jaegerStartCmd implements the `jaeger-start` subcommand. It requires
// exactly one argument, the cluster name, and delegates to
// roachprod.StartJaeger, forwarding the values of the --cluster, --secure
// and --configure-nodes flags.
var jaegerStartCmd = &cobra.Command{
	Use:   `jaeger-start <cluster>`,
	Short: `starts a jaeger container on the last node in the cluster`,
	Args:  cobra.ExactArgs(1),
	Run: wrap(func(_ *cobra.Command, args []string) error {
		clusterName := args[0]
		return roachprod.StartJaeger(
			context.Background(),
			config.Logger,
			clusterName,
			virtualClusterName,
			secure,
			jaegerConfigNodes,
		)
	}),
}

// jaegerStopCmd implements the `jaeger-stop` subcommand. It requires exactly
// one argument, the cluster name, and delegates to roachprod.StopJaeger.
var jaegerStopCmd = &cobra.Command{
	Use:   `jaeger-stop <cluster>`,
	Short: `stops a running jaeger container on the last node in the cluster`,
	Args:  cobra.ExactArgs(1),
	Run: wrap(func(_ *cobra.Command, args []string) error {
		clusterName := args[0]
		return roachprod.StopJaeger(context.Background(), config.Logger, clusterName)
	}),
}

var jaegerURLCmd = &cobra.Command{
Use: `jaegerurl <cluster>`,
Short: `returns the URL of the cluster's jaeger UI`,
Args: cobra.ExactArgs(1),
Run: wrap(func(cmd *cobra.Command, args []string) error {
url, err := roachprod.JaegerURL(context.Background(), config.Logger, args[0],
urlOpen)
if err != nil {
return err
}
Expand Down Expand Up @@ -1443,6 +1477,9 @@ func main() {
rootStorageCmd,
snapshotCmd,
updateCmd,
jaegerStartCmd,
jaegerStopCmd,
jaegerURLCmd,
)
setBashCompletionFunction()

Expand Down
37 changes: 21 additions & 16 deletions pkg/roachprod/install/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"bytes"
"context"
"fmt"
"io"
"sort"

"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
Expand Down Expand Up @@ -95,6 +96,7 @@ sudo add-apt-repository \
sudo apt-get update;
sudo apt-get install -y docker-ce;
sudo usermod -aG docker ubuntu;
`,

"gcc": `
Expand Down Expand Up @@ -159,26 +161,29 @@ func SortedCmds() []string {

// Install installs each tool named in args on every node in c.Nodes, one
// tool at a time. The output of each install is captured in a buffer and is
// only logged through l when that install fails; the first failure stops the
// loop and its error is returned. A name that has no entry in installCmds
// results in an "unknown tool" error.
func Install(ctx context.Context, l *logger.Logger, c *SyncedCluster, args []string) error {
do := func(title, cmd string) error {
for _, arg := range args {
var buf bytes.Buffer
err := c.Run(ctx, l, &buf, &buf, OnNodes(c.Nodes), "installing "+title, cmd)
if err != nil {
if err := InstallTool(ctx, l, c, c.Nodes, arg, &buf, &buf); err != nil {
l.Printf(buf.String())
}
return err
}

for _, arg := range args {
cmd, ok := installCmds[arg]
if !ok {
return fmt.Errorf("unknown tool %q", arg)
}

// Ensure that we early exit if any of the shell statements fail.
cmd = "set -exuo pipefail;" + cmd
if err := do(arg, cmd); err != nil {
return err
}
}
return nil
}

// InstallTool runs the install script registered under softwareName in
// installCmds on the given nodes of cluster c, sending the command's output
// to stdout and stderr. The script is prefixed with "set -exuo pipefail;" so
// that it stops at the first failing shell statement. A softwareName with no
// entry in installCmds yields an "unknown tool" error and nothing is run.
func InstallTool(
	ctx context.Context,
	l *logger.Logger,
	c *SyncedCluster,
	nodes Nodes,
	softwareName string,
	stdout, stderr io.Writer,
) error {
	script, known := installCmds[softwareName]
	if !known {
		return fmt.Errorf("unknown tool %q", softwareName)
	}

	// Fail fast: abort the script as soon as any shell statement fails.
	const failFast = "set -exuo pipefail;"
	title := "installing " + softwareName
	return c.Run(ctx, l, stdout, stderr, OnNodes(nodes), title, failFast+script)
}
117 changes: 117 additions & 0 deletions pkg/roachprod/roachprod.go
Original file line number Diff line number Diff line change
Expand Up @@ -1975,6 +1975,123 @@ func isWorkloadCollectorVolume(v vm.Volume) bool {
return false
}

// Settings for the jaeger container managed by StartJaeger, StopJaeger and
// JaegerURL.
const (
// otelCollectorPort is published by the jaeger container and is the port
// used in the trace.opentelemetry.collector cluster setting.
otelCollectorPort = 4317
// jaegerUIPort is published by the jaeger container and is the port used
// when generating the jaeger UI URL.
jaegerUIPort = 16686
// jaegerContainerName is the docker container name used when starting,
// stopping and removing the container.
jaegerContainerName = "jaeger"
// jaegerImageName is the docker image run by StartJaeger.
// NOTE(review): the ":latest" tag is unpinned, so the jaeger version may
// differ between runs — confirm this is intended.
jaegerImageName = "jaegertracing/all-in-one:latest"
)

// StartJaeger starts a jaeger instance on the last node in the given
// cluster and, optionally, configures the cluster to use it.
//
// It installs docker on the last target node, runs jaegerImageName there as
// a detached container named jaegerContainerName with otelCollectorPort and
// jaegerUIPort published, and finally logs the cluster setting statement and
// the jaeger UI URL.
//
// If configureNodes is non-empty it is parsed as a node list and the
// trace.opentelemetry.collector cluster setting is applied by executing SQL
// on those nodes for virtualClusterName. Otherwise the statement is only
// logged so it can be run by hand. secure is passed to the cluster as
// install.SecureOption.
//
// An error is returned if the cluster cannot be loaded, if it has no target
// nodes, or if any of the install, docker, or SQL steps fail.
func StartJaeger(
	ctx context.Context,
	l *logger.Logger,
	clusterName string,
	virtualClusterName string,
	secure bool,
	configureNodes string,
) error {
	if err := LoadClusters(); err != nil {
		return err
	}
	c, err := newCluster(l, clusterName, install.SecureOption(secure))
	if err != nil {
		return err
	}

	// Guard the "last node" selection: slicing with len-1 on an empty node
	// list would panic instead of returning an error.
	targets := c.TargetNodes()
	if len(targets) == 0 {
		return fmt.Errorf("cluster %q has no target nodes to run jaeger on", clusterName)
	}

	// TODO(ssd): Currently this just uses the all-in-one docker
	// container with in memory storage. Might be nicer to just
	// install from source or get linux binaries and start them
	// with systemd. For now this just matches what we've been
	// copy and pasting.
	jaegerNode := targets[len(targets)-1:]
	err = install.InstallTool(ctx, l, c, jaegerNode, "docker", l.Stdout, l.Stderr)
	if err != nil {
		return err
	}
	// Publish both ports on the host under the same port numbers.
	startCmd := fmt.Sprintf("docker run -d --name %s -p %[2]d:%[2]d -p %[3]d:%[3]d %s",
		jaegerContainerName,
		otelCollectorPort,
		jaegerUIPort,
		jaegerImageName)
	err = c.Run(ctx, l, l.Stdout, l.Stderr, install.OnNodes(jaegerNode), "start jaegertracing/all-in-one using docker", startCmd)
	if err != nil {
		return err
	}

	// CRDB nodes reach the collector over the jaeger node's internal IP.
	otelCollectionHost, err := c.GetInternalIP(jaegerNode[0])
	if err != nil {
		return err
	}
	otelCollectionHostPort := net.JoinHostPort(otelCollectionHost, strconv.Itoa(otelCollectorPort))
	setupStmt := fmt.Sprintf("SET CLUSTER SETTING trace.opentelemetry.collector='%s'", otelCollectionHostPort)

	if configureNodes != "" {
		nodes, err := install.ListNodes(configureNodes, len(c.VMs))
		if err != nil {
			return err
		}
		// NOTE(review): the literal 0 is presumably the SQL instance index;
		// confirm against ExecSQL's signature.
		_, err = c.ExecSQL(ctx, l, nodes, virtualClusterName, 0, []string{"-e", setupStmt})
		if err != nil {
			return err
		}
	}

	url, err := JaegerURL(ctx, l, clusterName, false)
	if err != nil {
		return err
	}

	l.Printf("To use with CRDB: %s", setupStmt)
	l.Printf("Jaeger UI: %s", url)
	return nil
}

// StopJaeger stops and removes the jaeger container.
//
// It runs "docker stop" followed by "docker rm" for jaegerContainerName on
// the last target node of the given cluster. If the stop command fails, its
// error is returned and the remove command is not attempted. An error is
// also returned if the cluster cannot be loaded or has no target nodes.
func StopJaeger(ctx context.Context, l *logger.Logger, clusterName string) error {
	if err := LoadClusters(); err != nil {
		return err
	}
	c, err := newCluster(l, clusterName)
	if err != nil {
		return err
	}

	// Guard the "last node" selection: slicing with len-1 on an empty node
	// list would panic instead of returning an error.
	targets := c.TargetNodes()
	if len(targets) == 0 {
		return fmt.Errorf("cluster %q has no target nodes to stop jaeger on", clusterName)
	}
	jaegerNode := targets[len(targets)-1:]

	// The command text doubles as the title passed to Run.
	stopCmd := fmt.Sprintf("docker stop %s", jaegerContainerName)
	err = c.Run(ctx, l, l.Stdout, l.Stderr, install.OnNodes(jaegerNode), stopCmd, stopCmd)
	if err != nil {
		return err
	}
	rmCmd := fmt.Sprintf("docker rm %s", jaegerContainerName)
	return c.Run(ctx, l, l.Stdout, l.Stderr, install.OnNodes(jaegerNode), rmCmd, rmCmd)
}

// JaegerURL returns a url to the jaeger UI, assuming it was installed
// on the last node in the given cluster.
//
// The URL is generated for the last target node using its public IP and
// jaegerUIPort, with secure set to false. When openInBrowser is true that
// request is forwarded to the URL generator. An error is returned if the
// cluster cannot be loaded, has no target nodes, or no URL is generated.
func JaegerURL(
	ctx context.Context, l *logger.Logger, clusterName string, openInBrowser bool,
) (string, error) {
	if err := LoadClusters(); err != nil {
		return "", err
	}
	c, err := newCluster(l, clusterName)
	if err != nil {
		return "", err
	}

	// Guard the "last node" selection: slicing with len-1 on an empty node
	// list would panic instead of returning an error.
	targets := c.TargetNodes()
	if len(targets) == 0 {
		return "", fmt.Errorf("cluster %q has no target nodes to locate jaeger on", clusterName)
	}
	jaegerNode := targets[len(targets)-1:]

	urls, err := urlGenerator(ctx, c, l, jaegerNode, urlConfig{
		usePublicIP:   true,
		openInBrowser: openInBrowser,
		secure:        false,
		port:          jaegerUIPort,
	})
	if err != nil {
		return "", err
	}
	// Do not index into an empty result.
	if len(urls) == 0 {
		return "", fmt.Errorf("no jaeger URL generated for cluster %q", clusterName)
	}
	return urls[0], nil
}

// StorageCollectionPerformAction either starts or stops workload collection on
// a target cluster.
//
Expand Down

0 comments on commit 31af6c9

Please sign in to comment.