Skip to content

Commit

Permalink
Merge #114483
Browse files Browse the repository at this point in the history
114483: roachprod: add jaeger-[start|stop|url] command r=dt a=stevendanna

This basically just does what we've been copying and pasting around. In the future we should probably manage these processes ourselves rather than using docker.

Epic: None

Release note: None

Co-authored-by: Steven Danna <[email protected]>
  • Loading branch information
craig[bot] and stevendanna committed Nov 15, 2023
2 parents 608e80c + 31af6c9 commit fe7c376
Show file tree
Hide file tree
Showing 4 changed files with 188 additions and 26 deletions.
19 changes: 11 additions & 8 deletions pkg/cmd/roachprod/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,8 @@ var (
wipePreserveCerts bool
grafanaConfig string
grafanaArch string
grafanaurlOpen bool
grafanaDumpDir string
jaegerConfigNodes string
listDetails bool
listJSON bool
listMine bool
Expand All @@ -54,9 +54,9 @@ var (
tag string
external = false
pgurlCertsDir string
adminurlOpen = false
adminurlPath = ""
adminurlIPs = false
urlOpen = false
useTreeDist = true
sig = 9
waitFlag = false
Expand Down Expand Up @@ -159,7 +159,6 @@ func initFlags() {
listCmd.Flags().StringVar(&listPattern,
"pattern", "", "Show only clusters matching the regex pattern. Empty string matches everything.")

adminurlCmd.Flags().BoolVar(&adminurlOpen, "open", false, "Open the url in a browser")
adminurlCmd.Flags().StringVar(&adminurlPath,
"path", "/", "Path to add to URL (e.g. to open a same page on each node)")
adminurlCmd.Flags().BoolVar(&adminurlIPs,
Expand Down Expand Up @@ -272,12 +271,12 @@ Default is "RECURRING '*/15 * * * *' FULL BACKUP '@hourly' WITH SCHEDULE OPTIONS
grafanaStartCmd.Flags().StringVar(&grafanaArch, "arch", "",
"binary architecture override [amd64, arm64]")

grafanaURLCmd.Flags().BoolVar(&grafanaurlOpen,
"open", false, "open the grafana dashboard url on the browser")

grafanaDumpCmd.Flags().StringVar(&grafanaDumpDir, "dump-dir", "",
"the absolute path to dump prometheus data to (use the contained 'prometheus-docker-run.sh' to visualize")

jaegerStartCmd.Flags().StringVar(&jaegerConfigNodes, "configure-nodes", "",
"the nodes on which to set the relevant CRDB cluster settings")

initCmd.Flags().IntVar(&startOpts.InitTarget,
"init-target", startOpts.InitTarget, "node on which to run initialization")

Expand Down Expand Up @@ -320,6 +319,10 @@ Default is "RECURRING '*/15 * * * *' FULL BACKUP '@hourly' WITH SCHEDULE OPTIONS
updateCmd.Flags().BoolVar(&revertUpdate, "revert", false, "restore roachprod to the previous version "+
"which would have been renamed to roachprod.bak during the update process")

for _, cmd := range []*cobra.Command{adminurlCmd, grafanaURLCmd, jaegerURLCmd} {
cmd.Flags().BoolVar(&urlOpen, "open", false, "Open the url in a browser")
}

for _, cmd := range []*cobra.Command{createCmd, destroyCmd, extendCmd, logsCmd} {
cmd.Flags().StringVarP(&username, "username", "u", os.Getenv("ROACHPROD_USER"),
"Username to run under, detect if blank")
Expand Down Expand Up @@ -355,11 +358,11 @@ Default is "RECURRING '*/15 * * * *' FULL BACKUP '@hourly' WITH SCHEDULE OPTIONS
cmd.Flags().StringVarP(&config.Binary,
"binary", "b", config.Binary, "the remote cockroach binary to use")
}
for _, cmd := range []*cobra.Command{startCmd, startInstanceCmd, stopInstanceCmd, sqlCmd, pgurlCmd, adminurlCmd, runCmd} {
for _, cmd := range []*cobra.Command{startCmd, startInstanceCmd, stopInstanceCmd, sqlCmd, pgurlCmd, adminurlCmd, runCmd, jaegerStartCmd} {
cmd.Flags().BoolVar(&secure,
"secure", false, "use a secure cluster")
}
for _, cmd := range []*cobra.Command{pgurlCmd, sqlCmd, adminurlCmd, stopInstanceCmd} {
for _, cmd := range []*cobra.Command{pgurlCmd, sqlCmd, adminurlCmd, stopInstanceCmd, jaegerStartCmd} {
cmd.Flags().StringVar(&virtualClusterName,
"cluster", "", "specific virtual cluster to connect to")
cmd.Flags().IntVar(&sqlInstance,
Expand Down
41 changes: 39 additions & 2 deletions pkg/cmd/roachprod/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -1014,7 +1014,7 @@ var adminurlCmd = &cobra.Command{
Args: cobra.ExactArgs(1),
Run: wrap(func(cmd *cobra.Command, args []string) error {
urls, err := roachprod.AdminURL(
context.Background(), config.Logger, args[0], virtualClusterName, sqlInstance, adminurlPath, adminurlIPs, adminurlOpen, secure,
context.Background(), config.Logger, args[0], virtualClusterName, sqlInstance, adminurlPath, adminurlIPs, urlOpen, secure,
)
if err != nil {
return err
Expand Down Expand Up @@ -1131,7 +1131,41 @@ var grafanaURLCmd = &cobra.Command{
Args: cobra.ExactArgs(1),
Run: wrap(func(cmd *cobra.Command, args []string) error {
url, err := roachprod.GrafanaURL(context.Background(), config.Logger, args[0],
grafanaurlOpen)
urlOpen)
if err != nil {
return err
}
fmt.Println(url)
return nil
}),
}

// jaegerStartCmd is the jaeger-start subcommand. It takes exactly one
// argument, the cluster name, and hands it to roachprod.StartJaeger
// together with the virtual cluster name, the secure setting and the
// list of nodes to configure taken from the command-line flags.
var jaegerStartCmd = &cobra.Command{
	Use:   `jaeger-start <cluster>`,
	Short: `starts a jaeger container on the last node in the cluster`,
	Args:  cobra.ExactArgs(1),
	Run: wrap(func(_ *cobra.Command, args []string) error {
		ctx, clusterName := context.Background(), args[0]
		return roachprod.StartJaeger(
			ctx, config.Logger, clusterName, virtualClusterName, secure, jaegerConfigNodes,
		)
	}),
}

// jaegerStopCmd is the jaeger-stop subcommand. It takes exactly one
// argument, the cluster name, and delegates to roachprod.StopJaeger.
var jaegerStopCmd = &cobra.Command{
	Use:   `jaeger-stop <cluster>`,
	Short: `stops a running jaeger container on the last node in the cluster`,
	Args:  cobra.ExactArgs(1),
	Run: wrap(func(_ *cobra.Command, args []string) error {
		clusterName := args[0]
		return roachprod.StopJaeger(context.Background(), config.Logger, clusterName)
	}),
}

var jaegerURLCmd = &cobra.Command{
Use: `jaegerurl <cluster>`,
Short: `returns the URL of the cluster's jaeger UI`,
Args: cobra.ExactArgs(1),
Run: wrap(func(cmd *cobra.Command, args []string) error {
url, err := roachprod.JaegerURL(context.Background(), config.Logger, args[0],
urlOpen)
if err != nil {
return err
}
Expand Down Expand Up @@ -1443,6 +1477,9 @@ func main() {
rootStorageCmd,
snapshotCmd,
updateCmd,
jaegerStartCmd,
jaegerStopCmd,
jaegerURLCmd,
)
setBashCompletionFunction()

Expand Down
37 changes: 21 additions & 16 deletions pkg/roachprod/install/install.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"bytes"
"context"
"fmt"
"io"
"sort"

"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
Expand Down Expand Up @@ -95,6 +96,7 @@ sudo add-apt-repository \
sudo apt-get update;
sudo apt-get install -y docker-ce;
sudo usermod -aG docker ubuntu;
`,

"gcc": `
Expand Down Expand Up @@ -159,26 +161,29 @@ func SortedCmds() []string {

// Install installs each tool named in args on all nodes of the cluster
// (c.Nodes). The output of an installation is captured in a buffer and
// is only written to the logger when that installation fails, in which
// case the error is returned. A name with no entry in installCmds
// results in an "unknown tool" error.
func Install(ctx context.Context, l *logger.Logger, c *SyncedCluster, args []string) error {
do := func(title, cmd string) error {
for _, arg := range args {
var buf bytes.Buffer
err := c.Run(ctx, l, &buf, &buf, OnNodes(c.Nodes), "installing "+title, cmd)
if err != nil {
if err := InstallTool(ctx, l, c, c.Nodes, arg, &buf, &buf); err != nil {
l.Printf(buf.String())
}
return err
}

for _, arg := range args {
cmd, ok := installCmds[arg]
if !ok {
return fmt.Errorf("unknown tool %q", arg)
}

// Ensure that we early exit if any of the shell statements fail.
cmd = "set -exuo pipefail;" + cmd
if err := do(arg, cmd); err != nil {
return err
}
}
return nil
}

// InstallTool installs the single tool identified by softwareName on
// the given nodes of cluster c. The tool's install script is looked up
// in installCmds; an unrecognized name yields an "unknown tool" error
// and nothing is run. stdout and stderr are passed through to c.Run.
func InstallTool(
	ctx context.Context,
	l *logger.Logger,
	c *SyncedCluster,
	nodes Nodes,
	softwareName string,
	stdout, stderr io.Writer,
) error {
	script, known := installCmds[softwareName]
	if !known {
		return fmt.Errorf("unknown tool %q", softwareName)
	}
	// Prepend strict shell options so the script aborts at the first
	// failing statement instead of carrying on.
	script = "set -exuo pipefail;" + script
	title := "installing " + softwareName
	return c.Run(ctx, l, stdout, stderr, OnNodes(nodes), title, script)
}
117 changes: 117 additions & 0 deletions pkg/roachprod/roachprod.go
Original file line number Diff line number Diff line change
Expand Up @@ -1975,6 +1975,123 @@ func isWorkloadCollectorVolume(v vm.Volume) bool {
return false
}

// Ports and docker identifiers shared by StartJaeger, StopJaeger and
// JaegerURL.
const (
// otelCollectorPort is published from the jaeger container and is the
// port placed in the trace.opentelemetry.collector cluster setting.
otelCollectorPort = 4317
// jaegerUIPort is published from the jaeger container and is the port
// used when generating the jaeger UI URL.
jaegerUIPort = 16686
// jaegerContainerName is the name given to the docker container; it is
// used to run, stop and remove the container.
jaegerContainerName = "jaeger"
// jaegerImageName is the docker image that is run.
jaegerImageName = "jaegertracing/all-in-one:latest"
)

// StartJaeger runs a jaeger all-in-one docker container on the last
// node of the given cluster, installing docker on that node first.
//
// When configureNodes is non-empty it is parsed as a node list and the
// trace.opentelemetry.collector cluster setting is set, via SQL executed
// against those nodes (using virtualClusterName), to the internal
// address of the jaeger node. When it is empty no SQL is run. In both
// cases the SQL statement and the jaeger UI URL are logged before
// returning.
func StartJaeger(
	ctx context.Context,
	l *logger.Logger,
	clusterName string,
	virtualClusterName string,
	secure bool,
	configureNodes string,
) error {
	if err := LoadClusters(); err != nil {
		return err
	}
	c, err := newCluster(l, clusterName, install.SecureOption(secure))
	if err != nil {
		return err
	}

	// TODO(ssd): This relies on the all-in-one docker image with
	// in-memory storage, mirroring the commands that used to be run by
	// hand. Installing binaries and running them under systemd might
	// be a nicer long-term approach.
	targets := c.TargetNodes()
	jaegerNode := targets[len(targets)-1:]
	if err := install.InstallTool(
		ctx, l, c, jaegerNode, "docker", l.Stdout, l.Stderr,
	); err != nil {
		return err
	}

	// Publish both the collector port and the UI port from the
	// container on the same host ports.
	dockerRun := fmt.Sprintf("docker run -d --name %s -p %[2]d:%[2]d -p %[3]d:%[3]d %s",
		jaegerContainerName, otelCollectorPort, jaegerUIPort, jaegerImageName)
	if err := c.Run(
		ctx, l, l.Stdout, l.Stderr, install.OnNodes(jaegerNode),
		"start jaegertracing/all-in-one using docker", dockerRun,
	); err != nil {
		return err
	}

	// The collector address uses the jaeger node's internal IP.
	collectorIP, err := c.GetInternalIP(jaegerNode[0])
	if err != nil {
		return err
	}
	collectorAddr := net.JoinHostPort(collectorIP, strconv.Itoa(otelCollectorPort))
	setupStmt := fmt.Sprintf("SET CLUSTER SETTING trace.opentelemetry.collector='%s'", collectorAddr)

	if configureNodes != "" {
		sqlNodes, err := install.ListNodes(configureNodes, len(c.VMs))
		if err != nil {
			return err
		}
		// NOTE(review): the SQL instance argument is hard-coded to 0
		// here; confirm whether a caller-selected instance should be
		// honored instead.
		if _, err := c.ExecSQL(
			ctx, l, sqlNodes, virtualClusterName, 0, []string{"-e", setupStmt},
		); err != nil {
			return err
		}
	}

	uiURL, err := JaegerURL(ctx, l, clusterName, false)
	if err != nil {
		return err
	}

	l.Printf("To use with CRDB: %s", setupStmt)
	l.Printf("Jaeger UI: %s", uiURL)
	return nil
}

// StopJaeger stops the jaeger docker container on the last node of the
// given cluster and then removes it. If stopping the container fails,
// that error is returned and the container is not removed.
func StopJaeger(ctx context.Context, l *logger.Logger, clusterName string) error {
	if err := LoadClusters(); err != nil {
		return err
	}
	c, err := newCluster(l, clusterName)
	if err != nil {
		return err
	}
	targets := c.TargetNodes()
	jaegerNode := targets[len(targets)-1:]

	// Run "stop" and then "rm"; each command string doubles as the
	// title passed to c.Run.
	for _, format := range []string{"docker stop %s", "docker rm %s"} {
		dockerCmd := fmt.Sprintf(format, jaegerContainerName)
		if err := c.Run(
			ctx, l, l.Stdout, l.Stderr, install.OnNodes(jaegerNode), dockerCmd, dockerCmd,
		); err != nil {
			return err
		}
	}
	return nil
}

// JaegerURL returns a URL to the jaeger UI, assuming jaeger was
// installed on the last node in the given cluster. The URL is generated
// for that node on jaegerUIPort with usePublicIP set and secure unset;
// openInBrowser is forwarded to the URL generator.
func JaegerURL(
	ctx context.Context, l *logger.Logger, clusterName string, openInBrowser bool,
) (string, error) {
	if err := LoadClusters(); err != nil {
		return "", err
	}
	c, err := newCluster(l, clusterName)
	if err != nil {
		return "", err
	}

	targets := c.TargetNodes()
	jaegerNode := targets[len(targets)-1:]
	cfg := urlConfig{
		usePublicIP:   true,
		openInBrowser: openInBrowser,
		secure:        false,
		port:          jaegerUIPort,
	}
	urls, err := urlGenerator(ctx, c, l, jaegerNode, cfg)
	if err != nil {
		return "", err
	}
	// Only one node was passed in, so the first entry is the one used.
	return urls[0], nil
}

// StorageCollectionPerformAction either starts or stops workload collection on
// a target cluster.
//
Expand Down

0 comments on commit fe7c376

Please sign in to comment.