Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

roachprod: support for dynamic admin url port #123619

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ require (
)

require (
cloud.google.com/go/secretmanager v1.10.0
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.3.0
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.1.0
github.com/Azure/azure-sdk-for-go/sdk/keyvault/azkeys v0.9.0
Expand Down
2 changes: 2 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ cloud.google.com/go/pubsub v1.2.0/go.mod h1:jhfEVHT8odbXTkndysNHCcx0awwzvfOlguIA
cloud.google.com/go/pubsub v1.3.1/go.mod h1:i+ucay31+CNRpDW4Lu78I4xXG+O1r/MAHgjpRVR+TSU=
cloud.google.com/go/pubsub v1.28.0 h1:XzabfdPx/+eNrsVVGLFgeUnQQKPGkMb8klRCeYK52is=
cloud.google.com/go/pubsub v1.28.0/go.mod h1:vuXFpwaVoIPQMGXqRyUQigu/AX1S3IWugR9xznmcXX8=
cloud.google.com/go/secretmanager v1.10.0 h1:pu03bha7ukxF8otyPKTFdDz+rr9sE3YauS5PliDXK60=
cloud.google.com/go/secretmanager v1.10.0/go.mod h1:MfnrdvKMPNra9aZtQFvBcvRU54hbPD8/HayQdlUgJpU=
cloud.google.com/go/storage v1.0.0/go.mod h1:IhtSnM/ZTZV8YYJWCY8RULGVqBDmpoyjwiyrjsg+URw=
cloud.google.com/go/storage v1.5.0/go.mod h1:tpKbwo567HUNpVclU5sGELwQWBDZ8gh0ZeosJ0Rtdos=
cloud.google.com/go/storage v1.6.0/go.mod h1:N7U0C8pVQ/+NIKOBQyamJIeKQKkZ+mxpohlUTyfDhBk=
Expand Down
3 changes: 3 additions & 0 deletions pkg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -294,6 +294,7 @@ ALL_TESTS = [
"//pkg/roachprod/config:config_test",
"//pkg/roachprod/install:install_test",
"//pkg/roachprod/prometheus:prometheus_test",
"//pkg/roachprod/promhelperclient:promhelperclient_test",
"//pkg/roachprod/ssh:ssh_test",
"//pkg/roachprod/vm/gce:gce_test",
"//pkg/roachprod/vm/local:local_test",
Expand Down Expand Up @@ -1563,6 +1564,8 @@ GO_TARGETS = [
"//pkg/roachprod/logger:logger",
"//pkg/roachprod/prometheus:prometheus",
"//pkg/roachprod/prometheus:prometheus_test",
"//pkg/roachprod/promhelperclient:promhelperclient",
"//pkg/roachprod/promhelperclient:promhelperclient_test",
"//pkg/roachprod/ssh:ssh",
"//pkg/roachprod/ssh:ssh_test",
"//pkg/roachprod/ui:ui",
Expand Down
27 changes: 25 additions & 2 deletions pkg/cmd/roachprod/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,30 @@ cluster setting will be set to its value.
}),
}

var updateTargetsCmd = &cobra.Command{
Use: "update-targets <cluster>",
Short: "update prometheus target configurations for a cluster",
Long: `Update prometheus target configurations of each node of a cluster.

The "start" command updates the prometheus target configuration every time. But, in case of any
failure, this command can be used to update the configurations.

The --args and --env flags can be used to pass arbitrary command line flags and
nameisbhaskar marked this conversation as resolved.
Show resolved Hide resolved
environment variables to the cockroach process.
` + tagHelp + `
nameisbhaskar marked this conversation as resolved.
Show resolved Hide resolved
The default prometheus url is https://grafana.testeng.crdb.io/. This can be overwritten by using the
environment variable COCKROACH_PROM_HOST_URL
`,
Args: cobra.ExactArgs(1),
Run: wrap(func(cmd *cobra.Command, args []string) error {
clusterSettingsOpts := []install.ClusterSettingOption{
install.TagOption(tag),
nameisbhaskar marked this conversation as resolved.
Show resolved Hide resolved
install.EnvOption(nodeEnv),
}
return roachprod.UpdateTargets(context.Background(), config.Logger, args[0], clusterSettingsOpts...)
}),
}

var stopCmd = &cobra.Command{
Use: "stop <cluster> [--sig] [--wait]",
Short: "stop nodes on a cluster",
Expand Down Expand Up @@ -626,8 +650,6 @@ environment variables to the cockroach process.
install.EnvOption(nodeEnv),
install.NumRacksOption(numRacks),
}
// TODO(DarrylWong): remove once #117125 is addressed.
startOpts.AdminUIPort = 0

startOpts.Target = install.StartSharedProcessForVirtualCluster
// If the user passed an `--external-nodes` option, we are
Expand Down Expand Up @@ -1755,6 +1777,7 @@ func main() {
statusCmd,
monitorCmd,
startCmd,
updateTargetsCmd,
stopCmd,
startInstanceCmd,
stopInstanceCmd,
Expand Down
2 changes: 2 additions & 0 deletions pkg/roachprod/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,15 @@ go_library(
"//pkg/roachprod/lock",
"//pkg/roachprod/logger",
"//pkg/roachprod/prometheus",
"//pkg/roachprod/promhelperclient",
"//pkg/roachprod/vm",
"//pkg/roachprod/vm/aws",
"//pkg/roachprod/vm/azure",
"//pkg/roachprod/vm/gce",
"//pkg/roachprod/vm/local",
"//pkg/server/debug/replay",
"//pkg/util/ctxgroup",
"//pkg/util/envutil",
"//pkg/util/httputil",
"//pkg/util/retry",
"//pkg/util/syncutil",
Expand Down
33 changes: 33 additions & 0 deletions pkg/roachprod/promhelperclient/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library", "go_test")

go_library(
name = "promhelperclient",
srcs = [
"client.go",
"promhelper_utils.go",
],
importpath = "github.com/cockroachdb/cockroach/pkg/roachprod/promhelperclient",
visibility = ["//visibility:public"],
deps = [
"//pkg/roachprod/logger",
"//pkg/util/httputil",
"@com_github_cockroachdb_errors//:errors",
"@com_google_cloud_go_secretmanager//apiv1",
"@com_google_cloud_go_secretmanager//apiv1/secretmanagerpb",
"@org_golang_google_api//idtoken",
"@org_golang_x_oauth2//:oauth2",
],
)

go_test(
name = "promhelperclient_test",
srcs = ["client_test.go"],
embed = [":promhelperclient"],
deps = [
"//pkg/roachprod/logger",
"//pkg/util/httputil",
"@com_github_stretchr_testify//require",
"@org_golang_google_api//idtoken",
"@org_golang_x_oauth2//:oauth2",
],
)
214 changes: 214 additions & 0 deletions pkg/roachprod/promhelperclient/client.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
// Copyright 2024 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

// Utility to connect and invoke APIs of the promhelperservice.
// Doc reference - https://cockroachlabs.atlassian.net/wiki/x/MAAlzg

package promhelperclient

import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net/http"
"os"
"strconv"
"strings"
"text/template"

"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
"github.com/cockroachdb/cockroach/pkg/util/httputil"
"github.com/cockroachdb/errors"
"golang.org/x/oauth2"
"google.golang.org/api/idtoken"
)

const (
resourceName = "instance-configs"
resourceVersion = "v1"

ServiceAccountJson = "PROM_HELPER_SERVICE_ACCOUNT_JSON"
ServiceAccountAudience = "PROM_HELPER_SERVICE_ACCOUNT_AUDIENCE"
)

// PromClient is used to communicate with the prometheus helper service
// keeping the functions as a variable enables us to override the value for unit testing
type PromClient struct {
// httpPut is used for http PUT operation.
httpPut func(
ctx context.Context, url string, h *httputil.RequestHeaders, body io.Reader,
) (resp *http.Response, err error)
// httpDelete is used for http DELETE operation.
httpDelete func(ctx context.Context, url string, h *httputil.RequestHeaders) (
resp *http.Response, err error)
// newTokenSource is the token generator source.
newTokenSource func(ctx context.Context, audience string, opts ...idtoken.ClientOption) (
oauth2.TokenSource, error)
}

// NewPromClient returns a new instance of PromClient
func NewPromClient() *PromClient {
return &PromClient{
httpPut: httputil.Put,
httpDelete: httputil.Delete,
newTokenSource: idtoken.NewTokenSource,
}
}

// instanceConfigRequest is the HTTP request received for generating instance config
type instanceConfigRequest struct {
//Config is the content of the yaml file
Config string `json:"config"`
}

// UpdatePrometheusTargets updates the cluster config in the promUrl
func (c *PromClient) UpdatePrometheusTargets(
ctx context.Context,
promUrl, clusterName string,
forceFetchCreds bool,
nodes []string,
l *logger.Logger,
) error {
req, err := buildCreateRequest(nodes)
if err != nil {
return err
}
token, err := c.getToken(ctx, promUrl, forceFetchCreds, l)
if err != nil {
return err
}
url := getUrl(promUrl, clusterName)
l.Printf("invoking PUT for URL: %s", url)
response, err := c.httpPut(ctx, url, &httputil.RequestHeaders{
ContentType: "application/json",
Authorization: token,
}, req)
if err != nil {
return err
}
if response.StatusCode != http.StatusOK {
defer func() { _ = response.Body.Close() }()
nameisbhaskar marked this conversation as resolved.
Show resolved Hide resolved
if response.StatusCode == http.StatusUnauthorized && !forceFetchCreds {
l.Printf("request failed - this may be due to a stale token. retrying with forceFetchCreds true ...")
return c.UpdatePrometheusTargets(ctx, promUrl, clusterName, true, nodes, l)
}
body, err := io.ReadAll(response.Body)
if err != nil {
return err
}
return errors.Newf("request failed with status %d and error %s", response.StatusCode,
string(body))
}
return nil
}

// DeleteClusterConfig deletes the cluster config in the promUrl
func (c *PromClient) DeleteClusterConfig(
ctx context.Context, promUrl, clusterName string, forceFetchCreds bool, l *logger.Logger,
) error {
token, err := c.getToken(ctx, promUrl, forceFetchCreds, l)
if err != nil {
return err
}
url := getUrl(promUrl, clusterName)
l.Printf("invoking DELETE for URL: %s", url)
response, err := c.httpDelete(ctx, url, &httputil.RequestHeaders{
Authorization: token,
})
if err != nil {
return err
}
if response.StatusCode != http.StatusNoContent {
defer func() { _ = response.Body.Close() }()
if response.StatusCode == http.StatusUnauthorized && !forceFetchCreds {
return c.DeleteClusterConfig(ctx, promUrl, clusterName, true, l)
}
body, err := io.ReadAll(response.Body)
if err != nil {
return err
}
return errors.Newf("request failed with status %d and error %s", response.StatusCode,
string(body))
}
return nil
}

func getUrl(promUrl, clusterName string) string {
return fmt.Sprintf("%s/%s/%s/%s", promUrl, resourceVersion, resourceName, clusterName)
}

// ccParams are the params for the clusterConfFileTemplate
type ccParams struct {
Targets []string
Labels []string
}

const clusterConfFileTemplate = `- targets:
{{range $val := .Targets}} - {{$val}}
{{end}} labels:
{{range $val := .Labels}} {{$val}}
{{end}}
`

// createClusterConfigFile creates the cluster config file per node
func buildCreateRequest(nodes []string) (io.Reader, error) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This would likely be simpler if we used the yaml package to handle the serialization for us. Then we would be able to go from data structure -> YAML without relying on Go templates which is IMO a little easier to understand.

buffer := bytes.NewBufferString("---\n")
for i, n := range nodes {
if n == "" {
// this is an unsupported node
continue
}
params := &ccParams{
Targets: []string{n},
Labels: make([]string, 0),
}
params.Labels = append(params.Labels,
fmt.Sprintf("node: \"%s\"", strconv.Itoa(i+1)),
"tenant: system",
)
t := template.Must(template.New("start").Parse(clusterConfFileTemplate))
if err := t.Execute(buffer, params); err != nil {
return nil, err
}
}

b, err := json.Marshal(&instanceConfigRequest{Config: buffer.String()})
if err != nil {
return nil, err
}
return bytes.NewReader(b), nil
}

// getToken gets the Authorization token for grafana
func (c *PromClient) getToken(
ctx context.Context, promUrl string, forceFetchCreds bool, l *logger.Logger,
) (string, error) {
if strings.HasPrefix(promUrl, "http:/") {
// no token needed for insecure URL
return "", nil
}
// Read in the service account key and audience, so we can retrieve the identity token.
if _, err := SetPromHelperCredsEnv(ctx, forceFetchCreds, l); err != nil {
return "", err
}
grafanaKey := os.Getenv(ServiceAccountJson)
grafanaAudience := os.Getenv(ServiceAccountAudience)
ts, err := c.newTokenSource(ctx, grafanaAudience, idtoken.WithCredentialsJSON([]byte(grafanaKey)))
if err != nil {
return "", errors.Wrap(err, "error creating GCS oauth token source from specified credential")
}
token, err := ts.Token()
if err != nil {
return "", errors.Wrap(err, "error getting identity token")
}
return fmt.Sprintf("Bearer %s", token.AccessToken), nil
}
Loading
Loading