Skip to content

Commit

Permalink
roachtest: roachprod: add test name labels to vms for prometheus
Browse files Browse the repository at this point in the history
This commit will add a `test_name` label to each VM when a
particular roachtest is about to be executed on the cluster, with
the label being removed at the end of the roachtest.

The `test_name` label is scraped by Prometheus to allow
filtering of dashboards based on the roachtest name. GCE labelling
rules mean that test names are sanitised to match `[a-zA-Z0-9-]`.

Epic: none
Fixes: cockroachdb#98658

Release note: None
  • Loading branch information
Miral Gadani committed Aug 2, 2023
1 parent 45cd1c9 commit d6dbedc
Show file tree
Hide file tree
Showing 9 changed files with 192 additions and 39 deletions.
8 changes: 8 additions & 0 deletions pkg/cmd/roachtest/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -1873,6 +1873,14 @@ func (c *clusterImpl) doDestroy(ctx context.Context, l *logger.Logger) <-chan st
return ch
}

// addLabels attaches the given key/value labels to this cluster's VMs by
// delegating to roachprod.AddLabels with the cluster's logger and name.
// Any error from roachprod is returned unchanged.
func (c *clusterImpl) addLabels(labels map[string]string) error {
return roachprod.AddLabels(c.l, c.name, labels)
}

// removeLabels removes the labels with the given keys from this cluster's VMs
// by delegating to roachprod.RemoveLabels with the cluster's logger and name.
// Any error from roachprod is returned unchanged.
func (c *clusterImpl) removeLabels(labels []string) error {
return roachprod.RemoveLabels(c.l, c.name, labels)
}

func (c *clusterImpl) ListSnapshots(
ctx context.Context, vslo vm.VolumeSnapshotListOpts,
) ([]vm.VolumeSnapshot, error) {
Expand Down
4 changes: 4 additions & 0 deletions pkg/cmd/roachtest/test_runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -911,7 +911,11 @@ func (r *testRunner) runTest(
t.runnerID = goid.Get()

s := t.Spec().(*registry.TestSpec)
_ = c.addLabels(map[string]string{
"test_name": s.Name,
})
defer func() {
_ = c.removeLabels([]string{"test_name"})
t.end = timeutil.Now()

// We only have to record panics if the panic'd value is not the sentinel
Expand Down
28 changes: 28 additions & 0 deletions pkg/roachprod/roachprod.go
Original file line number Diff line number Diff line change
Expand Up @@ -1290,6 +1290,34 @@ func cleanupFailedCreate(l *logger.Logger, clusterName string) error {
return cloud.DestroyCluster(l, c)
}

// AddLabels adds the given labels to the VMs of the cluster named clusterName.
//
// The cluster list is loaded first and the cluster is then looked up by name;
// a failure in either step is returned as-is. The cluster's VMs are handed to
// vm.FanOut, whose callback invokes the provider's AddLabels for the VM list
// it is given.
func AddLabels(l *logger.Logger, clusterName string, labels map[string]string) error {
	err := LoadClusters()
	if err != nil {
		return err
	}

	cluster, err := newCluster(l, clusterName)
	if err != nil {
		return err
	}

	applyLabels := func(provider vm.Provider, providerVMs vm.List) error {
		return provider.AddLabels(l, providerVMs, labels)
	}
	return vm.FanOut(cluster.VMs, applyLabels)
}

// RemoveLabels removes the labels with the given keys from the VMs of the
// cluster named clusterName.
//
// The cluster list is loaded first and the cluster is then looked up by name;
// a failure in either step is returned as-is. The cluster's VMs are handed to
// vm.FanOut, whose callback invokes the provider's RemoveLabels for the VM
// list it is given.
func RemoveLabels(l *logger.Logger, clusterName string, labels []string) error {
	err := LoadClusters()
	if err != nil {
		return err
	}

	cluster, err := newCluster(l, clusterName)
	if err != nil {
		return err
	}

	dropLabels := func(provider vm.Provider, providerVMs vm.List) error {
		return provider.RemoveLabels(l, providerVMs, labels)
	}
	return vm.FanOut(cluster.VMs, dropLabels)
}

// Create TODO
func Create(
ctx context.Context,
Expand Down
79 changes: 58 additions & 21 deletions pkg/roachprod/vm/aws/aws.go
Original file line number Diff line number Diff line change
Expand Up @@ -421,6 +421,61 @@ func (p *Provider) ConfigSSH(l *logger.Logger, zones []string) error {
return g.Wait()
}

// editLabels is a helper that adds or removes labels (EC2 tags) from the
// given VMs by running `aws ec2 create-tags` or `aws ec2 delete-tags` once per
// region.
//
// When remove is true only the keys of labels are used. Otherwise every value
// is passed through vm.SanitizeLabel before being written. An empty label set
// is a no-op. The per-region commands run concurrently in an errgroup and the
// group's error, if any, is returned.
func (p *Provider) editLabels(
	l *logger.Logger, vms vm.List, labels map[string]string, remove bool,
) error {
	if len(labels) == 0 {
		// Nothing to add or remove; avoid invoking the CLI with a bare --tags.
		return nil
	}

	subcommand := "create-tags"
	if remove {
		subcommand = "delete-tags"
	}

	// Arguments shared by every region's invocation.
	baseArgs := make([]string, 0, len(labels)+3)
	baseArgs = append(baseArgs, "ec2", subcommand, "--tags")
	for key, value := range labels {
		if remove {
			baseArgs = append(baseArgs, fmt.Sprintf("Key=%s", key))
		} else {
			baseArgs = append(baseArgs, fmt.Sprintf("Key=%s,Value=%s", key, vm.SanitizeLabel(value)))
		}
	}

	byRegion, err := regionMap(vms)
	if err != nil {
		return err
	}
	g := errgroup.Group{}
	for region, list := range byRegion {
		ids := list.ProviderIDs()
		// Build a fresh slice for each region rather than appending to
		// baseArgs: appending to a shared slice with spare capacity would let
		// iterations overwrite each other's arguments while the goroutines
		// below are still using them.
		regionArgs := make([]string, 0, len(baseArgs)+3+len(ids))
		regionArgs = append(regionArgs, baseArgs...)
		regionArgs = append(regionArgs, "--region", region, "--resources")
		regionArgs = append(regionArgs, ids...)

		g.Go(func() error {
			_, err := p.runCommand(l, regionArgs)
			return err
		})
	}
	return g.Wait()
}

// AddLabels adds the given labels to the given VMs.
// It delegates to editLabels with remove set to false, so the labels are
// written as tags rather than deleted.
func (p *Provider) AddLabels(l *logger.Logger, vms vm.List, labels map[string]string) error {
return p.editLabels(l, vms, labels, false)
}

// RemoveLabels removes the given labels from the given VMs.
func (p *Provider) RemoveLabels(l *logger.Logger, vms vm.List, labels []string) error {
	// editLabels takes a map; each key to delete is paired with an empty
	// value.
	keysOnly := make(map[string]string, len(labels))
	for i := range labels {
		keysOnly[labels[i]] = ""
	}

	const remove = true
	return p.editLabels(l, vms, keysOnly, remove)
}

// Create is part of the vm.Provider interface.
func (p *Provider) Create(
l *logger.Logger, names []string, opts vm.CreateOpts, vmProviderOpts vm.ProviderOpts,
Expand Down Expand Up @@ -594,27 +649,9 @@ func (p *Provider) Reset(l *logger.Logger, vms vm.List) error {
// Extend is part of the vm.Provider interface.
// This will update the Lifetime tag on the instances.
//
// The tagging itself is delegated to AddLabels, so the per-region fan-out is
// shared with every other label edit instead of being duplicated here.
func (p *Provider) Extend(l *logger.Logger, vms vm.List, lifetime time.Duration) error {
	return p.AddLabels(l, vms, map[string]string{
		"Lifetime": lifetime.String(),
	})
}

// cachedActiveAccount memoizes the return value from FindActiveAccount
Expand Down
10 changes: 10 additions & 0 deletions pkg/roachprod/vm/azure/azure.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,16 @@ func getAzureDefaultLabelMap(opts vm.CreateOpts) map[string]string {
return m
}

// AddLabels is not implemented for Azure: it only logs that adding labels is
// unsupported and reports success, leaving the VMs untouched.
func (p *Provider) AddLabels(l *logger.Logger, vms vm.List, labels map[string]string) error {
	const notSupported = "adding labels to Azure VMs not yet supported"
	l.Printf(notSupported)
	return nil
}

// RemoveLabels is not implemented for Azure: it only logs that removing labels
// is unsupported and reports success, leaving the VMs untouched.
func (p *Provider) RemoveLabels(l *logger.Logger, vms vm.List, labels []string) error {
	const notSupported = "removing labels from Azure VMs not yet supported"
	l.Printf(notSupported)
	return nil
}

// Create implements vm.Provider.
func (p *Provider) Create(
l *logger.Logger, names []string, opts vm.CreateOpts, vmProviderOpts vm.ProviderOpts,
Expand Down
8 changes: 8 additions & 0 deletions pkg/roachprod/vm/flagstub/flagstub.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,14 @@ func (p *provider) ConfigSSH(l *logger.Logger, zones []string) error {
return nil
}

// AddLabels is a no-op for the stub provider: all arguments are ignored and
// nil is always returned.
func (p *provider) AddLabels(l *logger.Logger, vms vm.List, labels map[string]string) error {
return nil
}

// RemoveLabels is a no-op for the stub provider: all arguments are ignored and
// nil is always returned.
func (p *provider) RemoveLabels(l *logger.Logger, vms vm.List, labels []string) error {
return nil
}

// Create implements vm.Provider and returns Unimplemented.
func (p *provider) Create(
l *logger.Logger, names []string, opts vm.CreateOpts, providerOpts vm.ProviderOpts,
Expand Down
67 changes: 49 additions & 18 deletions pkg/roachprod/vm/gce/gcloud.go
Original file line number Diff line number Diff line change
Expand Up @@ -883,6 +883,52 @@ func (p *Provider) ConfigSSH(l *logger.Logger, zones []string) error {
return nil
}

// editLabels is a helper that adds or removes labels from the given VMs by
// running `gcloud compute instances add-labels` or `remove-labels` once per
// VM, sequentially.
//
// When remove is true only the keys of labels are used. Otherwise every value
// is passed through vm.SanitizeLabel before being written. An empty label set
// is a no-op. The first failing gcloud invocation aborts the loop and its
// error, wrapped with the command line and combined output, is returned.
func (p *Provider) editLabels(
	l *logger.Logger, vms vm.List, labels map[string]string, remove bool,
) error {
	if len(labels) == 0 {
		// Nothing to add or remove; avoid invoking gcloud with an empty
		// --labels flag.
		return nil
	}

	subcommand := "add-labels"
	if remove {
		subcommand = "remove-labels"
	}

	tagArgs := make([]string, 0, len(labels))
	for key, value := range labels {
		if remove {
			tagArgs = append(tagArgs, key)
		} else {
			tagArgs = append(tagArgs, fmt.Sprintf("%s=%s", key, vm.SanitizeLabel(value)))
		}
	}
	labelsFlag := fmt.Sprintf("--labels=%s", strings.Join(tagArgs, ","))
	// NOTE(review): every VM is addressed in the provider's project rather
	// than a per-VM project; confirm this is intended for VM lists that may
	// span several projects.
	project := p.GetProject()

	for _, v := range vms {
		// A fresh argument slice per VM keeps iterations from sharing a
		// backing array.
		vmArgs := []string{
			"compute", "instances", subcommand,
			v.Name, "--zone", v.Zone,
			"--project", project, labelsFlag,
		}
		cmd := exec.Command("gcloud", vmArgs...)
		if b, err := cmd.CombinedOutput(); err != nil {
			return errors.Wrapf(err, "Command: gcloud %s\nOutput: %s", vmArgs, string(b))
		}
	}
	return nil
}

// AddLabels adds the given labels to the given VMs.
// It delegates to editLabels with remove set to false.
func (p *Provider) AddLabels(l *logger.Logger, vms vm.List, labels map[string]string) error {
return p.editLabels(l, vms, labels, false /* remove */)
}

// RemoveLabels removes the labels with the given keys from the given VMs.
func (p *Provider) RemoveLabels(l *logger.Logger, vms vm.List, labels []string) error {
	// editLabels takes a map; each key to delete is paired with an empty
	// value.
	keysOnly := make(map[string]string, len(labels))
	for i := range labels {
		keysOnly[labels[i]] = ""
	}

	const remove = true
	return p.editLabels(l, vms, keysOnly, remove)
}

// Create TODO(peter): document
func (p *Provider) Create(
l *logger.Logger, names []string, opts vm.CreateOpts, vmProviderOpts vm.ProviderOpts,
Expand Down Expand Up @@ -1292,24 +1338,9 @@ func (p *Provider) Reset(l *logger.Logger, vms vm.List) error {

// Extend updates the `lifetime` label on the given VMs to the string form of
// the supplied duration.
//
// The labelling itself is delegated to AddLabels, so the gcloud invocation is
// shared with every other label edit instead of being duplicated here.
func (p *Provider) Extend(l *logger.Logger, vms vm.List, lifetime time.Duration) error {
	return p.AddLabels(l, vms, map[string]string{
		"lifetime": lifetime.String(),
	})
}

// FindActiveAccount TODO(peter): document
Expand Down
8 changes: 8 additions & 0 deletions pkg/roachprod/vm/local/local.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,14 @@ func (p *Provider) ConfigSSH(l *logger.Logger, zones []string) error {
return nil
}

// AddLabels is a no-op for the local provider: all arguments are ignored and
// nil is always returned.
func (p *Provider) AddLabels(l *logger.Logger, vms vm.List, labels map[string]string) error {
return nil
}

// RemoveLabels is a no-op for the local provider: all arguments are ignored
// and nil is always returned.
func (p *Provider) RemoveLabels(l *logger.Logger, vms vm.List, labels []string) error {
return nil
}

// Create just creates fake host-info entries in the local filesystem
func (p *Provider) Create(
l *logger.Logger, names []string, opts vm.CreateOpts, unusedProviderOpts vm.ProviderOpts,
Expand Down
19 changes: 19 additions & 0 deletions pkg/roachprod/vm/vm.go
Original file line number Diff line number Diff line change
Expand Up @@ -406,6 +406,9 @@ type Provider interface {
FindActiveAccount(l *logger.Logger) (string, error)
List(l *logger.Logger, opts ListOptions) (List, error)
// The name of the Provider, which will also surface in the top-level Providers map.

AddLabels(l *logger.Logger, vms List, labels map[string]string) error
RemoveLabels(l *logger.Logger, vms List, labels []string) error
Name() string

// Active returns true if the provider is properly installed and capable of
Expand Down Expand Up @@ -643,3 +646,19 @@ func DNSSafeAccount(account string) string {
}
return strings.Map(safe, account)
}

// labelDisallowedCharsRE matches every run of characters that is not an ASCII
// letter or digit. It is compiled once at package scope rather than on every
// SanitizeLabel call.
var labelDisallowedCharsRE = regexp.MustCompile("[^a-zA-Z0-9]+")

// maxLabelLength is the maximum label length allowed by GCP.
const maxLabelLength = 63

// SanitizeLabel returns a version of label that is safe to use as a cloud
// label value. It takes the strictest provider's rules (GCE) as the common
// denominator:
//
//   - every run of non-alphanumeric characters becomes a single hyphen,
//   - letters are lower-cased, since GCE rejects upper-case label values,
//   - the result is truncated to 63 characters,
//   - leading and trailing hyphens are removed.
//
// Trimming happens after truncation so that a cut landing right after a
// hyphen cannot leave a trailing hyphen behind. The result may be empty if
// label contains no alphanumeric characters.
func SanitizeLabel(label string) string {
	label = labelDisallowedCharsRE.ReplaceAllString(label, "-")
	label = strings.ToLower(label)

	// After the replacement the string is pure ASCII, so slicing by byte
	// offset cannot split a multi-byte character.
	if len(label) > maxLabelLength {
		label = label[:maxLabelLength]
	}

	return strings.Trim(label, "-")
}

0 comments on commit d6dbedc

Please sign in to comment.