Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
119796: cmd/roachtest: add registry.OperationSpec r=herkolategan a=itsbilal

This change introduces a new type in the roachtest registry for Operations; smaller subunits of tests that work strictly on existing clusters and are meant to have zero logical side-effects to a running cluster (eg. should not delete data in an irrecoverable way), but can induce chaos events like crashing nodes. This new spec type, OperationSpec, specifies one operation and is registered with a registry.Registry. It can be filtered with a registry.TestFilter similar to TestSpecs.

Split from cockroachdb#118364.

Epic: none

Release note: None

Co-authored-by: Bilal Akhtar <[email protected]>
  • Loading branch information
craig[bot] and itsbilal committed Mar 11, 2024
2 parents 340d060 + 967d8b7 commit 8ad5029
Show file tree
Hide file tree
Showing 8 changed files with 180 additions and 0 deletions.
1 change: 1 addition & 0 deletions pkg/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -1156,6 +1156,7 @@ GO_TARGETS = [
"//pkg/cmd/roachtest/clusterstats:clusterstats",
"//pkg/cmd/roachtest/clusterstats:clusterstats_test",
"//pkg/cmd/roachtest/grafana:grafana",
"//pkg/cmd/roachtest/operation:operation",
"//pkg/cmd/roachtest/option:option",
"//pkg/cmd/roachtest/option:option_test",
"//pkg/cmd/roachtest/registry:registry",
Expand Down
9 changes: 9 additions & 0 deletions pkg/cmd/roachtest/operation/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
load("@io_bazel_rules_go//go:def.bzl", "go_library")

go_library(
name = "operation",
srcs = ["operation_interface.go"],
importpath = "github.com/cockroachdb/cockroach/pkg/cmd/roachtest/operation",
visibility = ["//visibility:public"],
deps = ["//pkg/roachprod/logger"],
)
29 changes: 29 additions & 0 deletions pkg/cmd/roachtest/operation/operation_interface.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Copyright 2024 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package operation

import "github.com/cockroachdb/cockroach/pkg/roachprod/logger"

type Operation interface {
// ClusterCockroach returns the path to the Cockroach binary on the target
// cluster.
ClusterCockroach() string
Name() string
Error(args ...interface{})
Errorf(string, ...interface{})
FailNow()
Fatal(args ...interface{})
Fatalf(format string, args ...interface{})
Failed() bool

L() *logger.Logger
Status(args ...interface{})
}
2 changes: 2 additions & 0 deletions pkg/cmd/roachtest/registry/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ go_library(
"encryption.go",
"errors.go",
"filter.go",
"operation_spec.go",
"owners.go",
"registry_interface.go",
"tag.go",
Expand All @@ -15,6 +16,7 @@ go_library(
visibility = ["//visibility:public"],
deps = [
"//pkg/cmd/roachtest/cluster",
"//pkg/cmd/roachtest/operation",
"//pkg/cmd/roachtest/spec",
"//pkg/cmd/roachtest/test",
"//pkg/internal/team",
Expand Down
81 changes: 81 additions & 0 deletions pkg/cmd/roachtest/registry/operation_spec.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
// Copyright 2024 The Cockroach Authors.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.

package registry

import (
"context"
"time"

"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/cluster"
"github.com/cockroachdb/cockroach/pkg/cmd/roachtest/operation"
)

// OperationDependency specifies what an operation requires from a cluster to
// be able to run. The zero value is the simplest dependency (i.e. an existant
// cluster with nodes) and is always implied even if unspecified. All non-zero
// values could require additional pre-Run checks from any runner/scheduler
// to verify if this operation can be run.
type OperationDependency int

const (
OperationRequiresNodes OperationDependency = iota
OperationRequiresPopulatedDatabase
OperationRequiresZeroUnavailableRanges
OperationRequiresZeroLaggingRanges
)

// OperationCleanup specifies an operation that
type OperationCleanup interface {
Cleanup(ctx context.Context, o operation.Operation, c cluster.Cluster)
}

// OperationSpec is a spec for a roachtest operation.
type OperationSpec struct {
Skip string // if non-empty, operation will be skipped

Name string
// Owner is the name of the team responsible for signing off on failures of
// this operation that happen in the release process. This must be one of a limited
// set of values (the keys in the roachtestTeams map).
Owner Owner
// The maximum duration the operation is allowed to run before it is considered
// failed. This timeout applies _individually_ to Run() and the returned
// OperationCleanup Cleanup() each, but does not include any scheduling waits
// for either method.
Timeout time.Duration

// CompatibleClouds is the set of clouds this operation can run on (e.g. AllClouds,
// OnlyGCE, etc). Must be set.
CompatibleClouds CloudSet

// Dependencies specify the types of resources required for this roachtest
// operation to work. This will be used in filtering eligible operations to
// run. Multiple dependencies could be specified, and any schedulers will take
// care of ensuring those dependencies are met before running Run().
//
// TODO(bilal): Unused.
Dependencies []OperationDependency

// CanRunConcurrently specifies whether this operation is safe to run
// concurrently with other operations that have CanRunConcurrently = true. For
// instance, a random-index addition is safe to run concurrently with most
// other operations like node kills, while a drop would need to run on its own
// and will have CanRunConcurrently = false.
//
// TODO(bilal): Unused.
CanRunConcurrently bool

// Run is the operation function. It returns an OperationCleanup if this
// operation requires additional cleanup steps afterwards (eg. dropping an
// extra column that was created). A nil return value indicates no cleanup
// necessary
Run func(ctx context.Context, o operation.Operation, c cluster.Cluster) OperationCleanup
}
1 change: 1 addition & 0 deletions pkg/cmd/roachtest/registry/registry_interface.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,5 +20,6 @@ import (
type Registry interface {
MakeClusterSpec(nodeCount int, opts ...spec.Option) spec.ClusterSpec
Add(TestSpec)
AddOperation(OperationSpec)
PromFactory() promauto.Factory
}
53 changes: 53 additions & 0 deletions pkg/cmd/roachtest/test_registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (

type testRegistryImpl struct {
m map[string]*registry.TestSpec
ops map[string]*registry.OperationSpec
snapshotPrefixes map[string]struct{}

promRegistry *prometheus.Registry
Expand All @@ -37,6 +38,7 @@ var _ registry.Registry = (*testRegistryImpl)(nil)
func makeTestRegistry() testRegistryImpl {
return testRegistryImpl{
m: make(map[string]*registry.TestSpec),
ops: make(map[string]*registry.OperationSpec),
snapshotPrefixes: make(map[string]struct{}),
promRegistry: prometheus.NewRegistry(),
}
Expand Down Expand Up @@ -65,6 +67,19 @@ func (r *testRegistryImpl) Add(spec registry.TestSpec) {
r.m[spec.Name] = &spec
}

// AddOperation adds an operation to the registry.
func (r *testRegistryImpl) AddOperation(spec registry.OperationSpec) {
if _, ok := r.ops[spec.Name]; ok {
fmt.Fprintf(os.Stderr, "operation %s already registered\n", spec.Name)
os.Exit(1)
}
if err := r.prepareOpSpec(&spec); err != nil {
fmt.Fprintf(os.Stderr, "%+v\n", err)
os.Exit(1)
}
r.ops[spec.Name] = &spec
}

// MakeClusterSpec makes a cluster spec. It should be used over `spec.MakeClusterSpec`
// because this method also adds options baked into the registry.
func (r *testRegistryImpl) MakeClusterSpec(nodeCount int, opts ...spec.Option) spec.ClusterSpec {
Expand All @@ -73,6 +88,32 @@ func (r *testRegistryImpl) MakeClusterSpec(nodeCount int, opts ...spec.Option) s

const testNameRE = "^[a-zA-Z0-9-_=/,]+$"

// prepareOpSpec validates a spec and does minor massaging of its fields.
func (r *testRegistryImpl) prepareOpSpec(spec *registry.OperationSpec) error {
// Operations follow the same naming conventions as tests and have the same
// alphabet of allowable characters in names.
if matched, err := regexp.MatchString(testNameRE, spec.Name); err != nil || !matched {
return fmt.Errorf("%s: Name must match this regexp: %s", spec.Name, testNameRE)
}

spec.CompatibleClouds.AssertInitialized()

if spec.Run == nil {
return fmt.Errorf("%s: must specify Run", spec.Name)
}

// All operations must have an owner so the release team knows who signs off on
// failures and so the github issue poster knows who to assign it to.
if spec.Owner == `` {
return fmt.Errorf(`%s: unspecified owner`, spec.Name)
}
if !spec.Owner.IsValid() {
return fmt.Errorf(`%s: unknown owner %q`, spec.Name, spec.Owner)
}

return nil
}

// prepareSpec validates a spec and does minor massaging of its fields.
func (r *testRegistryImpl) prepareSpec(spec *registry.TestSpec) error {
if matched, err := regexp.MatchString(testNameRE, spec.Name); err != nil || !matched {
Expand Down Expand Up @@ -130,3 +171,15 @@ func (r testRegistryImpl) AllTests() []registry.TestSpec {
})
return tests
}

// AllOperations returns all the operation specs, sorted by name.
func (r testRegistryImpl) AllOperations() []registry.OperationSpec {
var ops []registry.OperationSpec
for _, t := range r.ops {
ops = append(ops, *t)
}
sort.Slice(ops, func(i, j int) bool {
return ops[i].Name < ops[j].Name
})
return ops
}
4 changes: 4 additions & 0 deletions pkg/cmd/roachtest/tests/restore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ func (m *mockRegistry) Add(spec registry.TestSpec) {
m.testNames = append(m.testNames, spec.Name)
}

func (m *mockRegistry) AddOperation(spec registry.OperationSpec) {
// No-op.
}

func (m *mockRegistry) PromFactory() promauto.Factory {
return promauto.With(nil)
}
Expand Down

0 comments on commit 8ad5029

Please sign in to comment.