Skip to content

Commit

Permalink
Merge #111811
Browse files Browse the repository at this point in the history
111811: spec: move machine type, zone, local ssd defaults out of the TestSpec and the registry r=RaduBerinde a=RaduBerinde

This set of commits makes more progress towards #104029 and - more generally - not baking in any flag configuration into the registry itself.

#### spec: move RoachprodOpts args to separate struct

Epic: none
Release note: None

#### spec: move default machine type from ClusterSpec to RoachprodClusterConfig

This is much more logical and allows the removal of the parameter from
the registry.

Epic: none
Release note: None

#### spec: move default zones to RoachprodClusterConfig

Remove the default zones from `ClusterSpec` (and the registry), and
add it to `RoachprodClusterConfig`.

Epic: none
Release note: None

#### spec: move local SSD preference to RoachprodClusterConfig

We change the boolean in the TestSpec to a tri-state (default, prefer
on, disable). This way we can apply the default when creating the
cluster.

Epic: none
Release note: None

Co-authored-by: Radu Berinde <[email protected]>
  • Loading branch information
craig[bot] and RaduBerinde committed Oct 15, 2023
2 parents 96ab72f + 7abfd67 commit fbbf7ef
Show file tree
Hide file tree
Showing 15 changed files with 160 additions and 113 deletions.
11 changes: 10 additions & 1 deletion pkg/cmd/roachtest/cluster.go
Original file line number Diff line number Diff line change
Expand Up @@ -916,9 +916,18 @@ func (f *clusterFactory) newCluster(
defer setStatus("idle")

providerOptsContainer := vm.CreateProviderOptionsContainer()

params := spec.RoachprodClusterConfig{
UseIOBarrierOnLocalSSD: cfg.useIOBarrier,
PreferredArch: cfg.arch,
}
params.Defaults.MachineType = instanceType
params.Defaults.Zones = zonesF
params.Defaults.PreferLocalSSD = localSSDArg

// The ClusterName is set below in the retry loop to ensure
// that each create attempt gets a unique cluster name.
createVMOpts, providerOpts, err := cfg.spec.RoachprodOpts("", cfg.useIOBarrier, cfg.arch)
createVMOpts, providerOpts, err := cfg.spec.RoachprodOpts(params)
if err != nil {
// We must release the allocation because cluster creation is not possible at this point.
cfg.alloc.Release()
Expand Down
2 changes: 1 addition & 1 deletion pkg/cmd/roachtest/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ import (
)

func TestClusterNodes(t *testing.T) {
c := &clusterImpl{spec: spec.MakeClusterSpec(spec.GCE, "", 10)}
c := &clusterImpl{spec: spec.MakeClusterSpec(spec.GCE, 10)}
opts := func(opts ...option.Option) []option.Option {
return opts
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/cmd/roachtest/github_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ func TestShouldPost(t *testing.T) {
{false, 1, "token", "master", true, ""},
}

reg := makeTestRegistry(spec.GCE, "", "", false)
reg := makeTestRegistry(spec.GCE)
for _, c := range testCases {
t.Setenv("GITHUB_API_TOKEN", c.envGithubAPIToken)
t.Setenv("TC_BUILD_BRANCH", c.envTcBuildBranch)
Expand Down Expand Up @@ -193,7 +193,7 @@ func TestCreatePostRequest(t *testing.T) {
},
}

reg := makeTestRegistry(spec.GCE, "", "", false)
reg := makeTestRegistry(spec.GCE)
for idx, c := range testCases {
t.Run(fmt.Sprintf("%d", idx+1), func(t *testing.T) {
clusterSpec := reg.MakeClusterSpec(1, spec.Arch(c.arch))
Expand Down
2 changes: 1 addition & 1 deletion pkg/cmd/roachtest/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ Examples:
roachtest list --suite weekly --owner kv
`,
RunE: func(cmd *cobra.Command, args []string) error {
r := makeTestRegistry(cloud, instanceType, zonesF, localSSDArg)
r := makeTestRegistry(cloud)
tests.RegisterTests(&r)

filter, err := makeTestFilter(args)
Expand Down
4 changes: 2 additions & 2 deletions pkg/cmd/roachtest/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,15 +30,15 @@ func init() {
}

func makeRegistry(names ...string) testRegistryImpl {
r := makeTestRegistry(spec.GCE, "", "", false /* preferSSD */)
r := makeTestRegistry(spec.GCE)
dummyRun := func(context.Context, test.Test, cluster.Cluster) {}

for _, name := range names {
r.Add(registry.TestSpec{
Name: name,
Owner: OwnerUnitTest,
Run: dummyRun,
Cluster: spec.MakeClusterSpec(spec.GCE, "", 0),
Cluster: spec.MakeClusterSpec(spec.GCE, 0),
CompatibleClouds: registry.AllExceptAWS,
Suites: registry.Suites(registry.Nightly),
})
Expand Down
2 changes: 1 addition & 1 deletion pkg/cmd/roachtest/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ func addBenchFlags(benchCmd *cobra.Command) {
// runTests is the main function for the run and bench commands.
// Assumes initRunFlagsBinariesAndLibraries was called.
func runTests(register func(registry.Registry), filter *registry.TestFilter) error {
r := makeTestRegistry(cloud, instanceType, zonesF, localSSDArg)
r := makeTestRegistry(cloud)

// actual registering of tests
// TODO: don't register if we can't run on the specified registry cloud
Expand Down
90 changes: 71 additions & 19 deletions pkg/cmd/roachtest/spec/cluster_spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -59,22 +59,31 @@ func (m MemPerCPU) String() string {
return "unknown"
}

// LocalSSDSetting controls whether test cluster nodes use an instance-local SSD
// as storage.
type LocalSSDSetting int

const (
// LocalSSDDefault is the default mode, when the test does not have any
// preference. A local SSD may or may not be used, depending on --local-ssd
// flag and machine type.
LocalSSDDefault LocalSSDSetting = iota

// LocalSSDDisable means that we will never use a local SSD.
LocalSSDDisable

// LocalSSDPreferOn means that we prefer to use a local SSD. It is not a
// guarantee (depending on other constraints on machine type).
LocalSSDPreferOn
)

// ClusterSpec represents a test's description of what its cluster needs to
// look like. It becomes part of a clusterConfig when the cluster is created.
type ClusterSpec struct {
// TODO(#104029): We should remove the Cloud field; the tests now specify
// their compatible clouds.
Cloud string

// TODO(radu): defaultInstanceType is the default machine type used (unless
// overridden by GCE.MachineType or AWS.MachineType); it does not belong in
// the spec.
defaultInstanceType string

// TODO(radu): defaultZones is the default zones specification (unless
// overridden by GCE.Zones or AWS.Zones); it does not belong in the spec.
defaultZones string

Arch vm.CPUArch // CPU architecture; auto-chosen if left empty
NodeCount int
// CPUs is the number of CPUs per node.
Expand All @@ -83,7 +92,7 @@ type ClusterSpec struct {
SSDs int
RAID0 bool
VolumeSize int
PreferLocalSSD bool
LocalSSD LocalSSDSetting
Geo bool
Lifetime time.Duration
ReusePolicy clusterReusePolicy
Expand Down Expand Up @@ -116,8 +125,8 @@ type ClusterSpec struct {
}

// MakeClusterSpec makes a ClusterSpec.
func MakeClusterSpec(cloud string, instanceType string, nodeCount int, opts ...Option) ClusterSpec {
spec := ClusterSpec{Cloud: cloud, defaultInstanceType: instanceType, NodeCount: nodeCount}
func MakeClusterSpec(cloud string, nodeCount int, opts ...Option) ClusterSpec {
spec := ClusterSpec{Cloud: cloud, NodeCount: nodeCount}
defaultOpts := []Option{CPU(4), nodeLifetime(12 * time.Hour), ReuseAny()}
for _, o := range append(defaultOpts, opts...) {
o(&spec)
Expand Down Expand Up @@ -231,16 +240,59 @@ func getAzureOpts(machineType string, zones []string) vm.ProviderOpts {
return opts
}

// RoachprodClusterConfig contains general roachprod cluster configuration that
// does not depend on the test. It is used in conjunction with ClusterSpec to
// determine the final configuration.
type RoachprodClusterConfig struct {
// UseIOBarrierOnLocalSSD is set if we don't want to mount local SSDs with the
// `-o nobarrier` flag.
UseIOBarrierOnLocalSSD bool

// PreferredArch is the preferred CPU architecture; it is not guaranteed
// (depending on cloud and on other requirements on machine type).
PreferredArch vm.CPUArch

// Defaults contains configuration values that are used when the ClusterSpec
// does not specify the corresponding option.
Defaults struct {
// MachineType, if set, is the default machine type (used unless the
// ClusterSpec specifies a machine type for the current cloud).
//
// If it is not set (and the ClusterSpec doesn't specify one either), a
// machine type is determined automatically.
MachineType string

// Zones, if set, is the default zone configuration (unless the test
// specifies a zone configuration for the current cloud).
Zones string

// PreferLocalSSD is the default local SSD mode (unless the test specifies a
// preference). If true, we try to use a local SSD if allowed by the machine
// type. If false, we never use a local SSD.
PreferLocalSSD bool
}
}

// RoachprodOpts returns the opts to use when calling `roachprod.Create()`
// in order to create the cluster described in the spec.
func (s *ClusterSpec) RoachprodOpts(
clusterName string, useIOBarrier bool, arch vm.CPUArch,
params RoachprodClusterConfig,
) (vm.CreateOpts, vm.ProviderOpts, error) {
useIOBarrier := params.UseIOBarrierOnLocalSSD
arch := params.PreferredArch

preferLocalSSD := params.Defaults.PreferLocalSSD
switch s.LocalSSD {
case LocalSSDDisable:
preferLocalSSD = false
case LocalSSDPreferOn:
preferLocalSSD = true
}

createVMOpts := vm.DefaultCreateOpts()
// N.B. We set "usage=roachtest" as the default, custom label for billing tracking.
createVMOpts.CustomLabels = map[string]string{"usage": "roachtest"}
createVMOpts.ClusterName = clusterName
createVMOpts.ClusterName = "" // Will be set later.
if s.Lifetime != 0 {
createVMOpts.Lifetime = s.Lifetime
}
Expand Down Expand Up @@ -270,7 +322,7 @@ func (s *ClusterSpec) RoachprodOpts(
createVMOpts.Arch = string(arch)
ssdCount := s.SSDs

machineType := s.defaultInstanceType
machineType := params.Defaults.MachineType
switch s.Cloud {
case AWS:
if s.AWS.MachineType != "" {
Expand All @@ -293,11 +345,11 @@ func (s *ClusterSpec) RoachprodOpts(
var err error
switch s.Cloud {
case AWS:
machineType, selectedArch, err = SelectAWSMachineType(s.CPUs, s.Mem, s.PreferLocalSSD && s.VolumeSize == 0, arch)
machineType, selectedArch, err = SelectAWSMachineType(s.CPUs, s.Mem, preferLocalSSD && s.VolumeSize == 0, arch)
case GCE:
machineType, selectedArch = SelectGCEMachineType(s.CPUs, s.Mem, arch)
case Azure:
machineType, err = SelectAzureMachineType(s.CPUs, s.Mem, s.PreferLocalSSD)
machineType, err = SelectAzureMachineType(s.CPUs, s.Mem, preferLocalSSD)
}

if err != nil {
Expand All @@ -316,7 +368,7 @@ func (s *ClusterSpec) RoachprodOpts(
// - if no particular volume size is requested, and,
// - on AWS, if the machine type supports it.
// - on GCE, if the machine type is not ARM64.
if s.PreferLocalSSD && s.VolumeSize == 0 && (s.Cloud != AWS || awsMachineSupportsSSD(machineType)) &&
if preferLocalSSD && s.VolumeSize == 0 && (s.Cloud != AWS || awsMachineSupportsSSD(machineType)) &&
(s.Cloud != GCE || selectedArch != vm.ArchARM64) {
// Ensure SSD count is at least 1 if UseLocalSSD is true.
if ssdCount == 0 {
Expand All @@ -343,7 +395,7 @@ func (s *ClusterSpec) RoachprodOpts(
}
}

zonesStr := s.defaultZones
zonesStr := params.Defaults.Zones
switch s.Cloud {
case AWS:
if s.AWS.Zones != "" {
Expand Down
27 changes: 17 additions & 10 deletions pkg/cmd/roachtest/spec/option.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,13 +80,6 @@ func Geo() Option {
}
}

// DefaultZones sets the default zones (set with the --zones flag).
func DefaultZones(zones string) Option {
return func(spec *ClusterSpec) {
spec.defaultZones = zones
}
}

func nodeLifetime(lifetime time.Duration) Option {
return func(spec *ClusterSpec) {
spec.Lifetime = lifetime
Expand Down Expand Up @@ -164,10 +157,24 @@ func ReuseTagged(tag string) Option {
}
}

// PreferLocalSSD specifies whether to prefer using local SSD, when possible.
func PreferLocalSSD(prefer bool) Option {
// PreferLocalSSD specifies that we use instance-local SSDs whenever possible
// (depending on other constraints on machine type).
//
// By default, a test cluster may or may not use a local SSD depending on
// --local-ssd flag and machine type.
func PreferLocalSSD() Option {
return func(spec *ClusterSpec) {
spec.LocalSSD = LocalSSDPreferOn
}
}

// DisableLocalSSD specifies that we never use instance-local SSDs.
//
// By default, a test cluster may or may not use a local SSD depending on
// --local-ssd flag and machine type.
func DisableLocalSSD() Option {
return func(spec *ClusterSpec) {
spec.PreferLocalSSD = prefer
spec.LocalSSD = LocalSSDDisable
}
}

Expand Down
22 changes: 2 additions & 20 deletions pkg/cmd/roachtest/test_registry.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,6 @@ import (
type testRegistryImpl struct {
m map[string]*registry.TestSpec
cloud string
instanceType string // optional
zones string
preferSSD bool
snapshotPrefixes map[string]struct{}

promRegistry *prometheus.Registry
Expand All @@ -38,14 +35,9 @@ type testRegistryImpl struct {
var _ registry.Registry = (*testRegistryImpl)(nil)

// makeTestRegistry constructs a testRegistryImpl and configures it with opts.
func makeTestRegistry(
cloud string, instanceType string, zones string, preferSSD bool,
) testRegistryImpl {
func makeTestRegistry(cloud string) testRegistryImpl {
return testRegistryImpl{
cloud: cloud,
instanceType: instanceType,
zones: zones,
preferSSD: preferSSD,
m: make(map[string]*registry.TestSpec),
snapshotPrefixes: make(map[string]struct{}),
promRegistry: prometheus.NewRegistry(),
Expand Down Expand Up @@ -79,17 +71,7 @@ func (r *testRegistryImpl) Add(spec registry.TestSpec) {
// MakeClusterSpec makes a cluster spec. It should be used over `spec.MakeClusterSpec`
// because this method also adds options baked into the registry.
func (r *testRegistryImpl) MakeClusterSpec(nodeCount int, opts ...spec.Option) spec.ClusterSpec {
// NB: we need to make sure that `opts` is appended at the end, so that it
// overrides the SSD and zones settings from the registry.
var finalOpts []spec.Option
if r.preferSSD {
finalOpts = append(finalOpts, spec.PreferLocalSSD(true))
}
if r.zones != "" {
finalOpts = append(finalOpts, spec.DefaultZones(r.zones))
}
finalOpts = append(finalOpts, opts...)
return spec.MakeClusterSpec(r.cloud, r.instanceType, nodeCount, finalOpts...)
return spec.MakeClusterSpec(r.cloud, nodeCount, opts...)
}

const testNameRE = "^[a-zA-Z0-9-_=/,]+$"
Expand Down
Loading

0 comments on commit fbbf7ef

Please sign in to comment.