Skip to content

Commit

Permalink
Merge pull request #115389 from RaduBerinde/backport23.1-111324
Browse files Browse the repository at this point in the history
release-23.1: roachtest: stop using ClusterSpec.Cloud in test code
  • Loading branch information
RaduBerinde authored Dec 5, 2023
2 parents 1bfb695 + 31718a3 commit 4ddd721
Show file tree
Hide file tree
Showing 31 changed files with 312 additions and 280 deletions.
8 changes: 4 additions & 4 deletions pkg/cmd/roachtest/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -320,15 +320,15 @@ func TestAWSMachineTypeNew(t *testing.T) {

for _, tc := range testCases {
t.Run(fmt.Sprintf("%d/%s/%t/%s", tc.cpus, tc.mem, tc.localSSD, tc.arch), func(t *testing.T) {
machineType, selectedArch := spec.AWSMachineTypeNew(tc.cpus, tc.mem, tc.localSSD, tc.arch)
machineType, selectedArch := spec.SelectAWSMachineTypeNew(tc.cpus, tc.mem, tc.localSSD, tc.arch)

require.Equal(t, tc.expectedMachineType, machineType)
require.Equal(t, tc.expectedArch, selectedArch)
})
}
// spec.Low is not supported.
require.Panics(t, func() { spec.AWSMachineTypeNew(4, spec.Low, false, vm.ArchAMD64) })
require.Panics(t, func() { spec.AWSMachineTypeNew(16, spec.Low, false, vm.ArchARM64) })
require.Panics(t, func() { spec.SelectAWSMachineTypeNew(4, spec.Low, false, vm.ArchAMD64) })
require.Panics(t, func() { spec.SelectAWSMachineTypeNew(16, spec.Low, false, vm.ArchARM64) })
}

// TODO(srosenberg): restore the change in https://github.com/cockroachdb/cockroach/pull/111140 after 23.2 branch cut.
Expand Down Expand Up @@ -415,7 +415,7 @@ func TestGCEMachineTypeNew(t *testing.T) {

for _, tc := range testCases {
t.Run(fmt.Sprintf("%d/%s/%s", tc.cpus, tc.mem, tc.arch), func(t *testing.T) {
machineType, selectedArch := spec.GCEMachineTypeNew(tc.cpus, tc.mem, tc.arch)
machineType, selectedArch := spec.SelectGCEMachineTypeNew(tc.cpus, tc.mem, tc.arch)

require.Equal(t, tc.expectedMachineType, machineType)
require.Equal(t, tc.expectedArch, selectedArch)
Expand Down
96 changes: 68 additions & 28 deletions pkg/cmd/roachtest/spec/cluster_spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,19 +65,25 @@ type ClusterSpec struct {
// TODO(#104029): We should remove the Cloud field; the tests now specify
// their compatible clouds.
Cloud string
Arch vm.CPUArch // CPU architecture; auto-chosen if left empty
// TODO(radu): An InstanceType can only make sense in the context of a
// specific cloud. We should replace this with cloud-specific arguments.
InstanceType string // auto-chosen if left empty
NodeCount int

// TODO(radu): defaultInstanceType is the default machine type used (unless
// overridden by GCE.MachineType or AWS.MachineType); it does not belong in
// the spec.
defaultInstanceType string

// TODO(radu): defaultZones is the default zones specification (unless
// overridden by GCE.Zones or AWS.Zones); it does not belong in the spec.
defaultZones string

Arch vm.CPUArch // CPU architecture; auto-chosen if left empty
NodeCount int
// CPUs is the number of CPUs per node.
CPUs int
Mem MemPerCPU
SSDs int
RAID0 bool
VolumeSize int
PreferLocalSSD bool
Zones string
Geo bool
Lifetime time.Duration
ReusePolicy clusterReusePolicy
Expand All @@ -92,18 +98,29 @@ type ClusterSpec struct {

GatherCores bool

// AWS-specific arguments.
//
// AWSVolumeThroughput is the min provisioned EBS volume throughput.
AWSVolumeThroughput int
// GCE-specific arguments. These values apply only on clusters instantiated on GCE.
GCE struct {
MachineType string
MinCPUPlatform string
VolumeType string
Zones string
}

// AWS-specific arguments. These values apply only on clusters instantiated on AWS.
AWS struct {
MachineType string
// VolumeThroughput is the min provisioned EBS volume throughput.
VolumeThroughput int
Zones string
}
}

// MakeClusterSpec makes a ClusterSpec.
func MakeClusterSpec(cloud string, instanceType string, nodeCount int, opts ...Option) ClusterSpec {
spec := ClusterSpec{Cloud: cloud, InstanceType: instanceType, NodeCount: nodeCount}
defaultOpts := []Option{CPU(4), nodeLifetimeOption(12 * time.Hour), ReuseAny()}
spec := ClusterSpec{Cloud: cloud, defaultInstanceType: instanceType, NodeCount: nodeCount}
defaultOpts := []Option{CPU(4), nodeLifetime(12 * time.Hour), ReuseAny()}
for _, o := range append(defaultOpts, opts...) {
o.apply(&spec)
o(&spec)
}
return spec
}
Expand Down Expand Up @@ -247,31 +264,42 @@ func (s *ClusterSpec) RoachprodOpts(

createVMOpts.GeoDistributed = s.Geo
createVMOpts.Arch = string(arch)
machineType := s.InstanceType
ssdCount := s.SSDs

machineType := s.defaultInstanceType
switch s.Cloud {
case AWS:
if s.AWS.MachineType != "" {
machineType = s.AWS.MachineType
}
case GCE:
if s.GCE.MachineType != "" {
machineType = s.GCE.MachineType
}
}

if s.CPUs != 0 {
// Default to the user-supplied machine type, if any.
// Otherwise, pick based on requested CPU count.
var selectedArch vm.CPUArch

if len(machineType) == 0 {
if machineType == "" {
// If no machine type was specified, choose one
// based on the cloud and CPU count.
switch s.Cloud {
case AWS:
machineType, selectedArch = AWSMachineType(s.CPUs, s.Mem, s.PreferLocalSSD && s.VolumeSize == 0, arch)
machineType, selectedArch = SelectAWSMachineType(s.CPUs, s.Mem, s.PreferLocalSSD && s.VolumeSize == 0, arch)
case GCE:
machineType, selectedArch = GCEMachineType(s.CPUs, s.Mem, arch)
machineType, selectedArch = SelectGCEMachineType(s.CPUs, s.Mem, arch)
case Azure:
machineType = AzureMachineType(s.CPUs, s.Mem)
machineType = SelectAzureMachineType(s.CPUs, s.Mem)
}
if selectedArch != "" && selectedArch != arch {
// TODO(srosenberg): we need a better way to monitor the rate of this mismatch, i.e.,
// other than grepping cluster creation logs.
fmt.Printf("WARN: requested arch %s for machineType %s, but selected %s\n", arch, machineType, selectedArch)
createVMOpts.Arch = string(selectedArch)
}
}
if selectedArch != "" && selectedArch != arch {
// TODO(srosenberg): we need a better way to monitor the rate of this mismatch, i.e.,
// other than grepping cluster creation logs.
fmt.Printf("WARN: requested arch %s for machineType %s, but selected %s\n", arch, machineType, selectedArch)
createVMOpts.Arch = string(selectedArch)
}

// Local SSD can only be requested
Expand Down Expand Up @@ -305,9 +333,21 @@ func (s *ClusterSpec) RoachprodOpts(
createVMOpts.SSDOpts.FileSystem = vm.Zfs
}
}

zonesStr := s.defaultZones
switch s.Cloud {
case AWS:
if s.AWS.Zones != "" {
zonesStr = s.AWS.Zones
}
case GCE:
if s.GCE.Zones != "" {
zonesStr = s.GCE.Zones
}
}
var zones []string
if s.Zones != "" {
zones = strings.Split(s.Zones, ",")
if zonesStr != "" {
zones = strings.Split(zonesStr, ",")
if !s.Geo {
zones = zones[:1]
}
Expand All @@ -321,12 +361,12 @@ func (s *ClusterSpec) RoachprodOpts(
var providerOpts vm.ProviderOpts
switch s.Cloud {
case AWS:
providerOpts = getAWSOpts(machineType, zones, s.VolumeSize, s.AWSVolumeThroughput,
providerOpts = getAWSOpts(machineType, zones, s.VolumeSize, s.AWS.VolumeThroughput,
createVMOpts.SSDOpts.UseLocalSSD)
case GCE:
providerOpts = getGCEOpts(machineType, zones, s.VolumeSize, ssdCount,
createVMOpts.SSDOpts.UseLocalSSD, s.RAID0, s.TerminateOnMigration,
"" /* minCPUPlatform */, vm.ParseArch(createVMOpts.Arch),
s.GCE.MinCPUPlatform, vm.ParseArch(createVMOpts.Arch),
)
case Azure:
providerOpts = getAzureOpts(machineType, zones)
Expand Down
40 changes: 22 additions & 18 deletions pkg/cmd/roachtest/spec/machine_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,21 @@ import (
)

// TODO(srosenberg): restore the change in https://github.com/cockroachdb/cockroach/pull/111140 after 23.2 branch cut.
func AWSMachineType(
func SelectAWSMachineType(
cpus int, mem MemPerCPU, shouldSupportLocalSSD bool, arch vm.CPUArch,
) (string, vm.CPUArch) {
return AWSMachineTypeOld(cpus, mem, arch)
return SelectAWSMachineTypeOld(cpus, mem, arch)
}

// TODO(srosenberg): restore the change in https://github.com/cockroachdb/cockroach/pull/111140 after 23.2 branch cut.
func GCEMachineType(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPUArch) {
return GCEMachineTypeOld(cpus, mem, arch)
func SelectGCEMachineType(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPUArch) {
return SelectGCEMachineTypeOld(cpus, mem, arch)
}

// AWSMachineType selects a machine type given the desired number of CPUs and
// memory per CPU ratio. Also returns the architecture of the selected machine type.
func AWSMachineTypeOld(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPUArch) {
// SelectAWSMachineType selects a machine type given the desired number of CPUs
// and memory per CPU ratio. Also returns the architecture of the selected
// machine type.
func SelectAWSMachineTypeOld(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPUArch) {
// TODO(erikgrinaker): These have significantly less RAM than
// their GCE counterparts. Consider harmonizing them.
family := "c6id" // 2 GB RAM per CPU
Expand Down Expand Up @@ -87,8 +88,9 @@ func AWSMachineTypeOld(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPU
return fmt.Sprintf("%s.%s", family, size), selectedArch
}

// AWSMachineType selects a machine type given the desired number of CPUs, memory per CPU,
// support for locally-attached SSDs and CPU architecture. It returns a compatible machine type and its architecture.
// SelectAWSMachineType selects a machine type given the desired number of CPUs,
// memory per CPU, support for locally-attached SSDs and CPU architecture. It
// returns a compatible machine type and its architecture.
//
// When MemPerCPU is Standard, the memory per CPU ratio is 4 GB. For High, it is 8 GB.
// For Auto, it's 4 GB up to and including 16 CPUs, then 2 GB. Low is not supported.
Expand All @@ -99,7 +101,7 @@ func AWSMachineTypeOld(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPU
//
// At the time of writing, the intel machines are all third-generation Xeon, "Ice Lake" which are isomorphic to
// GCE's n2-(standard|highmem|custom) _with_ --minimum-cpu-platform="Intel Ice Lake" (roachprod's default).
func AWSMachineTypeNew(
func SelectAWSMachineTypeNew(
cpus int, mem MemPerCPU, shouldSupportLocalSSD bool, arch vm.CPUArch,
) (string, vm.CPUArch) {
family := "m6i" // 4 GB RAM per CPU
Expand Down Expand Up @@ -176,9 +178,10 @@ func AWSMachineTypeNew(
return fmt.Sprintf("%s.%s", family, size), selectedArch
}

// GCEMachineType selects a machine type given the desired number of CPUs and
// memory per CPU ratio. Also returns the architecture of the selected machine type.
func GCEMachineTypeOld(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPUArch) {
// SelectGCEMachineType selects a machine type given the desired number of CPUs
// and memory per CPU ratio. Also returns the architecture of the selected
// machine type.
func SelectGCEMachineTypeOld(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPUArch) {
// TODO(peter): This is awkward: at or below 16 cpus, use n2-standard so that
// the machines have a decent amount of RAM. We could use custom machine
// configurations, but the rules for the amount of RAM per CPU need to be
Expand Down Expand Up @@ -215,8 +218,9 @@ func GCEMachineTypeOld(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPU
return fmt.Sprintf("%s-%s-%d", series, kind, cpus), selectedArch
}

// GCEMachineType selects a machine type given the desired number of CPUs, memory per CPU, and CPU architecture.
// It returns a compatible machine type and its architecture.
// SelectGCEMachineType selects a machine type given the desired number of CPUs,
// memory per CPU, and CPU architecture. It returns a compatible machine type
// and its architecture.
//
// When MemPerCPU is Standard, the memory per CPU ratio is 4 GB. For High, it is 8 GB.
// For Auto, it's 4 GB up to and including 16 CPUs, then 2 GB. Low is 1 GB.
Expand All @@ -228,7 +232,7 @@ func GCEMachineTypeOld(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPU
// At the time of writing, the intel machines are all third-generation xeon, "Ice Lake" assuming
// --minimum-cpu-platform="Intel Ice Lake" (roachprod's default). This is isomorphic to AWS's m6i or c6i.
// The only exception is low memory machines (n2-highcpu-xxx), which aren't available in AWS.
func GCEMachineTypeNew(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPUArch) {
func SelectGCEMachineTypeNew(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPUArch) {
series := "n2"
selectedArch := vm.ArchAMD64

Expand Down Expand Up @@ -286,9 +290,9 @@ func GCEMachineTypeNew(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPU
return fmt.Sprintf("%s-%s-%d", series, kind, cpus), selectedArch
}

// AzureMachineType selects a machine type given the desired number of CPUs and
// SelectAzureMachineType selects a machine type given the desired number of CPUs and
// memory per CPU ratio.
func AzureMachineType(cpus int, mem MemPerCPU) string {
func SelectAzureMachineType(cpus int, mem MemPerCPU) string {
if mem != Auto && mem != Standard {
panic(fmt.Sprintf("custom memory per CPU not implemented for Azure, memory ratio requested: %d", mem))
}
Expand Down
Loading

0 comments on commit 4ddd721

Please sign in to comment.