Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

release-23.1: roachtest: stop using ClusterSpec.Cloud in test code #115389

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions pkg/cmd/roachtest/cluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -320,15 +320,15 @@ func TestAWSMachineTypeNew(t *testing.T) {

for _, tc := range testCases {
t.Run(fmt.Sprintf("%d/%s/%t/%s", tc.cpus, tc.mem, tc.localSSD, tc.arch), func(t *testing.T) {
machineType, selectedArch := spec.AWSMachineTypeNew(tc.cpus, tc.mem, tc.localSSD, tc.arch)
machineType, selectedArch := spec.SelectAWSMachineTypeNew(tc.cpus, tc.mem, tc.localSSD, tc.arch)

require.Equal(t, tc.expectedMachineType, machineType)
require.Equal(t, tc.expectedArch, selectedArch)
})
}
// spec.Low is not supported.
require.Panics(t, func() { spec.AWSMachineTypeNew(4, spec.Low, false, vm.ArchAMD64) })
require.Panics(t, func() { spec.AWSMachineTypeNew(16, spec.Low, false, vm.ArchARM64) })
require.Panics(t, func() { spec.SelectAWSMachineTypeNew(4, spec.Low, false, vm.ArchAMD64) })
require.Panics(t, func() { spec.SelectAWSMachineTypeNew(16, spec.Low, false, vm.ArchARM64) })
}

// TODO(srosenberg): restore the change in https://github.com/cockroachdb/cockroach/pull/111140 after 23.2 branch cut.
Expand Down Expand Up @@ -415,7 +415,7 @@ func TestGCEMachineTypeNew(t *testing.T) {

for _, tc := range testCases {
t.Run(fmt.Sprintf("%d/%s/%s", tc.cpus, tc.mem, tc.arch), func(t *testing.T) {
machineType, selectedArch := spec.GCEMachineTypeNew(tc.cpus, tc.mem, tc.arch)
machineType, selectedArch := spec.SelectGCEMachineTypeNew(tc.cpus, tc.mem, tc.arch)

require.Equal(t, tc.expectedMachineType, machineType)
require.Equal(t, tc.expectedArch, selectedArch)
Expand Down
96 changes: 68 additions & 28 deletions pkg/cmd/roachtest/spec/cluster_spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,19 +65,25 @@ type ClusterSpec struct {
// TODO(#104029): We should remove the Cloud field; the tests now specify
// their compatible clouds.
Cloud string
Arch vm.CPUArch // CPU architecture; auto-chosen if left empty
// TODO(radu): An InstanceType can only make sense in the context of a
// specific cloud. We should replace this with cloud-specific arguments.
InstanceType string // auto-chosen if left empty
NodeCount int

// TODO(radu): defaultInstanceType is the default machine type used (unless
// overridden by GCE.MachineType or AWS.MachineType); it does not belong in
// the spec.
defaultInstanceType string

// TODO(radu): defaultZones is the default zones specification (unless
// overridden by GCE.Zones or AWS.Zones); it does not belong in the spec.
defaultZones string

Arch vm.CPUArch // CPU architecture; auto-chosen if left empty
NodeCount int
// CPUs is the number of CPUs per node.
CPUs int
Mem MemPerCPU
SSDs int
RAID0 bool
VolumeSize int
PreferLocalSSD bool
Zones string
Geo bool
Lifetime time.Duration
ReusePolicy clusterReusePolicy
Expand All @@ -92,18 +98,29 @@ type ClusterSpec struct {

GatherCores bool

// AWS-specific arguments.
//
// AWSVolumeThroughput is the min provisioned EBS volume throughput.
AWSVolumeThroughput int
// GCE-specific arguments. These values apply only on clusters instantiated on GCE.
GCE struct {
MachineType string
MinCPUPlatform string
VolumeType string
Zones string
}

// AWS-specific arguments. These values apply only on clusters instantiated on AWS.
AWS struct {
MachineType string
// VolumeThroughput is the min provisioned EBS volume throughput.
VolumeThroughput int
Zones string
}
}

// MakeClusterSpec makes a ClusterSpec.
func MakeClusterSpec(cloud string, instanceType string, nodeCount int, opts ...Option) ClusterSpec {
spec := ClusterSpec{Cloud: cloud, InstanceType: instanceType, NodeCount: nodeCount}
defaultOpts := []Option{CPU(4), nodeLifetimeOption(12 * time.Hour), ReuseAny()}
spec := ClusterSpec{Cloud: cloud, defaultInstanceType: instanceType, NodeCount: nodeCount}
defaultOpts := []Option{CPU(4), nodeLifetime(12 * time.Hour), ReuseAny()}
for _, o := range append(defaultOpts, opts...) {
o.apply(&spec)
o(&spec)
}
return spec
}
Expand Down Expand Up @@ -247,31 +264,42 @@ func (s *ClusterSpec) RoachprodOpts(

createVMOpts.GeoDistributed = s.Geo
createVMOpts.Arch = string(arch)
machineType := s.InstanceType
ssdCount := s.SSDs

machineType := s.defaultInstanceType
switch s.Cloud {
case AWS:
if s.AWS.MachineType != "" {
machineType = s.AWS.MachineType
}
case GCE:
if s.GCE.MachineType != "" {
machineType = s.GCE.MachineType
}
}

if s.CPUs != 0 {
// Default to the user-supplied machine type, if any.
// Otherwise, pick based on requested CPU count.
var selectedArch vm.CPUArch

if len(machineType) == 0 {
if machineType == "" {
// If no machine type was specified, choose one
// based on the cloud and CPU count.
switch s.Cloud {
case AWS:
machineType, selectedArch = AWSMachineType(s.CPUs, s.Mem, s.PreferLocalSSD && s.VolumeSize == 0, arch)
machineType, selectedArch = SelectAWSMachineType(s.CPUs, s.Mem, s.PreferLocalSSD && s.VolumeSize == 0, arch)
case GCE:
machineType, selectedArch = GCEMachineType(s.CPUs, s.Mem, arch)
machineType, selectedArch = SelectGCEMachineType(s.CPUs, s.Mem, arch)
case Azure:
machineType = AzureMachineType(s.CPUs, s.Mem)
machineType = SelectAzureMachineType(s.CPUs, s.Mem)
}
if selectedArch != "" && selectedArch != arch {
// TODO(srosenberg): we need a better way to monitor the rate of this mismatch, i.e.,
// other than grepping cluster creation logs.
fmt.Printf("WARN: requested arch %s for machineType %s, but selected %s\n", arch, machineType, selectedArch)
createVMOpts.Arch = string(selectedArch)
}
}
if selectedArch != "" && selectedArch != arch {
// TODO(srosenberg): we need a better way to monitor the rate of this mismatch, i.e.,
// other than grepping cluster creation logs.
fmt.Printf("WARN: requested arch %s for machineType %s, but selected %s\n", arch, machineType, selectedArch)
createVMOpts.Arch = string(selectedArch)
}

// Local SSD can only be requested
Expand Down Expand Up @@ -305,9 +333,21 @@ func (s *ClusterSpec) RoachprodOpts(
createVMOpts.SSDOpts.FileSystem = vm.Zfs
}
}

zonesStr := s.defaultZones
switch s.Cloud {
case AWS:
if s.AWS.Zones != "" {
zonesStr = s.AWS.Zones
}
case GCE:
if s.GCE.Zones != "" {
zonesStr = s.GCE.Zones
}
}
var zones []string
if s.Zones != "" {
zones = strings.Split(s.Zones, ",")
if zonesStr != "" {
zones = strings.Split(zonesStr, ",")
if !s.Geo {
zones = zones[:1]
}
Expand All @@ -321,12 +361,12 @@ func (s *ClusterSpec) RoachprodOpts(
var providerOpts vm.ProviderOpts
switch s.Cloud {
case AWS:
providerOpts = getAWSOpts(machineType, zones, s.VolumeSize, s.AWSVolumeThroughput,
providerOpts = getAWSOpts(machineType, zones, s.VolumeSize, s.AWS.VolumeThroughput,
createVMOpts.SSDOpts.UseLocalSSD)
case GCE:
providerOpts = getGCEOpts(machineType, zones, s.VolumeSize, ssdCount,
createVMOpts.SSDOpts.UseLocalSSD, s.RAID0, s.TerminateOnMigration,
"" /* minCPUPlatform */, vm.ParseArch(createVMOpts.Arch),
s.GCE.MinCPUPlatform, vm.ParseArch(createVMOpts.Arch),
)
case Azure:
providerOpts = getAzureOpts(machineType, zones)
Expand Down
40 changes: 22 additions & 18 deletions pkg/cmd/roachtest/spec/machine_type.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,20 +17,21 @@ import (
)

// TODO(srosenberg): restore the change in https://github.com/cockroachdb/cockroach/pull/111140 after 23.2 branch cut.
func AWSMachineType(
func SelectAWSMachineType(
cpus int, mem MemPerCPU, shouldSupportLocalSSD bool, arch vm.CPUArch,
) (string, vm.CPUArch) {
return AWSMachineTypeOld(cpus, mem, arch)
return SelectAWSMachineTypeOld(cpus, mem, arch)
}

// TODO(srosenberg): restore the change in https://github.com/cockroachdb/cockroach/pull/111140 after 23.2 branch cut.
func GCEMachineType(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPUArch) {
return GCEMachineTypeOld(cpus, mem, arch)
func SelectGCEMachineType(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPUArch) {
return SelectGCEMachineTypeOld(cpus, mem, arch)
}

// AWSMachineType selects a machine type given the desired number of CPUs and
// memory per CPU ratio. Also returns the architecture of the selected machine type.
func AWSMachineTypeOld(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPUArch) {
// SelectAWSMachineType selects a machine type given the desired number of CPUs
// and memory per CPU ratio. Also returns the architecture of the selected
// machine type.
func SelectAWSMachineTypeOld(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPUArch) {
// TODO(erikgrinaker): These have significantly less RAM than
// their GCE counterparts. Consider harmonizing them.
family := "c6id" // 2 GB RAM per CPU
Expand Down Expand Up @@ -87,8 +88,9 @@ func AWSMachineTypeOld(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPU
return fmt.Sprintf("%s.%s", family, size), selectedArch
}

// AWSMachineType selects a machine type given the desired number of CPUs, memory per CPU,
// support for locally-attached SSDs and CPU architecture. It returns a compatible machine type and its architecture.
// SelectAWSMachineType selects a machine type given the desired number of CPUs,
// memory per CPU, support for locally-attached SSDs and CPU architecture. It
// returns a compatible machine type and its architecture.
//
// When MemPerCPU is Standard, the memory per CPU ratio is 4 GB. For High, it is 8 GB.
// For Auto, it's 4 GB up to and including 16 CPUs, then 2 GB. Low is not supported.
Expand All @@ -99,7 +101,7 @@ func AWSMachineTypeOld(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPU
//
// At the time of writing, the intel machines are all third-generation Xeon, "Ice Lake" which are isomorphic to
// GCE's n2-(standard|highmem|custom) _with_ --minimum-cpu-platform="Intel Ice Lake" (roachprod's default).
func AWSMachineTypeNew(
func SelectAWSMachineTypeNew(
cpus int, mem MemPerCPU, shouldSupportLocalSSD bool, arch vm.CPUArch,
) (string, vm.CPUArch) {
family := "m6i" // 4 GB RAM per CPU
Expand Down Expand Up @@ -176,9 +178,10 @@ func AWSMachineTypeNew(
return fmt.Sprintf("%s.%s", family, size), selectedArch
}

// GCEMachineType selects a machine type given the desired number of CPUs and
// memory per CPU ratio. Also returns the architecture of the selected machine type.
func GCEMachineTypeOld(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPUArch) {
// SelectGCEMachineType selects a machine type given the desired number of CPUs
// and memory per CPU ratio. Also returns the architecture of the selected
// machine type.
func SelectGCEMachineTypeOld(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPUArch) {
// TODO(peter): This is awkward: at or below 16 cpus, use n2-standard so that
// the machines have a decent amount of RAM. We could use custom machine
// configurations, but the rules for the amount of RAM per CPU need to be
Expand Down Expand Up @@ -215,8 +218,9 @@ func GCEMachineTypeOld(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPU
return fmt.Sprintf("%s-%s-%d", series, kind, cpus), selectedArch
}

// GCEMachineType selects a machine type given the desired number of CPUs, memory per CPU, and CPU architecture.
// It returns a compatible machine type and its architecture.
// SelectGCEMachineType selects a machine type given the desired number of CPUs,
// memory per CPU, and CPU architecture. It returns a compatible machine type
// and its architecture.
//
// When MemPerCPU is Standard, the memory per CPU ratio is 4 GB. For High, it is 8 GB.
// For Auto, it's 4 GB up to and including 16 CPUs, then 2 GB. Low is 1 GB.
Expand All @@ -228,7 +232,7 @@ func GCEMachineTypeOld(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPU
// At the time of writing, the intel machines are all third-generation xeon, "Ice Lake" assuming
// --minimum-cpu-platform="Intel Ice Lake" (roachprod's default). This is isomorphic to AWS's m6i or c6i.
// The only exception is low memory machines (n2-highcpu-xxx), which aren't available in AWS.
func GCEMachineTypeNew(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPUArch) {
func SelectGCEMachineTypeNew(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPUArch) {
series := "n2"
selectedArch := vm.ArchAMD64

Expand Down Expand Up @@ -286,9 +290,9 @@ func GCEMachineTypeNew(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPU
return fmt.Sprintf("%s-%s-%d", series, kind, cpus), selectedArch
}

// AzureMachineType selects a machine type given the desired number of CPUs and
// SelectAzureMachineType selects a machine type given the desired number of CPUs and
// memory per CPU ratio.
func AzureMachineType(cpus int, mem MemPerCPU) string {
func SelectAzureMachineType(cpus int, mem MemPerCPU) string {
if mem != Auto && mem != Standard {
panic(fmt.Sprintf("custom memory per CPU not implemented for Azure, memory ratio requested: %d", mem))
}
Expand Down
Loading