From a5be823456a445517be6a9f7721ad475e4dbf45c Mon Sep 17 00:00:00 2001 From: Radu Berinde Date: Thu, 28 Sep 2023 06:00:28 -0700 Subject: [PATCH] roachtest: clean up instance type specification `ClusterSpec` now provides options for GCE and AWS machine types. Epic: none Release note: None --- pkg/cmd/roachtest/cluster_test.go | 8 ++-- pkg/cmd/roachtest/spec/cluster_spec.go | 48 ++++++++++++------- pkg/cmd/roachtest/spec/machine_type.go | 4 +- pkg/cmd/roachtest/spec/option.go | 14 ++++++ pkg/cmd/roachtest/test_registry_test.go | 3 -- .../tests/admission_control_database_drop.go | 4 +- .../tests/admission_control_index_backfill.go | 4 +- pkg/cmd/roachtest/tests/restore.go | 6 +-- 8 files changed, 59 insertions(+), 32 deletions(-) diff --git a/pkg/cmd/roachtest/cluster_test.go b/pkg/cmd/roachtest/cluster_test.go index fcab21c2b325..451c98cc7cae 100644 --- a/pkg/cmd/roachtest/cluster_test.go +++ b/pkg/cmd/roachtest/cluster_test.go @@ -319,15 +319,15 @@ func TestAWSMachineType(t *testing.T) { for _, tc := range testCases { t.Run(fmt.Sprintf("%d/%s/%t/%s", tc.cpus, tc.mem, tc.localSSD, tc.arch), func(t *testing.T) { - machineType, selectedArch := spec.AWSMachineType(tc.cpus, tc.mem, tc.localSSD, tc.arch) + machineType, selectedArch := spec.SelectAWSMachineType(tc.cpus, tc.mem, tc.localSSD, tc.arch) require.Equal(t, tc.expectedMachineType, machineType) require.Equal(t, tc.expectedArch, selectedArch) }) } // spec.Low is not supported. - require.Panics(t, func() { spec.AWSMachineType(4, spec.Low, false, vm.ArchAMD64) }) - require.Panics(t, func() { spec.AWSMachineType(16, spec.Low, false, vm.ArchARM64) }) + require.Panics(t, func() { spec.SelectAWSMachineType(4, spec.Low, false, vm.ArchAMD64) }) + require.Panics(t, func() { spec.SelectAWSMachineType(16, spec.Low, false, vm.ArchARM64) }) } func TestGCEMachineType(t *testing.T) { @@ -413,7 +413,7 @@ func TestGCEMachineType(t *testing.T) { for _, tc := range testCases { t.Run(fmt.Sprintf("%d/%s/%s", tc.cpus, tc.mem, tc.arch), func(t *testing.T) { - machineType, selectedArch := spec.GCEMachineType(tc.cpus, tc.mem, tc.arch) + machineType, selectedArch := spec.SelectGCEMachineType(tc.cpus, tc.mem, tc.arch) require.Equal(t, tc.expectedMachineType, machineType) require.Equal(t, tc.expectedArch, selectedArch) diff --git a/pkg/cmd/roachtest/spec/cluster_spec.go b/pkg/cmd/roachtest/spec/cluster_spec.go index 7445ce35c3e8..2039bd827f71 100644 --- a/pkg/cmd/roachtest/spec/cluster_spec.go +++ b/pkg/cmd/roachtest/spec/cluster_spec.go @@ -65,11 +65,14 @@ type ClusterSpec struct { // TODO(#104029): We should remove the Cloud field; the tests now specify // their compatible clouds. Cloud string - Arch vm.CPUArch // CPU architecture; auto-chosen if left empty - // TODO(radu): An InstanceType can only make sense in the context of a - // specific cloud. We should replace this with cloud-specific arguments. - InstanceType string // auto-chosen if left empty - NodeCount int + + // TODO(radu): defaultInstanceType is the default machine type used (unless + // overridden by GCE.MachineType or AWS.MachineType); it does not belong in + // the spec. + defaultInstanceType string + + Arch vm.CPUArch // CPU architecture; auto-chosen if left empty + NodeCount int // CPUs is the number of CPUs per node. CPUs int Mem MemPerCPU @@ -93,12 +96,14 @@ type ClusterSpec struct { // GCE-specific arguments. These values apply only on clusters instantiated on GCE. GCE struct { + MachineType string MinCPUPlatform string VolumeType string } // AWS-specific arguments. These values apply only on clusters instantiated on AWS. AWS struct { + MachineType string // VolumeThroughput is the min provisioned EBS volume throughput. VolumeThroughput int } @@ -106,7 +111,7 @@ type ClusterSpec struct { // MakeClusterSpec makes a ClusterSpec. func MakeClusterSpec(cloud string, instanceType string, nodeCount int, opts ...Option) ClusterSpec { - spec := ClusterSpec{Cloud: cloud, InstanceType: instanceType, NodeCount: nodeCount} + spec := ClusterSpec{Cloud: cloud, defaultInstanceType: instanceType, NodeCount: nodeCount} defaultOpts := []Option{CPU(4), nodeLifetime(12 * time.Hour), ReuseAny()} for _, o := range append(defaultOpts, opts...) { o(&spec) @@ -257,31 +262,42 @@ func (s *ClusterSpec) RoachprodOpts( createVMOpts.GeoDistributed = s.Geo createVMOpts.Arch = string(arch) - machineType := s.InstanceType ssdCount := s.SSDs + machineType := s.defaultInstanceType + switch s.Cloud { + case AWS: + if s.AWS.MachineType != "" { + machineType = s.AWS.MachineType + } + case GCE: + if s.GCE.MachineType != "" { + machineType = s.GCE.MachineType + } + } + if s.CPUs != 0 { // Default to the user-supplied machine type, if any. // Otherwise, pick based on requested CPU count. var selectedArch vm.CPUArch - if len(machineType) == 0 { + if machineType == "" { // If no machine type was specified, choose one // based on the cloud and CPU count. switch s.Cloud { case AWS: - machineType, selectedArch = AWSMachineType(s.CPUs, s.Mem, s.PreferLocalSSD && s.VolumeSize == 0, arch) + machineType, selectedArch = SelectAWSMachineType(s.CPUs, s.Mem, s.PreferLocalSSD && s.VolumeSize == 0, arch) case GCE: - machineType, selectedArch = GCEMachineType(s.CPUs, s.Mem, arch) + machineType, selectedArch = SelectGCEMachineType(s.CPUs, s.Mem, arch) case Azure: machineType = AzureMachineType(s.CPUs, s.Mem) } - } - if selectedArch != "" && selectedArch != arch { - // TODO(srosenberg): we need a better way to monitor the rate of this mismatch, i.e., - // other than grepping cluster creation logs. - fmt.Printf("WARN: requested arch %s for machineType %s, but selected %s\n", arch, machineType, selectedArch) - createVMOpts.Arch = string(selectedArch) + if selectedArch != "" && selectedArch != arch { + // TODO(srosenberg): we need a better way to monitor the rate of this mismatch, i.e., + // other than grepping cluster creation logs. + fmt.Printf("WARN: requested arch %s for machineType %s, but selected %s\n", arch, machineType, selectedArch) + createVMOpts.Arch = string(selectedArch) + } } // Local SSD can only be requested diff --git a/pkg/cmd/roachtest/spec/machine_type.go b/pkg/cmd/roachtest/spec/machine_type.go index 8239ae522599..93a8e9a9017c 100644 --- a/pkg/cmd/roachtest/spec/machine_type.go +++ b/pkg/cmd/roachtest/spec/machine_type.go @@ -28,7 +28,7 @@ import ( // // At the time of writing, the intel machines are all third-generation Xeon, "Ice Lake" which are isomorphic to // GCE's n2-(standard|highmem|custom) _with_ --minimum-cpu-platform="Intel Ice Lake" (roachprod's default). -func AWSMachineType( +func SelectAWSMachineType( cpus int, mem MemPerCPU, shouldSupportLocalSSD bool, arch vm.CPUArch, ) (string, vm.CPUArch) { family := "m6i" // 4 GB RAM per CPU @@ -118,7 +118,7 @@ func AWSMachineType( // At the time of writing, the intel machines are all third-generation xeon, "Ice Lake" assuming // --minimum-cpu-platform="Intel Ice Lake" (roachprod's default). This is isomorphic to AWS's m6i or c6i. // The only exception is low memory machines (n2-highcpu-xxx), which aren't available in AWS. -func GCEMachineType(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPUArch) { +func SelectGCEMachineType(cpus int, mem MemPerCPU, arch vm.CPUArch) (string, vm.CPUArch) { series := "n2" selectedArch := vm.ArchAMD64 diff --git a/pkg/cmd/roachtest/spec/option.go b/pkg/cmd/roachtest/spec/option.go index d28ed50a6773..4443122f5266 100644 --- a/pkg/cmd/roachtest/spec/option.go +++ b/pkg/cmd/roachtest/spec/option.go @@ -198,6 +198,13 @@ func RandomlyUseZfs() Option { } } +// GCEMachineType sets the machine (instance) type when the cluster is on GCE. +func GCEMachineType(machineType string) Option { + return func(spec *ClusterSpec) { + spec.GCE.MachineType = machineType + } +} + // GCEMinCPUPlatform sets the minimum CPU platform when the cluster is on GCE. func GCEMinCPUPlatform(platform string) Option { return func(spec *ClusterSpec) { @@ -212,6 +219,13 @@ func GCEVolumeType(volumeType string) Option { } } +// AWSMachineType sets the machine (instance) type when the cluster is on AWS. +func AWSMachineType(machineType string) Option { + return func(spec *ClusterSpec) { + spec.AWS.MachineType = machineType + } +} + // AWSVolumeThroughput sets the minimum provisioned EBS volume throughput when // the cluster is on AWS. func AWSVolumeThroughput(throughput int) Option { diff --git a/pkg/cmd/roachtest/test_registry_test.go b/pkg/cmd/roachtest/test_registry_test.go index bf19ef33c6db..aa42782fe79f 100644 --- a/pkg/cmd/roachtest/test_registry_test.go +++ b/pkg/cmd/roachtest/test_registry_test.go @@ -32,7 +32,6 @@ func TestMakeTestRegistry(t *testing.T) { s := r.MakeClusterSpec(100, spec.Geo(), spec.Zones("zone99"), spec.CPU(12), spec.PreferLocalSSD(true)) require.EqualValues(t, 100, s.NodeCount) - require.Equal(t, "foo", s.InstanceType) require.True(t, s.Geo) require.Equal(t, "zone99", s.Zones) require.EqualValues(t, 12, s.CPUs) @@ -40,13 +39,11 @@ func TestMakeTestRegistry(t *testing.T) { s = r.MakeClusterSpec(100, spec.CPU(4), spec.TerminateOnMigration()) require.EqualValues(t, 100, s.NodeCount) - require.Equal(t, "foo", s.InstanceType) require.EqualValues(t, 4, s.CPUs) require.True(t, s.TerminateOnMigration) s = r.MakeClusterSpec(10, spec.CPU(16), spec.Arch(vm.ArchARM64)) require.EqualValues(t, 10, s.NodeCount) - require.Equal(t, "foo", s.InstanceType) require.EqualValues(t, 16, s.CPUs) require.EqualValues(t, vm.ArchARM64, s.Arch) }) diff --git a/pkg/cmd/roachtest/tests/admission_control_database_drop.go b/pkg/cmd/roachtest/tests/admission_control_database_drop.go index 2cc65ea03967..062677d00911 100644 --- a/pkg/cmd/roachtest/tests/admission_control_database_drop.go +++ b/pkg/cmd/roachtest/tests/admission_control_database_drop.go @@ -36,15 +36,15 @@ func registerDatabaseDrop(r registry.Registry) { spec.Cloud(spec.GCE), spec.GCEMinCPUPlatform("Intel Ice Lake"), spec.GCEVolumeType("pd-ssd"), + spec.GCEMachineType("n2-standard-8"), ) - clusterSpec.InstanceType = "n2-standard-8" r.Add(registry.TestSpec{ Name: "admission-control/database-drop", Timeout: 10 * time.Hour, Owner: registry.OwnerAdmissionControl, Benchmark: true, - CompatibleClouds: registry.AllExceptAWS, + CompatibleClouds: registry.OnlyGCE, Suites: registry.Suites(registry.Weekly), Tags: registry.Tags(`weekly`), Cluster: clusterSpec, diff --git a/pkg/cmd/roachtest/tests/admission_control_index_backfill.go b/pkg/cmd/roachtest/tests/admission_control_index_backfill.go index 7127978c9df2..991e8f4ab067 100644 --- a/pkg/cmd/roachtest/tests/admission_control_index_backfill.go +++ b/pkg/cmd/roachtest/tests/admission_control_index_backfill.go @@ -37,15 +37,15 @@ func registerIndexBackfill(r registry.Registry) { spec.Cloud(spec.GCE), spec.GCEMinCPUPlatform("Intel Ice Lake"), spec.GCEVolumeType("pd-ssd"), + spec.GCEMachineType("n2-standard-8"), ) - clusterSpec.InstanceType = "n2-standard-8" r.Add(registry.TestSpec{ Name: "admission-control/index-backfill", Timeout: 6 * time.Hour, Owner: registry.OwnerAdmissionControl, Benchmark: true, - CompatibleClouds: registry.AllExceptAWS, + CompatibleClouds: registry.OnlyGCE, Suites: registry.Suites(registry.Weekly), Tags: registry.Tags(`weekly`), Cluster: clusterSpec, diff --git a/pkg/cmd/roachtest/tests/restore.go b/pkg/cmd/roachtest/tests/restore.go index bafe60bc7050..564f75fa50cc 100644 --- a/pkg/cmd/roachtest/tests/restore.go +++ b/pkg/cmd/roachtest/tests/restore.go @@ -537,13 +537,13 @@ func (hw hardwareSpecs) makeClusterSpecs(r registry.Registry, backupCloud string } s := r.MakeClusterSpec(hw.nodes+addWorkloadNode, clusterOpts...) - if backupCloud == spec.AWS && s.Cloud == spec.AWS && s.VolumeSize != 0 { + if backupCloud == spec.AWS && s.VolumeSize != 0 { // Work around an issue that RAID0s local NVMe and GP3 storage together: // https://github.com/cockroachdb/cockroach/issues/98783. // // TODO(srosenberg): Remove this workaround when 98783 is addressed. - s.InstanceType, _ = spec.AWSMachineType(s.CPUs, s.Mem, s.PreferLocalSSD && s.VolumeSize == 0, vm.ArchAMD64) - s.InstanceType = strings.Replace(s.InstanceType, "d.", ".", 1) + s.AWS.MachineType, _ = spec.SelectAWSMachineType(s.CPUs, s.Mem, s.PreferLocalSSD && s.VolumeSize == 0, vm.ArchAMD64) + s.AWS.MachineType = strings.Replace(s.AWS.MachineType, "d.", ".", 1) s.Arch = vm.ArchAMD64 } return s