From 7ca1e73bf01c6ea471f409099c5a98993670973b Mon Sep 17 00:00:00 2001
From: Nick Travers
Date: Fri, 3 Jun 2022 12:55:38 -0700
Subject: [PATCH] roachprod,roachtest: clean up references to raid and multiple
 stores

Currently, roachprod has flags for enabling multiple stores on the nodes
in a cluster, defaulting to `false`. If the flag is left disabled and
more than one storage device is present on a node, the devices are
combined into a single RAID0 (striped) volume. The terms "using multiple
disks" and "using multiple stores" are also used interchangeably.

Roachtest has its own cluster spec option, `RAID0(enabled)`, for
enabling RAID0, which is passed through to roachprod via the
`UseMultipleDisks` parameter. The former is the negation of the latter
(i.e. RAID0 implies _not_ using multiple disks, and using multiple disks
implies _not_ using RAID0).

The combination of "using multiple disks", "using multiple stores", and
"using RAID0" results in unnecessary cognitive overhead. Simplify things
by adopting the "using multiple stores" parlance. Replace the `RAID0`
roachtest cluster spec option with a `MultipleStores` option, updating
existing call-sites with the negation (i.e. `RAID0(true)` becomes
`MultipleStores(false)`, etc.).

Allow AWS roachtests to enable or disable multiple stores. Previously,
this functionality was limited to GCP clusters.

Touches #82423.

Release note: None.
---
 pkg/cmd/roachtest/spec/cluster_spec.go        | 25 ++++++++-----------
 pkg/cmd/roachtest/spec/option.go              | 14 ++++++-----
 pkg/cmd/roachtest/tests/kv.go                 |  4 +--
 .../tests/pebble_write_throughput.go          |  2 +-
 pkg/cmd/roachtest/tests/rebalance_load.go     |  4 +--
 pkg/roachprod/vm/aws/aws.go                   | 18 ++++++-------
 pkg/roachprod/vm/aws/support.go               | 10 ++++----
 pkg/roachprod/vm/gce/gcloud.go                | 20 +++++++--------
 pkg/roachprod/vm/gce/utils.go                 | 16 ++++++------
 9 files changed, 55 insertions(+), 58 deletions(-)

diff --git a/pkg/cmd/roachtest/spec/cluster_spec.go b/pkg/cmd/roachtest/spec/cluster_spec.go
index cc61e34379a5..2278f9e0e0f8 100644
--- a/pkg/cmd/roachtest/spec/cluster_spec.go
+++ b/pkg/cmd/roachtest/spec/cluster_spec.go
@@ -45,7 +45,7 @@ type ClusterSpec struct {
 	// CPUs is the number of CPUs per node.
 	CPUs           int
 	SSDs           int
-	RAID0          bool
+	MultipleStores bool
 	VolumeSize     int
 	PreferLocalSSD bool
 	Zones          string
@@ -64,7 +64,7 @@ type ClusterSpec struct {
 // MakeClusterSpec makes a ClusterSpec.
 func MakeClusterSpec(cloud string, instanceType string, nodeCount int, opts ...Option) ClusterSpec {
 	spec := ClusterSpec{Cloud: cloud, InstanceType: instanceType, NodeCount: nodeCount}
-	defaultOpts := []Option{CPU(4), nodeLifetimeOption(12 * time.Hour), ReuseAny()}
+	defaultOpts := []Option{CPU(4), nodeLifetimeOption(12 * time.Hour), ReuseAny(), MultipleStores(true)}
 	for _, o := range append(defaultOpts, opts...)
{ o.apply(&spec) } @@ -98,7 +98,9 @@ func awsMachineSupportsSSD(machineType string) bool { return false } -func getAWSOpts(machineType string, zones []string, localSSD bool) vm.ProviderOpts { +func getAWSOpts( + machineType string, zones []string, localSSD bool, multipleStores bool, +) vm.ProviderOpts { opts := aws.DefaultProviderOpts() if localSSD { opts.SSDMachineType = machineType @@ -108,6 +110,7 @@ func getAWSOpts(machineType string, zones []string, localSSD bool) vm.ProviderOp if len(zones) != 0 { opts.CreateZones = zones } + opts.UseMultipleStores = multipleStores return opts } @@ -115,9 +118,8 @@ func getGCEOpts( machineType string, zones []string, volumeSize, localSSDCount int, - localSSD bool, - RAID0 bool, terminateOnMigration bool, + multipleStores bool, ) vm.ProviderOpts { opts := gce.DefaultProviderOpts() opts.MachineType = machineType @@ -128,13 +130,7 @@ func getGCEOpts( opts.Zones = zones } opts.SSDCount = localSSDCount - if localSSD && localSSDCount > 0 { - // NB: As the default behavior for _roachprod_ (at least in AWS/GCP) is - // to mount multiple disks as a single store using a RAID 0 array, we - // must explicitly ask for multiple stores to be enabled, _unless_ the - // test has explicitly asked for RAID0. - opts.UseMultipleDisks = !RAID0 - } + opts.UseMultipleStores = multipleStores opts.TerminateOnMigration = terminateOnMigration return opts @@ -239,10 +235,9 @@ func (s *ClusterSpec) RoachprodOpts( var providerOpts vm.ProviderOpts switch s.Cloud { case AWS: - providerOpts = getAWSOpts(machineType, zones, createVMOpts.SSDOpts.UseLocalSSD) + providerOpts = getAWSOpts(machineType, zones, createVMOpts.SSDOpts.UseLocalSSD, s.MultipleStores) case GCE: - providerOpts = getGCEOpts(machineType, zones, s.VolumeSize, ssdCount, - createVMOpts.SSDOpts.UseLocalSSD, s.RAID0, s.TerminateOnMigration) + providerOpts = getGCEOpts(machineType, zones, s.VolumeSize, ssdCount, s.TerminateOnMigration, s.MultipleStores) case Azure: providerOpts = getAzureOpts(machineType, zones) } diff --git a/pkg/cmd/roachtest/spec/option.go b/pkg/cmd/roachtest/spec/option.go index b9c8bf7a0cb6..d6e65c54f0ed 100644 --- a/pkg/cmd/roachtest/spec/option.go +++ b/pkg/cmd/roachtest/spec/option.go @@ -50,15 +50,17 @@ func SSD(n int) Option { return nodeSSDOption(n) } -type raid0Option bool +type multipleStoresOption bool -func (o raid0Option) apply(spec *ClusterSpec) { - spec.RAID0 = bool(o) +func (o multipleStoresOption) apply(spec *ClusterSpec) { + spec.MultipleStores = bool(o) } -// RAID0 enables RAID 0 striping across all disks on the node. -func RAID0(enabled bool) Option { - return raid0Option(enabled) +// MultipleStores enables multiple stores on the nodes, provided there are +// multiple storage devices available. If multiple stores are disabled, devices +// are combined into a single RAID0 (striped) volume. +func MultipleStores(enabled bool) Option { + return multipleStoresOption(enabled) } type nodeGeoOption struct{} diff --git a/pkg/cmd/roachtest/tests/kv.go b/pkg/cmd/roachtest/tests/kv.go index 3857f48da06b..151d16c633eb 100644 --- a/pkg/cmd/roachtest/tests/kv.go +++ b/pkg/cmd/roachtest/tests/kv.go @@ -207,7 +207,7 @@ func registerKV(r registry.Registry) { // Configs for comparing single store and multi store clusters. {nodes: 4, cpus: 8, readPercent: 95}, {nodes: 4, cpus: 8, readPercent: 95, ssds: 8}, - {nodes: 4, cpus: 8, readPercent: 95, ssds: 8, raid0: true}, + {nodes: 4, cpus: 8, readPercent: 95, ssds: 8, raid0: true /* single store */}, // Configs with encryption. 
{nodes: 1, cpus: 8, readPercent: 0, encryption: true}, @@ -284,7 +284,7 @@ func registerKV(r registry.Registry) { r.Add(registry.TestSpec{ Name: strings.Join(nameParts, "/"), Owner: owner, - Cluster: r.MakeClusterSpec(opts.nodes+1, spec.CPU(opts.cpus), spec.SSD(opts.ssds), spec.RAID0(opts.raid0)), + Cluster: r.MakeClusterSpec(opts.nodes+1, spec.CPU(opts.cpus), spec.SSD(opts.ssds), spec.MultipleStores(!opts.raid0)), Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { runKV(ctx, t, c, opts) }, diff --git a/pkg/cmd/roachtest/tests/pebble_write_throughput.go b/pkg/cmd/roachtest/tests/pebble_write_throughput.go index 23963785f8cb..ca0431a52dbf 100644 --- a/pkg/cmd/roachtest/tests/pebble_write_throughput.go +++ b/pkg/cmd/roachtest/tests/pebble_write_throughput.go @@ -35,7 +35,7 @@ func registerPebbleWriteThroughput(r registry.Registry) { Name: fmt.Sprintf("pebble/write/size=%d", size), Owner: registry.OwnerStorage, Timeout: 10 * time.Hour, - Cluster: r.MakeClusterSpec(5, spec.CPU(16), spec.SSD(16), spec.RAID0(true)), + Cluster: r.MakeClusterSpec(5, spec.CPU(16), spec.SSD(16), spec.MultipleStores(false)), Tags: []string{"pebble_nightly_write"}, Run: func(ctx context.Context, t test.Test, c cluster.Cluster) { runPebbleWriteBenchmark(ctx, t, c, size, pebble) diff --git a/pkg/cmd/roachtest/tests/rebalance_load.go b/pkg/cmd/roachtest/tests/rebalance_load.go index 1cdb9ea27e80..f517a55c165b 100644 --- a/pkg/cmd/roachtest/tests/rebalance_load.go +++ b/pkg/cmd/roachtest/tests/rebalance_load.go @@ -60,13 +60,13 @@ func registerRebalanceLoad(r registry.Registry) { roachNodes := c.Range(1, c.Spec().NodeCount-1) appNode := c.Node(c.Spec().NodeCount) numStores := len(roachNodes) - if c.Spec().SSDs > 1 && !c.Spec().RAID0 { + if c.Spec().SSDs > 1 && c.Spec().MultipleStores { numStores *= c.Spec().SSDs } splits := numStores - 1 // n-1 splits => n ranges => 1 lease per store startOpts := option.DefaultStartOpts() - if c.Spec().SSDs > 1 && !c.Spec().RAID0 { + if c.Spec().SSDs > 1 && c.Spec().MultipleStores { startOpts.RoachprodOpts.StoreCount = c.Spec().SSDs } startOpts.RoachprodOpts.ExtraArgs = append(startOpts.RoachprodOpts.ExtraArgs, diff --git a/pkg/roachprod/vm/aws/aws.go b/pkg/roachprod/vm/aws/aws.go index 1ad02a6d049f..8f449c2e537c 100644 --- a/pkg/roachprod/vm/aws/aws.go +++ b/pkg/roachprod/vm/aws/aws.go @@ -213,13 +213,13 @@ func (p *Provider) CreateProviderOpts() vm.ProviderOpts { // ProviderOpts provides user-configurable, aws-specific create options. type ProviderOpts struct { - MachineType string - SSDMachineType string - CPUOptions string - RemoteUserName string - DefaultEBSVolume ebsVolume - EBSVolumes ebsVolumeList - UseMultipleDisks bool + MachineType string + SSDMachineType string + CPUOptions string + RemoteUserName string + DefaultEBSVolume ebsVolume + EBSVolumes ebsVolumeList + UseMultipleStores bool // Use specified ImageAMI when provisioning. // Overrides config.json AMI. @@ -312,7 +312,7 @@ func (o *ProviderOpts) ConfigureCreateFlags(flags *pflag.FlagSet) { "of geo (default [%s])", strings.Join(defaultCreateZones, ","))) flags.StringVar(&o.ImageAMI, ProviderName+"-image-ami", o.ImageAMI, "Override image AMI to use. See https://awscli.amazonaws.com/v2/documentation/api/latest/reference/ec2/describe-images.html") - flags.BoolVar(&o.UseMultipleDisks, ProviderName+"-enable-multiple-stores", + flags.BoolVar(&o.UseMultipleStores, ProviderName+"-enable-multiple-stores", false, "Enable the use of multiple stores by creating one store directory per disk. 
"+ "Default is to raid0 stripe all disks. "+ "See repeating --"+ProviderName+"-ebs-volume for adding extra volumes.") @@ -885,7 +885,7 @@ func (p *Provider) runInstance( extraMountOpts = "nobarrier" } } - filename, err := writeStartupScript(extraMountOpts, providerOpts.UseMultipleDisks) + filename, err := writeStartupScript(extraMountOpts, providerOpts.UseMultipleStores) if err != nil { return errors.Wrapf(err, "could not write AWS startup script to temp file") } diff --git a/pkg/roachprod/vm/aws/support.go b/pkg/roachprod/vm/aws/support.go index 3b8da3475d06..2d839068f239 100644 --- a/pkg/roachprod/vm/aws/support.go +++ b/pkg/roachprod/vm/aws/support.go @@ -43,7 +43,7 @@ sudo apt-get install -qy --no-install-recommends mdadm mount_opts="defaults" {{if .ExtraMountOpts}}mount_opts="${mount_opts},{{.ExtraMountOpts}}"{{end}} -use_multiple_disks='{{if .UseMultipleDisks}}true{{end}}' +use_multiple_stores='{{ .UseMultipleStores }}' disks=() mount_prefix="/mnt/data" @@ -64,7 +64,7 @@ if [ "${#disks[@]}" -eq "0" ]; then echo "No disks mounted, creating ${mountpoint}" mkdir -p ${mountpoint} chmod 777 ${mountpoint} -elif [ "${#disks[@]}" -eq "1" ] || [ -n "$use_multiple_disks" ]; then +elif [ "${#disks[@]}" -eq "1" ] || [ "$use_multiple_stores" == 'true' ]; then disknum=1 for disk in "${disks[@]}" do @@ -158,11 +158,11 @@ sudo touch /mnt/data1/.roachprod-initialized // a comma-separated list of options for the "mount -o" flag. func writeStartupScript(extraMountOpts string, useMultiple bool) (string, error) { type tmplParams struct { - ExtraMountOpts string - UseMultipleDisks bool + ExtraMountOpts string + UseMultipleStores bool } - args := tmplParams{ExtraMountOpts: extraMountOpts, UseMultipleDisks: useMultiple} + args := tmplParams{ExtraMountOpts: extraMountOpts, UseMultipleStores: useMultiple} tmpfile, err := ioutil.TempFile("", "aws-startup-script") if err != nil { diff --git a/pkg/roachprod/vm/gce/gcloud.go b/pkg/roachprod/vm/gce/gcloud.go index ff2035f79402..defd03d1f1b2 100644 --- a/pkg/roachprod/vm/gce/gcloud.go +++ b/pkg/roachprod/vm/gce/gcloud.go @@ -213,14 +213,14 @@ type ProviderOpts struct { // projects represent the GCE projects to operate on. Accessed through // GetProject() or GetProjects() depending on whether the command accepts // multiple projects or a single one. - MachineType string - MinCPUPlatform string - Zones []string - Image string - SSDCount int - PDVolumeType string - PDVolumeSize int - UseMultipleDisks bool + MachineType string + MinCPUPlatform string + Zones []string + Image string + SSDCount int + PDVolumeType string + PDVolumeSize int + UseMultipleStores bool // GCE allows two availability policies in case of a maintenance event (see --maintenance-policy via gcloud), // 'TERMINATE' or 'MIGRATE'. The default is 'MIGRATE' which we denote by 'TerminateOnMigration == false'. TerminateOnMigration bool @@ -318,7 +318,7 @@ func (o *ProviderOpts) ConfigureCreateFlags(flags *pflag.FlagSet) { "Type of the persistent disk volume, only used if local-ssd=false") flags.IntVar(&o.PDVolumeSize, ProviderName+"-pd-volume-size", 500, "Size in GB of persistent disk volume, only used if local-ssd=false") - flags.BoolVar(&o.UseMultipleDisks, ProviderName+"-enable-multiple-stores", + flags.BoolVar(&o.UseMultipleStores, ProviderName+"-enable-multiple-stores", false, "Enable the use of multiple stores by creating one store directory per disk. "+ "Default is to raid0 stripe all disks.") @@ -483,7 +483,7 @@ func (p *Provider) Create( } // Create GCE startup script file. 
- filename, err := writeStartupScript(extraMountOpts, opts.SSDOpts.FileSystem, providerOpts.UseMultipleDisks) + filename, err := writeStartupScript(extraMountOpts, opts.SSDOpts.FileSystem, providerOpts.UseMultipleStores) if err != nil { return errors.Wrapf(err, "could not write GCE startup script to temp file") } diff --git a/pkg/roachprod/vm/gce/utils.go b/pkg/roachprod/vm/gce/utils.go index 90f10dece3a1..2ed022ffa360 100644 --- a/pkg/roachprod/vm/gce/utils.go +++ b/pkg/roachprod/vm/gce/utils.go @@ -52,7 +52,7 @@ mount_opts="defaults" {{if .ExtraMountOpts}}mount_opts="${mount_opts},{{.ExtraMountOpts}}"{{end}} {{ end }} -use_multiple_disks='{{if .UseMultipleDisks}}true{{end}}' +use_multiple_stores='{{ .UseMultipleStores }}' disks=() mount_prefix="/mnt/data" @@ -85,7 +85,7 @@ if [ "${#disks[@]}" -eq "0" ]; then echo "No disks mounted, creating ${mountpoint}" mkdir -p ${mountpoint} chmod 777 ${mountpoint} -elif [ "${#disks[@]}" -eq "1" ] || [ -n "$use_multiple_disks" ]; then +elif [ "${#disks[@]}" -eq "1" ] || [ "$use_multiple_stores" == "true" ]; then disknum=1 for disk in "${disks[@]}" do @@ -225,15 +225,15 @@ func writeStartupScript( extraMountOpts string, fileSystem string, useMultiple bool, ) (string, error) { type tmplParams struct { - ExtraMountOpts string - UseMultipleDisks bool - Zfs bool + ExtraMountOpts string + UseMultipleStores bool + Zfs bool } args := tmplParams{ - ExtraMountOpts: extraMountOpts, - UseMultipleDisks: useMultiple, - Zfs: fileSystem == vm.Zfs, + ExtraMountOpts: extraMountOpts, + UseMultipleStores: useMultiple, + Zfs: fileSystem == vm.Zfs, } tmpfile, err := ioutil.TempFile("", "gce-startup-script")
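As a sanity check on the startup-script template change above, here is a
minimal, self-contained sketch (not part of the patch) showing why the new
`{{ .UseMultipleStores }}` rendering satisfies the shell-side
[ "$use_multiple_stores" == "true" ] comparison: Go's text/template prints
a bool as the literal string "true" or "false".

package main

import (
	"os"
	"text/template"
)

// Illustrative only: renders the use_multiple_stores assignment the same
// way the roachprod startup-script templates do, demonstrating that a Go
// bool expands to the literal "true"/"false" expected by the shell test.
func main() {
	tmpl := template.Must(template.New("startup").Parse(
		"use_multiple_stores='{{ .UseMultipleStores }}'\n"))
	params := struct{ UseMultipleStores bool }{UseMultipleStores: true}
	if err := tmpl.Execute(os.Stdout, params); err != nil {
		panic(err)
	}
	// Prints: use_multiple_stores='true'
}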