diff --git a/pkg/cmd/roachtest/tests/restore.go b/pkg/cmd/roachtest/tests/restore.go index d24d668c5fcf..67bd18deeabf 100644 --- a/pkg/cmd/roachtest/tests/restore.go +++ b/pkg/cmd/roachtest/tests/restore.go @@ -33,7 +33,6 @@ import ( "github.com/cockroachdb/cockroach/pkg/roachpb" "github.com/cockroachdb/cockroach/pkg/roachprod/install" "github.com/cockroachdb/cockroach/pkg/roachprod/logger" - "github.com/cockroachdb/cockroach/pkg/roachprod/vm" "github.com/cockroachdb/cockroach/pkg/testutils" "github.com/cockroachdb/cockroach/pkg/testutils/sqlutils" "github.com/cockroachdb/cockroach/pkg/ts/tspb" @@ -541,18 +540,6 @@ func (hw hardwareSpecs) makeClusterSpecs(r registry.Registry, backupCloud string } s := r.MakeClusterSpec(hw.nodes+addWorkloadNode, clusterOpts...) - if backupCloud == spec.AWS && s.VolumeSize != 0 { - // Work around an issue that RAID0s local NVMe and GP3 storage together: - // https://github.com/cockroachdb/cockroach/issues/98783. - // - // TODO(srosenberg): Remove this workaround when 98783 is addressed. - // TODO(miral): This now returns an error instead of panicking, so even though - // we haven't panicked here before, we should handle the error. Moot if this is - // removed as per TODO above. - s.AWS.MachineType, _, _ = spec.SelectAWSMachineType(s.CPUs, s.Mem, false /* shouldSupportLocalSSD */, vm.ArchAMD64) - s.AWS.MachineType = strings.Replace(s.AWS.MachineType, "d.", ".", 1) - s.Arch = vm.ArchAMD64 - } return s } diff --git a/pkg/roachprod/vm/aws/support.go b/pkg/roachprod/vm/aws/support.go index 5437a9df7e2d..b1abe03e79b4 100644 --- a/pkg/roachprod/vm/aws/support.go +++ b/pkg/roachprod/vm/aws/support.go @@ -50,19 +50,44 @@ mount_opts="defaults" use_multiple_disks='{{if .UseMultipleDisks}}true{{end}}' -disks=() mount_prefix="/mnt/data" +# if the use_multiple_disks is not set and there are more than 1 disk (excluding the boot disk), +# then the disks will be selected for RAID'ing. If there are both EC2 NVMe Instance Storage and +# EBS, RAID'ing in this case can cause performance differences. So, to avoid this, +# EC2 NVMe Instance Storage are ignored. +# Scenarios: +# (local SSD = 0, Network Disk - 1) - no RAID'ing and mount network disk +# (local SSD = 1, Network Disk - 0) - no RAID'ing and mount local SSD +# (local SSD >= 1, Network Disk = 1) - no RAID'ing and mount network disk +# (local SSD > 1, Network Disk = 0) - local SSDs selected for RAID'ing +# (local SSD >= 0, Network Disk > 1) - network disks selected for RAID'ing +# Keep track of the Local SSDs and EBS volumes for RAID'ing +local_disks=() +ebs_volumes=() + # On different machine types, the drives are either called nvme... or xvdd. for d in $(ls /dev/nvme?n1 /dev/xvdd); do if ! mount | grep ${d}; then - disks+=("${d}") + if udevadm info --query=property --name=${d} | grep "ID_MODEL=Amazon Elastic Block Store"; then + echo "EBS Volume ${d} identified!" + ebs_volumes+=("${d}") + else + local_disks+=("${d}") + fi echo "Disk ${d} not mounted, need to mount..." else echo "Disk ${d} already mounted, skipping..." fi done +# use only EBS volumes if available and ignore EC2 NVMe Instance Storage +disks=() +if [ "${#ebs_volumes[@]}" -gt "0" ]; then + disks=("${ebs_volumes[@]}") +else + disks=("${local_disks[@]}") +fi if [ "${#disks[@]}" -eq "0" ]; then mountpoint="${mount_prefix}1" diff --git a/pkg/roachprod/vm/gce/utils.go b/pkg/roachprod/vm/gce/utils.go index 7334c31002fb..59077ddd5118 100644 --- a/pkg/roachprod/vm/gce/utils.go +++ b/pkg/roachprod/vm/gce/utils.go @@ -71,7 +71,23 @@ for d in $(ls /dev/nvme?n? /dev/disk/by-id/google-persistent-disk-[1-9]); do zpool list -v -P | grep ${d} > /dev/null if [ $? -ne 0 ]; then {{ else }} -for d in $(ls /dev/disk/by-id/google-local-* /dev/disk/by-id/google-persistent-disk-[1-9]); do +# if the use_multiple_disks is not set and there are more than 1 disk (excluding the boot disk), +# then the disks will be selected for RAID'ing. If there are both Local SSDs and Persistent disks, +# RAID'ing in this case can cause performance differences. So, to avoid this, local SSDs are ignored. +# Scenarios: +# (local SSD = 0, Persistent Disk - 1) - no RAID'ing and Persistent Disk mounted +# (local SSD = 1, Persistent Disk - 0) - no RAID'ing and local SSD mounted +# (local SSD >= 1, Persistent Disk = 1) - no RAID'ing and Persistent Disk mounted +# (local SSD > 1, Persistent Disk = 0) - local SSDs selected for RAID'ing +# (local SSD >= 0, Persistent Disk > 1) - network disks selected for RAID'ing +disk_list=() +if [ "$(ls /dev/disk/by-id/google-persistent-disk-[1-9]|wc -l)" -eq "0" ]; then + disk_list=$(ls /dev/disk/by-id/google-local-*) +else + echo "Only persistent disks are selected." + disk_list=$(ls /dev/disk/by-id/google-persistent-disk-[1-9]) +fi +for d in ${disk_list}; do if ! mount | grep ${d}; then {{ end }} disks+=("${d}")