Merge pull request #120429 from nameisbhaskar/backport23.1-119906
release-23.1: roachprod: RAID0 only network disks if both local and network are present
nameisbhaskar authored Mar 14, 2024
2 parents 220bde1 + 3a89d55 commit 40a1040
Showing 3 changed files with 44 additions and 13 deletions.
10 changes: 0 additions & 10 deletions pkg/cmd/roachtest/tests/restore.go
@@ -32,7 +32,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/keys"
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/roachprod/install"
"github.com/cockroachdb/cockroach/pkg/roachprod/vm"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
"github.com/cockroachdb/cockroach/pkg/ts/tspb"
@@ -474,15 +473,6 @@ func (hw hardwareSpecs) makeClusterSpecs(r registry.Registry, backupCloud string
}
s := r.MakeClusterSpec(hw.nodes, clusterOpts...)

if backupCloud == spec.AWS && s.VolumeSize != 0 {
// Work around an issue where local NVMe and GP3 storage get RAID0'ed together:
// https://github.com/cockroachdb/cockroach/issues/98783.
//
// TODO(srosenberg): Remove this workaround when 98783 is addressed.
s.AWS.MachineType, _, _ = spec.SelectAWSMachineType(s.CPUs, s.Mem, false /* shouldSupportLocalSSD */, vm.ArchAMD64)
s.AWS.MachineType = strings.Replace(s.AWS.MachineType, "d.", ".", 1)
s.Arch = vm.ArchAMD64
}
return s
}
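
The block removed above was a test-side workaround: it re-resolved an AMD64 machine type with shouldSupportLocalSSD set to false and then stripped the "d" (instance-store) marker from the instance family so that only the GP3 EBS volume was attached. With the startup-script change in this backport, network disks win automatically when both kinds are present, so the workaround is no longer needed. Below is a minimal sketch of that rename step only; the machine-type names are illustrative and not taken from the diff.

```go
package main

import (
	"fmt"
	"strings"
)

func main() {
	// Illustrative only: AWS instance families whose name carries a "d"
	// (e.g. c5d, m5d) ship with local NVMe instance storage; dropping the
	// "d" selects the EBS-only variant, which is what the removed
	// workaround relied on after calling spec.SelectAWSMachineType.
	for _, mt := range []string{"c5d.4xlarge", "m5d.xlarge"} {
		ebsOnly := strings.Replace(mt, "d.", ".", 1)
		fmt.Printf("%s -> %s\n", mt, ebsOnly)
	}
}
```

Running the sketch prints c5d.4xlarge -> c5.4xlarge and m5d.xlarge -> m5.xlarge.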

29 changes: 27 additions & 2 deletions pkg/roachprod/vm/aws/support.go
@@ -50,19 +50,44 @@ mount_opts="defaults"
use_multiple_disks='{{if .UseMultipleDisks}}true{{end}}'
disks=()
mount_prefix="/mnt/data"
# If use_multiple_disks is not set and there is more than one disk (excluding the boot disk),
# the disks are selected for RAID'ing. If both EC2 NVMe Instance Storage and EBS volumes are
# present, RAID'ing them together can lead to unpredictable performance, so the EC2 NVMe
# Instance Storage is ignored in that case.
# Scenarios:
# (local SSD = 0, network disk = 1) - no RAID'ing; mount the network disk
# (local SSD = 1, network disk = 0) - no RAID'ing; mount the local SSD
# (local SSD >= 1, network disk = 1) - no RAID'ing; mount the network disk
# (local SSD > 1, network disk = 0) - local SSDs are RAID'ed
# (local SSD >= 0, network disk > 1) - network disks are RAID'ed
# Keep track of the Local SSDs and EBS volumes for RAID'ing
local_disks=()
ebs_volumes=()
# On different machine types, the drives are either called nvme... or xvdd.
for d in $(ls /dev/nvme?n1 /dev/xvdd); do
if ! mount | grep ${d}; then
disks+=("${d}")
if udevadm info --query=property --name=${d} | grep "ID_MODEL=Amazon Elastic Block Store"; then
echo "EBS Volume ${d} identified!"
ebs_volumes+=("${d}")
else
local_disks+=("${d}")
fi
echo "Disk ${d} not mounted, need to mount..."
else
echo "Disk ${d} already mounted, skipping..."
fi
done
# use only EBS volumes if available and ignore EC2 NVMe Instance Storage
disks=()
if [ "${#ebs_volumes[@]}" -gt "0" ]; then
disks=("${ebs_volumes[@]}")
else
disks=("${local_disks[@]}")
fi
if [ "${#disks[@]}" -eq "0" ]; then
mountpoint="${mount_prefix}1"
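The net effect of the script change above is one rule: if any EBS volume is attached, only the EBS volumes become data disks and the local NVMe instance storage is ignored; otherwise the local SSDs are used. The chosen set is RAID0'ed only when it contains more than one device. The Go sketch below restates that rule outside the script; chooseDataDisks and the sample device paths are hypothetical, not part of roachprod.

```go
package main

import "fmt"

// chooseDataDisks is a hypothetical restatement of the startup-script rule:
// prefer network-attached volumes (EBS) whenever any are present, otherwise
// fall back to the local SSDs. The caller RAID0s the result only if it holds
// more than one device.
func chooseDataDisks(localSSDs, ebsVolumes []string) (disks []string, raid0 bool) {
	if len(ebsVolumes) > 0 {
		disks = ebsVolumes
	} else {
		disks = localSSDs
	}
	return disks, len(disks) > 1
}

func main() {
	// Mixed case from the scenario table: the local SSD is ignored, the
	// single EBS volume is mounted directly, and there is no RAID0.
	disks, raid0 := chooseDataDisks(
		[]string{"/dev/nvme1n1"}, // local NVMe instance storage
		[]string{"/dev/nvme2n1"}, // EBS volume
	)
	fmt.Println(disks, raid0)
}
```

For the mixed case it prints [/dev/nvme2n1] false, matching the "no RAID'ing; mount the network disk" row above.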
18 changes: 17 additions & 1 deletion pkg/roachprod/vm/gce/utils.go
@@ -71,7 +71,23 @@ for d in $(ls /dev/nvme?n? /dev/disk/by-id/google-persistent-disk-[1-9]); do
zpool list -v -P | grep ${d} > /dev/null
if [ $? -ne 0 ]; then
{{ else }}
for d in $(ls /dev/disk/by-id/google-local-* /dev/disk/by-id/google-persistent-disk-[1-9]); do
# If use_multiple_disks is not set and there is more than one disk (excluding the boot disk),
# the disks are selected for RAID'ing. If both local SSDs and persistent disks are present,
# RAID'ing them together can lead to unpredictable performance, so the local SSDs are ignored
# in that case.
# Scenarios:
# (local SSD = 0, persistent disk = 1) - no RAID'ing; mount the persistent disk
# (local SSD = 1, persistent disk = 0) - no RAID'ing; mount the local SSD
# (local SSD >= 1, persistent disk = 1) - no RAID'ing; mount the persistent disk
# (local SSD > 1, persistent disk = 0) - local SSDs are RAID'ed
# (local SSD >= 0, persistent disk > 1) - persistent disks are RAID'ed
disk_list=()
if [ "$(ls /dev/disk/by-id/google-persistent-disk-[1-9]|wc -l)" -eq "0" ]; then
disk_list=$(ls /dev/disk/by-id/google-local-*)
else
echo "Only persistent disks are selected."
disk_list=$(ls /dev/disk/by-id/google-persistent-disk-[1-9])
fi
for d in ${disk_list}; do
if ! mount | grep ${d}; then
{{ end }}
disks+=("${d}")
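The GCE script applies the same preference, with persistent disks in the role of EBS volumes. As a quick cross-check of the scenario table, this standalone sketch (illustrative only, not roachprod code) runs each count pair through the rule and reports which set would be mounted and whether it would be RAID0'ed.

```go
package main

import "fmt"

func main() {
	// Each case mirrors one line of the scenario comment: given the number
	// of local SSDs and persistent (network) disks, which set is used and
	// whether it gets RAID0'ed.
	cases := []struct{ local, network int }{
		{0, 1}, {1, 0}, {1, 1}, {2, 0}, {2, 2},
	}
	for _, c := range cases {
		use, count := "local SSDs", c.local
		if c.network > 0 { // network disks always win when present
			use, count = "network disks", c.network
		}
		fmt.Printf("local=%d network=%d -> mount %s, RAID0=%v\n",
			c.local, c.network, use, count > 1)
	}
}
```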

