roachprod: RAID0 only network disks if both local and network are present #119906

Merged · 1 commit · Mar 13, 2024
13 changes: 0 additions & 13 deletions pkg/cmd/roachtest/tests/restore.go
@@ -33,7 +33,6 @@ import (
"github.com/cockroachdb/cockroach/pkg/roachpb"
"github.com/cockroachdb/cockroach/pkg/roachprod/install"
"github.com/cockroachdb/cockroach/pkg/roachprod/logger"
"github.com/cockroachdb/cockroach/pkg/roachprod/vm"
"github.com/cockroachdb/cockroach/pkg/testutils"
"github.com/cockroachdb/cockroach/pkg/testutils/sqlutils"
"github.com/cockroachdb/cockroach/pkg/ts/tspb"
@@ -537,18 +536,6 @@ func (hw hardwareSpecs) makeClusterSpecs(r registry.Registry, backupCloud string
}
s := r.MakeClusterSpec(hw.nodes+addWorkloadNode, clusterOpts...)

if backupCloud == spec.AWS && s.VolumeSize != 0 {
// Work around an issue that RAID0s local NVMe and GP3 storage together:
// https://github.com/cockroachdb/cockroach/issues/98783.
//
// TODO(srosenberg): Remove this workaround when 98783 is addressed.
// TODO(miral): This now returns an error instead of panicking, so even though
// we haven't panicked here before, we should handle the error. Moot if this is
// removed as per TODO above.
s.AWS.MachineType, _, _ = spec.SelectAWSMachineType(s.CPUs, s.Mem, false /* shouldSupportLocalSSD */, vm.ArchAMD64)
s.AWS.MachineType = strings.Replace(s.AWS.MachineType, "d.", ".", 1)
s.Arch = vm.ArchAMD64
}
return s
}

29 changes: 27 additions & 2 deletions pkg/roachprod/vm/aws/support.go
@@ -50,19 +50,44 @@ mount_opts="defaults"

use_multiple_disks='{{if .UseMultipleDisks}}true{{end}}'

disks=()
mount_prefix="/mnt/data"

# If use_multiple_disks is not set and there is more than one data disk
# (excluding the boot disk), the disks are RAID0'ed together. When both EC2
# NVMe instance storage and EBS volumes are present, RAID'ing them together
# can cause performance anomalies, so in that case the EC2 NVMe instance
# storage is ignored.
# Scenarios:
# (local SSD = 0, network disk = 1) - no RAID'ing; mount the network disk
# (local SSD = 1, network disk = 0) - no RAID'ing; mount the local SSD
# (local SSD >= 1, network disk = 1) - no RAID'ing; mount the network disk
# (local SSD > 1, network disk = 0) - local SSDs selected for RAID'ing
# (local SSD >= 0, network disk > 1) - network disks selected for RAID'ing
# Keep track of the local SSDs and EBS volumes separately for RAID'ing.
local_disks=()
ebs_volumes=()

# On different machine types, the drives are either called nvme... or xvdd.
for d in $(ls /dev/nvme?n1 /dev/xvdd); do
if ! mount | grep ${d}; then
disks+=("${d}")
if udevadm info --query=property --name=${d} | grep "ID_MODEL=Amazon Elastic Block Store"; then
echo "EBS Volume ${d} identified!"
ebs_volumes+=("${d}")
else
local_disks+=("${d}")
fi
echo "Disk ${d} not mounted, need to mount..."
else
echo "Disk ${d} already mounted, skipping..."
fi
done

# Use only EBS volumes if any are present; otherwise fall back to the EC2 NVMe instance storage.
disks=()
if [ "${#ebs_volumes[@]}" -gt "0" ]; then
disks=("${ebs_volumes[@]}")
else
disks=("${local_disks[@]}")
fi

if [ "${#disks[@]}" -eq "0" ]; then
mountpoint="${mount_prefix}1"
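As a sanity check of the selection logic above, here is a small standalone sketch (not part of the PR) that classifies each data disk the way the updated bootstrap script does. The device globs and the "Amazon Elastic Block Store" model string are taken from the diff; everything else is illustrative.

```bash
#!/usr/bin/env bash
# Illustrative only: classify AWS data disks the way the bootstrap script above
# does, to see on a live instance which disks the RAID0 would include.
for d in $(ls /dev/nvme?n1 /dev/xvdd 2>/dev/null); do
  if mount | grep -q "${d}"; then
    echo "${d}: already mounted, skipped"
  elif udevadm info --query=property --name="${d}" | grep -q "ID_MODEL=Amazon Elastic Block Store"; then
    echo "${d}: EBS volume (kept when both disk types are present)"
  else
    echo "${d}: local NVMe instance store (ignored when EBS volumes are present)"
  fi
done
```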
18 changes: 17 additions & 1 deletion pkg/roachprod/vm/gce/utils.go
@@ -72,7 +72,23 @@ for d in $(ls /dev/nvme?n? /dev/disk/by-id/google-persistent-disk-[1-9]); do
zpool list -v -P | grep ${d} > /dev/null
if [ $? -ne 0 ]; then
{{ else }}
for d in $(ls /dev/disk/by-id/google-local-* /dev/disk/by-id/google-persistent-disk-[1-9]); do
# If use_multiple_disks is not set and there is more than one data disk
# (excluding the boot disk), the disks are RAID0'ed together. When both local
# SSDs and persistent disks are present, RAID'ing them together can cause
# performance anomalies, so in that case the local SSDs are ignored.
# Scenarios:
# (local SSD = 0, persistent disk = 1) - no RAID'ing; mount the persistent disk
# (local SSD = 1, persistent disk = 0) - no RAID'ing; mount the local SSD
# (local SSD >= 1, persistent disk = 1) - no RAID'ing; mount the persistent disk
# (local SSD > 1, persistent disk = 0) - local SSDs selected for RAID'ing
# (local SSD >= 0, persistent disk > 1) - persistent disks selected for RAID'ing
disk_list=()
if [ "$(ls /dev/disk/by-id/google-persistent-disk-[1-9]|wc -l)" -eq "0" ]; then
disk_list=$(ls /dev/disk/by-id/google-local-*)
else
echo "Only persistent disks are selected."
disk_list=$(ls /dev/disk/by-id/google-persistent-disk-[1-9])
fi
for d in ${disk_list}; do
if ! mount | grep ${d}; then
{{ end }}
disks+=("${d}")
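Likewise for GCE, a hedged sketch of the new branch's decision, assuming only the by-id device paths used in the diff (data persistent disks are numbered 1-9; the boot disk is not):

```bash
#!/usr/bin/env bash
# Illustrative only: mirror the GCE selection above. A non-zero count of
# numbered persistent disks means local SSDs are ignored entirely.
pd_count=$(ls /dev/disk/by-id/google-persistent-disk-[1-9] 2>/dev/null | wc -l)
if [ "${pd_count}" -eq 0 ]; then
  echo "No data persistent disks found; selecting local SSDs:"
  ls /dev/disk/by-id/google-local-* 2>/dev/null
else
  echo "Selecting only persistent disks (${pd_count} found):"
  ls /dev/disk/by-id/google-persistent-disk-[1-9]
fi
```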