From 33d95a02ecbaa30f4654374c59e934e296da2ebf Mon Sep 17 00:00:00 2001 From: Vidit Bhat Date: Thu, 2 Jan 2025 11:43:47 +0530 Subject: [PATCH] drtprod: setup dmsetup disk staller Previously, disk stall operations used to fail on drt clusters since there was no prior setup done. This PR adds a script that does the setup similar to how we do it for the disk stall roachtest. Epic: none Release note: None --- pkg/cmd/drtprod/configs/drt_chaos.yaml | 6 ++++-- pkg/cmd/drtprod/configs/drt_large.yaml | 1 + pkg/cmd/drtprod/configs/drt_test.yaml | 1 + pkg/cmd/drtprod/configs/drt_test_destroy.yaml | 21 +++++++++++++++++++ .../scripts/setup_dmsetup_disk_staller | 16 ++++++++++++++ 5 files changed, 43 insertions(+), 2 deletions(-) create mode 100755 pkg/cmd/drtprod/configs/drt_test_destroy.yaml create mode 100755 pkg/cmd/drtprod/scripts/setup_dmsetup_disk_staller diff --git a/pkg/cmd/drtprod/configs/drt_chaos.yaml b/pkg/cmd/drtprod/configs/drt_chaos.yaml index 3d8e8d8f9dfa..a37c39338a33 100644 --- a/pkg/cmd/drtprod/configs/drt_chaos.yaml +++ b/pkg/cmd/drtprod/configs/drt_chaos.yaml @@ -6,6 +6,7 @@ environment: ROACHPROD_GCE_DNS_ZONE: drt ROACHPROD_GCE_DEFAULT_PROJECT: cockroach-drt CLUSTER: drt-chaos + CLUSTER_NODES: 6 WORKLOAD_CLUSTER: workload-chaos WORKLOAD_NODES: 1 @@ -20,7 +21,7 @@ targets: gce-managed: true gce-enable-multiple-stores: true gce-zones: "us-east1-d,us-east1-b,us-east1-c" - nodes: 6 + nodes: $CLUSTER_NODES gce-machine-type: n2-standard-16 local-ssd: true gce-local-ssd-count: 4 @@ -38,6 +39,7 @@ targets: args: - $CLUSTER - cockroach + - script: "pkg/cmd/drtprod/scripts/setup_dmsetup_disk_staller" - script: "pkg/cmd/drtprod/scripts/setup_datadog_cluster" - command: start args: @@ -67,7 +69,7 @@ targets: flags: clouds: gce gce-zones: "us-east1-c" - nodes: 1 + nodes: $WORKLOAD_NODES gce-machine-type: n2-standard-8 os-volume-size: 100 username: workload diff --git a/pkg/cmd/drtprod/configs/drt_large.yaml b/pkg/cmd/drtprod/configs/drt_large.yaml index 
9cb2613395f5..fad2be40ecd4 100644 --- a/pkg/cmd/drtprod/configs/drt_large.yaml +++ b/pkg/cmd/drtprod/configs/drt_large.yaml @@ -51,6 +51,7 @@ targets: args: - $CLUSTER - cockroach + - script: "pkg/cmd/drtprod/scripts/setup_dmsetup_disk_staller" - script: "pkg/cmd/drtprod/scripts/setup_datadog_cluster" - command: start args: diff --git a/pkg/cmd/drtprod/configs/drt_test.yaml b/pkg/cmd/drtprod/configs/drt_test.yaml index 0f79e8437e4b..f67c0d3479a8 100644 --- a/pkg/cmd/drtprod/configs/drt_test.yaml +++ b/pkg/cmd/drtprod/configs/drt_test.yaml @@ -36,6 +36,7 @@ targets: args: - $CLUSTER - cockroach + - script: "pkg/cmd/drtprod/scripts/setup_dmsetup_disk_staller" - script: "pkg/cmd/drtprod/scripts/setup_datadog_cluster" - command: start args: diff --git a/pkg/cmd/drtprod/configs/drt_test_destroy.yaml b/pkg/cmd/drtprod/configs/drt_test_destroy.yaml new file mode 100755 index 000000000000..cf42925c0892 --- /dev/null +++ b/pkg/cmd/drtprod/configs/drt_test_destroy.yaml @@ -0,0 +1,21 @@ +# Yaml for destroying the drt-test and workload-test clusters. +environment: + ROACHPROD_GCE_DEFAULT_SERVICE_ACCOUNT: 622274581499-compute@developer.gserviceaccount.com + ROACHPROD_DNS: drt.crdb.io + ROACHPROD_GCE_DNS_DOMAIN: drt.crdb.io + ROACHPROD_GCE_DNS_ZONE: drt + ROACHPROD_GCE_DEFAULT_PROJECT: cockroach-drt + CLUSTER: drt-test + WORKLOAD_CLUSTER: workload-test + +targets: + - target_name: $CLUSTER + steps: + - command: destroy + args: + - $CLUSTER + - target_name: $WORKLOAD_CLUSTER + steps: + - command: destroy + args: + - $WORKLOAD_CLUSTER diff --git a/pkg/cmd/drtprod/scripts/setup_dmsetup_disk_staller b/pkg/cmd/drtprod/scripts/setup_dmsetup_disk_staller new file mode 100755 index 000000000000..62557d613a68 --- /dev/null +++ b/pkg/cmd/drtprod/scripts/setup_dmsetup_disk_staller @@ -0,0 +1,16 @@ +#!/bin/bash + +# Sets up the dmsetup disk staller for the drt clusters. 
+# NOTE - This uses the CLUSTER environment variable; if it is not set, the script fails + +if [ -z "${CLUSTER}" ]; then + echo "environment CLUSTER is not set" + exit 1 +fi + +roachprod ssh "$CLUSTER" -- "sudo apt-get purge -y snapd" +roachprod ssh "$CLUSTER" -- "sudo umount -f /mnt/data1" +roachprod ssh "$CLUSTER" -- "sudo dmsetup remove_all" +roachprod ssh "$CLUSTER" -- "sudo tune2fs -O ^has_journal /dev/nvme0n1" +roachprod ssh "$CLUSTER" -- 'echo "0 $(sudo blockdev --getsz /dev/nvme0n1) linear /dev/nvme0n1 0" | sudo dmsetup create data1' +roachprod ssh "$CLUSTER" -- "sudo mount /dev/mapper/data1 /mnt/data1"