Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
135231: scripts/ldr: record tpcc setup steps r=dt a=dt

Release note: none.
Epic: none.

135554: drtprod: update drt-chaos yaml and scripts r=shailendra-patel a=vidit-bhat

This PR updates the deployment yaml for `drt-chaos` and generalises the common scripts for workloads

Epic: none
Release note: None

Co-authored-by: David Taylor <[email protected]>
Co-authored-by: Vidit Bhat <[email protected]>
  • Loading branch information
3 people committed Nov 18, 2024
3 parents 3aaab0a + b002154 + a14970c commit a900420
Show file tree
Hide file tree
Showing 4 changed files with 253 additions and 11 deletions.
30 changes: 30 additions & 0 deletions pkg/cmd/drtprod/configs/drt_chaos.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ environment:
ROACHPROD_GCE_DEFAULT_PROJECT: cockroach-drt
CLUSTER: drt-chaos
WORKLOAD_CLUSTER: workload-chaos
WORKLOAD_NODES: 1

targets:
- target_name: $CLUSTER
Expand Down Expand Up @@ -114,6 +115,16 @@ targets:
- $WORKLOAD_CLUSTER
- certs-$CLUSTER
- certs
- command: put
args:
- $WORKLOAD_CLUSTER
- artifacts/roachprod
- roachprod
- command: put
args:
- $WORKLOAD_CLUSTER
- pkg/cmd/drt/scripts/roachtest_operations_run.sh
- roachtest_operations_run.sh
- command: ssh
args:
- $WORKLOAD_CLUSTER
Expand All @@ -128,3 +139,22 @@ targets:
flags:
warehouses: 12000
db: cct_tpcc
- script: "pkg/cmd/drtprod/scripts/generate_tpcc_run.sh"
args:
- cct_tpcc # suffix added to script name tpcc_run.sh
- false # determines whether to execute the script immediately on workload node
flags:
db: cct_tpcc
warehouses: 12000
max-rate: 500
workers: 50
conns: 50
duration: 12h
ramp: 10m
wait: 0
- script: "pkg/cmd/drtprod/scripts/generate_kv_run.sh"
args:
- false # determines whether to execute the script immediately on workload node
- script: "pkg/cmd/drtprod/scripts/generate_tpcc_drop.sh"
args:
- false # determines whether to execute the script immediately on workload node
82 changes: 82 additions & 0 deletions pkg/cmd/drtprod/scripts/generate_kv_run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#!/usr/bin/env bash

# Copyright 2024 The Cockroach Authors.
#
# Use of this software is governed by the CockroachDB Software License
# included in the /LICENSE file.

# This script generates the kv workload runner (kv_run.sh) and installs it in
# the home directory of every workload node.
#
# Usage: generate_kv_run.sh <execute:true|false>
#   execute - when "true", the runner is also started on workload node 1 as the
#             transient systemd unit "kv_run".
#
# NOTE - This uses the CLUSTER, WORKLOAD_CLUSTER and WORKLOAD_NODES environment
# variables; if any of them is not set the script fails.
set -euo pipefail

if [ "$#" -lt 1 ]; then
  echo "Usage: $0 <execute:true|false>" >&2
  exit 1
fi
execute_script=$1
shift

if [ -z "${CLUSTER:-}" ]; then
  echo "environment CLUSTER is not set" >&2
  exit 1
fi

if [ -z "${WORKLOAD_CLUSTER:-}" ]; then
  echo "environment WORKLOAD_CLUSTER is not set" >&2
  exit 1
fi

if [ -z "${WORKLOAD_NODES:-}" ]; then
  echo "environment WORKLOAD_NODES is not set" >&2
  exit 1
fi

if ! [[ "${WORKLOAD_NODES}" =~ ^[1-9][0-9]*$ ]]; then
  echo "environment WORKLOAD_NODES must be a positive integer, got '${WORKLOAD_NODES}'" >&2
  exit 1
fi

# The pgurls are baked into the generated script, so refuse to continue when
# they cannot be resolved instead of emitting a runner with no targets.
PGURLS=$(roachprod pgurl "${CLUSTER}" --external | sed "s/'//g")
if [ -z "${PGURLS}" ]; then
  echo "roachprod pgurl returned no URLs for ${CLUSTER}" >&2
  exit 1
fi

# Create the workload script. The heredoc delimiter is unquoted on purpose:
# $PGURLS is expanded now (generation time), while everything prefixed with a
# backslash is left for the generated script to expand at run time.
# The script does not depend on the node, so it is generated only once.
cat <<EOF >/tmp/kv_run.sh
#!/usr/bin/env bash
read -r -a PGURLS_ARR <<< "$PGURLS"
j=0
while true; do
  echo ">> Starting kv workload"
  ((j++))
  LOG=./kv_\$j.txt
  ./cockroach workload run kv \
    --init \
    --drop \
    --concurrency 128 \
    --histograms kv/stats.json \
    --db kv \
    --splits 1000 \
    --read-percent 50 \
    --span-percent 20 \
    --cycle-length 100000 \
    --min-block-bytes 100 \
    --max-block-bytes 1000 \
    --prometheus-port 2114 \
    --max-rate 200 \
    --secure \
    --ramp 10m \
    --display-every 5s \
    --duration 12h \
    --tolerate-errors \
    --enum \
    "\${PGURLS_ARR[@]}" | tee "\$LOG"
  # \$? would report tee's status; PIPESTATUS[0] is the workload's own status,
  # so the log is only discarded after a successful run.
  if [ "\${PIPESTATUS[0]}" -eq 0 ]; then
    rm "\$LOG"
  fi
  sleep 1
done
EOF

# Upload the script to every node of the workload cluster
for ((NODE = 1; NODE <= WORKLOAD_NODES; NODE++)); do
  roachprod put "${WORKLOAD_CLUSTER}:${NODE}" /tmp/kv_run.sh
  roachprod ssh "${WORKLOAD_CLUSTER}:${NODE}" -- "chmod +x kv_run.sh"
done

# Start the runner once, on node 1 (the node the original command targeted).
# Issuing the same systemd-run for every loop iteration would try to create the
# unit "kv_run" on node 1 again, which systemd rejects for an existing unit.
# NOTE(review): if the intent is one runner per workload node, move this into
# the loop above and target ${NODE} instead.
# \$(pwd), \$(id -u) and \$(id -g) are escaped so they are evaluated on the
# remote node rather than on the machine running this script.
if [ "${execute_script}" = "true" ]; then
  roachprod run "${WORKLOAD_CLUSTER}:1" -- "sudo systemd-run --unit kv_run --same-dir --uid \$(id -u) --gid \$(id -g) bash \$(pwd)/kv_run.sh"
fi
98 changes: 98 additions & 0 deletions pkg/cmd/drtprod/scripts/generate_tpcc_drop.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
#!/usr/bin/env bash

# Copyright 2024 The Cockroach Authors.
#
# Use of this software is governed by the CockroachDB Software License
# included in the /LICENSE file.

# This script generates the tpcc drop workload runner (tpcc_drop.sh) and
# installs it in the home directory of every workload node. The generated
# runner loops forever: it imports tpcc into cct_tpcc_drop, drops the previous
# copy (cct_tpcc_drop_old), runs tpcc for an hour and then renames
# cct_tpcc_drop to cct_tpcc_drop_old.
#
# Usage: generate_tpcc_drop.sh <execute:true|false>
#   execute - when "true", the runner is also started on workload node 1 as the
#             transient systemd unit "tpcc_drop".
#
# NOTE - This uses the CLUSTER, WORKLOAD_CLUSTER and WORKLOAD_NODES environment
# variables; if any of them is not set the script fails.
set -euo pipefail

if [ "$#" -lt 1 ]; then
  echo "Usage: $0 <execute:true|false>" >&2
  exit 1
fi
execute_script=$1
shift

if [ -z "${CLUSTER:-}" ]; then
  echo "environment CLUSTER is not set" >&2
  exit 1
fi

if [ -z "${WORKLOAD_CLUSTER:-}" ]; then
  echo "environment WORKLOAD_CLUSTER is not set" >&2
  exit 1
fi

if [ -z "${WORKLOAD_NODES:-}" ]; then
  echo "environment WORKLOAD_NODES is not set" >&2
  exit 1
fi

if ! [[ "${WORKLOAD_NODES}" =~ ^[1-9][0-9]*$ ]]; then
  echo "environment WORKLOAD_NODES must be a positive integer, got '${WORKLOAD_NODES}'" >&2
  exit 1
fi

# The pgurls are baked into the generated script, so refuse to continue when
# they cannot be resolved instead of emitting a runner with no targets.
PG_URL_N1=$(roachprod pgurl "${CLUSTER}:1" --external | sed "s/'//g")
PGURLS=$(roachprod pgurl "${CLUSTER}" --external | sed "s/'//g")
if [ -z "${PG_URL_N1}" ] || [ -z "${PGURLS}" ]; then
  echo "roachprod pgurl returned no URLs for ${CLUSTER}" >&2
  exit 1
fi

# Create the workload script. The heredoc delimiter is unquoted on purpose:
# $PGURLS and ${PG_URL_N1} are expanded now (generation time), while everything
# prefixed with a backslash is left for the generated script to expand at run
# time. The script does not depend on the node, so it is generated only once.
cat <<EOF >/tmp/tpcc_drop.sh
#!/usr/bin/env bash
read -r -a PGURLS_ARR <<< "$PGURLS"
echo ">> Dropping old databases if they exist"
./cockroach sql --url "${PG_URL_N1}" -e "DROP DATABASE IF EXISTS cct_tpcc_drop_old CASCADE;"
./cockroach sql --url "${PG_URL_N1}" -e "DROP DATABASE IF EXISTS cct_tpcc_drop CASCADE;"
j=0
while true; do
  echo ">> Starting tpcc-drop-db workload"
  echo ">> Importing tpcc"
  ((j++))
  INIT_LOG=./cct_tpcc_drop_init_\$j.txt
  RUN_LOG=./cct_tpcc_drop_run_\$j.txt
  # Temporarily, we clean up any paused IMPORT jobs that may exist due to
  # node failures. Long-term, we will address these failures by making
  # IMPORT more resilient and/or creating a variant of IMPORT which cancels
  # itself instead of pausing.
  ./cockroach sql --url "${PG_URL_N1}" -e "CANCEL JOBS (WITH x AS (SHOW JOBS) SELECT job_id FROM x WHERE status = 'paused' AND job_type = 'IMPORT');"
  sleep 15
  ./cockroach workload init tpcc \
    --warehouses=3000 \
    --secure \
    --concurrency 4 \
    --db cct_tpcc_drop \
    "${PG_URL_N1}" | tee "\$INIT_LOG"
  echo ">> Dropping cct_tpcc_drop_old if it exists"
  ./cockroach sql --url "${PG_URL_N1}" -e "DROP DATABASE IF EXISTS cct_tpcc_drop_old CASCADE;"
  sleep 5
  echo ">> Starting tpcc workload for 1h"
  ./cockroach workload run tpcc \
    --warehouses 3000 \
    --active-warehouses 1000 \
    --db cct_tpcc_drop \
    --secure \
    --prometheus-port 2113 \
    --ramp 5m \
    --display-every 5s \
    --duration 60m \
    --tolerate-errors \
    "\${PGURLS_ARR[@]}" | tee "\$RUN_LOG"
  echo ">> Renaming to cct_tpcc_drop_old"
  ./cockroach sql --url "${PG_URL_N1}" -e "ALTER DATABASE cct_tpcc_drop RENAME TO cct_tpcc_drop_old;"
  sleep 1
done
EOF

# Upload the script to every node of the workload cluster
for ((NODE = 1; NODE <= WORKLOAD_NODES; NODE++)); do
  roachprod put "${WORKLOAD_CLUSTER}:${NODE}" /tmp/tpcc_drop.sh
  roachprod ssh "${WORKLOAD_CLUSTER}:${NODE}" -- "chmod +x tpcc_drop.sh"
done

# Start the runner once, on node 1 (the node the original command targeted).
# Issuing the same systemd-run for every loop iteration would try to create the
# unit "tpcc_drop" on node 1 again, which systemd rejects for an existing unit.
# NOTE(review): running this workload from several nodes at once would have
# them drop/rename the same databases concurrently - confirm before changing
# the target to ${NODE}.
# \$(pwd), \$(id -u) and \$(id -g) are escaped so they are evaluated on the
# remote node rather than on the machine running this script.
if [ "${execute_script}" = "true" ]; then
  roachprod run "${WORKLOAD_CLUSTER}:1" -- "sudo systemd-run --unit tpcc_drop --same-dir --uid \$(id -u) --gid \$(id -g) bash \$(pwd)/tpcc_drop.sh"
fi
54 changes: 43 additions & 11 deletions scripts/ldr
Original file line number Diff line number Diff line change
Expand Up @@ -61,17 +61,17 @@ fi
case $1 in
"setup")
shift
$0 create "$@"
$0 create "$@" --nodes=3
$0 go
;;

"go")
$0 start
$0 workload init
$0 jobs start
echo "starting the workload..."
$0 workload run
echo "LDR is setup and running!"
$0 ycsb init
$0 jobs start-ycsb
echo "starting ycsb..."
$0 ycsb run
echo "LDR is setup and running on the ycsb workload!"
echo
roachprod adminurl $A:1 --path "/#/metrics/logicalDataReplication/cluster"
roachprod adminurl $B:1 --path "/#/metrics/logicalDataReplication/cluster"
Expand All @@ -80,24 +80,34 @@ case $1 in
"create")
shift
roachprod create $A \
--clouds gce --gce-machine-type n2-standard-16 --local-ssd=false --nodes 3 --username "$USER" --lifetime 24h "$@"
--clouds gce --gce-machine-type n2-standard-16 --nodes 5 --username "$USER" --local-ssd=false --gce-pd-volume-size 3000 --lifetime 96h "$@"
roachprod create $B \
--clouds gce --gce-machine-type n2-standard-16 --local-ssd=false --nodes 3 --username "$USER" --lifetime 24h "$@"
--clouds gce --gce-machine-type n2-standard-16 --nodes 5 --username "$USER" --local-ssd=false --gce-pd-volume-size 3000 --lifetime 96h "$@"
$0 stage cockroach
$0 stage workload
;;

"jobs")
shift
case "${1:-}" in
"start")
"start-ycsb")
roachprod sql $A:1 -- -e "SET CLUSTER SETTING kv.rangefeed.enabled = true"
roachprod sql $B:1 -- -e "SET CLUSTER SETTING kv.rangefeed.enabled = true"
roachprod sql $A:1 -- -e "CREATE EXTERNAL CONNECTION IF NOT EXISTS b AS $(roachprod pgurl --database ycsb $B:1)"
roachprod sql $B:1 -- -e "CREATE EXTERNAL CONNECTION IF NOT EXISTS a AS $(roachprod pgurl --database ycsb $A:1)"
roachprod sql $A:1 -- -e "CREATE LOGICAL REPLICATION STREAM FROM TABLE usertable ON 'external://b' INTO TABLE ycsb.public.usertable;"
roachprod sql $B:1 -- -e "CREATE LOGICAL REPLICATION STREAM FROM TABLE usertable ON 'external://a' INTO TABLE ycsb.public.usertable;"
;;
"start-tpcc-a")
roachprod sql $A:1 -- -e "SET CLUSTER SETTING kv.rangefeed.enabled = true"
roachprod sql $A:1 -- -e "CREATE EXTERNAL CONNECTION IF NOT EXISTS b AS $(roachprod pgurl --database tpcc $B:1)"
roachprod sql $A:1 -- -e "CREATE LOGICAL REPLICATION STREAM FROM TABLES (tpcc.customer, tpcc.district, tpcc.history, tpcc.item, tpcc.new_order, tpcc.order, tpcc.order_line, tpcc.stock, tpcc.warehouse) ON 'external://b' INTO TABLES (tpcc.customer, tpcc.district, tpcc.history, tpcc.item, tpcc.new_order, tpcc.order, tpcc.order_line, tpcc.stock, tpcc.warehouse) WITH cursor='$(date +%s000000000.0)';"
;;
"start-tpcc-b")
roachprod sql $B:1 -- -e "SET CLUSTER SETTING kv.rangefeed.enabled = true"
roachprod sql $B:1 -- -e "CREATE EXTERNAL CONNECTION IF NOT EXISTS a AS $(roachprod pgurl --database tpcc $A:1)"
roachprod sql $B:1 -- -e "CREATE LOGICAL REPLICATION STREAM FROM TABLES (tpcc.customer, tpcc.district, tpcc.history, tpcc.item, tpcc.new_order, tpcc.order, tpcc.order_line, tpcc.stock, tpcc.warehouse) ON 'external://a' INTO TABLES (tpcc.customer, tpcc.district, tpcc.history, tpcc.item, tpcc.new_order, tpcc.order, tpcc.order_line, tpcc.stock, tpcc.warehouse) WITH cursor='$(date +%s000000000.0)';"
;;
"pause")
roachprod sql $A:1 -- -e "PAUSE JOBS (WITH x AS (SHOW JOBS) SELECT job_id FROM x WHERE job_type = 'LOGICAL REPLICATION' AND status = 'running');"
roachprod sql $B:1 -- -e "PAUSE JOBS (WITH x AS (SHOW JOBS) SELECT job_id FROM x WHERE job_type = 'LOGICAL REPLICATION' AND status = 'running');"
Expand Down Expand Up @@ -125,7 +135,7 @@ case $1 in
esac
;;

"workload")
"ycsb")
shift
case "${1:-}" in
"init")
Expand Down Expand Up @@ -153,12 +163,34 @@ case $1 in
roachprod run $B:1 -- "killall -9 workload || true"
;;
*)
echo "unknown command '$1'; useage: $0 {start|stop}"
echo "unknown command '$1'; useage: $0 ycsb {init|run|stop}"
exit 1
;;
esac
;;

"tpcc")
shift
case "${1:-}" in
"init")
roachprod sql $A:1 -- -e "RESTORE DATABASE tpcc FROM latest IN 'gs://cockroach-fixtures-us-east1/backups/tpc-c/v24.1/db/warehouses=150k?AUTH=implicit' WITH OPTIONS (detached, unsafe_restore_incompatible_version)"
roachprod sql $B:1 -- -e "RESTORE DATABASE tpcc FROM latest IN 'gs://cockroach-fixtures-us-east1/backups/tpc-c/v24.1/db/warehouses=150k?AUTH=implicit' WITH OPTIONS (detached, unsafe_restore_incompatible_version)"
echo "monitor the restores via DB console for completion"
;;
"run")
OUTPUT_FILE_A="a-tpcc-$(date '+%Y-%m-%d-%H:%M:%S').log"
shift
roachprod run $A:1 "env -i nohup ./workload run tpcc --warehouses=150000 --active-warehouses=5000 --max-rate 1600 --workers=5000 --active-workers=400 --wait=false $@ $(roachprod pgurl $A) $(roachprod pgurl $B) > $OUTPUT_FILE_A 2> $OUTPUT_FILE_A &" &
;;
"stop")
roachprod run $A:1 -- "killall -9 workload || true"
;;
*)
echo "unknown command '$1'; useage: $0 tpcc {init|run|stop}"
exit 1
;;
esac
;;
"settings")
shift
case "${1:-}" in
Expand Down

0 comments on commit a900420

Please sign in to comment.