From cbff5c03b9e2e7bc5920033580ce29278f131966 Mon Sep 17 00:00:00 2001
From: Herko Lategan
Date: Fri, 6 Dec 2024 12:26:51 +0000
Subject: [PATCH] roachprod: monitor remote script

Add a new monitor script for monitoring remote processes.

`systemctl` is used to monitor cockroach processes, on remote nodes, and
determine the exit status if a process has died. It emits the same frame format
as the local script version, and depends on Monitor to implement the logic for
detecting changes in the process list frame.

Informs: #118214

Epic: None
---
 .../install/scripts/monitor_remote.sh         | 59 +++++++++++++++++++
 1 file changed, 59 insertions(+)
 create mode 100755 pkg/roachprod/install/scripts/monitor_remote.sh

diff --git a/pkg/roachprod/install/scripts/monitor_remote.sh b/pkg/roachprod/install/scripts/monitor_remote.sh
new file mode 100755
index 000000000000..2f76593a3a48
--- /dev/null
+++ b/pkg/roachprod/install/scripts/monitor_remote.sh
@@ -0,0 +1,59 @@
+#!/usr/bin/env bash
+#
+# Copyright 2024 The Cockroach Authors.
+#
+# Use of this software is governed by the CockroachDB Software License
+# included in the /LICENSE file.
+
+# This script is used to monitor the status of cockroach processes on a remote
+# node where systemctl is available.
+# It produces output in the following format:
+#cockroach-system=500
+#status=unknown
+#cockroach-tenant_0=501
+#status=1
+#\n = end of frame
+#
+# A frame is only printed when it differs from the previously printed one.
+# The first frame is always printed, even when no cockroach units were found
+# (in which case it consists of the terminating blank line only).
+
+# NOTE(review): the value below looks like a template placeholder that is
+# filled in before the script runs (leaving "true" or nothing) - confirm
+# against the caller.
+one_shot=#{if .OneShot#}true#{end#}
+
+prev_frame=""
+# Empty until the first frame has been printed.
+printed=""
+while :; do
+  # Get all cockroach system units. --plain omits the bullet that systemctl
+  # otherwise puts in front of failed units, which would end up in $1 in place
+  # of the unit name.
+  sysctl_output=$(systemctl list-units 'cockroach*.service' --type=service \
+    --no-legend --no-pager --plain | awk '{print $1}')
+  frame=""
+  while IFS= read -r name; do
+    # The here-string yields a single empty line when no units are listed.
+    [[ -n "$name" ]] || continue
+    # Query the PID and status of the cockroach system unit
+    pid=$(systemctl show "$name" --property MainPID --value)
+    status=$(systemctl show "$name" --property ExecMainStatus --value)
+    vc_label=${name%.service}
+    # Join with real newlines so that backslashes in unit names are not
+    # interpreted as escape sequences when the frame is printed.
+    frame+="${vc_label}=${pid}"$'\n'
+    frame+="status=${status}"$'\n'
+  done <<< "$sysctl_output"
+  # Only print the frame if it has changed.
+  if [[ -z "$printed" || "$frame" != "$prev_frame" ]]; then
+    printf '%s\n' "$frame"
+    prev_frame="$frame"
+    printed=true
+  fi
+  # If one_shot is set, exit after the first iteration.
+  if [[ -n "${one_shot}" ]]; then
+    break
+  fi
+  sleep 1
+done