Skip to content

Commit

Permalink
roachprod: show process exit status with monitor
Browse files Browse the repository at this point in the history
This patch changes `roachprod monitor` to use `systemctl` to poll
process info on non-local clusters and to output the exit status of dead
nodes. On local clusters, it retains the old `lsof`-based logic.

Release note: None
  • Loading branch information
erikgrinaker committed Jun 13, 2021
1 parent df5d799 commit 8ef43c5
Showing 1 changed file with 23 additions and 14 deletions.
37 changes: 23 additions & 14 deletions pkg/cmd/roachprod/install/cluster_synced.go
Original file line number Diff line number Diff line change
Expand Up @@ -349,42 +349,51 @@ func (c *SyncedCluster) Monitor(ignoreEmptyNodes bool, oneShot bool) chan NodeMo
IgnoreEmpty bool
Store string
Port int
Local bool
}{
OneShot: oneShot,
IgnoreEmpty: ignoreEmptyNodes,
Store: Cockroach{}.NodeDir(c, nodes[i], 1 /* storeIndex */),
Port: Cockroach{}.NodePort(c, nodes[i]),
Local: c.IsLocal(),
}

snippet := `
lastpid=0
{{ if .IgnoreEmpty}}
{{ if .IgnoreEmpty }}
if [ ! -f "{{.Store}}/CURRENT" ]; then
echo "skipped"
exit 0
fi
{{- end}}
while :; do
{{ if .Local }}
pid=$(lsof -i :{{.Port}} -sTCP:LISTEN | awk '!/COMMAND/ {print $2}')
pid=${pid:-0} # default to 0
status=unknown
{{- else }}
pid=$(systemctl show cockroach --property MainPID --value)
status=$(systemctl show cockroach --property ExecMainStatus --value)
{{- end }}
if [ "${pid}" != "${lastpid}" ]; then
if [ -n "${lastpid}" -a -z "${pid}" ]; then
echo dead
if [ "${pid}" == 0 ]; then
echo "dead (exit status ${status})"
else
echo "${pid}"
fi
lastpid=${pid}
if [ -n "${pid}" ]; then
echo ${pid}
fi
fi
{{if .OneShot }}
{{ if .OneShot }}
exit 0
{{- end}}
if [ -n "${lastpid}" ]; then
while kill -0 "${lastpid}"; do
{{- end }}
sleep 1
if [ "${pid}" != 0 ]; then
while kill -0 "${pid}"; do
sleep 1
done
echo "kill exited nonzero"
else
sleep 1
fi
done
`
Expand Down

0 comments on commit 8ef43c5

Please sign in to comment.