Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[UPS-4940] create uitpas watchdog service #357

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions manifests/uitpas/api.pp
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
Optional[String] $newrelic_license_key = lookup('data::newrelic::license_key', Optional[String], 'first', undef),
Integer $portbase = 4800,
Enum['running', 'stopped'] $service_status = 'running',
Boolean $watchdog_enabled = false,
Hash $settings = {}
) inherits ::profiles {

Expand Down Expand Up @@ -197,6 +198,10 @@
before => Profiles::Glassfish::Domain['uitpas']
}

if $watchdog_enabled {
include ::profiles::uitpas::api::watchdog
}

# include ::profiles::uitpas::api::monitoring
# include ::profiles::uitpas::api::metrics
# include ::profiles::uitpas::api::backup
Expand Down
34 changes: 34 additions & 0 deletions manifests/uitpas/api/watchdog.pp
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# @summary Installs the UiTPAS API watchdog: an environment file, the watchdog
#   shell script and the systemd unit that runs it.
#
# @param health_url
#   Rendered as HEALTH_URL in the environment file.
# @param cardsystem_health_url
#   Rendered as CARDSYSTEM_HEALTH_URL in the environment file.
# @param configfile
#   Path of the environment file managed by this class; the service template
#   references it as its EnvironmentFile.
# @param logfile
#   Rendered as LOGFILE in the environment file.
# @param check_frequency
#   Rendered as CHECK_FREQUENCY in the environment file.
# @param slack_webhooks
#   A single webhook URL or a list of them, rendered as SLACK_WEBHOOKS.
#   Optional: when left undef the rendered list is empty.
class profiles::uitpas::api::watchdog (
  String                                  $health_url            = 'https://localhost:4881/uitid/rest/uitpas/health',
  String                                  $cardsystem_health_url = 'https://localhost:4881/uitid/rest/cardsystem/login',
  String                                  $configfile            = '/etc/default/uitpas-watchdog',
  String                                  $logfile               = '/var/log/uitpas-watchdog',
  Integer                                 $check_frequency       = 600,
  # Optional[] is required here: a bare Variant[String,Array[String]] does not
  # accept the undef default and would fail the parameter type check.
  Optional[Variant[String,Array[String]]] $slack_webhooks        = undef,
) inherits ::profiles {

  # NOTE(review): the service template runs the script as user 'ubuntu' and the
  # script writes to $logfile; confirm that user may create/write the default
  # path under /var/log, or manage the log file explicitly.

  # Environment file consumed by the systemd unit. Mode 0600 because it
  # contains the Slack webhook URLs.
  file { 'uitpas watchdog configfile':
    ensure  => 'file',
    path    => $configfile,
    content => template('profiles/uitpas/api/deployment/uitpas-watchdog-config.erb'),
    owner   => 'ubuntu',
    group   => 'ubuntu',
    mode    => '0600',
  }

  # The watchdog loop itself.
  file { 'uitpas watchdog script':
    ensure  => 'file',
    path    => '/usr/local/bin/uitpas-watchdog.sh',
    content => template('profiles/uitpas/api/deployment/uitpas-watchdog.sh.erb'),
    owner   => 'ubuntu',
    group   => 'ubuntu',
    mode    => '0755',
  }

  # Unit is only installed once the monitored service, the script and its
  # configuration are in place.
  systemd::unit_file { 'uitpas-watchdog.service':
    content => template('profiles/uitpas/api/deployment/uitpas-watchdog.service.erb'),
    enable  => true,
    active  => true,
    require => [
      Service['uitpas'],
      File['uitpas watchdog script'],
      File['uitpas watchdog configfile'],
    ],
  }
}
5 changes: 5 additions & 0 deletions templates/uitpas/api/deployment/uitpas-watchdog-config.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Environment file for the uitpas watchdog service (managed by Puppet).
# Loaded by the systemd unit through EnvironmentFile=; lines starting with '#'
# are ignored there.
HEALTH_URL="<%= @health_url %>"
CARDSYSTEM_HEALTH_URL="<%= @cardsystem_health_url %>"
LOGFILE="<%= @logfile %>"
# Space separated list of Slack webhook URLs; empty when none are configured.
SLACK_WEBHOOKS="<%= Array(@slack_webhooks).join(' ') %>"
CHECK_FREQUENCY=<%= @check_frequency %>
17 changes: 17 additions & 0 deletions templates/uitpas/api/deployment/uitpas-watchdog.service.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# systemd unit that keeps the uitpas watchdog script running.
[Unit]
Description=UITPAS API Watchdog service
Wants=basic.target
# Start only after basic system setup, networking and the application server.
After=basic.target network.target glassfish-uitpas.service
# PartOf= propagates stop/restart of glassfish-uitpas.service to this unit.
# NOTE(review): the watchdog script itself runs `service uitpas stop/start`;
# if that resolves to glassfish-uitpas.service, this unit would be stopped in
# the middle of its own restart sequence - confirm the unit names.
PartOf=glassfish-uitpas.service

[Service]
User=ubuntu
Group=ubuntu
# The leading '-' makes a missing environment file a non-error.
EnvironmentFile=-<%= @configfile %>
ExecStart=/usr/local/bin/uitpas-watchdog.sh
# The script loops forever; restart it only when it exits with an error.
Restart=on-failure
RestartSec=15s
Type=simple

[Install]
WantedBy=multi-user.target
51 changes: 51 additions & 0 deletions templates/uitpas/api/deployment/uitpas-watchdog.sh.erb
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
#!/bin/bash
#
# uitpas watchdog
#
# Polls the uitpas health endpoints every CHECK_FREQUENCY seconds. When a check
# fails, the uitpas service is restarted and a message is posted to every
# configured Slack webhook before and after the restart.
#
# Configuration is read from the environment:
#   HEALTH_URL             health endpoint; its response must contain "uitpas ok"
#   CARDSYSTEM_HEALTH_URL  second endpoint, only checked when the first is ok
#   LOGFILE                file receiving the output of the last health request
#   SLACK_WEBHOOKS         whitespace separated list of webhook URLs (optional)
#   CHECK_FREQUENCY        seconds between two checks (default: 600)

# init
echo "Starting uitpas watchdog with following config values:"
echo
echo "HEALTH_URL           = ${HEALTH_URL}"
echo "CARDSYSTEM_HEALTH_URL= ${CARDSYSTEM_HEALTH_URL}"
echo "LOGFILE              = ${LOGFILE}"
echo "SLACK_WEBHOOKS       = ${SLACK_WEBHOOKS}"
echo "CHECK_FREQUENCY      = ${CHECK_FREQUENCY}"
echo

# Fail fast on missing settings: with empty values every check would fail and
# the loop would restart the service continuously without ever sleeping.
: "${HEALTH_URL:?HEALTH_URL is not set}"
: "${CARDSYSTEM_HEALTH_URL:?CARDSYSTEM_HEALTH_URL is not set}"
: "${LOGFILE:?LOGFILE is not set}"
: "${CHECK_FREQUENCY:=600}"

# Run the health checks in order; returns 0 when everything is healthy,
# otherwise the exit status of the first failing command.
check_health() {
  /usr/bin/curl -s -k --max-time 30 --fail "${HEALTH_URL}" > "${LOGFILE}" 2>&1 || return $?

  # The health response was just written to LOGFILE; it has to report "uitpas ok".
  grep -q "uitpas ok" "${LOGFILE}" 2> /dev/null || return $?

  /usr/bin/curl -s -k --max-time 30 --fail "${CARDSYSTEM_HEALTH_URL}" > "${LOGFILE}" 2>&1
}

# Post the message given as $1 to every webhook in SLACK_WEBHOOKS.
notify_slack() {
  local message="$1"
  local webhook

  # SLACK_WEBHOOKS is a whitespace separated list: word splitting is intended.
  for webhook in ${SLACK_WEBHOOKS}
  do
    # Bounded by --max-time so an unreachable Slack cannot delay the restart.
    /usr/bin/curl -sS --max-time 30 -X POST --data-urlencode "payload={\"username\": \"uitpas watchdog\", \"text\": \"${message}\", \"icon_emoji\": \":ghost:\"}" "${webhook}"
  done
}

# start watchdog loop
while :
do
  # check state
  check_health
  ERROR=$?

  # if error, restart and post to slack
  if [ "${ERROR}" -ne 0 ]; then
    notify_slack "[PROD] UITPAS status: ${ERROR} - trying to restart"

    sudo service uitpas stop
    sudo service uitpas start

    notify_slack "restarting payara DONE"
  fi

  # repeat every $CHECK_FREQUENCY seconds
  sleep "${CHECK_FREQUENCY}"
done