diff --git a/manifests/uitpas/api.pp b/manifests/uitpas/api.pp
index 7d5f9c83..92390e98 100644
--- a/manifests/uitpas/api.pp
+++ b/manifests/uitpas/api.pp
@@ -9,6 +9,7 @@
   Optional[String] $newrelic_license_key = lookup('data::newrelic::license_key', Optional[String], 'first', undef),
   Integer $portbase = 4800,
   Enum['running', 'stopped'] $service_status = 'running',
+  Boolean $watchdog_enabled = false,
   Hash $settings = {}
 ) inherits ::profiles {
 
@@ -197,6 +198,10 @@
     before => Profiles::Glassfish::Domain['uitpas']
   }
 
+  if $watchdog_enabled {
+    include ::profiles::uitpas::api::watchdog
+  }
+
   # include ::profiles::uitpas::api::monitoring
   # include ::profiles::uitpas::api::metrics
   # include ::profiles::uitpas::api::backup
diff --git a/manifests/uitpas/api/watchdog.pp b/manifests/uitpas/api/watchdog.pp
new file mode 100644
index 00000000..d2a25ef0
--- /dev/null
+++ b/manifests/uitpas/api/watchdog.pp
@@ -0,0 +1,45 @@
+# @summary Installs the uitpas API watchdog: config file, script and systemd unit.
+#
+# @param health_url            URL checked first; the response must contain "uitpas ok"
+# @param cardsystem_health_url URL checked next, only when the first check passed
+# @param configfile            Environment file read by the systemd unit
+# @param logfile               File the watchdog script stores the curl output in
+# @param check_frequency       Seconds the script sleeps between two checks
+# @param slack_webhooks        A webhook URL or a list of them; none by default
+#
+# NOTE(review): the unit runs as 'ubuntu' and the script calls sudo - confirm that
+# user may write $logfile and restart the uitpas service without a password.
+class profiles::uitpas::api::watchdog (
+  String                        $health_url            = 'https://localhost:4881/uitid/rest/uitpas/health',
+  String                        $cardsystem_health_url = 'https://localhost:4881/uitid/rest/cardsystem/login',
+  String                        $configfile            = '/etc/default/uitpas-watchdog',
+  String                        $logfile               = '/var/log/uitpas-watchdog',
+  Integer                       $check_frequency       = 600,
+  Variant[String,Array[String]] $slack_webhooks        = [],
+) inherits ::profiles {
+
+  file { 'uitpas watchdog configfile':
+    ensure  => 'file',
+    path    => $configfile,
+    content => template('profiles/uitpas/api/deployment/uitpas-watchdog-config.erb'),
+    owner   => 'ubuntu',
+    group   => 'ubuntu',
+    mode    => '0600'
+  }
+
+  file { 'uitpas watchdog script':
+    ensure  => 'file',
+    path    => '/usr/local/bin/uitpas-watchdog.sh',
+    content => template('profiles/uitpas/api/deployment/uitpas-watchdog.sh.erb'),
+    owner   => 'ubuntu',
+    group   => 'ubuntu',
+    mode    => '0755'
+  }
+
+  systemd::unit_file { 'uitpas-watchdog.service':
+    content => template('profiles/uitpas/api/deployment/uitpas-watchdog.service.erb'),
+    enable  => true,
+    active  => true,
+    require => [ Service['uitpas'], File['uitpas watchdog script'], File['uitpas watchdog configfile'] ]
+  }
+}
diff --git a/templates/uitpas/api/deployment/uitpas-watchdog-config.erb b/templates/uitpas/api/deployment/uitpas-watchdog-config.erb
new file mode 100644
index 00000000..e50bc0e1
--- /dev/null
+++ b/templates/uitpas/api/deployment/uitpas-watchdog-config.erb
@@ -0,0 +1,5 @@
+HEALTH_URL="<%= @health_url %>"
+CARDSYSTEM_HEALTH_URL="<%= @cardsystem_health_url %>"
+LOGFILE="<%= @logfile %>"
+SLACK_WEBHOOKS="<%= Array(@slack_webhooks).join(' ') %>"
+CHECK_FREQUENCY=<%= @check_frequency %>
diff --git a/templates/uitpas/api/deployment/uitpas-watchdog.service.erb b/templates/uitpas/api/deployment/uitpas-watchdog.service.erb
new file mode 100644
index 00000000..2df0a784
--- /dev/null
+++ b/templates/uitpas/api/deployment/uitpas-watchdog.service.erb
@@ -0,0 +1,19 @@
+# NOTE(review): the watchdog script restarts "uitpas" while this unit is ordered
+# after / part of "glassfish-uitpas.service" - confirm that unit name is intended.
+[Unit]
+Description=UITPAS API Watchdog service
+Wants=basic.target
+After=basic.target network.target glassfish-uitpas.service
+PartOf=glassfish-uitpas.service
+
+[Service]
+User=ubuntu
+Group=ubuntu
+EnvironmentFile=-<%= @configfile %>
+ExecStart=/usr/local/bin/uitpas-watchdog.sh
+Restart=on-failure
+RestartSec=15s
+Type=simple
+
+[Install]
+WantedBy=multi-user.target
diff --git a/templates/uitpas/api/deployment/uitpas-watchdog.sh.erb b/templates/uitpas/api/deployment/uitpas-watchdog.sh.erb
new file mode 100644
index 00000000..6c191728
--- /dev/null
+++ b/templates/uitpas/api/deployment/uitpas-watchdog.sh.erb
@@ -0,0 +1,57 @@
+#!/bin/bash
+#
+# uitpas watchdog: polls the uitpas health endpoints and, when a check fails,
+# restarts the uitpas service and reports to the configured Slack webhooks.
+#
+# Settings are read from the environment (see EnvironmentFile in the unit):
+# HEALTH_URL, CARDSYSTEM_HEALTH_URL, LOGFILE, SLACK_WEBHOOKS, CHECK_FREQUENCY
+
+# Post the message in $1 to every webhook in the space separated SLACK_WEBHOOKS.
+notify_slack() {
+  local webhook
+  for webhook in $SLACK_WEBHOOKS
+  do
+    /usr/bin/curl -sS --max-time 30 -X POST --data-urlencode "payload={\"username\": \"uitpas watchdog\", \"text\": \"$1\", \"icon_emoji\": \":ghost:\"}" "$webhook"
+  done
+}
+
+# init
+echo "Starting uitpas watchdog with following config values:"
+echo
+echo "HEALTH_URL = ${HEALTH_URL}"
+echo "CARDSYSTEM_HEALTH_URL= ${CARDSYSTEM_HEALTH_URL}"
+echo "LOGFILE = ${LOGFILE}"
+echo "SLACK_WEBHOOKS = ${SLACK_WEBHOOKS}"
+echo "CHECK_FREQUENCY = ${CHECK_FREQUENCY}"
+echo
+
+# start watchdog loop
+while :
+do
+  # check state; the response is kept in $LOGFILE for the content check below
+  /usr/bin/curl -s -k --max-time 30 --fail "${HEALTH_URL}" > "$LOGFILE" 2>&1
+  ERROR=$?
+
+  if [ "$ERROR" -eq 0 ]; then
+    # the health response just written must contain "uitpas ok"
+    grep -q "uitpas ok" "$LOGFILE"
+    ERROR=$?
+    if [ "$ERROR" -eq 0 ]; then
+      /usr/bin/curl -s -k --max-time 30 --fail "${CARDSYSTEM_HEALTH_URL}" > "$LOGFILE" 2>&1
+      ERROR=$?
+    fi
+  fi
+
+  # if error, restart and post to slack
+  if [ "$ERROR" -ne 0 ]; then
+    notify_slack "[PROD] UITPAS status: $ERROR - trying to restart"
+
+    sudo service uitpas stop
+    sudo service uitpas start
+
+    notify_slack "restarting payara DONE"
+  fi
+
+  # repeat every $CHECK_FREQUENCY seconds (600 when unset)
+  sleep "${CHECK_FREQUENCY:-600}"
+done