Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Monit] Unmonitor the processes in containers which are disabled. #5153

Merged
merged 13 commits into from
Sep 25, 2020
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions dockers/docker-database/base_image_files/monit_database
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## redis_server
###############################################################################
check process redis_server matching "/usr/bin/redis-server"
if does not exist for 5 times within 5 cycles then alert
check program database|redis_server with path "/usr/bin/process_checker database /usr/bin/redis-server"
if status != 0 for 5 times within 5 cycles then alert
24 changes: 12 additions & 12 deletions dockers/docker-fpm-frr/base_image_files/monit_bgp
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,20 @@
## bgpcfgd
## bgpmon
###############################################################################
check process zebra matching "/usr/lib/frr/zebra"
if does not exist for 5 times within 5 cycles then alert
check program bgp|zebra with path "/usr/bin/process_checker bgp /usr/lib/frr/zebra"
if status != 0 for 5 times within 5 cycles then alert

check process fpmsyncd matching "fpmsyncd"
if does not exist for 5 times within 5 cycles then alert
check program bgp|fpmsyncd with path "/usr/bin/process_checker bgp fpmsyncd"
if status != 0 for 5 times within 5 cycles then alert

check process bgpd matching "/usr/lib/frr/bgpd"
if does not exist for 5 times within 5 cycles then alert
check program bgp|bgpd with path "/usr/bin/process_checker bgp /usr/lib/frr/bgpd"
if status != 0 for 5 times within 5 cycles then alert

check process staticd matching "/usr/lib/frr/staticd"
if does not exist for 5 times within 5 cycles then alert
check program bgp|staticd with path "/usr/bin/process_checker bgp /usr/lib/frr/staticd"
if status != 0 for 5 times within 5 cycles then alert

check process bgpcfgd matching "python /usr/local/bin/bgpcfgd"
if does not exist for 5 times within 5 cycles then alert
check program bgp|bgpcfgd with path "/usr/bin/process_checker bgp /usr/bin/python /usr/local/bin/bgpcfgd"
jleveque marked this conversation as resolved.
Show resolved Hide resolved
if status != 0 for 5 times within 5 cycles then alert

check process bgpmon matching "python /usr/local/bin/bgpmon"
if does not exist for 5 times within 5 cycles then alert
check program bgp|bgpmon with path "/usr/bin/process_checker bgp python /usr/local/bin/bgpmon"
if status != 0 for 5 times within 5 cycles then alert
12 changes: 6 additions & 6 deletions dockers/docker-lldp/base_image_files/monit_lldp
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
## lldp-syncd
## lldpmgrd
###############################################################################
check process lldpd_monitor matching "lldpd: "
if does not exist for 5 times within 5 cycles then alert
check program lldp|lldpd_monitor with path "/usr/bin/process_checker lldp lldpd:"
if status != 0 for 5 times within 5 cycles then alert

check process lldp_syncd matching "python2 -m lldp_syncd"
if does not exist for 5 times within 5 cycles then alert
check program lldp|lldp_syncd with path "/usr/bin/process_checker lldp python2 -m lldp_syncd"
if status != 0 for 5 times within 5 cycles then alert

check process lldpmgrd matching "python /usr/bin/lldpmgrd"
if does not exist for 5 times within 5 cycles then alert
check program lldp|lldpmgrd with path "/usr/bin/process_checker lldp python /usr/bin/lldpmgrd"
if status != 0 for 5 times within 5 cycles then alert
42 changes: 21 additions & 21 deletions dockers/docker-orchagent/base_image_files/monit_swss
Original file line number Diff line number Diff line change
Expand Up @@ -11,33 +11,33 @@
## buffermgrd
## nbrmgrd
## vxlanmgrd
###############################################################################
check process orchagent matching "/usr/bin/orchagent -d /var/log/swss"
if does not exist for 5 times within 5 cycles then alert
##############################################################################
check program swss|orchagent with path "/usr/bin/process_checker swss /usr/bin/orchagent -d /var/log/swss"
if status != 0 for 5 times within 5 cycles then alert

check process portsyncd matching "/usr/bin/portsyncd"
if does not exist for 5 times within 5 cycles then alert
check program swss|portsyncd with path "/usr/bin/process_checker swss /usr/bin/portsyncd"
if status != 0 for 5 times within 5 cycles then alert

check process neighsyncd matching "/usr/bin/neighsyncd"
if does not exist for 5 times within 5 cycles then alert
check program swss|neighsyncd with path "/usr/bin/process_checker swss /usr/bin/neighsyncd"
if status != 0 for 5 times within 5 cycles then alert

check process vrfmgrd matching "/usr/bin/vrfmgrd"
if does not exist for 5 times within 5 cycles then alert
check program swss|vrfmgrd with path "/usr/bin/process_checker swss /usr/bin/vrfmgrd"
if status != 0 for 5 times within 5 cycles then alert

check process vlanmgrd matching "/usr/bin/vlanmgrd"
if does not exist for 5 times within 5 cycles then alert
check program swss|vlanmgrd with path "/usr/bin/process_checker swss /usr/bin/vlanmgrd"
if status != 0 for 5 times within 5 cycles then alert

check process intfmgrd matching "/usr/bin/intfmgrd"
if does not exist for 5 times within 5 cycles then alert
check program swss|intfmgrd with path "/usr/bin/process_checker swss /usr/bin/intfmgrd"
if status != 0 for 5 times within 5 cycles then alert

check process portmgrd matching "/usr/bin/portmgrd"
if does not exist for 5 times within 5 cycles then alert
check program swss|portmgrd with path "/usr/bin/process_checker swss /usr/bin/portmgrd"
if status != 0 for 5 times within 5 cycles then alert

check process buffermgrd matching "/usr/bin/buffermgrd -l"
if does not exist for 5 times within 5 cycles then alert
check program swss|buffermgrd with path "/usr/bin/process_checker swss /usr/bin/buffermgrd -l"
if status != 0 for 5 times within 5 cycles then alert

check process nbrmgrd matching "/usr/bin/nbrmgrd"
if does not exist for 5 times within 5 cycles then alert
check program swss|nbrmgrd with path "/usr/bin/process_checker swss /usr/bin/nbrmgrd"
if status != 0 for 5 times within 5 cycles then alert

check process vxlanmgrd matching "/usr/bin/vxlanmgrd"
if does not exist for 5 times within 5 cycles then alert
check program swss|vxlanmgrd with path "/usr/bin/process_checker swss /usr/bin/vxlanmgrd"
if status != 0 for 5 times within 5 cycles then alert
4 changes: 2 additions & 2 deletions dockers/docker-sflow/base_image_files/monit_sflow
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## sflowmgrd
###############################################################################
check process sflowmgrd matching "/usr/bin/sflowmgrd"
if does not exist for 5 times within 5 cycles then alert
check program sflow|sflowmgrd with path "/usr/bin/process_checker sflow /usr/bin/sflowmgrd"
if status != 0 for 5 times within 5 cycles then alert
8 changes: 4 additions & 4 deletions dockers/docker-snmp/base_image_files/monit_snmp
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
## snmpd
## snmpd_subagent
###############################################################################
check process snmpd matching "/usr/sbin/snmpd\s"
if does not exist for 5 times within 5 cycles then alert
check program snmp|snmpd with path "/usr/bin/process_checker snmp /usr/sbin/snmpd"
if status != 0 for 5 times within 5 cycles then alert

check process snmp_subagent matching "python3 -m sonic_ax_impl"
if does not exist for 5 times within 5 cycles then alert
check program snmp|snmp_subagent with path "/usr/bin/process_checker snmp python3.6 -m sonic_ax_impl"
if status != 0 for 5 times within 5 cycles then alert
4 changes: 2 additions & 2 deletions dockers/docker-sonic-restapi/base_image_files/monit_restapi
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## restapi
###############################################################################
check process restapi matching "/usr/sbin/go-server-server"
if does not exist for 5 times within 5 cycles then alert
check program restapi|restapi with path "/usr/bin/process_checker restapi /usr/sbin/go-server-server"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
## telemetry
## dialout_client
###############################################################################
check process telemetry matching "/usr/sbin/telemetry"
if does not exist for 5 times within 5 cycles then alert
check program telemetry|telemetry with path "/usr/bin/process_checker telemetry /usr/sbin/telemetry"
if status != 0 for 5 times within 5 cycles then alert

check process dialout_client matching "/usr/sbin/dialout_client_cli"
if does not exist for 5 times within 5 cycles then alert
check program telemetry|dialout_client with path "/usr/bin/process_checker telemetry /usr/sbin/dialout_client_cli"
if status != 0 for 5 times within 5 cycles then alert
11 changes: 11 additions & 0 deletions dockers/docker-teamd/base_image_files/monit_teamd
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
###############################################################################
## Monit configuration for teamd container
## process list:
## teamsyncd
## teammgrd
###############################################################################
check program teamd|teamsyncd with path "/usr/bin/process_checker teamd /usr/bin/teamsyncd"
if status != 0 for 5 times within 5 cycles then alert

check program teamd|teammgrd with path "/usr/bin/process_checker teamd /usr/bin/teammgrd"
if status != 0 for 5 times within 5 cycles then alert
5 changes: 5 additions & 0 deletions files/build_templates/sonic_debian_extension.j2
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,9 @@ sudo rm -rf $FILESYSTEM_ROOT/$REDIS_DUMP_LOAD_PY2_WHEEL_NAME
# Install Python module for ipaddress
sudo https_proxy=$https_proxy LANG=C chroot $FILESYSTEM_ROOT pip install ipaddress

# Install Python module for psutil
sudo https_proxy=$https_proxy LANG=C chroot $FILESYSTEM_ROOT pip install psutil

# Install SwSS SDK Python 3 package
# Note: the scripts will be overwritten by corresponding Python 2 package
if [ -e {{swsssdk_py3_wheel_path}} ]; then
Expand Down Expand Up @@ -239,6 +242,8 @@ sudo cp $IMAGE_CONFIGS/monit/monitrc $FILESYSTEM_ROOT/etc/monit/
sudo chmod 600 $FILESYSTEM_ROOT/etc/monit/monitrc
sudo cp $IMAGE_CONFIGS/monit/conf.d/* $FILESYSTEM_ROOT/etc/monit/conf.d/
sudo chmod 600 $FILESYSTEM_ROOT/etc/monit/conf.d/*
sudo cp $IMAGE_CONFIGS/monit/process_checker $FILESYSTEM_ROOT/usr/bin/
sudo chmod 755 $FILESYSTEM_ROOT/usr/bin/process_checker

# Copy crontabs
sudo cp -f $IMAGE_CONFIGS/cron.d/* $FILESYSTEM_ROOT/etc/cron.d/
Expand Down
57 changes: 57 additions & 0 deletions files/image_config/monit/process_checker
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
#!/usr/bin/python
import argparse
import sys
import syslog

import psutil
import swsssdk


def check_process_existence(container_name, process_cmdline):
    """
    @summary: Check whether the process in the specified container is running or not.
              If it is not running, a message is printed to stdout and the script
              exits with status 1.
    @param container_name: Name of the container, looked up as a key in the
                           'FEATURE' table of Config DB.
    @param process_cmdline: Command line prefix of the process to look for. A process
                            matches if its space-joined command line starts with
                            this string.
    @return: None. Exits with status 0 if the container is disabled and with status 1
             if no matching process is found. If the container name is not present in
             the 'FEATURE' table, an error is written to syslog and the function returns.
    """
    config_db = swsssdk.ConfigDBConnector()
    config_db.connect()
    feature_table = config_db.get_table("FEATURE")

    if container_name not in feature_table:
        syslog.syslog(syslog.LOG_ERR, "container '{}' is not included in SONiC image or the given container name is invalid!"
                      .format(container_name))
        return

    # We look into the 'FEATURE' table to verify whether the container is disabled or not.
    # If the container is disabled, there is nothing to check and we exit successfully.
    if feature_table[container_name].get("state") == "disabled":
        sys.exit(0)

    # We leverage the psutil library to check whether the process is running or not.
    # If the process is found in the process list and it is in the 'running' or
    # 'sleeping' state, then it is considered to be running.
    #
    # The attributes are read from the 'info' snapshot that process_iter() attaches to
    # each Process object instead of being queried again through process.cmdline() /
    # process.status(): a second query raises psutil.NoSuchProcess if the process
    # exited in the meantime, which would crash this checker and trigger a false alert.
    is_running = False
    for process in psutil.process_iter(["cmdline", "status"]):
        # An attribute that could not be retrieved is reported as None in the snapshot.
        cmdline = ' '.join(process.info["cmdline"] or [])
        if cmdline.startswith(process_cmdline) and process.info["status"] in ["running", "sleeping"]:
            is_running = True
            break

    if not is_running:
        # If this script is run by Monit, then the following output will be appended to
        # Monit's syslog message.
        print("'{}' is not running.".format(process_cmdline))
        sys.exit(1)


def main():
    """
    @summary: Parse the command line arguments and check whether the given process
              in the given container is running.

    The first positional argument is the container name; everything after it is
    joined with single spaces to form the process command line to look for.
    """
    parser = argparse.ArgumentParser(
        description="Check whether the process in the specified "
                    "container is running and an alerting message will be written into syslog if it "
                    "failed to run.",
        usage="/usr/bin/process_checker <container_name> <process_cmdline>")
    parser.add_argument("container_name", help="container name")
    parser.add_argument("process_cmdline", nargs=argparse.REMAINDER, help="process command line")
    args = parser.parse_args()

    # argparse.REMAINDER accepts zero arguments. An empty command line would be joined
    # into "", which is a prefix of every process' command line, so the check would
    # always succeed. Reject it instead (parser.error() exits with status 2).
    if not args.process_cmdline:
        parser.error("process command line must not be empty")

    check_process_existence(args.container_name, ' '.join(args.process_cmdline))


if __name__ == '__main__':
    main()
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## syncd
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
## syncd
## dsserve
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd"
if status != 0 for 5 times within 5 cycles then alert

check process dsserve matching "/usr/bin/dsserve /usr/bin/syncd"
if does not exist for 5 times within 5 cycles then alert
check program syncd|dsserve with path "/usr/bin/process_checker syncd /usr/bin/dsserve /usr/bin/syncd"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## syncd
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## syncd
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## syncd
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## syncd
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## syncd
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## syncd
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
## syncd
## dsserve
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd"
if status != 0 for 5 times within 5 cycles then alert

check process dsserve matching "/usr/bin/dsserve /usr/bin/syncd"
if does not exist for 5 times within 5 cycles then alert
check program syncd|dsserve with path "/usr/bin/process_checker syncd /usr/bin/dsserve /usr/bin/syncd"
if status != 0 for 5 times within 5 cycles then alert
1 change: 1 addition & 0 deletions rules/docker-teamd.mk
Original file line number Diff line number Diff line change
Expand Up @@ -27,4 +27,5 @@ $(DOCKER_TEAMD)_RUN_OPT += -v /etc/sonic:/etc/sonic:ro
$(DOCKER_TEAMD)_RUN_OPT += -v /host/warmboot:/var/warmboot

$(DOCKER_TEAMD)_BASE_IMAGE_FILES += teamdctl:/usr/bin/teamdctl
$(DOCKER_TEAMD)_BASE_IMAGE_FILES += monit_teamd:/etc/monit/conf.d
$(DOCKER_TEAMD)_FILES += $(SUPERVISOR_PROC_EXIT_LISTENER_SCRIPT)