Skip to content

Commit

Permalink
[monit] Unmonitor the processes in containers which are disabled.
Browse files Browse the repository at this point in the history
Signed-off-by: Yong Zhao <[email protected]>
  • Loading branch information
yozhao101 committed Aug 11, 2020
1 parent 2b5e418 commit ad64dc6
Show file tree
Hide file tree
Showing 19 changed files with 139 additions and 72 deletions.
4 changes: 2 additions & 2 deletions dockers/docker-database/base_image_files/monit_database
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## redis_server
###############################################################################
check process redis_server matching "/usr/bin/redis-server"
if does not exist for 5 times within 5 cycles then alert
check program container_process_redis_server with path "/usr/bin/process_checker database redis-server /usr/bin/redis-server"
if status != 0 for 5 times within 5 cycles then alert
20 changes: 10 additions & 10 deletions dockers/docker-fpm-frr/base_image_files/monit_bgp
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,17 @@
## staticd
## bgpcfgd
###############################################################################
check process zebra matching "/usr/lib/frr/zebra"
if does not exist for 5 times within 5 cycles then alert
check program container_process_zebra with path "/usr/bin/process_checker bgp zebra /usr/lib/frr/zebra"
if status != 0 for 5 times within 5 cycles then alert

check process fpmsyncd matching "fpmsyncd"
if does not exist for 5 times within 5 cycles then alert
check program container_process_fpmsyncd with path "/usr/bin/process_checker bgp fpmsyncd fpmsyncd"
if status != 0 for 5 times within 5 cycles then alert

check process bgpd matching "/usr/lib/frr/bgpd"
if does not exist for 5 times within 5 cycles then alert
check program container_process_bgpd with path "/usr/bin/process_checker bgp bgpd /usr/lib/frr/bgpd"
if status != 0 for 5 times within 5 cycles then alert

check process staticd matching "/usr/lib/frr/staticd"
if does not exist for 5 times within 5 cycles then alert
check program container_process_staticd with path "/usr/bin/process_checker bgp staticd /usr/lib/frr/staticd"
if status != 0 for 5 times within 5 cycles then alert

check process bgpcfgd matching "python /usr/local/bin/bgpcfgd"
if does not exist for 5 times within 5 cycles then alert
check program container_process_bgpcfgd with path "/usr/bin/process_checker bgp bgpcfgd python /usr/local/bin/bgpcfgd"
if status != 0 for 5 times within 5 cycles then alert
12 changes: 6 additions & 6 deletions dockers/docker-lldp/base_image_files/monit_lldp
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
## lldp-syncd
## lldpmgrd
###############################################################################
check process lldpd_monitor matching "lldpd: "
if does not exist for 5 times within 5 cycles then alert
check program container_process_lldpd_monitor with path "/usr/bin/process_checker lldp lldpd lldpd:"
if status != 0 for 5 times within 5 cycles then alert

check process lldp_syncd matching "python2 -m lldp_syncd"
if does not exist for 5 times within 5 cycles then alert
check program container_process_lldp_syncd with path "/usr/bin/process_checker lldp lldp_syncd python2 -m lldp_syncd"
if status != 0 for 5 times within 5 cycles then alert

check process lldpmgrd matching "python /usr/bin/lldpmgrd"
if does not exist for 5 times within 5 cycles then alert
check program container_process_lldpmgrd with path "/usr/bin/process_checker lldp lldpmgrd python /usr/bin/lldpmgrd"
if status != 0 for 5 times within 5 cycles then alert
40 changes: 20 additions & 20 deletions dockers/docker-orchagent/base_image_files/monit_swss
Original file line number Diff line number Diff line change
Expand Up @@ -12,32 +12,32 @@
## nbrmgrd
## vxlanmgrd
###############################################################################
check process orchagent matching "/usr/bin/orchagent -d /var/log/swss"
if does not exist for 5 times within 5 cycles then alert
check program container_process_orchagent with path "/usr/bin/process_checker swss orchagent /usr/bin/orchagent -d /var/log/swss"
if status != 0 for 5 times within 5 cycles then alert

check process portsyncd matching "/usr/bin/portsyncd"
if does not exist for 5 times within 5 cycles then alert
check program container_process_portsyncd with path "/usr/bin/process_checker swss portsyncd /usr/bin/portsyncd"
if status != 0 for 5 times within 5 cycles then alert

check process neighsyncd matching "/usr/bin/neighsyncd"
if does not exist for 5 times within 5 cycles then alert
check program container_process_neighsyncd with path "/usr/bin/process_checker swss neighsyncd /usr/bin/neighsyncd"
if status != 0 for 5 times within 5 cycles then alert

check process vrfmgrd matching "/usr/bin/vrfmgrd"
if does not exist for 5 times within 5 cycles then alert
check program container_process_vrfmgrd with path "/usr/bin/process_checker swss vrfmgrd /usr/bin/vrfmgrd"
if status != 0 for 5 times within 5 cycles then alert

check process vlanmgrd matching "/usr/bin/vlanmgrd"
if does not exist for 5 times within 5 cycles then alert
check program container_process_vlanmgrd with path "/usr/bin/process_checker swss vlanmgrd /usr/bin/vlanmgrd"
if status != 0 for 5 times within 5 cycles then alert

check process intfmgrd matching "/usr/bin/intfmgrd"
if does not exist for 5 times within 5 cycles then alert
check program container_process_intfmgrd with path "/usr/bin/process_checker swss intfmgrd /usr/bin/intfmgrd"
if status != 0 for 5 times within 5 cycles then alert

check process portmgrd matching "/usr/bin/portmgrd"
if does not exist for 5 times within 5 cycles then alert
check program container_process_portmgrd with path "/usr/bin/process_checker swss portmgrd /usr/bin/portmgrd"
if status != 0 for 5 times within 5 cycles then alert

check process buffermgrd matching "/usr/bin/buffermgrd -l"
if does not exist for 5 times within 5 cycles then alert
check program container_process_buffermgrd with path "/usr/bin/process_checker swss buffermgrd /usr/bin/buffermgrd -l"
if status != 0 for 5 times within 5 cycles then alert

check process nbrmgrd matching "/usr/bin/nbrmgrd"
if does not exist for 5 times within 5 cycles then alert
check program container_process_nbrmgrd with path "/usr/bin/process_checker swss nbrmgrd /usr/bin/nbrmgrd"
if status != 0 for 5 times within 5 cycles then alert

check process vxlanmgrd matching "/usr/bin/vxlanmgrd"
if does not exist for 5 times within 5 cycles then alert
check program container_process_vxlanmgrd with path "/usr/bin/process_checker swss vxlanmgrd /usr/bin/vxlanmgrd"
if status != 0 for 5 times within 5 cycles then alert
4 changes: 2 additions & 2 deletions dockers/docker-sflow/base_image_files/monit_sflow
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## sflowmgrd
###############################################################################
check process sflowmgrd matching "/usr/bin/sflowmgrd"
if does not exist for 5 times within 5 cycles then alert
check program container_process_sflowmgrd with path "/usr/bin/process_checker sflow sflowmgrd /usr/bin/sflowmgrd"
if status != 0 for 5 times within 5 cycles then alert
8 changes: 4 additions & 4 deletions dockers/docker-snmp/base_image_files/monit_snmp
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
## snmpd
## snmpd_subagent
###############################################################################
check process snmpd matching "/usr/sbin/snmpd\s"
if does not exist for 5 times within 5 cycles then alert
check program container_process_snmpd with path "/usr/bin/process_checker snmp snmpd /usr/sbin/snmpd"
if status != 0 for 5 times within 5 cycles then alert

check process snmp_subagent matching "python3 -m sonic_ax_impl"
if does not exist for 5 times within 5 cycles then alert
check program container_process_snmp_subagent with path "/usr/bin/process_checker snmp snmp_subagent python3 -m sonic_ax_impl"
if status != 0 for 5 times within 5 cycles then alert
4 changes: 2 additions & 2 deletions dockers/docker-sonic-restapi/base_image_files/monit_restapi
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## restapi
###############################################################################
check process restapi matching "/usr/sbin/go-server-server"
if does not exist for 5 times within 5 cycles then alert
check program container_process_restapi with path "/usr/bin/process_checker restapi restapi /usr/sbin/go-server-server"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
## telemetry
## dialout_client
###############################################################################
check process telemetry matching "/usr/sbin/telemetry"
if does not exist for 5 times within 5 cycles then alert
check program container_process_telemetry with path "/usr/bin/process_checker telemetry telemetry /usr/sbin/telemetry"
if status != 0 for 5 times within 5 cycles then alert

check process dialout_client matching "/usr/sbin/dialout_client_cli"
if does not exist for 5 times within 5 cycles then alert
check program container_process_dialout_client with path "/usr/bin/process_checker telemetry dialout_client /usr/sbin/dialout_client_cli"
if status != 0 for 5 times within 5 cycles then alert
3 changes: 3 additions & 0 deletions files/build_templates/sonic_debian_extension.j2
Original file line number Diff line number Diff line change
Expand Up @@ -228,6 +228,9 @@ sudo cp $IMAGE_CONFIGS/monit/monitrc $FILESYSTEM_ROOT/etc/monit/
sudo chmod 600 $FILESYSTEM_ROOT/etc/monit/monitrc
sudo cp $IMAGE_CONFIGS/monit/conf.d/* $FILESYSTEM_ROOT/etc/monit/conf.d/
sudo chmod 600 $FILESYSTEM_ROOT/etc/monit/conf.d/*
sudo cp $IMAGE_CONFIGS/monit/process_checker $FILESYSTEM_ROOT/usr/bin/
sudo chmod 755 $FILESYSTEM_ROOT/usr/bin/process_checker


# Copy crontabs
sudo cp -f $IMAGE_CONFIGS/cron.d/* $FILESYSTEM_ROOT/etc/cron.d/
Expand Down
64 changes: 64 additions & 0 deletions files/image_config/monit/process_checker
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
#!/usr/bin/python
import argparse
import sys
import syslog

import psutil
import swsssdk


def check_process_existence(container_name, process_name, process_cmdline):
    """
    @summary: Check whether a process belonging to the specified container is running.

    The container is looked up in the 'FEATURE' table of Config DB:
      * If the container is not present in the table, an error is written into syslog
        and the function returns normally.
      * If the container's 'state' field is 'disabled', the script exits with status 0
        without inspecting any process.
      * Otherwise the process tree is scanned. If no matching process in the 'running'
        or 'sleeping' state is found, a message is printed to stdout and the script
        exits with status 1.

    @param container_name: Key of the container in the 'FEATURE' table (e.g. 'bgp').
    @param process_name: Name of the process; compared for equality with the process name
                         reported by psutil.
    @param process_cmdline: Command line of the process; matched as a substring of the
                            space-joined command line reported by psutil. An empty string
                            disables command-line matching.
    """
    config_db = swsssdk.ConfigDBConnector()
    config_db.connect()
    feature_table = config_db.get_table("FEATURE")

    if container_name not in feature_table:
        syslog.syslog(syslog.LOG_ERR, "container '{}' is not included in SONiC image or the given container name is invalid!"
                      .format(container_name))
        return

    # We look into the 'FEATURE' table to verify whether the container is disabled or not.
    # If the container is disabled, we exit successfully without checking the process.
    if feature_table[container_name].get("state") == "disabled":
        sys.exit(0)

    # We leverage the psutil library to check whether the process is running or not.
    # If the process entity is found in the process tree and it is also in the 'running'
    # or 'sleeping' state, then it is marked as 'running'.
    is_running = False
    for process in psutil.process_iter(["name", "cmdline", "status"]):
        # Use the attributes pre-fetched by process_iter() instead of querying the process
        # again: a process may terminate while we are iterating, and a fresh query would
        # then raise psutil.NoSuchProcess and abort the whole check.
        info = process.info

        # The script process_checker has the command line format '/usr/bin/process_checker
        # <container_name> <process_name> <process_cmdline>' such as
        # '/usr/bin/process_checker bgp fpmsyncd fpmsyncd'. So when searching for the process
        # 'fpmsyncd' we skip the process which runs process_checker itself, since it is not
        # the 'fpmsyncd' process although 'fpmsyncd' is a substring of its cmdline.
        if info["name"] == "process_checker":
            continue

        if info["status"] not in ("running", "sleeping"):
            continue

        # A pre-fetched attribute is None when it could not be retrieved; treat that as an
        # empty command line.
        joined_cmdline = ' '.join(info["cmdline"] or [])

        # An empty 'process_cmdline' is a substring of every string, so it must not be
        # allowed to match; otherwise any running process would satisfy the check.
        if process_name == info["name"] or (process_cmdline and process_cmdline in joined_cmdline):
            is_running = True
            break

    if not is_running:
        print("'{}' is not running.".format(process_name))
        sys.exit(1)


def main():
    """
    @summary: Parse the command line and run the process check.

    Expected invocation:
        /usr/bin/process_checker <container_name> <process_name> <process_cmdline>

    Everything after <process_name> is collected verbatim (including tokens that start
    with '-') and joined with single spaces to form the command line to search for.
    """
    parser = argparse.ArgumentParser(
        description="Check whether the process in the specified container is running "
                    "and an alerting message will be written into syslog if it failed to run.",
        usage="/usr/bin/process_checker <container_name> <process_name> <process_cmdline>")
    parser.add_argument("container_name", help="container name")
    parser.add_argument("process_name", help="process name")
    # REMAINDER keeps option-like tokens such as '-d' as part of the command line
    # instead of letting argparse interpret them.
    parser.add_argument("process_cmdline", nargs=argparse.REMAINDER, help="process command line")
    args = parser.parse_args()

    check_process_existence(args.container_name, args.process_name, ' '.join(args.process_cmdline))


if __name__ == '__main__':
main()
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## syncd
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program container_process_syncd with path "/usr/bin/process_checker syncd syncd /usr/bin/syncd --diag"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
## syncd
## dsserve
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program container_process_syncd with path "/usr/bin/process_checker syncd syncd /usr/bin/syncd --diag"
if status != 0 for 5 times within 5 cycles then alert

check process dsserve matching "/usr/bin/dsserve /usr/bin/syncd"
if does not exist for 5 times within 5 cycles then alert
check program container_process_dsserve with path "/usr/bin/process_checker syncd dsserve /usr/bin/dsserve /usr/bin/syncd"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## syncd
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program container_process_syncd with path "/usr/bin/process_checker syncd syncd /usr/bin/syncd --diag"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## syncd
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program container_process_syncd with path "/usr/bin/process_checker syncd syncd /usr/bin/syncd --diag"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## syncd
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program container_process_syncd with path "/usr/bin/process_checker syncd syncd /usr/bin/syncd --diag"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## syncd
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program container_process_syncd with path "/usr/bin/process_checker syncd syncd /usr/bin/syncd --diag"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## syncd
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program container_process_syncd with path "/usr/bin/process_checker syncd syncd /usr/bin/syncd --diag"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,5 @@
## process list:
## syncd
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program container_process_syncd with path "/usr/bin/process_checker syncd syncd /usr/bin/syncd --diag"
if status != 0 for 5 times within 5 cycles then alert
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@
## syncd
## dsserve
###############################################################################
check process syncd matching "/usr/bin/syncd\s"
if does not exist for 5 times within 5 cycles then alert
check program container_process_syncd with path "/usr/bin/process_checker syncd syncd /usr/bin/syncd --diag"
if status != 0 for 5 times within 5 cycles then alert

check process dsserve matching "/usr/bin/dsserve /usr/bin/syncd"
if does not exist for 5 times within 5 cycles then alert
check program container_process_dsserve with path "/usr/bin/process_checker syncd dsserve /usr/bin/dsserve /usr/bin/syncd"
if status != 0 for 5 times within 5 cycles then alert

0 comments on commit ad64dc6

Please sign in to comment.