From 39836392cfc314cbdeba11e40c10f570eb863253 Mon Sep 17 00:00:00 2001 From: Abhishek Dosi Date: Mon, 26 Oct 2020 12:00:03 -0700 Subject: [PATCH 1/6] Adding patch to monit to make sure alert/syslog is handled for a clause like this: if status != 0 for x cycle then alert repeat every y cycle. With the above clause, an error syslog will be generated after x cycles and then on every y-th cycle if the error is persistent Signed-off-by: Abhishek Dosi --- .../0002-change_monit_alert_log_error.patch | 71 +++++++++++++++++++ src/monit/patch/series | 1 + 2 files changed, 72 insertions(+) create mode 100644 src/monit/patch/0002-change_monit_alert_log_error.patch diff --git a/src/monit/patch/0002-change_monit_alert_log_error.patch b/src/monit/patch/0002-change_monit_alert_log_error.patch new file mode 100644 index 000000000000..d867335e18ad --- /dev/null +++ b/src/monit/patch/0002-change_monit_alert_log_error.patch @@ -0,0 +1,71 @@ +From 97a5defc6a7fcc6a00f691bb5314ceb8fb7704e9 Mon Sep 17 00:00:00 2001 +From: Abhishek Dosi +Date: Mon, 26 Oct 2020 11:40:02 -0700 +Subject: [PATCH] Patch on top of commit Patch is addressing these changes:- + +a) Enable repeat keyword for alert action . This was we can log +syslog error message for continuous failure condition. 
+ +b) Make sure log and error message are different so that we do not rate +limit error message with log message + +c) Make sure error message is loggged is state change to fail first time +or we have repeat clause for alert + +Signed-off-by: Abhishek Dosi + +Signed-off-by: Abhishek Dosi +--- + src/event.c | 6 +++++- + src/p.y | 8 +++++++- + 2 files changed, 12 insertions(+), 2 deletions(-) + +diff --git a/src/event.c b/src/event.c +index ed363ee..969568f 100644 +--- a/src/event.c ++++ b/src/event.c +@@ -336,8 +336,12 @@ static void _handleEvent(Service_T S, Event_T E) { + if (E->state != State_Init || E->state_map & 0x1) { + if (E->state == State_Succeeded || E->state == State_ChangedNot || E->id == Event_Instance || E->id == Event_Action) + LogInfo("'%s' %s\n", S->name, E->message); +- else ++ /* Send Error log if state change to failed for 1st time or if we have repeat clause then do periodically */ ++ else if ((E->state_changed) || (E->state == State_Failed && E->action->failed->repeat && E->count % E->action->failed->repeat == 0)) + LogError("'%s' %s\n", S->name, E->message); ++ else ++ /* Here we are making log message differnt so that we do not rate-limit Err and Info in same bucket*/ ++ LogInfo("'%s' %s for information\n", S->name, E->message); + } + if (E->state == State_Init) + return; +diff --git a/src/p.y b/src/p.y +index a57807d..b46b1a1 100644 +--- a/src/p.y ++++ b/src/p.y +@@ -2250,9 +2250,12 @@ repeat : /* EMPTY */ { + } + ; + +-action : ALERT { ++action : ALERT repeat{ + $$ = Action_Alert; + } ++ | ALERT { ++ $$ = Action_Alert; ++ } + | EXEC argumentlist repeat { + $$ = Action_Exec; + } +@@ -2281,6 +2284,9 @@ action1 : action { + repeat = 0; + command1 = command; + command = NULL; ++ } else if ($1 == Action_Alert) { ++ repeat1 = repeat; ++ repeat = 0; + } + } + ; +-- +2.17.1 + diff --git a/src/monit/patch/series b/src/monit/patch/series index 15fcdd50c8a5..f5534d0f554f 100644 --- a/src/monit/patch/series +++ b/src/monit/patch/series @@ -1,2 +1,3 @@ 
# This series applies on GIT commit dc9bc1c949125140d967edfc598dfad47eedc552 0001-used_system_memory_sysdep-Use-MemAvailable-value-if-.patch +0002-change_monit_alert_log_error.patch From 22bee20fe798359d16084aad1605dddd60593050 Mon Sep 17 00:00:00 2001 From: Abhishek Dosi Date: Mon, 26 Oct 2020 13:10:49 -0700 Subject: [PATCH 2/6] Address Review Comments. Signed-off-by: Abhishek Dosi --- .../patch/0002-change_monit_alert_log_error.patch | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/src/monit/patch/0002-change_monit_alert_log_error.patch b/src/monit/patch/0002-change_monit_alert_log_error.patch index d867335e18ad..884c9ff3614f 100644 --- a/src/monit/patch/0002-change_monit_alert_log_error.patch +++ b/src/monit/patch/0002-change_monit_alert_log_error.patch @@ -3,18 +3,17 @@ From: Abhishek Dosi Date: Mon, 26 Oct 2020 11:40:02 -0700 Subject: [PATCH] Patch on top of commit Patch is addressing these changes:- -a) Enable repeat keyword for alert action . This was we can log -syslog error message for continuous failure condition. +a) Make sure error message is loggged if state is changed to fail first time based on fault tolerance condition +or we have repeat clause enable for alert -b) Make sure log and error message are different so that we do not rate -limit error message with log message +b) Enable repeat keyword for alert action . Using this we can log +syslog error message for persistent failures in repeat window. 
-c) Make sure error message is loggged is state change to fail first time -or we have repeat clause for alert +c) Make sure log and error message are different string so that we do not rate +limit error message with log message Signed-off-by: Abhishek Dosi -Signed-off-by: Abhishek Dosi --- src/event.c | 6 +++++- src/p.y | 8 +++++++- @@ -33,7 +32,7 @@ index ed363ee..969568f 100644 + else if ((E->state_changed) || (E->state == State_Failed && E->action->failed->repeat && E->count % E->action->failed->repeat == 0)) LogError("'%s' %s\n", S->name, E->message); + else -+ /* Here we are making log message differnt so that we do not rate-limit Err and Info in same bucket*/ ++ /* Here we are making log message different so that we do not rate-limit Err and Info in same bucket*/ + LogInfo("'%s' %s for information\n", S->name, E->message); } if (E->state == State_Init) From b072551d72bcb647dcf6cddf9981facb08eca03f Mon Sep 17 00:00:00 2001 From: Abhishek Dosi Date: Mon, 26 Oct 2020 14:21:14 -0700 Subject: [PATCH 3/6] Review comments Signed-off-by: Abhishek Dosi --- src/monit/patch/0002-change_monit_alert_log_error.patch | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/monit/patch/0002-change_monit_alert_log_error.patch b/src/monit/patch/0002-change_monit_alert_log_error.patch index 884c9ff3614f..0a86983a3132 100644 --- a/src/monit/patch/0002-change_monit_alert_log_error.patch +++ b/src/monit/patch/0002-change_monit_alert_log_error.patch @@ -4,13 +4,13 @@ Date: Mon, 26 Oct 2020 11:40:02 -0700 Subject: [PATCH] Patch on top of commit Patch is addressing these changes:- a) Make sure error message is loggged if state is changed to fail first time based on fault tolerance condition -or we have repeat clause enable for alert +or we have repeat clause enable for alert. -b) Enable repeat keyword for alert action . Using this we can log +b) Enable repeat keyword for alert action. 
Using this we can log syslog error message for persistent failures in repeat window. c) Make sure log and error message are different string so that we do not rate -limit error message with log message +limit error message with log message. Signed-off-by: Abhishek Dosi From 2ae70a29f8284f12d7853417335194559f5630c2 Mon Sep 17 00:00:00 2001 From: Abhishek Dosi Date: Thu, 29 Oct 2020 12:20:53 -0700 Subject: [PATCH 4/6] Address the review comments to remove info message. --- .../0002-change_monit_alert_log_error.patch | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/src/monit/patch/0002-change_monit_alert_log_error.patch b/src/monit/patch/0002-change_monit_alert_log_error.patch index 0a86983a3132..1e43078e6215 100644 --- a/src/monit/patch/0002-change_monit_alert_log_error.patch +++ b/src/monit/patch/0002-change_monit_alert_log_error.patch @@ -3,14 +3,11 @@ From: Abhishek Dosi Date: Mon, 26 Oct 2020 11:40:02 -0700 Subject: [PATCH] Patch on top of commit Patch is addressing these changes:- -a) Make sure error message is loggged if state is changed to fail first time based on fault tolerance condition -or we have repeat clause enable for alert. +a) Enable repeat keyword for alert action . Using this we can log +syslog error message for persistent failure condition -b) Enable repeat keyword for alert action. Using this we can log -syslog error message for persistent failures in repeat window. - -c) Make sure log and error message are different string so that we do not rate -limit error message with log message. 
+b) Make sure error message is loggged if state is changed to fail first time (fault tolerance condition) +or we have repeat clause for alert Signed-off-by: Abhishek Dosi @@ -20,10 +17,10 @@ Signed-off-by: Abhishek Dosi 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/event.c b/src/event.c -index ed363ee..969568f 100644 +index ed363ee..9d08fc0 100644 --- a/src/event.c +++ b/src/event.c -@@ -336,8 +336,12 @@ static void _handleEvent(Service_T S, Event_T E) { +@@ -336,7 +336,8 @@ static void _handleEvent(Service_T S, Event_T E) { if (E->state != State_Init || E->state_map & 0x1) { if (E->state == State_Succeeded || E->state == State_ChangedNot || E->id == Event_Instance || E->id == Event_Action) LogInfo("'%s' %s\n", S->name, E->message); @@ -31,9 +28,6 @@ index ed363ee..969568f 100644 + /* Send Error log if state change to failed for 1st time or if we have repeat clause then do periodically */ + else if ((E->state_changed) || (E->state == State_Failed && E->action->failed->repeat && E->count % E->action->failed->repeat == 0)) LogError("'%s' %s\n", S->name, E->message); -+ else -+ /* Here we are making log message different so that we do not rate-limit Err and Info in same bucket*/ -+ LogInfo("'%s' %s for information\n", S->name, E->message); } if (E->state == State_Init) return; From 1d11abf404b98ebe62ccb2de873c1d516bb70b75 Mon Sep 17 00:00:00 2001 From: Abhishek Dosi Date: Thu, 29 Oct 2020 15:20:52 -0700 Subject: [PATCH 5/6] Updated monit config file to add repeat clause. 
Signed-off-by: Abhishek Dosi --- .../base_image_files/monit_database | 2 +- .../docker-fpm-frr/base_image_files/monit_bgp | 12 +++++------ .../docker-lldp/base_image_files/monit_lldp | 6 +++--- .../base_image_files/monit_swss | 20 +++++++++---------- .../docker-sflow/base_image_files/monit_sflow | 2 +- .../docker-snmp/base_image_files/monit_snmp | 4 ++-- .../base_image_files/monit_restapi | 2 +- .../base_image_files/monit_telemetry | 4 ++-- .../docker-teamd/base_image_files/monit_teamd | 4 ++-- files/image_config/monit/conf.d/sonic-host | 13 ++++++------ .../base_image_files/monit_syncd | 2 +- .../base_image_files/monit_syncd | 4 ++-- .../base_image_files/monit_syncd | 2 +- .../base_image_files/monit_syncd | 2 +- .../base_image_files/monit_syncd | 2 +- .../base_image_files/monit_syncd | 2 +- .../base_image_files/monit_syncd | 2 +- .../base_image_files/monit_syncd | 2 +- .../base_image_files/monit_syncd | 4 ++-- 19 files changed, 46 insertions(+), 45 deletions(-) diff --git a/dockers/docker-database/base_image_files/monit_database b/dockers/docker-database/base_image_files/monit_database index c1addd8a6f05..fd871279e0bd 100644 --- a/dockers/docker-database/base_image_files/monit_database +++ b/dockers/docker-database/base_image_files/monit_database @@ -4,4 +4,4 @@ ## redis_server ############################################################################### check program database|redis_server with path "/usr/bin/process_checker database /usr/bin/redis-server" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle diff --git a/dockers/docker-fpm-frr/base_image_files/monit_bgp b/dockers/docker-fpm-frr/base_image_files/monit_bgp index 4567d45e3c48..f87014c10e80 100644 --- a/dockers/docker-fpm-frr/base_image_files/monit_bgp +++ b/dockers/docker-fpm-frr/base_image_files/monit_bgp @@ -9,19 +9,19 @@ ## bgpmon ############################################################################### check 
program bgp|zebra with path "/usr/bin/process_checker bgp /usr/lib/frr/zebra" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle check program bgp|fpmsyncd with path "/usr/bin/process_checker bgp fpmsyncd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle check program bgp|bgpd with path "/usr/bin/process_checker bgp /usr/lib/frr/bgpd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle check program bgp|staticd with path "/usr/bin/process_checker bgp /usr/lib/frr/staticd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle check program bgp|bgpcfgd with path "/usr/bin/process_checker bgp /usr/bin/python /usr/local/bin/bgpcfgd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle check program bgp|bgpmon with path "/usr/bin/process_checker bgp /usr/bin/python /usr/local/bin/bgpmon" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle diff --git a/dockers/docker-lldp/base_image_files/monit_lldp b/dockers/docker-lldp/base_image_files/monit_lldp index 194fa14a3088..65f341b718b1 100644 --- a/dockers/docker-lldp/base_image_files/monit_lldp +++ b/dockers/docker-lldp/base_image_files/monit_lldp @@ -6,10 +6,10 @@ ## lldpmgrd ############################################################################### check program lldp|lldpd_monitor with path "/usr/bin/process_checker lldp lldpd:" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle check program lldp|lldp_syncd with path "/usr/bin/process_checker lldp python2 -m 
lldp_syncd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle check program lldp|lldpmgrd with path "/usr/bin/process_checker lldp python /usr/bin/lldpmgrd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle diff --git a/dockers/docker-orchagent/base_image_files/monit_swss b/dockers/docker-orchagent/base_image_files/monit_swss index f5f4389f3fe4..982643bf96ca 100644 --- a/dockers/docker-orchagent/base_image_files/monit_swss +++ b/dockers/docker-orchagent/base_image_files/monit_swss @@ -13,31 +13,31 @@ ## vxlanmgrd ############################################################################## check program swss|orchagent with path "/usr/bin/process_checker swss /usr/bin/orchagent -d /var/log/swss" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle check program swss|portsyncd with path "/usr/bin/process_checker swss /usr/bin/portsyncd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle check program swss|neighsyncd with path "/usr/bin/process_checker swss /usr/bin/neighsyncd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle check program swss|vrfmgrd with path "/usr/bin/process_checker swss /usr/bin/vrfmgrd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle check program swss|vlanmgrd with path "/usr/bin/process_checker swss /usr/bin/vlanmgrd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle check program swss|intfmgrd with path "/usr/bin/process_checker swss /usr/bin/intfmgrd" - if status != 0 
for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle check program swss|portmgrd with path "/usr/bin/process_checker swss /usr/bin/portmgrd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle check program swss|buffermgrd with path "/usr/bin/process_checker swss /usr/bin/buffermgrd -l" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle check program swss|nbrmgrd with path "/usr/bin/process_checker swss /usr/bin/nbrmgrd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle check program swss|vxlanmgrd with path "/usr/bin/process_checker swss /usr/bin/vxlanmgrd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle diff --git a/dockers/docker-sflow/base_image_files/monit_sflow b/dockers/docker-sflow/base_image_files/monit_sflow index 217f2e625835..bc7e5f6b423d 100644 --- a/dockers/docker-sflow/base_image_files/monit_sflow +++ b/dockers/docker-sflow/base_image_files/monit_sflow @@ -4,4 +4,4 @@ ## sflowmgrd ############################################################################### check program sflow|sflowmgrd with path "/usr/bin/process_checker sflow /usr/bin/sflowmgrd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle diff --git a/dockers/docker-snmp/base_image_files/monit_snmp b/dockers/docker-snmp/base_image_files/monit_snmp index b1725378c0b8..bab9a997ee50 100644 --- a/dockers/docker-snmp/base_image_files/monit_snmp +++ b/dockers/docker-snmp/base_image_files/monit_snmp @@ -5,7 +5,7 @@ ## snmpd_subagent ############################################################################### check program 
snmp|snmpd with path "/usr/bin/process_checker snmp /usr/sbin/snmpd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle check program snmp|snmp_subagent with path "/usr/bin/process_checker snmp python3 -m sonic_ax_impl" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle diff --git a/dockers/docker-sonic-restapi/base_image_files/monit_restapi b/dockers/docker-sonic-restapi/base_image_files/monit_restapi index 84e4366f4ac5..fd2ebc6c56ce 100644 --- a/dockers/docker-sonic-restapi/base_image_files/monit_restapi +++ b/dockers/docker-sonic-restapi/base_image_files/monit_restapi @@ -4,4 +4,4 @@ ## restapi ############################################################################### check program restapi|restapi with path "/usr/bin/process_checker restapi /usr/sbin/go-server-server" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle diff --git a/dockers/docker-sonic-telemetry/base_image_files/monit_telemetry b/dockers/docker-sonic-telemetry/base_image_files/monit_telemetry index 7365ce51d1fd..590ca77d6880 100644 --- a/dockers/docker-sonic-telemetry/base_image_files/monit_telemetry +++ b/dockers/docker-sonic-telemetry/base_image_files/monit_telemetry @@ -5,7 +5,7 @@ ## dialout_client ############################################################################### check program telemetry|telemetry with path "/usr/bin/process_checker telemetry /usr/sbin/telemetry" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle check program telemetry|dialout_client with path "/usr/bin/process_checker telemetry /usr/sbin/dialout_client_cli" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 
cycle diff --git a/dockers/docker-teamd/base_image_files/monit_teamd b/dockers/docker-teamd/base_image_files/monit_teamd index 256482aef2bf..8a5853d7266c 100644 --- a/dockers/docker-teamd/base_image_files/monit_teamd +++ b/dockers/docker-teamd/base_image_files/monit_teamd @@ -5,7 +5,7 @@ ## teammgrd ############################################################################### check program teamd|teamsyncd with path "/usr/bin/process_checker teamd /usr/bin/teamsyncd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle check program teamd|teammgrd with path "/usr/bin/process_checker teamd /usr/bin/teammgrd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle diff --git a/files/image_config/monit/conf.d/sonic-host b/files/image_config/monit/conf.d/sonic-host index 3fd313e24bab..0fa89f237796 100644 --- a/files/image_config/monit/conf.d/sonic-host +++ b/files/image_config/monit/conf.d/sonic-host @@ -6,15 +6,15 @@ ############################################################################### check filesystem root-overlay with path / - if space usage > 90% for 10 times within 20 cycles then alert + if space usage > 90% for 10 times within 20 cycles then alert repeat every 10 cycle check filesystem var-log with path /var/log - if space usage > 90% for 10 times within 20 cycles then alert + if space usage > 90% for 10 times within 20 cycles then alert repeat every 10 cycle check system $HOST - if memory usage > 90% for 10 times within 20 cycles then alert - if cpu usage (user) > 90% for 10 times within 20 cycles then alert - if cpu usage (system) > 90% for 10 times within 20 cycles then alert + if memory usage > 90% for 10 times within 20 cycles then alert repeat every 10 cycle + if cpu usage (user) > 90% for 10 times within 20 cycles then alert repeat every 10 cycle + if cpu usage (system) > 90% for 10 times 
within 20 cycles then alert repeat every 10 cycle check process rsyslog with pidfile /var/run/rsyslogd.pid start program = "/bin/systemctl start rsyslog.service" @@ -29,4 +29,5 @@ check process rsyslog with pidfile /var/run/rsyslogd.pid # check program routeCheck with path "/usr/local/bin/route_check.py" every 5 cycles - if status != 0 then alert + if status != 0 for 3 cycle then alert repeat every 5 cycle + diff --git a/platform/barefoot/docker-syncd-bfn/base_image_files/monit_syncd b/platform/barefoot/docker-syncd-bfn/base_image_files/monit_syncd index 14789c67c3b8..43509a413915 100644 --- a/platform/barefoot/docker-syncd-bfn/base_image_files/monit_syncd +++ b/platform/barefoot/docker-syncd-bfn/base_image_files/monit_syncd @@ -4,4 +4,4 @@ ## syncd ############################################################################### check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle diff --git a/platform/broadcom/docker-syncd-brcm/base_image_files/monit_syncd b/platform/broadcom/docker-syncd-brcm/base_image_files/monit_syncd index 119548770096..2cc48afd6049 100644 --- a/platform/broadcom/docker-syncd-brcm/base_image_files/monit_syncd +++ b/platform/broadcom/docker-syncd-brcm/base_image_files/monit_syncd @@ -5,7 +5,7 @@ ## dsserve ############################################################################### check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle check program syncd|dsserve with path "/usr/bin/process_checker syncd /usr/bin/dsserve /usr/bin/syncd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle diff --git 
a/platform/cavium/docker-syncd-cavm/base_image_files/monit_syncd b/platform/cavium/docker-syncd-cavm/base_image_files/monit_syncd index 14789c67c3b8..43509a413915 100644 --- a/platform/cavium/docker-syncd-cavm/base_image_files/monit_syncd +++ b/platform/cavium/docker-syncd-cavm/base_image_files/monit_syncd @@ -4,4 +4,4 @@ ## syncd ############################################################################### check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle diff --git a/platform/centec/docker-syncd-centec/base_image_files/monit_syncd b/platform/centec/docker-syncd-centec/base_image_files/monit_syncd index 14789c67c3b8..43509a413915 100644 --- a/platform/centec/docker-syncd-centec/base_image_files/monit_syncd +++ b/platform/centec/docker-syncd-centec/base_image_files/monit_syncd @@ -4,4 +4,4 @@ ## syncd ############################################################################### check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle diff --git a/platform/marvell-arm64/docker-syncd-mrvl/base_image_files/monit_syncd b/platform/marvell-arm64/docker-syncd-mrvl/base_image_files/monit_syncd index 14789c67c3b8..43509a413915 100644 --- a/platform/marvell-arm64/docker-syncd-mrvl/base_image_files/monit_syncd +++ b/platform/marvell-arm64/docker-syncd-mrvl/base_image_files/monit_syncd @@ -4,4 +4,4 @@ ## syncd ############################################################################### check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle diff --git 
a/platform/marvell-armhf/docker-syncd-mrvl/base_image_files/monit_syncd b/platform/marvell-armhf/docker-syncd-mrvl/base_image_files/monit_syncd index 14789c67c3b8..43509a413915 100644 --- a/platform/marvell-armhf/docker-syncd-mrvl/base_image_files/monit_syncd +++ b/platform/marvell-armhf/docker-syncd-mrvl/base_image_files/monit_syncd @@ -4,4 +4,4 @@ ## syncd ############################################################################### check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle diff --git a/platform/marvell/docker-syncd-mrvl/base_image_files/monit_syncd b/platform/marvell/docker-syncd-mrvl/base_image_files/monit_syncd index 14789c67c3b8..43509a413915 100644 --- a/platform/marvell/docker-syncd-mrvl/base_image_files/monit_syncd +++ b/platform/marvell/docker-syncd-mrvl/base_image_files/monit_syncd @@ -4,4 +4,4 @@ ## syncd ############################################################################### check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle diff --git a/platform/mellanox/docker-syncd-mlnx/base_image_files/monit_syncd b/platform/mellanox/docker-syncd-mlnx/base_image_files/monit_syncd index 14789c67c3b8..43509a413915 100644 --- a/platform/mellanox/docker-syncd-mlnx/base_image_files/monit_syncd +++ b/platform/mellanox/docker-syncd-mlnx/base_image_files/monit_syncd @@ -4,4 +4,4 @@ ## syncd ############################################################################### check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle diff --git 
a/platform/nephos/docker-syncd-nephos/base_image_files/monit_syncd b/platform/nephos/docker-syncd-nephos/base_image_files/monit_syncd index 119548770096..2cc48afd6049 100644 --- a/platform/nephos/docker-syncd-nephos/base_image_files/monit_syncd +++ b/platform/nephos/docker-syncd-nephos/base_image_files/monit_syncd @@ -5,7 +5,7 @@ ## dsserve ############################################################################### check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle check program syncd|dsserve with path "/usr/bin/process_checker syncd /usr/bin/dsserve /usr/bin/syncd" - if status != 0 for 5 times within 5 cycles then alert + if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle From e69fe803bcb73a4a511c5a39393d1b35d47280e5 Mon Sep 17 00:00:00 2001 From: Abhishek Dosi Date: Thu, 29 Oct 2020 18:01:16 -0700 Subject: [PATCH 6/6] Address Review comments Signed-off-by: Abhishek Dosi --- .../base_image_files/monit_database | 2 +- .../docker-fpm-frr/base_image_files/monit_bgp | 12 +++++------ .../docker-lldp/base_image_files/monit_lldp | 6 +++--- .../base_image_files/monit_swss | 20 +++++++++---------- .../docker-sflow/base_image_files/monit_sflow | 2 +- .../docker-snmp/base_image_files/monit_snmp | 4 ++-- .../base_image_files/monit_restapi | 2 +- .../base_image_files/monit_telemetry | 4 ++-- .../docker-teamd/base_image_files/monit_teamd | 4 ++-- files/image_config/monit/conf.d/sonic-host | 12 +++++------ .../base_image_files/monit_syncd | 2 +- .../base_image_files/monit_syncd | 4 ++-- .../base_image_files/monit_syncd | 2 +- .../base_image_files/monit_syncd | 2 +- .../base_image_files/monit_syncd | 2 +- .../base_image_files/monit_syncd | 2 +- .../base_image_files/monit_syncd | 2 +- .../base_image_files/monit_syncd | 2 +- .../base_image_files/monit_syncd | 4 ++-- 19 files changed, 45 
insertions(+), 45 deletions(-) diff --git a/dockers/docker-database/base_image_files/monit_database b/dockers/docker-database/base_image_files/monit_database index fd871279e0bd..47c9d1b2d47f 100644 --- a/dockers/docker-database/base_image_files/monit_database +++ b/dockers/docker-database/base_image_files/monit_database @@ -4,4 +4,4 @@ ## redis_server ############################################################################### check program database|redis_server with path "/usr/bin/process_checker database /usr/bin/redis-server" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/dockers/docker-fpm-frr/base_image_files/monit_bgp b/dockers/docker-fpm-frr/base_image_files/monit_bgp index f87014c10e80..3361b9e64f3c 100644 --- a/dockers/docker-fpm-frr/base_image_files/monit_bgp +++ b/dockers/docker-fpm-frr/base_image_files/monit_bgp @@ -9,19 +9,19 @@ ## bgpmon ############################################################################### check program bgp|zebra with path "/usr/bin/process_checker bgp /usr/lib/frr/zebra" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles check program bgp|fpmsyncd with path "/usr/bin/process_checker bgp fpmsyncd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles check program bgp|bgpd with path "/usr/bin/process_checker bgp /usr/lib/frr/bgpd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles check program bgp|staticd with path "/usr/bin/process_checker bgp /usr/lib/frr/staticd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert 
repeat every 1 cycles check program bgp|bgpcfgd with path "/usr/bin/process_checker bgp /usr/bin/python /usr/local/bin/bgpcfgd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles check program bgp|bgpmon with path "/usr/bin/process_checker bgp /usr/bin/python /usr/local/bin/bgpmon" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/dockers/docker-lldp/base_image_files/monit_lldp b/dockers/docker-lldp/base_image_files/monit_lldp index 65f341b718b1..8dc2f3c15321 100644 --- a/dockers/docker-lldp/base_image_files/monit_lldp +++ b/dockers/docker-lldp/base_image_files/monit_lldp @@ -6,10 +6,10 @@ ## lldpmgrd ############################################################################### check program lldp|lldpd_monitor with path "/usr/bin/process_checker lldp lldpd:" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles check program lldp|lldp_syncd with path "/usr/bin/process_checker lldp python2 -m lldp_syncd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles check program lldp|lldpmgrd with path "/usr/bin/process_checker lldp python /usr/bin/lldpmgrd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/dockers/docker-orchagent/base_image_files/monit_swss b/dockers/docker-orchagent/base_image_files/monit_swss index 982643bf96ca..da601011e735 100644 --- a/dockers/docker-orchagent/base_image_files/monit_swss +++ b/dockers/docker-orchagent/base_image_files/monit_swss @@ -13,31 +13,31 @@ ## vxlanmgrd 
############################################################################## check program swss|orchagent with path "/usr/bin/process_checker swss /usr/bin/orchagent -d /var/log/swss" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles check program swss|portsyncd with path "/usr/bin/process_checker swss /usr/bin/portsyncd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles check program swss|neighsyncd with path "/usr/bin/process_checker swss /usr/bin/neighsyncd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles check program swss|vrfmgrd with path "/usr/bin/process_checker swss /usr/bin/vrfmgrd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles check program swss|vlanmgrd with path "/usr/bin/process_checker swss /usr/bin/vlanmgrd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles check program swss|intfmgrd with path "/usr/bin/process_checker swss /usr/bin/intfmgrd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles check program swss|portmgrd with path "/usr/bin/process_checker swss /usr/bin/portmgrd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles check program swss|buffermgrd with path "/usr/bin/process_checker swss /usr/bin/buffermgrd -l" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 
times within 5 cycles then alert repeat every 1 cycles check program swss|nbrmgrd with path "/usr/bin/process_checker swss /usr/bin/nbrmgrd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles check program swss|vxlanmgrd with path "/usr/bin/process_checker swss /usr/bin/vxlanmgrd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/dockers/docker-sflow/base_image_files/monit_sflow b/dockers/docker-sflow/base_image_files/monit_sflow index bc7e5f6b423d..84b36b18ce65 100644 --- a/dockers/docker-sflow/base_image_files/monit_sflow +++ b/dockers/docker-sflow/base_image_files/monit_sflow @@ -4,4 +4,4 @@ ## sflowmgrd ############################################################################### check program sflow|sflowmgrd with path "/usr/bin/process_checker sflow /usr/bin/sflowmgrd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/dockers/docker-snmp/base_image_files/monit_snmp b/dockers/docker-snmp/base_image_files/monit_snmp index bab9a997ee50..6a368a9b6035 100644 --- a/dockers/docker-snmp/base_image_files/monit_snmp +++ b/dockers/docker-snmp/base_image_files/monit_snmp @@ -5,7 +5,7 @@ ## snmpd_subagent ############################################################################### check program snmp|snmpd with path "/usr/bin/process_checker snmp /usr/sbin/snmpd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles check program snmp|snmp_subagent with path "/usr/bin/process_checker snmp python3 -m sonic_ax_impl" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles 
then alert repeat every 1 cycles diff --git a/dockers/docker-sonic-restapi/base_image_files/monit_restapi b/dockers/docker-sonic-restapi/base_image_files/monit_restapi index fd2ebc6c56ce..6752100b84f2 100644 --- a/dockers/docker-sonic-restapi/base_image_files/monit_restapi +++ b/dockers/docker-sonic-restapi/base_image_files/monit_restapi @@ -4,4 +4,4 @@ ## restapi ############################################################################### check program restapi|restapi with path "/usr/bin/process_checker restapi /usr/sbin/go-server-server" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/dockers/docker-sonic-telemetry/base_image_files/monit_telemetry b/dockers/docker-sonic-telemetry/base_image_files/monit_telemetry index 590ca77d6880..3680bbe6cf9a 100644 --- a/dockers/docker-sonic-telemetry/base_image_files/monit_telemetry +++ b/dockers/docker-sonic-telemetry/base_image_files/monit_telemetry @@ -5,7 +5,7 @@ ## dialout_client ############################################################################### check program telemetry|telemetry with path "/usr/bin/process_checker telemetry /usr/sbin/telemetry" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles check program telemetry|dialout_client with path "/usr/bin/process_checker telemetry /usr/sbin/dialout_client_cli" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/dockers/docker-teamd/base_image_files/monit_teamd b/dockers/docker-teamd/base_image_files/monit_teamd index 8a5853d7266c..626a6145604e 100644 --- a/dockers/docker-teamd/base_image_files/monit_teamd +++ b/dockers/docker-teamd/base_image_files/monit_teamd @@ -5,7 +5,7 @@ ## teammgrd 
############################################################################### check program teamd|teamsyncd with path "/usr/bin/process_checker teamd /usr/bin/teamsyncd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles check program teamd|teammgrd with path "/usr/bin/process_checker teamd /usr/bin/teammgrd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/files/image_config/monit/conf.d/sonic-host b/files/image_config/monit/conf.d/sonic-host index 0fa89f237796..202c49f8d7b2 100644 --- a/files/image_config/monit/conf.d/sonic-host +++ b/files/image_config/monit/conf.d/sonic-host @@ -6,15 +6,15 @@ ############################################################################### check filesystem root-overlay with path / - if space usage > 90% for 10 times within 20 cycles then alert repeat every 10 cycle + if space usage > 90% for 10 times within 20 cycles then alert repeat every 1 cycles check filesystem var-log with path /var/log - if space usage > 90% for 10 times within 20 cycles then alert repeat every 10 cycle + if space usage > 90% for 10 times within 20 cycles then alert repeat every 1 cycles check system $HOST - if memory usage > 90% for 10 times within 20 cycles then alert repeat every 10 cycle - if cpu usage (user) > 90% for 10 times within 20 cycles then alert repeat every 10 cycle - if cpu usage (system) > 90% for 10 times within 20 cycles then alert repeat every 10 cycle + if memory usage > 90% for 10 times within 20 cycles then alert repeat every 1 cycles + if cpu usage (user) > 90% for 10 times within 20 cycles then alert repeat every 1 cycles + if cpu usage (system) > 90% for 10 times within 20 cycles then alert repeat every 1 cycles check process rsyslog with pidfile /var/run/rsyslogd.pid start program = "/bin/systemctl start 
rsyslog.service" @@ -29,5 +29,5 @@ check process rsyslog with pidfile /var/run/rsyslogd.pid # check program routeCheck with path "/usr/local/bin/route_check.py" every 5 cycles - if status != 0 for 3 cycle then alert repeat every 5 cycle + if status != 0 for 3 cycle then alert repeat every 1 cycles diff --git a/platform/barefoot/docker-syncd-bfn/base_image_files/monit_syncd b/platform/barefoot/docker-syncd-bfn/base_image_files/monit_syncd index 43509a413915..61e290e3189e 100644 --- a/platform/barefoot/docker-syncd-bfn/base_image_files/monit_syncd +++ b/platform/barefoot/docker-syncd-bfn/base_image_files/monit_syncd @@ -4,4 +4,4 @@ ## syncd ############################################################################### check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/platform/broadcom/docker-syncd-brcm/base_image_files/monit_syncd b/platform/broadcom/docker-syncd-brcm/base_image_files/monit_syncd index 2cc48afd6049..d63346d9ee20 100644 --- a/platform/broadcom/docker-syncd-brcm/base_image_files/monit_syncd +++ b/platform/broadcom/docker-syncd-brcm/base_image_files/monit_syncd @@ -5,7 +5,7 @@ ## dsserve ############################################################################### check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles check program syncd|dsserve with path "/usr/bin/process_checker syncd /usr/bin/dsserve /usr/bin/syncd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/platform/cavium/docker-syncd-cavm/base_image_files/monit_syncd 
b/platform/cavium/docker-syncd-cavm/base_image_files/monit_syncd index 43509a413915..61e290e3189e 100644 --- a/platform/cavium/docker-syncd-cavm/base_image_files/monit_syncd +++ b/platform/cavium/docker-syncd-cavm/base_image_files/monit_syncd @@ -4,4 +4,4 @@ ## syncd ############################################################################### check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/platform/centec/docker-syncd-centec/base_image_files/monit_syncd b/platform/centec/docker-syncd-centec/base_image_files/monit_syncd index 43509a413915..61e290e3189e 100644 --- a/platform/centec/docker-syncd-centec/base_image_files/monit_syncd +++ b/platform/centec/docker-syncd-centec/base_image_files/monit_syncd @@ -4,4 +4,4 @@ ## syncd ############################################################################### check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/platform/marvell-arm64/docker-syncd-mrvl/base_image_files/monit_syncd b/platform/marvell-arm64/docker-syncd-mrvl/base_image_files/monit_syncd index 43509a413915..61e290e3189e 100644 --- a/platform/marvell-arm64/docker-syncd-mrvl/base_image_files/monit_syncd +++ b/platform/marvell-arm64/docker-syncd-mrvl/base_image_files/monit_syncd @@ -4,4 +4,4 @@ ## syncd ############################################################################### check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git 
a/platform/marvell-armhf/docker-syncd-mrvl/base_image_files/monit_syncd b/platform/marvell-armhf/docker-syncd-mrvl/base_image_files/monit_syncd index 43509a413915..61e290e3189e 100644 --- a/platform/marvell-armhf/docker-syncd-mrvl/base_image_files/monit_syncd +++ b/platform/marvell-armhf/docker-syncd-mrvl/base_image_files/monit_syncd @@ -4,4 +4,4 @@ ## syncd ############################################################################### check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/platform/marvell/docker-syncd-mrvl/base_image_files/monit_syncd b/platform/marvell/docker-syncd-mrvl/base_image_files/monit_syncd index 43509a413915..61e290e3189e 100644 --- a/platform/marvell/docker-syncd-mrvl/base_image_files/monit_syncd +++ b/platform/marvell/docker-syncd-mrvl/base_image_files/monit_syncd @@ -4,4 +4,4 @@ ## syncd ############################################################################### check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles diff --git a/platform/mellanox/docker-syncd-mlnx/base_image_files/monit_syncd b/platform/mellanox/docker-syncd-mlnx/base_image_files/monit_syncd index 43509a413915..61e290e3189e 100644 --- a/platform/mellanox/docker-syncd-mlnx/base_image_files/monit_syncd +++ b/platform/mellanox/docker-syncd-mlnx/base_image_files/monit_syncd @@ -4,4 +4,4 @@ ## syncd ############################################################################### check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert 
repeat every 1 cycles diff --git a/platform/nephos/docker-syncd-nephos/base_image_files/monit_syncd b/platform/nephos/docker-syncd-nephos/base_image_files/monit_syncd index 2cc48afd6049..d63346d9ee20 100644 --- a/platform/nephos/docker-syncd-nephos/base_image_files/monit_syncd +++ b/platform/nephos/docker-syncd-nephos/base_image_files/monit_syncd @@ -5,7 +5,7 @@ ## dsserve ############################################################################### check program syncd|syncd with path "/usr/bin/process_checker syncd /usr/bin/syncd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles check program syncd|dsserve with path "/usr/bin/process_checker syncd /usr/bin/dsserve /usr/bin/syncd" - if status != 0 for 5 times within 5 cycles then alert repeat every 5 cycle + if status != 0 for 5 times within 5 cycles then alert repeat every 1 cycles