From 46613c6938d1be5ee45daee93ae33e5b5de0487e Mon Sep 17 00:00:00 2001 From: Hua Liu <58683130+liuh-80@users.noreply.github.com> Date: Fri, 26 Jan 2024 16:01:50 +0800 Subject: [PATCH] Change orchagent stuck message from ERR to WARNING (#17872) Change orchagent stuck message from ERR to WARNING #### Why I did it During switch initialization, Orchagent is sometimes busy for more than 40 seconds, which triggers the process-stuck watchdog error. To mitigate this, change the watchdog message from an error to a warning. ##### Work item tracking - Microsoft ADO: 26517622 #### How I did it Change orchagent stuck message from ERR to WARNING. #### How to verify it Pass all UT. ### Description for the changelog Change orchagent stuck message from ERR to WARNING. --- files/scripts/supervisor-proc-exit-listener | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/files/scripts/supervisor-proc-exit-listener b/files/scripts/supervisor-proc-exit-listener index 61c12d8ce45e..8628826e6157 100755 --- a/files/scripts/supervisor-proc-exit-listener +++ b/files/scripts/supervisor-proc-exit-listener @@ -73,7 +73,7 @@ def get_group_and_process_list(process_file): return group_list, process_list -def generate_alerting_message(process_name, status, dead_minutes): +def generate_alerting_message(process_name, status, dead_minutes, priority=syslog.LOG_ERR): """ @summary: If a critical process was not running, this function will determine it resides in host or in a specific namespace. Then an alerting message will be written into syslog. @@ -86,7 +86,7 @@ def generate_alerting_message(process_name, status, dead_minutes): else: namespace = namespace_prefix + namespace_id - syslog.syslog(syslog.LOG_ERR, "Process '{}' is {} in namespace '{}' ({} minutes)." + syslog.syslog(priority, "Process '{}' is {} in namespace '{}' ({} minutes)." 
.format(process_name, status, namespace, dead_minutes)) @@ -213,7 +213,7 @@ def main(argv): elapsed_secs = epoch_time - process_heart_beat_info[process]["last_heart_beat"] if elapsed_secs >= ALERTING_INTERVAL_SECS: elapsed_mins = elapsed_secs // 60 - generate_alerting_message(process, "stuck", elapsed_mins) + generate_alerting_message(process, "stuck", elapsed_mins, syslog.LOG_WARNING) if __name__ == "__main__": main(sys.argv[1:]) \ No newline at end of file