From 781188f54941a2eb9e4a23a96f05986ec51ff106 Mon Sep 17 00:00:00 2001 From: Junchao-Mellanox <57339448+Junchao-Mellanox@users.noreply.github.com> Date: Sat, 31 Oct 2020 03:01:17 +0800 Subject: [PATCH] [thermalctld] Enlarge startretries value to avoid thermalctld not able to restart during regression test (#5633) Increase startretries value from default of 3 to 50 to prevent supervisor from placing thermalctld in FATAL state during regression testing. Also ensures supervisord tries hard to get thermalctld running in production, as thermalctld is critical to prevent device from overheating. --- dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 | 1 + 1 file changed, 1 insertion(+) diff --git a/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 b/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 index f1eacf5b408c..c44bbbbf8eb4 100644 --- a/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 +++ b/dockers/docker-platform-monitor/docker-pmon.supervisord.conf.j2 @@ -125,6 +125,7 @@ autorestart=unexpected stdout_logfile=syslog stderr_logfile=syslog startsecs=10 +startretries=50 dependent_startup=true dependent_startup_wait_for=start:exited {% endif %}