From 934c6bb216022e38a9415cf72e14f566e1b1afad Mon Sep 17 00:00:00 2001
From: sai chaithanya <sai.chaithanya@mayadata.io>
Date: Wed, 11 Dec 2019 18:16:18 +0530
Subject: [PATCH] fix(cstor-pool-mgmt): fix livenessprobe in cStor pool
 deployment (#1544)

This PR fixes the liveness probe on the cstor-pool container by wrapping the probe command in `timeout`, which runs a command with a time limit. This matters when the disks are detached from the node: the command triggered by the liveness probe (the `zfs set ...` command) hangs forever, and kubelet does not treat the hung command as a failure.

Kubelet also retries the same command after the `timeoutSeconds` specified in the liveness probe. Running `timeout 120 zfs set io.openebs:livenesstimestamp="$(date +%s)" cstor-<pool_name>` kills the process if it runs for more than 120 seconds and returns a non-zero exit status.


Signed-off-by: mittachaitu <sai.chaithanya@mayadata.io>
---
 pkg/install/v1alpha1/cstor_pool.go | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/pkg/install/v1alpha1/cstor_pool.go b/pkg/install/v1alpha1/cstor_pool.go
index 2fe251d2c1..a7f37936fb 100644
--- a/pkg/install/v1alpha1/cstor_pool.go
+++ b/pkg/install/v1alpha1/cstor_pool.go
@@ -260,11 +260,17 @@ spec:
                 command:
                 - /bin/sh
                 - -c
-                - zfs set io.openebs:livenesstimestamp="$(date +%s)" cstor-$OPENEBS_IO_CSTOR_ID
+                ## timeout 120 is added to exit the command forcefully with non-zero exit code if command takes
+                ## more than 120 seconds.
+                - timeout 120 zfs set io.openebs:livenesstimestamp="$(date +%s)" cstor-$OPENEBS_IO_CSTOR_ID
               failureThreshold: 3
               initialDelaySeconds: 300
-              periodSeconds: 10
-              timeoutSeconds: 30
+              ## how often (in seconds) to perform the probe
+              periodSeconds: 60
+              ## Number of seconds after which kubelet times out the probe.
+              ## Note: this limit is not expected to be reached, because
+              ## `timeout 120` ends the command after 120 seconds.
+              timeoutSeconds: 150
             securityContext:
               privileged: true
             volumeMounts: