From 3d725af50361ee5213b22877d205af5df04939ab Mon Sep 17 00:00:00 2001
From: Armel Soro <asoro@redhat.com>
Date: Fri, 13 Dec 2024 16:30:35 +0100
Subject: [PATCH] fix: adjust startup, liveness and readiness probes settings
 (#564) (#565)

* fix: adjust startup, liveness and readiness probes settings

Startup probe settings seem to have been added in the upstream Backstage Chart
in [1], but the current settings do not allow the liveness probe in the RHDH
Chart to be triggered often enough for the app to be
considered live.
This adjusts those settings to account for the worst-case scenario
where the application might take a long time to start.

This also aligns the probe endpoints with the upstream chart.

[1] https://github.com/backstage/charts/pull/216

* Update bundle/rhdh/manifests/rhdh-default-config_v1_configmap.yaml


* Regenerate bundle manifests


* Apply suggestions from code review


* Regenerate bundle manifests


---------

Co-authored-by: Patrick Knight <pknight@redhat.com>
Co-authored-by: Gustavo Lira e Silva <guga.java@gmail.com>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: rm3l <rm3l@users.noreply.github.com>
Co-authored-by: Nick Boldt <nboldt@redhat.com>
Co-authored-by: Tomas Kral <tkral@redhat.com>
---
 ...kstage-operator.clusterserviceversion.yaml |  2 +-
 ...kstage-operator.clusterserviceversion.yaml |  2 +-
 .../rhdh-default-config_v1_configmap.yaml     | 36 ++++++++++++++-----
 .../rhdh/default-config/deployment.yaml       | 36 ++++++++++++++-----
 4 files changed, 58 insertions(+), 18 deletions(-)

diff --git a/bundle/backstage.io/manifests/backstage-operator.clusterserviceversion.yaml b/bundle/backstage.io/manifests/backstage-operator.clusterserviceversion.yaml
index 91af82c8..ce61f9f0 100644
--- a/bundle/backstage.io/manifests/backstage-operator.clusterserviceversion.yaml
+++ b/bundle/backstage.io/manifests/backstage-operator.clusterserviceversion.yaml
@@ -35,7 +35,7 @@ metadata:
           }
         }
       ]
-    createdAt: "2024-11-15T19:48:10Z"
+    createdAt: "2024-12-13T14:46:15Z"
     description: Backstage Operator
     operators.operatorframework.io/builder: operator-sdk-v1.37.0
     operators.operatorframework.io/project_layout: go.kubebuilder.io/v4
diff --git a/bundle/rhdh/manifests/backstage-operator.clusterserviceversion.yaml b/bundle/rhdh/manifests/backstage-operator.clusterserviceversion.yaml
index ba20c03a..b76b27b3 100644
--- a/bundle/rhdh/manifests/backstage-operator.clusterserviceversion.yaml
+++ b/bundle/rhdh/manifests/backstage-operator.clusterserviceversion.yaml
@@ -39,7 +39,7 @@ metadata:
     categories: Developer Tools
     certified: "true"
     containerImage: registry-proxy.engineering.redhat.com/rh-osbs/rhdh-rhdh-rhel9-operator:1.3
-    createdAt: "2024-11-27T11:19:54Z"
+    createdAt: "2024-12-13T14:46:16Z"
     description: Red Hat Developer Hub is a Red Hat supported version of Backstage.
       It comes with pre-built plug-ins and configuration settings, supports use of
       an external database, and can help streamline the process of setting up a self-managed
diff --git a/bundle/rhdh/manifests/rhdh-default-config_v1_configmap.yaml b/bundle/rhdh/manifests/rhdh-default-config_v1_configmap.yaml
index 72de26e4..41284e85 100644
--- a/bundle/rhdh/manifests/rhdh-default-config_v1_configmap.yaml
+++ b/bundle/rhdh/manifests/rhdh-default-config_v1_configmap.yaml
@@ -251,26 +251,46 @@ data:
                   type: RuntimeDefault
                 runAsNonRoot: true
                 allowPrivilegeEscalation: false
+              startupProbe:
+                # This gives enough time upon container startup before the liveness and readiness probes are triggered.
+                # Giving (90s = initialDelaySeconds + failureThreshold * periodSeconds = 30 + 3 * 20) to account for the worst case scenario.
+                httpGet:
+                  path: /.backstage/health/v1/liveness
+                  port: backend
+                  scheme: HTTP
+                initialDelaySeconds: 30
+                timeoutSeconds: 4
+                periodSeconds: 20
+                successThreshold: 1
+                failureThreshold: 3
               readinessProbe:
                 failureThreshold: 3
                 httpGet:
-                  path: /healthcheck
-                  port: 7007
+                  path: /.backstage/health/v1/readiness
+                  port: backend
                   scheme: HTTP
-                initialDelaySeconds: 30
+                # Both liveness and readiness probes won't be triggered until the startup probe is successful.
+                # The startup probe is already configured to give enough time for the application to be started.
+                # So removing the additional delay here allows the readiness probe to be checked right away after the startup probe,
+                # which helps make the application available faster to the end-user.
+                #initialDelaySeconds: 30
                 periodSeconds: 10
                 successThreshold: 2
-                timeoutSeconds: 2
+                timeoutSeconds: 4
               livenessProbe:
                 failureThreshold: 3
                 httpGet:
-                  path: /healthcheck
-                  port: 7007
+                  path: /.backstage/health/v1/liveness
+                  port: backend
                   scheme: HTTP
-                initialDelaySeconds: 60
+                # Both liveness and readiness probes won't be triggered until the startup probe is successful.
+                # The startup probe is already configured to give enough time for the application to be started.
+                # So removing the additional delay here allows the liveness probe to be checked right away after the startup probe,
+                # which helps detect and restart an unresponsive application sooner.
+                #initialDelaySeconds: 60
                 periodSeconds: 10
                 successThreshold: 1
-                timeoutSeconds: 2
+                timeoutSeconds: 4
               ports:
                 - name: backend
                   containerPort: 7007
diff --git a/config/profile/rhdh/default-config/deployment.yaml b/config/profile/rhdh/default-config/deployment.yaml
index 13f074d7..608e6221 100644
--- a/config/profile/rhdh/default-config/deployment.yaml
+++ b/config/profile/rhdh/default-config/deployment.yaml
@@ -96,26 +96,46 @@ spec:
               type: RuntimeDefault
             runAsNonRoot: true
             allowPrivilegeEscalation: false
+          startupProbe:
+            # This gives enough time upon container startup before the liveness and readiness probes are triggered.
+            # Giving (90s = initialDelaySeconds + failureThreshold * periodSeconds = 30 + 3 * 20) to account for the worst case scenario.
+            httpGet:
+              path: /.backstage/health/v1/liveness
+              port: backend
+              scheme: HTTP
+            initialDelaySeconds: 30
+            timeoutSeconds: 4
+            periodSeconds: 20
+            successThreshold: 1
+            failureThreshold: 3
           readinessProbe:
             failureThreshold: 3
             httpGet:
-              path: /healthcheck
-              port: 7007
+              path: /.backstage/health/v1/readiness
+              port: backend
               scheme: HTTP
-            initialDelaySeconds: 30
+            # Both liveness and readiness probes won't be triggered until the startup probe is successful.
+            # The startup probe is already configured to give enough time for the application to be started.
+            # So removing the additional delay here allows the readiness probe to be checked right away after the startup probe,
+            # which helps make the application available faster to the end-user.
+            #initialDelaySeconds: 30
             periodSeconds: 10
             successThreshold: 2
-            timeoutSeconds: 2
+            timeoutSeconds: 4
           livenessProbe:
             failureThreshold: 3
             httpGet:
-              path: /healthcheck
-              port: 7007
+              path: /.backstage/health/v1/liveness
+              port: backend
               scheme: HTTP
-            initialDelaySeconds: 60
+            # Both liveness and readiness probes won't be triggered until the startup probe is successful.
+            # The startup probe is already configured to give enough time for the application to be started.
+            # So removing the additional delay here allows the liveness probe to be checked right away after the startup probe,
+            # which helps detect and restart an unresponsive application sooner.
+            #initialDelaySeconds: 60
             periodSeconds: 10
             successThreshold: 1
-            timeoutSeconds: 2
+            timeoutSeconds: 4
           ports:
             - name: backend
               containerPort: 7007