From 3d725af50361ee5213b22877d205af5df04939ab Mon Sep 17 00:00:00 2001 From: Armel Soro Date: Fri, 13 Dec 2024 16:30:35 +0100 Subject: [PATCH] fix: adjust startup, liveness and readiness probes settings (#564) (#565) * fix: adjust startup, liveness and readiness probes settings Startup probe settings seem to have been added in the upstream Backstage Chart in [1], but the current settings in the RHDH Chart do not allow the liveness probe to be triggered often enough for the app to be considered live. This adjusts these settings to account for the worst case scenario where the application might take a long time to start. This also aligns the probe endpoints with the upstream chart. [1] https://github.com/backstage/charts/pull/216 * Update bundle/rhdh/manifests/rhdh-default-config_v1_configmap.yaml * Regenerate bundle manifests * Apply suggestions from code review * Regenerate bundle manifests --------- Co-authored-by: Patrick Knight Co-authored-by: Gustavo Lira e Silva Co-authored-by: github-actions[bot] Co-authored-by: rm3l Co-authored-by: Nick Boldt Co-authored-by: Tomas Kral --- ...kstage-operator.clusterserviceversion.yaml | 2 +- ...kstage-operator.clusterserviceversion.yaml | 2 +- .../rhdh-default-config_v1_configmap.yaml | 36 ++++++++++++++----- .../rhdh/default-config/deployment.yaml | 36 ++++++++++++++----- 4 files changed, 58 insertions(+), 18 deletions(-) diff --git a/bundle/backstage.io/manifests/backstage-operator.clusterserviceversion.yaml b/bundle/backstage.io/manifests/backstage-operator.clusterserviceversion.yaml index 91af82c8..ce61f9f0 100644 --- a/bundle/backstage.io/manifests/backstage-operator.clusterserviceversion.yaml +++ b/bundle/backstage.io/manifests/backstage-operator.clusterserviceversion.yaml @@ -35,7 +35,7 @@ metadata: } } ] - createdAt: "2024-11-15T19:48:10Z" + createdAt: "2024-12-13T14:46:15Z" description: Backstage Operator operators.operatorframework.io/builder: operator-sdk-v1.37.0 
operators.operatorframework.io/project_layout: go.kubebuilder.io/v4 diff --git a/bundle/rhdh/manifests/backstage-operator.clusterserviceversion.yaml b/bundle/rhdh/manifests/backstage-operator.clusterserviceversion.yaml index ba20c03a..b76b27b3 100644 --- a/bundle/rhdh/manifests/backstage-operator.clusterserviceversion.yaml +++ b/bundle/rhdh/manifests/backstage-operator.clusterserviceversion.yaml @@ -39,7 +39,7 @@ metadata: categories: Developer Tools certified: "true" containerImage: registry-proxy.engineering.redhat.com/rh-osbs/rhdh-rhdh-rhel9-operator:1.3 - createdAt: "2024-11-27T11:19:54Z" + createdAt: "2024-12-13T14:46:16Z" description: Red Hat Developer Hub is a Red Hat supported version of Backstage. It comes with pre-built plug-ins and configuration settings, supports use of an external database, and can help streamline the process of setting up a self-managed diff --git a/bundle/rhdh/manifests/rhdh-default-config_v1_configmap.yaml b/bundle/rhdh/manifests/rhdh-default-config_v1_configmap.yaml index 72de26e4..41284e85 100644 --- a/bundle/rhdh/manifests/rhdh-default-config_v1_configmap.yaml +++ b/bundle/rhdh/manifests/rhdh-default-config_v1_configmap.yaml @@ -251,26 +251,46 @@ data: type: RuntimeDefault runAsNonRoot: true allowPrivilegeEscalation: false + startupProbe: + # This gives enough time upon container startup before the liveness and readiness probes are triggered. + # Giving (90s = initialDelaySeconds + failureThreshold * periodSeconds) to account for the worst case scenario. + httpGet: + path: /.backstage/health/v1/liveness + port: backend + scheme: HTTP + initialDelaySeconds: 30 + timeoutSeconds: 4 + periodSeconds: 20 + successThreshold: 1 + failureThreshold: 3 readinessProbe: failureThreshold: 3 httpGet: - path: /healthcheck - port: 7007 + path: /.backstage/health/v1/readiness + port: backend scheme: HTTP - initialDelaySeconds: 30 + # Both liveness and readiness probes won't be triggered until the startup probe is successful. 
+ # The startup probe is already configured to give enough time for the application to be started. + # So removing the additional delay here allows the readiness probe to be checked right away after the startup probe, + # which helps make the application available faster to the end-user. + #initialDelaySeconds: 30 periodSeconds: 10 successThreshold: 2 - timeoutSeconds: 2 + timeoutSeconds: 4 livenessProbe: failureThreshold: 3 httpGet: - path: /healthcheck - port: 7007 + path: /.backstage/health/v1/liveness + port: backend scheme: HTTP - initialDelaySeconds: 60 + # Both liveness and readiness probes won't be triggered until the startup probe is successful. + # The startup probe is already configured to give enough time for the application to be started. + # So removing the additional delay here allows the liveness probe to be checked right away after the startup probe, + # which helps make the application available faster to the end-user. + #initialDelaySeconds: 60 periodSeconds: 10 successThreshold: 1 - timeoutSeconds: 2 + timeoutSeconds: 4 ports: - name: backend containerPort: 7007 diff --git a/config/profile/rhdh/default-config/deployment.yaml b/config/profile/rhdh/default-config/deployment.yaml index 13f074d7..608e6221 100644 --- a/config/profile/rhdh/default-config/deployment.yaml +++ b/config/profile/rhdh/default-config/deployment.yaml @@ -96,26 +96,46 @@ spec: type: RuntimeDefault runAsNonRoot: true allowPrivilegeEscalation: false + startupProbe: + # This gives enough time upon container startup before the liveness and readiness probes are triggered. + # Giving (90s = initialDelaySeconds + failureThreshold * periodSeconds) to account for the worst case scenario. 
+ httpGet: + path: /.backstage/health/v1/liveness + port: backend + scheme: HTTP + initialDelaySeconds: 30 + timeoutSeconds: 4 + periodSeconds: 20 + successThreshold: 1 + failureThreshold: 3 readinessProbe: failureThreshold: 3 httpGet: - path: /healthcheck - port: 7007 + path: /.backstage/health/v1/readiness + port: backend scheme: HTTP - initialDelaySeconds: 30 + # Both liveness and readiness probes won't be triggered until the startup probe is successful. + # The startup probe is already configured to give enough time for the application to be started. + # So removing the additional delay here allows the readiness probe to be checked right away after the startup probe, + # which helps make the application available faster to the end-user. + #initialDelaySeconds: 30 periodSeconds: 10 successThreshold: 2 - timeoutSeconds: 2 + timeoutSeconds: 4 livenessProbe: failureThreshold: 3 httpGet: - path: /healthcheck - port: 7007 + path: /.backstage/health/v1/liveness + port: backend scheme: HTTP - initialDelaySeconds: 60 + # Both liveness and readiness probes won't be triggered until the startup probe is successful. + # The startup probe is already configured to give enough time for the application to be started. + # So removing the additional delay here allows the liveness probe to be checked right away after the startup probe, + # which helps make the application available faster to the end-user. + #initialDelaySeconds: 60 periodSeconds: 10 successThreshold: 1 - timeoutSeconds: 2 + timeoutSeconds: 4 ports: - name: backend containerPort: 7007