From a8fa74beb00afd1d4976b63af43857f02ad0dff9 Mon Sep 17 00:00:00 2001
From: Saylor Berman <s.berman@f5.com>
Date: Fri, 8 Sep 2023 10:40:57 -0600
Subject: [PATCH 1/4] Add readiness probe

Problem: The NKG Pod would report Ready as soon as it started, which could lead to traffic being sent too soon before nginx was actually configured.

Solution: Add a readiness probe that will report Ready once the controller has written its first config to nginx (or has nothing to do on startup).

Also changed the metrics "disable" helm flag to "enable" to be consistent and easier to read.
---
 Makefile                                      |   2 +-
 cmd/gateway/commands.go                       |  29 ++-
 cmd/gateway/commands_test.go                  |  29 +++
 .../provisioner/static-deployment.yaml        |  10 +
 deploy/helm-chart/README.md                   |  47 ++--
 deploy/helm-chart/templates/deployment.yaml   |  21 +-
 deploy/helm-chart/values.yaml                 |  14 +-
 deploy/manifests/nginx-gateway.yaml           |   9 +
 docs/cli-help.md                              |   8 +-
 internal/mode/static/config/config.go         |  10 +
 internal/mode/static/handler.go               |   8 +-
 internal/mode/static/handler_test.go          |   6 +
 internal/mode/static/health.go                |  20 ++
 internal/mode/static/health_test.go           |  16 ++
 internal/mode/static/manager.go               | 214 ++++++++++--------
 15 files changed, 314 insertions(+), 129 deletions(-)
 create mode 100644 internal/mode/static/health.go
 create mode 100644 internal/mode/static/health_test.go

diff --git a/Makefile b/Makefile
index 52f371b676..670c7b3427 100644
--- a/Makefile
+++ b/Makefile
@@ -135,7 +135,7 @@ build-nkg-debug-image: debug-build build-nkg-image ## Build NKG image with debug
 generate-manifests: ## Generate manifests using Helm.
 	cp $(CHART_DIR)/crds/* $(MANIFEST_DIR)/crds/
 	helm template nginx-gateway $(CHART_DIR) $(HELM_TEMPLATE_COMMON_ARGS) $(HELM_TEMPLATE_EXTRA_ARGS_FOR_ALL_MANIFESTS_FILE) -n nginx-gateway | cat $(strip $(MANIFEST_DIR))/namespace.yaml - > $(strip $(MANIFEST_DIR))/nginx-gateway.yaml
-	helm template nginx-gateway $(CHART_DIR) $(HELM_TEMPLATE_COMMON_ARGS) --set metrics.disable=true -n nginx-gateway -s templates/deployment.yaml > conformance/provisioner/static-deployment.yaml
+	helm template nginx-gateway $(CHART_DIR) $(HELM_TEMPLATE_COMMON_ARGS) --set metrics.enable=false -n nginx-gateway -s templates/deployment.yaml > conformance/provisioner/static-deployment.yaml
 	helm template nginx-gateway $(CHART_DIR) $(HELM_TEMPLATE_COMMON_ARGS) -n nginx-gateway -s templates/service.yaml > $(strip $(MANIFEST_DIR))/service/loadbalancer.yaml
 	helm template nginx-gateway $(CHART_DIR) $(HELM_TEMPLATE_COMMON_ARGS) --set service.annotations.'service\.beta\.kubernetes\.io\/aws-load-balancer-type'="nlb" -n nginx-gateway -s templates/service.yaml > $(strip $(MANIFEST_DIR))/service/loadbalancer-aws-nlb.yaml
 	helm template nginx-gateway $(CHART_DIR) $(HELM_TEMPLATE_COMMON_ARGS) --set service.type=NodePort --set service.externalTrafficPolicy="" -n nginx-gateway -s templates/service.yaml > $(strip $(MANIFEST_DIR))/service/nodeport.yaml
diff --git a/cmd/gateway/commands.go b/cmd/gateway/commands.go
index 498780db14..e712776a85 100644
--- a/cmd/gateway/commands.go
+++ b/cmd/gateway/commands.go
@@ -9,6 +9,7 @@ import (
 	"go.uber.org/zap"
 	"k8s.io/apimachinery/pkg/types"
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
+	"sigs.k8s.io/controller-runtime/pkg/log"
 	ctlrZap "sigs.k8s.io/controller-runtime/pkg/log/zap"
 
 	"github.com/nginxinc/nginx-kubernetes-gateway/internal/mode/provisioner"
@@ -42,11 +43,16 @@ var (
 	updateGCStatus bool
 	disableMetrics bool
 	metricsSecure  bool
+	disableHealth  bool
 
 	metricsListenPort = intValidatingValue{
 		validator: validatePort,
 		value:     9113,
 	}
+	healthListenPort = intValidatingValue{
+		validator: validatePort,
+		value:     8081,
+	}
 	gateway    = namespacedNameValue{}
 	configName = stringValidatingValue{
 		validator: validateResourceName,
@@ -92,6 +98,7 @@ func createStaticModeCommand() *cobra.Command {
 				"commit", commit,
 				"date", date,
 			)
+			log.SetLogger(logger)
 
 			podIP := os.Getenv("POD_IP")
 			if err := validateIP(podIP); err != nil {
@@ -115,6 +122,12 @@ func createStaticModeCommand() *cobra.Command {
 				metricsConfig.Secure = metricsSecure
 			}
 
+			healthConfig := config.HealthConfig{}
+			if !disableHealth {
+				healthConfig.Enabled = true
+				healthConfig.Port = healthListenPort.value
+			}
+
 			conf := config.Config{
 				GatewayCtlrName:          gatewayCtlrName.value,
 				ConfigName:               configName.String(),
@@ -126,6 +139,7 @@ func createStaticModeCommand() *cobra.Command {
 				GatewayNsName:            gwNsName,
 				UpdateGatewayClassStatus: updateGCStatus,
 				MetricsConfig:            metricsConfig,
+				HealthConfig:             healthConfig,
 			}
 
 			if err := static.StartManager(conf); err != nil {
@@ -171,7 +185,7 @@ func createStaticModeCommand() *cobra.Command {
 	cmd.Flags().Var(
 		&metricsListenPort,
 		"metrics-port",
-		"Set the port where the metrics are exposed. Format: [1023 - 65535]",
+		"Set the port where the metrics are exposed. Format: [1024 - 65535]",
 	)
 
 	cmd.Flags().BoolVar(
@@ -182,6 +196,19 @@ func createStaticModeCommand() *cobra.Command {
 			" Please note that this endpoint will be secured with a self-signed certificate.",
 	)
 
+	cmd.Flags().BoolVar(
+		&disableHealth,
+		"health-disable",
+		false,
+		"Disable running the health probe server.",
+	)
+
+	cmd.Flags().Var(
+		&healthListenPort,
+		"health-port",
+		"Set the port where the health probe server is exposed. Format: [1024 - 65535]",
+	)
+
 	return cmd
 }
 
diff --git a/cmd/gateway/commands_test.go b/cmd/gateway/commands_test.go
index a8fe461447..f6a4006e3f 100644
--- a/cmd/gateway/commands_test.go
+++ b/cmd/gateway/commands_test.go
@@ -121,6 +121,8 @@ func TestStaticModeCmdFlagValidation(t *testing.T) {
 				"--metrics-port=9114",
 				"--metrics-disable",
 				"--metrics-secure-serving",
+				"--health-port=8081",
+				"--health-disable",
 			},
 			wantErr: false,
 		},
@@ -214,6 +216,33 @@ func TestStaticModeCmdFlagValidation(t *testing.T) {
 			expectedErrPrefix: `invalid argument "999" for "--metrics-secure-serving" flag: strconv.ParseBool:` +
 				` parsing "999": invalid syntax`,
 		},
+		{
+			name: "health-port is invalid type",
+			args: []string{
+				"--health-port=invalid", // not an int
+			},
+			wantErr: true,
+			expectedErrPrefix: `invalid argument "invalid" for "--health-port" flag: failed to parse int value:` +
+				` strconv.ParseInt: parsing "invalid": invalid syntax`,
+		},
+		{
+			name: "health-port is outside of range",
+			args: []string{
+				"--health-port=999", // outside of range
+			},
+			wantErr: true,
+			expectedErrPrefix: `invalid argument "999" for "--health-port" flag:` +
+				` port outside of valid port range [1024 - 65535]: 999`,
+		},
+		{
+			name: "health-disable is not a bool",
+			args: []string{
+				"--health-disable=999", // not a bool
+			},
+			wantErr: true,
+			expectedErrPrefix: `invalid argument "999" for "--health-disable" flag: strconv.ParseBool:` +
+				` parsing "999": invalid syntax`,
+		},
 	}
 
 	for _, test := range tests {
diff --git a/conformance/provisioner/static-deployment.yaml b/conformance/provisioner/static-deployment.yaml
index fe2cbd20de..38c66a247f 100644
--- a/conformance/provisioner/static-deployment.yaml
+++ b/conformance/provisioner/static-deployment.yaml
@@ -29,6 +29,7 @@ spec:
         - --gatewayclass=nginx
         - --config=nginx-gateway-config
         - --metrics-disable
+        - --health-port=8081
         env:
         - name: POD_IP
           valueFrom:
@@ -41,6 +42,15 @@ spec:
         image: ghcr.io/nginxinc/nginx-kubernetes-gateway:edge
         imagePullPolicy: Always
         name: nginx-gateway
+        ports:
+        - name: health
+          containerPort: 8081
+        readinessProbe:
+          httpGet:
+            path: /readyz
+            port: health
+          initialDelaySeconds: 3
+          periodSeconds: 1
         securityContext:
           allowPrivilegeEscalation: false
           capabilities:
diff --git a/deploy/helm-chart/README.md b/deploy/helm-chart/README.md
index a9b05469ad..c3479d5604 100644
--- a/deploy/helm-chart/README.md
+++ b/deploy/helm-chart/README.md
@@ -132,25 +132,28 @@ kubectl delete -f https://github.com/kubernetes-sigs/gateway-api/releases/downlo
 
 The following tables lists the configurable parameters of the NGINX Kubernetes Gateway chart and their default values.
 
-| Parameter                            | Description                                                                                                                                                                                                  | Default Value                                                                                                   |
-|--------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|-----------------------------------------------------------------------------------------------------------------|
-| `nginxGateway.image.repository`      | The repository for the NGINX Kubernetes Gateway image.                                                                                                                                                       | ghcr.io/nginxinc/nginx-kubernetes-gateway                                                                       |
-| `nginxGateway.image.tag`             | The tag for the NGINX Kubernetes Gateway image.                                                                                                                                                              | edge                                                                                                            |
-| `nginxGateway.image.pullPolicy`      | The `imagePullPolicy` for the NGINX Kubernetes Gateway image.                                                                                                                                                | Always                                                                                                          |
-| `nginxGateway.gatewayClassName`      | The name of the GatewayClass for the NGINX Kubernetes Gateway deployment.                                                                                                                                    | nginx                                                                                                           |
-| `nginxGateway.gatewayControllerName` | The name of the Gateway controller. The controller name must be of the form: DOMAIN/PATH. The controller's domain is gateway.nginx.org.                                                                  | gateway.nginx.org/nginx-gateway-controller                                                                  |
-| `nginxGateway.kind`                  | The kind of the NGINX Kubernetes Gateway installation - currently, only Deployment is supported.                                                                                                             | deployment                                                                                                      |
-|`nginxGateway.config`                 | The dynamic configuration for the control plane that is contained in the NginxGateway resource                                                                                                               | [See nginxGateway.config section](values.yaml) |
-| `nginx.image.repository`             | The repository for the NGINX image.                                                                                                                                                                          | ghcr.io/nginxinc/nginx-kubernetes-gateway/nginx                                                                 |
-| `nginx.image.tag`                    | The tag for the NGINX image.                                                                                                                                                                                 | edge                                                                                                            |
-| `nginx.image.pullPolicy`             | The `imagePullPolicy` for the NGINX image.                                                                                                                                                                   | Always                                                                                                          |
-| `serviceAccount.annotations`         | The `annotations` for the ServiceAccount used by the NGINX Kubernetes Gateway deployment.                                                                                                                    | {}                                                                                                              |
-| `serviceAccount.name`                | Name of the ServiceAccount used by the NGINX Kubernetes Gateway deployment.                                                                                                                                  | Autogenerated                                                                                                   |
-| `service.create`                     | Creates a service to expose the NGINX Kubernetes Gateway pods.                                                                                                                                               | true                                                                                                            |
-| `service.type`                       | The type of service to create for the NGINX Kubernetes Gateway.                                                                                                                                              | Loadbalancer                                                                                                    |
-| `service.externalTrafficPolicy`      | The `externalTrafficPolicy` of the service. The value `Local` preserves the client source IP.                                                                                                                | Local                                                                                                           |
-| `service.annotations`                | The `annotations` of the NGINX Kubernetes Gateway service.                                                                                                                                                   | {}                                                                                                              |
-| `service.ports`                      | A list of ports to expose through the NGINX Kubernetes Gateway service. Update it to match the listener ports from your Gateway resource. Follows the conventional Kubernetes yaml syntax for service ports. | [ port: 80, targetPort: 80, protocol: TCP, name: http; port: 443, targetPort: 443, protocol: TCP, name: https ] |
-| `metrics.disable`                    | Disable exposing metrics in the Prometheus format.                                                                                                                                                   |false                                                                                                              |
-| `metrics.port`                       | Set the port where the Prometheus metrics are exposed. Format: [1024 - 65535]                                                                                                                                  |9113                                                                                                              |
-| `metrics.secure`                     | Enable serving metrics via https. By default metrics are served via http. Please note that this endpoint will be secured with a self-signed certificate.                                                                                                              |false                                                                                                              |
+| Parameter | Description | Default Value |
+|-----------|-------------|---------------|
+| `nginxGateway.image.repository` | The repository for the NGINX Kubernetes Gateway image. | ghcr.io/nginxinc/nginx-kubernetes-gateway |
+| `nginxGateway.image.tag` | The tag for the NGINX Kubernetes Gateway image. | edge |
+| `nginxGateway.image.pullPolicy` | The `imagePullPolicy` for the NGINX Kubernetes Gateway image. | Always |
+| `nginxGateway.gatewayClassName` | The name of the GatewayClass for the NGINX Kubernetes Gateway deployment. | nginx |
+| `nginxGateway.gatewayControllerName` | The name of the Gateway controller. The controller name must be of the form: DOMAIN/PATH. The controller's domain is gateway.nginx.org. | gateway.nginx.org/nginx-gateway-controller |
+| `nginxGateway.kind` | The kind of the NGINX Kubernetes Gateway installation - currently, only Deployment is supported. | deployment |
+| `nginxGateway.config` | The dynamic configuration for the control plane that is contained in the NginxGateway resource. | [See nginxGateway.config section](values.yaml) |
+| `nginxGateway.readinessProbe.enable` | Enable the /readyz endpoint on the control plane. | true |
+| `nginxGateway.readinessProbe.port` | Port in which the readiness endpoint is exposed. | 8081 |
+| `nginxGateway.readinessProbe.initialDelaySeconds` | PThe number of seconds after the Pod has started before the readiness probes are initiated. | 3 |
+| `nginx.image.repository` | The repository for the NGINX image. | ghcr.io/nginxinc/nginx-kubernetes-gateway/nginx |
+| `nginx.image.tag` | The tag for the NGINX image.  | edge |
+| `nginx.image.pullPolicy` | The `imagePullPolicy` for the NGINX image. | Always  |
+| `serviceAccount.annotations` | The `annotations` for the ServiceAccount used by the NGINX Kubernetes Gateway deployment. | {} |
+| `serviceAccount.name` | Name of the ServiceAccount used by the NGINX Kubernetes Gateway deployment. | Autogenerated |
+| `service.create` | Creates a service to expose the NGINX Kubernetes Gateway pods. | true |
+| `service.type` | The type of service to create for the NGINX Kubernetes Gateway. | Loadbalancer |
+| `service.externalTrafficPolicy` | The `externalTrafficPolicy` of the service. The value `Local` preserves the client source IP. | Local |
+| `service.annotations` | The `annotations` of the NGINX Kubernetes Gateway service. | {} |
+| `service.ports` | A list of ports to expose through the NGINX Kubernetes Gateway service. Update it to match the listener ports from your Gateway resource. Follows the conventional Kubernetes yaml syntax for service ports. | [ port: 80, targetPort: 80, protocol: TCP, name: http; port: 443, targetPort: 443, protocol: TCP, name: https ] |
+| `metrics.enable` | Enable exposing metrics in the Prometheus format. | true |
+| `metrics.port` | Set the port where the Prometheus metrics are exposed. Format: [1024 - 65535] | 9113 |
+| `metrics.secure` | Enable serving metrics via https. By default metrics are served via http. Please note that this endpoint will be secured with a self-signed certificate. | false |
diff --git a/deploy/helm-chart/templates/deployment.yaml b/deploy/helm-chart/templates/deployment.yaml
index 8bf7492e15..3e062d903f 100644
--- a/deploy/helm-chart/templates/deployment.yaml
+++ b/deploy/helm-chart/templates/deployment.yaml
@@ -16,7 +16,7 @@ spec:
     metadata:
       labels:
       {{- include "nginx-gateway.selectorLabels" . | nindent 8 }}
-      {{- if not .Values.metrics.disable }}
+      {{- if .Values.metrics.enable }}
       annotations:
         prometheus.io/scrape: "true"
         prometheus.io/port: "{{ .Values.metrics.port }}"
@@ -31,7 +31,7 @@ spec:
         - --gateway-ctlr-name={{ .Values.nginxGateway.gatewayControllerName }}
         - --gatewayclass={{ .Values.nginxGateway.gatewayClassName }}
         - --config={{ include "nginx-gateway.config-name" . }}
-        {{- if not .Values.metrics.disable }}
+        {{- if .Values.metrics.enable }}
         - --metrics-port={{ .Values.metrics.port }}
         {{- if .Values.metrics.secure  }}
         - --metrics-secure-serving
@@ -39,6 +39,11 @@ spec:
         {{- else }}
         - --metrics-disable
         {{- end }}
+        {{- if .Values.nginxGateway.readinessProbe.enable }}
+        - --health-port={{ .Values.nginxGateway.readinessProbe.port }}
+        {{- else }}
+        - --health-disable
+        {{- end }}
         env:
         - name: POD_IP
           valueFrom:
@@ -51,11 +56,21 @@ spec:
         image: {{ .Values.nginxGateway.image.repository }}:{{ .Values.nginxGateway.image.tag | default .Chart.AppVersion }}
         imagePullPolicy: {{ .Values.nginxGateway.image.pullPolicy }}
         name: nginx-gateway
-        {{- if not .Values.metrics.disable }}
         ports:
+        {{- if .Values.metrics.enable }}
         - name: metrics
           containerPort: {{ .Values.metrics.port }}
         {{- end }}
+        {{- if .Values.nginxGateway.readinessProbe.enable }}
+        - name: health
+          containerPort: {{ .Values.nginxGateway.readinessProbe.port }}
+        readinessProbe:
+          httpGet:
+            path: /readyz
+            port: health
+          initialDelaySeconds: {{ .Values.nginxGateway.readinessProbe.initialDelaySeconds }}
+          periodSeconds: 1
+        {{- end }}
         securityContext:
           allowPrivilegeEscalation: false
           capabilities:
diff --git a/deploy/helm-chart/values.yaml b/deploy/helm-chart/values.yaml
index fd3a77007e..7d5134d124 100644
--- a/deploy/helm-chart/values.yaml
+++ b/deploy/helm-chart/values.yaml
@@ -14,6 +14,16 @@ nginxGateway:
       ## Log level. Supported values "info", "debug", "error".
       level: info
 
+  ## Defines the settings for the control plane readiness probe. This probe returns Ready when the controller
+  ## has started and configured NGINX to serve traffic.
+  readinessProbe:
+    ## Enable the /readyz endpoint on the control plane.
+    enable: true
+    ## Port in which the readiness endpoint is exposed.
+    port: 8081
+    ## The number of seconds after the Pod has started before the readiness probes are initiated.
+    initialDelaySeconds: 3
+
   image:
     ## The NGINX Kubernetes Gateway image to use
     repository: ghcr.io/nginxinc/nginx-kubernetes-gateway
@@ -56,8 +66,8 @@ service:
     name: https
 
 metrics:
-  ## Disable exposing metrics in the Prometheus format.
-  disable: false
+  ## Enable exposing metrics in the Prometheus format.
+  enable: true
   ## Set the port where the Prometheus metrics are exposed. Format: [1024 - 65535]
   port: 9113
   ## Enable serving metrics via https. By default metrics are served via http.
diff --git a/deploy/manifests/nginx-gateway.yaml b/deploy/manifests/nginx-gateway.yaml
index 129147c3db..4c94453bfc 100644
--- a/deploy/manifests/nginx-gateway.yaml
+++ b/deploy/manifests/nginx-gateway.yaml
@@ -133,6 +133,7 @@ spec:
         - --gatewayclass=nginx
         - --config=nginx-gateway-config
         - --metrics-port=9113
+        - --health-port=8081
         env:
         - name: POD_IP
           valueFrom:
@@ -148,6 +149,14 @@ spec:
         ports:
         - name: metrics
           containerPort: 9113
+        - name: health
+          containerPort: 8081
+        readinessProbe:
+          httpGet:
+            path: /readyz
+            port: health
+          initialDelaySeconds: 3
+          periodSeconds: 1
         securityContext:
           allowPrivilegeEscalation: false
           capabilities:
diff --git a/docs/cli-help.md b/docs/cli-help.md
index fb1adeb9de..60154b694c 100644
--- a/docs/cli-help.md
+++ b/docs/cli-help.md
@@ -14,12 +14,14 @@ Usage:
 
 Flags:
 
-| Name                         | Type     | Description                                                                                                                                                                                                                                                                                                                                                                                       |
-|------------------------------|----------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| Name | Type | Description |
+|------|------|-------------|
 | `gateway-ctlr-name`          | `string` | The name of the Gateway controller. The controller name must be of the form: `DOMAIN/PATH`. The controller's domain is `gateway.nginx.org`. |
 | `gatewayclass`               | `string` | The name of the GatewayClass resource. Every NGINX Gateway must have a unique corresponding GatewayClass resource. |
 | `gateway`                    | `string` | The namespaced name of the Gateway resource to use. Must be of the form: `NAMESPACE/NAME`. If not specified, the control plane will process all Gateways for the configured GatewayClass. However, among them, it will choose the oldest resource by creation timestamp. If the timestamps are equal, it will choose the resource that appears first in alphabetical order by {namespace}/{name}. |
 | `config`                     | `string` | The name of the NginxGateway resource to be used for this controller's dynamic configuration. Lives in the same Namespace as the controller. |
 | `metrics-disable`            | `bool`   | Disable exposing metrics in the Prometheus format. (default false) |
-| `metrics-listen-port`        | `int`    | Sets the port where the Prometheus metrics are exposed. Format: `[1024 - 65535]` (default `9113`) |
+| `metrics-port`               | `int`    | Sets the port where the Prometheus metrics are exposed. Format: `[1024 - 65535]` (default `9113`) |
 | `metrics-secure-serving`     | `bool`   | Configures if the metrics endpoint should be secured using https. Please note that this endpoint will be secured with a self-signed certificate. (default false) |
+| `health-disable`             | `bool`   | Disable running the health probe server. (default false) |
+| `health-port`                | `int`    | Set the port where the health probe server is exposed. Format: `[1024 - 65535]` (default `8081`) |
diff --git a/internal/mode/static/config/config.go b/internal/mode/static/config/config.go
index 09bdcf1240..de92fdfd2c 100644
--- a/internal/mode/static/config/config.go
+++ b/internal/mode/static/config/config.go
@@ -28,6 +28,8 @@ type Config struct {
 	UpdateGatewayClassStatus bool
 	// MetricsConfig specifies the metrics config.
 	MetricsConfig MetricsConfig
+	// HealthConfig specifies the health probe config.
+	HealthConfig HealthConfig
 }
 
 // MetricsConfig specifies the metrics config.
@@ -39,3 +41,11 @@ type MetricsConfig struct {
 	// Secure is the flag for toggling the metrics endpoint to https.
 	Secure bool
 }
+
+// HealthConfig specifies the health probe config.
+type HealthConfig struct {
+	// Port is the port that the health probe server listens on.
+	Port int
+	// Enabled is the flag for toggling the health probe server on or off.
+	Enabled bool
+}
diff --git a/internal/mode/static/handler.go b/internal/mode/static/handler.go
index 144cc0adc2..0987a572f0 100644
--- a/internal/mode/static/handler.go
+++ b/internal/mode/static/handler.go
@@ -41,12 +41,14 @@ type eventHandlerConfig struct {
 	eventRecorder record.EventRecorder
 	// logLevelSetter is used to update the logging level.
 	logLevelSetter ZapLogLevelSetter
-	// logger is the logger to be used by the EventHandler.
-	logger logr.Logger
+	// healthChecker sets the health of the Pod to Ready once we've written out our initial config
+	healthChecker *healthChecker
 	// controlConfigNSName is the NamespacedName of the NginxGateway config for this controller.
 	controlConfigNSName types.NamespacedName
 	// version is the current version number of the nginx config.
 	version int
+	// logger is the logger to be used by the EventHandler.
+	logger logr.Logger
 }
 
 // eventHandlerImpl implements EventHandler.
@@ -88,6 +90,7 @@ func (h *eventHandlerImpl) HandleEventBatch(ctx context.Context, batch events.Ev
 	changed, graph := h.cfg.processor.Process()
 	if !changed {
 		h.cfg.logger.Info("Handling events didn't result into NGINX configuration changes")
+		h.cfg.healthChecker.ready = true
 		return
 	}
 
@@ -101,6 +104,7 @@ func (h *eventHandlerImpl) HandleEventBatch(ctx context.Context, batch events.Ev
 		nginxReloadRes.error = err
 	} else {
 		h.cfg.logger.Info("NGINX configuration was successfully updated")
+		h.cfg.healthChecker.ready = true
 	}
 
 	h.cfg.statusUpdater.Update(ctx, buildStatuses(graph, nginxReloadRes))
diff --git a/internal/mode/static/handler_test.go b/internal/mode/static/handler_test.go
index a6b0fa4a42..c50795e043 100644
--- a/internal/mode/static/handler_test.go
+++ b/internal/mode/static/handler_test.go
@@ -73,8 +73,10 @@ var _ = Describe("eventHandler", func() {
 			nginxRuntimeMgr:     fakeNginxRuntimeMgr,
 			statusUpdater:       fakeStatusUpdater,
 			eventRecorder:       fakeEventRecorder,
+			healthChecker:       &healthChecker{},
 			controlConfigNSName: types.NamespacedName{Namespace: namespace, Name: configName},
 		})
+		Expect(handler.cfg.healthChecker.ready).To(BeFalse())
 	})
 
 	Describe("Process the Gateway API resources events", func() {
@@ -103,6 +105,10 @@ var _ = Describe("eventHandler", func() {
 			fakeGenerator.GenerateReturns(fakeCfgFiles)
 		})
 
+		AfterEach(func() {
+			Expect(handler.cfg.healthChecker.ready).To(BeTrue())
+		})
+
 		When("a batch has one event", func() {
 			It("should process Upsert", func() {
 				e := &events.UpsertEvent{Resource: &v1beta1.HTTPRoute{}}
diff --git a/internal/mode/static/health.go b/internal/mode/static/health.go
new file mode 100644
index 0000000000..48b84e31ce
--- /dev/null
+++ b/internal/mode/static/health.go
@@ -0,0 +1,20 @@
+package static
+
+import (
+	"errors"
+	"net/http"
+)
+
+type healthChecker struct {
+	ready bool
+}
+
+// readyCheck returns the ready-state of the Pod. It satisfies the controller-runtime Checker type.
+// We are considered ready when we have written out the initial nginx configuration and can serve traffic.
+func (h *healthChecker) readyCheck(_ *http.Request) error {
+	if !h.ready {
+		return errors.New("nginx has not yet been configured on startup")
+	}
+
+	return nil
+}
diff --git a/internal/mode/static/health_test.go b/internal/mode/static/health_test.go
new file mode 100644
index 0000000000..5e03558da7
--- /dev/null
+++ b/internal/mode/static/health_test.go
@@ -0,0 +1,16 @@
+package static
+
+import (
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+)
+
+var _ = Describe("health", func() {
+	It("returns an error if not ready", func() {
+		hc := healthChecker{}
+		Expect(hc.readyCheck(nil)).ToNot(Succeed())
+
+		hc.ready = true
+		Expect(hc.readyCheck(nil)).To(Succeed())
+	})
+})
diff --git a/internal/mode/static/manager.go b/internal/mode/static/manager.go
index ac5a239775..e6ebbc2345 100644
--- a/internal/mode/static/manager.go
+++ b/internal/mode/static/manager.go
@@ -45,6 +45,11 @@ const (
 	clusterTimeout = 10 * time.Second
 )
 
+type ctlrCfg struct {
+	objectType client.Object
+	options    []controller.Option
+}
+
 var scheme = runtime.NewScheme()
 
 func init() {
@@ -55,14 +60,16 @@ func init() {
 }
 
 func StartManager(cfg config.Config) error {
-	logger := cfg.Logger
-
 	options := manager.Options{
 		Scheme:  scheme,
-		Logger:  logger,
+		Logger:  cfg.Logger,
 		Metrics: getMetricsOptions(cfg.MetricsConfig),
 	}
 
+	if cfg.HealthConfig.Enabled {
+		options.HealthProbeBindAddress = fmt.Sprintf(":%d", cfg.HealthConfig.Port)
+	}
+
 	eventCh := make(chan interface{})
 
 	clusterCfg := ctlr.GetConfigOrDie()
@@ -73,16 +80,118 @@ func StartManager(cfg config.Config) error {
 		return fmt.Errorf("cannot build runtime manager: %w", err)
 	}
 
+	hc := &healthChecker{}
+	if cfg.HealthConfig.Enabled {
+		if err := mgr.AddReadyzCheck("readyz", hc.readyCheck); err != nil {
+			return fmt.Errorf("error adding ready check: %w", err)
+		}
+	}
+
 	recorderName := fmt.Sprintf("nginx-kubernetes-gateway-%s", cfg.GatewayClassName)
 	recorder := mgr.GetEventRecorderFor(recorderName)
 	logLevelSetter := newZapLogLevelSetter(cfg.AtomicLevel)
 
+	ctx := ctlr.SetupSignalHandler()
+
+	controlConfigNSName := types.NamespacedName{
+		Namespace: cfg.Namespace,
+		Name:      cfg.ConfigName,
+	}
+	if err := registerControllers(ctx, cfg, mgr, recorder, logLevelSetter, eventCh, controlConfigNSName); err != nil {
+		return err
+	}
+
+	// protectedPorts is the map of ports that may not be configured by a listener, and the name of what it is used for
+	protectedPorts := map[int32]string{
+		int32(cfg.MetricsConfig.Port): "MetricsPort",
+	}
+
+	processor := state.NewChangeProcessorImpl(state.ChangeProcessorConfig{
+		GatewayCtlrName:      cfg.GatewayCtlrName,
+		GatewayClassName:     cfg.GatewayClassName,
+		RelationshipCapturer: relationship.NewCapturerImpl(),
+		Logger:               cfg.Logger.WithName("changeProcessor"),
+		Validators: validation.Validators{
+			HTTPFieldsValidator: ngxvalidation.HTTPValidator{},
+		},
+		EventRecorder:  recorder,
+		Scheme:         scheme,
+		ProtectedPorts: protectedPorts,
+	})
+
+	// Clear the configuration folders to ensure that no files are left over in case the control plane was restarted
+	// (this assumes the folders are in a shared volume).
+	removedPaths, err := file.ClearFolders(file.NewStdLibOSFileManager(), ngxcfg.ConfigFolders)
+	for _, path := range removedPaths {
+		cfg.Logger.Info("removed configuration file", "path", path)
+	}
+	if err != nil {
+		return fmt.Errorf("cannot clear NGINX configuration folders: %w", err)
+	}
+
+	statusUpdater := status.NewUpdater(status.UpdaterConfig{
+		GatewayCtlrName:          cfg.GatewayCtlrName,
+		GatewayClassName:         cfg.GatewayClassName,
+		Client:                   mgr.GetClient(),
+		PodIP:                    cfg.PodIP,
+		Logger:                   cfg.Logger.WithName("statusUpdater"),
+		Clock:                    status.NewRealClock(),
+		UpdateGatewayClassStatus: cfg.UpdateGatewayClassStatus,
+	})
+
+	eventHandler := newEventHandlerImpl(eventHandlerConfig{
+		processor:           processor,
+		serviceResolver:     resolver.NewServiceResolverImpl(mgr.GetClient()),
+		generator:           ngxcfg.NewGeneratorImpl(),
+		logger:              cfg.Logger.WithName("eventHandler"),
+		logLevelSetter:      logLevelSetter,
+		nginxFileMgr:        file.NewManagerImpl(cfg.Logger.WithName("nginxFileManager"), file.NewStdLibOSFileManager()),
+		nginxRuntimeMgr:     ngxruntime.NewManagerImpl(),
+		statusUpdater:       statusUpdater,
+		eventRecorder:       recorder,
+		healthChecker:       hc,
+		controlConfigNSName: controlConfigNSName,
+	})
+
+	objects, objectLists := prepareFirstEventBatchPreparerArgs(cfg.GatewayClassName, cfg.GatewayNsName)
+	firstBatchPreparer := events.NewFirstEventBatchPreparerImpl(mgr.GetCache(), objects, objectLists)
+	eventLoop := events.NewEventLoop(
+		eventCh,
+		cfg.Logger.WithName("eventLoop"),
+		eventHandler,
+		firstBatchPreparer,
+	)
+
+	if err = mgr.Add(eventLoop); err != nil {
+		return fmt.Errorf("cannot register event loop: %w", err)
+	}
+
+	// Ensure NGINX is running before registering metrics & starting the manager.
+	if err := ngxruntime.EnsureNginxRunning(ctx); err != nil {
+		return fmt.Errorf("NGINX is not running: %w", err)
+	}
+
+	if cfg.MetricsConfig.Enabled {
+		if err := configureNginxMetrics(cfg.GatewayClassName); err != nil {
+			return err
+		}
+	}
+
+	cfg.Logger.Info("Starting manager")
+	return mgr.Start(ctx)
+}
+
+func registerControllers(
+	ctx context.Context,
+	cfg config.Config,
+	mgr manager.Manager,
+	recorder record.EventRecorder,
+	logLevelSetter zapSetterImpl,
+	eventCh chan interface{},
+	controlConfigNSName types.NamespacedName,
+) error {
 	// Note: for any new object type or a change to the existing one,
 	// make sure to also update prepareFirstEventBatchPreparerArgs()
-	type ctlrCfg struct {
-		objectType client.Object
-		options    []controller.Option
-	}
 	controllerRegCfgs := []ctlrCfg{
 		{
 			objectType: &gatewayv1beta1.GatewayClass{},
@@ -131,10 +240,6 @@ func StartManager(cfg config.Config) error {
 		},
 	}
 
-	controlConfigNSName := types.NamespacedName{
-		Namespace: cfg.Namespace,
-		Name:      cfg.ConfigName,
-	}
 	if cfg.ConfigName != "" {
 		controllerRegCfgs = append(controllerRegCfgs,
 			ctlrCfg{
@@ -145,7 +250,8 @@ func StartManager(cfg config.Config) error {
 			})
 		if err := setInitialConfig(
 			mgr.GetAPIReader(),
-			logger, recorder,
+			cfg.Logger,
+			recorder,
 			logLevelSetter,
 			controlConfigNSName,
 		); err != nil {
@@ -153,8 +259,6 @@ func StartManager(cfg config.Config) error {
 		}
 	}
 
-	ctx := ctlr.SetupSignalHandler()
-
 	for _, regCfg := range controllerRegCfgs {
 		if err := controller.Register(
 			ctx,
@@ -167,87 +271,7 @@ func StartManager(cfg config.Config) error {
 		}
 	}
 
-	// protectedPorts is the map of ports that may not be configured by a listener, and the name of what it is used for
-	protectedPorts := map[int32]string{
-		int32(cfg.MetricsConfig.Port): "MetricsPort",
-	}
-
-	processor := state.NewChangeProcessorImpl(state.ChangeProcessorConfig{
-		GatewayCtlrName:      cfg.GatewayCtlrName,
-		GatewayClassName:     cfg.GatewayClassName,
-		RelationshipCapturer: relationship.NewCapturerImpl(),
-		Logger:               cfg.Logger.WithName("changeProcessor"),
-		Validators: validation.Validators{
-			HTTPFieldsValidator: ngxvalidation.HTTPValidator{},
-		},
-		EventRecorder:  recorder,
-		Scheme:         scheme,
-		ProtectedPorts: protectedPorts,
-	})
-
-	configGenerator := ngxcfg.NewGeneratorImpl()
-
-	// Clear the configuration folders to ensure that no files are left over in case the control plane was restarted
-	// (this assumes the folders are in a shared volume).
-	removedPaths, err := file.ClearFolders(file.NewStdLibOSFileManager(), ngxcfg.ConfigFolders)
-	for _, path := range removedPaths {
-		logger.Info("removed configuration file", "path", path)
-	}
-	if err != nil {
-		return fmt.Errorf("cannot clear NGINX configuration folders: %w", err)
-	}
-
-	nginxFileMgr := file.NewManagerImpl(logger.WithName("nginxFileManager"), file.NewStdLibOSFileManager())
-	nginxRuntimeMgr := ngxruntime.NewManagerImpl()
-	statusUpdater := status.NewUpdater(status.UpdaterConfig{
-		GatewayCtlrName:          cfg.GatewayCtlrName,
-		GatewayClassName:         cfg.GatewayClassName,
-		Client:                   mgr.GetClient(),
-		PodIP:                    cfg.PodIP,
-		Logger:                   cfg.Logger.WithName("statusUpdater"),
-		Clock:                    status.NewRealClock(),
-		UpdateGatewayClassStatus: cfg.UpdateGatewayClassStatus,
-	})
-
-	eventHandler := newEventHandlerImpl(eventHandlerConfig{
-		processor:           processor,
-		serviceResolver:     resolver.NewServiceResolverImpl(mgr.GetClient()),
-		generator:           configGenerator,
-		logger:              cfg.Logger.WithName("eventHandler"),
-		logLevelSetter:      logLevelSetter,
-		nginxFileMgr:        nginxFileMgr,
-		nginxRuntimeMgr:     nginxRuntimeMgr,
-		statusUpdater:       statusUpdater,
-		eventRecorder:       recorder,
-		controlConfigNSName: controlConfigNSName,
-	})
-
-	objects, objectLists := prepareFirstEventBatchPreparerArgs(cfg.GatewayClassName, cfg.GatewayNsName)
-	firstBatchPreparer := events.NewFirstEventBatchPreparerImpl(mgr.GetCache(), objects, objectLists)
-
-	eventLoop := events.NewEventLoop(
-		eventCh,
-		cfg.Logger.WithName("eventLoop"),
-		eventHandler,
-		firstBatchPreparer)
-
-	if err = mgr.Add(eventLoop); err != nil {
-		return fmt.Errorf("cannot register event loop: %w", err)
-	}
-
-	// Ensure NGINX is running before registering metrics & starting the manager.
-	if err := ngxruntime.EnsureNginxRunning(ctx); err != nil {
-		return fmt.Errorf("NGINX is not running: %w", err)
-	}
-
-	if cfg.MetricsConfig.Enabled {
-		if err := configureNginxMetrics(cfg.GatewayClassName); err != nil {
-			return err
-		}
-	}
-
-	logger.Info("Starting manager")
-	return mgr.Start(ctx)
+	return nil
 }
 
 func prepareFirstEventBatchPreparerArgs(

From ee7179f8c94e1b851009970c899a6db40bcd8df9 Mon Sep 17 00:00:00 2001
From: Saylor Berman <s.berman@f5.com>
Date: Mon, 11 Sep 2023 08:33:36 -0600
Subject: [PATCH 2/4] Code review changes

---
 internal/mode/static/health_test.go            | 18 +++++++++---------
 internal/mode/static/manager.go                | 11 ++++++-----
 internal/mode/static/state/graph/graph_test.go |  1 +
 3 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/internal/mode/static/health_test.go b/internal/mode/static/health_test.go
index 5e03558da7..77dfa30ed9 100644
--- a/internal/mode/static/health_test.go
+++ b/internal/mode/static/health_test.go
@@ -1,16 +1,16 @@
 package static
 
 import (
-	. "github.com/onsi/ginkgo/v2"
+	"testing"
+
 	. "github.com/onsi/gomega"
 )
 
-var _ = Describe("health", func() {
-	It("returns an error if not ready", func() {
-		hc := healthChecker{}
-		Expect(hc.readyCheck(nil)).ToNot(Succeed())
+func TestReadyCheck(t *testing.T) {
+	g := NewWithT(t)
+	hc := healthChecker{}
+	g.Expect(hc.readyCheck(nil)).ToNot(Succeed())
 
-		hc.ready = true
-		Expect(hc.readyCheck(nil)).To(Succeed())
-	})
-})
+	hc.ready = true
+	g.Expect(hc.readyCheck(nil)).To(Succeed())
+}
diff --git a/internal/mode/static/manager.go b/internal/mode/static/manager.go
index e6ebbc2345..90e65db6d3 100644
--- a/internal/mode/static/manager.go
+++ b/internal/mode/static/manager.go
@@ -45,11 +45,6 @@ const (
 	clusterTimeout = 10 * time.Second
 )
 
-type ctlrCfg struct {
-	objectType client.Object
-	options    []controller.Option
-}
-
 var scheme = runtime.NewScheme()
 
 func init() {
@@ -104,6 +99,7 @@ func StartManager(cfg config.Config) error {
 	// protectedPorts is the map of ports that may not be configured by a listener, and the name of what it is used for
 	protectedPorts := map[int32]string{
 		int32(cfg.MetricsConfig.Port): "MetricsPort",
+		int32(cfg.HealthConfig.Port):  "HealthPort",
 	}
 
 	processor := state.NewChangeProcessorImpl(state.ChangeProcessorConfig{
@@ -190,6 +186,11 @@ func registerControllers(
 	eventCh chan interface{},
 	controlConfigNSName types.NamespacedName,
 ) error {
+	type ctlrCfg struct {
+		objectType client.Object
+		options    []controller.Option
+	}
+
 	// Note: for any new object type or a change to the existing one,
 	// make sure to also update prepareFirstEventBatchPreparerArgs()
 	controllerRegCfgs := []ctlrCfg{
diff --git a/internal/mode/static/state/graph/graph_test.go b/internal/mode/static/state/graph/graph_test.go
index d45445d5d5..d7340036ac 100644
--- a/internal/mode/static/state/graph/graph_test.go
+++ b/internal/mode/static/state/graph/graph_test.go
@@ -24,6 +24,7 @@ func TestBuildGraph(t *testing.T) {
 
 	protectedPorts := ProtectedPorts{
 		9113: "MetricsPort",
+		8081: "HealthPort",
 	}
 
 	createValidRuleWithBackendRefs := func(refs []BackendRef) Rule {

From acd4b840176ea789213dffc1abd3cafd1f7ea95c Mon Sep 17 00:00:00 2001
From: Saylor Berman <s.berman@f5.com>
Date: Tue, 12 Sep 2023 13:19:31 -0600
Subject: [PATCH 3/4] Add lock and more validation

---
 cmd/gateway/commands.go              | 15 ++++++-----
 cmd/gateway/validation.go            | 14 ++++++++++
 cmd/gateway/validation_test.go       |  7 +++++
 deploy/helm-chart/README.md          |  2 +-
 docs/architecture.md                 | 10 +++++++
 internal/mode/static/handler.go      | 15 ++++++++---
 internal/mode/static/handler_test.go | 40 ++++++++++++++++++++++++++++
 internal/mode/static/health.go       | 24 ++++++++++++++---
 8 files changed, 112 insertions(+), 15 deletions(-)

diff --git a/cmd/gateway/commands.go b/cmd/gateway/commands.go
index e712776a85..505939bce6 100644
--- a/cmd/gateway/commands.go
+++ b/cmd/gateway/commands.go
@@ -100,6 +100,10 @@ func createStaticModeCommand() *cobra.Command {
 			)
 			log.SetLogger(logger)
 
+			if err := ensureNoPortCollisions(metricsListenPort.value, healthListenPort.value); err != nil {
+				return fmt.Errorf("error validating ports: %w", err)
+			}
+
 			podIP := os.Getenv("POD_IP")
 			if err := validateIP(podIP); err != nil {
 				return fmt.Errorf("error validating POD_IP environment variable: %w", err)
@@ -122,12 +126,6 @@ func createStaticModeCommand() *cobra.Command {
 				metricsConfig.Secure = metricsSecure
 			}
 
-			healthConfig := config.HealthConfig{}
-			if !disableHealth {
-				healthConfig.Enabled = true
-				healthConfig.Port = healthListenPort.value
-			}
-
 			conf := config.Config{
 				GatewayCtlrName:          gatewayCtlrName.value,
 				ConfigName:               configName.String(),
@@ -139,7 +137,10 @@ func createStaticModeCommand() *cobra.Command {
 				GatewayNsName:            gwNsName,
 				UpdateGatewayClassStatus: updateGCStatus,
 				MetricsConfig:            metricsConfig,
-				HealthConfig:             healthConfig,
+				HealthConfig: config.HealthConfig{
+					Enabled: !disableHealth,
+					Port:    healthListenPort.value,
+				},
 			}
 
 			if err := static.StartManager(conf); err != nil {
diff --git a/cmd/gateway/validation.go b/cmd/gateway/validation.go
index 8a40d7970f..46b0c56ae7 100644
--- a/cmd/gateway/validation.go
+++ b/cmd/gateway/validation.go
@@ -107,3 +107,17 @@ func validatePort(port int) error {
 	}
 	return nil
 }
+
+// ensureNoPortCollisions checks if the same port has been defined multiple times
+func ensureNoPortCollisions(ports ...int) error {
+	seen := make(map[int]struct{})
+
+	for _, port := range ports {
+		if _, ok := seen[port]; ok {
+			return fmt.Errorf("port %d has been defined multiple times", port)
+		}
+		seen[port] = struct{}{}
+	}
+
+	return nil
+}
diff --git a/cmd/gateway/validation_test.go b/cmd/gateway/validation_test.go
index 4bf6781475..b3865a88b4 100644
--- a/cmd/gateway/validation_test.go
+++ b/cmd/gateway/validation_test.go
@@ -331,3 +331,10 @@ func TestValidatePort(t *testing.T) {
 		})
 	}
 }
+
+func TestEnsureNoPortCollisions(t *testing.T) {
+	g := NewWithT(t)
+
+	g.Expect(ensureNoPortCollisions(9113, 8081)).To(Succeed())
+	g.Expect(ensureNoPortCollisions(9113, 9113)).ToNot(Succeed())
+}
diff --git a/deploy/helm-chart/README.md b/deploy/helm-chart/README.md
index c3479d5604..88f41ae6f5 100644
--- a/deploy/helm-chart/README.md
+++ b/deploy/helm-chart/README.md
@@ -143,7 +143,7 @@ The following tables lists the configurable parameters of the NGINX Kubernetes G
 | `nginxGateway.config` | The dynamic configuration for the control plane that is contained in the NginxGateway resource. | [See nginxGateway.config section](values.yaml) |
 | `nginxGateway.readinessProbe.enable` | Enable the /readyz endpoint on the control plane. | true |
 | `nginxGateway.readinessProbe.port` | Port in which the readiness endpoint is exposed. | 8081 |
-| `nginxGateway.readinessProbe.initialDelaySeconds` | PThe number of seconds after the Pod has started before the readiness probes are initiated. | 3 |
+| `nginxGateway.readinessProbe.initialDelaySeconds` | The number of seconds after the Pod has started before the readiness probes are initiated. | 3 |
 | `nginx.image.repository` | The repository for the NGINX image. | ghcr.io/nginxinc/nginx-kubernetes-gateway/nginx |
 | `nginx.image.tag` | The tag for the NGINX image.  | edge |
 | `nginx.image.pullPolicy` | The `imagePullPolicy` for the NGINX image. | Always  |
diff --git a/docs/architecture.md b/docs/architecture.md
index 026135587e..ebe95b83fa 100644
--- a/docs/architecture.md
+++ b/docs/architecture.md
@@ -144,3 +144,13 @@ API to update the handled resources' statuses and emit events.
 [conf-file]: https://github.com/nginxinc/nginx-kubernetes-gateway/blob/main/internal/mode/static/nginx/conf/nginx.conf
 
 [share]: https://kubernetes.io/docs/tasks/configure-pod-container/share-process-namespace/
+
+## Pod Readiness
+
+The `nginx-gateway` container includes a readiness endpoint available via the `/readyz` path. This endpoint
+is periodically checked by a [readiness probe][readiness] on startup, and returns a 200 OK response when the Pod is
+ready to accept traffic for the data plane. The Pod will become Ready after the control plane successfully starts.
+If there are relevant Gateway API resources in the cluster, the control plane will also generate the first NGINX
+configuration and successfully reload NGINX before the Pod is considered Ready.
+
+[readiness]: (https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes)
diff --git a/internal/mode/static/handler.go b/internal/mode/static/handler.go
index 0987a572f0..d69f45cb6e 100644
--- a/internal/mode/static/handler.go
+++ b/internal/mode/static/handler.go
@@ -45,10 +45,10 @@ type eventHandlerConfig struct {
 	healthChecker *healthChecker
 	// controlConfigNSName is the NamespacedName of the NginxGateway config for this controller.
 	controlConfigNSName types.NamespacedName
-	// version is the current version number of the nginx config.
-	version int
 	// logger is the logger to be used by the EventHandler.
 	logger logr.Logger
+	// version is the current version number of the nginx config.
+	version int
 }
 
 // eventHandlerImpl implements EventHandler.
@@ -90,7 +90,9 @@ func (h *eventHandlerImpl) HandleEventBatch(ctx context.Context, batch events.Ev
 	changed, graph := h.cfg.processor.Process()
 	if !changed {
 		h.cfg.logger.Info("Handling events didn't result into NGINX configuration changes")
-		h.cfg.healthChecker.ready = true
+		if !h.cfg.healthChecker.ready && h.cfg.healthChecker.firstBatchError == nil {
+			h.cfg.healthChecker.setAsReady()
+		}
 		return
 	}
 
@@ -102,9 +104,14 @@ func (h *eventHandlerImpl) HandleEventBatch(ctx context.Context, batch events.Ev
 	); err != nil {
 		h.cfg.logger.Error(err, "Failed to update NGINX configuration")
 		nginxReloadRes.error = err
+		if !h.cfg.healthChecker.ready {
+			h.cfg.healthChecker.firstBatchError = err
+		}
 	} else {
 		h.cfg.logger.Info("NGINX configuration was successfully updated")
-		h.cfg.healthChecker.ready = true
+		if !h.cfg.healthChecker.ready {
+			h.cfg.healthChecker.setAsReady()
+		}
 	}
 
 	h.cfg.statusUpdater.Update(ctx, buildStatuses(graph, nginxReloadRes))
diff --git a/internal/mode/static/handler_test.go b/internal/mode/static/handler_test.go
index c50795e043..04600ce227 100644
--- a/internal/mode/static/handler_test.go
+++ b/internal/mode/static/handler_test.go
@@ -2,6 +2,7 @@ package static
 
 import (
 	"context"
+	"errors"
 
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
@@ -220,6 +221,45 @@ var _ = Describe("eventHandler", func() {
 		})
 	})
 
+	It("should set the health checker status properly when there are changes", func() {
+		e := &events.UpsertEvent{Resource: &v1beta1.HTTPRoute{}}
+		batch := []interface{}{e}
+
+		fakeProcessor.ProcessReturns(true, &graph.Graph{})
+
+		Expect(handler.cfg.healthChecker.readyCheck(nil)).ToNot(Succeed())
+		handler.HandleEventBatch(context.Background(), batch)
+		Expect(handler.cfg.healthChecker.readyCheck(nil)).To(Succeed())
+	})
+
+	It("should set the health checker status properly when there are no changes or errors", func() {
+		e := &events.UpsertEvent{Resource: &v1beta1.HTTPRoute{}}
+		batch := []interface{}{e}
+
+		Expect(handler.cfg.healthChecker.readyCheck(nil)).ToNot(Succeed())
+		handler.HandleEventBatch(context.Background(), batch)
+		Expect(handler.cfg.healthChecker.readyCheck(nil)).To(Succeed())
+	})
+
+	It("should set the health checker status properly when there is an error", func() {
+		e := &events.UpsertEvent{Resource: &v1beta1.HTTPRoute{}}
+		batch := []interface{}{e}
+
+		fakeProcessor.ProcessReturns(true, &graph.Graph{})
+		fakeNginxRuntimeMgr.ReloadReturns(errors.New("reload error"))
+
+		handler.HandleEventBatch(context.Background(), batch)
+
+		Expect(handler.cfg.healthChecker.readyCheck(nil)).ToNot(Succeed())
+
+		// now send an update with no changes; should still return an error
+		fakeProcessor.ProcessReturns(false, &graph.Graph{})
+
+		handler.HandleEventBatch(context.Background(), batch)
+
+		Expect(handler.cfg.healthChecker.readyCheck(nil)).ToNot(Succeed())
+	})
+
 	It("should panic for an unknown event type", func() {
 		e := &struct{}{}
 
diff --git a/internal/mode/static/health.go b/internal/mode/static/health.go
index 48b84e31ce..3a764c347b 100644
--- a/internal/mode/static/health.go
+++ b/internal/mode/static/health.go
@@ -3,18 +3,36 @@ package static
 import (
 	"errors"
 	"net/http"
+	"sync"
 )
 
 type healthChecker struct {
-	ready bool
+	// firstBatchError is set when the first batch fails to configure nginx
+	// and we don't want to set ourselves as ready on the next batch if nothing changes
+	firstBatchError error
+	lock            sync.RWMutex
+	ready           bool
 }
 
 // readyCheck returns the ready-state of the Pod. It satisfies the controller-runtime Checker type.
-// We are considered ready when we have written out the initial nginx configuration and can serve traffic.
+// We are considered ready after the handler processed the first batch. In case there is NGINX configuration
+// to write, it must be written and NGINX must be reloaded successfully.
 func (h *healthChecker) readyCheck(_ *http.Request) error {
+	h.lock.RLock()
+	defer h.lock.RUnlock()
+
 	if !h.ready {
-		return errors.New("nginx has not yet been configured on startup")
+		return errors.New("nginx has not yet become ready to accept traffic")
 	}
 
 	return nil
 }
+
+// setAsReady marks the health check as ready.
+func (h *healthChecker) setAsReady() {
+	h.lock.Lock()
+	defer h.lock.Unlock()
+
+	h.ready = true
+	h.firstBatchError = nil
+}

From adaf882f554722d2f70ec15ca80c54ad5a6e2400 Mon Sep 17 00:00:00 2001
From: Saylor Berman <s.berman@f5.com>
Date: Fri, 15 Sep 2023 09:55:25 -0600
Subject: [PATCH 4/4] Enhance test

---
 internal/mode/static/handler_test.go | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/internal/mode/static/handler_test.go b/internal/mode/static/handler_test.go
index 04600ce227..007a6cca52 100644
--- a/internal/mode/static/handler_test.go
+++ b/internal/mode/static/handler_test.go
@@ -258,6 +258,14 @@ var _ = Describe("eventHandler", func() {
 		handler.HandleEventBatch(context.Background(), batch)
 
 		Expect(handler.cfg.healthChecker.readyCheck(nil)).ToNot(Succeed())
+
+		// error goes away
+		fakeProcessor.ProcessReturns(true, &graph.Graph{})
+		fakeNginxRuntimeMgr.ReloadReturns(nil)
+
+		handler.HandleEventBatch(context.Background(), batch)
+
+		Expect(handler.cfg.healthChecker.readyCheck(nil)).To(Succeed())
 	})
 
 	It("should panic for an unknown event type", func() {