Improve shutdown logic: Wait until no requests are made #12397

Open · wants to merge 7 commits into base: main
10 changes: 0 additions & 10 deletions build/build.sh
@@ -57,13 +57,3 @@ ${GO_BUILD_CMD} \
-X ${PKG}/version.REPO=${REPO_INFO}" \
-buildvcs=false \
-o "${TARGETS_DIR}/dbg" "${PKG}/cmd/dbg"

echo "Building ${PKG}/cmd/waitshutdown"

${GO_BUILD_CMD} \
-trimpath -ldflags="-buildid= -w -s \
-X ${PKG}/version.RELEASE=${TAG} \
-X ${PKG}/version.COMMIT=${COMMIT_SHA} \
-X ${PKG}/version.REPO=${REPO_INFO}" \
-buildvcs=false \
-o "${TARGETS_DIR}/wait-shutdown" "${PKG}/cmd/waitshutdown"
2 changes: 1 addition & 1 deletion charts/ingress-nginx/README.md
@@ -372,7 +372,7 @@ metadata:
| controller.keda.triggers | list | `[]` | |
| controller.kind | string | `"Deployment"` | Use a `DaemonSet` or `Deployment` |
| controller.labels | object | `{}` | Labels to be added to the controller Deployment or DaemonSet and other resources that do not have option to specify labels # |
| controller.lifecycle | object | `{"preStop":{"exec":{"command":["/wait-shutdown"]}}}` | Improve connection draining when ingress controller pod is deleted using a lifecycle hook: With this new hook, we increased the default terminationGracePeriodSeconds from 30 seconds to 300, allowing the draining of connections up to five minutes. If the active connections end before that, the pod will terminate gracefully at that time. To effectively take advantage of this feature, the Configmap feature worker-shutdown-timeout new value is 240s instead of 10s. # |
| controller.lifecycle | object | `{}` | |
| controller.livenessProbe.failureThreshold | int | `5` | |
| controller.livenessProbe.httpGet.path | string | `"/healthz"` | |
| controller.livenessProbe.httpGet.port | int | `10254` | |
13 changes: 1 addition & 12 deletions charts/ingress-nginx/values.yaml
@@ -945,18 +945,7 @@ controller:
# annotations:
# description: Too many 4XXs
# summary: More than 5% of all requests returned 4XX, this requires your attention
# -- Improve connection draining when ingress controller pod is deleted using a lifecycle hook:
# With this new hook, we increased the default terminationGracePeriodSeconds from 30 seconds
# to 300, allowing the draining of connections up to five minutes.
# If the active connections end before that, the pod will terminate gracefully at that time.
# To effectively take advantage of this feature, the Configmap feature
# worker-shutdown-timeout new value is 240s instead of 10s.
##
lifecycle:
preStop:
exec:
command:
- /wait-shutdown
lifecycle: {}
priorityClassName: ""
# -- Rollback limit
##
43 changes: 0 additions & 43 deletions cmd/waitshutdown/main.go

This file was deleted.

65 changes: 64 additions & 1 deletion internal/ingress/controller/nginx.go
@@ -37,6 +37,8 @@ import (
"time"
"unicode"

"k8s.io/ingress-nginx/internal/ingress/metric/collectors"

proxyproto "github.com/armon/go-proxyproto"
"github.com/eapache/channels"
apiv1 "k8s.io/api/core/v1"
@@ -377,6 +379,66 @@ func (n *NGINXController) Start() {
}
}

// stopWait waits until no more requests are made to nginx.
//
// It waits until all of the following conditions are met:
// - No requests have been made to nginx for the last 5 seconds.
// - 'shutdown-grace-period' seconds have passed since this method was called.
//
// Pods behind Kubernetes endpoints are expected to shut down 'gracefully' after receiving SIGTERM -
// we should keep accepting new connections for a while, because Kubernetes updates Service endpoints
// and sends SIGTERM to pods *in parallel*.
// If we don't see new requests for 5 seconds, we assume that this pod has been removed from the upstream
// endpoints (AWS ALB endpoints, for example) and proceed with shutdown.
//
// See https://github.com/kubernetes/kubernetes/issues/106476 for more detail on this issue.
func (n *NGINXController) stopWait() {
const checkFrequency = time.Second
const waitUntilNoConnectionsFor = int((5 * time.Second) / checkFrequency)
waitAtLeastUntil := time.Now().Add(time.Duration(n.cfg.ShutdownGracePeriod) * time.Second)

var scraper collectors.NginxStatusScraper
lastRequests := 0
noChangeTimes := 0
failures := 0
const failureThreshold = 5

for ; ; time.Sleep(checkFrequency) {
st, err := scraper.Scrape()
if err != nil {
klog.Warningf("failed to scrape nginx status: %v", err)
failures++
if failures >= failureThreshold {
klog.Warningf("giving up graceful shutdown: too many nginx status scrape failures")
break
}
continue
}

diff := st.Requests - lastRequests
// We assume that there were no client requests to nginx if and only if
// the handled request count increased by 0 or 1 since the last scrape -
// the 1 accounts for our own stub_status request from this method.
noChange := 0 <= diff && diff <= 1
if noChange {
noChangeTimes++
if noChangeTimes >= waitUntilNoConnectionsFor {
// Safe to proceed with shutdown; we are no longer seeing client requests.
break
}
} else {
noChangeTimes = 0
}
lastRequests = st.Requests
}

// Wait at least for the configured duration, if any
delay := time.Until(waitAtLeastUntil)
if delay > 0 {
time.Sleep(delay)
}
}

// Stop gracefully stops the NGINX master process.
func (n *NGINXController) Stop() error {
n.isShuttingDown = true
@@ -388,7 +450,8 @@ func (n *NGINXController) Stop() error {
return fmt.Errorf("shutdown already in progress")
}

time.Sleep(time.Duration(n.cfg.ShutdownGracePeriod) * time.Second)
klog.InfoS("Graceful shutdown - waiting until no more requests are made")
n.stopWait()

klog.InfoS("Shutting down controller queues")
close(n.stopCh)
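The idea behind stopWait, sketched below as a standalone program. This is an illustration of the polling approach only, not the controller's actual code; scrapeRequests is a hypothetical stand-in for the stub_status scrape that the controller performs through collectors.NginxStatusScraper.

```go
package main

import (
	"fmt"
	"time"
)

// scrapeRequests is a hypothetical stand-in for reading nginx's stub_status
// "requests" counter; in the controller this value comes from
// collectors.NginxStatusScraper.Scrape().
func scrapeRequests() (int, error) {
	return 0, nil // query the local status endpoint here
}

// waitUntilIdle polls the request counter every interval and returns once the
// counter has grown by at most 1 (our own status probe) for idleChecks
// consecutive polls, or once maxFailures consecutive scrapes have failed.
func waitUntilIdle(interval time.Duration, idleChecks, maxFailures int) {
	lastRequests, noChange, failures := 0, 0, 0
	for ; ; time.Sleep(interval) {
		requests, err := scrapeRequests()
		if err != nil {
			failures++
			if failures >= maxFailures {
				fmt.Println("giving up: too many scrape failures")
				return
			}
			continue
		}
		failures = 0

		diff := requests - lastRequests
		if diff >= 0 && diff <= 1 {
			noChange++
			if noChange >= idleChecks {
				return // no client traffic observed for idleChecks polls
			}
		} else {
			noChange = 0
		}
		lastRequests = requests
	}
}

func main() {
	waitUntilIdle(time.Second, 5, 5)
	fmt.Println("safe to stop nginx")
}
```

On top of this loop, stopWait also enforces the lower bound set by --shutdown-grace-period, so nginx is never stopped before the configured grace period has elapsed.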
94 changes: 50 additions & 44 deletions internal/ingress/metric/collectors/nginx_status.go
@@ -17,7 +17,7 @@ limitations under the License.
package collectors

import (
"log"
"fmt"
"regexp"
"strconv"

@@ -35,7 +35,10 @@ var (
)

type (
// NginxStatusScraper scrapes and parses nginx's stub_status output.
NginxStatusScraper struct{}

nginxStatusCollector struct {
scraper NginxStatusScraper
scrapeChan chan scrapeRequest

data *nginxStatusData
@@ -47,7 +50,7 @@ type (
connections *prometheus.Desc
}

basicStatus struct {
// NginxStubStatus holds the counters reported by nginx's stub_status module.
NginxStubStatus struct {
// Active total number of active connections
Active int
// Accepted total number of accepted client connections
@@ -65,6 +68,49 @@ }
}
)

func toInt(data []string, pos int) int {
if len(data) == 0 {
return 0
}
if pos > len(data) {
return 0
}
if v, err := strconv.Atoi(data[pos]); err == nil {
return v
}
return 0
}

func parse(data string) *NginxStubStatus {
acr := ac.FindStringSubmatch(data)
sahrr := sahr.FindStringSubmatch(data)
readingr := reading.FindStringSubmatch(data)
writingr := writing.FindStringSubmatch(data)
waitingr := waiting.FindStringSubmatch(data)

return &NginxStubStatus{
toInt(acr, 1),
toInt(sahrr, 1),
toInt(sahrr, 2),
toInt(sahrr, 3),
toInt(readingr, 1),
toInt(writingr, 1),
toInt(waitingr, 1),
}
}

// Scrape fetches nginx's stub_status page and parses it into an NginxStubStatus.
func (s *NginxStatusScraper) Scrape() (*NginxStubStatus, error) {
klog.V(3).InfoS("starting scraping socket", "path", nginx.StatusPath)
status, data, err := nginx.NewGetStatusRequest(nginx.StatusPath)
if err != nil {
return nil, fmt.Errorf("obtaining nginx status info: %w", err)
}
if status < 200 || status >= 400 {
return nil, fmt.Errorf("obtaining nginx status info (status %v)", status)
}
return parse(string(data)), nil
}

// NGINXStatusCollector defines a status collector interface
type NGINXStatusCollector interface {
prometheus.Collector
@@ -131,54 +177,14 @@ func (p nginxStatusCollector) Stop() {
close(p.scrapeChan)
}

func toInt(data []string, pos int) int {
if len(data) == 0 {
return 0
}
if pos > len(data) {
return 0
}
if v, err := strconv.Atoi(data[pos]); err == nil {
return v
}
return 0
}

func parse(data string) *basicStatus {
acr := ac.FindStringSubmatch(data)
sahrr := sahr.FindStringSubmatch(data)
readingr := reading.FindStringSubmatch(data)
writingr := writing.FindStringSubmatch(data)
waitingr := waiting.FindStringSubmatch(data)

return &basicStatus{
toInt(acr, 1),
toInt(sahrr, 1),
toInt(sahrr, 2),
toInt(sahrr, 3),
toInt(readingr, 1),
toInt(writingr, 1),
toInt(waitingr, 1),
}
}

// nginxStatusCollector scrape the nginx status
func (p nginxStatusCollector) scrape(ch chan<- prometheus.Metric) {
klog.V(3).InfoS("starting scraping socket", "path", nginx.StatusPath)
status, data, err := nginx.NewGetStatusRequest(nginx.StatusPath)
s, err := p.scraper.Scrape()
if err != nil {
log.Printf("%v", err)
klog.Warningf("unexpected error obtaining nginx status info: %v", err)
klog.Warningf("failed to scrape nginx status: %v", err)
return
}

if status < 200 || status >= 400 {
klog.Warningf("unexpected error obtaining nginx status info (status %v)", status)
return
}

s := parse(string(data))

ch <- prometheus.MustNewConstMetric(p.data.connectionsTotal,
prometheus.CounterValue, float64(s.Accepted), "accepted")
ch <- prometheus.MustNewConstMetric(p.data.connectionsTotal,
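Exporting the scraper lets the Prometheus collector and the shutdown path share one scrape-and-parse implementation. A minimal usage sketch follows; it is a hypothetical caller, only compiles inside the ingress-nginx module (collectors is an internal package), only works in the controller process where the nginx status endpoint at nginx.StatusPath is reachable, and uses only fields visible in this diff (Active, Accepted, Requests).

```go
package main

import (
	"k8s.io/ingress-nginx/internal/ingress/metric/collectors"
	"k8s.io/klog/v2"
)

// logNginxStatus is an illustrative helper, not part of this change.
func logNginxStatus() {
	var scraper collectors.NginxStatusScraper

	st, err := scraper.Scrape()
	if err != nil {
		klog.Warningf("failed to scrape nginx status: %v", err)
		return
	}

	// NginxStubStatus mirrors nginx's stub_status counters.
	klog.InfoS("nginx status",
		"active", st.Active,
		"accepted", st.Accepted,
		"requests", st.Requests,
	)
}

func main() {
	logNginxStatus()
}
```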
6 changes: 4 additions & 2 deletions pkg/flags/flags.go
@@ -222,9 +222,11 @@ Takes the form "<host>:port". If not provided, no admission controller is starte

statusUpdateInterval = flags.Int("status-update-interval", status.UpdateInterval, "Time interval in seconds in which the status should check if an update is required. Default is 60 seconds")

shutdownGracePeriod = flags.Int("shutdown-grace-period", 0, "Seconds to wait after receiving the shutdown signal, before stopping the nginx process.")
shutdownGracePeriod = flags.Int("shutdown-grace-period", 10, "Seconds to wait after receiving the shutdown signal, before stopping the nginx process.")

postShutdownGracePeriod = flags.Int("post-shutdown-grace-period", 10, "Seconds to wait after the nginx process has stopped before controller exits.")
postShutdownGracePeriod = flags.Int("post-shutdown-grace-period", 0, `[IN DEPRECATION] Seconds to wait after the nginx process has stopped before the controller exits.
Note that increasing this value does not appear to contribute to a graceful shutdown of the ingress controller.
If you would like to configure the period for accepting requests before shutting down, use 'shutdown-grace-period' instead.`)

deepInspector = flags.Bool("deep-inspect", true, "Enables ingress object security deep inspector")

9 changes: 6 additions & 3 deletions pkg/util/process/sigterm.go
@@ -22,7 +22,7 @@ import (
"syscall"
"time"

klog "k8s.io/klog/v2"
"k8s.io/klog/v2"
)

type exiter func(code int)
@@ -41,8 +41,11 @@ func HandleSigterm(ngx Controller, delay int, exit exiter) {
exitCode = 1
}

klog.Infof("Handled quit, delaying controller exit for %d seconds", delay)
time.Sleep(time.Duration(delay) * time.Second)
if delay > 0 {
klog.Warningf("[DEPRECATED] Delaying controller exit for %d seconds", delay)
klog.Warning("[DEPRECATED] 'post-shutdown-grace-period' has no effect on graceful shutdown - use the 'shutdown-grace-period' flag instead.")
time.Sleep(time.Duration(delay) * time.Second)
}

klog.InfoS("Exiting", "code", exitCode)
exit(exitCode)
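For context, a sketch of how this handler is wired up from a main function. The names here are illustrative rather than the actual cmd/nginx entry point, and it assumes the package's Controller interface only requires Stop() error, which matches the usage above.

```go
package main

import (
	"os"

	"k8s.io/ingress-nginx/pkg/util/process"
)

// fakeController is a stand-in satisfying the Stop() error contract that
// HandleSigterm expects from its Controller argument.
type fakeController struct{}

func (fakeController) Stop() error { return nil }

func main() {
	postShutdownGracePeriod := 0 // value of the now-deprecated --post-shutdown-grace-period

	// Blocks until SIGTERM arrives, calls Stop(), sleeps for the deprecated
	// delay only if it is greater than zero, then exits with the result code.
	process.HandleSigterm(fakeController{}, postShutdownGracePeriod, os.Exit)
}
```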
1 change: 0 additions & 1 deletion rootfs/Dockerfile
@@ -43,7 +43,6 @@ COPY --chown=www-data:www-data etc /etc

COPY --chown=www-data:www-data bin/${TARGETARCH}/dbg /
COPY --chown=www-data:www-data bin/${TARGETARCH}/nginx-ingress-controller /
COPY --chown=www-data:www-data bin/${TARGETARCH}/wait-shutdown /

# Fix permission during the build to avoid issues at runtime
# with volumes (custom templates)
1 change: 0 additions & 1 deletion rootfs/Dockerfile-chroot
@@ -65,7 +65,6 @@ COPY --chown=www-data:www-data etc /chroot/etc

COPY --chown=www-data:www-data bin/${TARGETARCH}/dbg /
COPY --chown=www-data:www-data bin/${TARGETARCH}/nginx-ingress-controller /
COPY --chown=www-data:www-data bin/${TARGETARCH}/wait-shutdown /
COPY --chown=www-data:www-data nginx-chroot-wrapper.sh /usr/bin/nginx

WORKDIR /chroot/etc/nginx
4 changes: 4 additions & 0 deletions test/e2e/framework/deployment.go
@@ -624,6 +624,10 @@ func (f *Framework) ScaleDeploymentToZero(name string) {
assert.Nil(ginkgo.GinkgoT(), err, "getting deployment")
assert.NotNil(ginkgo.GinkgoT(), d, "expected a deployment but none returned")

err = waitForPodsDeleted(f.KubeClientSet, 2*time.Minute, f.Namespace, &metav1.ListOptions{
LabelSelector: labelSelectorToString(d.Spec.Selector.MatchLabels),
})
assert.Nil(ginkgo.GinkgoT(), err, "waiting for no pods")
err = WaitForEndpoints(f.KubeClientSet, DefaultTimeout, name, f.Namespace, 0)
assert.Nil(ginkgo.GinkgoT(), err, "waiting for no endpoints")
}
6 changes: 2 additions & 4 deletions test/e2e/gracefulshutdown/grace_period.go
@@ -41,16 +41,14 @@ var _ = framework.IngressNginxDescribe("[Shutdown] Grace period shutdown", func(
if strings.Contains(v, "--shutdown-grace-period") {
continue
}

args = append(args, v)
}

args = append(args, "--shutdown-grace-period=90")
deployment.Spec.Template.Spec.Containers[0].Args = args
cmds := []string{"/wait-shutdown"}
deployment.Spec.Template.Spec.Containers[0].Lifecycle.PreStop.Exec.Command = cmds

grace := int64(3600)
deployment.Spec.Template.Spec.TerminationGracePeriodSeconds = &grace

_, err := f.KubeClientSet.AppsV1().Deployments(f.Namespace).Update(context.TODO(), deployment, metav1.UpdateOptions{})
return err
})