From 3d21c924d8b0373ae0b1d2b3a6da1cd577c8fe11 Mon Sep 17 00:00:00 2001 From: drivebyer Date: Tue, 3 Sep 2024 16:59:13 +0800 Subject: [PATCH] fix: multiple controllers running concurrently after leadership is lost --- pkg/controller/controller.go | 4 ++++ pkg/controller/main-controller.go | 7 ++++++- 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pkg/controller/controller.go b/pkg/controller/controller.go index 2bfc6542533..d1bb88a4e1a 100644 --- a/pkg/controller/controller.go +++ b/pkg/controller/controller.go @@ -207,6 +207,10 @@ func setupSignalHandler() (stopCh <-chan struct{}) { return stop } +func shutdown() error { + return syscall.Kill(os.Getpid(), syscall.SIGTERM) +} + // Result contains the result of a sync invocation. type Result struct { // Requeue tells the Controller to requeue the reconcile key. Defaults to false. diff --git a/pkg/controller/main-controller.go b/pkg/controller/main-controller.go index 431e5d0c842..d9ef12f45ed 100644 --- a/pkg/controller/main-controller.go +++ b/pkg/controller/main-controller.go @@ -492,7 +492,7 @@ func leaderRun(ctx context.Context, c *Controller, threadiness int, stopCh <-cha for { select { case oerr := <-notificationChannel: - if !errors.Is(oerr.Err, http.ErrServerClosed) { + if oerr != nil && !errors.Is(oerr.Err, http.ErrServerClosed) { klog.Errorf("STS API Server stopped: %v, going to restart", oerr.Err) go c.startSTSAPIServer(ctx, notificationChannel) } @@ -586,6 +586,11 @@ func (c *Controller) Start(threadiness int, stopCh <-chan struct{}) error { OnStoppedLeading: func() { // we can do cleanup here klog.Infof("leader lost: %s", c.podName) + // When we lose leadership, we should do cleanup, such as stop the controller. + if err := shutdown(); err != nil { + klog.Errorf("error shutting down: %v", err) + } + }, OnNewLeader: func(identity string) { // we're notified when new leader elected