From 9e24c025f9a8ede0153ec9065e2545da54946239 Mon Sep 17 00:00:00 2001 From: Steven Danna Date: Thu, 9 Feb 2023 12:06:29 +0000 Subject: [PATCH] server: avoid leaking tenant stopper We've seen at least one stopper leak during a stress run. I was not able to reproduce it locally, but from the code it seems possible that we are leaking it here if something like this happens. 1. serverController's scanner ticks and starts scanning for expected tenants. 2. The server's stopper stops. 3. The tenant stopper for some tenant, foo, is also stopped in response. This tenant is stopped and removed from the running tenant entries. 4. Back in the serverController, we see that tenant foo isn't started. We try to start it. 5. We make a new stopper, but can't run an async task, so we exit with an error, leaving the stopper. Epic: none Release note: None --- pkg/server/server_controller_orchestration.go | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pkg/server/server_controller_orchestration.go b/pkg/server/server_controller_orchestration.go index b07f8d6f15a4..e8f2f619b8d3 100644 --- a/pkg/server/server_controller_orchestration.go +++ b/pkg/server/server_controller_orchestration.go @@ -211,6 +211,7 @@ func (c *serverController) startControlledServer( tenantCtx := logtags.AddTag(context.Background(), "tenant-orchestration", nil) tenantStopper := stop.NewStopper() + // Ensure that if the surrounding server requests shutdown, we // propagate it to the new server. if err := c.stopper.RunAsyncTask(ctx, "propagate-close", func(ctx context.Context) { @@ -233,6 +234,10 @@ func (c *serverController) startControlledServer( tenantStopper.Stop(tenantCtx) } }); err != nil { + // The goroutine above is responsible for stopping the + // tenantStopper. If it fails to stop, we stop it here + // to avoid leaking the stopper. + tenantStopper.Stop(ctx) return nil, err }