From be63cb92f548c392e5eded4e0bda031461e3a22e Mon Sep 17 00:00:00 2001
From: Seth Hoenig
Date: Wed, 9 Nov 2022 09:39:42 -0600
Subject: [PATCH] template: protect use of template manager with a lock

This PR protects access to `templateHook.templateManager` with its lock. So
far we have not been able to reproduce the panic - but it seems either
Poststart is running without a Prestart being run first (should be
impossible), or the Update hook is running concurrently with Poststart,
nil-ing out the templateManager in a race with Poststart.

Fixes #15189
---
 .changelog/15192.txt                          |  3 +++
 .../allocrunner/taskrunner/template_hook.go   | 23 ++++++++++++-------
 2 files changed, 18 insertions(+), 8 deletions(-)
 create mode 100644 .changelog/15192.txt

diff --git a/.changelog/15192.txt b/.changelog/15192.txt
new file mode 100644
index 00000000000..abe797edd61
--- /dev/null
+++ b/.changelog/15192.txt
@@ -0,0 +1,3 @@
+```release-note:bug
+template: Fixed a bug where template could cause agent panic on startup
+```
diff --git a/client/allocrunner/taskrunner/template_hook.go b/client/allocrunner/taskrunner/template_hook.go
index 30949bac38e..593403e991d 100644
--- a/client/allocrunner/taskrunner/template_hook.go
+++ b/client/allocrunner/taskrunner/template_hook.go
@@ -112,11 +112,18 @@ func (h *templateHook) Prestart(ctx context.Context, req *interfaces.TaskPrestar
 }
 
 func (h *templateHook) Poststart(ctx context.Context, req *interfaces.TaskPoststartRequest, resp *interfaces.TaskPoststartResponse) error {
+	h.managerLock.Lock()
+	defer h.managerLock.Unlock()
+
+	if h.templateManager == nil {
+		return nil
+	}
+
 	if req.DriverExec != nil {
 		h.templateManager.SetDriverHandle(req.DriverExec)
 	} else {
-		for _, template := range h.config.templates {
-			if template.ChangeMode == structs.TemplateChangeModeScript {
+		for _, tmpl := range h.config.templates {
+			if tmpl.ChangeMode == structs.TemplateChangeModeScript {
 				return fmt.Errorf("template has change mode set to 'script' but the driver it uses does not provide exec capability")
 			}
 		}
@@ -161,12 +168,12 @@ func (h *templateHook) Stop(ctx context.Context, req *interfaces.TaskStopRequest
 	return nil
 }
 
-// Handle new Vault token
+// Update is used to handle updates to vault and/or nomad tokens.
 func (h *templateHook) Update(ctx context.Context, req *interfaces.TaskUpdateRequest, resp *interfaces.TaskUpdateResponse) error {
 	h.managerLock.Lock()
 	defer h.managerLock.Unlock()
 
-	// Nothing to do
+	// no template manager to manage
 	if h.templateManager == nil {
 		return nil
 	}
@@ -178,15 +185,15 @@ func (h *templateHook) Update(ctx context.Context, req *interfaces.TaskUpdateReq
 		h.vaultToken = req.VaultToken
 	}
 
-	// Shutdown the old template
+	// shutdown the old template
 	h.templateManager.Stop()
 	h.templateManager = nil
 
-	// Create the new template
+	// create the new template
 	if _, err := h.newManager(); err != nil {
-		err := fmt.Errorf("failed to build template manager: %v", err)
+		err = fmt.Errorf("failed to build template manager: %v", err)
 		h.logger.Error("failed to build template manager", "error", err)
-		h.config.lifecycle.Kill(context.Background(),
+		_ = h.config.lifecycle.Kill(context.Background(),
 			structs.NewTaskEvent(structs.TaskKilling).
 				SetFailsTask().
 				SetDisplayMessage(fmt.Sprintf("Template update %v", err)))