From b41002eb86d4843e6de76fbde051f6df43ac049a Mon Sep 17 00:00:00 2001 From: hc-github-team-nomad-core <82989552+hc-github-team-nomad-core@users.noreply.github.com> Date: Wed, 24 Jul 2024 15:17:14 +0200 Subject: [PATCH] Backport of template: allow change_mode script to run after client restart into release/1.8.x (#23676) Co-authored-by: Tim Gross --- .changelog/23663.txt | 3 + .../taskrunner/task_runner_hooks.go | 1 + .../taskrunner/template/template.go | 2 +- .../allocrunner/taskrunner/template_hook.go | 13 +++- .../taskrunner/template_hook_test.go | 69 +++++++++++++++++++ 5 files changed, 85 insertions(+), 3 deletions(-) create mode 100644 .changelog/23663.txt diff --git a/.changelog/23663.txt b/.changelog/23663.txt new file mode 100644 index 00000000000..99452f34563 --- /dev/null +++ b/.changelog/23663.txt @@ -0,0 +1,3 @@ +```release-note:bug +template: Fixed a bug where change_mode = "script" would not execute after a client restart +``` diff --git a/client/allocrunner/taskrunner/task_runner_hooks.go b/client/allocrunner/taskrunner/task_runner_hooks.go index b0fdcea1a4d..5b5f64708b3 100644 --- a/client/allocrunner/taskrunner/task_runner_hooks.go +++ b/client/allocrunner/taskrunner/task_runner_hooks.go @@ -126,6 +126,7 @@ func (tr *TaskRunner) initHooks() { consulNamespace: consulNamespace, nomadNamespace: tr.alloc.Job.Namespace, renderOnTaskRestart: task.RestartPolicy.RenderTemplates, + driverHandle: tr.handle, })) } diff --git a/client/allocrunner/taskrunner/template/template.go b/client/allocrunner/taskrunner/template/template.go index a5cd317f96b..f329ee0121f 100644 --- a/client/allocrunner/taskrunner/template/template.go +++ b/client/allocrunner/taskrunner/template/template.go @@ -585,7 +585,7 @@ func (tm *TaskTemplateManager) processScript(script *structs.ChangeScript, wg *s if tm.handle == nil { failureMsg := fmt.Sprintf( - "Template failed to run script %v with arguments %v because task driver doesn't support the exec operation", + "Template failed to run script %v with arguments %v because task driver handle is not available", script.Command, script.Args, ) diff --git a/client/allocrunner/taskrunner/template_hook.go b/client/allocrunner/taskrunner/template_hook.go index 7aaa6fb7d87..c61a8503261 100644 --- a/client/allocrunner/taskrunner/template_hook.go +++ b/client/allocrunner/taskrunner/template_hook.go @@ -55,6 +55,11 @@ type templateHookConfig struct { // hookResources are used to fetch Consul tokens hookResources *cstructs.AllocHookResources + + // driverHandle is the task driver executor used to run scripts when the + // template change mode is set to script. Typically this will be nil in this + // config struct, unless we're restoring a task after a client restart. + driverHandle ti.ScriptExecutor } type templateHook struct { @@ -68,7 +73,10 @@ type templateHook struct { managerLock sync.Mutex // driverHandle is the task driver executor used by the template manager to - // run scripts when the template change mode is set to script. + // run scripts when the template change mode is set to script. This value is + // set in the Poststart hook after the task has run, or passed in as + // configuration if this is a task that's being restored after a client + // restart. // // Must obtain a managerLock before changing. It may be nil. driverHandle ti.ScriptExecutor @@ -105,6 +113,7 @@ func newTemplateHook(config *templateHookConfig) *templateHook { config: config, consulNamespace: config.consulNamespace, logger: config.logger.Named(templateHookName), + driverHandle: config.driverHandle, } } @@ -206,7 +215,7 @@ func (h *templateHook) Poststart(_ context.Context, req *interfaces.TaskPoststar } else { for _, tmpl := range h.config.templates { if tmpl.ChangeMode == structs.TemplateChangeModeScript { - return fmt.Errorf("template has change mode set to 'script' but the driver it uses does not provide exec capability") + return fmt.Errorf("template has change mode set to 'script' but task driver handle is not available") } } } diff --git a/client/allocrunner/taskrunner/template_hook_test.go b/client/allocrunner/taskrunner/template_hook_test.go index 12ff54467c7..94f6c7e3c63 100644 --- a/client/allocrunner/taskrunner/template_hook_test.go +++ b/client/allocrunner/taskrunner/template_hook_test.go @@ -8,7 +8,9 @@ import ( "fmt" "net/http" "net/http/httptest" + "os" "path" + "path/filepath" "sync" "testing" "time" @@ -269,3 +271,70 @@ func Test_templateHook_Prestart_Vault(t *testing.T) { }) } } + +// TestTemplateHook_RestoreChangeModeScript exercises change_mode=script +// behavior for a task restored after a client restart +func TestTemplateHook_RestoreChangeModeScript(t *testing.T) { + + logger := testlog.HCLogger(t) + tmpDir := t.TempDir() + destPath := filepath.Join(tmpDir, "foo.txt") + must.NoError(t, os.WriteFile(destPath, []byte("original-content"), 0755)) + + clientConfig := config.DefaultConfig() + clientConfig.TemplateConfig.DisableSandbox = true + + alloc := mock.BatchAlloc() + task := alloc.Job.TaskGroups[0].Tasks[0] + envBuilder := taskenv.NewBuilder(mock.Node(), alloc, task, clientConfig.Region) + + lifecycle := trtesting.NewMockTaskHooks() + lifecycle.HasHandle = true + + events := &trtesting.MockEmitter{} + + executor := &simpleExec{ + code: 117, + err: fmt.Errorf("oh no"), + } + + hook := newTemplateHook(&templateHookConfig{ + alloc: alloc, + logger: logger, + lifecycle: lifecycle, + events: events, + templates: []*structs.Template{{ + DestPath: destPath, + EmbeddedTmpl: "changed-content", + ChangeMode: structs.TemplateChangeModeScript, + ChangeScript: &structs.ChangeScript{ + Command: "echo", + Args: []string{"foo"}, + }, + }}, + clientConfig: clientConfig, + envBuilder: envBuilder, + hookResources: &cstructs.AllocHookResources{}, + driverHandle: executor, + }) + req := &interfaces.TaskPrestartRequest{ + Alloc: alloc, + Task: task, + TaskDir: &allocdir.TaskDir{Dir: tmpDir}, + } + + must.NoError(t, hook.Prestart(context.TODO(), req, nil)) + + // self-test the test by making sure we really changed the template file + out, err := os.ReadFile(destPath) + must.NoError(t, err) + must.Eq(t, "changed-content", string(out)) + + // verify our change script executed + gotEvents := events.Events() + must.Len(t, 1, gotEvents) + must.Eq(t, structs.TaskHookFailed, gotEvents[0].Type) + must.Eq(t, "Template failed to run script echo with arguments [foo] on change: oh no Exit code: 117", + gotEvents[0].DisplayMessage) + +}