From e5af7f5ceaf4e75d991e802e8c9de8669d092105 Mon Sep 17 00:00:00 2001 From: Ripolin Date: Wed, 4 Oct 2023 15:51:23 +0200 Subject: [PATCH] Add WaitForReady flag to check container readiness state before exec a hook Signed-off-by: Ripolin --- changelogs/unreleased/6918-Ripolin | 1 + internal/hook/item_hook_handler.go | 28 +++- internal/hook/item_hook_handler_test.go | 154 +++++++++++-------- internal/hook/wait_exec_hook_handler.go | 17 +- internal/hook/wait_exec_hook_handler_test.go | 52 ++++++- pkg/apis/velero/v1/restore_types.go | 5 + site/content/docs/main/restore-hooks.md | 2 + 7 files changed, 182 insertions(+), 77 deletions(-) create mode 100644 changelogs/unreleased/6918-Ripolin diff --git a/changelogs/unreleased/6918-Ripolin b/changelogs/unreleased/6918-Ripolin new file mode 100644 index 0000000000..6bd21f8dfa --- /dev/null +++ b/changelogs/unreleased/6918-Ripolin @@ -0,0 +1 @@ +Optional check if targeted container is ready before executing a hook \ No newline at end of file diff --git a/internal/hook/item_hook_handler.go b/internal/hook/item_hook_handler.go index fc97541140..38c982c550 100644 --- a/internal/hook/item_hook_handler.go +++ b/internal/hook/item_hook_handler.go @@ -19,6 +19,7 @@ package hook import ( "encoding/json" "fmt" + "strconv" "strings" "time" @@ -37,6 +38,7 @@ import ( "github.com/vmware-tanzu/velero/pkg/kuberesource" "github.com/vmware-tanzu/velero/pkg/podexec" "github.com/vmware-tanzu/velero/pkg/restorehelper" + "github.com/vmware-tanzu/velero/pkg/util/boolptr" "github.com/vmware-tanzu/velero/pkg/util/collections" "github.com/vmware-tanzu/velero/pkg/util/kube" ) @@ -61,6 +63,7 @@ const ( podRestoreHookOnErrorAnnotationKey = "post.hook.restore.velero.io/on-error" podRestoreHookTimeoutAnnotationKey = "post.hook.restore.velero.io/exec-timeout" podRestoreHookWaitTimeoutAnnotationKey = "post.hook.restore.velero.io/wait-timeout" + podRestoreHookWaitForReadyAnnotationKey = "post.hook.restore.velero.io/wait-for-ready" podRestoreHookInitContainerImageAnnotationKey = "init.hook.restore.velero.io/container-image" podRestoreHookInitContainerNameAnnotationKey = "init.hook.restore.velero.io/container-name" podRestoreHookInitContainerCommandAnnotationKey = "init.hook.restore.velero.io/command" @@ -477,12 +480,23 @@ func getPodExecRestoreHookFromAnnotations(annotations map[string]string, log log } } + waitForReadyString := annotations[podRestoreHookWaitForReadyAnnotationKey] + waitForReady := boolptr.False() + if waitForReadyString != "" { + var err error + *waitForReady, err = strconv.ParseBool(waitForReadyString) + if err != nil { + log.Warn(errors.Wrapf(err, "Unable to parse wait for ready %s, ignoring", waitForReadyString)) + } + } + return &velerov1api.ExecRestoreHook{ - Container: container, - Command: parseStringToCommand(commandValue), - OnError: onError, - ExecTimeout: metav1.Duration{Duration: execTimeout}, - WaitTimeout: metav1.Duration{Duration: waitTimeout}, + Container: container, + Command: parseStringToCommand(commandValue), + OnError: onError, + ExecTimeout: metav1.Duration{Duration: execTimeout}, + WaitTimeout: metav1.Duration{Duration: waitTimeout}, + WaitForReady: waitForReady, } } @@ -542,6 +556,10 @@ func GroupRestoreExecHooks( Hook: *rh.Exec, HookSource: "backupSpec", } + // default to false if attr WaitForReady not set + if named.Hook.WaitForReady == nil { + named.Hook.WaitForReady = boolptr.False() + } // default to first container in pod if unset, without mutating resource restore hook if named.Hook.Container == "" { named.Hook.Container = pod.Spec.Containers[0].Name diff --git a/internal/hook/item_hook_handler_test.go b/internal/hook/item_hook_handler_test.go index 8ca9f5c9a1..0912b1bdd2 100644 --- a/internal/hook/item_hook_handler_test.go +++ b/internal/hook/item_hook_handler_test.go @@ -36,6 +36,7 @@ import ( "github.com/vmware-tanzu/velero/pkg/builder" "github.com/vmware-tanzu/velero/pkg/kuberesource" velerotest "github.com/vmware-tanzu/velero/pkg/test" + "github.com/vmware-tanzu/velero/pkg/util/boolptr" "github.com/vmware-tanzu/velero/pkg/util/collections" ) @@ -724,7 +725,8 @@ func TestGetPodExecRestoreHookFromAnnotations(t *testing.T) { podRestoreHookCommandAnnotationKey: "/usr/bin/foo", }, expected: &velerov1api.ExecRestoreHook{ - Command: []string{"/usr/bin/foo"}, + Command: []string{"/usr/bin/foo"}, + WaitForReady: boolptr.False(), }, }, { @@ -733,7 +735,8 @@ func TestGetPodExecRestoreHookFromAnnotations(t *testing.T) { podRestoreHookCommandAnnotationKey: `["a","b","c"]`, }, expected: &velerov1api.ExecRestoreHook{ - Command: []string{"a", "b", "c"}, + Command: []string{"a", "b", "c"}, + WaitForReady: boolptr.False(), }, }, { @@ -743,8 +746,9 @@ func TestGetPodExecRestoreHookFromAnnotations(t *testing.T) { podRestoreHookOnErrorAnnotationKey: string(velerov1api.HookErrorModeContinue), }, expected: &velerov1api.ExecRestoreHook{ - Command: []string{"/usr/bin/foo"}, - OnError: velerov1api.HookErrorModeContinue, + Command: []string{"/usr/bin/foo"}, + OnError: velerov1api.HookErrorModeContinue, + WaitForReady: boolptr.False(), }, }, { @@ -754,8 +758,9 @@ func TestGetPodExecRestoreHookFromAnnotations(t *testing.T) { podRestoreHookOnErrorAnnotationKey: string(velerov1api.HookErrorModeFail), }, expected: &velerov1api.ExecRestoreHook{ - Command: []string{"/usr/bin/foo"}, - OnError: velerov1api.HookErrorModeFail, + Command: []string{"/usr/bin/foo"}, + OnError: velerov1api.HookErrorModeFail, + WaitForReady: boolptr.False(), }, }, { @@ -766,9 +771,10 @@ func TestGetPodExecRestoreHookFromAnnotations(t *testing.T) { podRestoreHookWaitTimeoutAnnotationKey: "1h", }, expected: &velerov1api.ExecRestoreHook{ - Command: []string{"/usr/bin/foo"}, - ExecTimeout: metav1.Duration{Duration: 45 * time.Second}, - WaitTimeout: metav1.Duration{Duration: time.Hour}, + Command: []string{"/usr/bin/foo"}, + ExecTimeout: metav1.Duration{Duration: 45 * time.Second}, + WaitTimeout: metav1.Duration{Duration: time.Hour}, + WaitForReady: boolptr.False(), }, }, { @@ -778,8 +784,9 @@ func TestGetPodExecRestoreHookFromAnnotations(t *testing.T) { podRestoreHookContainerAnnotationKey: "my-app", }, expected: &velerov1api.ExecRestoreHook{ - Command: []string{"/usr/bin/foo"}, - Container: "my-app", + Command: []string{"/usr/bin/foo"}, + Container: "my-app", + WaitForReady: boolptr.False(), }, }, { @@ -790,9 +797,10 @@ func TestGetPodExecRestoreHookFromAnnotations(t *testing.T) { podRestoreHookTimeoutAnnotationKey: "none", }, expected: &velerov1api.ExecRestoreHook{ - Command: []string{"/usr/bin/foo"}, - Container: "my-app", - ExecTimeout: metav1.Duration{Duration: 0}, + Command: []string{"/usr/bin/foo"}, + Container: "my-app", + ExecTimeout: metav1.Duration{Duration: 0}, + WaitForReady: boolptr.False(), }, }, { @@ -803,9 +811,10 @@ func TestGetPodExecRestoreHookFromAnnotations(t *testing.T) { podRestoreHookWaitTimeoutAnnotationKey: "none", }, expected: &velerov1api.ExecRestoreHook{ - Command: []string{"/usr/bin/foo"}, - Container: "my-app", - ExecTimeout: metav1.Duration{Duration: 0}, + Command: []string{"/usr/bin/foo"}, + Container: "my-app", + ExecTimeout: metav1.Duration{Duration: 0}, + WaitForReady: boolptr.False(), }, }, } @@ -842,6 +851,7 @@ func TestGroupRestoreExecHooks(t *testing.T) { podRestoreHookOnErrorAnnotationKey, string(velerov1api.HookErrorModeContinue), podRestoreHookTimeoutAnnotationKey, "1s", podRestoreHookWaitTimeoutAnnotationKey, "1m", + podRestoreHookWaitForReadyAnnotationKey, "true", )). Containers(&corev1api.Container{ Name: "container1", @@ -853,11 +863,12 @@ func TestGroupRestoreExecHooks(t *testing.T) { HookName: "", HookSource: "annotation", Hook: velerov1api.ExecRestoreHook{ - Container: "container1", - Command: []string{"/usr/bin/foo"}, - OnError: velerov1api.HookErrorModeContinue, - ExecTimeout: metav1.Duration{Duration: time.Second}, - WaitTimeout: metav1.Duration{Duration: time.Minute}, + Container: "container1", + Command: []string{"/usr/bin/foo"}, + OnError: velerov1api.HookErrorModeContinue, + ExecTimeout: metav1.Duration{Duration: time.Second}, + WaitTimeout: metav1.Duration{Duration: time.Minute}, + WaitForReady: boolptr.True(), }, }, }, @@ -883,11 +894,12 @@ func TestGroupRestoreExecHooks(t *testing.T) { HookName: "", HookSource: "annotation", Hook: velerov1api.ExecRestoreHook{ - Container: "container1", - Command: []string{"/usr/bin/foo"}, - OnError: velerov1api.HookErrorModeContinue, - ExecTimeout: metav1.Duration{Duration: time.Second}, - WaitTimeout: metav1.Duration{Duration: time.Minute}, + Container: "container1", + Command: []string{"/usr/bin/foo"}, + OnError: velerov1api.HookErrorModeContinue, + ExecTimeout: metav1.Duration{Duration: time.Second}, + WaitTimeout: metav1.Duration{Duration: time.Minute}, + WaitForReady: boolptr.False(), }, }, }, @@ -923,11 +935,12 @@ func TestGroupRestoreExecHooks(t *testing.T) { HookName: "hook1", HookSource: "backupSpec", Hook: velerov1api.ExecRestoreHook{ - Container: "container1", - Command: []string{"/usr/bin/foo"}, - OnError: velerov1api.HookErrorModeContinue, - ExecTimeout: metav1.Duration{Duration: time.Second}, - WaitTimeout: metav1.Duration{Duration: time.Minute}, + Container: "container1", + Command: []string{"/usr/bin/foo"}, + OnError: velerov1api.HookErrorModeContinue, + ExecTimeout: metav1.Duration{Duration: time.Second}, + WaitTimeout: metav1.Duration{Duration: time.Minute}, + WaitForReady: boolptr.False(), }, }, }, @@ -962,11 +975,12 @@ func TestGroupRestoreExecHooks(t *testing.T) { HookName: "hook1", HookSource: "backupSpec", Hook: velerov1api.ExecRestoreHook{ - Container: "container1", - Command: []string{"/usr/bin/foo"}, - OnError: velerov1api.HookErrorModeContinue, - ExecTimeout: metav1.Duration{Duration: time.Second}, - WaitTimeout: metav1.Duration{Duration: time.Minute}, + Container: "container1", + Command: []string{"/usr/bin/foo"}, + OnError: velerov1api.HookErrorModeContinue, + ExecTimeout: metav1.Duration{Duration: time.Second}, + WaitTimeout: metav1.Duration{Duration: time.Minute}, + WaitForReady: boolptr.False(), }, }, }, @@ -1009,11 +1023,12 @@ func TestGroupRestoreExecHooks(t *testing.T) { HookName: "", HookSource: "annotation", Hook: velerov1api.ExecRestoreHook{ - Container: "container1", - Command: []string{"/usr/bin/foo"}, - OnError: velerov1api.HookErrorModeContinue, - ExecTimeout: metav1.Duration{Duration: time.Second}, - WaitTimeout: metav1.Duration{Duration: time.Minute}, + Container: "container1", + Command: []string{"/usr/bin/foo"}, + OnError: velerov1api.HookErrorModeContinue, + ExecTimeout: metav1.Duration{Duration: time.Second}, + WaitTimeout: metav1.Duration{Duration: time.Minute}, + WaitForReady: boolptr.False(), }, }, }, @@ -1105,11 +1120,12 @@ func TestGroupRestoreExecHooks(t *testing.T) { RestoreHooks: []velerov1api.RestoreResourceHook{ { Exec: &velerov1api.ExecRestoreHook{ - Container: "container1", - Command: []string{"/usr/bin/aaa"}, - OnError: velerov1api.HookErrorModeContinue, - ExecTimeout: metav1.Duration{Duration: time.Second * 4}, - WaitTimeout: metav1.Duration{Duration: time.Minute * 4}, + Container: "container1", + Command: []string{"/usr/bin/aaa"}, + OnError: velerov1api.HookErrorModeContinue, + ExecTimeout: metav1.Duration{Duration: time.Second * 4}, + WaitTimeout: metav1.Duration{Duration: time.Minute * 4}, + WaitForReady: boolptr.True(), }, }, }, @@ -1126,33 +1142,36 @@ func TestGroupRestoreExecHooks(t *testing.T) { HookName: "hook1", HookSource: "backupSpec", Hook: velerov1api.ExecRestoreHook{ - Container: "container1", - Command: []string{"/usr/bin/foo"}, - OnError: velerov1api.HookErrorModeFail, - ExecTimeout: metav1.Duration{Duration: time.Second}, - WaitTimeout: metav1.Duration{Duration: time.Minute}, + Container: "container1", + Command: []string{"/usr/bin/foo"}, + OnError: velerov1api.HookErrorModeFail, + ExecTimeout: metav1.Duration{Duration: time.Second}, + WaitTimeout: metav1.Duration{Duration: time.Minute}, + WaitForReady: boolptr.False(), }, }, { HookName: "hook1", HookSource: "backupSpec", Hook: velerov1api.ExecRestoreHook{ - Container: "container1", - Command: []string{"/usr/bin/bar"}, - OnError: velerov1api.HookErrorModeContinue, - ExecTimeout: metav1.Duration{Duration: time.Second * 2}, - WaitTimeout: metav1.Duration{Duration: time.Minute * 2}, + Container: "container1", + Command: []string{"/usr/bin/bar"}, + OnError: velerov1api.HookErrorModeContinue, + ExecTimeout: metav1.Duration{Duration: time.Second * 2}, + WaitTimeout: metav1.Duration{Duration: time.Minute * 2}, + WaitForReady: boolptr.False(), }, }, { HookName: "hook2", HookSource: "backupSpec", Hook: velerov1api.ExecRestoreHook{ - Container: "container1", - Command: []string{"/usr/bin/aaa"}, - OnError: velerov1api.HookErrorModeContinue, - ExecTimeout: metav1.Duration{Duration: time.Second * 4}, - WaitTimeout: metav1.Duration{Duration: time.Minute * 4}, + Container: "container1", + Command: []string{"/usr/bin/aaa"}, + OnError: velerov1api.HookErrorModeContinue, + ExecTimeout: metav1.Duration{Duration: time.Second * 4}, + WaitTimeout: metav1.Duration{Duration: time.Minute * 4}, + WaitForReady: boolptr.True(), }, }, }, @@ -1161,11 +1180,12 @@ func TestGroupRestoreExecHooks(t *testing.T) { HookName: "hook1", HookSource: "backupSpec", Hook: velerov1api.ExecRestoreHook{ - Container: "container2", - Command: []string{"/usr/bin/baz"}, - OnError: velerov1api.HookErrorModeContinue, - ExecTimeout: metav1.Duration{Duration: time.Second * 3}, - WaitTimeout: metav1.Duration{Duration: time.Second * 3}, + Container: "container2", + Command: []string{"/usr/bin/baz"}, + OnError: velerov1api.HookErrorModeContinue, + ExecTimeout: metav1.Duration{Duration: time.Second * 3}, + WaitTimeout: metav1.Duration{Duration: time.Second * 3}, + WaitForReady: boolptr.False(), }, }, }, diff --git a/internal/hook/wait_exec_hook_handler.go b/internal/hook/wait_exec_hook_handler.go index 890a8117bd..aa6b9b1882 100644 --- a/internal/hook/wait_exec_hook_handler.go +++ b/internal/hook/wait_exec_hook_handler.go @@ -29,6 +29,7 @@ import ( velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" "github.com/vmware-tanzu/velero/pkg/podexec" + "github.com/vmware-tanzu/velero/pkg/util/boolptr" "github.com/vmware-tanzu/velero/pkg/util/kube" ) @@ -126,8 +127,8 @@ func (e *DefaultWaitExecHookHandler) HandleHooks( } for containerName, hooks := range byContainer { - if !isContainerRunning(newPod, containerName) { - podLog.Infof("Container %s is not running: post-restore hooks will not yet be executed", containerName) + if !isContainerUp(newPod, containerName, hooks) { + podLog.Infof("Container %s is not up: post-restore hooks will not yet be executed", containerName) continue } podMap, err := runtime.DefaultUnstructuredConverter.ToUnstructured(newPod) @@ -243,14 +244,24 @@ func podHasContainer(pod *v1.Pod, containerName string) bool { return false } -func isContainerRunning(pod *v1.Pod, containerName string) bool { +func isContainerUp(pod *v1.Pod, containerName string, hooks []PodExecRestoreHook) bool { if pod == nil { return false } + var waitForReady bool + for _, hook := range hooks { + if boolptr.IsSetToTrue(hook.Hook.WaitForReady) { + waitForReady = true + break + } + } for _, cs := range pod.Status.ContainerStatuses { if cs.Name != containerName { continue } + if waitForReady { + return cs.Ready + } return cs.State.Running != nil } diff --git a/internal/hook/wait_exec_hook_handler_test.go b/internal/hook/wait_exec_hook_handler_test.go index 933e5d34df..2702e9c452 100644 --- a/internal/hook/wait_exec_hook_handler_test.go +++ b/internal/hook/wait_exec_hook_handler_test.go @@ -35,6 +35,7 @@ import ( velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" "github.com/vmware-tanzu/velero/pkg/builder" velerotest "github.com/vmware-tanzu/velero/pkg/test" + "github.com/vmware-tanzu/velero/pkg/util/boolptr" ) type fakeListWatchFactory struct { @@ -790,12 +791,13 @@ func TestPodHasContainer(t *testing.T) { } } -func TestIsContainerRunning(t *testing.T) { +func TestIsContainerUp(t *testing.T) { tests := []struct { name string pod *v1.Pod container string expect bool + hooks []PodExecRestoreHook }{ { name: "should return true when running", @@ -809,6 +811,49 @@ func TestIsContainerRunning(t *testing.T) { }, }). Result(), + hooks: []PodExecRestoreHook{}, + }, + { + name: "should return false when running but not ready", + container: "container1", + expect: false, + pod: builder.ForPod("default", "my-pod"). + ContainerStatuses(&v1.ContainerStatus{ + Name: "container1", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + Ready: false, + }). + Result(), + hooks: []PodExecRestoreHook{ + { + Hook: velerov1api.ExecRestoreHook{ + WaitForReady: boolptr.True(), + }, + }, + }, + }, + { + name: "should return true when running and ready", + container: "container1", + expect: true, + pod: builder.ForPod("default", "my-pod"). + ContainerStatuses(&v1.ContainerStatus{ + Name: "container1", + State: v1.ContainerState{ + Running: &v1.ContainerStateRunning{}, + }, + Ready: true, + }). + Result(), + hooks: []PodExecRestoreHook{ + { + Hook: velerov1api.ExecRestoreHook{ + WaitForReady: boolptr.True(), + }, + }, + }, }, { name: "should return false when no state is set", @@ -820,6 +865,7 @@ func TestIsContainerRunning(t *testing.T) { State: v1.ContainerState{}, }). Result(), + hooks: []PodExecRestoreHook{}, }, { name: "should return false when waiting", @@ -833,6 +879,7 @@ func TestIsContainerRunning(t *testing.T) { }, }). Result(), + hooks: []PodExecRestoreHook{}, }, { name: "should return true when running and first container is terminated", @@ -852,11 +899,12 @@ func TestIsContainerRunning(t *testing.T) { }, }). Result(), + hooks: []PodExecRestoreHook{}, }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - actual := isContainerRunning(test.pod, test.container) + actual := isContainerUp(test.pod, test.container, test.hooks) assert.Equal(t, actual, test.expect) }) } diff --git a/pkg/apis/velero/v1/restore_types.go b/pkg/apis/velero/v1/restore_types.go index a961d5c176..3438d8ff44 100644 --- a/pkg/apis/velero/v1/restore_types.go +++ b/pkg/apis/velero/v1/restore_types.go @@ -214,6 +214,11 @@ type ExecRestoreHook struct { // before attempting to run the command. // +optional WaitTimeout metav1.Duration `json:"waitTimeout,omitempty"` + + // WaitForReady ensures command will be launched when container is Ready instead of Running. + // +optional + // +nullable + WaitForReady *bool `json:"waitForReady,omitempty"` } // InitRestoreHook is a hook that adds an init container to a PodSpec to run commands before the diff --git a/site/content/docs/main/restore-hooks.md b/site/content/docs/main/restore-hooks.md index 4d04122d42..f36f348419 100644 --- a/site/content/docs/main/restore-hooks.md +++ b/site/content/docs/main/restore-hooks.md @@ -181,6 +181,8 @@ Below are the annotations that can be added to a pod to specify exec restore hoo * How long to wait once execution begins. Defaults is 30 seconds. Optional. * `post.hook.restore.velero.io/wait-timeout` * How long to wait for a container to become ready. This should be long enough for the container to start plus any preceding hooks in the same container to complete. The wait timeout begins when the container is restored and may require time for the image to pull and volumes to mount. If not set the restore will wait indefinitely. Optional. +* `post.hook.restore.velero.io/wait-for-ready` + * String representation of a boolean that ensure command will be launched when underlying container is fully [Ready](https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/#define-readiness-probes). Use with caution because if only one restore hook for a pod consists of `WaitForReady` flag as "true", all the other hook executions for that pod, whatever their origin (`Backup` or `Restore` CRD), will wait for `Ready` state too. Any value except "true" will be considered as "false". Defaults is false. Optional. #### Exec Restore Hooks As Pod Annotation Example