From 2b7d97888fff8e11ae040aa9642f245938667b56 Mon Sep 17 00:00:00 2001 From: Hamza El-Saawy Date: Mon, 10 Jan 2022 11:35:51 -0500 Subject: [PATCH 1/3] Delete shim workloads tasks in pod. This commit supports restarting containers and pods using CRI. Delete task request now removes tasks from `pod`'s `workloadTasks` map, and added `DeleteTask` function to `shimPod` interface so new tasks can use the same ID (ie, so a task can be restarted in a running pod). Signed-off-by: Hamza El-Saawy --- cmd/containerd-shim-runhcs-v1/pod.go | 30 ++++ cmd/containerd-shim-runhcs-v1/pod_test.go | 161 ++++++++++++++++++ .../service_internal.go | 18 +- 3 files changed, 206 insertions(+), 3 deletions(-) diff --git a/cmd/containerd-shim-runhcs-v1/pod.go b/cmd/containerd-shim-runhcs-v1/pod.go index a4713f2892..d3f0a9448e 100644 --- a/cmd/containerd-shim-runhcs-v1/pod.go +++ b/cmd/containerd-shim-runhcs-v1/pod.go @@ -51,6 +51,14 @@ type shimPod interface { // the `shimExecStateRunning, shimExecStateExited` states. If the exec is // not in this state this pod MUST return `errdefs.ErrFailedPrecondition`. KillTask(ctx context.Context, tid, eid string, signal uint32, all bool) error + // DeleteTask removes a task from being tracked by this pod, and cleans up + // and resources the shim allocated to the task. + // + // The task's init exec (eid == "") must be either `shimExecStateCreated` or + // `shimExecStateExited`. If the exec is not in this state this pod MUST + // return `errdefs.ErrFailedPrecondition`. Deleting the pod's sandbox task + // is a no-op. + DeleteTask(ctx context.Context, tid string) error } func createPod(ctx context.Context, events publisher, req *task.CreateTaskRequest, s *specs.Spec) (shimPod, error) { @@ -390,3 +398,25 @@ func (p *pod) KillTask(ctx context.Context, tid, eid string, signal uint32, all }) return eg.Wait() } + +func (p *pod) DeleteTask(ctx context.Context, tid string) error { + t, err := p.GetTask(tid) + if err != nil { + return errors.Wrapf(err, "could not find task to delete") + } + + // although deleting the sandbox task is a no-op, still check that it is not running + e, err := t.GetExec("") + if err != nil { + return errors.Wrapf(err, "could not get initial exec") + } + if e.State() == shimExecStateRunning { + return errors.Wrapf(errdefs.ErrFailedPrecondition, "cannot delete task with running exec") + } + + if p.id != tid { + p.workloadTasks.Delete(tid) + } + + return nil +} diff --git a/cmd/containerd-shim-runhcs-v1/pod_test.go b/cmd/containerd-shim-runhcs-v1/pod_test.go index 5322fba052..ff9941c8ae 100644 --- a/cmd/containerd-shim-runhcs-v1/pod_test.go +++ b/cmd/containerd-shim-runhcs-v1/pod_test.go @@ -44,6 +44,28 @@ func (tsp *testShimPod) KillTask(ctx context.Context, tid, eid string, signal ui return s.KillExec(ctx, eid, signal, all) } +func (tsp *testShimPod) DeleteTask(ctx context.Context, tid string) error { + t, err := tsp.GetTask(tid) + if err != nil { + return err + } + + e, err := t.GetExec("") + if err != nil { + return err + } + switch e.State() { + case shimExecStateRunning: + return errdefs.ErrFailedPrecondition + default: + } + + if tid != tsp.ID() { + tsp.tasks.Delete(tid) + } + return nil +} + // Pod tests func setupTestPodWithFakes(t *testing.T) (*pod, *testShimTask) { @@ -111,6 +133,8 @@ func Test_pod_GetTask_WorkloadID_Created_Success(t *testing.T) { } } +// kill tests + func Test_pod_KillTask_UnknownTaskID_Error(t *testing.T) { p, _ := setupTestPodWithFakes(t) err := p.KillTask(context.TODO(), "thisshouldnotmatch", "", 0xf, false) @@ -205,3 +229,140 @@ func Test_pod_KillTask_WorkloadID_2ndExecID_All_Error(t *testing.T) { verifyExpectedError(t, nil, err, errdefs.ErrFailedPrecondition) } } + +// delete tests + +func Test_pod_DeleteTask_SandboxID(t *testing.T) { + p, st := setupTestPodWithFakes(t) + + err := p.KillTask(context.Background(), st.ID(), "", 0xf, true) + if err != nil { + t.Fatalf("should not have failed, got: %v", err) + } + + err = p.DeleteTask(context.Background(), st.ID()) + if err != nil { + t.Fatalf("should not have failed, got: %v", err) + } + + // it should not be possible to delete the sandbox task + _, err = p.GetTask(t.Name()) + if err != nil { + t.Fatalf("should not have failed, got: %v", err) + } +} + +func Test_pod_DeleteTask_SandboxID_Running(t *testing.T) { + p, st := setupTestPodWithFakes(t) + + // start the task + e, err := st.GetExec("") + if err != nil { + t.Fatalf("should not have failed, got: %v", err) + } + err = e.Start(context.Background()) + if err != nil { + t.Fatalf("should not have failed, got: %v", err) + } + + err = p.DeleteTask(context.Background(), st.ID()) + verifyExpectedError(t, nil, err, errdefs.ErrFailedPrecondition) +} + +func Test_pod_DeleteTask_SandboxID_Repeated(t *testing.T) { + p, st := setupTestPodWithFakes(t) + + err := p.KillTask(context.Background(), st.ID(), "", 0xf, true) + if err != nil { + t.Fatalf("should not have failed, got: %v", err) + } + + err = p.DeleteTask(context.Background(), st.ID()) + if err != nil { + t.Fatalf("should not have failed, got: %v", err) + } + + err = p.DeleteTask(context.Background(), st.ID()) + if err != nil { + t.Fatalf("should not have failed, got: %v", err) + } +} + +func Test_pod_DeleteTask_TaskID(t *testing.T) { + p, _ := setupTestPodWithFakes(t) + st := setupTestTaskInPod(t, p) + + err := p.KillTask(context.Background(), st.ID(), "", 0xf, true) + if err != nil { + t.Fatalf("should not have failed, got: %v", err) + } + + err = p.DeleteTask(context.Background(), st.ID()) + if err != nil { + t.Fatalf("should not have failed, got: %v", err) + } + + _, err = p.GetTask(st.ID()) + verifyExpectedError(t, nil, err, errdefs.ErrNotFound) +} + +func Test_pod_DeleteTask_TaskID_Running(t *testing.T) { + p, _ := setupTestPodWithFakes(t) + st := setupTestTaskInPod(t, p) + + // start the task + e, err := st.GetExec("") + if err != nil { + t.Fatalf("should not have failed, got: %v", err) + } + err = e.Start(context.Background()) + if err != nil { + t.Fatalf("should not have failed, got: %v", err) + } + + err = p.DeleteTask(context.Background(), st.ID()) + verifyExpectedError(t, nil, err, errdefs.ErrFailedPrecondition) + + // should not actually delete the sandbox task + _, err = p.GetTask(t.Name()) + if err != nil { + t.Fatalf("should not have failed, got: %v", err) + } + + stp, err := p.GetTask(st.ID()) + if err != nil { + t.Fatalf("should not have failed, got: %v", err) + } + + if stp != st { + t.Fatalf("task should not have changed: %v != %v", st, stp) + } +} + +func Test_pod_DeleteTask_TaskID_Repeated(t *testing.T) { + p, _ := setupTestPodWithFakes(t) + st := setupTestTaskInPod(t, p) + + err := p.KillTask(context.Background(), st.ID(), "", 0xf, true) + if err != nil { + t.Fatalf("should not have failed, got: %v", err) + } + + err = p.DeleteTask(context.Background(), st.ID()) + if err != nil { + t.Fatalf("should not have failed, got: %v", err) + } + + err = p.DeleteTask(context.Background(), st.ID()) + verifyExpectedError(t, nil, err, errdefs.ErrNotFound) +} + +func Test_pod_DeleteTask_TaskID_Not_Created(t *testing.T) { + p, _ := setupTestPodWithFakes(t) + // Add two workload tasks + setupTestTaskInPod(t, p) + setupTestTaskInPod(t, p) + + err := p.KillTask(context.Background(), strconv.Itoa(rand.Int()), "", 0xf, true) + verifyExpectedError(t, nil, err, errdefs.ErrNotFound) +} diff --git a/cmd/containerd-shim-runhcs-v1/service_internal.go b/cmd/containerd-shim-runhcs-v1/service_internal.go index eb85f69722..9824a7d8a6 100644 --- a/cmd/containerd-shim-runhcs-v1/service_internal.go +++ b/cmd/containerd-shim-runhcs-v1/service_internal.go @@ -205,17 +205,29 @@ func (s *service) startInternal(ctx context.Context, req *task.StartRequest) (*t } func (s *service) deleteInternal(ctx context.Context, req *task.DeleteRequest) (*task.DeleteResponse, error) { - // TODO: JTERRY75 we need to send this to the POD for isSandbox - t, err := s.getTask(req.ID) if err != nil { return nil, err } + pid, exitStatus, exitedAt, err := t.DeleteExec(ctx, req.ExecID) if err != nil { return nil, err } - // TODO: We should be removing the task after this right? + + // remove the pod sandbox's reference to the task, if the delete is for a + // task and not an exec + if s.isSandbox && req.ExecID == "" { + p, err := s.getPod() + if err != nil { + return nil, errors.Wrapf(err, "could not get pod %q to delete task %q", s.tid, req.ID) + } + err = p.DeleteTask(ctx, req.ID) + if err != nil { + return nil, errors.Wrapf(err, "could not delete task %q in pod %q", req.ID, s.tid) + } + } + return &task.DeleteResponse{ Pid: uint32(pid), ExitStatus: exitStatus, From b846a9aefb9ac57bfc7807e74113c89f14809f70 Mon Sep 17 00:00:00 2001 From: Hamza El-Saawy Date: Tue, 1 Feb 2022 18:14:35 -0500 Subject: [PATCH 2/3] PR: .Wrapf( to .Wrap( Signed-off-by: Hamza El-Saawy --- cmd/containerd-shim-runhcs-v1/pod.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/cmd/containerd-shim-runhcs-v1/pod.go b/cmd/containerd-shim-runhcs-v1/pod.go index d3f0a9448e..df98fb8820 100644 --- a/cmd/containerd-shim-runhcs-v1/pod.go +++ b/cmd/containerd-shim-runhcs-v1/pod.go @@ -402,16 +402,16 @@ func (p *pod) KillTask(ctx context.Context, tid, eid string, signal uint32, all func (p *pod) DeleteTask(ctx context.Context, tid string) error { t, err := p.GetTask(tid) if err != nil { - return errors.Wrapf(err, "could not find task to delete") + return errors.Wrap(err, "could not delete task") } // although deleting the sandbox task is a no-op, still check that it is not running e, err := t.GetExec("") if err != nil { - return errors.Wrapf(err, "could not get initial exec") + return errors.Wrap(err, "could not get initial exec") } if e.State() == shimExecStateRunning { - return errors.Wrapf(errdefs.ErrFailedPrecondition, "cannot delete task with running exec") + return errors.Wrap(errdefs.ErrFailedPrecondition, "cannot delete task with running exec") } if p.id != tid { From 5e5baea6e70eb846dde3c0b11ef405a613b189f3 Mon Sep 17 00:00:00 2001 From: Hamza El-Saawy Date: Mon, 7 Feb 2022 19:24:31 -0500 Subject: [PATCH 3/3] PR: error messages, docs, and formatting Signed-off-by: Hamza El-Saawy --- cmd/containerd-shim-runhcs-v1/pod.go | 11 ++++++++--- cmd/containerd-shim-runhcs-v1/service_internal.go | 7 ++++--- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/cmd/containerd-shim-runhcs-v1/pod.go b/cmd/containerd-shim-runhcs-v1/pod.go index df98fb8820..922f8cb3b2 100644 --- a/cmd/containerd-shim-runhcs-v1/pod.go +++ b/cmd/containerd-shim-runhcs-v1/pod.go @@ -52,7 +52,7 @@ type shimPod interface { // not in this state this pod MUST return `errdefs.ErrFailedPrecondition`. KillTask(ctx context.Context, tid, eid string, signal uint32, all bool) error // DeleteTask removes a task from being tracked by this pod, and cleans up - // and resources the shim allocated to the task. + // the resources the shim allocated for the task. // // The task's init exec (eid == "") must be either `shimExecStateCreated` or // `shimExecStateExited`. If the exec is not in this state this pod MUST @@ -400,12 +400,17 @@ func (p *pod) KillTask(ctx context.Context, tid, eid string, signal uint32, all } func (p *pod) DeleteTask(ctx context.Context, tid string) error { + // Deleting the sandbox task is a no-op, since the service should delete its + // reference to the sandbox task or pod, and `p.sandboxTask != nil` is an + // invariant that is relied on elsewhere. + // However, still get the init exec for all tasks to ensure that they have + // been properly stopped. + t, err := p.GetTask(tid) if err != nil { - return errors.Wrap(err, "could not delete task") + return errors.Wrap(err, "could not find task to delete") } - // although deleting the sandbox task is a no-op, still check that it is not running e, err := t.GetExec("") if err != nil { return errors.Wrap(err, "could not get initial exec") diff --git a/cmd/containerd-shim-runhcs-v1/service_internal.go b/cmd/containerd-shim-runhcs-v1/service_internal.go index 9824a7d8a6..ab1a49fd70 100644 --- a/cmd/containerd-shim-runhcs-v1/service_internal.go +++ b/cmd/containerd-shim-runhcs-v1/service_internal.go @@ -3,6 +3,7 @@ package main import ( "context" "encoding/json" + "fmt" "os" "path/filepath" "strings" @@ -215,8 +216,7 @@ func (s *service) deleteInternal(ctx context.Context, req *task.DeleteRequest) ( return nil, err } - // remove the pod sandbox's reference to the task, if the delete is for a - // task and not an exec + // if the delete is for a task and not an exec, remove the pod sandbox's reference to the task if s.isSandbox && req.ExecID == "" { p, err := s.getPod() if err != nil { @@ -224,9 +224,10 @@ func (s *service) deleteInternal(ctx context.Context, req *task.DeleteRequest) ( } err = p.DeleteTask(ctx, req.ID) if err != nil { - return nil, errors.Wrapf(err, "could not delete task %q in pod %q", req.ID, s.tid) + return nil, fmt.Errorf("could not delete task %q in pod %q: %w", req.ID, s.tid, err) } } + // TODO: check if the pod's workload tasks is empty, and, if so, reset p.taskOrPod to nil return &task.DeleteResponse{ Pid: uint32(pid),