From e02f6dc067494935f6079069018c4e6c9eb4373c Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 19 Jul 2019 04:22:43 -0700 Subject: [PATCH] shimv2: monitor sandbox liveness When sandbox quits unexpectedly, clean things up as much as we can. Fixes: #1896 Signed-off-by: Peng Tao --- containerd-shim-v2/service.go | 1 + containerd-shim-v2/start.go | 6 ++++++ containerd-shim-v2/wait.go | 40 +++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+) diff --git a/containerd-shim-v2/service.go b/containerd-shim-v2/service.go index 9103885080..af3dd47da5 100644 --- a/containerd-shim-v2/service.go +++ b/containerd-shim-v2/service.go @@ -114,6 +114,7 @@ type service struct { containers map[string]*container config *oci.RuntimeConfig events chan interface{} + monitor chan error cancel func() diff --git a/containerd-shim-v2/start.go b/containerd-shim-v2/start.go index 71a90ed131..173ca7c769 100644 --- a/containerd-shim-v2/start.go +++ b/containerd-shim-v2/start.go @@ -30,6 +30,12 @@ func startContainer(ctx context.Context, s *service, c *container) error { if err != nil { return err } + // Start monitor after starting sandbox + s.monitor, err = s.sandbox.Monitor() + if err != nil { + return err + } + go watchSandbox(s) } else { _, err := s.sandbox.StartContainer(c.id) if err != nil { diff --git a/containerd-shim-v2/wait.go b/containerd-shim-v2/wait.go index 92cf23963b..f99bca47f2 100644 --- a/containerd-shim-v2/wait.go +++ b/containerd-shim-v2/wait.go @@ -6,9 +6,11 @@ package containerdshim import ( + "path" "time" "github.com/containerd/containerd/api/types/task" + "github.com/containerd/containerd/mount" "github.com/sirupsen/logrus" ) @@ -82,3 +84,41 @@ func wait(s *service, c *container, execID string) (int32, error) { return ret, nil } + +func watchSandbox(s *service) { + if s.monitor == nil { + return + } + err := <-s.monitor + if err == nil { + return + } + s.monitor = nil + + s.mu.Lock() + defer s.mu.Unlock() + // sandbox malfunctioning, cleanup as much as we 
can + logrus.WithError(err).Warn("sandbox stopped unexpectedly") + err = s.sandbox.Stop(true) + if err != nil { + logrus.WithError(err).Warn("stop sandbox failed") + } + err = s.sandbox.Delete() + if err != nil { + logrus.WithError(err).Warn("delete sandbox failed") + } + + if s.mount { + for _, c := range s.containers { + rootfs := path.Join(c.bundle, "rootfs") + logrus.WithField("rootfs", rootfs).WithField("id", c.id).Debug("container umount rootfs") + if err := mount.UnmountAll(rootfs, 0); err != nil { + logrus.WithError(err).Warn("failed to cleanup rootfs mount") + } + } + } + s.containers = make(map[string]*container) + + // Existing container/exec will be cleaned up by its waiters. + // No need to send async events here. +}