From 849548ffb8e958e901317eceffdcc2d918cafd8d Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Thu, 21 Mar 2019 12:18:42 +0100 Subject: [PATCH] userns: do not use an intermediate mount namespace We have an issue in the current implementation where the cleanup process is not able to umount the storage as it is running in a separate namespace. Simplify the implementation for user namespaces by not using an intermediate mount namespace. For doing it, we need to relax the permissions on the parent directories and allow browsing them. Containers that are running without a user namespace, will still maintain mode 0700 on their directory. Signed-off-by: Giuseppe Scrivano --- libpod/container.go | 12 +--- libpod/container_internal.go | 47 ++++----------- libpod/container_internal_linux.go | 25 +------- libpod/oci_linux.go | 91 +++++++++--------------------- libpod/runtime_ctr.go | 6 +- 5 files changed, 43 insertions(+), 138 deletions(-) diff --git a/libpod/container.go b/libpod/container.go index 6d80a9bf4c..739406e424 100644 --- a/libpod/container.go +++ b/libpod/container.go @@ -146,18 +146,12 @@ type ContainerState struct { ConfigPath string `json:"configPath,omitempty"` // RunDir is a per-boot directory for container content RunDir string `json:"runDir,omitempty"` - // DestinationRunDir is where the files in RunDir will be accessible for the container. - // It is different than RunDir when using userNS - DestinationRunDir string `json:"destinationRunDir,omitempty"` // Mounted indicates whether the container's storage has been mounted // for use Mounted bool `json:"mounted,omitempty"` // Mountpoint contains the path to the container's mounted storage as given - // by containers/storage. It can be different than RealMountpoint when - // usernamespaces are used + // by containers/storage. Mountpoint string `json:"mountPoint,omitempty"` - // RealMountpoint contains the path to the container's mounted storage - RealMountpoint string `json:"realMountPoint,omitempty"` // StartedTime is the time the container was started StartedTime time.Time `json:"startedTime,omitempty"` // FinishedTime is the time the container finished executing @@ -186,10 +180,6 @@ type ContainerState struct { // the path of the file on disk outside the container BindMounts map[string]string `json:"bindMounts,omitempty"` - // UserNSRoot is the directory used as root for the container when using - // user namespaces. - UserNSRoot string `json:"userNSRoot,omitempty"` - // ExtensionStageHooks holds hooks which will be executed by libpod // and not delegated to the OCI runtime. ExtensionStageHooks map[string][]spec.Hook `json:"extensionStageHooks,omitempty"` diff --git a/libpod/container_internal.go b/libpod/container_internal.go index 7a90bc7d47..daa32007a3 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -310,23 +310,12 @@ func (c *Container) setupStorage(ctx context.Context) error { } if !rootless.IsRootless() && (len(c.config.IDMappings.UIDMap) != 0 || len(c.config.IDMappings.GIDMap) != 0) { - info, err := os.Stat(c.runtime.config.TmpDir) - if err != nil { - return errors.Wrapf(err, "cannot stat `%s`", c.runtime.config.TmpDir) - } - if err := os.Chmod(c.runtime.config.TmpDir, info.Mode()|0111); err != nil { - return errors.Wrapf(err, "cannot chmod `%s`", c.runtime.config.TmpDir) - } - root := filepath.Join(c.runtime.config.TmpDir, "containers-root", c.ID()) - if err := os.MkdirAll(root, 0755); err != nil { - return errors.Wrapf(err, "error creating userNS tmpdir for container %s", c.ID()) - } - if err := os.Chown(root, c.RootUID(), c.RootGID()); err != nil { + if err := os.Chown(containerInfo.RunDir, c.RootUID(), c.RootGID()); err != nil { return err } - c.state.UserNSRoot, err = filepath.EvalSymlinks(root) - if err != nil { - return errors.Wrapf(err, "failed to eval symlinks for %s", root) + + if err := os.Chown(containerInfo.Dir, c.RootUID(), c.RootGID()); err != nil { + return err } } @@ -334,10 +323,6 @@ func (c *Container) setupStorage(ctx context.Context) error { c.config.MountLabel = containerInfo.MountLabel c.config.StaticDir = containerInfo.Dir c.state.RunDir = containerInfo.RunDir - c.state.DestinationRunDir = c.state.RunDir - if c.state.UserNSRoot != "" { - c.state.DestinationRunDir = filepath.Join(c.state.UserNSRoot, "rundir") - } // Set the default Entrypoint and Command if containerInfo.Config != nil { @@ -372,12 +357,6 @@ func (c *Container) teardownStorage() error { return errors.Wrapf(err, "failed to cleanup container %s storage", c.ID()) } - if c.state.UserNSRoot != "" { - if err := os.RemoveAll(c.state.UserNSRoot); err != nil { - return errors.Wrapf(err, "error removing userns root %q", c.state.UserNSRoot) - } - } - if err := c.runtime.storageService.DeleteContainer(c.ID()); err != nil { // If the container has already been removed, warn but do not // error - we wanted it gone, it is already gone. @@ -432,6 +411,7 @@ func (c *Container) refresh() error { if err != nil { return errors.Wrapf(err, "error retrieving temporary directory for container %s", c.ID()) } + c.state.RunDir = dir if len(c.config.IDMappings.UIDMap) != 0 || len(c.config.IDMappings.GIDMap) != 0 { info, err := os.Stat(c.runtime.config.TmpDir) @@ -448,16 +428,6 @@ func (c *Container) refresh() error { if err := os.Chown(root, c.RootUID(), c.RootGID()); err != nil { return err } - c.state.UserNSRoot, err = filepath.EvalSymlinks(root) - if err != nil { - return errors.Wrapf(err, "failed to eval symlinks for %s", root) - } - } - - c.state.RunDir = dir - c.state.DestinationRunDir = c.state.RunDir - if c.state.UserNSRoot != "" { - c.state.DestinationRunDir = filepath.Join(c.state.UserNSRoot, "rundir") } // We need to pick up a new lock @@ -1260,7 +1230,7 @@ func (c *Container) writeStringToRundir(destFile, output string) (string, error) return "", err } - return filepath.Join(c.state.DestinationRunDir, destFile), nil + return filepath.Join(c.state.RunDir, destFile), nil } // appendStringToRundir appends the provided string to the runtimedir file @@ -1277,7 +1247,7 @@ func (c *Container) appendStringToRundir(destFile, output string) (string, error return "", errors.Wrapf(err, "unable to write %s", destFileName) } - return filepath.Join(c.state.DestinationRunDir, destFile), nil + return filepath.Join(c.state.RunDir, destFile), nil } // Save OCI spec to disk, replacing any existing specs for the container @@ -1410,6 +1380,9 @@ func (c *Container) mount() (string, error) { if err != nil { return "", errors.Wrapf(err, "error resolving storage path for container %s", c.ID()) } + if err := os.Chown(mountPoint, c.RootUID(), c.RootGID()); err != nil { + return "", errors.Wrapf(err, "cannot chown %s to %d:%d", mountPoint, c.RootUID(), c.RootGID()) + } return mountPoint, nil } diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index 23de1aa01a..504d6c1352 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -25,7 +25,6 @@ import ( "github.com/containers/libpod/pkg/lookup" "github.com/containers/libpod/pkg/resolvconf" "github.com/containers/libpod/pkg/rootless" - "github.com/containers/storage/pkg/idtools" "github.com/cyphar/filepath-securejoin" "github.com/opencontainers/runc/libcontainer/user" spec "github.com/opencontainers/runtime-spec/specs-go" @@ -99,11 +98,6 @@ func (c *Container) prepare() (err error) { // Finish up mountStorage c.state.Mounted = true c.state.Mountpoint = mountPoint - if c.state.UserNSRoot == "" { - c.state.RealMountpoint = c.state.Mountpoint - } else { - c.state.RealMountpoint = filepath.Join(c.state.UserNSRoot, "mountpoint") - } logrus.Debugf("Created root filesystem for container %s at %s", c.ID(), c.state.Mountpoint) }() @@ -220,13 +214,6 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { } } m.Options = options - - // If we are using a user namespace, we will use an intermediate - // directory to bind mount volumes - if c.state.UserNSRoot != "" && strings.HasPrefix(m.Source, c.runtime.config.VolumePath) { - newSourceDir := filepath.Join(c.state.UserNSRoot, "volumes") - m.Source = strings.Replace(m.Source, c.runtime.config.VolumePath, newSourceDir, 1) - } } g.SetProcessSelinuxLabel(c.ProcessLabel()) @@ -313,13 +300,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { } } - if c.config.Rootfs == "" { - if err := idtools.MkdirAllAs(c.state.RealMountpoint, 0700, c.RootUID(), c.RootGID()); err != nil { - return nil, err - } - } - - g.SetRootPath(c.state.RealMountpoint) + g.SetRootPath(c.state.Mountpoint) g.AddAnnotation(crioAnnotations.Created, c.config.CreatedTime.Format(time.RFC3339Nano)) g.AddAnnotation("org.opencontainers.image.stopSignal", fmt.Sprintf("%d", c.config.StopSignal)) @@ -820,7 +801,7 @@ func (c *Container) makeBindMounts() error { } // Add Secret Mounts - secretMounts := secrets.SecretMountsWithUIDGID(c.config.MountLabel, c.state.RunDir, c.runtime.config.DefaultMountsFile, c.state.DestinationRunDir, c.RootUID(), c.RootGID(), rootless.IsRootless()) + secretMounts := secrets.SecretMountsWithUIDGID(c.config.MountLabel, c.state.RunDir, c.runtime.config.DefaultMountsFile, c.state.RunDir, c.RootUID(), c.RootGID(), rootless.IsRootless()) for _, mount := range secretMounts { if _, ok := c.state.BindMounts[mount.Destination]; !ok { c.state.BindMounts[mount.Destination] = mount.Source @@ -907,7 +888,7 @@ func (c *Container) generateResolvConf() (string, error) { return "", err } - return filepath.Join(c.state.DestinationRunDir, "resolv.conf"), nil + return filepath.Join(c.state.RunDir, "resolv.conf"), nil } // generateHosts creates a containers hosts file diff --git a/libpod/oci_linux.go b/libpod/oci_linux.go index f85c5ee623..8c0abad80f 100644 --- a/libpod/oci_linux.go +++ b/libpod/oci_linux.go @@ -3,17 +3,14 @@ package libpod import ( - "fmt" "os" "os/exec" "path/filepath" - "runtime" "strings" - "sync" + "syscall" "github.com/containerd/cgroups" "github.com/containers/libpod/utils" - "github.com/containers/storage/pkg/idtools" spec "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" "golang.org/x/sys/unix" @@ -62,72 +59,40 @@ func newPipe() (parent *os.File, child *os.File, err error) { return os.NewFile(uintptr(fds[1]), "parent"), os.NewFile(uintptr(fds[0]), "child"), nil } -// CreateContainer creates a container in the OCI runtime -// TODO terminal support for container -// Presently just ignoring conmon opts related to it -func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string, restoreOptions *ContainerCheckpointOptions) (err error) { - if ctr.state.UserNSRoot == "" { - // no need of an intermediate mount ns - return r.createOCIContainer(ctr, cgroupParent, restoreOptions) - } - var wg sync.WaitGroup - wg.Add(1) - go func() { - defer wg.Done() - runtime.LockOSThread() - - var fd *os.File - fd, err = os.Open(fmt.Sprintf("/proc/%d/task/%d/ns/mnt", os.Getpid(), unix.Gettid())) +// makeAccessible changes the path permission and each parent directory to have --x--x--x +func makeAccessible(path string, uid, gid int) error { + for ; path != "/"; path = filepath.Dir(path) { + st, err := os.Stat(path) if err != nil { - return - } - defer fd.Close() - - // create a new mountns on the current thread - if err = unix.Unshare(unix.CLONE_NEWNS); err != nil { - return - } - defer unix.Setns(int(fd.Fd()), unix.CLONE_NEWNS) - - // don't spread our mounts around - err = unix.Mount("/", "/", "none", unix.MS_REC|unix.MS_SLAVE, "") - if err != nil { - return - } - err = unix.Mount(ctr.state.Mountpoint, ctr.state.RealMountpoint, "none", unix.MS_BIND, "") - if err != nil { - return + if os.IsNotExist(err) { + return nil + } + return err } - if err := idtools.MkdirAllAs(ctr.state.DestinationRunDir, 0700, ctr.RootUID(), ctr.RootGID()); err != nil { - return + if int(st.Sys().(*syscall.Stat_t).Uid) == uid && int(st.Sys().(*syscall.Stat_t).Gid) == gid { + continue } - - err = unix.Mount(ctr.state.RunDir, ctr.state.DestinationRunDir, "none", unix.MS_BIND, "") - if err != nil { - return + if st.Mode()&0111 != 0111 { + if err := os.Chmod(path, os.FileMode(st.Mode()|0111)); err != nil { + return err + } } + } + return nil +} - if ctr.state.UserNSRoot != "" { - _, err := os.Stat(ctr.runtime.config.VolumePath) - if err != nil && !os.IsNotExist(err) { - return - } - if err == nil { - volumesTarget := filepath.Join(ctr.state.UserNSRoot, "volumes") - if err := idtools.MkdirAs(volumesTarget, 0700, ctr.RootUID(), ctr.RootGID()); err != nil { - return - } - if err = unix.Mount(ctr.runtime.config.VolumePath, volumesTarget, "none", unix.MS_BIND, ""); err != nil { - return - } +// CreateContainer creates a container in the OCI runtime +// TODO terminal support for container +// Presently just ignoring conmon opts related to it +func (r *OCIRuntime) createContainer(ctr *Container, cgroupParent string, restoreOptions *ContainerCheckpointOptions) (err error) { + if len(ctr.config.IDMappings.UIDMap) != 0 || len(ctr.config.IDMappings.GIDMap) != 0 { + for _, i := range []string{ctr.state.RunDir, ctr.runtime.config.TmpDir, ctr.config.StaticDir, ctr.state.Mountpoint, ctr.runtime.config.VolumePath} { + if err := makeAccessible(i, ctr.RootUID(), ctr.RootGID()); err != nil { + return err } } - - err = r.createOCIContainer(ctr, cgroupParent, restoreOptions) - }() - wg.Wait() - - return err + } + return r.createOCIContainer(ctr, cgroupParent, restoreOptions) } func rpmVersion(path string) string { diff --git a/libpod/runtime_ctr.go b/libpod/runtime_ctr.go index 17340f56dd..506aee4776 100644 --- a/libpod/runtime_ctr.go +++ b/libpod/runtime_ctr.go @@ -201,11 +201,7 @@ func (r *Runtime) newContainer(ctx context.Context, rSpec *spec.Spec, options .. } if !MountExists(ctr.config.Spec.Mounts, "/dev/shm") && ctr.config.ShmDir == "" { - if ctr.state.UserNSRoot == "" { - ctr.config.ShmDir = filepath.Join(ctr.bundlePath(), "shm") - } else { - ctr.config.ShmDir = filepath.Join(ctr.state.UserNSRoot, "shm") - } + ctr.config.ShmDir = filepath.Join(ctr.bundlePath(), "shm") if err := os.MkdirAll(ctr.config.ShmDir, 0700); err != nil { if !os.IsExist(err) { return nil, errors.Wrapf(err, "unable to create shm %q dir", ctr.config.ShmDir)