diff --git a/mount/mount_linux.go b/mount/mount_linux.go index a69f65c2ddd3..79176b2ae6d2 100644 --- a/mount/mount_linux.go +++ b/mount/mount_linux.go @@ -24,6 +24,7 @@ import ( "strings" "time" + "github.com/containerd/containerd/pkg/userns" exec "golang.org/x/sys/execabs" "golang.org/x/sys/unix" ) @@ -104,12 +105,56 @@ func (m *Mount) Mount(target string) (err error) { const broflags = unix.MS_BIND | unix.MS_RDONLY if oflags&broflags == broflags { + // Preserve CL_UNPRIVILEGED "locked" flags of the + // bind mount target when we remount to make the bind readonly. + // This is necessary to ensure that + // bind-mounting "with options" will not fail with user namespaces, due to + // kernel restrictions that require user namespace mounts to preserve + // CL_UNPRIVILEGED locked flags. + var unprivFlags int + if userns.RunningInUserNS() { + unprivFlags, err = getUnprivilegedMountFlags(target) + if err != nil { + return err + } + } // Remount the bind to apply read only. - return unix.Mount("", target, "", uintptr(oflags|unix.MS_REMOUNT), "") + return unix.Mount("", target, "", uintptr(oflags|unprivFlags|unix.MS_REMOUNT), "") } return nil } +// Get the set of mount flags that are set on the mount that contains the given +// path and are locked by CL_UNPRIVILEGED. +// +// From https://github.com/moby/moby/blob/v23.0.1/daemon/oci_linux.go#L430-L460 +func getUnprivilegedMountFlags(path string) (int, error) { + var statfs unix.Statfs_t + if err := unix.Statfs(path, &statfs); err != nil { + return 0, err + } + + // The set of keys come from https://github.com/torvalds/linux/blob/v4.13/fs/namespace.c#L1034-L1048. + unprivilegedFlags := []int{ + unix.MS_RDONLY, + unix.MS_NODEV, + unix.MS_NOEXEC, + unix.MS_NOSUID, + unix.MS_NOATIME, + unix.MS_RELATIME, + unix.MS_NODIRATIME, + } + + var flags int + for flag := range unprivilegedFlags { + if int(statfs.Flags)&flag == flag { + flags |= flag + } + } + + return flags, nil +} + // Unmount the provided mount path with the flags func Unmount(target string, flags int) error { if err := unmount(target, flags); err != nil && err != unix.EINVAL {