From 934f1ccb2a7ebba4432904f7b34e2c34f68d551e Mon Sep 17 00:00:00 2001 From: Rodrigo Campos Date: Fri, 5 Jul 2024 14:03:45 +0200 Subject: [PATCH 1/3] userns: Skip tests if the host doesn't support idmap mounts critest is used in projects like containerd, that test against older distros (like AlmaLinux 8). In those distros, CI will fail when we upgrade to runc 1.2.0. With runc 1.1 those test don't fail because runc doesn't support idmap mounts and the tests are skipped in that case. But with runc 1.2.0-rc.2, that supports idmap mounts, the tests are not skipped but fail on distros with older kernels that don't support idmap mounts. This commit just tries to detect if the path used for the container rootfs supports idmap mounts. To do that it uses the Status() message from CRI with verbose param set to true. It parses the output that containerd sets (it's quite unspecified that field), and otherwise fallbacks to "/var/lib" as the path to test idmap mounts support. Signed-off-by: Rodrigo Campos --- pkg/validate/security_context_linux.go | 83 +++++++++++++++++++++++++- 1 file changed, 82 insertions(+), 1 deletion(-) diff --git a/pkg/validate/security_context_linux.go b/pkg/validate/security_context_linux.go index 2fc786a970..954d49c243 100644 --- a/pkg/validate/security_context_linux.go +++ b/pkg/validate/security_context_linux.go @@ -18,14 +18,17 @@ package validate import ( "context" + "encoding/json" "fmt" "net" "os" "os/exec" "path/filepath" "strings" + "syscall" "time" + "golang.org/x/sys/unix" internalapi "k8s.io/cri-api/pkg/apis" runtimeapi "k8s.io/cri-api/pkg/apis/runtime/v1" "sigs.k8s.io/cri-tools/pkg/common" @@ -38,6 +41,8 @@ import ( const ( nginxContainerImage string = framework.DefaultRegistryE2ETestImagesPrefix + "nginx:1.14-2" noNewPrivsImage string = framework.DefaultRegistryE2ETestImagesPrefix + "nonewprivs:1.3" + usernsSize int = 65536 + usernsHostID int = 65536 ) var _ = framework.KubeDescribe("Security Context", func() { @@ -860,7 +865,7 @@ var _ = framework.KubeDescribe("Security Context", func() { By("searching for runtime handler which supports user namespaces") ctx, cancel := context.WithTimeout(context.Background(), time.Minute) defer cancel() - resp, err := rc.Status(ctx, false) + resp, err := rc.Status(ctx, true) // Set verbose to true so the info field is populated. framework.ExpectNoError(err, "failed to get runtime config: %v", err) supportsUserNamespaces := false @@ -876,6 +881,11 @@ var _ = framework.KubeDescribe("Security Context", func() { if !supportsUserNamespaces { Skip("no runtime handler found which supports user namespaces") } + + pathIDMap := rootfsPath(resp.GetInfo()) + if err := supportsIDMap(pathIDMap); err != nil { + Skip("ID mapping is not supported" + " with path: " + pathIDMap + ": " + err.Error()) + } }) It("runtime should support NamespaceMode_POD", func() { @@ -1458,3 +1468,74 @@ func runUserNamespacePodWithError( framework.RunPodSandboxError(rc, config) } + +func supportsIDMap(path string) error { + treeFD, err := unix.OpenTree(-1, path, uint(unix.OPEN_TREE_CLONE|unix.OPEN_TREE_CLOEXEC)) + if err != nil { + return err + } + defer unix.Close(treeFD) + + // We want to test if idmap mounts are supported. + // So we use just some random mapping, it doesn't really matter which one. + // For the helper command, we just need something that is alive while we + // test this, a sleep 5 will do it. + cmd := exec.Command("sleep", "5") + cmd.SysProcAttr = &syscall.SysProcAttr{ + Cloneflags: syscall.CLONE_NEWUSER, + UidMappings: []syscall.SysProcIDMap{{ContainerID: 0, HostID: usernsHostID, Size: usernsSize}}, + GidMappings: []syscall.SysProcIDMap{{ContainerID: 0, HostID: usernsHostID, Size: usernsSize}}, + } + if err := cmd.Start(); err != nil { + return err + } + defer func() { + _ = cmd.Process.Kill() + _ = cmd.Wait() + }() + + usernsPath := fmt.Sprintf("/proc/%d/ns/user", cmd.Process.Pid) + var usernsFile *os.File + if usernsFile, err = os.Open(usernsPath); err != nil { + return err + } + defer usernsFile.Close() + + attr := unix.MountAttr{ + Attr_set: unix.MOUNT_ATTR_IDMAP, + Userns_fd: uint64(usernsFile.Fd()), + } + if err := unix.MountSetattr(treeFD, "", unix.AT_EMPTY_PATH, &attr); err != nil { + return err + } + + return nil +} + +// rootfsPath returns the parent path used for containerd stateDir (the container rootfs lives +// inside there). If the object can't be parsed, it returns the "/var/lib". +// Usually the rootfs is inside /var/lib and it's the same filesystem. In the end, to see if a path +// supports idmap, we only care about its fs so this is a good fallback. +func rootfsPath(info map[string]string) string { + defaultPath := "/var/lib" + jsonCfg, ok := info["config"] + if !ok { + return defaultPath + } + + // Get only the StateDir from the json. + type containerdConfig struct { + StateDir string `json:"stateDir"` + } + cfg := containerdConfig{} + if err := json.Unmarshal([]byte(jsonCfg), &cfg); err != nil { + return defaultPath + } + if cfg.StateDir == "" { + return defaultPath + } + + // The stateDir might have not been created yet. Let's use the parent directory that should + // always exist. + return filepath.Join(cfg.StateDir, "../") +} From 316d6d3c54d0cf9ea237fe844dcc72b0511143b5 Mon Sep 17 00:00:00 2001 From: Rodrigo Campos Date: Mon, 8 Jul 2024 17:52:21 +0200 Subject: [PATCH 2/3] userns: Call runtime only once Sascha suggested to run this only once. Let's cache the answer from the runtime and move the tests that need idmap mounts on the host to `When("Host idmap mount support is needed"`. While we split the tests in that way, let's just query idmap mount support for the tests that need it, using the cache. Signed-off-by: Rodrigo Campos --- pkg/validate/security_context_linux.go | 181 ++++++++++++++----------- 1 file changed, 103 insertions(+), 78 deletions(-) diff --git a/pkg/validate/security_context_linux.go b/pkg/validate/security_context_linux.go index 954d49c243..0aca61196d 100644 --- a/pkg/validate/security_context_linux.go +++ b/pkg/validate/security_context_linux.go @@ -25,6 +25,7 @@ import ( "os/exec" "path/filepath" "strings" + "sync" "syscall" "time" @@ -850,7 +851,12 @@ var _ = framework.KubeDescribe("Security Context", func() { Context("UserNamespaces", func() { var ( - podName string + podName string + + // We call rc.Status() once and save the result in statusResp. + statusOnce sync.Once + statusResp *runtimeapi.StatusResponse + defaultMapping = []*runtimeapi.IDMapping{{ ContainerId: 0, HostId: 1000, @@ -863,13 +869,21 @@ var _ = framework.KubeDescribe("Security Context", func() { // Find a working runtime handler if none provided By("searching for runtime handler which supports user namespaces") - ctx, cancel := context.WithTimeout(context.Background(), time.Minute) - defer cancel() - resp, err := rc.Status(ctx, true) // Set verbose to true so the info field is populated. - framework.ExpectNoError(err, "failed to get runtime config: %v", err) - - supportsUserNamespaces := false - for _, rh := range resp.GetRuntimeHandlers() { + statusOnce.Do(func() { + ctx, cancel := context.WithTimeout(context.Background(), time.Minute) + defer cancel() + // Set verbose to true, other BeforeEachs calls need the info field + // populated. + // XXX: Do NOT use ":=" here, it breaks the closure reference to + // statusResp. + var err error + statusResp, err = rc.Status(ctx, true) + framework.ExpectNoError(err, "failed to get runtime config: %v", err) + _ = statusResp // Avoid unused variable error + }) + + var supportsUserNamespaces bool + for _, rh := range statusResp.GetRuntimeHandlers() { if rh.GetName() == framework.TestContext.RuntimeHandler { if rh.GetFeatures().GetUserNamespaces() { supportsUserNamespaces = true @@ -877,94 +891,105 @@ var _ = framework.KubeDescribe("Security Context", func() { } } } - if !supportsUserNamespaces { Skip("no runtime handler found which supports user namespaces") } - - pathIDMap := rootfsPath(resp.GetInfo()) - if err := supportsIDMap(pathIDMap); err != nil { - Skip("ID mapping is not supported" + " with path: " + pathIDMap + ": " + err.Error()) - } }) - It("runtime should support NamespaceMode_POD", func() { - namespaceOption := &runtimeapi.NamespaceOption{ - UsernsOptions: &runtimeapi.UserNamespace{ - Mode: runtimeapi.NamespaceMode_POD, - Uids: defaultMapping, - Gids: defaultMapping, - }, - } + When("Host idmap mount support is needed", func() { + BeforeEach(func() { + pathIDMap := rootfsPath(statusResp.GetInfo()) + if err := supportsIDMap(pathIDMap); err != nil { + Skip("ID mapping is not supported" + " with path: " + pathIDMap + ": " + err.Error()) + } + }) + + It("runtime should support NamespaceMode_POD", func() { + namespaceOption := &runtimeapi.NamespaceOption{ + UsernsOptions: &runtimeapi.UserNamespace{ + Mode: runtimeapi.NamespaceMode_POD, + Uids: defaultMapping, + Gids: defaultMapping, + }, + } - hostLogPath, podLogPath := createLogTempDir(podName) - defer os.RemoveAll(hostLogPath) - podID, podConfig = createNamespacePodSandbox(rc, namespaceOption, podName, podLogPath) - containerName := runUserNamespaceContainer(rc, ic, podID, podConfig) + hostLogPath, podLogPath := createLogTempDir(podName) + defer os.RemoveAll(hostLogPath) + podID, podConfig = createNamespacePodSandbox(rc, namespaceOption, podName, podLogPath) + containerName := runUserNamespaceContainer(rc, ic, podID, podConfig) - matchContainerOutputRe(podConfig, containerName, `\s+0\s+1000\s+100000\n`) - }) + matchContainerOutputRe(podConfig, containerName, `\s+0\s+1000\s+100000\n`) + }) - It("runtime should support NamespaceMode_NODE", func() { - namespaceOption := &runtimeapi.NamespaceOption{ - UsernsOptions: &runtimeapi.UserNamespace{ - Mode: runtimeapi.NamespaceMode_NODE, - }, - } + }) - hostLogPath, podLogPath := createLogTempDir(podName) - defer os.RemoveAll(hostLogPath) - podID, podConfig = createNamespacePodSandbox(rc, namespaceOption, podName, podLogPath) - containerName := runUserNamespaceContainer(rc, ic, podID, podConfig) + When("Host idmap mount support is not needed", func() { + It("runtime should support NamespaceMode_NODE", func() { + namespaceOption := &runtimeapi.NamespaceOption{ + UsernsOptions: &runtimeapi.UserNamespace{ + Mode: runtimeapi.NamespaceMode_NODE, + }, + } - // 4294967295 means that the entire range is available - matchContainerOutputRe(podConfig, containerName, `\s+0\s+0\s+4294967295\n`) - }) + hostLogPath, podLogPath := createLogTempDir(podName) + defer os.RemoveAll(hostLogPath) + podID, podConfig = createNamespacePodSandbox(rc, namespaceOption, podName, podLogPath) + containerName := runUserNamespaceContainer(rc, ic, podID, podConfig) - It("runtime should fail if more than one mapping provided", func() { - wrongMapping := []*runtimeapi.IDMapping{{ - ContainerId: 0, - HostId: 1000, - Length: 100000, - }, { - ContainerId: 0, - HostId: 2000, - Length: 100000, - }} - usernsOptions := &runtimeapi.UserNamespace{ - Mode: runtimeapi.NamespaceMode_POD, - Uids: wrongMapping, - Gids: wrongMapping, - } + // 4294967295 means that the entire range is available + expectedOutput := hostUsernsContent() + if expectedOutput == "" { + Fail("failed to get host userns content") + } + // 4294967295 means that the entire range is available + matchContainerOutputRe(podConfig, containerName, `\s+0\s+0\s+4294967295\n`) + }) + + It("runtime should fail if more than one mapping provided", func() { + wrongMapping := []*runtimeapi.IDMapping{{ + ContainerId: 0, + HostId: 1000, + Length: 100000, + }, { + ContainerId: 0, + HostId: 2000, + Length: 100000, + }} + usernsOptions := &runtimeapi.UserNamespace{ + Mode: runtimeapi.NamespaceMode_POD, + Uids: wrongMapping, + Gids: wrongMapping, + } - runUserNamespacePodWithError(rc, podName, usernsOptions) - }) + runUserNamespacePodWithError(rc, podName, usernsOptions) + }) - It("runtime should fail if container ID 0 is not mapped", func() { - mapping := []*runtimeapi.IDMapping{{ - ContainerId: 1, - HostId: 1000, - Length: 100000, - }} - usernsOptions := &runtimeapi.UserNamespace{ - Mode: runtimeapi.NamespaceMode_POD, - Uids: mapping, - Gids: mapping, - } + It("runtime should fail if container ID 0 is not mapped", func() { + mapping := []*runtimeapi.IDMapping{{ + ContainerId: 1, + HostId: 1000, + Length: 100000, + }} + usernsOptions := &runtimeapi.UserNamespace{ + Mode: runtimeapi.NamespaceMode_POD, + Uids: mapping, + Gids: mapping, + } - runUserNamespacePodWithError(rc, podName, usernsOptions) - }) + runUserNamespacePodWithError(rc, podName, usernsOptions) + }) - It("runtime should fail with NamespaceMode_CONTAINER", func() { - usernsOptions := &runtimeapi.UserNamespace{Mode: runtimeapi.NamespaceMode_CONTAINER} + It("runtime should fail with NamespaceMode_CONTAINER", func() { + usernsOptions := &runtimeapi.UserNamespace{Mode: runtimeapi.NamespaceMode_CONTAINER} - runUserNamespacePodWithError(rc, podName, usernsOptions) - }) + runUserNamespacePodWithError(rc, podName, usernsOptions) + }) - It("runtime should fail with NamespaceMode_TARGET", func() { - usernsOptions := &runtimeapi.UserNamespace{Mode: runtimeapi.NamespaceMode_TARGET} + It("runtime should fail with NamespaceMode_TARGET", func() { + usernsOptions := &runtimeapi.UserNamespace{Mode: runtimeapi.NamespaceMode_TARGET} - runUserNamespacePodWithError(rc, podName, usernsOptions) + runUserNamespacePodWithError(rc, podName, usernsOptions) + }) }) }) }) From c81525dac803c22b9a3fe55b9cd449b85208e923 Mon Sep 17 00:00:00 2001 From: Rodrigo Campos Date: Mon, 8 Jul 2024 17:53:35 +0200 Subject: [PATCH 3/3] userns: Fix running tests inside a userns containerd creates a userns and inside there, it runs the critest tool. However, in that setup, the length of containerd's userns is not the whole UID space. Let's verify that the length of the userns inside the pod, when we created it with NamespaceMode_NODE (IOW, when not using a new userns for the pod) is the same as outside the pod. This works fine when contained itself runs inside a userns. Signed-off-by: Rodrigo Campos --- pkg/validate/security_context_linux.go | 44 ++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 3 deletions(-) diff --git a/pkg/validate/security_context_linux.go b/pkg/validate/security_context_linux.go index 0aca61196d..2bc3e45d66 100644 --- a/pkg/validate/security_context_linux.go +++ b/pkg/validate/security_context_linux.go @@ -24,6 +24,7 @@ import ( "os" "os/exec" "path/filepath" + "slices" "strings" "sync" "syscall" @@ -936,13 +937,29 @@ var _ = framework.KubeDescribe("Security Context", func() { podID, podConfig = createNamespacePodSandbox(rc, namespaceOption, podName, podLogPath) containerName := runUserNamespaceContainer(rc, ic, podID, podConfig) - // 4294967295 means that the entire range is available + // If this test is run inside a userns, we need to check the + // container userns is the same as the one we see outside. expectedOutput := hostUsernsContent() if expectedOutput == "" { Fail("failed to get host userns content") } - // 4294967295 means that the entire range is available - matchContainerOutputRe(podConfig, containerName, `\s+0\s+0\s+4294967295\n`) + // The userns mapping can have several lines, we match each of them. + for _, line := range strings.Split(expectedOutput, "\n") { + if line == "" { + continue + } + mapping := parseUsernsMappingLine(line) + if len(mapping) != 3 { + msg := fmt.Sprintf("slice: %#v, len: %v", mapping, len(mapping)) + Fail("Unexpected host mapping line: " + msg) + } + + // The container outputs the content of its /proc/self/uid_map. + // That output should match the regex of the host userns content. + containerId, hostId, length := mapping[0], mapping[1], mapping[2] + regex := fmt.Sprintf(`\s+%v\s+%v\s+%v`, containerId, hostId, length) + matchContainerOutputRe(podConfig, containerName, regex) + } }) It("runtime should fail if more than one mapping provided", func() { @@ -1564,3 +1581,24 @@ func rootfsPath(info map[string]string) string { // always exist. return filepath.Join(cfg.StateDir, "../") } + +func hostUsernsContent() string { + uidMapPath := "/proc/self/uid_map" + uidMapContent, err := os.ReadFile(uidMapPath) + if err != nil { + return "" + } + return string(uidMapContent) +} + +func parseUsernsMappingLine(line string) []string { + // The line format is: + // + // But there could be a lot of spaces between the fields. + line = strings.TrimSpace(line) + m := strings.Split(line, " ") + m = slices.DeleteFunc(m, func(s string) bool { + return s == "" + }) + return m +}