From eeea2437e816f9b4fc5167cb80ceb9a653c277a3 Mon Sep 17 00:00:00 2001 From: Geon Kim Date: Mon, 2 Dec 2024 18:19:06 +0900 Subject: [PATCH] feat(memlimit): replace containerd/cgroup with own cgroup implementation --- go.mod | 9 - go.sum | 57 ------ memlimit/cgroups_linux.go | 387 +++++++++++++++++++++++++++++++++----- 3 files changed, 338 insertions(+), 115 deletions(-) diff --git a/go.mod b/go.mod index 57ec15c..9f8f223 100644 --- a/go.mod +++ b/go.mod @@ -10,15 +10,6 @@ require ( ) require ( - github.com/cilium/ebpf v0.16.0 // indirect - github.com/containerd/log v0.1.0 // indirect - github.com/coreos/go-systemd/v22 v22.5.0 // indirect - github.com/docker/go-units v0.5.0 // indirect - github.com/godbus/dbus/v5 v5.1.0 // indirect github.com/moby/sys/userns v0.1.0 // indirect - github.com/opencontainers/runtime-spec v1.2.0 // indirect - github.com/sirupsen/logrus v1.9.3 // indirect - golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f // indirect golang.org/x/sys v0.27.0 // indirect - google.golang.org/protobuf v1.35.2 // indirect ) diff --git a/go.sum b/go.sum index bbe612a..4aeace7 100644 --- a/go.sum +++ b/go.sum @@ -1,65 +1,8 @@ -github.com/cilium/ebpf v0.16.0 h1:+BiEnHL6Z7lXnlGUsXQPPAE7+kenAd4ES8MQ5min0Ok= -github.com/cilium/ebpf v0.16.0/go.mod h1:L7u2Blt2jMM/vLAVgjxluxtBKlz3/GWjB0dMOEngfwE= github.com/containerd/cgroups/v3 v3.0.4 h1:2fs7l3P0Qxb1nKWuJNFiwhp2CqiKzho71DQkDrHJIo4= github.com/containerd/cgroups/v3 v3.0.4/go.mod h1:SA5DLYnXO8pTGYiAHXz94qvLQTKfVM5GEVisn4jpins= -github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= -github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= -github.com/coreos/go-systemd/v22 v22.5.0 h1:RrqgGjYQKalulkV8NGVIfkXQf6YYmOyiJKk8iXXhfZs= -github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc= -github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= -github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= -github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= -github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= -github.com/go-quicktest/qt v1.101.0 h1:O1K29Txy5P2OK0dGo59b7b0LR6wKfIhttaAhHUyn7eI= -github.com/go-quicktest/qt v1.101.0/go.mod h1:14Bz/f7NwaXPtdYEgzsx46kqSxVwTbzVZsDC26tQJow= -github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/godbus/dbus/v5 v5.1.0 h1:4KLkAxT3aOY8Li4FRJe/KvhoNFFxo0m6fNuFUO8QJUk= -github.com/godbus/dbus/v5 v5.1.0/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA= -github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= -github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= -github.com/josharian/native v1.1.0 h1:uuaP0hAbW7Y4l0ZRQ6C9zfb7Mg1mbFKry/xzDAfmtLA= -github.com/josharian/native v1.1.0/go.mod h1:7X/raswPFr05uY3HiLlYeyQntB6OO7E/d2Cu7qoaN2w= -github.com/jsimonetti/rtnetlink/v2 v2.0.1 h1:xda7qaHDSVOsADNouv7ukSuicKZO7GgVUCXxpaIEIlM= -github.com/jsimonetti/rtnetlink/v2 v2.0.1/go.mod h1:7MoNYNbb3UaDHtF8udiJo/RH6VsTKP1pqKLUTVCvToE= -github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= -github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= -github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= -github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= -github.com/mdlayher/netlink v1.7.2 h1:/UtM3ofJap7Vl4QWCPDGXY8d3GIY2UGSDbK+QWmY8/g= -github.com/mdlayher/netlink v1.7.2/go.mod h1:xraEF7uJbxLhc5fpHL4cPe221LI2bdttWlU+ZGLfQSw= -github.com/mdlayher/socket v0.4.1 h1:eM9y2/jlbs1M615oshPQOHZzj6R6wMT7bX5NPiQvn2U= -github.com/mdlayher/socket v0.4.1/go.mod h1:cAqeGjoufqdxWkD7DkpyS+wcefOtmu5OQ8KuoJGIReA= github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g= github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28= -github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk= -github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58 h1:onHthvaw9LFnH4t2DcNVpwGmV9E1BkGknEliJkfwQj0= github.com/pbnjay/memory v0.0.0-20210728143218-7b4eea64cf58/go.mod h1:DXv8WO4yhMYhSNPKjeNKa5WY9YCIEBRbNzFFPJbWO6Y= -github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= -github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= -github.com/rogpeppe/go-internal v1.11.0 h1:cWPaGQEPrBb5/AsnsZesgZZ9yb1OQ+GOISoDNXVBh4M= -github.com/rogpeppe/go-internal v1.11.0/go.mod h1:ddIwULY96R17DhadqLgMfk9H9tvdUzkipdSkR5nkCZA= -github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= -github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= -github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= -github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -go.uber.org/goleak v1.1.12 h1:gZAh5/EyT/HQwlpkCy6wTpqfH9H8Lz8zbm3dZh+OyzA= -go.uber.org/goleak v1.1.12/go.mod h1:cwTWslyiVhfpKIDGSZEM2HlOvcqm+tG4zioyIeLoqMQ= -golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f h1:XdNn9LlyWAhLVp6P/i8QYBW+hlyhrhei9uErw2B5GJo= -golang.org/x/exp v0.0.0-20241108190413-2d47ceb2692f/go.mod h1:D5SMRVC3C2/4+F/DB1wZsLRnSNimn2Sp/NPsCrsv8ak= -golang.org/x/net v0.23.0 h1:7EYJ93RZ9vYSZAIb2x3lnuvqO5zneoD6IvWjuhfxjTs= -golang.org/x/net v0.23.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= -golang.org/x/sync v0.9.0 h1:fEo0HyrW1GIgZdpbhCRO0PkJajUS5H9IFUztCgEo2jQ= -golang.org/x/sync v0.9.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= -golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.27.0 h1:wBqf8DvsY9Y/2P8gAfPDEYNuS30J4lPHJxXSb/nJZ+s= golang.org/x/sys v0.27.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -google.golang.org/protobuf v1.35.2 h1:8Ar7bF+apOIoThw1EdZl0p1oWvMqTHmpA2fRTyZO8io= -google.golang.org/protobuf v1.35.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= -gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= -gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= -gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/memlimit/cgroups_linux.go b/memlimit/cgroups_linux.go index 98a9548..93cbc6c 100644 --- a/memlimit/cgroups_linux.go +++ b/memlimit/cgroups_linux.go @@ -4,51 +4,175 @@ package memlimit import ( + "bufio" + "errors" + "fmt" + "io" "math" "os" "path/filepath" - - "github.com/containerd/cgroups/v3" - "github.com/containerd/cgroups/v3/cgroup1" - "github.com/containerd/cgroups/v3/cgroup2" -) - -const ( - cgroupMountPoint = "/sys/fs/cgroup" + "slices" + "strconv" + "strings" ) -// FromCgroup returns the memory limit based on the cgroups version on this system. +// GetMemoryLimit retrieves the memory limit for the current cgroup, supporting: +// - cgroup v1 +// - cgroup v2 +// - Hybrid mode (fallback to v1 if v2 fails) func FromCgroup() (uint64, error) { - switch cgroups.Mode() { - case cgroups.Legacy: - return FromCgroupV1() - case cgroups.Hybrid: - return FromCgroupHybrid() - case cgroups.Unified: - return FromCgroupV2() - } - return 0, ErrNoCgroup + return fromCgroup(detectCgroupVersion) } -// FromCgroupV1 returns the memory limit from the cgroup v1. func FromCgroupV1() (uint64, error) { - cg, err := cgroup1.Load(cgroup1.RootPath, cgroup1.WithHiearchy( - cgroup1.SingleSubsystem(cgroup1.Default, cgroup1.Memory), - )) + return fromCgroup(func(_ []mountInfo) (bool, bool) { + return true, false + }) +} + +func FromCgroupHybrid() (uint64, error) { + return FromCgroup() +} + +func FromCgroupV2() (uint64, error) { + return fromCgroup(func(_ []mountInfo) (bool, bool) { + return false, true + }) +} + +func fromCgroup(versionDetector func(mis []mountInfo) (bool, bool)) (uint64, error) { + mf, err := os.Open("/proc/self/mountinfo") + if err != nil { + return 0, fmt.Errorf("failed to open /proc/self/mountinfo: %w", err) + } + defer mf.Close() + + mis, err := parseMountInfo(mf) + if err != nil { + return 0, fmt.Errorf("failed to parse mountinfo: %w", err) + } + + v1, v2 := versionDetector(mis) + if !(v1 || v2) { + return 0, ErrNoCgroup + } + + cf, err := os.Open("/proc/self/cgroup") + if err != nil { + return 0, fmt.Errorf("failed to open /proc/self/cgroup: %w", err) + } + defer cf.Close() + + chs, err := parseCgroupFile(cf) + if err != nil { + return 0, fmt.Errorf("failed to parse cgroup file: %w", err) + } + + if v2 { + limit, err := getMemoryLimitV2(chs, mis) + if err == nil { + return limit, nil + } else if !v1 { + return 0, err + } + } + + return getMemoryLimitV1(chs, mis) +} + +func detectCgroupVersion(mis []mountInfo) (bool, bool) { + var v1, v2 bool + for _, mi := range mis { + switch mi.FilesystemType { + case "cgroup": + v1 = true + case "cgroup2": + v2 = true + } + } + return v1, v2 +} + +// getMemoryLimitV2 retrieves the memory limit for cgroup v2. +func getMemoryLimitV2(chs []cgroupHierarchy, mis []mountInfo) (uint64, error) { + idx := slices.IndexFunc(chs, func(ch cgroupHierarchy) bool { + return ch.HierarchyID == "0" && ch.ControllerList == "" + }) + if idx == -1 { + return 0, errors.New("cgroup v2 path not found") + } + relPath := chs[idx].CgroupPath + + idx = slices.IndexFunc(mis, func(mi mountInfo) bool { + return mi.FilesystemType == "cgroup2" + }) + if idx == -1 { + return 0, errors.New("cgroup v2 mountpoint not found") + } + root, mountPoint := mis[idx].Root, mis[idx].MountPoint + + // Resolve the actual cgroup path + cgroupPath, err := resolveCgroupPath(mountPoint, root, relPath) if err != nil { return 0, err } - metrics, err := cg.Stat(cgroup1.IgnoreNotExist) + // Construct the path to memory.max + memoryMaxPath := filepath.Join(cgroupPath, "memory.max") + + // Read the memory limit from memory.max + return readMemoryLimitV2FromPath(memoryMaxPath) +} + +// getMemoryLimitV1 retrieves the memory limit for cgroup v1. +func getMemoryLimitV1(chs []cgroupHierarchy, mis []mountInfo) (uint64, error) { + idx := slices.IndexFunc(chs, func(ch cgroupHierarchy) bool { + return slices.Contains(strings.Split(ch.ControllerList, ","), "memory") + }) + if idx == -1 { + return 0, errors.New("cgroup v1 path for memory controller not found") + } + relPath := chs[idx].CgroupPath + + idx = slices.IndexFunc(mis, func(mi mountInfo) bool { + return mi.FilesystemType == "cgroup" && slices.Contains(strings.Split(mi.SuperOptions, ","), "memory") + }) + if idx == -1 { + return 0, errors.New("cgroup v1 mountpoint for memory controller not found") + } + root, mountPoint := mis[idx].Root, mis[idx].MountPoint + + // Resolve the actual cgroup path + cgroupPath, err := resolveCgroupPath(mountPoint, root, relPath) if err != nil { return 0, err } - if limit := metrics.GetMemory().GetHierarchicalMemoryLimit(); limit != 0 && limit != getCgroupV1NoLimit() { - return limit, nil + // Retrieve the memory limit + return readMemoryLimitV1FromPath(cgroupPath) +} + +// readMemoryLimitV2FromPath reads the memory limit from the memory.max file for cgroup v2. +func readMemoryLimitV2FromPath(path string) (uint64, error) { + data, err := os.ReadFile(path) + if err != nil { + if errors.Is(err, os.ErrNotExist) { + return 0, ErrNoLimit + } + return 0, fmt.Errorf("failed to read memory.max: %w", err) + } + + trimmed := strings.TrimSpace(string(data)) + if trimmed == "max" { + return 0, ErrNoLimit + } + + value, err := strconv.ParseUint(trimmed, 10, 64) + if err != nil { + return 0, fmt.Errorf("failed to parse memory.max value: %w", err) } - return 0, ErrNoLimit + return value, nil } func getCgroupV1NoLimit() uint64 { @@ -56,43 +180,208 @@ func getCgroupV1NoLimit() uint64 { return math.MaxInt64 / ps * ps } -// FromCgroupHybrid returns the memory limit from the cgroup v1 or v2. -// It checks the cgroup v2 first, and if it fails, it falls back to cgroup v1. -func FromCgroupHybrid() (uint64, error) { - limit, err := fromCgroupV2(filepath.Join(cgroupMountPoint, "unified")) - if err == nil { - return limit, nil - } else if err != ErrNoLimit { - return 0, err +// readMemoryLimitV1FromPath reads the memory limit for cgroup v1 from the given path. +func readMemoryLimitV1FromPath(cgroupPath string) (uint64, error) { + hml, err := readHierarchicalMemoryLimit(filepath.Join(cgroupPath, "memory.stats")) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return 0, fmt.Errorf("failed to read hierarchical_memory_limit: %w", err) + } else if hml == 0 { + hml = math.MaxUint64 } - return FromCgroupV1() -} + b, err := os.ReadFile(filepath.Join(cgroupPath, "memory.limit_in_bytes")) + if err != nil && !errors.Is(err, os.ErrNotExist) { + return 0, fmt.Errorf("failed to read memory.limit_in_bytes: %w", err) + } + lib, err := strconv.ParseUint(string(b), 10, 64) + if err != nil { + return 0, fmt.Errorf("failed to parse memory.limit_in_bytes value: %w", err) + } else if lib == 0 { + hml = math.MaxUint64 + } -// FromCgroupV2 returns the memory limit from the cgroup v2. -func FromCgroupV2() (uint64, error) { - return fromCgroupV2(cgroupMountPoint) + limit := min(hml, lib) + if limit >= getCgroupV1NoLimit() { + return 0, ErrNoLimit + } + + return limit, nil } -func fromCgroupV2(mountPoint string) (uint64, error) { - path, err := cgroup2.NestedGroupPath("") +// readHierarchicalMemoryLimit extracts hierarchical_memory_limit from memory.stats for cgroup v1. +func readHierarchicalMemoryLimit(statPath string) (uint64, error) { + file, err := os.Open(statPath) if err != nil { return 0, err } + defer file.Close() - m, err := cgroup2.Load(path, cgroup2.WithMountpoint(mountPoint)) - if err != nil { + scanner := bufio.NewScanner(file) + for scanner.Scan() { + line := scanner.Text() + + fields := strings.Fields(line) + if len(fields) < 2 { + return 0, fmt.Errorf("failed to parse memory.stat %q: not enough fields", line) + } + + if fields[0] == "hierarchical_memory_limit" { + if len(fields) > 2 { + return 0, fmt.Errorf("failed to parse memory.stat %q: too many fields for hierarchical_memory_limit", line) + } + return strconv.ParseUint(fields[1], 10, 64) + } + } + if err := scanner.Err(); err != nil { return 0, err } - stats, err := m.Stat() - if err != nil { - return 0, err + return 0, nil +} + +// https://www.man7.org/linux/man-pages/man5/proc_pid_mountinfo.5.html +// 731 771 0:59 /sysrq-trigger /proc/sysrq-trigger ro,nosuid,nodev,noexec,relatime - proc proc rw +// +// 36 35 98:0 /mnt1 /mnt2 rw,noatime master:1 - ext3 /dev/root rw,errors=continue +// (1)(2)(3) (4) (5) (6) (7) (8) (9) (10) (11) +// +// (1) mount ID: a unique ID for the mount (may be reused after umount(2)). +// (2) parent ID: the ID of the parent mount (or of self for the root of this mount namespace's mount tree). +// (3) major:minor: the value of st_dev for files on this filesystem (see stat(2)). +// (4) root: the pathname of the directory in the filesystem which forms the root of this mount. +// (5) mount point: the pathname of the mount point relative to the process's root directory. +// (6) mount options: per-mount options (see mount(2)). +// (7) optional fields: zero or more fields of the form "tag[:value]"; see below. +// (8) separator: the end of the optional fields is marked by a single hyphen. +// (9) filesystem type: the filesystem type in the form "type[.subtype]". +// (10) mount source: filesystem-specific information or "none". +// (11) super options: per-superblock options (see mount(2)). +type mountInfo struct { + Root string + MountPoint string + FilesystemType string + SuperOptions string +} + +func parseMountInfo(r io.Reader) ([]mountInfo, error) { + var ( + s = bufio.NewScanner(r) + mis []mountInfo + ) + for s.Scan() { + line := s.Text() + + fieldss := strings.SplitN(line, " - ", 2) + if len(fieldss) != 2 { + return nil, fmt.Errorf("failed to parse mountinfo %q: invalid separator", line) + } + + fields1 := strings.Split(fieldss[0], " ") + if len(fields1) < 6 { + return nil, fmt.Errorf("failed to parse mountinfo %q: not enough fields1 %v", line, fields1) + } else if len(fields1) > 7 { + return nil, fmt.Errorf("failed to parse mountinfo %q: too many fields", line) + } else if len(fields1) == 6 { + fields1 = append(fields1, "") + } + + fields2 := strings.Split(fieldss[1], " ") + if len(fields2) < 3 { + return nil, fmt.Errorf("failed to parse mountinfo %q: not enough fields2 %v", line, fields2) + } else if len(fields2) > 3 { + return nil, fmt.Errorf("failed to parse mountinfo %q: too many fields", line) + } + + mis = append(mis, mountInfo{ + Root: fields1[3], + MountPoint: fields1[4], + FilesystemType: fields2[0], + SuperOptions: fields2[2], + }) } + if err := s.Err(); err != nil { + return nil, err + } + + return mis, nil +} + +// https://www.man7.org/linux/man-pages/man7/cgroups.7.html +// +// 5:cpuacct,cpu,cpuset:/daemons +// (1) (2) (3) +// +// (1) hierarchy ID: +// +// cgroups version 1 hierarchies, this field +// contains a unique hierarchy ID number that can be +// matched to a hierarchy ID in /proc/cgroups. For the +// cgroups version 2 hierarchy, this field contains the +// value 0. +// +// (2) controller list: +// +// For cgroups version 1 hierarchies, this field +// contains a comma-separated list of the controllers +// bound to the hierarchy. For the cgroups version 2 +// hierarchy, this field is empty. +// +// (3) cgroup path: +// +// This field contains the pathname of the control group +// in the hierarchy to which the process belongs. This +// pathname is relative to the mount point of the +// hierarchy. +type cgroupHierarchy struct { + HierarchyID string + ControllerList string + CgroupPath string +} + +func parseCgroupFile(r io.Reader) ([]cgroupHierarchy, error) { + var ( + s = bufio.NewScanner(r) + chs []cgroupHierarchy + ) + for s.Scan() { + line := s.Text() + + fields := strings.Split(line, ":") + if len(fields) != 3 { + return nil, fmt.Errorf("failed to parse cgroup file %q: invalid separator", line) + } + + chs = append(chs, cgroupHierarchy{ + HierarchyID: fields[0], + ControllerList: fields[1], + CgroupPath: fields[2], + }) + } + if err := s.Err(); err != nil { + return nil, err + } + + return chs, nil +} + +func resolveCgroupPath(mountpoint, root, cgroupRelPath string) (string, error) { + root = filepath.Clean(strings.TrimPrefix(root, "/")) + cgroupRelPath = filepath.Clean(strings.TrimPrefix(cgroupRelPath, "/")) + + if root == cgroupRelPath || (root == "." && cgroupRelPath == ".") { + return mountpoint, nil + } + + if strings.HasPrefix(cgroupRelPath, root) { + relativePath := strings.TrimPrefix(cgroupRelPath, root) + finalPath := filepath.Join(mountpoint, relativePath) + + if _, err := os.Stat(finalPath); os.IsNotExist(err) { + return "", fmt.Errorf("resolved cgroup path does not exist: %s", finalPath) + } - if limit := stats.GetMemory().GetUsageLimit(); limit != 0 && limit != math.MaxUint64 { - return limit, nil + return finalPath, nil } - return 0, ErrNoLimit + return "", fmt.Errorf("invalid cgroup path: %s is not under root %s", cgroupRelPath, root) }