Skip to content

Commit

Permalink
cgroup v2: support rootless systemd
Browse files Browse the repository at this point in the history
Tested with both Podman (master) and Moby (master), on Ubuntu 19.10 .

$ podman --cgroup-manager=systemd run -it --rm --runtime=runc \
  --cgroupns=host --memory 42m --cpus 0.42 --pids-limit 42 alpine
/ # cat /proc/self/cgroup
0::/user.slice/user-1001.slice/[email protected]/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope
/ # cat /sys/fs/cgroup/user.slice/user-1001.slice/[email protected]/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope/memory.max
44040192
/ # cat /sys/fs/cgroup/user.slice/user-1001.slice/[email protected]/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope/cpu.max
42000 100000
/ # cat /sys/fs/cgroup/user.slice/user-1001.slice/[email protected]/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope/pids.max
42

Signed-off-by: Akihiro Suda <[email protected]>
  • Loading branch information
AkihiroSuda committed May 8, 2020
1 parent 492cfd8 commit bf15cc9
Show file tree
Hide file tree
Showing 13 changed files with 276 additions and 43 deletions.
4 changes: 3 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,9 @@ matrix:
- sudo ssh default -t 'cd /vagrant && sudo make localintegration RUNC_USE_SYSTEMD=yes'
# same setup but with fs2 driver instead of systemd
- sudo ssh default -t 'cd /vagrant && sudo make localintegration'
# rootless
# cgroupv2+systemd (rootless)
- sudo ssh default -t 'cd /vagrant && sudo make localrootlessintegration RUNC_USE_SYSTEMD=yes'
# same setup but with fs2 driver (rootless) instead of systemd
- sudo ssh default -t 'cd /vagrant && sudo make localrootlessintegration'
allow_failures:
- go: tip
Expand Down
16 changes: 16 additions & 0 deletions Vagrantfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,25 @@ EOF
# Add a user for rootless tests
useradd -u2000 -m -d/home/rootless -s/bin/bash rootless
# Allow root to execute `ssh rootless@localhost` in tests/rootless.sh
ssh-keygen -t ecdsa -N "" -f /root/rootless.key
mkdir -m 0700 -p /home/rootless/.ssh
cat /root/rootless.key.pub >> /home/rootless/.ssh/authorized_keys
chown -R rootless.rootless /home/rootless
# Add busybox for libcontainer/integration tests
. /vagrant/tests/integration/multi-arch.bash \
&& mkdir /busybox \
&& curl -fsSL $(get_busybox) | tar xfJC - /busybox
# Delegate cgroup v2 controllers to rootless user via --systemd-cgroup
mkdir -p /etc/systemd/system/[email protected]
cat > /etc/systemd/system/[email protected]/delegate.conf << EOF
[Service]
# default: Delegate=pids memory
# NOTE: delegation of cpuset requires systemd >= 244 (Fedora >= 32, Ubuntu >= 20.04). cpuset is ignored on Fedora 31.
Delegate=cpu cpuset io memory pids
EOF
systemctl daemon-reload
SHELL
end
22 changes: 8 additions & 14 deletions libcontainer/cgroups/systemd/common.go
Original file line number Diff line number Diff line change
Expand Up @@ -71,9 +71,13 @@ func ExpandSlice(slice string) (string, error) {

// getDbusConnection lazy initializes systemd dbus connection
// and returns it
func getDbusConnection() (*systemdDbus.Conn, error) {
func getDbusConnection(rootless bool) (*systemdDbus.Conn, error) {
connOnce.Do(func() {
connDbus, connErr = systemdDbus.New()
if rootless {
connDbus, connErr = NewUserSystemdDbus()
} else {
connDbus, connErr = systemdDbus.New()
}
})
return connDbus, connErr
}
Expand Down Expand Up @@ -103,12 +107,7 @@ func isUnitExists(err error) bool {
return false
}

func startUnit(unitName string, properties []systemdDbus.Property) error {
dbusConnection, err := getDbusConnection()
if err != nil {
return err
}

func startUnit(dbusConnection *systemdDbus.Conn, unitName string, properties []systemdDbus.Property) error {
statusChan := make(chan string, 1)
if _, err := dbusConnection.StartTransientUnit(unitName, "replace", properties, statusChan); err == nil {
select {
Expand All @@ -129,12 +128,7 @@ func startUnit(unitName string, properties []systemdDbus.Property) error {
return nil
}

func stopUnit(unitName string) error {
dbusConnection, err := getDbusConnection()
if err != nil {
return err
}

func stopUnit(dbusConnection *systemdDbus.Conn, unitName string) error {
statusChan := make(chan string, 1)
if _, err := dbusConnection.StopUnit(unitName, "replace", statusChan); err == nil {
select {
Expand Down
106 changes: 106 additions & 0 deletions libcontainer/cgroups/systemd/user.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
// +build linux

package systemd

import (
"bufio"
"bytes"
"os"
"os/exec"
"path/filepath"
"strconv"
"strings"

systemdDbus "github.com/coreos/go-systemd/v22/dbus"
dbus "github.com/godbus/dbus/v5"
"github.com/opencontainers/runc/libcontainer/system"
"github.com/pkg/errors"
)

// NewUserSystemdDbus creates a connection for systemd user-instance.
func NewUserSystemdDbus() (*systemdDbus.Conn, error) {
addr, err := DetectUserDbusSessionBusAddress()
if err != nil {
return nil, err
}
uid, err := DetectUID()
if err != nil {
return nil, err
}

return systemdDbus.NewConnection(func() (*dbus.Conn, error) {
conn, err := dbus.Dial(addr)
if err != nil {
return nil, errors.Wrapf(err, "error while dialing %q", addr)
}
methods := []dbus.Auth{dbus.AuthExternal(strconv.Itoa(uid))}
err = conn.Auth(methods)
if err != nil {
conn.Close()
return nil, errors.Wrapf(err, "error while authenticating connection, address=%q, UID=%d", addr, uid)
}
if err = conn.Hello(); err != nil {
conn.Close()
return nil, errors.Wrapf(err, "error while sending Hello message, address=%q, UID=%d", addr, uid)
}
return conn, nil
})
}

// DetectUID detects UID from the OwnerUID field of `busctl --user status`
// if running in userNS. The value corresponds to sd_bus_creds_get_owner_uid(3) .
//
// Otherwise returns os.Getuid() .
func DetectUID() (int, error) {
if !system.RunningInUserNS() {
return os.Getuid(), nil
}
b, err := exec.Command("busctl", "--user", "--no-pager", "status").CombinedOutput()
if err != nil {
return -1, errors.Wrap(err, "could not execute `busctl --user --no-pager status`")
}
scanner := bufio.NewScanner(bytes.NewReader(b))
for scanner.Scan() {
s := strings.TrimSpace(scanner.Text())
if strings.HasPrefix(s, "OwnerUID=") {
uidStr := strings.TrimPrefix(s, "OwnerUID=")
i, err := strconv.Atoi(uidStr)
if err != nil {
return -1, errors.Wrapf(err, "could not detect the OwnerUID: %s", s)
}
return i, nil
}
}
if err := scanner.Err(); err != nil {
return -1, err
}
return -1, errors.New("could not detect the OwnerUID")
}

// DetectUserDbusSessionBusAddress returns $DBUS_SESSION_BUS_ADDRESS if set.
// Otherwise returns "unix:path=$XDG_RUNTIME_DIR/bus" if $XDG_RUNTIME_DIR/bus exists.
// Otherwise parses the value from `systemctl --user show-environment` .
func DetectUserDbusSessionBusAddress() (string, error) {
if env := os.Getenv("DBUS_SESSION_BUS_ADDRESS"); env != "" {
return env, nil
}
if xdr := os.Getenv("XDG_RUNTIME_DIR"); xdr != "" {
busPath := filepath.Join(xdr, "bus")
if _, err := os.Stat(busPath); err == nil {
busAddress := "unix:path=" + busPath
return busAddress, nil
}
}
b, err := exec.Command("systemctl", "--user", "--no-pager", "show-environment").CombinedOutput()
if err != nil {
return "", errors.Wrapf(err, "could not execute `systemctl --user --no-pager show-environment`, output=%q", string(b))
}
scanner := bufio.NewScanner(bytes.NewReader(b))
for scanner.Scan() {
s := strings.TrimSpace(scanner.Text())
if strings.HasPrefix(s, "DBUS_SESSION_BUS_ADDRESS=") {
return strings.TrimPrefix(s, "DBUS_SESSION_BUS_ADDRESS="), nil
}
}
return "", errors.New("could not detect DBUS_SESSION_BUS_ADDRESS from `systemctl --user --no-pager show-environment`")
}
14 changes: 11 additions & 3 deletions libcontainer/cgroups/systemd/v1.go
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,11 @@ func (m *LegacyManager) Apply(pid int) error {
properties = append(properties, resourcesProperties...)
properties = append(properties, c.SystemdProps...)

if err := startUnit(unitName, properties); err != nil {
dbusConnection, err := getDbusConnection(false)
if err != nil {
return err
}
if err := startUnit(dbusConnection, unitName, properties); err != nil {
return err
}

Expand Down Expand Up @@ -213,8 +217,12 @@ func (m *LegacyManager) Destroy() error {
m.mu.Lock()
defer m.mu.Unlock()

dbusConnection, err := getDbusConnection(false)
if err != nil {
return err
}
unitName := getUnitName(m.Cgroups)
if err := stopUnit(unitName); err != nil {
if err := stopUnit(dbusConnection, unitName); err != nil {
return err
}
m.Paths = make(map[string]string)
Expand Down Expand Up @@ -371,7 +379,7 @@ func (m *LegacyManager) Set(container *configs.Config) error {
if err != nil {
return err
}
dbusConnection, err := getDbusConnection()
dbusConnection, err := getDbusConnection(false)
if err != nil {
return err
}
Expand Down
76 changes: 62 additions & 14 deletions libcontainer/cgroups/systemd/v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"math"
"os"
"path/filepath"
"strconv"
"strings"
"sync"

Expand All @@ -14,6 +15,7 @@ import (
"github.com/opencontainers/runc/libcontainer/cgroups"
"github.com/opencontainers/runc/libcontainer/cgroups/fs2"
"github.com/opencontainers/runc/libcontainer/configs"
"github.com/pkg/errors"
)

type unifiedManager struct {
Expand Down Expand Up @@ -89,14 +91,17 @@ func (m *unifiedManager) Apply(pid int) error {
var (
c = m.cgroups
unitName = getUnitName(c)
slice = "system.slice"
properties []systemdDbus.Property
)

if c.Paths != nil {
return cgroups.WriteCgroupProc(m.path, pid)
}

slice := "system.slice"
if m.rootless {
slice = "user.slice"
}
if c.Parent != "" {
slice = c.Parent
}
Expand Down Expand Up @@ -140,9 +145,13 @@ func (m *unifiedManager) Apply(pid int) error {
properties = append(properties, resourcesProperties...)
properties = append(properties, c.SystemdProps...)

if err := startUnit(unitName, properties); err != nil {
dbusConnection, err := getDbusConnection(m.rootless)
if err != nil {
return err
}
if err := startUnit(dbusConnection, unitName, properties); err != nil {
return errors.Wrapf(err, "error while starting unit %q with properties %+v", unitName, properties)
}

_, err = m.GetUnifiedPath()
if err != nil {
Expand All @@ -161,13 +170,17 @@ func (m *unifiedManager) Destroy() error {
m.mu.Lock()
defer m.mu.Unlock()

dbusConnection, err := getDbusConnection(m.rootless)
if err != nil {
return err
}
unitName := getUnitName(m.cgroups)
if err := stopUnit(unitName); err != nil {
if err := stopUnit(dbusConnection, unitName); err != nil {
return err
}

// XXX this is probably not needed, systemd should handle it
err := os.Remove(m.path)
err = os.Remove(m.path)
if err != nil && !os.IsNotExist(err) {
return err
}
Expand All @@ -190,31 +203,66 @@ func (m *unifiedManager) GetPaths() map[string]string {
return paths
}

// getSliceFull value is used in GetUnifiedPath.
// The value is incompatible with systemdDbus.PropSlice.
func (m *unifiedManager) getSliceFull() (string, error) {
c := m.cgroups
slice := "system.slice"
if m.rootless {
slice = "user.slice"
}
if c.Parent != "" {
var err error
slice, err = ExpandSlice(c.Parent)
if err != nil {
return "", err
}
}

if m.rootless {
dbusConnection, err := getDbusConnection(m.rootless)
if err != nil {
return "", err
}
// managerCGQuoted is typically "/user.slice/user-${uid}.slice/user@${uid}.service" including the quote symbols
managerCGQuoted, err := dbusConnection.GetManagerProperty("ControlGroup")
if err != nil {
return "", err
}
managerCG, err := strconv.Unquote(managerCGQuoted)
if err != nil {
return "", err
}
slice = filepath.Join(managerCG, slice)
}

// an example of the final slice in rootless: "/user.slice/user-1001.slice/[email protected]/user.slice"
// NOTE: systemdDbus.PropSlice requires the "/user.slice/user-1001.slice/[email protected]/" prefix NOT to be specified.
return slice, nil
}

func (m *unifiedManager) GetUnifiedPath() (string, error) {
m.mu.Lock()
defer m.mu.Unlock()
if m.path != "" {
return m.path, nil
}

c := m.cgroups
slice := "system.slice"
if c.Parent != "" {
slice = c.Parent
}

slice, err := ExpandSlice(slice)
sliceFull, err := m.getSliceFull()
if err != nil {
return "", err
}

path := filepath.Join(slice, getUnitName(c))
c := m.cgroups
path := filepath.Join(sliceFull, getUnitName(c))
path, err = securejoin.SecureJoin(fs2.UnifiedMountpoint, path)
if err != nil {
return "", err
}
m.path = path

// an example of the final path in rootless:
// "/sys/fs/cgroup/user.slice/user-1001.slice/[email protected]/user.slice/libpod-132ff0d72245e6f13a3bbc6cdc5376886897b60ac59eaa8dea1df7ab959cbf1c.scope"
return m.path, nil
}

Expand Down Expand Up @@ -263,12 +311,12 @@ func (m *unifiedManager) Set(container *configs.Config) error {
if err != nil {
return err
}
dbusConnection, err := getDbusConnection()
dbusConnection, err := getDbusConnection(m.rootless)
if err != nil {
return err
}
if err := dbusConnection.SetUnitProperties(getUnitName(m.cgroups), true, properties...); err != nil {
return err
return errors.Wrap(err, "error while setting unit properties")
}

fsMgr, err := m.fsManager()
Expand Down
Loading

0 comments on commit bf15cc9

Please sign in to comment.