Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support systemd in containers with podman-style --systemd flag #2785

Merged
merged 1 commit into from
Feb 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ Minor:
- Better multi-platform support, e.g., `nerdctl pull --all-platforms IMAGE`
- Applying an (existing) AppArmor profile to rootless containers: `nerdctl run --security-opt apparmor=<PROFILE>`.
Use `sudo nerdctl apparmor load` to load the `nerdctl-default` profile.
- Systemd compatibility support: `nerdctl run --systemd=always`

Trivial:

Expand Down
4 changes: 4 additions & 0 deletions cmd/nerdctl/container_create.go
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,10 @@ func processContainerCreateOptions(cmd *cobra.Command) (opt types.ContainerCreat
if err != nil {
return
}
opt.Systemd, err = cmd.Flags().GetString("systemd")
if err != nil {
return
}
// #endregion

// #region for runtime flags
Expand Down
1 change: 1 addition & 0 deletions cmd/nerdctl/container_run.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ func setCreateFlags(cmd *cobra.Command) {
cmd.Flags().StringSlice("cap-drop", []string{}, "Drop Linux capabilities")
cmd.RegisterFlagCompletionFunc("cap-drop", capShellComplete)
cmd.Flags().Bool("privileged", false, "Give extended privileges to this container")
cmd.Flags().String("systemd", "false", "Allow running systemd in this container (default: false)")
// #endregion

// #region runtime flags
Expand Down
104 changes: 104 additions & 0 deletions cmd/nerdctl/container_run_systemd_linux_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/*
Copyright The containerd Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"testing"

"github.com/containerd/nerdctl/v2/pkg/testutil"
)

// TestRunWithSystemdAlways starts a container with --systemd=always and a
// bash entrypoint, then asserts that the `mount | grep cgroup` output contains
// "(rw," and that the inspected container labels contain "SIGRTMIN+3".
func TestRunWithSystemdAlways(t *testing.T) {
	testutil.DockerIncompatible(t)
	t.Parallel()

	b := testutil.NewBase(t)
	name := testutil.Identifier(t)

	// The removal command is built here; only its assertion is deferred.
	cleanup := b.Cmd("container", "rm", "-f", name)
	defer cleanup.AssertOK()

	b.Cmd(
		"run",
		"--name", name,
		"--systemd=always",
		"--entrypoint=/bin/bash",
		testutil.UbuntuImage,
		"-c", "mount | grep cgroup",
	).AssertOutContains("(rw,")

	b.Cmd(
		"inspect",
		"--format", "{{json .Config.Labels}}",
		name,
	).AssertOutContains("SIGRTMIN+3")
}

// TestRunWithSystemdTrueEnabled starts a detached container with
// --systemd=true and /sbin/init as the entrypoint, then asserts that the
// inspected labels contain "SIGRTMIN+3" and that `systemctl list-jobs` run via
// exec prints "jobs listed.".
func TestRunWithSystemdTrueEnabled(t *testing.T) {
	testutil.DockerIncompatible(t)
	t.Parallel()

	b := testutil.NewBase(t)
	name := testutil.Identifier(t)

	// The removal command is built here; only its assertion is deferred.
	cleanup := b.Cmd("container", "rm", "-f", name)
	defer cleanup.AssertOK()

	b.Cmd(
		"run", "-d",
		"--name", name,
		"--systemd=true",
		"--entrypoint=/sbin/init",
		testutil.SystemdImage,
	).AssertOK()

	b.Cmd(
		"inspect",
		"--format", "{{json .Config.Labels}}",
		name,
	).AssertOutContains("SIGRTMIN+3")

	b.Cmd("exec", name, "systemctl", "list-jobs").AssertOutContains("jobs listed.")
}

// TestRunWithSystemdTrueDisabled runs the systemd image with --systemd=true
// but a bash entrypoint, and asserts that `systemctl list-jobs` reports that
// the system was not booted with systemd as init.
func TestRunWithSystemdTrueDisabled(t *testing.T) {
	testutil.DockerIncompatible(t)
	t.Parallel()

	b := testutil.NewBase(t)
	name := testutil.Identifier(t)

	// The removal command is built here; only its assertion is deferred.
	cleanup := b.Cmd("rm", "-f", name)
	defer cleanup.AssertOK()

	// "|| true" keeps the shell exit status at zero even though systemctl fails.
	b.Cmd(
		"run",
		"--name", name,
		"--systemd=true",
		"--entrypoint=/bin/bash",
		testutil.SystemdImage,
		"-c", "systemctl list-jobs || true",
	).AssertCombinedOutContains("System has not been booted with systemd as init system")
}

// TestRunWithSystemdFalse runs a container with --systemd=false and asserts
// that the `mount | grep cgroup` output contains "(ro," and that the inspected
// container labels contain "SIGTERM".
func TestRunWithSystemdFalse(t *testing.T) {
	testutil.DockerIncompatible(t)
	t.Parallel()

	b := testutil.NewBase(t)
	name := testutil.Identifier(t)

	// The removal command is built here; only its assertion is deferred.
	cleanup := b.Cmd("rm", "-f", name)
	defer cleanup.AssertOK()

	b.Cmd(
		"run",
		"--name", name,
		"--systemd=false",
		"--entrypoint=/bin/bash",
		testutil.UbuntuImage,
		"-c", "mount | grep cgroup",
	).AssertOutContains("(ro,")

	b.Cmd(
		"inspect",
		"--format", "{{json .Config.Labels}}",
		name,
	).AssertOutContains("SIGTERM")
}

// TestRunWithNoSystemd runs a container without passing --systemd at all and
// asserts the same observations as the explicit --systemd=false case: the
// `mount | grep cgroup` output contains "(ro," and the labels contain "SIGTERM".
func TestRunWithNoSystemd(t *testing.T) {
	testutil.DockerIncompatible(t)
	t.Parallel()

	b := testutil.NewBase(t)
	name := testutil.Identifier(t)

	// The removal command is built here; only its assertion is deferred.
	cleanup := b.Cmd("rm", "-f", name)
	defer cleanup.AssertOK()

	b.Cmd(
		"run",
		"--name", name,
		"--entrypoint=/bin/bash",
		testutil.UbuntuImage,
		"-c", "mount | grep cgroup",
	).AssertOutContains("(ro,")

	b.Cmd(
		"inspect",
		"--format", "{{json .Config.Labels}}",
		name,
	).AssertOutContains("SIGTERM")
}

// TestRunWithSystemdPrivilegedError runs with --privileged and
// --systemd=always but without `--security-opt privileged-without-host-devices`,
// and asserts that the combined output carries the corresponding error message.
func TestRunWithSystemdPrivilegedError(t *testing.T) {
	testutil.DockerIncompatible(t)
	t.Parallel()

	b := testutil.NewBase(t)

	// No named container here: the run is invoked with --rm.
	b.Cmd(
		"run",
		"--privileged",
		"--rm",
		"--systemd=always",
		"--entrypoint=/sbin/init",
		testutil.SystemdImage,
	).AssertCombinedOutContains("if --privileged is used with systemd `--security-opt privileged-without-host-devices` must also be used")
}

// TestRunWithSystemdPrivilegedSuccess starts a detached, privileged container
// with `--security-opt privileged-without-host-devices`, --systemd=true and
// /sbin/init as the entrypoint, asserts that the run succeeds, and asserts that
// the inspected labels contain "SIGRTMIN+3".
func TestRunWithSystemdPrivilegedSuccess(t *testing.T) {
	testutil.DockerIncompatible(t)
	t.Parallel()

	b := testutil.NewBase(t)
	name := testutil.Identifier(t)

	// The removal command is built here; only its assertion is deferred.
	cleanup := b.Cmd("container", "rm", "-f", name)
	defer cleanup.AssertOK()

	b.Cmd(
		"run", "-d",
		"--name", name,
		"--privileged",
		"--security-opt", "privileged-without-host-devices",
		"--systemd=true",
		"--entrypoint=/sbin/init",
		testutil.SystemdImage,
	).AssertOK()

	b.Cmd(
		"inspect",
		"--format", "{{json .Config.Labels}}",
		name,
	).AssertOutContains("SIGRTMIN+3")
}
9 changes: 9 additions & 0 deletions docs/command-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,15 @@ Security flags:
- :whale: `--cap-add=<CAP>`: Add Linux capabilities
- :whale: `--cap-drop=<CAP>`: Drop Linux capabilities
- :whale: `--privileged`: Give extended privileges to this container
- :nerd_face: `--systemd=(true|false|always)`: Enable systemd compatibility (default: false).
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How does always differ from true?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added some more to docs about the options and added a note to nerdctl specific features in README:
https://github.com/containerd/nerdctl/pull/2785/files#diff-b335630551682c19a781afebcf4d07bf978fb1f8ac04c6bf87428ed5106870f5R206

- Default: "false"
- true: systemd compatibility is enabled only if the entrypoint executable matches one of the following paths:
- `/sbin/init`
- `/usr/sbin/init`
- `/usr/local/sbin/init`
- always: Always enable systemd compatibility

Corresponds to Podman CLI.

Runtime flags:

Expand Down
2 changes: 2 additions & 0 deletions pkg/api/types/container_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ type ContainerCreateOptions struct {
CapDrop []string
// Privileged gives extended privileges to this container
Privileged bool
// Systemd selects the systemd compatibility mode: "true", "false", or "always"
Systemd string
// #endregion

// #region for runtime flags
Expand Down
50 changes: 49 additions & 1 deletion pkg/cmd/container/create.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ import (
"github.com/containerd/nerdctl/v2/pkg/inspecttypes/dockercompat"
"github.com/containerd/nerdctl/v2/pkg/labels"
"github.com/containerd/nerdctl/v2/pkg/logging"
"github.com/containerd/nerdctl/v2/pkg/maputil"
"github.com/containerd/nerdctl/v2/pkg/mountutil"
"github.com/containerd/nerdctl/v2/pkg/namestore"
"github.com/containerd/nerdctl/v2/pkg/platformutil"
Expand Down Expand Up @@ -173,7 +174,6 @@ func Create(ctx context.Context, client *containerd.Client, args []string, netMa
return nil, nil, err
}
cOpts = append(cOpts, restartOpts...)
cOpts = append(cOpts, withStop(options.StopSignal, options.StopTimeout, ensuredImage))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


if err = netManager.VerifyNetworkOptions(ctx); err != nil {
return nil, nil, fmt.Errorf("failed to verify networking settings: %s", err)
Expand Down Expand Up @@ -340,6 +340,15 @@ func generateRootfsOpts(args []string, id string, ensured *imgutil.EnsuredImage,
opts = append(opts, oci.WithRootFSPath(absRootfs), oci.WithDefaultPathEnv)
}

entrypointPath := ""
if ensured != nil {
if len(ensured.ImageConfig.Entrypoint) > 0 {
entrypointPath = ensured.ImageConfig.Entrypoint[0]
} else if len(ensured.ImageConfig.Cmd) > 0 {
entrypointPath = ensured.ImageConfig.Cmd[0]
}
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There might be an easier way to determine the entrypoint executable path, if there is I am open to updating

if !options.Rootfs && !options.EntrypointChanged {
opts = append(opts, oci.WithImageConfigArgs(ensured.Image, args[1:]))
} else {
Expand All @@ -357,8 +366,47 @@ func generateRootfsOpts(args []string, id string, ensured *imgutil.EnsuredImage,
// error message is from Podman
return nil, nil, errors.New("no command or entrypoint provided, and no CMD or ENTRYPOINT from image")
}

entrypointPath = processArgs[0]

opts = append(opts, oci.WithProcessArgs(processArgs...))
}

isEntryPointSystemd := (entrypointPath == "/sbin/init" ||
entrypointPath == "/usr/sbin/init" ||
entrypointPath == "/usr/local/sbin/init")

stopSignal := options.StopSignal

if options.Systemd == "always" || (options.Systemd == "true" && isEntryPointSystemd) {
if options.Privileged {
securityOptsMap := strutil.ConvertKVStringsToMap(strutil.DedupeStrSlice(options.SecurityOpt))
privilegedWithoutHostDevices, err := maputil.MapBoolValueAsOpt(securityOptsMap, "privileged-without-host-devices")
if err != nil {
return nil, nil, err
}

// See: https://github.com/containers/podman/issues/15878
if !privilegedWithoutHostDevices {
return nil, nil, errors.New("if --privileged is used with systemd `--security-opt privileged-without-host-devices` must also be used")
}
}

opts = append(opts,
oci.WithoutMounts("/sys/fs/cgroup"),
Copy link

@jfernandez jfernandez Nov 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@sazzy4o I found your change while looking into supporting containers with systemd using k8s + containerd. I did the tmpfs mounts for /run, /tmp/, etc., but I was mounting the host's /sys/fs/cgroup as ready-only, which didn't work.

Here, you are removing the mount, which caught my attention. Is this so that systemd creates /sys/fs/cgroup when it initializes?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@jfernandez Yes, this allows systemd to run inside the container and create /sys/fs/cgroup itself.

This was based on the podman --systemd flag

oci.WithMounts([]specs.Mount{
{Type: "cgroup", Source: "cgroup", Destination: "/sys/fs/cgroup", Options: []string{"rw"}},
{Type: "tmpfs", Source: "tmpfs", Destination: "/run"},
{Type: "tmpfs", Source: "tmpfs", Destination: "/run/lock"},
{Type: "tmpfs", Source: "tmpfs", Destination: "/tmp"},
{Type: "tmpfs", Source: "tmpfs", Destination: "/var/lib/journal"},
}),
)
stopSignal = "SIGRTMIN+3"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

SIGTERM causes restart in systemd (This functionality is the same as podman)

See:
https://www.freedesktop.org/software/systemd/man/latest/systemd.html#Signals

}

cOpts = append(cOpts, withStop(stopSignal, options.StopTimeout, ensured))

if options.InitBinary != nil {
options.InitProcessFlag = true
}
Expand Down
1 change: 1 addition & 0 deletions pkg/testutil/testutil_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ var (
DockerAuthImage = mirrorOf("cesanta/docker_auth:1.7")
FluentdImage = mirrorOf("fluent/fluentd:v1.14-1")
KuboImage = mirrorOf("ipfs/kubo:v0.16.0")
SystemdImage = "ghcr.io/containerd/stargz-snapshotter:0.15.1-kind"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Chose this image since it has a working systemd and is controlled by containerd


// Source: https://gist.github.com/cpuguy83/fcf3041e5d8fb1bb5c340915aabeebe0
NonDistBlobImage = "ghcr.io/cpuguy83/non-dist-blob:latest"
Expand Down
Loading