Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

checkpoint/restore: implement --manage-cgroups-mode ignore #3546

Merged
merged 7 commits into from
Jan 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 57 additions & 47 deletions checkpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ checkpointed.`,
cli.StringFlag{Name: "page-server", Value: "", Usage: "ADDRESS:PORT of the page server"},
cli.BoolFlag{Name: "file-locks", Usage: "handle file locks, for safety"},
cli.BoolFlag{Name: "pre-dump", Usage: "dump container's memory information only, leave the container running after this"},
cli.StringFlag{Name: "manage-cgroups-mode", Value: "", Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'"},
cli.StringFlag{Name: "manage-cgroups-mode", Value: "", Usage: "cgroups mode: soft|full|strict|ignore (default: soft)"},
cli.StringSliceFlag{Name: "empty-ns", Usage: "create a namespace, but don't restore its properties"},
cli.BoolFlag{Name: "auto-dedup", Usage: "enable auto deduplication of memory images"},
},
Expand Down Expand Up @@ -67,17 +67,6 @@ checkpointed.`,
return err
}

// these are the mandatory criu options for a container
if err := setPageServer(context, options); err != nil {
return err
}
if err := setManageCgroupsMode(context, options); err != nil {
return err
}
if err := setEmptyNsMask(context, options); err != nil {
return err
}

err = container.Checkpoint(options)
if err == nil && !(options.LeaveRunning || options.PreDump) {
// Destroy the container unless we tell CRIU to keep it.
Expand Down Expand Up @@ -119,59 +108,80 @@ func prepareImagePaths(context *cli.Context) (string, string, error) {
return imagePath, parentPath, nil
}

func setPageServer(context *cli.Context, options *libcontainer.CriuOpts) error {
// xxx following criu opts are optional
// The dump image can be sent to a criu page server
func criuOptions(context *cli.Context) (*libcontainer.CriuOpts, error) {
imagePath, parentPath, err := prepareImagePaths(context)
if err != nil {
return nil, err
}

opts := &libcontainer.CriuOpts{
ImagesDirectory: imagePath,
WorkDirectory: context.String("work-path"),
ParentImage: parentPath,
LeaveRunning: context.Bool("leave-running"),
TcpEstablished: context.Bool("tcp-established"),
ExternalUnixConnections: context.Bool("ext-unix-sk"),
ShellJob: context.Bool("shell-job"),
FileLocks: context.Bool("file-locks"),
PreDump: context.Bool("pre-dump"),
AutoDedup: context.Bool("auto-dedup"),
LazyPages: context.Bool("lazy-pages"),
StatusFd: context.Int("status-fd"),
LsmProfile: context.String("lsm-profile"),
LsmMountContext: context.String("lsm-mount-context"),
}

// CRIU options below may or may not be set.

if psOpt := context.String("page-server"); psOpt != "" {
address, port, err := net.SplitHostPort(psOpt)

if err != nil || address == "" || port == "" {
return errors.New("Use --page-server ADDRESS:PORT to specify page server")
return nil, errors.New("Use --page-server ADDRESS:PORT to specify page server")
}
portInt, err := strconv.Atoi(port)
if err != nil {
return errors.New("Invalid port number")
return nil, errors.New("Invalid port number")
}
options.PageServer = libcontainer.CriuPageServerInfo{
opts.PageServer = libcontainer.CriuPageServerInfo{
Address: address,
Port: int32(portInt),
}
}
return nil
}

func setManageCgroupsMode(context *cli.Context, options *libcontainer.CriuOpts) error {
if cgOpt := context.String("manage-cgroups-mode"); cgOpt != "" {
switch cgOpt {
case "soft":
options.ManageCgroupsMode = criu.CriuCgMode_SOFT
case "full":
options.ManageCgroupsMode = criu.CriuCgMode_FULL
case "strict":
options.ManageCgroupsMode = criu.CriuCgMode_STRICT
default:
return errors.New("Invalid manage cgroups mode")
}
switch context.String("manage-cgroups-mode") {
case "":
// do nothing
case "soft":
opts.ManageCgroupsMode = criu.CriuCgMode_SOFT
case "full":
opts.ManageCgroupsMode = criu.CriuCgMode_FULL
case "strict":
opts.ManageCgroupsMode = criu.CriuCgMode_STRICT
case "ignore":
opts.ManageCgroupsMode = criu.CriuCgMode_IGNORE
default:
return nil, errors.New("Invalid manage-cgroups-mode value")
}
return nil
}

// namespaceMapping translates a namespace type name into the matching
// clone flag. It is consulted for every value passed via the "empty-ns"
// flag; a name that is missing from this map is rejected as unsupported.
// Only the network namespace is listed here.
var namespaceMapping = map[specs.LinuxNamespaceType]int{
specs.NetworkNamespace: unix.CLONE_NEWNET,
}

func setEmptyNsMask(context *cli.Context, options *libcontainer.CriuOpts) error {
/* Runc doesn't manage network devices and their configuration */
// runc doesn't manage network devices and their configuration.
nsmask := unix.CLONE_NEWNET

for _, ns := range context.StringSlice("empty-ns") {
f, exists := namespaceMapping[specs.LinuxNamespaceType(ns)]
if !exists {
return fmt.Errorf("namespace %q is not supported", ns)
if context.IsSet("empty-ns") {
namespaceMapping := map[specs.LinuxNamespaceType]int{
specs.NetworkNamespace: unix.CLONE_NEWNET,
}

for _, ns := range context.StringSlice("empty-ns") {
f, exists := namespaceMapping[specs.LinuxNamespaceType(ns)]
if !exists {
return nil, fmt.Errorf("namespace %q is not supported", ns)
}
nsmask |= f
}
nsmask |= f
}

options.EmptyNs = uint32(nsmask)
return nil
opts.EmptyNs = uint32(nsmask)

return opts, nil
}
7 changes: 2 additions & 5 deletions libcontainer/container_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -1560,11 +1560,8 @@ func (c *Container) criuApplyCgroups(pid int, req *criurpc.CriuReq) error {
return err
}

if cgroups.IsCgroup2UnifiedMode() {
return nil
}
// the stuff below is cgroupv1-specific

// TODO(@kolyshkin): should we use c.cgroupManager.GetPaths()
// instead of reading /proc/pid/cgroup?
path := fmt.Sprintf("/proc/%d/cgroup", pid)
cgroupsPaths, err := cgroups.ParseCgroupFile(path)
if err != nil {
Expand Down
2 changes: 1 addition & 1 deletion man/runc-checkpoint.8.md
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ together with **criu lazy-pages**. See
: Do a pre-dump, i.e. dump container's memory information only, leaving the
container running. See [criu iterative migration](https://criu.org/Iterative_migration).

**--manage-cgroups-mode** **soft**|**full**|**strict**.
**--manage-cgroups-mode** **soft**|**full**|**strict**|**ignore**.
: Cgroups mode. Default is **soft**. See
[criu --manage-cgroups option](https://criu.org/CLI/opt/--manage-cgroups).

Expand Down
7 changes: 6 additions & 1 deletion man/runc-restore.8.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,15 @@ image files directory.
: Allow checkpoint/restore of file locks. See
[criu --file-locks option](https://criu.org/CLI/opt/--file-locks).

**--manage-cgroups-mode** **soft**|**full**|**strict**.
**--manage-cgroups-mode** **soft**|**full**|**strict**|**ignore**.
: Cgroups mode. Default is **soft**. See
[criu --manage-cgroups option](https://criu.org/CLI/opt/--manage-cgroups).

: In particular, to restore the container into a different cgroup,
**--manage-cgroups-mode ignore** must be used during both
**checkpoint** and **restore**, and the _container_id_ (or
**cgroupsPath** property in OCI config, if set) must be changed.

**--bundle**|**-b** _path_
: Path to the root of the bundle directory. Default is current directory.

Expand Down
30 changes: 1 addition & 29 deletions restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ package main
import (
"os"

"github.com/opencontainers/runc/libcontainer"
"github.com/opencontainers/runc/libcontainer/userns"
"github.com/sirupsen/logrus"
"github.com/urfave/cli"
Expand Down Expand Up @@ -53,7 +52,7 @@ using the runc checkpoint command.`,
cli.StringFlag{
Name: "manage-cgroups-mode",
Value: "",
Usage: "cgroups mode: 'soft' (default), 'full' and 'strict'",
Usage: "cgroups mode: soft|full|strict|ignore (default: soft)",
},
cli.StringFlag{
Name: "bundle, b",
Expand Down Expand Up @@ -113,9 +112,6 @@ using the runc checkpoint command.`,
if err != nil {
return err
}
if err := setEmptyNsMask(context, options); err != nil {
return err
}
status, err := startContainer(context, CT_ACT_RESTORE, options)
if err != nil {
return err
Expand All @@ -126,27 +122,3 @@ using the runc checkpoint command.`,
return nil
},
}

// criuOptions assembles the CRIU options for the current invocation from
// the command-line flags. The image and parent-image directories come from
// prepareImagePaths; if that call fails, its error is returned and no
// options are produced.
func criuOptions(context *cli.Context) (*libcontainer.CriuOpts, error) {
	imagesDir, parentImage, err := prepareImagePaths(context)
	if err != nil {
		return nil, err
	}

	opts := new(libcontainer.CriuOpts)

	// Directories used for the images and for CRIU's working files.
	opts.ImagesDirectory = imagesDir
	opts.WorkDirectory = context.String("work-path")
	opts.ParentImage = parentImage

	// Boolean switches, copied verbatim from the flags of the same name.
	opts.LeaveRunning = context.Bool("leave-running")
	opts.TcpEstablished = context.Bool("tcp-established")
	opts.ExternalUnixConnections = context.Bool("ext-unix-sk")
	opts.ShellJob = context.Bool("shell-job")
	opts.FileLocks = context.Bool("file-locks")
	opts.PreDump = context.Bool("pre-dump")
	opts.AutoDedup = context.Bool("auto-dedup")
	opts.LazyPages = context.Bool("lazy-pages")

	// Remaining scalar settings.
	opts.StatusFd = context.Int("status-fd")
	opts.LsmProfile = context.String("lsm-profile")
	opts.LsmMountContext = context.String("lsm-mount-context")

	return opts, nil
}
66 changes: 59 additions & 7 deletions tests/integration/checkpoint.bats
Original file line number Diff line number Diff line change
Expand Up @@ -224,7 +224,14 @@ function simple_cr() {
# TCP port for lazy migration
port=27277

__runc checkpoint --lazy-pages --page-server 0.0.0.0:${port} --status-fd ${lazy_w} --work-path ./work-dir --image-path ./image-dir test_busybox &
__runc checkpoint \
--lazy-pages \
--page-server 0.0.0.0:${port} \
--status-fd ${lazy_w} \
--manage-cgroups-mode=ignore \
--work-path ./work-dir \
--image-path ./image-dir \
test_busybox &
cpt_pid=$!

# wait for lazy page server to be ready
Expand All @@ -246,14 +253,18 @@ function simple_cr() {
lp_pid=$!

# Restore lazily from checkpoint.
# The restored container needs a different name (as well as systemd
# unit name, in case systemd cgroup driver is used) as the checkpointed
# container is not yet destroyed. It is only destroyed at that point
# in time when the last page is lazily transferred to the destination.
#
# The restored container needs a different name and a different cgroup
# (and a different systemd unit name, in case systemd cgroup driver is
# used) as the checkpointed container is not yet destroyed. It is only
# destroyed at that point in time when the last page is lazily
# transferred to the destination.
#
# Killing the CRIU on the checkpoint side will let the container
# continue to run if the migration failed at some point.
[ -v RUNC_USE_SYSTEMD ] && set_cgroups_path
runc_restore_with_pipes ./image-dir test_busybox_restore --lazy-pages
runc_restore_with_pipes ./image-dir test_busybox_restore \
--lazy-pages \
--manage-cgroups-mode=ignore

wait $cpt_pid

Expand Down Expand Up @@ -405,3 +416,44 @@ function simple_cr() {
# busybox should be back up and running
testcontainer test_busybox running
}

@test "checkpoint then restore into a different cgroup (via --manage-cgroups-mode ignore)" {
set_resources_limit
set_cgroups_path
runc run -d --console-socket "$CONSOLE_SOCKET" test_busybox
[ "$status" -eq 0 ]
testcontainer test_busybox running

local orig_path
orig_path=$(get_cgroup_path "pids")
# Check that the cgroup exists.
test -d "$orig_path"

runc checkpoint --work-path ./work-dir --manage-cgroups-mode ignore test_busybox
grep -B 5 Error ./work-dir/dump.log || true
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think this is robust, but I don't have an alternative idea, so LGTM.

Eventually we need to have a robust error reporting system.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just to print anything containing Error (with some context) in case the checkpointing failed. It's merely a way to debug criu failures, and does not affect the test itself (only its output in case of an error). The || true part here is so that the test won't fail in case there's no error; IOW, we ignore grep exit code.

Alternatively, we could create an after-test artefact containing all the files, but this works OK so far.

Thinking about it, we might work on making runc do something like what grep does here in case of an error. Currently, this is just criu writing the log file.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Filed #3711 to not forget about it

[ "$status" -eq 0 ]
testcontainer test_busybox checkpointed
# Check that the cgroup is gone.
! test -d "$orig_path"

# Restore into a different cgroup.
set_cgroups_path # Changes the path.
runc restore -d --manage-cgroups-mode ignore --pid-file pid \
--work-path ./work-dir --console-socket "$CONSOLE_SOCKET" test_busybox
grep -B 5 Error ./work-dir/restore.log || true
[ "$status" -eq 0 ]
testcontainer test_busybox running

# Check that the old cgroup path doesn't exist.
! test -d "$orig_path"

# Check that the new path exists.
local new_path
new_path=$(get_cgroup_path "pids")
test -d "$new_path"

# Check that container's init is in the new cgroup.
local pid
pid=$(cat "pid")
grep -q "${REL_CGROUPS_PATH}$" "/proc/$pid/cgroup"
}
30 changes: 19 additions & 11 deletions tests/integration/helpers.bash
Original file line number Diff line number Diff line change
Expand Up @@ -232,19 +232,27 @@ function set_cgroups_path() {
update_config '.linux.cgroupsPath |= "'"${OCI_CGROUPS_PATH}"'"'
}

# Get a value from a cgroup file.
function get_cgroup_value() {
local source=$1
local cgroup var current

# Get a path to cgroup directory, based on controller name.
# Parameters:
# $1: controller name (like "pids") or a file name (like "pids.max").
function get_cgroup_path() {
if [ -v CGROUP_V2 ]; then
cgroup=$CGROUP_PATH
else
var=${source%%.*} # controller name (e.g. memory)
var=CGROUP_${var^^}_BASE_PATH # variable name (e.g. CGROUP_MEMORY_BASE_PATH)
eval cgroup=\$"${var}${REL_CGROUPS_PATH}"
echo "$CGROUP_PATH"
return
fi
cat "$cgroup/$source"

local var cgroup
var=${1%%.*} # controller name (e.g. memory)
var=CGROUP_${var^^}_BASE_PATH # variable name (e.g. CGROUP_MEMORY_BASE_PATH)
eval cgroup=\$"${var}${REL_CGROUPS_PATH}"
echo "$cgroup"
}

# Print the contents of a cgroup file.
# Parameters:
#  $1: cgroup file name (like "pids.max"); the directory holding it is
#      looked up via get_cgroup_path.
function get_cgroup_value() {
	local file="$1" dir
	dir="$(get_cgroup_path "$file")"
	cat "$dir/$file"
}

# Helper to check if a value in a cgroup file matches the expected one.
Expand Down