diff --git a/cmd/podman/common/create.go b/cmd/podman/common/create.go index bbe31d19f3..f6fbe8e105 100644 --- a/cmd/podman/common/create.go +++ b/cmd/podman/common/create.go @@ -6,6 +6,7 @@ import ( "github.com/containers/common/pkg/auth" "github.com/containers/libpod/v2/cmd/podman/registry" + "github.com/containers/libpod/v2/libpod/define" "github.com/spf13/pflag" ) @@ -394,6 +395,11 @@ func GetCreateFlags(cf *ContainerCLIOpts) *pflag.FlagSet { "rootfs", false, "The first argument is not an image but the rootfs to the exploded container", ) + createFlags.StringVar( + &cf.SdNotifyMode, + "sdnotify", define.SdNotifyModeContainer, + `control sd-notify behavior ("container"|"conmon"|"ignore")`, + ) createFlags.StringArrayVar( &cf.SecurityOpt, "security-opt", containerConfig.SecurityOptions(), diff --git a/cmd/podman/common/create_opts.go b/cmd/podman/common/create_opts.go index 3183a5cceb..eafe7f0902 100644 --- a/cmd/podman/common/create_opts.go +++ b/cmd/podman/common/create_opts.go @@ -81,6 +81,7 @@ type ContainerCLIOpts struct { Rm bool RootFS bool SecurityOpt []string + SdNotifyMode string ShmSize string StopSignal string StopTimeout uint diff --git a/cmd/podman/common/specgen.go b/cmd/podman/common/specgen.go index 225370368e..0948e78f13 100644 --- a/cmd/podman/common/specgen.go +++ b/cmd/podman/common/specgen.go @@ -443,6 +443,7 @@ func FillOutSpecGen(s *specgen.SpecGenerator, c *ContainerCLIOpts, args []string } s.Systemd = c.Systemd + s.SdNotifyMode = c.SdNotifyMode if s.ResourceLimits == nil { s.ResourceLimits = &specs.LinuxResources{} } diff --git a/docs/source/markdown/podman-create.1.md b/docs/source/markdown/podman-create.1.md index 1fc99cd873..e0703fd228 100644 --- a/docs/source/markdown/podman-create.1.md +++ b/docs/source/markdown/podman-create.1.md @@ -702,6 +702,17 @@ If specified, the first argument refers to an exploded container on the file sys This is useful to run a container without requiring any image management, the rootfs of the container is assumed to be managed externally. +**--sdnotify**=**container**|**conmon**|**ignore** + +Determines how to use the NOTIFY_SOCKET, as passed with systemd and Type=notify. + +Default is **container**, which means allow the OCI runtime to proxy the socket into the +container to receive ready notification. Podman will set the MAINPID to conmon's pid. +The **conmon** option sets MAINPID to conmon's pid, and sends READY when the container +has started. The socket is never passed to the runtime or the container. +The **ignore** option removes NOTIFY_SOCKET from the environment for itself and child processes, +for the case where some other process above Podman uses NOTIFY_SOCKET and Podman should not use it. + **--seccomp-policy**=*policy* Specify the policy to select the seccomp profile. If set to *image*, Podman will look for a "io.podman.seccomp.profile" label in the container-image config and use its value as a seccomp profile. Otherwise, Podman will follow the *default* policy by applying the default profile unless specified otherwise via *--security-opt seccomp* as described below. diff --git a/docs/source/markdown/podman-run.1.md b/docs/source/markdown/podman-run.1.md index 86179e63c4..54c497ba2d 100644 --- a/docs/source/markdown/podman-run.1.md +++ b/docs/source/markdown/podman-run.1.md @@ -723,6 +723,17 @@ of the container is assumed to be managed externally. Note: On **SELinux** systems, the rootfs needs the correct label, which is by default **unconfined_u:object_r:container_file_t**. +**--sdnotify**=**container**|**conmon**|**ignore** + +Determines how to use the NOTIFY_SOCKET, as passed with systemd and Type=notify. + +Default is **container**, which means allow the OCI runtime to proxy the socket into the +container to receive ready notification. Podman will set the MAINPID to conmon's pid. +The **conmon** option sets MAINPID to conmon's pid, and sends READY when the container +has started. The socket is never passed to the runtime or the container. +The **ignore** option removes NOTIFY_SOCKET from the environment for itself and child processes, +for the case where some other process above Podman uses NOTIFY_SOCKET and Podman should not use it. + **--seccomp-policy**=*policy* Specify the policy to select the seccomp profile. If set to *image*, Podman will look for a "io.podman.seccomp.profile" label in the container-image config and use its value as a seccomp profile. Otherwise, Podman will follow the *default* policy by applying the default profile unless specified otherwise via *--security-opt seccomp* as described below. diff --git a/libpod/container.go b/libpod/container.go index a71692dd8b..fa90fef7a9 100644 --- a/libpod/container.go +++ b/libpod/container.go @@ -414,6 +414,8 @@ type ContainerConfig struct { // sharing kernel namespaces in a pod IsInfra bool `json:"pause"` + // SdNotifyMode tells libpod what to do with a NOTIFY_SOCKET if passed + SdNotifyMode string `json:"sdnotifyMode,omitempty"` // Systemd tells libpod to setup the container in systemd mode Systemd bool `json:"systemd"` diff --git a/libpod/container_internal.go b/libpod/container_internal.go index 7a547e5659..12132cf414 100644 --- a/libpod/container_internal.go +++ b/libpod/container_internal.go @@ -24,6 +24,7 @@ import ( "github.com/containers/storage/pkg/archive" "github.com/containers/storage/pkg/idtools" "github.com/containers/storage/pkg/mount" + "github.com/coreos/go-systemd/v22/daemon" securejoin "github.com/cyphar/filepath-securejoin" spec "github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/runtime-tools/generate" @@ -1192,6 +1193,19 @@ func (c *Container) start() error { c.state.State = define.ContainerStateRunning + if c.config.SdNotifyMode != define.SdNotifyModeIgnore { + payload := fmt.Sprintf("MAINPID=%d", c.state.ConmonPID) + if c.config.SdNotifyMode == define.SdNotifyModeConmon { + payload += "\n" + payload += daemon.SdNotifyReady + } + if sent, err := daemon.SdNotify(false, payload); err != nil { + logrus.Errorf("Error notifying systemd of Conmon PID: %s", err.Error()) + } else if sent { + logrus.Debugf("Notify sent successfully") + } + } + if c.config.HealthCheckConfig != nil { if err := c.updateHealthStatus(define.HealthCheckStarting); err != nil { logrus.Error(err) diff --git a/libpod/define/config.go b/libpod/define/config.go index 900a363d8f..c436015545 100644 --- a/libpod/define/config.go +++ b/libpod/define/config.go @@ -75,3 +75,10 @@ const JSONLogging = "json-file" // NoLogging is the string conmon expects when specifying to use no log driver whatsoever const NoLogging = "none" + +// Strings used for --sdnotify option to podman +const ( + SdNotifyModeContainer = "container" + SdNotifyModeConmon = "conmon" + SdNotifyModeIgnore = "ignore" +) diff --git a/libpod/oci_conmon_exec_linux.go b/libpod/oci_conmon_exec_linux.go index 1a6fe827b9..4196bdcaf5 100644 --- a/libpod/oci_conmon_exec_linux.go +++ b/libpod/oci_conmon_exec_linux.go @@ -444,7 +444,7 @@ func (r *ConmonOCIRuntime) startExec(c *Container, sessionID string, options *Ex // } // } - conmonEnv, extraFiles, err := r.configureConmonEnv(runtimeDir) + conmonEnv, extraFiles, err := r.configureConmonEnv(c, runtimeDir) if err != nil { return nil, nil, err } diff --git a/libpod/oci_conmon_linux.go b/libpod/oci_conmon_linux.go index 98e436e785..5346223829 100644 --- a/libpod/oci_conmon_linux.go +++ b/libpod/oci_conmon_linux.go @@ -30,6 +30,7 @@ import ( "github.com/containers/libpod/v2/utils" pmount "github.com/containers/storage/pkg/mount" "github.com/coreos/go-systemd/v22/activation" + "github.com/coreos/go-systemd/v22/daemon" spec "github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/selinux/go-selinux" "github.com/opencontainers/selinux/go-selinux/label" @@ -365,8 +366,10 @@ func (r *ConmonOCIRuntime) StartContainer(ctr *Container) error { return err } env := []string{fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)} - if notify, ok := os.LookupEnv("NOTIFY_SOCKET"); ok { - env = append(env, fmt.Sprintf("NOTIFY_SOCKET=%s", notify)) + if ctr.config.SdNotifyMode == define.SdNotifyModeContainer { + if notify, ok := os.LookupEnv("NOTIFY_SOCKET"); ok { + env = append(env, fmt.Sprintf("NOTIFY_SOCKET=%s", notify)) + } } if path, ok := os.LookupEnv("PATH"); ok { env = append(env, fmt.Sprintf("PATH=%s", path)) @@ -887,6 +890,12 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co } } + if ctr.config.SdNotifyMode == define.SdNotifyModeIgnore { + if err := os.Unsetenv("NOTIFY_SOCKET"); err != nil { + logrus.Warnf("Error unsetting NOTIFY_SOCKET %s", err.Error()) + } + } + args := r.sharedConmonArgs(ctr, ctr.ID(), ctr.bundlePath(), filepath.Join(ctr.state.RunDir, "pidfile"), ctr.LogPath(), r.exitsDir, ociLog, ctr.LogDriver(), logTag) if ctr.config.Spec.Process.Terminal { @@ -940,7 +949,7 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co } // 0, 1 and 2 are stdin, stdout and stderr - conmonEnv, envFiles, err := r.configureConmonEnv(runtimeDir) + conmonEnv, envFiles, err := r.configureConmonEnv(ctr, runtimeDir) if err != nil { return err } @@ -1034,6 +1043,13 @@ func (r *ConmonOCIRuntime) createOCIContainer(ctr *Container, restoreOptions *Co // conmon not having a pid file is a valid state, so don't set it if we don't have it logrus.Infof("Got Conmon PID as %d", conmonPID) ctr.state.ConmonPID = conmonPID + if ctr.config.SdNotifyMode != define.SdNotifyModeIgnore { + if sent, err := daemon.SdNotify(false, fmt.Sprintf("MAINPID=%d", conmonPID)); err != nil { + logrus.Errorf("Error notifying systemd of Conmon PID: %s", err.Error()) + } else if sent { + logrus.Debugf("Notify MAINPID sent successfully") + } + } } if ctr.config.PreserveFDs > 0 { @@ -1137,7 +1153,7 @@ func prepareProcessExec(c *Container, cmd, env []string, tty bool, cwd, user, se // configureConmonEnv gets the environment values to add to conmon's exec struct // TODO this may want to be less hardcoded/more configurable in the future -func (r *ConmonOCIRuntime) configureConmonEnv(runtimeDir string) ([]string, []*os.File, error) { +func (r *ConmonOCIRuntime) configureConmonEnv(ctr *Container, runtimeDir string) ([]string, []*os.File, error) { env := make([]string, 0, 6) env = append(env, fmt.Sprintf("XDG_RUNTIME_DIR=%s", runtimeDir)) env = append(env, fmt.Sprintf("_CONTAINERS_USERNS_CONFIGURED=%s", os.Getenv("_CONTAINERS_USERNS_CONFIGURED"))) @@ -1149,8 +1165,10 @@ func (r *ConmonOCIRuntime) configureConmonEnv(runtimeDir string) ([]string, []*o env = append(env, fmt.Sprintf("HOME=%s", home)) extraFiles := make([]*os.File, 0) - if notify, ok := os.LookupEnv("NOTIFY_SOCKET"); ok { - env = append(env, fmt.Sprintf("NOTIFY_SOCKET=%s", notify)) + if ctr.config.SdNotifyMode == define.SdNotifyModeContainer { + if notify, ok := os.LookupEnv("NOTIFY_SOCKET"); ok { + env = append(env, fmt.Sprintf("NOTIFY_SOCKET=%s", notify)) + } } if !r.sdNotify { if listenfds, ok := os.LookupEnv("LISTEN_FDS"); ok { diff --git a/libpod/options.go b/libpod/options.go index c1a8fdbe1f..61d1676f13 100644 --- a/libpod/options.go +++ b/libpod/options.go @@ -4,6 +4,7 @@ import ( "net" "os" "path/filepath" + "strings" "syscall" "github.com/containers/common/pkg/config" @@ -22,6 +23,10 @@ import ( ) // Runtime Creation Options +var ( + // SdNotifyModeValues describes the only values that SdNotifyMode can be + SdNotifyModeValues = []string{define.SdNotifyModeContainer, define.SdNotifyModeConmon, define.SdNotifyModeIgnore} +) // WithStorageConfig uses the given configuration to set up container storage. // If this is not specified, the system default configuration will be used @@ -550,6 +555,23 @@ func WithSystemd() CtrCreateOption { } } +// WithSdNotifyMode sets the sd-notify method +func WithSdNotifyMode(mode string) CtrCreateOption { + return func(ctr *Container) error { + if ctr.valid { + return define.ErrCtrFinalized + } + + // verify values + if len(mode) > 0 && !util.StringInSlice(strings.ToLower(mode), SdNotifyModeValues) { + return errors.Wrapf(define.ErrInvalidArg, "--sdnotify values must be one of %q", strings.Join(SdNotifyModeValues, ", ")) + } + + ctr.config.SdNotifyMode = mode + return nil + } +} + // WithShmSize sets the size of /dev/shm tmpfs mount. func WithShmSize(size int64) CtrCreateOption { return func(ctr *Container) error { diff --git a/pkg/specgen/container_validate.go b/pkg/specgen/container_validate.go index 8063bee386..bf03ff0e7e 100644 --- a/pkg/specgen/container_validate.go +++ b/pkg/specgen/container_validate.go @@ -3,6 +3,7 @@ package specgen import ( "strings" + "github.com/containers/libpod/v2/libpod/define" "github.com/containers/libpod/v2/pkg/rootless" "github.com/containers/libpod/v2/pkg/util" "github.com/pkg/errors" @@ -13,6 +14,8 @@ var ( ErrInvalidSpecConfig = errors.New("invalid configuration") // SystemDValues describes the only values that SystemD can be SystemDValues = []string{"true", "false", "always"} + // SdNotifyModeValues describes the only values that SdNotifyMode can be + SdNotifyModeValues = []string{define.SdNotifyModeContainer, define.SdNotifyModeConmon, define.SdNotifyModeIgnore} // ImageVolumeModeValues describes the only values that ImageVolumeMode can be ImageVolumeModeValues = []string{"ignore", "tmpfs", "anonymous"} ) @@ -40,6 +43,10 @@ func (s *SpecGenerator) Validate() error { if len(s.ContainerBasicConfig.Systemd) > 0 && !util.StringInSlice(strings.ToLower(s.ContainerBasicConfig.Systemd), SystemDValues) { return errors.Wrapf(ErrInvalidSpecConfig, "--systemd values must be one of %q", strings.Join(SystemDValues, ", ")) } + // sdnotify values must be container, conmon, or ignore + if len(s.ContainerBasicConfig.SdNotifyMode) > 0 && !util.StringInSlice(strings.ToLower(s.ContainerBasicConfig.SdNotifyMode), SdNotifyModeValues) { + return errors.Wrapf(ErrInvalidSpecConfig, "--sdnotify values must be one of %q", strings.Join(SdNotifyModeValues, ", ")) + } // // ContainerStorageConfig diff --git a/pkg/specgen/generate/container_create.go b/pkg/specgen/generate/container_create.go index 1ab576869a..8df5b996e0 100644 --- a/pkg/specgen/generate/container_create.go +++ b/pkg/specgen/generate/container_create.go @@ -175,6 +175,10 @@ func createContainerOptions(ctx context.Context, rt *libpod.Runtime, s *specgen. options = append(options, libpod.WithSystemd()) } + if len(s.SdNotifyMode) > 0 { + options = append(options, libpod.WithSdNotifyMode(s.SdNotifyMode)) + } + if len(s.Name) > 0 { logrus.Debugf("setting container name %s", s.Name) options = append(options, libpod.WithName(s.Name)) diff --git a/pkg/specgen/specgen.go b/pkg/specgen/specgen.go index fe735bc1f1..b4e10fa87f 100644 --- a/pkg/specgen/specgen.go +++ b/pkg/specgen/specgen.go @@ -107,6 +107,11 @@ type ContainerBasicConfig struct { // If not specified, "false" will be assumed. // Optional. Systemd string `json:"systemd,omitempty"` + // Determine how to handle the NOTIFY_SOCKET - do we participate or pass it through + // "container" - let the OCI runtime deal with it, advertise conmon's MAINPID + // "conmon-only" - advertise conmon's MAINPID, send READY when started, don't pass to OCI + // "ignore" - unset NOTIFY_SOCKET + SdNotifyMode string `json:"sdnotifyMode,omitempty"` // Namespace is the libpod namespace the container will be placed in. // Optional. Namespace string `json:"namespace,omitempty"` diff --git a/vendor/github.com/coreos/go-systemd/v22/daemon/sdnotify.go b/vendor/github.com/coreos/go-systemd/v22/daemon/sdnotify.go new file mode 100644 index 0000000000..ba4ae31f19 --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/v22/daemon/sdnotify.go @@ -0,0 +1,84 @@ +// Copyright 2014 Docker, Inc. +// Copyright 2015-2018 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// Package daemon provides a Go implementation of the sd_notify protocol. +// It can be used to inform systemd of service start-up completion, watchdog +// events, and other status changes. +// +// https://www.freedesktop.org/software/systemd/man/sd_notify.html#Description +package daemon + +import ( + "net" + "os" +) + +const ( + // SdNotifyReady tells the service manager that service startup is finished + // or the service finished loading its configuration. + SdNotifyReady = "READY=1" + + // SdNotifyStopping tells the service manager that the service is beginning + // its shutdown. + SdNotifyStopping = "STOPPING=1" + + // SdNotifyReloading tells the service manager that this service is + // reloading its configuration. Note that you must call SdNotifyReady when + // it completed reloading. + SdNotifyReloading = "RELOADING=1" + + // SdNotifyWatchdog tells the service manager to update the watchdog + // timestamp for the service. + SdNotifyWatchdog = "WATCHDOG=1" +) + +// SdNotify sends a message to the init daemon. It is common to ignore the error. +// If `unsetEnvironment` is true, the environment variable `NOTIFY_SOCKET` +// will be unconditionally unset. +// +// It returns one of the following: +// (false, nil) - notification not supported (i.e. NOTIFY_SOCKET is unset) +// (false, err) - notification supported, but failure happened (e.g. error connecting to NOTIFY_SOCKET or while sending data) +// (true, nil) - notification supported, data has been sent +func SdNotify(unsetEnvironment bool, state string) (bool, error) { + socketAddr := &net.UnixAddr{ + Name: os.Getenv("NOTIFY_SOCKET"), + Net: "unixgram", + } + + // NOTIFY_SOCKET not set + if socketAddr.Name == "" { + return false, nil + } + + if unsetEnvironment { + if err := os.Unsetenv("NOTIFY_SOCKET"); err != nil { + return false, err + } + } + + conn, err := net.DialUnix(socketAddr.Net, nil, socketAddr) + // Error connecting to NOTIFY_SOCKET + if err != nil { + return false, err + } + defer conn.Close() + + if _, err = conn.Write([]byte(state)); err != nil { + return false, err + } + return true, nil +} diff --git a/vendor/github.com/coreos/go-systemd/v22/daemon/watchdog.go b/vendor/github.com/coreos/go-systemd/v22/daemon/watchdog.go new file mode 100644 index 0000000000..7a0e0d3a51 --- /dev/null +++ b/vendor/github.com/coreos/go-systemd/v22/daemon/watchdog.go @@ -0,0 +1,73 @@ +// Copyright 2016 CoreOS, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package daemon + +import ( + "fmt" + "os" + "strconv" + "time" +) + +// SdWatchdogEnabled returns watchdog information for a service. +// Processes should call daemon.SdNotify(false, daemon.SdNotifyWatchdog) every +// time / 2. +// If `unsetEnvironment` is true, the environment variables `WATCHDOG_USEC` and +// `WATCHDOG_PID` will be unconditionally unset. +// +// It returns one of the following: +// (0, nil) - watchdog isn't enabled or we aren't the watched PID. +// (0, err) - an error happened (e.g. error converting time). +// (time, nil) - watchdog is enabled and we can send ping. +// time is delay before inactive service will be killed. +func SdWatchdogEnabled(unsetEnvironment bool) (time.Duration, error) { + wusec := os.Getenv("WATCHDOG_USEC") + wpid := os.Getenv("WATCHDOG_PID") + if unsetEnvironment { + wusecErr := os.Unsetenv("WATCHDOG_USEC") + wpidErr := os.Unsetenv("WATCHDOG_PID") + if wusecErr != nil { + return 0, wusecErr + } + if wpidErr != nil { + return 0, wpidErr + } + } + + if wusec == "" { + return 0, nil + } + s, err := strconv.Atoi(wusec) + if err != nil { + return 0, fmt.Errorf("error converting WATCHDOG_USEC: %s", err) + } + if s <= 0 { + return 0, fmt.Errorf("error WATCHDOG_USEC must be a positive number") + } + interval := time.Duration(s) * time.Microsecond + + if wpid == "" { + return interval, nil + } + p, err := strconv.Atoi(wpid) + if err != nil { + return 0, fmt.Errorf("error converting WATCHDOG_PID: %s", err) + } + if os.Getpid() != p { + return 0, nil + } + + return interval, nil +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 12f44afea3..5183ef19b3 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -200,6 +200,7 @@ github.com/containers/storage/pkg/unshare github.com/coreos/go-iptables/iptables # github.com/coreos/go-systemd/v22 v22.1.0 github.com/coreos/go-systemd/v22/activation +github.com/coreos/go-systemd/v22/daemon github.com/coreos/go-systemd/v22/dbus github.com/coreos/go-systemd/v22/internal/dlopen github.com/coreos/go-systemd/v22/journal