k8systemd: run k8s workloads in systemd #14256

Merged: 1 commit, May 18, 2022
1 change: 1 addition & 0 deletions .gitignore
@@ -4,6 +4,7 @@
/build/
/conmon/
contrib/spec/podman.spec
contrib/systemd/*/*.service
*.coverprofile
coverprofile
/.coverage
5 changes: 4 additions & 1 deletion Makefile
@@ -832,7 +832,8 @@ install.docker-full: install.docker install.docker-docs
ifneq (,$(findstring systemd,$(BUILDTAGS)))
PODMAN_UNIT_FILES = contrib/systemd/auto-update/podman-auto-update.service \
contrib/systemd/system/podman.service \
contrib/systemd/system/podman-restart.service
contrib/systemd/system/podman-restart.service \
contrib/systemd/system/[email protected]

%.service: %.service.in
sed -e 's;@@PODMAN@@;$(BINDIR)/podman;g' $< >[email protected].$$ \
@@ -846,12 +847,14 @@ install.systemd: $(PODMAN_UNIT_FILES)
install ${SELINUXOPT} -m 644 contrib/systemd/system/podman.socket ${DESTDIR}${USERSYSTEMDDIR}/podman.socket
install ${SELINUXOPT} -m 644 contrib/systemd/system/podman.service ${DESTDIR}${USERSYSTEMDDIR}/podman.service
install ${SELINUXOPT} -m 644 contrib/systemd/system/podman-restart.service ${DESTDIR}${USERSYSTEMDDIR}/podman-restart.service
install ${SELINUXOPT} -m 644 contrib/systemd/system/[email protected] ${DESTDIR}${USERSYSTEMDDIR}/[email protected]
# System services
install ${SELINUXOPT} -m 644 contrib/systemd/auto-update/podman-auto-update.service ${DESTDIR}${SYSTEMDDIR}/podman-auto-update.service
install ${SELINUXOPT} -m 644 contrib/systemd/auto-update/podman-auto-update.timer ${DESTDIR}${SYSTEMDDIR}/podman-auto-update.timer
install ${SELINUXOPT} -m 644 contrib/systemd/system/podman.socket ${DESTDIR}${SYSTEMDDIR}/podman.socket
install ${SELINUXOPT} -m 644 contrib/systemd/system/podman.service ${DESTDIR}${SYSTEMDDIR}/podman.service
install ${SELINUXOPT} -m 644 contrib/systemd/system/podman-restart.service ${DESTDIR}${SYSTEMDDIR}/podman-restart.service
install ${SELINUXOPT} -m 644 contrib/systemd/system/[email protected] ${DESTDIR}${SYSTEMDDIR}/[email protected]
rm -f $(PODMAN_UNIT_FILES)
else
install.systemd:
18 changes: 18 additions & 0 deletions contrib/systemd/system/[email protected]
@@ -0,0 +1,18 @@
[Unit]
Description=A template for running K8s workloads via podman-play-kube
Documentation=man:podman-play-kube(1)
Wants=network-online.target
After=network-online.target
RequiresMountsFor=%t/containers

[Service]
Environment=PODMAN_SYSTEMD_UNIT=%n
Restart=never
TimeoutStopSec=70
ExecStart=@@PODMAN@@ play kube --replace --service-container=true %I
ExecStop=@@PODMAN@@ play kube --down %I
Type=notify
NotifyAccess=all

[Install]
WantedBy=default.target
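
The template takes the systemd-escaped path of a Kubernetes YAML file as its instance name (%I). A minimal usage sketch, assuming the unit is installed and using a hypothetical /etc/containers/app.yaml:

# Start the workload; the escaped YAML path becomes the instance name
systemctl start "podman-play-kube@$(systemd-escape /etc/containers/app.yaml).service"
systemctl is-active "podman-play-kube@$(systemd-escape /etc/containers/app.yaml).service"
# Stopping the unit tears the workload down via `podman play kube --down`
systemctl stop "podman-play-kube@$(systemd-escape /etc/containers/app.yaml).service"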
2 changes: 1 addition & 1 deletion libpod/container_inspect.go
@@ -171,7 +171,7 @@ func (c *Container) getContainerInspectData(size bool, driverData *define.Driver
Mounts: inspectMounts,
Dependencies: c.Dependencies(),
IsInfra: c.IsInfra(),
IsService: c.isService(),
IsService: c.IsService(),
}

if c.state.ConfigPath != "" {
2 changes: 1 addition & 1 deletion libpod/container_validate.go
@@ -31,7 +31,7 @@ func (c *Container) validate() error {

// A container cannot be marked as an infra and service container at
// the same time.
if c.IsInfra() && c.isService() {
if c.IsInfra() && c.IsService() {
return fmt.Errorf("cannot be infra and service container at the same time: %w", define.ErrInvalidArg)
}

20 changes: 20 additions & 0 deletions libpod/runtime_ctr.go
@@ -644,6 +644,16 @@ func (r *Runtime) removeContainer(ctx context.Context, c *Container, force, remo
return err
}

if c.IsService() {
canStop, err := c.canStopServiceContainer()
if err != nil {
return err
}
if !canStop {
return fmt.Errorf("container %s is the service container of pod(s) %s and cannot be removed without removing the pod(s)", c.ID(), strings.Join(c.state.Service.Pods, ","))
}
}
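
In practice this guard surfaces as a CLI error when a user tries to remove a running service container directly; the supported path is to remove the associated pods (or tear the play down), which in turn removes the service container. An illustrative shell session with hypothetical names:

# Fails while pods still reference the service container (container name is hypothetical)
podman container rm 0a1b2c3d4e5f-service
# Error: container ... is the service container of pod(s) ... and cannot be removed without removing the pod(s)

# Removing the pods, e.g. via the play's teardown, also removes the service container
podman play kube --down app.yaml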

// If we're not force-removing, we need to check if we're in a good
// state to remove.
if !force {
@@ -903,6 +913,16 @@ func (r *Runtime) evictContainer(ctx context.Context, idOrName string, removeVol
}
}

if c.IsService() {
canStop, err := c.canStopServiceContainer()
if err != nil {
return id, err
}
if !canStop {
return id, fmt.Errorf("container %s is the service container of pod(s) %s and cannot be removed without removing the pod(s)", c.ID(), strings.Join(c.state.Service.Pods, ","))
}
}

var cleanupErr error
// Remove the container from the state
if c.config.Pod != "" {
15 changes: 11 additions & 4 deletions libpod/service.go
@@ -54,11 +54,12 @@ func (c *Container) addServicePodLocked(id string) error {
return c.save()
}

func (c *Container) isService() bool {
// IsService returns true when the container is a "service container".
func (c *Container) IsService() bool {
return c.config.IsService
}

// canStopServiceContainer returns true if all pods of the service are stopped.
// canStopServiceContainerLocked returns true if all pods of the service are stopped.
// Note that the method acquires the container lock.
func (c *Container) canStopServiceContainerLocked() (bool, error) {
c.lock.Lock()
@@ -67,10 +68,16 @@ func (c *Container) canStopServiceContainerLocked() (bool, error) {
return false, err
}

if !c.isService() {
if !c.IsService() {
return false, fmt.Errorf("internal error: checking service: container %s is not a service container", c.ID())
}

return c.canStopServiceContainer()
}

// canStopServiceContainer returns true if all pods of the service are stopped.
// Note that the method expects the container to be locked.
func (c *Container) canStopServiceContainer() (bool, error) {
for _, id := range c.state.Service.Pods {
pod, err := c.runtime.LookupPod(id)
if err != nil {
@@ -163,7 +170,7 @@ func (c *Container) canRemoveServiceContainerLocked() (bool, error) {
return false, err
}

if !c.isService() {
if !c.IsService() {
return false, fmt.Errorf("internal error: checking service: container %s is not a service container", c.ID())
}

8 changes: 7 additions & 1 deletion pkg/domain/infra/abi/containers.go
@@ -292,7 +292,13 @@ func (ic *ContainerEngine) removeContainer(ctx context.Context, ctr *libpod.Cont
logrus.Debugf("Failed to remove container %s: %s", ctr.ID(), err.Error())
switch errors.Cause(err) {
case define.ErrNoSuchCtr:
if options.Ignore {
// Ignore if the container does not exist (anymore) when either
// it has been requested by the user or if the container is a
// service one. Service containers are removed along with their
// pods which in turn are removed along with their infra
// container. Hence, there is an inherent race when removing
// infra containers with service containers in parallel.
if options.Ignore || ctr.IsService() {
logrus.Debugf("Ignoring error (--allow-missing): %v", err)
return nil
}
34 changes: 27 additions & 7 deletions pkg/domain/infra/abi/play.go
@@ -37,7 +37,15 @@ import (
// createServiceContainer creates a container that can later on
// be associated with the pods of a K8s yaml. It will be started along with
// the first pod.
func (ic *ContainerEngine) createServiceContainer(ctx context.Context, name string) (*libpod.Container, error) {
func (ic *ContainerEngine) createServiceContainer(ctx context.Context, name string, options entities.PlayKubeOptions) (*libpod.Container, error) {
// Make sure to replace the service container as well if requested by
// the user.
if options.Replace {
if _, err := ic.ContainerRm(ctx, []string{name}, entities.RmOptions{Force: true, Ignore: true}); err != nil {
return nil, fmt.Errorf("replacing service container: %w", err)
}
}

// Similar to infra containers, a service container is using the pause image.
image, err := generate.PullOrBuildInfraImage(ic.Libpod, "")
if err != nil {
@@ -65,6 +73,7 @@ func (ic *ContainerEngine) createServiceContainer(ctx context.Context, name stri
return nil, fmt.Errorf("creating runtime spec for service container: %w", err)
}
opts = append(opts, libpod.WithIsService())
opts = append(opts, libpod.WithSdNotifyMode(define.SdNotifyModeConmon))

// Create a new libpod container based on the spec.
ctr, err := ic.Libpod.NewContainer(ctx, runtimeSpec, spec, false, opts...)
@@ -75,6 +84,17 @@ func (ic *ContainerEngine) createServiceContainer(ctx context.Context, name stri
return ctr, nil
}

// Creates the name for a service container based on the provided content of a
// K8s yaml file.
func serviceContainerName(content []byte) string {
// The name of the service container is the first 12
// characters of the yaml file's hash followed by the
// '-service' suffix to guarantee a predictable and
// discoverable name.
hash := digest.FromBytes(content).Encoded()
return hash[0:12] + "-service"
}
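
For illustration, the same predictable name can be derived in shell (the new system test relies on this naming), assuming the YAML file is test.yaml:

# First 12 hex characters of the file's SHA-256 digest plus the "-service" suffix
yaml_sha=$(sha256sum test.yaml | cut -d' ' -f1)
echo "${yaml_sha:0:12}-service"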

func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options entities.PlayKubeOptions) (_ *entities.PlayKubeReport, finalErr error) {
report := &entities.PlayKubeReport{}
validKinds := 0
@@ -112,12 +132,7 @@ func (ic *ContainerEngine) PlayKube(ctx context.Context, body io.Reader, options
// TODO: create constants for the various "kinds" of yaml files.
var serviceContainer *libpod.Container
if options.ServiceContainer && (kind == "Pod" || kind == "Deployment") {
// The name of the service container is the first 12
// characters of the yaml file's hash followed by the
// '-service' suffix to guarantee a predictable and
// discoverable name.
hash := digest.FromBytes(content).Encoded()
ctr, err := ic.createServiceContainer(ctx, hash[0:12]+"-service")
ctr, err := ic.createServiceContainer(ctx, serviceContainerName(content), options)
if err != nil {
return nil, err
}
@@ -433,6 +448,7 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY
podSpec.PodSpecGen.NoInfra = false
podSpec.PodSpecGen.InfraContainerSpec = specgen.NewSpecGenerator(infraImage, false)
podSpec.PodSpecGen.InfraContainerSpec.NetworkOptions = p.NetworkOptions
podSpec.PodSpecGen.InfraContainerSpec.SdNotifyMode = define.SdNotifyModeIgnore

err = specgenutil.FillOutSpecGen(podSpec.PodSpecGen.InfraContainerSpec, &infraOptions, []string{})
if err != nil {
@@ -516,10 +532,12 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY
if err != nil {
return nil, err
}
specGen.SdNotifyMode = define.SdNotifyModeIgnore
rtSpec, spec, opts, err := generate.MakeContainer(ctx, ic.Libpod, specGen, false, nil)
if err != nil {
return nil, err
}
opts = append(opts, libpod.WithSdNotifyMode(define.SdNotifyModeIgnore))
ctr, err := generate.ExecuteCreate(ctx, ic.Libpod, rtSpec, spec, false, opts...)
if err != nil {
return nil, err
@@ -570,6 +588,7 @@ func (ic *ContainerEngine) playKubePod(ctx context.Context, podName string, podY
if err != nil {
return nil, err
}
opts = append(opts, libpod.WithSdNotifyMode(define.SdNotifyModeIgnore))
ctr, err := generate.ExecuteCreate(ctx, ic.Libpod, rtSpec, spec, false, opts...)
if err != nil {
return nil, err
@@ -942,5 +961,6 @@ func (ic *ContainerEngine) PlayKubeDown(ctx context.Context, body io.Reader, _ e
if err != nil {
return nil, err
}

return reports, nil
}
2 changes: 2 additions & 0 deletions podman.spec.rpkg
@@ -242,11 +242,13 @@ done
%{_unitdir}/%{name}.service
%{_unitdir}/%{name}.socket
%{_unitdir}/%{name}-restart.service
%{_unitdir}/%{name}[email protected]
%{_userunitdir}/%{name}-auto-update.service
%{_userunitdir}/%{name}-auto-update.timer
%{_userunitdir}/%{name}.service
%{_userunitdir}/%{name}.socket
%{_userunitdir}/%{name}-restart.service
%{_userunitdir}/%{name}[email protected]
%{_tmpfilesdir}/%{name}.conf
%if 0%{?fedora} >= 36
%{_modulesloaddir}/%{name}-iptables.conf
76 changes: 76 additions & 0 deletions test/system/250-systemd.bats
@@ -292,4 +292,80 @@ LISTEN_FDNAMES=listen_fdnames" | sort)
run_podman network rm -f $netname
}

@test "[email protected] template" {
skip_if_remote "systemd units do not work with remote clients"

# If running from a podman source directory, build and use the source
# version of the podman-play-kube@ unit file
unit_name="[email protected]"
unit_file="contrib/systemd/system/${unit_name}"
if [[ -e ${unit_file}.in ]]; then
echo "# [Building & using $unit_name from source]" >&3
BINDIR=$(dirname $PODMAN) make $unit_file
cp $unit_file $UNIT_DIR/$unit_name
fi

# Create the YAML file
yaml_source="$PODMAN_TMPDIR/test.yaml"
cat >$yaml_source <<EOF
apiVersion: v1
kind: Pod
metadata:
labels:
app: test
name: test_pod
spec:
containers:
- command:
- top
image: $IMAGE
name: test
resources: {}
EOF

# Dispatch the YAML file
service_name="podman-play-kube@$(systemd-escape $yaml_source).service"
systemctl start $service_name
systemctl is-active $service_name

# The name of the service container is predictable: the first 12 characters
# of the hash of the YAML file followed by the "-service" suffix
yaml_sha=$(sha256sum $yaml_source)
service_container="${yaml_sha:0:12}-service"

# Make sure that the service container exists and runs.
run_podman container inspect $service_container --format "{{.State.Running}}"
is "$output" "true"

# Check for an error when trying to remove the service container
run_podman 125 container rm $service_container
is "$output" "Error: container .* is the service container of pod(s) .* and cannot be removed without removing the pod(s)"

# Kill the pod and make sure the service is not running.
# The restart policy is set to "never" since there is no
# design yet for propagating exit codes up to the service
# container.
run_podman pod kill test_pod
for i in {0..5}; do
run systemctl is-failed $service_name
if [[ $output == "failed" ]]; then
break
fi
sleep 0.5
done
is "$output" "failed" "systemd service transitioned to 'failed' state"

# Now stop and start the service again.
systemctl stop $service_name
systemctl start $service_name
systemctl is-active $service_name
run_podman container inspect $service_container --format "{{.State.Running}}"
is "$output" "true"

# Clean up
systemctl stop $service_name
run_podman 1 container exists $service_container
run_podman 1 pod exists test_pod
}

# vim: filetype=sh