Skip to content

Commit

Permalink
Reap exec sessions on cleanup and removal
Browse files Browse the repository at this point in the history
We currently rely on exec sessions being removed from the state
by the Exec() API itself, on detecting the session stopping. This
is not a reliable method, though. The Podman frontend for exec
could be killed before the session ended, or another Podman
process could be holding the lock and prevent update (most
notable in `run --rm`, when a container with an active exec
session is stopped).

To resolve this, add a function to reap active exec sessions from
the state, and use it on cleanup (to clear sessions after the
container stops) and remove (to do the same when --rm is passed).
This is a bit more complicated than it ought to be because Kata
and company exist, and we can't guarantee the exec session has a
PID on the host, so we have to plumb this through to the OCI
runtime.

Fixes containers#4666

Signed-off-by: Matthew Heon <[email protected]>
  • Loading branch information
mheon authored and rhatdan committed Dec 15, 2020
1 parent dd2b7bf commit 2cf4204
Show file tree
Hide file tree
Showing 5 changed files with 76 additions and 4 deletions.
7 changes: 6 additions & 1 deletion libpod/container_api.go
Original file line number Diff line number Diff line change
Expand Up @@ -578,7 +578,12 @@ func (c *Container) Cleanup(ctx context.Context) error {

// If we didn't restart, we perform a normal cleanup

// Check if we have active exec sessions
// Reap exec sessions first.
if err := c.reapExecSessions(); err != nil {
return err
}

// Check if we have active exec sessions after reaping.
if len(c.state.ExecSessions) != 0 {
return errors.Wrapf(define.ErrCtrStateInvalid, "container %s has active exec sessions, refusing to clean up", c.ID())
}
Expand Down
40 changes: 40 additions & 0 deletions libpod/container_internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -1755,6 +1755,11 @@ func (c *Container) checkReadyForRemoval() error {
return errors.Wrapf(define.ErrCtrStateInvalid, "cannot remove container %s as it is %s - running or paused containers cannot be removed without force", c.ID(), c.state.State.String())
}

// Reap exec sessions
if err := c.reapExecSessions(); err != nil {
return err
}

if len(c.state.ExecSessions) != 0 {
return errors.Wrapf(define.ErrCtrStateInvalid, "cannot remove container %s as it has active exec sessions", c.ID())
}
Expand Down Expand Up @@ -1861,3 +1866,38 @@ func (c *Container) checkExitFile() error {
// Read the exit file to get our stopped time and exit code.
return c.handleExitFile(exitFile, info)
}

// Reap dead exec sessions
func (c *Container) reapExecSessions() error {
// Instead of saving once per iteration, use a defer to do it once at
// the end.
var lastErr error
needSave := false
for id := range c.state.ExecSessions {
alive, err := c.ociRuntime.ExecUpdateStatus(c, id)
if err != nil {
if lastErr != nil {
logrus.Errorf("Error reaping exec sessions for container %s: %v", c.ID(), lastErr)
}
lastErr = err
continue
}
if !alive {
// Clean up lingering files and remove the exec session
if err := c.ociRuntime.ExecContainerCleanup(c, id); err != nil {
return errors.Wrapf(err, "error cleaning up container %s exec session %s files", c.ID(), id)
}
delete(c.state.ExecSessions, id)
needSave = true
}
}
if needSave {
if err := c.save(); err != nil {
if lastErr != nil {
logrus.Errorf("Error reaping exec sessions for container %s: %v", c.ID(), lastErr)
}
lastErr = err
}
}
return lastErr
}
6 changes: 3 additions & 3 deletions libpod/oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,6 @@ type OCIRuntime interface {
// CreateContainer creates the container in the OCI runtime.
CreateContainer(ctr *Container, restoreOptions *ContainerCheckpointOptions) error
// UpdateContainerStatus updates the status of the given container.
// It includes a switch for whether to perform a hard query of the
// runtime. If unset, the exit file (if supported by the implementation)
// will be used.
UpdateContainerStatus(ctr *Container) error
// StartContainer starts the given container.
StartContainer(ctr *Container) error
Expand Down Expand Up @@ -59,6 +56,9 @@ type OCIRuntime interface {
// If timeout is 0, SIGKILL will be sent immediately, and SIGTERM will
// be omitted.
ExecStopContainer(ctr *Container, sessionID string, timeout uint) error
// ExecUpdateStatus checks the status of a given exec session.
// Returns true if the session is still running, or false if it exited.
ExecUpdateStatus(ctr *Container, sessionID string) (bool, error)
// ExecContainerCleanup cleans up after an exec session exits.
// It removes any files left by the exec session that are no longer
// needed, including the attach socket.
Expand Down
22 changes: 22 additions & 0 deletions libpod/oci_conmon_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -687,6 +687,28 @@ func (r *ConmonOCIRuntime) ExecStopContainer(ctr *Container, sessionID string, t
return nil
}

// ExecUpdateStatus checks if the given exec session is still running.
func (r *ConmonOCIRuntime) ExecUpdateStatus(ctr *Container, sessionID string) (bool, error) {
session, ok := ctr.state.ExecSessions[sessionID]
if !ok {
// TODO This should probably be a separate error
return false, errors.Wrapf(define.ErrInvalidArg, "no exec session with ID %s found in container %s", sessionID, ctr.ID())
}

logrus.Debugf("Checking status of container %s exec session %s", ctr.ID(), sessionID)

// Is the session dead?
// Ping the PID with signal 0 to see if it still exists.
if err := unix.Kill(session.PID, 0); err != nil {
if err == unix.ESRCH {
return false, nil
}
return false, errors.Wrapf(err, "error pinging container %s exec session %s PID %d with signal 0", ctr.ID(), sessionID, session.PID)
}

return true, nil
}

// ExecCleanupContainer cleans up files created when a command is run via
// ExecContainer. This includes the attach socket for the exec session.
func (r *ConmonOCIRuntime) ExecContainerCleanup(ctr *Container, sessionID string) error {
Expand Down
5 changes: 5 additions & 0 deletions libpod/oci_missing.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,11 @@ func (r *MissingRuntime) ExecStopContainer(ctr *Container, sessionID string, tim
return r.printError()
}

// ExecUpdateStatus is not available as the runtime is missing.
func (r *MissingRuntime) ExecUpdateStatus(ctr *Container, sessionID string) (bool, error) {
return false, r.printError()
}

// ExecContainerCleanup is not available as the runtime is missing
func (r *MissingRuntime) ExecContainerCleanup(ctr *Container, sessionID string) error {
return r.printError()
Expand Down

0 comments on commit 2cf4204

Please sign in to comment.