Skip to content

Commit

Permalink
Merge pull request #151 from arangodb/bugfix/cleanup-long-terminating…
Browse files Browse the repository at this point in the history
…-stateless-pods

Cleanup stateless pods that are in terminating state for a long time
  • Loading branch information
ewoutp authored Jun 6, 2018
2 parents efb5f6d + 9a12a97 commit 095dc19
Show file tree
Hide file tree
Showing 3 changed files with 40 additions and 9 deletions.
10 changes: 10 additions & 0 deletions pkg/apis/deployment/v1alpha/server_group.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,16 @@ func (g ServerGroup) DefaultTerminationGracePeriod() time.Duration {
}
}

// IsStateless reports whether the group runs servers without a persistent volume.
// Coordinators, sync masters and sync workers are the stateless groups;
// every other group yields false.
func (g ServerGroup) IsStateless() bool {
	return g == ServerGroupCoordinators ||
		g == ServerGroupSyncMasters ||
		g == ServerGroupSyncWorkers
}

// IsArangod returns true when the group runs servers of type `arangod`.
func (g ServerGroup) IsArangod() bool {
switch g {
Expand Down
33 changes: 24 additions & 9 deletions pkg/deployment/resources/pod_cleanup.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,17 @@
package resources

import (
"time"

"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"

api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha"
)

const (
	// statelessTerminationPeriod is how long we wait for a stateless server to
	// terminate on its own. Afterwards we kill it.
	statelessTerminationPeriod = time.Minute
)

// CleanupTerminatedPods removes all pods in Terminated state that belong to a member in Created state.
func (r *Resources) CleanupTerminatedPods() error {
log := r.log
Expand All @@ -47,20 +53,29 @@ func (r *Resources) CleanupTerminatedPods() error {
}

// Check pod state
if !(k8sutil.IsPodSucceeded(&p) || k8sutil.IsPodFailed(&p)) {
if !(k8sutil.IsPodSucceeded(&p) || k8sutil.IsPodFailed(&p) || k8sutil.IsPodTerminating(&p)) {
continue
}

// Find member status
memberStatus, _, found := status.Members.MemberStatusByPodName(p.GetName())
memberStatus, group, found := status.Members.MemberStatusByPodName(p.GetName())
if !found {
log.Debug().Str("pod", p.GetName()).Msg("no memberstatus found for pod")
continue
}

// Check member termination condition
if !memberStatus.Conditions.IsTrue(api.ConditionTypeTerminated) {
continue
log.Debug().Str("pod", p.GetName()).Msg("no memberstatus found for pod. Performing cleanup")
} else {
// Check member termination condition
if !memberStatus.Conditions.IsTrue(api.ConditionTypeTerminated) {
if !group.IsStateless() {
// For stateful members, we have to wait for confirmed termination
continue
} else {
// If a stateless server does not terminate within a reasonable amount of time, we kill it.
t := p.GetDeletionTimestamp()
if t == nil || t.Add(statelessTerminationPeriod).After(time.Now()) {
// Either delete timestamp is not set, or not yet waiting long enough
continue
}
}
}
}

// Ok, we can delete the pod
Expand Down
6 changes: 6 additions & 0 deletions pkg/util/k8sutil/pods.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,12 @@ func IsPodMarkedForDeletion(pod *v1.Pod) bool {
return pod.DeletionTimestamp != nil
}

// IsPodTerminating reports whether the pod has been marked for deletion
// while its phase is still Running.
func IsPodTerminating(pod *v1.Pod) bool {
	// A pod that is not marked for deletion cannot be terminating.
	if !IsPodMarkedForDeletion(pod) {
		return false
	}
	return pod.Status.Phase == v1.PodRunning
}

// IsArangoDBImageIDAndVersionPod returns true if the given pod is used for fetching image ID and ArangoDB version of an image
func IsArangoDBImageIDAndVersionPod(p v1.Pod) bool {
role, found := p.GetLabels()[LabelKeyRole]
Expand Down

0 comments on commit 095dc19

Please sign in to comment.