Skip to content

Commit

Permalink
Merge pull request #140 from arangodb/feature/tolerations
Browse files Browse the repository at this point in the history
Added `tolerations` field to configure tolerations of generated pods.
  • Loading branch information
ewoutp authored May 15, 2018
2 parents 2b802f2 + 7321e65 commit 87dd61e
Show file tree
Hide file tree
Showing 7 changed files with 151 additions and 8 deletions.
12 changes: 12 additions & 0 deletions docs/Manual/Deployment/Kubernetes/DeploymentResource.md
Original file line number Diff line number Diff line change
Expand Up @@ -353,3 +353,15 @@ for each server of this group.

This setting is not available for group `coordinators`, `syncmasters` & `syncworkers`
because servers in these groups do not need persistent storage.

### `spec.<group>.tolerations: [Toleration]`

This setting specifies the `tolerations` for the `Pod`s created
for each server of this group.

By default, suitable tolerations are set for the following keys with the `NoExecute` effect:

- `node.kubernetes.io/not-ready`
- `node.kubernetes.io/unreachable`
- `node.alpha.kubernetes.io/unreachable`

For more information on tolerations, consult the [Kubernetes documentation](https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/).
10 changes: 10 additions & 0 deletions pkg/apis/deployment/v1alpha/server_group_spec.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ type ServerGroupSpec struct {
StorageClassName *string `json:"storageClassName,omitempty"`
// Resources holds resource requests & limits
Resources v1.ResourceRequirements `json:"resources,omitempty"`
// Tolerations specifies the tolerations added to Pods in this group.
Tolerations []v1.Toleration `json:"tolerations,omitempty"`
}

// GetCount returns the value of count.
Expand All @@ -57,6 +59,11 @@ func (s ServerGroupSpec) GetStorageClassName() string {
return util.StringOrDefault(s.StorageClassName)
}

// GetTolerations returns the tolerations configured for this server group.
// The Tolerations field is returned as stored, without copying, so the
// result shares its backing array with the spec.
func (s ServerGroupSpec) GetTolerations() []v1.Toleration {
return s.Tolerations
}

// Validate the given group spec
func (s ServerGroupSpec) Validate(group ServerGroup, used bool, mode DeploymentMode, env Environment) error {
if used {
Expand Down Expand Up @@ -133,6 +140,9 @@ func (s *ServerGroupSpec) SetDefaultsFrom(source ServerGroupSpec) {
if s.StorageClassName == nil {
s.StorageClassName = util.NewStringOrNil(source.StorageClassName)
}
if s.Tolerations == nil {
s.Tolerations = source.Tolerations
}
setDefaultsFromResourceList(&s.Resources.Limits, source.Resources.Limits)
setDefaultsFromResourceList(&s.Resources.Requests, source.Resources.Requests)
}
Expand Down
7 changes: 7 additions & 0 deletions pkg/apis/deployment/v1alpha/zz_generated.deepcopy.go
Original file line number Diff line number Diff line change
Expand Up @@ -590,6 +590,13 @@ func (in *ServerGroupSpec) DeepCopyInto(out *ServerGroupSpec) {
}
}
in.Resources.DeepCopyInto(&out.Resources)
if in.Tolerations != nil {
in, out := &in.Tolerations, &out.Tolerations
*out = make([]core_v1.Toleration, len(*in))
for i := range *in {
(*in)[i].DeepCopyInto(&(*out)[i])
}
}
return
}

Expand Down
9 changes: 8 additions & 1 deletion pkg/deployment/images.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"time"

"github.com/rs/zerolog"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes"

Expand Down Expand Up @@ -168,7 +169,13 @@ func (ib *imagesBuilder) fetchArangoDBImageIDAndVersion(ctx context.Context, ima
fmt.Sprintf("--server.endpoint=tcp://[::]:%d", k8sutil.ArangoPort),
}
terminationGracePeriod := time.Second * 30
if err := k8sutil.CreateArangodPod(ib.KubeCli, true, ib.APIObject, role, id, podName, "", image, "", ib.Spec.GetImagePullPolicy(), "", false, terminationGracePeriod, args, nil, nil, nil, nil, "", ""); err != nil {
tolerations := make([]v1.Toleration, 0, 2)
shortDur := k8sutil.TolerationDuration{Forever: false, TimeSpan: time.Second * 5}
tolerations = k8sutil.AddTolerationIfNotFound(tolerations, k8sutil.NewNoExecuteToleration(k8sutil.TolerationKeyNodeNotReady, shortDur))
tolerations = k8sutil.AddTolerationIfNotFound(tolerations, k8sutil.NewNoExecuteToleration(k8sutil.TolerationKeyNodeUnreachable, shortDur))
tolerations = k8sutil.AddTolerationIfNotFound(tolerations, k8sutil.NewNoExecuteToleration(k8sutil.TolerationKeyNodeAlphaUnreachable, shortDur))

if err := k8sutil.CreateArangodPod(ib.KubeCli, true, ib.APIObject, role, id, podName, "", image, "", ib.Spec.GetImagePullPolicy(), "", false, terminationGracePeriod, args, nil, nil, nil, nil, tolerations, "", ""); err != nil {
log.Debug().Err(err).Msg("Failed to create image ID pod")
return true, maskAny(err)
}
Expand Down
43 changes: 41 additions & 2 deletions pkg/deployment/resources/pod_creator.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,12 +32,14 @@ import (
"sort"
"strconv"
"strings"
"time"

"github.com/arangodb/go-driver/jwt"
api "github.com/arangodb/kube-arangodb/pkg/apis/deployment/v1alpha"
"github.com/arangodb/kube-arangodb/pkg/util/constants"
"github.com/arangodb/kube-arangodb/pkg/util/k8sutil"
"github.com/pkg/errors"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

Expand Down Expand Up @@ -389,6 +391,42 @@ func (r *Resources) createPodFinalizers(group api.ServerGroup) []string {
}
}

// createPodTolerations builds the toleration list for a pod of the given group.
//
// It starts from the tolerations in groupSpec and then adds NoExecute
// tolerations for the not-ready, unreachable and alpha-unreachable node taint
// keys, each only when the list does not already cover that key.
// How long such a taint is tolerated depends on the group:
//   - agents, and single servers in deployment mode "single": forever
//   - dbservers, and single servers in any other mode: 5 minutes
//   - coordinators and sync masters: 15 seconds
//   - sync workers and any other group: 1 minute
func (r *Resources) createPodTolerations(group api.ServerGroup, groupSpec api.ServerGroupSpec) []v1.Toleration {
	// The not-ready and unreachable taints are always tolerated for the same
	// duration, so a single (forever, span) pair describes both.
	forever := false
	span := time.Minute
	switch group {
	case api.ServerGroupAgents:
		forever = true
	case api.ServerGroupCoordinators, api.ServerGroupSyncMasters:
		span = 15 * time.Second
	case api.ServerGroupDBServers:
		span = 5 * time.Minute
	case api.ServerGroupSingle:
		if r.context.GetSpec().GetMode() == api.DeploymentModeSingle {
			forever = true
		} else {
			span = 5 * time.Minute
		}
	case api.ServerGroupSyncWorkers:
		span = 1 * time.Minute
	}
	dur := k8sutil.TolerationDuration{Forever: forever, TimeSpan: span}

	result := groupSpec.GetTolerations()
	for _, key := range []string{
		k8sutil.TolerationKeyNodeNotReady,
		k8sutil.TolerationKeyNodeUnreachable,
		k8sutil.TolerationKeyNodeAlphaUnreachable,
	} {
		result = k8sutil.AddTolerationIfNotFound(result, k8sutil.NewNoExecuteToleration(key, dur))
	}
	return result
}

// createPodForMember creates all Pods listed in member status
func (r *Resources) createPodForMember(spec api.DeploymentSpec, group api.ServerGroup,
groupSpec api.ServerGroupSpec, m api.MemberStatus, memberStatusList *api.MemberStatusList) error {
Expand All @@ -399,6 +437,7 @@ func (r *Resources) createPodForMember(spec api.DeploymentSpec, group api.Server
status := r.context.GetStatus()
lifecycleImage := r.context.GetLifecycleImage()
terminationGracePeriod := group.DefaultTerminationGracePeriod()
tolerations := r.createPodTolerations(group, groupSpec)

// Update pod name
role := group.AsRole()
Expand Down Expand Up @@ -461,7 +500,7 @@ func (r *Resources) createPodForMember(spec api.DeploymentSpec, group api.Server
requireUUID := group == api.ServerGroupDBServers && m.IsInitialized
finalizers := r.createPodFinalizers(group)
if err := k8sutil.CreateArangodPod(kubecli, spec.IsDevelopment(), apiObject, role, m.ID, m.PodName, m.PersistentVolumeClaimName, info.ImageID, lifecycleImage, spec.GetImagePullPolicy(),
engine, requireUUID, terminationGracePeriod, args, env, finalizers, livenessProbe, readinessProbe, tlsKeyfileSecretName, rocksdbEncryptionSecretName); err != nil {
engine, requireUUID, terminationGracePeriod, args, env, finalizers, livenessProbe, readinessProbe, tolerations, tlsKeyfileSecretName, rocksdbEncryptionSecretName); err != nil {
return maskAny(err)
}
log.Debug().Str("pod-name", m.PodName).Msg("Created pod")
Expand Down Expand Up @@ -532,7 +571,7 @@ func (r *Resources) createPodForMember(spec api.DeploymentSpec, group api.Server
affinityWithRole = api.ServerGroupDBServers.AsRole()
}
if err := k8sutil.CreateArangoSyncPod(kubecli, spec.IsDevelopment(), apiObject, role, m.ID, m.PodName, info.ImageID, lifecycleImage, spec.Sync.GetImagePullPolicy(), terminationGracePeriod, args, env,
livenessProbe, tlsKeyfileSecretName, clientAuthCASecretName, masterJWTSecretName, clusterJWTSecretName, affinityWithRole); err != nil {
livenessProbe, tolerations, tlsKeyfileSecretName, clientAuthCASecretName, masterJWTSecretName, clusterJWTSecretName, affinityWithRole); err != nil {
return maskAny(err)
}
log.Debug().Str("pod-name", m.PodName).Msg("Created pod")
Expand Down
11 changes: 6 additions & 5 deletions pkg/util/k8sutil/pods.go
Original file line number Diff line number Diff line change
Expand Up @@ -384,7 +384,7 @@ func initLifecycleContainer(image string) (v1.Container, error) {
}

// newPod creates a basic Pod for given settings.
func newPod(deploymentName, ns, role, id, podName string, finalizers []string) v1.Pod {
func newPod(deploymentName, ns, role, id, podName string, finalizers []string, tolerations []v1.Toleration) v1.Pod {
hostname := CreatePodHostName(deploymentName, role, id)
p := v1.Pod{
ObjectMeta: metav1.ObjectMeta{
Expand All @@ -396,6 +396,7 @@ func newPod(deploymentName, ns, role, id, podName string, finalizers []string) v
Hostname: hostname,
Subdomain: CreateHeadlessServiceName(deploymentName),
RestartPolicy: v1.RestartPolicyNever,
Tolerations: tolerations,
},
}
return p
Expand All @@ -408,10 +409,10 @@ func CreateArangodPod(kubecli kubernetes.Interface, developmentMode bool, deploy
role, id, podName, pvcName, image, lifecycleImage string, imagePullPolicy v1.PullPolicy,
engine string, requireUUID bool, terminationGracePeriod time.Duration,
args []string, env map[string]EnvValue, finalizers []string,
livenessProbe *HTTPProbeConfig, readinessProbe *HTTPProbeConfig,
livenessProbe *HTTPProbeConfig, readinessProbe *HTTPProbeConfig, tolerations []v1.Toleration,
tlsKeyfileSecretName, rocksdbEncryptionSecretName string) error {
// Prepare basic pod
p := newPod(deployment.GetName(), deployment.GetNamespace(), role, id, podName, finalizers)
p := newPod(deployment.GetName(), deployment.GetNamespace(), role, id, podName, finalizers, tolerations)
terminationGracePeriodSeconds := int64(math.Ceil(terminationGracePeriod.Seconds()))
p.Spec.TerminationGracePeriodSeconds = &terminationGracePeriodSeconds

Expand Down Expand Up @@ -509,10 +510,10 @@ func CreateArangodPod(kubecli kubernetes.Interface, developmentMode bool, deploy
// If the pod already exists, nil is returned.
// If another error occurs, that error is returned.
func CreateArangoSyncPod(kubecli kubernetes.Interface, developmentMode bool, deployment APIObject, role, id, podName, image, lifecycleImage string, imagePullPolicy v1.PullPolicy,
terminationGracePeriod time.Duration, args []string, env map[string]EnvValue, livenessProbe *HTTPProbeConfig,
terminationGracePeriod time.Duration, args []string, env map[string]EnvValue, livenessProbe *HTTPProbeConfig, tolerations []v1.Toleration,
tlsKeyfileSecretName, clientAuthCASecretName, masterJWTSecretName, clusterJWTSecretName, affinityWithRole string) error {
// Prepare basic pod
p := newPod(deployment.GetName(), deployment.GetNamespace(), role, id, podName, nil)
p := newPod(deployment.GetName(), deployment.GetNamespace(), role, id, podName, nil, tolerations)
terminationGracePeriodSeconds := int64(math.Ceil(terminationGracePeriod.Seconds()))
p.Spec.TerminationGracePeriodSeconds = &terminationGracePeriodSeconds

Expand Down
67 changes: 67 additions & 0 deletions pkg/util/k8sutil/tolerations.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
//
// DISCLAIMER
//
// Copyright 2018 ArangoDB GmbH, Cologne, Germany
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Copyright holder is ArangoDB GmbH, Cologne, Germany
//
// Author Ewout Prangsma
//

package k8sutil

import (
"time"

"k8s.io/api/core/v1"
)

// Node taint keys for which tolerations are created by the helpers in this package.
const (
// TolerationKeyNodeNotReady is the taint key "node.kubernetes.io/not-ready".
TolerationKeyNodeNotReady = "node.kubernetes.io/not-ready"
// TolerationKeyNodeAlphaUnreachable is the taint key "node.alpha.kubernetes.io/unreachable".
// NOTE(review): presumably the alpha-era name of the unreachable taint, kept for older clusters — confirm.
TolerationKeyNodeAlphaUnreachable = "node.alpha.kubernetes.io/unreachable"
// TolerationKeyNodeUnreachable is the taint key "node.kubernetes.io/unreachable".
TolerationKeyNodeUnreachable = "node.kubernetes.io/unreachable"
)

// TolerationDuration is a duration spec for tolerations.
// It describes either an unbounded toleration (Forever) or one that
// lasts for a fixed TimeSpan.
type TolerationDuration struct {
// Forever, when true, makes NewNoExecuteToleration leave TolerationSeconds
// unset; TimeSpan is ignored in that case.
Forever bool
// TimeSpan is the toleration period used when Forever is false.
// NewNoExecuteToleration converts it to whole seconds.
TimeSpan time.Duration
}

// NewNoExecuteToleration builds a Toleration for the given taint key with
// Operator=Exists and Effect=NoExecute.
//
// When duration.Forever is set, TolerationSeconds is left nil. Otherwise
// TolerationSeconds is set to duration.TimeSpan expressed in seconds, with
// any fractional part truncated.
func NewNoExecuteToleration(key string, duration TolerationDuration) v1.Toleration {
	if duration.Forever {
		return v1.Toleration{
			Key:      key,
			Operator: v1.TolerationOpExists,
			Effect:   v1.TaintEffectNoExecute,
		}
	}
	seconds := int64(duration.TimeSpan.Seconds())
	return v1.Toleration{
		Key:               key,
		Operator:          v1.TolerationOpExists,
		Effect:            v1.TaintEffectNoExecute,
		TolerationSeconds: &seconds,
	}
}

// AddTolerationIfNotFound returns source extended with toAdd, unless source
// already holds a toleration covering the same key and effect.
//
// An existing toleration covers toAdd when its Key equals toAdd.Key or is
// empty, and its Effect equals toAdd.Effect or is empty. Operator, Value and
// TolerationSeconds are not compared. In that case source is returned as is.
//
// When toAdd is appended, the result is always a newly allocated slice, so
// the backing array of source (which may belong to a deployment spec) is
// never written to.
func AddTolerationIfNotFound(source []v1.Toleration, toAdd v1.Toleration) []v1.Toleration {
	for i := range source {
		t := &source[i]
		keyCovered := t.Key == toAdd.Key || len(t.Key) == 0
		effectCovered := t.Effect == toAdd.Effect || len(t.Effect) == 0
		if keyCovered && effectCovered {
			// Toleration already exists, do not add
			return source
		}
	}
	// Cap the slice at its length so that append has to allocate a fresh
	// array instead of overwriting spare capacity shared with the caller.
	return append(source[:len(source):len(source)], toAdd)
}

0 comments on commit 87dd61e

Please sign in to comment.