Skip to content

Commit

Permalink
Eviction controller
Browse files Browse the repository at this point in the history
  • Loading branch information
Arvindthiru committed Nov 13, 2024
1 parent cb9a7a0 commit e910804
Show file tree
Hide file tree
Showing 7 changed files with 1,868 additions and 0 deletions.
13 changes: 13 additions & 0 deletions apis/placement/v1alpha1/eviction_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ Licensed under the MIT license.
package v1alpha1

import (
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

Expand Down Expand Up @@ -118,6 +119,18 @@ type ClusterResourcePlacementEvictionList struct {
Items []ClusterResourcePlacementEviction `json:"items"`
}

// SetConditions applies each of the given conditions, in the order supplied, to the
// status of the ClusterResourcePlacementEviction using meta.SetStatusCondition.
func (e *ClusterResourcePlacementEviction) SetConditions(conditions ...metav1.Condition) {
	for idx := range conditions {
		meta.SetStatusCondition(&e.Status.Conditions, conditions[idx])
	}
}

// GetCondition looks up the status condition of the given type on the
// ClusterResourcePlacementEviction and returns whatever meta.FindStatusCondition
// yields for it.
func (e *ClusterResourcePlacementEviction) GetCondition(conditionType string) *metav1.Condition {
	current := e.Status.Conditions
	return meta.FindStatusCondition(current, conditionType)
}

func init() {
SchemeBuilder.Register(
&ClusterResourcePlacementEviction{},
Expand Down
243 changes: 243 additions & 0 deletions pkg/controllers/clusterresourceplacementeviction/controller.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,243 @@
/*
Copyright (c) Microsoft Corporation.
Licensed under the MIT license.
*/

package clusterresourceplacementeviction

import (
"context"
"fmt"
"time"

"k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/klog/v2"
runtime "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
ctrl "sigs.k8s.io/controller-runtime/pkg/controller"

placementv1alpha1 "go.goms.io/fleet/apis/placement/v1alpha1"
placementv1beta1 "go.goms.io/fleet/apis/placement/v1beta1"
"go.goms.io/fleet/pkg/utils/condition"
"go.goms.io/fleet/pkg/utils/controller"
)

// Condition reasons and messages that this controller records on the status
// conditions of a ClusterResourcePlacementEviction.
const (
// Reasons set on the Valid condition (true/false) and on the Executed condition (true/false).
reasonClusterResourcePlacementEvictionValid = "ClusterResourcePlacementEvictionValid"
reasonClusterResourcePlacementEvictionInvalid = "ClusterResourcePlacementEvictionInvalid"
reasonClusterResourcePlacementEvictionExecuted = "ClusterResourcePlacementEvictionExecuted"
reasonClusterResourcePlacementEvictionNotExecuted = "ClusterResourcePlacementEvictionNotExecuted"

// Fixed condition messages; the two "invalid" messages are also used as log messages.
evictionInvalidMissingCRP = "Failed to find cluster resource placement targeted by eviction"
evictionInvalidMissingCRB = "Failed to find cluster resource binding for cluster targeted by eviction"
evictionValid = "Eviction is valid"
evictionAllowedNoPDB = "Eviction Allowed, no ClusterResourcePlacementDisruptionBudget specified"

// Format strings for the Executed condition message when a disruption budget exists.
// The four %d verbs are filled, in order, with: disruptionsAllowed, availableBindings,
// desiredBindings, totalBindings.
evictionAllowedPDBSpecified = "Eviction is allowed by specified ClusterResourcePlacementDisruptionBudget, disruptionsAllowed: %d, availableBindings: %d, desiredBindings: %d, totalBindings: %d"
evictionBlockedPDBSpecified = "Eviction is blocked by specified ClusterResourcePlacementDisruptionBudget, disruptionsAllowed: %d, availableBindings: %d, desiredBindings: %d, totalBindings: %d"
)

// Reconciler reconciles a ClusterResourcePlacementEviction object.
type Reconciler struct {
// Client is the embedded client used to read evictions, placements, bindings and
// disruption budgets, to delete bindings, and to update eviction status.
client.Client
}

// Reconcile triggers a single eviction reconcile round.
//
// The round proceeds as follows:
//  1. Fetch the eviction; a not-found error is ignored.
//  2. Stop if the eviction is already marked invalid for its current generation, or if it
//     already carries an Executed condition (true or false).
//  3. Validate the eviction: the targeted ClusterResourcePlacement must exist and one of
//     its ClusterResourceBindings must target the eviction's cluster. Otherwise the
//     eviction is marked invalid and its status is updated.
//  4. If no ClusterResourcePlacementDisruptionBudget named after the placement exists,
//     delete the target binding and mark the eviction executed.
//  5. Otherwise compute the desired number of bindings from the placement policy, ask
//     isEvictionAllowed whether the budget permits a disruption, delete the binding if so,
//     and record the outcome in the Executed condition.
//
// It returns an error when any read fails with something other than not-found, when the
// binding delete or status update helpers report an error, or when the placement uses the
// PickN type without a number of clusters.
func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtime.Result, error) {
	startTime := time.Now()
	evictionName := req.NamespacedName.Name
	klog.V(2).InfoS("ClusterResourcePlacementEviction reconciliation starts", "clusterResourcePlacementEviction", evictionName)
	defer func() {
		latency := time.Since(startTime).Milliseconds()
		klog.V(2).InfoS("ClusterResourcePlacementEviction reconciliation ends", "clusterResourcePlacementEviction", evictionName, "latency", latency)
	}()

	var eviction placementv1alpha1.ClusterResourcePlacementEviction
	if err := r.Client.Get(ctx, req.NamespacedName, &eviction); err != nil {
		klog.ErrorS(err, "Failed to get cluster resource placement eviction", "clusterResourcePlacementEviction", evictionName)
		return runtime.Result{}, client.IgnoreNotFound(err)
	}

	// An eviction already judged invalid for this generation is terminal.
	validCondition := eviction.GetCondition(string(placementv1alpha1.PlacementEvictionConditionTypeValid))
	if condition.IsConditionStatusFalse(validCondition, eviction.GetGeneration()) {
		klog.V(2).InfoS("Invalid eviction, no need to reconcile", "clusterResourcePlacementEviction", evictionName)
		return runtime.Result{}, nil
	}

	// Any Executed condition (true or false) means the eviction was already processed.
	executedCondition := eviction.GetCondition(string(placementv1alpha1.PlacementEvictionConditionTypeExecuted))
	if executedCondition != nil {
		klog.V(2).InfoS("Eviction has executed condition specified, no need to reconcile", "clusterResourcePlacementEviction", evictionName)
		return runtime.Result{}, nil
	}

	isCRPPresent := true
	var crp placementv1beta1.ClusterResourcePlacement
	if err := r.Client.Get(ctx, types.NamespacedName{Name: eviction.Spec.PlacementName}, &crp); err != nil {
		if !errors.IsNotFound(err) {
			return runtime.Result{}, err
		}
		isCRPPresent = false
	}
	if !isCRPPresent {
		klog.V(2).InfoS(evictionInvalidMissingCRP, "clusterResourcePlacementEviction", evictionName, "clusterResourcePlacement", eviction.Spec.PlacementName)
		markEvictionInvalid(&eviction, evictionInvalidMissingCRP)
		return runtime.Result{}, r.updateEvictionStatus(ctx, &eviction)
	}

	var crbList placementv1beta1.ClusterResourceBindingList
	if err := r.Client.List(ctx, &crbList, client.MatchingLabels{placementv1beta1.CRPTrackingLabel: crp.Name}); err != nil {
		return runtime.Result{}, err
	}

	// Find the binding that places resources on the cluster named by the eviction. If several
	// bindings match, the last one in the list is used.
	var evictionTargetBinding *placementv1beta1.ClusterResourceBinding
	for i := range crbList.Items {
		if crbList.Items[i].Spec.TargetCluster == eviction.Spec.ClusterName {
			evictionTargetBinding = &crbList.Items[i]
		}
	}
	if evictionTargetBinding == nil {
		klog.V(2).InfoS(evictionInvalidMissingCRB, "clusterResourcePlacementEviction", evictionName, "clusterName", eviction.Spec.ClusterName)
		markEvictionInvalid(&eviction, evictionInvalidMissingCRB)
		return runtime.Result{}, r.updateEvictionStatus(ctx, &eviction)
	}

	markEvictionValid(&eviction)
	isDBPresent := true
	var db placementv1alpha1.ClusterResourcePlacementDisruptionBudget
	// The disruption budget is looked up by the placement's name.
	if err := r.Client.Get(ctx, types.NamespacedName{Name: crp.Name}, &db); err != nil {
		if !errors.IsNotFound(err) {
			return runtime.Result{}, err
		}
		isDBPresent = false
	}

	if !isDBPresent {
		if err := r.deleteClusterResourceBinding(ctx, evictionTargetBinding); err != nil {
			return runtime.Result{}, err
		}
		markEvictionExecuted(&eviction, evictionAllowedNoPDB)
		return runtime.Result{}, r.updateEvictionStatus(ctx, &eviction)
	}

	totalBindings := len(crbList.Items)
	var desiredBindings int
	policy := crp.Spec.Policy
	switch {
	case policy == nil:
		// Guard against a placement with no policy instead of panicking on a nil dereference.
		// NOTE(review): assumes the v1beta1 API treats an unset policy as PickAll — confirm.
		desiredBindings = totalBindings
	case policy.PlacementType == placementv1beta1.PickAllPlacementType:
		desiredBindings = totalBindings
	case policy.PlacementType == placementv1beta1.PickNPlacementType:
		if policy.NumberOfClusters == nil {
			// Without a target count the budget cannot be evaluated; refuse to evict rather
			// than dereference a nil pointer or guess a value.
			err := fmt.Errorf("cluster resource placement %q has PickN placement type but no number of clusters", crp.Name)
			klog.ErrorS(err, "Failed to determine desired bindings for eviction", "clusterResourcePlacementEviction", evictionName, "clusterResourcePlacement", crp.Name)
			return runtime.Result{}, err
		}
		desiredBindings = int(*policy.NumberOfClusters)
	case policy.PlacementType == placementv1beta1.PickFixedPlacementType:
		desiredBindings = len(policy.ClusterNames)
	}

	allowed, disruptionsAllowed, availableBindings := isEvictionAllowed(desiredBindings, crbList.Items, db)
	if allowed {
		if err := r.deleteClusterResourceBinding(ctx, evictionTargetBinding); err != nil {
			return runtime.Result{}, err
		}
		markEvictionExecuted(&eviction, fmt.Sprintf(evictionAllowedPDBSpecified, disruptionsAllowed, availableBindings, desiredBindings, totalBindings))
	} else {
		markEvictionNotExecuted(&eviction, fmt.Sprintf(evictionBlockedPDBSpecified, disruptionsAllowed, availableBindings, desiredBindings, totalBindings))
	}

	return runtime.Result{}, r.updateEvictionStatus(ctx, &eviction)
}

// updateEvictionStatus writes the eviction's status through the status sub-client.
// On failure the error is logged and returned after being passed through
// controller.NewUpdateIgnoreConflictError; on success nil is returned.
func (r *Reconciler) updateEvictionStatus(ctx context.Context, eviction *placementv1alpha1.ClusterResourcePlacementEviction) error {
	ref := klog.KObj(eviction)
	err := r.Client.Status().Update(ctx, eviction)
	if err == nil {
		klog.V(2).InfoS("Updated the status of a eviction", "clusterResourcePlacementEviction", ref)
		return nil
	}
	klog.ErrorS(err, "Failed to update eviction status", "clusterResourcePlacementEviction", ref)
	return controller.NewUpdateIgnoreConflictError(err)
}

// deleteClusterResourceBinding issues a delete for the given binding.
// On failure the error is logged and returned after being passed through
// controller.NewDeleteIgnoreNotFoundError; on success nil is returned.
func (r *Reconciler) deleteClusterResourceBinding(ctx context.Context, binding *placementv1beta1.ClusterResourceBinding) error {
	ref := klog.KObj(binding)
	err := r.Client.Delete(ctx, binding)
	if err == nil {
		klog.V(2).InfoS("Issued delete on cluster resource binding, eviction succeeded", "clusterResourceBinding", ref)
		return nil
	}
	klog.ErrorS(err, "Failed to delete cluster resource binding", "clusterResourceBinding", ref)
	return controller.NewDeleteIgnoreNotFoundError(err)
}

// isEvictionAllowed decides whether the disruption budget permits evicting one binding.
//
// A binding counts as available when its ResourceBindingAvailable condition is true for
// the binding's current generation. The number of allowed disruptions is then derived
// from the budget:
//   - MaxUnavailable: maxUnavailable - (len(bindings) - availableBindings)
//   - MinAvailable:   availableBindings - minAvailable
//
// Both thresholds are resolved against desiredBindings, rounding percentages up. If both
// fields are set, the MinAvailable result overrides the MaxUnavailable one; if neither is
// set, no disruptions are allowed. Negative results are clamped to zero.
//
// If a threshold cannot be resolved (for example a malformed percentage), the error is
// logged and the eviction is blocked. Previously the error was discarded and the threshold
// treated as zero, which for MinAvailable allowed the eviction despite an unusable budget.
//
// It returns whether the eviction is allowed, the number of disruptions allowed, and the
// number of available bindings.
func isEvictionAllowed(desiredBindings int, bindings []placementv1beta1.ClusterResourceBinding, db placementv1alpha1.ClusterResourcePlacementDisruptionBudget) (bool, int, int) {
	availableBindings := 0
	for i := range bindings {
		availableCondition := bindings[i].GetCondition(string(placementv1beta1.ResourceBindingAvailable))
		if condition.IsConditionStatusTrue(availableCondition, bindings[i].GetGeneration()) {
			availableBindings++
		}
	}

	var disruptionsAllowed int
	if db.Spec.MaxUnavailable != nil {
		maxUnavailable, err := intstr.GetScaledValueFromIntOrPercent(db.Spec.MaxUnavailable, desiredBindings, true)
		if err != nil {
			// Fail closed: an unusable budget must never permit a disruption.
			klog.ErrorS(err, "Failed to resolve maxUnavailable, blocking eviction", "clusterResourcePlacementDisruptionBudget", klog.KObj(&db))
			return false, 0, availableBindings
		}
		unavailableBindings := len(bindings) - availableBindings
		disruptionsAllowed = maxUnavailable - unavailableBindings
	}
	if db.Spec.MinAvailable != nil {
		minAvailable, err := intstr.GetScaledValueFromIntOrPercent(db.Spec.MinAvailable, desiredBindings, true)
		if err != nil {
			// Fail closed: treating an unresolvable minimum as zero would allow the eviction.
			klog.ErrorS(err, "Failed to resolve minAvailable, blocking eviction", "clusterResourcePlacementDisruptionBudget", klog.KObj(&db))
			return false, 0, availableBindings
		}
		disruptionsAllowed = availableBindings - minAvailable
	}
	if disruptionsAllowed < 0 {
		disruptionsAllowed = 0
	}
	return disruptionsAllowed > 0, disruptionsAllowed, availableBindings
}

// markEvictionValid sets the Valid condition on the eviction to true for its current
// generation, with the "valid" reason and message.
func markEvictionValid(eviction *placementv1alpha1.ClusterResourcePlacementEviction) {
	eviction.SetConditions(metav1.Condition{
		Reason:             reasonClusterResourcePlacementEvictionValid,
		Message:            evictionValid,
		ObservedGeneration: eviction.Generation,
		Status:             metav1.ConditionTrue,
		Type:               string(placementv1alpha1.PlacementEvictionConditionTypeValid),
	})
}

// markEvictionInvalid sets the Valid condition on the eviction to false for its current
// generation, with the "invalid" reason and the supplied message.
func markEvictionInvalid(eviction *placementv1alpha1.ClusterResourcePlacementEviction, message string) {
	eviction.SetConditions(metav1.Condition{
		Reason:             reasonClusterResourcePlacementEvictionInvalid,
		Message:            message,
		ObservedGeneration: eviction.Generation,
		Status:             metav1.ConditionFalse,
		Type:               string(placementv1alpha1.PlacementEvictionConditionTypeValid),
	})
}

// markEvictionExecuted sets the Executed condition on the eviction to true for its
// current generation, with the "executed" reason and the supplied message.
func markEvictionExecuted(eviction *placementv1alpha1.ClusterResourcePlacementEviction, message string) {
	eviction.SetConditions(metav1.Condition{
		Reason:             reasonClusterResourcePlacementEvictionExecuted,
		Message:            message,
		ObservedGeneration: eviction.Generation,
		Status:             metav1.ConditionTrue,
		Type:               string(placementv1alpha1.PlacementEvictionConditionTypeExecuted),
	})
}

// markEvictionNotExecuted sets the Executed condition on the eviction to false for its
// current generation, with the "not executed" reason and the supplied message.
func markEvictionNotExecuted(eviction *placementv1alpha1.ClusterResourcePlacementEviction, message string) {
	eviction.SetConditions(metav1.Condition{
		Reason:             reasonClusterResourcePlacementEvictionNotExecuted,
		Message:            message,
		ObservedGeneration: eviction.Generation,
		Status:             metav1.ConditionFalse,
		Type:               string(placementv1alpha1.PlacementEvictionConditionTypeExecuted),
	})
}

// SetupWithManager registers this reconciler with the given manager as the controller
// for ClusterResourcePlacementEviction objects, limited to one concurrent reconcile.
func (r *Reconciler) SetupWithManager(mgr runtime.Manager) error {
	// Set the max number of concurrent reconciles.
	opts := ctrl.Options{MaxConcurrentReconciles: 1}
	builder := runtime.NewControllerManagedBy(mgr).WithOptions(opts)
	return builder.For(&placementv1alpha1.ClusterResourcePlacementEviction{}).Complete(r)
}
Loading

0 comments on commit e910804

Please sign in to comment.