diff --git a/go.mod b/go.mod
index 7464ee60..5a9a099e 100644
--- a/go.mod
+++ b/go.mod
@@ -10,6 +10,7 @@ require (
 	github.com/openkruise/kruise-api v1.3.0
 	github.com/spf13/pflag v1.0.5
 	github.com/yuin/gopher-lua v0.0.0-20220504180219-658193537a64
+	golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac
 	gopkg.in/yaml.v2 v2.4.0
 	k8s.io/api v0.22.6
 	k8s.io/apiextensions-apiserver v0.22.6
diff --git a/pkg/controller/deployment/controller.go b/pkg/controller/deployment/controller.go
new file mode 100644
index 00000000..85324e5e
--- /dev/null
+++ b/pkg/controller/deployment/controller.go
@@ -0,0 +1,204 @@
+/*
+Copyright 2019 The Kruise Authors.
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package deployment
+
+import (
+	"context"
+	"flag"
+	"reflect"
+
+	"github.com/openkruise/rollouts/pkg/feature"
+	clientutil "github.com/openkruise/rollouts/pkg/util/client"
+	utilfeature "github.com/openkruise/rollouts/pkg/util/feature"
+	appsv1 "k8s.io/api/apps/v1"
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/errors"
+	clientset "k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/kubernetes/scheme"
+	v1core "k8s.io/client-go/kubernetes/typed/core/v1"
+	appslisters "k8s.io/client-go/listers/apps/v1"
+	corelisters "k8s.io/client-go/listers/core/v1"
+	toolscache "k8s.io/client-go/tools/cache"
+	"k8s.io/client-go/tools/record"
+	"k8s.io/klog/v2"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/controller"
+	"sigs.k8s.io/controller-runtime/pkg/event"
+	"sigs.k8s.io/controller-runtime/pkg/handler"
+	"sigs.k8s.io/controller-runtime/pkg/manager"
+	"sigs.k8s.io/controller-runtime/pkg/predicate"
+	"sigs.k8s.io/controller-runtime/pkg/reconcile"
+	"sigs.k8s.io/controller-runtime/pkg/source"
+)
+
+func init() {
+	flag.IntVar(&concurrentReconciles, "deployment-workers", concurrentReconciles, "Max concurrent workers for advanced deployment controller.")
+}
+
+var (
+	concurrentReconciles = 3
+)
+
+// Add creates a new advanced deployment Controller and adds it to the Manager with default RBAC. The Manager will set fields on the Controller
+// and Start it when the Manager is Started.
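+// Registration is gated: when the AdvancedDeploymentGate feature gate is off,
+// Add is a no-op, so none of the watches or workers in this file are started.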
+func Add(mgr manager.Manager) error {
+	if !utilfeature.DefaultFeatureGate.Enabled(feature.AdvancedDeploymentGate) {
+		klog.Warningf("Advanced deployment controller is disabled")
+		return nil
+	}
+	r, err := newReconciler(mgr)
+	if err != nil {
+		return err
+	}
+	return add(mgr, r)
+}
+
+// newReconciler returns a new reconcile.Reconciler
+func newReconciler(mgr manager.Manager) (reconcile.Reconciler, error) {
+	cacher := mgr.GetCache()
+	podInformer, err := cacher.GetInformerForKind(context.TODO(), v1.SchemeGroupVersion.WithKind("Pod"))
+	if err != nil {
+		return nil, err
+	}
+	dInformer, err := cacher.GetInformerForKind(context.TODO(), appsv1.SchemeGroupVersion.WithKind("Deployment"))
+	if err != nil {
+		return nil, err
+	}
+	rsInformer, err := cacher.GetInformerForKind(context.TODO(), appsv1.SchemeGroupVersion.WithKind("ReplicaSet"))
+	if err != nil {
+		return nil, err
+	}
+
+	// Lister
+	dLister := appslisters.NewDeploymentLister(dInformer.(toolscache.SharedIndexInformer).GetIndexer())
+	rsLister := appslisters.NewReplicaSetLister(rsInformer.(toolscache.SharedIndexInformer).GetIndexer())
+	podLister := corelisters.NewPodLister(podInformer.(toolscache.SharedIndexInformer).GetIndexer())
+
+	// Client & Recorder
+	genericClient := clientutil.GetGenericClientWithName("advanced-deployment-controller")
+	eventBroadcaster := record.NewBroadcaster()
+	eventBroadcaster.StartLogging(klog.Infof)
+	eventBroadcaster.StartRecordingToSink(&v1core.EventSinkImpl{Interface: genericClient.KubeClient.CoreV1().Events("")})
+	recorder := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "advanced-deployment-controller"})
+
+	// Deployment controller factory
+	factory := &controllerFactory{
+		client:           genericClient.KubeClient,
+		eventBroadcaster: eventBroadcaster,
+		eventRecorder:    recorder,
+		dLister:          dLister,
+		rsLister:         rsLister,
+		podLister:        podLister,
+	}
+	return &ReconcileDeployment{Client: mgr.GetClient(), controllerFactory: factory}, nil
+}
+
+var _ reconcile.Reconciler = &ReconcileDeployment{}
+
+// ReconcileDeployment reconciles a Deployment object
+type ReconcileDeployment struct {
+	// client interface
+	client.Client
+	controllerFactory *controllerFactory
+}
+
+// add adds a new Controller to mgr with r as the reconcile.Reconciler
+func add(mgr manager.Manager, r reconcile.Reconciler) error {
+	// Create a new controller
+	c, err := controller.New("advanced-deployment-controller", mgr, controller.Options{
+		Reconciler: r, MaxConcurrentReconciles: concurrentReconciles})
+	if err != nil {
+		return err
+	}
+
+	// ReplicaSets are owned by Deployments, so enqueue the owning Deployment.
+	if err = c.Watch(&source.Kind{Type: &appsv1.ReplicaSet{}}, &handler.EnqueueRequestForOwner{
+		IsController: true, OwnerType: &appsv1.Deployment{}}, predicate.Funcs{}); err != nil {
+		return err
+	}
+
+	// TODO: handle deployment only when the deployment is under our control
+	updateHandler := func(e event.UpdateEvent) bool {
+		oldObject := e.ObjectOld.(*appsv1.Deployment)
+		newObject := e.ObjectNew.(*appsv1.Deployment)
+		if oldObject.Generation != newObject.Generation || newObject.DeletionTimestamp != nil {
+			klog.V(3).Infof("Observed updated Spec for Deployment: %s/%s", newObject.Namespace, newObject.Name)
+			return true
+		}
+		if len(oldObject.Annotations) != len(newObject.Annotations) || !reflect.DeepEqual(oldObject.Annotations, newObject.Annotations) {
+			klog.V(3).Infof("Observed updated Annotation for Deployment: %s/%s", newObject.Namespace, newObject.Name)
+			return true
+		}
+		return false
+	}
+
+	// Watch for changes to Deployment
+	return c.Watch(&source.Kind{Type: &appsv1.Deployment{}}, &handler.EnqueueRequestForObject{}, predicate.Funcs{UpdateFunc: updateHandler})
+}
+
+// Reconcile reads the state of the cluster for a Deployment object and makes changes based on the state read
+// and what is in the Deployment.Spec and Deployment.Annotations
+// Automatically generate RBAC rules to allow the Controller to read and write ReplicaSets
+func (r *ReconcileDeployment) Reconcile(_ context.Context, request reconcile.Request) (res reconcile.Result, retErr error) {
+	deployment := new(appsv1.Deployment)
+	err := r.Get(context.TODO(), request.NamespacedName, deployment)
+	if err != nil {
+		if errors.IsNotFound(err) {
+			// Object not found, return. Created objects are automatically garbage collected.
+			// For additional cleanup logic use finalizers.
+			return ctrl.Result{}, nil
+		}
+		// Error reading the object - requeue the request.
+		return ctrl.Result{}, err
+	}
+
+	// TODO: create new controller only when deployment is under our control
+	dc, err := r.controllerFactory.NewController(deployment)
+	if err != nil {
+		return reconcile.Result{}, err
+	}
+
+	err = dc.syncDeployment(context.Background(), request.NamespacedName.String())
+	return ctrl.Result{}, err
+}
+
+type controllerFactory struct {
+	client           clientset.Interface
+	eventBroadcaster record.EventBroadcaster
+	eventRecorder    record.EventRecorder
+
+	// dLister can list/get deployments from the shared informer's store
+	dLister appslisters.DeploymentLister
+	// rsLister can list/get replica sets from the shared informer's store
+	rsLister appslisters.ReplicaSetLister
+	// podLister can list/get pods from the shared informer's store
+	podLister corelisters.PodLister
+}
+
+// NewController creates a new DeploymentController
+// TODO: create new controller only when deployment is under our control
+func (f *controllerFactory) NewController(_ *appsv1.Deployment) (*DeploymentController, error) {
+	return &DeploymentController{
+		client:           f.client,
+		eventBroadcaster: f.eventBroadcaster,
+		eventRecorder:    f.eventRecorder,
+		dLister:          f.dLister,
+		rsLister:         f.rsLister,
+		podLister:        f.podLister,
+	}, nil
+}
diff --git a/pkg/controller/deployment/deployment_controller.go b/pkg/controller/deployment/deployment_controller.go
new file mode 100644
index 00000000..093b0655
--- /dev/null
+++ b/pkg/controller/deployment/deployment_controller.go
@@ -0,0 +1,280 @@
+/*
+Copyright 2015 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package deployment contains all the logic for handling Kubernetes Deployments.
+// It implements a set of strategies (rolling, recreate) for deploying an application,
+// the means to rollback to previous versions, proportional scaling for mitigating
+// risk, cleanup policy, and other useful features of Deployments.
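+//
+// This file is adapted from the upstream Kubernetes deployment controller
+// (pkg/controller/deployment); the upstream informer event handlers and
+// workqueue are replaced by controller-runtime reconciliation (see controller.go).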
+package deployment
+
+import (
+	"context"
+	"fmt"
+	"reflect"
+	"time"
+
+	"github.com/openkruise/rollouts/pkg/controller/deployment/util"
+	apps "k8s.io/api/apps/v1"
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
+	clientset "k8s.io/client-go/kubernetes"
+	appslisters "k8s.io/client-go/listers/apps/v1"
+	corelisters "k8s.io/client-go/listers/core/v1"
+	"k8s.io/client-go/tools/cache"
+	"k8s.io/client-go/tools/record"
+	"k8s.io/klog/v2"
+)
+
+const (
+	// maxRetries is the number of times a deployment will be retried before it is dropped out of the queue.
+	// With the current rate-limiter in use (5ms*2^(maxRetries-1)) the following numbers represent the times
+	// a deployment is going to be requeued:
+	//
+	// 5ms, 10ms, 20ms, 40ms, 80ms, 160ms, 320ms, 640ms, 1.3s, 2.6s, 5.1s, 10.2s, 20.4s, 41s, 82s
+	maxRetries = 15
+)
+
+// controllerKind contains the schema.GroupVersionKind for this controller type.
+var controllerKind = apps.SchemeGroupVersion.WithKind("Deployment")
+
+// DeploymentController is responsible for synchronizing Deployment objects stored
+// in the system with actual running replica sets and pods.
+type DeploymentController struct {
+	client clientset.Interface
+
+	eventBroadcaster record.EventBroadcaster
+	eventRecorder    record.EventRecorder
+
+	// dLister can list/get deployments from the shared informer's store
+	dLister appslisters.DeploymentLister
+	// rsLister can list/get replica sets from the shared informer's store
+	rsLister appslisters.ReplicaSetLister
+	// podLister can list/get pods from the shared informer's store
+	podLister corelisters.PodLister
+}
+
+// getDeploymentsForReplicaSet returns a list of Deployments that potentially
+// match a ReplicaSet.
+func (dc *DeploymentController) getDeploymentsForReplicaSet(rs *apps.ReplicaSet) []*apps.Deployment {
+	deployments, err := util.GetDeploymentsForReplicaSet(dc.dLister, rs)
+	if err != nil || len(deployments) == 0 {
+		return nil
+	}
+	// Because all ReplicaSets belonging to a deployment should have a unique label key,
+	// there should never be more than one deployment returned by the above method.
+	// If that happens we should probably dynamically repair the situation by ultimately
+	// trying to clean up one of the controllers; for now we just return the older one
+	if len(deployments) > 1 {
+		// ControllerRef will ensure we don't do anything crazy, but more than one
+		// item in this list nevertheless constitutes user error.
+		klog.V(4).InfoS("user error! more than one deployment is selecting replica set",
+			"replicaSet", klog.KObj(rs), "labels", rs.Labels, "deployment", klog.KObj(deployments[0]))
+	}
+	return deployments
+}
+
+// getDeploymentForPod returns the deployment managing the given Pod.
+func (dc *DeploymentController) getDeploymentForPod(pod *v1.Pod) *apps.Deployment {
+	// Find the owning replica set
+	var rs *apps.ReplicaSet
+	var err error
+	controllerRef := metav1.GetControllerOf(pod)
+	if controllerRef == nil {
+		// No controller owns this Pod.
+		return nil
+	}
+	if controllerRef.Kind != apps.SchemeGroupVersion.WithKind("ReplicaSet").Kind {
+		// Not a pod owned by a replica set.
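+		// (Deployments manage Pods only through ReplicaSets, so anything else is ignored.)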
+		return nil
+	}
+	rs, err = dc.rsLister.ReplicaSets(pod.Namespace).Get(controllerRef.Name)
+	if err != nil || rs.UID != controllerRef.UID {
+		klog.V(4).InfoS("Cannot get replicaset for pod", "ownerReference", controllerRef.Name, "pod", klog.KObj(pod), "err", err)
+		return nil
+	}
+
+	// Now find the Deployment that owns that ReplicaSet.
+	controllerRef = metav1.GetControllerOf(rs)
+	if controllerRef == nil {
+		return nil
+	}
+	return dc.resolveControllerRef(rs.Namespace, controllerRef)
+}
+
+// resolveControllerRef returns the controller referenced by a ControllerRef,
+// or nil if the ControllerRef could not be resolved to a matching controller
+// of the correct Kind.
+func (dc *DeploymentController) resolveControllerRef(namespace string, controllerRef *metav1.OwnerReference) *apps.Deployment {
+	// We can't look up by UID, so look up by Name and then verify UID.
+	// Don't even try to look up by Name if it's the wrong Kind.
+	if controllerRef.Kind != controllerKind.Kind {
+		return nil
+	}
+	d, err := dc.dLister.Deployments(namespace).Get(controllerRef.Name)
+	if err != nil {
+		return nil
+	}
+	if d.UID != controllerRef.UID {
+		// The controller we found with this Name is not the same one that the
+		// ControllerRef points to.
+		return nil
+	}
+	return d
+}
+
+// getReplicaSetsForDeployment lists the ReplicaSets that match the Deployment's
+// label selector from the shared informer cache. Unlike the upstream controller,
+// this version does not adopt or orphan ReplicaSets via a ControllerRefManager.
+func (dc *DeploymentController) getReplicaSetsForDeployment(ctx context.Context, d *apps.Deployment) ([]*apps.ReplicaSet, error) {
+	deploymentSelector, err := metav1.LabelSelectorAsSelector(d.Spec.Selector)
+	if err != nil {
+		return nil, fmt.Errorf("deployment %s/%s has invalid label selector: %v", d.Namespace, d.Name, err)
+	}
+	return dc.rsLister.ReplicaSets(d.Namespace).List(deploymentSelector)
+}
+
+// getPodMapForDeployment returns the Pods managed by a Deployment.
+//
+// It returns a map from ReplicaSet UID to a list of Pods controlled by that RS,
+// according to the Pod's ControllerRef.
+// NOTE: The pod pointers returned by this method point to the pod objects in the cache and thus
+// shouldn't be modified in any way.
+func (dc *DeploymentController) getPodMapForDeployment(d *apps.Deployment, rsList []*apps.ReplicaSet) (map[types.UID][]*v1.Pod, error) {
+	// Get all Pods that potentially belong to this Deployment.
+	selector, err := metav1.LabelSelectorAsSelector(d.Spec.Selector)
+	if err != nil {
+		return nil, err
+	}
+	pods, err := dc.podLister.Pods(d.Namespace).List(selector)
+	if err != nil {
+		return nil, err
+	}
+	// Group Pods by their controller (if it's in rsList).
+	podMap := make(map[types.UID][]*v1.Pod, len(rsList))
+	for _, rs := range rsList {
+		podMap[rs.UID] = []*v1.Pod{}
+	}
+	for _, pod := range pods {
+		// Do not ignore inactive Pods because Recreate Deployments need to verify that no
+		// Pods from older versions are running before spinning up new Pods.
+		controllerRef := metav1.GetControllerOf(pod)
+		if controllerRef == nil {
+			continue
+		}
+		// Only append if we care about this UID.
+		if _, ok := podMap[controllerRef.UID]; ok {
+			podMap[controllerRef.UID] = append(podMap[controllerRef.UID], pod)
+		}
+	}
+	return podMap, nil
+}
+
+// syncDeployment will sync the deployment with the given key.
+// This function is not meant to be invoked concurrently with the same key.
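+// The sync is ordered: deleted deployments only get a status sync; paused
+// deployments and pure scaling events go through sync(); a pending rollback is
+// handled before any rollout; otherwise the configured strategy (Recreate or
+// RollingUpdate) drives the rollout.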
+func (dc *DeploymentController) syncDeployment(ctx context.Context, key string) error {
+	namespace, name, err := cache.SplitMetaNamespaceKey(key)
+	if err != nil {
+		klog.ErrorS(err, "Failed to split meta namespace cache key", "cacheKey", key)
+		return err
+	}
+
+	startTime := time.Now()
+	klog.V(4).InfoS("Started syncing deployment", "deployment", klog.KRef(namespace, name), "startTime", startTime)
+	defer func() {
+		klog.V(4).InfoS("Finished syncing deployment", "deployment", klog.KRef(namespace, name), "duration", time.Since(startTime))
+	}()
+
+	deployment, err := dc.dLister.Deployments(namespace).Get(name)
+	if errors.IsNotFound(err) {
+		klog.V(2).InfoS("Deployment has been deleted", "deployment", klog.KRef(namespace, name))
+		return nil
+	}
+	if err != nil {
+		return err
+	}
+
+	// Deep-copy otherwise we are mutating our cache.
+	// TODO: Deep-copy only when needed.
+	d := deployment.DeepCopy()
+
+	everything := metav1.LabelSelector{}
+	if reflect.DeepEqual(d.Spec.Selector, &everything) {
+		dc.eventRecorder.Eventf(d, v1.EventTypeWarning, "SelectingAll", "This deployment is selecting all pods. A non-empty selector is required.")
+		if d.Status.ObservedGeneration < d.Generation {
+			d.Status.ObservedGeneration = d.Generation
+			dc.client.AppsV1().Deployments(d.Namespace).UpdateStatus(ctx, d, metav1.UpdateOptions{})
+		}
+		return nil
+	}
+
+	// List ReplicaSets that match this Deployment's selector
+	// (no adoption/orphaning is performed in this version).
+	rsList, err := dc.getReplicaSetsForDeployment(ctx, d)
+	if err != nil {
+		return err
+	}
+	// List all Pods owned by this Deployment, grouped by their ReplicaSet.
+	// Current uses of the podMap are:
+	//
+	// * check if a Pod is labeled correctly with the pod-template-hash label.
+	// * check that no old Pods are running in the middle of Recreate Deployments.
+	podMap, err := dc.getPodMapForDeployment(d, rsList)
+	if err != nil {
+		return err
+	}
+
+	if d.DeletionTimestamp != nil {
+		return dc.syncStatusOnly(ctx, d, rsList)
+	}
+
+	// Update deployment conditions with an Unknown condition when pausing/resuming
+	// a deployment. In this way, we can be sure that we won't timeout when a user
+	// resumes a Deployment with a set progressDeadlineSeconds.
+	if err = dc.checkPausedConditions(ctx, d); err != nil {
+		return err
+	}
+
+	if d.Spec.Paused {
+		return dc.sync(ctx, d, rsList)
+	}
+
+	// rollback is not re-entrant in case the underlying replica sets are updated with a new
+	// revision so we should ensure that we won't proceed to update replica sets until we
+	// make sure that the deployment has cleaned up its rollback spec in subsequent enqueues.
+	if getRollbackTo(d) != nil {
+		return dc.rollback(ctx, d, rsList)
+	}
+
+	scalingEvent, err := dc.isScalingEvent(ctx, d, rsList)
+	if err != nil {
+		return err
+	}
+	if scalingEvent {
+		return dc.sync(ctx, d, rsList)
+	}
+
+	switch d.Spec.Strategy.Type {
+	case apps.RecreateDeploymentStrategyType:
+		return dc.rolloutRecreate(ctx, d, rsList, podMap)
+	case apps.RollingUpdateDeploymentStrategyType:
+		return dc.rolloutRolling(ctx, d, rsList)
+	}
+	return fmt.Errorf("unexpected deployment strategy type: %s", d.Spec.Strategy.Type)
+}
diff --git a/pkg/controller/deployment/progress.go b/pkg/controller/deployment/progress.go
new file mode 100644
index 00000000..6788f25c
--- /dev/null
+++ b/pkg/controller/deployment/progress.go
@@ -0,0 +1,200 @@
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package deployment
+
+import (
+	"context"
+	"fmt"
+	"reflect"
+	"time"
+
+	apps "k8s.io/api/apps/v1"
+	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/klog/v2"
+
+	"github.com/openkruise/rollouts/pkg/controller/deployment/util"
+)
+
+// syncRolloutStatus updates the status of a deployment during a rollout. There are
+// cases this helper will run that cannot be prevented from the scaling detection,
+// for example a resync of the deployment after it was scaled up. In those cases,
+// we shouldn't try to estimate any progress.
+func (dc *DeploymentController) syncRolloutStatus(ctx context.Context, allRSs []*apps.ReplicaSet, newRS *apps.ReplicaSet, d *apps.Deployment) error {
+	newStatus := calculateStatus(allRSs, newRS, d)
+
+	// If there is no progressDeadlineSeconds set, remove any Progressing condition.
+	if !util.HasProgressDeadline(d) {
+		util.RemoveDeploymentCondition(&newStatus, apps.DeploymentProgressing)
+	}
+
+	// If there is only one replica set that is active then that means we are not running
+	// a new rollout and this is a resync where we don't need to estimate any progress.
+	// In such a case, we should simply not estimate any progress for this deployment.
+	currentCond := util.GetDeploymentCondition(d.Status, apps.DeploymentProgressing)
+	isCompleteDeployment := newStatus.Replicas == newStatus.UpdatedReplicas && currentCond != nil && currentCond.Reason == util.NewRSAvailableReason
+	// Check for progress only if there is a progress deadline set and the latest rollout
+	// hasn't completed yet.
+	if util.HasProgressDeadline(d) && !isCompleteDeployment {
+		switch {
+		case util.DeploymentComplete(d, &newStatus):
+			// Update the deployment conditions with a message for the new replica set that
+			// was successfully deployed. If the condition already exists, we ignore this update.
+			msg := fmt.Sprintf("Deployment %q has successfully progressed.", d.Name)
+			if newRS != nil {
+				msg = fmt.Sprintf("ReplicaSet %q has successfully progressed.", newRS.Name)
+			}
+			condition := util.NewDeploymentCondition(apps.DeploymentProgressing, v1.ConditionTrue, util.NewRSAvailableReason, msg)
+			util.SetDeploymentCondition(&newStatus, *condition)
+
+		case util.DeploymentProgressing(d, &newStatus):
+			// If there is any progress made, continue by not checking if the deployment failed. This
+			// behavior emulates the rolling updater progressDeadline check.
+			msg := fmt.Sprintf("Deployment %q is progressing.", d.Name)
+			if newRS != nil {
+				msg = fmt.Sprintf("ReplicaSet %q is progressing.", newRS.Name)
+			}
+			condition := util.NewDeploymentCondition(apps.DeploymentProgressing, v1.ConditionTrue, util.ReplicaSetUpdatedReason, msg)
+			// Update the current Progressing condition or add a new one if it doesn't exist.
+			// If a Progressing condition with status=true already exists, we should update
+			// everything but lastTransitionTime.
+			// SetDeploymentCondition already does that but
+			// it also is not updating conditions when the reason of the new condition is the
+			// same as the old. The Progressing condition is a special case because we want to
+			// update with the same reason and change just lastUpdateTime iff we notice any
+			// progress. That's why we handle it here.
+			if currentCond != nil {
+				if currentCond.Status == v1.ConditionTrue {
+					condition.LastTransitionTime = currentCond.LastTransitionTime
+				}
+				util.RemoveDeploymentCondition(&newStatus, apps.DeploymentProgressing)
+			}
+			util.SetDeploymentCondition(&newStatus, *condition)
+
+		case util.DeploymentTimedOut(d, &newStatus):
+			// Update the deployment with a timeout condition. If the condition already exists,
+			// we ignore this update.
+			msg := fmt.Sprintf("Deployment %q has timed out progressing.", d.Name)
+			if newRS != nil {
+				msg = fmt.Sprintf("ReplicaSet %q has timed out progressing.", newRS.Name)
+			}
+			condition := util.NewDeploymentCondition(apps.DeploymentProgressing, v1.ConditionFalse, util.TimedOutReason, msg)
+			util.SetDeploymentCondition(&newStatus, *condition)
+		}
+	}
+
+	// Move failure conditions of all replica sets into deployment conditions. For now,
+	// only one failure condition is returned from getReplicaFailures.
+	if replicaFailureCond := dc.getReplicaFailures(allRSs, newRS); len(replicaFailureCond) > 0 {
+		// There will be only one ReplicaFailure condition on the replica set.
+		util.SetDeploymentCondition(&newStatus, replicaFailureCond[0])
+	} else {
+		util.RemoveDeploymentCondition(&newStatus, apps.DeploymentReplicaFailure)
+	}
+
+	// Do not update if there is nothing new to add.
+	if reflect.DeepEqual(d.Status, newStatus) {
+		// Requeue the deployment if required.
+		dc.requeueStuckDeployment(d, newStatus)
+		return nil
+	}
+
+	newDeployment := d
+	newDeployment.Status = newStatus
+	_, err := dc.client.AppsV1().Deployments(newDeployment.Namespace).UpdateStatus(ctx, newDeployment, metav1.UpdateOptions{})
+	return err
+}
+
+// getReplicaFailures will convert replica failure conditions from replica sets
+// to deployment conditions.
+func (dc *DeploymentController) getReplicaFailures(allRSs []*apps.ReplicaSet, newRS *apps.ReplicaSet) []apps.DeploymentCondition {
+	var conditions []apps.DeploymentCondition
+	if newRS != nil {
+		for _, c := range newRS.Status.Conditions {
+			if c.Type != apps.ReplicaSetReplicaFailure {
+				continue
+			}
+			conditions = append(conditions, util.ReplicaSetToDeploymentCondition(c))
+		}
+	}
+
+	// Return failures for the new replica set over failures from old replica sets.
+	if len(conditions) > 0 {
+		return conditions
+	}
+
+	for i := range allRSs {
+		rs := allRSs[i]
+		if rs == nil {
+			continue
+		}
+
+		for _, c := range rs.Status.Conditions {
+			if c.Type != apps.ReplicaSetReplicaFailure {
+				continue
+			}
+			conditions = append(conditions, util.ReplicaSetToDeploymentCondition(c))
+		}
+	}
+	return conditions
+}
+
+// used for unit testing
+var nowFn = func() time.Time { return time.Now() }
+
+// requeueStuckDeployment checks whether the provided deployment needs to be synced for a progress
+// check. It returns the time after which the deployment will be requeued for the progress check, 0 if it
+// will be requeued now, or -1 if it does not need to be requeued.
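+// In this controller-runtime adaptation the workqueue enqueue calls are stubbed
+// out (see the commented requeue lines below), so callers currently treat the
+// returned duration as a hint only.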
+func (dc *DeploymentController) requeueStuckDeployment(d *apps.Deployment, newStatus apps.DeploymentStatus) time.Duration {
+	currentCond := util.GetDeploymentCondition(d.Status, apps.DeploymentProgressing)
+	// Can't estimate progress if there is no deadline in the spec or progressing condition in the current status.
+	if !util.HasProgressDeadline(d) || currentCond == nil {
+		return time.Duration(-1)
+	}
+	// No need to estimate progress if the rollout is complete or already timed out.
+	if util.DeploymentComplete(d, &newStatus) || currentCond.Reason == util.TimedOutReason {
+		return time.Duration(-1)
+	}
+	// If there is no sign of progress at this point then there is a high chance that the
+	// deployment is stuck. We should resync this deployment at some point in the future[1]
+	// and check whether it has timed out. We definitely need this, otherwise we depend on the
+	// controller resync interval. See https://github.com/kubernetes/kubernetes/issues/34458.
+	//
+	// [1] ProgressingCondition.LastUpdatedTime + progressDeadlineSeconds - time.Now()
+	//
+	// For example, if a Deployment updated its Progressing condition 3 minutes ago and has a
+	// deadline of 10 minutes, it would need to be resynced for a progress check after 7 minutes.
+	//
+	// lastUpdated: 00:00:00
+	// now: 00:03:00
+	// progressDeadlineSeconds: 600 (10 minutes)
+	//
+	// lastUpdated + progressDeadlineSeconds - now => 00:00:00 + 00:10:00 - 00:03:00 => 07:00
+	after := currentCond.LastUpdateTime.Time.Add(time.Duration(*d.Spec.ProgressDeadlineSeconds) * time.Second).Sub(nowFn())
+	// If the remaining time is less than a second, then requeue the deployment immediately.
+	// Make it ratelimited so we stay on the safe side, eventually the Deployment should
+	// transition either to a Complete or to a TimedOut condition.
+	if after < time.Second {
+		klog.V(4).Infof("Queueing up deployment %q for a progress check now", d.Name)
+		// dc.enqueueRateLimited(d) requeue
+		return time.Duration(0)
+	}
+	klog.V(4).Infof("Queueing up deployment %q for a progress check after %ds", d.Name, int(after.Seconds()))
+	// Add a second to avoid milliseconds skew in AddAfter.
+	// See https://github.com/kubernetes/kubernetes/issues/39785#issuecomment-279959133 for more info.
+	// dc.enqueueAfter(d, after+time.Second) requeue
+	return after
+}
diff --git a/pkg/controller/deployment/recreate.go b/pkg/controller/deployment/recreate.go
new file mode 100644
index 00000000..f462fd65
--- /dev/null
+++ b/pkg/controller/deployment/recreate.go
@@ -0,0 +1,132 @@
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package deployment
+
+import (
+	"context"
+
+	"github.com/openkruise/rollouts/pkg/controller/deployment/util"
+	apps "k8s.io/api/apps/v1"
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/types"
+)
+
+// rolloutRecreate implements the logic for recreating a replica set.
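+// The flow is: scale all old ReplicaSets down to zero, wait until their Pods
+// have actually terminated, then create (or scale up) the new ReplicaSet to
+// the full desired count, syncing status after each partial step.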
+func (dc *DeploymentController) rolloutRecreate(ctx context.Context, d *apps.Deployment, rsList []*apps.ReplicaSet, podMap map[types.UID][]*v1.Pod) error {
+	// Don't create a new RS if one doesn't already exist, so that we avoid scaling up before scaling down.
+	newRS, oldRSs, err := dc.getAllReplicaSetsAndSyncRevision(ctx, d, rsList, false)
+	if err != nil {
+		return err
+	}
+	allRSs := append(oldRSs, newRS)
+	activeOldRSs := util.FilterActiveReplicaSets(oldRSs)
+
+	// scale down old replica sets.
+	scaledDown, err := dc.scaleDownOldReplicaSetsForRecreate(ctx, activeOldRSs, d)
+	if err != nil {
+		return err
+	}
+	if scaledDown {
+		// Update DeploymentStatus.
+		return dc.syncRolloutStatus(ctx, allRSs, newRS, d)
+	}
+
+	// Do not process a deployment when it has old pods running.
+	if oldPodsRunning(newRS, oldRSs, podMap) {
+		return dc.syncRolloutStatus(ctx, allRSs, newRS, d)
+	}
+
+	// If we need to create a new RS, create it now.
+	if newRS == nil {
+		newRS, oldRSs, err = dc.getAllReplicaSetsAndSyncRevision(ctx, d, rsList, true)
+		if err != nil {
+			return err
+		}
+		allRSs = append(oldRSs, newRS)
+	}
+
+	// scale up new replica set.
+	if _, err := dc.scaleUpNewReplicaSetForRecreate(ctx, newRS, d); err != nil {
+		return err
+	}
+
+	if util.DeploymentComplete(d, &d.Status) {
+		if err := dc.cleanupDeployment(ctx, oldRSs, d); err != nil {
+			return err
+		}
+	}
+
+	// Sync deployment status.
+	return dc.syncRolloutStatus(ctx, allRSs, newRS, d)
+}
+
+// scaleDownOldReplicaSetsForRecreate scales down old replica sets when deployment strategy is "Recreate".
+func (dc *DeploymentController) scaleDownOldReplicaSetsForRecreate(ctx context.Context, oldRSs []*apps.ReplicaSet, deployment *apps.Deployment) (bool, error) {
+	scaled := false
+	for i := range oldRSs {
+		rs := oldRSs[i]
+		// Scaling not required.
+		if *(rs.Spec.Replicas) == 0 {
+			continue
+		}
+		scaledRS, updatedRS, err := dc.scaleReplicaSetAndRecordEvent(ctx, rs, 0, deployment)
+		if err != nil {
+			return false, err
+		}
+		if scaledRS {
+			oldRSs[i] = updatedRS
+			scaled = true
+		}
+	}
+	return scaled, nil
+}
+
+// oldPodsRunning returns whether there are old pods running or any of the old ReplicaSets thinks that it runs pods.
+func oldPodsRunning(newRS *apps.ReplicaSet, oldRSs []*apps.ReplicaSet, podMap map[types.UID][]*v1.Pod) bool {
+	if oldPods := util.GetActualReplicaCountForReplicaSets(oldRSs); oldPods > 0 {
+		return true
+	}
+	for rsUID, podList := range podMap {
+		// If the pods belong to the new ReplicaSet, ignore.
+		if newRS != nil && newRS.UID == rsUID {
+			continue
+		}
+		for _, pod := range podList {
+			switch pod.Status.Phase {
+			case v1.PodFailed, v1.PodSucceeded:
+				// Don't count pods in terminal state.
+				continue
+			case v1.PodUnknown:
+				// v1.PodUnknown is a deprecated status.
+				// This logic is kept for backward compatibility.
+				// This used to happen in situations like when the node is temporarily disconnected from the cluster.
+				// If we can't be sure that the pod is not running, we have to count it.
+				return true
+			default:
+				// Pod is not in terminal phase.
+				return true
+			}
+		}
+	}
+	return false
+}
+
+// scaleUpNewReplicaSetForRecreate scales up new replica set when deployment strategy is "Recreate".
+func (dc *DeploymentController) scaleUpNewReplicaSetForRecreate(ctx context.Context, newRS *apps.ReplicaSet, deployment *apps.Deployment) (bool, error) {
+	scaled, _, err := dc.scaleReplicaSetAndRecordEvent(ctx, newRS, *(deployment.Spec.Replicas), deployment)
+	return scaled, err
+}
diff --git a/pkg/controller/deployment/rollback.go b/pkg/controller/deployment/rollback.go
new file mode 100644
index 00000000..53467f7f
--- /dev/null
+++ b/pkg/controller/deployment/rollback.go
@@ -0,0 +1,149 @@
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package deployment
+
+import (
+	"context"
+	"fmt"
+	"strconv"
+
+	apps "k8s.io/api/apps/v1"
+	v1 "k8s.io/api/core/v1"
+	extensions "k8s.io/api/extensions/v1beta1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/klog/v2"
+
+	deploymentutil "github.com/openkruise/rollouts/pkg/controller/deployment/util"
+)
+
+// rollback the deployment to the specified revision. In any case cleanup the rollback spec.
+func (dc *DeploymentController) rollback(ctx context.Context, d *apps.Deployment, rsList []*apps.ReplicaSet) error {
+	newRS, allOldRSs, err := dc.getAllReplicaSetsAndSyncRevision(ctx, d, rsList, true)
+	if err != nil {
+		return err
+	}
+
+	allRSs := append(allOldRSs, newRS)
+	rollbackTo := getRollbackTo(d)
+	// If rollback revision is 0, rollback to the last revision
+	if rollbackTo.Revision == 0 {
+		if rollbackTo.Revision = deploymentutil.LastRevision(allRSs); rollbackTo.Revision == 0 {
+			// If we still can't find the last revision, give up the rollback
+			dc.emitRollbackWarningEvent(d, deploymentutil.RollbackRevisionNotFound, "Unable to find last revision.")
+			// Give up rollback
+			return dc.updateDeploymentAndClearRollbackTo(ctx, d)
+		}
+	}
+	for _, rs := range allRSs {
+		v, err := deploymentutil.Revision(rs)
+		if err != nil {
+			klog.V(4).Infof("Unable to extract revision from deployment's replica set %q: %v", rs.Name, err)
+			continue
+		}
+		if v == rollbackTo.Revision {
+			klog.V(4).Infof("Found replica set %q with desired revision %d", rs.Name, v)
+			// rollback by copying podTemplate.Spec from the replica set
+			// revision number will be incremented during the next getAllReplicaSetsAndSyncRevision call
+			// no-op if the spec matches current deployment's podTemplate.Spec
+			performedRollback, err := dc.rollbackToTemplate(ctx, d, rs)
+			if performedRollback && err == nil {
+				dc.emitRollbackNormalEvent(d, fmt.Sprintf("Rolled back deployment %q to revision %d", d.Name, rollbackTo.Revision))
+			}
+			return err
+		}
+	}
+	dc.emitRollbackWarningEvent(d, deploymentutil.RollbackRevisionNotFound, "Unable to find the revision to rollback to.")
+	// Give up rollback
+	return dc.updateDeploymentAndClearRollbackTo(ctx, d)
+}
+
+// rollbackToTemplate compares the templates of the provided deployment and replica set and
+// updates the deployment with the replica set template in case they are different. It also
+// cleans up the rollback spec so subsequent requeues of the deployment won't end up in here.
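+// EqualIgnoreHash compares the two pod templates with the pod-template-hash
+// label stripped, so a target revision whose template already matches the
+// deployment's (apart from the hash) is treated as "nothing to roll back".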
+func (dc *DeploymentController) rollbackToTemplate(ctx context.Context, d *apps.Deployment, rs *apps.ReplicaSet) (bool, error) {
+	performedRollback := false
+	if !deploymentutil.EqualIgnoreHash(&d.Spec.Template, &rs.Spec.Template) {
+		klog.V(4).Infof("Rolling back deployment %q to template spec %+v", d.Name, rs.Spec.Template.Spec)
+		deploymentutil.SetFromReplicaSetTemplate(d, rs.Spec.Template)
+		// set RS (the old RS we're rolling back to) annotations back to the deployment;
+		// otherwise, the deployment's current annotations (should be the same as current new RS) will be copied to the RS after the rollback.
+		//
+		// For example,
+		// A Deployment has old RS1 with annotation {change-cause:create}, and new RS2 {change-cause:edit}.
+		// Note that both annotations are copied from Deployment, and the Deployment should be annotated {change-cause:edit} as well.
+		// Now, rollback Deployment to RS1, we should update Deployment's pod-template and also copy annotation from RS1.
+		// Deployment is now annotated {change-cause:create}, and we have new RS1 {change-cause:create}, old RS2 {change-cause:edit}.
+		//
+		// If we don't copy the annotations back from RS to deployment on rollback, the Deployment will stay as {change-cause:edit},
+		// and new RS1 becomes {change-cause:edit} (copied from deployment after rollback), old RS2 {change-cause:edit}, which is not correct.
+		deploymentutil.SetDeploymentAnnotationsTo(d, rs)
+		performedRollback = true
+	} else {
+		klog.V(4).Infof("Rolling back to a revision that contains the same template as current deployment %q, skipping rollback...", d.Name)
+		eventMsg := fmt.Sprintf("The rollback revision contains the same template as current deployment %q", d.Name)
+		dc.emitRollbackWarningEvent(d, deploymentutil.RollbackTemplateUnchanged, eventMsg)
+	}
+
+	return performedRollback, dc.updateDeploymentAndClearRollbackTo(ctx, d)
+}
+
+func (dc *DeploymentController) emitRollbackWarningEvent(d *apps.Deployment, reason, message string) {
+	dc.eventRecorder.Eventf(d, v1.EventTypeWarning, reason, message)
+}
+
+func (dc *DeploymentController) emitRollbackNormalEvent(d *apps.Deployment, message string) {
+	dc.eventRecorder.Eventf(d, v1.EventTypeNormal, deploymentutil.RollbackDone, message)
+}
+
+// updateDeploymentAndClearRollbackTo sets .spec.rollbackTo to nil and updates the input deployment.
+// It is assumed that the caller will have updated the deployment template appropriately (in case
+// we want to rollback).
+func (dc *DeploymentController) updateDeploymentAndClearRollbackTo(ctx context.Context, d *apps.Deployment) error {
+	klog.V(4).Infof("Cleaning up rollbackTo of deployment %q", d.Name)
+	setRollbackTo(d, nil)
+	_, err := dc.client.AppsV1().Deployments(d.Namespace).Update(ctx, d, metav1.UpdateOptions{})
+	return err
+}
+
+// TODO: Remove this when extensions/v1beta1 and apps/v1beta1 Deployment are dropped.
+func getRollbackTo(d *apps.Deployment) *extensions.RollbackConfig {
+	// Extract the annotation used for round-tripping the deprecated RollbackTo field.
+	revision := d.Annotations[apps.DeprecatedRollbackTo]
+	if revision == "" {
+		return nil
+	}
+	revision64, err := strconv.ParseInt(revision, 10, 64)
+	if err != nil {
+		// If it's invalid, ignore it.
+		return nil
+	}
+	return &extensions.RollbackConfig{
+		Revision: revision64,
+	}
+}
+
+// TODO: Remove this when extensions/v1beta1 and apps/v1beta1 Deployment are dropped.
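+// setRollbackTo round-trips the same annotation in the other direction:
+// passing nil removes the annotation, which is how
+// updateDeploymentAndClearRollbackTo marks a rollback as handled.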
+func setRollbackTo(d *apps.Deployment, rollbackTo *extensions.RollbackConfig) {
+	if rollbackTo == nil {
+		delete(d.Annotations, apps.DeprecatedRollbackTo)
+		return
+	}
+	if d.Annotations == nil {
+		d.Annotations = make(map[string]string)
+	}
+	d.Annotations[apps.DeprecatedRollbackTo] = strconv.FormatInt(rollbackTo.Revision, 10)
+}
diff --git a/pkg/controller/deployment/rolling.go b/pkg/controller/deployment/rolling.go
new file mode 100644
index 00000000..84f50f65
--- /dev/null
+++ b/pkg/controller/deployment/rolling.go
@@ -0,0 +1,234 @@
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package deployment
+
+import (
+	"context"
+	"fmt"
+	"sort"
+
+	apps "k8s.io/api/apps/v1"
+	"k8s.io/klog/v2"
+	"k8s.io/utils/integer"
+
+	deploymentutil "github.com/openkruise/rollouts/pkg/controller/deployment/util"
+)
+
+// rolloutRolling implements the logic for rolling a new replica set.
+func (dc *DeploymentController) rolloutRolling(ctx context.Context, d *apps.Deployment, rsList []*apps.ReplicaSet) error {
+	newRS, oldRSs, err := dc.getAllReplicaSetsAndSyncRevision(ctx, d, rsList, true)
+	if err != nil {
+		return err
+	}
+	allRSs := append(oldRSs, newRS)
+
+	// Scale up, if we can.
+	scaledUp, err := dc.reconcileNewReplicaSet(ctx, allRSs, newRS, d)
+	if err != nil {
+		return err
+	}
+	if scaledUp {
+		// Update DeploymentStatus
+		return dc.syncRolloutStatus(ctx, allRSs, newRS, d)
+	}
+
+	// Scale down, if we can.
+	scaledDown, err := dc.reconcileOldReplicaSets(ctx, allRSs, deploymentutil.FilterActiveReplicaSets(oldRSs), newRS, d)
+	if err != nil {
+		return err
+	}
+	if scaledDown {
+		// Update DeploymentStatus
+		return dc.syncRolloutStatus(ctx, allRSs, newRS, d)
+	}
+
+	if deploymentutil.DeploymentComplete(d, &d.Status) {
+		if err := dc.cleanupDeployment(ctx, oldRSs, d); err != nil {
+			return err
+		}
+	}
+
+	// Sync deployment status
+	return dc.syncRolloutStatus(ctx, allRSs, newRS, d)
+}
+
+func (dc *DeploymentController) reconcileNewReplicaSet(ctx context.Context, allRSs []*apps.ReplicaSet, newRS *apps.ReplicaSet, deployment *apps.Deployment) (bool, error) {
+	if *(newRS.Spec.Replicas) == *(deployment.Spec.Replicas) {
+		// Scaling not required.
+		return false, nil
+	}
+	if *(newRS.Spec.Replicas) > *(deployment.Spec.Replicas) {
+		// Scale down.
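+		// (This happens when the Deployment was scaled down mid-rollout;
+		// the new RS is trimmed back to the desired replica count.)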
+		scaled, _, err := dc.scaleReplicaSetAndRecordEvent(ctx, newRS, *(deployment.Spec.Replicas), deployment)
+		return scaled, err
+	}
+	newReplicasCount, err := deploymentutil.NewRSNewReplicas(deployment, allRSs, newRS)
+	if err != nil {
+		return false, err
+	}
+	scaled, _, err := dc.scaleReplicaSetAndRecordEvent(ctx, newRS, newReplicasCount, deployment)
+	return scaled, err
+}
+
+func (dc *DeploymentController) reconcileOldReplicaSets(ctx context.Context, allRSs []*apps.ReplicaSet, oldRSs []*apps.ReplicaSet, newRS *apps.ReplicaSet, deployment *apps.Deployment) (bool, error) {
+	oldPodsCount := deploymentutil.GetReplicaCountForReplicaSets(oldRSs)
+	if oldPodsCount == 0 {
+		// Can't scale down further
+		return false, nil
+	}
+
+	allPodsCount := deploymentutil.GetReplicaCountForReplicaSets(allRSs)
+	klog.V(4).Infof("New replica set %s/%s has %d available pods.", newRS.Namespace, newRS.Name, newRS.Status.AvailableReplicas)
+	maxUnavailable := deploymentutil.MaxUnavailable(*deployment)
+
+	// Check if we can scale down. We can scale down in the following 2 cases:
+	// * Some old replica sets have unhealthy replicas, we could safely scale down those unhealthy replicas since that won't further
+	//   increase unavailability.
+	// * New replica set has scaled up and its replicas become ready, then we can scale down old replica sets in a further step.
+	//
+	// maxScaledDown := allPodsCount - minAvailable - newReplicaSetPodsUnavailable
+	// take into account not only maxUnavailable and any surge pods that have been created, but also unavailable pods from
+	// the newRS, so that the unavailable pods from the newRS would not make us scale down old replica sets in a further
+	// step (that would increase unavailability).
+	//
+	// Concrete example:
+	//
+	// * 10 replicas
+	// * 2 maxUnavailable (absolute number, not percent)
+	// * 3 maxSurge (absolute number, not percent)
+	//
+	// case 1:
+	// * Deployment is updated, newRS is created with 3 replicas, oldRS is scaled down to 8, and newRS is scaled up to 5.
+	// * The new replica set pods crashloop and never become available.
+	// * allPodsCount is 13. minAvailable is 8. newRSPodsUnavailable is 5.
+	// * A node fails and causes one of the oldRS pods to become unavailable. However, 13 - 8 - 5 = 0, so the oldRS won't be scaled down.
+	// * The user notices the crashloop and does kubectl rollout undo to rollback.
+	// * newRSPodsUnavailable is 1, since we rolled back to the good replica set, so maxScaledDown = 13 - 8 - 1 = 4. 4 of the crashlooping pods will be scaled down.
+	// * The total number of pods will then be 9 and the newRS can be scaled up to 10.
+	//
+	// case 2:
+	// Same example, but pushing a new pod template instead of rolling back (aka "roll over"):
+	// * The new replica set created must start with 0 replicas because allPodsCount is already at 13.
+	// * However, newRSPodsUnavailable would also be 0, so the 2 old replica sets could be scaled down by 5 (13 - 8 - 0), which would then
+	//   allow the new replica set to be scaled up by 5.
+	minAvailable := *(deployment.Spec.Replicas) - maxUnavailable
+	newRSUnavailablePodCount := *(newRS.Spec.Replicas) - newRS.Status.AvailableReplicas
+	maxScaledDown := allPodsCount - minAvailable - newRSUnavailablePodCount
+	if maxScaledDown <= 0 {
+		return false, nil
+	}
+
+	// Clean up unhealthy replicas first, otherwise unhealthy replicas will block deployment
+	// and cause timeout.
+	// See https://github.com/kubernetes/kubernetes/issues/16737
+	oldRSs, cleanupCount, err := dc.cleanupUnhealthyReplicas(ctx, oldRSs, deployment, maxScaledDown)
+	if err != nil {
+		return false, err
+	}
+	klog.V(4).Infof("Cleaned up unhealthy replicas from old RSes by %d", cleanupCount)
+
+	// Scale down old replica sets, need check maxUnavailable to ensure we can scale down
+	allRSs = append(oldRSs, newRS)
+	scaledDownCount, err := dc.scaleDownOldReplicaSetsForRollingUpdate(ctx, allRSs, oldRSs, deployment)
+	if err != nil {
+		return false, err
+	}
+	klog.V(4).Infof("Scaled down old RSes of deployment %s by %d", deployment.Name, scaledDownCount)
+
+	totalScaledDown := cleanupCount + scaledDownCount
+	return totalScaledDown > 0, nil
+}
+
+// cleanupUnhealthyReplicas will scale down old replica sets with unhealthy replicas, so that all unhealthy replicas will be deleted.
+func (dc *DeploymentController) cleanupUnhealthyReplicas(ctx context.Context, oldRSs []*apps.ReplicaSet, deployment *apps.Deployment, maxCleanupCount int32) ([]*apps.ReplicaSet, int32, error) {
+	sort.Sort(deploymentutil.ReplicaSetsByCreationTimestamp(oldRSs))
+	// Safely scale down all old replica sets with unhealthy replicas. Replica set will sort the pods in the order
+	// such that not-ready < ready, unscheduled < scheduled, and pending < running. This ensures that unhealthy replicas will
+	// be deleted first and won't increase unavailability.
+	totalScaledDown := int32(0)
+	for i, targetRS := range oldRSs {
+		if totalScaledDown >= maxCleanupCount {
+			break
+		}
+		if *(targetRS.Spec.Replicas) == 0 {
+			// cannot scale down this replica set.
+			continue
+		}
+		klog.V(4).Infof("Found %d available pods in old RS %s/%s", targetRS.Status.AvailableReplicas, targetRS.Namespace, targetRS.Name)
+		if *(targetRS.Spec.Replicas) == targetRS.Status.AvailableReplicas {
+			// no unhealthy replicas found, no scaling required.
+			continue
+		}
+
+		scaledDownCount := int32(integer.IntMin(int(maxCleanupCount-totalScaledDown), int(*(targetRS.Spec.Replicas)-targetRS.Status.AvailableReplicas)))
+		newReplicasCount := *(targetRS.Spec.Replicas) - scaledDownCount
+		if newReplicasCount > *(targetRS.Spec.Replicas) {
+			return nil, 0, fmt.Errorf("when cleaning up unhealthy replicas, got invalid request to scale down %s/%s %d -> %d", targetRS.Namespace, targetRS.Name, *(targetRS.Spec.Replicas), newReplicasCount)
+		}
+		_, updatedOldRS, err := dc.scaleReplicaSetAndRecordEvent(ctx, targetRS, newReplicasCount, deployment)
+		if err != nil {
+			return nil, totalScaledDown, err
+		}
+		totalScaledDown += scaledDownCount
+		oldRSs[i] = updatedOldRS
+	}
+	return oldRSs, totalScaledDown, nil
+}
+
+// scaleDownOldReplicaSetsForRollingUpdate scales down old replica sets when deployment strategy is "RollingUpdate".
+// Need check maxUnavailable to ensure availability
+func (dc *DeploymentController) scaleDownOldReplicaSetsForRollingUpdate(ctx context.Context, allRSs []*apps.ReplicaSet, oldRSs []*apps.ReplicaSet, deployment *apps.Deployment) (int32, error) {
+	maxUnavailable := deploymentutil.MaxUnavailable(*deployment)
+
+	// Check if we can scale down.
+	minAvailable := *(deployment.Spec.Replicas) - maxUnavailable
+	// Find the number of available pods.
+	availablePodCount := deploymentutil.GetAvailableReplicaCountForReplicaSets(allRSs)
+	if availablePodCount <= minAvailable {
+		// Cannot scale down.
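+		// Every available pod is needed to satisfy minAvailable
+		// (replicas - maxUnavailable); removing any would breach it.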
+		return 0, nil
+	}
+	klog.V(4).Infof("Found %d available pods in deployment %s, scaling down old RSes", availablePodCount, deployment.Name)
+
+	sort.Sort(deploymentutil.ReplicaSetsByCreationTimestamp(oldRSs))
+
+	totalScaledDown := int32(0)
+	totalScaleDownCount := availablePodCount - minAvailable
+	for _, targetRS := range oldRSs {
+		if totalScaledDown >= totalScaleDownCount {
+			// No further scaling required.
+			break
+		}
+		if *(targetRS.Spec.Replicas) == 0 {
+			// cannot scale down this ReplicaSet.
+			continue
+		}
+		// Scale down.
+		scaleDownCount := int32(integer.IntMin(int(*(targetRS.Spec.Replicas)), int(totalScaleDownCount-totalScaledDown)))
+		newReplicasCount := *(targetRS.Spec.Replicas) - scaleDownCount
+		if newReplicasCount > *(targetRS.Spec.Replicas) {
+			return 0, fmt.Errorf("when scaling down old RS, got invalid request to scale down %s/%s %d -> %d", targetRS.Namespace, targetRS.Name, *(targetRS.Spec.Replicas), newReplicasCount)
+		}
+		_, _, err := dc.scaleReplicaSetAndRecordEvent(ctx, targetRS, newReplicasCount, deployment)
+		if err != nil {
+			return totalScaledDown, err
+		}
+
+		totalScaledDown += scaleDownCount
+	}
+
+	return totalScaledDown, nil
+}
diff --git a/pkg/controller/deployment/sync.go b/pkg/controller/deployment/sync.go
new file mode 100644
index 00000000..2ec3dd45
--- /dev/null
+++ b/pkg/controller/deployment/sync.go
@@ -0,0 +1,546 @@
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package deployment
+
+import (
+	"context"
+	"fmt"
+	"reflect"
+	"sort"
+	"strconv"
+
+	apps "k8s.io/api/apps/v1"
+	v1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/api/errors"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/klog/v2"
+
+	deploymentutil "github.com/openkruise/rollouts/pkg/controller/deployment/util"
+	"github.com/openkruise/rollouts/pkg/util"
+	labelsutil "github.com/openkruise/rollouts/pkg/util/labels"
+)
+
+// syncStatusOnly only updates the Deployment's status and doesn't take any mutating actions.
+func (dc *DeploymentController) syncStatusOnly(ctx context.Context, d *apps.Deployment, rsList []*apps.ReplicaSet) error {
+	newRS, oldRSs, err := dc.getAllReplicaSetsAndSyncRevision(ctx, d, rsList, false)
+	if err != nil {
+		return err
+	}
+
+	allRSs := append(oldRSs, newRS)
+	return dc.syncDeploymentStatus(ctx, allRSs, newRS, d)
+}
+
+// sync is responsible for reconciling deployments on scaling events or when they
+// are paused.
+func (dc *DeploymentController) sync(ctx context.Context, d *apps.Deployment, rsList []*apps.ReplicaSet) error {
+	newRS, oldRSs, err := dc.getAllReplicaSetsAndSyncRevision(ctx, d, rsList, false)
+	if err != nil {
+		return err
+	}
+	if err := dc.scale(ctx, d, newRS, oldRSs); err != nil {
+		// If we get an error while trying to scale, the deployment will be requeued
+		// so we can abort this resync
+		return err
+	}
+
+	// Clean up the deployment when it's paused and no rollback is in flight.
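+	// Pruning revision history while paused is safe precisely because no
+	// rollback is pending; a pending rollback may still need an old ReplicaSet.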
+	if d.Spec.Paused && getRollbackTo(d) == nil {
+		if err := dc.cleanupDeployment(ctx, oldRSs, d); err != nil {
+			return err
+		}
+	}
+
+	allRSs := append(oldRSs, newRS)
+	return dc.syncDeploymentStatus(ctx, allRSs, newRS, d)
+}
+
+// checkPausedConditions checks if the given deployment is paused or not and adds an appropriate condition.
+// These conditions are needed so that we won't accidentally report lack of progress for resumed deployments
+// that were paused for longer than progressDeadlineSeconds.
+func (dc *DeploymentController) checkPausedConditions(ctx context.Context, d *apps.Deployment) error {
+	if !deploymentutil.HasProgressDeadline(d) {
+		return nil
+	}
+	cond := deploymentutil.GetDeploymentCondition(d.Status, apps.DeploymentProgressing)
+	if cond != nil && cond.Reason == deploymentutil.TimedOutReason {
+		// If we have reported lack of progress, do not overwrite it with a paused condition.
+		return nil
+	}
+	pausedCondExists := cond != nil && cond.Reason == deploymentutil.PausedDeployReason
+
+	needsUpdate := false
+	if d.Spec.Paused && !pausedCondExists {
+		condition := deploymentutil.NewDeploymentCondition(apps.DeploymentProgressing, v1.ConditionUnknown, deploymentutil.PausedDeployReason, "Deployment is paused")
+		deploymentutil.SetDeploymentCondition(&d.Status, *condition)
+		needsUpdate = true
+	} else if !d.Spec.Paused && pausedCondExists {
+		condition := deploymentutil.NewDeploymentCondition(apps.DeploymentProgressing, v1.ConditionUnknown, deploymentutil.ResumedDeployReason, "Deployment is resumed")
+		deploymentutil.SetDeploymentCondition(&d.Status, *condition)
+		needsUpdate = true
+	}
+
+	if !needsUpdate {
+		return nil
+	}
+
+	var err error
+	_, err = dc.client.AppsV1().Deployments(d.Namespace).UpdateStatus(ctx, d, metav1.UpdateOptions{})
+	return err
+}
+
+// getAllReplicaSetsAndSyncRevision returns all the replica sets for the provided deployment (new and all old), with new RS's and deployment's revision updated.
+//
+// rsList should come from getReplicaSetsForDeployment(d).
+//
+// 1. Get all old RSes this deployment targets, and calculate the max revision number among them (maxOldV).
+// 2. Get new RS this deployment targets (whose pod template matches deployment's), and update new RS's revision number to (maxOldV + 1),
+//    only if its revision number is smaller than (maxOldV + 1). If this step failed, we'll update it in the next deployment sync loop.
+// 3. Copy new RS's revision number to deployment (update deployment's revision). If this step failed, we'll update it in the next deployment sync loop.
+//
+// Note that currently the deployment controller is using caches to avoid querying the server for reads.
+// This may lead to stale reads of replica sets, thus incorrect deployment status.
+func (dc *DeploymentController) getAllReplicaSetsAndSyncRevision(ctx context.Context, d *apps.Deployment, rsList []*apps.ReplicaSet, createIfNotExisted bool) (*apps.ReplicaSet, []*apps.ReplicaSet, error) {
+	_, allOldRSs := deploymentutil.FindOldReplicaSets(d, rsList)
+
+	// Get new replica set with the updated revision number
+	newRS, err := dc.getNewReplicaSet(ctx, d, rsList, allOldRSs, createIfNotExisted)
+	if err != nil {
+		return nil, nil, err
+	}
+
+	return newRS, allOldRSs, nil
+}
+
+const (
+	// limit revision history length to 100 elements (~2000 chars)
+	maxRevHistoryLengthInChars = 2000
+)
+
+// Returns a replica set that matches the intent of the given deployment. Returns nil if the new replica set doesn't exist yet.
+// 1. Get existing new RS (the RS that the given deployment targets, whose pod template is the same as deployment's).
+// 2. If there's existing new RS, update its revision number if it's smaller than (maxOldRevision + 1), where maxOldRevision is the max revision number among all old RSes.
+// 3. If there's no existing new RS and createIfNotExisted is true, create one with appropriate revision number (maxOldRevision + 1) and replicas.
+//    Note that the pod-template-hash will be added to adopted RSes and pods.
+func (dc *DeploymentController) getNewReplicaSet(ctx context.Context, d *apps.Deployment, rsList, oldRSs []*apps.ReplicaSet, createIfNotExisted bool) (*apps.ReplicaSet, error) {
+	existingNewRS := deploymentutil.FindNewReplicaSet(d, rsList)
+
+	// Calculate the max revision number among all old RSes
+	maxOldRevision := deploymentutil.MaxRevision(oldRSs)
+	// Calculate revision number for this new replica set
+	newRevision := strconv.FormatInt(maxOldRevision+1, 10)
+
+	// Latest replica set exists. We need to sync its annotations (includes copying all but
+	// annotationsToSkip from the parent deployment, and update revision, desiredReplicas,
+	// and maxReplicas) and also update the revision annotation in the deployment with the
+	// latest revision.
+	if existingNewRS != nil {
+		rsCopy := existingNewRS.DeepCopy()
+
+		// Set existing new replica set's annotation
+		annotationsUpdated := deploymentutil.SetNewReplicaSetAnnotations(d, rsCopy, newRevision, true, maxRevHistoryLengthInChars)
+		minReadySecondsNeedsUpdate := rsCopy.Spec.MinReadySeconds != d.Spec.MinReadySeconds
+		if annotationsUpdated || minReadySecondsNeedsUpdate {
+			rsCopy.Spec.MinReadySeconds = d.Spec.MinReadySeconds
+			return dc.client.AppsV1().ReplicaSets(rsCopy.ObjectMeta.Namespace).Update(ctx, rsCopy, metav1.UpdateOptions{})
+		}
+
+		// Should use the revision in existingNewRS's annotation, since it was set there earlier
+		needsUpdate := deploymentutil.SetDeploymentRevision(d, rsCopy.Annotations[deploymentutil.RevisionAnnotation])
+		// If no other Progressing condition has been recorded and we need to estimate the progress
+		// of this deployment then it is likely that old users started caring about progress. In that
+		// case we need to take into account the first time we noticed their new replica set.
+		cond := deploymentutil.GetDeploymentCondition(d.Status, apps.DeploymentProgressing)
+		if deploymentutil.HasProgressDeadline(d) && cond == nil {
+			msg := fmt.Sprintf("Found new replica set %q", rsCopy.Name)
+			condition := deploymentutil.NewDeploymentCondition(apps.DeploymentProgressing, v1.ConditionTrue, deploymentutil.FoundNewRSReason, msg)
+			deploymentutil.SetDeploymentCondition(&d.Status, *condition)
+			needsUpdate = true
+		}
+
+		if needsUpdate {
+			var err error
+			if _, err = dc.client.AppsV1().Deployments(d.Namespace).UpdateStatus(ctx, d, metav1.UpdateOptions{}); err != nil {
+				return nil, err
+			}
+		}
+		return rsCopy, nil
+	}
+
+	if !createIfNotExisted {
+		return nil, nil
+	}
+
+	// new ReplicaSet does not exist, create one.
+	newRSTemplate := *d.Spec.Template.DeepCopy()
+	podTemplateSpecHash := util.ComputeHash(&newRSTemplate, d.Status.CollisionCount)
+	newRSTemplate.Labels = labelsutil.CloneAndAddLabel(d.Spec.Template.Labels, apps.DefaultDeploymentUniqueLabelKey, podTemplateSpecHash)
+	// Add podTemplateHash label to selector.
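+	// The hash is derived from the pod template (salted with collisionCount),
+	// so both the RS name below and this selector are unique per template
+	// revision; that is what makes creation deterministic and idempotent.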
+ newRSSelector := labelsutil.CloneSelectorAndAddLabel(d.Spec.Selector, apps.DefaultDeploymentUniqueLabelKey, podTemplateSpecHash) + + // Create new ReplicaSet + newRS := apps.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{ + // Make the name deterministic, to ensure idempotence + Name: d.Name + "-" + podTemplateSpecHash, + Namespace: d.Namespace, + OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(d, controllerKind)}, + Labels: newRSTemplate.Labels, + }, + Spec: apps.ReplicaSetSpec{ + Replicas: new(int32), + MinReadySeconds: d.Spec.MinReadySeconds, + Selector: newRSSelector, + Template: newRSTemplate, + }, + } + allRSs := append(oldRSs, &newRS) + newReplicasCount, err := deploymentutil.NewRSNewReplicas(d, allRSs, &newRS) + if err != nil { + return nil, err + } + + *(newRS.Spec.Replicas) = newReplicasCount + // Set new replica set's annotation + deploymentutil.SetNewReplicaSetAnnotations(d, &newRS, newRevision, false, maxRevHistoryLengthInChars) + // Create the new ReplicaSet. If it already exists, then we need to check for possible + // hash collisions. If there is any other error, we need to report it in the status of + // the Deployment. + alreadyExists := false + createdRS, err := dc.client.AppsV1().ReplicaSets(d.Namespace).Create(ctx, &newRS, metav1.CreateOptions{}) + switch { + // We may end up hitting this due to a slow cache or a fast resync of the Deployment. + case errors.IsAlreadyExists(err): + alreadyExists = true + + // Fetch a copy of the ReplicaSet. + rs, rsErr := dc.rsLister.ReplicaSets(newRS.Namespace).Get(newRS.Name) + if rsErr != nil { + return nil, rsErr + } + + // If the Deployment owns the ReplicaSet and the ReplicaSet's PodTemplateSpec is semantically + // deep equal to the PodTemplateSpec of the Deployment, it's the Deployment's new ReplicaSet. + // Otherwise, this is a hash collision and we need to increment the collisionCount field in + // the status of the Deployment and requeue to try the creation in the next sync. + controllerRef := metav1.GetControllerOf(rs) + if controllerRef != nil && controllerRef.UID == d.UID && deploymentutil.EqualIgnoreHash(&d.Spec.Template, &rs.Spec.Template) { + createdRS = rs + err = nil + break + } + + // Matching ReplicaSet is not equal - increment the collisionCount in the DeploymentStatus + // and requeue the Deployment. + if d.Status.CollisionCount == nil { + d.Status.CollisionCount = new(int32) + } + preCollisionCount := *d.Status.CollisionCount + *d.Status.CollisionCount++ + // Update the collisionCount for the Deployment and let it requeue by returning the original + // error. + _, dErr := dc.client.AppsV1().Deployments(d.Namespace).UpdateStatus(ctx, d, metav1.UpdateOptions{}) + if dErr == nil { + klog.V(2).Infof("Found a hash collision for deployment %q - bumping collisionCount (%d->%d) to resolve it", d.Name, preCollisionCount, *d.Status.CollisionCount) + } + return nil, err + case errors.HasStatusCause(err, v1.NamespaceTerminatingCause): + // if the namespace is terminating, all subsequent creates will fail and we can safely do nothing + return nil, err + case err != nil: + msg := fmt.Sprintf("Failed to create new replica set %q: %v", newRS.Name, err) + if deploymentutil.HasProgressDeadline(d) { + cond := deploymentutil.NewDeploymentCondition(apps.DeploymentProgressing, v1.ConditionFalse, deploymentutil.FailedRSCreateReason, msg) + deploymentutil.SetDeploymentCondition(&d.Status, *cond) + // We don't really care about this error at this point, since we have a bigger issue to report. 
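+		// (The status update that follows is deliberately best-effort: its result is
+		// discarded with "_, _ =", since the create error above is the one worth
+		// surfacing to the caller.)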
+ // TODO: Identify which errors are permanent and switch DeploymentIsFailed to take into account + // these reasons as well. Related issue: https://github.com/kubernetes/kubernetes/issues/18568 + _, _ = dc.client.AppsV1().Deployments(d.Namespace).UpdateStatus(ctx, d, metav1.UpdateOptions{}) + } + dc.eventRecorder.Eventf(d, v1.EventTypeWarning, deploymentutil.FailedRSCreateReason, msg) + return nil, err + } + if !alreadyExists && newReplicasCount > 0 { + dc.eventRecorder.Eventf(d, v1.EventTypeNormal, "ScalingReplicaSet", "Scaled up replica set %s to %d", createdRS.Name, newReplicasCount) + } + + needsUpdate := deploymentutil.SetDeploymentRevision(d, newRevision) + if !alreadyExists && deploymentutil.HasProgressDeadline(d) { + msg := fmt.Sprintf("Created new replica set %q", createdRS.Name) + condition := deploymentutil.NewDeploymentCondition(apps.DeploymentProgressing, v1.ConditionTrue, deploymentutil.NewReplicaSetReason, msg) + deploymentutil.SetDeploymentCondition(&d.Status, *condition) + needsUpdate = true + } + if needsUpdate { + _, err = dc.client.AppsV1().Deployments(d.Namespace).UpdateStatus(ctx, d, metav1.UpdateOptions{}) + } + return createdRS, err +} + +// scale scales proportionally in order to mitigate risk. Otherwise, scaling up can increase the size +// of the new replica set and scaling down can decrease the sizes of the old ones, both of which would +// have the effect of hastening the rollout progress, which could produce a higher proportion of unavailable +// replicas in the event of a problem with the rolled out template. Should run only on scaling events or +// when a deployment is paused and not during the normal rollout process. +func (dc *DeploymentController) scale(ctx context.Context, deployment *apps.Deployment, newRS *apps.ReplicaSet, oldRSs []*apps.ReplicaSet) error { + // If there is only one active replica set then we should scale that up to the full count of the + // deployment. If there is no active replica set, then we should scale up the newest replica set. + if activeOrLatest := deploymentutil.FindActiveOrLatest(newRS, oldRSs); activeOrLatest != nil { + if *(activeOrLatest.Spec.Replicas) == *(deployment.Spec.Replicas) { + return nil + } + _, _, err := dc.scaleReplicaSetAndRecordEvent(ctx, activeOrLatest, *(deployment.Spec.Replicas), deployment) + return err + } + + // If the new replica set is saturated, old replica sets should be fully scaled down. + // This case handles replica set adoption during a saturated new replica set. + if deploymentutil.IsSaturated(deployment, newRS) { + for _, old := range deploymentutil.FilterActiveReplicaSets(oldRSs) { + if _, _, err := dc.scaleReplicaSetAndRecordEvent(ctx, old, 0, deployment); err != nil { + return err + } + } + return nil + } + + // There are old replica sets with pods and the new replica set is not saturated. + // We need to proportionally scale all replica sets (new and old) in case of a + // rolling deployment. + if deploymentutil.IsRollingUpdate(deployment) { + allRSs := deploymentutil.FilterActiveReplicaSets(append(oldRSs, newRS)) + allRSsReplicas := deploymentutil.GetReplicaCountForReplicaSets(allRSs) + + allowedSize := int32(0) + if *(deployment.Spec.Replicas) > 0 { + allowedSize = *(deployment.Spec.Replicas) + deploymentutil.MaxSurge(*deployment) + } + + // Number of additional replicas that can be either added or removed from the total + // replicas count. These replicas should be distributed proportionally to the active + // replica sets. 
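+		// (Worked example with hypothetical numbers: spec.replicas=10 and maxSurge=25%
+		// give allowedSize=13. If the active replica sets currently sum to 15 replicas,
+		// deploymentReplicasToAdd below is -2, i.e. two replicas must be removed
+		// proportionally; if they sum to 8, it is +5.)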
+		deploymentReplicasToAdd := allowedSize - allRSsReplicas
+
+		// The additional replicas should be distributed proportionally among the active
+		// replica sets, from the largest to the smallest. Scaling direction
+		// drives what happens in case we are trying to scale replica sets of the same size.
+		// In such a case when scaling up, we should scale up newer replica sets first, and
+		// when scaling down, we should scale down older replica sets first.
+		var scalingOperation string
+		switch {
+		case deploymentReplicasToAdd > 0:
+			sort.Sort(deploymentutil.ReplicaSetsBySizeNewer(allRSs))
+			scalingOperation = "up"
+
+		case deploymentReplicasToAdd < 0:
+			sort.Sort(deploymentutil.ReplicaSetsBySizeOlder(allRSs))
+			scalingOperation = "down"
+		}
+
+		// Iterate over all active replica sets and estimate proportions for each of them.
+		// The absolute value of deploymentReplicasAdded should never exceed the absolute
+		// value of deploymentReplicasToAdd.
+		deploymentReplicasAdded := int32(0)
+		nameToSize := make(map[string]int32)
+		for i := range allRSs {
+			rs := allRSs[i]
+
+			// Estimate proportions if we have replicas to add, otherwise simply populate
+			// nameToSize with the current sizes for each replica set.
+			if deploymentReplicasToAdd != 0 {
+				proportion := deploymentutil.GetProportion(rs, *deployment, deploymentReplicasToAdd, deploymentReplicasAdded)
+
+				nameToSize[rs.Name] = *(rs.Spec.Replicas) + proportion
+				deploymentReplicasAdded += proportion
+			} else {
+				nameToSize[rs.Name] = *(rs.Spec.Replicas)
+			}
+		}
+
+		// Update all replica sets
+		for i := range allRSs {
+			rs := allRSs[i]
+
+			// Add/remove any leftover replicas to/from the largest replica set.
+			if i == 0 && deploymentReplicasToAdd != 0 {
+				leftover := deploymentReplicasToAdd - deploymentReplicasAdded
+				nameToSize[rs.Name] = nameToSize[rs.Name] + leftover
+				if nameToSize[rs.Name] < 0 {
+					nameToSize[rs.Name] = 0
+				}
+			}
+
+			// TODO: Use transactions when we have them.
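+			// (Worked example with hypothetical numbers: scaling a deployment from 8 to
+			// 4 replicas across replica sets sized 5 and 3, each annotated with
+			// max-replicas=8, yields proportions of -2 and -1; the remaining leftover of
+			// -1 computed above is applied to the largest replica set, ending at 2 and 2.)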
+			if _, _, err := dc.scaleReplicaSet(ctx, rs, nameToSize[rs.Name], deployment, scalingOperation); err != nil {
+				// Return as soon as we fail; the deployment will be requeued
+				return err
+			}
+		}
+	}
+	return nil
+}
+
+func (dc *DeploymentController) scaleReplicaSetAndRecordEvent(ctx context.Context, rs *apps.ReplicaSet, newScale int32, deployment *apps.Deployment) (bool, *apps.ReplicaSet, error) {
+	// No need to scale
+	if *(rs.Spec.Replicas) == newScale {
+		return false, rs, nil
+	}
+	var scalingOperation string
+	if *(rs.Spec.Replicas) < newScale {
+		scalingOperation = "up"
+	} else {
+		scalingOperation = "down"
+	}
+	scaled, newRS, err := dc.scaleReplicaSet(ctx, rs, newScale, deployment, scalingOperation)
+	return scaled, newRS, err
+}
+
+func (dc *DeploymentController) scaleReplicaSet(ctx context.Context, rs *apps.ReplicaSet, newScale int32, deployment *apps.Deployment, scalingOperation string) (bool, *apps.ReplicaSet, error) {
+	sizeNeedsUpdate := *(rs.Spec.Replicas) != newScale
+
+	annotationsNeedUpdate := deploymentutil.ReplicasAnnotationsNeedUpdate(rs, *(deployment.Spec.Replicas), *(deployment.Spec.Replicas)+deploymentutil.MaxSurge(*deployment))
+
+	scaled := false
+	var err error
+	if sizeNeedsUpdate || annotationsNeedUpdate {
+		oldScale := *(rs.Spec.Replicas)
+		rsCopy := rs.DeepCopy()
+		*(rsCopy.Spec.Replicas) = newScale
+		deploymentutil.SetReplicasAnnotations(rsCopy, *(deployment.Spec.Replicas), *(deployment.Spec.Replicas)+deploymentutil.MaxSurge(*deployment))
+		rs, err = dc.client.AppsV1().ReplicaSets(rsCopy.Namespace).Update(ctx, rsCopy, metav1.UpdateOptions{})
+		if err == nil && sizeNeedsUpdate {
+			scaled = true
+			dc.eventRecorder.Eventf(deployment, v1.EventTypeNormal, "ScalingReplicaSet", "Scaled %s replica set %s to %d from %d", scalingOperation, rs.Name, newScale, oldScale)
+		}
+	}
+	return scaled, rs, err
+}
+
+// cleanupDeployment is responsible for cleaning up a deployment, i.e. it retains all but the latest N old replica sets,
+// where N=d.Spec.RevisionHistoryLimit. Old replica sets are older versions of the pod template of a deployment kept
+// around by default 1) for historical reasons and 2) for the ability to roll back a deployment.
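+// For illustration (hypothetical numbers): with revisionHistoryLimit=2 and four
+// cleanable old replica sets at revisions 1-4, the two lowest revisions become
+// deletion candidates, and of those only replica sets already scaled down to zero
+// and fully observed are actually deleted.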
+func (dc *DeploymentController) cleanupDeployment(ctx context.Context, oldRSs []*apps.ReplicaSet, deployment *apps.Deployment) error {
+	if !deploymentutil.HasRevisionHistoryLimit(deployment) {
+		return nil
+	}
+
+	// Avoid deleting replica sets that already have a deletion timestamp set
+	aliveFilter := func(rs *apps.ReplicaSet) bool {
+		return rs != nil && rs.ObjectMeta.DeletionTimestamp == nil
+	}
+	cleanableRSes := deploymentutil.FilterReplicaSets(oldRSs, aliveFilter)
+
+	diff := int32(len(cleanableRSes)) - *deployment.Spec.RevisionHistoryLimit
+	if diff <= 0 {
+		return nil
+	}
+
+	sort.Sort(deploymentutil.ReplicaSetsByRevision(cleanableRSes))
+	klog.V(4).Infof("Looking to clean up old replica sets for deployment %q", deployment.Name)
+
+	for i := int32(0); i < diff; i++ {
+		rs := cleanableRSes[i]
+		// Avoid deleting replica sets with non-zero replica counts
+		if rs.Status.Replicas != 0 || *(rs.Spec.Replicas) != 0 || rs.Generation > rs.Status.ObservedGeneration || rs.DeletionTimestamp != nil {
+			continue
+		}
+		klog.V(4).Infof("Trying to clean up replica set %q for deployment %q", rs.Name, deployment.Name)
+		if err := dc.client.AppsV1().ReplicaSets(rs.Namespace).Delete(ctx, rs.Name, metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) {
+			// Return the error instead of aggregating and continuing DELETEs on the theory
+			// that we may be overloading the API server.
+			return err
+		}
+	}
+
+	return nil
+}
+
+// syncDeploymentStatus checks if the status is up-to-date and syncs it if necessary
+func (dc *DeploymentController) syncDeploymentStatus(ctx context.Context, allRSs []*apps.ReplicaSet, newRS *apps.ReplicaSet, d *apps.Deployment) error {
+	newStatus := calculateStatus(allRSs, newRS, d)
+
+	if reflect.DeepEqual(d.Status, newStatus) {
+		return nil
+	}
+
+	newDeployment := d
+	newDeployment.Status = newStatus
+	_, err := dc.client.AppsV1().Deployments(newDeployment.Namespace).UpdateStatus(ctx, newDeployment, metav1.UpdateOptions{})
+	return err
+}
+
+// calculateStatus calculates the latest status for the provided deployment by looking into the provided replica sets.
+func calculateStatus(allRSs []*apps.ReplicaSet, newRS *apps.ReplicaSet, deployment *apps.Deployment) apps.DeploymentStatus {
+	availableReplicas := deploymentutil.GetAvailableReplicaCountForReplicaSets(allRSs)
+	totalReplicas := deploymentutil.GetReplicaCountForReplicaSets(allRSs)
+	unavailableReplicas := totalReplicas - availableReplicas
+	// If unavailableReplicas is negative, then that means the Deployment has more available replicas running than
+	// desired, e.g. whenever it scales down. In such a case we should simply default unavailableReplicas to zero.
+	if unavailableReplicas < 0 {
+		unavailableReplicas = 0
+	}
+
+	status := apps.DeploymentStatus{
+		// TODO: Ensure that if we start retrying status updates, we won't pick up a new Generation value.
+		ObservedGeneration:  deployment.Generation,
+		Replicas:            deploymentutil.GetActualReplicaCountForReplicaSets(allRSs),
+		UpdatedReplicas:     deploymentutil.GetActualReplicaCountForReplicaSets([]*apps.ReplicaSet{newRS}),
+		ReadyReplicas:       deploymentutil.GetReadyReplicaCountForReplicaSets(allRSs),
+		AvailableReplicas:   availableReplicas,
+		UnavailableReplicas: unavailableReplicas,
+		CollisionCount:      deployment.Status.CollisionCount,
+	}
+
+	// Copy conditions one by one so we won't mutate the original object.
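+	// (Appending into status.Conditions, which starts out nil, copies each element
+	// by value, so the original deployment.Status.Conditions slice is never mutated.)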
+ conditions := deployment.Status.Conditions + for i := range conditions { + status.Conditions = append(status.Conditions, conditions[i]) + } + + if availableReplicas >= *(deployment.Spec.Replicas)-deploymentutil.MaxUnavailable(*deployment) { + minAvailability := deploymentutil.NewDeploymentCondition(apps.DeploymentAvailable, v1.ConditionTrue, deploymentutil.MinimumReplicasAvailable, "Deployment has minimum availability.") + deploymentutil.SetDeploymentCondition(&status, *minAvailability) + } else { + noMinAvailability := deploymentutil.NewDeploymentCondition(apps.DeploymentAvailable, v1.ConditionFalse, deploymentutil.MinimumReplicasUnavailable, "Deployment does not have minimum availability.") + deploymentutil.SetDeploymentCondition(&status, *noMinAvailability) + } + + return status +} + +// isScalingEvent checks whether the provided deployment has been updated with a scaling event +// by looking at the desired-replicas annotation in the active replica sets of the deployment. +// +// rsList should come from getReplicaSetsForDeployment(d). +func (dc *DeploymentController) isScalingEvent(ctx context.Context, d *apps.Deployment, rsList []*apps.ReplicaSet) (bool, error) { + newRS, oldRSs, err := dc.getAllReplicaSetsAndSyncRevision(ctx, d, rsList, false) + if err != nil { + return false, err + } + allRSs := append(oldRSs, newRS) + for _, rs := range deploymentutil.FilterActiveReplicaSets(allRSs) { + desired, ok := deploymentutil.GetDesiredReplicasAnnotation(rs) + if !ok { + continue + } + if desired != *(d.Spec.Replicas) { + return true, nil + } + } + return false, nil +} diff --git a/pkg/controller/deployment/util/deployment_util.go b/pkg/controller/deployment/util/deployment_util.go new file mode 100644 index 00000000..1b42f210 --- /dev/null +++ b/pkg/controller/deployment/util/deployment_util.go @@ -0,0 +1,1023 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package util + +import ( + "context" + "fmt" + "math" + "sort" + "strconv" + "strings" + "time" + + apps "k8s.io/api/apps/v1" + v1 "k8s.io/api/core/v1" + apiequality "k8s.io/apimachinery/pkg/api/equality" + "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/labels" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + intstrutil "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/apimachinery/pkg/util/wait" + appsclient "k8s.io/client-go/kubernetes/typed/apps/v1" + appslisters "k8s.io/client-go/listers/apps/v1" + "k8s.io/klog/v2" + "k8s.io/utils/integer" + + labelsutil "github.com/openkruise/rollouts/pkg/util/labels" +) + +const ( + // RevisionAnnotation is the revision annotation of a deployment's replica sets which records its rollout sequence + RevisionAnnotation = "deployment.kubernetes.io/revision" + // RevisionHistoryAnnotation maintains the history of all old revisions that a replica set has served for a deployment. 
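+	// The value is a comma-separated list of past revisions, oldest first,
+	// e.g. "1,3" (illustrative).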
+ RevisionHistoryAnnotation = "deployment.kubernetes.io/revision-history" + // DesiredReplicasAnnotation is the desired replicas for a deployment recorded as an annotation + // in its replica sets. Helps in separating scaling events from the rollout process and for + // determining if the new replica set for a deployment is really saturated. + DesiredReplicasAnnotation = "deployment.kubernetes.io/desired-replicas" + // MaxReplicasAnnotation is the maximum replicas a deployment can have at a given point, which + // is deployment.spec.replicas + maxSurge. Used by the underlying replica sets to estimate their + // proportions in case the deployment has surge replicas. + MaxReplicasAnnotation = "deployment.kubernetes.io/max-replicas" + + // RollbackRevisionNotFound is not found rollback event reason + RollbackRevisionNotFound = "DeploymentRollbackRevisionNotFound" + // RollbackTemplateUnchanged is the template unchanged rollback event reason + RollbackTemplateUnchanged = "DeploymentRollbackTemplateUnchanged" + // RollbackDone is the done rollback event reason + RollbackDone = "DeploymentRollback" + + // Reasons for deployment conditions + // + // Progressing: + + // ReplicaSetUpdatedReason is added in a deployment when one of its replica sets is updated as part + // of the rollout process. + ReplicaSetUpdatedReason = "ReplicaSetUpdated" + // FailedRSCreateReason is added in a deployment when it cannot create a new replica set. + FailedRSCreateReason = "ReplicaSetCreateError" + // NewReplicaSetReason is added in a deployment when it creates a new replica set. + NewReplicaSetReason = "NewReplicaSetCreated" + // FoundNewRSReason is added in a deployment when it adopts an existing replica set. + FoundNewRSReason = "FoundNewReplicaSet" + // NewRSAvailableReason is added in a deployment when its newest replica set is made available + // ie. the number of new pods that have passed readiness checks and run for at least minReadySeconds + // is at least the minimum available pods that need to run for the deployment. + NewRSAvailableReason = "NewReplicaSetAvailable" + // TimedOutReason is added in a deployment when its newest replica set fails to show any progress + // within the given deadline (progressDeadlineSeconds). + TimedOutReason = "ProgressDeadlineExceeded" + // PausedDeployReason is added in a deployment when it is paused. Lack of progress shouldn't be + // estimated once a deployment is paused. + PausedDeployReason = "DeploymentPaused" + // ResumedDeployReason is added in a deployment when it is resumed. Useful for not failing accidentally + // deployments that paused amidst a rollout and are bounded by a deadline. + ResumedDeployReason = "DeploymentResumed" + // + // Available: + + // MinimumReplicasAvailable is added in a deployment when it has its minimum replicas required available. + MinimumReplicasAvailable = "MinimumReplicasAvailable" + // MinimumReplicasUnavailable is added in a deployment when it doesn't have the minimum required replicas + // available. + MinimumReplicasUnavailable = "MinimumReplicasUnavailable" +) + +// NewDeploymentCondition creates a new deployment condition. +func NewDeploymentCondition(condType apps.DeploymentConditionType, status v1.ConditionStatus, reason, message string) *apps.DeploymentCondition { + return &apps.DeploymentCondition{ + Type: condType, + Status: status, + LastUpdateTime: metav1.Now(), + LastTransitionTime: metav1.Now(), + Reason: reason, + Message: message, + } +} + +// GetDeploymentCondition returns the condition with the provided type. 
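+// It returns nil if no such condition exists. The returned pointer refers to a
+// copy of the slice element, so mutating it does not change the stored status.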
+func GetDeploymentCondition(status apps.DeploymentStatus, condType apps.DeploymentConditionType) *apps.DeploymentCondition {
+	for i := range status.Conditions {
+		c := status.Conditions[i]
+		if c.Type == condType {
+			return &c
+		}
+	}
+	return nil
+}
+
+// SetDeploymentCondition updates the deployment to include the provided condition. If the condition that
+// we are about to add already exists and has the same status and reason then we are not going to update.
+func SetDeploymentCondition(status *apps.DeploymentStatus, condition apps.DeploymentCondition) {
+	currentCond := GetDeploymentCondition(*status, condition.Type)
+	if currentCond != nil && currentCond.Status == condition.Status && currentCond.Reason == condition.Reason {
+		return
+	}
+	// Do not update lastTransitionTime if the status of the condition doesn't change.
+	if currentCond != nil && currentCond.Status == condition.Status {
+		condition.LastTransitionTime = currentCond.LastTransitionTime
+	}
+	newConditions := filterOutCondition(status.Conditions, condition.Type)
+	status.Conditions = append(newConditions, condition)
+}
+
+// RemoveDeploymentCondition removes the deployment condition with the provided type.
+func RemoveDeploymentCondition(status *apps.DeploymentStatus, condType apps.DeploymentConditionType) {
+	status.Conditions = filterOutCondition(status.Conditions, condType)
+}
+
+// filterOutCondition returns a new slice of deployment conditions without conditions with the provided type.
+func filterOutCondition(conditions []apps.DeploymentCondition, condType apps.DeploymentConditionType) []apps.DeploymentCondition {
+	var newConditions []apps.DeploymentCondition
+	for _, c := range conditions {
+		if c.Type == condType {
+			continue
+		}
+		newConditions = append(newConditions, c)
+	}
+	return newConditions
+}
+
+// ReplicaSetToDeploymentCondition converts a replica set condition into a deployment condition.
+// Useful for promoting replica set failure conditions into deployments.
+func ReplicaSetToDeploymentCondition(cond apps.ReplicaSetCondition) apps.DeploymentCondition {
+	return apps.DeploymentCondition{
+		Type:               apps.DeploymentConditionType(cond.Type),
+		Status:             cond.Status,
+		LastTransitionTime: cond.LastTransitionTime,
+		LastUpdateTime:     cond.LastTransitionTime,
+		Reason:             cond.Reason,
+		Message:            cond.Message,
+	}
+}
+
+// SetDeploymentRevision updates the revision for a deployment.
+func SetDeploymentRevision(deployment *apps.Deployment, revision string) bool {
+	updated := false
+
+	if deployment.Annotations == nil {
+		deployment.Annotations = make(map[string]string)
+	}
+	if deployment.Annotations[RevisionAnnotation] != revision {
+		deployment.Annotations[RevisionAnnotation] = revision
+		updated = true
+	}
+
+	return updated
+}
+
+// MaxRevision finds the highest revision in the replica sets
+func MaxRevision(allRSs []*apps.ReplicaSet) int64 {
+	max := int64(0)
+	for _, rs := range allRSs {
+		if v, err := Revision(rs); err != nil {
+			// Skip replica sets whose revision information could not be parsed
+			klog.V(4).Infof("Error: %v. Couldn't parse revision for replica set %#v, deployment controller will skip it when reconciling revisions.", err, rs)
+		} else if v > max {
+			max = v
+		}
+	}
+	return max
+}
+
+// LastRevision finds the second max revision number in all replica sets (the last revision)
+func LastRevision(allRSs []*apps.ReplicaSet) int64 {
+	max, secMax := int64(0), int64(0)
+	for _, rs := range allRSs {
+		if v, err := Revision(rs); err != nil {
+			// Skip replica sets whose revision information could not be parsed
+			klog.V(4).Infof("Error: %v. Couldn't parse revision for replica set %#v, deployment controller will skip it when reconciling revisions.", err, rs)
+		} else if v >= max {
+			secMax = max
+			max = v
+		} else if v > secMax {
+			secMax = v
+		}
+	}
+	return secMax
+}
+
+// Revision returns the revision number of the input object.
+func Revision(obj runtime.Object) (int64, error) {
+	acc, err := meta.Accessor(obj)
+	if err != nil {
+		return 0, err
+	}
+	v, ok := acc.GetAnnotations()[RevisionAnnotation]
+	if !ok {
+		return 0, nil
+	}
+	return strconv.ParseInt(v, 10, 64)
+}
+
+// SetNewReplicaSetAnnotations sets new replica set's annotations appropriately by updating its revision and
+// copying required deployment annotations to it; it returns true if the replica set's annotations changed.
+func SetNewReplicaSetAnnotations(deployment *apps.Deployment, newRS *apps.ReplicaSet, newRevision string, exists bool, revHistoryLimitInChars int) bool {
+	// First, copy deployment's annotations (except for apply and revision annotations)
+	annotationChanged := copyDeploymentAnnotationsToReplicaSet(deployment, newRS)
+	// Then, update replica set's revision annotation
+	if newRS.Annotations == nil {
+		newRS.Annotations = make(map[string]string)
+	}
+	oldRevision, ok := newRS.Annotations[RevisionAnnotation]
+	// The newRS's revision should be the greatest among all RSes. Usually, its revision number is newRevision (the max revision number
+	// of all old RSes + 1). However, it's possible that some of the old RSes are deleted after the newRS revision has been updated, and
+	// newRevision becomes smaller than newRS's revision. We should only update newRS revision when it's smaller than newRevision.
+
+	oldRevisionInt, err := strconv.ParseInt(oldRevision, 10, 64)
+	if err != nil {
+		if oldRevision != "" {
+			klog.Warningf("Updating replica set revision: old revision is not an int: %v", err)
+			return false
+		}
+		// If the RS annotation is empty, initialize it to 0
+		oldRevisionInt = 0
+	}
+	newRevisionInt, err := strconv.ParseInt(newRevision, 10, 64)
+	if err != nil {
+		klog.Warningf("Updating replica set revision: new revision is not an int: %v", err)
+		return false
+	}
+	if oldRevisionInt < newRevisionInt {
+		newRS.Annotations[RevisionAnnotation] = newRevision
+		annotationChanged = true
+		klog.V(4).Infof("Updating replica set %q revision to %s", newRS.Name, newRevision)
+	}
+	// If a revision annotation already existed and this replica set was updated with a new revision
+	// then that means we are rolling back to this replica set. We need to preserve the old revisions
+	// for historical information.
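+	// (Illustration with hypothetical values: an RS at revision "2" with history "1"
+	// that is promoted to revision "5" gets its history extended to "1,2" below.)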
+ if ok && oldRevisionInt < newRevisionInt { + revisionHistoryAnnotation := newRS.Annotations[RevisionHistoryAnnotation] + oldRevisions := strings.Split(revisionHistoryAnnotation, ",") + if len(oldRevisions[0]) == 0 { + newRS.Annotations[RevisionHistoryAnnotation] = oldRevision + } else { + totalLen := len(revisionHistoryAnnotation) + len(oldRevision) + 1 + // index for the starting position in oldRevisions + start := 0 + for totalLen > revHistoryLimitInChars && start < len(oldRevisions) { + totalLen = totalLen - len(oldRevisions[start]) - 1 + start++ + } + if totalLen <= revHistoryLimitInChars { + oldRevisions = append(oldRevisions[start:], oldRevision) + newRS.Annotations[RevisionHistoryAnnotation] = strings.Join(oldRevisions, ",") + } else { + klog.Warningf("Not appending revision due to length limit of %v reached", revHistoryLimitInChars) + } + } + } + // If the new replica set is about to be created, we need to add replica annotations to it. + if !exists && SetReplicasAnnotations(newRS, *(deployment.Spec.Replicas), *(deployment.Spec.Replicas)+MaxSurge(*deployment)) { + annotationChanged = true + } + return annotationChanged +} + +var annotationsToSkip = map[string]bool{ + v1.LastAppliedConfigAnnotation: true, + RevisionAnnotation: true, + RevisionHistoryAnnotation: true, + DesiredReplicasAnnotation: true, + MaxReplicasAnnotation: true, + apps.DeprecatedRollbackTo: true, +} + +// skipCopyAnnotation returns true if we should skip copying the annotation with the given annotation key +// TODO: How to decide which annotations should / should not be copied? +// +// See https://github.com/kubernetes/kubernetes/pull/20035#issuecomment-179558615 +func skipCopyAnnotation(key string) bool { + return annotationsToSkip[key] +} + +// copyDeploymentAnnotationsToReplicaSet copies deployment's annotations to replica set's annotations, +// and returns true if replica set's annotation is changed. +// Note that apply and revision annotations are not copied. +func copyDeploymentAnnotationsToReplicaSet(deployment *apps.Deployment, rs *apps.ReplicaSet) bool { + rsAnnotationsChanged := false + if rs.Annotations == nil { + rs.Annotations = make(map[string]string) + } + for k, v := range deployment.Annotations { + // newRS revision is updated automatically in getNewReplicaSet, and the deployment's revision number is then updated + // by copying its newRS revision number. We should not copy deployment's revision to its newRS, since the update of + // deployment revision number may fail (revision becomes stale) and the revision number in newRS is more reliable. + if _, exist := rs.Annotations[k]; skipCopyAnnotation(k) || (exist && rs.Annotations[k] == v) { + continue + } + rs.Annotations[k] = v + rsAnnotationsChanged = true + } + return rsAnnotationsChanged +} + +// SetDeploymentAnnotationsTo sets deployment's annotations as given RS's annotations. +// This action should be done if and only if the deployment is rolling back to this rs. +// Note that apply and revision annotations are not changed. 
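+// Concretely, the deployment keeps only its annotations listed in annotationsToSkip
+// and takes everything else from the replica set it is rolling back to.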
+func SetDeploymentAnnotationsTo(deployment *apps.Deployment, rollbackToRS *apps.ReplicaSet) { + deployment.Annotations = getSkippedAnnotations(deployment.Annotations) + for k, v := range rollbackToRS.Annotations { + if !skipCopyAnnotation(k) { + deployment.Annotations[k] = v + } + } +} + +func getSkippedAnnotations(annotations map[string]string) map[string]string { + skippedAnnotations := make(map[string]string) + for k, v := range annotations { + if skipCopyAnnotation(k) { + skippedAnnotations[k] = v + } + } + return skippedAnnotations +} + +// FindActiveOrLatest returns the only active or the latest replica set in case there is at most one active +// replica set. If there are more active replica sets, then we should proportionally scale them. +func FindActiveOrLatest(newRS *apps.ReplicaSet, oldRSs []*apps.ReplicaSet) *apps.ReplicaSet { + if newRS == nil && len(oldRSs) == 0 { + return nil + } + + sort.Sort(sort.Reverse(ReplicaSetsByCreationTimestamp(oldRSs))) + allRSs := FilterActiveReplicaSets(append(oldRSs, newRS)) + + switch len(allRSs) { + case 0: + // If there is no active replica set then we should return the newest. + if newRS != nil { + return newRS + } + return oldRSs[0] + case 1: + return allRSs[0] + default: + return nil + } +} + +// GetDesiredReplicasAnnotation returns the number of desired replicas +func GetDesiredReplicasAnnotation(rs *apps.ReplicaSet) (int32, bool) { + return getIntFromAnnotation(rs, DesiredReplicasAnnotation) +} + +func getMaxReplicasAnnotation(rs *apps.ReplicaSet) (int32, bool) { + return getIntFromAnnotation(rs, MaxReplicasAnnotation) +} + +func getIntFromAnnotation(rs *apps.ReplicaSet, annotationKey string) (int32, bool) { + annotationValue, ok := rs.Annotations[annotationKey] + if !ok { + return int32(0), false + } + intValue, err := strconv.Atoi(annotationValue) + if err != nil { + klog.V(2).Infof("Cannot convert the value %q with annotation key %q for the replica set %q", annotationValue, annotationKey, rs.Name) + return int32(0), false + } + return int32(intValue), true +} + +// SetReplicasAnnotations sets the desiredReplicas and maxReplicas into the annotations +func SetReplicasAnnotations(rs *apps.ReplicaSet, desiredReplicas, maxReplicas int32) bool { + updated := false + if rs.Annotations == nil { + rs.Annotations = make(map[string]string) + } + desiredString := fmt.Sprintf("%d", desiredReplicas) + if hasString := rs.Annotations[DesiredReplicasAnnotation]; hasString != desiredString { + rs.Annotations[DesiredReplicasAnnotation] = desiredString + updated = true + } + maxString := fmt.Sprintf("%d", maxReplicas) + if hasString := rs.Annotations[MaxReplicasAnnotation]; hasString != maxString { + rs.Annotations[MaxReplicasAnnotation] = maxString + updated = true + } + return updated +} + +// ReplicasAnnotationsNeedUpdate return true if ReplicasAnnotations need to be updated +func ReplicasAnnotationsNeedUpdate(rs *apps.ReplicaSet, desiredReplicas, maxReplicas int32) bool { + if rs.Annotations == nil { + return true + } + desiredString := fmt.Sprintf("%d", desiredReplicas) + if hasString := rs.Annotations[DesiredReplicasAnnotation]; hasString != desiredString { + return true + } + maxString := fmt.Sprintf("%d", maxReplicas) + if hasString := rs.Annotations[MaxReplicasAnnotation]; hasString != maxString { + return true + } + return false +} + +// MaxUnavailable returns the maximum unavailable pods a rolling deployment can take. 
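+// For illustration (hypothetical): with spec.replicas=10 and maxUnavailable=25%,
+// the percentage resolves (rounding down) to 2; if both maxUnavailable and maxSurge
+// resolve to zero, ResolveFenceposts forces maxUnavailable to 1.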
+func MaxUnavailable(deployment apps.Deployment) int32 {
+	if !IsRollingUpdate(&deployment) || *(deployment.Spec.Replicas) == 0 {
+		return int32(0)
+	}
+	// Error caught by validation
+	_, maxUnavailable, _ := ResolveFenceposts(deployment.Spec.Strategy.RollingUpdate.MaxSurge, deployment.Spec.Strategy.RollingUpdate.MaxUnavailable, *(deployment.Spec.Replicas))
+	if maxUnavailable > *deployment.Spec.Replicas {
+		return *deployment.Spec.Replicas
+	}
+	return maxUnavailable
+}
+
+// MinAvailable returns the minimum available pods of a given deployment
+func MinAvailable(deployment *apps.Deployment) int32 {
+	if !IsRollingUpdate(deployment) {
+		return int32(0)
+	}
+	return *(deployment.Spec.Replicas) - MaxUnavailable(*deployment)
+}
+
+// MaxSurge returns the maximum surge pods a rolling deployment can take.
+func MaxSurge(deployment apps.Deployment) int32 {
+	if !IsRollingUpdate(&deployment) {
+		return int32(0)
+	}
+	// Error caught by validation
+	maxSurge, _, _ := ResolveFenceposts(deployment.Spec.Strategy.RollingUpdate.MaxSurge, deployment.Spec.Strategy.RollingUpdate.MaxUnavailable, *(deployment.Spec.Replicas))
+	return maxSurge
+}
+
+// GetProportion estimates the proportion for the provided replica set using 1. the current size
+// of the parent deployment, 2. the replica count that needs to be added to the replica sets of the
+// deployment, and 3. the total replicas added to the replica sets of the deployment so far.
+func GetProportion(rs *apps.ReplicaSet, d apps.Deployment, deploymentReplicasToAdd, deploymentReplicasAdded int32) int32 {
+	if rs == nil || *(rs.Spec.Replicas) == 0 || deploymentReplicasToAdd == 0 || deploymentReplicasToAdd == deploymentReplicasAdded {
+		return int32(0)
+	}
+
+	rsFraction := getReplicaSetFraction(*rs, d)
+	allowed := deploymentReplicasToAdd - deploymentReplicasAdded
+
+	if deploymentReplicasToAdd > 0 {
+		// Use the minimum between the replica set fraction and the maximum allowed replicas
+		// when scaling up. This way we ensure we will not scale up more than the allowed
+		// replicas we can add.
+		return integer.Int32Min(rsFraction, allowed)
+	}
+	// Use the maximum between the replica set fraction and the maximum allowed replicas
+	// when scaling down. This way we ensure we will not scale down more than the allowed
+	// replicas we can remove.
+	return integer.Int32Max(rsFraction, allowed)
+}
+
+// getReplicaSetFraction estimates the fraction of replicas a replica set can have in
+// 1. a scaling event during a rollout or 2. when scaling a paused deployment.
+func getReplicaSetFraction(rs apps.ReplicaSet, d apps.Deployment) int32 {
+	// If we are scaling down to zero then the fraction of this replica set is its whole size (negative)
+	if *(d.Spec.Replicas) == int32(0) {
+		return -*(rs.Spec.Replicas)
+	}
+
+	deploymentReplicas := *(d.Spec.Replicas) + MaxSurge(d)
+	annotatedReplicas, ok := getMaxReplicasAnnotation(&rs)
+	if !ok {
+		// If we cannot find the annotation then fall back to the current deployment size. Note that this
+		// will not be an accurate proportion estimation in case other replica sets have different values,
+		// which means that the deployment was scaled at some point, but we will at least stay within limits
+		// due to the min-max comparisons in getProportion.
+		annotatedReplicas = d.Status.Replicas
+	}
+
+	// We should never proportionally scale up from zero, which means rs.spec.replicas and annotatedReplicas
+	// will never be zero here.
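+	// (Worked example with hypothetical numbers: a replica set at 5 replicas,
+	// annotated with max-replicas=8, in a deployment whose size plus surge grows
+	// to 12, gets newRSsize = 5*12/8 = 7.5, rounded to 8, i.e. a fraction of +3.)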
+ newRSsize := (float64(*(rs.Spec.Replicas) * deploymentReplicas)) / float64(annotatedReplicas) + return integer.RoundToInt32(newRSsize) - *(rs.Spec.Replicas) +} + +// RsListFromClient returns an rsListFunc that wraps the given client. +func RsListFromClient(c appsclient.AppsV1Interface) RsListFunc { + return func(namespace string, options metav1.ListOptions) ([]*apps.ReplicaSet, error) { + rsList, err := c.ReplicaSets(namespace).List(context.TODO(), options) + if err != nil { + return nil, err + } + var ret []*apps.ReplicaSet + for i := range rsList.Items { + ret = append(ret, &rsList.Items[i]) + } + return ret, err + } +} + +// TODO: switch RsListFunc and podListFunc to full namespacers + +// RsListFunc returns the ReplicaSet from the ReplicaSet namespace and the List metav1.ListOptions. +type RsListFunc func(string, metav1.ListOptions) ([]*apps.ReplicaSet, error) + +// podListFunc returns the PodList from the Pod namespace and the List metav1.ListOptions. +type podListFunc func(string, metav1.ListOptions) (*v1.PodList, error) + +// ListReplicaSets returns a slice of RSes the given deployment targets. +// Note that this does NOT attempt to reconcile ControllerRef (adopt/orphan), +// because only the controller itself should do that. +// However, it does filter out anything whose ControllerRef doesn't match. +func ListReplicaSets(deployment *apps.Deployment, getRSList RsListFunc) ([]*apps.ReplicaSet, error) { + // TODO: Right now we list replica sets by their labels. We should list them by selector, i.e. the replica set's selector + // should be a superset of the deployment's selector, see https://github.com/kubernetes/kubernetes/issues/19830. + namespace := deployment.Namespace + selector, err := metav1.LabelSelectorAsSelector(deployment.Spec.Selector) + if err != nil { + return nil, err + } + options := metav1.ListOptions{LabelSelector: selector.String()} + all, err := getRSList(namespace, options) + if err != nil { + return nil, err + } + // Only include those whose ControllerRef matches the Deployment. + owned := make([]*apps.ReplicaSet, 0, len(all)) + for _, rs := range all { + if metav1.IsControlledBy(rs, deployment) { + owned = append(owned, rs) + } + } + return owned, nil +} + +// ListPods returns a list of pods the given deployment targets. +// This needs a list of ReplicaSets for the Deployment, +// which can be found with ListReplicaSets(). +// Note that this does NOT attempt to reconcile ControllerRef (adopt/orphan), +// because only the controller itself should do that. +// However, it does filter out anything whose ControllerRef doesn't match. +func ListPods(deployment *apps.Deployment, rsList []*apps.ReplicaSet, getPodList podListFunc) (*v1.PodList, error) { + namespace := deployment.Namespace + selector, err := metav1.LabelSelectorAsSelector(deployment.Spec.Selector) + if err != nil { + return nil, err + } + options := metav1.ListOptions{LabelSelector: selector.String()} + all, err := getPodList(namespace, options) + if err != nil { + return all, err + } + // Only include those whose ControllerRef points to a ReplicaSet that is in + // turn owned by this Deployment. 
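+	// (Building a set of ReplicaSet UIDs first makes the ownership check O(1) per
+	// pod instead of rescanning rsList for every pod.)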
+ rsMap := make(map[types.UID]bool, len(rsList)) + for _, rs := range rsList { + rsMap[rs.UID] = true + } + owned := &v1.PodList{Items: make([]v1.Pod, 0, len(all.Items))} + for i := range all.Items { + pod := &all.Items[i] + controllerRef := metav1.GetControllerOf(pod) + if controllerRef != nil && rsMap[controllerRef.UID] { + owned.Items = append(owned.Items, *pod) + } + } + return owned, nil +} + +// EqualIgnoreHash returns true if two given podTemplateSpec are equal, ignoring the diff in value of Labels[pod-template-hash] +// We ignore pod-template-hash because: +// 1. The hash result would be different upon podTemplateSpec API changes +// (e.g. the addition of a new field will cause the hash code to change) +// 2. The deployment template won't have hash labels +func EqualIgnoreHash(template1, template2 *v1.PodTemplateSpec) bool { + t1Copy := template1.DeepCopy() + t2Copy := template2.DeepCopy() + // Remove hash labels from template.Labels before comparing + delete(t1Copy.Labels, apps.DefaultDeploymentUniqueLabelKey) + delete(t2Copy.Labels, apps.DefaultDeploymentUniqueLabelKey) + return apiequality.Semantic.DeepEqual(t1Copy, t2Copy) +} + +// FindNewReplicaSet returns the new RS this given deployment targets (the one with the same pod template). +func FindNewReplicaSet(deployment *apps.Deployment, rsList []*apps.ReplicaSet) *apps.ReplicaSet { + sort.Sort(ReplicaSetsByCreationTimestamp(rsList)) + for i := range rsList { + if EqualIgnoreHash(&rsList[i].Spec.Template, &deployment.Spec.Template) { + // In rare cases, such as after cluster upgrades, Deployment may end up with + // having more than one new ReplicaSets that have the same template as its template, + // see https://github.com/kubernetes/kubernetes/issues/40415 + // We deterministically choose the oldest new ReplicaSet. + return rsList[i] + } + } + // new ReplicaSet does not exist. + return nil +} + +// FindOldReplicaSets returns the old replica sets targeted by the given Deployment, with the given slice of RSes. +// Note that the first set of old replica sets doesn't include the ones with no pods, and the second set of old replica sets include all old replica sets. +func FindOldReplicaSets(deployment *apps.Deployment, rsList []*apps.ReplicaSet) ([]*apps.ReplicaSet, []*apps.ReplicaSet) { + var requiredRSs []*apps.ReplicaSet + var allRSs []*apps.ReplicaSet + newRS := FindNewReplicaSet(deployment, rsList) + for _, rs := range rsList { + // Filter out new replica set + if newRS != nil && rs.UID == newRS.UID { + continue + } + allRSs = append(allRSs, rs) + if *(rs.Spec.Replicas) != 0 { + requiredRSs = append(requiredRSs, rs) + } + } + return requiredRSs, allRSs +} + +// SetFromReplicaSetTemplate sets the desired PodTemplateSpec from a replica set template to the given deployment. +func SetFromReplicaSetTemplate(deployment *apps.Deployment, template v1.PodTemplateSpec) *apps.Deployment { + deployment.Spec.Template.ObjectMeta = template.ObjectMeta + deployment.Spec.Template.Spec = template.Spec + deployment.Spec.Template.ObjectMeta.Labels = labelsutil.CloneAndRemoveLabel( + deployment.Spec.Template.ObjectMeta.Labels, + apps.DefaultDeploymentUniqueLabelKey) + return deployment +} + +// GetReplicaCountForReplicaSets returns the sum of Replicas of the given replica sets. 
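+// Nil entries are skipped, and the sum is over spec.replicas (the desired count),
+// not status.replicas (the observed count).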
+func GetReplicaCountForReplicaSets(replicaSets []*apps.ReplicaSet) int32 { + totalReplicas := int32(0) + for _, rs := range replicaSets { + if rs != nil { + totalReplicas += *(rs.Spec.Replicas) + } + } + return totalReplicas +} + +// GetActualReplicaCountForReplicaSets returns the sum of actual replicas of the given replica sets. +func GetActualReplicaCountForReplicaSets(replicaSets []*apps.ReplicaSet) int32 { + totalActualReplicas := int32(0) + for _, rs := range replicaSets { + if rs != nil { + totalActualReplicas += rs.Status.Replicas + } + } + return totalActualReplicas +} + +// GetReadyReplicaCountForReplicaSets returns the number of ready pods corresponding to the given replica sets. +func GetReadyReplicaCountForReplicaSets(replicaSets []*apps.ReplicaSet) int32 { + totalReadyReplicas := int32(0) + for _, rs := range replicaSets { + if rs != nil { + totalReadyReplicas += rs.Status.ReadyReplicas + } + } + return totalReadyReplicas +} + +// GetAvailableReplicaCountForReplicaSets returns the number of available pods corresponding to the given replica sets. +func GetAvailableReplicaCountForReplicaSets(replicaSets []*apps.ReplicaSet) int32 { + totalAvailableReplicas := int32(0) + for _, rs := range replicaSets { + if rs != nil { + totalAvailableReplicas += rs.Status.AvailableReplicas + } + } + return totalAvailableReplicas +} + +// IsRollingUpdate returns true if the strategy type is a rolling update. +func IsRollingUpdate(deployment *apps.Deployment) bool { + return deployment.Spec.Strategy.Type == apps.RollingUpdateDeploymentStrategyType +} + +// DeploymentComplete considers a deployment to be complete once all of its desired replicas +// are updated and available, and no old pods are running. +func DeploymentComplete(deployment *apps.Deployment, newStatus *apps.DeploymentStatus) bool { + return newStatus.UpdatedReplicas == *(deployment.Spec.Replicas) && + newStatus.Replicas == *(deployment.Spec.Replicas) && + newStatus.AvailableReplicas == *(deployment.Spec.Replicas) && + newStatus.ObservedGeneration >= deployment.Generation +} + +// DeploymentProgressing reports progress for a deployment. Progress is estimated by comparing the +// current with the new status of the deployment that the controller is observing. More specifically, +// when new pods are scaled up or become ready or available, or old pods are scaled down, then we +// consider the deployment is progressing. +func DeploymentProgressing(deployment *apps.Deployment, newStatus *apps.DeploymentStatus) bool { + oldStatus := deployment.Status + + // Old replicas that need to be scaled down + oldStatusOldReplicas := oldStatus.Replicas - oldStatus.UpdatedReplicas + newStatusOldReplicas := newStatus.Replicas - newStatus.UpdatedReplicas + + return (newStatus.UpdatedReplicas > oldStatus.UpdatedReplicas) || + (newStatusOldReplicas < oldStatusOldReplicas) || + newStatus.ReadyReplicas > deployment.Status.ReadyReplicas || + newStatus.AvailableReplicas > deployment.Status.AvailableReplicas +} + +// used for unit testing +var nowFn = func() time.Time { return time.Now() } + +// DeploymentTimedOut considers a deployment to have timed out once its condition that reports progress +// is older than progressDeadlineSeconds or a Progressing condition with a TimedOutReason reason already +// exists. +func DeploymentTimedOut(deployment *apps.Deployment, newStatus *apps.DeploymentStatus) bool { + if !HasProgressDeadline(deployment) { + return false + } + + // Look for the Progressing condition. 
If it doesn't exist, we have no basis to estimate progress.
+	// If it's already set with a TimedOutReason reason, we have already timed out, no need to check
+	// again.
+	condition := GetDeploymentCondition(*newStatus, apps.DeploymentProgressing)
+	if condition == nil {
+		return false
+	}
+	// If the previous condition has been a successful rollout then we shouldn't try to
+	// estimate any progress. Scenario:
+	//
+	// * progressDeadlineSeconds is smaller than the difference between now and the time
+	//   the last rollout finished in the past.
+	// * the creation of a new ReplicaSet triggers a resync of the Deployment prior to the
+	//   cached copy of the Deployment getting updated with the status.condition that indicates
+	//   the creation of the new ReplicaSet.
+	//
+	// The Deployment will be resynced and eventually its Progressing condition will catch
+	// up with the state of the world.
+	if condition.Reason == NewRSAvailableReason {
+		return false
+	}
+	if condition.Reason == TimedOutReason {
+		return true
+	}
+
+	// Look at the difference in seconds between now and the last time we reported any
+	// progress, tried to create a replica set, or resumed a paused deployment, and
+	// compare against progressDeadlineSeconds.
+	from := condition.LastUpdateTime
+	now := nowFn()
+	delta := time.Duration(*deployment.Spec.ProgressDeadlineSeconds) * time.Second
+	timedOut := from.Add(delta).Before(now)
+
+	klog.V(4).Infof("Deployment %q timed out (%t) [last progress check: %v - now: %v]", deployment.Name, timedOut, from, now)
+	return timedOut
+}
+
+// NewRSNewReplicas calculates the number of replicas a deployment's new RS should have.
+// When one of the following is true, we're rolling out the deployment; otherwise, we're scaling it.
+// 1) The new RS is saturated: newRS's replicas == deployment's replicas
+// 2) The max number of pods allowed is reached: deployment's replicas + maxSurge == all RSs' replicas
+func NewRSNewReplicas(deployment *apps.Deployment, allRSs []*apps.ReplicaSet, newRS *apps.ReplicaSet) (int32, error) {
+	switch deployment.Spec.Strategy.Type {
+	case apps.RollingUpdateDeploymentStrategyType:
+		// Check if we can scale up.
+		maxSurge, err := intstrutil.GetScaledValueFromIntOrPercent(deployment.Spec.Strategy.RollingUpdate.MaxSurge, int(*(deployment.Spec.Replicas)), true)
+		if err != nil {
+			return 0, err
+		}
+		// Find the total number of pods
+		currentPodCount := GetReplicaCountForReplicaSets(allRSs)
+		maxTotalPods := *(deployment.Spec.Replicas) + int32(maxSurge)
+		if currentPodCount >= maxTotalPods {
+			// Cannot scale up.
+			return *(newRS.Spec.Replicas), nil
+		}
+		// Scale up.
+		scaleUpCount := maxTotalPods - currentPodCount
+		// Do not exceed the number of desired replicas.
+		scaleUpCount = int32(integer.IntMin(int(scaleUpCount), int(*(deployment.Spec.Replicas)-*(newRS.Spec.Replicas))))
+		return *(newRS.Spec.Replicas) + scaleUpCount, nil
+	case apps.RecreateDeploymentStrategyType:
+		return *(deployment.Spec.Replicas), nil
+	default:
+		return 0, fmt.Errorf("deployment type %v isn't supported", deployment.Spec.Strategy.Type)
+	}
+}
+
+// IsSaturated checks if the new replica set is saturated by comparing its size with its deployment size.
+// Both the deployment and the replica set have to believe this replica set can own all of the desired
+// replicas in the deployment, and the annotation helps in achieving that. All pods of the ReplicaSet
+// need to be available.
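+// For illustration (hypothetical): a deployment with spec.replicas=5 is saturated
+// only by a replica set with spec.replicas=5, a desired-replicas annotation of "5",
+// and 5 available replicas.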
+func IsSaturated(deployment *apps.Deployment, rs *apps.ReplicaSet) bool {
+	if rs == nil {
+		return false
+	}
+	desiredString := rs.Annotations[DesiredReplicasAnnotation]
+	desired, err := strconv.Atoi(desiredString)
+	if err != nil {
+		return false
+	}
+	return *(rs.Spec.Replicas) == *(deployment.Spec.Replicas) &&
+		int32(desired) == *(deployment.Spec.Replicas) &&
+		rs.Status.AvailableReplicas == *(deployment.Spec.Replicas)
+}
+
+// WaitForObservedDeployment polls for the deployment to be updated so that deployment.Status.ObservedGeneration >= desiredGeneration.
+// Returns an error if polling times out.
+func WaitForObservedDeployment(getDeploymentFunc func() (*apps.Deployment, error), desiredGeneration int64, interval, timeout time.Duration) error {
+	// TODO: This should take clientset.Interface when all code is updated to use clientset. Keeping it this way allows the function to be used by callers who have client.Interface.
+	return wait.PollImmediate(interval, timeout, func() (bool, error) {
+		deployment, err := getDeploymentFunc()
+		if err != nil {
+			return false, err
+		}
+		return deployment.Status.ObservedGeneration >= desiredGeneration, nil
+	})
+}
+
+// ResolveFenceposts resolves both maxSurge and maxUnavailable. This needs to happen in one
+// step. For example:
+//
+// 2 desired, max unavailable 1%, surge 0% - should scale old(-1), then new(+1), then old(-1), then new(+1)
+// 1 desired, max unavailable 1%, surge 0% - should scale old(-1), then new(+1)
+// 2 desired, max unavailable 25%, surge 1% - should scale new(+1), then old(-1), then new(+1), then old(-1)
+// 1 desired, max unavailable 25%, surge 1% - should scale new(+1), then old(-1)
+// 2 desired, max unavailable 0%, surge 1% - should scale new(+1), then old(-1), then new(+1), then old(-1)
+// 1 desired, max unavailable 0%, surge 1% - should scale new(+1), then old(-1)
+func ResolveFenceposts(maxSurge, maxUnavailable *intstrutil.IntOrString, desired int32) (int32, int32, error) {
+	surge, err := intstrutil.GetScaledValueFromIntOrPercent(intstrutil.ValueOrDefault(maxSurge, intstrutil.FromInt(0)), int(desired), true)
+	if err != nil {
+		return 0, 0, err
+	}
+	unavailable, err := intstrutil.GetScaledValueFromIntOrPercent(intstrutil.ValueOrDefault(maxUnavailable, intstrutil.FromInt(0)), int(desired), false)
+	if err != nil {
+		return 0, 0, err
+	}
+
+	if surge == 0 && unavailable == 0 {
+		// Validation should never allow the user to explicitly use zero values for both maxSurge
+		// and maxUnavailable. Due to rounding down maxUnavailable though, it may resolve to zero.
+		// If both fenceposts resolve to zero, then we should set maxUnavailable to 1 on the
+		// theory that surge might not work due to quota.
+		unavailable = 1
+	}
+
+	return int32(surge), int32(unavailable), nil
+}
+
+// HasProgressDeadline checks if the Deployment d is expected to surface the reason
+// "ProgressDeadlineExceeded" when the Deployment progress takes longer than expected.
+func HasProgressDeadline(d *apps.Deployment) bool {
+	return d.Spec.ProgressDeadlineSeconds != nil && *d.Spec.ProgressDeadlineSeconds != math.MaxInt32
+}
+
+// HasRevisionHistoryLimit checks if the Deployment d is expected to keep a specified number of
+// old replicaSets. These replicaSets are mainly kept for the purpose of rollback.
+// The RevisionHistoryLimit can start from 0 (no retained ReplicaSets). When set to math.MaxInt32,
+// the Deployment will keep all revisions.
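+// For example, revisionHistoryLimit: 0 retains no old ReplicaSets, while
+// math.MaxInt32 disables the cleanup performed by cleanupDeployment.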
+func HasRevisionHistoryLimit(d *apps.Deployment) bool {
+	return d.Spec.RevisionHistoryLimit != nil && *d.Spec.RevisionHistoryLimit != math.MaxInt32
+}
+
+// GetDeploymentsForReplicaSet returns a list of Deployments that potentially
+// match a ReplicaSet. Only the one specified in the ReplicaSet's ControllerRef
+// will actually manage it.
+// Returns an error only if no matching Deployments are found.
+func GetDeploymentsForReplicaSet(deploymentLister appslisters.DeploymentLister, rs *apps.ReplicaSet) ([]*apps.Deployment, error) {
+	if len(rs.Labels) == 0 {
+		return nil, fmt.Errorf("no deployments found for ReplicaSet %v because it has no labels", rs.Name)
+	}
+
+	// TODO: MODIFY THIS METHOD so that it checks for the podTemplateSpecHash label
+	dList, err := deploymentLister.Deployments(rs.Namespace).List(labels.Everything())
+	if err != nil {
+		return nil, err
+	}
+
+	var deployments []*apps.Deployment
+	for _, d := range dList {
+		selector, err := metav1.LabelSelectorAsSelector(d.Spec.Selector)
+		if err != nil {
+			// This object has an invalid selector; it does not match the ReplicaSet
+			continue
+		}
+		// If a deployment with a nil or empty selector creeps in, it should match nothing, not everything.
+		if selector.Empty() || !selector.Matches(labels.Set(rs.Labels)) {
+			continue
+		}
+		deployments = append(deployments, d)
+	}
+
+	if len(deployments) == 0 {
+		return nil, fmt.Errorf("could not find deployments for ReplicaSet %s in namespace %s with labels: %v", rs.Name, rs.Namespace, rs.Labels)
+	}
+
+	return deployments, nil
+}
+
+// ReplicaSetsByRevision sorts a list of ReplicaSets by revision, using their creation timestamp or name as a tie breaker.
+// By using the creation timestamp, this sorts from old to new replica sets.
+type ReplicaSetsByRevision []*apps.ReplicaSet
+
+func (o ReplicaSetsByRevision) Len() int      { return len(o) }
+func (o ReplicaSetsByRevision) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
+func (o ReplicaSetsByRevision) Less(i, j int) bool {
+	revision1, err1 := Revision(o[i])
+	revision2, err2 := Revision(o[j])
+	if err1 != nil || err2 != nil || revision1 == revision2 {
+		return ReplicaSetsByCreationTimestamp(o).Less(i, j)
+	}
+	return revision1 < revision2
+}
+
+/*
+	**** Copied from "k8s.io/kubernetes/pkg/controller/controller_utils.go" ****
+	-------------------------------- BEGIN --------------------------------------
+*/
+
+// FilterActiveReplicaSets returns replica sets that have (or at least ought to have) pods.
+func FilterActiveReplicaSets(replicaSets []*apps.ReplicaSet) []*apps.ReplicaSet {
+	activeFilter := func(rs *apps.ReplicaSet) bool {
+		return rs != nil && *(rs.Spec.Replicas) > 0
+	}
+	return FilterReplicaSets(replicaSets, activeFilter)
+}
+
+type filterRS func(rs *apps.ReplicaSet) bool
+
+// FilterReplicaSets returns replica sets that are filtered by filterFn (all returned ones should match filterFn).
+func FilterReplicaSets(RSes []*apps.ReplicaSet, filterFn filterRS) []*apps.ReplicaSet {
+	var filtered []*apps.ReplicaSet
+	for i := range RSes {
+		if filterFn(RSes[i]) {
+			filtered = append(filtered, RSes[i])
+		}
+	}
+	return filtered
+}
+
+// PodKey returns a key unique to the given pod within a cluster.
+// It's used so we consistently use the same key scheme in this module.
+// It does exactly what cache.MetaNamespaceKeyFunc would have done
+// except there's no possibility of error since we know the exact type.
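+// For example, a pod "nginx" in namespace "default" yields the key "default/nginx".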
+func PodKey(pod *v1.Pod) string { + return fmt.Sprintf("%v/%v", pod.Namespace, pod.Name) +} + +// ControllersByCreationTimestamp sorts a list of ReplicationControllers by creation timestamp, using their names as a tie breaker. +type ControllersByCreationTimestamp []*v1.ReplicationController + +func (o ControllersByCreationTimestamp) Len() int { return len(o) } +func (o ControllersByCreationTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] } +func (o ControllersByCreationTimestamp) Less(i, j int) bool { + if o[i].CreationTimestamp.Equal(&o[j].CreationTimestamp) { + return o[i].Name < o[j].Name + } + return o[i].CreationTimestamp.Before(&o[j].CreationTimestamp) +} + +// ReplicaSetsByCreationTimestamp sorts a list of ReplicaSet by creation timestamp, using their names as a tie breaker. +type ReplicaSetsByCreationTimestamp []*apps.ReplicaSet + +func (o ReplicaSetsByCreationTimestamp) Len() int { return len(o) } +func (o ReplicaSetsByCreationTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] } +func (o ReplicaSetsByCreationTimestamp) Less(i, j int) bool { + if o[i].CreationTimestamp.Equal(&o[j].CreationTimestamp) { + return o[i].Name < o[j].Name + } + return o[i].CreationTimestamp.Before(&o[j].CreationTimestamp) +} + +// ReplicaSetsBySizeOlder sorts a list of ReplicaSet by size in descending order, using their creation timestamp or name as a tie breaker. +// By using the creation timestamp, this sorts from old to new replica sets. +type ReplicaSetsBySizeOlder []*apps.ReplicaSet + +func (o ReplicaSetsBySizeOlder) Len() int { return len(o) } +func (o ReplicaSetsBySizeOlder) Swap(i, j int) { o[i], o[j] = o[j], o[i] } +func (o ReplicaSetsBySizeOlder) Less(i, j int) bool { + if *(o[i].Spec.Replicas) == *(o[j].Spec.Replicas) { + return ReplicaSetsByCreationTimestamp(o).Less(i, j) + } + return *(o[i].Spec.Replicas) > *(o[j].Spec.Replicas) +} + +// ReplicaSetsBySizeNewer sorts a list of ReplicaSet by size in descending order, using their creation timestamp or name as a tie breaker. +// By using the creation timestamp, this sorts from new to old replica sets. +type ReplicaSetsBySizeNewer []*apps.ReplicaSet + +func (o ReplicaSetsBySizeNewer) Len() int { return len(o) } +func (o ReplicaSetsBySizeNewer) Swap(i, j int) { o[i], o[j] = o[j], o[i] } +func (o ReplicaSetsBySizeNewer) Less(i, j int) bool { + if *(o[i].Spec.Replicas) == *(o[j].Spec.Replicas) { + return ReplicaSetsByCreationTimestamp(o).Less(j, i) + } + return *(o[i].Spec.Replicas) > *(o[j].Spec.Replicas) +} + +/* + --------------------------------- END --------------------------------------- + **** Copied from "k8s.io/kubernetes/pkg/controller/controller_utils.go" **** +*/ diff --git a/pkg/controller/deployment/util/deployment_util_test.go b/pkg/controller/deployment/util/deployment_util_test.go new file mode 100644 index 00000000..fa322728 --- /dev/null +++ b/pkg/controller/deployment/util/deployment_util_test.go @@ -0,0 +1,1169 @@ +/* +Copyright 2015 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package util
+
+import (
+	"fmt"
+	"math"
+	"math/rand"
+	"reflect"
+	"sort"
+	"strconv"
+	"testing"
+	"time"
+
+	apps "k8s.io/api/apps/v1"
+	v1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/types"
+	"k8s.io/apimachinery/pkg/util/intstr"
+)
+
+func newDControllerRef(d *apps.Deployment) *metav1.OwnerReference {
+	isController := true
+	return &metav1.OwnerReference{
+		APIVersion: "apps/v1",
+		Kind:       "Deployment",
+		Name:       d.GetName(),
+		UID:        d.GetUID(),
+		Controller: &isController,
+	}
+}
+
+// generateRS creates a replica set, with the input deployment's template as its template
+func generateRS(deployment apps.Deployment) apps.ReplicaSet {
+	template := deployment.Spec.Template.DeepCopy()
+	return apps.ReplicaSet{
+		ObjectMeta: metav1.ObjectMeta{
+			UID:             randomUID(),
+			Name:            randomName("replicaset"),
+			Labels:          template.Labels,
+			OwnerReferences: []metav1.OwnerReference{*newDControllerRef(&deployment)},
+		},
+		Spec: apps.ReplicaSetSpec{
+			Replicas: new(int32),
+			Template: *template,
+			Selector: &metav1.LabelSelector{MatchLabels: template.Labels},
+		},
+	}
+}
+
+func randomUID() types.UID {
+	return types.UID(strconv.FormatInt(rand.Int63(), 10))
+}
+
+// randomName returns a randomized name with the given prefix; using a random
+// suffix (rather than a constant) keeps generated objects distinguishable.
+func randomName(prefix string) string {
+	return fmt.Sprintf("%s-%s", prefix, strconv.FormatInt(rand.Int63(), 10))
+}
+
+// generateDeployment creates a deployment, with the input image as its template
+func generateDeployment(image string) apps.Deployment {
+	podLabels := map[string]string{"name": image}
+	terminationSec := int64(30)
+	enableServiceLinks := v1.DefaultEnableServiceLinks
+	return apps.Deployment{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:        image,
+			Annotations: make(map[string]string),
+		},
+		Spec: apps.DeploymentSpec{
+			Replicas: func(i int32) *int32 { return &i }(1),
+			Selector: &metav1.LabelSelector{MatchLabels: podLabels},
+			Template: v1.PodTemplateSpec{
+				ObjectMeta: metav1.ObjectMeta{
+					Labels: podLabels,
+				},
+				Spec: v1.PodSpec{
+					Containers: []v1.Container{
+						{
+							Name:                   image,
+							Image:                  image,
+							ImagePullPolicy:        v1.PullAlways,
+							TerminationMessagePath: v1.TerminationMessagePathDefault,
+						},
+					},
+					DNSPolicy:                     v1.DNSClusterFirst,
+					TerminationGracePeriodSeconds: &terminationSec,
+					RestartPolicy:                 v1.RestartPolicyAlways,
+					SecurityContext:               &v1.PodSecurityContext{},
+					EnableServiceLinks:            &enableServiceLinks,
+				},
+			},
+		},
+	}
+}
+
+func generatePodTemplateSpec(name, nodeName string, annotations, labels map[string]string) v1.PodTemplateSpec {
+	return v1.PodTemplateSpec{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:        name,
+			Annotations: annotations,
+			Labels:      labels,
+		},
+		Spec: v1.PodSpec{
+			NodeName: nodeName,
+		},
+	}
+}
+
+func TestEqualIgnoreHash(t *testing.T) {
+	tests := []struct {
+		Name           string
+		former, latter v1.PodTemplateSpec
+		expected       bool
+	}{
+		{
+			"Same spec, same labels",
+			generatePodTemplateSpec("foo", "foo-node", map[string]string{}, map[string]string{apps.DefaultDeploymentUniqueLabelKey: "value-1", "something": "else"}),
+			generatePodTemplateSpec("foo", "foo-node", map[string]string{}, map[string]string{apps.DefaultDeploymentUniqueLabelKey: "value-1", "something": "else"}),
+			true,
+		},
+		{
+			"Same spec, only pod-template-hash label value is different",
+			generatePodTemplateSpec("foo", "foo-node", map[string]string{}, map[string]string{apps.DefaultDeploymentUniqueLabelKey: "value-1", "something": "else"}),
+			generatePodTemplateSpec("foo", "foo-node", map[string]string{}, map[string]string{apps.DefaultDeploymentUniqueLabelKey: "value-2", "something": "else"}),
+			true,
+		},
+		{
+			"Same spec, the former doesn't have pod-template-hash label",
+			generatePodTemplateSpec("foo", "foo-node", map[string]string{}, map[string]string{"something": "else"}),
+			generatePodTemplateSpec("foo", "foo-node", map[string]string{}, map[string]string{apps.DefaultDeploymentUniqueLabelKey: "value-2", "something": "else"}),
+			true,
+		},
+		{
+			"Same spec, the label is different, the former doesn't have pod-template-hash label, same number of labels",
+			generatePodTemplateSpec("foo", "foo-node", map[string]string{}, map[string]string{"something": "else"}),
+			generatePodTemplateSpec("foo", "foo-node", map[string]string{}, map[string]string{apps.DefaultDeploymentUniqueLabelKey: "value-2"}),
+			false,
+		},
+		{
+			"Same spec, the label is different, the latter doesn't have pod-template-hash label, same number of labels",
+			generatePodTemplateSpec("foo", "foo-node", map[string]string{}, map[string]string{apps.DefaultDeploymentUniqueLabelKey: "value-1"}),
+			generatePodTemplateSpec("foo", "foo-node", map[string]string{}, map[string]string{"something": "else"}),
+			false,
+		},
+		{
+			"Same spec, the label is different, and the pod-template-hash label value is the same",
+			generatePodTemplateSpec("foo", "foo-node", map[string]string{}, map[string]string{apps.DefaultDeploymentUniqueLabelKey: "value-1"}),
+			generatePodTemplateSpec("foo", "foo-node", map[string]string{}, map[string]string{apps.DefaultDeploymentUniqueLabelKey: "value-1", "something": "else"}),
+			false,
+		},
+		{
+			"Different spec, same labels",
+			generatePodTemplateSpec("foo", "foo-node", map[string]string{"former": "value"}, map[string]string{apps.DefaultDeploymentUniqueLabelKey: "value-1", "something": "else"}),
+			generatePodTemplateSpec("foo", "foo-node", map[string]string{"latter": "value"}, map[string]string{apps.DefaultDeploymentUniqueLabelKey: "value-1", "something": "else"}),
+			false,
+		},
+		{
+			"Different spec, different pod-template-hash label value",
+			generatePodTemplateSpec("foo-1", "foo-node", map[string]string{}, map[string]string{apps.DefaultDeploymentUniqueLabelKey: "value-1", "something": "else"}),
+			generatePodTemplateSpec("foo-2", "foo-node", map[string]string{}, map[string]string{apps.DefaultDeploymentUniqueLabelKey: "value-2", "something": "else"}),
+			false,
+		},
+		{
+			"Different spec, the former doesn't have pod-template-hash label",
+			generatePodTemplateSpec("foo-1", "foo-node-1", map[string]string{}, map[string]string{"something": "else"}),
+			generatePodTemplateSpec("foo-2", "foo-node-2", map[string]string{}, map[string]string{apps.DefaultDeploymentUniqueLabelKey: "value-2", "something": "else"}),
+			false,
+		},
+		{
+			"Different spec, different labels",
+			generatePodTemplateSpec("foo", "foo-node-1", map[string]string{}, map[string]string{"something": "else"}),
+			generatePodTemplateSpec("foo", "foo-node-2", map[string]string{}, map[string]string{"nothing": "else"}),
+			false,
+		},
+	}
+
+	for _, test := range tests {
+		t.Run(test.Name, func(t *testing.T) {
+			runTest := func(t1, t2 *v1.PodTemplateSpec, reversed bool) {
+				reverseString := ""
+				if reversed {
+					reverseString = " (reverse order)"
+				}
+				// Run
+				equal := EqualIgnoreHash(t1, t2)
+				if equal != test.expected {
+					t.Errorf("%q%s: expected %v, got %v", test.Name, reverseString, test.expected, equal)
+					return
+				}
+				if t1.Labels == nil || t2.Labels == nil {
+					t.Errorf("%q%s: labels unexpectedly became nil", test.Name, reverseString)
+				}
+			}
+
+			runTest(&test.former, &test.latter, false)
+			// Test the same case in reverse order
+			runTest(&test.latter, &test.former, true)
+		})
+ } +} + +func TestFindNewReplicaSet(t *testing.T) { + now := metav1.Now() + later := metav1.Time{Time: now.Add(time.Minute)} + + deployment := generateDeployment("nginx") + newRS := generateRS(deployment) + newRS.Labels[apps.DefaultDeploymentUniqueLabelKey] = "hash" + newRS.CreationTimestamp = later + + newRSDup := generateRS(deployment) + newRSDup.Labels[apps.DefaultDeploymentUniqueLabelKey] = "different-hash" + newRSDup.CreationTimestamp = now + + oldDeployment := generateDeployment("nginx") + oldDeployment.Spec.Template.Spec.Containers[0].Name = "nginx-old-1" + oldRS := generateRS(oldDeployment) + oldRS.Status.FullyLabeledReplicas = *(oldRS.Spec.Replicas) + + tests := []struct { + Name string + deployment apps.Deployment + rsList []*apps.ReplicaSet + expected *apps.ReplicaSet + }{ + { + Name: "Get new ReplicaSet with the same template as Deployment spec but different pod-template-hash value", + deployment: deployment, + rsList: []*apps.ReplicaSet{&newRS, &oldRS}, + expected: &newRS, + }, + { + Name: "Get the oldest new ReplicaSet when there are more than one ReplicaSet with the same template", + deployment: deployment, + rsList: []*apps.ReplicaSet{&newRS, &oldRS, &newRSDup}, + expected: &newRSDup, + }, + { + Name: "Get nil new ReplicaSet", + deployment: deployment, + rsList: []*apps.ReplicaSet{&oldRS}, + expected: nil, + }, + } + + for _, test := range tests { + t.Run(test.Name, func(t *testing.T) { + if rs := FindNewReplicaSet(&test.deployment, test.rsList); !reflect.DeepEqual(rs, test.expected) { + t.Errorf("In test case %q, expected %#v, got %#v", test.Name, test.expected, rs) + } + }) + } +} + +func TestFindOldReplicaSets(t *testing.T) { + now := metav1.Now() + later := metav1.Time{Time: now.Add(time.Minute)} + before := metav1.Time{Time: now.Add(-time.Minute)} + + deployment := generateDeployment("nginx") + newRS := generateRS(deployment) + *(newRS.Spec.Replicas) = 1 + newRS.Labels[apps.DefaultDeploymentUniqueLabelKey] = "hash" + newRS.CreationTimestamp = later + + newRSDup := generateRS(deployment) + newRSDup.Labels[apps.DefaultDeploymentUniqueLabelKey] = "different-hash" + newRSDup.CreationTimestamp = now + + oldDeployment := generateDeployment("nginx") + oldDeployment.Spec.Template.Spec.Containers[0].Name = "nginx-old-1" + oldRS := generateRS(oldDeployment) + oldRS.Status.FullyLabeledReplicas = *(oldRS.Spec.Replicas) + oldRS.CreationTimestamp = before + + tests := []struct { + Name string + deployment apps.Deployment + rsList []*apps.ReplicaSet + expected []*apps.ReplicaSet + expectedRequire []*apps.ReplicaSet + }{ + { + Name: "Get old ReplicaSets", + deployment: deployment, + rsList: []*apps.ReplicaSet{&newRS, &oldRS}, + expected: []*apps.ReplicaSet{&oldRS}, + expectedRequire: nil, + }, + { + Name: "Get old ReplicaSets with no new ReplicaSet", + deployment: deployment, + rsList: []*apps.ReplicaSet{&oldRS}, + expected: []*apps.ReplicaSet{&oldRS}, + expectedRequire: nil, + }, + { + Name: "Get old ReplicaSets with two new ReplicaSets, only the oldest new ReplicaSet is seen as new ReplicaSet", + deployment: deployment, + rsList: []*apps.ReplicaSet{&oldRS, &newRS, &newRSDup}, + expected: []*apps.ReplicaSet{&oldRS, &newRS}, + expectedRequire: []*apps.ReplicaSet{&newRS}, + }, + { + Name: "Get empty old ReplicaSets", + deployment: deployment, + rsList: []*apps.ReplicaSet{&newRS}, + expected: nil, + expectedRequire: nil, + }, + } + + for _, test := range tests { + t.Run(test.Name, func(t *testing.T) { + requireRS, allRS := FindOldReplicaSets(&test.deployment, test.rsList) + 
sort.Sort(ReplicaSetsByCreationTimestamp(allRS)) + sort.Sort(ReplicaSetsByCreationTimestamp(test.expected)) + if !reflect.DeepEqual(allRS, test.expected) { + t.Errorf("In test case %q, expected %#v, got %#v", test.Name, test.expected, allRS) + } + // RSs are getting filtered correctly by rs.spec.replicas + if !reflect.DeepEqual(requireRS, test.expectedRequire) { + t.Errorf("In test case %q, expected %#v, got %#v", test.Name, test.expectedRequire, requireRS) + } + }) + } +} + +func TestGetReplicaCountForReplicaSets(t *testing.T) { + rs1 := generateRS(generateDeployment("foo")) + *(rs1.Spec.Replicas) = 1 + rs1.Status.Replicas = 2 + rs2 := generateRS(generateDeployment("bar")) + *(rs2.Spec.Replicas) = 2 + rs2.Status.Replicas = 3 + + tests := []struct { + Name string + sets []*apps.ReplicaSet + expectedCount int32 + expectedActual int32 + }{ + { + "1:2 Replicas", + []*apps.ReplicaSet{&rs1}, + 1, + 2, + }, + { + "3:5 Replicas", + []*apps.ReplicaSet{&rs1, &rs2}, + 3, + 5, + }, + } + + for _, test := range tests { + t.Run(test.Name, func(t *testing.T) { + rs := GetReplicaCountForReplicaSets(test.sets) + if rs != test.expectedCount { + t.Errorf("In test case %s, expectedCount %+v, got %+v", test.Name, test.expectedCount, rs) + } + rs = GetActualReplicaCountForReplicaSets(test.sets) + if rs != test.expectedActual { + t.Errorf("In test case %s, expectedActual %+v, got %+v", test.Name, test.expectedActual, rs) + } + }) + } +} + +func TestResolveFenceposts(t *testing.T) { + tests := []struct { + maxSurge *string + maxUnavailable *string + desired int32 + expectSurge int32 + expectUnavailable int32 + expectError bool + }{ + { + maxSurge: newString("0%"), + maxUnavailable: newString("0%"), + desired: 0, + expectSurge: 0, + expectUnavailable: 1, + expectError: false, + }, + { + maxSurge: newString("39%"), + maxUnavailable: newString("39%"), + desired: 10, + expectSurge: 4, + expectUnavailable: 3, + expectError: false, + }, + { + maxSurge: newString("oops"), + maxUnavailable: newString("39%"), + desired: 10, + expectSurge: 0, + expectUnavailable: 0, + expectError: true, + }, + { + maxSurge: newString("55%"), + maxUnavailable: newString("urg"), + desired: 10, + expectSurge: 0, + expectUnavailable: 0, + expectError: true, + }, + { + maxSurge: nil, + maxUnavailable: newString("39%"), + desired: 10, + expectSurge: 0, + expectUnavailable: 3, + expectError: false, + }, + { + maxSurge: newString("39%"), + maxUnavailable: nil, + desired: 10, + expectSurge: 4, + expectUnavailable: 0, + expectError: false, + }, + { + maxSurge: nil, + maxUnavailable: nil, + desired: 10, + expectSurge: 0, + expectUnavailable: 1, + expectError: false, + }, + } + + for num, test := range tests { + t.Run(fmt.Sprintf("%d", num), func(t *testing.T) { + var maxSurge, maxUnavail *intstr.IntOrString + if test.maxSurge != nil { + surge := intstr.FromString(*test.maxSurge) + maxSurge = &surge + } + if test.maxUnavailable != nil { + unavail := intstr.FromString(*test.maxUnavailable) + maxUnavail = &unavail + } + surge, unavail, err := ResolveFenceposts(maxSurge, maxUnavail, test.desired) + if err != nil && !test.expectError { + t.Errorf("unexpected error %v", err) + } + if err == nil && test.expectError { + t.Error("expected error") + } + if surge != test.expectSurge || unavail != test.expectUnavailable { + t.Errorf("#%v got %v:%v, want %v:%v", num, surge, unavail, test.expectSurge, test.expectUnavailable) + } + }) + } +} + +func newString(s string) *string { + return &s +} + +func TestNewRSNewReplicas(t *testing.T) { + tests := []struct { + 
Name string + strategyType apps.DeploymentStrategyType + depReplicas int32 + newRSReplicas int32 + maxSurge int + expected int32 + }{ + { + "can not scale up - to newRSReplicas", + apps.RollingUpdateDeploymentStrategyType, + 1, 5, 1, 5, + }, + { + "scale up - to depReplicas", + apps.RollingUpdateDeploymentStrategyType, + 6, 2, 10, 6, + }, + { + "recreate - to depReplicas", + apps.RecreateDeploymentStrategyType, + 3, 1, 1, 3, + }, + } + newDeployment := generateDeployment("nginx") + newRC := generateRS(newDeployment) + rs5 := generateRS(newDeployment) + *(rs5.Spec.Replicas) = 5 + + for _, test := range tests { + t.Run(test.Name, func(t *testing.T) { + *(newDeployment.Spec.Replicas) = test.depReplicas + newDeployment.Spec.Strategy = apps.DeploymentStrategy{Type: test.strategyType} + newDeployment.Spec.Strategy.RollingUpdate = &apps.RollingUpdateDeployment{ + MaxUnavailable: func(i int) *intstr.IntOrString { + x := intstr.FromInt(i) + return &x + }(1), + MaxSurge: func(i int) *intstr.IntOrString { + x := intstr.FromInt(i) + return &x + }(test.maxSurge), + } + *(newRC.Spec.Replicas) = test.newRSReplicas + rs, err := NewRSNewReplicas(&newDeployment, []*apps.ReplicaSet{&rs5}, &newRC) + if err != nil { + t.Errorf("In test case %s, got unexpected error %v", test.Name, err) + } + if rs != test.expected { + t.Errorf("In test case %s, expected %+v, got %+v", test.Name, test.expected, rs) + } + }) + } +} + +var ( + condProgressing = func() apps.DeploymentCondition { + return apps.DeploymentCondition{ + Type: apps.DeploymentProgressing, + Status: v1.ConditionFalse, + Reason: "ForSomeReason", + } + } + + condProgressing2 = func() apps.DeploymentCondition { + return apps.DeploymentCondition{ + Type: apps.DeploymentProgressing, + Status: v1.ConditionTrue, + Reason: "BecauseItIs", + } + } + + condAvailable = func() apps.DeploymentCondition { + return apps.DeploymentCondition{ + Type: apps.DeploymentAvailable, + Status: v1.ConditionTrue, + Reason: "AwesomeController", + } + } + + status = func() *apps.DeploymentStatus { + return &apps.DeploymentStatus{ + Conditions: []apps.DeploymentCondition{condProgressing(), condAvailable()}, + } + } +) + +func TestGetCondition(t *testing.T) { + exampleStatus := status() + + tests := []struct { + name string + + status apps.DeploymentStatus + condType apps.DeploymentConditionType + + expected bool + }{ + { + name: "condition exists", + + status: *exampleStatus, + condType: apps.DeploymentAvailable, + + expected: true, + }, + { + name: "condition does not exist", + + status: *exampleStatus, + condType: apps.DeploymentReplicaFailure, + + expected: false, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + cond := GetDeploymentCondition(test.status, test.condType) + exists := cond != nil + if exists != test.expected { + t.Errorf("%s: expected condition to exist: %t, got: %t", test.name, test.expected, exists) + } + }) + } +} + +func TestSetCondition(t *testing.T) { + tests := []struct { + name string + + status *apps.DeploymentStatus + cond apps.DeploymentCondition + + expectedStatus *apps.DeploymentStatus + }{ + { + name: "set for the first time", + + status: &apps.DeploymentStatus{}, + cond: condAvailable(), + + expectedStatus: &apps.DeploymentStatus{Conditions: []apps.DeploymentCondition{condAvailable()}}, + }, + { + name: "simple set", + + status: &apps.DeploymentStatus{Conditions: []apps.DeploymentCondition{condProgressing()}}, + cond: condAvailable(), + + expectedStatus: status(), + }, + { + name: "overwrite", + + status: 
&apps.DeploymentStatus{Conditions: []apps.DeploymentCondition{condProgressing()}}, + cond: condProgressing2(), + + expectedStatus: &apps.DeploymentStatus{Conditions: []apps.DeploymentCondition{condProgressing2()}}, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + SetDeploymentCondition(test.status, test.cond) + if !reflect.DeepEqual(test.status, test.expectedStatus) { + t.Errorf("%s: expected status: %v, got: %v", test.name, test.expectedStatus, test.status) + } + }) + } +} + +func TestRemoveCondition(t *testing.T) { + tests := []struct { + name string + + status *apps.DeploymentStatus + condType apps.DeploymentConditionType + + expectedStatus *apps.DeploymentStatus + }{ + { + name: "remove from empty status", + + status: &apps.DeploymentStatus{}, + condType: apps.DeploymentProgressing, + + expectedStatus: &apps.DeploymentStatus{}, + }, + { + name: "simple remove", + + status: &apps.DeploymentStatus{Conditions: []apps.DeploymentCondition{condProgressing()}}, + condType: apps.DeploymentProgressing, + + expectedStatus: &apps.DeploymentStatus{}, + }, + { + name: "doesn't remove anything", + + status: status(), + condType: apps.DeploymentReplicaFailure, + + expectedStatus: status(), + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + RemoveDeploymentCondition(test.status, test.condType) + if !reflect.DeepEqual(test.status, test.expectedStatus) { + t.Errorf("%s: expected status: %v, got: %v", test.name, test.expectedStatus, test.status) + } + }) + } +} + +func TestDeploymentComplete(t *testing.T) { + deployment := func(desired, current, updated, available, maxUnavailable, maxSurge int32) *apps.Deployment { + return &apps.Deployment{ + Spec: apps.DeploymentSpec{ + Replicas: &desired, + Strategy: apps.DeploymentStrategy{ + RollingUpdate: &apps.RollingUpdateDeployment{ + MaxUnavailable: func(i int) *intstr.IntOrString { x := intstr.FromInt(i); return &x }(int(maxUnavailable)), + MaxSurge: func(i int) *intstr.IntOrString { x := intstr.FromInt(i); return &x }(int(maxSurge)), + }, + Type: apps.RollingUpdateDeploymentStrategyType, + }, + }, + Status: apps.DeploymentStatus{ + Replicas: current, + UpdatedReplicas: updated, + AvailableReplicas: available, + }, + } + } + + tests := []struct { + name string + + d *apps.Deployment + + expected bool + }{ + { + name: "not complete: min but not all pods become available", + + d: deployment(5, 5, 5, 4, 1, 0), + expected: false, + }, + { + name: "not complete: min availability is not honored", + + d: deployment(5, 5, 5, 3, 1, 0), + expected: false, + }, + { + name: "complete", + + d: deployment(5, 5, 5, 5, 0, 0), + expected: true, + }, + { + name: "not complete: all pods are available but not updated", + + d: deployment(5, 5, 4, 5, 0, 0), + expected: false, + }, + { + name: "not complete: still running old pods", + + // old replica set: spec.replicas=1, status.replicas=1, status.availableReplicas=1 + // new replica set: spec.replicas=1, status.replicas=1, status.availableReplicas=0 + d: deployment(1, 2, 1, 1, 0, 1), + expected: false, + }, + { + name: "not complete: one replica deployment never comes up", + + d: deployment(1, 1, 1, 0, 1, 1), + expected: false, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + if got, exp := DeploymentComplete(test.d, &test.d.Status), test.expected; got != exp { + t.Errorf("expected complete: %t, got: %t", exp, got) + } + }) + } +} + +func TestDeploymentProgressing(t *testing.T) { + deployment := func(current, updated, ready, 
available int32) *apps.Deployment { + return &apps.Deployment{ + Status: apps.DeploymentStatus{ + Replicas: current, + UpdatedReplicas: updated, + ReadyReplicas: ready, + AvailableReplicas: available, + }, + } + } + newStatus := func(current, updated, ready, available int32) apps.DeploymentStatus { + return apps.DeploymentStatus{ + Replicas: current, + UpdatedReplicas: updated, + ReadyReplicas: ready, + AvailableReplicas: available, + } + } + + tests := []struct { + name string + + d *apps.Deployment + newStatus apps.DeploymentStatus + + expected bool + }{ + { + name: "progressing: updated pods", + + d: deployment(10, 4, 4, 4), + newStatus: newStatus(10, 6, 4, 4), + + expected: true, + }, + { + name: "not progressing", + + d: deployment(10, 4, 4, 4), + newStatus: newStatus(10, 4, 4, 4), + + expected: false, + }, + { + name: "progressing: old pods removed", + + d: deployment(10, 4, 6, 6), + newStatus: newStatus(8, 4, 6, 6), + + expected: true, + }, + { + name: "not progressing: less new pods", + + d: deployment(10, 7, 3, 3), + newStatus: newStatus(10, 6, 3, 3), + + expected: false, + }, + { + name: "progressing: less overall but more new pods", + + d: deployment(10, 4, 7, 7), + newStatus: newStatus(8, 8, 5, 5), + + expected: true, + }, + { + name: "progressing: more ready pods", + + d: deployment(10, 10, 9, 8), + newStatus: newStatus(10, 10, 10, 8), + + expected: true, + }, + { + name: "progressing: more available pods", + + d: deployment(10, 10, 10, 9), + newStatus: newStatus(10, 10, 10, 10), + + expected: true, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + if got, exp := DeploymentProgressing(test.d, &test.newStatus), test.expected; got != exp { + t.Errorf("expected progressing: %t, got: %t", exp, got) + } + }) + } +} + +func TestDeploymentTimedOut(t *testing.T) { + var ( + null *int32 + ten = int32(10) + infinite = int32(math.MaxInt32) + ) + + timeFn := func(min, sec int) time.Time { + return time.Date(2016, 1, 1, 0, min, sec, 0, time.UTC) + } + deployment := func(condType apps.DeploymentConditionType, status v1.ConditionStatus, reason string, pds *int32, from time.Time) apps.Deployment { + return apps.Deployment{ + Spec: apps.DeploymentSpec{ + ProgressDeadlineSeconds: pds, + }, + Status: apps.DeploymentStatus{ + Conditions: []apps.DeploymentCondition{ + { + Type: condType, + Status: status, + Reason: reason, + LastUpdateTime: metav1.Time{Time: from}, + }, + }, + }, + } + } + + tests := []struct { + name string + + d apps.Deployment + nowFn func() time.Time + + expected bool + }{ + { + name: "nil progressDeadlineSeconds specified - no timeout", + + d: deployment(apps.DeploymentProgressing, v1.ConditionTrue, "", null, timeFn(1, 9)), + nowFn: func() time.Time { return timeFn(1, 20) }, + expected: false, + }, + { + name: "infinite progressDeadlineSeconds specified - no timeout", + + d: deployment(apps.DeploymentProgressing, v1.ConditionTrue, "", &infinite, timeFn(1, 9)), + nowFn: func() time.Time { return timeFn(1, 20) }, + expected: false, + }, + { + name: "progressDeadlineSeconds: 10s, now - started => 00:01:20 - 00:01:09 => 11s", + + d: deployment(apps.DeploymentProgressing, v1.ConditionTrue, "", &ten, timeFn(1, 9)), + nowFn: func() time.Time { return timeFn(1, 20) }, + expected: true, + }, + { + name: "progressDeadlineSeconds: 10s, now - started => 00:01:20 - 00:01:11 => 9s", + + d: deployment(apps.DeploymentProgressing, v1.ConditionTrue, "", &ten, timeFn(1, 11)), + nowFn: func() time.Time { return timeFn(1, 20) }, + expected: false, + }, + { + 
name: "previous status was a complete deployment", + + d: deployment(apps.DeploymentProgressing, v1.ConditionTrue, NewRSAvailableReason, nil, time.Time{}), + expected: false, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + nowFn = test.nowFn + if got, exp := DeploymentTimedOut(&test.d, &test.d.Status), test.expected; got != exp { + t.Errorf("expected timeout: %t, got: %t", exp, got) + } + }) + } +} + +func TestMaxUnavailable(t *testing.T) { + deployment := func(replicas int32, maxUnavailable intstr.IntOrString) apps.Deployment { + return apps.Deployment{ + Spec: apps.DeploymentSpec{ + Replicas: func(i int32) *int32 { return &i }(replicas), + Strategy: apps.DeploymentStrategy{ + RollingUpdate: &apps.RollingUpdateDeployment{ + MaxSurge: func(i int) *intstr.IntOrString { x := intstr.FromInt(i); return &x }(int(1)), + MaxUnavailable: &maxUnavailable, + }, + Type: apps.RollingUpdateDeploymentStrategyType, + }, + }, + } + } + tests := []struct { + name string + deployment apps.Deployment + expected int32 + }{ + { + name: "maxUnavailable less than replicas", + deployment: deployment(10, intstr.FromInt(5)), + expected: int32(5), + }, + { + name: "maxUnavailable equal replicas", + deployment: deployment(10, intstr.FromInt(10)), + expected: int32(10), + }, + { + name: "maxUnavailable greater than replicas", + deployment: deployment(5, intstr.FromInt(10)), + expected: int32(5), + }, + { + name: "maxUnavailable with replicas is 0", + deployment: deployment(0, intstr.FromInt(10)), + expected: int32(0), + }, + { + name: "maxUnavailable with Recreate deployment strategy", + deployment: apps.Deployment{ + Spec: apps.DeploymentSpec{ + Strategy: apps.DeploymentStrategy{ + Type: apps.RecreateDeploymentStrategyType, + }, + }, + }, + expected: int32(0), + }, + { + name: "maxUnavailable less than replicas with percents", + deployment: deployment(10, intstr.FromString("50%")), + expected: int32(5), + }, + { + name: "maxUnavailable equal replicas with percents", + deployment: deployment(10, intstr.FromString("100%")), + expected: int32(10), + }, + { + name: "maxUnavailable greater than replicas with percents", + deployment: deployment(5, intstr.FromString("100%")), + expected: int32(5), + }, + } + + for _, test := range tests { + t.Log(test.name) + t.Run(test.name, func(t *testing.T) { + maxUnavailable := MaxUnavailable(test.deployment) + if test.expected != maxUnavailable { + t.Fatalf("expected:%v, got:%v", test.expected, maxUnavailable) + } + }) + } +} + +// Set of simple tests for annotation related util functions +func TestAnnotationUtils(t *testing.T) { + + //Setup + tDeployment := generateDeployment("nginx") + tRS := generateRS(tDeployment) + tDeployment.Annotations[RevisionAnnotation] = "1" + + //Test Case 1: Check if anotations are copied properly from deployment to RS + t.Run("SetNewReplicaSetAnnotations", func(t *testing.T) { + //Try to set the increment revision from 11 through 20 + for i := 10; i < 20; i++ { + + nextRevision := fmt.Sprintf("%d", i+1) + SetNewReplicaSetAnnotations(&tDeployment, &tRS, nextRevision, true, 5) + //Now the ReplicaSets Revision Annotation should be i+1 + + if i >= 12 { + expectedHistoryAnnotation := fmt.Sprintf("%d,%d", i-1, i) + if tRS.Annotations[RevisionHistoryAnnotation] != expectedHistoryAnnotation { + t.Errorf("Revision History Expected=%s Obtained=%s", expectedHistoryAnnotation, tRS.Annotations[RevisionHistoryAnnotation]) + } + } + if tRS.Annotations[RevisionAnnotation] != nextRevision { + t.Errorf("Revision Expected=%s 
Obtained=%s", nextRevision, tRS.Annotations[RevisionAnnotation]) + } + } + }) + + //Test Case 2: Check if annotations are set properly + t.Run("SetReplicasAnnotations", func(t *testing.T) { + updated := SetReplicasAnnotations(&tRS, 10, 11) + if !updated { + t.Errorf("SetReplicasAnnotations() failed") + } + value, ok := tRS.Annotations[DesiredReplicasAnnotation] + if !ok { + t.Errorf("SetReplicasAnnotations did not set DesiredReplicasAnnotation") + } + if value != "10" { + t.Errorf("SetReplicasAnnotations did not set DesiredReplicasAnnotation correctly value=%s", value) + } + if value, ok = tRS.Annotations[MaxReplicasAnnotation]; !ok { + t.Errorf("SetReplicasAnnotations did not set DesiredReplicasAnnotation") + } + if value != "11" { + t.Errorf("SetReplicasAnnotations did not set MaxReplicasAnnotation correctly value=%s", value) + } + }) + + //Test Case 3: Check if annotations reflect deployments state + tRS.Annotations[DesiredReplicasAnnotation] = "1" + tRS.Status.AvailableReplicas = 1 + tRS.Spec.Replicas = new(int32) + *tRS.Spec.Replicas = 1 + + t.Run("IsSaturated", func(t *testing.T) { + saturated := IsSaturated(&tDeployment, &tRS) + if !saturated { + t.Errorf("SetReplicasAnnotations Expected=true Obtained=false") + } + }) + //Tear Down +} + +func TestReplicasAnnotationsNeedUpdate(t *testing.T) { + + desiredReplicas := fmt.Sprintf("%d", int32(10)) + maxReplicas := fmt.Sprintf("%d", int32(20)) + + tests := []struct { + name string + replicaSet *apps.ReplicaSet + expected bool + }{ + { + name: "test Annotations nil", + replicaSet: &apps.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{Name: "hello", Namespace: "test"}, + Spec: apps.ReplicaSetSpec{ + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}, + }, + }, + expected: true, + }, + { + name: "test desiredReplicas update", + replicaSet: &apps.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "hello", + Namespace: "test", + Annotations: map[string]string{DesiredReplicasAnnotation: "8", MaxReplicasAnnotation: maxReplicas}, + }, + Spec: apps.ReplicaSetSpec{ + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}, + }, + }, + expected: true, + }, + { + name: "test maxReplicas update", + replicaSet: &apps.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "hello", + Namespace: "test", + Annotations: map[string]string{DesiredReplicasAnnotation: desiredReplicas, MaxReplicasAnnotation: "16"}, + }, + Spec: apps.ReplicaSetSpec{ + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}, + }, + }, + expected: true, + }, + { + name: "test needn't update", + replicaSet: &apps.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: "hello", + Namespace: "test", + Annotations: map[string]string{DesiredReplicasAnnotation: desiredReplicas, MaxReplicasAnnotation: maxReplicas}, + }, + Spec: apps.ReplicaSetSpec{ + Selector: &metav1.LabelSelector{MatchLabels: map[string]string{"foo": "bar"}}, + }, + }, + expected: false, + }, + } + + for i, test := range tests { + t.Run(test.name, func(t *testing.T) { + result := ReplicasAnnotationsNeedUpdate(test.replicaSet, 10, 20) + if result != test.expected { + t.Errorf("case[%d]:%s Expected %v, Got: %v", i, test.name, test.expected, result) + } + }) + } +} diff --git a/pkg/feature/rollout_features.go b/pkg/feature/rollout_features.go index 3eb96472..d3bde30c 100644 --- a/pkg/feature/rollout_features.go +++ b/pkg/feature/rollout_features.go @@ -13,6 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 See the License for the specific language governing permissions and
 limitations under the License.
 */
+
 package feature
 
 import (
@@ -23,13 +24,15 @@ import (
 )
 
 const (
-	// PodProbeMarkerGate enable Kruise provide the ability to execute custom Probes.
-	// Note: custom probe execution requires kruise daemon, so currently only traditional Kubelet is supported, not virtual-kubelet.
-	RolloutHistoryGate featuregate.Feature = "RolloutHistoryGate"
+	// RolloutHistoryGate enables recording history for each rollout.
+	RolloutHistoryGate featuregate.Feature = "RolloutHistory"
+	// AdvancedDeploymentGate enables the advanced deployment controller.
+	AdvancedDeploymentGate featuregate.Feature = "AdvancedDeployment"
 )
 
 var defaultFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{
-	RolloutHistoryGate: {Default: false, PreRelease: featuregate.Alpha},
+	RolloutHistoryGate:     {Default: false, PreRelease: featuregate.Alpha},
+	AdvancedDeploymentGate: {Default: false, PreRelease: featuregate.Alpha},
 }
 
 func init() {
diff --git a/pkg/util/client/client.go b/pkg/util/client/client.go
index a81c6447..8e68488f 100644
--- a/pkg/util/client/client.go
+++ b/pkg/util/client/client.go
@@ -17,6 +17,8 @@ limitations under the License.
 package client
 
 import (
+	"fmt"
+
 	kruiseclientset "github.com/openkruise/kruise-api/client/clientset/versioned"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/client-go/discovery"
@@ -75,3 +77,14 @@ func NewRegistry(c *rest.Config) error {
 func GetGenericClient() *GenericClientset {
 	return defaultGenericClient
 }
+
+// GetGenericClientWithName returns a clientset with the given name appended to its user-agent
+func GetGenericClientWithName(name string) *GenericClientset {
+	if cfg == nil {
+		return nil
+	}
+	newCfg := *cfg
+	newCfg.UserAgent = fmt.Sprintf("%s/%s", cfg.UserAgent, name)
+	clientset, _ := newForConfig(&newCfg)
+	return clientset
+}
diff --git a/pkg/util/labels/labels.go b/pkg/util/labels/labels.go
new file mode 100644
index 00000000..0ce48cfb
--- /dev/null
+++ b/pkg/util/labels/labels.go
@@ -0,0 +1,124 @@
+/*
+Copyright 2016 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package labels
+
+import (
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+)
+
+// Clones the given map and returns a new map with the given key and value added.
+// Returns the given map, if labelKey is empty.
+func CloneAndAddLabel(labels map[string]string, labelKey, labelValue string) map[string]string {
+	if labelKey == "" {
+		// Don't need to add a label.
+		return labels
+	}
+	// Clone.
+	newLabels := map[string]string{}
+	for key, value := range labels {
+		newLabels[key] = value
+	}
+	newLabels[labelKey] = labelValue
+	return newLabels
+}
+
+// CloneAndRemoveLabel clones the given map and returns a new map with the given key removed.
+// Returns the given map, if labelKey is empty.
+func CloneAndRemoveLabel(labels map[string]string, labelKey string) map[string]string {
+	if labelKey == "" {
+		// Don't need to remove a label.
+		return labels
+	}
+	// Clone.
+ newLabels := map[string]string{} + for key, value := range labels { + newLabels[key] = value + } + delete(newLabels, labelKey) + return newLabels +} + +// AddLabel returns a map with the given key and value added to the given map. +func AddLabel(labels map[string]string, labelKey, labelValue string) map[string]string { + if labelKey == "" { + // Don't need to add a label. + return labels + } + if labels == nil { + labels = make(map[string]string) + } + labels[labelKey] = labelValue + return labels +} + +// Clones the given selector and returns a new selector with the given key and value added. +// Returns the given selector, if labelKey is empty. +func CloneSelectorAndAddLabel(selector *metav1.LabelSelector, labelKey, labelValue string) *metav1.LabelSelector { + if labelKey == "" { + // Don't need to add a label. + return selector + } + + // Clone. + newSelector := new(metav1.LabelSelector) + + // TODO(madhusudancs): Check if you can use deepCopy_extensions_LabelSelector here. + newSelector.MatchLabels = make(map[string]string) + if selector.MatchLabels != nil { + for key, val := range selector.MatchLabels { + newSelector.MatchLabels[key] = val + } + } + newSelector.MatchLabels[labelKey] = labelValue + + if selector.MatchExpressions != nil { + newMExps := make([]metav1.LabelSelectorRequirement, len(selector.MatchExpressions)) + for i, me := range selector.MatchExpressions { + newMExps[i].Key = me.Key + newMExps[i].Operator = me.Operator + if me.Values != nil { + newMExps[i].Values = make([]string, len(me.Values)) + copy(newMExps[i].Values, me.Values) + } else { + newMExps[i].Values = nil + } + } + newSelector.MatchExpressions = newMExps + } else { + newSelector.MatchExpressions = nil + } + + return newSelector +} + +// AddLabelToSelector returns a selector with the given key and value added to the given selector's MatchLabels. +func AddLabelToSelector(selector *metav1.LabelSelector, labelKey, labelValue string) *metav1.LabelSelector { + if labelKey == "" { + // Don't need to add a label. + return selector + } + if selector.MatchLabels == nil { + selector.MatchLabels = make(map[string]string) + } + selector.MatchLabels[labelKey] = labelValue + return selector +} + +// SelectorHasLabel checks if the given selector contains the given label key in its MatchLabels +func SelectorHasLabel(selector *metav1.LabelSelector, labelKey string) bool { + return len(selector.MatchLabels[labelKey]) > 0 +} diff --git a/pkg/util/labels/labels_test.go b/pkg/util/labels/labels_test.go new file mode 100644 index 00000000..029977a6 --- /dev/null +++ b/pkg/util/labels/labels_test.go @@ -0,0 +1,211 @@ +/* +Copyright 2016 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package labels + +import ( + "reflect" + "testing" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestCloneAndAddLabel(t *testing.T) { + labels := map[string]string{ + "foo1": "bar1", + "foo2": "bar2", + "foo3": "bar3", + } + + cases := []struct { + labels map[string]string + labelKey string + labelValue string + want map[string]string + }{ + { + labels: labels, + want: labels, + }, + { + labels: labels, + labelKey: "foo4", + labelValue: "42", + want: map[string]string{ + "foo1": "bar1", + "foo2": "bar2", + "foo3": "bar3", + "foo4": "42", + }, + }, + } + + for _, tc := range cases { + got := CloneAndAddLabel(tc.labels, tc.labelKey, tc.labelValue) + if !reflect.DeepEqual(got, tc.want) { + t.Errorf("[Add] got %v, want %v", got, tc.want) + } + // now test the inverse. + got_rm := CloneAndRemoveLabel(got, tc.labelKey) + if !reflect.DeepEqual(got_rm, tc.labels) { + t.Errorf("[RM] got %v, want %v", got_rm, tc.labels) + } + } +} + +func TestAddLabel(t *testing.T) { + labels := map[string]string{ + "foo1": "bar1", + "foo2": "bar2", + "foo3": "bar3", + } + + cases := []struct { + labels map[string]string + labelKey string + labelValue string + want map[string]string + }{ + { + labels: labels, + want: labels, + }, + { + labels: labels, + labelKey: "foo4", + labelValue: "food", + want: map[string]string{ + "foo1": "bar1", + "foo2": "bar2", + "foo3": "bar3", + "foo4": "food", + }, + }, + { + labels: nil, + labelKey: "foo4", + labelValue: "food", + want: map[string]string{ + "foo4": "food", + }, + }, + } + + for _, tc := range cases { + got := AddLabel(tc.labels, tc.labelKey, tc.labelValue) + if !reflect.DeepEqual(got, tc.want) { + t.Errorf("got %v, want %v", got, tc.want) + } + } +} + +func TestCloneSelectorAndAddLabel(t *testing.T) { + labels := map[string]string{ + "foo1": "bar1", + "foo2": "bar2", + "foo3": "bar3", + } + + cases := []struct { + labels map[string]string + labelKey string + labelValue string + want map[string]string + }{ + { + labels: labels, + want: labels, + }, + { + labels: labels, + labelKey: "foo4", + labelValue: "89", + want: map[string]string{ + "foo1": "bar1", + "foo2": "bar2", + "foo3": "bar3", + "foo4": "89", + }, + }, + { + labels: nil, + labelKey: "foo4", + labelValue: "12", + want: map[string]string{ + "foo4": "12", + }, + }, + } + + for _, tc := range cases { + ls_in := metav1.LabelSelector{MatchLabels: tc.labels} + ls_out := metav1.LabelSelector{MatchLabels: tc.want} + + got := CloneSelectorAndAddLabel(&ls_in, tc.labelKey, tc.labelValue) + if !reflect.DeepEqual(got, &ls_out) { + t.Errorf("got %v, want %v", got, tc.want) + } + } +} + +func TestAddLabelToSelector(t *testing.T) { + labels := map[string]string{ + "foo1": "bar1", + "foo2": "bar2", + "foo3": "bar3", + } + + cases := []struct { + labels map[string]string + labelKey string + labelValue string + want map[string]string + }{ + { + labels: labels, + want: labels, + }, + { + labels: labels, + labelKey: "foo4", + labelValue: "89", + want: map[string]string{ + "foo1": "bar1", + "foo2": "bar2", + "foo3": "bar3", + "foo4": "89", + }, + }, + { + labels: nil, + labelKey: "foo4", + labelValue: "12", + want: map[string]string{ + "foo4": "12", + }, + }, + } + + for _, tc := range cases { + ls_in := metav1.LabelSelector{MatchLabels: tc.labels} + ls_out := metav1.LabelSelector{MatchLabels: tc.want} + + got := AddLabelToSelector(&ls_in, tc.labelKey, tc.labelValue) + if !reflect.DeepEqual(got, &ls_out) { + t.Errorf("got %v, want %v", got, tc.want) + } + } +} diff --git a/pkg/util/ratelimiter/rate_limiter.go 
b/pkg/util/ratelimiter/rate_limiter.go
new file mode 100644
index 00000000..dc032d74
--- /dev/null
+++ b/pkg/util/ratelimiter/rate_limiter.go
@@ -0,0 +1,26 @@
+package ratelimiter
+
+import (
+	"flag"
+	"time"
+
+	"golang.org/x/time/rate"
+	"k8s.io/client-go/util/workqueue"
+)
+
+func init() {
+	flag.DurationVar(&baseDelay, "rate-limiter-base-delay", time.Millisecond*5, "The base delay for the rate limiter. Defaults to 5ms")
+	flag.DurationVar(&maxDelay, "rate-limiter-max-delay", time.Second*1000, "The max delay for the rate limiter. Defaults to 1000s")
+	flag.IntVar(&qps, "rate-limiter-qps", 10, "The qps for the rate limiter. Defaults to 10")
+	flag.IntVar(&bucketSize, "rate-limiter-bucket-size", 100, "The bucket size for the rate limiter. Defaults to 100")
+}
+
+var baseDelay, maxDelay time.Duration
+var qps, bucketSize int
+
+func DefaultControllerRateLimiter() workqueue.RateLimiter {
+	return workqueue.NewMaxOfRateLimiter(
+		workqueue.NewItemExponentialFailureRateLimiter(baseDelay, maxDelay),
+		&workqueue.BucketRateLimiter{Limiter: rate.NewLimiter(rate.Limit(qps), bucketSize)},
+	)
+}
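
A minimal sketch of how the flag-configurable limiter above could be attached to a controller. It assumes the controller-runtime release pinned by this module exposes the RateLimiter field on controller.Options; the helper name below is hypothetical, not part of this diff.

package deployment

import (
	"github.com/openkruise/rollouts/pkg/util/ratelimiter"
	"sigs.k8s.io/controller-runtime/pkg/controller"
	"sigs.k8s.io/controller-runtime/pkg/manager"
	"sigs.k8s.io/controller-runtime/pkg/reconcile"
)

// newRateLimitedController mirrors the add() helper above, but also installs the
// shared limiter so workqueue backoff follows the -rate-limiter-* flags.
func newRateLimitedController(mgr manager.Manager, r reconcile.Reconciler) (controller.Controller, error) {
	return controller.New("advanced-deployment-controller", mgr, controller.Options{
		Reconciler:              r,
		MaxConcurrentReconciles: concurrentReconciles,
		// Combines per-item exponential backoff with an overall token bucket.
		RateLimiter: ratelimiter.DefaultControllerRateLimiter(),
	})
}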