From f9c8d949610f7e74470ffe68500cf3dde243938d Mon Sep 17 00:00:00 2001 From: Jesse Suen Date: Tue, 29 Oct 2019 00:32:05 -0700 Subject: [PATCH] Fix issue where a replicaset name collision could cause hot loop --- experiments/experiment.go | 22 +++++------ experiments/replicaset.go | 67 +++++++++++++++++++++++----------- experiments/replicaset_test.go | 36 ++++++++++++++++++ 3 files changed, 92 insertions(+), 33 deletions(-) diff --git a/experiments/experiment.go b/experiments/experiment.go index faed9c02e5..de7c587eed 100644 --- a/experiments/experiment.go +++ b/experiments/experiment.go @@ -107,11 +107,7 @@ func (ec *experimentContext) reconcileTemplate(template v1alpha1.TemplateSpec) { rs := ec.templateRSs[template.Name] if rs == nil { // Create the ReplicaSet if necessary - if templateStatus.Status.Completed() { - // do nothing (not even pollute the logs) - } else if ec.isTerminating { - logCtx.Info("Skipping ReplicaSet creation: experiment is terminating") - } else { + if desiredReplicaCount > 0 { newRS, err := ec.createReplicaSet(template, templateStatus.CollisionCount) if err != nil { logCtx.Warnf("Failed to create ReplicaSet: %v", err) @@ -123,19 +119,23 @@ func (ec *experimentContext) reconcileTemplate(template v1alpha1.TemplateSpec) { if newRS != nil { ec.templateRSs[template.Name] = newRS templateStatus.LastTransitionTime = &now + rs = newRS } } - templateStatus.Replicas = 0 - templateStatus.UpdatedReplicas = 0 - templateStatus.ReadyReplicas = 0 - templateStatus.AvailableReplicas = 0 } else { - // If we get here, replicaset exists. We need to ensure it's scaled properly based on - // termination, or changed replica count + // Replicaset exists. We ensure it is scaled properly based on termination, or changed replica count if *rs.Spec.Replicas != desiredReplicaCount { ec.scaleReplicaSetAndRecordEvent(rs, desiredReplicaCount) templateStatus.LastTransitionTime = &now } + } + + if rs == nil { + templateStatus.Replicas = 0 + templateStatus.UpdatedReplicas = 0 + templateStatus.ReadyReplicas = 0 + templateStatus.AvailableReplicas = 0 + } else { templateStatus.Replicas = replicasetutil.GetActualReplicaCountForReplicaSets([]*appsv1.ReplicaSet{rs}) templateStatus.UpdatedReplicas = replicasetutil.GetActualReplicaCountForReplicaSets([]*appsv1.ReplicaSet{rs}) templateStatus.ReadyReplicas = replicasetutil.GetReadyReplicaCountForReplicaSets([]*appsv1.ReplicaSet{rs}) diff --git a/experiments/replicaset.go b/experiments/replicaset.go index b76afa8df6..8afbb0c08e 100644 --- a/experiments/replicaset.go +++ b/experiments/replicaset.go @@ -89,26 +89,7 @@ func (c *ExperimentController) getReplicaSetsForExperiment(experiment *v1alpha1. // createReplicaSet creates a new replicaset based on the template func (ec *experimentContext) createReplicaSet(template v1alpha1.TemplateSpec, collisionCount *int32) (*appsv1.ReplicaSet, error) { - newRSTemplate := *template.Template.DeepCopy() - // The labels must be different for each template because labels are used to match replicasets - // to templates. We inject the experiment and template name in the replicaset labels to ensure - // uniqueness. - replicaSetlabels := newReplicaSetLabels(ec.ex.Name, template.Name) - podTemplateSpecHash := controller.ComputeHash(&newRSTemplate, collisionCount) - newRS := appsv1.ReplicaSet{ - ObjectMeta: metav1.ObjectMeta{ - Name: fmt.Sprintf("%s-%s-%s", ec.ex.Name, template.Name, podTemplateSpecHash), - Namespace: ec.ex.Namespace, - OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(ec.ex, controllerKind)}, - Labels: replicaSetlabels, - }, - Spec: appsv1.ReplicaSetSpec{ - Replicas: new(int32), - MinReadySeconds: template.MinReadySeconds, - Selector: template.Selector, - Template: newRSTemplate, - }, - } + newRS := newReplicaSetFromTemplate(ec.ex, template, collisionCount) newReplicasCount := experimentutil.CalculateTemplateReplicasCount(ec.ex, template) *(newRS.Spec.Replicas) = newReplicasCount @@ -133,10 +114,13 @@ func (ec *experimentContext) createReplicaSet(template v1alpha1.TemplateSpec, co // deep equal to the PodTemplateSpec of the Experiment, it's the Experiment's new ReplicaSet. // Otherwise, this is a hash collision and we need to increment the collisionCount field in // the status of the Experiment and requeue to try the creation in the next sync. - controllerRef := metav1.GetControllerOf(rs) - if controllerRef != nil && controllerRef.UID == ec.ex.UID && replicasetutil.PodTemplateEqualIgnoreHash(&rs.Spec.Template, &template.Template) { + if ec.isReplicaSetSemanticallyEqual(&newRS, rs) { + // NOTE: it should be impossible to get here, because the isReplicaSetSemanticallyEqual() + // helper is stricter than the the replicaset claim logic that builds up ec.templateRSss + // at the start of reconciliation createdRS = rs err = nil + ec.log.Warnf("Claimed existing ReplicaSet %s with equivalent template spec", createdRS.Name) break } @@ -183,6 +167,45 @@ func (ec *experimentContext) createReplicaSet(template v1alpha1.TemplateSpec, co return createdRS, nil } +// newReplicaSetFromTemplate is a helper to formulate a replicaset from an experiment's template +func newReplicaSetFromTemplate(experiment *v1alpha1.Experiment, template v1alpha1.TemplateSpec, collisionCount *int32) appsv1.ReplicaSet { + newRSTemplate := *template.Template.DeepCopy() + // The labels must be different for each template because labels are used to match replicasets + // to templates. We inject the experiment and template name in the replicaset labels to ensure + // uniqueness. + replicaSetlabels := newReplicaSetLabels(experiment.Name, template.Name) + podTemplateSpecHash := controller.ComputeHash(&newRSTemplate, collisionCount) + return appsv1.ReplicaSet{ + ObjectMeta: metav1.ObjectMeta{ + Name: fmt.Sprintf("%s-%s-%s", experiment.Name, template.Name, podTemplateSpecHash), + Namespace: experiment.Namespace, + OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(experiment, controllerKind)}, + Labels: replicaSetlabels, + }, + Spec: appsv1.ReplicaSetSpec{ + Replicas: new(int32), + MinReadySeconds: template.MinReadySeconds, + Selector: template.Selector, + Template: newRSTemplate, + }, + } +} + +// isReplicaSetSemanticallyEqual checks to see if an existing ReplicaSet is semantically equal +// to the ReplicaSet we are trying to create +func (ec *experimentContext) isReplicaSetSemanticallyEqual(newRS, existingRS *appsv1.ReplicaSet) bool { + controllerRef := metav1.GetControllerOf(existingRS) + podTemplatesEqual := replicasetutil.PodTemplateEqualIgnoreHash(&existingRS.Spec.Template, &newRS.Spec.Template) + existingLabels := existingRS.GetLabels() + newLabels := newRS.GetLabels() + return controllerRef != nil && + controllerRef.UID == ec.ex.UID && + podTemplatesEqual && + existingLabels != nil && + existingLabels[ExperimentNameLabelKey] == newLabels[ExperimentNameLabelKey] && + existingLabels[ExperimentTemplateNameLabelKey] == newLabels[ExperimentTemplateNameLabelKey] +} + func (ec *experimentContext) scaleReplicaSetAndRecordEvent(rs *appsv1.ReplicaSet, newScale int32) (bool, *appsv1.ReplicaSet, error) { // No need to scale if *(rs.Spec.Replicas) == newScale { diff --git a/experiments/replicaset_test.go b/experiments/replicaset_test.go index 7a9640a1b8..94d56e9b51 100644 --- a/experiments/replicaset_test.go +++ b/experiments/replicaset_test.go @@ -156,3 +156,39 @@ func TestNameCollision(t *testing.T) { validatePatch(t, patch, "", NoChange, templateStatuses, cond) } } + +// TestNameCollisionWithEquivalentPodTemplateAndControllerUID verifies we consider the labels of the +// replicaset when encountering name collisions +func TestNameCollisionWithEquivalentPodTemplateAndControllerUID(t *testing.T) { + templates := generateTemplates("bar") + e := newExperiment("foo", templates, nil) + e.Status.Status = v1alpha1.AnalysisStatusPending + + rs := templateToRS(e, templates[0], 0) + rs.ObjectMeta.Labels[ExperimentTemplateNameLabelKey] = "something-different" // change this to something different + + f := newFixture(t, e, rs) + defer f.Close() + + f.expectCreateReplicaSetAction(rs) + collisionCountPatchIndex := f.expectPatchExperimentAction(e) // update collision count + statusUpdatePatchIndex := f.expectPatchExperimentAction(e) // updates status + f.run(getKey(e, t)) + + { + patch := f.getPatchedExperiment(collisionCountPatchIndex) + templateStatuses := []v1alpha1.TemplateStatus{ + generateTemplatesStatus("bar", 0, 0, "", nil), + } + templateStatuses[0].CollisionCount = pointer.Int32Ptr(1) + validatePatch(t, patch, "", NoChange, templateStatuses, nil) + } + { + patch := f.getPatchedExperiment(statusUpdatePatchIndex) + templateStatuses := []v1alpha1.TemplateStatus{ + generateTemplatesStatus("bar", 0, 0, v1alpha1.TemplateStatusProgressing, nil), + } + cond := []v1alpha1.ExperimentCondition{*newCondition(conditions.ReplicaSetUpdatedReason, e)} + validatePatch(t, patch, "", NoChange, templateStatuses, cond) + } +}