Skip to content

Commit

Permalink
Fix issue where a replicaset name collision could cause hot loop
Browse files Browse the repository at this point in the history
  • Loading branch information
jessesuen committed Oct 29, 2019
1 parent 30cf6ab commit f9c8d94
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 33 deletions.
22 changes: 11 additions & 11 deletions experiments/experiment.go
Original file line number Diff line number Diff line change
Expand Up @@ -107,11 +107,7 @@ func (ec *experimentContext) reconcileTemplate(template v1alpha1.TemplateSpec) {
rs := ec.templateRSs[template.Name]
if rs == nil {
// Create the ReplicaSet if necessary
if templateStatus.Status.Completed() {
// do nothing (not even pollute the logs)
} else if ec.isTerminating {
logCtx.Info("Skipping ReplicaSet creation: experiment is terminating")
} else {
if desiredReplicaCount > 0 {
newRS, err := ec.createReplicaSet(template, templateStatus.CollisionCount)
if err != nil {
logCtx.Warnf("Failed to create ReplicaSet: %v", err)
Expand All @@ -123,19 +119,23 @@ func (ec *experimentContext) reconcileTemplate(template v1alpha1.TemplateSpec) {
if newRS != nil {
ec.templateRSs[template.Name] = newRS
templateStatus.LastTransitionTime = &now
rs = newRS
}
}
templateStatus.Replicas = 0
templateStatus.UpdatedReplicas = 0
templateStatus.ReadyReplicas = 0
templateStatus.AvailableReplicas = 0
} else {
// If we get here, replicaset exists. We need to ensure it's scaled properly based on
// termination, or changed replica count
// Replicaset exists. We ensure it is scaled properly based on termination, or changed replica count
if *rs.Spec.Replicas != desiredReplicaCount {
ec.scaleReplicaSetAndRecordEvent(rs, desiredReplicaCount)
templateStatus.LastTransitionTime = &now
}
}

if rs == nil {
templateStatus.Replicas = 0
templateStatus.UpdatedReplicas = 0
templateStatus.ReadyReplicas = 0
templateStatus.AvailableReplicas = 0
} else {
templateStatus.Replicas = replicasetutil.GetActualReplicaCountForReplicaSets([]*appsv1.ReplicaSet{rs})
templateStatus.UpdatedReplicas = replicasetutil.GetActualReplicaCountForReplicaSets([]*appsv1.ReplicaSet{rs})
templateStatus.ReadyReplicas = replicasetutil.GetReadyReplicaCountForReplicaSets([]*appsv1.ReplicaSet{rs})
Expand Down
67 changes: 45 additions & 22 deletions experiments/replicaset.go
Original file line number Diff line number Diff line change
Expand Up @@ -89,26 +89,7 @@ func (c *ExperimentController) getReplicaSetsForExperiment(experiment *v1alpha1.

// createReplicaSet creates a new replicaset based on the template
func (ec *experimentContext) createReplicaSet(template v1alpha1.TemplateSpec, collisionCount *int32) (*appsv1.ReplicaSet, error) {
newRSTemplate := *template.Template.DeepCopy()
// The labels must be different for each template because labels are used to match replicasets
// to templates. We inject the experiment and template name in the replicaset labels to ensure
// uniqueness.
replicaSetlabels := newReplicaSetLabels(ec.ex.Name, template.Name)
podTemplateSpecHash := controller.ComputeHash(&newRSTemplate, collisionCount)
newRS := appsv1.ReplicaSet{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("%s-%s-%s", ec.ex.Name, template.Name, podTemplateSpecHash),
Namespace: ec.ex.Namespace,
OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(ec.ex, controllerKind)},
Labels: replicaSetlabels,
},
Spec: appsv1.ReplicaSetSpec{
Replicas: new(int32),
MinReadySeconds: template.MinReadySeconds,
Selector: template.Selector,
Template: newRSTemplate,
},
}
newRS := newReplicaSetFromTemplate(ec.ex, template, collisionCount)

newReplicasCount := experimentutil.CalculateTemplateReplicasCount(ec.ex, template)
*(newRS.Spec.Replicas) = newReplicasCount
Expand All @@ -133,10 +114,13 @@ func (ec *experimentContext) createReplicaSet(template v1alpha1.TemplateSpec, co
// deep equal to the PodTemplateSpec of the Experiment, it's the Experiment's new ReplicaSet.
// Otherwise, this is a hash collision and we need to increment the collisionCount field in
// the status of the Experiment and requeue to try the creation in the next sync.
controllerRef := metav1.GetControllerOf(rs)
if controllerRef != nil && controllerRef.UID == ec.ex.UID && replicasetutil.PodTemplateEqualIgnoreHash(&rs.Spec.Template, &template.Template) {
if ec.isReplicaSetSemanticallyEqual(&newRS, rs) {
// NOTE: it should be impossible to get here, because the isReplicaSetSemanticallyEqual()
// helper is stricter than the the replicaset claim logic that builds up ec.templateRSss
// at the start of reconciliation
createdRS = rs
err = nil
ec.log.Warnf("Claimed existing ReplicaSet %s with equivalent template spec", createdRS.Name)
break
}

Expand Down Expand Up @@ -183,6 +167,45 @@ func (ec *experimentContext) createReplicaSet(template v1alpha1.TemplateSpec, co
return createdRS, nil
}

// newReplicaSetFromTemplate is a helper to formulate a replicaset from an experiment's template
func newReplicaSetFromTemplate(experiment *v1alpha1.Experiment, template v1alpha1.TemplateSpec, collisionCount *int32) appsv1.ReplicaSet {
newRSTemplate := *template.Template.DeepCopy()
// The labels must be different for each template because labels are used to match replicasets
// to templates. We inject the experiment and template name in the replicaset labels to ensure
// uniqueness.
replicaSetlabels := newReplicaSetLabels(experiment.Name, template.Name)
podTemplateSpecHash := controller.ComputeHash(&newRSTemplate, collisionCount)
return appsv1.ReplicaSet{
ObjectMeta: metav1.ObjectMeta{
Name: fmt.Sprintf("%s-%s-%s", experiment.Name, template.Name, podTemplateSpecHash),
Namespace: experiment.Namespace,
OwnerReferences: []metav1.OwnerReference{*metav1.NewControllerRef(experiment, controllerKind)},
Labels: replicaSetlabels,
},
Spec: appsv1.ReplicaSetSpec{
Replicas: new(int32),
MinReadySeconds: template.MinReadySeconds,
Selector: template.Selector,
Template: newRSTemplate,
},
}
}

// isReplicaSetSemanticallyEqual checks to see if an existing ReplicaSet is semantically equal
// to the ReplicaSet we are trying to create
func (ec *experimentContext) isReplicaSetSemanticallyEqual(newRS, existingRS *appsv1.ReplicaSet) bool {
controllerRef := metav1.GetControllerOf(existingRS)
podTemplatesEqual := replicasetutil.PodTemplateEqualIgnoreHash(&existingRS.Spec.Template, &newRS.Spec.Template)
existingLabels := existingRS.GetLabels()
newLabels := newRS.GetLabels()
return controllerRef != nil &&
controllerRef.UID == ec.ex.UID &&
podTemplatesEqual &&
existingLabels != nil &&
existingLabels[ExperimentNameLabelKey] == newLabels[ExperimentNameLabelKey] &&
existingLabels[ExperimentTemplateNameLabelKey] == newLabels[ExperimentTemplateNameLabelKey]
}

func (ec *experimentContext) scaleReplicaSetAndRecordEvent(rs *appsv1.ReplicaSet, newScale int32) (bool, *appsv1.ReplicaSet, error) {
// No need to scale
if *(rs.Spec.Replicas) == newScale {
Expand Down
36 changes: 36 additions & 0 deletions experiments/replicaset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,3 +156,39 @@ func TestNameCollision(t *testing.T) {
validatePatch(t, patch, "", NoChange, templateStatuses, cond)
}
}

// TestNameCollisionWithEquivalentPodTemplateAndControllerUID verifies we consider the labels of the
// replicaset when encountering name collisions
func TestNameCollisionWithEquivalentPodTemplateAndControllerUID(t *testing.T) {
templates := generateTemplates("bar")
e := newExperiment("foo", templates, nil)
e.Status.Status = v1alpha1.AnalysisStatusPending

rs := templateToRS(e, templates[0], 0)
rs.ObjectMeta.Labels[ExperimentTemplateNameLabelKey] = "something-different" // change this to something different

f := newFixture(t, e, rs)
defer f.Close()

f.expectCreateReplicaSetAction(rs)
collisionCountPatchIndex := f.expectPatchExperimentAction(e) // update collision count
statusUpdatePatchIndex := f.expectPatchExperimentAction(e) // updates status
f.run(getKey(e, t))

{
patch := f.getPatchedExperiment(collisionCountPatchIndex)
templateStatuses := []v1alpha1.TemplateStatus{
generateTemplatesStatus("bar", 0, 0, "", nil),
}
templateStatuses[0].CollisionCount = pointer.Int32Ptr(1)
validatePatch(t, patch, "", NoChange, templateStatuses, nil)
}
{
patch := f.getPatchedExperiment(statusUpdatePatchIndex)
templateStatuses := []v1alpha1.TemplateStatus{
generateTemplatesStatus("bar", 0, 0, v1alpha1.TemplateStatusProgressing, nil),
}
cond := []v1alpha1.ExperimentCondition{*newCondition(conditions.ReplicaSetUpdatedReason, e)}
validatePatch(t, patch, "", NoChange, templateStatuses, cond)
}
}

0 comments on commit f9c8d94

Please sign in to comment.