From 2d0cfe2f26641dcedf0c7b864b7fce8a7954776d Mon Sep 17 00:00:00 2001 From: Kenny Leung Date: Wed, 30 May 2018 14:10:06 -0700 Subject: [PATCH] Fix a race condition where machineset reconciles too quickly The race condition is where the machineset reconciles on the same key too quickly, where the creation/deletion of machines is not detected by the second reconciliation, causing it to create/delete additional machines. I attempted to use WaitForCacheSync, but that is also insufficient in preventing the race condition. The fix here is to add a 1-second sleep before releasing the mutex lock when reconciling, which gives the system a chance to recognize the changes made from the first reconciliation. Issue #245 was created to improve this hacky fix. --- pkg/controller/machineset/controller.go | 5 +++++ pkg/controller/machineset/reconcile_test.go | 1 + 2 files changed, 6 insertions(+) diff --git a/pkg/controller/machineset/controller.go b/pkg/controller/machineset/controller.go index bcb54e5c01f8..9841a64460cb 100644 --- a/pkg/controller/machineset/controller.go +++ b/pkg/controller/machineset/controller.go @@ -39,6 +39,10 @@ import ( // controllerKind contains the schema.GroupVersionKind for this controller type. var controllerKind = v1alpha1.SchemeGroupVersion.WithKind("MachineSet") +// reconcileMutexSleepSec is the duration to sleep before releasing the mutex lock that is held for reconciliation. 
+// See https://github.com/kubernetes-sigs/cluster-api/issues/245 +var reconcileMutexSleepSec = time.Second + // +controller:group=cluster,version=v1alpha1,kind=MachineSet,resource=machinesets type MachineSetControllerImpl struct { builders.DefaultControllerFns @@ -118,6 +122,7 @@ func (c *MachineSetControllerImpl) Reconcile(machineSet *v1alpha1.MachineSet) er mux := c.msKeyMuxMap[key] mux.Lock() defer mux.Unlock() + defer time.Sleep(reconcileMutexSleepSec) glog.V(4).Infof("Reconcile machineset %v", machineSet.Name) allMachines, err := c.machineLister.Machines(machineSet.Namespace).List(labels.Everything()) diff --git a/pkg/controller/machineset/reconcile_test.go b/pkg/controller/machineset/reconcile_test.go index 46a5f60b07e7..af8ad76326b5 100644 --- a/pkg/controller/machineset/reconcile_test.go +++ b/pkg/controller/machineset/reconcile_test.go @@ -132,6 +132,7 @@ func TestMachineSetControllerReconcileHandler(t *testing.T) { }, } + reconcileMutexSleepSec = 0 for _, test := range tests { t.Run(test.name, func(t *testing.T) { // setup the test scenario