From f2c22fa024350340aaa838a219622a9fdbbd4b47 Mon Sep 17 00:00:00 2001
From: Himanshu Sharma
Date: Wed, 22 Nov 2023 19:16:08 +0530
Subject: [PATCH 01/22] preliminary changes
---
pkg/util/provider/machinecontroller/controller.go | 11 +++++------
pkg/util/provider/machinecontroller/machine.go | 14 +++++++-------
2 files changed, 12 insertions(+), 13 deletions(-)
diff --git a/pkg/util/provider/machinecontroller/controller.go b/pkg/util/provider/machinecontroller/controller.go
index a9ff3c941..863d65961 100644
--- a/pkg/util/provider/machinecontroller/controller.go
+++ b/pkg/util/provider/machinecontroller/controller.go
@@ -135,7 +135,6 @@ func NewController(
controller.pvcLister = pvcInformer.Lister()
controller.pvLister = pvInformer.Lister()
controller.secretLister = secretInformer.Lister()
- // TODO: Need to handle K8s versions below 1.13 differently
controller.volumeAttachementLister = volumeAttachmentInformer.Lister()
controller.machineClassLister = machineClassInformer.Lister()
controller.nodeLister = nodeInformer.Lister()
@@ -158,7 +157,7 @@ func NewController(
controller.pdbV1beta1Synced = pdbV1beta1Informer.Informer().HasSynced
}
- // Secret Controller Informers
+ // Secret Controller's Informers
secretInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: controller.secretAdd,
DeleteFunc: controller.secretDelete,
@@ -170,7 +169,7 @@ func NewController(
DeleteFunc: controller.machineClassToSecretDelete,
})
- // Machine Class Controller Informers
+ // Machine Class Controller's Informers
machineInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: controller.machineToMachineClassAdd,
UpdateFunc: controller.machineToMachineClassUpdate,
@@ -183,7 +182,7 @@ func NewController(
DeleteFunc: controller.machineClassDelete,
})
- // Machine Controller Informers
+ // Machine Controller's Informers
nodeInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: controller.addNodeToMachine,
UpdateFunc: controller.updateNodeToMachine,
@@ -196,7 +195,7 @@ func NewController(
DeleteFunc: controller.deleteMachine,
})
- // MachineSafety Controller Informers
+ // MachineSafety Controller's Informers
// We follow the kubernetes way of reconciling the safety controller
// done by adding empty key objects. We initialize it, to trigger
// running of different safety loop on MCM startup.
@@ -209,7 +208,7 @@ func NewController(
DeleteFunc: controller.deleteMachineToSafety,
})
- // Drain Controller Informers
+ // Drain Controller's Informers
if k8sutils.IsResourceSupported(
targetCoreClient,
schema.GroupResource{
diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go
index dbd95b480..5d6268afe 100644
--- a/pkg/util/provider/machinecontroller/machine.go
+++ b/pkg/util/provider/machinecontroller/machine.go
@@ -61,26 +61,26 @@ func (c *controller) deleteMachine(obj interface{}) {
c.enqueueMachine(obj)
}
-// isToBeEnqueued returns true if the key is to be managed by this controller
-func (c *controller) isToBeEnqueued(obj interface{}) (bool, string) {
+// getKeyForObj returns key for object, else returns false
+func (c *controller) getKeyForObj(obj interface{}) (string, bool) {
key, err := cache.MetaNamespaceKeyFunc(obj)
if err != nil {
klog.Errorf("Couldn't get key for object %+v: %v", obj, err)
- return false, ""
+ return "", false
}
-
- return true, key
+ return key, true
}
+
func (c *controller) enqueueMachine(obj interface{}) {
- if toBeEnqueued, key := c.isToBeEnqueued(obj); toBeEnqueued {
+ if key, ok := c.getKeyForObj(obj); ok {
klog.V(5).Infof("Adding machine object to the queue %q", key)
c.machineQueue.Add(key)
}
}
func (c *controller) enqueueMachineAfter(obj interface{}, after time.Duration) {
- if toBeEnqueued, key := c.isToBeEnqueued(obj); toBeEnqueued {
+ if key, ok := c.getKeyForObj(obj); ok {
klog.V(5).Infof("Adding machine object to the queue %q after %s", key, after)
c.machineQueue.AddAfter(key, after)
}
From b5feb2d15a25e9a2d12e8fb170d398c4bf6b8f6b Mon Sep 17 00:00:00 2001
From: Himanshu Sharma
Date: Wed, 22 Nov 2023 19:29:35 +0530
Subject: [PATCH 02/22] reconcile machineClass only on addition/deletion of
machines
---
.../provider/machinecontroller/controller.go | 1 -
.../machinecontroller/machineclass.go | 30 -------------------
2 files changed, 31 deletions(-)
diff --git a/pkg/util/provider/machinecontroller/controller.go b/pkg/util/provider/machinecontroller/controller.go
index 863d65961..e4b458371 100644
--- a/pkg/util/provider/machinecontroller/controller.go
+++ b/pkg/util/provider/machinecontroller/controller.go
@@ -172,7 +172,6 @@ func NewController(
// Machine Class Controller's Informers
machineInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
AddFunc: controller.machineToMachineClassAdd,
- UpdateFunc: controller.machineToMachineClassUpdate,
DeleteFunc: controller.machineToMachineClassDelete,
})
diff --git a/pkg/util/provider/machinecontroller/machineclass.go b/pkg/util/provider/machinecontroller/machineclass.go
index 6ba849975..f7c43e1a2 100644
--- a/pkg/util/provider/machinecontroller/machineclass.go
+++ b/pkg/util/provider/machinecontroller/machineclass.go
@@ -45,36 +45,6 @@ func (c *controller) machineToMachineClassAdd(obj interface{}) {
}
}
-func (c *controller) machineToMachineClassUpdate(oldObj, newObj interface{}) {
- oldMachine, ok := oldObj.(*v1alpha1.Machine)
- if oldMachine == nil || !ok {
- klog.Warningf("Couldn't get machine from object: %+v", oldObj)
- return
- }
- newMachine, ok := newObj.(*v1alpha1.Machine)
- if newMachine == nil || !ok {
- klog.Warningf("Couldn't get machine from object: %+v", newObj)
- return
- }
-
- if oldMachine.Spec.Class.Kind == newMachine.Spec.Class.Kind {
- if newMachine.Spec.Class.Kind == machineutils.MachineClassKind {
- // Both old and new machine refer to the same machineClass object
- // And the correct kind so enqueuing only one of them.
- c.machineClassQueue.Add(newMachine.Spec.Class.Name)
- }
- } else {
- // If both are pointing to different machineClasses
- // we might have to enqueue both.
- if oldMachine.Spec.Class.Kind == machineutils.MachineClassKind {
- c.machineClassQueue.Add(oldMachine.Spec.Class.Name)
- }
- if newMachine.Spec.Class.Kind == machineutils.MachineClassKind {
- c.machineClassQueue.Add(newMachine.Spec.Class.Name)
- }
- }
-}
-
func (c *controller) machineToMachineClassDelete(obj interface{}) {
c.machineToMachineClassAdd(obj)
}
From f8bd6aba1a70ac1ad85d56939c45b0fe02c6a0b9 Mon Sep 17 00:00:00 2001
From: Himanshu Sharma
Date: Wed, 22 Nov 2023 19:37:03 +0530
Subject: [PATCH 03/22] removed double check in secret reconciler
---
pkg/util/provider/machinecontroller/secret.go | 4 ----
1 file changed, 4 deletions(-)
diff --git a/pkg/util/provider/machinecontroller/secret.go b/pkg/util/provider/machinecontroller/secret.go
index b620ae0a4..7e279a809 100644
--- a/pkg/util/provider/machinecontroller/secret.go
+++ b/pkg/util/provider/machinecontroller/secret.go
@@ -80,10 +80,6 @@ func (c *controller) reconcileClusterSecret(ctx context.Context, secret *corev1.
return err
}
} else {
- if finalizers := sets.NewString(secret.Finalizers...); !finalizers.Has(MCFinalizerName) {
- // Finalizer doesn't exist, simply return nil
- return nil
- }
err = c.deleteSecretFinalizers(ctx, secret)
if err != nil {
return err
From db98422e3b45e4696b30a3131eb01df8fe989cda Mon Sep 17 00:00:00 2001
From: Himanshu Sharma
Date: Thu, 23 Nov 2023 00:48:04 +0530
Subject: [PATCH 04/22] small refactoring
---
pkg/apis/machine/v1alpha1/machine_types.go | 20 ++++++------
.../machinecontroller/machineclass.go | 12 ++++---
.../machinecontroller/machineclass_util.go | 31 -------------------
3 files changed, 18 insertions(+), 45 deletions(-)
diff --git a/pkg/apis/machine/v1alpha1/machine_types.go b/pkg/apis/machine/v1alpha1/machine_types.go
index b5920e570..00f87aaf1 100644
--- a/pkg/apis/machine/v1alpha1/machine_types.go
+++ b/pkg/apis/machine/v1alpha1/machine_types.go
@@ -136,10 +136,10 @@ type LastOperation struct {
Type MachineOperationType `json:"type,omitempty"`
}
-// MachinePhase is a label for the condition of a machines at the current time.
+// MachinePhase is a label for the condition of a machine at the current time.
type MachinePhase string
-// These are the valid statuses of machines.
+// These are the valid phases of machines.
const (
// MachinePending means that the machine is being created
MachinePending MachinePhase = "Pending"
@@ -156,25 +156,25 @@ const (
// MachineUnknown indicates that the node is not ready at the movement
MachineUnknown MachinePhase = "Unknown"
- // MachineFailed means operation failed leading to machine status failure
+ // MachineFailed means operation timeod out
MachineFailed MachinePhase = "Failed"
- // MachineCrashLoopBackOff means creation or deletion of the machine is failing. It means that machine object is present but there is no corresponding VM.
+ // MachineCrashLoopBackOff means creation machine is failing. It means that machine object is present but there is no corresponding VM.
MachineCrashLoopBackOff MachinePhase = "CrashLoopBackOff"
)
-// MachineState is a current state of the machine.
+// MachineState is a current state of the operation.
type MachineState string
-// These are the valid statuses of machines.
+// These are the valid states of the operation performed on the machine.
const (
- // MachineStatePending means there are operations pending on this machine state
+ // MachineStateProcessing means operation is not yet complete
MachineStateProcessing MachineState = "Processing"
- // MachineStateFailed means operation failed leading to machine status failure
+ // MachineStateFailed means operation failed
MachineStateFailed MachineState = "Failed"
- // MachineStateSuccessful indicates that the node is not ready at the moment
+ // MachineStateSuccessful means operation completed successfully
MachineStateSuccessful MachineState = "Successful"
)
@@ -189,7 +189,7 @@ const (
// MachineOperationUpdate indicates that the operation was an update
MachineOperationUpdate MachineOperationType = "Update"
- // MachineOperationHealthCheck indicates that the operation was a create
+ // MachineOperationHealthCheck indicates that the operation was a health check of node object
MachineOperationHealthCheck MachineOperationType = "HealthCheck"
// MachineOperationDelete indicates that the operation was a delete
diff --git a/pkg/util/provider/machinecontroller/machineclass.go b/pkg/util/provider/machinecontroller/machineclass.go
index f7c43e1a2..13e1dc031 100644
--- a/pkg/util/provider/machinecontroller/machineclass.go
+++ b/pkg/util/provider/machinecontroller/machineclass.go
@@ -171,16 +171,16 @@ func (c *controller) reconcileClusterMachineClass(ctx context.Context, class *v1
func (c *controller) addMachineClassFinalizers(ctx context.Context, class *v1alpha1.MachineClass) error {
finalizers := sets.NewString(class.Finalizers...)
finalizers.Insert(MCMFinalizerName)
- return c.updateMachineClassFinalizers(ctx, class, finalizers.List())
+ return c.updateMachineClassFinalizers(ctx, class, finalizers.List(),true)
}
func (c *controller) deleteMachineClassFinalizers(ctx context.Context, class *v1alpha1.MachineClass) error {
finalizers := sets.NewString(class.Finalizers...)
finalizers.Delete(MCMFinalizerName)
- return c.updateMachineClassFinalizers(ctx, class, finalizers.List())
+ return c.updateMachineClassFinalizers(ctx, class, finalizers.List(),false)
}
-func (c *controller) updateMachineClassFinalizers(ctx context.Context, class *v1alpha1.MachineClass, finalizers []string) error {
+func (c *controller) updateMachineClassFinalizers(ctx context.Context, class *v1alpha1.MachineClass, finalizers []string, addFinalizers bool) error {
// Get the latest version of the class so that we can avoid conflicts
class, err := c.controlMachineClient.MachineClasses(class.Namespace).Get(ctx, class.Name, metav1.GetOptions{})
if err != nil {
@@ -194,7 +194,11 @@ func (c *controller) updateMachineClassFinalizers(ctx context.Context, class *v1
klog.Warning("Updating machineClass failed, retrying. ", class.Name, err)
return err
}
- klog.V(3).Infof("Successfully added/removed finalizer on the machineclass %q", class.Name)
+ if addFinalizers{
+ klog.V(3).Infof("Successfully added finalizer on the machineclass %q", class.Name)
+ }else{
+ klog.V(3).Infof("Successfully removed finalizer on the machineclass %q", class.Name)
+ }
return err
}
diff --git a/pkg/util/provider/machinecontroller/machineclass_util.go b/pkg/util/provider/machinecontroller/machineclass_util.go
index fdd15ea15..7d2250faf 100644
--- a/pkg/util/provider/machinecontroller/machineclass_util.go
+++ b/pkg/util/provider/machinecontroller/machineclass_util.go
@@ -22,37 +22,6 @@ import (
"k8s.io/apimachinery/pkg/labels"
)
-/*
-TODO: Move this code to MCM/MachineSet controller as well?
-func (c *controller) findMachineDeploymentsForClass(kind, name string) ([]*v1alpha1.MachineDeployment, error) {
- machineDeployments, err := c.machineDeploymentLister.List(labels.Everything())
- if err != nil {
- return nil, err
- }
- var filtered []*v1alpha1.MachineDeployment
- for _, machineDeployment := range machineDeployments {
- if machineDeployment.Spec.Template.Spec.Class.Kind == kind && machineDeployment.Spec.Template.Spec.Class.Name == name {
- filtered = append(filtered, machineDeployment)
- }
- }
- return filtered, nil
-}
-
-func (c *controller) findMachineSetsForClass(kind, name string) ([]*v1alpha1.MachineSet, error) {
- machineSets, err := c.machineSetLister.List(labels.Everything())
- if err != nil {
- return nil, err
- }
- var filtered []*v1alpha1.MachineSet
- for _, machineSet := range machineSets {
- if machineSet.Spec.Template.Spec.Class.Kind == kind && machineSet.Spec.Template.Spec.Class.Name == name {
- filtered = append(filtered, machineSet)
- }
- }
- return filtered, nil
-}
-*/
-
func (c *controller) findMachinesForClass(kind, name string) ([]*v1alpha1.Machine, error) {
machines, err := c.machineLister.List(labels.Everything())
if err != nil {
From 5bdd6bc1efa1c3a9ec7144a751afaa3ecfe0f43a Mon Sep 17 00:00:00 2001
From: Himanshu Sharma
Date: Thu, 23 Nov 2023 00:49:59 +0530
Subject: [PATCH 05/22] introduced machine conditions diff logging
---
.../machinecontroller/machine_util.go | 111 +++++++++---------
1 file changed, 56 insertions(+), 55 deletions(-)
diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go
index a9b75e750..7a8f4300b 100644
--- a/pkg/util/provider/machinecontroller/machine_util.go
+++ b/pkg/util/provider/machinecontroller/machine_util.go
@@ -60,6 +60,7 @@ import (
// emptyMap is a dummy emptyMap to compare with
var emptyMap = make(map[string]string)
+var errSuccessfulPhaseUpdate = errors.New("machine creation is successful. Machine Phase/Conditions have been UPDATED")
const (
maxReplacements = 1
@@ -68,43 +69,6 @@ const (
cacheUpdateTimeout = 1 * time.Second
)
-// TODO: use client library instead when it starts to support update retries
-//
-// see https://github.com/kubernetes/kubernetes/issues/21479
-// type updateMachineFunc func(machine *v1alpha1.Machine) error
-
-/*
-// UpdateMachineWithRetries updates a machine with given applyUpdate function. Note that machine not found error is ignored.
-// The returned bool value can be used to tell if the machine is actually updated.
-func UpdateMachineWithRetries(machineClient v1alpha1client.MachineInterface, machineLister v1alpha1listers.MachineLister, namespace, name string, applyUpdate updateMachineFunc) (*v1alpha1.Machine, error) {
- var machine *v1alpha1.Machine
-
- retryErr := retry.RetryOnConflict(retry.DefaultBackoff, func() error {
- var err error
- machine, err = machineLister.Machines(namespace).Get(name)
- if err != nil {
- return err
- }
- machine = machine.DeepCopy()
- // Apply the update, then attempt to push it to the apiserver.
- if applyErr := applyUpdate(machine); applyErr != nil {
- return applyErr
- }
- machine, err = machineClient.Update(machine)
- return err
- })
-
- // Ignore the precondition violated error, this machine is already updated
- // with the desired label.
- if retryErr == errorsutil.ErrPreconditionViolated {
- klog.V(4).Infof("Machine %s precondition doesn't hold, skip updating it.", name)
- retryErr = nil
- }
-
- return machine, retryErr
-}
-*/
-
// ValidateMachineClass validates the machine class.
func (c *controller) ValidateMachineClass(_ context.Context, classSpec *v1alpha1.ClassSpec) (*v1alpha1.MachineClass, map[string][]byte, machineutils.RetryPeriod, error) {
var (
@@ -219,19 +183,38 @@ func (c *controller) getSecret(ref *v1.SecretReference, MachineClassName string)
return secretRef, err
}
-// nodeConditionsHaveChanged compares two node statuses to see if any of the statuses have changed
-func nodeConditionsHaveChanged(machineConditions []v1.NodeCondition, nodeConditions []v1.NodeCondition) bool {
- if len(machineConditions) != len(nodeConditions) {
- return true
+// nodeConditionsHaveChanged compares two node status.conditions to see if any of the statuses have changed
+func nodeConditionsHaveChanged(oldConditions []v1.NodeCondition, newConditions []v1.NodeCondition) ([]v1.NodeCondition, []v1.NodeCondition, bool) {
+ var (
+ oldConditionsByType = make(map[v1.NodeConditionType]v1.NodeCondition, len(oldConditions))
+ newConditionsByType = make(map[v1.NodeConditionType]v1.NodeCondition, len(newConditions))
+ addedOrUpdatedConditions = make([]v1.NodeCondition, 0, len(newConditions))
+ removedConditions = make([]v1.NodeCondition, 0, len(oldConditions))
+ )
+
+ for _, c := range oldConditions {
+ oldConditionsByType[c.Type] = c
+ }
+ for _, c := range newConditions {
+ newConditionsByType[c.Type] = c
+ }
+
+ // checking for any added/updated new condition
+ for _, c := range newConditions {
+ oldC, exists := oldConditionsByType[c.Type]
+ if !exists || (oldC.Status != c.Status) {
+ addedOrUpdatedConditions = append(addedOrUpdatedConditions, c)
+ }
}
- for i := range nodeConditions {
- if nodeConditions[i].Status != machineConditions[i].Status {
- return true
+ // checking for any deleted condition
+ for _, c := range oldConditions {
+ if _, exists := newConditionsByType[c.Type]; !exists {
+ removedConditions = append(removedConditions, c)
}
}
- return false
+ return addedOrUpdatedConditions, removedConditions, len(addedOrUpdatedConditions) != 0 || len(removedConditions) != 0
}
func mergeDataMaps(in map[string][]byte, maps ...map[string][]byte) map[string][]byte {
@@ -601,14 +584,13 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph
// and corresponding node object went missing
// and if machine object still reports healthy
description = fmt.Sprintf(
- "Node object went missing. Machine %s is unhealthy - changing MachineState to Unknown",
+ "Node object went missing. Machine %s is unhealthy - changing MachinePhase to Unknown",
machine.Name,
)
klog.Warning(description)
clone.Status.CurrentStatus = v1alpha1.CurrentStatus{
- Phase: v1alpha1.MachineUnknown,
- // TimeoutActive: true,
+ Phase: v1alpha1.MachineUnknown,
LastUpdateTime: metav1.Now(),
}
clone.Status.LastOperation = v1alpha1.LastOperation{
@@ -625,9 +607,11 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph
return machineutils.ShortRetry, err
}
} else {
- if nodeConditionsHaveChanged(machine.Status.Conditions, node.Status.Conditions) {
+ populatedConditions, removedConditions, isChanged := nodeConditionsHaveChanged(machine.Status.Conditions, node.Status.Conditions)
+ if isChanged {
clone.Status.Conditions = node.Status.Conditions
- klog.V(3).Infof("Conditions of Machine %q with providerID %q and backing node %q are changing", machine.Name, getProviderID(machine), getNodeName(machine))
+
+ klog.V(3).Infof("Conditions of Machine %q with providerID %q and backing node %q are changing.\nAdded/Updated Conditions:\n\n%s\nRemoved Conditions:\n\n%s", machine.Name, getProviderID(machine), getNodeName(machine), getFormattedNodeConditions(populatedConditions), getFormattedNodeConditions(removedConditions))
cloneDirty = true
}
@@ -756,12 +740,12 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph
if cloneDirty {
_, err = c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{})
if err != nil {
- // Keep retrying until update goes through
- klog.Errorf("Update failed for machine %q. Retrying, error: %q", machine.Name, err)
+ // Keep retrying across reconciles until update goes through
+ klog.Errorf("Update of Phase/Conditions failed for machine %q. Retrying, error: %q", machine.Name, err)
} else {
- klog.V(2).Infof("Machine State has been updated for %q with providerID %q and backing node %q", machine.Name, getProviderID(machine), getNodeName(machine))
- // Return error for continuing in next iteration
- err = fmt.Errorf("machine creation is successful. Machine State has been UPDATED")
+ klog.V(2).Infof("Machine Phase/Conditions have been updated for %q with providerID %q and backing node %q", machine.Name, getProviderID(machine), getNodeName(machine))
+ // Return error to end the reconcile
+ err = errSuccessfulPhaseUpdate
}
return machineutils.ShortRetry, err
@@ -770,6 +754,23 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph
return machineutils.LongRetry, nil
}
+func NodeConditionsHaveChanged(oldConditions []v1.NodeCondition, newConditions []v1.NodeCondition) ([]v1.NodeCondition, []v1.NodeCondition, bool) {
+ return nodeConditionsHaveChanged(oldConditions, newConditions)
+}
+
+func GetFormattedNodeConditions(conditions []v1.NodeCondition) string {
+ return getFormattedNodeConditions(conditions)
+}
+
+func getFormattedNodeConditions(conditions []v1.NodeCondition) string {
+ var result string
+ for _, c := range conditions {
+ result = fmt.Sprintf("%sType: %s | Status: %s | Reason: %s | Message: %s\n", result, c.Type, c.Status, c.Reason, c.Message)
+ }
+
+ return result
+}
+
/*
SECTION
Manipulate Finalizers
From e1849e4eca6e1a25fbe278e86aad2bf0d6bf0d98 Mon Sep 17 00:00:00 2001
From: Himanshu Sharma
Date: Thu, 23 Nov 2023 10:08:45 +0530
Subject: [PATCH 06/22] removed old in-tree related code
---
pkg/util/provider/machineutils/utils.go | 3 ---
1 file changed, 3 deletions(-)
diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go
index 8bc546a0f..8e906f588 100644
--- a/pkg/util/provider/machineutils/utils.go
+++ b/pkg/util/provider/machineutils/utils.go
@@ -54,9 +54,6 @@ const (
// MachineClassKind is used to identify the machineClassKind for generic machineClasses
MachineClassKind = "MachineClass"
- // MigratedMachineClass annotation helps in identifying machineClasses who have been migrated by migration controller
- MigratedMachineClass = "machine.sapcloud.io/migrated"
-
// NotManagedByMCM annotation helps in identifying the nodes which are not handled by MCM
NotManagedByMCM = "node.machine.sapcloud.io/not-managed-by-mcm"
From b50ff02b3547f1aefe5b594af5f7d94002737530 Mon Sep 17 00:00:00 2001
From: Himanshu Sharma
Date: Thu, 23 Nov 2023 10:09:30 +0530
Subject: [PATCH 07/22] adapted unit tests ot new const
errSuccessfulPhaseUpdate
---
.../machinecontroller/machine_util_test.go | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/pkg/util/provider/machinecontroller/machine_util_test.go b/pkg/util/provider/machinecontroller/machine_util_test.go
index cadd5b550..ea8a82731 100644
--- a/pkg/util/provider/machinecontroller/machine_util_test.go
+++ b/pkg/util/provider/machinecontroller/machine_util_test.go
@@ -2177,7 +2177,7 @@ var _ = Describe("machine_util", func() {
},
expect: expect{
retryPeriod: machineutils.ShortRetry,
- err: errors.New("machine creation is successful. Machine State has been UPDATED"),
+ err: errSuccessfulPhaseUpdate,
expectedPhase: v1alpha1.MachineFailed,
},
}),
@@ -2196,7 +2196,7 @@ var _ = Describe("machine_util", func() {
},
expect: expect{
retryPeriod: machineutils.ShortRetry,
- err: errors.New("machine creation is successful. Machine State has been UPDATED"),
+ err: errSuccessfulPhaseUpdate,
expectedPhase: v1alpha1.MachineRunning,
},
}),
@@ -2215,7 +2215,7 @@ var _ = Describe("machine_util", func() {
},
expect: expect{
retryPeriod: machineutils.ShortRetry,
- err: errors.New("machine creation is successful. Machine State has been UPDATED"),
+ err: errSuccessfulPhaseUpdate,
expectedPhase: v1alpha1.MachineUnknown,
},
}),
@@ -2234,7 +2234,7 @@ var _ = Describe("machine_util", func() {
},
expect: expect{
retryPeriod: machineutils.ShortRetry,
- err: errors.New("machine creation is successful. Machine State has been UPDATED"),
+ err: errSuccessfulPhaseUpdate,
expectedPhase: v1alpha1.MachineUnknown,
},
}),
@@ -2250,7 +2250,7 @@ var _ = Describe("machine_util", func() {
},
expect: expect{
retryPeriod: machineutils.ShortRetry,
- err: errors.New("machine creation is successful. Machine State has been UPDATED"),
+ err: errSuccessfulPhaseUpdate,
expectedPhase: v1alpha1.MachineUnknown,
},
}),
@@ -2363,7 +2363,7 @@ var _ = Describe("machine_util", func() {
},
expect: expect{
retryPeriod: machineutils.ShortRetry,
- err: errors.New("machine creation is successful. Machine State has been UPDATED"),
+ err: errors.New("machine creation is successful. Machine Phase or conditions have been UPDATED"),
expectedPhase: v1alpha1.MachineRunning,
},
}),
@@ -2390,7 +2390,7 @@ var _ = Describe("machine_util", func() {
},
expect: expect{
retryPeriod: machineutils.ShortRetry,
- err: errors.New("machine creation is successful. Machine State has been UPDATED"),
+ err: errors.New("machine creation is successful. Machine Phase or conditions have been UPDATED"),
expectedPhase: v1alpha1.MachineRunning,
},
}),
@@ -2424,7 +2424,7 @@ var _ = Describe("machine_util", func() {
},
expect: expect{
retryPeriod: machineutils.ShortRetry,
- err: errors.New("machine creation is successful. Machine State has been UPDATED"),
+ err: errors.New("machine creation is successful. Machine Phase or conditions have been UPDATED"),
expectedPhase: v1alpha1.MachineRunning,
},
}),
From 0c81be07d5b87815f882d06665d99fb0a8b05fcc Mon Sep 17 00:00:00 2001
From: Himanshu Sharma
Date: Fri, 1 Dec 2023 17:36:44 +0530
Subject: [PATCH 08/22] added new retry period ConflictRetry
---
pkg/util/provider/machineutils/utils.go | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go
index 8e906f588..332349206 100644
--- a/pkg/util/provider/machineutils/utils.go
+++ b/pkg/util/provider/machineutils/utils.go
@@ -79,8 +79,10 @@ type RetryPeriod time.Duration
// These are the valid values for RetryPeriod
const (
+ // ConflictRetry tells the controller to retry quickly - 200 milliseconds
+ ConflictRetry RetryPeriod = RetryPeriod(200 * time.Millisecond)
// ShortRetry tells the controller to retry after a short duration - 15 seconds
- ShortRetry RetryPeriod = RetryPeriod(15 * time.Second)
+ ShortRetry RetryPeriod = RetryPeriod(5 * time.Second)
// MediumRetry tells the controller to retry after a medium duration - 2 minutes
MediumRetry RetryPeriod = RetryPeriod(3 * time.Minute)
// LongRetry tells the controller to retry after a long duration - 10 minutes
From 61ceccd495056e8fc885207610f5f4b6f7652277 Mon Sep 17 00:00:00 2001
From: Himanshu Sharma
Date: Fri, 1 Dec 2023 17:37:41 +0530
Subject: [PATCH 09/22] updated log level for better readability
---
cmd/machine-controller-manager/app/controllermanager.go | 8 ++++----
pkg/util/provider/drain/drain.go | 2 +-
pkg/util/provider/drain/volume_attachment.go | 4 ++--
pkg/util/provider/machinecontroller/secret.go | 4 ++--
4 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/cmd/machine-controller-manager/app/controllermanager.go b/cmd/machine-controller-manager/app/controllermanager.go
index 2f9aec1f3..3128acd74 100644
--- a/cmd/machine-controller-manager/app/controllermanager.go
+++ b/cmd/machine-controller-manager/app/controllermanager.go
@@ -208,7 +208,7 @@ func StartControllers(s *options.MCMServer,
recorder record.EventRecorder,
stop <-chan struct{}) error {
- klog.V(5).Info("Getting available resources")
+ klog.V(4).Info("Getting available resources")
availableResources, err := getAvailableResources(controlCoreClientBuilder)
if err != nil {
return err
@@ -229,7 +229,7 @@ func StartControllers(s *options.MCMServer,
}
if availableResources[machineGVR] || availableResources[machineSetGVR] || availableResources[machineDeploymentGVR] {
- klog.V(5).Infof("Creating shared informers; resync interval: %v", s.MinResyncPeriod)
+ klog.V(4).Infof("Creating shared informers; resync interval: %v", s.MinResyncPeriod)
controlMachineInformerFactory := machineinformers.NewFilteredSharedInformerFactory(
controlMachineClientBuilder.ClientOrDie("control-machine-shared-informers"),
@@ -253,7 +253,7 @@ func StartControllers(s *options.MCMServer,
// All shared informers are v1alpha1 API level
machineSharedInformers := controlMachineInformerFactory.Machine().V1alpha1()
- klog.V(5).Infof("Creating controllers...")
+ klog.V(4).Infof("Creating controllers...")
mcmController, err := mcmcontroller.NewController(
s.Namespace,
controlMachineClient,
@@ -276,7 +276,7 @@ func StartControllers(s *options.MCMServer,
controlCoreInformerFactory.Start(stop)
targetCoreInformerFactory.Start(stop)
- klog.V(5).Info("Running controller")
+ klog.V(4).Info("Running controller")
go mcmController.Run(int(s.ConcurrentNodeSyncs), stop)
} else {
diff --git a/pkg/util/provider/drain/drain.go b/pkg/util/provider/drain/drain.go
index 0b5a9f6e4..8b607056e 100644
--- a/pkg/util/provider/drain/drain.go
+++ b/pkg/util/provider/drain/drain.go
@@ -934,7 +934,7 @@ func (o *Options) waitForReattach(ctx context.Context, podVolumeInfo PodVolumeIn
case incomingEvent := <-volumeAttachmentEventCh:
persistentVolumeName := *incomingEvent.Spec.Source.PersistentVolumeName
- klog.V(5).Infof("VolumeAttachment event received for PV: %s", persistentVolumeName)
+ klog.V(4).Infof("VolumeAttachment event received for PV: %s", persistentVolumeName)
// Checking if event for an PV that is being waited on
if _, present := pvsWaitingForReattachments[persistentVolumeName]; present {
diff --git a/pkg/util/provider/drain/volume_attachment.go b/pkg/util/provider/drain/volume_attachment.go
index f3dcc24ec..5c35afa3a 100644
--- a/pkg/util/provider/drain/volume_attachment.go
+++ b/pkg/util/provider/drain/volume_attachment.go
@@ -70,13 +70,13 @@ func (v *VolumeAttachmentHandler) dispatch(obj interface{}) {
// AddVolumeAttachment is the event handler for VolumeAttachment add
func (v *VolumeAttachmentHandler) AddVolumeAttachment(obj interface{}) {
- klog.V(5).Infof("Adding volume attachment object")
+ klog.V(4).Infof("Adding volume attachment object")
v.dispatch(obj)
}
// UpdateVolumeAttachment is the event handler for VolumeAttachment update
func (v *VolumeAttachmentHandler) UpdateVolumeAttachment(oldObj, newObj interface{}) {
- klog.V(5).Info("Updating volume attachment object")
+ klog.V(4).Info("Updating volume attachment object")
v.dispatch(newObj)
}
diff --git a/pkg/util/provider/machinecontroller/secret.go b/pkg/util/provider/machinecontroller/secret.go
index 7e279a809..6e072fb54 100644
--- a/pkg/util/provider/machinecontroller/secret.go
+++ b/pkg/util/provider/machinecontroller/secret.go
@@ -61,10 +61,10 @@ func (c *controller) reconcileClusterSecretKey(key string) error {
func (c *controller) reconcileClusterSecret(ctx context.Context, secret *corev1.Secret) error {
startTime := time.Now()
- klog.V(5).Infof("Start syncing %q", secret.Name)
+ klog.V(4).Infof("Start syncing %q", secret.Name)
defer func() {
c.enqueueSecretAfter(secret, 10*time.Minute)
- klog.V(5).Infof("Finished syncing %q (%v)", secret.Name, time.Since(startTime))
+ klog.V(4).Infof("Finished syncing %q (%v)", secret.Name, time.Since(startTime))
}()
// Check if machineClasses are referring to this secret
From aeb28f7377cf859ad1876fc04e2d80ba19f9e88d Mon Sep 17 00:00:00 2001
From: Himanshu Sharma
Date: Fri, 1 Dec 2023 17:37:56 +0530
Subject: [PATCH 10/22] some more updated log levels
---
pkg/util/provider/app/app.go | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/pkg/util/provider/app/app.go b/pkg/util/provider/app/app.go
index 3b095fce8..c1129b147 100644
--- a/pkg/util/provider/app/app.go
+++ b/pkg/util/provider/app/app.go
@@ -209,7 +209,7 @@ func StartControllers(s *options.MCServer,
recorder record.EventRecorder,
stop <-chan struct{}) error {
- klog.V(5).Info("Getting available resources")
+ klog.V(4).Info("Getting available resources")
availableResources, err := getAvailableResources(controlCoreClientBuilder)
if err != nil {
return err
@@ -239,7 +239,7 @@ func StartControllers(s *options.MCServer,
}
if availableResources[machineGVR] {
- klog.V(5).Infof("Creating shared informers; resync interval: %v", s.MinResyncPeriod)
+ klog.V(4).Infof("Creating shared informers; resync interval: %v", s.MinResyncPeriod)
controlMachineInformerFactory := machineinformers.NewFilteredSharedInformerFactory(
controlMachineClientBuilder.ClientOrDie("control-machine-shared-informers"),
@@ -274,7 +274,7 @@ func StartControllers(s *options.MCServer,
// All shared informers are v1alpha1 API level
machineSharedInformers := controlMachineInformerFactory.Machine().V1alpha1()
- klog.V(5).Infof("Creating controllers...")
+ klog.V(4).Infof("Creating controllers...")
machineController, err := machinecontroller.NewController(
s.Namespace,
controlMachineClient,
@@ -305,7 +305,7 @@ func StartControllers(s *options.MCServer,
controlCoreInformerFactory.Start(stop)
targetCoreInformerFactory.Start(stop)
- klog.V(5).Info("Running controller")
+ klog.V(4).Info("Running controller")
go machineController.Run(int(s.ConcurrentNodeSyncs), stop)
} else {
From a672b570d9643424e4b361caadf5af1f0e3fa7ec Mon Sep 17 00:00:00 2001
From: Himanshu Sharma
Date: Fri, 1 Dec 2023 17:38:28 +0530
Subject: [PATCH 11/22] adapted test cases
---
pkg/util/provider/machinecontroller/machine_test.go | 2 +-
.../provider/machinecontroller/machine_util_test.go | 12 ++++++------
2 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/pkg/util/provider/machinecontroller/machine_test.go b/pkg/util/provider/machinecontroller/machine_test.go
index 98707bc5d..23ab69061 100644
--- a/pkg/util/provider/machinecontroller/machine_test.go
+++ b/pkg/util/provider/machinecontroller/machine_test.go
@@ -2794,7 +2794,7 @@ var _ = Describe("machine", func() {
},
},
expect: expect{
- err: fmt.Errorf("Machine deletion in process. Deletion of node object was succesful"),
+ err: fmt.Errorf("Machine deletion in process. Deletion of node object was successful"),
retry: machineutils.ShortRetry,
nodeDeleted: true,
machine: newMachine(
diff --git a/pkg/util/provider/machinecontroller/machine_util_test.go b/pkg/util/provider/machinecontroller/machine_util_test.go
index ea8a82731..8626c0f88 100644
--- a/pkg/util/provider/machinecontroller/machine_util_test.go
+++ b/pkg/util/provider/machinecontroller/machine_util_test.go
@@ -196,7 +196,7 @@ var _ = Describe("machine_util", func() {
},
},
},
- err: fmt.Errorf("Machine ALTs have been reconciled"),
+ err: errSuccessfulALTsync,
},
}),
@@ -276,7 +276,7 @@ var _ = Describe("machine_util", func() {
},
},
},
- err: fmt.Errorf("Machine ALTs have been reconciled"),
+ err: errSuccessfulALTsync,
},
}),
@@ -422,7 +422,7 @@ var _ = Describe("machine_util", func() {
},
},
},
- err: fmt.Errorf("Machine ALTs have been reconciled"),
+ err: errSuccessfulALTsync,
},
}),
)
@@ -2363,7 +2363,7 @@ var _ = Describe("machine_util", func() {
},
expect: expect{
retryPeriod: machineutils.ShortRetry,
- err: errors.New("machine creation is successful. Machine Phase or conditions have been UPDATED"),
+ err: errors.New("machine creation is successful. Machine Phase/Conditions have been UPDATED"),
expectedPhase: v1alpha1.MachineRunning,
},
}),
@@ -2390,7 +2390,7 @@ var _ = Describe("machine_util", func() {
},
expect: expect{
retryPeriod: machineutils.ShortRetry,
- err: errors.New("machine creation is successful. Machine Phase or conditions have been UPDATED"),
+ err: errors.New("machine creation is successful. Machine Phase/Conditions have been UPDATED"),
expectedPhase: v1alpha1.MachineRunning,
},
}),
@@ -2424,7 +2424,7 @@ var _ = Describe("machine_util", func() {
},
expect: expect{
retryPeriod: machineutils.ShortRetry,
- err: errors.New("machine creation is successful. Machine Phase or conditions have been UPDATED"),
+ err: errors.New("machine creation is successful. Machine Phase/Conditions have been UPDATED"),
expectedPhase: v1alpha1.MachineRunning,
},
}),
From a653be5383e46fc83ea80897fcbec44fe3177cbf Mon Sep 17 00:00:00 2001
From: Himanshu Sharma
Date: Fri, 1 Dec 2023 17:39:45 +0530
Subject: [PATCH 12/22] no machine reconcile on status update
---
.../provider/machinecontroller/machine.go | 128 +++++++++---------
1 file changed, 61 insertions(+), 67 deletions(-)
diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go
index 5d6268afe..8fbc82710 100644
--- a/pkg/util/provider/machinecontroller/machine.go
+++ b/pkg/util/provider/machinecontroller/machine.go
@@ -47,18 +47,28 @@ SECTION
Machine controller - Machine add, update, delete watches
*/
func (c *controller) addMachine(obj interface{}) {
- klog.V(5).Infof("Adding machine object")
- c.enqueueMachine(obj)
+ c.enqueueMachine(obj, "handling machine obj ADD event")
}
func (c *controller) updateMachine(oldObj, newObj interface{}) {
- klog.V(5).Info("Updating machine object")
- c.enqueueMachine(newObj)
+ oldMachine := oldObj.(*v1alpha1.Machine)
+ newMachine := newObj.(*v1alpha1.Machine)
+
+ if oldMachine == nil || newMachine == nil {
+ klog.Errorf("Couldn't convert to machine resource from object")
+ return
+ }
+
+ if oldMachine.ResourceVersion != newMachine.ResourceVersion && oldMachine.Generation == newMachine.Generation {
+ klog.V(3).Infof("Skipping non-spec updates for machine %s", oldMachine.Name)
+ return
+ }
+
+ c.enqueueMachine(newObj, "handling machine object UPDATE event")
}
func (c *controller) deleteMachine(obj interface{}) {
- klog.V(5).Info("Deleting machine object")
- c.enqueueMachine(obj)
+ c.enqueueMachine(obj, "handling machine object DELETE event")
}
// getKeyForObj returns key for object, else returns false
@@ -71,17 +81,16 @@ func (c *controller) getKeyForObj(obj interface{}) (string, bool) {
return key, true
}
-
-func (c *controller) enqueueMachine(obj interface{}) {
+func (c *controller) enqueueMachine(obj interface{}, reason string) {
if key, ok := c.getKeyForObj(obj); ok {
- klog.V(5).Infof("Adding machine object to the queue %q", key)
+ klog.V(3).Infof("Adding machine object to queue %q, reason: %s", key, reason)
c.machineQueue.Add(key)
}
}
-func (c *controller) enqueueMachineAfter(obj interface{}, after time.Duration) {
+func (c *controller) enqueueMachineAfter(obj interface{}, after time.Duration, reason string) {
if key, ok := c.getKeyForObj(obj); ok {
- klog.V(5).Infof("Adding machine object to the queue %q after %s", key, after)
+ klog.V(3).Infof("Adding machine object to queue %q after %s, reason: %s", key, after, reason)
c.machineQueue.AddAfter(key, after)
}
}
@@ -105,16 +114,19 @@ func (c *controller) reconcileClusterMachineKey(key string) error {
}
retryPeriod, err := c.reconcileClusterMachine(ctx, machine)
- klog.V(5).Info(err, retryPeriod)
- c.enqueueMachineAfter(machine, time.Duration(retryPeriod))
+ var reEnqueReason = "periodic reconcile"
+ if err != nil {
+ reEnqueReason = err.Error()
+ }
+ c.enqueueMachineAfter(machine, time.Duration(retryPeriod), reEnqueReason)
return nil
}
func (c *controller) reconcileClusterMachine(ctx context.Context, machine *v1alpha1.Machine) (machineutils.RetryPeriod, error) {
- klog.V(5).Infof("Start Reconciling machine: %q , nodeName: %q ,providerID: %q", machine.Name, getNodeName(machine), getProviderID(machine))
- defer klog.V(5).Infof("Stop Reconciling machine %q, nodeName: %q ,providerID: %q", machine.Name, getNodeName(machine), getProviderID(machine))
+ klog.V(2).Infof("reconcileClusterMachine: Start for %q with phase:%q, description:%q", machine.Name, machine.Status.CurrentStatus.Phase, machine.Status.LastOperation.Description)
+ defer klog.V(2).Infof("reconcileClusterMachine: Stop for %q", machine.Name)
if c.safetyOptions.MachineControllerFrozen && machine.DeletionTimestamp == nil {
// If Machine controller is frozen and
@@ -132,7 +144,7 @@ func (c *controller) reconcileClusterMachine(ctx context.Context, machine *v1alp
validationerr := validation.ValidateMachine(internalMachine)
if validationerr.ToAggregate() != nil && len(validationerr.ToAggregate().Errors()) > 0 {
- err := fmt.Errorf("Validation of Machine failed %s", validationerr.ToAggregate().Error())
+ err := fmt.Errorf("validation of Machine failed %s", validationerr.ToAggregate().Error())
klog.Error(err)
return machineutils.LongRetry, err
}
@@ -140,7 +152,7 @@ func (c *controller) reconcileClusterMachine(ctx context.Context, machine *v1alp
// Validate MachineClass
machineClass, secretData, retry, err := c.ValidateMachineClass(ctx, &machine.Spec.Class)
if err != nil {
- klog.Error(err)
+ klog.Errorf("Cannot reconcile machine %s: %s", machine.Name, err)
return retry, err
}
@@ -193,8 +205,8 @@ SECTION
Machine controller - nodeToMachine
*/
var (
- errMultipleMachineMatch = errors.New("Multiple machines matching node")
- errNoMachineMatch = errors.New("No machines matching node found")
+ errMultipleMachineMatch = errors.New("multiple machines matching node")
+ errNoMachineMatch = errors.New("no machines matching node found")
)
func (c *controller) addNodeToMachine(obj interface{}) {
@@ -226,9 +238,12 @@ func (c *controller) addNodeToMachine(obj interface{}) {
return
}
- if machine.Status.CurrentStatus.Phase != v1alpha1.MachineCrashLoopBackOff && nodeConditionsHaveChanged(machine.Status.Conditions, node.Status.Conditions) {
- klog.V(5).Infof("Enqueue machine object %q as conditions of backing node %q have changed", machine.Name, getNodeName(machine))
- c.enqueueMachine(machine)
+ isMachineCrashLooping := machine.Status.CurrentStatus.Phase == v1alpha1.MachineCrashLoopBackOff
+ isMachineTerminating := machine.Status.CurrentStatus.Phase == v1alpha1.MachineTerminating
+ _, _, nodeConditionsHaveChanged := nodeConditionsHaveChanged(machine.Status.Conditions, node.Status.Conditions)
+
+ if !isMachineCrashLooping && !isMachineTerminating && nodeConditionsHaveChanged {
+ c.enqueueMachine(machine, fmt.Sprintf("handling node UPDATE event. conditions of backing node %q have changed", getNodeName(machine)))
}
}
@@ -242,7 +257,6 @@ func (c *controller) updateNodeToMachine(oldObj, newObj interface{}) {
// check for the TriggerDeletionByMCM annotation on the node object
// if it is present then mark the machine object for deletion
if value, ok := node.Annotations[machineutils.TriggerDeletionByMCM]; ok && value == "true" {
-
machine, err := c.getMachineFromNode(node.Name)
if err != nil {
klog.Errorf("Couldn't fetch machine %s, Error: %s", machine.Name, err)
@@ -250,7 +264,7 @@ func (c *controller) updateNodeToMachine(oldObj, newObj interface{}) {
}
if machine.DeletionTimestamp == nil {
- klog.Infof("Node %s is annotated to trigger deletion by MCM.", node.Name)
+ klog.Infof("Node %s for machine %s is annotated to trigger deletion by MCM.", node.Name, machine.Name)
if err := c.controlMachineClient.Machines(c.namespace).Delete(context.Background(), machine.Name, metav1.DeleteOptions{}); err != nil {
klog.Errorf("Machine object %s backing the node %s could not be marked for deletion.", machine.Name, node.Name)
return
@@ -275,7 +289,10 @@ func (c *controller) deleteNodeToMachine(obj interface{}) {
return
}
- c.enqueueMachine(machine)
+ // donot respond if machine obj is already in termination flow
+ if machine.DeletionTimestamp == nil {
+ c.enqueueMachine(machine, fmt.Sprintf("backing node obj %q got deleted", key))
+ }
}
/*
@@ -304,7 +321,7 @@ func (c *controller) getMachineFromNode(nodeName string) (*v1alpha1.Machine, err
/*
SECTION
- Machine operations - Create, Update, Delete
+ Machine operations - Create, Delete
*/
func (c *controller) triggerCreationFlow(ctx context.Context, createMachineRequest *driver.CreateMachineRequest) (machineutils.RetryPeriod, error) {
@@ -397,7 +414,7 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
}
// machine obj marked Failed for double security
- c.machineStatusUpdate(
+ updateErr := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
@@ -412,6 +429,11 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
},
machine.Status.LastKnownState,
)
+ if apierrors.IsConflict(updateErr) {
+ return machineutils.ConflictRetry, err
+ } else if updateErr != nil {
+ return machineutils.ShortRetry, updateErr
+ }
klog.V(2).Infof("Machine %q marked Failed as VM was referring to a stale node object", machine.Name)
return machineutils.ShortRetry, err
@@ -424,7 +446,7 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
case codes.Unknown, codes.DeadlineExceeded, codes.Aborted, codes.Unavailable:
// GetMachineStatus() returned with one of the above error codes.
// Retry operation.
- c.machineStatusUpdate(
+ updateErr := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
@@ -439,11 +461,16 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
},
machine.Status.LastKnownState,
)
+ if apierrors.IsConflict(updateErr) {
+ return machineutils.ConflictRetry, err
+ } else if updateErr != nil {
+ return machineutils.ShortRetry, updateErr
+ }
return machineutils.ShortRetry, err
default:
- c.machineStatusUpdate(
+ updateErr := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
@@ -458,6 +485,11 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
},
machine.Status.LastKnownState,
)
+ if apierrors.IsConflict(updateErr) {
+ return machineutils.ConflictRetry, err
+ } else if updateErr != nil {
+ return machineutils.MediumRetry, updateErr
+ }
return machineutils.MediumRetry, err
}
@@ -528,44 +560,6 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
return machineutils.LongRetry, nil
}
-func (c *controller) triggerUpdationFlow(ctx context.Context, machine *v1alpha1.Machine, actualProviderID string) (machineutils.RetryPeriod, error) {
- klog.V(2).Infof("Setting ProviderID of machine %s with backing node %s to %s", machine.Name, getNodeName(machine), actualProviderID)
-
- for {
- machine, err := c.controlMachineClient.Machines(machine.Namespace).Get(ctx, machine.Name, metav1.GetOptions{})
- if err != nil {
- klog.Warningf("Machine GET failed. Retrying, error: %s", err)
- continue
- }
-
- clone := machine.DeepCopy()
- clone.Spec.ProviderID = actualProviderID
- machine, err = c.controlMachineClient.Machines(clone.Namespace).Update(ctx, clone, metav1.UpdateOptions{})
- if err != nil {
- klog.Warningf("Machine UPDATE failed. Retrying, error: %s", err)
- continue
- }
-
- clone = machine.DeepCopy()
- lastOperation := v1alpha1.LastOperation{
- Description: "Updated provider ID",
- State: v1alpha1.MachineStateSuccessful,
- Type: v1alpha1.MachineOperationUpdate,
- LastUpdateTime: metav1.Now(),
- }
- clone.Status.LastOperation = lastOperation
- _, err = c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{})
- if err != nil {
- klog.Warningf("Machine/status UPDATE failed. Retrying, error: %s", err)
- continue
- }
- // Update went through, exit out of infinite loop
- break
- }
-
- return machineutils.LongRetry, nil
-}
-
func (c *controller) triggerDeletionFlow(ctx context.Context, deleteMachineRequest *driver.DeleteMachineRequest) (machineutils.RetryPeriod, error) {
var (
machine = deleteMachineRequest.Machine
From 0c0aaafd6f8f19ea4cc632296536b5c242d58d5d Mon Sep 17 00:00:00 2001
From: Himanshu Sharma
Date: Fri, 1 Dec 2023 17:40:38 +0530
Subject: [PATCH 13/22] updated log messages and used ConflictRetry
---
.../machinecontroller/machine_safety.go | 5 +-
.../machinecontroller/machine_util.go | 101 +++++++++++++-----
.../machinecontroller/machineclass.go | 16 +--
3 files changed, 87 insertions(+), 35 deletions(-)
diff --git a/pkg/util/provider/machinecontroller/machine_safety.go b/pkg/util/provider/machinecontroller/machine_safety.go
index 23ce2e132..8c02bdcb7 100644
--- a/pkg/util/provider/machinecontroller/machine_safety.go
+++ b/pkg/util/provider/machinecontroller/machine_safety.go
@@ -107,8 +107,9 @@ func (c *controller) reconcileClusterMachineSafetyAPIServer(key string) error {
klog.V(2).Infof("SafetyController: Reinitializing machine health check for machine: %q with backing node: %q and providerID: %q", machine.Name, getNodeName(machine), getProviderID(machine))
}
- // En-queue after 30 seconds, to ensure all machine states are reconciled
- c.enqueueMachineAfter(machine, 30*time.Second)
+ // En-queue after 30 seconds, to ensure all machine phases are reconciled to actual as all are
+ // reseted to `Running` currently
+ c.enqueueMachineAfter(machine, 30*time.Second, "kube-api-servers are up again, so reconcile of machine phase is needed")
}
c.safetyOptions.MachineControllerFrozen = false
diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go
index 7a8f4300b..a00c15f8e 100644
--- a/pkg/util/provider/machinecontroller/machine_util.go
+++ b/pkg/util/provider/machinecontroller/machine_util.go
@@ -60,7 +60,10 @@ import (
// emptyMap is a dummy emptyMap to compare with
var emptyMap = make(map[string]string)
-var errSuccessfulPhaseUpdate = errors.New("machine creation is successful. Machine Phase/Conditions have been UPDATED")
+var (
+ errSuccessfulALTsync = errors.New("machine ALTs have been reconciled")
+ errSuccessfulPhaseUpdate = errors.New("machine creation is successful. Machine Phase/Conditions have been UPDATED")
+)
const (
maxReplacements = 1
@@ -101,7 +104,6 @@ func (c *controller) ValidateMachineClass(_ context.Context, classSpec *v1alpha1
errMessage := fmt.Sprintf("The machine class %s has no finalizers set. So not reconciling the machine.", machineClass.Name)
err := errors.New(errMessage)
- klog.Warning(errMessage)
return nil, nil, machineutils.ShortRetry, err
}
@@ -297,10 +299,13 @@ func (c *controller) syncMachineNodeTemplates(ctx context.Context, machine *v1al
// Keep retrying until update goes through
klog.Errorf("Updated failed for node object of machine %q. Retrying, error: %q", machine.Name, err)
} else {
- // Return error even when machine object is updated
- err = fmt.Errorf("Machine ALTs have been reconciled")
+ // Return error to continue in next reconcile
+ err = errSuccessfulALTsync
}
+ if apierrors.IsConflict(err) {
+ return machineutils.ConflictRetry, err
+ }
return machineutils.ShortRetry, err
}
@@ -477,7 +482,7 @@ func (c *controller) machineCreateErrorHandler(ctx context.Context, machine *v1a
lastKnownState = createMachineResponse.LastKnownState
}
- c.machineStatusUpdate(
+ updateErr := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
@@ -493,8 +498,13 @@ func (c *controller) machineCreateErrorHandler(ctx context.Context, machine *v1a
},
lastKnownState,
)
+ if apierrors.IsConflict(updateErr) {
+ return machineutils.ConflictRetry, err
+ } else if updateErr != nil {
+ return retryRequired, updateErr
+ }
- return retryRequired, nil
+ return retryRequired, err
}
func (c *controller) machineStatusUpdate(
@@ -557,7 +567,7 @@ func (c *controller) getCreateFailurePhase(machine *v1alpha1.Machine) v1alpha1.M
if timeOut > 0 {
// Machine creation timeout occured while joining of machine
// Machine set controller would replace this machine with a new one as phase is failed.
- klog.V(2).Infof("Machine %q , providerID %q and backing node %q couldn't join in creation timeout of %s. Changing phase to failed.", machine.Name, getProviderID(machine), getNodeName(machine), timeOutDuration)
+ klog.V(2).Infof("Machine %q , providerID %q and backing node %q couldn't join in creation timeout of %s. Changing phase to Failed.", machine.Name, getProviderID(machine), getNodeName(machine), timeOutDuration)
return v1alpha1.MachineFailed
}
@@ -611,7 +621,7 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph
if isChanged {
clone.Status.Conditions = node.Status.Conditions
- klog.V(3).Infof("Conditions of Machine %q with providerID %q and backing node %q are changing.\nAdded/Updated Conditions:\n\n%s\nRemoved Conditions:\n\n%s", machine.Name, getProviderID(machine), getNodeName(machine), getFormattedNodeConditions(populatedConditions), getFormattedNodeConditions(removedConditions))
+ klog.V(3).Infof("Conditions of node %q backing machine %q with providerID %q have changed.\nAdded/Updated Conditions:\n\n%s\nRemoved Conditions:\n\n%s\n", getNodeName(machine), machine.Name, getProviderID(machine), getFormattedNodeConditions(populatedConditions), getFormattedNodeConditions(removedConditions))
cloneDirty = true
}
@@ -633,7 +643,7 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph
description = fmt.Sprintf("Machine %s successfully re-joined the cluster", clone.Name)
lastOperationType = v1alpha1.MachineOperationHealthCheck
}
- klog.V(2).Info(description)
+ klog.V(2).Infof("%s with backing node %q and providerID %q", description, getNodeName(clone), getProviderID(clone))
// Machine is ready and has joined/re-joined the cluster
clone.Status.LastOperation = v1alpha1.LastOperation{
@@ -651,9 +661,9 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph
}
} else {
if clone.Status.CurrentStatus.Phase == v1alpha1.MachineRunning {
- // If machine is not healthy, and current state is running,
- // change the machinePhase to unknown and activate health check timeout
- description = fmt.Sprintf("Machine %s is unhealthy - changing MachineState to Unknown. Node conditions: %+v", clone.Name, clone.Status.Conditions)
+ // If machine is not healthy, and current phase is Running,
+ // change the machinePhase to Unknown and activate health check timeout
+ description = fmt.Sprintf("Machine %s is unhealthy - changing MachinePhase to Unknown. Node conditions: %+v", clone.Name, clone.Status.Conditions)
klog.Warning(description)
clone.Status.CurrentStatus = v1alpha1.CurrentStatus{
@@ -719,7 +729,7 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph
} else {
// Timeout occurred due to machine being unhealthy for too long
description = fmt.Sprintf(
- "Machine %s is not healthy since %s minutes. Changing status to failed. Node Conditions: %+v",
+ "Machine %s health checks failing since last %s minutes. Updating machine phase to Failed. Node Conditions: %+v",
machine.Name,
timeOutDuration,
machine.Status.Conditions,
@@ -733,7 +743,8 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph
} else {
// If timeout has not occurred, re-enqueue the machine
// after a specified sleep time
- c.enqueueMachineAfter(machine, sleepTime)
+ klog.V(4).Infof("Creation/Health Timeout hasn't occured yet , will re-enqueue after %s", time.Duration(sleepTime))
+ c.enqueueMachineAfter(machine, sleepTime, "re-check for creation/health timeout")
}
}
@@ -742,8 +753,11 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph
if err != nil {
// Keep retrying across reconciles until update goes through
klog.Errorf("Update of Phase/Conditions failed for machine %q. Retrying, error: %q", machine.Name, err)
+ if apierrors.IsConflict(err) {
+ return machineutils.ConflictRetry, err
+ }
} else {
- klog.V(2).Infof("Machine Phase/Conditions have been updated for %q with providerID %q and backing node %q", machine.Name, getProviderID(machine), getNodeName(machine))
+ klog.V(2).Infof("Machine Phase/Conditions have been updated for %q with providerID %q and are in sync with backing node %q", machine.Name, getProviderID(machine), getNodeName(machine))
// Return error to end the reconcile
err = errSuccessfulPhaseUpdate
}
@@ -764,10 +778,13 @@ func GetFormattedNodeConditions(conditions []v1.NodeCondition) string {
func getFormattedNodeConditions(conditions []v1.NodeCondition) string {
var result string
+ if len(conditions) == 0 {
+ return "\n"
+ }
+
for _, c := range conditions {
result = fmt.Sprintf("%sType: %s | Status: %s | Reason: %s | Message: %s\n", result, c.Type, c.Status, c.Reason, c.Message)
}
-
return result
}
@@ -857,7 +874,11 @@ func criticalComponentsNotReadyTaintPresent(node *v1.Node) bool {
}
func isPendingMachineWithCriticalComponentsNotReadyTaint(clone *v1alpha1.Machine, node *v1.Node) bool {
- return clone.Status.CurrentStatus.Phase == v1alpha1.MachinePending && criticalComponentsNotReadyTaintPresent(node)
+ if clone.Status.CurrentStatus.Phase == v1alpha1.MachinePending && criticalComponentsNotReadyTaintPresent(node) {
+ klog.V(3).Infof("Critical component taint %q still present on node %q for machine %q", machineutils.TaintNodeCriticalComponentsNotReady, getNodeName(clone), clone.Name)
+ return true
+ }
+ return false
}
/*
@@ -889,6 +910,10 @@ func (c *controller) setMachineTerminationStatus(ctx context.Context, deleteMach
// Return error even when machine object is updated to ensure reconcilation is restarted
err = fmt.Errorf("Machine deletion in process. Phase set to termination")
}
+
+ if apierrors.IsConflict(err) {
+ return machineutils.ConflictRetry, err
+ }
return machineutils.ShortRetry, err
}
@@ -949,7 +974,7 @@ func (c *controller) getVMStatus(ctx context.Context, getMachineStatusRequest *d
}
}
- c.machineStatusUpdate(
+ updateErr := c.machineStatusUpdate(
ctx,
getMachineStatusRequest.Machine,
v1alpha1.LastOperation{
@@ -964,6 +989,11 @@ func (c *controller) getVMStatus(ctx context.Context, getMachineStatusRequest *d
getMachineStatusRequest.Machine.Status.CurrentStatus,
getMachineStatusRequest.Machine.Status.LastKnownState,
)
+ if apierrors.IsConflict(updateErr) {
+ return machineutils.ConflictRetry, err
+ } else if updateErr != nil {
+ return retry, updateErr
+ }
return retry, err
}
@@ -1077,7 +1107,7 @@ func (c *controller) drainNode(ctx context.Context, deleteMachineRequest *driver
)
}
- // update node with the machine's state prior to termination
+ // update node with the machine's phase prior to termination
if err = c.UpdateNodeTerminationCondition(ctx, machine); err != nil {
if forceDeleteMachine {
klog.Warningf("Failed to update node conditions: %v. However, since it's a force deletion shall continue deletion of VM.", err)
@@ -1148,7 +1178,7 @@ func (c *controller) drainNode(ctx context.Context, deleteMachineRequest *driver
}
}
- c.machineStatusUpdate(
+ updateErr := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
@@ -1163,6 +1193,11 @@ func (c *controller) drainNode(ctx context.Context, deleteMachineRequest *driver
machine.Status.CurrentStatus,
machine.Status.LastKnownState,
)
+ if apierrors.IsConflict(updateErr) {
+ return machineutils.ConflictRetry, err
+ } else if updateErr != nil {
+ return machineutils.ShortRetry, updateErr
+ }
return machineutils.ShortRetry, err
}
@@ -1212,7 +1247,7 @@ func (c *controller) deleteNodeVolAttachments(ctx context.Context, deleteMachine
}
now := metav1.Now()
klog.V(4).Infof("(deleteVolumeAttachmentsForNode) For node %q, machine %q, set LastOperation.Description: %q", nodeName, machine.Name, description)
- err = c.machineStatusUpdate(
+ updateErr := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
@@ -1224,6 +1259,12 @@ func (c *controller) deleteNodeVolAttachments(ctx context.Context, deleteMachine
machine.Status.CurrentStatus,
machine.Status.LastKnownState,
)
+ if apierrors.IsConflict(updateErr) {
+ return machineutils.ConflictRetry, err
+ } else if updateErr != nil {
+ return retryPeriod, updateErr
+ }
+
return retryPeriod, err
}
@@ -1275,7 +1316,7 @@ func (c *controller) deleteVM(ctx context.Context, deleteMachineRequest *driver.
lastKnownState = deleteMachineResponse.LastKnownState
}
- c.machineStatusUpdate(
+ updateErr := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
@@ -1290,6 +1331,11 @@ func (c *controller) deleteVM(ctx context.Context, deleteMachineRequest *driver.
machine.Status.CurrentStatus,
lastKnownState,
)
+ if apierrors.IsConflict(updateErr) {
+ return machineutils.ConflictRetry, err
+ } else if updateErr != nil {
+ return retryRequired, updateErr
+ }
return retryRequired, err
}
@@ -1308,14 +1354,14 @@ func (c *controller) deleteNodeObject(ctx context.Context, machine *v1alpha1.Mac
// Delete node object
err = c.targetCoreClient.CoreV1().Nodes().Delete(ctx, nodeName, metav1.DeleteOptions{})
if err != nil && !apierrors.IsNotFound(err) {
- // If its an error, and anyother error than object not found
+ // If its an error, and any other error than object not found
description = fmt.Sprintf("Deletion of Node Object %q failed due to error: %s. %s", nodeName, err, machineutils.InitiateNodeDeletion)
state = v1alpha1.MachineStateFailed
} else if err == nil {
description = fmt.Sprintf("Deletion of Node Object %q is successful. %s", nodeName, machineutils.InitiateFinalizerRemoval)
state = v1alpha1.MachineStateProcessing
- err = fmt.Errorf("Machine deletion in process. Deletion of node object was succesful")
+ err = fmt.Errorf("Machine deletion in process. Deletion of node object was successful")
} else {
description = fmt.Sprintf("No node object found for %q, continuing deletion flow. %s", nodeName, machineutils.InitiateFinalizerRemoval)
state = v1alpha1.MachineStateProcessing
@@ -1327,7 +1373,7 @@ func (c *controller) deleteNodeObject(ctx context.Context, machine *v1alpha1.Mac
err = fmt.Errorf("Machine deletion in process. No node object found")
}
- c.machineStatusUpdate(
+ updateErr := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
@@ -1342,6 +1388,11 @@ func (c *controller) deleteNodeObject(ctx context.Context, machine *v1alpha1.Mac
machine.Status.CurrentStatus,
machine.Status.LastKnownState,
)
+ if apierrors.IsConflict(updateErr) {
+ return machineutils.ConflictRetry, err
+ } else if updateErr != nil {
+ return machineutils.ShortRetry, updateErr
+ }
return machineutils.ShortRetry, err
}
diff --git a/pkg/util/provider/machinecontroller/machineclass.go b/pkg/util/provider/machinecontroller/machineclass.go
index 13e1dc031..c90c139aa 100644
--- a/pkg/util/provider/machinecontroller/machineclass.go
+++ b/pkg/util/provider/machinecontroller/machineclass.go
@@ -96,12 +96,12 @@ func (c *controller) reconcileClusterMachineClassKey(key string) error {
err = c.reconcileClusterMachineClass(ctx, class)
if err != nil {
- // Re-enqueue after a 10s window
- c.enqueueMachineClassAfter(class, 10*time.Second)
+ // Re-enqueue after a ShortRetry window
+ c.enqueueMachineClassAfter(class, time.Duration(machineutils.ShortRetry))
} else {
// Re-enqueue periodically to avoid missing of events
// TODO: Get ride of this logic
- c.enqueueMachineClassAfter(class, 10*time.Minute)
+ c.enqueueMachineClassAfter(class, time.Duration(machineutils.LongRetry))
}
return nil
@@ -137,7 +137,7 @@ func (c *controller) reconcileClusterMachineClass(ctx context.Context, class *v1
// Enqueue all machines once finalizer is added to machineClass
// This is to allow processing of such machines
for _, machine := range machines {
- c.enqueueMachine(machine)
+ c.enqueueMachine(machine, "finalizer placed on machineClass")
}
}
@@ -171,13 +171,13 @@ func (c *controller) reconcileClusterMachineClass(ctx context.Context, class *v1
func (c *controller) addMachineClassFinalizers(ctx context.Context, class *v1alpha1.MachineClass) error {
finalizers := sets.NewString(class.Finalizers...)
finalizers.Insert(MCMFinalizerName)
- return c.updateMachineClassFinalizers(ctx, class, finalizers.List(),true)
+ return c.updateMachineClassFinalizers(ctx, class, finalizers.List(), true)
}
func (c *controller) deleteMachineClassFinalizers(ctx context.Context, class *v1alpha1.MachineClass) error {
finalizers := sets.NewString(class.Finalizers...)
finalizers.Delete(MCMFinalizerName)
- return c.updateMachineClassFinalizers(ctx, class, finalizers.List(),false)
+ return c.updateMachineClassFinalizers(ctx, class, finalizers.List(), false)
}
func (c *controller) updateMachineClassFinalizers(ctx context.Context, class *v1alpha1.MachineClass, finalizers []string, addFinalizers bool) error {
@@ -194,9 +194,9 @@ func (c *controller) updateMachineClassFinalizers(ctx context.Context, class *v1
klog.Warning("Updating machineClass failed, retrying. ", class.Name, err)
return err
}
- if addFinalizers{
+ if addFinalizers {
klog.V(3).Infof("Successfully added finalizer on the machineclass %q", class.Name)
- }else{
+ } else {
klog.V(3).Infof("Successfully removed finalizer on the machineclass %q", class.Name)
}
return err
From f606092e25f2f3ae538a3e50e1e88fdf2fdf6148 Mon Sep 17 00:00:00 2001
From: Himanshu Sharma
Date: Fri, 1 Dec 2023 17:47:17 +0530
Subject: [PATCH 14/22] make generate && make check
---
docs/documents/apis.md | 4 ++--
kubernetes/crds/machine.sapcloud.io_machines.yaml | 2 +-
pkg/util/provider/machinecontroller/machine_util.go | 8 --------
3 files changed, 3 insertions(+), 11 deletions(-)
diff --git a/docs/documents/apis.md b/docs/documents/apis.md
index 6ea44ddc0..401062f35 100644
--- a/docs/documents/apis.md
+++ b/docs/documents/apis.md
@@ -1555,7 +1555,7 @@ to be.
CurrentStatus)
-
MachinePhase is a label for the condition of a machines at the current time.
+MachinePhase is a label for the condition of a machine at the current time.
@@ -1992,7 +1992,7 @@ MachineConfiguration
LastOperation)
-
MachineState is a current state of the machine.
+MachineState is a current state of the operation.
diff --git a/kubernetes/crds/machine.sapcloud.io_machines.yaml b/kubernetes/crds/machine.sapcloud.io_machines.yaml
index 1b800af25..744354ac5 100644
--- a/kubernetes/crds/machine.sapcloud.io_machines.yaml
+++ b/kubernetes/crds/machine.sapcloud.io_machines.yaml
@@ -245,7 +245,7 @@ spec:
format: date-time
type: string
phase:
- description: MachinePhase is a label for the condition of a machines
+ description: MachinePhase is a label for the condition of a machine
at the current time.
type: string
timeoutActive:
diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go
index a00c15f8e..6720859b3 100644
--- a/pkg/util/provider/machinecontroller/machine_util.go
+++ b/pkg/util/provider/machinecontroller/machine_util.go
@@ -768,14 +768,6 @@ func (c *controller) reconcileMachineHealth(ctx context.Context, machine *v1alph
return machineutils.LongRetry, nil
}
-func NodeConditionsHaveChanged(oldConditions []v1.NodeCondition, newConditions []v1.NodeCondition) ([]v1.NodeCondition, []v1.NodeCondition, bool) {
- return nodeConditionsHaveChanged(oldConditions, newConditions)
-}
-
-func GetFormattedNodeConditions(conditions []v1.NodeCondition) string {
- return getFormattedNodeConditions(conditions)
-}
-
func getFormattedNodeConditions(conditions []v1.NodeCondition) string {
var result string
if len(conditions) == 0 {
From 41b8a731a5649b7012fb3c223377b39ead654c62 Mon Sep 17 00:00:00 2001
From: Himanshu Sharma
Date: Fri, 8 Dec 2023 16:33:44 +0530
Subject: [PATCH 15/22] reconcile on removal of critical component taint
---
.../provider/machinecontroller/machine.go | 25 ++++++++++++++-----
1 file changed, 19 insertions(+), 6 deletions(-)
diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go
index 8fbc82710..521f3288d 100644
--- a/pkg/util/provider/machinecontroller/machine.go
+++ b/pkg/util/provider/machinecontroller/machine.go
@@ -21,6 +21,7 @@ import (
"context"
"errors"
"fmt"
+ "reflect"
"strings"
"time"
@@ -248,21 +249,22 @@ func (c *controller) addNodeToMachine(obj interface{}) {
}
func (c *controller) updateNodeToMachine(oldObj, newObj interface{}) {
+ oldNode := oldObj.(*corev1.Node)
node := newObj.(*corev1.Node)
if node == nil {
klog.Errorf("Couldn't convert to node from object")
return
}
+ machine, err := c.getMachineFromNode(node.Name)
+ if err != nil {
+ klog.Errorf("Couldn't fetch machine %s, Error: %s", machine.Name, err)
+ return
+ }
+
// check for the TriggerDeletionByMCM annotation on the node object
// if it is present then mark the machine object for deletion
if value, ok := node.Annotations[machineutils.TriggerDeletionByMCM]; ok && value == "true" {
- machine, err := c.getMachineFromNode(node.Name)
- if err != nil {
- klog.Errorf("Couldn't fetch machine %s, Error: %s", machine.Name, err)
- return
- }
-
if machine.DeletionTimestamp == nil {
klog.Infof("Node %s for machine %s is annotated to trigger deletion by MCM.", node.Name, machine.Name)
if err := c.controlMachineClient.Machines(c.namespace).Delete(context.Background(), machine.Name, metav1.DeleteOptions{}); err != nil {
@@ -273,6 +275,13 @@ func (c *controller) updateNodeToMachine(oldObj, newObj interface{}) {
}
}
+ // to reconcile on removal of critical component taint
+ nodeSpecHasChanged := isNodeSpecChanged(oldNode, node)
+ if nodeSpecHasChanged {
+ c.enqueueMachine(machine, fmt.Sprintf("handling node UPDATE event. spec of backing node %q has changed", getNodeName(machine)))
+ return
+ }
+
c.addNodeToMachine(newObj)
}
@@ -319,6 +328,10 @@ func (c *controller) getMachineFromNode(nodeName string) (*v1alpha1.Machine, err
return machines[0], nil
}
+func isNodeSpecChanged(oldNode *corev1.Node, node *corev1.Node) bool {
+ return !reflect.DeepEqual(oldNode.Spec, node.Spec)
+}
+
/*
SECTION
Machine operations - Create, Delete
From 2099f976ebed35db69e90e9a45ed0637914cff6c Mon Sep 17 00:00:00 2001
From: Himanshu Sharma
Date: Fri, 8 Dec 2023 20:25:45 +0530
Subject: [PATCH 16/22] reconcile machine on specific node taint updates
---
.../provider/machinecontroller/machine.go | 30 ++++++++++++++-----
pkg/util/provider/machineutils/utils.go | 4 +++
2 files changed, 27 insertions(+), 7 deletions(-)
diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go
index 521f3288d..b9cc4f1f9 100644
--- a/pkg/util/provider/machinecontroller/machine.go
+++ b/pkg/util/provider/machinecontroller/machine.go
@@ -21,7 +21,6 @@ import (
"context"
"errors"
"fmt"
- "reflect"
"strings"
"time"
@@ -275,10 +274,9 @@ func (c *controller) updateNodeToMachine(oldObj, newObj interface{}) {
}
}
- // to reconcile on removal of critical component taint
- nodeSpecHasChanged := isNodeSpecChanged(oldNode, node)
- if nodeSpecHasChanged {
- c.enqueueMachine(machine, fmt.Sprintf("handling node UPDATE event. spec of backing node %q has changed", getNodeName(machine)))
+ // to reconcile on addition/removal of essential taints in machine lifecycle, example - critical component taint
+ if areEssentialTaintsUpdated(oldNode, node, machineutils.EssentialTaints) {
+ c.enqueueMachine(machine, fmt.Sprintf("handling node UPDATE event. atleast one of essential taints on backing node %q has changed", getNodeName(machine)))
return
}
@@ -328,8 +326,26 @@ func (c *controller) getMachineFromNode(nodeName string) (*v1alpha1.Machine, err
return machines[0], nil
}
-func isNodeSpecChanged(oldNode *corev1.Node, node *corev1.Node) bool {
- return !reflect.DeepEqual(oldNode.Spec, node.Spec)
+func areEssentialTaintsUpdated(oldNode, node *corev1.Node, taintKeys []string) bool {
+ mapOldNodeTaintKeys := make(map[string]bool)
+ mapNodeTaintKeys := make(map[string]bool)
+
+ for _, t := range oldNode.Spec.Taints {
+ mapOldNodeTaintKeys[t.Key] = true
+ }
+
+ for _, t := range node.Spec.Taints {
+ mapNodeTaintKeys[t.Key] = true
+ }
+
+ for _, tk := range taintKeys {
+ _, oldNodeHasTaint := mapOldNodeTaintKeys[tk]
+ _, newNodeHasTaint := mapNodeTaintKeys[tk]
+ if oldNodeHasTaint != newNodeHasTaint {
+ return true
+ }
+ }
+ return false
}
/*
diff --git a/pkg/util/provider/machineutils/utils.go b/pkg/util/provider/machineutils/utils.go
index 332349206..3d91e1a68 100644
--- a/pkg/util/provider/machineutils/utils.go
+++ b/pkg/util/provider/machineutils/utils.go
@@ -88,3 +88,7 @@ const (
// LongRetry tells the controller to retry after a long duration - 10 minutes
LongRetry RetryPeriod = RetryPeriod(10 * time.Minute)
)
+
+// EssentialTaints are taints on node object which if added/removed, require an immediate reconcile by machine controller
+// TODO: update this when taints for ALT updation and PostCreate operations is introduced.
+var EssentialTaints = []string{TaintNodeCriticalComponentsNotReady}
From 692e894494a74a52fb8223a69111afa9ddf87aed Mon Sep 17 00:00:00 2001
From: Himanshu Sharma
Date: Tue, 12 Dec 2023 16:39:31 +0530
Subject: [PATCH 17/22] adapted unit tests for create errors
---
pkg/util/provider/machinecontroller/machine_test.go | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/pkg/util/provider/machinecontroller/machine_test.go b/pkg/util/provider/machinecontroller/machine_test.go
index 23ab69061..39334ee71 100644
--- a/pkg/util/provider/machinecontroller/machine_test.go
+++ b/pkg/util/provider/machinecontroller/machine_test.go
@@ -885,7 +885,7 @@ var _ = Describe("machine", func() {
ErrorCode: codes.Internal.String(),
},
}, nil, nil, nil, true, metav1.NewTime(metav1.Now().Add(-time.Hour))),
- err: nil,
+ err: status.Error(codes.Internal, "Provider is returning error on create call"),
retry: machineutils.MediumRetry,
},
}),
From 174a4b19841d7b900ffab686cf3e1eafc0b25b3c Mon Sep 17 00:00:00 2001
From: Himanshu Sharma
Date: Wed, 13 Dec 2023 12:12:11 +0530
Subject: [PATCH 18/22] fixed some failing tests
---
pkg/util/provider/machinecontroller/machine_test.go | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/pkg/util/provider/machinecontroller/machine_test.go b/pkg/util/provider/machinecontroller/machine_test.go
index 39334ee71..9bfd4bf81 100644
--- a/pkg/util/provider/machinecontroller/machine_test.go
+++ b/pkg/util/provider/machinecontroller/machine_test.go
@@ -781,7 +781,7 @@ var _ = Describe("machine", func() {
ErrorCode: codes.Internal.String(),
},
}, nil, nil, nil, true, metav1.Now()),
- err: nil,
+ err: status.Error(codes.Internal, "Provider is returning error on create call"),
retry: machineutils.MediumRetry,
},
}),
@@ -833,7 +833,7 @@ var _ = Describe("machine", func() {
ErrorCode: codes.ResourceExhausted.String(),
},
}, nil, nil, nil, true, metav1.Now()),
- err: nil,
+ err: status.Error(codes.ResourceExhausted, "Provider does not have capacity to create VM"),
retry: machineutils.MediumRetry,
},
}),
From ca12dd0abca0efc07b9307c5b58c6a8eb80be3a6 Mon Sep 17 00:00:00 2001
From: Himanshu Sharma <79965161+himanshu-kun@users.noreply.github.com>
Date: Fri, 15 Dec 2023 12:12:29 +0530
Subject: [PATCH 19/22] Apply suggestions from code review
Co-authored-by: Aaron Francis Fernandes <79958509+aaronfern@users.noreply.github.com>
Co-authored-by: Rishabh Patel <66425093+rishabh-11@users.noreply.github.com>
---
pkg/apis/machine/v1alpha1/machine_types.go | 8 ++++----
pkg/util/provider/machinecontroller/machine.go | 6 +++---
pkg/util/provider/machinecontroller/machine_safety.go | 4 ++--
3 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/pkg/apis/machine/v1alpha1/machine_types.go b/pkg/apis/machine/v1alpha1/machine_types.go
index 00f87aaf1..d3541743c 100644
--- a/pkg/apis/machine/v1alpha1/machine_types.go
+++ b/pkg/apis/machine/v1alpha1/machine_types.go
@@ -156,19 +156,19 @@ const (
// MachineUnknown indicates that the node is not ready at the movement
MachineUnknown MachinePhase = "Unknown"
- // MachineFailed means operation timeod out
+ // MachineFailed means operation timed out
MachineFailed MachinePhase = "Failed"
- // MachineCrashLoopBackOff means creation machine is failing. It means that machine object is present but there is no corresponding VM.
+ // MachineCrashLoopBackOff means machine creation is failing. It means that machine object is present but there is no corresponding VM.
MachineCrashLoopBackOff MachinePhase = "CrashLoopBackOff"
)
// MachineState is a current state of the operation.
type MachineState string
-// These are the valid states of the operation performed on the machine.
+// These are the valid states of operations performed on the machine.
const (
- // MachineStateProcessing means operation is not yet complete
+ // MachineStateProcessing means operation is not yet complete
MachineStateProcessing MachineState = "Processing"
// MachineStateFailed means operation failed
diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go
index b9cc4f1f9..7ebc8507a 100644
--- a/pkg/util/provider/machinecontroller/machine.go
+++ b/pkg/util/provider/machinecontroller/machine.go
@@ -55,7 +55,7 @@ func (c *controller) updateMachine(oldObj, newObj interface{}) {
newMachine := newObj.(*v1alpha1.Machine)
if oldMachine == nil || newMachine == nil {
- klog.Errorf("Couldn't convert to machine resource from object")
+ klog.Errorf("couldn't convert to machine resource from object")
return
}
@@ -152,7 +152,7 @@ func (c *controller) reconcileClusterMachine(ctx context.Context, machine *v1alp
// Validate MachineClass
machineClass, secretData, retry, err := c.ValidateMachineClass(ctx, &machine.Spec.Class)
if err != nil {
- klog.Errorf("Cannot reconcile machine %s: %s", machine.Name, err)
+ klog.Errorf("cannot reconcile machine %s: %s", machine.Name, err)
return retry, err
}
@@ -257,7 +257,7 @@ func (c *controller) updateNodeToMachine(oldObj, newObj interface{}) {
machine, err := c.getMachineFromNode(node.Name)
if err != nil {
- klog.Errorf("Couldn't fetch machine %s, Error: %s", machine.Name, err)
+ klog.Errorf("Unable to handle update event for node %s, couldn't fetch machine %s, Error: %s", machine.Name, err)
return
}
diff --git a/pkg/util/provider/machinecontroller/machine_safety.go b/pkg/util/provider/machinecontroller/machine_safety.go
index 8c02bdcb7..7da65e9b3 100644
--- a/pkg/util/provider/machinecontroller/machine_safety.go
+++ b/pkg/util/provider/machinecontroller/machine_safety.go
@@ -107,8 +107,8 @@ func (c *controller) reconcileClusterMachineSafetyAPIServer(key string) error {
klog.V(2).Infof("SafetyController: Reinitializing machine health check for machine: %q with backing node: %q and providerID: %q", machine.Name, getNodeName(machine), getProviderID(machine))
}
- // En-queue after 30 seconds, to ensure all machine phases are reconciled to actual as all are
- // reseted to `Running` currently
+ // En-queue after 30 seconds, to ensure all machine phases are reconciled to their actual value
+ // as they are currently reset to `Running`
c.enqueueMachineAfter(machine, 30*time.Second, "kube-api-servers are up again, so reconcile of machine phase is needed")
}
From b73c1476307bcf8d7e09e38aee513ad83d6c814c Mon Sep 17 00:00:00 2001
From: Himanshu Sharma
Date: Fri, 15 Dec 2023 12:13:57 +0530
Subject: [PATCH 20/22] addresed Rishab's suggestion part 1
---
pkg/util/provider/machinecontroller/machine.go | 4 ++--
pkg/util/provider/machinecontroller/machine_util_test.go | 6 +++---
pkg/util/provider/machinecontroller/machineclass.go | 8 ++++----
3 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go
index 7ebc8507a..dcd223d8c 100644
--- a/pkg/util/provider/machinecontroller/machine.go
+++ b/pkg/util/provider/machinecontroller/machine.go
@@ -275,7 +275,7 @@ func (c *controller) updateNodeToMachine(oldObj, newObj interface{}) {
}
// to reconcile on addition/removal of essential taints in machine lifecycle, example - critical component taint
- if areEssentialTaintsUpdated(oldNode, node, machineutils.EssentialTaints) {
+ if addedOrRemovedEssentialTaints(oldNode, node, machineutils.EssentialTaints) {
c.enqueueMachine(machine, fmt.Sprintf("handling node UPDATE event. atleast one of essential taints on backing node %q has changed", getNodeName(machine)))
return
}
@@ -326,7 +326,7 @@ func (c *controller) getMachineFromNode(nodeName string) (*v1alpha1.Machine, err
return machines[0], nil
}
-func areEssentialTaintsUpdated(oldNode, node *corev1.Node, taintKeys []string) bool {
+func addedOrRemovedEssentialTaints(oldNode, node *corev1.Node, taintKeys []string) bool {
mapOldNodeTaintKeys := make(map[string]bool)
mapNodeTaintKeys := make(map[string]bool)
diff --git a/pkg/util/provider/machinecontroller/machine_util_test.go b/pkg/util/provider/machinecontroller/machine_util_test.go
index 8626c0f88..2e83bfed2 100644
--- a/pkg/util/provider/machinecontroller/machine_util_test.go
+++ b/pkg/util/provider/machinecontroller/machine_util_test.go
@@ -2363,7 +2363,7 @@ var _ = Describe("machine_util", func() {
},
expect: expect{
retryPeriod: machineutils.ShortRetry,
- err: errors.New("machine creation is successful. Machine Phase/Conditions have been UPDATED"),
+ err: errSuccessfulPhaseUpdate,
expectedPhase: v1alpha1.MachineRunning,
},
}),
@@ -2390,7 +2390,7 @@ var _ = Describe("machine_util", func() {
},
expect: expect{
retryPeriod: machineutils.ShortRetry,
- err: errors.New("machine creation is successful. Machine Phase/Conditions have been UPDATED"),
+ err: errSuccessfulPhaseUpdate,
expectedPhase: v1alpha1.MachineRunning,
},
}),
@@ -2424,7 +2424,7 @@ var _ = Describe("machine_util", func() {
},
expect: expect{
retryPeriod: machineutils.ShortRetry,
- err: errors.New("machine creation is successful. Machine Phase/Conditions have been UPDATED"),
+ err: errSuccessfulPhaseUpdate,
expectedPhase: v1alpha1.MachineRunning,
},
}),
diff --git a/pkg/util/provider/machinecontroller/machineclass.go b/pkg/util/provider/machinecontroller/machineclass.go
index c90c139aa..91a1ab460 100644
--- a/pkg/util/provider/machinecontroller/machineclass.go
+++ b/pkg/util/provider/machinecontroller/machineclass.go
@@ -129,7 +129,7 @@ func (c *controller) reconcileClusterMachineClass(ctx context.Context, class *v1
if finalizers := sets.NewString(class.Finalizers...); !finalizers.Has(MCMFinalizerName) {
// Add machineClassFinalizer as if doesn't exist
- err = c.addMachineClassFinalizers(ctx, class)
+ err = c.addMCMFinalizerToMachineClass(ctx, class)
if err != nil {
return err
}
@@ -157,7 +157,7 @@ func (c *controller) reconcileClusterMachineClass(ctx context.Context, class *v1
if finalizers := sets.NewString(class.Finalizers...); finalizers.Has(MCMFinalizerName) {
// Delete finalizer if exists on machineClass
- return c.deleteMachineClassFinalizers(ctx, class)
+ return c.deleteMCMFinalizerFromMachineClass(ctx, class)
}
return nil
@@ -168,13 +168,13 @@ func (c *controller) reconcileClusterMachineClass(ctx context.Context, class *v1
Manipulate Finalizers
*/
-func (c *controller) addMachineClassFinalizers(ctx context.Context, class *v1alpha1.MachineClass) error {
+func (c *controller) addMCMFinalizerToMachineClass(ctx context.Context, class *v1alpha1.MachineClass) error {
finalizers := sets.NewString(class.Finalizers...)
finalizers.Insert(MCMFinalizerName)
return c.updateMachineClassFinalizers(ctx, class, finalizers.List(), true)
}
-func (c *controller) deleteMachineClassFinalizers(ctx context.Context, class *v1alpha1.MachineClass) error {
+func (c *controller) deleteMCMFinalizerFromMachineClass(ctx context.Context, class *v1alpha1.MachineClass) error {
finalizers := sets.NewString(class.Finalizers...)
finalizers.Delete(MCMFinalizerName)
return c.updateMachineClassFinalizers(ctx, class, finalizers.List(), false)
From 9d41bf78e2a7a3f19d74888366730ac602f733b7 Mon Sep 17 00:00:00 2001
From: Himanshu Sharma
Date: Fri, 15 Dec 2023 13:10:26 +0530
Subject: [PATCH 21/22] addressed Rishab's review comments part 2
---
.../provider/machinecontroller/machine.go | 28 ++++----
.../machinecontroller/machine_safety.go | 4 +-
.../machinecontroller/machine_util.go | 64 +++++++++----------
3 files changed, 45 insertions(+), 51 deletions(-)
diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go
index dcd223d8c..c85d658c5 100644
--- a/pkg/util/provider/machinecontroller/machine.go
+++ b/pkg/util/provider/machinecontroller/machine.go
@@ -59,7 +59,7 @@ func (c *controller) updateMachine(oldObj, newObj interface{}) {
return
}
- if oldMachine.ResourceVersion != newMachine.ResourceVersion && oldMachine.Generation == newMachine.Generation {
+ if oldMachine.Generation == newMachine.Generation {
klog.V(3).Infof("Skipping non-spec updates for machine %s", oldMachine.Name)
return
}
@@ -342,6 +342,7 @@ func addedOrRemovedEssentialTaints(oldNode, node *corev1.Node, taintKeys []strin
_, oldNodeHasTaint := mapOldNodeTaintKeys[tk]
_, newNodeHasTaint := mapNodeTaintKeys[tk]
if oldNodeHasTaint != newNodeHasTaint {
+ klog.V(2).Infof("Taint with key %q has been added/removed from the node %q", tk, node.Name)
return true
}
}
@@ -443,7 +444,7 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
}
// machine obj marked Failed for double security
- updateErr := c.machineStatusUpdate(
+ updateRetryRequired, updateErr := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
@@ -458,10 +459,9 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
},
machine.Status.LastKnownState,
)
- if apierrors.IsConflict(updateErr) {
- return machineutils.ConflictRetry, err
- } else if updateErr != nil {
- return machineutils.ShortRetry, updateErr
+
+ if updateErr != nil {
+ return updateRetryRequired, updateErr
}
klog.V(2).Infof("Machine %q marked Failed as VM was referring to a stale node object", machine.Name)
@@ -475,7 +475,7 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
case codes.Unknown, codes.DeadlineExceeded, codes.Aborted, codes.Unavailable:
// GetMachineStatus() returned with one of the above error codes.
// Retry operation.
- updateErr := c.machineStatusUpdate(
+ updateRetryRequired, updateErr := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
@@ -490,16 +490,14 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
},
machine.Status.LastKnownState,
)
- if apierrors.IsConflict(updateErr) {
- return machineutils.ConflictRetry, err
- } else if updateErr != nil {
- return machineutils.ShortRetry, updateErr
+ if updateErr != nil {
+ return updateRetryRequired, updateErr
}
return machineutils.ShortRetry, err
default:
- updateErr := c.machineStatusUpdate(
+ updateRetryRequired, updateErr := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
@@ -514,10 +512,8 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
},
machine.Status.LastKnownState,
)
- if apierrors.IsConflict(updateErr) {
- return machineutils.ConflictRetry, err
- } else if updateErr != nil {
- return machineutils.MediumRetry, updateErr
+ if updateErr != nil {
+ return updateRetryRequired, updateErr
}
return machineutils.MediumRetry, err
diff --git a/pkg/util/provider/machinecontroller/machine_safety.go b/pkg/util/provider/machinecontroller/machine_safety.go
index 7da65e9b3..44b99d286 100644
--- a/pkg/util/provider/machinecontroller/machine_safety.go
+++ b/pkg/util/provider/machinecontroller/machine_safety.go
@@ -107,8 +107,8 @@ func (c *controller) reconcileClusterMachineSafetyAPIServer(key string) error {
klog.V(2).Infof("SafetyController: Reinitializing machine health check for machine: %q with backing node: %q and providerID: %q", machine.Name, getNodeName(machine), getProviderID(machine))
}
- // En-queue after 30 seconds, to ensure all machine phases are reconciled to their actual value
- // as they are currently reset to `Running`
+ // En-queue after 30 seconds, to ensure all machine phases are reconciled to their actual value
+ // as they are currently reset to `Running`
c.enqueueMachineAfter(machine, 30*time.Second, "kube-api-servers are up again, so reconcile of machine phase is needed")
}
diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go
index 6720859b3..c49c35ec4 100644
--- a/pkg/util/provider/machinecontroller/machine_util.go
+++ b/pkg/util/provider/machinecontroller/machine_util.go
@@ -482,7 +482,7 @@ func (c *controller) machineCreateErrorHandler(ctx context.Context, machine *v1a
lastKnownState = createMachineResponse.LastKnownState
}
- updateErr := c.machineStatusUpdate(
+ updateRetryRequired, updateErr := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
@@ -498,10 +498,9 @@ func (c *controller) machineCreateErrorHandler(ctx context.Context, machine *v1a
},
lastKnownState,
)
- if apierrors.IsConflict(updateErr) {
- return machineutils.ConflictRetry, err
- } else if updateErr != nil {
- return retryRequired, updateErr
+
+ if updateErr != nil {
+ return updateRetryRequired, updateErr
}
return retryRequired, err
@@ -513,7 +512,7 @@ func (c *controller) machineStatusUpdate(
lastOperation v1alpha1.LastOperation,
currentStatus v1alpha1.CurrentStatus,
lastKnownState string,
-) error {
+) (machineutils.RetryPeriod, error) {
clone := machine.DeepCopy()
clone.Status.LastOperation = lastOperation
clone.Status.CurrentStatus = currentStatus
@@ -521,7 +520,7 @@ func (c *controller) machineStatusUpdate(
if isMachineStatusSimilar(clone.Status, machine.Status) {
klog.V(3).Infof("Not updating the status of the machine object %q, as the content is similar", clone.Name)
- return nil
+ return machineutils.ShortRetry, nil
}
_, err := c.controlMachineClient.Machines(clone.Namespace).UpdateStatus(ctx, clone, metav1.UpdateOptions{})
@@ -532,7 +531,11 @@ func (c *controller) machineStatusUpdate(
klog.V(2).Infof("Machine/status UPDATE for %q", machine.Name)
}
- return err
+ if apierrors.IsConflict(err) {
+ return machineutils.ConflictRetry, err
+ }
+
+ return machineutils.ShortRetry, err
}
// isMachineStatusSimilar checks if the status of 2 machines is similar or not.
@@ -966,7 +969,7 @@ func (c *controller) getVMStatus(ctx context.Context, getMachineStatusRequest *d
}
}
- updateErr := c.machineStatusUpdate(
+ updateRetryRequired, updateErr := c.machineStatusUpdate(
ctx,
getMachineStatusRequest.Machine,
v1alpha1.LastOperation{
@@ -981,10 +984,9 @@ func (c *controller) getVMStatus(ctx context.Context, getMachineStatusRequest *d
getMachineStatusRequest.Machine.Status.CurrentStatus,
getMachineStatusRequest.Machine.Status.LastKnownState,
)
- if apierrors.IsConflict(updateErr) {
- return machineutils.ConflictRetry, err
- } else if updateErr != nil {
- return retry, updateErr
+
+ if updateErr != nil {
+ return updateRetryRequired, updateErr
}
return retry, err
@@ -1170,7 +1172,7 @@ func (c *controller) drainNode(ctx context.Context, deleteMachineRequest *driver
}
}
- updateErr := c.machineStatusUpdate(
+ updateRetryRequired, updateErr := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
@@ -1185,10 +1187,9 @@ func (c *controller) drainNode(ctx context.Context, deleteMachineRequest *driver
machine.Status.CurrentStatus,
machine.Status.LastKnownState,
)
- if apierrors.IsConflict(updateErr) {
- return machineutils.ConflictRetry, err
- } else if updateErr != nil {
- return machineutils.ShortRetry, updateErr
+
+ if updateErr != nil {
+ return updateRetryRequired, updateErr
}
return machineutils.ShortRetry, err
@@ -1239,7 +1240,7 @@ func (c *controller) deleteNodeVolAttachments(ctx context.Context, deleteMachine
}
now := metav1.Now()
klog.V(4).Infof("(deleteVolumeAttachmentsForNode) For node %q, machine %q, set LastOperation.Description: %q", nodeName, machine.Name, description)
- updateErr := c.machineStatusUpdate(
+ updateRetryRequired, updateErr := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
@@ -1251,10 +1252,9 @@ func (c *controller) deleteNodeVolAttachments(ctx context.Context, deleteMachine
machine.Status.CurrentStatus,
machine.Status.LastKnownState,
)
- if apierrors.IsConflict(updateErr) {
- return machineutils.ConflictRetry, err
- } else if updateErr != nil {
- return retryPeriod, updateErr
+
+ if updateErr != nil {
+ return updateRetryRequired, updateErr
}
return retryPeriod, err
@@ -1308,7 +1308,7 @@ func (c *controller) deleteVM(ctx context.Context, deleteMachineRequest *driver.
lastKnownState = deleteMachineResponse.LastKnownState
}
- updateErr := c.machineStatusUpdate(
+ updateRetryRequired, updateErr := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
@@ -1323,10 +1323,9 @@ func (c *controller) deleteVM(ctx context.Context, deleteMachineRequest *driver.
machine.Status.CurrentStatus,
lastKnownState,
)
- if apierrors.IsConflict(updateErr) {
- return machineutils.ConflictRetry, err
- } else if updateErr != nil {
- return retryRequired, updateErr
+
+ if updateErr != nil {
+ return updateRetryRequired, updateErr
}
return retryRequired, err
@@ -1365,7 +1364,7 @@ func (c *controller) deleteNodeObject(ctx context.Context, machine *v1alpha1.Mac
err = fmt.Errorf("Machine deletion in process. No node object found")
}
- updateErr := c.machineStatusUpdate(
+ updateRetryRequired, updateErr := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
@@ -1380,10 +1379,9 @@ func (c *controller) deleteNodeObject(ctx context.Context, machine *v1alpha1.Mac
machine.Status.CurrentStatus,
machine.Status.LastKnownState,
)
- if apierrors.IsConflict(updateErr) {
- return machineutils.ConflictRetry, err
- } else if updateErr != nil {
- return machineutils.ShortRetry, updateErr
+
+ if updateErr != nil {
+ return updateRetryRequired, updateErr
}
return machineutils.ShortRetry, err
From c93ce96d10b5a89363be09fdb4a1439b663f0011 Mon Sep 17 00:00:00 2001
From: Himanshu Sharma
Date: Fri, 15 Dec 2023 14:03:35 +0530
Subject: [PATCH 22/22] renamed updateRetryRequired to updateRetryPeriod
---
.../provider/machinecontroller/machine.go | 12 +++++-----
.../machinecontroller/machine_util.go | 24 +++++++++----------
2 files changed, 18 insertions(+), 18 deletions(-)
diff --git a/pkg/util/provider/machinecontroller/machine.go b/pkg/util/provider/machinecontroller/machine.go
index c85d658c5..e28c9e9b4 100644
--- a/pkg/util/provider/machinecontroller/machine.go
+++ b/pkg/util/provider/machinecontroller/machine.go
@@ -444,7 +444,7 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
}
// machine obj marked Failed for double security
- updateRetryRequired, updateErr := c.machineStatusUpdate(
+ updateRetryPeriod, updateErr := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
@@ -461,7 +461,7 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
)
if updateErr != nil {
- return updateRetryRequired, updateErr
+ return updateRetryPeriod, updateErr
}
klog.V(2).Infof("Machine %q marked Failed as VM was referring to a stale node object", machine.Name)
@@ -475,7 +475,7 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
case codes.Unknown, codes.DeadlineExceeded, codes.Aborted, codes.Unavailable:
// GetMachineStatus() returned with one of the above error codes.
// Retry operation.
- updateRetryRequired, updateErr := c.machineStatusUpdate(
+ updateRetryPeriod, updateErr := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
@@ -491,13 +491,13 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
machine.Status.LastKnownState,
)
if updateErr != nil {
- return updateRetryRequired, updateErr
+ return updateRetryPeriod, updateErr
}
return machineutils.ShortRetry, err
default:
- updateRetryRequired, updateErr := c.machineStatusUpdate(
+ updateRetryPeriod, updateErr := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
@@ -513,7 +513,7 @@ func (c *controller) triggerCreationFlow(ctx context.Context, createMachineReque
machine.Status.LastKnownState,
)
if updateErr != nil {
- return updateRetryRequired, updateErr
+ return updateRetryPeriod, updateErr
}
return machineutils.MediumRetry, err
diff --git a/pkg/util/provider/machinecontroller/machine_util.go b/pkg/util/provider/machinecontroller/machine_util.go
index c49c35ec4..67919c5c8 100644
--- a/pkg/util/provider/machinecontroller/machine_util.go
+++ b/pkg/util/provider/machinecontroller/machine_util.go
@@ -482,7 +482,7 @@ func (c *controller) machineCreateErrorHandler(ctx context.Context, machine *v1a
lastKnownState = createMachineResponse.LastKnownState
}
- updateRetryRequired, updateErr := c.machineStatusUpdate(
+ updateRetryPeriod, updateErr := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
@@ -500,7 +500,7 @@ func (c *controller) machineCreateErrorHandler(ctx context.Context, machine *v1a
)
if updateErr != nil {
- return updateRetryRequired, updateErr
+ return updateRetryPeriod, updateErr
}
return retryRequired, err
@@ -969,7 +969,7 @@ func (c *controller) getVMStatus(ctx context.Context, getMachineStatusRequest *d
}
}
- updateRetryRequired, updateErr := c.machineStatusUpdate(
+ updateRetryPeriod, updateErr := c.machineStatusUpdate(
ctx,
getMachineStatusRequest.Machine,
v1alpha1.LastOperation{
@@ -986,7 +986,7 @@ func (c *controller) getVMStatus(ctx context.Context, getMachineStatusRequest *d
)
if updateErr != nil {
- return updateRetryRequired, updateErr
+ return updateRetryPeriod, updateErr
}
return retry, err
@@ -1172,7 +1172,7 @@ func (c *controller) drainNode(ctx context.Context, deleteMachineRequest *driver
}
}
- updateRetryRequired, updateErr := c.machineStatusUpdate(
+ updateRetryPeriod, updateErr := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
@@ -1189,7 +1189,7 @@ func (c *controller) drainNode(ctx context.Context, deleteMachineRequest *driver
)
if updateErr != nil {
- return updateRetryRequired, updateErr
+ return updateRetryPeriod, updateErr
}
return machineutils.ShortRetry, err
@@ -1240,7 +1240,7 @@ func (c *controller) deleteNodeVolAttachments(ctx context.Context, deleteMachine
}
now := metav1.Now()
klog.V(4).Infof("(deleteVolumeAttachmentsForNode) For node %q, machine %q, set LastOperation.Description: %q", nodeName, machine.Name, description)
- updateRetryRequired, updateErr := c.machineStatusUpdate(
+ updateRetryPeriod, updateErr := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
@@ -1254,7 +1254,7 @@ func (c *controller) deleteNodeVolAttachments(ctx context.Context, deleteMachine
)
if updateErr != nil {
- return updateRetryRequired, updateErr
+ return updateRetryPeriod, updateErr
}
return retryPeriod, err
@@ -1308,7 +1308,7 @@ func (c *controller) deleteVM(ctx context.Context, deleteMachineRequest *driver.
lastKnownState = deleteMachineResponse.LastKnownState
}
- updateRetryRequired, updateErr := c.machineStatusUpdate(
+ updateRetryPeriod, updateErr := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
@@ -1325,7 +1325,7 @@ func (c *controller) deleteVM(ctx context.Context, deleteMachineRequest *driver.
)
if updateErr != nil {
- return updateRetryRequired, updateErr
+ return updateRetryPeriod, updateErr
}
return retryRequired, err
@@ -1364,7 +1364,7 @@ func (c *controller) deleteNodeObject(ctx context.Context, machine *v1alpha1.Mac
err = fmt.Errorf("Machine deletion in process. No node object found")
}
- updateRetryRequired, updateErr := c.machineStatusUpdate(
+ updateRetryPeriod, updateErr := c.machineStatusUpdate(
ctx,
machine,
v1alpha1.LastOperation{
@@ -1381,7 +1381,7 @@ func (c *controller) deleteNodeObject(ctx context.Context, machine *v1alpha1.Mac
)
if updateErr != nil {
- return updateRetryRequired, updateErr
+ return updateRetryPeriod, updateErr
}
return machineutils.ShortRetry, err