Skip to content

Commit

Permalink
Fix statefulset PreSync to work for certain cases of unhealthy etcd…
Browse files Browse the repository at this point in the history
… clusters upon druid upgrade (#823)

Co-authored-by: madhav bhargava <[email protected]>
  • Loading branch information
shreyas-s-rao and unmarshall authored Aug 2, 2024
1 parent 4e9971a commit fb44e67
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 26 deletions.
15 changes: 12 additions & 3 deletions api/v1alpha1/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,11 +44,20 @@ func GetOrdinalPodName(etcdObjMeta metav1.ObjectMeta, ordinal int) string {
return fmt.Sprintf("%s-%d", etcdObjMeta.Name, ordinal)
}

// GetAllPodNames returns the names of all pods for the Etcd.
func GetAllPodNames(etcdObjMeta metav1.ObjectMeta, replicas int32) []string {
podNames := make([]string, replicas)
for i := range int(replicas) {
podNames[i] = GetOrdinalPodName(etcdObjMeta, i)
}
return podNames
}

// GetMemberLeaseNames returns the name of member leases for the Etcd.
func GetMemberLeaseNames(etcdObjMeta metav1.ObjectMeta, replicas int32) []string {
leaseNames := make([]string, 0, replicas)
for i := 0; i < int(replicas); i++ {
leaseNames = append(leaseNames, fmt.Sprintf("%s-%d", etcdObjMeta.Name, i))
leaseNames := make([]string, replicas)
for i := range int(replicas) {
leaseNames[i] = fmt.Sprintf("%s-%d", etcdObjMeta.Name, i)
}
return leaseNames
}
Expand Down
44 changes: 21 additions & 23 deletions internal/component/statefulset/statefulset.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"github.com/gardener/gardener/pkg/controllerutils"
"github.com/gardener/gardener/pkg/utils/imagevector"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
Expand Down Expand Up @@ -97,23 +98,23 @@ func (r _resource) PreSync(ctx component.OperatorContext, etcd *druidv1alpha1.Et
fmt.Sprintf("Error checking and patching StatefulSet pods with new labels for etcd: %v", druidv1alpha1.GetNamespaceName(etcd.ObjectMeta)))
}

// check if pods have been updated with new labels and become ready.
podsUpdatedAndReady, err := r.areStatefulSetPodsUpdatedAndReady(ctx, etcd, sts)
// check if pods have been updated with new labels.
podsHaveDesiredLabels, err := r.doStatefulSetPodsHaveDesiredLabels(ctx, etcd, sts)
if err != nil {
return druiderr.WrapError(err,
ErrPreSyncStatefulSet,
"PreSync",
fmt.Sprintf("Error checking if StatefulSet pods are updated for etcd: %v", druidv1alpha1.GetNamespaceName(etcd.ObjectMeta)))
}
if !podsUpdatedAndReady {
errMessage := fmt.Sprintf("StatefulSet pods are not yet updated with new pod labels and ready, for StatefulSet: %v for etcd: %v", getObjectKey(sts.ObjectMeta), druidv1alpha1.GetNamespaceName(etcd.ObjectMeta))
if !podsHaveDesiredLabels {
errMessage := fmt.Sprintf("StatefulSet pods are not yet updated with new labels, for StatefulSet: %v for etcd: %v", getObjectKey(sts.ObjectMeta), druidv1alpha1.GetNamespaceName(etcd.ObjectMeta))
return druiderr.WrapError(fmt.Errorf(errMessage),
druiderr.ErrRequeueAfter,
"PreSync",
errMessage,
)
} else {
ctx.Logger.Info("StatefulSet pods are updated with new pod labels and ready", "objectKey", getObjectKey(etcd.ObjectMeta))
ctx.Logger.Info("StatefulSet pods are updated with new labels", "objectKey", getObjectKey(etcd.ObjectMeta))
}

// if sts label selector needs to be changed, then delete the statefulset, but keeping the pods intact.
Expand Down Expand Up @@ -282,25 +283,22 @@ func getDesiredPodTemplateLabels(etcd *druidv1alpha1.Etcd) map[string]string {
return utils.MergeMaps(etcd.Spec.Labels, getStatefulSetLabels(etcd.Name))
}

func (r _resource) areStatefulSetPodsUpdatedAndReady(ctx component.OperatorContext, etcd *druidv1alpha1.Etcd, sts *appsv1.StatefulSet) (bool, error) {
if err := r.client.Get(ctx, getObjectKey(sts.ObjectMeta), sts); err != nil {
return false, err
func (r _resource) doStatefulSetPodsHaveDesiredLabels(ctx component.OperatorContext, etcd *druidv1alpha1.Etcd, sts *appsv1.StatefulSet) (bool, error) {
// sts.spec.replicas is more accurate than Etcd.spec.replicas, specifically when
// Etcd.spec.replicas is updated but not yet reflected in the etcd cluster
if sts.Spec.Replicas == nil {
return false, fmt.Errorf("statefulset %s does not have a replicas count defined", sts.Name)
}
desiredPodTemplateLabels := getDesiredPodTemplateLabels(etcd)
if !utils.ContainsAllDesiredLabels(sts.Spec.Template.Labels, desiredPodTemplateLabels) {
return false, nil
}
if sts.Status.ObservedGeneration < sts.Generation {
return false, nil
}
if sts.Status.UpdateRevision != sts.Status.CurrentRevision {
return false, nil
}
if sts.Spec.Replicas != nil && sts.Status.UpdatedReplicas != *sts.Spec.Replicas {
return false, nil
}
if sts.Spec.Replicas != nil && sts.Status.ReadyReplicas < *sts.Spec.Replicas {
return false, nil
podNames := druidv1alpha1.GetAllPodNames(etcd.ObjectMeta, *sts.Spec.Replicas)
desiredLabels := getDesiredPodTemplateLabels(etcd)
for _, podName := range podNames {
pod := &corev1.Pod{}
if err := r.client.Get(ctx, client.ObjectKey{Name: podName, Namespace: etcd.Namespace}, pod); err != nil {
return false, err
}
if !utils.ContainsAllDesiredLabels(pod.Labels, desiredLabels) {
return false, nil
}
}
return true, nil
}
Expand Down

0 comments on commit fb44e67

Please sign in to comment.