Skip to content

Commit

Permalink
Add new labels to sts pods, for backward compatibility with v0.23.0, …
Browse files Browse the repository at this point in the history
…which includes #777 (#804)

* Add new labels to sts pods, for backward compatibility with v0.23.0 which includes #777

* Fix unit tests for status.members checker

* Address review comment by @anveshreddy18; check exact match for sts label selector for sts recreation

* Statefulset PreDeploy now only checks pod labels, and not whether they are updated or ready, similar to #823

* Add comments for `PreDeploy` methods
  • Loading branch information
shreyas-s-rao authored Jul 8, 2024
1 parent aa7046d commit 798a9b1
Show file tree
Hide file tree
Showing 12 changed files with 359 additions and 111 deletions.
18 changes: 18 additions & 0 deletions api/v1alpha1/types_etcd.go
Original file line number Diff line number Diff line change
Expand Up @@ -447,6 +447,15 @@ func (e *Etcd) GetCompactionJobName() string {
return fmt.Sprintf("%s-compactor", e.Name)
}

// GetAllPodNames returns the names of all pods for the Etcd.
func (e *Etcd) GetAllPodNames(replicas int32) []string {
podNames := make([]string, 0, replicas)
for i := 0; i < int(replicas); i++ {
podNames = append(podNames, e.GetOrdinalPodName(i))
}
return podNames
}

// GetOrdinalPodName returns the Etcd pod name based on the ordinal.
func (e *Etcd) GetOrdinalPodName(ordinal int) string {
return fmt.Sprintf("%s-%d", e.Name, ordinal)
Expand All @@ -462,6 +471,15 @@ func (e *Etcd) GetFullSnapshotLeaseName() string {
return fmt.Sprintf("%s-full-snap", e.Name)
}

// GetMemberLeaseNames returns the name of member leases for the Etcd.
func (e *Etcd) GetMemberLeaseNames() []string {
leaseNames := make([]string, 0, e.Spec.Replicas)
for i := 0; i < int(e.Spec.Replicas); i++ {
leaseNames = append(leaseNames, fmt.Sprintf("%s-%d", e.Name, i))
}
return leaseNames
}

// GetDefaultLabels returns the default labels for etcd.
func (e *Etcd) GetDefaultLabels() map[string]string {
return map[string]string{
Expand Down
30 changes: 24 additions & 6 deletions controllers/etcd/reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,13 +188,12 @@ func (r *Reconciler) reconcile(ctx context.Context, etcd *druidv1alpha1.Etcd) (c
}, err
}

if err = r.removeOperationAnnotation(ctx, logger, etcd); err != nil {
if apierrors.IsNotFound(err) {
return ctrl.Result{}, nil
}
preReconcileResult := r.preReconcileEtcd(ctx, logger, etcd)
if preReconcileResult.err != nil {
logger.Error(err, "Error during pre-reconciling ETCD")
return ctrl.Result{
Requeue: true,
}, err
}, preReconcileResult.err
}

result := r.reconcileEtcd(ctx, logger, etcd)
Expand All @@ -209,7 +208,17 @@ func (r *Reconciler) reconcile(ctx context.Context, etcd *druidv1alpha1.Etcd) (c
Requeue: true,
}, result.err
}
if err := r.updateEtcdStatus(ctx, etcd, result); err != nil {

if err = r.updateEtcdStatus(ctx, etcd, result); err != nil {
return ctrl.Result{
Requeue: true,
}, err
}

if err = r.removeOperationAnnotation(ctx, logger, etcd); err != nil {
if apierrors.IsNotFound(err) {
return ctrl.Result{}, nil
}
return ctrl.Result{
Requeue: true,
}, err
Expand Down Expand Up @@ -294,6 +303,15 @@ func (r *Reconciler) delete(ctx context.Context, etcd *druidv1alpha1.Etcd) (ctrl
return ctrl.Result{}, nil
}

func (r *Reconciler) preReconcileEtcd(ctx context.Context, logger logr.Logger, etcd *druidv1alpha1.Etcd) reconcileResult {
statefulSetValues := componentsts.GeneratePreDeployValues(etcd)
stsDeployer := componentsts.New(r.Client, logger, *statefulSetValues, r.config.FeatureGates)
if err := stsDeployer.PreDeploy(ctx, etcd); err != nil {
return reconcileResult{err: err}
}
return reconcileResult{}
}

func (r *Reconciler) reconcileEtcd(ctx context.Context, logger logr.Logger, etcd *druidv1alpha1.Etcd) reconcileResult {
// Check if Spec.Replicas is odd or even.
// TODO(timuthy): The following checks should rather be part of a validation. Also re-enqueuing doesn't make sense in case the values are invalid.
Expand Down
121 changes: 119 additions & 2 deletions pkg/component/etcd/statefulset/statefulset.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,8 @@ type Interface interface {
gardenercomponent.DeployWaiter
// Get gets the etcd StatefulSet.
Get(context.Context) (*appsv1.StatefulSet, error)
// PreDeploy performs operations prior to the deployment of the StatefulSet component.
PreDeploy(ctx context.Context, etcd *druidv1alpha1.Etcd) error
}

type component struct {
Expand Down Expand Up @@ -75,6 +77,18 @@ func (c *component) Destroy(ctx context.Context) error {
return client.IgnoreNotFound(c.client.Delete(ctx, sts))
}

// PreDeploy performs operations prior to the deployment of the StatefulSet component.
func (c *component) PreDeploy(ctx context.Context, etcd *druidv1alpha1.Etcd) error {
preDeployFlow, err := c.createPreDeployFlow(ctx, etcd)
if err != nil {
return err
}
if preDeployFlow == nil {
return nil
}
return preDeployFlow.Run(ctx, flow.Opts{})
}

// Deploy executes a deploy-flow to ensure that the StatefulSet is synchronized correctly
func (c *component) Deploy(ctx context.Context) error {
deployFlow, err := c.createDeployFlow(ctx)
Expand Down Expand Up @@ -200,6 +214,109 @@ func (c *component) WaitCleanup(ctx context.Context) error {
})
}

// createPreDeployFlow gets the existing statefulset. If it exists, then it patches the statefulset
// with additional new pod template labels, and wait for all pods to be updated. Then, if the statefulset
// label selector is not as expected, it deletes the statefulset with orphan cascade option.
// If the statefulset doesn't exist, createPreDeployFlow is a no-op.
// This flow is required to ensure that downgrade of druid from the next version (v0.23.0+) to the
// previous version (v0.22.1+) is handled correctly.
func (c *component) createPreDeployFlow(ctx context.Context, etcd *druidv1alpha1.Etcd) (*flow.Flow, error) {
var (
existingSts *appsv1.StatefulSet
err error
)
existingSts, err = c.getExistingSts(ctx)
if err != nil {
return nil, err
}
if existingSts == nil {
return nil, nil
}

flowName := fmt.Sprintf("(etcd: %s) Pre-Deploy Flow for StatefulSet %s for Namespace: %s", getOwnerReferenceNameWithUID(c.values.OwnerReference), c.values.Name, c.values.Namespace)
g := flow.NewGraph(flowName)

c.addTasksForLabelsAndSelectorUpdation(g, etcd, existingSts)

return g.Compile(), nil
}

func (c *component) addTasksForLabelsAndSelectorUpdation(g *flow.Graph, etcd *druidv1alpha1.Etcd, sts *appsv1.StatefulSet) {
// If the labels are not as expected, then patch the labels.
patchLabelsOpName := "(patch-labels): Patching labels"
patchLabelsTaskID := g.Add(flow.Task{
Name: patchLabelsOpName,
Fn: func(ctx context.Context) error {
return c.patchPodTemplateLabels(ctx, sts)
},
Dependencies: nil,
})
c.logger.Info("adding task to pre-deploy flow", "name", patchLabelsOpName, "ID", patchLabelsTaskID)

// Wait for pods to be updated with expected labels as well as expected updateRevision.
waitPodsOpName := "(wait-sts-pods-sync): Waiting for pods to have desired labels"
waitPodsTaskID := g.Add(flow.Task{
Name: waitPodsOpName,
Fn: func(ctx context.Context) error {
return c.waitUntilPodsHaveDesiredLabels(ctx, etcd, sts, defaultInterval, defaultTimeout*2)
},
Dependencies: flow.NewTaskIDs(patchLabelsTaskID),
})
c.logger.Info("adding task to pre-deploy flow", "name", waitPodsOpName, "ID", waitPodsTaskID)

// If the selector is not as expected, then delete the StatefulSet.
deleteStsOpName := "(delete-sts-with-orphans): Deleting StatefulSet by orphaning pods"
deleteStsTaskID := g.Add(flow.Task{
Name: deleteStsOpName,
Fn: func(ctx context.Context) error {
return c.deleteWithOrphanCascade(ctx, sts)
},
Dependencies: flow.NewTaskIDs(waitPodsTaskID),
})
c.logger.Info("adding task to pre-deploy flow", "name", deleteStsOpName, "ID", deleteStsTaskID)
}

// patchPodTemplateLabels patches the StatefulSet pod template labels with new labels.
func (c *component) patchPodTemplateLabels(ctx context.Context, sts *appsv1.StatefulSet) error {
if !utils.ContainsAllDesiredLabels(sts.Spec.Template.Labels, c.values.PodLabels) {
c.logger.Info("Patching StatefulSet pod template labels", "namespace", c.values.Namespace, "name", c.values.Name, "podTemplateLabels", utils.MergeStringMaps(c.values.PodLabels, c.values.AdditionalPodLabels))
patch := client.MergeFrom(sts.DeepCopy())
sts.Spec.Template.Labels = utils.MergeStringMaps(c.values.PodLabels, c.values.AdditionalPodLabels)
return c.client.Patch(ctx, sts, patch)
}
return nil
}

// waitUntilPodsHaveDesiredLabels waits until all pods of the StatefulSet have the desired labels.
func (c *component) waitUntilPodsHaveDesiredLabels(ctx context.Context, etcd *druidv1alpha1.Etcd, sts *appsv1.StatefulSet, interval, timeout time.Duration) error {
return gardenerretry.UntilTimeout(ctx, interval, timeout, func(ctx context.Context) (bool, error) {
c.logger.Info("Waiting for StatefulSet pods to have desired labels", "namespace", c.values.Namespace, "name", c.values.Name)
// sts.spec.replicas is more accurate than Etcd.spec.replicas, specifically when
// Etcd.spec.replicas is updated but not yet reflected in the etcd cluster
podNames := etcd.GetAllPodNames(*sts.Spec.Replicas)
for _, podName := range podNames {
pod := &corev1.Pod{}
if err := c.client.Get(ctx, client.ObjectKey{Name: podName, Namespace: etcd.Namespace}, pod); err != nil {
return false, err
}
if !utils.ContainsAllDesiredLabels(pod.Labels, utils.MergeStringMaps(c.values.PodLabels, c.values.AdditionalPodLabels)) {
return false, nil
}
}
return gardenerretry.Ok()
})
}

// deleteWithOrphanCascade deletes the StatefulSet with orphan cascade option if the selector labels are not as expected.
// During the subsequent Statefulset Deploy flow, the StatefulSet will be recreated with the correct selector labels.
func (c *component) deleteWithOrphanCascade(ctx context.Context, sts *appsv1.StatefulSet) error {
if !utils.ExactlyMatchesLabels(sts.Spec.Selector.MatchLabels, c.values.SelectorLabels) {
c.logger.Info("Deleting StatefulSet with orphan cascade", "namespace", c.values.Namespace, "name", c.values.Name)
return c.client.Delete(ctx, sts, client.PropagationPolicy(metav1.DeletePropagationOrphan))
}
return nil
}

func (c *component) createDeployFlow(ctx context.Context) (*flow.Flow, error) {
var (
sts *appsv1.StatefulSet
Expand Down Expand Up @@ -415,12 +532,12 @@ func (c *component) createOrPatch(ctx context.Context, sts *appsv1.StatefulSet,
Replicas: &replicas,
ServiceName: c.values.PeerServiceName,
Selector: &metav1.LabelSelector{
MatchLabels: c.values.Labels,
MatchLabels: c.values.SelectorLabels,
},
Template: corev1.PodTemplateSpec{
ObjectMeta: metav1.ObjectMeta{
Annotations: c.values.Annotations,
Labels: utils.MergeStringMaps(make(map[string]string), c.values.AdditionalPodLabels, c.values.Labels),
Labels: utils.MergeStringMaps(make(map[string]string), c.values.AdditionalPodLabels, c.values.PodLabels),
},
Spec: corev1.PodSpec{
HostAliases: []corev1.HostAlias{
Expand Down
10 changes: 7 additions & 3 deletions pkg/component/etcd/statefulset/statefulset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -620,9 +620,13 @@ func checkStatefulset(sts *appsv1.StatefulSet, values Values) {
"instance": Equal(values.Name),
}),
"Labels": MatchAllKeys(Keys{
"foo": Equal("bar"),
"name": Equal("etcd"),
"instance": Equal(values.Name),
"foo": Equal("bar"),
"name": Equal("etcd"),
"instance": Equal(values.Name),
"app.kubernetes.io/component": Equal("etcd-statefulset"),
"app.kubernetes.io/name": Equal(values.Name),
"app.kubernetes.io/managed-by": Equal("etcd-druid"),
"app.kubernetes.io/part-of": Equal(values.Name),
}),
}),
//s.Spec.Template.Spec.HostAliases
Expand Down
6 changes: 5 additions & 1 deletion pkg/component/etcd/statefulset/values.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,8 +39,12 @@ type Values struct {

// Annotations is the annotation provided in ETCD spec.
Annotations map[string]string
// Labels is the labels of StatefulSet..
// Labels defines the labels used for the statefulset.
Labels map[string]string
// SelectorLabels defines the selector's matchLabels for the statefulset.
SelectorLabels map[string]string
// PodLabels represents the labels to be applied to the statefulset pods.
PodLabels map[string]string
// AdditionalPodLabels represents additional labels to be applied to the StatefulSet pods.
AdditionalPodLabels map[string]string
// BackupImage is the backup restore image.
Expand Down
40 changes: 40 additions & 0 deletions pkg/component/etcd/statefulset/values_helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,23 @@ import (
"k8s.io/utils/pointer"
)

// Standard label keys to be placed on the statefulset, required for backward compatibility with
// druid:v0.23.0, which removes old, non-standard labels via PR https://github.com/gardener/etcd-druid/pull/777.
const (
// labelAppNameKey is a label which sets the name of the resource provisioned for an etcd cluster.
labelAppNameKey = "app.kubernetes.io/name"
// labelManagedByKey is a key of a label which sets druid as a manager for resources provisioned for an etcd cluster.
labelManagedByKey = "app.kubernetes.io/managed-by"
// labelManagedByValue is the value for labelManagedByKey.
labelManagedByValue = "etcd-druid"
// labelPartOfKey is a key of a label which establishes that a provisioned resource belongs to a parent etcd cluster.
labelPartOfKey = "app.kubernetes.io/part-of"
// labelComponentKey is a key for a label that sets the component type on resources provisioned for an etcd cluster.
labelComponentKey = "app.kubernetes.io/component"
// labelAppNameValueStatefulSet is the component name for statefulset resource.
labelAppNameValueStatefulSet = "etcd-statefulset"
)

const (
defaultBackupPort int32 = 8080
defaultServerPort int32 = 2380
Expand Down Expand Up @@ -63,6 +80,8 @@ func GenerateValues(
StatusReplicas: etcd.Status.Replicas,
Annotations: utils.MergeStringMaps(checksumAnnotations, etcd.Spec.Annotations),
Labels: etcd.GetDefaultLabels(),
SelectorLabels: etcd.GetDefaultLabels(),
PodLabels: utils.MergeStringMaps(getNewPodLabels(etcd), etcd.GetDefaultLabels()),
AdditionalPodLabels: etcd.Spec.Labels,
EtcdImage: etcdImage,
BackupImage: backupImage,
Expand Down Expand Up @@ -140,6 +159,27 @@ func GenerateValues(
return &values, nil
}

// GeneratePreDeployValues generates `statefulset.Values` for the statefulset component with the given parameters,
// used specifically for the PreDeploy method.
func GeneratePreDeployValues(etcd *druidv1alpha1.Etcd) *Values {
return &Values{
Name: etcd.Name,
Namespace: etcd.Namespace,
SelectorLabels: etcd.GetDefaultLabels(),
PodLabels: utils.MergeStringMaps(getNewPodLabels(etcd), etcd.GetDefaultLabels()),
AdditionalPodLabels: etcd.Spec.Labels,
}
}

func getNewPodLabels(etcd *druidv1alpha1.Etcd) map[string]string {
return map[string]string{
labelComponentKey: labelAppNameValueStatefulSet,
labelAppNameKey: etcd.Name,
labelManagedByKey: labelManagedByValue,
labelPartOfKey: etcd.Name,
}
}

func getEtcdCommandArgs(val Values) []string {
if !val.UseEtcdWrapper {
// safe to return an empty string array here since etcd-custom-image:v3.4.13-bootstrap-12 (as well as v3.4.26) now uses an entry point that calls bootstrap.sh
Expand Down
24 changes: 15 additions & 9 deletions pkg/health/etcdmember/check_ready.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,10 @@ package etcdmember

import (
"context"
"fmt"
"strings"
"time"

v1beta1constants "github.com/gardener/gardener/pkg/apis/core/v1beta1/constants"

kutil "github.com/gardener/gardener/pkg/utils/kubernetes"
"github.com/go-logr/logr"
coordinationv1 "k8s.io/api/coordination/v1"
Expand All @@ -29,8 +28,6 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"

druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1"
"github.com/gardener/etcd-druid/pkg/common"
"github.com/gardener/etcd-druid/pkg/utils"
)

type readyCheck struct {
Expand All @@ -49,13 +46,22 @@ func (r *readyCheck) Check(ctx context.Context, etcd druidv1alpha1.Etcd) []Resul
checkTime = TimeNow().UTC()
)

leases := &coordinationv1.LeaseList{}
if err := r.cl.List(ctx, leases, client.InNamespace(etcd.Namespace), client.MatchingLabels{
common.GardenerOwnedBy: etcd.Name, v1beta1constants.GardenerPurpose: utils.PurposeMemberLease}); err != nil {
r.logger.Error(err, "failed to get leases for etcd member readiness check")
leaseNames := etcd.GetMemberLeaseNames()
leases := make([]*coordinationv1.Lease, 0, len(leaseNames))
for _, leaseName := range leaseNames {
lease := &coordinationv1.Lease{}
if err := r.cl.Get(ctx, kutil.Key(etcd.Namespace, leaseName), lease); err != nil {
if apierrors.IsNotFound(err) {
r.logger.Error(fmt.Errorf("lease not found"), "name", leaseName)
continue
}
r.logger.Error(err, "failed to get lease", "name", leaseName)
continue
}
leases = append(leases, lease)
}

for _, lease := range leases.Items {
for _, lease := range leases {
var (
id, role = separateIdFromRole(lease.Spec.HolderIdentity)
res = &result{
Expand Down
Loading

0 comments on commit 798a9b1

Please sign in to comment.