From 7e55c1df32c59a0ceda364165acebffef9707dad Mon Sep 17 00:00:00 2001
From: madhav bhargava
Date: Sat, 5 Oct 2024 11:06:04 +0530
Subject: [PATCH] fixes for edge cases in StatefulSet TLS handling

---
 internal/component/statefulset/builder.go     |  4 ++--
 internal/component/statefulset/statefulset.go | 69 ++++++++++++++------
 internal/images/images.yaml                   |  8 ++--
 internal/utils/statefulset.go                 | 13 ++++
 4 files changed, 68 insertions(+), 26 deletions(-)

diff --git a/internal/component/statefulset/builder.go b/internal/component/statefulset/builder.go
index 86772b394..f596cf590 100644
--- a/internal/component/statefulset/builder.go
+++ b/internal/component/statefulset/builder.go
@@ -363,7 +363,7 @@ func (b *stsBuilder) getEtcdContainer() corev1.Container {
 	return corev1.Container{
 		Name:            common.ContainerNameEtcd,
 		Image:           b.etcdImage,
-		ImagePullPolicy: corev1.PullIfNotPresent,
+		ImagePullPolicy: corev1.PullAlways,
 		Args:            b.getEtcdContainerCommandArgs(),
 		ReadinessProbe:  b.getEtcdContainerReadinessProbe(),
 		Ports: []corev1.ContainerPort{
@@ -398,7 +398,7 @@ func (b *stsBuilder) getBackupRestoreContainer() (corev1.Container, error) {
 	return corev1.Container{
 		Name:            common.ContainerNameEtcdBackupRestore,
 		Image:           b.etcdBackupRestoreImage,
-		ImagePullPolicy: corev1.PullIfNotPresent,
+		ImagePullPolicy: corev1.PullAlways,
 		Args:            b.getBackupRestoreContainerCommandArgs(),
 		Ports: []corev1.ContainerPort{
 			{
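Note on the ImagePullPolicy change above: PullAlways is needed while the containers run mutable dev/snapshot tags (see the images.yaml change below), because a node could otherwise keep serving a stale cached image. If the hard-coded policy is not meant to be permanent, it could instead be derived from the image tag. A minimal sketch of that idea, not part of this patch; pullPolicyFor is a hypothetical helper that does not exist in this repository:

package main

import (
	"fmt"
	"strings"

	corev1 "k8s.io/api/core/v1"
)

// pullPolicyFor returns PullAlways only for moving tags ("latest" or -dev
// builds), where a cached image may be stale; released, immutable tags keep
// PullIfNotPresent.
func pullPolicyFor(image string) corev1.PullPolicy {
	tag := ""
	// Take everything after the last ':' as the tag, unless it is part of a
	// registry host:port (in which case it contains a '/').
	if i := strings.LastIndex(image, ":"); i >= 0 && !strings.Contains(image[i+1:], "/") {
		tag = image[i+1:]
	}
	if tag == "" || tag == "latest" || strings.Contains(tag, "-dev") {
		return corev1.PullAlways
	}
	return corev1.PullIfNotPresent
}

func main() {
	fmt.Println(pullPolicyFor("europe-docker.pkg.dev/gardener-project/snapshots/gardener/etcdbrctl:v0.31.0-dev-endpointTest1.0")) // Always
	fmt.Println(pullPolicyFor("europe-docker.pkg.dev/gardener-project/public/gardener/etcdbrctl:v0.30.1"))                        // IfNotPresent
}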
diff --git a/internal/component/statefulset/statefulset.go b/internal/component/statefulset/statefulset.go
index 5c687f7bf..9e556d9e4 100644
--- a/internal/component/statefulset/statefulset.go
+++ b/internal/component/statefulset/statefulset.go
@@ -124,14 +124,8 @@ func (r _resource) Sync(ctx component.OperatorContext, etcd *druidv1alpha1.Etcd)
 			return r.createOrPatch(ctx, etcd)
 		}
 	}
-	// StatefulSet exists, check if TLS has been enabled for peer communication, if yes then it is currently a multistep
-	// process to ensure that all members are updated and establish peer TLS communication.
-	// There is no need to do this check if the etcd.Spec.Replicas have been set to 0 (scale down to 0 case).
-	// TODO: Once we support scale down to non-zero replicas then we will need to adjust the replicas for which we check for TLS.
-	if etcd.Spec.Replicas > 0 {
-		if err = r.handleTLSChanges(ctx, etcd, existingSTS); err != nil {
-			return err
-		}
+	if err = r.handleTLSChanges(ctx, etcd, existingSTS); err != nil {
+		return err
 	}
 	return r.createOrPatch(ctx, etcd)
 }
@@ -174,7 +168,7 @@ func (r _resource) handleStsPodLabelsOnMismatch(ctx component.OperatorContext, e
 	if !podsHaveDesiredLabels {
 		return druiderr.New(druiderr.ErrRequeueAfter,
 			component.OperationPreSync,
-			fmt.Sprintf("StatefulSet pods are not yet updated with new labels, for StatefulSet: %v for etcd: %v", getObjectKey(sts.ObjectMeta), druidv1alpha1.GetNamespaceName(etcd.ObjectMeta)),
+			fmt.Sprintf("StatefulSet pods are not yet updated with new labels, or not all StatefulSet replicas are ready after the label update, for StatefulSet: %v for etcd: %v", getObjectKey(sts.ObjectMeta), druidv1alpha1.GetNamespaceName(etcd.ObjectMeta)),
 		)
 	} else {
 		r.logger.Info("StatefulSet pods have all the desired labels", "objectKey", getObjectKey(etcd.ObjectMeta))
@@ -312,15 +306,44 @@
 }
 
 func (r _resource) handleTLSChanges(ctx component.OperatorContext, etcd *druidv1alpha1.Etcd, existingSts *appsv1.StatefulSet) error {
-	isSTSTLSConfigNotInSync := r.isStatefulSetTLSConfigNotInSync(etcd, existingSts)
-	if isSTSTLSConfigNotInSync {
-		if err := r.createOrPatchWithReplicas(ctx, etcd, *existingSts.Spec.Replicas); err != nil {
-			return druiderr.WrapError(err,
-				ErrSyncStatefulSet,
-				component.OperationSync,
-				fmt.Sprintf("Error creating or patching StatefulSet with TLS changes for StatefulSet: %v, etcd: %v", client.ObjectKeyFromObject(existingSts), client.ObjectKeyFromObject(etcd)))
-		}
+	// If there are no replicas then there is no need to handle any TLS changes. Once the replicas are scaled up again, the new pods will automatically pick up the TLS changes.
+	if etcd.Spec.Replicas == 0 {
+		r.logger.Info("Skipping handling TLS changes for StatefulSet as replicas are set to 0")
+		return nil
+	}
+
+	isSTSTLSConfigInSync := isStatefulSetTLSConfigInSync(etcd, existingSts)
+	if isSTSTLSConfigInSync {
+		r.logger.Info("TLS configuration is in sync for StatefulSet")
+		return nil
+	}
+	// Check whether the etcd cluster is in a state where it can safely handle TLS changes.
+	// If the peer URL TLS configuration has changed and the etcd cluster has more than one replica, then all members must be ready first.
+	// If we do not wait for all members to be ready, patching the StatefulSet with the peer TLS changes will trigger a rolling update that can never finish,
+	// leaving the cluster stuck in a bad state. Updating the peer URL is a cluster-wide operation, since every member must learn that the peer TLS configuration has changed.
+	// If not all members are ready, a rolling update of the StatefulSet can restart a healthy member, causing a loss of quorum from which
+	// there is no automatic recovery.
+	if hasPeerTLSConfigChanged(etcd, existingSts) &&
+		existingSts.Spec.Replicas != nil &&
+		*existingSts.Spec.Replicas > 1 &&
+		existingSts.Status.ReadyReplicas > 0 &&
+		existingSts.Status.ReadyReplicas < *existingSts.Spec.Replicas {
+		return druiderr.New(
+			druiderr.ErrRequeueAfter,
+			component.OperationSync,
+			fmt.Sprintf("Not all etcd cluster members are ready. It is not safe to patch the StatefulSet with peer URL TLS changes. Replicas: %d, ReadyReplicas: %d", *existingSts.Spec.Replicas, existingSts.Status.ReadyReplicas))
 	}
+	return r.processTLSChanges(ctx, etcd, existingSts)
+}
+
+func (r _resource) processTLSChanges(ctx component.OperatorContext, etcd *druidv1alpha1.Etcd, existingSts *appsv1.StatefulSet) error {
+	if err := r.createOrPatchWithReplicas(ctx, etcd, *existingSts.Spec.Replicas); err != nil {
+		return druiderr.WrapError(err,
+			ErrSyncStatefulSet,
+			component.OperationSync,
+			fmt.Sprintf("Error creating or patching StatefulSet with TLS changes for StatefulSet: %v, etcd: %v", client.ObjectKeyFromObject(existingSts), client.ObjectKeyFromObject(etcd)))
+	}
+
 	peerTLSInSyncForAllMembers, err := utils.IsPeerURLInSyncForAllMembers(ctx, r.client, ctx.Logger, etcd, *existingSts.Spec.Replicas)
 	if err != nil {
 		return druiderr.WrapError(err,
@@ -339,12 +362,18 @@
 	}
 }
 
-func (r _resource) isStatefulSetTLSConfigNotInSync(etcd *druidv1alpha1.Etcd, sts *appsv1.StatefulSet) bool {
+func hasPeerTLSConfigChanged(etcd *druidv1alpha1.Etcd, existingSts *appsv1.StatefulSet) bool {
+	newEtcdWrapperTLSVolMounts := getEtcdContainerSecretVolumeMounts(etcd)
+	existingPeerTLSVolMounts := utils.GetStatefulSetPeerTLSVolumeMounts(existingSts)
+	return hasTLSVolumeMountsChanged(existingPeerTLSVolMounts, newEtcdWrapperTLSVolMounts)
+}
+
+func isStatefulSetTLSConfigInSync(etcd *druidv1alpha1.Etcd, sts *appsv1.StatefulSet) bool {
 	newEtcdbrTLSVolMounts := getBackupRestoreContainerSecretVolumeMounts(etcd)
 	newEtcdWrapperTLSVolMounts := getEtcdContainerSecretVolumeMounts(etcd)
 	containerTLSVolMounts := utils.GetStatefulSetContainerTLSVolumeMounts(sts)
-	return hasTLSVolumeMountsChanged(containerTLSVolMounts[common.ContainerNameEtcd], newEtcdWrapperTLSVolMounts) ||
-		hasTLSVolumeMountsChanged(containerTLSVolMounts[common.ContainerNameEtcdBackupRestore], newEtcdbrTLSVolMounts)
+	return !hasTLSVolumeMountsChanged(containerTLSVolMounts[common.ContainerNameEtcd], newEtcdWrapperTLSVolMounts) &&
+		!hasTLSVolumeMountsChanged(containerTLSVolMounts[common.ContainerNameEtcdBackupRestore], newEtcdbrTLSVolMounts)
 }
 
 func hasTLSVolumeMountsChanged(existingVolMounts, newVolMounts []corev1.VolumeMount) bool {
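The requeue condition added to handleTLSChanges above is the core of the edge-case fix: peer URL TLS changes are only patched onto the StatefulSet once every desired member is ready, or none are, in which case there is no quorum left to protect. A self-contained sketch of the same gate as a standalone predicate; isSafeToPatchPeerTLS is a hypothetical name used only for illustration:

package main

import (
	"fmt"

	appsv1 "k8s.io/api/apps/v1"
)

// isSafeToPatchPeerTLS mirrors the gate in handleTLSChanges: patching peer
// URL TLS changes is safe for a single-member cluster, for a cluster with no
// ready members (there is no quorum left to lose), or once all desired
// replicas are ready.
func isSafeToPatchPeerTLS(sts *appsv1.StatefulSet) bool {
	if sts.Spec.Replicas == nil || *sts.Spec.Replicas <= 1 {
		return true
	}
	if sts.Status.ReadyReplicas == 0 {
		return true
	}
	return sts.Status.ReadyReplicas >= *sts.Spec.Replicas
}

func main() {
	replicas := int32(3)
	sts := &appsv1.StatefulSet{
		Spec:   appsv1.StatefulSetSpec{Replicas: &replicas},
		Status: appsv1.StatefulSetStatus{ReadyReplicas: 2},
	}
	fmt.Println(isSafeToPatchPeerTLS(sts)) // false: only 2 of 3 members ready, so requeue instead
}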
diff --git a/internal/images/images.yaml b/internal/images/images.yaml
index 250fc90de..0c598a4ba 100644
--- a/internal/images/images.yaml
+++ b/internal/images/images.yaml
@@ -13,12 +13,12 @@ images:
   resourceId:
     name: 'etcdbrctl'
   sourceRepository: github.com/gardener/etcd-backup-restore
-  repository: europe-docker.pkg.dev/gardener-project/public/gardener/etcdbrctl
-  tag: "v0.30.1"
+  repository: europe-docker.pkg.dev/gardener-project/snapshots/gardener/etcdbrctl
+  tag: "v0.31.0-dev-endpointTest1.0"
 - name: etcd-wrapper
   sourceRepository: github.com/gardener/etcd-wrapper
-  repository: europe-docker.pkg.dev/gardener-project/public/gardener/etcd-wrapper
-  tag: "v0.1.1"
+  repository: europe-docker.pkg.dev/gardener-project/snapshots/gardener/etcd-wrapper
+  tag: "v0.2.0-dev-a8891a5d4f8039ca2beb8afc5c2169517ba8d92a-linux-arm64"
 - name: alpine
   repository: europe-docker.pkg.dev/gardener-project/public/3rd/alpine
   tag: "3.18.4"
diff --git a/internal/utils/statefulset.go b/internal/utils/statefulset.go
index b87f3234f..a4b4117d1 100644
--- a/internal/utils/statefulset.go
+++ b/internal/utils/statefulset.go
@@ -93,9 +93,22 @@ func FetchPVCWarningMessagesForStatefulSet(ctx context.Context, cl client.Client
 
 var (
 	etcdTLSVolumeMountNames = sets.New[string](common.VolumeNameEtcdCA, common.VolumeNameEtcdServerTLS, common.VolumeNameEtcdClientTLS, common.VolumeNameEtcdPeerCA, common.VolumeNameEtcdPeerServerTLS, common.VolumeNameBackupRestoreCA)
+	peerTLSVolumeMountNames   = sets.New[string](common.VolumeNameEtcdPeerCA, common.VolumeNameEtcdPeerServerTLS)
 	etcdbrTLSVolumeMountNames = sets.New[string](common.VolumeNameBackupRestoreServerTLS, common.VolumeNameEtcdCA, common.VolumeNameEtcdClientTLS)
 )
 
+// GetStatefulSetPeerTLSVolumeMounts returns the TLS volume mounts of the etcd container that are used for peer communication.
+func GetStatefulSetPeerTLSVolumeMounts(sts *appsv1.StatefulSet) []corev1.VolumeMount {
+	tlsVolMounts := filterTLSVolumeMounts(common.ContainerNameEtcd, sts.Spec.Template.Spec.Containers[0].VolumeMounts)
+	peerTLSVolMounts := make([]corev1.VolumeMount, 0, len(tlsVolMounts))
+	for _, volMount := range tlsVolMounts {
+		if peerTLSVolumeMountNames.Has(volMount.Name) {
+			peerTLSVolMounts = append(peerTLSVolMounts, volMount)
+		}
+	}
+	return peerTLSVolMounts
+}
+
 // GetStatefulSetContainerTLSVolumeMounts returns a map of container name to TLS volume mounts for the given StatefulSet.
 func GetStatefulSetContainerTLSVolumeMounts(sts *appsv1.StatefulSet) map[string][]corev1.VolumeMount {
 	containerVolMounts := make(map[string][]corev1.VolumeMount, 2) // each pod is a 2 container pod. Init containers are not counted as containers.
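For reviewers, the filtering done by GetStatefulSetPeerTLSVolumeMounts boils down to keeping only the peer CA and peer server TLS mounts of the etcd container. A self-contained usage sketch; the volume-name literals below are stand-ins for the common.VolumeName* constants, whose actual values are not shown in this patch:

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/util/sets"
)

// Stand-ins for common.VolumeNameEtcdPeerCA and common.VolumeNameEtcdPeerServerTLS.
var peerTLSVolumeMountNames = sets.New[string]("etcd-peer-ca", "etcd-peer-server-tls")

// filterPeerTLSVolumeMounts reimplements the loop from
// GetStatefulSetPeerTLSVolumeMounts over a plain slice of volume mounts.
func filterPeerTLSVolumeMounts(volMounts []corev1.VolumeMount) []corev1.VolumeMount {
	peer := make([]corev1.VolumeMount, 0, len(volMounts))
	for _, vm := range volMounts {
		if peerTLSVolumeMountNames.Has(vm.Name) {
			peer = append(peer, vm)
		}
	}
	return peer
}

func main() {
	mounts := []corev1.VolumeMount{
		{Name: "etcd-ca", MountPath: "/var/etcd/ssl/ca"},                       // client TLS, filtered out
		{Name: "etcd-peer-ca", MountPath: "/var/etcd/ssl/peer/ca"},             // kept
		{Name: "etcd-peer-server-tls", MountPath: "/var/etcd/ssl/peer/server"}, // kept
	}
	for _, vm := range filterPeerTLSVolumeMounts(mounts) {
		fmt.Println(vm.Name, "->", vm.MountPath)
	}
}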