From 54a22aff21122417c008b3c182debebb2df25ded Mon Sep 17 00:00:00 2001 From: aparajita <5053095+aparajita89@users.noreply.github.com> Date: Fri, 13 Aug 2021 17:11:31 +0530 Subject: [PATCH] Issue #348: trigger cluster restart via crd field (#373) * trigger cluster restart via crd field Signed-off-by: aparajita.singh * added test cases Signed-off-by: aparajita.singh * ran go fmt Signed-off-by: aparajita.singh * rolling restart e2e test cases Signed-off-by: aparajita.singh * Fix containerd CVE-2021-32760 (#374) See: https://github.com/advisories/GHSA-c72p-9xmj-rx3w Signed-off-by: Adi Muraru Signed-off-by: aparajita.singh * added unit tests Signed-off-by: aparajita.singh * ran go fmt Signed-off-by: aparajita.singh * ran go fmt Signed-off-by: aparajita.singh * test bugfix Signed-off-by: aparajita.singh * test case fix Signed-off-by: aparajita.singh * test cases fix Signed-off-by: aparajita.singh * fixed test case Signed-off-by: aparajita.singh * ran go fmt Signed-off-by: aparajita.singh * fixed tests Signed-off-by: aparajita.singh * ran go fmt Signed-off-by: aparajita.singh * comments Signed-off-by: aparajita.singh * removed zk image Signed-off-by: aparajita.singh * removed zk image Signed-off-by: aparajita.singh * removed a println Signed-off-by: aparajita.singh * review comments Signed-off-by: aparajita.singh Co-authored-by: aparajita.singh Co-authored-by: Adrian Muraru --- .../templates/_crd_openapiv3schema.tpl | 3 + charts/zookeeper/README.md | 1 + charts/zookeeper/templates/zookeeper.yaml | 3 + charts/zookeeper/values.yaml | 2 + ...eper.pravega.io_zookeeperclusters_crd.yaml | 3 + .../v1beta1/zookeepercluster_types.go | 13 +++ .../v1beta1/zookeepercluster_types_test.go | 17 +++ .../zookeepercluster_controller.go | 14 +++ .../zookeepercluster_controller_test.go | 87 ++++++++++++++ pkg/test/e2e/e2eutil/zookeepercluster_util.go | 6 + test/e2e/rolling_restart_test.go | 110 ++++++++++++++++++ test/e2e/zookeepercluster_test.go | 1 + 12 files changed, 260 insertions(+) create mode 100644 test/e2e/rolling_restart_test.go diff --git a/charts/zookeeper-operator/templates/_crd_openapiv3schema.tpl b/charts/zookeeper-operator/templates/_crd_openapiv3schema.tpl index e521f3c37..59e57df15 100644 --- a/charts/zookeeper-operator/templates/_crd_openapiv3schema.tpl +++ b/charts/zookeeper-operator/templates/_crd_openapiv3schema.tpl @@ -3615,6 +3615,9 @@ openAPIV3Schema: will be using It can take either Ephemeral or persistence Default StorageType is Persistence storage type: string + triggerRollingRestart: + description: if set to true, triggers a cluster restart. this value will be auto-reverted to false by the operator once the restart is triggered. + type: boolean volumeMounts: description: VolumeMounts defines to support customized volumeMounts items: diff --git a/charts/zookeeper/README.md b/charts/zookeeper/README.md index 62afbec0e..0aa7012ef 100644 --- a/charts/zookeeper/README.md +++ b/charts/zookeeper/README.md @@ -54,6 +54,7 @@ The following table lists the configurable parameters of the zookeeper chart and | Parameter | Description | Default | | ----- | ----------- | ------ | | `replicas` | Expected size of the zookeeper cluster (valid range is from 1 to 7) | `3` | +| `triggerRollingRestart` | If true, the zookeeper cluster is restarted. After the restart is triggered, this value is auto-reverted to false. | `false` | | `image.repository` | Image repository | `pravega/zookeeper` | | `image.tag` | Image tag | `0.2.12` | | `image.pullPolicy` | Image pull policy | `IfNotPresent` | diff --git a/charts/zookeeper/templates/zookeeper.yaml b/charts/zookeeper/templates/zookeeper.yaml index 8e5502adf..6ee77b2e1 100644 --- a/charts/zookeeper/templates/zookeeper.yaml +++ b/charts/zookeeper/templates/zookeeper.yaml @@ -56,6 +56,9 @@ spec: {{- if .Values.ports }} ports: {{ toYaml .Values.ports | indent 4 }} + {{- end }} + {{- if .Values.triggerRollingRestart }} + triggerRollingRestart: {{ .Values.triggerRollingRestart }} {{- end }} pod: {{- if .Values.pod.labels }} diff --git a/charts/zookeeper/values.yaml b/charts/zookeeper/values.yaml index d678f3f51..0eb925ea3 100644 --- a/charts/zookeeper/values.yaml +++ b/charts/zookeeper/values.yaml @@ -5,6 +5,8 @@ image: tag: 0.2.12 pullPolicy: IfNotPresent +triggerRollingRestart: false + domainName: labels: {} ports: [] diff --git a/deploy/crds/zookeeper.pravega.io_zookeeperclusters_crd.yaml b/deploy/crds/zookeeper.pravega.io_zookeeperclusters_crd.yaml index 4fa070e84..a4ca5cdf5 100644 --- a/deploy/crds/zookeeper.pravega.io_zookeeperclusters_crd.yaml +++ b/deploy/crds/zookeeper.pravega.io_zookeeperclusters_crd.yaml @@ -3659,6 +3659,9 @@ spec: will be using It can take either Ephemeral or persistence Default StorageType is Persistence storage type: string + triggerRollingRestart: + description: if set to true, triggers a cluster restart. this value should be set to false by the operator once the restart completes for all pods. + type: boolean volumeMounts: description: VolumeMounts defines to support customized volumeMounts items: diff --git a/pkg/apis/zookeeper/v1beta1/zookeepercluster_types.go b/pkg/apis/zookeeper/v1beta1/zookeepercluster_types.go index 038db67e2..f0d22b473 100644 --- a/pkg/apis/zookeeper/v1beta1/zookeepercluster_types.go +++ b/pkg/apis/zookeeper/v1beta1/zookeepercluster_types.go @@ -108,6 +108,10 @@ type ZookeeperClusterSpec struct { // for the zookeeper cluster. ClientService ClientServicePolicy `json:"clientService,omitempty"` + // TriggerRollingRestart if set to true will instruct operator to restart all + // the pods in the zookeeper cluster, after which this value will be set to false + TriggerRollingRestart bool `json:"triggerRollingRestart,omitempty"` + // HeadlessService defines the policy to create headless Service // for the zookeeper cluster. HeadlessService HeadlessServicePolicy `json:"headlessService,omitempty"` @@ -369,6 +373,15 @@ func (z *ZookeeperCluster) GetAdminServerServiceName() string { return fmt.Sprintf("%s-admin-server", z.GetName()) } +func (z *ZookeeperCluster) GetTriggerRollingRestart() bool { + return z.Spec.TriggerRollingRestart +} + +// set the value of triggerRollingRestart function +func (z *ZookeeperCluster) SetTriggerRollingRestart(val bool) { + z.Spec.TriggerRollingRestart = val +} + // Ports groups the ports for a zookeeper cluster node for easy access type Ports struct { Client int32 diff --git a/pkg/apis/zookeeper/v1beta1/zookeepercluster_types_test.go b/pkg/apis/zookeeper/v1beta1/zookeepercluster_types_test.go index 80c0fec03..4fb83623d 100644 --- a/pkg/apis/zookeeper/v1beta1/zookeepercluster_types_test.go +++ b/pkg/apis/zookeeper/v1beta1/zookeepercluster_types_test.go @@ -55,6 +55,10 @@ var _ = Describe("ZookeeperCluster Types", func() { Ω(z.Spec.Labels["release"]).To(Equal("example")) }) + It("should have a triggerRollingRestart set to false", func() { + Ω(z.GetTriggerRollingRestart()).To(Equal(false)) + }) + Context("Image", func() { var i v1beta1.ContainerImage @@ -334,4 +338,17 @@ var _ = Describe("ZookeeperCluster Types", func() { Ω(p.AdminServer).To(BeEquivalentTo(8080)) }) }) + + Context("#TriggerRollingRestart is set", func() { + var t bool + + BeforeEach(func() { + z.WithDefaults() + z.SetTriggerRollingRestart(true) + t = z.GetTriggerRollingRestart() + }) + It("should return the value of triggerRollingRestart", func() { + Ω(t).To(BeEquivalentTo(true)) + }) + }) }) diff --git a/pkg/controller/zookeepercluster/zookeepercluster_controller.go b/pkg/controller/zookeepercluster/zookeepercluster_controller.go index f222b1a2e..e48c3509e 100644 --- a/pkg/controller/zookeepercluster/zookeepercluster_controller.go +++ b/pkg/controller/zookeepercluster/zookeepercluster_controller.go @@ -139,6 +139,16 @@ func (r *ReconcileZookeeperCluster) Reconcile(request reconcile.Request) (reconc return reconcile.Result{}, err } changed := instance.WithDefaults() + if instance.GetTriggerRollingRestart() { + r.log.Info("Restarting zookeeper cluster") + annotationkey, annotationvalue := getRollingRestartAnnotation() + if instance.Spec.Pod.Annotations == nil { + instance.Spec.Pod.Annotations = make(map[string]string) + } + instance.Spec.Pod.Annotations[annotationkey] = annotationvalue + instance.SetTriggerRollingRestart(false) + changed = true + } if changed { r.log.Info("Setting default settings for zookeeper-cluster") if err := r.client.Update(context.TODO(), instance); err != nil { @@ -164,6 +174,10 @@ func (r *ReconcileZookeeperCluster) Reconcile(request reconcile.Request) (reconc return reconcile.Result{RequeueAfter: ReconcileTime}, nil } +func getRollingRestartAnnotation() (string, string) { + return "restartTime", time.Now().Format(time.RFC850) +} + // compareResourceVersion compare resoure versions for the supplied ZookeeperCluster and StatefulSet // resources // Returns: diff --git a/pkg/controller/zookeepercluster/zookeepercluster_controller_test.go b/pkg/controller/zookeepercluster/zookeepercluster_controller_test.go index c0ad1073a..55b5347db 100644 --- a/pkg/controller/zookeepercluster/zookeepercluster_controller_test.go +++ b/pkg/controller/zookeepercluster/zookeepercluster_controller_test.go @@ -609,5 +609,92 @@ var _ = Describe("ZookeeperCluster Controller", func() { Ω(updated).To(Equal(0)) }) }) + + Context("trigger rolling restart", func() { + var ( + cl client.Client + err error + foundZk = &v1beta1.ZookeeperCluster{} + next *v1beta1.ZookeeperCluster + svc *corev1.Service + ) + BeforeEach(func() { + z.WithDefaults() + next = z.DeepCopy() + next.Spec.TriggerRollingRestart = true + svc = zk.MakeClientService(z) + cl = fake.NewFakeClient([]runtime.Object{next, svc}...) + r = &ReconcileZookeeperCluster{client: cl, scheme: s, zkClient: mockZkClient} + res, err = r.Reconcile(req) + err = cl.Get(context.TODO(), req.NamespacedName, foundZk) + }) + + It("should update restartTime annotation and reset triggerRollingRestart to false when triggerRollingRestart is set to true", func() { + Ω(res.Requeue).To(Equal(true)) + Ω(err).To(BeNil()) + Ω(foundZk.Spec.TriggerRollingRestart).To(Equal(false)) + _, restartTimeExists := foundZk.Spec.Pod.Annotations["restartTime"] + Ω(restartTimeExists).To(Equal(true)) + }) + + It("should not update restartTime annotation when set triggerRollingRestart to false", func() { + _, restartTimeExists := foundZk.Spec.Pod.Annotations["restartTime"] + Ω(restartTimeExists).To(Equal(true)) + + next.Spec.TriggerRollingRestart = false + svc = zk.MakeClientService(z) + cl = fake.NewFakeClient([]runtime.Object{next, svc}...) + r = &ReconcileZookeeperCluster{client: cl, scheme: s, zkClient: mockZkClient} + res, err = r.Reconcile(req) + + Ω(res.Requeue).To(Equal(false)) + Ω(err).To(BeNil()) + Ω(foundZk.Spec.TriggerRollingRestart).To(Equal(false)) + _, restartTimeExists = foundZk.Spec.Pod.Annotations["restartTime"] + Ω(restartTimeExists).To(Equal(true)) + }) + + It("should update restartTime annotation to new value when rolling restart is triggered multiple times", func() { + oldRestartValue, restartTimeExists := foundZk.Spec.Pod.Annotations["restartTime"] + Ω(restartTimeExists).To(Equal(true)) + + // wait 1 second to ensure that restartTime, if set, will have a different value + time.Sleep(1 * time.Second) + + // update the crd instance + next.Spec.TriggerRollingRestart = false + svc = zk.MakeClientService(z) + cl = fake.NewFakeClient([]runtime.Object{next, svc}...) + r = &ReconcileZookeeperCluster{client: cl, scheme: s, zkClient: mockZkClient} + res, err = r.Reconcile(req) + err = cl.Get(context.TODO(), req.NamespacedName, foundZk) + + // check that restartTime was not updated + Ω(res.Requeue).To(Equal(false)) + Ω(err).To(BeNil()) + Ω(foundZk.Spec.TriggerRollingRestart).To(Equal(false)) + _, restartTimeExists = foundZk.Spec.Pod.Annotations["restartTime"] + Ω(restartTimeExists).To(Equal(true)) + + // wait 1 second to ensure that restartTime, if set, will have a different value + time.Sleep(1 * time.Second) + + // update the crd instance to trigger rolling restart + next.Spec.TriggerRollingRestart = true + svc = zk.MakeClientService(z) + cl = fake.NewFakeClient([]runtime.Object{next, svc}...) + r = &ReconcileZookeeperCluster{client: cl, scheme: s, zkClient: mockZkClient} + res, err = r.Reconcile(req) + err = cl.Get(context.TODO(), req.NamespacedName, foundZk) + + // check that restartTime was updated + Ω(res.Requeue).To(Equal(true)) + Ω(err).To(BeNil()) + Ω(foundZk.Spec.TriggerRollingRestart).To(Equal(false)) + newRestartValue, restartTimeExists := foundZk.Spec.Pod.Annotations["restartTime"] + Ω(restartTimeExists).To(Equal(true)) + Ω(oldRestartValue).NotTo(Equal(newRestartValue)) + }) + }) }) }) diff --git a/pkg/test/e2e/e2eutil/zookeepercluster_util.go b/pkg/test/e2e/e2eutil/zookeepercluster_util.go index 9ae06559c..8a306b8f3 100644 --- a/pkg/test/e2e/e2eutil/zookeepercluster_util.go +++ b/pkg/test/e2e/e2eutil/zookeepercluster_util.go @@ -231,6 +231,12 @@ func DeletePods(t *testing.T, f *framework.Framework, ctx *framework.TestCtx, z } return nil } +func GetPods(t *testing.T, f *framework.Framework, z *api.ZookeeperCluster) (*corev1.PodList, error) { + listOptions := metav1.ListOptions{ + LabelSelector: labels.SelectorFromSet(map[string]string{"app": z.GetName()}).String(), + } + return f.KubeClient.CoreV1().Pods(z.Namespace).List(goctx.TODO(), listOptions) +} func CheckAdminService(t *testing.T, f *framework.Framework, ctx *framework.TestCtx, z *api.ZookeeperCluster) error { listOptions := metav1.ListOptions{ LabelSelector: labels.SelectorFromSet(map[string]string{"app": z.GetName()}).String(), diff --git a/test/e2e/rolling_restart_test.go b/test/e2e/rolling_restart_test.go new file mode 100644 index 000000000..35fcedc47 --- /dev/null +++ b/test/e2e/rolling_restart_test.go @@ -0,0 +1,110 @@ +/** + * Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + */ + +package e2e + +import ( + "testing" + + . "github.com/onsi/gomega" + framework "github.com/operator-framework/operator-sdk/pkg/test" + zk_e2eutil "github.com/pravega/zookeeper-operator/pkg/test/e2e/e2eutil" + "time" +) + +func testRollingRestart(t *testing.T) { + g := NewGomegaWithT(t) + + doCleanup := true + ctx := framework.NewTestCtx(t) + defer func() { + if doCleanup { + ctx.Cleanup() + } + }() + + namespace, err := ctx.GetNamespace() + g.Expect(err).NotTo(HaveOccurred()) + f := framework.Global + + cluster := zk_e2eutil.NewDefaultCluster(namespace) + + cluster.WithDefaults() + cluster.Status.Init() + cluster.Spec.Persistence.VolumeReclaimPolicy = "Delete" + + zk, err := zk_e2eutil.CreateCluster(t, f, ctx, cluster) + g.Expect(err).NotTo(HaveOccurred()) + + // A default Zookeepercluster should have 3 replicas + podSize := 3 + start := time.Now().Minute()*60 + time.Now().Second() + err = zk_e2eutil.WaitForClusterToBecomeReady(t, f, ctx, zk, podSize) + clusterCreateDuration := time.Now().Minute()*60 + time.Now().Second() - start + g.Expect(err).NotTo(HaveOccurred()) + + // This is to get the latest Zookeeper cluster object + zk, err = zk_e2eutil.GetCluster(t, f, ctx, zk) + g.Expect(err).NotTo(HaveOccurred()) + podList, err := zk_e2eutil.GetPods(t, f, zk) + g.Expect(err).NotTo(HaveOccurred()) + for i := 0; i < len(podList.Items); i++ { + g.Expect(podList.Items[i].Annotations).NotTo(HaveKey("restartTime")) + } + g.Expect(zk.GetTriggerRollingRestart()).To(Equal(false)) + + // Trigger a rolling restart + zk.Spec.TriggerRollingRestart = true + err = zk_e2eutil.UpdateCluster(t, f, ctx, zk) + // zk_e2eutil.WaitForClusterToBecomeReady(...) will return as soon as any pod is restarted as the cluster is briefly reported to be healthy even though the restart is not completed. this method is hence called after a sleep to ensure that the restart has completed before asserting the test cases. + time.Sleep(time.Duration(clusterCreateDuration) * 2 * time.Second) + err = zk_e2eutil.WaitForClusterToBecomeReady(t, f, ctx, zk, podSize) + g.Expect(err).NotTo(HaveOccurred()) + + zk, err = zk_e2eutil.GetCluster(t, f, ctx, zk) + g.Expect(err).NotTo(HaveOccurred()) + newPodList, err := zk_e2eutil.GetPods(t, f, zk) + g.Expect(err).NotTo(HaveOccurred()) + var firstRestartTime []string + for i := 0; i < len(newPodList.Items); i++ { + g.Expect(newPodList.Items[i].Annotations).To(HaveKey("restartTime")) + firstRestartTime = append(firstRestartTime, newPodList.Items[i].Annotations["restartTime"]) + } + g.Expect(zk.GetTriggerRollingRestart()).To(Equal(false)) + + // Trigger a rolling restart again + zk.Spec.TriggerRollingRestart = true + err = zk_e2eutil.UpdateCluster(t, f, ctx, zk) + // zk_e2eutil.WaitForClusterToBecomeReady(...) will return as soon as any pod is restarted as the cluster is briefly reported to be healthy even though the complete restart is not completed. this method is hence called after a sleep to ensure that the restart has completed before asserting the test cases. + time.Sleep(time.Duration(clusterCreateDuration) * 2 * time.Second) + err = zk_e2eutil.WaitForClusterToBecomeReady(t, f, ctx, zk, podSize) + g.Expect(err).NotTo(HaveOccurred()) + + zk, err = zk_e2eutil.GetCluster(t, f, ctx, zk) + g.Expect(err).NotTo(HaveOccurred()) + newPodList2, err := zk_e2eutil.GetPods(t, f, zk) + g.Expect(err).NotTo(HaveOccurred()) + for i := 0; i < len(newPodList2.Items); i++ { + g.Expect(newPodList2.Items[i].Annotations).To(HaveKey("restartTime")) + g.Expect(newPodList2.Items[i].Annotations["restartTime"]).NotTo(Equal(firstRestartTime[i])) + } + g.Expect(zk.GetTriggerRollingRestart()).To(Equal(false)) + + // Delete cluster + err = zk_e2eutil.DeleteCluster(t, f, ctx, zk) + g.Expect(err).NotTo(HaveOccurred()) + + // No need to do cleanup since the cluster CR has already been deleted + doCleanup = false + + err = zk_e2eutil.WaitForClusterToTerminate(t, f, ctx, zk) + g.Expect(err).NotTo(HaveOccurred()) + +} diff --git a/test/e2e/zookeepercluster_test.go b/test/e2e/zookeepercluster_test.go index b378b3220..6ebce5683 100644 --- a/test/e2e/zookeepercluster_test.go +++ b/test/e2e/zookeepercluster_test.go @@ -64,6 +64,7 @@ func testZookeeperCluster(t *testing.T) { "testImagePullSecret": testImagePullSecret, "testScaleCluster": testScaleCluster, "testEphemeralStorage": testEphemeralStorage, + "testRollingRestart": testRollingRestart, } for name, f := range testFuncs {