Skip to content

Commit

Permalink
Issue #348: trigger cluster restart via crd field (#373)
Browse files Browse the repository at this point in the history
* trigger cluster restart via crd field

Signed-off-by: aparajita.singh <[email protected]>

* added test cases

Signed-off-by: aparajita.singh <[email protected]>

* ran go fmt

Signed-off-by: aparajita.singh <[email protected]>

* rolling restart e2e test cases

Signed-off-by: aparajita.singh <[email protected]>

* Fix containerd CVE-2021-32760 (#374)

See: GHSA-c72p-9xmj-rx3w
Signed-off-by: Adi Muraru <[email protected]>
Signed-off-by: aparajita.singh <[email protected]>

* added unit tests

Signed-off-by: aparajita.singh <[email protected]>

* ran go fmt

Signed-off-by: aparajita.singh <[email protected]>

* ran go fmt

Signed-off-by: aparajita.singh <[email protected]>

* test bugfix

Signed-off-by: aparajita.singh <[email protected]>

* test case fix

Signed-off-by: aparajita.singh <[email protected]>

* test cases fix

Signed-off-by: aparajita.singh <[email protected]>

* fixed test case

Signed-off-by: aparajita.singh <[email protected]>

* ran go fmt

Signed-off-by: aparajita.singh <[email protected]>

* fixed tests

Signed-off-by: aparajita.singh <[email protected]>

* ran go fmt

Signed-off-by: aparajita.singh <[email protected]>

* comments

Signed-off-by: aparajita.singh <[email protected]>

* removed zk image

Signed-off-by: aparajita.singh <[email protected]>

* removed zk image

Signed-off-by: aparajita.singh <[email protected]>

* removed a println

Signed-off-by: aparajita.singh <[email protected]>

* review comments

Signed-off-by: aparajita.singh <[email protected]>

Co-authored-by: aparajita.singh <[email protected]>
Co-authored-by: Adrian Muraru <[email protected]>
  • Loading branch information
3 people authored Aug 13, 2021
1 parent 53669c1 commit 54a22af
Show file tree
Hide file tree
Showing 12 changed files with 260 additions and 0 deletions.
3 changes: 3 additions & 0 deletions charts/zookeeper-operator/templates/_crd_openapiv3schema.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -3615,6 +3615,9 @@ openAPIV3Schema:
will be using It can take either Ephemeral or persistence Default
StorageType is Persistence storage
type: string
triggerRollingRestart:
description: if set to true, triggers a cluster restart. this value will be auto-reverted to false by the operator once the restart is triggered.
type: boolean
volumeMounts:
description: VolumeMounts defines to support customized volumeMounts
items:
Expand Down
1 change: 1 addition & 0 deletions charts/zookeeper/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ The following table lists the configurable parameters of the zookeeper chart and
| Parameter | Description | Default |
| ----- | ----------- | ------ |
| `replicas` | Expected size of the zookeeper cluster (valid range is from 1 to 7) | `3` |
| `triggerRollingRestart` | If true, the zookeeper cluster is restarted. After the restart is triggered, this value is auto-reverted to false. | `false` |
| `image.repository` | Image repository | `pravega/zookeeper` |
| `image.tag` | Image tag | `0.2.12` |
| `image.pullPolicy` | Image pull policy | `IfNotPresent` |
Expand Down
3 changes: 3 additions & 0 deletions charts/zookeeper/templates/zookeeper.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ spec:
{{- if .Values.ports }}
ports:
{{ toYaml .Values.ports | indent 4 }}
{{- end }}
{{- if .Values.triggerRollingRestart }}
triggerRollingRestart: {{ .Values.triggerRollingRestart }}
{{- end }}
pod:
{{- if .Values.pod.labels }}
Expand Down
2 changes: 2 additions & 0 deletions charts/zookeeper/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ image:
tag: 0.2.12
pullPolicy: IfNotPresent

triggerRollingRestart: false

domainName:
labels: {}
ports: []
Expand Down
3 changes: 3 additions & 0 deletions deploy/crds/zookeeper.pravega.io_zookeeperclusters_crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3659,6 +3659,9 @@ spec:
will be using It can take either Ephemeral or persistence Default
StorageType is Persistence storage
type: string
triggerRollingRestart:
description: if set to true, triggers a cluster restart. this value should be set to false by the operator once the restart completes for all pods.
type: boolean
volumeMounts:
description: VolumeMounts defines to support customized volumeMounts
items:
Expand Down
13 changes: 13 additions & 0 deletions pkg/apis/zookeeper/v1beta1/zookeepercluster_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,10 @@ type ZookeeperClusterSpec struct {
// for the zookeeper cluster.
ClientService ClientServicePolicy `json:"clientService,omitempty"`

// TriggerRollingRestart if set to true will instruct operator to restart all
// the pods in the zookeeper cluster, after which this value will be set to false
TriggerRollingRestart bool `json:"triggerRollingRestart,omitempty"`

// HeadlessService defines the policy to create headless Service
// for the zookeeper cluster.
HeadlessService HeadlessServicePolicy `json:"headlessService,omitempty"`
Expand Down Expand Up @@ -369,6 +373,15 @@ func (z *ZookeeperCluster) GetAdminServerServiceName() string {
return fmt.Sprintf("%s-admin-server", z.GetName())
}

func (z *ZookeeperCluster) GetTriggerRollingRestart() bool {
return z.Spec.TriggerRollingRestart
}

// set the value of triggerRollingRestart function
func (z *ZookeeperCluster) SetTriggerRollingRestart(val bool) {
z.Spec.TriggerRollingRestart = val
}

// Ports groups the ports for a zookeeper cluster node for easy access
type Ports struct {
Client int32
Expand Down
17 changes: 17 additions & 0 deletions pkg/apis/zookeeper/v1beta1/zookeepercluster_types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,10 @@ var _ = Describe("ZookeeperCluster Types", func() {
Ω(z.Spec.Labels["release"]).To(Equal("example"))
})

It("should have a triggerRollingRestart set to false", func() {
Ω(z.GetTriggerRollingRestart()).To(Equal(false))
})

Context("Image", func() {
var i v1beta1.ContainerImage

Expand Down Expand Up @@ -334,4 +338,17 @@ var _ = Describe("ZookeeperCluster Types", func() {
Ω(p.AdminServer).To(BeEquivalentTo(8080))
})
})

Context("#TriggerRollingRestart is set", func() {
var t bool

BeforeEach(func() {
z.WithDefaults()
z.SetTriggerRollingRestart(true)
t = z.GetTriggerRollingRestart()
})
It("should return the value of triggerRollingRestart", func() {
Ω(t).To(BeEquivalentTo(true))
})
})
})
14 changes: 14 additions & 0 deletions pkg/controller/zookeepercluster/zookeepercluster_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,16 @@ func (r *ReconcileZookeeperCluster) Reconcile(request reconcile.Request) (reconc
return reconcile.Result{}, err
}
changed := instance.WithDefaults()
if instance.GetTriggerRollingRestart() {
r.log.Info("Restarting zookeeper cluster")
annotationkey, annotationvalue := getRollingRestartAnnotation()
if instance.Spec.Pod.Annotations == nil {
instance.Spec.Pod.Annotations = make(map[string]string)
}
instance.Spec.Pod.Annotations[annotationkey] = annotationvalue
instance.SetTriggerRollingRestart(false)
changed = true
}
if changed {
r.log.Info("Setting default settings for zookeeper-cluster")
if err := r.client.Update(context.TODO(), instance); err != nil {
Expand All @@ -164,6 +174,10 @@ func (r *ReconcileZookeeperCluster) Reconcile(request reconcile.Request) (reconc
return reconcile.Result{RequeueAfter: ReconcileTime}, nil
}

func getRollingRestartAnnotation() (string, string) {
return "restartTime", time.Now().Format(time.RFC850)
}

// compareResourceVersion compare resoure versions for the supplied ZookeeperCluster and StatefulSet
// resources
// Returns:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -609,5 +609,92 @@ var _ = Describe("ZookeeperCluster Controller", func() {
Ω(updated).To(Equal(0))
})
})

Context("trigger rolling restart", func() {
var (
cl client.Client
err error
foundZk = &v1beta1.ZookeeperCluster{}
next *v1beta1.ZookeeperCluster
svc *corev1.Service
)
BeforeEach(func() {
z.WithDefaults()
next = z.DeepCopy()
next.Spec.TriggerRollingRestart = true
svc = zk.MakeClientService(z)
cl = fake.NewFakeClient([]runtime.Object{next, svc}...)
r = &ReconcileZookeeperCluster{client: cl, scheme: s, zkClient: mockZkClient}
res, err = r.Reconcile(req)
err = cl.Get(context.TODO(), req.NamespacedName, foundZk)
})

It("should update restartTime annotation and reset triggerRollingRestart to false when triggerRollingRestart is set to true", func() {
Ω(res.Requeue).To(Equal(true))
Ω(err).To(BeNil())
Ω(foundZk.Spec.TriggerRollingRestart).To(Equal(false))
_, restartTimeExists := foundZk.Spec.Pod.Annotations["restartTime"]
Ω(restartTimeExists).To(Equal(true))
})

It("should not update restartTime annotation when set triggerRollingRestart to false", func() {
_, restartTimeExists := foundZk.Spec.Pod.Annotations["restartTime"]
Ω(restartTimeExists).To(Equal(true))

next.Spec.TriggerRollingRestart = false
svc = zk.MakeClientService(z)
cl = fake.NewFakeClient([]runtime.Object{next, svc}...)
r = &ReconcileZookeeperCluster{client: cl, scheme: s, zkClient: mockZkClient}
res, err = r.Reconcile(req)

Ω(res.Requeue).To(Equal(false))
Ω(err).To(BeNil())
Ω(foundZk.Spec.TriggerRollingRestart).To(Equal(false))
_, restartTimeExists = foundZk.Spec.Pod.Annotations["restartTime"]
Ω(restartTimeExists).To(Equal(true))
})

It("should update restartTime annotation to new value when rolling restart is triggered multiple times", func() {
oldRestartValue, restartTimeExists := foundZk.Spec.Pod.Annotations["restartTime"]
Ω(restartTimeExists).To(Equal(true))

// wait 1 second to ensure that restartTime, if set, will have a different value
time.Sleep(1 * time.Second)

// update the crd instance
next.Spec.TriggerRollingRestart = false
svc = zk.MakeClientService(z)
cl = fake.NewFakeClient([]runtime.Object{next, svc}...)
r = &ReconcileZookeeperCluster{client: cl, scheme: s, zkClient: mockZkClient}
res, err = r.Reconcile(req)
err = cl.Get(context.TODO(), req.NamespacedName, foundZk)

// check that restartTime was not updated
Ω(res.Requeue).To(Equal(false))
Ω(err).To(BeNil())
Ω(foundZk.Spec.TriggerRollingRestart).To(Equal(false))
_, restartTimeExists = foundZk.Spec.Pod.Annotations["restartTime"]
Ω(restartTimeExists).To(Equal(true))

// wait 1 second to ensure that restartTime, if set, will have a different value
time.Sleep(1 * time.Second)

// update the crd instance to trigger rolling restart
next.Spec.TriggerRollingRestart = true
svc = zk.MakeClientService(z)
cl = fake.NewFakeClient([]runtime.Object{next, svc}...)
r = &ReconcileZookeeperCluster{client: cl, scheme: s, zkClient: mockZkClient}
res, err = r.Reconcile(req)
err = cl.Get(context.TODO(), req.NamespacedName, foundZk)

// check that restartTime was updated
Ω(res.Requeue).To(Equal(true))
Ω(err).To(BeNil())
Ω(foundZk.Spec.TriggerRollingRestart).To(Equal(false))
newRestartValue, restartTimeExists := foundZk.Spec.Pod.Annotations["restartTime"]
Ω(restartTimeExists).To(Equal(true))
Ω(oldRestartValue).NotTo(Equal(newRestartValue))
})
})
})
})
6 changes: 6 additions & 0 deletions pkg/test/e2e/e2eutil/zookeepercluster_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,12 @@ func DeletePods(t *testing.T, f *framework.Framework, ctx *framework.TestCtx, z
}
return nil
}
func GetPods(t *testing.T, f *framework.Framework, z *api.ZookeeperCluster) (*corev1.PodList, error) {
listOptions := metav1.ListOptions{
LabelSelector: labels.SelectorFromSet(map[string]string{"app": z.GetName()}).String(),
}
return f.KubeClient.CoreV1().Pods(z.Namespace).List(goctx.TODO(), listOptions)
}
func CheckAdminService(t *testing.T, f *framework.Framework, ctx *framework.TestCtx, z *api.ZookeeperCluster) error {
listOptions := metav1.ListOptions{
LabelSelector: labels.SelectorFromSet(map[string]string{"app": z.GetName()}).String(),
Expand Down
110 changes: 110 additions & 0 deletions test/e2e/rolling_restart_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/**
* Copyright (c) 2018 Dell Inc., or its subsidiaries. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*/

package e2e

import (
"testing"

. "github.com/onsi/gomega"
framework "github.com/operator-framework/operator-sdk/pkg/test"
zk_e2eutil "github.com/pravega/zookeeper-operator/pkg/test/e2e/e2eutil"
"time"
)

func testRollingRestart(t *testing.T) {
g := NewGomegaWithT(t)

doCleanup := true
ctx := framework.NewTestCtx(t)
defer func() {
if doCleanup {
ctx.Cleanup()
}
}()

namespace, err := ctx.GetNamespace()
g.Expect(err).NotTo(HaveOccurred())
f := framework.Global

cluster := zk_e2eutil.NewDefaultCluster(namespace)

cluster.WithDefaults()
cluster.Status.Init()
cluster.Spec.Persistence.VolumeReclaimPolicy = "Delete"

zk, err := zk_e2eutil.CreateCluster(t, f, ctx, cluster)
g.Expect(err).NotTo(HaveOccurred())

// A default Zookeepercluster should have 3 replicas
podSize := 3
start := time.Now().Minute()*60 + time.Now().Second()
err = zk_e2eutil.WaitForClusterToBecomeReady(t, f, ctx, zk, podSize)
clusterCreateDuration := time.Now().Minute()*60 + time.Now().Second() - start
g.Expect(err).NotTo(HaveOccurred())

// This is to get the latest Zookeeper cluster object
zk, err = zk_e2eutil.GetCluster(t, f, ctx, zk)
g.Expect(err).NotTo(HaveOccurred())
podList, err := zk_e2eutil.GetPods(t, f, zk)
g.Expect(err).NotTo(HaveOccurred())
for i := 0; i < len(podList.Items); i++ {
g.Expect(podList.Items[i].Annotations).NotTo(HaveKey("restartTime"))
}
g.Expect(zk.GetTriggerRollingRestart()).To(Equal(false))

// Trigger a rolling restart
zk.Spec.TriggerRollingRestart = true
err = zk_e2eutil.UpdateCluster(t, f, ctx, zk)
// zk_e2eutil.WaitForClusterToBecomeReady(...) will return as soon as any pod is restarted as the cluster is briefly reported to be healthy even though the restart is not completed. this method is hence called after a sleep to ensure that the restart has completed before asserting the test cases.
time.Sleep(time.Duration(clusterCreateDuration) * 2 * time.Second)
err = zk_e2eutil.WaitForClusterToBecomeReady(t, f, ctx, zk, podSize)
g.Expect(err).NotTo(HaveOccurred())

zk, err = zk_e2eutil.GetCluster(t, f, ctx, zk)
g.Expect(err).NotTo(HaveOccurred())
newPodList, err := zk_e2eutil.GetPods(t, f, zk)
g.Expect(err).NotTo(HaveOccurred())
var firstRestartTime []string
for i := 0; i < len(newPodList.Items); i++ {
g.Expect(newPodList.Items[i].Annotations).To(HaveKey("restartTime"))
firstRestartTime = append(firstRestartTime, newPodList.Items[i].Annotations["restartTime"])
}
g.Expect(zk.GetTriggerRollingRestart()).To(Equal(false))

// Trigger a rolling restart again
zk.Spec.TriggerRollingRestart = true
err = zk_e2eutil.UpdateCluster(t, f, ctx, zk)
// zk_e2eutil.WaitForClusterToBecomeReady(...) will return as soon as any pod is restarted as the cluster is briefly reported to be healthy even though the complete restart is not completed. this method is hence called after a sleep to ensure that the restart has completed before asserting the test cases.
time.Sleep(time.Duration(clusterCreateDuration) * 2 * time.Second)
err = zk_e2eutil.WaitForClusterToBecomeReady(t, f, ctx, zk, podSize)
g.Expect(err).NotTo(HaveOccurred())

zk, err = zk_e2eutil.GetCluster(t, f, ctx, zk)
g.Expect(err).NotTo(HaveOccurred())
newPodList2, err := zk_e2eutil.GetPods(t, f, zk)
g.Expect(err).NotTo(HaveOccurred())
for i := 0; i < len(newPodList2.Items); i++ {
g.Expect(newPodList2.Items[i].Annotations).To(HaveKey("restartTime"))
g.Expect(newPodList2.Items[i].Annotations["restartTime"]).NotTo(Equal(firstRestartTime[i]))
}
g.Expect(zk.GetTriggerRollingRestart()).To(Equal(false))

// Delete cluster
err = zk_e2eutil.DeleteCluster(t, f, ctx, zk)
g.Expect(err).NotTo(HaveOccurred())

// No need to do cleanup since the cluster CR has already been deleted
doCleanup = false

err = zk_e2eutil.WaitForClusterToTerminate(t, f, ctx, zk)
g.Expect(err).NotTo(HaveOccurred())

}
1 change: 1 addition & 0 deletions test/e2e/zookeepercluster_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ func testZookeeperCluster(t *testing.T) {
"testImagePullSecret": testImagePullSecret,
"testScaleCluster": testScaleCluster,
"testEphemeralStorage": testEphemeralStorage,
"testRollingRestart": testRollingRestart,
}

for name, f := range testFuncs {
Expand Down

0 comments on commit 54a22af

Please sign in to comment.