diff --git a/Makefile b/Makefile index 0ea3d5dd22d..27815b463d3 100644 --- a/Makefile +++ b/Makefile @@ -24,15 +24,15 @@ CSV_VERSION?=0.10.0 # WORK_IMAGE can be set in the env to override calculated value WORK_TAG?=latest -WORK_IMAGE?=$(IMAGE_REGISTRY)/work:$(WORK_TAG) +export WORK_IMAGE?=$(IMAGE_REGISTRY)/work:$(WORK_TAG) # REGISTRATION_IMAGE can be set in the env to override calculated value REGISTRATION_TAG?=latest -REGISTRATION_IMAGE?=$(IMAGE_REGISTRY)/registration:$(REGISTRATION_TAG) +export REGISTRATION_IMAGE?=$(IMAGE_REGISTRY)/registration:$(REGISTRATION_TAG) # PLACEMENT_IMAGE can be set in the env to override calculated value PLACEMENT_TAG?=latest -PLACEMENT_IMAGE?=$(IMAGE_REGISTRY)/placement:$(PLACEMENT_TAG) +export PLACEMENT_IMAGE?=$(IMAGE_REGISTRY)/placement:$(PLACEMENT_TAG) OPERATOR_SDK?=$(PERMANENT_TMP_GOPATH)/bin/operator-sdk OPERATOR_SDK_VERSION?=v1.1.0 diff --git a/pkg/helpers/helpers.go b/pkg/helpers/helpers.go index b71cae4d225..02db5bf24eb 100644 --- a/pkg/helpers/helpers.go +++ b/pkg/helpers/helpers.go @@ -56,6 +56,8 @@ const ( FeatureGatesTypeValid = "ValidFeatureGates" FeatureGatesReasonAllValid = "FeatureGatesAllValid" FeatureGatesReasonInvalidExisting = "InvalidFeatureGatesExisting" + + KlusterletRebootstrapProgressing = "RebootstrapProgressing" ) // OperatorType represents the type of operator supported by the current controller, value could be diff --git a/pkg/operators/klusterlet/controllers/bootstrapcontroller/bootstrapcontroller.go b/pkg/operators/klusterlet/controllers/bootstrapcontroller/bootstrapcontroller.go index f521c5a4537..1b6a784b648 100644 --- a/pkg/operators/klusterlet/controllers/bootstrapcontroller/bootstrapcontroller.go +++ b/pkg/operators/klusterlet/controllers/bootstrapcontroller/bootstrapcontroller.go @@ -6,6 +6,7 @@ import ( "fmt" "time" + operatorv1client "open-cluster-management.io/api/client/operator/clientset/versioned/typed/operator/v1" operatorinformer "open-cluster-management.io/api/client/operator/informers/externalversions/operator/v1" operatorlister "open-cluster-management.io/api/client/operator/listers/operator/v1" "open-cluster-management.io/registration-operator/pkg/helpers" @@ -40,17 +41,20 @@ var BootstrapControllerSyncInterval = 5 * time.Minute type bootstrapController struct { kubeClient kubernetes.Interface klusterletLister operatorlister.KlusterletLister + klusterletClient operatorv1client.KlusterletInterface secretInformers map[string]coreinformer.SecretInformer } // NewBootstrapController returns a bootstrapController func NewBootstrapController( kubeClient kubernetes.Interface, + klusterletClient operatorv1client.KlusterletInterface, klusterletInformer operatorinformer.KlusterletInformer, secretInformers map[string]coreinformer.SecretInformer, recorder events.Recorder) factory.Controller { controller := &bootstrapController{ kubeClient: kubeClient, + klusterletClient: klusterletClient, klusterletLister: klusterletInformer.Lister(), secretInformers: secretInformers, } @@ -93,6 +97,18 @@ func (k *bootstrapController) sync(ctx context.Context, controllerContext factor return nil } + // handle rebootstrap if the klusterlet is in rebootstrapping state + klusterlet, err := k.klusterletLister.Get(klusterletName) + if err != nil { + return err + } + requeueFunc := func(duration time.Duration) { + controllerContext.Queue().AddAfter(queueKey, duration) + } + if meta.IsStatusConditionTrue(klusterlet.Status.Conditions, helpers.KlusterletRebootstrapProgressing) { + return k.processRebootstrap(ctx, agentNamespace, klusterletName, 
controllerContext.Recorder(), requeueFunc) + } + bootstrapHubKubeconfigSecret, err := k.secretInformers[helpers.BootstrapHubKubeConfig].Lister().Secrets(agentNamespace).Get(helpers.BootstrapHubKubeConfig) switch { case errors.IsNotFound(err): @@ -135,7 +151,7 @@ func (k *bootstrapController) sync(ctx context.Context, controllerContext factor !bytes.Equal(bootstrapKubeconfig.CertificateAuthorityData, hubKubeconfig.CertificateAuthorityData) { // the bootstrap kubeconfig secret is changed, reload the klusterlet agents reloadReason := fmt.Sprintf("the bootstrap secret %s/%s is changed", agentNamespace, helpers.BootstrapHubKubeConfig) - return k.reloadAgents(ctx, controllerContext, agentNamespace, klusterletName, reloadReason) + return k.startRebootstrap(ctx, klusterletName, reloadReason, controllerContext.Recorder(), requeueFunc) } expired, err := isHubKubeconfigSecretExpired(hubKubeconfigSecret) @@ -153,33 +169,70 @@ func (k *bootstrapController) sync(ctx context.Context, controllerContext factor // the hub kubeconfig secret cert is expired, reload klusterlet to restart bootstrap reloadReason := fmt.Sprintf("the hub kubeconfig secret %s/%s is expired", agentNamespace, helpers.HubKubeConfig) - return k.reloadAgents(ctx, controllerContext, agentNamespace, klusterletName, reloadReason) + return k.startRebootstrap(ctx, klusterletName, reloadReason, controllerContext.Recorder(), requeueFunc) } -// reloadAgents reload klusterlet agents by -// 1. make the registration agent re-bootstrap by deleting the current hub kubeconfig secret to -// 2. restart the registration and work agents to reload the new hub ca by deleting the agent deployments -func (k *bootstrapController) reloadAgents(ctx context.Context, ctrlContext factory.SyncContext, namespace, klusterletName, reason string) error { - if err := k.kubeClient.CoreV1().Secrets(namespace).Delete(ctx, helpers.HubKubeConfig, metav1.DeleteOptions{}); err != nil { +func (k *bootstrapController) processRebootstrap(ctx context.Context, agentNamespace, klusterletName string, recorder events.Recorder, requeueFunc func(time.Duration)) error { + deploymentName := fmt.Sprintf("%s-registration-agent", klusterletName) + deployment, err := k.kubeClient.AppsV1().Deployments(agentNamespace).Get(ctx, deploymentName, metav1.GetOptions{}) + if errors.IsNotFound(err) { + return k.completeRebootstrap(ctx, agentNamespace, klusterletName, recorder) + } + if err != nil { return err } - ctrlContext.Recorder().Eventf("HubKubeconfigSecretDeleted", fmt.Sprintf("the hub kubeconfig secret %s/%s is deleted due to %s", - namespace, helpers.HubKubeConfig, reason)) - registrationName := fmt.Sprintf("%s-registration-agent", klusterletName) - if err := k.kubeClient.AppsV1().Deployments(namespace).Delete(ctx, registrationName, metav1.DeleteOptions{}); err != nil { - return err + if deployment.Status.AvailableReplicas == 0 { + return k.completeRebootstrap(ctx, agentNamespace, klusterletName, recorder) } - ctrlContext.Recorder().Eventf("KlusterletAgentDeploymentDeleted", fmt.Sprintf("the deployment %s/%s is deleted due to %s", - namespace, registrationName, reason)) - workName := fmt.Sprintf("%s-work-agent", klusterletName) - if err := k.kubeClient.AppsV1().Deployments(namespace).Delete(ctx, workName, metav1.DeleteOptions{}); err != nil { + // there is still a registration agent pod running. 
Resync in 5 seconds + requeueFunc(5 * time.Second) + return nil +} + +func (k *bootstrapController) startRebootstrap(ctx context.Context, klusterletName, message string, recorder events.Recorder, requeueFunc func(duration time.Duration)) error { + condition := metav1.Condition{ + Type: helpers.KlusterletRebootstrapProgressing, + Status: metav1.ConditionTrue, + Reason: "RebootstrapStarted", + Message: message, + } + _, _, err := helpers.UpdateKlusterletStatus(ctx, k.klusterletClient, klusterletName, + helpers.UpdateKlusterletConditionFn(condition), + ) + if err != nil { return err } - ctrlContext.Recorder().Eventf("KlusterletAgentDeploymentDeleted", fmt.Sprintf("the deployment %s/%s is deleted due to %s", - namespace, workName, reason)) + recorder.Eventf("KlusterletRebootstrap", fmt.Sprintf("The klusterlet %q starts rebootstrapping due to %s", + klusterletName, message)) + + // requeue and check the rebootstrap progress in 5 seconds + requeueFunc(5 * time.Second) + return nil +} +func (k *bootstrapController) completeRebootstrap(ctx context.Context, agentNamespace, klusterletName string, recorder events.Recorder) error { + // delete the existing hub kubeconfig + if err := k.kubeClient.CoreV1().Secrets(agentNamespace).Delete(ctx, helpers.HubKubeConfig, metav1.DeleteOptions{}); err != nil && !errors.IsNotFound(err) { + return err + } + recorder.Eventf("KlusterletRebootstrap", fmt.Sprintf("Secret %s/%s is deleted", agentNamespace, helpers.HubKubeConfig)) + + // update the condition of klusterlet + condition := metav1.Condition{ + Type: helpers.KlusterletRebootstrapProgressing, + Status: metav1.ConditionFalse, + Reason: "RebootstrapCompleted", + Message: fmt.Sprintf("Secret %s/%s is deleted and bootstrap is triggered", agentNamespace, helpers.HubKubeConfig), + } + _, _, err := helpers.UpdateKlusterletStatus(ctx, k.klusterletClient, klusterletName, + helpers.UpdateKlusterletConditionFn(condition), + ) + if err != nil { + return err + } + recorder.Eventf("KlusterletRebootstrap", fmt.Sprintf("Rebootstrap of the klusterlet %q is completed", klusterletName)) return nil } diff --git a/pkg/operators/klusterlet/controllers/bootstrapcontroller/bootstrapcontroller_test.go b/pkg/operators/klusterlet/controllers/bootstrapcontroller/bootstrapcontroller_test.go index af7664a3e69..f4578679460 100644 --- a/pkg/operators/klusterlet/controllers/bootstrapcontroller/bootstrapcontroller_test.go +++ b/pkg/operators/klusterlet/controllers/bootstrapcontroller/bootstrapcontroller_test.go @@ -7,17 +7,19 @@ import ( "crypto/x509" "crypto/x509/pkix" "encoding/pem" - "k8s.io/apimachinery/pkg/fields" - kubeinformers "k8s.io/client-go/informers" - corev1informers "k8s.io/client-go/informers/core/v1" "math/big" - "open-cluster-management.io/registration-operator/pkg/helpers" "testing" "time" + "k8s.io/apimachinery/pkg/api/meta" + "k8s.io/apimachinery/pkg/fields" + kubeinformers "k8s.io/client-go/informers" + corev1informers "k8s.io/client-go/informers/core/v1" + fakeoperatorclient "open-cluster-management.io/api/client/operator/clientset/versioned/fake" operatorinformers "open-cluster-management.io/api/client/operator/informers/externalversions" operatorapiv1 "open-cluster-management.io/api/operator/v1" + "open-cluster-management.io/registration-operator/pkg/helpers" testinghelper "open-cluster-management.io/registration-operator/pkg/helpers/testing" appsv1 "k8s.io/api/apps/v1" @@ -33,10 +35,12 @@ import ( func TestSync(t *testing.T) { cases := []struct { - name string - queueKey string - objects []runtime.Object - 
validateActions func(t *testing.T, actions []clienttesting.Action) + name string + queueKey string + initRebootstrapping bool + objects []runtime.Object + expectedRebootstrapping bool + validateActions func(t *testing.T, actions []clienttesting.Action) + }{ { name: "the changed secret is not bootstrap secret", @@ -48,18 +52,17 @@ func TestSync(t *testing.T) { }, }, { - name: "checking the hub kubeconfig secret", + name: "client certificate expired", queueKey: "test/test", objects: []runtime.Object{ newSecret("bootstrap-hub-kubeconfig", "test", newKubeConfig("https://10.0.118.47:6443")), newHubKubeConfigSecret("test", time.Now().Add(-60*time.Second).UTC()), - newDeployment("test-registration-agent", "test"), - newDeployment("test-work-agent", "test"), }, + expectedRebootstrapping: true, validateActions: func(t *testing.T, actions []clienttesting.Action) { - testinghelper.AssertDelete(t, actions[0], "secrets", "test", "hub-kubeconfig-secret") - testinghelper.AssertDelete(t, actions[1], "deployments", "test", "test-registration-agent") - testinghelper.AssertDelete(t, actions[2], "deployments", "test", "test-work-agent") + if len(actions) != 0 { + t.Errorf("expected no actions to happen, but got %#v", actions) + } }, }, { @@ -88,16 +91,42 @@ func TestSync(t *testing.T) { { name: "the bootstrap secret is changed", queueKey: "test/test", + objects: []runtime.Object{ + newSecret("bootstrap-hub-kubeconfig", "test", newKubeConfig("https://10.0.118.48:6443")), + newHubKubeConfigSecret("test", time.Now().Add(60*time.Second).UTC()), + }, + expectedRebootstrapping: true, + validateActions: func(t *testing.T, actions []clienttesting.Action) { + if len(actions) != 0 { + t.Errorf("expected no actions to happen, but got %#v", actions) + } + }, + }, + { + name: "wait for scaling down", + queueKey: "test/test", + initRebootstrapping: true, + objects: []runtime.Object{ + newSecret("bootstrap-hub-kubeconfig", "test", newKubeConfig("https://10.0.118.48:6443")), + newHubKubeConfigSecret("test", time.Now().Add(60*time.Second).UTC()), + newDeploymentWithAvailableReplicas("test-registration-agent", "test", 1), + }, + expectedRebootstrapping: true, + validateActions: func(t *testing.T, actions []clienttesting.Action) { + testinghelper.AssertGet(t, actions[0], "apps", "v1", "deployments") + }, + }, + { + name: "rebootstrap is completed", + queueKey: "test/test", + initRebootstrapping: true, objects: []runtime.Object{ newSecret("bootstrap-hub-kubeconfig", "test", newKubeConfig("https://10.0.118.48:6443")), newHubKubeConfigSecret("test", time.Now().Add(60*time.Second).UTC()), newDeployment("test-registration-agent", "test"), - newDeployment("test-work-agent", "test"), }, validateActions: func(t *testing.T, actions []clienttesting.Action) { - testinghelper.AssertDelete(t, actions[0], "secrets", "test", "hub-kubeconfig-secret") - testinghelper.AssertDelete(t, actions[1], "deployments", "test", "test-registration-agent") - testinghelper.AssertDelete(t, actions[2], "deployments", "test", "test-work-agent") + testinghelper.AssertDelete(t, actions[1], "secrets", "test", "hub-kubeconfig-secret") }, }, } @@ -105,11 +134,20 @@ func TestSync(t *testing.T) { for _, c := range cases { t.Run(c.name, func(t *testing.T) { fakeKubeClient := fakekube.NewSimpleClientset(c.objects...) 
- - fakeOperatorClient := fakeoperatorclient.NewSimpleClientset() + klusterlet := newKlusterlet("test", "test") + if c.initRebootstrapping { + klusterlet.Status.Conditions = []metav1.Condition{ + { + Type: helpers.KlusterletRebootstrapProgressing, + Status: metav1.ConditionTrue, + }, + } + } + fakeOperatorClient := fakeoperatorclient.NewSimpleClientset(klusterlet) operatorInformers := operatorinformers.NewSharedInformerFactory(fakeOperatorClient, 5*time.Minute) + operatorStore := operatorInformers.Operator().V1().Klusterlets().Informer().GetStore() - if err := operatorStore.Add(newKlusterlet("test", "test")); err != nil { + if err := operatorStore.Add(klusterlet); err != nil { t.Fatal(err) } newOnTermInformer := func(name string) kubeinformers.SharedInformerFactory { @@ -150,6 +188,7 @@ func TestSync(t *testing.T) { controller := &bootstrapController{ kubeClient: fakeKubeClient, + klusterletClient: fakeOperatorClient.OperatorV1().Klusterlets(), klusterletLister: operatorInformers.Operator().V1().Klusterlets().Lister(), secretInformers: secretInformers, } @@ -160,6 +199,15 @@ func TestSync(t *testing.T) { } c.validateActions(t, fakeKubeClient.Actions()) + + klusterlet, err := fakeOperatorClient.OperatorV1().Klusterlets().Get(context.Background(), klusterlet.Name, metav1.GetOptions{}) + if err != nil { + t.Errorf("Expected no errors, but got %v", err) + } + rebootstrapping := meta.IsStatusConditionTrue(klusterlet.Status.Conditions, helpers.KlusterletRebootstrapProgressing) + if c.expectedRebootstrapping != rebootstrapping { + t.Errorf("Expected rebootstrapping is %v, but got %v", c.expectedRebootstrapping, rebootstrapping) + } }) } } @@ -310,3 +358,9 @@ func newDeployment(name, namespace string) *appsv1.Deployment { Spec: appsv1.DeploymentSpec{}, } } + +func newDeploymentWithAvailableReplicas(name, namespace string, availableReplicas int32) *appsv1.Deployment { + deploy := newDeployment(name, namespace) + deploy.Status.AvailableReplicas = availableReplicas + return deploy +} diff --git a/pkg/operators/klusterlet/controllers/klusterletcontroller/klusterlet_controller_test.go b/pkg/operators/klusterlet/controllers/klusterletcontroller/klusterlet_controller_test.go index 1153da2c544..71ca7cd6ce0 100644 --- a/pkg/operators/klusterlet/controllers/klusterletcontroller/klusterlet_controller_test.go +++ b/pkg/operators/klusterlet/controllers/klusterletcontroller/klusterlet_controller_test.go @@ -4,11 +4,12 @@ import ( "context" "crypto/sha256" "fmt" - "k8s.io/client-go/rest" "strings" "testing" "time" + "k8s.io/client-go/rest" + "github.com/openshift/library-go/pkg/operator/resource/resourceapply" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -737,8 +738,32 @@ func TestReplica(t *testing.T) { t.Errorf("Expected non error when sync, %v", err) } + // should have 3 replicas for clusters with multiple nodes assertRegistrationDeployment(t, controller.kubeClient.Actions(), "update", "", "cluster1", 3) assertWorkDeployment(t, controller.kubeClient.Actions(), "update", "cluster1", operatorapiv1.InstallModeDefault, 3) + + klusterlet = newKlusterlet("klusterlet", "testns", "cluster1") + klusterlet.Status.Conditions = []metav1.Condition{ + { + Type: helpers.KlusterletRebootstrapProgressing, + Status: metav1.ConditionTrue, + }, + } + if err := controller.operatorStore.Update(klusterlet); err != nil { + t.Fatal(err) + } + + controller.kubeClient.ClearActions() + controller.operatorClient.ClearActions() + + err = controller.controller.sync(context.TODO(), syncContext) + if err != nil { + 
t.Errorf("Expected non error when sync, %v", err) + } + + // should have 0 replicas for klusterlet in rebootstrapping state + assertRegistrationDeployment(t, controller.kubeClient.Actions(), "update", "", "cluster1", 0) + assertWorkDeployment(t, controller.kubeClient.Actions(), "update", "cluster1", operatorapiv1.InstallModeDefault, 0) } func TestClusterNameChange(t *testing.T) { diff --git a/pkg/operators/klusterlet/controllers/klusterletcontroller/klusterlet_runtime_reconcile.go b/pkg/operators/klusterlet/controllers/klusterletcontroller/klusterlet_runtime_reconcile.go index ca4be83cf83..661a5448d25 100644 --- a/pkg/operators/klusterlet/controllers/klusterletcontroller/klusterlet_runtime_reconcile.go +++ b/pkg/operators/klusterlet/controllers/klusterletcontroller/klusterlet_runtime_reconcile.go @@ -7,6 +7,8 @@ package klusterletcontroller import ( "context" "fmt" + "strings" + "github.com/openshift/library-go/pkg/assets" "github.com/openshift/library-go/pkg/operator/events" "github.com/openshift/library-go/pkg/operator/resource/resourceapply" @@ -17,7 +19,6 @@ import ( operatorapiv1 "open-cluster-management.io/api/operator/v1" "open-cluster-management.io/registration-operator/manifests" "open-cluster-management.io/registration-operator/pkg/helpers" - "strings" ) // runtimeReconcile ensure all runtime of klusterlet is applied @@ -39,6 +40,14 @@ func (r *runtimeReconcile) reconcile(ctx context.Context, klusterlet *operatorap } } + // Check if the klusterlet is in rebootstrapping state + // Both registration agent and work agent are scaled to 0 if the klusterlet is + // in rebootstrapping state. + runtimeConfig := config + if meta.IsStatusConditionTrue(klusterlet.Status.Conditions, helpers.KlusterletRebootstrapProgressing) { + runtimeConfig.Replica = 0 + } + // Deploy registration agent _, generationStatus, err := helpers.ApplyDeployment( ctx, @@ -50,7 +59,7 @@ func (r *runtimeReconcile) reconcile(ctx context.Context, klusterlet *operatorap if err != nil { return nil, err } - objData := assets.MustCreateAssetFromTemplate(name, template, config).Data + objData := assets.MustCreateAssetFromTemplate(name, template, runtimeConfig).Data helpers.SetRelatedResourcesStatusesWithObj(&klusterlet.Status.RelatedResources, objData) return objData, nil }, @@ -66,7 +75,7 @@ func (r *runtimeReconcile) reconcile(ctx context.Context, klusterlet *operatorap // If cluster name is empty, read cluster name from hub config secret. // registration-agent generated the cluster name and set it into hub config secret. - workConfig := config + workConfig := runtimeConfig if workConfig.ClusterName == "" { workConfig.ClusterName, err = r.getClusterNameFromHubKubeConfigSecret(ctx, config.AgentNamespace, klusterlet) if err != nil { @@ -75,11 +84,13 @@ func (r *runtimeReconcile) reconcile(ctx context.Context, klusterlet *operatorap } // Deploy work agent. - // * work agent is scaled to 0 only when degrade is true with the reason is HubKubeConfigSecretMissing. - // It is to ensure a fast startup of work agent when the klusterlet is bootstrapped at the first time. - // * The work agent should not be scaled to 0 in degraded condition with other reasons, - // because we still need work agent running even though the hub kubconfig is missing some certain permission. - // It can ensure work agent to clean up the resources defined in manifestworks when cluster is detaching from the hub. + // Work agent is scaled to 0 when + // 1). the klusterlet is in re-bootstrapping state; + // 2). 
the hub connection is degraded with the reason HubKubeConfigSecretMissing. This ensures a fast startup of the work + // agent when the klusterlet is bootstrapped for the first time. The work agent should not be scaled to 0 + // for degraded conditions with other reasons, because we still need the work agent running even though the hub + // kubeconfig is missing certain permissions; this lets the work agent clean up the resources defined + // in manifestworks when the cluster is detaching from the hub. hubConnectionDegradedCondition := meta.FindStatusCondition(klusterlet.Status.Conditions, hubConnectionDegraded) if hubConnectionDegradedCondition == nil { workConfig.Replica = 0 diff --git a/pkg/operators/klusterlet/options.go b/pkg/operators/klusterlet/options.go index 62186cc0728..8e30f47dce7 100644 --- a/pkg/operators/klusterlet/options.go +++ b/pkg/operators/klusterlet/options.go @@ -3,11 +3,12 @@ package klusterlet import ( "context" "io/ioutil" + "time" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" corev1informers "k8s.io/client-go/informers/core/v1" "open-cluster-management.io/registration-operator/pkg/helpers" - "time" "github.com/openshift/library-go/pkg/controller/controllercmd" apiextensionsclient "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" @@ -138,6 +139,7 @@ func (o *Options) RunKlusterletOperator(ctx context.Context, controllerContext * bootstrapController := bootstrapcontroller.NewBootstrapController( kubeClient, + operatorClient.OperatorV1().Klusterlets(), operatorInformer.Operator().V1().Klusterlets(), secretInformers, controllerContext.EventRecorder, diff --git a/test/e2e/common.go b/test/e2e/common.go index eb1b2a4833b..9cf7198baa9 100644 --- a/test/e2e/common.go +++ b/test/e2e/common.go @@ -165,8 +165,8 @@ func (t *Tester) CreateKlusterlet(name, clusterName, klusterletNamespace string, Name: name, }, Spec: operatorapiv1.KlusterletSpec{ - RegistrationImagePullSpec: "quay.io/open-cluster-management/registration:latest", - WorkImagePullSpec: "quay.io/open-cluster-management/work:latest", + RegistrationImagePullSpec: getRegistrationImage(), + WorkImagePullSpec: getWorkImage(), ExternalServerURLs: []operatorapiv1.ServerURL{ { URL: "https://localhost", @@ -255,8 +255,8 @@ func (t *Tester) CreatePureHostedKlusterlet(name, clusterName string) (*operator Name: name, }, Spec: operatorapiv1.KlusterletSpec{ - RegistrationImagePullSpec: "quay.io/open-cluster-management/registration:latest", - WorkImagePullSpec: "quay.io/open-cluster-management/work:latest", + RegistrationImagePullSpec: getRegistrationImage(), + WorkImagePullSpec: getWorkImage(), ExternalServerURLs: []operatorapiv1.ServerURL{ { URL: "https://localhost", @@ -702,3 +702,17 @@ func (t *Tester) CheckManagedClusterAddOnStatus(managedClusterNamespace, addOnNa return nil } + +func getRegistrationImage() string { + if image := os.Getenv("REGISTRATION_IMAGE"); len(image) > 0 { + return image + } + return "quay.io/open-cluster-management/registration:latest" +} + +func getWorkImage() string { + if image := os.Getenv("WORK_IMAGE"); len(image) > 0 { + return image + } + return "quay.io/open-cluster-management/work:latest" +} diff --git a/test/integration/klusterlet_test.go b/test/integration/klusterlet_test.go index e9584a49203..d7158542c99 100644 --- a/test/integration/klusterlet_test.go +++ b/test/integration/klusterlet_test.go @@ -14,6 +14,7 @@ import ( appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/meta" metav1 
"k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/rand" "k8s.io/client-go/rest" @@ -481,11 +482,15 @@ var _ = ginkgo.Describe("Klusterlet", func() { return true }, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue()) - klusterlet, err = operatorClient.OperatorV1().Klusterlets().Get(context.Background(), klusterlet.Name, metav1.GetOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) - klusterlet.Spec.ClusterName = "cluster2" - _, err = operatorClient.OperatorV1().Klusterlets().Update(context.Background(), klusterlet, metav1.UpdateOptions{}) - gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Eventually(func() error { + klusterlet, err = operatorClient.OperatorV1().Klusterlets().Get(context.Background(), klusterlet.Name, metav1.GetOptions{}) + if err != nil { + return err + } + klusterlet.Spec.ClusterName = "cluster2" + _, err = operatorClient.OperatorV1().Klusterlets().Update(context.Background(), klusterlet, metav1.UpdateOptions{}) + return err + }, eventuallyTimeout, eventuallyInterval).Should(gomega.Succeed()) gomega.Eventually(func() bool { actual, err := kubeClient.AppsV1().Deployments(klusterletNamespace).Get(context.Background(), workDeploymentName, metav1.GetOptions{}) @@ -754,6 +759,15 @@ var _ = ginkgo.Describe("Klusterlet", func() { hubSecret.Data["kubeconfig"] = util.NewKubeConfig(&rest.Config{Host: "https://nohost"}) _, err = kubeClient.CoreV1().Secrets(klusterletNamespace).Update(context.Background(), hubSecret, metav1.UpdateOptions{}) gomega.Expect(err).ToNot(gomega.HaveOccurred()) + + // Update replica of deployment + registrationDeployment, err = kubeClient.AppsV1().Deployments(klusterletNamespace).Get(context.Background(), registrationDeploymentName, metav1.GetOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + registrationDeployment = registrationDeployment.DeepCopy() + registrationDeployment.Status.AvailableReplicas = 0 + _, err = kubeClient.AppsV1().Deployments(klusterletNamespace).UpdateStatus(context.Background(), registrationDeployment, metav1.UpdateOptions{}) + gomega.Expect(err).ToNot(gomega.HaveOccurred()) + util.AssertKlusterletCondition(klusterlet.Name, operatorClient, "HubConnectionDegraded", "BootstrapSecretFunctional,HubKubeConfigSecretMissing", metav1.ConditionTrue) }) @@ -798,7 +812,7 @@ var _ = ginkgo.Describe("Klusterlet", func() { }) }) - ginkgo.Context("bootstrap reconciliation", func() { + ginkgo.Context("rebootstrap", func() { ginkgo.BeforeEach(func() { registrationDeploymentName = fmt.Sprintf("%s-registration-agent", klusterlet.Name) workDeploymentName = fmt.Sprintf("%s-work-agent", klusterlet.Name) @@ -807,6 +821,73 @@ var _ = ginkgo.Describe("Klusterlet", func() { gomega.Expect(operatorClient.OperatorV1().Klusterlets().Delete(context.Background(), klusterlet.Name, metav1.DeleteOptions{})).To(gomega.BeNil()) }) + assertRebootstrap := func() { + // Check if the rebootstrap is started + gomega.Eventually(func() error { + klusterlet, err := operatorClient.OperatorV1().Klusterlets().Get(context.Background(), klusterlet.Name, metav1.GetOptions{}) + if err != nil { + return err + } + if meta.IsStatusConditionTrue(klusterlet.Status.Conditions, helpers.KlusterletRebootstrapProgressing) { + return nil + } + + return fmt.Errorf("Rebootstrap is not started yet") + }, eventuallyTimeout, eventuallyInterval).Should(gomega.Succeed()) + + // Make sure the deployments are scaled down to 0 during rebootstrapping; + gomega.Eventually(func() error { + // return if the rebootstrap is completed + klusterlet, err := 
operatorClient.OperatorV1().Klusterlets().Get(context.Background(), klusterlet.Name, metav1.GetOptions{}) + if err != nil { + return err + } + if meta.IsStatusConditionFalse(klusterlet.Status.Conditions, helpers.KlusterletRebootstrapProgressing) { + return nil + } + + // check the Replicas of deployments + registrationDeployment, err := kubeClient.AppsV1().Deployments(klusterletNamespace).Get(context.Background(), registrationDeploymentName, metav1.GetOptions{}) + if err != nil { + return err + } + + if *registrationDeployment.Spec.Replicas != 0 { + return fmt.Errorf("registrationDeployment.Spec.Replicas is not 0: %d", *registrationDeployment.Spec.Replicas) + } + + workDeployment, err := kubeClient.AppsV1().Deployments(klusterletNamespace).Get(context.Background(), workDeploymentName, metav1.GetOptions{}) + if err != nil { + return err + } + + if *workDeployment.Spec.Replicas != 0 { + return fmt.Errorf("workDeployment.Spec.Replicas is not 0: %d", *workDeployment.Spec.Replicas) + } + + return nil + }, eventuallyTimeout, eventuallyInterval).Should(gomega.Succeed()) + + // check if the rebootstrap is completed + gomega.Eventually(func() error { + klusterlet, err := operatorClient.OperatorV1().Klusterlets().Get(context.Background(), klusterlet.Name, metav1.GetOptions{}) + if err != nil { + return err + } + if !meta.IsStatusConditionFalse(klusterlet.Status.Conditions, helpers.KlusterletRebootstrapProgressing) { + return fmt.Errorf("Rebootstrap is not completed yet") + } + + _, err = kubeClient.CoreV1().Secrets(klusterletNamespace).Get(context.Background(), helpers.HubKubeConfig, metav1.GetOptions{}) + if errors.IsNotFound(err) { + return nil + } else if err != nil { + return err + } + return fmt.Errorf("hub kubeconfig secret %s/%s is not deleted yet", klusterletNamespace, helpers.HubKubeConfig) + }, eventuallyTimeout, eventuallyInterval).Should(gomega.Succeed()) + } + ginkgo.It("should reload the klusterlet after the bootstrap secret is changed", func() { _, err := operatorClient.OperatorV1().Klusterlets().Create(context.Background(), klusterlet, metav1.CreateOptions{}) gomega.Expect(err).NotTo(gomega.HaveOccurred()) @@ -839,19 +920,6 @@ var _ = ginkgo.Describe("Klusterlet", func() { return true }, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue()) - // Get the deployments - var registrationDeployment *appsv1.Deployment - var workDeployment *appsv1.Deployment - gomega.Eventually(func() bool { - if registrationDeployment, err = kubeClient.AppsV1().Deployments(klusterletNamespace).Get(context.Background(), registrationDeploymentName, metav1.GetOptions{}); err != nil { - return false - } - if workDeployment, err = kubeClient.AppsV1().Deployments(klusterletNamespace).Get(context.Background(), workDeploymentName, metav1.GetOptions{}); err != nil { - return false - } - return true - }, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue()) - // Change the bootstrap secret server address bootStrapSecret, err = kubeClient.CoreV1().Secrets(klusterletNamespace).Get(context.Background(), helpers.BootstrapHubKubeConfig, metav1.GetOptions{}) gomega.Expect(err).ToNot(gomega.HaveOccurred()) @@ -860,24 +928,7 @@ var _ = ginkgo.Describe("Klusterlet", func() { _, err = kubeClient.CoreV1().Secrets(klusterletNamespace).Update(context.Background(), bootStrapSecret, metav1.UpdateOptions{}) gomega.Expect(err).ToNot(gomega.HaveOccurred()) - // Make sure the deployments are deleted and recreated - gomega.Eventually(func() bool { - lastRegistrationDeployment, err := 
kubeClient.AppsV1().Deployments(klusterletNamespace).Get(context.Background(), registrationDeploymentName, metav1.GetOptions{}) - if err != nil { - return false - } - lastWorkDeployment, err := kubeClient.AppsV1().Deployments(klusterletNamespace).Get(context.Background(), workDeploymentName, metav1.GetOptions{}) - if err != nil { - return false - } - if registrationDeployment.UID == lastRegistrationDeployment.UID { - return false - } - if workDeployment.UID == lastWorkDeployment.UID { - return false - } - return true - }, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue()) + assertRebootstrap() }) ginkgo.It("should reload the klusterlet after the hub secret is expired", func() { @@ -897,19 +948,6 @@ var _ = ginkgo.Describe("Klusterlet", func() { _, err = kubeClient.CoreV1().Secrets(klusterletNamespace).Create(context.Background(), bootStrapSecret, metav1.CreateOptions{}) gomega.Expect(err).ToNot(gomega.HaveOccurred()) - // Get the deployments - var registrationDeployment *appsv1.Deployment - var workDeployment *appsv1.Deployment - gomega.Eventually(func() bool { - if registrationDeployment, err = kubeClient.AppsV1().Deployments(klusterletNamespace).Get(context.Background(), registrationDeploymentName, metav1.GetOptions{}); err != nil { - return false - } - if workDeployment, err = kubeClient.AppsV1().Deployments(klusterletNamespace).Get(context.Background(), workDeploymentName, metav1.GetOptions{}); err != nil { - return false - } - return true - }, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue()) - // Update the hub secret and make it same with the bootstrap secret gomega.Eventually(func() bool { hubSecret, err := kubeClient.CoreV1().Secrets(klusterletNamespace).Get(context.Background(), helpers.HubKubeConfig, metav1.GetOptions{}) @@ -926,24 +964,7 @@ var _ = ginkgo.Describe("Klusterlet", func() { return true }, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue()) - // Make sure the deployments are deleted and recreated - gomega.Eventually(func() bool { - lastRegistrationDeployment, err := kubeClient.AppsV1().Deployments(klusterletNamespace).Get(context.Background(), registrationDeploymentName, metav1.GetOptions{}) - if err != nil { - return false - } - lastWorkDeployment, err := kubeClient.AppsV1().Deployments(klusterletNamespace).Get(context.Background(), workDeploymentName, metav1.GetOptions{}) - if err != nil { - return false - } - if registrationDeployment.UID == lastRegistrationDeployment.UID { - return false - } - if workDeployment.UID == lastWorkDeployment.UID { - return false - } - return true - }, eventuallyTimeout, eventuallyInterval).Should(gomega.BeTrue()) + assertRebootstrap() }) })
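
Note (not part of the patch): the following is a minimal, self-contained Go sketch of the rebootstrap flow this diff introduces, assuming a simplified model in place of the real Kubernetes and operator API types. The bootstrap controller sets the KlusterletRebootstrapProgressing condition when the bootstrap secret changes or the hub kubeconfig expires; while the condition is true, the klusterlet controller renders the agent deployments with 0 replicas; once the registration agent has no available replicas, the controller deletes the hub kubeconfig secret and clears the condition so a fresh bootstrap can begin. All type and field names below are illustrative stand-ins.

package main

import "fmt"

// klusterletState stands in for the Klusterlet status; the boolean models the
// KlusterletRebootstrapProgressing ("RebootstrapProgressing") condition.
type klusterletState struct {
	rebootstrapProgressing bool
}

// clusterState stands in for the resources the controllers act on.
type clusterState struct {
	klusterlet                    klusterletState
	registrationAvailableReplicas int
	hubKubeconfigSecretExists     bool
}

// startRebootstrap mirrors bootstrapController.startRebootstrap: it only flips
// the condition; scaling down is left to the klusterlet controller.
func startRebootstrap(c *clusterState) {
	c.klusterlet.rebootstrapProgressing = true
}

// reconcileRuntime mirrors runtimeReconcile.reconcile: while rebootstrapping,
// the registration and work deployments are rendered with 0 replicas.
func reconcileRuntime(c *clusterState) {
	if c.klusterlet.rebootstrapProgressing {
		c.registrationAvailableReplicas = 0
	}
}

// processRebootstrap mirrors bootstrapController.processRebootstrap: wait until
// the registration agent is gone, then delete the hub kubeconfig secret and
// clear the condition so the agents can bootstrap against the new hub.
func processRebootstrap(c *clusterState) string {
	if c.registrationAvailableReplicas > 0 {
		return "requeue in 5s: registration agent still running"
	}
	c.hubKubeconfigSecretExists = false
	c.klusterlet.rebootstrapProgressing = false
	return "rebootstrap completed: hub kubeconfig deleted, condition cleared"
}

func main() {
	c := &clusterState{registrationAvailableReplicas: 3, hubKubeconfigSecretExists: true}

	startRebootstrap(c)                // bootstrap secret changed or hub kubeconfig expired
	fmt.Println(processRebootstrap(c)) // still waiting for the scale-down
	reconcileRuntime(c)                // klusterlet controller scales the agents to 0
	fmt.Println(processRebootstrap(c)) // completes: secret removed, condition cleared
}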