From 81f261a54e3a9151e2d6a8ab724a1d89ac6abbad Mon Sep 17 00:00:00 2001 From: Mikhail Fedosin Date: Wed, 27 Sep 2023 20:30:08 +0200 Subject: [PATCH] Add provider healthcheck controller --- api/v1alpha2/conditions_consts.go | 5 +- cmd/main.go | 8 + .../controller/genericprovider_controller.go | 6 +- .../healthcheck/healthcheck_controller.go | 198 +++++++++++++++++ .../healthcheck_controller_test.go | 200 ++++++++++++++++++ internal/controller/healthcheck/suite_test.go | 77 +++++++ 6 files changed, 488 insertions(+), 6 deletions(-) create mode 100644 internal/controller/healthcheck/healthcheck_controller.go create mode 100644 internal/controller/healthcheck/healthcheck_controller_test.go create mode 100644 internal/controller/healthcheck/suite_test.go diff --git a/api/v1alpha2/conditions_consts.go b/api/v1alpha2/conditions_consts.go index f595e439f..e125eb8bb 100644 --- a/api/v1alpha2/conditions_consts.go +++ b/api/v1alpha2/conditions_consts.go @@ -44,7 +44,7 @@ const ( // CAPIVersionIncompatibilityReason documents that the provider version is incompatible with operator. CAPIVersionIncompatibilityReason = "CAPIVersionIncompatibility" - // ComponentsFetchErrorReason documents that an error occurred fetching the componets. + // ComponentsFetchErrorReason documents that an error occurred fetching the components. ComponentsFetchErrorReason = "ComponentsFetchError" // OldComponentsDeletionErrorReason documents that an error occurred deleting the old components prior to upgrading. @@ -55,6 +55,9 @@ const ( // InvalidGithubTokenReason documents that the provided github token is invalid. InvalidGithubTokenReason = "InvalidGithubTokenError" + + // NoDeploymentAvailableConditionReason documents that there is no Available condition for provider deployment yet. + NoDeploymentAvailableConditionReason = "NoDeploymentAvailableConditionReason" ) const ( diff --git a/cmd/main.go b/cmd/main.go index 2ee9e1d1b..381df1ab5 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -43,6 +43,7 @@ import ( operatorv1alpha1 "sigs.k8s.io/cluster-api-operator/api/v1alpha1" operatorv1 "sigs.k8s.io/cluster-api-operator/api/v1alpha2" providercontroller "sigs.k8s.io/cluster-api-operator/internal/controller" + healtchcheckcontroller "sigs.k8s.io/cluster-api-operator/internal/controller/healthcheck" ) var ( @@ -233,6 +234,13 @@ func setupReconcilers(mgr ctrl.Manager) { setupLog.Error(err, "unable to create controller", "controller", "AddonProvider") os.Exit(1) } + + if err := (&healtchcheckcontroller.ProviderHealthCheckReconciler{ + Client: mgr.GetClient(), + }).SetupWithManager(mgr, concurrency(concurrencyNumber)); err != nil { + setupLog.Error(err, "unable to create controller", "controller", "Healthcheck") + os.Exit(1) + } } func setupWebhooks(mgr ctrl.Manager) { diff --git a/internal/controller/genericprovider_controller.go b/internal/controller/genericprovider_controller.go index 04a179d1d..197f6a8d3 100644 --- a/internal/controller/genericprovider_controller.go +++ b/internal/controller/genericprovider_controller.go @@ -155,11 +155,7 @@ func patchProvider(ctx context.Context, provider genericprovider.GenericProvider operatorv1.ProviderInstalledCondition, } - conditions.SetSummary(provider, conditions.WithConditions(conds...)) - - options = append(options, - patch.WithOwnedConditions{Conditions: append(conds, clusterv1.ReadyCondition)}, - ) + options = append(options, patch.WithOwnedConditions{Conditions: conds}) return patchHelper.Patch(ctx, provider.GetObject(), options...) } diff --git a/internal/controller/healthcheck/healthcheck_controller.go b/internal/controller/healthcheck/healthcheck_controller.go new file mode 100644 index 000000000..1b5f14757 --- /dev/null +++ b/internal/controller/healthcheck/healthcheck_controller.go @@ -0,0 +1,198 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package healthcheck + +import ( + "context" + "fmt" + "time" + + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/types" + operatorv1 "sigs.k8s.io/cluster-api-operator/api/v1alpha2" + "sigs.k8s.io/cluster-api-operator/internal/controller/genericprovider" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/util/conditions" + "sigs.k8s.io/cluster-api/util/patch" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller" + "sigs.k8s.io/controller-runtime/pkg/event" + "sigs.k8s.io/controller-runtime/pkg/predicate" + "sigs.k8s.io/controller-runtime/pkg/reconcile" +) + +type ProviderHealthCheckReconciler struct { + Client client.Client +} + +const ( + providerLabelKey = "cluster.x-k8s.io/provider" +) + +func (r *ProviderHealthCheckReconciler) SetupWithManager(mgr ctrl.Manager, options controller.Options) error { + return ctrl.NewControllerManagedBy(mgr). + For(&appsv1.Deployment{}, builder.WithPredicates(providerDeploymentPredicates())). + WithOptions(options). + Complete(r) +} + +func (r *ProviderHealthCheckReconciler) Reconcile(ctx context.Context, req reconcile.Request) (_ reconcile.Result, reterr error) { + log := ctrl.LoggerFrom(ctx) + + log.Info("Checking provider health") + + result := ctrl.Result{} + + deployment := &appsv1.Deployment{} + + if err := r.Client.Get(ctx, req.NamespacedName, deployment); err != nil { + // Error reading the object - requeue the request. + return result, err + } + + // There should be just one owner reference - to a Provider resource. + if len(deployment.GetOwnerReferences()) != 1 { + return result, fmt.Errorf("incorrect number of owner references for provider deployment %s", req.NamespacedName) + } + + deploymentOwner := deployment.GetOwnerReferences()[0] + + deploymentAvailableCondition := getDeploymentCondition(deployment.Status, appsv1.DeploymentAvailable) + + typedProvider, err := r.getGenericProvider(ctx, deploymentOwner.Kind, deploymentOwner.Name, req.Namespace) + if err != nil { + return result, err + } + + // Stop earlier if this provider is not fully installed yet. + if !conditions.IsTrue(typedProvider, operatorv1.ProviderInstalledCondition) { + return ctrl.Result{RequeueAfter: 5 * time.Second}, nil + } + + // Compare provider's Ready condition with the deployment's Available condition and stop if they already match. + currentReadyCondition := conditions.Get(typedProvider, clusterv1.ReadyCondition) + if currentReadyCondition != nil && deploymentAvailableCondition != nil && currentReadyCondition.Status == deploymentAvailableCondition.Status { + return result, nil + } + + // Initialize the patch helper + patchHelper, err := patch.NewHelper(typedProvider.GetObject(), r.Client) + if err != nil { + return result, err + } + + if deploymentAvailableCondition != nil { + conditions.Set(typedProvider, &clusterv1.Condition{ + Type: clusterv1.ReadyCondition, + Status: deploymentAvailableCondition.Status, + Reason: deploymentAvailableCondition.Reason, + }) + } else { + conditions.Set(typedProvider, &clusterv1.Condition{ + Type: clusterv1.ReadyCondition, + Status: corev1.ConditionFalse, + Reason: operatorv1.NoDeploymentAvailableConditionReason, + }) + } + + // Don't requeue immediately if the deployment is not ready, but rather wait 5 seconds. + if conditions.IsFalse(typedProvider, clusterv1.ReadyCondition) { + result = ctrl.Result{RequeueAfter: 5 * time.Second} + } + + options := patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{clusterv1.ReadyCondition}} + + return result, patchHelper.Patch(ctx, typedProvider.GetObject(), options) +} + +func (r *ProviderHealthCheckReconciler) getGenericProvider(ctx context.Context, providerKind, providerName, providerNamespace string) (genericprovider.GenericProvider, error) { + switch providerKind { + case "CoreProvider": + provider := &operatorv1.CoreProvider{} + if err := r.Client.Get(ctx, types.NamespacedName{Name: providerName, Namespace: providerNamespace}, provider); err != nil { + return nil, err + } + + return &genericprovider.CoreProviderWrapper{CoreProvider: provider}, nil + case "BootstrapProvider": + provider := &operatorv1.BootstrapProvider{} + if err := r.Client.Get(ctx, types.NamespacedName{Name: providerName, Namespace: providerNamespace}, provider); err != nil { + return nil, err + } + + return &genericprovider.BootstrapProviderWrapper{BootstrapProvider: provider}, nil + case "ControlPlaneProvider": + provider := &operatorv1.ControlPlaneProvider{} + if err := r.Client.Get(ctx, types.NamespacedName{Name: providerName, Namespace: providerNamespace}, provider); err != nil { + return nil, err + } + + return &genericprovider.ControlPlaneProviderWrapper{ControlPlaneProvider: provider}, nil + case "InfrastructureProvider": + provider := &operatorv1.InfrastructureProvider{} + if err := r.Client.Get(ctx, types.NamespacedName{Name: providerName, Namespace: providerNamespace}, provider); err != nil { + return nil, err + } + + return &genericprovider.InfrastructureProviderWrapper{InfrastructureProvider: provider}, nil + case "AddonProvider": + provider := &operatorv1.AddonProvider{} + if err := r.Client.Get(ctx, types.NamespacedName{Name: providerName, Namespace: providerNamespace}, provider); err != nil { + return nil, err + } + + return &genericprovider.AddonProviderWrapper{AddonProvider: provider}, nil + default: + return nil, fmt.Errorf("failed to cast interface for type: %s", providerKind) + } +} + +// getDeploymentCondition returns the deployment condition with the provided type. +func getDeploymentCondition(status appsv1.DeploymentStatus, condType appsv1.DeploymentConditionType) *appsv1.DeploymentCondition { + for i := range status.Conditions { + c := status.Conditions[i] + if c.Type == condType { + return &c + } + } + + return nil +} + +func providerDeploymentPredicates() predicate.Funcs { + isProviderDeployment := func(obj runtime.Object) bool { + clusterOperator, ok := obj.(*appsv1.Deployment) + if !ok { + panic("expected to get an of object of type appsv1.Deployment") + } + + _, found := clusterOperator.GetLabels()[providerLabelKey] + + return found + } + + return predicate.Funcs{ + CreateFunc: func(e event.CreateEvent) bool { return false }, + UpdateFunc: func(e event.UpdateEvent) bool { return isProviderDeployment(e.ObjectNew) }, + GenericFunc: func(e event.GenericEvent) bool { return false }, + DeleteFunc: func(e event.DeleteEvent) bool { return false }, + } +} diff --git a/internal/controller/healthcheck/healthcheck_controller_test.go b/internal/controller/healthcheck/healthcheck_controller_test.go new file mode 100644 index 000000000..9049c2ef5 --- /dev/null +++ b/internal/controller/healthcheck/healthcheck_controller_test.go @@ -0,0 +1,200 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package healthcheck + +import ( + "testing" + + . "github.com/onsi/gomega" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/controller-runtime/pkg/client" + + operatorv1 "sigs.k8s.io/cluster-api-operator/api/v1alpha2" + "sigs.k8s.io/cluster-api-operator/internal/controller/genericprovider" +) + +const ( + testMetadata = ` +apiVersion: clusterctl.cluster.x-k8s.io/v1alpha3 +releaseSeries: + - major: 0 + minor: 4 + contract: v1alpha4 +` + testComponents = ` +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + cluster.x-k8s.io/provider: cluster-api + control-plane: controller-manager + name: capi-controller-manager + namespace: capi-system +spec: + replicas: 1 + selector: + matchLabels: + cluster.x-k8s.io/provider: cluster-api + control-plane: controller-manager + template: + metadata: + labels: + cluster.x-k8s.io/provider: cluster-api + control-plane: controller-manager + spec: + containers: + - image: gcr.io/google-samples/hello-app:1.0 + name: manager + ports: + - containerPort: 8080 + resources: + requests: + cpu: 200m +` + + testCurrentVersion = "v0.4.2" +) + +func insertDummyConfig(provider genericprovider.GenericProvider) { + spec := provider.GetSpec() + spec.FetchConfig = &operatorv1.FetchConfiguration{ + Selector: &metav1.LabelSelector{ + MatchLabels: map[string]string{ + "test": "dummy-config", + }, + }, + } + provider.SetSpec(spec) +} + +func dummyConfigMap(ns, name string) *corev1.ConfigMap { + return &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: ns, + Labels: map[string]string{ + "test": "dummy-config", + }, + }, + Data: map[string]string{ + "metadata": testMetadata, + "components": testComponents, + }, + } +} + +func TestReconcilerReadyConditions(t *testing.T) { + testCases := []struct { + name string + expectedAvailability corev1.ConditionStatus + }{ + { + name: "correct CoreProvider", + expectedAvailability: corev1.ConditionTrue, + }, + { + name: "invalid CoreProvider", + expectedAvailability: corev1.ConditionFalse, + }, + } + + namespace := "capi-system" + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + g := NewWithT(t) + + provider := &genericprovider.CoreProviderWrapper{ + CoreProvider: &operatorv1.CoreProvider{ + ObjectMeta: metav1.ObjectMeta{ + Name: "cluster-api", + }, + Spec: operatorv1.CoreProviderSpec{ + ProviderSpec: operatorv1.ProviderSpec{ + Version: testCurrentVersion, + }, + }, + }, + } + + g.Expect(env.EnsureNamespaceExists(ctx, namespace)).To(Succeed()) + + g.Expect(env.CreateAndWait(ctx, dummyConfigMap(namespace, testCurrentVersion))).To(Succeed()) + + insertDummyConfig(provider) + provider.SetNamespace(namespace) + + g.Expect(env.CreateAndWait(ctx, provider.GetObject())).To(Succeed()) + + g.Eventually(func() bool { + deployment := &appsv1.Deployment{} + + if err := env.Client.Get(ctx, types.NamespacedName{ + Name: "capi-controller-manager", + Namespace: namespace, + }, deployment); err != nil { + return false + } + + deployment.Status.Conditions = []appsv1.DeploymentCondition{ + { + Type: appsv1.DeploymentAvailable, + Status: tc.expectedAvailability, + }, + } + + if err := env.Status().Update(ctx, deployment); err != nil { + return false + } + + return true + }, timeout).Should(BeTrue()) + + g.Eventually(func() bool { + if err := env.Get(ctx, client.ObjectKeyFromObject(provider.GetObject()), provider.GetObject()); err != nil { + return false + } + + for _, cond := range provider.GetStatus().Conditions { + if cond.Type == clusterv1.ReadyCondition { + t.Log(t.Name(), provider.GetName(), cond) + if cond.Status == tc.expectedAvailability { + return true + } + } + } + + return false + }, timeout).Should(BeTrue()) + + objs := []client.Object{provider.GetObject()} + + objs = append(objs, &corev1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: testCurrentVersion, + Namespace: namespace, + }, + }) + + g.Expect(env.CleanupAndWait(ctx, objs...)).To(Succeed()) + }) + } +} diff --git a/internal/controller/healthcheck/suite_test.go b/internal/controller/healthcheck/suite_test.go new file mode 100644 index 000000000..04b4b69a4 --- /dev/null +++ b/internal/controller/healthcheck/suite_test.go @@ -0,0 +1,77 @@ +/* +Copyright 2023 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package healthcheck + +import ( + "fmt" + "os" + "testing" + "time" + + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/controller" + + operatorv1 "sigs.k8s.io/cluster-api-operator/api/v1alpha2" + providercontroller "sigs.k8s.io/cluster-api-operator/internal/controller" + "sigs.k8s.io/cluster-api-operator/internal/envtest" +) + +const ( + timeout = time.Second * 30 +) + +var ( + env *envtest.Environment + ctx = ctrl.SetupSignalHandler() +) + +func TestMain(m *testing.M) { + fmt.Println("Creating new test environment") + + env = envtest.New() + + if err := (&providercontroller.GenericProviderReconciler{ + Provider: &operatorv1.CoreProvider{}, + ProviderList: &operatorv1.CoreProviderList{}, + Client: env, + }).SetupWithManager(env.Manager, controller.Options{MaxConcurrentReconciles: 1}); err != nil { + panic(fmt.Sprintf("Failed to start CoreProviderReconciler: %v", err)) + } + + if err := (&ProviderHealthCheckReconciler{ + Client: env, + }).SetupWithManager(env.Manager, controller.Options{MaxConcurrentReconciles: 1}); err != nil { + panic(fmt.Sprintf("Failed to start Healthcheck controller: %v", err)) + } + + go func() { + if err := env.Start(ctx); err != nil { + panic(fmt.Sprintf("Failed to start the envtest manager: %v", err)) + } + }() + <-env.Manager.Elected() + + // Run tests + code := m.Run() + // Tearing down the test environment + if err := env.Stop(); err != nil { + panic(fmt.Sprintf("Failed to stop the envtest: %v", err)) + } + + // Report exit code + os.Exit(code) +}