diff --git a/cmd/main.go b/cmd/main.go
index 2ee9e1d1b..72b00e0e5 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -43,6 +43,7 @@ import (
 	operatorv1alpha1 "sigs.k8s.io/cluster-api-operator/api/v1alpha1"
 	operatorv1 "sigs.k8s.io/cluster-api-operator/api/v1alpha2"
 	providercontroller "sigs.k8s.io/cluster-api-operator/internal/controller"
+	healthcheckcontroller "sigs.k8s.io/cluster-api-operator/internal/controller/healthcheck"
 )
 
 var (
@@ -184,6 +185,7 @@ func setupChecks(mgr ctrl.Manager) {
 }
 
 func setupReconcilers(mgr ctrl.Manager) {
+	// Generic provider reconcilers
 	if err := (&providercontroller.GenericProviderReconciler{
 		Provider:     &operatorv1.CoreProvider{},
 		ProviderList: &operatorv1.CoreProviderList{},
@@ -233,6 +235,47 @@ func setupReconcilers(mgr ctrl.Manager) {
 		setupLog.Error(err, "unable to create controller", "controller", "AddonProvider")
 		os.Exit(1)
 	}
+
+	// Provider Health Check Reconcilers
+	if err := (&healthcheckcontroller.ProviderHealthCheckReconciler{
+		Provider: &operatorv1.CoreProvider{},
+		Client:   mgr.GetClient(),
+	}).SetupWithManager(mgr, concurrency(concurrencyNumber)); err != nil {
+		setupLog.Error(err, "unable to create controller", "controller", "CoreProviderHealthCheck")
+		os.Exit(1)
+	}
+
+	if err := (&healthcheckcontroller.ProviderHealthCheckReconciler{
+		Provider: &operatorv1.InfrastructureProvider{},
+		Client:   mgr.GetClient(),
+	}).SetupWithManager(mgr, concurrency(concurrencyNumber)); err != nil {
+		setupLog.Error(err, "unable to create controller", "controller", "InfrastructureProviderHealthCheck")
+		os.Exit(1)
+	}
+
+	if err := (&healthcheckcontroller.ProviderHealthCheckReconciler{
+		Provider: &operatorv1.BootstrapProvider{},
+		Client:   mgr.GetClient(),
+	}).SetupWithManager(mgr, concurrency(concurrencyNumber)); err != nil {
+		setupLog.Error(err, "unable to create controller", "controller", "BootstrapProviderHealthCheck")
+		os.Exit(1)
+	}
+
+	if err := (&healthcheckcontroller.ProviderHealthCheckReconciler{
+		Provider: &operatorv1.ControlPlaneProvider{},
+		Client:   mgr.GetClient(),
+	}).SetupWithManager(mgr, concurrency(concurrencyNumber)); err != nil {
+		setupLog.Error(err, "unable to create controller", "controller", "ControlPlaneProviderHealthCheck")
+		os.Exit(1)
+	}
+
+	if err := (&healthcheckcontroller.ProviderHealthCheckReconciler{
+		Provider: &operatorv1.AddonProvider{},
+		Client:   mgr.GetClient(),
+	}).SetupWithManager(mgr, concurrency(concurrencyNumber)); err != nil {
+		setupLog.Error(err, "unable to create controller", "controller", "AddonProviderHealthCheck")
+		os.Exit(1)
+	}
 }
 
 func setupWebhooks(mgr ctrl.Manager) {
diff --git a/internal/controller/genericprovider_controller.go b/internal/controller/genericprovider_controller.go
index 04a179d1d..197f6a8d3 100644
--- a/internal/controller/genericprovider_controller.go
+++ b/internal/controller/genericprovider_controller.go
@@ -155,11 +155,7 @@ func patchProvider(ctx context.Context, provider genericprovider.GenericProvider
 		operatorv1.ProviderInstalledCondition,
 	}
 
-	conditions.SetSummary(provider, conditions.WithConditions(conds...))
-
-	options = append(options,
-		patch.WithOwnedConditions{Conditions: append(conds, clusterv1.ReadyCondition)},
-	)
+	options = append(options, patch.WithOwnedConditions{Conditions: conds})
 
 	return patchHelper.Patch(ctx, provider.GetObject(), options...)
 }
diff --git a/internal/controller/healthcheck/healthcheck_controller.go b/internal/controller/healthcheck/healthcheck_controller.go
new file mode 100644
index 000000000..f82f6e500
--- /dev/null
+++ b/internal/controller/healthcheck/healthcheck_controller.go
@@ -0,0 +1,188 @@
+/*
+Copyright 2023 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+// Package healthcheck implements a controller that mirrors the availability
+// of a provider's Deployment into the provider's Ready condition.
+package healthcheck
+
+import (
+	"context"
+	"fmt"
+	"reflect"
+	"time"
+
+	appsv1 "k8s.io/api/apps/v1"
+	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
+	"k8s.io/apimachinery/pkg/labels"
+	operatorv1 "sigs.k8s.io/cluster-api-operator/api/v1alpha2"
+	"sigs.k8s.io/cluster-api-operator/internal/controller/genericprovider"
+	"sigs.k8s.io/cluster-api-operator/util"
+	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
+	"sigs.k8s.io/cluster-api/util/conditions"
+	"sigs.k8s.io/cluster-api/util/patch"
+	ctrl "sigs.k8s.io/controller-runtime"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+	"sigs.k8s.io/controller-runtime/pkg/controller"
+	"sigs.k8s.io/controller-runtime/pkg/reconcile"
+)
+
+// notReadyRequeueInterval is the delay before re-checking a provider that is
+// not installed yet or whose deployment is not available yet.
+const notReadyRequeueInterval = 5 * time.Second
+
+// ProviderHealthCheckReconciler sets the Ready condition of a provider from
+// the Available condition of the provider's Deployment.
+type ProviderHealthCheckReconciler struct {
+	// Provider is an object of the provider kind handled by this reconciler.
+	Provider client.Object
+	// Client reads providers and deployments and patches providers.
+	Client client.Client
+}
+
+// SetupWithManager registers the reconciler with mgr for the kind of r.Provider.
+func (r *ProviderHealthCheckReconciler) SetupWithManager(mgr ctrl.Manager, options controller.Options) error {
+	return ctrl.NewControllerManagedBy(mgr).
+		For(r.Provider).
+		WithOptions(options).
+		Complete(r)
+}
+
+// Reconcile copies the status of the provider deployment's Available condition
+// into the provider's Ready condition. While that status is not True the
+// request is requeued after notReadyRequeueInterval; an error is returned when
+// the provider's deployment cannot be listed or is not exactly one.
+func (r *ProviderHealthCheckReconciler) Reconcile(ctx context.Context, req reconcile.Request) (reconcile.Result, error) {
+	log := ctrl.LoggerFrom(ctx)
+
+	log.Info("Checking provider health")
+
+	typedProvider, err := r.newGenericProvider()
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+
+	if err := r.Client.Get(ctx, req.NamespacedName, typedProvider.GetObject()); err != nil {
+		if apierrors.IsNotFound(err) {
+			// The provider no longer exists, so there is nothing to check.
+			return ctrl.Result{}, nil
+		}
+		// Error reading the object - requeue the request.
+		return ctrl.Result{}, err
+	}
+
+	// Stop early if this provider is not installed yet.
+	if !conditions.IsTrue(typedProvider, operatorv1.ProviderInstalledCondition) {
+		return ctrl.Result{RequeueAfter: notReadyRequeueInterval}, nil
+	}
+
+	// Get Deployment based on the provider type.
+	providerName := util.GenerateProviderName(typedProvider)
+	namespace := typedProvider.GetNamespace()
+	deployments := &appsv1.DeploymentList{}
+
+	if err := r.Client.List(
+		ctx, deployments,
+		client.InNamespace(namespace),
+		client.MatchingLabelsSelector{Selector: labels.Set{clusterv1.ProviderNameLabel: providerName}.AsSelector()},
+	); err != nil {
+		return ctrl.Result{}, fmt.Errorf("error fetching deployment for provider %s: %w", providerName, err)
+	}
+
+	if len(deployments.Items) > 1 {
+		return ctrl.Result{}, fmt.Errorf("more than one deployments found for provider %s", providerName)
+	}
+
+	if len(deployments.Items) == 0 {
+		return ctrl.Result{}, fmt.Errorf("no deployments found for provider %s", providerName)
+	}
+
+	// A deployment that does not report an Available condition yet is not ready.
+	readyStatus := corev1.ConditionFalse
+	if cond := getDeploymentCondition(deployments.Items[0].Status, appsv1.DeploymentAvailable); cond != nil {
+		readyStatus = cond.Status
+	}
+
+	// This controller is only set up to watch the provider kind, so keep polling
+	// until the deployment is available, even when no patch is needed.
+	result := ctrl.Result{}
+	if readyStatus != corev1.ConditionTrue {
+		result.RequeueAfter = notReadyRequeueInterval
+	}
+
+	// Nothing to patch if the Ready condition already mirrors the deployment.
+	currentReadyCondition := conditions.Get(typedProvider, clusterv1.ReadyCondition)
+	if currentReadyCondition != nil && currentReadyCondition.Status == readyStatus {
+		return result, nil
+	}
+
+	// Initialize the patch helper before mutating the provider.
+	patchHelper, err := patch.NewHelper(typedProvider.GetObject(), r.Client)
+	if err != nil {
+		return ctrl.Result{}, err
+	}
+
+	conditions.Set(typedProvider, &clusterv1.Condition{Type: clusterv1.ReadyCondition, Status: readyStatus})
+
+	options := patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{clusterv1.ReadyCondition}}
+	if err := patchHelper.Patch(ctx, typedProvider.GetObject(), options); err != nil {
+		return ctrl.Result{}, err
+	}
+
+	return result, nil
+}
+
+// newGenericProvider returns an empty generic provider wrapper matching the
+// concrete type of r.Provider, or an error for an unsupported type.
+func (r *ProviderHealthCheckReconciler) newGenericProvider() (genericprovider.GenericProvider, error) {
+	switch r.Provider.(type) {
+	case *operatorv1.CoreProvider:
+		return &genericprovider.CoreProviderWrapper{CoreProvider: &operatorv1.CoreProvider{}}, nil
+	case *operatorv1.BootstrapProvider:
+		return &genericprovider.BootstrapProviderWrapper{BootstrapProvider: &operatorv1.BootstrapProvider{}}, nil
+	case *operatorv1.ControlPlaneProvider:
+		return &genericprovider.ControlPlaneProviderWrapper{ControlPlaneProvider: &operatorv1.ControlPlaneProvider{}}, nil
+	case *operatorv1.InfrastructureProvider:
+		return &genericprovider.InfrastructureProviderWrapper{InfrastructureProvider: &operatorv1.InfrastructureProvider{}}, nil
+	case *operatorv1.AddonProvider:
+		return &genericprovider.AddonProviderWrapper{AddonProvider: &operatorv1.AddonProvider{}}, nil
+	default:
+		// Resolve the kind name with reflect.TypeOf, which is nil-safe; calling
+		// Type() on reflect.ValueOf of an unset Provider would panic.
+		providerKind := "<nil>"
+		if providerType := reflect.TypeOf(r.Provider); providerType != nil {
+			if providerType.Kind() == reflect.Ptr {
+				providerType = providerType.Elem()
+			}
+
+			providerKind = providerType.Name()
+		}
+
+		return nil, fmt.Errorf("failed to cast interface for type: %s", providerKind)
+	}
+}
+
+// getDeploymentCondition returns the deployment condition with the provided type.
+func getDeploymentCondition(status appsv1.DeploymentStatus, condType appsv1.DeploymentConditionType) *appsv1.DeploymentCondition {
+	for i := range status.Conditions {
+		c := status.Conditions[i]
+		if c.Type == condType {
+			return &c
+		}
+	}
+
+	return nil
+}
diff --git a/internal/controller/phases.go b/internal/controller/phases.go
index f4b90ed91..ce32c032d 100644
--- a/internal/controller/phases.go
+++ b/internal/controller/phases.go
@@ -480,7 +480,7 @@ func (p *phaseReconciler) delete(ctx context.Context) (reconcile.Result, error)
 
 	clusterClient := p.newClusterClient()
 
-	p.clusterctlProvider.Name = clusterctlProviderName(p.provider).Name
+	p.clusterctlProvider.Name = util.GenerateProviderName(p.provider)
 	p.clusterctlProvider.Namespace = p.provider.GetNamespace()
 	p.clusterctlProvider.Type = string(util.ClusterctlProviderType(p.provider))
 	p.clusterctlProvider.ProviderName = p.provider.GetName()
@@ -500,22 +500,6 @@ func (p *phaseReconciler) delete(ctx context.Context) (reconcile.Result, error)
 	return reconcile.Result{}, wrapPhaseError(err, operatorv1.OldComponentsDeletionErrorReason)
 }
 
-func clusterctlProviderName(provider genericprovider.GenericProvider) client.ObjectKey {
-	prefix := ""
-	switch provider.GetObject().(type) {
-	case *operatorv1.BootstrapProvider:
-		prefix = "bootstrap-"
-	case *operatorv1.ControlPlaneProvider:
-		prefix = "control-plane-"
-	case *operatorv1.InfrastructureProvider:
-		prefix = "infrastructure-"
-	case *operatorv1.AddonProvider:
-		prefix = "addon-"
-	}
-
-	return client.ObjectKey{Name: prefix + provider.GetName(), Namespace: provider.GetNamespace()}
-}
-
 // newClusterClient returns a clusterctl client for interacting with management cluster.
 func (p *phaseReconciler) newClusterClient() cluster.Client {
 	return cluster.New(cluster.Kubeconfig{}, p.configClient, cluster.InjectProxy(&controllerProxy{
diff --git a/util/util.go b/util/util.go
index c9aac381b..c1a032611 100644
--- a/util/util.go
+++ b/util/util.go
@@ -44,3 +44,20 @@ func ClusterctlProviderType(genericProvider genericprovider.GenericProvider) clu
 
 	return clusterctlv1.ProviderTypeUnknown
 }
+
+// GenerateProviderName generates provider name based on its type.
+func GenerateProviderName(provider genericprovider.GenericProvider) string {
+	prefix := ""
+	switch provider.GetObject().(type) {
+	case *operatorv1.BootstrapProvider:
+		prefix = "bootstrap-"
+	case *operatorv1.ControlPlaneProvider:
+		prefix = "control-plane-"
+	case *operatorv1.InfrastructureProvider:
+		prefix = "infrastructure-"
+	case *operatorv1.AddonProvider:
+		prefix = "addon-"
+	}
+
+	return prefix + provider.GetName()
+}