Skip to content

Commit

Permalink
Add provider healthcheck controller
Browse files Browse the repository at this point in the history
  • Loading branch information
Fedosin committed Sep 12, 2023
1 parent 2f40926 commit cc75bdc
Show file tree
Hide file tree
Showing 5 changed files with 225 additions and 22 deletions.
43 changes: 43 additions & 0 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ import (
operatorv1alpha1 "sigs.k8s.io/cluster-api-operator/api/v1alpha1"
operatorv1 "sigs.k8s.io/cluster-api-operator/api/v1alpha2"
providercontroller "sigs.k8s.io/cluster-api-operator/internal/controller"
healtchcheckcontroller "sigs.k8s.io/cluster-api-operator/internal/controller/healthcheck"
)

var (
Expand Down Expand Up @@ -184,6 +185,7 @@ func setupChecks(mgr ctrl.Manager) {
}

func setupReconcilers(mgr ctrl.Manager) {
// Generic provider reconcilers
if err := (&providercontroller.GenericProviderReconciler{
Provider: &operatorv1.CoreProvider{},
ProviderList: &operatorv1.CoreProviderList{},
Expand Down Expand Up @@ -233,6 +235,47 @@ func setupReconcilers(mgr ctrl.Manager) {
setupLog.Error(err, "unable to create controller", "controller", "AddonProvider")
os.Exit(1)
}

// Provider Health Check Reconcilers
if err := (&healtchcheckcontroller.ProviderHealthCheckReconciler{
Provider: &operatorv1.CoreProvider{},
Client: mgr.GetClient(),
}).SetupWithManager(mgr, concurrency(concurrencyNumber)); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "CoreProviderHealthCheck")
os.Exit(1)
}

if err := (&healtchcheckcontroller.ProviderHealthCheckReconciler{
Provider: &operatorv1.InfrastructureProvider{},
Client: mgr.GetClient(),
}).SetupWithManager(mgr, concurrency(concurrencyNumber)); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "InfrastructureProviderHealthCheck")
os.Exit(1)
}

if err := (&healtchcheckcontroller.ProviderHealthCheckReconciler{
Provider: &operatorv1.BootstrapProvider{},
Client: mgr.GetClient(),
}).SetupWithManager(mgr, concurrency(concurrencyNumber)); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "BootstrapProviderHealthCheck")
os.Exit(1)
}

if err := (&healtchcheckcontroller.ProviderHealthCheckReconciler{
Provider: &operatorv1.ControlPlaneProvider{},
Client: mgr.GetClient(),
}).SetupWithManager(mgr, concurrency(concurrencyNumber)); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "ControlPlaneProviderHealthCheck")
os.Exit(1)
}

if err := (&healtchcheckcontroller.ProviderHealthCheckReconciler{
Provider: &operatorv1.AddonProvider{},
Client: mgr.GetClient(),
}).SetupWithManager(mgr, concurrency(concurrencyNumber)); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "AddonProviderHealthCheck")
os.Exit(1)
}
}

func setupWebhooks(mgr ctrl.Manager) {
Expand Down
6 changes: 1 addition & 5 deletions internal/controller/genericprovider_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,11 +155,7 @@ func patchProvider(ctx context.Context, provider genericprovider.GenericProvider
operatorv1.ProviderInstalledCondition,
}

conditions.SetSummary(provider, conditions.WithConditions(conds...))

options = append(options,
patch.WithOwnedConditions{Conditions: append(conds, clusterv1.ReadyCondition)},
)
options = append(options, patch.WithOwnedConditions{Conditions: conds})

return patchHelper.Patch(ctx, provider.GetObject(), options...)
}
Expand Down
163 changes: 163 additions & 0 deletions internal/controller/healthcheck/healthcheck_controller.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
/*
Copyright 2023 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package healthcheck

import (
"context"
"fmt"
"reflect"
"time"

appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/labels"
operatorv1 "sigs.k8s.io/cluster-api-operator/api/v1alpha2"
"sigs.k8s.io/cluster-api-operator/internal/controller/genericprovider"
"sigs.k8s.io/cluster-api-operator/util"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
"sigs.k8s.io/cluster-api/util/conditions"
"sigs.k8s.io/cluster-api/util/patch"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller"
"sigs.k8s.io/controller-runtime/pkg/reconcile"
)

// ProviderHealthCheckReconciler reconciles the Ready condition of a single
// provider kind from the Available condition of the provider's Deployment.
type ProviderHealthCheckReconciler struct {
	// Provider is a sample object of the provider kind handled by this
	// reconciler. It is the type the controller is registered for and it
	// selects which generic provider wrapper is used during reconciliation.
	Provider client.Object
	// Client is used to get providers, list Deployments and patch providers.
	Client client.Client
}

// SetupWithManager registers this reconciler with mgr as a controller for the
// kind of r.Provider, applying the supplied controller options.
func (r *ProviderHealthCheckReconciler) SetupWithManager(mgr ctrl.Manager, options controller.Options) error {
	bldr := ctrl.NewControllerManagedBy(mgr)
	bldr = bldr.For(r.Provider)
	bldr = bldr.WithOptions(options)

	return bldr.Complete(r)
}

// Reconcile mirrors the Available condition of the provider's Deployment into
// the provider's Ready condition.
//
// The provider named by req is fetched; a missing provider is not an error.
// If the provider is not installed yet the request is requeued after a short
// delay. Otherwise the single Deployment labelled with the generated provider
// name is looked up in the provider's namespace (zero or several matches are
// reported as errors) and the Ready condition is patched when its status
// differs from the Deployment's Available status.
//
// While the resulting Ready status is anything other than True the request is
// requeued periodically, including when the stored condition is already up to
// date: the controller is registered for provider objects only, so a change in
// the Deployment alone would not trigger another reconciliation.
func (r *ProviderHealthCheckReconciler) Reconcile(ctx context.Context, req reconcile.Request) (_ reconcile.Result, reterr error) {
	// Delay between two health checks while the provider is not ready.
	const requeueInterval = 5 * time.Second

	log := ctrl.LoggerFrom(ctx)

	log.Info("Checking provider health")

	result := ctrl.Result{}

	typedProvider, err := r.newGenericProvider()
	if err != nil {
		return result, err
	}

	if err := r.Client.Get(ctx, req.NamespacedName, typedProvider.GetObject()); err != nil {
		if apierrors.IsNotFound(err) {
			// Object not found, return. Created objects are automatically garbage collected.
			// For additional cleanup logic use finalizers.
			return result, nil
		}
		// Error reading the object - requeue the request.
		return result, err
	}

	// Stop earlier if this provider is not installed yet.
	if !conditions.IsTrue(typedProvider, operatorv1.ProviderInstalledCondition) {
		return ctrl.Result{RequeueAfter: requeueInterval}, nil
	}

	// Get Deployment based on the provider type.
	providerName := util.GenerateProviderName(typedProvider)
	namespace := typedProvider.GetNamespace()
	deployments := &appsv1.DeploymentList{}

	if err := r.Client.List(
		ctx, deployments,
		client.InNamespace(namespace),
		client.MatchingLabelsSelector{Selector: labels.Set{clusterv1.ProviderNameLabel: providerName}.AsSelector()},
	); err != nil {
		return result, fmt.Errorf("error fetching deployment for provider %s: %w", providerName, err)
	}

	if len(deployments.Items) > 1 {
		return result, fmt.Errorf("more than one deployments found for provider %s", providerName)
	}

	if len(deployments.Items) == 0 {
		return result, fmt.Errorf("no deployments found for provider %s", providerName)
	}

	deployment := deployments.Items[0]

	deploymentCondition := getDeploymentCondition(deployment.Status, appsv1.DeploymentAvailable)

	// A Deployment without an Available condition is treated as not ready.
	desiredStatus := corev1.ConditionFalse
	if deploymentCondition != nil {
		desiredStatus = deploymentCondition.Status
	}

	// Decide on requeueing before any early return: while the provider is not
	// ready we have to keep polling, otherwise the Ready condition would never
	// flip back once the Deployment becomes available.
	if desiredStatus != corev1.ConditionTrue {
		result = ctrl.Result{RequeueAfter: requeueInterval}
	}

	// Compare existing Ready condition with the deployment condition and skip the patch if they already match.
	currentReadyCondition := conditions.Get(typedProvider, clusterv1.ReadyCondition)
	if currentReadyCondition != nil && deploymentCondition != nil && currentReadyCondition.Status == desiredStatus {
		return result, nil
	}

	// Initialize the patch helper before mutating the provider so that the
	// patch contains only the condition change made below.
	patchHelper, err := patch.NewHelper(typedProvider.GetObject(), r.Client)
	if err != nil {
		return result, err
	}

	conditions.Set(typedProvider, &clusterv1.Condition{Type: clusterv1.ReadyCondition, Status: desiredStatus})

	// This controller owns only the Ready condition.
	options := patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{clusterv1.ReadyCondition}}

	return result, patchHelper.Patch(ctx, typedProvider.GetObject(), options)
}

// newGenericProvider returns a generic provider wrapper around a fresh, empty
// object of the same kind as r.Provider.
//
// An error is returned when r.Provider is nil or is not one of the supported
// provider kinds.
func (r *ProviderHealthCheckReconciler) newGenericProvider() (genericprovider.GenericProvider, error) {
	switch r.Provider.(type) {
	case *operatorv1.CoreProvider:
		return &genericprovider.CoreProviderWrapper{CoreProvider: &operatorv1.CoreProvider{}}, nil
	case *operatorv1.BootstrapProvider:
		return &genericprovider.BootstrapProviderWrapper{BootstrapProvider: &operatorv1.BootstrapProvider{}}, nil
	case *operatorv1.ControlPlaneProvider:
		return &genericprovider.ControlPlaneProviderWrapper{ControlPlaneProvider: &operatorv1.ControlPlaneProvider{}}, nil
	case *operatorv1.InfrastructureProvider:
		return &genericprovider.InfrastructureProviderWrapper{InfrastructureProvider: &operatorv1.InfrastructureProvider{}}, nil
	case *operatorv1.AddonProvider:
		return &genericprovider.AddonProviderWrapper{AddonProvider: &operatorv1.AddonProvider{}}, nil
	default:
		// Work on the static type rather than on the value: dereferencing a
		// nil interface or a nil pointer through reflect.Value would panic
		// instead of producing the error below.
		providerType := reflect.TypeOf(r.Provider)
		if providerType == nil {
			return nil, fmt.Errorf("failed to cast interface for type: %s", "<nil>")
		}

		if providerType.Kind() == reflect.Ptr {
			providerType = providerType.Elem()
		}

		return nil, fmt.Errorf("failed to cast interface for type: %s", providerType.Name())
	}
}

// getDeploymentCondition looks up the first condition of type condType in
// status and returns a pointer to a copy of it, or nil if there is none.
func getDeploymentCondition(status appsv1.DeploymentStatus, condType appsv1.DeploymentConditionType) *appsv1.DeploymentCondition {
	for _, candidate := range status.Conditions {
		if candidate.Type != condType {
			continue
		}

		// Hand out a dedicated copy instead of the address of the loop variable.
		match := candidate

		return &match
	}

	return nil
}
18 changes: 1 addition & 17 deletions internal/controller/phases.go
Original file line number Diff line number Diff line change
Expand Up @@ -480,7 +480,7 @@ func (p *phaseReconciler) delete(ctx context.Context) (reconcile.Result, error)

clusterClient := p.newClusterClient()

p.clusterctlProvider.Name = clusterctlProviderName(p.provider).Name
p.clusterctlProvider.Name = util.GenerateProviderName(p.provider)
p.clusterctlProvider.Namespace = p.provider.GetNamespace()
p.clusterctlProvider.Type = string(util.ClusterctlProviderType(p.provider))
p.clusterctlProvider.ProviderName = p.provider.GetName()
Expand All @@ -500,22 +500,6 @@ func (p *phaseReconciler) delete(ctx context.Context) (reconcile.Result, error)
return reconcile.Result{}, wrapPhaseError(err, operatorv1.OldComponentsDeletionErrorReason)
}

// clusterctlProviderName builds the object key of the clusterctl provider that
// corresponds to the given provider: the namespace is taken over unchanged and
// the name is prefixed according to the provider kind. Kinds without a
// dedicated prefix keep their plain name.
func clusterctlProviderName(provider genericprovider.GenericProvider) client.ObjectKey {
	var kindPrefix string

	switch provider.GetObject().(type) {
	case *operatorv1.BootstrapProvider:
		kindPrefix = "bootstrap-"
	case *operatorv1.ControlPlaneProvider:
		kindPrefix = "control-plane-"
	case *operatorv1.InfrastructureProvider:
		kindPrefix = "infrastructure-"
	case *operatorv1.AddonProvider:
		kindPrefix = "addon-"
	}

	key := client.ObjectKey{}
	key.Name = kindPrefix + provider.GetName()
	key.Namespace = provider.GetNamespace()

	return key
}

// newClusterClient returns a clusterctl client for interacting with management cluster.
func (p *phaseReconciler) newClusterClient() cluster.Client {
return cluster.New(cluster.Kubeconfig{}, p.configClient, cluster.InjectProxy(&controllerProxy{
Expand Down
17 changes: 17 additions & 0 deletions util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,20 @@ func ClusterctlProviderType(genericProvider genericprovider.GenericProvider) clu

return clusterctlv1.ProviderTypeUnknown
}

// GenerateProviderName returns the provider's name prefixed according to its
// kind: "bootstrap-", "control-plane-", "infrastructure-" or "addon-".
// Any other provider kind yields the plain name without a prefix.
func GenerateProviderName(provider genericprovider.GenericProvider) string {
	switch provider.GetObject().(type) {
	case *operatorv1.BootstrapProvider:
		return "bootstrap-" + provider.GetName()
	case *operatorv1.ControlPlaneProvider:
		return "control-plane-" + provider.GetName()
	case *operatorv1.InfrastructureProvider:
		return "infrastructure-" + provider.GetName()
	case *operatorv1.AddonProvider:
		return "addon-" + provider.GetName()
	default:
		return provider.GetName()
	}
}

0 comments on commit cc75bdc

Please sign in to comment.