diff --git a/api/v1alpha3/condition_consts.go b/api/v1alpha3/condition_consts.go
index d75d0fc78cea..5e2e59f815a6 100644
--- a/api/v1alpha3/condition_consts.go
+++ b/api/v1alpha3/condition_consts.go
@@ -109,7 +109,8 @@ const (
 	// MachineHasFailureReason is the reason used when a machine has either a FailureReason or a FailureMessage set on its status.
 	MachineHasFailureReason = "MachineHasFailure"
-	// NodeNotFoundReason is the reason used when a machine's node has previously been observed but is now gone.
+	// NodeNotFoundReason (Severity=Error) documents that a machine's node has previously been observed but is now gone.
+	// NB. provisioned --> NodeRef != ""
 	NodeNotFoundReason = "NodeNotFound"
 	// NodeStartupTimeoutReason is the reason used when a machine's node does not appear within the specified timeout.
@@ -120,7 +121,7 @@ const (
 )
 const (
-	// MachineOwnerRemediatedCondition is set on machines that have failed a healthcheck by the MachineHealthCheck controller.
+	// MachineOwnerRemediatedCondition is set on machines that have failed a healthcheck by the Machine's owner controller.
 	// MachineOwnerRemediatedCondition is set to False after a health check fails, but should be changed to True by the owning controller after remediation succeeds.
 	MachineOwnerRemediatedCondition ConditionType = "OwnerRemediated"
diff --git a/controlplane/kubeadm/api/v1alpha3/condition_consts.go b/controlplane/kubeadm/api/v1alpha3/condition_consts.go
index bf512965e6cc..2db9020a1a3a 100644
--- a/controlplane/kubeadm/api/v1alpha3/condition_consts.go
+++ b/controlplane/kubeadm/api/v1alpha3/condition_consts.go
@@ -66,3 +66,63 @@ const (
 	// ScalingDownReason (Severity=Info) documents a KubeadmControlPlane that is decreasing the number of replicas.
 	ScalingDownReason = "ScalingDown"
 )
+
+const (
+	// EtcdClusterHealthy documents the overall etcd cluster's health for the KCP-managed etcd.
+	EtcdClusterHealthy clusterv1.ConditionType = "EtcdClusterHealthy"
+
+	// EtcdClusterUnhealthyReason (Severity=Warning) is set when the etcd cluster is unhealthy because:
+	// i) the etcd cluster has lost its quorum;
+	// ii) the etcd cluster has alarms armed;
+	// iii) the etcd pods do not match the etcd members.
+	EtcdClusterUnhealthyReason = "EtcdClusterUnhealthy"
+)
+
+// Common Pod-related Condition Reasons used by Pod-related Conditions such as MachineAPIServerPodHealthyCondition etc.
+const (
+	// PodProvisioningReason (Severity=Info) documents a pod waiting to be provisioned, i.e., the Pod is in the "Pending" phase and
+	// the PodScheduled and Initialized conditions are not yet set to True.
+	PodProvisioningReason = "PodProvisioning"
+
+	// PodMissingReason (Severity=Warning) documents that a pod does not exist.
+	PodMissingReason = "PodMissing"
+
+	// PodFailedReason (Severity=Error) documents that either:
+	// i) a pod failed during provisioning, i.e., the Pod is in the "Pending" phase, the
+	// PodScheduled and Initialized conditions are set to True, but the ContainersReady or Ready condition is False
+	// (i.e., at least one of the containers is in a waiting state, e.g. CrashLoopBackOff, ImagePullBackOff); or
+	// ii) a pod has at least one container that terminated with a failure and hence the Pod is in the "Failed" phase.
+	PodFailedReason = "PodFailed"
+)
+
+// Conditions that are only for control-plane machines. KubeadmControlPlane is the owner of these conditions.
+
+const (
+	// MachineAPIServerPodHealthyCondition reports a machine's kube-apiserver's health status.
+	// Set to true if kube-apiserver pod is in "Running" phase, otherwise uses Pod-related Condition Reasons.
+ MachineAPIServerPodHealthyCondition clusterv1.ConditionType = "APIServerPodHealthy" + + // MachineControllerManagerHealthyCondition reports a machine's kube-controller-manager's health status. + // Set to true if kube-controller-manager pod is in "Running" phase, otherwise uses Pod-related Condition Reasons. + MachineControllerManagerHealthyCondition clusterv1.ConditionType = "ControllerManagerPodHealthy" + + // MachineSchedulerPodHealthyCondition reports a machine's kube-scheduler's health status. + // Set to true if kube-scheduler pod is in "Running" phase, otherwise uses Pod-related Condition Reasons. + MachineSchedulerPodHealthyCondition clusterv1.ConditionType = "SchedulerPodHealthy" + + // MachineEtcdPodHealthyCondition reports a machine's etcd pod's health status. + // Set to true if etcd pod is in "Running" phase, otherwise uses Pod-related Condition Reasons. + MachineEtcdPodHealthyCondition clusterv1.ConditionType = "EtcdPodHealthy" +) + +const ( + // MachineEtcdMemberHealthyCondition documents if the machine has an healthy etcd member. + // If not true, Pod-related Condition Reasons can be used as reasons. + MachineEtcdMemberHealthyCondition clusterv1.ConditionType = "EtcdMemberHealthy" + + // EtcdMemberUnhealthyReason (Severity=Error) documents a Machine's etcd member is unhealthy for a number of reasons: + // i) etcd member has alarms. + // ii) creating etcd client fails or using the created etcd client to perform some operations fails. + // iii) Quorum is lost + EtcdMemberUnhealthyReason = "EtcdMemberUnhealthy" +) diff --git a/controlplane/kubeadm/controllers/controller.go b/controlplane/kubeadm/controllers/controller.go index d6a10904cd75..7140ffcb384b 100644 --- a/controlplane/kubeadm/controllers/controller.go +++ b/controlplane/kubeadm/controllers/controller.go @@ -221,6 +221,7 @@ func patchKubeadmControlPlane(ctx context.Context, patchHelper *patch.Helper, kc controlplanev1.MachinesReadyCondition, controlplanev1.AvailableCondition, controlplanev1.CertificatesAvailableCondition, + controlplanev1.EtcdClusterHealthy, ), ) @@ -289,21 +290,25 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster * return ctrl.Result{}, err } - ownedMachines := controlPlaneMachines.Filter(machinefilters.OwnedMachines(kcp)) - if len(ownedMachines) != len(controlPlaneMachines) { - logger.Info("Not all control plane machines are owned by this KubeadmControlPlane, refusing to operate in mixed management mode") - return ctrl.Result{}, nil - } - - controlPlane, err := internal.NewControlPlane(ctx, r.Client, cluster, kcp, ownedMachines) + controlPlane, err := r.createControlPlane(ctx, cluster, kcp) if err != nil { logger.Error(err, "failed to initialize control plane") return ctrl.Result{}, err } + if len(controlPlane.Machines) != len(controlPlaneMachines) { + logger.Info("Not all control plane machines are owned by this KubeadmControlPlane, refusing to operate in mixed management mode") + return ctrl.Result{}, nil + } // Aggregate the operational state of all the machines; while aggregating we are adding the // source ref (reason@machine/name) so the problem can be easily tracked down to its source machine. 
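As an aside on the condition utilities this patch leans on (SetAggregate in the hunk below, MarkTrue/MarkFalse in the health-check code later): a minimal, illustrative sketch of how the new condition types and reasons end up on a Machine. This is not part of the diff; it only assumes the standard util/conditions helpers already used elsewhere in the patch.

// Illustrative only: setting one of the new per-machine conditions with util/conditions.
package example

import (
	clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1alpha3"
	"sigs.k8s.io/cluster-api/util/conditions"
)

// markEtcdPodMissing records that the etcd static pod for this machine was not found,
// using the reason/severity pairing declared in condition_consts.go above.
func markEtcdPodMissing(machine *clusterv1.Machine) {
	conditions.MarkFalse(machine,
		controlplanev1.MachineEtcdPodHealthyCondition,
		controlplanev1.PodMissingReason,
		clusterv1.ConditionSeverityWarning,
		"etcd pod not found on node")
}

// markEtcdPodHealthy flips the condition back to True once the pod is Running and Ready.
func markEtcdPodHealthy(machine *clusterv1.Machine) {
	conditions.MarkTrue(machine, controlplanev1.MachineEtcdPodHealthyCondition)
}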
- conditions.SetAggregate(controlPlane.KCP, controlplanev1.MachinesReadyCondition, ownedMachines.ConditionGetters(), conditions.AddSourceRef()) + conditions.SetAggregate(controlPlane.KCP, controlplanev1.MachinesReadyCondition, controlPlane.Machines.ConditionGetters(), conditions.AddSourceRef()) + + // reconcileControlPlaneHealth returns err if there is a machine being delete or control plane is unhealthy. + // If control plane is not initialized, then control-plane machines will be empty and hence health check will not fail. + if result, err := r.reconcileControlPlaneHealth(ctx, cluster, kcp, controlPlane); err != nil || !result.IsZero() { + return result, err + } // Control plane machines rollout due to configuration changes (e.g. upgrades) takes precedence over other operations. needRollout := controlPlane.MachinesNeedingRollout() @@ -324,7 +329,7 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster * } // If we've made it this far, we can assume that all ownedMachines are up to date - numMachines := len(ownedMachines) + numMachines := len(controlPlane.Machines) desiredReplicas := int(*kcp.Spec.Replicas) switch { @@ -372,6 +377,24 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster * return ctrl.Result{}, nil } +func (r *KubeadmControlPlaneReconciler) createControlPlane(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane) (*internal.ControlPlane, error) { + logger := r.Log.WithValues("namespace", kcp.Namespace, "kubeadmControlPlane", kcp.Name, "cluster", cluster.Name) + + controlPlaneMachines, err := r.managementCluster.GetMachinesForCluster(ctx, util.ObjectKey(cluster), machinefilters.ControlPlaneMachines(cluster.Name)) + if err != nil { + logger.Error(err, "failed to retrieve control plane machines for cluster") + return nil, err + } + ownedMachines := controlPlaneMachines.Filter(machinefilters.OwnedMachines(kcp)) + + controlPlane, err := internal.NewControlPlane(ctx, r.Client, cluster, kcp, ownedMachines) + if err != nil { + logger.Error(err, "failed to initialize control plane") + return nil, err + } + return controlPlane, nil +} + // reconcileDelete handles KubeadmControlPlane deletion. // The implementation does not take non-control plane workloads into consideration. This may or may not change in the future. // Please see https://github.com/kubernetes-sigs/cluster-api/issues/2064. @@ -379,6 +402,26 @@ func (r *KubeadmControlPlaneReconciler) reconcileDelete(ctx context.Context, clu logger := r.Log.WithValues("namespace", kcp.Namespace, "kubeadmControlPlane", kcp.Name, "cluster", cluster.Name) logger.Info("Reconcile KubeadmControlPlane deletion") + controlPlane, err := r.createControlPlane(ctx, cluster, kcp) + if err != nil { + logger.Error(err, "failed to initialize control plane") + return ctrl.Result{}, err + } + + // Ignore the health check results here as well as the errors, health check functions are to set health related conditions on Machines. + // Errors may be due to not being able to get workload cluster nodes. 
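Backing up to createControlPlane above: it narrows the cluster's control-plane machines to the ones owned by this KCP via machinefilters.OwnedMachines. A hedged sketch of what such an owner filter boils down to; the real predicate lives in the internal machinefilters package, and the function below is only illustrative.

// Illustrative only: an owner-reference predicate in the spirit of machinefilters.OwnedMachines.
package example

import (
	clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1alpha3"
)

// ownedBy returns true when the machine carries an owner reference to the given KubeadmControlPlane.
func ownedBy(kcp *controlplanev1.KubeadmControlPlane) func(*clusterv1.Machine) bool {
	return func(m *clusterv1.Machine) bool {
		if m == nil {
			return false
		}
		for _, ref := range m.GetOwnerReferences() {
			if ref.Kind == "KubeadmControlPlane" && ref.Name == kcp.Name && ref.UID == kcp.UID {
				return true
			}
		}
		return false
	}
}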
+ _, err = r.managementCluster.TargetClusterControlPlaneHealthCheck(ctx, controlPlane, util.ObjectKey(cluster)) + if err != nil { + // Do nothing + r.Log.Info("Control plane did not pass control plane health check during delete reconciliation", "err", err.Error()) + } + _, err = r.managementCluster.TargetClusterEtcdHealthCheck(ctx, controlPlane, util.ObjectKey(cluster)) + if err != nil { + // Do nothing + r.Log.Info("Control plane did not pass etcd health check during delete reconciliation", "err", err.Error()) + } + + // Gets all machines, not just control plane machines. allMachines, err := r.managementCluster.GetMachinesForCluster(ctx, util.ObjectKey(cluster)) if err != nil { return ctrl.Result{}, err @@ -442,21 +485,43 @@ func (r *KubeadmControlPlaneReconciler) ClusterToKubeadmControlPlane(o handler.M return nil } -// reconcileHealth performs health checks for control plane components and etcd +// reconcileControlPlaneHealth performs health checks for control plane components and etcd // It removes any etcd members that do not have a corresponding node. // Also, as a final step, checks if there is any machines that is being deleted. -func (r *KubeadmControlPlaneReconciler) reconcileHealth(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, controlPlane *internal.ControlPlane) (ctrl.Result, error) { +func (r *KubeadmControlPlaneReconciler) reconcileControlPlaneHealth(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, controlPlane *internal.ControlPlane) (ctrl.Result, error) { + logger := r.Log.WithValues("namespace", kcp.Namespace, "kubeadmControlPlane", kcp.Name) + + // If there is no KCP-owned control-plane machines, then control-plane has not been initialized yet. + if controlPlane.Machines.Len() == 0 { + return ctrl.Result{}, nil + } + + for i := range controlPlane.Machines { + m := controlPlane.Machines[i] + // Initialize the patch helper. + patchHelper, err := patch.NewHelper(m, r.Client) + if err != nil { + return ctrl.Result{}, err + } + + defer func() { + // Always attempt to Patch the Machine conditions after each health reconciliation. + if err := patchHelper.Patch(ctx, m); err != nil { + logger.Error(err, "Failed to patch KubeadmControlPlane Machine", "machine", m.Name) + } + }() + } // Do a health check of the Control Plane components - if err := r.managementCluster.TargetClusterControlPlaneIsHealthy(ctx, util.ObjectKey(cluster)); err != nil { + if err := r.managementCluster.TargetClusterControlPlaneIsHealthy(ctx, controlPlane, util.ObjectKey(cluster)); err != nil { r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy", "Waiting for control plane to pass control plane health check to continue reconciliation: %v", err) - return ctrl.Result{RequeueAfter: healthCheckFailedRequeueAfter}, nil + return ctrl.Result{}, errors.Wrap(err, "failed to pass control-plane health check") } // If KCP should manage etcd, ensure etcd is healthy. 
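The per-machine patch.Helper loop added above is what actually persists the conditions that the health checks set on each Machine. A condensed, illustrative sketch of that snapshot-mutate-patch pattern on its own; the object and condition names are placeholders, not part of the diff.

// Illustrative only: snapshot an object, mutate its conditions, persist the delta.
package example

import (
	"context"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
	"sigs.k8s.io/cluster-api/util/conditions"
	"sigs.k8s.io/cluster-api/util/patch"
	ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
)

func patchMachineCondition(ctx context.Context, c ctrlclient.Client, m *clusterv1.Machine, t clusterv1.ConditionType) error {
	// Snapshot the object before any mutation.
	helper, err := patch.NewHelper(m, c)
	if err != nil {
		return err
	}
	// Health checks mutate conditions in memory...
	conditions.MarkTrue(m, t)
	// ...and Patch sends only what changed, including status conditions.
	return helper.Patch(ctx, m)
}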
if controlPlane.IsEtcdManaged() { - if err := r.managementCluster.TargetClusterEtcdIsHealthy(ctx, util.ObjectKey(cluster)); err != nil { + if err := r.managementCluster.TargetClusterEtcdIsHealthy(ctx, controlPlane, util.ObjectKey(cluster)); err != nil { errList := []error{errors.Wrap(err, "failed to pass etcd health check")} r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy", "Waiting for control plane to pass etcd health check to continue reconciliation: %v", err) diff --git a/controlplane/kubeadm/controllers/controller_test.go b/controlplane/kubeadm/controllers/controller_test.go index 01dd7b2bf6eb..6085113831e5 100644 --- a/controlplane/kubeadm/controllers/controller_test.go +++ b/controlplane/kubeadm/controllers/controller_test.go @@ -572,7 +572,7 @@ func TestKubeadmControlPlaneReconciler_adoption(t *testing.T) { g := NewWithT(t) cluster, kcp, tmpl := createClusterWithControlPlane() - cluster.Spec.ControlPlaneEndpoint.Host = "nodomain.example.com" + cluster.Spec.ControlPlaneEndpoint.Host = "nodomain2.example.com" cluster.Spec.ControlPlaneEndpoint.Port = 6443 kcp.Spec.Version = version @@ -642,7 +642,7 @@ func TestKubeadmControlPlaneReconciler_adoption(t *testing.T) { g := NewWithT(t) cluster, kcp, tmpl := createClusterWithControlPlane() - cluster.Spec.ControlPlaneEndpoint.Host = "nodomain.example.com" + cluster.Spec.ControlPlaneEndpoint.Host = "nodomain3.example.com" cluster.Spec.ControlPlaneEndpoint.Port = 6443 kcp.Spec.Version = "v1.17.0" diff --git a/controlplane/kubeadm/controllers/fakes_test.go b/controlplane/kubeadm/controllers/fakes_test.go index 46af76804696..60f182dd2284 100644 --- a/controlplane/kubeadm/controllers/fakes_test.go +++ b/controlplane/kubeadm/controllers/fakes_test.go @@ -57,19 +57,32 @@ func (f *fakeManagementCluster) GetMachinesForCluster(c context.Context, n clien return f.Machines, nil } -func (f *fakeManagementCluster) TargetClusterControlPlaneIsHealthy(_ context.Context, _ client.ObjectKey) error { +func (f *fakeManagementCluster) TargetClusterControlPlaneIsHealthy(_ context.Context, _ *internal.ControlPlane, _ client.ObjectKey) error { if !f.ControlPlaneHealthy { return errors.New("control plane is not healthy") } return nil } -func (f *fakeManagementCluster) TargetClusterEtcdIsHealthy(_ context.Context, _ client.ObjectKey) error { +func (f *fakeManagementCluster) TargetClusterEtcdIsHealthy(_ context.Context, _ *internal.ControlPlane, _ client.ObjectKey) error { if !f.EtcdHealthy { return errors.New("etcd is not healthy") } return nil } +func (f *fakeManagementCluster) TargetClusterEtcdHealthCheck(_ context.Context, _ *internal.ControlPlane, _ client.ObjectKey) (internal.HealthCheckResult, error) { + if !f.EtcdHealthy { + return nil, errors.New("etcd is not healthy") + } + return nil, nil +} + +func (f *fakeManagementCluster) TargetClusterControlPlaneHealthCheck(_ context.Context, _ *internal.ControlPlane, _ client.ObjectKey) (internal.HealthCheckResult, error) { + if !f.ControlPlaneHealthy { + return nil, errors.New("control plane is not healthy") + } + return nil, nil +} type fakeWorkloadCluster struct { *internal.Workload diff --git a/controlplane/kubeadm/controllers/scale.go b/controlplane/kubeadm/controllers/scale.go index a89d5b8e5322..51e7a305caf1 100644 --- a/controlplane/kubeadm/controllers/scale.go +++ b/controlplane/kubeadm/controllers/scale.go @@ -63,11 +63,6 @@ func (r *KubeadmControlPlaneReconciler) initializeControlPlane(ctx context.Conte func (r *KubeadmControlPlaneReconciler) scaleUpControlPlane(ctx context.Context, 
cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, controlPlane *internal.ControlPlane) (ctrl.Result, error) { logger := controlPlane.Logger() - // reconcileHealth returns err if there is a machine being delete which is a required condition to check before scaling up - if result, err := r.reconcileHealth(ctx, cluster, kcp, controlPlane); err != nil || !result.IsZero() { - return result, err - } - // Create the bootstrap configuration bootstrapSpec := controlPlane.JoinControlPlaneConfig() fd := controlPlane.NextFailureDomainForScaleUp() @@ -90,10 +85,6 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane( ) (ctrl.Result, error) { logger := controlPlane.Logger() - if result, err := r.reconcileHealth(ctx, cluster, kcp, controlPlane); err != nil || !result.IsZero() { - return result, err - } - workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(cluster)) if err != nil { logger.Error(err, "Failed to create client to workload cluster") @@ -123,7 +114,8 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane( } } - if err := r.managementCluster.TargetClusterControlPlaneIsHealthy(ctx, util.ObjectKey(cluster)); err != nil { + // TODO: check if this is needed after moving the health check to the main reconcile + if err := r.managementCluster.TargetClusterControlPlaneIsHealthy(ctx, controlPlane, util.ObjectKey(cluster)); err != nil { logger.V(2).Info("Waiting for control plane to pass control plane health check before removing a control plane machine", "cause", err) r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy", "Waiting for control plane to pass control plane health check before removing a control plane machine: %v", err) diff --git a/controlplane/kubeadm/controllers/scale_test.go b/controlplane/kubeadm/controllers/scale_test.go index ceed678aec27..de19e72a3559 100644 --- a/controlplane/kubeadm/controllers/scale_test.go +++ b/controlplane/kubeadm/controllers/scale_test.go @@ -116,7 +116,7 @@ func TestKubeadmControlPlaneReconciler_scaleUpControlPlane(t *testing.T) { result, err := r.scaleUpControlPlane(context.Background(), cluster, kcp, controlPlane) g.Expect(result).To(Equal(ctrl.Result{Requeue: true})) - g.Expect(err).ToNot(HaveOccurred()) + g.Expect(err).NotTo(HaveOccurred()) controlPlaneMachines := clusterv1.MachineList{} g.Expect(fakeClient.List(context.Background(), &controlPlaneMachines)).To(Succeed()) @@ -124,6 +124,8 @@ func TestKubeadmControlPlaneReconciler_scaleUpControlPlane(t *testing.T) { }) t.Run("does not create a control plane Machine if health checks fail", func(t *testing.T) { cluster, kcp, genericMachineTemplate := createClusterWithControlPlane() + cluster.Spec.ControlPlaneEndpoint.Host = "nodomain.example.com" + cluster.Spec.ControlPlaneEndpoint.Port = 6443 initObjs := []runtime.Object{cluster.DeepCopy(), kcp.DeepCopy(), genericMachineTemplate.DeepCopy()} beforeMachines := internal.NewFilterableMachineCollection() @@ -170,18 +172,11 @@ func TestKubeadmControlPlaneReconciler_scaleUpControlPlane(t *testing.T) { Log: log.Log, recorder: record.NewFakeRecorder(32), } - controlPlane := &internal.ControlPlane{ - KCP: kcp, - Cluster: cluster, - Machines: beforeMachines, - } - result, err := r.scaleUpControlPlane(context.Background(), cluster.DeepCopy(), kcp.DeepCopy(), controlPlane) - if tc.expectErr { - g.Expect(err).To(HaveOccurred()) - } - g.Expect(result).To(Equal(tc.expectResult)) + _, err := r.reconcile(context.Background(), cluster, kcp) + g.Expect(err).To(HaveOccurred()) + // 
scaleUpControlPlane is never called due to health check failure and new machine is not created to scale up. controlPlaneMachines := &clusterv1.MachineList{} g.Expect(fakeClient.List(context.Background(), controlPlaneMachines)).To(Succeed()) g.Expect(controlPlaneMachines.Items).To(HaveLen(len(beforeMachines))) diff --git a/controlplane/kubeadm/controllers/status.go b/controlplane/kubeadm/controllers/status.go index 0c6b0ceac13a..6227ca00637b 100644 --- a/controlplane/kubeadm/controllers/status.go +++ b/controlplane/kubeadm/controllers/status.go @@ -18,7 +18,6 @@ package controllers import ( "context" - "github.com/pkg/errors" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3" controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1alpha3" @@ -29,7 +28,7 @@ import ( ) // updateStatus is called after every reconcilitation loop in a defer statement to always make sure we have the -// resource status subresourcs up-to-date. +// resource status subresources up-to-date. func (r *KubeadmControlPlaneReconciler) updateStatus(ctx context.Context, kcp *controlplanev1.KubeadmControlPlane, cluster *clusterv1.Cluster) error { selector := machinefilters.ControlPlaneSelectorForCluster(cluster.Name) // Copy label selector to its status counterpart in string format. diff --git a/controlplane/kubeadm/controllers/upgrade_test.go b/controlplane/kubeadm/controllers/upgrade_test.go index 53596c317188..f0702e2a3da2 100644 --- a/controlplane/kubeadm/controllers/upgrade_test.go +++ b/controlplane/kubeadm/controllers/upgrade_test.go @@ -36,6 +36,8 @@ func TestKubeadmControlPlaneReconciler_upgradeControlPlane(t *testing.T) { g := NewWithT(t) cluster, kcp, genericMachineTemplate := createClusterWithControlPlane() + cluster.Spec.ControlPlaneEndpoint.Host = "nodomain.example.com" + cluster.Spec.ControlPlaneEndpoint.Port = 6443 kcp.Spec.Version = "v1.17.3" kcp.Spec.KubeadmConfigSpec.ClusterConfiguration = nil kcp.Spec.Replicas = pointer.Int32Ptr(1) @@ -89,9 +91,9 @@ func TestKubeadmControlPlaneReconciler_upgradeControlPlane(t *testing.T) { // run upgrade a second time, simulate that the node has not appeared yet but the machine exists r.managementCluster.(*fakeManagementCluster).ControlPlaneHealthy = false - result, err = r.upgradeControlPlane(context.Background(), cluster, kcp, controlPlane, needingUpgrade) - g.Expect(result).To(Equal(ctrl.Result{RequeueAfter: healthCheckFailedRequeueAfter})) - g.Expect(err).To(BeNil()) + // Unhealthy control plane will be detected during reconcile loop and upgrade will never be called. 
+ _, err = r.reconcile(context.Background(), cluster, kcp) + g.Expect(err).To(HaveOccurred()) g.Expect(fakeClient.List(context.Background(), bothMachines, client.InNamespace(cluster.Namespace))).To(Succeed()) g.Expect(bothMachines.Items).To(HaveLen(2)) diff --git a/controlplane/kubeadm/internal/cluster.go b/controlplane/kubeadm/internal/cluster.go index 0789c27e88f1..53711331f82a 100644 --- a/controlplane/kubeadm/internal/cluster.go +++ b/controlplane/kubeadm/internal/cluster.go @@ -28,13 +28,12 @@ import ( "k8s.io/apimachinery/pkg/runtime" kerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/client-go/kubernetes/scheme" - "sigs.k8s.io/controller-runtime/pkg/client" - ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" - clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3" "sigs.k8s.io/cluster-api/controllers/remote" "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/machinefilters" "sigs.k8s.io/cluster-api/util/secret" + "sigs.k8s.io/controller-runtime/pkg/client" + ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" ) // ManagementCluster defines all behaviors necessary for something to function as a management cluster. @@ -42,8 +41,10 @@ type ManagementCluster interface { ctrlclient.Reader GetMachinesForCluster(ctx context.Context, cluster client.ObjectKey, filters ...machinefilters.Func) (FilterableMachineCollection, error) - TargetClusterEtcdIsHealthy(ctx context.Context, clusterKey client.ObjectKey) error - TargetClusterControlPlaneIsHealthy(ctx context.Context, clusterKey client.ObjectKey) error + TargetClusterControlPlaneHealthCheck(ctx context.Context, controlPlane *ControlPlane, clusterKey client.ObjectKey) (HealthCheckResult, error) + TargetClusterEtcdHealthCheck(ctx context.Context, controlPlane *ControlPlane, clusterKey client.ObjectKey) (HealthCheckResult, error) + TargetClusterEtcdIsHealthy(ctx context.Context, controlPlane *ControlPlane, clusterKey client.ObjectKey) error + TargetClusterControlPlaneIsHealthy(ctx context.Context, controlPlane *ControlPlane, clusterKey client.ObjectKey) error GetWorkloadCluster(ctx context.Context, clusterKey client.ObjectKey) (WorkloadCluster, error) } @@ -180,16 +181,15 @@ func (m *Management) getApiServerEtcdClientCert(ctx context.Context, clusterKey return tls.X509KeyPair(crtData, keyData) } -type healthCheck func(context.Context) (HealthCheckResult, error) - // HealthCheck will run a generic health check function and report any errors discovered. // In addition to the health check, it also ensures there is a 1;1 match between nodes and machines. -func (m *Management) healthCheck(ctx context.Context, check healthCheck, clusterKey client.ObjectKey) error { +// To have access to the owned control-plane machines during health checks, need to pass owningMachines here. +func (m *Management) healthCheck(controlPlane *ControlPlane, nodeChecks HealthCheckResult, clusterKey client.ObjectKey) error { var errorList []error - nodeChecks, err := check(ctx) - if err != nil { - errorList = append(errorList, err) - } + kcpMachines := controlPlane.Machines.UnsortedList() + // Make sure Cluster API is aware of all the nodes. + + // TODO: If any node has a failure, healthCheck fails. This may be too strict for the health check of HA clusters (Missing a single etcd pod does not indicate a problem)... 
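The TODO above notes that the aggregation below is strict: a single bad node fails the whole check. A small, self-contained illustration of that behaviour (the node names and errors are made up):

// Illustrative only: with the current aggregation, one failing node out of three
// makes the overall health check return a non-nil error, even though a
// 3-member etcd cluster would still have quorum.
package main

import (
	"errors"
	"fmt"

	kerrors "k8s.io/apimachinery/pkg/util/errors"
)

func main() {
	nodeChecks := map[string]error{
		"cp-0": nil,
		"cp-1": nil,
		"cp-2": errors.New("etcd pod is not ready"), // one unhealthy member...
	}
	var errs []error
	for name, err := range nodeChecks {
		if err != nil {
			errs = append(errs, fmt.Errorf("node %q: %v", name, err))
		}
	}
	// ...is enough to fail the aggregated check.
	fmt.Println(kerrors.NewAggregate(errs) != nil) // true
}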
for nodeName, err := range nodeChecks { if err != nil { errorList = append(errorList, fmt.Errorf("node %q: %v", nodeName, err)) @@ -199,44 +199,57 @@ func (m *Management) healthCheck(ctx context.Context, check healthCheck, cluster return kerrors.NewAggregate(errorList) } - // Make sure Cluster API is aware of all the nodes. - machines, err := m.GetMachinesForCluster(ctx, clusterKey, machinefilters.ControlPlaneMachines(clusterKey.Name)) - if err != nil { - return err - } - // This check ensures there is a 1 to 1 correspondence of nodes and machines. // If a machine was not checked this is considered an error. - for _, machine := range machines { + for _, machine := range kcpMachines { if machine.Status.NodeRef == nil { + // The condition for this case is set by the Machine controller return errors.Errorf("control plane machine %s/%s has no status.nodeRef", machine.Namespace, machine.Name) } if _, ok := nodeChecks[machine.Status.NodeRef.Name]; !ok { return errors.Errorf("machine's (%s/%s) node (%s) was not checked", machine.Namespace, machine.Name, machine.Status.NodeRef.Name) } } - if len(nodeChecks) != len(machines) { - return errors.Errorf("number of nodes and machines in namespace %s did not match: %d nodes %d machines", clusterKey.Namespace, len(nodeChecks), len(machines)) + if len(nodeChecks) != len(kcpMachines) { + // MachinesReadyCondition covers this health failure. + return errors.Errorf("number of nodes and machines in namespace %s did not match: %d nodes %d machines", clusterKey.Namespace, len(nodeChecks), len(kcpMachines)) } return nil } +func (m *Management) TargetClusterControlPlaneHealthCheck(ctx context.Context, controlPlane *ControlPlane, clusterKey client.ObjectKey) (HealthCheckResult, error) { + workloadCluster, err := m.GetWorkloadCluster(ctx, clusterKey) + if err != nil { + return nil, err + } + return workloadCluster.ControlPlaneIsHealthy(ctx, controlPlane) +} + +func (m *Management) TargetClusterEtcdHealthCheck(ctx context.Context, controlPlane *ControlPlane, clusterKey client.ObjectKey) (HealthCheckResult, error) { + workloadCluster, err := m.GetWorkloadCluster(ctx, clusterKey) + if err != nil { + return nil, err + } + return workloadCluster.EtcdIsHealthy(ctx, controlPlane) +} + // TargetClusterControlPlaneIsHealthy checks every node for control plane health. -func (m *Management) TargetClusterControlPlaneIsHealthy(ctx context.Context, clusterKey client.ObjectKey) error { +func (m *Management) TargetClusterControlPlaneIsHealthy(ctx context.Context, controlPlane *ControlPlane, clusterKey client.ObjectKey) error { // TODO: add checks for expected taints/labels - cluster, err := m.GetWorkloadCluster(ctx, clusterKey) + + checkResult, err := m.TargetClusterControlPlaneHealthCheck(ctx, controlPlane, clusterKey) if err != nil { return err } - return m.healthCheck(ctx, cluster.ControlPlaneIsHealthy, clusterKey) + return m.healthCheck(controlPlane, checkResult, clusterKey) } // TargetClusterEtcdIsHealthy runs a series of checks over a target cluster's etcd cluster. // In addition, it verifies that there are the same number of etcd members as control plane Machines. 
-func (m *Management) TargetClusterEtcdIsHealthy(ctx context.Context, clusterKey client.ObjectKey) error { - cluster, err := m.GetWorkloadCluster(ctx, clusterKey) +func (m *Management) TargetClusterEtcdIsHealthy(ctx context.Context, controlPlane *ControlPlane, clusterKey client.ObjectKey) error { + checkResult, err := m.TargetClusterEtcdHealthCheck(ctx, controlPlane, clusterKey) if err != nil { return err } - return m.healthCheck(ctx, cluster.EtcdIsHealthy, clusterKey) + return m.healthCheck(controlPlane, checkResult, clusterKey) } diff --git a/controlplane/kubeadm/internal/cluster_test.go b/controlplane/kubeadm/internal/cluster_test.go index 4cc990e90e85..c9cc1f436ce7 100644 --- a/controlplane/kubeadm/internal/cluster_test.go +++ b/controlplane/kubeadm/internal/cluster_test.go @@ -38,6 +38,7 @@ import ( "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/runtime/schema" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3" + "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1alpha3" "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/machinefilters" "sigs.k8s.io/cluster-api/util/certs" "sigs.k8s.io/cluster-api/util/kubeconfig" @@ -63,7 +64,7 @@ func TestCheckStaticPodReadyCondition(t *testing.T) { Spec: corev1.PodSpec{}, Status: corev1.PodStatus{Conditions: test.conditions}, } - g.Expect(checkStaticPodReadyCondition(pod)).To(Succeed()) + g.Expect(isStaticPodInReadyCondition(pod)).To(Succeed()) }) } } @@ -89,7 +90,7 @@ func TestCheckStaticPodNotReadyCondition(t *testing.T) { Spec: corev1.PodSpec{}, Status: corev1.PodStatus{Conditions: test.conditions}, } - g.Expect(checkStaticPodReadyCondition(pod)).NotTo(Succeed()) + g.Expect(isStaticPodInReadyCondition(pod)).NotTo(Succeed()) }) } } @@ -118,8 +119,10 @@ func TestControlPlaneIsHealthy(t *testing.T) { }, }, } + controlPlane, err := NewControlPlane(ctx, workloadCluster.Client, &clusterv1.Cluster{}, &v1alpha3.KubeadmControlPlane{}, nil) + g.Expect(err).NotTo(HaveOccurred()) - health, err := workloadCluster.ControlPlaneIsHealthy(context.Background()) + health, err := workloadCluster.ControlPlaneIsHealthy(context.Background(), controlPlane) g.Expect(err).NotTo(HaveOccurred()) g.Expect(health).NotTo(HaveLen(0)) g.Expect(health).To(HaveLen(len(nodeListForTestControlPlaneIsHealthy().Items))) @@ -156,7 +159,7 @@ func TestGetMachinesForCluster(t *testing.T) { func TestGetWorkloadCluster(t *testing.T) { g := NewWithT(t) - ns, err := testEnv.CreateNamespace(ctx, "workload-cluster") + ns, err := testEnv.CreateNamespace(ctx, "workload-cluster2") g.Expect(err).ToNot(HaveOccurred()) defer func() { g.Expect(testEnv.Cleanup(ctx, ns)).To(Succeed()) @@ -459,138 +462,105 @@ func (f *fakeClient) Update(_ context.Context, _ runtime.Object, _ ...client.Upd return nil } +func createControlPlane(machines []*clusterv1.Machine) *ControlPlane { + controlPlane, _ := NewControlPlane(ctx, &fakeClient{}, &clusterv1.Cluster{}, &v1alpha3.KubeadmControlPlane{}, NewFilterableMachineCollection(machines...)) + return controlPlane +} + func TestManagementCluster_healthCheck_NoError(t *testing.T) { + threeMachines := []*clusterv1.Machine{ + controlPlaneMachine("one"), + controlPlaneMachine("two"), + controlPlaneMachine("three"), + } + controlPlane := createControlPlane(threeMachines) tests := []struct { name string - machineList *clusterv1.MachineList - check healthCheck + checkResult HealthCheckResult clusterKey client.ObjectKey controlPlaneName string + controlPlane *ControlPlane }{ { name: "simple", - machineList: &clusterv1.MachineList{ - Items: []clusterv1.Machine{ - 
controlPlaneMachine("one"), - controlPlaneMachine("two"), - controlPlaneMachine("three"), - }, - }, - check: func(ctx context.Context) (HealthCheckResult, error) { - return HealthCheckResult{ - "one": nil, - "two": nil, - "three": nil, - }, nil + checkResult: HealthCheckResult{ + "one": nil, + "two": nil, + "three": nil, }, - clusterKey: client.ObjectKey{Namespace: "default", Name: "cluster-name"}, + clusterKey: client.ObjectKey{Namespace: "default", Name: "cluster-name"}, + controlPlane: controlPlane, }, } + for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { g := NewWithT(t) - ctx := context.Background() m := &Management{ - Client: &fakeClient{list: tt.machineList}, + Client: &fakeClient{}, } - g.Expect(m.healthCheck(ctx, tt.check, tt.clusterKey)).To(Succeed()) + g.Expect(m.healthCheck(controlPlane, tt.checkResult, tt.clusterKey)).To(Succeed()) }) } } func TestManagementCluster_healthCheck_Errors(t *testing.T) { + tests := []struct { name string - machineList *clusterv1.MachineList - check healthCheck + checkResult HealthCheckResult clusterKey client.ObjectKey controlPlaneName string + controlPlane *ControlPlane // expected errors will ensure the error contains this list of strings. // If not supplied, no check on the error's value will occur. expectedErrors []string }{ { name: "machine's node was not checked for health", - machineList: &clusterv1.MachineList{ - Items: []clusterv1.Machine{ - controlPlaneMachine("one"), - controlPlaneMachine("two"), - controlPlaneMachine("three"), - }, - }, - check: func(ctx context.Context) (HealthCheckResult, error) { - return HealthCheckResult{ - "one": nil, - }, nil + controlPlane: createControlPlane([]*clusterv1.Machine{ + controlPlaneMachine("one"), + controlPlaneMachine("two"), + controlPlaneMachine("three"), + }), + checkResult: HealthCheckResult{ + "one": nil, }, }, - { - name: "health check returns an error not related to the nodes health", - machineList: &clusterv1.MachineList{ - Items: []clusterv1.Machine{ - controlPlaneMachine("one"), - controlPlaneMachine("two"), - controlPlaneMachine("three"), - }, - }, - check: func(ctx context.Context) (HealthCheckResult, error) { - return HealthCheckResult{ - "one": nil, - "two": errors.New("two"), - "three": errors.New("three"), - }, errors.New("meta") - }, - expectedErrors: []string{"two", "three", "meta"}, - }, { name: "two nodes error on the check but no overall error occurred", - machineList: &clusterv1.MachineList{ - Items: []clusterv1.Machine{ - controlPlaneMachine("one"), - controlPlaneMachine("two"), - controlPlaneMachine("three"), - }, - }, - check: func(ctx context.Context) (HealthCheckResult, error) { - return HealthCheckResult{ - "one": nil, - "two": errors.New("two"), - "three": errors.New("three"), - }, nil + controlPlane: createControlPlane([]*clusterv1.Machine{ + controlPlaneMachine("one"), + controlPlaneMachine("two"), + controlPlaneMachine("three")}), + checkResult: HealthCheckResult{ + "one": nil, + "two": errors.New("two"), + "three": errors.New("three"), }, expectedErrors: []string{"two", "three"}, }, { name: "more nodes than machines were checked (out of band control plane nodes)", - machineList: &clusterv1.MachineList{ - Items: []clusterv1.Machine{ - controlPlaneMachine("one"), - }, - }, - check: func(ctx context.Context) (HealthCheckResult, error) { - return HealthCheckResult{ - "one": nil, - "two": nil, - "three": nil, - }, nil + controlPlane: createControlPlane([]*clusterv1.Machine{ + controlPlaneMachine("one")}), + checkResult: HealthCheckResult{ + "one": nil, + 
"two": nil, + "three": nil, }, }, { name: "a machine that has a nil node reference", - machineList: &clusterv1.MachineList{ - Items: []clusterv1.Machine{ - controlPlaneMachine("one"), - controlPlaneMachine("two"), - nilNodeRef(controlPlaneMachine("three")), - }, - }, - check: func(ctx context.Context) (HealthCheckResult, error) { - return HealthCheckResult{ - "one": nil, - "two": nil, - "three": nil, - }, nil + controlPlane: createControlPlane([]*clusterv1.Machine{ + controlPlaneMachine("one"), + controlPlaneMachine("two"), + nilNodeRef(controlPlaneMachine("three"))}), + checkResult: HealthCheckResult{ + "one": nil, + "two": nil, + "three": nil, }, }, } @@ -598,13 +568,10 @@ func TestManagementCluster_healthCheck_Errors(t *testing.T) { t.Run(tt.name, func(t *testing.T) { g := NewWithT(t) - ctx := context.Background() clusterKey := client.ObjectKey{Namespace: "default", Name: "cluster-name"} - m := &Management{ - Client: &fakeClient{list: tt.machineList}, - } - err := m.healthCheck(ctx, tt.check, clusterKey) + m := &Management{Client: &fakeClient{}} + err := m.healthCheck(tt.controlPlane, tt.checkResult, clusterKey) g.Expect(err).To(HaveOccurred()) for _, expectedError := range tt.expectedErrors { @@ -614,9 +581,9 @@ func TestManagementCluster_healthCheck_Errors(t *testing.T) { } } -func controlPlaneMachine(name string) clusterv1.Machine { +func controlPlaneMachine(name string) *clusterv1.Machine { t := true - return clusterv1.Machine{ + return &clusterv1.Machine{ ObjectMeta: metav1.ObjectMeta{ Namespace: "default", Name: name, @@ -637,7 +604,7 @@ func controlPlaneMachine(name string) clusterv1.Machine { } } -func nilNodeRef(machine clusterv1.Machine) clusterv1.Machine { +func nilNodeRef(machine *clusterv1.Machine) *clusterv1.Machine { machine.Status.NodeRef = nil return machine } diff --git a/controlplane/kubeadm/internal/control_plane.go b/controlplane/kubeadm/internal/control_plane.go index 0c2c02dffd56..6ce64c0f8f27 100644 --- a/controlplane/kubeadm/internal/control_plane.go +++ b/controlplane/kubeadm/internal/control_plane.go @@ -41,7 +41,6 @@ type ControlPlane struct { KCP *controlplanev1.KubeadmControlPlane Cluster *clusterv1.Cluster Machines FilterableMachineCollection - // reconciliationTime is the time of the current reconciliation, and should be used for all "now" calculations reconciliationTime metav1.Time @@ -57,10 +56,12 @@ func NewControlPlane(ctx context.Context, client client.Client, cluster *cluster if err != nil { return nil, err } + kubeadmConfigs, err := getKubeadmConfigs(ctx, client, ownedMachines) if err != nil { return nil, err } + return &ControlPlane{ KCP: kcp, Cluster: cluster, diff --git a/controlplane/kubeadm/internal/etcd/etcd.go b/controlplane/kubeadm/internal/etcd/etcd.go index a71fbb898de7..8a75d66d1fd0 100644 --- a/controlplane/kubeadm/internal/etcd/etcd.go +++ b/controlplane/kubeadm/internal/etcd/etcd.go @@ -24,6 +24,7 @@ import ( "github.com/pkg/errors" "go.etcd.io/etcd/clientv3" + "go.etcd.io/etcd/etcdserver/api/v3rpc/rpctypes" "go.etcd.io/etcd/etcdserver/etcdserverpb" "google.golang.org/grpc" "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/proxy" @@ -53,6 +54,7 @@ type Client struct { EtcdClient etcd Endpoint string LeaderID uint64 + KVClient clientv3.KV } // MemberAlarm represents an alarm type association with a cluster member. 
@@ -132,14 +134,15 @@ func NewClient(ctx context.Context, endpoints []string, p proxy.Proxy, tlsConfig }, TLS: tlsConfig, }) + if err != nil { return nil, errors.Wrap(err, "unable to create etcd client") } - return newEtcdClient(ctx, etcdClient) + return newEtcdClient(ctx, etcdClient, etcdClient) } -func newEtcdClient(ctx context.Context, etcdClient etcd) (*Client, error) { +func newEtcdClient(ctx context.Context, etcdClient etcd, kvClient clientv3.KV) (*Client, error) { endpoints := etcdClient.Endpoints() if len(endpoints) == 0 { return nil, errors.New("etcd client was not configured with any endpoints") @@ -154,6 +157,7 @@ func newEtcdClient(ctx context.Context, etcdClient etcd) (*Client, error) { Endpoint: endpoints[0], EtcdClient: etcdClient, LeaderID: status.Leader, + KVClient: kvClient, }, nil } @@ -234,3 +238,13 @@ func (c *Client) Alarms(ctx context.Context) ([]MemberAlarm, error) { return memberAlarms, nil } + +// HealthCheck checks the healthiness of endpoints specified in endpoints during Client creation. +// Using the same logic used in etcdctl health command. +func (c *Client) HealthCheck(ctx context.Context) error { + _, err := c.KVClient.Get(ctx, "health") + if err == nil || err == rpctypes.ErrPermissionDenied { + return nil + } + return err +} diff --git a/controlplane/kubeadm/internal/etcd/etcd_test.go b/controlplane/kubeadm/internal/etcd/etcd_test.go index 1d7f99abcaec..487846df6dec 100644 --- a/controlplane/kubeadm/internal/etcd/etcd_test.go +++ b/controlplane/kubeadm/internal/etcd/etcd_test.go @@ -47,7 +47,7 @@ func TestEtcdMembers_WithErrors(t *testing.T) { ErrorResponse: errors.New("something went wrong"), } - client, err := newEtcdClient(ctx, fakeEtcdClient) + client, err := newEtcdClient(ctx, fakeEtcdClient, nil) g.Expect(err).NotTo(HaveOccurred()) members, err := client.Members(ctx) @@ -86,7 +86,11 @@ func TestEtcdMembers_WithSuccess(t *testing.T) { StatusResponse: &clientv3.StatusResponse{}, } - client, err := newEtcdClient(ctx, fakeEtcdClient) + fakeKVClient := &etcdfake.FakeKVClient{ + GetResponse: &clientv3.GetResponse{}, + } + + client, err := newEtcdClient(ctx, fakeEtcdClient, fakeKVClient) g.Expect(err).NotTo(HaveOccurred()) members, err := client.Members(ctx) diff --git a/controlplane/kubeadm/internal/etcd/fake/client.go b/controlplane/kubeadm/internal/etcd/fake/client.go index 98ce83c9a2bb..1ec75d8446ae 100644 --- a/controlplane/kubeadm/internal/etcd/fake/client.go +++ b/controlplane/kubeadm/internal/etcd/fake/client.go @@ -22,6 +22,31 @@ import ( "go.etcd.io/etcd/clientv3" ) +type FakeKVClient struct { + GetResponse *clientv3.GetResponse +} + +func (kv *FakeKVClient) Put(ctx context.Context, key, val string, opts ...clientv3.OpOption) (*clientv3.PutResponse, error) { + return nil, nil +} +func (kv *FakeKVClient) Get(ctx context.Context, key string, opts ...clientv3.OpOption) (*clientv3.GetResponse, error) { + return kv.GetResponse, nil +} +func (kv *FakeKVClient) Delete(ctx context.Context, key string, opts ...clientv3.OpOption) (*clientv3.DeleteResponse, error) { + return nil, nil +} +func (kv *FakeKVClient) Compact(ctx context.Context, rev int64, opts ...clientv3.CompactOption) (*clientv3.CompactResponse, error) { + return nil, nil +} + +func (kv *FakeKVClient) Do(ctx context.Context, op clientv3.Op) (clientv3.OpResponse, error) { + return clientv3.OpResponse{}, nil +} + +func (kv *FakeKVClient) Txn(ctx context.Context) clientv3.Txn { + return nil +} + type FakeEtcdClient struct { AlarmResponse *clientv3.AlarmResponse EtcdEndpoints []string diff --git 
a/controlplane/kubeadm/internal/etcd_client_generator.go b/controlplane/kubeadm/internal/etcd_client_generator.go index e709a3bae48d..62eb77a7791b 100644 --- a/controlplane/kubeadm/internal/etcd_client_generator.go +++ b/controlplane/kubeadm/internal/etcd_client_generator.go @@ -19,7 +19,6 @@ package internal import ( "context" "crypto/tls" - "github.com/pkg/errors" kerrors "k8s.io/apimachinery/pkg/util/errors" @@ -39,7 +38,7 @@ type etcdClientGenerator struct { func (c *etcdClientGenerator) forNodes(ctx context.Context, nodes []corev1.Node) (*etcd.Client, error) { endpoints := make([]string, len(nodes)) for i, node := range nodes { - endpoints[i] = staticPodName("etcd", node.Name) + endpoints[i] = staticPodName(EtcdPodNamePrefix, node.Name) } p := proxy.Proxy{ diff --git a/controlplane/kubeadm/internal/machine_collection.go b/controlplane/kubeadm/internal/machine_collection.go index 425cea20344a..975aea467218 100644 --- a/controlplane/kubeadm/internal/machine_collection.go +++ b/controlplane/kubeadm/internal/machine_collection.go @@ -83,8 +83,8 @@ func (s FilterableMachineCollection) SortedByCreationTimestamp() []*clusterv1.Ma return res } -// unsortedList returns the slice with contents in random order. -func (s FilterableMachineCollection) unsortedList() []*clusterv1.Machine { +// UnsortedList returns the slice with contents in random order. +func (s FilterableMachineCollection) UnsortedList() []*clusterv1.Machine { res := make([]*clusterv1.Machine, 0, len(s)) for _, value := range s { res = append(res, value) @@ -110,12 +110,12 @@ func newFilteredMachineCollection(filter machinefilters.Func, machines ...*clust // Filter returns a FilterableMachineCollection containing only the Machines that match all of the given MachineFilters func (s FilterableMachineCollection) Filter(filters ...machinefilters.Func) FilterableMachineCollection { - return newFilteredMachineCollection(machinefilters.And(filters...), s.unsortedList()...) + return newFilteredMachineCollection(machinefilters.And(filters...), s.UnsortedList()...) } // AnyFilter returns a FilterableMachineCollection containing only the Machines that match any of the given MachineFilters func (s FilterableMachineCollection) AnyFilter(filters ...machinefilters.Func) FilterableMachineCollection { - return newFilteredMachineCollection(machinefilters.Or(filters...), s.unsortedList()...) + return newFilteredMachineCollection(machinefilters.Or(filters...), s.UnsortedList()...) 
} // Oldest returns the Machine with the oldest CreationTimestamp diff --git a/controlplane/kubeadm/internal/workload_cluster.go b/controlplane/kubeadm/internal/workload_cluster.go index d0db1fe684a9..b9545cd591cf 100644 --- a/controlplane/kubeadm/internal/workload_cluster.go +++ b/controlplane/kubeadm/internal/workload_cluster.go @@ -34,10 +34,12 @@ import ( corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + kerrors "k8s.io/apimachinery/pkg/util/errors" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3" controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1alpha3" "sigs.k8s.io/cluster-api/util" "sigs.k8s.io/cluster-api/util/certs" + "sigs.k8s.io/cluster-api/util/conditions" containerutil "sigs.k8s.io/cluster-api/util/container" "sigs.k8s.io/cluster-api/util/patch" ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" @@ -53,12 +55,20 @@ var ( ErrControlPlaneMinNodes = errors.New("cluster has fewer than 2 control plane nodes; removing an etcd member is not supported") ) +// Common control-plane pod name prefixes +const ( + KubeAPIServerPodNamePrefix = "kube-apiserver" + KubeControllerManagerPodNamePrefix = "kube-controller-manager" + KubeSchedulerHealthyPodNamePrefix = "kube-scheduler" + EtcdPodNamePrefix = "etcd" +) + // WorkloadCluster defines all behaviors necessary to upgrade kubernetes on a workload cluster type WorkloadCluster interface { // Basic health and status checks. ClusterStatus(ctx context.Context) (ClusterStatus, error) - ControlPlaneIsHealthy(ctx context.Context) (HealthCheckResult, error) - EtcdIsHealthy(ctx context.Context) (HealthCheckResult, error) + ControlPlaneIsHealthy(ctx context.Context, controlPlane *ControlPlane) (HealthCheckResult, error) + EtcdIsHealthy(ctx context.Context, controlPlane *ControlPlane) (HealthCheckResult, error) // Upgrade related tasks. ReconcileKubeletRBACBinding(ctx context.Context, version semver.Version) error @@ -112,7 +122,7 @@ type HealthCheckResult map[string]error // controlPlaneIsHealthy does a best effort check of the control plane components the kubeadm control plane cares about. // The return map is a map of node names as keys to error that that node encountered. // All nodes will exist in the map with nil errors if there were no errors for that node. -func (w *Workload) ControlPlaneIsHealthy(ctx context.Context) (HealthCheckResult, error) { +func (w *Workload) ControlPlaneIsHealthy(ctx context.Context, controlPlane *ControlPlane) (HealthCheckResult, error) { controlPlaneNodes, err := w.getControlPlaneNodes(ctx) if err != nil { return nil, err @@ -123,37 +133,112 @@ func (w *Workload) ControlPlaneIsHealthy(ctx context.Context) (HealthCheckResult name := node.Name response[name] = nil - if err := checkNodeNoExecuteCondition(node); err != nil { - response[name] = err - continue + var owningMachine *clusterv1.Machine = nil + // If there is no owning Machine for the node, setting conditions are no-op. + for _, machine := range controlPlane.Machines { + if machine.Status.NodeRef == nil { + continue + } + if machine.Status.NodeRef.Name == node.Name { + owningMachine = machine + } } - apiServerPodKey := ctrlclient.ObjectKey{ - Namespace: metav1.NamespaceSystem, - Name: staticPodName("kube-apiserver", name), + // If there is any problem with the checks below, set response to any one of the problems. 
+ errList := []error{} + + if err := checkNodeNoExecuteTaint(node); err != nil { + errList = append(errList, err) } - apiServerPod := corev1.Pod{} - if err := w.Client.Get(ctx, apiServerPodKey, &apiServerPod); err != nil { - response[name] = err - continue + + // Check kube-api-server health + if _, err = w.reconcilePodStatusCondition(KubeAPIServerPodNamePrefix, node, owningMachine, controlplanev1.MachineAPIServerPodHealthyCondition); err != nil { + errList = append(errList, err) } - response[name] = checkStaticPodReadyCondition(apiServerPod) - controllerManagerPodKey := ctrlclient.ObjectKey{ - Namespace: metav1.NamespaceSystem, - Name: staticPodName("kube-controller-manager", name), + // Check kube-controller-manager health + if _, err = w.reconcilePodStatusCondition(KubeControllerManagerPodNamePrefix, node, owningMachine, controlplanev1.MachineControllerManagerHealthyCondition); err != nil { + errList = append(errList, err) } - controllerManagerPod := corev1.Pod{} - if err := w.Client.Get(ctx, controllerManagerPodKey, &controllerManagerPod); err != nil { - response[name] = err + + // Check kube-scheduler health + if _, err = w.reconcilePodStatusCondition(KubeSchedulerHealthyPodNamePrefix, node, owningMachine, controlplanev1.MachineSchedulerPodHealthyCondition); err != nil { + errList = append(errList, err) + } + + if len(errList) > 0 { + response[name] = kerrors.NewAggregate(errList) continue } - response[name] = checkStaticPodReadyCondition(controllerManagerPod) } return response, nil } +type podState struct { + provisioning bool + unknown bool + ready bool +} + +// reconcilePodStatusCondition returns error if all podState states are false, i.e., Node is reachable, provisioned, and not in Ready state. +func (w *Workload) reconcilePodStatusCondition(staticPodPrefix string, node corev1.Node, owningMachine *clusterv1.Machine, conditionType clusterv1.ConditionType) (podState, error) { + staticPodKey := ctrlclient.ObjectKey{ + Namespace: metav1.NamespaceSystem, + Name: staticPodName(staticPodPrefix, node.Name), + } + + staticPod := corev1.Pod{} + if err := w.Client.Get(context.TODO(), staticPodKey, &staticPod); err != nil { + // If there is an error getting the Pod, do not set any conditions. + if apierrors.IsNotFound(err) { + if owningMachine != nil { + conditions.MarkFalse(owningMachine, conditionType, controlplanev1.PodMissingReason, clusterv1.ConditionSeverityWarning, "") + } + return podState{}, errors.Errorf("static pod %s is missing", staticPodPrefix) + } + // Do not return error here, because this may be a transient error. + return podState{unknown: true}, nil + } + + // If Pod is Ready, no need to check the rest of the states below. + if err := isStaticPodInReadyCondition(staticPod); err == nil { + if owningMachine != nil { + conditions.MarkTrue(owningMachine, conditionType) + } + return podState{ready: true}, nil + } + + // If Pod is not in Ready state, it can be in Provisioning or Failed state. + + if isStaticPodInFailedPhase(staticPod) { + if owningMachine != nil { + conditions.MarkFalse(owningMachine, conditionType, controlplanev1.PodFailedReason, clusterv1.ConditionSeverityError, "") + } + return podState{}, errors.Errorf("static pod %s is failed", staticPodPrefix) + } + + // Check if the Pod is in Pending state due to provisioning. + if isStaticPodProvisioning(staticPod) { + if owningMachine != nil { + conditions.MarkFalse(owningMachine, conditionType, controlplanev1.PodProvisioningReason, clusterv1.ConditionSeverityInfo, "") + } + // This is not an error case. 
+ return podState{provisioning: true}, nil + } + + // Pod is provisioned, but if it is still in Pending Phase, surface the errors in Containers. + // Non-nil error means there is at least one container in waiting state. + if err := checkStaticPodAfterProvisioningState(staticPod); err != nil { + if owningMachine != nil { + conditions.MarkFalse(owningMachine, conditionType, controlplanev1.PodFailedReason, clusterv1.ConditionSeverityError, "Pod is provisioned but not ready: %s", err.Error()) + } + return podState{}, errors.Errorf("static pod %s is provisioned but still is not ready", staticPodPrefix) + } + + return podState{}, errors.Errorf("static pod %s is not ready", staticPodPrefix) +} + // UpdateKubernetesVersionInKubeadmConfigMap updates the kubernetes version in the kubeadm config map. func (w *Workload) UpdateImageRepositoryInKubeadmConfigMap(ctx context.Context, imageRepository string) error { configMapKey := ctrlclient.ObjectKey{Name: "kubeadm-config", Namespace: metav1.NamespaceSystem} @@ -348,23 +433,64 @@ func staticPodName(component, nodeName string) string { return fmt.Sprintf("%s-%s", component, nodeName) } -func checkStaticPodReadyCondition(pod corev1.Pod) error { +func isStaticPodInReadyCondition(pod corev1.Pod) error { found := false for _, condition := range pod.Status.Conditions { if condition.Type == corev1.PodReady { found = true - } - if condition.Type == corev1.PodReady && condition.Status != corev1.ConditionTrue { - return errors.Errorf("static pod %s/%s is not ready", pod.Namespace, pod.Name) + if condition.Status != corev1.ConditionTrue { + return errors.Errorf("static pod %s/%s is not ready", pod.Namespace, pod.Name) + } } } + + // TODO: Remove this as static pods have ready condition. if !found { return errors.Errorf("pod does not have ready condition: %v", pod.Name) } return nil } -func checkNodeNoExecuteCondition(node corev1.Node) error { +func isStaticPodInFailedPhase(pod corev1.Pod) bool { + return pod.Status.Phase == corev1.PodFailed +} + +// If Pod is in Pending phase and PodScheduled or Initialized condition is set to false, then pod is in provisioning state. +func isStaticPodProvisioning(pod corev1.Pod) bool { + // Pod being not in Pending phase means it has already provisioned and running or failed or in unknown phase. + if pod.Status.Phase != corev1.PodPending { + return false + } + + for _, condition := range pod.Status.Conditions { + if condition.Status != corev1.ConditionTrue { + switch condition.Type { + case corev1.PodScheduled, corev1.PodInitialized: + return true + } + } + } + return false +} + +// If Pod is in Pending phase, there may be some failing containers in the pod. +// This function must be called when PodScheduled and Initialized condition is set to true. If a Pod is initialized but still in Pending state, +// returns non-nil string with the reason of the container in waiting state. +func checkStaticPodAfterProvisioningState(pod corev1.Pod) error { + // Pod being not in Pending phase means it has already provisioned and running or failed or in unknown phase. 
+ if pod.Status.Phase != corev1.PodPending { + return nil + } + + for _, containerStatus := range pod.Status.ContainerStatuses { + if containerStatus.State.Waiting != nil { + return errors.New(containerStatus.State.Waiting.Reason) + } + } + return nil +} + +func checkNodeNoExecuteTaint(node corev1.Node) error { for _, taint := range node.Spec.Taints { if taint.Key == corev1.TaintNodeUnreachable && taint.Effect == corev1.TaintEffectNoExecute { return errors.Errorf("node has NoExecute taint: %v", node.Name) diff --git a/controlplane/kubeadm/internal/workload_cluster_etcd.go b/controlplane/kubeadm/internal/workload_cluster_etcd.go index 78261db27911..688b8a2abbc6 100644 --- a/controlplane/kubeadm/internal/workload_cluster_etcd.go +++ b/controlplane/kubeadm/internal/workload_cluster_etcd.go @@ -18,14 +18,15 @@ package internal import ( "context" - "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" kerrors "k8s.io/apimachinery/pkg/util/errors" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3" + controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1alpha3" "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd" etcdutil "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd/util" + "sigs.k8s.io/cluster-api/util/conditions" ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -38,7 +39,7 @@ type etcdClientFor interface { // This is a best effort check and nodes can become unhealthy after the check is complete. It is not a guarantee. // It's used a signal for if we should allow a target cluster to scale up, scale down or upgrade. // It returns a map of nodes checked along with an error for a given node. -func (w *Workload) EtcdIsHealthy(ctx context.Context) (HealthCheckResult, error) { +func (w *Workload) EtcdIsHealthy(ctx context.Context, controlPlane *ControlPlane) (HealthCheckResult, error) { var knownClusterID uint64 var knownMemberIDSet etcdutil.UInt64Set @@ -49,6 +50,11 @@ func (w *Workload) EtcdIsHealthy(ctx context.Context) (HealthCheckResult, error) expectedMembers := 0 response := make(map[string]error) + var owningMachine *clusterv1.Machine + + // Initial assumtion is that etcd cluster is healthy. If otherwise is observed below, it is set to false. + conditions.MarkTrue(controlPlane.KCP, controlplanev1.EtcdClusterHealthy) + for _, node := range controlPlaneNodes.Items { name := node.Name response[name] = nil @@ -57,19 +63,24 @@ func (w *Workload) EtcdIsHealthy(ctx context.Context) (HealthCheckResult, error) continue } - // Check to see if the pod is ready - etcdPodKey := ctrlclient.ObjectKey{ - Namespace: metav1.NamespaceSystem, - Name: staticPodName("etcd", name), - } - pod := corev1.Pod{} - if err := w.Client.Get(ctx, etcdPodKey, &pod); err != nil { - response[name] = errors.Wrap(err, "failed to get etcd pod") - continue + for _, m := range controlPlane.Machines { + if m.Spec.ProviderID != nil && *m.Spec.ProviderID == node.Spec.ProviderID { + owningMachine = m + // Only set this condition if the node has an owning machine. + conditions.MarkTrue(owningMachine, controlplanev1.MachineEtcdMemberHealthyCondition) + break + } } - if err := checkStaticPodReadyCondition(pod); err != nil { + + // TODO: If owning machine is nil, should not continue. But this change breaks the logic below. 
+
+		// Check the etcd pod's health.
+		etcdPodState, err := w.reconcilePodStatusCondition(EtcdPodNamePrefix, node, owningMachine, controlplanev1.MachineEtcdPodHealthyCondition)
+		if !etcdPodState.ready {
 			// Nothing wrong here, etcd on this node is just not running.
 			// If it's a true failure the healthcheck will fail since it won't have checked enough members.
+			response[name] = errors.Wrap(err, "etcd pod is not ready")
 			continue
 		}
 		// Only expect a member reports healthy if its pod is ready.
@@ -80,14 +91,24 @@ func (w *Workload) EtcdIsHealthy(ctx context.Context) (HealthCheckResult, error)
 		// Create the etcd Client for the etcd Pod scheduled on the Node
 		etcdClient, err := w.etcdClientGenerator.forNodes(ctx, []corev1.Node{node})
 		if err != nil {
+			if owningMachine != nil {
+				conditions.MarkFalse(owningMachine, controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "failed to create etcd client: %s", err.Error())
+			}
 			response[name] = errors.Wrap(err, "failed to create etcd client")
 			continue
 		}
 		defer etcdClient.Close()
 
+		if err := etcdClient.HealthCheck(ctx); err != nil {
+			response[name] = errors.Wrap(err, "etcd member is unhealthy")
+			continue
+		}
 		// List etcd members. This checks that the member is healthy, because the request goes through consensus.
 		members, err := etcdClient.Members(ctx)
 		if err != nil {
+			if owningMachine != nil {
+				conditions.MarkFalse(owningMachine, controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "failed to list etcd members: %s", err.Error())
+			}
 			response[name] = errors.Wrap(err, "failed to list etcd members using etcd client")
 			continue
 		}
@@ -96,6 +117,9 @@ func (w *Workload) EtcdIsHealthy(ctx context.Context) (HealthCheckResult, error)
 
 		// Check that the member reports no alarms.
 		if len(member.Alarms) > 0 {
+			if owningMachine != nil {
+				conditions.MarkFalse(owningMachine, controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "etcd member has alarms.")
+			}
 			response[name] = errors.Errorf("etcd member reports alarms: %v", member.Alarms)
 			continue
 		}
@@ -116,6 +140,7 @@ func (w *Workload) EtcdIsHealthy(ctx context.Context) (HealthCheckResult, error)
 		} else {
 			unknownMembers := memberIDSet.Difference(knownMemberIDSet)
 			if unknownMembers.Len() > 0 {
+				conditions.MarkFalse(controlPlane.KCP, controlplanev1.EtcdClusterHealthy, controlplanev1.EtcdClusterUnhealthyReason, clusterv1.ConditionSeverityWarning, "etcd members do not have the same member-list view")
 				response[name] = errors.Errorf("etcd member reports members IDs %v, but all previously seen etcd members reported member IDs %v", memberIDSet.UnsortedList(), knownMemberIDSet.UnsortedList())
 			}
 			continue
@@ -127,9 +152,48 @@ func (w *Workload) EtcdIsHealthy(ctx context.Context) (HealthCheckResult, error)
 
 	// Check that there is exactly one etcd member for every healthy pod.
 	// This allows us to handle the expected case where there is a failing pod but it's been removed from the member list.
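+	// For example, with three control plane nodes where one etcd pod is not ready, expectedMembers is 2 and the
+	// healthy members are expected to agree on a member list of exactly 2 members (i.e. the failed member has been removed).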
 	if expectedMembers != len(knownMemberIDSet) {
+		conditions.MarkFalse(controlPlane.KCP, controlplanev1.EtcdClusterHealthy, controlplanev1.EtcdClusterUnhealthyReason, clusterv1.ConditionSeverityWarning, "etcd pods do not match etcd members.")
 		return response, errors.Errorf("there are %d healthy etcd pods, but %d etcd members", expectedMembers, len(knownMemberIDSet))
 	}
 
+	// Check etcd cluster-wide alarms.
+	etcdClient, err := w.etcdClientGenerator.forNodes(ctx, controlPlaneNodes.Items)
+	if err != nil {
+		conditions.MarkFalse(controlPlane.KCP, controlplanev1.EtcdClusterHealthy, controlplanev1.EtcdClusterUnhealthyReason, clusterv1.ConditionSeverityWarning, "failed to create etcd client.")
+		return response, err
+	}
+	defer etcdClient.Close()
+
+	alarmList, err := etcdClient.Alarms(ctx)
+	if err != nil {
+		conditions.MarkFalse(controlPlane.KCP, controlplanev1.EtcdClusterHealthy, controlplanev1.EtcdClusterUnhealthyReason, clusterv1.ConditionSeverityWarning, "failed to list etcd alarms.")
+		return response, err
+	}
+	if len(alarmList) > 0 {
+		conditions.MarkFalse(controlPlane.KCP, controlplanev1.EtcdClusterHealthy, controlplanev1.EtcdClusterUnhealthyReason, clusterv1.ConditionSeverityWarning, "etcd cluster has alarms.")
+		return response, errors.Errorf("etcd cluster has %d alarms", len(alarmList))
+	}
+
+	members, err := etcdClient.Members(ctx)
+	if err != nil {
+		conditions.MarkFalse(controlPlane.KCP, controlplanev1.EtcdClusterHealthy, controlplanev1.EtcdClusterUnhealthyReason, clusterv1.ConditionSeverityWarning, "failed to get etcd members.")
+		return response, err
+	}
+
+	healthyMembers := 0
+	for _, m := range members {
+		if val, ok := response[m.Name]; ok {
+			if val == nil {
+				healthyMembers++
+			}
+		} else {
+			// The etcd cluster has a member that does not correspond to any control plane node.
+			conditions.MarkFalse(controlPlane.KCP, controlplanev1.EtcdClusterHealthy, controlplanev1.EtcdClusterUnhealthyReason, clusterv1.ConditionSeverityWarning, "unknown etcd member that is not part of control plane nodes.")
+			return response, errors.Errorf("etcd member %q is not part of the control plane nodes", m.Name)
+		}
+	}
+	// TODO: During provisioning this condition may briefly be set to false until all pods are provisioned; additional checks could be added here to prevent that.
+	if healthyMembers < (len(members)/2 + 1) {
+		conditions.MarkFalse(controlPlane.KCP, controlplanev1.EtcdClusterHealthy, controlplanev1.EtcdClusterUnhealthyReason, clusterv1.ConditionSeverityWarning, "etcd cluster's quorum is lost.")
+		return response, errors.Errorf("etcd lost its quorum: only %d out of %d etcd members are healthy", healthyMembers, len(members))
+	}
+
 	return response, nil
 }
diff --git a/controlplane/kubeadm/internal/workload_cluster_etcd_test.go b/controlplane/kubeadm/internal/workload_cluster_etcd_test.go
index d4384d4bbec7..e997135494bf 100644
--- a/controlplane/kubeadm/internal/workload_cluster_etcd_test.go
+++ b/controlplane/kubeadm/internal/workload_cluster_etcd_test.go
@@ -39,6 +39,12 @@ import (
 func TestWorkload_EtcdIsHealthy(t *testing.T) {
 	g := NewWithT(t)
 
+	controlPlane := createControlPlane([]*clusterv1.Machine{
+		controlPlaneMachine("one"),
+		controlPlaneMachine("two"),
+		controlPlaneMachine("three"),
+	})
+
 	workload := &Workload{
 		Client: &fakeClient{
 			get: map[string]interface{}{
@@ -75,7 +81,7 @@ func TestWorkload_EtcdIsHealthy(t *testing.T) {
 		},
 	}
 	ctx := context.Background()
-	health, err := workload.EtcdIsHealthy(ctx)
+	health, err := workload.EtcdIsHealthy(ctx, controlPlane)
 	g.Expect(err).NotTo(HaveOccurred())
 	for _, err := range health {