From 94ebc1fd1643dd8ea456ab456fc8a4f42b5fb5e0 Mon Sep 17 00:00:00 2001 From: fabriziopandini Date: Thu, 17 Oct 2024 22:17:32 +0200 Subject: [PATCH] Add v1beta2 Etcd and ControlPlaneComponents conditions to KCP # Conflicts: # controlplane/kubeadm/api/v1beta1/v1beta2_condition_consts.go --- api/v1beta1/v1beta2_condition_consts.go | 3 + .../api/v1beta1/v1beta2_condition_consts.go | 103 ++- .../internal/controllers/controller.go | 32 +- .../internal/workload_cluster_conditions.go | 452 +++++++++++-- .../workload_cluster_conditions_test.go | 595 ++++++++++++++++-- ...240916-improve-status-in-CAPI-resources.md | 28 +- 6 files changed, 1070 insertions(+), 143 deletions(-) diff --git a/api/v1beta1/v1beta2_condition_consts.go b/api/v1beta1/v1beta2_condition_consts.go index 5a0f887e2a64..d6784d0c3392 100644 --- a/api/v1beta1/v1beta2_condition_consts.go +++ b/api/v1beta1/v1beta2_condition_consts.go @@ -139,6 +139,9 @@ const ( // This means that the object will go away (i.e. be removed from etcd), except if there are other // finalizers on the object. DeletionCompletedV1Beta2Reason = "DeletionCompleted" + + // InspectionFailedV1Beta2Reason applies to a condition when inspection of the underlying object failed. + InspectionFailedV1Beta2Reason = "InspectionFailed" ) // Conditions that will be used for the MachineSet object in v1Beta2 API version. diff --git a/controlplane/kubeadm/api/v1beta1/v1beta2_condition_consts.go b/controlplane/kubeadm/api/v1beta1/v1beta2_condition_consts.go index f24d19bbd283..47c3630e1488 100644 --- a/controlplane/kubeadm/api/v1beta1/v1beta2_condition_consts.go +++ b/controlplane/kubeadm/api/v1beta1/v1beta2_condition_consts.go @@ -20,7 +20,7 @@ import clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" // KubeadmControlPlane's Available condition and corresponding reasons that will be used in v1Beta2 API version. const ( - // KubeadmControlPlaneAvailableV1Beta2Condition True if the control plane can be reached, EtcdClusterAvailable is true, + // KubeadmControlPlaneAvailableV1Beta2Condition True if the control plane can be reached, EtcdClusterHealthy is true, // and CertificatesAvailable is true. KubeadmControlPlaneAvailableV1Beta2Condition = clusterv1.AvailableV1Beta2Condition ) @@ -40,12 +40,54 @@ const ( KubeadmControlPlaneCertificatesAvailableV1Beta2Reason = clusterv1.AvailableV1Beta2Reason ) -// KubeadmControlPlane's EtcdClusterAvailable condition and corresponding reasons that will be used in v1Beta2 API version. +// KubeadmControlPlane's EtcdClusterHealthy condition and corresponding reasons that will be used in v1Beta2 API version. const ( - // KubeadmControlPlaneEtcdClusterAvailableV1Beta2Condition surfaces issues to the managed etcd cluster, if any. - // It is computed as aggregation of Machines's EtcdMemberHealthy (if not using an external etcd) conditions plus - // additional checks validating potential issues to etcd quorum. - KubeadmControlPlaneEtcdClusterAvailableV1Beta2Condition = "EtcdClusterAvailable" + // KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition surfaces issues to etcd cluster hosted on machines managed by this object. + // It is computed as aggregation of Machine's EtcdMemberHealthy conditions plus additional checks validating + // potential issues to etcd quorum. + // Note: this condition is not set when using an external etcd. 
+ KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition = "EtcdClusterHealthy" + + // KubeadmControlPlaneEtcdClusterInspectionFailedV1Beta2Reason documents a failure when inspecting the status of the + // etcd cluster hosted on KubeadmControlPlane controlled machines. + KubeadmControlPlaneEtcdClusterInspectionFailedV1Beta2Reason = clusterv1.InspectionFailedV1Beta2Reason + + // KubeadmControlPlaneEtcdClusterHealthyV1Beta2Reason surfaces when the etcd cluster hosted on KubeadmControlPlane + // machines is healthy. + KubeadmControlPlaneEtcdClusterHealthyV1Beta2Reason = "Healthy" + + // KubeadmControlPlaneEtcdClusterNotHealthyV1Beta2Reason surfaces when the etcd cluster hosted on KubeadmControlPlane + // machines is not healthy. + KubeadmControlPlaneEtcdClusterNotHealthyV1Beta2Reason = "NotHealthy" + + // KubeadmControlPlaneEtcdClusterHealthUnknownV1Beta2Reason surfaces when the health status of the etcd cluster hosted + // on KubeadmControlPlane machines is unknown. + KubeadmControlPlaneEtcdClusterHealthUnknownV1Beta2Reason = "HealthUnknown" +) + +// KubeadmControlPlane's ControlPlaneComponentsHealthy condition and corresponding reasons that will be used in v1Beta2 API version. +const ( + // KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition surfaces issues to Kubernetes control plane components + // hosted on machines managed by this object. It is computed as aggregation of Machine's `APIServerPodHealthy`, + // `ControllerManagerPodHealthy`, `SchedulerPodHealthy`, `EtcdPodHealthy` conditions plus additional checks on + // control plane machines and nodes. + KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition = "ControlPlaneComponentsHealthy" + + // KubeadmControlPlaneControlPlaneComponentsInspectionFailedV1Beta2Reason documents a failure when inspecting the status of the + // control plane components hosted on KubeadmControlPlane controlled machines. + KubeadmControlPlaneControlPlaneComponentsInspectionFailedV1Beta2Reason = clusterv1.InspectionFailedV1Beta2Reason + + // KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Reason surfaces when the Kubernetes control plane components + // hosted on KubeadmControlPlane machines are healthy. + KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Reason = "Healthy" + + // KubeadmControlPlaneControlPlaneComponentsNotHealthyV1Beta2Reason surfaces when the Kubernetes control plane components + // hosted on KubeadmControlPlane machines are not healthy. + KubeadmControlPlaneControlPlaneComponentsNotHealthyV1Beta2Reason = "NotHealthy" + + // KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason surfaces when the health status of the + // Kubernetes control plane components hosted on KubeadmControlPlane machines is unknown. + KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason = "HealthUnknown" ) // KubeadmControlPlane's MachinesReady condition and corresponding reasons that will be used in v1Beta2 API version. @@ -113,7 +155,8 @@ const ( KubeadmControlPlanePausedV1Beta2Condition = clusterv1.PausedV1Beta2Condition ) -// Conditions that will be used for the KubeadmControlPlane controlled machines in v1Beta2 API version. +// APIServerPodHealthy, ControllerManagerPodHealthy, SchedulerPodHealthy and EtcdPodHealthy condition and corresponding +// reasons that will be used for KubeadmControlPlane controlled machines in v1Beta2 API version. 
const (
	// KubeadmControlPlaneMachineAPIServerPodHealthyV1Beta2Condition surfaces the status of the API server pod hosted on a KubeadmControlPlane controlled machine.
	KubeadmControlPlaneMachineAPIServerPodHealthyV1Beta2Condition = "APIServerPodHealthy"
@@ -127,6 +170,50 @@ const (
 	// KubeadmControlPlaneMachineEtcdPodHealthyV1Beta2Condition surfaces the status of the etcd pod hosted on a KubeadmControlPlane controlled machine.
 	KubeadmControlPlaneMachineEtcdPodHealthyV1Beta2Condition = "EtcdPodHealthy"
 
+	// KubeadmControlPlaneMachinePodRunningV1Beta2Reason surfaces a pod hosted on a KubeadmControlPlane controlled machine that is running.
+	KubeadmControlPlaneMachinePodRunningV1Beta2Reason = "Running"
+
+	// KubeadmControlPlaneMachinePodProvisioningV1Beta2Reason surfaces a pod hosted on a KubeadmControlPlane controlled machine
+	// waiting to be provisioned i.e., Pod is in "Pending" phase.
+	KubeadmControlPlaneMachinePodProvisioningV1Beta2Reason = "Provisioning"
+
+	// KubeadmControlPlaneMachinePodDoesNotExistV1Beta2Reason surfaces when a pod hosted on a KubeadmControlPlane controlled machine
+	// does not exist.
+	KubeadmControlPlaneMachinePodDoesNotExistV1Beta2Reason = "DoesNotExist"
+
+	// KubeadmControlPlaneMachinePodFailedV1Beta2Reason surfaces when a pod hosted on a KubeadmControlPlane controlled machine
+	// failed during provisioning, e.g. CrashLoopBackOff, ImagePullBackOff or if all the containers in a pod have terminated.
+	KubeadmControlPlaneMachinePodFailedV1Beta2Reason = "Failed"
+
+	// KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason documents a failure when inspecting the status of a
+	// pod hosted on a KubeadmControlPlane controlled machine.
+	KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason = clusterv1.InspectionFailedV1Beta2Reason
+
+	// KubeadmControlPlaneMachinePodDeletingV1Beta2Reason surfaces when the machine hosting control plane components
+	// is being deleted.
+	KubeadmControlPlaneMachinePodDeletingV1Beta2Reason = "Deleting"
+
+	// KubeadmControlPlaneMachinePodInternalErrorV1Beta2Reason surfaces unexpected failures when reading pod hosted
+	// on a KubeadmControlPlane controlled machine.
+	KubeadmControlPlaneMachinePodInternalErrorV1Beta2Reason = clusterv1.InternalErrorV1Beta2Reason
+)
+
+// EtcdMemberHealthy condition and corresponding reasons that will be used for KubeadmControlPlane controlled machines in v1Beta2 API version.
+const (
 	// KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition surfaces the status of the etcd member hosted on a KubeadmControlPlane controlled machine.
-	KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition = "EtcdMemberHealthy"
+	KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition = "Healthy"
+
+	// KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason surfaces when the etcd member hosted on a KubeadmControlPlane controlled machine is not healthy.
+	KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason = "NotHealthy"
+
+	// KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Reason surfaces when the etcd member hosted on a KubeadmControlPlane controlled machine is healthy.
+	KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Reason = "Healthy"
+
+	// KubeadmControlPlaneMachineEtcdMemberInspectionFailedV1Beta2Reason documents a failure when inspecting the status of an
+	// etcd member hosted on a KubeadmControlPlane controlled machine.
+ KubeadmControlPlaneMachineEtcdMemberInspectionFailedV1Beta2Reason = clusterv1.InspectionFailedV1Beta2Reason + + // KubeadmControlPlaneMachineEtcdMemberDeletingV1Beta2Reason surfaces when the machine hosting an etcd member + // is being deleted. + KubeadmControlPlaneMachineEtcdMemberDeletingV1Beta2Reason = "Deleting" ) diff --git a/controlplane/kubeadm/internal/controllers/controller.go b/controlplane/kubeadm/internal/controllers/controller.go index b8bb9af3b9e4..2fab8444b934 100644 --- a/controlplane/kubeadm/internal/controllers/controller.go +++ b/controlplane/kubeadm/internal/controllers/controller.go @@ -317,15 +317,29 @@ func patchKubeadmControlPlane(ctx context.Context, patchHelper *patch.Helper, kc // Patch the object, ignoring conflicts on the conditions owned by this controller. // Also, if requested, we are adding additional options like e.g. Patch ObservedGeneration when issuing the // patch at the end of the reconcile loop. - options = append(options, patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{ - controlplanev1.MachinesCreatedCondition, - clusterv1.ReadyCondition, - controlplanev1.MachinesSpecUpToDateCondition, - controlplanev1.ResizedCondition, - controlplanev1.MachinesReadyCondition, - controlplanev1.AvailableCondition, - controlplanev1.CertificatesAvailableCondition, - }}) + options = append(options, + patch.WithOwnedConditions{Conditions: []clusterv1.ConditionType{ + controlplanev1.MachinesCreatedCondition, + clusterv1.ReadyCondition, + controlplanev1.MachinesSpecUpToDateCondition, + controlplanev1.ResizedCondition, + controlplanev1.MachinesReadyCondition, + controlplanev1.AvailableCondition, + controlplanev1.CertificatesAvailableCondition, + }}, + patch.WithOwnedV1Beta2Conditions{Conditions: []string{ + controlplanev1.KubeadmControlPlaneAvailableV1Beta2Condition, + controlplanev1.KubeadmControlPlaneCertificatesAvailableV1Beta2Condition, + controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, + controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition, + controlplanev1.KubeadmControlPlaneMachinesReadyV1Beta2Condition, + controlplanev1.KubeadmControlPlaneMachinesUpToDateV1Beta2Condition, + controlplanev1.KubeadmControlPlaneScalingUpV1Beta2Condition, + controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Condition, + controlplanev1.KubeadmControlPlaneRemediatingV1Beta2Condition, + controlplanev1.KubeadmControlPlaneDeletingV1Beta2Condition, + }}, + ) return patchHelper.Patch(ctx, kcp, options...) } diff --git a/controlplane/kubeadm/internal/workload_cluster_conditions.go b/controlplane/kubeadm/internal/workload_cluster_conditions.go index c54b1cdf617d..fabc32f0a35d 100644 --- a/controlplane/kubeadm/internal/workload_cluster_conditions.go +++ b/controlplane/kubeadm/internal/workload_cluster_conditions.go @@ -26,6 +26,8 @@ import ( apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" + ctrl "sigs.k8s.io/controller-runtime" ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" @@ -34,6 +36,7 @@ import ( etcdutil "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd/util" "sigs.k8s.io/cluster-api/util/collections" "sigs.k8s.io/cluster-api/util/conditions" + v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2" ) // UpdateEtcdConditions is responsible for updating machine conditions reflecting the status of all the etcd members. 
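The hunks that follow repeat a single pattern: next to every existing conditions.MarkTrue/MarkFalse/MarkUnknown call for a v1beta1 condition, the equivalent v1beta2 condition is written with v1beta2conditions.Set and a plain metav1.Condition. A minimal sketch of that pattern is shown below; it is not part of the patch, the helper name markEtcdMemberInspectionFailed is hypothetical, and it assumes the imports this file already declares (conditions, v1beta2conditions, metav1, clusterv1, controlplanev1).

// Sketch only, not part of the patch: the dual v1beta1/v1beta2 update pattern used below.
func markEtcdMemberInspectionFailed(machine *clusterv1.Machine) {
	// v1beta1: Unknown status expressed via a reason constant and a free-form message.
	conditions.MarkUnknown(machine, controlplanev1.MachineEtcdMemberHealthyCondition,
		controlplanev1.EtcdMemberInspectionFailedReason, "Failed to get the Node which is hosting the etcd member")

	// v1beta2: the same signal expressed as a metav1.Condition with explicit Type/Status/Reason/Message.
	v1beta2conditions.Set(machine, metav1.Condition{
		Type:    controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition,
		Status:  metav1.ConditionUnknown,
		Reason:  controlplanev1.KubeadmControlPlaneMachineEtcdMemberInspectionFailedV1Beta2Reason,
		Message: "Failed to get the Node hosting the etcd member",
	})
}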
@@ -51,20 +54,37 @@ func (w *Workload) updateExternalEtcdConditions(_ context.Context, controlPlane
 	// When KCP is not responsible for external etcd, we are reporting only health at KCP level.
 	conditions.MarkTrue(controlPlane.KCP, controlplanev1.EtcdClusterHealthyCondition)
 
-	// TODO: check external etcd for alarms an possibly also for member errors
-	// this requires implementing an new type of etcd client generator given that it is not possible to use nodes
-	// as a source for the etcd endpoint address; the address of the external etcd should be available on the kubeadm configuration.
+	// Note: KCP is going to stop setting the `EtcdClusterHealthy` condition to true in case of external etcd.
+	// This will allow tools managing the external etcd instance to use the `EtcdClusterHealthy` condition to report back status into
+	// the KubeadmControlPlane if they want to.
+	// As soon as the v1beta1 condition above is removed, we should drop this func entirely.
 }
 
 func (w *Workload) updateManagedEtcdConditions(ctx context.Context, controlPlane *ControlPlane) {
 	// NOTE: This methods uses control plane nodes only to get in contact with etcd but then it relies on etcd
 	// as ultimate source of truth for the list of members and for their health.
+	// TODO: Integrate this with clustercache / handle the grace period
 	controlPlaneNodes, err := w.getControlPlaneNodes(ctx)
 	if err != nil {
-		conditions.MarkUnknown(controlPlane.KCP, controlplanev1.EtcdClusterHealthyCondition, controlplanev1.EtcdClusterInspectionFailedReason, "Failed to list nodes which are hosting the etcd members")
 		for _, m := range controlPlane.Machines {
-			conditions.MarkUnknown(m, controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberInspectionFailedReason, "Failed to get the node which is hosting the etcd member")
+			conditions.MarkUnknown(m, controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberInspectionFailedReason, "Failed to get the Node which is hosting the etcd member")
+
+			v1beta2conditions.Set(m, metav1.Condition{
+				Type:    controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition,
+				Status:  metav1.ConditionUnknown,
+				Reason:  controlplanev1.KubeadmControlPlaneMachineEtcdMemberInspectionFailedV1Beta2Reason,
+				Message: "Failed to get the Node hosting the etcd member",
+			})
 		}
+
+		conditions.MarkUnknown(controlPlane.KCP, controlplanev1.EtcdClusterHealthyCondition, controlplanev1.EtcdClusterInspectionFailedReason, "Failed to list Nodes which are hosting the etcd members")
+
+		v1beta2conditions.Set(controlPlane.KCP, metav1.Condition{
+			Type:    controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition,
+			Status:  metav1.ConditionUnknown,
+			Reason:  controlplanev1.KubeadmControlPlaneEtcdClusterInspectionFailedV1Beta2Reason,
+			Message: "Failed to get Nodes hosting the etcd cluster",
+		})
 		return
 	}
@@ -78,6 +98,16 @@ func (w *Workload) updateManagedEtcdConditions(ctx context.Context, controlPlane
 		members   []*etcd.Member
 	)
 
+	provisioningMachines := controlPlane.Machines.Filter(collections.Not(collections.HasNode()))
+	for _, machine := range provisioningMachines {
+		v1beta2conditions.Set(machine, metav1.Condition{
+			Type:    controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition,
+			Status:  metav1.ConditionUnknown,
+			Reason:  controlplanev1.KubeadmControlPlaneMachineEtcdMemberInspectionFailedV1Beta2Reason,
+			Message: "Node does not exist",
+		})
+	}
+
 	for _, node := range controlPlaneNodes.Items {
 		// Search for the machine corresponding to the node.
var machine *clusterv1.Machine @@ -90,16 +120,22 @@ func (w *Workload) updateManagedEtcdConditions(ctx context.Context, controlPlane if machine == nil { // If there are machines still provisioning there is the chance that a chance that a node might be linked to a machine soon, // otherwise report the error at KCP level given that there is no machine to report on. - if hasProvisioningMachine(controlPlane.Machines) { + if len(provisioningMachines) > 0 { continue } - kcpErrors = append(kcpErrors, fmt.Sprintf("Control plane node %s does not have a corresponding machine", node.Name)) + kcpErrors = append(kcpErrors, fmt.Sprintf("Control plane Node %s does not have a corresponding Machine", node.Name)) continue } - // If the machine is deleting, report all the conditions as deleting + // If the machine is deleting, report all the conditions as deleting. if !machine.ObjectMeta.DeletionTimestamp.IsZero() { conditions.MarkFalse(machine, controlplanev1.MachineEtcdMemberHealthyCondition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "") + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberDeletingV1Beta2Reason, + }) continue } @@ -114,7 +150,14 @@ func (w *Workload) updateManagedEtcdConditions(ctx context.Context, controlPlane members = currentMembers } if !etcdutil.MemberEqual(members, currentMembers) { - conditions.MarkFalse(machine, controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "etcd member reports the cluster is composed by members %s, but all previously seen etcd members are reporting %s", etcdutil.MemberNames(currentMembers), etcdutil.MemberNames(members)) + conditions.MarkFalse(machine, controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "Etcd member reports the cluster is composed by members %s, but all previously seen etcd members are reporting %s", etcdutil.MemberNames(currentMembers), etcdutil.MemberNames(members)) + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason, + Message: fmt.Sprintf("The etcd member hosted on this Machine reports the cluster is composed by %s, but all previously seen etcd members are reporting %s", etcdutil.MemberNames(currentMembers), etcdutil.MemberNames(members)), + }) continue } @@ -122,7 +165,14 @@ func (w *Workload) updateManagedEtcdConditions(ctx context.Context, controlPlane // NB. 
The member for this node always exists given forFirstAvailableNode(node) used above member := etcdutil.MemberForName(currentMembers, node.Name) if member == nil { - conditions.MarkFalse(machine, controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "etcd member reports the cluster is composed by members %s, but the member itself (%s) is not included", etcdutil.MemberNames(currentMembers), node.Name) + conditions.MarkFalse(machine, controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "Etcd member reports the cluster is composed by members %s, but the member hosted on this Machine is not included", etcdutil.MemberNames(currentMembers)) + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason, + Message: fmt.Sprintf("Etcd reports the cluster is composed by %s, but the etcd member hosted on this Machine is not included", etcdutil.MemberNames(currentMembers)), + }) continue } if len(member.Alarms) > 0 { @@ -137,6 +187,13 @@ func (w *Workload) updateManagedEtcdConditions(ctx context.Context, controlPlane } if len(alarmList) > 0 { conditions.MarkFalse(machine, controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "Etcd member reports alarms: %s", strings.Join(alarmList, ", ")) + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason, + Message: fmt.Sprintf("Etcd reports alarms: %s", strings.Join(alarmList, ", ")), + }) continue } } @@ -147,18 +204,31 @@ func (w *Workload) updateManagedEtcdConditions(ctx context.Context, controlPlane clusterID = &member.ClusterID } if *clusterID != member.ClusterID { - conditions.MarkFalse(machine, controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "etcd member has cluster ID %d, but all previously seen etcd members have cluster ID %d", member.ClusterID, *clusterID) + conditions.MarkFalse(machine, controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "Etcd member has cluster ID %d, but all previously seen etcd members have cluster ID %d", member.ClusterID, *clusterID) + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason, + Message: fmt.Sprintf("Etcd member has cluster ID %d, but all previously seen etcd members have cluster ID %d", member.ClusterID, *clusterID), + }) continue } conditions.MarkTrue(machine, controlplanev1.MachineEtcdMemberHealthyCondition) + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Reason, + }) } // Make sure that the list of etcd members and machines is consistent. 
kcpErrors = compareMachinesAndMembers(controlPlane, members, kcpErrors) // Aggregate components error from machines at KCP level - aggregateFromMachinesToKCP(aggregateFromMachinesToKCPInput{ + aggregateConditionsFromMachinesToKCP(aggregateConditionsFromMachinesToKCPInput{ controlPlane: controlPlane, machineConditions: []clusterv1.ConditionType{controlplanev1.MachineEtcdMemberHealthyCondition}, kcpErrors: kcpErrors, @@ -167,20 +237,45 @@ func (w *Workload) updateManagedEtcdConditions(ctx context.Context, controlPlane unknownReason: controlplanev1.EtcdClusterUnknownReason, note: "etcd member", }) + + aggregateV1Beta2ConditionsFromMachinesToKCP(aggregateV1Beta2ConditionsFromMachinesToKCPInput{ + controlPlane: controlPlane, + machineConditions: []string{controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition}, + kcpErrors: kcpErrors, + condition: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, + falseReason: controlplanev1.KubeadmControlPlaneEtcdClusterNotHealthyV1Beta2Reason, + unknownReason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthUnknownV1Beta2Reason, + trueReason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Reason, + note: "etcd member", + }) } func (w *Workload) getCurrentEtcdMembers(ctx context.Context, machine *clusterv1.Machine, nodeName string) ([]*etcd.Member, error) { // Create the etcd Client for the etcd Pod scheduled on the Node etcdClient, err := w.etcdClientGenerator.forFirstAvailableNode(ctx, []string{nodeName}) if err != nil { - conditions.MarkUnknown(machine, controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberInspectionFailedReason, "Failed to connect to the etcd pod on the %s node: %s", nodeName, err) - return nil, errors.Wrapf(err, "failed to get current etcd members: failed to connect to the etcd pod on the %s node", nodeName) + conditions.MarkUnknown(machine, controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberInspectionFailedReason, "Failed to connect to the etcd Pod on the %s Node: %s", nodeName, err) + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberInspectionFailedV1Beta2Reason, + Message: fmt.Sprintf("Failed to connect to the etcd Pod on the %s Node: %s", nodeName, err), + }) + return nil, errors.Wrapf(err, "failed to get current etcd members: failed to connect to the etcd Pod on the %s Node", nodeName) } defer etcdClient.Close() // While creating a new client, forFirstAvailableNode retrieves the status for the endpoint; check if the endpoint has errors. 
if len(etcdClient.Errors) > 0 { conditions.MarkFalse(machine, controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "Etcd member status reports errors: %s", strings.Join(etcdClient.Errors, ", ")) + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason, + Message: fmt.Sprintf("Etcd reports errors: %s", strings.Join(etcdClient.Errors, ", ")), + }) return nil, errors.Errorf("failed to get current etcd members: etcd member status reports errors: %s", strings.Join(etcdClient.Errors, ", ")) } @@ -189,8 +284,15 @@ func (w *Workload) getCurrentEtcdMembers(ctx context.Context, machine *clusterv1 if err != nil { // NB. We should never be in here, given that we just received answer to the etcd calls included in forFirstAvailableNode; // however, we are considering the calls to Members a signal of etcd not being stable. - conditions.MarkFalse(machine, controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "Failed get answer from the etcd member on the %s node", nodeName) - return nil, errors.Errorf("failed to get current etcd members: failed get answer from the etcd member on the %s node", nodeName) + conditions.MarkFalse(machine, controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "Failed to get answer from the etcd member on the %s Node", nodeName) + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberInspectionFailedV1Beta2Reason, + Message: fmt.Sprintf("Failed to get answer from the etcd member on the %s Node: %s", nodeName, err.Error()), + }) + return nil, errors.Wrapf(err, "failed to get answer from the etcd member on the %s Node", nodeName) } return currentMembers, nil @@ -217,6 +319,13 @@ func compareMachinesAndMembers(controlPlane *ControlPlane, members []*etcd.Membe } if !found { conditions.MarkFalse(machine, controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "Missing etcd member") + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason, + Message: fmt.Sprintf("Etcd doesn't have an etcd member for Node %s", machine.Status.NodeRef.Name), + }) } } @@ -234,7 +343,7 @@ func compareMachinesAndMembers(controlPlane *ControlPlane, members []*etcd.Membe if name == "" { name = fmt.Sprintf("%d (Name not yet assigned)", member.ID) } - kcpErrors = append(kcpErrors, fmt.Sprintf("etcd member %s does not have a corresponding machine", name)) + kcpErrors = append(kcpErrors, fmt.Sprintf("Etcd member %s does not have a corresponding Machine", name)) } } return kcpErrors @@ -253,22 +362,61 @@ func (w *Workload) UpdateStaticPodConditions(ctx context.Context, controlPlane * allMachinePodConditions = append(allMachinePodConditions, controlplanev1.MachineEtcdPodHealthyCondition) } + allMachinePodV1beta2Conditions := []string{ + 
controlplanev1.KubeadmControlPlaneMachineAPIServerPodHealthyV1Beta2Condition,
+		controlplanev1.KubeadmControlPlaneMachineControllerManagerPodHealthyV1Beta2Condition,
+		controlplanev1.KubeadmControlPlaneMachineSchedulerPodHealthyV1Beta2Condition,
+	}
+	if controlPlane.IsEtcdManaged() {
+		allMachinePodV1beta2Conditions = append(allMachinePodV1beta2Conditions, controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyV1Beta2Condition)
+	}
+
+	// NOTE: this func uses control plane nodes from the workload cluster as a source of truth for the current state.
+	// TODO: integrate this with clustercache / handle the grace period
 	controlPlaneNodes, err := w.getControlPlaneNodes(ctx)
 	if err != nil {
 		for i := range controlPlane.Machines {
 			machine := controlPlane.Machines[i]
 			for _, condition := range allMachinePodConditions {
-				conditions.MarkUnknown(machine, condition, controlplanev1.PodInspectionFailedReason, "Failed to get the node which is hosting this component: %v", err)
+				conditions.MarkUnknown(machine, condition, controlplanev1.PodInspectionFailedReason, "Failed to get the Node which is hosting this component: %v", err)
+			}
+
+			for _, condition := range allMachinePodV1beta2Conditions {
+				v1beta2conditions.Set(machine, metav1.Condition{
+					Type:    condition,
+					Status:  metav1.ConditionUnknown,
+					Reason:  controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason,
+					Message: fmt.Sprintf("Failed to get the Node hosting the Pod: %s", err.Error()),
+				})
 			}
 		}
-		conditions.MarkUnknown(controlPlane.KCP, controlplanev1.ControlPlaneComponentsHealthyCondition, controlplanev1.ControlPlaneComponentsInspectionFailedReason, "Failed to list nodes which are hosting control plane components: %v", err)
+
+		conditions.MarkUnknown(controlPlane.KCP, controlplanev1.ControlPlaneComponentsHealthyCondition, controlplanev1.ControlPlaneComponentsInspectionFailedReason, "Failed to list Nodes which are hosting control plane components: %v", err)
+
+		v1beta2conditions.Set(controlPlane.KCP, metav1.Condition{
+			Type:    controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition,
+			Status:  metav1.ConditionUnknown,
+			Reason:  controlplanev1.KubeadmControlPlaneControlPlaneComponentsInspectionFailedV1Beta2Reason,
+			Message: fmt.Sprintf("Failed to get Nodes hosting control plane components: %s", err.Error()),
+		})
 		return
 	}
 
 	// Update conditions for control plane components hosted as static pods on the nodes.
 	var kcpErrors []string
 
+	provisioningMachines := controlPlane.Machines.Filter(collections.Not(collections.HasNode()))
+	for _, machine := range provisioningMachines {
+		for _, condition := range allMachinePodV1beta2Conditions {
+			v1beta2conditions.Set(machine, metav1.Condition{
+				Type:    condition,
+				Status:  metav1.ConditionUnknown,
+				Reason:  controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason,
+				Message: "Node does not exist",
+			})
+		}
+	}
+
 	for _, node := range controlPlaneNodes.Items {
 		// Search for the machine corresponding to the node.
 		var machine *clusterv1.Machine
@@ -281,12 +429,12 @@ func (w *Workload) UpdateStaticPodConditions(ctx context.Context, controlPlane *
 		// If there is no machine corresponding to a node, determine if this is an error or not.
if machine == nil { - // If there are machines still provisioning there is the chance that a chance that a node might be linked to a machine soon, + // If there are machines still provisioning there is the chance that a node might be linked to a machine soon, // otherwise report the error at KCP level given that there is no machine to report on. - if hasProvisioningMachine(controlPlane.Machines) { + if len(provisioningMachines) > 0 { continue } - kcpErrors = append(kcpErrors, fmt.Sprintf("Control plane node %s does not have a corresponding machine", node.Name)) + kcpErrors = append(kcpErrors, fmt.Sprintf("Control plane Node %s does not have a corresponding Machine", node.Name)) continue } @@ -295,6 +443,14 @@ func (w *Workload) UpdateStaticPodConditions(ctx context.Context, controlPlane * for _, condition := range allMachinePodConditions { conditions.MarkFalse(machine, condition, clusterv1.DeletingReason, clusterv1.ConditionSeverityInfo, "") } + + for _, condition := range allMachinePodV1beta2Conditions { + v1beta2conditions.Set(machine, metav1.Condition{ + Type: condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachinePodDeletingV1Beta2Reason, + }) + } continue } @@ -305,15 +461,24 @@ func (w *Workload) UpdateStaticPodConditions(ctx context.Context, controlPlane * for _, condition := range allMachinePodConditions { conditions.MarkUnknown(machine, condition, controlplanev1.PodInspectionFailedReason, "Node is unreachable") } + + for _, condition := range allMachinePodV1beta2Conditions { + v1beta2conditions.Set(machine, metav1.Condition{ + Type: condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, + Message: fmt.Sprintf("Node %s is unreachable", node.Name), + }) + } continue } // Otherwise updates static pod based conditions reflecting the status of the underlying object generated by kubeadm. 
- w.updateStaticPodCondition(ctx, machine, node, "kube-apiserver", controlplanev1.MachineAPIServerPodHealthyCondition) - w.updateStaticPodCondition(ctx, machine, node, "kube-controller-manager", controlplanev1.MachineControllerManagerPodHealthyCondition) - w.updateStaticPodCondition(ctx, machine, node, "kube-scheduler", controlplanev1.MachineSchedulerPodHealthyCondition) + w.updateStaticPodCondition(ctx, machine, node, "kube-apiserver", controlplanev1.MachineAPIServerPodHealthyCondition, controlplanev1.KubeadmControlPlaneMachineAPIServerPodHealthyV1Beta2Condition) + w.updateStaticPodCondition(ctx, machine, node, "kube-controller-manager", controlplanev1.MachineControllerManagerPodHealthyCondition, controlplanev1.KubeadmControlPlaneMachineControllerManagerPodHealthyV1Beta2Condition) + w.updateStaticPodCondition(ctx, machine, node, "kube-scheduler", controlplanev1.MachineSchedulerPodHealthyCondition, controlplanev1.KubeadmControlPlaneMachineSchedulerPodHealthyV1Beta2Condition) if controlPlane.IsEtcdManaged() { - w.updateStaticPodCondition(ctx, machine, node, "etcd", controlplanev1.MachineEtcdPodHealthyCondition) + w.updateStaticPodCondition(ctx, machine, node, "etcd", controlplanev1.MachineEtcdPodHealthyCondition, controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyV1Beta2Condition) } } @@ -332,13 +497,22 @@ func (w *Workload) UpdateStaticPodConditions(ctx context.Context, controlPlane * } if !found { for _, condition := range allMachinePodConditions { - conditions.MarkFalse(machine, condition, controlplanev1.PodFailedReason, clusterv1.ConditionSeverityError, "Missing node") + conditions.MarkFalse(machine, condition, controlplanev1.PodFailedReason, clusterv1.ConditionSeverityError, "Missing Node") + } + + for _, condition := range allMachinePodV1beta2Conditions { + v1beta2conditions.Set(machine, metav1.Condition{ + Type: condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, + Message: fmt.Sprintf("Node %s does not exist", machine.Status.NodeRef.Name), + }) } } } // Aggregate components error from machines at KCP level. - aggregateFromMachinesToKCP(aggregateFromMachinesToKCPInput{ + aggregateConditionsFromMachinesToKCP(aggregateConditionsFromMachinesToKCPInput{ controlPlane: controlPlane, machineConditions: allMachinePodConditions, kcpErrors: kcpErrors, @@ -347,15 +521,17 @@ func (w *Workload) UpdateStaticPodConditions(ctx context.Context, controlPlane * unknownReason: controlplanev1.ControlPlaneComponentsUnknownReason, note: "control plane", }) -} -func hasProvisioningMachine(machines collections.Machines) bool { - for _, machine := range machines { - if machine.Status.NodeRef == nil { - return true - } - } - return false + aggregateV1Beta2ConditionsFromMachinesToKCP(aggregateV1Beta2ConditionsFromMachinesToKCPInput{ + controlPlane: controlPlane, + machineConditions: allMachinePodV1beta2Conditions, + kcpErrors: kcpErrors, + condition: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition, + falseReason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsNotHealthyV1Beta2Reason, + unknownReason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason, + trueReason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Reason, + note: "control plane", + }) } // nodeHasUnreachableTaint returns true if the node has is unreachable from the node controller. 
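As a reading aid, not part of the patch: once UpdateEtcdConditions and UpdateStaticPodConditions have run, the new per-machine and KCP-level conditions can be read back with v1beta2conditions.Get, the same accessor the aggregation helper further below relies on. The function name describeControlPlaneHealth is illustrative only, and the snippet assumes the imports already present in this file (fmt, controlplanev1, v1beta2conditions).

// Sketch only: reading back the conditions written by the code above.
func describeControlPlaneHealth(controlPlane *ControlPlane) {
	// KCP-level aggregated condition for the Kubernetes control plane components.
	if c := v1beta2conditions.Get(controlPlane.KCP, controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition); c != nil {
		fmt.Printf("ControlPlaneComponentsHealthy: %s (%s) %s\n", c.Status, c.Reason, c.Message)
	}

	// Per-machine etcd member condition written by updateManagedEtcdConditions.
	for _, machine := range controlPlane.Machines {
		if c := v1beta2conditions.Get(machine, controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition); c != nil {
			fmt.Printf("%s: etcd member is %s (%s)\n", machine.Name, c.Status, c.Reason)
		}
	}
}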
@@ -371,11 +547,20 @@ func nodeHasUnreachableTaint(node corev1.Node) bool { // updateStaticPodCondition is responsible for updating machine conditions reflecting the status of a component running // in a static pod generated by kubeadm. This operation is best effort, in the sense that in case of problems // in retrieving the pod status, it sets the condition to Unknown state without returning any error. -func (w *Workload) updateStaticPodCondition(ctx context.Context, machine *clusterv1.Machine, node corev1.Node, component string, staticPodCondition clusterv1.ConditionType) { +func (w *Workload) updateStaticPodCondition(ctx context.Context, machine *clusterv1.Machine, node corev1.Node, component string, staticPodCondition clusterv1.ConditionType, staticPodV1beta2Condition string) { + log := ctrl.LoggerFrom(ctx) + // If node ready is unknown there is a good chance that kubelet is not updating mirror pods, so we consider pod status // to be unknown as well without further investigations. if nodeReadyUnknown(node) { - conditions.MarkUnknown(machine, staticPodCondition, controlplanev1.PodInspectionFailedReason, "Node Ready condition is unknown, pod data might be stale") + conditions.MarkUnknown(machine, staticPodCondition, controlplanev1.PodInspectionFailedReason, "Node Ready condition is Unknown, Pod data might be stale") + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: staticPodV1beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, + Message: "Node Ready condition is Unknown, Pod data might be stale", + }) return } @@ -389,9 +574,25 @@ func (w *Workload) updateStaticPodCondition(ctx context.Context, machine *cluste // If there is an error getting the Pod, do not set any conditions. if apierrors.IsNotFound(err) { conditions.MarkFalse(machine, staticPodCondition, controlplanev1.PodMissingReason, clusterv1.ConditionSeverityError, "Pod %s is missing", podKey.Name) + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: staticPodV1beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachinePodDoesNotExistV1Beta2Reason, + Message: fmt.Sprintf("Pod %s does not exist", podKey.Name), + }) return } - conditions.MarkUnknown(machine, staticPodCondition, controlplanev1.PodInspectionFailedReason, "Failed to get pod status") + conditions.MarkUnknown(machine, staticPodCondition, controlplanev1.PodInspectionFailedReason, "Failed to get Pod status") + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: staticPodV1beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, + Message: "Please check controller logs for errors", + }) + + log.Error(err, fmt.Sprintf("Failed to get Pod %s", klog.KRef(podKey.Namespace, podKey.Name))) return } @@ -404,6 +605,13 @@ func (w *Workload) updateStaticPodCondition(ctx context.Context, machine *cluste // NOTE: This should never happen for static pods, however this check is implemented for completeness. 
if podCondition(pod, corev1.PodScheduled) != corev1.ConditionTrue { conditions.MarkFalse(machine, staticPodCondition, controlplanev1.PodProvisioningReason, clusterv1.ConditionSeverityInfo, "Waiting to be scheduled") + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: staticPodV1beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachinePodProvisioningV1Beta2Reason, + Message: "Waiting to be scheduled", + }) return } @@ -411,11 +619,24 @@ func (w *Workload) updateStaticPodCondition(ctx context.Context, machine *cluste // NOTE: As of today there are not init containers in static pods generated by kubeadm, however this check is implemented for completeness. if podCondition(pod, corev1.PodInitialized) != corev1.ConditionTrue { conditions.MarkFalse(machine, staticPodCondition, controlplanev1.PodProvisioningReason, clusterv1.ConditionSeverityInfo, "Running init containers") + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: staticPodV1beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachinePodProvisioningV1Beta2Reason, + Message: "Running init containers", + }) return } // If there are no error from containers, report provisioning without further details. conditions.MarkFalse(machine, staticPodCondition, controlplanev1.PodProvisioningReason, clusterv1.ConditionSeverityInfo, "") + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: staticPodV1beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachinePodProvisioningV1Beta2Reason, + }) case corev1.PodRunning: // PodRunning means the pod has been bound to a node and all of the containers have been started. // At least one container is still running or is in the process of being restarted. 
@@ -425,6 +646,12 @@ func (w *Workload) updateStaticPodCondition(ctx context.Context, machine *cluste // PodReady condition means the pod is able to service requests if podCondition(pod, corev1.PodReady) == corev1.ConditionTrue { conditions.MarkTrue(machine, staticPodCondition) + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: staticPodV1beta2Condition, + Status: metav1.ConditionTrue, + Reason: controlplanev1.KubeadmControlPlaneMachinePodRunningV1Beta2Reason, + }) return } @@ -444,11 +671,25 @@ func (w *Workload) updateStaticPodCondition(ctx context.Context, machine *cluste if len(containerWaitingMessages) > 0 { if terminatedWithError { conditions.MarkFalse(machine, staticPodCondition, controlplanev1.PodFailedReason, clusterv1.ConditionSeverityError, strings.Join(containerWaitingMessages, ", ")) + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: staticPodV1beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachinePodFailedV1Beta2Reason, + Message: strings.Join(containerWaitingMessages, ", "), + }) return } // Note: Some error cases cannot be caught when container state == "Waiting", // e.g., "waiting.reason: ErrImagePull" is an error, but since LastTerminationState does not exist, this cannot be differentiated from "PodProvisioningReason" conditions.MarkFalse(machine, staticPodCondition, controlplanev1.PodProvisioningReason, clusterv1.ConditionSeverityInfo, strings.Join(containerWaitingMessages, ", ")) + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: staticPodV1beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachinePodProvisioningV1Beta2Reason, + Message: strings.Join(containerWaitingMessages, ", "), + }) return } @@ -461,26 +702,61 @@ func (w *Workload) updateStaticPodCondition(ctx context.Context, machine *cluste } if len(containerTerminatedMessages) > 0 { conditions.MarkFalse(machine, staticPodCondition, controlplanev1.PodFailedReason, clusterv1.ConditionSeverityError, strings.Join(containerTerminatedMessages, ", ")) + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: staticPodV1beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachinePodFailedV1Beta2Reason, + Message: strings.Join(containerTerminatedMessages, ", "), + }) return } // If the pod is not yet ready, most probably it is waiting for startup or readiness probes. // Report this as part of the provisioning process because the corresponding control plane component is not ready yet. conditions.MarkFalse(machine, staticPodCondition, controlplanev1.PodProvisioningReason, clusterv1.ConditionSeverityInfo, "Waiting for startup or readiness probes") + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: staticPodV1beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachinePodProvisioningV1Beta2Reason, + Message: "Waiting for startup or readiness probes", + }) case corev1.PodSucceeded: // PodSucceeded means that all containers in the pod have voluntarily terminated // with a container exit code of 0, and the system is not going to restart any of these containers. // NOTE: This should never happen for the static pods running control plane components. 
conditions.MarkFalse(machine, staticPodCondition, controlplanev1.PodFailedReason, clusterv1.ConditionSeverityError, "All the containers have been terminated") + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: staticPodV1beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachinePodFailedV1Beta2Reason, + Message: "All the containers have been terminated", + }) case corev1.PodFailed: // PodFailed means that all containers in the pod have terminated, and at least one container has // terminated in a failure (exited with a non-zero exit code or was stopped by the system). // NOTE: This should never happen for the static pods running control plane components. conditions.MarkFalse(machine, staticPodCondition, controlplanev1.PodFailedReason, clusterv1.ConditionSeverityError, "All the containers have been terminated") + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: staticPodV1beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachinePodFailedV1Beta2Reason, + Message: "All the containers have been terminated", + }) case corev1.PodUnknown: // PodUnknown means that for some reason the state of the pod could not be obtained, typically due // to an error in communicating with the host of the pod. - conditions.MarkUnknown(machine, staticPodCondition, controlplanev1.PodInspectionFailedReason, "Pod is reporting unknown status") + conditions.MarkUnknown(machine, staticPodCondition, controlplanev1.PodInspectionFailedReason, "Pod is reporting Unknown status") + + v1beta2conditions.Set(machine, metav1.Condition{ + Type: staticPodV1beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, + Message: "Pod is reporting Unknown status", + }) } } @@ -502,7 +778,7 @@ func podCondition(pod corev1.Pod, condition corev1.PodConditionType) corev1.Cond return corev1.ConditionUnknown } -type aggregateFromMachinesToKCPInput struct { +type aggregateConditionsFromMachinesToKCPInput struct { controlPlane *ControlPlane machineConditions []clusterv1.ConditionType kcpErrors []string @@ -512,10 +788,10 @@ type aggregateFromMachinesToKCPInput struct { note string } -// aggregateFromMachinesToKCP aggregates a group of conditions from machines to KCP. +// aggregateConditionsFromMachinesToKCP aggregates a group of conditions from machines to KCP. // NOTE: this func follows the same aggregation rules used by conditions.Merge thus giving priority to // errors, then warning, info down to unknown. -func aggregateFromMachinesToKCP(input aggregateFromMachinesToKCPInput) { +func aggregateConditionsFromMachinesToKCP(input aggregateConditionsFromMachinesToKCPInput) { // Aggregates machines for condition status. // NB. A machine could be assigned to many groups, but only the group with the highest severity will be reported. kcpMachinesWithErrors := sets.Set[string]{} @@ -549,7 +825,7 @@ func aggregateFromMachinesToKCP(input aggregateFromMachinesToKCPInput) { // In case of at least one machine with errors or KCP level errors (nodes without machines), report false, error. 
if len(kcpMachinesWithErrors) > 0 { - input.kcpErrors = append(input.kcpErrors, fmt.Sprintf("Following machines are reporting %s errors: %s", input.note, strings.Join(sets.List(kcpMachinesWithErrors), ", "))) + input.kcpErrors = append(input.kcpErrors, fmt.Sprintf("Following Machines are reporting %s errors: %s", input.note, strings.Join(sets.List(kcpMachinesWithErrors), ", "))) } if len(input.kcpErrors) > 0 { conditions.MarkFalse(input.controlPlane.KCP, input.condition, input.unhealthyReason, clusterv1.ConditionSeverityError, strings.Join(input.kcpErrors, "; ")) @@ -558,13 +834,13 @@ func aggregateFromMachinesToKCP(input aggregateFromMachinesToKCPInput) { // In case of no errors and at least one machine with warnings, report false, warnings. if len(kcpMachinesWithWarnings) > 0 { - conditions.MarkFalse(input.controlPlane.KCP, input.condition, input.unhealthyReason, clusterv1.ConditionSeverityWarning, "Following machines are reporting %s warnings: %s", input.note, strings.Join(sets.List(kcpMachinesWithWarnings), ", ")) + conditions.MarkFalse(input.controlPlane.KCP, input.condition, input.unhealthyReason, clusterv1.ConditionSeverityWarning, "Following Machines are reporting %s warnings: %s", input.note, strings.Join(sets.List(kcpMachinesWithWarnings), ", ")) return } // In case of no errors, no warning, and at least one machine with info, report false, info. if len(kcpMachinesWithInfo) > 0 { - conditions.MarkFalse(input.controlPlane.KCP, input.condition, input.unhealthyReason, clusterv1.ConditionSeverityInfo, "Following machines are reporting %s info: %s", input.note, strings.Join(sets.List(kcpMachinesWithInfo), ", ")) + conditions.MarkFalse(input.controlPlane.KCP, input.condition, input.unhealthyReason, clusterv1.ConditionSeverityInfo, "Following Machines are reporting %s info: %s", input.note, strings.Join(sets.List(kcpMachinesWithInfo), ", ")) return } @@ -576,10 +852,92 @@ func aggregateFromMachinesToKCP(input aggregateFromMachinesToKCPInput) { // Otherwise, if there is at least one machine with unknown, report unknown. if len(kcpMachinesWithUnknown) > 0 { - conditions.MarkUnknown(input.controlPlane.KCP, input.condition, input.unknownReason, "Following machines are reporting unknown %s status: %s", input.note, strings.Join(sets.List(kcpMachinesWithUnknown), ", ")) + conditions.MarkUnknown(input.controlPlane.KCP, input.condition, input.unknownReason, "Following Machines are reporting unknown %s status: %s", input.note, strings.Join(sets.List(kcpMachinesWithUnknown), ", ")) return } // This last case should happen only if there are no provisioned machines, and thus without conditions. // So there will be no condition at KCP level too. } + +type aggregateV1Beta2ConditionsFromMachinesToKCPInput struct { + controlPlane *ControlPlane + machineConditions []string + kcpErrors []string + condition string + trueReason string + unknownReason string + falseReason string + note string +} + +// aggregateV1Beta2ConditionsFromMachinesToKCP aggregates a group of conditions from machines to KCP. +// Note: the aggregation is computed in way that is similar to how v1beta2conditions.NewAggregateCondition works, but in this case the +// implementation is simpler/less flexible and it surfaces only issues & unknown conditions. +func aggregateV1Beta2ConditionsFromMachinesToKCP(input aggregateV1Beta2ConditionsFromMachinesToKCPInput) { + // Aggregates machines for condition status. + // NB. A machine could be assigned to many groups, but only the group with the highest severity will be reported. 
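+	// The mapping below mirrors that: a Machine lands in the errors bucket if any of its conditions is False,
+	// in the unknown bucket if any is Unknown, and in the info bucket when a condition is True; when setting the
+	// KCP condition, errors (including KCP level errors) take precedence over unknown, and unknown over info.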
+ kcpMachinesWithErrors := sets.Set[string]{} + kcpMachinesWithUnknown := sets.Set[string]{} + kcpMachinesWithInfo := sets.Set[string]{} + + for i := range input.controlPlane.Machines { + machine := input.controlPlane.Machines[i] + for _, condition := range input.machineConditions { + if machineCondition := v1beta2conditions.Get(machine, condition); machineCondition != nil { + switch machineCondition.Status { + case metav1.ConditionTrue: + kcpMachinesWithInfo.Insert(machine.Name) + case metav1.ConditionFalse: + kcpMachinesWithErrors.Insert(machine.Name) + case metav1.ConditionUnknown: + kcpMachinesWithUnknown.Insert(machine.Name) + } + } + } + } + + // In case of at least one machine with errors or KCP level errors (nodes without machines), report false. + if len(input.kcpErrors) > 0 || len(kcpMachinesWithErrors) > 0 { + messages := input.kcpErrors + if len(kcpMachinesWithErrors) > 0 { + messages = append(messages, fmt.Sprintf("Following Machines are reporting %s errors: %s", input.note, strings.Join(sets.List(kcpMachinesWithErrors), ", "))) + } + v1beta2conditions.Set(input.controlPlane.KCP, metav1.Condition{ + Type: input.condition, + Status: metav1.ConditionFalse, + Reason: input.falseReason, + Message: strings.Join(messages, ", "), + }) + return + } + + // Otherwise, if there is at least one machine with unknown, report unknown. + if len(kcpMachinesWithUnknown) > 0 { + v1beta2conditions.Set(input.controlPlane.KCP, metav1.Condition{ + Type: input.condition, + Status: metav1.ConditionUnknown, + Reason: input.unknownReason, + Message: fmt.Sprintf("Following Machines are reporting %s unknown: %s", input.note, strings.Join(sets.List(kcpMachinesWithUnknown), ", ")), + }) + return + } + + // In case of no errors, no unknown, and at least one machine with info, report true. + if len(kcpMachinesWithInfo) > 0 { + v1beta2conditions.Set(input.controlPlane.KCP, metav1.Condition{ + Type: input.condition, + Status: metav1.ConditionTrue, + Reason: input.trueReason, + }) + return + } + + // This last case should happen only if there are no provisioned machines. + v1beta2conditions.Set(input.controlPlane.KCP, metav1.Condition{ + Type: input.condition, + Status: metav1.ConditionUnknown, + Reason: input.unknownReason, + Message: fmt.Sprintf("No Machines reporting %s status", input.note), + }) +} diff --git a/controlplane/kubeadm/internal/workload_cluster_conditions_test.go b/controlplane/kubeadm/internal/workload_cluster_conditions_test.go index 7f3f5e3f365c..e79c538089f2 100644 --- a/controlplane/kubeadm/internal/workload_cluster_conditions_test.go +++ b/controlplane/kubeadm/internal/workload_cluster_conditions_test.go @@ -37,17 +37,20 @@ import ( fake2 "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd/fake" "sigs.k8s.io/cluster-api/util/collections" "sigs.k8s.io/cluster-api/util/conditions" + v1beta2conditions "sigs.k8s.io/cluster-api/util/conditions/v1beta2" ) func TestUpdateEtcdConditions(t *testing.T) { tests := []struct { - name string - kcp *controlplanev1.KubeadmControlPlane - machines []*clusterv1.Machine - injectClient client.Client // This test is injecting a fake client because it is required to create nodes with a controlled Status or to fail with a specific error. - injectEtcdClientGenerator etcdClientFor // This test is injecting a fake etcdClientGenerator because it is required to nodes with a controlled Status or to fail with a specific error. 
- expectedKCPCondition *clusterv1.Condition - expectedMachineConditions map[string]clusterv1.Conditions + name string + kcp *controlplanev1.KubeadmControlPlane + machines []*clusterv1.Machine + injectClient client.Client // This test is injecting a fake client because it is required to create nodes with a controlled Status or to fail with a specific error. + injectEtcdClientGenerator etcdClientFor // This test is injecting a fake etcdClientGenerator because it is required to nodes with a controlled Status or to fail with a specific error. + expectedKCPCondition *clusterv1.Condition + expectedKCPV1Beta2Condition *metav1.Condition + expectedMachineConditions map[string]clusterv1.Conditions + expectedMachineV1Beta2Conditions map[string][]metav1.Condition }{ { name: "if list nodes return an error should report all the conditions Unknown", @@ -55,17 +58,28 @@ func TestUpdateEtcdConditions(t *testing.T) { fakeMachine("m1"), }, injectClient: &fakeClient{ - listErr: errors.New("failed to list nodes"), + listErr: errors.New("failed to list Nodes"), }, - expectedKCPCondition: conditions.UnknownCondition(controlplanev1.EtcdClusterHealthyCondition, controlplanev1.EtcdClusterInspectionFailedReason, "Failed to list nodes which are hosting the etcd members"), + expectedKCPCondition: conditions.UnknownCondition(controlplanev1.EtcdClusterHealthyCondition, controlplanev1.EtcdClusterInspectionFailedReason, "Failed to list Nodes which are hosting the etcd members"), expectedMachineConditions: map[string]clusterv1.Conditions{ "m1": { - *conditions.UnknownCondition(controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberInspectionFailedReason, "Failed to get the node which is hosting the etcd member"), + *conditions.UnknownCondition(controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberInspectionFailedReason, "Failed to get the Node which is hosting the etcd member"), + }, + }, + expectedKCPV1Beta2Condition: &metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneEtcdClusterInspectionFailedV1Beta2Reason, + Message: "Failed to get Nodes hosting the etcd cluster", + }, + expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ + "m1": { + {Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberInspectionFailedV1Beta2Reason, Message: "Failed to get the Node hosting the etcd member"}, }, }, }, { - name: "node without machine should be ignored if there are provisioning machines", + name: "If there are provisioning machines, a node without machine should be ignored in v1beta1, reported in v1beta2", machines: []*clusterv1.Machine{ fakeMachine("m1"), // without NodeRef (provisioning) }, @@ -78,16 +92,33 @@ func TestUpdateEtcdConditions(t *testing.T) { expectedMachineConditions: map[string]clusterv1.Conditions{ "m1": {}, }, + expectedKCPV1Beta2Condition: &metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthUnknownV1Beta2Reason, + Message: "Following Machines are reporting etcd member unknown: m1", + }, + expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ + "m1": { + {Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: 
controlplanev1.KubeadmControlPlaneMachineEtcdMemberInspectionFailedV1Beta2Reason, Message: "Node does not exist"}, + }, + }, }, { - name: "node without machine should report a problem at KCP level if there are no provisioning machines", + name: "If there are no provisioning machines, a node without machine should be reported as False condition at KCP level", machines: []*clusterv1.Machine{}, injectClient: &fakeClient{ list: &corev1.NodeList{ Items: []corev1.Node{*fakeNode("n1")}, }, }, - expectedKCPCondition: conditions.FalseCondition(controlplanev1.EtcdClusterHealthyCondition, controlplanev1.EtcdClusterUnhealthyReason, clusterv1.ConditionSeverityError, "Control plane node %s does not have a corresponding machine", "n1"), + expectedKCPCondition: conditions.FalseCondition(controlplanev1.EtcdClusterHealthyCondition, controlplanev1.EtcdClusterUnhealthyReason, clusterv1.ConditionSeverityError, "Control plane Node %s does not have a corresponding Machine", "n1"), + expectedKCPV1Beta2Condition: &metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneEtcdClusterNotHealthyV1Beta2Reason, + Message: "Control plane Node n1 does not have a corresponding Machine", + }, }, { name: "failure creating the etcd client should report unknown condition", @@ -102,10 +133,21 @@ func TestUpdateEtcdConditions(t *testing.T) { injectEtcdClientGenerator: &fakeEtcdClientGenerator{ forNodesErr: errors.New("failed to get client for node"), }, - expectedKCPCondition: conditions.UnknownCondition(controlplanev1.EtcdClusterHealthyCondition, controlplanev1.EtcdClusterUnknownReason, "Following machines are reporting unknown etcd member status: m1"), + expectedKCPCondition: conditions.UnknownCondition(controlplanev1.EtcdClusterHealthyCondition, controlplanev1.EtcdClusterUnknownReason, "Following Machines are reporting unknown etcd member status: m1"), expectedMachineConditions: map[string]clusterv1.Conditions{ "m1": { - *conditions.UnknownCondition(controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberInspectionFailedReason, "Failed to connect to the etcd pod on the %s node: failed to get client for node", "n1"), + *conditions.UnknownCondition(controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberInspectionFailedReason, "Failed to connect to the etcd Pod on the %s Node: failed to get client for node", "n1"), + }, + }, + expectedKCPV1Beta2Condition: &metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthUnknownV1Beta2Reason, + Message: "Following Machines are reporting etcd member unknown: m1", + }, + expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ + "m1": { + {Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberInspectionFailedV1Beta2Reason, Message: "Failed to connect to the etcd Pod on the n1 Node: failed to get client for node"}, }, }, }, @@ -127,15 +169,26 @@ func TestUpdateEtcdConditions(t *testing.T) { Errors: []string{"some errors"}, }, }, - expectedKCPCondition: conditions.FalseCondition(controlplanev1.EtcdClusterHealthyCondition, controlplanev1.EtcdClusterUnhealthyReason, clusterv1.ConditionSeverityError, "Following machines are reporting etcd member errors: %s", "m1"), + 
expectedKCPCondition: conditions.FalseCondition(controlplanev1.EtcdClusterHealthyCondition, controlplanev1.EtcdClusterUnhealthyReason, clusterv1.ConditionSeverityError, "Following Machines are reporting etcd member errors: %s", "m1"), expectedMachineConditions: map[string]clusterv1.Conditions{ "m1": { *conditions.FalseCondition(controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "Etcd member status reports errors: %s", "some errors"), }, }, + expectedKCPV1Beta2Condition: &metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneEtcdClusterNotHealthyV1Beta2Reason, + Message: "Following Machines are reporting etcd member errors: m1", + }, + expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ + "m1": { + {Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason, Message: "Etcd reports errors: some errors"}, + }, + }, }, { - name: "failure listing members should report false condition", + name: "failure listing members should report false condition in v1beta1, unknown in v1beta2", machines: []*clusterv1.Machine{ fakeMachine("m1", withNodeRef("n1")), }, @@ -152,10 +205,21 @@ func TestUpdateEtcdConditions(t *testing.T) { }, }, }, - expectedKCPCondition: conditions.FalseCondition(controlplanev1.EtcdClusterHealthyCondition, controlplanev1.EtcdClusterUnhealthyReason, clusterv1.ConditionSeverityError, "Following machines are reporting etcd member errors: %s", "m1"), + expectedKCPCondition: conditions.FalseCondition(controlplanev1.EtcdClusterHealthyCondition, controlplanev1.EtcdClusterUnhealthyReason, clusterv1.ConditionSeverityError, "Following Machines are reporting etcd member errors: %s", "m1"), expectedMachineConditions: map[string]clusterv1.Conditions{ "m1": { - *conditions.FalseCondition(controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "Failed get answer from the etcd member on the %s node", "n1"), + *conditions.FalseCondition(controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "Failed to get answer from the etcd member on the %s Node", "n1"), + }, + }, + expectedKCPV1Beta2Condition: &metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthUnknownV1Beta2Reason, + Message: "Following Machines are reporting etcd member unknown: m1", + }, + expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ + "m1": { + {Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberInspectionFailedV1Beta2Reason, Message: "Failed to get answer from the etcd member on the n1 Node: failed to get list of members for etcd cluster: failed to list members"}, }, }, }, @@ -186,12 +250,23 @@ func TestUpdateEtcdConditions(t *testing.T) { }, }, }, - expectedKCPCondition: conditions.FalseCondition(controlplanev1.EtcdClusterHealthyCondition, controlplanev1.EtcdClusterUnhealthyReason, clusterv1.ConditionSeverityError, "Following machines are reporting etcd member errors: %s", "m1"), + 
expectedKCPCondition: conditions.FalseCondition(controlplanev1.EtcdClusterHealthyCondition, controlplanev1.EtcdClusterUnhealthyReason, clusterv1.ConditionSeverityError, "Following Machines are reporting etcd member errors: %s", "m1"), expectedMachineConditions: map[string]clusterv1.Conditions{ "m1": { *conditions.FalseCondition(controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "Etcd member reports alarms: %s", "NOSPACE"), }, }, + expectedKCPV1Beta2Condition: &metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneEtcdClusterNotHealthyV1Beta2Reason, + Message: "Following Machines are reporting etcd member errors: m1", + }, + expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ + "m1": { + {Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason, Message: "Etcd reports alarms: NOSPACE"}, + }, + }, }, { name: "etcd members with different Cluster ID should report false condition", @@ -251,13 +326,27 @@ func TestUpdateEtcdConditions(t *testing.T) { } }, }, - expectedKCPCondition: conditions.FalseCondition(controlplanev1.EtcdClusterHealthyCondition, controlplanev1.EtcdClusterUnhealthyReason, clusterv1.ConditionSeverityError, "Following machines are reporting etcd member errors: %s", "m2"), + expectedKCPCondition: conditions.FalseCondition(controlplanev1.EtcdClusterHealthyCondition, controlplanev1.EtcdClusterUnhealthyReason, clusterv1.ConditionSeverityError, "Following Machines are reporting etcd member errors: %s", "m2"), expectedMachineConditions: map[string]clusterv1.Conditions{ "m1": { *conditions.TrueCondition(controlplanev1.MachineEtcdMemberHealthyCondition), }, "m2": { - *conditions.FalseCondition(controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "etcd member has cluster ID %d, but all previously seen etcd members have cluster ID %d", uint64(2), uint64(1)), + *conditions.FalseCondition(controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "Etcd member has cluster ID %d, but all previously seen etcd members have cluster ID %d", uint64(2), uint64(1)), + }, + }, + expectedKCPV1Beta2Condition: &metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneEtcdClusterNotHealthyV1Beta2Reason, + Message: "Following Machines are reporting etcd member errors: m2", + }, + expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ + "m1": { + {Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionTrue, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Reason, Message: ""}, + }, + "m2": { + {Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason, Message: "Etcd member has cluster ID 2, but all previously seen etcd members have cluster ID 1"}, }, }, }, @@ -319,13 +408,27 @@ func TestUpdateEtcdConditions(t *testing.T) { } }, }, - expectedKCPCondition: 
conditions.FalseCondition(controlplanev1.EtcdClusterHealthyCondition, controlplanev1.EtcdClusterUnhealthyReason, clusterv1.ConditionSeverityError, "Following machines are reporting etcd member errors: %s", "m2"), + expectedKCPCondition: conditions.FalseCondition(controlplanev1.EtcdClusterHealthyCondition, controlplanev1.EtcdClusterUnhealthyReason, clusterv1.ConditionSeverityError, "Following Machines are reporting etcd member errors: %s", "m2"), expectedMachineConditions: map[string]clusterv1.Conditions{ "m1": { *conditions.TrueCondition(controlplanev1.MachineEtcdMemberHealthyCondition), }, "m2": { - *conditions.FalseCondition(controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "etcd member reports the cluster is composed by members [n2 n3], but all previously seen etcd members are reporting [n1 n2]"), + *conditions.FalseCondition(controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "Etcd member reports the cluster is composed by members [n2 n3], but all previously seen etcd members are reporting [n1 n2]"), + }, + }, + expectedKCPV1Beta2Condition: &metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneEtcdClusterNotHealthyV1Beta2Reason, + Message: "Following Machines are reporting etcd member errors: m2", + }, + expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ + "m1": { + {Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionTrue, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Reason, Message: ""}, + }, + "m2": { + {Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason, Message: "The etcd member hosted on this Machine reports the cluster is composed by [n2 n3], but all previously seen etcd members are reporting [n1 n2]"}, }, }, }, @@ -369,7 +472,7 @@ func TestUpdateEtcdConditions(t *testing.T) { } }, }, - expectedKCPCondition: conditions.FalseCondition(controlplanev1.EtcdClusterHealthyCondition, controlplanev1.EtcdClusterUnhealthyReason, clusterv1.ConditionSeverityError, "Following machines are reporting etcd member errors: %s", "m2"), + expectedKCPCondition: conditions.FalseCondition(controlplanev1.EtcdClusterHealthyCondition, controlplanev1.EtcdClusterUnhealthyReason, clusterv1.ConditionSeverityError, "Following Machines are reporting etcd member errors: %s", "m2"), expectedMachineConditions: map[string]clusterv1.Conditions{ "m1": { *conditions.TrueCondition(controlplanev1.MachineEtcdMemberHealthyCondition), @@ -378,6 +481,20 @@ func TestUpdateEtcdConditions(t *testing.T) { *conditions.FalseCondition(controlplanev1.MachineEtcdMemberHealthyCondition, controlplanev1.EtcdMemberUnhealthyReason, clusterv1.ConditionSeverityError, "Missing etcd member"), }, }, + expectedKCPV1Beta2Condition: &metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneEtcdClusterNotHealthyV1Beta2Reason, + Message: "Following Machines are reporting etcd member errors: m2", + }, + expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ + "m1": { + {Type: 
controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionTrue, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Reason, Message: ""}, + }, + "m2": { + {Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberNotHealthyV1Beta2Reason, Message: "Etcd doesn't have an etcd member for Node n2"}, + }, + }, }, { name: "healthy etcd members should report true", @@ -446,9 +563,22 @@ func TestUpdateEtcdConditions(t *testing.T) { *conditions.TrueCondition(controlplanev1.MachineEtcdMemberHealthyCondition), }, }, + expectedKCPV1Beta2Condition: &metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Reason, + }, + expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ + "m1": { + {Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionTrue, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Reason, Message: ""}, + }, + "m2": { + {Type: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Condition, Status: metav1.ConditionTrue, Reason: controlplanev1.KubeadmControlPlaneMachineEtcdMemberHealthyV1Beta2Reason, Message: ""}, + }, + }, }, { - name: "Eternal etcd should set a condition at KCP level", + name: "External etcd should set a condition at KCP level for v1beta1, not for v1beta2", kcp: &controlplanev1.KubeadmControlPlane{ Spec: controlplanev1.KubeadmControlPlaneSpec{ KubeadmConfigSpec: bootstrapv1.KubeadmConfigSpec{ @@ -460,7 +590,8 @@ func TestUpdateEtcdConditions(t *testing.T) { }, }, }, - expectedKCPCondition: conditions.TrueCondition(controlplanev1.EtcdClusterHealthyCondition), + expectedKCPCondition: conditions.TrueCondition(controlplanev1.EtcdClusterHealthyCondition), + expectedKCPV1Beta2Condition: nil, }, } for _, tt := range tests { @@ -483,9 +614,14 @@ func TestUpdateEtcdConditions(t *testing.T) { if tt.expectedKCPCondition != nil { g.Expect(*conditions.Get(tt.kcp, controlplanev1.EtcdClusterHealthyCondition)).To(conditions.MatchCondition(*tt.expectedKCPCondition)) } + if tt.expectedKCPV1Beta2Condition != nil { + g.Expect(*v1beta2conditions.Get(tt.kcp, controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition)).To(v1beta2conditions.MatchCondition(*tt.expectedKCPV1Beta2Condition, v1beta2conditions.IgnoreLastTransitionTime(true))) + } + for _, m := range tt.machines { g.Expect(tt.expectedMachineConditions).To(HaveKey(m.Name)) - g.Expect(m.GetConditions()).To(conditions.MatchConditions(tt.expectedMachineConditions[m.Name]), "unexpected conditions for machine %s", m.Name) + g.Expect(m.GetConditions()).To(conditions.MatchConditions(tt.expectedMachineConditions[m.Name]), "unexpected conditions for Machine %s", m.Name) + g.Expect(m.GetV1Beta2Conditions()).To(v1beta2conditions.MatchConditions(tt.expectedMachineV1Beta2Conditions[m.Name], v1beta2conditions.IgnoreLastTransitionTime(true)), "unexpected conditions for Machine %s", m.Name) } }) } @@ -513,12 +649,14 @@ func TestUpdateStaticPodConditions(t *testing.T) { Name: n1EtcdPodName, }.String() tests := []struct { - name string - kcp *controlplanev1.KubeadmControlPlane - machines []*clusterv1.Machine - injectClient client.Client // This test is injecting a fake client because it is required to create nodes with a controlled Status 
or to fail with a specific error. - expectedKCPCondition *clusterv1.Condition - expectedMachineConditions map[string]clusterv1.Conditions + name string + kcp *controlplanev1.KubeadmControlPlane + machines []*clusterv1.Machine + injectClient client.Client // This test is injecting a fake client because it is required to create nodes with a controlled Status or to fail with a specific error. + expectedKCPCondition *clusterv1.Condition + expectedKCPV1Beta2Condition metav1.Condition + expectedMachineV1Beta2Conditions map[string][]metav1.Condition + expectedMachineConditions map[string]clusterv1.Conditions }{ { name: "if list nodes return an error, it should report all the conditions Unknown", @@ -526,20 +664,34 @@ func TestUpdateStaticPodConditions(t *testing.T) { fakeMachine("m1"), }, injectClient: &fakeClient{ - listErr: errors.New("failed to list nodes"), + listErr: errors.New("failed to list Nodes"), }, - expectedKCPCondition: conditions.UnknownCondition(controlplanev1.ControlPlaneComponentsHealthyCondition, controlplanev1.ControlPlaneComponentsInspectionFailedReason, "Failed to list nodes which are hosting control plane components: failed to list nodes"), + expectedKCPCondition: conditions.UnknownCondition(controlplanev1.ControlPlaneComponentsHealthyCondition, controlplanev1.ControlPlaneComponentsInspectionFailedReason, "Failed to list Nodes which are hosting control plane components: failed to list Nodes"), expectedMachineConditions: map[string]clusterv1.Conditions{ "m1": { - *conditions.UnknownCondition(controlplanev1.MachineAPIServerPodHealthyCondition, controlplanev1.PodInspectionFailedReason, "Failed to get the node which is hosting this component: failed to list nodes"), - *conditions.UnknownCondition(controlplanev1.MachineControllerManagerPodHealthyCondition, controlplanev1.PodInspectionFailedReason, "Failed to get the node which is hosting this component: failed to list nodes"), - *conditions.UnknownCondition(controlplanev1.MachineSchedulerPodHealthyCondition, controlplanev1.PodInspectionFailedReason, "Failed to get the node which is hosting this component: failed to list nodes"), - *conditions.UnknownCondition(controlplanev1.MachineEtcdPodHealthyCondition, controlplanev1.PodInspectionFailedReason, "Failed to get the node which is hosting this component: failed to list nodes"), + *conditions.UnknownCondition(controlplanev1.MachineAPIServerPodHealthyCondition, controlplanev1.PodInspectionFailedReason, "Failed to get the Node which is hosting this component: failed to list Nodes"), + *conditions.UnknownCondition(controlplanev1.MachineControllerManagerPodHealthyCondition, controlplanev1.PodInspectionFailedReason, "Failed to get the Node which is hosting this component: failed to list Nodes"), + *conditions.UnknownCondition(controlplanev1.MachineSchedulerPodHealthyCondition, controlplanev1.PodInspectionFailedReason, "Failed to get the Node which is hosting this component: failed to list Nodes"), + *conditions.UnknownCondition(controlplanev1.MachineEtcdPodHealthyCondition, controlplanev1.PodInspectionFailedReason, "Failed to get the Node which is hosting this component: failed to list Nodes"), + }, + }, + expectedKCPV1Beta2Condition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsInspectionFailedV1Beta2Reason, + Message: "Failed to get Nodes hosting control plane components: failed to list Nodes", + }, + 
expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ + "m1": { + {Type: controlplanev1.KubeadmControlPlaneMachineAPIServerPodHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, Message: "Failed to get the Node hosting the Pod: failed to list Nodes"}, + {Type: controlplanev1.KubeadmControlPlaneMachineControllerManagerPodHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, Message: "Failed to get the Node hosting the Pod: failed to list Nodes"}, + {Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, Message: "Failed to get the Node hosting the Pod: failed to list Nodes"}, + {Type: controlplanev1.KubeadmControlPlaneMachineSchedulerPodHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, Message: "Failed to get the Node hosting the Pod: failed to list Nodes"}, }, }, }, { - name: "If there are provisioning machines, a node without machine should be ignored", + name: "If there are provisioning machines, a node without machine should be ignored in v1beta1, reported in v1beta2", machines: []*clusterv1.Machine{ fakeMachine("m1"), // without NodeRef (provisioning) }, @@ -552,6 +704,20 @@ func TestUpdateStaticPodConditions(t *testing.T) { expectedMachineConditions: map[string]clusterv1.Conditions{ "m1": {}, }, + expectedKCPV1Beta2Condition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason, + Message: "Following Machines are reporting control plane unknown: m1", + }, + expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ + "m1": { + {Type: controlplanev1.KubeadmControlPlaneMachineAPIServerPodHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, Message: "Node does not exist"}, + {Type: controlplanev1.KubeadmControlPlaneMachineControllerManagerPodHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, Message: "Node does not exist"}, + {Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, Message: "Node does not exist"}, + {Type: controlplanev1.KubeadmControlPlaneMachineSchedulerPodHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, Message: "Node does not exist"}, + }, + }, }, { name: "If there are no provisioning machines, a node without machine should be reported as False condition at KCP level", @@ -561,7 +727,13 @@ func TestUpdateStaticPodConditions(t *testing.T) { Items: []corev1.Node{*fakeNode("n1")}, }, }, - expectedKCPCondition: conditions.FalseCondition(controlplanev1.ControlPlaneComponentsHealthyCondition, controlplanev1.ControlPlaneComponentsUnhealthyReason, clusterv1.ConditionSeverityError, "Control plane node %s does not have a corresponding machine", "n1"), + expectedKCPCondition: 
conditions.FalseCondition(controlplanev1.ControlPlaneComponentsHealthyCondition, controlplanev1.ControlPlaneComponentsUnhealthyReason, clusterv1.ConditionSeverityError, "Control plane Node %s does not have a corresponding Machine", "n1"), + expectedKCPV1Beta2Condition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsNotHealthyV1Beta2Reason, + Message: "Control plane Node n1 does not have a corresponding Machine", + }, }, { name: "A node with unreachable taint should report all the conditions Unknown", @@ -573,7 +745,7 @@ func TestUpdateStaticPodConditions(t *testing.T) { Items: []corev1.Node{*fakeNode("n1", withUnreachableTaint())}, }, }, - expectedKCPCondition: conditions.UnknownCondition(controlplanev1.ControlPlaneComponentsHealthyCondition, controlplanev1.ControlPlaneComponentsUnknownReason, "Following machines are reporting unknown control plane status: m1"), + expectedKCPCondition: conditions.UnknownCondition(controlplanev1.ControlPlaneComponentsHealthyCondition, controlplanev1.ControlPlaneComponentsUnknownReason, "Following Machines are reporting unknown control plane status: m1"), expectedMachineConditions: map[string]clusterv1.Conditions{ "m1": { *conditions.UnknownCondition(controlplanev1.MachineAPIServerPodHealthyCondition, controlplanev1.PodInspectionFailedReason, "Node is unreachable"), @@ -582,9 +754,23 @@ func TestUpdateStaticPodConditions(t *testing.T) { *conditions.UnknownCondition(controlplanev1.MachineEtcdPodHealthyCondition, controlplanev1.PodInspectionFailedReason, "Node is unreachable"), }, }, + expectedKCPV1Beta2Condition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason, + Message: "Following Machines are reporting control plane unknown: m1", + }, + expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ + "m1": { + {Type: controlplanev1.KubeadmControlPlaneMachineAPIServerPodHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, Message: "Node n1 is unreachable"}, + {Type: controlplanev1.KubeadmControlPlaneMachineControllerManagerPodHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, Message: "Node n1 is unreachable"}, + {Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, Message: "Node n1 is unreachable"}, + {Type: controlplanev1.KubeadmControlPlaneMachineSchedulerPodHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, Message: "Node n1 is unreachable"}, + }, + }, }, { - name: "A provisioning machine without node should be ignored", + name: "A provisioning machine without node should be ignored in v1beta1, should surface in v1beta2", machines: []*clusterv1.Machine{ fakeMachine("m1"), // without NodeRef (provisioning) }, @@ -595,22 +781,50 @@ func TestUpdateStaticPodConditions(t *testing.T) { expectedMachineConditions: map[string]clusterv1.Conditions{ "m1": {}, }, + expectedKCPV1Beta2Condition: metav1.Condition{ + Type: 
controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason, + Message: "Following Machines are reporting control plane unknown: m1", + }, + expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ + "m1": { + {Type: controlplanev1.KubeadmControlPlaneMachineAPIServerPodHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, Message: "Node does not exist"}, + {Type: controlplanev1.KubeadmControlPlaneMachineControllerManagerPodHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, Message: "Node does not exist"}, + {Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, Message: "Node does not exist"}, + {Type: controlplanev1.KubeadmControlPlaneMachineSchedulerPodHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, Message: "Node does not exist"}, + }, + }, }, { - name: "A provisioned machine without node should report all the conditions as false", + name: "A provisioned machine without node should report all the conditions as false in v1beta1, unknown in v1beta2", machines: []*clusterv1.Machine{ fakeMachine("m1", withNodeRef("n1")), }, injectClient: &fakeClient{ list: &corev1.NodeList{}, }, - expectedKCPCondition: conditions.FalseCondition(controlplanev1.ControlPlaneComponentsHealthyCondition, controlplanev1.ControlPlaneComponentsUnhealthyReason, clusterv1.ConditionSeverityError, "Following machines are reporting control plane errors: %s", "m1"), + expectedKCPCondition: conditions.FalseCondition(controlplanev1.ControlPlaneComponentsHealthyCondition, controlplanev1.ControlPlaneComponentsUnhealthyReason, clusterv1.ConditionSeverityError, "Following Machines are reporting control plane errors: %s", "m1"), expectedMachineConditions: map[string]clusterv1.Conditions{ "m1": { - *conditions.FalseCondition(controlplanev1.MachineAPIServerPodHealthyCondition, controlplanev1.PodFailedReason, clusterv1.ConditionSeverityError, "Missing node"), - *conditions.FalseCondition(controlplanev1.MachineControllerManagerPodHealthyCondition, controlplanev1.PodFailedReason, clusterv1.ConditionSeverityError, "Missing node"), - *conditions.FalseCondition(controlplanev1.MachineEtcdPodHealthyCondition, controlplanev1.PodFailedReason, clusterv1.ConditionSeverityError, "Missing node"), - *conditions.FalseCondition(controlplanev1.MachineSchedulerPodHealthyCondition, controlplanev1.PodFailedReason, clusterv1.ConditionSeverityError, "Missing node"), + *conditions.FalseCondition(controlplanev1.MachineAPIServerPodHealthyCondition, controlplanev1.PodFailedReason, clusterv1.ConditionSeverityError, "Missing Node"), + *conditions.FalseCondition(controlplanev1.MachineControllerManagerPodHealthyCondition, controlplanev1.PodFailedReason, clusterv1.ConditionSeverityError, "Missing Node"), + *conditions.FalseCondition(controlplanev1.MachineEtcdPodHealthyCondition, controlplanev1.PodFailedReason, clusterv1.ConditionSeverityError, "Missing Node"), + *conditions.FalseCondition(controlplanev1.MachineSchedulerPodHealthyCondition, controlplanev1.PodFailedReason, clusterv1.ConditionSeverityError, 
"Missing Node"), + }, + }, + expectedKCPV1Beta2Condition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthUnknownV1Beta2Reason, + Message: "Following Machines are reporting control plane unknown: m1", + }, + expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ + "m1": { + {Type: controlplanev1.KubeadmControlPlaneMachineAPIServerPodHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, Message: "Node n1 does not exist"}, + {Type: controlplanev1.KubeadmControlPlaneMachineControllerManagerPodHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, Message: "Node n1 does not exist"}, + {Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, Message: "Node n1 does not exist"}, + {Type: controlplanev1.KubeadmControlPlaneMachineSchedulerPodHealthyV1Beta2Condition, Status: metav1.ConditionUnknown, Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, Message: "Node n1 does not exist"}, }, }, }, @@ -640,7 +854,7 @@ func TestUpdateStaticPodConditions(t *testing.T) { ), }, }, - expectedKCPCondition: conditions.FalseCondition(controlplanev1.ControlPlaneComponentsHealthyCondition, controlplanev1.ControlPlaneComponentsUnhealthyReason, clusterv1.ConditionSeverityError, "Following machines are reporting control plane errors: %s", "m1"), + expectedKCPCondition: conditions.FalseCondition(controlplanev1.ControlPlaneComponentsHealthyCondition, controlplanev1.ControlPlaneComponentsUnhealthyReason, clusterv1.ConditionSeverityError, "Following Machines are reporting control plane errors: %s", "m1"), expectedMachineConditions: map[string]clusterv1.Conditions{ "m1": { *conditions.TrueCondition(controlplanev1.MachineAPIServerPodHealthyCondition), @@ -649,6 +863,20 @@ func TestUpdateStaticPodConditions(t *testing.T) { *conditions.FalseCondition(controlplanev1.MachineEtcdPodHealthyCondition, controlplanev1.PodFailedReason, clusterv1.ConditionSeverityError, "All the containers have been terminated"), }, }, + expectedKCPV1Beta2Condition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsNotHealthyV1Beta2Reason, + Message: "Following Machines are reporting control plane errors: m1", + }, + expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ + "m1": { + {Type: controlplanev1.KubeadmControlPlaneMachineAPIServerPodHealthyV1Beta2Condition, Status: metav1.ConditionTrue, Reason: controlplanev1.KubeadmControlPlaneMachinePodRunningV1Beta2Reason, Message: ""}, + {Type: controlplanev1.KubeadmControlPlaneMachineControllerManagerPodHealthyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneMachinePodProvisioningV1Beta2Reason, Message: "Waiting to be scheduled"}, + {Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneMachinePodFailedV1Beta2Reason, Message: "All the containers have been terminated"}, + {Type: 
controlplanev1.KubeadmControlPlaneMachineSchedulerPodHealthyV1Beta2Condition, Status: metav1.ConditionFalse, Reason: controlplanev1.KubeadmControlPlaneMachinePodFailedV1Beta2Reason, Message: "All the containers have been terminated"}, + }, + }, }, { name: "Should surface control plane components health", @@ -687,9 +915,22 @@ func TestUpdateStaticPodConditions(t *testing.T) { *conditions.TrueCondition(controlplanev1.MachineEtcdPodHealthyCondition), }, }, + expectedKCPV1Beta2Condition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Reason, + }, + expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ + "m1": { + {Type: controlplanev1.KubeadmControlPlaneMachineAPIServerPodHealthyV1Beta2Condition, Status: metav1.ConditionTrue, Reason: controlplanev1.KubeadmControlPlaneMachinePodRunningV1Beta2Reason, Message: ""}, + {Type: controlplanev1.KubeadmControlPlaneMachineControllerManagerPodHealthyV1Beta2Condition, Status: metav1.ConditionTrue, Reason: controlplanev1.KubeadmControlPlaneMachinePodRunningV1Beta2Reason, Message: ""}, + {Type: controlplanev1.KubeadmControlPlaneMachineEtcdPodHealthyV1Beta2Condition, Status: metav1.ConditionTrue, Reason: controlplanev1.KubeadmControlPlaneMachinePodRunningV1Beta2Reason, Message: ""}, + {Type: controlplanev1.KubeadmControlPlaneMachineSchedulerPodHealthyV1Beta2Condition, Status: metav1.ConditionTrue, Reason: controlplanev1.KubeadmControlPlaneMachinePodRunningV1Beta2Reason, Message: ""}, + }, + }, }, { - name: "Should surface control plane components health with eternal etcd", + name: "Should surface control plane components health with external etcd", kcp: &controlplanev1.KubeadmControlPlane{ Spec: controlplanev1.KubeadmControlPlaneSpec{ KubeadmConfigSpec: bootstrapv1.KubeadmConfigSpec{ @@ -733,6 +974,19 @@ func TestUpdateStaticPodConditions(t *testing.T) { // no condition for etcd Pod }, }, + expectedKCPV1Beta2Condition: metav1.Condition{ + Type: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition, + Status: metav1.ConditionTrue, + Reason: controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Reason, + }, + expectedMachineV1Beta2Conditions: map[string][]metav1.Condition{ + "m1": { + {Type: controlplanev1.KubeadmControlPlaneMachineAPIServerPodHealthyV1Beta2Condition, Status: metav1.ConditionTrue, Reason: controlplanev1.KubeadmControlPlaneMachinePodRunningV1Beta2Reason, Message: ""}, + {Type: controlplanev1.KubeadmControlPlaneMachineControllerManagerPodHealthyV1Beta2Condition, Status: metav1.ConditionTrue, Reason: controlplanev1.KubeadmControlPlaneMachinePodRunningV1Beta2Reason, Message: ""}, + {Type: controlplanev1.KubeadmControlPlaneMachineSchedulerPodHealthyV1Beta2Condition, Status: metav1.ConditionTrue, Reason: controlplanev1.KubeadmControlPlaneMachinePodRunningV1Beta2Reason, Message: ""}, + // no condition for etcd Pod + }, + }, }, } @@ -755,9 +1009,12 @@ func TestUpdateStaticPodConditions(t *testing.T) { if tt.expectedKCPCondition != nil { g.Expect(*conditions.Get(tt.kcp, controlplanev1.ControlPlaneComponentsHealthyCondition)).To(conditions.MatchCondition(*tt.expectedKCPCondition)) } + g.Expect(*v1beta2conditions.Get(tt.kcp, controlplanev1.KubeadmControlPlaneControlPlaneComponentsHealthyV1Beta2Condition)).To(v1beta2conditions.MatchCondition(tt.expectedKCPV1Beta2Condition, v1beta2conditions.IgnoreLastTransitionTime(true))) + for _, m 
:= range tt.machines { g.Expect(tt.expectedMachineConditions).To(HaveKey(m.Name)) g.Expect(m.GetConditions()).To(conditions.MatchConditions(tt.expectedMachineConditions[m.Name])) + g.Expect(m.GetV1Beta2Conditions()).To(v1beta2conditions.MatchConditions(tt.expectedMachineV1Beta2Conditions[m.Name], v1beta2conditions.IgnoreLastTransitionTime(true))) } }) } @@ -768,6 +1025,7 @@ func TestUpdateStaticPodCondition(t *testing.T) { nodeName := "node" component := "kube-component" condition := clusterv1.ConditionType("kubeComponentHealthy") + v1beta2Condition := "kubeComponentHealthy" podName := staticPodName(component, nodeName) podkey := client.ObjectKey{ Namespace: metav1.NamespaceSystem, @@ -775,15 +1033,22 @@ func TestUpdateStaticPodCondition(t *testing.T) { }.String() tests := []struct { - name string - injectClient client.Client // This test is injecting a fake client because it is required to create pods with a controlled Status or to fail with a specific error. - node *corev1.Node - expectedCondition clusterv1.Condition + name string + injectClient client.Client // This test is injecting a fake client because it is required to create pods with a controlled Status or to fail with a specific error. + node *corev1.Node + expectedCondition clusterv1.Condition + expectedV1Beta2Condition metav1.Condition }{ { name: "if node Ready is unknown, assume pod status is stale", node: fakeNode(nodeName, withReadyCondition(corev1.ConditionUnknown)), - expectedCondition: *conditions.UnknownCondition(condition, controlplanev1.PodInspectionFailedReason, "Node Ready condition is unknown, pod data might be stale"), + expectedCondition: *conditions.UnknownCondition(condition, controlplanev1.PodInspectionFailedReason, "Node Ready condition is Unknown, Pod data might be stale"), + expectedV1Beta2Condition: metav1.Condition{ + Type: v1beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, + Message: "Node Ready condition is Unknown, Pod data might be stale", + }, }, { name: "if gets pod return a NotFound error should report PodCondition=False, PodMissing", @@ -792,6 +1057,12 @@ func TestUpdateStaticPodCondition(t *testing.T) { }, node: fakeNode(nodeName), expectedCondition: *conditions.FalseCondition(condition, controlplanev1.PodMissingReason, clusterv1.ConditionSeverityError, "Pod kube-component-node is missing"), + expectedV1Beta2Condition: metav1.Condition{ + Type: v1beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachinePodDoesNotExistV1Beta2Reason, + Message: "Pod kube-component-node does not exist", + }, }, { name: "if gets pod return a generic error should report PodCondition=Unknown, PodInspectionFailed", @@ -799,7 +1070,13 @@ func TestUpdateStaticPodCondition(t *testing.T) { getErr: errors.New("get failure"), }, node: fakeNode(nodeName), - expectedCondition: *conditions.UnknownCondition(condition, controlplanev1.PodInspectionFailedReason, "Failed to get pod status"), + expectedCondition: *conditions.UnknownCondition(condition, controlplanev1.PodInspectionFailedReason, "Failed to get Pod status"), + expectedV1Beta2Condition: metav1.Condition{ + Type: v1beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, + Message: "Please check controller logs for errors", + }, }, { name: "pending pod not yet scheduled should report PodCondition=False, PodProvisioning", @@ -813,6 +1090,12 @@ func 
TestUpdateStaticPodCondition(t *testing.T) { }, node: fakeNode(nodeName), expectedCondition: *conditions.FalseCondition(condition, controlplanev1.PodProvisioningReason, clusterv1.ConditionSeverityInfo, "Waiting to be scheduled"), + expectedV1Beta2Condition: metav1.Condition{ + Type: v1beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachinePodProvisioningV1Beta2Reason, + Message: "Waiting to be scheduled", + }, }, { name: "pending pod running init containers should report PodCondition=False, PodProvisioning", @@ -827,6 +1110,12 @@ func TestUpdateStaticPodCondition(t *testing.T) { }, node: fakeNode(nodeName), expectedCondition: *conditions.FalseCondition(condition, controlplanev1.PodProvisioningReason, clusterv1.ConditionSeverityInfo, "Running init containers"), + expectedV1Beta2Condition: metav1.Condition{ + Type: v1beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachinePodProvisioningV1Beta2Reason, + Message: "Running init containers", + }, }, { name: "pending pod with PodScheduled and PodInitialized report PodCondition=False, PodProvisioning", @@ -841,6 +1130,12 @@ func TestUpdateStaticPodCondition(t *testing.T) { }, node: fakeNode(nodeName), expectedCondition: *conditions.FalseCondition(condition, controlplanev1.PodProvisioningReason, clusterv1.ConditionSeverityInfo, ""), + expectedV1Beta2Condition: metav1.Condition{ + Type: v1beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachinePodProvisioningV1Beta2Reason, + Message: "", + }, }, { name: "running pod with podReady should report PodCondition=true", @@ -854,6 +1149,12 @@ func TestUpdateStaticPodCondition(t *testing.T) { }, node: fakeNode(nodeName), expectedCondition: *conditions.TrueCondition(condition), + expectedV1Beta2Condition: metav1.Condition{ + Type: v1beta2Condition, + Status: metav1.ConditionTrue, + Reason: controlplanev1.KubeadmControlPlaneMachinePodRunningV1Beta2Reason, + Message: "", + }, }, { name: "running pod with ContainerStatus Waiting should report PodCondition=False, PodProvisioning", @@ -871,6 +1172,12 @@ func TestUpdateStaticPodCondition(t *testing.T) { }, node: fakeNode(nodeName), expectedCondition: *conditions.FalseCondition(condition, controlplanev1.PodProvisioningReason, clusterv1.ConditionSeverityInfo, "Waiting something"), + expectedV1Beta2Condition: metav1.Condition{ + Type: v1beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachinePodProvisioningV1Beta2Reason, + Message: "Waiting something", + }, }, { name: "running pod with ContainerStatus Waiting but with exit code != 0 should report PodCondition=False, PodFailed", @@ -893,6 +1200,12 @@ func TestUpdateStaticPodCondition(t *testing.T) { }, node: fakeNode(nodeName), expectedCondition: *conditions.FalseCondition(condition, controlplanev1.PodFailedReason, clusterv1.ConditionSeverityError, "Waiting something"), + expectedV1Beta2Condition: metav1.Condition{ + Type: v1beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachinePodFailedV1Beta2Reason, + Message: "Waiting something", + }, }, { name: "running pod with ContainerStatus Terminated should report PodCondition=False, PodFailed", @@ -910,6 +1223,12 @@ func TestUpdateStaticPodCondition(t *testing.T) { }, node: fakeNode(nodeName), expectedCondition: *conditions.FalseCondition(condition, controlplanev1.PodFailedReason, clusterv1.ConditionSeverityError, "Something failed"), + 
expectedV1Beta2Condition: metav1.Condition{ + Type: v1beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachinePodFailedV1Beta2Reason, + Message: "Something failed", + }, }, { name: "running pod without podReady and without Container status messages should report PodCondition=False, PodProvisioning", @@ -922,6 +1241,12 @@ func TestUpdateStaticPodCondition(t *testing.T) { }, node: fakeNode(nodeName), expectedCondition: *conditions.FalseCondition(condition, controlplanev1.PodProvisioningReason, clusterv1.ConditionSeverityInfo, "Waiting for startup or readiness probes"), + expectedV1Beta2Condition: metav1.Condition{ + Type: v1beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachinePodProvisioningV1Beta2Reason, + Message: "Waiting for startup or readiness probes", + }, }, { name: "failed pod should report PodCondition=False, PodFailed", @@ -934,6 +1259,12 @@ func TestUpdateStaticPodCondition(t *testing.T) { }, node: fakeNode(nodeName), expectedCondition: *conditions.FalseCondition(condition, controlplanev1.PodFailedReason, clusterv1.ConditionSeverityError, "All the containers have been terminated"), + expectedV1Beta2Condition: metav1.Condition{ + Type: v1beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachinePodFailedV1Beta2Reason, + Message: "All the containers have been terminated", + }, }, { name: "succeeded pod should report PodCondition=False, PodFailed", @@ -946,6 +1277,12 @@ func TestUpdateStaticPodCondition(t *testing.T) { }, node: fakeNode(nodeName), expectedCondition: *conditions.FalseCondition(condition, controlplanev1.PodFailedReason, clusterv1.ConditionSeverityError, "All the containers have been terminated"), + expectedV1Beta2Condition: metav1.Condition{ + Type: v1beta2Condition, + Status: metav1.ConditionFalse, + Reason: controlplanev1.KubeadmControlPlaneMachinePodFailedV1Beta2Reason, + Message: "All the containers have been terminated", + }, }, { name: "pod in unknown phase should report PodCondition=Unknown, PodInspectionFailed", @@ -957,7 +1294,13 @@ func TestUpdateStaticPodCondition(t *testing.T) { }, }, node: fakeNode(nodeName), - expectedCondition: *conditions.UnknownCondition(condition, controlplanev1.PodInspectionFailedReason, "Pod is reporting unknown status"), + expectedCondition: *conditions.UnknownCondition(condition, controlplanev1.PodInspectionFailedReason, "Pod is reporting Unknown status"), + expectedV1Beta2Condition: metav1.Condition{ + Type: v1beta2Condition, + Status: metav1.ConditionUnknown, + Reason: controlplanev1.KubeadmControlPlaneMachinePodInspectionFailedV1Beta2Reason, + Message: "Pod is reporting Unknown status", + }, }, } @@ -968,9 +1311,10 @@ func TestUpdateStaticPodCondition(t *testing.T) { w := &Workload{ Client: tt.injectClient, } - w.updateStaticPodCondition(ctx, machine, *tt.node, component, condition) + w.updateStaticPodCondition(ctx, machine, *tt.node, component, condition, v1beta2Condition) g.Expect(*conditions.Get(machine, condition)).To(conditions.MatchCondition(tt.expectedCondition)) + g.Expect(*v1beta2conditions.Get(machine, v1beta2Condition)).To(v1beta2conditions.MatchCondition(tt.expectedV1Beta2Condition, v1beta2conditions.IgnoreLastTransitionTime(true))) }) } } @@ -1043,6 +1387,19 @@ func withMachineReadyCondition(status corev1.ConditionStatus, severity clusterv1 } } +func withMachineReadyV1beta2Condition(status metav1.ConditionStatus) fakeMachineOption { + return func(machine *clusterv1.Machine) { + if 
machine.Status.V1Beta2 == nil { + machine.Status.V1Beta2 = &clusterv1.MachineV1Beta2Status{} + } + machine.Status.V1Beta2.Conditions = append(machine.Status.V1Beta2.Conditions, metav1.Condition{ + Type: clusterv1.MachinesReadyV1Beta2Condition, + Status: status, + Reason: "SomeReason", + }) + } +} + type fakePodOption func(*corev1.Pod) func fakePod(name string, options ...fakePodOption) *corev1.Pod { @@ -1080,7 +1437,7 @@ func withCondition(condition corev1.PodConditionType, status corev1.ConditionSta } } -func TestAggregateFromMachinesToKCP(t *testing.T) { +func TestAggregateConditionsFromMachinesToKCP(t *testing.T) { conditionType := controlplanev1.ControlPlaneComponentsHealthyCondition unhealthyReason := "unhealthy reason" unknownReason := "unknown reason" @@ -1097,7 +1454,7 @@ func TestAggregateFromMachinesToKCP(t *testing.T) { machines: []*clusterv1.Machine{ fakeMachine("m1", withMachineReadyCondition(corev1.ConditionFalse, clusterv1.ConditionSeverityError)), }, - expectedCondition: *conditions.FalseCondition(conditionType, unhealthyReason, clusterv1.ConditionSeverityError, fmt.Sprintf("Following machines are reporting %s errors: %s", note, "m1")), + expectedCondition: *conditions.FalseCondition(conditionType, unhealthyReason, clusterv1.ConditionSeverityError, fmt.Sprintf("Following Machines are reporting %s errors: %s", note, "m1")), }, { name: "input kcp errors", @@ -1112,14 +1469,14 @@ func TestAggregateFromMachinesToKCP(t *testing.T) { machines: []*clusterv1.Machine{ fakeMachine("m1", withMachineReadyCondition(corev1.ConditionFalse, clusterv1.ConditionSeverityWarning)), }, - expectedCondition: *conditions.FalseCondition(conditionType, unhealthyReason, clusterv1.ConditionSeverityWarning, fmt.Sprintf("Following machines are reporting %s warnings: %s", note, "m1")), + expectedCondition: *conditions.FalseCondition(conditionType, unhealthyReason, clusterv1.ConditionSeverityWarning, fmt.Sprintf("Following Machines are reporting %s warnings: %s", note, "m1")), }, { name: "kcp machines with info", machines: []*clusterv1.Machine{ fakeMachine("m1", withMachineReadyCondition(corev1.ConditionFalse, clusterv1.ConditionSeverityInfo)), }, - expectedCondition: *conditions.FalseCondition(conditionType, unhealthyReason, clusterv1.ConditionSeverityInfo, fmt.Sprintf("Following machines are reporting %s info: %s", note, "m1")), + expectedCondition: *conditions.FalseCondition(conditionType, unhealthyReason, clusterv1.ConditionSeverityInfo, fmt.Sprintf("Following Machines are reporting %s info: %s", note, "m1")), }, { name: "kcp machines with true", @@ -1133,7 +1490,7 @@ func TestAggregateFromMachinesToKCP(t *testing.T) { machines: []*clusterv1.Machine{ fakeMachine("m1", withMachineReadyCondition(corev1.ConditionUnknown, clusterv1.ConditionSeverityNone)), }, - expectedCondition: *conditions.UnknownCondition(conditionType, unknownReason, fmt.Sprintf("Following machines are reporting unknown %s status: %s", note, "m1")), + expectedCondition: *conditions.UnknownCondition(conditionType, unknownReason, fmt.Sprintf("Following Machines are reporting unknown %s status: %s", note, "m1")), }, } @@ -1141,7 +1498,7 @@ func TestAggregateFromMachinesToKCP(t *testing.T) { t.Run(tt.name, func(t *testing.T) { g := NewWithT(t) - input := aggregateFromMachinesToKCPInput{ + input := aggregateConditionsFromMachinesToKCPInput{ controlPlane: &ControlPlane{ KCP: &controlplanev1.KubeadmControlPlane{}, Machines: collections.FromMachines(tt.machines...), @@ -1153,9 +1510,113 @@ func TestAggregateFromMachinesToKCP(t *testing.T) { 
unknownReason: unknownReason, note: note, } - aggregateFromMachinesToKCP(input) + aggregateConditionsFromMachinesToKCP(input) g.Expect(*conditions.Get(input.controlPlane.KCP, conditionType)).To(conditions.MatchCondition(tt.expectedCondition)) }) } } + +func TestAggregateV1Beta2ConditionsFromMachinesToKCP(t *testing.T) { + conditionType := controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition + trueReason := "true reason" + unknownReason := "unknown reason" + falseReason := "false reason" + note := "something" + + tests := []struct { + name string + machines []*clusterv1.Machine + kcpErrors []string + expectedCondition metav1.Condition + }{ + { + name: "kcp machines with errors", + machines: []*clusterv1.Machine{ + fakeMachine("m1", withMachineReadyV1beta2Condition(metav1.ConditionFalse)), + fakeMachine("m2", withMachineReadyV1beta2Condition(metav1.ConditionFalse)), + fakeMachine("m3", withMachineReadyV1beta2Condition(metav1.ConditionTrue)), + fakeMachine("m4", withMachineReadyV1beta2Condition(metav1.ConditionUnknown)), + }, + expectedCondition: metav1.Condition{ + Type: conditionType, + Status: metav1.ConditionFalse, + Reason: falseReason, + Message: "Following Machines are reporting something errors: m1, m2", + }, + }, + { + name: "kcp errors", + machines: []*clusterv1.Machine{ + fakeMachine("m1", withMachineReadyV1beta2Condition(metav1.ConditionTrue)), + }, + kcpErrors: []string{"something error"}, + expectedCondition: metav1.Condition{ + Type: conditionType, + Status: metav1.ConditionFalse, + Reason: falseReason, + Message: "something error", + }, + }, + { + name: "kcp machines with unknown", + machines: []*clusterv1.Machine{ + fakeMachine("m1", withMachineReadyV1beta2Condition(metav1.ConditionUnknown)), + fakeMachine("m2", withMachineReadyV1beta2Condition(metav1.ConditionTrue)), + fakeMachine("m3", withMachineReadyV1beta2Condition(metav1.ConditionUnknown)), + }, + expectedCondition: metav1.Condition{ + Type: conditionType, + Status: metav1.ConditionUnknown, + Reason: unknownReason, + Message: "Following Machines are reporting something unknown: m1, m3", + }, + }, + { + name: "kcp machines with true", + machines: []*clusterv1.Machine{ + fakeMachine("m1", withMachineReadyV1beta2Condition(metav1.ConditionTrue)), + fakeMachine("m2", withMachineReadyV1beta2Condition(metav1.ConditionTrue)), + }, + expectedCondition: metav1.Condition{ + Type: conditionType, + Status: metav1.ConditionTrue, + Reason: trueReason, + Message: "", + }, + }, + { + name: "kcp without machines", + machines: []*clusterv1.Machine{}, + expectedCondition: metav1.Condition{ + Type: conditionType, + Status: metav1.ConditionUnknown, + Reason: unknownReason, + Message: "No Machines reporting something status", + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + g := NewWithT(t) + + input := aggregateV1Beta2ConditionsFromMachinesToKCPInput{ + controlPlane: &ControlPlane{ + KCP: &controlplanev1.KubeadmControlPlane{}, + Machines: collections.FromMachines(tt.machines...), + }, + machineConditions: []string{clusterv1.MachinesReadyV1Beta2Condition}, + kcpErrors: tt.kcpErrors, + condition: conditionType, + trueReason: trueReason, + unknownReason: unknownReason, + falseReason: falseReason, + note: note, + } + aggregateV1Beta2ConditionsFromMachinesToKCP(input) + + g.Expect(*v1beta2conditions.Get(input.controlPlane.KCP, conditionType)).To(v1beta2conditions.MatchCondition(tt.expectedCondition, v1beta2conditions.IgnoreLastTransitionTime(true))) + }) + } +} diff --git 
a/docs/proposals/20240916-improve-status-in-CAPI-resources.md b/docs/proposals/20240916-improve-status-in-CAPI-resources.md index 8ab763cb4483..b996ca7084ef 100644 --- a/docs/proposals/20240916-improve-status-in-CAPI-resources.md +++ b/docs/proposals/20240916-improve-status-in-CAPI-resources.md @@ -1023,18 +1023,19 @@ Notes: ##### KubeadmControlPlane (New)Conditions -| Condition | Note | -|-------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `Available` | True if the control plane can be reached, `EtcdClusterAvailable` is true, and `CertificatesAvailable` is true | -| `CertificatesAvailable` | True if all the cluster certificates exist. | -| `EtcdClusterAvailable` | This condition surfaces issues to the managed etcd cluster, if any It is computed as aggregation of Machines's `EtcdMemberHealthy` (if not using an external etcd) conditions plus additional checks validating potential issues to etcd quorum | -| `MachinesReady` | This condition surfaces detail of issues on the controlled machines, if any. Please note this will include also `APIServerPodHealthy`, `ControllerManagerPodHealthy`, `SchedulerPodHealthy`, and if not using an external etcd also `EtcdPodHealthy`, `EtcdMemberHealthy` | -| `MachinesUpToDate` | This condition surfaces details of controlled machines not up to date, if any | -| `ScalingUp` | True if available replicas < desired replicas | -| `ScalingDown` | True if replicas > desired replicas | -| `Remediating` | This condition surfaces details about ongoing remediation of the controlled machines, if any | -| `Deleting` | If KubeadmControlPlane is deleted, this condition surfaces details about ongoing deletion of the controlled machines | -| `Paused` | True if this resource or the Cluster it belongs to are paused | +| Condition | Note | +|---------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `Available` | True if the control plane can be reached, `EtcdClusterHealthy` is true, and `CertificatesAvailable` is true | +| `CertificatesAvailable` | True if all the cluster certificates exist. | +| `EtcdClusterHealthy` | This condition surfaces issues to the etcd cluster hosted on machines managed by this object, if any. It is computed as aggregation of Machine's `EtcdMemberHealthy` conditions plus additional checks validating potential issues to etcd quorum | +| `ControlPlaneComponentsHealthy` | This condition surfaces issues to Kubernetes control plane components hosted on machines managed by this object. It is computed as aggregation of Machine's `APIServerPodHealthy`, `ControllerManagerPodHealthy`, `SchedulerPodHealthy`, `EtcdPodHealthy` conditions plus additional checks on control plane machines and nodes | +| `MachinesReady` | This condition surfaces detail of issues on the controlled machines, if any. 
Please note this will include also `APIServerPodHealthy`, `ControllerManagerPodHealthy`, `SchedulerPodHealthy`, and if not using an external etcd also `EtcdPodHealthy`, `EtcdMemberHealthy` | +| `MachinesUpToDate` | This condition surfaces details of controlled machines not up to date, if any | +| `ScalingUp` | True if available replicas < desired replicas | +| `ScalingDown` | True if replicas > desired replicas | +| `Remediating` | This condition surfaces details about ongoing remediation of the controlled machines, if any | +| `Deleting` | If KubeadmControlPlane is deleted, this condition surfaces details about ongoing deletion of the controlled machines | +| `Paused` | True if this resource or the Cluster it belongs to are paused | > To better evaluate proposed changes, below you can find the list of current KubeadmControlPlane's conditions: > Ready, CertificatesAvailable, MachinesCreated, Available, MachinesSpecUpToDate, Resized, MachinesReady, @@ -1047,6 +1048,9 @@ Notes: - The KubeadmControlPlane controller is going to add `APIServerPodHealthy`, `ControllerManagerPodHealthy`, `SchedulerPodHealthy`, `EtcdPodHealthy`, `EtcdMemberHealthy`conditions to the controller machines. These conditions will also be defined as `readinessGates` for computing Machine's `Ready` condition. +- The KubeadmControlPlane controller is going to stop setting the `EtcdClusterHealthy` condition to true in case of external etcd. + This will allow tools managing the external etcd instance to use the `EtcdClusterHealthy` condition to report back status into + the KubeadmControlPlane if they want to. #### KubeadmControlPlane Print columns
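
Before moving on to print columns, one illustration of the external etcd note above: since KCP will no longer set `EtcdClusterHealthy` to true when etcd is external, a tool managing that external etcd instance could report health back through the same condition. The sketch below is not part of this patch; the `setExternalEtcdHealth` helper and the `Status.V1Beta2` field layout on `KubeadmControlPlane` are assumptions modeled on the Machine pattern used in the tests above, and a real tool would persist the change with a status patch rather than only mutating an in-memory object.

```go
// Sketch of how a tool managing an external etcd instance could report health
// back into KubeadmControlPlane through the EtcdClusterHealthy v1beta2 condition.
// Assumptions (not defined by this patch): the setExternalEtcdHealth helper and
// the Status.V1Beta2 field layout, modeled on the Machine pattern used in the tests.
package externaletcd

import (
	"k8s.io/apimachinery/pkg/api/meta"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"

	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
)

// setExternalEtcdHealth mutates the in-memory KCP object; a real tool would
// follow this with a status patch against the API server.
func setExternalEtcdHealth(kcp *controlplanev1.KubeadmControlPlane, healthy bool, message string) {
	if kcp.Status.V1Beta2 == nil {
		// Assumed status struct, mirroring clusterv1.MachineV1Beta2Status.
		kcp.Status.V1Beta2 = &controlplanev1.KubeadmControlPlaneV1Beta2Status{}
	}

	condition := metav1.Condition{
		Type:    controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Condition,
		Status:  metav1.ConditionTrue,
		Reason:  controlplanev1.KubeadmControlPlaneEtcdClusterHealthyV1Beta2Reason,
		Message: message,
	}
	if !healthy {
		condition.Status = metav1.ConditionFalse
		condition.Reason = controlplanev1.KubeadmControlPlaneEtcdClusterNotHealthyV1Beta2Reason
	}

	// SetStatusCondition updates LastTransitionTime only when the status value changes.
	meta.SetStatusCondition(&kcp.Status.V1Beta2.Conditions, condition)
}
```

Reusing the `Healthy`/`NotHealthy` reasons introduced by this patch keeps an externally managed condition consistent with what KCP reports for managed etcd, so consumers of the condition do not need to distinguish between the two cases.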