diff --git a/controlplane/kubeadm/controllers/kubeadm_control_plane_controller.go b/controlplane/kubeadm/controllers/kubeadm_control_plane_controller.go
index 569576d7be76..9c6ac13529d0 100644
--- a/controlplane/kubeadm/controllers/kubeadm_control_plane_controller.go
+++ b/controlplane/kubeadm/controllers/kubeadm_control_plane_controller.go
@@ -122,6 +122,9 @@ func (r *KubeadmControlPlaneReconciler) SetupWithManager(mgr ctrl.Manager, optio
 	if r.remoteClientGetter == nil {
 		r.remoteClientGetter = remote.NewClusterClient
 	}
+	if r.managementCluster == nil {
+		r.managementCluster = &internal.ManagementCluster{Client: r.Client}
+	}
 
 	return nil
 }
@@ -307,36 +310,33 @@ func (r *KubeadmControlPlaneReconciler) updateStatus(ctx context.Context, kcp *c
 	kcp.Status.UpdatedReplicas = int32(len(currentMachines))
 
 	replicas := int32(len(ownedMachines))
+
+	// set some basics in case the workload cluster is not yet available
 	kcp.Status.Replicas = replicas
-	readyMachines := int32(0)
+	kcp.Status.UnavailableReplicas = replicas
 
-	remoteClient, err := r.remoteClientGetter(ctx, r.Client, util.ObjectKey(cluster), r.scheme)
+	workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(cluster))
+	// Not being able to get the workload cluster is fine, but if this message persists there is a problem
 	if err != nil {
-		if cause := errors.Cause(err); !apierrors.IsNotFound(cause) && !apierrors.IsTimeout(cause) {
-			return errors.Wrap(err, "failed to create remote cluster client")
-		}
-	} else {
-		for i := range ownedMachines {
-			node, err := getMachineNode(ctx, remoteClient, ownedMachines[i])
-			if err != nil {
-				return errors.Wrap(err, "failed to get referenced Node")
-			}
-			if node == nil {
-				continue
-			}
-			if node.Spec.ProviderID != "" {
-				readyMachines++
-			}
-		}
+		r.Log.Info("unable to get workload cluster", "err", fmt.Sprintf("%s", err))
+		return nil
+	}
+	status, err := workloadCluster.ClusterStatus(ctx)
+	if err != nil {
+		return err
 	}
-	kcp.Status.ReadyReplicas = readyMachines
-	kcp.Status.UnavailableReplicas = replicas - readyMachines
+	kcp.Status.ReadyReplicas = status.ReadyNodes
+	kcp.Status.UnavailableReplicas = status.Nodes - status.ReadyNodes
 
-	if !kcp.Status.Initialized {
-		if kcp.Status.ReadyReplicas > 0 {
-			kcp.Status.Initialized = true
-		}
+	// This only gets initialized once and does not change if the kubeadm config map goes away.
+	if status.HasKubeadmConfig {
+		kcp.Status.Initialized = true
+	}
+
+	if kcp.Status.ReadyReplicas > 0 {
+		kcp.Status.Ready = true
 	}
+
 	return nil
 }
 
@@ -933,25 +933,3 @@ func (r *KubeadmControlPlaneReconciler) ClusterToKubeadmControlPlane(o handler.M
 
 	return nil
 }
-
-func getMachineNode(ctx context.Context, crClient client.Client, machine *clusterv1.Machine) (*corev1.Node, error) {
-	nodeRef := machine.Status.NodeRef
-	if nodeRef == nil {
-		return nil, nil
-	}
-
-	node := &corev1.Node{}
-	err := crClient.Get(
-		ctx,
-		types.NamespacedName{Name: nodeRef.Name},
-		node,
-	)
-	if err != nil {
-		if apierrors.IsNotFound(errors.Cause(err)) {
-			return nil, nil
-		}
-		return nil, err
-	}
-
-	return node, nil
-}
diff --git a/controlplane/kubeadm/controllers/kubeadm_control_plane_controller_test.go b/controlplane/kubeadm/controllers/kubeadm_control_plane_controller_test.go
index 4a6fae1d3bde..0d54dacf3ed3 100644
--- a/controlplane/kubeadm/controllers/kubeadm_control_plane_controller_test.go
+++ b/controlplane/kubeadm/controllers/kubeadm_control_plane_controller_test.go
@@ -444,7 +444,10 @@ func TestReconcileClusterNoEndpoints(t *testing.T) {
 		Log:                log.Log,
 		remoteClientGetter: fakeremote.NewClusterClient,
 		recorder:           record.NewFakeRecorder(32),
-		managementCluster:  &internal.ManagementCluster{Client: fakeClient},
+		managementCluster: &fakeManagementCluster{
+			ManagementCluster: &internal.ManagementCluster{Client: fakeClient},
+			Cluster:           &internal.Cluster{Client: fakeClient},
+		},
 	}
 
 	result, err := r.Reconcile(ctrl.Request{NamespacedName: types.NamespacedName{Name: kcp.Name, Namespace: kcp.Namespace}})
@@ -533,14 +536,16 @@ func TestReconcileInitializeControlPlane(t *testing.T) {
 	log.SetLogger(klogr.New())
 
 	expectedLabels := map[string]string{clusterv1.ClusterLabelName: "foo"}
-
 	r := &KubeadmControlPlaneReconciler{
 		Client:             fakeClient,
 		Log:                log.Log,
 		remoteClientGetter: fakeremote.NewClusterClient,
 		scheme:             scheme.Scheme,
 		recorder:           record.NewFakeRecorder(32),
-		managementCluster:  &internal.ManagementCluster{Client: fakeClient},
+		managementCluster: &fakeManagementCluster{
+			ManagementCluster: &internal.ManagementCluster{Client: fakeClient},
+			Cluster:           &internal.Cluster{Client: fakeClient},
+		},
 	}
 
 	result, err := r.Reconcile(ctrl.Request{NamespacedName: types.NamespacedName{Name: kcp.Name, Namespace: kcp.Namespace}})
@@ -693,60 +698,6 @@ func TestKubeadmControlPlaneReconciler_generateKubeadmConfig(t *testing.T) {
 	g.Expect(bootstrapConfig.Spec).To(Equal(spec))
 }
 
-func Test_getMachineNodeNoNodeRef(t *testing.T) {
-	g := NewWithT(t)
-
-	fakeClient := newFakeClient(g)
-
-	m := &clusterv1.Machine{}
-	node, err := getMachineNode(context.Background(), fakeClient, m)
-	g.Expect(err).NotTo(HaveOccurred())
-	g.Expect(node).To(BeNil())
-}
-
-func Test_getMachineNodeNotFound(t *testing.T) {
-	g := NewWithT(t)
-
-	fakeClient := newFakeClient(g)
-
-	m := &clusterv1.Machine{
-		Status: clusterv1.MachineStatus{
-			NodeRef: &corev1.ObjectReference{
-				Kind:       "Node",
-				APIVersion: corev1.SchemeGroupVersion.String(),
-				Name:       "notfound",
-			},
-		},
-	}
-	node, err := getMachineNode(context.Background(), fakeClient, m)
-	g.Expect(err).NotTo(HaveOccurred())
-	g.Expect(node).To(BeNil())
-}
-
-func Test_getMachineNodeFound(t *testing.T) {
-	g := NewWithT(t)
-
-	node := &corev1.Node{
-		ObjectMeta: metav1.ObjectMeta{
-			Name: "testNode",
-		},
-	}
-	fakeClient := newFakeClient(g, node.DeepCopy())
-
-	m := &clusterv1.Machine{
-		Status: clusterv1.MachineStatus{
-			NodeRef: &corev1.ObjectReference{
-				Kind:       "Node",
-				APIVersion: corev1.SchemeGroupVersion.String(),
-				Name:       "testNode",
-			},
-		},
-	}
-	node, err := getMachineNode(context.Background(), fakeClient, m)
-	g.Expect(err).NotTo(HaveOccurred())
-	g.Expect(node).To(Equal(node))
-}
-
 func TestKubeadmControlPlaneReconciler_updateStatusNoMachines(t *testing.T) {
 	g := NewWithT(t)
 
@@ -810,7 +761,8 @@ func createMachineNodePair(name string, cluster *clusterv1.Cluster, kcp *control
 
 	node := &corev1.Node{
 		ObjectMeta: metav1.ObjectMeta{
-			Name: name,
+			Name:   name,
+			Labels: map[string]string{"node-role.kubernetes.io/master": ""},
 		},
 	}
 
@@ -876,6 +828,15 @@ func TestKubeadmControlPlaneReconciler_updateStatusAllMachinesNotReady(t *testin
 	g.Expect(kcp.Status.Ready).To(BeFalse())
 }
 
+func kubeadmConfigMap() *corev1.ConfigMap {
+	return &corev1.ConfigMap{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "kube-config",
+			Namespace: metav1.NamespaceSystem,
+		},
+	}
+}
+
 func TestKubeadmControlPlaneReconciler_updateStatusAllMachinesReady(t *testing.T) {
 	g := NewWithT(t)
 
@@ -895,11 +856,13 @@ func TestKubeadmControlPlaneReconciler_updateStatusAllMachinesReady(t *testing.T
 	kcp.Default()
 	g.Expect(kcp.ValidateCreate()).To(Succeed())
 
-	objs := []runtime.Object{cluster.DeepCopy(), kcp.DeepCopy()}
+	objs := []runtime.Object{cluster.DeepCopy(), kcp.DeepCopy(), kubeadmConfigMap()}
+	machines := map[string]*clusterv1.Machine{}
 	for i := 0; i < 3; i++ {
 		name := fmt.Sprintf("test-%d", i)
 		m, n := createMachineNodePair(name, cluster, kcp, true)
-		objs = append(objs, m, n)
+		objs = append(objs, n)
+		machines[m.Name] = m
 	}
 
 	fakeClient := newFakeClient(g, objs...)
@@ -910,8 +873,11 @@ func TestKubeadmControlPlaneReconciler_updateStatusAllMachinesReady(t *testing.T
 		Log:                log.Log,
 		remoteClientGetter: fakeremote.NewClusterClient,
 		scheme:             scheme.Scheme,
-		managementCluster:  &internal.ManagementCluster{Client: fakeClient},
-		recorder:           record.NewFakeRecorder(32),
+		managementCluster: &fakeManagementCluster{
+			Machines: machines,
+			Cluster:  &internal.Cluster{Client: fakeClient},
+		},
+		recorder: record.NewFakeRecorder(32),
 	}
 
 	g.Expect(r.updateStatus(context.Background(), kcp, cluster)).To(Succeed())
@@ -922,9 +888,7 @@ func TestKubeadmControlPlaneReconciler_updateStatusAllMachinesReady(t *testing.T
 	g.Expect(kcp.Status.FailureMessage).To(BeNil())
 	g.Expect(kcp.Status.FailureReason).To(BeEquivalentTo(""))
 	g.Expect(kcp.Status.Initialized).To(BeTrue())
-
-	// TODO: will need to be updated once we start handling Ready
-	g.Expect(kcp.Status.Ready).To(BeFalse())
+	g.Expect(kcp.Status.Ready).To(BeTrue())
 }
 
 func TestKubeadmControlPlaneReconciler_updateStatusMachinesReadyMixed(t *testing.T) {
@@ -945,16 +909,17 @@ func TestKubeadmControlPlaneReconciler_updateStatusMachinesReadyMixed(t *testing
 	}
 	kcp.Default()
 	g.Expect(kcp.ValidateCreate()).To(Succeed())
-
+	machines := map[string]*clusterv1.Machine{}
 	objs := []runtime.Object{cluster.DeepCopy(), kcp.DeepCopy()}
 	for i := 0; i < 4; i++ {
 		name := fmt.Sprintf("test-%d", i)
 		m, n := createMachineNodePair(name, cluster, kcp, false)
-		objs = append(objs, m, n)
+		machines[m.Name] = m
+		objs = append(objs, n)
 	}
 	m, n := createMachineNodePair("testReady", cluster, kcp, true)
-	objs = append(objs, m, n)
-
+	objs = append(objs, n, kubeadmConfigMap())
+	machines[m.Name] = m
 	fakeClient := newFakeClient(g, objs...)
 
 	log.SetLogger(klogr.New())
@@ -963,8 +928,11 @@ func TestKubeadmControlPlaneReconciler_updateStatusMachinesReadyMixed(t *testing
 		Log:                log.Log,
 		remoteClientGetter: fakeremote.NewClusterClient,
 		scheme:             scheme.Scheme,
-		managementCluster:  &internal.ManagementCluster{Client: fakeClient},
-		recorder:           record.NewFakeRecorder(32),
+		managementCluster: &fakeManagementCluster{
+			Machines: machines,
+			Cluster:  &internal.Cluster{Client: fakeClient},
+		},
+		recorder: record.NewFakeRecorder(32),
 	}
 
 	g.Expect(r.updateStatus(context.Background(), kcp, cluster)).To(Succeed())
@@ -975,9 +943,7 @@ func TestKubeadmControlPlaneReconciler_updateStatusMachinesReadyMixed(t *testing
 	g.Expect(kcp.Status.FailureMessage).To(BeNil())
 	g.Expect(kcp.Status.FailureReason).To(BeEquivalentTo(""))
 	g.Expect(kcp.Status.Initialized).To(BeTrue())
-
-	// TODO: will need to be updated once we start handling Ready
-	g.Expect(kcp.Status.Ready).To(BeFalse())
+	g.Expect(kcp.Status.Ready).To(BeTrue())
 }
 
 func TestCloneConfigsAndGenerateMachine(t *testing.T) {
@@ -1222,16 +1188,21 @@ func TestKubeadmControlPlaneReconciler_reconcileDelete(t *testing.T) {
 }
 
 type fakeManagementCluster struct {
+	*internal.ManagementCluster
 	ControlPlaneHealthy bool
 	EtcdHealthy         bool
 	Machines            internal.FilterableMachineCollection
+	Cluster             *internal.Cluster
 }
 
 func (f *fakeManagementCluster) GetWorkloadCluster(_ context.Context, _ types.NamespacedName) (*internal.Cluster, error) {
-	return nil, nil
+	return f.Cluster, nil
 }
 
-func (f *fakeManagementCluster) GetMachinesForCluster(_ context.Context, _ types.NamespacedName, _ ...internal.MachineFilter) (internal.FilterableMachineCollection, error) {
+func (f *fakeManagementCluster) GetMachinesForCluster(c context.Context, n types.NamespacedName, filters ...internal.MachineFilter) (internal.FilterableMachineCollection, error) {
+	if f.Machines == nil {
+		return f.ManagementCluster.GetMachinesForCluster(c, n, filters...)
+	}
 	return f.Machines, nil
 }
 
diff --git a/controlplane/kubeadm/internal/workload_cluster.go b/controlplane/kubeadm/internal/workload_cluster.go
index acbebaa1d200..3d9fa0d01bf9 100644
--- a/controlplane/kubeadm/internal/workload_cluster.go
+++ b/controlplane/kubeadm/internal/workload_cluster.go
@@ -37,6 +37,7 @@ import (
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
 	"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd"
 	etcdutil "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd/util"
+	"sigs.k8s.io/cluster-api/util"
 	"sigs.k8s.io/cluster-api/util/certs"
 	ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
 )
@@ -324,6 +325,44 @@ func (c *Cluster) RemoveMachineFromKubeadmConfigMap(ctx context.Context, machine
 	return nil
 }
 
+// ClusterStatus holds stats information about the cluster.
+type ClusterStatus struct {
+	// Nodes are a total count of nodes
+	Nodes int32
+	// ReadyNodes are the count of nodes that are reporting ready
+	ReadyNodes int32
+	// HasKubeadmConfig will be true if the kubeadm config map has been uploaded, false otherwise.
+	HasKubeadmConfig bool
+}
+
+// ClusterStatus returns the status of the cluster.
+func (c *Cluster) ClusterStatus(ctx context.Context) (ClusterStatus, error) {
+	status := ClusterStatus{}
+
+	// count the control plane nodes
+	nodes, err := c.getControlPlaneNodes(ctx)
+	if err != nil {
+		return status, err
+	}
+
+	for _, node := range nodes.Items {
+		nodeCopy := node
+		status.Nodes++
+		if util.IsNodeReady(&nodeCopy) {
+			status.ReadyNodes++
+		}
+	}
+
+	// find the kubeadm config
+	kubeadmConfigKey := ctrlclient.ObjectKey{
+		Name:      "kube-config",
+		Namespace: metav1.NamespaceSystem,
+	}
+	err = c.Client.Get(ctx, kubeadmConfigKey, &corev1.ConfigMap{})
+	status.HasKubeadmConfig = err == nil
+	return status, nil
+}
+
 func generateClientCert(caCertEncoded, caKeyEncoded []byte) (tls.Certificate, error) {
 	privKey, err := certs.NewPrivateKey()
 	if err != nil {
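
Note on the readiness counting above: the new Cluster.ClusterStatus method delegates the per-node check to util.IsNodeReady from sigs.k8s.io/cluster-api/util. The following is a minimal sketch of what such a helper typically does, assuming it inspects the corev1 NodeReady condition; the package name "sketch" and function name isNodeReady are illustrative only and not part of this change.

package sketch

import (
	corev1 "k8s.io/api/core/v1"
)

// isNodeReady reports whether the Node carries a NodeReady condition with
// status ConditionTrue, which is the usual definition of a "ready" node.
func isNodeReady(node *corev1.Node) bool {
	for _, condition := range node.Status.Conditions {
		if condition.Type == corev1.NodeReady {
			return condition.Status == corev1.ConditionTrue
		}
	}
	return false
}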