From 4c4f45ca9409794f563e2f3a763645c8bc6f78a6 Mon Sep 17 00:00:00 2001 From: Chuck Ha Date: Fri, 28 Feb 2020 11:51:55 -0500 Subject: [PATCH] Refactor workload cluster out of cluster Signed-off-by: Chuck Ha --- .../kubeadm_control_plane_controller.go | 33 +- .../kubeadm_control_plane_controller_test.go | 265 ++---------- controlplane/kubeadm/internal/cluster.go | 401 +----------------- controlplane/kubeadm/internal/cluster_test.go | 4 +- .../kubeadm/internal/workload_cluster.go | 393 +++++++++++++++++ 5 files changed, 452 insertions(+), 644 deletions(-) create mode 100644 controlplane/kubeadm/internal/workload_cluster.go diff --git a/controlplane/kubeadm/controllers/kubeadm_control_plane_controller.go b/controlplane/kubeadm/controllers/kubeadm_control_plane_controller.go index 3140838c5137..8d011b8fa438 100644 --- a/controlplane/kubeadm/controllers/kubeadm_control_plane_controller.go +++ b/controlplane/kubeadm/controllers/kubeadm_control_plane_controller.go @@ -73,12 +73,9 @@ const ( type managementCluster interface { GetMachinesForCluster(ctx context.Context, cluster types.NamespacedName, filters ...internal.MachineFilter) (internal.FilterableMachineCollection, error) - TargetClusterControlPlaneIsHealthy(ctx context.Context, clusterKey types.NamespacedName, controlPlaneName string) error + GetWorkloadCluster(ctx context.Context, cluster types.NamespacedName) (*internal.Cluster, error) TargetClusterEtcdIsHealthy(ctx context.Context, clusterKey types.NamespacedName, controlPlaneName string) error - RemoveEtcdMemberForMachine(ctx context.Context, clusterKey types.NamespacedName, machine *clusterv1.Machine) error - RemoveMachineFromKubeadmConfigMap(ctx context.Context, clusterKey types.NamespacedName, machine *clusterv1.Machine) error - UpdateKubernetesVersionInKubeadmConfigMap(ctx context.Context, clusterKey types.NamespacedName, version string) error - UpdateKubeletConfigMap(ctx context.Context, clusterKey types.NamespacedName, version semver.Version) error + 
TargetClusterControlPlaneIsHealthy(ctx context.Context, clusterKey types.NamespacedName, controlPlaneName string) error } // +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;patch @@ -348,8 +345,14 @@ func (r *KubeadmControlPlaneReconciler) upgradeControlPlane(ctx context.Context, // TODO: handle reconciliation of etcd members and kubeadm config in case they get out of sync with cluster + workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(cluster)) + if err != nil { + logger.Error(err, "failed to get remote client for workload cluster", "cluster key", util.ObjectKey(cluster)) + return ctrl.Result{}, err + } + // Reconcile the remote cluster's configuration necessary for upgrade - if err := r.reconcileConfiguration(ctx, kcp.Spec.Version, util.ObjectKey(cluster)); err != nil { + if err := r.reconcileConfiguration(ctx, kcp.Spec.Version, util.ObjectKey(cluster), workloadCluster); err != nil { logger.Error(err, "failed reconcile remote cluster configuration for upgrade") return ctrl.Result{}, err } @@ -384,7 +387,7 @@ func (r *KubeadmControlPlaneReconciler) upgradeControlPlane(ctx context.Context, } // reconcileConfiguration will update the remote cluster's system configurations in preparation for an upgrade. 
-func (r *KubeadmControlPlaneReconciler) reconcileConfiguration(ctx context.Context, version string, clusterKey client.ObjectKey) error { +func (r *KubeadmControlPlaneReconciler) reconcileConfiguration(ctx context.Context, version string, clusterKey client.ObjectKey, workloadCluster *internal.Cluster) error { parsedVersion, err := semver.ParseTolerant(version) if err != nil { return errors.Wrapf(err, "failed to parse kubernetes version %q", version) @@ -404,11 +407,11 @@ func (r *KubeadmControlPlaneReconciler) reconcileConfiguration(ctx context.Conte return errors.Wrap(err, "failed to reconcile the remote kubelet RBAC binding") } - if err := r.managementCluster.UpdateKubernetesVersionInKubeadmConfigMap(ctx, clusterKey, version); err != nil { + if err := workloadCluster.UpdateKubernetesVersionInKubeadmConfigMap(ctx, version); err != nil { return errors.Wrap(err, "failed to update the kubernetes version in the kubeadm config map") } - if err := r.managementCluster.UpdateKubeletConfigMap(ctx, clusterKey, parsedVersion); err != nil { + if err := workloadCluster.UpdateKubeletConfigMap(ctx, parsedVersion); err != nil { return errors.Wrap(err, "failed to upgrade kubelet config map") } @@ -570,6 +573,13 @@ func (r *KubeadmControlPlaneReconciler) scaleUpControlPlane(ctx context.Context, func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KubeadmControlPlane, machines internal.FilterableMachineCollection) (ctrl.Result, error) { logger := r.Log.WithValues("namespace", kcp.Namespace, "kubeadmControlPlane", kcp.Name, "cluster", cluster.Name) + + workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(cluster)) + if err != nil { + logger.Error(err, "failed to create client to workload cluster") + return ctrl.Result{}, errors.New("failed to create client to workload cluster") + } + // We don't want to health check at the beginning of this method to avoid blocking re-entrancy 
// Wait for any delete in progress to complete before deleting another Machine @@ -604,9 +614,8 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane(ctx context.Contex logger.Error(err, "waiting for control plane to pass etcd health check before adding removing a control plane machine") r.recorder.Eventf(kcp, corev1.EventTypeWarning, "ControlPlaneUnhealthy", "Waiting for control plane to pass etcd health check before removing a control plane machine: %v", err) return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: HealthCheckFailedRequeueAfter} - } - if err := r.managementCluster.RemoveEtcdMemberForMachine(ctx, util.ObjectKey(cluster), machineToDelete); err != nil { + if err := workloadCluster.RemoveEtcdMemberForMachine(ctx, machineToDelete); err != nil { logger.Error(err, "failed to remove etcd member for machine") return ctrl.Result{}, err } @@ -622,7 +631,7 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane(ctx context.Contex return ctrl.Result{}, &capierrors.RequeueAfterError{RequeueAfter: HealthCheckFailedRequeueAfter} } - if err := r.managementCluster.RemoveMachineFromKubeadmConfigMap(ctx, util.ObjectKey(cluster), machineToDelete); err != nil { + if err := workloadCluster.RemoveMachineFromKubeadmConfigMap(ctx, machineToDelete); err != nil { logger.Error(err, "failed to remove machine from kubeadm ConfigMap") return ctrl.Result{}, err } diff --git a/controlplane/kubeadm/controllers/kubeadm_control_plane_controller_test.go b/controlplane/kubeadm/controllers/kubeadm_control_plane_controller_test.go index cc6262837e29..653aea8fff56 100644 --- a/controlplane/kubeadm/controllers/kubeadm_control_plane_controller_test.go +++ b/controlplane/kubeadm/controllers/kubeadm_control_plane_controller_test.go @@ -22,7 +22,6 @@ import ( "testing" "time" - "github.com/blang/semver" . 
"github.com/onsi/gomega" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" @@ -1226,6 +1225,10 @@ type fakeManagementCluster struct { Machines internal.FilterableMachineCollection } +func (f *fakeManagementCluster) GetWorkloadCluster(_ context.Context, _ types.NamespacedName) (*internal.Cluster, error) { + return nil, nil +} + func (f *fakeManagementCluster) GetMachinesForCluster(_ context.Context, _ types.NamespacedName, _ ...internal.MachineFilter) (internal.FilterableMachineCollection, error) { return f.Machines, nil } @@ -1244,22 +1247,6 @@ func (f *fakeManagementCluster) TargetClusterEtcdIsHealthy(_ context.Context, _ return nil } -func (f *fakeManagementCluster) RemoveEtcdMemberForMachine(_ context.Context, _ types.NamespacedName, _ *clusterv1.Machine) error { - return nil -} - -func (f *fakeManagementCluster) RemoveMachineFromKubeadmConfigMap(_ context.Context, _ types.NamespacedName, _ *clusterv1.Machine) error { - return nil -} - -func (f *fakeManagementCluster) UpdateKubernetesVersionInKubeadmConfigMap(_ context.Context, _ types.NamespacedName, _ string) error { - return nil -} - -func (f *fakeManagementCluster) UpdateKubeletConfigMap(_ context.Context, _ types.NamespacedName, _ semver.Version) error { - return nil -} - func TestKubeadmControlPlaneReconciler_scaleUpControlPlane(t *testing.T) { t.Run("creates a control plane Machine if health checks pass", func(t *testing.T) { g := NewWithT(t) @@ -1357,234 +1344,34 @@ func TestKubeadmControlPlaneReconciler_scaleUpControlPlane(t *testing.T) { }) } -func TestKubeadmControlPlaneReconciler_scaleDownControlPlane(t *testing.T) { - t.Run("deletes a control plane Machine if health checks pass", func(t *testing.T) { - g := NewWithT(t) +func TestKubeadmControlPlaneReconciler_scaleDownControlPlane_NoError(t *testing.T) { + g := NewWithT(t) - cluster, kcp, genericMachineTemplate := createClusterWithControlPlane() - initObjs := []runtime.Object{cluster.DeepCopy(), kcp.DeepCopy(), genericMachineTemplate.DeepCopy()} + 
machines := map[string]*clusterv1.Machine{ + "one": machine("one"), + } - fmc := &fakeManagementCluster{ - Machines: internal.NewFilterableMachineCollection(), - ControlPlaneHealthy: true, + r := &KubeadmControlPlaneReconciler{ + Log: log.Log, + recorder: record.NewFakeRecorder(32), + Client: newFakeClient(g, machines["one"]), + managementCluster: &fakeManagementCluster{ EtcdHealthy: true, - } - - for i := 0; i < 2; i++ { - m, _ := createMachineNodePair(fmt.Sprintf("test-%d", i), cluster, kcp, true) - fmc.Machines = fmc.Machines.Insert(m) - initObjs = append(initObjs, m.DeepCopy()) - } - - fakeClient := newFakeClient(g, initObjs...) - - r := &KubeadmControlPlaneReconciler{ - Client: fakeClient, - managementCluster: fmc, - Log: log.Log, - recorder: record.NewFakeRecorder(32), - } - - fmc.ControlPlaneHealthy = true - fmc.EtcdHealthy = true - result, err := r.scaleDownControlPlane(context.Background(), &clusterv1.Cluster{}, &controlplanev1.KubeadmControlPlane{}, fmc.Machines.DeepCopy()) - g.Expect(err).ToNot(HaveOccurred()) - g.Expect(result).To(Equal(ctrl.Result{Requeue: true})) - - controlPlaneMachines := clusterv1.MachineList{} - g.Expect(fakeClient.List(context.Background(), &controlPlaneMachines)).To(Succeed()) - g.Expect(controlPlaneMachines.Items).To(HaveLen(1)) - }) - t.Run("does not delete a control plane Machine if health checks fail", func(t *testing.T) { - cluster, kcp, genericMachineTemplate := createClusterWithControlPlane() - initObjs := []runtime.Object{cluster.DeepCopy(), kcp.DeepCopy(), genericMachineTemplate.DeepCopy()} - - beforeMachines := internal.NewFilterableMachineCollection() - for i := 0; i < 3; i++ { - m, _ := createMachineNodePair(fmt.Sprintf("test-%d", i), cluster.DeepCopy(), kcp.DeepCopy(), true) - beforeMachines = beforeMachines.Insert(m) - initObjs = append(initObjs, m.DeepCopy()) - } - - testCases := []struct { - name string - etcdUnHealthy bool - controlPlaneUnHealthy bool - selectedAnnotationKeys []string - }{ - { - name: "etcd 
health check fails", - etcdUnHealthy: true, - selectedAnnotationKeys: []string{ - controlplanev1.DeleteForScaleDownAnnotation, - }, - }, - { - name: "controlplane component health check fails", - controlPlaneUnHealthy: true, - selectedAnnotationKeys: []string{ - controlplanev1.DeleteForScaleDownAnnotation, - controlplanev1.ScaleDownEtcdMemberRemovedAnnotation, - }, - }, - { - name: "both health check fails", - etcdUnHealthy: true, - controlPlaneUnHealthy: true, - selectedAnnotationKeys: []string{ - controlplanev1.DeleteForScaleDownAnnotation, - }, - }, - } - for _, tc := range testCases { - g := NewWithT(t) - - fakeClient := newFakeClient(g, initObjs...) - fmc := &fakeManagementCluster{ - Machines: beforeMachines.DeepCopy(), - ControlPlaneHealthy: !tc.controlPlaneUnHealthy, - EtcdHealthy: !tc.etcdUnHealthy, - } - - r := &KubeadmControlPlaneReconciler{ - Client: fakeClient, - managementCluster: fmc, - Log: log.Log, - recorder: record.NewFakeRecorder(32), - } - - ownedMachines := fmc.Machines.DeepCopy() - _, err := r.scaleDownControlPlane(context.Background(), cluster.DeepCopy(), kcp.DeepCopy(), ownedMachines) - g.Expect(err).To(HaveOccurred()) - g.Expect(err).To(MatchError(&capierrors.RequeueAfterError{RequeueAfter: HealthCheckFailedRequeueAfter})) - - controlPlaneMachines := &clusterv1.MachineList{} - g.Expect(fakeClient.List(context.Background(), controlPlaneMachines)).To(Succeed()) - g.Expect(controlPlaneMachines.Items).To(HaveLen(len(beforeMachines))) - - // We expect that a machine has been marked for deletion, since that isn't blocked by health checks - endMachines := internal.NewFilterableMachineCollectionFromMachineList(controlPlaneMachines) - selected := endMachines.Filter(internal.HasAnnotationKey(controlplanev1.DeleteForScaleDownAnnotation)) - g.Expect(selected).To(HaveLen(1)) - for _, m := range selected { - cm := m.DeepCopy() - - g.Expect(m.Annotations).To(HaveLen(len(tc.selectedAnnotationKeys))) - for _, key := range tc.selectedAnnotationKeys { - 
g.Expect(m.Annotations).To(HaveKey(key)) - } - - // Remove the annotations and resource version to compare against the before copy - cm.Annotations = nil - cm.ResourceVersion = "" - bm, ok := beforeMachines[cm.Name] - g.Expect(ok).To(BeTrue()) - g.Expect(cm).To(Equal(bm)) - } + ControlPlaneHealthy: true, + }, + } - // Ensure the non-selected machine match the before machines exactly - notSelected := endMachines.Filter(internal.Not(internal.HasAnnotationKey(controlplanev1.DeleteForScaleDownAnnotation))) - for _, m := range notSelected { - bm, ok := beforeMachines[m.Name] - g.Expect(ok).To(BeTrue()) - g.Expect(m).To(Equal(bm)) - } - } - }) + _, err := r.scaleDownControlPlane(context.Background(), &clusterv1.Cluster{}, &controlplanev1.KubeadmControlPlane{}, machines) + g.Expect(err).ToNot(HaveOccurred()) } -func TestKubeadmControlPlaneReconciler_upgradeControlPlane(t *testing.T) { - t.Run("upgrades control plane Machines if health checks pass", func(t *testing.T) { - // TODO: add tests for positive condition - }) - t.Run("does not upgrade a control plane Machine if health checks fail", func(t *testing.T) { - cluster, kcp, genericMachineTemplate := createClusterWithControlPlane() - kubeletConfigMap := &corev1.ConfigMap{ - ObjectMeta: metav1.ObjectMeta{ - Name: "kubelet-config-1.16", - Namespace: metav1.NamespaceSystem, - }, - } - initObjs := []runtime.Object{cluster.DeepCopy(), kcp.DeepCopy(), genericMachineTemplate.DeepCopy(), kubeletConfigMap.DeepCopy()} - beforeMachines := internal.NewFilterableMachineCollection() - for i := 0; i < 3; i++ { - m, _ := createMachineNodePair(fmt.Sprintf("test-%d", i), cluster.DeepCopy(), kcp.DeepCopy(), true) - beforeMachines = beforeMachines.Insert(m) - initObjs = append(initObjs, m.DeepCopy()) - } - - // mutate the kcp resource - kcp.Spec.Version = "v1.16.9" - - testCases := []struct { - name string - etcdUnHealthy bool - controlPlaneUnHealthy bool - }{ - { - name: "etcd health check fails", - etcdUnHealthy: true, - }, - { - name: 
"controlplane component health check fails", - controlPlaneUnHealthy: true, - }, - } - for _, tc := range testCases { - g := NewWithT(t) - - fakeClient := newFakeClient(g, initObjs...) - fmc := &fakeManagementCluster{ - Machines: beforeMachines.DeepCopy(), - ControlPlaneHealthy: !tc.controlPlaneUnHealthy, - EtcdHealthy: !tc.etcdUnHealthy, - } - - r := &KubeadmControlPlaneReconciler{ - Client: fakeClient, - managementCluster: fmc, - Log: log.Log, - recorder: record.NewFakeRecorder(32), - remoteClientGetter: fakeremote.NewClusterClient, - } - - ownedMachines := fmc.Machines.DeepCopy() - requireUpgrade := fmc.Machines.Filter(internal.OwnedControlPlaneMachines(kcp.Name)).DeepCopy() - _, err := r.upgradeControlPlane(context.Background(), cluster.DeepCopy(), kcp.DeepCopy(), ownedMachines, requireUpgrade) - g.Expect(err).To(HaveOccurred()) - g.Expect(err).To(MatchError(&capierrors.RequeueAfterError{RequeueAfter: HealthCheckFailedRequeueAfter})) - - controlPlaneMachines := &clusterv1.MachineList{} - g.Expect(fakeClient.List(context.Background(), controlPlaneMachines)).To(Succeed()) - g.Expect(controlPlaneMachines.Items).To(HaveLen(len(beforeMachines))) - - // We expect that a machine has been selected for upgrade, since that isn't blocked by health checks - endMachines := internal.NewFilterableMachineCollectionFromMachineList(controlPlaneMachines) - selected := endMachines.Filter(internal.HasAnnotationKey(controlplanev1.SelectedForUpgradeAnnotation)) - g.Expect(selected).To(HaveLen(1)) - for _, m := range selected { - g.Expect(m.Annotations).To(HaveLen(1)) - cm := m.DeepCopy() - cm.Annotations = nil - cm.ResourceVersion = "" - bm, ok := beforeMachines[cm.Name] - g.Expect(ok).To(BeTrue()) - g.Expect(cm).To(Equal(bm)) - } - - // We expect that a replacement is not created, since that is blocked by health checks - replacementCreated := endMachines.Filter(internal.HasAnnotationKey(controlplanev1.UpgradeReplacementCreatedAnnotation)) - g.Expect(replacementCreated).To(BeEmpty()) 
- - // Ensure the non-selected machine match the before machines exactly - notSelected := endMachines.Filter(internal.Not(internal.HasAnnotationKey(controlplanev1.SelectedForUpgradeAnnotation))) - for _, m := range notSelected { - bm, ok := beforeMachines[m.Name] - g.Expect(ok).To(BeTrue()) - g.Expect(m).To(Equal(bm)) - } - } - }) +func machine(name string) *clusterv1.Machine { + return &clusterv1.Machine{ + ObjectMeta: metav1.ObjectMeta{ + Namespace: "default", + Name: name, + }, + } } func TestKubeadmControlPlaneReconciler_failureDomainForScaleDown(t *testing.T) { diff --git a/controlplane/kubeadm/internal/cluster.go b/controlplane/kubeadm/internal/cluster.go index 80c010b8213a..30224b2d6cae 100644 --- a/controlplane/kubeadm/internal/cluster.go +++ b/controlplane/kubeadm/internal/cluster.go @@ -18,35 +18,24 @@ package internal import ( "context" - "crypto/rand" - "crypto/rsa" "crypto/tls" "crypto/x509" - "crypto/x509/pkix" "fmt" - "math/big" - "time" - "github.com/blang/semver" "github.com/pkg/errors" corev1 "k8s.io/api/core/v1" - apierrors "k8s.io/apimachinery/pkg/api/errors" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" kerrors "k8s.io/apimachinery/pkg/util/errors" "k8s.io/client-go/kubernetes/scheme" - "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd" "sigs.k8s.io/controller-runtime/pkg/client" ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3" "sigs.k8s.io/cluster-api/controllers/remote" - etcdutil "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd/util" - "sigs.k8s.io/cluster-api/util/certs" "sigs.k8s.io/cluster-api/util/secret" ) -// ManagementCluster holds operations on the ManagementCluster +// ManagementCluster holds operations on the ManagementCluster. 
type ManagementCluster struct { Client ctrlclient.Client } @@ -66,11 +55,11 @@ func (m *ManagementCluster) GetMachinesForCluster(ctx context.Context, cluster t return machines.Filter(filters...), nil } -// getCluster builds a cluster object. -// The cluster comes with an etcd client generator to connect to any etcd pod living on a managed machine. -func (m *ManagementCluster) getCluster(ctx context.Context, clusterKey types.NamespacedName) (*cluster, error) { +// GetWorkloadCluster builds a cluster object. +// The cluster comes with an etcd Client generator to connect to any etcd pod living on a managed machine. +func (m *ManagementCluster) GetWorkloadCluster(ctx context.Context, clusterKey types.NamespacedName) (*Cluster, error) { // TODO(chuckha): Unroll remote.NewClusterClient if we are unhappy with getting a restConfig twice. - // TODO(chuckha): Inject this dependency if necessary. + // TODO(chuckha): Inject this dependency. restConfig, err := remote.RESTConfig(ctx, m.Client, clusterKey) if err != nil { return nil, err @@ -95,9 +84,9 @@ func (m *ManagementCluster) getCluster(ctx context.Context, clusterKey types.Nam Certificates: []tls.Certificate{clientCert}, } - return &cluster{ - client: c, - etcdClientGenerator: &etcdClientGenerator{ + return &Cluster{ + Client: c, + EtcdClientGenerator: &etcdClientGenerator{ restConfig: restConfig, tlsConfig: cfg, }, @@ -169,7 +158,7 @@ func (m *ManagementCluster) healthCheck(ctx context.Context, check healthCheck, // TargetClusterControlPlaneIsHealthy checks every node for control plane health. 
func (m *ManagementCluster) TargetClusterControlPlaneIsHealthy(ctx context.Context, clusterKey types.NamespacedName, controlPlaneName string) error { // TODO: add checks for expected taints/labels - cluster, err := m.getCluster(ctx, clusterKey) + cluster, err := m.GetWorkloadCluster(ctx, clusterKey) if err != nil { return err } @@ -179,379 +168,9 @@ func (m *ManagementCluster) TargetClusterControlPlaneIsHealthy(ctx context.Conte // TargetClusterEtcdIsHealthy runs a series of checks over a target cluster's etcd cluster. // In addition, it verifies that there are the same number of etcd members as control plane Machines. func (m *ManagementCluster) TargetClusterEtcdIsHealthy(ctx context.Context, clusterKey types.NamespacedName, controlPlaneName string) error { - cluster, err := m.getCluster(ctx, clusterKey) + cluster, err := m.GetWorkloadCluster(ctx, clusterKey) if err != nil { return err } return m.healthCheck(ctx, cluster.etcdIsHealthy, clusterKey, controlPlaneName) } - -// RemoveMachineFromKubeadmConfigMap removes the entry for the machine from the kubeadm configmap. 
-func (m *ManagementCluster) RemoveMachineFromKubeadmConfigMap(ctx context.Context, clusterKey types.NamespacedName, machine *clusterv1.Machine) error { - if machine == nil || machine.Status.NodeRef == nil { - // Nothing to do, no node for Machine - return nil - } - c, err := m.getCluster(ctx, clusterKey) - if err != nil { - return err - } - - configMapKey := types.NamespacedName{Name: "kubeadm-config", Namespace: metav1.NamespaceSystem} - kubeadmConfigMap, err := c.getConfigMap(ctx, configMapKey) - if err != nil { - return err - } - config := &kubeadmConfig{ConfigMap: kubeadmConfigMap} - if err := config.RemoveAPIEndpoint(machine.Status.NodeRef.Name); err != nil { - return err - } - if err := c.client.Update(ctx, config.ConfigMap); err != nil { - return errors.Wrap(err, "error updating kubeadm ConfigMap") - } - return nil -} - -// UpdateKubernetesVersionInKubeadmConfigMap updates the kubernetes version in the kubeadm config map. -func (m *ManagementCluster) UpdateKubernetesVersionInKubeadmConfigMap(ctx context.Context, clusterKey types.NamespacedName, version string) error { - c, err := m.getCluster(ctx, clusterKey) - if err != nil { - return err - } - - configMapKey := types.NamespacedName{Name: "kubeadm-config", Namespace: metav1.NamespaceSystem} - kubeadmConfigMap, err := c.getConfigMap(ctx, configMapKey) - if err != nil { - return err - } - config := &kubeadmConfig{ConfigMap: kubeadmConfigMap} - if err := config.UpdateKubernetesVersion(version); err != nil { - return err - } - if err := c.client.Update(ctx, config.ConfigMap); err != nil { - return errors.Wrap(err, "error updating kubeadm ConfigMap") - } - return nil -} - -// UpdateKubeletConfigMap will create a new kubelet-config-1.x config map for a new version of the kubelet. -// This is a necessary process for upgrades. 
-func (m *ManagementCluster) UpdateKubeletConfigMap(ctx context.Context, clusterKey types.NamespacedName, version semver.Version) error { - c, err := m.getCluster(ctx, clusterKey) - if err != nil { - return err - } - - // Check if the desired configmap already exists - desiredKubeletConfigMapName := fmt.Sprintf("kubelet-config-%d.%d", version.Major, version.Minor) - configMapKey := types.NamespacedName{Name: desiredKubeletConfigMapName, Namespace: metav1.NamespaceSystem} - _, err = c.getConfigMap(ctx, configMapKey) - if err == nil { - // Nothing to do, the configmap already exists - return nil - } - if !apierrors.IsNotFound(errors.Cause(err)) { - return errors.Wrapf(err, "error determining if kubelet configmap %s exists", desiredKubeletConfigMapName) - } - - previousMinorVersionKubeletConfigMapName := fmt.Sprintf("kubelet-config-%d.%d", version.Major, version.Minor-1) - configMapKey = types.NamespacedName{Name: previousMinorVersionKubeletConfigMapName, Namespace: metav1.NamespaceSystem} - // Returns a copy - cm, err := c.getConfigMap(ctx, configMapKey) - if apierrors.IsNotFound(errors.Cause(err)) { - return errors.Errorf("unable to find kubelet configmap %s", previousMinorVersionKubeletConfigMapName) - } - if err != nil { - return err - } - - // Update the name to the new name - cm.Name = desiredKubeletConfigMapName - // Clear the resource version. Is this necessary since this cm is actually a DeepCopy()? - cm.ResourceVersion = "" - - if err := c.client.Create(ctx, cm); err != nil && !apierrors.IsAlreadyExists(err) { - return errors.Wrapf(err, "error creating configmap %s", desiredKubeletConfigMapName) - } - return nil -} - -// RemoveEtcdMemberForMachine removes the etcd member from the target cluster's etcd cluster. 
-func (m *ManagementCluster) RemoveEtcdMemberForMachine(ctx context.Context, clusterKey types.NamespacedName, machine *clusterv1.Machine) error { - if machine == nil || machine.Status.NodeRef == nil { - // Nothing to do, no node for Machine - return nil - } - - cluster, err := m.getCluster(ctx, clusterKey) - if err != nil { - return err - } - - controlPlaneNodes, err := cluster.getControlPlaneNodes(ctx) - if err != nil { - return err - } - - nodeToRemove := machine.Status.NodeRef.Name - errs := []error{} - - // Try all node other than nodeToRemove for proxying etcd client. - // and returns the first successful response. - for _, node := range controlPlaneNodes.Items { - nodeForEtcdClient := node.Name - if nodeForEtcdClient == nodeToRemove { - continue - } - - err = cluster.removeMemberForNode(ctx, nodeForEtcdClient, nodeToRemove) - if err == nil { - return nil - } - - errs = append(errs, err) - } - - return kerrors.NewAggregate(errs) -} - -type etcdClientFor interface { - forNode(name string) (*etcd.Client, error) -} - -// cluster are operations on target clusters. -type cluster struct { - client ctrlclient.Client - etcdClientGenerator etcdClientFor -} - -func (c *cluster) getControlPlaneNodes(ctx context.Context) (*corev1.NodeList, error) { - nodes := &corev1.NodeList{} - labels := map[string]string{ - "node-role.kubernetes.io/master": "", - } - - if err := c.client.List(ctx, nodes, client.MatchingLabels(labels)); err != nil { - return nil, err - } - return nodes, nil -} - -func (c *cluster) getConfigMap(ctx context.Context, configMap types.NamespacedName) (*corev1.ConfigMap, error) { - original := &corev1.ConfigMap{} - if err := c.client.Get(ctx, configMap, original); err != nil { - return nil, errors.Wrapf(err, "error getting %s/%s configmap from target cluster", configMap.Namespace, configMap.Name) - } - return original.DeepCopy(), nil -} - -// healthCheckResult maps nodes that are checked to any errors the node has related to the check. 
-type healthCheckResult map[string]error - -// controlPlaneIsHealthy does a best effort check of the control plane components the kubeadm control plane cares about. -// The return map is a map of node names as keys to error that that node encountered. -// All nodes will exist in the map with nil errors if there were no errors for that node. -func (c *cluster) controlPlaneIsHealthy(ctx context.Context) (healthCheckResult, error) { - controlPlaneNodes, err := c.getControlPlaneNodes(ctx) - if err != nil { - return nil, err - } - - response := make(map[string]error) - for _, node := range controlPlaneNodes.Items { - name := node.Name - response[name] = nil - apiServerPodKey := types.NamespacedName{ - Namespace: metav1.NamespaceSystem, - Name: staticPodName("kube-apiserver", name), - } - apiServerPod := &corev1.Pod{} - if err := c.client.Get(ctx, apiServerPodKey, apiServerPod); err != nil { - response[name] = err - continue - } - response[name] = checkStaticPodReadyCondition(apiServerPod) - - controllerManagerPodKey := types.NamespacedName{ - Namespace: metav1.NamespaceSystem, - Name: staticPodName("kube-controller-manager", name), - } - controllerManagerPod := &corev1.Pod{} - if err := c.client.Get(ctx, controllerManagerPodKey, controllerManagerPod); err != nil { - response[name] = err - continue - } - response[name] = checkStaticPodReadyCondition(controllerManagerPod) - } - - return response, nil -} - -// removeMemberForNode removes etcd member (nodeToRemove) through another (nodeForEtcdClient). -// It's create etcd connection using nodeForEtcdClient to removing nodeToRemove. -func (c *cluster) removeMemberForNode(ctx context.Context, nodeForEtcdClient, nodeToRemove string) error { - etcdClient, err := c.etcdClientGenerator.forNode(nodeForEtcdClient) - if err != nil { - return errors.Wrap(err, "failed to create etcd client") - } - - // List etcd members. This checks that the member is healthy, because the request goes through consensus. 
- members, err := etcdClient.Members(ctx) - if err != nil { - return errors.Wrap(err, "failed to list etcd members using etcd client") - } - member := etcdutil.MemberForName(members, nodeToRemove) - - // The member has already been removed, return immediately - if member == nil { - return nil - } - - if err := etcdClient.RemoveMember(ctx, member.ID); err != nil { - return errors.Wrap(err, "failed to remove member from etcd") - } - - return nil -} - -// etcdIsHealthy runs checks for every etcd member in the cluster to satisfy our definition of healthy. -// This is a best effort check and nodes can become unhealthy after the check is complete. It is not a guarantee. -// It's used a signal for if we should allow a target cluster to scale up, scale down or upgrade. -// It returns a map of nodes checked along with an error for a given node. -func (c *cluster) etcdIsHealthy(ctx context.Context) (healthCheckResult, error) { - var knownClusterID uint64 - var knownMemberIDSet etcdutil.UInt64Set - - controlPlaneNodes, err := c.getControlPlaneNodes(ctx) - if err != nil { - return nil, err - } - - response := make(map[string]error) - for _, node := range controlPlaneNodes.Items { - name := node.Name - response[name] = nil - if node.Spec.ProviderID == "" { - response[name] = errors.New("empty provider ID") - continue - } - - // Create the etcd client for the etcd Pod scheduled on the Node - etcdClient, err := c.etcdClientGenerator.forNode(name) - if err != nil { - response[name] = errors.Wrap(err, "failed to create etcd client") - continue - } - - // List etcd members. This checks that the member is healthy, because the request goes through consensus. - members, err := etcdClient.Members(ctx) - if err != nil { - response[name] = errors.Wrap(err, "failed to list etcd members using etcd client") - continue - } - member := etcdutil.MemberForName(members, name) - - // Check that the member reports no alarms. 
- if len(member.Alarms) > 0 { - response[name] = errors.Errorf("etcd member reports alarms: %v", member.Alarms) - continue - } - - // Check that the member belongs to the same cluster as all other members. - clusterID := member.ClusterID - if knownClusterID == 0 { - knownClusterID = clusterID - } else if knownClusterID != clusterID { - response[name] = errors.Errorf("etcd member has cluster ID %d, but all previously seen etcd members have cluster ID %d", clusterID, knownClusterID) - continue - } - - // Check that the member list is stable. - memberIDSet := etcdutil.MemberIDSet(members) - if knownMemberIDSet.Len() == 0 { - knownMemberIDSet = memberIDSet - } else { - unknownMembers := memberIDSet.Difference(knownMemberIDSet) - if unknownMembers.Len() > 0 { - response[name] = errors.Errorf("etcd member reports members IDs %v, but all previously seen etcd members reported member IDs %v", memberIDSet.UnsortedList(), knownMemberIDSet.UnsortedList()) - } - continue - } - } - - // Check that there is exactly one etcd member for every control plane machine. 
- // There should be no etcd members added "out of band."" - if len(controlPlaneNodes.Items) != len(knownMemberIDSet) { - return response, errors.Errorf("there are %d control plane nodes, but %d etcd members", len(controlPlaneNodes.Items), len(knownMemberIDSet)) - } - - return response, nil -} - -func generateClientCert(caCertEncoded, caKeyEncoded []byte) (tls.Certificate, error) { - privKey, err := certs.NewPrivateKey() - if err != nil { - return tls.Certificate{}, err - } - caCert, err := certs.DecodeCertPEM(caCertEncoded) - if err != nil { - return tls.Certificate{}, err - } - caKey, err := certs.DecodePrivateKeyPEM(caKeyEncoded) - if err != nil { - return tls.Certificate{}, err - } - x509Cert, err := newClientCert(caCert, privKey, caKey) - if err != nil { - return tls.Certificate{}, err - } - return tls.X509KeyPair(certs.EncodeCertPEM(x509Cert), certs.EncodePrivateKeyPEM(privKey)) -} - -func newClientCert(caCert *x509.Certificate, key *rsa.PrivateKey, caKey *rsa.PrivateKey) (*x509.Certificate, error) { - cfg := certs.Config{ - CommonName: "cluster-api.x-k8s.io", - } - - now := time.Now().UTC() - - tmpl := x509.Certificate{ - SerialNumber: new(big.Int).SetInt64(0), - Subject: pkix.Name{ - CommonName: cfg.CommonName, - Organization: cfg.Organization, - }, - NotBefore: now.Add(time.Minute * -5), - NotAfter: now.Add(time.Hour * 24 * 365 * 10), // 10 years - KeyUsage: x509.KeyUsageDigitalSignature, - ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth}, - } - - b, err := x509.CreateCertificate(rand.Reader, &tmpl, caCert, key.Public(), caKey) - if err != nil { - return nil, errors.Wrapf(err, "failed to create signed client certificate: %+v", tmpl) - } - - c, err := x509.ParseCertificate(b) - return c, errors.WithStack(err) -} - -func staticPodName(component, nodeName string) string { - return fmt.Sprintf("%s-%s", component, nodeName) -} - -func checkStaticPodReadyCondition(pod *corev1.Pod) error { - found := false - for _, condition := range 
pod.Status.Conditions { - if condition.Type == corev1.PodReady { - found = true - } - if condition.Type == corev1.PodReady && condition.Status != corev1.ConditionTrue { - return errors.Errorf("static pod %s/%s is not ready", pod.Namespace, pod.Name) - } - } - if !found { - return errors.Errorf("pod does not have ready condition: %v", pod.Name) - } - return nil -} diff --git a/controlplane/kubeadm/internal/cluster_test.go b/controlplane/kubeadm/internal/cluster_test.go index 2e294702b5cf..e202e102022a 100644 --- a/controlplane/kubeadm/internal/cluster_test.go +++ b/controlplane/kubeadm/internal/cluster_test.go @@ -101,8 +101,8 @@ func TestControlPlaneIsHealthy(t *testing.T) { }, }, } - workloadCluster := &cluster{ - client: &fakeClient{ + workloadCluster := &Cluster{ + Client: &fakeClient{ list: nodeListForTestControlPlaneIsHealthy(), get: map[string]interface{}{ "kube-system/kube-apiserver-first-control-plane": &corev1.Pod{Status: readyStatus}, diff --git a/controlplane/kubeadm/internal/workload_cluster.go b/controlplane/kubeadm/internal/workload_cluster.go new file mode 100644 index 000000000000..acbebaa1d200 --- /dev/null +++ b/controlplane/kubeadm/internal/workload_cluster.go @@ -0,0 +1,393 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package internal + +import ( + "context" + "crypto/rand" + "crypto/rsa" + "crypto/tls" + "crypto/x509" + "crypto/x509/pkix" + "fmt" + "math/big" + "time" + + "github.com/blang/semver" + "github.com/pkg/errors" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + kerrors "k8s.io/apimachinery/pkg/util/errors" + clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3" + "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd" + etcdutil "sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/etcd/util" + "sigs.k8s.io/cluster-api/util/certs" + ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" +) + +type etcdClientFor interface { + forNode(name string) (*etcd.Client, error) +} + +// Cluster are operations on workload clusters. +type Cluster struct { + Client ctrlclient.Client + EtcdClientGenerator etcdClientFor +} + +func (c *Cluster) getControlPlaneNodes(ctx context.Context) (*corev1.NodeList, error) { + nodes := &corev1.NodeList{} + labels := map[string]string{ + "node-role.kubernetes.io/master": "", + } + + if err := c.Client.List(ctx, nodes, ctrlclient.MatchingLabels(labels)); err != nil { + return nil, err + } + return nodes, nil +} + +func (c *Cluster) getConfigMap(ctx context.Context, configMap types.NamespacedName) (*corev1.ConfigMap, error) { + original := &corev1.ConfigMap{} + if err := c.Client.Get(ctx, configMap, original); err != nil { + return nil, errors.Wrapf(err, "error getting %s/%s configmap from target cluster", configMap.Namespace, configMap.Name) + } + return original.DeepCopy(), nil +} + +// healthCheckResult maps nodes that are checked to any errors the node has related to the check. +type healthCheckResult map[string]error + +// controlPlaneIsHealthy does a best effort check of the control plane components the kubeadm control plane cares about. +// The return map is a map of node names as keys to error that that node encountered. 
+// All nodes will exist in the map with nil errors if there were no errors for that node.
+func (c *Cluster) controlPlaneIsHealthy(ctx context.Context) (healthCheckResult, error) {
+	controlPlaneNodes, err := c.getControlPlaneNodes(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	response := make(map[string]error)
+	for _, node := range controlPlaneNodes.Items {
+		name := node.Name
+		response[name] = nil
+		apiServerPodKey := types.NamespacedName{
+			Namespace: metav1.NamespaceSystem,
+			Name:      staticPodName("kube-apiserver", name),
+		}
+		apiServerPod := &corev1.Pod{}
+		if err := c.Client.Get(ctx, apiServerPodKey, apiServerPod); err != nil {
+			response[name] = err
+			continue
+		}
+		response[name] = checkStaticPodReadyCondition(apiServerPod)
+
+		controllerManagerPodKey := types.NamespacedName{
+			Namespace: metav1.NamespaceSystem,
+			Name:      staticPodName("kube-controller-manager", name),
+		}
+		controllerManagerPod := &corev1.Pod{}
+		if err := c.Client.Get(ctx, controllerManagerPodKey, controllerManagerPod); err != nil {
+			response[name] = err
+			continue
+		}
+		response[name] = checkStaticPodReadyCondition(controllerManagerPod)
+	}
+
+	return response, nil
+}
+
+// removeMemberForNode removes etcd member (nodeToRemove) through another (nodeForEtcdClient).
+// It creates an etcd connection using nodeForEtcdClient in order to remove nodeToRemove.
+func (c *Cluster) removeMemberForNode(ctx context.Context, nodeForEtcdClient, nodeToRemove string) error {
+	etcdClient, err := c.EtcdClientGenerator.forNode(nodeForEtcdClient)
+	if err != nil {
+		return errors.Wrap(err, "failed to create etcd Client")
+	}
+
+	// List etcd members. This checks that the member is healthy, because the request goes through consensus.
+	members, err := etcdClient.Members(ctx)
+	if err != nil {
+		return errors.Wrap(err, "failed to list etcd members using etcd Client")
+	}
+	member := etcdutil.MemberForName(members, nodeToRemove)
+
+	// The member has already been removed, return immediately
+	if member == nil {
+		return nil
+	}
+
+	if err := etcdClient.RemoveMember(ctx, member.ID); err != nil {
+		return errors.Wrap(err, "failed to remove member from etcd")
+	}
+
+	return nil
+}
+
+// etcdIsHealthy runs checks for every etcd member in the cluster to satisfy our definition of healthy.
+// This is a best effort check and nodes can become unhealthy after the check is complete. It is not a guarantee.
+// It's used as a signal for whether we should allow a target cluster to scale up, scale down or upgrade.
+// It returns a map of nodes checked along with an error for a given node.
+func (c *Cluster) etcdIsHealthy(ctx context.Context) (healthCheckResult, error) {
+	var knownClusterID uint64
+	var knownMemberIDSet etcdutil.UInt64Set
+
+	controlPlaneNodes, err := c.getControlPlaneNodes(ctx)
+	if err != nil {
+		return nil, err
+	}
+
+	response := make(map[string]error)
+	for _, node := range controlPlaneNodes.Items {
+		name := node.Name
+		response[name] = nil
+		if node.Spec.ProviderID == "" {
+			response[name] = errors.New("empty provider ID")
+			continue
+		}
+
+		// Create the etcd Client for the etcd Pod scheduled on the Node
+		etcdClient, err := c.EtcdClientGenerator.forNode(name)
+		if err != nil {
+			response[name] = errors.Wrap(err, "failed to create etcd Client")
+			continue
+		}
+
+		// List etcd members. This checks that the member is healthy, because the request goes through consensus.
+		members, err := etcdClient.Members(ctx)
+		if err != nil {
+			response[name] = errors.Wrap(err, "failed to list etcd members using etcd Client")
+			continue
+		}
+		member := etcdutil.MemberForName(members, name)
+
+		// Check that the member reports no alarms.
+		if len(member.Alarms) > 0 {
+			response[name] = errors.Errorf("etcd member reports alarms: %v", member.Alarms)
+			continue
+		}
+
+		// Check that the member belongs to the same cluster as all other members.
+		clusterID := member.ClusterID
+		if knownClusterID == 0 {
+			knownClusterID = clusterID
+		} else if knownClusterID != clusterID {
+			response[name] = errors.Errorf("etcd member has cluster ID %d, but all previously seen etcd members have cluster ID %d", clusterID, knownClusterID)
+			continue
+		}
+
+		// Check that the member list is stable.
+		memberIDSet := etcdutil.MemberIDSet(members)
+		if knownMemberIDSet.Len() == 0 {
+			knownMemberIDSet = memberIDSet
+		} else {
+			unknownMembers := memberIDSet.Difference(knownMemberIDSet)
+			if unknownMembers.Len() > 0 {
+				response[name] = errors.Errorf("etcd member reports members IDs %v, but all previously seen etcd members reported member IDs %v", memberIDSet.UnsortedList(), knownMemberIDSet.UnsortedList())
+			}
+			continue
+		}
+	}
+
+	// Check that there is exactly one etcd member for every control plane machine.
+	// There should be no etcd members added "out of band."
+	if len(controlPlaneNodes.Items) != len(knownMemberIDSet) {
+		return response, errors.Errorf("there are %d control plane nodes, but %d etcd members", len(controlPlaneNodes.Items), len(knownMemberIDSet))
+	}
+
+	return response, nil
+}
+
+// UpdateKubernetesVersionInKubeadmConfigMap updates the kubernetes version in the kubeadm config map.
+func (c *Cluster) UpdateKubernetesVersionInKubeadmConfigMap(ctx context.Context, version string) error { + configMapKey := types.NamespacedName{Name: "kubeadm-config", Namespace: metav1.NamespaceSystem} + kubeadmConfigMap, err := c.getConfigMap(ctx, configMapKey) + if err != nil { + return err + } + config := &kubeadmConfig{ConfigMap: kubeadmConfigMap} + if err := config.UpdateKubernetesVersion(version); err != nil { + return err + } + if err := c.Client.Update(ctx, config.ConfigMap); err != nil { + return errors.Wrap(err, "error updating kubeadm ConfigMap") + } + return nil +} + +// UpdateKubeletConfigMap will create a new kubelet-config-1.x config map for a new version of the kubelet. +// This is a necessary process for upgrades. +func (c *Cluster) UpdateKubeletConfigMap(ctx context.Context, version semver.Version) error { + // Check if the desired configmap already exists + desiredKubeletConfigMapName := fmt.Sprintf("kubelet-config-%d.%d", version.Major, version.Minor) + configMapKey := types.NamespacedName{Name: desiredKubeletConfigMapName, Namespace: metav1.NamespaceSystem} + _, err := c.getConfigMap(ctx, configMapKey) + if err == nil { + // Nothing to do, the configmap already exists + return nil + } + if !apierrors.IsNotFound(errors.Cause(err)) { + return errors.Wrapf(err, "error determining if kubelet configmap %s exists", desiredKubeletConfigMapName) + } + + previousMinorVersionKubeletConfigMapName := fmt.Sprintf("kubelet-config-%d.%d", version.Major, version.Minor-1) + configMapKey = types.NamespacedName{Name: previousMinorVersionKubeletConfigMapName, Namespace: metav1.NamespaceSystem} + // Returns a copy + cm, err := c.getConfigMap(ctx, configMapKey) + if apierrors.IsNotFound(errors.Cause(err)) { + return errors.Errorf("unable to find kubelet configmap %s", previousMinorVersionKubeletConfigMapName) + } + if err != nil { + return err + } + + // Update the name to the new name + cm.Name = desiredKubeletConfigMapName + // Clear the resource version. 
Is this necessary since this cm is actually a DeepCopy()? + cm.ResourceVersion = "" + + if err := c.Client.Create(ctx, cm); err != nil && !apierrors.IsAlreadyExists(err) { + return errors.Wrapf(err, "error creating configmap %s", desiredKubeletConfigMapName) + } + return nil +} + +// RemoveEtcdMemberForMachine removes the etcd member from the target cluster's etcd cluster. +func (c *Cluster) RemoveEtcdMemberForMachine(ctx context.Context, machine *clusterv1.Machine) error { + if machine == nil || machine.Status.NodeRef == nil { + // Nothing to do, no node for Machine + return nil + } + + controlPlaneNodes, err := c.getControlPlaneNodes(ctx) + if err != nil { + return err + } + + nodeToRemove := machine.Status.NodeRef.Name + errs := []error{} + + // Try all node other than nodeToRemove for proxying etcd Client. + // and returns the first successful response. + for _, node := range controlPlaneNodes.Items { + nodeForEtcdClient := node.Name + if nodeForEtcdClient == nodeToRemove { + continue + } + + err = c.removeMemberForNode(ctx, nodeForEtcdClient, nodeToRemove) + if err == nil { + return nil + } + + errs = append(errs, err) + } + + return kerrors.NewAggregate(errs) +} + +// RemoveMachineFromKubeadmConfigMap removes the entry for the machine from the kubeadm configmap. 
+func (c *Cluster) RemoveMachineFromKubeadmConfigMap(ctx context.Context, machine *clusterv1.Machine) error { + if machine == nil || machine.Status.NodeRef == nil { + // Nothing to do, no node for Machine + return nil + } + + configMapKey := types.NamespacedName{Name: "kubeadm-config", Namespace: metav1.NamespaceSystem} + kubeadmConfigMap, err := c.getConfigMap(ctx, configMapKey) + if err != nil { + return err + } + config := &kubeadmConfig{ConfigMap: kubeadmConfigMap} + if err := config.RemoveAPIEndpoint(machine.Status.NodeRef.Name); err != nil { + return err + } + if err := c.Client.Update(ctx, config.ConfigMap); err != nil { + return errors.Wrap(err, "error updating kubeadm ConfigMap") + } + return nil +} + +func generateClientCert(caCertEncoded, caKeyEncoded []byte) (tls.Certificate, error) { + privKey, err := certs.NewPrivateKey() + if err != nil { + return tls.Certificate{}, err + } + caCert, err := certs.DecodeCertPEM(caCertEncoded) + if err != nil { + return tls.Certificate{}, err + } + caKey, err := certs.DecodePrivateKeyPEM(caKeyEncoded) + if err != nil { + return tls.Certificate{}, err + } + x509Cert, err := newClientCert(caCert, privKey, caKey) + if err != nil { + return tls.Certificate{}, err + } + return tls.X509KeyPair(certs.EncodeCertPEM(x509Cert), certs.EncodePrivateKeyPEM(privKey)) +} + +func newClientCert(caCert *x509.Certificate, key *rsa.PrivateKey, caKey *rsa.PrivateKey) (*x509.Certificate, error) { + cfg := certs.Config{ + CommonName: "cluster-api.x-k8s.io", + } + + now := time.Now().UTC() + + tmpl := x509.Certificate{ + SerialNumber: new(big.Int).SetInt64(0), + Subject: pkix.Name{ + CommonName: cfg.CommonName, + Organization: cfg.Organization, + }, + NotBefore: now.Add(time.Minute * -5), + NotAfter: now.Add(time.Hour * 24 * 365 * 10), // 10 years + KeyUsage: x509.KeyUsageDigitalSignature, + ExtKeyUsage: []x509.ExtKeyUsage{x509.ExtKeyUsageClientAuth}, + } + + b, err := x509.CreateCertificate(rand.Reader, &tmpl, caCert, key.Public(), caKey) + 
if err != nil { + return nil, errors.Wrapf(err, "failed to create signed Client certificate: %+v", tmpl) + } + + c, err := x509.ParseCertificate(b) + return c, errors.WithStack(err) +} + +func staticPodName(component, nodeName string) string { + return fmt.Sprintf("%s-%s", component, nodeName) +} + +func checkStaticPodReadyCondition(pod *corev1.Pod) error { + found := false + for _, condition := range pod.Status.Conditions { + if condition.Type == corev1.PodReady { + found = true + } + if condition.Type == corev1.PodReady && condition.Status != corev1.ConditionTrue { + return errors.Errorf("static pod %s/%s is not ready", pod.Namespace, pod.Name) + } + } + if !found { + return errors.Errorf("pod does not have ready condition: %v", pod.Name) + } + return nil +}