diff --git a/api/v1alpha3/common_types.go b/api/v1alpha3/common_types.go
index 63f7a2cdeca9..1b96e491172f 100644
--- a/api/v1alpha3/common_types.go
+++ b/api/v1alpha3/common_types.go
@@ -39,11 +39,11 @@ const (
 	PausedAnnotation = "cluster.x-k8s.io/paused"
 
 	// TemplateClonedFromNameAnnotation is the infrastructure machine annotation that stores the name of the infrastructure template resource
-	// that was cloned for the machine.
+	// that was cloned for the machine. This annotation is set only when a template is cloned. Older/adopted machines will not have this annotation.
 	TemplateClonedFromNameAnnotation = "cluster.x-k8s.io/cloned-from-name"
 
 	// TemplateClonedFromGroupKindAnnotation is the infrastructure machine annotation that stores the group-kind of the infrastructure template resource
-	// that was cloned for the machine.
+	// that was cloned for the machine. This annotation is set only when a template is cloned. Older/adopted machines will not have this annotation.
 	TemplateClonedFromGroupKindAnnotation = "cluster.x-k8s.io/cloned-from-groupkind"
 
 	// ClusterSecretType defines the type of secret created by core components
diff --git a/controlplane/kubeadm/api/v1alpha3/kubeadm_control_plane_types.go b/controlplane/kubeadm/api/v1alpha3/kubeadm_control_plane_types.go
index 137162bc90c7..9454b511dba9 100644
--- a/controlplane/kubeadm/api/v1alpha3/kubeadm_control_plane_types.go
+++ b/controlplane/kubeadm/api/v1alpha3/kubeadm_control_plane_types.go
@@ -31,6 +31,10 @@ const (
 
 	// SkipCoreDNSAnnotation annotation explicitly skips reconciling CoreDNS if set
 	SkipCoreDNSAnnotation = "controlplane.cluster.x-k8s.io/skip-coredns"
+
+	// KubeadmClusterConfigurationAnnotation is a machine annotation that stores the json-marshalled string of the KCP ClusterConfiguration.
+	// This annotation is used to detect any changes in ClusterConfiguration and trigger a machine rollout in KCP.
+	KubeadmClusterConfigurationAnnotation = "controlplane.cluster.x-k8s.io/kubeadm-cluster-configuration"
 )
 
 // KubeadmControlPlaneSpec defines the desired state of KubeadmControlPlane.
diff --git a/controlplane/kubeadm/controllers/controller.go b/controlplane/kubeadm/controllers/controller.go
index 64fcc62646c0..ad1a99bc253d 100644
--- a/controlplane/kubeadm/controllers/controller.go
+++ b/controlplane/kubeadm/controllers/controller.go
@@ -291,14 +291,14 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster *
 	conditions.SetAggregate(controlPlane.KCP, controlplanev1.MachinesReadyCondition, ownedMachines.ConditionGetters(), conditions.AddSourceRef())
 
 	// Control plane machines rollout due to configuration changes (e.g. upgrades) takes precedence over other operations.
-	needRollout := controlPlane.MachinesNeedingRollout()
+	needRollout := r.machinesNeedingRollout(ctx, controlPlane)
 	switch {
 	case len(needRollout) > 0:
 		logger.Info("Rolling out Control Plane machines")
 		// NOTE: we are using Status.UpdatedReplicas from the previous reconciliation only to provide a meaningful message
 		// and this does not influence any reconciliation logic.
 		conditions.MarkFalse(controlPlane.KCP, controlplanev1.MachinesSpecUpToDateCondition, controlplanev1.RollingUpdateInProgressReason, clusterv1.ConditionSeverityWarning, "Rolling %d replicas with outdated spec (%d replicas up to date)", len(needRollout), kcp.Status.UpdatedReplicas)
-		return r.upgradeControlPlane(ctx, cluster, kcp, controlPlane)
+		return r.upgradeControlPlane(ctx, cluster, kcp, controlPlane, needRollout)
 	default:
 		// make sure last upgrade operation is marked as completed.
 		// NOTE: we are checking the condition already exists in order to avoid to set this condition at the first
@@ -327,7 +327,8 @@ func (r *KubeadmControlPlaneReconciler) reconcile(ctx context.Context, cluster *
 	// We are scaling down
 	case numMachines > desiredReplicas:
 		logger.Info("Scaling down control plane", "Desired", desiredReplicas, "Existing", numMachines)
-		return r.scaleDownControlPlane(ctx, cluster, kcp, controlPlane)
+		// The last parameter (i.e. machines needing to be rolled out) should always be empty here.
+		return r.scaleDownControlPlane(ctx, cluster, kcp, controlPlane, internal.FilterableMachineCollection{})
 	}
 
 	// Get the workload cluster client.
diff --git a/controlplane/kubeadm/controllers/helpers.go b/controlplane/kubeadm/controllers/helpers.go
index e157be831344..d77624f870b9 100644
--- a/controlplane/kubeadm/controllers/helpers.go
+++ b/controlplane/kubeadm/controllers/helpers.go
@@ -18,6 +18,7 @@ package controllers
 
 import (
 	"context"
+	"encoding/json"
 	"strings"
 
 	"github.com/pkg/errors"
@@ -33,6 +34,7 @@ import (
 	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1alpha3"
 	"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal"
 	"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/hash"
+	"sigs.k8s.io/cluster-api/controlplane/kubeadm/internal/machinefilters"
 	capierrors "sigs.k8s.io/cluster-api/errors"
 	"sigs.k8s.io/cluster-api/util"
 	"sigs.k8s.io/cluster-api/util/certs"
@@ -241,8 +243,36 @@ func (r *KubeadmControlPlaneReconciler) generateMachine(ctx context.Context, kcp
 		},
 	}
 
+	// Machine's bootstrap config may be missing ClusterConfiguration if it is not the first machine in the control plane.
+	// We store the ClusterConfiguration as an annotation here to detect any changes in the KCP ClusterConfiguration and roll out the machine if needed.
+	clusterConfig, err := json.Marshal(kcp.Spec.KubeadmConfigSpec.ClusterConfiguration)
+	if err != nil {
+		return errors.Wrap(err, "failed to marshal cluster configuration")
+	}
+	machine.SetAnnotations(map[string]string{controlplanev1.KubeadmClusterConfigurationAnnotation: string(clusterConfig)})
+
 	if err := r.Client.Create(ctx, machine); err != nil {
-		return errors.Wrap(err, "Failed to create machine")
+		return errors.Wrap(err, "failed to create machine")
 	}
 	return nil
 }
+
+// machinesNeedingRollout returns a list of machines that need to be rolled out.
+func (r *KubeadmControlPlaneReconciler) machinesNeedingRollout(ctx context.Context, c *internal.ControlPlane) internal.FilterableMachineCollection {
+	now := metav1.Now()
+
+	// Ignore machines to be deleted.
+	machines := c.Machines.Filter(machinefilters.Not(machinefilters.HasDeletionTimestamp))
+
+	// If KCP.Spec.UpgradeAfter is in the past, return machines created before it, as well as any machine with an outdated configuration.
+	if c.KCP.Spec.UpgradeAfter != nil && c.KCP.Spec.UpgradeAfter.Before(&now) {
+		return machines.Filter(machinefilters.Or(
+			// Machines that were created before KCP.Spec.UpgradeAfter.
+			machinefilters.OlderThan(c.KCP.Spec.UpgradeAfter),
+			// Machines that do not match the KCP config.
+			machinefilters.Not(machinefilters.MatchesKCPConfiguration(ctx, r.Client, *c.KCP, *c.Cluster)),
+		))
+	}
+
+	return machines.Filter(machinefilters.Not(machinefilters.MatchesKCPConfiguration(ctx, r.Client, *c.KCP, *c.Cluster)))
+}
diff --git a/controlplane/kubeadm/controllers/helpers_test.go b/controlplane/kubeadm/controllers/helpers_test.go
index cb80d97dca51..1b3967c8bff1 100644
--- a/controlplane/kubeadm/controllers/helpers_test.go
+++ b/controlplane/kubeadm/controllers/helpers_test.go
@@ -18,13 +18,16 @@ package controllers
 
 import (
 	"context"
+	"encoding/json"
 	"testing"
+	"time"
 
 	. "github.com/onsi/gomega"
 
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
+	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/client-go/kubernetes/scheme"
 	"k8s.io/client-go/tools/record"
 	utilpointer "k8s.io/utils/pointer"
@@ -398,6 +401,385 @@ func TestKubeadmControlPlaneReconciler_generateKubeadmConfig(t *testing.T) {
 	g.Expect(bootstrapConfig.Spec).To(Equal(spec))
 }
 
+func TestMachinesNeedingUpgrade(t *testing.T) {
+	g := NewWithT(t)
+
+	namespace := "default"
+	cluster := &clusterv1.Cluster{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "foo",
+			Namespace: namespace,
+		},
+	}
+
+	genericMachineTemplate := &unstructured.Unstructured{
+		Object: map[string]interface{}{
+			"kind":       "GenericMachineTemplate",
+			"apiVersion": "generic.io/v1",
+			"metadata": map[string]interface{}{
+				"name":      "infra-foo",
+				"namespace": cluster.Namespace,
+			},
+			"spec": map[string]interface{}{
+				"template": map[string]interface{}{
+					"spec": map[string]interface{}{
+						"hello": "world",
+					},
+				},
+			},
+		},
+	}
+
+	genericMachineWithoutAnnotation := &unstructured.Unstructured{
+		Object: map[string]interface{}{
+			"kind":       "GenericMachine",
+			"apiVersion": "generic.io/v1",
+			"metadata": map[string]interface{}{
+				"name":      "infra-no-annotation",
+				"namespace": cluster.Namespace,
+			},
+			"spec": map[string]interface{}{
+				"template": map[string]interface{}{
+					"spec": map[string]interface{}{},
+				},
+			},
+		},
+	}
+	infraRefWithoutAnnotation := corev1.ObjectReference{
+		Kind:       genericMachineWithoutAnnotation.GetKind(),
+		Namespace:  genericMachineWithoutAnnotation.GetNamespace(),
+		Name:       genericMachineWithoutAnnotation.GetName(),
+		APIVersion: genericMachineWithoutAnnotation.GetAPIVersion(),
+	}
+
+	genericMachineWithAnnotation := genericMachineWithoutAnnotation.DeepCopy()
+	genericMachineWithAnnotation.SetName("infra-with-annotation")
+	genericMachineWithAnnotation.SetAnnotations(map[string]string{clusterv1.TemplateClonedFromNameAnnotation: genericMachineTemplate.GetName(),
+		clusterv1.TemplateClonedFromGroupKindAnnotation: genericMachineTemplate.GroupVersionKind().GroupKind().String()})
+
+	infraRefWithAnnotation := corev1.ObjectReference{
+		Kind:       genericMachineWithAnnotation.GetKind(),
+		Namespace:  genericMachineWithAnnotation.GetNamespace(),
+		Name:       genericMachineWithAnnotation.GetName(),
+		APIVersion: genericMachineWithAnnotation.GetAPIVersion(),
+	}
+
+	clusterConfig, initConfig, joinConfig := createConfigs(cluster.Name)
+
+	initKubeadmConfigMapName := "init"
+	joinKubeadmConfigMapName := "join"
+
+	initKubeadmConfig := &bootstrapv1.KubeadmConfig{
+		TypeMeta: metav1.TypeMeta{
+			Kind:       "KubeadmConfig",
+			APIVersion: bootstrapv1.GroupVersion.String(),
+		},
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: namespace,
+			Name:      initKubeadmConfigMapName,
+		},
+		Spec: bootstrapv1.KubeadmConfigSpec{
+			ClusterConfiguration: &clusterConfig,
+			InitConfiguration:    &initConfig,
+		},
+	}
+
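+	// joinKubeadmConfig mirrors the bootstrap config referenced by machines that joined an existing
+	// control plane: a JoinConfiguration only, with no ClusterConfiguration or InitConfiguration.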
+	joinKubeadmConfig := &bootstrapv1.KubeadmConfig{
+		TypeMeta: metav1.TypeMeta{
+			Kind:       "KubeadmConfig",
+			APIVersion: bootstrapv1.GroupVersion.String(),
+		},
+		ObjectMeta: metav1.ObjectMeta{
+			Namespace: namespace,
+			Name:      joinKubeadmConfigMapName,
+		},
+		Spec: bootstrapv1.KubeadmConfigSpec{
+			JoinConfiguration: &joinConfig,
+		},
+	}
+
+	kcp := &controlplanev1.KubeadmControlPlane{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      "kcp-foo",
+			Namespace: cluster.Namespace,
+		},
+		Spec: controlplanev1.KubeadmControlPlaneSpec{
+			Version: "v1.18.4",
+			InfrastructureTemplate: corev1.ObjectReference{
+				Kind:       genericMachineTemplate.GetKind(),
+				APIVersion: genericMachineTemplate.GetAPIVersion(),
+				Name:       genericMachineTemplate.GetName(),
+				Namespace:  cluster.Namespace,
+			},
+			KubeadmConfigSpec: bootstrapv1.KubeadmConfigSpec{
+				ClusterConfiguration: &clusterConfig,
+				InitConfiguration:    &initConfig,
+				JoinConfiguration:    &joinConfig,
+			},
+		},
+	}
+
+	machine := func(name string) *clusterv1.Machine {
+		m := &clusterv1.Machine{
+			TypeMeta: metav1.TypeMeta{},
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      name,
+				Namespace: namespace,
+				Labels: map[string]string{
+					clusterv1.ClusterLabelName: "foo",
+				},
+			},
+			Spec: clusterv1.MachineSpec{
+				ClusterName: cluster.Name,
+				Version:     utilpointer.StringPtr("v1.18.4"),
+			},
+		}
+		m.CreationTimestamp = metav1.Time{Time: time.Date(1900, 0, 0, 0, 0, 0, 0, time.UTC)}
+		return m
+	}
+
+	initMachine := machine("init-machine")
+	initMachine.Spec.Bootstrap = clusterv1.Bootstrap{ConfigRef: &corev1.ObjectReference{
+		Namespace: namespace,
+		Name:      initKubeadmConfigMapName,
+	}, DataSecretName: nil}
+	initMachine.Spec.InfrastructureRef = infraRefWithAnnotation
+
+	joinMachine := machine("join-machine")
+	joinMachine.Spec.Bootstrap = clusterv1.Bootstrap{ConfigRef: &corev1.ObjectReference{
+		Namespace: namespace,
+		Name:      joinKubeadmConfigMapName,
+	}, DataSecretName: nil}
+	joinMachine.Spec.InfrastructureRef = infraRefWithAnnotation
+
+	unmatchingJoinMachine := joinMachine.DeepCopy()
+	unmatchingJoinConfig := joinConfig.DeepCopy()
+	unmatchingJoinConfig.NodeRegistration.Name = "different"
+	unmatchingJoinMachine.Name = "join-unmatching"
+
+	versionMismatchMachine := machine("version-mismatch")
+	versionMismatchMachine.Spec.Version = utilpointer.StringPtr("v1.19.1")
+
+	noInfraAnnotationMachine := initMachine.DeepCopy()
+	noInfraAnnotationMachine.Name = "no-annotation"
+	noInfraAnnotationMachine.Spec.InfrastructureRef = infraRefWithoutAnnotation
+
+	deletedMachine := initMachine.DeepCopy()
+	deletedMachine.Name = "deleted"
+	deletedMachine.DeletionTimestamp = &metav1.Time{Time: time.Date(1900, 0, 0, 0, 0, 0, 0, time.UTC)}
+
+	machineAnnotatedWithCorrectClusterConfig := joinMachine.DeepCopy()
+	machineAnnotatedWithCorrectClusterConfig.Name = "annotated-with-cluster-configuration"
+	clusterConfigMarshalled, err := json.Marshal(clusterConfig)
+	g.Expect(err).NotTo(HaveOccurred())
+	machineAnnotatedWithCorrectClusterConfig.SetAnnotations(map[string]string{controlplanev1.KubeadmClusterConfigurationAnnotation: string(clusterConfigMarshalled)})
+
+	machineAnnotatedWithWrongClusterConfig := joinMachine.DeepCopy()
+	machineAnnotatedWithWrongClusterConfig.Name = "annotated-with-wrong-cluster-configuration"
+	clusterConf := kcp.Spec.KubeadmConfigSpec.ClusterConfiguration.DeepCopy()
+	clusterConf.ClusterName = "not-nil"
+	clusterConfigMarshalled, err = json.Marshal(clusterConf)
+	g.Expect(err).NotTo(HaveOccurred())
+	machineAnnotatedWithWrongClusterConfig.SetAnnotations(map[string]string{controlplanev1.KubeadmClusterConfigurationAnnotation: string(clusterConfigMarshalled)})
+
+	kcpInitEmpty := (*kcp).DeepCopy()
+	kcpInitEmpty.Spec.KubeadmConfigSpec.InitConfiguration = nil
+
+	kcpRetryJoinSet := (*kcp).DeepCopy()
+	kcpRetryJoinSet.Spec.KubeadmConfigSpec.UseExperimentalRetryJoin = true
+
+	kcpUpgradeAfterFuture := (*kcp).DeepCopy()
+	kcpUpgradeAfterFuture.Spec.UpgradeAfter = &metav1.Time{Time: time.Date(3000, 0, 0, 0, 0, 0, 0, time.UTC)}
+
+	kcpUpgradeAfterPast := (*kcp).DeepCopy()
+	kcpUpgradeAfterPast.Spec.UpgradeAfter = &metav1.Time{Time: time.Date(2000, 0, 0, 0, 0, 0, 0, time.UTC)}
+
+	kcpJoinNil := (*kcp).DeepCopy()
+	kcpJoinNil.Spec.KubeadmConfigSpec.JoinConfiguration = nil
+
+	objs := []runtime.Object{cluster.DeepCopy(), kcp.DeepCopy(), initKubeadmConfig.DeepCopy(), joinKubeadmConfig.DeepCopy(),
+		genericMachineTemplate.DeepCopy(), genericMachineWithAnnotation.DeepCopy(), genericMachineWithoutAnnotation.DeepCopy()}
+
+	fakeClient := newFakeClient(g, objs...)
+
+	r := &KubeadmControlPlaneReconciler{
+		Client:   fakeClient,
+		Log:      log.Log,
+		recorder: record.NewFakeRecorder(32),
+		scheme:   scheme.Scheme,
+	}
+
+	tests := []struct {
+		name     string
+		kcp      *controlplanev1.KubeadmControlPlane
+		machines []*clusterv1.Machine
+		result   internal.FilterableMachineCollection
+	}{
+		{
+			name:     "should not return any machines if KCP upgradeAfter is in the future",
+			kcp:      kcpUpgradeAfterFuture,
+			machines: []*clusterv1.Machine{joinMachine, initMachine},
+			result:   internal.FilterableMachineCollection{},
+		},
+		{
+			name:     "should return machines created before KCP upgradeAfter when it is in the past (but not deleted ones)",
+			kcp:      kcpUpgradeAfterPast,
+			machines: []*clusterv1.Machine{initMachine, joinMachine, deletedMachine},
+			result:   internal.FilterableMachineCollection{"init-machine": initMachine, "join-machine": joinMachine},
+		},
+		{
+			name:     "should not return any machines if owned machines are empty",
+			kcp:      kcp,
+			machines: []*clusterv1.Machine{},
+			result:   internal.FilterableMachineCollection{},
+		},
+		{
+			name:     "should return the machine if there is a version mismatch",
+			kcp:      kcp,
+			machines: []*clusterv1.Machine{versionMismatchMachine, initMachine, joinMachine},
+			result:   internal.FilterableMachineCollection{"version-mismatch": versionMismatchMachine},
+		},
+		{
+			name:     "should not return any machines if each machine has only an init config or a join config and it matches KCP",
+			kcp:      kcp,
+			machines: []*clusterv1.Machine{initMachine, joinMachine},
+			result:   internal.FilterableMachineCollection{},
+		},
+		{
+			name:     "should return machines that do not match the KCP KubeadmConfig",
+			kcp:      kcpInitEmpty,
+			machines: []*clusterv1.Machine{initMachine, joinMachine},
+			result:   internal.FilterableMachineCollection{"init-machine": initMachine},
+		},
+		{
+			name:     "should return machines that do not match the KCP KubeadmConfig when additional fields are set",
+			kcp:      kcpRetryJoinSet,
+			machines: []*clusterv1.Machine{initMachine, joinMachine},
+			result:   internal.FilterableMachineCollection{"init-machine": initMachine, "join-machine": joinMachine},
+		},
+		{
+			name:     "should not return the machine if it is missing the infra ref annotation",
+			kcp:      kcp,
+			machines: []*clusterv1.Machine{noInfraAnnotationMachine},
+			result:   internal.FilterableMachineCollection{},
+		},
+		{
+			name:     "should not return the machine if its ClusterConfiguration annotation matches KCP",
+			kcp:      kcp,
+			machines: []*clusterv1.Machine{machineAnnotatedWithCorrectClusterConfig},
+			result:   internal.FilterableMachineCollection{},
+		},
+		{
+			name:     "should return the machine if its ClusterConfiguration annotation does not match KCP",
+			kcp:      kcp,
+			machines: []*clusterv1.Machine{machineAnnotatedWithWrongClusterConfig},
+			result:   internal.FilterableMachineCollection{"annotated-with-wrong-cluster-configuration": machineAnnotatedWithWrongClusterConfig},
+		},
+		{
+			name:     "should not return the machine if KCP JoinConfiguration is nil",
+			kcp:      kcpJoinNil,
+			machines: []*clusterv1.Machine{initMachine, joinMachine},
+			result:   internal.FilterableMachineCollection{},
+		},
+		{
+			name:     "should not return the machine if JoinConfiguration is unmatching due to KCP JoinConfiguration being nil",
+			kcp:      kcpJoinNil,
+			machines: []*clusterv1.Machine{initMachine, joinMachine},
+			result:   internal.FilterableMachineCollection{},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			g := NewWithT(t)
+			controlPlane := &internal.ControlPlane{
+				KCP:      tt.kcp,
+				Cluster:  cluster,
+				Machines: internal.NewFilterableMachineCollection(tt.machines...),
+			}
+			g.Expect(r.machinesNeedingRollout(context.Background(), controlPlane)).To(BeEquivalentTo(tt.result))
+		})
+	}
+}
+
+func createConfigs(clusterName string) (kubeadmv1.ClusterConfiguration, kubeadmv1.InitConfiguration, kubeadmv1.JoinConfiguration) {
+	clusterConf := kubeadmv1.ClusterConfiguration{
+		APIServer: kubeadmv1.APIServer{
+			ControlPlaneComponent: kubeadmv1.ControlPlaneComponent{
+				ExtraArgs: map[string]string{"arg": "arg"},
+				ExtraVolumes: []kubeadmv1.HostPathMount{{
+					HostPath: "/var/log/kubernetes",
+					ReadOnly: false,
+				}},
+			},
+			TimeoutForControlPlane: &metav1.Duration{},
+		},
+		CertificatesDir:      "tmpdir",
+		ClusterName:          clusterName,
+		ControlPlaneEndpoint: "genericprovider.com:443",
+		ControllerManager: kubeadmv1.ControlPlaneComponent{
+			ExtraArgs: map[string]string{"controller manager field": "controller manager value"},
+		},
+		DNS: kubeadmv1.DNS{
+			Type:      "CoreDNS",
+			ImageMeta: kubeadmv1.ImageMeta{},
+		},
+		Etcd: kubeadmv1.Etcd{
+			Local: &kubeadmv1.LocalEtcd{
+				ImageMeta: kubeadmv1.ImageMeta{},
+				DataDir:   "/var/lib/etcd",
+				ExtraArgs: map[string]string{"arg": "arg"},
+			},
+		},
+		KubernetesVersion: "v1.18.4",
+		Networking: kubeadmv1.Networking{
+			ServiceSubnet: "10.96.0.0/12",
+			PodSubnet:     "192.168.0.0/16",
+			DNSDomain:     "",
+		},
+		Scheduler: kubeadmv1.ControlPlaneComponent{
+			ExtraArgs: map[string]string{"arg": "not-nil"},
+		},
+	}
+
+	initConf := kubeadmv1.InitConfiguration{
+		TypeMeta:        metav1.TypeMeta{},
+		BootstrapTokens: nil,
+		NodeRegistration: kubeadmv1.NodeRegistrationOptions{
+			Name:             "{{ ds.meta_data.hostname }}",
+			CRISocket:        "",
+			Taints:           nil,
+			KubeletExtraArgs: map[string]string{"arg": "arg"},
+		},
+		LocalAPIEndpoint: kubeadmv1.APIEndpoint{
+			AdvertiseAddress: "",
+			BindPort:         0,
+		},
+	}
+
+	joinConf := kubeadmv1.JoinConfiguration{
+		TypeMeta:         metav1.TypeMeta{},
+		NodeRegistration: initConf.NodeRegistration,
+		Discovery: kubeadmv1.Discovery{
+			BootstrapToken: &kubeadmv1.BootstrapTokenDiscovery{
+				APIServerEndpoint:        "genericprovider.com:443",
+				CACertHashes:             []string{"str"},
+				Token:                    "token",
+				UnsafeSkipCAVerification: false,
+			},
+			File:              nil,
+			TLSBootstrapToken: "",
+			Timeout:           nil,
+		},
+		ControlPlane: &kubeadmv1.JoinControlPlane{LocalAPIEndpoint: kubeadmv1.APIEndpoint{
+			AdvertiseAddress: "",
+			BindPort:         0,
+		}},
+	}
+
+	return clusterConf, initConf, joinConf
+}
+
 // TODO
 func TestReconcileExternalReference(t *testing.T) {}
diff --git a/controlplane/kubeadm/controllers/scale.go b/controlplane/kubeadm/controllers/scale.go
index cedc48bc6b6c..f48b4c7ab23a 100644
--- a/controlplane/kubeadm/controllers/scale.go
+++ b/controlplane/kubeadm/controllers/scale.go
@@ -86,6 +86,7 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane(
 	cluster *clusterv1.Cluster,
 	kcp *controlplanev1.KubeadmControlPlane,
 	controlPlane *internal.ControlPlane,
+	outdatedMachines internal.FilterableMachineCollection,
 ) (ctrl.Result, error) {
 	logger := controlPlane.Logger()
 
@@ -99,7 +100,7 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane(
 		return ctrl.Result{}, errors.Wrapf(err, "failed to create client to workload cluster")
 	}
 
-	machineToDelete, err := selectMachineForScaleDown(controlPlane)
+	machineToDelete, err := selectMachineForScaleDown(controlPlane, outdatedMachines)
 	if err != nil {
 		return ctrl.Result{}, errors.Wrap(err, "failed to select machine for scale down")
 	}
@@ -144,10 +145,10 @@ func (r *KubeadmControlPlaneReconciler) scaleDownControlPlane(
 	return ctrl.Result{Requeue: true}, nil
 }
 
-func selectMachineForScaleDown(controlPlane *internal.ControlPlane) (*clusterv1.Machine, error) {
+func selectMachineForScaleDown(controlPlane *internal.ControlPlane, outdatedMachines internal.FilterableMachineCollection) (*clusterv1.Machine, error) {
 	machines := controlPlane.Machines
-	if needingUpgrade := controlPlane.MachinesNeedingRollout(); needingUpgrade.Len() > 0 {
-		machines = needingUpgrade
+	if outdatedMachines.Len() > 0 {
+		machines = outdatedMachines
 	}
 	return controlPlane.MachineInFailureDomainWithMostMachines(machines)
 }
diff --git a/controlplane/kubeadm/controllers/scale_test.go b/controlplane/kubeadm/controllers/scale_test.go
index 8485966df3f4..87c7e5f7c883 100644
--- a/controlplane/kubeadm/controllers/scale_test.go
+++ b/controlplane/kubeadm/controllers/scale_test.go
@@ -215,7 +215,7 @@ func TestKubeadmControlPlaneReconciler_scaleDownControlPlane_NoError(t *testing.
 		Machines: machines,
 	}
 
-	_, err := r.scaleDownControlPlane(context.Background(), cluster, kcp, controlPlane)
+	_, err := r.scaleDownControlPlane(context.Background(), cluster, kcp, controlPlane, controlPlane.Machines)
 	g.Expect(err).ToNot(HaveOccurred())
 }
 
@@ -250,22 +250,25 @@ func TestSelectMachineForScaleDown(t *testing.T) {
 	}
 
 	testCases := []struct {
-		name            string
-		cp              *internal.ControlPlane
-		expectErr       bool
-		expectedMachine clusterv1.Machine
+		name             string
+		cp               *internal.ControlPlane
+		outDatedMachines internal.FilterableMachineCollection
+		expectErr        bool
+		expectedMachine  clusterv1.Machine
 	}{
 		{
-			name:            "when there are are machines needing upgrade, it returns the oldest machine in the failure domain with the most machines needing upgrade",
-			cp:              needsUpgradeControlPlane,
-			expectErr:       false,
-			expectedMachine: clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "machine-5"}},
+			name:             "when there are machines needing upgrade, it returns the oldest machine in the failure domain with the most machines needing upgrade",
+			cp:               needsUpgradeControlPlane,
+			outDatedMachines: internal.NewFilterableMachineCollection(m5),
+			expectErr:        false,
+			expectedMachine:  clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "machine-5"}},
 		},
 		{
-			name:            "when there are no outdated machines, it returns the oldest machine in the largest failure domain",
-			cp:              upToDateControlPlane,
-			expectErr:       false,
-			expectedMachine: clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "machine-3"}},
+			name:             "when there are no outdated machines, it returns the oldest machine in the largest failure domain",
+			cp:               upToDateControlPlane,
+			outDatedMachines: internal.NewFilterableMachineCollection(),
+			expectErr:        false,
+			expectedMachine:  clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "machine-3"}},
 		},
 	}
 
@@ -275,7 +278,7 @@ func TestSelectMachineForScaleDown(t *testing.T) {
 
 			g.Expect(clusterv1.AddToScheme(scheme.Scheme)).To(Succeed())
 
-			selectedMachine, err := selectMachineForScaleDown(tc.cp)
+			selectedMachine, err := selectMachineForScaleDown(tc.cp, tc.outDatedMachines)
 
 			if tc.expectErr {
 				g.Expect(err).To(HaveOccurred())
diff --git a/controlplane/kubeadm/controllers/upgrade.go b/controlplane/kubeadm/controllers/upgrade.go
index 8b53ff58f800..ae2eb5a66762 100644
--- a/controlplane/kubeadm/controllers/upgrade.go
+++ b/controlplane/kubeadm/controllers/upgrade.go
@@ -33,6 +33,7 @@ func (r *KubeadmControlPlaneReconciler) upgradeControlPlane(
 	cluster *clusterv1.Cluster,
 	kcp *controlplanev1.KubeadmControlPlane,
 	controlPlane *internal.ControlPlane,
+	machinesRequireUpgrade internal.FilterableMachineCollection,
 ) (ctrl.Result, error) {
 	logger := controlPlane.Logger()
 
@@ -94,5 +95,5 @@ func (r *KubeadmControlPlaneReconciler) upgradeControlPlane(
 		// scaleUp ensures that we don't continue scaling up while waiting for Machines to have NodeRefs
 		return r.scaleUpControlPlane(ctx, cluster, kcp, controlPlane)
 	}
-	return r.scaleDownControlPlane(ctx, cluster, kcp, controlPlane)
+	return r.scaleDownControlPlane(ctx, cluster, kcp, controlPlane, machinesRequireUpgrade)
 }
diff --git a/controlplane/kubeadm/controllers/upgrade_test.go b/controlplane/kubeadm/controllers/upgrade_test.go
index 7dbd859c046b..c04ec67ef5b4 100644
--- a/controlplane/kubeadm/controllers/upgrade_test.go
+++ b/controlplane/kubeadm/controllers/upgrade_test.go
@@ -81,7 +81,7 @@ func TestKubeadmControlPlaneReconciler_upgradeControlPlane(t *testing.T) {
 	// run upgrade the first time, expect we scale up
 	needingUpgrade := internal.NewFilterableMachineCollectionFromMachineList(initialMachine)
 	controlPlane.Machines = needingUpgrade
-	result, err = r.upgradeControlPlane(context.Background(), cluster, kcp, controlPlane)
+	result, err = r.upgradeControlPlane(context.Background(), cluster, kcp, controlPlane, needingUpgrade)
 	g.Expect(result).To(Equal(ctrl.Result{Requeue: true}))
 	g.Expect(err).To(BeNil())
 	bothMachines := &clusterv1.MachineList{}
@@ -90,7 +90,7 @@ func TestKubeadmControlPlaneReconciler_upgradeControlPlane(t *testing.T) {
 
 	// run upgrade a second time, simulate that the node has not appeared yet but the machine exists
 	r.managementCluster.(*fakeManagementCluster).ControlPlaneHealthy = false
-	_, err = r.upgradeControlPlane(context.Background(), cluster, kcp, controlPlane)
+	_, err = r.upgradeControlPlane(context.Background(), cluster, kcp, controlPlane, needingUpgrade)
 	g.Expect(err).To(Equal(&capierrors.RequeueAfterError{RequeueAfter: healthCheckFailedRequeueAfter}))
 	g.Expect(fakeClient.List(context.Background(), bothMachines, client.InNamespace(cluster.Namespace))).To(Succeed())
 	g.Expect(bothMachines.Items).To(HaveLen(2))
@@ -102,7 +102,7 @@ func TestKubeadmControlPlaneReconciler_upgradeControlPlane(t *testing.T) {
 	r.managementCluster.(*fakeManagementCluster).ControlPlaneHealthy = true
 
 	// run upgrade the second time, expect we scale down
-	result, err = r.upgradeControlPlane(context.Background(), cluster, kcp, controlPlane)
+	result, err = r.upgradeControlPlane(context.Background(), cluster, kcp, controlPlane, controlPlane.Machines)
 	g.Expect(err).To(BeNil())
 	g.Expect(result).To(Equal(ctrl.Result{Requeue: true}))
 	finalMachine := &clusterv1.MachineList{}
diff --git a/controlplane/kubeadm/internal/control_plane.go b/controlplane/kubeadm/internal/control_plane.go
index 405d62f278be..0a140960afd6 100644
--- a/controlplane/kubeadm/internal/control_plane.go
+++ b/controlplane/kubeadm/internal/control_plane.go
@@ -93,24 +93,6 @@ func (c *ControlPlane) EtcdImageData() (string, string) {
 	return "", ""
 }
 
-// MachinesNeedingRollout return a list of machines that need to be rolled out due to configuration changes.
-//
-// NOTE: Expiration of the spec.UpgradeAfter value forces inclusion of all the machines in this set even if
-// no changes have been made to the KubeadmControlPlane.
-func (c *ControlPlane) MachinesNeedingRollout() FilterableMachineCollection {
-	now := metav1.Now()
-	if c.KCP.Spec.UpgradeAfter != nil && c.KCP.Spec.UpgradeAfter.Before(&now) {
-		return c.Machines.AnyFilter(
-			machinefilters.Not(machinefilters.MatchesConfigurationHash(c.SpecHash())),
-			machinefilters.OlderThan(c.KCP.Spec.UpgradeAfter),
-		)
-	}
-
-	return c.Machines.Filter(
-		machinefilters.Not(machinefilters.MatchesConfigurationHash(c.SpecHash())),
-	)
-}
-
 // MachineInFailureDomainWithMostMachines returns the first matching failure domain with machines that has the most control-plane machines on it.
 func (c *ControlPlane) MachineInFailureDomainWithMostMachines(machines FilterableMachineCollection) (*clusterv1.Machine, error) {
 	fd := c.FailureDomainWithMostMachines(machines)
diff --git a/controlplane/kubeadm/internal/control_plane_test.go b/controlplane/kubeadm/internal/control_plane_test.go
index 17adcbfd62a3..a411e98a6325 100644
--- a/controlplane/kubeadm/internal/control_plane_test.go
+++ b/controlplane/kubeadm/internal/control_plane_test.go
@@ -18,10 +18,10 @@ package internal
 
 import (
 	"testing"
-	"time"
 
 	. "github.com/onsi/ginkgo"
"github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" @@ -86,89 +86,6 @@ var _ = Describe("Control Plane", func() { }) }) - Describe("MachinesNeedingUpgrade", func() { - Context("With no machines", func() { - It("should return no machines", func() { - Expect(controlPlane.MachinesNeedingRollout()).To(HaveLen(0)) - }) - }) - - Context("With machines", func() { - BeforeEach(func() { - controlPlane.KCP.Spec.Version = "2" - controlPlane.Machines = FilterableMachineCollection{ - "machine-1": machine("machine-1", withHash(controlPlane.SpecHash())), - "machine-2": machine("machine-2", withHash(controlPlane.SpecHash())), - "machine-3": machine("machine-3", withHash(controlPlane.SpecHash())), - } - }) - - Context("That have an old configuration", func() { - JustBeforeEach(func() { - controlPlane.Machines.Insert(machine("machine-4", withHash(controlPlane.SpecHash()+"outdated"))) - }) - It("should return some machines", func() { - Expect(controlPlane.MachinesNeedingRollout()).To(HaveLen(1)) - }) - }) - - Context("That have an up-to-date configuration", func() { - year := 2000 - BeforeEach(func() { - controlPlane.Machines = FilterableMachineCollection{ - "machine-1": machine("machine-1", - withCreationTimestamp(metav1.Time{Time: time.Date(year-1, 0, 0, 0, 0, 0, 0, time.UTC)}), - withHash(controlPlane.SpecHash())), - "machine-2": machine("machine-2", - withCreationTimestamp(metav1.Time{Time: time.Date(year, 0, 0, 0, 0, 0, 0, time.UTC)}), - withHash(controlPlane.SpecHash())), - "machine-3": machine("machine-3", - withCreationTimestamp(metav1.Time{Time: time.Date(year+1, 0, 0, 0, 0, 0, 0, time.UTC)}), - withHash(controlPlane.SpecHash())), - } - }) - - Context("That has no upgradeAfter value set", func() { - It("should return no machines", func() { - Expect(controlPlane.MachinesNeedingRollout()).To(HaveLen(0)) - }) - }) - - Context("That has an upgradeAfter value set", func() { - Context("That is in the future", func() { - BeforeEach(func() { - future := time.Date(year+1000, 0, 0, 0, 0, 0, 0, time.UTC) - controlPlane.KCP.Spec.UpgradeAfter = &metav1.Time{Time: future} - }) - It("should return no machines", func() { - Expect(controlPlane.MachinesNeedingRollout()).To(HaveLen(0)) - }) - }) - - Context("That is in the past", func() { - Context("That is before machine creation time", func() { - JustBeforeEach(func() { - controlPlane.KCP.Spec.UpgradeAfter = &metav1.Time{Time: time.Date(year-2, 0, 0, 0, 0, 0, 0, time.UTC)} - }) - It("should return no machines", func() { - Expect(controlPlane.MachinesNeedingRollout()).To(HaveLen(0)) - }) - }) - - Context("That is after machine creation time", func() { - JustBeforeEach(func() { - controlPlane.KCP.Spec.UpgradeAfter = &metav1.Time{Time: time.Date(year, 1, 0, 0, 0, 0, 0, time.UTC)} - }) - It("should return all machines older than this date machines", func() { - Expect(controlPlane.MachinesNeedingRollout()).To(HaveLen(2)) - }) - }) - }) - }) - }) - }) - }) - Describe("Generating components", func() { Context("That is after machine creation time", func() { BeforeEach(func() { @@ -211,9 +128,3 @@ func withFailureDomain(fd string) machineOpt { m.Spec.FailureDomain = &fd } } - -func withHash(hash string) machineOpt { - return func(m *clusterv1.Machine) { - m.SetLabels(map[string]string{controlplanev1.KubeadmControlPlaneHashLabelKey: hash}) - } -} diff --git a/controlplane/kubeadm/internal/machinefilters/machine_filters.go b/controlplane/kubeadm/internal/machinefilters/machine_filters.go index 
index 69d4e70e6dfd..0bed1f8431d8 100644
--- a/controlplane/kubeadm/internal/machinefilters/machine_filters.go
+++ b/controlplane/kubeadm/internal/machinefilters/machine_filters.go
@@ -17,14 +17,21 @@ limitations under the License.
 
 package machinefilters
 
 import (
+	"context"
+	"encoding/json"
+	"reflect"
+
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/labels"
 	"k8s.io/apimachinery/pkg/selection"
-	"sigs.k8s.io/cluster-api/util/conditions"
-
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1alpha3"
+	bootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1alpha3"
+	kubeadmv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/types/v1beta1"
+	"sigs.k8s.io/cluster-api/controllers/external"
 	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1alpha3"
 	"sigs.k8s.io/cluster-api/util"
+	"sigs.k8s.io/cluster-api/util/conditions"
+	"sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
 )
 
@@ -197,3 +204,146 @@ func ControlPlaneSelectorForCluster(clusterName string) labels.Selector {
 		must(labels.NewRequirement(clusterv1.MachineControlPlaneLabelName, selection.Exists, []string{})),
 	)
 }
+
+// MatchesKCPConfiguration returns a filter to find all machines that match the KCP config and do not require any rollout.
+// The Kubernetes version, infrastructure template, and KubeadmConfig fields need to be equivalent.
+func MatchesKCPConfiguration(ctx context.Context, c client.Client, kcp controlplanev1.KubeadmControlPlane, cluster clusterv1.Cluster) func(machine *clusterv1.Machine) bool {
+	return And(
+		MatchesKubernetesVersion(kcp.Spec.Version),
+		MatchesKubeadmBootstrapConfig(ctx, c, kcp, cluster),
+		MatchesTemplateClonedFrom(ctx, c, kcp),
+	)
+}
+
+// MatchesTemplateClonedFrom returns a filter to find all machines that match a given KCP infra template.
+func MatchesTemplateClonedFrom(ctx context.Context, c client.Client, kcp controlplanev1.KubeadmControlPlane) Func {
+	return func(machine *clusterv1.Machine) bool {
+		if machine == nil {
+			return false
+		}
+		infraObj, err := external.Get(ctx, c, &machine.Spec.InfrastructureRef, kcp.Namespace)
+		// Return true here because failing to get the infrastructure machine should not be considered as unmatching.
+		if err != nil {
+			return true
+		}
+
+		clonedFromName, ok1 := infraObj.GetAnnotations()[clusterv1.TemplateClonedFromNameAnnotation]
+		clonedFromGroupKind, ok2 := infraObj.GetAnnotations()[clusterv1.TemplateClonedFromGroupKindAnnotation]
+		if !ok1 || !ok2 {
+			// All infra machines cloned by KCP should have this annotation.
+			// A missing annotation may be due to older-version machines or adopted machines.
+			// This should not be considered a mismatch.
+			return true
+		}
+
+		// Check if the machine's infrastructure reference has been created from the current KCP infrastructure template.
+		if clonedFromName != kcp.Spec.InfrastructureTemplate.Name ||
+			clonedFromGroupKind != kcp.Spec.InfrastructureTemplate.GroupVersionKind().GroupKind().String() {
+			return false
+		}
+		return true
+	}
+}
+
+// MatchesKubernetesVersion returns a filter to find all machines that match a given Kubernetes version.
+func MatchesKubernetesVersion(kubernetesVersion string) Func {
+	return func(machine *clusterv1.Machine) bool {
+		if machine == nil {
+			return false
+		}
+		if machine.Spec.Version == nil {
+			return false
+		}
+		return *machine.Spec.Version == kubernetesVersion
+	}
+}
+
+// MatchesKubeadmBootstrapConfig checks if a machine's KubeadmConfigSpec is equivalent to KCP's KubeadmConfigSpec.
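+// Fields that CABPK generates or defaults per machine (TypeMeta, JoinConfiguration.Discovery, and
+// JoinConfiguration.ControlPlane when the KCP's is nil) are normalized on local copies before the
+// final deep-equality check, and ClusterConfiguration is compared via the machine annotation instead.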
+func MatchesKubeadmBootstrapConfig(ctx context.Context, c client.Client, kcp controlplanev1.KubeadmControlPlane, cluster clusterv1.Cluster) Func {
+	return func(machine *clusterv1.Machine) bool {
+		if machine == nil {
+			return false
+		}
+		bootstrapRef := machine.Spec.Bootstrap.ConfigRef
+		if bootstrapRef == nil {
+			// A missing bootstrap reference should not be considered as unmatching.
+			// This is a safety precaution to avoid selecting machines that are broken, which in the future should be remediated separately.
+			return true
+		}
+
+		bootstrapObj := &bootstrapv1.KubeadmConfig{}
+		if err := c.Get(ctx, client.ObjectKey{Name: bootstrapRef.Name, Namespace: machine.Namespace}, bootstrapObj); err != nil {
+			// Return true here because failing to get the KubeadmConfig should not be considered as unmatching.
+			// This is a safety precaution to avoid rolling out machines if the client or the api-server is misbehaving.
+			return true
+		}
+
+		kcpConfigSpecLocal := kcp.Spec.KubeadmConfigSpec.DeepCopy()
+
+		// Machine's init configuration is nil when the control plane is already initialized.
+		if bootstrapObj.Spec.InitConfiguration == nil {
+			kcpConfigSpecLocal.InitConfiguration = nil
+		}
+
+		// Machine's join configuration is nil when a machine is the first machine in the control plane.
+		if bootstrapObj.Spec.JoinConfiguration == nil {
+			kcpConfigSpecLocal.JoinConfiguration = nil
+		} else if kcpConfigSpecLocal.JoinConfiguration == nil {
+			// If the KCP join configuration is not present, set the machine join configuration to nil (nothing can trigger a rollout here).
+			bootstrapObj.Spec.JoinConfiguration = nil
+		}
+
+		// Clear the TypeMeta information from the comparison. Today, the kubeadm types are embedded in CABPK and the KCP types don't carry this information.
+		if bootstrapObj.Spec.InitConfiguration != nil && kcpConfigSpecLocal.InitConfiguration != nil {
+			bootstrapObj.Spec.InitConfiguration.TypeMeta = kcpConfigSpecLocal.InitConfiguration.TypeMeta
+		}
+		if bootstrapObj.Spec.JoinConfiguration != nil && kcpConfigSpecLocal.JoinConfiguration != nil {
+			bootstrapObj.Spec.JoinConfiguration.TypeMeta = kcpConfigSpecLocal.JoinConfiguration.TypeMeta
+		}
+
+		// Machines that have the KubeadmClusterConfigurationAnnotation must match the KCP ClusterConfiguration.
+		// If the annotation is not present (the machine is either old or adopted), we won't roll out on any possible changes
+		// made in the KCP's ClusterConfiguration, given that we don't have enough information to make a decision.
+		// Users should use the KCP.Spec.UpgradeAfter field to force a rollout in this case.
+		machineClusterConfigStr, ok := machine.GetAnnotations()[controlplanev1.KubeadmClusterConfigurationAnnotation]
+		if ok {
+			machineClusterConfig := &kubeadmv1.ClusterConfiguration{}
+			// If the ClusterConfiguration annotation cannot be unmarshalled, the only solution is to roll out.
+			if err := json.Unmarshal([]byte(machineClusterConfigStr), machineClusterConfig); err != nil {
+				return false
+			}
+			if !reflect.DeepEqual(machineClusterConfig, kcp.Spec.KubeadmConfigSpec.ClusterConfiguration) {
+				return false
+			}
+		}
+
+		// The KCP ClusterConfiguration is only compared with a machine's ClusterConfiguration annotation.
+		// To skip ClusterConfiguration in the deep-equality comparison below, set both the Machine's and the KCP's ClusterConfigurations to nil here.
+		kcpConfigSpecLocal.ClusterConfiguration = nil
+		bootstrapObj.Spec.ClusterConfiguration = nil
+
+		// Machine's JoinConfiguration Discovery is set to an empty object by the KubeadmConfig controller if the KCP's Discovery is nil; also,
+		// adopted machines may have an unrelated discovery setting, so ignore Discovery completely when comparing JoinConfigurations.
+		emptyDiscovery := kubeadmv1.Discovery{}
+		if kcpConfigSpecLocal.JoinConfiguration != nil {
+			kcpConfigSpecLocal.JoinConfiguration.Discovery = emptyDiscovery
+		}
+		if bootstrapObj.Spec.JoinConfiguration != nil {
+			bootstrapObj.Spec.JoinConfiguration.Discovery = emptyDiscovery
+		}
+
+		// Machine's JoinConfiguration ControlPlane is set to an empty object by the KubeadmConfig controller if the KCP's ControlPlane is nil.
+		// Set the Machine's ControlPlane to nil to avoid a spurious mismatch in the comparison below.
+		if kcpConfigSpecLocal.JoinConfiguration != nil && kcpConfigSpecLocal.JoinConfiguration.ControlPlane == nil {
+			bootstrapObj.Spec.JoinConfiguration.ControlPlane = nil
+		}
+
+		// If the KCP's join NodeRegistration is empty, set the machine's node registration to empty as well, as no changes should trigger a rollout.
+		emptyNodeRegistration := kubeadmv1.NodeRegistrationOptions{}
+		if kcpConfigSpecLocal.JoinConfiguration != nil && reflect.DeepEqual(kcpConfigSpecLocal.JoinConfiguration.NodeRegistration, emptyNodeRegistration) {
+			bootstrapObj.Spec.JoinConfiguration.NodeRegistration = emptyNodeRegistration
+		}
+
+		return reflect.DeepEqual(bootstrapObj.Spec, *kcpConfigSpecLocal)
+	}
+}
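
To make the annotation-based rollout check above concrete: generateMachine JSON-marshals the KCP ClusterConfiguration into the KubeadmClusterConfigurationAnnotation, and MatchesKubeadmBootstrapConfig later unmarshals that annotation and deep-compares it against the current spec. The sketch below illustrates that round-trip in isolation; ClusterConfig is a hypothetical stand-in for kubeadmv1.ClusterConfiguration, used only to keep the example self-contained outside the cluster-api module.

package main

import (
	"encoding/json"
	"fmt"
	"reflect"
)

// ClusterConfig is a simplified stand-in for kubeadmv1.ClusterConfiguration.
type ClusterConfig struct {
	ClusterName       string `json:"clusterName,omitempty"`
	KubernetesVersion string `json:"kubernetesVersion,omitempty"`
}

const annotationKey = "controlplane.cluster.x-k8s.io/kubeadm-cluster-configuration"

func main() {
	// At machine creation (generateMachine): marshal the KCP ClusterConfiguration
	// and store it as a machine annotation.
	desired := &ClusterConfig{ClusterName: "foo", KubernetesVersion: "v1.18.4"}
	marshalled, err := json.Marshal(desired)
	if err != nil {
		panic(err)
	}
	annotations := map[string]string{annotationKey: string(marshalled)}

	// At reconcile time (MatchesKubeadmBootstrapConfig): a machine without the
	// annotation (older or adopted) is never rolled out for ClusterConfiguration
	// changes; otherwise unmarshal it and deep-compare against the current spec.
	stored, ok := annotations[annotationKey]
	if !ok {
		fmt.Println("no annotation: ClusterConfiguration changes cannot trigger a rollout")
		return
	}
	current := &ClusterConfig{}
	if err := json.Unmarshal([]byte(stored), current); err != nil {
		fmt.Println("needs rollout: annotation cannot be unmarshalled")
		return
	}
	fmt.Println("machine matches KCP ClusterConfiguration:", reflect.DeepEqual(current, desired))
}

Compared with the removed hash-based check (MatchesConfigurationHash over the whole spec), storing the marshalled ClusterConfiguration on the machine lets the comparison tolerate machines created before the annotation existed, with KCP.Spec.UpgradeAfter as the explicit fallback for forcing a rollout on those machines.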