From e4bbad8b2dc5fdbb7a4135db3744e3cfc4e852f1 Mon Sep 17 00:00:00 2001 From: enxebre Date: Tue, 25 Jun 2024 11:03:52 +0200 Subject: [PATCH] Keep old user data for aws < 4.16 We currently need to keep old userdata secrets in AWS < 4.16 to prevent machineDeployments rollout from failing to delete old Machines. https://github.com/openshift/hypershift/blob/3efa23b932a59681346a7a432d349cfb6e44b13d/hypershift-operator/controllers/nodepool/nodepool_controller.go#L775-L777 https://github.com/kubernetes-sigs/cluster-api-provider-aws/pull/3805 We regress that behaviour here https://github.com/openshift/hypershift/pull/3969 This PR fixes that by statically checking the hc release version. --- .../nodepool/nodepool_controller.go | 47 +++++++ .../nodepool/nodepool_controller_test.go | 128 ++++++++++++++++-- 2 files changed, 162 insertions(+), 13 deletions(-) diff --git a/hypershift-operator/controllers/nodepool/nodepool_controller.go b/hypershift-operator/controllers/nodepool/nodepool_controller.go index 4514cf748a4..da861b9f195 100644 --- a/hypershift-operator/controllers/nodepool/nodepool_controller.go +++ b/hypershift-operator/controllers/nodepool/nodepool_controller.go @@ -3056,6 +3056,15 @@ func (r *secretJanitor) Reconcile(ctx context.Context, req reconcile.Request) (r return ctrl.Result{}, err } + shouldKeepOldUserData, err := r.shouldKeepOldUserData(ctx, hcluster) + if err != nil { + return ctrl.Result{}, err + } + if shouldKeepOldUserData { + log.V(3).Info("Skipping secretJanitor reconciliation and keeping old user data secret") + return ctrl.Result{}, nil + } + controlPlaneNamespace := manifests.HostedControlPlaneNamespace(hcluster.Namespace, hcluster.Name) expectedCoreConfigResources := expectedCoreConfigResourcesForHostedCluster(hcluster) releaseImage, err := r.getReleaseImage(ctx, hcluster, nodePool.Status.Version, nodePool.Spec.Release.Image) @@ -3128,6 +3137,44 @@ func (r *secretJanitor) Reconcile(ctx context.Context, req reconcile.Request) (r return ctrl.Result{}, cleanup(ctx, r.Client, secret) } +// shouldKeepOldUserData determines if the old user data should be kept. +// For AWS < 4.16, we keep the old userdata Secret so old Machines during rolled out can be deleted. +// Otherwise, deletion fails because of https://github.com/kubernetes-sigs/cluster-api-provider-aws/pull/3805. +// TODO (alberto): Drop this check when support for old versions without the fix is not needed anymore. +func (r *NodePoolReconciler) shouldKeepOldUserData(ctx context.Context, hc *hyperv1.HostedCluster) (bool, error) { + if hc.Spec.Platform.Type != hyperv1.AWSPlatform { + return false, nil + } + + // If there's a curren version in status, be conservative and assume that one is the one running CAPA. + releaseImage := hc.Spec.Release.Image + if hc.Status.Version != nil { + if len(hc.Status.Version.History) > 0 { + releaseImage = hc.Status.Version.History[0].Image + } + } + + pullSecretBytes, err := r.getPullSecretBytes(ctx, hc) + if err != nil { + return true, fmt.Errorf("failed to get pull secret bytes: %w", err) + } + + releaseInfo, err := r.ReleaseProvider.Lookup(ctx, releaseImage, pullSecretBytes) + if err != nil { + return true, fmt.Errorf("failed to lookup release image: %w", err) + } + hostedClusterVersion, err := semver.Parse(releaseInfo.Version()) + if err != nil { + return true, err + } + + if hostedClusterVersion.LT(semver.MustParse("4.16.0")) { + return true, nil + } + + return false, nil +} + func globalConfigString(hcluster *hyperv1.HostedCluster) (string, error) { // 1. - Reconcile conditions according to current state of the world. proxy := globalconfig.ProxyConfig() diff --git a/hypershift-operator/controllers/nodepool/nodepool_controller_test.go b/hypershift-operator/controllers/nodepool/nodepool_controller_test.go index 87677bd5228..b6ef4b27f0c 100644 --- a/hypershift-operator/controllers/nodepool/nodepool_controller_test.go +++ b/hypershift-operator/controllers/nodepool/nodepool_controller_test.go @@ -9,6 +9,7 @@ import ( "testing" "time" + "github.com/blang/semver" "github.com/go-logr/logr" "github.com/go-logr/zapr" "github.com/google/go-cmp/cmp" @@ -16,8 +17,20 @@ import ( . "github.com/onsi/gomega" "github.com/openshift/api/image/docker10" imagev1 "github.com/openshift/api/image/v1" + hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1" + "github.com/openshift/hypershift/api/util/ipnet" + "github.com/openshift/hypershift/hypershift-operator/controllers/hostedcluster" + "github.com/openshift/hypershift/hypershift-operator/controllers/manifests" + ignserver "github.com/openshift/hypershift/ignition-server/controllers" + kvinfra "github.com/openshift/hypershift/kubevirtexternalinfra" + "github.com/openshift/hypershift/support/api" + "github.com/openshift/hypershift/support/releaseinfo" + fakereleaseprovider "github.com/openshift/hypershift/support/releaseinfo/fake" "github.com/openshift/hypershift/support/supportedversion" "github.com/openshift/hypershift/support/testutil" + "github.com/openshift/hypershift/support/thirdparty/library-go/pkg/image/dockerv1client" + "github.com/openshift/hypershift/support/upsert" + "github.com/openshift/hypershift/support/util/fakeimagemetadataprovider" "go.uber.org/zap/zaptest" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" @@ -34,19 +47,6 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client/apiutil" "sigs.k8s.io/controller-runtime/pkg/client/fake" "sigs.k8s.io/controller-runtime/pkg/reconcile" - - hyperv1 "github.com/openshift/hypershift/api/hypershift/v1beta1" - "github.com/openshift/hypershift/api/util/ipnet" - "github.com/openshift/hypershift/hypershift-operator/controllers/hostedcluster" - "github.com/openshift/hypershift/hypershift-operator/controllers/manifests" - ignserver "github.com/openshift/hypershift/ignition-server/controllers" - kvinfra "github.com/openshift/hypershift/kubevirtexternalinfra" - "github.com/openshift/hypershift/support/api" - "github.com/openshift/hypershift/support/releaseinfo" - fakereleaseprovider "github.com/openshift/hypershift/support/releaseinfo/fake" - "github.com/openshift/hypershift/support/thirdparty/library-go/pkg/image/dockerv1client" - "github.com/openshift/hypershift/support/upsert" - "github.com/openshift/hypershift/support/util/fakeimagemetadataprovider" ) func TestIsUpdatingConfig(t *testing.T) { @@ -3601,3 +3601,105 @@ spec: }) } } + +func TestShouldKeepOldUserData(t *testing.T) { + pullSecret := &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "pull-secret", Namespace: "test"}, + Data: map[string][]byte{ + corev1.DockerConfigJsonKey: []byte("whatever"), + }, + } + + testCases := []struct { + name string + hc *hyperv1.HostedCluster + releaseProvider releaseinfo.Provider + expected bool + }{ + { + name: "hosted cluster is not aws", + hc: &hyperv1.HostedCluster{ + TypeMeta: metav1.TypeMeta{}, + ObjectMeta: metav1.ObjectMeta{ + Namespace: pullSecret.Namespace, + }, + Spec: hyperv1.HostedClusterSpec{ + PullSecret: corev1.LocalObjectReference{ + Name: pullSecret.Name, + }, + Platform: hyperv1.PlatformSpec{ + Type: hyperv1.AgentPlatform, + }, + Release: hyperv1.Release{ + Image: "fake-release-image", + }, + }, + Status: hyperv1.HostedClusterStatus{}, + }, + expected: false, + }, + { + name: "hosted cluster is less than 4.16", + hc: &hyperv1.HostedCluster{ + TypeMeta: metav1.TypeMeta{}, + ObjectMeta: metav1.ObjectMeta{ + Namespace: pullSecret.Namespace, + }, + Spec: hyperv1.HostedClusterSpec{ + PullSecret: corev1.LocalObjectReference{ + Name: pullSecret.Name, + }, + Platform: hyperv1.PlatformSpec{ + Type: hyperv1.AWSPlatform, + }, + Release: hyperv1.Release{ + Image: "fake-release-image", + }, + }, + Status: hyperv1.HostedClusterStatus{}, + }, + releaseProvider: &fakereleaseprovider.FakeReleaseProvider{Version: semver.MustParse("4.15.0").String()}, + expected: false, + }, + { + name: "hosted cluster is equal or greater than 4.16", + hc: &hyperv1.HostedCluster{ + TypeMeta: metav1.TypeMeta{}, + ObjectMeta: metav1.ObjectMeta{ + Namespace: pullSecret.Namespace, + }, + Spec: hyperv1.HostedClusterSpec{ + PullSecret: corev1.LocalObjectReference{ + Name: pullSecret.Name, + }, + Platform: hyperv1.PlatformSpec{ + Type: hyperv1.AWSPlatform, + }, + Release: hyperv1.Release{ + Image: "fake-release-image", + }, + }, + }, + releaseProvider: &fakereleaseprovider.FakeReleaseProvider{Version: semver.MustParse("4.16.0").String()}, + expected: false, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + g := NewWithT(t) + c := fake.NewClientBuilder().WithScheme(api.Scheme).WithObjects( + pullSecret, + ).Build() + + r := &NodePoolReconciler{ + Client: c, + ReleaseProvider: &fakereleaseprovider.FakeReleaseProvider{Version: semver.MustParse("4.17.0").String()}, + } + + shouldKeepOldUserData, err := r.shouldKeepOldUserData(context.Background(), tc.hc) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(shouldKeepOldUserData).To(Equal(tc.expected)) + }) + } +}