Skip to content

Commit

Permalink
[release-1.7] 🌱 clusterctl: always run crd migration if possible to r…
Browse files Browse the repository at this point in the history
…educe conversion webhook usage (#11513)

* clusterctl: always run crd migration if possible to reduce conversion webhook usage

If objects are still stored in the old version, every time an object is requested
the kube-apiserver will call the conversion webhook. Instead of lots of conversion
webhook calls we once do the migration if possible.

Also increases the timeout used for the migrations: Before running CRD migration
clusterctl usually upgrades cert-manager which may lead to updating Certificates and
rolling out new certificates to our controllers. There is a chance that this can take
up to 90s in which conversion, validating or mutatingwebhooks may be unavailable.
Because the migration gets run more aggresively during upgrades this also increases
the relevant timeouts.

* review fixes

* review fixes

---------

Co-authored-by: Christian Schlotter <[email protected]>
  • Loading branch information
k8s-infra-cherrypick-robot and chrischdi authored Dec 2, 2024
1 parent 4a54410 commit b13114e
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 82 deletions.
43 changes: 28 additions & 15 deletions cmd/clusterctl/client/cluster/crd_migration.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/util/rand"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/apimachinery/pkg/util/wait"
"sigs.k8s.io/controller-runtime/pkg/client"

"sigs.k8s.io/cluster-api/cmd/clusterctl/internal/scheme"
Expand Down Expand Up @@ -83,12 +84,8 @@ func (m *crdMigrator) run(ctx context.Context, newCRD *apiextensionsv1.CustomRes

// Gets the list of version supported by the new CRD
newVersions := sets.Set[string]{}
servedVersions := sets.Set[string]{}
for _, version := range newCRD.Spec.Versions {
newVersions.Insert(version.Name)
if version.Served {
servedVersions.Insert(version.Name)
}
}

// Get the current CRD.
Expand All @@ -115,23 +112,22 @@ func (m *crdMigrator) run(ctx context.Context, newCRD *apiextensionsv1.CustomRes
}

currentStatusStoredVersions := sets.Set[string]{}.Insert(currentCRD.Status.StoredVersions...)
// If the new CRD still contains all current stored versions, nothing to do
// as no previous storage version will be dropped.
if servedVersions.HasAll(currentStatusStoredVersions.UnsortedList()...) {
// If the old CRD only contains its current storageVersion as storedVersion,
// nothing to do as all objects are already on the current storageVersion.
// Note: We want to migrate objects to new storage versions as soon as possible
// to prevent unnecessary conversion webhook calls.
if currentStatusStoredVersions.Len() == 1 && currentCRD.Status.StoredVersions[0] == currentStorageVersion {
log.V(2).Info("CRD migration check passed", "name", newCRD.Name)
return false, nil
}

// Otherwise a version that has been used as storage version will be dropped, so it is necessary to migrate all the
// objects and drop the storage version from the current CRD status before installing the new CRD.
// Ref https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definition-versioning/#writing-reading-and-updating-versioned-customresourcedefinition-objects
// Note: We are simply migrating all CR objects independent of the version in which they are actually stored in etcd.
// This way we can make sure that all CR objects are now stored in the current storage version.
// Alternatively, we would have to figure out which objects are stored in which version but this information is not
// exposed by the apiserver.
storedVersionsToDelete := currentStatusStoredVersions.Difference(servedVersions)
storedVersionsToPreserve := currentStatusStoredVersions.Intersection(servedVersions)
log.Info("CR migration required", "kind", newCRD.Spec.Names.Kind, "storedVersionsToDelete", strings.Join(sets.List(storedVersionsToDelete), ","), "storedVersionsToPreserve", strings.Join(sets.List(storedVersionsToPreserve), ","))
// Ref https://kubernetes.io/docs/tasks/extend-kubernetes/custom-resources/custom-resource-definition-versioning/#writing-reading-and-updating-versioned-customresourcedefinition-objects
storedVersionsToDelete := currentStatusStoredVersions.Delete(currentStorageVersion)
log.Info("CR migration required", "kind", newCRD.Spec.Names.Kind, "storedVersionsToDelete", strings.Join(sets.List(storedVersionsToDelete), ","), "storedVersionToPreserve", currentStorageVersion)

if err := m.migrateResourcesForCRD(ctx, currentCRD, currentStorageVersion); err != nil {
return false, err
Expand All @@ -157,7 +153,7 @@ func (m *crdMigrator) migrateResourcesForCRD(ctx context.Context, crd *apiextens

var i int
for {
if err := retryWithExponentialBackoff(ctx, newReadBackoff(), func(ctx context.Context) error {
if err := retryWithExponentialBackoff(ctx, newCRDMigrationBackoff(), func(ctx context.Context) error {
return m.Client.List(ctx, list, client.Continue(list.GetContinue()))
}); err != nil {
return errors.Wrapf(err, "failed to list %q", list.GetKind())
Expand All @@ -167,7 +163,7 @@ func (m *crdMigrator) migrateResourcesForCRD(ctx context.Context, crd *apiextens
obj := list.Items[i]

log.V(5).Info("Migrating", logf.UnstructuredToValues(obj)...)
if err := retryWithExponentialBackoff(ctx, newWriteBackoff(), func(ctx context.Context) error {
if err := retryWithExponentialBackoff(ctx, newCRDMigrationBackoff(), func(ctx context.Context) error {
return handleMigrateErr(m.Client.Update(ctx, &obj))
}); err != nil {
return errors.Wrapf(err, "failed to migrate %s/%s", obj.GetNamespace(), obj.GetName())
Expand Down Expand Up @@ -230,3 +226,20 @@ func storageVersionForCRD(crd *apiextensionsv1.CustomResourceDefinition) (string
}
return "", errors.Errorf("could not find storage version for CRD %q", crd.Name)
}

// newCRDMigrationBackoff creates a new API Machinery backoff parameter set suitable for use with crd migration operations.
// Clusterctl upgrades cert-manager right before doing CRD migration. This may lead to rollout of new certificates.
// The time between new certificate creation + injection into objects (CRD, Webhooks) and the new secrets getting propagated
// to the controller can be 60-90s, because the kubelet only periodically syncs secret contents to pods.
// During this timespan conversion, validating- or mutating-webhooks may be unavailable and cause a failure.
func newCRDMigrationBackoff() wait.Backoff {
// Return a exponential backoff configuration which returns durations for a total time of ~1m30s + some buffer.
// Example: 0, .25s, .6s, 1.1s, 1.8s, 2.7s, 4s, 6s, 9s, 12s, 17s, 25s, 35s, 49s, 69s, 97s, 135s
// Jitter is added as a random fraction of the duration multiplied by the jitter factor.
return wait.Backoff{
Duration: 250 * time.Millisecond,
Factor: 1.4,
Steps: 17,
Jitter: 0.1,
}
}
73 changes: 6 additions & 67 deletions cmd/clusterctl/client/cluster/crd_migration_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ func Test_CRDMigrator(t *testing.T) {
wantIsMigrated: false,
},
{
name: "No-op if new CRD supports same versions",
name: "No-op if new CRD uses the same storage version",
currentCRD: &apiextensionsv1.CustomResourceDefinition{
ObjectMeta: metav1.ObjectMeta{Name: "foo"},
Spec: apiextensionsv1.CustomResourceDefinitionSpec{
Expand All @@ -90,7 +90,7 @@ func Test_CRDMigrator(t *testing.T) {
wantIsMigrated: false,
},
{
name: "No-op if new CRD adds a new versions",
name: "No-op if new CRD adds a new versions and stored versions is only the old storage version",
currentCRD: &apiextensionsv1.CustomResourceDefinition{
ObjectMeta: metav1.ObjectMeta{Name: "foo"},
Spec: apiextensionsv1.CustomResourceDefinitionSpec{
Expand All @@ -104,8 +104,8 @@ func Test_CRDMigrator(t *testing.T) {
ObjectMeta: metav1.ObjectMeta{Name: "foo"},
Spec: apiextensionsv1.CustomResourceDefinitionSpec{
Versions: []apiextensionsv1.CustomResourceDefinitionVersion{
{Name: "v1beta1", Storage: true, Served: true}, // v1beta1 is being added
{Name: "v1alpha1", Served: true}, // v1alpha1 still exists
{Name: "v1beta1", Storage: true, Served: false}, // v1beta1 is being added
{Name: "v1alpha1", Served: true}, // v1alpha1 still exists
},
},
},
Expand Down Expand Up @@ -133,7 +133,7 @@ func Test_CRDMigrator(t *testing.T) {
wantErr: true,
},
{
name: "Migrate CRs if their storage version is removed from the CRD",
name: "Migrate CRs if there are stored versions is not only the current storage version",
CRs: []unstructured.Unstructured{
{
Object: map[string]interface{}{
Expand Down Expand Up @@ -185,75 +185,14 @@ func Test_CRDMigrator(t *testing.T) {
Names: apiextensionsv1.CustomResourceDefinitionNames{Kind: "Foo", ListKind: "FooList"},
Versions: []apiextensionsv1.CustomResourceDefinitionVersion{
{Name: "v1", Storage: true, Served: true}, // v1 is being added
{Name: "v1beta1", Served: true}, // v1beta1 still there (required for migration)
{Name: "v1beta1", Served: true}, // v1beta1 still there
// v1alpha1 is being dropped
},
},
},
wantStoredVersions: []string{"v1beta1"}, // v1alpha1 should be dropped from current CRD's stored versions
wantIsMigrated: true,
},
{
name: "Migrate the CR if their storage version is no longer served by the CRD",
CRs: []unstructured.Unstructured{
{
Object: map[string]interface{}{
"apiVersion": "foo/v1beta1",
"kind": "Foo",
"metadata": map[string]interface{}{
"name": "cr1",
"namespace": metav1.NamespaceDefault,
},
},
},
{
Object: map[string]interface{}{
"apiVersion": "foo/v1beta1",
"kind": "Foo",
"metadata": map[string]interface{}{
"name": "cr2",
"namespace": metav1.NamespaceDefault,
},
},
},
{
Object: map[string]interface{}{
"apiVersion": "foo/v1beta1",
"kind": "Foo",
"metadata": map[string]interface{}{
"name": "cr3",
"namespace": metav1.NamespaceDefault,
},
},
},
},
currentCRD: &apiextensionsv1.CustomResourceDefinition{
ObjectMeta: metav1.ObjectMeta{Name: "foo"},
Spec: apiextensionsv1.CustomResourceDefinitionSpec{
Group: "foo",
Names: apiextensionsv1.CustomResourceDefinitionNames{Kind: "Foo", ListKind: "FooList"},
Versions: []apiextensionsv1.CustomResourceDefinitionVersion{
{Name: "v1beta1", Storage: true, Served: true},
{Name: "v1alpha1", Served: true},
},
},
Status: apiextensionsv1.CustomResourceDefinitionStatus{StoredVersions: []string{"v1beta1", "v1alpha1"}},
},
newCRD: &apiextensionsv1.CustomResourceDefinition{
ObjectMeta: metav1.ObjectMeta{Name: "foo"},
Spec: apiextensionsv1.CustomResourceDefinitionSpec{
Group: "foo",
Names: apiextensionsv1.CustomResourceDefinitionNames{Kind: "Foo", ListKind: "FooList"},
Versions: []apiextensionsv1.CustomResourceDefinitionVersion{
{Name: "v1", Storage: true, Served: true}, // v1 is being added
{Name: "v1beta1", Served: true}, // v1beta1 still there (required for migration)
{Name: "v1alpha1", Served: false}, // v1alpha1 is no longer being served (required for migration)
},
},
},
wantStoredVersions: []string{"v1beta1"}, // v1alpha1 should be dropped from current CRD's stored versions
wantIsMigrated: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down

0 comments on commit b13114e

Please sign in to comment.