From e9406d21bc7dbe303e733f2c80d860ed52a5e944 Mon Sep 17 00:00:00 2001 From: Lei Li Date: Thu, 12 Oct 2023 00:53:56 -0700 Subject: [PATCH] peerpod-ctrl: check and clean old PeerPod objects For Create and Update events without DeletionTimestamp, check if there is old PeerPod object owned by the same Pod, and delete it to clean the dangling Pod VM Fixes #1510 Signed-off-by: Lei Li Signed-off-by: Lei Li --- .../controllers/peerpod_controller.go | 25 +++++++++++++++++++ test/e2e/rolling_update_test.go | 10 ++++---- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/peerpod-ctrl/controllers/peerpod_controller.go b/peerpod-ctrl/controllers/peerpod_controller.go index 27ecf0740..6c217c69c 100644 --- a/peerpod-ctrl/controllers/peerpod_controller.go +++ b/peerpod-ctrl/controllers/peerpod_controller.go @@ -82,6 +82,25 @@ func (r *PeerPodReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct } if pp.ObjectMeta.GetDeletionTimestamp() == nil { // TODO: consider filter events + // Create or Update events without DeletionTimestamp + // Check if existing old PeerPod owned by current Pod, + // and delete it to clean the dangling VM. + // It may caused by CAA update, unplanned down or crashes + ppList := confidentialcontainersorgv1alpha1.PeerPodList{} + if err := r.List(ctx, &ppList); err != nil { + logger.Info("Failed to get PeerPod list", "error", err) + return ctrl.Result{}, err + } + + for _, item := range ppList.Items { + if isOldPeerPod(item, pp) { + logger.Info("Found old PeerPod object owned by the same Pod", "old PeerPod", item) + if err := r.Delete(ctx, &item); err != nil { + logger.Info("Failed to delete old PeerPod", "error", err) + } + } + } + return ctrl.Result{}, nil } @@ -154,3 +173,9 @@ func SetProvider() (cloud.Provider, error) { return nil, fmt.Errorf("cloudmgr: %s cloud provider not supported", cloudName) } + +func isOldPeerPod(pp, cur confidentialcontainersorgv1alpha1.PeerPod) bool { + return pp.OwnerReferences[0].UID == cur.OwnerReferences[0].UID && // Same owner + pp.UID != cur.UID && // Not cur itself + pp.CreationTimestamp.Before(&cur.CreationTimestamp) // Created before cur +} diff --git a/test/e2e/rolling_update_test.go b/test/e2e/rolling_update_test.go index 7dc47383a..cc2b620de 100644 --- a/test/e2e/rolling_update_test.go +++ b/test/e2e/rolling_update_test.go @@ -27,7 +27,7 @@ import ( ) const WAIT_DEPLOYMENT_AVAILABLE_TIMEOUT = time.Second * 180 -const OLD_VM_DELETION_TIMEOUT = time.Second * 15 +const OLD_VM_DELETION_TIMEOUT = time.Second * 60 func doTestCaaDaemonsetRollingUpdate(t *testing.T, assert RollingUpdateAssert) { runtimeClassName := "kata-remote" @@ -229,6 +229,10 @@ func doTestCaaDaemonsetRollingUpdate(t *testing.T, assert RollingUpdateAssert) { t.Fatal(fmt.Errorf("verify pod is not running")) } + time.Sleep(OLD_VM_DELETION_TIMEOUT) + log.Info("Verify old VM instances have been deleted:") + assert.VerifyOldVmDeleted(t) + return ctx }). Teardown(func(ctx context.Context, t *testing.T, cfg *envconf.Config) context.Context { @@ -252,10 +256,6 @@ func doTestCaaDaemonsetRollingUpdate(t *testing.T, assert RollingUpdateAssert) { t.Fatal(err) } - time.Sleep(OLD_VM_DELETION_TIMEOUT) - log.Info("Verify old VM instances have been deleted:") - assert.VerifyOldVmDeleted(t) - return ctx }).Feature()