diff --git a/changelogs/unreleased/6479-allenxu404 b/changelogs/unreleased/6479-allenxu404 new file mode 100644 index 00000000000..559fe2046af --- /dev/null +++ b/changelogs/unreleased/6479-allenxu404 @@ -0,0 +1 @@ +Add restore finalizer to clean up external resources \ No newline at end of file diff --git a/pkg/cmd/cli/uninstall/uninstall.go b/pkg/cmd/cli/uninstall/uninstall.go index 884ba0312a4..79ed048eb32 100644 --- a/pkg/cmd/cli/uninstall/uninstall.go +++ b/pkg/cmd/cli/uninstall/uninstall.go @@ -35,6 +35,7 @@ import ( "k8s.io/apimachinery/pkg/util/wait" kbclient "sigs.k8s.io/controller-runtime/pkg/client" + velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1" "github.com/vmware-tanzu/velero/pkg/client" "github.com/vmware-tanzu/velero/pkg/cmd" "github.com/vmware-tanzu/velero/pkg/cmd/cli" @@ -159,6 +160,12 @@ func Run(ctx context.Context, kbClient kbclient.Client, namespace string) error } func deleteNamespace(ctx context.Context, kbClient kbclient.Client, namespace string) error { + // delete resources with finalizer attached first to ensure finalizer can be handled by corresponding controller and resources can be deleted successfully before controller's pod is deleted. + // otherwise the process of deleting namespace will get stuck in deleting those resources forever. + if err := deleteResourcesWithFinalizer(ctx, kbClient, namespace); err != nil { + return errors.Wrap(err, "Fail to remove finalizer from restores") + } + ns := &corev1.Namespace{} key := kbclient.ObjectKey{Name: namespace} if err := kbClient.Get(ctx, key, ns); err != nil { @@ -176,7 +183,7 @@ func deleteNamespace(ctx context.Context, kbClient kbclient.Client, namespace st } return err } - + fmt.Println() fmt.Printf("Waiting for velero namespace %q to be deleted\n", namespace) ctx, cancel := context.WithCancel(ctx) defer cancel() @@ -203,3 +210,48 @@ func deleteNamespace(ctx context.Context, kbClient kbclient.Client, namespace st fmt.Printf("Velero namespace %q deleted\n", namespace) return nil } + +func deleteResourcesWithFinalizer(ctx context.Context, kbClient kbclient.Client, namespace string) error { + // delete restores + restoreList := &velerov1api.RestoreList{} + if err := kbClient.List(ctx, restoreList, &kbclient.ListOptions{Namespace: namespace}); err != nil { + return err + } + + for i := range restoreList.Items { + if err := kbClient.Delete(ctx, &restoreList.Items[i]); err != nil { + if apierrors.IsNotFound(err) { + continue + } + return err + } + } + + fmt.Println("Waiting for resource with finalizer attached to be deleted") + ctx, cancel := context.WithCancel(ctx) + defer cancel() + + var err error + checkFunc := func() { + restoreList := &velerov1api.RestoreList{} + if err = kbClient.List(ctx, restoreList, &kbclient.ListOptions{Namespace: namespace}); err != nil { + cancel() + return + } + + if len(restoreList.Items) > 0 { + fmt.Print(".") + } else { + cancel() + return + } + } + + // wait until all the restores are deleted + wait.Until(checkFunc, 100*time.Millisecond, ctx.Done()) + if err != nil { + return err + } + + return nil +} diff --git a/pkg/controller/restore_controller.go b/pkg/controller/restore_controller.go index 923b33bdfeb..b118781eb3d 100644 --- a/pkg/controller/restore_controller.go +++ b/pkg/controller/restore_controller.go @@ -30,6 +30,7 @@ import ( "github.com/pkg/errors" "github.com/sirupsen/logrus" corev1api "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/labels" "k8s.io/apimachinery/pkg/types" @@ -38,6 +39,7 @@ import ( "k8s.io/utils/clock" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" "github.com/vmware-tanzu/velero/internal/hook" "github.com/vmware-tanzu/velero/internal/resourcemodifiers" @@ -86,6 +88,8 @@ var nonRestorableResources = []string{ "backuprepositories.velero.io", } +var ExternalResourcesFinalizer = "restores.velero.io/external-resources-finalizer" + type restoreReconciler struct { ctx context.Context namespace string @@ -157,10 +161,58 @@ func (r *restoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct restore := &api.Restore{} err := r.kbClient.Get(ctx, client.ObjectKey{Namespace: req.Namespace, Name: req.Name}, restore) if err != nil { - log.Infof("Fail to get restore %s: %s", req.NamespacedName.String(), err.Error()) + if apierrors.IsNotFound(err) { + log.Debugf("restore[%s] not found", req.Name) + return ctrl.Result{}, nil + } + + log.Errorf("Fail to get restore %s: %s", req.NamespacedName.String(), err.Error()) return ctrl.Result{}, err } + // deal with finalizer + if !restore.DeletionTimestamp.IsZero() { + // check the finalizer and run clean-up + if controllerutil.ContainsFinalizer(restore, ExternalResourcesFinalizer) { + if err := r.deleteExternalResources(restore); err != nil { + log.Errorf("fail to delete external resources: %s", err.Error()) + return ctrl.Result{}, err + } + // once finish clean-up, remove the finalizer from the restore so that the restore will be unlocked and deleted. + original := restore.DeepCopy() + controllerutil.RemoveFinalizer(restore, ExternalResourcesFinalizer) + if err := kubeutil.PatchResource(original, restore, r.kbClient); err != nil { + log.Errorf("fail to remove finalizer: %s", err.Error()) + return ctrl.Result{}, err + } + return ctrl.Result{}, nil + } else { + log.Error("DeletionTimestamp is marked but can't find the expected finalizer") + return ctrl.Result{}, nil + } + } + + // add finalizer + if restore.DeletionTimestamp.IsZero() && !controllerutil.ContainsFinalizer(restore, ExternalResourcesFinalizer) { + original := restore.DeepCopy() + controllerutil.AddFinalizer(restore, ExternalResourcesFinalizer) + if err := kubeutil.PatchResource(original, restore, r.kbClient); err != nil { + log.Errorf("fail to add finalizer: %s", err.Error()) + return ctrl.Result{}, err + } + } + + switch restore.Status.Phase { + case "", api.RestorePhaseNew: + // only process new restores + default: + r.logger.WithFields(logrus.Fields{ + "restore": kubeutil.NamespaceAndName(restore), + "phase": restore.Status.Phase, + }).Debug("Restore is not handled") + return ctrl.Result{}, nil + } + // store a copy of the original restore for creating patch original := restore.DeepCopy() @@ -213,6 +265,7 @@ func (r *restoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct restore.Status.CompletionTimestamp = &metav1.Time{Time: r.clock.Now()} } log.Debug("Updating restore's final status") + if err = kubeutil.PatchResource(original, restore, r.kbClient); err != nil { log.WithError(errors.WithStack(err)).Info("Error updating restore's final status") // No need to re-enqueue here, because restore's already set to InProgress before. @@ -224,21 +277,6 @@ func (r *restoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct func (r *restoreReconciler) SetupWithManager(mgr ctrl.Manager) error { return ctrl.NewControllerManagedBy(mgr). - WithEventFilter(kubeutil.NewCreateEventPredicate(func(obj client.Object) bool { - restore := obj.(*api.Restore) - - switch restore.Status.Phase { - case "", api.RestorePhaseNew: - // only process new restores - return true - default: - r.logger.WithFields(logrus.Fields{ - "restore": kubeutil.NamespaceAndName(restore), - "phase": restore.Status.Phase, - }).Debug("Restore is not new, skipping") - return false - } - })). For(&api.Restore{}). Complete(r) } @@ -623,6 +661,40 @@ func (r *restoreReconciler) updateTotalRestoreMetric() { }() } +// deleteExternalResources deletes all the external resources related to the restore +func (r *restoreReconciler) deleteExternalResources(restore *api.Restore) error { + r.logger.Infof("Finalizer is deleting external resources, backup: %s", restore.Spec.BackupName) + + if restore.Spec.BackupName == "" { + return nil + } + + backupInfo, err := r.fetchBackupInfo(restore.Spec.BackupName) + if err != nil { + return errors.Wrap(err, fmt.Sprintf("can't get backup info, backup: %s", restore.Spec.BackupName)) + } + + // if storage locations is read-only, skip deletion + if backupInfo.location.Spec.AccessMode == api.BackupStorageLocationAccessModeReadOnly { + return nil + } + + // delete restore files in object storage + pluginManager := r.newPluginManager(r.logger) + defer pluginManager.CleanupClients() + + backupStore, err := r.backupStoreGetter.Get(backupInfo.location, pluginManager, r.logger) + if err != nil { + return errors.Wrap(err, fmt.Sprintf("can't get backupStore, backup: %s", restore.Spec.BackupName)) + } + + if err = backupStore.DeleteRestore(restore.Name); err != nil { + return errors.Wrap(err, fmt.Sprintf("can't delete restore files in object storage, backup: %s", restore.Spec.BackupName)) + } + + return nil +} + func putResults(restore *api.Restore, results map[string]results.Result, backupStore persistence.BackupStore) error { buf := new(bytes.Buffer) gzw := gzip.NewWriter(buf) diff --git a/pkg/controller/restore_controller_test.go b/pkg/controller/restore_controller_test.go index 2ed22dcec93..e36b23204ea 100644 --- a/pkg/controller/restore_controller_test.go +++ b/pkg/controller/restore_controller_test.go @@ -158,16 +158,10 @@ func TestProcessQueueItemSkips(t *testing.T) { expectError bool }{ { - name: "invalid key returns error", - namespace: "invalid", - restoreName: "key/value", - expectError: true, - }, - { - name: "missing restore returns error", + name: "missing restore returns nil", namespace: "foo", restoreName: "bar", - expectError: true, + expectError: false, }, } @@ -237,6 +231,7 @@ func TestRestoreReconcile(t *testing.T) { backupStoreGetBackupContentsErr error putRestoreLogErr error expectedFinalPhase string + addValidFinalizer bool }{ { name: "restore with both namespace in both includedNamespaces and excludedNamespaces fails validation", @@ -394,6 +389,22 @@ func TestRestoreReconcile(t *testing.T) { backupStoreGetBackupContentsErr: errors.New("Couldn't download backup"), backup: defaultBackup().StorageLocation("default").Result(), }, + { + name: "restore attached with an expected finalizer gets cleaned up successfully", + location: defaultStorageLocation, + restore: NewRestore("foo", "bar", "backup-1", "ns-1", "", velerov1api.RestorePhaseCompleted).ObjectMeta(builder.WithFinalizers(ExternalResourcesFinalizer), builder.WithDeletionTimestamp(timestamp.Time)).Result(), + backup: defaultBackup().StorageLocation("default").Result(), + expectedErr: false, + addValidFinalizer: true, + }, + { + name: "restore attached with an unknown finalizer will be skipped", + location: defaultStorageLocation, + restore: NewRestore("foo", "bar", "backup-1", "ns-1", "", velerov1api.RestorePhaseCompleted).ObjectMeta(builder.WithFinalizers("restores.velero.io/unknown-finalizer"), builder.WithDeletionTimestamp(timestamp.Time)).Result(), + backup: defaultBackup().StorageLocation("default").Result(), + expectedErr: false, + addValidFinalizer: false, + }, } formatFlag := logging.FormatText @@ -486,6 +497,10 @@ func TestRestoreReconcile(t *testing.T) { pluginManager.On("CleanupClients") } + if test.addValidFinalizer { + backupStore.On("DeleteRestore", test.restore.Name).Return(nil) + } + //err = r.processQueueItem(key) _, err = r.Reconcile(context.Background(), ctrl.Request{NamespacedName: types.NamespacedName{ Namespace: test.restore.Namespace, @@ -539,7 +554,9 @@ func TestRestoreReconcile(t *testing.T) { assert.Zero(t, restorer.calledWithArg) return } - assert.Equal(t, 1, len(restorer.Calls)) + if !test.addValidFinalizer { + assert.Equal(t, 1, len(restorer.Calls)) + } // validate Patch call 2 (setting phase)