Skip to content

Commit

Permalink
fix: improve restore stability (#996)
Browse files Browse the repository at this point in the history
* fix: restore reconciler

Signed-off-by: Anatolii Bazko <[email protected]>
  • Loading branch information
tolusha authored Aug 10, 2021
1 parent bbb2b47 commit 7ff7399
Show file tree
Hide file tree
Showing 4 changed files with 32 additions and 14 deletions.
32 changes: 24 additions & 8 deletions controllers/checlusterrestore/backup_data_restorer.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,14 +124,25 @@ func cleanPreviousInstallation(rctx *RestoreContext, dataDir string) (bool, erro
}

// Delete Che CR to stop operator from dealing with current installation
err := rctx.r.client.Delete(context.TODO(), rctx.cheCR)
if err == nil {
// Che CR is marked for deletion, but actually still exists.
// Wait for finalizers and actual resource deletion (not found expected).
logrus.Info("Restore: Waiting for old Che CR finalizers to be completed")
return false, nil
} else if !errors.IsNotFound(err) {
actualCheCR, cheCRCount, err := util.FindCheCRinNamespace(rctx.r.client, rctx.namespace)
if cheCRCount == -1 {
// error occurred while retrieving CheCluster CR
return false, err
} else if actualCheCR != nil {
if actualCheCR.GetObjectMeta().GetDeletionTimestamp().IsZero() {
logrus.Infof("Restore: Deleteing CheCluster custom resource in '%s' namespace", rctx.namespace)
err := rctx.r.client.Delete(context.TODO(), actualCheCR)
if err == nil {
// Che CR is marked for deletion, but actually still exists.
// Wait for finalizers and actual resource deletion (not found expected).
logrus.Info("Restore: Waiting for old Che CR finalizers to be completed")
return false, nil
} else if !errors.IsNotFound(err) {
return false, err
}
} else {
return false, nil
}
}

// Define label selector for resources to clean up
Expand Down Expand Up @@ -316,11 +327,16 @@ func restoreCheCR(rctx *RestoreContext, dataDir string) (bool, error) {

if err := rctx.r.client.Create(context.TODO(), cheCR); err != nil {
if errors.IsAlreadyExists(err) {
return false, rctx.r.client.Delete(context.TODO(), cheCR)
// We should take into account that every step can be executed several times due to async behavior.
// 1. We ensured that CheCluster is removed before restoring.
// 2. If it is already created then it is safe to continue (was created here on a previous reconcile loop)
return true, nil
}
return false, err
}

logrus.Info("Restore: CheCluster custom resource created")

rctx.cheCR = cheCR
return true, nil
}
Expand Down
6 changes: 3 additions & 3 deletions controllers/checlusterrestore/checlusterrestore_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -211,13 +211,12 @@ func (r *ReconcileCheClusterRestore) doReconcile(restoreCR *chev1.CheClusterRest
return done, err
}

rctx.state.cheRestored = true
rctx.UpdateRestoreStatus()

// Clean up backup data after successful restore
if err := os.RemoveAll(backupDataDestDir); err != nil {
return false, err
}

rctx.state.cheRestored = true
}

rctx.restoreCR.Status.Message = "Restore successfully finished"
Expand Down Expand Up @@ -247,5 +246,6 @@ func (r *ReconcileCheClusterRestore) UpdateCRStatus(cr *chev1.CheClusterRestore)
logrus.Errorf("Failed to update %s CR status: %s", cr.Name, err.Error())
return err
}
logrus.Infof("Status updated with %v: ", cr.Status)
return nil
}
2 changes: 2 additions & 0 deletions controllers/checlusterrestore/restore_context.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
chev1 "github.com/eclipse-che/che-operator/api/v1"
backup "github.com/eclipse-che/che-operator/pkg/backup_servers"
"github.com/eclipse-che/che-operator/pkg/util"
"github.com/sirupsen/logrus"
)

type RestoreContext struct {
Expand Down Expand Up @@ -158,5 +159,6 @@ func NewRestoreState(restoreCR *chev1.CheClusterRestore) (*RestoreState, error)
}
}

logrus.Debugf("Restore state: %v", rs)
return rs, nil
}
6 changes: 3 additions & 3 deletions pkg/util/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -571,7 +571,7 @@ func ReloadCheCluster(client client.Client, cheCluster *orgv1.CheCluster) error
func FindCheCRinNamespace(client client.Client, namespace string) (*orgv1.CheCluster, int, error) {
cheClusters := &orgv1.CheClusterList{}
if err := client.List(context.TODO(), cheClusters); err != nil {
return nil, 0, err
return nil, -1, err
}

if len(cheClusters.Items) != 1 {
Expand All @@ -582,7 +582,7 @@ func FindCheCRinNamespace(client client.Client, namespace string) (*orgv1.CheClu
namespacedName := types.NamespacedName{Namespace: namespace, Name: cheClusters.Items[0].GetName()}
err := client.Get(context.TODO(), namespacedName, cheCR)
if err != nil {
return nil, 0, err
return nil, -1, err
}
return cheCR, 1, nil
}
Expand All @@ -609,6 +609,6 @@ func UpdateBackupServerConfigurationStatus(client client.Client, backupServerCon
// It is required to remove ResourceVersion in order to be able to apply the yaml again.
func ClearMetadata(objectMeta *metav1.ObjectMeta) {
	// Replacement values for the list-typed fields: empty (non-nil) slices.
	var (
		noFinalizers    = []string{}
		noManagedFields = []metav1.ManagedFieldsEntry{}
	)

	// Drop the recorded managed-fields entries and any finalizers
	// carried by the metadata.
	objectMeta.ManagedFields = noManagedFields
	objectMeta.Finalizers = noFinalizers

	// An empty ResourceVersion is what allows the yaml to be applied again.
	objectMeta.ResourceVersion = ""
}

0 comments on commit 7ff7399

Please sign in to comment.