drpc: set ProgressionWaitOnUserToCleanUp along with placement update
For discovered apps, we want the user to perform the cleanup of the
workload. We should advertise the progression to them at the same time
as we ask OCM/ACM to perform the cleanup.

Signed-off-by: Raghavendra Talur <[email protected]>
raghavendra-talur committed Nov 11, 2024
1 parent e0e3ab3 commit 7c6546d
Showing 6 changed files with 101 additions and 16 deletions.
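In short: "discovered" apps are those protected via DRPC.Spec.ProtectedNamespaces, and ACM does not manage their workloads, so only the user can remove them. A condensed sketch of the decision this commit applies at each call site (illustrative only; it assumes the ProgressionStatus type and constants from the ramen API, imported as rmn as in the diff):

    // progressionForCleanup is a hypothetical helper; the diff inlines this
    // check at each call site rather than factoring it out.
    func progressionForCleanup(protectedNamespaces *[]string) rmn.ProgressionStatus {
        // Discovered apps: ACM will not delete the workload, so advertise
        // WaitOnUserToCleanUp and wait for the user to do it.
        if protectedNamespaces != nil && len(*protectedNamespaces) > 0 {
            return rmn.ProgressionWaitOnUserToCleanUp
        }

        // Managed apps: OCM/ACM deletes the workload once the placement
        // moves, so proceed straight to CleaningUp.
        return rmn.ProgressionCleaningUp
    }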
12 changes: 10 additions & 2 deletions api/v1alpha1/volumereplicationgroup_types.go
@@ -35,6 +35,13 @@ const (
UnknownState State = "Unknown"
)

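// WorkloadStatus describes whether the protected workload is still active
// on a cluster, as reported in the VRG status.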
type WorkloadStatus string

const (
WorkloadStatusUnknown WorkloadStatus = "Unknown"
WorkloadStatusInactive WorkloadStatus = "Inactive"
)

// VRGAsyncSpec has the parameters associated with RegionalDR
type VRGAsyncSpec struct {
// Label selector to identify the VolumeReplicationClass resources
@@ -336,8 +343,9 @@ type VolumeReplicationGroupStatus struct {
//+optional
KubeObjectProtection KubeObjectProtectionStatus `json:"kubeObjectProtection,omitempty"`

PrepareForFinalSyncComplete bool `json:"prepareForFinalSyncComplete,omitempty"`
FinalSyncComplete bool `json:"finalSyncComplete,omitempty"`
PrepareForFinalSyncComplete bool `json:"prepareForFinalSyncComplete,omitempty"`
FinalSyncComplete bool `json:"finalSyncComplete,omitempty"`
WorkloadStatus string `json:"workloadStatus,omitempty"`

// lastGroupSyncTime is the time of the most recent successful synchronization of all PVCs
//+optional
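Note: the commit adds the workloadStatus field and its two values, but nothing in this diff populates the field yet; the TODOs in vshandler.go and vrg_volsync.go below suggest that wiring is follow-up work. A minimal sketch of what a VRG-side publisher could look like (hypothetical; updateWorkloadStatus and isWorkloadActive are assumptions, not part of this commit, and the API package is assumed imported as ramendrv1alpha1 as in vshandler.go):

    func (v *VRGInstance) updateWorkloadStatus() {
        // Only Unknown and Inactive are defined so far, so report Unknown
        // while the workload may still be running.
        if v.isWorkloadActive() {
            v.instance.Status.WorkloadStatus = string(ramendrv1alpha1.WorkloadStatusUnknown)

            return
        }

        v.instance.Status.WorkloadStatus = string(ramendrv1alpha1.WorkloadStatusInactive)
    }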
@@ -1088,6 +1088,8 @@ spec:
description: State captures the latest state of the replication
operation
type: string
workloadStatus:
type: string
type: object
type: object
type: array
@@ -1022,6 +1022,8 @@ spec:
state:
description: State captures the latest state of the replication operation
type: string
workloadStatus:
type: string
type: object
type: object
served: true
94 changes: 80 additions & 14 deletions internal/controller/drplacementcontrol.go
@@ -373,7 +373,7 @@ func (d *DRPCInstance) RunFailover() (bool, error) {
return !done, nil
}

return d.ensureActionCompleted(failoverCluster)
return d.ensureFailoverActionCompleted(failoverCluster)
} else if yes, err := d.mwExistsAndPlacementUpdated(failoverCluster); yes || err != nil {
// We have to wait for the VRG to appear on the failoverCluster or
// in case of an error, try again later
@@ -863,7 +863,7 @@ func (d *DRPCInstance) RunRelocate() (bool, error) {
addOrUpdateCondition(&d.instance.Status.Conditions, rmn.ConditionAvailable, d.instance.Generation,
metav1.ConditionTrue, string(d.instance.Status.Phase), "Completed")

return d.ensureActionCompleted(preferredCluster)
return d.ensureRelocateActionCompleted(preferredCluster)
}

d.setStatusInitiating()
@@ -896,6 +896,35 @@ func (d *DRPCInstance) RunRelocate() (bool, error) {
return d.relocate(preferredCluster, preferredClusterNamespace, rmn.Relocating)
}

func (d *DRPCInstance) ensureRelocateActionCompleted(srcCluster string) (bool, error) {
d.setProgression(rmn.ProgressionCleaningUp)

return d.ensureActionCompleted(srcCluster)
}

func (d *DRPCInstance) ensureFailoverActionCompleted(srcCluster string) (bool, error) {
done := true

// This is the time to clean up the workload from the preferredCluster.
// For managed apps, ACM does this automatically when we update the
// placement to the targetCluster, so we can set the progression to
// cleaning up. For discovered apps, we have to let the user know that
// they need to clean up the apps, so set the progression to wait on
// the user to clean up.
if d.instance.Spec.ProtectedNamespaces != nil &&
len(*d.instance.Spec.ProtectedNamespaces) > 0 {
d.setProgression(rmn.ProgressionWaitOnUserToCleanUp)
} else {
d.setProgression(rmn.ProgressionCleaningUp)
}

if !d.ensureWorkloadInactive(srcCluster) {
return !done, nil
}

return d.ensureActionCompleted(srcCluster)
}

func (d *DRPCInstance) ensureActionCompleted(srcCluster string) (bool, error) {
const done = true

@@ -909,8 +938,6 @@ func (d *DRPCInstance) ensureActionCompleted(srcCluster string) (bool, error) {
return !done, err
}

d.setProgression(rmn.ProgressionCleaningUp)

// Cleanup and setup VolSync if enabled
err = d.ensureCleanupAndVolSyncReplicationSetup(srcCluster)
if err != nil {
@@ -974,8 +1001,19 @@ func (d *DRPCInstance) quiesceAndRunFinalSync(homeCluster string) (bool, error)
addOrUpdateCondition(&d.instance.Status.Conditions, rmn.ConditionAvailable, d.instance.Generation,
d.getConditionStatusForTypeAvailable(), string(d.instance.Status.Phase), "Starting quiescing for relocation")

// clear current user PlacementRule's decision
d.setProgression(rmn.ProgressionClearingPlacement)
// We are about to clear the placement, which is when ACM starts
// deleting the workloads from the current cluster. For discovered
// apps, we have to let the user know that they need to clean up the
// apps from the current cluster, so set the progression to wait on
// the user to clean up. For managed apps, we can set the progression
// to clearing placement.
if d.instance.Spec.ProtectedNamespaces != nil &&
len(*d.instance.Spec.ProtectedNamespaces) > 0 {
d.setProgression(rmn.ProgressionWaitOnUserToCleanUp)
} else {
// clear current user PlacementRule's decision
d.setProgression(rmn.ProgressionClearingPlacement)
}

err := d.clearUserPlacementRuleStatus()
if err != nil {
@@ -990,6 +1028,11 @@ func (d *DRPCInstance) quiesceAndRunFinalSync(homeCluster string) (bool, error)
}

if !result {
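// For discovered apps the user performs the cleanup; hold the final
// sync until the VRG reports the workload inactive.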
if !d.ensureWorkloadInactive(homeCluster) {
d.log.Info("Final sync is waiting for workload to be inactive", "cluster", homeCluster)
return !done, nil

}

d.setProgression(rmn.ProgressionRunningFinalSync)

return !done, nil
@@ -2048,10 +2091,6 @@ func (d *DRPCInstance) ensureVRGManifestWorkOnClusterDeleted(clusterName string)

d.log.Info("Request not complete yet", "cluster", clusterName)

if d.instance.Spec.ProtectedNamespaces != nil && len(*d.instance.Spec.ProtectedNamespaces) > 0 {
d.setProgression(rmn.ProgressionWaitOnUserToCleanUp)
}

// IF we get here, either the VRG has not transitioned to secondary (yet) or delete didn't succeed. In either cases,
// we need to make sure that the VRG object is deleted. IOW, we still have to wait
return !done, nil
@@ -2067,10 +2106,6 @@ func (d *DRPCInstance) ensureVRGIsSecondaryEverywhere(clusterToSkip string) bool
continue
}

if d.instance.Spec.ProtectedNamespaces != nil && len(*d.instance.Spec.ProtectedNamespaces) > 0 {
d.setProgression(rmn.ProgressionWaitOnUserToCleanUp)
}

if !d.ensureVRGIsSecondaryOnCluster(clusterName) {
d.log.Info("Still waiting for VRG to transition to secondary", "cluster", clusterName)

@@ -2081,6 +2116,37 @@ func (d *DRPCInstance) ensureVRGIsSecondaryEverywhere(clusterToSkip string) bool
return true
}

// ensureWorkloadInactive returns true when the workload is not active on the cluster
func (d *DRPCInstance) ensureWorkloadInactive(clusterName string) bool {
d.log.Info(fmt.Sprintf("Ensure workload is inactive on %s", clusterName))

d.mcvRequestInProgress = false

annotations := make(map[string]string)

annotations[DRPCNameAnnotation] = d.instance.Name
annotations[DRPCNamespaceAnnotation] = d.instance.Namespace

vrg, err := d.reconciler.MCVGetter.GetVRGFromManagedCluster(d.instance.Name,
d.vrgNamespace, clusterName, annotations)
if err != nil {
d.log.Info("Failed to get VRG", "errorValue", err)

d.mcvRequestInProgress = true

return false
}

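// Trust WorkloadStatus only when the VRG status is current; a stale
// ObservedGeneration means the report may predate the latest spec change.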
if vrg.Status.WorkloadStatus != string(rmn.WorkloadStatusInactive) || vrg.Status.ObservedGeneration != vrg.Generation {
d.log.Info(fmt.Sprintf("VRG on %s has active workload.", clusterName))

return false
}

return true
}

// ensureVRGIsSecondaryOnCluster returns true when the VRG is secondary or does not exist on the cluster
func (d *DRPCInstance) ensureVRGIsSecondaryOnCluster(clusterName string) bool {
d.log.Info(fmt.Sprintf("Ensure VRG %s is secondary on cluster %s", d.instance.Name, clusterName))
6 changes: 6 additions & 0 deletions internal/controller/volsync/vshandler.go
@@ -364,6 +364,9 @@ func (v *VSHandler) validatePVCBeforeRS(rsSpec ramendrv1alpha1.VolSyncReplicatio
return false, nil
}

// RTALUR TODO

return true, nil // Good to proceed - PVC is not in use, not mounted to node (or does not exist-should not happen)
}

@@ -1634,6 +1637,8 @@ func (v *VSHandler) IsRDDataProtected(pvcName, pvcNamespace string) (bool, error
func (v *VSHandler) PrecreateDestPVCIfEnabled(rdSpec ramendrv1alpha1.VolSyncReplicationDestinationSpec,
) (*string, error) {
if !v.IsCopyMethodDirect() {
// TODO:
// We need to check the workload status even in other cases.
v.log.Info("Using default copyMethod of Snapshot")

return nil, nil // use default copyMethod
@@ -1659,6 +1664,7 @@ func (v *VSHandler) PrecreateDestPVCIfEnabled(rdSpec ramendrv1alpha1.VolSyncRepl
util.ProtectedPVCNamespacedName(rdSpec.ProtectedPVC))
}

v.log.Info(fmt.Sprintf("Using App PVC %s for syncing directly to it",
util.ProtectedPVCNamespacedName(rdSpec.ProtectedPVC)))
// Using the application PVC for syncing from source to destination and save a snapshot
1 change: 1 addition & 0 deletions internal/controller/vrg_volsync.go
@@ -225,6 +225,7 @@ func (v *VRGInstance) reconcileVolSyncAsSecondary() bool {
// If we are secondary, and RDSpec is not set, then we don't want to have any PVC
// flagged as a VolSync PVC.
if v.instance.Spec.VolSync.RDSpec == nil {
// RTALUR TODO: return
idx := 0

for _, protectedPVC := range v.instance.Status.ProtectedPVCs {
