From 5dfc74aa47e219e9d846c3cb082a9e45d4c99439 Mon Sep 17 00:00:00 2001 From: Nir Soffer Date: Mon, 23 Sep 2024 16:56:09 +0300 Subject: [PATCH] Fix disable dr if VR failed validation When deleting a primary VRG, we wait until the VR Completed condition is met. However if a VR precondition failed, for example using a drpolicy without flattening enabled when the PVC needs flattening, the VR will never complete and the vrg and drpc deletion will never complete. Since csi-addons 0.10.0 we have a new Validated VR condition, set to true if preconditions are met, and false if not. When the condition is false, the VR can be deleted safely, since mirroring was not enabled. This change modifies deleted VRG processing to check the new VR Validated status. If the condition exists and the condition status is false, validateVRStatus() returns true, signaling that the VR is in the desired state, and ramen completes the delete flow. If the VR does not report the Validated condition (e.g. old csi-addons version) or the condition status is true (mirroring in progress), we continue in the normal flow. The VR will be deleted only when the Completed condition status is true. Tested with discovered deployment and vm using a pvc created from a volume snapshot. Signed-off-by: Nir Soffer --- internal/controller/vrg_volrep.go | 37 +++++++++++++++++++++++++++---- 1 file changed, 33 insertions(+), 4 deletions(-) diff --git a/internal/controller/vrg_volrep.go b/internal/controller/vrg_volrep.go index 0c4c4f743..5904ae767 100644 --- a/internal/controller/vrg_volrep.go +++ b/internal/controller/vrg_volrep.go @@ -1412,13 +1412,27 @@ func (v *VRGInstance) checkVRStatus(pvc *corev1.PersistentVolumeClaim, volRep *v } // validateVRStatus validates if the VolumeReplication resource has the desired status for the -// current generation and returns true if so, false otherwise -// - When replication state is Primary, only Completed condition is checked. 
-// - When replication state is Secondary, all 3 conditions for Completed/Degraded/Resyncing is -// checked and ensured healthy. +// current generation, deletion status, and replication state. +// +// We handle 3 cases: +// - Primary deleted VRG: If Validated condition exists and is false, the VR will never complete and can be +// deleted safely. Otherwise Completed condition is checked. +// - Primary VRG: Completed condition is checked. +// - Secondary VRG: Completed, Degraded and Resyncing conditions are checked and ensured healthy. func (v *VRGInstance) validateVRStatus(pvc *corev1.PersistentVolumeClaim, volRep *volrep.VolumeReplication, state ramendrv1alpha1.ReplicationState, ) bool { + // Check validated for primary during VRG deletion. + if state == ramendrv1alpha1.Primary && rmnutil.ResourceIsDeleted(v.instance) { + validated, ok := v.validateVRValidatedStatus(volRep) + if !validated && ok { + v.log.Info(fmt.Sprintf("VolumeReplication %s/%s failed validation and can be deleted", + volRep.GetName(), volRep.GetNamespace())) + + return true + } + } + // Check completed for both primary and secondary. if !v.validateVRCompletedStatus(pvc, volRep, state) { return false @@ -1441,6 +1455,21 @@ func (v *VRGInstance) validateVRStatus(pvc *corev1.PersistentVolumeClaim, volRep return true } +// validateVRValidatedStatus validates that VolumeReplication resource was validated. +// Return 2 booleans +// - validated: true if the condition is true, otherwise false +// - ok: true if the check was successful, false if the condition is missing, stale, or unknown. 
+func (v *VRGInstance) validateVRValidatedStatus( + volRep *volrep.VolumeReplication, +) (bool, bool) { + conditionMet, errorMsg := isVRConditionMet(volRep, volrep.ConditionValidated, metav1.ConditionTrue) + if errorMsg != "" { + v.log.Info(fmt.Sprintf("%s (VolRep: %s/%s)", errorMsg, volRep.GetName(), volRep.GetNamespace())) + } + + return conditionMet, errorMsg == "" +} + // validateVRCompletedStatus validates if the VolumeReplication resource Completed condition is met and update // the PVC DataReady and Protected conditions. // Returns true if the condtion is true, false if the condition is missing, stale, ubnknown, of false.