From 05a011c36cc7c77ecefaeaf0f9fb6cdc65eec73d Mon Sep 17 00:00:00 2001 From: Nir Soffer Date: Wed, 25 Sep 2024 14:14:13 +0300 Subject: [PATCH 1/5] Fix waitForVolRepPromotion check and error message This helper fail with this error: "failed to wait for volRep condition type to change to 'ConditionCompleted' (%d)" But the actual check was: len(updatedVolRep.Status.Conditions) == 3 and no condition status was checked. This causes 24 tests to fail when adding the new Validated condition to match csi-addons 0.10.0 conditions. Now we check that the Completed condition is true and log a more sensible error message. Signed-off-by: Nir Soffer --- internal/controller/vrg_volrep_test.go | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/internal/controller/vrg_volrep_test.go b/internal/controller/vrg_volrep_test.go index 73e4e3e20..b4f4882bc 100644 --- a/internal/controller/vrg_volrep_test.go +++ b/internal/controller/vrg_volrep_test.go @@ -2273,11 +2273,18 @@ func (v *vrgTest) waitForVolRepPromotion(vrNamespacedName types.NamespacedName) Eventually(func() bool { err := k8sClient.Get(context.TODO(), vrNamespacedName, &updatedVolRep) + if err != nil { + return false + } + + condition := meta.FindStatusCondition(updatedVolRep.Status.Conditions, volrep.ConditionCompleted) + if condition == nil { + return false + } - return err == nil && len(updatedVolRep.Status.Conditions) == 3 + return condition.Status == metav1.ConditionTrue }, vrgtimeout, vrginterval).Should(BeTrue(), - "failed to wait for volRep condition type to change to 'ConditionCompleted' (%d)", - len(updatedVolRep.Status.Conditions)) + "failed to wait for volRep condition %q to become %q", volrep.ConditionCompleted, metav1.ConditionTrue) Eventually(func() bool { vrg := v.getVRG() From 9a8ae781991e06f8f385025c271744d531ce7f06 Mon Sep 17 00:00:00 2001 From: Nir Soffer Date: Wed, 25 Sep 2024 20:58:40 +0300 Subject: [PATCH 2/5] Split waitForPromotion() to allow testing negative cases Split to: - waitForVolRepCondition() - accept condition name and status. With this we can wait until VR is Completed=true, or Validated=false. - waitForProtectedPVC() - wait until the related protected pvc is successful. Used only for positive tests. Signed-off-by: Nir Soffer --- internal/controller/vrg_volrep_test.go | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/internal/controller/vrg_volrep_test.go b/internal/controller/vrg_volrep_test.go index b4f4882bc..14eb950cf 100644 --- a/internal/controller/vrg_volrep_test.go +++ b/internal/controller/vrg_volrep_test.go @@ -2224,7 +2224,8 @@ func (v *vrgTest) promoteVolRepsAndDo(do func(int, int)) { Name: volRep.Name, Namespace: volRep.Namespace, } - v.waitForVolRepPromotion(volrepKey) + v.waitForVolRepCondition(volrepKey, volrep.ConditionCompleted, metav1.ConditionTrue) + v.waitForProtectedPVCs(volrepKey) do(index, len(volRepList.Items)) } @@ -2268,7 +2269,11 @@ func (v *vrgTest) unprotectDeletionOfVolReps() { } } -func (v *vrgTest) waitForVolRepPromotion(vrNamespacedName types.NamespacedName) { +func (v *vrgTest) waitForVolRepCondition( + vrNamespacedName types.NamespacedName, + conditionType string, + conditionStatus metav1.ConditionStatus, +) { updatedVolRep := volrep.VolumeReplication{} Eventually(func() bool { @@ -2277,21 +2282,23 @@ func (v *vrgTest) waitForVolRepPromotion(vrNamespacedName types.NamespacedName) return false } - condition := meta.FindStatusCondition(updatedVolRep.Status.Conditions, volrep.ConditionCompleted) + condition := meta.FindStatusCondition(updatedVolRep.Status.Conditions, conditionType) if condition == nil { return false } - return condition.Status == metav1.ConditionTrue + return condition.Status == conditionStatus }, vrgtimeout, vrginterval).Should(BeTrue(), - "failed to wait for volRep condition %q to become %q", volrep.ConditionCompleted, metav1.ConditionTrue) + "failed to wait for volRep condition %q to become %q", conditionType, conditionStatus) +} +func (v *vrgTest) waitForProtectedPVCs(vrNamespacedName types.NamespacedName) { Eventually(func() bool { vrg := v.getVRG() // as of now name of VolumeReplication resource created by the VolumeReplicationGroup // is same as the pvc that it replicates. When that changes this has to be changed to // use the right name to get the appropriate protected PVC condition from VRG status. - protectedPVC := vrgController.FindProtectedPVC(vrg, updatedVolRep.Namespace, updatedVolRep.Name) + protectedPVC := vrgController.FindProtectedPVC(vrg, vrNamespacedName.Namespace, vrNamespacedName.Name) // failed to get the protectedPVC. Returning false if protectedPVC == nil { @@ -2300,7 +2307,7 @@ func (v *vrgTest) waitForVolRepPromotion(vrNamespacedName types.NamespacedName) return v.checkProtectedPVCSuccess(vrg, protectedPVC) }, vrgtimeout, vrginterval).Should(BeTrue(), - "while waiting for protected pvc condition %s/%s", updatedVolRep.Namespace, updatedVolRep.Name) + "while waiting for protected pvc condition %s/%s", vrNamespacedName.Namespace, vrNamespacedName.Name) } func (v *vrgTest) checkProtectedPVCSuccess(vrg *ramendrv1alpha1.VolumeReplicationGroup, From ef4f2d991b64d99db0f198f4456d16102ba14702 Mon Sep 17 00:00:00 2001 From: Nir Soffer Date: Wed, 25 Sep 2024 14:19:40 +0300 Subject: [PATCH 3/5] Allow simulation of abnormal VR valiadtion Add the new VR Validate condition to all tests, and allow testing negative flows like VR that failed validation or missing Validated condition. New promoteOptions{} allows simulation of 2 issues: - Validated failed: The VR will never complete, blocking deletion of the VR and VRG. With this fix deletion will succeed. - Validated condition missing - running ramen 4.17 on system with older csi-addons that does not report the condition. In this case ramen continue normally, and deletion of the VR and VRG will be blocked. We can extend this later to support more options. Signed-off-by: Nir Soffer --- internal/controller/vrg_volrep_test.go | 113 ++++++++++++++++++------- 1 file changed, 82 insertions(+), 31 deletions(-) diff --git a/internal/controller/vrg_volrep_test.go b/internal/controller/vrg_volrep_test.go index 14eb950cf..3973373de 100644 --- a/internal/controller/vrg_volrep_test.go +++ b/internal/controller/vrg_volrep_test.go @@ -2164,17 +2164,22 @@ func (v *vrgTest) waitForVRCountToMatch(vrCount int) { } func (v *vrgTest) promoteVolReps() { - v.promoteVolRepsAndDo(func(index, count int) { + v.promoteVolRepsAndDo(promoteOptions{}, func(index, count int) { // VRG should not be ready until last VolRep is ready. v.verifyVRGStatusExpectation(index == count-1, vrgController.VRGConditionReasonReady) }) } func (v *vrgTest) promoteVolRepsWithoutVrgStatusCheck() { - v.promoteVolRepsAndDo(func(index, count int) {}) + v.promoteVolRepsAndDo(promoteOptions{}, func(index, count int) {}) } -func (v *vrgTest) promoteVolRepsAndDo(do func(int, int)) { +type promoteOptions struct { + ValidatedMissing bool + ValidatedFailed bool +} + +func (v *vrgTest) promoteVolRepsAndDo(options promoteOptions, do func(int, int)) { By("Promoting VolumeReplication resources " + v.namespace) volRepList := &volrep.VolumeReplicationList{} @@ -2188,33 +2193,17 @@ func (v *vrgTest) promoteVolRepsAndDo(do func(int, int)) { volRep := volRepList.Items[index] volRepStatus := volrep.VolumeReplicationStatus{ - Conditions: []metav1.Condition{ - { - Type: volrep.ConditionCompleted, - Reason: volrep.Promoted, - ObservedGeneration: volRep.Generation, - Status: metav1.ConditionTrue, - LastTransitionTime: metav1.NewTime(time.Now()), - }, - { - Type: volrep.ConditionDegraded, - Reason: volrep.Healthy, - ObservedGeneration: volRep.Generation, - Status: metav1.ConditionFalse, - LastTransitionTime: metav1.NewTime(time.Now()), - }, - { - Type: volrep.ConditionResyncing, - Reason: volrep.NotResyncing, - ObservedGeneration: volRep.Generation, - Status: metav1.ConditionFalse, - LastTransitionTime: metav1.NewTime(time.Now()), - }, - }, + Conditions: v.generateVRConditions(volRep.Generation, options), + ObservedGeneration: volRep.Generation, + State: volrep.PrimaryState, + Message: "volume is marked primary", + } + + if options.ValidatedFailed { + volRepStatus.State = volrep.UnknownState + volRepStatus.Message = "precondition failed ..." } - volRepStatus.ObservedGeneration = volRep.Generation - volRepStatus.State = volrep.PrimaryState - volRepStatus.Message = "volume is marked primary" + volRep.Status = volRepStatus err = k8sClient.Status().Update(context.TODO(), &volRep) @@ -2224,13 +2213,75 @@ func (v *vrgTest) promoteVolRepsAndDo(do func(int, int)) { Name: volRep.Name, Namespace: volRep.Namespace, } - v.waitForVolRepCondition(volrepKey, volrep.ConditionCompleted, metav1.ConditionTrue) - v.waitForProtectedPVCs(volrepKey) + + if options.ValidatedFailed { + if options.ValidatedMissing { + v.waitForVolRepCondition(volrepKey, volrep.ConditionCompleted, metav1.ConditionFalse) + } else { + v.waitForVolRepCondition(volrepKey, volrep.ConditionValidated, metav1.ConditionFalse) + } + } else { + v.waitForVolRepCondition(volrepKey, volrep.ConditionCompleted, metav1.ConditionTrue) + v.waitForProtectedPVCs(volrepKey) + } do(index, len(volRepList.Items)) } } +func (v *vrgTest) generateVRConditions(generation int64, options promoteOptions) []metav1.Condition { + var conditions []metav1.Condition + + lastTransitionTime := metav1.NewTime(time.Now()) + + if !options.ValidatedMissing { + validated := metav1.Condition{ + Type: volrep.ConditionValidated, + Reason: volrep.PrerequisiteNotMet, + ObservedGeneration: generation, + Status: metav1.ConditionFalse, + LastTransitionTime: lastTransitionTime, + } + + if options.ValidatedFailed { + validated.Status = metav1.ConditionFalse + validated.Reason = volrep.PrerequisiteNotMet + } + + conditions = append(conditions, validated) + } + + completed := metav1.Condition{ + Type: volrep.ConditionCompleted, + Reason: volrep.Promoted, + ObservedGeneration: generation, + Status: metav1.ConditionTrue, + LastTransitionTime: lastTransitionTime, + } + + if options.ValidatedFailed { + completed.Status = metav1.ConditionFalse + completed.Reason = volrep.FailedToPromote + } + + degraded := metav1.Condition{ + Type: volrep.ConditionDegraded, + Reason: volrep.Healthy, + ObservedGeneration: generation, + Status: metav1.ConditionFalse, + LastTransitionTime: lastTransitionTime, + } + resyncing := metav1.Condition{ + Type: volrep.ConditionResyncing, + Reason: volrep.NotResyncing, + ObservedGeneration: generation, + Status: metav1.ConditionFalse, + LastTransitionTime: lastTransitionTime, + } + + return append(conditions, completed, degraded, resyncing) +} + func (v *vrgTest) protectDeletionOfVolReps() { By("Adding a finalizer to protect VolumeReplication resources being deleted " + v.namespace) From ff739ec8a5272d66a4824cc47613172ce94dd464 Mon Sep 17 00:00:00 2001 From: Nir Soffer Date: Wed, 25 Sep 2024 21:05:18 +0300 Subject: [PATCH 4/5] Test deleting a VRG when VR failed validation This simulates the bug when user select the wrong dr policy without flattening enabled when protecting for a PVC that needs flattening. Without the fix this test will timeout when deleting the VRG. With the fix the VRG should be deleted. Signed-off-by: Nir Soffer --- internal/controller/vrg_volrep_test.go | 49 ++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/internal/controller/vrg_volrep_test.go b/internal/controller/vrg_volrep_test.go index 3973373de..6a06cd7ab 100644 --- a/internal/controller/vrg_volrep_test.go +++ b/internal/controller/vrg_volrep_test.go @@ -614,6 +614,51 @@ var _ = Describe("VolumeReplicationGroupVolRepController", func() { }) }) + // Test VRG deletion when VR failed validation + var vrgDeleteFailedVR *vrgTest + //nolint:dupl + Context("VR failed validation in primary state", func() { + createTestTemplate := &template{ + ClaimBindInfo: corev1.ClaimBound, + VolumeBindInfo: corev1.VolumeBound, + schedulingInterval: "1h", + storageClassName: "manual", + replicationClassName: "test-replicationclass", + vrcProvisioner: "manual.storage.com", + scProvisioner: "manual.storage.com", + replicationClassLabels: map[string]string{"protection": "ramen"}, + } + It("sets up PVCs, PVs and VRGs (with s3 stores that fail uploads)", func() { + createTestTemplate.s3Profiles = []string{s3Profiles[vrgS3ProfileNumber].S3ProfileName} + vrgDeleteFailedVR = newVRGTestCaseCreateAndStart(1, createTestTemplate, true, false) + }) + It("waits for VRG to create a VR for each PVC", func() { + expectedVRCount := len(vrgDeleteFailedVR.pvcNames) + vrgDeleteFailedVR.waitForVRCountToMatch(expectedVRCount) + }) + It("simulate VR with failed validation", func() { + vrgDeleteFailedVR.promoteVolRepsWithOptions(promoteOptions{ValidatedFailed: true}) + }) + It("VRG can be deleted", func() { + By("deleting the VRG") + vrg := vrgDeleteFailedVR.getVRG() + Expect(k8sClient.Delete(context.TODO(), vrg)).To(Succeed()) + + By("ensuring VRG is deleted") + Eventually(func() error { + return apiReader.Get(context.TODO(), vrgDeleteFailedVR.vrgNamespacedName(), vrg) + }, vrgtimeout, vrginterval). + Should(MatchError(errors.NewNotFound(schema.GroupResource{ + Group: ramendrv1alpha1.GroupVersion.Group, + Resource: "volumereplicationgroups", + }, vrgDeleteFailedVR.vrgName))) + + vrgDeleteFailedVR.cleanupNamespace() + vrgDeleteFailedVR.cleanupSC() + vrgDeleteFailedVR.cleanupVRC() + }) + }) + // Try the simple case of creating VRG, PVC, PV and // check whether VolRep resources are created or not var vrgTestCases []*vrgTest @@ -2174,6 +2219,10 @@ func (v *vrgTest) promoteVolRepsWithoutVrgStatusCheck() { v.promoteVolRepsAndDo(promoteOptions{}, func(index, count int) {}) } +func (v *vrgTest) promoteVolRepsWithOptions(options promoteOptions) { + v.promoteVolRepsAndDo(options, func(index, count int) {}) +} + type promoteOptions struct { ValidatedMissing bool ValidatedFailed bool From 1f247fc26cd6a2d493e04c17eb0184d411b3ffaa Mon Sep 17 00:00:00 2001 From: Nir Soffer Date: Thu, 26 Sep 2024 16:34:34 +0300 Subject: [PATCH 5/5] Test delete VRG with an older csi-addons In csi-addons < 0.10.0 we could not detect failed validation since the Validated condition is missing. Add 2 tests: - VR failed validation: the VRG should block deletion until the VR is deleted manually. - VR completed: the VRG should not block deletion because the Validated condition is missing. Signed-off-by: Nir Soffer --- internal/controller/vrg_volrep_test.go | 112 +++++++++++++++++++++++++ 1 file changed, 112 insertions(+) diff --git a/internal/controller/vrg_volrep_test.go b/internal/controller/vrg_volrep_test.go index 6a06cd7ab..74e9027e9 100644 --- a/internal/controller/vrg_volrep_test.go +++ b/internal/controller/vrg_volrep_test.go @@ -659,6 +659,105 @@ var _ = Describe("VolumeReplicationGroupVolRepController", func() { }) }) + // Test VRG deletion when VR failed validation and Validated condition is missing (csi-addons < 0.10.0) + var vrgDeleteIncompleteVR *vrgTest + //nolint:dupl + Context("VR failed validation in primary state and Validated condition is missing", func() { + createTestTemplate := &template{ + ClaimBindInfo: corev1.ClaimBound, + VolumeBindInfo: corev1.VolumeBound, + schedulingInterval: "1h", + storageClassName: "manual", + replicationClassName: "test-replicationclass", + vrcProvisioner: "manual.storage.com", + scProvisioner: "manual.storage.com", + replicationClassLabels: map[string]string{"protection": "ramen"}, + } + It("sets up PVCs, PVs and VRGs (with s3 stores that fail uploads)", func() { + createTestTemplate.s3Profiles = []string{s3Profiles[vrgS3ProfileNumber].S3ProfileName} + vrgDeleteIncompleteVR = newVRGTestCaseCreateAndStart(1, createTestTemplate, true, false) + }) + It("waits for VRG to create a VR for each PVC", func() { + expectedVRCount := len(vrgDeleteFailedVR.pvcNames) + vrgDeleteIncompleteVR.waitForVRCountToMatch(expectedVRCount) + }) + It("simulate incomplete VR", func() { + vrgDeleteIncompleteVR.promoteVolRepsWithOptions(promoteOptions{ValidatedFailed: true, ValidatedMissing: true}) + }) + It("VRG can not be deleted", func() { + By("deleting the VRG") + vrg := vrgDeleteIncompleteVR.getVRG() + Expect(k8sClient.Delete(context.TODO(), vrg)).To(Succeed()) + + By("ensuring VRG cannot be deleted") + Consistently(func() error { + return apiReader.Get(context.TODO(), vrgDeleteIncompleteVR.vrgNamespacedName(), vrg) + }, vrgtimeout, vrginterval). + Should(Succeed(), "VRG %s was deleted when VR is incomplete", vrgDeleteIncompleteVR.vrgName) + + By("deleting the VRs") + vrgDeleteIncompleteVR.deleteVolReps() + + By("ensuring the VRG is deleted") + Eventually(func() error { + return apiReader.Get(context.TODO(), vrgDeleteFailedVR.vrgNamespacedName(), vrg) + }, vrgtimeout, vrginterval). + Should(MatchError(errors.NewNotFound(schema.GroupResource{ + Group: ramendrv1alpha1.GroupVersion.Group, + Resource: "volumereplicationgroups", + }, vrgDeleteFailedVR.vrgName))) + + vrgDeleteIncompleteVR.cleanupNamespace() + vrgDeleteIncompleteVR.cleanupSC() + vrgDeleteIncompleteVR.cleanupVRC() + }) + }) + + // Test VRG deletion when VR completed and Validated condition is missing (csi-addons < 0.10.0) + var vrgDeleteCompletedVR *vrgTest + //nolint:dupl + Context("VR failed validation in primary state and Validated condition is missing", func() { + createTestTemplate := &template{ + ClaimBindInfo: corev1.ClaimBound, + VolumeBindInfo: corev1.VolumeBound, + schedulingInterval: "1h", + storageClassName: "manual", + replicationClassName: "test-replicationclass", + vrcProvisioner: "manual.storage.com", + scProvisioner: "manual.storage.com", + replicationClassLabels: map[string]string{"protection": "ramen"}, + } + It("sets up PVCs, PVs and VRGs (with s3 stores that fail uploads)", func() { + createTestTemplate.s3Profiles = []string{s3Profiles[vrgS3ProfileNumber].S3ProfileName} + vrgDeleteCompletedVR = newVRGTestCaseCreateAndStart(1, createTestTemplate, true, false) + }) + It("waits for VRG to create a VR for each PVC", func() { + expectedVRCount := len(vrgDeleteFailedVR.pvcNames) + vrgDeleteCompletedVR.waitForVRCountToMatch(expectedVRCount) + }) + It("simulate completed VR", func() { + vrgDeleteCompletedVR.promoteVolRepsWithOptions(promoteOptions{ValidatedMissing: true}) + }) + It("VRG can be deleted", func() { + By("deleting the VRG") + vrg := vrgDeleteCompletedVR.getVRG() + Expect(k8sClient.Delete(context.TODO(), vrg)).To(Succeed()) + + By("ensuring the VRG is deleted") + Eventually(func() error { + return apiReader.Get(context.TODO(), vrgDeleteFailedVR.vrgNamespacedName(), vrg) + }, vrgtimeout, vrginterval). + Should(MatchError(errors.NewNotFound(schema.GroupResource{ + Group: ramendrv1alpha1.GroupVersion.Group, + Resource: "volumereplicationgroups", + }, vrgDeleteFailedVR.vrgName))) + + vrgDeleteCompletedVR.cleanupNamespace() + vrgDeleteCompletedVR.cleanupSC() + vrgDeleteCompletedVR.cleanupVRC() + }) + }) + // Try the simple case of creating VRG, PVC, PV and // check whether VolRep resources are created or not var vrgTestCases []*vrgTest @@ -2331,6 +2430,19 @@ func (v *vrgTest) generateVRConditions(generation int64, options promoteOptions) return append(conditions, completed, degraded, resyncing) } +func (v *vrgTest) deleteVolReps() { + vrList := &volrep.VolumeReplicationList{} + err := k8sClient.List(context.TODO(), vrList, &client.ListOptions{Namespace: v.namespace}) + Expect(err).NotTo(HaveOccurred(), "failed to get a list of VRs in namespace %s", v.namespace) + + for i := range vrList.Items { + vr := vrList.Items[i] + + err := k8sClient.Delete(context.TODO(), &vr) + Expect(err).NotTo(HaveOccurred(), "failed to delete volRep %v/%s", vr.Namespace, vr.Name) + } +} + func (v *vrgTest) protectDeletionOfVolReps() { By("Adding a finalizer to protect VolumeReplication resources being deleted " + v.namespace)