diff --git a/tests/e2e-test-framework/framework/const.py b/tests/e2e-test-framework/framework/const.py
index 2b6b5ebd0..9dfcd3f7d 100644
--- a/tests/e2e-test-framework/framework/const.py
+++ b/tests/e2e-test-framework/framework/const.py
@@ -39,6 +39,7 @@
 FAKE_ATTACH_PVC_ANNOTATION_KEY = "pv.attach.kubernetes.io/ignore-if-inaccessible"
 
 # annotation values
+DRIVE_HEALTH_BAD_ANNOTATION = "BAD"
 VOLUME_RELEASE_DONE_VALUE = "done"
 FAKE_ATTACH_PVC_ANNOTATION_VALUE = "yes"
 
diff --git a/tests/e2e-test-framework/framework/utils.py b/tests/e2e-test-framework/framework/utils.py
index 0dd9acfb4..009557aee 100644
--- a/tests/e2e-test-framework/framework/utils.py
+++ b/tests/e2e-test-framework/framework/utils.py
@@ -48,19 +48,14 @@ def get_worker_ips(self) -> List[str]:
         worker_nodes = [
             node
             for node in nodes
-            if "node-role.kubernetes.io/control-plane"
-            not in node.metadata.labels
+            if "node-role.kubernetes.io/control-plane" not in node.metadata.labels
         ]
         assert worker_nodes, "No worker nodes found in the cluster"
         logging.info("[ASSERT] Worker nodes found in the cluster.")
 
-        worker_ips = [
-            node.status.addresses[0].address for node in worker_nodes
-        ]
+        worker_ips = [node.status.addresses[0].address for node in worker_nodes]
         assert worker_ips, "No IP addresses found for worker nodes"
-        logging.info(
-            f"[ASSERT] IP addresses found for worker nodes - {worker_ips}"
-        )
+        logging.info(f"[ASSERT] IP addresses found for worker nodes - {worker_ips}")
         return worker_ips
 
@@ -77,17 +72,13 @@ def get_controlplane_ips(self) -> List[str]:
             for node in nodes
             if "node-role.kubernetes.io/control-plane" in node.metadata.labels
         ]
-        assert (
-            controlplane_nodes
-        ), "No control plane nodes found in the cluster"
+        assert controlplane_nodes, "No control plane nodes found in the cluster"
         logging.info("[ASSERT] Control plane nodes found in the cluster.")
 
         controlplane_ips = [
             node.status.addresses[0].address for node in controlplane_nodes
         ]
-        assert (
-            controlplane_ips
-        ), "No IP addresses found for control plane nodes"
+        assert controlplane_ips, "No IP addresses found for control plane nodes"
         logging.info(
             f"[ASSERT] IP addresses found for control plane nodes - {controlplane_ips}"
         )
@@ -212,9 +203,7 @@ def list_persistent_volume_claims(
         Returns:
             List[V1PersistentVolumeClaim]: A list of PersistentVolumeClaim objects that match the provided filters.
         """
-        pvcs = self.core_v1_api.list_namespaced_persistent_volume_claim(
-            namespace
-        ).items
+        pvcs = self.core_v1_api.list_namespaced_persistent_volume_claim(namespace).items
         if name:
             pvcs = [p for p in pvcs if p.metadata.name == name]
         if namespace:
@@ -272,9 +261,7 @@ def list_volumes(
                 namespace=self.namespace, pod_name=pod_name
             )
             volume_names = set(pvc.spec.volume_name for pvc in pvcs)
-            volumes = [
-                v for v in volumes if v["metadata"]["name"] in volume_names
-            ]
+            volumes = [v for v in volumes if v["metadata"]["name"] in volume_names]
         if location:
             volumes = [v for v in volumes if v["spec"]["Location"] == location]
         if storage_class:
@@ -403,7 +390,7 @@ def wait_volume(
             expected_health (Optional[str], optional): The expected health of the volume. Defaults to None.
             expected_usage (Optional[str], optional): The expected usage of the volume. Defaults to None.
             expected_operational_status (Optional[str], optional): The expected operational status of the volume. Defaults to None.
-            timeout (int): The maximum time to wait for the volume in seconds. Defaults to 60.
+            timeout (int): The maximum time to wait for the volume in seconds. Defaults to 90.
         Returns:
             bool: True if the volume meets the expected status, health, and usage within the given timeout, False otherwise.
@@ -416,14 +403,12 @@ def wait_volume(
         if expected_health:
             expected["Health"] = expected_health
         if expected_operational_status:
-            expected['OperationalStatus'] = expected_operational_status
+            expected["OperationalStatus"] = expected_operational_status
 
         def callback():
             return self.list_volumes(name)[0]
 
-        return self._wait_cr(
-            expected=expected, get_cr_fn=callback, timeout=timeout
-        )
+        return self._wait_cr(expected=expected, get_cr_fn=callback, timeout=timeout)
 
     def wait_drive(
         self,
@@ -443,7 +428,7 @@ def wait_drive(
             expected_health (Optional[str], optional): The expected health of the drive. Defaults to None.
             expected_usage (Optional[str], optional): The expected usage of the drive. Defaults to None.
             expected_led_state (Optional[str], optional): The expected LED state of the drive. Defaults to None.
-            timeout (int): The maximum time to wait for the drive in seconds. Defaults to 60.
+            timeout (int): The maximum time to wait for the drive in seconds. Defaults to 90.
 
         Returns:
             bool: True if the drive meets the expected status, health, and usage within the given timeout, False otherwise.
@@ -463,9 +448,7 @@ def callback():
                 const.CR_GROUP, const.CR_VERSION, "drives", name
             )
 
-        return self._wait_cr(
-            expected=expected, get_cr_fn=callback, timeout=timeout
-        )
+        return self._wait_cr(expected=expected, get_cr_fn=callback, timeout=timeout)
 
     def _wait_cr(
         self,
@@ -506,9 +489,7 @@ def _wait_cr(
 
         for k, v in assertions.items():
             if not v:
-                logging.error(
-                    f"CR is not in expected state: {k} != {expected[k]}"
-                )
+                logging.error(f"CR is not in expected state: {k} != {expected[k]}")
                 return False
 
@@ -534,23 +515,19 @@ def annotate_custom_resource(
             None: This function does not return anything.
         """
         if namespace:
-            custom_resource = (
-                self.custom_objects_api.get_namespaced_custom_object(
-                    const.CR_GROUP,
-                    const.CR_VERSION,
-                    namespace,
-                    resource_type,
-                    resource_name,
-                )
+            custom_resource = self.custom_objects_api.get_namespaced_custom_object(
+                const.CR_GROUP,
+                const.CR_VERSION,
+                namespace,
+                resource_type,
+                resource_name,
             )
         else:
-            custom_resource = (
-                self.custom_objects_api.get_cluster_custom_object(
-                    const.CR_GROUP,
-                    const.CR_VERSION,
-                    resource_type,
-                    resource_name,
-                )
+            custom_resource = self.custom_objects_api.get_cluster_custom_object(
+                const.CR_GROUP,
+                const.CR_VERSION,
+                resource_type,
+                resource_name,
             )
 
         annotations = custom_resource["metadata"].get("annotations", {})
@@ -695,9 +672,7 @@ def recreate_pod(self, name: str, namespace: str) -> V1Pod:
             V1Pod: The recreated Pod.
         """
         self.core_v1_api.delete_namespaced_pod(name=name, namespace=namespace)
-        logging.info(
-            f"pod {name} deleted, waiting for a new pod to be created"
-        )
+        logging.info(f"pod {name} deleted, waiting for a new pod to be created")
         time.sleep(5)
 
         pod = self.list_pods(name, namespace=namespace)[0]
@@ -737,11 +712,15 @@ def wait_for_event_with_reason(
         return False
 
     def clear_pvc_and_pod(
-        self, pod_name: str, namespace: str, pvc_name: Optional[str] = None, volume_name: Optional[str] = None
+        self,
+        pod_name: str,
+        namespace: str,
+        pvc_name: Optional[str] = None,
+        volume_name: Optional[str] = None,
     ) -> None:
         """
         Clears the PersistentVolumeClaim (PVC) and the Pod with the specified names in the Kubernetes cluster.
-        If the name of pvc or volume is not specified it clears all PVCs connected with specific Pod.
+        If the name of the PVC or volume is not specified, all PVCs connected to the specified Pod are cleared.
 
         Args:
             pod_name (str): The name of the Pod to be cleared.
@@ -764,8 +743,8 @@ def clear_pvc_and_pod(
             ), f"Volume: {volume_name} failed to reach expected usage: {const.USAGE_RELEASED}"
         else:
             pvcs = self.list_persistent_volume_claims(
-                namespace=namespace, pod_name=pod_name
-                )
+                namespace=namespace, pod_name=pod_name
+            )
             for pvc in pvcs:
                 logging.info(f"clearing pvc {pvc.metadata.name}")
                 self.core_v1_api.delete_namespaced_persistent_volume_claim(
@@ -775,23 +754,26 @@ def clear_pvc_and_pod(
             for pvc in pvcs:
                 assert self.wait_volume(
                     name=pvc.spec.volume_name,
-                    expected_usage=const.USAGE_RELEASED,
-                ), f"Volume: {pvc.spec.volume_name} failed to reach expected usage: {const.USAGE_RELEASED}"
-                logging.info(f"volume: {pvc.spec.volume_name} reach expected usage: {const.USAGE_RELEASED}")
+                    expected_usage=",".join([const.USAGE_RELEASED, const.USAGE_IN_USE]),
+                ), f"Volume: {pvc.spec.volume_name} failed to reach expected usage: {','.join([const.USAGE_RELEASED, const.USAGE_IN_USE])}"
+                logging.info(f"volume: {pvc.spec.volume_name} reached expected usage")
 
         time.sleep(30)
         self.recreate_pod(name=pod_name, namespace=namespace)
-
-    def check_drive_cr_not_exist(self, drive_name: str, timeout: int = 120) -> bool:
+
+    def check_drive_cr_exist_or_not(
+        self, drive_name: str, cr_existence: bool, timeout: int = 120
+    ) -> bool:
         """
-        Checks if a custom resource (CR) representing a drive with the given name does not exist.
+        Checks whether a custom resource (CR) representing a drive with the given name exists.
 
         Args:
             drive_name (str): The name of the drive CR.
+            cr_existence (bool): Whether the drive CR is expected to exist (True) or not (False).
             timeout (int, optional): The timeout for checking the CR, defaults to 120.
 
         Returns:
-            bool: True if the drive CR was removed within the given timeout, False otherwise.
+            bool: True if the drive CR existence matches the expectation within the given timeout, False otherwise.
         """
         end_time = time.time() + timeout
         while time.time() < end_time:
@@ -803,16 +785,14 @@ def check_drive_cr_not_exist(self, drive_name: str, timeout: int = 120) -> bool:
                     name=drive_name,
                 )
                 logging.warning(f"Drive CR '{drive_name}' still exists.")
+                if cr_existence:
+                    return True
             except ApiException as e:
                 if e.status == 404:
                     logging.info(f"Drive CR {drive_name} does not exist.")
-                    return True
+                    if not cr_existence:
+                        return True
                 else:
                     raise
             time.sleep(2)
-        logging.warning(
-            f"Drive CR '{drive_name}' still exists after {timeout} seconds timeout."
-        )
         return False
-
-
\ No newline at end of file
diff --git a/tests/e2e-test-framework/tests/test_drive_replacement_multi_volumes.py b/tests/e2e-test-framework/tests/test_drive_replacement_multi_volumes.py
index 82a4824d7..aa2195e82 100644
--- a/tests/e2e-test-framework/tests/test_drive_replacement_multi_volumes.py
+++ b/tests/e2e-test-framework/tests/test_drive_replacement_multi_volumes.py
@@ -1,6 +1,6 @@
-import pytest
 import logging
 from typing import Dict
+import pytest
 
 import framework.const as const
 
@@ -9,8 +9,6 @@
 from framework.drive import DriveUtils
 
 
-
-
 class TestAutoDriveReplacementWithMultipleVolumesPerPod:
     @classmethod
     @pytest.fixture(autouse=True)
@@ -31,14 +29,14 @@ def setup_class(
         cls.sts = STS(cls.namespace, cls.name, cls.replicas)
         cls.sts.delete()
         cls.sts.create(storage_classes=[const.SSD_SC, const.HDD_SC])
-
+
         yield
-
+
         cls.sts.delete()
-
+
     @pytest.mark.hal
     def test_5921_auto_drive_replacement_with_multiple_volumes_per_pod(self):
-        # 1. get volume and volume groups for deployed pod
+        # 1. get volume and volume groups for deployed pod
         assert (
             self.sts.verify(self.timeout) is True
         ), f"STS: {self.name} failed to reach desired number of replicas: {self.replicas}"
         pod = self.utils.list_pods(name_prefix=self.name)[0]
         node_ip = self.utils.get_pod_node_ip(
             pod_name=pod.metadata.name, namespace=self.namespace
         )
         volumes = self.utils.list_volumes(pod_name=pod.metadata.name)
         # get all drives
         drives = []
         for volume in volumes:
             drive = self.utils.get_drive_cr(
-                volume_name=volume["metadata"]["name"],
-                namespace=volume["metadata"]["namespace"])
+                volume_name=volume["metadata"]["name"],
+                namespace=volume["metadata"]["namespace"],
+            )
             drives.append(drive)
-        # 2. simulate drive failure. Annotate drive used by pod with health=BAD
+        # 2. simulate drive failure. Annotate drive used by pod with health=BAD
         for drive in drives:
             drive_name = drive["metadata"]["name"]
             self.utils.annotate_custom_resource(
-                resource_name=drive_name,
-                resource_type="drives",
-                annotation_key="health",
-                annotation_value="BAD"
+                resource_name=drive_name,
+                resource_type="drives",
+                annotation_key=const.DRIVE_HEALTH_ANNOTATION,
+                annotation_value=const.DRIVE_HEALTH_BAD_ANNOTATION,
             )
             logging.info(f"drive: {drive_name} was annotated with health=BAD")
-        # 3. wait until drive health is BAD, status=ONLINE, usage=RELEASING.
+        # 3. wait until drive health is BAD, status=ONLINE, usage=RELEASING.
         for drive in drives:
             drive_name = drive["metadata"]["name"]
             logging.info(f"Waiting for drive: {drive_name}")
             assert self.utils.wait_drive(
-                name=drive_name,
-                expected_status=const.STATUS_ONLINE,
-                expected_health=const.HEALTH_BAD,
-                expected_usage=const.USAGE_RELEASING
+                name=drive_name,
+                expected_status=const.STATUS_ONLINE,
+                expected_health=const.HEALTH_BAD,
+                expected_usage=const.USAGE_RELEASING,
             ), f"Drive {drive_name} failed to reach expected Status: {const.STATUS_ONLINE}, Health: {const.HEALTH_BAD}, Usage: {const.USAGE_RELEASING}"
-            logging.info(f"drive {drive_name} went in Status: {const.STATUS_ONLINE}, Health: {const.HEALTH_BAD}, Usage: {const.USAGE_RELEASING}")
-        # 4. wait until volume health is BAD, status=OPERATIVE, usage=RELEASING.
+            logging.info(
+                f"drive {drive_name} reached Status: {const.STATUS_ONLINE}, Health: {const.HEALTH_BAD}, Usage: {const.USAGE_RELEASING}"
+            )
+        # 4. wait until volume health is BAD, status=OPERATIVE, usage=RELEASING.
         for volume in volumes:
             volume_name = volume["metadata"]["name"]
             logging.info(f"Waiting for volume: {volume_name}")
             assert self.utils.wait_volume(
-                name=volume_name,
-                expected_health=const.HEALTH_BAD,
-                expected_usage=const.USAGE_RELEASING,
-                expected_operational_status=const.STATUS_OPERATIVE
+                name=volume_name,
+                expected_health=const.HEALTH_BAD,
+                expected_usage=const.USAGE_RELEASING,
+                expected_operational_status=const.STATUS_OPERATIVE,
             ), f"Volume {volume_name} failed to reach OperationalStatus: {const.STATUS_OPERATIVE}, Health: {const.HEALTH_BAD}, Usage: {const.USAGE_RELEASING}"
-            logging.info(f"volume {volume_name} went in OperationalStatus: {const.STATUS_OPERATIVE}, Health: {const.HEALTH_BAD}, Usage: {const.USAGE_RELEASING}")
-        # 5. check events and locate event related to DriveHealthFailure
+            logging.info(
+                f"volume {volume_name} reached OperationalStatus: {const.STATUS_OPERATIVE}, Health: {const.HEALTH_BAD}, Usage: {const.USAGE_RELEASING}"
+            )
+        # 5. check events and locate event related to DriveHealthFailure
         for drive in drives:
             drive_name = drive["metadata"]["name"]
             assert self.utils.event_in(
                 resource_name=drive_name,
                 reason=const.DRIVE_HEALTH_FAILURE,
             ), f"event {const.DRIVE_HEALTH_FAILURE} for drive {drive_name} not found"
-        # 6. annotate volume with release=done
+        # 6. annotate volume with release=done
         for volume in volumes:
             volume_name = volume["metadata"]["name"]
             self.utils.annotate_custom_resource(
-                resource_name=volume_name,
-                resource_type="volumes",
-                annotation_key="release",
-                annotation_value="done",
-                namespace=volume['metadata']['namespace']
+                resource_name=volume_name,
+                resource_type="volumes",
+                annotation_key=const.VOLUME_RELEASE_ANNOTATION,
+                annotation_value=const.VOLUME_RELEASE_DONE_VALUE,
+                namespace=volume["metadata"]["namespace"],
             )
             logging.info(f"volume: {volume_name} was annotated with release=done")
-        # 7. check drive usages are RELEASED
+        # 7. check drive usages are RELEASED
         for drive in drives:
             assert self.utils.wait_drive(
-                name=drive['metadata']['name'],
-                expected_usage=const.USAGE_RELEASED
+                name=drive["metadata"]["name"], expected_usage=const.USAGE_RELEASED
             ), f"Drive {drive_name} failed to reach expected Usage: {const.USAGE_RELEASED}"
-            logging.info(f"drive {drive_name} went in Usage: {const.USAGE_RELEASED}")
-        # 8. check volumes are RELEASED
+            logging.info(f"drive {drive['metadata']['name']} reached Usage: {const.USAGE_RELEASED}")
+        # 8. check volumes are RELEASED
         for volume in volumes:
             assert self.utils.wait_volume(
-                name=volume['metadata']['name'],
-                expected_usage=const.USAGE_RELEASED
+                name=volume["metadata"]["name"], expected_usage=const.USAGE_RELEASED
             ), f"Volume {volume_name} failed to reach expected Usage {const.USAGE_RELEASED}"
             logging.info(f"volume {volume_name} went in Usage: {const.USAGE_RELEASED}")
-        # 9. check event DriveReadyForRemoval is generated
+        # 9. check event DriveReadyForRemoval is generated
         for drive in drives:
             drive_name = drive["metadata"]["name"]
             assert self.utils.event_in(
                 resource_name=drive_name,
                 reason=const.DRIVE_READY_FOR_REMOVAL,
-            ), f"event {const.DRIVE_READY_FOR_REMOVAL} for drive {drive_name} not found"
-        # 10. check events and locate event related to VolumeBadHealth
+            ), f"event {const.DRIVE_READY_FOR_REMOVAL} for drive {drive_name} not found"
+        # 10. check events and locate event related to VolumeBadHealth
         for volume in volumes:
             volume_name = volume["metadata"]["name"]
             assert self.utils.event_in(
                 resource_name=volume_name,
                 reason=const.VOLUME_BAD_HEALTH,
             ), f"event {const.VOLUME_BAD_HEALTH} for volume {volume_name} not found"
-        # 11. delete pod and pvc
-        self.utils.clear_pvc_and_pod(pod_name=pod.metadata.name, namespace=self.namespace)
-        # 12. check Drive status to be REMOVING or REMOVED and LED state to be 1 (if drive supports LED ) or 2 (if drive does not support LED) Status to be ONLINE #TODO: status LED 2 => another test case
+        # 11. delete pod and pvc
+        self.utils.clear_pvc_and_pod(
+            pod_name=pod.metadata.name, namespace=self.namespace
+        )
+        # 12. check Drive status to be REMOVING or REMOVED and LED state to be 1 (if drive supports LED ) or 2 (if drive does not support LED) Status to be ONLINE
         for drive in drives:
             assert self.utils.wait_drive(
-                name=drive['metadata']['name'],
-                expected_status=const.STATUS_ONLINE,
-                expected_usage=const.USAGE_REMOVED,
-                expected_health=const.HEALTH_BAD,
-                expected_led_state=const.LED_STATE
+                name=drive["metadata"]["name"],
+                expected_status=const.STATUS_ONLINE,
+                expected_usage=const.USAGE_REMOVED,
+                expected_health=const.HEALTH_BAD,
+                expected_led_state=const.LED_STATE,
             ), f"Drive {drive_name} failed to reach expected Status: {const.STATUS_ONLINE}, Health: {const.HEALTH_BAD}, Usage: {const.USAGE_REMOVED}, LEDState: {drive["spec"]["LEDState"]}"
-            logging.info(f"drive {drive_name} went in Status: {const.STATUS_ONLINE}, Health: {const.HEALTH_BAD}, Usage: {const.USAGE_REMOVED}, LEDState: {drive["spec"]["LEDState"]}")
-        # 13. check for events: DriveReadyForPhysicalRemoval
+            logging.info(
+                f"drive {drive['metadata']['name']} reached Status: {const.STATUS_ONLINE}, Health: {const.HEALTH_BAD}, Usage: {const.USAGE_REMOVED}, LEDState: {drive['spec']['LEDState']}"
+            )
+        # 13. check for events: DriveReadyForPhysicalRemoval
         for drive in drives:
             drive_name = drive["metadata"]["name"]
             assert self.utils.event_in(
                 resource_name=drive_name,
                 reason=const.DRIVE_READY_FOR_PHYSICAL_REMOVAL,
             ), f"event {const.DRIVE_READY_FOR_PHYSICAL_REMOVAL} for drive {drive_name} not found"
-        # 14. get Node ID on which drives reside, Obtain path for affected drives, identify node name for corresponding node id and remove drives
+        # 14. get Node ID on which drives reside, obtain path for affected drives, identify node name for corresponding node id and remove drives
         for drive in drives:
             drive_name = drive["metadata"]["name"]
             drive_path = drive["spec"]["Path"]
             assert drive_path, f"Drive path for drive {drive_name} not found"
             logging.info(f"drive_path: {drive_path}")
 
-            host_num = self.drive_utils[node_ip].get_host_num(drive_path)
             scsi_id = self.drive_utils[node_ip].get_scsi_id(drive_path)
             assert scsi_id, f"scsi_id for drive {drive_name} not found"
             logging.info(f"scsi_id: {scsi_id}")
 
             self.drive_utils[node_ip].remove(scsi_id)
             logging.info(f"drive {drive_path}, {scsi_id} removed")
-        # 15. check driveCR succesfully removed
+        # 15. check drive CR successfully removed
         for drive in drives:
-            drive_name = drive["metadata"]["name"]
-            assert self.utils.check_drive_cr_not_exist(
-                drive_name=drive_name
+            drive_name = drive["metadata"]["name"]
+            assert self.utils.check_drive_cr_exist_or_not(
+                drive_name=drive_name, cr_existence=False
             ), f"Drive CR {drive_name} still exists"
-        # 16. check for events DriveSuccessfullyRemoved in kubernetes events
+        # 16. check for events DriveSuccessfullyRemoved in kubernetes events
         for drive in drives:
             drive_name = drive["metadata"]["name"]
             assert self.utils.event_in(
                 resource_name=drive_name,
                 reason=const.DRIVE_SUCCESSFULLY_REMOVED,
-            ), f"event {const.DRIVE_SUCCESSFULLY_REMOVED} for drive {drive_name} not found"
+            ), f"event {const.DRIVE_SUCCESSFULLY_REMOVED} for drive {drive_name} not found"
diff --git a/tests/e2e-test-framework/tests/test_drive_replacement_multi_volumes_single_fail.py b/tests/e2e-test-framework/tests/test_drive_replacement_multi_volumes_single_fail.py
new file mode 100644
index 000000000..8574c10af
--- /dev/null
+++ b/tests/e2e-test-framework/tests/test_drive_replacement_multi_volumes_single_fail.py
@@ -0,0 +1,174 @@
+import logging
+from typing import Dict
+import pytest
+
+import framework.const as const
+
+from framework.sts import STS
+from framework.utils import Utils
+from framework.drive import DriveUtils
+
+
+class TestAutoDriveReplacementWithMultipleVolumesPerPodSingleFailure:
+    @classmethod
+    @pytest.fixture(autouse=True)
+    def setup_class(
+        cls,
+        namespace: str,
+        drive_utils_executors: Dict[str, DriveUtils],
+        utils: Utils,
+    ):
+        cls.namespace = namespace
+        cls.name = "test-auto-dr-multiple-volumes-single-failure"
+        cls.timeout = 120
+        cls.replicas = 1
+
+        cls.utils = utils
+
+        cls.drive_utils = drive_utils_executors
+        cls.sts = STS(cls.namespace, cls.name, cls.replicas)
+        cls.sts.delete()
+        cls.sts.create(storage_classes=[const.SSD_SC, const.HDD_SC])
+
+        yield
+
+        cls.sts.delete()
+
+    @pytest.mark.hal
+    def test_5955_auto_drive_replacement_with_multiple_volumes_per_pod_single_failure(
+        self,
+    ):
+        # 1. get volume and volume groups for deployed pod
+        assert (
+            self.sts.verify(self.timeout) is True
+        ), f"STS: {self.name} failed to reach desired number of replicas: {self.replicas}"
+        pod = self.utils.list_pods(name_prefix=self.name)[0]
+        node_ip = self.utils.get_pod_node_ip(
+            pod_name=pod.metadata.name, namespace=self.namespace
+        )
+        volumes = self.utils.list_volumes(pod_name=pod.metadata.name)
+        # get all drives
+        drives = []
+        for volume in volumes:
+            drive = self.utils.get_drive_cr(
+                volume_name=volume["metadata"]["name"],
+                namespace=volume["metadata"]["namespace"],
+            )
+            drives.append(drive)
+        failed_drive = drives[0]
+        healthy_drive = drives[1]
+        failed_volume = volumes[0]
+        # 2. simulate drive failure. Annotate drive used by pod with health=BAD
+        failed_drive_name = failed_drive["metadata"]["name"]
+        self.utils.annotate_custom_resource(
+            resource_name=failed_drive_name,
+            resource_type="drives",
+            annotation_key=const.DRIVE_HEALTH_ANNOTATION,
+            annotation_value=const.DRIVE_HEALTH_BAD_ANNOTATION,
+        )
+        logging.info(f"drive: {failed_drive_name} was annotated with health=BAD")
+        # 3. wait until drive health is BAD, status=ONLINE, usage=RELEASING.
+        logging.info(f"Waiting for drive: {failed_drive_name}")
+        assert self.utils.wait_drive(
+            name=failed_drive_name,
+            expected_status=const.STATUS_ONLINE,
+            expected_health=const.HEALTH_BAD,
+            expected_usage=const.USAGE_RELEASING,
+        ), f"Drive {failed_drive_name} failed to reach expected Status: {const.STATUS_ONLINE}, Health: {const.HEALTH_BAD}, Usage: {const.USAGE_RELEASING}"
+        logging.info(
+            f"drive {failed_drive_name} reached Status: {const.STATUS_ONLINE}, Health: {const.HEALTH_BAD}, Usage: {const.USAGE_RELEASING}"
+        )
+        # 4. wait until volume health is BAD, status=OPERATIVE, usage=RELEASING.
+        failed_volume_name = failed_volume["metadata"]["name"]
+        logging.info(f"Waiting for volume: {failed_volume_name}")
+        assert self.utils.wait_volume(
+            name=failed_volume_name,
+            expected_health=const.HEALTH_BAD,
+            expected_usage=const.USAGE_RELEASING,
+            expected_operational_status=const.STATUS_OPERATIVE,
+        ), f"Volume {failed_volume_name} failed to reach OperationalStatus: {const.STATUS_OPERATIVE}, Health: {const.HEALTH_BAD}, Usage: {const.USAGE_RELEASING}"
+        logging.info(
+            f"volume {failed_volume_name} reached OperationalStatus: {const.STATUS_OPERATIVE}, Health: {const.HEALTH_BAD}, Usage: {const.USAGE_RELEASING}"
+        )
+        # 5. check events and locate event related to DriveHealthFailure
+        assert self.utils.event_in(
+            resource_name=failed_drive_name,
+            reason=const.DRIVE_HEALTH_FAILURE,
+        ), f"event {const.DRIVE_HEALTH_FAILURE} for drive {failed_drive_name} not found"
+        # 6. annotate volume with release=done
+        self.utils.annotate_custom_resource(
+            resource_name=failed_volume_name,
+            resource_type="volumes",
+            annotation_key=const.VOLUME_RELEASE_ANNOTATION,
+            annotation_value=const.VOLUME_RELEASE_DONE_VALUE,
+            namespace=failed_volume["metadata"]["namespace"],
+        )
+        logging.info(f"volume: {failed_volume_name} was annotated with release=done")
+        # 7. check drive usages are RELEASED
+        assert self.utils.wait_drive(
+            name=failed_drive_name, expected_usage=const.USAGE_RELEASED
+        ), f"Drive {failed_drive_name} failed to reach expected Usage: {const.USAGE_RELEASED}"
+        logging.info(f"drive {failed_drive_name} reached Usage: {const.USAGE_RELEASED}")
+        # 8. check volumes are RELEASED
+        assert self.utils.wait_volume(
+            name=failed_volume_name, expected_usage=const.USAGE_RELEASED
+        ), f"Volume {failed_volume_name} failed to reach expected Usage {const.USAGE_RELEASED}"
+        logging.info(
+            f"volume {failed_volume_name} reached Usage: {const.USAGE_RELEASED}"
+        )
+        # 9. check event DriveReadyForRemoval is generated
+        assert self.utils.event_in(
+            resource_name=failed_drive_name,
+            reason=const.DRIVE_READY_FOR_REMOVAL,
+        ), f"event {const.DRIVE_READY_FOR_REMOVAL} for drive {failed_drive_name} not found"
+        # 10. check events and locate event related to VolumeBadHealth
+        assert self.utils.event_in(
+            resource_name=failed_volume_name,
+            reason=const.VOLUME_BAD_HEALTH,
+        ), f"event {const.VOLUME_BAD_HEALTH} for volume {failed_volume_name} not found"
+        # 11. delete pod and pvc
+        self.utils.clear_pvc_and_pod(
+            pod_name=pod.metadata.name, namespace=self.namespace
+        )
+        # 12. check Drive status to be REMOVING or REMOVED and LED state to be 1 (if drive supports LED ) or 2 (if drive does not support LED) Status to be ONLINE
+        assert self.utils.wait_drive(
+            name=failed_drive_name,
+            expected_status=const.STATUS_ONLINE,
+            expected_usage=const.USAGE_REMOVED,
+            expected_health=const.HEALTH_BAD,
+            expected_led_state=const.LED_STATE,
+        ), f"Drive {failed_drive_name} failed to reach expected Status: {const.STATUS_ONLINE}, Health: {const.HEALTH_BAD}, Usage: {const.USAGE_REMOVED}, LEDState: {failed_drive['spec']['LEDState']}"
+        logging.info(
+            f"drive {failed_drive_name} reached Status: {const.STATUS_ONLINE}, Health: {const.HEALTH_BAD}, Usage: {const.USAGE_REMOVED}, LEDState: {failed_drive['spec']['LEDState']}"
+        )
+        # 13. check for events: DriveReadyForPhysicalRemoval
+        assert self.utils.event_in(
+            resource_name=failed_drive_name,
+            reason=const.DRIVE_READY_FOR_PHYSICAL_REMOVAL,
+        ), f"event {const.DRIVE_READY_FOR_PHYSICAL_REMOVAL} for drive {failed_drive_name} not found"
+        # 14. get Node ID on which drive resides, obtain path for affected drive, identify node name for corresponding node id and remove drive
+        failed_drive_path = failed_drive["spec"]["Path"]
+        assert failed_drive_path, f"Drive path for drive {failed_drive_name} not found"
+        logging.info(f"drive_path: {failed_drive_path}")
+
+        scsi_id = self.drive_utils[node_ip].get_scsi_id(failed_drive_path)
+        assert scsi_id, f"scsi_id for drive {failed_drive_name} not found"
+        logging.info(f"scsi_id: {scsi_id}")
+
+        self.drive_utils[node_ip].remove(scsi_id)
+        logging.info(f"drive {failed_drive_path}, {scsi_id} removed")
+        # 15. check drive CR successfully removed -> only the removed one; the second should still be in the cluster
+        assert self.utils.check_drive_cr_exist_or_not(
+            drive_name=failed_drive_name, cr_existence=False
+        ), f"Drive CR {failed_drive_name} still exists"
+
+        healthy_drive_name = healthy_drive["metadata"]["name"]
+        assert self.utils.check_drive_cr_exist_or_not(
+            drive_name=healthy_drive_name,
+            cr_existence=True,
+        ), f"Drive CR {healthy_drive_name} does not exist"
+        # 16. check for events DriveSuccessfullyRemoved in kubernetes events
+        assert self.utils.event_in(
+            resource_name=failed_drive_name,
+            reason=const.DRIVE_SUCCESSFULLY_REMOVED,
+        ), f"event {const.DRIVE_SUCCESSFULLY_REMOVED} for drive {failed_drive_name} not found"
diff --git a/tests/e2e-test-framework/tests/test_fake_attach.py b/tests/e2e-test-framework/tests/test_fake_attach.py
index 05634cc65..ba7471de3 100644
--- a/tests/e2e-test-framework/tests/test_fake_attach.py
+++ b/tests/e2e-test-framework/tests/test_fake_attach.py
@@ -1,4 +1,5 @@
 import logging
+from typing import Dict
 import pytest
 
 import framework.const as const
@@ -6,7 +7,6 @@
 from framework.sts import STS
 from framework.utils import Utils
 from framework.drive import DriveUtils
-from typing import Dict
 
 
 class TestFakeAttach:
diff --git a/tests/e2e-test-framework/tests/test_fake_attach_dr.py b/tests/e2e-test-framework/tests/test_fake_attach_dr.py
index b43b67fa3..4a103a422 100644
--- a/tests/e2e-test-framework/tests/test_fake_attach_dr.py
+++ b/tests/e2e-test-framework/tests/test_fake_attach_dr.py
@@ -1,7 +1,7 @@
 import logging
 import time
-import pytest
 from typing import Dict
+import pytest
 
 import framework.const as const