From b8d42dd4472b8b82eca43b9477129c439833394b Mon Sep 17 00:00:00 2001 From: Gil Bregman Date: Wed, 22 Nov 2023 18:29:55 +0200 Subject: [PATCH] Fix OMAP lock test. Fixes #327 Signed-off-by: Gil Bregman --- tests/test_cli.py | 9 ++ tests/test_grpc.py | 2 +- tests/test_omap_lock.py | 225 ++++++++++++++++++++++++++-------------- 3 files changed, 160 insertions(+), 76 deletions(-) diff --git a/tests/test_cli.py b/tests/test_cli.py index 3ad304d6e..fc7c6564e 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -235,6 +235,15 @@ def test_create_bdev_ana_ipv6(self, caplog, gateway): def test_create_subsystem_ana(self, caplog, gateway): caplog.clear() + with pytest.raises(Exception) as ex: + try: + cli(["create_subsystem", "-n", subsystem, "-t"]) + except SystemExit as sysex: + # should fail with non-zero return code + assert sysex != 0 + pass + assert "HA enabled but ANA-reporting is disabled" in str(ex.value) + caplog.clear() cli(["create_subsystem", "-n", subsystem, "-a", "-t"]) assert f"Created subsystem {subsystem}: True" in caplog.text assert "ana reporting: True" in caplog.text diff --git a/tests/test_grpc.py b/tests/test_grpc.py index e92a11e37..3bd67a437 100644 --- a/tests/test_grpc.py +++ b/tests/test_grpc.py @@ -37,7 +37,7 @@ def test_create_get_subsys(caplog, config): for i in range(created_resource_count): create_resource_by_index(i) - assert "Failed" not in caplog.text + assert "failed" not in caplog.text.lower() caplog.clear() diff --git a/tests/test_omap_lock.py b/tests/test_omap_lock.py index cd48e08ba..0a4fc59c2 100644 --- a/tests/test_omap_lock.py +++ b/tests/test_omap_lock.py @@ -15,24 +15,13 @@ subsystem_prefix = "nqn.2016-06.io.spdk:cnode" created_resource_count = 500 -@pytest.fixture(scope="function") -def conn(config, request): - """Sets up and tears down Gateways A and B.""" - update_notify = True - update_interval_sec = 5 - disable_unlock = False - lock_duration = 60 - if request.node.name == "test_multi_gateway_omap_reread": - update_notify = False - update_interval_sec = 300 - elif request.node.name == "test_trying_to_lock_twice": - disable_unlock = True - lock_duration = 100 # This should be bigger than lock retries * retry sleep interval +def setup_config(config, gw1_name, gw2_name, gw_group, update_notify ,update_interval_sec, disable_unlock, lock_duration, + sock1_name, sock2_name, port_inc): + """Sets up the config objects for gateways A and B """ - # Setup GatewayA and GatewayB configs configA = copy.deepcopy(config) - configA.config["gateway"]["name"] = "GatewayA" - configA.config["gateway"]["group"] = "Group1" + configA.config["gateway"]["name"] = gw1_name + configA.config["gateway"]["group"] = gw_group configA.config["gateway"]["state_update_notify"] = str(update_notify) configA.config["gateway"]["state_update_interval_sec"] = str(update_interval_sec) configA.config["gateway"]["omap_file_disable_unlock"] = str(disable_unlock) @@ -40,45 +29,147 @@ def conn(config, request): configA.config["gateway"]["min_controller_id"] = "1" configA.config["gateway"]["max_controller_id"] = "20000" configA.config["gateway"]["enable_spdk_discovery_controller"] = "True" - configA.config["spdk"]["rpc_socket_name"] = "spdk_GatewayA.sock" + configA.config["spdk"]["rpc_socket_name"] = sock1_name configB = copy.deepcopy(configA) - addr = configA.get("gateway", "addr") - portA = configA.getint("gateway", "port") + portA = configA.getint("gateway", "port") + port_inc + configA.config["gateway"]["port"] = str(portA) portB = portA + 1 - configB.config["gateway"]["name"] = "GatewayB" + configB.config["gateway"]["name"] = gw2_name configB.config["gateway"]["port"] = str(portB) configB.config["gateway"]["min_controller_id"] = "20001" configB.config["gateway"]["max_controller_id"] = "40000" - configB.config["spdk"]["rpc_socket_name"] = "spdk_GatewayB.sock" + configB.config["spdk"]["rpc_socket_name"] = sock2_name configB.config["spdk"]["tgt_cmd_extra_args"] = "-m 0x02" + return configA, configB + +def start_servers(gatewayA, gatewayB, addr, portA, portB): + gatewayA.serve() + # Delete existing OMAP state + gatewayA.gateway_rpc.gateway_state.delete_state() + # Create new + gatewayB.serve() + gatewayB.gateway_rpc.gateway_state.delete_state() + + # Bind the client and Gateways A & B + channelA = grpc.insecure_channel(f"{addr}:{portA}") + stubA = pb2_grpc.GatewayStub(channelA) + channelB = grpc.insecure_channel(f"{addr}:{portB}") + stubB = pb2_grpc.GatewayStub(channelB) + + return stubA, stubB + +def stop_servers(gatewayA, gatewayB): + # Stop gateways + gatewayA.server.stop(grace=1) + gatewayB.server.stop(grace=1) + +@pytest.fixture(scope="function") +def conn_omap_reread(config, request): + """Sets up and tears down Gateways A and B.""" + + # Setup GatewayA and GatewayB configs + configA, configB = setup_config(config, "GatewayA", "GatewayB", "Group1", False, 300, False, 60, + "spdk_GatewayA.sock", "spdk_GatewayB.sock", 0) + addr = configA.get("gateway", "addr") + portA = configA.getint("gateway", "port") + portB = configB.getint("gateway", "port") # Start servers with ( GatewayServer(configA) as gatewayA, GatewayServer(configB) as gatewayB, ): - gatewayA.serve() - # Delete existing OMAP state - gatewayA.gateway_rpc.gateway_state.delete_state() - # Create new - gatewayB.serve() - - # Bind the client and Gateways A & B - channelA = grpc.insecure_channel(f"{addr}:{portA}") - stubA = pb2_grpc.GatewayStub(channelA) - channelB = grpc.insecure_channel(f"{addr}:{portB}") - stubB = pb2_grpc.GatewayStub(channelB) + stubA, stubB = start_servers(gatewayA, gatewayB, addr, portA, portB) yield stubA, stubB, gatewayA.gateway_rpc, gatewayB.gateway_rpc + stop_servers(gatewayA, gatewayB) + +@pytest.fixture(scope="function") +def conn_lock_twice(config, request): + """Sets up and tears down Gateways A and B.""" + + # Setup GatewayA and GatewayB configs + configA, configB = setup_config(config, "GatewayAA", "GatewayBB", "Group2", True, 5, True, 100, + "spdk_GatewayAA.sock", "spdk_GatewayBB.sock", 2) + addr = configA.get("gateway", "addr") + portA = configA.getint("gateway", "port") + portB = configB.getint("gateway", "port") + # Start servers + with ( + GatewayServer(configA) as gatewayA, + GatewayServer(configB) as gatewayB, + ): + stubA, stubB = start_servers(gatewayA, gatewayB, addr, portA, portB) + yield stubA, stubB + stop_servers(gatewayA, gatewayB) + +@pytest.fixture(scope="function") +def conn_concurrent(config, request): + """Sets up and tears down Gateways A and B.""" + update_notify = True + update_interval_sec = 5 + disable_unlock = False + lock_duration = 60 - # Stop gateways - gatewayA.server.stop(grace=1) - gatewayB.server.stop(grace=1) - gatewayB.gateway_rpc.gateway_state.delete_state() + # Setup GatewayA and GatewayB configs + configA, configB = setup_config(config, "GatewayAAA", "GatewayBBB", "Group3", True, 5, False, 60, + "spdk_GatewayAAA.sock", "spdk_GatewayBBB.sock", 4) + addr = configA.get("gateway", "addr") + portA = configA.getint("gateway", "port") + portB = configB.getint("gateway", "port") + # Start servers + with ( + GatewayServer(configA) as gatewayA, + GatewayServer(configB) as gatewayB, + ): + stubA, stubB = start_servers(gatewayA, gatewayB, addr, portA, portB) + yield stubA, stubB + stop_servers(gatewayA, gatewayB) -def test_multi_gateway_omap_reread(config, conn, caplog): +def create_resource_by_index(stub, i, caplog): + bdev = f"{bdev_prefix}{i}" + bdev_req = pb2.create_bdev_req(bdev_name=bdev, + rbd_pool_name=pool, + rbd_image_name=image, + block_size=4096) + ret_bdev = stub.create_bdev(bdev_req) + assert ret_bdev + if caplog != None: + assert f"create_bdev: {bdev}" in caplog.text + assert "create_bdev failed" not in caplog.text + subsystem = f"{subsystem_prefix}{i}" + subsystem_req = pb2.create_subsystem_req(subsystem_nqn=subsystem) + ret_subsystem = stub.create_subsystem(subsystem_req) + assert ret_subsystem + if caplog != None: + assert f"create_subsystem {subsystem}: True" in caplog.text + assert "create_subsystem failed" not in caplog.text + namespace_req = pb2.add_namespace_req(subsystem_nqn=subsystem, + bdev_name=bdev) + ret_namespace = stub.add_namespace(namespace_req) + assert ret_namespace + +def check_resource_by_index(i, resource_list): + # notice that this also verifies the namespace as the bdev name is in the namespaces section + bdev = f"{bdev_prefix}{i}" + subsystem = f"{subsystem_prefix}{i}" + found = False + for res in resource_list: + try: + if res["nqn"] != subsystem: + continue + for ns in res["namespaces"]: + if ns["bdev_name"] == bdev: + found = True + break + break + except Exception: + pass + assert found + +def test_multi_gateway_omap_reread(config, conn_omap_reread, caplog): """Tests reading out of date OMAP file """ - stubA, stubB, gatewayA, gatewayB = conn + stubA, stubB, gatewayA, gatewayB = conn_omap_reread bdev = bdev_prefix + "X0" bdev2 = bdev_prefix + "X1" bdev3 = bdev_prefix + "X2" @@ -112,7 +203,7 @@ def test_multi_gateway_omap_reread(config, conn, caplog): assert len(listB) == 1 listA = json.loads(json_format.MessageToJson( - stubB.get_subsystems(get_subsystems_req), + stubA.get_subsystems(get_subsystems_req), preserving_proto_field_name=True))['subsystems'] assert len(listA) == num_subsystems @@ -154,59 +245,43 @@ def test_multi_gateway_omap_reread(config, conn, caplog): assert bdevsB[1]["name"] == bdev2 assert bdevsB[2]["name"] == bdev3 -def test_trying_to_lock_twice(config, image, conn, caplog): +def test_trying_to_lock_twice(config, image, conn_lock_twice, caplog): """Tests an attempt to lock the OMAP file from two gateways at the same time """ caplog.clear() - stubA, stubB, gatewayA, gatewayB = conn + stubA, stubB = conn_lock_twice with pytest.raises(Exception) as ex: - create_resource_by_index(stubA, 0) - create_resource_by_index(stubB, 1) + create_resource_by_index(stubA, 100000, None) + create_resource_by_index(stubB, 100001, None) assert "OMAP file unlock was disabled, will not unlock file" in caplog.text assert "The OMAP file is locked, will try again in" in caplog.text assert "Unable to lock OMAP file" in caplog.text time.sleep(120) # Wait enough time for OMAP lock to be released -def create_resource_by_index(stub, i): - bdev = f"{bdev_prefix}{i}" - bdev_req = pb2.create_bdev_req(bdev_name=bdev, - rbd_pool_name=pool, - rbd_image_name=image, - block_size=4096) - ret_bdev = stub.create_bdev(bdev_req) - assert ret_bdev - subsystem = f"{subsystem_prefix}{i}" - subsystem_req = pb2.create_subsystem_req(subsystem_nqn=subsystem) - ret_subsystem = stub.create_subsystem(subsystem_req) - assert ret_subsystem - namespace_req = pb2.add_namespace_req(subsystem_nqn=subsystem, - bdev_name=bdev) - ret_namespace = stub.add_namespace(namespace_req) - assert ret_namespace - -def check_resource_by_index(i, caplog): - bdev = f"{bdev_prefix}{i}" - # notice that this also verifies the namespace as the bdev name is in the namespaces section - assert f"{bdev}" in caplog.text - subsystem = f"{subsystem_prefix}{i}" - assert f"{subsystem}" in caplog.text - -def test_multi_gateway_concurrent_changes(config, image, conn, caplog): +def test_multi_gateway_concurrent_changes(config, image, conn_concurrent, caplog): """Tests concurrent changes to the OMAP from two gateways """ caplog.clear() - stubA, stubB, gatewayA, gatewayB = conn + stubA, stubB = conn_concurrent for i in range(created_resource_count): - if i % 2: - stub = stubA + if i % 2 == 0: + create_resource_by_index(stubA, i, caplog) else: - stub = stubB - create_resource_by_index(stub, i) - assert "Failed" not in caplog.text + create_resource_by_index(stubB, i, caplog) + assert "failed" not in caplog.text.lower() # Let the update some time to bring both gateways to the same page time.sleep(15) + caplog.clear() + get_subsystems_req = pb2.get_subsystems_req() + listA = json.loads(json_format.MessageToJson( + stubA.get_subsystems(get_subsystems_req), + preserving_proto_field_name=True))['subsystems'] + listB = json.loads(json_format.MessageToJson( + stubB.get_subsystems(get_subsystems_req), + preserving_proto_field_name=True))['subsystems'] for i in range(created_resource_count): - check_resource_by_index(i, caplog) + check_resource_by_index(i, listA) + check_resource_by_index(i, listB)