From c7389bd5df69bec6037addadebd0b56df448ce29 Mon Sep 17 00:00:00 2001 From: Jing Zhang Date: Tue, 5 Jul 2022 13:41:09 -0700 Subject: [PATCH] show linkmgrd status in `show mux status` (#2254) What I did Replace status in show mux status with APP DB MUX_CABLE_TABLE:, which is written by linkmgrd and indicates linkmgrd's state transition. This change is required for active-active dualtor setup. In active-active setup, we care more about whether state transition happens on ToR side. In tests, we want to see if linkmgrd reacts immediately to link state changes. If we continue to use STATE DB entries, when gRPC is not available, we will get unknown in show mux status, which is not informative and not the actual value we want to check. sign-off: Jing Zhang zhangjing@microsoft.com How I did it Use APP DB entries instead of STATE DB. Move STATE DB to column SERVER_STATUS. How to verify it Unit tests with mock DB values. Tests on dual testbeds. --- show/muxcable.py | 53 +++++++++++++++++++++++----------- tests/mock_tables/appl_db.json | 18 ++++++++++++ tests/muxcable_test.py | 48 ++++++++++++++++++------------ 3 files changed, 84 insertions(+), 35 deletions(-) diff --git a/show/muxcable.py b/show/muxcable.py index 6d01727868..e99a3332d8 100644 --- a/show/muxcable.py +++ b/show/muxcable.py @@ -401,13 +401,15 @@ def get_switch_name(config_db): sys.exit(STATUS_FAIL) -def create_json_dump_per_port_status(db, port_status_dict, muxcable_info_dict, muxcable_health_dict, muxcable_metrics_dict, asic_index, port): +def create_json_dump_per_port_status(db, port_status_dict, muxcable_info_dict, muxcable_grpc_dict, muxcable_health_dict, muxcable_metrics_dict, asic_index, port): res_dict = {} status_value = get_value_for_key_in_dict(muxcable_info_dict[asic_index], port, "state", "MUX_CABLE_TABLE") port_name = platform_sfputil_helper.get_interface_alias(port, db) port_status_dict["MUX_CABLE"][port_name] = {} port_status_dict["MUX_CABLE"][port_name]["STATUS"] = status_value + 
gRPC_value = get_value_for_key_in_dict(muxcable_grpc_dict[asic_index], port, "state", "MUX_CABLE_TABLE") + port_status_dict["MUX_CABLE"][port_name]["SERVER_STATUS"] = gRPC_value health_value = get_value_for_key_in_dict(muxcable_health_dict[asic_index], port, "state", "MUX_LINKMGR_TABLE") port_status_dict["MUX_CABLE"][port_name]["HEALTH"] = health_value res_dict = get_hwmode_mux_direction_port(db, port) @@ -428,7 +430,7 @@ def create_json_dump_per_port_status(db, port_status_dict, muxcable_info_dict, m last_switch_end_time = muxcable_metrics_dict[asic_index].get("linkmgrd_switch_active_end") port_status_dict["MUX_CABLE"][port_name]["LAST_SWITCHOVER_TIME"] = last_switch_end_time -def create_table_dump_per_port_status(db, print_data, muxcable_info_dict, muxcable_health_dict, muxcable_metrics_dict, asic_index, port): +def create_table_dump_per_port_status(db, print_data, muxcable_info_dict, muxcable_grpc_dict, muxcable_health_dict, muxcable_metrics_dict, asic_index, port): print_port_data = [] res_dict = {} @@ -436,6 +438,7 @@ def create_table_dump_per_port_status(db, print_data, muxcable_info_dict, muxcab res_dict = get_hwmode_mux_direction_port(db, port) status_value = get_value_for_key_in_dict(muxcable_info_dict[asic_index], port, "state", "MUX_CABLE_TABLE") #status_value = get_value_for_key_in_tbl(y_cable_asic_table, port, "status") + gRPC_value = get_value_for_key_in_dict(muxcable_grpc_dict[asic_index], port, "state", "MUX_CABLE_TABLE") health_value = get_value_for_key_in_dict(muxcable_health_dict[asic_index], port, "state", "MUX_LINKMGR_TABLE") last_switch_end_time = "" @@ -447,6 +450,7 @@ def create_table_dump_per_port_status(db, print_data, muxcable_info_dict, muxcab port_name = platform_sfputil_helper.get_interface_alias(port, db) print_port_data.append(port_name) print_port_data.append(status_value) + print_port_data.append(gRPC_value) print_port_data.append(health_value) if res_dict[2] == "False": hwstatus = "absent" @@ -510,10 +514,13 @@ def status(db, 
port, json_output): port = platform_sfputil_helper.get_interface_name(port, db) port_table_keys = {} + appl_db_muxcable_tbl_keys = {} port_health_table_keys = {} port_metrics_table_keys = {} per_npu_statedb = {} + per_npu_appl_db = {} muxcable_info_dict = {} + muxcable_grpc_dict = {} muxcable_health_dict = {} muxcable_metrics_dict = {} @@ -525,6 +532,11 @@ def status(db, port, json_output): per_npu_statedb[asic_id] = SonicV2Connector(use_unix_socket_path=False, namespace=namespace) per_npu_statedb[asic_id].connect(per_npu_statedb[asic_id].STATE_DB) + per_npu_appl_db[asic_id] = swsscommon.SonicV2Connector(use_unix_socket_path=False, namespace=namespace) + per_npu_appl_db[asic_id].connect(per_npu_appl_db[asic_id].APPL_DB) + + appl_db_muxcable_tbl_keys[asic_id] = per_npu_appl_db[asic_id].keys( + per_npu_appl_db[asic_id].APPL_DB, 'MUX_CABLE_TABLE:*') port_table_keys[asic_id] = per_npu_statedb[asic_id].keys( per_npu_statedb[asic_id].STATE_DB, 'MUX_CABLE_TABLE|*') port_health_table_keys[asic_id] = per_npu_statedb[asic_id].keys( @@ -546,17 +558,20 @@ def status(db, port, json_output): click.echo("Got invalid asic index for port {}, cant retreive mux status".format(port_name)) sys.exit(STATUS_FAIL) - muxcable_info_dict[asic_index] = per_npu_statedb[asic_index].get_all( + muxcable_info_dict[asic_index] = per_npu_appl_db[asic_id].get_all( + per_npu_appl_db[asic_id].APPL_DB, 'MUX_CABLE_TABLE:{}'.format(port)) + muxcable_grpc_dict[asic_index] = per_npu_statedb[asic_index].get_all( per_npu_statedb[asic_index].STATE_DB, 'MUX_CABLE_TABLE|{}'.format(port)) muxcable_health_dict[asic_index] = per_npu_statedb[asic_index].get_all( per_npu_statedb[asic_index].STATE_DB, 'MUX_LINKMGR_TABLE|{}'.format(port)) muxcable_metrics_dict[asic_index] = per_npu_statedb[asic_index].get_all( per_npu_statedb[asic_index].STATE_DB, 'MUX_METRICS_TABLE|{}'.format(port)) + if muxcable_info_dict[asic_index] is not None: - logical_key = "MUX_CABLE_TABLE|{}".format(port) + logical_key = 
"MUX_CABLE_TABLE:{}".format(port) logical_health_key = "MUX_LINKMGR_TABLE|{}".format(port) logical_metrics_key = "MUX_METRICS_TABLE|{}".format(port) - if logical_key in port_table_keys[asic_index] and logical_health_key in port_health_table_keys[asic_index]: + if logical_key in appl_db_muxcable_tbl_keys[asic_index] and logical_health_key in port_health_table_keys[asic_index]: if logical_metrics_key not in port_metrics_table_keys[asic_index]: muxcable_metrics_dict[asic_index] = {} @@ -565,7 +580,7 @@ def status(db, port, json_output): port_status_dict = {} port_status_dict["MUX_CABLE"] = {} - create_json_dump_per_port_status(db, port_status_dict, muxcable_info_dict, + create_json_dump_per_port_status(db, port_status_dict, muxcable_info_dict, muxcable_grpc_dict, muxcable_health_dict, muxcable_metrics_dict, asic_index, port) click.echo("{}".format(json.dumps(port_status_dict, indent=4))) @@ -573,10 +588,10 @@ def status(db, port, json_output): else: print_data = [] - create_table_dump_per_port_status(db, print_data, muxcable_info_dict, + create_table_dump_per_port_status(db, print_data, muxcable_info_dict, muxcable_grpc_dict, muxcable_health_dict, muxcable_metrics_dict, asic_index, port) - headers = ['PORT', 'STATUS', 'HEALTH', 'HWSTATUS', 'LAST_SWITCHOVER_TIME'] + headers = ['PORT', 'STATUS', 'SERVER_STATUS', 'HEALTH', 'HWSTATUS', 'LAST_SWITCHOVER_TIME'] click.echo(tabulate(print_data, headers=headers)) sys.exit(STATUS_SUCCESSFUL) @@ -595,9 +610,11 @@ def status(db, port, json_output): port_status_dict["MUX_CABLE"] = {} for namespace in namespaces: asic_id = multi_asic.get_asic_index_from_namespace(namespace) - for key in natsorted(port_table_keys[asic_id]): - port = key.split("|")[1] - muxcable_info_dict[asic_id] = per_npu_statedb[asic_id].get_all( + for key in natsorted(appl_db_muxcable_tbl_keys[asic_id]): + port = key.split(":")[1] + muxcable_info_dict[asic_id] = per_npu_appl_db[asic_id].get_all( + per_npu_appl_db[asic_id].APPL_DB, 
'MUX_CABLE_TABLE:{}'.format(port)) + muxcable_grpc_dict[asic_id] = per_npu_statedb[asic_id].get_all( per_npu_statedb[asic_id].STATE_DB, 'MUX_CABLE_TABLE|{}'.format(port)) muxcable_health_dict[asic_id] = per_npu_statedb[asic_id].get_all( per_npu_statedb[asic_id].STATE_DB, 'MUX_LINKMGR_TABLE|{}'.format(port)) @@ -605,7 +622,7 @@ def status(db, port, json_output): per_npu_statedb[asic_id].STATE_DB, 'MUX_METRICS_TABLE|{}'.format(port)) if not muxcable_metrics_dict[asic_id]: muxcable_metrics_dict[asic_id] = {} - create_json_dump_per_port_status(db, port_status_dict, muxcable_info_dict, + create_json_dump_per_port_status(db, port_status_dict, muxcable_info_dict, muxcable_grpc_dict, muxcable_health_dict, muxcable_metrics_dict, asic_id, port) click.echo("{}".format(json.dumps(port_status_dict, indent=4))) @@ -613,20 +630,22 @@ def status(db, port, json_output): print_data = [] for namespace in namespaces: asic_id = multi_asic.get_asic_index_from_namespace(namespace) - for key in natsorted(port_table_keys[asic_id]): - port = key.split("|")[1] + for key in natsorted(appl_db_muxcable_tbl_keys[asic_id]): + port = key.split(":")[1] + muxcable_info_dict[asic_id] = per_npu_appl_db[asic_id].get_all( + per_npu_appl_db[asic_id].APPL_DB, 'MUX_CABLE_TABLE:{}'.format(port)) muxcable_health_dict[asic_id] = per_npu_statedb[asic_id].get_all( per_npu_statedb[asic_id].STATE_DB, 'MUX_LINKMGR_TABLE|{}'.format(port)) - muxcable_info_dict[asic_id] = per_npu_statedb[asic_id].get_all( + muxcable_grpc_dict[asic_id] = per_npu_statedb[asic_id].get_all( per_npu_statedb[asic_id].STATE_DB, 'MUX_CABLE_TABLE|{}'.format(port)) muxcable_metrics_dict[asic_id] = per_npu_statedb[asic_id].get_all( per_npu_statedb[asic_id].STATE_DB, 'MUX_METRICS_TABLE|{}'.format(port)) if not muxcable_metrics_dict[asic_id]: muxcable_metrics_dict[asic_id] = {} - create_table_dump_per_port_status(db, print_data, muxcable_info_dict, + create_table_dump_per_port_status(db, print_data, muxcable_info_dict, muxcable_grpc_dict, 
muxcable_health_dict, muxcable_metrics_dict, asic_id, port) - headers = ['PORT', 'STATUS', 'HEALTH', 'HWSTATUS','LAST_SWITCHOVER_TIME'] + headers = ['PORT', 'STATUS', 'SERVER_STATUS', 'HEALTH', 'HWSTATUS', 'LAST_SWITCHOVER_TIME'] click.echo(tabulate(print_data, headers=headers)) sys.exit(STATUS_SUCCESSFUL) diff --git a/tests/mock_tables/appl_db.json b/tests/mock_tables/appl_db.json index e3cacf284b..6e0e333372 100644 --- a/tests/mock_tables/appl_db.json +++ b/tests/mock_tables/appl_db.json @@ -263,5 +263,23 @@ }, "VXLAN_REMOTE_VNI_TABLE:Vlan200:25.25.25.27": { "vni": "200" + }, + "MUX_CABLE_TABLE:Ethernet32": { + "state": "active" + }, + "MUX_CABLE_TABLE:Ethernet0": { + "state": "active" + }, + "MUX_CABLE_TABLE:Ethernet4": { + "state": "standby" + }, + "MUX_CABLE_TABLE:Ethernet8": { + "state": "standby" + }, + "MUX_CABLE_TABLE:Ethernet16": { + "state": "standby" + }, + "MUX_CABLE_TABLE:Ethernet12": { + "state": "active" } } diff --git a/tests/muxcable_test.py b/tests/muxcable_test.py index d79606e3dc..fab77e055e 100644 --- a/tests/muxcable_test.py +++ b/tests/muxcable_test.py @@ -25,25 +25,25 @@ tabular_data_status_output_expected = """\ -PORT STATUS HEALTH HWSTATUS LAST_SWITCHOVER_TIME ----------- -------- --------- ------------ --------------------------- -Ethernet0 active healthy inconsistent 2021-May-13 10:01:15.696728 -Ethernet4 standby healthy consistent -Ethernet8 standby unhealthy consistent -Ethernet12 unknown unhealthy inconsistent -Ethernet16 standby healthy consistent -Ethernet32 active healthy inconsistent +PORT STATUS SERVER_STATUS HEALTH HWSTATUS LAST_SWITCHOVER_TIME +---------- -------- --------------- --------- ------------ --------------------------- +Ethernet0 active active healthy inconsistent 2021-May-13 10:01:15.696728 +Ethernet4 standby standby healthy consistent +Ethernet8 standby standby unhealthy consistent +Ethernet12 active unknown unhealthy inconsistent +Ethernet16 standby standby healthy consistent +Ethernet32 active active healthy 
inconsistent """ tabular_data_status_output_expected_alias = """\ -PORT STATUS HEALTH HWSTATUS LAST_SWITCHOVER_TIME ------- -------- --------- ------------ --------------------------- -etp1 active healthy inconsistent 2021-May-13 10:01:15.696728 -etp2 standby healthy consistent -etp3 standby unhealthy consistent -etp4 unknown unhealthy inconsistent -etp5 standby healthy consistent -etp9 active healthy inconsistent +PORT STATUS SERVER_STATUS HEALTH HWSTATUS LAST_SWITCHOVER_TIME +------ -------- --------------- --------- ------------ --------------------------- +etp1 active active healthy inconsistent 2021-May-13 10:01:15.696728 +etp2 standby standby healthy consistent +etp3 standby standby unhealthy consistent +etp4 active unknown unhealthy inconsistent +etp5 standby standby healthy consistent +etp9 active active healthy inconsistent """ @@ -52,36 +52,42 @@ "MUX_CABLE": { "Ethernet0": { "STATUS": "active", + "SERVER_STATUS": "active", "HEALTH": "healthy", "HWSTATUS": "inconsistent", "LAST_SWITCHOVER_TIME": "2021-May-13 10:01:15.696728" }, "Ethernet4": { "STATUS": "standby", + "SERVER_STATUS": "standby", "HEALTH": "healthy", "HWSTATUS": "consistent", "LAST_SWITCHOVER_TIME": "" }, "Ethernet8": { "STATUS": "standby", + "SERVER_STATUS": "standby", "HEALTH": "unhealthy", "HWSTATUS": "consistent", "LAST_SWITCHOVER_TIME": "" }, "Ethernet12": { - "STATUS": "unknown", + "STATUS": "active", + "SERVER_STATUS": "unknown", "HEALTH": "unhealthy", "HWSTATUS": "inconsistent", "LAST_SWITCHOVER_TIME": "" }, "Ethernet16": { "STATUS": "standby", + "SERVER_STATUS": "standby", "HEALTH": "healthy", "HWSTATUS": "consistent", "LAST_SWITCHOVER_TIME": "" }, "Ethernet32": { "STATUS": "active", + "SERVER_STATUS": "active", "HEALTH": "healthy", "HWSTATUS": "inconsistent", "LAST_SWITCHOVER_TIME": "" @@ -95,36 +101,42 @@ "MUX_CABLE": { "etp1": { "STATUS": "active", + "SERVER_STATUS": "active", "HEALTH": "healthy", "HWSTATUS": "inconsistent", "LAST_SWITCHOVER_TIME": "2021-May-13 10:01:15.696728" }, 
"etp2": { "STATUS": "standby", + "SERVER_STATUS": "standby", "HEALTH": "healthy", "HWSTATUS": "consistent", "LAST_SWITCHOVER_TIME": "" }, "etp3": { "STATUS": "standby", + "SERVER_STATUS": "standby", "HEALTH": "unhealthy", "HWSTATUS": "consistent", "LAST_SWITCHOVER_TIME": "" }, "etp4": { - "STATUS": "unknown", + "STATUS": "active", + "SERVER_STATUS": "unknown", "HEALTH": "unhealthy", "HWSTATUS": "inconsistent", "LAST_SWITCHOVER_TIME": "" }, "etp5": { "STATUS": "standby", + "SERVER_STATUS": "standby", "HEALTH": "healthy", "HWSTATUS": "consistent", "LAST_SWITCHOVER_TIME": "" }, "etp9": { "STATUS": "active", + "SERVER_STATUS": "active", "HEALTH": "healthy", "HWSTATUS": "inconsistent", "LAST_SWITCHOVER_TIME": ""