diff --git a/orchagent/fabricportsorch.cpp b/orchagent/fabricportsorch.cpp index 1adb84ec0822..54d815aaaa99 100644 --- a/orchagent/fabricportsorch.cpp +++ b/orchagent/fabricportsorch.cpp @@ -11,17 +11,18 @@ #include "timer.h" #define FABRIC_POLLING_INTERVAL_DEFAULT (30) +#define FABRIC_PORT_PREFIX "PORT" #define FABRIC_PORT_ERROR 0 #define FABRIC_PORT_SUCCESS 1 #define FABRIC_PORT_STAT_COUNTER_FLEX_COUNTER_GROUP "FABRIC_PORT_STAT_COUNTER" #define FABRIC_PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS 10000 #define FABRIC_QUEUE_STAT_COUNTER_FLEX_COUNTER_GROUP "FABRIC_QUEUE_STAT_COUNTER" #define FABRIC_QUEUE_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS 100000 -#define FABRIC_PORT_TABLE "FABRIC_PORT_TABLE" extern sai_object_id_t gSwitchId; extern sai_switch_api_t *sai_switch_api; extern sai_port_api_t *sai_port_api; +extern sai_queue_api_t *sai_queue_api; const vector port_stat_ids = { @@ -42,7 +43,8 @@ static const vector queue_stat_ids = SAI_QUEUE_STAT_CURR_OCCUPANCY_LEVEL, }; -FabricPortsOrch::FabricPortsOrch(DBConnector *appl_db, vector &tableNames) : +FabricPortsOrch::FabricPortsOrch(DBConnector *appl_db, vector &tableNames, + bool fabricPortStatEnabled, bool fabricQueueStatEnabled) : Orch(appl_db, tableNames), port_stat_manager(FABRIC_PORT_STAT_COUNTER_FLEX_COUNTER_GROUP, StatsMode::READ, FABRIC_PORT_STAT_FLEX_COUNTER_POLLING_INTERVAL_MS, true), @@ -55,14 +57,17 @@ FabricPortsOrch::FabricPortsOrch(DBConnector *appl_db, vector(new DBConnector("STATE_DB", 0)); - m_stateTable = unique_ptr(new Table(m_state_db.get(), FABRIC_PORT_TABLE)); + m_stateTable = unique_ptr
(new Table(m_state_db.get(), APP_FABRIC_PORT_TABLE_NAME)); m_counter_db = shared_ptr(new DBConnector("COUNTERS_DB", 0)); - m_laneQueueCounterTable = unique_ptr
(new Table(m_counter_db.get(), COUNTERS_QUEUE_NAME_MAP)); - m_lanePortCounterTable = unique_ptr
(new Table(m_counter_db.get(), COUNTERS_QUEUE_PORT_MAP)); + m_portNameQueueCounterTable = unique_ptr
(new Table(m_counter_db.get(), COUNTERS_FABRIC_QUEUE_NAME_MAP)); + m_portNamePortCounterTable = unique_ptr
(new Table(m_counter_db.get(), COUNTERS_FABRIC_PORT_NAME_MAP)); m_flex_db = shared_ptr(new DBConnector("FLEX_COUNTER_DB", 0)); - m_flexCounterTable = unique_ptr(new ProducerTable(m_flex_db.get(), FABRIC_PORT_TABLE)); + m_flexCounterTable = unique_ptr(new ProducerTable(m_flex_db.get(), APP_FABRIC_PORT_TABLE_NAME)); + + m_fabricPortStatEnabled = fabricPortStatEnabled; + m_fabricQueueStatEnabled = fabricQueueStatEnabled; getFabricPortList(); @@ -147,32 +152,96 @@ bool FabricPortsOrch::allPortsReady() void FabricPortsOrch::generatePortStats() { - // FIX_ME: This function installs flex counters for port stats - // on fabric ports for fabric asics and voq asics (that connect - // to fabric asics via fabric ports). These counters will be - // installed in FLEX_COUNTER_DB, and queried by syncd and updated - // to COUNTERS_DB. - // However, currently BCM SAI doesn't update its code to query - // port stats (metrics in list port_stat_ids) yet. - // Also, BCM sets too low value for "Max logical port count" (256), - // causing syncd to crash on voq asics that now include regular front - // panel ports, fabric ports, and multiple logical ports. - // So, this function will just do nothing for now, and we will readd - // code to install port stats counters when BCM completely supports. 
+ if (!m_fabricPortStatEnabled) return; + + SWSS_LOG_NOTICE("Generate fabric port stats"); + + vector portNamePortCounterMap; + for (auto p : m_fabricLanePortMap) + { + int lane = p.first; + sai_object_id_t port = p.second; + + std::ostringstream portName; + portName << FABRIC_PORT_PREFIX << lane; + portNamePortCounterMap.emplace_back(portName.str(), sai_serialize_object_id(port)); + + // Install flex counters for port stats + std::unordered_set counter_stats; + for (const auto& it: port_stat_ids) + { + counter_stats.emplace(sai_serialize_port_stat(it)); + } + port_stat_manager.setCounterIdList(port, CounterType::PORT, counter_stats); + } + m_portNamePortCounterTable->set("", portNamePortCounterMap); } void FabricPortsOrch::generateQueueStats() { + if (!m_fabricQueueStatEnabled) return; if (m_isQueueStatsGenerated) return; if (!m_getFabricPortListDone) return; - // FIX_ME: Similar to generatePortStats(), generateQueueStats() installs - // flex counters for queue stats on fabric ports for fabric asics and voq asics. - // However, currently BCM SAI doesn't fully support queue stats query. - // Query on queue type and index is not supported for fabric asics while - // voq asics are not completely supported. - // So, this function will just do nothing for now, and we will readd - // code to install queue stats counters when BCM completely supports. + SWSS_LOG_NOTICE("Generate queue map for fabric ports"); + + sai_status_t status; + sai_attribute_t attr; + + for (auto p : m_fabricLanePortMap) + { + int lane = p.first; + sai_object_id_t port = p.second; + + // Each serdes has some pipes (queues) for unicast and multicast. + // But normally fabric serdes uses only one pipe. 
+ attr.id = SAI_PORT_ATTR_QOS_NUMBER_OF_QUEUES; + status = sai_port_api->get_port_attribute(port, 1, &attr); + if (status != SAI_STATUS_SUCCESS) + { + throw runtime_error("FabricPortsOrch get port queue number failure"); + } + int num_queues = attr.value.u32; + + if (num_queues > 0) + { + vector m_queue_ids; + m_queue_ids.resize(num_queues); + + attr.id = SAI_PORT_ATTR_QOS_QUEUE_LIST; + attr.value.objlist.count = (uint32_t) num_queues; + attr.value.objlist.list = m_queue_ids.data(); + + status = sai_port_api->get_port_attribute(port, 1, &attr); + if (status != SAI_STATUS_SUCCESS) + { + throw runtime_error("FabricPortsOrch get port queue list failure"); + } + + // Maintain queue map and install flex counters for queue stats + vector portNameQueueMap; + + // Fabric serdes queue type is SAI_QUEUE_TYPE_FABRIC_TX. Since we always + // maintain only one queue for fabric serdes, m_queue_ids size is 1. + // And so, there is no need to query SAI_QUEUE_ATTR_TYPE and SAI_QUEUE_ATTR_INDEX + // for queue. Actually, SAI does not support query these attributes on fabric serdes. 
+ int queueIndex = 0; + std::ostringstream portName; + portName << FABRIC_PORT_PREFIX << lane << ":" << queueIndex; + const auto queue = sai_serialize_object_id(m_queue_ids[queueIndex]); + portNameQueueMap.emplace_back(portName.str(), queue); + + // We collect queue counters like occupancy level + std::unordered_set counter_stats; + for (const auto& it: queue_stat_ids) + { + counter_stats.emplace(sai_serialize_queue_stat(it)); + } + queue_stat_manager.setCounterIdList(m_queue_ids[queueIndex], CounterType::QUEUE, counter_stats); + + m_portNameQueueCounterTable->set("", portNameQueueMap); + } + } m_isQueueStatsGenerated = true; } @@ -199,7 +268,7 @@ void FabricPortsOrch::updateFabricPortState() int lane = p.first; sai_object_id_t port = p.second; - string key = "PORT" + to_string(lane); + string key = FABRIC_PORT_PREFIX + to_string(lane); std::vector values; uint32_t remote_peer; uint32_t remote_port; diff --git a/orchagent/fabricportsorch.h b/orchagent/fabricportsorch.h index c641ee566d6d..de7ee7a7b0e9 100644 --- a/orchagent/fabricportsorch.h +++ b/orchagent/fabricportsorch.h @@ -12,18 +12,22 @@ class FabricPortsOrch : public Orch, public Subject { public: - FabricPortsOrch(DBConnector *appl_db, vector &tableNames); + FabricPortsOrch(DBConnector *appl_db, vector &tableNames, + bool fabricPortStatEnabled=true, bool fabricQueueStatEnabled=true); bool allPortsReady(); void generateQueueStats(); private: + bool m_fabricPortStatEnabled; + bool m_fabricQueueStatEnabled; + shared_ptr m_state_db; shared_ptr m_counter_db; shared_ptr m_flex_db; unique_ptr
m_stateTable; - unique_ptr
m_laneQueueCounterTable; - unique_ptr
m_lanePortCounterTable; + unique_ptr
m_portNameQueueCounterTable; + unique_ptr
m_portNamePortCounterTable; unique_ptr m_flexCounterTable; swss::SelectableTimer *m_timer = nullptr; diff --git a/orchagent/main.cpp b/orchagent/main.cpp index 2e140d5893b2..8d204dbf2d52 100644 --- a/orchagent/main.cpp +++ b/orchagent/main.cpp @@ -708,6 +708,9 @@ int main(int argc, char **argv) if (gMySwitchType == "voq") { orchDaemon->setFabricEnabled(true); + // SAI doesn't fully support counters for non fabric asics + orchDaemon->setFabricPortStatEnabled(false); + orchDaemon->setFabricQueueStatEnabled(false); } } else diff --git a/orchagent/orchdaemon.cpp b/orchagent/orchdaemon.cpp index cd6c94c213b3..d1b418882cfc 100644 --- a/orchagent/orchdaemon.cpp +++ b/orchagent/orchdaemon.cpp @@ -459,7 +459,7 @@ bool OrchDaemon::init() vector fabric_port_tables = { // empty for now }; - gFabricPortsOrch = new FabricPortsOrch(m_applDb, fabric_port_tables); + gFabricPortsOrch = new FabricPortsOrch(m_applDb, fabric_port_tables, m_fabricPortStatEnabled, m_fabricQueueStatEnabled); m_orchList.push_back(gFabricPortsOrch); } diff --git a/orchagent/orchdaemon.h b/orchagent/orchdaemon.h index 998e72335a2e..84bcd627b381 100644 --- a/orchagent/orchdaemon.h +++ b/orchagent/orchdaemon.h @@ -69,6 +69,14 @@ class OrchDaemon { m_fabricEnabled = enabled; } + void setFabricPortStatEnabled(bool enabled) + { + m_fabricPortStatEnabled = enabled; + } + void setFabricQueueStatEnabled(bool enabled) + { + m_fabricQueueStatEnabled = enabled; + } void logRotate(); private: DBConnector *m_applDb; @@ -77,6 +85,8 @@ class OrchDaemon DBConnector *m_chassisAppDb; bool m_fabricEnabled = false; + bool m_fabricPortStatEnabled = true; + bool m_fabricQueueStatEnabled = true; std::vector m_orchList; Select *m_select; diff --git a/tests/conftest.py b/tests/conftest.py index 9a7abb1f0699..b53f5dbcc5ca 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -100,11 +100,12 @@ def random_string(size=4, chars=string.ascii_uppercase + string.digits): class AsicDbValidator(DVSDatabase): - def __init__(self, db_id: 
int, connector: str): + def __init__(self, db_id: int, connector: str, switch_type: str): DVSDatabase.__init__(self, db_id, connector) - self._wait_for_asic_db_to_initialize() - self._populate_default_asic_db_values() - self._generate_oid_to_interface_mapping() + if switch_type not in ['fabric']: + self._wait_for_asic_db_to_initialize() + self._populate_default_asic_db_values() + self._generate_oid_to_interface_mapping() def _wait_for_asic_db_to_initialize(self) -> None: """Wait up to 30 seconds for the default fields to appear in ASIC DB.""" @@ -497,7 +498,9 @@ def _polling_function(): wait_for_result(_polling_function, service_polling_config) def init_asic_db_validator(self) -> None: - self.asicdb = AsicDbValidator(self.ASIC_DB_ID, self.redis_sock) + self.get_config_db() + metadata = self.config_db.get_entry('DEVICE_METADATA|localhost', '') + self.asicdb = AsicDbValidator(self.ASIC_DB_ID, self.redis_sock, metadata.get("switch_type")) def init_appl_db_validator(self) -> None: self.appldb = ApplDbValidator(self.APPL_DB_ID, self.redis_sock) @@ -526,11 +529,13 @@ def _polling_function(): port_table_keys = app_db.get_keys("PORT_TABLE") return ("PortInitDone" in port_table_keys and "PortConfigDone" in port_table_keys, None) - wait_for_result(_polling_function, startup_polling_config) + if metadata.get('switch_type') not in ['fabric']: + wait_for_result(_polling_function, startup_polling_config) # Verify that all ports have been created - asic_db = self.get_asic_db() - asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_PORT", num_ports + 1) # +1 CPU Port + if metadata.get('switch_type') not in ['fabric']: + asic_db = self.get_asic_db() + asic_db.wait_for_n_keys("ASIC_STATE:SAI_OBJECT_TYPE_PORT", num_ports + 1) # +1 CPU Port # Verify that fabric ports are monitored in STATE_DB if metadata.get('switch_type', 'npu') in ['voq', 'fabric']: @@ -1802,6 +1807,25 @@ def dvs(request, manage_dvs) -> DockerVirtualSwitch: return manage_dvs(log_path, dvs_env) 
+@pytest.fixture(scope="module")
+def vst(request):
+    vctns = request.config.getoption("--vctns")
+    topo = request.config.getoption("--topo")
+    forcedvs = request.config.getoption("--forcedvs")
+    keeptb = request.config.getoption("--keeptb")
+    imgname = request.config.getoption("--imgname")
+    max_cpu = request.config.getoption("--max_cpu")
+    log_path = vctns if vctns else request.module.__name__
+    dvs_env = getattr(request.module, "DVS_ENV", [])
+    if not topo:
+        # use the chassis supervisor topology as default
+        topo = "virtual_chassis/chassis_supervisor.json"
+    vct = DockerVirtualChassisTopology(vctns, imgname, keeptb, dvs_env, log_path, max_cpu,
+                                       forcedvs, topo)
+    yield vct
+    vct.get_logs(request.module.__name__)
+    vct.destroy()
+
 @pytest.fixture(scope="module")
 def vct(request):
     vctns = request.config.getoption("--vctns")
diff --git a/tests/test_fabric.py b/tests/test_fabric.py
new file mode 100644
index 000000000000..2d1ea8c29302
--- /dev/null
+++ b/tests/test_fabric.py
@@ -0,0 +1,91 @@
+from swsscommon import swsscommon
+from dvslib.dvs_database import DVSDatabase
+import ast
+import json
+import time
+
+# Fabric counters
+NUMBER_OF_RETRIES = 10
+
+counter_group_meta = {
+    'fabric_port_counter': {
+        'key': 'FABRIC_PORT',
+        'group_name': 'FABRIC_PORT_STAT_COUNTER',
+        'name_map': 'COUNTERS_FABRIC_PORT_NAME_MAP',
+        'post_test': 'post_port_counter_test',
+    },
+    'fabric_queue_counter': {
+        'key': 'FABRIC_QUEUE',
+        'group_name': 'FABRIC_QUEUE_STAT_COUNTER',
+        'name_map': 'COUNTERS_FABRIC_QUEUE_NAME_MAP',
+    },
+}
+
+class TestVirtualChassis(object):
+
+    def wait_for_id_list(self, flex_db, stat, name, oid):
+        """Wait until the hash FLEX_COUNTER_TABLE:<stat>:<oid> is non-empty.
+
+        Polls once per second, up to NUMBER_OF_RETRIES times, and fails the
+        test (naming the counter) if the hash never gets any field.
+        """
+        for _ in range(NUMBER_OF_RETRIES):
+            id_list = flex_db.db_connection.hgetall("FLEX_COUNTER_TABLE:" + stat + ":" + oid).items()
+            if len(id_list) > 0:
+                return
+            time.sleep(1)
+
+        assert False, "No ID list for counter " + str(name)
+
+    def verify_flex_counters_populated(self, flex_db, counters_db, map, stat):
+        """Check every name -> OID entry stored in the COUNTERS_DB hash `map`.
+
+        For each entry, wait for the matching flex counter ID list of
+        group `stat` to show up in the flex counter database.
+        """
+        counters_keys = counters_db.db_connection.hgetall(map)
+        for name, oid in counters_keys.items():
+            self.wait_for_id_list(flex_db, stat, name, oid)
+
+    def test_voq_switch(self, vst):
+        """Test fabric counter installation on fabric switches.
+
+        For every virtual switch whose DEVICE_METADATA switch_type is
+        "fabric", verify for each group in counter_group_meta that all
+        entries of its name map have a flex counter ID list, and that every
+        key returned for the flex counter group is a value of that name map.
+        Switches of any other type are only reported, not checked.
+        """
+
+        if vst is None:
+            return
+
+        dvss = vst.dvss
+        for name in dvss.keys():
+            dvs = dvss[name]
+            # Get the config info
+            config_db = dvs.get_config_db()
+            metatbl = config_db.get_entry("DEVICE_METADATA", "localhost")
+
+            cfg_switch_type = metatbl.get("switch_type")
+            if cfg_switch_type == "fabric":
+                flex_db = dvs.get_flex_db()
+                counters_db = dvs.get_counters_db()
+                for ct in counter_group_meta.keys():
+                    meta_data = counter_group_meta[ct]
+                    counter_stat = meta_data['group_name']
+                    counter_map = meta_data['name_map']
+                    self.verify_flex_counters_populated(flex_db, counters_db, counter_map, counter_stat)
+
+                    port_counters_keys = counters_db.db_connection.hgetall(counter_map)
+                    port_counters_stat_keys = flex_db.get_keys("FLEX_COUNTER_TABLE:" + counter_stat)
+                    for port_stat in port_counters_stat_keys:
+                        assert port_stat in dict(port_counters_keys.items()).values(), "Non port created on PORT_STAT_COUNTER group: {}".format(port_stat)
+            else:
+                print( "We do not check switch type:", cfg_switch_type )
+
+# Add Dummy always-pass test at end as workaround
+# for issue when Flaky fail on final test it invokes module tear-down before retrying
+def test_nonflaky_dummy():
+    pass
+
diff --git a/tests/virtual_chassis/8/default_config.json b/tests/virtual_chassis/8/default_config.json
new file mode 100644
index 000000000000..523ab8e450dc
--- /dev/null
+++ 
b/tests/virtual_chassis/8/default_config.json @@ -0,0 +1,13 @@ +{ + "DEVICE_METADATA": { + "localhost": { + "hostname": "supervisor", + "chassis_db_address" : "10.8.1.200", + "inband_address" : "10.8.1.200/24", + "switch_type": "fabric", + "sub_role" : "BackEnd", + "start_chassis_db" : "1", + "comment" : "default_config for a vs that runs chassis_db" + } + } +} diff --git a/tests/virtual_chassis/chassis_supervisor.json b/tests/virtual_chassis/chassis_supervisor.json new file mode 100644 index 000000000000..373b44f25712 --- /dev/null +++ b/tests/virtual_chassis/chassis_supervisor.json @@ -0,0 +1,5 @@ +{ + "VIRTUAL_TOPOLOGY": { + "chassis_instances" : [ "8", "1", "2", "3" ] + } +}