diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py b/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py index 1167f56368db..88ec916a33ea 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/chassis.py @@ -571,18 +571,22 @@ def get_change_event(self, timeout=0): wait_for_ever = (timeout == 0) port_dict = {} + error_dict = {} if wait_for_ever: timeout = MAX_SELECT_DELAY while True: - status = self.sfp_event.check_sfp_status(port_dict, timeout) + status = self.sfp_event.check_sfp_status(port_dict, error_dict, timeout) if bool(port_dict): break else: - status = self.sfp_event.check_sfp_status(port_dict, timeout) + status = self.sfp_event.check_sfp_status(port_dict, error_dict, timeout) if status: self.reinit_sfps(port_dict) - return True, {'sfp':port_dict} + result_dict = {'sfp':port_dict} + if error_dict: + result_dict['sfp_error'] = error_dict + return True, result_dict else: return True, {'sfp':{}} diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py index 76a83a4472c9..f8f8bb402ced 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp.py @@ -10,6 +10,7 @@ try: import subprocess + import os from sonic_platform_base.sfp_base import SfpBase from sonic_platform_base.sonic_eeprom import eeprom_dts from sonic_platform_base.sonic_sfp.sff8472 import sff8472InterfaceId @@ -35,6 +36,18 @@ except ImportError as e: pass +try: + if os.environ["PLATFORM_API_UNIT_TESTING"] == "1": + # Unable to import SDK constants under unit test + # Define them here + SX_PORT_MODULE_STATUS_INITIALIZING = 0 + SX_PORT_MODULE_STATUS_PLUGGED = 1 + SX_PORT_MODULE_STATUS_UNPLUGGED = 2 + SX_PORT_MODULE_STATUS_PLUGGED_WITH_ERROR = 3 + SX_PORT_MODULE_STATUS_PLUGGED_DISABLED = 4 +except KeyError: + pass + # definitions of the offset and 
width for values in XCVR info eeprom XCVR_INTFACE_BULK_OFFSET = 0 XCVR_INTFACE_BULK_WIDTH_QSFP = 20 @@ -330,6 +343,18 @@ def __exit__(self, exc_type, exc_val, exc_tb): class SFP(SfpBase): """Platform-specific SFP class""" + SFP_MLNX_ERROR_DESCRIPTION_LONGRANGE_NON_MLNX_CABLE = 'Long range for non-Mellanox cable or module' + SFP_MLNX_ERROR_DESCRIPTION_ENFORCE_PART_NUMBER_LIST = 'Enforce part number list' + SFP_MLNX_ERROR_DESCRIPTION_PMD_TYPE_NOT_ENABLED = 'PMD type not enabled' + SFP_MLNX_ERROR_DESCRIPTION_PCIE_POWER_SLOT_EXCEEDED = 'PCIE system power slot exceeded' + SFP_MLNX_ERROR_DESCRIPTION_RESERVED = 'Reserved' + + SFP_MLNX_ERROR_BIT_LONGRANGE_NON_MLNX_CABLE = 0x00010000 + SFP_MLNX_ERROR_BIT_ENFORCE_PART_NUMBER_LIST = 0x00020000 + SFP_MLNX_ERROR_BIT_PMD_TYPE_NOT_ENABLED = 0x00040000 + SFP_MLNX_ERROR_BIT_PCIE_POWER_SLOT_EXCEEDED = 0x00080000 + SFP_MLNX_ERROR_BIT_RESERVED = 0x80000000 + def __init__(self, sfp_index, sfp_type, sdk_handle_getter, platform): SfpBase.__init__(self) self.index = sfp_index + 1 @@ -388,7 +413,7 @@ def get_presence(self): # Read out any bytes from any offset def _read_eeprom_specific_bytes(self, offset, num_bytes): eeprom_raw = [] - ethtool_cmd = "ethtool -m sfp{} hex on offset {} length {}".format(self.index, offset, num_bytes) + ethtool_cmd = "ethtool -m sfp{} hex on offset {} length {} 2>/dev/null".format(self.index, offset, num_bytes) try: output = subprocess.check_output(ethtool_cmd, shell=True, @@ -2158,3 +2183,68 @@ def is_replaceable(self): bool: True if it is replaceable. 
""" return True + + def _get_error_code(self): + """ + Get error code of the SFP module + + Returns: + The error code fetch from SDK API + """ + module_id_info_list = new_sx_mgmt_module_id_info_t_arr(1) + module_info_list = new_sx_mgmt_phy_module_info_t_arr(1) + + module_id_info = sx_mgmt_module_id_info_t() + module_id_info.slot_id = 0 + module_id_info.module_id = self.sdk_index + sx_mgmt_module_id_info_t_arr_setitem(module_id_info_list, 0, module_id_info) + + rc = sx_mgmt_phy_module_info_get(self.sdk_handle, module_id_info_list, 1, module_info_list) + assert SX_STATUS_SUCCESS == rc, "sx_mgmt_phy_module_info_get failed, error code {}".format(rc) + + mod_info = sx_mgmt_phy_module_info_t_arr_getitem(module_info_list, 0) + return mod_info.module_state.oper_state, mod_info.module_state.error_type + + @classmethod + def _get_error_description_dict(cls): + return {0: cls.SFP_ERROR_DESCRIPTION_POWER_BUDGET_EXCEEDED, + 1: cls.SFP_MLNX_ERROR_DESCRIPTION_LONGRANGE_NON_MLNX_CABLE, + 2: cls.SFP_ERROR_DESCRIPTION_I2C_STUCK, + 3: cls.SFP_ERROR_DESCRIPTION_BAD_EEPROM, + 4: cls.SFP_MLNX_ERROR_DESCRIPTION_ENFORCE_PART_NUMBER_LIST, + 5: cls.SFP_ERROR_DESCRIPTION_UNSUPPORTED_CABLE, + 6: cls.SFP_ERROR_DESCRIPTION_HIGH_TEMP, + 7: cls.SFP_ERROR_DESCRIPTION_BAD_CABLE, + 8: cls.SFP_MLNX_ERROR_DESCRIPTION_PMD_TYPE_NOT_ENABLED, + 12: cls.SFP_MLNX_ERROR_DESCRIPTION_PCIE_POWER_SLOT_EXCEEDED, + 255: cls.SFP_MLNX_ERROR_DESCRIPTION_RESERVED + } + + def get_error_description(self): + """ + Get error description + + Args: + error_code: The error code returned by _get_error_code + + Returns: + The error description + """ + oper_status, error_code = self._get_error_code() + if oper_status == SX_PORT_MODULE_STATUS_INITIALIZING: + error_description = self.SFP_STATUS_INITIALIZING + elif oper_status == SX_PORT_MODULE_STATUS_PLUGGED: + error_description = self.SFP_STATUS_OK + elif oper_status == SX_PORT_MODULE_STATUS_UNPLUGGED: + error_description = self.SFP_STATUS_UNPLUGGED + elif oper_status == 
SX_PORT_MODULE_STATUS_PLUGGED_DISABLED: + error_description = self.SFP_STATUS_DISABLED + elif oper_status == SX_PORT_MODULE_STATUS_PLUGGED_WITH_ERROR: + error_description_dict = self._get_error_description_dict() + if error_code in error_description_dict: + error_description = error_description_dict[error_code] + else: + error_description = "Unknown error ({})".format(error_code) + else: + error_description = "Unknow SFP module status ({})".format(oper_status) + return error_description diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py index 2164f4e18cf2..289a210482f1 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/sfp_event.py @@ -17,6 +17,7 @@ class MockSxFd(object): new_sx_fd_t_p = MagicMock(return_value=MockSxFd()) new_sx_user_channel_t_p = MagicMock() from sonic_py_common.logger import Logger +from .sfp import SFP # SFP status from PMAOS register # 0x1 plug in @@ -30,16 +31,6 @@ class MockSxFd(object): SDK_SFP_STATE_ERR = 0x3 SDK_SFP_STATE_DIS = 0x4 -# SFP status that will be handled by XCVRD -STATUS_PLUGIN = '1' # 00000001 -STATUS_PLUGOUT = '0' # 00000000 -# SFP error status always come with STATUS_PLUGIN, so the last bit is always 1 -STATUS_ERR_I2C_STUCK = '3' # 00000011 -STATUS_ERR_BAD_EEPROM = '5' # 00000101 -STATUS_ERR_UNSUPPORTED_CABLE = '9' # 00001001 -STATUS_ERR_HIGH_TEMP = '17' # 00010001 -STATUS_ERR_BAD_CABLE = '33' # 00100001 - # SFP status used in this file only, will not expose to XCVRD # STATUS_ERROR will be mapped to different status according to the error code STATUS_UNKNOWN = '-1' @@ -69,19 +60,39 @@ class MockSxFd(object): ''' # SFP errors that will block eeprom accessing -sdk_sfp_err_type_dict = { - 0x2: STATUS_ERR_I2C_STUCK, - 0x3: STATUS_ERR_BAD_EEPROM, - 0x5: STATUS_ERR_UNSUPPORTED_CABLE, - 0x6: STATUS_ERR_HIGH_TEMP, - 0x7: STATUS_ERR_BAD_CABLE 
+SDK_SFP_BLOCKING_ERRORS = [ + 0x2, # SFP.SFP_ERROR_BIT_I2C_STUCK, + 0x3, # SFP.SFP_ERROR_BIT_BAD_EEPROM, + 0x5, # SFP.SFP_ERROR_BIT_UNSUPPORTED_CABLE, + 0x6, # SFP.SFP_ERROR_BIT_HIGH_TEMP, + 0x7, # SFP.SFP_ERROR_BIT_BAD_CABLE +] + +SDK_ERRORS_TO_ERROR_BITS = { + 0x0: SFP.SFP_ERROR_BIT_POWER_BUDGET_EXCEEDED, + 0x1: SFP.SFP_MLNX_ERROR_BIT_LONGRANGE_NON_MLNX_CABLE, + 0x2: SFP.SFP_ERROR_BIT_I2C_STUCK, + 0x3: SFP.SFP_ERROR_BIT_BAD_EEPROM, + 0x4: SFP.SFP_MLNX_ERROR_BIT_ENFORCE_PART_NUMBER_LIST, + 0x5: SFP.SFP_ERROR_BIT_UNSUPPORTED_CABLE, + 0x6: SFP.SFP_ERROR_BIT_HIGH_TEMP, + 0x7: SFP.SFP_ERROR_BIT_BAD_CABLE, + 0x8: SFP.SFP_MLNX_ERROR_BIT_PMD_TYPE_NOT_ENABLED, + 0xc: SFP.SFP_MLNX_ERROR_BIT_PCIE_POWER_SLOT_EXCEEDED +} + +SDK_ERRORS_TO_DESCRIPTION = { + 0x1: SFP.SFP_MLNX_ERROR_DESCRIPTION_LONGRANGE_NON_MLNX_CABLE, + 0x4: SFP.SFP_MLNX_ERROR_DESCRIPTION_ENFORCE_PART_NUMBER_LIST, + 0x8: SFP.SFP_MLNX_ERROR_DESCRIPTION_PMD_TYPE_NOT_ENABLED, + 0xc: SFP.SFP_MLNX_ERROR_DESCRIPTION_PCIE_POWER_SLOT_EXCEEDED } sfp_value_status_dict = { - SDK_SFP_STATE_IN: STATUS_PLUGIN, - SDK_SFP_STATE_OUT: STATUS_PLUGOUT, + SDK_SFP_STATE_IN: str(SFP.SFP_STATUS_BIT_INSERTED), + SDK_SFP_STATE_OUT: str(SFP.SFP_STATUS_BIT_REMOVED), SDK_SFP_STATE_ERR: STATUS_ERROR, - SDK_SFP_STATE_DIS: STATUS_PLUGOUT, + SDK_SFP_STATE_DIS: str(SFP.SFP_STATUS_BIT_REMOVED), } # system level event/error @@ -204,7 +215,7 @@ def deinitialize(self): delete_sx_fd_t_p(self.rx_fd_p) delete_sx_user_channel_t_p(self.user_channel_p) - def check_sfp_status(self, port_change, timeout): + def check_sfp_status(self, port_change, error_dict, timeout): """ the meaning of timeout is aligned with select.select, which has the following meaning: 0: poll, returns without blocked @@ -242,6 +253,7 @@ def check_sfp_status(self, port_change, timeout): break sfp_state = sfp_value_status_dict.get(module_state, STATUS_UNKNOWN) + error_description = None if sfp_state == STATUS_UNKNOWN: # in the following sequence, STATUS_UNKNOWN can be returned. 
# so we shouldn't raise exception here. @@ -256,18 +268,29 @@ def check_sfp_status(self, port_change, timeout): # If get SFP status error(0x3) from SDK, then need to read the error_type to get the detailed error if sfp_state == STATUS_ERROR: - if error_type in sdk_sfp_err_type_dict.keys(): - # In SFP at error status case, need to overwrite the sfp_state with the exact error code - sfp_state = sdk_sfp_err_type_dict[error_type] - else: - # For errors don't block the eeprom accessing, we don't report it to XCVRD - logger.log_info("SFP error on port but not blocking eeprom read, error_type {}".format(error_type)) - found +=1 + sfp_state_bits = SDK_ERRORS_TO_ERROR_BITS.get(error_type) + if sfp_state_bits is None: + logger.log_error("Unrecognized error {} detected on ports {}".format(error_type, port_list)) + found += 1 continue + if error_type in SDK_SFP_BLOCKING_ERRORS: + # This error blocks EEPROM access, so additionally set the BLOCKING bit + sfp_state_bits |= SFP.SFP_ERROR_BIT_BLOCKING + + # An error bit is always reported together with the 'INSERTED' bit + sfp_state_bits |= SFP.SFP_STATUS_BIT_INSERTED + + # For vendor-specific errors, the description is returned as well + error_description = SDK_ERRORS_TO_DESCRIPTION.get(error_type) + + sfp_state = str(sfp_state_bits) + for port in port_list: logger.log_info("SFP on port {} state {}".format(port, sfp_state)) port_change[port+1] = sfp_state + if error_description: + error_dict[port+1] = error_description found += 1 return found != 0 diff --git a/platform/mellanox/mlnx-platform-api/tests/test_sfp.py b/platform/mellanox/mlnx-platform-api/tests/test_sfp.py index 0c24eb83354e..405a48a77b79 100644 --- a/platform/mellanox/mlnx-platform-api/tests/test_sfp.py +++ b/platform/mellanox/mlnx-platform-api/tests/test_sfp.py @@ -8,8 +8,11 @@ modules_path = os.path.dirname(test_path) sys.path.insert(0, modules_path) +os.environ["PLATFORM_API_UNIT_TESTING"] = "1" + from sonic_py_common import device_info -from 
sonic_platform.sfp import SFP +from sonic_platform.sfp import SFP, SX_PORT_MODULE_STATUS_INITIALIZING, SX_PORT_MODULE_STATUS_PLUGGED, SX_PORT_MODULE_STATUS_UNPLUGGED, SX_PORT_MODULE_STATUS_PLUGGED_WITH_ERROR, SX_PORT_MODULE_STATUS_PLUGGED_DISABLED + from sonic_platform.chassis import Chassis @@ -26,8 +29,14 @@ def mock_get_sdk_handle(self): self.sdk_handle = 1 return self.sdk_handle + +def mock_get_sfp_error_code(self): + return self.oper_code, self.error_code + + device_info.get_platform = mock_get_platform SFP._read_eeprom_specific_bytes = mock_read_eeprom_specific_bytes +SFP._get_error_code = mock_get_sfp_error_code Chassis.get_sdk_handle = mock_get_sdk_handle @@ -82,3 +91,35 @@ def test_sfp_full_initialize_without_partial(): # Verify when get_sfp is called, the SFP modules won't be initialized again sfp1 = allsfp[0] assert sfp1 == chassis.get_sfp(1) + + +def test_sfp_get_error_status(): + chassis = Chassis() + + # Fetch an SFP module to test + sfp = chassis.get_sfp(1) + + description_dict = sfp._get_error_description_dict() + + sfp.oper_code = SX_PORT_MODULE_STATUS_PLUGGED_WITH_ERROR + for error in description_dict.keys(): + sfp.error_code = error + description = sfp.get_error_description() + + assert description == description_dict[sfp.error_code] + + sfp.error_code = -1 + description = sfp.get_error_description() + assert description == "Unknown error (-1)" + + expected_description_list = [ + (SX_PORT_MODULE_STATUS_INITIALIZING, "Initializing"), + (SX_PORT_MODULE_STATUS_PLUGGED, "OK"), + (SX_PORT_MODULE_STATUS_UNPLUGGED, "Unplugged"), + (SX_PORT_MODULE_STATUS_PLUGGED_DISABLED, "Disabled") + ] + for oper_code, expected_description in expected_description_list: + sfp.oper_code = oper_code + description = sfp.get_error_description() + + assert description == expected_description diff --git a/platform/mellanox/mlnx-platform-api/tests/test_sfp_event.py b/platform/mellanox/mlnx-platform-api/tests/test_sfp_event.py index a21d2d5d8318..3edcc362a5e6 100644 --- 
a/platform/mellanox/mlnx-platform-api/tests/test_sfp_event.py +++ b/platform/mellanox/mlnx-platform-api/tests/test_sfp_event.py @@ -8,6 +8,8 @@ modules_path = os.path.dirname(test_path) sys.path.insert(0, modules_path) +from sonic_platform_base.sfp_base import SfpBase + class TestSfpEvent(object): @classmethod def setup_class(cls): @@ -16,21 +18,29 @@ def setup_class(cls): def test_check_sfp_status(self): from sonic_platform.sfp_event import SDK_SFP_STATE_IN, SDK_SFP_STATE_OUT, SDK_SFP_STATE_ERR - from sonic_platform.sfp_event import STATUS_PLUGIN, STATUS_PLUGOUT - from sonic_platform.sfp_event import sdk_sfp_err_type_dict + from sonic_platform.sfp_event import SDK_ERRORS_TO_ERROR_BITS, SDK_ERRORS_TO_DESCRIPTION, SDK_SFP_BLOCKING_ERRORS - self.executor(SDK_SFP_STATE_IN, None, STATUS_PLUGIN) - self.executor(SDK_SFP_STATE_OUT, None, STATUS_PLUGOUT) - for error_type, error_status in sdk_sfp_err_type_dict.items(): - self.executor(SDK_SFP_STATE_ERR, error_type, error_status) + self.executor(SDK_SFP_STATE_IN, None, SfpBase.SFP_STATUS_BIT_INSERTED) + self.executor(SDK_SFP_STATE_OUT, None, SfpBase.SFP_STATUS_BIT_REMOVED) + for error_type, error_status in SDK_ERRORS_TO_ERROR_BITS.items(): + description = SDK_ERRORS_TO_DESCRIPTION.get(error_type) + if error_type in SDK_SFP_BLOCKING_ERRORS: + error_status |= SfpBase.SFP_ERROR_BIT_BLOCKING + error_status |= SfpBase.SFP_STATUS_BIT_INSERTED + self.executor(SDK_SFP_STATE_ERR, error_type, error_status, description) - def executor(self, mock_module_state, mock_error_type, expect_status): + def executor(self, mock_module_state, mock_error_type, expect_status, description=None): from sonic_platform.sfp_event import sfp_event event = sfp_event() event.on_pmpe = MagicMock(return_value=(True, [0,1], mock_module_state, mock_error_type)) port_change = {} - found = event.check_sfp_status(port_change, 0) + error_dict = {} + found = event.check_sfp_status(port_change, error_dict, 0) assert found - assert 1 in port_change and port_change[1] 
== expect_status - assert 2 in port_change and port_change[2] == expect_status + expect_status_str = str(expect_status) + assert 1 in port_change and port_change[1] == expect_status_str + assert 2 in port_change and port_change[2] == expect_status_str + if description: + assert 1 in error_dict and error_dict[1] == description + assert 2 in error_dict and error_dict[2] == description