# Review summary of this patch to sonic-chassisd/scripts/chassisd.
#
# NOTE(review): the patch text below has lost its line breaks and indentation
# (every hunk is flattened), so it is kept verbatim. Restore the original line
# structure before trying to apply it.
#
# Hunk 1 (-785/+785, SmartSwitchModuleUpdater):
#   Adds get_dpu_midplane_state(key). It connects to CHASSIS_STATE_DB through
#   daemon_base.db_connect() when self.chassis_state_db is falsy, then returns
#   hget(key, "dpu_midplane_link_state"). Any exception is logged as
#   "Unexpected error: ..." and there is no return in that path, so the caller
#   receives None.
#
# Hunk 2 (-952/+967, per-module midplane check):
#   Removes the dpu_admin_state = self.get_module_admin_status(module_key)
#   lookup.
#
# Hunk 3 (-965/+978, per-module midplane check):
#   The old code called update_dpu_state() inside the branches that detect a
#   change between midplane_access and current_midplane_state. The new code
#   builds key = "DPU_STATE|" + module_key, reads the stored value with
#   get_dpu_midplane_state(key), and writes "UP" or "DOWN" only when the stored
#   value differs from midplane_access.
#   NOTE(review): this appears to run after the if/elif logging chain rather
#   than inside it - confirm once indentation is restored.
#   NOTE(review): a None result from get_dpu_midplane_state() is unequal to
#   both 'UP' and 'DOWN', so a failed read always leads to a write.
#   The branch that logged "midplane connectivity is not restored in {} seconds"
#   when is_module_reboot_system_up_expired(module_key) was true is removed.
#
# Hunk 4 (-1235/+1244, ChassisdDaemon):
#   Replaces the STATE_DB CHASSIS_MIDPLANE_INFO_TABLE lookup with
#   platform_chassis.get_module(module_index).get_oper_status().
#   op becomes MODULE_ADMIN_UP when admin_state == 'up' and the operational
#   state is not ModuleBase.MODULE_STATUS_ONLINE, and MODULE_ADMIN_DOWN when
#   admin_state == 'down' and the operational state is not
#   ModuleBase.MODULE_STATUS_OFFLINE; any other admin_state leaves op as None.
#   DPU_STATE is initialised to 'UP' when the operational state is
#   MODULE_STATUS_ONLINE and to 'DOWN' otherwise (previously
#   admin_state.upper()).
#   NOTE(review): get_name() and get_oper_status() are called before "try:",
#   so an exception from either is not handled by the "Error in run" handler.
diff --git a/sonic-chassisd/scripts/chassisd b/sonic-chassisd/scripts/chassisd index 504829bf7..3e3b5d264 100755 --- a/sonic-chassisd/scripts/chassisd +++ b/sonic-chassisd/scripts/chassisd @@ -785,6 +785,21 @@ class SmartSwitchModuleUpdater(ModuleUpdater): except Exception as e: self.log_error(f"Unexpected error: {e}") + def get_dpu_midplane_state(self, key): + """ + Get DPU midplane-state from chassisStateDB using the given key. + """ + try: + # Connect to the CHASSIS_STATE_DB using daemon_base + if not self.chassis_state_db: + self.chassis_state_db = daemon_base.db_connect("CHASSIS_STATE_DB") + + # Fetch the dpu_midplane_link_state + return self.chassis_state_db.hget(key, "dpu_midplane_link_state") + + except Exception as e: + self.log_error(f"Unexpected error: {e}") + def _convert_to_dict(self, data): """ Converts SWIG proxy object or native dict to a Python dictionary. @@ -952,8 +967,6 @@ class SmartSwitchModuleUpdater(ModuleUpdater): module_key = try_get(module.get_name, default='MODULE {}'.format(index)) midplane_ip = try_get(module.get_midplane_ip, default=INVALID_IP) midplane_access = try_get(module.is_midplane_reachable, default=False) - dpu_admin_state = self.get_module_admin_status(module_key) - # Generate syslog for the loss of midplane connectivity when midplane connectivity # loss is detected for the first time current_midplane_state = 'False' @@ -965,20 +978,16 @@ class SmartSwitchModuleUpdater(ModuleUpdater): if midplane_access is False and current_midplane_state == 'True': self.log_warning("Unexpected: Module {} lost midplane connectivity".format(module_key)) - # Update midplane state DOWN in the chassisStateDB DPU_STATE table - key = "DPU_STATE|" + module_key - self.update_dpu_state(key, "DOWN") - elif midplane_access is True and current_midplane_state == 'False': self.log_notice("Module {} midplane connectivity is up".format(module_key)) - # Update midplane state UP in the chassisStateDB DPU_STATE table - key = "DPU_STATE|" + module_key + # 
Update midplane state in the chassisStateDB DPU_STATE table + key = "DPU_STATE|" + module_key + dpu_mp_state = self.get_dpu_midplane_state(key) + if midplane_access and dpu_mp_state != 'UP': self.update_dpu_state(key, "UP") - - elif midplane_access is False and current_midplane_state == 'False': - if self.is_module_reboot_system_up_expired(module_key): - self.log_warning("Unexpected: Module {} midplane connectivity is not restored in {} seconds".format(module_key, self.linecard_reboot_timeout)) + elif not midplane_access and dpu_mp_state != 'DOWN': + self.update_dpu_state(key, "DOWN") # Update db with midplane information fvs = swsscommon.FieldValuePairs([(CHASSIS_MIDPLANE_INFO_IP_FIELD, midplane_ip), @@ -1235,40 +1244,35 @@ class ChassisdDaemon(daemon_base.DaemonBase): threads = [] for module_index in range(0, self.module_updater.num_modules): op = None - # Get midplane state of DPU - module_info_dict = self.module_updater._get_module_info(module_index) + # Get operational state of DPU module_name = self.platform_chassis.get_module(module_index).get_name() - state_db = daemon_base.db_connect("STATE_DB") - midplane_table = swsscommon.Table(state_db, CHASSIS_MIDPLANE_INFO_TABLE) - fvs = midplane_table.get(module_name) - midplane_state = 'False' - if isinstance(fvs, list) and fvs[0] is True: - fvs = dict(fvs[-1]) - midplane_state = fvs[CHASSIS_MIDPLANE_INFO_ACCESS_FIELD] + operational_state = self.platform_chassis.get_module(module_index).get_oper_status() try: - if module_info_dict is not None: - # Get admin state of DPU - key = module_info_dict[CHASSIS_MODULE_INFO_NAME_FIELD] - admin_state = self.module_updater.get_module_admin_status(key) - if admin_state == 'up' and midplane_state == 'False': - # startup DPU - op = MODULE_ADMIN_UP - - elif admin_state != 'up' and midplane_state == 'True': - # shutdown DPU - op = MODULE_ADMIN_DOWN - - # Initialize DPU_STATE DB table on bootup - dpu_state_key = "DPU_STATE|" + module_name - 
self.module_updater.update_dpu_state(dpu_state_key, admin_state.upper()) - - if op is not None: - # Create and start a thread for the DPU logic - thread = threading.Thread(target=self.submit_dpu_callback, args=(module_index, op)) - thread.daemon = True # Set as a daemon thread - thread.start() - threads.append(thread) + # Get admin state of DPU + admin_state = self.module_updater.get_module_admin_status(module_name) + if admin_state == 'up' and operational_state != ModuleBase.MODULE_STATUS_ONLINE: + # startup DPU + op = MODULE_ADMIN_UP + + elif admin_state == 'down' and operational_state != ModuleBase.MODULE_STATUS_OFFLINE: + # shutdown DPU + op = MODULE_ADMIN_DOWN + + # Initialize DPU_STATE DB table on bootup + dpu_state_key = "DPU_STATE|" + module_name + if operational_state == ModuleBase.MODULE_STATUS_ONLINE: + op_state = 'UP' + else: + op_state = 'DOWN' + self.module_updater.update_dpu_state(dpu_state_key, op_state) + + if op is not None: + # Create and start a thread for the DPU logic + thread = threading.Thread(target=self.submit_dpu_callback, args=(module_index, op)) + thread.daemon = True # Set as a daemon thread + thread.start() + threads.append(thread) except Exception as e: self.log_error(f"Error in run: {str(e)}", exc_info=True)