Skip to content

Commit

Permalink
Addition of DPU Chassis for thermalctld (#564)
Browse files Browse the repository at this point in the history
  • Loading branch information
gpunathilell authored Nov 27, 2024
1 parent b276e41 commit 0431fa3
Show file tree
Hide file tree
Showing 3 changed files with 100 additions and 6 deletions.
15 changes: 9 additions & 6 deletions sonic-thermalctld/scripts/thermalctld
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ SYSLOG_IDENTIFIER = 'thermalctld'
NOT_AVAILABLE = 'N/A'
CHASSIS_INFO_KEY = 'chassis 1'
PHYSICAL_ENTITY_INFO_TABLE = 'PHYSICAL_ENTITY_INFO'
INVALID_SLOT = -1
INVALID_SLOT_OR_DPU = -1

ERR_UNKNOWN = 1

Expand Down Expand Up @@ -523,9 +523,11 @@ class TemperatureUpdater(logger.Logger):
self.all_thermals = set()

self.is_chassis_system = chassis.is_modular_chassis()
if self.is_chassis_system:
my_slot = try_get(chassis.get_my_slot, INVALID_SLOT)
if my_slot != INVALID_SLOT:
self.is_smartswitch_dpu = chassis.is_smartswitch() and chassis.is_dpu()
self.is_chassis_upd_required = self.is_chassis_system or self.is_smartswitch_dpu
if self.is_chassis_upd_required:
my_slot = try_get(chassis.get_my_slot if self.is_chassis_system else chassis.get_dpu_id, INVALID_SLOT_OR_DPU)
if my_slot != INVALID_SLOT_OR_DPU:
try:
# Modular chassis does not have to have table CHASSIS_STATE_DB.
# So catch the exception here and ignore it.
Expand All @@ -540,7 +542,7 @@ class TemperatureUpdater(logger.Logger):
table_keys = self.table.getKeys()
for tk in table_keys:
self.table._del(tk)
if self.is_chassis_system and self.chassis_table is not None:
if self.is_chassis_upd_required and self.chassis_table is not None:
self.chassis_table._del(tk)
if self.phy_entity_table:
phy_entity_keys = self.phy_entity_table.getKeys()
Expand Down Expand Up @@ -593,6 +595,7 @@ class TemperatureUpdater(logger.Logger):
available_thermals.add((thermal, parent_name, thermal_index))
self._refresh_temperature_status(parent_name, thermal, thermal_index)

# As there are no modules present in DPU, this IF condition is not updated to consider DPU chassis
if self.is_chassis_system:
for module_index, module in enumerate(self.chassis.get_all_modules()):
module_name = try_get(module.get_name, 'Module {}'.format(module_index + 1))
Expand Down Expand Up @@ -702,7 +705,7 @@ class TemperatureUpdater(logger.Logger):
])

self.table.set(name, fvs)
if self.is_chassis_system and self.chassis_table is not None:
if self.is_chassis_upd_required and self.chassis_table is not None:
self.chassis_table.set(name, fvs)
except Exception as e:
self.log_warning('Failed to update thermal status for {} - {}'.format(name, repr(e)))
Expand Down
23 changes: 23 additions & 0 deletions sonic-thermalctld/tests/mock_platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,7 +335,10 @@ def __init__(self):
self._replaceable = False

self._is_chassis_system = False
self._is_dpu = False
self._is_smartswitch = False
self._my_slot = module_base.ModuleBase.MODULE_INVALID_SLOT
self._dpu_id = None
self._thermal_manager = MockThermalManager()

def make_absent_fan(self):
Expand Down Expand Up @@ -445,6 +448,26 @@ def get_position_in_parent(self):
def is_replaceable(self):
return self._replaceable

def is_dpu(self):
    """Return the mock's DPU flag (initialised to False, changed via set_dpu)."""
    return self._is_dpu

def is_smartswitch(self):
    """Return the mock's smartswitch flag (initialised to False, changed via set_smartswitch)."""
    return self._is_smartswitch

def set_smartswitch(self, is_true):
    """Set the value that is_smartswitch() reports."""
    self._is_smartswitch = is_true

def set_dpu(self, is_true):
    """Set the value that is_dpu() reports."""
    self._is_dpu = is_true

def set_dpu_id(self, dpu_id):
    """Store the DPU id used by get_dpu_id()."""
    self._dpu_id = dpu_id

def get_dpu_id(self):
    """Return the DPU id configured on this mock chassis.

    Returns:
        The value previously stored with set_dpu_id().

    Raises:
        NotImplementedError: if no id has been stored yet (the attribute is
            still None). This is the mock's default behaviour and stands in
            for a platform that does not implement get_dpu_id().
    """
    # Compare against None instead of relying on truthiness: 0 is a valid
    # DPU id and must not be mistaken for "not configured".
    if self._dpu_id is None:
        raise NotImplementedError
    return self._dpu_id

class MockModule(module_base.ModuleBase):
def __init__(self):
Expand Down
68 changes: 68 additions & 0 deletions sonic-thermalctld/tests/test_thermalctld.py
Original file line number Diff line number Diff line change
Expand Up @@ -508,6 +508,74 @@ def test_update_module_thermals(self):
assert len(temperature_updater.all_thermals) == 0


# DPU chassis-related tests
def test_dpu_chassis_thermals():
    """Check when TemperatureUpdater ends up with a chassis_table.

    The same mock chassis is reconfigured step by step and a fresh
    TemperatureUpdater is built after each change.
    """
    mock_chassis = MockChassis()

    # Step 1: modular chassis with a slot (not a DPU) - chassis_table is set.
    mock_chassis.set_modular_chassis(True)
    mock_chassis.set_my_slot(1)
    updater = thermalctld.TemperatureUpdater(mock_chassis, multiprocessing.Event())
    assert updater.chassis_table

    # Step 2: DPU flag set, but is_smartswitch() still returns False -
    # no chassis_table.
    mock_chassis.set_modular_chassis(False)
    mock_chassis.set_dpu(True)
    updater = thermalctld.TemperatureUpdater(mock_chassis, multiprocessing.Event())
    assert not updater.chassis_table

    # Step 3: smartswitch DPU whose get_dpu_id() is not implemented (no id
    # configured on the mock) - still no chassis_table.
    mock_chassis.set_smartswitch(True)
    updater = thermalctld.TemperatureUpdater(mock_chassis, multiprocessing.Event())
    assert not updater.chassis_table

    # Step 4: smartswitch DPU with get_dpu_id() implemented - chassis_table is set.
    dpu_id = 1
    mock_chassis.set_dpu_id(dpu_id)
    updater = thermalctld.TemperatureUpdater(mock_chassis, multiprocessing.Event())
    assert updater.chassis_table
    # The chassis table name is TEMPER_INFO_TABLE_NAME suffixed with the DPU id.
    assert updater.chassis_table.table_name == f"{TEMPER_INFO_TABLE_NAME}_{dpu_id}"

    # Replace the state table with a local one whose _del is mocked.
    # NOTE(review): presumably this keeps the updater's teardown away from a
    # real table - confirm.
    updater.table = Table("STATE_DB", "xtable")
    updater.table._del = mock.MagicMock()


def test_dpu_chassis_state_deinit():
    """Verify that deleting the updater removes DPU chassis_table entries."""
    stale_keys = ['key1', 'key2']

    # Configure the mock as a smartswitch DPU with a known id.
    dpu_chassis = MockChassis()
    dpu_chassis.set_smartswitch(True)
    dpu_chassis.set_modular_chassis(False)
    dpu_chassis.set_dpu(True)
    dpu_chassis.set_dpu_id(1)

    updater = thermalctld.TemperatureUpdater(dpu_chassis, multiprocessing.Event())
    assert updater.chassis_table

    # Swap in tables with mocked deletion so the calls can be counted; the
    # state table reports two keys.
    updater.table = Table("STATE_DB", "xtable")
    updater.phy_entity_table = None
    updater.table.getKeys = mock.MagicMock(return_value=stale_keys)
    updater.table._del = mock.MagicMock()
    updater.chassis_table = Table("CHASSIS_STATE_DB", "ctable")
    updater.chassis_table._del = mock.MagicMock()

    # Run the teardown explicitly and check each key was deleted from the
    # chassis table exactly once.
    updater.__del__()
    chassis_del = updater.chassis_table._del
    assert chassis_del.call_count == len(stale_keys)
    chassis_del.assert_has_calls([mock.call(key) for key in stale_keys], any_order=True)


def test_updater_dpu_thermal_check_chassis_table():
    """Verify the DPU chassis_table size follows the chassis thermal count."""
    dpu_chassis = MockChassis()

    # Begin with a single thermal on the chassis.
    dpu_chassis.get_all_thermals().append(MockThermal())

    # Configure the mock as a smartswitch DPU with a known id.
    dpu_chassis.set_dpu(True)
    dpu_chassis.set_smartswitch(True)
    dpu_chassis.set_dpu_id(1)

    updater = thermalctld.TemperatureUpdater(dpu_chassis, multiprocessing.Event())
    updater.update()
    assert updater.chassis_table.get_size() == dpu_chassis.get_num_thermals()

    # Add a second thermal; the next update must be reflected in the table.
    dpu_chassis.get_all_thermals().append(MockThermal())
    updater.update()
    assert updater.chassis_table.get_size() == dpu_chassis.get_num_thermals()


# Modular chassis-related tests


Expand Down

0 comments on commit 0431fa3

Please sign in to comment.