Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Mellanox] Add new thermal sensors for SN5600 #12671

Merged
merged 2 commits into from
Nov 14, 2022
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,11 @@
}
},
'x86_64-nvidia_sn5600-r0': {
'thermal': {
"capability": {
"pch_temp": True
}
}
}
}

Expand Down Expand Up @@ -225,6 +230,11 @@ def get_gearbox_count(cls, sysfs_folder):
def get_cpu_thermal_count(cls):
    """Return the number of CPU core thermal entries under hw-management.

    Counts paths matching ``/run/hw-management/thermal/cpu_core[!_]``, i.e.
    entries named ``cpu_core`` followed by exactly one character that is not
    an underscore. Entries with a longer suffix are not counted.

    The pattern is absolute so the result does not depend on the process's
    current working directory, consistent with the other sensor-count
    helpers that glob under ``/run/hw-management``.

    Returns:
        int: number of matching paths (0 when none exist).
    """
    # NOTE(review): presumably one matching entry per CPU core sensor --
    # confirm against the hw-management sysfs layout.
    return len(glob.glob('/run/hw-management/thermal/cpu_core[!_]'))

@classmethod
@utils.read_only_cache()
def get_sodimm_thermal_count(cls):
    """Return how many SODIMM temperature input paths currently exist.

    Counts paths matching
    ``/run/hw-management/thermal/sodimm*_temp_input``.

    Returns:
        int: number of matching paths (0 when none exist).
    """
    # NOTE(review): the read_only_cache decorator presumably memoizes this
    # value after the first call -- confirm in utils.
    sodimm_inputs = glob.glob('/run/hw-management/thermal/sodimm*_temp_input')
    return len(sodimm_inputs)

@classmethod
@utils.read_only_cache()
def get_minimum_table(cls):
Expand Down
16 changes: 15 additions & 1 deletion platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,6 +119,18 @@
"name": "Ambient Switch Board Temp",
"temperature": "swb_amb",
"default_present": False
},
{
"name": "PCH Temp",
"temperature": "pch_temp",
"default_present": False
},
{
"name": "SODIMM {} Temp",
"temperature": "sodimm{}_temp_input",
"high_threshold": "sodimm{}_temp_max",
"high_critical_threshold": "sodimm{}_temp_crit",
"type": "indexable",
}
],
'linecard thermals': {
Expand Down Expand Up @@ -161,6 +173,8 @@ def initialize_chassis_thermals():
count = DeviceDataManager.get_gearbox_count('/run/hw-management/config')
elif 'CPU Core' in rule['name']:
count = DeviceDataManager.get_cpu_thermal_count()
elif 'SODIMM' in rule['name']:
count = DeviceDataManager.get_sodimm_thermal_count()
if count == 0:
logger.log_debug('Failed to get thermal object count for {}'.format(rule['name']))
continue
Expand Down Expand Up @@ -524,7 +538,7 @@ def monitor_asic_themal_zone(cls):
else:
cls.expect_cooling_state = None


class RemovableThermal(Thermal):
def __init__(self, name, temp_file, high_th_file, high_crit_th_file, position, presence_cb):
super(RemovableThermal, self).__init__(name, temp_file, high_th_file, high_crit_th_file, position)
Expand Down
22 changes: 19 additions & 3 deletions platform/mellanox/mlnx-platform-api/tests/test_thermal.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ class TestThermal:
@mock.patch('os.path.exists', mock.MagicMock(return_value=True))
@mock.patch('sonic_platform.device_data.DeviceDataManager.get_gearbox_count', mock.MagicMock(return_value=2))
@mock.patch('sonic_platform.device_data.DeviceDataManager.get_cpu_thermal_count', mock.MagicMock(return_value=2))
@mock.patch('sonic_platform.device_data.DeviceDataManager.get_sodimm_thermal_count', mock.MagicMock(return_value=2))
@mock.patch('sonic_platform.device_data.DeviceDataManager.get_platform_name', mock.MagicMock(return_value='x86_64-mlnx_msn2700-r0'))
def test_chassis_thermal(self):
from sonic_platform.thermal import THERMAL_NAMING_RULE
Expand All @@ -48,6 +49,7 @@ def test_chassis_thermal(self):
thermal_dict = {thermal.get_name(): thermal for thermal in thermal_list}
gearbox_thermal_rule = None
cpu_thermal_rule = None
sodimm_thermal_rule = None
for rule in THERMAL_NAMING_RULE['chassis thermals']:
thermal_type = rule.get('type', 'single')
if thermal_type == 'single':
Expand All @@ -69,9 +71,12 @@ def test_chassis_thermal(self):
gearbox_thermal_rule = rule
elif 'CPU Core' in rule['name']:
cpu_thermal_rule = rule
elif 'SODIMM' in rule['name']:
sodimm_thermal_rule = rule

gearbox_thermal_count = 0
cpu_thermal_count = 0
sodimm_thermal_count = 0
for thermal in thermal_list:
if 'Gearbox' in thermal.get_name():
start_index = gearbox_thermal_rule.get('start_index', 1)
Expand All @@ -89,21 +94,32 @@ def test_chassis_thermal(self):
assert cpu_thermal_rule['high_threshold'].format(start_index) in thermal.high_threshold
assert cpu_thermal_rule['high_critical_threshold'].format(start_index) in thermal.high_critical_threshold
cpu_thermal_count += 1
elif 'SODIMM' in thermal.get_name():
start_index = sodimm_thermal_rule.get('start_index', 1)
start_index += sodimm_thermal_count
assert thermal.get_name() == sodimm_thermal_rule['name'].format(start_index)
assert sodimm_thermal_rule['temperature'].format(start_index) in thermal.temperature
assert sodimm_thermal_rule['high_threshold'].format(start_index) in thermal.high_threshold
assert sodimm_thermal_rule['high_critical_threshold'].format(start_index) in thermal.high_critical_threshold
sodimm_thermal_count += 1

assert gearbox_thermal_count == 2
assert cpu_thermal_count == 2
assert sodimm_thermal_count == 2

@mock.patch('sonic_platform.device_data.DeviceDataManager.get_platform_name', mock.MagicMock(return_value='x86_64-nvidia_sn2201-r0'))
@mock.patch('sonic_platform.device_data.DeviceDataManager.get_thermal_capability', mock.MagicMock(return_value={'comex_amb': False, 'cpu_amb': True, 'swb_amb': True}))
def test_chassis_thermal_includes(self):
@mock.patch('sonic_platform.device_data.DeviceDataManager.get_thermal_capability')
def test_chassis_thermal_includes(self, mock_capability):
from sonic_platform.thermal import THERMAL_NAMING_RULE
thermal_capability = {'comex_amb': False, 'cpu_amb': True, 'swb_amb': True}
mock_capability.return_value = thermal_capability
chassis = Chassis()
thermal_list = chassis.get_all_thermals()
assert thermal_list
thermal_dict = {thermal.get_name(): thermal for thermal in thermal_list}
for rule in THERMAL_NAMING_RULE['chassis thermals']:
default_present = rule.get('default_present', True)
if not default_present:
if not default_present and thermal_capability.get(rule['temperature']):
thermal_name = rule['name']
assert thermal_name in thermal_dict

Expand Down