diff --git a/dockers/docker-platform-monitor/Dockerfile.j2 b/dockers/docker-platform-monitor/Dockerfile.j2 index fd11f628559c..1763eb9bac0b 100755 --- a/dockers/docker-platform-monitor/Dockerfile.j2 +++ b/dockers/docker-platform-monitor/Dockerfile.j2 @@ -18,7 +18,8 @@ RUN apt-get update && \ rrdtool \ python-smbus \ ethtool \ - dmidecode + dmidecode \ + i2c-tools {% if docker_platform_monitor_debs.strip() -%} # Copy locally-built Debian package dependencies diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/fan.py b/platform/mellanox/mlnx-platform-api/sonic_platform/fan.py index 14bb873ee8ef..347807e43f9a 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/fan.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/fan.py @@ -9,6 +9,7 @@ ############################################################################# import os.path +import subprocess try: from sonic_platform_base.fan_base import FanBase @@ -22,6 +23,7 @@ FAN_PATH = "/var/run/hw-management/thermal/" LED_PATH = "/var/run/hw-management/led/" +CONFIG_PATH = "/var/run/hw-management/config" # fan_dir isn't supported on Spectrum 1. It is supported on Spectrum 2 and later switches FAN_DIR = "/var/run/hw-management/system/fan_dir" COOLING_STATE_PATH = "/var/run/hw-management/thermal/cooling_cur_state" @@ -39,6 +41,9 @@ class Fan(FanBase): STATUS_LED_COLOR_ORANGE = "orange" min_cooling_level = 2 + # PSU fan speed vector + PSU_FAN_SPEED = ['0x3c', '0x3c', '0x3c', '0x3c', '0x3c', + '0x3c', '0x3c', '0x46', '0x50', '0x5a', '0x64'] def __init__(self, has_fan_dir, fan_index, drawer_index = 1, psu_fan = False, sku = None): # API index is starting from 0, Mellanox platform index is starting from 1 @@ -60,6 +65,10 @@ def __init__(self, has_fan_dir, fan_index, drawer_index = 1, psu_fan = False, sk self.fan_presence_path = "psu{}_fan1_speed_get".format(self.index) self._name = 'psu_{}_fan_{}'.format(self.index, 1) self.fan_max_speed_path = None + self.psu_i2c_bus_path = os.path.join(CONFIG_PATH, 'psu{0}_i2c_bus'.format(self.index)) + self.psu_i2c_addr_path = os.path.join(CONFIG_PATH, 'psu{0}_i2c_addr'.format(self.index)) + self.psu_i2c_command_path = os.path.join(CONFIG_PATH, 'fan_command') + self.fan_status_path = "fan{}_fault".format(self.index) self.fan_green_led_path = "led_fan{}_green".format(self.drawer_index) self.fan_red_led_path = "led_fan{}_red".format(self.drawer_index) @@ -239,9 +248,25 @@ def set_speed(self, speed): status = True if self.is_psu_fan: - #PSU fan speed is not setable. - return False - + from .thermal import logger + try: + with open(self.psu_i2c_bus_path, 'r') as f: + bus = f.read().strip() + with open(self.psu_i2c_addr_path, 'r') as f: + addr = f.read().strip() + with open(self.psu_i2c_command_path, 'r') as f: + command = f.read().strip() + speed = Fan.PSU_FAN_SPEED[int(speed / 10)] + command = "i2cset -f -y {0} {1} {2} {3} wp".format(bus, addr, command, speed) + res = subprocess.check_call(command, shell = True) + return True + except subprocess.CalledProcessError as ce: + logger.log_error('Failed to call command {}, return code={}, command output={}'.format(ce.cmd, ce.returncode, ce.output)) + return False + except Exception as e: + logger.log_error('Failed to set PSU FAN speed - {}'.format(e)) + return False + try: cooling_level = int(speed / 10) if cooling_level < self.min_cooling_level: diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py index a9a83daccd7c..66ff58f7ae07 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py @@ -549,8 +549,8 @@ def get_air_flow_direction(cls): port_ambient_path = join(HW_MGMT_THERMAL_ROOT, THERMAL_DEV_PORT_AMBIENT) # if there is any exception, let it raise - fan_ambient_temp = int(cls._read_generic_file(fan_ambient_path)) - port_ambient_temp = int(cls._read_generic_file(port_ambient_path)) + fan_ambient_temp = int(cls._read_generic_file(fan_ambient_path, 0)) + port_ambient_temp = int(cls._read_generic_file(port_ambient_path, 0)) if fan_ambient_temp > port_ambient_temp: return 'p2c', fan_ambient_temp elif fan_ambient_temp < port_ambient_temp: diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_actions.py b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_actions.py index 4bde98fcdc5b..dbb2e2e54059 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_actions.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_actions.py @@ -1,5 +1,6 @@ from sonic_platform_base.sonic_thermal_control.thermal_action_base import ThermalPolicyActionBase from sonic_platform_base.sonic_thermal_control.thermal_json_object import thermal_json_object +from .thermal import logger class SetFanSpeedAction(ThermalPolicyActionBase): @@ -52,6 +53,21 @@ def execute(self, thermal_info_dict): fan_info_obj = thermal_info_dict[FanInfo.INFO_NAME] for fan in fan_info_obj.get_presence_fans(): fan.set_speed(self.speed) + logger.log_info('Set all system FAN speed to {}'.format(self.speed)) + + SetAllFanSpeedAction.set_psu_fan_speed(thermal_info_dict, self.speed) + + @classmethod + def set_psu_fan_speed(cls, thermal_info_dict, speed): + from .thermal_infos import ChassisInfo + if ChassisInfo.INFO_NAME in thermal_info_dict and isinstance(thermal_info_dict[ChassisInfo.INFO_NAME], ChassisInfo): + chassis = thermal_info_dict[ChassisInfo.INFO_NAME].get_chassis() + for psu in chassis.get_all_psus(): + for psu_fan in psu.get_all_fans(): + psu_fan.set_speed(speed) + + logger.log_info('Updated PSU FAN speed to {}%'.format(speed)) + @thermal_json_object('fan.all.check_and_set_speed') @@ -121,10 +137,18 @@ def execute(self, thermal_info_dict): # save power if Thermal.check_thermal_zone_temperature(): fan_info_obj = thermal_info_dict[FanInfo.INFO_NAME] + update_psu_fan_speed = False + speed = Fan.min_cooling_level * 10 for fan in fan_info_obj.get_presence_fans(): if fan.get_target_speed() != 100: break - fan.set_speed(Fan.min_cooling_level * 10) + update_psu_fan_speed = True + fan.set_speed(speed) + + if update_psu_fan_speed: + SetAllFanSpeedAction.set_psu_fan_speed(thermal_info_dict, speed) + + logger.log_info('Changed thermal algorithm status to {}'.format(self.status)) class ChangeMinCoolingLevelAction(ThermalPolicyActionBase): @@ -146,8 +170,8 @@ def execute(self, thermal_info_dict): for key, cooling_level in minimum_table.items(): temp_range = key.split(':') - temp_min = int(temp_range[0]) * 1000 - temp_max = int(temp_range[1]) * 1000 + temp_min = int(temp_range[0]) + temp_max = int(temp_range[1]) if temp_min <= temperature <= temp_max: Fan.min_cooling_level = cooling_level - 10 break @@ -155,3 +179,12 @@ def execute(self, thermal_info_dict): current_cooling_level = Fan.get_cooling_level() if current_cooling_level < Fan.min_cooling_level: Fan.set_cooling_level(Fan.min_cooling_level) + SetAllFanSpeedAction.set_psu_fan_speed(thermal_info_dict, Fan.min_cooling_level * 10) + + logger.log_info('Changed minimum cooling level to {}'.format(Fan.min_cooling_level)) + + +class UpdatePsuFanSpeedAction(ThermalPolicyActionBase): + def execute(self, thermal_info_dict): + from .thermal_conditions import CoolingLevelChangeCondition + SetAllFanSpeedAction.set_psu_fan_speed(thermal_info_dict, CoolingLevelChangeCondition.cooling_level * 10) diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_conditions.py b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_conditions.py index 8593b0f32881..5d8a5821ad42 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_conditions.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_conditions.py @@ -86,6 +86,7 @@ def is_match(self, thermal_info_dict): trust_state = Thermal.check_module_temperature_trustable() air_flow_dir, temperature = Thermal.get_air_flow_direction() + temperature = temperature / 1000 change_cooling_level = False if trust_state != MinCoolingLevelChangeCondition.trust_state: @@ -101,3 +102,16 @@ def is_match(self, thermal_info_dict): change_cooling_level = True return change_cooling_level + + +class CoolingLevelChangeCondition(ThermalPolicyConditionBase): + cooling_level = None + + def is_match(self, thermal_info_dict): + from .fan import Fan + current_cooling_level = Fan.get_cooling_level() + if current_cooling_level != CoolingLevelChangeCondition.cooling_level: + CoolingLevelChangeCondition.cooling_level = current_cooling_level + return True + else: + return False diff --git a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_manager.py b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_manager.py index efcfb154717f..d70a5bcd1a03 100644 --- a/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_manager.py +++ b/platform/mellanox/mlnx-platform-api/sonic_platform/thermal_manager.py @@ -40,7 +40,12 @@ def stop_thermal_control_algorithm(cls): @classmethod def _add_private_thermal_policy(cls): - policy = ThermalPolicy() - policy.conditions[MinCoolingLevelChangeCondition] = MinCoolingLevelChangeCondition() - policy.actions[ChangeMinCoolingLevelAction] = ChangeMinCoolingLevelAction() - cls._policy_dict['DynamicMinCoolingLevelPolicy'] = policy + dynamic_min_speed_policy = ThermalPolicy() + dynamic_min_speed_policy.conditions[MinCoolingLevelChangeCondition] = MinCoolingLevelChangeCondition() + dynamic_min_speed_policy.actions[ChangeMinCoolingLevelAction] = ChangeMinCoolingLevelAction() + cls._policy_dict['DynamicMinCoolingLevelPolicy'] = dynamic_min_speed_policy + + update_psu_fan_speed_policy = ThermalPolicy() + update_psu_fan_speed_policy.conditions[CoolingLevelChangeCondition] = CoolingLevelChangeCondition() + update_psu_fan_speed_policy.actions[UpdatePsuFanSpeedAction] = UpdatePsuFanSpeedAction() + cls._policy_dict['UpdatePsuFanSpeedPolicy'] = update_psu_fan_speed_policy diff --git a/platform/mellanox/mlnx-platform-api/tests/mock_platform.py b/platform/mellanox/mlnx-platform-api/tests/mock_platform.py index ff8adb66ac91..c53480584889 100644 --- a/platform/mellanox/mlnx-platform-api/tests/mock_platform.py +++ b/platform/mellanox/mlnx-platform-api/tests/mock_platform.py @@ -28,6 +28,9 @@ def get_presence(self): def get_powergood_status(self): return self.powergood + def get_all_fans(self): + return [] + class MockChassis: def __init__(self): diff --git a/platform/mellanox/mlnx-platform-api/tests/test_thermal_policy.py b/platform/mellanox/mlnx-platform-api/tests/test_thermal_policy.py index a89cd75c6e3b..789e316bb540 100644 --- a/platform/mellanox/mlnx-platform-api/tests/test_thermal_policy.py +++ b/platform/mellanox/mlnx-platform-api/tests/test_thermal_policy.py @@ -486,7 +486,7 @@ def test_dynamic_minimum_policy(thermal_manager): assert condition.is_match(None) assert MinCoolingLevelChangeCondition.trust_state == 'trust' assert MinCoolingLevelChangeCondition.air_flow_dir == 'p2c' - assert MinCoolingLevelChangeCondition.temperature == 35000 + assert MinCoolingLevelChangeCondition.temperature == 35 assert not condition.is_match(None) Thermal.check_module_temperature_trustable = MagicMock(return_value='untrust') @@ -499,7 +499,7 @@ def test_dynamic_minimum_policy(thermal_manager): Thermal.get_air_flow_direction = MagicMock(return_value=('c2p', 25000)) assert condition.is_match(None) - assert MinCoolingLevelChangeCondition.temperature == 25000 + assert MinCoolingLevelChangeCondition.temperature == 25 chassis = MockChassis() chassis.sku_name = 'invalid'