Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support to set PSU fan speed #10

Closed
wants to merge 7 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion dockers/docker-platform-monitor/Dockerfile.j2
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ RUN apt-get update && \
rrdtool \
python-smbus \
ethtool \
dmidecode
dmidecode \
i2c-tools

{% if docker_platform_monitor_debs.strip() -%}
# Copy locally-built Debian package dependencies
Expand Down
31 changes: 28 additions & 3 deletions platform/mellanox/mlnx-platform-api/sonic_platform/fan.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#############################################################################

import os.path
import subprocess

try:
from sonic_platform_base.fan_base import FanBase
Expand All @@ -22,6 +23,7 @@

FAN_PATH = "/var/run/hw-management/thermal/"
LED_PATH = "/var/run/hw-management/led/"
CONFIG_PATH = "/var/run/hw-management/config"
# fan_dir isn't supported on Spectrum 1. It is supported on Spectrum 2 and later switches
FAN_DIR = "/var/run/hw-management/system/fan_dir"
COOLING_STATE_PATH = "/var/run/hw-management/thermal/cooling_cur_state"
Expand All @@ -39,6 +41,9 @@ class Fan(FanBase):

STATUS_LED_COLOR_ORANGE = "orange"
min_cooling_level = 2
# PSU fan speed vector
PSU_FAN_SPEED = ['0x3c', '0x3c', '0x3c', '0x3c', '0x3c',
'0x3c', '0x3c', '0x46', '0x50', '0x5a', '0x64']

def __init__(self, has_fan_dir, fan_index, drawer_index = 1, psu_fan = False, sku = None):
# API index is starting from 0, Mellanox platform index is starting from 1
Expand All @@ -60,6 +65,10 @@ def __init__(self, has_fan_dir, fan_index, drawer_index = 1, psu_fan = False, sk
self.fan_presence_path = "psu{}_fan1_speed_get".format(self.index)
self._name = 'psu_{}_fan_{}'.format(self.index, 1)
self.fan_max_speed_path = None
self.psu_i2c_bus_path = os.path.join(CONFIG_PATH, 'psu{0}_i2c_bus'.format(self.index))
self.psu_i2c_addr_path = os.path.join(CONFIG_PATH, 'psu{0}_i2c_addr'.format(self.index))
self.psu_i2c_command_path = os.path.join(CONFIG_PATH, 'fan_command')

self.fan_status_path = "fan{}_fault".format(self.index)
self.fan_green_led_path = "led_fan{}_green".format(self.drawer_index)
self.fan_red_led_path = "led_fan{}_red".format(self.drawer_index)
Expand Down Expand Up @@ -239,9 +248,25 @@ def set_speed(self, speed):
status = True

if self.is_psu_fan:
#PSU fan speed is not setable.
return False

from .thermal import logger
try:
with open(self.psu_i2c_bus_path, 'r') as f:
bus = f.read().strip()
with open(self.psu_i2c_addr_path, 'r') as f:
addr = f.read().strip()
with open(self.psu_i2c_command_path, 'r') as f:
command = f.read().strip()
speed = Fan.PSU_FAN_SPEED[int(speed / 10)]
command = "i2cset -f -y {0} {1} {2} {3} wp".format(bus, addr, command, speed)
res = subprocess.check_call(command, shell = True)
return True
except subprocess.CalledProcessError as ce:
logger.log_error('Failed to call command {}, return code={}, command output={}'.format(ce.cmd, ce.returncode, ce.output))
return False
except Exception as e:
logger.log_error('Failed to set PSU FAN speed - {}'.format(e))
return False

try:
cooling_level = int(speed / 10)
if cooling_level < self.min_cooling_level:
Expand Down
4 changes: 2 additions & 2 deletions platform/mellanox/mlnx-platform-api/sonic_platform/thermal.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,8 +549,8 @@ def get_air_flow_direction(cls):
port_ambient_path = join(HW_MGMT_THERMAL_ROOT, THERMAL_DEV_PORT_AMBIENT)

# if there is any exception, let it raise
fan_ambient_temp = int(cls._read_generic_file(fan_ambient_path))
port_ambient_temp = int(cls._read_generic_file(port_ambient_path))
fan_ambient_temp = int(cls._read_generic_file(fan_ambient_path, 0))
port_ambient_temp = int(cls._read_generic_file(port_ambient_path, 0))
if fan_ambient_temp > port_ambient_temp:
return 'p2c', fan_ambient_temp
elif fan_ambient_temp < port_ambient_temp:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from sonic_platform_base.sonic_thermal_control.thermal_action_base import ThermalPolicyActionBase
from sonic_platform_base.sonic_thermal_control.thermal_json_object import thermal_json_object
from .thermal import logger


class SetFanSpeedAction(ThermalPolicyActionBase):
Expand Down Expand Up @@ -52,6 +53,21 @@ def execute(self, thermal_info_dict):
fan_info_obj = thermal_info_dict[FanInfo.INFO_NAME]
for fan in fan_info_obj.get_presence_fans():
fan.set_speed(self.speed)
logger.log_info('Set all system FAN speed to {}'.format(self.speed))

SetAllFanSpeedAction.set_psu_fan_speed(thermal_info_dict, self.speed)

# Apply `speed` to every fan of every PSU returned by the chassis.
# The PSU fans are only touched when thermal_info_dict holds a ChassisInfo
# instance under the ChassisInfo.INFO_NAME key; otherwise no fan is updated.
# `speed` is forwarded unchanged to each fan's set_speed() and is reported
# in the log message as a percentage.
# NOTE(review): indentation is lost in this view, so it is unclear whether
# the final log call runs only when the ChassisInfo check passes - confirm
# against the real file.
@classmethod
def set_psu_fan_speed(cls, thermal_info_dict, speed):
# Imported at call time rather than at module level
# (presumably to avoid a circular import - TODO confirm).
from .thermal_infos import ChassisInfo
if ChassisInfo.INFO_NAME in thermal_info_dict and isinstance(thermal_info_dict[ChassisInfo.INFO_NAME], ChassisInfo):
chassis = thermal_info_dict[ChassisInfo.INFO_NAME].get_chassis()
for psu in chassis.get_all_psus():
for psu_fan in psu.get_all_fans():
psu_fan.set_speed(speed)

logger.log_info('Updated PSU FAN speed to {}%'.format(speed))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is the fan speed here a percentage or RPM? If it is RPM, it shouldn't have a '%'.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is percentage.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought we don't know the max speed of the PSU fan?

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We don't know the max speed, but we have a vector that contains 11 values, one per cooling level (0-10). So if the cooling level is 5, we use the 6th element to set the PSU fan speed.




@thermal_json_object('fan.all.check_and_set_speed')
Expand Down Expand Up @@ -121,10 +137,18 @@ def execute(self, thermal_info_dict):
# save power
if Thermal.check_thermal_zone_temperature():
fan_info_obj = thermal_info_dict[FanInfo.INFO_NAME]
update_psu_fan_speed = False
speed = Fan.min_cooling_level * 10
for fan in fan_info_obj.get_presence_fans():
if fan.get_target_speed() != 100:
break
fan.set_speed(Fan.min_cooling_level * 10)
update_psu_fan_speed = True
fan.set_speed(speed)

if update_psu_fan_speed:
SetAllFanSpeedAction.set_psu_fan_speed(thermal_info_dict, speed)

logger.log_info('Changed thermal algorithm status to {}'.format(self.status))


class ChangeMinCoolingLevelAction(ThermalPolicyActionBase):
Expand All @@ -146,12 +170,21 @@ def execute(self, thermal_info_dict):

for key, cooling_level in minimum_table.items():
temp_range = key.split(':')
temp_min = int(temp_range[0]) * 1000
temp_max = int(temp_range[1]) * 1000
temp_min = int(temp_range[0])
temp_max = int(temp_range[1])
if temp_min <= temperature <= temp_max:
Fan.min_cooling_level = cooling_level - 10
break

current_cooling_level = Fan.get_cooling_level()
if current_cooling_level < Fan.min_cooling_level:
Fan.set_cooling_level(Fan.min_cooling_level)
SetAllFanSpeedAction.set_psu_fan_speed(thermal_info_dict, Fan.min_cooling_level * 10)

logger.log_info('Changed minimum cooling level to {}'.format(Fan.min_cooling_level))


class UpdatePsuFanSpeedAction(ThermalPolicyActionBase):
    """Thermal policy action that updates the PSU fan speed."""

    def execute(self, thermal_info_dict):
        """Set the PSU fan speed from the recorded cooling level.

        The speed handed to SetAllFanSpeedAction.set_psu_fan_speed is
        CoolingLevelChangeCondition.cooling_level multiplied by 10.

        :param thermal_info_dict: thermal information dictionary, passed
            through unchanged to set_psu_fan_speed.
        """
        from .thermal_conditions import CoolingLevelChangeCondition

        target_speed = CoolingLevelChangeCondition.cooling_level * 10
        SetAllFanSpeedAction.set_psu_fan_speed(thermal_info_dict, target_speed)
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ def is_match(self, thermal_info_dict):

trust_state = Thermal.check_module_temperature_trustable()
air_flow_dir, temperature = Thermal.get_air_flow_direction()
temperature = temperature / 1000

change_cooling_level = False
if trust_state != MinCoolingLevelChangeCondition.trust_state:
Expand All @@ -101,3 +102,16 @@ def is_match(self, thermal_info_dict):
change_cooling_level = True

return change_cooling_level


class CoolingLevelChangeCondition(ThermalPolicyConditionBase):
    """Condition that matches whenever the fan cooling level has changed."""

    # Cooling level seen by the most recent is_match() call; None until
    # the first call. Stored on the class, so it is shared by all instances.
    cooling_level = None

    def is_match(self, thermal_info_dict):
        """Report whether the cooling level differs from the recorded one.

        Reads the current level via Fan.get_cooling_level(). When it differs
        from the recorded value, the new level is recorded and True is
        returned; otherwise False is returned and nothing is updated.

        :param thermal_info_dict: unused by this condition.
        :return: True if the cooling level changed, False otherwise.
        """
        from .fan import Fan

        observed_level = Fan.get_cooling_level()
        if observed_level != CoolingLevelChangeCondition.cooling_level:
            CoolingLevelChangeCondition.cooling_level = observed_level
            return True
        return False
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,12 @@ def stop_thermal_control_algorithm(cls):

@classmethod
def _add_private_thermal_policy(cls):
policy = ThermalPolicy()
policy.conditions[MinCoolingLevelChangeCondition] = MinCoolingLevelChangeCondition()
policy.actions[ChangeMinCoolingLevelAction] = ChangeMinCoolingLevelAction()
cls._policy_dict['DynamicMinCoolingLevelPolicy'] = policy
dynamic_min_speed_policy = ThermalPolicy()
dynamic_min_speed_policy.conditions[MinCoolingLevelChangeCondition] = MinCoolingLevelChangeCondition()
dynamic_min_speed_policy.actions[ChangeMinCoolingLevelAction] = ChangeMinCoolingLevelAction()
cls._policy_dict['DynamicMinCoolingLevelPolicy'] = dynamic_min_speed_policy

update_psu_fan_speed_policy = ThermalPolicy()
update_psu_fan_speed_policy.conditions[CoolingLevelChangeCondition] = CoolingLevelChangeCondition()
update_psu_fan_speed_policy.actions[UpdatePsuFanSpeedAction] = UpdatePsuFanSpeedAction()
cls._policy_dict['UpdatePsuFanSpeedPolicy'] = update_psu_fan_speed_policy
3 changes: 3 additions & 0 deletions platform/mellanox/mlnx-platform-api/tests/mock_platform.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ def get_presence(self):
def get_powergood_status(self):
return self.powergood

# Mock implementation: always reports no fans by returning an empty list.
def get_all_fans(self):
return []


class MockChassis:
def __init__(self):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -486,7 +486,7 @@ def test_dynamic_minimum_policy(thermal_manager):
assert condition.is_match(None)
assert MinCoolingLevelChangeCondition.trust_state == 'trust'
assert MinCoolingLevelChangeCondition.air_flow_dir == 'p2c'
assert MinCoolingLevelChangeCondition.temperature == 35000
assert MinCoolingLevelChangeCondition.temperature == 35
assert not condition.is_match(None)

Thermal.check_module_temperature_trustable = MagicMock(return_value='untrust')
Expand All @@ -499,7 +499,7 @@ def test_dynamic_minimum_policy(thermal_manager):

Thermal.get_air_flow_direction = MagicMock(return_value=('c2p', 25000))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is the temperature here not changed from "25000" to "25"?

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It will be divided by 1000 in condition.is_match.

assert condition.is_match(None)
assert MinCoolingLevelChangeCondition.temperature == 25000
assert MinCoolingLevelChangeCondition.temperature == 25

chassis = MockChassis()
chassis.sku_name = 'invalid'
Expand Down