Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add dynamic minimum fan speed support #9

Closed
wants to merge 21 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
45f1229
[thermal control] Fix pmon docker stop issue on 3800
Junchao-Mellanox Feb 18, 2020
23bf172
[thermal fix] Fix QA test issue
Junchao-Mellanox Feb 21, 2020
a1aaa93
Fix thermal control issues
stephenxs Dec 4, 2019
7f341f9
[thermal fix] change psu._get_power_available_status to psu.get_power…
Junchao-Mellanox Feb 25, 2020
3206e95
[thermal fix] adjust log for PSU absence and power absence
Junchao-Mellanox Feb 28, 2020
4e689ac
[thermal fix] add unit test for loading thermal policy file with dupl…
Junchao-Mellanox Feb 28, 2020
0b45c45
[thermal] fix fan.get_presence for non-removable SKU
Junchao-Mellanox Mar 10, 2020
bdfc652
[thermal fix] fix issue: fan direction is based on drawer
Junchao-Mellanox Mar 10, 2020
80e0b88
Fix issue: when fan is not present, should not read fan direction fro…
Junchao-Mellanox Mar 10, 2020
5b46d6f
[thermal fix] add unit test for get_direction for absent FAN
Junchao-Mellanox Mar 11, 2020
2d42cd3
Unplugable PSU has no FAN, no need add a FAN object for this PSU
Junchao-Mellanox Mar 16, 2020
6f80098
1. Enable thermal algorithm by default; 2. set cooling level before s…
Junchao-Mellanox Mar 18, 2020
d891200
start thermal algorithm should also check thermal zone temperature
Junchao-Mellanox Mar 18, 2020
71c3665
Should write string to file
Junchao-Mellanox Mar 19, 2020
f409195
We should force enable or disable thermal algo when thermal control d…
Junchao-Mellanox Mar 19, 2020
12a9d8f
Change thermal algorithm status should also change thermal zone policy
Junchao-Mellanox Mar 19, 2020
527679a
Merge remote-tracking branch 'origin/master' into thermal-algo-fix
Junchao-Mellanox Mar 26, 2020
766aff0
Add fan speed dynamic minimum value
Junchao-Mellanox Mar 26, 2020
1bcec63
Add unit test for DynamicMinCoolingLevelPolicy
Junchao-Mellanox Mar 26, 2020
c6ed366
If current cooling state below minimum cooling state, set it to minim…
Junchao-Mellanox Mar 27, 2020
5541d71
Fix review comment by Kebo
Junchao-Mellanox Apr 7, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 24 additions & 3 deletions device/mellanox/x86_64-mlnx_msn2700-r0/thermal_policy.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"thermal_control_algorithm": {
"run_at_boot_up": "false",
"run_at_boot_up": "true",
"fan_speed_when_suspend": "60"
},
"info_types": [
Expand Down Expand Up @@ -51,6 +51,24 @@
}
]
},
{
"name": "any fan broken",
"conditions": [
{
"type": "fan.any.fault"
}
],
"actions": [
{
"type": "thermal_control.control",
"status": "false"
},
{
"type": "fan.all.set_speed",
"speed": "100"
}
]
},
{
"name": "all fan and psu presence",
"conditions": [
Expand All @@ -59,12 +77,15 @@
},
{
"type": "psu.all.presence"
},
{
"type": "fan.all.good"
}
],
"actions": [
{
"type": "fan.all.set_speed",
"speed": "60"
"type": "thermal_control.control",
"status": "true"
}
]
}
Expand Down
134 changes: 134 additions & 0 deletions platform/mellanox/mlnx-platform-api/sonic_platform/device_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
# Per-SKU platform data, keyed by hardware SKU name.
#
# Each SKU currently carries a 'thermal' section with a 'minimum_table'.
# The table has one entry per "<direction>_<trust>" combination
# (p2c/c2p/unk crossed with trust/untrust), each mapping a "low:high"
# string to an integer.
#
# NOTE(review): the "low:high" keys look like inclusive temperature ranges
# and the integer values look like minimum cooling states; confirm against
# the thermal policy code that consumes this table before relying on it.
DEVICE_DATA = {
    'ACS-MSN2700': {
        'thermal': {
            'minimum_table': {
                "p2c_trust": {"-127:40": 13, "41:120": 15},
                "p2c_untrust": {"-127:25": 13, "26:30": 14, "31:35": 15, "36:120": 16},
                "c2p_trust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
                "c2p_untrust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
                "unk_trust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
                "unk_untrust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
            },
        },
    },
    'LS-SN2700': {
        'thermal': {
            'minimum_table': {
                "p2c_trust": {"-127:40": 13, "41:120": 15},
                "p2c_untrust": {"-127:25": 13, "26:30": 14, "31:35": 15, "36:120": 16},
                "c2p_trust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
                "c2p_untrust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
                "unk_trust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
                "unk_untrust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
            },
        },
    },
    'ACS-MSN2740': {
        'thermal': {
            'minimum_table': {
                "p2c_trust": {"-127:120": 13},
                "p2c_untrust": {"-127:35": 13, "36:40": 14, "41:120": 15},
                "c2p_trust": {"-127:120": 13},
                "c2p_untrust": {"-127:15": 13, "16:30": 14, "31:35": 15, "36:120": 17},
                "unk_trust": {"-127:120": 13},
                "unk_untrust": {"-127:15": 13, "16:30": 14, "31:35": 15, "36:120": 17},
            },
        },
    },
    'ACS-MSN2410': {
        'thermal': {
            'minimum_table': {
                "p2c_trust": {"-127:40": 13, "41:120": 15},
                "p2c_untrust": {"-127:25": 13, "26:30": 14, "31:35": 15, "36:120": 16},
                "c2p_trust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
                "c2p_untrust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
                "unk_trust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
                "unk_untrust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
            },
        },
    },
    'Mellanox-SN2700': {
        'thermal': {
            'minimum_table': {
                "p2c_trust": {"-127:40": 13, "41:120": 15},
                "p2c_untrust": {"-127:25": 13, "26:30": 14, "31:35": 15, "36:120": 16},
                "c2p_trust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
                "c2p_untrust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
                "unk_trust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
                "unk_untrust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
            },
        },
    },
    'Mellanox-SN2700-D48C8': {
        'thermal': {
            'minimum_table': {
                "p2c_trust": {"-127:40": 13, "41:120": 15},
                "p2c_untrust": {"-127:25": 13, "26:30": 14, "31:35": 15, "36:120": 16},
                "c2p_trust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
                "c2p_untrust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
                "unk_trust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
                "unk_untrust": {"-127:20": 13, "21:25": 14, "26:30": 15, "31:120": 16},
            },
        },
    },
    'ACS-MSN2100': {
        'thermal': {
            'minimum_table': {
                "p2c_trust": {"-127:120": 12},
                "p2c_untrust": {"-127:15": 12, "16:25": 13, "26:30": 14, "31:35": 15, "36:120": 16},
                "c2p_trust": {"-127:40": 12, "41:120": 13},
                "c2p_untrust": {"-127:40": 12, "41:120": 13},
                "unk_trust": {"-127:40": 12, "41:120": 13},
                "unk_untrust": {"-127:15": 12, "16:25": 13, "26:30": 14, "31:35": 15, "36:120": 16},
            },
        },
    },
    'ACS-MSN2010': {
        'thermal': {
            'minimum_table': {
                "p2c_trust": {"-127:120": 12},
                "p2c_untrust": {"-127:15": 12, "16:20": 13, "21:30": 14, "31:35": 15, "36:120": 16},
                "c2p_trust": {"-127:120": 12},
                "c2p_untrust": {"-127:20": 12, "21:25": 13, "26:30": 14, "31:35": 15, "36:120": 16},
                "unk_trust": {"-127:120": 12},
                "unk_untrust": {"-127:15": 12, "16:20": 13, "21:30": 14, "31:35": 15, "36:120": 16},
            },
        },
    },
    'ACS-MSN3700': {
        'thermal': {
            'minimum_table': {
                "p2c_trust": {"-127:25": 12, "26:40": 13, "41:120": 14},
                "p2c_untrust": {"-127:15": 12, "16:30": 13, "31:35": 14, "36:40": 15, "41:120": 16},
                "c2p_trust": {"-127:25": 12, "26:40": 13, "41:120": 14},
                "c2p_untrust": {"-127:25": 12, "26:40": 13, "41:120": 14},
                "unk_trust": {"-127:25": 12, "26:40": 13, "41:120": 14},
                "unk_untrust": {"-127:15": 12, "16:30": 13, "31:35": 14, "36:40": 15, "41:120": 16},
            },
        },
    },
    'ACS-MSN3800': {
        'thermal': {
            'minimum_table': {
                "p2c_trust": {"-127:35": 12, "36:120": 13},
                "p2c_untrust": {"-127:0": 12, "1:10": 13, "11:15": 14, "16:20": 15, "21:35": 16, "36:120": 17},
                "c2p_trust": {"-127:30": 12, "31:40": 13, "41:120": 14},
                "c2p_untrust": {"-127:20": 12, "21:30": 13, "31:35": 14, "36:40": 15, "41:120": 16},
                "unk_trust": {"-127:30": 12, "31:40": 13, "41:120": 14},
                "unk_untrust": {"-127:0": 12, "1:10": 13, "11:15": 14, "16:20": 15, "21:35": 16, "36:120": 17},
            },
        },
    },
    'Mellanox-SN3800-D112C8': {
        'thermal': {
            'minimum_table': {
                "p2c_trust": {"-127:35": 12, "36:120": 13},
                "p2c_untrust": {"-127:0": 12, "1:10": 13, "11:15": 14, "16:20": 15, "21:35": 16, "36:120": 17},
                "c2p_trust": {"-127:30": 12, "31:40": 13, "41:120": 14},
                "c2p_untrust": {"-127:20": 12, "21:30": 13, "31:35": 14, "36:40": 15, "41:120": 16},
                "unk_trust": {"-127:30": 12, "31:40": 13, "41:120": 14},
                "unk_untrust": {"-127:0": 12, "1:10": 13, "11:15": 14, "16:20": 15, "21:35": 16, "36:120": 17},
            },
        },
    },
}
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know we don't have the table for 4700 yet, what's the behavior on 4700 w/o this table?

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

minimum FAN speed of 4700 is 60%

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should follow up on when we can have the table for 4700.

Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

sure

43 changes: 42 additions & 1 deletion platform/mellanox/mlnx-platform-api/sonic_platform/fan.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,18 @@
# hw-management directory that holds the LED files
LED_PATH = "/var/run/hw-management/led/"
# fan_dir isn't supported on Spectrum 1. It is supported on Spectrum 2 and later switches
FAN_DIR = "/var/run/hw-management/system/fan_dir"
# File written by Fan.set_cooling_level and read by Fan.get_cooling_level
COOLING_STATE_PATH = "/var/run/hw-management/thermal/cooling_cur_state"

# SKUs whose FANs cannot be unplugged (non-removable):
# 1. don't have fanX_status and should be treated as always present
# NOTE: despite the "dict" in its name, this is a plain list of SKU names.
hwsku_dict_with_unplugable_fan = ['ACS-MSN2010', 'ACS-MSN2100']


class Fan(FanBase):
"""Platform-specific Fan class"""

STATUS_LED_COLOR_ORANGE = "orange"
min_cooling_level = 2

def __init__(self, has_fan_dir, fan_index, drawer_index = 1, psu_fan = False, sku = None):
# API index is starting from 0, Mellanox platform index is starting from 1
Expand Down Expand Up @@ -231,13 +234,18 @@ def set_speed(self, speed):
bool: True if set success, False if fail.
"""
status = True
pwm = int(round(PWM_MAX*speed/100.0))

if self.is_psu_fan:
#PSU fan speed is not setable.
return False

try:
cooling_level = int(speed / 10)
if cooling_level < self.min_cooling_level:
cooling_level = self.min_cooling_level
speed = self.min_cooling_level * 10
self.set_cooling_level(cooling_level)
pwm = int(round(PWM_MAX*speed/100.0))
with open(os.path.join(FAN_PATH, self.fan_speed_set_path), 'w') as fan_pwm:
fan_pwm.write(str(pwm))
except (ValueError, IOError):
Expand Down Expand Up @@ -352,3 +360,36 @@ def get_speed_tolerance(self):
"""
# The tolerance value is fixed as 20% for all the Mellanox platform
return 20

@classmethod
def set_cooling_level(cls, level):
    """
    Change cooling level. The input level should be an integer value [1, 10].
    1 means 10%, 2 means 20%, 10 means 100%.

    Args:
        level: An integer in the range [1, 10]. Boolean values are rejected
               even though bool is a subclass of int.

    Raises:
        RuntimeError: if level is not an integer, is out of range, or the
                      cooling state file cannot be written.
    """
    # bool is a subclass of int, so a plain isinstance(level, int) check lets
    # True through; the second write below would then store the text "True"
    # instead of a number. Reject bool explicitly.
    if isinstance(level, bool) or not isinstance(level, int):
        raise RuntimeError("Failed to set cooling level, input parameter must be integer")

    if level < 1 or level > 10:
        raise RuntimeError("Failed to set cooling level, level value must be in range [1, 10], got {}".format(level))

    try:
        # reset FAN driver and change cooling state
        with open(COOLING_STATE_PATH, 'w') as cooling_state:
            cooling_state.write(str(level + 10))

        # make cooling state display correct value
        # (the file is reopened so the two values go out as separate writes)
        with open(COOLING_STATE_PATH, 'w') as cooling_state:
            cooling_state.write(str(level))
    except (ValueError, IOError) as e:
        raise RuntimeError("Failed to set cooling level - {}".format(e))

@classmethod
def get_cooling_level(cls):
    """
    Read the current cooling level from the cooling state file.

    Returns:
        int: The content of COOLING_STATE_PATH parsed as an integer.

    Raises:
        RuntimeError: if the file cannot be read or its content is not
                      a valid integer.
    """
    try:
        with open(COOLING_STATE_PATH, 'r') as state_file:
            raw_value = state_file.read()
        return int(raw_value)
    except (ValueError, IOError) as err:
        raise RuntimeError("Failed to get cooling level - {}".format(err))

Loading