Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[DELL] S6100, Z9100 Last Reboot Reason Watchdog, Thermal Support #3479

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions files/image_config/platform/rc.local
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,13 @@ if [ -f $FIRST_BOOT_FILE ]; then
# Notify firstboot to Platform, to use it for reboot-cause
touch /tmp/notify_firstboot_to_platform

# Create the /host/reboot-cause/platform directory.
# /host/reboot-cause/platform/reboot_reason
# will be used to track the last reboot reason.
if [ ! -d /host/reboot-cause/platform ]; then
mkdir -p /host/reboot-cause/platform
fi

if [ -d /host/image-$SONIC_VERSION/platform/$platform ]; then
dpkg -i /host/image-$SONIC_VERSION/platform/$platform/*.deb
fi
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ s6100/scripts/s6100_platform.sh usr/local/bin
common/dell_i2c_utils.sh usr/local/bin
common/io_rd_wr.py usr/local/bin
s6100/scripts/platform_reboot_override usr/share/sonic/device/x86_64-dell_s6100_c2538-r0
s6100/scripts/fastboot_plugin usr/share/sonic/device/x86_64-dell_s6100_c2538-r0
s6100/scripts/track_reboot_reason.sh usr/share/sonic/device/x86_64-dell_s6100_c2538-r0
s6100/scripts/override.conf /etc/systemd/system/systemd-reboot.service.d
common/dell_lpc_mon.sh usr/local/bin
s6100/scripts/platform_sensors.py usr/local/bin
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@ z9100/scripts/z9100_platform.sh usr/local/bin
common/dell_i2c_utils.sh usr/local/bin
common/dell_lpc_mon.sh usr/local/bin
common/io_rd_wr.py usr/local/bin
z9100/scripts/fastboot_plugin usr/share/sonic/device/x86_64-dell_z9100_c2538-r0
z9100/scripts/track_reboot_reason.sh usr/share/sonic/device/x86_64-dell_z9100_c2538-r0
z9100/scripts/platform_reboot_override usr/share/sonic/device/x86_64-dell_z9100_c2538-r0
z9100/scripts/override.conf /etc/systemd/system/systemd-reboot.service.d
z9100/scripts/platform_sensors.py usr/local/bin
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
#
# Write 0xcc to the SMF mb_poweron_reason attribute before the box goes down.
# The platform's track_reboot_reason.sh checks for this value ("cc") on the
# next boot.

if [[ -d /sys/devices/platform/SMF.512/hwmon/ ]]; then
    # Only write once we are really inside the hwmon directory; if the cd
    # failed, the relative redirect would create a stray file in the cwd.
    if cd /sys/devices/platform/SMF.512/hwmon/*; then
        echo 0xcc > mb_poweron_reason
    else
        echo "fastboot_plugin: cannot enter SMF hwmon directory" >&2
    fi
fi
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
#!/usr/bin/python
import sys
import os
import subprocess
import struct

PORT_RES = '/dev/port'

def track_reboot():
    """Run the platform fastboot_plugin script before the reset is issued.

    The plugin is what tracks a CLI-triggered reboot, fastboot or warmboot.
    Any exception raised by subprocess.check_output propagates to the caller.
    """
    plugin = '/usr/share/sonic/device/x86_64-dell_s6100_c2538-r0/fastboot_plugin'
    subprocess.check_output([plugin])

def portio_reg_write(resource, offset, val):
fd = os.open(resource, os.O_RDWR)
Expand All @@ -21,5 +26,6 @@ def portio_reg_write(resource, offset, val):
os.close(fd)

# Script entry point: run the reboot-tracking plugin first, then call
# portio_reg_write with /dev/port, offset 0xcf9 and value 0xe.
# NOTE(review): 0xcf9 is presumably the chipset reset-control port, so this
# write is what actually resets the box - confirm against the platform docs.
if __name__ == "__main__":
track_reboot()
portio_reg_write(PORT_RES, 0xcf9, 0xe)

Original file line number Diff line number Diff line change
Expand Up @@ -218,21 +218,9 @@ reset_muxes() {
}

# Update the SMF mb_poweron_reason mailbox and run the platform's
# track_reboot_reason.sh helper.
# Returns: the exit status of track_reboot_reason.sh.
#
# NOTE(review): the inline 0xff handling below overlaps with what
# track_reboot_reason.sh does for the same register; confirm that both are
# still meant to run.
track_reboot_reason() {
    if [[ -d /sys/devices/platform/SMF.512/hwmon/ ]]; then
        rv=$(cd /sys/devices/platform/SMF.512/hwmon/*; cat mb_poweron_reason)
        reason=$(echo $rv | cut -d 'x' -f2)
        # Quoted so that an empty read does not break the [ ] test.
        if [ "$reason" == "ff" ]; then
            cd /sys/devices/platform/SMF.512/hwmon/*
            if [[ -e /tmp/notify_firstboot_to_platform ]]; then
                echo 0x01 > mb_poweron_reason
            else
                echo 0xbb > mb_poweron_reason
            fi
        elif [ "$reason" == "bb" ] || [ "$reason" == "1" ]; then
            cd /sys/devices/platform/SMF.512/hwmon/*
            echo 0xaa > mb_poweron_reason
        fi
    fi
    /usr/share/sonic/device/x86_64-dell_s6100_c2538-r0/track_reboot_reason.sh
    # `return` needs a number: the bare word "status" was rejected by bash
    # and the helper's exit code never reached the caller.
    status=$?
    return "$status"
}

install_python_api_package() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#!/bin/bash

# Probe the files this script works with once, up front. Each flag holds the
# literal string "true" or "false" and is read by the functions below.

# Current syslog.
syslog_found=false
[[ -f /var/log/syslog ]] && syslog_found=true

# Rotated syslog.
syslog_1_found=false
[[ -f /var/log/syslog.1 ]] && syslog_1_found=true

# Reboot-reason directory and, only when it exists, the reason file in it.
reboot_dir_found=false
reboot_file_found=false
if [[ -d /host/reboot-cause/platform ]]; then
    reboot_dir_found=true
    [[ -f /host/reboot-cause/platform/reboot_reason ]] && reboot_file_found=true
fi

# Compare the SMF smf_poweron_reason value with the one stored on the
# "thermal" line of the reboot reason file, and store it when it is new.
#
# Arguments (optional, default to the production paths):
#   $1 - reboot reason file    [/host/reboot-cause/platform/reboot_reason]
#   $2 - SMF hwmon parent dir  [/sys/devices/platform/SMF.512/hwmon]
# Returns:
#   1 - a new value was written to line 2 of the reason file
#   0 - nothing recorded (value is "11", unchanged, or could not be read)
track_thermal() {
    local reason_file=${1:-/host/reboot-cause/platform/reboot_reason}
    local hwmon_root=${2:-/sys/devices/platform/SMF.512/hwmon}
    local prev_thermal curr_poweron_reason="" attr

    prev_thermal=$(grep thermal "$reason_file" | cut -d ' ' -f2-4)

    # Read the attribute through its full path instead of cd-ing into the
    # glob, so a failed lookup can never read a file from the current dir.
    for attr in "$hwmon_root"/*/smf_poweron_reason; do
        if [[ -r $attr ]]; then
            curr_poweron_reason=$(cat "$attr")
            break
        fi
    done

    # An unreadable register must not be mistaken for a thermal event, and
    # "11" is never recorded (presumably the normal power-on code - confirm).
    if [[ -z $curr_poweron_reason || $curr_poweron_reason = "11" ]]; then
        return 0
    fi

    # Right-hand side quoted: compare as a string, not as a glob pattern.
    if [[ $prev_thermal = "$curr_poweron_reason" ]]; then
        return 0
    fi

    sed -i "2s/.*/thermal $curr_poweron_reason/" "$reason_file"
    return 1
}

# Look for the most recent kernel "watchdog did not stop" CRIT entry in the
# syslog (falling back to the rotated syslog) and store its timestamp on the
# "watchdog" line of the reboot reason file when it is new.
#
# Globals read: syslog_found, syslog_1_found, reboot_dir_found,
#               reboot_file_found ("true"/"false" strings set by the script
#               prologue; they gate which files are consulted).
# Arguments (optional, default to the production paths):
#   $1 - reboot reason file  [/host/reboot-cause/platform/reboot_reason]
#   $2 - current syslog      [/var/log/syslog]
#   $3 - rotated syslog      [/var/log/syslog.1]
# Returns:
#   1 - a new watchdog timestamp was written to line 1 of the reason file
#   0 - nothing recorded
track_watchdog() {
    local reason_file=${1:-/host/reboot-cause/platform/reboot_reason}
    local syslog=${2:-/var/log/syslog}
    local syslog_old=${3:-/var/log/syslog.1}
    local wd_log="" wd_timestamp wd_date prev_wd month day clock _rest

    # tac + "-m 1" yields the newest matching line without a scratch file.
    if [[ $syslog_found = true ]]; then
        wd_log=$(tac "$syslog" | grep -a "watchdog did not stop" \
            | grep -a -m 1 "CRIT kernel")
        if [[ -z $wd_log && $syslog_1_found = true ]]; then
            wd_log=$(tac "$syslog_old" | grep -a "watchdog did not stop" \
                | grep -a -m 1 "CRIT kernel")
        fi
    fi

    # Nothing to do without a log entry or without a pre-existing reason file.
    if [[ -z $wd_log || $reboot_dir_found != true || $reboot_file_found != true ]]; then
        return 0
    fi

    # read collapses the padding syslog uses for single-digit days, without
    # exposing the log text to pathname expansion.
    read -r month day clock _rest <<< "$wd_log"
    wd_timestamp="$month $day $clock"
    wd_date=${wd_timestamp%%.*}    # drop the fractional seconds

    prev_wd=$(grep watchdog "$reason_file" | cut -d ' ' -f2-4)
    if [[ $prev_wd = "$wd_date" ]]; then
        return 0
    fi

    sed -i "1s/.*/watchdog $wd_date/" "$reason_file"
    return 1
}

# Classify the last reboot and write the matching code to the SMF
# mb_poweron_reason attribute:
#   register read back as "ff" -> 0x01 if /tmp/notify_firstboot_to_platform
#                                 exists, else 0xbb (thermal line reset to None)
#   track_thermal returned 1   -> 0xee
#   track_watchdog returned 1  -> 0xdd
#   register read back as "cc" -> 0xaa
#   anything else              -> 0x99
#
# Globals read: reboot_file_found ("false" means the reason file is created
#               here with its two default lines).
# Arguments (optional, default to the production paths; also passed on to
# track_thermal and track_watchdog as their leading arguments):
#   $1 - reboot reason file    [/host/reboot-cause/platform/reboot_reason]
#   $2 - SMF hwmon parent dir  [/sys/devices/platform/SMF.512/hwmon]
# Returns:
#   0 when the hwmon directory is absent, 1 when no mb_poweron_reason
#   attribute is found, otherwise the status of the final register write.
track_reboot_reason() {
    local reason_file=${1:-/host/reboot-cause/platform/reboot_reason}
    local hwmon_root=${2:-/sys/devices/platform/SMF.512/hwmon}
    local is_thermal_reboot is_wd_reboot candidate mailbox="" rv reason new_code

    if [[ $reboot_file_found = false ]]; then
        printf 'watchdog None\nthermal None\n' > "$reason_file"
    fi

    if [[ ! -d $hwmon_root ]]; then
        return 0
    fi

    track_thermal "$reason_file" "$hwmon_root"
    is_thermal_reboot=$?

    track_watchdog "$reason_file"
    is_wd_reboot=$?

    # Resolve the attribute to a full path once. The old code cd-ed into the
    # glob before every write, and a failed cd left a stray file in the cwd.
    for candidate in "$hwmon_root"/*/mb_poweron_reason; do
        if [[ -e $candidate ]]; then
            mailbox=$candidate
            break
        fi
    done
    if [[ -z $mailbox ]]; then
        echo "track_reboot_reason: no mb_poweron_reason under $hwmon_root" >&2
        return 1
    fi

    rv=$(cat "$mailbox")
    reason=$(printf '%s\n' "$rv" | cut -d 'x' -f2)    # "0xff" -> "ff"

    if [[ $reason = "ff" ]]; then
        sed -i "2s/.*/thermal None/" "$reason_file"
        if [[ -e /tmp/notify_firstboot_to_platform ]]; then
            new_code=0x01
        else
            new_code=0xbb
        fi
    elif [[ $is_thermal_reboot = 1 ]]; then
        new_code=0xee
    elif [[ $is_wd_reboot = 1 ]]; then
        new_code=0xdd
    elif [[ $reason = "cc" ]]; then
        new_code=0xaa
    else
        new_code=0x99
    fi

    echo "$new_code" > "$mailbox"
}
track_reboot_reason
Original file line number Diff line number Diff line change
Expand Up @@ -227,11 +227,16 @@ def get_reboot_cause(self):
if (power_reason in self.power_reason_dict):
return (self.power_reason_dict[power_reason], None)
else:
if ((smf_mb_reg_reason == 0xbb) or (smf_mb_reg_reason == 0xff)):
if (smf_mb_reg_reason == 0xaa):
return (ChassisBase.REBOOT_CAUSE_NON_HARDWARE, None)
elif ((smf_mb_reg_reason == 0xbb) or (smf_mb_reg_reason == 0xff)):
return (ChassisBase.REBOOT_CAUSE_POWER_LOSS, None)

if (reset_reason in self.reset_reason_dict):
return (self.reset_reason_dict[reset_reason], None)
elif (smf_mb_reg_reason == 0xdd):
return (ChassisBase.REBOOT_CAUSE_WATCHDOG, None)
elif (smf_mb_reg_reason == 0xee):
return (self.power_reason_dict[power_reason], None)
else:
return (ChassisBase.REBOOT_CAUSE_NON_HARDWARE, None)

return (ChassisBase.REBOOT_CAUSE_HARDWARE_OTHER, "Invalid Reason")

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/bin/bash
#
# Write 0xcc to the SMF mb_poweron_reason attribute before the box goes down.
# The platform's track_reboot_reason.sh checks for this value ("cc") on the
# next boot.

if [[ -d /sys/devices/platform/SMF.512/hwmon/ ]]; then
    # Only write once we are really inside the hwmon directory; if the cd
    # failed, the relative redirect would create a stray file in the cwd.
    if cd /sys/devices/platform/SMF.512/hwmon/*; then
        echo 0xcc > mb_poweron_reason
    else
        echo "fastboot_plugin: cannot enter SMF hwmon directory" >&2
    fi
fi
Original file line number Diff line number Diff line change
@@ -1,11 +1,17 @@
#!/usr/bin/python
import sys
import os
import subprocess
import struct

PORT_RES = '/dev/port'


def track_reboot():
    """Run the platform fastboot_plugin script before the reset is issued.

    The plugin is what tracks a CLI-triggered reboot, fastboot or warmboot.
    Any exception raised by subprocess.check_output propagates to the caller.
    """
    plugin = '/usr/share/sonic/device/x86_64-dell_z9100_c2538-r0/fastboot_plugin'
    subprocess.check_output([plugin])

def portio_reg_write(resource, offset, val):
fd = os.open(resource, os.O_RDWR)
if(fd < 0):
Expand All @@ -21,5 +27,6 @@ def portio_reg_write(resource, offset, val):
os.close(fd)

# Script entry point: run the reboot-tracking plugin first, then call
# portio_reg_write with /dev/port, offset 0xcf9 and value 0xe.
# NOTE(review): 0xcf9 is presumably the chipset reset-control port, so this
# write is what actually resets the box - confirm against the platform docs.
if __name__ == "__main__":
track_reboot()
portio_reg_write(PORT_RES, 0xcf9, 0xe)

Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
#!/bin/bash

# Probe the files this script works with once, up front. Each flag holds the
# literal string "true" or "false" and is read by the functions below.

# Current syslog.
syslog_found=false
[[ -f /var/log/syslog ]] && syslog_found=true

# Rotated syslog.
syslog_1_found=false
[[ -f /var/log/syslog.1 ]] && syslog_1_found=true

# Reboot-reason directory and, only when it exists, the reason file in it.
reboot_dir_found=false
reboot_file_found=false
if [[ -d /host/reboot-cause/platform ]]; then
    reboot_dir_found=true
    [[ -f /host/reboot-cause/platform/reboot_reason ]] && reboot_file_found=true
fi

# Compare the SMF smf_poweron_reason value with the one stored on the
# "thermal" line of the reboot reason file, and store it when it is new.
#
# Arguments (optional, default to the production paths):
#   $1 - reboot reason file    [/host/reboot-cause/platform/reboot_reason]
#   $2 - SMF hwmon parent dir  [/sys/devices/platform/SMF.512/hwmon]
# Returns:
#   1 - a new value was written to line 2 of the reason file
#   0 - nothing recorded (value is "11", unchanged, or could not be read)
track_thermal() {
    local reason_file=${1:-/host/reboot-cause/platform/reboot_reason}
    local hwmon_root=${2:-/sys/devices/platform/SMF.512/hwmon}
    local prev_thermal curr_poweron_reason="" attr

    prev_thermal=$(grep thermal "$reason_file" | cut -d ' ' -f2-4)

    # Read the attribute through its full path instead of cd-ing into the
    # glob, so a failed lookup can never read a file from the current dir.
    for attr in "$hwmon_root"/*/smf_poweron_reason; do
        if [[ -r $attr ]]; then
            curr_poweron_reason=$(cat "$attr")
            break
        fi
    done

    # An unreadable register must not be mistaken for a thermal event, and
    # "11" is never recorded (presumably the normal power-on code - confirm).
    if [[ -z $curr_poweron_reason || $curr_poweron_reason = "11" ]]; then
        return 0
    fi

    # Right-hand side quoted: compare as a string, not as a glob pattern.
    if [[ $prev_thermal = "$curr_poweron_reason" ]]; then
        return 0
    fi

    sed -i "2s/.*/thermal $curr_poweron_reason/" "$reason_file"
    return 1
}

# Look for the most recent kernel "watchdog did not stop" CRIT entry in the
# syslog (falling back to the rotated syslog) and store its timestamp on the
# "watchdog" line of the reboot reason file when it is new.
#
# Globals read: syslog_found, syslog_1_found, reboot_dir_found,
#               reboot_file_found ("true"/"false" strings set by the script
#               prologue; they gate which files are consulted).
# Arguments (optional, default to the production paths):
#   $1 - reboot reason file  [/host/reboot-cause/platform/reboot_reason]
#   $2 - current syslog      [/var/log/syslog]
#   $3 - rotated syslog      [/var/log/syslog.1]
# Returns:
#   1 - a new watchdog timestamp was written to line 1 of the reason file
#   0 - nothing recorded
track_watchdog() {
    local reason_file=${1:-/host/reboot-cause/platform/reboot_reason}
    local syslog=${2:-/var/log/syslog}
    local syslog_old=${3:-/var/log/syslog.1}
    local wd_log="" wd_timestamp wd_date prev_wd month day clock _rest

    # tac + "-m 1" yields the newest matching line without a scratch file.
    if [[ $syslog_found = true ]]; then
        wd_log=$(tac "$syslog" | grep -a "watchdog did not stop" \
            | grep -a -m 1 "CRIT kernel")
        if [[ -z $wd_log && $syslog_1_found = true ]]; then
            wd_log=$(tac "$syslog_old" | grep -a "watchdog did not stop" \
                | grep -a -m 1 "CRIT kernel")
        fi
    fi

    # Nothing to do without a log entry or without a pre-existing reason file.
    if [[ -z $wd_log || $reboot_dir_found != true || $reboot_file_found != true ]]; then
        return 0
    fi

    # read collapses the padding syslog uses for single-digit days, without
    # exposing the log text to pathname expansion.
    read -r month day clock _rest <<< "$wd_log"
    wd_timestamp="$month $day $clock"
    wd_date=${wd_timestamp%%.*}    # drop the fractional seconds

    prev_wd=$(grep watchdog "$reason_file" | cut -d ' ' -f2-4)
    if [[ $prev_wd = "$wd_date" ]]; then
        return 0
    fi

    sed -i "1s/.*/watchdog $wd_date/" "$reason_file"
    return 1
}

# Classify the last reboot and write the matching code to the SMF
# mb_poweron_reason attribute:
#   register read back as "ff" -> 0x01 if /tmp/notify_firstboot_to_platform
#                                 exists, else 0xbb (thermal line reset to None)
#   track_thermal returned 1   -> 0xee
#   track_watchdog returned 1  -> 0xdd
#   register read back as "cc" -> 0xaa
#   anything else              -> 0x99
#
# Globals read: reboot_file_found ("false" means the reason file is created
#               here with its two default lines).
# Arguments (optional, default to the production paths; also passed on to
# track_thermal and track_watchdog as their leading arguments):
#   $1 - reboot reason file    [/host/reboot-cause/platform/reboot_reason]
#   $2 - SMF hwmon parent dir  [/sys/devices/platform/SMF.512/hwmon]
# Returns:
#   0 when the hwmon directory is absent, 1 when no mb_poweron_reason
#   attribute is found, otherwise the status of the final register write.
track_reboot_reason() {
    local reason_file=${1:-/host/reboot-cause/platform/reboot_reason}
    local hwmon_root=${2:-/sys/devices/platform/SMF.512/hwmon}
    local is_thermal_reboot is_wd_reboot candidate mailbox="" rv reason new_code

    if [[ $reboot_file_found = false ]]; then
        printf 'watchdog None\nthermal None\n' > "$reason_file"
    fi

    if [[ ! -d $hwmon_root ]]; then
        return 0
    fi

    track_thermal "$reason_file" "$hwmon_root"
    is_thermal_reboot=$?

    track_watchdog "$reason_file"
    is_wd_reboot=$?

    # Resolve the attribute to a full path once. The old code cd-ed into the
    # glob before every write, and a failed cd left a stray file in the cwd.
    for candidate in "$hwmon_root"/*/mb_poweron_reason; do
        if [[ -e $candidate ]]; then
            mailbox=$candidate
            break
        fi
    done
    if [[ -z $mailbox ]]; then
        echo "track_reboot_reason: no mb_poweron_reason under $hwmon_root" >&2
        return 1
    fi

    rv=$(cat "$mailbox")
    reason=$(printf '%s\n' "$rv" | cut -d 'x' -f2)    # "0xff" -> "ff"

    if [[ $reason = "ff" ]]; then
        sed -i "2s/.*/thermal None/" "$reason_file"
        if [[ -e /tmp/notify_firstboot_to_platform ]]; then
            new_code=0x01
        else
            new_code=0xbb
        fi
    elif [[ $is_thermal_reboot = 1 ]]; then
        new_code=0xee
    elif [[ $is_wd_reboot = 1 ]]; then
        new_code=0xdd
    elif [[ $reason = "cc" ]]; then
        new_code=0xaa
    else
        new_code=0x99
    fi

    echo "$new_code" > "$mailbox"
}
track_reboot_reason
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,12 @@ init_switch_port_led() {

}

# Run the platform's track_reboot_reason.sh helper.
# Returns: the exit status of track_reboot_reason.sh.
track_reboot_reason() {
    /usr/share/sonic/device/x86_64-dell_z9100_c2538-r0/track_reboot_reason.sh
    # `return` needs a number: the bare word "status" was rejected by bash
    # and the helper's exit code never reached the caller.
    status=$?
    return "$status"
}

install_python_api_package() {
device="/usr/share/sonic/device"
platform=$(/usr/local/bin/sonic-cfggen -H -v DEVICE_METADATA.localhost.platform)
Expand All @@ -222,6 +228,7 @@ if [[ "$1" == "init" ]]; then
modprobe dell_ich
modprobe dell_mailbox
modprobe dell_z9100_cpld
track_reboot_reason

cpu_board_mux "new_device"
switch_board_mux "new_device"
Expand Down
Loading