From e32b5ac4b33235723b220d5c97981f22d0823f45 Mon Sep 17 00:00:00 2001
From: Sujin Kang
Date: Tue, 2 Mar 2021 09:27:14 -0800
Subject: [PATCH] Add soft-reboot reboot type (#1453)

What I did
Add a new reboot named as soft-reboot which can be performed by "kexec -e"

How I did it
Replace the platform reboot with "kexec -e" for the cold reboot case.

How to verify it
Verified the reboot on DUT and check the reboot-cause
---
 scripts/soft-reboot | 268 ++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 268 insertions(+)
 create mode 100755 scripts/soft-reboot

diff --git a/scripts/soft-reboot b/scripts/soft-reboot
new file mode 100755
index 000000000000..52ccdd690b0d
--- /dev/null
+++ b/scripts/soft-reboot
@@ -0,0 +1,268 @@
+#!/bin/bash
+#
+# soft-reboot: boot the next SONiC image by loading its kernel with
+# "kexec -l" and jumping to it with "kexec -e".
+#
+DEVPATH="/usr/share/sonic/device"
+# Name of the platform-specific reboot tool under ${DEVPATH}/<platform>/
+PLAT_REBOOT="platform_reboot"
+REBOOT_CAUSE_FILE="/host/reboot-cause/reboot-cause.txt"
+REBOOT_TIME=$(date)
+REBOOT_METHOD="/sbin/kexec -e"
+LOG_SSD_HEALTH="/usr/local/bin/log_ssd_health"
+WATCHDOG_UTIL="/usr/local/bin/watchdogutil"
+
+EXIT_SUCCESS=0
+EXIT_FAILURE=1
+EXIT_NOT_SUPPORTED=2
+EXIT_FILE_SYSTEM_FULL=3
+EXIT_NEXT_IMAGE_NOT_EXISTS=4
+EXIT_SONIC_INSTALLER_VERIFY_REBOOT=21
+
+# Reboot immediately if we run the kdump capture kernel
+VMCORE_FILE=/proc/vmcore
+if [[ -e "${VMCORE_FILE}" && -s "${VMCORE_FILE}" ]]; then
+    echo "We have a /proc/vmcore, then we just kdump'ed"
+    echo "User issued 'kdump' command [User: kdump, Time: ${REBOOT_TIME}]" > "${REBOOT_CAUSE_FILE}"
+    sync
+    PLATFORM=$(grep -oP 'sonic_platform=\K\S+' /proc/cmdline)
+    PLAT_REBOOT_PATH="${DEVPATH}/${PLATFORM}/${PLAT_REBOOT}"
+    # Insist on a regular file: a directory also passes -x but cannot be
+    # exec'ed, and a failed exec would end the script without rebooting.
+    if [[ -n "${PLATFORM}" && -f "${PLAT_REBOOT_PATH}" && -x "${PLAT_REBOOT_PATH}" ]]; then
+        exec "${PLAT_REBOOT_PATH}"
+    fi
+    # If no platform-specific reboot tool, just run /sbin/reboot
+    /sbin/reboot
+    echo 1 > /proc/sys/kernel/sysrq
+    echo b > /proc/sysrq-trigger
+fi
+
+REBOOT_USER=$(logname)
+PLATFORM=$(sonic-cfggen -H -v DEVICE_METADATA.localhost.platform)
+ASIC_TYPE=$(sonic-cfggen -y /etc/sonic/sonic_version.yml -v asic_type)
+VERBOSE=no
+SSD_FW_UPDATE="ssd-fw-upgrade"
+REBOOT_SCRIPT_NAME=$(basename "$0")
+REBOOT_TYPE="${REBOOT_SCRIPT_NAME}"
+PLATFORM_PLUGIN="${REBOOT_TYPE}_plugin"
+BOOT_TYPE_ARG="soft"
+TAG_LATEST=yes
+
+# Send the message to syslog; with VERBOSE=yes also print it, prefixed with
+# the current date.
+function debug()
+{
+    if [[ x"${VERBOSE}" == x"yes" ]]; then
+        echo "$(date)" "$@"
+    fi
+    logger "$@"
+}
+
+# Report a failure on stderr and in syslog.
+function error()
+{
+    echo "$@" >&2
+    logger -p user.err "$@"
+}
+
+# Run "ctrmgr_tools.py tag-all" when the tool is installed and tagging was
+# not disabled with -t.
+function tag_images()
+{
+    if [[ -f /usr/local/bin/ctrmgr_tools.py && x"${TAG_LATEST}" == x"yes" ]]; then
+        /usr/local/bin/ctrmgr_tools.py tag-all
+    fi
+}
+
+# Request a cold shutdown of syncd, except on mellanox ASICs.
+function stop_sonic_services()
+{
+    if [[ x"$ASIC_TYPE" != x"mellanox" ]]; then
+        debug "Stopping syncd process..."
+        docker exec -i syncd /usr/bin/syncd_request_shutdown --cold > /dev/null
+        sleep 3
+    fi
+}
+
+# Disable warm restart, move a leftover warm-boot Redis dump aside and unload
+# any kernel already staged for kexec. Every step is best-effort.
+function clear_lingering_reboot_config()
+{
+    # Clear any outstanding warm-reboot config. The status is saved before it
+    # is tested because the test itself overwrites $?.
+    local result
+    result=$(
+        timeout 10s config warm_restart disable
+        rc=$?
+        if [[ ${rc} -eq 124 ]]; then echo timeout; else echo "code (${rc})"; fi
+    ) || /bin/true
+    debug "Cancel warm-reboot: ${result}"
+
+    WARM_DIR="/host/warmboot"
+    REDIS_FILE=dump.rdb
+    TIMESTAMP=$(date +%Y%m%d-%H%M%S)
+    if [[ -f ${WARM_DIR}/${REDIS_FILE} ]]; then
+        mv -f ${WARM_DIR}/${REDIS_FILE} ${WARM_DIR}/${REDIS_FILE}.${TIMESTAMP} || /bin/true
+    fi
+    /sbin/kexec -u || /bin/true
+}
+
+SCRIPT=$0
+
+# Print the usage text and exit with EXIT_SUCCESS.
+function show_help_and_exit()
+{
+    echo "Usage ${SCRIPT} [options]"
+    echo "    Request rebooting the device. Invoke platform-specific tool when available."
+    echo "    This script will shutdown syncd before rebooting."
+    echo " "
+    echo "    Available options:"
+    echo "        -h, -? : getting this help"
+    echo "        -v     : print log messages to the console as well"
+    echo "        -t     : do not tag remotely deployed images as local"
+
+    exit "${EXIT_SUCCESS}"
+}
+
+# Set KERNEL_IMAGE, INITRD and BOOT_OPTIONS for the next image, reading the
+# Aboot or ONIE/GRUB boot configuration. Exits with EXIT_NOT_SUPPORTED when
+# /host/machine.conf names neither bootloader.
+function setup_reboot_variables()
+{
+    # Kernel and initrd image
+    NEXT_SONIC_IMAGE=$(sonic-installer list | grep "Next: " | cut -d ' ' -f 2)
+    IMAGE_PATH="/host/image-${NEXT_SONIC_IMAGE#SONiC-OS-}"
+    if grep -q aboot_platform= /host/machine.conf; then
+        KERNEL_IMAGE="$(ls $IMAGE_PATH/boot/vmlinuz-*)"
+        BOOT_OPTIONS="$(cat "$IMAGE_PATH/kernel-cmdline" | tr '\n' ' ') SONIC_BOOT_TYPE=${BOOT_TYPE_ARG}"
+    elif grep -q onie_platform= /host/machine.conf; then
+        KERNEL_OPTIONS=$(cat /host/grub/grub.cfg | sed "/$NEXT_SONIC_IMAGE'/,/}/"'!'"g" | grep linux)
+        KERNEL_IMAGE="/host$(echo $KERNEL_OPTIONS | cut -d ' ' -f 2)"
+        BOOT_OPTIONS="$(echo $KERNEL_OPTIONS | sed -e 's/\s*linux\s*/BOOT_IMAGE=/') SONIC_BOOT_TYPE=${BOOT_TYPE_ARG}"
+    else
+        error "Unknown bootloader. ${REBOOT_TYPE} is not supported."
+        exit "${EXIT_NOT_SUPPORTED}"
+    fi
+    INITRD=$(echo $KERNEL_IMAGE | sed 's/vmlinuz/initrd.img/g')
+}
+
+# Stage the selected kernel, initrd and command line with "kexec -l".
+function load_kernel()
+{
+    # Load kernel into the memory
+    /sbin/kexec -l "$KERNEL_IMAGE" --initrd="$INITRD" --append="$BOOT_OPTIONS"
+}
+
+# Check that /host is writable (a failure is only logged) and that
+# sonic-installer accepts the next image (a failure aborts the reboot with
+# EXIT_SONIC_INSTALLER_VERIFY_REBOOT).
+function reboot_pre_check()
+{
+    # Make sure that the file system is normal: read-write able
+    local filename
+    filename="/host/test-$(date +%Y%m%d-%H%M%S)"
+    ERR=0
+    touch ${filename} || ERR=$?
+    if [[ ${ERR} -ne 0 ]]; then
+        # Continue rebooting in this case, but log the error
+        VERBOSE=yes debug "Filesystem might be read-only or full ..."
+    fi
+    # -f: the probe file does not exist when touch failed
+    rm -f ${filename}
+
+    # Verify the next image by sonic-installer. The assignment is kept apart
+    # from "local", whose own exit status would otherwise mask a failure.
+    local message
+    if ! message=$(sonic-installer verify-next-image 2>&1); then
+        VERBOSE=yes debug "Failed to verify next image: ${message}"
+        exit ${EXIT_SONIC_INSTALLER_VERIFY_REBOOT}
+    fi
+}
+
+# Handle the command line: -h/-? help, -v verbose, -t skip image tagging.
+function parse_options()
+{
+    while getopts "h?vt" opt; do
+        case ${opt} in
+            h|\? )
+                show_help_and_exit
+                ;;
+            v )
+                VERBOSE=yes
+                ;;
+            t )
+                TAG_LATEST=no
+                ;;
+        esac
+    done
+}
+
+parse_options "$@"
+
+# Exit if not superuser
+if [[ "$EUID" -ne 0 ]]; then
+    echo "This command must be run as root" >&2
+    exit "${EXIT_FAILURE}"
+fi
+
+if [ -x ${LOG_SSD_HEALTH} ]; then
+    debug "Collecting logs to check ssd health before ${REBOOT_TYPE}..."
+    ${LOG_SSD_HEALTH}
+fi
+
+debug "User requested rebooting device ..."
+
+setup_reboot_variables
+reboot_pre_check
+
+# Tag remotely deployed images as local
+tag_images
+
+# Stop SONiC services gracefully.
+stop_sonic_services
+
+clear_lingering_reboot_config
+
+# Log a failed load; the script still proceeds to the reboot step as before.
+load_kernel || error "Failed to load ${KERNEL_IMAGE} with kexec"
+
+# Update the reboot cause file to reflect that user issued 'reboot' command
+# Upon next boot, the contents of this file will be used to determine the
+# cause of the previous reboot
+echo "User issued '${REBOOT_SCRIPT_NAME}' command [User: ${REBOOT_USER}, Time: ${REBOOT_TIME}]" > ${REBOOT_CAUSE_FILE}
+
+sync
+sleep 3
+sync
+
+# sync the current system time to CMOS
+if [ -x /sbin/hwclock ]; then
+    /sbin/hwclock -w || /bin/true
+fi
+
+if [ -x ${DEVPATH}/${PLATFORM}/${SSD_FW_UPDATE} ]; then
+    debug "updating ssd fw for ${REBOOT_TYPE}"
+    ${DEVPATH}/${PLATFORM}/${SSD_FW_UPDATE} ${REBOOT_TYPE}
+fi
+
+# Enable Watchdog Timer
+if [ -x ${WATCHDOG_UTIL} ]; then
+    debug "Enabling Watchdog before ${REBOOT_TYPE}"
+    ${WATCHDOG_UTIL} arm
+fi
+
+# Run platform specific reboot plugin
+if [ -x ${DEVPATH}/${PLATFORM}/${PLATFORM_PLUGIN} ]; then
+    debug "Running ${PLATFORM} specific plugin..."
+    ${DEVPATH}/${PLATFORM}/${PLATFORM_PLUGIN}
+fi
+
+# Reboot: explicitly call Linux "kexec -e". It is run as a child rather than
+# exec'ed so that a failure returns here and is reported below.
+debug "Rebooting with ${REBOOT_METHOD} to ${NEXT_SONIC_IMAGE} ..."
+${REBOOT_METHOD}
+
+# Should never reach here
+error "${REBOOT_TYPE} failed!"
+exit "${EXIT_FAILURE}"