From 4d89510cf767418c9bddf7b1df7dc4d9ee17d6ae Mon Sep 17 00:00:00 2001 From: yozhao101 <56170650+yozhao101@users.noreply.github.com> Date: Sun, 28 Mar 2021 10:14:27 -0700 Subject: [PATCH] [reboot] User-friendly reboot cause message for kernel panic (#1486) Signed-off-by: Yong Zhao yozhao@microsoft.com What I did If a SONiC device was rebooted because of a kernel panic, then the CLI command show reboot-cause should show Kernel Panic. How I did it Currently, if the kernel panics, the device is rebooted and the reboot script writes a message into reboot-cause.txt. I just updated the content of this message. How to verify it I verified this change on the virtual switch in the following steps: Trigger a kernel panic: echo c > /proc/sysrq-trigger After the device has rebooted, run the CLI command show reboot-cause: admin@vlab-01:~$ show reboot-cause Kernel Panic [Time: Tue 09 Mar 2021 03:03:56 AM UTC] Previous command output (if the output of a command-line utility has changed) admin@vlab-01:~$ show reboot-cause User issued 'kdump' command [User: kdump, Time: Mon 08 Mar 2021 01:47:43 AM UTC] New command output (if the output of a command-line utility has changed) admin@vlab-01:~$ show reboot-cause Kernel Panic [Time: Tue 09 Mar 2021 03:03:56 AM UTC] --- scripts/reboot | 2 +- show/reboot_cause.py | 54 ++++++++++++++++++++++++++++---------- tests/reboot_cause_test.py | 2 +- 3 files changed, 42 insertions(+), 16 deletions(-) diff --git a/scripts/reboot b/scripts/reboot index ee10c77ba3d0..1384fb89e74f 100755 --- a/scripts/reboot +++ b/scripts/reboot @@ -9,7 +9,7 @@ REBOOT_TIME=$(date) VMCORE_FILE=/proc/vmcore if [ -e $VMCORE_FILE -a -s $VMCORE_FILE ]; then echo "We have a /proc/vmcore, then we just kdump'ed" - echo "User issued 'kdump' command [User: kdump, Time: ${REBOOT_TIME}]" > ${REBOOT_CAUSE_FILE} + echo "Kernel Panic [Time: ${REBOOT_TIME}]" > ${REBOOT_CAUSE_FILE} sync PLATFORM=$(grep -oP 'sonic_platform=\K\S+' /proc/cmdline) if [ ! 
-z "${PLATFORM}" -a -x ${DEVPATH}/${PLATFORM}/${PLAT_REBOOT} ]; then diff --git a/show/reboot_cause.py b/show/reboot_cause.py index d1424d867672..57bd15e863a6 100755 --- a/show/reboot_cause.py +++ b/show/reboot_cause.py @@ -8,15 +8,21 @@ import utilities_common.cli as clicommon -PREVIOUS_REBOOT_CAUSE_FILE = "/host/reboot-cause/previous-reboot-cause.json" -USER_ISSUED_REBOOT_CAUSE_REGEX ="User issued \'{}\' command [User: {}, Time: {}]" +PREVIOUS_REBOOT_CAUSE_FILE_PATH = "/host/reboot-cause/previous-reboot-cause.json" + def read_reboot_cause_file(): - result = "" - if os.path.exists(PREVIOUS_REBOOT_CAUSE_FILE): - with open(PREVIOUS_REBOOT_CAUSE_FILE) as f: - result = json.load(f) - return result + reboot_cause_dict = {} + + if os.path.exists(PREVIOUS_REBOOT_CAUSE_FILE_PATH): + with open(PREVIOUS_REBOOT_CAUSE_FILE_PATH) as prev_reboot_cause_file: + try: + reboot_cause_dict = json.load(prev_reboot_cause_file) + except json.JSONDecodeError as err: + click.echo("Failed to load JSON file '{}'!".format(PREVIOUS_REBOOT_CAUSE_FILE_PATH), err=True) + + return reboot_cause_dict + # # 'reboot-cause' group ("show reboot-cause") @@ -26,15 +32,35 @@ def read_reboot_cause_file(): def reboot_cause(ctx): """Show cause of most recent reboot""" if ctx.invoked_subcommand is None: - reboot_cause = "" + reboot_cause_str = "" + # Read the previous reboot cause - data = read_reboot_cause_file() - if data['user'] == "N/A": - reboot_cause = "{}".format(data['cause']) + reboot_cause_dict = read_reboot_cause_file() + + reboot_cause = reboot_cause_dict.get("cause", "Unknown") + reboot_user = reboot_cause_dict.get("user", "N/A") + reboot_time = reboot_cause_dict.get("time", "N/A") + + if reboot_user != "N/A": + reboot_cause_str = "User issued '{}' command".format(reboot_cause) else: - reboot_cause = USER_ISSUED_REBOOT_CAUSE_REGEX.format(data['cause'], data['user'], data['time']) + reboot_cause_str = reboot_cause + + if reboot_user != "N/A" or reboot_time != "N/A": + reboot_cause_str += " [" + + 
if reboot_user != "N/A": + reboot_cause_str += "User: {}".format(reboot_user) + if reboot_time != "N/A": + reboot_cause_str += ", " + + if reboot_time != "N/A": + reboot_cause_str += "Time: {}".format(reboot_time) + + reboot_cause_str += "]" + + click.echo(reboot_cause_str) - click.echo(reboot_cause) # 'history' subcommand ("show reboot-cause history") @reboot_cause.command() @@ -54,7 +80,7 @@ def history(): for tk in table_keys: entry = db.get_all(db.STATE_DB, tk) r = [] - r.append(tk.replace(prefix,"")) + r.append(tk.replace(prefix, "")) r.append(entry['cause'] if 'cause' in entry else "") r.append(entry['time'] if 'time' in entry else "") r.append(entry['user'] if 'user' in entry else "") diff --git a/tests/reboot_cause_test.py b/tests/reboot_cause_test.py index 6ecc248dd53d..f3372c3eb19a 100644 --- a/tests/reboot_cause_test.py +++ b/tests/reboot_cause_test.py @@ -31,7 +31,7 @@ def setup_class(cls): # Test 'show reboot-cause' without previous-reboot-cause.json def test_reboot_cause_no_history_file(self): - expected_output = "" + expected_output = "Unknown\n" runner = CliRunner() result = runner.invoke(show.cli.commands["reboot-cause"], []) assert result.output == expected_output