From fa2a49eb96877981e1ca7e600f0bfdae65efb820 Mon Sep 17 00:00:00 2001
From: dgsudharsan
Date: Wed, 11 Jan 2023 05:45:40 +0000
Subject: [PATCH] [sai_failure_dump]Invoking dump during SAI failure

---
 scripts/generate_dump | 64 ++++++++++++++++++++++++++++++++++---------
 1 file changed, 51 insertions(+), 13 deletions(-)

diff --git a/scripts/generate_dump b/scripts/generate_dump
index f58c2d0c4f..c0d1f5054f 100755
--- a/scripts/generate_dump
+++ b/scripts/generate_dump
@@ -1059,21 +1059,26 @@ collect_mellanox() {
     local sai_dump_folder="/tmp/saisdkdump"
     local sai_dump_filename="${sai_dump_folder}/sai_sdk_dump_$(date +"%m_%d_%Y_%I_%M_%p")"
 
-    ${CMD_PREFIX}docker exec syncd mkdir -p $sai_dump_folder
-    ${CMD_PREFIX}docker exec syncd saisdkdump -f $sai_dump_filename
-
-    if [ $? != 0 ]; then
-        echo "Failed to collect saisdkdump."
-    fi
+    if [[ "$( docker container inspect -f '{{.State.Running}}' syncd )" == "true" ]]; then
+        if [[ x"$(sonic-db-cli APPL_DB EXISTS PORT_TABLE:PortInitDone)" == x"1" ]]; then
+            # Run saisdkdump only after the create_switch is known to be successful
+            ${CMD_PREFIX}docker exec syncd mkdir -p $sai_dump_folder
+            ${CMD_PREFIX}docker exec syncd saisdkdump -f $sai_dump_filename
+
+            if [ $? != 0 ]; then
+                echo "Failed to collect saisdkdump."
+            fi
 
-    copy_from_docker syncd $sai_dump_folder $sai_dump_folder
-    echo "$sai_dump_folder"
-    for file in `ls $sai_dump_folder`; do
-        save_file ${sai_dump_folder}/${file} sai_sdk_dump true
-    done
+            copy_from_docker syncd $sai_dump_folder $sai_dump_folder
+            echo "$sai_dump_folder"
+            for file in `ls $sai_dump_folder`; do
+                save_file ${sai_dump_folder}/${file} sai_sdk_dump true
+            done
 
-    ${CMD_PREFIX}rm -rf $sai_dump_folder
-    ${CMD_PREFIX}docker exec syncd rm -rf $sai_dump_folder
+            ${CMD_PREFIX}rm -rf $sai_dump_folder
+            ${CMD_PREFIX}docker exec syncd rm -rf $sai_dump_folder
+        fi
+    fi
 
     # run 'hw-management-generate-dump.sh' script and save the result file
     HW_DUMP_FILE=/usr/bin/hw-management-generate-dump.sh
@@ -1427,6 +1432,38 @@ save_crash_files() {
     fi
 }
 
+###############################################################################
+# Collect SAI failure dump files under /var/log/sai_failure_dump/. These files are
+# created because of the orchagent abort triggered by SAI programming failure
+# Globals:
+#  TAR, TARFILE, BASE, CMD_PREFIX (all read only)
+# Arguments:
+#  None
+# Returns:
+#  None
+###############################################################################
+save_sai_failure_dump(){
+    for file in $(find_files "/var/log/sai_failure_dump/"); do
+        if $TAR -tf $TARFILE | grep $BASE/log/$(basename $file); then
+            # Already collected under the log/ dir: just add a symbolic link.
+            # $file is left untouched so the cleanup below removes the real file.
+            local link_target=$file
+            if [ ! -z "${file##*.gz}" ]; then
+                link_target=$file.gz    # files saved under log/ are zipped with gz
+            fi
+            ${CMD_PREFIX}save_symlink ${link_target} sai_failure_dump log
+        else
+            if [ ! -z "${file##*.gz}" ]; then
+                ${CMD_PREFIX}save_file ${file} sai_failure_dump true
+            else
+                ${CMD_PREFIX}save_file ${file} sai_failure_dump false
+            fi
+        fi
+        # Clean up the file once it is part of tech support
+        ${CMD_PREFIX}rm -f $file
+    done
+}
+
 ###############################################################################
 # Get number of ASICs in the platform
 # Globals:
@@ -1710,6 +1747,7 @@ main() {
     save_log_files &
     save_crash_files &
     save_warmboot_files &
+    save_sai_failure_dump &
     wait
 
     if [[ "$asic" = "mellanox" ]]; then