From 719415db9b280a3bb53d0fdf3e94e780790284b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20S=C3=A1nchez=20L=C3=B3pez?= <1175054+carlossanlop@users.noreply.github.com> Date: Tue, 14 Nov 2023 20:09:31 -0800 Subject: [PATCH] Add XUnitLogChecker to log libraries dumps (#93906) Libraries CI failures that generate dumps will now show the dump output in the console log itself. --- Directory.Build.props | 6 +- .../libraries/prepare-for-bin-publish.yml | 1 + eng/testing/RunnerTemplate.cmd | 35 ++++ eng/testing/RunnerTemplate.sh | 178 ++++++++---------- eng/testing/gen-debug-dump-docs.py | 35 ++-- src/libraries/sendtohelixhelp.proj | 20 +- src/libraries/tests.proj | 6 + .../CoreclrTestWrapperLib.cs | 2 +- .../Common/XUnitLogChecker/XUnitLogChecker.cs | 9 +- 9 files changed, 168 insertions(+), 124 deletions(-) diff --git a/Directory.Build.props b/Directory.Build.props index 89b427fba10fa..4589267c02124 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -180,7 +180,6 @@ $([MSBuild]::NormalizePath('$(TestExclusionListTasksDir)', 'TestExclusionListTasks.dll')) $([MSBuild]::NormalizeDirectory('$(ArtifactsBinDir)', 'coreclr', '$(TargetOS).$(TargetArchitecture).$(RuntimeConfiguration)')) $(CoreCLRToolPath) - $(WASMTIME_PATH) $([MSBuild]::NormalizeDirectory($(ArtifactsObjDir), 'wasmtime')) true @@ -332,6 +331,11 @@ true + + + false + true + $([MSBuild]::NormalizeDirectory('$(ArtifactsBinDir)', 'XUnitLogChecker')) diff --git a/eng/pipelines/libraries/prepare-for-bin-publish.yml b/eng/pipelines/libraries/prepare-for-bin-publish.yml index fbdb90f18716b..7ea6630fa51cd 100644 --- a/eng/pipelines/libraries/prepare-for-bin-publish.yml +++ b/eng/pipelines/libraries/prepare-for-bin-publish.yml @@ -14,6 +14,7 @@ steps: ref/** runtime/** testhost/** + XUnitLogChecker/** - task: CopyFiles@2 displayName: Prepare bin folders to publish diff --git a/eng/testing/RunnerTemplate.cmd b/eng/testing/RunnerTemplate.cmd index 4f4646a72315d..fb877e215bb0f 100644 --- a/eng/testing/RunnerTemplate.cmd +++ b/eng/testing/RunnerTemplate.cmd @@ -92,6 +92,41 @@ if %_exit_code%==1 ( ) ) ) + +if NOT "%__IsXUnitLogCheckerSupported%"=="1" ( + echo XUnitLogChecker not supported for this test case. Skipping. + GOTO SKIP_XUNITLOGCHECKER +) + +echo ----- start =============== XUnitLogChecker Output ===================================================== + +set DOTNET_EXE=%RUNTIME_PATH%\dotnet.exe +set XUNITLOGCHECKER_DLL=%HELIX_CORRELATION_PAYLOAD%\XUnitLogChecker.dll +set XUNITLOGCHECKER_COMMAND=%DOTNET_EXE% --roll-forward Major %XUNITLOGCHECKER_DLL% --dumps-path %HELIX_DUMP_FOLDER% +set XUNITLOGCHECKER_EXIT_CODE=1 + +if NOT EXIST %DOTNET_EXE% ( + echo dotnet.exe does not exist in the expected location: %DOTNET_EXE% + GOTO XUNITLOGCHECKER_END +) else if NOT EXIST %XUNITLOGCHECKER_DLL% ( + echo XUnitLogChecker.dll does not exist in the expected location: %XUNITLOGCHECKER_DLL% + GOTO XUNITLOGCHECKER_END +) + +echo %XUNITLOGCHECKER_COMMAND% +%XUNITLOGCHECKER_COMMAND% +set XUNITLOGCHECKER_EXIT_CODE=%ERRORLEVEL% + +:XUNITLOGCHECKER_END + +if %XUNITLOGCHECKER_EXIT_CODE% NEQ 0 ( + set _exit_code=%XUNITLOGCHECKER_EXIT_CODE% +) + +echo ----- end =============== XUnitLogChecker Output - exit code %XUNITLOGCHECKER_EXIT_CODE% =============== + +:SKIP_XUNITLOGCHECKER + exit /b %_exit_code% :: ========================= END Test Execution ================================= diff --git a/eng/testing/RunnerTemplate.sh b/eng/testing/RunnerTemplate.sh index 9252c30576ed9..ef19d6c95c074 100644 --- a/eng/testing/RunnerTemplate.sh +++ b/eng/testing/RunnerTemplate.sh @@ -54,80 +54,60 @@ exitcode_list[131]="SIGQUIT Ctrl-\ occurred. Core dumped." exitcode_list[132]="SIGILL Illegal Instruction. Core dumped. Likely codegen issue." exitcode_list[133]="SIGTRAP Breakpoint hit. Core dumped." exitcode_list[134]="SIGABRT Abort. Managed or native assert, or runtime check such as heap corruption, caused call to abort(). Core dumped." -exitcode_list[135]="IGBUS Unaligned memory access. Core dumped." +exitcode_list[135]="IGBUS Unaligned memory access. Core dumped." exitcode_list[136]="SIGFPE Bad floating point arguments. Core dumped." exitcode_list[137]="SIGKILL Killed eg by kill" exitcode_list[139]="SIGSEGV Illegal memory access. Deref invalid pointer, overrunning buffer, stack overflow etc. Core dumped." exitcode_list[143]="SIGTERM Terminated. Usually before SIGKILL." exitcode_list[159]="SIGSYS Bad System Call." -function print_info_from_core_file_using_lldb { +function move_core_file_to_temp_location { local core_file_name=$1 - local executable_name=$2 - local plugin_path_name="$RUNTIME_PATH/shared/Microsoft.NETCore.App/9.9.9/libsosplugin.so" - # check for existence of lldb on the path - hash lldb 2>/dev/null || { echo >&2 "lldb was not found. Unable to print core file."; return; } + # Append the dmp extension to ensure XUnitLogChecker finds it + local new_location=$HELIX_DUMP_FOLDER/$core_file_name.dmp - # pe, clrstack, and dumpasync are defined in libsosplugin.so - if [ ! -f $plugin_path_name ]; then - echo $plugin_path_name cannot be found. - return - fi - - echo ----- start =============== lldb Output ===================================================== - echo Printing managed exceptions, managed call stacks, and async state machines. - lldb -O "settings set target.exec-search-paths $RUNTIME_PATH" -o "plugin load $plugin_path_name" -o "clrthreads -managedexception" -o "pe -nested" -o "clrstack -all -a -f" -o "dumpasync -fields -stacks -roots" -o "quit" --core $core_file_name $executable_name - echo ----- end =============== lldb Output ======================================================= -} - -function print_info_from_core_file_using_gdb { - local core_file_name=$1 - local executable_name=$2 - - # Check for the existence of GDB on the path - hash gdb 2>/dev/null || { echo >&2 "GDB was not found. Unable to print core file."; return; } + echo "Copying dump file '$core_file_name' to '$new_location'" + cp $core_file_name $new_location - echo ----- start =============== GDB Output ===================================================== - # Open the dump in GDB and print the stack from each thread. We can add more - # commands here if desired. - echo printing native stack. - gdb --batch -ex "thread apply all bt full" -ex "quit" $executable_name $core_file_name - echo ----- end =============== GDB Output ======================================================= + # Delete the old one + rm $core_file_name } -function print_info_from_core_file { - local core_file_name=$1 - local executable_name=$RUNTIME_PATH/$2 - - if ! [ -e $executable_name ]; then - echo "Unable to find executable $executable_name" - return - elif ! [ -e $core_file_name ]; then - echo "Unable to find core file $core_file_name" - return +xunitlogchecker_exit_code=0 +function invoke_xunitlogchecker { + local dump_folder=$1 + + total_dumps=$(find $dump_folder -name "*.dmp" | wc -l) + + if [[ $total_dumps > 0 ]]; then + echo "Total dumps found in $dump_folder: $total_dumps" + xunitlogchecker_file_name="$HELIX_CORRELATION_PAYLOAD/XUnitLogChecker.dll" + dotnet_file_name="$RUNTIME_PATH/dotnet" + + if [[ ! -f $dotnet_file_name ]]; then + echo "'$dotnet_file_name' was not found. Unable to run XUnitLogChecker." + xunitlogchecker_exit_code=1 + elif [[ ! -f $xunitlogchecker_file_name ]]; then + echo "'$xunitlogchecker_file_name' was not found. Unable to print dump file contents." + xunitlogchecker_exit_code=2 + elif [[ ! -d $dump_folder ]]; then + echo "The dump directory '$dump_folder' does not exist." + else + echo "Executing XUnitLogChecker in $dump_folder..." + cmd="$dotnet_file_name --roll-forward Major $xunitlogchecker_file_name --dumps-path $dump_folder" + echo "$cmd" + $cmd + xunitlogchecker_exit_code=$? + fi + else + echo "No dumps found in $dump_folder." fi - echo "Printing info from core file $core_file_name" - print_info_from_core_file_using_gdb $core_file_name $executable_name - print_info_from_core_file_using_lldb $core_file_name $executable_name -} - -function copy_core_file_to_temp_location { - local core_file_name=$1 - - local storage_location="/tmp/coredumps" - - # Create the directory (this shouldn't fail even if it already exists). - mkdir -p $storage_location - - local new_location=$storage_location/core.$RANDOM - - echo "Copying core file $core_file_name to $new_location in case you need it." - cp $core_file_name $new_location } # ========================= BEGIN Core File Setup ============================ -if [[ "$(uname -s)" == "Darwin" ]]; then +system_name="$(uname -s)" +if [[ $system_name == "Darwin" ]]; then # On OS X, we will enable core dump generation only if there are no core # files already in /cores/ at this point. This is being done to prevent # inadvertently flooding the CI machines with dumps. @@ -140,21 +120,11 @@ if [[ "$(uname -s)" == "Darwin" ]]; then # https://github.com/dotnet/core-eng/issues/15597 ulimit -c 0 fi - -elif [[ "$(uname -s)" == "Linux" ]]; then - # On Linux, we'll enable core file generation unconditionally, and if a dump - # is generated, we will print some useful information from it and delete the - # dump immediately. - - if [ -e /proc/self/coredump_filter ]; then - # Include memory in private and shared file-backed mappings in the dump. - # This ensures that we can see disassembly from our shared libraries when - # inspecting the contents of the dump. See 'man core' for details. - echo -n 0x3F > /proc/self/coredump_filter - fi - - ulimit -c unlimited fi + +export DOTNET_DbgEnableMiniDump=1 +export DOTNET_EnableCrashReport=1 +export DOTNET_DbgMiniDumpName=$HELIX_DUMP_FOLDER/coredump.%d.dmp # ========================= END Core File Setup ============================== # ========================= BEGIN support for SuperPMI collection ============================== @@ -171,7 +141,7 @@ if [ ! -z $spmi_enable_collection ]; then fi mkdir -p $spmi_collect_dir export spmi_file_extension=so - if [[ "$(uname -s)" == "Darwin" ]]; then + if [[ $system_name == "Darwin" ]]; then export spmi_file_extension=dylib fi export SuperPMIShimLogPath=$spmi_collect_dir @@ -220,27 +190,11 @@ if [[ $test_exitcode -ne 0 ]]; then echo ulimit -c value: $(ulimit -c) fi -if [[ "$(uname -s)" == "Linux" && $test_exitcode -ne 0 ]]; then - if [ -n "$HELIX_WORKITEM_PAYLOAD" ]; then - - # For abrupt failures, in Helix, dump some of the kernel log, in case there is a hint - if [[ $test_exitcode -ne 1 ]]; then - dmesg | tail -50 - fi - - have_sleep=$(which sleep) - if [ -x "$have_sleep" ]; then - echo Waiting a few seconds for any dump to be written.. - sleep 10s - fi - fi - +if [[ $system_name == "Linux" && $test_exitcode -ne 0 ]]; then echo cat /proc/sys/kernel/core_pattern: $(cat /proc/sys/kernel/core_pattern) echo cat /proc/sys/kernel/core_uses_pid: $(cat /proc/sys/kernel/core_uses_pid) echo cat /proc/sys/kernel/coredump_filter: $(cat /proc/sys/kernel/coredump_filter) - echo Looking around for any Linux dump.. - # Depending on distro/configuration, the core files may either be named "core" # or "core." by default. We read /proc/sys/kernel/core_uses_pid to # determine which it is. @@ -248,23 +202,47 @@ if [[ "$(uname -s)" == "Linux" && $test_exitcode -ne 0 ]]; then if [[ -e /proc/sys/kernel/core_uses_pid && "1" == $(cat /proc/sys/kernel/core_uses_pid) ]]; then core_name_uses_pid=1 fi + + # The osx dumps are too large to egress the machine + echo Looking around for any Linux dumps... if [[ "$core_name_uses_pid" == "1" ]]; then # We don't know what the PID of the process was, so let's look at all core # files whose name matches core.NUMBER - echo Looking for files matching core.* ... - for f in core.*; do - [[ $f =~ core.[0-9]+ ]] && print_info_from_core_file "$f" "dotnet" && copy_core_file_to_temp_location "$f" && rm "$f" + echo "Looking for files matching core.* ..." + for f in $(find . -name "core.*"); do + [[ $f =~ core.[0-9]+ ]] && move_core_file_to_temp_location "$f" done - elif [ -f core ]; then - echo found a dump named core in $EXECUTION_DIR ! - print_info_from_core_file "core" "dotnet" - copy_core_file_to_temp_location "core" - rm "core" - else - echo ... found no dump in $PWD fi + + if [ -f core ]; then + move_core_file_to_temp_location "core" + fi +fi + +if [ -n "$HELIX_WORKITEM_PAYLOAD" ]; then + # For abrupt failures, in Helix, dump some of the kernel log, in case there is a hint + if [[ $test_exitcode -ne 1 ]]; then + dmesg | tail -50 + fi + fi + +if [[ -z "$__IsXUnitLogCheckerSupported" ]]; then + echo "The '__IsXUnitLogCheckerSupported' env var is not set." +elif [[ "$__IsXUnitLogCheckerSupported" != "1" ]]; then + echo "XUnitLogChecker not supported for this test case. Skipping." +else + echo ----- start =============== XUnitLogChecker Output ===================================================== + + invoke_xunitlogchecker "$HELIX_DUMP_FOLDER" + + if [[ $xunitlogchecker_exit_code -ne 0 ]]; then + test_exitcode=$xunitlogchecker_exit_code + fi + echo ----- end =============== XUnitLogChecker Output - exit code $xunitlogchecker_exit_code =========================== +fi + popd >/dev/null # ======================== END Core File Inspection ========================== # The helix work item should not exit with non-zero if tests ran and produced results diff --git a/eng/testing/gen-debug-dump-docs.py b/eng/testing/gen-debug-dump-docs.py index 04f860b031452..788e382ae16e2 100644 --- a/eng/testing/gen-debug-dump-docs.py +++ b/eng/testing/gen-debug-dump-docs.py @@ -2,6 +2,11 @@ import sys import platform +scriptname = os.path.basename(__file__) + +def print_detail(str): + print(f"{scriptname}: {str}") + build_id = '' job_id = '' workitem = '' @@ -16,7 +21,7 @@ idx += 1 if arg == '-buildid': if idx >= args_len or sys.argv[idx].startswith('-'): - print("Must specify a value for -buildid") + print_detail("Must specify a value for -buildid") exit(1) build_id = sys.argv[idx] @@ -24,7 +29,7 @@ if arg == '-jobid': if idx >= args_len or sys.argv[idx].startswith('-'): - print("Must specify a value for -jobid") + print_detail("Must specify a value for -jobid") exit(1) job_id = sys.argv[idx] @@ -32,7 +37,7 @@ if arg == '-workitem': if idx >= args_len or sys.argv[idx].startswith('-'): - print("Must specify a value for -workitem") + print_detail("Must specify a value for -workitem") exit(1) workitem = sys.argv[idx] @@ -40,7 +45,7 @@ if arg == '-templatedir': if idx >= args_len or sys.argv[idx].startswith('-'): - print("Must specify a value for -templatedir") + print_detail("Must specify a value for -templatedir") exit(1) template_dir = sys.argv[idx] @@ -48,7 +53,7 @@ if arg == '-outdir': if idx >= args_len or sys.argv[idx].startswith('-'): - print("Must specify a value for -outdir") + print_detail("Must specify a value for -outdir") exit(1) out_dir = sys.argv[idx] @@ -56,7 +61,7 @@ if arg == '-dumpdir': if idx >= args_len or sys.argv[idx].startswith('-'): - print("Must specify a value for -dumpdir") + print_detail("Must specify a value for -dumpdir") exit(1) dump_dir = sys.argv[idx] @@ -64,7 +69,7 @@ if arg == '-productver': if idx >= args_len or sys.argv[idx].startswith('-'): - print("Must specify a value for -productver") + print_detail("Must specify a value for -productver") exit(1) product_ver = sys.argv[idx] @@ -77,23 +82,23 @@ dump_names.append(filename) if len(dump_names) == 0: - print("Did not find dumps, skipping dump docs generation.") + print_detail("Did not find dumps, skipping dump docs generation.") exit(0) if build_id == '': - print("ERROR: unespecified required argument -buildid") + print_detail("ERROR: unespecified required argument -buildid") exit(1) if workitem == '': - print("ERROR: unespecified required argument -workitem") + print_detail("ERROR: unespecified required argument -workitem") exit(1) if job_id == '': - print("ERROR: unespecified required argument -jobid") + print_detail("ERROR: unespecified required argument -jobid") exit(1) if product_ver == '': - print("ERROR: unespecified required argument -productver") + print_detail("ERROR: unespecified required argument -productver") exit(1) replace_string = '' @@ -104,7 +109,7 @@ with open(source_file, 'r') as f: file_text = f.read() - print('read file: ' + source_file) + print_detail('read file: ' + source_file) replace_string = file_text.replace('%JOBID%', job_id) replace_string = replace_string.replace('%WORKITEM%', workitem) @@ -115,7 +120,7 @@ output_file = out_dir + dir_separator + 'how-to-debug-dump.md' with open(output_file, 'w+') as output: - print('writing output file: ' + output_file) + print_detail('writing output file: ' + output_file) lines = replace_string.split(os.linesep) lin_dump_dir= workitem + "/workitems/" + workitem + "/" @@ -133,4 +138,4 @@ else: output.write(line + os.linesep) -print('done writing debug dump information') +print_detail('done writing debug dump information') diff --git a/src/libraries/sendtohelixhelp.proj b/src/libraries/sendtohelixhelp.proj index f772d60d2b177..82f9490f25cd9 100644 --- a/src/libraries/sendtohelixhelp.proj +++ b/src/libraries/sendtohelixhelp.proj @@ -175,7 +175,6 @@ - @@ -233,6 +232,24 @@ + + <_ShouldSetXUnitLogCheckerEnvVar>1 + + $(HelixPreCommands); + set __TestArchitecture=$(TargetArchitecture); + set __IsXUnitLogCheckerSupported=$(_ShouldSetXUnitLogCheckerEnvVar) + + + $(HelixPreCommands); + export __TestArchitecture=$(TargetArchitecture); + export __IsXUnitLogCheckerSupported=$(_ShouldSetXUnitLogCheckerEnvVar) + + + + + + + - diff --git a/src/libraries/tests.proj b/src/libraries/tests.proj index ad3ac69f49b88..f0308552bad3d 100644 --- a/src/libraries/tests.proj +++ b/src/libraries/tests.proj @@ -703,6 +703,12 @@ BuildInParallel="$(Samples_BuildInParallel)" /> + + + + DateTime.Compare(File.GetCreationTime(dmp), testRunDateTime) >= 0); } else { - dumpsFound = Directory.GetFiles(s_configuration.DumpsPath, - "*coredump*.dmp"); + dumpsFound = Directory.GetFiles(s_configuration.DumpsPath, "*.dmp"); } if (dumpsFound.Count() == 0) @@ -510,7 +509,7 @@ static void PrintStackTracesFromDumps(string testLogPath) { if (OperatingSystem.IsWindows()) { - WriteLineTimestamp("Reading crash dump '{dumpPath}'..."); + WriteLineTimestamp($"Reading crash dump '{dumpPath}'..."); WriteLineTimestamp("Stack Trace Found:\n"); CoreclrTestWrapperLib.TryPrintStackTraceFromDmp(dumpPath, @@ -527,7 +526,7 @@ static void PrintStackTracesFromDumps(string testLogPath) continue; } - WriteLineTimestamp("Reading crash report '{crashReportPath}'..."); + WriteLineTimestamp($"Reading crash report '{crashReportPath}'..."); WriteLineTimestamp("Stack Trace Found:\n"); CoreclrTestWrapperLib.TryPrintStackTraceFromCrashReport(crashReportPath,