From 719415db9b280a3bb53d0fdf3e94e780790284b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carlos=20S=C3=A1nchez=20L=C3=B3pez?=
<1175054+carlossanlop@users.noreply.github.com>
Date: Tue, 14 Nov 2023 20:09:31 -0800
Subject: [PATCH] Add XUnitLogChecker to log libraries dumps (#93906)
Libraries CI failures that generate dumps will now show the dump output in the console log itself.
---
Directory.Build.props | 6 +-
.../libraries/prepare-for-bin-publish.yml | 1 +
eng/testing/RunnerTemplate.cmd | 35 ++++
eng/testing/RunnerTemplate.sh | 178 ++++++++----------
eng/testing/gen-debug-dump-docs.py | 35 ++--
src/libraries/sendtohelixhelp.proj | 20 +-
src/libraries/tests.proj | 6 +
.../CoreclrTestWrapperLib.cs | 2 +-
.../Common/XUnitLogChecker/XUnitLogChecker.cs | 9 +-
9 files changed, 168 insertions(+), 124 deletions(-)
diff --git a/Directory.Build.props b/Directory.Build.props
index 89b427fba10fa..4589267c02124 100644
--- a/Directory.Build.props
+++ b/Directory.Build.props
@@ -180,7 +180,6 @@
$([MSBuild]::NormalizePath('$(TestExclusionListTasksDir)', 'TestExclusionListTasks.dll'))
$([MSBuild]::NormalizeDirectory('$(ArtifactsBinDir)', 'coreclr', '$(TargetOS).$(TargetArchitecture).$(RuntimeConfiguration)'))
$(CoreCLRToolPath)
-
$(WASMTIME_PATH)
$([MSBuild]::NormalizeDirectory($(ArtifactsObjDir), 'wasmtime'))
true
@@ -332,6 +331,11 @@
true
+
+
+ false
+ true
+ $([MSBuild]::NormalizeDirectory('$(ArtifactsBinDir)', 'XUnitLogChecker'))
diff --git a/eng/pipelines/libraries/prepare-for-bin-publish.yml b/eng/pipelines/libraries/prepare-for-bin-publish.yml
index fbdb90f18716b..7ea6630fa51cd 100644
--- a/eng/pipelines/libraries/prepare-for-bin-publish.yml
+++ b/eng/pipelines/libraries/prepare-for-bin-publish.yml
@@ -14,6 +14,7 @@ steps:
ref/**
runtime/**
testhost/**
+ XUnitLogChecker/**
- task: CopyFiles@2
displayName: Prepare bin folders to publish
diff --git a/eng/testing/RunnerTemplate.cmd b/eng/testing/RunnerTemplate.cmd
index 4f4646a72315d..fb877e215bb0f 100644
--- a/eng/testing/RunnerTemplate.cmd
+++ b/eng/testing/RunnerTemplate.cmd
@@ -92,6 +92,41 @@ if %_exit_code%==1 (
)
)
)
+
+if NOT "%__IsXUnitLogCheckerSupported%"=="1" (
+ echo XUnitLogChecker not supported for this test case. Skipping.
+ GOTO SKIP_XUNITLOGCHECKER
+)
+
+echo ----- start =============== XUnitLogChecker Output =====================================================
+
+set DOTNET_EXE=%RUNTIME_PATH%\dotnet.exe
+set XUNITLOGCHECKER_DLL=%HELIX_CORRELATION_PAYLOAD%\XUnitLogChecker.dll
+set XUNITLOGCHECKER_COMMAND=%DOTNET_EXE% --roll-forward Major %XUNITLOGCHECKER_DLL% --dumps-path %HELIX_DUMP_FOLDER%
+set XUNITLOGCHECKER_EXIT_CODE=1
+
+if NOT EXIST %DOTNET_EXE% (
+ echo dotnet.exe does not exist in the expected location: %DOTNET_EXE%
+ GOTO XUNITLOGCHECKER_END
+) else if NOT EXIST %XUNITLOGCHECKER_DLL% (
+ echo XUnitLogChecker.dll does not exist in the expected location: %XUNITLOGCHECKER_DLL%
+ GOTO XUNITLOGCHECKER_END
+)
+
+echo %XUNITLOGCHECKER_COMMAND%
+%XUNITLOGCHECKER_COMMAND%
+set XUNITLOGCHECKER_EXIT_CODE=%ERRORLEVEL%
+
+:XUNITLOGCHECKER_END
+
+if %XUNITLOGCHECKER_EXIT_CODE% NEQ 0 (
+ set _exit_code=%XUNITLOGCHECKER_EXIT_CODE%
+)
+
+echo ----- end =============== XUnitLogChecker Output - exit code %XUNITLOGCHECKER_EXIT_CODE% ===============
+
+:SKIP_XUNITLOGCHECKER
+
exit /b %_exit_code%
:: ========================= END Test Execution =================================
diff --git a/eng/testing/RunnerTemplate.sh b/eng/testing/RunnerTemplate.sh
index 9252c30576ed9..ef19d6c95c074 100644
--- a/eng/testing/RunnerTemplate.sh
+++ b/eng/testing/RunnerTemplate.sh
@@ -54,80 +54,60 @@ exitcode_list[131]="SIGQUIT Ctrl-\ occurred. Core dumped."
exitcode_list[132]="SIGILL Illegal Instruction. Core dumped. Likely codegen issue."
exitcode_list[133]="SIGTRAP Breakpoint hit. Core dumped."
exitcode_list[134]="SIGABRT Abort. Managed or native assert, or runtime check such as heap corruption, caused call to abort(). Core dumped."
-exitcode_list[135]="IGBUS Unaligned memory access. Core dumped."
+exitcode_list[135]="IGBUS Unaligned memory access. Core dumped."
exitcode_list[136]="SIGFPE Bad floating point arguments. Core dumped."
exitcode_list[137]="SIGKILL Killed eg by kill"
exitcode_list[139]="SIGSEGV Illegal memory access. Deref invalid pointer, overrunning buffer, stack overflow etc. Core dumped."
exitcode_list[143]="SIGTERM Terminated. Usually before SIGKILL."
exitcode_list[159]="SIGSYS Bad System Call."
-function print_info_from_core_file_using_lldb {
+function move_core_file_to_temp_location {
local core_file_name=$1
- local executable_name=$2
- local plugin_path_name="$RUNTIME_PATH/shared/Microsoft.NETCore.App/9.9.9/libsosplugin.so"
- # check for existence of lldb on the path
- hash lldb 2>/dev/null || { echo >&2 "lldb was not found. Unable to print core file."; return; }
+ # Append the dmp extension to ensure XUnitLogChecker finds it
+ local new_location=$HELIX_DUMP_FOLDER/$core_file_name.dmp
- # pe, clrstack, and dumpasync are defined in libsosplugin.so
- if [ ! -f $plugin_path_name ]; then
- echo $plugin_path_name cannot be found.
- return
- fi
-
- echo ----- start =============== lldb Output =====================================================
- echo Printing managed exceptions, managed call stacks, and async state machines.
- lldb -O "settings set target.exec-search-paths $RUNTIME_PATH" -o "plugin load $plugin_path_name" -o "clrthreads -managedexception" -o "pe -nested" -o "clrstack -all -a -f" -o "dumpasync -fields -stacks -roots" -o "quit" --core $core_file_name $executable_name
- echo ----- end =============== lldb Output =======================================================
-}
-
-function print_info_from_core_file_using_gdb {
- local core_file_name=$1
- local executable_name=$2
-
- # Check for the existence of GDB on the path
- hash gdb 2>/dev/null || { echo >&2 "GDB was not found. Unable to print core file."; return; }
+ echo "Copying dump file '$core_file_name' to '$new_location'"
+ cp $core_file_name $new_location
- echo ----- start =============== GDB Output =====================================================
- # Open the dump in GDB and print the stack from each thread. We can add more
- # commands here if desired.
- echo printing native stack.
- gdb --batch -ex "thread apply all bt full" -ex "quit" $executable_name $core_file_name
- echo ----- end =============== GDB Output =======================================================
+ # Delete the old one
+ rm $core_file_name
}
-function print_info_from_core_file {
- local core_file_name=$1
- local executable_name=$RUNTIME_PATH/$2
-
- if ! [ -e $executable_name ]; then
- echo "Unable to find executable $executable_name"
- return
- elif ! [ -e $core_file_name ]; then
- echo "Unable to find core file $core_file_name"
- return
+xunitlogchecker_exit_code=0
+function invoke_xunitlogchecker {
+ local dump_folder=$1
+
+ total_dumps=$(find $dump_folder -name "*.dmp" | wc -l)
+
+ if [[ $total_dumps > 0 ]]; then
+ echo "Total dumps found in $dump_folder: $total_dumps"
+ xunitlogchecker_file_name="$HELIX_CORRELATION_PAYLOAD/XUnitLogChecker.dll"
+ dotnet_file_name="$RUNTIME_PATH/dotnet"
+
+ if [[ ! -f $dotnet_file_name ]]; then
+ echo "'$dotnet_file_name' was not found. Unable to run XUnitLogChecker."
+ xunitlogchecker_exit_code=1
+ elif [[ ! -f $xunitlogchecker_file_name ]]; then
+ echo "'$xunitlogchecker_file_name' was not found. Unable to print dump file contents."
+ xunitlogchecker_exit_code=2
+ elif [[ ! -d $dump_folder ]]; then
+ echo "The dump directory '$dump_folder' does not exist."
+ else
+ echo "Executing XUnitLogChecker in $dump_folder..."
+ cmd="$dotnet_file_name --roll-forward Major $xunitlogchecker_file_name --dumps-path $dump_folder"
+ echo "$cmd"
+ $cmd
+ xunitlogchecker_exit_code=$?
+ fi
+ else
+ echo "No dumps found in $dump_folder."
fi
- echo "Printing info from core file $core_file_name"
- print_info_from_core_file_using_gdb $core_file_name $executable_name
- print_info_from_core_file_using_lldb $core_file_name $executable_name
-}
-
-function copy_core_file_to_temp_location {
- local core_file_name=$1
-
- local storage_location="/tmp/coredumps"
-
- # Create the directory (this shouldn't fail even if it already exists).
- mkdir -p $storage_location
-
- local new_location=$storage_location/core.$RANDOM
-
- echo "Copying core file $core_file_name to $new_location in case you need it."
- cp $core_file_name $new_location
}
# ========================= BEGIN Core File Setup ============================
-if [[ "$(uname -s)" == "Darwin" ]]; then
+system_name="$(uname -s)"
+if [[ $system_name == "Darwin" ]]; then
# On OS X, we will enable core dump generation only if there are no core
# files already in /cores/ at this point. This is being done to prevent
# inadvertently flooding the CI machines with dumps.
@@ -140,21 +120,11 @@ if [[ "$(uname -s)" == "Darwin" ]]; then
# https://github.com/dotnet/core-eng/issues/15597
ulimit -c 0
fi
-
-elif [[ "$(uname -s)" == "Linux" ]]; then
- # On Linux, we'll enable core file generation unconditionally, and if a dump
- # is generated, we will print some useful information from it and delete the
- # dump immediately.
-
- if [ -e /proc/self/coredump_filter ]; then
- # Include memory in private and shared file-backed mappings in the dump.
- # This ensures that we can see disassembly from our shared libraries when
- # inspecting the contents of the dump. See 'man core' for details.
- echo -n 0x3F > /proc/self/coredump_filter
- fi
-
- ulimit -c unlimited
fi
+
+export DOTNET_DbgEnableMiniDump=1
+export DOTNET_EnableCrashReport=1
+export DOTNET_DbgMiniDumpName=$HELIX_DUMP_FOLDER/coredump.%d.dmp
# ========================= END Core File Setup ==============================
# ========================= BEGIN support for SuperPMI collection ==============================
@@ -171,7 +141,7 @@ if [ ! -z $spmi_enable_collection ]; then
fi
mkdir -p $spmi_collect_dir
export spmi_file_extension=so
- if [[ "$(uname -s)" == "Darwin" ]]; then
+ if [[ $system_name == "Darwin" ]]; then
export spmi_file_extension=dylib
fi
export SuperPMIShimLogPath=$spmi_collect_dir
@@ -220,27 +190,11 @@ if [[ $test_exitcode -ne 0 ]]; then
echo ulimit -c value: $(ulimit -c)
fi
-if [[ "$(uname -s)" == "Linux" && $test_exitcode -ne 0 ]]; then
- if [ -n "$HELIX_WORKITEM_PAYLOAD" ]; then
-
- # For abrupt failures, in Helix, dump some of the kernel log, in case there is a hint
- if [[ $test_exitcode -ne 1 ]]; then
- dmesg | tail -50
- fi
-
- have_sleep=$(which sleep)
- if [ -x "$have_sleep" ]; then
- echo Waiting a few seconds for any dump to be written..
- sleep 10s
- fi
- fi
-
+if [[ $system_name == "Linux" && $test_exitcode -ne 0 ]]; then
echo cat /proc/sys/kernel/core_pattern: $(cat /proc/sys/kernel/core_pattern)
echo cat /proc/sys/kernel/core_uses_pid: $(cat /proc/sys/kernel/core_uses_pid)
echo cat /proc/sys/kernel/coredump_filter: $(cat /proc/sys/kernel/coredump_filter)
- echo Looking around for any Linux dump..
-
# Depending on distro/configuration, the core files may either be named "core"
# or "core." by default. We read /proc/sys/kernel/core_uses_pid to
# determine which it is.
@@ -248,23 +202,47 @@ if [[ "$(uname -s)" == "Linux" && $test_exitcode -ne 0 ]]; then
if [[ -e /proc/sys/kernel/core_uses_pid && "1" == $(cat /proc/sys/kernel/core_uses_pid) ]]; then
core_name_uses_pid=1
fi
+
+ # The osx dumps are too large to egress the machine
+ echo Looking around for any Linux dumps...
if [[ "$core_name_uses_pid" == "1" ]]; then
# We don't know what the PID of the process was, so let's look at all core
# files whose name matches core.NUMBER
- echo Looking for files matching core.* ...
- for f in core.*; do
- [[ $f =~ core.[0-9]+ ]] && print_info_from_core_file "$f" "dotnet" && copy_core_file_to_temp_location "$f" && rm "$f"
+ echo "Looking for files matching core.* ..."
+ for f in $(find . -name "core.*"); do
+ [[ $f =~ core.[0-9]+ ]] && move_core_file_to_temp_location "$f"
done
- elif [ -f core ]; then
- echo found a dump named core in $EXECUTION_DIR !
- print_info_from_core_file "core" "dotnet"
- copy_core_file_to_temp_location "core"
- rm "core"
- else
- echo ... found no dump in $PWD
fi
+
+ if [ -f core ]; then
+ move_core_file_to_temp_location "core"
+ fi
+fi
+
+if [ -n "$HELIX_WORKITEM_PAYLOAD" ]; then
+ # For abrupt failures, in Helix, dump some of the kernel log, in case there is a hint
+ if [[ $test_exitcode -ne 1 ]]; then
+ dmesg | tail -50
+ fi
+
fi
+
+if [[ -z "$__IsXUnitLogCheckerSupported" ]]; then
+ echo "The '__IsXUnitLogCheckerSupported' env var is not set."
+elif [[ "$__IsXUnitLogCheckerSupported" != "1" ]]; then
+ echo "XUnitLogChecker not supported for this test case. Skipping."
+else
+ echo ----- start =============== XUnitLogChecker Output =====================================================
+
+ invoke_xunitlogchecker "$HELIX_DUMP_FOLDER"
+
+ if [[ $xunitlogchecker_exit_code -ne 0 ]]; then
+ test_exitcode=$xunitlogchecker_exit_code
+ fi
+ echo ----- end =============== XUnitLogChecker Output - exit code $xunitlogchecker_exit_code ===========================
+fi
+
popd >/dev/null
# ======================== END Core File Inspection ==========================
# The helix work item should not exit with non-zero if tests ran and produced results
diff --git a/eng/testing/gen-debug-dump-docs.py b/eng/testing/gen-debug-dump-docs.py
index 04f860b031452..788e382ae16e2 100644
--- a/eng/testing/gen-debug-dump-docs.py
+++ b/eng/testing/gen-debug-dump-docs.py
@@ -2,6 +2,11 @@
import sys
import platform
+scriptname = os.path.basename(__file__)
+
+def print_detail(str):
+ print(f"{scriptname}: {str}")
+
build_id = ''
job_id = ''
workitem = ''
@@ -16,7 +21,7 @@
idx += 1
if arg == '-buildid':
if idx >= args_len or sys.argv[idx].startswith('-'):
- print("Must specify a value for -buildid")
+ print_detail("Must specify a value for -buildid")
exit(1)
build_id = sys.argv[idx]
@@ -24,7 +29,7 @@
if arg == '-jobid':
if idx >= args_len or sys.argv[idx].startswith('-'):
- print("Must specify a value for -jobid")
+ print_detail("Must specify a value for -jobid")
exit(1)
job_id = sys.argv[idx]
@@ -32,7 +37,7 @@
if arg == '-workitem':
if idx >= args_len or sys.argv[idx].startswith('-'):
- print("Must specify a value for -workitem")
+ print_detail("Must specify a value for -workitem")
exit(1)
workitem = sys.argv[idx]
@@ -40,7 +45,7 @@
if arg == '-templatedir':
if idx >= args_len or sys.argv[idx].startswith('-'):
- print("Must specify a value for -templatedir")
+ print_detail("Must specify a value for -templatedir")
exit(1)
template_dir = sys.argv[idx]
@@ -48,7 +53,7 @@
if arg == '-outdir':
if idx >= args_len or sys.argv[idx].startswith('-'):
- print("Must specify a value for -outdir")
+ print_detail("Must specify a value for -outdir")
exit(1)
out_dir = sys.argv[idx]
@@ -56,7 +61,7 @@
if arg == '-dumpdir':
if idx >= args_len or sys.argv[idx].startswith('-'):
- print("Must specify a value for -dumpdir")
+ print_detail("Must specify a value for -dumpdir")
exit(1)
dump_dir = sys.argv[idx]
@@ -64,7 +69,7 @@
if arg == '-productver':
if idx >= args_len or sys.argv[idx].startswith('-'):
- print("Must specify a value for -productver")
+ print_detail("Must specify a value for -productver")
exit(1)
product_ver = sys.argv[idx]
@@ -77,23 +82,23 @@
dump_names.append(filename)
if len(dump_names) == 0:
- print("Did not find dumps, skipping dump docs generation.")
+ print_detail("Did not find dumps, skipping dump docs generation.")
exit(0)
if build_id == '':
- print("ERROR: unespecified required argument -buildid")
+ print_detail("ERROR: unespecified required argument -buildid")
exit(1)
if workitem == '':
- print("ERROR: unespecified required argument -workitem")
+ print_detail("ERROR: unespecified required argument -workitem")
exit(1)
if job_id == '':
- print("ERROR: unespecified required argument -jobid")
+ print_detail("ERROR: unespecified required argument -jobid")
exit(1)
if product_ver == '':
- print("ERROR: unespecified required argument -productver")
+ print_detail("ERROR: unespecified required argument -productver")
exit(1)
replace_string = ''
@@ -104,7 +109,7 @@
with open(source_file, 'r') as f:
file_text = f.read()
- print('read file: ' + source_file)
+ print_detail('read file: ' + source_file)
replace_string = file_text.replace('%JOBID%', job_id)
replace_string = replace_string.replace('%WORKITEM%', workitem)
@@ -115,7 +120,7 @@
output_file = out_dir + dir_separator + 'how-to-debug-dump.md'
with open(output_file, 'w+') as output:
- print('writing output file: ' + output_file)
+ print_detail('writing output file: ' + output_file)
lines = replace_string.split(os.linesep)
lin_dump_dir= workitem + "/workitems/" + workitem + "/"
@@ -133,4 +138,4 @@
else:
output.write(line + os.linesep)
-print('done writing debug dump information')
+print_detail('done writing debug dump information')
diff --git a/src/libraries/sendtohelixhelp.proj b/src/libraries/sendtohelixhelp.proj
index f772d60d2b177..82f9490f25cd9 100644
--- a/src/libraries/sendtohelixhelp.proj
+++ b/src/libraries/sendtohelixhelp.proj
@@ -175,7 +175,6 @@
-
@@ -233,6 +232,24 @@
+
+ <_ShouldSetXUnitLogCheckerEnvVar>1
+
+ $(HelixPreCommands);
+ set __TestArchitecture=$(TargetArchitecture);
+ set __IsXUnitLogCheckerSupported=$(_ShouldSetXUnitLogCheckerEnvVar)
+
+
+ $(HelixPreCommands);
+ export __TestArchitecture=$(TargetArchitecture);
+ export __IsXUnitLogCheckerSupported=$(_ShouldSetXUnitLogCheckerEnvVar)
+
+
+
+
+
+
+
-
diff --git a/src/libraries/tests.proj b/src/libraries/tests.proj
index ad3ac69f49b88..f0308552bad3d 100644
--- a/src/libraries/tests.proj
+++ b/src/libraries/tests.proj
@@ -703,6 +703,12 @@
BuildInParallel="$(Samples_BuildInParallel)" />
+
+
+
+
DateTime.Compare(File.GetCreationTime(dmp),
testRunDateTime) >= 0);
}
else
{
- dumpsFound = Directory.GetFiles(s_configuration.DumpsPath,
- "*coredump*.dmp");
+ dumpsFound = Directory.GetFiles(s_configuration.DumpsPath, "*.dmp");
}
if (dumpsFound.Count() == 0)
@@ -510,7 +509,7 @@ static void PrintStackTracesFromDumps(string testLogPath)
{
if (OperatingSystem.IsWindows())
{
- WriteLineTimestamp("Reading crash dump '{dumpPath}'...");
+ WriteLineTimestamp($"Reading crash dump '{dumpPath}'...");
WriteLineTimestamp("Stack Trace Found:\n");
CoreclrTestWrapperLib.TryPrintStackTraceFromDmp(dumpPath,
@@ -527,7 +526,7 @@ static void PrintStackTracesFromDumps(string testLogPath)
continue;
}
- WriteLineTimestamp("Reading crash report '{crashReportPath}'...");
+ WriteLineTimestamp($"Reading crash report '{crashReportPath}'...");
WriteLineTimestamp("Stack Trace Found:\n");
CoreclrTestWrapperLib.TryPrintStackTraceFromCrashReport(crashReportPath,