From e58f897a1466bde099cef6d59d1890315f478a42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20S=C3=A1nchez=20L=C3=B3pez?= <1175054+carlossanlop@users.noreply.github.com> Date: Wed, 15 Nov 2023 13:08:41 -0800 Subject: [PATCH] Revert "Add XUnitLogChecker to log libraries dumps (#93906)" (#94807) This reverts commit d561f390b8085ffed309c49cf30ec205a47868c0. --- Directory.Build.props | 6 +- .../libraries/prepare-for-bin-publish.yml | 1 - eng/testing/RunnerTemplate.cmd | 35 ---- eng/testing/RunnerTemplate.sh | 178 ++++++++++-------- eng/testing/gen-debug-dump-docs.py | 35 ++-- src/libraries/sendtohelixhelp.proj | 20 +- src/libraries/tests.proj | 6 - .../CoreclrTestWrapperLib.cs | 2 +- .../Common/XUnitLogChecker/XUnitLogChecker.cs | 9 +- 9 files changed, 124 insertions(+), 168 deletions(-) diff --git a/Directory.Build.props b/Directory.Build.props index 4589267c02124..89b427fba10fa 100644 --- a/Directory.Build.props +++ b/Directory.Build.props @@ -180,6 +180,7 @@ $([MSBuild]::NormalizePath('$(TestExclusionListTasksDir)', 'TestExclusionListTasks.dll')) $([MSBuild]::NormalizeDirectory('$(ArtifactsBinDir)', 'coreclr', '$(TargetOS).$(TargetArchitecture).$(RuntimeConfiguration)')) $(CoreCLRToolPath) + $(WASMTIME_PATH) $([MSBuild]::NormalizeDirectory($(ArtifactsObjDir), 'wasmtime')) true @@ -331,11 +332,6 @@ true - - - false - true - $([MSBuild]::NormalizeDirectory('$(ArtifactsBinDir)', 'XUnitLogChecker')) diff --git a/eng/pipelines/libraries/prepare-for-bin-publish.yml b/eng/pipelines/libraries/prepare-for-bin-publish.yml index 7ea6630fa51cd..fbdb90f18716b 100644 --- a/eng/pipelines/libraries/prepare-for-bin-publish.yml +++ b/eng/pipelines/libraries/prepare-for-bin-publish.yml @@ -14,7 +14,6 @@ steps: ref/** runtime/** testhost/** - XUnitLogChecker/** - task: CopyFiles@2 displayName: Prepare bin folders to publish diff --git a/eng/testing/RunnerTemplate.cmd b/eng/testing/RunnerTemplate.cmd index fb877e215bb0f..4f4646a72315d 100644 --- a/eng/testing/RunnerTemplate.cmd +++ b/eng/testing/RunnerTemplate.cmd @@ -92,41 +92,6 @@ if %_exit_code%==1 ( ) ) ) - -if NOT "%__IsXUnitLogCheckerSupported%"=="1" ( - echo XUnitLogChecker not supported for this test case. Skipping. - GOTO SKIP_XUNITLOGCHECKER -) - -echo ----- start =============== XUnitLogChecker Output ===================================================== - -set DOTNET_EXE=%RUNTIME_PATH%\dotnet.exe -set XUNITLOGCHECKER_DLL=%HELIX_CORRELATION_PAYLOAD%\XUnitLogChecker.dll -set XUNITLOGCHECKER_COMMAND=%DOTNET_EXE% --roll-forward Major %XUNITLOGCHECKER_DLL% --dumps-path %HELIX_DUMP_FOLDER% -set XUNITLOGCHECKER_EXIT_CODE=1 - -if NOT EXIST %DOTNET_EXE% ( - echo dotnet.exe does not exist in the expected location: %DOTNET_EXE% - GOTO XUNITLOGCHECKER_END -) else if NOT EXIST %XUNITLOGCHECKER_DLL% ( - echo XUnitLogChecker.dll does not exist in the expected location: %XUNITLOGCHECKER_DLL% - GOTO XUNITLOGCHECKER_END -) - -echo %XUNITLOGCHECKER_COMMAND% -%XUNITLOGCHECKER_COMMAND% -set XUNITLOGCHECKER_EXIT_CODE=%ERRORLEVEL% - -:XUNITLOGCHECKER_END - -if %XUNITLOGCHECKER_EXIT_CODE% NEQ 0 ( - set _exit_code=%XUNITLOGCHECKER_EXIT_CODE% -) - -echo ----- end =============== XUnitLogChecker Output - exit code %XUNITLOGCHECKER_EXIT_CODE% =============== - -:SKIP_XUNITLOGCHECKER - exit /b %_exit_code% :: ========================= END Test Execution ================================= diff --git a/eng/testing/RunnerTemplate.sh b/eng/testing/RunnerTemplate.sh index ef19d6c95c074..9252c30576ed9 100644 --- a/eng/testing/RunnerTemplate.sh +++ b/eng/testing/RunnerTemplate.sh @@ -54,60 +54,80 @@ exitcode_list[131]="SIGQUIT Ctrl-\ occurred. Core dumped." exitcode_list[132]="SIGILL Illegal Instruction. Core dumped. Likely codegen issue." exitcode_list[133]="SIGTRAP Breakpoint hit. Core dumped." exitcode_list[134]="SIGABRT Abort. Managed or native assert, or runtime check such as heap corruption, caused call to abort(). Core dumped." -exitcode_list[135]="IGBUS Unaligned memory access. Core dumped." +exitcode_list[135]="IGBUS Unaligned memory access. Core dumped." exitcode_list[136]="SIGFPE Bad floating point arguments. Core dumped." exitcode_list[137]="SIGKILL Killed eg by kill" exitcode_list[139]="SIGSEGV Illegal memory access. Deref invalid pointer, overrunning buffer, stack overflow etc. Core dumped." exitcode_list[143]="SIGTERM Terminated. Usually before SIGKILL." exitcode_list[159]="SIGSYS Bad System Call." -function move_core_file_to_temp_location { +function print_info_from_core_file_using_lldb { local core_file_name=$1 + local executable_name=$2 + local plugin_path_name="$RUNTIME_PATH/shared/Microsoft.NETCore.App/9.9.9/libsosplugin.so" - # Append the dmp extension to ensure XUnitLogChecker finds it - local new_location=$HELIX_DUMP_FOLDER/$core_file_name.dmp + # check for existence of lldb on the path + hash lldb 2>/dev/null || { echo >&2 "lldb was not found. Unable to print core file."; return; } - echo "Copying dump file '$core_file_name' to '$new_location'" - cp $core_file_name $new_location + # pe, clrstack, and dumpasync are defined in libsosplugin.so + if [ ! -f $plugin_path_name ]; then + echo $plugin_path_name cannot be found. + return + fi - # Delete the old one - rm $core_file_name + echo ----- start =============== lldb Output ===================================================== + echo Printing managed exceptions, managed call stacks, and async state machines. + lldb -O "settings set target.exec-search-paths $RUNTIME_PATH" -o "plugin load $plugin_path_name" -o "clrthreads -managedexception" -o "pe -nested" -o "clrstack -all -a -f" -o "dumpasync -fields -stacks -roots" -o "quit" --core $core_file_name $executable_name + echo ----- end =============== lldb Output ======================================================= } -xunitlogchecker_exit_code=0 -function invoke_xunitlogchecker { - local dump_folder=$1 - - total_dumps=$(find $dump_folder -name "*.dmp" | wc -l) - - if [[ $total_dumps > 0 ]]; then - echo "Total dumps found in $dump_folder: $total_dumps" - xunitlogchecker_file_name="$HELIX_CORRELATION_PAYLOAD/XUnitLogChecker.dll" - dotnet_file_name="$RUNTIME_PATH/dotnet" - - if [[ ! -f $dotnet_file_name ]]; then - echo "'$dotnet_file_name' was not found. Unable to run XUnitLogChecker." - xunitlogchecker_exit_code=1 - elif [[ ! -f $xunitlogchecker_file_name ]]; then - echo "'$xunitlogchecker_file_name' was not found. Unable to print dump file contents." - xunitlogchecker_exit_code=2 - elif [[ ! -d $dump_folder ]]; then - echo "The dump directory '$dump_folder' does not exist." - else - echo "Executing XUnitLogChecker in $dump_folder..." - cmd="$dotnet_file_name --roll-forward Major $xunitlogchecker_file_name --dumps-path $dump_folder" - echo "$cmd" - $cmd - xunitlogchecker_exit_code=$? - fi - else - echo "No dumps found in $dump_folder." +function print_info_from_core_file_using_gdb { + local core_file_name=$1 + local executable_name=$2 + + # Check for the existence of GDB on the path + hash gdb 2>/dev/null || { echo >&2 "GDB was not found. Unable to print core file."; return; } + + echo ----- start =============== GDB Output ===================================================== + # Open the dump in GDB and print the stack from each thread. We can add more + # commands here if desired. + echo printing native stack. + gdb --batch -ex "thread apply all bt full" -ex "quit" $executable_name $core_file_name + echo ----- end =============== GDB Output ======================================================= +} + +function print_info_from_core_file { + local core_file_name=$1 + local executable_name=$RUNTIME_PATH/$2 + + if ! [ -e $executable_name ]; then + echo "Unable to find executable $executable_name" + return + elif ! [ -e $core_file_name ]; then + echo "Unable to find core file $core_file_name" + return fi + echo "Printing info from core file $core_file_name" + print_info_from_core_file_using_gdb $core_file_name $executable_name + print_info_from_core_file_using_lldb $core_file_name $executable_name +} + +function copy_core_file_to_temp_location { + local core_file_name=$1 + + local storage_location="/tmp/coredumps" + + # Create the directory (this shouldn't fail even if it already exists). + mkdir -p $storage_location + + local new_location=$storage_location/core.$RANDOM + + echo "Copying core file $core_file_name to $new_location in case you need it." + cp $core_file_name $new_location } # ========================= BEGIN Core File Setup ============================ -system_name="$(uname -s)" -if [[ $system_name == "Darwin" ]]; then +if [[ "$(uname -s)" == "Darwin" ]]; then # On OS X, we will enable core dump generation only if there are no core # files already in /cores/ at this point. This is being done to prevent # inadvertently flooding the CI machines with dumps. @@ -120,11 +140,21 @@ if [[ $system_name == "Darwin" ]]; then # https://github.com/dotnet/core-eng/issues/15597 ulimit -c 0 fi -fi -export DOTNET_DbgEnableMiniDump=1 -export DOTNET_EnableCrashReport=1 -export DOTNET_DbgMiniDumpName=$HELIX_DUMP_FOLDER/coredump.%d.dmp +elif [[ "$(uname -s)" == "Linux" ]]; then + # On Linux, we'll enable core file generation unconditionally, and if a dump + # is generated, we will print some useful information from it and delete the + # dump immediately. + + if [ -e /proc/self/coredump_filter ]; then + # Include memory in private and shared file-backed mappings in the dump. + # This ensures that we can see disassembly from our shared libraries when + # inspecting the contents of the dump. See 'man core' for details. + echo -n 0x3F > /proc/self/coredump_filter + fi + + ulimit -c unlimited +fi # ========================= END Core File Setup ============================== # ========================= BEGIN support for SuperPMI collection ============================== @@ -141,7 +171,7 @@ if [ ! -z $spmi_enable_collection ]; then fi mkdir -p $spmi_collect_dir export spmi_file_extension=so - if [[ $system_name == "Darwin" ]]; then + if [[ "$(uname -s)" == "Darwin" ]]; then export spmi_file_extension=dylib fi export SuperPMIShimLogPath=$spmi_collect_dir @@ -190,11 +220,27 @@ if [[ $test_exitcode -ne 0 ]]; then echo ulimit -c value: $(ulimit -c) fi -if [[ $system_name == "Linux" && $test_exitcode -ne 0 ]]; then +if [[ "$(uname -s)" == "Linux" && $test_exitcode -ne 0 ]]; then + if [ -n "$HELIX_WORKITEM_PAYLOAD" ]; then + + # For abrupt failures, in Helix, dump some of the kernel log, in case there is a hint + if [[ $test_exitcode -ne 1 ]]; then + dmesg | tail -50 + fi + + have_sleep=$(which sleep) + if [ -x "$have_sleep" ]; then + echo Waiting a few seconds for any dump to be written.. + sleep 10s + fi + fi + echo cat /proc/sys/kernel/core_pattern: $(cat /proc/sys/kernel/core_pattern) echo cat /proc/sys/kernel/core_uses_pid: $(cat /proc/sys/kernel/core_uses_pid) echo cat /proc/sys/kernel/coredump_filter: $(cat /proc/sys/kernel/coredump_filter) + echo Looking around for any Linux dump.. + # Depending on distro/configuration, the core files may either be named "core" # or "core." by default. We read /proc/sys/kernel/core_uses_pid to # determine which it is. @@ -202,47 +248,23 @@ if [[ $system_name == "Linux" && $test_exitcode -ne 0 ]]; then if [[ -e /proc/sys/kernel/core_uses_pid && "1" == $(cat /proc/sys/kernel/core_uses_pid) ]]; then core_name_uses_pid=1 fi - - # The osx dumps are too large to egress the machine - echo Looking around for any Linux dumps... if [[ "$core_name_uses_pid" == "1" ]]; then # We don't know what the PID of the process was, so let's look at all core # files whose name matches core.NUMBER - echo "Looking for files matching core.* ..." - for f in $(find . -name "core.*"); do - [[ $f =~ core.[0-9]+ ]] && move_core_file_to_temp_location "$f" + echo Looking for files matching core.* ... + for f in core.*; do + [[ $f =~ core.[0-9]+ ]] && print_info_from_core_file "$f" "dotnet" && copy_core_file_to_temp_location "$f" && rm "$f" done + elif [ -f core ]; then + echo found a dump named core in $EXECUTION_DIR ! + print_info_from_core_file "core" "dotnet" + copy_core_file_to_temp_location "core" + rm "core" + else + echo ... found no dump in $PWD fi - - if [ -f core ]; then - move_core_file_to_temp_location "core" - fi -fi - -if [ -n "$HELIX_WORKITEM_PAYLOAD" ]; then - # For abrupt failures, in Helix, dump some of the kernel log, in case there is a hint - if [[ $test_exitcode -ne 1 ]]; then - dmesg | tail -50 - fi - -fi - -if [[ -z "$__IsXUnitLogCheckerSupported" ]]; then - echo "The '__IsXUnitLogCheckerSupported' env var is not set." -elif [[ "$__IsXUnitLogCheckerSupported" != "1" ]]; then - echo "XUnitLogChecker not supported for this test case. Skipping." -else - echo ----- start =============== XUnitLogChecker Output ===================================================== - - invoke_xunitlogchecker "$HELIX_DUMP_FOLDER" - - if [[ $xunitlogchecker_exit_code -ne 0 ]]; then - test_exitcode=$xunitlogchecker_exit_code - fi - echo ----- end =============== XUnitLogChecker Output - exit code $xunitlogchecker_exit_code =========================== fi - popd >/dev/null # ======================== END Core File Inspection ========================== # The helix work item should not exit with non-zero if tests ran and produced results diff --git a/eng/testing/gen-debug-dump-docs.py b/eng/testing/gen-debug-dump-docs.py index 788e382ae16e2..04f860b031452 100644 --- a/eng/testing/gen-debug-dump-docs.py +++ b/eng/testing/gen-debug-dump-docs.py @@ -2,11 +2,6 @@ import sys import platform -scriptname = os.path.basename(__file__) - -def print_detail(str): - print(f"{scriptname}: {str}") - build_id = '' job_id = '' workitem = '' @@ -21,7 +16,7 @@ def print_detail(str): idx += 1 if arg == '-buildid': if idx >= args_len or sys.argv[idx].startswith('-'): - print_detail("Must specify a value for -buildid") + print("Must specify a value for -buildid") exit(1) build_id = sys.argv[idx] @@ -29,7 +24,7 @@ def print_detail(str): if arg == '-jobid': if idx >= args_len or sys.argv[idx].startswith('-'): - print_detail("Must specify a value for -jobid") + print("Must specify a value for -jobid") exit(1) job_id = sys.argv[idx] @@ -37,7 +32,7 @@ def print_detail(str): if arg == '-workitem': if idx >= args_len or sys.argv[idx].startswith('-'): - print_detail("Must specify a value for -workitem") + print("Must specify a value for -workitem") exit(1) workitem = sys.argv[idx] @@ -45,7 +40,7 @@ def print_detail(str): if arg == '-templatedir': if idx >= args_len or sys.argv[idx].startswith('-'): - print_detail("Must specify a value for -templatedir") + print("Must specify a value for -templatedir") exit(1) template_dir = sys.argv[idx] @@ -53,7 +48,7 @@ def print_detail(str): if arg == '-outdir': if idx >= args_len or sys.argv[idx].startswith('-'): - print_detail("Must specify a value for -outdir") + print("Must specify a value for -outdir") exit(1) out_dir = sys.argv[idx] @@ -61,7 +56,7 @@ def print_detail(str): if arg == '-dumpdir': if idx >= args_len or sys.argv[idx].startswith('-'): - print_detail("Must specify a value for -dumpdir") + print("Must specify a value for -dumpdir") exit(1) dump_dir = sys.argv[idx] @@ -69,7 +64,7 @@ def print_detail(str): if arg == '-productver': if idx >= args_len or sys.argv[idx].startswith('-'): - print_detail("Must specify a value for -productver") + print("Must specify a value for -productver") exit(1) product_ver = sys.argv[idx] @@ -82,23 +77,23 @@ def print_detail(str): dump_names.append(filename) if len(dump_names) == 0: - print_detail("Did not find dumps, skipping dump docs generation.") + print("Did not find dumps, skipping dump docs generation.") exit(0) if build_id == '': - print_detail("ERROR: unespecified required argument -buildid") + print("ERROR: unespecified required argument -buildid") exit(1) if workitem == '': - print_detail("ERROR: unespecified required argument -workitem") + print("ERROR: unespecified required argument -workitem") exit(1) if job_id == '': - print_detail("ERROR: unespecified required argument -jobid") + print("ERROR: unespecified required argument -jobid") exit(1) if product_ver == '': - print_detail("ERROR: unespecified required argument -productver") + print("ERROR: unespecified required argument -productver") exit(1) replace_string = '' @@ -109,7 +104,7 @@ def print_detail(str): with open(source_file, 'r') as f: file_text = f.read() - print_detail('read file: ' + source_file) + print('read file: ' + source_file) replace_string = file_text.replace('%JOBID%', job_id) replace_string = replace_string.replace('%WORKITEM%', workitem) @@ -120,7 +115,7 @@ def print_detail(str): output_file = out_dir + dir_separator + 'how-to-debug-dump.md' with open(output_file, 'w+') as output: - print_detail('writing output file: ' + output_file) + print('writing output file: ' + output_file) lines = replace_string.split(os.linesep) lin_dump_dir= workitem + "/workitems/" + workitem + "/" @@ -138,4 +133,4 @@ def print_detail(str): else: output.write(line + os.linesep) -print_detail('done writing debug dump information') +print('done writing debug dump information') diff --git a/src/libraries/sendtohelixhelp.proj b/src/libraries/sendtohelixhelp.proj index 08022b485977e..700a08bdf30bd 100644 --- a/src/libraries/sendtohelixhelp.proj +++ b/src/libraries/sendtohelixhelp.proj @@ -175,6 +175,7 @@ + @@ -232,24 +233,6 @@ - - <_ShouldSetXUnitLogCheckerEnvVar>1 - - $(HelixPreCommands); - set __TestArchitecture=$(TargetArchitecture); - set __IsXUnitLogCheckerSupported=$(_ShouldSetXUnitLogCheckerEnvVar) - - - $(HelixPreCommands); - export __TestArchitecture=$(TargetArchitecture); - export __IsXUnitLogCheckerSupported=$(_ShouldSetXUnitLogCheckerEnvVar) - - - - - - - + diff --git a/src/libraries/tests.proj b/src/libraries/tests.proj index f0308552bad3d..ad3ac69f49b88 100644 --- a/src/libraries/tests.proj +++ b/src/libraries/tests.proj @@ -703,12 +703,6 @@ BuildInParallel="$(Samples_BuildInParallel)" /> - - - - DateTime.Compare(File.GetCreationTime(dmp), testRunDateTime) >= 0); } else { - dumpsFound = Directory.GetFiles(s_configuration.DumpsPath, "*.dmp"); + dumpsFound = Directory.GetFiles(s_configuration.DumpsPath, + "*coredump*.dmp"); } if (dumpsFound.Count() == 0) @@ -509,7 +510,7 @@ static void PrintStackTracesFromDumps(string testLogPath) { if (OperatingSystem.IsWindows()) { - WriteLineTimestamp($"Reading crash dump '{dumpPath}'..."); + WriteLineTimestamp("Reading crash dump '{dumpPath}'..."); WriteLineTimestamp("Stack Trace Found:\n"); CoreclrTestWrapperLib.TryPrintStackTraceFromDmp(dumpPath, @@ -526,7 +527,7 @@ static void PrintStackTracesFromDumps(string testLogPath) continue; } - WriteLineTimestamp($"Reading crash report '{crashReportPath}'..."); + WriteLineTimestamp("Reading crash report '{crashReportPath}'..."); WriteLineTimestamp("Stack Trace Found:\n"); CoreclrTestWrapperLib.TryPrintStackTraceFromCrashReport(crashReportPath,