From 2bd129a29bcf39ce1d0354dc440792766c70037c Mon Sep 17 00:00:00 2001 From: Mischan Toosarani-Hausberger Date: Wed, 22 May 2024 11:24:44 +0200 Subject: [PATCH] fix: correctly support stack overflows across platforms (#982) * fix: stack-overflow handling on Windows * black * docs * add warning exception for recursion example * ignore tests on other platforms * temporary reason to fix pytests * De Morgan * disable sibling call optimization to provoke stack-overflow on Linux * Simplify crash-trigger across gcc/clang for `sentry_example` * Check whether old NDK build can be fixed with dynamic sigaltstack. * Ensure we check not only sigaltstack errors but also invalid stacks * better reflect the two changes in the change-log. --- CHANGELOG.md | 2 + CMakeLists.txt | 6 ++- CONTRIBUTING.md | 1 + examples/example.c | 11 ++++++ src/backends/sentry_backend_breakpad.cpp | 4 ++ src/backends/sentry_backend_crashpad.cpp | 4 ++ src/backends/sentry_backend_inproc.c | 50 +++++++++++++----------- src/sentry_os.c | 35 ++++++++++++++++- src/sentry_os.h | 3 ++ tests/assertions.py | 26 +++++++----- tests/test_integration_crashpad.py | 43 ++++++++++++++++++-- tests/test_integration_stdout.py | 36 ++++++++++++++++- 12 files changed, 182 insertions(+), 39 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fefb2521b..9181bd562 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ **Fixes**: - Allow `crashpad` to run under [Epic's Anti-Cheat Client](https://dev.epicgames.com/docs/game-services/anti-cheat/using-anti-cheat#external-crash-dumpers) by deferring the full `crashpad_handler` access rights to the client application until a crash occurred. ([#980](https://github.com/getsentry/sentry-native/pull/980), [crashpad#99](https://github.com/getsentry/crashpad/pull/99)) +- Reserve enough stack space on Windows for our handler to run when the stack is exhausted from stack-overflow. ([#982](https://github.com/getsentry/sentry-native/pull/982)) +- Only configure a `sigaltstack` in `inproc` if no previous configuration exists on Linux and Android. ([#982](https://github.com/getsentry/sentry-native/pull/982)) - Store transaction `data` in the event property `extra` since the `data` property is discarded by `relay`. ([#986](https://github.com/getsentry/sentry-native/issues/986)) **Docs**: diff --git a/CMakeLists.txt b/CMakeLists.txt index 3996e83db..00ac137ab 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -566,10 +566,14 @@ if(SENTRY_BUILD_EXAMPLES) target_link_libraries(sentry_example PRIVATE sentry) if(MSVC) - target_compile_options(sentry_example PRIVATE $) + target_compile_options(sentry_example PRIVATE $) # to test handling SEH by-passing exceptions we need to enable the control flow guard target_compile_options(sentry_example PRIVATE $) + else() + # Disable all optimizations for the `sentry_example` in gcc/clang. This allows us to keep crash triggers simple. + # The effects besides reproducible code-gen across compiler versions, will be negligible for build- and runtime. + target_compile_options(sentry_example PRIVATE $) endif() # set static runtime if enabled diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2af86e966..4678c4d74 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -145,6 +145,7 @@ The example currently supports the following commands: - `on-crash`: Installs an `on_crash()` callback that retains the crash event. - `discarding-on-crash`: Installs an `on_crash()` callback that discards the crash event. - `override-sdk-name`: Changes the SDK name via the options at runtime. +- `stack-overflow`: Provokes a stack-overflow. Only on Windows using crashpad with its WER handler module: diff --git a/examples/example.c b/examples/example.c index 5eac53ac8..42e76231e 100644 --- a/examples/example.c +++ b/examples/example.c @@ -17,6 +17,7 @@ #include #ifdef SENTRY_PLATFORM_WINDOWS +# include # include # define sleep_s(SECONDS) Sleep((SECONDS)*1000) #else @@ -161,6 +162,13 @@ trigger_crash() memset((char *)invalid_mem, 1, 100); } +static void +trigger_stack_overflow() +{ + alloca(1024); + trigger_stack_overflow(); +} + int main(int argc, char **argv) { @@ -322,6 +330,9 @@ main(int argc, char **argv) if (has_arg(argc, argv, "crash")) { trigger_crash(); } + if (has_arg(argc, argv, "stack-overflow")) { + trigger_stack_overflow(); + } #if defined(SENTRY_PLATFORM_WINDOWS) && !defined(__MINGW32__) \ && !defined(__MINGW64__) if (has_arg(argc, argv, "fastfail")) { diff --git a/src/backends/sentry_backend_breakpad.cpp b/src/backends/sentry_backend_breakpad.cpp index 40dc6a189..68718eaaf 100644 --- a/src/backends/sentry_backend_breakpad.cpp +++ b/src/backends/sentry_backend_breakpad.cpp @@ -7,6 +7,9 @@ extern "C" { #include "sentry_database.h" #include "sentry_envelope.h" #include "sentry_options.h" +#ifdef SENTRY_PLATFORM_WINDOWS +# include "sentry_os.h" +#endif #include "sentry_path.h" #include "sentry_string.h" #include "sentry_sync.h" @@ -209,6 +212,7 @@ sentry__breakpad_backend_startup( sentry_path_t *current_run_folder = options->run->run_path; #ifdef SENTRY_PLATFORM_WINDOWS + sentry__reserve_thread_stack(); backend->data = new google_breakpad::ExceptionHandler( current_run_folder->path, NULL, sentry__breakpad_backend_callback, NULL, google_breakpad::ExceptionHandler::HANDLER_EXCEPTION); diff --git a/src/backends/sentry_backend_crashpad.cpp b/src/backends/sentry_backend_crashpad.cpp index 711dc5700..ad7d5846a 100644 --- a/src/backends/sentry_backend_crashpad.cpp +++ b/src/backends/sentry_backend_crashpad.cpp @@ -340,6 +340,10 @@ crashpad_backend_startup( } } +#ifdef SENTRY_PLATFORM_WINDOWS + sentry__reserve_thread_stack(); +#endif + // The crashpad client uses shell lookup rules (absolute path, relative // path, or bare executable name that is looked up in $PATH). // However, it crashes hard when it cant resolve the handler, so we make diff --git a/src/backends/sentry_backend_inproc.c b/src/backends/sentry_backend_inproc.c index a18b2944b..18869ef6b 100644 --- a/src/backends/sentry_backend_inproc.c +++ b/src/backends/sentry_backend_inproc.c @@ -6,6 +6,9 @@ #include "sentry_database.h" #include "sentry_envelope.h" #include "sentry_options.h" +#if defined(SENTRY_PLATFORM_WINDOWS) +# include "sentry_os.h" +#endif #include "sentry_scope.h" #include "sentry_sync.h" #include "sentry_transport.h" @@ -34,10 +37,6 @@ * Both breakpad and crashpad are way more defensive in the setup of their * signal stacks and take existing stacks into account (or reuse them). */ -#ifndef SENTRY_PLATFORM_ANDROID -# define SETUP_SIGALTSTACK -#endif - #define SIGNAL_DEF(Sig, Desc) \ { \ Sig, #Sig, Desc \ @@ -57,9 +56,7 @@ struct signal_slot { # define SIGNAL_STACK_SIZE 65536 static struct sigaction g_sigaction; static struct sigaction g_previous_handlers[SIGNAL_COUNT]; -# ifdef SETUP_SIGALTSTACK -static stack_t g_signal_stack; -# endif +static stack_t g_signal_stack = { 0 }; static const struct signal_slot SIGNAL_DEFINITIONS[SIGNAL_COUNT] = { SIGNAL_DEF(SIGILL, "IllegalInstruction"), SIGNAL_DEF(SIGTRAP, "Trap"), @@ -112,16 +109,24 @@ startup_inproc_backend( } } - // install our own signal handler -# ifdef SETUP_SIGALTSTACK - g_signal_stack.ss_sp = sentry_malloc(SIGNAL_STACK_SIZE); - if (!g_signal_stack.ss_sp) { - return 1; + // set up an alternate signal stack if noone defined one before + stack_t old_sig_stack; + if (sigaltstack(NULL, &old_sig_stack) == -1 || old_sig_stack.ss_sp == NULL + || old_sig_stack.ss_size == 0) { + SENTRY_TRACEF("installing signal stack (size: %d)", SIGNAL_STACK_SIZE); + g_signal_stack.ss_sp = sentry_malloc(SIGNAL_STACK_SIZE); + if (!g_signal_stack.ss_sp) { + return 1; + } + g_signal_stack.ss_size = SIGNAL_STACK_SIZE; + g_signal_stack.ss_flags = 0; + sigaltstack(&g_signal_stack, 0); + } else { + SENTRY_TRACEF( + "using existing signal stack (size: %d)", old_sig_stack.ss_size); } - g_signal_stack.ss_size = SIGNAL_STACK_SIZE; - g_signal_stack.ss_flags = 0; - sigaltstack(&g_signal_stack, 0); -# endif + + // install our own signal handler sigemptyset(&g_sigaction.sa_mask); g_sigaction.sa_sigaction = handle_signal; g_sigaction.sa_flags = SA_SIGINFO | SA_ONSTACK; @@ -134,12 +139,12 @@ startup_inproc_backend( static void shutdown_inproc_backend(sentry_backend_t *UNUSED(backend)) { -# ifdef SETUP_SIGALTSTACK - g_signal_stack.ss_flags = SS_DISABLE; - sigaltstack(&g_signal_stack, 0); - sentry_free(g_signal_stack.ss_sp); - g_signal_stack.ss_sp = NULL; -# endif + if (g_signal_stack.ss_sp) { + g_signal_stack.ss_flags = SS_DISABLE; + sigaltstack(&g_signal_stack, 0); + sentry_free(g_signal_stack.ss_sp); + g_signal_stack.ss_sp = NULL; + } reset_signal_handlers(); } @@ -184,6 +189,7 @@ static int startup_inproc_backend( sentry_backend_t *UNUSED(backend), const sentry_options_t *UNUSED(options)) { + sentry__reserve_thread_stack(); g_previous_handler = SetUnhandledExceptionFilter(&handle_exception); SetErrorMode(SEM_FAILCRITICALERRORS); return 0; diff --git a/src/sentry_os.c b/src/sentry_os.c index 4675575c2..66434863b 100644 --- a/src/sentry_os.c +++ b/src/sentry_os.c @@ -4,7 +4,7 @@ #ifdef SENTRY_PLATFORM_WINDOWS -# include +# include # define CURRENT_VERSION "SOFTWARE\\Microsoft\\Windows NT\\CurrentVersion" void * @@ -137,6 +137,39 @@ sentry__get_os_context(void) return sentry_value_new_null(); } +void +sentry__reserve_thread_stack(void) +{ + const unsigned long expected_stack_size = 64 * 1024; + unsigned long stack_size = 0; + SetThreadStackGuarantee(&stack_size); + if (stack_size < expected_stack_size) { + stack_size = expected_stack_size; + SetThreadStackGuarantee(&stack_size); + } +} + +# if defined(SENTRY_BUILD_SHARED) + +BOOL APIENTRY +DllMain(HMODULE hModule, DWORD ul_reason_for_call, LPVOID lpReserved) +{ + (void)hModule; + (void)lpReserved; + + switch (ul_reason_for_call) { + case DLL_PROCESS_ATTACH: + case DLL_THREAD_ATTACH: + sentry__reserve_thread_stack(); + break; + default: + return TRUE; + } + return TRUE; +} + +# endif // defined(SENTRY_BUILD_SHARED) + #elif defined(SENTRY_PLATFORM_MACOS) # include diff --git a/src/sentry_os.h b/src/sentry_os.h index cefff7978..fc422d52e 100644 --- a/src/sentry_os.h +++ b/src/sentry_os.h @@ -4,6 +4,7 @@ #include "sentry_boot.h" #ifdef SENTRY_PLATFORM_WINDOWS + typedef struct { uint32_t major; uint32_t minor; @@ -13,6 +14,8 @@ typedef struct { int sentry__get_kernel_version(windows_version_t *win_ver); int sentry__get_windows_version(windows_version_t *win_ver); +void sentry__reserve_thread_stack(void); + #endif sentry_value_t sentry__get_os_context(void); diff --git a/tests/assertions.py b/tests/assertions.py index e9d86878c..12765a561 100644 --- a/tests/assertions.py +++ b/tests/assertions.py @@ -154,9 +154,7 @@ def assert_stacktrace(envelope, inside_exception=False, check_size=True): ) -def assert_breadcrumb(envelope): - event = envelope.get_event() - +def assert_breadcrumb_inner(breadcrumbs): expected = { "type": "http", "message": "debug crumb", @@ -169,7 +167,12 @@ def assert_breadcrumb(envelope): "reason": "OK", }, } - assert any(matches(b, expected) for b in event["breadcrumbs"]) + assert any(matches(b, expected) for b in breadcrumbs) + + +def assert_breadcrumb(envelope): + event = envelope.get_event() + assert_breadcrumb_inner(event["breadcrumbs"]) def assert_attachment(envelope): @@ -297,17 +300,22 @@ def _validate_breadcrumb_seq(seq, breadcrumb_func): assert is_valid_timestamp(breadcrumb["timestamp"]) -def assert_crashpad_upload(req): - multipart = gzip.decompress(req.get_data()) - msg = email.message_from_bytes(bytes(str(req.headers), encoding="utf8") + multipart) - attachments = _load_crashpad_attachments(msg) - +def assert_overflowing_breadcrumb(attachments): if len(attachments.breadcrumb1) > 3: _validate_breadcrumb_seq(range(97), lambda i: attachments.breadcrumb1[3 + i]) _validate_breadcrumb_seq( range(97, 101), lambda i: attachments.breadcrumb2[i - 97] ) + else: + assert_breadcrumb_inner(attachments.breadcrumb1) + + +def assert_crashpad_upload(req): + multipart = gzip.decompress(req.get_data()) + msg = email.message_from_bytes(bytes(str(req.headers), encoding="utf8") + multipart) + attachments = _load_crashpad_attachments(msg) + assert_overflowing_breadcrumb(attachments) assert attachments.event["level"] == "fatal" assert any( diff --git a/tests/test_integration_crashpad.py b/tests/test_integration_crashpad.py index 28647bcc4..ccbbfdea5 100644 --- a/tests/test_integration_crashpad.py +++ b/tests/test_integration_crashpad.py @@ -81,10 +81,6 @@ def test_crashpad_reinstall(cmake, httpserver): def test_crashpad_wer_crash(cmake, httpserver, run_args): tmp_path = cmake(["sentry_example"], {"SENTRY_BACKEND": "crashpad"}) - # If we are building on a Windows without WER enabled this test doesn't make sense - if not os.path.exists(tmp_path / "crashpad_wer.dll"): - return - # make sure we are isolated from previous runs shutil.rmtree(tmp_path / ".sentry-native", ignore_errors=True) @@ -193,6 +189,45 @@ def test_crashpad_dumping_crash(cmake, httpserver, run_args, build_args): assert_crashpad_upload(multipart) +def test_crashpad_dumping_stack_overflow(cmake, httpserver): + tmp_path = cmake(["sentry_example"], {"SENTRY_BACKEND": "crashpad"}) + + # make sure we are isolated from previous runs + shutil.rmtree(tmp_path / ".sentry-native", ignore_errors=True) + + env = dict(os.environ, SENTRY_DSN=make_dsn(httpserver)) + httpserver.expect_oneshot_request("/api/123456/minidump/").respond_with_data("OK") + httpserver.expect_request("/api/123456/envelope/").respond_with_data("OK") + + with httpserver.wait(timeout=10) as waiting: + child = run( + tmp_path, + "sentry_example", + ["log", "start-session", "attachment", "stack-overflow"], + env=env, + ) + assert child.returncode # well, it's a crash after all + + assert waiting.result + + # the session crash heuristic on Mac uses timestamps, so make sure we have + # a small delay here + time.sleep(1) + + run(tmp_path, "sentry_example", ["log", "no-setup"], check=True, env=env) + + assert len(httpserver.log) == 2 + session, multipart = ( + (httpserver.log[0][0], httpserver.log[1][0]) + if is_session_envelope(httpserver.log[0][0].get_data()) + else (httpserver.log[1][0], httpserver.log[0][0]) + ) + + envelope = Envelope.deserialize(session.get_data()) + assert_session(envelope, {"status": "crashed", "errors": 1}) + assert_crashpad_upload(multipart) + + def is_session_envelope(data): return b'"type":"session"' in data diff --git a/tests/test_integration_stdout.py b/tests/test_integration_stdout.py index e44a90ead..4e6b8dd07 100644 --- a/tests/test_integration_stdout.py +++ b/tests/test_integration_stdout.py @@ -135,18 +135,22 @@ def test_multi_process(cmake): assert len(runs) == 0 -def run_crash_stdout_for(backend, cmake, example_args): +def run_stdout_for(backend, cmake, example_args): tmp_path = cmake( ["sentry_example"], {"SENTRY_BACKEND": backend, "SENTRY_TRANSPORT": "none"}, ) - child = run(tmp_path, "sentry_example", ["attachment", "crash"] + example_args) + child = run(tmp_path, "sentry_example", example_args) assert child.returncode # well, it's a crash after all return tmp_path, check_output(tmp_path, "sentry_example", ["stdout", "no-setup"]) +def run_crash_stdout_for(backend, cmake, example_args): + return run_stdout_for(backend, cmake, ["attachment", "crash"] + example_args) + + def test_inproc_crash_stdout(cmake): tmp_path, output = run_crash_stdout_for("inproc", cmake, []) @@ -198,6 +202,18 @@ def test_inproc_crash_stdout_before_send_and_on_crash(cmake): assert_inproc_crash(envelope) +def test_inproc_stack_overflow_stdout(cmake): + tmp_path, output = run_stdout_for("inproc", cmake, ["attachment", "stack-overflow"]) + + envelope = Envelope.deserialize(output) + + assert_crash_timestamp(has_files, tmp_path) + assert_meta(envelope, integration="inproc") + assert_breadcrumb(envelope) + assert_attachment(envelope) + assert_inproc_crash(envelope) + + @pytest.mark.skipif(not has_breakpad, reason="test needs breakpad backend") def test_breakpad_crash_stdout(cmake): tmp_path, output = run_crash_stdout_for("breakpad", cmake, []) @@ -253,3 +269,19 @@ def test_breakpad_crash_stdout_before_send_and_on_crash(cmake): assert_breadcrumb(envelope) assert_attachment(envelope) assert_breakpad_crash(envelope) + + +@pytest.mark.skipif(not has_breakpad, reason="test needs breakpad backend") +def test_breakpad_stack_overflow_stdout(cmake): + tmp_path, output = run_stdout_for( + "breakpad", cmake, ["attachment", "stack-overflow"] + ) + + envelope = Envelope.deserialize(output) + + assert_crash_timestamp(has_files, tmp_path) + assert_meta(envelope, integration="breakpad") + assert_breadcrumb(envelope) + assert_attachment(envelope) + assert_minidump(envelope) + assert_breakpad_crash(envelope)