From e575c37ff88bc3e7f8e8a37ddd6e58b3bfa4cbde Mon Sep 17 00:00:00 2001 From: Diogo Netto <61364108+d-netto@users.noreply.github.com> Date: Fri, 22 Mar 2024 14:06:26 -0300 Subject: [PATCH] --safe-crash-log-file flag (#140) * --safe-crash-log-file flag * Update init.c * json escape jl_safe_printf when safe crash log file * add timestamp to json logs * port it to aarch64 darwin * fix minor warning * missing double quote * Suggestion from code review: make sig_stack_size a const in signals-unix.c Co-authored-by: Kiran Pamnany * Suggestion from code review: make sig_stack size a const in signals-win.c Co-authored-by: Kiran Pamnany * more suggestions from Kiran's review * more suggestions from review --------- Co-authored-by: Malte Sandstede Co-authored-by: Adnan Alhomssi Co-authored-by: Kiran Pamnany --- base/options.jl | 1 + src/init.c | 9 ++++++++ src/jl_uv.c | 54 +++++++++++++++++++++++++++++++++++++++++++ src/jloptions.c | 10 +++++++- src/jloptions.h | 1 + src/julia_internal.h | 26 +++++++++++++++++++++ src/signal-handling.c | 2 ++ src/signals-unix.c | 9 +------- src/signals-win.c | 2 +- 9 files changed, 104 insertions(+), 10 deletions(-) diff --git a/base/options.jl b/base/options.jl index a94936391fa8da..cb35b2b3806f45 100644 --- a/base/options.jl +++ b/base/options.jl @@ -57,6 +57,7 @@ struct JLOptions strip_ir::Int8 permalloc_pkgimg::Int8 heap_size_hint::UInt64 + safe_crash_log_file::Ptr{UInt8} end # This runs early in the sysimage != is not defined yet diff --git a/src/init.c b/src/init.c index f7a35e95daa1b5..83f528b35ac11b 100644 --- a/src/init.c +++ b/src/init.c @@ -823,6 +823,15 @@ JL_DLLEXPORT void julia_init(JL_IMAGE_SEARCH rel) if (jl_options.handle_signals == JL_OPTIONS_HANDLE_SIGNALS_ON) jl_install_default_signal_handlers(); +#if (defined(_OS_LINUX_) && defined(_CPU_X86_64_)) || (defined(_OS_DARWIN_) && defined(_CPU_AARCH64_)) + if (jl_options.safe_crash_log_file != NULL) { + jl_sig_fd = open(jl_options.safe_crash_log_file, O_WRONLY | O_CREAT | O_APPEND, 0600); + if (jl_sig_fd == -1) { + jl_error("fatal error: could not open safe crash log file for writing"); + } + } +#endif + jl_gc_init(); arraylist_new(&jl_linkage_blobs, 0); diff --git a/src/jl_uv.c b/src/jl_uv.c index 62dc3a628d0857..a5441e90c96f87 100644 --- a/src/jl_uv.c +++ b/src/jl_uv.c @@ -15,6 +15,7 @@ #include "errno.h" #include #include +#include #endif #include "julia.h" @@ -677,6 +678,56 @@ JL_DLLEXPORT int jl_printf(uv_stream_t *s, const char *format, ...) return c; } +STATIC_INLINE void print_error_msg_as_json(char *buf) JL_NOTSAFEPOINT +{ + // Our telemetry on SPCS expects a JSON object per line + // The following lines prepare the timestamp string and the JSON object + struct timeval tv; + struct tm* tm_info; + char timestamp_buffer[50]; + // Get current time + gettimeofday(&tv, NULL); + tm_info = gmtime(&tv.tv_sec); + // Format time + int offset = strftime(timestamp_buffer, 25, "%Y-%m-%dT%H:%M:%S", tm_info); + // Append milliseconds + snprintf(timestamp_buffer + offset, 25, ".%03d", tv.tv_usec / 1000); + const char *json_preamble_p1 = "\n{\"level\":\"Error\", \"timestamp\":\""; + const char *json_preamble_p2 = "\", \"message\": \""; + const char *json_postamble = "\"}\n"; + // Ignore write failures because there is nothing we can do + write(jl_sig_fd, json_preamble_p1, strlen(json_preamble_p1)); + write(jl_sig_fd, timestamp_buffer, strlen(timestamp_buffer)); + write(jl_sig_fd, json_preamble_p2, strlen(json_preamble_p2)); + // JSON escape the input string + for(size_t i = 0; i < strlen(buf); i += 1) { + switch (buf[i]) { + case '"': + write(jl_sig_fd, "\\\"", 2); + break; + case '\b': + write(jl_sig_fd, "\\b", 2); + break; + case '\n': + write(jl_sig_fd, "\\n", 2); + break; + case '\r': + write(jl_sig_fd, "\\r", 2); + break; + case '\t': + write(jl_sig_fd, "\\t", 2); + break; + case '\\': + write(jl_sig_fd, "\\\\", 2); + break; + default: + write(jl_sig_fd, buf + i, 1); + } + } + write(jl_sig_fd, json_postamble, strlen(json_postamble)); + fdatasync(jl_sig_fd); +} + JL_DLLEXPORT void jl_safe_printf(const char *fmt, ...) { static char buf[1000]; @@ -693,6 +744,9 @@ JL_DLLEXPORT void jl_safe_printf(const char *fmt, ...) va_end(args); buf[999] = '\0'; + if (jl_inside_signal_handler() && jl_sig_fd != 0) { + print_error_msg_as_json(buf); + } if (write(STDERR_FILENO, buf, strlen(buf)) < 0) { // nothing we can do; ignore the failure } diff --git a/src/jloptions.c b/src/jloptions.c index e81c34d37ef472..47f78f2de2a17f 100644 --- a/src/jloptions.c +++ b/src/jloptions.c @@ -90,6 +90,7 @@ JL_DLLEXPORT void jl_init_options(void) 0, // strip-ir 0, // permalloc_pkgimg 0, // heap-size-hint + NULL, // safe_crash_log_file }; jl_options_initialized = 1; } @@ -258,7 +259,8 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) opt_strip_ir, opt_heap_size_hint, opt_gc_threads, - opt_permalloc_pkgimg + opt_permalloc_pkgimg, + opt_safe_crash_log_file, }; static const char* const shortopts = "+vhqH:e:E:L:J:C:it:p:O:g:"; static const struct option longopts[] = { @@ -320,6 +322,7 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) { "strip-ir", no_argument, 0, opt_strip_ir }, { "permalloc-pkgimg",required_argument, 0, opt_permalloc_pkgimg }, { "heap-size-hint", required_argument, 0, opt_heap_size_hint }, + { "safe-crash-log-file", required_argument, 0, opt_safe_crash_log_file }, { 0, 0, 0, 0 } }; @@ -850,6 +853,11 @@ JL_DLLEXPORT void jl_parse_opts(int *argcp, char ***argvp) else jl_errorf("julia: invalid argument to --permalloc-pkgimg={yes|no} (%s)", optarg); break; + case opt_safe_crash_log_file: + jl_options.safe_crash_log_file = strdup(optarg); + if (jl_options.safe_crash_log_file == NULL) + jl_error("julia: failed to allocate memory for --safe-crash-log-file"); + break; default: jl_errorf("julia: unhandled option -- %c\n" "This is a bug, please report it.", c); diff --git a/src/jloptions.h b/src/jloptions.h index 8649c405112d71..b633291c6ed41e 100644 --- a/src/jloptions.h +++ b/src/jloptions.h @@ -61,6 +61,7 @@ typedef struct { int8_t strip_ir; int8_t permalloc_pkgimg; uint64_t heap_size_hint; + const char *safe_crash_log_file; } jl_options_t; #endif diff --git a/src/julia_internal.h b/src/julia_internal.h index 3211d0fe7b4d99..9940dbf68ba493 100644 --- a/src/julia_internal.h +++ b/src/julia_internal.h @@ -702,6 +702,32 @@ JL_CALLABLE(jl_f_opaque_closure_call); void jl_install_default_signal_handlers(void); void restore_signals(void); void jl_install_thread_signal_handler(jl_ptls_t ptls); +extern const size_t sig_stack_size; +STATIC_INLINE int is_addr_on_sigstack(jl_ptls_t ptls, void *ptr) +{ + // One guard page for signal_stack. + return !((char*)ptr < (char*)ptls->signal_stack - jl_page_size || + (char*)ptr > (char*)ptls->signal_stack + sig_stack_size); +} +STATIC_INLINE int jl_inside_signal_handler(void) +{ +#if (defined(_OS_LINUX_) && defined(_CPU_X86_64_)) || (defined(_OS_DARWIN_) && defined(_CPU_AARCH64_)) + // Read the stack pointer + size_t sp; +#if defined(_OS_LINUX_) && defined(_CPU_X86_64_) + __asm__ __volatile__("movq %%rsp, %0" : "=r"(sp)); +#elif defined(_OS_DARWIN_) && defined(_CPU_AARCH64_) + __asm__ __volatile__("mov %0, sp" : "=r"(sp)); +#endif + // Check if the stack pointer is within the signal stack + jl_ptls_t ptls = jl_current_task->ptls; + return is_addr_on_sigstack(ptls, (void*)sp); +#else + return 0; +#endif +} +// File-descriptor for safe logging on signal handling +extern int jl_sig_fd; JL_DLLEXPORT jl_fptr_args_t jl_get_builtin_fptr(jl_value_t *b); diff --git a/src/signal-handling.c b/src/signal-handling.c index b280fc1e0aeef6..c2a344a7525472 100644 --- a/src/signal-handling.c +++ b/src/signal-handling.c @@ -28,6 +28,8 @@ static const uint64_t GIGA = 1000000000ULL; // Timers to take samples at intervals JL_DLLEXPORT void jl_profile_stop_timer(void); JL_DLLEXPORT int jl_profile_start_timer(void); +// File-descriptor for safe logging on signal handling +int jl_sig_fd; /////////////////////// // Utility functions // diff --git a/src/signals-unix.c b/src/signals-unix.c index a79043d78e682a..7852b7ebb26181 100644 --- a/src/signals-unix.c +++ b/src/signals-unix.c @@ -38,7 +38,7 @@ // 8M signal stack, same as default stack size and enough // for reasonable finalizers. // Should also be enough for parallel GC when we have it =) -#define sig_stack_size (8 * 1024 * 1024) +const size_t sig_stack_size = (8 * 1024 * 1024); #include "julia_assert.h" @@ -91,13 +91,6 @@ static inline __attribute__((unused)) uintptr_t jl_get_rsp_from_ctx(const void * #endif } -static int is_addr_on_sigstack(jl_ptls_t ptls, void *ptr) -{ - // One guard page for signal_stack. - return !((char*)ptr < (char*)ptls->signal_stack - jl_page_size || - (char*)ptr > (char*)ptls->signal_stack + sig_stack_size); -} - // Modify signal context `_ctx` so that `fptr` will execute when the signal // returns. `fptr` will execute on the signal stack, and must not return. // jl_call_in_ctx is also currently executing on that signal stack, diff --git a/src/signals-win.c b/src/signals-win.c index cbf3c7436c8ff1..bcb3a1fd246f0e 100644 --- a/src/signals-win.c +++ b/src/signals-win.c @@ -4,7 +4,7 @@ // Note that this file is `#include`d by "signal-handling.c" #include // hidden by LEAN_AND_MEAN -#define sig_stack_size 131072 // 128k reserved for SEGV handling +const size_t sig_stack_size = 131072; // 128k reserved for SEGV handling // Copied from MINGW_FLOAT_H which may not be found due to a collision with the builtin gcc float.h // eventually we can probably integrate this into OpenLibm.