From 2a11d34be21f1d298d068995eabdc2553b782d37 Mon Sep 17 00:00:00 2001 From: Alexandre Fonseca Date: Tue, 21 Nov 2023 15:09:25 +0000 Subject: [PATCH] [PROF-8543] Address comments --- benchmarks/profiler_sample_loop_v2.rb | 4 +- .../collectors_cpu_and_wall_time_worker.c | 76 +++++----- .../collectors_thread_context.c | 5 +- .../extconf.rb | 2 +- .../heap_recorder.c | 134 +++++++++++------- .../heap_recorder.h | 6 +- .../ruby_helpers.c | 10 ++ .../ruby_helpers.h | 8 ++ .../stack_recorder.c | 44 +++--- .../stack_recorder.h | 2 +- lib/datadog/core/configuration/settings.rb | 56 ++++---- .../collectors/cpu_and_wall_time_worker.rb | 22 +-- lib/datadog/profiling/component.rb | 58 +++++++- lib/datadog/profiling/ext.rb | 24 ++++ 14 files changed, 285 insertions(+), 166 deletions(-) diff --git a/benchmarks/profiler_sample_loop_v2.rb b/benchmarks/profiler_sample_loop_v2.rb index 953da0a4990..4722a6c050f 100644 --- a/benchmarks/profiler_sample_loop_v2.rb +++ b/benchmarks/profiler_sample_loop_v2.rb @@ -18,8 +18,8 @@ class ProfilerSampleLoopBenchmark def create_profiler @recorder = Datadog::Profiling::StackRecorder.new( cpu_time_enabled: true, - alloc_samples_enabled: true, - heap_samples_enabled: true + alloc_samples_enabled: false, + heap_samples_enabled: false ) @collector = Datadog::Profiling::Collectors::ThreadContext.new( recorder: @recorder, max_frames: 400, tracer: nil, endpoint_collection_enabled: false, timeline_enabled: false diff --git a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c index 1c13a452d35..aa32adf8764 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c +++ b/ext/ddtrace_profiling_native_extension/collectors_cpu_and_wall_time_worker.c @@ -81,10 +81,11 @@ struct cpu_and_wall_time_worker_state { bool gc_profiling_enabled; bool allocation_counting_enabled; - bool heap_counting_enabled; bool no_signals_workaround_enabled; bool dynamic_sampling_rate_enabled; - int allocation_sample_every; // Temporarily used for development/testing of allocation profiling + int allocation_sample_every; + bool allocation_profiling_enabled; + bool heap_profiling_enabled; VALUE self_instance; VALUE thread_context_collector_instance; VALUE idle_sampling_helper_instance; @@ -152,10 +153,11 @@ static VALUE _native_initialize( VALUE gc_profiling_enabled, VALUE idle_sampling_helper_instance, VALUE allocation_counting_enabled, - VALUE heap_counting_enabled, VALUE no_signals_workaround_enabled, VALUE dynamic_sampling_rate_enabled, - VALUE allocation_sample_every + VALUE allocation_sample_every, + VALUE allocation_profiling_enabled, + VALUE heap_profiling_enabled ); static void cpu_and_wall_time_worker_typed_data_mark(void *state_ptr); static VALUE _native_sampling_loop(VALUE self, VALUE instance); @@ -193,7 +195,6 @@ static void on_freeobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) static void disable_tracepoints(struct cpu_and_wall_time_worker_state *state); static VALUE _native_with_blocked_sigprof(DDTRACE_UNUSED VALUE self); static VALUE rescued_sample_allocation(VALUE tracepoint_data); -static VALUE rescued_sample_free(VALUE tracepoint_data); // Note on sampler global state safety: // @@ -231,7 +232,7 @@ void collectors_cpu_and_wall_time_worker_init(VALUE profiling_module) { // https://bugs.ruby-lang.org/issues/18007 for a discussion around this. 
rb_define_alloc_func(collectors_cpu_and_wall_time_worker_class, _native_new); - rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 9); + rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_initialize", _native_initialize, 10); rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_sampling_loop", _native_sampling_loop, 1); rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_stop", _native_stop, 2); rb_define_singleton_method(collectors_cpu_and_wall_time_worker_class, "_native_reset_after_fork", _native_reset_after_fork, 1); @@ -270,10 +271,11 @@ static VALUE _native_new(VALUE klass) { state->gc_profiling_enabled = false; state->allocation_counting_enabled = false; - state->heap_counting_enabled = false; state->no_signals_workaround_enabled = false; state->dynamic_sampling_rate_enabled = true; state->allocation_sample_every = 0; + state->allocation_profiling_enabled = false; + state->heap_profiling_enabled = false; state->thread_context_collector_instance = Qnil; state->idle_sampling_helper_instance = Qnil; state->owner_thread = Qnil; @@ -300,30 +302,37 @@ static VALUE _native_initialize( VALUE gc_profiling_enabled, VALUE idle_sampling_helper_instance, VALUE allocation_counting_enabled, - VALUE heap_counting_enabled, VALUE no_signals_workaround_enabled, VALUE dynamic_sampling_rate_enabled, - VALUE allocation_sample_every + VALUE allocation_sample_every, + VALUE allocation_profiling_enabled, + VALUE heap_profiling_enabled ) { ENFORCE_BOOLEAN(gc_profiling_enabled); ENFORCE_BOOLEAN(allocation_counting_enabled); - ENFORCE_BOOLEAN(heap_counting_enabled); ENFORCE_BOOLEAN(no_signals_workaround_enabled); ENFORCE_BOOLEAN(dynamic_sampling_rate_enabled); ENFORCE_TYPE(allocation_sample_every, T_FIXNUM); + ENFORCE_BOOLEAN(allocation_profiling_enabled); + ENFORCE_BOOLEAN(heap_profiling_enabled); struct cpu_and_wall_time_worker_state *state; TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_worker_state, &cpu_and_wall_time_worker_typed_data, state); state->gc_profiling_enabled = (gc_profiling_enabled == Qtrue); state->allocation_counting_enabled = (allocation_counting_enabled == Qtrue); - state->heap_counting_enabled = state->allocation_counting_enabled && (heap_counting_enabled == Qtrue); state->no_signals_workaround_enabled = (no_signals_workaround_enabled == Qtrue); state->dynamic_sampling_rate_enabled = (dynamic_sampling_rate_enabled == Qtrue); state->allocation_sample_every = NUM2INT(allocation_sample_every); + state->allocation_profiling_enabled = (allocation_profiling_enabled == Qtrue); + state->heap_profiling_enabled = (heap_profiling_enabled == Qtrue); - if (state->allocation_sample_every < 0) { - rb_raise(rb_eArgError, "Unexpected value for allocation_sample_every: %d. This value must be >= 0.", state->allocation_sample_every); + if (state->allocation_sample_every <= 0) { + rb_raise(rb_eArgError, "Unexpected value for allocation_sample_every: %d. This value must be > 0.", state->allocation_sample_every); + } + + if (state->heap_profiling_enabled && !state->allocation_profiling_enabled) { + rb_raise(rb_eArgError, "Heap profiling requires allocation profiling to be enabled but it isn't."); } state->thread_context_collector_instance = enforce_thread_context_collector_instance(thread_context_collector_instance); @@ -644,8 +653,8 @@ static VALUE release_gvl_and_run_sampling_trigger_loop(VALUE instance) { // because they may raise exceptions. 
install_sigprof_signal_handler(handle_sampling_signal, "handle_sampling_signal"); if (state->gc_profiling_enabled) rb_tracepoint_enable(state->gc_tracepoint); - if (state->allocation_counting_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint); - if (state->heap_counting_enabled) rb_tracepoint_enable(state->object_free_tracepoint); + if (state->allocation_counting_enabled || state->allocation_profiling_enabled) rb_tracepoint_enable(state->object_allocation_tracepoint); + if (state->heap_profiling_enabled) rb_tracepoint_enable(state->object_free_tracepoint); rb_thread_call_without_gvl(run_sampling_trigger_loop, state, interrupt_sampling_trigger_loop, state); @@ -929,15 +938,11 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) return; } - // @ivoanjo: Strictly speaking, this is not needed because Ruby should not call the same tracepoint while a previous - // invocation is still pending, (e.g. it wouldn't call `on_newobj_event` while it's already running), but I decided - // to keep this here for consistency -- every call to the thread context (other than the special gc calls which are - // defined as not being able to allocate) sets this. state->during_sample = true; // TODO: This is a placeholder sampling decision strategy. We plan to replace it with a better one soon (e.g. before // beta), and having something here allows us to test the rest of feature, sampling decision aside. - if (state->allocation_sample_every > 0 && ((allocation_count % state->allocation_sample_every) == 0)) { + if (state->allocation_profiling_enabled && state->allocation_sample_every > 0 && ((allocation_count % state->allocation_sample_every) == 0)) { // Rescue against any exceptions that happen during sampling safely_call(rescued_sample_allocation, tracepoint_data, state->self_instance); } @@ -945,6 +950,9 @@ static void on_newobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) state->during_sample = false; } +// Safety: This function may get called while Ruby is doing garbage collection. While Ruby is doing garbage collection, +// *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation. +// This includes exceptions and use of ruby_xcalloc (because xcalloc can trigger GC)! static void on_freeobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) { struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above @@ -952,15 +960,14 @@ static void on_freeobj_event(VALUE tracepoint_data, DDTRACE_UNUSED void *unused) // and disabled before it is cleared, but just in case... if (state == NULL) return; - // @ivoanjo: Strictly speaking, this is not needed because Ruby should not call the same tracepoint while a previous - // invocation is still pending, (e.g. it wouldn't call `on_newobj_event` while it's already running), but I decided - // to keep this here for consistency -- every call to the thread context (other than the special gc calls which are - // defined as not being able to allocate) sets this. - state->during_sample = true; + // NOTE: Because this is likely to be happening during GC, handling of this tracepoint does not do any allocation. + // We also do not want to lose any frees as that would affect the accuracy of our live heap tracking so we skip + // the typical `state->during_sample` dropping that other sampling tracepoints have. 
- safely_call(rescued_sample_free, tracepoint_data, state->self_instance); + rb_trace_arg_t *data = rb_tracearg_from_tracepoint(tracepoint_data); + VALUE freed_object = rb_tracearg_object(data); - state->during_sample = false; + thread_context_collector_sample_free(state->thread_context_collector_instance, freed_object); } static void disable_tracepoints(struct cpu_and_wall_time_worker_state *state) { @@ -996,18 +1003,3 @@ static VALUE rescued_sample_allocation(VALUE tracepoint_data) { // Return a dummy VALUE because we're called from rb_rescue2 which requires it return Qnil; } - -static VALUE rescued_sample_free(VALUE tracepoint_data) { - struct cpu_and_wall_time_worker_state *state = active_sampler_instance_state; // Read from global variable, see "sampler global state safety" note above - - // This should not happen in a normal situation because on_newobj_event already checked for this, but just in case... - if (state == NULL) return Qnil; - - rb_trace_arg_t *data = rb_tracearg_from_tracepoint(tracepoint_data); - VALUE freed_object = rb_tracearg_object(data); - - thread_context_collector_sample_free(state->thread_context_collector_instance, freed_object); - - // Return a dummy VALUE because we're called from rb_rescue2 which requires it - return Qnil; -} diff --git a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c index 84f485382e7..8183825ade7 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +++ b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c @@ -1203,7 +1203,7 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in } } - record_obj_allocation(state->recorder_instance, new_object, sample_weight, optional_class_name); + track_obj_allocation(state->recorder_instance, new_object, sample_weight); trigger_sample_for_thread( state, @@ -1218,6 +1218,9 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in ); } +// Safety: This function may get called while Ruby is doing garbage collection. While Ruby is doing garbage collection, +// *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation. +// This includes exceptions and use of ruby_xcalloc (because xcalloc can trigger GC)! 
void thread_context_collector_sample_free(VALUE self_instance, VALUE freed_object) { struct thread_context_collector_state *state; TypedData_Get_Struct(self_instance, struct thread_context_collector_state, &thread_context_collector_typed_data, state); diff --git a/ext/ddtrace_profiling_native_extension/extconf.rb b/ext/ddtrace_profiling_native_extension/extconf.rb index 44c224ee527..064b36e7445 100644 --- a/ext/ddtrace_profiling_native_extension/extconf.rb +++ b/ext/ddtrace_profiling_native_extension/extconf.rb @@ -120,7 +120,7 @@ def add_compiler_flag(flag) add_compiler_flag '-Wall' add_compiler_flag '-Wextra' -if ENV['DEBUG'] +if ENV['DDTRACE_DEBUG'] CONFIG['optflags'] = '-O0' CONFIG['debugflags'] = '-ggdb3' end diff --git a/ext/ddtrace_profiling_native_extension/heap_recorder.c b/ext/ddtrace_profiling_native_extension/heap_recorder.c index 37d3f4ff971..09a453676e1 100644 --- a/ext/ddtrace_profiling_native_extension/heap_recorder.c +++ b/ext/ddtrace_profiling_native_extension/heap_recorder.c @@ -22,6 +22,9 @@ static st_index_t heap_frame_hash(heap_frame*, st_index_t seed); typedef struct { heap_frame *frames; uint64_t frames_len; + st_index_t hash; + st_index_t hash_seed; + bool hash_calculated; } heap_stack; static heap_stack* heap_stack_init(ddog_prof_Slice_Location); static void heap_stack_free(heap_stack*); @@ -57,7 +60,6 @@ static void object_record_free(object_record*); typedef struct { VALUE obj; unsigned int weight; - ddog_CharSlice *class_name; } partial_heap_recording; typedef struct sample { @@ -166,14 +168,14 @@ static int st_heap_records_iterate(st_data_t key, st_data_t value, st_data_t ext return ST_CONTINUE; } -void heap_recorder_iterate_stacks(heap_recorder *heap_recorder, void (*for_each_callback)(stack_iteration_data stack_data, void *extra_arg), void *for_each_callback_extra_arg) { - pthread_mutex_lock(&heap_recorder->records_mutex); +void heap_recorder_iterate_stacks_without_gvl(heap_recorder *heap_recorder, void (*for_each_callback)(stack_iteration_data stack_data, void *extra_arg), void *for_each_callback_extra_arg) { + ENFORCE_SUCCESS_NO_GVL(pthread_mutex_lock(&heap_recorder->records_mutex)); internal_iteration_data internal_iteration_data; internal_iteration_data.for_each_callback = for_each_callback; internal_iteration_data.for_each_callback_extra_arg = for_each_callback_extra_arg; internal_iteration_data.heap_recorder = heap_recorder; st_foreach(heap_recorder->heap_records, st_heap_records_iterate, (st_data_t) &internal_iteration_data); - pthread_mutex_unlock(&heap_recorder->records_mutex); + ENFORCE_SUCCESS_NO_GVL(pthread_mutex_unlock(&heap_recorder->records_mutex)); } void commit_allocation(heap_recorder *heap_recorder, heap_stack *heap_stack, VALUE obj, unsigned int weight) { @@ -181,8 +183,8 @@ void commit_allocation(heap_recorder *heap_recorder, heap_stack *heap_stack, VAL if (!st_lookup(heap_recorder->heap_records, (st_data_t) heap_stack, (st_data_t*) &heap_record)) { heap_record = heap_record_init(heap_stack); if (st_insert(heap_recorder->heap_records, (st_data_t) heap_stack, (st_data_t) heap_record)) { + // This should not be possible but just in case something bugs out, lets error out rb_raise(rb_eRuntimeError, "Duplicate heap stack tracking: %p", heap_stack); - return; }; } else { // FIXME: Figure out a way to not have to instantiate a new stack only to free it if it's @@ -194,9 +196,9 @@ void commit_allocation(heap_recorder *heap_recorder, heap_stack *heap_stack, VAL object_record *object_record = object_record_init(obj, weight, heap_record); if 
(st_insert(heap_recorder->object_records, (st_data_t) obj, (st_data_t) object_record) != 0) { // Object already tracked?
+ // FIXME: This seems to happen in practice. Research how/why and handle differently.
 object_record_free(object_record);
 rb_raise(rb_eRuntimeError, "Duplicate heap object tracking: %lu", obj);
- return;
 }
 fprintf(stderr, "Committed allocation of %lu (heap_record=%p, object_record=%p)\n", obj, heap_record, object_record);
@@ -209,9 +211,9 @@ void commit_free(heap_recorder *heap_recorder, VALUE obj) {
 st_data_t key = (st_data_t) obj;
 object_record *object_record = NULL;
 if (!st_delete(heap_recorder->object_records, (st_data_t*) &key, (st_data_t*) &object_record)) {
- // Object not tracked?
+ // This should not be possible since we're already checking for tracked objects during the free
+ // tracepoint but just in case something bugs out, let's error out
 rb_raise(rb_eRuntimeError, "Committing free of untracked object");
- return;
 }
 heap_record *heap_record = object_record->heap_record;
@@ -223,6 +225,7 @@ void commit_free(heap_recorder *heap_recorder, VALUE obj) {
 object_record_free(object_record);
 }
+// NOTE: Must be holding the records_mutex lock
 static void flush_queue(heap_recorder *heap_recorder) {
 for (size_t i = 0; i < heap_recorder->queued_samples_len; i++) {
 sample *queued_sample = &heap_recorder->queued_samples[i];
@@ -241,9 +244,28 @@ static void flush_queue(heap_recorder *heap_recorder) {
 heap_recorder->queued_samples_len = 0;
 }
+void heap_recorder_flush(heap_recorder *heap_recorder) {
+ int error = pthread_mutex_lock(&heap_recorder->records_mutex);
+ if (!error) {
+ // We were able to get a lock on heap_records, so let's flush any samples that were queued up while the
+ // lock was being held elsewhere.
+ flush_queue(heap_recorder);
+ } else {
+ ENFORCE_SUCCESS_GVL(error);
+ return;
+ }
+
+ pthread_mutex_unlock(&heap_recorder->records_mutex);
+}
+
+// Safety: This function may get called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
+// *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation.
+// This includes exceptions and use of ruby_xcalloc (because xcalloc can trigger GC)!
 static void enqueue_sample(heap_recorder *heap_recorder, sample new_sample) {
 fprintf(stderr, "Enqueuing sample for %lu (weight=%u free=%i)\n", new_sample.obj, new_sample.weight, new_sample.free);
 if (heap_recorder->queued_samples_len >= MAX_QUEUE_LIMIT) {
+ // FIXME: If we're dropping a free sample here, the accuracy of our heap profiles will be affected.
+ // Should we completely give up or should we trigger a flag that we can then use to add a warning in the UI?
 fprintf(stderr, "Dropping sample on the floor.\n");
 return;
 }
@@ -262,6 +284,9 @@ static void enqueue_allocation(heap_recorder *heap_recorder, heap_stack *heap_st
 });
 }
+// Safety: This function may get called while Ruby is doing garbage collection. While Ruby is doing garbage collection,
+// *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation.
+// This includes exceptions and use of ruby_xcalloc (because xcalloc can trigger GC)!
static void enqueue_free(heap_recorder *heap_recorder, VALUE obj) { enqueue_sample(heap_recorder, (sample) { .stack = NULL, @@ -272,38 +297,32 @@ static void enqueue_free(heap_recorder *heap_recorder, VALUE obj) { }); } -void start_heap_allocation_recording(heap_recorder* heap_recorder, VALUE new_obj, unsigned int weight, ddog_CharSlice *class_name) { +void start_heap_allocation_recording(heap_recorder* heap_recorder, VALUE new_obj, unsigned int weight) { fprintf(stderr, "Started recording allocation of %lu with weight %u\n", new_obj, weight); - partial_heap_recording *active_recording = &heap_recorder->active_recording; - active_recording->obj = new_obj; - active_recording->weight = weight; - active_recording->class_name = class_name; + heap_recorder->active_recording = (partial_heap_recording) { + .obj = new_obj, + .weight = weight, + }; } void end_heap_allocation_recording(struct heap_recorder *heap_recorder, ddog_prof_Slice_Location locations) { - // TODO: Make use of active_recording->class_name partial_heap_recording *active_recording = &heap_recorder->active_recording; VALUE new_obj = active_recording->obj; if (!new_obj) { // Recording ended without having been started? rb_raise(rb_eRuntimeError, "Ended a heap recording that was not started"); - return; } int weight = active_recording->weight; // From now on, mark active recording as invalid so we can short-circuit at any point and // not end up with a still active recording. new_obj still holds the object for this recording - active_recording->obj = 0; + active_recording->obj = Qnil; heap_stack *heap_stack = heap_stack_init(locations); int error = pthread_mutex_trylock(&heap_recorder->records_mutex); - if (!error) { - // We were able to get a lock to heap_records so lets flush any pending samples - // that might have been queued previously before adding this new one. - flush_queue(heap_recorder); - } else { + if (error) { // We weren't able to get a lock, so enqueue this sample for later processing // and end early if (error == EBUSY) { @@ -314,14 +333,23 @@ void end_heap_allocation_recording(struct heap_recorder *heap_recorder, ddog_pro return; } - // If we got this far, we got a write lock so we can commit the record + // We were able to get a lock to heap_records so lets flush any pending samples + // that might have been queued previously before adding this new one. + flush_queue(heap_recorder); + + // And then add the new allocation commit_allocation(heap_recorder, heap_stack, new_obj, weight); - pthread_mutex_unlock(&heap_recorder->records_mutex); + ENFORCE_SUCCESS_GVL(pthread_mutex_unlock(&heap_recorder->records_mutex)); } +// Safety: This function can get called while Ruby is doing garbage collection. While Ruby is doing garbage collection, +// *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation. +// This includes exceptions and use of ruby_xcalloc (because xcalloc can trigger GC)! void record_heap_free(heap_recorder *heap_recorder, VALUE obj) { object_record *object_record = NULL; + // lookups require hashing and traversal over hash buckets but should not require doing any allocations + // and should thus be safe to run in GC. 
st_lookup(heap_recorder->object_records, (st_data_t) obj, (st_data_t*) &object_record); if (object_record == NULL) { @@ -329,7 +357,7 @@ void record_heap_free(heap_recorder *heap_recorder, VALUE obj) { // check if the allocation sample is in the queue for (size_t i = 0; i < heap_recorder->queued_samples_len; i++) { sample *queued_sample = &heap_recorder->queued_samples[i]; - if (queued_sample->obj == obj) { + if (queued_sample->obj == obj && !queued_sample->skip) { queued_sample->skip = true; break; } @@ -339,23 +367,14 @@ void record_heap_free(heap_recorder *heap_recorder, VALUE obj) { return; } - // if we got this far, we freed a tracked object so need to update records! - int error = pthread_mutex_trylock(&heap_recorder->records_mutex); - if (error) { - // We weren't able to get a lock, so enqueue this sample for later processing - // and exit early - if (error == EBUSY) { - enqueue_free(heap_recorder, obj); - } else { - ENFORCE_SUCCESS_GVL(error) - } - return; - } - - // If we got this far, we got a write lock so we can commit the record - commit_free(heap_recorder, obj); - - pthread_mutex_unlock(&heap_recorder->records_mutex); + // If we got this far, we freed a tracked object so we need to update and remove records! + // However, there's a caveat: we're under tight constraints and may be running during a GC where we are forbidden + // to do any more allocations. In certain situations, even calling ruby_xfree on an object_record may trigger + // such allocations (https://github.com/ruby/ruby/blob/ffb1eb37e74334ae85d6bfee07d784a145e23dd8/gc.c#L12599). + // We also do not want to risk triggering reentrant free sampling. Therefore, we take the extremely cautious + // approach of enqueuing this free to be applied at next allocation recording or flush with no explicit heap + // allocations or frees, direct or otherwise, happening during the execution of this method. 
+ enqueue_free(heap_recorder, obj);
 }
 // ===============
@@ -392,15 +411,15 @@ void object_record_free(object_record *record) {
 // Heap Frame API
 // ==============
 int heap_frame_cmp(heap_frame *f1, heap_frame *f2) {
- int cmp = strcmp(f1->name, f2->name);
- if (cmp != 0) {
- return cmp;
+ int line_diff = (int) (f1->line - f2->line);
+ if (line_diff != 0) {
+ return line_diff;
 }
- cmp = strcmp(f1->filename, f2->filename);
+ int cmp = strcmp(f1->name, f2->name);
 if (cmp != 0) {
 return cmp;
 }
- return (int) (f1->line - f2->line);
+ return strcmp(f1->filename, f2->filename);
 }
 st_index_t string_hash(char *str, st_index_t seed) {
@@ -424,14 +443,20 @@ st_index_t char_slice_hash(ddog_CharSlice char_slice, st_index_t seed) {
 // ==============
 heap_stack* heap_stack_init(ddog_prof_Slice_Location locations) {
 heap_stack *stack = ruby_xcalloc(1, sizeof(heap_stack));
- stack->frames = ruby_xcalloc(locations.len, sizeof(heap_frame));
- stack->frames_len = locations.len;
+ *stack = (heap_stack) {
+ .frames = ruby_xcalloc(locations.len, sizeof(heap_frame)),
+ .frames_len = locations.len,
+ .hash = 0,
+ .hash_seed = 0,
+ .hash_calculated = false
+ };
 for (uint64_t i = 0; i < locations.len; i++) {
 const ddog_prof_Location *location = &locations.ptr[i];
- heap_frame *frame = &stack->frames[i];
- frame->name = ruby_strdup(location->function.name.ptr);
- frame->filename = ruby_strdup(location->function.filename.ptr);
- frame->line = location->line;
+ stack->frames[i] = (heap_frame) {
+ .name = ruby_strndup(location->function.name.ptr, location->function.name.len),
+ .filename = ruby_strndup(location->function.filename.ptr, location->function.filename.len),
+ .line = location->line,
+ };
 }
 return stack;
 }
@@ -477,6 +502,11 @@ int heap_stack_cmp_st(st_data_t key1, st_data_t key2) {
 }
 st_index_t heap_stack_hash(heap_stack *stack, st_index_t seed) {
+ if (stack->hash_calculated && stack->hash_seed == seed) {
+ // fast path, hash is already known
+ return stack->hash;
+ }
+
 st_index_t hash = seed;
 for (uint64_t i = 0; i < stack->frames_len; i++) {
 hash = heap_frame_hash(&stack->frames[i], hash);
diff --git a/ext/ddtrace_profiling_native_extension/heap_recorder.h b/ext/ddtrace_profiling_native_extension/heap_recorder.h
index 7b25780c8d4..f45b87492b6 100644
--- a/ext/ddtrace_profiling_native_extension/heap_recorder.h
+++ b/ext/ddtrace_profiling_native_extension/heap_recorder.h
@@ -1,6 +1,5 @@
 #pragma once
-#include "stack_recorder.h"
 #include <datadog/profiling.h>
 #include <ruby.h>
@@ -13,7 +12,8 @@ typedef struct {
 heap_recorder* heap_recorder_init(void);
 void heap_recorder_free(heap_recorder *heap_recorder);
-void heap_recorder_iterate_stacks(heap_recorder *heap_recorder, void (*for_each_callback)(stack_iteration_data stack_data, void* extra_arg), void *for_each_callback_extra_arg);
-void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight, ddog_CharSlice *class_name);
+void heap_recorder_flush(heap_recorder *heap_recorder);
+void heap_recorder_iterate_stacks_without_gvl(heap_recorder *heap_recorder, void (*for_each_callback)(stack_iteration_data stack_data, void* extra_arg), void *for_each_callback_extra_arg);
+void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight);
 void end_heap_allocation_recording(heap_recorder *heap_recorder, ddog_prof_Slice_Location locations);
 void record_heap_free(heap_recorder *heap_recorder, VALUE obj);
diff --git a/ext/ddtrace_profiling_native_extension/ruby_helpers.c b/ext/ddtrace_profiling_native_extension/ruby_helpers.c
index b874d1f249a..1d0f95e23dc 100644 --- a/ext/ddtrace_profiling_native_extension/ruby_helpers.c +++ b/ext/ddtrace_profiling_native_extension/ruby_helpers.c @@ -108,3 +108,13 @@ void raise_syserr( grab_gvl_and_raise_syserr(syserr_errno, "Failure returned by '%s' at %s:%d:in `%s'", expression, file, line, function_name); } } + +char* ruby_strndup(const char *str, size_t size) { + char *tmp; + + tmp = xmalloc(size + 1); + memcpy(tmp, str, size); + tmp[size] = '\0'; + + return tmp; +} diff --git a/ext/ddtrace_profiling_native_extension/ruby_helpers.h b/ext/ddtrace_profiling_native_extension/ruby_helpers.h index 84889fb83dd..157edeee9d4 100644 --- a/ext/ddtrace_profiling_native_extension/ruby_helpers.h +++ b/ext/ddtrace_profiling_native_extension/ruby_helpers.h @@ -87,3 +87,11 @@ NORETURN(void raise_syserr( int line, const char *function_name )); + +// Alternative to ruby_strdup that takes a size argument. +// Similar to C's strndup but slightly less smart as size is expected to +// be smaller or equal to the real size of str (minus null termination if it +// exists). +// A new string will be returned with size+1 bytes and last byte set to '\0'. +// The returned string must be freed explicitly. +char* ruby_strndup(const char *str, size_t size); diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.c b/ext/ddtrace_profiling_native_extension/stack_recorder.c index 0a51b8961e7..2e9d4add4cf 100644 --- a/ext/ddtrace_profiling_native_extension/stack_recorder.c +++ b/ext/ddtrace_profiling_native_extension/stack_recorder.c @@ -278,6 +278,8 @@ static VALUE _native_new(VALUE klass) { VALUE stack_recorder = TypedData_Wrap_Struct(klass, &stack_recorder_typed_data, state); + state->heap_recorder = heap_recorder_init(); + // Note: Don't raise exceptions after this point, since it'll lead to libdatadog memory leaking! initialize_profiles(state, sample_types); @@ -338,8 +340,6 @@ static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_insta struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); - state->heap_recorder = heap_recorder_init(); - if (cpu_time_enabled == Qtrue && alloc_samples_enabled == Qtrue) return Qtrue; // Nothing to do, this is the default // When some sample types are disabled, we need to reconfigure libdatadog to record less types, @@ -374,7 +374,7 @@ static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_insta state->position_for[ALLOC_SAMPLES_VALUE_ID] = next_disabled_pos++; } - if (alloc_samples_enabled == Qtrue && heap_samples_enabled == Qtrue) { + if (heap_samples_enabled == Qtrue) { enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) HEAP_SAMPLES_VALUE; state->position_for[HEAP_SAMPLES_VALUE_ID] = next_enabled_pos++; } else { @@ -398,6 +398,11 @@ static VALUE _native_serialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_instan // Need to do this while still holding on to the Global VM Lock; see comments on method for why serializer_set_start_timestamp_for_next_profile(state, finish_timestamp); + // Flush any pending data in the heap recorder prior to doing the iteration during serialization + // This needs to happen while holding on to the Global VM Lock as flushing may do allocations, + // frees and complex hash table rebalancings. 
+ heap_recorder_flush(state->heap_recorder); + // We'll release the Global VM Lock while we're calling serialize, so that the Ruby VM can continue to work while this // is pending struct call_serialize_without_gvl_arguments args = {.state = state, .finish_timestamp = finish_timestamp, .serialize_ran = false}; @@ -460,6 +465,9 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, metric_values[position_for[ALLOC_SAMPLES_VALUE_ID]] = values.alloc_samples; if (values.alloc_samples != 0) { + // FIXME: Heap sampling is currently being done in 2 parts because the construction of locations is happening + // very late in the allocation-sampling path (which is shared with the cpu sampling path). This can + // be fixed with some refactoring but for now this is a less impactful change. end_heap_allocation_recording(state->heap_recorder, locations); } @@ -480,12 +488,15 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, } } -void record_obj_allocation(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight, ddog_CharSlice *optional_class_name) { +void track_obj_allocation(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight) { struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); - start_heap_allocation_recording(state->heap_recorder, new_object, sample_weight, optional_class_name); + start_heap_allocation_recording(state->heap_recorder, new_object, sample_weight); } +// Safety: This function can get called while Ruby is doing garbage collection. While Ruby is doing garbage collection, +// *NO ALLOCATION* is allowed. This function, and any it calls must never trigger memory or object allocation. +// This includes exceptions and use of ruby_xcalloc (because xcalloc can trigger GC)! void record_obj_free(VALUE recorder_instance, VALUE freed_object) { struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); @@ -512,13 +523,9 @@ typedef struct stack_iteration_context { ddog_prof_Profile *profile; } stack_iteration_context; -static void add_heap_sample_to_active_profile(stack_iteration_data stack_data, void *extra_arg) { +static void add_heap_sample_to_active_profile_without_gvl(stack_iteration_data stack_data, void *extra_arg) { stack_iteration_context *context = (stack_iteration_context*) extra_arg; - // Note: We initialize this array to have ALL_VALUE_TYPES_COUNT but only tell libdatadog to use the first - // state->enabled_values_count values. This simplifies handling disabled value types -- we still put them on the - // array, but in _native_initialize we arrange so their position starts from state->enabled_values_count and thus - // libdatadog doesn't touch them. 
int64_t metric_values[ALL_VALUE_TYPES_COUNT] = {0}; uint8_t *position_for = context->state->position_for; @@ -534,15 +541,18 @@ static void add_heap_sample_to_active_profile(stack_iteration_data stack_data, v ); if (result.tag == DDOG_PROF_PROFILE_RESULT_ERR) { - rb_raise(rb_eArgError, "Failed to record sample: %"PRIsVALUE, get_error_details_and_drop(&result.err)); + // NOTE: Can't use get_error_details_and_drop since it builds new ruby strings and we're outside the GVL + ddog_CharSlice errorMsg = ddog_Error_message(&result.err); + grab_gvl_and_raise(rb_eArgError, "Failed to record sample: %.*s", (int) errorMsg.len, errorMsg.ptr); } } -static void build_heap_profile(struct stack_recorder_state *state, ddog_prof_Profile *profile) { - stack_iteration_context iteration_context; - iteration_context.state = state; - iteration_context.profile = profile; - heap_recorder_iterate_stacks(state->heap_recorder, add_heap_sample_to_active_profile, (void*) &iteration_context); +static void build_heap_profile_without_gvl(struct stack_recorder_state *state, ddog_prof_Profile *profile) { + stack_iteration_context iteration_context = { + .state = state, + .profile = profile + }; + heap_recorder_iterate_stacks_without_gvl(state->heap_recorder, add_heap_sample_to_active_profile_without_gvl, (void*) &iteration_context); } static void *call_serialize_without_gvl(void *call_args) { @@ -552,7 +562,7 @@ static void *call_serialize_without_gvl(void *call_args) { // Now that we have the inactive profile with all but heap samples, lets fill it with heap data // without needing to race with the active sampler - build_heap_profile(args->state, args->profile); + build_heap_profile_without_gvl(args->state, args->profile); // Note: The profile gets reset by the serialize call args->result = ddog_prof_Profile_serialize(args->profile, &args->finish_timestamp, NULL /* duration_nanos is optional */, NULL /* start_time is optional */); diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.h b/ext/ddtrace_profiling_native_extension/stack_recorder.h index 2f8acbb3b92..aa492cacfa9 100644 --- a/ext/ddtrace_profiling_native_extension/stack_recorder.h +++ b/ext/ddtrace_profiling_native_extension/stack_recorder.h @@ -22,6 +22,6 @@ typedef struct sample_labels { void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, sample_labels labels); void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint); -void record_obj_allocation(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight, ddog_CharSlice *optional_class_name); +void track_obj_allocation(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight); void record_obj_free(VALUE recorder_instance, VALUE freed_object); VALUE enforce_recorder_instance(VALUE object); diff --git a/lib/datadog/core/configuration/settings.rb b/lib/datadog/core/configuration/settings.rb index 981996997f2..bbb071b8c40 100644 --- a/lib/datadog/core/configuration/settings.rb +++ b/lib/datadog/core/configuration/settings.rb @@ -314,45 +314,53 @@ def initialize(*_) # Can be used to enable/disable the Datadog::Profiling.allocation_count feature. # - # This feature is safe and enabled by default on Ruby 2.x, but has a few caveats on Ruby 3.x. 
- #
- # Caveat 1 (severe):
- # On Ruby versions 3.0 (all), 3.1.0 to 3.1.3, and 3.2.0 to 3.2.2 this is disabled by default because it
- # can trigger a VM bug that causes a segmentation fault during garbage collection of Ractors
- # (https://bugs.ruby-lang.org/issues/18464). We don't recommend using this feature on such Rubies.
- # This bug is fixed on Ruby versions 3.1.4, 3.2.3 and 3.3.0.
- #
- # Caveat 2 (annoyance):
- # On all known versions of Ruby 3.x, due to https://bugs.ruby-lang.org/issues/19112, when a ractor gets
- # garbage collected, Ruby will disable all active tracepoints, which this feature internally relies on.
- # Thus this feature is only usable if you're not using Ractors.
- #
- # Caveat 3 (severe):
- # Ruby 3.2.0 to 3.2.2 have a bug in the newobj tracepoint (https://bugs.ruby-lang.org/issues/19482,
- # https://github.com/ruby/ruby/pull/7464) so that's an extra reason why it's not safe on those Rubies.
- # This bug is fixed on Ruby versions 3.2.3 and 3.3.0.
+ # This feature is safe and enabled by default only on Rubies where we haven't identified issues.
+ # Refer to {Datadog::Profiling::Ext::IS_ALLOCATION_SAMPLING_SUPPORTED} for the details.
 #
 # @default `true` on Ruby 2.x and 3.1.4+, 3.2.3+ and 3.3.0+; `false` for Ruby 3.0 and unpatched Rubies.
 option :allocation_counting_enabled do |o|
 o.type :bool
- o.env 'DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED'
 o.default do
- RUBY_VERSION.start_with?('2.') ||
- (RUBY_VERSION.start_with?('3.1.') && RUBY_VERSION >= '3.1.4') ||
- (RUBY_VERSION.start_with?('3.2.') && RUBY_VERSION >= '3.2.3') ||
- RUBY_VERSION >= '3.3.'
+ Profiling::Ext::IS_ALLOCATION_SAMPLING_SUPPORTED
 end
 end
- # Can be used to enable/disable the Datadog::Profiling.heap_count feature.
+ # Can be used to enable/disable collection of allocation profiles.
 #
 # This feature is alpha and disabled by default.
- option :heap_counting_enabled do |o|
+ #
+ # @default `DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED` environment variable as a boolean, otherwise `false`
+ option :experimental_allocation_enabled do |o|
+ o.type :bool
+ o.env 'DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED'
+ o.default false
+ end
+
+ # Can be used to enable/disable the collection of heap profiles.
+ #
+ # This feature is alpha and disabled by default.
+ #
+ # @default `DD_PROFILING_EXPERIMENTAL_HEAP_ENABLED` environment variable as a boolean, otherwise `false`
+ option :experimental_heap_enabled do |o|
 o.type :bool
 o.env 'DD_PROFILING_EXPERIMENTAL_HEAP_ENABLED'
 o.default false
 end
+ # Can be used to configure the allocation sampling rate: a sample will be collected once every `x` allocations.
+ #
+ # The lower the value, the more accurate allocation and heap tracking becomes, but also the bigger the overhead.
+ # In particular, a value of 1 will sample ALL allocations.
+ #
+ # This feature is not supported in all Rubies. Refer to {Datadog::Profiling::Ext::IS_ALLOCATION_SAMPLING_SUPPORTED}
+ # for the details.
+ #
+ # @default `DD_PROFILING_EXPERIMENTAL_ALLOCATION_SAMPLE_RATE` environment variable, otherwise `50`.
+ option :experimental_allocation_sample_rate do |o|
+ o.type :int
+ o.env 'DD_PROFILING_EXPERIMENTAL_ALLOCATION_SAMPLE_RATE'
+ o.default 50
+ end
+
 # Can be used to disable checking which version of `libmysqlclient` is being used by the `mysql2` gem.
 #
 # This setting is only used when the `mysql2` gem is installed.
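For reference, a minimal sketch of how the options added above could be opted into from application code, assuming the usual `Datadog.configure` block exposed by ddtrace (the option paths mirror the `profiling.advanced` settings defined in this file, and the environment variables listed in each `o.env` are equivalent):

    Datadog.configure do |c|
      c.profiling.enabled = true
      # Same as DD_PROFILING_EXPERIMENTAL_ALLOCATION_ENABLED=true
      c.profiling.advanced.experimental_allocation_enabled = true
      # Heap profiling additionally requires allocation profiling to be enabled.
      # Same as DD_PROFILING_EXPERIMENTAL_HEAP_ENABLED=true
      c.profiling.advanced.experimental_heap_enabled = true
      # Sample once every 10 allocations; the default of 50 keeps roughly 2% of allocations.
      c.profiling.advanced.experimental_allocation_sample_rate = 10
    end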
diff --git a/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb b/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb index c1669e28f9c..cb3182aab00 100644 --- a/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb +++ b/lib/datadog/profiling/collectors/cpu_and_wall_time_worker.rb @@ -16,14 +16,15 @@ class CpuAndWallTimeWorker def initialize( gc_profiling_enabled:, allocation_counting_enabled:, - heap_counting_enabled:, no_signals_workaround_enabled:, thread_context_collector:, idle_sampling_helper: IdleSamplingHelper.new, # **NOTE**: This should only be used for testing; disabling the dynamic sampling rate will increase the # profiler overhead! dynamic_sampling_rate_enabled: true, - allocation_sample_every: 0 # Currently only for testing; Setting this to > 0 can add a lot of overhead! + allocation_sample_every:, + allocation_profiling_enabled:, + heap_profiling_enabled: ) unless dynamic_sampling_rate_enabled Datadog.logger.warn( @@ -31,30 +32,17 @@ def initialize( ) end - if allocation_counting_enabled && allocation_sample_every > 0 - Datadog.logger.warn( - "Enabled experimental allocation profiling: allocation_sample_every=#{allocation_sample_every}. This is " \ - 'experimental, not recommended, and will increase overhead!' - ) - - if heap_counting_enabled - Datadog.logger.warn( - "Enabled experimental heap profiling: allocation_sample_every=#{allocation_sample_every}. This is " \ - 'experimental, not recommended, and will increase overhead!' - ) - end - end - self.class._native_initialize( self, thread_context_collector, gc_profiling_enabled, idle_sampling_helper, allocation_counting_enabled, - heap_counting_enabled, no_signals_workaround_enabled, dynamic_sampling_rate_enabled, allocation_sample_every, + allocation_profiling_enabled, + heap_profiling_enabled, ) @worker_thread = nil @failure_exception = nil diff --git a/lib/datadog/profiling/component.rb b/lib/datadog/profiling/component.rb index 10ae9e909b5..9f16d3cc163 100644 --- a/lib/datadog/profiling/component.rb +++ b/lib/datadog/profiling/component.rb @@ -41,12 +41,14 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) no_signals_workaround_enabled = no_signals_workaround_enabled?(settings) timeline_enabled = settings.profiling.advanced.experimental_timeline_enabled + allocation_sample_every = get_allocation_sample_every(settings) + allocation_profiling_enabled = enable_allocation_profiling?(settings, allocation_sample_every) + heap_profiling_enabled = enable_heap_profiling?(settings, allocation_profiling_enabled) recorder = Datadog::Profiling::StackRecorder.new( cpu_time_enabled: RUBY_PLATFORM.include?('linux'), # Only supported on Linux currently - # FIXME: Don't hardcode this - alloc_samples_enabled: true, - heap_samples_enabled: true + alloc_samples_enabled: allocation_profiling_enabled, + heap_samples_enabled: heap_profiling_enabled, ) thread_context_collector = Datadog::Profiling::Collectors::ThreadContext.new( recorder: recorder, @@ -58,11 +60,11 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:) worker = Datadog::Profiling::Collectors::CpuAndWallTimeWorker.new( gc_profiling_enabled: enable_gc_profiling?(settings), allocation_counting_enabled: settings.profiling.advanced.allocation_counting_enabled, - heap_counting_enabled: settings.profiling.advanced.heap_counting_enabled, no_signals_workaround_enabled: no_signals_workaround_enabled, thread_context_collector: thread_context_collector, - # FIXME: Don't hardcode this - 
allocation_sample_every: 50,
+ allocation_sample_every: allocation_sample_every,
+ allocation_profiling_enabled: allocation_profiling_enabled,
+ heap_profiling_enabled: heap_profiling_enabled,
 )
 internal_metadata = {
@@ -114,6 +116,50 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
 end
 end
+ private_class_method def self.get_allocation_sample_every(settings)
+ allocation_sample_rate = settings.profiling.advanced.experimental_allocation_sample_rate
+
+ if allocation_sample_rate <= 0
+ raise("Allocation sample rate must be a positive integer. Was #{allocation_sample_rate}")
+ end
+
+ allocation_sample_rate
+ end
+
+ private_class_method def self.enable_allocation_profiling?(settings, allocation_sample_every)
+ allocation_profiling_enabled = settings.profiling.advanced.experimental_allocation_enabled
+
+ if allocation_profiling_enabled
+ Datadog.logger.warn(
+ "Enabled experimental allocation profiling: allocation_sample_rate=#{allocation_sample_every}. This is " \
+ 'experimental, not recommended, and will increase overhead!'
+ )
+ end
+
+ if allocation_profiling_enabled && !Ext::IS_ALLOCATION_SAMPLING_SUPPORTED
+ Datadog.logger.warn(
+ "Current Ruby version (#{RUBY_VERSION}) does not officially support allocation profiling but it was " \
+ 'requested. There may be unexpected problems during execution.'
+ )
+ end
+
+ allocation_profiling_enabled
+ end
+
+ private_class_method def self.enable_heap_profiling?(settings, allocation_profiling_enabled)
+ heap_profiling_enabled = settings.profiling.advanced.experimental_heap_enabled
+
+ if heap_profiling_enabled && !allocation_profiling_enabled
+ raise('Heap profiling requires allocation profiling to be enabled')
+ end
+
+ if heap_profiling_enabled
+ Datadog.logger.warn(
+ 'Enabled experimental heap profiling. This is experimental, not recommended, and will increase overhead!'
+ )
+ end
+
+ heap_profiling_enabled
+ end
+
 private_class_method def self.no_signals_workaround_enabled?(settings) # rubocop:disable Metrics/MethodLength
 setting_value = settings.profiling.advanced.no_signals_workaround_enabled
 legacy_ruby_that_should_use_workaround = RUBY_VERSION.start_with?('2.3.', '2.4.', '2.5.')
diff --git a/lib/datadog/profiling/ext.rb b/lib/datadog/profiling/ext.rb
index 2122ec7a95f..b3836e2e6f4 100644
--- a/lib/datadog/profiling/ext.rb
+++ b/lib/datadog/profiling/ext.rb
@@ -9,6 +9,30 @@ module Ext
 ENV_AGENTLESS = 'DD_PROFILING_AGENTLESS'
 ENV_ENDPOINT_COLLECTION_ENABLED = 'DD_PROFILING_ENDPOINT_COLLECTION_ENABLED'
+ # Allocation sampling is safe and supported on Ruby 2.x, but has a few caveats on Ruby 3.x.
+ #
+ # TL;DR: Supported on 2.x, 3.1.4+, 3.2.3+, and 3.3.0+.
+ #
+ # Caveat 1 (severe):
+ # On Ruby versions 3.0 (all), 3.1.0 to 3.1.3, and 3.2.0 to 3.2.2 this is disabled by default because it
+ # can trigger a VM bug that causes a segmentation fault during garbage collection of Ractors
+ # (https://bugs.ruby-lang.org/issues/18464). We don't recommend using this feature on such Rubies.
+ # This bug is fixed on Ruby versions 3.1.4, 3.2.3 and 3.3.0.
+ #
+ # Caveat 2 (annoyance):
+ # On all known versions of Ruby 3.x, due to https://bugs.ruby-lang.org/issues/19112, when a ractor gets
+ # garbage collected, Ruby will disable all active tracepoints, which this feature internally relies on.
+ # Thus this feature is only usable if you're not using Ractors.
+ # + # Caveat 3 (severe): + # Ruby 3.2.0 to 3.2.2 have a bug in the newobj tracepoint (https://bugs.ruby-lang.org/issues/19482, + # https://github.com/ruby/ruby/pull/7464) so that's an extra reason why it's not safe on those Rubies. + # This bug is fixed on Ruby versions 3.2.3 and 3.3.0. + IS_ALLOCATION_SAMPLING_SUPPORTED = RUBY_VERSION.start_with?('2.') || + (RUBY_VERSION.start_with?('3.1.') && RUBY_VERSION >= '3.1.4') || + (RUBY_VERSION.start_with?('3.2.') && RUBY_VERSION >= '3.2.3') || + RUBY_VERSION >= '3.3.' + module Transport module HTTP FORM_FIELD_TAG_ENV = 'env'
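As a quick sanity check of the version gate above, the same expression can be evaluated against a few representative Ruby versions. The `supported` lambda below is a hypothetical stand-in used only for illustration; the library itself just evaluates the constant once against `RUBY_VERSION`:

    supported = lambda do |ruby_version|
      ruby_version.start_with?('2.') ||
        (ruby_version.start_with?('3.1.') && ruby_version >= '3.1.4') ||
        (ruby_version.start_with?('3.2.') && ruby_version >= '3.2.3') ||
        ruby_version >= '3.3.'
    end

    supported.call('2.7.8') # => true
    supported.call('3.0.6') # => false (caveat 1: Ractor GC segfault)
    supported.call('3.1.3') # => false (caveat 1)
    supported.call('3.1.4') # => true
    supported.call('3.2.2') # => false (caveats 1 and 3)
    supported.call('3.2.3') # => true
    supported.call('3.3.0') # => true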