diff --git a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c index a86ce1dba9d..cbfb06c3e58 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +++ b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c @@ -1244,7 +1244,7 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in } } - track_object(state->recorder_instance, new_object, sample_weight); + track_object(state->recorder_instance, new_object, sample_weight, optional_class_name); trigger_sample_for_thread( state, diff --git a/ext/ddtrace_profiling_native_extension/heap_recorder.c b/ext/ddtrace_profiling_native_extension/heap_recorder.c index 6bcc2bc60de..1545f165200 100644 --- a/ext/ddtrace_profiling_native_extension/heap_recorder.c +++ b/ext/ddtrace_profiling_native_extension/heap_recorder.c @@ -4,6 +4,7 @@ #include "ruby_helpers.h" #include #include "collectors_stack.h" +#include "libdatadog_helpers.h" // A compact representation of a stacktrace frame for a heap allocation. typedef struct { @@ -92,15 +93,6 @@ typedef struct { static object_record* object_record_new(long, heap_record*, live_object_data); static void object_record_free(object_record*); -// Allows storing data passed to ::start_heap_allocation_recording to make it accessible to -// ::end_heap_allocation_recording. -// -// obj_id != 0 flags this struct as holding a valid partial heap recording. -typedef struct { - long obj_id; - live_object_data object_data; -} partial_heap_recording; - struct heap_recorder { // Map[key: heap_record_key*, record: heap_record*] // NOTE: We always use heap_record_key.type == HEAP_STACK for storage but support lookups @@ -125,7 +117,7 @@ struct heap_recorder { st_table *object_records_snapshot; // Data for a heap recording that was started but not yet ended - partial_heap_recording active_recording; + object_record *partial_object_record; // Reusable location array, implementing a flyweight pattern for things like iteration. ddog_prof_Location *reusable_locations; @@ -138,7 +130,7 @@ static int st_object_record_entry_free_if_invalid(st_data_t, st_data_t, st_data_ static int st_object_records_iterate(st_data_t, st_data_t, st_data_t); static int st_object_records_debug(st_data_t key, st_data_t value, st_data_t extra); static int update_object_record_entry(st_data_t*, st_data_t*, st_data_t, int); -static void commit_allocation(heap_recorder*, heap_record*, long, live_object_data); +static void commit_allocation(heap_recorder*, heap_record*, object_record*); // ========================== // Heap Recorder External API @@ -156,11 +148,7 @@ heap_recorder* heap_recorder_new(void) { recorder->object_records = st_init_numtable(); recorder->object_records_snapshot = NULL; recorder->reusable_locations = ruby_xcalloc(MAX_FRAMES_LIMIT, sizeof(ddog_prof_Location)); - recorder->active_recording = (partial_heap_recording) { - .obj_id = 0, // 0 is actually the obj_id of false, but we'll never track that one in heap so we use - // it as invalid/unset value. - .object_data = {0}, - }; + recorder->partial_object_record = NULL; return recorder; } @@ -176,12 +164,21 @@ void heap_recorder_free(heap_recorder *heap_recorder) { heap_recorder_finish_iteration(heap_recorder); } + // Clean-up all object records st_foreach(heap_recorder->object_records, st_object_record_entry_free, 0); st_free_table(heap_recorder->object_records); + // Clean-up all heap records (this includes those only referred to by queued_samples) st_foreach(heap_recorder->heap_records, st_heap_record_entry_free, 0); st_free_table(heap_recorder->heap_records); + if (heap_recorder->partial_object_record != NULL) { + // If there's a partial object record, clean it up as well + object_record_free(heap_recorder->partial_object_record); + } + + ruby_xfree(heap_recorder->reusable_locations); + ruby_xfree(heap_recorder); } @@ -207,7 +204,7 @@ void heap_recorder_after_fork(heap_recorder *heap_recorder) { // nothing for us to do here. } -void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight) { +void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight, ddog_CharSlice *alloc_class) { if (heap_recorder == NULL) { return; } @@ -217,12 +214,15 @@ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj rb_raise(rb_eRuntimeError, "Detected a bignum object id. These are not supported by heap profiling."); } - heap_recorder->active_recording = (partial_heap_recording) { - .obj_id = FIX2LONG(ruby_obj_id), - .object_data = (live_object_data) { - .weight = weight, - }, - }; + if (heap_recorder->partial_object_record != NULL) { + rb_raise(rb_eRuntimeError, "Detected consecutive heap allocation recording starts without end."); + } + + heap_recorder->partial_object_record = object_record_new(FIX2LONG(ruby_obj_id), NULL, (live_object_data) { + .weight = weight, + .class = alloc_class != NULL ? string_from_char_slice(*alloc_class) : NULL, + .alloc_gen = rb_gc_count(), + }); } void end_heap_allocation_recording(struct heap_recorder *heap_recorder, ddog_prof_Slice_Location locations) { @@ -230,22 +230,21 @@ void end_heap_allocation_recording(struct heap_recorder *heap_recorder, ddog_pro return; } - partial_heap_recording *active_recording = &heap_recorder->active_recording; + object_record *partial_object_record = heap_recorder->partial_object_record; - long obj_id = active_recording->obj_id; - if (obj_id == 0) { + if (partial_object_record == NULL) { // Recording ended without having been started? rb_raise(rb_eRuntimeError, "Ended a heap recording that was not started"); } // From now on, mark active recording as invalid so we can short-circuit at any point and - // not end up with a still active recording. new_obj still holds the object for this recording - active_recording->obj_id = 0; + // not end up with a still active recording. partial_object_record still holds the object for this recording + heap_recorder->partial_object_record = NULL; heap_record *heap_record = get_or_create_heap_record(heap_recorder, locations); // And then commit the new allocation. - commit_allocation(heap_recorder, heap_record, obj_id, active_recording->object_data); + commit_allocation(heap_recorder, heap_record, partial_object_record); } void heap_recorder_prepare_iteration(heap_recorder *heap_recorder) { @@ -450,13 +449,16 @@ static int update_object_record_entry(DDTRACE_UNUSED st_data_t *key, st_data_t * return ST_CONTINUE; } -static void commit_allocation(heap_recorder *heap_recorder, heap_record *heap_record, long obj_id, live_object_data object_data) { +static void commit_allocation(heap_recorder *heap_recorder, heap_record *heap_record, object_record *object_record) { + // Link the object record with the corresponding heap record. + object_record->heap_record = heap_record; + // Update object_records object_record_update_data update_data = (object_record_update_data) { .heap_recorder = heap_recorder, - .new_object_record = object_record_new(obj_id, heap_record, object_data), + .new_object_record = object_record, }; - if (!st_update(heap_recorder->object_records, obj_id, update_object_record_entry, (st_data_t) &update_data)) { + if (!st_update(heap_recorder->object_records, object_record->obj_id, update_object_record_entry, (st_data_t) &update_data)) { // We are sure there was no previous record for this id so let the heap record know there now is one // extra record associated with this stack. if (heap_record->num_tracked_objects == UINT32_MAX) { @@ -563,6 +565,9 @@ object_record* object_record_new(long obj_id, heap_record *heap_record, live_obj } void object_record_free(object_record *record) { + if (record->object_data.class != NULL) { + ruby_xfree(record->object_data.class); + } ruby_xfree(record); } @@ -633,8 +638,8 @@ heap_stack* heap_stack_new(ddog_prof_Slice_Location locations) { for (uint16_t i = 0; i < stack->frames_len; i++) { const ddog_prof_Location *location = &locations.ptr[i]; stack->frames[i] = (heap_frame) { - .name = ruby_strndup(location->function.name.ptr, location->function.name.len), - .filename = ruby_strndup(location->function.filename.ptr, location->function.filename.len), + .name = string_from_char_slice(location->function.name), + .filename = string_from_char_slice(location->function.filename), // ddog_prof_Location is a int64_t. We don't expect to have to profile files with more than // 2M lines so this cast should be fairly safe? .line = (int32_t) location->line, diff --git a/ext/ddtrace_profiling_native_extension/heap_recorder.h b/ext/ddtrace_profiling_native_extension/heap_recorder.h index 27918c0f2cd..768b24522fd 100644 --- a/ext/ddtrace_profiling_native_extension/heap_recorder.h +++ b/ext/ddtrace_profiling_native_extension/heap_recorder.h @@ -26,6 +26,15 @@ typedef struct live_object_data { // Example: If we were sampling every 50 objects, then each sampled object // could be seen as being representative of 50 objects. unsigned int weight; + + // The class of the object that we're tracking. + // NOTE: This is optional and will be set to NULL if not set. + char* class; + + // The GC allocation gen in which we saw this object being allocated. + // + // This enables us to calculate the age of this object in terms of GC executions. + size_t alloc_gen; } live_object_data; // Data that is made available to iterators of heap recorder data for each live object @@ -56,7 +65,7 @@ void heap_recorder_after_fork(heap_recorder *heap_recorder); // The sampling weight of this object. // // WARN: It needs to be paired with a ::end_heap_allocation_recording call. -void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight); +void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight, ddog_CharSlice *alloc_class); // End a previously started heap allocation recording on the heap recorder. // diff --git a/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h b/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h index 7d4593fa550..307d4d0fb3e 100644 --- a/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h +++ b/ext/ddtrace_profiling_native_extension/libdatadog_helpers.h @@ -34,3 +34,9 @@ size_t read_ddogerr_string_and_drop(ddog_Error *error, char *string, size_t capa // ruby_value_type that Ruby uses so that we can also use this for debugging. const char *ruby_value_type_to_string(enum ruby_value_type type); ddog_CharSlice ruby_value_type_to_char_slice(enum ruby_value_type type); + +// Returns a dynamically allocated string from the provided char slice. +// WARN: The returned string must be explicitly freed with ruby_xfree. +inline static char* string_from_char_slice(ddog_CharSlice slice) { + return ruby_strndup(slice.ptr, slice.len); +} diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.c b/ext/ddtrace_profiling_native_extension/stack_recorder.c index a3398c343f5..c77b93ebce9 100644 --- a/ext/ddtrace_profiling_native_extension/stack_recorder.c +++ b/ext/ddtrace_profiling_native_extension/stack_recorder.c @@ -230,7 +230,7 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_ static void serializer_set_start_timestamp_for_next_profile(struct stack_recorder_state *state, ddog_Timespec start_time); static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE local_root_span_id, VALUE endpoint); static void reset_profile(ddog_prof_Profile *profile, ddog_Timespec *start_time /* Can be null */); -static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight); +static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight, VALUE alloc_class); static VALUE _native_check_heap_hashes(DDTRACE_UNUSED VALUE _self, VALUE locations); static VALUE _native_start_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance); static VALUE _native_end_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance); @@ -259,7 +259,7 @@ void stack_recorder_init(VALUE profiling_module) { rb_define_singleton_method(testing_module, "_native_slot_one_mutex_locked?", _native_is_slot_one_mutex_locked, 1); rb_define_singleton_method(testing_module, "_native_slot_two_mutex_locked?", _native_is_slot_two_mutex_locked, 1); rb_define_singleton_method(testing_module, "_native_record_endpoint", _native_record_endpoint, 3); - rb_define_singleton_method(testing_module, "_native_track_object", _native_track_object, 3); + rb_define_singleton_method(testing_module, "_native_track_object", _native_track_object, 4); rb_define_singleton_method(testing_module, "_native_check_heap_hashes", _native_check_heap_hashes, 1); rb_define_singleton_method(testing_module, "_native_start_fake_slow_heap_serialization", _native_start_fake_slow_heap_serialization, 1); @@ -546,13 +546,13 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, } } -void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight) { +void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight, ddog_CharSlice *alloc_class) { struct stack_recorder_state *state; TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state); // FIXME: Heap sampling currently has to be done in 2 parts because the construction of locations is happening // very late in the allocation-sampling path (which is shared with the cpu sampling path). This can // be fixed with some refactoring but for now this leads to a less impactful change. - start_heap_allocation_recording(state->heap_recorder, new_object, sample_weight); + start_heap_allocation_recording(state->heap_recorder, new_object, sample_weight, alloc_class); } void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint) { @@ -577,24 +577,50 @@ void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_ typedef struct heap_recorder_iteration_context { struct stack_recorder_state *state; ddog_prof_Profile *profile; + bool error; char error_msg[MAX_LEN_HEAP_ITERATION_ERROR_MSG]; + + size_t profile_gen; } heap_recorder_iteration_context; static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteration_data iteration_data, void *extra_arg) { heap_recorder_iteration_context *context = (heap_recorder_iteration_context*) extra_arg; + live_object_data *object_data = &iteration_data.object_data; + int64_t metric_values[ALL_VALUE_TYPES_COUNT] = {0}; uint8_t *position_for = context->state->position_for; - metric_values[position_for[HEAP_SAMPLES_VALUE_ID]] = iteration_data.object_data.weight; + metric_values[position_for[HEAP_SAMPLES_VALUE_ID]] = object_data->weight; + + ddog_prof_Label labels[2]; + size_t label_offset = 0; + + if (object_data->class != NULL) { + labels[label_offset++] = (ddog_prof_Label) { + .key = DDOG_CHARSLICE_C("allocation class"), + .str = (ddog_CharSlice) { + .ptr = object_data->class, + .len = strlen(object_data->class), + }, + .num = 0, // This shouldn't be needed but the tracer-2.7 docker image ships a buggy gcc that complains about this + }; + } + labels[label_offset++] = (ddog_prof_Label) { + .key = DDOG_CHARSLICE_C("gc gen age"), + .num = context->profile_gen - object_data->alloc_gen, + }; ddog_prof_Profile_Result result = ddog_prof_Profile_add( context->profile, (ddog_prof_Sample) { .locations = iteration_data.locations, .values = (ddog_Slice_I64) {.ptr = metric_values, .len = context->state->enabled_values_count}, - .labels = {0}, + .labels = (ddog_prof_Slice_Label) { + .ptr = labels, + .len = label_offset, + } }, 0 ); @@ -616,6 +642,7 @@ static void build_heap_profile_without_gvl(struct stack_recorder_state *state, d .profile = profile, .error = false, .error_msg = {0}, + .profile_gen = rb_gc_count(), }; bool iterated = heap_recorder_for_each_live_object(state->heap_recorder, add_heap_sample_to_active_profile_without_gvl, (void*) &iteration_context); // We wait until we're out of the iteration to grab the gvl and raise. This is important because during @@ -779,9 +806,10 @@ static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_ return Qtrue; } -static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight) { +static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight, VALUE alloc_class) { ENFORCE_TYPE(weight, T_FIXNUM); - track_object(recorder_instance, new_obj, NUM2UINT(weight)); + ddog_CharSlice alloc_class_slice = char_slice_from_ruby_string(alloc_class); + track_object(recorder_instance, new_obj, NUM2UINT(weight), &alloc_class_slice); return Qtrue; } diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.h b/ext/ddtrace_profiling_native_extension/stack_recorder.h index 4a9f2ead7b2..cfb93f7a25f 100644 --- a/ext/ddtrace_profiling_native_extension/stack_recorder.h +++ b/ext/ddtrace_profiling_native_extension/stack_recorder.h @@ -23,5 +23,5 @@ typedef struct sample_labels { void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, sample_labels labels); void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint); -void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight); +void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight, ddog_CharSlice *alloc_class); VALUE enforce_recorder_instance(VALUE object); diff --git a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb index 78ce334d800..b4e18d9fa98 100644 --- a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb +++ b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb @@ -572,10 +572,6 @@ it 'records live heap objects' do stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo)) - # Warm this up to remove VM-related allocations - # TODO: Remove this when we can match on allocation class - CpuAndWallTimeWorkerSpec::TestStruct.new - start live_objects = Array.new(test_num_allocated_object) @@ -587,7 +583,10 @@ test_struct_heap_sample = lambda { |sample| first_frame = sample.locations.first - first_frame.lineno == allocation_line && first_frame.path == __FILE__ && first_frame.base_label == 'new' && + first_frame.lineno == allocation_line && + first_frame.path == __FILE__ && + first_frame.base_label == 'new' && + sample.labels[:'allocation class'] == 'CpuAndWallTimeWorkerSpec::TestStruct' && (sample.values[:'heap-live-samples'] || 0) > 0 } diff --git a/spec/datadog/profiling/stack_recorder_spec.rb b/spec/datadog/profiling/stack_recorder_spec.rb index 116e06d01c8..f09290ec517 100644 --- a/spec/datadog/profiling/stack_recorder_spec.rb +++ b/spec/datadog/profiling/stack_recorder_spec.rb @@ -355,7 +355,7 @@ def sample_types_from(decoded_profile) def sample_allocation(obj) # Heap sampling currently requires this 2-step process to first pass data about the allocated object... - described_class::Testing._native_track_object(stack_recorder, obj, sample_rate) + described_class::Testing._native_track_object(stack_recorder, obj, sample_rate, obj.class.name) Datadog::Profiling::Collectors::Stack::Testing ._native_sample(Thread.current, stack_recorder, metric_values, labels, numeric_labels, 400, false) end @@ -372,6 +372,7 @@ def sample_allocation(obj) sample_allocation(obj) # rubocop:disable Style/IdenticalConditionalBranches end @num_allocations += 1 + GC.start # Force each allocation to be done in its own GC epoch for interesting GC age labels end allocations.clear # The literals in the previous array are now dangling @@ -417,14 +418,73 @@ def sample_allocation(obj) samples.select { |s| s.values[:'heap-live-samples'] == 0 } end - it 'include the stack and sample counts for the objects still left alive' do - # We sample from 2 distinct locations - expect(heap_samples.size).to eq(2) + it 'include the stack, sample counts and labels for the objects still left alive' do + # There should be 3 different allocation class labels so we expect 3 different heap samples + expect(heap_samples.size).to eq(3) - sum_heap_samples = 0 - heap_samples.each { |s| sum_heap_samples += s.values[:'heap-live-samples'] } + string_sample = heap_samples.find { |s| s.labels[:'allocation class'] == 'String' } + expect(string_sample.values).to match( + hash_including( + :"heap-live-samples" => sample_rate, + ) + ) + expect(string_sample.labels).to match( + { + :'allocation class' => 'String', + :'gc gen age' => be_a(Integer).and(be >= 0), + } + ) + + array_sample = heap_samples.find { |s| s.labels[:'allocation class'] == 'Array' } + expect(array_sample.values).to match( + hash_including( + :"heap-live-samples" => sample_rate, + ) + ) + expect(array_sample.labels).to match( + { + :'allocation class' => 'Array', + :'gc gen age' => be_a(Integer).and(be >= 0), + } + ) + + hash_sample = heap_samples.find { |s| s.labels[:'allocation class'] == 'Hash' } + expect(hash_sample.values).to match( + hash_including( + :"heap-live-samples" => sample_rate, + ) + ) + expect(hash_sample.labels).to match( + { + :'allocation class' => 'Hash', + :'gc gen age' => be_a(Integer).and(be >= 0), + } + ) + end - expect(sum_heap_samples).to(eq([a_string, an_array, a_hash].size * sample_rate)) + it 'include accurate object ages' do + string_sample = heap_samples.find { |s| s.labels[:'allocation class'] == 'String' } + string_age = string_sample.labels[:'gc gen age'] + + array_sample = heap_samples.find { |s| s.labels[:'allocation class'] == 'Array' } + array_age = array_sample.labels[:'gc gen age'] + + hash_sample = heap_samples.find { |s| s.labels[:'allocation class'] == 'Hash' } + hash_age = hash_sample.labels[:'gc gen age'] + + unique_sorted_ages = [string_age, array_age, hash_age].uniq.sort + # Expect all ages to be different and to be in the reverse order of allocation + # Last to allocate => Lower age + expect(unique_sorted_ages).to match([hash_age, array_age, string_age]) + + # Validate that the age of the newest object makes sense. + # * We force a GC after each allocation and the hash sample should correspond to + # the 5th allocation in 7 (which means we expect at least 3 GC after all allocations + # are done) + # * We forced 2 extra GC at the end of our before (+2) + # * This test isn't memory intensive otherwise so lets give us an extra margin of 1 GC to account for any + # GC out of our control + expect(hash_age).to be_between(5, 6) end it 'keeps on reporting accurate samples for other profile types' do