From bf9c073e24faa176e271a034fc99019f59d27165 Mon Sep 17 00:00:00 2001
From: Alexandre Fonseca
Date: Fri, 15 Dec 2023 18:09:27 +0000
Subject: [PATCH] [PROF-8667] Heap Profiling - Part 5 - Size

---
 .../heap_recorder.c                           | 15 +++++---
 .../heap_recorder.h                           |  3 ++
 .../ruby_helpers.c                            | 36 ++++++++++++++++++
 .../ruby_helpers.h                            |  5 +++
 .../stack_recorder.c                          | 17 ++++++---
 .../cpu_and_wall_time_worker_spec.rb          |  2 +
 spec/datadog/profiling/stack_recorder_spec.rb | 37 +++++++++++++++----
 7 files changed, 98 insertions(+), 17 deletions(-)

diff --git a/ext/ddtrace_profiling_native_extension/heap_recorder.c b/ext/ddtrace_profiling_native_extension/heap_recorder.c
index 89fa0d288d6..72033f07d43 100644
--- a/ext/ddtrace_profiling_native_extension/heap_recorder.c
+++ b/ext/ddtrace_profiling_native_extension/heap_recorder.c
@@ -142,7 +142,7 @@ static heap_record* get_or_create_heap_record(heap_recorder*, ddog_prof_Slice_Lo
 static void cleanup_heap_record_if_unused(heap_recorder*, heap_record*);
 static int st_heap_record_entry_free(st_data_t, st_data_t, st_data_t);
 static int st_object_record_entry_free(st_data_t, st_data_t, st_data_t);
-static int st_object_record_entry_free_if_invalid(st_data_t, st_data_t, st_data_t);
+static int st_object_record_update(st_data_t, st_data_t, st_data_t);
 static int st_object_records_iterate(st_data_t, st_data_t, st_data_t);
 static int update_object_record_entry(st_data_t*, st_data_t*, st_data_t, int);
 static void commit_allocation(heap_recorder*, heap_record*, long, live_object_data*);
@@ -304,7 +304,7 @@ void heap_recorder_flush(heap_recorder *heap_recorder) {
   ENFORCE_SUCCESS_GVL(pthread_mutex_lock(&heap_recorder->records_mutex));
 
   flush_queue(heap_recorder);
-  st_foreach(heap_recorder->object_records, st_object_record_entry_free_if_invalid, (st_data_t) heap_recorder);
+  st_foreach(heap_recorder->object_records, st_object_record_update, (st_data_t) heap_recorder);
 
   ENFORCE_SUCCESS_GVL(pthread_mutex_unlock(&heap_recorder->records_mutex));
 }
@@ -388,13 +388,15 @@ static int st_object_record_entry_free(DDTRACE_UNUSED st_data_t key, st_data_t v
   return ST_DELETE;
 }
 
-static int st_object_record_entry_free_if_invalid(st_data_t key, st_data_t value, st_data_t extra_arg) {
+static int st_object_record_update(st_data_t key, st_data_t value, st_data_t extra_arg) {
   long obj_id = (long) key;
   object_record *record = (object_record*) value;
   heap_recorder *recorder = (heap_recorder*) extra_arg;
 
-  if (!ruby_ref_from_id(LONG2NUM(obj_id), NULL)) {
-    // Id no longer associated with a valid ref. Need to clean things up!
+  VALUE ref;
+
+  if (!ruby_ref_from_id(LONG2NUM(obj_id), &ref)) {
+    // Id no longer associated with a valid ref. Need to delete this object record!
 
     // Starting with the associated heap record. There will now be one less tracked object pointing to it
     heap_record *heap_record = record->heap_record;
@@ -407,6 +409,9 @@ static int st_object_record_entry_free_if_invalid(st_data_t key, st_data_t value
     return ST_DELETE;
   }
 
+  // If we got this far, the entry is still valid, so let's update its size.
+  record->object_data->size = ruby_obj_memsize_of(ref);
+
   return ST_CONTINUE;
 }
 
diff --git a/ext/ddtrace_profiling_native_extension/heap_recorder.h b/ext/ddtrace_profiling_native_extension/heap_recorder.h
index 99b0b2e05b8..acef7176675 100644
--- a/ext/ddtrace_profiling_native_extension/heap_recorder.h
+++ b/ext/ddtrace_profiling_native_extension/heap_recorder.h
@@ -27,6 +27,9 @@ typedef struct live_object_data {
   // could be seen as being representative of 50 objects.
   unsigned int weight;
 
+  // Size of this object on last flush/update.
+  size_t size;
+
   // The class of the object that we're tracking.
   char* alloc_class;
 
diff --git a/ext/ddtrace_profiling_native_extension/ruby_helpers.c b/ext/ddtrace_profiling_native_extension/ruby_helpers.c
index 0f3dcb48459..e13b6574376 100644
--- a/ext/ddtrace_profiling_native_extension/ruby_helpers.c
+++ b/ext/ddtrace_profiling_native_extension/ruby_helpers.c
@@ -167,3 +167,39 @@ bool ruby_ref_from_id(VALUE obj_id, VALUE *value) {
 
   return true;
 }
+
+// Not part of the public headers but exported by Ruby
+size_t rb_obj_memsize_of(VALUE obj);
+
+// Wrapper around rb_obj_memsize_of to avoid hitting crashing paths.
+size_t ruby_obj_memsize_of(VALUE obj) {
+  switch (BUILTIN_TYPE(obj)) {
+    case T_OBJECT:
+    case T_MODULE:
+    case T_CLASS:
+    case T_ICLASS:
+    case T_STRING:
+    case T_ARRAY:
+    case T_HASH:
+    case T_REGEXP:
+    case T_DATA:
+    case T_MATCH:
+    case T_FILE:
+    case T_RATIONAL:
+    case T_COMPLEX:
+    case T_IMEMO:
+    case T_FLOAT:
+    case T_SYMBOL:
+    case T_BIGNUM:
+    // case T_NODE: -> Throws exception in rb_obj_memsize_of
+    case T_STRUCT:
+    case T_ZOMBIE:
+    #ifndef NO_T_MOVED
+    case T_MOVED:
+    #endif
+      return rb_obj_memsize_of(obj);
+    default:
+      // Unsupported type: return 0 instead of erroring out like rb_obj_memsize_of does
+      return 0;
+  }
+}
diff --git a/ext/ddtrace_profiling_native_extension/ruby_helpers.h b/ext/ddtrace_profiling_native_extension/ruby_helpers.h
index d0bf3cfcb0e..1ca01652b4a 100644
--- a/ext/ddtrace_profiling_native_extension/ruby_helpers.h
+++ b/ext/ddtrace_profiling_native_extension/ruby_helpers.h
@@ -106,3 +106,8 @@ char* ruby_strndup(const char *str, size_t size);
 // writes the ref to the value pointer parameter if !NULL. False if id doesn't
 // reference a valid object (in which case value is not changed).
 bool ruby_ref_from_id(size_t id, VALUE *value);
+
+// Native wrapper to get the approximate/estimated current size of the passed
+// object.
+size_t ruby_obj_memsize_of(VALUE obj);
+
diff --git a/ext/ddtrace_profiling_native_extension/stack_recorder.c b/ext/ddtrace_profiling_native_extension/stack_recorder.c
index 64282bb8d81..d6945750bb9 100644
--- a/ext/ddtrace_profiling_native_extension/stack_recorder.c
+++ b/ext/ddtrace_profiling_native_extension/stack_recorder.c
@@ -153,13 +153,15 @@ static VALUE error_symbol = Qnil; // :error in Ruby
 #define ALLOC_SAMPLES_VALUE_ID 3
 #define HEAP_SAMPLES_VALUE {.type_ = VALUE_STRING("heap-live-samples"), .unit = VALUE_STRING("count")}
 #define HEAP_SAMPLES_VALUE_ID 4
+#define HEAP_SIZE_VALUE {.type_ = VALUE_STRING("heap-live-size"), .unit = VALUE_STRING("bytes")}
+#define HEAP_SIZE_VALUE_ID 5
 
-static const ddog_prof_ValueType all_value_types[] = {CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE, HEAP_SAMPLES_VALUE};
+static const ddog_prof_ValueType all_value_types[] = {CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE, HEAP_SAMPLES_VALUE, HEAP_SIZE_VALUE};
 
 // This array MUST be kept in sync with all_value_types above and is intended to act as a "hashmap" between VALUE_ID and the position it
 // occupies on the all_value_types array.
 // E.g. all_value_types_positions[CPU_TIME_VALUE_ID] => 0, means that CPU_TIME_VALUE was declared at position 0 of all_value_types.
-static const uint8_t all_value_types_positions[] = {CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID, HEAP_SAMPLES_VALUE_ID};
+static const uint8_t all_value_types_positions[] = {CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID, HEAP_SAMPLES_VALUE_ID, HEAP_SIZE_VALUE_ID};
 
 #define ALL_VALUE_TYPES_COUNT (sizeof(all_value_types) / sizeof(ddog_prof_ValueType))
 
@@ -363,7 +365,7 @@ static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_insta
   state->enabled_values_count = ALL_VALUE_TYPES_COUNT -
     (cpu_time_enabled == Qtrue ? 0 : 1) -
     (alloc_samples_enabled == Qtrue? 0 : 1) -
-    (heap_samples_enabled == Qtrue ? 0 : 1);
+    (heap_samples_enabled == Qtrue ? 0 : 2);
 
   ddog_prof_ValueType enabled_value_types[ALL_VALUE_TYPES_COUNT];
   uint8_t next_enabled_pos = 0;
@@ -394,8 +396,11 @@ static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_insta
   if (heap_samples_enabled == Qtrue) {
     enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) HEAP_SAMPLES_VALUE;
     state->position_for[HEAP_SAMPLES_VALUE_ID] = next_enabled_pos++;
+    enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) HEAP_SIZE_VALUE;
+    state->position_for[HEAP_SIZE_VALUE_ID] = next_enabled_pos++;
   } else {
     state->position_for[HEAP_SAMPLES_VALUE_ID] = next_disabled_pos++;
+    state->position_for[HEAP_SIZE_VALUE_ID] = next_disabled_pos++;
 
     // Turns out heap sampling is disabled but we initialized everything in _native_new
     // assuming all samples were enabled. We need to deinitialize the heap recorder.
@@ -554,9 +559,11 @@ static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteratio
   int64_t metric_values[ALL_VALUE_TYPES_COUNT] = {0};
   uint8_t *position_for = context->state->position_for;
 
-  metric_values[position_for[HEAP_SAMPLES_VALUE_ID]] = iteration_data.object_data->weight;
-
   const live_object_data *object_data = iteration_data.object_data;
+
+  metric_values[position_for[HEAP_SAMPLES_VALUE_ID]] = object_data->weight;
+  metric_values[position_for[HEAP_SIZE_VALUE_ID]] = object_data->size * object_data->weight;
+
   ddog_prof_Label labels[2];
   size_t num_labels = 2;
 
diff --git a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb
index e0b0e054b66..a6ee7ff53ce 100644
--- a/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb
+++ b/spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb
@@ -594,6 +594,8 @@
           .find(&test_struct_heap_sample)
 
         expect(relevant_sample.values[:'heap-live-samples']).to eq test_num_allocated_object
+        # 40 is the size of a basic object and we have test_num_allocated_object of them
+        expect(relevant_sample.values[:'heap-live-size']).to eq test_num_allocated_object * 40
       end
     end
 
diff --git a/spec/datadog/profiling/stack_recorder_spec.rb b/spec/datadog/profiling/stack_recorder_spec.rb
index b6a87c2251c..8754427f398 100644
--- a/spec/datadog/profiling/stack_recorder_spec.rb
+++ b/spec/datadog/profiling/stack_recorder_spec.rb
@@ -130,6 +130,7 @@ def slot_two_mutex_locked?
           'wall-time' => 'nanoseconds',
           'alloc-samples' => 'count',
           'heap-live-samples' => 'count',
+          'heap-live-size' => 'bytes',
         )
       end
     end
@@ -145,6 +146,7 @@ def slot_two_mutex_locked?
          'wall-time' => 'nanoseconds',
          'alloc-samples' => 'count',
          'heap-live-samples' => 'count',
+         'heap-live-size' => 'bytes',
        )
      end
    end
@@ -160,16 +162,17 @@ def slot_two_mutex_locked?
           'cpu-samples' => 'count',
           'wall-time' => 'nanoseconds',
           'heap-live-samples' => 'count',
+          'heap-live-size' => 'bytes',
         )
       end
     end
 
-    context 'when heap-live-samples is disabled' do
+    context 'when heap samples are disabled' do
       let(:cpu_time_enabled) { true }
       let(:alloc_samples_enabled) { true }
       let(:heap_samples_enabled) { false }
 
-      it 'returns a pprof without the heap-live-samples type' do
+      it 'returns a pprof without the heap profile types' do
         expect(sample_types_from(decoded_profile)).to eq(
           'cpu-time' => 'nanoseconds',
           'cpu-samples' => 'count',
@@ -344,8 +347,8 @@ def sample_types_from(decoded_profile)
       let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b', 'state' => 'unknown' }.to_a }
 
       let(:a_string) { 'a beautiful string' }
-      let(:an_array) { [1..10] }
-      let(:a_hash) { { 'a' => 1, 'b' => '2', 'c' => true } }
+      let(:an_array) { (1..100).to_a }
+      let(:a_hash) { { 'a' => 1, 'b' => '2', 'c' => true, 'd' => Object.new } }
 
       let(:samples) { samples_from_pprof(encoded_pprof) }
 
@@ -383,6 +386,7 @@ def sample_allocation(obj, _unused = 0)
           # We sample from 2 distinct locations
           expect(samples.size).to eq(2)
           expect(samples.select { |s| s.values.key?('heap-live-samples') }).to be_empty
+          expect(samples.select { |s| s.values.key?('heap-live-size') }).to be_empty
         end
       end
 
@@ -413,7 +417,7 @@ def sample_allocation(obj, _unused = 0)
         end
       end
 
-      it 'include the stack, sample counts and labels for the objects still left alive' do
+      it 'includes the stack, counts, sizes and labels for the objects still left alive' do
         # There should be 3 different allocation class labels so we expect 3 different heap samples
         expect(heap_samples.size).to eq(3)
 
@@ -422,6 +426,12 @@ def sample_allocation(obj, _unused = 0)
         expect(string_sample.values).to match(
           hash_including(
             :"heap-live-samples" => sample_rate,
+            :"heap-live-size" => be_between(
+              # Basic object + 18 UTF-8 characters at minimum.
+              (40 + 18 * 2) * sample_rate,
+              # Add some extra padding for extra data (length for instance?).
+              (40 + 18 * 2 + 10) * sample_rate
+            ),
           )
         )
         expect(string_sample.labels).to match(
@@ -436,6 +446,13 @@ def sample_allocation(obj, _unused = 0)
         expect(array_sample.values).to match(
           hash_including(
             :"heap-live-samples" => sample_rate,
+            # Basic object + 100 FIXNUMs (32 or 64 bits)
+            :"heap-live-size" => be_between(
+              # Basic object + 100 FIXNUMs (32 bits)
+              (40 + 100 * 4) * sample_rate,
+              # Basic object + 128 FIXNUMs (64 bits and round to nearest power) and eventual extra data
+              (40 + 128 * 8 + 10) * sample_rate,
+            )
           )
         )
         expect(array_sample.labels).to match(
@@ -447,9 +464,15 @@ def sample_allocation(obj, _unused = 0)
         hash_sample = heap_samples.find { |s| s.labels[:'allocation class'] == 'Hash' }
         expect(hash_sample).not_to be nil
 
-        expect(array_sample.values).to match(
+        expect(hash_sample.values).to match(
           hash_including(
             :"heap-live-samples" => sample_rate,
+            :"heap-live-size" => be_between(
+              # Basic object + 4 table entries + 8 bins
+              (40 + 4 * 16 + 8) * sample_rate,
+              # Add extra padding to hash itself as well as each entry and bin
+              (80 + 4 * 32 + 8 * 2) * sample_rate,
+            )
           )
         )
         expect(hash_sample.labels).to match(
@@ -473,7 +496,7 @@ def sample_allocation(obj, _unused = 0)
       # We use the same metric_values in all sample calls in before. So we'd expect
       # the summed values to match `@num_allocations * metric_values[profile-type]`
       # for each profile-type there in.
-      expected_summed_values = { :'heap-live-samples' => 0 }
+      expected_summed_values = { :'heap-live-samples' => 0, :'heap-live-size' => 0, }
       metric_values.each_pair do |k, v|
         expected_summed_values[k.to_sym] = v * @num_allocations
       end
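
A quick way to eyeball the loose byte ranges asserted in the specs above, assuming a local Ruby with the bundled objspace extension: ObjectSpace.memsize_of is the Ruby-level counterpart of the rb_obj_memsize_of call that ruby_obj_memsize_of wraps, so it reports the same per-object sizes the new heap-live-size value is built from. This is an illustrative sketch, not part of the patch, and exact numbers vary by Ruby version, platform, and word size.

# Sketch: print the memsize of the same objects the stack_recorder specs allocate,
# to sanity-check the be_between bounds used for heap-live-size.
require 'objspace'

a_string = 'a beautiful string'                                      # same objects as the spec's `let`s
an_array = (1..100).to_a
a_hash   = { 'a' => 1, 'b' => '2', 'c' => true, 'd' => Object.new }

[a_string, an_array, a_hash].each do |obj|
  # ObjectSpace.memsize_of delegates to rb_obj_memsize_of under the hood
  puts "#{obj.class}: #{ObjectSpace.memsize_of(obj)} bytes"
end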