Skip to content

Commit

Permalink
[PROF-8667] Heap Profiling - Part 5 - Size
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexJF committed Dec 15, 2023
1 parent b8baedf commit bf9c073
Show file tree
Hide file tree
Showing 7 changed files with 98 additions and 17 deletions.
15 changes: 10 additions & 5 deletions ext/ddtrace_profiling_native_extension/heap_recorder.c
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ static heap_record* get_or_create_heap_record(heap_recorder*, ddog_prof_Slice_Lo
static void cleanup_heap_record_if_unused(heap_recorder*, heap_record*);
static int st_heap_record_entry_free(st_data_t, st_data_t, st_data_t);
static int st_object_record_entry_free(st_data_t, st_data_t, st_data_t);
static int st_object_record_entry_free_if_invalid(st_data_t, st_data_t, st_data_t);
static int st_object_record_update(st_data_t, st_data_t, st_data_t);
static int st_object_records_iterate(st_data_t, st_data_t, st_data_t);
static int update_object_record_entry(st_data_t*, st_data_t*, st_data_t, int);
static void commit_allocation(heap_recorder*, heap_record*, long, live_object_data*);
Expand Down Expand Up @@ -304,7 +304,7 @@ void heap_recorder_flush(heap_recorder *heap_recorder) {

ENFORCE_SUCCESS_GVL(pthread_mutex_lock(&heap_recorder->records_mutex));
flush_queue(heap_recorder);
st_foreach(heap_recorder->object_records, st_object_record_entry_free_if_invalid, (st_data_t) heap_recorder);
st_foreach(heap_recorder->object_records, st_object_record_update, (st_data_t) heap_recorder);
ENFORCE_SUCCESS_GVL(pthread_mutex_unlock(&heap_recorder->records_mutex));
}

Expand Down Expand Up @@ -388,13 +388,15 @@ static int st_object_record_entry_free(DDTRACE_UNUSED st_data_t key, st_data_t v
return ST_DELETE;
}

static int st_object_record_entry_free_if_invalid(st_data_t key, st_data_t value, st_data_t extra_arg) {
static int st_object_record_update(st_data_t key, st_data_t value, st_data_t extra_arg) {
long obj_id = (long) key;
object_record *record = (object_record*) value;
heap_recorder *recorder = (heap_recorder*) extra_arg;

if (!ruby_ref_from_id(LONG2NUM(obj_id), NULL)) {
// Id no longer associated with a valid ref. Need to clean things up!
VALUE ref;

if (!ruby_ref_from_id(LONG2NUM(obj_id), &ref)) {
// Id no longer associated with a valid ref. Need to delete this object record!

// Starting with the associated heap record. There will now be one less tracked object pointing to it
heap_record *heap_record = record->heap_record;
Expand All @@ -407,6 +409,9 @@ static int st_object_record_entry_free_if_invalid(st_data_t key, st_data_t value
return ST_DELETE;
}

// If we got this far, entry is still valid so lets update its size
record->object_data->size = ruby_obj_memsize_of(ref);

return ST_CONTINUE;
}

Expand Down
3 changes: 3 additions & 0 deletions ext/ddtrace_profiling_native_extension/heap_recorder.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ typedef struct live_object_data {
// could be seen as being representative of 50 objects.
unsigned int weight;

// Size of this object on last flush/update.
size_t size;

// The class of the object that we're tracking.
char* alloc_class;

Expand Down
36 changes: 36 additions & 0 deletions ext/ddtrace_profiling_native_extension/ruby_helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -167,3 +167,39 @@ bool ruby_ref_from_id(VALUE obj_id, VALUE *value) {

return true;
}

// Not part of public headers but is externed from Ruby
size_t rb_obj_memsize_of(VALUE obj);

// Wrapper around rb_obj_memsize_of to avoid hitting crashing paths.
size_t ruby_obj_memsize_of(VALUE obj) {
switch (BUILTIN_TYPE(obj)) {
case T_OBJECT:
case T_MODULE:
case T_CLASS:
case T_ICLASS:
case T_STRING:
case T_ARRAY:
case T_HASH:
case T_REGEXP:
case T_DATA:
case T_MATCH:
case T_FILE:
case T_RATIONAL:
case T_COMPLEX:
case T_IMEMO:
case T_FLOAT:
case T_SYMBOL:
case T_BIGNUM:
// case T_NODE: -> Throws exception in rb_obj_memsize_of
case T_STRUCT:
case T_ZOMBIE:
#ifndef NO_T_MOVED
case T_MOVED:
#endif
return rb_obj_memsize_of(obj);
default:
// Unsupported, return 0 instead of erroring like rb_obj_memsize_of likes doing
return 0;
}
}
5 changes: 5 additions & 0 deletions ext/ddtrace_profiling_native_extension/ruby_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,8 @@ char* ruby_strndup(const char *str, size_t size);
// writes the ref to the value pointer parameter if !NULL. False if id doesn't
// reference a valid object (in which case value is not changed).
bool ruby_ref_from_id(size_t id, VALUE *value);

// Native wrapper to get the approximate/estimated current size of the passed
// object.
size_t ruby_obj_memsize_of(VALUE obj);

17 changes: 12 additions & 5 deletions ext/ddtrace_profiling_native_extension/stack_recorder.c
Original file line number Diff line number Diff line change
Expand Up @@ -153,13 +153,15 @@ static VALUE error_symbol = Qnil; // :error in Ruby
#define ALLOC_SAMPLES_VALUE_ID 3
#define HEAP_SAMPLES_VALUE {.type_ = VALUE_STRING("heap-live-samples"), .unit = VALUE_STRING("count")}
#define HEAP_SAMPLES_VALUE_ID 4
#define HEAP_SIZE_VALUE {.type_ = VALUE_STRING("heap-live-size"), .unit = VALUE_STRING("bytes")}
#define HEAP_SIZE_VALUE_ID 5

static const ddog_prof_ValueType all_value_types[] = {CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE, HEAP_SAMPLES_VALUE};
static const ddog_prof_ValueType all_value_types[] = {CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE, HEAP_SAMPLES_VALUE, HEAP_SIZE_VALUE};

// This array MUST be kept in sync with all_value_types above and is intended to act as a "hashmap" between VALUE_ID and the position it
// occupies on the all_value_types array.
// E.g. all_value_types_positions[CPU_TIME_VALUE_ID] => 0, means that CPU_TIME_VALUE was declared at position 0 of all_value_types.
static const uint8_t all_value_types_positions[] = {CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID, HEAP_SAMPLES_VALUE_ID};
static const uint8_t all_value_types_positions[] = {CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID, HEAP_SAMPLES_VALUE_ID, HEAP_SIZE_VALUE_ID};

#define ALL_VALUE_TYPES_COUNT (sizeof(all_value_types) / sizeof(ddog_prof_ValueType))

Expand Down Expand Up @@ -363,7 +365,7 @@ static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_insta
state->enabled_values_count = ALL_VALUE_TYPES_COUNT -
(cpu_time_enabled == Qtrue ? 0 : 1) -
(alloc_samples_enabled == Qtrue? 0 : 1) -
(heap_samples_enabled == Qtrue ? 0 : 1);
(heap_samples_enabled == Qtrue ? 0 : 2);

ddog_prof_ValueType enabled_value_types[ALL_VALUE_TYPES_COUNT];
uint8_t next_enabled_pos = 0;
Expand Down Expand Up @@ -394,8 +396,11 @@ static VALUE _native_initialize(DDTRACE_UNUSED VALUE _self, VALUE recorder_insta
if (heap_samples_enabled == Qtrue) {
enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) HEAP_SAMPLES_VALUE;
state->position_for[HEAP_SAMPLES_VALUE_ID] = next_enabled_pos++;
enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) HEAP_SIZE_VALUE;
state->position_for[HEAP_SIZE_VALUE_ID] = next_enabled_pos++;
} else {
state->position_for[HEAP_SAMPLES_VALUE_ID] = next_disabled_pos++;
state->position_for[HEAP_SIZE_VALUE_ID] = next_disabled_pos++;

// Turns out heap sampling is disabled but we initialized everything in _native_new
// assuming all samples were enabled. We need to deinitialize the heap recorder.
Expand Down Expand Up @@ -554,9 +559,11 @@ static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteratio
int64_t metric_values[ALL_VALUE_TYPES_COUNT] = {0};
uint8_t *position_for = context->state->position_for;

metric_values[position_for[HEAP_SAMPLES_VALUE_ID]] = iteration_data.object_data->weight;

const live_object_data *object_data = iteration_data.object_data;

metric_values[position_for[HEAP_SAMPLES_VALUE_ID]] = object_data->weight;
metric_values[position_for[HEAP_SIZE_VALUE_ID]] = object_data->size * object_data->weight;

ddog_prof_Label labels[2];
size_t num_labels = 2;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -594,6 +594,8 @@
.find(&test_struct_heap_sample)

expect(relevant_sample.values[:'heap-live-samples']).to eq test_num_allocated_object
# 40 is the size of a basic object and we have test_num_allocated_object of them
expect(relevant_sample.values[:'heap-live-size']).to eq test_num_allocated_object * 40
end
end

Expand Down
37 changes: 30 additions & 7 deletions spec/datadog/profiling/stack_recorder_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ def slot_two_mutex_locked?
'wall-time' => 'nanoseconds',
'alloc-samples' => 'count',
'heap-live-samples' => 'count',
'heap-live-size' => 'bytes',
)
end
end
Expand All @@ -145,6 +146,7 @@ def slot_two_mutex_locked?
'wall-time' => 'nanoseconds',
'alloc-samples' => 'count',
'heap-live-samples' => 'count',
'heap-live-size' => 'bytes',
)
end
end
Expand All @@ -160,16 +162,17 @@ def slot_two_mutex_locked?
'cpu-samples' => 'count',
'wall-time' => 'nanoseconds',
'heap-live-samples' => 'count',
'heap-live-size' => 'bytes',
)
end
end

context 'when heap-live-samples is disabled' do
context 'when heap samples are disabled' do
let(:cpu_time_enabled) { true }
let(:alloc_samples_enabled) { true }
let(:heap_samples_enabled) { false }

it 'returns a pprof without the heap-live-samples type' do
it 'returns a pprof without the heap profile types' do
expect(sample_types_from(decoded_profile)).to eq(
'cpu-time' => 'nanoseconds',
'cpu-samples' => 'count',
Expand Down Expand Up @@ -344,8 +347,8 @@ def sample_types_from(decoded_profile)
let(:labels) { { 'label_a' => 'value_a', 'label_b' => 'value_b', 'state' => 'unknown' }.to_a }

let(:a_string) { 'a beautiful string' }
let(:an_array) { [1..10] }
let(:a_hash) { { 'a' => 1, 'b' => '2', 'c' => true } }
let(:an_array) { (1..100).to_a }
let(:a_hash) { { 'a' => 1, 'b' => '2', 'c' => true, 'd' => Object.new } }

let(:samples) { samples_from_pprof(encoded_pprof) }

Expand Down Expand Up @@ -383,6 +386,7 @@ def sample_allocation(obj, _unused = 0)
# We sample from 2 distinct locations
expect(samples.size).to eq(2)
expect(samples.select { |s| s.values.key?('heap-live-samples') }).to be_empty
expect(samples.select { |s| s.values.key?('heap-live-size') }).to be_empty
end
end

Expand Down Expand Up @@ -413,7 +417,7 @@ def sample_allocation(obj, _unused = 0)
end
end

it 'include the stack, sample counts and labels for the objects still left alive' do
it 'include the stack, counts, sizes and labels for the objects still left alive' do
# There should be 3 different allocation class labels so we expect 3 different heap samples
expect(heap_samples.size).to eq(3)

Expand All @@ -422,6 +426,12 @@ def sample_allocation(obj, _unused = 0)
expect(string_sample.values).to match(
hash_including(
:"heap-live-samples" => sample_rate,
:"heap-live-size" => be_between(
# Basic object + 18 UTF-8 characters at minimum.
(40 + 18 * 2) * sample_rate,
# Add some extra padding for extra data (length for instance?).
(40 + 18 * 2 + 10) * sample_rate
),
)
)
expect(string_sample.labels).to match(
Expand All @@ -436,6 +446,13 @@ def sample_allocation(obj, _unused = 0)
expect(array_sample.values).to match(
hash_including(
:"heap-live-samples" => sample_rate,
# Basic object + 100 FIXNUMs (32 or 64 bits)
:"heap-live-size" => be_between(
# Basic object + 100 FIXNUMs (32 bits)
(40 + 100 * 4) * sample_rate,
# Basic object + 128 FIXNUMs (64 bits and round to nearest power) and eventual extra data
(40 + 128 * 8 + 10) * sample_rate,
)
)
)
expect(array_sample.labels).to match(
Expand All @@ -447,9 +464,15 @@ def sample_allocation(obj, _unused = 0)

hash_sample = heap_samples.find { |s| s.labels[:'allocation class'] == 'Hash' }
expect(hash_sample).not_to be nil
expect(array_sample.values).to match(
expect(hash_sample.values).to match(
hash_including(
:"heap-live-samples" => sample_rate,
:"heap-live-size" => be_between(
# Basic object + 4 table entries + 8 bins
(40 + 4 * 16 + 8) * sample_rate,
# Add extra padding to hash itself as well as each entry and bin
(80 + 4 * 32 + 8 * 2) * sample_rate,
)
)
)
expect(hash_sample.labels).to match(
Expand All @@ -473,7 +496,7 @@ def sample_allocation(obj, _unused = 0)
# We use the same metric_values in all sample calls in before. So we'd expect
# the summed values to match `@num_allocations * metric_values[profile-type]`
# for each profile-type there in.
expected_summed_values = { :'heap-live-samples' => 0 }
expected_summed_values = { :'heap-live-samples' => 0, :'heap-live-size' => 0, }
metric_values.each_pair do |k, v|
expected_summed_values[k.to_sym] = v * @num_allocations
end
Expand Down

0 comments on commit bf9c073

Please sign in to comment.