Skip to content

Commit

Permalink
[PROF-8667] Heap Profiling - Part 4 - Labels
Browse files Browse the repository at this point in the history
[PROF-8667] Rework queue system into a snapshot system.

[PROF-8667] Address some comments

[PROF-8667] Address comments
  • Loading branch information
AlexJF committed Dec 21, 2023
1 parent d4b2b3e commit c20d042
Show file tree
Hide file tree
Showing 8 changed files with 188 additions and 61 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1244,7 +1244,7 @@ void thread_context_collector_sample_allocation(VALUE self_instance, unsigned in
}
}

track_object(state->recorder_instance, new_object, sample_weight);
track_object(state->recorder_instance, new_object, sample_weight, optional_class_name);

trigger_sample_for_thread(
state,
Expand Down
80 changes: 45 additions & 35 deletions ext/ddtrace_profiling_native_extension/heap_recorder.c
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#include "ruby_helpers.h"
#include <errno.h>
#include "collectors_stack.h"
#include "libdatadog_helpers.h"

// A compact representation of a stacktrace frame for a heap allocation.
typedef struct {
Expand Down Expand Up @@ -92,15 +93,6 @@ typedef struct {
static object_record* object_record_new(long, heap_record*, live_object_data);
static void object_record_free(object_record*);

// Allows storing data passed to ::start_heap_allocation_recording to make it accessible to
// ::end_heap_allocation_recording.
//
// obj_id != 0 flags this struct as holding a valid partial heap recording.
typedef struct {
long obj_id;
live_object_data object_data;
} partial_heap_recording;

struct heap_recorder {
// Map[key: heap_record_key*, record: heap_record*]
// NOTE: We always use heap_record_key.type == HEAP_STACK for storage but support lookups
Expand All @@ -125,7 +117,7 @@ struct heap_recorder {
st_table *object_records_snapshot;

// Data for a heap recording that was started but not yet ended
partial_heap_recording active_recording;
object_record *partial_object_record;

// Reusable location array, implementing a flyweight pattern for things like iteration.
ddog_prof_Location *reusable_locations;
Expand All @@ -138,7 +130,7 @@ static int st_object_record_entry_free_if_invalid(st_data_t, st_data_t, st_data_
static int st_object_records_iterate(st_data_t, st_data_t, st_data_t);
static int st_object_records_debug(st_data_t key, st_data_t value, st_data_t extra);
static int update_object_record_entry(st_data_t*, st_data_t*, st_data_t, int);
static void commit_allocation(heap_recorder*, heap_record*, long, live_object_data);
static void commit_allocation(heap_recorder*, heap_record*, object_record*);

// ==========================
// Heap Recorder External API
Expand All @@ -156,11 +148,7 @@ heap_recorder* heap_recorder_new(void) {
recorder->object_records = st_init_numtable();
recorder->object_records_snapshot = NULL;
recorder->reusable_locations = ruby_xcalloc(MAX_FRAMES_LIMIT, sizeof(ddog_prof_Location));
recorder->active_recording = (partial_heap_recording) {
.obj_id = 0, // 0 is actually the obj_id of false, but we'll never track that one in heap so we use
// it as invalid/unset value.
.object_data = {0},
};
recorder->partial_object_record = NULL;

return recorder;
}
Expand All @@ -176,12 +164,21 @@ void heap_recorder_free(heap_recorder *heap_recorder) {
heap_recorder_finish_iteration(heap_recorder);
}

// Clean-up all object records
st_foreach(heap_recorder->object_records, st_object_record_entry_free, 0);
st_free_table(heap_recorder->object_records);

// Clean-up all heap records (this includes those only referred to by queued_samples)
st_foreach(heap_recorder->heap_records, st_heap_record_entry_free, 0);
st_free_table(heap_recorder->heap_records);

if (heap_recorder->partial_object_record != NULL) {
// If there's a partial object record, clean it up as well
object_record_free(heap_recorder->partial_object_record);
}

ruby_xfree(heap_recorder->reusable_locations);

ruby_xfree(heap_recorder);
}

Expand All @@ -207,7 +204,7 @@ void heap_recorder_after_fork(heap_recorder *heap_recorder) {
// nothing for us to do here.
}

void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight) {
void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight, ddog_CharSlice *alloc_class) {
if (heap_recorder == NULL) {
return;
}
Expand All @@ -217,35 +214,37 @@ void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj
rb_raise(rb_eRuntimeError, "Detected a bignum object id. These are not supported by heap profiling.");
}

heap_recorder->active_recording = (partial_heap_recording) {
.obj_id = FIX2LONG(ruby_obj_id),
.object_data = (live_object_data) {
.weight = weight,
},
};
if (heap_recorder->partial_object_record != NULL) {
rb_raise(rb_eRuntimeError, "Detected consecutive heap allocation recording starts without end.");
}

heap_recorder->partial_object_record = object_record_new(FIX2LONG(ruby_obj_id), NULL, (live_object_data) {
.weight = weight,
.class = alloc_class != NULL ? string_from_char_slice(*alloc_class) : NULL,
.alloc_gen = rb_gc_count(),
});
}

void end_heap_allocation_recording(struct heap_recorder *heap_recorder, ddog_prof_Slice_Location locations) {
if (heap_recorder == NULL) {
return;
}

partial_heap_recording *active_recording = &heap_recorder->active_recording;
object_record *partial_object_record = heap_recorder->partial_object_record;

long obj_id = active_recording->obj_id;
if (obj_id == 0) {
if (partial_object_record == NULL) {
// Recording ended without having been started?
rb_raise(rb_eRuntimeError, "Ended a heap recording that was not started");
}

// From now on, mark active recording as invalid so we can short-circuit at any point and
// not end up with a still active recording. new_obj still holds the object for this recording
active_recording->obj_id = 0;
// not end up with a still active recording. partial_object_record still holds the object for this recording
heap_recorder->partial_object_record = NULL;

heap_record *heap_record = get_or_create_heap_record(heap_recorder, locations);

// And then commit the new allocation.
commit_allocation(heap_recorder, heap_record, obj_id, active_recording->object_data);
commit_allocation(heap_recorder, heap_record, partial_object_record);
}

void heap_recorder_prepare_iteration(heap_recorder *heap_recorder) {
Expand Down Expand Up @@ -416,7 +415,12 @@ static int st_object_records_debug(DDTRACE_UNUSED st_data_t key, st_data_t value
object_record *record = (object_record*) value;

heap_frame top_frame = record->heap_record->stack->frames[0];
rb_str_catf(debug_str, "obj_id=%ld weight=%d location=%s:%d ", record->obj_id, record->object_data.weight, top_frame.filename, (int) top_frame.line);
rb_str_catf(debug_str, "obj_id=%ld weight=%d location=%s:%d alloc_gen=%zu ", record->obj_id, record->object_data.weight, top_frame.filename, (int) top_frame.line, record->object_data.alloc_gen);

const char *class = record->object_data.class;
if (class != NULL) {
rb_str_catf(debug_str, "class=%s ", class);
}

VALUE ref;
if (!ruby_ref_from_id(LONG2NUM(record->obj_id), &ref)) {
Expand Down Expand Up @@ -450,13 +454,16 @@ static int update_object_record_entry(DDTRACE_UNUSED st_data_t *key, st_data_t *
return ST_CONTINUE;
}

static void commit_allocation(heap_recorder *heap_recorder, heap_record *heap_record, long obj_id, live_object_data object_data) {
static void commit_allocation(heap_recorder *heap_recorder, heap_record *heap_record, object_record *object_record) {
// Link the object record with the corresponding heap record.
object_record->heap_record = heap_record;

// Update object_records
object_record_update_data update_data = (object_record_update_data) {
.heap_recorder = heap_recorder,
.new_object_record = object_record_new(obj_id, heap_record, object_data),
.new_object_record = object_record,
};
if (!st_update(heap_recorder->object_records, obj_id, update_object_record_entry, (st_data_t) &update_data)) {
if (!st_update(heap_recorder->object_records, object_record->obj_id, update_object_record_entry, (st_data_t) &update_data)) {
// We are sure there was no previous record for this id so let the heap record know there now is one
// extra record associated with this stack.
if (heap_record->num_tracked_objects == UINT32_MAX) {
Expand Down Expand Up @@ -563,6 +570,9 @@ object_record* object_record_new(long obj_id, heap_record *heap_record, live_obj
}

void object_record_free(object_record *record) {
if (record->object_data.class != NULL) {
ruby_xfree(record->object_data.class);
}
ruby_xfree(record);
}

Expand Down Expand Up @@ -633,8 +643,8 @@ heap_stack* heap_stack_new(ddog_prof_Slice_Location locations) {
for (uint16_t i = 0; i < stack->frames_len; i++) {
const ddog_prof_Location *location = &locations.ptr[i];
stack->frames[i] = (heap_frame) {
.name = ruby_strndup(location->function.name.ptr, location->function.name.len),
.filename = ruby_strndup(location->function.filename.ptr, location->function.filename.len),
.name = string_from_char_slice(location->function.name),
.filename = string_from_char_slice(location->function.filename),
// ddog_prof_Location is a int64_t. We don't expect to have to profile files with more than
// 2M lines so this cast should be fairly safe?
.line = (int32_t) location->line,
Expand Down
11 changes: 10 additions & 1 deletion ext/ddtrace_profiling_native_extension/heap_recorder.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,15 @@ typedef struct live_object_data {
// Example: If we were sampling every 50 objects, then each sampled object
// could be seen as being representative of 50 objects.
unsigned int weight;

// The class of the object that we're tracking.
// NOTE: This is optional and will be set to NULL if not set.
char* class;

// The GC allocation gen in which we saw this object being allocated.
//
// This enables us to calculate the age of this object in terms of GC executions.
size_t alloc_gen;
} live_object_data;

// Data that is made available to iterators of heap recorder data for each live object
Expand Down Expand Up @@ -56,7 +65,7 @@ void heap_recorder_after_fork(heap_recorder *heap_recorder);
// The sampling weight of this object.
//
// WARN: It needs to be paired with a ::end_heap_allocation_recording call.
void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight);
void start_heap_allocation_recording(heap_recorder *heap_recorder, VALUE new_obj, unsigned int weight, ddog_CharSlice *alloc_class);

// End a previously started heap allocation recording on the heap recorder.
//
Expand Down
6 changes: 6 additions & 0 deletions ext/ddtrace_profiling_native_extension/libdatadog_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,9 @@ size_t read_ddogerr_string_and_drop(ddog_Error *error, char *string, size_t capa
// ruby_value_type that Ruby uses so that we can also use this for debugging.
const char *ruby_value_type_to_string(enum ruby_value_type type);
ddog_CharSlice ruby_value_type_to_char_slice(enum ruby_value_type type);

// Returns a dynamically allocated string from the provided char slice.
// WARN: The returned string must be explicitly freed with ruby_xfree.
inline static char* string_from_char_slice(ddog_CharSlice slice) {
return ruby_strndup(slice.ptr, slice.len);
}
44 changes: 36 additions & 8 deletions ext/ddtrace_profiling_native_extension/stack_recorder.c
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE recorder_
static void serializer_set_start_timestamp_for_next_profile(struct stack_recorder_state *state, ddog_Timespec start_time);
static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE local_root_span_id, VALUE endpoint);
static void reset_profile(ddog_prof_Profile *profile, ddog_Timespec *start_time /* Can be null */);
static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight);
static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight, VALUE alloc_class);
static VALUE _native_check_heap_hashes(DDTRACE_UNUSED VALUE _self, VALUE locations);
static VALUE _native_start_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
static VALUE _native_end_fake_slow_heap_serialization(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance);
Expand Down Expand Up @@ -259,7 +259,7 @@ void stack_recorder_init(VALUE profiling_module) {
rb_define_singleton_method(testing_module, "_native_slot_one_mutex_locked?", _native_is_slot_one_mutex_locked, 1);
rb_define_singleton_method(testing_module, "_native_slot_two_mutex_locked?", _native_is_slot_two_mutex_locked, 1);
rb_define_singleton_method(testing_module, "_native_record_endpoint", _native_record_endpoint, 3);
rb_define_singleton_method(testing_module, "_native_track_object", _native_track_object, 3);
rb_define_singleton_method(testing_module, "_native_track_object", _native_track_object, 4);
rb_define_singleton_method(testing_module, "_native_check_heap_hashes", _native_check_heap_hashes, 1);
rb_define_singleton_method(testing_module, "_native_start_fake_slow_heap_serialization",
_native_start_fake_slow_heap_serialization, 1);
Expand Down Expand Up @@ -546,13 +546,13 @@ void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations,
}
}

void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight) {
void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight, ddog_CharSlice *alloc_class) {
struct stack_recorder_state *state;
TypedData_Get_Struct(recorder_instance, struct stack_recorder_state, &stack_recorder_typed_data, state);
// FIXME: Heap sampling currently has to be done in 2 parts because the construction of locations is happening
// very late in the allocation-sampling path (which is shared with the cpu sampling path). This can
// be fixed with some refactoring but for now this leads to a less impactful change.
start_heap_allocation_recording(state->heap_recorder, new_object, sample_weight);
start_heap_allocation_recording(state->heap_recorder, new_object, sample_weight, alloc_class);
}

void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint) {
Expand All @@ -577,24 +577,50 @@ void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_
typedef struct heap_recorder_iteration_context {
struct stack_recorder_state *state;
ddog_prof_Profile *profile;

bool error;
char error_msg[MAX_LEN_HEAP_ITERATION_ERROR_MSG];

size_t profile_gen;
} heap_recorder_iteration_context;

static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteration_data iteration_data, void *extra_arg) {
heap_recorder_iteration_context *context = (heap_recorder_iteration_context*) extra_arg;

live_object_data *object_data = &iteration_data.object_data;

int64_t metric_values[ALL_VALUE_TYPES_COUNT] = {0};
uint8_t *position_for = context->state->position_for;

metric_values[position_for[HEAP_SAMPLES_VALUE_ID]] = iteration_data.object_data.weight;
metric_values[position_for[HEAP_SAMPLES_VALUE_ID]] = object_data->weight;

ddog_prof_Label labels[2];
size_t label_offset = 0;

if (object_data->class != NULL) {
labels[label_offset++] = (ddog_prof_Label) {
.key = DDOG_CHARSLICE_C("allocation class"),
.str = (ddog_CharSlice) {
.ptr = object_data->class,
.len = strlen(object_data->class),
},
.num = 0, // This shouldn't be needed but the tracer-2.7 docker image ships a buggy gcc that complains about this
};
}
labels[label_offset++] = (ddog_prof_Label) {
.key = DDOG_CHARSLICE_C("gc gen age"),
.num = context->profile_gen - object_data->alloc_gen,
};

ddog_prof_Profile_Result result = ddog_prof_Profile_add(
context->profile,
(ddog_prof_Sample) {
.locations = iteration_data.locations,
.values = (ddog_Slice_I64) {.ptr = metric_values, .len = context->state->enabled_values_count},
.labels = {0},
.labels = (ddog_prof_Slice_Label) {
.ptr = labels,
.len = label_offset,
}
},
0
);
Expand All @@ -616,6 +642,7 @@ static void build_heap_profile_without_gvl(struct stack_recorder_state *state, d
.profile = profile,
.error = false,
.error_msg = {0},
.profile_gen = rb_gc_count(),
};
bool iterated = heap_recorder_for_each_live_object(state->heap_recorder, add_heap_sample_to_active_profile_without_gvl, (void*) &iteration_context);
// We wait until we're out of the iteration to grab the gvl and raise. This is important because during
Expand Down Expand Up @@ -779,9 +806,10 @@ static VALUE _native_record_endpoint(DDTRACE_UNUSED VALUE _self, VALUE recorder_
return Qtrue;
}

static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight) {
static VALUE _native_track_object(DDTRACE_UNUSED VALUE _self, VALUE recorder_instance, VALUE new_obj, VALUE weight, VALUE alloc_class) {
ENFORCE_TYPE(weight, T_FIXNUM);
track_object(recorder_instance, new_obj, NUM2UINT(weight));
ddog_CharSlice alloc_class_slice = char_slice_from_ruby_string(alloc_class);
track_object(recorder_instance, new_obj, NUM2UINT(weight), &alloc_class_slice);
return Qtrue;
}

Expand Down
2 changes: 1 addition & 1 deletion ext/ddtrace_profiling_native_extension/stack_recorder.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,5 @@ typedef struct sample_labels {

void record_sample(VALUE recorder_instance, ddog_prof_Slice_Location locations, sample_values values, sample_labels labels);
void record_endpoint(VALUE recorder_instance, uint64_t local_root_span_id, ddog_CharSlice endpoint);
void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight);
void track_object(VALUE recorder_instance, VALUE new_object, unsigned int sample_weight, ddog_CharSlice *alloc_class);
VALUE enforce_recorder_instance(VALUE object);
Original file line number Diff line number Diff line change
Expand Up @@ -572,10 +572,6 @@
it 'records live heap objects' do
stub_const('CpuAndWallTimeWorkerSpec::TestStruct', Struct.new(:foo))

# Warm this up to remove VM-related allocations
# TODO: Remove this when we can match on allocation class
CpuAndWallTimeWorkerSpec::TestStruct.new

start

live_objects = Array.new(test_num_allocated_object)
Expand All @@ -587,7 +583,10 @@

test_struct_heap_sample = lambda { |sample|
first_frame = sample.locations.first
first_frame.lineno == allocation_line && first_frame.path == __FILE__ && first_frame.base_label == 'new' &&
first_frame.lineno == allocation_line &&
first_frame.path == __FILE__ &&
first_frame.base_label == 'new' &&
sample.labels[:'allocation class'] == 'CpuAndWallTimeWorkerSpec::TestStruct' &&
(sample.values[:'heap-live-samples'] || 0) > 0
}

Expand Down
Loading

0 comments on commit c20d042

Please sign in to comment.