Skip to content

Commit

Permalink
[PROF-8667] Heap Profiling - Part 5 - Size (#3333)
Browse files Browse the repository at this point in the history
* [PROF-8667] Heap Profiling - Part 5 - Size

* [PROF-8667] Make size calculations optional

* [PROF-8667] Support special consts in ruby_obj_memsize_of

* [PROF-8667] Fix types

* [PROF-8667] Address comments
  • Loading branch information
AlexJF authored Jan 5, 2024
1 parent 7b996fc commit d809f33
Show file tree
Hide file tree
Showing 12 changed files with 169 additions and 18 deletions.
1 change: 1 addition & 0 deletions benchmarks/profiler_sample_loop_v2.rb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def create_profiler
cpu_time_enabled: true,
alloc_samples_enabled: false,
heap_samples_enabled: false,
heap_size_enabled: false,
timeline_enabled: false,
)
@collector = Datadog::Profiling::Collectors::ThreadContext.new(
Expand Down
33 changes: 27 additions & 6 deletions ext/ddtrace_profiling_native_extension/heap_recorder.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ static object_record* object_record_new(long, heap_record*, live_object_data);
static void object_record_free(object_record*);

struct heap_recorder {
// Config
// Whether the recorder should try to determine approximate sizes for tracked objects.
bool size_enabled;

// Map[key: heap_record_key*, record: heap_record*]
// NOTE: We always use heap_record_key.type == HEAP_STACK for storage but support lookups
// via heap_record_key.type == LOCATION_SLICE to allow for allocation-free fast-paths.
Expand Down Expand Up @@ -126,7 +130,7 @@ static heap_record* get_or_create_heap_record(heap_recorder*, ddog_prof_Slice_Lo
static void cleanup_heap_record_if_unused(heap_recorder*, heap_record*);
static int st_heap_record_entry_free(st_data_t, st_data_t, st_data_t);
static int st_object_record_entry_free(st_data_t, st_data_t, st_data_t);
static int st_object_record_entry_free_if_invalid(st_data_t, st_data_t, st_data_t);
static int st_object_record_update(st_data_t, st_data_t, st_data_t);
static int st_object_records_iterate(st_data_t, st_data_t, st_data_t);
static int st_object_records_debug(st_data_t key, st_data_t value, st_data_t extra);
static int update_object_record_entry(st_data_t*, st_data_t*, st_data_t, int);
Expand All @@ -149,6 +153,7 @@ heap_recorder* heap_recorder_new(void) {
recorder->object_records_snapshot = NULL;
recorder->reusable_locations = ruby_xcalloc(MAX_FRAMES_LIMIT, sizeof(ddog_prof_Location));
recorder->partial_object_record = NULL;
recorder->size_enabled = true;

return recorder;
}
Expand Down Expand Up @@ -182,6 +187,10 @@ void heap_recorder_free(heap_recorder *heap_recorder) {
ruby_xfree(heap_recorder);
}

void heap_recorder_set_size_enabled(heap_recorder *heap_recorder, bool size_enabled) {
heap_recorder->size_enabled = size_enabled;
}

// WARN: Assumes this gets called before profiler is reinitialized on the fork
void heap_recorder_after_fork(heap_recorder *heap_recorder) {
if (heap_recorder == NULL) {
Expand Down Expand Up @@ -260,7 +269,7 @@ void heap_recorder_prepare_iteration(heap_recorder *heap_recorder) {
rb_raise(rb_eRuntimeError, "New heap recorder iteration prepared without the previous one having been finished.");
}

st_foreach(heap_recorder->object_records, st_object_record_entry_free_if_invalid, (st_data_t) heap_recorder);
st_foreach(heap_recorder->object_records, st_object_record_update, (st_data_t) heap_recorder);

heap_recorder->object_records_snapshot = st_copy(heap_recorder->object_records);
if (heap_recorder->object_records_snapshot == NULL) {
Expand Down Expand Up @@ -361,13 +370,15 @@ static int st_object_record_entry_free(DDTRACE_UNUSED st_data_t key, st_data_t v
return ST_DELETE;
}

static int st_object_record_entry_free_if_invalid(st_data_t key, st_data_t value, st_data_t extra_arg) {
static int st_object_record_update(st_data_t key, st_data_t value, st_data_t extra_arg) {
long obj_id = (long) key;
object_record *record = (object_record*) value;
heap_recorder *recorder = (heap_recorder*) extra_arg;

if (!ruby_ref_from_id(LONG2NUM(obj_id), NULL)) {
// Id no longer associated with a valid ref. Need to clean things up!
VALUE ref;

if (!ruby_ref_from_id(LONG2NUM(obj_id), &ref)) {
// Id no longer associated with a valid ref. Need to delete this object record!

// Starting with the associated heap record. There will now be one less tracked object pointing to it
heap_record *heap_record = record->heap_record;
Expand All @@ -380,6 +391,16 @@ static int st_object_record_entry_free_if_invalid(st_data_t key, st_data_t value
return ST_DELETE;
}

// If we got this far, entry is still valid

if (recorder->size_enabled && !record->object_data.is_frozen) {
// if we were asked to update sizes and this object was not already seen as being frozen,
// update size again.
record->object_data.size = ruby_obj_memsize_of(ref);
// Check if it's now frozen so we skip a size update next time
record->object_data.is_frozen = RB_OBJ_FROZEN(ref);
}

return ST_CONTINUE;
}

Expand Down Expand Up @@ -418,7 +439,7 @@ static int st_object_records_debug(DDTRACE_UNUSED st_data_t key, st_data_t value
object_record *record = (object_record*) value;

heap_frame top_frame = record->heap_record->stack->frames[0];
rb_str_catf(debug_str, "obj_id=%ld weight=%d location=%s:%d alloc_gen=%zu ", record->obj_id, record->object_data.weight, top_frame.filename, (int) top_frame.line, record->object_data.alloc_gen);
rb_str_catf(debug_str, "obj_id=%ld weight=%d size=%zu location=%s:%d alloc_gen=%zu ", record->obj_id, record->object_data.weight, record->object_data.size, top_frame.filename, (int) top_frame.line, record->object_data.alloc_gen);

const char *class = record->object_data.class;
if (class != NULL) {
Expand Down
19 changes: 19 additions & 0 deletions ext/ddtrace_profiling_native_extension/heap_recorder.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ typedef struct live_object_data {
// could be seen as being representative of 50 objects.
unsigned int weight;

// Size of this object on last flush/update.
size_t size;

// The class of the object that we're tracking.
// NOTE: This is optional and will be set to NULL if not set.
char* class;
Expand All @@ -35,6 +38,11 @@ typedef struct live_object_data {
//
// This enables us to calculate the age of this object in terms of GC executions.
size_t alloc_gen;

// Whether this object was previously seen as being frozen. If this is the case,
// we'll skip any further size updates since frozen objects are supposed to be
// immutable.
bool is_frozen;
} live_object_data;

// Data that is made available to iterators of heap recorder data for each live object
Expand All @@ -50,6 +58,17 @@ heap_recorder* heap_recorder_new(void);
// Free a previously initialized heap recorder.
void heap_recorder_free(heap_recorder *heap_recorder);

// Sets whether this heap recorder should keep track of sizes or not.
//
// If set to true, the heap recorder will attempt to determine the approximate sizes of
// tracked objects and wield them during iteration.
// If set to false, sizes returned during iteration should not be used/relied on (they
// may be 0 or the last determined size before disabling the tracking of sizes).
//
// NOTE: Default is true, i.e., it will attempt to determine approximate sizes of tracked
// objects.
void heap_recorder_set_size_enabled(heap_recorder *heap_recorder, bool size_enabled);

// Do any cleanup needed after forking.
// WARN: Assumes this gets called before profiler is reinitialized on the fork
void heap_recorder_after_fork(heap_recorder *heap_recorder);
Expand Down
44 changes: 44 additions & 0 deletions ext/ddtrace_profiling_native_extension/ruby_helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -166,3 +166,47 @@ bool ruby_ref_from_id(VALUE obj_id, VALUE *value) {

return true;
}

// Not part of public headers but is externed from Ruby
size_t rb_obj_memsize_of(VALUE obj);

// Wrapper around rb_obj_memsize_of to avoid hitting crashing paths.
//
// The crashing paths are due to calls to rb_bug so should hopefully
// be situations that can't happen. But given that rb_obj_memsize_of
// isn't fully public (it's externed but not part of public headers)
// there is a possibility that it is just assumed that whoever calls
// it, will do proper checking for those cases. We want to be cautious
// so we'll assume that's the case and will skip over known crashing
// paths in this wrapper.
size_t ruby_obj_memsize_of(VALUE obj) {
switch (rb_type(obj)) {
case T_OBJECT:
case T_MODULE:
case T_CLASS:
case T_ICLASS:
case T_STRING:
case T_ARRAY:
case T_HASH:
case T_REGEXP:
case T_DATA:
case T_MATCH:
case T_FILE:
case T_RATIONAL:
case T_COMPLEX:
case T_IMEMO:
case T_FLOAT:
case T_SYMBOL:
case T_BIGNUM:
// case T_NODE: -> Crashes the vm in rb_obj_memsize_of
case T_STRUCT:
case T_ZOMBIE:
#ifndef NO_T_MOVED
case T_MOVED:
#endif
return rb_obj_memsize_of(obj);
default:
// Unsupported, return 0 instead of erroring like rb_obj_memsize_of likes doing
return 0;
}
}
5 changes: 5 additions & 0 deletions ext/ddtrace_profiling_native_extension/ruby_helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -106,3 +106,8 @@ char* ruby_strndup(const char *str, size_t size);
// writes the ref to the value pointer parameter if !NULL. False if id doesn't
// reference a valid object (in which case value is not changed).
bool ruby_ref_from_id(size_t id, VALUE *value);

// Native wrapper to get the approximate/estimated current size of the passed
// object.
size_t ruby_obj_memsize_of(VALUE obj);

25 changes: 21 additions & 4 deletions ext/ddtrace_profiling_native_extension/stack_recorder.c
Original file line number Diff line number Diff line change
Expand Up @@ -153,17 +153,19 @@ static VALUE error_symbol = Qnil; // :error in Ruby
#define ALLOC_SAMPLES_VALUE_ID 3
#define HEAP_SAMPLES_VALUE {.type_ = VALUE_STRING("heap-live-samples"), .unit = VALUE_STRING("count")}
#define HEAP_SAMPLES_VALUE_ID 4
#define HEAP_SIZE_VALUE {.type_ = VALUE_STRING("heap-live-size"), .unit = VALUE_STRING("bytes")}
#define HEAP_SIZE_VALUE_ID 5
#define TIMELINE_VALUE {.type_ = VALUE_STRING("timeline"), .unit = VALUE_STRING("nanoseconds")}
#define TIMELINE_VALUE_ID 5
#define TIMELINE_VALUE_ID 6

static const ddog_prof_ValueType all_value_types[] =
{CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE, HEAP_SAMPLES_VALUE, TIMELINE_VALUE};
{CPU_TIME_VALUE, CPU_SAMPLES_VALUE, WALL_TIME_VALUE, ALLOC_SAMPLES_VALUE, HEAP_SAMPLES_VALUE, HEAP_SIZE_VALUE, TIMELINE_VALUE};

// This array MUST be kept in sync with all_value_types above and is intended to act as a "hashmap" between VALUE_ID and the position it
// occupies on the all_value_types array.
// E.g. all_value_types_positions[CPU_TIME_VALUE_ID] => 0, means that CPU_TIME_VALUE was declared at position 0 of all_value_types.
static const uint8_t all_value_types_positions[] =
{CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID, HEAP_SAMPLES_VALUE_ID, TIMELINE_VALUE_ID};
{CPU_TIME_VALUE_ID, CPU_SAMPLES_VALUE_ID, WALL_TIME_VALUE_ID, ALLOC_SAMPLES_VALUE_ID, HEAP_SAMPLES_VALUE_ID, HEAP_SIZE_VALUE_ID, TIMELINE_VALUE_ID};

#define ALL_VALUE_TYPES_COUNT (sizeof(all_value_types) / sizeof(ddog_prof_ValueType))

Expand Down Expand Up @@ -214,6 +216,7 @@ static VALUE _native_initialize(
VALUE cpu_time_enabled,
VALUE alloc_samples_enabled,
VALUE heap_samples_enabled,
VALUE heap_size_enabled,
VALUE timeline_enabled
);
static VALUE _native_serialize(VALUE self, VALUE recorder_instance);
Expand Down Expand Up @@ -253,7 +256,7 @@ void stack_recorder_init(VALUE profiling_module) {
// https://bugs.ruby-lang.org/issues/18007 for a discussion around this.
rb_define_alloc_func(stack_recorder_class, _native_new);

rb_define_singleton_method(stack_recorder_class, "_native_initialize", _native_initialize, 5);
rb_define_singleton_method(stack_recorder_class, "_native_initialize", _native_initialize, 6);
rb_define_singleton_method(stack_recorder_class, "_native_serialize", _native_serialize, 1);
rb_define_singleton_method(stack_recorder_class, "_native_reset_after_fork", _native_reset_after_fork, 1);
rb_define_singleton_method(testing_module, "_native_active_slot", _native_active_slot, 1);
Expand Down Expand Up @@ -367,11 +370,13 @@ static VALUE _native_initialize(
VALUE cpu_time_enabled,
VALUE alloc_samples_enabled,
VALUE heap_samples_enabled,
VALUE heap_size_enabled,
VALUE timeline_enabled
) {
ENFORCE_BOOLEAN(cpu_time_enabled);
ENFORCE_BOOLEAN(alloc_samples_enabled);
ENFORCE_BOOLEAN(heap_samples_enabled);
ENFORCE_BOOLEAN(heap_size_enabled);
ENFORCE_BOOLEAN(timeline_enabled);

struct stack_recorder_state *state;
Expand All @@ -381,6 +386,7 @@ static VALUE _native_initialize(
(cpu_time_enabled == Qtrue ? 0 : 1) -
(alloc_samples_enabled == Qtrue? 0 : 1) -
(heap_samples_enabled == Qtrue ? 0 : 1) -
(heap_size_enabled == Qtrue ? 0 : 1) -
(timeline_enabled == Qtrue ? 0 : 1);

if (requested_values_count == ALL_VALUE_TYPES_COUNT) return Qtrue; // Nothing to do, this is the default
Expand Down Expand Up @@ -422,7 +428,17 @@ static VALUE _native_initialize(
state->position_for[HEAP_SAMPLES_VALUE_ID] = next_enabled_pos++;
} else {
state->position_for[HEAP_SAMPLES_VALUE_ID] = next_disabled_pos++;
}

if (heap_size_enabled == Qtrue) {
enabled_value_types[next_enabled_pos] = (ddog_prof_ValueType) HEAP_SIZE_VALUE;
state->position_for[HEAP_SIZE_VALUE_ID] = next_enabled_pos++;
} else {
state->position_for[HEAP_SIZE_VALUE_ID] = next_disabled_pos++;
}
heap_recorder_set_size_enabled(state->heap_recorder, heap_size_enabled);

if (heap_samples_enabled == Qfalse && heap_size_enabled == Qfalse) {
// Turns out heap sampling is disabled but we initialized everything in _native_new
// assuming all samples were enabled. We need to deinitialize the heap recorder.
heap_recorder_free(state->heap_recorder);
Expand Down Expand Up @@ -599,6 +615,7 @@ static bool add_heap_sample_to_active_profile_without_gvl(heap_recorder_iteratio
uint8_t *position_for = context->state->position_for;

metric_values[position_for[HEAP_SAMPLES_VALUE_ID]] = object_data->weight;
metric_values[position_for[HEAP_SIZE_VALUE_ID]] = object_data->size * object_data->weight;

ddog_prof_Label labels[2];
size_t label_offset = 0;
Expand Down
1 change: 1 addition & 0 deletions lib/datadog/profiling/component.rb
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,7 @@ def self.build_profiler_component(settings:, agent_settings:, optional_tracer:)
cpu_time_enabled: RUBY_PLATFORM.include?('linux'), # Only supported on Linux currently
alloc_samples_enabled: allocation_profiling_enabled,
heap_samples_enabled: heap_profiling_enabled,
heap_size_enabled: heap_profiling_enabled,
timeline_enabled: timeline_enabled,
)
end
Expand Down
6 changes: 5 additions & 1 deletion lib/datadog/profiling/stack_recorder.rb
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,10 @@ module Profiling
# Note that `record_sample` is only accessible from native code.
# Methods prefixed with _native_ are implemented in `stack_recorder.c`
class StackRecorder
def initialize(cpu_time_enabled:, alloc_samples_enabled:, heap_samples_enabled:, timeline_enabled:)
def initialize(
cpu_time_enabled:, alloc_samples_enabled:, heap_samples_enabled:, heap_size_enabled:,
timeline_enabled:
)
# This mutex works in addition to the fancy C-level mutexes we have in the native side (see the docs there).
# It prevents multiple Ruby threads calling serialize at the same time -- something like
# `10.times { Thread.new { stack_recorder.serialize } }`.
Expand All @@ -18,6 +21,7 @@ def initialize(cpu_time_enabled:, alloc_samples_enabled:, heap_samples_enabled:,
cpu_time_enabled,
alloc_samples_enabled,
heap_samples_enabled,
heap_size_enabled,
timeline_enabled,
)
end
Expand Down
9 changes: 8 additions & 1 deletion sig/datadog/profiling/stack_recorder.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,20 @@ module Datadog
class StackRecorder
@no_concurrent_synchronize_mutex: ::Thread::Mutex

def initialize: (cpu_time_enabled: bool, alloc_samples_enabled: bool, heap_samples_enabled: bool, timeline_enabled: bool) -> void
def initialize: (
cpu_time_enabled: bool,
alloc_samples_enabled: bool,
heap_samples_enabled: bool,
heap_size_enabled: bool,
timeline_enabled: bool,
) -> void

def self._native_initialize: (
Datadog::Profiling::StackRecorder recorder_instance,
bool cpu_time_enabled,
bool alloc_samples_enabled,
bool heap_samples_enabled,
bool heap_size_enabled,
bool timeline_enabled,
) -> true

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@
let(:allocation_sample_every) { 50 }
let(:allocation_profiling_enabled) { false }
let(:heap_profiling_enabled) { false }
let(:recorder) { build_stack_recorder(heap_samples_enabled: heap_profiling_enabled) }
let(:recorder) do
build_stack_recorder(heap_samples_enabled: heap_profiling_enabled, heap_size_enabled: heap_profiling_enabled)
end
let(:no_signals_workaround_enabled) { false }
let(:timeline_enabled) { false }
let(:options) { {} }
Expand Down Expand Up @@ -594,6 +596,8 @@
.find(&test_struct_heap_sample)

expect(relevant_sample.values[:'heap-live-samples']).to eq test_num_allocated_object
# 40 is the size of a basic object and we have test_num_allocated_object of them
expect(relevant_sample.values[:'heap-live-size']).to eq test_num_allocated_object * 40
end
end

Expand Down
3 changes: 2 additions & 1 deletion spec/datadog/profiling/spec_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,12 @@ def samples_for_thread(samples, thread)

# We disable heap_sample collection by default in tests since it requires some extra mocking/
# setup for it to properly work.
def build_stack_recorder(heap_samples_enabled: false, timeline_enabled: false)
def build_stack_recorder(heap_samples_enabled: false, heap_size_enabled: false, timeline_enabled: false)
Datadog::Profiling::StackRecorder.new(
cpu_time_enabled: true,
alloc_samples_enabled: true,
heap_samples_enabled: heap_samples_enabled,
heap_size_enabled: heap_size_enabled,
timeline_enabled: timeline_enabled,
)
end
Expand Down
Loading

0 comments on commit d809f33

Please sign in to comment.