Commit

Merge pull request #2644 from DataDog/ivoanjo/avoid-thread-list-allocation

Reuse array for listing threads instead of creating one every time
ivoanjo authored Feb 24, 2023
2 parents 4ea0c70 + 31f644d commit 484d0c1
Showing 4 changed files with 36 additions and 17 deletions.
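The pattern behind this change is a common one for Ruby C extensions that want to avoid per-call allocations: keep a single long-lived Ruby array inside the collector's native state, clear and refill it on every sample, and mark it during GC so the array (and the thread objects it references) stays alive. Below is a minimal, self-contained sketch of that pattern; the `Sketch` class and the `sketch_*` names are illustrative stand-ins, not the profiler's actual code.

```c
#include <ruby.h>

// Sketch of the allocation-avoidance pattern in this commit (illustrative names, not the
// profiler's real code): one reusable Ruby array lives in the native state.
struct sketch_state {
  VALUE thread_list_buffer; // created once per instance, reused on every call
};

static void sketch_mark(void *ptr) {
  struct sketch_state *state = (struct sketch_state *) ptr;
  rb_gc_mark(state->thread_list_buffer); // without this, GC could collect the reusable buffer
}

static void sketch_free(void *ptr) { xfree(ptr); }

static const rb_data_type_t sketch_typed_data = {
  .wrap_struct_name = "sketch_state",
  .function = { .dmark = sketch_mark, .dfree = sketch_free, .dsize = NULL },
  .flags = RUBY_TYPED_FREE_IMMEDIATELY,
};

static VALUE sketch_new(VALUE klass) {
  struct sketch_state *state = ruby_xcalloc(1, sizeof(struct sketch_state));
  state->thread_list_buffer = rb_ary_new(); // the only array allocation, done once
  return TypedData_Wrap_Struct(klass, &sketch_typed_data, state);
}

// Hot path: clears and refills the same array instead of allocating a new one per call.
static VALUE sketch_thread_list(VALUE self) {
  struct sketch_state *state;
  TypedData_Get_Struct(self, struct sketch_state, &sketch_typed_data, state);
  rb_ary_clear(state->thread_list_buffer);
  rb_ary_push(state->thread_list_buffer, rb_thread_current()); // stand-in for listing every thread
  return state->thread_list_buffer;
}

void Init_sketch(void) {
  VALUE klass = rb_define_class("Sketch", rb_cObject);
  rb_define_alloc_func(klass, sketch_new);
  rb_define_method(klass, "thread_list", sketch_thread_list, 0);
}
```

The diff below applies exactly this shape to the collector: the buffer is added to the state struct, created in `_native_new`, marked in the typed-data mark function, and cleared and refilled by a new `thread_list()` helper.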
@@ -92,6 +92,8 @@ struct cpu_and_wall_time_collector_state {
   // Currently **outside** of stats struct because we also use it to decide when to clean the contexts, and thus this
   // is not (just) a stat.
   unsigned int sample_count;
+  // Reusable array to get list of threads
+  VALUE thread_list_buffer;

   struct stats {
     // Track how many garbage collection samples we've taken.
@@ -174,6 +176,7 @@ static VALUE _native_stats(VALUE self, VALUE collector_instance);
 static void trace_identifiers_for(struct cpu_and_wall_time_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result);
 static bool is_type_web(VALUE root_span_type);
 static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE collector_instance);
+static VALUE thread_list(struct cpu_and_wall_time_collector_state *state);

 void collectors_cpu_and_wall_time_init(VALUE profiling_module) {
   VALUE collectors_module = rb_define_module_under(profiling_module, "Collectors");
@@ -231,6 +234,7 @@ static void cpu_and_wall_time_collector_typed_data_mark(void *state_ptr) {
   // Update this when modifying state struct
   rb_gc_mark(state->recorder_instance);
   st_foreach(state->hash_map_per_thread_context, hash_map_per_thread_context_mark, 0 /* unused */);
+  rb_gc_mark(state->thread_list_buffer);
 }

 static void cpu_and_wall_time_collector_typed_data_free(void *state_ptr) {
@@ -274,6 +278,7 @@ static VALUE _native_new(VALUE klass) {
     st_init_numtable();
   state->recorder_instance = Qnil;
   state->tracer_context_key = MISSING_TRACER_CONTEXT_KEY;
+  state->thread_list_buffer = rb_ary_new();

   return TypedData_Wrap_Struct(klass, &cpu_and_wall_time_collector_typed_data, state);
 }
@@ -349,7 +354,7 @@ void cpu_and_wall_time_collector_sample(VALUE self_instance, long current_monoto
   struct per_thread_context *current_thread_context = get_or_create_context_for(current_thread, state);
   long cpu_time_at_sample_start_for_current_thread = cpu_time_now_ns(current_thread_context);

-  VALUE threads = ddtrace_thread_list();
+  VALUE threads = thread_list(state);

   const long thread_count = RARRAY_LEN(threads);
   for (long i = 0; i < thread_count; i++) {
@@ -513,7 +518,7 @@ VALUE cpu_and_wall_time_collector_sample_after_gc(VALUE self_instance) {
   struct cpu_and_wall_time_collector_state *state;
   TypedData_Get_Struct(self_instance, struct cpu_and_wall_time_collector_state, &cpu_and_wall_time_collector_typed_data, state);

-  VALUE threads = ddtrace_thread_list();
+  VALUE threads = thread_list(state);
   bool sampled_any_thread = false;

   const long thread_count = RARRAY_LEN(threads);
@@ -661,7 +666,9 @@ static void trigger_sample_for_thread(
 // This method exists only to enable testing Datadog::Profiling::Collectors::CpuAndWallTime behavior using RSpec.
 // It SHOULD NOT be used for other purposes.
 static VALUE _native_thread_list(DDTRACE_UNUSED VALUE _self) {
-  return ddtrace_thread_list();
+  VALUE result = rb_ary_new();
+  ddtrace_thread_list(result);
+  return result;
 }

 static struct per_thread_context *get_or_create_context_for(VALUE thread, struct cpu_and_wall_time_collector_state *state) {
@@ -934,3 +941,10 @@ static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE collector

   return Qtrue;
 }
+
+static VALUE thread_list(struct cpu_and_wall_time_collector_state *state) {
+  VALUE result = state->thread_list_buffer;
+  rb_ary_clear(result);
+  ddtrace_thread_list(result);
+  return result;
+}
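One caveat worth calling out: `thread_list()` now hands callers the collector's own buffer rather than a fresh array, so the result is only valid until the next sample clears and refills it. A hedged sketch of how a sampling loop might consume it; the identifiers here are illustrative, not the actual sampler code.

```c
#include <ruby.h>

// Illustrative consumer of the reused list: iterate immediately and don't keep a reference to
// the array, because the next call to the (hypothetical) thread_list() clears and refills it.
static void sample_all_threads(VALUE reused_thread_list) {
  const long thread_count = RARRAY_LEN(reused_thread_list);
  for (long i = 0; i < thread_count; i++) {
    VALUE thread = RARRAY_AREF(reused_thread_list, i);
    rb_funcall(thread, rb_intern("name"), 0); // stand-in for taking a sample of `thread`
  }
}
```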
@@ -204,8 +204,7 @@ ptrdiff_t stack_depth_for(VALUE thread) {
 #endif

 // Tries to match rb_thread_list() but that method isn't accessible to extensions
-VALUE ddtrace_thread_list(void) {
-  VALUE result = rb_ary_new();
+void ddtrace_thread_list(VALUE result_array) {
   rb_thread_t *thread = NULL;

   // Ruby 3 Safety: Our implementation is inspired by `rb_ractor_thread_list` BUT that method wraps the operations below
@@ -234,13 +233,11 @@ VALUE ddtrace_thread_list(void) {
      case THREAD_RUNNABLE:
      case THREAD_STOPPED:
      case THREAD_STOPPED_FOREVER:
-        rb_ary_push(result, thread->self);
+        rb_ary_push(result_array, thread->self);
      default:
        break;
    }
  }
-
-  return result;
 }

 bool is_thread_alive(VALUE thread) {
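The signature change turns `ddtrace_thread_list` into a fill-style function: instead of allocating and returning a new array, it appends into whatever array the caller provides. That lets the hot sampling path pass the reusable buffer while the RSpec-only `_native_thread_list` helper can still allocate a fresh array. Below is a sketch of the two calling styles; the helper names are made up to stand in for the real internals.

```c
#include <ruby.h>

// Fill-style API sketch: the callee never allocates; it appends into the caller's array.
static void sketch_fill_thread_list(VALUE result_array) {
  rb_ary_push(result_array, rb_thread_current()); // stand-in for walking the VM's thread list
}

// Hot path: reuse a long-lived, cleared buffer, so listing threads allocates nothing here.
static VALUE list_into_reusable_buffer(VALUE reusable_buffer) {
  rb_ary_clear(reusable_buffer);
  sketch_fill_thread_list(reusable_buffer);
  return reusable_buffer;
}

// Test-helper path: allocate a fresh array per call, keeping the old "independent snapshot" behavior.
static VALUE list_into_new_array(void) {
  VALUE result = rb_ary_new();
  sketch_fill_thread_list(result);
  return result;
}
```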
@@ -23,7 +23,7 @@ bool is_current_thread_holding_the_gvl(void);
 current_gvl_owner gvl_owner(void);
 uint64_t native_thread_id_for(VALUE thread);
 ptrdiff_t stack_depth_for(VALUE thread);
-VALUE ddtrace_thread_list(void);
+void ddtrace_thread_list(VALUE result_array);
 bool is_thread_alive(VALUE thread);
 VALUE thread_name_for(VALUE thread);

24 changes: 16 additions & 8 deletions spec/datadog/profiling/collectors/cpu_and_wall_time_worker_spec.rb
@@ -542,16 +542,24 @@
     end

     it 'returns the number of allocations between two calls of the method' do
+      # To get the exact expected number of allocations, we run this once before so that Ruby can create and cache all
+      # it needs to
+      new_object = proc { Object.new }
+      1.times(&new_object)
+
       before_allocations = described_class._native_allocation_count
-      100.times { Object.new }
+      100.times(&new_object)
       after_allocations = described_class._native_allocation_count

-      # The profiler can (currently) cause extra allocations, which is why this is not exactly 100
-      expect(after_allocations - before_allocations).to be >= 100
-      expect(after_allocations - before_allocations).to be < 110
+      expect(after_allocations - before_allocations).to be 100
     end

     it 'returns different numbers of allocations for different threads' do
+      # To get the exact expected number of allocations, we run this once before so that Ruby can create and cache all
+      # it needs to
+      new_object = proc { Object.new }
+      1.times(&new_object)
+
       t1_can_run = Queue.new
       t1_has_run = Queue.new
       before_t1 = nil
@@ -561,7 +569,7 @@
         before_t1 = described_class._native_allocation_count
         t1_can_run.pop

-        100.times { Object.new }
+        100.times(&new_object)
         after_t1 = described_class._native_allocation_count
         t1_has_run << true
       end
Expand All @@ -573,10 +581,10 @@

background_t1.join

# This test checks that even though we observed >= 100 allocations in a background thread t1, the counters for
# the current thread were not affected by this change (even though they may be slightly affected by the profiler)
# This test checks that even though we observed 100 allocations in a background thread t1, the counters for
# the current thread were not affected by this change

expect(after_t1 - before_t1).to be >= 100
expect(after_t1 - before_t1).to be 100
expect(after_allocations - before_allocations).to be < 10
end
end