From 4f72d84c6f7c6ab2d239dc403210264eece867a9 Mon Sep 17 00:00:00 2001 From: Ivo Anjo Date: Tue, 5 Sep 2023 09:41:02 +0100 Subject: [PATCH] Fix missing endpoint profiling when request_queuing is enabled in rack instrumentation **What does this PR do?** This PR tweaks the logic that decides if the profiler should collect the resource from an active trace to also consider the span type `proxy`, which is used when the `request_queuing` feature is in use in the rack instrumentation. Without this fix, the endpoint profiling feature (e.g. allowing customers to drill into their profiles by endpoint) does not work in combination with `request_queuing`, and the feature is hidden in the profiler UX. **Motivation:** Make sure that customers that enable `request_queuing` are still able to look at their profiles per-endpoint. **Additional Notes:** N/A **How to test the change?** The change includes test coverage. If you're interested in testing it out with a real http app, you can do so by enabling the `request_queuing` feature: ```ruby Datadog.configure do |c| c.tracing.instrument :rack, request_queuing: true end ``` and then perform a request to the app including the `X-Request-Start` header: ``` $ curl --header "X-Request-Start: t=1693901946000" http://localhost:8080/foo ``` This will generate a `queue` span, and with this change, the endpoint will show up in the right sidebar when looking at profiles for this application. Without this change, it would not show up at all for requests with the `queue` span. 
--- .../collectors_thread_context.c | 20 ++++++++++++++----- .../collectors/thread_context_spec.rb | 17 +++++++++++++--- 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c index 47ab3d08eb6..4938c611d05 100644 --- a/ext/ddtrace_profiling_native_extension/collectors_thread_context.c +++ b/ext/ddtrace_profiling_native_extension/collectors_thread_context.c @@ -201,7 +201,7 @@ static long cpu_time_now_ns(struct per_thread_context *thread_context); static long thread_id_for(VALUE thread); static VALUE _native_stats(VALUE self, VALUE collector_instance); static void trace_identifiers_for(struct thread_context_collector_state *state, VALUE thread, struct trace_identifiers *trace_identifiers_result); -static bool is_type_web(VALUE root_span_type); +static bool should_collect_resource(VALUE root_span_type); static VALUE _native_reset_after_fork(DDTRACE_UNUSED VALUE self, VALUE collector_instance); static VALUE thread_list(struct thread_context_collector_state *state); static VALUE _native_sample_allocation(DDTRACE_UNUSED VALUE self, VALUE collector_instance, VALUE sample_weight, VALUE new_object); @@ -1041,7 +1041,7 @@ static void trace_identifiers_for(struct thread_context_collector_state *state, if (!state->endpoint_collection_enabled) return; VALUE root_span_type = rb_ivar_get(root_span, at_type_id /* @type */); - if (root_span_type == Qnil || !is_type_web(root_span_type)) return; + if (root_span_type == Qnil || !should_collect_resource(root_span_type)) return; VALUE trace_resource = rb_ivar_get(active_trace, at_resource_id /* @resource */); if (RB_TYPE_P(trace_resource, T_STRING)) { @@ -1052,11 +1052,21 @@ static void trace_identifiers_for(struct thread_context_collector_state *state, } } -static bool is_type_web(VALUE root_span_type) { +// We only collect the resource for spans of types: +// * 'web', for web requests +// * 
'proxy', used by the rack integration with request_queuing: true (e.g. also represents a web request) +// +// NOTE: Currently we're only interested in HTTP service endpoints. Over time, this list may be expanded. +// Resources MUST NOT include personal identifiable information (PII); this should not be the case with +// ddtrace integrations, but worth mentioning just in case :) +static bool should_collect_resource(VALUE root_span_type) { ENFORCE_TYPE(root_span_type, T_STRING); - return RSTRING_LEN(root_span_type) == strlen("web") && - (memcmp("web", StringValuePtr(root_span_type), strlen("web")) == 0); + int root_span_type_length = RSTRING_LEN(root_span_type); + const char *root_span_type_value = StringValuePtr(root_span_type); + + return (root_span_type_length == strlen("web") && (memcmp("web", root_span_type_value, strlen("web")) == 0)) || + (root_span_type_length == strlen("proxy") && (memcmp("proxy", root_span_type_value, strlen("proxy")) == 0)); } // After the Ruby VM forks, this method gets called in the child process to clean up any leftover state from the parent. 
diff --git a/spec/datadog/profiling/collectors/thread_context_spec.rb b/spec/datadog/profiling/collectors/thread_context_spec.rb index 3efce8da20b..b2aa16cf490 100644 --- a/spec/datadog/profiling/collectors/thread_context_spec.rb +++ b/spec/datadog/profiling/collectors/thread_context_spec.rb @@ -411,9 +411,7 @@ def another_way_of_calling_sample(profiler_overhead_stack_thread: Thread.current expect(t1_sample.labels).to_not include(:'trace endpoint' => anything) end - context 'when local root span type is web' do - let(:root_span_type) { 'web' } - + shared_examples_for 'samples with code hotspots information' do it 'includes the "trace endpoint" label in the samples' do sample @@ -523,6 +521,19 @@ def another_way_of_calling_sample(profiler_overhead_stack_thread: Thread.current end end end + + context 'when local root span type is web' do + let(:root_span_type) { 'web' } + + it_behaves_like 'samples with code hotspots information' + end + + # Used by the rack integration with request_queuing: true + context 'when local root span type is proxy' do + let(:root_span_type) { 'proxy' } + + it_behaves_like 'samples with code hotspots information' + end end end end