From 421ff8f27ed04b7c962519a7bde70a74946f7128 Mon Sep 17 00:00:00 2001 From: David Feldman Date: Wed, 3 Jun 2020 11:33:31 -0500 Subject: [PATCH 1/2] Writer thread safety --- lib/ddtrace/writer.rb | 29 ++++++++++++++++++----------- 1 file changed, 18 insertions(+), 11 deletions(-) diff --git a/lib/ddtrace/writer.rb b/lib/ddtrace/writer.rb index abbe4d0f5bd..2b45bf2605c 100644 --- a/lib/ddtrace/writer.rb +++ b/lib/ddtrace/writer.rb @@ -43,9 +43,17 @@ def initialize(options = {}) @worker = nil end - # spawns a worker for spans; they share the same transport which is thread-safe def start - @pid = Process.pid + @mutex_after_fork.synchronize do + pid = Process.pid + return if @worker && pid == @pid + @pid = pid + start_worker + end + end + + # spawns a worker for spans; they share the same transport which is thread-safe + def start_worker @trace_handler = ->(items, transport) { send_spans(items, transport) } @worker = Datadog::Workers::AsyncTransport.new( transport: @transport, @@ -57,14 +65,19 @@ def start @worker.start end - # stops worker for spans. def stop - return if worker.nil? + @mutex_after_fork.synchronize { stop_worker } + end + + def stop_worker + return if @worker.nil? @worker.stop @worker = nil true end + private :start_worker, :stop_worker + # flush spans to the trace-agent, handles spans only def send_spans(traces, transport) return true if traces.empty? @@ -106,13 +119,7 @@ def write(trace, services = nil) # # This check ensures that if a process doesn't own the current +Writer+, async workers # will be initialized again (but only once for each process). - pid = Process.pid - if pid != @pid # avoid using Mutex when pids are equal - @mutex_after_fork.synchronize do - # we should start threads because the worker doesn't own this - start if pid != @pid - end - end + start if @worker.nil? || @pid != Process.pid # TODO: Remove this, and have the tracer pump traces directly to runtime metrics # instead of working through the trace writer. From a88bda3bd4c348c1f5026ab84d5ec59dbec9f41b Mon Sep 17 00:00:00 2001 From: David Feldman Date: Wed, 3 Jun 2020 12:04:46 -0500 Subject: [PATCH 2/2] nicer error message when one thread stops a writer another thread is in the middle of writing to --- lib/ddtrace/writer.rb | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/ddtrace/writer.rb b/lib/ddtrace/writer.rb index 2b45bf2605c..b0c07492f28 100644 --- a/lib/ddtrace/writer.rb +++ b/lib/ddtrace/writer.rb @@ -49,6 +49,7 @@ def start return if @worker && pid == @pid @pid = pid start_worker + true end end @@ -128,7 +129,13 @@ def write(trace, services = nil) Datadog.runtime_metrics.associate_with_span(trace.first) end - @worker.enqueue_trace(trace) + worker_local = @worker + + if worker_local + worker_local.enqueue_trace(trace) + else + Datadog.logger.debug('Writer either failed to start or was stopped before #write could complete') + end end # stats returns a dictionary of stats about the writer.