From 5ca1262a6022a09e40bcc69ab602a49c795f1fe6 Mon Sep 17 00:00:00 2001 From: Marco Costa Date: Fri, 11 Aug 2023 15:58:17 -0700 Subject: [PATCH] Enable OpenTelemetry tests for Ruby --- tests/parametric/test_otel_span_methods.py | 19 +- tests/parametric/test_otel_span_with_w3c.py | 2 - tests/parametric/test_otel_tracer.py | 2 - utils/build/docker/ruby/parametric/Gemfile | 3 + utils/build/docker/ruby/parametric/server.rb | 186 +++++++++++++++++-- 5 files changed, 186 insertions(+), 26 deletions(-) diff --git a/tests/parametric/test_otel_span_methods.py b/tests/parametric/test_otel_span_methods.py index ca2d78a432..30c2775c29 100644 --- a/tests/parametric/test_otel_span_methods.py +++ b/tests/parametric/test_otel_span_methods.py @@ -22,7 +22,6 @@ class Test_Otel_Span_Methods: @irrelevant(context.library == "cpp", reason="library does not implement OpenTelemetry") @missing_feature(context.library == "php", reason="Not implemented") - @missing_feature(context.library == "ruby", reason="Not implemented") def test_otel_start_span(self, test_agent, test_library): """ - Start/end a span with start and end options @@ -47,7 +46,6 @@ def test_otel_start_span(self, test_agent, test_library): @irrelevant(context.library == "cpp", reason="library does not implement OpenTelemetry") @missing_feature(context.library == "php", reason="Not implemented") - @missing_feature(context.library == "ruby", reason="Not implemented") def test_otel_set_service_name(self, test_agent, test_library): """ - Update the service name on a span @@ -64,7 +62,6 @@ def test_otel_set_service_name(self, test_agent, test_library): @irrelevant(context.library == "cpp", reason="library does not implement OpenTelemetry") @missing_feature(context.library == "nodejs", reason="Empty string attribute value are not supported") @missing_feature(context.library == "php", reason="Not implemented") - @missing_feature(context.library == "ruby", reason="Not implemented") def test_otel_set_attributes_different_types(self, test_agent, test_library): """ - Set attributes of multiple types for an otel span @@ -110,13 +107,21 @@ def test_otel_set_attributes_different_types(self, test_agent, test_library): assert root_span["meta"]["d_bool_val"] == "false" assert root_span["meta"]["array_val_int"] == "[10, 20]" assert root_span["meta"]["array_val_double"] == "[10.1, 20.2]" - elif root_span["meta"]["language"] == "dotnet": + elif root_span["meta"]["language"] in "dotnet": assert root_span["meta"]["bool_val"] == "true" assert root_span["meta"]["array_val_bool"] == "[true,false]" assert root_span["meta"]["array_val_str"] == '["val1","val2"]' assert root_span["meta"]["d_bool_val"] == "false" assert root_span["meta"]["array_val_int"] == "[10,20]" assert root_span["meta"]["array_val_double"] == "[10.1,20.2]" + elif root_span["meta"]["language"] in "ruby": + assert root_span["meta"]["bool_val"] == "true" + assert root_span["meta"]["array_val_bool"] == "[true, false]" + assert root_span["meta"]["array_val_str"] == '["val1", "val2"]' + + assert root_span["meta"]["d_bool_val"] == "false" + assert root_span["meta"]["array_val_int"] == "[10, 20]" + assert root_span["meta"]["array_val_double"] == "[10.1, 20.2]" else: assert root_span["meta"]["bool_val"] == "True" assert root_span["meta"]["array_val_bool"] == "[True, False]" @@ -137,7 +142,6 @@ def test_otel_set_attributes_different_types(self, test_agent, test_library): reason=".NET's native implementation does not change IsAllDataRequested to false after ending a span. OpenTelemetry follows this as well for IsRecording.", ) @missing_feature(context.library == "php", reason="Not implemented") - @missing_feature(context.library == "ruby", reason="Not implemented") def test_otel_span_is_recording(self, test_agent, test_library): """ Test functionality of ending a span. @@ -156,7 +160,6 @@ def test_otel_span_is_recording(self, test_agent, test_library): context.library == "dotnet", reason=".NET's native implementation does not change IsAllDataRequested to false after ending a span. OpenTelemetry follows this as well for IsRecording.", ) - @missing_feature(context.library == "ruby", reason="Not implemented") @missing_feature(context.library == "php", reason="Not implemented") def test_otel_span_finished_end_options(self, test_agent, test_library): """ @@ -179,7 +182,6 @@ def test_otel_span_finished_end_options(self, test_agent, test_library): @irrelevant(context.library == "cpp", reason="library does not implement OpenTelemetry") @missing_feature(context.library == "php", reason="Not implemented") - @missing_feature(context.library == "ruby", reason="Not implemented") def test_otel_span_end(self, test_agent, test_library): """ Test functionality of ending a span. After ending: @@ -212,7 +214,6 @@ def test_otel_span_end(self, test_agent, test_library): context.library == "dotnet", reason=".NET's native implementation unsets the error message. OpenTelemetry also unsets the error message.", ) - @missing_feature(context.library == "ruby", reason="Not implemented") @missing_feature(context.library == "php", reason="Not implemented") def test_otel_set_span_status_error(self, test_agent, test_library): """ @@ -238,7 +239,6 @@ def test_otel_set_span_status_error(self, test_agent, test_library): context.library == "dotnet", reason=".NET's native implementation and OpenTelemetry implementation do not enforce this and allow the status to be changed.", ) - @missing_feature(context.library == "ruby", reason="Not implemented") @missing_feature(context.library == "php", reason="Not implemented") @missing_feature( context.library == "python", @@ -269,7 +269,6 @@ def test_otel_set_span_status_ok(self, test_agent, test_library): assert span.get("name") == "ok_span" @irrelevant(context.library == "cpp", reason="library does not implement OpenTelemetry") - @missing_feature(context.library == "ruby", reason="Not implemented") @missing_feature(context.library == "php", reason="Not implemented") def test_otel_get_span_context(self, test_agent, test_library): """ diff --git a/tests/parametric/test_otel_span_with_w3c.py b/tests/parametric/test_otel_span_with_w3c.py index fd3735324a..f605dc5f2c 100644 --- a/tests/parametric/test_otel_span_with_w3c.py +++ b/tests/parametric/test_otel_span_with_w3c.py @@ -19,7 +19,6 @@ class Test_Otel_Span_With_W3c: @irrelevant(context.library == "cpp", reason="library does not implement OpenTelemetry") @missing_feature(context.library == "php", reason="Not implemented") - @missing_feature(context.library == "ruby", reason="Not implemented") def test_otel_start_span_with_w3c(self, test_agent, test_library): """ - Start/end a span with start and end options @@ -44,7 +43,6 @@ def test_otel_start_span_with_w3c(self, test_agent, test_library): @irrelevant(context.library == "cpp", reason="library does not implement OpenTelemetry") @missing_feature(context.library == "php", reason="Not implemented") - @missing_feature(context.library == "ruby", reason="Not implemented") def test_otel_span_with_w3c_headers(self, test_agent, test_library): with test_library: with test_library.otel_start_span( diff --git a/tests/parametric/test_otel_tracer.py b/tests/parametric/test_otel_tracer.py index 4a12f13990..7f8beba0a8 100644 --- a/tests/parametric/test_otel_tracer.py +++ b/tests/parametric/test_otel_tracer.py @@ -17,7 +17,6 @@ @scenarios.parametric class Test_Otel_Tracer: @irrelevant(context.library == "cpp", reason="library does not implement OpenTelemetry") - @missing_feature(context.library == "ruby", reason="Not implemented") @missing_feature(context.library == "php", reason="Not implemented") def test_otel_simple_trace(self, test_agent, test_library): """ @@ -58,7 +57,6 @@ def test_otel_simple_trace(self, test_agent, test_library): @irrelevant(context.library == "cpp", reason="library does not implement OpenTelemetry") @missing_feature(context.library == "php", reason="Not implemented") - @missing_feature(context.library == "ruby", reason="Not implemented") def test_force_flush_otel(self, test_agent, test_library): """ Verify that force flush flushed the spans diff --git a/utils/build/docker/ruby/parametric/Gemfile b/utils/build/docker/ruby/parametric/Gemfile index fb7fdaaec3..00908630d8 100644 --- a/utils/build/docker/ruby/parametric/Gemfile +++ b/utils/build/docker/ruby/parametric/Gemfile @@ -9,3 +9,6 @@ end gem 'grpc' gem 'grpc-tools' + +# Only used for OpenTelemetry testing. +gem 'opentelemetry-sdk' diff --git a/utils/build/docker/ruby/parametric/server.rb b/utils/build/docker/ruby/parametric/server.rb index d5b5777635..c5cd88fbb7 100644 --- a/utils/build/docker/ruby/parametric/server.rb +++ b/utils/build/docker/ruby/parametric/server.rb @@ -7,10 +7,17 @@ require 'datadog/tracing/contrib/grpc/distributed/propagation' # Loads optional `Datadog::Tracing::Contrib::GRPC::Distributed` require 'apm_test_client_services_pb' +# Only used for OpenTelemetry testing. +require 'opentelemetry/sdk' +require 'datadog/opentelemetry' # TODO: Remove when DD_TRACE_OTEL_ENABLED=true works out of the box for Ruby APM + +OpenTelemetry::SDK.configure # Initialize OpenTelemetry + Datadog.configure do |c| c.diagnostics.debug = true # When tests fail, ensure there's enough data to debug the failure. c.logger.instance = Logger.new(STDOUT) # Make sure logs are available for inspection from outside the container. c.tracing.instrument :http # Used for `http_client_request` + c.tracing.distributed_tracing.propagation_extract_style = ['Datadog', 'tracecontext'] end if Datadog::Core::Remote.active_remote @@ -30,7 +37,7 @@ def start_span(start_span_args, _call) end digest = if start_span_args.http_headers.http_headers.size != 0 - Datadog::Tracing::Contrib::GRPC::Distributed::Propagation.new.extract(start_span_args.http_headers.http_headers.map { |t| [t.key, t.value] }.to_h) + Datadog::Tracing::Contrib::GRPC::Distributed::Propagation.new.extract(header_hash(start_span_args.http_headers)) elsif !start_span_args.origin.empty? || start_span_args.parent_id != 0 # DEV: Parametric tests do not differentiate between a distributed span request from a span parenting request. # DEV: We have to consider the parent_id being present present and origin being absent as a span parenting request. @@ -131,7 +138,8 @@ def inject_headers(inject_headers_args, _call) end def flush_spans(flush_spans_args, _call) - sleep 0.05 until Datadog.send(:components).tracer.writer.worker&.trace_buffer.empty? + wait_for_flush(5) + FlushSpansReturn.new end @@ -139,20 +147,105 @@ def flush_trace_stats(flush_trace_stats_args, _call) FlushTraceStatsReturn.new end - # TODO: Implement the following OTel methods - # :otel_start_span, ::OtelStartSpanArgs, ::OtelStartSpanReturn - # :otel_end_span, ::OtelEndSpanArgs, ::OtelEndSpanReturn - # :otel_is_recording, ::OtelIsRecordingArgs, ::OtelIsRecordingReturn - # :otel_span_context, ::OtelSpanContextArgs, ::OtelSpanContextReturn - # :otel_set_status, ::OtelSetStatusArgs, ::OtelSetStatusReturn - # :otel_set_name, ::OtelSetNameArgs, ::OtelSetNameReturn - # :otel_set_attributes, ::OtelSetAttributesArgs, ::OtelSetAttributesReturn - # :otel_flush_spans, ::OtelFlushSpansArgs, ::OtelFlushSpansReturn - # :otel_flush_trace_stats, ::OtelFlushTraceStatsArgs, ::OtelFlushTraceStatsReturn + OTEL_SPAN_KIND = { + 1 => :internal, + 2 => :server, + 3 => :client, + 4 => :producer, + 5 => :consumer, + } + + def otel_start_span(otel_start_span_args, _call) + headers = header_hash(otel_start_span_args.http_headers) + if !headers.empty? + parent_context = OpenTelemetry.propagation.extract(headers) + elsif otel_start_span_args.parent_id != 0 + parent_span = find_otel_span(otel_start_span_args.parent_id) + parent_context = OpenTelemetry::Trace.context_with_span(parent_span) + end + + span = otel_tracer.start_span( + otel_start_span_args.name, + with_parent: parent_context, + attributes: otel_parse_attributes(otel_start_span_args.attributes), + start_timestamp: otel_correct_time(otel_start_span_args.timestamp), + kind: OTEL_SPAN_KIND[otel_start_span_args.span_kind] + ) + + context = span.context + + @otel_spans[otel_id_to_i(context.span_id)] = span + + OtelStartSpanReturn.new(span_id: otel_id_to_i(context.span_id), trace_id: otel_id_to_i(context.trace_id)) + end + + def otel_end_span(otel_end_span_args, _call) + span = find_otel_span(otel_end_span_args.id) + span.finish(end_timestamp: otel_correct_time(otel_end_span_args.timestamp)) + + OtelEndSpanReturn.new + end + + def otel_is_recording(otel_is_recording_args, _call) + span = find_otel_span(otel_is_recording_args.span_id) + OtelIsRecordingReturn.new(is_recording: span.recording?) + end + + def otel_span_context(otel_span_context_args, _call) + span = find_otel_span(otel_span_context_args.span_id) + context = span.context + + OtelSpanContextReturn.new( + span_id: format('%016x', otel_id_to_i(context.span_id)), + trace_id: format('%032x', otel_id_to_i(context.trace_id)), + trace_flags: context.trace_flags.sampled? ? '01' : '00', + trace_state: context.tracestate.to_s, + remote: context.remote?, + ) + end + + def otel_set_status(otel_set_status_args, _call) + span = find_otel_span(otel_set_status_args.span_id) + + span.status = OpenTelemetry::Trace::Status.public_send( + otel_set_status_args.code.downcase, + otel_set_status_args.description + ) + + OtelSetStatusReturn.new + end + + def otel_set_name(otel_set_name_args, _call) + span = find_otel_span(otel_set_name_args.span_id) + span.name = otel_set_name_args.name + OtelSetNameReturn.new + end + + def otel_set_attributes(otel_set_attributes_args, _call) + span = find_otel_span(otel_set_attributes_args.span_id + ) + otel_parse_attributes(otel_set_attributes_args.attributes).each do |key, value| + span.set_attribute(key, value) + end + + OtelSetAttributesReturn.new + end + + def otel_flush_spans(otel_flush_spans_args, _call) + success = wait_for_flush(otel_flush_spans_args.seconds) + + OtelFlushSpansReturn.new(success: success) + end + + def otel_flush_trace_stats(_otel_flush_trace_stats_args, _call) + OtelFlushTraceStatsReturn.new + end def stop_tracer(stop_tracer_args, _call) Datadog.shutdown! StopTracerReturn.new + + @otel_spans.clear end # The Ruby tracer holds spans on a per-Fiber basis. @@ -178,6 +271,9 @@ def initialize @return_queue.push(e) end end + + # A list of OpenTelemetry Span objects that allow for retrieving spans in-between API calls. + @otel_spans = {} end # Wrap all public methods to ensure they execute in a single thread. @@ -205,6 +301,70 @@ def find_span(span_id) span end + + def wait_for_flush(seconds) + return true unless (worker = Datadog.send(:components).tracer.writer.worker) + + count = 0 + sleep_time = seconds / 100.0 + until worker.trace_buffer&.empty? + sleep sleep_time + count += 1 + return false if count >= 100 + end + + true + end + + def header_hash(http_headers) + http_headers.http_headers.map { |t| [t.key, t.value] }.to_h + end + + def find_otel_span(id) + span = @otel_spans[id] + raise "Requested span #{id} not found. All spans: #{@otel_spans.map{|s|s.context.span_id}}" unless span + + span + end + + # Convert OTel's String representation to an unsigned 64-bit Integer. + def otel_id_to_i(span_id_or_trace_id) + span_id_or_trace_id.unpack1('Q') + end + + # Convert an unsigned 64-bit Integer to OTel's String representation. + def i_to_otel_id(span_id_or_trace_id) + [span_id_or_trace_id].pack('Q') + end + + # OTel system tests provide times in microseconds, but Ruby OTel + # measures time in seconds (Float). + def otel_correct_time(microseconds) + microseconds &./ 1000000.0 + end + + # Convert Protobuf attributes to native Ruby objects + # e.g. `Attributes.new(key_vals: { my_key:ListVal.new(val: [AttrVal.new(bool_val: true)])})` + def otel_parse_attributes(attributes) + attributes.key_vals.map do |k, v| + [k, v.val.map do |union| + union[union.val.to_s] + end.yield_self do |value| + # Flatten array of 1 element into a scalar. + # This is due to the gRPC API not differentiating between a + # single value and an array with 1 value + if value.size == 1 + value[0] + else + value + end + end] + end.to_h + end + + def otel_tracer + OpenTelemetry.tracer_provider.tracer('otel-tracer') + end end port = ENV.fetch('APM_TEST_CLIENT_SERVER_PORT', 50051) @@ -228,6 +388,8 @@ def find_span(span_id) end if ENV['DEBUG'] == '1' puts 'Running gRPC server...' +STDOUT.flush + s.handle(ServerImpl.new()) # Runs the server with SIGHUP, SIGINT and SIGQUIT signal handlers to