Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable OpenTelemetry tests for Ruby #1456

Merged
merged 21 commits into from
Nov 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion manifests/ruby.yml
Original file line number Diff line number Diff line change
Expand Up @@ -212,7 +212,10 @@ tests/:
parametric/:
test_dynamic_configuration.py:
TestDynamicConfigV1: v1.13.0
test_otel_span_methods.py: missing_feature
test_otel_span_methods.py:
Test_Otel_Span_Methods: v1.17.0
test_otel_span_with_w3c.py:
Test_Otel_Span_With_W3c: v1.17.0
test_span_links.py: missing_feature
test_tracer.py:
Test_TracerSCITagging: missing_feature
Expand Down
1 change: 1 addition & 0 deletions tests/parametric/test_otel_span_methods.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ def test_otel_set_service_name(self, test_agent, test_library):
context.library == "java",
reason="Old array encoding was removed in 1.22.0 and new span naming introduced in 1.24.0: no version elligible for this test.",
)
@irrelevant(context.library == "ruby", reason="Old array encoding no longer supported")
@missing_feature(context.library == "nodejs", reason="New operation name mapping not yet implemented")
@missing_feature(context.library == "dotnet", reason="New operation name mapping not yet implemented")
@missing_feature(context.library == "python", reason="New operation name mapping not yet implemented")
Expand Down
2 changes: 0 additions & 2 deletions tests/parametric/test_otel_span_with_w3c.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
class Test_Otel_Span_With_W3c:
@irrelevant(context.library == "cpp", reason="library does not implement OpenTelemetry")
@missing_feature(context.library == "php", reason="Not implemented")
@missing_feature(context.library == "ruby", reason="Not implemented")
@missing_feature(context.library == "python", reason="Not implemented")
@missing_feature(context.library == "python_http", reason="Not implemented")
@missing_feature(context.library <= "[email protected]", reason="OTel resource naming implemented in 1.24.0")
Expand Down Expand Up @@ -49,7 +48,6 @@ def test_otel_start_span_with_w3c(self, test_agent, test_library):

@irrelevant(context.library == "cpp", reason="library does not implement OpenTelemetry")
@missing_feature(context.library == "php", reason="Not implemented")
@missing_feature(context.library == "ruby", reason="Not implemented")
def test_otel_span_with_w3c_headers(self, test_agent, test_library):
with test_library:
with test_library.otel_start_span(
Expand Down
5 changes: 3 additions & 2 deletions tests/parametric/test_otel_tracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
@scenarios.parametric
class Test_Otel_Tracer:
@irrelevant(context.library == "cpp", reason="library does not implement OpenTelemetry")
@missing_feature(context.library == "ruby", reason="Not implemented")
@missing_feature(context.library == "php", reason="Not implemented")
def test_otel_simple_trace(self, test_agent, test_library):
"""
Expand Down Expand Up @@ -58,12 +57,14 @@ def test_otel_simple_trace(self, test_agent, test_library):

@irrelevant(context.library == "cpp", reason="library does not implement OpenTelemetry")
@missing_feature(context.library == "php", reason="Not implemented")
@missing_feature(context.library == "ruby", reason="Not implemented")
@missing_feature(context.library == "python", reason="Not implemented")
@missing_feature(context.library == "python_http", reason="Not implemented")
@missing_feature(context.library <= "[email protected]", reason="OTel resource naming implemented in 1.24.0")
@missing_feature(context.library == "nodejs", reason="Not implemented")
@missing_feature(context.library == "dotnet", reason="Not implemented")
@missing_feature(
context.library == "ruby", reason="Ruby is instrumenting telemetry calls, creating 2 spans instead of 1"
)
def test_otel_force_flush(self, test_agent, test_library):
"""
Verify that force flush flushed the spans
Expand Down
3 changes: 3 additions & 0 deletions utils/build/docker/ruby/parametric/Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,6 @@ end

gem 'grpc'
gem 'grpc-tools'

# Only used for OpenTelemetry testing.
gem 'opentelemetry-sdk'
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
FROM ruby:3.2.1-bullseye

ARG BUILD_MODULE=''
ENV RUBY_DDTRACE_SHA=$BUILD_MODULE

WORKDIR /client
RUN gem install ddtrace # Install a baseline ddtrace version, to cache all dependencies
COPY ./utils/build/docker/ruby/parametric/Gemfile /client/
COPY ./utils/build/docker/ruby/parametric/install_dependencies.sh /client/
ARG BUILD_MODULE=''
ENV RUBY_DDTRACE_SHA=$BUILD_MODULE
RUN bash install_dependencies.sh
CMD bundle exec ruby -e 'puts Gem.loaded_specs["ddtrace"].version'
183 changes: 172 additions & 11 deletions utils/build/docker/ruby/parametric/server.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@
require 'datadog/tracing/contrib/grpc/distributed/propagation' # Loads optional `Datadog::Tracing::Contrib::GRPC::Distributed`
require 'apm_test_client_services_pb'

# Only used for OpenTelemetry testing.
require 'opentelemetry/sdk'
require 'datadog/opentelemetry' # TODO: Remove when DD_TRACE_OTEL_ENABLED=true works out of the box for Ruby APM

OpenTelemetry::SDK.configure # Initialize OpenTelemetry

Datadog.configure do |c|
c.diagnostics.debug = true # When tests fail, ensure there's enough data to debug the failure.
c.logger.instance = Logger.new(STDOUT) # Make sure logs are available for inspection from outside the container.
Expand Down Expand Up @@ -136,28 +142,114 @@ def inject_headers(inject_headers_args, _call)
end

# gRPC handler: waits for the tracer's writer to drain its trace buffer.
#
# The wait is delegated entirely to `wait_for_flush`, which gives up after the
# requested number of seconds and returns early when the writer has no worker.
# An inline `sleep ... until worker&.trace_buffer.empty?` loop is deliberately
# not used here: it has no timeout, and `&.` only guards a single call, so a
# missing worker would end in `nil.empty?` raising NoMethodError.
#
# @param flush_spans_args request message (unused)
# @param _call gRPC call object (unused)
# @return [FlushSpansReturn] empty response, regardless of the flush outcome
def flush_spans(flush_spans_args, _call)
  wait_for_flush(5)

  FlushSpansReturn.new
end

# gRPC handler for flushing trace stats. It performs no work and simply
# answers with an empty FlushTraceStatsReturn message.
#
# @param flush_trace_stats_args request message (unused)
# @param _call gRPC call object (unused)
# @return [FlushTraceStatsReturn] empty response
def flush_trace_stats(flush_trace_stats_args, _call)
  response = FlushTraceStatsReturn.new
  response
end

# TODO: Implement the following OTel methods
# :otel_start_span, ::OtelStartSpanArgs, ::OtelStartSpanReturn
# :otel_end_span, ::OtelEndSpanArgs, ::OtelEndSpanReturn
# :otel_is_recording, ::OtelIsRecordingArgs, ::OtelIsRecordingReturn
# :otel_span_context, ::OtelSpanContextArgs, ::OtelSpanContextReturn
# :otel_set_status, ::OtelSetStatusArgs, ::OtelSetStatusReturn
# :otel_set_name, ::OtelSetNameArgs, ::OtelSetNameReturn
# :otel_set_attributes, ::OtelSetAttributesArgs, ::OtelSetAttributesReturn
# :otel_flush_spans, ::OtelFlushSpansArgs, ::OtelFlushSpansReturn
# :otel_flush_trace_stats, ::OtelFlushTraceStatsArgs, ::OtelFlushTraceStatsReturn
# Maps the integer span kind received in a start-span request to the Symbol
# passed as `kind:` to the OpenTelemetry tracer. Integers without an entry
# (e.g. 0) look up as nil.
# Frozen so this shared lookup table cannot be mutated at runtime.
OTEL_SPAN_KIND = {
  1 => :internal,
  2 => :server,
  3 => :client,
  4 => :producer,
  5 => :consumer,
}.freeze

# gRPC handler: starts an OpenTelemetry span and registers it in @otel_spans
# so later calls can look it up by its integer span ID.
#
# Parent selection:
# * if the request carries any HTTP headers, the parent context is extracted
#   from them through the OpenTelemetry propagator;
# * otherwise, if `parent_id` is non-zero, the previously registered span with
#   that ID becomes the parent;
# * otherwise no explicit parent is given (`with_parent: nil`).
#
# @param otel_start_span_args request message; reads http_headers, parent_id,
#   name, attributes, timestamp and span_kind
# @param _call gRPC call object (unused)
# @return [OtelStartSpanReturn] integer span_id and trace_id of the new span
# @raise [RuntimeError] from find_otel_span when `parent_id` is unknown
def otel_start_span(otel_start_span_args, _call)
  request = otel_start_span_args
  propagated = header_hash(request.http_headers)

  parent_context =
    if propagated.empty?
      if request.parent_id != 0
        OpenTelemetry::Trace.context_with_span(find_otel_span(request.parent_id))
      end
    else
      OpenTelemetry.propagation.extract(propagated)
    end

  new_span = otel_tracer.start_span(
    request.name,
    with_parent: parent_context,
    attributes: otel_parse_attributes(request.attributes),
    start_timestamp: otel_correct_time(request.timestamp),
    kind: OTEL_SPAN_KIND[request.span_kind]
  )

  span_context = new_span.context
  span_key = otel_id_to_i(span_context.span_id)

  # Remember the span so subsequent API calls can retrieve it by ID.
  @otel_spans[span_key] = new_span

  OtelStartSpanReturn.new(span_id: span_key, trace_id: otel_id_to_i(span_context.trace_id))
end

# gRPC handler: finishes a previously started OpenTelemetry span.
#
# @param otel_end_span_args request message; reads `id` (span to finish) and
#   `timestamp` (converted with otel_correct_time before being used as the
#   span's end timestamp)
# @param _call gRPC call object (unused)
# @return [OtelEndSpanReturn] empty response
# @raise [RuntimeError] from find_otel_span when the span is unknown
def otel_end_span(otel_end_span_args, _call)
  target = find_otel_span(otel_end_span_args.id)
  finished_at = otel_correct_time(otel_end_span_args.timestamp)
  target.finish(end_timestamp: finished_at)

  OtelEndSpanReturn.new
end

# gRPC handler: reports whether a tracked OpenTelemetry span is recording.
#
# @param otel_is_recording_args request message; reads `span_id`
# @param _call gRPC call object (unused)
# @return [OtelIsRecordingReturn] with `is_recording` set to the span's `recording?`
# @raise [RuntimeError] from find_otel_span when the span is unknown
def otel_is_recording(otel_is_recording_args, _call)
  recording = find_otel_span(otel_is_recording_args.span_id).recording?

  OtelIsRecordingReturn.new(is_recording: recording)
end

# gRPC handler: returns the span context of a tracked OpenTelemetry span.
#
# The span and trace IDs are first converted to Integers with otel_id_to_i and
# then rendered as zero-padded lowercase hex (16 and 32 digits respectively).
# NOTE(review): otel_id_to_i decodes a single 64-bit value, so only 64 bits of
# the trace ID are represented in the 32-digit output -- confirm this is intended.
#
# @param otel_span_context_args request message; reads `span_id`
# @param _call gRPC call object (unused)
# @return [OtelSpanContextReturn] span_id, trace_id, trace_flags ('01' when
#   sampled, '00' otherwise), trace_state (string form) and remote flag
# @raise [RuntimeError] from find_otel_span when the span is unknown
def otel_span_context(otel_span_context_args, _call)
  span_context = find_otel_span(otel_span_context_args.span_id).context

  hex_span_id = format('%016x', otel_id_to_i(span_context.span_id))
  hex_trace_id = format('%032x', otel_id_to_i(span_context.trace_id))
  flags =
    if span_context.trace_flags.sampled?
      '01'
    else
      '00'
    end

  OtelSpanContextReturn.new(
    span_id: hex_span_id,
    trace_id: hex_trace_id,
    trace_flags: flags,
    trace_state: span_context.tracestate.to_s,
    remote: span_context.remote?
  )
end

# gRPC handler: sets the status of a tracked OpenTelemetry span.
#
# The request's `code` is downcased and used as the name of the
# OpenTelemetry::Trace::Status factory method to call, with the request's
# `description` as its argument.
#
# @param otel_set_status_args request message; reads span_id, code, description
# @param _call gRPC call object (unused)
# @return [OtelSetStatusReturn] empty response
# @raise [RuntimeError] from find_otel_span when the span is unknown
def otel_set_status(otel_set_status_args, _call)
  target = find_otel_span(otel_set_status_args.span_id)

  factory_name = otel_set_status_args.code.downcase
  new_status = OpenTelemetry::Trace::Status.public_send(factory_name, otel_set_status_args.description)
  target.status = new_status

  OtelSetStatusReturn.new
end

# gRPC handler: renames a tracked OpenTelemetry span.
#
# @param otel_set_name_args request message; reads `span_id` and `name`
# @param _call gRPC call object (unused)
# @return [OtelSetNameReturn] empty response
# @raise [RuntimeError] from find_otel_span when the span is unknown
def otel_set_name(otel_set_name_args, _call)
  target = find_otel_span(otel_set_name_args.span_id)
  new_name = otel_set_name_args.name
  target.name = new_name

  OtelSetNameReturn.new
end

# gRPC handler: sets attributes on a tracked OpenTelemetry span.
#
# The request's protobuf attributes are converted to native Ruby values with
# otel_parse_attributes, then applied to the span one key at a time.
#
# @param otel_set_attributes_args request message; reads `span_id` and `attributes`
# @param _call gRPC call object (unused)
# @return [OtelSetAttributesReturn] empty response
# @raise [RuntimeError] from find_otel_span when the span is unknown
def otel_set_attributes(otel_set_attributes_args, _call)
  target = find_otel_span(otel_set_attributes_args.span_id)
  parsed = otel_parse_attributes(otel_set_attributes_args.attributes)

  parsed.each_pair { |name, value| target.set_attribute(name, value) }

  OtelSetAttributesReturn.new
end

# gRPC handler: waits up to the requested number of seconds for the tracer to
# flush, and reports whether the flush completed in time.
#
# @param otel_flush_spans_args request message; reads `seconds`
# @param _call gRPC call object (unused)
# @return [OtelFlushSpansReturn] with `success` set to wait_for_flush's result
def otel_flush_spans(otel_flush_spans_args, _call)
  flushed = wait_for_flush(otel_flush_spans_args.seconds)

  OtelFlushSpansReturn.new(success: flushed)
end

# gRPC handler for flushing OpenTelemetry trace stats. It performs no work and
# simply answers with an empty OtelFlushTraceStatsReturn message.
#
# @param _otel_flush_trace_stats_args request message (unused)
# @param _call gRPC call object (unused)
# @return [OtelFlushTraceStatsReturn] empty response
def otel_flush_trace_stats(_otel_flush_trace_stats_args, _call)
  response = OtelFlushTraceStatsReturn.new
  response
end

# gRPC handler: shuts the Datadog tracer down and forgets every tracked
# OpenTelemetry span.
#
# @param stop_tracer_args request message (unused)
# @param _call gRPC call object (unused)
# @return [StopTracerReturn] empty response
def stop_tracer(stop_tracer_args, _call)
  Datadog.shutdown!
  @otel_spans.clear

  # The response must be the last expression: ending the method on
  # `@otel_spans.clear` would return the emptied Hash instead of the
  # response message.
  StopTracerReturn.new
end

# The Ruby tracer holds spans on a per-Fiber basis.
Expand All @@ -183,6 +275,9 @@ def initialize
@return_queue.push(e)
end
end

# Maps each integer span ID to its OpenTelemetry Span object, allowing spans to be retrieved in-between API calls.
@otel_spans = {}
end

# Wrap all public methods to ensure they execute in a single thread.
Expand Down Expand Up @@ -210,6 +305,70 @@ def find_span(span_id)

span
end

# Polls the tracer writer's trace buffer until it is empty or the time budget
# is used up.
#
# The budget is split into 100 equal sleeps; the buffer is checked before each
# sleep, and the method gives up right after the 100th sleep.
#
# @param seconds [Numeric] approximate maximum time to wait
# @return [Boolean] true when the writer has no worker or the buffer was seen
#   empty; false when all 100 polls found it non-empty (or absent)
def wait_for_flush(seconds)
  worker = Datadog.send(:components).tracer.writer.worker
  return true unless worker

  pause = seconds / 100.0
  100.times do
    return true if worker.trace_buffer&.empty?

    sleep pause
  end

  false
end

# Converts a headers message into a plain Hash of header key => header value.
# When the same key appears more than once, the last value wins.
#
# @param http_headers message whose `http_headers` is a collection of entries
#   responding to `key` and `value`
# @return [Hash] header keys mapped to header values
def header_hash(http_headers)
  http_headers.http_headers.each_with_object({}) do |header, result|
    result[header.key] = header.value
  end
end

# Looks up a tracked OpenTelemetry span by its integer ID.
#
# @param id [Integer] span ID, as used for the keys of @otel_spans
# @return the span registered under `id`
# @raise [RuntimeError] when no span is registered under `id`; the message
#   lists the IDs that are currently registered
def find_otel_span(id)
  span = @otel_spans[id]

  # @otel_spans is a Hash keyed by span ID, so the known IDs are its keys.
  # Iterating it with a single block parameter would yield [id, span] pairs,
  # not spans.
  raise "Requested span #{id} not found. All spans: #{@otel_spans.keys}" unless span

  span
end

# Decode OTel's binary String ID into an unsigned 64-bit Integer.
# Only the first 8 bytes are read (native byte order, `Q` directive);
# a String shorter than 8 bytes decodes to nil.
def otel_id_to_i(span_id_or_trace_id)
  decoded = span_id_or_trace_id.unpack('Q')
  decoded.first
end

# Encode an unsigned 64-bit Integer as OTel's binary String ID
# (8 bytes, native byte order, `Q` directive). Inverse of otel_id_to_i.
def i_to_otel_id(span_id_or_trace_id)
  values = [span_id_or_trace_id]
  values.pack('Q')
end

# OTel system tests provide times in microseconds, while Ruby OTel measures
# time in seconds (Float): divide by one million. A nil input stays nil.
# NOTE(review): a timestamp of 0 becomes 0.0 rather than nil -- confirm that
# unset timestamps really reach this method as nil.
def otel_correct_time(microseconds)
  return nil if microseconds.nil?

  microseconds / 1_000_000.0
end

# Turn Protobuf attributes into native Ruby objects, returning a Hash of
# attribute name => value.
# e.g. `Attributes.new(key_vals: { my_key:ListVal.new(val: [AttrVal.new(bool_val: true)])})`
#
# Every entry of a list is unwrapped by reading the field named by its `val`.
# A list holding exactly one element is collapsed into that scalar, because the
# gRPC API does not differentiate between a single value and an array with 1
# value. Empty and multi-element lists stay Arrays.
def otel_parse_attributes(attributes)
  pairs = attributes.key_vals.map do |name, list|
    values = list.val.map { |union| union[union.val.to_s] }
    [name, values.size == 1 ? values[0] : values]
  end

  pairs.to_h
end

# Returns the tracer named 'otel-tracer' from the global OpenTelemetry tracer
# provider. The provider is asked on every call; nothing is memoized here.
def otel_tracer
  provider = OpenTelemetry.tracer_provider
  provider.tracer('otel-tracer')
end
end

port = ENV.fetch('APM_TEST_CLIENT_SERVER_PORT', 50051)
Expand All @@ -233,6 +392,8 @@ def find_span(span_id)
end if ENV['DEBUG'] == '1'

puts 'Running gRPC server...'
STDOUT.flush

s.handle(ServerImpl.new())

# Runs the server with SIGHUP, SIGINT and SIGQUIT signal handlers to
Expand Down