Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplify and correct ProbabilitySampler behaviour #111

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 14 additions & 19 deletions sdk/lib/opentelemetry/sdk/trace/samplers.rb
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,11 @@ module Trace
module Samplers
RECORD_AND_PROPAGATE = Result.new(decision: Decision::RECORD_AND_PROPAGATE)
NOT_RECORD = Result.new(decision: Decision::NOT_RECORD)
RECORD = Result.new(decision: Decision::RECORD)
SAMPLING_HINTS = [Decision::NOT_RECORD, Decision::RECORD, Decision::RECORD_AND_PROPAGATE].freeze
APPLY_PROBABILITY_TO_SYMBOLS = %i[root_spans root_spans_and_remote_parent all_spans].freeze

private_constant(:RECORD_AND_PROPAGATE, :NOT_RECORD, :SAMPLING_HINTS)
private_constant(:RECORD_AND_PROPAGATE, :NOT_RECORD, :RECORD, :SAMPLING_HINTS, :APPLY_PROBABILITY_TO_SYMBOLS)

# rubocop:disable Lint/UnusedBlockArgument

Expand All @@ -66,6 +68,7 @@ module Samplers
end
end
# rubocop:enable Style/Lambda
# rubocop:enable Lint/UnusedBlockArgument

# Returns a new sampler. The probability of sampling a trace is equal
# to that of the specified probability.
Expand All @@ -76,35 +79,27 @@ module Samplers
# ignore. Defaults to ignore {OpenTelemetry::Trace::SamplingHint::RECORD}.
# @param [optional Boolean] ignore_parent Whether to ignore parent
# sampling. Defaults to not ignore parent sampling.
# @param [optional Boolean] apply_to_root_spans Whether to apply
# probability sampling to root spans. Defaults to true.
# @param [optional Boolean] apply_to_remote_parent Whether to apply
# probability sampling to remote parent. Defaults to true.
# @param [optional Boolean] apply_to_all_spans Whether to apply
# probability sampling to all spans. Defaults to false.
# @param [optional Symbol] apply_probability_to Whether to apply
# probability sampling to root spans, root spans and remote parents,
# or all spans. Allowed values include :root_spans, :root_spans_and_remote_parent,
# and :all_spans. Defaults to :root_spans_and_remote_parent.
# @raise [ArgumentError] if probability is out of range
# @raise [ArgumentError] if ignore_hints contains invalid hints
# @raise [ArgumentError] unless apply_to_all_spans implies apply_to_root_spans
# and apply_to_remote_parent
# @return [OpenTelemetry::Trace::Samplers::Sampler]
# @raise [ArgumentError] if apply_probability_to is not one of the allowed symbols
def self.probability(probability,
ignore_hints: [OpenTelemetry::Trace::SamplingHint::RECORD],
ignore_parent: false,
apply_to_root_spans: true,
apply_to_remote_parent: true,
apply_to_all_spans: false)
apply_probability_to: :root_spans_and_remote_parent)
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The collection of boolean flags was confusing, interdependent, incorrectly implemented, and didn't capture either the spirit or intent of the OTEP. Since we're in Ruby, we can have Symbols, so this uses a single Symbol argument to distinguish the 3 supported cases.

raise ArgumentError, 'probability must be in range [0.0, 1.0]' unless (0.0..1.0).include?(probability)
raise ArgumentError, 'ignore_hints' unless (ignore_hints.to_a - SAMPLING_HINTS).empty?
raise ArgumentError if apply_to_all_spans && (!apply_to_root_spans || !apply_to_remote_parent)
raise ArgumentError, 'apply_probability_to' unless APPLY_PROBABILITY_TO_SYMBOLS.include?(apply_probability_to)

ProbabilitySampler.new(probability,
hints: SAMPLING_HINTS - ignore_hints.to_a,
ignore_hints: ignore_hints.to_a,
ignore_parent: ignore_parent,
apply_to_root_spans: apply_to_root_spans,
apply_to_remote_parent: apply_to_remote_parent,
apply_to_all_spans: apply_to_all_spans)
apply_to_remote_parent: apply_probability_to != :root_spans,
apply_to_all_spans: apply_probability_to == :all_spans)
end
# rubocop:enable Lint/UnusedBlockArgument
end
end
end
Expand Down
69 changes: 32 additions & 37 deletions sdk/lib/opentelemetry/sdk/trace/samplers/probability_sampler.rb
Original file line number Diff line number Diff line change
Expand Up @@ -12,12 +12,16 @@ module Samplers
#
# Implements sampling based on a probability.
class ProbabilitySampler
def initialize(probability, hints:, ignore_parent:, apply_to_root_spans:, apply_to_remote_parent:, apply_to_all_spans:)
HINT_RECORD_AND_PROPAGATE = OpenTelemetry::Trace::SamplingHint::RECORD_AND_PROPAGATE
HINT_RECORD = OpenTelemetry::Trace::SamplingHint::RECORD

private_constant(:HINT_RECORD_AND_PROPAGATE, :HINT_RECORD)

def initialize(probability, ignore_hints:, ignore_parent:, apply_to_remote_parent:, apply_to_all_spans:)
@probability = probability
@id_upper_bound = format('%016x', (probability * (2**64 - 1)).ceil)
@result_from_hint = hints.map { |hint| [hint, Result.new(decision: hint)] }.to_h.freeze
@ignore_parent = ignore_parent
@apply_to_root_spans = apply_to_root_spans
@ignored_hints = ignore_hints
@use_parent_sampled_flag = !ignore_parent
@apply_to_remote_parent = apply_to_remote_parent
@apply_to_all_spans = apply_to_all_spans
end
Expand All @@ -26,51 +30,42 @@ def initialize(probability, hints:, ignore_parent:, apply_to_root_spans:, apply_
#
# Callable interface for probability sampler. See {Samplers}.
def call(trace_id:, span_id:, parent_context:, hint:, links:, name:, kind:, attributes:)
take_hint(hint) ||
use_parent_sampling(parent_context) ||
use_link_sampling(links) ||
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The OTEP talks about links, but the decision table for ProbabilitySampler doesn't use them.

maybe_dont_apply(parent_context) ||
use_probability_sampling(trace_id) ||
NOT_RECORD
end
# Ignored for sampling decision: links, name, kind, attributes.

private
hint = nil if @ignored_hints.include?(hint)

# Take the hint if one is provided and we're not ignoring it.
def take_hint(hint)
@result_from_hint[hint] if hint
end
sampled_flag = sample?(hint, trace_id, parent_context)
record_events = hint == HINT_RECORD || sampled_flag

# If the parent is sampled and we're not ignoring it keep the sampling decision.
def use_parent_sampling(parent_context)
RECORD_AND_PROPAGATE if !@ignore_parent && parent_context&.trace_flags&.sampled?
end

# If any link is sampled keep the sampling decision.
def use_link_sampling(links)
RECORD_AND_PROPAGATE if links&.any? { |link| link.context.trace_flags.sampled? }
if sampled_flag && record_events
RECORD_AND_PROPAGATE
elsif record_events
RECORD
else
NOT_RECORD
end
end

def maybe_dont_apply(parent_context)
dont_apply_to_root_span(parent_context) ||
dont_apply_to_remote_parent(parent_context) ||
dont_apply_to_local_child(parent_context)
end
private

def dont_apply_to_root_span(parent_context)
NOT_RECORD if !@apply_to_root_spans && parent_context.nil?
def sample?(hint, trace_id, parent_context)
if parent_context.nil?
hint == HINT_RECORD_AND_PROPAGATE || sample_trace_id?(trace_id)
else
parent_sampled?(parent_context) || hint == HINT_RECORD_AND_PROPAGATE || sample_trace_id_for_child?(parent_context, trace_id)
end
end

def dont_apply_to_remote_parent(parent_context)
NOT_RECORD if !@apply_to_remote_parent && parent_context&.remote?
def parent_sampled?(parent_context)
@use_parent_sampled_flag && parent_context.trace_flags.sampled?
end

def dont_apply_to_local_child(parent_context)
NOT_RECORD if !@apply_to_all_spans && parent_context && !parent_context.remote?
def sample_trace_id_for_child?(parent_context, trace_id)
(@apply_to_all_spans || (@apply_to_remote_parent && parent_context.remote?)) && sample_trace_id?(trace_id)
end

def use_probability_sampling(trace_id)
RECORD_AND_PROPAGATE if @probability == 1.0 || trace_id[16, 16] < @id_upper_bound
def sample_trace_id?(trace_id)
@probability == 1.0 || trace_id[16, 16] < @id_upper_bound
end
end
end
Expand Down
57 changes: 34 additions & 23 deletions sdk/test/opentelemetry/sdk/trace/samplers_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,6 @@
result.wont_be :sampled?
end

it 'respects link sampling' do
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ProbabilitySampler doesn't respect Link sampling.

link = OpenTelemetry::Trace::Link.new(context)
result = call_sampler(sampler, links: [link])
result.must_be :sampled?
end

it 'returns a result' do
result = call_sampler(sampler, trace_id: trace_id(123))
result.must_be_instance_of(Result)
Expand All @@ -83,33 +77,51 @@
result.wont_be :sampled?
end

it 'does not sample a root span unless apply_to_root_spans' do
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not a supported case for ProbabilitySampler. The probability applies to "root spans", "root spans and remote parent", or "all spans".

sampler = Samplers.probability(1, apply_to_root_spans: false)
result = call_sampler(sampler, parent_context: nil)
it 'does not sample a remote parent if apply_probability_to == :root_spans' do
context = OpenTelemetry::Trace::SpanContext.new(
trace_flags: OpenTelemetry::Trace::TraceFlags.from_byte(0),
remote: true
)
sampler = Samplers.probability(1, apply_probability_to: :root_spans)
result = call_sampler(sampler, parent_context: context)
result.wont_be :sampled?
end

it 'does not sample a remote parent unless apply_to_remote_parent' do
it 'samples a local child span if apply_probability_to == :all_spans' do
sampler = Samplers.probability(1, apply_probability_to: :all_spans)
result = call_sampler(sampler, parent_context: context, trace_id: trace_id(1))
result.must_be :sampled?
end

it 'samples a remote parent if apply_probability_to == :root_spans_and_remote_parent' do
context = OpenTelemetry::Trace::SpanContext.new(
trace_flags: OpenTelemetry::Trace::TraceFlags.from_byte(0),
remote: true
)
sampler = Samplers.probability(1, apply_to_remote_parent: false)
sampler = Samplers.probability(1, apply_probability_to: :root_spans_and_remote_parent)
result = call_sampler(sampler, parent_context: context)
result.must_be :sampled?
end

it 'does not sample a local child span if apply_probability_to == :root_spans_and_remote_parent' do
context = OpenTelemetry::Trace::SpanContext.new(trace_flags: OpenTelemetry::Trace::TraceFlags.from_byte(0))
sampler = Samplers.probability(1, apply_probability_to: :root_spans_and_remote_parent)
result = call_sampler(sampler, parent_context: context, trace_id: trace_id(1))
result.wont_be :sampled?
end

it 'samples a local child span if apply_to_all_spans' do
sampler = Samplers.probability(1, apply_to_all_spans: true)
it 'does not sample a local child span if apply_probability_to == :root_spans' do
context = OpenTelemetry::Trace::SpanContext.new(trace_flags: OpenTelemetry::Trace::TraceFlags.from_byte(0))
sampler = Samplers.probability(1, apply_probability_to: :root_spans)
result = call_sampler(sampler, parent_context: context, trace_id: trace_id(1))
result.must_be :sampled?
result.wont_be :sampled?
end

it 'returns result with hint if supplied' do
sampler = Samplers.probability(1, ignore_hints: nil)
not_record_result = call_sampler(sampler, hint: Decision::NOT_RECORD)
record_result = call_sampler(sampler, hint: Decision::RECORD)
record_and_propagate_result = call_sampler(sampler, hint: Decision::RECORD_AND_PROPAGATE)
sampler = Samplers.probability(0, ignore_hints: nil)
not_record_result = call_sampler(sampler, hint: OpenTelemetry::Trace::SamplingHint::NOT_RECORD)
record_result = call_sampler(sampler, hint: OpenTelemetry::Trace::SamplingHint::RECORD)
record_and_propagate_result = call_sampler(sampler, hint: OpenTelemetry::Trace::SamplingHint::RECORD_AND_PROPAGATE)
not_record_result.wont_be :sampled?
not_record_result.wont_be :record_events?
record_result.wont_be :sampled?
Expand All @@ -122,9 +134,8 @@
proc { Samplers.probability(1, ignore_hints: [:hint]) }.must_raise(ArgumentError)
end

it 'apply_to_all_spans implies apply_to_root_spans and apply_to_remote_parent' do
proc { Samplers.probability(1, apply_to_root_spans: false, apply_to_all_spans: true) }.must_raise(ArgumentError)
proc { Samplers.probability(1, apply_to_remote_parent: false, apply_to_all_spans: true) }.must_raise(ArgumentError)
it 'does not allow invalid symbols in apply_probability_to' do
proc { Samplers.probability(1, apply_probability_to: :foo) }.must_raise(ArgumentError)
end

it 'samples the smallest probability larger than the smallest trace_id' do
Expand Down Expand Up @@ -162,8 +173,8 @@ def trace_id(id)

def call_sampler(sampler, trace_id: nil, span_id: nil, parent_context: nil, hint: nil, links: nil, name: nil, kind: nil, attributes: nil)
sampler.call(
trace_id: trace_id,
span_id: span_id,
trace_id: trace_id || OpenTelemetry::Trace.generate_trace_id,
span_id: span_id || OpenTelemetry::Trace.generate_span_id,
parent_context: parent_context,
hint: hint,
links: links,
Expand Down