Skip to content

Commit

Permalink
propagate "decision maker" in X-Datadog-Tags (#230)
Browse files Browse the repository at this point in the history
* remove all remnants of UpstreamService

* undo non-test related removals from #218

* fix bug

* add additional "sample" test

* add "span" tests

* clang-format

* allow configuration of trace_tags_propagation_max_length in dynamic load JSON

* add "decision maker" tag to nginx integration tests

* document DD_TRACE_TAGS_PROPAGATION_MAX_LENGTH

* clang-format (again)

* no UpstreamService talk

* remove base64 (was used only by UpstreamService)

* max_size -> inject_max_size

* trace_tags_propagation_max_length -> tags_header_size
  • Loading branch information
dgoffredo authored Jul 1, 2022
1 parent c5308db commit 9b9248f
Show file tree
Hide file tree
Showing 25 changed files with 484 additions and 420 deletions.
4 changes: 0 additions & 4 deletions BUILD.bazel
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
cc_library(
name = "dd_opentracing_cpp",
srcs = [
"src/base64_rfc4648.cpp",
"src/base64_rfc4648.h",
"src/bool.cpp",
"src/bool.h",
"src/clock.h",
Expand Down Expand Up @@ -38,8 +36,6 @@ cc_library(
"src/tracer.h",
"src/tracer_options.cpp",
"src/tracer_options.h",
"src/upstream_service.cpp",
"src/upstream_service.h",
"src/version.cpp",
"src/writer.cpp",
"src/writer.h",
Expand Down
14 changes: 14 additions & 0 deletions doc/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -244,6 +244,20 @@ For more information about the configuration of trace sampling, see
- **Environment variable**: `DD_TRACE_RATE_LIMIT`
- **Default value**: `100`

### Trace Tags Propagation Max Length
Certain information, such as the _reason_ for a sampling decision having been
made, is propagated between services along the trace in the form of the
`X-Datadog-Tags` HTTP request header.

The length of the `X-Datadog-Tags` header value is limited to a maximum number
of bytes, so that the header is not rejected by peers or by HTTP header
policies. This configuration option is that limit, in bytes. Trace tags whose
serialized length exceeds the limit are not propagated.

- **TracerOptions member**: `uint64_t tags_header_size`
- **JSON property**: `tags_header_size` _(number)_
- **Environment variable**: `DD_TRACE_TAGS_PROPAGATION_MAX_LENGTH`
- **Default value**: `512`

[1]: /include/datadog/opentracing.h
[2]: https://docs.datadoghq.com/tracing/setup_overview/proxy_setup/?tab=nginx#nginx-configuration
[3]: https://docs.datadoghq.com/tracing/setup_overview/setup/cpp/?tab=containers#dynamic-loading
Expand Down
8 changes: 8 additions & 0 deletions include/datadog/opentracing.h
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,14 @@ struct TracerOptions {
// the implicit "catch-all" rule appended to `sampling_rules`. This option
// is also configurable as the environment variable DD_TRACE_RATE_LIMIT.
double sampling_limit_per_second = 100;
// Some tags are associated with an entire trace, rather than with a
// particular span in the trace. Some of these trace-wide tags are
// propagated between services. The tags are injected into a carrier (e.g.
// an HTTP header) in a particular format.
// `tags_header_size` is the maximum allowed length, in bytes, of the
// serialized tags. Trace-wide tags whose serialized length exceeds this
// limit are not propagated.
uint64_t tags_header_size = 512;
};

// TraceEncoder exposes the data required to encode and submit traces to the
Expand Down
71 changes: 0 additions & 71 deletions src/base64_rfc4648.cpp

This file was deleted.

20 changes: 0 additions & 20 deletions src/base64_rfc4648.h

This file was deleted.

40 changes: 25 additions & 15 deletions src/pending_trace.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ namespace {
const std::string sampling_priority_metric = "_sampling_priority_v1";
const std::string datadog_origin_tag = "_dd.origin";
const std::string datadog_hostname_tag = "_dd.hostname";
const std::string datadog_upstream_services_tag = "_dd.p.upstream_services";
const std::string datadog_decision_maker_tag = "_dd.p.dm";
const std::string datadog_propagation_error_tag = "_dd.propagation_error";
const std::string event_sample_rate_metric = "_dd1.sr.eausr";
const std::string rules_sampler_applied_rate = "_dd.rule_psr";
Expand Down Expand Up @@ -62,6 +62,11 @@ void finish_root_span(PendingTrace& trace, SpanData& span) {
if (!std::isnan(trace.sample_result.priority_rate)) {
span.metrics[priority_sampler_applied_rate] = trace.sample_result.priority_rate;
}
trace.applySamplingDecisionToTraceTags();
span.meta.insert(trace.trace_tags.begin(), trace.trace_tags.end());
if (!trace.propagation_error.empty()) {
span.meta[datadog_propagation_error_tag] = trace.propagation_error;
}
// Forward to the finisher that applies to all spans (not just root spans).
finish_span(trace, span);
}
Expand Down Expand Up @@ -94,16 +99,14 @@ void PendingTrace::finish() {
}
}

void PendingTrace::applySamplingDecisionToUpstreamServices() {
if (applied_sampling_decision_to_upstream_services || sampling_decision_extracted ||
sampling_priority == nullptr) {
// We did not make the sampling decision, or we've already done this.
void PendingTrace::applySamplingDecisionToTraceTags() {
if (sampling_decision_extracted || sampling_priority == nullptr) {
// We did not make the sampling decision.
return;
}

// In unit tests, we sometimes don't have a service name. In those cases,
// omit our `UpstreamService` entry (those tests are not looking for the
// corresponding tag).
// omit the tag (those tests are not looking for the corresponding tag).
if (service.empty()) {
return;
}
Expand All @@ -113,14 +116,21 @@ void PendingTrace::applySamplingDecisionToUpstreamServices() {
// have set a corresponding sampling mechanism.
assert(sample_result.sampling_mechanism != nullptr);

UpstreamService this_service;
this_service.service_name = service;
this_service.sampling_priority = *sampling_priority;
this_service.sampling_mechanism = int(sample_result.sampling_mechanism.get<SamplingMechanism>());
this_service.sampling_rate = sample_result.applied_rate;

appendUpstreamService(trace_tags[upstream_services_tag], this_service);
applied_sampling_decision_to_upstream_services = true;
// The "decision maker" is formatted as:
//
// <maybe someday service name hashed> "-" <sampling mechanism>
//
// So for now it's just
//
// "-" <sampling mechanism>
//
// e.g.
//
// -4
//
// That's a separating hyphen, not a minus sign.
const int mechanism = int(sample_result.sampling_mechanism.get<SamplingMechanism>());
trace_tags[datadog_decision_maker_tag] = "-" + std::to_string(mechanism);
}

} // namespace opentracing
Expand Down
20 changes: 4 additions & 16 deletions src/pending_trace.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,9 @@ struct PendingTrace {

void finish();
// If this tracer did not inherit a sampling decision from an upstream
// service, but instead made a sampling decision, then append an
// `UpstreamService` record to the "_dd.p.upstream_services" member of
// `trace_tags`. Note that this function is idempotent.
// Note that this function is not currently used.
void applySamplingDecisionToUpstreamServices();
// service, but instead made a sampling decision, then record that decision
// in the "_dd.p.dm" member of `trace_tags`.
void applySamplingDecisionToTraceTags();

std::shared_ptr<const Logger> logger;
uint64_t trace_id;
Expand All @@ -49,9 +47,7 @@ struct PendingTrace {
// `finish_root_span` in `span_buffer.cpp`). In addition to those tags,
// `trace_tags` are similarly added.
// `trace_tags` originate from extracted trace context (`SpanContext`). Some
// trace tags require special handling, e.g. "_dd.p.upstream_services".
// Note that trace tags are currently disabled. These data structures remain
// for possible future use.
// trace tags require special handling, e.g. "_dd.p.dm".
std::unordered_map<std::string, std::string> trace_tags;
// `service` is the name of the service associated with this trace. If the
// service name changes (such as by calling `Span::setServiceName`), then
Expand All @@ -62,20 +58,12 @@ struct PendingTrace {
// tag will be set on the local root span to the value of
// `propagation_error`. If no error occurs, then `propagation_error` will be
// empty and the "_dd.propagation_error" tag will not be added.
// Note that since trace tag propagation is disabled, `propagation_error` is
// always empty; it remains here for possible future use.
std::string propagation_error;
// `sampling_decision_extracted` is whether `sampling_priority` was
// inherited from an upstream service when span context was extracted
// (`true`), or determined by a decision within this tracer (`false`), or has
// not yet been decided (`false`).
bool sampling_decision_extracted = false;
// `applied_sampling_decision_to_upstream_services` is whether the function
// `applySamplingDecisionToUpstreamServices` has done its work. Note that
// since trace tag propagation is disabled,
// `applied_sampling_decision_to_upstream_services` is always `false`; it
// remains here for possible future use.
bool applied_sampling_decision_to_upstream_services = false;
};

} // namespace opentracing
Expand Down
8 changes: 3 additions & 5 deletions src/span_buffer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -190,16 +190,14 @@ std::string SpanBuffer::serializeTraceTags(uint64_t trace_id) {

auto& trace = trace_found->second;

trace.applySamplingDecisionToUpstreamServices();
trace.applySamplingDecisionToTraceTags();
for (const auto& entry : trace.trace_tags) {
appendTag(result, entry.first, entry.second);
}

// This feature has been removed, so this is dead code.
// const auto configured_max = options_.trace_tags_propagation_max_length;
const uint64_t configured_max = 512;
const auto configured_max = options_.tags_header_size;
if (result.size() > configured_max) {
trace.propagation_error = "max_size";
trace.propagation_error = "inject_max_size";
std::ostringstream message;
message
<< "Serialized trace tags are too large for propagation. Configured maximum length is "
Expand Down
6 changes: 2 additions & 4 deletions src/span_buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,8 @@ struct SpanBufferOptions {
std::string hostname;
double analytics_rate = std::nan("");
std::string service;
// The corresponding field in `TracerOptions` has been removed along with the
// corresponding feature.
uint64_t trace_tags_propagation_max_length;
// See the corresponding field in `TracerOptions`.
uint64_t tags_header_size;
};

// Keeps track of Spans until there is a complete trace, and sends completed
Expand Down Expand Up @@ -71,7 +70,6 @@ class SpanBuffer {
// that an error occurred. If an encoding error occurs, a corresponding
// `_dd.propagation_error` tag value will be added to the relevant trace's
// local root span.
// Note that this function is not currently used.
std::string serializeTraceTags(uint64_t trace_id);

// Change the name of the service associated with the trace having the
Expand Down
26 changes: 25 additions & 1 deletion src/span_context.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ struct HeadersImpl {
const char *span_id_header;
const char *sampling_priority_header;
const char *origin_header;
// Certain tags that are associated with the entire trace are propagated.
// See `tag_propagation.h`.
const char *tags_header;
const int base;
std::string (*encode_id)(uint64_t);
std::string (*encode_sampling_priority)(SamplingPriority);
Expand Down Expand Up @@ -54,6 +57,7 @@ constexpr struct {
"x-datadog-parent-id",
"x-datadog-sampling-priority",
"x-datadog-origin",
"x-datadog-tags",
10,
std::to_string,
to_string};
Expand All @@ -62,6 +66,7 @@ constexpr struct {
"X-B3-SpanId",
"X-B3-Sampled",
"x-datadog-origin",
"x-datadog-tags",
16,
asHex,
clampB3SamplingPriorityValue};
Expand Down Expand Up @@ -140,6 +145,7 @@ std::vector<ot::string_view> getPropagationHeaderNames(const std::set<Propagatio
headers.push_back(propagation_headers[style].sampling_priority_header);
headers.push_back(propagation_headers[style].origin_header);
}
headers.push_back(propagation_headers[style].tags_header);
}
return headers;
}
Expand Down Expand Up @@ -319,6 +325,10 @@ ot::expected<void> SpanContext::serialize(std::ostream &writer,
j[json_origin_key] = origin_;
}
}
std::string tags = pending_traces->serializeTraceTags(trace_id_);
if (!tags.empty()) {
j[json_tags_key] = std::move(tags);
}
j[json_baggage_key] = baggage_;

writer << j.dump();
Expand Down Expand Up @@ -387,6 +397,14 @@ ot::expected<void> SpanContext::serialize(const ot::TextMapWriter &writer,
}
}

const std::string tags = pending_traces->serializeTraceTags(trace_id_);
if (!tags.empty()) {
result = writer.Set(headers_impl.tags_header, tags);
}
if (!result) {
return result;
}

for (auto baggage_item : baggage_) {
std::string key = std::string(baggage_prefix) + baggage_item.first;
result = writer.Set(key, baggage_item.second);
Expand Down Expand Up @@ -549,13 +567,19 @@ ot::expected<std::unique_ptr<ot::SpanContext>> SpanContext::deserialize(
} else if (has_prefix(key, baggage_prefix)) {
baggage.emplace(std::string{std::begin(key) + baggage_prefix.size(), std::end(key)},
value);
} else if (equals_ignore_case(key, headers_impl.tags_header)) {
trace_tags = deserializeTags(value);
}
} catch (const std::logic_error &error) {
std::ostringstream message;
message << "Error decoding context key " << json_quote(key) << " with value "
<< json_quote(value) << ": " << error.what();
logger->Log(LogLevel::error, message.str());
return ot::make_unexpected(ot::span_context_corrupted_error);
// Tolerate failure to parse `tags_header`, but not e.g.
// `trace_id_header`.
if (!equals_ignore_case(key, headers_impl.tags_header)) {
return ot::make_unexpected(ot::span_context_corrupted_error);
}
}
return {};
});
Expand Down
Loading

0 comments on commit 9b9248f

Please sign in to comment.