From ab8e116415bbebc0f6bdf9ac63a9083366ad3466 Mon Sep 17 00:00:00 2001 From: Nate Mara Date: Thu, 3 Dec 2020 14:11:57 -0800 Subject: [PATCH 1/2] Support trace-level sampling The libhoney-rust library has a `sample_rate` configuration option, which provides random sampling of events sent to honeycomb. Unfortunately, this sampling is done at the event level, rather than the trace level. This means that spans or events belonging to a single trace will have separate sampling decisions made for them. This in turn means that when libhoney's sampling is enabled, traces will never have complete span information within them. This severely limits the utility of this sampling. This proposed change introduces sampling based on the trace ID, meaning that spans and events which are a part of the same trace will always have the same sampling decision made for them. This will ensure that a complete picture of the trace is sent up to honeycomb. There are no breaking changes in behavior or API in this changeset, you can still use the sampling logic built into libhoney if desired, but the new function `new_honeycomb_telemetry_layer_with_trace_sampling` has been introduced, which creates a telemetry layer that uses this new sampling logic. Alternatives: - Use the `sample_rate` from the `libhoney` config struct as the trace-level sample rate, and reset the field to 1 before giving it to libhoney. This is undesirable as it makes the current behavior impossible to use, however it also makes it impossible to misuse this new function by providing a sample_rate to this function, and libhoney at the same time. - Perform this trace-level sampling within `libhoney`. This is undesirable as it would require either parsing the traceID from the string provided to libhoney, or using a hash of this string. It is simpler to do the calculation in this library, where we have the raw trace ID. This would also be a breaking change for libhoney, whereas here it can be opt-in. --- tracing-honeycomb/Cargo.toml | 4 ++-- tracing-honeycomb/src/honeycomb.rs | 25 ++++++++++++++++------ tracing-honeycomb/src/lib.rs | 33 +++++++++++++++++++++++++++++- 3 files changed, 53 insertions(+), 9 deletions(-) diff --git a/tracing-honeycomb/Cargo.toml b/tracing-honeycomb/Cargo.toml index 9ea794344..1dd700556 100644 --- a/tracing-honeycomb/Cargo.toml +++ b/tracing-honeycomb/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "tracing-honeycomb" -version = "0.2.0" +version = "0.2.1" authors = ["Inanna Malick "] edition = "2018" description = "Honeycomb.io tracing layer for multiprocess telemetry" @@ -13,7 +13,7 @@ readme = "README.md" [dependencies] tracing = "0.1.12" tracing-core = "0.1.9" -tracing-distributed = { path = "../tracing-distributed" , version = "0.2.0"} +tracing-distributed = { path = "../tracing-distributed", version = "0.2.0" } libhoney-rust = "0.1.3" rand = "0.7" chrono = "0.4.9" diff --git a/tracing-honeycomb/src/honeycomb.rs b/tracing-honeycomb/src/honeycomb.rs index f6a20013e..16f4a9eea 100644 --- a/tracing-honeycomb/src/honeycomb.rs +++ b/tracing-honeycomb/src/honeycomb.rs @@ -9,17 +9,18 @@ use tracing_distributed::{Event, Span, Telemetry}; #[derive(Debug)] pub struct HoneycombTelemetry { honeycomb_client: Mutex>, + sample_rate: usize, } impl HoneycombTelemetry { - pub(crate) fn new(cfg: libhoney::Config) -> Self { + pub(crate) fn new(cfg: libhoney::Config, sample_rate: usize) -> Self { let honeycomb_client = libhoney::init(cfg); // publishing requires &mut so just mutex-wrap it // FIXME: may not be performant, investigate options (eg mpsc) let honeycomb_client = Mutex::new(honeycomb_client); - HoneycombTelemetry { honeycomb_client } + HoneycombTelemetry { honeycomb_client, sample_rate } } fn report_data(&self, data: HashMap) { @@ -34,6 +35,14 @@ impl HoneycombTelemetry { eprintln!("error sending event to honeycomb, {:?}", err); } } + + fn should_report(&self, trace_id: TraceId) -> bool { + if self.sample_rate <= 1 { + return true; + } + + trace_id.0 as usize % self.sample_rate == 0 + } } impl Telemetry for HoneycombTelemetry { @@ -46,13 +55,17 @@ impl Telemetry for HoneycombTelemetry { } fn report_span(&self, span: Span) { - let data = span_to_values(span); - self.report_data(data); + if self.should_report(span.trace_id) { + let data = span_to_values(span); + self.report_data(data); + } } fn report_event(&self, event: Event) { - let data = event_to_values(event); - self.report_data(data); + if self.should_report(event.trace_id) { + let data = event_to_values(event); + self.report_data(data); + } } } diff --git a/tracing-honeycomb/src/lib.rs b/tracing-honeycomb/src/lib.rs index 28a178cbf..5ae24b1b8 100644 --- a/tracing-honeycomb/src/lib.rs +++ b/tracing-honeycomb/src/lib.rs @@ -66,7 +66,38 @@ pub fn new_honeycomb_telemetry_layer( let instance_id: u64 = rand::thread_rng().gen(); TelemetryLayer::new( service_name, - HoneycombTelemetry::new(honeycomb_config), + HoneycombTelemetry::new(honeycomb_config, 1), + move |tracing_id| SpanId { + instance_id, + tracing_id, + }, + ) +} + +/// Construct a TelemetryLayer that publishes telemetry to honeycomb.io using the +/// provided honeycomb config, and sample rate. This function differs from +/// `new_honeycomb_telemetry_layer` and the `sample_rate` on the +/// `libhoney::Config` there in an important way. `libhoney` samples `Event` +/// data, which is individual spans on each trace. This means that using the +/// sampling logic in libhoney may result in missing event data or incomplete +/// traces. Calling this function provides trace-level sampling, meaning sampling +/// decisions are based on a modulo of the traceID, and events in a single trace +/// will not be sampled differently. If the trace is sampled, then all spans +/// under it will be sent to honeycomb. If a trace is not sampled, no spans or +/// events under it will be sent. When using this trace-level sampling, the +/// `sample_rate` parameter on the `libhoney::Config` should be set to 1, which +/// is the default. +/// +/// Specialized to the honeycomb.io-specific SpanId and TraceId provided by this crate. +pub fn new_honeycomb_telemetry_layer_with_trace_sampling( + service_name: &'static str, + honeycomb_config: libhoney::Config, + sample_rate: usize, +) -> TelemetryLayer { + let instance_id: u64 = rand::thread_rng().gen(); + TelemetryLayer::new( + service_name, + HoneycombTelemetry::new(honeycomb_config, sample_rate), move |tracing_id| SpanId { instance_id, tracing_id, From 2fbf6a0109819c72911416a5f2ece6792c50a887 Mon Sep 17 00:00:00 2001 From: Nate Mara Date: Fri, 18 Dec 2020 13:20:28 -0800 Subject: [PATCH 2/2] fixup! Support trace-level sampling --- tracing-honeycomb/src/honeycomb.rs | 11 +++++------ tracing-honeycomb/src/lib.rs | 6 +++--- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/tracing-honeycomb/src/honeycomb.rs b/tracing-honeycomb/src/honeycomb.rs index 16f4a9eea..b034dff68 100644 --- a/tracing-honeycomb/src/honeycomb.rs +++ b/tracing-honeycomb/src/honeycomb.rs @@ -9,11 +9,11 @@ use tracing_distributed::{Event, Span, Telemetry}; #[derive(Debug)] pub struct HoneycombTelemetry { honeycomb_client: Mutex>, - sample_rate: usize, + sample_rate: Option, } impl HoneycombTelemetry { - pub(crate) fn new(cfg: libhoney::Config, sample_rate: usize) -> Self { + pub(crate) fn new(cfg: libhoney::Config, sample_rate: Option) -> Self { let honeycomb_client = libhoney::init(cfg); // publishing requires &mut so just mutex-wrap it @@ -37,11 +37,10 @@ impl HoneycombTelemetry { } fn should_report(&self, trace_id: TraceId) -> bool { - if self.sample_rate <= 1 { - return true; + match self.sample_rate { + Some(sample_rate) => trace_id.0 % sample_rate == 0, + None => true, } - - trace_id.0 as usize % self.sample_rate == 0 } } diff --git a/tracing-honeycomb/src/lib.rs b/tracing-honeycomb/src/lib.rs index 5ae24b1b8..77d11c04c 100644 --- a/tracing-honeycomb/src/lib.rs +++ b/tracing-honeycomb/src/lib.rs @@ -66,7 +66,7 @@ pub fn new_honeycomb_telemetry_layer( let instance_id: u64 = rand::thread_rng().gen(); TelemetryLayer::new( service_name, - HoneycombTelemetry::new(honeycomb_config, 1), + HoneycombTelemetry::new(honeycomb_config, None), move |tracing_id| SpanId { instance_id, tracing_id, @@ -92,12 +92,12 @@ pub fn new_honeycomb_telemetry_layer( pub fn new_honeycomb_telemetry_layer_with_trace_sampling( service_name: &'static str, honeycomb_config: libhoney::Config, - sample_rate: usize, + sample_rate: u128, ) -> TelemetryLayer { let instance_id: u64 = rand::thread_rng().gen(); TelemetryLayer::new( service_name, - HoneycombTelemetry::new(honeycomb_config, sample_rate), + HoneycombTelemetry::new(honeycomb_config, Some(sample_rate)), move |tracing_id| SpanId { instance_id, tracing_id,