-
Notifications
You must be signed in to change notification settings - Fork 272
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
small performance improvements for telemetry (#3656)
* check span names from a HashSet instead of iterating through an array of strings * move the SpanMetricsExporter to a tracing Layer (#3669) SpanMetricsExporter is an OpenTelemetry Exporter, which means that if it is loaded, then the entire OpenTelemetry handling infrastructure is loaded, and especially the part that allocates data for the entire list of spans. Unfortunately, the SpanMetricsExporter was always loaded, even if we do not export the metrics outside of the router, which means that there's a constant overhead of telemetry even when it is not used. This moves the SpanMetricsExporter to a lightweight tracing-subscriber Layer which performs the same busy/idle accounting as OpenTelemetryLayer, then generates the same events as before when the span closes
- Loading branch information
Showing
4 changed files
with
142 additions
and
73 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
### small performance improvements for telemetry ([PR #3656](https://github.com/apollographql/router/pull/3656)) | ||
|
||
The SpanMetricsExporter, used to report span timings hade a few inefficiencies in the way it recognized spans, and it brought a constant overhead to the router usage, even when telemetry was not configured. It has now been isolated and optimized | ||
|
||
By [@Geal](https://github.com/Geal) in https://github.com/apollographql/router/pull/3656 |
191 changes: 125 additions & 66 deletions
191
apollo-router/src/plugins/telemetry/metrics/span_metrics_exporter.rs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,81 +1,140 @@ | ||
use async_trait::async_trait; | ||
use futures::future::BoxFuture; | ||
use futures::FutureExt; | ||
use opentelemetry::sdk::export::trace::ExportResult; | ||
use opentelemetry::sdk::export::trace::SpanData; | ||
use opentelemetry::sdk::export::trace::SpanExporter; | ||
use opentelemetry::Key; | ||
use opentelemetry::Value; | ||
use std::collections::HashSet; | ||
use std::time::Instant; | ||
|
||
use tracing_core::field::Visit; | ||
use tracing_core::span; | ||
use tracing_core::Field; | ||
use tracing_core::Subscriber; | ||
use tracing_subscriber::layer::Context; | ||
use tracing_subscriber::registry::LookupSpan; | ||
use tracing_subscriber::Layer; | ||
|
||
use crate::axum_factory::utils::REQUEST_SPAN_NAME; | ||
use crate::plugins::telemetry::EXECUTION_SPAN_NAME; | ||
use crate::plugins::telemetry::SUBGRAPH_SPAN_NAME; | ||
use crate::plugins::telemetry::SUPERGRAPH_SPAN_NAME; | ||
use crate::services::QUERY_PLANNING_SPAN_NAME; | ||
|
||
const SPAN_NAMES: &[&str] = &[ | ||
REQUEST_SPAN_NAME, | ||
SUPERGRAPH_SPAN_NAME, | ||
SUBGRAPH_SPAN_NAME, | ||
QUERY_PLANNING_SPAN_NAME, | ||
EXECUTION_SPAN_NAME, | ||
]; | ||
|
||
const BUSY_NS_ATTRIBUTE_NAME: Key = Key::from_static_str("busy_ns"); | ||
const IDLE_NS_ATTRIBUTE_NAME: Key = Key::from_static_str("idle_ns"); | ||
const SUBGRAPH_ATTRIBUTE_NAME: Key = Key::from_static_str("apollo.subgraph.name"); | ||
|
||
#[derive(Debug, Default)] | ||
pub(crate) struct Exporter {} | ||
#[async_trait] | ||
impl SpanExporter for Exporter { | ||
/// Export spans metrics to real metrics | ||
fn export(&mut self, batch: Vec<SpanData>) -> BoxFuture<'static, ExportResult> { | ||
for span in batch | ||
.into_iter() | ||
.filter(|s| SPAN_NAMES.contains(&s.name.as_ref())) | ||
{ | ||
let busy = span | ||
.attributes | ||
.get(&BUSY_NS_ATTRIBUTE_NAME) | ||
.and_then(|attr| match attr { | ||
Value::I64(v) => Some(*v), | ||
_ => None, | ||
}) | ||
.unwrap_or_default(); | ||
let idle = span | ||
.attributes | ||
.get(&IDLE_NS_ATTRIBUTE_NAME) | ||
.and_then(|attr| match attr { | ||
Value::I64(v) => Some(*v), | ||
_ => None, | ||
}) | ||
.unwrap_or_default(); | ||
let duration = span | ||
.end_time | ||
.duration_since(span.start_time) | ||
.unwrap_or_default() | ||
.as_secs_f64(); | ||
const SUBGRAPH_ATTRIBUTE_NAME: &str = "apollo.subgraph.name"; | ||
|
||
#[derive(Debug)] | ||
pub(crate) struct SpanMetricsLayer { | ||
span_names: HashSet<&'static str>, | ||
} | ||
|
||
impl Default for SpanMetricsLayer { | ||
fn default() -> Self { | ||
Self { | ||
span_names: [ | ||
REQUEST_SPAN_NAME, | ||
SUPERGRAPH_SPAN_NAME, | ||
SUBGRAPH_SPAN_NAME, | ||
QUERY_PLANNING_SPAN_NAME, | ||
EXECUTION_SPAN_NAME, | ||
] | ||
.into(), | ||
} | ||
} | ||
} | ||
|
||
impl<S> Layer<S> for SpanMetricsLayer | ||
where | ||
S: Subscriber + for<'span> LookupSpan<'span>, | ||
{ | ||
fn on_new_span(&self, attrs: &span::Attributes<'_>, id: &span::Id, ctx: Context<'_, S>) { | ||
let span = ctx.span(id).expect("Span not found, this is a bug"); | ||
let mut extensions = span.extensions_mut(); | ||
|
||
let name = attrs.metadata().name(); | ||
if self.span_names.contains(name) && extensions.get_mut::<Timings>().is_none() { | ||
let mut timings = Timings::new(); | ||
if name == SUBGRAPH_SPAN_NAME { | ||
attrs.values().record(&mut ValueVisitor { | ||
timings: &mut timings, | ||
}); | ||
} | ||
extensions.insert(Timings::new()); | ||
} | ||
} | ||
|
||
fn on_record(&self, _span: &span::Id, _values: &span::Record<'_>, _ctx: Context<'_, S>) {} | ||
|
||
fn on_close(&self, id: span::Id, ctx: Context<'_, S>) { | ||
let span = ctx.span(&id).expect("Span not found, this is a bug"); | ||
let mut extensions = span.extensions_mut(); | ||
|
||
if let Some(timings) = extensions.get_mut::<Timings>() { | ||
let duration = timings.start.elapsed().as_secs_f64(); | ||
|
||
// Convert it in seconds | ||
let idle: f64 = idle as f64 / 1_000_000_000_f64; | ||
let busy: f64 = busy as f64 / 1_000_000_000_f64; | ||
if span.name == SUBGRAPH_SPAN_NAME { | ||
let subgraph_name = span | ||
.attributes | ||
.get(&SUBGRAPH_ATTRIBUTE_NAME) | ||
.map(|name| name.as_str()) | ||
.unwrap_or_default(); | ||
::tracing::info!(histogram.apollo_router_span = duration, kind = %"duration", span = %span.name, subgraph = %subgraph_name); | ||
::tracing::info!(histogram.apollo_router_span = idle, kind = %"idle", span = %span.name, subgraph = %subgraph_name); | ||
::tracing::info!(histogram.apollo_router_span = busy, kind = %"busy", span = %span.name, subgraph = %subgraph_name); | ||
let idle: f64 = timings.idle as f64 / 1_000_000_000_f64; | ||
let busy: f64 = timings.busy as f64 / 1_000_000_000_f64; | ||
let name = span.metadata().name(); | ||
if let Some(subgraph_name) = timings.subgraph.take() { | ||
::tracing::info!(histogram.apollo_router_span = duration, kind = %"duration", span = %name, subgraph = %subgraph_name); | ||
::tracing::info!(histogram.apollo_router_span = idle, kind = %"idle", span = %name, subgraph = %subgraph_name); | ||
::tracing::info!(histogram.apollo_router_span = busy, kind = %"busy", span = %name, subgraph = %subgraph_name); | ||
} else { | ||
::tracing::info!(histogram.apollo_router_span = duration, kind = %"duration", span = %span.name); | ||
::tracing::info!(histogram.apollo_router_span = idle, kind = %"idle", span = %span.name); | ||
::tracing::info!(histogram.apollo_router_span = busy, kind = %"busy", span = %span.name); | ||
::tracing::info!(histogram.apollo_router_span = duration, kind = %"duration", span = %name); | ||
::tracing::info!(histogram.apollo_router_span = idle, kind = %"idle", span = %name); | ||
::tracing::info!(histogram.apollo_router_span = busy, kind = %"busy", span = %name); | ||
} | ||
} | ||
} | ||
|
||
async { Ok(()) }.boxed() | ||
fn on_enter(&self, id: &span::Id, ctx: Context<'_, S>) { | ||
let span = ctx.span(id).expect("Span not found, this is a bug"); | ||
let mut extensions = span.extensions_mut(); | ||
|
||
if let Some(timings) = extensions.get_mut::<Timings>() { | ||
let now = Instant::now(); | ||
timings.idle += (now - timings.last).as_nanos() as i64; | ||
timings.last = now; | ||
} | ||
} | ||
|
||
fn on_exit(&self, id: &span::Id, ctx: Context<'_, S>) { | ||
let span = ctx.span(id).expect("Span not found, this is a bug"); | ||
let mut extensions = span.extensions_mut(); | ||
|
||
if let Some(timings) = extensions.get_mut::<Timings>() { | ||
let now = Instant::now(); | ||
timings.busy += (now - timings.last).as_nanos() as i64; | ||
timings.last = now; | ||
} | ||
} | ||
} | ||
|
||
struct Timings { | ||
idle: i64, | ||
busy: i64, | ||
last: Instant, | ||
start: Instant, | ||
subgraph: Option<String>, | ||
} | ||
|
||
impl Timings { | ||
fn new() -> Self { | ||
Self { | ||
idle: 0, | ||
busy: 0, | ||
last: Instant::now(), | ||
start: Instant::now(), | ||
subgraph: None, | ||
} | ||
} | ||
} | ||
|
||
struct ValueVisitor<'a> { | ||
timings: &'a mut Timings, | ||
} | ||
|
||
impl<'a> Visit for ValueVisitor<'a> { | ||
fn record_debug(&mut self, _field: &Field, _value: &dyn std::fmt::Debug) {} | ||
|
||
fn record_str(&mut self, field: &Field, value: &str) { | ||
if field.name() == SUBGRAPH_ATTRIBUTE_NAME { | ||
self.timings.subgraph = Some(value.to_string()); | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters