Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate monotonic counter metrics to u64_counter! #6350

Merged
merged 28 commits into from
Dec 3, 2024
Merged
Show file tree
Hide file tree
Changes from 15 commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
304919a
Use metrics macros in persisted queries layer
goto-bus-stop Nov 25, 2024
30fc35e
Use new metrics macro for counters in subgraph service
goto-bus-stop Nov 25, 2024
148d9c4
Use new metrics macro for counters in graphql::Request
goto-bus-stop Nov 25, 2024
be452c9
Use new metrics macro for counting defer requests and responses
goto-bus-stop Nov 25, 2024
7f744e9
Use new metrics macro for counting uplink fetches
goto-bus-stop Nov 25, 2024
eb3a8f7
Use new metrics macro for counting cache hits
goto-bus-stop Nov 25, 2024
a93408d
Use new metrics macro for counting authentication uses
goto-bus-stop Nov 25, 2024
f3bc20b
Use new metrics macros in timeout layer
goto-bus-stop Nov 27, 2024
9130dbc
Use new metrics macros for reporting graphql error counts
goto-bus-stop Nov 27, 2024
551170f
Use new metrics macros for reporting subscription events
goto-bus-stop Nov 27, 2024
b5497c6
Use new metrics macros for reporting state changes
goto-bus-stop Nov 27, 2024
69e71b1
Use new metrics macros for reporting which telemetry system is enabled
goto-bus-stop Nov 27, 2024
4858d24
Add back the deprecated metrics
goto-bus-stop Nov 27, 2024
fce826c
fmt
goto-bus-stop Nov 27, 2024
3c43714
Simplify plugin_metrics
goto-bus-stop Nov 28, 2024
fd39d9b
Tweak `apollo.router.operations.telemetry` description
goto-bus-stop Nov 28, 2024
0d6e222
docs: list the newly deprecated metrics
goto-bus-stop Nov 28, 2024
01ebdef
Add a description for subgraph SigV4 signing
goto-bus-stop Nov 28, 2024
aca6024
Add apollo.router.operations.authorization description
goto-bus-stop Nov 28, 2024
cdb7940
Tweak `apollo_router_skipped_event_count` description
goto-bus-stop Nov 28, 2024
54b62dd
docs: add migration instructions for metrics deprecated in favour of …
goto-bus-stop Nov 29, 2024
7e02584
Docs-deprecate `apollo.router.graphql_error` metric
goto-bus-stop Nov 29, 2024
35a24bb
Revert "Docs-deprecate `apollo.router.graphql_error` metric"
goto-bus-stop Nov 29, 2024
947cb6d
Update deprecated metrics docs
goto-bus-stop Nov 29, 2024
239e933
Document apollo_require_authentication_failure_count migration
goto-bus-stop Dec 2, 2024
d00ddbc
add changeset
goto-bus-stop Dec 2, 2024
d3a487f
we speak american
goto-bus-stop Dec 2, 2024
6564296
Merge branch 'dev' into renee/ROUTER-297-monotonic-counters
goto-bus-stop Dec 3, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 24 additions & 16 deletions apollo-router/src/cache/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,10 +170,12 @@ where

match res {
Some(v) => {
tracing::info!(
monotonic_counter.apollo_router_cache_hit_count = 1u64,
kind = %self.caller,
storage = &tracing::field::display(CacheStorageName::Memory),
u64_counter!(
"apollo_router_cache_hit_count",
"Number of cache hits",
1,
kind = self.caller,
storage = CacheStorageName::Memory.to_string()
);
let duration = instant_memory.elapsed().as_secs_f64();
tracing::info!(
Expand All @@ -190,10 +192,12 @@ where
kind = %self.caller,
storage = &tracing::field::display(CacheStorageName::Memory),
);
tracing::info!(
monotonic_counter.apollo_router_cache_miss_count = 1u64,
kind = %self.caller,
storage = &tracing::field::display(CacheStorageName::Memory),
u64_counter!(
"apollo_router_cache_miss_count",
"Number of cache misses",
1,
kind = self.caller,
storage = CacheStorageName::Memory.to_string()
);

let instant_redis = Instant::now();
Expand All @@ -214,10 +218,12 @@ where
Some(v) => {
self.insert_in_memory(key.clone(), v.0.clone()).await;

tracing::info!(
monotonic_counter.apollo_router_cache_hit_count = 1u64,
kind = %self.caller,
storage = &tracing::field::display(CacheStorageName::Redis),
u64_counter!(
"apollo_router_cache_hit_count",
"Number of cache hits",
1,
kind = self.caller,
storage = CacheStorageName::Redis.to_string()
);
let duration = instant_redis.elapsed().as_secs_f64();
tracing::info!(
Expand All @@ -228,10 +234,12 @@ where
Some(v.0)
}
None => {
tracing::info!(
monotonic_counter.apollo_router_cache_miss_count = 1u64,
kind = %self.caller,
storage = &tracing::field::display(CacheStorageName::Redis),
u64_counter!(
"apollo_router_cache_miss_count",
"Number of cache misses",
1,
kind = self.caller,
storage = CacheStorageName::Redis.to_string()
);
let duration = instant_redis.elapsed().as_secs_f64();
tracing::info!(
Expand Down
16 changes: 10 additions & 6 deletions apollo-router/src/graphql/request.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,9 +202,11 @@ impl Request {
mode = %BatchingMode::BatchHttpLink // Only supported mode right now
);

tracing::info!(
monotonic_counter.apollo.router.operations.batching = 1u64,
mode = %BatchingMode::BatchHttpLink // Only supported mode right now
u64_counter!(
"apollo.router.operations.batching",
"Total requests with batched operations",
1,
mode = BatchingMode::BatchHttpLink.to_string() // Only supported mode right now
);
for entry in value
.as_array()
Expand All @@ -229,9 +231,11 @@ impl Request {
mode = "batch_http_link" // Only supported mode right now
);

tracing::info!(
monotonic_counter.apollo.router.operations.batching = 1u64,
mode = "batch_http_link" // Only supported mode right now
u64_counter!(
"apollo.router.operations.batching",
"Total requests with batched operations",
1,
mode = BatchingMode::BatchHttpLink.to_string() // Only supported mode right now
);
for entry in value
.as_array()
Expand Down
31 changes: 30 additions & 1 deletion apollo-router/src/metrics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -496,7 +496,9 @@ pub(crate) fn meter_provider() -> AggregateMeterProvider {
}

#[macro_export]
/// Get or create a u64 monotonic counter metric and add a value to it
/// Get or create a `u64` monotonic counter metric and add a value to it.
///
/// Each metric needs a description.
///
/// This macro is a replacement for the telemetry crate's MetricsLayer. We will eventually convert all metrics to use these macros and deprecate the MetricsLayer.
/// The reason for this is that the MetricsLayer has:
Expand All @@ -506,6 +508,33 @@ pub(crate) fn meter_provider() -> AggregateMeterProvider {
/// * Imperfect mapping to metrics API that can only be checked at runtime.
///
/// New metrics should be added using these macros.
///
/// # Examples
/// ```ignore
/// // Count a thing:
/// u64_counter!(
/// "apollo.router.operations.frobbles",
/// "The amount of frobbles we've operated on",
/// 1
/// );
/// // Count a thing with attributes:
/// u64_counter!(
/// "apollo.router.operations.frobbles",
/// "The amount of frobbles we've operated on",
/// 1,
/// frobbles.color = "blue"
/// );
/// // Count a thing with dynamic attributes:
/// let attributes = [
/// opentelemetry::KeyValue::new("frobbles.color".to_string(), "blue".into()),
/// ];
/// u64_counter!(
/// "apollo.router.operations.frobbles",
/// "The amount of frobbles we've operated on",
/// 1,
/// attributes
/// );
/// ```
#[allow(unused_macros)]
macro_rules! u64_counter {
($($name:ident).+, $description:literal, $value: expr, $($attr_key:literal = $attr_value:expr),+) => {
Expand Down
6 changes: 5 additions & 1 deletion apollo-router/src/notification.rs
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,11 @@ where

match Pin::new(&mut this.msg_receiver).poll_next(cx) {
Poll::Ready(Some(Err(BroadcastStreamRecvError::Lagged(_)))) => {
tracing::info!(monotonic_counter.apollo_router_skipped_event_count = 1u64,);
u64_counter!(
"apollo_router_skipped_event_count",
"Amount of times the router missed subscription events",
goto-bus-stop marked this conversation as resolved.
Show resolved Hide resolved
1u64
);
self.poll_next(cx)
}
Poll::Ready(None) => Poll::Ready(None),
Expand Down
35 changes: 19 additions & 16 deletions apollo-router/src/plugins/authentication/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -539,8 +539,6 @@ fn authenticate(
jwks_manager: &JwksManager,
request: router::Request,
) -> ControlFlow<router::Response, router::Request> {
const AUTHENTICATION_KIND: &str = "JWT";

// We are going to do a lot of similar checking so let's define a local function
// to help reduce repetition
fn failure_message(
Expand All @@ -549,17 +547,16 @@ fn authenticate(
status: StatusCode,
) -> ControlFlow<router::Response, router::Request> {
// This is a metric and will not appear in the logs
tracing::info!(
monotonic_counter.apollo_authentication_failure_count = 1u64,
kind = %AUTHENTICATION_KIND
u64_counter!(
"apollo_authentication_failure_count",
"Number of requests with failed JWT authentication (deprecated)",
1,
kind = "JWT"
);
tracing::info!(
monotonic_counter
.apollo
.router
.operations
.authentication
.jwt = 1,
u64_counter!(
"apollo.router.operations.authentication.jwt",
"Number of requests with JWT authentication",
1,
authentication.jwt.failed = true
);
tracing::info!(message = %error, "jwt authentication failure");
Expand Down Expand Up @@ -662,11 +659,17 @@ fn authenticate(
);
}
// This is a metric and will not appear in the logs
tracing::info!(
monotonic_counter.apollo_authentication_success_count = 1u64,
kind = %AUTHENTICATION_KIND
u64_counter!(
"apollo_authentication_success_count",
"Number of requests with successful JWT authentication (deprecated)",
1,
kind = "JWT"
);
u64_counter!(
"apollo.router.operations.jwt",
"Number of requests with JWT authentication",
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wondering if we should not add authentication.jwt.failed = false in the future (for 2.0) to be consistent with what sigv4 is doing for example

1
);
tracing::info!(monotonic_counter.apollo.router.operations.jwt = 1u64);
return ControlFlow::Continue(request);
}

Expand Down
16 changes: 10 additions & 6 deletions apollo-router/src/plugins/authentication/subgraph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -409,17 +409,21 @@ impl SigningParamsConfig {
}

fn increment_success_counter(subgraph_name: &str) {
tracing::info!(
monotonic_counter.apollo.router.operations.authentication.aws.sigv4 = 1u64,
u64_counter!(
"apollo.router.operations.authentication.aws.sigv4",
"",
goto-bus-stop marked this conversation as resolved.
Show resolved Hide resolved
1,
authentication.aws.sigv4.failed = false,
subgraph.service.name = %subgraph_name,
subgraph.service.name = subgraph_name.to_string()
);
}
fn increment_failure_counter(subgraph_name: &str) {
tracing::info!(
monotonic_counter.apollo.router.operations.authentication.aws.sigv4 = 1u64,
u64_counter!(
"apollo.router.operations.authentication.aws.sigv4",
"",
goto-bus-stop marked this conversation as resolved.
Show resolved Hide resolved
1,
authentication.aws.sigv4.failed = true,
subgraph.service.name = %subgraph_name,
subgraph.service.name = subgraph_name.to_string()
);
}

Expand Down
14 changes: 9 additions & 5 deletions apollo-router/src/plugins/authorization/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -556,8 +556,10 @@ impl Plugin for AuthorizationPlugin {
Ok(ControlFlow::Continue(request))
} else {
// This is a metric and will not appear in the logs
tracing::info!(
monotonic_counter.apollo_require_authentication_failure_count = 1u64,
u64_counter!(
"apollo_require_authentication_failure_count",
"Number of unauthenticated requests (deprecated)",
goto-bus-stop marked this conversation as resolved.
Show resolved Hide resolved
1
);
tracing::error!("rejecting unauthenticated request");
let response = supergraph::Response::error_builder()
Expand Down Expand Up @@ -588,11 +590,13 @@ impl Plugin for AuthorizationPlugin {
let needs_requires_scopes = request.context.contains_key(REQUIRED_SCOPES_KEY);

if needs_authenticated || needs_requires_scopes {
tracing::info!(
monotonic_counter.apollo.router.operations.authorization = 1u64,
u64_counter!(
"apollo.router.operations.authorization",
"",
goto-bus-stop marked this conversation as resolved.
Show resolved Hide resolved
1,
authorization.filtered = filtered,
authorization.needs_authenticated = needs_authenticated,
authorization.needs_requires_scopes = needs_requires_scopes,
authorization.needs_requires_scopes = needs_requires_scopes
);
}

Expand Down
20 changes: 12 additions & 8 deletions apollo-router/src/plugins/subscription.rs
Original file line number Diff line number Diff line change
Expand Up @@ -503,10 +503,12 @@ impl Service<router::Request> for CallbackService {
};
// Keep the subscription to the client opened
payload.subscribed = Some(true);
tracing::info!(
monotonic_counter.apollo.router.operations.subscriptions.events = 1u64,
subscriptions.mode="callback"
);
u64_counter!(
"apollo.router.operations.subscriptions.events",
"Number of subscription events",
1,
subscriptions.mode = "callback"
);
handle.send_sync(payload)?;

Ok(router::Response {
Expand Down Expand Up @@ -626,10 +628,12 @@ impl Service<router::Request> for CallbackService {
});
}
};
tracing::info!(
monotonic_counter.apollo.router.operations.subscriptions.events = 1u64,
subscriptions.mode="callback",
subscriptions.complete=true
u64_counter!(
"apollo.router.operations.subscriptions.events",
"Number of subscription events",
1,
subscriptions.mode = "callback",
subscriptions.complete = true
);
if let Err(_err) = handle.send_sync(
graphql::Response::builder().errors(errors).build(),
Expand Down
49 changes: 26 additions & 23 deletions apollo-router/src/plugins/telemetry/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,6 @@ use crate::plugins::telemetry::reload::OPENTELEMETRY_TRACER_HANDLE;
use crate::plugins::telemetry::tracing::apollo_telemetry::decode_ftv1_trace;
use crate::plugins::telemetry::tracing::apollo_telemetry::APOLLO_PRIVATE_OPERATION_SIGNATURE;
use crate::plugins::telemetry::tracing::TracingConfigurator;
use crate::plugins::telemetry::utils::TracingUtils;
use crate::query_planner::OperationKind;
use crate::register_plugin;
use crate::router_factory::Endpoint;
Expand Down Expand Up @@ -1747,28 +1746,32 @@ impl Telemetry {
}

fn plugin_metrics(config: &Arc<Conf>) {
let metrics_prom_used = config.exporters.metrics.prometheus.enabled;
let metrics_otlp_used = MetricsConfigurator::enabled(&config.exporters.metrics.otlp);
let tracing_otlp_used = TracingConfigurator::enabled(&config.exporters.tracing.otlp);
let tracing_datadog_used = config.exporters.tracing.datadog.enabled();
let tracing_jaeger_used = config.exporters.tracing.jaeger.enabled();
let tracing_zipkin_used = config.exporters.tracing.zipkin.enabled();

if metrics_prom_used
|| metrics_otlp_used
|| tracing_jaeger_used
|| tracing_otlp_used
|| tracing_zipkin_used
|| tracing_datadog_used
{
::tracing::info!(
monotonic_counter.apollo.router.operations.telemetry = 1u64,
telemetry.metrics.otlp = metrics_otlp_used.or_empty(),
telemetry.metrics.prometheus = metrics_prom_used.or_empty(),
telemetry.tracing.otlp = tracing_otlp_used.or_empty(),
telemetry.tracing.datadog = tracing_datadog_used.or_empty(),
telemetry.tracing.jaeger = tracing_jaeger_used.or_empty(),
telemetry.tracing.zipkin = tracing_zipkin_used.or_empty(),
let mut attributes = Vec::new();
if MetricsConfigurator::enabled(&config.exporters.metrics.otlp) {
attributes.push(KeyValue::new("telemetry.metrics.otlp", true));
}
if config.exporters.metrics.prometheus.enabled {
attributes.push(KeyValue::new("telemetry.metrics.prometheus", true));
}
if TracingConfigurator::enabled(&config.exporters.tracing.otlp) {
attributes.push(KeyValue::new("telemetry.tracing.otlp", true));
}
if config.exporters.tracing.datadog.enabled() {
attributes.push(KeyValue::new("telemetry.tracing.datadog", true));
}
if config.exporters.tracing.jaeger.enabled() {
attributes.push(KeyValue::new("telemetry.tracing.jaeger", true));
}
if config.exporters.tracing.zipkin.enabled() {
attributes.push(KeyValue::new("telemetry.tracing.zipkin", true));
}

if !attributes.is_empty() {
u64_counter!(
"apollo.router.operations.telemetry",
"Telemetry systems in use",
goto-bus-stop marked this conversation as resolved.
Show resolved Hide resolved
1,
attributes
);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,11 @@ where
match Pin::new(&mut this.sleep).poll(cx) {
Poll::Pending => Poll::Pending,
Poll::Ready(_) => {
tracing::info!(monotonic_counter.apollo_router_timeout = 1u64,);
u64_counter!(
"apollo_router_timeout",
"Number of timed out client requests",
goto-bus-stop marked this conversation as resolved.
Show resolved Hide resolved
1
);
Poll::Ready(Err(Elapsed::new().into()))
}
}
Expand Down
Loading