Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Migrate histogram metrics to {f,u}64_histogram! #6356

Merged
merged 11 commits into from
Dec 20, 2024
54 changes: 31 additions & 23 deletions apollo-router/src/cache/storage.rs
Original file line number Diff line number Diff line change
Expand Up @@ -170,27 +170,31 @@ where

match res {
Some(v) => {
let duration = instant_memory.elapsed();
f64_histogram!(
"apollo_router_cache_hit_time",
"Time to get a value from the cache in seconds",
duration.as_secs_f64(),
kind = self.caller,
storage = CacheStorageName::Memory.to_string()
);
u64_counter!(
"apollo_router_cache_hit_count",
"Number of cache hits",
1,
kind = self.caller,
storage = CacheStorageName::Memory.to_string()
);
let duration = instant_memory.elapsed().as_secs_f64();
tracing::info!(
histogram.apollo_router_cache_hit_time = duration,
kind = %self.caller,
storage = &tracing::field::display(CacheStorageName::Memory),
);
Some(v)
}
None => {
let duration = instant_memory.elapsed().as_secs_f64();
tracing::info!(
histogram.apollo_router_cache_miss_time = duration,
kind = %self.caller,
storage = &tracing::field::display(CacheStorageName::Memory),
let duration = instant_memory.elapsed();
f64_histogram!(
"apollo_router_cache_miss_time",
"Time to check the cache for an uncached value in seconds",
duration.as_secs_f64(),
kind = self.caller,
storage = CacheStorageName::Memory.to_string()
);
u64_counter!(
"apollo_router_cache_miss_count",
Expand Down Expand Up @@ -218,35 +222,39 @@ where
Some(v) => {
self.insert_in_memory(key.clone(), v.0.clone()).await;

let duration = instant_redis.elapsed();
f64_histogram!(
"apollo_router_cache_hit_time",
"Time to get a value from the cache in seconds",
duration.as_secs_f64(),
kind = self.caller,
storage = CacheStorageName::Redis.to_string()
);
u64_counter!(
"apollo_router_cache_hit_count",
"Number of cache hits",
1,
kind = self.caller,
storage = CacheStorageName::Redis.to_string()
);
let duration = instant_redis.elapsed().as_secs_f64();
tracing::info!(
histogram.apollo_router_cache_hit_time = duration,
kind = %self.caller,
storage = &tracing::field::display(CacheStorageName::Redis),
);
Some(v.0)
}
None => {
let duration = instant_redis.elapsed();
f64_histogram!(
"apollo_router_cache_miss_time",
"Time to check the cache for an uncached value in seconds",
duration.as_secs_f64(),
kind = self.caller,
storage = CacheStorageName::Redis.to_string()
);
u64_counter!(
"apollo_router_cache_miss_count",
"Number of cache misses",
1,
kind = self.caller,
storage = CacheStorageName::Redis.to_string()
);
let duration = instant_redis.elapsed().as_secs_f64();
tracing::info!(
histogram.apollo_router_cache_miss_time = duration,
kind = %self.caller,
storage = &tracing::field::display(CacheStorageName::Redis),
);
None
}
}
Expand Down
16 changes: 10 additions & 6 deletions apollo-router/src/graphql/request.rs
Original file line number Diff line number Diff line change
Expand Up @@ -197,9 +197,11 @@ impl Request {
let mut result = Request::allocate_result_array(value);

if value.is_array() {
tracing::info!(
histogram.apollo.router.operations.batching.size = result.len() as f64,
mode = %BatchingMode::BatchHttpLink // Only supported mode right now
f64_histogram!(
"apollo.router.operations.batching.size",
"Number of queries contained within each query batch",
result.len() as f64,
mode = BatchingMode::BatchHttpLink.to_string() // Only supported mode right now
);
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we safely change this to u64_histogram!, or would that be a breaking change?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it breaks anything. @BrynCooke do you know ?


u64_counter!(
Expand All @@ -226,9 +228,11 @@ impl Request {
let mut result = Request::allocate_result_array(value);

if value.is_array() {
tracing::info!(
histogram.apollo.router.operations.batching.size = result.len() as f64,
mode = "batch_http_link" // Only supported mode right now
f64_histogram!(
"apollo.router.operations.batching.size",
"Number of queries contained within each query batch",
result.len() as f64,
mode = BatchingMode::BatchHttpLink.to_string() // Only supported mode right now
);

u64_counter!(
Expand Down
20 changes: 13 additions & 7 deletions apollo-router/src/plugins/cache/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -244,15 +244,21 @@ impl CacheCounter {

for (typename, (cache_hit, total_entities)) in seen.into_iter() {
if separate_metrics_per_type {
::tracing::info!(
histogram.apollo.router.operations.entity.cache_hit = (cache_hit as f64 / total_entities as f64) * 100f64,
entity_type = %typename,
subgraph = %subgraph_name,
f64_histogram!(
"apollo.router.operations.entity.cache_hit",
"Hit rate percentage of cached entities",
(cache_hit as f64 / total_entities as f64) * 100f64,
// Can't just `Arc::clone` these because they're `Arc<String>`,
// while opentelemetry supports `Arc<str>`
entity_type = typename.to_string(),
subgraph = subgraph_name.to_string()
);
} else {
::tracing::info!(
histogram.apollo.router.operations.entity.cache_hit = (cache_hit as f64 / total_entities as f64) * 100f64,
subgraph = %subgraph_name,
f64_histogram!(
"apollo.router.operations.entity.cache_hit",
"Hit rate percentage of cached entities",
(cache_hit as f64 / total_entities as f64) * 100f64,
subgraph = subgraph_name.to_string()
);
}
}
Expand Down
15 changes: 4 additions & 11 deletions apollo-router/src/plugins/coprocessor/execution.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ use tower::BoxError;
use tower::ServiceBuilder;
use tower_service::Service;

use super::externalize_header_map;
use super::*;
use crate::graphql;
use crate::layers::async_checkpoint::OneShotAsyncCheckpointLayer;
Expand Down Expand Up @@ -235,12 +234,9 @@ where
let guard = request.context.enter_active_request();
let start = Instant::now();
let co_processor_result = payload.call(http_client, &coprocessor_url).await;
let duration = start.elapsed().as_secs_f64();
let duration = start.elapsed();
drop(guard);
tracing::info!(
histogram.apollo.router.operations.coprocessor.duration = duration,
coprocessor.stage = %PipelineStep::ExecutionRequest,
);
record_coprocessor_duration(PipelineStep::ExecutionRequest, duration);

tracing::debug!(?co_processor_result, "co-processor returned");
let co_processor_output = co_processor_result?;
Expand Down Expand Up @@ -381,12 +377,9 @@ where
let guard = response.context.enter_active_request();
let start = Instant::now();
let co_processor_result = payload.call(http_client.clone(), &coprocessor_url).await;
let duration = start.elapsed().as_secs_f64();
let duration = start.elapsed();
drop(guard);
tracing::info!(
histogram.apollo.router.operations.coprocessor.duration = duration,
coprocessor.stage = %PipelineStep::ExecutionResponse,
);
record_coprocessor_duration(PipelineStep::ExecutionResponse, duration);

tracing::debug!(?co_processor_result, "co-processor returned");
let co_processor_output = co_processor_result?;
Expand Down
37 changes: 17 additions & 20 deletions apollo-router/src/plugins/coprocessor/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,15 @@ fn default_timeout() -> Duration {
DEFAULT_EXTERNALIZATION_TIMEOUT
}

fn record_coprocessor_duration(stage: PipelineStep, duration: Duration) {
f64_histogram!(
"apollo.router.operations.coprocessor.duration",
"Time spent waiting for the coprocessor to answer, in seconds",
duration.as_secs_f64(),
coprocessor.stage = stage.to_string()
);
}

#[derive(Clone, Debug, Default, Deserialize, PartialEq, Serialize, JsonSchema)]
#[serde(default)]
pub(super) struct RouterStage {
Expand Down Expand Up @@ -671,12 +680,9 @@ where
let guard = request.context.enter_active_request();
let start = Instant::now();
let co_processor_result = payload.call(http_client, &coprocessor_url).await;
let duration = start.elapsed().as_secs_f64();
let duration = start.elapsed();
drop(guard);
tracing::info!(
histogram.apollo.router.operations.coprocessor.duration = duration,
coprocessor.stage = %PipelineStep::RouterRequest,
);
record_coprocessor_duration(PipelineStep::RouterRequest, duration);

tracing::debug!(?co_processor_result, "co-processor returned");
let mut co_processor_output = co_processor_result?;
Expand Down Expand Up @@ -846,12 +852,9 @@ where
let guard = response.context.enter_active_request();
let start = Instant::now();
let co_processor_result = payload.call(http_client.clone(), &coprocessor_url).await;
let duration = start.elapsed().as_secs_f64();
let duration = start.elapsed();
drop(guard);
tracing::info!(
histogram.apollo.router.operations.coprocessor.duration = duration,
coprocessor.stage = %PipelineStep::RouterResponse,
);
record_coprocessor_duration(PipelineStep::RouterResponse, duration);

tracing::debug!(?co_processor_result, "co-processor returned");
let co_processor_output = co_processor_result?;
Expand Down Expand Up @@ -1034,12 +1037,9 @@ where
let guard = request.context.enter_active_request();
let start = Instant::now();
let co_processor_result = payload.call(http_client, &coprocessor_url).await;
let duration = start.elapsed().as_secs_f64();
let duration = start.elapsed();
drop(guard);
tracing::info!(
histogram.apollo.router.operations.coprocessor.duration = duration,
coprocessor.stage = %PipelineStep::SubgraphRequest,
);
record_coprocessor_duration(PipelineStep::SubgraphRequest, duration);

tracing::debug!(?co_processor_result, "co-processor returned");
let co_processor_output = co_processor_result?;
Expand Down Expand Up @@ -1193,12 +1193,9 @@ where
let guard = response.context.enter_active_request();
let start = Instant::now();
let co_processor_result = payload.call(http_client, &coprocessor_url).await;
let duration = start.elapsed().as_secs_f64();
let duration = start.elapsed();
drop(guard);
tracing::info!(
histogram.apollo.router.operations.coprocessor.duration = duration,
coprocessor.stage = %PipelineStep::SubgraphResponse,
);
record_coprocessor_duration(PipelineStep::SubgraphResponse, duration);

tracing::debug!(?co_processor_result, "co-processor returned");
let co_processor_output = co_processor_result?;
Expand Down
15 changes: 4 additions & 11 deletions apollo-router/src/plugins/coprocessor/supergraph.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ use tower::BoxError;
use tower::ServiceBuilder;
use tower_service::Service;

use super::externalize_header_map;
use super::*;
use crate::graphql;
use crate::layers::async_checkpoint::OneShotAsyncCheckpointLayer;
Expand Down Expand Up @@ -243,12 +242,9 @@ where
let guard = request.context.enter_active_request();
let start = Instant::now();
let co_processor_result = payload.call(http_client, &coprocessor_url).await;
let duration = start.elapsed().as_secs_f64();
let duration = start.elapsed();
drop(guard);
tracing::info!(
histogram.apollo.router.operations.coprocessor.duration = duration,
coprocessor.stage = %PipelineStep::SupergraphRequest,
);
record_coprocessor_duration(PipelineStep::SupergraphRequest, duration);

tracing::debug!(?co_processor_result, "co-processor returned");
let co_processor_output = co_processor_result?;
Expand Down Expand Up @@ -397,12 +393,9 @@ where
let guard = response.context.enter_active_request();
let start = Instant::now();
let co_processor_result = payload.call(http_client.clone(), &coprocessor_url).await;
let duration = start.elapsed().as_secs_f64();
let duration = start.elapsed();
drop(guard);
tracing::info!(
histogram.apollo.router.operations.coprocessor.duration = duration,
coprocessor.stage = %PipelineStep::SupergraphResponse,
);
record_coprocessor_duration(PipelineStep::SupergraphResponse, duration);

tracing::debug!(?co_processor_result, "co-processor returned");
let co_processor_output = co_processor_result?;
Expand Down
6 changes: 5 additions & 1 deletion apollo-router/src/plugins/telemetry/utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,11 @@ use std::time::Instant;
/// Timer implementing Drop to automatically compute the duration between the moment it has been created until it's dropped
///```ignore
/// Timer::new(|duration| {
/// tracing::info!(histogram.apollo_router_test = duration.as_secs_f64());
/// f64_histogram!(
/// "apollo_router_test",
/// "Time spent testing the timer",
/// duration.as_secs_f64()
/// );
/// })
/// ```
pub(crate) struct Timer<F>
Expand Down
6 changes: 4 additions & 2 deletions apollo-router/src/query_planner/caching_query_planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,10 @@ where
experimental_pql_prewarm: &PersistedQueriesPrewarmQueryPlanCache,
) {
let _timer = Timer::new(|duration| {
::tracing::info!(
histogram.apollo.router.query.planning.warmup.duration = duration.as_secs_f64()
f64_histogram!(
"apollo.router.query_planning.warmup.duration",
"Time spent warming up the query planner queries in seconds",
duration.as_secs_f64()
);
});

Expand Down
9 changes: 6 additions & 3 deletions apollo-router/src/services/subgraph_service.rs
Original file line number Diff line number Diff line change
Expand Up @@ -871,9 +871,12 @@ pub(crate) async fn process_batch(
let client = client_factory.create(&service);

// Update our batching metrics (just before we fetch)
tracing::info!(histogram.apollo.router.operations.batching.size = listener_count as f64,
mode = %BatchingMode::BatchHttpLink, // Only supported mode right now
subgraph = &service
f64_histogram!(
"apollo.router.operations.batching.size",
"Number of queries contained within each query batch",
listener_count as f64,
mode = BatchingMode::BatchHttpLink.to_string(), // Only supported mode right now
subgraph = service.clone()
);

u64_counter!(
Expand Down
Loading
Loading