Skip to content

Commit

Permalink
Add experimental caching metrics (#3558)
Browse files Browse the repository at this point in the history
Fix #3554

This creates a new metric recorded only if we set the configuration
option `telemetry.metrics.common.experimental_cache_metrics.enabled` to
`true`.

* `apollo.router.operations.entity.cache_hit` (histogram): cache hit ratio
per subgraph and entity type

This simulates an entity cache to find out whether a real one would be
useful. Each time we do a subgraph query, we build a cache key from:
- subgraph name
- entity type
- query
- vary headers
- entity key

We record if we have seen this entity before (using a bloom filter) and
calculate the cache hit ratio for that query, per subgraph and entity
type.

---------

Signed-off-by: Benjamin Coenen <[email protected]>
Co-authored-by: Benjamin Coenen <[email protected]>
  • Loading branch information
2 people authored and garypen committed Sep 12, 2023
1 parent 0cceca7 commit 961fff4
Show file tree
Hide file tree
Showing 9 changed files with 393 additions and 31 deletions.
38 changes: 38 additions & 0 deletions .changesets/maint_bnjjj_caching_metrics.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
### Add experimental caching metrics ([PR #3532](https://github.com/apollographql/router/pull/3532))

This adds a metric that is recorded only if you set `telemetry.metrics.common.experimental_cache_metrics.enabled` to `true`. The metric helps evaluate which entities would benefit from caching. It simulates a cache with a TTL, configurable at `telemetry.metrics.common.experimental_cache_metrics.ttl` (default: 5 seconds), and measures the cache hit rate per entity type and subgraph.

Example output:

```
# HELP apollo_router_operations_entity_cache_hit
# TYPE apollo_router_operations_entity_cache_hit histogram
apollo_router_operations_entity_cache_hit_bucket{entity_type="Product",service_name="apollo-router",subgraph="products",otel_scope_name="apollo/router",otel_scope_version="",le="0.05"} 0
apollo_router_operations_entity_cache_hit_bucket{entity_type="Product",service_name="apollo-router",subgraph="products",otel_scope_name="apollo/router",otel_scope_version="",le="0.1"} 0
apollo_router_operations_entity_cache_hit_bucket{entity_type="Product",service_name="apollo-router",subgraph="products",otel_scope_name="apollo/router",otel_scope_version="",le="0.25"} 0
apollo_router_operations_entity_cache_hit_bucket{entity_type="Product",service_name="apollo-router",subgraph="products",otel_scope_name="apollo/router",otel_scope_version="",le="0.5"} 0
apollo_router_operations_entity_cache_hit_bucket{entity_type="Product",service_name="apollo-router",subgraph="products",otel_scope_name="apollo/router",otel_scope_version="",le="1"} 0
apollo_router_operations_entity_cache_hit_bucket{entity_type="Product",service_name="apollo-router",subgraph="products",otel_scope_name="apollo/router",otel_scope_version="",le="2.5"} 3
apollo_router_operations_entity_cache_hit_bucket{entity_type="Product",service_name="apollo-router",subgraph="products",otel_scope_name="apollo/router",otel_scope_version="",le="5"} 4
apollo_router_operations_entity_cache_hit_bucket{entity_type="Product",service_name="apollo-router",subgraph="products",otel_scope_name="apollo/router",otel_scope_version="",le="10"} 4
apollo_router_operations_entity_cache_hit_bucket{entity_type="Product",service_name="apollo-router",subgraph="products",otel_scope_name="apollo/router",otel_scope_version="",le="20"} 4
apollo_router_operations_entity_cache_hit_bucket{entity_type="Product",service_name="apollo-router",subgraph="products",otel_scope_name="apollo/router",otel_scope_version="",le="1000"} 4
apollo_router_operations_entity_cache_hit_bucket{entity_type="Product",service_name="apollo-router",subgraph="products",otel_scope_name="apollo/router",otel_scope_version="",le="+Inf"} 4
apollo_router_operations_entity_cache_hit_sum{entity_type="Product",service_name="apollo-router",subgraph="products",otel_scope_name="apollo/router",otel_scope_version=""} 7
apollo_router_operations_entity_cache_hit_count{entity_type="Product",service_name="apollo-router",subgraph="products",otel_scope_name="apollo/router",otel_scope_version=""} 4
apollo_router_operations_entity_cache_hit_bucket{entity_type="User",service_name="apollo-router",subgraph="users",otel_scope_name="apollo/router",otel_scope_version="",le="0.05"} 0
apollo_router_operations_entity_cache_hit_bucket{entity_type="User",service_name="apollo-router",subgraph="users",otel_scope_name="apollo/router",otel_scope_version="",le="0.1"} 0
apollo_router_operations_entity_cache_hit_bucket{entity_type="User",service_name="apollo-router",subgraph="users",otel_scope_name="apollo/router",otel_scope_version="",le="0.25"} 0
apollo_router_operations_entity_cache_hit_bucket{entity_type="User",service_name="apollo-router",subgraph="users",otel_scope_name="apollo/router",otel_scope_version="",le="0.5"} 0
apollo_router_operations_entity_cache_hit_bucket{entity_type="User",service_name="apollo-router",subgraph="users",otel_scope_name="apollo/router",otel_scope_version="",le="1"} 0
apollo_router_operations_entity_cache_hit_bucket{entity_type="User",service_name="apollo-router",subgraph="users",otel_scope_name="apollo/router",otel_scope_version="",le="2.5"} 1
apollo_router_operations_entity_cache_hit_bucket{entity_type="User",service_name="apollo-router",subgraph="users",otel_scope_name="apollo/router",otel_scope_version="",le="5"} 1
apollo_router_operations_entity_cache_hit_bucket{entity_type="User",service_name="apollo-router",subgraph="users",otel_scope_name="apollo/router",otel_scope_version="",le="10"} 1
apollo_router_operations_entity_cache_hit_bucket{entity_type="User",service_name="apollo-router",subgraph="users",otel_scope_name="apollo/router",otel_scope_version="",le="20"} 1
apollo_router_operations_entity_cache_hit_bucket{entity_type="User",service_name="apollo-router",subgraph="users",otel_scope_name="apollo/router",otel_scope_version="",le="1000"} 1
apollo_router_operations_entity_cache_hit_bucket{entity_type="User",service_name="apollo-router",subgraph="users",otel_scope_name="apollo/router",otel_scope_version="",le="+Inf"} 1
apollo_router_operations_entity_cache_hit_sum{entity_type="User",service_name="apollo-router",subgraph="users",otel_scope_name="apollo/router",otel_scope_version=""} 1
apollo_router_operations_entity_cache_hit_count{entity_type="User",service_name="apollo-router",subgraph="users",otel_scope_name="apollo/router",otel_scope_version=""} 1
```

By [@bnjjj](https://github.com/bnjjj) [@Geal](https://github.com/geal) in https://github.com/apollographql/router/pull/3532
18 changes: 18 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -281,6 +281,7 @@ dependencies = [
"aws-types",
"axum",
"base64 0.21.2",
"bloomfilter",
"brotli",
"buildstructor 0.5.3",
"bytes",
Expand Down Expand Up @@ -1098,6 +1099,17 @@ dependencies = [
"generic-array 0.14.7",
]

[[package]]
name = "bloomfilter"
version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b92db7965d438b8b4b1c1d0aedd188440a1084593c9eb7f6657e3df7e906d934"
dependencies = [
"bit-vec",
"getrandom 0.2.10",
"siphasher",
]

[[package]]
name = "brotli"
version = "3.3.4"
Expand Down Expand Up @@ -5793,6 +5805,12 @@ dependencies = [
"time",
]

[[package]]
name = "siphasher"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54ac45299ccbd390721be55b412d41931911f654fa99e2cb8bfb57184b2061fe"

[[package]]
name = "slab"
version = "0.4.8"
Expand Down
3 changes: 2 additions & 1 deletion apollo-router/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ async-trait = "0.1.73"
atty = "0.2.14"
axum = { version = "0.6.20", features = ["headers", "json", "original-uri"] }
base64 = "0.21.2"
bloomfilter = "1.0.12"
buildstructor = "0.5.3"
bytes = "1.4.0"
clap = { version = "4.4.2", default-features = false, features = [
Expand Down Expand Up @@ -163,6 +164,7 @@ prost = "0.11.9"
prost-types = "0.11.9"
proteus = "0.5.0"
rand = "0.8.5"
rand_core = "0.6.4"
rhai = { version = "1.15.1", features = ["sync", "serde", "internals"] }
regex = "1.9.5"
reqwest = { version = "0.11.19", default-features = false, features = [
Expand Down Expand Up @@ -236,7 +238,6 @@ memchr = "2.6.3"
brotli = "3.3.4"
zstd = "0.12.4"
zstd-safe = "6.0.6"
rand_core = "0.6.4"
# note: AWS dependencies should always use the same version
aws-sigv4 = "0.56.0"
aws-credential-types = "0.56.0"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4314,6 +4314,23 @@ expression: "&schema"
"format": "double"
}
},
"experimental_cache_metrics": {
"description": "Experimental metrics to know more about caching strategies",
"type": "object",
"properties": {
"enabled": {
"description": "Enable experimental metrics",
"default": false,
"type": "boolean"
},
"ttl": {
"description": "Potential TTL for a cache if we had one (default: 5secs)",
"default": "5s",
"type": "string"
}
},
"additionalProperties": false
},
"resources": {
"description": "Resources",
"default": {},
Expand Down
23 changes: 23 additions & 0 deletions apollo-router/src/plugins/telemetry/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,28 @@ pub(crate) struct MetricsCommon {
/// Custom buckets for histograms
#[serde(default = "default_buckets")]
pub(crate) buckets: Vec<f64>,
/// Experimental metrics to know more about caching strategies
pub(crate) experimental_cache_metrics: ExperimentalCacheMetricsConf,
}

// Configuration for the experimental cache-simulation metrics, stored in the
// `experimental_cache_metrics` field of `MetricsCommon`.
//
// NOTE(review): the `///` field docs below match the `description` strings in
// the JSON schema snapshot word for word, so editing them presumably changes
// the generated schema and requires a snapshot update. Keep explanatory notes
// in plain `//` comments like this one.
#[derive(Clone, Debug, Deserialize, JsonSchema)]
// `deny_unknown_fields`: unrecognised keys are a deserialization error
// (the snapshot shows `"additionalProperties": false`).
// `default`: any key missing from the input is filled in from this type's
// `Default` impl (`enabled = false`, `ttl = 5s`).
#[serde(deny_unknown_fields, rename_all = "snake_case", default)]
pub(crate) struct ExperimentalCacheMetricsConf {
/// Enable experimental metrics
pub(crate) enabled: bool,
// Deserialized through `humantime_serde`; the schema advertises the value as
// a plain string, and the snapshot shows the default rendered as "5s".
#[serde(with = "humantime_serde")]
#[schemars(with = "String")]
/// Potential TTL for a cache if we had one (default: 5secs)
pub(crate) ttl: Duration,
}

impl Default for ExperimentalCacheMetricsConf {
    /// Builds the configuration used when the section (or any of its keys)
    /// is omitted: the experimental metrics are switched off and the
    /// simulated cache TTL is five seconds.
    fn default() -> Self {
        // Opt-in feature: stays disabled unless explicitly enabled.
        let enabled = false;
        // Same value the field documentation advertises as the default TTL.
        let ttl = Duration::from_secs(5);

        Self { enabled, ttl }
    }
}

fn default_buckets() -> Vec<f64> {
Expand All @@ -110,6 +132,7 @@ impl Default for MetricsCommon {
service_namespace: None,
resources: HashMap::new(),
buckets: default_buckets(),
experimental_cache_metrics: ExperimentalCacheMetricsConf::default(),
}
}
}
Expand Down
Loading

0 comments on commit 961fff4

Please sign in to comment.