From f6a67b6ef5425a0d37e8be890bf6b422b9bde14c Mon Sep 17 00:00:00 2001 From: bryn Date: Fri, 15 Sep 2023 14:18:45 +0100 Subject: [PATCH 01/30] Update to opentelemetry 0.20 This upgrade has many changes due to a new metrics API upstream. Metrics have largely been reworked, and in addition, some new metrics macros have been added to enabled us to move towards a better long term metrics story. --- Cargo.lock | 430 ++++-- apollo-router/Cargo.toml | 36 +- apollo-router/src/executable.rs | 9 +- apollo-router/src/lib.rs | 3 + apollo-router/src/metrics/aggregation.rs | 431 ++++++ apollo-router/src/metrics/filter.rs | 333 +++++ .../{plugins/telemetry => }/metrics/layer.rs | 58 +- apollo-router/src/metrics/mod.rs | 781 ++++++++++ .../src/plugins/telemetry/formatters/mod.rs | 8 +- .../plugins/telemetry/metrics/aggregation.rs | 240 ---- .../src/plugins/telemetry/metrics/apollo.rs | 59 +- .../src/plugins/telemetry/metrics/filter.rs | 300 ---- .../src/plugins/telemetry/metrics/mod.rs | 115 +- .../src/plugins/telemetry/metrics/otlp.rs | 61 +- .../plugins/telemetry/metrics/prometheus.rs | 131 +- apollo-router/src/plugins/telemetry/mod.rs | 1253 +++++++---------- apollo-router/src/plugins/telemetry/otlp.rs | 25 + apollo-router/src/plugins/telemetry/reload.rs | 49 +- ...ry__tests__it_test_prometheus_metrics.snap | 19 +- ...est_prometheus_metrics_custom_buckets.snap | 22 +- .../telemetry/testdata/config.router.yaml | 101 ++ .../testdata/custom_attributes.router.yaml | 51 + .../telemetry/testdata/prometheus.router.yaml | 7 + .../prometheus_custom_buckets.router.yaml | 13 + .../telemetry/tracing/apollo_telemetry.rs | 3 +- .../src/plugins/telemetry/tracing/jaeger.rs | 42 +- .../src/plugins/telemetry/tracing/reload.rs | 4 +- apollo-router/src/tracer.rs | 27 +- apollo-router/tests/metrics_tests.rs | 10 +- ...acing_tests__traced_basic_composition.snap | 8 - .../tracing_tests__traced_basic_request.snap | 8 - .../snapshots/tracing_tests__variables.snap | 8 - 32 files changed, 2841 
insertions(+), 1804 deletions(-) create mode 100644 apollo-router/src/metrics/aggregation.rs create mode 100644 apollo-router/src/metrics/filter.rs rename apollo-router/src/{plugins/telemetry => }/metrics/layer.rs (90%) create mode 100644 apollo-router/src/metrics/mod.rs delete mode 100644 apollo-router/src/plugins/telemetry/metrics/aggregation.rs create mode 100644 apollo-router/src/plugins/telemetry/testdata/config.router.yaml create mode 100644 apollo-router/src/plugins/telemetry/testdata/custom_attributes.router.yaml create mode 100644 apollo-router/src/plugins/telemetry/testdata/prometheus.router.yaml create mode 100644 apollo-router/src/plugins/telemetry/testdata/prometheus_custom_buckets.router.yaml diff --git a/Cargo.lock b/Cargo.lock index 687bc50e83..f58ce850d3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -330,6 +330,7 @@ dependencies = [ "multimap 0.9.0", "notify", "nu-ansi-term 0.49.0", + "num-traits", "once_cell", "opentelemetry", "opentelemetry-aws", @@ -339,6 +340,7 @@ dependencies = [ "opentelemetry-otlp", "opentelemetry-prometheus", "opentelemetry-semantic-conventions", + "opentelemetry-stdout", "opentelemetry-zipkin", "opentelemetry_api", "p256 0.12.0", @@ -358,7 +360,7 @@ dependencies = [ "router-bridge", "rstack", "rust-embed", - "rustls 0.21.6", + "rustls", "rustls-pemfile", "schemars", "serde", @@ -379,12 +381,12 @@ dependencies = [ "thiserror", "tikv-jemallocator", "tokio", - "tokio-rustls 0.24.1", + "tokio-rustls", "tokio-stream", "tokio-tungstenite", "tokio-util", "toml 0.7.6", - "tonic 0.8.3", + "tonic", "tonic-build", "tower", "tower-http", @@ -575,6 +577,109 @@ dependencies = [ "tokio", ] +[[package]] +name = "async-executor" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6fa3dc5f2a8564f07759c008b9109dc0d39de92a88d5588b8a5036d286383afb" +dependencies = [ + "async-lock", + "async-task", + "concurrent-queue", + "fastrand 1.9.0", + "futures-lite", + "slab", +] + +[[package]] +name = 
"async-global-executor" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1b6f5d7df27bd294849f8eec66ecfc63d11814df7a4f5d74168a2394467b776" +dependencies = [ + "async-channel", + "async-executor", + "async-io", + "async-lock", + "blocking", + "futures-lite", + "once_cell", +] + +[[package]] +name = "async-io" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fc5b45d93ef0529756f812ca52e44c221b35341892d3dcc34132ac02f3dd2af" +dependencies = [ + "async-lock", + "autocfg", + "cfg-if", + "concurrent-queue", + "futures-lite", + "log", + "parking", + "polling", + "rustix 0.37.23", + "slab", + "socket2 0.4.9", + "waker-fn", +] + +[[package]] +name = "async-lock" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "287272293e9d8c41773cec55e365490fe034813a2f172f502d6ddcf75b2f582b" +dependencies = [ + "event-listener", +] + +[[package]] +name = "async-process" +version = "1.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7a9d28b1d97e08915212e2e45310d47854eafa69600756fc735fb788f75199c9" +dependencies = [ + "async-io", + "async-lock", + "autocfg", + "blocking", + "cfg-if", + "event-listener", + "futures-lite", + "rustix 0.37.23", + "signal-hook", + "windows-sys 0.48.0", +] + +[[package]] +name = "async-std" +version = "1.12.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62565bb4402e926b29953c785397c6dc0391b7b446e45008b0049eb43cec6f5d" +dependencies = [ + "async-channel", + "async-global-executor", + "async-io", + "async-lock", + "async-process", + "crossbeam-utils", + "futures-channel", + "futures-core", + "futures-io", + "futures-lite", + "gloo-timers", + "kv-log-macro", + "log", + "memchr", + "once_cell", + "pin-project-lite", + "pin-utils", + "slab", + "wasm-bindgen-futures", +] + [[package]] name = "async-stream" version = "0.3.5" @@ -597,6 +702,12 @@ dependencies 
= [ "syn 2.0.29", ] +[[package]] +name = "async-task" +version = "4.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc7ab41815b3c653ccd2978ec3255c81349336702dfdf62ee6f7069b12a3aae" + [[package]] name = "async-trait" version = "0.1.73" @@ -608,6 +719,12 @@ dependencies = [ "syn 2.0.29", ] +[[package]] +name = "atomic-waker" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1181e1e0d1fce796a03db1ae795d67167da795f9cf4a39c37589e85ef57f26d3" + [[package]] name = "atty" version = "0.2.14" @@ -806,7 +923,7 @@ dependencies = [ "hyper-rustls", "lazy_static", "pin-project-lite", - "rustls 0.21.6", + "rustls", "tokio", "tower", "tracing", @@ -1099,6 +1216,21 @@ dependencies = [ "generic-array 0.14.7", ] +[[package]] +name = "blocking" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77231a1c8f801696fc0123ec6150ce92cffb8e164a02afb9c8ddee0e9b65ad65" +dependencies = [ + "async-channel", + "async-lock", + "async-task", + "atomic-waker", + "fastrand 1.9.0", + "futures-lite", + "log", +] + [[package]] name = "bloomfilter" version = "1.0.12" @@ -1511,7 +1643,7 @@ checksum = "c2895653b4d9f1538a83970077cb01dfc77a4810524e51a110944688e916b18e" dependencies = [ "prost", "prost-types", - "tonic 0.9.2", + "tonic", "tracing-core", ] @@ -1533,7 +1665,7 @@ dependencies = [ "thread_local", "tokio", "tokio-stream", - "tonic 0.9.2", + "tonic", "tracing", "tracing-core", "tracing-subscriber", @@ -2563,13 +2695,13 @@ dependencies = [ "parking_lot 0.12.1", "rand 0.8.5", "redis-protocol", - "rustls 0.21.6", + "rustls", "rustls-native-certs", "rustls-webpki", "semver 1.0.18", "sha-1", "tokio", - "tokio-rustls 0.24.1", + "tokio-rustls", "tokio-stream", "tokio-util", "url", @@ -2820,6 +2952,18 @@ dependencies = [ "regex", ] +[[package]] +name = "gloo-timers" +version = "0.2.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9b995a66bb87bebce9a0f4a95aed01daca4872c050bfcb21653361c03bc35e5c" +dependencies = [ + "futures-channel", + "futures-core", + "js-sys", + "wasm-bindgen", +] + [[package]] name = "graphql-introspection-query" version = "0.2.0" @@ -3181,10 +3325,10 @@ dependencies = [ "http", "hyper", "log", - "rustls 0.21.6", + "rustls", "rustls-native-certs", "tokio", - "tokio-rustls 0.24.1", + "tokio-rustls", ] [[package]] @@ -3325,6 +3469,17 @@ dependencies = [ "ghost", ] +[[package]] +name = "io-lifetimes" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2" +dependencies = [ + "hermit-abi 0.3.2", + "libc", + "windows-sys 0.48.0", +] + [[package]] name = "ipnet" version = "2.8.0" @@ -3338,7 +3493,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b" dependencies = [ "hermit-abi 0.3.2", - "rustix", + "rustix 0.38.8", "windows-sys 0.48.0", ] @@ -3490,6 +3645,15 @@ dependencies = [ "libc", ] +[[package]] +name = "kv-log-macro" +version = "1.0.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de8b303297635ad57c9f5059fd9cee7a47f8e8daa09df0fcd07dd39fb22977f" +dependencies = [ + "log", +] + [[package]] name = "lazy-regex" version = "2.5.0" @@ -3624,6 +3788,12 @@ dependencies = [ "syn 2.0.29", ] +[[package]] +name = "linux-raw-sys" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519" + [[package]] name = "linux-raw-sys" version = "0.4.5" @@ -3645,6 +3815,9 @@ name = "log" version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +dependencies = [ + "value-bag", +] [[package]] name = "lru" @@ -4073,9 +4246,9 @@ dependencies = [ [[package]] 
name = "opentelemetry" -version = "0.19.0" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f4b8347cc26099d3aeee044065ecc3ae11469796b4d65d065a23a584ed92a6f" +checksum = "9591d937bc0e6d2feb6f71a559540ab300ea49955229c347a517a28d27784c54" dependencies = [ "opentelemetry_api", "opentelemetry_sdk", @@ -4083,21 +4256,20 @@ dependencies = [ [[package]] name = "opentelemetry-aws" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72a394d24777936802edd6c03a68daab4db39630418c7e431a5648e9befa80b8" +checksum = "31120a0109c172a42096766ef10e772f4a89422932be2c3b7f335858ff49380d" dependencies = [ "once_cell", - "opentelemetry", + "opentelemetry_api", ] [[package]] name = "opentelemetry-datadog" -version = "0.7.0" +version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "daf08569fbddd2149b268e2bde2bca0bab84bc19ee2efcc234f855f49a911536" +checksum = "b5f4ecf595095d3b641dd2761a0c3d1f175d3d6c28f38e65418d8004ea3255dd" dependencies = [ - "async-trait", "futures-core", "http", "indexmap 1.9.3", @@ -4114,9 +4286,9 @@ dependencies = [ [[package]] name = "opentelemetry-http" -version = "0.8.0" +version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a819b71d6530c4297b49b3cae2939ab3a8cc1b9f382826a1bc29dd0ca3864906" +checksum = "c7594ec0e11d8e33faf03530a4c49af7064ebba81c1480e01be67d90b356508b" dependencies = [ "async-trait", "bytes", @@ -4127,83 +4299,97 @@ dependencies = [ [[package]] name = "opentelemetry-jaeger" -version = "0.18.0" +version = "0.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08e028dc9f4f304e9320ce38c80e7cf74067415b1ad5a8750a38bae54a4d450d" +checksum = "876958ba9084f390f913fcf04ddf7bbbb822898867bb0a51cc28f2b9e5c1b515" dependencies = [ "async-trait", - "futures", - "futures-executor", + "futures-core", + "futures-util", "headers", "http", - 
"once_cell", "opentelemetry", "opentelemetry-http", "opentelemetry-semantic-conventions", "reqwest", - "thiserror", "thrift", "tokio", ] [[package]] name = "opentelemetry-otlp" -version = "0.12.0" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8af72d59a4484654ea8eb183fea5ae4eb6a41d7ac3e3bae5f4d2a282a3a7d3ca" +checksum = "7e5e5a5c4135864099f3faafbe939eb4d7f9b80ebf68a8448da961b32a7c1275" dependencies = [ "async-trait", - "futures", - "futures-util", + "futures-core", "http", - "opentelemetry", "opentelemetry-http", "opentelemetry-proto", + "opentelemetry-semantic-conventions", + "opentelemetry_api", + "opentelemetry_sdk", "prost", "reqwest", "thiserror", "tokio", - "tonic 0.8.3", + "tonic", ] [[package]] name = "opentelemetry-prometheus" -version = "0.12.0" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a9f186f6293ebb693caddd0595e66b74a6068fa51048e26e0bf9c95478c639c" +checksum = "c7d81bc254e2d572120363a2b16cdb0d715d301b5789be0cfc26ad87e4e10e53" dependencies = [ - "opentelemetry", + "once_cell", + "opentelemetry_api", + "opentelemetry_sdk", "prometheus", "protobuf", ] [[package]] name = "opentelemetry-proto" -version = "0.2.0" +version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "045f8eea8c0fa19f7d48e7bc3128a39c2e5c533d5c61298c548dfefc1064474c" +checksum = "b1e3f814aa9f8c905d0ee4bde026afd3b2577a97c10e1699912e3e44f0c4cbeb" dependencies = [ - "futures", - "futures-util", - "opentelemetry", + "opentelemetry_api", + "opentelemetry_sdk", "prost", - "tonic 0.8.3", + "tonic", ] [[package]] name = "opentelemetry-semantic-conventions" -version = "0.11.0" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "24e33428e6bf08c6f7fcea4ddb8e358fab0fe48ab877a87c70c6ebe20f673ce5" +checksum = "73c9f9340ad135068800e7f1b24e9e09ed9e7143f5bf8518ded3d3ec69789269" dependencies = [ "opentelemetry", ] 
+[[package]] +name = "opentelemetry-stdout" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8bd550321bc0f9d3f6dcbfe5c75262789de5b3e2776da2cbcfd2392aa05db0c6" +dependencies = [ + "futures-util", + "opentelemetry_api", + "opentelemetry_sdk", + "ordered-float 3.9.0", + "serde", + "serde_json", +] + [[package]] name = "opentelemetry-zipkin" -version = "0.17.0" +version = "0.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1fd48caee5e1db71454c95be32d1daeb6fae265321ff8f51b1efc8a50b0be80" +checksum = "eb966f01235207a6933c0aec98374fe9782df1c1d2b3d1db35c458451d138143" dependencies = [ "async-trait", "futures-core", @@ -4221,14 +4407,14 @@ dependencies = [ [[package]] name = "opentelemetry_api" -version = "0.19.0" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed41783a5bf567688eb38372f2b7a8530f5a607a4b49d38dd7573236c23ca7e2" +checksum = "8a81f725323db1b1206ca3da8bb19874bbd3f57c3bcd59471bfb04525b265b9b" dependencies = [ - "fnv", "futures-channel", "futures-util", "indexmap 1.9.3", + "js-sys", "once_cell", "pin-project-lite", "thiserror", @@ -4237,21 +4423,23 @@ dependencies = [ [[package]] name = "opentelemetry_sdk" -version = "0.19.0" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b3a2a91fdbfdd4d212c0dcc2ab540de2c2bcbbd90be17de7a7daf8822d010c1" +checksum = "fa8e705a0612d48139799fcbaba0d4a90f06277153e43dd2bdc16c6f0edd8026" dependencies = [ + "async-std", "async-trait", "crossbeam-channel", - "dashmap", - "fnv", "futures-channel", "futures-executor", "futures-util", "once_cell", "opentelemetry_api", + "ordered-float 3.9.0", "percent-encoding", "rand 0.8.5", + "regex", + "serde_json", "thiserror", "tokio", "tokio-stream", @@ -4572,6 +4760,22 @@ dependencies = [ "syn 2.0.29", ] +[[package]] +name = "polling" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" 
+checksum = "4b2d323e8ca7996b3e23126511a523f7e62924d93ecd5ae73b333815b0eb3dce" +dependencies = [ + "autocfg", + "bitflags 1.3.2", + "cfg-if", + "concurrent-queue", + "libc", + "log", + "pin-project-lite", + "windows-sys 0.48.0", +] + [[package]] name = "polyval" version = "0.6.1" @@ -5024,14 +5228,14 @@ dependencies = [ "once_cell", "percent-encoding", "pin-project-lite", - "rustls 0.21.6", + "rustls", "rustls-native-certs", "rustls-pemfile", "serde", "serde_json", "serde_urlencoded", "tokio", - "tokio-rustls 0.24.1", + "tokio-rustls", "tokio-util", "tower-service", "url", @@ -5325,27 +5529,29 @@ dependencies = [ [[package]] name = "rustix" -version = "0.38.8" +version = "0.37.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "19ed4fa021d81c8392ce04db050a3da9a60299050b7ae1cf482d862b54a7218f" +checksum = "4d69718bf81c6127a49dc64e44a742e8bb9213c0ff8869a22c308f84c1d4ab06" dependencies = [ - "bitflags 2.4.0", + "bitflags 1.3.2", "errno", + "io-lifetimes", "libc", - "linux-raw-sys", + "linux-raw-sys 0.3.8", "windows-sys 0.48.0", ] [[package]] -name = "rustls" -version = "0.20.8" +name = "rustix" +version = "0.38.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fff78fc74d175294f4e83b28343315ffcfb114b156f0185e9741cb5570f50e2f" +checksum = "19ed4fa021d81c8392ce04db050a3da9a60299050b7ae1cf482d862b54a7218f" dependencies = [ - "log", - "ring", - "sct", - "webpki", + "bitflags 2.4.0", + "errno", + "libc", + "linux-raw-sys 0.4.5", + "windows-sys 0.48.0", ] [[package]] @@ -5755,6 +5961,16 @@ dependencies = [ "dirs", ] +[[package]] +name = "signal-hook" +version = "0.3.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8621587d4798caf8eb44879d42e56b9a93ea5dcd315a6487c357130095b62801" +dependencies = [ + "libc", + "signal-hook-registry", +] + [[package]] name = "signal-hook-registry" version = "1.4.1" @@ -6049,7 +6265,7 @@ dependencies = [ "cfg-if", "fastrand 2.0.0", "redox_syscall 
0.3.5", - "rustix", + "rustix 0.38.8", "windows-sys 0.48.0", ] @@ -6339,24 +6555,13 @@ dependencies = [ "syn 2.0.29", ] -[[package]] -name = "tokio-rustls" -version = "0.23.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59" -dependencies = [ - "rustls 0.20.8", - "tokio", - "webpki", -] - [[package]] name = "tokio-rustls" version = "0.24.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" dependencies = [ - "rustls 0.21.6", + "rustls", "tokio", ] @@ -6393,10 +6598,10 @@ checksum = "2b2dbec703c26b00d74844519606ef15d09a7d6857860f84ad223dec002ddea2" dependencies = [ "futures-util", "log", - "rustls 0.21.6", + "rustls", "rustls-native-certs", "tokio", - "tokio-rustls 0.24.1", + "tokio-rustls", "tungstenite", ] @@ -6460,14 +6665,14 @@ dependencies = [ [[package]] name = "tonic" -version = "0.8.3" +version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f219fad3b929bef19b1f86fbc0358d35daed8f2cac972037ac0dc10bbb8d5fb" +checksum = "3082666a3a6433f7f511c7192923fa1fe07c69332d3c6a2e6bb040b569199d5a" dependencies = [ "async-stream", "async-trait", "axum", - "base64 0.13.1", + "base64 0.21.2", "bytes", "flate2", "futures-core", @@ -6480,41 +6685,10 @@ dependencies = [ "percent-encoding", "pin-project", "prost", - "prost-derive", "rustls-native-certs", "rustls-pemfile", "tokio", - "tokio-rustls 0.23.4", - "tokio-stream", - "tokio-util", - "tower", - "tower-layer", - "tower-service", - "tracing", - "tracing-futures", -] - -[[package]] -name = "tonic" -version = "0.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3082666a3a6433f7f511c7192923fa1fe07c69332d3c6a2e6bb040b569199d5a" -dependencies = [ - "async-trait", - "axum", - "base64 0.21.2", - "bytes", - "futures-core", - "futures-util", - "h2", - "http", - "http-body", 
- "hyper", - "hyper-timeout", - "percent-encoding", - "pin-project", - "prost", - "tokio", + "tokio-rustls", "tokio-stream", "tower", "tower-layer", @@ -6663,12 +6837,14 @@ dependencies = [ [[package]] name = "tracing-opentelemetry" -version = "0.19.0" +version = "0.21.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00a39dcf9bfc1742fa4d6215253b33a6e474be78275884c216fc2a06267b3600" +checksum = "75327c6b667828ddc28f5e3f169036cb793c3f588d83bf0f262a7f062ffed3c8" dependencies = [ "once_cell", "opentelemetry", + "opentelemetry_sdk", + "smallvec", "tracing", "tracing-core", "tracing-log", @@ -6789,7 +6965,7 @@ dependencies = [ "httparse", "log", "rand 0.8.5", - "rustls 0.21.6", + "rustls", "sha1 0.10.5", "thiserror", "url", @@ -7038,6 +7214,12 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" +[[package]] +name = "value-bag" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d92ccd67fb88503048c01b59152a04effd0782d035a83a6d256ce6085f08f4a3" + [[package]] name = "vcpkg" version = "0.2.15" @@ -7194,16 +7376,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "webpki" -version = "0.22.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0e74f82d49d545ad128049b7e88f6576df2da6b02e9ce565c6f533be576957e" -dependencies = [ - "ring", - "untrusted", -] - [[package]] name = "webpki-roots" version = "0.25.2" diff --git a/apollo-router/Cargo.toml b/apollo-router/Cargo.toml index f1082d64bd..afc0d86dc0 100644 --- a/apollo-router/Cargo.toml +++ b/apollo-router/Cargo.toml @@ -133,30 +133,35 @@ once_cell = "1.18.0" # groups `^tracing` and `^opentelemetry*` dependencies together as of # https://github.com/apollographql/router/pull/1509. A comment which exists # there (and on `tracing` packages below) should be updated should this change. 
-opentelemetry = { version = "0.19.0", features = ["rt-tokio", "metrics"] } -opentelemetry_api = "0.19.0" -opentelemetry-aws = "0.7.0" -opentelemetry-datadog = { version = "0.7.0", features = ["reqwest-client"] } -opentelemetry-http = "0.8.0" -opentelemetry-jaeger = { version = "0.18.0", features = [ +opentelemetry = { version = "0.20.0", features = [ + "rt-tokio", + "metrics", + "testing" +] } +opentelemetry_api = "0.20.0" +opentelemetry-aws = "0.8.0" +opentelemetry-datadog = { version = "0.8.0", features = ["reqwest-client"] } +opentelemetry-http = "0.9.0" +opentelemetry-jaeger = { version = "0.19.0", features = [ "collector_client", "reqwest_collector_client", "rt-tokio", ] } -opentelemetry-otlp = { version = "0.12.0", default-features = false, features = [ +opentelemetry-otlp = { version = "0.13.0", default-features = false, features = [ "grpc-tonic", + "gzip-tonic", "tonic", "tls", "http-proto", "metrics", "reqwest-client", ] } -opentelemetry-semantic-conventions = "0.11.0" -opentelemetry-zipkin = { version = "0.17.0", default-features = false, features = [ +opentelemetry-semantic-conventions = "0.12.0" +opentelemetry-zipkin = { version = "0.18.0", default-features = false, features = [ "reqwest-client", "reqwest-rustls", ] } -opentelemetry-prometheus = "0.12.0" +opentelemetry-prometheus = "0.13.0" paste = "1.0.14" pin-project-lite = "0.2.12" prometheus = "0.13" @@ -196,12 +201,7 @@ thiserror = "1.0.48" tokio = { version = "1.32.0", features = ["full"] } tokio-stream = { version = "0.1.14", features = ["sync", "net"] } tokio-util = { version = "0.7.8", features = ["net", "codec", "time"] } -tonic = { version = "0.8.3", features = [ - "transport", - "tls", - "tls-roots", - "gzip", -] } +tonic = { version = "0.9.2", features = ["transport", "tls", "tls-roots", "gzip"] } tower = { version = "0.4.13", features = ["full"] } tower-http = { version = "0.4.3", features = [ "add-extension", @@ -219,7 +219,7 @@ tower-service = "0.3.2" tracing = "0.1.37" tracing-core = 
"0.1.31" tracing-futures = { version = "0.2.5", features = ["futures-03"] } -tracing-opentelemetry = "0.19.0" +tracing-opentelemetry = "0.21.0" tracing-subscriber = { version = "0.3.17", features = ["env-filter", "json"] } url = { version = "2.4.1", features = ["serde"] } urlencoding = "2.1.3" @@ -263,10 +263,12 @@ ecdsa = { version = "0.15.1", features = ["signing", "pem", "pkcs8"] } fred = { version = "6.3.1", features = ["enable-rustls", "no-client-setname"] } futures-test = "0.3.28" insta = { version = "1.31.0", features = ["json", "redactions", "yaml"] } +num-traits = "0.2.16" maplit = "1.0.2" memchr = { version = "2.6.3", default-features = false } mockall = "0.11.4" once_cell = "1.18.0" +opentelemetry-stdout = { version = "0.1.0", features = ["trace"] } p256 = "0.12.0" rand_core = "0.6.4" redis = { version = "0.21.7", features = ["tokio-comp"] } diff --git a/apollo-router/src/executable.rs b/apollo-router/src/executable.rs index 8c1f669ed9..a340f480af 100644 --- a/apollo-router/src/executable.rs +++ b/apollo-router/src/executable.rs @@ -28,6 +28,7 @@ use url::Url; use crate::configuration::generate_config_schema; use crate::configuration::generate_upgrade; use crate::configuration::Discussed; +use crate::metrics::meter_provider; use crate::plugin::plugins; use crate::plugins::telemetry::reload::init_telemetry; use crate::router::ConfigurationSource; @@ -468,8 +469,12 @@ impl Executable { None => Self::inner_start(shutdown, schema, config, license, opt).await, }; - //We should be good to shutdown the tracer provider now as the router should have finished everything. - opentelemetry::global::shutdown_tracer_provider(); + // We should be good to shutdown OpenTelemetry now as the router should have finished everything. 
+ tokio::task::spawn_blocking(move || { + opentelemetry::global::shutdown_tracer_provider(); + meter_provider().shutdown(); + }) + .await?; result } diff --git a/apollo-router/src/lib.rs b/apollo-router/src/lib.rs index 9b0f04db47..fb5befb06e 100644 --- a/apollo-router/src/lib.rs +++ b/apollo-router/src/lib.rs @@ -47,6 +47,9 @@ mod json_ext; #[macro_use] pub mod plugin; +#[macro_use] +pub(crate) mod metrics; + pub(crate) mod axum_factory; mod cache; mod configuration; diff --git a/apollo-router/src/metrics/aggregation.rs b/apollo-router/src/metrics/aggregation.rs new file mode 100644 index 0000000000..e42fdde123 --- /dev/null +++ b/apollo-router/src/metrics/aggregation.rs @@ -0,0 +1,431 @@ +use std::any::Any; +use std::borrow::Cow; +use std::collections::HashMap; +use std::mem; +use std::sync::Arc; +use std::sync::Mutex; + +use derive_more::From; +use itertools::Itertools; +use opentelemetry::metrics::Callback; +use opentelemetry::metrics::Counter; +use opentelemetry::metrics::Histogram; +use opentelemetry::metrics::InstrumentProvider; +use opentelemetry::metrics::Meter; +use opentelemetry::metrics::MeterProvider; +use opentelemetry::metrics::ObservableCounter; +use opentelemetry::metrics::ObservableGauge; +use opentelemetry::metrics::ObservableUpDownCounter; +use opentelemetry::metrics::SyncCounter; +use opentelemetry::metrics::SyncHistogram; +use opentelemetry::metrics::SyncUpDownCounter; +use opentelemetry::metrics::Unit; +use opentelemetry::metrics::UpDownCounter; +use opentelemetry::KeyValue; +use opentelemetry_api::global::ObjectSafeMeterProvider; +use opentelemetry_api::metrics::AsyncInstrument; +use opentelemetry_api::metrics::CallbackRegistration; +use opentelemetry_api::metrics::MetricsError; +use opentelemetry_api::metrics::Observer; + +use crate::metrics::filter::FilterMeterProvider; + +// This meter provider enables us to combine multiple meter providers. The reasons we need this are: +// 1. Prometheus meters are special. 
To dispose a meter is to dispose the entire registry. This means we need to make a best effort to keep them around. +// 2. To implement filtering we use a view. However this must be set during build of the meter provider, thus we need separate ones for Apollo and general metrics. +// Unlike the regular meter provider this implementation will return an existing meter if one has been created already rather than a new one. +// This is within the spec: https://opentelemetry.io/docs/specs/otel/metrics/api/#get-a-meter +// `Meters are identified by name, version, and schema_url fields. When more than one Meter of the same name, version, and schema_url is created, it is unspecified whether or under which conditions the same or different Meter instances are returned. It is a user error to create Meters with different attributes but the same identity.` + +#[derive(Hash, Ord, PartialOrd, Eq, PartialEq, Copy, Clone, Debug)] +pub(crate) enum MeterProviderType { + PublicPrometheus, + Apollo, + Public, +} + +#[derive(Clone, Default)] +pub(crate) struct AggregateMeterProvider { + inner: Arc>, +} + +#[derive(Default)] +struct Inner { + providers: HashMap)>, + registered_instruments: Vec, +} + +#[derive(From)] +pub(crate) enum InstrumentWrapper { + U64Counter(Arc>), + F64Counter(Arc>), + I64UpDownCounter(Arc>), + F64UpDownCounter(Arc>), + I64Histogram(Arc>), + U64Histogram(Arc>), + F64Histogram(Arc>), +} + +#[derive(Eq, PartialEq, Hash)] +struct MeterId { + name: Cow<'static, str>, + version: Option>, + schema_url: Option>, + // Note that attributes are not part of the meter ID. +} + +impl AggregateMeterProvider { + /// The behaviour of this function is that if None is passed in, the meter will be left as is. + /// To disable meter_providers use a noop meter provider. + /// The old meter_provider if any is returned, and it is up to the caller to clean up. + /// Any registered instruments must be invalidated so that they are fetched again. 
+ pub(crate) fn set( + &self, + meter_provider_type: MeterProviderType, + meter_provider: Option, + ) -> Option { + let mut inner = self.inner.lock().expect("lock poisoned"); + // As we are changing a meter provider we need to invalidate any registered instruments. + // Clearing these allows any weak references at callsites to be invalidated. + inner.registered_instruments.clear(); + + //Now update the meter provider + if let Some(meter_provider) = meter_provider { + inner + .providers + .insert( + meter_provider_type, + (meter_provider.clone(), HashMap::new()), + ) + .map(|(old_provider, _)| old_provider) + } else { + None + } + } + + /// Shutdown MUST be called from a blocking thread. + pub(crate) fn shutdown(&self) { + let inner = self.inner.lock().expect("lock poisoned"); + for (meter_provider_type, (meter_provider, _)) in &inner.providers { + if let Err(e) = meter_provider.shutdown() { + ::tracing::error!(error = %e, meter_provider_type = ?meter_provider_type, "failed to shutdown meter provider") + } + } + } + + /// Register an instrument. This enables caching at callsites and invalidation at the meter provider via weak reference. 
+ #[allow(dead_code)] + pub(crate) fn register_instrument(&self, instrument: T) -> Arc + where + Arc: Into, + { + let instrument = Arc::new(instrument); + self.inner + .lock() + .expect("lock poisoned") + .registered_instruments + .push(instrument.clone().into()); + instrument + } + + #[cfg(test)] + pub(crate) fn registered_instruments(&self) -> usize { + self.inner + .lock() + .expect("lock poisoned") + .registered_instruments + .len() + } +} + +impl MeterProvider for AggregateMeterProvider { + fn versioned_meter( + &self, + name: impl Into>, + version: Option>>, + schema_url: Option>>, + attributes: Option>, + ) -> Meter { + let name = name.into(); + let version = version.map(|v| v.into()); + let schema_url = schema_url.map(|v| v.into()); + + let mut meters = Vec::new(); + let mut inner = self.inner.lock().expect("lock poisoned"); + for (provider, existing_meters) in inner.providers.values_mut() { + meters.push( + existing_meters + .entry(MeterId { + name: name.clone(), + version: version.clone(), + schema_url: schema_url.clone(), + }) + .or_insert_with(|| { + provider.versioned_meter_cow( + name.clone(), + version.clone(), + schema_url.clone(), + attributes.clone(), + ) + }) + .clone(), + ); + } + + Meter::new(Arc::new(AggregateInstrumentProvider { meters })) + } +} + +pub(crate) struct AggregateInstrumentProvider { + meters: Vec, +} + +pub(crate) struct AggregateCounter { + delegates: Vec>, +} + +impl SyncCounter for AggregateCounter { + fn add(&self, value: T, attributes: &[KeyValue]) { + for counter in &self.delegates { + counter.add(value, attributes) + } + } +} + +pub(crate) struct AggregateObservableCounter { + delegates: Vec>, +} + +impl AsyncInstrument for AggregateObservableCounter { + fn observe(&self, value: T, attributes: &[KeyValue]) { + for counter in &self.delegates { + counter.observe(value, attributes) + } + } + + fn as_any(&self) -> Arc { + unreachable!() + } +} + +pub(crate) struct AggregateHistogram { + delegates: Vec>, +} + +impl 
SyncHistogram for AggregateHistogram { + fn record(&self, value: T, attributes: &[KeyValue]) { + for histogram in &self.delegates { + histogram.record(value, attributes) + } + } +} + +pub(crate) struct AggregateUpDownCounter { + delegates: Vec>, +} + +impl SyncUpDownCounter for AggregateUpDownCounter { + fn add(&self, value: T, attributes: &[KeyValue]) { + for counter in &self.delegates { + counter.add(value, attributes) + } + } +} + +pub(crate) struct AggregateObservableUpDownCounter { + delegates: Vec>, +} + +impl AsyncInstrument for AggregateObservableUpDownCounter { + fn observe(&self, value: T, attributes: &[KeyValue]) { + for counter in &self.delegates { + counter.observe(value, attributes) + } + } + + fn as_any(&self) -> Arc { + unreachable!() + } +} + +pub(crate) struct AggregateObservableGauge { + delegates: Vec>, +} + +impl AsyncInstrument for AggregateObservableGauge { + fn observe(&self, measurement: T, attributes: &[KeyValue]) { + for gauge in &self.delegates { + gauge.observe(measurement, attributes) + } + } + + fn as_any(&self) -> Arc { + unreachable!() + } +} +macro_rules! aggregate_instrument_fn { + ($name:ident, $ty:ty, $wrapper:ident, $implementation:ident) => { + fn $name( + &self, + name: Cow<'static, str>, + description: Option>, + unit: Option, + ) -> opentelemetry::metrics::Result<$wrapper<$ty>> { + let delegates = self + .meters + .iter() + .map(|p| { + let mut b = p.$name(name.clone()); + if let Some(description) = &description { + b = b.with_description(description.clone()); + } + if let Some(unit) = &unit { + b = b.with_unit(unit.clone()); + } + b.try_init() + }) + .try_collect()?; + Ok($wrapper::new(Arc::new($implementation { delegates }))) + } + }; +} + +// Observable instruments don't need to have a ton of optimisation because they are only read on demand. +macro_rules! 
aggregate_observable_instrument_fn { + ($name:ident, $ty:ty, $wrapper:ident, $implementation:ident) => { + fn $name( + &self, + name: Cow<'static, str>, + description: Option>, + unit: Option, + callback: Vec>, + ) -> opentelemetry::metrics::Result<$wrapper<$ty>> { + let callback: Vec>> = + callback.into_iter().map(|c| Arc::new(c)).collect_vec(); + let delegates = self + .meters + .iter() + .map(|p| { + let mut b = p.$name(name.clone()); + if let Some(description) = &description { + b = b.with_description(description.clone()); + } + if let Some(unit) = &unit { + b = b.with_unit(unit.clone()); + } + for callback in &callback { + let callback = callback.clone(); + b = b.with_callback(move |c| (*callback)(c)); + } + b.try_init() + }) + .try_collect()?; + Ok($wrapper::new(Arc::new($implementation { delegates }))) + } + }; +} + +impl InstrumentProvider for AggregateInstrumentProvider { + aggregate_instrument_fn!(u64_counter, u64, Counter, AggregateCounter); + aggregate_instrument_fn!(f64_counter, f64, Counter, AggregateCounter); + + aggregate_observable_instrument_fn!( + f64_observable_counter, + f64, + ObservableCounter, + AggregateObservableCounter + ); + aggregate_observable_instrument_fn!( + u64_observable_counter, + u64, + ObservableCounter, + AggregateObservableCounter + ); + + aggregate_instrument_fn!(u64_histogram, u64, Histogram, AggregateHistogram); + aggregate_instrument_fn!(f64_histogram, f64, Histogram, AggregateHistogram); + aggregate_instrument_fn!(i64_histogram, i64, Histogram, AggregateHistogram); + + aggregate_instrument_fn!( + i64_up_down_counter, + i64, + UpDownCounter, + AggregateUpDownCounter + ); + aggregate_instrument_fn!( + f64_up_down_counter, + f64, + UpDownCounter, + AggregateUpDownCounter + ); + + aggregate_observable_instrument_fn!( + i64_observable_up_down_counter, + i64, + ObservableUpDownCounter, + AggregateObservableUpDownCounter + ); + aggregate_observable_instrument_fn!( + f64_observable_up_down_counter, + f64, + 
ObservableUpDownCounter, + AggregateObservableUpDownCounter + ); + + aggregate_observable_instrument_fn!( + f64_observable_gauge, + f64, + ObservableGauge, + AggregateObservableGauge + ); + aggregate_observable_instrument_fn!( + i64_observable_gauge, + i64, + ObservableGauge, + AggregateObservableGauge + ); + aggregate_observable_instrument_fn!( + u64_observable_gauge, + u64, + ObservableGauge, + AggregateObservableGauge + ); + + fn register_callback( + &self, + instruments: &[Arc], + callbacks: Box, + ) -> opentelemetry_api::metrics::Result> { + // The reason that this is OK is that calling observe outside of a callback is a no-op. + // So the callback is called, an observable is updated, but only the observable associated with the correct meter will take effect + + let callback = Arc::new(callbacks); + let mut callback_registrations = Vec::new(); + for meter in &self.meters { + let callback = callback.clone(); + // If this fails there is no recovery as some callbacks may be registered + callback_registrations.push(meter.register_callback(instruments, move |c| callback(c))?) 
+ } + Ok(Box::new(AggregatedCallbackRegistrations( + callback_registrations, + ))) + } +} + +struct AggregatedCallbackRegistrations(Vec>); +impl CallbackRegistration for AggregatedCallbackRegistrations { + fn unregister(&mut self) -> opentelemetry_api::metrics::Result<()> { + let mut errors = vec![]; + for mut registration in mem::take(&mut self.0) { + if let Err(err) = registration.unregister() { + errors.push(err); + } + } + + if errors.is_empty() { + Ok(()) + } else { + Err(MetricsError::Other(format!("{errors:?}"))) + } + } +} diff --git a/apollo-router/src/metrics/filter.rs b/apollo-router/src/metrics/filter.rs new file mode 100644 index 0000000000..c490d42025 --- /dev/null +++ b/apollo-router/src/metrics/filter.rs @@ -0,0 +1,333 @@ +use std::any::Any; +use std::borrow::Cow; +use std::sync::Arc; + +use buildstructor::buildstructor; +use opentelemetry::metrics::noop::NoopMeterProvider; +use opentelemetry::metrics::Callback; +use opentelemetry::metrics::Counter; +use opentelemetry::metrics::Histogram; +use opentelemetry::metrics::InstrumentProvider; +use opentelemetry::metrics::Meter; +use opentelemetry::metrics::MeterProvider; +use opentelemetry::metrics::ObservableCounter; +use opentelemetry::metrics::ObservableGauge; +use opentelemetry::metrics::ObservableUpDownCounter; +use opentelemetry::metrics::Unit; +use opentelemetry::metrics::UpDownCounter; +use opentelemetry_api::metrics::CallbackRegistration; +use opentelemetry_api::metrics::Observer; +use opentelemetry_api::Context; +use opentelemetry_api::KeyValue; +use regex::Regex; + +#[derive(Clone)] +pub(crate) struct FilterMeterProvider { + delegate: opentelemetry::sdk::metrics::MeterProvider, + deny: Option, + allow: Option, +} + +#[buildstructor] +impl FilterMeterProvider { + #[builder] + fn new( + delegate: opentelemetry::sdk::metrics::MeterProvider, + deny: Option, + allow: Option, + ) -> Self { + FilterMeterProvider { + delegate, + deny, + allow, + } + } + + pub(crate) fn private_metrics(delegate: 
opentelemetry::sdk::metrics::MeterProvider) -> Self { + FilterMeterProvider::builder() + .delegate(delegate) + .allow( + Regex::new( + r"apollo\.(graphos\.cloud|router\.(operations?|config|schema|query))(\..*|$)", + ) + .expect("regex should have been valid"), + ) + .build() + } + + pub(crate) fn public_metrics(delegate: opentelemetry::sdk::metrics::MeterProvider) -> Self { + FilterMeterProvider::builder() + .delegate(delegate) + .deny( + Regex::new(r"apollo\.router\.(config|entities)(\..*|$)") + .expect("regex should have been valid"), + ) + .build() + } + + pub(crate) fn shutdown(&self) -> opentelemetry::metrics::Result<()> { + self.delegate.shutdown() + } + + #[allow(dead_code)] + pub(crate) fn force_flush(&self, cx: &Context) -> opentelemetry::metrics::Result<()> { + self.delegate.force_flush(cx) + } +} + +struct FilteredInstrumentProvider { + delegate: Meter, + noop: Meter, + deny: Option, + allow: Option, +} + +macro_rules! filter_instrument_fn { + ($name:ident, $ty:ty, $wrapper:ident) => { + fn $name( + &self, + name: Cow<'static, str>, + description: Option>, + unit: Option, + ) -> opentelemetry::metrics::Result<$wrapper<$ty>> { + let mut builder = match (&self.deny, &self.allow) { + (Some(deny), Some(allow)) if deny.is_match(&name) && !allow.is_match(&name) => { + self.noop.$name(name) + } + (Some(deny), None) if deny.is_match(&name) => self.noop.$name(name), + (None, Some(allow)) if !allow.is_match(&name) => self.noop.$name(name), + (_, _) => self.delegate.$name(name), + }; + if let Some(description) = &description { + builder = builder.with_description(description.clone()) + } + if let Some(unit) = &unit { + builder = builder.with_unit(unit.clone()); + } + builder.try_init() + } + }; +} + +macro_rules! 
filter_observable_instrument_fn { + ($name:ident, $ty:ty, $wrapper:ident) => { + fn $name( + &self, + name: Cow<'static, str>, + description: Option>, + unit: Option, + callback: Vec>, + ) -> opentelemetry::metrics::Result<$wrapper<$ty>> { + let mut builder = match (&self.deny, &self.allow) { + (Some(deny), Some(allow)) if deny.is_match(&name) && !allow.is_match(&name) => { + self.noop.$name(name) + } + (Some(deny), None) if deny.is_match(&name) => self.noop.$name(name), + (None, Some(allow)) if !allow.is_match(&name) => self.noop.$name(name), + (_, _) => self.delegate.$name(name), + }; + if let Some(description) = &description { + builder = builder.with_description(description.clone()); + } + if let Some(unit) = &unit { + builder = builder.with_unit(unit.clone()); + } + + for callback in callback { + builder = builder.with_callback(callback); + } + + builder.try_init() + } + }; +} + +impl InstrumentProvider for FilteredInstrumentProvider { + filter_instrument_fn!(u64_counter, u64, Counter); + filter_instrument_fn!(f64_counter, f64, Counter); + + filter_observable_instrument_fn!(f64_observable_counter, f64, ObservableCounter); + filter_observable_instrument_fn!(u64_observable_counter, u64, ObservableCounter); + + filter_instrument_fn!(u64_histogram, u64, Histogram); + filter_instrument_fn!(f64_histogram, f64, Histogram); + filter_instrument_fn!(i64_histogram, i64, Histogram); + + filter_instrument_fn!(i64_up_down_counter, i64, UpDownCounter); + filter_instrument_fn!(f64_up_down_counter, f64, UpDownCounter); + + filter_observable_instrument_fn!(i64_observable_up_down_counter, i64, ObservableUpDownCounter); + filter_observable_instrument_fn!(f64_observable_up_down_counter, f64, ObservableUpDownCounter); + + filter_observable_instrument_fn!(f64_observable_gauge, f64, ObservableGauge); + filter_observable_instrument_fn!(i64_observable_gauge, i64, ObservableGauge); + filter_observable_instrument_fn!(u64_observable_gauge, u64, ObservableGauge); + + fn register_callback( 
+ &self, + instruments: &[Arc], + callbacks: Box, + ) -> opentelemetry_api::metrics::Result> { + self.delegate.register_callback(instruments, callbacks) + } +} + +impl MeterProvider for FilterMeterProvider { + fn versioned_meter( + &self, + name: impl Into>, + version: Option>>, + schema_url: Option>>, + attributes: Option>, + ) -> Meter { + let delegate = self + .delegate + .versioned_meter(name, version, schema_url, attributes); + Meter::new(Arc::new(FilteredInstrumentProvider { + noop: NoopMeterProvider::default().meter(""), + delegate, + deny: self.deny.clone(), + allow: self.allow.clone(), + })) + } +} + +#[cfg(test)] +mod test { + + use opentelemetry::metrics::MeterProvider; + use opentelemetry::metrics::Unit; + use opentelemetry::runtime; + use opentelemetry::sdk::metrics::MeterProviderBuilder; + use opentelemetry::sdk::metrics::PeriodicReader; + use opentelemetry::testing::metrics::InMemoryMetricsExporter; + use opentelemetry_api::Context; + + use crate::metrics::filter::FilterMeterProvider; + + #[tokio::test(flavor = "multi_thread")] + async fn test_private_metrics() { + let exporter = InMemoryMetricsExporter::default(); + let meter_provider = FilterMeterProvider::private_metrics( + MeterProviderBuilder::default() + .with_reader(PeriodicReader::builder(exporter.clone(), runtime::Tokio).build()) + .build(), + ); + let cx = Context::default(); + let filtered = meter_provider.versioned_meter("filtered", "".into(), "".into(), None); + filtered + .u64_counter("apollo.router.operations") + .init() + .add(1, &[]); + filtered + .u64_counter("apollo.router.operations.test") + .init() + .add(1, &[]); + filtered + .u64_counter("apollo.graphos.cloud.test") + .init() + .add(1, &[]); + filtered + .u64_counter("apollo.router.unknown.test") + .init() + .add(1, &[]); + meter_provider.force_flush(&cx).unwrap(); + + let metrics: Vec<_> = exporter + .get_finished_metrics() + .unwrap() + .into_iter() + .flat_map(|m| m.scope_metrics.into_iter()) + .flat_map(|m| m.metrics) + 
.collect(); + assert!(metrics + .iter() + .any(|m| m.name == "apollo.router.operations.test")); + + assert!(metrics.iter().any(|m| m.name == "apollo.router.operations")); + + assert!(metrics + .iter() + .any(|m| m.name == "apollo.graphos.cloud.test")); + + assert!(!metrics + .iter() + .any(|m| m.name == "apollo.router.unknown.test")); + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_public_metrics() { + let exporter = InMemoryMetricsExporter::default(); + let meter_provider = FilterMeterProvider::public_metrics( + MeterProviderBuilder::default() + .with_reader(PeriodicReader::builder(exporter.clone(), runtime::Tokio).build()) + .build(), + ); + let cx = Context::default(); + let filtered = meter_provider.versioned_meter("filtered", "".into(), "".into(), None); + filtered + .u64_counter("apollo.router.config") + .init() + .add(1, &[]); + filtered + .u64_counter("apollo.router.config.test") + .init() + .add(1, &[]); + filtered + .u64_counter("apollo.router.entities") + .init() + .add(1, &[]); + filtered + .u64_counter("apollo.router.entities.test") + .init() + .add(1, &[]); + meter_provider.force_flush(&cx).unwrap(); + + let metrics: Vec<_> = exporter + .get_finished_metrics() + .unwrap() + .into_iter() + .flat_map(|m| m.scope_metrics.into_iter()) + .flat_map(|m| m.metrics) + .collect(); + + assert!(!metrics.iter().any(|m| m.name == "apollo.router.config")); + assert!(!metrics + .iter() + .any(|m| m.name == "apollo.router.config.test")); + assert!(!metrics.iter().any(|m| m.name == "apollo.router.entities")); + assert!(!metrics + .iter() + .any(|m| m.name == "apollo.router.entities.test")); + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_description_and_unit() { + let exporter = InMemoryMetricsExporter::default(); + let meter_provider = FilterMeterProvider::private_metrics( + MeterProviderBuilder::default() + .with_reader(PeriodicReader::builder(exporter.clone(), runtime::Tokio).build()) + .build(), + ); + let cx = Context::default(); + 
let filtered = meter_provider.versioned_meter("filtered", "".into(), "".into(), None); + filtered + .u64_counter("apollo.router.operations") + .with_description("desc") + .with_unit(Unit::new("ms")) + .init() + .add(1, &[]); + meter_provider.force_flush(&cx).unwrap(); + + let metrics: Vec<_> = exporter + .get_finished_metrics() + .unwrap() + .into_iter() + .flat_map(|m| m.scope_metrics.into_iter()) + .flat_map(|m| m.metrics) + .collect(); + assert!(metrics.iter().any(|m| m.name == "apollo.router.operations" + && m.description == "desc" + && m.unit == Unit::new("ms"))); + } +} diff --git a/apollo-router/src/plugins/telemetry/metrics/layer.rs b/apollo-router/src/metrics/layer.rs similarity index 90% rename from apollo-router/src/plugins/telemetry/metrics/layer.rs rename to apollo-router/src/metrics/layer.rs index f80b5552f9..d044cc3c09 100644 --- a/apollo-router/src/plugins/telemetry/metrics/layer.rs +++ b/apollo-router/src/metrics/layer.rs @@ -1,5 +1,6 @@ use std::collections::HashMap; use std::fmt; +use std::sync::Arc; use std::sync::RwLock; use opentelemetry::metrics::Counter; @@ -8,7 +9,6 @@ use opentelemetry::metrics::Meter; use opentelemetry::metrics::MeterProvider; use opentelemetry::metrics::ObservableGauge; use opentelemetry::metrics::UpDownCounter; -use opentelemetry::Context as OtelContext; use opentelemetry::Key; use opentelemetry::KeyValue; use opentelemetry::Value; @@ -19,10 +19,12 @@ use tracing_subscriber::layer::Context; use tracing_subscriber::registry::LookupSpan; use tracing_subscriber::Layer; -use super::METRIC_PREFIX_COUNTER; -use super::METRIC_PREFIX_HISTOGRAM; -use super::METRIC_PREFIX_MONOTONIC_COUNTER; -use super::METRIC_PREFIX_VALUE; +use crate::metrics::aggregation::AggregateMeterProvider; + +pub(crate) const METRIC_PREFIX_MONOTONIC_COUNTER: &str = "monotonic_counter."; +pub(crate) const METRIC_PREFIX_COUNTER: &str = "counter."; +pub(crate) const METRIC_PREFIX_HISTOGRAM: &str = "histogram."; +pub(crate) const METRIC_PREFIX_VALUE: &str = 
"value."; macro_rules! log_and_panic_in_debug_build { ($($tokens:tt)+) => {{ @@ -61,7 +63,6 @@ pub(crate) enum InstrumentType { impl Instruments { pub(crate) fn update_metric( &self, - cx: &OtelContext, meter: &Meter, instrument_type: InstrumentType, metric_name: &'static str, @@ -97,7 +98,7 @@ impl Instruments { &self.u64_counter, metric_name, || meter.u64_counter(metric_name).init(), - |ctr| ctr.add(cx, value, custom_attributes), + |ctr| ctr.add(value, custom_attributes), ); } InstrumentType::CounterF64(value) => { @@ -105,7 +106,7 @@ impl Instruments { &self.f64_counter, metric_name, || meter.f64_counter(metric_name).init(), - |ctr| ctr.add(cx, value, custom_attributes), + |ctr| ctr.add(value, custom_attributes), ); } InstrumentType::UpDownCounterI64(value) => { @@ -113,7 +114,7 @@ impl Instruments { &self.i64_up_down_counter, metric_name, || meter.i64_up_down_counter(metric_name).init(), - |ctr| ctr.add(cx, value, custom_attributes), + |ctr| ctr.add(value, custom_attributes), ); } InstrumentType::UpDownCounterF64(value) => { @@ -121,7 +122,7 @@ impl Instruments { &self.f64_up_down_counter, metric_name, || meter.f64_up_down_counter(metric_name).init(), - |ctr| ctr.add(cx, value, custom_attributes), + |ctr| ctr.add(value, custom_attributes), ); } InstrumentType::HistogramU64(value) => { @@ -129,7 +130,7 @@ impl Instruments { &self.u64_histogram, metric_name, || meter.u64_histogram(metric_name).init(), - |rec| rec.record(cx, value, custom_attributes), + |rec| rec.record(value, custom_attributes), ); } InstrumentType::HistogramI64(value) => { @@ -137,7 +138,7 @@ impl Instruments { &self.i64_histogram, metric_name, || meter.i64_histogram(metric_name).init(), - |rec| rec.record(cx, value, custom_attributes), + |rec| rec.record(value, custom_attributes), ); } InstrumentType::HistogramF64(value) => { @@ -145,7 +146,7 @@ impl Instruments { &self.f64_histogram, metric_name, || meter.f64_histogram(metric_name).init(), - |rec| rec.record(cx, value, custom_attributes), + 
|rec| rec.record(value, custom_attributes), ); } InstrumentType::GaugeU64(value) => { @@ -153,7 +154,7 @@ impl Instruments { &self.u64_gauge, metric_name, || meter.u64_observable_gauge(metric_name).init(), - |gauge| gauge.observe(cx, value, custom_attributes), + |gauge| gauge.observe(value, custom_attributes), ); } }; @@ -161,10 +162,10 @@ impl Instruments { } pub(crate) struct MetricVisitor<'a> { + pub(crate) meter: &'a Meter, pub(crate) instruments: &'a Instruments, pub(crate) metric: Option<(&'static str, InstrumentType)>, pub(crate) custom_attributes: Vec, - pub(crate) meter: &'a Meter, attributes_ignored: bool, } @@ -413,9 +414,7 @@ impl<'a> Visit for MetricVisitor<'a> { impl<'a> MetricVisitor<'a> { fn finish(self) { if let Some((metric_name, instrument_type)) = self.metric { - let cx = OtelContext::current(); self.instruments.update_metric( - &cx, self.meter, instrument_type, metric_name, @@ -425,18 +424,36 @@ impl<'a> MetricVisitor<'a> { } } +#[derive(Clone)] pub(crate) struct MetricsLayer { + meter_provider: AggregateMeterProvider, + inner: Arc>, +} + +struct MetricsLayerInner { meter: Meter, instruments: Instruments, } impl MetricsLayer { - pub(crate) fn new(meter_provider: &impl MeterProvider) -> Self { + pub(crate) fn new(meter_provider: AggregateMeterProvider) -> Self { Self { + inner: Arc::new(RwLock::new(Self::new_inner(&meter_provider))), + meter_provider, + } + } + + fn new_inner(meter_provider: &AggregateMeterProvider) -> MetricsLayerInner { + MetricsLayerInner { meter: meter_provider.meter("apollo/router"), instruments: Default::default(), } } + /// Remove all the instruments from the metrics layer. These will be obtained again from the meter provider upon next use. 
+ pub(crate) fn clear(&self) { + let mut inner = self.inner.write().expect("lock poisoned"); + *inner = Self::new_inner(&self.meter_provider); + } } impl Layer for MetricsLayer @@ -444,9 +461,10 @@ where S: Subscriber + for<'span> LookupSpan<'span>, { fn on_event(&self, event: &tracing::Event<'_>, _ctx: Context<'_, S>) { + let inner = self.inner.read().expect("lock poisoned"); let mut metric_visitor = MetricVisitor { - instruments: &self.instruments, - meter: &self.meter, + meter: &inner.meter, + instruments: &inner.instruments, metric: None, custom_attributes: Vec::new(), attributes_ignored: false, diff --git a/apollo-router/src/metrics/mod.rs b/apollo-router/src/metrics/mod.rs new file mode 100644 index 0000000000..07b3361a2f --- /dev/null +++ b/apollo-router/src/metrics/mod.rs @@ -0,0 +1,781 @@ +#[cfg(test)] +use std::future::Future; +#[cfg(test)] +use std::pin::Pin; +use std::sync::OnceLock; + +#[cfg(test)] +use futures::FutureExt; + +use crate::metrics::aggregation::AggregateMeterProvider; + +pub(crate) mod aggregation; +pub(crate) mod filter; +pub(crate) mod layer; + +// During tests this is a task local so that we can test metrics without having to worry about other tests interfering. 
+ +#[cfg(test)] +mod test_utils { + use std::fmt::Debug; + use std::fmt::Display; + use std::sync::Arc; + use std::sync::OnceLock; + use std::sync::Weak; + + use itertools::Itertools; + use num_traits::NumCast; + use num_traits::ToPrimitive; + use opentelemetry::sdk::metrics::data::Gauge; + use opentelemetry::sdk::metrics::data::Histogram; + use opentelemetry::sdk::metrics::data::ResourceMetrics; + use opentelemetry::sdk::metrics::data::Sum; + use opentelemetry::sdk::metrics::data::Temporality; + use opentelemetry::sdk::metrics::reader::AggregationSelector; + use opentelemetry::sdk::metrics::reader::MetricProducer; + use opentelemetry::sdk::metrics::reader::MetricReader; + use opentelemetry::sdk::metrics::reader::TemporalitySelector; + use opentelemetry::sdk::metrics::Aggregation; + use opentelemetry::sdk::metrics::InstrumentKind; + use opentelemetry::sdk::metrics::ManualReader; + use opentelemetry::sdk::metrics::MeterProviderBuilder; + use opentelemetry::sdk::metrics::Pipeline; + use opentelemetry::sdk::AttributeSet; + use opentelemetry_api::Array; + use opentelemetry_api::Context; + use opentelemetry_api::KeyValue; + use opentelemetry_api::Value; + use tokio::task_local; + + use crate::metrics::aggregation::AggregateMeterProvider; + use crate::metrics::aggregation::MeterProviderType; + use crate::metrics::filter::FilterMeterProvider; + task_local! { + pub(crate) static AGGREGATE_METER_PROVIDER_ASYNC: OnceLock<(AggregateMeterProvider, ClonableManualReader)>; + } + thread_local! 
{ + pub(crate) static AGGREGATE_METER_PROVIDER: OnceLock<(AggregateMeterProvider, ClonableManualReader)> = OnceLock::new(); + } + + #[derive(Debug, Clone, Default)] + pub(crate) struct ClonableManualReader { + reader: Arc, + } + + impl TemporalitySelector for ClonableManualReader { + fn temporality(&self, kind: InstrumentKind) -> Temporality { + self.reader.temporality(kind) + } + } + + impl AggregationSelector for ClonableManualReader { + fn aggregation(&self, kind: InstrumentKind) -> Aggregation { + self.reader.aggregation(kind) + } + } + impl MetricReader for ClonableManualReader { + fn register_pipeline(&self, pipeline: Weak) { + self.reader.register_pipeline(pipeline) + } + + fn register_producer(&self, producer: Box) { + self.reader.register_producer(producer) + } + + fn collect(&self, rm: &mut ResourceMetrics) -> opentelemetry_api::metrics::Result<()> { + self.reader.collect(rm) + } + + fn force_flush(&self, cx: &Context) -> opentelemetry_api::metrics::Result<()> { + self.reader.force_flush(cx) + } + + fn shutdown(&self) -> opentelemetry_api::metrics::Result<()> { + self.reader.shutdown() + } + } + + fn create_test_meter_provider() -> (AggregateMeterProvider, ClonableManualReader) { + { + let meter_provider = AggregateMeterProvider::default(); + let reader = ClonableManualReader::default(); + + meter_provider.set( + MeterProviderType::Public, + Some(FilterMeterProvider::public_metrics( + MeterProviderBuilder::default() + .with_reader(reader.clone()) + .build(), + )), + ); + + (meter_provider, reader) + } + } + #[cfg(test)] + pub(crate) fn meter_provider_and_readers() -> (AggregateMeterProvider, ClonableManualReader) { + if tokio::runtime::Handle::try_current().is_ok() { + if let Ok(task_local) = AGGREGATE_METER_PROVIDER_ASYNC + .try_with(|cell| cell.get_or_init(create_test_meter_provider).clone()) + { + task_local + } else { + // We need to silently fail here. 
Otherwise we fail every multi-threaded test that touches metrics + ( + AggregateMeterProvider::default(), + ClonableManualReader::default(), + ) + } + } else { + AGGREGATE_METER_PROVIDER + .with(|cell| cell.get_or_init(create_test_meter_provider).clone()) + } + } + + #[cfg(not(test))] + fn meter_provider_and_readers() -> (AggregateMeterProvider, ClonableManualReader) { + AGGREGATE_METER_PROVIDER.with(|cell| cell.get_or_init(create_test_meter_provider).clone()) + } + + pub(crate) struct Metrics { + resource_metrics: ResourceMetrics, + } + + impl Default for Metrics { + fn default() -> Self { + Metrics { + resource_metrics: ResourceMetrics { + resource: Default::default(), + scope_metrics: vec![], + }, + } + } + } + + pub(crate) fn collect_metrics() -> Metrics { + let mut metrics = Metrics::default(); + let (_, reader) = meter_provider_and_readers(); + reader.collect(&mut metrics.resource_metrics).unwrap(); + metrics + } + + impl Metrics { + pub(crate) fn find( + &self, + name: &str, + ) -> Option<&opentelemetry::sdk::metrics::data::Metric> { + self.resource_metrics + .scope_metrics + .iter() + .flat_map(|scope_metrics| { + scope_metrics + .metrics + .iter() + .filter(|metric| metric.name == name) + }) + .next() + } + + pub(crate) fn assert( + &self, + name: &str, + value: T, + attributes: &[KeyValue], + ) { + let attributes = AttributeSet::from(attributes); + if let Some(value) = value.to_u64() { + if self.metric_exists(name, value, &attributes) { + return; + } + } + + if let Some(value) = value.to_i64() { + if self.metric_exists(name, value, &attributes) { + return; + } + } + + if let Some(value) = value.to_f64() { + if self.metric_exists(name, value, &attributes) { + return; + } + } + + self.panic_metric_not_found(name, value, &attributes); + } + + fn panic_metric_not_found( + &self, + name: &str, + value: T, + attributes: &AttributeSet, + ) { + panic!( + "metric: {}, {}, {} not found.\nMetrics that were found:\n{}", + name, + value, + 
Self::pretty_attributes(attributes), + self.resource_metrics + .scope_metrics + .iter() + .flat_map(|scope_metrics| { scope_metrics.metrics.iter() }) + .flat_map(|metric| { Self::pretty_metric(metric) }) + .map(|metric| { format!(" {}", metric) }) + .join("\n") + ) + } + + fn pretty_metric(metric: &opentelemetry::sdk::metrics::data::Metric) -> Vec { + let mut results = Vec::new(); + results.append(&mut Self::pretty_data_point::(metric)); + results.append(&mut Self::pretty_data_point::(metric)); + results.append(&mut Self::pretty_data_point::(metric)); + results + } + + fn pretty_data_point( + metric: &opentelemetry::sdk::metrics::data::Metric, + ) -> Vec { + let mut results = Vec::new(); + if let Some(gauge) = metric.data.as_any().downcast_ref::>() { + for datapoint in gauge.data_points.iter() { + results.push(format!( + "\"{}\", {}, {}", + metric.name, + datapoint.value, + Self::pretty_attributes(&datapoint.attributes) + )); + } + } + if let Some(sum) = metric.data.as_any().downcast_ref::>() { + for datapoint in sum.data_points.iter() { + results.push(format!( + "\"{}\", {}, {}", + metric.name, + datapoint.value, + Self::pretty_attributes(&datapoint.attributes) + )); + } + } + if let Some(histogram) = metric.data.as_any().downcast_ref::>() { + for datapoint in histogram.data_points.iter() { + results.push(format!( + "\"{}\", {}, {}", + metric.name, + datapoint.sum, + Self::pretty_attributes(&datapoint.attributes) + )); + } + } + + results + } + + fn pretty_attributes(attributes: &AttributeSet) -> String { + attributes + .iter() + .map(|(key, value)| { + format!( + "\"{}\" => {}", + key.as_str(), + match value { + Value::Bool(v) => { + v.to_string() + } + Value::I64(v) => { + v.to_string() + } + Value::F64(v) => { + format!("{}f64", v) + } + Value::String(v) => { + format!("\"{}\"", v) + } + Value::Array(Array::Bool(v)) => { + format!("[{}]", v.iter().map(|v| v.to_string()).join(", ")) + } + Value::Array(Array::F64(v)) => { + format!("[{}]", v.iter().map(|v| 
format!("{}f64", v)).join(", ")) + } + Value::Array(Array::I64(v)) => { + format!("[{}]", v.iter().map(|v| v.to_string()).join(", ")) + } + Value::Array(Array::String(v)) => { + format!("[{}]", v.iter().map(|v| format!("\"{}\"", v)).join(", ")) + } + } + ) + }) + .join(", ") + } + + fn metric_exists( + &self, + name: &str, + value: T, + attributes: &AttributeSet, + ) -> bool { + if let Some(metric) = self.find(name) { + // Try to downcast the metric to each type of aggregation and assert that the value is correct. + if let Some(gauge) = metric.data.as_any().downcast_ref::>() { + // Find the datapoint with the correct attributes. + return gauge.data_points.iter().any(|datapoint| { + datapoint.attributes == *attributes && datapoint.value == value + }); + } else if let Some(sum) = metric.data.as_any().downcast_ref::>() { + return sum.data_points.iter().any(|datapoint| { + datapoint.attributes == *attributes && datapoint.value == value + }); + } else if let Some(histogram) = metric.data.as_any().downcast_ref::>() + { + if let Some(value) = value.to_u64() { + return histogram.data_points.iter().any(|datapoint| { + datapoint.attributes == *attributes && datapoint.count == value + }); + } + } + } + false + } + } +} +#[cfg(test)] +pub(crate) fn meter_provider() -> AggregateMeterProvider { + test_utils::meter_provider_and_readers().0 +} + +#[cfg(test)] +pub(crate) use test_utils::collect_metrics; + +#[cfg(not(test))] +static AGGREGATE_METER_PROVIDER: OnceLock = OnceLock::new(); +#[cfg(not(test))] +pub(crate) fn meter_provider() -> AggregateMeterProvider { + AGGREGATE_METER_PROVIDER + .get_or_init(Default::default) + .clone() +} + +#[macro_export] +/// Get or create a u64 monotonic counter metric and add a value to it +/// +/// This macro is a replacement for the telemetry crate's MetricsLayer. We will eventually convert all metrics to use these macros and deprecate the MetricsLayer. 
+/// The reason for this is that the MetricsLayer has: +/// * No support for dynamic attributes +/// * No support dynamic metrics. +/// * Imperfect mapping to metrics API that can only be checked at runtime. +/// New metrics should be added using these macros. +#[allow(unused_macros)] +macro_rules! u64_counter { + ($name:literal, $description:literal, $value: expr, $($attr_key:expr => $attr_value:expr),+) => { + let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+]; + metric!(u64, counter, add, $name, $description, $value, &attributes); + }; + + ($name:literal, $description:literal, $value: expr, $attrs: expr) => { + metric!(u64, counter, add, $name, $description, $value, $attrs); + }; + + ($name:literal, $description:literal, $value: expr) => { + metric!(u64, counter, add, $name, $description, $value, &[]); + } +} + +/// Get or create a f64 monotonic counter metric and add a value to it +/// +/// This macro is a replacement for the telemetry crate's MetricsLayer. We will eventually convert all metrics to use these macros and deprecate the MetricsLayer. +/// The reason for this is that the MetricsLayer has: +/// * No support for dynamic attributes +/// * No support dynamic metrics. +/// * Imperfect mapping to metrics API that can only be checked at runtime. +/// New metrics should be added using these macros. +#[allow(unused_macros)] +macro_rules! 
f64_counter { + ($name:literal, $description:literal, $value: expr, $($attr_key:expr => $attr_value:expr),+) => { + let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+]; + metric!(f64, counter, add, $name, $description, $value, &attributes); + }; + + ($name:literal, $description:literal, $value: expr, $attrs: expr) => { + metric!(f64, counter, add, $name, $description, $value, $attrs); + }; + + ($name:literal, $description:literal, $value: expr) => { + metric!(f64, counter, add, $name, $description, $value, &[]); + } +} + +/// Get or create an i64 up down counter metric and add a value to it +/// +/// This macro is a replacement for the telemetry crate's MetricsLayer. We will eventually convert all metrics to use these macros and deprecate the MetricsLayer. +/// The reason for this is that the MetricsLayer has: +/// * No support for dynamic attributes +/// * No support dynamic metrics. +/// * Imperfect mapping to metrics API that can only be checked at runtime. +/// New metrics should be added using these macros. + +#[allow(unused_macros)] +macro_rules! i64_up_down_counter { + ($name:literal, $description:literal, $value: expr, $($attr_key:expr => $attr_value:expr),+) => { + let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+]; + metric!(i64, histogram, record, $name, $description, $value, &attributes); + }; + + ($name:literal, $description:literal, $value: expr, $attrs: expr) => { + metric!(i64, histogram, record, $name, $description, $value, $attrs); + }; + + ($name:literal, $description:literal, $value: expr) => { + metric!(i64, histogram, record, $name, $description, $value, &[]); + }; +} + +/// Get or create an f64 up down counter metric and add a value to it +/// +/// This macro is a replacement for the telemetry crate's MetricsLayer. We will eventually convert all metrics to use these macros and deprecate the MetricsLayer. 
+/// The reason for this is that the MetricsLayer has: +/// * No support for dynamic attributes +/// * No support dynamic metrics. +/// * Imperfect mapping to metrics API that can only be checked at runtime. +/// New metrics should be added using these macros. +#[allow(unused_macros)] +macro_rules! f64_up_down_counter { + ($name:literal, $description:literal, $value: expr, $($attr_key:expr => $attr_value:expr),+) => { + let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+]; + metric!(f64, histogram, record, $name, $description, $value, &attributes); + }; + + ($name:literal, $description:literal, $value: expr, $attrs: expr) => { + metric!(f64, histogram, record, $name, $description, $value, $attrs); + }; + + ($name:literal, $description:literal, $value: expr) => { + metric!(f64, histogram, record, $name, $description, $value, &[]); + }; +} + +/// Get or create an f64 histogram metric and add a value to it +/// +/// This macro is a replacement for the telemetry crate's MetricsLayer. We will eventually convert all metrics to use these macros and deprecate the MetricsLayer. +/// The reason for this is that the MetricsLayer has: +/// * No support for dynamic attributes +/// * No support dynamic metrics. +/// * Imperfect mapping to metrics API that can only be checked at runtime. +/// New metrics should be added using these macros. +#[allow(unused_macros)] +macro_rules! 
f64_histogram { + ($name:literal, $description:literal, $value: expr, $($attr_key:expr => $attr_value:expr),+) => { + let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+]; + metric!(f64, histogram, record, $name, $description, $value, &attributes); + }; + + ($name:literal, $description:literal, $value: expr, $attrs: expr) => { + metric!(f64, histogram, record, $name, $description, $value, $attrs); + }; + + ($name:literal, $description:literal, $value: expr) => { + metric!(f64, histogram, record, $name, $description, $value, &[]); + }; +} + +/// Get or create an u64 histogram metric and add a value to it +/// +/// This macro is a replacement for the telemetry crate's MetricsLayer. We will eventually convert all metrics to use these macros and deprecate the MetricsLayer. +/// The reason for this is that the MetricsLayer has: +/// * No support for dynamic attributes +/// * No support dynamic metrics. +/// * Imperfect mapping to metrics API that can only be checked at runtime. +/// New metrics should be added using these macros. +#[allow(unused_macros)] +macro_rules! u64_histogram { + ($name:literal, $description:literal, $value: expr, $($attr_key:expr => $attr_value:expr),+) => { + let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+]; + metric!(u64, histogram, record, $name, $description, $value, &attributes); + }; + + ($name:literal, $description:literal, $value: expr, $attrs: expr) => { + metric!(u64, histogram, record, $name, $description, $value, $attrs); + }; + + ($name:literal, $description:literal, $value: expr) => { + metric!(u64, histogram, record, $name, $description, $value, &[]); + }; +} + +/// Get or create an i64 histogram metric and add a value to it +/// +/// This macro is a replacement for the telemetry crate's MetricsLayer. We will eventually convert all metrics to use these macros and deprecate the MetricsLayer. 
+/// The reason for this is that the MetricsLayer has: +/// * No support for dynamic attributes +/// * No support dynamic metrics. +/// * Imperfect mapping to metrics API that can only be checked at runtime. +/// New metrics should be added using these macros. +#[allow(unused_macros)] +macro_rules! i64_histogram { + ($name:literal, $description:literal, $value: expr, $($attr_key:expr => $attr_value:expr),+) => { + let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+]; + metric!(i64, histogram, record, $name, $description, $value, &attributes); + }; + + ($name:literal, $description:literal, $value: expr, $attrs: expr) => { + metric!(i64, histogram, record, $name, $description, $value, $attrs); + }; + + ($name:literal, $description:literal, $value: expr) => { + metric!(i64, histogram, record, $name, $description, $value, &[]); + }; +} + +thread_local! { + // This is used exactly once in testing callsite caching. + #[cfg(test)] + pub(crate) static CACHE_CALLSITE: std::sync::atomic::AtomicBool = const {std::sync::atomic::AtomicBool::new(false)}; +} +macro_rules! metric { + ($ty:ident, $instrument:ident, $mutation:ident, $name:literal, $description:literal, $value: expr, $attrs: expr) => { + + // The way this works is that we have a static at each call site that holds a weak reference to the instrument. + // We make a call we try to upgrade the weak reference. If it succeeds we use the instrument. + // Otherwise we create a new instrument and update the static. + // The aggregate meter provider is used to hold on to references of all instruments that have been created and will clear references when the underlying configuration has changed. + // There is a Mutex involved, however it is only locked for the duration of the upgrade once the instrument has been created. + // The Reason a Mutex is used rather than an RwLock is that we are not holding the lock for any significant period of time and the cost of an RwLock is potentially higher. 
+ // If we profile and deem it's worth switching to RwLock then we can do that. + + paste::paste! { + { + // There is a single test for caching callsites. Other tests do not cache because they will interfere with each other due to them using a task local meter provider to aid testing. + #[cfg(test)] + let cache_callsite = crate::metrics::CACHE_CALLSITE.with(|cell| cell.load(std::sync::atomic::Ordering::SeqCst)); + + // The compiler will optimize this in non test builds + #[cfg(not(test))] + let cache_callsite = true; + + if cache_callsite { + static INSTRUMENT_CACHE: std::sync::OnceLock]<$ty>>>> = std::sync::OnceLock::new(); + + let mut instrument_guard = INSTRUMENT_CACHE + .get_or_init(|| { + let meter_provider = crate::metrics::meter_provider(); + let meter = opentelemetry::metrics::MeterProvider::meter(&meter_provider, "apollo/router"); + let instrument = meter.[<$ty _ $instrument>]($name).with_description($description).init(); + let instrument_ref = meter_provider.register_instrument(instrument); + std::sync::Mutex::new(std::sync::Arc::downgrade(&instrument_ref)) + }) + .lock() + .expect("lock poisoned"); + let instrument = if let Some(instrument) = instrument_guard.upgrade() { + // Fast path, we got the instrument, drop the mutex guard immediately. + drop(instrument_guard); + instrument + } else { + // Slow path, we need to obtain the instrument again. + let meter_provider = crate::metrics::meter_provider(); + let meter = opentelemetry::metrics::MeterProvider::meter(&meter_provider, "apollo/router"); + let instrument = meter.[<$ty _ $instrument>]($name).with_description($description).init(); + let instrument_ref = meter_provider.register_instrument(instrument); + *instrument_guard = std::sync::Arc::downgrade(&instrument_ref); + // We've updated the instrument and got a strong reference to it. We can drop the mutex guard now. 
+ drop(instrument_guard); + instrument_ref + }; + instrument.$mutation($value, &$attrs); + } + else { + let meter_provider = crate::metrics::meter_provider(); + let meter = opentelemetry::metrics::MeterProvider::meter(&meter_provider, "apollo/router"); + let instrument = meter.[<$ty _ $instrument>]($name).with_description($description).init(); + instrument.$mutation($value, &$attrs); + } + } + } + }; +} + +#[cfg(test)] +macro_rules! assert_metric { + ($name:literal, $value: expr, $($attr_key:expr => $attr_value:expr),+) => { + let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+]; + crate::metrics::collect_metrics().assert($name, $value, &attributes); + }; + ($name:literal, $value: expr) => { + crate::metrics::collect_metrics().assert($name, $value, &[]); + }; +} + +#[cfg(test)] +pub(crate) type MetricFuture = Pin::Output> + Send>>; + +#[cfg(test)] +pub(crate) trait FutureMetricsExt { + fn with_metrics( + self, + ) -> tokio::task::futures::TaskLocalFuture< + OnceLock<(AggregateMeterProvider, test_utils::ClonableManualReader)>, + MetricFuture, + > + where + Self: Sized + Future + Send + 'static, + ::Output: Send + 'static, + { + test_utils::AGGREGATE_METER_PROVIDER_ASYNC.scope( + Default::default(), + async move { + let result = self.await; + let _ = tokio::task::spawn_blocking(|| { + meter_provider().shutdown(); + }) + .await; + result + } + .boxed(), + ) + } +} + +#[cfg(test)] +impl FutureMetricsExt for T where T: Future {} + +#[cfg(test)] +mod test { + use crate::metrics::aggregation::MeterProviderType; + use crate::metrics::meter_provider; + use crate::metrics::FutureMetricsExt; + + #[test] + fn test_non_async() { + // Each test is run in a separate thread, metrics are stored in a thread local. 
+ u64_counter!("test", "test description", 1, "attr" => "val"); + assert_metric!("test", 1, "attr" => "val"); + } + + #[tokio::test(flavor = "multi_thread")] + async fn test_async_multi() { + // Multi-threaded runtime needs to use a tokio task local to avoid tests interfering with each other + async { + u64_counter!("test", "test description", 1, "attr" => "val"); + assert_metric!("test", 1, "attr" => "val"); + } + .with_metrics() + .await; + } + + #[tokio::test] + async fn test_async_single() { + async { + // It's a single threaded tokio runtime, so we can still use a thread local + u64_counter!("test", "test description", 1, "attr" => "val"); + assert_metric!("test", 1, "attr" => "val"); + } + .with_metrics() + .await; + } + + #[tokio::test] + async fn test_u64_counter() { + async { + u64_counter!("test", "test description", 1, "attr" => "val"); + assert_metric!("test", 1, "attr" => "val"); + } + .with_metrics() + .await; + } + + #[tokio::test] + async fn test_f64_counter() { + async { + f64_counter!("test", "test description", 1.5, "attr" => "val"); + assert_metric!("test", 1.5, "attr" => "val"); + } + .with_metrics() + .await; + } + + #[tokio::test] + async fn test_i64_up_down_counter() { + async { + i64_up_down_counter!("test", "test description", 1, "attr" => "val"); + assert_metric!("test", 1, "attr" => "val"); + } + .with_metrics() + .await; + } + + #[tokio::test] + async fn test_f64_up_down_counter() { + async { + f64_up_down_counter!("test", "test description", 1.5, "attr" => "val"); + assert_metric!("test", 1.5, "attr" => "val"); + } + .with_metrics() + .await; + } + + #[tokio::test] + async fn test_u64_histogram() { + async { + u64_histogram!("test", "test description", 1, "attr" => "val"); + assert_metric!("test", 1, "attr" => "val"); + } + .with_metrics() + .await; + } + + #[tokio::test] + async fn test_i64_histogram() { + async { + i64_histogram!("test", "test description", 1, "attr" => "val"); + assert_metric!("test", 1, "attr" => "val"); + } + 
.with_metrics() + .await; + } + + #[tokio::test] + async fn test_f64_histogram() { + async { + f64_histogram!("test", "test description", 1.0, "attr" => "val"); + assert_metric!("test", 1, "attr" => "val"); + } + .with_metrics() + .await; + } + + #[test] + fn test_callsite_caching() { + // Creating instruments may be slow due to multiple levels of locking that needs to happen through the various metrics layers. + // Callsite caching is implemented to prevent this happening on every call. + // See the metric macro above to see more information. + super::CACHE_CALLSITE.with(|cell| cell.store(true, std::sync::atomic::Ordering::SeqCst)); + fn test() { + // This is a single callsite so should only have one metric + u64_counter!("test", "test description", 1, "attr" => "val"); + } + + // Callsite hasn't been used yet, so there should be no metrics + assert_eq!(meter_provider().registered_instruments(), 0); + + // Call the metrics, it will be registered + test(); + assert_metric!("test", 1, "attr" => "val"); + assert_eq!(meter_provider().registered_instruments(), 1); + + // Call the metrics again, but the second call will not register a new metric because it will have be retrieved from the static + test(); + assert_metric!("test", 2, "attr" => "val"); + assert_eq!(meter_provider().registered_instruments(), 1); + + // Force invalidation of instruments + meter_provider().set(MeterProviderType::PublicPrometheus, None); + assert_eq!(meter_provider().registered_instruments(), 0); + + // Slow path + test(); + assert_eq!(meter_provider().registered_instruments(), 1); + + // Fast path + test(); + assert_eq!(meter_provider().registered_instruments(), 1); + } +} diff --git a/apollo-router/src/plugins/telemetry/formatters/mod.rs b/apollo-router/src/plugins/telemetry/formatters/mod.rs index 7237ddfa1a..e6bbd4c782 100644 --- a/apollo-router/src/plugins/telemetry/formatters/mod.rs +++ b/apollo-router/src/plugins/telemetry/formatters/mod.rs @@ -10,10 +10,10 @@ use 
tracing_subscriber::fmt::FormatEvent; use tracing_subscriber::fmt::FormatFields; use tracing_subscriber::registry::LookupSpan; -use super::metrics::METRIC_PREFIX_COUNTER; -use super::metrics::METRIC_PREFIX_HISTOGRAM; -use super::metrics::METRIC_PREFIX_MONOTONIC_COUNTER; -use super::metrics::METRIC_PREFIX_VALUE; +use crate::metrics::layer::METRIC_PREFIX_COUNTER; +use crate::metrics::layer::METRIC_PREFIX_HISTOGRAM; +use crate::metrics::layer::METRIC_PREFIX_MONOTONIC_COUNTER; +use crate::metrics::layer::METRIC_PREFIX_VALUE; pub(crate) const TRACE_ID_FIELD_NAME: &str = "trace_id"; diff --git a/apollo-router/src/plugins/telemetry/metrics/aggregation.rs b/apollo-router/src/plugins/telemetry/metrics/aggregation.rs deleted file mode 100644 index 10d321af87..0000000000 --- a/apollo-router/src/plugins/telemetry/metrics/aggregation.rs +++ /dev/null @@ -1,240 +0,0 @@ -use std::sync::Arc; - -use itertools::Itertools; -use opentelemetry::metrics::AsyncCounter; -use opentelemetry::metrics::AsyncGauge; -use opentelemetry::metrics::AsyncUpDownCounter; -use opentelemetry::metrics::Counter; -use opentelemetry::metrics::Histogram; -use opentelemetry::metrics::InstrumentProvider; -use opentelemetry::metrics::Meter; -use opentelemetry::metrics::MeterProvider; -use opentelemetry::metrics::ObservableCounter; -use opentelemetry::metrics::ObservableGauge; -use opentelemetry::metrics::ObservableUpDownCounter; -use opentelemetry::metrics::SyncCounter; -use opentelemetry::metrics::SyncHistogram; -use opentelemetry::metrics::SyncUpDownCounter; -use opentelemetry::metrics::Unit; -use opentelemetry::metrics::UpDownCounter; -use opentelemetry::Context; -use opentelemetry::InstrumentationLibrary; -use opentelemetry::KeyValue; - -#[derive(Clone, Default)] -pub(crate) struct AggregateMeterProvider { - providers: Vec>, -} -impl AggregateMeterProvider { - pub(crate) fn new( - providers: Vec>, - ) -> AggregateMeterProvider { - AggregateMeterProvider { providers } - } -} - -impl MeterProvider for 
AggregateMeterProvider { - fn versioned_meter( - &self, - name: &'static str, - version: Option<&'static str>, - schema_url: Option<&'static str>, - ) -> Meter { - Meter::new( - InstrumentationLibrary::new(name, version, schema_url), - Arc::new(AggregateInstrumentProvider { - meters: self - .providers - .iter() - .map(|p| p.versioned_meter(name, version, schema_url)) - .collect(), - }), - ) - } -} - -pub(crate) struct AggregateInstrumentProvider { - meters: Vec, -} - -pub(crate) struct AggregateCounter { - delegates: Vec>, -} - -impl SyncCounter for AggregateCounter { - fn add(&self, cx: &Context, value: T, attributes: &[KeyValue]) { - for counter in &self.delegates { - counter.add(cx, value, attributes) - } - } -} - -pub(crate) struct AggregateObservableCounter { - delegates: Vec>, -} - -impl AsyncCounter for AggregateObservableCounter { - fn observe(&self, cx: &Context, value: T, attributes: &[KeyValue]) { - for counter in &self.delegates { - counter.observe(cx, value, attributes) - } - } -} - -pub(crate) struct AggregateHistogram { - delegates: Vec>, -} - -impl SyncHistogram for AggregateHistogram { - fn record(&self, cx: &Context, value: T, attributes: &[KeyValue]) { - for histogram in &self.delegates { - histogram.record(cx, value, attributes) - } - } -} - -pub(crate) struct AggregateUpDownCounter { - delegates: Vec>, -} - -impl SyncUpDownCounter for AggregateUpDownCounter { - fn add(&self, cx: &Context, value: T, attributes: &[KeyValue]) { - for counter in &self.delegates { - counter.add(cx, value, attributes) - } - } -} - -pub(crate) struct AggregateObservableUpDownCounter { - delegates: Vec>, -} - -impl AsyncUpDownCounter for AggregateObservableUpDownCounter { - fn observe(&self, cx: &Context, value: T, attributes: &[KeyValue]) { - for counter in &self.delegates { - counter.observe(cx, value, attributes) - } - } -} - -pub(crate) struct AggregateObservableGauge { - delegates: Vec>, -} - -impl AsyncGauge for AggregateObservableGauge { - fn observe(&self, cx: 
&Context, value: T, attributes: &[KeyValue]) { - for gauge in &self.delegates { - gauge.observe(cx, value, attributes) - } - } -} - -macro_rules! aggregate_meter_fn { - ($name:ident, $ty:ty, $wrapper:ident, $implementation:ident) => { - fn $name( - &self, - name: String, - description: Option, - unit: Option, - ) -> opentelemetry::metrics::Result<$wrapper<$ty>> { - let delegates = self - .meters - .iter() - .map(|p| { - let mut b = p.$name(name.clone()); - if let Some(description) = &description { - b = b.with_description(description); - } - if let Some(unit) = &unit { - b = b.with_unit(unit.clone()); - } - b.try_init() - }) - .try_collect()?; - Ok($wrapper::new(Arc::new($implementation { delegates }))) - } - }; -} - -impl InstrumentProvider for AggregateInstrumentProvider { - aggregate_meter_fn!(u64_counter, u64, Counter, AggregateCounter); - aggregate_meter_fn!(f64_counter, f64, Counter, AggregateCounter); - - aggregate_meter_fn!( - f64_observable_counter, - f64, - ObservableCounter, - AggregateObservableCounter - ); - aggregate_meter_fn!( - u64_observable_counter, - u64, - ObservableCounter, - AggregateObservableCounter - ); - - aggregate_meter_fn!(u64_histogram, u64, Histogram, AggregateHistogram); - aggregate_meter_fn!(f64_histogram, f64, Histogram, AggregateHistogram); - aggregate_meter_fn!(i64_histogram, i64, Histogram, AggregateHistogram); - - aggregate_meter_fn!( - i64_up_down_counter, - i64, - UpDownCounter, - AggregateUpDownCounter - ); - aggregate_meter_fn!( - f64_up_down_counter, - f64, - UpDownCounter, - AggregateUpDownCounter - ); - - aggregate_meter_fn!( - i64_observable_up_down_counter, - i64, - ObservableUpDownCounter, - AggregateObservableUpDownCounter - ); - aggregate_meter_fn!( - f64_observable_up_down_counter, - f64, - ObservableUpDownCounter, - AggregateObservableUpDownCounter - ); - - aggregate_meter_fn!( - f64_observable_gauge, - f64, - ObservableGauge, - AggregateObservableGauge - ); - aggregate_meter_fn!( - i64_observable_gauge, - i64, - 
ObservableGauge, - AggregateObservableGauge - ); - aggregate_meter_fn!( - u64_observable_gauge, - u64, - ObservableGauge, - AggregateObservableGauge - ); - - fn register_callback( - &self, - callback: Box, - ) -> opentelemetry::metrics::Result<()> { - // The reason that this is OK is that calling observe outside of a callback is a no-op. - // So the callback is called, an observable is updated, but only the observable associated with the correct meter will take effect - - let callback = Arc::new(callback); - for meter in &self.meters { - let callback = callback.clone(); - // If this fails there is no recovery as some callbacks may be registered - meter.register_callback(move |c| callback(c))? - } - Ok(()) - } -} diff --git a/apollo-router/src/plugins/telemetry/metrics/apollo.rs b/apollo-router/src/plugins/telemetry/metrics/apollo.rs index 6424705c24..59ee3f4735 100644 --- a/apollo-router/src/plugins/telemetry/metrics/apollo.rs +++ b/apollo-router/src/plugins/telemetry/metrics/apollo.rs @@ -4,10 +4,11 @@ use std::sync::atomic::Ordering; use std::sync::OnceLock; use std::time::Duration; -use opentelemetry::sdk::export::metrics::aggregation; -use opentelemetry::sdk::metrics::selectors; +use opentelemetry::runtime; +use opentelemetry::sdk::metrics::PeriodicReader; use opentelemetry::sdk::Resource; -use opentelemetry::KeyValue; +use opentelemetry_api::KeyValue; +use opentelemetry_otlp::MetricsExporterBuilder; use opentelemetry_otlp::WithExportConfig; use sys_info::hostname; use tonic::metadata::MetadataMap; @@ -19,9 +20,10 @@ use crate::plugins::telemetry::apollo::Config; use crate::plugins::telemetry::apollo_exporter::get_uname; use crate::plugins::telemetry::apollo_exporter::ApolloExporter; use crate::plugins::telemetry::config::MetricsCommon; -use crate::plugins::telemetry::metrics::filter::FilterMeterProvider; +use crate::plugins::telemetry::metrics::CustomAggregationSelector; use crate::plugins::telemetry::metrics::MetricsBuilder; use 
crate::plugins::telemetry::metrics::MetricsConfigurator; +use crate::plugins::telemetry::otlp::CustomTemporalitySelector; use crate::plugins::telemetry::tracing::BatchProcessorConfig; mod duration_histogram; @@ -102,13 +104,31 @@ impl Config { tracing::debug!(endpoint = %endpoint, "creating Apollo OTLP metrics exporter"); let mut metadata = MetadataMap::new(); metadata.insert("apollo.api.key", key.parse()?); + let exporter = MetricsExporterBuilder::Tonic( + opentelemetry_otlp::new_exporter() + .tonic() + .with_endpoint(endpoint.as_str()) + .with_timeout(batch_processor.max_export_timeout) + .with_metadata(metadata) + .with_compression(opentelemetry_otlp::Compression::Gzip), + ) + .build_metrics_exporter( + Box::new(CustomTemporalitySelector( + opentelemetry::sdk::metrics::data::Temporality::Delta, + )), + Box::new( + CustomAggregationSelector::builder() + .boundaries(default_buckets()) + .build(), + ), + )?; + let reader = PeriodicReader::builder(exporter, runtime::Tokio) + .with_interval(Duration::from_secs(60)) + .build(); - let exporter = opentelemetry_otlp::new_pipeline() - .metrics( - selectors::simple::histogram(default_buckets()), - aggregation::delta_temporality_selector(), - opentelemetry::runtime::Tokio, - ) + builder.apollo_meter_provider_builder = builder + .apollo_meter_provider_builder + .with_reader(reader) .with_resource(Resource::new([ KeyValue::new( "apollo.router.id", @@ -126,24 +146,12 @@ impl Config { ), KeyValue::new("apollo.client.host", hostname()?), KeyValue::new("apollo.client.uname", get_uname()?), - ])) - .with_period(Duration::from_secs(60)) - .with_exporter( - opentelemetry_otlp::new_exporter() - .tonic() - .with_endpoint(endpoint.as_str()) - .with_timeout(batch_processor.max_export_timeout) - .with_metadata(metadata), - ) - .build()?; - builder = - builder.with_meter_provider(FilterMeterProvider::apollo_metrics(exporter.clone())); - builder = builder.with_exporter(exporter); + ])); Ok(builder) } fn configure_apollo_metrics( - builder: 
MetricsBuilder, + mut builder: MetricsBuilder, endpoint: &Url, key: &str, reference: &str, @@ -155,7 +163,8 @@ impl Config { let exporter = ApolloExporter::new(endpoint, batch_processor_config, key, reference, schema_id)?; - Ok(builder.with_apollo_metrics_collector(exporter.start())) + builder.apollo_metrics_sender = exporter.start(); + Ok(builder) } } diff --git a/apollo-router/src/plugins/telemetry/metrics/filter.rs b/apollo-router/src/plugins/telemetry/metrics/filter.rs index 929a8a9d17..8b13789179 100644 --- a/apollo-router/src/plugins/telemetry/metrics/filter.rs +++ b/apollo-router/src/plugins/telemetry/metrics/filter.rs @@ -1,301 +1 @@ -use std::sync::Arc; -use buildstructor::buildstructor; -use opentelemetry::metrics::noop::NoopMeterProvider; -use opentelemetry::metrics::Counter; -use opentelemetry::metrics::Histogram; -use opentelemetry::metrics::InstrumentProvider; -use opentelemetry::metrics::Meter; -use opentelemetry::metrics::MeterProvider; -use opentelemetry::metrics::ObservableCounter; -use opentelemetry::metrics::ObservableGauge; -use opentelemetry::metrics::ObservableUpDownCounter; -use opentelemetry::metrics::Unit; -use opentelemetry::metrics::UpDownCounter; -use opentelemetry::Context; -use opentelemetry::InstrumentationLibrary; -use regex::Regex; - -pub(crate) struct FilterMeterProvider { - delegate: T, - deny: Option, - allow: Option, -} - -#[buildstructor] -impl FilterMeterProvider { - #[builder] - fn new(delegate: T, deny: Option, allow: Option) -> Self { - FilterMeterProvider { - delegate, - deny, - allow, - } - } - - pub(crate) fn apollo_metrics(delegate: T) -> Self { - FilterMeterProvider::builder() - .delegate(delegate) - .allow( - Regex::new( - r"apollo\.(graphos\.cloud|router\.(operations?|config|schema|query))(\..*|$)", - ) - .expect("regex should have been valid"), - ) - .build() - } - - pub(crate) fn public_metrics(delegate: T) -> Self { - FilterMeterProvider::builder() - .delegate(delegate) - .deny( - 
Regex::new(r"apollo\.router\.(config|entities)(\..*|$)") - .expect("regex should have been valid"), - ) - .build() - } -} - -struct FilteredInstrumentProvider { - noop: Meter, - delegate: Meter, - deny: Option, - allow: Option, -} -macro_rules! filter_meter_fn { - ($name:ident, $ty:ty, $wrapper:ident) => { - fn $name( - &self, - name: String, - description: Option, - unit: Option, - ) -> opentelemetry::metrics::Result<$wrapper<$ty>> { - let mut builder = match (&self.deny, &self.allow) { - (Some(deny), Some(allow)) if deny.is_match(&name) && !allow.is_match(&name) => { - self.noop.$name(name) - } - (Some(deny), None) if deny.is_match(&name) => self.noop.$name(name), - (None, Some(allow)) if !allow.is_match(&name) => self.noop.$name(name), - (_, _) => self.delegate.$name(name), - }; - if let Some(description) = &description { - builder = builder.with_description(description); - } - if let Some(unit) = &unit { - builder = builder.with_unit(unit.clone()); - } - builder.try_init() - } - }; -} - -impl InstrumentProvider for FilteredInstrumentProvider { - filter_meter_fn!(u64_counter, u64, Counter); - filter_meter_fn!(f64_counter, f64, Counter); - - filter_meter_fn!(f64_observable_counter, f64, ObservableCounter); - filter_meter_fn!(u64_observable_counter, u64, ObservableCounter); - - filter_meter_fn!(u64_histogram, u64, Histogram); - filter_meter_fn!(f64_histogram, f64, Histogram); - filter_meter_fn!(i64_histogram, i64, Histogram); - - filter_meter_fn!(i64_up_down_counter, i64, UpDownCounter); - filter_meter_fn!(f64_up_down_counter, f64, UpDownCounter); - - filter_meter_fn!(i64_observable_up_down_counter, i64, ObservableUpDownCounter); - filter_meter_fn!(f64_observable_up_down_counter, f64, ObservableUpDownCounter); - - filter_meter_fn!(f64_observable_gauge, f64, ObservableGauge); - filter_meter_fn!(i64_observable_gauge, i64, ObservableGauge); - filter_meter_fn!(u64_observable_gauge, u64, ObservableGauge); - - fn register_callback( - &self, - callback: Box, - ) -> 
opentelemetry::metrics::Result<()> { - self.delegate.register_callback(callback) - } -} - -impl MeterProvider for FilterMeterProvider { - fn versioned_meter( - &self, - name: &'static str, - version: Option<&'static str>, - schema_url: Option<&'static str>, - ) -> Meter { - let delegate = self.delegate.versioned_meter(name, version, schema_url); - Meter::new( - InstrumentationLibrary::new(name, version, schema_url), - Arc::new(FilteredInstrumentProvider { - noop: NoopMeterProvider::new().versioned_meter(name, version, schema_url), - delegate, - deny: self.deny.clone(), - allow: self.allow.clone(), - }), - ) - } -} - -#[cfg(test)] -mod test { - use std::collections::HashSet; - use std::sync::atomic::AtomicU64; - use std::sync::atomic::Ordering; - use std::sync::Arc; - use std::sync::Mutex; - - use opentelemetry::metrics::noop; - use opentelemetry::metrics::Counter; - use opentelemetry::metrics::InstrumentProvider; - use opentelemetry::metrics::Meter; - use opentelemetry::metrics::MeterProvider; - use opentelemetry::metrics::Unit; - use opentelemetry::Context; - use opentelemetry::InstrumentationLibrary; - - use crate::plugins::telemetry::metrics::filter::FilterMeterProvider; - - #[derive(Default, Clone)] - struct MockInstrumentProvider { - #[allow(clippy::type_complexity)] - counters_created: Arc, Option)>>>, - callbacks_registered: Arc, - } - - impl InstrumentProvider for MockInstrumentProvider { - // We're only going to bother with testing counters and callbacks because the code is implemented as a macro and if it's right for counters it's right for everything else. 
- fn u64_counter( - &self, - name: String, - description: Option, - unit: Option, - ) -> opentelemetry::metrics::Result> { - self.counters_created - .lock() - .expect("lock should not be poisoned") - .insert((name, description, unit)); - Ok(Counter::new(Arc::new(noop::NoopSyncInstrument::new()))) - } - - fn register_callback( - &self, - _callback: Box, - ) -> opentelemetry::metrics::Result<()> { - self.callbacks_registered.fetch_add(1, Ordering::SeqCst); - Ok(()) - } - } - - #[derive(Default, Clone)] - struct MockMeterProvider { - instrument_provider: Arc, - } - - impl MeterProvider for MockMeterProvider { - fn versioned_meter( - &self, - name: &'static str, - version: Option<&'static str>, - schema_url: Option<&'static str>, - ) -> Meter { - Meter::new( - InstrumentationLibrary::new(name, version, schema_url), - self.instrument_provider.clone(), - ) - } - } - - #[test] - fn test_apollo_metrics() { - let delegate = MockMeterProvider::default(); - let filtered = FilterMeterProvider::apollo_metrics(delegate.clone()) - .versioned_meter("filtered", None, None); - filtered.u64_counter("apollo.router.operations").init(); - filtered.u64_counter("apollo.router.operations.test").init(); - filtered.u64_counter("apollo.graphos.cloud.test").init(); - filtered.u64_counter("apollo.router.unknown.test").init(); - assert!(delegate - .instrument_provider - .counters_created - .lock() - .unwrap() - .contains(&("apollo.router.operations.test".to_string(), None, None))); - assert!(delegate - .instrument_provider - .counters_created - .lock() - .unwrap() - .contains(&("apollo.router.operations".to_string(), None, None))); - assert!(delegate - .instrument_provider - .counters_created - .lock() - .unwrap() - .contains(&("apollo.graphos.cloud.test".to_string(), None, None))); - assert!(!delegate - .instrument_provider - .counters_created - .lock() - .unwrap() - .contains(&("apollo.router.unknown.test".to_string(), None, None))); - } - - #[test] - fn test_public_metrics() { - let delegate 
= MockMeterProvider::default(); - let filtered = FilterMeterProvider::public_metrics(delegate.clone()) - .versioned_meter("filtered", None, None); - filtered.u64_counter("apollo.router.config").init(); - filtered.u64_counter("apollo.router.config.test").init(); - filtered.u64_counter("apollo.router.entities").init(); - filtered.u64_counter("apollo.router.entities.test").init(); - assert!(!delegate - .instrument_provider - .counters_created - .lock() - .unwrap() - .contains(&("apollo.router.config".to_string(), None, None))); - assert!(!delegate - .instrument_provider - .counters_created - .lock() - .unwrap() - .contains(&("apollo.router.config.test".to_string(), None, None))); - assert!(!delegate - .instrument_provider - .counters_created - .lock() - .unwrap() - .contains(&("apollo.router.entities".to_string(), None, None))); - assert!(!delegate - .instrument_provider - .counters_created - .lock() - .unwrap() - .contains(&("apollo.router.entities.test".to_string(), None, None))); - } - - #[test] - fn test_description_and_unit() { - let delegate = MockMeterProvider::default(); - let filtered = FilterMeterProvider::apollo_metrics(delegate.clone()) - .versioned_meter("filtered", None, None); - filtered - .u64_counter("apollo.router.operations") - .with_description("desc") - .with_unit(Unit::new("ms")) - .init(); - assert!(delegate - .instrument_provider - .counters_created - .lock() - .unwrap() - .contains(&( - "apollo.router.operations".to_string(), - Some("desc".to_string()), - Some(Unit::new("ms")) - ))); - } -} diff --git a/apollo-router/src/plugins/telemetry/metrics/mod.rs b/apollo-router/src/plugins/telemetry/metrics/mod.rs index 08a4f6637d..a9556795a4 100644 --- a/apollo-router/src/plugins/telemetry/metrics/mod.rs +++ b/apollo-router/src/plugins/telemetry/metrics/mod.rs @@ -1,6 +1,4 @@ -use std::any::Any; use std::collections::HashMap; -use std::sync::Arc; use ::serde::Deserialize; use access_json::JSONQuery; @@ -8,9 +6,9 @@ use http::header::HeaderName; use 
http::response::Parts; use http::HeaderMap; use multimap::MultiMap; -use opentelemetry::metrics::Counter; -use opentelemetry::metrics::Histogram; -use opentelemetry::metrics::MeterProvider; +use opentelemetry::sdk::metrics::reader::AggregationSelector; +use opentelemetry::sdk::metrics::Aggregation; +use opentelemetry::sdk::metrics::InstrumentKind; use regex::Regex; use schemars::JsonSchema; use serde::Serialize; @@ -25,26 +23,15 @@ use crate::plugin::serde::deserialize_regex; use crate::plugins::telemetry::apollo_exporter::Sender; use crate::plugins::telemetry::config::AttributeValue; use crate::plugins::telemetry::config::MetricsCommon; -use crate::plugins::telemetry::metrics::aggregation::AggregateMeterProvider; use crate::router_factory::Endpoint; use crate::Context; use crate::ListenAddr; -pub(crate) mod aggregation; pub(crate) mod apollo; -pub(crate) mod filter; -pub(crate) mod layer; pub(crate) mod otlp; pub(crate) mod prometheus; pub(crate) mod span_metrics_exporter; -pub(crate) const METRIC_PREFIX_MONOTONIC_COUNTER: &str = "monotonic_counter."; -pub(crate) const METRIC_PREFIX_COUNTER: &str = "counter."; -pub(crate) const METRIC_PREFIX_HISTOGRAM: &str = "histogram."; -pub(crate) const METRIC_PREFIX_VALUE: &str = "value."; - -pub(crate) type MetricsExporterHandle = Box; - #[derive(Debug, Clone, Deserialize, JsonSchema)] #[serde(deny_unknown_fields)] /// Configuration to add custom attributes/labels on metrics @@ -489,51 +476,11 @@ impl AttributesForwardConf { #[derive(Default)] pub(crate) struct MetricsBuilder { - exporters: Vec, - meter_providers: Vec>, - custom_endpoints: MultiMap, - apollo_metrics: Sender, -} - -impl MetricsBuilder { - pub(crate) fn exporters(&mut self) -> Vec { - std::mem::take(&mut self.exporters) - } - pub(crate) fn meter_provider(&mut self) -> AggregateMeterProvider { - AggregateMeterProvider::new(std::mem::take(&mut self.meter_providers)) - } - pub(crate) fn custom_endpoints(&mut self) -> MultiMap { - std::mem::take(&mut 
self.custom_endpoints) - } - - pub(crate) fn apollo_metrics_provider(&mut self) -> Sender { - self.apollo_metrics.clone() - } -} - -impl MetricsBuilder { - fn with_exporter(mut self, handle: T) -> Self { - self.exporters.push(Box::new(handle)); - self - } - - fn with_meter_provider( - mut self, - meter_provider: T, - ) -> Self { - self.meter_providers.push(Arc::new(meter_provider)); - self - } - - fn with_custom_endpoint(mut self, listen_addr: ListenAddr, endpoint: Endpoint) -> Self { - self.custom_endpoints.insert(listen_addr, endpoint); - self - } - - fn with_apollo_metrics_collector(mut self, apollo_metrics: Sender) -> Self { - self.apollo_metrics = apollo_metrics; - self - } + pub(crate) public_meter_provider_builder: opentelemetry::sdk::metrics::MeterProviderBuilder, + pub(crate) apollo_meter_provider_builder: opentelemetry::sdk::metrics::MeterProviderBuilder, + pub(crate) prometheus_meter_provider: Option, + pub(crate) custom_endpoints: MultiMap, + pub(crate) apollo_metrics_sender: Sender, } pub(crate) trait MetricsConfigurator { @@ -544,24 +491,38 @@ pub(crate) trait MetricsConfigurator { ) -> Result; } -#[derive(Clone)] -pub(crate) struct BasicMetrics { - pub(crate) http_requests_total: Counter, - pub(crate) http_requests_duration: Histogram, +#[derive(Clone, Default, Debug)] +pub(crate) struct CustomAggregationSelector { + boundaries: Vec, + record_min_max: bool, +} + +#[buildstructor::buildstructor] +impl CustomAggregationSelector { + #[builder] + pub(crate) fn new( + boundaries: Vec, + record_min_max: Option, + ) -> CustomAggregationSelector { + Self { + boundaries, + record_min_max: record_min_max.unwrap_or(true), + } + } } -impl BasicMetrics { - pub(crate) fn new(meter_provider: &impl MeterProvider) -> BasicMetrics { - let meter = meter_provider.meter("apollo/router"); - BasicMetrics { - http_requests_total: meter - .u64_counter("apollo_router_http_requests_total") - .with_description("Total number of HTTP requests made.") - .init(), - 
http_requests_duration: meter - .f64_histogram("apollo_router_http_request_duration_seconds") - .with_description("Duration of HTTP requests.") - .init(), +impl AggregationSelector for CustomAggregationSelector { + fn aggregation(&self, kind: InstrumentKind) -> Aggregation { + match kind { + InstrumentKind::Counter + | InstrumentKind::UpDownCounter + | InstrumentKind::ObservableCounter + | InstrumentKind::ObservableUpDownCounter => Aggregation::Sum, + InstrumentKind::ObservableGauge => Aggregation::LastValue, + InstrumentKind::Histogram => Aggregation::ExplicitBucketHistogram { + boundaries: self.boundaries.clone(), + record_min_max: self.record_min_max, + }, } } } diff --git a/apollo-router/src/plugins/telemetry/metrics/otlp.rs b/apollo-router/src/plugins/telemetry/metrics/otlp.rs index cbbdb06514..d38d91ad0e 100644 --- a/apollo-router/src/plugins/telemetry/metrics/otlp.rs +++ b/apollo-router/src/plugins/telemetry/metrics/otlp.rs @@ -1,16 +1,14 @@ -use opentelemetry::sdk::export::metrics::aggregation; -use opentelemetry::sdk::metrics::selectors; -use opentelemetry::sdk::Resource; -use opentelemetry::KeyValue; +use opentelemetry::runtime; +use opentelemetry::sdk::metrics::PeriodicReader; use opentelemetry_otlp::HttpExporterBuilder; +use opentelemetry_otlp::MetricsExporterBuilder; use opentelemetry_otlp::TonicExporterBuilder; use tower::BoxError; use crate::plugins::telemetry::config::MetricsCommon; -use crate::plugins::telemetry::metrics::filter::FilterMeterProvider; +use crate::plugins::telemetry::metrics::CustomAggregationSelector; use crate::plugins::telemetry::metrics::MetricsBuilder; use crate::plugins::telemetry::metrics::MetricsConfigurator; -use crate::plugins::telemetry::otlp::Temporality; // TODO Remove MetricExporterBuilder once upstream issue is fixed // This has to exist because Http is not currently supported for metrics export @@ -43,41 +41,22 @@ impl MetricsConfigurator for super::super::otlp::Config { match exporter.exporter { Some(exporter) => { - 
let exporter = match self.temporality { - Temporality::Cumulative => opentelemetry_otlp::new_pipeline() - .metrics( - selectors::simple::histogram(metrics_config.buckets.clone()), - aggregation::stateless_temporality_selector(), - opentelemetry::runtime::Tokio, - ) - .with_exporter(exporter) - .with_resource(Resource::new( - metrics_config - .resources - .clone() - .into_iter() - .map(|(k, v)| KeyValue::new(k, v)), - )) - .build()?, - Temporality::Delta => opentelemetry_otlp::new_pipeline() - .metrics( - selectors::simple::histogram(metrics_config.buckets.clone()), - aggregation::delta_temporality_selector(), - opentelemetry::runtime::Tokio, - ) - .with_exporter(exporter) - .with_resource(Resource::new( - metrics_config - .resources - .clone() - .into_iter() - .map(|(k, v)| KeyValue::new(k, v)), - )) - .build()?, - }; - builder = builder - .with_meter_provider(FilterMeterProvider::public_metrics(exporter.clone())); - builder = builder.with_exporter(exporter); + let exporter = MetricsExporterBuilder::Tonic(exporter).build_metrics_exporter( + (&self.temporality).into(), + Box::new( + CustomAggregationSelector::builder() + .boundaries(metrics_config.buckets.clone()) + .build(), + ), + )?; + + builder.public_meter_provider_builder = + builder.public_meter_provider_builder.with_reader( + PeriodicReader::builder(exporter, runtime::Tokio) + .with_interval(self.batch_processor.scheduled_delay) + .with_timeout(self.batch_processor.max_export_timeout) + .build(), + ); Ok(builder) } None => Err("otlp metric export does not support http yet".into()), diff --git a/apollo-router/src/plugins/telemetry/metrics/prometheus.rs b/apollo-router/src/plugins/telemetry/metrics/prometheus.rs index d5c8a4b2c4..c3169ebb4c 100644 --- a/apollo-router/src/plugins/telemetry/metrics/prometheus.rs +++ b/apollo-router/src/plugins/telemetry/metrics/prometheus.rs @@ -5,13 +5,10 @@ use std::task::Poll; use futures::future::BoxFuture; use http::StatusCode; use once_cell::sync::Lazy; -use 
opentelemetry::sdk::export::metrics::aggregation; -use opentelemetry::sdk::metrics::controllers; -use opentelemetry::sdk::metrics::controllers::BasicController; -use opentelemetry::sdk::metrics::processors; -use opentelemetry::sdk::metrics::selectors; +use opentelemetry::sdk::metrics::MeterProvider; +use opentelemetry::sdk::metrics::MeterProviderBuilder; use opentelemetry::sdk::Resource; -use opentelemetry::KeyValue; +use opentelemetry_api::KeyValue; use prometheus::Encoder; use prometheus::Registry; use prometheus::TextEncoder; @@ -22,7 +19,7 @@ use tower::ServiceExt; use tower_service::Service; use crate::plugins::telemetry::config::MetricsCommon; -use crate::plugins::telemetry::metrics::filter::FilterMeterProvider; +use crate::plugins::telemetry::metrics::CustomAggregationSelector; use crate::plugins::telemetry::metrics::MetricsBuilder; use crate::plugins::telemetry::metrics::MetricsConfigurator; use crate::router_factory::Endpoint; @@ -62,18 +59,26 @@ impl Default for Config { } // Prometheus metrics are special. We want them to persist between restarts if possible. -// This means reusing the existing controller if we can. -// These statics will keep track of new controllers for commit when the telemetry plugin is activated. -static CONTROLLER: Lazy>> = Lazy::new(Default::default); -static NEW_CONTROLLER: Lazy>> = Lazy::new(Default::default); - -pub(crate) fn commit_new_controller() { - if let Some(controller) = NEW_CONTROLLER.lock().expect("lock poisoned").take() { - tracing::debug!("committing prometheus controller"); - CONTROLLER +// This means reusing the existing registry and meter provider if we can. +// These statics will keep track of new registry for commit when the telemetry plugin is activated. 
+static EXISTING_PROMETHEUS: Lazy>> = + Lazy::new(Default::default); +static NEW_PROMETHEUS: Lazy>> = + Lazy::new(Default::default); + +#[derive(PartialEq, Clone)] +struct PrometheusConfig { + resource: Resource, + buckets: Vec, +} + +pub(crate) fn commit_prometheus() { + if let Some(prometheus) = NEW_PROMETHEUS.lock().expect("lock poisoned").take() { + tracing::debug!("committing prometheus registry"); + EXISTING_PROMETHEUS .lock() .expect("lock poisoned") - .replace(controller); + .replace(prometheus); } } @@ -83,56 +88,86 @@ impl MetricsConfigurator for Config { mut builder: MetricsBuilder, metrics_config: &MetricsCommon, ) -> Result { + // Prometheus metrics are special, they must persist between reloads. This means that we only want to create something new if the resources have changed. + // The prometheus exporter, and the associated registry are linked, so replacing one means replacing the other. + + let resource = Resource::new( + metrics_config + .resources + .clone() + .into_iter() + .map(|(k, v)| KeyValue::new(k, v)), + ); + + let prometheus_config = PrometheusConfig { + resource: resource.clone(), + buckets: metrics_config.buckets.clone(), + }; + if self.enabled { - let mut controller = controllers::basic(processors::factory( - selectors::simple::histogram(metrics_config.buckets.clone()), - aggregation::stateless_temporality_selector(), - )) - .with_resource(Resource::new( - metrics_config - .resources - .clone() - .into_iter() - .map(|(k, v)| KeyValue::new(k, v)), - )) - .build(); - - // Check the last controller to see if the resources are the same, if they are we can use it as is. + // Check the last registry to see if the resources are the same, if they are we can use it as is. // Otherwise go with the new controller and store it so that it can be committed during telemetry activation. 
- if let Some(last_controller) = CONTROLLER.lock().expect("lock poisoned").clone() { - if controller.resource() == last_controller.resource() { - tracing::debug!("prometheus controller can be reused"); - controller = last_controller + if let Some((last_config, last_registry)) = + EXISTING_PROMETHEUS.lock().expect("lock poisoned").clone() + { + if prometheus_config == last_config { + tracing::debug!("prometheus registry can be reused"); + builder.custom_endpoints.insert( + self.listen.clone(), + Endpoint::from_router_service( + self.path.clone(), + PrometheusService { + registry: last_registry.clone(), + } + .boxed(), + ), + ); + return Ok(builder); } else { - tracing::debug!("prometheus controller cannot be reused"); + tracing::debug!("prometheus registry cannot be reused"); } } - NEW_CONTROLLER - .lock() - .expect("lock poisoned") - .replace(controller.clone()); - let exporter = opentelemetry_prometheus::exporter(controller).try_init()?; - - builder = builder.with_custom_endpoint( + let registry = prometheus::Registry::new(); + + let exporter = opentelemetry_prometheus::exporter() + .with_aggregation_selector( + CustomAggregationSelector::builder() + .boundaries(metrics_config.buckets.clone()) + .record_min_max(true) + .build(), + ) + .with_registry(registry.clone()) + .build()?; + + let meter_provider = MeterProvider::builder() + .with_reader(exporter) + .with_resource(resource) + .build(); + builder.custom_endpoints.insert( self.listen.clone(), Endpoint::from_router_service( self.path.clone(), PrometheusService { - registry: exporter.registry().clone(), + registry: registry.clone(), } .boxed(), ), ); - builder = builder.with_meter_provider(FilterMeterProvider::public_metrics( - exporter.meter_provider()?, - )); - builder = builder.with_exporter(exporter); + builder.prometheus_meter_provider = Some(meter_provider.clone()); + + NEW_PROMETHEUS + .lock() + .expect("lock poisoned") + .replace((prometheus_config, registry)); + tracing::info!( "Prometheus endpoint 
exposed at {}{}", self.listen, self.path ); + } else { + builder.prometheus_meter_provider = Some(MeterProviderBuilder::default().build()); } Ok(builder) } diff --git a/apollo-router/src/plugins/telemetry/mod.rs b/apollo-router/src/plugins/telemetry/mod.rs index 1c792e3159..221293f216 100644 --- a/apollo-router/src/plugins/telemetry/mod.rs +++ b/apollo-router/src/plugins/telemetry/mod.rs @@ -28,16 +28,15 @@ use opentelemetry::propagation::text_map_propagator::FieldIter; use opentelemetry::propagation::Extractor; use opentelemetry::propagation::Injector; use opentelemetry::propagation::TextMapPropagator; -use opentelemetry::sdk::metrics::controllers::BasicController; use opentelemetry::sdk::propagation::TextMapCompositePropagator; use opentelemetry::sdk::trace::Builder; +use opentelemetry::sdk::Resource; use opentelemetry::trace::SpanContext; use opentelemetry::trace::SpanId; use opentelemetry::trace::TraceContextExt; use opentelemetry::trace::TraceFlags; use opentelemetry::trace::TraceState; use opentelemetry::trace::TracerProvider; -use opentelemetry::Context as OtelContext; use opentelemetry::KeyValue; use parking_lot::Mutex; use rand::Rng; @@ -68,11 +67,9 @@ use self::metrics::apollo::studio::SingleTypeStat; use self::metrics::AttributesForwardConf; use self::metrics::MetricsAttributesConf; use self::reload::reload_fmt; -use self::reload::reload_metrics; use self::reload::LayeredTracer; use self::reload::NullFieldFormatter; use self::reload::SamplingFilter; -use self::reload::OPENTELEMETRY_TRACER_HANDLE; use self::tracing::apollo_telemetry::APOLLO_PRIVATE_DURATION_NS; use super::traffic_shaping::cache::hash_request; use super::traffic_shaping::cache::hash_vary_headers; @@ -80,6 +77,9 @@ use super::traffic_shaping::cache::REPRESENTATIONS; use crate::axum_factory::utils::REQUEST_SPAN_NAME; use crate::context::OPERATION_NAME; use crate::layers::ServiceBuilderExt; +use crate::metrics::aggregation::MeterProviderType; +use crate::metrics::filter::FilterMeterProvider; 
+use crate::metrics::meter_provider; use crate::plugin::Plugin; use crate::plugin::PluginInit; use crate::plugins::telemetry::apollo::ForwardHeaders; @@ -92,17 +92,16 @@ use crate::plugins::telemetry::config::Trace; use crate::plugins::telemetry::config::Tracing; use crate::plugins::telemetry::formatters::filter_metric_events; use crate::plugins::telemetry::formatters::FilteringFormatter; -use crate::plugins::telemetry::metrics::aggregation::AggregateMeterProvider; use crate::plugins::telemetry::metrics::apollo::studio::SingleContextualizedStats; use crate::plugins::telemetry::metrics::apollo::studio::SinglePathErrorStats; use crate::plugins::telemetry::metrics::apollo::studio::SingleQueryLatencyStats; use crate::plugins::telemetry::metrics::apollo::studio::SingleStats; use crate::plugins::telemetry::metrics::apollo::studio::SingleStatsReport; -use crate::plugins::telemetry::metrics::layer::MetricsLayer; -use crate::plugins::telemetry::metrics::BasicMetrics; +use crate::plugins::telemetry::metrics::prometheus::commit_prometheus; use crate::plugins::telemetry::metrics::MetricsBuilder; use crate::plugins::telemetry::metrics::MetricsConfigurator; -use crate::plugins::telemetry::metrics::MetricsExporterHandle; +use crate::plugins::telemetry::reload::metrics_layer; +use crate::plugins::telemetry::reload::OPENTELEMETRY_TRACER_HANDLE; use crate::plugins::telemetry::tracing::apollo_telemetry::decode_ftv1_trace; use crate::plugins::telemetry::tracing::apollo_telemetry::APOLLO_PRIVATE_OPERATION_SIGNATURE; use crate::plugins::telemetry::tracing::TracingConfigurator; @@ -156,18 +155,16 @@ const DEFAULT_EXPOSE_TRACE_ID_HEADER: &str = "apollo-trace-id"; #[doc(hidden)] // Only public for integration tests pub(crate) struct Telemetry { config: Arc, - metrics: BasicMetrics, - // Do not remove metrics_exporters. Metrics will not be exported if it is removed. - // Typically the handles are a PushController but may be something else. Dropping the handle will - // shutdown exporter. 
- metrics_exporters: Vec, custom_endpoints: MultiMap, apollo_metrics_sender: apollo_exporter::Sender, field_level_instrumentation_ratio: f64, sampling_filter_ratio: SamplerOption, tracer_provider: Option, - meter_provider: AggregateMeterProvider, + // We have to have separate meter providers for prometheus metrics so that they don't get zapped on router reload. + public_meter_provider: Option, + public_prometheus_meter_provider: Option, + private_meter_provider: Option, counter: Option>>, } @@ -206,36 +203,10 @@ fn setup_metrics_exporter( impl Drop for Telemetry { fn drop(&mut self) { - // If we can downcast the metrics exporter to be a `BasicController`, then we - // should stop it to ensure metrics are transmitted before the exporter is dropped. - for exporter in self.metrics_exporters.drain(..) { - if let Ok(controller) = MetricsExporterHandle::downcast::(exporter) { - ::tracing::debug!("stopping basic controller: {controller:?}"); - let cx = OtelContext::current(); - - thread::spawn(move || { - if let Err(e) = controller.stop(&cx) { - ::tracing::error!("error during basic controller stop: {e}"); - } - ::tracing::debug!("stopped basic controller: {controller:?}"); - }); - } - } - // If for some reason we didn't use the trace provider then safely discard it e.g. some other plugin failed `new` - // To ensure we don't hang tracing providers are dropped in a blocking task. - // https://github.com/open-telemetry/opentelemetry-rust/issues/868#issuecomment-1250387989 - // We don't have to worry about timeouts as every exporter is batched, which has a timeout on it already. - if let Some(tracer_provider) = self.tracer_provider.take() { - // If we have no runtime then we don't need to spawn a task as we are already in a blocking context. - if Handle::try_current().is_ok() { - // This is a thread for a reason! - // Tokio doesn't finish executing tasks before termination https://github.com/tokio-rs/tokio/issues/1156. 
- // This means that if the runtime is shutdown there is potentially a race where the provider may not be flushed. - // By using a thread it doesn't matter if the tokio runtime is shut down. - // This is likely to happen in tests due to the tokio runtime being destroyed when the test method exits. - thread::spawn(move || drop(tracer_provider)); - } - } + Self::safe_shutdown_meter_provider(&mut self.private_meter_provider); + Self::safe_shutdown_meter_provider(&mut self.public_meter_provider); + Self::safe_shutdown_meter_provider(&mut self.public_prometheus_meter_provider); + self.safe_shutown_tracer(); } } @@ -244,13 +215,16 @@ impl Plugin for Telemetry { type Config = config::Conf; async fn new(init: PluginInit) -> Result { + opentelemetry::global::set_error_handler(handle_error) + .expect("otel error handler lock poisoned, fatal"); + let config = init.config; config.logging.validate()?; let field_level_instrumentation_ratio = config.calculate_field_level_instrumentation_ratio()?; - let mut metrics_builder = Self::create_metrics_builder(&config)?; - let meter_provider = metrics_builder.meter_provider(); + let metrics_builder = Self::create_metrics_builder(&config)?; + let counter = config .metrics .as_ref() @@ -267,13 +241,17 @@ impl Plugin for Telemetry { let (sampling_filter_ratio, tracer_provider) = Self::create_tracer_provider(&config)?; Ok(Telemetry { - custom_endpoints: metrics_builder.custom_endpoints(), - metrics_exporters: metrics_builder.exporters(), - metrics: BasicMetrics::new(&meter_provider), - apollo_metrics_sender: metrics_builder.apollo_metrics_provider(), + custom_endpoints: metrics_builder.custom_endpoints, + apollo_metrics_sender: metrics_builder.apollo_metrics_sender, field_level_instrumentation_ratio, tracer_provider: Some(tracer_provider), - meter_provider, + public_meter_provider: Some(metrics_builder.public_meter_provider_builder.build()) + .map(FilterMeterProvider::public_metrics), + private_meter_provider: 
Some(metrics_builder.apollo_meter_provider_builder.build()) + .map(FilterMeterProvider::private_metrics), + public_prometheus_meter_provider: metrics_builder + .prometheus_meter_provider + .map(FilterMeterProvider::public_metrics), sampling_filter_ratio, config: Arc::new(config), counter, @@ -385,7 +363,6 @@ impl Plugin for Telemetry { fn supergraph_service(&self, service: supergraph::BoxService) -> supergraph::BoxService { let metrics_sender = self.apollo_metrics_sender.clone(); - let metrics = self.metrics.clone(); let config = self.config.clone(); let config_map_res_first = config.clone(); let config_map_res = config.clone(); @@ -440,7 +417,6 @@ impl Plugin for Telemetry { }, move |ctx: Context, fut| { let config = config_map_res.clone(); - let metrics = metrics.clone(); let sender = metrics_sender.clone(); let start = Instant::now(); @@ -449,13 +425,12 @@ impl Plugin for Telemetry { result = Self::update_otel_metrics( config.clone(), ctx.clone(), - metrics.clone(), result, start.elapsed(), ) .await; Self::update_metrics_on_response_events( - &ctx, config, field_level_instrumentation_ratio, metrics, sender, start, result, + &ctx, config, field_level_instrumentation_ratio, sender, start, result, ) } }, @@ -494,7 +469,6 @@ impl Plugin for Telemetry { } fn subgraph_service(&self, name: &str, service: subgraph::BoxService) -> subgraph::BoxService { - let metrics = self.metrics.clone(); let subgraph_attribute = KeyValue::new("subgraph", name.to_string()); let subgraph_metrics_conf_req = self.create_subgraph_metrics_conf(name); let subgraph_metrics_conf_resp = subgraph_metrics_conf_req.clone(); @@ -550,7 +524,6 @@ impl Plugin for Telemetry { }, move |(context, cache_attributes): (Context, Option), f: BoxFuture<'static, Result>| { - let metrics = metrics.clone(); let subgraph_attribute = subgraph_attribute.clone(); let subgraph_metrics_conf = subgraph_metrics_conf_resp.clone(); let counter = counter.clone(); @@ -559,7 +532,6 @@ impl Plugin for Telemetry { f.map(move 
|result: Result| { Self::store_subgraph_response_attributes( &context, - metrics, subgraph_attribute, subgraph_metrics_conf, now, @@ -599,6 +571,7 @@ impl Telemetry { let tracer = tracer_provider.versioned_tracer( GLOBAL_TRACER_NAME, Some(env!("CARGO_PKG_VERSION")), + None::, None, ); hot_tracer.reload(tracer); @@ -608,13 +581,12 @@ impl Telemetry { // https://github.com/open-telemetry/opentelemetry-rust/issues/868#issuecomment-1250387989 // We don't have to worry about timeouts as every exporter is batched, which has a timeout on it already. tokio::task::spawn_blocking(move || drop(last_provider)); - opentelemetry::global::set_error_handler(handle_error) - .expect("otel error handler lock poisoned, fatal"); opentelemetry::global::set_text_map_propagator(Self::create_propagator(&self.config)); } - reload_metrics(MetricsLayer::new(&self.meter_provider)); + self.reload_metrics(); + reload_fmt(Self::create_fmt_layer(&self.config)); } @@ -727,6 +699,18 @@ impl Telemetry { } let mut builder = MetricsBuilder::default(); + builder.public_meter_provider_builder = builder + .public_meter_provider_builder + .with_resource(Resource::new( + config + .metrics + .as_ref() + .and_then(|m| m.common.as_ref()) + .map(|c| c.resources.clone()) + .unwrap_or_default() + .into_iter() + .map(|(k, v)| KeyValue::new(k, v)), + )); builder = setup_metrics_exporter(builder, &config.apollo, metrics_common_config)?; builder = setup_metrics_exporter(builder, &metrics_config.prometheus, metrics_common_config)?; @@ -842,7 +826,6 @@ impl Telemetry { async fn update_otel_metrics( config: Arc, context: Context, - metrics: BasicMetrics, result: Result, request_duration: Duration, ) -> Result { @@ -893,10 +876,7 @@ impl Telemetry { if !parts.status.is_success() { metric_attrs.push(KeyValue::new("error", parts.status.to_string())); } - ::tracing::info!( - monotonic_counter.apollo.router.operations = 1u64, - http.response.status_code = parts.status.as_u16() as i64, - ); + 
u64_counter!("apollo.router.operations", "The number of graphql operations performed by the Router", 1, "http.response.status_code" => parts.status.as_u16() as i64); let response = http::Response::from_parts( parts, once(ready(first_response.unwrap_or_default())) @@ -908,26 +888,25 @@ impl Telemetry { } Err(err) => { metric_attrs.push(KeyValue::new("status", "500")); - - ::tracing::info!( - monotonic_counter.apollo.router.operations = 1u64, - http.response.status_code = 500i64, - ); + u64_counter!("apollo.router.operations", "The number of graphql operations performed by the Router", 1, "http.response.status_code" => 500); Err(err) } }; // http_requests_total - the total number of HTTP requests received - metrics - .http_requests_total - .add(&opentelemetry::Context::current(), 1, &metric_attrs); + u64_counter!( + "apollo_router_http_requests_total", + "Total number of HTTP requests made.", + 1, + metric_attrs + ); - metrics.http_requests_duration.record( - &opentelemetry::Context::current(), + f64_histogram!( + "apollo_router_http_requests_duration", + "Duration of HTTP requests.", request_duration.as_secs_f64(), - &metric_attrs, + metric_attrs ); - res } @@ -1154,7 +1133,6 @@ impl Telemetry { #[allow(clippy::too_many_arguments)] fn store_subgraph_response_attributes( context: &Context, - metrics: BasicMetrics, subgraph_attribute: KeyValue, attribute_forward_config: Arc>, now: Instant, @@ -1223,10 +1201,11 @@ impl Telemetry { ); } - metrics.http_requests_total.add( - &opentelemetry::Context::current(), + u64_counter!( + "apollo_router_http_requests_total", + "Total number of HTTP requests made.", 1, - &metric_attrs, + metric_attrs ); } Err(err) => { @@ -1241,17 +1220,19 @@ impl Telemetry { ); } - metrics.http_requests_total.add( - &opentelemetry::Context::current(), + u64_counter!( + "apollo_router_http_requests_total", + "Total number of HTTP requests made.", 1, - &metric_attrs, + metric_attrs ); } } - metrics.http_requests_duration.record( - 
&opentelemetry::Context::current(), + f64_histogram!( + "http_requests_duration", + "Duration of HTTP requests.", now.elapsed().as_secs_f64(), - &metric_attrs, + metric_attrs ); } @@ -1260,7 +1241,6 @@ impl Telemetry { ctx: &Context, config: Arc, field_level_instrumentation_ratio: f64, - metrics: BasicMetrics, sender: Sender, start: Instant, result: Result, @@ -1300,10 +1280,11 @@ impl Telemetry { ); } - metrics.http_requests_total.add( - &opentelemetry::Context::current(), + u64_counter!( + "apollo_router_http_requests_total", + "Total number of HTTP requests made.", 1, - &metric_attrs, + metric_attrs ); Err(e) @@ -1669,6 +1650,72 @@ impl Telemetry { ); } } + fn reload_metrics(&mut self) { + let meter_provider = meter_provider(); + if self.public_prometheus_meter_provider.is_some() { + commit_prometheus(); + } + let mut old_meter_providers = Vec::new(); + if let Some(old_provider) = meter_provider.set( + MeterProviderType::PublicPrometheus, + self.public_prometheus_meter_provider.take(), + ) { + old_meter_providers.push((MeterProviderType::PublicPrometheus, old_provider)); + } + + if let Some(old_provider) = meter_provider.set( + MeterProviderType::Apollo, + self.private_meter_provider.take(), + ) { + old_meter_providers.push((MeterProviderType::Apollo, old_provider)); + } + if let Some(old_provider) = + meter_provider.set(MeterProviderType::Public, self.public_meter_provider.take()) + { + old_meter_providers.push((MeterProviderType::Public, old_provider)); + } + + metrics_layer().clear(); + + // Old providers MUST be shut down in a blocking thread. 
+ tokio::task::spawn_blocking(move || { + for (meter_provider_type, meter_provider) in old_meter_providers { + if let Err(e) = meter_provider.shutdown() { + ::tracing::error!(error = %e, meter_provider_type = ?meter_provider_type, "failed to shutdown meter provider") + } + } + }); + } + + fn safe_shutdown_meter_provider(meter_provider: &mut Option) { + if Handle::try_current().is_ok() { + if let Some(meter_provider) = meter_provider.take() { + thread::spawn(move || { + if let Err(e) = meter_provider.shutdown() { + ::tracing::error!(error = %e, "failed to shutdown meter provider") + } + }); + } + } + } + + fn safe_shutown_tracer(&mut self) { + // If for some reason we didn't use the trace provider then safely discard it e.g. some other plugin failed `new` + // To ensure we don't hang tracing providers are dropped in a blocking task. + // https://github.com/open-telemetry/opentelemetry-rust/issues/868#issuecomment-1250387989 + // We don't have to worry about timeouts as every exporter is batched, which has a timeout on it already. + if let Some(tracer_provider) = self.tracer_provider.take() { + // If we have no runtime then we don't need to spawn a task as we are already in a blocking context. + if Handle::try_current().is_ok() { + // This is a thread for a reason! + // Tokio doesn't finish executing tasks before termination https://github.com/tokio-rs/tokio/issues/1156. + // This means that if the runtime is shutdown there is potentially a race where the provider may not be flushed. + // By using a thread it doesn't matter if the tokio runtime is shut down. + // This is likely to happen in tests due to the tokio runtime being destroyed when the test method exits. + thread::spawn(move || drop(tracer_provider)); + } + } + } } #[derive(Debug, Clone)] @@ -1844,6 +1891,14 @@ fn handle_error>(err: T) { // We have to rate limit these errors because when they happen they are very frequent. // Use a dashmap to store the message type with the last time it was logged. 
let last_logged_map = OTEL_ERROR_LAST_LOGGED.get_or_init(DashMap::new); + + handle_error_internal(err, last_logged_map); +} + +fn handle_error_internal>( + err: T, + last_logged_map: &DashMap, +) { let err = err.into(); // We don't want the dashmap to get big, so we key the error messages by type. @@ -1874,7 +1929,9 @@ fn handle_error>(err: T) { ::tracing::error!("OpenTelemetry trace error occurred: {}", err) } opentelemetry::global::Error::Metric(err) => { - ::tracing::error!("OpenTelemetry metric error occurred: {}", err) + if err.to_string() != "Metrics error: reader is shut down or not registered" { + ::tracing::error!("OpenTelemetry metric error occurred: {}", err) + } } opentelemetry::global::Error::Other(err) => { ::tracing::error!("OpenTelemetry error occurred: {}", err) @@ -2012,12 +2069,12 @@ struct EnableSubgraphFtv1; mod tests { use std::fmt::Debug; use std::ops::DerefMut; - use std::str::FromStr; use std::sync::Arc; use std::sync::Mutex; use std::time::Duration; use axum::headers::HeaderName; + use dashmap::DashMap; use http::HeaderMap; use http::HeaderValue; use http::StatusCode; @@ -2039,431 +2096,49 @@ mod tests { use tracing_subscriber::Layer; use super::apollo::ForwardHeaders; + use super::Telemetry; use crate::error::FetchError; use crate::graphql::Error; use crate::graphql::Request; use crate::http_ext; use crate::json_ext::Object; + use crate::metrics::FutureMetricsExt; use crate::plugin::test::MockSubgraphService; use crate::plugin::test::MockSupergraphService; use crate::plugin::DynPlugin; - use crate::plugins::telemetry::handle_error; + use crate::plugins::telemetry::handle_error_internal; use crate::services::SubgraphRequest; use crate::services::SubgraphResponse; use crate::services::SupergraphRequest; use crate::services::SupergraphResponse; - #[tokio::test(flavor = "multi_thread")] - async fn plugin_registered() { - crate::plugin::plugins() + async fn create_plugin_with_config(config: &str) -> Box { + let prometheus_support = 
config.contains("prometheus"); + let config: Value = serde_yaml::from_str(config).expect("yaml must be valid"); + let telemetry_config = config + .as_object() + .expect("must be an object") + .get("telemetry") + .expect("root key must be telemetry"); + let mut plugin = crate::plugin::plugins() .find(|factory| factory.name == "apollo.telemetry") .expect("Plugin not found") - .create_instance( - &serde_json::json!({"apollo": {"schema_id":"abc"}, "tracing": {}}), - Default::default(), - Default::default(), - ) + .create_instance(telemetry_config, Default::default(), Default::default()) .await .unwrap(); - } - #[tokio::test(flavor = "multi_thread")] - async fn attribute_serialization() { - crate::plugin::plugins() - .find(|factory| factory.name == "apollo.telemetry") - .expect("Plugin not found") - .create_instance( - &serde_json::json!({ - "apollo": {"schema_id":"abc"}, - "tracing": { - "trace_config": { - "service_name": "router", - "attributes": { - "str": "a", - "int": 1, - "float": 1.0, - "bool": true, - "str_arr": ["a", "b"], - "int_arr": [1, 2], - "float_arr": [1.0, 2.0], - "bool_arr": [true, false] - } - } - }, - "metrics": { - "common": { - "attributes": { - "supergraph": { - "static": [ - { - "name": "myname", - "value": "label_value" - } - ], - "request": { - "header": [{ - "named": "test", - "default": "default_value", - "rename": "renamed_value" - }], - "body": [{ - "path": ".data.test", - "name": "my_new_name", - "default": "default_value" - }] - }, - "response": { - "header": [{ - "named": "test", - "default": "default_value", - "rename": "renamed_value", - }, { - "named": "test", - "default": "default_value", - "rename": "renamed_value", - }], - "body": [{ - "path": ".data.test", - "name": "my_new_name", - "default": "default_value" - }] - } - }, - "subgraph": { - "all": { - "static": [ - { - "name": "myname", - "value": "label_value" - } - ], - "request": { - "header": [{ - "named": "test", - "default": "default_value", - "rename": "renamed_value", - 
}], - "body": [{ - "path": ".data.test", - "name": "my_new_name", - "default": "default_value" - }] - }, - "response": { - "header": [{ - "named": "test", - "default": "default_value", - "rename": "renamed_value", - }, { - "named": "test", - "default": "default_value", - "rename": "renamed_value", - }], - "body": [{ - "path": ".data.test", - "name": "my_new_name", - "default": "default_value" - }] - } - }, - "subgraphs": { - "subgraph_name_test": { - "static": [ - { - "name": "myname", - "value": "label_value" - } - ], - "request": { - "header": [{ - "named": "test", - "default": "default_value", - "rename": "renamed_value", - }], - "body": [{ - "path": ".data.test", - "name": "my_new_name", - "default": "default_value" - }] - }, - "response": { - "header": [{ - "named": "test", - "default": "default_value", - "rename": "renamed_value", - }, { - "named": "test", - "default": "default_value", - "rename": "renamed_value", - }], - "body": [{ - "path": ".data.test", - "name": "my_new_name", - "default": "default_value" - }] - } - } - } - } - } - } - } - }), - Default::default(), - Default::default(), - ) - .await - .unwrap(); + if prometheus_support { + plugin + .as_any_mut() + .downcast_mut::() + .unwrap() + .reload_metrics(); + } + plugin } - #[tokio::test(flavor = "multi_thread")] - async fn it_test_prometheus_metrics() { - let mut mock_service = MockSupergraphService::new(); - mock_service - .expect_call() - .times(1) - .returning(move |req: SupergraphRequest| { - Ok(SupergraphResponse::fake_builder() - .context(req.context) - .header("x-custom", "coming_from_header") - .data(json!({"data": {"my_value": 2usize}})) - .build() - .unwrap()) - }); - - let mut mock_bad_request_service = MockSupergraphService::new(); - mock_bad_request_service - .expect_call() - .times(1) - .returning(move |req: SupergraphRequest| { - Ok(SupergraphResponse::fake_builder() - .context(req.context) - .status_code(StatusCode::BAD_REQUEST) - .data(json!({"errors": [{"message": "nope"}]})) - 
.build() - .unwrap()) - }); - - let mut mock_subgraph_service = MockSubgraphService::new(); - mock_subgraph_service - .expect_call() - .times(1) - .returning(move |req: SubgraphRequest| { - let mut extension = Object::new(); - extension.insert( - serde_json_bytes::ByteString::from("status"), - serde_json_bytes::Value::String(ByteString::from("INTERNAL_SERVER_ERROR")), - ); - let _ = req - .context - .insert("my_key", "my_custom_attribute_from_context".to_string()) - .unwrap(); - Ok(SubgraphResponse::fake_builder() - .context(req.context) - .error( - Error::builder() - .message(String::from("an error occured")) - .extensions(extension) - .extension_code("FETCH_ERROR") - .build(), - ) - .build()) - }); - - let mut mock_subgraph_service_in_error = MockSubgraphService::new(); - mock_subgraph_service_in_error - .expect_call() - .times(1) - .returning(move |_req: SubgraphRequest| { - Err(Box::new(FetchError::SubrequestHttpError { - status_code: None, - service: String::from("my_subgraph_name_error"), - reason: String::from("cannot contact the subgraph"), - })) - }); - - let dyn_plugin: Box = crate::plugin::plugins() - .find(|factory| factory.name == "apollo.telemetry") - .expect("Plugin not found") - .create_instance( - &Value::from_str( - r#"{ - "apollo": { - "client_name_header": "name_header", - "client_version_header": "version_header", - "schema_id": "schema_sha" - }, - "metrics": { - "common": { - "service_name": "apollo-router", - "attributes": { - "supergraph": { - "static": [ - { - "name": "myname", - "value": "label_value" - } - ], - "request": { - "header": [ - { - "named": "test", - "default": "default_value", - "rename": "renamed_value" - }, - { - "named": "another_test", - "default": "my_default_value" - } - ] - }, - "response": { - "header": [{ - "named": "x-custom" - }], - "body": [{ - "path": ".data.data.my_value", - "name": "my_value" - }] - } - }, - "subgraph": { - "all": { - "errors": { - "include_messages": true, - "extensions": [{ - "name": 
"subgraph_error_extended_code", - "path": ".code" - }, { - "name": "message", - "path": ".reason" - }] - } - }, - "subgraphs": { - "my_subgraph_name": { - "request": { - "body": [{ - "path": ".query", - "name": "query_from_request" - }, { - "path": ".data", - "name": "unknown_data", - "default": "default_value" - }, { - "path": ".data2", - "name": "unknown_data_bis" - }] - }, - "response": { - "body": [{ - "path": ".errors[0].extensions.status", - "name": "error" - }] - }, - "context": [ - { - "named": "my_key" - } - ] - } - } - } - } - }, - "prometheus": { - "enabled": true - } - } - }"#, - ) - .unwrap(), - Default::default(), - Default::default(), - ) - .await - .unwrap(); - let mut supergraph_service = dyn_plugin.supergraph_service(BoxService::new(mock_service)); - let router_req = SupergraphRequest::fake_builder().header("test", "my_value_set"); - - let _router_response = supergraph_service - .ready() - .await - .unwrap() - .call(router_req.build().unwrap()) - .await - .unwrap() - .next_response() - .await - .unwrap(); - - let mut bad_request_supergraph_service = - dyn_plugin.supergraph_service(BoxService::new(mock_bad_request_service)); - let router_req = SupergraphRequest::fake_builder().header("test", "my_value_set"); - - let _router_response = bad_request_supergraph_service - .ready() - .await - .unwrap() - .call(router_req.build().unwrap()) - .await - .unwrap() - .next_response() - .await - .unwrap(); - - let mut subgraph_service = - dyn_plugin.subgraph_service("my_subgraph_name", BoxService::new(mock_subgraph_service)); - let subgraph_req = SubgraphRequest::fake_builder() - .subgraph_request( - http_ext::Request::fake_builder() - .header("test", "my_value_set") - .body( - Request::fake_builder() - .query(String::from("query { test }")) - .build(), - ) - .build() - .unwrap(), - ) - .build(); - let _subgraph_response = subgraph_service - .ready() - .await - .unwrap() - .call(subgraph_req) - .await - .unwrap(); - // Another subgraph - let mut 
subgraph_service = dyn_plugin.subgraph_service( - "my_subgraph_name_error", - BoxService::new(mock_subgraph_service_in_error), - ); - let subgraph_req = SubgraphRequest::fake_builder() - .subgraph_request( - http_ext::Request::fake_builder() - .header("test", "my_value_set") - .body( - Request::fake_builder() - .query(String::from("query { test }")) - .build(), - ) - .build() - .unwrap(), - ) - .build(); - let _subgraph_response = subgraph_service - .ready() - .await - .unwrap() - .call(subgraph_req) - .await - .expect_err("Must be in error"); - - let http_req_prom = http::Request::get("http://localhost:9090/WRONG/URL/metrics") - .body(Default::default()) - .unwrap(); - let mut web_endpoint = dyn_plugin + async fn get_prometheus_metrics(plugin: &dyn DynPlugin) -> String { + let web_endpoint = plugin .web_endpoints() .into_iter() .next() @@ -2473,14 +2148,6 @@ mod tests { .next() .unwrap() .into_router(); - let resp = web_endpoint - .ready() - .await - .unwrap() - .call(http_req_prom) - .await - .unwrap(); - assert_eq!(resp.status(), StatusCode::NOT_FOUND); let http_req_prom = http::Request::get("http://localhost:9090/metrics") .body(Default::default()) @@ -2488,17 +2155,15 @@ mod tests { let mut resp = web_endpoint.oneshot(http_req_prom).await.unwrap(); assert_eq!(resp.status(), StatusCode::OK); let body = hyper::body::to_bytes(resp.body_mut()).await.unwrap(); - let prom_metrics = String::from_utf8_lossy(&body) + String::from_utf8_lossy(&body) .to_string() .split('\n') - .filter(|l| l.contains("_count") && !l.contains("apollo_router_span_count")) + .filter(|l| l.contains("bucket") && !l.contains("apollo_router_span_count")) .sorted() - .join("\n"); - assert_snapshot!(prom_metrics); + .join("\n") } - #[tokio::test(flavor = "multi_thread")] - async fn it_test_prometheus_metrics_custom_buckets() { + async fn make_supergraph_request(plugin: &dyn DynPlugin) { let mut mock_service = MockSupergraphService::new(); mock_service .expect_call() @@ -2512,162 +2177,8 @@ mod 
tests { .unwrap()) }); - let mut mock_bad_request_service = MockSupergraphService::new(); - mock_bad_request_service - .expect_call() - .times(1) - .returning(move |req: SupergraphRequest| { - Ok(SupergraphResponse::fake_builder() - .context(req.context) - .status_code(StatusCode::BAD_REQUEST) - .data(json!({"errors": [{"message": "nope"}]})) - .build() - .unwrap()) - }); - - let mut mock_subgraph_service = MockSubgraphService::new(); - mock_subgraph_service - .expect_call() - .times(1) - .returning(move |req: SubgraphRequest| { - let mut extension = Object::new(); - extension.insert( - serde_json_bytes::ByteString::from("status"), - serde_json_bytes::Value::String(ByteString::from("INTERNAL_SERVER_ERROR")), - ); - let _ = req - .context - .insert("my_key", "my_custom_attribute_from_context".to_string()) - .unwrap(); - Ok(SubgraphResponse::fake_builder() - .context(req.context) - .error( - Error::builder() - .message(String::from("an error occured")) - .extensions(extension) - .extension_code("FETCH_ERROR") - .build(), - ) - .build()) - }); - - let mut mock_subgraph_service_in_error = MockSubgraphService::new(); - mock_subgraph_service_in_error - .expect_call() - .times(1) - .returning(move |_req: SubgraphRequest| { - Err(Box::new(FetchError::SubrequestHttpError { - status_code: None, - service: String::from("my_subgraph_name_error"), - reason: String::from("cannot contact the subgraph"), - })) - }); - - let dyn_plugin: Box = crate::plugin::plugins() - .find(|factory| factory.name == "apollo.telemetry") - .expect("Plugin not found") - .create_instance( - &Value::from_str( - r#"{ - "apollo": { - "client_name_header": "name_header", - "client_version_header": "version_header", - "schema_id": "schema_sha" - }, - "metrics": { - "common": { - "service_name": "apollo-router", - "buckets": [5.0, 10.0, 20.0], - "attributes": { - "supergraph": { - "static": [ - { - "name": "myname", - "value": "label_value" - } - ], - "request": { - "header": [ - { - "named": "test", - 
"default": "default_value", - "rename": "renamed_value" - }, - { - "named": "another_test", - "default": "my_default_value" - } - ] - }, - "response": { - "header": [{ - "named": "x-custom" - }], - "body": [{ - "path": ".data.data.my_value", - "name": "my_value" - }] - } - }, - "subgraph": { - "all": { - "errors": { - "include_messages": true, - "extensions": [{ - "name": "subgraph_error_extended_code", - "path": ".code" - }, { - "name": "message", - "path": ".reason" - }] - } - }, - "subgraphs": { - "my_subgraph_name": { - "request": { - "body": [{ - "path": ".query", - "name": "query_from_request" - }, { - "path": ".data", - "name": "unknown_data", - "default": "default_value" - }, { - "path": ".data2", - "name": "unknown_data_bis" - }] - }, - "response": { - "body": [{ - "path": ".errors[0].extensions.status", - "name": "error" - }] - }, - "context": [ - { - "named": "my_key" - } - ] - } - } - } - } - }, - "prometheus": { - "enabled": true - } - } - }"#, - ) - .unwrap(), - Default::default(), - Default::default(), - ) - .await - .unwrap(); - let mut supergraph_service = dyn_plugin.supergraph_service(BoxService::new(mock_service)); + let mut supergraph_service = plugin.supergraph_service(BoxService::new(mock_service)); let router_req = SupergraphRequest::fake_builder().header("test", "my_value_set"); - let _router_response = supergraph_service .ready() .await @@ -2678,105 +2189,296 @@ mod tests { .next_response() .await .unwrap(); + } - let mut bad_request_supergraph_service = - dyn_plugin.supergraph_service(BoxService::new(mock_bad_request_service)); - let router_req = SupergraphRequest::fake_builder().header("test", "my_value_set"); - - let _router_response = bad_request_supergraph_service - .ready() - .await - .unwrap() - .call(router_req.build().unwrap()) - .await - .unwrap() - .next_response() - .await - .unwrap(); - - let mut subgraph_service = - dyn_plugin.subgraph_service("my_subgraph_name", BoxService::new(mock_subgraph_service)); - let subgraph_req = 
SubgraphRequest::fake_builder() - .subgraph_request( - http_ext::Request::fake_builder() - .header("test", "my_value_set") - .body( - Request::fake_builder() - .query(String::from("query { test }")) - .build(), - ) - .build() - .unwrap(), + #[tokio::test(flavor = "multi_thread")] + async fn plugin_registered() { + crate::plugin::plugins() + .find(|factory| factory.name == "apollo.telemetry") + .expect("Plugin not found") + .create_instance( + &serde_json::json!({"apollo": {"schema_id":"abc"}, "tracing": {}}), + Default::default(), + Default::default(), ) - .build(); - let _subgraph_response = subgraph_service - .ready() - .await - .unwrap() - .call(subgraph_req) .await .unwrap(); - // Another subgraph - let mut subgraph_service = dyn_plugin.subgraph_service( - "my_subgraph_name_error", - BoxService::new(mock_subgraph_service_in_error), - ); - let subgraph_req = SubgraphRequest::fake_builder() - .subgraph_request( - http_ext::Request::fake_builder() - .header("test", "my_value_set") - .body( - Request::fake_builder() - .query(String::from("query { test }")) - .build(), - ) - .build() - .unwrap(), - ) - .build(); - let _subgraph_response = subgraph_service - .ready() - .await - .unwrap() - .call(subgraph_req) - .await - .expect_err("Must be in error"); + } - let http_req_prom = http::Request::get("http://localhost:9090/WRONG/URL/metrics") - .body(Default::default()) - .unwrap(); - let mut web_endpoint = dyn_plugin - .web_endpoints() - .into_iter() - .next() - .unwrap() - .1 - .into_iter() - .next() - .unwrap() - .into_router(); - let resp = web_endpoint - .ready() - .await - .unwrap() - .call(http_req_prom) - .await - .unwrap(); - assert_eq!(resp.status(), StatusCode::NOT_FOUND); + #[tokio::test] + async fn config_serialization() { + create_plugin_with_config(include_str!("testdata/config.router.yaml")).await; + } - let http_req_prom = http::Request::get("http://localhost:9090/metrics") - .body(Default::default()) - .unwrap(); - let mut resp = 
web_endpoint.oneshot(http_req_prom).await.unwrap(); - assert_eq!(resp.status(), StatusCode::OK); - let body = hyper::body::to_bytes(resp.body_mut()).await.unwrap(); - let prom_metrics = String::from_utf8_lossy(&body) - .to_string() - .split('\n') - .filter(|l| l.contains("bucket") && !l.contains("apollo_router_span_count")) - .sorted() - .join("\n"); - assert_snapshot!(prom_metrics); + #[tokio::test] + async fn test_supergraph_metrics_ok() { + async { + let plugin = + create_plugin_with_config(include_str!("testdata/custom_attributes.router.yaml")).await; + make_supergraph_request(plugin.as_ref()).await; + + assert_metric!("apollo_router_http_requests_total", 1, "another_test" => "my_default_value", "my_value" => 2, "myname" => "label_value", "renamed_value" => "my_value_set", "status" => "200", "x-custom" => "coming_from_header"); + }.with_metrics().await; + } + + #[tokio::test] + async fn test_supergraph_metrics_bad_request() { + async { + let plugin = + create_plugin_with_config(include_str!("testdata/custom_attributes.router.yaml")).await; + + let mut mock_bad_request_service = MockSupergraphService::new(); + mock_bad_request_service + .expect_call() + .times(1) + .returning(move |req: SupergraphRequest| { + Ok(SupergraphResponse::fake_builder() + .context(req.context) + .status_code(StatusCode::BAD_REQUEST) + .data(json!({"errors": [{"message": "nope"}]})) + .build() + .unwrap()) + }); + let mut bad_request_supergraph_service = + plugin.supergraph_service(BoxService::new(mock_bad_request_service)); + let router_req = SupergraphRequest::fake_builder().header("test", "my_value_set"); + let _router_response = bad_request_supergraph_service + .ready() + .await + .unwrap() + .call(router_req.build().unwrap()) + .await + .unwrap() + .next_response() + .await + .unwrap(); + + assert_metric!("apollo_router_http_requests_total", 1, "another_test" => "my_default_value", "error" => "400 Bad Request", "myname" => "label_value", "renamed_value" => "my_value_set", "status" 
=> "400"); + }.with_metrics().await; + } + + #[tokio::test] + async fn test_subgraph_metrics_ok() { + async { + let plugin = + create_plugin_with_config(include_str!("testdata/custom_attributes.router.yaml")).await; + + let mut mock_subgraph_service = MockSubgraphService::new(); + mock_subgraph_service + .expect_call() + .times(1) + .returning(move |req: SubgraphRequest| { + let mut extension = Object::new(); + extension.insert( + serde_json_bytes::ByteString::from("status"), + serde_json_bytes::Value::String(ByteString::from( + "custom_error_for_propagation", + )), + ); + let _ = req + .context + .insert("my_key", "my_custom_attribute_from_context".to_string()) + .unwrap(); + Ok(SubgraphResponse::fake_builder() + .context(req.context) + .error( + Error::builder() + .message(String::from("an error occured")) + .extensions(extension) + .extension_code("FETCH_ERROR") + .build(), + ) + .build()) + }); + + let mut subgraph_service = + plugin.subgraph_service("my_subgraph_name", BoxService::new(mock_subgraph_service)); + let subgraph_req = SubgraphRequest::fake_builder() + .subgraph_request( + http_ext::Request::fake_builder() + .header("test", "my_value_set") + .body( + Request::fake_builder() + .query(String::from("query { test }")) + .build(), + ) + .build() + .unwrap(), + ) + .build(); + let _subgraph_response = subgraph_service + .ready() + .await + .unwrap() + .call(subgraph_req) + .await + .unwrap(); + + assert_metric!("apollo_router_http_requests_total", 1, "error" => "custom_error_for_propagation", "my_key" => "my_custom_attribute_from_context", "query_from_request" => "query { test }", "status" => "200", "subgraph" => "my_subgraph_name", "unknown_data" => "default_value"); + }.with_metrics().await; + } + + #[tokio::test] + async fn test_subgraph_metrics_http_error() { + async { + let plugin = + create_plugin_with_config(include_str!("testdata/custom_attributes.router.yaml")).await; + + let mut mock_subgraph_service_in_error = MockSubgraphService::new(); + 
mock_subgraph_service_in_error + .expect_call() + .times(1) + .returning(move |_req: SubgraphRequest| { + Err(Box::new(FetchError::SubrequestHttpError { + status_code: None, + service: String::from("my_subgraph_name_error"), + reason: String::from("cannot contact the subgraph"), + })) + }); + + let mut subgraph_service = plugin.subgraph_service( + "my_subgraph_name_error", + BoxService::new(mock_subgraph_service_in_error), + ); + + let subgraph_req = SubgraphRequest::fake_builder() + .subgraph_request( + http_ext::Request::fake_builder() + .header("test", "my_value_set") + .body( + Request::fake_builder() + .query(String::from("query { test }")) + .build(), + ) + .build() + .unwrap(), + ) + .build(); + let _subgraph_response = subgraph_service + .ready() + .await + .unwrap() + .call(subgraph_req) + .await + .expect_err("should be an error"); + + assert_metric!("apollo_router_http_requests_total", 1, "message" => "cannot contact the subgraph", "status" => "500", "subgraph" => "my_subgraph_name_error", "subgraph_error_extended_code" => "SUBREQUEST_HTTP_ERROR"); + }.with_metrics().await; + } + + #[tokio::test] + async fn test_subgraph_metrics_bad_request() { + async { + let plugin = + create_plugin_with_config(include_str!("testdata/custom_attributes.router.yaml")).await; + + let mut mock_bad_request_service = MockSupergraphService::new(); + mock_bad_request_service + .expect_call() + .times(1) + .returning(move |req: SupergraphRequest| { + Ok(SupergraphResponse::fake_builder() + .context(req.context) + .status_code(StatusCode::BAD_REQUEST) + .data(json!({"errors": [{"message": "nope"}]})) + .build() + .unwrap()) + }); + + let mut bad_request_supergraph_service = + plugin.supergraph_service(BoxService::new(mock_bad_request_service)); + + let router_req = SupergraphRequest::fake_builder().header("test", "my_value_set"); + + let _router_response = bad_request_supergraph_service + .ready() + .await + .unwrap() + .call(router_req.build().unwrap()) + .await + .unwrap() + 
.next_response() + .await + .unwrap(); + + assert_metric!("apollo_router_http_requests_total", 1, "another_test" => "my_default_value", "error" => "400 Bad Request", "myname" => "label_value", "renamed_value" => "my_value_set", "status" => "400"); + }.with_metrics().await; + } + + #[tokio::test] + async fn it_test_prometheus_wrong_endpoint() { + async { + let plugin = + create_plugin_with_config(include_str!("testdata/prometheus.router.yaml")).await; + + let mut web_endpoint = plugin + .web_endpoints() + .into_iter() + .next() + .unwrap() + .1 + .into_iter() + .next() + .unwrap() + .into_router(); + + let http_req_prom = http::Request::get("http://localhost:9090/WRONG/URL/metrics") + .body(Default::default()) + .unwrap(); + + let resp = web_endpoint + .ready() + .await + .unwrap() + .call(http_req_prom) + .await + .unwrap(); + assert_eq!(resp.status(), StatusCode::NOT_FOUND); + } + .with_metrics() + .await; + } + + static PROMETHEUS_LOCK: std::sync::OnceLock>> = + std::sync::OnceLock::new(); + + #[tokio::test(flavor = "multi_thread")] + async fn it_test_prometheus_metrics() { + let _lock = lock_prometheus().await; + async { + let plugin = + create_plugin_with_config(include_str!("testdata/prometheus.router.yaml")).await; + make_supergraph_request(plugin.as_ref()).await; + let prometheus_metrics = get_prometheus_metrics(plugin.as_ref()).await; + assert_snapshot!(prometheus_metrics); + } + .with_metrics() + .await; + } + + #[tokio::test(flavor = "multi_thread")] + async fn it_test_prometheus_metrics_custom_buckets() { + let _lock = lock_prometheus().await; + async { + let plugin = create_plugin_with_config(include_str!( + "testdata/prometheus_custom_buckets.router.yaml" + )) + .await; + make_supergraph_request(plugin.as_ref()).await; + let prometheus_metrics = get_prometheus_metrics(plugin.as_ref()).await; + + assert_snapshot!(prometheus_metrics); + } + .with_metrics() + .await; + } + + // Prometheus support has some globals. 
Ideally we fix the plugin API to allow migration of data across reloads. This means that some tests cannot be run in parallel due to interaction with globals. + async fn lock_prometheus() -> tokio::sync::MutexGuard<'static, ()> { + PROMETHEUS_LOCK + .get_or_init(|| Arc::new(tokio::sync::Mutex::new(()))) + .lock() + .await } #[test] @@ -2817,6 +2519,7 @@ mod tests { #[tokio::test] async fn test_handle_error_throttling() { + let error_map = DashMap::new(); // Set up a fake subscriber so we can check log events. If this is useful then maybe it can be factored out into something reusable #[derive(Default)] struct TestVisitor { @@ -2855,15 +2558,18 @@ mod tests { async { // Log twice rapidly, they should get deduped - handle_error(opentelemetry::global::Error::Other( - "other error".to_string(), - )); - handle_error(opentelemetry::global::Error::Other( - "other error".to_string(), - )); - handle_error(opentelemetry::global::Error::Trace( - "trace error".to_string().into(), - )); + handle_error_internal( + opentelemetry::global::Error::Other("other error".to_string()), + &error_map, + ); + handle_error_internal( + opentelemetry::global::Error::Other("other error".to_string()), + &error_map, + ); + handle_error_internal( + opentelemetry::global::Error::Trace("trace error".to_string().into()), + &error_map, + ); } .with_subscriber(tracing_subscriber::registry().with(test_layer.clone())) .await; @@ -2874,9 +2580,10 @@ mod tests { // Sleep a bit and then log again, it should get logged tokio::time::sleep(Duration::from_millis(200)).await; async { - handle_error(opentelemetry::global::Error::Other( - "other error".to_string(), - )); + handle_error_internal( + opentelemetry::global::Error::Other("other error".to_string()), + &error_map, + ); } .with_subscriber(tracing_subscriber::registry().with(test_layer.clone())) .await; diff --git a/apollo-router/src/plugins/telemetry/otlp.rs b/apollo-router/src/plugins/telemetry/otlp.rs index 5c522721c9..28df409585 100644 --- 
a/apollo-router/src/plugins/telemetry/otlp.rs +++ b/apollo-router/src/plugins/telemetry/otlp.rs @@ -3,6 +3,8 @@ use std::collections::HashMap; use indexmap::map::Entry; use indexmap::IndexMap; +use opentelemetry::sdk::metrics::reader::TemporalitySelector; +use opentelemetry::sdk::metrics::InstrumentKind; use opentelemetry_otlp::HttpExporterBuilder; use opentelemetry_otlp::TonicExporterBuilder; use opentelemetry_otlp::WithExportConfig; @@ -218,6 +220,29 @@ pub(crate) enum Temporality { Delta, } +pub(crate) struct CustomTemporalitySelector( + pub(crate) opentelemetry::sdk::metrics::data::Temporality, +); + +impl TemporalitySelector for CustomTemporalitySelector { + fn temporality(&self, _kind: InstrumentKind) -> opentelemetry::sdk::metrics::data::Temporality { + self.0 + } +} + +impl From<&Temporality> for Box { + fn from(value: &Temporality) -> Self { + Box::new(match value { + Temporality::Cumulative => CustomTemporalitySelector( + opentelemetry::sdk::metrics::data::Temporality::Cumulative, + ), + Temporality::Delta => { + CustomTemporalitySelector(opentelemetry::sdk::metrics::data::Temporality::Delta) + } + }) + } +} + mod metadata_map_serde { use tonic::metadata::KeyAndValueRef; use tonic::metadata::MetadataKey; diff --git a/apollo-router/src/plugins/telemetry/reload.rs b/apollo-router/src/plugins/telemetry/reload.rs index 2dcf270f41..d24468d2ac 100644 --- a/apollo-router/src/plugins/telemetry/reload.rs +++ b/apollo-router/src/plugins/telemetry/reload.rs @@ -5,7 +5,6 @@ use std::sync::atomic::Ordering; use anyhow::anyhow; use anyhow::Result; use once_cell::sync::OnceCell; -use opentelemetry::metrics::noop::NoopMeterProvider; use opentelemetry::sdk::trace::Tracer; use opentelemetry::trace::TraceContextExt; use opentelemetry::trace::TracerProvider; @@ -30,11 +29,11 @@ use tracing_subscriber::Registry; use super::config::SamplerOption; use super::metrics::span_metrics_exporter::SpanMetricsLayer; use crate::axum_factory::utils::REQUEST_SPAN_NAME; +use 
crate::metrics::layer::MetricsLayer; +use crate::metrics::meter_provider; use crate::plugins::telemetry::formatters::filter_metric_events; use crate::plugins::telemetry::formatters::text::TextFormatter; use crate::plugins::telemetry::formatters::FilteringFormatter; -use crate::plugins::telemetry::metrics; -use crate::plugins::telemetry::metrics::layer::MetricsLayer; use crate::plugins::telemetry::tracing::reload::ReloadTracer; pub(crate) type LayeredRegistry = Layered; @@ -54,29 +53,25 @@ pub(super) static OPENTELEMETRY_TRACER_HANDLE: OnceCell< ReloadTracer, > = OnceCell::new(); -#[allow(clippy::type_complexity)] -static METRICS_LAYER_HANDLE: OnceCell< - Handle< - MetricsLayer, - Layered< - tracing_subscriber::reload::Layer< - Box + Send + Sync>, - LayeredTracer, - >, - LayeredTracer, - >, - >, -> = OnceCell::new(); - static FMT_LAYER_HANDLE: OnceCell< Handle + Send + Sync>, LayeredTracer>, > = OnceCell::new(); pub(super) static SPAN_SAMPLING_RATE: AtomicU64 = AtomicU64::new(0); +pub(super) static METRICS_LAYER: OnceCell = OnceCell::new(); +pub(crate) fn metrics_layer() -> &'static MetricsLayer { + METRICS_LAYER.get_or_init(|| MetricsLayer::new(meter_provider().clone())) +} + pub(crate) fn init_telemetry(log_level: &str) -> Result<()> { let hot_tracer = ReloadTracer::new( - opentelemetry::sdk::trace::TracerProvider::default().versioned_tracer("noop", None, None), + opentelemetry::sdk::trace::TracerProvider::default().versioned_tracer( + "noop", + None::, + None::, + None, + ), ); let opentelemetry_layer = tracing_opentelemetry::layer() .with_tracer(hot_tracer.clone()) @@ -112,8 +107,7 @@ pub(crate) fn init_telemetry(log_level: &str) -> Result<()> { let (fmt_layer, fmt_handle) = tracing_subscriber::reload::Layer::new(fmt); - let (metrics_layer, metrics_handle) = - tracing_subscriber::reload::Layer::new(MetricsLayer::new(&NoopMeterProvider::default())); + let metrics_layer = metrics_layer(); // Stash the reload handles so that we can hot reload later 
OPENTELEMETRY_TRACER_HANDLE @@ -127,16 +121,13 @@ pub(crate) fn init_telemetry(log_level: &str) -> Result<()> { .with(SpanMetricsLayer::default()) .with(opentelemetry_layer) .with(fmt_layer) - .with(metrics_layer) + .with(metrics_layer.clone()) .with(EnvFilter::try_new(log_level)?) .try_init()?; Ok(hot_tracer) }) .map_err(|e: BoxError| anyhow!("failed to set OpenTelemetry tracer: {e}"))?; - METRICS_LAYER_HANDLE - .set(metrics_handle) - .map_err(|_| anyhow!("failed to set metrics layer handle"))?; FMT_LAYER_HANDLE .set(fmt_handle) .map_err(|_| anyhow!("failed to set fmt layer handle"))?; @@ -144,16 +135,6 @@ pub(crate) fn init_telemetry(log_level: &str) -> Result<()> { Ok(()) } -pub(super) fn reload_metrics(layer: MetricsLayer) { - if let Some(handle) = METRICS_LAYER_HANDLE.get() { - // If we are now going live with a new controller then maybe stash it. - metrics::prometheus::commit_new_controller(); - handle - .reload(layer) - .expect("metrics layer reload must succeed"); - } -} - pub(super) fn reload_fmt(layer: Box + Send + Sync>) { if let Some(handle) = FMT_LAYER_HANDLE.get() { handle.reload(layer).expect("fmt layer reload must succeed"); diff --git a/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics.snap b/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics.snap index 4608c849dc..0f56d4f32a 100644 --- a/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics.snap +++ b/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics.snap @@ -1,8 +1,17 @@ --- source: apollo-router/src/plugins/telemetry/mod.rs -expression: prom_metrics +expression: prometheus_metrics --- -apollo_router_http_request_duration_seconds_count{another_test="my_default_value",error="400 Bad 
Request",myname="label_value",renamed_value="my_value_set",service_name="apollo-router",status="400",otel_scope_name="apollo/router",otel_scope_version=""} 1 -apollo_router_http_request_duration_seconds_count{another_test="my_default_value",my_value="2",myname="label_value",renamed_value="my_value_set",service_name="apollo-router",status="200",x_custom="coming_from_header",otel_scope_name="apollo/router",otel_scope_version=""} 1 -apollo_router_http_request_duration_seconds_count{error="INTERNAL_SERVER_ERROR",my_key="my_custom_attribute_from_context",query_from_request="query { test }",service_name="apollo-router",status="200",subgraph="my_subgraph_name",unknown_data="default_value",otel_scope_name="apollo/router",otel_scope_version=""} 1 -apollo_router_http_request_duration_seconds_count{message="cannot contact the subgraph",service_name="apollo-router",status="500",subgraph="my_subgraph_name_error",subgraph_error_extended_code="SUBREQUEST_HTTP_ERROR",otel_scope_name="apollo/router",otel_scope_version=""} 1 +apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="+Inf"} 1 +apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="0.001"} 1 +apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="0.005"} 1 +apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="0.015"} 1 +apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="0.05"} 1 +apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="0.1"} 1 +apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="0.2"} 1 +apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="0.3"} 1 +apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="0.4"} 1 
+apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="0.5"} 1 +apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="1"} 1 +apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="10"} 1 +apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="5"} 1 diff --git a/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics_custom_buckets.snap b/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics_custom_buckets.snap index eecbdbd9fe..d60cea7aee 100644 --- a/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics_custom_buckets.snap +++ b/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics_custom_buckets.snap @@ -1,20 +1,8 @@ --- source: apollo-router/src/plugins/telemetry/mod.rs -expression: prom_metrics +expression: prometheus_metrics --- -apollo_router_http_request_duration_seconds_bucket{another_test="my_default_value",error="400 Bad Request",myname="label_value",renamed_value="my_value_set",service_name="apollo-router",status="400",otel_scope_name="apollo/router",otel_scope_version="",le="+Inf"} 1 -apollo_router_http_request_duration_seconds_bucket{another_test="my_default_value",error="400 Bad Request",myname="label_value",renamed_value="my_value_set",service_name="apollo-router",status="400",otel_scope_name="apollo/router",otel_scope_version="",le="10"} 1 -apollo_router_http_request_duration_seconds_bucket{another_test="my_default_value",error="400 Bad Request",myname="label_value",renamed_value="my_value_set",service_name="apollo-router",status="400",otel_scope_name="apollo/router",otel_scope_version="",le="20"} 1 
-apollo_router_http_request_duration_seconds_bucket{another_test="my_default_value",error="400 Bad Request",myname="label_value",renamed_value="my_value_set",service_name="apollo-router",status="400",otel_scope_name="apollo/router",otel_scope_version="",le="5"} 1 -apollo_router_http_request_duration_seconds_bucket{another_test="my_default_value",my_value="2",myname="label_value",renamed_value="my_value_set",service_name="apollo-router",status="200",x_custom="coming_from_header",otel_scope_name="apollo/router",otel_scope_version="",le="+Inf"} 1 -apollo_router_http_request_duration_seconds_bucket{another_test="my_default_value",my_value="2",myname="label_value",renamed_value="my_value_set",service_name="apollo-router",status="200",x_custom="coming_from_header",otel_scope_name="apollo/router",otel_scope_version="",le="10"} 1 -apollo_router_http_request_duration_seconds_bucket{another_test="my_default_value",my_value="2",myname="label_value",renamed_value="my_value_set",service_name="apollo-router",status="200",x_custom="coming_from_header",otel_scope_name="apollo/router",otel_scope_version="",le="20"} 1 -apollo_router_http_request_duration_seconds_bucket{another_test="my_default_value",my_value="2",myname="label_value",renamed_value="my_value_set",service_name="apollo-router",status="200",x_custom="coming_from_header",otel_scope_name="apollo/router",otel_scope_version="",le="5"} 1 -apollo_router_http_request_duration_seconds_bucket{error="INTERNAL_SERVER_ERROR",my_key="my_custom_attribute_from_context",query_from_request="query { test }",service_name="apollo-router",status="200",subgraph="my_subgraph_name",unknown_data="default_value",otel_scope_name="apollo/router",otel_scope_version="",le="+Inf"} 1 -apollo_router_http_request_duration_seconds_bucket{error="INTERNAL_SERVER_ERROR",my_key="my_custom_attribute_from_context",query_from_request="query { test 
}",service_name="apollo-router",status="200",subgraph="my_subgraph_name",unknown_data="default_value",otel_scope_name="apollo/router",otel_scope_version="",le="10"} 1 -apollo_router_http_request_duration_seconds_bucket{error="INTERNAL_SERVER_ERROR",my_key="my_custom_attribute_from_context",query_from_request="query { test }",service_name="apollo-router",status="200",subgraph="my_subgraph_name",unknown_data="default_value",otel_scope_name="apollo/router",otel_scope_version="",le="20"} 1 -apollo_router_http_request_duration_seconds_bucket{error="INTERNAL_SERVER_ERROR",my_key="my_custom_attribute_from_context",query_from_request="query { test }",service_name="apollo-router",status="200",subgraph="my_subgraph_name",unknown_data="default_value",otel_scope_name="apollo/router",otel_scope_version="",le="5"} 1 -apollo_router_http_request_duration_seconds_bucket{message="cannot contact the subgraph",service_name="apollo-router",status="500",subgraph="my_subgraph_name_error",subgraph_error_extended_code="SUBREQUEST_HTTP_ERROR",otel_scope_name="apollo/router",otel_scope_version="",le="+Inf"} 1 -apollo_router_http_request_duration_seconds_bucket{message="cannot contact the subgraph",service_name="apollo-router",status="500",subgraph="my_subgraph_name_error",subgraph_error_extended_code="SUBREQUEST_HTTP_ERROR",otel_scope_name="apollo/router",otel_scope_version="",le="10"} 1 -apollo_router_http_request_duration_seconds_bucket{message="cannot contact the subgraph",service_name="apollo-router",status="500",subgraph="my_subgraph_name_error",subgraph_error_extended_code="SUBREQUEST_HTTP_ERROR",otel_scope_name="apollo/router",otel_scope_version="",le="20"} 1 -apollo_router_http_request_duration_seconds_bucket{message="cannot contact the subgraph",service_name="apollo-router",status="500",subgraph="my_subgraph_name_error",subgraph_error_extended_code="SUBREQUEST_HTTP_ERROR",otel_scope_name="apollo/router",otel_scope_version="",le="5"} 1 
+apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="+Inf"} 1 +apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="10"} 1 +apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="20"} 1 +apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="5"} 1 diff --git a/apollo-router/src/plugins/telemetry/testdata/config.router.yaml b/apollo-router/src/plugins/telemetry/testdata/config.router.yaml new file mode 100644 index 0000000000..598d8d80ed --- /dev/null +++ b/apollo-router/src/plugins/telemetry/testdata/config.router.yaml @@ -0,0 +1,101 @@ +telemetry: + tracing: + trace_config: + service_name: router + attributes: + str: a + int: 1 + float: 1 + bool: true + str_arr: + - a + - b + int_arr: + - 1 + - 2 + float_arr: + - 1 + - 2 + bool_arr: + - true + - false + metrics: + common: + attributes: + supergraph: + static: + - name: myname + value: label_value + request: + header: + - named: test + default: default_value + rename: renamed_value + body: + - path: .data.test + name: my_new_name + default: default_value + response: + header: + - named: test + default: default_value + rename: renamed_value + - named: test + default: default_value + rename: renamed_value + body: + - path: .data.test + name: my_new_name + default: default_value + subgraph: + all: + static: + - name: myname + value: label_value + request: + header: + - named: test + default: default_value + rename: renamed_value + body: + - path: .data.test + name: my_new_name + default: default_value + response: + header: + - named: test + default: default_value + rename: renamed_value + - named: test + default: default_value + rename: renamed_value + body: + - path: .data.test + name: my_new_name + default: default_value + subgraphs: + subgraph_name_test: + static: + - name: myname + value: label_value + request: + header: + - named: test + default: 
default_value + rename: renamed_value + body: + - path: .data.test + name: my_new_name + default: default_value + response: + header: + - named: test + default: default_value + rename: renamed_value + - named: test + default: default_value + rename: renamed_value + body: + - path: .data.test + name: my_new_name + default: default_value diff --git a/apollo-router/src/plugins/telemetry/testdata/custom_attributes.router.yaml b/apollo-router/src/plugins/telemetry/testdata/custom_attributes.router.yaml new file mode 100644 index 0000000000..4310d122d1 --- /dev/null +++ b/apollo-router/src/plugins/telemetry/testdata/custom_attributes.router.yaml @@ -0,0 +1,51 @@ +telemetry: + apollo: + client_name_header: name_header + client_version_header: version_header + metrics: + common: + service_name: apollo-router + attributes: + supergraph: + static: + - name: myname + value: label_value + request: + header: + - named: test + default: default_value + rename: renamed_value + - named: another_test + default: my_default_value + response: + header: + - named: x-custom + body: + - path: .data.data.my_value + name: my_value + subgraph: + all: + errors: + include_messages: true + extensions: + - name: subgraph_error_extended_code + path: .code + - name: message + path: .reason + subgraphs: + my_subgraph_name: + request: + body: + - path: .query + name: query_from_request + - path: .data + name: unknown_data + default: default_value + - path: .data2 + name: unknown_data_bis + response: + body: + - path: .errors[0].extensions.status + name: error + context: + - named: my_key diff --git a/apollo-router/src/plugins/telemetry/testdata/prometheus.router.yaml b/apollo-router/src/plugins/telemetry/testdata/prometheus.router.yaml new file mode 100644 index 0000000000..47637813ab --- /dev/null +++ b/apollo-router/src/plugins/telemetry/testdata/prometheus.router.yaml @@ -0,0 +1,7 @@ +telemetry: + apollo: + client_name_header: name_header + client_version_header: version_header + metrics: + 
prometheus: + enabled: true diff --git a/apollo-router/src/plugins/telemetry/testdata/prometheus_custom_buckets.router.yaml b/apollo-router/src/plugins/telemetry/testdata/prometheus_custom_buckets.router.yaml new file mode 100644 index 0000000000..e96a4b5d71 --- /dev/null +++ b/apollo-router/src/plugins/telemetry/testdata/prometheus_custom_buckets.router.yaml @@ -0,0 +1,13 @@ +telemetry: + apollo: + client_name_header: name_header + client_version_header: version_header + metrics: + common: + service_name: apollo-router + buckets: + - 5 + - 10 + - 20 + prometheus: + enabled: true diff --git a/apollo-router/src/plugins/telemetry/tracing/apollo_telemetry.rs b/apollo-router/src/plugins/telemetry/tracing/apollo_telemetry.rs index 25f260977c..2b725bc938 100644 --- a/apollo-router/src/plugins/telemetry/tracing/apollo_telemetry.rs +++ b/apollo-router/src/plugins/telemetry/tracing/apollo_telemetry.rs @@ -23,7 +23,6 @@ use opentelemetry::trace::SpanId; use opentelemetry::trace::TraceError; use opentelemetry::Key; use opentelemetry::Value; -use opentelemetry_semantic_conventions::trace::HTTP_METHOD; use prost::Message; use serde::de::DeserializeOwned; use thiserror::Error; @@ -74,6 +73,8 @@ use crate::query_planner::FLATTEN_SPAN_NAME; use crate::query_planner::PARALLEL_SPAN_NAME; use crate::query_planner::SEQUENCE_SPAN_NAME; +// TODO Remove this and use otel constants again https://github.com/apollographql/router/issues/3833 +const HTTP_METHOD: Key = Key::from_static_str("http.method"); const APOLLO_PRIVATE_REQUEST: Key = Key::from_static_str("apollo_private.request"); pub(crate) const APOLLO_PRIVATE_DURATION_NS: &str = "apollo_private.duration_ns"; const APOLLO_PRIVATE_DURATION_NS_KEY: Key = Key::from_static_str(APOLLO_PRIVATE_DURATION_NS); diff --git a/apollo-router/src/plugins/telemetry/tracing/jaeger.rs b/apollo-router/src/plugins/telemetry/tracing/jaeger.rs index d7650c76b7..ac123a0e6c 100644 --- a/apollo-router/src/plugins/telemetry/tracing/jaeger.rs +++ 
b/apollo-router/src/plugins/telemetry/tracing/jaeger.rs @@ -1,14 +1,9 @@ //! Configuration for jaeger tracing. use std::fmt::Debug; -use opentelemetry::sdk::export::trace::SpanData; +use opentelemetry::runtime; use opentelemetry::sdk::trace::BatchSpanProcessor; use opentelemetry::sdk::trace::Builder; -use opentelemetry::sdk::trace::Span; -use opentelemetry::sdk::trace::SpanProcessor; -use opentelemetry::sdk::trace::TracerProvider; -use opentelemetry::trace::TraceResult; -use opentelemetry::Context; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; @@ -102,7 +97,7 @@ impl TracingConfigurator for Config { tracing::info!("Configuring Jaeger tracing: {}", batch_processor); // We are waiting for a release of https://github.com/open-telemetry/opentelemetry-rust/issues/894 // Until that time we need to wrap a tracer provider with Jeager in. - let tracer_provider = opentelemetry_jaeger::new_collector_pipeline() + let exporter = opentelemetry_jaeger::new_collector_pipeline() .with_trace_config(trace_config.into()) .with_service_name(trace_config.service_name.clone()) .with(&collector.username, |b, u| b.with_username(u)) @@ -110,34 +105,13 @@ impl TracingConfigurator for Config { .with_endpoint(&collector.endpoint.to_string()) .with_reqwest() .with_batch_processor_config(batch_processor.clone().into()) - .build_batch(opentelemetry::runtime::Tokio)?; - Ok(builder - .with_span_processor(DelegateSpanProcessor { tracer_provider }.filtered())) + .build_collector_exporter::()?; + Ok(builder.with_span_processor( + BatchSpanProcessor::builder(exporter, opentelemetry::runtime::Tokio) + .with_batch_config(batch_processor.clone().into()) + .build(), + )) } } } } - -#[derive(Debug)] -struct DelegateSpanProcessor { - tracer_provider: TracerProvider, -} - -impl SpanProcessor for DelegateSpanProcessor { - fn on_start(&self, span: &mut Span, cx: &Context) { - self.tracer_provider.span_processors()[0].on_start(span, cx) - } - - fn on_end(&self, span: SpanData) { - 
self.tracer_provider.span_processors()[0].on_end(span) - } - - fn force_flush(&self) -> TraceResult<()> { - self.tracer_provider.span_processors()[0].force_flush() - } - - fn shutdown(&mut self) -> TraceResult<()> { - // It's safe to not call shutdown as dropping tracer_provider will cause shutdown to happen separately. - Ok(()) - } -} diff --git a/apollo-router/src/plugins/telemetry/tracing/reload.rs b/apollo-router/src/plugins/telemetry/tracing/reload.rs index fcf7d1a395..7c3aa1a5c0 100644 --- a/apollo-router/src/plugins/telemetry/tracing/reload.rs +++ b/apollo-router/src/plugins/telemetry/tracing/reload.rs @@ -22,14 +22,14 @@ impl PreSampledTracer for ReloadTracer { .sampled_context(data) } - fn new_trace_id(&self) -> opentelemetry::trace::TraceId { + fn new_trace_id(&self) -> opentelemetry_api::trace::TraceId { self.parent .read() .expect("parent tracer must be available") .new_trace_id() } - fn new_span_id(&self) -> opentelemetry::trace::SpanId { + fn new_span_id(&self) -> opentelemetry_api::trace::SpanId { self.parent .read() .expect("parent tracer must be available") diff --git a/apollo-router/src/tracer.rs b/apollo-router/src/tracer.rs index 2084f4be9b..783254bdcc 100644 --- a/apollo-router/src/tracer.rs +++ b/apollo-router/src/tracer.rs @@ -54,7 +54,7 @@ mod test { use std::sync::Mutex; use once_cell::sync::Lazy; - use opentelemetry::sdk::export::trace::stdout; + use opentelemetry_api::trace::TracerProvider; use tracing_subscriber::layer::SubscriberExt; use tracing_subscriber::Registry; @@ -85,13 +85,22 @@ mod test { assert!(other_id == my_id); } - #[test] - fn it_returns_valid_trace_id() { + #[tokio::test] + async fn it_returns_valid_trace_id() { let _guard = TRACING_LOCK .lock() .unwrap_or_else(|poisoned| poisoned.into_inner()); // Create a tracing layer with the configured tracer - let tracer = stdout::new_pipeline().install_simple(); + + let provider = opentelemetry::sdk::trace::TracerProvider::builder() + .with_simple_exporter( + 
opentelemetry_stdout::SpanExporter::builder() + .with_writer(std::io::stdout()) + .build(), + ) + .build(); + let tracer = provider.versioned_tracer("noop", None::, None::, None); + let telemetry = tracing_opentelemetry::layer().with_tracer(tracer); // Use the tracing subscriber `Registry`, or any other subscriber // that impls `LookupSpan` @@ -112,7 +121,10 @@ mod test { let my_id = TraceId::maybe_new(); assert!(my_id.is_none()); // Create a tracing layer with the configured tracer - let tracer = stdout::new_pipeline().install_simple(); + let provider = opentelemetry::sdk::trace::TracerProvider::builder() + .with_simple_exporter(opentelemetry_stdout::SpanExporter::default()) + .build(); + let tracer = provider.versioned_tracer("noop", None::, None::, None); let telemetry = tracing_opentelemetry::layer().with_tracer(tracer); // Use the tracing subscriber `Registry`, or any other subscriber // that impls `LookupSpan` @@ -134,7 +146,10 @@ mod test { .lock() .unwrap_or_else(|poisoned| poisoned.into_inner()); // Create a tracing layer with the configured tracer - let tracer = stdout::new_pipeline().install_simple(); + let provider = opentelemetry::sdk::trace::TracerProvider::builder() + .with_simple_exporter(opentelemetry_stdout::SpanExporter::default()) + .build(); + let tracer = provider.versioned_tracer("noop", None::, None::, None); let telemetry = tracing_opentelemetry::layer().with_tracer(tracer); // Use the tracing subscriber `Registry`, or any other subscriber // that impls `LookupSpan` diff --git a/apollo-router/tests/metrics_tests.rs b/apollo-router/tests/metrics_tests.rs index ff089008fd..5dc48a2aed 100644 --- a/apollo-router/tests/metrics_tests.rs +++ b/apollo-router/tests/metrics_tests.rs @@ -43,8 +43,8 @@ async fn test_metrics_reloading() -> Result<(), BoxError> { router.assert_reloaded().await; } - router.assert_metrics_contains(r#"apollo_router_cache_hit_count_total{kind="query 
planner",service_name="apollo-router",storage="memory",otel_scope_name="apollo/router",otel_scope_version=""} 4"#, None).await; - router.assert_metrics_contains(r#"apollo_router_cache_miss_count_total{kind="query planner",service_name="apollo-router",storage="memory",otel_scope_name="apollo/router",otel_scope_version=""} 2"#, None).await; + router.assert_metrics_contains(r#"apollo_router_cache_hit_count_total{kind="query planner",storage="memory",otel_scope_name="apollo/router"} 4"#, None).await; + router.assert_metrics_contains(r#"apollo_router_cache_miss_count_total{kind="query planner",storage="memory",otel_scope_name="apollo/router"} 2"#, None).await; router .assert_metrics_contains(r#"apollo_router_cache_hit_time"#, None) .await; @@ -65,8 +65,8 @@ async fn test_metrics_reloading() -> Result<(), BoxError> { .await; if std::env::var("APOLLO_KEY").is_ok() && std::env::var("APOLLO_GRAPH_REF").is_ok() { - router.assert_metrics_contains(r#"apollo_router_uplink_fetch_duration_seconds_count{kind="unchanged",query="License",service_name="apollo-router",url="https://uplink.api.apollographql.com/",otel_scope_name="apollo/router",otel_scope_version=""}"#, Some(Duration::from_secs(120))).await; - router.assert_metrics_contains(r#"apollo_router_uplink_fetch_count_total{query="License",service_name="apollo-router",status="success",otel_scope_name="apollo/router",otel_scope_version=""}"#, Some(Duration::from_secs(1))).await; + router.assert_metrics_contains(r#"apollo_router_uplink_fetch_duration_seconds_count{kind="unchanged",query="License",url="https://uplink.api.apollographql.com/",otel_scope_name="apollo/router"}"#, Some(Duration::from_secs(120))).await; + router.assert_metrics_contains(r#"apollo_router_uplink_fetch_count_total{query="License",status="success",otel_scope_name="apollo/router"}"#, Some(Duration::from_secs(1))).await; } Ok(()) @@ -107,7 +107,7 @@ async fn test_subgraph_auth_metrics() -> Result<(), BoxError> { .unwrap() ); - 
router.assert_metrics_contains(r#"apollo_router_operations_authentication_aws_sigv4_total{authentication_aws_sigv4_failed="false",service_name="apollo-router",subgraph_service_name="products",otel_scope_name="apollo/router",otel_scope_version=""} 2"#, None).await; + router.assert_metrics_contains(r#"apollo_router_operations_authentication_aws_sigv4_total{authentication_aws_sigv4_failed="false",subgraph_service_name="products",otel_scope_name="apollo/router"} 2"#, None).await; Ok(()) } diff --git a/apollo-router/tests/snapshots/tracing_tests__traced_basic_composition.snap b/apollo-router/tests/snapshots/tracing_tests__traced_basic_composition.snap index 8c976a59d0..cf291813c3 100644 --- a/apollo-router/tests/snapshots/tracing_tests__traced_basic_composition.snap +++ b/apollo-router/tests/snapshots/tracing_tests__traced_basic_composition.snap @@ -200,14 +200,6 @@ expression: get_spans() [ "apollo_private.operation_signature", "# -\n{topProducts{name reviews{author{id name}id product{name}}upc}}" - ], - [ - "monotonic_counter.apollo.router.operations", - 1 - ], - [ - "http.response.status_code", - 200 ] ], "metadata": { diff --git a/apollo-router/tests/snapshots/tracing_tests__traced_basic_request.snap b/apollo-router/tests/snapshots/tracing_tests__traced_basic_request.snap index e5c11265d4..bdb77d329c 100644 --- a/apollo-router/tests/snapshots/tracing_tests__traced_basic_request.snap +++ b/apollo-router/tests/snapshots/tracing_tests__traced_basic_request.snap @@ -200,14 +200,6 @@ expression: get_spans() [ "apollo_private.operation_signature", "# -\n{topProducts{name name}}" - ], - [ - "monotonic_counter.apollo.router.operations", - 1 - ], - [ - "http.response.status_code", - 200 ] ], "metadata": { diff --git a/apollo-router/tests/snapshots/tracing_tests__variables.snap b/apollo-router/tests/snapshots/tracing_tests__variables.snap index 6ea9d10fa8..0d300d41b0 100644 --- a/apollo-router/tests/snapshots/tracing_tests__variables.snap +++ 
b/apollo-router/tests/snapshots/tracing_tests__variables.snap @@ -188,14 +188,6 @@ expression: get_spans() [ "apollo_private.operation_signature", "# ExampleQuery\nquery ExampleQuery($reviewsForAuthorAuthorId:ID!,$topProductsFirst:Int){topProducts(first:$topProductsFirst){name reviewsForAuthor(authorID:$reviewsForAuthorAuthorId){author{id name}body}}}" - ], - [ - "monotonic_counter.apollo.router.operations", - 1 - ], - [ - "http.response.status_code", - 400 ] ], "metadata": { From b85c699784364ae6c5f573a5ac87c93ea48959ed Mon Sep 17 00:00:00 2001 From: bryn Date: Mon, 18 Sep 2023 17:03:30 +0100 Subject: [PATCH 02/30] Add comment --- apollo-router/src/plugins/telemetry/mod.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/apollo-router/src/plugins/telemetry/mod.rs b/apollo-router/src/plugins/telemetry/mod.rs index 221293f216..81f66cccd6 100644 --- a/apollo-router/src/plugins/telemetry/mod.rs +++ b/apollo-router/src/plugins/telemetry/mod.rs @@ -1690,6 +1690,11 @@ impl Telemetry { fn safe_shutdown_meter_provider(meter_provider: &mut Option) { if Handle::try_current().is_ok() { if let Some(meter_provider) = meter_provider.take() { + // This is a thread for a reason! + // Tokio doesn't finish executing tasks before termination https://github.com/tokio-rs/tokio/issues/1156. + // This means that if the runtime is shutdown there is potentially a race where the provider may not be flushed. + // By using a thread it doesn't matter if the tokio runtime is shut down. + // This is likely to happen in tests due to the tokio runtime being destroyed when the test method exits. 
thread::spawn(move || { if let Err(e) = meter_provider.shutdown() { ::tracing::error!(error = %e, "failed to shutdown meter provider") From e4ab716a45a18350f10b8de22c7b338370069bcc Mon Sep 17 00:00:00 2001 From: bryn Date: Tue, 19 Sep 2023 09:27:44 +0100 Subject: [PATCH 03/30] Make creation of registered metrics atomic otherwise there would be a risk of the aggregate meter provider being cleared after a meter has been created but before it has been registered. --- apollo-router/src/metrics/aggregation.rs | 51 +++++++++++++++++------- apollo-router/src/metrics/mod.rs | 8 +--- 2 files changed, 38 insertions(+), 21 deletions(-) diff --git a/apollo-router/src/metrics/aggregation.rs b/apollo-router/src/metrics/aggregation.rs index e42fdde123..89309d6ba6 100644 --- a/apollo-router/src/metrics/aggregation.rs +++ b/apollo-router/src/metrics/aggregation.rs @@ -2,6 +2,7 @@ use std::any::Any; use std::borrow::Cow; use std::collections::HashMap; use std::mem; +use std::ops::DerefMut; use std::sync::Arc; use std::sync::Mutex; @@ -50,7 +51,7 @@ pub(crate) struct AggregateMeterProvider { } #[derive(Default)] -struct Inner { +pub(crate) struct Inner { providers: HashMap)>, registered_instruments: Vec, } @@ -113,18 +114,18 @@ impl AggregateMeterProvider { } } - /// Register an instrument. This enables caching at callsites and invalidation at the meter provider via weak reference. + /// Create a registered instrument. This enables caching at callsites and invalidation at the meter provider via weak reference. 
#[allow(dead_code)] - pub(crate) fn register_instrument(&self, instrument: T) -> Arc + pub(crate) fn create_registered_instrument( + &self, + create_fn: impl Fn(&mut Inner) -> T, + ) -> Arc where Arc: Into, { - let instrument = Arc::new(instrument); - self.inner - .lock() - .expect("lock poisoned") - .registered_instruments - .push(instrument.clone().into()); + let mut guard = self.inner.lock().expect("lock poisoned"); + let instrument = Arc::new((create_fn)(guard.deref_mut())); + guard.registered_instruments.push(instrument.clone().into()); instrument } @@ -138,9 +139,17 @@ impl AggregateMeterProvider { } } -impl MeterProvider for AggregateMeterProvider { - fn versioned_meter( - &self, +impl Inner { + pub(crate) fn meter(&mut self, name: impl Into>) -> Meter { + self.versioned_meter( + name, + None::>, + None::>, + None, + ) + } + pub(crate) fn versioned_meter( + &mut self, name: impl Into>, version: Option>>, schema_url: Option>>, @@ -149,10 +158,9 @@ impl MeterProvider for AggregateMeterProvider { let name = name.into(); let version = version.map(|v| v.into()); let schema_url = schema_url.map(|v| v.into()); - let mut meters = Vec::new(); - let mut inner = self.inner.lock().expect("lock poisoned"); - for (provider, existing_meters) in inner.providers.values_mut() { + + for (provider, existing_meters) in self.providers.values_mut() { meters.push( existing_meters .entry(MeterId { @@ -176,6 +184,19 @@ impl MeterProvider for AggregateMeterProvider { } } +impl MeterProvider for AggregateMeterProvider { + fn versioned_meter( + &self, + name: impl Into>, + version: Option>>, + schema_url: Option>>, + attributes: Option>, + ) -> Meter { + let mut inner = self.inner.lock().expect("lock poisoned"); + inner.versioned_meter(name, version, schema_url, attributes) + } +} + pub(crate) struct AggregateInstrumentProvider { meters: Vec, } diff --git a/apollo-router/src/metrics/mod.rs b/apollo-router/src/metrics/mod.rs index 07b3361a2f..c147acfb94 100644 --- 
a/apollo-router/src/metrics/mod.rs +++ b/apollo-router/src/metrics/mod.rs @@ -558,9 +558,7 @@ macro_rules! metric { let mut instrument_guard = INSTRUMENT_CACHE .get_or_init(|| { let meter_provider = crate::metrics::meter_provider(); - let meter = opentelemetry::metrics::MeterProvider::meter(&meter_provider, "apollo/router"); - let instrument = meter.[<$ty _ $instrument>]($name).with_description($description).init(); - let instrument_ref = meter_provider.register_instrument(instrument); + let instrument_ref = meter_provider.create_registered_instrument(|p| p.meter("apollo/router").[<$ty _ $instrument>]($name).with_description($description).init()); std::sync::Mutex::new(std::sync::Arc::downgrade(&instrument_ref)) }) .lock() @@ -572,9 +570,7 @@ macro_rules! metric { } else { // Slow path, we need to obtain the instrument again. let meter_provider = crate::metrics::meter_provider(); - let meter = opentelemetry::metrics::MeterProvider::meter(&meter_provider, "apollo/router"); - let instrument = meter.[<$ty _ $instrument>]($name).with_description($description).init(); - let instrument_ref = meter_provider.register_instrument(instrument); + let instrument_ref = meter_provider.create_registered_instrument(|p| p.meter("apollo/router").[<$ty _ $instrument>]($name).with_description($description).init()); *instrument_guard = std::sync::Arc::downgrade(&instrument_ref); // We've updated the instrument and got a strong reference to it. We can drop the mutex guard now. 
drop(instrument_guard); From 8ede8111ece5b9678714dfe1ee425a763314bb36 Mon Sep 17 00:00:00 2001 From: bryn Date: Tue, 19 Sep 2023 12:33:04 +0100 Subject: [PATCH 04/30] Add dev docs --- dev-docs/metrics.md | 122 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 dev-docs/metrics.md diff --git a/dev-docs/metrics.md b/dev-docs/metrics.md new file mode 100644 index 0000000000..36420c05fe --- /dev/null +++ b/dev-docs/metrics.md @@ -0,0 +1,122 @@ +# Metrics + +The Router uses OpenTelemetry metrics to support Prometheus and OTLP exporters. + +## Requirements +* Filtering of metrics to Public and Private exporters. This is to support Apollo only metrics and to exclude sending of legacy metrics to Apollo. +* Mutliple exporters - Prometheus and OTLP. +* Prometheus metrics must persist across reloads. +* Metrics must be testable. + +## Entities +```mermaid + +erDiagram + + callsite-tracing ||--|{ metrics-layer : uses + callsite-macro ||--|{ aggregate-meter-provider : uses + callsite-macro ||--|{ instrument : mutates + + metrics-layer ||--|| aggregate-meter-provider : uses + metrics-layer ||--|{ instrument : mutates + + telemetry-plugin ||--|| metrics-layer : clears + telemetry-plugin ||--|| aggregate-meter-provider : configures + + aggregate-meter-provider ||--|| public-filtered-meter-provider : creates + aggregate-meter-provider ||--|| public-filtered-prometheus-meter-provider : creates + aggregate-meter-provider ||--|| private-filtered-meter-provider : creates + + public-filtered-meter-provider ||--|{ public-meter : creates + public-filtered-prometheus-meter-provider ||--|{ public-prometheus-meter : creates + private-filtered-meter-provider ||--|{ private-meter : creates + + public-meter-provider ||--|{ public-meter : creates + public-prometheus-meter-provider ||--|{ public-prometheus-meter : creates + private-meter-provider ||--|{ private-meter : creates + + public-meter ||--|{ instrument : creates + public-prometheus-meter 
||--|{ instrument : creates + private-meter ||--|{ instrument : creates + + instrument + + exporter ||--|{ public-meter : observes + prometheus-exporter ||--|{ public-prometheus-meter : observes + private-otlp-exporter ||--|{ private-meter : observes +``` + +### Instrument +A histogram or counter that is used to record metrics. + +### Meter +Creates instruments, also contains a reference to exporters so that when instruments are created the + +### Meter provider +Creates meters + +### Filter meter provider +Depending on a meter name will return no-op or delegate to a meter provider. Used to filter public vs private metrics. + +### Aggregate meter provider +A meter provider that wraps public, public prometheus, and private meter providers. Used to create a single meter provider that can be used by the metrics layer and metrics macros. + +### Metrics layer +The tracing-opentelemetry layer that is used to create instruments and meters. This will cache instruments after they have been created. + +### Metrics macros +New macros that will be used for metrics going forward. Allows unit testing of metrics. + +## Design gotchas +The metrics code is substantial, however there are reasons that it is structured in the way that it is. + +1. There is no way to filter instruments at the exporter level. This is the reason that we have aggregate meter providers that wrap the public, public prometheus, and private meter providers. This allows us to filter out private metrics at the meter provider level. +2. The meter provider and meter layer are both globals. This has made testing hard. The new metrics macros should be used as they have built in support for testing by moving the meter provider to a task or thread local. +3. Prometheus meters need to be kept around across reloads otherwise metrics are reset. This is why the aggregate meter provider allows internal mutability. + +## Using metrics macros + +Metrics macros are a replacement for the tracing-opentelemetry metrics-layer. 
+They are highly optimised, allow dynamic attributes, are easy to use and support unit testing. + +### Usage + +When using the macro in a test you will need a different pattern depending on if you are writing a sync or async test. + +#### Sync +```rust + #[test] + fn test_non_async() { + // Each test is run in a separate thread, metrics are stored in a thread local. + u64_counter!("test", "test description", 1, "attr" => "val"); + assert_metric!("test", 1, "attr" => "val"); + } +``` + +#### Async + +Make sure to use `.with_metrics()` method on the async block to ensure that the metrics are stored in a task local. +*Tests will silently fail to record metrics if this is not done.* +```rust + #[tokio::test(flavor = "multi_thread")] + async fn test_async_multi() { + // Multi-threaded runtime needs to use a tokio task local to avoid tests interfering with each other + async { + u64_counter!("test", "test description", 1, "attr" => "val"); + assert_metric!("test", 1, "attr" => "val"); + } + .with_metrics() + .await; + } + + #[tokio::test] + async fn test_async_single() { + async { + // It's a single threaded tokio runtime, so we can still use a thread local + u64_counter!("test", "test description", 1, "attr" => "val"); + assert_metric!("test", 1, "attr" => "val"); + } + .with_metrics() + .await; + } +``` From e9b6b8fab97a1a70680b1b4ae2411791b3daaa66 Mon Sep 17 00:00:00 2001 From: bryn Date: Wed, 20 Sep 2023 09:18:16 +0100 Subject: [PATCH 05/30] Improve dev docs a bit more --- dev-docs/metrics.md | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/dev-docs/metrics.md b/dev-docs/metrics.md index 36420c05fe..05abc768cc 100644 --- a/dev-docs/metrics.md +++ b/dev-docs/metrics.md @@ -23,13 +23,13 @@ erDiagram telemetry-plugin ||--|| metrics-layer : clears telemetry-plugin ||--|| aggregate-meter-provider : configures - aggregate-meter-provider ||--|| public-filtered-meter-provider : creates - aggregate-meter-provider 
||--|| public-filtered-prometheus-meter-provider : creates - aggregate-meter-provider ||--|| private-filtered-meter-provider : creates + aggregate-meter-provider ||--|| public-filtered-meter-provider : uses + aggregate-meter-provider ||--|| public-filtered-prometheus-meter-provider : uses + aggregate-meter-provider ||--|| private-filtered-meter-provider : uses - public-filtered-meter-provider ||--|{ public-meter : creates - public-filtered-prometheus-meter-provider ||--|{ public-prometheus-meter : creates - private-filtered-meter-provider ||--|{ private-meter : creates + public-filtered-meter-provider ||--|{ public-meter : uses + public-filtered-prometheus-meter-provider ||--|{ public-prometheus-meter : uses + private-filtered-meter-provider ||--|{ private-meter : uses public-meter-provider ||--|{ public-meter : creates public-prometheus-meter-provider ||--|{ public-prometheus-meter : creates @@ -60,6 +60,7 @@ Depending on a meter name will return no-op or delegate to a meter provider. Use ### Aggregate meter provider A meter provider that wraps public, public prometheus, and private meter providers. Used to create a single meter provider that can be used by the metrics layer and metrics macros. +This meter provider is also responsible for maintaining a strong reference to all instruments that are currently valid. This enables [callsite instrument caching](#callsite-instrument-caching). ### Metrics layer The tracing-opentelemetry layer that is used to create instruments and meters. This will cache instruments after they have been created. @@ -120,3 +121,29 @@ Make sure to use `.with_metrics()` method on the async block to ensure that the .await; } ``` + +## Callsite instrument caching + +When using the new metrics macros a reference to an instrument is cached to ensure that the meter provider does not have to be queried over and over. 
+ +```mermaid + +flowchart TD + Callsite --> RefCheck + RefCheck -->|not upgradable| Create + RefCheck -->|upgradable| Use + Create --> Store + Store --> Use + RefCheck{"Static\nMutex < Weak < Instrument > >"} + Create("Create instrument Arc < Instrument >") + Store("Store downgraded clone in Mutex") + Use("Use strong reference to instrument") +``` + +Aggregate meter provider is responsible for maintaining a strong reference to all instruments that are valid. + +Strong references to instruments will be discarded when changes to the aggregate meter provider take place. This will cause every callsite to refresh its reference to the instrument. + +On the fast path the mutex is locked for the period that it takes to upgrade the weak reference. This is a fast operation, and should not block the thread for any meaningful period of time. + +If there is shown to be contention in future profiling we can revisit. From 0cb3f3df508c84f8424cd5e26f83fe6f61036cc2 Mon Sep 17 00:00:00 2001 From: bryn Date: Wed, 20 Sep 2023 09:29:03 +0100 Subject: [PATCH 06/30] Improve dev docs a bit more --- dev-docs/metrics.md | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/dev-docs/metrics.md b/dev-docs/metrics.md index 05abc768cc..9aca2347f5 100644 --- a/dev-docs/metrics.md +++ b/dev-docs/metrics.md @@ -41,19 +41,28 @@ erDiagram instrument - exporter ||--|{ public-meter : observes + "exporter(s)" ||--|{ public-meter : observes prometheus-exporter ||--|{ public-prometheus-meter : observes + prometheus-registry ||--|| prometheus-exporter : observes private-otlp-exporter ||--|{ private-meter : observes + ``` ### Instrument A histogram or counter that is used to record metrics. 
### Meter -Creates instruments, also contains a reference to exporters so that when instruments are created the +Creates instruments, also contains a reference to exporters so that when instruments are created the +* __Public meter__ - Exports all public metrics to configured exporters except for Prometheus. +* __Public prometheus meter__ - Exports all public metrics to Prometheus. +* __Private meter__ - Exports all private metrics to Apollo. + ### Meter provider Creates meters +* __Public meter provider__ - Creates public meters (see above). +* __Public prometheus meter provider__ - Creates public prometheus meters (see above). +* __Private meter provider__ - Creates private meters (see above). ### Filter meter provider Depending on a meter name will return no-op or delegate to a meter provider. Used to filter public vs private metrics. @@ -68,6 +77,9 @@ The tracing-opentelemetry layer that is used to create instruments and meters. T ### Metrics macros New macros that will be used for metrics going forward. Allows unit testing of metrics. +### Prometheus registry +Used to render prometheus metrics. Contains no state. + ## Design gotchas The metrics code is substantial, however there are reasons that it is structured in the way that it is. From 4b6181e4b9fe63f27adee3849bb5b571d9bd248c Mon Sep 17 00:00:00 2001 From: Bryn Cooke Date: Wed, 20 Sep 2023 14:58:24 +0100 Subject: [PATCH 07/30] Update dev-docs/metrics.md Co-authored-by: Coenen Benjamin --- dev-docs/metrics.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dev-docs/metrics.md b/dev-docs/metrics.md index 9aca2347f5..0898f6f7ee 100644 --- a/dev-docs/metrics.md +++ b/dev-docs/metrics.md @@ -4,7 +4,7 @@ The Router uses OpenTelemetry metrics to support Prometheus and OTLP exporters. ## Requirements * Filtering of metrics to Public and Private exporters. This is to support Apollo only metrics and to exclude sending of legacy metrics to Apollo. -* Mutliple exporters - Prometheus and OTLP.
+* Multiple exporters - Prometheus and OTLP. * Prometheus metrics must persist across reloads. * Metrics must be testable. From 630c6348b03c3b25ab347c0ea15a5222124371a6 Mon Sep 17 00:00:00 2001 From: bryn Date: Wed, 20 Sep 2023 16:55:31 +0100 Subject: [PATCH 08/30] Add test for multiple calls to metrics --- apollo-router/src/metrics/mod.rs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/apollo-router/src/metrics/mod.rs b/apollo-router/src/metrics/mod.rs index c147acfb94..532e19e890 100644 --- a/apollo-router/src/metrics/mod.rs +++ b/apollo-router/src/metrics/mod.rs @@ -639,6 +639,20 @@ mod test { use crate::metrics::meter_provider; use crate::metrics::FutureMetricsExt; + #[test] + fn test_multiple_calls() { + // Each test is run in a separate thread, metrics are stored in a thread local. + fn my_method(val: &'static str) { + u64_counter!("test", "test description", 1, "attr" => val); + } + + my_method("jill"); + my_method("jill"); + my_method("bob"); + assert_metric!("test", 2, "attr" => "jill"); + assert_metric!("test", 1, "attr" => "bob"); + } + #[test] fn test_non_async() { // Each test is run in a separate thread, metrics are stored in a thread local. 
From 26fc686424fcfef3dc293cf2f72d1e911a0fcc84 Mon Sep 17 00:00:00 2001 From: bryn Date: Wed, 20 Sep 2023 16:55:39 +0100 Subject: [PATCH 09/30] Doc fixes --- dev-docs/metrics.md | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/dev-docs/metrics.md b/dev-docs/metrics.md index 0898f6f7ee..c1c2eae32b 100644 --- a/dev-docs/metrics.md +++ b/dev-docs/metrics.md @@ -27,9 +27,9 @@ erDiagram aggregate-meter-provider ||--|| public-filtered-prometheus-meter-provider : uses aggregate-meter-provider ||--|| private-filtered-meter-provider : uses - public-filtered-meter-provider ||--|{ public-meter : uses - public-filtered-prometheus-meter-provider ||--|{ public-prometheus-meter : uses - private-filtered-meter-provider ||--|{ private-meter : uses + public-filtered-meter-provider ||--|{ public-meter-provider : uses + public-filtered-prometheus-meter-provider ||--|{ public-prometheus-meter-provider : uses + private-filtered-meter-provider ||--|{ private-meter-provider : uses public-meter-provider ||--|{ public-meter : creates public-prometheus-meter-provider ||--|{ public-prometheus-meter : creates @@ -49,7 +49,7 @@ erDiagram ``` ### Instrument -A histogram or counter that is used to record metrics. +A histogram, counter or gauge that is used to record metrics. ### Meter Creates instruments, also contains a reference to exporters so that when instruments are created the @@ -94,9 +94,19 @@ They are highly optimised, allow dynamic attributes, are easy to use and support ### Usage + + +```rust + u64_counter!("test", "test description", 1, "attr" => "val"); + u64_counter!("test", "test description", 1, &attributes); + u64_counter!("test", "test description", 1); + +``` + +### Testing When using the macro in a test you will need a different pattern depending on if you are writing a sync or async test. 
-#### Sync +#### Testing Sync ```rust #[test] fn test_non_async() { @@ -106,7 +116,7 @@ When using the macro in a test you will need a different pattern depending on if } ``` -#### Async +#### Testing Async Make sure to use `.with_metrics()` method on the async block to ensure that the metrics are stored in a task local. *Tests will silently fail to record metrics if this is not done.* From 62b2d69fa9439a23471333248cc7a88410482431 Mon Sep 17 00:00:00 2001 From: bryn Date: Wed, 20 Sep 2023 17:01:34 +0100 Subject: [PATCH 10/30] No need to use a boxed meter --- apollo-router/src/metrics/aggregation.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/apollo-router/src/metrics/aggregation.rs b/apollo-router/src/metrics/aggregation.rs index 89309d6ba6..5ea2e9d4d4 100644 --- a/apollo-router/src/metrics/aggregation.rs +++ b/apollo-router/src/metrics/aggregation.rs @@ -23,7 +23,6 @@ use opentelemetry::metrics::SyncUpDownCounter; use opentelemetry::metrics::Unit; use opentelemetry::metrics::UpDownCounter; use opentelemetry::KeyValue; -use opentelemetry_api::global::ObjectSafeMeterProvider; use opentelemetry_api::metrics::AsyncInstrument; use opentelemetry_api::metrics::CallbackRegistration; use opentelemetry_api::metrics::MetricsError; @@ -169,7 +168,7 @@ impl Inner { schema_url: schema_url.clone(), }) .or_insert_with(|| { - provider.versioned_meter_cow( + provider.versioned_meter( name.clone(), version.clone(), schema_url.clone(), From 356673c7400d0f291d9f38571bef92d2b6e0fede Mon Sep 17 00:00:00 2001 From: bryn Date: Wed, 20 Sep 2023 17:03:20 +0100 Subject: [PATCH 11/30] Preallocate vecs to the correct size. 
--- apollo-router/src/metrics/aggregation.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/apollo-router/src/metrics/aggregation.rs b/apollo-router/src/metrics/aggregation.rs index 5ea2e9d4d4..261e6765d7 100644 --- a/apollo-router/src/metrics/aggregation.rs +++ b/apollo-router/src/metrics/aggregation.rs @@ -157,7 +157,7 @@ impl Inner { let name = name.into(); let version = version.map(|v| v.into()); let schema_url = schema_url.map(|v| v.into()); - let mut meters = Vec::new(); + let mut meters = Vec::with_capacity(self.providers.len()); for (provider, existing_meters) in self.providers.values_mut() { meters.push( @@ -420,7 +420,7 @@ impl InstrumentProvider for AggregateInstrumentProvider { // So the callback is called, an observable is updated, but only the observable associated with the correct meter will take effect let callback = Arc::new(callbacks); - let mut callback_registrations = Vec::new(); + let mut callback_registrations = Vec::with_capacity(self.meters.len()); for meter in &self.meters { let callback = callback.clone(); // If this fails there is no recovery as some callbacks may be registered From 9f9a1a68d2f19f1f3522751f4d2ecec23c6d7db2 Mon Sep 17 00:00:00 2001 From: bryn Date: Wed, 20 Sep 2023 17:35:27 +0100 Subject: [PATCH 12/30] Fix metric that accidentally got renamed. 
--- apollo-router/src/plugins/telemetry/mod.rs | 2 +- ...ry__tests__it_test_prometheus_metrics.snap | 26 +++++++++---------- ...est_prometheus_metrics_custom_buckets.snap | 8 +++--- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/apollo-router/src/plugins/telemetry/mod.rs b/apollo-router/src/plugins/telemetry/mod.rs index 81f66cccd6..4cd9bc7e4a 100644 --- a/apollo-router/src/plugins/telemetry/mod.rs +++ b/apollo-router/src/plugins/telemetry/mod.rs @@ -902,7 +902,7 @@ impl Telemetry { ); f64_histogram!( - "apollo_router_http_requests_duration", + "apollo_router_http_request_duration_seconds", "Duration of HTTP requests.", request_duration.as_secs_f64(), metric_attrs diff --git a/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics.snap b/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics.snap index 0f56d4f32a..eae5de460a 100644 --- a/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics.snap +++ b/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics.snap @@ -2,16 +2,16 @@ source: apollo-router/src/plugins/telemetry/mod.rs expression: prometheus_metrics --- -apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="+Inf"} 1 -apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="0.001"} 1 -apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="0.005"} 1 -apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="0.015"} 1 -apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="0.05"} 1 -apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="0.1"} 1 
-apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="0.2"} 1 -apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="0.3"} 1 -apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="0.4"} 1 -apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="0.5"} 1 -apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="1"} 1 -apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="10"} 1 -apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="5"} 1 +apollo_router_http_request_duration_seconds_bucket{status="200",otel_scope_name="apollo/router",le="+Inf"} 1 +apollo_router_http_request_duration_seconds_bucket{status="200",otel_scope_name="apollo/router",le="0.001"} 1 +apollo_router_http_request_duration_seconds_bucket{status="200",otel_scope_name="apollo/router",le="0.005"} 1 +apollo_router_http_request_duration_seconds_bucket{status="200",otel_scope_name="apollo/router",le="0.015"} 1 +apollo_router_http_request_duration_seconds_bucket{status="200",otel_scope_name="apollo/router",le="0.05"} 1 +apollo_router_http_request_duration_seconds_bucket{status="200",otel_scope_name="apollo/router",le="0.1"} 1 +apollo_router_http_request_duration_seconds_bucket{status="200",otel_scope_name="apollo/router",le="0.2"} 1 +apollo_router_http_request_duration_seconds_bucket{status="200",otel_scope_name="apollo/router",le="0.3"} 1 +apollo_router_http_request_duration_seconds_bucket{status="200",otel_scope_name="apollo/router",le="0.4"} 1 +apollo_router_http_request_duration_seconds_bucket{status="200",otel_scope_name="apollo/router",le="0.5"} 1 +apollo_router_http_request_duration_seconds_bucket{status="200",otel_scope_name="apollo/router",le="1"} 1 
+apollo_router_http_request_duration_seconds_bucket{status="200",otel_scope_name="apollo/router",le="10"} 1 +apollo_router_http_request_duration_seconds_bucket{status="200",otel_scope_name="apollo/router",le="5"} 1 diff --git a/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics_custom_buckets.snap b/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics_custom_buckets.snap index d60cea7aee..3f346c2ad6 100644 --- a/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics_custom_buckets.snap +++ b/apollo-router/src/plugins/telemetry/snapshots/apollo_router__plugins__telemetry__tests__it_test_prometheus_metrics_custom_buckets.snap @@ -2,7 +2,7 @@ source: apollo-router/src/plugins/telemetry/mod.rs expression: prometheus_metrics --- -apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="+Inf"} 1 -apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="10"} 1 -apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="20"} 1 -apollo_router_http_requests_duration_bucket{status="200",otel_scope_name="apollo/router",le="5"} 1 +apollo_router_http_request_duration_seconds_bucket{status="200",otel_scope_name="apollo/router",le="+Inf"} 1 +apollo_router_http_request_duration_seconds_bucket{status="200",otel_scope_name="apollo/router",le="10"} 1 +apollo_router_http_request_duration_seconds_bucket{status="200",otel_scope_name="apollo/router",le="20"} 1 +apollo_router_http_request_duration_seconds_bucket{status="200",otel_scope_name="apollo/router",le="5"} 1 From 2117044f2c12cdfffe6e434e6e839809e24c3c07 Mon Sep 17 00:00:00 2001 From: bryn Date: Wed, 20 Sep 2023 17:38:12 +0100 Subject: [PATCH 13/30] Add another lock for a prometheus test --- apollo-router/src/plugins/telemetry/mod.rs | 1 + 1 file 
changed, 1 insertion(+) diff --git a/apollo-router/src/plugins/telemetry/mod.rs b/apollo-router/src/plugins/telemetry/mod.rs index 4cd9bc7e4a..c9182777b7 100644 --- a/apollo-router/src/plugins/telemetry/mod.rs +++ b/apollo-router/src/plugins/telemetry/mod.rs @@ -2412,6 +2412,7 @@ mod tests { #[tokio::test] async fn it_test_prometheus_wrong_endpoint() { + let _lock = lock_prometheus().await; async { let plugin = create_plugin_with_config(include_str!("testdata/prometheus.router.yaml")).await; From 0b8561f56f73e6d91cc158d06812fac2094ab320 Mon Sep 17 00:00:00 2001 From: bryn Date: Thu, 21 Sep 2023 10:04:05 +0100 Subject: [PATCH 14/30] Fix service.name detection. Order of priority: env yaml-attributes yaml-service-name --- .../src/plugins/telemetry/metrics/mod.rs | 92 ++++++++++++++++++- .../plugins/telemetry/metrics/prometheus.rs | 13 +-- apollo-router/src/plugins/telemetry/mod.rs | 49 +--------- 3 files changed, 98 insertions(+), 56 deletions(-) diff --git a/apollo-router/src/plugins/telemetry/metrics/mod.rs b/apollo-router/src/plugins/telemetry/metrics/mod.rs index a9556795a4..e108044759 100644 --- a/apollo-router/src/plugins/telemetry/metrics/mod.rs +++ b/apollo-router/src/plugins/telemetry/metrics/mod.rs @@ -1,4 +1,5 @@ use std::collections::HashMap; +use std::time::Duration; use ::serde::Deserialize; use access_json::JSONQuery; @@ -9,6 +10,10 @@ use multimap::MultiMap; use opentelemetry::sdk::metrics::reader::AggregationSelector; use opentelemetry::sdk::metrics::Aggregation; use opentelemetry::sdk::metrics::InstrumentKind; +use opentelemetry::sdk::resource::ResourceDetector; +use opentelemetry::sdk::resource::SdkProvidedResourceDetector; +use opentelemetry::sdk::Resource; +use opentelemetry_api::KeyValue; use regex::Regex; use schemars::JsonSchema; use serde::Serialize; @@ -22,6 +27,7 @@ use crate::plugin::serde::deserialize_json_query; use crate::plugin::serde::deserialize_regex; use crate::plugins::telemetry::apollo_exporter::Sender; use 
crate::plugins::telemetry::config::AttributeValue; +use crate::plugins::telemetry::config::Conf; use crate::plugins::telemetry::config::MetricsCommon; use crate::router_factory::Endpoint; use crate::Context; @@ -474,13 +480,97 @@ impl AttributesForwardConf { } } -#[derive(Default)] pub(crate) struct MetricsBuilder { pub(crate) public_meter_provider_builder: opentelemetry::sdk::metrics::MeterProviderBuilder, pub(crate) apollo_meter_provider_builder: opentelemetry::sdk::metrics::MeterProviderBuilder, pub(crate) prometheus_meter_provider: Option, pub(crate) custom_endpoints: MultiMap, pub(crate) apollo_metrics_sender: Sender, + pub(crate) resource: Resource, +} + +struct ConfigResourceDetector(MetricsCommon); + +impl ResourceDetector for ConfigResourceDetector { + fn detect(&self, _timeout: Duration) -> Resource { + let mut resource = Resource::new( + vec![ + self.0.service_name.clone().map(|service_name| { + KeyValue::new( + opentelemetry_semantic_conventions::resource::SERVICE_NAME, + service_name, + ) + }), + self.0.service_namespace.clone().map(|service_namespace| { + KeyValue::new( + opentelemetry_semantic_conventions::resource::SERVICE_NAMESPACE, + service_namespace, + ) + }), + ] + .into_iter() + .filter_map(|x| x) + .collect::>(), + ); + resource = resource.merge(&mut Resource::new( + self.0 + .resources + .clone() + .into_iter() + .map(|(k, v)| KeyValue::new(k, v)), + )); + resource + } +} + +impl MetricsBuilder { + pub(crate) fn new(config: &Conf) -> Self { + let metrics_common_config = config + .metrics + .clone() + .map(|m| m.common) + .flatten() + .unwrap_or_default(); + + let mut resource = Resource::from_detectors( + Duration::from_secs(0), + vec![ + Box::new(ConfigResourceDetector(metrics_common_config.clone())), + Box::new(SdkProvidedResourceDetector), + Box::new(opentelemetry::sdk::resource::EnvResourceDetector::new()), + ], + ); + + // Otel resources can be initialized from env variables, there is an override mechanism, but it's broken for service 
name as it will always override service.name + // If the service name is set to unknown service then override it from the config + if resource.get(opentelemetry_semantic_conventions::resource::SERVICE_NAME) + == Some("unknown_service".into()) + { + if let Some(service_name) = Resource::from_detectors( + Duration::from_secs(0), + vec![Box::new(ConfigResourceDetector( + metrics_common_config.clone(), + ))], + ) + .get(opentelemetry_semantic_conventions::resource::SERVICE_NAME) + { + resource = resource.merge(&mut Resource::new(vec![KeyValue::new( + opentelemetry_semantic_conventions::resource::SERVICE_NAME, + service_name, + )])); + } + } + + Self { + resource: resource.clone(), + public_meter_provider_builder: opentelemetry::sdk::metrics::MeterProvider::builder() + .with_resource(resource.clone()), + apollo_meter_provider_builder: opentelemetry::sdk::metrics::MeterProvider::builder(), + prometheus_meter_provider: None, + custom_endpoints: MultiMap::new(), + apollo_metrics_sender: Sender::default(), + } + } } pub(crate) trait MetricsConfigurator { diff --git a/apollo-router/src/plugins/telemetry/metrics/prometheus.rs b/apollo-router/src/plugins/telemetry/metrics/prometheus.rs index c3169ebb4c..0ac2524803 100644 --- a/apollo-router/src/plugins/telemetry/metrics/prometheus.rs +++ b/apollo-router/src/plugins/telemetry/metrics/prometheus.rs @@ -8,7 +8,6 @@ use once_cell::sync::Lazy; use opentelemetry::sdk::metrics::MeterProvider; use opentelemetry::sdk::metrics::MeterProviderBuilder; use opentelemetry::sdk::Resource; -use opentelemetry_api::KeyValue; use prometheus::Encoder; use prometheus::Registry; use prometheus::TextEncoder; @@ -91,16 +90,8 @@ impl MetricsConfigurator for Config { // Prometheus metrics are special, they must persist between reloads. This means that we only want to create something new if the resources have changed. // The prometheus exporter, and the associated registry are linked, so replacing one means replacing the other. 
- let resource = Resource::new( - metrics_config - .resources - .clone() - .into_iter() - .map(|(k, v)| KeyValue::new(k, v)), - ); - let prometheus_config = PrometheusConfig { - resource: resource.clone(), + resource: builder.resource.clone(), buckets: metrics_config.buckets.clone(), }; @@ -142,7 +133,7 @@ impl MetricsConfigurator for Config { let meter_provider = MeterProvider::builder() .with_reader(exporter) - .with_resource(resource) + .with_resource(builder.resource.clone()) .build(); builder.custom_endpoints.insert( self.listen.clone(), diff --git a/apollo-router/src/plugins/telemetry/mod.rs b/apollo-router/src/plugins/telemetry/mod.rs index c9182777b7..e1ae8e1bed 100644 --- a/apollo-router/src/plugins/telemetry/mod.rs +++ b/apollo-router/src/plugins/telemetry/mod.rs @@ -30,7 +30,6 @@ use opentelemetry::propagation::Injector; use opentelemetry::propagation::TextMapPropagator; use opentelemetry::sdk::propagation::TextMapCompositePropagator; use opentelemetry::sdk::trace::Builder; -use opentelemetry::sdk::Resource; use opentelemetry::trace::SpanContext; use opentelemetry::trace::SpanId; use opentelemetry::trace::TraceContextExt; @@ -148,7 +147,6 @@ pub(crate) const OPERATION_KIND: &str = "apollo_telemetry::operation_kind"; pub(crate) const STUDIO_EXCLUDE: &str = "apollo_telemetry::studio::exclude"; pub(crate) const LOGGING_DISPLAY_HEADERS: &str = "apollo_telemetry::logging::display_headers"; pub(crate) const LOGGING_DISPLAY_BODY: &str = "apollo_telemetry::logging::display_body"; -const DEFAULT_SERVICE_NAME: &str = "apollo-router"; const GLOBAL_TRACER_NAME: &str = "apollo-router"; const DEFAULT_EXPOSE_TRACE_ID_HEADER: &str = "apollo-trace-id"; @@ -672,49 +670,12 @@ impl Telemetry { fn create_metrics_builder(config: &config::Conf) -> Result { let metrics_config = config.metrics.clone().unwrap_or_default(); - let metrics_common_config = &mut metrics_config.common.unwrap_or_default(); - // Set default service name for metrics - if metrics_common_config - .resources 
- .get(opentelemetry_semantic_conventions::resource::SERVICE_NAME.as_str()) - .is_none() - { - metrics_common_config.resources.insert( - String::from(opentelemetry_semantic_conventions::resource::SERVICE_NAME.as_str()), - String::from( - metrics_common_config - .service_name - .as_deref() - .unwrap_or(DEFAULT_SERVICE_NAME), - ), - ); - } - if let Some(service_namespace) = &metrics_common_config.service_namespace { - metrics_common_config.resources.insert( - String::from( - opentelemetry_semantic_conventions::resource::SERVICE_NAMESPACE.as_str(), - ), - service_namespace.clone(), - ); - } - - let mut builder = MetricsBuilder::default(); - builder.public_meter_provider_builder = builder - .public_meter_provider_builder - .with_resource(Resource::new( - config - .metrics - .as_ref() - .and_then(|m| m.common.as_ref()) - .map(|c| c.resources.clone()) - .unwrap_or_default() - .into_iter() - .map(|(k, v)| KeyValue::new(k, v)), - )); - builder = setup_metrics_exporter(builder, &config.apollo, metrics_common_config)?; + let metrics_common_config = metrics_config.common.unwrap_or_default().clone(); + let mut builder = MetricsBuilder::new(config); + builder = setup_metrics_exporter(builder, &config.apollo, &metrics_common_config)?; builder = - setup_metrics_exporter(builder, &metrics_config.prometheus, metrics_common_config)?; - builder = setup_metrics_exporter(builder, &metrics_config.otlp, metrics_common_config)?; + setup_metrics_exporter(builder, &metrics_config.prometheus, &metrics_common_config)?; + builder = setup_metrics_exporter(builder, &metrics_config.otlp, &metrics_common_config)?; Ok(builder) } From 094c2b9fe28d884befca9335b5ea97bd48f9a1f3 Mon Sep 17 00:00:00 2001 From: bryn Date: Thu, 21 Sep 2023 10:38:41 +0100 Subject: [PATCH 15/30] Lint --- .../src/plugins/telemetry/metrics/mod.rs | 5 +- licenses.html | 504 +++++++++++++++--- 2 files changed, 433 insertions(+), 76 deletions(-) diff --git a/apollo-router/src/plugins/telemetry/metrics/mod.rs 
b/apollo-router/src/plugins/telemetry/metrics/mod.rs index e108044759..1b341c0cc8 100644 --- a/apollo-router/src/plugins/telemetry/metrics/mod.rs +++ b/apollo-router/src/plugins/telemetry/metrics/mod.rs @@ -509,7 +509,7 @@ impl ResourceDetector for ConfigResourceDetector { }), ] .into_iter() - .filter_map(|x| x) + .flatten() .collect::>(), ); resource = resource.merge(&mut Resource::new( @@ -528,8 +528,7 @@ impl MetricsBuilder { let metrics_common_config = config .metrics .clone() - .map(|m| m.common) - .flatten() + .and_then(|m| m.common) .unwrap_or_default(); let mut resource = Resource::from_detectors( diff --git a/licenses.html b/licenses.html index 28c09f483c..9384317a87 100644 --- a/licenses.html +++ b/licenses.html @@ -45,9 +45,9 @@

Third Party Licenses

Overview of licenses:

  • MIT License (96)
  • -
  • Apache License 2.0 (62)
  • +
  • Apache License 2.0 (64)
  • BSD 3-Clause "New" or "Revised" License (10)
  • -
  • ISC License (9)
  • +
  • ISC License (7)
  • Mozilla Public License 2.0 (3)
  • Creative Commons Zero v1.0 Universal (2)
  • Elastic License 2.0 (2)
  • @@ -1711,21 +1711,230 @@

    Used by:

    Apache License 2.0

    Used by:

    +
                                     Apache License
    +                           Version 2.0, January 2004
    +                        http://www.apache.org/licenses/
    +
    +   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
    +
    +   1. Definitions.
    +
    +      "License" shall mean the terms and conditions for use, reproduction,
    +      and distribution as defined by Sections 1 through 9 of this document.
    +
    +      "Licensor" shall mean the copyright owner or entity authorized by
    +      the copyright owner that is granting the License.
    +
    +      "Legal Entity" shall mean the union of the acting entity and all
    +      other entities that control, are controlled by, or are under common
    +      control with that entity. For the purposes of this definition,
    +      "control" means (i) the power, direct or indirect, to cause the
    +      direction or management of such entity, whether by contract or
    +      otherwise, or (ii) ownership of fifty percent (50%) or more of the
    +      outstanding shares, or (iii) beneficial ownership of such entity.
    +
    +      "You" (or "Your") shall mean an individual or Legal Entity
    +      exercising permissions granted by this License.
    +
    +      "Source" form shall mean the preferred form for making modifications,
    +      including but not limited to software source code, documentation
    +      source, and configuration files.
    +
    +      "Object" form shall mean any form resulting from mechanical
    +      transformation or translation of a Source form, including but
    +      not limited to compiled object code, generated documentation,
    +      and conversions to other media types.
    +
    +      "Work" shall mean the work of authorship, whether in Source or
    +      Object form, made available under the License, as indicated by a
    +      copyright notice that is included in or attached to the work
    +      (an example is provided in the Appendix below).
    +
    +      "Derivative Works" shall mean any work, whether in Source or Object
    +      form, that is based on (or derived from) the Work and for which the
    +      editorial revisions, annotations, elaborations, or other modifications
    +      represent, as a whole, an original work of authorship. For the purposes
    +      of this License, Derivative Works shall not include works that remain
    +      separable from, or merely link (or bind by name) to the interfaces of,
    +      the Work and Derivative Works thereof.
    +
    +      "Contribution" shall mean any work of authorship, including
    +      the original version of the Work and any modifications or additions
    +      to that Work or Derivative Works thereof, that is intentionally
    +      submitted to Licensor for inclusion in the Work by the copyright owner
    +      or by an individual or Legal Entity authorized to submit on behalf of
    +      the copyright owner. For the purposes of this definition, "submitted"
    +      means any form of electronic, verbal, or written communication sent
    +      to the Licensor or its representatives, including but not limited to
    +      communication on electronic mailing lists, source code control systems,
    +      and issue tracking systems that are managed by, or on behalf of, the
    +      Licensor for the purpose of discussing and improving the Work, but
    +      excluding communication that is conspicuously marked or otherwise
    +      designated in writing by the copyright owner as "Not a Contribution."
    +
    +      "Contributor" shall mean Licensor and any individual or Legal Entity
    +      on behalf of whom a Contribution has been received by Licensor and
    +      subsequently incorporated within the Work.
    +
    +   2. Grant of Copyright License. Subject to the terms and conditions of
    +      this License, each Contributor hereby grants to You a perpetual,
    +      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
    +      copyright license to reproduce, prepare Derivative Works of,
    +      publicly display, publicly perform, sublicense, and distribute the
    +      Work and such Derivative Works in Source or Object form.
    +
    +   3. Grant of Patent License. Subject to the terms and conditions of
    +      this License, each Contributor hereby grants to You a perpetual,
    +      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
    +      (except as stated in this section) patent license to make, have made,
    +      use, offer to sell, sell, import, and otherwise transfer the Work,
    +      where such license applies only to those patent claims licensable
    +      by such Contributor that are necessarily infringed by their
    +      Contribution(s) alone or by combination of their Contribution(s)
    +      with the Work to which such Contribution(s) was submitted. If You
    +      institute patent litigation against any entity (including a
    +      cross-claim or counterclaim in a lawsuit) alleging that the Work
    +      or a Contribution incorporated within the Work constitutes direct
    +      or contributory patent infringement, then any patent licenses
    +      granted to You under this License for that Work shall terminate
    +      as of the date such litigation is filed.
    +
    +   4. Redistribution. You may reproduce and distribute copies of the
    +      Work or Derivative Works thereof in any medium, with or without
    +      modifications, and in Source or Object form, provided that You
    +      meet the following conditions:
    +
    +      (a) You must give any other recipients of the Work or
    +          Derivative Works a copy of this License; and
    +
    +      (b) You must cause any modified files to carry prominent notices
    +          stating that You changed the files; and
    +
    +      (c) You must retain, in the Source form of any Derivative Works
    +          that You distribute, all copyright, patent, trademark, and
    +          attribution notices from the Source form of the Work,
    +          excluding those notices that do not pertain to any part of
    +          the Derivative Works; and
    +
    +      (d) If the Work includes a "NOTICE" text file as part of its
    +          distribution, then any Derivative Works that You distribute must
    +          include a readable copy of the attribution notices contained
    +          within such NOTICE file, excluding those notices that do not
    +          pertain to any part of the Derivative Works, in at least one
    +          of the following places: within a NOTICE text file distributed
    +          as part of the Derivative Works; within the Source form or
    +          documentation, if provided along with the Derivative Works; or,
    +          within a display generated by the Derivative Works, if and
    +          wherever such third-party notices normally appear. The contents
    +          of the NOTICE file are for informational purposes only and
    +          do not modify the License. You may add Your own attribution
    +          notices within Derivative Works that You distribute, alongside
    +          or as an addendum to the NOTICE text from the Work, provided
    +          that such additional attribution notices cannot be construed
    +          as modifying the License.
    +
    +      You may add Your own copyright statement to Your modifications and
    +      may provide additional or different license terms and conditions
    +      for use, reproduction, or distribution of Your modifications, or
    +      for any such Derivative Works as a whole, provided Your use,
    +      reproduction, and distribution of the Work otherwise complies with
    +      the conditions stated in this License.
    +
    +   5. Submission of Contributions. Unless You explicitly state otherwise,
    +      any Contribution intentionally submitted for inclusion in the Work
    +      by You to the Licensor shall be under the terms and conditions of
    +      this License, without any additional terms or conditions.
    +      Notwithstanding the above, nothing herein shall supersede or modify
    +      the terms of any separate license agreement you may have executed
    +      with Licensor regarding such Contributions.
    +
    +   6. Trademarks. This License does not grant permission to use the trade
    +      names, trademarks, service marks, or product names of the Licensor,
    +      except as required for reasonable and customary use in describing the
    +      origin of the Work and reproducing the content of the NOTICE file.
    +
    +   7. Disclaimer of Warranty. Unless required by applicable law or
    +      agreed to in writing, Licensor provides the Work (and each
    +      Contributor provides its Contributions) on an "AS IS" BASIS,
    +      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
    +      implied, including, without limitation, any warranties or conditions
    +      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
    +      PARTICULAR PURPOSE. You are solely responsible for determining the
    +      appropriateness of using or redistributing the Work and assume any
    +      risks associated with Your exercise of permissions under this License.
    +
    +   8. Limitation of Liability. In no event and under no legal theory,
    +      whether in tort (including negligence), contract, or otherwise,
    +      unless required by applicable law (such as deliberate and grossly
    +      negligent acts) or agreed to in writing, shall any Contributor be
    +      liable to You for damages, including any direct, indirect, special,
    +      incidental, or consequential damages of any character arising as a
    +      result of this License or out of the use or inability to use the
    +      Work (including but not limited to damages for loss of goodwill,
    +      work stoppage, computer failure or malfunction, or any and all
    +      other commercial damages or losses), even if such Contributor
    +      has been advised of the possibility of such damages.
    +
    +   9. Accepting Warranty or Additional Liability. While redistributing
    +      the Work or Derivative Works thereof, You may choose to offer,
    +      and charge a fee for, acceptance of support, warranty, indemnity,
    +      or other liability obligations and/or rights consistent with this
    +      License. However, in accepting such obligations, You may act only
    +      on Your own behalf and on Your sole responsibility, not on behalf
    +      of any other Contributor, and only if You agree to indemnify,
    +      defend, and hold each Contributor harmless for any liability
    +      incurred by, or claims asserted against, such Contributor by reason
    +      of your accepting any such warranty or additional liability.
    +
    +   END OF TERMS AND CONDITIONS
    +
    +   APPENDIX: How to apply the Apache License to your work.
    +
    +      To apply the Apache License to your work, attach the following
    +      boilerplate notice, with the fields enclosed by brackets "[]"
    +      replaced with your own identifying information. (Don't include
    +      the brackets!)  The text should be enclosed in the appropriate
    +      comment syntax for the file format. We also recommend that a
    +      file or class name and description of purpose be included on the
    +      same "printed page" as the copyright notice for easier
    +      identification within third-party archives.
    +
    +   Copyright 2023 The OpenTelemetry Authors
    +
    +   Licensed under the Apache License, Version 2.0 (the "License");
    +   you may not use this file except in compliance with the License.
    +   You may obtain a copy of the License at
    +
    +       http://www.apache.org/licenses/LICENSE-2.0
    +
    +   Unless required by applicable law or agreed to in writing, software
    +   distributed under the License is distributed on an "AS IS" BASIS,
    +   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    +   See the License for the specific language governing permissions and
    +   limitations under the License.
    +
    + +
  • +

    Apache License 2.0

    +

    Used by:

    + @@ -3378,6 +3587,204 @@

    Used by:

    of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS + +
  • +
  • +

    Apache License 2.0

    +

    Used by:

    + +
                                     Apache License
    +                           Version 2.0, January 2004
    +                        http://www.apache.org/licenses/
    +
    +   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
    +
    +   1. Definitions.
    +
    +      "License" shall mean the terms and conditions for use, reproduction,
    +      and distribution as defined by Sections 1 through 9 of this document.
    +
    +      "Licensor" shall mean the copyright owner or entity authorized by
    +      the copyright owner that is granting the License.
    +
    +      "Legal Entity" shall mean the union of the acting entity and all
    +      other entities that control, are controlled by, or are under common
    +      control with that entity. For the purposes of this definition,
    +      "control" means (i) the power, direct or indirect, to cause the
    +      direction or management of such entity, whether by contract or
    +      otherwise, or (ii) ownership of fifty percent (50%) or more of the
    +      outstanding shares, or (iii) beneficial ownership of such entity.
    +
    +      "You" (or "Your") shall mean an individual or Legal Entity
    +      exercising permissions granted by this License.
    +
    +      "Source" form shall mean the preferred form for making modifications,
    +      including but not limited to software source code, documentation
    +      source, and configuration files.
    +
    +      "Object" form shall mean any form resulting from mechanical
    +      transformation or translation of a Source form, including but
    +      not limited to compiled object code, generated documentation,
    +      and conversions to other media types.
    +
    +      "Work" shall mean the work of authorship, whether in Source or
    +      Object form, made available under the License, as indicated by a
    +      copyright notice that is included in or attached to the work
    +      (an example is provided in the Appendix below).
    +
    +      "Derivative Works" shall mean any work, whether in Source or Object
    +      form, that is based on (or derived from) the Work and for which the
    +      editorial revisions, annotations, elaborations, or other modifications
    +      represent, as a whole, an original work of authorship. For the purposes
    +      of this License, Derivative Works shall not include works that remain
    +      separable from, or merely link (or bind by name) to the interfaces of,
    +      the Work and Derivative Works thereof.
    +
    +      "Contribution" shall mean any work of authorship, including
    +      the original version of the Work and any modifications or additions
    +      to that Work or Derivative Works thereof, that is intentionally
    +      submitted to Licensor for inclusion in the Work by the copyright owner
    +      or by an individual or Legal Entity authorized to submit on behalf of
    +      the copyright owner. For the purposes of this definition, "submitted"
    +      means any form of electronic, verbal, or written communication sent
    +      to the Licensor or its representatives, including but not limited to
    +      communication on electronic mailing lists, source code control systems,
    +      and issue tracking systems that are managed by, or on behalf of, the
    +      Licensor for the purpose of discussing and improving the Work, but
    +      excluding communication that is conspicuously marked or otherwise
    +      designated in writing by the copyright owner as "Not a Contribution."
    +
    +      "Contributor" shall mean Licensor and any individual or Legal Entity
    +      on behalf of whom a Contribution has been received by Licensor and
    +      subsequently incorporated within the Work.
    +
    +   2. Grant of Copyright License. Subject to the terms and conditions of
    +      this License, each Contributor hereby grants to You a perpetual,
    +      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
    +      copyright license to reproduce, prepare Derivative Works of,
    +      publicly display, publicly perform, sublicense, and distribute the
    +      Work and such Derivative Works in Source or Object form.
    +
    +   3. Grant of Patent License. Subject to the terms and conditions of
    +      this License, each Contributor hereby grants to You a perpetual,
    +      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
    +      (except as stated in this section) patent license to make, have made,
    +      use, offer to sell, sell, import, and otherwise transfer the Work,
    +      where such license applies only to those patent claims licensable
    +      by such Contributor that are necessarily infringed by their
    +      Contribution(s) alone or by combination of their Contribution(s)
    +      with the Work to which such Contribution(s) was submitted. If You
    +      institute patent litigation against any entity (including a
    +      cross-claim or counterclaim in a lawsuit) alleging that the Work
    +      or a Contribution incorporated within the Work constitutes direct
    +      or contributory patent infringement, then any patent licenses
    +      granted to You under this License for that Work shall terminate
    +      as of the date such litigation is filed.
    +
    +   4. Redistribution. You may reproduce and distribute copies of the
    +      Work or Derivative Works thereof in any medium, with or without
    +      modifications, and in Source or Object form, provided that You
    +      meet the following conditions:
    +
    +      (a) You must give any other recipients of the Work or
    +          Derivative Works a copy of this License; and
    +
    +      (b) You must cause any modified files to carry prominent notices
    +          stating that You changed the files; and
    +
    +      (c) You must retain, in the Source form of any Derivative Works
    +          that You distribute, all copyright, patent, trademark, and
    +          attribution notices from the Source form of the Work,
    +          excluding those notices that do not pertain to any part of
    +          the Derivative Works; and
    +
    +      (d) If the Work includes a "NOTICE" text file as part of its
    +          distribution, then any Derivative Works that You distribute must
    +          include a readable copy of the attribution notices contained
    +          within such NOTICE file, excluding those notices that do not
    +          pertain to any part of the Derivative Works, in at least one
    +          of the following places: within a NOTICE text file distributed
    +          as part of the Derivative Works; within the Source form or
    +          documentation, if provided along with the Derivative Works; or,
    +          within a display generated by the Derivative Works, if and
    +          wherever such third-party notices normally appear. The contents
    +          of the NOTICE file are for informational purposes only and
    +          do not modify the License. You may add Your own attribution
    +          notices within Derivative Works that You distribute, alongside
    +          or as an addendum to the NOTICE text from the Work, provided
    +          that such additional attribution notices cannot be construed
    +          as modifying the License.
    +
    +      You may add Your own copyright statement to Your modifications and
    +      may provide additional or different license terms and conditions
    +      for use, reproduction, or distribution of Your modifications, or
    +      for any such Derivative Works as a whole, provided Your use,
    +      reproduction, and distribution of the Work otherwise complies with
    +      the conditions stated in this License.
    +
    +   5. Submission of Contributions. Unless You explicitly state otherwise,
    +      any Contribution intentionally submitted for inclusion in the Work
    +      by You to the Licensor shall be under the terms and conditions of
    +      this License, without any additional terms or conditions.
    +      Notwithstanding the above, nothing herein shall supersede or modify
    +      the terms of any separate license agreement you may have executed
    +      with Licensor regarding such Contributions.
    +
    +   6. Trademarks. This License does not grant permission to use the trade
    +      names, trademarks, service marks, or product names of the Licensor,
    +      except as required for reasonable and customary use in describing the
    +      origin of the Work and reproducing the content of the NOTICE file.
    +
    +   7. Disclaimer of Warranty. Unless required by applicable law or
    +      agreed to in writing, Licensor provides the Work (and each
    +      Contributor provides its Contributions) on an "AS IS" BASIS,
    +      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
    +      implied, including, without limitation, any warranties or conditions
    +      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
    +      PARTICULAR PURPOSE. You are solely responsible for determining the
    +      appropriateness of using or redistributing the Work and assume any
    +      risks associated with Your exercise of permissions under this License.
    +
    +   8. Limitation of Liability. In no event and under no legal theory,
    +      whether in tort (including negligence), contract, or otherwise,
    +      unless required by applicable law (such as deliberate and grossly
    +      negligent acts) or agreed to in writing, shall any Contributor be
    +      liable to You for damages, including any direct, indirect, special,
    +      incidental, or consequential damages of any character arising as a
    +      result of this License or out of the use or inability to use the
    +      Work (including but not limited to damages for loss of goodwill,
    +      work stoppage, computer failure or malfunction, or any and all
    +      other commercial damages or losses), even if such Contributor
    +      has been advised of the possibility of such damages.
    +
    +   9. Accepting Warranty or Additional Liability. While redistributing
    +      the Work or Derivative Works thereof, You may choose to offer,
    +      and charge a fee for, acceptance of support, warranty, indemnity,
    +      or other liability obligations and/or rights consistent with this
    +      License. However, in accepting such obligations, You may act only
    +      on Your own behalf and on Your sole responsibility, not on behalf
    +      of any other Contributor, and only if You agree to indemnify,
    +      defend, and hold each Contributor harmless for any liability
    +      incurred by, or claims asserted against, such Contributor by reason
    +      of your accepting any such warranty or additional liability.
    +
    +   END OF TERMS AND CONDITIONS
    +
    +   Copyright 2019 Yoshua Wuyts
    +
    +   Licensed under the Apache License, Version 2.0 (the "License");
    +   you may not use this file except in compliance with the License.
    +   You may obtain a copy of the License at
    +
    +       http://www.apache.org/licenses/LICENSE-2.0
    +
    +   Unless required by applicable law or agreed to in writing, software
    +   distributed under the License is distributed on an "AS IS" BASIS,
    +   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    +   See the License for the specific language governing permissions and
    +   limitations under the License.
     
  • @@ -5254,7 +5661,6 @@

    Used by:

    Apache License 2.0

    Used by:

                                  Apache License
    @@ -7146,12 +7552,21 @@ 

    Used by:

  • arc-swap
  • async-channel
  • async-compression
  • +
  • async-executor
  • +
  • async-global-executor
  • +
  • async-io
  • +
  • async-lock
  • +
  • async-process
  • +
  • async-std
  • +
  • async-task
  • +
  • atomic-waker
  • autocfg
  • backtrace
  • base64
  • base64
  • bitflags
  • bitflags
  • +
  • blocking
  • bstr
  • bumpalo
  • bytes-utils
  • @@ -7204,6 +7619,7 @@

    Used by:

  • indexmap
  • indexmap
  • inventory
  • +
  • io-lifetimes
  • itertools
  • itertools
  • jobserver
  • @@ -7215,6 +7631,7 @@

    Used by:

  • libz-ng-sys
  • libz-sys
  • linux-raw-sys
  • +
  • linux-raw-sys
  • lock_api
  • log
  • maplit
  • @@ -7265,7 +7682,7 @@

    Used by:

  • rustc_version
  • rustc_version
  • rustix
  • -
  • rustls
  • +
  • rustix
  • rustls
  • rustls-native-certs
  • rustls-pemfile
  • @@ -7282,6 +7699,7 @@

    Used by:

  • serde_spanned
  • serde_yaml
  • shellexpand
  • +
  • signal-hook
  • signal-hook-registry
  • smallvec
  • socket2
  • @@ -7298,7 +7716,6 @@

    Used by:

  • try_match
  • try_match
  • tungstenite
  • -
  • tungstenite
  • typed-builder
  • typetag
  • typetag-impl
  • @@ -7311,6 +7728,7 @@

    Used by:

  • unicode-width
  • url
  • uuid
  • +
  • value-bag
  • version_check
  • waker-fn
  • wasi
  • @@ -9841,6 +10259,7 @@

    Used by:

                                  Apache License
    @@ -10496,6 +10915,7 @@ 

    Used by:

  • apollo-compiler
  • apollo-encoder
  • apollo-parser
  • +
  • apollo-parser
  • apollo-smith
../../LICENSE-APACHE
@@ -11148,12 +11568,12 @@

Apache License 2.0

Used by:

  • apollo-compiler
  • -
  • apollo-parser
  • curve25519-dalek-derive
  • deadpool-runtime
  • deno-proc-macro-rules
  • deno-proc-macro-rules-macros
  • dunce
  • +
  • gloo-timers
  • graphql-introspection-query
  • graphql_client
  • graphql_client_codegen
  • @@ -12194,41 +12614,6 @@

    Used by:

    * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ - - -
  • -

    ISC License

    -

    Used by:

    - -
    // Copyright 2015 The Chromium Authors. All rights reserved.
    -//
    -// Redistribution and use in source and binary forms, with or without
    -// modification, are permitted provided that the following conditions are
    -// met:
    -//
    -//    * Redistributions of source code must retain the above copyright
    -// notice, this list of conditions and the following disclaimer.
    -//    * Redistributions in binary form must reproduce the above
    -// copyright notice, this list of conditions and the following disclaimer
    -// in the documentation and/or other materials provided with the
    -// distribution.
    -//    * Neither the name of Google Inc. nor the names of its
    -// contributors may be used to endorse or promote products derived from
    -// this software without specific prior written permission.
    -//
    -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     
  • @@ -12312,33 +12697,6 @@

    Used by:

    OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - -
  • -
  • -

    ISC License

    -

    Used by:

    - -
    Except as otherwise noted, this project is licensed under the following
    -(ISC-style) terms:
    -
    -Copyright 2015 Brian Smith.
    -
    -Permission to use, copy, modify, and/or distribute this software for any
    -purpose with or without fee is hereby granted, provided that the above
    -copyright notice and this permission notice appear in all copies.
    -
    -THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
    -WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
    -MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
    -ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
    -WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
    -ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
    -OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
    -
    -The files under third-party/chromium are licensed as described in
    -third-party/chromium/LICENSE.
     
  • @@ -12811,7 +13169,6 @@

    MIT License

    Used by:

    Copyright (c) 2017 Daniel Abramov
     Copyright (c) 2017 Alexey Galakhov
    @@ -15117,6 +15474,7 @@ 

    Used by:

  • regex-automata
  • same-file
  • termcolor
  • +
  • walkdir
  • winapi-util
This project is dual-licensed under the Unlicense and MIT licenses.

From 95dc6146346cba5c12bfbdddb93cf83673d77ac4 Mon Sep 17 00:00:00 2001
From: bryn 
Date: Thu, 21 Sep 2023 10:38:46 +0100
Subject: [PATCH 16/30] Changelog

---
 .changesets/maint_bryn_otel_update.md | 10 ++++++++++
 1 file changed, 10 insertions(+)
 create mode 100644 .changesets/maint_bryn_otel_update.md

diff --git a/.changesets/maint_bryn_otel_update.md b/.changesets/maint_bryn_otel_update.md
new file mode 100644
index 0000000000..b28c8270b4
--- /dev/null
+++ b/.changesets/maint_bryn_otel_update.md
@@ -0,0 +1,10 @@
+### Update to OpenTelemetry 0.20.0 ([PR #3649](https://github.com/apollographql/router/pull/3649))
+
+The router now uses OpenTelemetry 0.20.0. This includes a number of fixes and improvements from upstream.
+
+In particular:
+* Prometheus metrics are now aligned with the [OpenTelemetry spec](https://opentelemetry.io/docs/specs/otel/compatibility/prometheus_and_openmetrics/). 
+Users should check that their dashboards and alerts are properly configured.
+* The default metrics service name is now `unknown_service` as per the [OpenTelemetry spec](https://opentelemetry.io/docs/concepts/sdk-configuration/general-sdk-configuration/#otel_service_name). We will be following bring tracing into alignment in future. 
+
+By [@BrynCooke](https://github.com/BrynCooke) in https://github.com/apollographql/router/pull/3649

From 588d67fdf02be868a262245227fb65311beb532d Mon Sep 17 00:00:00 2001
From: bryn 
Date: Thu, 21 Sep 2023 10:57:40 +0100
Subject: [PATCH 17/30] Changelog

---
 .changesets/maint_bryn_otel_update.md | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/.changesets/maint_bryn_otel_update.md b/.changesets/maint_bryn_otel_update.md
index b28c8270b4..e2cb3f36ab 100644
--- a/.changesets/maint_bryn_otel_update.md
+++ b/.changesets/maint_bryn_otel_update.md
@@ -2,9 +2,15 @@
 
 The router now uses OpenTelemetry 0.20.0. This includes a number of fixes and improvements from upstream.
 
-In particular:
-* Prometheus metrics are now aligned with the [OpenTelemetry spec](https://opentelemetry.io/docs/specs/otel/compatibility/prometheus_and_openmetrics/). 
-Users should check that their dashboards and alerts are properly configured.
-* The default metrics service name is now `unknown_service` as per the [OpenTelemetry spec](https://opentelemetry.io/docs/concepts/sdk-configuration/general-sdk-configuration/#otel_service_name). We will be following bring tracing into alignment in future. 
+In particular metrics have some significant changes:
+* Prometheus metrics are now aligned with the [OpenTelemetry spec](https://opentelemetry.io/docs/specs/otel/compatibility/prometheus_and_openmetrics/), and will not report `service_name` on each individual metric. Resource attributes are now moved to a single `target_info` metric.
+
+  Users should check that their dashboards and alerts are properly configured when upgrading.
+
+* The default service name for metrics is now `unknown_service` as per the [OpenTelemetry spec](https://opentelemetry.io/docs/concepts/sdk-configuration/general-sdk-configuration/#otel_service_name).
+
+  Users should ensure to configure service name via router.yaml, or via the `OTEL_SERVICE_NAME` environment variable. 
+
+  We will be following bring tracing into alignment in future. 
 
 By [@BrynCooke](https://github.com/BrynCooke) in https://github.com/apollographql/router/pull/3649

From 1c5869bb00702c6c881a082c852d57b8eb0e9c18 Mon Sep 17 00:00:00 2001
From: bryn 
Date: Thu, 21 Sep 2023 11:46:58 +0100
Subject: [PATCH 18/30] Docs and changelog

---
 .changesets/maint_bryn_otel_update.md | 6 ++++++
 docs/source/configuration/metrics.mdx | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/.changesets/maint_bryn_otel_update.md b/.changesets/maint_bryn_otel_update.md
index e2cb3f36ab..91fac9c527 100644
--- a/.changesets/maint_bryn_otel_update.md
+++ b/.changesets/maint_bryn_otel_update.md
@@ -13,4 +13,10 @@ In particular metrics have some significant changes:
 
   We will be following bring tracing into alignment in future. 
 
+* The order of priority for setting service name has been brought into line with the rest of the router configuration. The order of priority is now:
+  1. `OTEL_RESOURCE_ATTRIBUTES` environment variable
+  2. `OTEL_SERVICE_NAME` environment variable
+  3. `resource_attributes` in router.yaml
+  4. `service_name` in router.yaml
+
 By [@BrynCooke](https://github.com/BrynCooke) in https://github.com/apollographql/router/pull/3649
diff --git a/docs/source/configuration/metrics.mdx b/docs/source/configuration/metrics.mdx
index f85ff5d428..bf6ed273ac 100644
--- a/docs/source/configuration/metrics.mdx
+++ b/docs/source/configuration/metrics.mdx
@@ -213,7 +213,7 @@ telemetry:
                   path: .type # JSON query path to fetch data from extensions
                 - name: message
                   path: .reason
-            # Will create this kind of metric for example apollo_router_http_requests_error_total{message="cannot contact the subgraph",service_name="apollo-router",subgraph="my_subgraph_name",subgraph_error_extended_type="SubrequestHttpError"}
+            # Will create this kind of metric for example apollo_router_http_requests_error_total{message="cannot contact the subgraph",subgraph="my_subgraph_name",subgraph_error_extended_type="SubrequestHttpError"}
           subgraphs:
             my_subgraph_name: # Apply these rules only for the subgraph named `my_subgraph_name`
               request:

From f0c9354daabb38d2a3b7c45810fdd09cfa8fc70a Mon Sep 17 00:00:00 2001
From: bryn 
Date: Thu, 21 Sep 2023 14:10:20 +0100
Subject: [PATCH 19/30] Add gauge test and add some docs.

---
 apollo-router/src/metrics/mod.rs | 29 +++++++++++++++++++++++++++--
 dev-docs/metrics.md              | 18 ++++++++++++++----
 2 files changed, 41 insertions(+), 6 deletions(-)

diff --git a/apollo-router/src/metrics/mod.rs b/apollo-router/src/metrics/mod.rs
index 532e19e890..16211a9215 100644
--- a/apollo-router/src/metrics/mod.rs
+++ b/apollo-router/src/metrics/mod.rs
@@ -377,7 +377,7 @@ macro_rules! u64_counter {
     };
 
     ($name:literal, $description:literal, $value: expr) => {
-        metric!(u64, counter, add, $name, $description, $value, &[]);
+        metric!(u64, counter, add, $name, $description, $value, []);
     }
 }
 
@@ -635,13 +635,38 @@ impl FutureMetricsExt for T where T: Future {}
 
 #[cfg(test)]
 mod test {
+    use opentelemetry_api::metrics::MeterProvider;
+    use opentelemetry_api::KeyValue;
+
     use crate::metrics::aggregation::MeterProviderType;
     use crate::metrics::meter_provider;
     use crate::metrics::FutureMetricsExt;
 
+    #[test]
+    fn test_gauge() {
+        meter_provider()
+            .meter("test")
+            .u64_observable_gauge("test")
+            .with_callback(|m| m.observe(5, &[]))
+            .init();
+        assert_metric!("test", 5);
+    }
+
+    #[test]
+    fn test_no_attributes() {
+        u64_counter!("test", "test description", 1);
+        assert_metric!("test", 1);
+    }
+
+    #[test]
+    fn test_dynamic_attributes() {
+        let attributes = vec![KeyValue::new("attr", "val")];
+        u64_counter!("test", "test description", 1, attributes);
+        assert_metric!("test", 1, "attr" => "val");
+    }
+
     #[test]
     fn test_multiple_calls() {
-        // Each test is run in a separate thread, metrics are stored in a thread local.
         fn my_method(val: &'static str) {
             u64_counter!("test", "test description", 1, "attr" => val);
         }
diff --git a/dev-docs/metrics.md b/dev-docs/metrics.md
index c1c2eae32b..e6caa8c9da 100644
--- a/dev-docs/metrics.md
+++ b/dev-docs/metrics.md
@@ -94,13 +94,23 @@ They are highly optimised, allow dynamic attributes, are easy to use and support
 
 ### Usage
 
+There are two classes of instrument, observable and non-observable. Observable instruments will ask for their value when they are exported, non-observable will update at the point of mutation.
 
+Observable gauges are attached to a particular meter, so they MUST be created after the telemetry plugin `activate()` has been called as this is the point where meters will updated.
+We're going to have to think about how to make this less brittle.
 
 ```rust
-    u64_counter!("test", "test description", 1, "attr" => "val");
-    u64_counter!("test", "test description", 1, &attributes);
-    u64_counter!("test", "test description", 1);
-
+// non-observable instruments - good for histograms and counters
+u64_counter!("test", "test description", 1, vec![KeyValue::new("attr", "val")]);    
+u64_counter!("test", "test description", 1, "attr" => "val");
+u64_counter!("test", "test description", 1);
+
+// observable instruments - good for gauges
+meter_provider()
+  .meter("test")
+  .u64_observable_gauge("test")
+  .with_callback(|m| m.observe(5, &[]))
+  .init();
 ```
 
 ### Testing

From aa2fab770219ba78560cb1673f7c435bab997836 Mon Sep 17 00:00:00 2001
From: bryn 
Date: Thu, 21 Sep 2023 15:12:56 +0100
Subject: [PATCH 20/30] Fix accidentally renamed metric

---
 apollo-router/src/plugins/telemetry/mod.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/apollo-router/src/plugins/telemetry/mod.rs b/apollo-router/src/plugins/telemetry/mod.rs
index e1ae8e1bed..cba3910004 100644
--- a/apollo-router/src/plugins/telemetry/mod.rs
+++ b/apollo-router/src/plugins/telemetry/mod.rs
@@ -1190,7 +1190,7 @@ impl Telemetry {
             }
         }
         f64_histogram!(
-            "http_requests_duration",
+            "apollo_router_http_request_duration_seconds",
             "Duration of HTTP requests.",
             now.elapsed().as_secs_f64(),
             metric_attrs

From 67957bd7454f14126a32f07cc4d8312652226dde Mon Sep 17 00:00:00 2001
From: bryn 
Date: Thu, 21 Sep 2023 15:39:25 +0100
Subject: [PATCH 21/30] Add tests for metric names

---
 apollo-router/src/plugins/telemetry/mod.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/apollo-router/src/plugins/telemetry/mod.rs b/apollo-router/src/plugins/telemetry/mod.rs
index cba3910004..ea1456c657 100644
--- a/apollo-router/src/plugins/telemetry/mod.rs
+++ b/apollo-router/src/plugins/telemetry/mod.rs
@@ -2184,6 +2184,7 @@ mod tests {
             make_supergraph_request(plugin.as_ref()).await;
 
             assert_metric!("apollo_router_http_requests_total", 1, "another_test" => "my_default_value", "my_value" => 2, "myname" => "label_value", "renamed_value" => "my_value_set", "status" => "200", "x-custom" => "coming_from_header");
+            assert_metric!("apollo_router_http_request_duration_seconds", 1, "another_test" => "my_default_value", "my_value" => 2, "myname" => "label_value", "renamed_value" => "my_value_set", "status" => "200", "x-custom" => "coming_from_header");
         }.with_metrics().await;
     }
 
@@ -2368,6 +2369,8 @@ mod tests {
                 .unwrap();
 
             assert_metric!("apollo_router_http_requests_total", 1, "another_test" => "my_default_value", "error" => "400 Bad Request", "myname" => "label_value", "renamed_value" => "my_value_set", "status" => "400");
+            assert_metric!("apollo_router_http_request_duration_seconds", 1, "another_test" => "my_default_value", "error" => "400 Bad Request", "myname" => "label_value", "renamed_value" => "my_value_set", "status" => "400");
+
         }.with_metrics().await;
     }
 

From 040dfb7ec8251c7b17b0aebf4060ff37263ad780 Mon Sep 17 00:00:00 2001
From: Coenen Benjamin 
Date: Fri, 22 Sep 2023 14:13:03 +0200
Subject: [PATCH 22/30] improve metrics macros (#3880)

improve metrics macros

Signed-off-by: Benjamin Coenen <5719034+bnjjj@users.noreply.github.com>
---
 apollo-router/src/metrics/mod.rs           | 222 +++++++++++++++++----
 apollo-router/src/plugins/telemetry/mod.rs | 140 ++++++++++---
 2 files changed, 290 insertions(+), 72 deletions(-)

diff --git a/apollo-router/src/metrics/mod.rs b/apollo-router/src/metrics/mod.rs
index 16211a9215..f0bfa62b57 100644
--- a/apollo-router/src/metrics/mod.rs
+++ b/apollo-router/src/metrics/mod.rs
@@ -367,11 +367,26 @@ pub(crate) fn meter_provider() -> AggregateMeterProvider {
 /// New metrics should be added using these macros.
 #[allow(unused_macros)]
 macro_rules! u64_counter {
-    ($name:literal, $description:literal, $value: expr, $($attr_key:expr => $attr_value:expr),+) => {
+    ($($name:ident).+, $description:literal, $value: expr, $($attr_key:literal = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+];
+        metric!(u64, counter, add, stringify!($($name).+), $description, $value, &attributes);
+    };
+
+    ($($name:ident).+, $description:literal, $value: expr, $($($attr_key:ident).+ = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+];
+        metric!(u64, counter, add, stringify!($($name).+), $description, $value, &attributes);
+    };
+
+    ($name:literal, $description:literal, $value: expr, $($attr_key:literal = $attr_value:expr),+) => {
         let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+];
         metric!(u64, counter, add, $name, $description, $value, &attributes);
     };
 
+    ($name:literal, $description:literal, $value: expr, $($($attr_key:ident).+ = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+];
+        metric!(u64, counter, add, $name, $description, $value, &attributes);
+    };
+
     ($name:literal, $description:literal, $value: expr, $attrs: expr) => {
         metric!(u64, counter, add, $name, $description, $value, $attrs);
     };
@@ -391,11 +406,25 @@ macro_rules! u64_counter {
 /// New metrics should be added using these macros.
 #[allow(unused_macros)]
 macro_rules! f64_counter {
-    ($name:literal, $description:literal, $value: expr, $($attr_key:expr => $attr_value:expr),+) => {
+    ($($name:ident).+, $description:literal, $value: expr, $($attr_key:literal = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+];
+        metric!(f64, counter, add, stringify!($($name).+), $description, $value, &attributes);
+    };
+
+    ($($name:ident).+, $description:literal, $value: expr, $($($attr_key:ident).+ = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+];
+        metric!(f64, counter, add, stringify!($($name).+), $description, $value, &attributes);
+    };
+
+    ($name:literal, $description:literal, $value: expr, $($attr_key:literal = $attr_value:expr),+) => {
         let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+];
         metric!(f64, counter, add, $name, $description, $value, &attributes);
     };
 
+    ($name:literal, $description:literal, $value: expr, $($($attr_key:ident).+ = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+];
+        metric!(f64, counter, add, $name, $description, $value, &attributes);
+    };
     ($name:literal, $description:literal, $value: expr, $attrs: expr) => {
         metric!(f64, counter, add, $name, $description, $value, $attrs);
     };
@@ -416,17 +445,32 @@ macro_rules! f64_counter {
 
 #[allow(unused_macros)]
 macro_rules! i64_up_down_counter {
-    ($name:literal, $description:literal, $value: expr, $($attr_key:expr => $attr_value:expr),+) => {
+    ($($name:ident).+, $description:literal, $value: expr, $($attr_key:literal = $attr_value:expr),+) => {
         let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+];
-        metric!(i64, histogram, record, $name, $description, $value, &attributes);
+        metric!(i64, up_down_counter, add, stringify!($($name).+), $description, $value, &attributes);
+    };
+
+    ($($name:ident).+, $description:literal, $value: expr, $($($attr_key:ident).+ = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+];
+        metric!(i64, up_down_counter, add, stringify!($($name).+), $description, $value, &attributes);
+    };
+
+    ($name:literal, $description:literal, $value: expr, $($attr_key:literal = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+];
+        metric!(i64, up_down_counter, add, $name, $description, $value, &attributes);
+    };
+
+    ($name:literal, $description:literal, $value: expr, $($($attr_key:ident).+ = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+];
+        metric!(i64, up_down_counter, add, $name, $description, $value, &attributes);
     };
 
     ($name:literal, $description:literal, $value: expr, $attrs: expr) => {
-        metric!(i64, histogram, record, $name, $description, $value, $attrs);
+        metric!(i64, up_down_counter, add, $name, $description, $value, $attrs);
     };
 
     ($name:literal, $description:literal, $value: expr) => {
-        metric!(i64, histogram, record, $name, $description, $value, &[]);
+        metric!(i64, up_down_counter, add, $name, $description, $value, &[]);
     };
 }
 
@@ -440,17 +484,32 @@ macro_rules! i64_up_down_counter {
 /// New metrics should be added using these macros.
 #[allow(unused_macros)]
 macro_rules! f64_up_down_counter {
-    ($name:literal, $description:literal, $value: expr, $($attr_key:expr => $attr_value:expr),+) => {
+    ($($name:ident).+, $description:literal, $value: expr, $($attr_key:literal = $attr_value:expr),+) => {
         let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+];
-        metric!(f64, histogram, record, $name, $description, $value, &attributes);
+        metric!(f64, up_down_counter, add, stringify!($($name).+), $description, $value, &attributes);
+    };
+
+    ($($name:ident).+, $description:literal, $value: expr, $($($attr_key:ident).+ = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+];
+        metric!(f64, up_down_counter, add, stringify!($($name).+), $description, $value, &attributes);
+    };
+
+    ($name:literal, $description:literal, $value: expr, $($attr_key:literal = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+];
+        metric!(f64, up_down_counter, add, $name, $description, $value, &attributes);
+    };
+
+    ($name:literal, $description:literal, $value: expr, $($($attr_key:ident).+ = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+];
+        metric!(f64, up_down_counter, add, $name, $description, $value, &attributes);
     };
 
     ($name:literal, $description:literal, $value: expr, $attrs: expr) => {
-        metric!(f64, histogram, record, $name, $description, $value, $attrs);
+        metric!(f64, up_down_counter, add, $name, $description, $value, $attrs);
     };
 
     ($name:literal, $description:literal, $value: expr) => {
-        metric!(f64, histogram, record, $name, $description, $value, &[]);
+        metric!(f64, up_down_counter, add, $name, $description, $value, &[]);
     };
 }
 
@@ -464,11 +523,26 @@ macro_rules! f64_up_down_counter {
 /// New metrics should be added using these macros.
 #[allow(unused_macros)]
 macro_rules! f64_histogram {
-    ($name:literal, $description:literal, $value: expr, $($attr_key:expr => $attr_value:expr),+) => {
+    ($($name:ident).+, $description:literal, $value: expr, $($attr_key:literal = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+];
+        metric!(f64, histogram, record, stringify!($($name).+), $description, $value, &attributes);
+    };
+
+    ($($name:ident).+, $description:literal, $value: expr, $($($attr_key:ident).+ = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+];
+        metric!(f64, histogram, record, stringify!($($name).+), $description, $value, &attributes);
+    };
+
+    ($name:literal, $description:literal, $value: expr, $($attr_key:literal = $attr_value:expr),+) => {
         let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+];
         metric!(f64, histogram, record, $name, $description, $value, &attributes);
     };
 
+    ($name:literal, $description:literal, $value: expr, $($($attr_key:ident).+ = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+];
+        metric!(f64, histogram, record, $name, $description, $value, &attributes);
+    };
+
     ($name:literal, $description:literal, $value: expr, $attrs: expr) => {
         metric!(f64, histogram, record, $name, $description, $value, $attrs);
     };
@@ -488,11 +562,26 @@ macro_rules! f64_histogram {
 /// New metrics should be added using these macros.
 #[allow(unused_macros)]
 macro_rules! u64_histogram {
-    ($name:literal, $description:literal, $value: expr, $($attr_key:expr => $attr_value:expr),+) => {
+    ($($name:ident).+, $description:literal, $value: expr, $($attr_key:literal = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+];
+        metric!(u64, histogram, record, stringify!($($name).+), $description, $value, &attributes);
+    };
+
+    ($($name:ident).+, $description:literal, $value: expr, $($($attr_key:ident).+ = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+];
+        metric!(u64, histogram, record, stringify!($($name).+), $description, $value, &attributes);
+    };
+
+    ($name:literal, $description:literal, $value: expr, $($attr_key:literal = $attr_value:expr),+) => {
         let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+];
         metric!(u64, histogram, record, $name, $description, $value, &attributes);
     };
 
+    ($name:literal, $description:literal, $value: expr, $($($attr_key:ident).+ = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+];
+        metric!(u64, histogram, record, $name, $description, $value, &attributes);
+    };
+
     ($name:literal, $description:literal, $value: expr, $attrs: expr) => {
         metric!(u64, histogram, record, $name, $description, $value, $attrs);
     };
@@ -512,11 +601,26 @@ macro_rules! u64_histogram {
 /// New metrics should be added using these macros.
 #[allow(unused_macros)]
 macro_rules! i64_histogram {
-    ($name:literal, $description:literal, $value: expr, $($attr_key:expr => $attr_value:expr),+) => {
+    ($($name:ident).+, $description:literal, $value: expr, $($attr_key:literal = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+];
+        metric!(i64, histogram, record, stringify!($($name).+), $description, $value, &attributes);
+    };
+
+    ($($name:ident).+, $description:literal, $value: expr, $($($attr_key:ident).+ = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+];
+        metric!(i64, histogram, record, stringify!($($name).+), $description, $value, &attributes);
+    };
+
+    ($name:literal, $description:literal, $value: expr, $($attr_key:literal = $attr_value:expr),+) => {
         let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+];
         metric!(i64, histogram, record, $name, $description, $value, &attributes);
     };
 
+    ($name:literal, $description:literal, $value: expr, $($($attr_key:ident).+ = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+];
+        metric!(i64, histogram, record, $name, $description, $value, &attributes);
+    };
+
     ($name:literal, $description:literal, $value: expr, $attrs: expr) => {
         metric!(i64, histogram, record, $name, $description, $value, $attrs);
     };
@@ -532,7 +636,7 @@ thread_local! {
     pub(crate) static CACHE_CALLSITE: std::sync::atomic::AtomicBool = const {std::sync::atomic::AtomicBool::new(false)};
 }
 macro_rules! metric {
-    ($ty:ident, $instrument:ident, $mutation:ident, $name:literal, $description:literal, $value: expr, $attrs: expr) => {
+    ($ty:ident, $instrument:ident, $mutation:ident, $name:expr, $description:literal, $value: expr, $attrs: expr) => {
 
         // The way this works is that we have a static at each call site that holds a weak reference to the instrument.
         // We make a call we try to upgrade the weak reference. If it succeeds we use the instrument.
@@ -591,10 +695,26 @@ macro_rules! metric {
 
 #[cfg(test)]
 macro_rules! assert_metric {
-    ($name:literal, $value: expr, $($attr_key:expr => $attr_value:expr),+) => {
+    ($($name:ident).+, $value: expr, $($attr_key:literal = $attr_value:expr),+) => {
         let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+];
+        crate::metrics::collect_metrics().assert(stringify!($($name).+), $value, &attributes);
+    };
+
+    ($($name:ident).+, $value: expr, $($($attr_key:ident).+ = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+];
+        crate::metrics::collect_metrics().assert(stringify!($($name).+), $value, &attributes);
+    };
+
+    ($name:literal, $value: expr, $($attr_key:literal = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+];
+        crate::metrics::collect_metrics().assert($name, $value, &attributes);
+    };
+
+    ($name:literal, $value: expr, $($($attr_key:ident).+ = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+];
         crate::metrics::collect_metrics().assert($name, $value, &attributes);
     };
+
     ($name:literal, $value: expr) => {
         crate::metrics::collect_metrics().assert($name, $value, &[]);
     };
@@ -662,35 +782,35 @@ mod test {
     fn test_dynamic_attributes() {
         let attributes = vec![KeyValue::new("attr", "val")];
         u64_counter!("test", "test description", 1, attributes);
-        assert_metric!("test", 1, "attr" => "val");
+        assert_metric!("test", 1, "attr" = "val");
     }
 
     #[test]
     fn test_multiple_calls() {
         fn my_method(val: &'static str) {
-            u64_counter!("test", "test description", 1, "attr" => val);
+            u64_counter!("test", "test description", 1, "attr" = val);
         }
 
         my_method("jill");
         my_method("jill");
         my_method("bob");
-        assert_metric!("test", 2, "attr" => "jill");
-        assert_metric!("test", 1, "attr" => "bob");
+        assert_metric!("test", 2, "attr" = "jill");
+        assert_metric!("test", 1, "attr" = "bob");
     }
 
     #[test]
     fn test_non_async() {
         // Each test is run in a separate thread, metrics are stored in a thread local.
-        u64_counter!("test", "test description", 1, "attr" => "val");
-        assert_metric!("test", 1, "attr" => "val");
+        u64_counter!("test", "test description", 1, "attr" = "val");
+        assert_metric!("test", 1, "attr" = "val");
     }
 
     #[tokio::test(flavor = "multi_thread")]
     async fn test_async_multi() {
         // Multi-threaded runtime needs to use a tokio task local to avoid tests interfering with each other
         async {
-            u64_counter!("test", "test description", 1, "attr" => "val");
-            assert_metric!("test", 1, "attr" => "val");
+            u64_counter!("test", "test description", 1, "attr" = "val");
+            assert_metric!("test", 1, "attr" = "val");
         }
         .with_metrics()
         .await;
@@ -700,8 +820,8 @@ mod test {
     async fn test_async_single() {
         async {
             // It's a single threaded tokio runtime, so we can still use a thread local
-            u64_counter!("test", "test description", 1, "attr" => "val");
-            assert_metric!("test", 1, "attr" => "val");
+            u64_counter!("test", "test description", 1, "attr" = "val");
+            assert_metric!("test", 1, "attr" = "val");
         }
         .with_metrics()
         .await;
@@ -710,8 +830,26 @@ mod test {
     #[tokio::test]
     async fn test_u64_counter() {
         async {
-            u64_counter!("test", "test description", 1, "attr" => "val");
-            assert_metric!("test", 1, "attr" => "val");
+            u64_counter!("test", "test description", 1, attr = "val");
+            u64_counter!("test", "test description", 1, attr.test = "val");
+            u64_counter!("test", "test description", 1, attr.test_underscore = "val");
+            u64_counter!(
+                test.dot,
+                "test description",
+                1,
+                "attr.test_underscore" = "val"
+            );
+            u64_counter!(
+                test.dot,
+                "test description",
+                1,
+                attr.test_underscore = "val"
+            );
+            assert_metric!("test", 1, "attr" = "val");
+            assert_metric!("test", 1, "attr.test" = "val");
+            assert_metric!("test", 1, attr.test_underscore = "val");
+            assert_metric!(test.dot, 2, attr.test_underscore = "val");
+            assert_metric!(test.dot, 2, "attr.test_underscore" = "val");
         }
         .with_metrics()
         .await;
@@ -720,8 +858,8 @@ mod test {
     #[tokio::test]
     async fn test_f64_counter() {
         async {
-            f64_counter!("test", "test description", 1.5, "attr" => "val");
-            assert_metric!("test", 1.5, "attr" => "val");
+            f64_counter!("test", "test description", 1.5, "attr" = "val");
+            assert_metric!("test", 1.5, "attr" = "val");
         }
         .with_metrics()
         .await;
@@ -730,8 +868,8 @@ mod test {
     #[tokio::test]
     async fn test_i64_up_down_counter() {
         async {
-            i64_up_down_counter!("test", "test description", 1, "attr" => "val");
-            assert_metric!("test", 1, "attr" => "val");
+            i64_up_down_counter!("test", "test description", 1, "attr" = "val");
+            assert_metric!("test", 1, "attr" = "val");
         }
         .with_metrics()
         .await;
@@ -740,8 +878,8 @@ mod test {
     #[tokio::test]
     async fn test_f64_up_down_counter() {
         async {
-            f64_up_down_counter!("test", "test description", 1.5, "attr" => "val");
-            assert_metric!("test", 1.5, "attr" => "val");
+            f64_up_down_counter!("test", "test description", 1.5, "attr" = "val");
+            assert_metric!("test", 1.5, "attr" = "val");
         }
         .with_metrics()
         .await;
@@ -750,8 +888,8 @@ mod test {
     #[tokio::test]
     async fn test_u64_histogram() {
         async {
-            u64_histogram!("test", "test description", 1, "attr" => "val");
-            assert_metric!("test", 1, "attr" => "val");
+            u64_histogram!("test", "test description", 1, "attr" = "val");
+            assert_metric!("test", 1, "attr" = "val");
         }
         .with_metrics()
         .await;
@@ -760,8 +898,8 @@ mod test {
     #[tokio::test]
     async fn test_i64_histogram() {
         async {
-            i64_histogram!("test", "test description", 1, "attr" => "val");
-            assert_metric!("test", 1, "attr" => "val");
+            i64_histogram!("test", "test description", 1, "attr" = "val");
+            assert_metric!("test", 1, "attr" = "val");
         }
         .with_metrics()
         .await;
@@ -770,8 +908,8 @@ mod test {
     #[tokio::test]
     async fn test_f64_histogram() {
         async {
-            f64_histogram!("test", "test description", 1.0, "attr" => "val");
-            assert_metric!("test", 1, "attr" => "val");
+            f64_histogram!("test", "test description", 1.0, "attr" = "val");
+            assert_metric!("test", 1, "attr" = "val");
         }
         .with_metrics()
         .await;
@@ -785,7 +923,7 @@ mod test {
         super::CACHE_CALLSITE.with(|cell| cell.store(true, std::sync::atomic::Ordering::SeqCst));
         fn test() {
             // This is a single callsite so should only have one metric
-            u64_counter!("test", "test description", 1, "attr" => "val");
+            u64_counter!("test", "test description", 1, "attr" = "val");
         }
 
         // Callsite hasn't been used yet, so there should be no metrics
@@ -793,12 +931,12 @@ mod test {
 
         // Call the metrics, it will be registered
         test();
-        assert_metric!("test", 1, "attr" => "val");
+        assert_metric!("test", 1, "attr" = "val");
         assert_eq!(meter_provider().registered_instruments(), 1);
 
         // Call the metrics again, but the second call will not register a new metric because it will have be retrieved from the static
         test();
-        assert_metric!("test", 2, "attr" => "val");
+        assert_metric!("test", 2, "attr" = "val");
         assert_eq!(meter_provider().registered_instruments(), 1);
 
         // Force invalidation of instruments
diff --git a/apollo-router/src/plugins/telemetry/mod.rs b/apollo-router/src/plugins/telemetry/mod.rs
index ea1456c657..403142e584 100644
--- a/apollo-router/src/plugins/telemetry/mod.rs
+++ b/apollo-router/src/plugins/telemetry/mod.rs
@@ -837,7 +837,12 @@ impl Telemetry {
                 if !parts.status.is_success() {
                     metric_attrs.push(KeyValue::new("error", parts.status.to_string()));
                 }
-                u64_counter!("apollo.router.operations", "The number of graphql operations performed by the Router", 1, "http.response.status_code" => parts.status.as_u16() as i64);
+                u64_counter!(
+                    "apollo.router.operations",
+                    "The number of graphql operations performed by the Router",
+                    1,
+                    "http.response.status_code" = parts.status.as_u16() as i64
+                );
                 let response = http::Response::from_parts(
                     parts,
                     once(ready(first_response.unwrap_or_default()))
@@ -849,7 +854,12 @@ impl Telemetry {
             }
             Err(err) => {
                 metric_attrs.push(KeyValue::new("status", "500"));
-                u64_counter!("apollo.router.operations", "The number of graphql operations performed by the Router", 1, "http.response.status_code" => 500);
+                u64_counter!(
+                    "apollo.router.operations",
+                    "The number of graphql operations performed by the Router",
+                    1,
+                    "http.response.status_code" = 500
+                );
                 Err(err)
             }
         };
@@ -2180,32 +2190,53 @@ mod tests {
     async fn test_supergraph_metrics_ok() {
         async {
             let plugin =
-                create_plugin_with_config(include_str!("testdata/custom_attributes.router.yaml")).await;
+                create_plugin_with_config(include_str!("testdata/custom_attributes.router.yaml"))
+                    .await;
             make_supergraph_request(plugin.as_ref()).await;
 
-            assert_metric!("apollo_router_http_requests_total", 1, "another_test" => "my_default_value", "my_value" => 2, "myname" => "label_value", "renamed_value" => "my_value_set", "status" => "200", "x-custom" => "coming_from_header");
-            assert_metric!("apollo_router_http_request_duration_seconds", 1, "another_test" => "my_default_value", "my_value" => 2, "myname" => "label_value", "renamed_value" => "my_value_set", "status" => "200", "x-custom" => "coming_from_header");
-        }.with_metrics().await;
+            assert_metric!(
+                "apollo_router_http_requests_total",
+                1,
+                "another_test" = "my_default_value",
+                "my_value" = 2,
+                "myname" = "label_value",
+                "renamed_value" = "my_value_set",
+                "status" = "200",
+                "x-custom" = "coming_from_header"
+            );
+            assert_metric!(
+                "apollo_router_http_request_duration_seconds",
+                1,
+                "another_test" = "my_default_value",
+                "my_value" = 2,
+                "myname" = "label_value",
+                "renamed_value" = "my_value_set",
+                "status" = "200",
+                "x-custom" = "coming_from_header"
+            );
+        }
+        .with_metrics()
+        .await;
     }
 
     #[tokio::test]
     async fn test_supergraph_metrics_bad_request() {
         async {
             let plugin =
-                create_plugin_with_config(include_str!("testdata/custom_attributes.router.yaml")).await;
+                create_plugin_with_config(include_str!("testdata/custom_attributes.router.yaml"))
+                    .await;
 
             let mut mock_bad_request_service = MockSupergraphService::new();
-            mock_bad_request_service
-                .expect_call()
-                .times(1)
-                .returning(move |req: SupergraphRequest| {
+            mock_bad_request_service.expect_call().times(1).returning(
+                move |req: SupergraphRequest| {
                     Ok(SupergraphResponse::fake_builder()
                         .context(req.context)
                         .status_code(StatusCode::BAD_REQUEST)
                         .data(json!({"errors": [{"message": "nope"}]}))
                         .build()
                         .unwrap())
-                });
+                },
+            );
             let mut bad_request_supergraph_service =
                 plugin.supergraph_service(BoxService::new(mock_bad_request_service));
             let router_req = SupergraphRequest::fake_builder().header("test", "my_value_set");
@@ -2220,15 +2251,26 @@ mod tests {
                 .await
                 .unwrap();
 
-            assert_metric!("apollo_router_http_requests_total", 1, "another_test" => "my_default_value", "error" => "400 Bad Request", "myname" => "label_value", "renamed_value" => "my_value_set", "status" => "400");
-        }.with_metrics().await;
+            assert_metric!(
+                "apollo_router_http_requests_total",
+                1,
+                "another_test" = "my_default_value",
+                "error" = "400 Bad Request",
+                "myname" = "label_value",
+                "renamed_value" = "my_value_set",
+                "status" = "400"
+            );
+        }
+        .with_metrics()
+        .await;
     }
 
     #[tokio::test]
     async fn test_subgraph_metrics_ok() {
         async {
             let plugin =
-                create_plugin_with_config(include_str!("testdata/custom_attributes.router.yaml")).await;
+                create_plugin_with_config(include_str!("testdata/custom_attributes.router.yaml"))
+                    .await;
 
             let mut mock_subgraph_service = MockSubgraphService::new();
             mock_subgraph_service
@@ -2281,15 +2323,27 @@ mod tests {
                 .await
                 .unwrap();
 
-            assert_metric!("apollo_router_http_requests_total", 1, "error" => "custom_error_for_propagation", "my_key" => "my_custom_attribute_from_context", "query_from_request" => "query { test }", "status" => "200", "subgraph" => "my_subgraph_name", "unknown_data" => "default_value");
-        }.with_metrics().await;
+            assert_metric!(
+                "apollo_router_http_requests_total",
+                1,
+                "error" = "custom_error_for_propagation",
+                "my_key" = "my_custom_attribute_from_context",
+                "query_from_request" = "query { test }",
+                "status" = "200",
+                "subgraph" = "my_subgraph_name",
+                "unknown_data" = "default_value"
+            );
+        }
+        .with_metrics()
+        .await;
     }
 
     #[tokio::test]
     async fn test_subgraph_metrics_http_error() {
         async {
             let plugin =
-                create_plugin_with_config(include_str!("testdata/custom_attributes.router.yaml")).await;
+                create_plugin_with_config(include_str!("testdata/custom_attributes.router.yaml"))
+                    .await;
 
             let mut mock_subgraph_service_in_error = MockSubgraphService::new();
             mock_subgraph_service_in_error
@@ -2329,28 +2383,37 @@ mod tests {
                 .await
                 .expect_err("should be an error");
 
-            assert_metric!("apollo_router_http_requests_total", 1, "message" => "cannot contact the subgraph", "status" => "500", "subgraph" => "my_subgraph_name_error", "subgraph_error_extended_code" => "SUBREQUEST_HTTP_ERROR");
-        }.with_metrics().await;
+            assert_metric!(
+                "apollo_router_http_requests_total",
+                1,
+                "message" = "cannot contact the subgraph",
+                "status" = "500",
+                "subgraph" = "my_subgraph_name_error",
+                "subgraph_error_extended_code" = "SUBREQUEST_HTTP_ERROR"
+            );
+        }
+        .with_metrics()
+        .await;
     }
 
     #[tokio::test]
     async fn test_subgraph_metrics_bad_request() {
         async {
             let plugin =
-                create_plugin_with_config(include_str!("testdata/custom_attributes.router.yaml")).await;
+                create_plugin_with_config(include_str!("testdata/custom_attributes.router.yaml"))
+                    .await;
 
             let mut mock_bad_request_service = MockSupergraphService::new();
-            mock_bad_request_service
-                .expect_call()
-                .times(1)
-                .returning(move |req: SupergraphRequest| {
+            mock_bad_request_service.expect_call().times(1).returning(
+                move |req: SupergraphRequest| {
                     Ok(SupergraphResponse::fake_builder()
                         .context(req.context)
                         .status_code(StatusCode::BAD_REQUEST)
                         .data(json!({"errors": [{"message": "nope"}]}))
                         .build()
                         .unwrap())
-                });
+                },
+            );
 
             let mut bad_request_supergraph_service =
                 plugin.supergraph_service(BoxService::new(mock_bad_request_service));
@@ -2368,10 +2431,27 @@ mod tests {
                 .await
                 .unwrap();
 
-            assert_metric!("apollo_router_http_requests_total", 1, "another_test" => "my_default_value", "error" => "400 Bad Request", "myname" => "label_value", "renamed_value" => "my_value_set", "status" => "400");
-            assert_metric!("apollo_router_http_request_duration_seconds", 1, "another_test" => "my_default_value", "error" => "400 Bad Request", "myname" => "label_value", "renamed_value" => "my_value_set", "status" => "400");
-
-        }.with_metrics().await;
+            assert_metric!(
+                "apollo_router_http_requests_total",
+                1,
+                "another_test" = "my_default_value",
+                "error" = "400 Bad Request",
+                "myname" = "label_value",
+                "renamed_value" = "my_value_set",
+                "status" = "400"
+            );
+            assert_metric!(
+                "apollo_router_http_request_duration_seconds",
+                1,
+                "another_test" = "my_default_value",
+                "error" = "400 Bad Request",
+                "myname" = "label_value",
+                "renamed_value" = "my_value_set",
+                "status" = "400"
+            );
+        }
+        .with_metrics()
+        .await;
     }
 
     #[tokio::test]

From 48ecaa3dbd131ae817f98b2c0e75d91ace87aafb Mon Sep 17 00:00:00 2001
From: bryn 
Date: Mon, 25 Sep 2023 11:00:01 +0100
Subject: [PATCH 23/30] Make metrics assertions type specific

---
 apollo-router/src/metrics/mod.rs           | 227 ++++++++++++++++-----
 apollo-router/src/plugins/telemetry/mod.rs |  14 +-
 dev-docs/metrics.md                        |   6 +-
 3 files changed, 191 insertions(+), 56 deletions(-)

diff --git a/apollo-router/src/metrics/mod.rs b/apollo-router/src/metrics/mod.rs
index f0bfa62b57..092b1627ae 100644
--- a/apollo-router/src/metrics/mod.rs
+++ b/apollo-router/src/metrics/mod.rs
@@ -16,7 +16,7 @@ pub(crate) mod layer;
 // During tests this is a task local so that we can test metrics without having to worry about other tests interfering.
 
 #[cfg(test)]
-mod test_utils {
+pub(crate) mod test_utils {
     use std::fmt::Debug;
     use std::fmt::Display;
     use std::sync::Arc;
@@ -112,7 +112,6 @@ mod test_utils {
             (meter_provider, reader)
         }
     }
-    #[cfg(test)]
     pub(crate) fn meter_provider_and_readers() -> (AggregateMeterProvider, ClonableManualReader) {
         if tokio::runtime::Handle::try_current().is_ok() {
             if let Ok(task_local) = AGGREGATE_METER_PROVIDER_ASYNC
@@ -132,11 +131,6 @@ mod test_utils {
         }
     }
 
-    #[cfg(not(test))]
-    fn meter_provider_and_readers() -> (AggregateMeterProvider, ClonableManualReader) {
-        AGGREGATE_METER_PROVIDER.with(|cell| cell.get_or_init(create_test_meter_provider).clone())
-    }
-
     pub(crate) struct Metrics {
         resource_metrics: ResourceMetrics,
     }
@@ -179,24 +173,25 @@ mod test_utils {
         pub(crate) fn assert(
             &self,
             name: &str,
+            ty: MetricType,
             value: T,
             attributes: &[KeyValue],
         ) {
             let attributes = AttributeSet::from(attributes);
             if let Some(value) = value.to_u64() {
-                if self.metric_exists(name, value, &attributes) {
+                if self.metric_exists(name, &ty, value, &attributes) {
                     return;
                 }
             }
 
             if let Some(value) = value.to_i64() {
-                if self.metric_exists(name, value, &attributes) {
+                if self.metric_exists(name, &ty, value, &attributes) {
                     return;
                 }
             }
 
             if let Some(value) = value.to_f64() {
-                if self.metric_exists(name, value, &attributes) {
+                if self.metric_exists(name, &ty, value, &attributes) {
                     return;
                 }
             }
@@ -312,6 +307,7 @@ mod test_utils {
         fn metric_exists(
             &self,
             name: &str,
+            ty: &MetricType,
             value: T,
             attributes: &AttributeSet,
         ) -> bool {
@@ -319,25 +315,39 @@ mod test_utils {
                 // Try to downcast the metric to each type of aggregation and assert that the value is correct.
                 if let Some(gauge) = metric.data.as_any().downcast_ref::>() {
                     // Find the datapoint with the correct attributes.
-                    return gauge.data_points.iter().any(|datapoint| {
-                        datapoint.attributes == *attributes && datapoint.value == value
-                    });
+                    if matches!(ty, MetricType::Gauge) {
+                        return gauge.data_points.iter().any(|datapoint| {
+                            datapoint.attributes == *attributes && datapoint.value == value
+                        });
+                    }
                 } else if let Some(sum) = metric.data.as_any().downcast_ref::>() {
-                    return sum.data_points.iter().any(|datapoint| {
-                        datapoint.attributes == *attributes && datapoint.value == value
-                    });
+                    // Note that we can't actually tell if the sum is monotonic or not, so we just check if it's a sum.
+                    if matches!(ty, MetricType::Counter | MetricType::UpDownCounter) {
+                        return sum.data_points.iter().any(|datapoint| {
+                            datapoint.attributes == *attributes && datapoint.value == value
+                        });
+                    }
                 } else if let Some(histogram) = metric.data.as_any().downcast_ref::>()
                 {
-                    if let Some(value) = value.to_u64() {
-                        return histogram.data_points.iter().any(|datapoint| {
-                            datapoint.attributes == *attributes && datapoint.count == value
-                        });
+                    if matches!(ty, MetricType::Histogram) {
+                        if let Some(value) = value.to_u64() {
+                            return histogram.data_points.iter().any(|datapoint| {
+                                datapoint.attributes == *attributes && datapoint.count == value
+                            });
+                        }
                     }
                 }
             }
             false
         }
     }
+
+    pub(crate) enum MetricType {
+        Counter,
+        UpDownCounter,
+        Histogram,
+        Gauge,
+    }
 }
 #[cfg(test)]
 pub(crate) fn meter_provider() -> AggregateMeterProvider {
@@ -694,25 +704,106 @@ macro_rules! metric {
 }
 
 #[cfg(test)]
-macro_rules! assert_metric {
+macro_rules! assert_counter {
     ($($name:ident).+, $value: expr, $($attr_key:literal = $attr_value:expr),+) => {
         let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+];
-        crate::metrics::collect_metrics().assert(stringify!($($name).+), $value, &attributes);
+        crate::metrics::collect_metrics().assert(stringify!($($name).+), crate::metrics::test_utils::MetricType::Counter, $value, &attributes);
+    };
+
+    ($($name:ident).+, $value: expr, $($($attr_key:ident).+ = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+];
+        crate::metrics::collect_metrics().assert(stringify!($($name).+), crate::metrics::test_utils::MetricType::Counter, $value, &attributes);
+    };
+
+    ($name:literal, $value: expr, $($attr_key:literal = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+];
+        crate::metrics::collect_metrics().assert($name, crate::metrics::test_utils::MetricType::Counter, $value, &attributes);
+    };
+
+    ($name:literal, $value: expr, $($($attr_key:ident).+ = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+];
+        crate::metrics::collect_metrics().assert($name, crate::metrics::test_utils::MetricType::Counter, $value, &attributes);
+    };
+
+    ($name:literal, $value: expr) => {
+        crate::metrics::collect_metrics().assert($name, crate::metrics::test_utils::MetricType::Counter, $value, &[]);
+    };
+}
+
+#[cfg(test)]
+macro_rules! assert_up_down_counter {
    ($($name:ident).+, $value: expr, $($attr_key:literal = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+];
+        crate::metrics::collect_metrics().assert(stringify!($($name).+), crate::metrics::test_utils::MetricType::UpDownCounter, $value, &attributes);
+    };
+
+    ($($name:ident).+, $value: expr, $($($attr_key:ident).+ = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+];
+        crate::metrics::collect_metrics().assert(stringify!($($name).+), crate::metrics::test_utils::MetricType::UpDownCounter, $value, &attributes);
+    };
+
+    ($name:literal, $value: expr, $($attr_key:literal = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+];
+        crate::metrics::collect_metrics().assert($name, crate::metrics::test_utils::MetricType::UpDownCounter, $value, &attributes);
+    };
+
+    ($name:literal, $value: expr, $($($attr_key:ident).+ = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+];
+        crate::metrics::collect_metrics().assert($name, crate::metrics::test_utils::MetricType::UpDownCounter, $value, &attributes);
+    };
+
+    ($name:literal, $value: expr) => {
+        crate::metrics::collect_metrics().assert($name, crate::metrics::test_utils::MetricType::UpDownCounter, $value, &[]);
+    };
+}
+
+#[cfg(test)]
+macro_rules! assert_gauge {
    ($($name:ident).+, $value: expr, $($attr_key:literal = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+];
+        crate::metrics::collect_metrics().assert(stringify!($($name).+), crate::metrics::test_utils::MetricType::Gauge, $value, &attributes);
     };
 
     ($($name:ident).+, $value: expr, $($($attr_key:ident).+ = $attr_value:expr),+) => {
         let attributes = vec![$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+];
-        crate::metrics::collect_metrics().assert(stringify!($($name).+), $value, &attributes);
+        crate::metrics::collect_metrics().assert(stringify!($($name).+), crate::metrics::test_utils::MetricType::Gauge, $value, &attributes);
     };
 
     ($name:literal, $value: expr, $($attr_key:literal = $attr_value:expr),+) => {
         let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+];
-        crate::metrics::collect_metrics().assert($name, $value, &attributes);
+        crate::metrics::collect_metrics().assert($name, crate::metrics::test_utils::MetricType::Gauge, $value, &attributes);
     };
 
     ($name:literal, $value: expr, $($($attr_key:ident).+ = $attr_value:expr),+) => {
         let attributes = vec![$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+];
-        crate::metrics::collect_metrics().assert($name, $value, &attributes);
+        crate::metrics::collect_metrics().assert($name, crate::metrics::test_utils::MetricType::Gauge, $value, &attributes);
+    };
+
+    ($name:literal, $value: expr) => {
+        crate::metrics::collect_metrics().assert($name, crate::metrics::test_utils::MetricType::Gauge, $value, &[]);
+    };
+}
+
+#[cfg(test)]
+macro_rules! assert_histogram {
    ($($name:ident).+, $value: expr, $($attr_key:literal = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+];
+        crate::metrics::collect_metrics().assert(stringify!($($name).+), crate::metrics::test_utils::MetricType::Histogram, $value, &attributes);
+    };
+
+    ($($name:ident).+, $value: expr, $($($attr_key:ident).+ = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+];
+        crate::metrics::collect_metrics().assert(stringify!($($name).+), crate::metrics::test_utils::MetricType::Histogram, $value, &attributes);
+    };
+
+    ($name:literal, $value: expr, $($attr_key:literal = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new($attr_key, $attr_value)),+];
+        crate::metrics::collect_metrics().assert($name, crate::metrics::test_utils::MetricType::Histogram, $value, &attributes);
+    };
+
+    ($name:literal, $value: expr, $($($attr_key:ident).+ = $attr_value:expr),+) => {
+        let attributes = vec![$(opentelemetry::KeyValue::new(stringify!($($attr_key).+), $attr_value)),+];
+        crate::metrics::collect_metrics().assert($name, crate::metrics::test_utils::MetricType::Histogram, $value, &attributes);
     };
 
     ($name:literal, $value: expr) => {
@@ -769,20 +860,20 @@ mod test {
             .u64_observable_gauge("test")
             .with_callback(|m| m.observe(5, &[]))
             .init();
-        assert_metric!("test", 5);
+        assert_gauge!("test", 5);
     }
 
     #[test]
     fn test_no_attributes() {
         u64_counter!("test", "test description", 1);
-        assert_metric!("test", 1);
+        assert_counter!("test", 1);
     }
 
     #[test]
     fn test_dynamic_attributes() {
         let attributes = vec![KeyValue::new("attr", "val")];
         u64_counter!("test", "test description", 1, attributes);
-        assert_metric!("test", 1, "attr" = "val");
+        assert_counter!("test", 1, "attr" = "val");
     }
 
     #[test]
@@ -794,15 +885,15 @@ mod test {
         my_method("jill");
         my_method("jill");
         my_method("bob");
-        assert_metric!("test", 2, "attr" = "jill");
-        assert_metric!("test", 1, "attr" = "bob");
+        assert_counter!("test", 2, "attr" = "jill");
+        assert_counter!("test", 1, "attr" = "bob");
     }
 
     #[test]
     fn test_non_async() {
         // Each test is run in a separate thread, metrics are stored in a thread local.
         u64_counter!("test", "test description", 1, "attr" = "val");
-        assert_metric!("test", 1, "attr" = "val");
+        assert_counter!("test", 1, "attr" = "val");
     }
 
     #[tokio::test(flavor = "multi_thread")]
@@ -810,7 +901,7 @@ mod test {
         // Multi-threaded runtime needs to use a tokio task local to avoid tests interfering with each other
         async {
             u64_counter!("test", "test description", 1, "attr" = "val");
-            assert_metric!("test", 1, "attr" = "val");
+            assert_counter!("test", 1, "attr" = "val");
         }
         .with_metrics()
         .await;
@@ -821,7 +912,7 @@ mod test {
         async {
             // It's a single threaded tokio runtime, so we can still use a thread local
             u64_counter!("test", "test description", 1, "attr" = "val");
-            assert_metric!("test", 1, "attr" = "val");
+            assert_counter!("test", 1, "attr" = "val");
         }
         .with_metrics()
         .await;
@@ -845,11 +936,11 @@ mod test {
                 1,
                 attr.test_underscore = "val"
             );
-            assert_metric!("test", 1, "attr" = "val");
-            assert_metric!("test", 1, "attr.test" = "val");
-            assert_metric!("test", 1, attr.test_underscore = "val");
-            assert_metric!(test.dot, 2, attr.test_underscore = "val");
-            assert_metric!(test.dot, 2, "attr.test_underscore" = "val");
+            assert_counter!("test", 1, "attr" = "val");
+            assert_counter!("test", 1, "attr.test" = "val");
+            assert_counter!("test", 1, attr.test_underscore = "val");
+            assert_counter!(test.dot, 2, attr.test_underscore = "val");
+            assert_counter!(test.dot, 2, "attr.test_underscore" = "val");
         }
         .with_metrics()
         .await;
@@ -859,7 +950,7 @@ mod test {
     async fn test_f64_counter() {
         async {
             f64_counter!("test", "test description", 1.5, "attr" = "val");
-            assert_metric!("test", 1.5, "attr" = "val");
+            assert_counter!("test", 1.5, "attr" = "val");
         }
         .with_metrics()
         .await;
@@ -869,7 +960,7 @@ mod test {
     async fn test_i64_up_down_counter() {
         async {
             i64_up_down_counter!("test", "test description", 1, "attr" = "val");
-            assert_metric!("test", 1, "attr" = "val");
+            assert_up_down_counter!("test", 1, "attr" = "val");
         }
         .with_metrics()
         .await;
@@ -879,7 +970,7 @@ mod test {
     async fn test_f64_up_down_counter() {
         async {
             f64_up_down_counter!("test", "test description", 1.5, "attr" = "val");
-            assert_metric!("test", 1.5, "attr" = "val");
+            assert_up_down_counter!("test", 1.5, "attr" = "val");
         }
         .with_metrics()
         .await;
@@ -889,7 +980,7 @@ mod test {
     async fn test_u64_histogram() {
         async {
             u64_histogram!("test", "test description", 1, "attr" = "val");
-            assert_metric!("test", 1, "attr" = "val");
+            assert_histogram!("test", 1, "attr" = "val");
         }
         .with_metrics()
         .await;
@@ -899,7 +990,7 @@ mod test {
     async fn test_i64_histogram() {
         async {
             i64_histogram!("test", "test description", 1, "attr" = "val");
-            assert_metric!("test", 1, "attr" = "val");
+            assert_histogram!("test", 1, "attr" = "val");
         }
         .with_metrics()
         .await;
@@ -909,7 +1000,51 @@ mod test {
     async fn test_f64_histogram() {
         async {
             f64_histogram!("test", "test description", 1.0, "attr" = "val");
-            assert_metric!("test", 1, "attr" = "val");
+            assert_histogram!("test", 1, "attr" = "val");
+        }
+        .with_metrics()
+        .await;
+    }
+
+    #[tokio::test]
+    #[should_panic]
+    async fn test_type_histogram() {
+        async {
+            f64_histogram!("test", "test description", 1.0, "attr" = "val");
+            assert_counter!("test", 1, "attr" = "val");
+        }
+        .with_metrics()
+        .await;
+    }
+
+    #[tokio::test]
+    #[should_panic]
+    async fn test_type_counter() {
+        async {
+            f64_counter!("test", "test description", 1.0, "attr" = "val");
+            assert_histogram!("test", 1, "attr" = "val");
+        }
+        .with_metrics()
+        .await;
+    }
+
+    #[tokio::test]
+    #[should_panic]
+    async fn test_type_up_down_counter() {
+        async {
+            f64_up_down_counter!("test", "test description", 1.0, "attr" = "val");
+            assert_histogram!("test", 1, "attr" = "val");
+        }
+        .with_metrics()
+        .await;
+    }
+
+    #[tokio::test]
+    #[should_panic]
+    async fn test_type_gauge() {
+        async {
+            f64_up_down_counter!("test", "test description", 1.0, "attr" = "val");
+            assert_histogram!("test", 1, "attr" = "val");
         }
         .with_metrics()
         .await;
@@ -931,12 +1066,12 @@ mod test {
 
         // Call the metrics, it will be registered
         test();
-        assert_metric!("test", 1, "attr" = "val");
+        assert_counter!("test", 1, "attr" = "val");
         assert_eq!(meter_provider().registered_instruments(), 1);
 
         // Call the metrics again, but the second call will not register a new metric because it will have be retrieved from the static
         test();
-        assert_metric!("test", 2, "attr" = "val");
+        assert_counter!("test", 2, "attr" = "val");
         assert_eq!(meter_provider().registered_instruments(), 1);
 
         // Force invalidation of instruments
diff --git a/apollo-router/src/plugins/telemetry/mod.rs b/apollo-router/src/plugins/telemetry/mod.rs
index 403142e584..4915426766 100644
--- a/apollo-router/src/plugins/telemetry/mod.rs
+++ b/apollo-router/src/plugins/telemetry/mod.rs
@@ -2194,7 +2194,7 @@ mod tests {
                     .await;
             make_supergraph_request(plugin.as_ref()).await;
 
-            assert_metric!(
+            assert_counter!(
                 "apollo_router_http_requests_total",
                 1,
                 "another_test" = "my_default_value",
@@ -2204,7 +2204,7 @@ mod tests {
                 "status" = "200",
                 "x-custom" = "coming_from_header"
             );
-            assert_metric!(
+            assert_histogram!(
                 "apollo_router_http_request_duration_seconds",
                 1,
                 "another_test" = "my_default_value",
@@ -2251,7 +2251,7 @@ mod tests {
                 .await
                 .unwrap();
 
-            assert_metric!(
+            assert_counter!(
                 "apollo_router_http_requests_total",
                 1,
                 "another_test" = "my_default_value",
@@ -2323,7 +2323,7 @@ mod tests {
                 .await
                 .unwrap();
 
-            assert_metric!(
+            assert_counter!(
                 "apollo_router_http_requests_total",
                 1,
                 "error" = "custom_error_for_propagation",
@@ -2383,7 +2383,7 @@ mod tests {
                 .await
                 .expect_err("should be an error");
 
-            assert_metric!(
+            assert_counter!(
                 "apollo_router_http_requests_total",
                 1,
                 "message" = "cannot contact the subgraph",
@@ -2431,7 +2431,7 @@ mod tests {
                 .await
                 .unwrap();
 
-            assert_metric!(
+            assert_counter!(
                 "apollo_router_http_requests_total",
                 1,
                 "another_test" = "my_default_value",
@@ -2440,7 +2440,7 @@ mod tests {
                 "renamed_value" = "my_value_set",
                 "status" = "400"
             );
-            assert_metric!(
+            assert_histogram!(
                 "apollo_router_http_request_duration_seconds",
                 1,
                 "another_test" = "my_default_value",
diff --git a/dev-docs/metrics.md b/dev-docs/metrics.md
index e6caa8c9da..34530201ef 100644
--- a/dev-docs/metrics.md
+++ b/dev-docs/metrics.md
@@ -122,7 +122,7 @@ When using the macro in a test you will need a different pattern depending on if
     fn test_non_async() {
         // Each test is run in a separate thread, metrics are stored in a thread local.
         u64_counter!("test", "test description", 1, "attr" => "val");
-        assert_metric!("test", 1, "attr" => "val");
+        assert_counter!("test", 1, "attr" => "val");
     }
 ```
 
@@ -136,7 +136,7 @@ Make sure to use `.with_metrics()` method on the async block to ensure that the
         // Multi-threaded runtime needs to use a tokio task local to avoid tests interfering with each other
         async {
             u64_counter!("test", "test description", 1, "attr" => "val");
-            assert_metric!("test", 1, "attr" => "val");
+            assert_counter!("test", 1, "attr" => "val");
         }
         .with_metrics()
         .await;
@@ -147,7 +147,7 @@ Make sure to use `.with_metrics()` method on the async block to ensure that the
         async {
             // It's a single threaded tokio runtime, so we can still use a thread local
             u64_counter!("test", "test description", 1, "attr" => "val");
-            assert_metric!("test", 1, "attr" => "val");
+            assert_counter!("test", 1, "attr" => "val");
         }
         .with_metrics()
         .await;

From a3fa4579fec0fb27d5d7039be3ef37a01ca4c66b Mon Sep 17 00:00:00 2001
From: bryn 
Date: Mon, 25 Sep 2023 11:30:55 +0100
Subject: [PATCH 24/30] Fix gauge test

---
 apollo-router/src/metrics/mod.rs | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/apollo-router/src/metrics/mod.rs b/apollo-router/src/metrics/mod.rs
index 092b1627ae..bbfbe78d29 100644
--- a/apollo-router/src/metrics/mod.rs
+++ b/apollo-router/src/metrics/mod.rs
@@ -1043,7 +1043,11 @@ mod test {
     #[should_panic]
     async fn test_type_gauge() {
         async {
-            f64_up_down_counter!("test", "test description", 1.0, "attr" = "val");
+            meter_provider()
+                .meter("test")
+                .u64_observable_gauge("test")
+                .with_callback(|m| m.observe(5, &[]))
+                .init();
             assert_histogram!("test", 1, "attr" = "val");
         }
         .with_metrics()

From 0a65766a2edfea8b627466bcb5933ad1cafb45a4 Mon Sep 17 00:00:00 2001
From: bryn 
Date: Mon, 25 Sep 2023 12:56:54 +0100
Subject: [PATCH 25/30] Make prom tests ignore reload. It doesn't really work
 with the test level meter provider and is tested via an integration test.

---
 .../src/plugins/telemetry/metrics/prometheus.rs    |  3 +++
 apollo-router/src/plugins/telemetry/mod.rs         | 14 --------------
 2 files changed, 3 insertions(+), 14 deletions(-)

diff --git a/apollo-router/src/plugins/telemetry/metrics/prometheus.rs b/apollo-router/src/plugins/telemetry/metrics/prometheus.rs
index 0ac2524803..591165e8fe 100644
--- a/apollo-router/src/plugins/telemetry/metrics/prometheus.rs
+++ b/apollo-router/src/plugins/telemetry/metrics/prometheus.rs
@@ -98,6 +98,9 @@ impl MetricsConfigurator for Config {
         if self.enabled {
             // Check the last registry to see if the resources are the same, if they are we can use it as is.
             // Otherwise go with the new controller and store it so that it can be committed during telemetry activation.
+            // Note that during tests the prom registry cannot be reused as we have a different meter provider for each test.
+            // Prom reloading IS tested in an integration test.
+            #[cfg(not(test))]
             if let Some((last_config, last_registry)) =
                 EXISTING_PROMETHEUS.lock().expect("lock poisoned").clone()
             {
diff --git a/apollo-router/src/plugins/telemetry/mod.rs b/apollo-router/src/plugins/telemetry/mod.rs
index 4915426766..5a0bf53dbb 100644
--- a/apollo-router/src/plugins/telemetry/mod.rs
+++ b/apollo-router/src/plugins/telemetry/mod.rs
@@ -2456,7 +2456,6 @@ mod tests {
 
     #[tokio::test]
     async fn it_test_prometheus_wrong_endpoint() {
-        let _lock = lock_prometheus().await;
         async {
             let plugin =
                 create_plugin_with_config(include_str!("testdata/prometheus.router.yaml")).await;
@@ -2489,12 +2488,8 @@ mod tests {
         .await;
     }
 
-    static PROMETHEUS_LOCK: std::sync::OnceLock<Arc<tokio::sync::Mutex<()>>> =
-        std::sync::OnceLock::new();
-
     #[tokio::test(flavor = "multi_thread")]
     async fn it_test_prometheus_metrics() {
-        let _lock = lock_prometheus().await;
         async {
             let plugin =
                 create_plugin_with_config(include_str!("testdata/prometheus.router.yaml")).await;
@@ -2508,7 +2503,6 @@ mod tests {
 
     #[tokio::test(flavor = "multi_thread")]
     async fn it_test_prometheus_metrics_custom_buckets() {
-        let _lock = lock_prometheus().await;
         async {
             let plugin = create_plugin_with_config(include_str!(
                 "testdata/prometheus_custom_buckets.router.yaml"
@@ -2523,14 +2517,6 @@ mod tests {
         .await;
     }
 
-    // Prometheus support has some globals. Ideally we fix the plugin API to allow migration of data across reloads. This means that some tests cannot be run in parallel due to interaction with globals.
-    async fn lock_prometheus() -> tokio::sync::MutexGuard<'static, ()> {
-        PROMETHEUS_LOCK
-            .get_or_init(|| Arc::new(tokio::sync::Mutex::new(())))
-            .lock()
-            .await
-    }
-
     #[test]
     fn it_test_send_headers_to_studio() {
         let fw_headers = ForwardHeaders::Only(vec![

From 286f3e7139af9d0af970701edcdbf36ca1de3bf7 Mon Sep 17 00:00:00 2001
From: bryn 
Date: Tue, 26 Sep 2023 09:52:53 +0100
Subject: [PATCH 26/30] Make linux use xlarge for builds in circle

---
 .circleci/config.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 3a91555b3f..575e979401 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -10,7 +10,7 @@ executors:
   amd_linux_build: &amd_linux_build_executor
     docker:
       - image: cimg/base:stable
-    resource_class: medium
+    resource_class: xlarge
     environment:
       CARGO_BUILD_JOBS: 4
       RUST_TEST_THREADS: 6

From 8a65c2ef5bdc68141a5fbeabd54d8fb49b58b043 Mon Sep 17 00:00:00 2001
From: bryn 
Date: Tue, 26 Sep 2023 15:26:37 +0100
Subject: [PATCH 27/30] Update changelog

---
 .changesets/maint_bryn_otel_update.md | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.changesets/maint_bryn_otel_update.md b/.changesets/maint_bryn_otel_update.md
index 91fac9c527..f2735b645a 100644
--- a/.changesets/maint_bryn_otel_update.md
+++ b/.changesets/maint_bryn_otel_update.md
@@ -9,9 +9,7 @@ In particular metrics have some significant changes:
 
 * The default service name for metrics is now `unknown_service` as per the [OpenTelemetry spec](https://opentelemetry.io/docs/concepts/sdk-configuration/general-sdk-configuration/#otel_service_name).
 
-  Users should ensure to configure service name via router.yaml, or via the `OTEL_SERVICE_NAME` environment variable. 
-
-  We will be following bring tracing into alignment in future. 
+  Users should ensure to configure service name via router.yaml, or via the `OTEL_SERVICE_NAME` environment variable.
 
 * The order of priority for setting service name has been brought into line with the rest of the router configuration. The order of priority is now:
   1. `OTEL_RESOURCE_ATTRIBUTES` environment variable

From fba92d2400c3b7dcbae8ede451e316ae04f2436e Mon Sep 17 00:00:00 2001
From: bryn 
Date: Tue, 26 Sep 2023 15:27:22 +0100
Subject: [PATCH 28/30] Fix alpha ordering in cargo toml

---
 apollo-router/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/apollo-router/Cargo.toml b/apollo-router/Cargo.toml
index 30e74ce656..aa4e4c6775 100644
--- a/apollo-router/Cargo.toml
+++ b/apollo-router/Cargo.toml
@@ -263,10 +263,10 @@ ecdsa = { version = "0.15.1", features = ["signing", "pem", "pkcs8"] }
 fred = { version = "6.3.2", features = ["enable-rustls", "no-client-setname"] }
 futures-test = "0.3.28"
 insta = { version = "1.32.0", features = ["json", "redactions", "yaml"] }
-num-traits = "0.2.16"
 maplit = "1.0.2"
 memchr = { version = "2.6.3", default-features = false }
 mockall = "0.11.4"
+num-traits = "0.2.16"
 once_cell = "1.18.0"
 opentelemetry-stdout = { version = "0.1.0", features = ["trace"] }
 p256 = "0.12.0"

From 40acb7bec3ce47527462f033a5bff408e59b6a9b Mon Sep 17 00:00:00 2001
From: bryn 
Date: Tue, 26 Sep 2023 15:54:10 +0100
Subject: [PATCH 29/30] Add constant for unknown_service

---
 apollo-router/src/plugins/telemetry/metrics/mod.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/apollo-router/src/plugins/telemetry/metrics/mod.rs b/apollo-router/src/plugins/telemetry/metrics/mod.rs
index 1b341c0cc8..a6b0087da9 100644
--- a/apollo-router/src/plugins/telemetry/metrics/mod.rs
+++ b/apollo-router/src/plugins/telemetry/metrics/mod.rs
@@ -37,6 +37,7 @@ pub(crate) mod apollo;
 pub(crate) mod otlp;
 pub(crate) mod prometheus;
 pub(crate) mod span_metrics_exporter;
+static UNKNOWN_SERVICE: &str = "unknown_service";
 
 #[derive(Debug, Clone, Deserialize, JsonSchema)]
 #[serde(deny_unknown_fields)]
@@ -543,7 +544,7 @@ impl MetricsBuilder {
         // Otel resources can be initialized from env variables, there is an override mechanism, but it's broken for service name as it will always override service.name
         // If the service name is set to unknown service then override it from the config
         if resource.get(opentelemetry_semantic_conventions::resource::SERVICE_NAME)
-            == Some("unknown_service".into())
+            == Some(UNKNOWN_SERVICE.into())
         {
             if let Some(service_name) = Resource::from_detectors(
                 Duration::from_secs(0),

From 6516f2cf43260df223ce1edf6ee698437f52f272 Mon Sep 17 00:00:00 2001
From: bryn 
Date: Wed, 27 Sep 2023 10:09:36 +0100
Subject: [PATCH 30/30] Update cargo lock

---
 Cargo.lock | 463 ++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 333 insertions(+), 130 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 692b09baae..7977cd5d21 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -331,6 +331,7 @@ dependencies = [
  "multimap 0.9.0",
  "notify",
  "nu-ansi-term 0.49.0",
+ "num-traits",
  "once_cell",
  "opentelemetry",
  "opentelemetry-aws",
@@ -340,6 +341,7 @@ dependencies = [
  "opentelemetry-otlp",
  "opentelemetry-prometheus",
  "opentelemetry-semantic-conventions",
+ "opentelemetry-stdout",
  "opentelemetry-zipkin",
  "opentelemetry_api",
  "p256 0.12.0",
@@ -359,7 +361,7 @@ dependencies = [
  "router-bridge",
  "rstack",
  "rust-embed",
- "rustls 0.21.7",
+ "rustls",
  "rustls-pemfile",
  "schemars",
  "serde",
@@ -380,12 +382,12 @@ dependencies = [
  "thiserror",
  "tikv-jemallocator",
  "tokio",
- "tokio-rustls 0.24.1",
+ "tokio-rustls",
  "tokio-stream",
  "tokio-tungstenite",
  "tokio-util",
  "toml 0.7.6",
- "tonic 0.8.3",
+ "tonic",
  "tonic-build",
  "tower",
  "tower-http",
@@ -558,7 +560,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "81953c529336010edd6d8e358f886d9581267795c61b19475b71314bffa46d35"
 dependencies = [
  "concurrent-queue",
- "event-listener",
+ "event-listener 2.5.3",
  "futures-core",
 ]
 
@@ -576,6 +578,127 @@ dependencies = [
  "tokio",
 ]
 
+[[package]]
+name = "async-executor"
+version = "1.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "78f2db9467baa66a700abce2a18c5ad793f6f83310aca1284796fc3921d113fd"
+dependencies = [
+ "async-lock",
+ "async-task",
+ "concurrent-queue",
+ "fastrand 2.0.0",
+ "futures-lite",
+ "slab",
+]
+
+[[package]]
+name = "async-global-executor"
+version = "2.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f1b6f5d7df27bd294849f8eec66ecfc63d11814df7a4f5d74168a2394467b776"
+dependencies = [
+ "async-channel",
+ "async-executor",
+ "async-io",
+ "async-lock",
+ "blocking",
+ "futures-lite",
+ "once_cell",
+]
+
+[[package]]
+name = "async-io"
+version = "1.13.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0fc5b45d93ef0529756f812ca52e44c221b35341892d3dcc34132ac02f3dd2af"
+dependencies = [
+ "async-lock",
+ "autocfg",
+ "cfg-if",
+ "concurrent-queue",
+ "futures-lite",
+ "log",
+ "parking",
+ "polling",
+ "rustix 0.37.23",
+ "slab",
+ "socket2 0.4.9",
+ "waker-fn",
+]
+
+[[package]]
+name = "async-lock"
+version = "2.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "287272293e9d8c41773cec55e365490fe034813a2f172f502d6ddcf75b2f582b"
+dependencies = [
+ "event-listener 2.5.3",
+]
+
+[[package]]
+name = "async-process"
+version = "1.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bf012553ce51eb7aa6dc2143804cc8252bd1cb681a1c5cb7fa94ca88682dee1d"
+dependencies = [
+ "async-io",
+ "async-lock",
+ "async-signal",
+ "blocking",
+ "cfg-if",
+ "event-listener 3.0.0",
+ "futures-lite",
+ "rustix 0.38.8",
+ "windows-sys 0.48.0",
+]
+
+[[package]]
+name = "async-signal"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4af361a844928cb7d36590d406709473a1b574f443094422ef166daa3b493208"
+dependencies = [
+ "async-io",
+ "async-lock",
+ "atomic-waker",
+ "cfg-if",
+ "concurrent-queue",
+ "futures-core",
+ "futures-io",
+ "libc",
+ "signal-hook-registry",
+ "slab",
+ "windows-sys 0.48.0",
+]
+
+[[package]]
+name = "async-std"
+version = "1.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "62565bb4402e926b29953c785397c6dc0391b7b446e45008b0049eb43cec6f5d"
+dependencies = [
+ "async-channel",
+ "async-global-executor",
+ "async-io",
+ "async-lock",
+ "async-process",
+ "crossbeam-utils",
+ "futures-channel",
+ "futures-core",
+ "futures-io",
+ "futures-lite",
+ "gloo-timers",
+ "kv-log-macro",
+ "log",
+ "memchr",
+ "once_cell",
+ "pin-project-lite",
+ "pin-utils",
+ "slab",
+ "wasm-bindgen-futures",
+]
+
 [[package]]
 name = "async-stream"
 version = "0.3.5"
@@ -598,6 +721,12 @@ dependencies = [
  "syn 2.0.29",
 ]
 
+[[package]]
+name = "async-task"
+version = "4.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9441c6b2fe128a7c2bf680a44c34d0df31ce09e5b7e401fcca3faa483dbc921"
+
 [[package]]
 name = "async-trait"
 version = "0.1.73"
@@ -609,6 +738,12 @@ dependencies = [
  "syn 2.0.29",
 ]
 
+[[package]]
+name = "atomic-waker"
+version = "1.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
+
 [[package]]
 name = "atty"
 version = "0.2.14"
@@ -818,7 +953,7 @@ dependencies = [
  "hyper-rustls",
  "lazy_static",
  "pin-project-lite",
- "rustls 0.21.7",
+ "rustls",
  "tokio",
  "tower",
  "tracing",
@@ -1111,6 +1246,22 @@ dependencies = [
  "generic-array 0.14.7",
 ]
 
+[[package]]
+name = "blocking"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94c4ef1f913d78636d78d538eec1f18de81e481f44b1be0a81060090530846e1"
+dependencies = [
+ "async-channel",
+ "async-lock",
+ "async-task",
+ "fastrand 2.0.0",
+ "futures-io",
+ "futures-lite",
+ "piper",
+ "tracing",
+]
+
 [[package]]
 name = "bloomfilter"
 version = "1.0.12"
@@ -1509,7 +1660,7 @@ checksum = "c2895653b4d9f1538a83970077cb01dfc77a4810524e51a110944688e916b18e"
 dependencies = [
  "prost",
  "prost-types",
- "tonic 0.9.2",
+ "tonic",
  "tracing-core",
 ]
 
@@ -1531,7 +1682,7 @@ dependencies = [
  "thread_local",
  "tokio",
  "tokio-stream",
- "tonic 0.9.2",
+ "tonic",
  "tracing",
  "tracing-core",
  "tracing-subscriber",
@@ -2360,6 +2511,17 @@ version = "2.5.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0206175f82b8d6bf6652ff7d71a1e27fd2e4efde587fd368662814d6ec1d9ce0"
 
+[[package]]
+name = "event-listener"
+version = "3.0.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "29e56284f00d94c1bc7fd3c77027b4623c88c1f53d8d2394c6199f2921dea325"
+dependencies = [
+ "concurrent-queue",
+ "parking",
+ "pin-project-lite",
+]
+
 [[package]]
 name = "external-subgraph"
 version = "0.1.0"
@@ -2561,13 +2723,13 @@ dependencies = [
  "parking_lot 0.12.1",
  "rand 0.8.5",
  "redis-protocol",
- "rustls 0.21.7",
+ "rustls",
  "rustls-native-certs",
  "rustls-webpki",
  "semver 1.0.18",
  "sha-1",
  "tokio",
- "tokio-rustls 0.24.1",
+ "tokio-rustls",
  "tokio-stream",
  "tokio-util",
  "url",
@@ -2818,6 +2980,18 @@ dependencies = [
  "regex",
 ]
 
+[[package]]
+name = "gloo-timers"
+version = "0.2.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b995a66bb87bebce9a0f4a95aed01daca4872c050bfcb21653361c03bc35e5c"
+dependencies = [
+ "futures-channel",
+ "futures-core",
+ "js-sys",
+ "wasm-bindgen",
+]
+
 [[package]]
 name = "graphql-introspection-query"
 version = "0.2.0"
@@ -3179,10 +3353,10 @@ dependencies = [
  "http",
  "hyper",
  "log",
- "rustls 0.21.7",
+ "rustls",
  "rustls-native-certs",
  "tokio",
- "tokio-rustls 0.24.1",
+ "tokio-rustls",
 ]
 
 [[package]]
@@ -3323,6 +3497,17 @@ dependencies = [
  "ghost",
 ]
 
+[[package]]
+name = "io-lifetimes"
+version = "1.0.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eae7b9aee968036d54dce06cebaefd919e4472e753296daccd6d344e3e2df0c2"
+dependencies = [
+ "hermit-abi 0.3.2",
+ "libc",
+ "windows-sys 0.48.0",
+]
+
 [[package]]
 name = "ipnet"
 version = "2.8.0"
@@ -3336,7 +3521,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "cb0889898416213fab133e1d33a0e5858a48177452750691bde3666d0fdbaf8b"
 dependencies = [
  "hermit-abi 0.3.2",
- "rustix",
+ "rustix 0.38.8",
  "windows-sys 0.48.0",
 ]
 
@@ -3488,6 +3673,15 @@ dependencies = [
  "libc",
 ]
 
+[[package]]
+name = "kv-log-macro"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0de8b303297635ad57c9f5059fd9cee7a47f8e8daa09df0fcd07dd39fb22977f"
+dependencies = [
+ "log",
+]
+
 [[package]]
 name = "lazy-regex"
 version = "2.5.0"
@@ -3622,6 +3816,12 @@ dependencies = [
  "syn 2.0.29",
 ]
 
+[[package]]
+name = "linux-raw-sys"
+version = "0.3.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ef53942eb7bf7ff43a617b3e2c1c4a5ecf5944a7c1bc12d7ee39bbb15e5c1519"
+
 [[package]]
 name = "linux-raw-sys"
 version = "0.4.5"
@@ -3643,6 +3843,9 @@ name = "log"
 version = "0.4.20"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
+dependencies = [
+ "value-bag",
+]
 
 [[package]]
 name = "lru"
@@ -4071,9 +4274,9 @@ dependencies = [
 
 [[package]]
 name = "opentelemetry"
-version = "0.19.0"
+version = "0.20.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5f4b8347cc26099d3aeee044065ecc3ae11469796b4d65d065a23a584ed92a6f"
+checksum = "9591d937bc0e6d2feb6f71a559540ab300ea49955229c347a517a28d27784c54"
 dependencies = [
  "opentelemetry_api",
  "opentelemetry_sdk",
@@ -4081,21 +4284,20 @@ dependencies = [
 
 [[package]]
 name = "opentelemetry-aws"
-version = "0.7.0"
+version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "72a394d24777936802edd6c03a68daab4db39630418c7e431a5648e9befa80b8"
+checksum = "31120a0109c172a42096766ef10e772f4a89422932be2c3b7f335858ff49380d"
 dependencies = [
  "once_cell",
- "opentelemetry",
+ "opentelemetry_api",
 ]
 
 [[package]]
 name = "opentelemetry-datadog"
-version = "0.7.0"
+version = "0.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "daf08569fbddd2149b268e2bde2bca0bab84bc19ee2efcc234f855f49a911536"
+checksum = "b5f4ecf595095d3b641dd2761a0c3d1f175d3d6c28f38e65418d8004ea3255dd"
 dependencies = [
- "async-trait",
  "futures-core",
  "http",
  "indexmap 1.9.3",
@@ -4112,9 +4314,9 @@ dependencies = [
 
 [[package]]
 name = "opentelemetry-http"
-version = "0.8.0"
+version = "0.9.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "a819b71d6530c4297b49b3cae2939ab3a8cc1b9f382826a1bc29dd0ca3864906"
+checksum = "c7594ec0e11d8e33faf03530a4c49af7064ebba81c1480e01be67d90b356508b"
 dependencies = [
  "async-trait",
  "bytes",
@@ -4125,83 +4327,97 @@ dependencies = [
 
 [[package]]
 name = "opentelemetry-jaeger"
-version = "0.18.0"
+version = "0.19.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "08e028dc9f4f304e9320ce38c80e7cf74067415b1ad5a8750a38bae54a4d450d"
+checksum = "876958ba9084f390f913fcf04ddf7bbbb822898867bb0a51cc28f2b9e5c1b515"
 dependencies = [
  "async-trait",
- "futures",
- "futures-executor",
+ "futures-core",
+ "futures-util",
  "headers",
  "http",
- "once_cell",
  "opentelemetry",
  "opentelemetry-http",
  "opentelemetry-semantic-conventions",
  "reqwest",
- "thiserror",
  "thrift",
  "tokio",
 ]
 
 [[package]]
 name = "opentelemetry-otlp"
-version = "0.12.0"
+version = "0.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8af72d59a4484654ea8eb183fea5ae4eb6a41d7ac3e3bae5f4d2a282a3a7d3ca"
+checksum = "7e5e5a5c4135864099f3faafbe939eb4d7f9b80ebf68a8448da961b32a7c1275"
 dependencies = [
  "async-trait",
- "futures",
- "futures-util",
+ "futures-core",
  "http",
- "opentelemetry",
  "opentelemetry-http",
  "opentelemetry-proto",
+ "opentelemetry-semantic-conventions",
+ "opentelemetry_api",
+ "opentelemetry_sdk",
  "prost",
  "reqwest",
  "thiserror",
  "tokio",
- "tonic 0.8.3",
+ "tonic",
 ]
 
 [[package]]
 name = "opentelemetry-prometheus"
-version = "0.12.0"
+version = "0.13.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9a9f186f6293ebb693caddd0595e66b74a6068fa51048e26e0bf9c95478c639c"
+checksum = "c7d81bc254e2d572120363a2b16cdb0d715d301b5789be0cfc26ad87e4e10e53"
 dependencies = [
- "opentelemetry",
+ "once_cell",
+ "opentelemetry_api",
+ "opentelemetry_sdk",
  "prometheus",
  "protobuf",
 ]
 
 [[package]]
 name = "opentelemetry-proto"
-version = "0.2.0"
+version = "0.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "045f8eea8c0fa19f7d48e7bc3128a39c2e5c533d5c61298c548dfefc1064474c"
+checksum = "b1e3f814aa9f8c905d0ee4bde026afd3b2577a97c10e1699912e3e44f0c4cbeb"
 dependencies = [
- "futures",
- "futures-util",
- "opentelemetry",
+ "opentelemetry_api",
+ "opentelemetry_sdk",
  "prost",
- "tonic 0.8.3",
+ "tonic",
 ]
 
 [[package]]
 name = "opentelemetry-semantic-conventions"
-version = "0.11.0"
+version = "0.12.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "24e33428e6bf08c6f7fcea4ddb8e358fab0fe48ab877a87c70c6ebe20f673ce5"
+checksum = "73c9f9340ad135068800e7f1b24e9e09ed9e7143f5bf8518ded3d3ec69789269"
 dependencies = [
  "opentelemetry",
 ]
 
+[[package]]
+name = "opentelemetry-stdout"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8bd550321bc0f9d3f6dcbfe5c75262789de5b3e2776da2cbcfd2392aa05db0c6"
+dependencies = [
+ "futures-util",
+ "opentelemetry_api",
+ "opentelemetry_sdk",
+ "ordered-float 3.9.0",
+ "serde",
+ "serde_json",
+]
+
 [[package]]
 name = "opentelemetry-zipkin"
-version = "0.17.0"
+version = "0.18.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e1fd48caee5e1db71454c95be32d1daeb6fae265321ff8f51b1efc8a50b0be80"
+checksum = "eb966f01235207a6933c0aec98374fe9782df1c1d2b3d1db35c458451d138143"
 dependencies = [
  "async-trait",
  "futures-core",
@@ -4219,14 +4435,14 @@ dependencies = [
 
 [[package]]
 name = "opentelemetry_api"
-version = "0.19.0"
+version = "0.20.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ed41783a5bf567688eb38372f2b7a8530f5a607a4b49d38dd7573236c23ca7e2"
+checksum = "8a81f725323db1b1206ca3da8bb19874bbd3f57c3bcd59471bfb04525b265b9b"
 dependencies = [
- "fnv",
  "futures-channel",
  "futures-util",
  "indexmap 1.9.3",
+ "js-sys",
  "once_cell",
  "pin-project-lite",
  "thiserror",
@@ -4235,21 +4451,23 @@ dependencies = [
 
 [[package]]
 name = "opentelemetry_sdk"
-version = "0.19.0"
+version = "0.20.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8b3a2a91fdbfdd4d212c0dcc2ab540de2c2bcbbd90be17de7a7daf8822d010c1"
+checksum = "fa8e705a0612d48139799fcbaba0d4a90f06277153e43dd2bdc16c6f0edd8026"
 dependencies = [
+ "async-std",
  "async-trait",
  "crossbeam-channel",
- "dashmap",
- "fnv",
  "futures-channel",
  "futures-executor",
  "futures-util",
  "once_cell",
  "opentelemetry_api",
+ "ordered-float 3.9.0",
  "percent-encoding",
  "rand 0.8.5",
+ "regex",
+ "serde_json",
  "thiserror",
  "tokio",
  "tokio-stream",
@@ -4497,6 +4715,17 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184"
 
+[[package]]
+name = "piper"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "668d31b1c4eba19242f2088b2bf3316b82ca31082a8335764db4e083db7485d4"
+dependencies = [
+ "atomic-waker",
+ "fastrand 2.0.0",
+ "futures-io",
+]
+
 [[package]]
 name = "pkcs1"
 version = "0.4.1"
@@ -4570,6 +4799,22 @@ dependencies = [
  "syn 2.0.29",
 ]
 
+[[package]]
+name = "polling"
+version = "2.8.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4b2d323e8ca7996b3e23126511a523f7e62924d93ecd5ae73b333815b0eb3dce"
+dependencies = [
+ "autocfg",
+ "bitflags 1.3.2",
+ "cfg-if",
+ "concurrent-queue",
+ "libc",
+ "log",
+ "pin-project-lite",
+ "windows-sys 0.48.0",
+]
+
 [[package]]
 name = "polyval"
 version = "0.6.1"
@@ -5022,14 +5267,14 @@ dependencies = [
  "once_cell",
  "percent-encoding",
  "pin-project-lite",
- "rustls 0.21.7",
+ "rustls",
  "rustls-native-certs",
  "rustls-pemfile",
  "serde",
  "serde_json",
  "serde_urlencoded",
  "tokio",
- "tokio-rustls 0.24.1",
+ "tokio-rustls",
  "tokio-util",
  "tower-service",
  "url",
@@ -5323,27 +5568,29 @@ dependencies = [
 
 [[package]]
 name = "rustix"
-version = "0.38.8"
+version = "0.37.23"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "19ed4fa021d81c8392ce04db050a3da9a60299050b7ae1cf482d862b54a7218f"
+checksum = "4d69718bf81c6127a49dc64e44a742e8bb9213c0ff8869a22c308f84c1d4ab06"
 dependencies = [
- "bitflags 2.4.0",
+ "bitflags 1.3.2",
  "errno",
+ "io-lifetimes",
  "libc",
- "linux-raw-sys",
+ "linux-raw-sys 0.3.8",
  "windows-sys 0.48.0",
 ]
 
 [[package]]
-name = "rustls"
-version = "0.20.8"
+name = "rustix"
+version = "0.38.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "fff78fc74d175294f4e83b28343315ffcfb114b156f0185e9741cb5570f50e2f"
+checksum = "19ed4fa021d81c8392ce04db050a3da9a60299050b7ae1cf482d862b54a7218f"
 dependencies = [
- "log",
- "ring",
- "sct",
- "webpki",
+ "bitflags 2.4.0",
+ "errno",
+ "libc",
+ "linux-raw-sys 0.4.5",
+ "windows-sys 0.48.0",
 ]
 
 [[package]]
@@ -6047,7 +6294,7 @@ dependencies = [
  "cfg-if",
  "fastrand 2.0.0",
  "redox_syscall 0.3.5",
- "rustix",
+ "rustix 0.38.8",
  "windows-sys 0.48.0",
 ]
 
@@ -6347,24 +6594,13 @@ dependencies = [
  "syn 2.0.29",
 ]
 
-[[package]]
-name = "tokio-rustls"
-version = "0.23.4"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59"
-dependencies = [
- "rustls 0.20.8",
- "tokio",
- "webpki",
-]
-
 [[package]]
 name = "tokio-rustls"
 version = "0.24.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081"
 dependencies = [
- "rustls 0.21.7",
+ "rustls",
  "tokio",
 ]
 
@@ -6401,10 +6637,10 @@ checksum = "212d5dcb2a1ce06d81107c3d0ffa3121fe974b73f068c8282cb1c32328113b6c"
 dependencies = [
  "futures-util",
  "log",
- "rustls 0.21.7",
+ "rustls",
  "rustls-native-certs",
  "tokio",
- "tokio-rustls 0.24.1",
+ "tokio-rustls",
  "tungstenite",
 ]
 
@@ -6468,14 +6704,14 @@ dependencies = [
 
 [[package]]
 name = "tonic"
-version = "0.8.3"
+version = "0.9.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8f219fad3b929bef19b1f86fbc0358d35daed8f2cac972037ac0dc10bbb8d5fb"
+checksum = "3082666a3a6433f7f511c7192923fa1fe07c69332d3c6a2e6bb040b569199d5a"
 dependencies = [
  "async-stream",
  "async-trait",
  "axum",
- "base64 0.13.1",
+ "base64 0.21.4",
  "bytes",
  "flate2",
  "futures-core",
@@ -6488,41 +6724,10 @@ dependencies = [
  "percent-encoding",
  "pin-project",
  "prost",
- "prost-derive",
  "rustls-native-certs",
  "rustls-pemfile",
  "tokio",
- "tokio-rustls 0.23.4",
- "tokio-stream",
- "tokio-util",
- "tower",
- "tower-layer",
- "tower-service",
- "tracing",
- "tracing-futures",
-]
-
-[[package]]
-name = "tonic"
-version = "0.9.2"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "3082666a3a6433f7f511c7192923fa1fe07c69332d3c6a2e6bb040b569199d5a"
-dependencies = [
- "async-trait",
- "axum",
- "base64 0.21.4",
- "bytes",
- "futures-core",
- "futures-util",
- "h2",
- "http",
- "http-body",
- "hyper",
- "hyper-timeout",
- "percent-encoding",
- "pin-project",
- "prost",
- "tokio",
+ "tokio-rustls",
  "tokio-stream",
  "tower",
  "tower-layer",
@@ -6671,12 +6876,14 @@ dependencies = [
 
 [[package]]
 name = "tracing-opentelemetry"
-version = "0.19.0"
+version = "0.21.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "00a39dcf9bfc1742fa4d6215253b33a6e474be78275884c216fc2a06267b3600"
+checksum = "75327c6b667828ddc28f5e3f169036cb793c3f588d83bf0f262a7f062ffed3c8"
 dependencies = [
  "once_cell",
  "opentelemetry",
+ "opentelemetry_sdk",
+ "smallvec",
  "tracing",
  "tracing-core",
  "tracing-log",
@@ -6776,7 +6983,7 @@ dependencies = [
  "httparse",
  "log",
  "rand 0.8.5",
- "rustls 0.21.7",
+ "rustls",
  "sha1 0.10.6",
  "thiserror",
  "url",
@@ -7025,6 +7232,12 @@ version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d"
 
+[[package]]
+name = "value-bag"
+version = "1.4.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d92ccd67fb88503048c01b59152a04effd0782d035a83a6d256ce6085f08f4a3"
+
 [[package]]
 name = "vcpkg"
 version = "0.2.15"
@@ -7181,16 +7394,6 @@ dependencies = [
  "wasm-bindgen",
 ]
 
-[[package]]
-name = "webpki"
-version = "0.22.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f0e74f82d49d545ad128049b7e88f6576df2da6b02e9ce565c6f533be576957e"
-dependencies = [
- "ring",
- "untrusted",
-]
-
 [[package]]
 name = "webpki-roots"
 version = "0.25.2"