Skip to content

Commit

Permalink
Unify resource handling
Browse files Browse the repository at this point in the history
The telemetry code had separate logic for constructing resources to pass to tracing and metrics exporters. This has now been unified and tests added.

As a consequence, the default service.name for the router in tracing has now been brought into line with the otel spec, where if unspecified the value will be `unknown_service`.
  • Loading branch information
bryn committed Oct 13, 2023
1 parent 0f656f4 commit 541cd67
Show file tree
Hide file tree
Showing 10 changed files with 524 additions and 180 deletions.
15 changes: 15 additions & 0 deletions .changesets/fix_bryn_unify_resources.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
### Bring Utel `service.name` into line with the Otel spec ([PR #4034](https://github.com/apollographql/router/pull/4034))

Handling of Otel `service.name` has been brought into line with the [Otel spec](https://opentelemetry.io/docs/concepts/sdk-configuration/general-sdk-configuration/#otel_service_name) across traces and metrics.

Service name discovery is handled in the following order:
1. `OTEL_SERVICE_NAME` env
2. `OTEL_RESOURCE_ATTRIBUTES` env
3. `router.yaml` `service_name`
4. `router.yaml` `resources` (attributes)

If none of the above ar found then the service name will be set to `unknown_service:apollo_router` or `unknown_service` if the executable name cannot be determined.

Users who have not explicitly configured their service name should do so either via the yaml config file or via the `OTEL_SERVICE_NAME` environment variable.

By [@BrynCooke](https://github.com/BrynCooke) in https://github.com/apollographql/router/pull/4034
Original file line number Diff line number Diff line change
Expand Up @@ -4427,11 +4427,65 @@ expression: "&schema"
"additionalProperties": false
},
"resources": {
"description": "Resources",
"description": "Otel configuration via resource",
"default": {},
"type": "object",
"additionalProperties": {
"type": "string"
"anyOf": [
{
"description": "bool values",
"type": "boolean"
},
{
"description": "i64 values",
"type": "integer",
"format": "int64"
},
{
"description": "f64 values",
"type": "number",
"format": "double"
},
{
"description": "String values",
"type": "string"
},
{
"description": "Array of homogeneous values",
"anyOf": [
{
"description": "Array of bools",
"type": "array",
"items": {
"type": "boolean"
}
},
{
"description": "Array of integers",
"type": "array",
"items": {
"type": "integer",
"format": "int64"
}
},
{
"description": "Array of floats",
"type": "array",
"items": {
"type": "number",
"format": "double"
}
},
{
"description": "Array of strings",
"type": "array",
"items": {
"type": "string"
}
}
]
}
]
}
},
"service_name": {
Expand Down Expand Up @@ -5081,7 +5135,7 @@ expression: "&schema"
"type": "object",
"properties": {
"attributes": {
"description": "Default attributes",
"description": "The resources configured on the tracing pipeline",
"default": {},
"type": "object",
"additionalProperties": {
Expand Down Expand Up @@ -5212,13 +5266,15 @@ expression: "&schema"
},
"service_name": {
"description": "The trace service name",
"default": "router",
"type": "string"
"default": null,
"type": "string",
"nullable": true
},
"service_namespace": {
"description": "The trace service namespace",
"default": "",
"type": "string"
"default": null,
"type": "string",
"nullable": true
}
},
"additionalProperties": false
Expand Down
129 changes: 33 additions & 96 deletions apollo-router/src/plugins/telemetry/config.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
//! Configuration for the telemetry plugin.
use std::collections::BTreeMap;
use std::env;
use std::io::IsTerminal;

use axum::headers::HeaderName;
use opentelemetry::sdk::resource::EnvResourceDetector;
use opentelemetry::sdk::resource::ResourceDetector;
use opentelemetry::sdk::trace::SpanLimits;
use opentelemetry::sdk::Resource;
use opentelemetry::Array;
use opentelemetry::KeyValue;
use opentelemetry::Value;
use regex::Regex;
use schemars::JsonSchema;
Expand All @@ -22,6 +17,7 @@ use crate::configuration::ConfigurationError;
use crate::plugin::serde::deserialize_option_header_name;
use crate::plugin::serde::deserialize_regex;
use crate::plugins::telemetry::metrics;
use crate::plugins::telemetry::resource::ConfigResource;

#[derive(thiserror::Error, Debug)]
pub(crate) enum Error {
Expand Down Expand Up @@ -89,8 +85,8 @@ pub(crate) struct MetricsCommon {
pub(crate) service_name: Option<String>,
/// Set a service.namespace attribute in your metrics
pub(crate) service_namespace: Option<String>,
/// Resources
pub(crate) resources: HashMap<String, String>,
/// Otel configuration via resource
pub(crate) resources: BTreeMap<String, AttributeValue>,
/// Custom buckets for histograms
pub(crate) buckets: Vec<f64>,
/// Experimental metrics to know more about caching strategies
Expand Down Expand Up @@ -123,7 +119,7 @@ impl Default for MetricsCommon {
attributes: Default::default(),
service_name: None,
service_namespace: None,
resources: HashMap::new(),
resources: BTreeMap::new(),
buckets: vec![
0.001, 0.005, 0.015, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 1.0, 5.0, 10.0,
],
Expand Down Expand Up @@ -349,9 +345,9 @@ pub(crate) struct RequestPropagation {
#[non_exhaustive]
pub(crate) struct Trace {
/// The trace service name
pub(crate) service_name: String,
pub(crate) service_name: Option<String>,
/// The trace service namespace
pub(crate) service_namespace: String,
pub(crate) service_namespace: Option<String>,
/// The sampler, always_on, always_off or a decimal between 0.0 and 1.0
pub(crate) sampler: SamplerOption,
/// Whether to use parent based sampling
Expand All @@ -366,10 +362,34 @@ pub(crate) struct Trace {
pub(crate) max_attributes_per_event: u32,
/// The maximum attributes per link before discarding
pub(crate) max_attributes_per_link: u32,
/// Default attributes
/// The resources configured on the tracing pipeline
pub(crate) attributes: BTreeMap<String, AttributeValue>,
}

impl ConfigResource for Trace {
fn service_name(&self) -> Option<String> {
self.service_name.clone()
}
fn service_namespace(&self) -> Option<String> {
self.service_namespace.clone()
}
fn resource(&self) -> &BTreeMap<String, AttributeValue> {
&self.attributes
}
}

impl ConfigResource for MetricsCommon {
fn service_name(&self) -> Option<String> {
self.service_name.clone()
}
fn service_namespace(&self) -> Option<String> {
self.service_namespace.clone()
}
fn resource(&self) -> &BTreeMap<String, AttributeValue> {
&self.resources
}
}

fn default_parent_based_sampler() -> bool {
true
}
Expand All @@ -381,7 +401,7 @@ fn default_sampler() -> SamplerOption {
impl Default for Trace {
fn default() -> Self {
Self {
service_name: "router".to_string(),
service_name: Default::default(),
service_namespace: Default::default(),
sampler: default_sampler(),
parent_based_sampler: default_parent_based_sampler(),
Expand Down Expand Up @@ -563,58 +583,8 @@ impl From<&Trace> for opentelemetry::sdk::trace::Config {
trace_config = trace_config.with_max_attributes_per_event(config.max_attributes_per_event);
trace_config = trace_config.with_max_attributes_per_link(config.max_attributes_per_link);

let mut resource_defaults = vec![];
resource_defaults.push(KeyValue::new(
opentelemetry_semantic_conventions::resource::SERVICE_NAME,
config.service_name.clone(),
));
resource_defaults.push(KeyValue::new(
opentelemetry_semantic_conventions::resource::SERVICE_NAMESPACE,
config.service_namespace.clone(),
));
resource_defaults.push(KeyValue::new(
opentelemetry_semantic_conventions::resource::SERVICE_VERSION,
std::env!("CARGO_PKG_VERSION"),
));

if let Some(executable_name) = std::env::current_exe().ok().and_then(|path| {
path.file_name()
.and_then(|p| p.to_str().map(|s| s.to_string()))
}) {
resource_defaults.push(KeyValue::new(
opentelemetry_semantic_conventions::resource::PROCESS_EXECUTABLE_NAME,
executable_name,
));
}

// Take the default first, then config, then env resources, then env variable. Last entry wins
let resource = Resource::new(resource_defaults)
.merge(&Resource::new(
config
.attributes
.iter()
.map(|(k, v)| {
KeyValue::new(
opentelemetry::Key::from(k.clone()),
opentelemetry::Value::from(v.clone()),
)
})
.collect::<Vec<KeyValue>>(),
))
.merge(&EnvResourceDetector::new().detect(Duration::from_secs(0)))
.merge(&Resource::new(
env::var("OTEL_SERVICE_NAME")
.ok()
.map(|v| {
vec![KeyValue::new(
opentelemetry_semantic_conventions::resource::SERVICE_NAME,
v,
)]
})
.unwrap_or_default(),
));

trace_config = trace_config.with_resource(resource);
trace_config = trace_config.with_resource(config.to_resource());
trace_config
}
}
Expand Down Expand Up @@ -679,8 +649,6 @@ impl Conf {

#[cfg(test)]
mod tests {
use opentelemetry::sdk::trace::Config;
use opentelemetry_semantic_conventions::resource::SERVICE_NAME;
use serde_json::json;

use super::*;
Expand Down Expand Up @@ -837,35 +805,4 @@ mod tests {
AttributeValue::try_from(json!([1.1, true])).expect_err("mixed conversion must fail");
AttributeValue::try_from(json!([true, "bar"])).expect_err("mixed conversion must fail");
}

#[test]
fn test_service_name() {
let router_config = Trace {
service_name: "foo".to_string(),
..Default::default()
};
let otel_config: Config = (&router_config).into();
assert_eq!(
Some(Value::String("foo".into())),
otel_config.resource.get(SERVICE_NAME)
);

// Env should take precedence
env::set_var("OTEL_RESOURCE_ATTRIBUTES", "service.name=bar");
let otel_config: Config = (&router_config).into();
assert_eq!(
Some(Value::String("bar".into())),
otel_config.resource.get(SERVICE_NAME)
);

// Env should take precedence
env::set_var("OTEL_SERVICE_NAME", "bif");
let otel_config: Config = (&router_config).into();
assert_eq!(
Some(Value::String("bif".into())),
otel_config.resource.get(SERVICE_NAME)
);
env::remove_var("OTEL_SERVICE_NAME");
env::remove_var("OTEL_RESOURCE_ATTRIBUTES");
}
}
34 changes: 2 additions & 32 deletions apollo-router/src/plugins/telemetry/metrics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ use opentelemetry::sdk::metrics::reader::AggregationSelector;
use opentelemetry::sdk::metrics::Aggregation;
use opentelemetry::sdk::metrics::InstrumentKind;
use opentelemetry::sdk::resource::ResourceDetector;
use opentelemetry::sdk::resource::SdkProvidedResourceDetector;
use opentelemetry::sdk::Resource;
use opentelemetry_api::KeyValue;
use regex::Regex;
Expand All @@ -29,6 +28,7 @@ use crate::plugins::telemetry::apollo_exporter::Sender;
use crate::plugins::telemetry::config::AttributeValue;
use crate::plugins::telemetry::config::Conf;
use crate::plugins::telemetry::config::MetricsCommon;
use crate::plugins::telemetry::resource::ConfigResource;
use crate::router_factory::Endpoint;
use crate::Context;
use crate::ListenAddr;
Expand All @@ -37,7 +37,6 @@ pub(crate) mod apollo;
pub(crate) mod otlp;
pub(crate) mod prometheus;
pub(crate) mod span_metrics_exporter;
static UNKNOWN_SERVICE: &str = "unknown_service";

#[derive(Debug, Clone, Deserialize, JsonSchema, Default)]
#[serde(deny_unknown_fields, default)]
Expand Down Expand Up @@ -475,36 +474,7 @@ impl ResourceDetector for ConfigResourceDetector {

impl MetricsBuilder {
pub(crate) fn new(config: &Conf) -> Self {
let metrics_common_config = &config.metrics.common;

let mut resource = Resource::from_detectors(
Duration::from_secs(0),
vec![
Box::new(ConfigResourceDetector(metrics_common_config.clone())),
Box::new(SdkProvidedResourceDetector),
Box::new(opentelemetry::sdk::resource::EnvResourceDetector::new()),
],
);

// Otel resources can be initialized from env variables, there is an override mechanism, but it's broken for service name as it will always override service.name
// If the service name is set to unknown service then override it from the config
if resource.get(opentelemetry_semantic_conventions::resource::SERVICE_NAME)
== Some(UNKNOWN_SERVICE.into())
{
if let Some(service_name) = Resource::from_detectors(
Duration::from_secs(0),
vec![Box::new(ConfigResourceDetector(
metrics_common_config.clone(),
))],
)
.get(opentelemetry_semantic_conventions::resource::SERVICE_NAME)
{
resource = resource.merge(&mut Resource::new(vec![KeyValue::new(
opentelemetry_semantic_conventions::resource::SERVICE_NAME,
service_name,
)]));
}
}
let resource = config.metrics.common.to_resource();

Self {
resource: resource.clone(),
Expand Down
1 change: 1 addition & 0 deletions apollo-router/src/plugins/telemetry/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ pub(crate) mod formatters;
pub(crate) mod metrics;
mod otlp;
pub(crate) mod reload;
mod resource;
pub(crate) mod tracing;
pub(crate) mod utils;

Expand Down
Loading

0 comments on commit 541cd67

Please sign in to comment.