Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unify resource handling #4034

Merged
merged 3 commits into from
Oct 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions .changesets/fix_bryn_unify_resources.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
### Bring Otel `service.name` into line with the Otel spec ([PR #4034](https://github.com/apollographql/router/pull/4034))

Handling of Otel `service.name` has been brought into line with the [Otel spec](https://opentelemetry.io/docs/concepts/sdk-configuration/general-sdk-configuration/#otel_service_name) across traces and metrics.

Service name discovery is handled in the following order:
1. `OTEL_SERVICE_NAME` env
2. `OTEL_RESOURCE_ATTRIBUTES` env
3. `router.yaml` `service_name`
4. `router.yaml` `resources` (attributes)

If none of the above are found then the service name will be set to `unknown_service:apollo_router` or `unknown_service` if the executable name cannot be determined.

Users who have not explicitly configured their service name should do so either via the yaml config file or via the `OTEL_SERVICE_NAME` environment variable.

By [@BrynCooke](https://github.com/BrynCooke) in https://github.com/apollographql/router/pull/4034
Original file line number Diff line number Diff line change
Expand Up @@ -4427,11 +4427,65 @@ expression: "&schema"
"additionalProperties": false
},
"resources": {
"description": "Resources",
"description": "Otel configuration via resource",
"default": {},
"type": "object",
"additionalProperties": {
"type": "string"
"anyOf": [
{
"description": "bool values",
"type": "boolean"
},
{
"description": "i64 values",
"type": "integer",
"format": "int64"
},
{
"description": "f64 values",
"type": "number",
"format": "double"
},
{
"description": "String values",
"type": "string"
},
{
"description": "Array of homogeneous values",
"anyOf": [
{
"description": "Array of bools",
"type": "array",
"items": {
"type": "boolean"
}
},
{
"description": "Array of integers",
"type": "array",
"items": {
"type": "integer",
"format": "int64"
}
},
{
"description": "Array of floats",
"type": "array",
"items": {
"type": "number",
"format": "double"
}
},
{
"description": "Array of strings",
"type": "array",
"items": {
"type": "string"
}
}
]
}
]
}
},
"service_name": {
Expand Down Expand Up @@ -5081,7 +5135,7 @@ expression: "&schema"
"type": "object",
"properties": {
"attributes": {
"description": "Default attributes",
"description": "The resources configured on the tracing pipeline",
"default": {},
"type": "object",
"additionalProperties": {
Expand Down Expand Up @@ -5212,13 +5266,15 @@ expression: "&schema"
},
"service_name": {
"description": "The trace service name",
"default": "router",
"type": "string"
"default": null,
"type": "string",
"nullable": true
},
"service_namespace": {
"description": "The trace service namespace",
"default": "",
"type": "string"
"default": null,
"type": "string",
"nullable": true
}
},
"additionalProperties": false
Expand Down
129 changes: 33 additions & 96 deletions apollo-router/src/plugins/telemetry/config.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
//! Configuration for the telemetry plugin.
use std::collections::BTreeMap;
use std::env;
use std::io::IsTerminal;

use axum::headers::HeaderName;
use opentelemetry::sdk::resource::EnvResourceDetector;
use opentelemetry::sdk::resource::ResourceDetector;
use opentelemetry::sdk::trace::SpanLimits;
use opentelemetry::sdk::Resource;
use opentelemetry::Array;
use opentelemetry::KeyValue;
use opentelemetry::Value;
use regex::Regex;
use schemars::JsonSchema;
Expand All @@ -22,6 +17,7 @@ use crate::configuration::ConfigurationError;
use crate::plugin::serde::deserialize_option_header_name;
use crate::plugin::serde::deserialize_regex;
use crate::plugins::telemetry::metrics;
use crate::plugins::telemetry::resource::ConfigResource;

#[derive(thiserror::Error, Debug)]
pub(crate) enum Error {
Expand Down Expand Up @@ -89,8 +85,8 @@ pub(crate) struct MetricsCommon {
pub(crate) service_name: Option<String>,
/// Set a service.namespace attribute in your metrics
pub(crate) service_namespace: Option<String>,
/// Resources
pub(crate) resources: HashMap<String, String>,
/// Otel configuration via resource
pub(crate) resources: BTreeMap<String, AttributeValue>,
/// Custom buckets for histograms
pub(crate) buckets: Vec<f64>,
/// Experimental metrics to know more about caching strategies
Expand Down Expand Up @@ -123,7 +119,7 @@ impl Default for MetricsCommon {
attributes: Default::default(),
service_name: None,
service_namespace: None,
resources: HashMap::new(),
resources: BTreeMap::new(),
buckets: vec![
0.001, 0.005, 0.015, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 1.0, 5.0, 10.0,
],
Expand Down Expand Up @@ -349,9 +345,9 @@ pub(crate) struct RequestPropagation {
#[non_exhaustive]
pub(crate) struct Trace {
/// The trace service name
pub(crate) service_name: String,
pub(crate) service_name: Option<String>,
/// The trace service namespace
pub(crate) service_namespace: String,
pub(crate) service_namespace: Option<String>,
/// The sampler, always_on, always_off or a decimal between 0.0 and 1.0
pub(crate) sampler: SamplerOption,
/// Whether to use parent based sampling
Expand All @@ -366,10 +362,34 @@ pub(crate) struct Trace {
pub(crate) max_attributes_per_event: u32,
/// The maximum attributes per link before discarding
pub(crate) max_attributes_per_link: u32,
/// Default attributes
/// The resources configured on the tracing pipeline
pub(crate) attributes: BTreeMap<String, AttributeValue>,
}

impl ConfigResource for Trace {
fn service_name(&self) -> Option<String> {
self.service_name.clone()
}
fn service_namespace(&self) -> Option<String> {
self.service_namespace.clone()
}
fn resource(&self) -> &BTreeMap<String, AttributeValue> {
&self.attributes
}
}

impl ConfigResource for MetricsCommon {
fn service_name(&self) -> Option<String> {
self.service_name.clone()
}
fn service_namespace(&self) -> Option<String> {
self.service_namespace.clone()
}
fn resource(&self) -> &BTreeMap<String, AttributeValue> {
&self.resources
}
}

fn default_parent_based_sampler() -> bool {
true
}
Expand All @@ -381,7 +401,7 @@ fn default_sampler() -> SamplerOption {
impl Default for Trace {
fn default() -> Self {
Self {
service_name: "router".to_string(),
service_name: Default::default(),
service_namespace: Default::default(),
sampler: default_sampler(),
parent_based_sampler: default_parent_based_sampler(),
Expand Down Expand Up @@ -563,58 +583,8 @@ impl From<&Trace> for opentelemetry::sdk::trace::Config {
trace_config = trace_config.with_max_attributes_per_event(config.max_attributes_per_event);
trace_config = trace_config.with_max_attributes_per_link(config.max_attributes_per_link);

let mut resource_defaults = vec![];
resource_defaults.push(KeyValue::new(
opentelemetry_semantic_conventions::resource::SERVICE_NAME,
config.service_name.clone(),
));
resource_defaults.push(KeyValue::new(
opentelemetry_semantic_conventions::resource::SERVICE_NAMESPACE,
config.service_namespace.clone(),
));
resource_defaults.push(KeyValue::new(
opentelemetry_semantic_conventions::resource::SERVICE_VERSION,
std::env!("CARGO_PKG_VERSION"),
));

if let Some(executable_name) = std::env::current_exe().ok().and_then(|path| {
path.file_name()
.and_then(|p| p.to_str().map(|s| s.to_string()))
}) {
resource_defaults.push(KeyValue::new(
opentelemetry_semantic_conventions::resource::PROCESS_EXECUTABLE_NAME,
executable_name,
));
}

// Take the default first, then config, then env resources, then env variable. Last entry wins
let resource = Resource::new(resource_defaults)
.merge(&Resource::new(
config
.attributes
.iter()
.map(|(k, v)| {
KeyValue::new(
opentelemetry::Key::from(k.clone()),
opentelemetry::Value::from(v.clone()),
)
})
.collect::<Vec<KeyValue>>(),
))
.merge(&EnvResourceDetector::new().detect(Duration::from_secs(0)))
.merge(&Resource::new(
env::var("OTEL_SERVICE_NAME")
.ok()
.map(|v| {
vec![KeyValue::new(
opentelemetry_semantic_conventions::resource::SERVICE_NAME,
v,
)]
})
.unwrap_or_default(),
));

trace_config = trace_config.with_resource(resource);
trace_config = trace_config.with_resource(config.to_resource());
trace_config
}
}
Expand Down Expand Up @@ -679,8 +649,6 @@ impl Conf {

#[cfg(test)]
mod tests {
use opentelemetry::sdk::trace::Config;
use opentelemetry_semantic_conventions::resource::SERVICE_NAME;
use serde_json::json;

use super::*;
Expand Down Expand Up @@ -837,35 +805,4 @@ mod tests {
AttributeValue::try_from(json!([1.1, true])).expect_err("mixed conversion must fail");
AttributeValue::try_from(json!([true, "bar"])).expect_err("mixed conversion must fail");
}

#[test]
fn test_service_name() {
let router_config = Trace {
service_name: "foo".to_string(),
..Default::default()
};
let otel_config: Config = (&router_config).into();
assert_eq!(
Some(Value::String("foo".into())),
otel_config.resource.get(SERVICE_NAME)
);

// Env should take precedence
env::set_var("OTEL_RESOURCE_ATTRIBUTES", "service.name=bar");
let otel_config: Config = (&router_config).into();
assert_eq!(
Some(Value::String("bar".into())),
otel_config.resource.get(SERVICE_NAME)
);

// Env should take precedence
env::set_var("OTEL_SERVICE_NAME", "bif");
let otel_config: Config = (&router_config).into();
assert_eq!(
Some(Value::String("bif".into())),
otel_config.resource.get(SERVICE_NAME)
);
env::remove_var("OTEL_SERVICE_NAME");
env::remove_var("OTEL_RESOURCE_ATTRIBUTES");
}
}
34 changes: 2 additions & 32 deletions apollo-router/src/plugins/telemetry/metrics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ use opentelemetry::sdk::metrics::reader::AggregationSelector;
use opentelemetry::sdk::metrics::Aggregation;
use opentelemetry::sdk::metrics::InstrumentKind;
use opentelemetry::sdk::resource::ResourceDetector;
use opentelemetry::sdk::resource::SdkProvidedResourceDetector;
use opentelemetry::sdk::Resource;
use opentelemetry_api::KeyValue;
use regex::Regex;
Expand All @@ -29,6 +28,7 @@ use crate::plugins::telemetry::apollo_exporter::Sender;
use crate::plugins::telemetry::config::AttributeValue;
use crate::plugins::telemetry::config::Conf;
use crate::plugins::telemetry::config::MetricsCommon;
use crate::plugins::telemetry::resource::ConfigResource;
use crate::router_factory::Endpoint;
use crate::Context;
use crate::ListenAddr;
Expand All @@ -37,7 +37,6 @@ pub(crate) mod apollo;
pub(crate) mod otlp;
pub(crate) mod prometheus;
pub(crate) mod span_metrics_exporter;
static UNKNOWN_SERVICE: &str = "unknown_service";

#[derive(Debug, Clone, Deserialize, JsonSchema, Default)]
#[serde(deny_unknown_fields, default)]
Expand Down Expand Up @@ -475,36 +474,7 @@ impl ResourceDetector for ConfigResourceDetector {

impl MetricsBuilder {
pub(crate) fn new(config: &Conf) -> Self {
let metrics_common_config = &config.metrics.common;

let mut resource = Resource::from_detectors(
Duration::from_secs(0),
vec![
Box::new(ConfigResourceDetector(metrics_common_config.clone())),
Box::new(SdkProvidedResourceDetector),
Box::new(opentelemetry::sdk::resource::EnvResourceDetector::new()),
],
);

// Otel resources can be initialized from env variables, there is an override mechanism, but it's broken for service name as it will always override service.name
// If the service name is set to unknown service then override it from the config
if resource.get(opentelemetry_semantic_conventions::resource::SERVICE_NAME)
== Some(UNKNOWN_SERVICE.into())
{
if let Some(service_name) = Resource::from_detectors(
Duration::from_secs(0),
vec![Box::new(ConfigResourceDetector(
metrics_common_config.clone(),
))],
)
.get(opentelemetry_semantic_conventions::resource::SERVICE_NAME)
{
resource = resource.merge(&mut Resource::new(vec![KeyValue::new(
opentelemetry_semantic_conventions::resource::SERVICE_NAME,
service_name,
)]));
}
}
let resource = config.metrics.common.to_resource();

Self {
resource: resource.clone(),
Expand Down
1 change: 1 addition & 0 deletions apollo-router/src/plugins/telemetry/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@ pub(crate) mod formatters;
pub(crate) mod metrics;
mod otlp;
pub(crate) mod reload;
mod resource;
pub(crate) mod tracing;
pub(crate) mod utils;

Expand Down
Loading