Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/dev' into simon/compiler-v1.0
Browse files Browse the repository at this point in the history
  • Loading branch information
SimonSapin committed Oct 17, 2023
2 parents 61bf9b9 + d949232 commit 4908f85
Show file tree
Hide file tree
Showing 11 changed files with 630 additions and 204 deletions.
15 changes: 15 additions & 0 deletions .changesets/fix_bryn_unify_resources.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
### Bring Otel `service.name` into line with the Otel spec ([PR #4034](https://github.com/apollographql/router/pull/4034))

Handling of Otel `service.name` has been brought into line with the [Otel spec](https://opentelemetry.io/docs/concepts/sdk-configuration/general-sdk-configuration/#otel_service_name) across traces and metrics.

Service name discovery is handled in the following order:
1. `OTEL_SERVICE_NAME` env
2. `OTEL_RESOURCE_ATTRIBUTES` env
3. `router.yaml` `service_name`
4. `router.yaml` `resources` (attributes)

If none of the above are found then the service name will be set to `unknown_service:apollo_router` or `unknown_service` if the executable name cannot be determined.

Users who have not explicitly configured their service name should do so either via the yaml config file or via the `OTEL_SERVICE_NAME` environment variable.

By [@BrynCooke](https://github.com/BrynCooke) in https://github.com/apollographql/router/pull/4034
Original file line number Diff line number Diff line change
Expand Up @@ -4427,11 +4427,65 @@ expression: "&schema"
"additionalProperties": false
},
"resources": {
"description": "Resources",
"description": "Otel configuration via resource",
"default": {},
"type": "object",
"additionalProperties": {
"type": "string"
"anyOf": [
{
"description": "bool values",
"type": "boolean"
},
{
"description": "i64 values",
"type": "integer",
"format": "int64"
},
{
"description": "f64 values",
"type": "number",
"format": "double"
},
{
"description": "String values",
"type": "string"
},
{
"description": "Array of homogeneous values",
"anyOf": [
{
"description": "Array of bools",
"type": "array",
"items": {
"type": "boolean"
}
},
{
"description": "Array of integers",
"type": "array",
"items": {
"type": "integer",
"format": "int64"
}
},
{
"description": "Array of floats",
"type": "array",
"items": {
"type": "number",
"format": "double"
}
},
{
"description": "Array of strings",
"type": "array",
"items": {
"type": "string"
}
}
]
}
]
}
},
"service_name": {
Expand Down Expand Up @@ -5081,7 +5135,7 @@ expression: "&schema"
"type": "object",
"properties": {
"attributes": {
"description": "Default attributes",
"description": "The resources configured on the tracing pipeline",
"default": {},
"type": "object",
"additionalProperties": {
Expand Down Expand Up @@ -5212,13 +5266,15 @@ expression: "&schema"
},
"service_name": {
"description": "The trace service name",
"default": "router",
"type": "string"
"default": null,
"type": "string",
"nullable": true
},
"service_namespace": {
"description": "The trace service namespace",
"default": "",
"type": "string"
"default": null,
"type": "string",
"nullable": true
}
},
"additionalProperties": false
Expand Down
3 changes: 3 additions & 0 deletions apollo-router/src/configuration/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -393,8 +393,11 @@ cors:

#[test]
fn validate_project_config_files() {
std::env::set_var("DATADOG_AGENT_HOST", "http://example.com");
std::env::set_var("JAEGER_HOST", "http://example.com");
std::env::set_var("JAEGER_USERNAME", "username");
std::env::set_var("JAEGER_PASSWORD", "pass");
std::env::set_var("ZIPKIN_HOST", "http://example.com");
std::env::set_var("TEST_CONFIG_ENDPOINT", "http://example.com");
std::env::set_var("TEST_CONFIG_COLLECTOR_ENDPOINT", "http://example.com");

Expand Down
129 changes: 33 additions & 96 deletions apollo-router/src/plugins/telemetry/config.rs
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
//! Configuration for the telemetry plugin.
use std::collections::BTreeMap;
use std::env;
use std::io::IsTerminal;

use axum::headers::HeaderName;
use opentelemetry::sdk::resource::EnvResourceDetector;
use opentelemetry::sdk::resource::ResourceDetector;
use opentelemetry::sdk::trace::SpanLimits;
use opentelemetry::sdk::Resource;
use opentelemetry::Array;
use opentelemetry::KeyValue;
use opentelemetry::Value;
use regex::Regex;
use schemars::JsonSchema;
Expand All @@ -22,6 +17,7 @@ use crate::configuration::ConfigurationError;
use crate::plugin::serde::deserialize_option_header_name;
use crate::plugin::serde::deserialize_regex;
use crate::plugins::telemetry::metrics;
use crate::plugins::telemetry::resource::ConfigResource;

#[derive(thiserror::Error, Debug)]
pub(crate) enum Error {
Expand Down Expand Up @@ -89,8 +85,8 @@ pub(crate) struct MetricsCommon {
pub(crate) service_name: Option<String>,
/// Set a service.namespace attribute in your metrics
pub(crate) service_namespace: Option<String>,
/// Resources
pub(crate) resources: HashMap<String, String>,
/// Otel configuration via resource
pub(crate) resources: BTreeMap<String, AttributeValue>,
/// Custom buckets for histograms
pub(crate) buckets: Vec<f64>,
/// Experimental metrics to know more about caching strategies
Expand Down Expand Up @@ -123,7 +119,7 @@ impl Default for MetricsCommon {
attributes: Default::default(),
service_name: None,
service_namespace: None,
resources: HashMap::new(),
resources: BTreeMap::new(),
buckets: vec![
0.001, 0.005, 0.015, 0.05, 0.1, 0.2, 0.3, 0.4, 0.5, 1.0, 5.0, 10.0,
],
Expand Down Expand Up @@ -349,9 +345,9 @@ pub(crate) struct RequestPropagation {
#[non_exhaustive]
pub(crate) struct Trace {
/// The trace service name
pub(crate) service_name: String,
pub(crate) service_name: Option<String>,
/// The trace service namespace
pub(crate) service_namespace: String,
pub(crate) service_namespace: Option<String>,
/// The sampler, always_on, always_off or a decimal between 0.0 and 1.0
pub(crate) sampler: SamplerOption,
/// Whether to use parent based sampling
Expand All @@ -366,10 +362,34 @@ pub(crate) struct Trace {
pub(crate) max_attributes_per_event: u32,
/// The maximum attributes per link before discarding
pub(crate) max_attributes_per_link: u32,
/// Default attributes
/// The resources configured on the tracing pipeline
pub(crate) attributes: BTreeMap<String, AttributeValue>,
}

impl ConfigResource for Trace {
fn service_name(&self) -> Option<String> {
self.service_name.clone()
}
fn service_namespace(&self) -> Option<String> {
self.service_namespace.clone()
}
fn resource(&self) -> &BTreeMap<String, AttributeValue> {
&self.attributes
}
}

impl ConfigResource for MetricsCommon {
fn service_name(&self) -> Option<String> {
self.service_name.clone()
}
fn service_namespace(&self) -> Option<String> {
self.service_namespace.clone()
}
fn resource(&self) -> &BTreeMap<String, AttributeValue> {
&self.resources
}
}

fn default_parent_based_sampler() -> bool {
true
}
Expand All @@ -381,7 +401,7 @@ fn default_sampler() -> SamplerOption {
impl Default for Trace {
fn default() -> Self {
Self {
service_name: "router".to_string(),
service_name: Default::default(),
service_namespace: Default::default(),
sampler: default_sampler(),
parent_based_sampler: default_parent_based_sampler(),
Expand Down Expand Up @@ -563,58 +583,8 @@ impl From<&Trace> for opentelemetry::sdk::trace::Config {
trace_config = trace_config.with_max_attributes_per_event(config.max_attributes_per_event);
trace_config = trace_config.with_max_attributes_per_link(config.max_attributes_per_link);

let mut resource_defaults = vec![];
resource_defaults.push(KeyValue::new(
opentelemetry_semantic_conventions::resource::SERVICE_NAME,
config.service_name.clone(),
));
resource_defaults.push(KeyValue::new(
opentelemetry_semantic_conventions::resource::SERVICE_NAMESPACE,
config.service_namespace.clone(),
));
resource_defaults.push(KeyValue::new(
opentelemetry_semantic_conventions::resource::SERVICE_VERSION,
std::env!("CARGO_PKG_VERSION"),
));

if let Some(executable_name) = std::env::current_exe().ok().and_then(|path| {
path.file_name()
.and_then(|p| p.to_str().map(|s| s.to_string()))
}) {
resource_defaults.push(KeyValue::new(
opentelemetry_semantic_conventions::resource::PROCESS_EXECUTABLE_NAME,
executable_name,
));
}

// Take the default first, then config, then env resources, then env variable. Last entry wins
let resource = Resource::new(resource_defaults)
.merge(&Resource::new(
config
.attributes
.iter()
.map(|(k, v)| {
KeyValue::new(
opentelemetry::Key::from(k.clone()),
opentelemetry::Value::from(v.clone()),
)
})
.collect::<Vec<KeyValue>>(),
))
.merge(&EnvResourceDetector::new().detect(Duration::from_secs(0)))
.merge(&Resource::new(
env::var("OTEL_SERVICE_NAME")
.ok()
.map(|v| {
vec![KeyValue::new(
opentelemetry_semantic_conventions::resource::SERVICE_NAME,
v,
)]
})
.unwrap_or_default(),
));

trace_config = trace_config.with_resource(resource);
trace_config = trace_config.with_resource(config.to_resource());
trace_config
}
}
Expand Down Expand Up @@ -679,8 +649,6 @@ impl Conf {

#[cfg(test)]
mod tests {
use opentelemetry::sdk::trace::Config;
use opentelemetry_semantic_conventions::resource::SERVICE_NAME;
use serde_json::json;

use super::*;
Expand Down Expand Up @@ -837,35 +805,4 @@ mod tests {
AttributeValue::try_from(json!([1.1, true])).expect_err("mixed conversion must fail");
AttributeValue::try_from(json!([true, "bar"])).expect_err("mixed conversion must fail");
}

#[test]
fn test_service_name() {
let router_config = Trace {
service_name: "foo".to_string(),
..Default::default()
};
let otel_config: Config = (&router_config).into();
assert_eq!(
Some(Value::String("foo".into())),
otel_config.resource.get(SERVICE_NAME)
);

// Env should take precedence
env::set_var("OTEL_RESOURCE_ATTRIBUTES", "service.name=bar");
let otel_config: Config = (&router_config).into();
assert_eq!(
Some(Value::String("bar".into())),
otel_config.resource.get(SERVICE_NAME)
);

// Env should take precedence
env::set_var("OTEL_SERVICE_NAME", "bif");
let otel_config: Config = (&router_config).into();
assert_eq!(
Some(Value::String("bif".into())),
otel_config.resource.get(SERVICE_NAME)
);
env::remove_var("OTEL_SERVICE_NAME");
env::remove_var("OTEL_RESOURCE_ATTRIBUTES");
}
}
34 changes: 2 additions & 32 deletions apollo-router/src/plugins/telemetry/metrics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ use opentelemetry::sdk::metrics::reader::AggregationSelector;
use opentelemetry::sdk::metrics::Aggregation;
use opentelemetry::sdk::metrics::InstrumentKind;
use opentelemetry::sdk::resource::ResourceDetector;
use opentelemetry::sdk::resource::SdkProvidedResourceDetector;
use opentelemetry::sdk::Resource;
use opentelemetry_api::KeyValue;
use regex::Regex;
Expand All @@ -29,6 +28,7 @@ use crate::plugins::telemetry::apollo_exporter::Sender;
use crate::plugins::telemetry::config::AttributeValue;
use crate::plugins::telemetry::config::Conf;
use crate::plugins::telemetry::config::MetricsCommon;
use crate::plugins::telemetry::resource::ConfigResource;
use crate::router_factory::Endpoint;
use crate::Context;
use crate::ListenAddr;
Expand All @@ -37,7 +37,6 @@ pub(crate) mod apollo;
pub(crate) mod otlp;
pub(crate) mod prometheus;
pub(crate) mod span_metrics_exporter;
static UNKNOWN_SERVICE: &str = "unknown_service";

#[derive(Debug, Clone, Deserialize, JsonSchema, Default)]
#[serde(deny_unknown_fields, default)]
Expand Down Expand Up @@ -475,36 +474,7 @@ impl ResourceDetector for ConfigResourceDetector {

impl MetricsBuilder {
pub(crate) fn new(config: &Conf) -> Self {
let metrics_common_config = &config.metrics.common;

let mut resource = Resource::from_detectors(
Duration::from_secs(0),
vec![
Box::new(ConfigResourceDetector(metrics_common_config.clone())),
Box::new(SdkProvidedResourceDetector),
Box::new(opentelemetry::sdk::resource::EnvResourceDetector::new()),
],
);

// Otel resources can be initialized from env variables, there is an override mechanism, but it's broken for service name as it will always override service.name
// If the service name is set to unknown service then override it from the config
if resource.get(opentelemetry_semantic_conventions::resource::SERVICE_NAME)
== Some(UNKNOWN_SERVICE.into())
{
if let Some(service_name) = Resource::from_detectors(
Duration::from_secs(0),
vec![Box::new(ConfigResourceDetector(
metrics_common_config.clone(),
))],
)
.get(opentelemetry_semantic_conventions::resource::SERVICE_NAME)
{
resource = resource.merge(&mut Resource::new(vec![KeyValue::new(
opentelemetry_semantic_conventions::resource::SERVICE_NAME,
service_name,
)]));
}
}
let resource = config.metrics.common.to_resource();

Self {
resource: resource.clone(),
Expand Down
Loading

0 comments on commit 4908f85

Please sign in to comment.