From 77346d68f1b70e85f589e716716a49c395f5bebb Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Fri, 20 Sep 2024 14:43:48 +0200 Subject: [PATCH 1/5] wip --- relay-cabi/src/processing.rs | 1 + relay-dynamic-config/src/global.rs | 11 ++- relay-event-normalization/src/event.rs | 16 ++-- .../src/transactions/processor.rs | 86 ++++++++++++++++--- relay-server/src/services/processor.rs | 1 + .../src/services/processor/span/processing.rs | 11 ++- 6 files changed, 103 insertions(+), 23 deletions(-) diff --git a/relay-cabi/src/processing.rs b/relay-cabi/src/processing.rs index c8b86a20f4..45af9564d2 100644 --- a/relay-cabi/src/processing.rs +++ b/relay-cabi/src/processing.rs @@ -275,6 +275,7 @@ pub unsafe extern "C" fn relay_store_normalizer_normalize_event( replay_id: config.replay_id, span_allowed_hosts: &[], // only supported in relay scrub_mongo_description: ScrubMongoDescription::Disabled, // only supported in relay + span_op_defaults: Default::default(), // only supported in relay }; normalize_event(&mut event, &normalization_config); diff --git a/relay-dynamic-config/src/global.rs b/relay-dynamic-config/src/global.rs index 692c9da82b..e65f40d436 100644 --- a/relay-dynamic-config/src/global.rs +++ b/relay-dynamic-config/src/global.rs @@ -5,7 +5,7 @@ use std::io::BufReader; use std::path::Path; use relay_base_schema::metrics::MetricNamespace; -use relay_event_normalization::{MeasurementsConfig, ModelCosts}; +use relay_event_normalization::{MeasurementsConfig, ModelCosts, SpanOpDefaults}; use relay_filter::GenericFiltersConfig; use relay_quotas::Quota; use serde::{de, Deserialize, Serialize}; @@ -49,6 +49,13 @@ pub struct GlobalConfig { /// Configuration for AI span measurements. #[serde(skip_serializing_if = "is_missing")] pub ai_model_costs: ErrorBoundary, + + /// Configuration to derive the `span.op` from other span fields. + #[serde( + deserialize_with = "default_on_error", + skip_serializing_if = "is_default" + )] + pub span_op_defaults: SpanOpDefaults, } impl GlobalConfig { @@ -337,7 +344,7 @@ pub enum BucketEncoding { /// Returns `true` if this value is equal to `Default::default()`. fn is_default(t: &T) -> bool { - *t == T::default() + t == &T::default() } fn default_on_error<'de, D, T>(deserializer: D) -> Result diff --git a/relay-event-normalization/src/event.rs b/relay-event-normalization/src/event.rs index 08ae145af6..c241b4152e 100644 --- a/relay-event-normalization/src/event.rs +++ b/relay-event-normalization/src/event.rs @@ -34,9 +34,9 @@ use crate::span::tag_extraction::extract_span_tags_from_event; use crate::utils::{self, get_event_user_tag, MAX_DURATION_MOBILE_MS}; use crate::{ breakdowns, event_error, legacy, mechanism, remove_other, schema, span, stacktrace, - transactions, trimming, user_agent, BreakdownsConfig, CombinedMeasurementsConfig, GeoIpLookup, - MaxChars, ModelCosts, PerformanceScoreConfig, RawUserAgentInfo, SpanDescriptionRule, - TransactionNameConfig, + transactions, trimming, user_agent, BorrowedSpanOpDefaults, BreakdownsConfig, + CombinedMeasurementsConfig, GeoIpLookup, MaxChars, ModelCosts, PerformanceScoreConfig, + RawUserAgentInfo, SpanDescriptionRule, TransactionNameConfig, }; /// Configuration for [`normalize_event`]. @@ -161,6 +161,9 @@ pub struct NormalizationConfig<'a> { /// Controls whether or not MongoDB span descriptions will be scrubbed. pub scrub_mongo_description: ScrubMongoDescription, + + /// Rules to infer `span.op` from other span fields. + pub span_op_defaults: BorrowedSpanOpDefaults<'a>, } impl<'a> Default for NormalizationConfig<'a> { @@ -194,6 +197,7 @@ impl<'a> Default for NormalizationConfig<'a> { replay_id: Default::default(), span_allowed_hosts: Default::default(), scrub_mongo_description: ScrubMongoDescription::Disabled, + span_op_defaults: Default::default(), } } } @@ -244,8 +248,10 @@ fn normalize(event: &mut Event, meta: &mut Meta, config: &NormalizationConfig) { // (internally noops for non-transaction events). // TODO: Parts of this processor should probably be a filter so we // can revert some changes to ProcessingAction) - let mut transactions_processor = - transactions::TransactionsProcessor::new(config.transaction_name_config.clone()); + let mut transactions_processor = transactions::TransactionsProcessor::new( + config.transaction_name_config.clone(), + config.span_op_defaults.clone(), + ); let _ = transactions_processor.process_event(event, meta, ProcessingState::root()); // Process security reports first to ensure all props. diff --git a/relay-event-normalization/src/transactions/processor.rs b/relay-event-normalization/src/transactions/processor.rs index 880bcc6f8a..f0514c5e0b 100644 --- a/relay-event-normalization/src/transactions/processor.rs +++ b/relay-event-normalization/src/transactions/processor.rs @@ -7,7 +7,8 @@ use relay_event_schema::processor::{ self, ProcessValue, ProcessingResult, ProcessingState, Processor, }; use relay_event_schema::protocol::{Event, Span, SpanStatus, TraceContext, TransactionSource}; -use relay_protocol::{Annotated, Meta, Remark, RemarkType}; +use relay_protocol::{Annotated, Meta, Remark, RemarkType, RuleCondition}; +use serde::{Deserialize, Serialize}; use crate::regexes::TRANSACTION_NAME_NORMALIZER_REGEX; use crate::TransactionNameRule; @@ -76,12 +77,27 @@ pub fn apply_transaction_rename_rules( #[derive(Debug, Default)] pub struct TransactionsProcessor<'r> { name_config: TransactionNameConfig<'r>, + span_op_defaults: BorrowedSpanOpDefaults<'r>, } impl<'r> TransactionsProcessor<'r> { /// Creates a new `TransactionsProcessor` instance. - pub fn new(name_config: TransactionNameConfig<'r>) -> Self { - Self { name_config } + pub fn new( + name_config: TransactionNameConfig<'r>, + span_op_defaults: BorrowedSpanOpDefaults<'r>, + ) -> Self { + Self { + name_config, + span_op_defaults, + } + } + + #[cfg(test)] + fn new_name_config(name_config: TransactionNameConfig<'r>) -> Self { + Self { + name_config, + ..Default::default() + } } /// Returns `true` if the given transaction name should be treated as a URL. @@ -163,14 +179,58 @@ impl Processor for TransactionsProcessor<'_> { _meta: &mut Meta, state: &ProcessingState<'_>, ) -> ProcessingResult { - span.op.get_or_insert_with(|| "default".to_owned()); - + if span.op.value().is_none() { + *span.op.value_mut() = Some(self.span_op_defaults.infer(span)); + } span.process_child_values(self, state)?; Ok(()) } } +/// TODO: docs +#[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq)] +pub struct SpanOpDefaults { + rules: Vec, +} + +impl SpanOpDefaults { + /// Gets a borrowed version of this config. + pub fn borrow(&self) -> BorrowedSpanOpDefaults { + BorrowedSpanOpDefaults { + rules: self.rules.as_slice(), + } + } +} + +/// TODO: docs +#[derive(Clone, Debug, Default)] +pub struct BorrowedSpanOpDefaults<'a> { + rules: &'a [SpanOpDefaultRule], +} + +impl BorrowedSpanOpDefaults<'_> { + /// Infer the span op from a set of rules. + /// + /// The first matching rule determines the span op. + /// If no rule matches, `"default"` is returned. + fn infer(&self, span: &Span) -> String { + for rule in self.rules { + if rule.condition.matches(span) { + return rule.value.clone(); + } + } + "default".to_owned() + } +} + +/// TODO: docs +#[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] +pub struct SpanOpDefaultRule { + condition: RuleCondition, + value: String, +} + /// Span status codes for the Ruby Rack integration that indicate raw URLs being sent as /// transaction names. These cases are considered as high-cardinality. /// @@ -911,7 +971,7 @@ mod tests { process_value( &mut event, - &mut TransactionsProcessor::new(TransactionNameConfig { + &mut TransactionsProcessor::new_name_config(TransactionNameConfig { rules: &[rule1, rule2, rule3], }), ProcessingState::root(), @@ -992,7 +1052,7 @@ mod tests { process_value( &mut event, - &mut TransactionsProcessor::new(TransactionNameConfig { + &mut TransactionsProcessor::new_name_config(TransactionNameConfig { rules: &[rule1, rule2, rule3], }), ProcessingState::root(), @@ -1059,7 +1119,7 @@ mod tests { let mut event = Annotated::::from_json(json).unwrap(); - let mut processor = TransactionsProcessor::new(TransactionNameConfig { + let mut processor = TransactionsProcessor::new_name_config(TransactionNameConfig { rules: rules.as_ref(), }); process_value(&mut event, &mut processor, ProcessingState::root()).unwrap(); @@ -1174,7 +1234,7 @@ mod tests { // This must not normalize transaction name, since it's disabled. process_value( &mut event, - &mut TransactionsProcessor::new(TransactionNameConfig { + &mut TransactionsProcessor::new_name_config(TransactionNameConfig { rules: rules.as_ref(), }), ProcessingState::root(), @@ -1231,7 +1291,7 @@ mod tests { process_value( &mut event, - &mut TransactionsProcessor::new(TransactionNameConfig { + &mut TransactionsProcessor::new_name_config(TransactionNameConfig { rules: rules.as_ref(), }), ProcessingState::root(), @@ -1316,7 +1376,7 @@ mod tests { process_value( &mut event, - &mut TransactionsProcessor::new(TransactionNameConfig { rules: &[rule] }), + &mut TransactionsProcessor::new_name_config(TransactionNameConfig { rules: &[rule] }), ProcessingState::root(), ) .unwrap(); @@ -1521,7 +1581,7 @@ mod tests { process_value( &mut event, - &mut TransactionsProcessor::new(TransactionNameConfig { + &mut TransactionsProcessor::new_name_config(TransactionNameConfig { rules: &[TransactionNameRule { pattern: LazyGlob::new("/remains/*/1234567890/".to_owned()), expiry: Utc.with_ymd_and_hms(3000, 1, 1, 1, 1, 1).unwrap(), @@ -1587,7 +1647,7 @@ mod tests { process_value( &mut event, - &mut TransactionsProcessor::new(TransactionNameConfig { + &mut TransactionsProcessor::new_name_config(TransactionNameConfig { rules: &[TransactionNameRule { pattern: LazyGlob::new("/remains/*/**".to_owned()), expiry: Utc.with_ymd_and_hms(3000, 1, 1, 1, 1, 1).unwrap(), diff --git a/relay-server/src/services/processor.rs b/relay-server/src/services/processor.rs index 8fc647bf69..5f5e66acce 100644 --- a/relay-server/src/services/processor.rs +++ b/relay-server/src/services/processor.rs @@ -1610,6 +1610,7 @@ impl EnvelopeProcessorService { } else { ScrubMongoDescription::Disabled }, + span_op_defaults: global_config.span_op_defaults.borrow(), }; metric!(timer(RelayTimers::EventProcessingNormalization), { diff --git a/relay-server/src/services/processor/span/processing.rs b/relay-server/src/services/processor/span/processing.rs index 9f778813f3..ec21db68b9 100644 --- a/relay-server/src/services/processor/span/processing.rs +++ b/relay-server/src/services/processor/span/processing.rs @@ -16,8 +16,9 @@ use relay_event_normalization::{ TransactionsProcessor, }; use relay_event_normalization::{ - normalize_transaction_name, ClientHints, FromUserAgentInfo, GeoIpLookup, ModelCosts, - SchemaProcessor, TimestampProcessor, TransactionNameRule, TrimmingProcessor, + normalize_transaction_name, BorrowedSpanOpDefaults, ClientHints, FromUserAgentInfo, + GeoIpLookup, ModelCosts, SchemaProcessor, TimestampProcessor, TransactionNameRule, + TrimmingProcessor, }; use relay_event_schema::processor::{process_value, ProcessingState}; use relay_event_schema::protocol::{BrowserContext, IpAddr, Span, SpanData}; @@ -370,6 +371,7 @@ struct NormalizeSpanConfig<'a> { client_ip: Option, /// An initialized GeoIP lookup. geo_lookup: Option<&'a GeoIpLookup>, + span_op_defaults: BorrowedSpanOpDefaults<'a>, } impl<'a> NormalizeSpanConfig<'a> { @@ -419,6 +421,7 @@ impl<'a> NormalizeSpanConfig<'a> { }, client_ip, geo_lookup, + span_op_defaults: global_config.span_op_defaults.borrow(), } } } @@ -474,6 +477,7 @@ fn normalize( scrub_mongo_description, client_ip, geo_lookup, + span_op_defaults, } = config; set_segment_attributes(annotated_span); @@ -497,7 +501,7 @@ fn normalize( } process_value( annotated_span, - &mut TransactionsProcessor::new(Default::default()), + &mut TransactionsProcessor::new(Default::default(), span_op_defaults), ProcessingState::root(), )?; @@ -1098,6 +1102,7 @@ mod tests { scrub_mongo_description: ScrubMongoDescription::Disabled, client_ip: Some(IpAddr("2.125.160.216".to_owned())), geo_lookup: Some(&GEO_LOOKUP), + span_op_defaults: Default::default(), } } From 3bc3164b9df812294bc6429914796204ee1d1efa Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Fri, 20 Sep 2024 15:01:21 +0200 Subject: [PATCH 2/5] test --- .../src/transactions/processor.rs | 60 +++++++++++++++++-- 1 file changed, 55 insertions(+), 5 deletions(-) diff --git a/relay-event-normalization/src/transactions/processor.rs b/relay-event-normalization/src/transactions/processor.rs index f0514c5e0b..5eeae3adc3 100644 --- a/relay-event-normalization/src/transactions/processor.rs +++ b/relay-event-normalization/src/transactions/processor.rs @@ -188,7 +188,7 @@ impl Processor for TransactionsProcessor<'_> { } } -/// TODO: docs +/// Rules used to infer `span.op` from other span fields. #[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq)] pub struct SpanOpDefaults { rules: Vec, @@ -203,7 +203,7 @@ impl SpanOpDefaults { } } -/// TODO: docs +/// Borrowed version of [`SpanOpDefaults`]. #[derive(Clone, Debug, Default)] pub struct BorrowedSpanOpDefaults<'a> { rules: &'a [SpanOpDefaultRule], @@ -224,9 +224,8 @@ impl BorrowedSpanOpDefaults<'_> { } } -/// TODO: docs #[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] -pub struct SpanOpDefaultRule { +struct SpanOpDefaultRule { condition: RuleCondition, value: String, } @@ -396,7 +395,6 @@ fn scrub_identifiers_with_regex( #[cfg(test)] mod tests { - use chrono::{Duration, TimeZone, Utc}; use insta::assert_debug_snapshot; use itertools::Itertools; @@ -404,6 +402,7 @@ mod tests { use relay_event_schema::processor::process_value; use relay_event_schema::protocol::{ClientSdkInfo, Contexts, SpanId, TraceId}; use relay_protocol::{assert_annotated_snapshot, get_value}; + use serde_json::json; use crate::validation::validate_event; use crate::{EventValidationConfig, RedactionRule}; @@ -1687,4 +1686,55 @@ mod tests { }, ]"#); } + + #[test] + fn test_infer_span_op_default() { + let span = Annotated::from_json(r#"{}"#).unwrap(); + let defaults: SpanOpDefaults = serde_json::from_value(json!({ + "rules": [{ + "condition": { + "op": "not", + "inner": { + "op": "eq", + "name": "span.data.messaging\\.system", + "value": null, + }, + }, + "value": "message" + }] + } + )) + .unwrap(); + let op = defaults.borrow().infer(span.value().unwrap()); + assert_eq!(&op, "default"); + } + + #[test] + fn test_infer_span_op_messaging() { + let span = Annotated::from_json( + r#"{ + "data": { + "messaging.system": "activemq" + } + }"#, + ) + .unwrap(); + let defaults: SpanOpDefaults = serde_json::from_value(json!({ + "rules": [{ + "condition": { + "op": "not", + "inner": { + "op": "eq", + "name": "span.data.messaging\\.system", + "value": null, + }, + }, + "value": "message" + }] + } + )) + .unwrap(); + let op = defaults.borrow().infer(span.value().unwrap()); + assert_eq!(&op, "message"); + } } From dfd413ea6fa6efb55fdf452394c6baf389a5045e Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Fri, 20 Sep 2024 15:19:31 +0200 Subject: [PATCH 3/5] doc, ref --- relay-event-normalization/src/event.rs | 4 ++-- .../src/transactions/processor.rs | 16 ++++++++++------ 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/relay-event-normalization/src/event.rs b/relay-event-normalization/src/event.rs index c241b4152e..05365e193c 100644 --- a/relay-event-normalization/src/event.rs +++ b/relay-event-normalization/src/event.rs @@ -249,8 +249,8 @@ fn normalize(event: &mut Event, meta: &mut Meta, config: &NormalizationConfig) { // TODO: Parts of this processor should probably be a filter so we // can revert some changes to ProcessingAction) let mut transactions_processor = transactions::TransactionsProcessor::new( - config.transaction_name_config.clone(), - config.span_op_defaults.clone(), + config.transaction_name_config, + config.span_op_defaults, ); let _ = transactions_processor.process_event(event, meta, ProcessingState::root()); diff --git a/relay-event-normalization/src/transactions/processor.rs b/relay-event-normalization/src/transactions/processor.rs index 5eeae3adc3..dc2dee3d2e 100644 --- a/relay-event-normalization/src/transactions/processor.rs +++ b/relay-event-normalization/src/transactions/processor.rs @@ -14,7 +14,7 @@ use crate::regexes::TRANSACTION_NAME_NORMALIZER_REGEX; use crate::TransactionNameRule; /// Configuration for sanitizing unparameterized transaction names. -#[derive(Clone, Debug, Default)] +#[derive(Clone, Copy, Debug, Default)] pub struct TransactionNameConfig<'r> { /// Rules for identifier replacement that were discovered by Sentry's transaction clusterer. pub rules: &'r [TransactionNameRule], @@ -191,7 +191,8 @@ impl Processor for TransactionsProcessor<'_> { /// Rules used to infer `span.op` from other span fields. #[derive(Clone, Debug, Default, Deserialize, Serialize, PartialEq)] pub struct SpanOpDefaults { - rules: Vec, + /// List of rules to apply. First match wins. + pub rules: Vec, } impl SpanOpDefaults { @@ -204,7 +205,7 @@ impl SpanOpDefaults { } /// Borrowed version of [`SpanOpDefaults`]. -#[derive(Clone, Debug, Default)] +#[derive(Clone, Copy, Debug, Default)] pub struct BorrowedSpanOpDefaults<'a> { rules: &'a [SpanOpDefaultRule], } @@ -224,10 +225,13 @@ impl BorrowedSpanOpDefaults<'_> { } } +/// A rule to infer [`Span::op`] from other span fields. #[derive(Clone, Debug, Deserialize, Serialize, PartialEq)] -struct SpanOpDefaultRule { - condition: RuleCondition, - value: String, +pub struct SpanOpDefaultRule { + /// When to set the given value. + pub condition: RuleCondition, + /// Value for the [`Span::op`]. Only set if omitted by the SDK. + pub value: String, } /// Span status codes for the Ruby Rack integration that indicate raw URLs being sent as From 332f4a58e66fb8213d919898b94e246fb6b8e5ec Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Fri, 20 Sep 2024 15:35:14 +0200 Subject: [PATCH 4/5] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8328a8462f..9258f918d6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ - Add a config option to add default tags to all Relay Sentry events. ([#3944](https://github.com/getsentry/relay/pull/3944)) - Automatically derive `client.address` and `user.geo` for standalone spans. ([#4047](https://github.com/getsentry/relay/pull/4047)) +- Configurable span.op inference. ([#4056](https://github.com/getsentry/relay/pull/4056)) ## 24.9.0 From 4efd67760a5fc3214aaacae0926c35690c2a44ea Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 24 Sep 2024 10:20:16 +0200 Subject: [PATCH 5/5] changelog --- py/CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/py/CHANGELOG.md b/py/CHANGELOG.md index e7bbae9a9e..2885917a0d 100644 --- a/py/CHANGELOG.md +++ b/py/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## Unreleased + +- Add `spanOpDefaults` to global config. ([#4056](https://github.com/getsentry/relay/pull/4056)) + ## 0.9.1 - Add REPLAY_VIDEO entry to DataCategory. ([#3847](https://github.com/getsentry/relay/pull/3847))