From ea30222840c4ef6bdbcebda8d5a45d79934f6071 Mon Sep 17 00:00:00 2001 From: Philip Jenvey Date: Mon, 30 Oct 2023 11:29:48 -0700 Subject: [PATCH] feat: emit metrics for megaphone polling (#488) only emit uncommon errors to sentry. don't emit WS ProtocolErrors either SYNC-3978 Co-authored-by: JR Conlin --- Cargo.lock | 1 + autoconnect/autoconnect-common/Cargo.toml | 1 + .../autoconnect-common/src/megaphone.rs | 37 ++++++++++++++++--- .../autoconnect-settings/src/app_state.rs | 1 + autoconnect/autoconnect-ws/src/error.rs | 32 ++++++++++------ 5 files changed, 54 insertions(+), 18 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c218f8ade..eafd8f279 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -571,6 +571,7 @@ version = "1.69.1" dependencies = [ "actix-web", "autopush_common", + "cadence", "futures 0.3.28", "futures-locks 0.7.1", "reqwest 0.11.22", diff --git a/autoconnect/autoconnect-common/Cargo.toml b/autoconnect/autoconnect-common/Cargo.toml index 5d0fdaf93..ec7375775 100644 --- a/autoconnect/autoconnect-common/Cargo.toml +++ b/autoconnect/autoconnect-common/Cargo.toml @@ -8,6 +8,7 @@ version.workspace = true [dependencies] actix-web.workspace = true +cadence.workspace = true futures.workspace = true futures-locks.workspace = true reqwest.workspace = true diff --git a/autoconnect/autoconnect-common/src/megaphone.rs b/autoconnect/autoconnect-common/src/megaphone.rs index bfa291460..fe140d0ab 100644 --- a/autoconnect/autoconnect-common/src/megaphone.rs +++ b/autoconnect/autoconnect-common/src/megaphone.rs @@ -1,6 +1,7 @@ use std::{collections::HashMap, sync::Arc, time::Duration}; use actix_web::rt; +use cadence::{CountedExt, StatsdClient}; use serde_derive::Deserialize; use tokio::sync::RwLock; @@ -20,22 +21,25 @@ pub struct MegaphoneResponse { pub async fn init_and_spawn_megaphone_updater( broadcaster: &Arc>, http: &reqwest::Client, + metrics: &Arc, url: &str, token: &str, poll_interval: Duration, ) -> reqwest::Result<()> { - megaphone_updater(broadcaster, http, url, token).await?; + updater(broadcaster, http, url, token).await?; let broadcaster = Arc::clone(broadcaster); let http = http.clone(); + let metrics = Arc::clone(metrics); let url = url.to_owned(); let token = token.to_owned(); rt::spawn(async move { loop { rt::time::sleep(poll_interval).await; - if let Err(e) = megaphone_updater(&broadcaster, &http, &url, &token).await { - error!("📢megaphone_updater failed: {}", e); - sentry::capture_event(sentry::event_from_error(&e)); + if let Err(e) = updater(&broadcaster, &http, &url, &token).await { + report_updater_error(&metrics, e); + } else { + metrics.incr_with_tags("megaphone.updater.ok").send(); } } }); @@ -43,14 +47,35 @@ pub async fn init_and_spawn_megaphone_updater( Ok(()) } +/// Emits a log, metric and Sentry event depending on the type of Error +fn report_updater_error(metrics: &Arc, err: reqwest::Error) { + let reason = if err.is_timeout() { + "timeout" + } else if err.is_connect() { + "connect" + } else { + "unknown" + }; + metrics + .incr_with_tags("megaphone.updater.error") + .with_tag("reason", reason) + .send(); + if reason == "unknown" { + error!("📢megaphone::updater failed: {}", err); + sentry::capture_event(sentry::event_from_error(&err)); + } else { + trace!("📢megaphone::updater failed (reason: {}): {}", reason, err); + } +} + /// Refresh the `BroadcastChangeTracker`'s Broadcasts from the Megaphone service -async fn megaphone_updater( +async fn updater( broadcaster: &Arc>, http: &reqwest::Client, url: &str, token: &str, ) -> reqwest::Result<()> { - trace!("📢BroadcastChangeTracker::megaphone_updater"); + trace!("📢megaphone::updater"); let MegaphoneResponse { broadcasts } = http .get(url) .header("Authorization", token) diff --git a/autoconnect/autoconnect-settings/src/app_state.rs b/autoconnect/autoconnect-settings/src/app_state.rs index d7c8627f5..78b8c70ad 100644 --- a/autoconnect/autoconnect-settings/src/app_state.rs +++ b/autoconnect/autoconnect-settings/src/app_state.rs @@ -123,6 +123,7 @@ impl AppState { init_and_spawn_megaphone_updater( &self.broadcaster, &self.http, + &self.metrics, url, token, self.settings.megaphone_poll_interval, diff --git a/autoconnect/autoconnect-ws/src/error.rs b/autoconnect/autoconnect-ws/src/error.rs index 9575f692d..c04b97d54 100644 --- a/autoconnect/autoconnect-ws/src/error.rs +++ b/autoconnect/autoconnect-ws/src/error.rs @@ -33,7 +33,7 @@ where { fn from(item: T) -> Self { let kind = WSErrorKind::from(item); - let backtrace = kind.capture_backtrace().then(Backtrace::new); + let backtrace = (kind.is_sentry_event() && kind.capture_backtrace()).then(Backtrace::new); Self { kind, backtrace } } } @@ -77,16 +77,14 @@ impl ReportableError for WSError { } fn is_sentry_event(&self) -> bool { - match &self.kind { - WSErrorKind::SM(e) => e.is_sentry_event(), - WSErrorKind::Protocol(_) | WSErrorKind::RegistryDisconnected => true, - _ => false, - } + self.kind.is_sentry_event() } fn metric_label(&self) -> Option<&'static str> { match &self.kind { WSErrorKind::SM(e) => e.metric_label(), + // Legacy autoconnect ignored these: possibly not worth tracking + WSErrorKind::Protocol(_) => Some("ua.ws_protocol_error"), _ => None, } } @@ -123,14 +121,24 @@ pub enum WSErrorKind { } impl WSErrorKind { + /// Whether this error is reported to Sentry + fn is_sentry_event(&self) -> bool { + match self { + WSErrorKind::SM(e) => e.is_sentry_event(), + WSErrorKind::RegistryDisconnected => true, + _ => false, + } + } + /// Whether this variant has a `Backtrace` captured /// - /// Some Error variants have obvious call sites and thus don't need a - /// `Backtrace` + /// Some Error variants have obvious call sites or more relevant backtraces + /// in their sources and thus don't need a `Backtrace`. Furthermore + /// backtraces are only captured for variants returning true from + /// [Self::is_sentry_event]. fn capture_backtrace(&self) -> bool { - matches!( - self, - WSErrorKind::Json(_) | WSErrorKind::Protocol(_) | WSErrorKind::SessionClosed(_) - ) + // Nothing currently (RegistryDisconnected has a unique call site) but + // we may want to capture other variants in the future + false } }