From 6e02e70be766bf4634548ab65f5f97aaac06bcc7 Mon Sep 17 00:00:00 2001 From: jrconlin Date: Thu, 22 Feb 2024 13:24:34 -0800 Subject: [PATCH 1/6] bug: Explicity return 503 for add_user fail in dual --- autoendpoint/src/error.rs | 2 ++ autopush-common/src/db/dual/mod.rs | 5 ++++- autopush-common/src/db/error.rs | 8 ++++++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/autoendpoint/src/error.rs b/autoendpoint/src/error.rs index 28d61ad68..671fd9f31 100644 --- a/autoendpoint/src/error.rs +++ b/autoendpoint/src/error.rs @@ -150,6 +150,8 @@ impl ApiErrorKind { ApiErrorKind::LogCheck => StatusCode::IM_A_TEAPOT, + ApiErrorKind::Database(DbError::Backoff(_)) => StatusCode::SERVICE_UNAVAILABLE, + ApiErrorKind::General(_) | ApiErrorKind::Io(_) | ApiErrorKind::Metrics(_) diff --git a/autopush-common/src/db/dual/mod.rs b/autopush-common/src/db/dual/mod.rs index 086c07f26..b9b1cc730 100644 --- a/autopush-common/src/db/dual/mod.rs +++ b/autopush-common/src/db/dual/mod.rs @@ -189,7 +189,10 @@ impl DbClient for DualClientImpl { // for Bigtable debug_assert!(user.version.is_none()); user.version = Some(Uuid::new_v4()); - self.primary.add_user(&user).await?; + self.primary + .add_user(&user) + .await + .map_err(|e| DbError::Backoff(e.to_string()))?; self.metrics.incr_with_tags("database.migrate").send(); let channels = self.secondary.get_channels(uaid).await?; if !channels.is_empty() { diff --git a/autopush-common/src/db/error.rs b/autopush-common/src/db/error.rs index 64fb3082c..13f3a697b 100644 --- a/autopush-common/src/db/error.rs +++ b/autopush-common/src/db/error.rs @@ -79,6 +79,10 @@ pub enum DbError { #[error("Unknown Database Error {0}")] General(String), + + // Return a 503 error + #[error("Process pending, please wait.")] + Backoff(String), } impl ReportableError for DbError { @@ -106,6 +110,7 @@ impl ReportableError for DbError { match &self { #[cfg(feature = "bigtable")] DbError::BTError(e) => e.metric_label(), + DbError::Backoff(_) => Some("storage.error.backoff"), _ => None, } } @@ -114,6 +119,9 @@ impl ReportableError for DbError { match &self { #[cfg(feature = "bigtable")] DbError::BTError(e) => e.extras(), + DbError::Backoff(e) => { + vec![("raw", e.to_string())] + } _ => vec![], } } From 31fbcea00800487fb12ef18f487efa9f29ea23ed Mon Sep 17 00:00:00 2001 From: jrconlin Date: Thu, 22 Feb 2024 13:39:15 -0800 Subject: [PATCH 2/6] f add timer --- autopush-common/src/db/dual/mod.rs | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/autopush-common/src/db/dual/mod.rs b/autopush-common/src/db/dual/mod.rs index b9b1cc730..96b8ce39c 100644 --- a/autopush-common/src/db/dual/mod.rs +++ b/autopush-common/src/db/dual/mod.rs @@ -10,7 +10,7 @@ use std::collections::HashSet; use std::sync::Arc; use async_trait::async_trait; -use cadence::{CountedExt, StatsdClient}; +use cadence::{CountedExt, StatsdClient, Timed}; use serde::Deserialize; use serde_json::from_str; use uuid::Uuid; @@ -182,6 +182,7 @@ impl DbClient for DualClientImpl { Ok(None) => { if is_primary { // The user wasn't in the current primary, so fetch them from the secondary. + let start = std::time::Instant::now(); if let Ok(Some(mut user)) = self.secondary.get_user(uaid).await { // copy the user record over to the new data store. debug!("⚖ Found user record in secondary, moving to primary"); @@ -200,6 +201,12 @@ impl DbClient for DualClientImpl { // user.version is still valid self.primary.add_channels(uaid, channels).await?; } + self.metrics + .time_with_tags( + "database.migrate.time", + (std::time::Instant::now() - start).as_millis() as u64, + ) + .send(); return Ok(Some(user)); } } From bbec4decbe71041bddb838a4d2f29156c4b914d7 Mon Sep 17 00:00:00 2001 From: jrconlin Date: Thu, 22 Feb 2024 13:46:03 -0800 Subject: [PATCH 3/6] f conditionally return 503 based on the error being "conditional" --- autopush-common/src/db/dual/mod.rs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/autopush-common/src/db/dual/mod.rs b/autopush-common/src/db/dual/mod.rs index 96b8ce39c..8d7fccb3c 100644 --- a/autopush-common/src/db/dual/mod.rs +++ b/autopush-common/src/db/dual/mod.rs @@ -190,10 +190,13 @@ impl DbClient for DualClientImpl { // for Bigtable debug_assert!(user.version.is_none()); user.version = Some(Uuid::new_v4()); - self.primary - .add_user(&user) - .await - .map_err(|e| DbError::Backoff(e.to_string()))?; + self.primary.add_user(&user).await.map_err(|e| { + if e.to_string().contains("conditional") { + DbError::Backoff(e.to_string()) + } else { + e + } + })?; self.metrics.incr_with_tags("database.migrate").send(); let channels = self.secondary.get_channels(uaid).await?; if !channels.is_empty() { From d7a570559078e0be9aab8c0e944bbd691327d110 Mon Sep 17 00:00:00 2001 From: jrconlin Date: Thu, 22 Feb 2024 14:28:03 -0800 Subject: [PATCH 4/6] f use real error name, add filter set for bigtable get_user --- .../src/db/bigtable/bigtable_client/mod.rs | 4 +++- autopush-common/src/db/dual/mod.rs | 20 +++++++++++++------ 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/autopush-common/src/db/bigtable/bigtable_client/mod.rs b/autopush-common/src/db/bigtable/bigtable_client/mod.rs index 3bed0fada..a5b30ddd0 100644 --- a/autopush-common/src/db/bigtable/bigtable_client/mod.rs +++ b/autopush-common/src/db/bigtable/bigtable_client/mod.rs @@ -728,7 +728,9 @@ impl DbClient for BigTableClientImpl { async fn get_user(&self, uaid: &Uuid) -> DbResult> { let row_key = uaid.as_simple().to_string(); let mut req = self.read_row_request(&row_key); - req.set_filter(family_filter(format!("^{ROUTER_FAMILY}$"))); + let mut filters = vec![router_gc_policy_filter()]; + filters.push(family_filter(format!("^{ROUTER_FAMILY}$"))); + req.set_filter(filter_chain(filters)); let Some(mut row) = self.read_row(req).await? else { return Ok(None); }; diff --git a/autopush-common/src/db/dual/mod.rs b/autopush-common/src/db/dual/mod.rs index 8d7fccb3c..b6a186d08 100644 --- a/autopush-common/src/db/dual/mod.rs +++ b/autopush-common/src/db/dual/mod.rs @@ -190,13 +190,21 @@ impl DbClient for DualClientImpl { // for Bigtable debug_assert!(user.version.is_none()); user.version = Some(Uuid::new_v4()); - self.primary.add_user(&user).await.map_err(|e| { - if e.to_string().contains("conditional") { - DbError::Backoff(e.to_string()) - } else { - e + if let Err(e) = self.primary.add_user(&user).await { + if matches!(e, DbError::Conditional) { + // User is being migrated underneath us. + // Try fetching the record from primary again + let user = self.primary.get_user(uaid).await?; + // Possibly a higher number of these occur than + // expected, so sanity check that a user now + // exists + self.metrics + .incr_with_tags("database.already_migrated") + .with_tag("exists", &user.is_some().to_string()) + .send(); + return Ok(user); } - })?; + }; self.metrics.incr_with_tags("database.migrate").send(); let channels = self.secondary.get_channels(uaid).await?; if !channels.is_empty() { From 920214d582a343bf43749a0070295b6d252498ed Mon Sep 17 00:00:00 2001 From: jrconlin Date: Thu, 22 Feb 2024 14:43:40 -0800 Subject: [PATCH 5/6] f add backoff on second try --- autopush-common/src/db/dual/mod.rs | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/autopush-common/src/db/dual/mod.rs b/autopush-common/src/db/dual/mod.rs index b6a186d08..3248e8cd4 100644 --- a/autopush-common/src/db/dual/mod.rs +++ b/autopush-common/src/db/dual/mod.rs @@ -193,8 +193,15 @@ impl DbClient for DualClientImpl { if let Err(e) = self.primary.add_user(&user).await { if matches!(e, DbError::Conditional) { // User is being migrated underneath us. - // Try fetching the record from primary again - let user = self.primary.get_user(uaid).await?; + // Try fetching the record from primary again, + // and back off if still not there. + let user = self.primary.get_user(uaid).await.map_err(|e| { + if matches!(e, DbError::Conditional) { + DbError::Backoff(e.to_string()) + } else { + e + } + })?; // Possibly a higher number of these occur than // expected, so sanity check that a user now // exists From 6ff0f2cedef29edd843c86afeff8a1f3d4dd2ead Mon Sep 17 00:00:00 2001 From: jrconlin Date: Thu, 22 Feb 2024 14:59:30 -0800 Subject: [PATCH 6/6] f r's --- autopush-common/src/db/dual/mod.rs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/autopush-common/src/db/dual/mod.rs b/autopush-common/src/db/dual/mod.rs index 3248e8cd4..7adf26988 100644 --- a/autopush-common/src/db/dual/mod.rs +++ b/autopush-common/src/db/dual/mod.rs @@ -195,13 +195,7 @@ impl DbClient for DualClientImpl { // User is being migrated underneath us. // Try fetching the record from primary again, // and back off if still not there. - let user = self.primary.get_user(uaid).await.map_err(|e| { - if matches!(e, DbError::Conditional) { - DbError::Backoff(e.to_string()) - } else { - e - } - })?; + let user = self.primary.get_user(uaid).await?; // Possibly a higher number of these occur than // expected, so sanity check that a user now // exists @@ -209,6 +203,9 @@ impl DbClient for DualClientImpl { .incr_with_tags("database.already_migrated") .with_tag("exists", &user.is_some().to_string()) .send(); + if user.is_none() { + return Err(DbError::Backoff("Move in progress".to_owned())); + }; return Ok(user); } };