Skip to content

Commit

Permalink
feat: emit pool timeouts/conditional failures as metrics (not sentry)
Browse files Browse the repository at this point in the history
and refactor BigtableClientManager::Error to use BigTableError

Closes: SYNC-4190
  • Loading branch information
pjenvey committed Apr 5, 2024
1 parent 69dae89 commit 9ef2ecf
Show file tree
Hide file tree
Showing 8 changed files with 95 additions and 107 deletions.
45 changes: 21 additions & 24 deletions autoconnect/autoconnect-settings/src/app_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use autopush_common::db::dual::DualClientImpl;
#[cfg(feature = "dynamodb")]
use autopush_common::db::dynamodb::DdbClientImpl;
use cadence::StatsdClient;
use config::ConfigError;
use fernet::{Fernet, MultiFernet};
use tokio::sync::RwLock;

Expand All @@ -15,7 +16,6 @@ use autoconnect_common::{
registry::ClientRegistry,
};
use autopush_common::db::{client::DbClient, DbSettings, StorageType};
use autopush_common::errors::{ApcErrorKind, Result};

use crate::{Settings, ENV_PREFIX};

Expand All @@ -39,32 +39,27 @@ pub struct AppState {
}

impl AppState {
pub fn from_settings(settings: Settings) -> Result<Self> {
pub fn from_settings(settings: Settings) -> Result<Self, ConfigError> {
let crypto_key = &settings.crypto_key;
if !(crypto_key.starts_with('[') && crypto_key.ends_with(']')) {
return Err(
ApcErrorKind::ConfigError(config::ConfigError::Message(format!(
"Invalid {}_CRYPTO_KEY",
ENV_PREFIX
)))
.into(),
);
return Err(ConfigError::Message(format!(
"Invalid {ENV_PREFIX}_CRYPTO_KEY"
)));
}
let crypto_key = &crypto_key[1..crypto_key.len() - 1];
debug!("🔐 Fernet keys: {:?}", &crypto_key);
let fernets: Vec<Fernet> = crypto_key
.split(',')
.map(|s| s.trim().to_string())
.map(|key| {
Fernet::new(&key).unwrap_or_else(|| panic!("Invalid {}_CRYPTO_KEY", ENV_PREFIX))
})
.map(|key| Fernet::new(&key)._or_else(|| panic!("Invalid {ENV_PREFIX}_CRYPTO_KEY")))
.collect();
let fernet = MultiFernet::new(fernets);
let metrics = autopush_common::metrics::builder(
&settings.statsd_label,
&settings.statsd_host,
settings.statsd_port,
)?
)
.map_err(|e| ConfigError::Message(e.to_string()))?
// Temporary tag to distinguish from the legacy autopush(connect)
.with_tag("autoconnect", "true")
.build();
Expand All @@ -78,16 +73,21 @@ impl AppState {
#[allow(unused)]
let db: Box<dyn DbClient> = match storage_type {
#[cfg(feature = "dynamodb")]
StorageType::DynamoDb => Box::new(DdbClientImpl::new(metrics.clone(), &db_settings)?),
StorageType::DynamoDb => Box::new(
DdbClientImpl::new(metrics.clone(), &db_settings)
.map_err(|e| ConfigError::Message(e.to_string()))?,
),
#[cfg(feature = "bigtable")]
StorageType::BigTable => {
let client = BigTableClientImpl::new(metrics.clone(), &db_settings)?;
let client = BigTableClientImpl::new(metrics.clone(), &db_settings)
.map_err(|e| ConfigError::Message(e.to_string()))?;
client.spawn_sweeper(Duration::from_secs(30));
Box::new(client)
}
#[cfg(all(feature = "bigtable", feature = "dynamodb"))]
StorageType::Dual => {
let client = DualClientImpl::new(metrics.clone(), &db_settings)?;
let client = DualClientImpl::new(metrics.clone(), &db_settings)
.map_err(|e| ConfigError::Message(e.to_string()))?;
client.spawn_sweeper(Duration::from_secs(30));
Box::new(client)
}
Expand Down Expand Up @@ -122,17 +122,14 @@ impl AppState {
/// Initialize the `BroadcastChangeTracker`
///
/// Via `autoconnect_common::megaphone::init_and_spawn_megaphone_updater`
pub async fn init_and_spawn_megaphone_updater(&self) -> Result<()> {
pub async fn init_and_spawn_megaphone_updater(&self) -> Result<(), ConfigError> {
let Some(ref url) = self.settings.megaphone_api_url else {
return Ok(());
};
let Some(ref token) = self.settings.megaphone_api_token else {
return Err(
ApcErrorKind::ConfigError(config::ConfigError::Message(format!(
"{ENV_PREFIX}__MEGAPHONE_API_URL requires {ENV_PREFIX}__MEGAPHONE_API_TOKEN"
)))
.into(),
);
return Err(ConfigError::Message(format!(
"{ENV_PREFIX}__MEGAPHONE_API_URL requires {ENV_PREFIX}__MEGAPHONE_API_TOKEN"
)));
};
init_and_spawn_megaphone_updater(
&self.broadcaster,
Expand All @@ -143,7 +140,7 @@ impl AppState {
self.settings.megaphone_poll_interval,
)
.await
.map_err(|e| ApcErrorKind::GeneralError(format!("{}", e)))?;
.map_err(|e| ConfigError::Message(e.to_string()))?;
Ok(())
}
}
Expand Down
33 changes: 19 additions & 14 deletions autoendpoint/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
//! Error types and transformations
// TODO: Collpase these into `autopush_common::error`
use crate::headers::vapid::VapidError;
use crate::routers::RouterError;
Expand Down Expand Up @@ -82,6 +81,9 @@ pub enum ApiErrorKind {
#[error("Database error: {0}")]
Database(#[from] DbError),

#[error("Conditional database operation failed: {0}")]
Conditional(String),

#[error("Invalid token")]
InvalidToken,

Expand Down Expand Up @@ -150,7 +152,9 @@ impl ApiErrorKind {

ApiErrorKind::LogCheck => StatusCode::IM_A_TEAPOT,

ApiErrorKind::Database(DbError::Backoff(_)) => StatusCode::SERVICE_UNAVAILABLE,
ApiErrorKind::Database(DbError::Backoff(_)) | ApiErrorKind::Conditional(_) => {
StatusCode::SERVICE_UNAVAILABLE
}

ApiErrorKind::General(_)
| ApiErrorKind::Io(_)
Expand Down Expand Up @@ -192,6 +196,7 @@ impl ApiErrorKind {
ApiErrorKind::Io(_) => "io",
ApiErrorKind::Metrics(_) => "metrics",
ApiErrorKind::Database(_) => "database",
ApiErrorKind::Conditional(_) => "conditional",
ApiErrorKind::EndpointUrl(_) => "endpoint_url",
ApiErrorKind::RegistrationSecretHash(_) => "registration_secret_hash",
})
Expand All @@ -202,22 +207,21 @@ impl ApiErrorKind {
match self {
// ignore selected validation errors.
ApiErrorKind::Router(e) => e.is_sentry_event(),
_ => !matches!(
self,
// Ignore common webpush errors
ApiErrorKind::NoTTL | ApiErrorKind::InvalidEncryption(_) |
// Ignore common VAPID erros
ApiErrorKind::VapidError(_)
// Ignore common webpush errors
ApiErrorKind::NoTTL | ApiErrorKind::InvalidEncryption(_) |
// Ignore common VAPID erros
ApiErrorKind::VapidError(_)
| ApiErrorKind::Jwt(_)
| ApiErrorKind::TokenHashValidation(_)
| ApiErrorKind::InvalidAuthentication
| ApiErrorKind::InvalidLocalAuth(_) |
// Ignore missing or invalid user errors
ApiErrorKind::NoUser | ApiErrorKind::NoSubscription |
// Ignore oversized payload.
ApiErrorKind::PayloadError(_) |
ApiErrorKind::Validation(_),
),
// Ignore missing or invalid user errors
ApiErrorKind::NoUser | ApiErrorKind::NoSubscription |
// Ignore oversized payload.
ApiErrorKind::PayloadError(_) |
ApiErrorKind::Validation(_) |
ApiErrorKind::Conditional(_) => false,
_ => true,
}
}

Expand Down Expand Up @@ -259,6 +263,7 @@ impl ApiErrorKind {
| ApiErrorKind::Io(_)
| ApiErrorKind::Metrics(_)
| ApiErrorKind::Database(_)
| ApiErrorKind::Conditional(_)
| ApiErrorKind::PayloadError(_)
| ApiErrorKind::InvalidRouterToken
| ApiErrorKind::RegistrationSecretHash(_)
Expand Down
4 changes: 2 additions & 2 deletions autoendpoint/src/routes/registration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,8 @@ pub async fn update_token_route(
trace!("🌍 Updating user with UAID {uaid}");
trace!("🌍 user = {user:?}");
if !app_state.db.update_user(&mut user).await? {
// Unlikely to occur on mobile records
return Err(ApiErrorKind::General("Conditional update failed".to_owned()).into());
// Occurs occasionally on mobile records
return Err(ApiErrorKind::Conditional("update_user".to_owned()).into());
}

trace!("🌍 Finished updating token for UAID {uaid}");
Expand Down
36 changes: 24 additions & 12 deletions autopush-common/src/db/bigtable/bigtable_client/error.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::fmt::{self, Display};

use backtrace::Backtrace;
use deadpool::managed::{PoolError, TimeoutType};
use thiserror::Error;

use crate::errors::ReportableError;
Expand Down Expand Up @@ -109,12 +110,13 @@ pub enum BigTableError {
#[error("BigTable Admin Error: {0}")]
Admin(String, Option<String>),

#[error("Bigtable Recycle request")]
Recycle,

/// General Pool builder errors.
/// General Pool errors
#[error("Pool Error: {0}")]
Pool(String),
Pool(Box<PoolError<BigTableError>>),

/// Timeout occurred while getting a pooled connection
#[error("Pool Timeout: {0:?}")]
PoolTimeout(TimeoutType),

#[error("BigTable config error: {0}")]
Config(String),
Expand All @@ -130,30 +132,40 @@ impl ReportableError for BigTableError {
}

fn is_sentry_event(&self) -> bool {
// eventually, only capture important errors
//matches!(&self, BigTableError::Admin(_, _))
true
#[allow(clippy::match_like_matches_macro)]
match self {
BigTableError::PoolTimeout(_) => false,
_ => true,
}
}

fn metric_label(&self) -> Option<&'static str> {
let err = match &self {
let err = match self {
BigTableError::InvalidRowResponse(_) => "storage.bigtable.error.invalid_row_response",
BigTableError::InvalidChunk(_) => "storage.bigtable.error.invalid_chunk",
BigTableError::Read(_) => "storage.bigtable.error.read",
BigTableError::Write(_) => "storage.bigtable.error.write",
BigTableError::Status(_, _) => "storage.bigtable.error.status",
BigTableError::WriteTime(_) => "storage.bigtable.error.writetime",
BigTableError::Admin(_, _) => "storage.bigtable.error.admin",
BigTableError::Recycle => "storage.bigtable.error.recycle",
BigTableError::Pool(_) => "storage.bigtable.error.pool",
BigTableError::PoolTimeout(_) => "storage.bigtable.error.pool_timeout",
BigTableError::GRPC(_) => "storage.bigtable.error.grpc",
BigTableError::Config(_) => "storage.bigtable.error.config",
};
Some(err)
}

fn tags(&self) -> Vec<(&str, String)> {
#[allow(clippy::match_like_matches_macro)]
match self {
BigTableError::PoolTimeout(tt) => vec![("type", format!("{tt:?}").to_lowercase())],
_ => vec![],
}
}

fn extras(&self) -> Vec<(&str, String)> {
match &self {
match self {
BigTableError::InvalidRowResponse(s) => vec![("error", s.to_string())],
BigTableError::InvalidChunk(s) => vec![("error", s.to_string())],
BigTableError::GRPC(s) => vec![("error", s.to_string())],
Expand All @@ -170,7 +182,7 @@ impl ReportableError for BigTableError {
};
x
}
BigTableError::Pool(s) => vec![("error", s.to_owned())],
BigTableError::Pool(e) => vec![("error", e.to_string())],
_ => vec![],
}
}
Expand Down
12 changes: 8 additions & 4 deletions autopush-common/src/db/bigtable/bigtable_client/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -779,7 +779,10 @@ impl BigtableDb {
/// stack.
///
///
pub async fn health_check(&mut self, metrics: Arc<StatsdClient>) -> DbResult<bool> {
pub async fn health_check(
&mut self,
metrics: Arc<StatsdClient>,
) -> Result<bool, error::BigTableError> {
// It is recommended that we pick a random key to perform the health check. Selecting
// a single key for all health checks causes a "hot tablet" to arise. The `PingAndWarm`
// is intended to be used prior to large updates and is not recommended for use in
Expand All @@ -800,7 +803,7 @@ impl BigtableDb {
retryable_error(metrics.clone()),
)
.await
.map_err(|e| DbError::General(format!("BigTable connectivity error: {:?}", e)))?;
.map_err(error::BigTableError::Read)?;

debug!("🉑 health check");
Ok(true)
Expand Down Expand Up @@ -1318,11 +1321,12 @@ impl DbClient for BigTableClientImpl {
}

async fn health_check(&self) -> DbResult<bool> {
self.pool
Ok(self
.pool
.get()
.await?
.health_check(self.metrics.clone())
.await
.await?)
}

/// Returns true, because there's only one table in BigTable. We divide things up
Expand Down
Loading

0 comments on commit 9ef2ecf

Please sign in to comment.