Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: emit pool timeouts/conditional failures as metrics (not sentry) #671

Merged
merged 2 commits into from
Apr 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 21 additions & 22 deletions autoconnect/autoconnect-settings/src/app_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use autopush_common::db::dual::DualClientImpl;
#[cfg(feature = "dynamodb")]
use autopush_common::db::dynamodb::DdbClientImpl;
use cadence::StatsdClient;
use config::ConfigError;
use fernet::{Fernet, MultiFernet};
use tokio::sync::RwLock;

Expand All @@ -15,7 +16,6 @@ use autoconnect_common::{
registry::ClientRegistry,
};
use autopush_common::db::{client::DbClient, DbSettings, StorageType};
use autopush_common::errors::{ApcErrorKind, Result};

use crate::{Settings, ENV_PREFIX};

Expand All @@ -39,32 +39,29 @@ pub struct AppState {
}

impl AppState {
pub fn from_settings(settings: Settings) -> Result<Self> {
pub fn from_settings(settings: Settings) -> Result<Self, ConfigError> {
let crypto_key = &settings.crypto_key;
if !(crypto_key.starts_with('[') && crypto_key.ends_with(']')) {
return Err(
ApcErrorKind::ConfigError(config::ConfigError::Message(format!(
"Invalid {}_CRYPTO_KEY",
ENV_PREFIX
)))
.into(),
);
return Err(ConfigError::Message(format!(
"Invalid {ENV_PREFIX}_CRYPTO_KEY"
)));
}
let crypto_key = &crypto_key[1..crypto_key.len() - 1];
debug!("🔐 Fernet keys: {:?}", &crypto_key);
let fernets: Vec<Fernet> = crypto_key
.split(',')
.map(|s| s.trim().to_string())
.map(|key| {
Fernet::new(&key).unwrap_or_else(|| panic!("Invalid {}_CRYPTO_KEY", ENV_PREFIX))
Fernet::new(&key).unwrap_or_else(|| panic!("Invalid {ENV_PREFIX}_CRYPTO_KEY"))
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Huh, not sure why that didn't get flagged by the analyzer when I loaded you branch.

Goes and tries to figure out what's broken

})
.collect();
let fernet = MultiFernet::new(fernets);
let metrics = autopush_common::metrics::builder(
&settings.statsd_label,
&settings.statsd_host,
settings.statsd_port,
)?
)
.map_err(|e| ConfigError::Message(e.to_string()))?
// Temporary tag to distinguish from the legacy autopush(connect)
.with_tag("autoconnect", "true")
.build();
Expand All @@ -78,16 +75,21 @@ impl AppState {
#[allow(unused)]
let db: Box<dyn DbClient> = match storage_type {
#[cfg(feature = "dynamodb")]
StorageType::DynamoDb => Box::new(DdbClientImpl::new(metrics.clone(), &db_settings)?),
StorageType::DynamoDb => Box::new(
DdbClientImpl::new(metrics.clone(), &db_settings)
.map_err(|e| ConfigError::Message(e.to_string()))?,
),
#[cfg(feature = "bigtable")]
StorageType::BigTable => {
let client = BigTableClientImpl::new(metrics.clone(), &db_settings)?;
let client = BigTableClientImpl::new(metrics.clone(), &db_settings)
.map_err(|e| ConfigError::Message(e.to_string()))?;
client.spawn_sweeper(Duration::from_secs(30));
Box::new(client)
}
#[cfg(all(feature = "bigtable", feature = "dynamodb"))]
StorageType::Dual => {
let client = DualClientImpl::new(metrics.clone(), &db_settings)?;
let client = DualClientImpl::new(metrics.clone(), &db_settings)
.map_err(|e| ConfigError::Message(e.to_string()))?;
client.spawn_sweeper(Duration::from_secs(30));
Box::new(client)
}
Expand Down Expand Up @@ -122,17 +124,14 @@ impl AppState {
/// Initialize the `BroadcastChangeTracker`
///
/// Via `autoconnect_common::megaphone::init_and_spawn_megaphone_updater`
pub async fn init_and_spawn_megaphone_updater(&self) -> Result<()> {
pub async fn init_and_spawn_megaphone_updater(&self) -> Result<(), ConfigError> {
let Some(ref url) = self.settings.megaphone_api_url else {
return Ok(());
};
let Some(ref token) = self.settings.megaphone_api_token else {
return Err(
ApcErrorKind::ConfigError(config::ConfigError::Message(format!(
"{ENV_PREFIX}__MEGAPHONE_API_URL requires {ENV_PREFIX}__MEGAPHONE_API_TOKEN"
)))
.into(),
);
return Err(ConfigError::Message(format!(
"{ENV_PREFIX}__MEGAPHONE_API_URL requires {ENV_PREFIX}__MEGAPHONE_API_TOKEN"
)));
};
init_and_spawn_megaphone_updater(
&self.broadcaster,
Expand All @@ -143,7 +142,7 @@ impl AppState {
self.settings.megaphone_poll_interval,
)
.await
.map_err(|e| ApcErrorKind::GeneralError(format!("{}", e)))?;
.map_err(|e| ConfigError::Message(e.to_string()))?;
Ok(())
}
}
Expand Down
33 changes: 19 additions & 14 deletions autoendpoint/src/error.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
//! Error types and transformations
// TODO: Collpase these into `autopush_common::error`

use crate::headers::vapid::VapidError;
use crate::routers::RouterError;
Expand Down Expand Up @@ -82,6 +81,9 @@ pub enum ApiErrorKind {
#[error("Database error: {0}")]
Database(#[from] DbError),

#[error("Conditional database operation failed: {0}")]
Conditional(String),

#[error("Invalid token")]
InvalidToken,

Expand Down Expand Up @@ -150,7 +152,9 @@ impl ApiErrorKind {

ApiErrorKind::LogCheck => StatusCode::IM_A_TEAPOT,

ApiErrorKind::Database(DbError::Backoff(_)) => StatusCode::SERVICE_UNAVAILABLE,
ApiErrorKind::Database(DbError::Backoff(_)) | ApiErrorKind::Conditional(_) => {
StatusCode::SERVICE_UNAVAILABLE
}

ApiErrorKind::General(_)
| ApiErrorKind::Io(_)
Expand Down Expand Up @@ -192,6 +196,7 @@ impl ApiErrorKind {
ApiErrorKind::Io(_) => "io",
ApiErrorKind::Metrics(_) => "metrics",
ApiErrorKind::Database(_) => "database",
ApiErrorKind::Conditional(_) => "conditional",
ApiErrorKind::EndpointUrl(_) => "endpoint_url",
ApiErrorKind::RegistrationSecretHash(_) => "registration_secret_hash",
})
Expand All @@ -202,22 +207,21 @@ impl ApiErrorKind {
match self {
// ignore selected validation errors.
ApiErrorKind::Router(e) => e.is_sentry_event(),
_ => !matches!(
self,
// Ignore common webpush errors
ApiErrorKind::NoTTL | ApiErrorKind::InvalidEncryption(_) |
// Ignore common VAPID erros
ApiErrorKind::VapidError(_)
// Ignore common webpush errors
ApiErrorKind::NoTTL | ApiErrorKind::InvalidEncryption(_) |
// Ignore common VAPID erros
ApiErrorKind::VapidError(_)
| ApiErrorKind::Jwt(_)
| ApiErrorKind::TokenHashValidation(_)
| ApiErrorKind::InvalidAuthentication
| ApiErrorKind::InvalidLocalAuth(_) |
// Ignore missing or invalid user errors
ApiErrorKind::NoUser | ApiErrorKind::NoSubscription |
// Ignore oversized payload.
ApiErrorKind::PayloadError(_) |
ApiErrorKind::Validation(_),
),
// Ignore missing or invalid user errors
ApiErrorKind::NoUser | ApiErrorKind::NoSubscription |
// Ignore oversized payload.
ApiErrorKind::PayloadError(_) |
ApiErrorKind::Validation(_) |
ApiErrorKind::Conditional(_) => false,
_ => true,
}
}

Expand Down Expand Up @@ -259,6 +263,7 @@ impl ApiErrorKind {
| ApiErrorKind::Io(_)
| ApiErrorKind::Metrics(_)
| ApiErrorKind::Database(_)
| ApiErrorKind::Conditional(_)
| ApiErrorKind::PayloadError(_)
| ApiErrorKind::InvalidRouterToken
| ApiErrorKind::RegistrationSecretHash(_)
Expand Down
4 changes: 2 additions & 2 deletions autoendpoint/src/routes/registration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,8 @@ pub async fn update_token_route(
trace!("🌍 Updating user with UAID {uaid}");
trace!("🌍 user = {user:?}");
if !app_state.db.update_user(&mut user).await? {
// Unlikely to occur on mobile records
return Err(ApiErrorKind::General("Conditional update failed".to_owned()).into());
// Occurs occasionally on mobile records
return Err(ApiErrorKind::Conditional("update_user".to_owned()).into());
}

trace!("🌍 Finished updating token for UAID {uaid}");
Expand Down
36 changes: 24 additions & 12 deletions autopush-common/src/db/bigtable/bigtable_client/error.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
use std::fmt::{self, Display};

use backtrace::Backtrace;
use deadpool::managed::{PoolError, TimeoutType};
use thiserror::Error;

use crate::errors::ReportableError;
Expand Down Expand Up @@ -109,12 +110,13 @@ pub enum BigTableError {
#[error("BigTable Admin Error: {0}")]
Admin(String, Option<String>),

#[error("Bigtable Recycle request")]
Recycle,

/// General Pool builder errors.
/// General Pool errors
#[error("Pool Error: {0}")]
Pool(String),
Pool(Box<PoolError<BigTableError>>),

/// Timeout occurred while getting a pooled connection
#[error("Pool Timeout: {0:?}")]
PoolTimeout(TimeoutType),

#[error("BigTable config error: {0}")]
Config(String),
Expand All @@ -130,30 +132,40 @@ impl ReportableError for BigTableError {
}

fn is_sentry_event(&self) -> bool {
// eventually, only capture important errors
//matches!(&self, BigTableError::Admin(_, _))
true
#[allow(clippy::match_like_matches_macro)]
match self {
BigTableError::PoolTimeout(_) => false,
_ => true,
}
}

fn metric_label(&self) -> Option<&'static str> {
let err = match &self {
let err = match self {
BigTableError::InvalidRowResponse(_) => "storage.bigtable.error.invalid_row_response",
BigTableError::InvalidChunk(_) => "storage.bigtable.error.invalid_chunk",
BigTableError::Read(_) => "storage.bigtable.error.read",
BigTableError::Write(_) => "storage.bigtable.error.write",
BigTableError::Status(_, _) => "storage.bigtable.error.status",
BigTableError::WriteTime(_) => "storage.bigtable.error.writetime",
BigTableError::Admin(_, _) => "storage.bigtable.error.admin",
BigTableError::Recycle => "storage.bigtable.error.recycle",
BigTableError::Pool(_) => "storage.bigtable.error.pool",
BigTableError::PoolTimeout(_) => "storage.bigtable.error.pool_timeout",
BigTableError::GRPC(_) => "storage.bigtable.error.grpc",
BigTableError::Config(_) => "storage.bigtable.error.config",
};
Some(err)
}

fn tags(&self) -> Vec<(&str, String)> {
#[allow(clippy::match_like_matches_macro)]
match self {
BigTableError::PoolTimeout(tt) => vec![("type", format!("{tt:?}").to_lowercase())],
_ => vec![],
}
}

fn extras(&self) -> Vec<(&str, String)> {
match &self {
match self {
BigTableError::InvalidRowResponse(s) => vec![("error", s.to_string())],
BigTableError::InvalidChunk(s) => vec![("error", s.to_string())],
BigTableError::GRPC(s) => vec![("error", s.to_string())],
Expand All @@ -170,7 +182,7 @@ impl ReportableError for BigTableError {
};
x
}
BigTableError::Pool(s) => vec![("error", s.to_owned())],
BigTableError::Pool(e) => vec![("error", e.to_string())],
_ => vec![],
}
}
Expand Down
12 changes: 8 additions & 4 deletions autopush-common/src/db/bigtable/bigtable_client/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -779,7 +779,10 @@ impl BigtableDb {
/// stack.
///
///
pub async fn health_check(&mut self, metrics: Arc<StatsdClient>) -> DbResult<bool> {
pub async fn health_check(
&mut self,
metrics: Arc<StatsdClient>,
) -> Result<bool, error::BigTableError> {
// It is recommended that we pick a random key to perform the health check. Selecting
// a single key for all health checks causes a "hot tablet" to arise. The `PingAndWarm`
// is intended to be used prior to large updates and is not recommended for use in
Expand All @@ -800,7 +803,7 @@ impl BigtableDb {
retryable_error(metrics.clone()),
)
.await
.map_err(|e| DbError::General(format!("BigTable connectivity error: {:?}", e)))?;
.map_err(error::BigTableError::Read)?;

debug!("🉑 health check");
Ok(true)
Expand Down Expand Up @@ -1318,11 +1321,12 @@ impl DbClient for BigTableClientImpl {
}

async fn health_check(&self) -> DbResult<bool> {
self.pool
Ok(self
.pool
.get()
.await?
.health_check(self.metrics.clone())
.await
.await?)
}

/// Returns true, because there's only one table in BigTable. We divide things up
Expand Down
Loading