diff --git a/autoendpoint/src/db/client.rs b/autoendpoint/src/db/client.rs index a040f0c03..b3838d7d8 100644 --- a/autoendpoint/src/db/client.rs +++ b/autoendpoint/src/db/client.rs @@ -356,12 +356,35 @@ impl DbClient for DbClientImpl { ..Default::default() }; - retry_policy() + if let Err(err) = retry_policy() .retry_if( || self.ddb.update_item(input.clone()), retryable_updateitem_error(self.metrics.clone()), ) - .await?; + .await + { + /* + UpdateItem errors frequently occur during a redeploy due to a variety of reasons + (e.g. since a redeploy forces UAs to disconnect and changes the router host name, + the associated node_id will be different for legitimate reasons.) We should eat + these errors because they're not actionable and part of the normal run process. + */ + + if let rusoto_core::RusotoError::Service( + rusoto_dynamodb::UpdateItemError::ConditionalCheckFailed(_), + ) = err + { + // Most likely, the node_id recorded in the db does not match the + // node_id contained in the notification + self.metrics + .incr_with_tags("error.node.update") + .with_tag("node_id", &node_id) + .try_send() + .ok(); + return Ok(()); + } + return Err(DbError::from(err)); + }; Ok(()) } diff --git a/autoendpoint/src/routers/webpush.rs b/autoendpoint/src/routers/webpush.rs index 7f16eefe0..3ca13330d 100644 --- a/autoendpoint/src/routers/webpush.rs +++ b/autoendpoint/src/routers/webpush.rs @@ -1,5 +1,5 @@ use crate::db::client::DbClient; -use crate::error::{ApiError, ApiErrorKind, ApiResult}; +use crate::error::{ApiErrorKind, ApiResult}; use crate::extractors::notification::Notification; use crate::extractors::router_data_input::RouterDataInput; use crate::routers::{Router, RouterError, RouterResponse}; @@ -38,6 +38,7 @@ impl Router for WebPushRouter { } async fn route_notification(&self, notification: &Notification) -> ApiResult { + // The notification contains the original subscription information let user = ¬ification.subscription.user; debug!( "Routing WebPush notification to UAID {}", @@ -65,9 +66,8 @@ impl Router for WebPushRouter { ); } Err(error) => { - // We should stop sending notifications to this node for this user debug!("Error while sending webpush notification: {}", error); - self.remove_node_id(user, node_id.clone()).await?; + self.remove_node_id(user, node_id).await? } } } @@ -142,7 +142,7 @@ impl Router for WebPushRouter { Err(error) => { // Can't communicate with the node, so we should stop using it debug!("Error while triggering notification check: {}", error); - self.remove_node_id(&user, node_id.clone()).await?; + self.remove_node_id(&user, node_id).await?; Ok(self.make_stored_response(notification)) } } @@ -186,13 +186,12 @@ impl WebPushRouter { /// Remove the node ID from a user. This is done if the user is no longer /// connected to the node. - async fn remove_node_id(&self, user: &DynamoDbUser, node_id: String) -> ApiResult<()> { + async fn remove_node_id(&self, user: &DynamoDbUser, node_id: &str) -> ApiResult<()> { self.metrics.incr("updates.client.host_gone").ok(); - self.ddb - .remove_node_id(user.uaid, node_id, user.connected_at) - .await - .map_err(ApiError::from) + .remove_node_id(user.uaid, node_id.to_owned(), user.connected_at) + .await?; + Ok(()) } /// Update metrics and create a response for when a notification has been directly forwarded to diff --git a/autopush/src/megaphone.rs b/autopush/src/megaphone.rs index 9cb7753d2..fc6154e59 100644 --- a/autopush/src/megaphone.rs +++ b/autopush/src/megaphone.rs @@ -8,8 +8,7 @@ use autopush_common::errors::{ApcErrorKind, Result}; use crate::server::protocol::BroadcastValue; // A Broadcast entry Key in a BroadcastRegistry -#[derive(Clone, Copy, Debug, Default, Eq, PartialEq, Hash)] -struct BroadcastKey(u32); +type BroadcastKey = u32; // Broadcasts a client is subscribed to and the last change seen #[derive(Debug, Default)] @@ -20,7 +19,7 @@ pub struct BroadcastSubs { #[derive(Debug)] struct BroadcastRegistry { - lookup: HashMap, + lookup: HashMap, table: Vec, } @@ -41,20 +40,20 @@ impl BroadcastRegistry { // exists fn add_broadcast(&mut self, broadcast_id: String) -> BroadcastKey { if let Some(v) = self.lookup.get(&broadcast_id) { - return BroadcastKey(*v); + return *v; } - let i = self.table.len(); + let i = self.table.len() as BroadcastKey; self.table.push(broadcast_id.clone()); - self.lookup.insert(broadcast_id, i as u32); - BroadcastKey(i as u32) + self.lookup.insert(broadcast_id, i); + i } fn lookup_id(&self, key: BroadcastKey) -> Option { - self.table.get(key.0 as usize).cloned() + self.table.get(key as usize).cloned() } fn lookup_key(&self, broadcast_id: &str) -> Option { - self.lookup.get(broadcast_id).cloned().map(BroadcastKey) + self.lookup.get(broadcast_id).copied() } } diff --git a/autopush/src/server/mod.rs b/autopush/src/server/mod.rs index 660d20c8d..5d7dad261 100644 --- a/autopush/src/server/mod.rs +++ b/autopush/src/server/mod.rs @@ -94,6 +94,10 @@ impl AutopushServer { ..Default::default() }, )); + /* + Sentry 0.29+ automatically enables `PanicIntegration`. + see https://docs.rs/sentry-panic/latest/sentry_panic/ + */ Some(guard) } else { None