From 96cef485390118c8237f0738cb725856dfa3559e Mon Sep 17 00:00:00 2001 From: JR Conlin Date: Wed, 1 Jun 2022 08:20:08 -0700 Subject: [PATCH] bug: Fix GCM handling (#309) * bug: Fix GCM handling * exclude GCM auth messages from sentry reporting We can't fix them, and they could overflow our quota. DEPLOY NOTE: Remove the sentry.io filter for "Bridge authentication error". Closes: #308 --- autoendpoint/Cargo.toml | 2 +- autoendpoint/src/extractors/user.rs | 2 +- autoendpoint/src/routers/common.rs | 11 ++ autoendpoint/src/routers/fcm/client.rs | 152 +++++++++++++++++++++---- autoendpoint/src/routers/fcm/router.rs | 11 +- autoendpoint/src/routers/mod.rs | 6 + autoendpoint/src/routes/webpush.rs | 5 +- 7 files changed, 156 insertions(+), 33 deletions(-) diff --git a/autoendpoint/Cargo.toml b/autoendpoint/Cargo.toml index 8775fbbe4..c8d7a3da1 100644 --- a/autoendpoint/Cargo.toml +++ b/autoendpoint/Cargo.toml @@ -58,7 +58,7 @@ validator_derive = "0.14" yup-oauth2 = "4.1.2" # 5.0+ requires tokio 1.1+ # For mockito test debugging -# ureq={ version="2.4", features=["json"] } +#ureq={ version="2.4", features=["json"] } [dev-dependencies] mockall = "0.8.3" # 0.9+ requires reworking tests diff --git a/autoendpoint/src/extractors/user.rs b/autoendpoint/src/extractors/user.rs index 2be2e5212..ea4ea8037 100644 --- a/autoendpoint/src/extractors/user.rs +++ b/autoendpoint/src/extractors/user.rs @@ -69,7 +69,7 @@ async fn validate_webpush_user( } /// Drop a user and increment associated metric -async fn drop_user(uaid: Uuid, ddb: &dyn DbClient, metrics: &StatsdClient) -> ApiResult<()> { +pub async fn drop_user(uaid: Uuid, ddb: &dyn DbClient, metrics: &StatsdClient) -> ApiResult<()> { metrics .incr_with_tags("updates.drop_user") .with_tag("errno", "102") diff --git a/autoendpoint/src/routers/common.rs b/autoendpoint/src/routers/common.rs index f56b4563d..477a7c1c3 100644 --- a/autoendpoint/src/routers/common.rs +++ b/autoendpoint/src/routers/common.rs @@ -57,6 +57,17 @@ pub async fn handle_error( error.errno(), ); } + RouterError::GCMAuthentication => { + warn!("GCM Authentication error"); + incr_error_metric( + metrics, + platform, + app_id, + "gcm authentication", + error.status(), + error.errno(), + ); + } RouterError::RequestTimeout => { warn!("Bridge timeout"); incr_error_metric( diff --git a/autoendpoint/src/routers/fcm/client.rs b/autoendpoint/src/routers/fcm/client.rs index 2c6232c8d..ee5a0af4e 100644 --- a/autoendpoint/src/routers/fcm/client.rs +++ b/autoendpoint/src/routers/fcm/client.rs @@ -17,6 +17,7 @@ const OAUTH_SCOPES: &[&str] = &["https://www.googleapis.com/auth/firebase.messag /// handles sending notifications to Firebase. pub struct FcmClient { endpoint: Url, + gcm_endpoint: Url, timeout: Duration, max_data: usize, fcm_authenticator: Option, @@ -75,6 +76,10 @@ impl FcmClient { server_credential.project_id )) .expect("Project ID is not URL-safe"), + gcm_endpoint: settings + .base_url + .join("fcm/send") + .expect("GCM Project ID is not URL-safe"), timeout: Duration::from_secs(settings.timeout as u64), max_data: settings.max_data, fcm_authenticator: auth, @@ -84,6 +89,17 @@ impl FcmClient { } /// Send the message as GCM + /// Note: GCM is a beyond deprecated protocol, however older clients + /// may still use it (which includes about half of our traffic at + /// the time this comment was written). GCM documentation was also + /// removed from Google's site. It still persists in obscure corners + /// of the internet, but I have no idea for how long. + /// + /// + /// There is no way to generate GCM credentials. (They are disabled + /// on Google's server side.) There is no way to test GCM compatibility + /// other than monitoring errors. Users really should be migrated to the + /// new FCM endpoints, but we don't have full control over that. pub async fn send_gcm( &self, data: HashMap<&'static str, String>, @@ -109,9 +125,9 @@ impl FcmClient { // You can also turn on internal debugging, see // https://docs.rs/mockito/latest/mockito/#debug dbg!(self.endpoint.clone().as_str()); - let rr = ureq::post(self.endpoint.clone().as_str()) + let rr = ureq::post(self.gcm_endpoint.clone().as_str()) .set("Authorization", - format!("key={}", self.credential.credential.as_str()).as_str()) + format!("key={}", self.server_credential.server_access_token.as_str()).as_str()) .set("Content-Type", "application/json") .send_json(&message).unwrap(); dbg!(rr.status(), rr.status_text()); @@ -121,7 +137,7 @@ impl FcmClient { let server_access_token = &self.server_credential.server_access_token; let response = self .http_client - .post(self.endpoint.clone()) + .post(self.gcm_endpoint.clone()) .header("Authorization", format!("key={}", server_access_token)) .header("Content-Type", "application/json") .json(&message) @@ -138,24 +154,33 @@ impl FcmClient { // Handle error let status = response.status(); - if status.is_client_error() || status.is_server_error() { - let data: FcmResponse = response - .json() - .await - .map_err(FcmError::DeserializeResponse)?; - - return Err(match (status, data.error) { - (StatusCode::UNAUTHORIZED, _) => RouterError::Authentication, - (StatusCode::NOT_FOUND, _) => RouterError::NotFound, - (_, Some(error)) => RouterError::Upstream { - status: error.status, - message: error.message, - }, - (status, None) => RouterError::Upstream { - status: status.to_string(), - message: "Unknown reason".to_string(), + let data: GcmResponse = response + .json() + .await + .map_err(FcmError::DeserializeResponse)?; + if status.is_client_error() || status.is_server_error() || data.failure > 0 { + let invalid = GcmResult::invalid(); + return Err( + match (status, &data.results.get(0).unwrap_or(&invalid).error) { + (StatusCode::UNAUTHORIZED, _) => RouterError::GCMAuthentication, + (StatusCode::NOT_FOUND, _) => RouterError::NotFound, + (_, Some(error)) => match error.as_str() { + "NotRegistered" | "InvalidRegistration" => RouterError::NotFound, + "Unavailable" => RouterError::Upstream { + status: "USER UNAVAILABLE".to_owned(), + message: "User Unavailable. Try again later.".to_owned(), + }, + _ => RouterError::Upstream { + status: StatusCode::BAD_GATEWAY.to_string(), + message: format!("Unexpected error: {}", error), + }, + }, + (status, None) => RouterError::Upstream { + status: status.to_string(), + message: "Unknown reason".to_string(), + }, }, - }); + ); } Ok(()) @@ -220,6 +245,7 @@ impl FcmClient { .await .map_err(FcmError::DeserializeResponse)?; + // we only ever send one. return Err(match (status, data.error) { (StatusCode::UNAUTHORIZED, _) => RouterError::Authentication, (StatusCode::NOT_FOUND, _) => RouterError::NotFound, @@ -249,6 +275,40 @@ struct FcmErrorResponse { message: String, } +/// This is a joint structure that would reflect the status of each delivered +/// message. (We only send one at a time.) +#[derive(Clone, Deserialize, Debug, Default)] +struct GcmResult { + error: Option, // Optional, standardized error message + #[serde(rename = "message_id")] + _message_id: Option, // Optional identifier for a successful send + #[serde(rename = "registration_id")] + _registration_id: Option, // Optional replacement registration ID +} + +impl GcmResult { + pub fn invalid() -> GcmResult { + Self { + error: Some("Invalid GCM Response".to_string()), + ..Default::default() + } + } +} + +// The expected GCM response message. (Being explicit here because +// the offical documentation has been removed) +#[derive(Clone, Deserialize, Debug, Default)] +struct GcmResponse { + results: Vec, + #[serde(rename = "multicast_id")] + _multicast_id: u64, // ID for this set of messages/results + #[serde(rename = "success")] + _success: u32, // Number of messages succeeding. + failure: u32, // number of messages failing. + #[serde(rename = "canonical_ids")] + _canonical_ids: u32, // number of IDs that are reassigned. +} + #[cfg(test)] pub mod tests { use crate::routers::fcm::client::FcmClient; @@ -300,6 +360,10 @@ pub mod tests { ) } + pub fn mock_gcm_endpoint_builder() -> mockito::Mock { + mockito::mock("POST", "/fcm/send") + } + /// Make a FcmClient from the service auth data async fn make_client(credential: FcmServerCredential) -> FcmClient { FcmClient::new( @@ -351,18 +415,21 @@ pub mod tests { server_access_token: registration_id.to_owned(), }) .await; - let _token_mock = mock_token_endpoint(); - let body = format!("{{\"data\":{{\"is_test\":\"true\"}},\"delay_while_idle\":false,\"registration_ids\":[\"{}\"],\"time_to_live\":42}}", ®istration_id); - let fcm_mock = mock_fcm_endpoint_builder(project_id) + let body = format!( + r#"{{"data":{{"is_test":"true"}},"delay_while_idle":false,"registration_ids":["{}"],"time_to_live":42}}"#, + ®istration_id + ); + let gcm_mock = mock_gcm_endpoint_builder() .match_header("Authorization", format!("key={}", registration_id).as_str()) .match_header("Content-Type", "application/json") + .with_body(r#"{"multicast_id":216,"success":1,"failure":0,"canonical_ids":0,"results":[{"message_id":"1:02"}]}"#,) .match_body(body.as_str()) .create(); let mut data = HashMap::new(); data.insert("is_test", "true".to_string()); let result = client.send_gcm(data, registration_id.to_owned(), 42).await; assert!(result.is_ok(), "result={:?}", result); - fcm_mock.assert(); + gcm_mock.assert(); } /// Authorization errors are handled @@ -390,6 +457,43 @@ pub mod tests { ); } + /// GCM errors are handled + #[tokio::test] + async fn gcm_unavailable() { + let token = make_service_key(); + let client = make_client(FcmServerCredential { + project_id: PROJECT_ID.to_owned(), + server_access_token: token.clone(), + }) + .await; + let _token_mock = mock_token_endpoint(); + // Other potential GCM errors: + // "Unavailable" => client unavailable, retry sending. + // "InvalidRegistration" => registration corrupted, remove. + let _gcm_mock = mock_gcm_endpoint_builder() + .match_body(r#"{"data":{"is_test":"true"},"delay_while_idle":false,"registration_ids":["test-token"],"time_to_live":42}"#) + .match_header("Authorization", format!("key={}", token).as_str()) + .match_header("Content-Type", "application/json") + .with_status(200) + .with_body(r#"{"multicast_id":216,"success":0,"failure":1,"canonical_ids":0,"results":[{"error":"NotRegistered"}]}"#, + ) + .create(); + + let result = client + .send_gcm( + HashMap::from([("is_test", "true".to_owned())]), + "test-token".to_string(), + 42, + ) + .await; + assert!(result.is_err()); + assert!( + matches!(result.as_ref().unwrap_err(), RouterError::NotFound), + "result = {:?}", + result + ); + } + /// 404 errors are handled #[tokio::test] async fn not_found() { diff --git a/autoendpoint/src/routers/fcm/router.rs b/autoendpoint/src/routers/fcm/router.rs index 6842c670b..3bc4ea9fe 100644 --- a/autoendpoint/src/routers/fcm/router.rs +++ b/autoendpoint/src/routers/fcm/router.rs @@ -229,8 +229,8 @@ mod tests { use crate::extractors::routers::RouterType; use crate::routers::common::tests::{make_notification, CHANNEL_ID}; use crate::routers::fcm::client::tests::{ - make_service_key, mock_fcm_endpoint_builder, mock_token_endpoint, GCM_PROJECT_ID, - PROJECT_ID, + make_service_key, mock_fcm_endpoint_builder, mock_gcm_endpoint_builder, + mock_token_endpoint, GCM_PROJECT_ID, PROJECT_ID, }; use crate::routers::fcm::error::FcmError; use crate::routers::fcm::router::FcmRouter; @@ -342,7 +342,7 @@ mod tests { async fn successful_gcm_fallback() { let auth_key = "AIzaSyB0ecSrqnEDXQ7yjLXqVc0CUGOeSlq9BsM"; // this is a nonce value used only for testing. let registration_id = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"; - let project_id = GCM_PROJECT_ID; + // let project_id = GCM_PROJECT_ID; let ddb = MockDbClient::new().into_boxed_arc(); let router = make_router(make_service_key(), auth_key.to_owned(), ddb).await; assert!(router.active()); @@ -357,9 +357,12 @@ mod tests { }) .to_string(); let _token_mock = mock_token_endpoint(); - let fcm_mock = mock_fcm_endpoint_builder(project_id) + let fcm_mock = mock_gcm_endpoint_builder() .match_header("Authorization", format!("key={}", &auth_key).as_str()) .match_header("Content-Type", "application/json") + .with_body( + r#"{ "multicast_id": 216,"success":1,"failure":0,"canonical_ids":0,"results":[{"message_id":"1:02"}]}"#, + ) .match_body(body.as_str()) .create(); let notification = make_notification( diff --git a/autoendpoint/src/routers/mod.rs b/autoendpoint/src/routers/mod.rs index 1cc5900bc..76e43ceff 100644 --- a/autoendpoint/src/routers/mod.rs +++ b/autoendpoint/src/routers/mod.rs @@ -95,6 +95,9 @@ pub enum RouterError { #[error("Bridge authentication error")] Authentication, + #[error("GCM Bridge authentication error")] + GCMAuthentication, + #[error("Bridge request timeout")] RequestTimeout, @@ -123,6 +126,7 @@ impl RouterError { RouterError::TooMuchData(_) => StatusCode::PAYLOAD_TOO_LARGE, RouterError::Authentication + | RouterError::GCMAuthentication | RouterError::RequestTimeout | RouterError::Connect(_) | RouterError::Upstream { .. } => StatusCode::BAD_GATEWAY, @@ -150,6 +154,8 @@ impl RouterError { RouterError::RequestTimeout => Some(903), + RouterError::GCMAuthentication => Some(904), + RouterError::Upstream { .. } => None, } } diff --git a/autoendpoint/src/routes/webpush.rs b/autoendpoint/src/routes/webpush.rs index e59637aab..cba9ee434 100644 --- a/autoendpoint/src/routes/webpush.rs +++ b/autoendpoint/src/routes/webpush.rs @@ -12,15 +12,14 @@ use actix_web::HttpResponse; pub async fn webpush_route( notification: Notification, routers: Routers, + _state: Data, ) -> ApiResult { let router = routers.get( RouterType::from_str(¬ification.subscription.user.router_type) .map_err(|_| ApiErrorKind::InvalidRouterType)?, ); - let response = router.route_notification(¬ification).await?; - - Ok(response.into()) + Ok(router.route_notification(¬ification).await?.into()) } /// Handle the `DELETE /m/{message_id}` route