Skip to content

Commit

Permalink
bug: Fix GCM handling (#309)
Browse files Browse the repository at this point in the history
* bug: Fix GCM handling
* exclude GCM auth messages from sentry reporting

We can't fix them, and they could overflow our quota.

DEPLOY NOTE:
Remove the sentry.io filter for "Bridge authentication error".

Closes: #308
  • Loading branch information
jrconlin authored Jun 1, 2022
1 parent 2829fa4 commit 96cef48
Show file tree
Hide file tree
Showing 7 changed files with 156 additions and 33 deletions.
2 changes: 1 addition & 1 deletion autoendpoint/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ validator_derive = "0.14"
yup-oauth2 = "4.1.2" # 5.0+ requires tokio 1.1+

# For mockito test debugging
# ureq={ version="2.4", features=["json"] }
#ureq={ version="2.4", features=["json"] }

[dev-dependencies]
mockall = "0.8.3" # 0.9+ requires reworking tests
Expand Down
2 changes: 1 addition & 1 deletion autoendpoint/src/extractors/user.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ async fn validate_webpush_user(
}

/// Drop a user and increment associated metric
async fn drop_user(uaid: Uuid, ddb: &dyn DbClient, metrics: &StatsdClient) -> ApiResult<()> {
pub async fn drop_user(uaid: Uuid, ddb: &dyn DbClient, metrics: &StatsdClient) -> ApiResult<()> {
metrics
.incr_with_tags("updates.drop_user")
.with_tag("errno", "102")
Expand Down
11 changes: 11 additions & 0 deletions autoendpoint/src/routers/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,17 @@ pub async fn handle_error(
error.errno(),
);
}
RouterError::GCMAuthentication => {
warn!("GCM Authentication error");
incr_error_metric(
metrics,
platform,
app_id,
"gcm authentication",
error.status(),
error.errno(),
);
}
RouterError::RequestTimeout => {
warn!("Bridge timeout");
incr_error_metric(
Expand Down
152 changes: 128 additions & 24 deletions autoendpoint/src/routers/fcm/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ const OAUTH_SCOPES: &[&str] = &["https://www.googleapis.com/auth/firebase.messag
/// handles sending notifications to Firebase.
pub struct FcmClient {
endpoint: Url,
gcm_endpoint: Url,
timeout: Duration,
max_data: usize,
fcm_authenticator: Option<DefaultAuthenticator>,
Expand Down Expand Up @@ -75,6 +76,10 @@ impl FcmClient {
server_credential.project_id
))
.expect("Project ID is not URL-safe"),
gcm_endpoint: settings
.base_url
.join("fcm/send")
.expect("GCM Project ID is not URL-safe"),
timeout: Duration::from_secs(settings.timeout as u64),
max_data: settings.max_data,
fcm_authenticator: auth,
Expand All @@ -84,6 +89,17 @@ impl FcmClient {
}

/// Send the message as GCM
/// Note: GCM is a beyond deprecated protocol, however older clients
/// may still use it (which includes about half of our traffic at
/// the time this comment was written). GCM documentation was also
/// removed from Google's site. It still persists in obscure corners
/// of the internet, but I have no idea for how long.
/// <https://stuff.mit.edu/afs/sipb/project/android/docs/google/gcm/gcm.html>
///
/// There is no way to generate GCM credentials. (They are disabled
/// on Google's server side.) There is no way to test GCM compatibility
/// other than monitoring errors. Users really should be migrated to the
/// new FCM endpoints, but we don't have full control over that.
pub async fn send_gcm(
&self,
data: HashMap<&'static str, String>,
Expand All @@ -109,9 +125,9 @@ impl FcmClient {
// You can also turn on internal debugging, see
// https://docs.rs/mockito/latest/mockito/#debug
dbg!(self.endpoint.clone().as_str());
let rr = ureq::post(self.endpoint.clone().as_str())
let rr = ureq::post(self.gcm_endpoint.clone().as_str())
.set("Authorization",
format!("key={}", self.credential.credential.as_str()).as_str())
format!("key={}", self.server_credential.server_access_token.as_str()).as_str())
.set("Content-Type", "application/json")
.send_json(&message).unwrap();
dbg!(rr.status(), rr.status_text());
Expand All @@ -121,7 +137,7 @@ impl FcmClient {
let server_access_token = &self.server_credential.server_access_token;
let response = self
.http_client
.post(self.endpoint.clone())
.post(self.gcm_endpoint.clone())
.header("Authorization", format!("key={}", server_access_token))
.header("Content-Type", "application/json")
.json(&message)
Expand All @@ -138,24 +154,33 @@ impl FcmClient {

// Handle error
let status = response.status();
if status.is_client_error() || status.is_server_error() {
let data: FcmResponse = response
.json()
.await
.map_err(FcmError::DeserializeResponse)?;

return Err(match (status, data.error) {
(StatusCode::UNAUTHORIZED, _) => RouterError::Authentication,
(StatusCode::NOT_FOUND, _) => RouterError::NotFound,
(_, Some(error)) => RouterError::Upstream {
status: error.status,
message: error.message,
},
(status, None) => RouterError::Upstream {
status: status.to_string(),
message: "Unknown reason".to_string(),
let data: GcmResponse = response
.json()
.await
.map_err(FcmError::DeserializeResponse)?;
if status.is_client_error() || status.is_server_error() || data.failure > 0 {
let invalid = GcmResult::invalid();
return Err(
match (status, &data.results.get(0).unwrap_or(&invalid).error) {
(StatusCode::UNAUTHORIZED, _) => RouterError::GCMAuthentication,
(StatusCode::NOT_FOUND, _) => RouterError::NotFound,
(_, Some(error)) => match error.as_str() {
"NotRegistered" | "InvalidRegistration" => RouterError::NotFound,
"Unavailable" => RouterError::Upstream {
status: "USER UNAVAILABLE".to_owned(),
message: "User Unavailable. Try again later.".to_owned(),
},
_ => RouterError::Upstream {
status: StatusCode::BAD_GATEWAY.to_string(),
message: format!("Unexpected error: {}", error),
},
},
(status, None) => RouterError::Upstream {
status: status.to_string(),
message: "Unknown reason".to_string(),
},
},
});
);
}

Ok(())
Expand Down Expand Up @@ -220,6 +245,7 @@ impl FcmClient {
.await
.map_err(FcmError::DeserializeResponse)?;

// we only ever send one.
return Err(match (status, data.error) {
(StatusCode::UNAUTHORIZED, _) => RouterError::Authentication,
(StatusCode::NOT_FOUND, _) => RouterError::NotFound,
Expand Down Expand Up @@ -249,6 +275,40 @@ struct FcmErrorResponse {
message: String,
}

/// This is a joint structure that would reflect the status of each delivered
/// message. (We only send one at a time.)
#[derive(Clone, Deserialize, Debug, Default)]
struct GcmResult {
error: Option<String>, // Optional, standardized error message
#[serde(rename = "message_id")]
_message_id: Option<String>, // Optional identifier for a successful send
#[serde(rename = "registration_id")]
_registration_id: Option<String>, // Optional replacement registration ID
}

impl GcmResult {
pub fn invalid() -> GcmResult {
Self {
error: Some("Invalid GCM Response".to_string()),
..Default::default()
}
}
}

// The expected GCM response message. (Being explicit here because
// the offical documentation has been removed)
#[derive(Clone, Deserialize, Debug, Default)]
struct GcmResponse {
results: Vec<GcmResult>,
#[serde(rename = "multicast_id")]
_multicast_id: u64, // ID for this set of messages/results
#[serde(rename = "success")]
_success: u32, // Number of messages succeeding.
failure: u32, // number of messages failing.
#[serde(rename = "canonical_ids")]
_canonical_ids: u32, // number of IDs that are reassigned.
}

#[cfg(test)]
pub mod tests {
use crate::routers::fcm::client::FcmClient;
Expand Down Expand Up @@ -300,6 +360,10 @@ pub mod tests {
)
}

pub fn mock_gcm_endpoint_builder() -> mockito::Mock {
mockito::mock("POST", "/fcm/send")
}

/// Make a FcmClient from the service auth data
async fn make_client(credential: FcmServerCredential) -> FcmClient {
FcmClient::new(
Expand Down Expand Up @@ -351,18 +415,21 @@ pub mod tests {
server_access_token: registration_id.to_owned(),
})
.await;
let _token_mock = mock_token_endpoint();
let body = format!("{{\"data\":{{\"is_test\":\"true\"}},\"delay_while_idle\":false,\"registration_ids\":[\"{}\"],\"time_to_live\":42}}", &registration_id);
let fcm_mock = mock_fcm_endpoint_builder(project_id)
let body = format!(
r#"{{"data":{{"is_test":"true"}},"delay_while_idle":false,"registration_ids":["{}"],"time_to_live":42}}"#,
&registration_id
);
let gcm_mock = mock_gcm_endpoint_builder()
.match_header("Authorization", format!("key={}", registration_id).as_str())
.match_header("Content-Type", "application/json")
.with_body(r#"{"multicast_id":216,"success":1,"failure":0,"canonical_ids":0,"results":[{"message_id":"1:02"}]}"#,)
.match_body(body.as_str())
.create();
let mut data = HashMap::new();
data.insert("is_test", "true".to_string());
let result = client.send_gcm(data, registration_id.to_owned(), 42).await;
assert!(result.is_ok(), "result={:?}", result);
fcm_mock.assert();
gcm_mock.assert();
}

/// Authorization errors are handled
Expand Down Expand Up @@ -390,6 +457,43 @@ pub mod tests {
);
}

/// GCM errors are handled
#[tokio::test]
async fn gcm_unavailable() {
let token = make_service_key();
let client = make_client(FcmServerCredential {
project_id: PROJECT_ID.to_owned(),
server_access_token: token.clone(),
})
.await;
let _token_mock = mock_token_endpoint();
// Other potential GCM errors:
// "Unavailable" => client unavailable, retry sending.
// "InvalidRegistration" => registration corrupted, remove.
let _gcm_mock = mock_gcm_endpoint_builder()
.match_body(r#"{"data":{"is_test":"true"},"delay_while_idle":false,"registration_ids":["test-token"],"time_to_live":42}"#)
.match_header("Authorization", format!("key={}", token).as_str())
.match_header("Content-Type", "application/json")
.with_status(200)
.with_body(r#"{"multicast_id":216,"success":0,"failure":1,"canonical_ids":0,"results":[{"error":"NotRegistered"}]}"#,
)
.create();

let result = client
.send_gcm(
HashMap::from([("is_test", "true".to_owned())]),
"test-token".to_string(),
42,
)
.await;
assert!(result.is_err());
assert!(
matches!(result.as_ref().unwrap_err(), RouterError::NotFound),
"result = {:?}",
result
);
}

/// 404 errors are handled
#[tokio::test]
async fn not_found() {
Expand Down
11 changes: 7 additions & 4 deletions autoendpoint/src/routers/fcm/router.rs
Original file line number Diff line number Diff line change
Expand Up @@ -229,8 +229,8 @@ mod tests {
use crate::extractors::routers::RouterType;
use crate::routers::common::tests::{make_notification, CHANNEL_ID};
use crate::routers::fcm::client::tests::{
make_service_key, mock_fcm_endpoint_builder, mock_token_endpoint, GCM_PROJECT_ID,
PROJECT_ID,
make_service_key, mock_fcm_endpoint_builder, mock_gcm_endpoint_builder,
mock_token_endpoint, GCM_PROJECT_ID, PROJECT_ID,
};
use crate::routers::fcm::error::FcmError;
use crate::routers::fcm::router::FcmRouter;
Expand Down Expand Up @@ -342,7 +342,7 @@ mod tests {
async fn successful_gcm_fallback() {
let auth_key = "AIzaSyB0ecSrqnEDXQ7yjLXqVc0CUGOeSlq9BsM"; // this is a nonce value used only for testing.
let registration_id = "AAAAAAAAAAAAAAAAAAAAAAAAAAAAAA";
let project_id = GCM_PROJECT_ID;
// let project_id = GCM_PROJECT_ID;
let ddb = MockDbClient::new().into_boxed_arc();
let router = make_router(make_service_key(), auth_key.to_owned(), ddb).await;
assert!(router.active());
Expand All @@ -357,9 +357,12 @@ mod tests {
})
.to_string();
let _token_mock = mock_token_endpoint();
let fcm_mock = mock_fcm_endpoint_builder(project_id)
let fcm_mock = mock_gcm_endpoint_builder()
.match_header("Authorization", format!("key={}", &auth_key).as_str())
.match_header("Content-Type", "application/json")
.with_body(
r#"{ "multicast_id": 216,"success":1,"failure":0,"canonical_ids":0,"results":[{"message_id":"1:02"}]}"#,
)
.match_body(body.as_str())
.create();
let notification = make_notification(
Expand Down
6 changes: 6 additions & 0 deletions autoendpoint/src/routers/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,9 @@ pub enum RouterError {
#[error("Bridge authentication error")]
Authentication,

#[error("GCM Bridge authentication error")]
GCMAuthentication,

#[error("Bridge request timeout")]
RequestTimeout,

Expand Down Expand Up @@ -123,6 +126,7 @@ impl RouterError {
RouterError::TooMuchData(_) => StatusCode::PAYLOAD_TOO_LARGE,

RouterError::Authentication
| RouterError::GCMAuthentication
| RouterError::RequestTimeout
| RouterError::Connect(_)
| RouterError::Upstream { .. } => StatusCode::BAD_GATEWAY,
Expand Down Expand Up @@ -150,6 +154,8 @@ impl RouterError {

RouterError::RequestTimeout => Some(903),

RouterError::GCMAuthentication => Some(904),

RouterError::Upstream { .. } => None,
}
}
Expand Down
5 changes: 2 additions & 3 deletions autoendpoint/src/routes/webpush.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,15 +12,14 @@ use actix_web::HttpResponse;
pub async fn webpush_route(
notification: Notification,
routers: Routers,
_state: Data<ServerState>,
) -> ApiResult<HttpResponse> {
let router = routers.get(
RouterType::from_str(&notification.subscription.user.router_type)
.map_err(|_| ApiErrorKind::InvalidRouterType)?,
);

let response = router.route_notification(&notification).await?;

Ok(response.into())
Ok(router.route_notification(&notification).await?.into())
}

/// Handle the `DELETE /m/{message_id}` route
Expand Down

0 comments on commit 96cef48

Please sign in to comment.