From 960301cb15442a0410ef40d204e256a8445f1507 Mon Sep 17 00:00:00 2001 From: jrconlin Date: Thu, 19 Oct 2017 10:33:58 -0700 Subject: [PATCH] feat: address HTTP2 errors in APNS Converts two errors into metrics and attempts retry on Connection or HTTP2 errors. Closes #1052 --- autopush/router/apns2.py | 66 +++++++++++++++++------------- autopush/router/apnsrouter.py | 33 +++++++-------- configs/autopush_shared.ini.sample | 9 ++-- 3 files changed, 61 insertions(+), 47 deletions(-) diff --git a/autopush/router/apns2.py b/autopush/router/apns2.py index 4a9184a3..22533525 100644 --- a/autopush/router/apns2.py +++ b/autopush/router/apns2.py @@ -41,7 +41,8 @@ def __init__(self, cert_file, key_file, topic, alt=False, use_sandbox=False, max_connections=APNS_MAX_CONNECTIONS, logger=None, metrics=None, - load_connections=True): + load_connections=True, + max_retry=2): """Create the APNS client connector. The cert_file and key_file can be derived from the exported `.p12` @@ -76,6 +77,8 @@ def __init__(self, cert_file, key_file, topic, :type metrics: autopush.metrics.IMetric :param load_connections: used for testing :type load_connections: bool + :param max_retry: Number of HTTP2 transmit attempts + :type max_retry: int """ self.server = SANDBOX if use_sandbox else SERVER @@ -84,6 +87,7 @@ def __init__(self, cert_file, key_file, topic, self.metrics = metrics self.topic = topic self._max_connections = max_connections + self._max_retry = max_retry self.connections = deque(maxlen=max_connections) if load_connections: self.ssl_context = hyper.tls.init_context(cert=(cert_file, @@ -129,33 +133,39 @@ def send(self, router_token, payload, apns_id, headers['apns-expiration'] = str(exp) url = '/3/device/' + router_token connection = self._get_connection() - try: - # request auto-opens closed connections, so if a connection - # has timed out or failed for other reasons, it's automatically - # re-established. - stream_id = connection.request( - 'POST', url=url, body=body, headers=headers) - # get_response() may return an AttributeError. Not really sure - # how it happens, but the connected socket may get set to None. - # We'll treat that as a premature socket closure. - response = connection.get_response(stream_id) - if response.status != 200: - reason = json.loads(response.read().decode('utf-8'))['reason'] - raise RouterException( - "APNS Transmit Error {}:{}".format(response.status, - reason), - status_code=502, - response_body="APNS could not process " - "your message {}".format(reason), - log_exception=False - ) - except (HTTP20Error, AttributeError): - connection.close() - raise - finally: - # Returning a closed connection to the pool is ok. - # hyper will reconnect on .request() - self._return_connection(connection) + attempt = 0 + while True: + try: + # request auto-opens closed connections, so if a connection + # has timed out or failed for other reasons, it's automatically + # re-established. + stream_id = connection.request( + 'POST', url=url, body=body, headers=headers) + # get_response() may return an AttributeError. Not really sure + # how it happens, but the connected socket may get set to None. + # We'll treat that as a premature socket closure. + response = connection.get_response(stream_id) + if response.status != 200: + reason = json.loads(response.read().decode('utf-8'))['reason'] + raise RouterException( + "APNS Transmit Error {}:{}".format(response.status, + reason), + status_code=502, + response_body="APNS could not process " + "your message {}".format(reason), + log_exception=False + ) + break + except HTTP20Error: + connection.close() + attempt += 1 + if attempt < self._max_retry: + continue + raise + finally: + # Returning a closed connection to the pool is ok. + # hyper will reconnect on .request() + self._return_connection(connection) def _get_connection(self): try: diff --git a/autopush/router/apnsrouter.py b/autopush/router/apnsrouter.py index fb5059ba..45dcbf0c 100644 --- a/autopush/router/apnsrouter.py +++ b/autopush/router/apnsrouter.py @@ -5,7 +5,6 @@ from hyper.http20.exceptions import ConnectionError, HTTP20Error from twisted.internet.threads import deferToThread from twisted.logger import Logger -from twisted.python.failure import Failure from autopush.exceptions import RouterException from autopush.metrics import make_tags @@ -46,7 +45,9 @@ def _connect(self, rel_channel, load_connections=True): topic=cert_info.get("topic", default_topic), logger=self.log, metrics=self.metrics, - load_connections=load_connections) + load_connections=load_connections, + max_retry=cert_info.get('max_retry', 2) + ) def __init__(self, conf, router_conf, metrics, load_connections=True): """Create a new APNS router and connect to APNS @@ -139,31 +140,31 @@ def _route(self, notification, router_data): "alert": {"title": " ", "body": " "} }) apns_id = str(uuid.uuid4()).lower() + # APNs may force close a connection on us without warning. + # if that happens, retry the message. + success = False try: apns_client.send(router_token=router_token, payload=payload, apns_id=apns_id) - except (ConnectionError, AttributeError) as ex: - self.metrics.increment("notification.bridge.error", + success = True + except ConnectionError: + self.metrics.increment("notification.bridge.connection.error", + tags=make_tags( + self._base_tags, + application=rel_channel, + reason="connection_error")) + except HTTP20Error: + self.metrics.increment("notification.bridge.connection.error", tags=make_tags(self._base_tags, application=rel_channel, - reason="connection_error")) - self.log.error("Connection Error sending to APNS", - log_failure=Failure(ex)) + reason="http2_error")) + if not success: raise RouterException( "Server error", status_code=502, response_body="APNS returned an error processing request", log_exception=False, ) - except HTTP20Error as ex: - self.log.error("HTTP2 Error sending to APNS", - log_failure=Failure(ex)) - raise RouterException( - "Server error", - status_code=502, - response_body="APNS returned an error processing request", - ) - location = "%s/m/%s" % (self.conf.endpoint_url, notification.version) self.metrics.increment("notification.bridge.sent", tags=make_tags(self._base_tags, diff --git a/configs/autopush_shared.ini.sample b/configs/autopush_shared.ini.sample index 442d46f3..6994d1e6 100644 --- a/configs/autopush_shared.ini.sample +++ b/configs/autopush_shared.ini.sample @@ -124,8 +124,11 @@ endpoint_port = 8082 ; "key": "path_to_key_file", ; "topic": "com.mozilla.org.Firefox", // Bundle ID for associated app ; "max_connections": 100, // Max number of connection pool entries. -; "sandbox": False}, // Use the APNs sandbox feature -; ... } -; e.g {"firefox":{"cert":"certs/main.cert","key":"certs/main.key","topic":"com.mozilla.org.Firefox"},"beta":{"cert":"certs/beta.cert","key":"certs/beta.key","topic":"com.mozilla.org.FirefoxBeta"}} +; "sandbox": False, // Use the APNs sandbox feature +; "max_retry": 2, // Max number of retries in event of an HTTP2 error +; }, +; ... +; } +; e.g {"firefox":{"cert":"certs/main.cert","key":"certs/main.key","topic":"com.mozilla.org.Firefox","max_retry":2},"beta":{"cert":"certs/beta.cert","key":"certs/beta.key","topic":"com.mozilla.org.FirefoxBeta"}} #apns_creds =