Skip to content

Commit

Permalink
ref: Refactor ASGI middleware and improve contextvars error message (#…
Browse files Browse the repository at this point in the history
…701)

Found multiple issues with the asgi middleware:

    lack of warning if contextvars are broken -- as part of that I refactored/unified the error message we give in such situations, also added more information as gevent just recently released a version that deals with contextvars better
    exposed methods that were meant for overriding.. but all that is done in there can be done in event processors, so we make them private

Fix #630
Fix #700
Fix #694
  • Loading branch information
untitaker authored Jun 3, 2020
1 parent 8326668 commit 36ed64e
Show file tree
Hide file tree
Showing 8 changed files with 190 additions and 64 deletions.
5 changes: 3 additions & 2 deletions sentry_sdk/integrations/aiohttp.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
event_from_exception,
transaction_from_function,
HAS_REAL_CONTEXTVARS,
CONTEXTVARS_ERROR_MESSAGE,
AnnotatedValue,
)

Expand Down Expand Up @@ -60,9 +61,9 @@ def setup_once():
if not HAS_REAL_CONTEXTVARS:
# We better have contextvars or we're going to leak state between
# requests.
raise RuntimeError(
raise DidNotEnable(
"The aiohttp integration for Sentry requires Python 3.7+ "
" or aiocontextvars package"
" or aiocontextvars package." + CONTEXTVARS_ERROR_MESSAGE
)

ignore_logger("aiohttp.server")
Expand Down
116 changes: 78 additions & 38 deletions sentry_sdk/integrations/asgi.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,13 @@
from sentry_sdk._types import MYPY
from sentry_sdk.hub import Hub, _should_send_default_pii
from sentry_sdk.integrations._wsgi_common import _filter_headers
from sentry_sdk.utils import ContextVar, event_from_exception, transaction_from_function
from sentry_sdk.utils import (
ContextVar,
event_from_exception,
transaction_from_function,
HAS_REAL_CONTEXTVARS,
CONTEXTVARS_ERROR_MESSAGE,
)
from sentry_sdk.tracing import Span

if MYPY:
Expand All @@ -21,11 +27,15 @@
from typing import Optional
from typing import Callable

from typing_extensions import Literal

from sentry_sdk._types import Event, Hint


_asgi_middleware_applied = ContextVar("sentry_asgi_middleware_applied")

_DEFAULT_TRANSACTION_NAME = "generic ASGI request"


def _capture_exception(hub, exc):
# type: (Hub, Any) -> None
Expand Down Expand Up @@ -59,8 +69,23 @@ def _looks_like_asgi3(app):
class SentryAsgiMiddleware:
__slots__ = ("app", "__call__")

def __init__(self, app):
# type: (Any) -> None
def __init__(self, app, unsafe_context_data=False):
# type: (Any, bool) -> None
"""
Instrument an ASGI application with Sentry. Provides HTTP/websocket
data to sent events and basic handling for exceptions bubbling up
through the middleware.
:param unsafe_context_data: Disable errors when a proper contextvars installation could not be found. We do not recommend changing this from the default.
"""

if not unsafe_context_data and not HAS_REAL_CONTEXTVARS:
# We better have contextvars or we're going to leak state between
# requests.
raise RuntimeError(
"The ASGI middleware for Sentry requires Python 3.7+ "
"or the aiocontextvars package." + CONTEXTVARS_ERROR_MESSAGE
)
self.app = app

if _looks_like_asgi3(app):
Expand Down Expand Up @@ -95,15 +120,17 @@ async def _run_app(self, scope, callback):
processor = partial(self.event_processor, asgi_scope=scope)
sentry_scope.add_event_processor(processor)

if scope["type"] in ("http", "websocket"):
ty = scope["type"]

if ty in ("http", "websocket"):
span = Span.continue_from_headers(dict(scope["headers"]))
span.op = "{}.server".format(scope["type"])
span.op = "{}.server".format(ty)
else:
span = Span()
span.op = "asgi.server"

span.set_tag("asgi.type", scope["type"])
span.transaction = "generic ASGI request"
span.set_tag("asgi.type", ty)
span.transaction = _DEFAULT_TRANSACTION_NAME

with hub.start_span(span) as span:
# XXX: Would be cool to have correct span status, but we
Expand All @@ -121,38 +148,55 @@ def event_processor(self, event, hint, asgi_scope):
# type: (Event, Hint, Any) -> Optional[Event]
request_info = event.get("request", {})

if asgi_scope["type"] in ("http", "websocket"):
request_info["url"] = self.get_url(asgi_scope)
request_info["method"] = asgi_scope["method"]
request_info["headers"] = _filter_headers(self.get_headers(asgi_scope))
request_info["query_string"] = self.get_query(asgi_scope)

if asgi_scope.get("client") and _should_send_default_pii():
request_info["env"] = {"REMOTE_ADDR": asgi_scope["client"][0]}

if asgi_scope.get("endpoint"):
ty = asgi_scope["type"]
if ty in ("http", "websocket"):
request_info["method"] = asgi_scope.get("method")
request_info["headers"] = headers = _filter_headers(
self._get_headers(asgi_scope)
)
request_info["query_string"] = self._get_query(asgi_scope)

request_info["url"] = self._get_url(
asgi_scope, "http" if ty == "http" else "ws", headers.get("host")
)

client = asgi_scope.get("client")
if client and _should_send_default_pii():
request_info["env"] = {"REMOTE_ADDR": client[0]}

if (
event.get("transaction", _DEFAULT_TRANSACTION_NAME)
== _DEFAULT_TRANSACTION_NAME
):
endpoint = asgi_scope.get("endpoint")
# Webframeworks like Starlette mutate the ASGI env once routing is
# done, which is sometime after the request has started. If we have
# an endpoint, overwrite our path-based transaction name.
event["transaction"] = self.get_transaction(asgi_scope)
# an endpoint, overwrite our generic transaction name.
if endpoint:
event["transaction"] = transaction_from_function(endpoint)

event["request"] = request_info

return event

def get_url(self, scope):
# type: (Any) -> str
# Helper functions for extracting request data.
#
# Note: Those functions are not public API. If you want to mutate request
# data to your liking it's recommended to use the `before_send` callback
# for that.

def _get_url(self, scope, default_scheme, host):
# type: (Dict[str, Any], Literal["ws", "http"], Optional[str]) -> str
"""
Extract URL from the ASGI scope, without also including the querystring.
"""
scheme = scope.get("scheme", "http")
scheme = scope.get("scheme", default_scheme)

server = scope.get("server", None)
path = scope.get("root_path", "") + scope["path"]
path = scope.get("root_path", "") + scope.get("path", "")

for key, value in scope["headers"]:
if key == b"host":
host_header = value.decode("latin-1")
return "%s://%s%s" % (scheme, host_header, path)
if host:
return "%s://%s%s" % (scheme, host, path)

if server is not None:
host, port = server
Expand All @@ -162,15 +206,18 @@ def get_url(self, scope):
return "%s://%s%s" % (scheme, host, path)
return path

def get_query(self, scope):
def _get_query(self, scope):
# type: (Any) -> Any
"""
Extract querystring from the ASGI scope, in the format that the Sentry protocol expects.
"""
return urllib.parse.unquote(scope["query_string"].decode("latin-1"))
qs = scope.get("query_string")
if not qs:
return None
return urllib.parse.unquote(qs.decode("latin-1"))

def get_headers(self, scope):
# type: (Any) -> Dict[str, Any]
def _get_headers(self, scope):
# type: (Any) -> Dict[str, str]
"""
Extract headers from the ASGI scope, in the format that the Sentry protocol expects.
"""
Expand All @@ -183,10 +230,3 @@ def get_headers(self, scope):
else:
headers[key] = value
return headers

def get_transaction(self, scope):
# type: (Any) -> Optional[str]
"""
Return a transaction string to identify the routed endpoint.
"""
return transaction_from_function(scope["endpoint"])
18 changes: 9 additions & 9 deletions sentry_sdk/integrations/django/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
from sentry_sdk.tracing import record_sql_queries
from sentry_sdk.utils import (
HAS_REAL_CONTEXTVARS,
CONTEXTVARS_ERROR_MESSAGE,
logger,
capture_internal_exceptions,
event_from_exception,
Expand Down Expand Up @@ -301,11 +302,12 @@ def _patch_channels():
# requests.
#
# We cannot hard-raise here because channels may not be used at all in
# the current process.
# the current process. That is the case when running traditional WSGI
# workers in gunicorn+gevent and the websocket stuff in a separate
# process.
logger.warning(
"We detected that you are using Django channels 2.0. To get proper "
"instrumentation for ASGI requests, the Sentry SDK requires "
"Python 3.7+ or the aiocontextvars package from PyPI."
"We detected that you are using Django channels 2.0."
+ CONTEXTVARS_ERROR_MESSAGE
)

from sentry_sdk.integrations.django.asgi import patch_channels_asgi_handler_impl
Expand All @@ -324,12 +326,10 @@ def _patch_django_asgi_handler():
# We better have contextvars or we're going to leak state between
# requests.
#
# We cannot hard-raise here because Django may not be used at all in
# the current process.
# We cannot hard-raise here because Django's ASGI stuff may not be used
# at all.
logger.warning(
"We detected that you are using Django 3. To get proper "
"instrumentation for ASGI requests, the Sentry SDK requires "
"Python 3.7+ or the aiocontextvars package from PyPI."
"We detected that you are using Django 3." + CONTEXTVARS_ERROR_MESSAGE
)

from sentry_sdk.integrations.django.asgi import patch_django_asgi_handler_impl
Expand Down
8 changes: 6 additions & 2 deletions sentry_sdk/integrations/django/asgi.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ async def sentry_patched_asgi_handler(self, scope, receive, send):
if Hub.current.get_integration(DjangoIntegration) is None:
return await old_app(self, scope, receive, send)

middleware = SentryAsgiMiddleware(old_app.__get__(self, cls))._run_asgi3
middleware = SentryAsgiMiddleware(
old_app.__get__(self, cls), unsafe_context_data=True
)._run_asgi3
return await middleware(scope, receive, send)

cls.__call__ = sentry_patched_asgi_handler
Expand All @@ -40,7 +42,9 @@ async def sentry_patched_asgi_handler(self, receive, send):
if Hub.current.get_integration(DjangoIntegration) is None:
return await old_app(self, receive, send)

middleware = SentryAsgiMiddleware(lambda _scope: old_app.__get__(self, cls))
middleware = SentryAsgiMiddleware(
lambda _scope: old_app.__get__(self, cls), unsafe_context_data=True
)

return await middleware(self.scope)(receive, send)

Expand Down
3 changes: 2 additions & 1 deletion sentry_sdk/integrations/sanic.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
capture_internal_exceptions,
event_from_exception,
HAS_REAL_CONTEXTVARS,
CONTEXTVARS_ERROR_MESSAGE,
)
from sentry_sdk.integrations import Integration, DidNotEnable
from sentry_sdk.integrations._wsgi_common import RequestExtractor, _filter_headers
Expand Down Expand Up @@ -55,7 +56,7 @@ def setup_once():
# requests.
raise DidNotEnable(
"The sanic integration for Sentry requires Python 3.7+ "
" or aiocontextvars package"
" or the aiocontextvars package." + CONTEXTVARS_ERROR_MESSAGE
)

if SANIC_VERSION.startswith("0.8."):
Expand Down
4 changes: 3 additions & 1 deletion sentry_sdk/integrations/tornado.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from sentry_sdk.hub import Hub, _should_send_default_pii
from sentry_sdk.utils import (
HAS_REAL_CONTEXTVARS,
CONTEXTVARS_ERROR_MESSAGE,
event_from_exception,
capture_internal_exceptions,
transaction_from_function,
Expand Down Expand Up @@ -48,7 +49,8 @@ def setup_once():
# Tornado is async. We better have contextvars or we're going to leak
# state between requests.
raise DidNotEnable(
"The tornado integration for Sentry requires Python 3.6+ or the aiocontextvars package"
"The tornado integration for Sentry requires Python 3.7+ or the aiocontextvars package"
+ CONTEXTVARS_ERROR_MESSAGE
)

ignore_logger("tornado.access")
Expand Down
38 changes: 28 additions & 10 deletions sentry_sdk/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -724,10 +724,15 @@ def strip_string(value, max_length=None):

def _is_contextvars_broken():
# type: () -> bool
"""
Returns whether gevent/eventlet have patched the stdlib in a way where thread locals are now more "correct" than contextvars.
"""
try:
from gevent.monkey import is_object_patched # type: ignore

if is_object_patched("threading", "local"):
# Gevent 20.5 is able to patch both thread locals and contextvars,
# in that case all is good.
if is_object_patched("contextvars", "ContextVar"):
return False

Expand All @@ -749,31 +754,35 @@ def _is_contextvars_broken():
def _get_contextvars():
# type: () -> Tuple[bool, type]
"""
Try to import contextvars and use it if it's deemed safe. We should not use
contextvars if gevent or eventlet have patched thread locals, as
contextvars are unaffected by that patch.
Figure out the "right" contextvars installation to use. Returns a
`contextvars.ContextVar`-like class with a limited API.
https://github.com/gevent/gevent/issues/1407
See https://docs.sentry.io/platforms/python/contextvars/ for more information.
"""
if not _is_contextvars_broken():
# aiocontextvars is a PyPI package that ensures that the contextvars
# backport (also a PyPI package) works with asyncio under Python 3.6
#
# Import it if available.
if not PY2 and sys.version_info < (3, 7):
if sys.version_info < (3, 7):
# `aiocontextvars` is absolutely required for functional
# contextvars on Python 3.6.
try:
from aiocontextvars import ContextVar # noqa

return True, ContextVar
except ImportError:
pass
else:
# On Python 3.7 contextvars are functional.
try:
from contextvars import ContextVar

try:
from contextvars import ContextVar
return True, ContextVar
except ImportError:
pass

return True, ContextVar
except ImportError:
pass
# Fall back to basic thread-local usage.

from threading import local

Expand All @@ -798,6 +807,15 @@ def set(self, value):

HAS_REAL_CONTEXTVARS, ContextVar = _get_contextvars()

CONTEXTVARS_ERROR_MESSAGE = """
With asyncio/ASGI applications, the Sentry SDK requires a functional
installation of `contextvars` to avoid leaking scope/context data across
requests.
Please refer to https://docs.sentry.io/platforms/python/contextvars/ for more information.
"""


def transaction_from_function(func):
# type: (Callable[..., Any]) -> Optional[str]
Expand Down
Loading

0 comments on commit 36ed64e

Please sign in to comment.