Skip to content

Commit

Permalink
Serialize vars early to avoid living references (#3409)
Browse files Browse the repository at this point in the history
  • Loading branch information
sl0thentr0py authored Aug 8, 2024
1 parent 5529c70 commit 7d46709
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 57 deletions.
16 changes: 8 additions & 8 deletions sentry_sdk/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,12 @@
from collections.abc import Mapping
from datetime import datetime, timezone
from importlib import import_module
from typing import cast

from sentry_sdk._compat import PY37, check_uwsgi_thread_support
from sentry_sdk.utils import (
capture_internal_exceptions,
current_stacktrace,
disable_capture_event,
format_timestamp,
get_sdk_name,
get_type_name,
Expand Down Expand Up @@ -525,10 +525,13 @@ def _prepare_event(
# Postprocess the event here so that annotated types do
# generally not surface in before_send
if event is not None:
event = serialize(
event,
max_request_body_size=self.options.get("max_request_body_size"),
max_value_length=self.options.get("max_value_length"),
event = cast(
"Event",
serialize(
cast("Dict[str, Any]", event),
max_request_body_size=self.options.get("max_request_body_size"),
max_value_length=self.options.get("max_value_length"),
),
)

before_send = self.options["before_send"]
Expand Down Expand Up @@ -726,9 +729,6 @@ def capture_event(
:returns: An event ID. May be `None` if there is no DSN set or of if the SDK decided to discard the event for other reasons. In such situations setting `debug=True` on `init()` may help.
"""
if disable_capture_event.get(False):
return None

if hint is None:
hint = {}
event_id = event.get("event_id")
Expand Down
3 changes: 2 additions & 1 deletion sentry_sdk/integrations/pure_eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,8 @@ def start(n):
atok = source.asttokens()

expressions.sort(key=closeness, reverse=True)
return {
vars = {
atok.get_text(nodes[0]): value
for nodes, value in expressions[: serializer.MAX_DATABAG_BREADTH]
}
return serializer.serialize(vars, is_vars=True)
10 changes: 10 additions & 0 deletions sentry_sdk/scope.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
capture_internal_exception,
capture_internal_exceptions,
ContextVar,
disable_capture_event,
event_from_exception,
exc_info_from_error,
logger,
Expand Down Expand Up @@ -1130,6 +1131,9 @@ def capture_event(self, event, hint=None, scope=None, **scope_kwargs):
:returns: An `event_id` if the SDK decided to send the event (see :py:meth:`sentry_sdk.client._Client.capture_event`).
"""
if disable_capture_event.get(False):
return None

scope = self._merge_scopes(scope, scope_kwargs)

event_id = self.get_client().capture_event(event=event, hint=hint, scope=scope)
Expand Down Expand Up @@ -1157,6 +1161,9 @@ def capture_message(self, message, level=None, scope=None, **scope_kwargs):
:returns: An `event_id` if the SDK decided to send the event (see :py:meth:`sentry_sdk.client._Client.capture_event`).
"""
if disable_capture_event.get(False):
return None

if level is None:
level = "info"

Expand All @@ -1182,6 +1189,9 @@ def capture_exception(self, error=None, scope=None, **scope_kwargs):
:returns: An `event_id` if the SDK decided to send the event (see :py:meth:`sentry_sdk.client._Client.capture_event`).
"""
if disable_capture_event.get(False):
return None

if error is not None:
exc_info = exc_info_from_error(error)
else:
Expand Down
74 changes: 27 additions & 47 deletions sentry_sdk/serializer.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from typing import Type
from typing import Union

from sentry_sdk._types import NotImplementedType, Event
from sentry_sdk._types import NotImplementedType

Span = Dict[str, Any]

Expand Down Expand Up @@ -95,7 +95,25 @@ def __exit__(


def serialize(event, **kwargs):
# type: (Event, **Any) -> Event
# type: (Dict[str, Any], **Any) -> Dict[str, Any]
"""
A very smart serializer that takes a dict and emits a json-friendly dict.
Currently used for serializing the final Event and also prematurely while fetching the stack
local variables for each frame in a stacktrace.
It works internally with 'databags' which are arbitrary data structures like Mapping, Sequence and Set.
The algorithm itself is a recursive graph walk down the data structures it encounters.
It has the following responsibilities:
* Trimming databags and keeping them within MAX_DATABAG_BREADTH and MAX_DATABAG_DEPTH.
* Calling safe_repr() on objects appropriately to keep them informative and readable in the final payload.
* Annotating the payload with the _meta field whenever trimming happens.
:param max_request_body_size: If set to "always", will never trim request bodies.
:param max_value_length: The max length to strip strings to, defaults to sentry_sdk.consts.DEFAULT_MAX_VALUE_LENGTH
:param is_vars: If we're serializing vars early, we want to repr() things that are JSON-serializable to make their type more apparent. For example, it's useful to see the difference between a unicode-string and a bytestring when viewing a stacktrace.
"""
memo = Memo()
path = [] # type: List[Segment]
meta_stack = [] # type: List[Dict[str, Any]]
Expand All @@ -104,6 +122,7 @@ def serialize(event, **kwargs):
kwargs.pop("max_request_body_size", None) == "always"
) # type: bool
max_value_length = kwargs.pop("max_value_length", None) # type: Optional[int]
is_vars = kwargs.pop("is_vars", False)

def _annotate(**meta):
# type: (**Any) -> None
Expand All @@ -118,56 +137,17 @@ def _annotate(**meta):

meta_stack[-1].setdefault("", {}).update(meta)

def _should_repr_strings():
# type: () -> Optional[bool]
"""
By default non-serializable objects are going through
safe_repr(). For certain places in the event (local vars) we
want to repr() even things that are JSON-serializable to
make their type more apparent. For example, it's useful to
see the difference between a unicode-string and a bytestring
when viewing a stacktrace.
For container-types we still don't do anything different.
Generally we just try to make the Sentry UI present exactly
what a pretty-printed repr would look like.
:returns: `True` if we are somewhere in frame variables, and `False` if
we are in a position where we will never encounter frame variables
when recursing (for example, we're in `event.extra`). `None` if we
are not (yet) in frame variables, but might encounter them when
recursing (e.g. we're in `event.exception`)
"""
try:
p0 = path[0]
if p0 == "stacktrace" and path[1] == "frames" and path[3] == "vars":
return True

if (
p0 in ("threads", "exception")
and path[1] == "values"
and path[3] == "stacktrace"
and path[4] == "frames"
and path[6] == "vars"
):
return True
except IndexError:
return None

return False

def _is_databag():
# type: () -> Optional[bool]
"""
A databag is any value that we need to trim.
True for stuff like vars, request bodies, breadcrumbs and extra.
:returns: Works like `_should_repr_strings()`. `True` for "yes",
`False` for :"no", `None` for "maybe soon".
:returns: `True` for "yes", `False` for :"no", `None` for "maybe soon".
"""
try:
rv = _should_repr_strings()
if rv in (True, None):
return rv
if is_vars:
return True

is_request_body = _is_request_body()
if is_request_body in (True, None):
Expand Down Expand Up @@ -253,7 +233,7 @@ def _serialize_node_impl(
if isinstance(obj, AnnotatedValue):
should_repr_strings = False
if should_repr_strings is None:
should_repr_strings = _should_repr_strings()
should_repr_strings = is_vars

if is_databag is None:
is_databag = _is_databag()
Expand Down Expand Up @@ -387,7 +367,7 @@ def _serialize_node_impl(
disable_capture_event.set(True)
try:
serialized_event = _serialize_node(event, **kwargs)
if meta_stack and isinstance(serialized_event, dict):
if not is_vars and meta_stack and isinstance(serialized_event, dict):
serialized_event["_meta"] = meta_stack[0]

return serialized_event
Expand Down
4 changes: 3 additions & 1 deletion sentry_sdk/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -616,7 +616,9 @@ def serialize_frame(
)

if include_local_variables:
rv["vars"] = frame.f_locals.copy()
from sentry_sdk.serializer import serialize

rv["vars"] = serialize(dict(frame.f_locals), is_vars=True)

return rv

Expand Down
17 changes: 17 additions & 0 deletions tests/test_scrubber.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,3 +187,20 @@ def test_recursive_event_scrubber(sentry_init, capture_events):

(event,) = events
assert event["extra"]["deep"]["deeper"][0]["deepest"]["password"] == "'[Filtered]'"


def test_recursive_scrubber_does_not_override_original(sentry_init, capture_events):
sentry_init(event_scrubber=EventScrubber(recursive=True))
events = capture_events()

data = {"csrf": "secret"}
try:
raise RuntimeError("An error")
except Exception:
capture_exception()

(event,) = events
frames = event["exception"]["values"][0]["stacktrace"]["frames"]
(frame,) = frames
assert data["csrf"] == "secret"
assert frame["vars"]["data"]["csrf"] == "[Filtered]"

0 comments on commit 7d46709

Please sign in to comment.