Skip to content

Commit

Permalink
Use content-length header in ASGI instead of reading request body (#1646
Browse files Browse the repository at this point in the history
, #1631, #1595, #1573)

* Do not read request body to determine content length.
* Made AnnotatedValue understandable
  • Loading branch information
antonpirker authored Oct 3, 2022
1 parent 067d80c commit e5b80d6
Show file tree
Hide file tree
Showing 11 changed files with 87 additions and 80 deletions.
19 changes: 5 additions & 14 deletions sentry_sdk/integrations/_wsgi_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,19 +64,13 @@ def extract_into_event(self, event):
request_info["cookies"] = dict(self.cookies())

if not request_body_within_bounds(client, content_length):
data = AnnotatedValue(
"",
{"rem": [["!config", "x", 0, content_length]], "len": content_length},
)
data = AnnotatedValue.removed_because_over_size_limit()
else:
parsed_body = self.parsed_body()
if parsed_body is not None:
data = parsed_body
elif self.raw_data():
data = AnnotatedValue(
"",
{"rem": [["!raw", "x", 0, content_length]], "len": content_length},
)
data = AnnotatedValue.removed_because_raw_data()
else:
data = None

Expand Down Expand Up @@ -110,11 +104,8 @@ def parsed_body(self):
files = self.files()
if form or files:
data = dict(iteritems(form))
for k, v in iteritems(files):
size = self.size_of_file(v)
data[k] = AnnotatedValue(
"", {"len": size, "rem": [["!raw", "x", 0, size]]}
)
for key, _ in iteritems(files):
data[key] = AnnotatedValue.removed_because_raw_data()

return data

Expand Down Expand Up @@ -175,7 +166,7 @@ def _filter_headers(headers):
k: (
v
if k.upper().replace("-", "_") not in SENSITIVE_HEADERS
else AnnotatedValue("", {"rem": [["!config", "x", 0, len(v)]]})
else AnnotatedValue.removed_because_over_size_limit()
)
for k, v in iteritems(headers)
}
5 changes: 1 addition & 4 deletions sentry_sdk/integrations/aiohttp.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,11 +218,8 @@ def get_aiohttp_request_data(hub, request):
if bytes_body is not None:
# we have body to show
if not request_body_within_bounds(hub.client, len(bytes_body)):
return AnnotatedValue.removed_because_over_size_limit()

return AnnotatedValue(
"",
{"rem": [["!config", "x", 0, len(bytes_body)]], "len": len(bytes_body)},
)
encoding = request.charset or "utf-8"
return bytes_body.decode(encoding, "replace")

Expand Down
2 changes: 1 addition & 1 deletion sentry_sdk/integrations/aws_lambda.py
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ def event_processor(sentry_event, hint, start_time=start_time):
if aws_event.get("body", None):
# Unfortunately couldn't find a way to get structured body from AWS
# event. Meaning every body is unstructured to us.
request["data"] = AnnotatedValue("", {"rem": [["!raw", "x", 0, 0]]})
request["data"] = AnnotatedValue.removed_because_raw_data()

sentry_event["request"] = request

Expand Down
2 changes: 1 addition & 1 deletion sentry_sdk/integrations/gcp.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ def event_processor(event, hint):
if hasattr(gcp_event, "data"):
# Unfortunately couldn't find a way to get structured body from GCP
# event. Meaning every body is unstructured to us.
request["data"] = AnnotatedValue("", {"rem": [["!raw", "x", 0, 0]]})
request["data"] = AnnotatedValue.removed_because_raw_data()

event["request"] = request

Expand Down
58 changes: 23 additions & 35 deletions sentry_sdk/integrations/starlette.py
Original file line number Diff line number Diff line change
Expand Up @@ -438,49 +438,40 @@ async def extract_request_info(self):
if client is None:
return None

data = None # type: Union[Dict[str, Any], AnnotatedValue, None]

content_length = await self.content_length()
request_info = {} # type: Dict[str, Any]

with capture_internal_exceptions():
if _should_send_default_pii():
request_info["cookies"] = self.cookies()

if not request_body_within_bounds(client, content_length):
data = AnnotatedValue(
"",
{
"rem": [["!config", "x", 0, content_length]],
"len": content_length,
},
)
else:
parsed_body = await self.parsed_body()
if parsed_body is not None:
data = parsed_body
elif await self.raw_data():
data = AnnotatedValue(
"",
{
"rem": [["!raw", "x", 0, content_length]],
"len": content_length,
},
)
content_length = await self.content_length()

if content_length:
data = None # type: Union[Dict[str, Any], AnnotatedValue, None]

if not request_body_within_bounds(client, content_length):
data = AnnotatedValue.removed_because_over_size_limit()

else:
data = None
parsed_body = await self.parsed_body()
if parsed_body is not None:
data = parsed_body
elif await self.raw_data():
data = AnnotatedValue.removed_because_raw_data()
else:
data = None

if data is not None:
request_info["data"] = data
if data is not None:
request_info["data"] = data

return request_info

async def content_length(self):
# type: (StarletteRequestExtractor) -> int
raw_data = await self.raw_data()
if raw_data is None:
return 0
return len(raw_data)
# type: (StarletteRequestExtractor) -> Optional[int]
if "content-length" in self.request.headers:
return int(self.request.headers["content-length"])

return None

def cookies(self):
# type: (StarletteRequestExtractor) -> Dict[str, Any]
Expand Down Expand Up @@ -525,10 +516,7 @@ async def parsed_body(self):
data = {}
for key, val in iteritems(form):
if isinstance(val, UploadFile):
size = len(await val.read())
data[key] = AnnotatedValue(
"", {"len": size, "rem": [["!raw", "x", 0, size]]}
)
data[key] = AnnotatedValue.removed_because_raw_data()
else:
data[key] = val

Expand Down
39 changes: 39 additions & 0 deletions sentry_sdk/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -283,13 +283,52 @@ def to_header(self):


class AnnotatedValue(object):
"""
Meta information for a data field in the event payload.
This is to tell Relay that we have tampered with the fields value.
See:
https://github.com/getsentry/relay/blob/be12cd49a0f06ea932ed9b9f93a655de5d6ad6d1/relay-general/src/types/meta.rs#L407-L423
"""

__slots__ = ("value", "metadata")

def __init__(self, value, metadata):
# type: (Optional[Any], Dict[str, Any]) -> None
self.value = value
self.metadata = metadata

@classmethod
def removed_because_raw_data(cls):
# type: () -> AnnotatedValue
"""The value was removed because it could not be parsed. This is done for request body values that are not json nor a form."""
return AnnotatedValue(
value="",
metadata={
"rem": [ # Remark
[
"!raw", # Unparsable raw data
"x", # The fields original value was removed
]
]
},
)

@classmethod
def removed_because_over_size_limit(cls):
# type: () -> AnnotatedValue
"""The actual value was removed because the size of the field exceeded the configured maximum size (specified with the request_bodies sdk option)"""
return AnnotatedValue(
value="",
metadata={
"rem": [ # Remark
[
"!config", # Because of configured maximum size
"x", # The fields original value was removed
]
]
},
)


if MYPY:
from typing import TypeVar
Expand Down
9 changes: 3 additions & 6 deletions tests/integrations/bottle/test_bottle.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,9 +234,7 @@ def index():
assert response[1] == "200 OK"

(event,) = events
assert event["_meta"]["request"]["data"] == {
"": {"len": 2000, "rem": [["!config", "x", 0, 2000]]}
}
assert event["_meta"]["request"]["data"] == {"": {"rem": [["!config", "x"]]}}
assert not event["request"]["data"]


Expand Down Expand Up @@ -271,9 +269,8 @@ def index():

assert event["_meta"]["request"]["data"]["file"] == {
"": {
"len": -1,
"rem": [["!raw", "x", 0, -1]],
} # bottle default content-length is -1
"rem": [["!raw", "x"]],
}
}
assert not event["request"]["data"]["file"]

Expand Down
3 changes: 1 addition & 2 deletions tests/integrations/django/test_basic.py
Original file line number Diff line number Diff line change
Expand Up @@ -520,8 +520,7 @@ def test_request_body(sentry_init, client, capture_events):
assert event["message"] == "hi"
assert event["request"]["data"] == ""
assert event["_meta"]["request"]["data"][""] == {
"len": 6,
"rem": [["!raw", "x", 0, 6]],
"rem": [["!raw", "x"]],
}

del events[:]
Expand Down
8 changes: 2 additions & 6 deletions tests/integrations/flask/test_flask.py
Original file line number Diff line number Diff line change
Expand Up @@ -414,9 +414,7 @@ def index():
assert response.status_code == 200

(event,) = events
assert event["_meta"]["request"]["data"] == {
"": {"len": 2000, "rem": [["!config", "x", 0, 2000]]}
}
assert event["_meta"]["request"]["data"] == {"": {"rem": [["!config", "x"]]}}
assert not event["request"]["data"]


Expand Down Expand Up @@ -445,9 +443,7 @@ def index():
}
assert len(event["request"]["data"]["foo"]) == 1024

assert event["_meta"]["request"]["data"]["file"] == {
"": {"len": 0, "rem": [["!raw", "x", 0, 0]]}
}
assert event["_meta"]["request"]["data"]["file"] == {"": {"rem": [["!raw", "x"]]}}
assert not event["request"]["data"]["file"]


Expand Down
4 changes: 1 addition & 3 deletions tests/integrations/pyramid/test_pyramid.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,9 +213,7 @@ def index(request):
}
assert len(event["request"]["data"]["foo"]) == 1024

assert event["_meta"]["request"]["data"]["file"] == {
"": {"len": 0, "rem": [["!raw", "x", 0, 0]]}
}
assert event["_meta"]["request"]["data"]["file"] == {"": {"rem": [["!raw", "x"]]}}
assert not event["request"]["data"]["file"]


Expand Down
18 changes: 10 additions & 8 deletions tests/integrations/starlette/test_starlette.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,9 +56,7 @@
PARSED_BODY = {
"username": "Jane",
"password": "hello123",
"photo": AnnotatedValue(
"", {"len": 28023, "rem": [["!raw", "x", 0, 28023]]}
), # size of photo.jpg read above
"photo": AnnotatedValue("", {"rem": [["!raw", "x"]]}),
}

# Dummy ASGI scope for creating mock Starlette requests
Expand Down Expand Up @@ -160,7 +158,11 @@ async def test_starlettrequestextractor_content_length(sentry_init):
"starlette.requests.Request.stream",
return_value=AsyncIterator(json.dumps(BODY_JSON)),
):
starlette_request = starlette.requests.Request(SCOPE)
scope = SCOPE.copy()
scope["headers"] = [
[b"content-length", str(len(json.dumps(BODY_JSON))).encode()],
]
starlette_request = starlette.requests.Request(scope)
extractor = StarletteRequestExtractor(starlette_request)

assert await extractor.content_length() == len(json.dumps(BODY_JSON))
Expand Down Expand Up @@ -266,6 +268,7 @@ async def test_starlettrequestextractor_extract_request_info_too_big(sentry_init
scope = SCOPE.copy()
scope["headers"] = [
[b"content-type", b"multipart/form-data; boundary=fd721ef49ea403a6"],
[b"content-length", str(len(BODY_FORM)).encode()],
[b"cookie", b"yummy_cookie=choco; tasty_cookie=strawberry"],
]
with mock.patch(
Expand All @@ -283,10 +286,7 @@ async def test_starlettrequestextractor_extract_request_info_too_big(sentry_init
"yummy_cookie": "choco",
}
# Because request is too big only the AnnotatedValue is extracted.
assert request_info["data"].metadata == {
"rem": [["!config", "x", 0, 28355]],
"len": 28355,
}
assert request_info["data"].metadata == {"rem": [["!config", "x"]]}


@pytest.mark.asyncio
Expand All @@ -298,6 +298,7 @@ async def test_starlettrequestextractor_extract_request_info(sentry_init):
scope = SCOPE.copy()
scope["headers"] = [
[b"content-type", b"application/json"],
[b"content-length", str(len(json.dumps(BODY_JSON))).encode()],
[b"cookie", b"yummy_cookie=choco; tasty_cookie=strawberry"],
]

Expand Down Expand Up @@ -327,6 +328,7 @@ async def test_starlettrequestextractor_extract_request_info_no_pii(sentry_init)
scope = SCOPE.copy()
scope["headers"] = [
[b"content-type", b"application/json"],
[b"content-length", str(len(json.dumps(BODY_JSON))).encode()],
[b"cookie", b"yummy_cookie=choco; tasty_cookie=strawberry"],
]

Expand Down

0 comments on commit e5b80d6

Please sign in to comment.