From e5b80d6a96c625ffcdf3768f4ba415d836457d8d Mon Sep 17 00:00:00 2001 From: Anton Pirker Date: Mon, 3 Oct 2022 16:50:46 +0200 Subject: [PATCH] Use content-length header in ASGI instead of reading request body (#1646, #1631, #1595, #1573) * Do not read request body to determine content length. * Made AnnotatedValue understandable --- sentry_sdk/integrations/_wsgi_common.py | 19 ++---- sentry_sdk/integrations/aiohttp.py | 5 +- sentry_sdk/integrations/aws_lambda.py | 2 +- sentry_sdk/integrations/gcp.py | 2 +- sentry_sdk/integrations/starlette.py | 58 ++++++++----------- sentry_sdk/utils.py | 39 +++++++++++++ tests/integrations/bottle/test_bottle.py | 9 +-- tests/integrations/django/test_basic.py | 3 +- tests/integrations/flask/test_flask.py | 8 +-- tests/integrations/pyramid/test_pyramid.py | 4 +- .../integrations/starlette/test_starlette.py | 18 +++--- 11 files changed, 87 insertions(+), 80 deletions(-) diff --git a/sentry_sdk/integrations/_wsgi_common.py b/sentry_sdk/integrations/_wsgi_common.py index 4f253acc35..1b7b222f18 100644 --- a/sentry_sdk/integrations/_wsgi_common.py +++ b/sentry_sdk/integrations/_wsgi_common.py @@ -64,19 +64,13 @@ def extract_into_event(self, event): request_info["cookies"] = dict(self.cookies()) if not request_body_within_bounds(client, content_length): - data = AnnotatedValue( - "", - {"rem": [["!config", "x", 0, content_length]], "len": content_length}, - ) + data = AnnotatedValue.removed_because_over_size_limit() else: parsed_body = self.parsed_body() if parsed_body is not None: data = parsed_body elif self.raw_data(): - data = AnnotatedValue( - "", - {"rem": [["!raw", "x", 0, content_length]], "len": content_length}, - ) + data = AnnotatedValue.removed_because_raw_data() else: data = None @@ -110,11 +104,8 @@ def parsed_body(self): files = self.files() if form or files: data = dict(iteritems(form)) - for k, v in iteritems(files): - size = self.size_of_file(v) - data[k] = AnnotatedValue( - "", {"len": size, "rem": [["!raw", "x", 0, size]]} - ) + for key, _ in iteritems(files): + data[key] = AnnotatedValue.removed_because_raw_data() return data @@ -175,7 +166,7 @@ def _filter_headers(headers): k: ( v if k.upper().replace("-", "_") not in SENSITIVE_HEADERS - else AnnotatedValue("", {"rem": [["!config", "x", 0, len(v)]]}) + else AnnotatedValue.removed_because_over_size_limit() ) for k, v in iteritems(headers) } diff --git a/sentry_sdk/integrations/aiohttp.py b/sentry_sdk/integrations/aiohttp.py index f07790173d..c9a637eeb4 100644 --- a/sentry_sdk/integrations/aiohttp.py +++ b/sentry_sdk/integrations/aiohttp.py @@ -218,11 +218,8 @@ def get_aiohttp_request_data(hub, request): if bytes_body is not None: # we have body to show if not request_body_within_bounds(hub.client, len(bytes_body)): + return AnnotatedValue.removed_because_over_size_limit() - return AnnotatedValue( - "", - {"rem": [["!config", "x", 0, len(bytes_body)]], "len": len(bytes_body)}, - ) encoding = request.charset or "utf-8" return bytes_body.decode(encoding, "replace") diff --git a/sentry_sdk/integrations/aws_lambda.py b/sentry_sdk/integrations/aws_lambda.py index 8f41ce52cb..365247781c 100644 --- a/sentry_sdk/integrations/aws_lambda.py +++ b/sentry_sdk/integrations/aws_lambda.py @@ -377,7 +377,7 @@ def event_processor(sentry_event, hint, start_time=start_time): if aws_event.get("body", None): # Unfortunately couldn't find a way to get structured body from AWS # event. Meaning every body is unstructured to us. - request["data"] = AnnotatedValue("", {"rem": [["!raw", "x", 0, 0]]}) + request["data"] = AnnotatedValue.removed_because_raw_data() sentry_event["request"] = request diff --git a/sentry_sdk/integrations/gcp.py b/sentry_sdk/integrations/gcp.py index e401daa9ca..6025d38c45 100644 --- a/sentry_sdk/integrations/gcp.py +++ b/sentry_sdk/integrations/gcp.py @@ -190,7 +190,7 @@ def event_processor(event, hint): if hasattr(gcp_event, "data"): # Unfortunately couldn't find a way to get structured body from GCP # event. Meaning every body is unstructured to us. - request["data"] = AnnotatedValue("", {"rem": [["!raw", "x", 0, 0]]}) + request["data"] = AnnotatedValue.removed_because_raw_data() event["request"] = request diff --git a/sentry_sdk/integrations/starlette.py b/sentry_sdk/integrations/starlette.py index 2d23250fa0..28993611e6 100644 --- a/sentry_sdk/integrations/starlette.py +++ b/sentry_sdk/integrations/starlette.py @@ -438,49 +438,40 @@ async def extract_request_info(self): if client is None: return None - data = None # type: Union[Dict[str, Any], AnnotatedValue, None] - - content_length = await self.content_length() request_info = {} # type: Dict[str, Any] with capture_internal_exceptions(): if _should_send_default_pii(): request_info["cookies"] = self.cookies() - if not request_body_within_bounds(client, content_length): - data = AnnotatedValue( - "", - { - "rem": [["!config", "x", 0, content_length]], - "len": content_length, - }, - ) - else: - parsed_body = await self.parsed_body() - if parsed_body is not None: - data = parsed_body - elif await self.raw_data(): - data = AnnotatedValue( - "", - { - "rem": [["!raw", "x", 0, content_length]], - "len": content_length, - }, - ) + content_length = await self.content_length() + + if content_length: + data = None # type: Union[Dict[str, Any], AnnotatedValue, None] + + if not request_body_within_bounds(client, content_length): + data = AnnotatedValue.removed_because_over_size_limit() + else: - data = None + parsed_body = await self.parsed_body() + if parsed_body is not None: + data = parsed_body + elif await self.raw_data(): + data = AnnotatedValue.removed_because_raw_data() + else: + data = None - if data is not None: - request_info["data"] = data + if data is not None: + request_info["data"] = data return request_info async def content_length(self): - # type: (StarletteRequestExtractor) -> int - raw_data = await self.raw_data() - if raw_data is None: - return 0 - return len(raw_data) + # type: (StarletteRequestExtractor) -> Optional[int] + if "content-length" in self.request.headers: + return int(self.request.headers["content-length"]) + + return None def cookies(self): # type: (StarletteRequestExtractor) -> Dict[str, Any] @@ -525,10 +516,7 @@ async def parsed_body(self): data = {} for key, val in iteritems(form): if isinstance(val, UploadFile): - size = len(await val.read()) - data[key] = AnnotatedValue( - "", {"len": size, "rem": [["!raw", "x", 0, size]]} - ) + data[key] = AnnotatedValue.removed_because_raw_data() else: data[key] = val diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py index 05e620a0ca..5e74885b32 100644 --- a/sentry_sdk/utils.py +++ b/sentry_sdk/utils.py @@ -283,6 +283,13 @@ def to_header(self): class AnnotatedValue(object): + """ + Meta information for a data field in the event payload. + This is to tell Relay that we have tampered with the fields value. + See: + https://github.com/getsentry/relay/blob/be12cd49a0f06ea932ed9b9f93a655de5d6ad6d1/relay-general/src/types/meta.rs#L407-L423 + """ + __slots__ = ("value", "metadata") def __init__(self, value, metadata): @@ -290,6 +297,38 @@ def __init__(self, value, metadata): self.value = value self.metadata = metadata + @classmethod + def removed_because_raw_data(cls): + # type: () -> AnnotatedValue + """The value was removed because it could not be parsed. This is done for request body values that are not json nor a form.""" + return AnnotatedValue( + value="", + metadata={ + "rem": [ # Remark + [ + "!raw", # Unparsable raw data + "x", # The fields original value was removed + ] + ] + }, + ) + + @classmethod + def removed_because_over_size_limit(cls): + # type: () -> AnnotatedValue + """The actual value was removed because the size of the field exceeded the configured maximum size (specified with the request_bodies sdk option)""" + return AnnotatedValue( + value="", + metadata={ + "rem": [ # Remark + [ + "!config", # Because of configured maximum size + "x", # The fields original value was removed + ] + ] + }, + ) + if MYPY: from typing import TypeVar diff --git a/tests/integrations/bottle/test_bottle.py b/tests/integrations/bottle/test_bottle.py index 9a209fd896..dfd6e52f80 100644 --- a/tests/integrations/bottle/test_bottle.py +++ b/tests/integrations/bottle/test_bottle.py @@ -234,9 +234,7 @@ def index(): assert response[1] == "200 OK" (event,) = events - assert event["_meta"]["request"]["data"] == { - "": {"len": 2000, "rem": [["!config", "x", 0, 2000]]} - } + assert event["_meta"]["request"]["data"] == {"": {"rem": [["!config", "x"]]}} assert not event["request"]["data"] @@ -271,9 +269,8 @@ def index(): assert event["_meta"]["request"]["data"]["file"] == { "": { - "len": -1, - "rem": [["!raw", "x", 0, -1]], - } # bottle default content-length is -1 + "rem": [["!raw", "x"]], + } } assert not event["request"]["data"]["file"] diff --git a/tests/integrations/django/test_basic.py b/tests/integrations/django/test_basic.py index 7809239c30..a62f1bb073 100644 --- a/tests/integrations/django/test_basic.py +++ b/tests/integrations/django/test_basic.py @@ -520,8 +520,7 @@ def test_request_body(sentry_init, client, capture_events): assert event["message"] == "hi" assert event["request"]["data"] == "" assert event["_meta"]["request"]["data"][""] == { - "len": 6, - "rem": [["!raw", "x", 0, 6]], + "rem": [["!raw", "x"]], } del events[:] diff --git a/tests/integrations/flask/test_flask.py b/tests/integrations/flask/test_flask.py index be3e57c407..8983c4e5ff 100644 --- a/tests/integrations/flask/test_flask.py +++ b/tests/integrations/flask/test_flask.py @@ -414,9 +414,7 @@ def index(): assert response.status_code == 200 (event,) = events - assert event["_meta"]["request"]["data"] == { - "": {"len": 2000, "rem": [["!config", "x", 0, 2000]]} - } + assert event["_meta"]["request"]["data"] == {"": {"rem": [["!config", "x"]]}} assert not event["request"]["data"] @@ -445,9 +443,7 @@ def index(): } assert len(event["request"]["data"]["foo"]) == 1024 - assert event["_meta"]["request"]["data"]["file"] == { - "": {"len": 0, "rem": [["!raw", "x", 0, 0]]} - } + assert event["_meta"]["request"]["data"]["file"] == {"": {"rem": [["!raw", "x"]]}} assert not event["request"]["data"]["file"] diff --git a/tests/integrations/pyramid/test_pyramid.py b/tests/integrations/pyramid/test_pyramid.py index 495f19b16f..0f8755ac6b 100644 --- a/tests/integrations/pyramid/test_pyramid.py +++ b/tests/integrations/pyramid/test_pyramid.py @@ -213,9 +213,7 @@ def index(request): } assert len(event["request"]["data"]["foo"]) == 1024 - assert event["_meta"]["request"]["data"]["file"] == { - "": {"len": 0, "rem": [["!raw", "x", 0, 0]]} - } + assert event["_meta"]["request"]["data"]["file"] == {"": {"rem": [["!raw", "x"]]}} assert not event["request"]["data"]["file"] diff --git a/tests/integrations/starlette/test_starlette.py b/tests/integrations/starlette/test_starlette.py index 52d9ad4fe8..5908ebae52 100644 --- a/tests/integrations/starlette/test_starlette.py +++ b/tests/integrations/starlette/test_starlette.py @@ -56,9 +56,7 @@ PARSED_BODY = { "username": "Jane", "password": "hello123", - "photo": AnnotatedValue( - "", {"len": 28023, "rem": [["!raw", "x", 0, 28023]]} - ), # size of photo.jpg read above + "photo": AnnotatedValue("", {"rem": [["!raw", "x"]]}), } # Dummy ASGI scope for creating mock Starlette requests @@ -160,7 +158,11 @@ async def test_starlettrequestextractor_content_length(sentry_init): "starlette.requests.Request.stream", return_value=AsyncIterator(json.dumps(BODY_JSON)), ): - starlette_request = starlette.requests.Request(SCOPE) + scope = SCOPE.copy() + scope["headers"] = [ + [b"content-length", str(len(json.dumps(BODY_JSON))).encode()], + ] + starlette_request = starlette.requests.Request(scope) extractor = StarletteRequestExtractor(starlette_request) assert await extractor.content_length() == len(json.dumps(BODY_JSON)) @@ -266,6 +268,7 @@ async def test_starlettrequestextractor_extract_request_info_too_big(sentry_init scope = SCOPE.copy() scope["headers"] = [ [b"content-type", b"multipart/form-data; boundary=fd721ef49ea403a6"], + [b"content-length", str(len(BODY_FORM)).encode()], [b"cookie", b"yummy_cookie=choco; tasty_cookie=strawberry"], ] with mock.patch( @@ -283,10 +286,7 @@ async def test_starlettrequestextractor_extract_request_info_too_big(sentry_init "yummy_cookie": "choco", } # Because request is too big only the AnnotatedValue is extracted. - assert request_info["data"].metadata == { - "rem": [["!config", "x", 0, 28355]], - "len": 28355, - } + assert request_info["data"].metadata == {"rem": [["!config", "x"]]} @pytest.mark.asyncio @@ -298,6 +298,7 @@ async def test_starlettrequestextractor_extract_request_info(sentry_init): scope = SCOPE.copy() scope["headers"] = [ [b"content-type", b"application/json"], + [b"content-length", str(len(json.dumps(BODY_JSON))).encode()], [b"cookie", b"yummy_cookie=choco; tasty_cookie=strawberry"], ] @@ -327,6 +328,7 @@ async def test_starlettrequestextractor_extract_request_info_no_pii(sentry_init) scope = SCOPE.copy() scope["headers"] = [ [b"content-type", b"application/json"], + [b"content-length", str(len(json.dumps(BODY_JSON))).encode()], [b"cookie", b"yummy_cookie=choco; tasty_cookie=strawberry"], ]