From 12c9a84ed916e4ebb06f9604f56ae9409ed8f2cc Mon Sep 17 00:00:00 2001 From: Marcelo Galigniana Date: Thu, 27 Oct 2022 00:27:21 -0300 Subject: [PATCH] fix(utils): strip_string() checks text length counting bytes not chars The truncation and indexes in the AnnotatedValues are done by number of bytes and not number of characters. Fixes GH-1691 --- sentry_sdk/utils.py | 2 +- tests/utils/test_general.py | 21 +++++++++++++++++++++ 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/sentry_sdk/utils.py b/sentry_sdk/utils.py index 9b970a307d..c000a3bd2c 100644 --- a/sentry_sdk/utils.py +++ b/sentry_sdk/utils.py @@ -841,7 +841,7 @@ def strip_string(value, max_length=None): # This is intentionally not just the default such that one can patch `MAX_STRING_LENGTH` and affect `strip_string`. max_length = MAX_STRING_LENGTH - length = len(value) + length = len(value.encode("utf-8")) if length > max_length: return AnnotatedValue( diff --git a/tests/utils/test_general.py b/tests/utils/test_general.py index b85975b4bb..f2d0069ba3 100644 --- a/tests/utils/test_general.py +++ b/tests/utils/test_general.py @@ -15,6 +15,8 @@ iter_event_stacktraces, to_base64, from_base64, + strip_string, + AnnotatedValue, ) from sentry_sdk._compat import text_type, string_types @@ -217,3 +219,22 @@ def test_failed_base64_conversion(input): # failures if type(input) not in string_types: assert to_base64(input) is None + + +def test_strip_string(): + # If value is None returns None. + assert strip_string(None) is None + + # If max_length is not passed, returns the full text (up to 1024 bytes). + text_1024_long = "a" * 1024 + assert strip_string(text_1024_long).count("a") == 1024 + + # If value exceeds the max_length, returns an AnnotatedValue. + text_1025_long = "a" * 1025 + stripped_text = strip_string(text_1025_long) + assert isinstance(stripped_text, AnnotatedValue) + assert stripped_text.value.count("a") == 1021 # + '...' 
is 1024 + + # If text has unicode characters, it counts bytes and not number of characters. + text_with_unicode_character = "éê" + assert strip_string(text_with_unicode_character, max_length=2).value == "é..."