From d0978b589034f653ec5d1b2acfa792ac50120d2a Mon Sep 17 00:00:00 2001
From: Tony Xiao
Date: Wed, 19 Oct 2022 17:55:41 -0400
Subject: [PATCH 1/3] perf(profiling): Tune the sample profile generation code for performance

We noticed that generating the sample format at the end of a profile can get
rather slow, and this aims to improve what we can here with minimal changes.
A few things we took advantage of to accomplish this:

- Turning the extracted stack into a tuple so it is hashable and can be used
  as a dictionary key. This lets us check whether the stack has already been
  indexed and skip indexing its frames again. This is especially effective in
  profiles that are blocked on a network request, for example, since there
  will be many identical stacks.

- Using the hash of the stack as the dictionary key. Hashing the entire stack
  can be expensive since a stack can have up to 128 frames, and using the
  stack itself as the key means it gets rehashed on every lookup. To avoid
  this, we pre-hash the stack once and use that hash as the key, which is
  more efficient.

- Converting numbers to strings ahead of time when we know we have to. Values
  like the tid and the elapsed time since start (in nanoseconds) need to be
  sent as strings. However, many samples share the same value, and we were
  doing the conversion each time. Instead, we convert them to strings upfront
  and reuse the result as needed to minimize unnecessary work.
---
 sentry_sdk/profiler.py | 69 ++++++++++++++++++++++--------------------
 tests/test_profiler.py | 40 ++++++++++++------------
 2 files changed, 57 insertions(+), 52 deletions(-)

diff --git a/sentry_sdk/profiler.py b/sentry_sdk/profiler.py
index b9fc911878..c8d970ba50 100644
--- a/sentry_sdk/profiler.py
+++ b/sentry_sdk/profiler.py
@@ -45,7 +45,7 @@
     from typing_extensions import TypedDict
 
     import sentry_sdk.tracing
 
-    RawSampleData = Tuple[int, Sequence[Tuple[int, Sequence[RawFrameData]]]]
+    RawSampleData = Tuple[int, Sequence[Tuple[str, Sequence[RawFrameData]]]]
 
     ProcessedStack = Tuple[int, ...]
@@ -162,14 +162,14 @@ def extract_stack(frame, max_stack_depth=MAX_STACK_DEPTH):
         stack.append(frame)
         frame = frame.f_back
 
-    return [
+    return tuple(
         RawFrameData(
             function=get_frame_name(frame),
             abs_path=frame.f_code.co_filename,
             lineno=frame.f_lineno,
         )
         for frame in stack
-    ]
+    )
 
 
 def get_frame_name(frame):
@@ -324,7 +324,7 @@ def write(self, sample):
     def slice_profile(self, start_ns, stop_ns):
         # type: (int, int) -> ProcessedProfile
         samples = []  # type: List[ProcessedSample]
-        stacks = dict()  # type: Dict[ProcessedStack, int]
+        stacks = dict()  # type: Dict[int, int]
         stacks_list = list()  # type: List[ProcessedStack]
         frames = dict()  # type: Dict[RawFrameData, int]
         frames_list = list()  # type: List[ProcessedFrame]
@@ -334,39 +334,44 @@ def slice_profile(self, start_ns, stop_ns):
         #
         # Is it safe to assume that the samples are always in
         # chronological order and binary search the buffer?
-        for raw_sample in self.buffer:
-            if raw_sample is None:
-                continue
-
-            ts = raw_sample[0]
+        for ts, sample in filter(None, self.buffer):
             if start_ns > ts or ts > stop_ns:
                 continue
 
-            for tid, stack in raw_sample[1]:
-                current_stack = []
-
-                for frame in stack:
-                    if frame not in frames:
-                        frames[frame] = len(frames)
-                        frames_list.append(
-                            {
-                                "function": frame.function,
-                                "filename": frame.abs_path,
-                                "lineno": frame.lineno,
-                            }
-                        )
-                    current_stack.append(frames[frame])
-
-                current_stack = tuple(current_stack)
-                if current_stack not in stacks:
-                    stacks[current_stack] = len(stacks)
-                    stacks_list.append(current_stack)
+            elapsed_since_start_ns = str(ts - start_ns)
+
+            for tid, stack in sample:
+                # Instead of mapping the stack into frame ids and hashing
+                # that as a tuple, we can directly hash the stack.
+                # This saves us from having to generate yet another list.
+                # Additionally, using the stack as the key directly is
+                # costly because the stack can be large, so we pre-hash
+                # the stack, and use the hash as the key as this will be
+                # needed a few times to improve performance.
+                hashed_stack = hash(stack)
+
+                # Check if the stack is indexed first, this lets us skip
+                # indexing frames if it's not necessary
+                if hashed_stack not in stacks:
+                    for frame in stack:
+                        if frame not in frames:
+                            frames[frame] = len(frames)
+                            frames_list.append(
+                                {
+                                    "function": frame.function,
+                                    "filename": frame.abs_path,
+                                    "lineno": frame.lineno,
+                                }
+                            )
+
+                    stacks[hashed_stack] = len(stacks)
+                    stacks_list.append(tuple(frames[frame] for frame in stack))
 
                 samples.append(
                     {
-                        "elapsed_since_start_ns": str(ts - start_ns),
-                        "thread_id": str(tid),
-                        "stack_id": stacks[current_stack],
+                        "elapsed_since_start_ns": elapsed_since_start_ns,
+                        "thread_id": tid,
+                        "stack_id": stacks[hashed_stack],
                     }
                 )
 
@@ -401,7 +406,7 @@ def _sample_stack(*args, **kwargs):
             (
                 nanosecond_time(),
                 [
-                    (tid, extract_stack(frame))
+                    (str(tid), extract_stack(frame))
                     for tid, frame in sys._current_frames().items()
                 ],
             )
diff --git a/tests/test_profiler.py b/tests/test_profiler.py
index 963c8af298..0c99dde0c8 100644
--- a/tests/test_profiler.py
+++ b/tests/test_profiler.py
@@ -211,7 +211,7 @@
 )
 def test_thread_scheduler_takes_first_samples(scheduler_class):
     sample_buffer = DummySampleBuffer(
-        capacity=1, sample_data=[(0, [(0, [RawFrameData("name", "file", 1)])])]
+        capacity=1, sample_data=[(0, [(0, (RawFrameData("name", "file", 1),))])]
     )
     scheduler = scheduler_class(sample_buffer=sample_buffer, frequency=1000)
     assert scheduler.start_profiling()
@@ -237,7 +237,7 @@ def test_thread_scheduler_takes_first_samples(scheduler_class):
 def test_thread_scheduler_takes_more_samples(scheduler_class):
     sample_buffer = DummySampleBuffer(
         capacity=10,
-        sample_data=[(i, [(0, [RawFrameData("name", "file", 1)])]) for i in range(3)],
+        sample_data=[(i, [(0, (RawFrameData("name", "file", 1),))]) for i in range(3)],
     )
     scheduler = scheduler_class(sample_buffer=sample_buffer, frequency=1000)
     assert scheduler.start_profiling()
@@ -330,7 +330,7 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
             10,
             0,
             1,
-            [(2, [(1, [RawFrameData("name", "file", 1)])])],
+            [(2, [(1, (RawFrameData("name", "file", 1),))])],
             {
                 "frames": [],
                 "samples": [],
@@ -343,7 +343,7 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
             10,
             0,
             1,
-            [(0, [(1, [RawFrameData("name", "file", 1)])])],
+            [(0, [("1", (RawFrameData("name", "file", 1),))])],
             {
                 "frames": [
                     {
@@ -369,8 +369,8 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
             0,
             1,
             [
-                (0, [(1, [RawFrameData("name", "file", 1)])]),
-                (1, [(1, [RawFrameData("name", "file", 1)])]),
+                (0, [("1", (RawFrameData("name", "file", 1),))]),
+                (1, [("1", (RawFrameData("name", "file", 1),))]),
             ],
             {
                 "frames": [
@@ -402,16 +402,16 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
             0,
             1,
             [
-                (0, [(1, [RawFrameData("name1", "file", 1)])]),
+                (0, [("1", (RawFrameData("name1", "file", 1),))]),
                 (
                     1,
                     [
                         (
-                            1,
-                            [
+                            "1",
+                            (
                                 RawFrameData("name1", "file", 1),
                                 RawFrameData("name2", "file", 2),
-                            ],
+                            ),
                         )
                     ],
                 ),
@@ -455,11 +455,11 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
                     0,
                     [
                         (
-                            1,
-                            [
+                            "1",
+                            (
                                 RawFrameData("name1", "file", 1),
                                 RawFrameData("name2", "file", 2),
-                            ],
+                            ),
                         )
                     ],
                 ),
@@ -467,11 +467,11 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
                     1,
                     [
                         (
-                            1,
-                            [
+                            "1",
+                            (
                                 RawFrameData("name3", "file", 3),
                                 RawFrameData("name4", "file", 4),
-                            ],
+                            ),
                         )
                     ],
                 ),
@@ -521,16 +521,16 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
             0,
             1,
             [
-                (0, [(1, [RawFrameData("name1", "file", 1)])]),
+                (0, [("1", (RawFrameData("name1", "file", 1),))]),
                 (
                     1,
                     [
                         (
-                            1,
-                            [
+                            "1",
+                            (
                                 RawFrameData("name2", "file", 2),
                                 RawFrameData("name3", "file", 3),
-                            ],
+                            ),
                         )
                     ],
                 ),

From c182a4f9d0c847795ea98b5841a00edf34625358 Mon Sep 17 00:00:00 2001
From: Tony Xiao
Date: Thu, 20 Oct 2022 10:15:11 -0400
Subject: [PATCH 2/3] fix type in tests

---
 tests/test_profiler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_profiler.py b/tests/test_profiler.py
index 0c99dde0c8..183f3bb1c6 100644
--- a/tests/test_profiler.py
+++ b/tests/test_profiler.py
@@ -330,7 +330,7 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
             10,
             0,
             1,
-            [(2, [(1, (RawFrameData("name", "file", 1),))])],
+            [(2, [("1", (RawFrameData("name", "file", 1),))])],
             {
                 "frames": [],
                 "samples": [],

From 3b316c208ef1b5968a66f12296b6af3a275b7add Mon Sep 17 00:00:00 2001
From: Tony Xiao
Date: Thu, 20 Oct 2022 10:48:28 -0400
Subject: [PATCH 3/3] stringify thread names

---
 sentry_sdk/profiler.py | 2 +-
 tests/test_profiler.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/sentry_sdk/profiler.py b/sentry_sdk/profiler.py
index c8d970ba50..cfe7ff2494 100644
--- a/sentry_sdk/profiler.py
+++ b/sentry_sdk/profiler.py
@@ -380,7 +380,7 @@ def slice_profile(self, start_ns, stop_ns):
         # will not have any metadata associated with it.
         thread_metadata = {
             str(thread.ident): {
-                "name": thread.name,
+                "name": str(thread.name),
             }
             for thread in threading.enumerate()
         }  # type: Dict[str, ProcessedThreadMetadata]
diff --git a/tests/test_profiler.py b/tests/test_profiler.py
index 183f3bb1c6..d0d3221020 100644
--- a/tests/test_profiler.py
+++ b/tests/test_profiler.py
@@ -305,7 +305,7 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
 current_thread = threading.current_thread()
 thread_metadata = {
     str(current_thread.ident): {
-        "name": current_thread.name,
+        "name": str(current_thread.name),
     },
 }
 
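
As a quick illustration of the indexing scheme the first patch switches to, here is a small,
self-contained Python sketch. It is not part of the patch series: the helper name index_samples
and the sample data are invented for illustration, and RawFrameData is reduced to a plain
namedtuple, but the frames/stacks bookkeeping mirrors the slice_profile changes above.

    from collections import namedtuple

    # Stand-in for sentry_sdk.profiler.RawFrameData; namedtuples are hashable,
    # which is what lets a single frame, and a whole stack (a tuple of frames),
    # be used for dictionary-based indexing.
    RawFrameData = namedtuple("RawFrameData", ["function", "abs_path", "lineno"])


    def index_samples(raw_samples, start_ns):
        """Index frames and stacks the way the patched slice_profile does."""
        frames = {}  # frame -> frame index
        frames_list = []  # indexed frames, in first-seen order
        stacks = {}  # hash(stack) -> stack index
        stacks_list = []  # indexed stacks, as tuples of frame indices
        samples = []

        for ts, sample in raw_samples:
            # Stringified once per sample instead of once per thread.
            elapsed_since_start_ns = str(ts - start_ns)

            for tid, stack in sample:
                # Pre-hash the (potentially 128-frame) tuple once and use the
                # hash as the dictionary key from here on.
                hashed_stack = hash(stack)

                # Only walk the frames if this exact stack is not indexed yet.
                if hashed_stack not in stacks:
                    for frame in stack:
                        if frame not in frames:
                            frames[frame] = len(frames)
                            frames_list.append(frame._asdict())
                    stacks[hashed_stack] = len(stacks)
                    stacks_list.append(tuple(frames[frame] for frame in stack))

                samples.append(
                    {
                        "elapsed_since_start_ns": elapsed_since_start_ns,
                        "thread_id": tid,
                        "stack_id": stacks[hashed_stack],
                    }
                )

        return {"frames": frames_list, "stacks": stacks_list, "samples": samples}


    if __name__ == "__main__":
        stack = (RawFrameData("foo", "file.py", 1), RawFrameData("bar", "file.py", 7))
        raw_samples = [(100, [("1", stack)]), (200, [("1", stack)])]
        profile = index_samples(raw_samples, start_ns=100)
        # The identical stack is indexed once and shared via stack_id.
        assert profile["stacks"] == [(0, 1)]
        assert [s["stack_id"] for s in profile["samples"]] == [0, 0]

The repeated stack in the second sample reuses the existing stack and frame indices, which is
where the savings come from when a profile spends most of its time blocked in the same place.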