From d0978b589034f653ec5d1b2acfa792ac50120d2a Mon Sep 17 00:00:00 2001
From: Tony Xiao
Date: Wed, 19 Oct 2022 17:55:41 -0400
Subject: [PATCH 1/3] perf(profiling): Tune the sample profile generation code for performance

We noticed that generating the sample format at the end of a profile can get
rather slow, and this aims to improve what we can here with minimal changes.
A few things we took advantage of to accomplish this:

- Turning the extracted stack into a tuple so it is hashable and can be used
  as a dictionary key. This lets us check whether the stack has already been
  indexed and skip indexing its frames again. This is especially effective in
  profiles that are blocked on a network request, for example, since there
  will be many identical stacks.

- Using the hash of the stack as the dictionary key. Hashing the entire stack
  can be expensive since a stack can have up to 128 frames, and using the
  stack itself as the key means it gets rehashed on every lookup. To avoid
  this, we pre-hash the stack once and use that hash as the key, which is
  more efficient.

- Converting numbers to strings ahead of time when we know we have to. Values
  like the tid and the elapsed time since start (in nanoseconds) need to be
  sent as strings. However, many samples share the same value, and we were
  doing the conversion each time. Instead, we convert them to strings upfront
  and reuse the result as needed to minimize unnecessary work.
---
 sentry_sdk/profiler.py | 69 ++++++++++++++++++++++--------------------
 tests/test_profiler.py | 40 ++++++++++++------------
 2 files changed, 57 insertions(+), 52 deletions(-)

diff --git a/sentry_sdk/profiler.py b/sentry_sdk/profiler.py
index b9fc911878..c8d970ba50 100644
--- a/sentry_sdk/profiler.py
+++ b/sentry_sdk/profiler.py
@@ -45,7 +45,7 @@
     from typing_extensions import TypedDict
 
     import sentry_sdk.tracing
 
-    RawSampleData = Tuple[int, Sequence[Tuple[int, Sequence[RawFrameData]]]]
+    RawSampleData = Tuple[int, Sequence[Tuple[str, Sequence[RawFrameData]]]]
 
     ProcessedStack = Tuple[int, ...]
@@ -162,14 +162,14 @@ def extract_stack(frame, max_stack_depth=MAX_STACK_DEPTH):
         stack.append(frame)
         frame = frame.f_back
 
-    return [
+    return tuple(
         RawFrameData(
             function=get_frame_name(frame),
             abs_path=frame.f_code.co_filename,
             lineno=frame.f_lineno,
         )
         for frame in stack
-    ]
+    )
 
 
 def get_frame_name(frame):
@@ -324,7 +324,7 @@ def write(self, sample):
     def slice_profile(self, start_ns, stop_ns):
         # type: (int, int) -> ProcessedProfile
         samples = []  # type: List[ProcessedSample]
-        stacks = dict()  # type: Dict[ProcessedStack, int]
+        stacks = dict()  # type: Dict[int, int]
         stacks_list = list()  # type: List[ProcessedStack]
         frames = dict()  # type: Dict[RawFrameData, int]
         frames_list = list()  # type: List[ProcessedFrame]
@@ -334,39 +334,44 @@ def slice_profile(self, start_ns, stop_ns):
         #
         # Is it safe to assume that the samples are always in
         # chronological order and binary search the buffer?
-        for raw_sample in self.buffer:
-            if raw_sample is None:
-                continue
-
-            ts = raw_sample[0]
+        for ts, sample in filter(None, self.buffer):
             if start_ns > ts or ts > stop_ns:
                 continue
 
-            for tid, stack in raw_sample[1]:
-                current_stack = []
-
-                for frame in stack:
-                    if frame not in frames:
-                        frames[frame] = len(frames)
-                        frames_list.append(
-                            {
-                                "function": frame.function,
-                                "filename": frame.abs_path,
-                                "lineno": frame.lineno,
-                            }
-                        )
-                    current_stack.append(frames[frame])
-
-                current_stack = tuple(current_stack)
-                if current_stack not in stacks:
-                    stacks[current_stack] = len(stacks)
-                    stacks_list.append(current_stack)
+            elapsed_since_start_ns = str(ts - start_ns)
+
+            for tid, stack in sample:
+                # Instead of mapping the stack into frame ids and hashing
+                # that as a tuple, we can directly hash the stack.
+                # This saves us from having to generate yet another list.
+                # Additionally, using the stack as the key directly is
+                # costly because the stack can be large, so we pre-hash
+                # the stack, and use the hash as the key as this will be
+                # needed a few times to improve performance.
+                hashed_stack = hash(stack)
+
+                # Check if the stack is indexed first, this lets us skip
+                # indexing frames if it's not necessary
+                if hashed_stack not in stacks:
+                    for frame in stack:
+                        if frame not in frames:
+                            frames[frame] = len(frames)
+                            frames_list.append(
+                                {
+                                    "function": frame.function,
+                                    "filename": frame.abs_path,
+                                    "lineno": frame.lineno,
+                                }
+                            )
+
+                    stacks[hashed_stack] = len(stacks)
+                    stacks_list.append(tuple(frames[frame] for frame in stack))
 
                 samples.append(
                     {
-                        "elapsed_since_start_ns": str(ts - start_ns),
-                        "thread_id": str(tid),
-                        "stack_id": stacks[current_stack],
+                        "elapsed_since_start_ns": elapsed_since_start_ns,
+                        "thread_id": tid,
+                        "stack_id": stacks[hashed_stack],
                     }
                 )
 
@@ -401,7 +406,7 @@ def _sample_stack(*args, **kwargs):
             (
                 nanosecond_time(),
                 [
-                    (tid, extract_stack(frame))
+                    (str(tid), extract_stack(frame))
                     for tid, frame in sys._current_frames().items()
                 ],
             )
diff --git a/tests/test_profiler.py b/tests/test_profiler.py
index 963c8af298..0c99dde0c8 100644
--- a/tests/test_profiler.py
+++ b/tests/test_profiler.py
@@ -211,7 +211,7 @@
 )
 def test_thread_scheduler_takes_first_samples(scheduler_class):
     sample_buffer = DummySampleBuffer(
-        capacity=1, sample_data=[(0, [(0, [RawFrameData("name", "file", 1)])])]
+        capacity=1, sample_data=[(0, [(0, (RawFrameData("name", "file", 1),))])]
     )
     scheduler = scheduler_class(sample_buffer=sample_buffer, frequency=1000)
     assert scheduler.start_profiling()
@@ -237,7 +237,7 @@ def test_thread_scheduler_takes_first_samples(scheduler_class):
 def test_thread_scheduler_takes_more_samples(scheduler_class):
     sample_buffer = DummySampleBuffer(
         capacity=10,
-        sample_data=[(i, [(0, [RawFrameData("name", "file", 1)])]) for i in range(3)],
+        sample_data=[(i, [(0, (RawFrameData("name", "file", 1),))]) for i in range(3)],
     )
     scheduler = scheduler_class(sample_buffer=sample_buffer, frequency=1000)
     assert scheduler.start_profiling()
@@ -330,7 +330,7 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
             10,
             0,
             1,
-            [(2, [(1, [RawFrameData("name", "file", 1)])])],
+            [(2, [(1, (RawFrameData("name", "file", 1),))])],
             {
                 "frames": [],
                 "samples": [],
@@ -343,7 +343,7 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
             10,
             0,
             1,
-            [(0, [(1, [RawFrameData("name", "file", 1)])])],
+            [(0, [("1", (RawFrameData("name", "file", 1),))])],
             {
                 "frames": [
                     {
@@ -369,8 +369,8 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
             0,
             1,
             [
-                (0, [(1, [RawFrameData("name", "file", 1)])]),
-                (1, [(1, [RawFrameData("name", "file", 1)])]),
+                (0, [("1", (RawFrameData("name", "file", 1),))]),
+                (1, [("1", (RawFrameData("name", "file", 1),))]),
             ],
             {
                 "frames": [
@@ -402,16 +402,16 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
             0,
             1,
             [
-                (0, [(1, [RawFrameData("name1", "file", 1)])]),
+                (0, [("1", (RawFrameData("name1", "file", 1),))]),
                 (
                     1,
                     [
                         (
-                            1,
-                            [
+                            "1",
+                            (
                                 RawFrameData("name1", "file", 1),
                                 RawFrameData("name2", "file", 2),
-                            ],
+                            ),
                         )
                     ],
                 ),
@@ -455,11 +455,11 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
                     0,
                     [
                         (
-                            1,
-                            [
+                            "1",
+                            (
                                 RawFrameData("name1", "file", 1),
                                 RawFrameData("name2", "file", 2),
-                            ],
+                            ),
                         )
                     ],
                 ),
@@ -467,11 +467,11 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
                     1,
                     [
                         (
-                            1,
-                            [
+                            "1",
+                            (
                                 RawFrameData("name3", "file", 3),
                                 RawFrameData("name4", "file", 4),
-                            ],
+                            ),
                         )
                     ],
                 ),
@@ -521,16 +521,16 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
             0,
             1,
             [
-                (0, [(1, [RawFrameData("name1", "file", 1)])]),
+                (0, [("1", (RawFrameData("name1", "file", 1),))]),
                 (
                     1,
                     [
                         (
-                            1,
-                            [
+                            "1",
+                            (
                                 RawFrameData("name2", "file", 2),
                                 RawFrameData("name3", "file", 3),
-                            ],
+                            ),
                         )
                     ],
                 ),

From c182a4f9d0c847795ea98b5841a00edf34625358 Mon Sep 17 00:00:00 2001
From: Tony Xiao
Date: Thu, 20 Oct 2022 10:15:11 -0400
Subject: [PATCH 2/3] fix type in tests

---
 tests/test_profiler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/test_profiler.py b/tests/test_profiler.py
index 0c99dde0c8..183f3bb1c6 100644
--- a/tests/test_profiler.py
+++ b/tests/test_profiler.py
@@ -330,7 +330,7 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
             10,
             0,
             1,
-            [(2, [(1, (RawFrameData("name", "file", 1),))])],
+            [(2, [("1", (RawFrameData("name", "file", 1),))])],
             {
                 "frames": [],
                 "samples": [],

From 3b316c208ef1b5968a66f12296b6af3a275b7add Mon Sep 17 00:00:00 2001
From: Tony Xiao
Date: Thu, 20 Oct 2022 10:48:28 -0400
Subject: [PATCH 3/3] stringify thread names

---
 sentry_sdk/profiler.py | 2 +-
 tests/test_profiler.py | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/sentry_sdk/profiler.py b/sentry_sdk/profiler.py
index c8d970ba50..cfe7ff2494 100644
--- a/sentry_sdk/profiler.py
+++ b/sentry_sdk/profiler.py
@@ -380,7 +380,7 @@ def slice_profile(self, start_ns, stop_ns):
         # will not have any metadata associated with it.
         thread_metadata = {
             str(thread.ident): {
-                "name": thread.name,
+                "name": str(thread.name),
             }
             for thread in threading.enumerate()
         }  # type: Dict[str, ProcessedThreadMetadata]
diff --git a/tests/test_profiler.py b/tests/test_profiler.py
index 183f3bb1c6..d0d3221020 100644
--- a/tests/test_profiler.py
+++ b/tests/test_profiler.py
@@ -305,7 +305,7 @@ def test_thread_scheduler_single_background_thread(scheduler_class):
 current_thread = threading.current_thread()
 thread_metadata = {
     str(current_thread.ident): {
-        "name": current_thread.name,
+        "name": str(current_thread.name),
     },
 }
 
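
As a quick illustration of the indexing scheme the first patch switches to, here is a small,
self-contained Python sketch. It is not part of the patch series: the helper name index_samples
and the sample data are invented for illustration, and RawFrameData is reduced to a plain
namedtuple, but the frames/stacks bookkeeping mirrors the slice_profile changes above.

    from collections import namedtuple

    # Stand-in for sentry_sdk.profiler.RawFrameData; namedtuples are hashable,
    # which is what lets a single frame, and a whole stack (a tuple of frames),
    # be used for dictionary-based indexing.
    RawFrameData = namedtuple("RawFrameData", ["function", "abs_path", "lineno"])


    def index_samples(raw_samples, start_ns):
        """Index frames and stacks the way the patched slice_profile does."""
        frames = {}  # frame -> frame index
        frames_list = []  # indexed frames, in first-seen order
        stacks = {}  # hash(stack) -> stack index
        stacks_list = []  # indexed stacks, as tuples of frame indices
        samples = []

        for ts, sample in raw_samples:
            # Stringified once per sample instead of once per thread.
            elapsed_since_start_ns = str(ts - start_ns)

            for tid, stack in sample:
                # Pre-hash the (potentially 128-frame) tuple once and use the
                # hash as the dictionary key from here on.
                hashed_stack = hash(stack)

                # Only walk the frames if this exact stack is not indexed yet.
                if hashed_stack not in stacks:
                    for frame in stack:
                        if frame not in frames:
                            frames[frame] = len(frames)
                            frames_list.append(frame._asdict())
                    stacks[hashed_stack] = len(stacks)
                    stacks_list.append(tuple(frames[frame] for frame in stack))

                samples.append(
                    {
                        "elapsed_since_start_ns": elapsed_since_start_ns,
                        "thread_id": tid,
                        "stack_id": stacks[hashed_stack],
                    }
                )

        return {"frames": frames_list, "stacks": stacks_list, "samples": samples}


    if __name__ == "__main__":
        stack = (RawFrameData("foo", "file.py", 1), RawFrameData("bar", "file.py", 7))
        raw_samples = [(100, [("1", stack)]), (200, [("1", stack)])]
        profile = index_samples(raw_samples, start_ns=100)
        # The identical stack is indexed once and shared via stack_id.
        assert profile["stacks"] == [(0, 1)]
        assert [s["stack_id"] for s in profile["samples"]] == [0, 0]

The repeated stack in the second sample reuses the existing stack and frame indices, which is
where the savings come from when a profile spends most of its time blocked in the same place.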