test(profiling): refactor to avoid infinite loops (#4999)

This change refactors some of the profiler tests to avoid incurring in potentially infinite loops that would cause the CI runs to stall indefinitely.
DataDog · Jan 30, 2023 · 501a686 · 501a686
1 parent 57f466e
commit 501a686
Show file tree

Hide file tree

Showing 3 changed files with 43 additions and 51 deletions.
diff --git a/tests/profiling/_test_multiprocessing.py b/tests/profiling/_test_multiprocessing.py
@@ -24,4 +24,4 @@ def f():
     p = multiprocessing.Process(target=f)
     p.start()
     print(p.pid)
-    p.join()
+    p.join(120)
diff --git a/tests/profiling/collector/test_stack.py b/tests/profiling/collector/test_stack.py
@@ -50,6 +50,18 @@ def func5():
     return nogevent.sleep(1)
 
 
+def wait_for_event(collector, cond=lambda _: True, retries=10, interval=1):
+    for _ in range(retries):
+        matched = list(filter(cond, collector.recorder.events[stack_event.StackSampleEvent]))
+        if matched:
+            return matched[0]
+
+        collector.recorder.events[stack_event.StackSampleEvent].clear()
+        time.sleep(interval)
+
+    raise RuntimeError("event wait timeout")
+
+
 def test_collect_truncate():
     r = recorder.Recorder()
     c = stack.StackCollector(r, nframes=5)
@@ -245,14 +257,17 @@ def test_ignore_profiler_gevent_task(monkeypatch, ignore):
     # sure to catch it with the StackProfiler and that it's not ignored.
     c = CollectorTest(p._profiler._recorder, interval=0.00001)
     c.start()
-    # Wait forever and stop when we finally find an event with our collector task id
-    while True:
+
+    for _ in range(100):
         events = p._profiler._recorder.reset()
         ids = {e.task_id for e in events[stack_event.StackSampleEvent]}
         if (c._worker.ident in ids) != ignore:
             break
         # Give some time for gevent to switch greenlets
         time.sleep(0.1)
+    else:
+        assert False
+
     c.stop()
     p.stop(flush=False)
 
@@ -402,7 +417,7 @@ def test_exception_collection():
     assert e.sampling_period > 0
     assert e.thread_id == nogevent.thread_get_ident()
     assert e.thread_name == "MainThread"
-    assert e.frames == [(__file__, 396, "test_exception_collection", "")]
+    assert e.frames == [(__file__, 411, "test_exception_collection", "")]
     assert e.nframes == 1
     assert e.exc_type == ValueError
 
@@ -434,7 +449,7 @@ def test_exception_collection_trace(
     assert e.sampling_period > 0
     assert e.thread_id == nogevent.thread_get_ident()
     assert e.thread_name == "MainThread"
-    assert e.frames == [(__file__, 423, "test_exception_collection_trace", "")]
+    assert e.frames == [(__file__, 438, "test_exception_collection_trace", "")]
     assert e.nframes == 1
     assert e.exc_type == ValueError
     assert e.span_id == span.span_id
@@ -531,36 +546,22 @@ def test_collect_span_id(tracer_and_collector):
     resource = str(uuid.uuid4())
     span_type = str(uuid.uuid4())
     span = t.start_span("foobar", activate=True, resource=resource, span_type=span_type)
-    # This test will run forever if it fails. Don't make it fail.
-    while True:
-        try:
-            event = c.recorder.events[stack_event.StackSampleEvent].pop()
-        except IndexError:
-            # No event left or no event yet
-            continue
-        if span.span_id == event.span_id:
-            assert event.trace_resource_container[0] == resource
-            assert event.trace_type == span_type
-            assert event.local_root_span_id == span._local_root.span_id
-            break
+    event = wait_for_event(c, lambda e: span.span_id == e.span_id)
+    assert span.span_id == event.span_id
+    assert event.trace_resource_container[0] == resource
+    assert event.trace_type == span_type
+    assert event.local_root_span_id == span._local_root.span_id
 
 
 def test_collect_span_resource_after_finish(tracer_and_collector):
     t, c = tracer_and_collector
     resource = str(uuid.uuid4())
     span_type = str(uuid.uuid4())
     span = t.start_span("foobar", activate=True, span_type=span_type)
-    # This test will run forever if it fails. Don't make it fail.
-    while True:
-        try:
-            event = c.recorder.events[stack_event.StackSampleEvent].pop()
-        except IndexError:
-            # No event left or no event yet
-            continue
-        if span.span_id == event.span_id:
-            assert event.trace_resource_container[0] == "foobar"
-            assert event.trace_type == span_type
-            break
+    event = wait_for_event(c, lambda e: span.span_id == e.span_id)
+    assert span.span_id == event.span_id
+    assert event.trace_resource_container[0] == "foobar"
+    assert event.trace_type == span_type
     span.resource = resource
     span.finish()
     assert event.trace_resource_container[0] == resource
@@ -574,17 +575,10 @@ def test_resource_not_collected(monkeypatch, tracer):
         resource = str(uuid.uuid4())
         span_type = str(uuid.uuid4())
         span = tracer.start_span("foobar", activate=True, resource=resource, span_type=span_type)
-        # This test will run forever if it fails. Don't make it fail.
-        while True:
-            try:
-                event = collector.recorder.events[stack_event.StackSampleEvent].pop()
-            except IndexError:
-                # No event left or no event yet
-                continue
-            if span.span_id == event.span_id:
-                assert event.trace_resource_container is None
-                assert event.trace_type == span_type
-                break
+        event = wait_for_event(collector, lambda e: span.span_id == e.span_id)
+        assert span.span_id == event.span_id
+        assert event.trace_resource_container is None
+        assert event.trace_type == span_type
     finally:
         collector.stop()
 
@@ -595,17 +589,9 @@ def test_collect_multiple_span_id(tracer_and_collector):
     span_type = str(uuid.uuid4())
     span = t.start_span("foobar", activate=True, resource=resource, span_type=span_type)
     child = t.start_span("foobar", child_of=span, activate=True)
-    # This test will run forever if it fails. Don't make it fail.
-    while True:
-        try:
-            event = c.recorder.events[stack_event.StackSampleEvent].pop()
-        except IndexError:
-            # No event left or no event yet
-            continue
-        if child.span_id == event.span_id:
-            assert event.trace_resource_container[0] == resource
-            assert event.trace_type == span_type
-            break
+    event = wait_for_event(c, lambda e: child.span_id == e.span_id)
+    assert event.trace_resource_container[0] == resource
+    assert event.trace_type == span_type
 
 
 def test_stress_trace_collection(tracer_and_collector):

diff --git a/tests/profiling/simple_program_gevent.py b/tests/profiling/simple_program_gevent.py
@@ -4,6 +4,7 @@
 monkey.patch_all()
 
 import threading
+import time
 
 from ddtrace.profiling import bootstrap
 # do not use ddtrace-run; the monkey-patching would be done too late
@@ -22,7 +23,9 @@ def fibonacci(n):
 
 # When not using our special PeriodicThread based on real threads, there's 0 event captured.
 i = 1
-while len(bootstrap.profiler._profiler._recorder.events[stack_event.StackSampleEvent]) < 10:
+for _ in range(50):
+    if len(bootstrap.profiler._profiler._recorder.events[stack_event.StackSampleEvent]) >= 10:
+        break
     threads = []
     for _ in range(10):
         t = threading.Thread(target=fibonacci, args=(i,))
@@ -31,3 +34,6 @@ def fibonacci(n):
     i += 1
     for t in threads:
         t.join()
+    time.sleep(0.1)
+else:
+    assert False, "Not enough events captured"