From 501a6862b9c535920cc8e7f834c80628ae1f1eb9 Mon Sep 17 00:00:00 2001 From: "Gabriele N. Tornetta" Date: Mon, 30 Jan 2023 15:48:16 +0000 Subject: [PATCH] test(profiling): refactor to avoid infinite loops (#4999) This change refactors some of the profiler tests to avoid incurring in potentially infinite loops that would cause the CI runs to stall indefinitely. --- tests/profiling/_test_multiprocessing.py | 2 +- tests/profiling/collector/test_stack.py | 84 ++++++++++-------------- tests/profiling/simple_program_gevent.py | 8 ++- 3 files changed, 43 insertions(+), 51 deletions(-) diff --git a/tests/profiling/_test_multiprocessing.py b/tests/profiling/_test_multiprocessing.py index 225cf9370e5..6ce7522b7ba 100644 --- a/tests/profiling/_test_multiprocessing.py +++ b/tests/profiling/_test_multiprocessing.py @@ -24,4 +24,4 @@ def f(): p = multiprocessing.Process(target=f) p.start() print(p.pid) - p.join() + p.join(120) diff --git a/tests/profiling/collector/test_stack.py b/tests/profiling/collector/test_stack.py index 3722275f46c..060bef5043a 100644 --- a/tests/profiling/collector/test_stack.py +++ b/tests/profiling/collector/test_stack.py @@ -50,6 +50,18 @@ def func5(): return nogevent.sleep(1) +def wait_for_event(collector, cond=lambda _: True, retries=10, interval=1): + for _ in range(retries): + matched = list(filter(cond, collector.recorder.events[stack_event.StackSampleEvent])) + if matched: + return matched[0] + + collector.recorder.events[stack_event.StackSampleEvent].clear() + time.sleep(interval) + + raise RuntimeError("event wait timeout") + + def test_collect_truncate(): r = recorder.Recorder() c = stack.StackCollector(r, nframes=5) @@ -245,14 +257,17 @@ def test_ignore_profiler_gevent_task(monkeypatch, ignore): # sure to catch it with the StackProfiler and that it's not ignored. c = CollectorTest(p._profiler._recorder, interval=0.00001) c.start() - # Wait forever and stop when we finally find an event with our collector task id - while True: + + for _ in range(100): events = p._profiler._recorder.reset() ids = {e.task_id for e in events[stack_event.StackSampleEvent]} if (c._worker.ident in ids) != ignore: break # Give some time for gevent to switch greenlets time.sleep(0.1) + else: + assert False + c.stop() p.stop(flush=False) @@ -402,7 +417,7 @@ def test_exception_collection(): assert e.sampling_period > 0 assert e.thread_id == nogevent.thread_get_ident() assert e.thread_name == "MainThread" - assert e.frames == [(__file__, 396, "test_exception_collection", "")] + assert e.frames == [(__file__, 411, "test_exception_collection", "")] assert e.nframes == 1 assert e.exc_type == ValueError @@ -434,7 +449,7 @@ def test_exception_collection_trace( assert e.sampling_period > 0 assert e.thread_id == nogevent.thread_get_ident() assert e.thread_name == "MainThread" - assert e.frames == [(__file__, 423, "test_exception_collection_trace", "")] + assert e.frames == [(__file__, 438, "test_exception_collection_trace", "")] assert e.nframes == 1 assert e.exc_type == ValueError assert e.span_id == span.span_id @@ -531,18 +546,11 @@ def test_collect_span_id(tracer_and_collector): resource = str(uuid.uuid4()) span_type = str(uuid.uuid4()) span = t.start_span("foobar", activate=True, resource=resource, span_type=span_type) - # This test will run forever if it fails. Don't make it fail. - while True: - try: - event = c.recorder.events[stack_event.StackSampleEvent].pop() - except IndexError: - # No event left or no event yet - continue - if span.span_id == event.span_id: - assert event.trace_resource_container[0] == resource - assert event.trace_type == span_type - assert event.local_root_span_id == span._local_root.span_id - break + event = wait_for_event(c, lambda e: span.span_id == e.span_id) + assert span.span_id == event.span_id + assert event.trace_resource_container[0] == resource + assert event.trace_type == span_type + assert event.local_root_span_id == span._local_root.span_id def test_collect_span_resource_after_finish(tracer_and_collector): @@ -550,17 +558,10 @@ def test_collect_span_resource_after_finish(tracer_and_collector): resource = str(uuid.uuid4()) span_type = str(uuid.uuid4()) span = t.start_span("foobar", activate=True, span_type=span_type) - # This test will run forever if it fails. Don't make it fail. - while True: - try: - event = c.recorder.events[stack_event.StackSampleEvent].pop() - except IndexError: - # No event left or no event yet - continue - if span.span_id == event.span_id: - assert event.trace_resource_container[0] == "foobar" - assert event.trace_type == span_type - break + event = wait_for_event(c, lambda e: span.span_id == e.span_id) + assert span.span_id == event.span_id + assert event.trace_resource_container[0] == "foobar" + assert event.trace_type == span_type span.resource = resource span.finish() assert event.trace_resource_container[0] == resource @@ -574,17 +575,10 @@ def test_resource_not_collected(monkeypatch, tracer): resource = str(uuid.uuid4()) span_type = str(uuid.uuid4()) span = tracer.start_span("foobar", activate=True, resource=resource, span_type=span_type) - # This test will run forever if it fails. Don't make it fail. - while True: - try: - event = collector.recorder.events[stack_event.StackSampleEvent].pop() - except IndexError: - # No event left or no event yet - continue - if span.span_id == event.span_id: - assert event.trace_resource_container is None - assert event.trace_type == span_type - break + event = wait_for_event(collector, lambda e: span.span_id == e.span_id) + assert span.span_id == event.span_id + assert event.trace_resource_container is None + assert event.trace_type == span_type finally: collector.stop() @@ -595,17 +589,9 @@ def test_collect_multiple_span_id(tracer_and_collector): span_type = str(uuid.uuid4()) span = t.start_span("foobar", activate=True, resource=resource, span_type=span_type) child = t.start_span("foobar", child_of=span, activate=True) - # This test will run forever if it fails. Don't make it fail. - while True: - try: - event = c.recorder.events[stack_event.StackSampleEvent].pop() - except IndexError: - # No event left or no event yet - continue - if child.span_id == event.span_id: - assert event.trace_resource_container[0] == resource - assert event.trace_type == span_type - break + event = wait_for_event(c, lambda e: child.span_id == e.span_id) + assert event.trace_resource_container[0] == resource + assert event.trace_type == span_type def test_stress_trace_collection(tracer_and_collector): diff --git a/tests/profiling/simple_program_gevent.py b/tests/profiling/simple_program_gevent.py index e4fd8237703..d2d516be6ad 100644 --- a/tests/profiling/simple_program_gevent.py +++ b/tests/profiling/simple_program_gevent.py @@ -4,6 +4,7 @@ monkey.patch_all() import threading +import time from ddtrace.profiling import bootstrap # do not use ddtrace-run; the monkey-patching would be done too late @@ -22,7 +23,9 @@ def fibonacci(n): # When not using our special PeriodicThread based on real threads, there's 0 event captured. i = 1 -while len(bootstrap.profiler._profiler._recorder.events[stack_event.StackSampleEvent]) < 10: +for _ in range(50): + if len(bootstrap.profiler._profiler._recorder.events[stack_event.StackSampleEvent]) >= 10: + break threads = [] for _ in range(10): t = threading.Thread(target=fibonacci, args=(i,)) @@ -31,3 +34,6 @@ def fibonacci(n): i += 1 for t in threads: t.join() + time.sleep(0.1) +else: + assert False, "Not enough events captured"