Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(profiling): Introduce different profiler schedulers #1616

Merged
merged 7 commits into from
Sep 19, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions sentry_sdk/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,9 +134,9 @@ def _capture_envelope(envelope):
profiles_sample_rate = self.options["_experiments"].get("profiles_sample_rate")
if profiles_sample_rate is not None and profiles_sample_rate > 0:
try:
setup_profiler()
except ValueError:
logger.debug("Profiling can only be enabled from the main thread.")
setup_profiler(self.options)
except ValueError as e:
logger.debug(str(e))

@property
def dsn(self):
Expand Down
282 changes: 240 additions & 42 deletions sentry_sdk/profiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,18 +64,15 @@ def nanosecond_time():
_scheduler = None # type: Optional[_Scheduler]


def setup_profiler(buffer_secs=60, frequency=101):
# type: (int, int) -> None
def setup_profiler(options):
# type: (Dict[str, Any]) -> None

"""
This method sets up the application so that it can be profiled.
It MUST be called from the main thread. This is a limitation of
python's signal library where it only allows the main thread to
set a signal handler.

`buffer_secs` determines the max time a sample will be buffered for
`frequency` determines the number of samples to take per second (Hz)
"""
buffer_secs = 60
frequency = 101

global _sample_buffer
global _scheduler
Expand All @@ -86,11 +83,19 @@ def setup_profiler(buffer_secs=60, frequency=101):
# a capcity of `buffer_secs * frequency`.
_sample_buffer = _SampleBuffer(capacity=buffer_secs * frequency)

_scheduler = _Scheduler(frequency=frequency)
profiler_mode = options["_experiments"].get("profiler_mode", _SigprofScheduler.mode)
if profiler_mode == _SigprofScheduler.mode:
_scheduler = _SigprofScheduler(frequency=frequency)
elif profiler_mode == _SigalrmScheduler.mode:
_scheduler = _SigalrmScheduler(frequency=frequency)
elif profiler_mode == _SleepScheduler.mode:
_scheduler = _SleepScheduler(frequency=frequency)
elif profiler_mode == _EventScheduler.mode:
_scheduler = _EventScheduler(frequency=frequency)
else:
raise ValueError("Unknown profiler mode: {}".format(profiler_mode))
_scheduler.setup()

# This setups a process wide signal handler that will be called
# at an interval to record samples.
signal.signal(signal.SIGPROF, _sample_stack)
atexit.register(teardown_profiler)


Expand All @@ -100,32 +105,18 @@ def teardown_profiler():
global _sample_buffer
global _scheduler

if _scheduler is not None:
_scheduler.teardown()

_sample_buffer = None
_scheduler = None

# setting the timer with 0 will stop will clear the timer
signal.setitimer(signal.ITIMER_PROF, 0)

# put back the default signal handler
signal.signal(signal.SIGPROF, signal.SIG_DFL)


def _sample_stack(_signal_num, _frame):
# type: (int, Frame) -> None
def _sample_stack(*args, **kwargs):
# type: (*Any, **Any) -> None
"""
Take a sample of the stack on all the threads in the process.
This handler is called to handle the signal at a set interval.

See https://www.gnu.org/software/libc/manual/html_node/Alarm-Signals.html

This is not based on wall time, and you may see some variances
in the frequency at which this handler is called.

Notably, it looks like only threads started using the threading
module counts towards the time elapsed. It is unclear why that
is the case right now. However, we are able to get samples from
threading._DummyThread if this handler is called as a result of
another thread (e.g. the main thread).
This should be called at a regular interval to collect samples.
"""

assert _sample_buffer is not None
Expand Down Expand Up @@ -298,33 +289,240 @@ def slice_profile(self, start_ns, stop_ns):


class _Scheduler(object):
mode = "unknown"

def __init__(self, frequency):
# type: (int) -> None
self._lock = threading.Lock()
self._count = 0
self._interval = 1.0 / frequency

def setup(self):
# type: () -> None
raise NotImplementedError

def teardown(self):
# type: () -> None
raise NotImplementedError

def start_profiling(self):
# type: () -> bool
with self._lock:
# we only need to start the timer if we're starting the first profile
should_start_timer = self._count == 0
self._count += 1

if should_start_timer:
signal.setitimer(signal.ITIMER_PROF, self._interval, self._interval)
return should_start_timer
return self._count == 1

def stop_profiling(self):
# type: () -> bool
with self._lock:
# we only need to stop the timer if we're stoping the last profile
should_stop_timer = self._count == 1
self._count -= 1
return self._count == 0


class _ThreadScheduler(_Scheduler):
"""
This abstract scheduler is based on running a daemon thread that will call
the sampler at a regular interval.
"""

mode = "thread"

def __init__(self, frequency):
# type: (int) -> None
super(_ThreadScheduler, self).__init__(frequency)
self.event = threading.Event()

def setup(self):
# type: () -> None
pass

def teardown(self):
# type: () -> None
pass

def start_profiling(self):
# type: () -> bool
if super(_ThreadScheduler, self).start_profiling():
# make sure to clear the event as we reuse the same event
# over the lifetime of the scheduler
self.event.clear()

# make sure the thread is a daemon here otherwise this
# can keep the application running after other threads
# have exited
thread = threading.Thread(target=self.run, daemon=True)
thread.start()
return True
return False

def stop_profiling(self):
# type: () -> bool
if super(_ThreadScheduler, self).stop_profiling():
# make sure the set the event here so that the thread
# can check to see if it should keep running
self.event.set()
return True
return False

def run(self):
# type: () -> None
raise NotImplementedError


class _SleepScheduler(_ThreadScheduler):
"""
This scheduler uses time.sleep to wait the required interval before calling
the sampling function.
"""

mode = "sleep"

def run(self):
# type: () -> None
while True:
if self.event.is_set():
break
time.sleep(self._interval)
_sample_stack()


class _EventScheduler(_ThreadScheduler):
"""
This scheduler uses threading.Event to wait the required interval before
calling the sampling function.
"""

mode = "event"

if should_stop_timer:
signal.setitimer(signal.ITIMER_PROF, 0)
return should_stop_timer
def run(self):
# type: () -> None
while True:
if self.event.is_set():
break
self.event.wait(timeout=self._interval)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

the only event.set is in stop_profiling so won't this always timeout?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, that's the intent. The idea with this scheduler is to use this timeout mechanism to schedule the next time the sampling function is called. One possible concern with this approach is that CPU usage may spike due to the wait but this will be tested more and verified before we make a final decision.

Copy link
Member

@sl0thentr0py sl0thentr0py Sep 19, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah gotcha, thx for explaining!

_sample_stack()


class _SignalScheduler(_Scheduler):
"""
This abstract scheduler is based on UNIX signals. It sets up a
signal handler for the specified signal, and the matching itimer in order
for the signal handler to fire at a regular interval.

See https://www.gnu.org/software/libc/manual/html_node/Alarm-Signals.html
"""

mode = "signal"

@property
def signal_num(self):
# type: () -> signal.Signals
raise NotImplementedError

@property
def signal_timer(self):
# type: () -> int
raise NotImplementedError

def setup(self):
# type: () -> None
"""
This method sets up the application so that it can be profiled.
It MUST be called from the main thread. This is a limitation of
python's signal library where it only allows the main thread to
set a signal handler.
"""

# This setups a process wide signal handler that will be called
# at an interval to record samples.
try:
signal.signal(self.signal_num, _sample_stack)
except ValueError:
raise ValueError(
"Signal based profiling can only be enabled from the main thread."
)

# Ensures that system calls interrupted by signals are restarted
# automatically. Otherwise, we may see some strage behaviours
# such as IOErrors caused by the system call being interrupted.
signal.siginterrupt(self.signal_num, False)

def teardown(self):
# type: () -> None

# setting the timer with 0 will stop will clear the timer
signal.setitimer(self.signal_timer, 0)

# put back the default signal handler
signal.signal(self.signal_num, signal.SIG_DFL)

def start_profiling(self):
# type: () -> bool
if super(_SignalScheduler, self).start_profiling():
signal.setitimer(self.signal_timer, self._interval, self._interval)
return True
return False

def stop_profiling(self):
# type: () -> bool
if super(_SignalScheduler, self).stop_profiling():
signal.setitimer(self.signal_timer, 0)
return True
return False


class _SigprofScheduler(_SignalScheduler):
"""
This scheduler uses SIGPROF to regularly call a signal handler where the
samples will be taken.

This is not based on wall time, and you may see some variances
in the frequency at which this handler is called.

This has some limitations:
- Only the main thread counts towards the time elapsed. This means that if
the main thread is blocking on a sleep() or select() system call, then
this clock will not count down. Some examples of this in practice are
- When using uwsgi with multiple threads in a worker, the non main
threads will only be profiled if the main thread is actively running
at the same time.
- When using gunicorn with threads, the main thread does not handle the
requests directly, so the clock counts down slower than expected since
its mostly idling while waiting for requests.
"""

mode = "sigprof"

@property
def signal_num(self):
# type: () -> signal.Signals
return signal.SIGPROF

@property
def signal_timer(self):
# type: () -> int
return signal.ITIMER_PROF


class _SigalrmScheduler(_SignalScheduler):
"""
This scheduler uses SIGALRM to regularly call a signal handler where the
samples will be taken.

This is based on real time, so it *should* be called close to the expected
frequency.
"""

mode = "sigalrm"

@property
def signal_num(self):
# type: () -> signal.Signals
return signal.SIGALRM

@property
def signal_timer(self):
# type: () -> int
return signal.ITIMER_REAL


def _should_profile(transaction, hub):
Expand Down