-
Notifications
You must be signed in to change notification settings - Fork 223
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
bug-1911367: Remove processor heartbeat and process metrics #6776
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,8 +9,6 @@ | |
|
||
LOGGER = logging.getLogger(__name__) | ||
|
||
HEARTBEAT_INTERVAL = 60 | ||
|
||
|
||
def default_task_func(a_param): | ||
"""Default task function. | ||
|
@@ -21,16 +19,6 @@ def default_task_func(a_param): | |
""" | ||
|
||
|
||
def default_heartbeat(): | ||
"""Runs once a second from the main thread. | ||
|
||
Note: If this raises an exception, it could kill the process or put it in a | ||
weird state. | ||
|
||
""" | ||
LOGGER.info("THUMP") | ||
|
||
|
||
def default_iterator(): | ||
"""Default iterator for tasks. | ||
|
||
|
@@ -76,7 +64,6 @@ def __init__( | |
idle_delay=7, | ||
quit_on_empty_queue=False, | ||
job_source_iterator=default_iterator, | ||
heartbeat_func=default_heartbeat, | ||
task_func=default_task_func, | ||
): | ||
""" | ||
|
@@ -88,14 +75,12 @@ def __init__( | |
instantiated with a config object can be iterated. The iterator must | ||
yield a tuple consisting of a function's tuple of args and, optionally, | ||
a mapping of kwargs. Ex: (('a', 17), {'x': 23}) | ||
:arg heartbeat_func: a function to run every second | ||
:arg task_func: a function that will accept the args and kwargs yielded | ||
by the job_source_iterator | ||
""" | ||
self.idle_delay = idle_delay | ||
self.quit_on_empty_queue = quit_on_empty_queue | ||
self.job_source_iterator = job_source_iterator | ||
self.heartbeat_func = heartbeat_func | ||
self.task_func = task_func | ||
|
||
self._pid = os.getpid() | ||
|
@@ -109,7 +94,7 @@ def _get_iterator(self): | |
job_source_iterator can be one of a few things: | ||
|
||
* a class that can be instantiated and iterated over | ||
* a function that returns an interator | ||
* a function that returns an iterator | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I set up pre-commit and it's now fixing typos. |
||
* an actual iterator/generator | ||
* an iterable collection | ||
|
||
|
@@ -124,15 +109,15 @@ def _get_iterator(self): | |
def _responsive_sleep(self, seconds, wait_log_interval=0, wait_reason=""): | ||
"""Responsive sleep that checks for quit flag | ||
|
||
When there is litte work to do, the queuing thread sleeps a lot. It can't sleep | ||
When there is little work to do, the queuing thread sleeps a lot. It can't sleep | ||
for too long without checking for the quit flag and/or logging about why it is | ||
sleeping. | ||
|
||
:arg seconds: the number of seconds to sleep | ||
:arg wait_log_interval: while sleeping, it is helpful if the thread | ||
periodically announces itself so that we know that it is still alive. | ||
This number is the time in seconds between log entries. | ||
:arg wait_reason: the is for the explaination of why the thread is | ||
:arg wait_reason: the is for the explanation of why the thread is | ||
sleeping. This is likely to be a message like: 'there is no work to do'. | ||
|
||
This was also partially motivated by old versions' of Python inability to | ||
|
@@ -146,14 +131,10 @@ def _responsive_sleep(self, seconds, wait_log_interval=0, wait_reason=""): | |
|
||
def blocking_start(self): | ||
"""This function starts the task manager running to do tasks.""" | ||
next_heartbeat = time.time() + HEARTBEAT_INTERVAL | ||
self.logger.debug("threadless start") | ||
try: | ||
# May never exhaust | ||
for job_params in self._get_iterator(): | ||
if time.time() > next_heartbeat: | ||
self.heartbeat_func() | ||
next_heartbeat = time.time() + HEARTBEAT_INTERVAL | ||
self.logger.debug("received %r", job_params) | ||
if job_params is None: | ||
if self.quit_on_empty_queue: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -27,7 +27,6 @@ | |
|
||
from fillmore.libsentry import set_up_sentry | ||
from fillmore.scrubber import Scrubber, SCRUB_RULES_DEFAULT | ||
import psutil | ||
import sentry_sdk | ||
from sentry_sdk.integrations.atexit import AtexitIntegration | ||
from sentry_sdk.integrations.dedupe import DedupeIntegration | ||
|
@@ -270,73 +269,13 @@ def _set_up_task_manager(self): | |
manager_settings.update( | ||
{ | ||
"job_source_iterator": self.source_iterator, | ||
"heartbeat_func": self.heartbeat, | ||
"task_func": self.transform, | ||
} | ||
) | ||
self.task_manager = build_instance( | ||
class_path=manager_class, kwargs=manager_settings | ||
) | ||
|
||
def heartbeat(self): | ||
"""Runs once a second from the main thread. | ||
|
||
Note: If this raises an exception, it could kill the process or put it in a | ||
weird state. | ||
|
||
""" | ||
try: | ||
processes_by_type = {} | ||
processes_by_status = {} | ||
open_files = 0 | ||
for proc in psutil.process_iter(["cmdline", "status", "open_files"]): | ||
try: | ||
# NOTE(willkg): This is all intertwined with exactly how we run the | ||
# processor in a Docker container. If we ever make changes to that, this | ||
# will change, too. However, even if we never update this, seeing | ||
# "zombie" and "orphaned" as process statuses or seeing lots of | ||
# processes as a type will be really fishy and suggestive that evil is a | ||
# foot. | ||
cmdline = proc.cmdline() or ["unknown"] | ||
|
||
if cmdline[0] in ["/bin/sh", "/bin/bash"]: | ||
proc_type = "shell" | ||
elif cmdline[0] in ["python", "/usr/local/bin/python"]: | ||
proc_type = "python" | ||
elif "stackwalk" in cmdline[0]: | ||
proc_type = "stackwalker" | ||
else: | ||
proc_type = "other" | ||
|
||
open_files_count = len(proc.open_files()) | ||
proc_status = proc.status() | ||
|
||
except psutil.Error: | ||
# For any psutil error, we want to track that we saw a process, but | ||
# the details don't matter | ||
proc_type = "unknown" | ||
proc_status = "unknown" | ||
open_files_count = 0 | ||
|
||
processes_by_type[proc_type] = processes_by_type.get(proc_type, 0) + 1 | ||
processes_by_status[proc_status] = ( | ||
processes_by_status.get(proc_status, 0) + 1 | ||
) | ||
open_files += open_files_count | ||
|
||
METRICS.gauge("processor.open_files", open_files) | ||
for proc_type, val in processes_by_type.items(): | ||
METRICS.gauge( | ||
"processor.processes_by_type", val, tags=[f"proctype:{proc_type}"] | ||
) | ||
for status, val in processes_by_status.items(): | ||
METRICS.gauge( | ||
"processor.processes_by_status", val, tags=[f"procstatus:{status}"] | ||
) | ||
|
||
except Exception as exc: | ||
sentry_sdk.capture_exception(exc) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Buh-bye! One nice thing about removing this code: when the processor was running in a local dev environment, it constantly produced output to the console even when it was just sitting idle, which was kind of annoying. Now it won't anymore. |
||
|
||
def close(self): | ||
"""Clean up the processor on shutdown.""" | ||
with suppress(AttributeError): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Socorro still has this TaskManager and ThreadedTaskManager, which are the last remnants of how Socorro service processes were structured a long, long time ago.
In this PR, I removed the heartbeat function from the processor app as well as the scaffolding for heartbeats for any app that uses the task managers figuring we don't need that anymore. (We can always re-add it if we do need it.)