-
Notifications
You must be signed in to change notification settings - Fork 7
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Issue/7694 executor cleanup #7765
Changes from 22 commits
0d980fd
ed30a8c
ca300eb
2578db9
996c9a5
0531ecb
368a2d3
77b38ba
9ccb204
4d633a8
f216ec4
b120b1a
750f0a3
400f1d7
dd1cea8
78063a9
7181c79
9470de1
44c2622
ef2851f
fcd7eb4
8170cc7
42f2455
25c95e0
32c98cd
8c58031
76ee7ac
2653273
2a715fd
c9239d9
5532ff4
33fd080
a35b38d
76a2842
1a6682a
74c2c50
79b04f7
ed14455
f2e10d0
9fd9db2
61c8aa2
f2954c9
895b619
fffd6a1
cf7a1c7
a0469d5
9d4f7f1
8d5963a
558cdde
b536897
92f4b90
142cfc0
08ee9e9
025a81b
060a005
e815992
2bedb4a
9b7c7b9
7cba0da
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -16,6 +16,7 @@ | |||||
Contact: [email protected] | ||||||
""" | ||||||
|
||||||
import abc | ||||||
import asyncio | ||||||
import collections | ||||||
import concurrent.futures | ||||||
|
@@ -29,8 +30,9 @@ | |||||
import socket | ||||||
import typing | ||||||
import uuid | ||||||
from abc import abstractmethod | ||||||
from asyncio import Future, transports | ||||||
from datetime import timedelta | ||||||
from typing import Optional, Sequence | ||||||
|
||||||
import inmanta.agent.cache | ||||||
import inmanta.agent.executor | ||||||
|
@@ -175,7 +177,7 @@ def __init__(self, name: str): | |||||
# Keeps track of when this client was active last | ||||||
self.last_used_at: datetime.datetime = datetime.datetime.now().astimezone() | ||||||
|
||||||
def get_idle_time(self) -> timedelta: | ||||||
def get_idle_time(self) -> datetime.timedelta: | ||||||
return datetime.datetime.now().astimezone() - self.last_used_at | ||||||
|
||||||
@typing.overload | ||||||
|
@@ -201,8 +203,8 @@ def has_outstanding_calls(self) -> bool: | |||||
return len(self.requests) > 0 | ||||||
|
||||||
def process_reply(self, frame: IPCReplyFrame) -> None: | ||||||
self.last_used_at = datetime.datetime.now().astimezone() | ||||||
super().process_reply(frame) | ||||||
self.last_used_at = datetime.datetime.now().astimezone() | ||||||
|
||||||
|
||||||
class StopCommand(inmanta.protocol.ipc_light.IPCMethod[ExecutorContext, None]): | ||||||
|
@@ -397,7 +399,21 @@ async def serve() -> None: | |||||
exit(0) | ||||||
|
||||||
|
||||||
class MPExecutor(executor.Executor): | ||||||
class PoolMember(abc.ABC): | ||||||
@abstractmethod | ||||||
def can_be_cleaned_up(self, retention_time: int) -> bool: | ||||||
pass | ||||||
|
||||||
@abstractmethod | ||||||
def last_used(self) -> datetime.datetime: | ||||||
pass | ||||||
|
||||||
@abstractmethod | ||||||
async def stop(self) -> None: | ||||||
pass | ||||||
|
||||||
|
||||||
class MPExecutor(executor.Executor, PoolMember): | ||||||
"""A Single Child Executor""" | ||||||
|
||||||
def __init__( | ||||||
|
@@ -435,7 +451,10 @@ def can_be_cleaned_up(self, retention_time: int) -> bool: | |||||
if self.connection.has_outstanding_calls(): | ||||||
return False | ||||||
|
||||||
return self.connection.get_idle_time() > timedelta(seconds=retention_time) | ||||||
return self.connection.get_idle_time() > datetime.timedelta(seconds=retention_time) | ||||||
|
||||||
def last_used(self) -> datetime.datetime: | ||||||
return self.connection.last_used_at | ||||||
|
||||||
async def force_stop(self, grace_time: float = inmanta.const.SHUTDOWN_GRACE_HARD) -> None: | ||||||
"""Stop by process close""" | ||||||
|
@@ -518,7 +537,24 @@ async def get_facts(self, resource: "inmanta.agent.executor.ResourceDetails") -> | |||||
return await self.connection.call(FactsCommand(resource)) | ||||||
|
||||||
|
||||||
class MPManager(executor.ExecutorManager[MPExecutor]): | ||||||
class PoolManager: | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. To be honest, I don't see the benefit of this class, or even the
Am I missing something? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Agreed, this is not useful right now, I'll remove it There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's not entirely what I'm trying to say. I think the concept is very good, if it allows us to share behavior between both use cases. So when @wouterdb proposed it, I'm assuming that's what he had in mind? By removing it, we're back to square one, aren't we? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. |
||||||
def __init__(self, max_time: int, max_pool_size: int, min_pool_size: int): | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
pass | ||||||
|
||||||
@abstractmethod | ||||||
async def start(self) -> None: | ||||||
pass | ||||||
|
||||||
@abstractmethod | ||||||
async def stop(self) -> None: | ||||||
pass | ||||||
|
||||||
@abstractmethod | ||||||
def get_pool_members(self) -> Sequence[PoolMember]: | ||||||
pass | ||||||
|
||||||
|
||||||
class MPManager(executor.ExecutorManager[MPExecutor], PoolManager): | ||||||
""" | ||||||
This is the executor that provides the new behavior (ISO8+), | ||||||
where the agent forks executors in specific venvs to prevent code reloading. | ||||||
|
@@ -569,7 +605,9 @@ def __init__( | |||||
self.executor_retention_time = inmanta.agent.config.agent_executor_retention_time.get() | ||||||
self.max_executors_per_agent = inmanta.agent.config.agent_executor_cap.get() | ||||||
|
||||||
self.start_periodic_executor_cleanup() | ||||||
# We keep a reference to the periodic cleanup task to prevent it | ||||||
# from disappearing mid-execution https://docs.python.org/3.11/library/asyncio-task.html#creating-tasks | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. wow, nice! |
||||||
self.cleanup_job: Optional[asyncio.Task[None]] = None | ||||||
|
||||||
def __add_executor(self, theid: executor.ExecutorId, the_executor: MPExecutor) -> None: | ||||||
self.executor_map[theid] = the_executor | ||||||
|
@@ -588,6 +626,9 @@ def __remove_executor(self, the_executor: MPExecutor) -> None: | |||||
self.executor_map.pop(theid) | ||||||
self.agent_map[theid.agent_name].discard(theid) | ||||||
|
||||||
def get_pool_members(self) -> Sequence[MPExecutor]: | ||||||
return self.children | ||||||
|
||||||
@classmethod | ||||||
def init_once(cls) -> None: | ||||||
try: | ||||||
|
@@ -737,7 +778,12 @@ def _make_child(self, name: str, log_level: int, cli_log: bool) -> tuple[multipr | |||||
|
||||||
return p, parent_conn | ||||||
|
||||||
async def start(self) -> None: | ||||||
self.running = True | ||||||
self.cleanup_job = asyncio.create_task(self.cleanup_inactive_executors()) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. in the join method, you could |
||||||
|
||||||
async def stop(self) -> None: | ||||||
self.running = False | ||||||
await asyncio.gather(*(child.stop() for child in self.children)) | ||||||
|
||||||
async def force_stop(self, grace_time: float) -> None: | ||||||
|
@@ -746,47 +792,52 @@ async def force_stop(self, grace_time: float) -> None: | |||||
async def join(self, thread_pool_finalizer: list[concurrent.futures.ThreadPoolExecutor], timeout: float) -> None: | ||||||
thread_pool_finalizer.append(self.thread_pool) | ||||||
await asyncio.gather(*(child.join(timeout) for child in self.children)) | ||||||
if self.cleanup_job: | ||||||
await self.cleanup_job | ||||||
|
||||||
async def stop_for_agent(self, agent_name: str) -> list[MPExecutor]: | ||||||
children_ids = self.agent_map[agent_name] | ||||||
children = [self.executor_map[child_id] for child_id in children_ids] | ||||||
await asyncio.gather(*(child.stop() for child in children)) | ||||||
return children | ||||||
|
||||||
def start_periodic_executor_cleanup(self) -> None: | ||||||
async def cleanup_inactive_executors() -> None: | ||||||
""" | ||||||
This task cleans up idle executors and reschedules itself | ||||||
""" | ||||||
async def cleanup_inactive_executors(self) -> None: | ||||||
""" | ||||||
This task periodically cleans up idle executors | ||||||
""" | ||||||
while self.running: | ||||||
cleanup_start = datetime.datetime.now().astimezone() | ||||||
|
||||||
now = datetime.datetime.now().astimezone() | ||||||
reschedule_interval: float = self.executor_retention_time | ||||||
for _executor in self.executor_map.values(): | ||||||
if _executor.can_be_cleaned_up(self.executor_retention_time): | ||||||
LOGGER.debug( | ||||||
"Stopping executor %s because it " | ||||||
"was inactive for %d s, which is longer then the retention time of %d s.", | ||||||
_executor.executor_id.identity(), | ||||||
_executor.connection.get_idle_time().total_seconds(), | ||||||
self.executor_retention_time, | ||||||
) | ||||||
async with self._locks.get(_executor.executor_id.identity()): | ||||||
sanderr marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
try: | ||||||
await _executor.stop() | ||||||
except Exception: | ||||||
LOGGER.debug( | ||||||
"Unexpected error during executor %s cleanup:", _executor.executor_id.identity(), exc_info=True | ||||||
) | ||||||
# Check that the executor can still be cleaned up by the time we have acquired the lock | ||||||
if _executor.can_be_cleaned_up(self.executor_retention_time): | ||||||
try: | ||||||
LOGGER.debug( | ||||||
"Stopping executor %s because it " | ||||||
"was inactive for %d s, which is longer then the retention time of %d s.", | ||||||
_executor.executor_id.identity(), | ||||||
_executor.connection.get_idle_time().total_seconds(), | ||||||
self.executor_retention_time, | ||||||
) | ||||||
await _executor.stop() | ||||||
except Exception: | ||||||
LOGGER.debug( | ||||||
"Unexpected error during executor %s cleanup:", | ||||||
_executor.executor_id.identity(), | ||||||
exc_info=True, | ||||||
) | ||||||
else: | ||||||
reschedule_interval = min( | ||||||
reschedule_interval, | ||||||
( | ||||||
now - _executor.connection.last_used_at + timedelta(seconds=self.executor_retention_time) | ||||||
datetime.timedelta(seconds=self.executor_retention_time) | ||||||
- (cleanup_start - _executor.connection.last_used_at) | ||||||
).total_seconds(), | ||||||
) | ||||||
|
||||||
LOGGER.debug(f"Scheduling next cleanup_inactive_executors in {reschedule_interval} s.") | ||||||
await asyncio.sleep(reschedule_interval) | ||||||
asyncio.create_task(cleanup_inactive_executors()) | ||||||
cleanup_end = datetime.datetime.now().astimezone() | ||||||
|
||||||
asyncio.ensure_future(cleanup_inactive_executors()) | ||||||
await asyncio.sleep(max(0.0, reschedule_interval - (cleanup_end - cleanup_start).total_seconds())) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -86,8 +86,8 @@ def set_custom_executor_policy(): | |
inmanta.agent.config.agent_executor_cap.set("2") | ||
|
||
old_retention_value = inmanta.agent.config.agent_executor_retention_time.get() | ||
# Clean up executors after 2s of inactivity | ||
inmanta.agent.config.agent_executor_retention_time.set("2") | ||
# Clean up executors after 3s of inactivity | ||
inmanta.agent.config.agent_executor_retention_time.set("3") | ||
Comment on lines
+89
to
+90
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Increased this a bit because it was causing untimely cleanup in the test case: the old executor is supposed to get cleaned up when the agent cap is reached, but with this setting too low, it would automatically get cleaned up before then. |
||
|
||
yield | ||
|
||
|
@@ -115,6 +115,8 @@ async def test_executor_server(set_custom_executor_policy, mpmanager: MPManager, | |
import lorem # noqa: F401 | ||
|
||
manager = mpmanager | ||
await manager.start() | ||
|
||
inmanta.config.Config.set("test", "aaa", "bbbb") | ||
|
||
# Simple empty venv | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -16,12 +16,14 @@ | |
Contact: [email protected] | ||
""" | ||
|
||
import functools | ||
import logging | ||
import os | ||
import random | ||
import socket | ||
|
||
import netifaces | ||
import pytest | ||
from tornado import netutil | ||
|
||
import inmanta.agent.config as cfg | ||
|
@@ -347,3 +349,18 @@ def test_option_is_list_empty(): | |
option: Option = Option("test", "list", "default,values", "documentation", cfg.is_list) | ||
option.set("") | ||
assert option.get() == [] | ||
|
||
|
||
def test_option_is_lower_bounded_int(): | ||
lower_bound = 1 | ||
option: Option = Option( | ||
"test", "lb_int", lower_bound, "documentation", functools.partial(cfg.is_lower_bounded_int, lower_bound) | ||
) | ||
option.set("2") | ||
assert option.get() == 2 | ||
|
||
option.set("0") | ||
with pytest.raises(ValueError) as exc_info: | ||
option.get() | ||
|
||
assert f"Value can not be lower than {lower_bound}" in str(exc_info.value) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nice! Could use a docstring though.