Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[3006.x] Fix opt typo #64072

Merged
merged 6 commits into from
Apr 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changelog/63747.fixed.md
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add `fileserver_interval` and `maintenance_interval` master configuration options. These options control how often to restart the FileServerUpdate and Maintenance processes. Some file server and pillar configurations are known to cause memory leaks over time. A notable example of this are configurations that use pygit2. Salt can not guarantee dependency libraries like pygit2 won't leak memory. Restarting any long running processes that use pygit2 guarantees we can keep the master's memory usage in check.
29 changes: 28 additions & 1 deletion doc/ref/configuration/master.rst
Original file line number Diff line number Diff line change
Expand Up @@ -371,10 +371,24 @@ Set the default timeout for the salt command and api.

Default: ``60``

The loop_interval option controls the seconds for the master's maintenance
The loop_interval option controls the seconds for the master's Maintenance
process check cycle. This process updates file server backends, cleans the
job cache and executes the scheduler.

``maintenance_interval``
------------------------

.. versionadded:: 3006.0

Default: ``3600``

Defines how often to restart the master's Maintenance process.

.. code-block:: yaml

maintenance_interval: 9600


.. conf_master:: output

``output``
Expand Down Expand Up @@ -4077,6 +4091,19 @@ This option defines the update interval (in seconds) for s3fs.

s3fs_update_interval: 120

``fileserver_interval``
***********************

.. versionadded:: 3006.0

Default: ``3600``

Defines how often to restart the master's FilesServerUpdate process.

.. code-block:: yaml

fileserver_interval: 9600


.. _pillar-configuration-master:

Expand Down
9 changes: 9 additions & 0 deletions doc/ref/file_server/file_roots.rst
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,15 @@ individual environments can span across multiple directory roots
to create overlays and to allow for files to be organized in many flexible
ways.

Periodic Restarts
=================

The file server will restart periodically. The reason for this is to prevent any
files erver backends which may not properly handle resources from endlessly
consuming memory. A notable example of this is using a git backend with the
pygit2 library. How often the file server restarts can be controlled with the
``fileserver_interval`` in your master's config file.

Environments
============

Expand Down
6 changes: 6 additions & 0 deletions salt/config/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -987,6 +987,10 @@ def _gather_buffer_space():
"pass_gnupghome": str,
# pass renderer: Set PASSWORD_STORE_DIR env for Pass
"pass_dir": str,
# Maintenence process restart interval
"maintenance_interval": int,
# Fileserver process restart interval
"fileserver_interval": int,
}
)

Expand Down Expand Up @@ -1635,6 +1639,8 @@ def _gather_buffer_space():
"pass_gnupghome": "",
"pass_dir": "",
"netapi_enable_clients": [],
"maintenance_interval": 3600,
"fileserver_interval": 3600,
}
)

Expand Down
29 changes: 21 additions & 8 deletions salt/master.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,7 @@ def __init__(self, opts, **kwargs):
# Track key rotation intervals
self.rotate = int(time.time())
# A serializer for general maint operations
self.restart_interval = int(self.opts["maintenance_interval"])

def _post_fork_init(self):
"""
Expand Down Expand Up @@ -243,21 +244,28 @@ def run(self):
# init things that need to be done after the process is forked
self._post_fork_init()

# Make Start Times
last = int(time.time())
# Start of process for maintenance process restart interval
start = time.time()

# Unset last value will cause the interval items to run on the first
# loop iteration. This ensurs we always run them even if
# maintenance_interval happens to be less than loop_interval or
# git_update_interval
last = None

# update git_pillar on first loop
last_git_pillar_update = 0
now = int(time.time())

git_pillar_update_interval = self.opts.get("git_pillar_update_interval", 0)
old_present = set()
while True:
now = int(time.time())
while time.time() - start < self.restart_interval:
log.trace("Running maintenance routines")
if (now - last) >= self.loop_interval:
if not last or (now - last) >= self.loop_interval:
salt.daemons.masterapi.clean_old_jobs(self.opts)
salt.daemons.masterapi.clean_expired_tokens(self.opts)
salt.daemons.masterapi.clean_pub_auth(self.opts)
if (now - last_git_pillar_update) >= git_pillar_update_interval:
if not last or (now - last_git_pillar_update) >= git_pillar_update_interval:
last_git_pillar_update = now
self.handle_git_pillar()
self.handle_schedule()
Expand All @@ -266,6 +274,7 @@ def run(self):
self.handle_key_rotate(now)
salt.utils.verify.check_max_open_files(self.opts)
last = now
now = int(time.time())
time.sleep(self.loop_interval)

def handle_key_cache(self):
Expand Down Expand Up @@ -462,7 +471,7 @@ def _do_update(backends):
)

@classmethod
def update(cls, interval, backends, timeout=300):
def update(cls, interval, backends, timeout):
"""
Threading target which handles all updates for a given wait interval
"""
Expand Down Expand Up @@ -503,7 +512,11 @@ def run(self):
for interval in self.buckets:
self.update_threads[interval] = threading.Thread(
target=self.update,
args=(interval, self.buckets[interval]),
args=(
interval,
self.buckets[interval],
self.opts["fileserver_interval"],
),
)
self.update_threads[interval].start()

Expand Down
32 changes: 32 additions & 0 deletions tests/pytests/unit/test_master.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,39 @@ def encrypted_requests(tmp_path):
)


def test_maintenance_duration():
"""
Validate Maintenance process duration.
"""
opts = {
"loop_interval": 10,
"maintenance_interval": 1,
"cachedir": "/tmp",
"sock_dir": "/tmp",
"maintenance_niceness": 1,
"key_cache": "sched",
"conf_file": "",
"master_job_cache": "",
"pki_dir": "/tmp",
"eauth_tokens": "",
}
mp = salt.master.Maintenance(opts)
with patch("salt.utils.verify.check_max_open_files") as check_files, patch.object(
mp, "handle_key_cache"
) as handle_key_cache, patch("salt.daemons") as salt_daemons, patch.object(
mp, "handle_git_pillar"
) as handle_git_pillar:
mp.run()
assert salt_daemons.masterapi.clean_old_jobs.called
assert salt_daemons.masterapi.clean_expired_tokens.called
assert salt_daemons.masterapi.clean_pub_auth.called
assert handle_git_pillar.called


def test_fileserver_duration():
"""
Validate Fileserver process duration.
"""
with patch("salt.master.FileserverUpdate._do_update") as update:
start = time.time()
salt.master.FileserverUpdate.update(1, {}, 1)
Expand Down
12 changes: 7 additions & 5 deletions tests/unit/test_master.py
Original file line number Diff line number Diff line change
Expand Up @@ -681,7 +681,9 @@ class MaintenanceTestCase(TestCase, AdaptedConfigurationTestCaseMixin):
"""

def setUp(self):
opts = self.get_temp_config("master", git_pillar_update_interval=180)
opts = self.get_temp_config(
"master", git_pillar_update_interval=180, maintenance_interval=181
)
self.main_class = salt.master.Maintenance(opts)
self.main_class._after_fork_methods = self.main_class._finalize_methods = []

Expand Down Expand Up @@ -758,10 +760,10 @@ def __call__(self, *args, **kwargs):
self.assertEqual(str(exc), "Time passes")
self.assertEqual(mocked_time._calls, [60] * 4)
self.assertEqual(mocked__post_fork_init.call_times, [0])
self.assertEqual(mocked_clean_old_jobs.call_times, [60, 120, 180])
self.assertEqual(mocked_clean_expired_tokens.call_times, [60, 120, 180])
self.assertEqual(mocked_clean_pub_auth.call_times, [60, 120, 180])
self.assertEqual(mocked_handle_git_pillar.call_times, [0, 180])
self.assertEqual(mocked_clean_old_jobs.call_times, [0, 120, 180])
self.assertEqual(mocked_clean_expired_tokens.call_times, [0, 120, 180])
self.assertEqual(mocked_clean_pub_auth.call_times, [0, 120, 180])
self.assertEqual(mocked_handle_git_pillar.call_times, [0])
self.assertEqual(mocked_handle_schedule.call_times, [0, 60, 120, 180])
self.assertEqual(mocked_handle_key_cache.call_times, [0, 60, 120, 180])
self.assertEqual(mocked_handle_presence.call_times, [0, 60, 120, 180])
Expand Down