Skip to content

Commit

Permalink
Remove crontab (#3369)
Browse files Browse the repository at this point in the history
* Remove crontab

PBENCH-1086

Use systemd to periodically run the pbench-index service instead of requiring
that we generate a crontab file and install components to use it.

We add the `sd_notify` service to the server shell to better synchronize with
`systemd` so that it knows our service isn't "running" once we've been exec-d,
but only when we tell it. I also added some `STATUS` updates, although these
don't matter much unless someone is watching with `systemctl status`. The
critical bit is that we issue `READY=1` when we want `systemd` to consider the
server "active". This triggers the new `pbench-index.timer`, which will begin
to run every minute (more or less as with `crontab`). The `sd_notify` delay
ensures that the server will have initialized PostgreSQL and Elasticsearch to
avoid any conflicts. This introduces the `Type = notify` and `NotifyAccess`
settings. (I chose `all` as the `NotifyAccess` because it appears that we're
now seeing notifications from `gunicorn` as well, and `all` allows `systemd`
to recogize `sd_notify` calls from children of the main service PID instead
of writing a warning into the journal.)

We use `User = pbench` system services rather than user services principally
because the latter would complicate the build and deployment without giving
any real benefits. Because `buildah` builds images without an `initd` pid 1,
the necessary `loginctl enable-linger pbench` and `systemctl --user` commands
aren't available during container build. While I experimented with deferring
these until we stand up the real container during deployment, that never felt
"clean".

Note that I removed the pidfile and kill configuration from the service file.
These are actually unnecessary unless the service is doing something unusual:
instead we simply tell `systemd` to use `SIGTERM` on the primary service PID.
(Though even that isn't strictly necessary as `SIGTERM` is the default.)
  • Loading branch information
dbutenhof authored Apr 5, 2023
1 parent c10bb8a commit 2914daf
Show file tree
Hide file tree
Showing 9 changed files with 72 additions and 210 deletions.
73 changes: 30 additions & 43 deletions lib/pbench/cli/server/shell.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import sys

from flask import Flask
import sdnotify

from pbench.common import wait_for_uri
from pbench.common.exceptions import BadConfig
Expand Down Expand Up @@ -40,47 +41,15 @@ def find_the_unicorn(logger: Logger):
)


def generate_crontab_if_necessary(
crontab_dir: str, bin_dir: Path, cwd: str, logger: Logger
) -> int:
"""Generate and install the crontab for the Pbench Server.
If a crontab file already exists, no action is taken, otherwise a crontab
file is created using `pbench-create-crontab` and then installed using the
`crontab` command.
If either of those operations fail, the crontab file is removed.
Return 0 on success, 1 on failure.
"""
ret_val = 0
crontab_f = Path(crontab_dir) / "crontab"
if not crontab_f.exists():
os.environ["PATH"] = ":".join([str(bin_dir), os.environ["PATH"]])
# Create the crontab file from the server configuration.
cp = subprocess.run(["pbench-create-crontab", crontab_dir], cwd=cwd)
if cp.returncode != 0:
logger.error(
"Failed to create crontab file from configuration: {}", cp.returncode
)
ret_val = 1
else:
# Install the created crontab file.
cp = subprocess.run(["crontab", f"{crontab_dir}/crontab"], cwd=cwd)
if cp.returncode != 0:
logger.error("Failed to install crontab file: {}", cp.returncode)
ret_val = 1
if ret_val != 0:
crontab_f.unlink(missing_ok=True)
return ret_val


def run_gunicorn(server_config: PbenchServerConfig, logger: Logger) -> int:
"""Setup of the Gunicorn Pbench Server Flask application.
Returns:
1 on error, or the gunicorn sub-process status code
"""
notifier = sdnotify.SystemdNotifier()

notifier.notify("STATUS=Identifying configuration")
if site.ENABLE_USER_SITE:
find_the_unicorn(logger)
try:
Expand All @@ -92,12 +61,14 @@ def run_gunicorn(server_config: PbenchServerConfig, logger: Logger) -> int:
es_wait_timeout = int(server_config.get("Indexing", "wait_timeout"))
workers = str(server_config.get("pbench-server", "workers"))
worker_timeout = str(server_config.get("pbench-server", "worker_timeout"))
crontab_dir = server_config.get("pbench-server", "crontab-dir")
server_config.get("flask-app", "secret-key")
except (NoOptionError, NoSectionError) as exc:
logger.error("Error fetching required configuration: {}", exc)
notifier.notify("STOPPING=1")
notifier.notify("STATUS=Unable to configure gunicorn")
return 1

notifier.notify("STATUS=Waiting for database")
logger.info(
"Waiting at most {:d} seconds for database instance {} to become available.",
db_wait_timeout,
Expand All @@ -107,11 +78,16 @@ def run_gunicorn(server_config: PbenchServerConfig, logger: Logger) -> int:
wait_for_uri(db_uri, db_wait_timeout)
except BadConfig as exc:
logger.error(f"{exc}")
notifier.notify("STOPPING=1")
notifier.notify(f"STATUS=Bad DB config {exc}")
return 1
except ConnectionRefusedError:
logger.error("Database {} not responding", db_uri)
notifier.notify("STOPPING=1")
notifier.notify("STATUS=DB not responding")
return 1

notifier.notify("STATUS=Waiting for Elasticsearch instance")
logger.info(
"Waiting at most {:d} seconds for the Elasticsearch instance {} to become available.",
es_wait_timeout,
Expand All @@ -121,48 +97,56 @@ def run_gunicorn(server_config: PbenchServerConfig, logger: Logger) -> int:
wait_for_uri(es_uri, es_wait_timeout)
except BadConfig as exc:
logger.error(f"{exc}")
notifier.notify("STOPPING=1")
notifier.notify(f"STATUS=Bad index config {exc}")
return 1
except ConnectionRefusedError:
logger.error("Elasticsearch {} not responding", es_uri)
logger.error("Index {} not responding", es_uri)
notifier.notify("STOPPING=1")
notifier.notify("STATUS=Index service not responding")
return 1

notifier.notify("STATUS=Initializing OIDC")
try:
oidc_server = OpenIDClient.wait_for_oidc_server(server_config, logger)
except OpenIDClient.NotConfigured as exc:
logger.warning("OpenID Connect client not configured, {}", exc)
notifier.notify("STOPPING=1")
notifier.notify("STATUS=OPENID broker not responding")
else:
logger.info("Pbench server using OIDC server {}", oidc_server)

# Multiple gunicorn workers will attempt to connect to the DB; rather than
# attempt to synchronize them, detect a missing DB (from the database URI)
# and create it here. It's safer to do this here, where we're
# single-threaded.
notifier.notify("STATUS=Initializing database")
logger.info("Performing database setup")
Database.create_if_missing(db_uri, logger)
try:
init_db(server_config, logger)
except (NoOptionError, NoSectionError) as exc:
logger.error("Invalid database configuration: {}", exc)
notifier.notify("STOPPING=1")
notifier.notify(f"STATUS=Error initializing database: {exc}")
return 1

# Multiple cron jobs will attempt to file reports with the Elasticsearch
# instance when they start and finish, causing them to all to try to
# initialize the templates in the Indexing sub-system. To avoid race
# conditions that can create stack traces, we initialize the indexing sub-
# system before we start the cron jobs.
notifier.notify("STATUS=Initializing Elasticsearch")
logger.info("Performing Elasticsearch indexing setup")
try:
init_indexing(PROG, server_config, logger)
except (NoOptionError, NoSectionError) as exc:
logger.error("Invalid indexing configuration: {}", exc)
notifier.notify("STOPPING=1")
notifier.notify(f"STATUS=Invalid indexing config {exc}")
return 1

logger.info("Generating new crontab file, if necessary")
ret_val = generate_crontab_if_necessary(
crontab_dir, server_config.BINDIR, server_config.log_dir, logger
)
if ret_val != 0:
return ret_val
notifier.notify("READY=1")

# Beginning of the gunicorn command to start the pbench-server.
cmd_line = [
Expand Down Expand Up @@ -198,8 +182,11 @@ def run_gunicorn(server_config: PbenchServerConfig, logger: Logger) -> int:

cmd_line.append("pbench.cli.server.shell:app()")
logger.info("Starting Gunicorn Pbench Server application")
notifier.notify("STATUS=Starting gunicorn")
cp = subprocess.run(cmd_line, cwd=server_config.log_dir)
logger.info("Gunicorn Pbench Server application exited with {}", cp.returncode)
notifier.notify(f"STATUS=Gunicorn terminated with {cp.returncode}")
notifier.notify("STOPPING=1")
return cp.returncode


Expand Down
151 changes: 4 additions & 147 deletions lib/pbench/test/unit/server/test_shell_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,120 +69,6 @@ def test_find_the_unicorn(monkeypatch, make_logger):
"/bin:ONE:TWO"
), f"Expected PATH to end with '/bin:ONE:TWO', found PATH == '{os.environ['PATH']}'"

@staticmethod
def test_generate_crontab_if_necessary_no_action(monkeypatch, make_logger):
"""Test site.generate_crontab_if_necessary with no action taken"""

called = {"run": False}

def run(args, cwd: Optional[str] = None) -> subprocess.CompletedProcess:
called["run"] = True

monkeypatch.setenv("PATH", "one:two")
# An existing crontab does nothing.
monkeypatch.setattr(Path, "exists", exists_true)
monkeypatch.setattr(subprocess, "run", run)

ret_val = shell.generate_crontab_if_necessary(
"/tmp", Path("bindir"), "cwd", make_logger
)

assert ret_val == 0
assert os.environ["PATH"] == "one:two", f"PATH='{os.environ['PATH']}'"
assert not called[
"run"
], "generate_crontab_if_necessary() took action unexpectedly"

@staticmethod
def test_generate_crontab_if_necessary_created(monkeypatch, make_logger):
"""Test site.generate_crontab_if_necessary creating the crontab"""

commands = []

def run_success(args, cwd: Optional[str] = None) -> subprocess.CompletedProcess:
commands.append(args)
return subprocess.CompletedProcess(args, 0)

monkeypatch.setenv("PATH", "a:b")
# We don't have an existing crontab
monkeypatch.setattr(Path, "exists", exists_false)
monkeypatch.setattr(subprocess, "run", run_success)

ret_val = shell.generate_crontab_if_necessary(
"/tmp", Path("bindir"), "cwd", make_logger
)

assert ret_val == 0
assert os.environ["PATH"] == "bindir:a:b", f"PATH='{os.environ['PATH']}'"
assert commands == [
["pbench-create-crontab", "/tmp"],
["crontab", "/tmp/crontab"],
]

@staticmethod
def test_generate_crontab_if_necessary_create_failed(monkeypatch, make_logger):
monkeypatch.setenv("PATH", "a:b")

unlink_record = []

def unlink(*args, **kwargs):
unlink_record.append("unlink")

monkeypatch.setattr(Path, "unlink", unlink)

commands = []

def run(args, cwd: Optional[str] = None) -> subprocess.CompletedProcess:
commands.append(args)
return subprocess.CompletedProcess(args, 1)

# We don't have an existing crontab
monkeypatch.setattr(Path, "exists", exists_false)
monkeypatch.setattr(subprocess, "run", run)

ret_val = shell.generate_crontab_if_necessary(
"/tmp", Path("bindir"), "cwd", make_logger
)

assert ret_val == 1
assert os.environ["PATH"] == "bindir:a:b", f"PATH='{os.environ['PATH']}'"
assert commands == [["pbench-create-crontab", "/tmp"]]
assert unlink_record == ["unlink"]

@staticmethod
def test_generate_crontab_if_necessary_crontab_failed(monkeypatch, make_logger):
monkeypatch.setenv("PATH", "a:b")

unlink_record = []

def unlink(*args, **kwargs):
unlink_record.append("unlink")

monkeypatch.setattr(Path, "unlink", unlink)

commands = []

def run(args, cwd: Optional[str] = None) -> subprocess.CompletedProcess:
commands.append(args)
ret_val = 1 if args[0] == "crontab" else 0
return subprocess.CompletedProcess(args, ret_val)

# We don't have an existing crontab
monkeypatch.setattr(Path, "exists", exists_false)
monkeypatch.setattr(subprocess, "run", run)

ret_val = shell.generate_crontab_if_necessary(
"/tmp", Path("bindir"), "cwd", make_logger
)

assert ret_val == 1
assert os.environ["PATH"] == "bindir:a:b", f"PATH='{os.environ['PATH']}'"
assert commands == [
["pbench-create-crontab", "/tmp"],
["crontab", "/tmp/crontab"],
]
assert unlink_record == ["unlink"]

@staticmethod
@pytest.mark.parametrize("user_site", [False, True])
@pytest.mark.parametrize("oidc_conf", [False, True])
Expand Down Expand Up @@ -234,16 +120,14 @@ def run(args, cwd: Optional[str] = None) -> subprocess.CompletedProcess:
"init_indexing",
]
assert called == expected_called
assert len(commands) == 3, f"{commands!r}"
assert commands[0][0] == "pbench-create-crontab"
assert commands[1][0] == "crontab"
gunicorn_command = commands[2]
assert len(commands) == 1, f"{commands!r}"
gunicorn_command = commands[0]
assert gunicorn_command[-1] == "pbench.cli.server.shell:app()", f"{commands!r}"
gunicorn_command = gunicorn_command[:-1]
if user_site:
assert (
gunicorn_command[-2:][0] == "--pythonpath"
), f"commands[2] = {commands[2]!r}"
), f"commands[0] = {commands[0]!r}"
gunicorn_command = gunicorn_command[:-2]
expected_command = [
"gunicorn",
Expand All @@ -261,31 +145,7 @@ def run(args, cwd: Optional[str] = None) -> subprocess.CompletedProcess:
]
assert (
gunicorn_command == expected_command
), f"expected_command = {expected_command!r}, commands[2] = {commands[2]!r}"

@staticmethod
def test_main_crontab_failed(monkeypatch, make_logger, mock_get_server_config):
def immediate_success(*args, **kwargs):
pass

def generate_crontab_if_necessary(*args, **kwargs) -> int:
return 43

monkeypatch.setattr(
shell.OpenIDClient,
"wait_for_oidc_server",
lambda config, logger: "https://oidc.example.com",
)
monkeypatch.setattr(shell.site, "ENABLE_USER_SITE", False)
monkeypatch.setattr(shell, "wait_for_uri", immediate_success)
monkeypatch.setattr(shell, "init_indexing", immediate_success)
monkeypatch.setattr(
shell, "generate_crontab_if_necessary", generate_crontab_if_necessary
)

ret_val = shell.main()

assert ret_val == 43
), f"expected_command = {expected_command!r}, commands[0] = {commands[0]!r}"

@staticmethod
@pytest.mark.parametrize(
Expand Down Expand Up @@ -457,7 +317,6 @@ def get_server_config() -> PbenchServerConfig:
"wait_timeout",
"workers",
"worker_timeout",
"crontab-dir",
],
)
def test_main_server_config_no_option(
Expand All @@ -469,8 +328,6 @@ def get_server_config() -> PbenchServerConfig:
section = (
"database"
if option in frozenset(("uri", "wait_timeout"))
else "DEFAULT"
if option == "crontab-dir"
else "pbench-server"
)
make_logger.error(config._conf[section][option])
Expand Down
1 change: 0 additions & 1 deletion server/lib/config/pbench-server-default.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ default-group = pbench
# We won't define it here by default to avoid unintended behaviors.
script-dir = %(install-dir)s/bin
lib-dir = %(install-dir)s/lib
crontab-dir = %(lib-dir)s/crontab
lock-dir = %(lib-dir)s/locks

###########################################################################
Expand Down
14 changes: 14 additions & 0 deletions server/lib/systemd/pbench-index.service
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
[Unit]
Description=Index Pbench Server datasets
Wants=pbench-index.timer

[Service]
Type = simple
User = pbench
Group = pbench
Environment = _PBENCH_SERVER_CONFIG=/opt/pbench-server/lib/config/pbench-server.cfg
ExecStart=-/opt/pbench-server/bin/pbench-index
KillSignal = TERM

[Install]
WantedBy=pbench-server.service
13 changes: 13 additions & 0 deletions server/lib/systemd/pbench-index.timer
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[Unit]
Description=Pbench Server indexer timer
After=pbench-server.service
Requires=pbench-index.service

[Timer]
Unit=pbench-index.service
OnUnitActiveSec=240
OnCalendar=*-*-* *:*:15

[Install]
WantedBy=timers.target
BindsTo=pbench-server.service
Loading

0 comments on commit 2914daf

Please sign in to comment.