Skip to content

Commit

Permalink
fix: don't invert shutdown_on_stop config file setting's meaning (#155)
Browse files Browse the repository at this point in the history
Problems:
1. The configuration file setting "shutdown_on_stop" inadvertently had
   its meaning inverted. If set to 'true' then it would *not* shutdown
   the host when requested; it's supposed to shutdown the host.
2. The log for choosing to not shutdown the host when the service
   suggests it was a debug level log.

Solutions:
1. Invert the use of 'shutdown_on_stop' internally so that the meaning
   of the option is as the plain language meaning says.
2. Rework the logging around host shutdown to be clearer regarding
   what's happening.

Signed-off-by: Daniel Neilson <[email protected]>
  • Loading branch information
ddneilson authored Feb 14, 2024
1 parent 04e84c4 commit 1a7329f
Show file tree
Hide file tree
Showing 5 changed files with 65 additions and 18 deletions.
6 changes: 4 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,16 @@ module = [
]

[tool.ruff]
line-length = 100

[tool.ruff.lint]
ignore = [
"E501",
"E722",
"F811",
]
line-length = 100

[tool.ruff.isort]
[tool.ruff.lint.isort]
known-first-party = [
"deadline_worker_agent",
"deadline",
Expand Down
6 changes: 3 additions & 3 deletions src/deadline_worker_agent/startup/config_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,8 @@ def as_settings(
output_settings["worker_persistence_dir"] = self.worker.worker_persistence_dir
if self.aws.profile is not None:
output_settings["profile"] = self.aws.profile
if self.aws.allow_ec2_instance_profile is not None:
output_settings["allow_instance_profile"] = self.aws.allow_ec2_instance_profile
if self.logging.verbose is not None:
output_settings["verbose"] = self.logging.verbose
if self.logging.worker_logs_dir is not None:
Expand All @@ -129,13 +131,11 @@ def as_settings(
"host_metrics_logging_interval_seconds"
] = self.logging.host_metrics_logging_interval_seconds
if self.os.shutdown_on_stop is not None:
output_settings["no_shutdown"] = self.os.shutdown_on_stop
output_settings["no_shutdown"] = not self.os.shutdown_on_stop
if self.os.run_jobs_as_agent_user is not None:
output_settings["run_jobs_as_agent_user"] = self.os.run_jobs_as_agent_user
if self.os.posix_job_user is not None:
output_settings["posix_job_user"] = self.os.posix_job_user
if self.aws.allow_ec2_instance_profile is not None:
output_settings["allow_instance_profile"] = self.aws.allow_ec2_instance_profile
if self.capabilities is not None:
output_settings["capabilities"] = self.capabilities

Expand Down
13 changes: 6 additions & 7 deletions src/deadline_worker_agent/startup/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,7 @@ def filter(self, record: logging.LogRecord) -> bool:

# conditional shutdown
if shutdown_requested:
_logger.info("The service has requested that the host be shutdown")
_system_shutdown(config=config)
except ConfigurationError as e:
sys.stderr.write(f"ERROR: {e}{os.linesep}")
Expand All @@ -186,7 +187,11 @@ def filter(self, record: logging.LogRecord) -> bool:
def _system_shutdown(config: Configuration) -> None:
"""Shuts the system down"""

_logger.info("Shutting down the instance")
if config.no_shutdown:
_logger.info("NOT shutting down the host. Local configuration settings say not to.")
return

_logger.info("Shutting down the host")

shutdown_command: list[str]

Expand All @@ -195,12 +200,6 @@ def _system_shutdown(config: Configuration) -> None:
else:
shutdown_command = ["sudo", "shutdown", "now"]

if config.no_shutdown:
_logger.debug(
f"Skipping system shutdown. The following command would have been run: '{shutdown_command}'"
)
return

# flush all the logs before initiating the shutdown command.
for handler in _logger.handlers:
handler.flush()
Expand Down
49 changes: 48 additions & 1 deletion test/unit/startup/test_config_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from pathlib import Path
from deadline_worker_agent.startup.capabilities import Capabilities

from pydantic import ValidationError
from pydantic import ValidationError, BaseSettings
import pytest

try:
Expand Down Expand Up @@ -608,6 +608,7 @@ def when() -> ConfigFile:
verbose = true
worker_logs_dir = "/var/log/amazon/deadline"
local_session_logs = false
host_metrics_logging = true
host_metrics_logging_interval_seconds = 1
[os]
Expand Down Expand Up @@ -680,6 +681,7 @@ def test_config_load_full_toml(
assert config.logging.verbose is True
assert config.logging.worker_logs_dir == Path("/var/log/amazon/deadline")
assert config.logging.local_session_logs is False
assert config.logging.host_metrics_logging is True
assert config.logging.host_metrics_logging_interval_seconds == 1

assert config.os.run_jobs_as_agent_user is False
Expand Down Expand Up @@ -724,3 +726,48 @@ def test_config_load_toml_decode_error(
config_path_open.assert_called_once_with(mode="rb")
mock_load_toml.assert_called_once_with(config_path_fh_ctx)
mock_parse_obj.assert_not_called()

def test_config_full_toml_as_settings(
self,
) -> None:
# GIVEN
with tempfile.TemporaryDirectory() as tmpdir:
config_file = Path(tmpdir) / "config_file.toml"
with config_file.open("w", encoding="utf-8") as fh:
fh.write(FULL_CONFIG_FILE)
config = ConfigFile.load(config_file)

# WHEN
settings = config.as_settings(BaseSettings())

# THEN
expected = {
# worker
"cleanup_session_user_processes": True,
"farm_id": "farm-1f0ece77172c441ebe295491a51cf6d5",
"fleet_id": "fleet-c4a9481caa88404fa878a7fb98f8a4dd",
"worker_persistence_dir": Path("/my/worker/persistence"),
# aws
"profile": "my_aws_profile_name",
"allow_instance_profile": True,
# logging
"verbose": True,
"worker_logs_dir": Path("/var/log/amazon/deadline"),
"local_session_logs": False,
"host_metrics_logging": True,
"host_metrics_logging_interval_seconds": 1,
# os
"run_jobs_as_agent_user": False,
"posix_job_user": "user:group",
"no_shutdown": True, # opposite of 'shutdown_on_stop'
# capabilities
"capabilities": Capabilities(
amounts={"amount.slots": 20, "deadline:amount.pets": 99},
attributes={
"attr.groups": ["simulation", "maya", "nuke"],
"acmewidgetsco:attr.admins": ["bob", "alice"],
},
),
}

assert settings == expected
9 changes: 4 additions & 5 deletions test/unit/startup/test_entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -456,7 +456,7 @@ def test_system_shutdown(
entrypoint_mod._system_shutdown(config=configuration)

# THEN
logger_info_mock.assert_any_call("Shutting down the instance")
logger_info_mock.assert_any_call("Shutting down the host")
subprocess_popen_mock.assert_called_once_with(
expected_command,
stdout=subprocess.PIPE,
Expand Down Expand Up @@ -501,7 +501,7 @@ def test_system_shutdown_failure(
entrypoint_mod._system_shutdown(config=configuration)

# THEN
logger_mock.info.assert_any_call("Shutting down the instance")
logger_mock.info.assert_any_call("Shutting down the host")
subprocess_popen_mock.assert_called_once_with(
expected_command,
stdout=subprocess.PIPE,
Expand Down Expand Up @@ -542,9 +542,8 @@ def test_no_shutdown_only_log(
entrypoint_mod._system_shutdown(config=configuration)

# THEN
logger_info_mock.assert_called_with("Shutting down the instance")
logger_debug_mock.assert_called_with(
f"Skipping system shutdown. The following command would have been run: '{expected_command}'"
logger_info_mock.assert_called_with(
"NOT shutting down the host. Local configuration settings say not to."
)
system_mock.assert_not_called()

Expand Down

0 comments on commit 1a7329f

Please sign in to comment.