Skip to content

Commit

Permalink
test: add windows test that worker service restarts process (#451)
Browse files Browse the repository at this point in the history
Signed-off-by: Yutong Li <[email protected]>
  • Loading branch information
YutongLi291 authored Oct 28, 2024
1 parent 8e776e1 commit f95a6ce
Showing 1 changed file with 98 additions and 36 deletions.
134 changes: 98 additions & 36 deletions test/e2e/test_worker_status.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,31 +17,6 @@

@pytest.mark.parametrize("operating_system", [os.environ["OPERATING_SYSTEM"]], indirect=True)
class TestWorkerStatus:
def test_worker_lifecycle_status_is_expected(
    self,
    deadline_resources,
    deadline_client: DeadlineClient,
    function_worker: EC2InstanceWorker,
) -> None:
    """Check the worker status from the GetWorker API across a start/stop cycle.

    The worker must first be seen as started; after its service is stopped
    it must be seen as stopped.
    """
    worker_id = function_worker.worker_id
    # Narrow the optional worker id so the type checker accepts it below
    assert worker_id is not None

    farm_id = deadline_resources.farm.id
    fleet_id = deadline_resources.fleet.id

    # Phase 1: the freshly provisioned worker is expected to be started
    started = is_worker_started(
        deadline_client=deadline_client,
        farm_id=farm_id,
        fleet_id=fleet_id,
        worker_id=worker_id,
    )
    assert started

    function_worker.stop_worker_service()

    # Phase 2: once the service is stopped the worker must be seen as stopped
    stopped = is_worker_stopped(
        deadline_client=deadline_client,
        farm_id=farm_id,
        fleet_id=fleet_id,
        worker_id=worker_id,
    )
    assert stopped

@pytest.mark.skipif(
os.environ["OPERATING_SYSTEM"] == "windows",
Expand All @@ -51,17 +26,17 @@ def test_linux_worker_restarts_process(
self,
deadline_resources,
deadline_client: DeadlineClient,
function_worker: EC2InstanceWorker,
class_worker: EC2InstanceWorker,
) -> None:
# Verifies that Linux Worker service restarts the process when we start/stop worker process

assert function_worker.worker_id is not None # This fixes linter type mismatch
assert class_worker.worker_id is not None # This fixes linter type mismatch

assert is_worker_started(
deadline_client=deadline_client,
farm_id=deadline_resources.farm.id,
fleet_id=deadline_resources.fleet.id,
worker_id=function_worker.worker_id,
worker_id=class_worker.worker_id,
)

# First check that the worker service is running
Expand All @@ -74,9 +49,7 @@ def test_linux_worker_restarts_process(
)
def check_service_is_active() -> None:
# The service should be active
service_check_result = function_worker.send_command(
"systemctl is-active deadline-worker"
)
service_check_result = class_worker.send_command("systemctl is-active deadline-worker")
assert (
service_check_result.exit_code == 0
), "Unable to check whether deadline-worker is active"
Expand All @@ -90,8 +63,8 @@ def check_service_is_active() -> None:
# Check that the worker process is running

def check_worker_processes_exist() -> None:
process_check_result = function_worker.send_command(
f"pgrep --count --full -u {function_worker.configuration.agent_user} deadline-worker-agent"
process_check_result = class_worker.send_command(
f"pgrep --count --full -u {class_worker.configuration.agent_user} deadline-worker-agent"
)

assert (
Expand All @@ -102,8 +75,8 @@ def check_worker_processes_exist() -> None:
time_that_worker_was_killed: datetime = datetime.now(timezone.utc)

# Kill the worker process
pkill_command_result = function_worker.send_command(
f"sudo pkill -9 --full -u {function_worker.configuration.agent_user} deadline-worker-agent"
pkill_command_result = class_worker.send_command(
f"sudo pkill -9 --full -u {class_worker.configuration.agent_user} deadline-worker-agent"
)
assert (
pkill_command_result.exit_code == 0
Expand All @@ -114,7 +87,7 @@ def check_worker_processes_exist() -> None:
check_service_is_active()

# Check that the service active time is strictly after when we killed the process, since it should have restarted after the kill
service_active_enter_timestamp_result = function_worker.send_command(
service_active_enter_timestamp_result = class_worker.send_command(
"systemctl show --property=ActiveEnterTimestamp deadline-worker"
)
assert service_active_enter_timestamp_result.exit_code == 0
Expand All @@ -130,3 +103,92 @@ def check_worker_processes_exist() -> None:

# Check that there are worker processes running
check_worker_processes_exist()

@pytest.mark.skipif(
    os.environ["OPERATING_SYSTEM"] == "linux",
    reason="Windows specific test",
)
def test_windows_worker_restarts_process(
    self,
    deadline_resources,
    deadline_client: DeadlineClient,
    class_worker: EC2InstanceWorker,
) -> None:
    """Verify that the Windows worker service restarts the agent process after it is killed.

    Steps:
      1. Confirm the worker is started and the DeadlineWorker service is running.
      2. Record the process IDs of the running ``pythonservice`` processes.
      3. Forcefully stop those processes by name.
      4. Wait until the service is running again with process IDs that differ
         from the ones recorded before the kill, proving a real restart.
    """
    from typing import Set  # local import: the file's import block is not touched by this change

    assert class_worker.worker_id is not None  # This fixes linter type mismatch

    assert is_worker_started(
        deadline_client=deadline_client,
        farm_id=deadline_resources.farm.id,
        fleet_id=deadline_resources.fleet.id,
        worker_id=class_worker.worker_id,
    )

    # First check that the worker service is running

    @backoff.on_exception(
        backoff.constant,
        Exception,
        max_time=30,
        interval=2,
    )
    def check_service_is_running() -> None:
        # The service should be running
        service_check_result = class_worker.send_command(
            '(Get-Service -Name "DeadlineWorker").Status'
        )
        assert (
            service_check_result.exit_code == 0
        ), "Unable to check whether DeadlineWorker service is running"
        assert (
            "Running" in service_check_result.stdout
        ), f"DeadlineWorker service is in unexpected status {service_check_result.stdout}"

    check_service_is_running()

    # Check that the worker process is running, and remember which process IDs it has

    def get_worker_process_ids() -> Set[str]:
        # `.Id` prints one process ID per line; keep only the numeric tokens so
        # surrounding whitespace or blank lines in stdout do not matter.
        process_check_result = class_worker.send_command("(Get-Process -Name pythonservice).Id")
        assert process_check_result.exit_code == 0, "Worker agent process is not running"
        process_ids = {
            token for token in process_check_result.stdout.split() if token.isdigit()
        }
        assert (
            process_ids
        ), f"Worker agent process is not running: {process_check_result.stdout}"
        return process_ids

    process_ids_before_kill = get_worker_process_ids()

    # Kill the worker process.
    # -Name is required: a bare argument binds to the positional -Id (Int32) parameter.
    # -Force skips the confirmation prompt shown for processes owned by another user.
    pkill_command_result = class_worker.send_command("Stop-Process -Name pythonservice -Force")
    assert (
        pkill_command_result.exit_code == 0
    ), f"Failed to kill the worker agent process: {pkill_command_result}"

    # Wait for the process to be restarted by the service

    check_service_is_running()

    # NOTE(review): the 60 second window assumes the service recovery delay is
    # shorter than that -- confirm against the installed service configuration.
    @backoff.on_exception(
        backoff.constant,
        Exception,
        max_time=60,
        interval=2,
    )
    def check_worker_processes_restarted() -> None:
        # A process with the old ID means the kill did not take effect (or the
        # restart has not happened yet); only brand-new process IDs count.
        process_ids_after_kill = get_worker_process_ids()
        assert process_ids_after_kill.isdisjoint(process_ids_before_kill), (
            "Worker agent process was not restarted: "
            f"before={sorted(process_ids_before_kill)} after={sorted(process_ids_after_kill)}"
        )

    check_worker_processes_restarted()

def test_worker_lifecycle_status_is_expected(
    self,
    deadline_resources,
    deadline_client: DeadlineClient,
    class_worker: EC2InstanceWorker,
) -> None:
    """Verify the Worker Status returned by the GetWorker API when we start/stop workers.

    The worker is expected to be started on entry and stopped after its
    service has been stopped.
    """
    # NOTE(review): class_worker is presumably shared by the tests of this class and
    # this test leaves its service stopped -- confirm nothing runs after it.
    worker_id = class_worker.worker_id
    assert worker_id is not None  # To fix linter type mismatch

    farm_id = deadline_resources.farm.id
    fleet_id = deadline_resources.fleet.id

    worker_is_started = is_worker_started(
        deadline_client=deadline_client,
        farm_id=farm_id,
        fleet_id=fleet_id,
        worker_id=worker_id,
    )
    assert worker_is_started

    class_worker.stop_worker_service()

    worker_is_stopped = is_worker_stopped(
        deadline_client=deadline_client,
        farm_id=farm_id,
        fleet_id=fleet_id,
        worker_id=worker_id,
    )
    assert worker_is_stopped

0 comments on commit f95a6ce

Please sign in to comment.