Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Test that pbs driver ignores qstat flakiness #7414

Merged
merged 1 commit into from
Apr 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 58 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
import fileinput
import json
import logging
import os
import pkgutil
import resource
import shutil
import stat
import sys
from argparse import ArgumentParser
from os.path import dirname
from pathlib import Path
from textwrap import dedent
from typing import TYPE_CHECKING, cast
from unittest.mock import MagicMock

Expand Down Expand Up @@ -440,3 +443,58 @@ def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)

monkeypatch.setattr("ert.cli.main.EvaluatorServerConfig", MockESConfig)


# Two-line header (column titles followed by a dashed ruler) that the mocked
# `qstat` script in this file echoes ahead of its job row.
QSTAT_HEADER = (
"Job id Name User Time Use S Queue\n"
"----------------------------- --------------- --------------- -------- - ---------------\n"
)
# printf(1)-style format string for one job row; the mocked `qstat` script in
# this file passes it six fields, one per column title in the header.
# NOTE(review): despite the name, this formats a job row, not the header.
QSTAT_HEADER_FORMAT = "%-30s %-15s %-15s %-8s %-1s %-5s"


@pytest.fixture
def create_mock_flaky_qstat(monkeypatch, tmp_path):
    """Yield the factory that installs fake ``qsub``/``qstat`` executables.

    A ``bin`` directory is created under ``tmp_path``, prepended to ``PATH``
    and made the current working directory. The yielded callable,
    ``_mock_flaky_qstat``, writes its scripts using paths relative to the
    current working directory.
    """
    fake_bin_dir = tmp_path / "bin"
    fake_bin_dir.mkdir()
    inherited_path = os.environ["PATH"]
    monkeypatch.setenv("PATH", f"{fake_bin_dir}:{inherited_path}")
    monkeypatch.chdir(fake_bin_dir)
    yield _mock_flaky_qstat
berland marked this conversation as resolved.
Show resolved Hide resolved


def _write_executable_script(path: Path, content: str) -> None:
    """Write ``content`` to ``path`` and add the owner-execute permission bit."""
    path.write_text(content, encoding="utf-8")
    path.chmod(path.stat().st_mode | stat.S_IEXEC)


def _mock_flaky_qstat(error_message_to_output: str):
    """Create fake ``qsub`` and ``qstat`` executables in the current directory.

    ``qsub`` always prints ``1``. ``qstat`` keeps an invocation counter in
    ``counter_file`` (also in the current directory): its first three
    invocations print ``error_message_to_output`` on stderr and exit with
    status 2; every later invocation reports job ``1`` in state ``E``, as
    JSON when ``-Fjson`` is among the arguments and as a text table otherwise.

    Args:
        error_message_to_output: Text the fake ``qstat`` writes to stderr
            while it is still failing. It may span several lines and may
            contain shell metacharacters.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    import shlex

    _write_executable_script(Path("qsub"), "#!/bin/sh\necho '1'")

    # Dedent the template BEFORE substituting values: the header and the error
    # message contain newlines, and interpolating them first would put text at
    # column 0, leaving dedent with no common margin to strip.
    script_template = dedent(
        """\
        #!/bin/sh
        count=0
        if [ -f counter_file ]; then
            count=$(cat counter_file)
        fi
        echo "$((count+1))" > counter_file
        if [ "$count" -ge 3 ]; then
            json_flag_set=false;
            while [ "$#" -gt 0 ]; do
                case "$1" in
                    -Fjson)
                        json_flag_set=true
                        ;;
                esac
                shift
            done
            if [ "$json_flag_set" = true ]; then
                echo {job_json}
            else
                echo {header}; printf {row_format} 1 foo someuser 0 E normal
            fi
        else
            echo {error_message} >&2
            exit 2
        fi
        """
    )
    job_json = json.dumps(
        {"Jobs": {"1": {"Job_Name": "1", "job_state": "E", "Exit_status": "0"}}}
    )
    # shlex.quote keeps every substituted value a single literal shell word,
    # so quotes, `$` or backticks in the error message cannot alter the script.
    qstat_script = script_template.format(
        job_json=shlex.quote(job_json),
        header=shlex.quote(QSTAT_HEADER),
        row_format=shlex.quote(QSTAT_HEADER_FORMAT),
        error_message=shlex.quote(error_message_to_output),
    )
    _write_executable_script(Path("qstat"), qstat_script)
4 changes: 2 additions & 2 deletions tests/integration_tests/scheduler/bin/qstat.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
from typing import Any, Dict, Optional

# Header printed ahead of the job rows: column titles followed by a dashed
# ruler. Only the post-change pair of lines is kept; leaving the superseded
# pair in place would concatenate into a doubled, four-line header.
QSTAT_HEADER = (
    "Job id Name User Time Use S Queue\n"
    "----------------------------- --------------- --------------- -------- - ---------------\n"
)


Expand Down
30 changes: 30 additions & 0 deletions tests/integration_tests/scheduler/test_openpbs_driver.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
from functools import partial
from pathlib import Path

import pytest

Expand Down Expand Up @@ -27,6 +28,35 @@ def queue_name_config():
return ""


@pytest.mark.timeout(30)
@pytest.mark.integration_test
@pytest.mark.usefixtures("copy_poly_case")
@pytest.mark.parametrize(
    "text_to_ignore",
    [
        "pbs_iff: cannot connect to host\npbs_iff: all reserved ports in use",
        "qstat: Invalid credential",
    ],
)
def test_that_openpbs_driver_ignores_qstat_flakiness(
    text_to_ignore, caplog, capsys, create_mock_flaky_qstat
):
    """Run an ensemble experiment against a ``qstat`` that fails at first.

    The fake ``qstat`` prints ``text_to_ignore`` on stderr and exits non-zero
    for its first invocations. The test checks that ``qstat`` was invoked at
    least three times and that the error text shows up neither in captured
    stdout/stderr nor in the captured log.
    """
    create_mock_flaky_qstat(text_to_ignore)
    with open("poly.ert", mode="a+", encoding="utf-8") as f:
        f.write("QUEUE_SYSTEM TORQUE\nNUM_REALIZATIONS 1")
    run_cli(
        ENSEMBLE_EXPERIMENT_MODE,
        "--enable-scheduler",
        "poly.ert",
    )
    assert Path("counter_file").exists()
    assert int(Path("counter_file").read_text(encoding="utf-8")) >= 3
    # readouterr() drains both capture buffers, so read once and check both
    # streams from the same snapshot; a second call would return empty text
    # and make the stderr assertion pass unconditionally.
    captured = capsys.readouterr()
    assert text_to_ignore not in captured.out
    assert text_to_ignore not in captured.err
    assert text_to_ignore not in caplog.text
jonathan-eq marked this conversation as resolved.
Show resolved Hide resolved


async def mock_failure(message, *args, **kwargs):
    """Coroutine stand-in that always fails.

    Any extra positional or keyword arguments are accepted and ignored; the
    call always raises ``RuntimeError`` carrying ``message``.
    """
    failure = RuntimeError(message)
    raise failure

Expand Down
49 changes: 37 additions & 12 deletions tests/unit_tests/scheduler/test_openpbs_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import pytest
from hypothesis import given
from hypothesis import strategies as st
from tests.conftest import QSTAT_HEADER, QSTAT_HEADER_FORMAT
from tests.utils import poll

from ert.scheduler import OpenPBSDriver
Expand Down Expand Up @@ -200,25 +201,18 @@ async def test_cluster_label():
assert "-l foobar" in Path("captured_qsub_args").read_text(encoding="utf-8")


# NOTE(review): these two values equal QSTAT_HEADER and QSTAT_HEADER_FORMAT,
# which this section also imports from tests.conftest. They look like the
# pre-change local copies that the import supersedes — confirm before keeping
# both, since this assignment rebinds the imported QSTAT_HEADER name.
QSTAT_HEADER = (
"Job id Name User Time Use S Queue\n"
"----------------------------- --------------- --------------- -------- - ---------------\n"
)
# printf(1)-style format for one job row (six fields).
formatter = "%-30s %-15s %-15s %-8s %-1s %-5s"


@pytest.mark.parametrize(
"qstat_script, started_expected",
[
pytest.param(
f"echo '{QSTAT_HEADER}';printf '{formatter}' 1 foo someuser 0 R normal; exit 0",
f"echo '{QSTAT_HEADER}';printf '{QSTAT_HEADER_FORMAT}' 1 foo someuser 0 R normal; exit 0",
True,
id="all-good",
),
pytest.param(
(
f"echo '{QSTAT_HEADER}'; "
f"printf '{formatter}' 1 foo someuser 0 R normal"
f"printf '{QSTAT_HEADER_FORMAT}' 1 foo someuser 0 R normal"
),
True,
id="all-good-properly-formatted",
Expand All @@ -234,19 +228,19 @@ async def test_cluster_label():
id="empty_cluster_specific_id",
),
pytest.param(
f"printf '{formatter}' 1 foo someuser 0 Z normal",
f"printf '{QSTAT_HEADER_FORMAT}' 1 foo someuser 0 Z normal",
False,
id="unknown_jobstate_token_from_pbs", # Never observed
),
pytest.param(
f"echo '{QSTAT_HEADER}'; printf '{formatter}' 1 foo someuser 0 R normal; "
f"echo '{QSTAT_HEADER}'; printf '{QSTAT_HEADER_FORMAT}' 1 foo someuser 0 R normal; "
"echo 'qstat: Unknown Job Id 2' >&2 ; exit 153",
# If we have some success and some failures, actual command returns 153
True,
id="error_for_irrelevant_job_id",
),
pytest.param(
f"echo '{QSTAT_HEADER}'; printf '{formatter}' 2 foo someuser 0 R normal",
f"echo '{QSTAT_HEADER}'; printf '{QSTAT_HEADER_FORMAT}' 2 foo someuser 0 R normal",
False,
id="wrong-job-id",
),
Expand Down Expand Up @@ -468,3 +462,34 @@ async def test_keep_qsub_output(
assert " -o /dev/null -e /dev/null" in Path("captured_qsub_args").read_text(
encoding="utf-8"
)


@pytest.mark.parametrize(
    "text_to_ignore",
    [
        "pbs_iff: cannot connect to host\npbs_iff: all reserved ports in use",
        "qstat: Invalid credential",
    ],
)
async def test_that_openpbs_driver_ignores_qstat_flakiness(
    text_to_ignore: str, create_mock_flaky_qstat, caplog, capsys
):
    """Poll an ``OpenPBSDriver`` whose ``qstat`` fails at first.

    The fake ``qstat`` prints ``text_to_ignore`` on stderr and exits non-zero
    for its first invocations. The test checks that ``qstat`` was invoked at
    least three times and that the error text shows up neither in captured
    stdout/stderr nor in the captured log.
    """
    create_mock_flaky_qstat(error_message_to_output=text_to_ignore)
    driver = OpenPBSDriver()
    await driver.submit(0, "sleep")

    was_started = False

    async def started(iens):
        nonlocal was_started
        if iens == 0:
            was_started = True

    with contextlib.suppress(TypeError):
        await asyncio.wait_for(poll(driver, expected={0}, started=started), timeout=10)

    assert Path("counter_file").exists()
    assert int(Path("counter_file").read_text(encoding="utf-8")) >= 3
    # readouterr() drains both capture buffers, so read once and check both
    # streams from the same snapshot; a second call would return empty text
    # and make the stderr assertion pass unconditionally.
    captured = capsys.readouterr()
    assert text_to_ignore not in captured.out
    assert text_to_ignore not in captured.err
    assert text_to_ignore not in caplog.text
Loading