Skip to content

Commit

Permalink
Remove deprecated slurm options MEMORY and MEMORY_PER_CPU
Browse files Browse the repository at this point in the history
  • Loading branch information
jonathan-eq committed Dec 11, 2024
1 parent c9d43bc commit dcdecd5
Show file tree
Hide file tree
Showing 13 changed files with 11 additions and 189 deletions.
4 changes: 2 additions & 2 deletions docs/ert/reference/configuration/keywords.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1829,8 +1829,8 @@ in :ref:`queue-system-chapter`. In brief, the queue systems have the following o
``NUM_CPUS_PER_NODE``, ``MEMORY_PER_JOB``, ``KEEP_QSUB_OUTPUT``, ``SUBMIT_SLEEP``,
``QUEUE_QUERY_TIMEOUT``
* :ref:`SLURM <slurm-systems>` — ``SBATCH``, ``SCANCEL``, ``SCONTROL``, ``SACCT``,
``SQUEUE``, ``PARTITION``, ``SQUEUE_TIMEOUT``, ``MAX_RUNTIME``, ``MEMORY``,
``MEMORY_PER_CPU``, ``INCLUDE_HOST``, ``EXCLUDE_HOST``, ``MAX_RUNNING``
``SQUEUE``, ``PARTITION``, ``SQUEUE_TIMEOUT``, ``MAX_RUNTIME``, ``INCLUDE_HOST``,
``EXCLUDE_HOST``, ``MAX_RUNNING``

In addition, some options apply to all queue systems:

Expand Down
14 changes: 0 additions & 14 deletions docs/ert/reference/configuration/queue.rst
Original file line number Diff line number Diff line change
Expand Up @@ -449,20 +449,6 @@ only the most necessary options have been added.

QUEUE_OPTION SLURM MAX_RUNTIME 100

.. _slurm_memory:
.. topic:: MEMORY

Memory (in MiB) required per node, for example::

QUEUE_OPTION SLURM MEMORY 16000

.. _slurm_memory_per_cpu:
.. topic:: MEMORY_PER_CPU

Memory (in MiB) required per allocated CPU, for example::

QUEUE_OPTION SLURM MEMORY_PER_CPU 4000

.. _slurm_include_host:
.. topic:: INCLUDE_HOST

Expand Down
12 changes: 0 additions & 12 deletions docs/everest/config_generated.rst
Original file line number Diff line number Diff line change
Expand Up @@ -920,18 +920,6 @@ Simulation settings
should be included in the slurm run


**max_memory (optional)**
Type: *Optional[str]*

Maximum memory usage for a slurm job.


**max_memory_cpu (optional)**
Type: *Optional[str]*

Maximum memory usage per cpu for a slurm job.


**max_runtime (optional)**
Type: *Optional[NonNegativeInt]*

Expand Down
12 changes: 0 additions & 12 deletions src/ert/config/parsing/config_schema_deprecations.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,18 +211,6 @@
"the future. Replace by REALIZATION_MEMORY.",
check=lambda line: "MEMORY_PER_JOB" in line,
),
DeprecationInfo(
keyword="QUEUE_OPTION",
message="MEMORY as QUEUE_OPTION to SLURM is deprecated and will be removed in "
"the future. Replace by REALIZATION_MEMORY.",
check=lambda line: "MEMORY" in line,
),
DeprecationInfo(
keyword="QUEUE_OPTION",
message="MEMORY_PER_CPU as QUEUE_OPTION to SLURM is deprecated and will be removed in "
"the future. Use REALIZATION_MEMORY instead.",
check=lambda line: "MEMORY_PER_CPU" in line,
),
DeprecationInfo(
keyword="QUEUE_OPTION",
message="Memory requirements in LSF should now be set using REALIZATION_MEMORY and not"
Expand Down
35 changes: 5 additions & 30 deletions src/ert/config/queue_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import re
import shutil
from abc import abstractmethod
from collections.abc import Mapping
from dataclasses import asdict, field, fields
from typing import Annotated, Any, Literal, no_type_check

Expand Down Expand Up @@ -151,7 +150,7 @@ def driver_options(self) -> dict[str, Any]:
@pydantic.field_validator("memory_per_job")
@classmethod
def check_memory_per_job(cls, value: str | None) -> str | None:
if not queue_memory_usage_formats[QueueSystem.TORQUE].validate(value):
if not torque_memory_usage_format.validate(value):
raise ValueError("wrong memory format")
return value

Expand All @@ -166,8 +165,6 @@ class SlurmQueueOptions(QueueOptions):
squeue: NonEmptyString = "squeue"
exclude_host: str = ""
include_host: str = ""
memory: NonEmptyString | None = None
memory_per_cpu: NonEmptyString | None = None
partition: NonEmptyString | None = None # aka queue_name
squeue_timeout: pydantic.PositiveFloat = 2
max_runtime: pydantic.NonNegativeFloat | None = None
Expand All @@ -188,13 +185,6 @@ def driver_options(self) -> dict[str, Any]:
driver_dict.pop("submit_sleep")
return driver_dict

@pydantic.field_validator("memory", "memory_per_cpu")
@classmethod
def check_memory_per_job(cls, value: str | None) -> str | None:
if not queue_memory_usage_formats[QueueSystem.SLURM].validate(value):
raise ValueError("wrong memory format")
return value


@dataclass
class QueueMemoryStringFormat:
Expand All @@ -212,12 +202,10 @@ def validate(self, mem_str_format: str | None) -> bool:
)


queue_memory_usage_formats: Mapping[str, QueueMemoryStringFormat] = {
QueueSystem.SLURM: QueueMemoryStringFormat(suffixes=["", "K", "M", "G", "T"]),
QueueSystem.TORQUE: QueueMemoryStringFormat(
suffixes=["kb", "mb", "gb", "KB", "MB", "GB"]
),
}
torque_memory_usage_format: QueueMemoryStringFormat = QueueMemoryStringFormat(
suffixes=["kb", "mb", "gb", "KB", "MB", "GB"]
)

valid_options: dict[str, list[str]] = {
QueueSystem.LOCAL.name: [field.name.upper() for field in fields(LocalQueueOptions)],
QueueSystem.LSF.name: [field.name.upper() for field in fields(LsfQueueOptions)],
Expand Down Expand Up @@ -350,19 +338,6 @@ def from_dict(cls, config_dict: ConfigDict) -> QueueConfig:
"MEMORY_PER_JOB",
selected_queue_system == QueueSystem.TORQUE,
)
if isinstance(_queue_vals, SlurmQueueOptions) and realization_memory:
if _queue_vals.memory:
_throw_error_or_warning(
"Do not specify both REALIZATION_MEMORY and SLURM option MEMORY",
"MEMORY",
selected_queue_system == QueueSystem.SLURM,
)
if _queue_vals.memory_per_cpu:
_throw_error_or_warning(
"Do not specify both REALIZATION_MEMORY and SLURM option MEMORY_PER_CPU",
"MEMORY_PER_CPU",
selected_queue_system == QueueSystem.SLURM,
)

return QueueConfig(
job_script,
Expand Down
23 changes: 3 additions & 20 deletions src/ert/scheduler/slurm_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,22 +67,17 @@ def __init__(
scancel_cmd: str = "scancel",
sbatch_cmd: str = "sbatch",
user: str | None = None,
memory: str | None = "",
realization_memory: int | None = 0,
queue_name: str | None = None,
memory_per_cpu: str | None = None,
max_runtime: float | None = None,
squeue_timeout: float = 2,
project_code: str | None = None,
activate_script: str = "",
) -> None:
"""
The arguments "memory" and "realization_memory" are currently both
present, where the latter has been added later. "realization_memory" is
a global keyword in Ert and not queue specific. It is always supplied
in bytes to the driver. "memory" is a string with a Slurm unit and is a
Slurm queue specific queue option. They are both supplied to sbatch
using "--mem" so they cannot both be defined at the same time.
The argument "realization_memory" is a global keyword in Ert
and not queue specific. It is always supplied in bytes to
the driver, and forwarded to sbatch using "--mem".
    In slurm, --mem=0 requests all memory on a node. In Ert,
zero "realization memory" is the default and means no intended
Expand All @@ -93,16 +88,8 @@ def __init__(
self._iens2jobid: dict[int, str] = {}
self._jobs: dict[str, JobData] = {}
self._job_error_message_by_iens: dict[int, str] = {}
self._memory_per_cpu = memory_per_cpu
self._memory = memory
self._realization_memory = realization_memory

if self._realization_memory and self._memory:
raise ValueError(
"Overspecified memory, use either memory "
"or realization_memory, not both"
)

self._max_runtime = max_runtime
self._queue_name = queue_name

Expand Down Expand Up @@ -148,8 +135,6 @@ def _submit_cmd(
        # In slurm, --mem=0 requests all memory on a node. In Ert,
# zero realization memory means no intended memory allocation.
sbatch_with_args.append(f"--mem={self._realization_memory // 1024**2}M")
if self._memory:
sbatch_with_args.append(f"--mem={self._memory}")
if self._include_hosts:
sbatch_with_args.append(f"--nodelist={self._include_hosts}")
if self._exclude_hosts:
Expand All @@ -158,8 +143,6 @@ def _submit_cmd(
sbatch_with_args.append(
f"--time={_seconds_to_slurm_time_format(self._max_runtime)}"
)
if self._memory_per_cpu:
sbatch_with_args.append(f"--mem-per-cpu={self._memory_per_cpu}")
if self._queue_name:
sbatch_with_args.append(f"--partition={self._queue_name}")
if self._project_code:
Expand Down
8 changes: 0 additions & 8 deletions src/everest/config/simulator_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,6 @@ class SimulatorConfig(BaseModel, HasErtQueueOptions, extra="forbid"): # type: i
description="""Comma separated list of nodes that
should be included in the slurm run""",
)
max_memory: str | None = Field(
default=None,
description="Maximum memory usage for a slurm job.",
)
max_memory_cpu: str | None = Field(
default=None,
description="Maximum memory usage per cpu for a slurm job.",
)
max_runtime: NonNegativeInt | None = Field(
default=None,
description="""Maximum allowed running time of a forward model. When
Expand Down
2 changes: 0 additions & 2 deletions src/everest/config_keys.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,6 @@ class ConfigKeys:
SLURM_SQUEUE = "squeue"
SLURM_SQUEUE_TIMEOUT = "squeue_timeout"
SLURM_MAX_RUNTIME = "slurm_timeout"
SLURM_MEMORY = "max_memory"
SLURM_MEMORY_PER_CPU = "max_memory_cpu"
SLURM_EXCLUDE_HOST_OPTION = "exclude_host"
SLURM_INCLUDE_HOST_OPTION = "include_host"
MAX = "max"
Expand Down
2 changes: 0 additions & 2 deletions src/everest/queue_driver/queue_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@
(ConfigKeys.SLURM_SACCT, "SACCT"),
(ConfigKeys.SLURM_SQUEUE, "SQUEUE"),
(ConfigKeys.SLURM_MAX_RUNTIME, "MAX_RUNTIME"),
(ConfigKeys.SLURM_MEMORY, "MEMORY"),
(ConfigKeys.SLURM_MEMORY_PER_CPU, "MEMORY_PER_CPU"),
(ConfigKeys.SLURM_SQUEUE_TIMEOUT, "SQUEUE_TIMEOUT"),
(ConfigKeys.SLURM_EXCLUDE_HOST_OPTION, "EXCLUDE_HOST"),
(ConfigKeys.SLURM_INCLUDE_HOST_OPTION, "INCLUDE_HOST"),
Expand Down
7 changes: 0 additions & 7 deletions tests/ert/unit_tests/config/config_dict_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,18 +93,12 @@ def report_steps(draw):
small_floats = st.floats(min_value=1.0, max_value=10.0, allow_nan=False)
positives = st.integers(min_value=1, max_value=10000)
queue_systems = st.sampled_from(QueueSystem)
memory_unit_slurm = st.sampled_from(["", "K", "G", "M", "T"])
memory_unit_torque = st.sampled_from(["kb", "mb", "gb"])
memory_unit_lsf = st.sampled_from(
["", "KB", "K", "MB", "M", "GB", "G", "TB", "T", "PB", "P", "EB", "E", "ZB", "Z"]
)


@st.composite
def memory_with_unit_slurm(draw):
return f"{draw(positives)}{draw(memory_unit_slurm)}"


@st.composite
def memory_with_unit_torque(draw):
return f"{draw(positives)}{draw(memory_unit_torque)}"
Expand All @@ -116,7 +110,6 @@ def memory_with_unit_lsf(draw):


memory_with_unit = {
QueueSystemWithGeneric.SLURM: memory_with_unit_slurm,
QueueSystemWithGeneric.TORQUE: memory_with_unit_torque,
QueueSystemWithGeneric.LSF: memory_with_unit_lsf,
QueueSystemWithGeneric.LOCAL: memory_with_unit_lsf, # Just a dummy value
Expand Down
39 changes: 0 additions & 39 deletions tests/ert/unit_tests/config/test_queue_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,29 +154,6 @@ def test_invalid_realization_memory(invalid_memory_spec: str):
)


def test_conflicting_realization_slurm_memory():
with (
pytest.raises(ConfigValidationError),
pytest.warns(ConfigWarning, match="deprecated"),
):
ErtConfig.from_file_contents(
"NUM_REALIZATIONS 1\n"
"REALIZATION_MEMORY 10Mb\n"
"QUEUE_SYSTEM SLURM\n"
"QUEUE_OPTION SLURM MEMORY 20M\n"
)


def test_conflicting_realization_slurm_memory_per_cpu():
with pytest.raises(ConfigValidationError):
ErtConfig.from_file_contents(
"NUM_REALIZATIONS 1\n"
"REALIZATION_MEMORY 10Mb\n"
"QUEUE_SYSTEM SLURM\n"
"QUEUE_OPTION SLURM MEMORY_PER_CPU 20M\n"
)


def test_conflicting_realization_openpbs_memory_per_job():
with (
pytest.raises(ConfigValidationError),
Expand Down Expand Up @@ -294,22 +271,6 @@ def test_that_configuring_another_queue_system_gives_warning():
)


def test_that_slurm_queue_mem_options_are_validated():
with pytest.raises(ConfigValidationError) as e:
ErtConfig.from_file_contents(
"NUM_REALIZATIONS 1\n"
"QUEUE_SYSTEM SLURM\n"
"QUEUE_OPTION SLURM MEMORY_PER_CPU 5mb\n"
)

info = e.value.errors[0]

assert "Value error, wrong memory format. Got input '5mb'." in info.message
assert info.line == 3
assert info.column == 35
assert info.end_column == info.column + 3


@pytest.mark.parametrize(
"mem_per_job",
["5gb", "5mb", "5kb"],
Expand Down
40 changes: 1 addition & 39 deletions tests/ert/unit_tests/scheduler/test_slurm_driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,54 +94,16 @@ async def test_numcpu_sets_ntasks(num_cpu):
)


@pytest.mark.usefixtures("capturing_sbatch")
@given(memory_in_mb=st.integers(min_value=1))
async def test_memory_is_set(memory_in_mb):
driver = SlurmDriver(memory=f"{memory_in_mb}mb")
await driver.submit(0, "sleep", name="myjobname")
assert f"--mem={memory_in_mb}mb" in Path("captured_sbatch_args").read_text(
encoding="utf-8"
)


@pytest.mark.usefixtures("capturing_sbatch")
@given(memory_in_bytes=st.integers(min_value=1))
async def test_realization_memoryt(memory_in_bytes):
async def test_realization_memory(memory_in_bytes):
driver = SlurmDriver(realization_memory=memory_in_bytes)
await driver.submit(0, "sleep", name="myjobname")
assert f"--mem={memory_in_bytes // 1024**2}M" in Path(
"captured_sbatch_args"
).read_text(encoding="utf-8")


@pytest.mark.parametrize(
"memory_value, realization_memory_value, expectation",
[
        # Empty string for memory is equivalent to zero integer for realization_memory
("", 1, does_not_raise()),
("1mb", 0, does_not_raise()),
("1mb", 1, pytest.raises(ValueError)),
("0", 1, pytest.raises(ValueError)),
],
)
async def test_overspecified_memory_allocation(
memory_value, realization_memory_value, expectation
):
    # Empty string for memory is equivalent to zero integer for realization_memory
with expectation:
SlurmDriver(memory=memory_value, realization_memory=realization_memory_value)


@pytest.mark.usefixtures("capturing_sbatch")
@given(memory_per_cpu_in_mb=st.integers(min_value=1))
async def test_memory_per_cpu_is_set(memory_per_cpu_in_mb):
driver = SlurmDriver(memory_per_cpu=f"{memory_per_cpu_in_mb}mb")
await driver.submit(0, "sleep", name="myjobname")
assert f"--mem-per-cpu={memory_per_cpu_in_mb}mb" in Path(
"captured_sbatch_args"
).read_text(encoding="utf-8")


@pytest.mark.usefixtures("capturing_sbatch")
@given(exclude=st.text(st.characters(whitelist_categories=("Lu",)), min_size=1))
async def test_exclude_is_set(exclude):
Expand Down
2 changes: 0 additions & 2 deletions tests/everest/test_res_initialization.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,14 +53,12 @@
{
"queue_system": "slurm",
"name": "default-queue",
"max_memory": "1000M",
"exclude_host": "host1,host2,host3,host4",
"include_host": "host5,host6,host7,host8",
},
{
"exclude_hosts": "host1,host2,host3,host4",
"include_hosts": "host5,host6,host7,host8",
"memory": "1000M",
"queue_name": "default-queue",
"sacct_cmd": "sacct",
"sbatch_cmd": "sbatch",
Expand Down

0 comments on commit dcdecd5

Please sign in to comment.