Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ Personalized resources: Allow override of computational service needed resources (⚠️ devops) #3989

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
4315a96
add boot mode
sanderegg Mar 17, 2023
2eb559a
catalog changes
sanderegg Mar 17, 2023
07e1869
compute vram in dask-sidecar
sanderegg Mar 17, 2023
6a638f8
dask-sidecar compute VRAM on machine
sanderegg Mar 17, 2023
b0213e1
director-v2
sanderegg Mar 17, 2023
6fc5e0a
webserver
sanderegg Mar 17, 2023
3cbba49
removed boot mode from sidecar
sanderegg Mar 20, 2023
ea799c7
removed MPI test
sanderegg Mar 20, 2023
6fd4b4a
refactor
sanderegg Mar 20, 2023
8ccc3ff
improve coverage
sanderegg Mar 20, 2023
97a2c2c
refactor
sanderegg Mar 20, 2023
026140d
invert testing order for better logs
sanderegg Mar 20, 2023
a07b88f
types
sanderegg Mar 20, 2023
c95954b
check for expected ENVs
sanderegg Mar 20, 2023
3f68e3c
only test what is necessary
sanderegg Mar 20, 2023
be75e8d
fix get current task resources
sanderegg Mar 20, 2023
adb409d
fix test to check resources
sanderegg Mar 20, 2023
ffa6cf1
pylance
sanderegg Mar 20, 2023
f820c11
pylance
sanderegg Mar 20, 2023
8c145ad
fix parsing of isolve progress
sanderegg Mar 20, 2023
16898be
pass running mode to dask backend
sanderegg Mar 20, 2023
382f09d
reduce duplication
sanderegg Mar 20, 2023
890d3ab
better name
sanderegg Mar 20, 2023
be7f9c4
@mguidon review: misleading comment
sanderegg Mar 20, 2023
da9edf4
@mguidon review: add docs
sanderegg Mar 20, 2023
4fda234
fix resources testing
sanderegg Mar 20, 2023
ccfef5f
linter
sanderegg Mar 20, 2023
209fa99
fix tests
sanderegg Mar 20, 2023
ac084de
completely remove MPI
sanderegg Mar 20, 2023
670efe8
upgrade openapi
sanderegg Mar 20, 2023
1da94fa
remove all MPI
sanderegg Mar 20, 2023
15a9b0c
sonarcloud
sanderegg Mar 20, 2023
d991327
missing boot mode
sanderegg Mar 20, 2023
277e7dd
fix start computation test
sanderegg Mar 21, 2023
0965155
ensure an empty list is returned if the label does not exist
sanderegg Mar 21, 2023
b4304b6
added project-directory option
sanderegg Mar 21, 2023
3be0dbd
now calls docker-compose-config.bash script
sanderegg Mar 21, 2023
53afe47
typo
sanderegg Mar 21, 2023
52348da
fix public-api calls
sanderegg Mar 21, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import logging
from typing import Any, Final, Union
from enum import auto
from typing import Any, Final, Optional, Union

from models_library.docker import DockerGenericTag
from models_library.utils.enums import StrAutoEnum
from pydantic import (
BaseModel,
ByteSize,
Expand Down Expand Up @@ -58,6 +60,12 @@ class Config:
ResourcesDict = dict[ResourceName, ResourceValue]


class BootMode(StrAutoEnum):
    """Modes in which a service can be booted (used by ``ImageResources.boot_modes``)."""

    # NOTE(review): the member values are generated by StrAutoEnum through
    # auto() -- presumably the member name as a string; confirm in
    # models_library.utils.enums.
    CPU = auto()
    GPU = auto()
    MPI = auto()


class ImageResources(BaseModel):
image: DockerGenericTag = Field(
...,
Expand All @@ -69,6 +77,10 @@ class ImageResources(BaseModel):
),
)
resources: ResourcesDict
boot_modes: list[BootMode] = Field(
default=[BootMode.CPU],
description="describe how a service shall be booted, using CPU, MPI, openMP or GPU",
)

class Config:
schema_extra = {
Expand Down Expand Up @@ -96,13 +108,17 @@ class ServiceResourcesDictHelpers:
def create_from_single_service(
    image: DockerGenericTag,
    resources: ResourcesDict,
    boot_modes: Optional[list[BootMode]] = None,
) -> ServiceResourcesDict:
    """Build a ``ServiceResourcesDict`` describing exactly one service.

    The single entry is stored under ``DEFAULT_SINGLE_SERVICE_NAME`` and
    carries the given image, resources and boot modes.

    Args:
        image: docker image tag of the service.
        resources: resources attached to that image.
        boot_modes: boot modes of the service; ``None`` means
            ``[BootMode.CPU]``.

    Returns:
        The parsed ``ServiceResourcesDict`` with one entry.
    """
    single_service_spec = {
        "image": image,
        "resources": resources,
        # a fresh list is created per call instead of using a mutable default
        "boot_modes": [BootMode.CPU] if boot_modes is None else boot_modes,
    }
    return parse_obj_as(
        ServiceResourcesDict,
        {DEFAULT_SINGLE_SERVICE_NAME: single_service_spec},
    )
Expand All @@ -127,6 +143,7 @@ class Config:
"reservation": parse_obj_as(ByteSize, "2Gib"),
},
},
"boot_modes": [BootMode.CPU],
},
},
# service with a compose spec
Expand All @@ -137,6 +154,7 @@ class Config:
"CPU": {"limit": 0.3, "reservation": 0.3},
"RAM": {"limit": 53687091232, "reservation": 53687091232},
},
"boot_modes": [BootMode.CPU],
},
"s4l-core": {
"image": "simcore/services/dynamic/s4l-core-dy:3.0.0",
Expand All @@ -145,6 +163,7 @@ class Config:
"RAM": {"limit": 17179869184, "reservation": 536870912},
"VRAM": {"limit": 1, "reservation": 1},
},
"boot_modes": [BootMode.GPU],
},
"sym-server": {
"image": "simcore/services/dynamic/sym-server:3.0.0",
Expand All @@ -155,6 +174,7 @@ class Config:
"reservation": parse_obj_as(ByteSize, "2Gib"),
},
},
"boot_modes": [BootMode.CPU],
},
},
# compose spec with image outside the platform
Expand All @@ -168,6 +188,7 @@ class Config:
"reservation": parse_obj_as(ByteSize, "2Gib"),
},
},
"boot_modes": [BootMode.CPU],
},
"proxy": {
"image": "traefik:v2.6.6",
Expand All @@ -178,6 +199,7 @@ class Config:
"reservation": parse_obj_as(ByteSize, "2Gib"),
},
},
"boot_modes": [BootMode.CPU],
},
},
]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,7 @@ def env_file_for_testing(
@pytest.fixture(scope="module")
def simcore_docker_compose(
osparc_simcore_root_dir: Path,
osparc_simcore_scripts_dir: Path,
env_file_for_testing: Path,
temp_folder: Path,
) -> dict[str, Any]:
Expand All @@ -155,6 +156,7 @@ def simcore_docker_compose(

compose_specs = run_docker_compose_config(
project_dir=osparc_simcore_root_dir / "services",
scripts_dir=osparc_simcore_scripts_dir,
docker_compose_paths=docker_compose_paths,
env_file_path=env_file_for_testing,
destination_path=temp_folder / "simcore_docker_compose.yml",
Expand Down Expand Up @@ -205,6 +207,7 @@ def inject_filestash_config_path(
@pytest.fixture(scope="module")
def ops_docker_compose(
osparc_simcore_root_dir: Path,
osparc_simcore_scripts_dir: Path,
env_file_for_testing: Path,
temp_folder: Path,
inject_filestash_config_path: None,
Expand All @@ -224,6 +227,7 @@ def ops_docker_compose(

compose_specs = run_docker_compose_config(
project_dir=osparc_simcore_root_dir / "services",
scripts_dir=osparc_simcore_scripts_dir,
docker_compose_paths=docker_compose_path,
env_file_path=env_file_for_testing,
destination_path=temp_folder / "ops_docker_compose.yml",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ def get_service_published_port(

def run_docker_compose_config(
docker_compose_paths: Union[list[Path], Path],
scripts_dir: Path,
project_dir: Path,
env_file_path: Path,
destination_path: Optional[Path] = None,
Expand Down Expand Up @@ -131,26 +132,26 @@ def run_docker_compose_config(
# SEE https://docs.docker.com/compose/reference/

global_options = [
"--project-directory",
"-p",
str(project_dir), # Specify an alternate working directory
]
# https://docs.docker.com/compose/environment-variables/#using-the---env-file--option
global_options += [
"-e",
str(env_file_path), # Custom environment variables
]

# Specify alternate compose files
# - When you use multiple Compose files, all paths in the files are relative to the first configuration file specified with -f.
# You can use the --project-directory option to override this base path.
for docker_compose_path in docker_compose_paths:
global_options += ["--file", os.path.relpath(docker_compose_path, project_dir)]

# https://docs.docker.com/compose/environment-variables/#using-the---env-file--option
global_options += [
"--env-file",
str(env_file_path), # Custom environment variables
]
global_options += [os.path.relpath(docker_compose_path, project_dir)]

# SEE https://docs.docker.com/compose/reference/config/
cmd_options = []
docker_compose_path = scripts_dir / "docker" / "docker-compose-config.bash"
assert docker_compose_path.exists()

cmd = ["docker-compose"] + global_options + ["config"] + cmd_options
cmd = [f"{docker_compose_path}"] + global_options
print(" ".join(cmd))

process = subprocess.run(
Expand Down
13 changes: 12 additions & 1 deletion scripts/docker/docker-compose-config.bash
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,16 @@ show_error() {


env_file=".env"
project_directory=""
# Parse command line arguments
while getopts ":e:" opt; do
while getopts ":e:p:" opt; do
case $opt in
e)
env_file="$OPTARG"
;;
p)
project_directory="$OPTARG"
;;
\?)
show_error "Invalid option: -$OPTARG"
exit 1
Expand Down Expand Up @@ -64,6 +68,10 @@ docker \
compose \
--env-file ${env_file}"

if [ "$project_directory" ]; then
docker_command+=" --project-directory ${project_directory}"
fi

for compose_file_path in "$@"
do
docker_command+=" --file=${compose_file_path}"
Expand Down Expand Up @@ -91,6 +99,9 @@ docker-compose \
do
docker_command+=" --file=${compose_file_path} "
done
if [ "$project_directory" ]; then
docker_command+=" --project-directory ${project_directory}"
fi
docker_command+=" \
config \
| sed --regexp-extended 's/cpus: ([0-9\\.]+)/cpus: \"\\1\"/'"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,9 @@ async def create_job(

# -> director2: ComputationTaskOut = JobStatus
# consistency check
task: ComputationTaskGet = await director2_api.create_computation(job.id, user_id)
task: ComputationTaskGet = await director2_api.create_computation(
job.id, user_id, product_name
)
assert task.id == job.id # nosec

job_status: JobStatus = create_jobstatus_from_task(task)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,14 +102,18 @@ class DirectorV2Api(BaseServiceClientApi):
# ServiceUnavailable: 503

async def create_computation(
self, project_id: UUID, user_id: PositiveInt
self,
project_id: UUID,
user_id: PositiveInt,
product_name: str,
) -> ComputationTaskGet:
resp = await self.client.post(
"/v2/computations",
json={
"user_id": user_id,
"project_id": str(project_id),
"start_pipeline": False,
"product_name": product_name,
},
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
)
from models_library.services import ServiceKey, ServiceVersion
from models_library.services_resources import (
BootMode,
ImageResources,
ResourcesDict,
ServiceResourcesDict,
Expand Down Expand Up @@ -41,6 +42,50 @@
logger = logging.getLogger(__name__)

SIMCORE_SERVICE_COMPOSE_SPEC_LABEL: Final[str] = "simcore.service.compose-spec"
_DEPRECATED_RESOURCES: Final[list[str]] = ["MPI"]
_BOOT_MODE_TO_RESOURCE_NAME_MAP: Final[dict[str, str]] = {"MPI": "MPI", "GPU": "VRAM"}


def _compute_service_available_boot_modes(
    settings: list[SimcoreServiceSettingLabelEntry],
    service_key: ServiceKey,
    service_version: ServiceVersion,
) -> list[BootMode]:
    """Return the boot modes available for a service.

    For backwards compatibility the boot mode is currently derived from the
    ``resources`` entries of the simcore.service.settings labels: the generic
    resources listed under ``Reservations.GenericResources`` are gathered, and
    the first ``BootMode`` (in enum iteration order) whose resource name is
    among them is returned as the only element of the list.
    ``_BOOT_MODE_TO_RESOURCE_NAME_MAP`` translates a boot mode into its
    resource name; a mode without a mapping is looked up by its own value.
    When no mode matches, ``[BootMode.CPU]`` is returned.

    In the future a dedicated label might be used (to add openMP for example)
    so that the resources of a service are not abused for this purpose. The
    boot modes are also meant to be used in a project to let the user choose
    among different boot modes.

    Args:
        settings: the service settings label entries.
        service_key: key of the service, only used in the log message.
        service_version: version of the service, only used in the log message.

    Returns:
        A single-element list holding the detected boot mode.
    """
    found_generic_resources = {}
    for setting in settings:
        if setting.name.lower() != "resources":
            continue
        if isinstance(setting.value, dict):
            reservations = setting.value.get("Reservations", {})
            found_generic_resources |= parse_generic_resource(
                reservations.get("GenericResources", []),
            )
        else:
            # a malformed resources entry is reported and ignored
            logger.warning(
                "resource %s for %s got invalid type",
                f"{setting.dict()!r}",
                f"{service_key}:{service_version}",
            )

    # currently these are unique boot modes: the first match wins
    matching_mode = next(
        (
            mode
            for mode in BootMode
            if _BOOT_MODE_TO_RESOURCE_NAME_MAP.get(mode.value, mode.value)
            in found_generic_resources
        ),
        None,
    )
    if matching_mode is None:
        return [BootMode.CPU]
    return [matching_mode]


def _remove_deprecated_resources(resources: ResourcesDict) -> ResourcesDict:
    """Drop every resource named in ``_DEPRECATED_RESOURCES`` from *resources*.

    The dictionary is modified in place and the same object is returned;
    names that are not present are silently skipped.
    """
    for deprecated_name in _DEPRECATED_RESOURCES:
        resources.pop(deprecated_name, None)
    return resources


def _resources_from_settings(
Expand Down Expand Up @@ -82,7 +127,7 @@ def _resources_from_settings(
entry.value.get("Reservations", {}).get("GenericResources", []),
)

return service_resources
return _remove_deprecated_resources(service_resources)


async def _get_service_labels(
Expand Down Expand Up @@ -116,7 +161,7 @@ def _get_service_settings(
) -> list[SimcoreServiceSettingLabelEntry]:
service_settings = parse_raw_as(
list[SimcoreServiceSettingLabelEntry],
labels.get(SIMCORE_SERVICE_SETTINGS_LABELS, ""),
labels.get(SIMCORE_SERVICE_SETTINGS_LABELS, "[]"),
)
logger.debug("received %s", f"{service_settings=}")
return service_settings
Expand Down Expand Up @@ -162,6 +207,10 @@ async def get_service_resources(
service_resources = _resources_from_settings(
service_settings, default_service_resources, service_key, service_version
)
service_boot_modes = _compute_service_available_boot_modes(
service_settings, service_key, service_version
)

user_specific_service_specs = await services_repo.get_service_specifications(
service_key,
service_version,
Expand All @@ -174,7 +223,7 @@ async def get_service_resources(
)

return ServiceResourcesDictHelpers.create_from_single_service(
image_version, service_resources
image_version, service_resources, service_boot_modes
)

# compose specifications available, potentially multiple services
Expand All @@ -199,15 +248,19 @@ async def get_service_resources(
)

if not spec_service_labels:
spec_service_resources = default_service_resources
spec_service_resources: ResourcesDict = default_service_resources
service_boot_modes = [BootMode.CPU]
else:
spec_service_settings = _get_service_settings(spec_service_labels)
spec_service_resources = _resources_from_settings(
spec_service_resources: ResourcesDict = _resources_from_settings(
spec_service_settings,
default_service_resources,
service_key,
service_version,
)
service_boot_modes = _compute_service_available_boot_modes(
spec_service_settings, service_key, service_version
)
user_specific_service_specs = (
await services_repo.get_service_specifications(
key,
Expand All @@ -225,6 +278,7 @@ async def get_service_resources(
{
"image": image,
"resources": spec_service_resources,
"boot_modes": service_boot_modes,
}
)

Expand Down
Loading