Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

♻️/✨use placement constraints instead of generic resources ⚠️ #5255

Merged
Show file tree
Hide file tree
Changes from 25 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
b66e984
works with legacy services
Jan 19, 2024
f332fb1
updated specs
Jan 22, 2024
27e78ec
Merge remote-tracking branch 'upstream/master' into pr-osparc-generic…
Jan 22, 2024
d3360cc
added tests
Jan 22, 2024
3a4d373
remove commented
Jan 22, 2024
8ee27e7
Merge remote-tracking branch 'upstream/master' into pr-osparc-generic…
Jan 22, 2024
71b8524
fixed unit tests
Jan 22, 2024
4d928fa
fixed broken test
Jan 22, 2024
1c512a7
adding missing
Jan 22, 2024
1a34861
fixed checking
Jan 22, 2024
7603d77
refactor validation
Jan 22, 2024
435b093
rename
Jan 24, 2024
61e5295
rename director-v0
Jan 24, 2024
9d694ca
refactor to always replace and warn when doing so
Jan 24, 2024
a8eb68f
refactor director-v0
Jan 24, 2024
3b63c1a
refactor with new logic
Jan 24, 2024
33350c0
Merge remote-tracking branch 'upstream/master' into pr-osparc-generic…
Jan 24, 2024
910478d
refactor
Jan 24, 2024
f1d31ee
refactor
Jan 24, 2024
96be798
fix typing
Jan 24, 2024
35f972d
no constructors for private attributes
Jan 24, 2024
398d0a3
using new method to pull in constraints
Jan 24, 2024
c29066d
fixed constraints
Jan 24, 2024
0bbe82e
Merge remote-tracking branch 'upstream/master' into pr-osparc-generic…
Jan 24, 2024
f32f1a9
using setter and getter
Jan 24, 2024
69219e5
renaming and moving
Jan 24, 2024
fb99b98
add uniqueness check
Jan 24, 2024
956140a
adding unique dict values
Jan 24, 2024
b54eaf4
refactor
Jan 24, 2024
c506467
rename
Jan 24, 2024
2ca14cc
fix uniqueness check
Jan 24, 2024
90bd772
using old value
Jan 24, 2024
c1cc32e
fix import
Jan 24, 2024
b3cfd3a
using correct type
Jan 24, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .env-devel
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ DASK_SCHEDULER_PORT=8786

DIRECTOR_REGISTRY_CACHING_TTL=900
DIRECTOR_REGISTRY_CACHING=True
DIRECTOR_GENERIC_RESOURCE_PLACEMENT_CONSTRAINTS_SUBSTITUTIONS='{}'

COMPUTATIONAL_BACKEND_DEFAULT_CLUSTER_URL=tcp://dask-scheduler:8786
COMPUTATIONAL_BACKEND_DEFAULT_CLUSTER_FILE_LINK_TYPE=S3
Expand Down Expand Up @@ -202,6 +203,7 @@ DIRECTOR_V2_HOST=director-v2
DIRECTOR_V2_PORT=8000
DIRECTOR_V2_DYNAMIC_SCHEDULER_IGNORE_SERVICES_SHUTDOWN_WHEN_CREDITS_LIMIT_REACHED=1
DIRECTOR_V2_SERVICES_CUSTOM_CONSTRAINTS=[]
DIRECTOR_V2_GENERIC_RESOURCE_PLACEMENT_CONSTRAINTS_SUBSTITUTIONS='{}'
DYNAMIC_SIDECAR_ENABLE_VOLUME_LIMITS=False

DYNAMIC_SCHEDULER_STOP_SERVICE_TIMEOUT=3600
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,18 @@ class SimcoreServiceSettingLabelEntry(BaseModel):
description="The value of the service setting (shall follow Docker REST API scheme for services",
)

def set_destination_containers(self, value: list[str]) -> None:
# NOTE: private attributes cannot be transformed into properties
# since it conflicts with pydantic's internals which treats them
# as fields
self._destination_containers = value

def get_destination_containers(self) -> list[str]:
# NOTE: private attributes cannot be transformed into properties
# since it conflicts with pydantic's internals which treats them
# as fields
return self._destination_containers

@validator("setting_type", pre=True)
@classmethod
def ensure_backwards_compatible_setting_type(cls, v):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,7 @@ def test_service_settings():
# ensure private attribute assignment
for service_setting in simcore_settings_settings_label:
# pylint: disable=protected-access
service_setting._destination_containers = [ # noqa: SLF001
"random_value1",
"random_value2",
]
service_setting.set_destination_containers(["random_value1", "random_value2"])


@pytest.mark.parametrize("model_cls", [SimcoreServiceLabels])
Expand Down
2 changes: 2 additions & 0 deletions services/director-v2/.env-devel
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ DIRECTOR_V2_SELF_SIGNED_SSL_SECRET_ID=1234
DIRECTOR_V2_SELF_SIGNED_SSL_SECRET_NAME=1234
DIRECTOR_V2_SELF_SIGNED_SSL_FILENAME=filename

DIRECTOR_V2_GENERIC_RESOURCE_PLACEMENT_CONSTRAINTS_SUBSTITUTIONS='{}'

LOG_LEVEL=DEBUG

POSTGRES_ENDPOINT=${POSTGRES_ENDPOINT}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from .egress_proxy import EgressProxySettings
from .proxy import DynamicSidecarProxySettings
from .scheduler import DynamicServicesSchedulerSettings
from .sidecar import DynamicSidecarSettings
from .sidecar import DynamicSidecarSettings, PlacementSettings


class DynamicServicesSettings(BaseCustomSettings):
Expand All @@ -25,3 +25,7 @@ class DynamicServicesSettings(BaseCustomSettings):
DYNAMIC_SIDECAR_EGRESS_PROXY_SETTINGS: EgressProxySettings = Field(
auto_default_from_env=True
)

DYNAMIC_SIDECAR_PLACEMENT_SETTINGS: PlacementSettings = Field(
auto_default_from_env=True
)
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
import logging
import re
import warnings
from enum import Enum
from pathlib import Path
from typing import Final

from models_library.basic_types import BootModeEnum, PortInt
from pydantic import Field, NonNegativeInt, PositiveInt, validator
from pydantic import ConstrainedStr, Field, PositiveInt, validator
from settings_library.base import BaseCustomSettings
from settings_library.r_clone import RCloneSettings as SettingsLibraryRCloneSettings
from settings_library.utils_logging import MixinLoggingSettings
Expand All @@ -14,8 +15,6 @@

_logger = logging.getLogger(__name__)

_MINUTE: Final[NonNegativeInt] = 60


class VFSCacheMode(str, Enum):
__slots__ = ()
Expand Down Expand Up @@ -50,6 +49,46 @@ def enforce_r_clone_requirement(cls, v: int, values) -> PositiveInt:
return v


class PlacementConstraintStr(ConstrainedStr):
GitHK marked this conversation as resolved.
Show resolved Hide resolved
strip_whitespace = True
regex = re.compile(
r"^(?!-)(?![.])(?!.*--)(?!.*[.][.])[a-zA-Z0-9.-]*(?<!-)(?<![.])(!=|==)[a-zA-Z0-9_. -]*$"
)


class PlacementSettings(BaseCustomSettings):
# This is just a service placement constraint, see
# https://docs.docker.com/engine/swarm/services/#control-service-placement.
DIRECTOR_V2_SERVICES_CUSTOM_CONSTRAINTS: list[PlacementConstraintStr] = Field(
default_factory=list,
example='["node.labels.region==east", "one!=yes"]',
GitHK marked this conversation as resolved.
Show resolved Hide resolved
GitHK marked this conversation as resolved.
Show resolved Hide resolved
)

DIRECTOR_V2_GENERIC_RESOURCE_PLACEMENT_CONSTRAINTS_SUBSTITUTIONS: dict[
str, PlacementConstraintStr
] = Field(
default_factory=dict,
description=(
"Use placement constraints in place of generic resources, for details "
"see https://github.com/ITISFoundation/osparc-simcore/issues/5250 "
"When `None` (default), uses generic resources"
),
example='{"AIRAM": "node.labels.custom==true"}',
)

@validator("DIRECTOR_V2_GENERIC_RESOURCE_PLACEMENT_CONSTRAINTS_SUBSTITUTIONS")
@classmethod
def warn_if_any_values_provided(cls, value: dict) -> dict:
if len(value) > 0:
warnings.warn( # noqa: B028
"Generic resources will be replaced by the following "
f"placement constraints {value}. This is a workaround "
"for https://github.com/moby/swarmkit/pull/3162",
UserWarning,
)
return value


class DynamicSidecarSettings(BaseCustomSettings, MixinLoggingSettings):
DYNAMIC_SIDECAR_SC_BOOT_MODE: BootModeEnum = Field(
...,
Expand All @@ -73,6 +112,10 @@ class DynamicSidecarSettings(BaseCustomSettings, MixinLoggingSettings):

DYNAMIC_SIDECAR_R_CLONE_SETTINGS: RCloneSettings = Field(auto_default_from_env=True)

DYNAMIC_SIDECAR_PLACEMENT_SETTINGS: PlacementSettings = Field(
auto_default_from_env=True
)

#
# DEVELOPMENT ONLY config
#
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@


import datetime
import re
from functools import cached_property

from models_library.basic_types import (
Expand All @@ -19,15 +18,7 @@
ClusterAuthentication,
NoAuthentication,
)
from pydantic import (
AnyHttpUrl,
AnyUrl,
ConstrainedStr,
Field,
NonNegativeInt,
parse_obj_as,
validator,
)
from pydantic import AnyHttpUrl, AnyUrl, Field, NonNegativeInt, parse_obj_as, validator
from settings_library.base import BaseCustomSettings
from settings_library.catalog import CatalogSettings
from settings_library.docker_registry import RegistrySettings
Expand All @@ -50,13 +41,6 @@
from .dynamic_services_settings import DynamicServicesSettings


class PlacementConstraintStr(ConstrainedStr):
strip_whitespace = True
regex = re.compile(
r"^(?!-)(?![.])(?!.*--)(?!.*[.][.])[a-zA-Z0-9.-]*(?<!-)(?<![.])(!=|==)[a-zA-Z0-9_. -]*$"
)


class DirectorV0Settings(BaseCustomSettings):
DIRECTOR_V0_ENABLED: bool = True

Expand Down Expand Up @@ -225,13 +209,6 @@ class AppSettings(BaseCustomSettings, MixinLoggingSettings):
description="resource usage tracker service client's plugin",
)

# This is just a service placement constraint, see
# https://docs.docker.com/engine/swarm/services/#control-service-placement.
DIRECTOR_V2_SERVICES_CUSTOM_CONSTRAINTS: list[PlacementConstraintStr] = Field(
default_factory=list,
example='["node.labels.region==east", "one!=yes"]',
)

@validator("LOG_LEVEL", pre=True)
@classmethod
def _validate_loglevel(cls, value: str) -> str:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
MATCH_SERVICE_VERSION,
)

from ....core.dynamic_services_settings.sidecar import PlacementConstraintStr
from ....modules.director_v0 import DirectorV0Client
from ..errors import DynamicSidecarError

Expand Down Expand Up @@ -280,8 +281,7 @@ def _add_compose_destination_containers_to_settings_entries(
def _inject_destination_container(
item: SimcoreServiceSettingLabelEntry,
) -> SimcoreServiceSettingLabelEntry:
# pylint: disable=protected-access
item._destination_containers = destination_containers
item.set_destination_containers(destination_containers)
return item

return [_inject_destination_container(x) for x in settings]
Expand All @@ -290,6 +290,8 @@ def _inject_destination_container(
def _merge_resources_in_settings(
settings: deque[SimcoreServiceSettingLabelEntry],
service_resources: ServiceResourcesDict,
*,
placement_substitutions: dict[str, PlacementConstraintStr],
) -> deque[SimcoreServiceSettingLabelEntry]:
"""All oSPARC services which have defined resource requirements will be added"""
log.debug("MERGING\n%s\nAND\n%s", f"{settings=}", f"{service_resources}")
Expand Down Expand Up @@ -338,6 +340,9 @@ def _merge_resources_in_settings(
"MemoryBytes"
] += resource_value.reservation
else: # generic resources
if resource_name in placement_substitutions:
# NOTE: placement constraint will be used in favour of this generic resource
continue
generic_resource = {
"DiscreteResourceSpec": {
"Kind": resource_name,
Expand Down Expand Up @@ -382,8 +387,7 @@ def _format_env_var(env_var: str, destination_container: list[str]) -> str:
# process entry
list_of_env_vars = entry.value if entry.value else []

# pylint: disable=protected-access
destination_containers: list[str] = entry._destination_containers
destination_containers: list[str] = entry.get_destination_containers()

# transforms settings defined environment variables
# from `ENV_VAR=PAYLOAD`
Expand Down Expand Up @@ -459,10 +463,12 @@ async def get_labels_for_involved_services(

async def merge_settings_before_use(
director_v0_client: DirectorV0Client,
*,
service_key: str,
service_tag: str,
service_user_selection_boot_options: dict[EnvVarKey, str],
service_resources: ServiceResourcesDict,
placement_substitutions: dict[str, PlacementConstraintStr],
) -> SimcoreServiceSettingsLabel:
labels_for_involved_services = await get_labels_for_involved_services(
director_v0_client=director_v0_client,
Expand Down Expand Up @@ -501,7 +507,9 @@ async def merge_settings_before_use(
)
)

settings = _merge_resources_in_settings(settings, service_resources)
settings = _merge_resources_in_settings(
settings, service_resources, placement_substitutions=placement_substitutions
)
settings = _patch_target_service_into_env_vars(settings)

return SimcoreServiceSettingsLabel.parse_obj(settings)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,11 @@
from ....core.dynamic_services_settings.scheduler import (
DynamicServicesSchedulerSettings,
)
from ....core.dynamic_services_settings.sidecar import DynamicSidecarSettings
from ....core.settings import AppSettings, PlacementConstraintStr
from ....core.dynamic_services_settings.sidecar import (
DynamicSidecarSettings,
PlacementConstraintStr,
)
from ....core.settings import AppSettings
from ....models.dynamic_services_scheduler import SchedulerData
from .._namespace import get_compose_namespace
from ..volumes import DynamicSidecarVolumesPathsResolver
Expand Down Expand Up @@ -361,8 +364,11 @@ def get_dynamic_sidecar_spec(
| standard_simcore_docker_labels
)

placement_settings = (
app_settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR.DYNAMIC_SIDECAR_PLACEMENT_SETTINGS
)
placement_constraints = deepcopy(
app_settings.DIRECTOR_V2_SERVICES_CUSTOM_CONSTRAINTS
placement_settings.DIRECTOR_V2_SERVICES_CUSTOM_CONSTRAINTS
)
# if service has a pricing plan apply constraints for autoscaling
if hardware_info and len(hardware_info.aws_ec2_instances) == 1:
Expand All @@ -374,6 +380,16 @@ def get_dynamic_sidecar_spec(
)
)

placement_substitutions: dict[
str, PlacementConstraintStr
] = (
placement_settings.DIRECTOR_V2_GENERIC_RESOURCE_PLACEMENT_CONSTRAINTS_SUBSTITUTIONS
)
for image_resources in scheduler_data.service_resources.values():
for resource_name in image_resources.resources:
if resource_name in placement_substitutions:
placement_constraints.append(placement_substitutions[resource_name])

# -----------
create_service_params = {
"endpoint_spec": {"Ports": ports} if ports else {},
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@
from .....core.dynamic_services_settings.scheduler import (
DynamicServicesSchedulerSettings,
)
from .....core.dynamic_services_settings.sidecar import DynamicSidecarSettings
from .....core.dynamic_services_settings.sidecar import (
DynamicSidecarSettings,
PlacementSettings,
)
from .....models.dynamic_services_scheduler import (
DockerContainerInspect,
DockerStatus,
Expand Down Expand Up @@ -125,6 +128,10 @@ async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None:
dynamic_services_scheduler_settings: DynamicServicesSchedulerSettings = (
app.state.settings.DYNAMIC_SERVICES.DYNAMIC_SCHEDULER
)
dynamic_services_placement_settings: PlacementSettings = (
app.state.settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR.DYNAMIC_SIDECAR_PLACEMENT_SETTINGS
)

# the dynamic-sidecar should merge all the settings, especially:
# resources and placement derived from all the images in
# the provided docker-compose spec
Expand Down Expand Up @@ -152,6 +159,7 @@ async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None:
service_tag=scheduler_data.version,
service_user_selection_boot_options=boot_options,
service_resources=scheduler_data.service_resources,
placement_substitutions=dynamic_services_placement_settings.DIRECTOR_V2_GENERIC_RESOURCE_PLACEMENT_CONSTRAINTS_SUBSTITUTIONS,
)

groups_extra_properties = get_repository(app, GroupsExtraPropertiesRepository)
Expand Down
Loading
Loading