Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨ add EFS mount option to sidecar #5873

Merged
merged 44 commits into from
Jun 10, 2024
Merged
Show file tree
Hide file tree
Changes from 34 commits
Commits
Show all changes
44 commits
Select commit Hold shift + click to select a range
9702719
add EFS mount option to sidecar
matusdrobuliak66 May 27, 2024
21c495f
Merge branch 'master' into mount-efs-to-sidecar
matusdrobuliak66 May 27, 2024
99e9d87
add EFS mount option to sidecar
matusdrobuliak66 May 27, 2024
4109b7b
fix
matusdrobuliak66 May 27, 2024
1004f7b
fix efs volume driver config:
May 28, 2024
a4bdbe5
daily work
matusdrobuliak66 Jun 4, 2024
5b5f042
Merge branch 'master' into efs/add-create-directory-functionality
matusdrobuliak66 Jun 4, 2024
417d9d9
adding tests
matusdrobuliak66 Jun 5, 2024
3538485
adding tests
matusdrobuliak66 Jun 5, 2024
155108c
Merge branch 'master' into efs/add-create-directory-functionality
matusdrobuliak66 Jun 5, 2024
0590ddf
review @sanderegg
matusdrobuliak66 Jun 6, 2024
bf39b37
review @sanderegg
matusdrobuliak66 Jun 6, 2024
8fe75a2
Merge branch 'efs/add-create-directory-functionality' of github.com:m…
matusdrobuliak66 Jun 6, 2024
8a42167
Merge branch 'master' into efs/add-create-directory-functionality
matusdrobuliak66 Jun 6, 2024
2fa1f4a
fix
matusdrobuliak66 Jun 6, 2024
a11d65d
fix
matusdrobuliak66 Jun 6, 2024
46d0db0
Merge branch 'master' of github.com:ITISFoundation/osparc-simcore int…
matusdrobuliak66 Jun 6, 2024
1849a9b
Merge branch 'efs/add-create-directory-functionality' into mount-efs-…
matusdrobuliak66 Jun 6, 2024
16ad846
modifying settings
matusdrobuliak66 Jun 6, 2024
1ceab31
fix test
matusdrobuliak66 Jun 6, 2024
9bdde39
Merge branch 'efs/add-create-directory-functionality' into mount-efs-…
matusdrobuliak66 Jun 6, 2024
2bd963d
merge master
matusdrobuliak66 Jun 6, 2024
b470a93
Merge branch 'master' into mount-efs-to-sidecar
matusdrobuliak66 Jun 6, 2024
f204a1c
fix
matusdrobuliak66 Jun 6, 2024
6bd4b84
fix
matusdrobuliak66 Jun 6, 2024
22340aa
fix
matusdrobuliak66 Jun 6, 2024
2c33266
fix test
matusdrobuliak66 Jun 6, 2024
9f871fd
Merge branch 'master' into mount-efs-to-sidecar
matusdrobuliak66 Jun 6, 2024
bcd16ad
fix test
matusdrobuliak66 Jun 6, 2024
188c039
Merge branch 'mount-efs-to-sidecar' of github.com:matusdrobuliak66/os…
matusdrobuliak66 Jun 6, 2024
5dae025
fix import
matusdrobuliak66 Jun 6, 2024
61c8359
fix path
matusdrobuliak66 Jun 6, 2024
0d4d9a7
fix path
matusdrobuliak66 Jun 6, 2024
a7e564a
review @GitHK
matusdrobuliak66 Jun 6, 2024
f0ab1d9
review @GitHK
matusdrobuliak66 Jun 6, 2024
cc3f1b1
review @GitHK
matusdrobuliak66 Jun 6, 2024
37dd243
fix
matusdrobuliak66 Jun 6, 2024
d1baa6f
review @mrnicegyu11
matusdrobuliak66 Jun 6, 2024
218d906
revert back to list for now:
matusdrobuliak66 Jun 6, 2024
1e46207
remove all defaults review @mrnicegyu11
matusdrobuliak66 Jun 6, 2024
72a918d
add env
matusdrobuliak66 Jun 6, 2024
92d1ae3
fix
matusdrobuliak66 Jun 6, 2024
47e822d
Merge branch 'master' into mount-efs-to-sidecar
matusdrobuliak66 Jun 6, 2024
38f1428
Merge branch 'master' into mount-efs-to-sidecar
matusdrobuliak66 Jun 7, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .env-devel
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ DIRECTOR_REGISTRY_CACHING=True

EFS_DNS_NAME=fs-xxx.efs.us-east-1.amazonaws.com
EFS_MOUNTED_PATH=/tmp/efs
EFS_ENABLED_FOR_USERS=[]
matusdrobuliak66 marked this conversation as resolved.
Show resolved Hide resolved

# DIRECTOR_V2 ----
COMPUTATIONAL_BACKEND_DEFAULT_CLUSTER_AUTH='{"type":"tls","tls_ca_file":"/home/scu/.dask/dask-crt.pem","tls_client_cert":"/home/scu/.dask/dask-crt.pem","tls_client_key":"/home/scu/.dask/dask-key.pem"}'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ async def create_project_specific_data_dir(
*,
project_id: ProjectID,
node_id: NodeID,
storage_directory_name: str,
) -> Path:
output: Path = await rabbitmq_rpc_client.request(
EFS_GUARDIAN_RPC_NAMESPACE,
Expand Down
21 changes: 21 additions & 0 deletions packages/settings-library/src/settings_library/efs.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from pathlib import Path

from pydantic import Field

from .base import BaseCustomSettings


class AwsEfsSettings(BaseCustomSettings):
    """Settings describing the AWS Elastic File System (EFS) used for project data.

    ``EFS_DNS_NAME`` and ``EFS_ENABLED_FOR_USERS`` declare no default and are
    therefore required; the remaining fields fall back to the defaults below.
    """

    # Host name of the EFS file system (no default: must be provided)
    EFS_DNS_NAME: str = Field(
        example="fs-xxx.efs.us-east-1.amazonaws.com",
        description="AWS Elastic File System DNS name",
    )

    # Name of the directory that groups the project-specific data
    # NOTE(review): presumably located at the root of the EFS share - confirm
    EFS_PROJECT_SPECIFIC_DATA_DIRECTORY: str = Field(default="project-specific-data")

    EFS_MOUNTED_PATH: Path = Field(
        description="This is the path where EFS is mounted to the EC2 machine",
        default=Path("/data/efs"),
    )

    # Allow-list of user ids (no default: must be provided, may be empty)
    EFS_ENABLED_FOR_USERS: list[int] = Field(
        example=[1],
        description="This is temporary solution so we can enable it for specific users for testing purpose",
    )
matusdrobuliak66 marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
)
from pydantic import Field, PositiveInt, validator
from settings_library.base import BaseCustomSettings
from settings_library.efs import AwsEfsSettings
from settings_library.r_clone import RCloneSettings as SettingsLibraryRCloneSettings
from settings_library.utils_logging import MixinLoggingSettings
from settings_library.utils_service import DEFAULT_FASTAPI_PORT
Expand Down Expand Up @@ -124,6 +125,10 @@ class DynamicSidecarSettings(BaseCustomSettings, MixinLoggingSettings):

DYNAMIC_SIDECAR_R_CLONE_SETTINGS: RCloneSettings = Field(auto_default_from_env=True)

DYNAMIC_SIDECAR_EFS_SETTINGS: AwsEfsSettings | None = Field(
auto_default_from_env=True
matusdrobuliak66 marked this conversation as resolved.
Show resolved Hide resolved
)

DYNAMIC_SIDECAR_PLACEMENT_SETTINGS: PlacementSettings = Field(
auto_default_from_env=True
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
from models_library.service_settings_labels import SimcoreServiceSettingsLabel
from models_library.utils.json_serialization import json_dumps
from pydantic import ByteSize, parse_obj_as
from servicelib.rabbitmq import RabbitMQRPCClient
from servicelib.rabbitmq.rpc_interfaces.efs_guardian import efs_manager
from servicelib.utils import unused_port
from settings_library.node_ports import StorageAuthSettings

Expand Down Expand Up @@ -196,13 +198,14 @@ def get_prometheus_monitoring_networks(
)


def _get_mounts(
async def _get_mounts(
*,
scheduler_data: SchedulerData,
dynamic_sidecar_settings: DynamicSidecarSettings,
dynamic_services_scheduler_settings: DynamicServicesSchedulerSettings,
app_settings: AppSettings,
has_quota_support: bool,
rpc_client: RabbitMQRPCClient,
) -> list[dict[str, Any]]:
mounts: list[dict[str, Any]] = [
# docker socket needed to use the docker api
Expand Down Expand Up @@ -252,10 +255,41 @@ def _get_mounts(
volume_size_limit=volume_size_limits.get(f"{path_to_mount}"),
)
)

# We check whether user has access to EFS feature
use_efs = False
efs_settings = dynamic_sidecar_settings.DYNAMIC_SIDECAR_EFS_SETTINGS
if efs_settings and scheduler_data.user_id in efs_settings.EFS_ENABLED_FOR_USERS:
use_efs = True

# state paths now get mounted via different driver and are synced to s3 automatically
for path_to_mount in scheduler_data.paths_mapping.state_paths:
if use_efs:
assert dynamic_sidecar_settings.DYNAMIC_SIDECAR_EFS_SETTINGS # nosec

_storage_directory_name = DynamicSidecarVolumesPathsResolver.volume_name(
path_to_mount
).strip("_")
matusdrobuliak66 marked this conversation as resolved.
Show resolved Hide resolved
await efs_manager.create_project_specific_data_dir(
rpc_client,
project_id=scheduler_data.project_id,
node_id=scheduler_data.node_uuid,
storage_directory_name=_storage_directory_name,
)
mounts.append(
DynamicSidecarVolumesPathsResolver.mount_efs(
swarm_stack_name=dynamic_services_scheduler_settings.SWARM_STACK_NAME,
path=path_to_mount,
node_uuid=scheduler_data.node_uuid,
run_id=scheduler_data.run_id,
project_id=scheduler_data.project_id,
user_id=scheduler_data.user_id,
efs_settings=dynamic_sidecar_settings.DYNAMIC_SIDECAR_EFS_SETTINGS,
storage_directory_name=_storage_directory_name,
)
)
# for now only enable this with dev features enabled
if app_settings.DIRECTOR_V2_DEV_FEATURE_R_CLONE_MOUNTS_ENABLED:
elif app_settings.DIRECTOR_V2_DEV_FEATURE_R_CLONE_MOUNTS_ENABLED:
mounts.append(
DynamicSidecarVolumesPathsResolver.mount_r_clone(
swarm_stack_name=dynamic_services_scheduler_settings.SWARM_STACK_NAME,
Expand Down Expand Up @@ -351,7 +385,7 @@ def _get_ports(
return ports


def get_dynamic_sidecar_spec( # pylint:disable=too-many-arguments# noqa: PLR0913
async def get_dynamic_sidecar_spec( # pylint:disable=too-many-arguments# noqa: PLR0913
scheduler_data: SchedulerData,
dynamic_sidecar_settings: DynamicSidecarSettings,
dynamic_services_scheduler_settings: DynamicServicesSchedulerSettings,
Expand All @@ -364,6 +398,7 @@ def get_dynamic_sidecar_spec( # pylint:disable=too-many-arguments# noqa: PLR091
hardware_info: HardwareInfo | None,
metrics_collection_allowed: bool,
telemetry_enabled: bool,
rpc_client: RabbitMQRPCClient,
) -> AioDockerServiceSpec:
"""
The dynamic-sidecar is responsible for managing the lifecycle
Expand All @@ -375,12 +410,13 @@ def get_dynamic_sidecar_spec( # pylint:disable=too-many-arguments# noqa: PLR091
"""
compose_namespace = get_compose_namespace(scheduler_data.node_uuid)

mounts = _get_mounts(
mounts = await _get_mounts(
scheduler_data=scheduler_data,
dynamic_services_scheduler_settings=dynamic_services_scheduler_settings,
dynamic_sidecar_settings=dynamic_sidecar_settings,
app_settings=app_settings,
has_quota_support=has_quota_support,
rpc_client=rpc_client,
)

ports = _get_ports(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from models_library.service_settings_labels import SimcoreServiceSettingsLabel
from models_library.services import RunID
from models_library.utils.json_serialization import json_dumps
from servicelib.rabbitmq import RabbitMQClient
from servicelib.rabbitmq import RabbitMQClient, RabbitMQRPCClient
from simcore_postgres_database.models.comp_tasks import NodeClass

from .....core.dynamic_services_settings import DynamicServicesSettings
Expand Down Expand Up @@ -222,9 +222,11 @@ async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None:
user_id=scheduler_data.user_id, product_name=scheduler_data.product_name
)

rpc_client: RabbitMQRPCClient = app.state.rabbitmq_rpc_client

# WARNING: do NOT log, this structure has secrets in the open
# If you want to log, please use an obfuscator
dynamic_sidecar_service_spec_base: AioDockerServiceSpec = get_dynamic_sidecar_spec(
dynamic_sidecar_service_spec_base: AioDockerServiceSpec = await get_dynamic_sidecar_spec(
scheduler_data=scheduler_data,
dynamic_sidecar_settings=dynamic_sidecar_settings,
dynamic_services_scheduler_settings=dynamic_services_scheduler_settings,
Expand All @@ -236,6 +238,7 @@ async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None:
allow_internet_access=allow_internet_access,
metrics_collection_allowed=metrics_collection_allowed,
telemetry_enabled=is_telemetry_enabled,
rpc_client=rpc_client,
)

catalog_client = CatalogClient.instance(app)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from models_library.services import RunID
from models_library.users import UserID
from servicelib.docker_constants import PREFIX_DYNAMIC_SIDECAR_VOLUMES
from settings_library.efs import AwsEfsSettings
from settings_library.r_clone import S3Provider

from ...core.dynamic_services_settings.sidecar import RCloneSettings
Expand Down Expand Up @@ -75,6 +76,23 @@ def _get_s3_volume_driver_config(
return driver_config


def _get_efs_volume_driver_config(
    efs_settings: AwsEfsSettings,
    project_id: ProjectID,
    node_uuid: NodeID,
    storage_directory_name: str,
) -> dict[str, Any]:
    """Build the docker volume ``DriverConfig`` that mounts a node directory from EFS via NFS.

    The NFS server address is ``efs_settings.EFS_DNS_NAME`` and the mounted
    device is
    ``:/<EFS_PROJECT_SPECIFIC_DATA_DIRECTORY>/<project_id>/<node_uuid>/<storage_directory_name>``.

    Arguments:
        efs_settings: provides ``EFS_DNS_NAME`` and
            ``EFS_PROJECT_SPECIFIC_DATA_DIRECTORY``
        project_id: third-to-last component of the device path
        node_uuid: second-to-last component of the device path
        storage_directory_name: last component of the device path; must be a
            single path component (no ``/``)

    Returns:
        a dictionary with a single ``"Options"`` entry holding the ``type``,
        ``o`` (mount options) and ``device`` values

    Raises:
        ValueError: if ``storage_directory_name`` contains a ``/``
    """
    # NOTE: explicit check instead of `assert`, since asserts are removed when
    # python runs with -O and this value ends up inside the NFS device path
    if "/" in storage_directory_name:
        msg = (
            "storage_directory_name must not contain '/', "
            f"got {storage_directory_name!r}"
        )
        raise ValueError(msg)

    nfs_mount_options = f"addr={efs_settings.EFS_DNS_NAME},rw,nfsvers=4.1,rsize=1048576,wsize=1048576,hard,timeo=600,retrans=2,noresvport"
    nfs_device = f":/{efs_settings.EFS_PROJECT_SPECIFIC_DATA_DIRECTORY}/{project_id}/{node_uuid}/{storage_directory_name}"

    driver_config: dict[str, Any] = {
        "Options": {
            "type": "nfs",
            "o": nfs_mount_options,
            "device": nfs_device,
        },
    }
    return driver_config


class DynamicSidecarVolumesPathsResolver:
BASE_PATH: Path = Path("/dy-volumes")

Expand All @@ -85,7 +103,7 @@ def target(cls, path: Path) -> str:
return f"{target_path}"

@classmethod
def volume_name(cls, path: Path) -> str:
    """Flatten ``path`` into a single name by replacing every ``os.sep`` with ``_``."""
    flattened = str(path).replace(os.sep, "_")
    return flattened
matusdrobuliak66 marked this conversation as resolved.
Show resolved Hide resolved

@classmethod
Expand All @@ -104,7 +122,7 @@ def source(cls, path: Path, node_uuid: NodeID, run_id: RunID) -> str:
# NOTE: issues can occur when the paths of the mounted outputs, inputs
# and state folders are very long and share the same subdirectory path.
# Reversing volume name to prevent these issues from happening.
reversed_volume_name = cls._volume_name(path)[::-1]
reversed_volume_name = cls.volume_name(path)[::-1]
unique_name = f"{PREFIX_DYNAMIC_SIDECAR_VOLUMES}_{run_id}_{node_uuid}_{reversed_volume_name}"
return unique_name[:255]

Expand Down Expand Up @@ -217,7 +235,41 @@ def mount_r_clone(
r_clone_settings=r_clone_settings,
project_id=project_id,
node_uuid=node_uuid,
storage_directory_name=cls._volume_name(path).strip("_"),
storage_directory_name=cls.volume_name(path).strip("_"),
),
},
}

@classmethod
def mount_efs(
    cls,
    swarm_stack_name: str,
    path: Path,
    node_uuid: NodeID,
    run_id: RunID,
    project_id: ProjectID,
    user_id: UserID,
    efs_settings: AwsEfsSettings,
    storage_directory_name: str,
) -> dict[str, Any]:
    """Return the mount specification of an EFS-backed volume for ``path``.

    The volume source name comes from ``cls.source``, the target from
    ``cls.target`` and the driver configuration from
    ``_get_efs_volume_driver_config``. The volume is labelled with its source
    name, run id, node uuid, project id (as ``study_id``), user id and swarm
    stack name.
    """
    # the same name is used as mount source and as the "source" label
    volume_source = cls.source(path, node_uuid, run_id)

    volume_labels = {
        "source": volume_source,
        "run_id": f"{run_id}",
        "node_uuid": f"{node_uuid}",
        "study_id": f"{project_id}",
        "user_id": f"{user_id}",
        "swarm_stack_name": swarm_stack_name,
    }
    efs_driver_config = _get_efs_volume_driver_config(
        efs_settings=efs_settings,
        project_id=project_id,
        node_uuid=node_uuid,
        storage_directory_name=storage_directory_name,
    )

    return {
        "Source": volume_source,
        "Target": cls.target(path),
        "Type": "volume",
        "VolumeOptions": {
            "Labels": volume_labels,
            "DriverConfig": efs_driver_config,
        },
    }
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import json
from collections.abc import Mapping
from typing import Any, cast
from unittest.mock import Mock

import pytest
import respx
Expand Down Expand Up @@ -408,7 +409,7 @@ def expected_dynamic_sidecar_spec(
}


def test_get_dynamic_proxy_spec(
async def test_get_dynamic_proxy_spec(
mocked_catalog_service_api: respx.MockRouter,
minimal_app: FastAPI,
scheduler_data: SchedulerData,
Expand Down Expand Up @@ -436,7 +437,7 @@ def test_get_dynamic_proxy_spec(
for count in range(1, 11): # loop to check it does not repeat copies
print(f"{count:*^50}")

dynamic_sidecar_spec: AioDockerServiceSpec = get_dynamic_sidecar_spec(
dynamic_sidecar_spec: AioDockerServiceSpec = await get_dynamic_sidecar_spec(
scheduler_data=scheduler_data,
dynamic_sidecar_settings=dynamic_sidecar_settings,
dynamic_services_scheduler_settings=dynamic_services_scheduler_settings,
Expand All @@ -448,6 +449,7 @@ def test_get_dynamic_proxy_spec(
allow_internet_access=False,
metrics_collection_allowed=True,
telemetry_enabled=True,
rpc_client=Mock(),
)

exclude_keys: Mapping[int | str, Any] = {
Expand Down Expand Up @@ -530,7 +532,7 @@ async def test_merge_dynamic_sidecar_specs_with_user_specific_specs(
hardware_info: HardwareInfo,
fake_service_specifications: dict[str, Any],
):
dynamic_sidecar_spec: AioDockerServiceSpec = get_dynamic_sidecar_spec(
dynamic_sidecar_spec: AioDockerServiceSpec = await get_dynamic_sidecar_spec(
scheduler_data=scheduler_data,
dynamic_sidecar_settings=dynamic_sidecar_settings,
dynamic_services_scheduler_settings=dynamic_services_scheduler_settings,
Expand All @@ -542,6 +544,7 @@ async def test_merge_dynamic_sidecar_specs_with_user_specific_specs(
allow_internet_access=False,
metrics_collection_allowed=True,
telemetry_enabled=True,
rpc_client=Mock(),
)
assert dynamic_sidecar_spec
dynamic_sidecar_spec_dict = dynamic_sidecar_spec.dict()
Expand Down
5 changes: 5 additions & 0 deletions services/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,10 @@ services:
R_CLONE_OPTION_TRANSFERS: ${R_CLONE_OPTION_TRANSFERS}
R_CLONE_PROVIDER: ${R_CLONE_PROVIDER}

EFS_DNS_NAME: ${EFS_DNS_NAME}
EFS_MOUNTED_PATH: ${EFS_MOUNTED_PATH}
EFS_ENABLED_FOR_USERS: ${EFS_ENABLED_FOR_USERS}
matusdrobuliak66 marked this conversation as resolved.
Show resolved Hide resolved

RABBIT_HOST: ${RABBIT_HOST}
RABBIT_PASSWORD: ${RABBIT_PASSWORD}
RABBIT_PORT: ${RABBIT_PORT}
Expand Down Expand Up @@ -358,6 +362,7 @@ services:
RABBIT_USER: ${RABBIT_USER}
EFS_DNS_NAME: ${EFS_DNS_NAME}
EFS_MOUNTED_PATH: ${EFS_MOUNTED_PATH}
EFS_ENABLED_FOR_USERS: ${EFS_ENABLED_FOR_USERS}

invitations:
image: ${DOCKER_REGISTRY:-itisfoundation}/invitations:${DOCKER_IMAGE_TAG:-latest}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,12 @@

@router.expose(reraise_if_error_type=())
async def create_project_specific_data_dir(
    app: FastAPI, *, project_id: ProjectID, node_id: NodeID, storage_directory_name: str
) -> Path:
    """RPC entry point that forwards the request to the application's EFS manager.

    The manager is looked up with ``get_efs_manager(app)`` and its
    ``create_project_specific_data_dir`` result is returned unchanged.
    """
    return await get_efs_manager(app).create_project_specific_data_dir(
        project_id=project_id,
        node_id=node_id,
        storage_directory_name=storage_directory_name,
    )
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
from functools import cached_property
from pathlib import Path
from typing import Final, cast

from fastapi import FastAPI
Expand All @@ -11,6 +10,7 @@
)
from pydantic import Field, PositiveInt, validator
from settings_library.base import BaseCustomSettings
from settings_library.efs import AwsEfsSettings
from settings_library.rabbit import RabbitSettings
from settings_library.utils_logging import MixinLoggingSettings

Expand All @@ -19,18 +19,6 @@
EFS_GUARDIAN_ENV_PREFIX: Final[str] = "EFS_GUARDIAN_"


class AwsEfsSettings(BaseCustomSettings):
    """Settings describing the AWS Elastic File System (EFS).

    ``EFS_DNS_NAME`` declares no default and is therefore required.
    """

    # Host name of the EFS file system (no default: must be provided)
    EFS_DNS_NAME: str = Field(
        example="fs-xxx.efs.us-east-1.amazonaws.com",
        description="AWS Elastic File System DNS name",
    )

    # Name of the directory that groups the project-specific data
    EFS_PROJECT_SPECIFIC_DATA_DIRECTORY: str = Field(default="project-specific-data")

    EFS_MOUNTED_PATH: Path = Field(
        description="This is the path where EFS is mounted to the EC2 machine",
        default=Path("/data/efs"),
    )


class ApplicationSettings(BaseCustomSettings, MixinLoggingSettings):
# CODE STATICS ---------------------------------------------------------
API_VERSION: str = API_VERSION
Expand Down
Loading
Loading