Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

✨Comp-backend override resources with selected hardware #4954

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
ae870c9
ruffing
sanderegg Oct 30, 2023
0c53bc5
fix creating project with predefined project does not set resources
sanderegg Oct 30, 2023
e669a46
refactor
sanderegg Oct 30, 2023
bd0ddce
refactor
sanderegg Oct 30, 2023
8ebd1f5
initial override of service resources
sanderegg Oct 30, 2023
45565cf
refactor
sanderegg Oct 30, 2023
52cc87e
Revert "refactor"
sanderegg Oct 30, 2023
a9a96cf
new dependency
sanderegg Oct 30, 2023
74d7c93
get ec2 types
sanderegg Oct 30, 2023
1003be2
renamed exception
sanderegg Oct 30, 2023
6d9daf1
create an interface
sanderegg Oct 30, 2023
cefa882
create interface in service lib instead
sanderegg Oct 30, 2023
9546867
no need
sanderegg Oct 30, 2023
f1e6d11
refactor
sanderegg Oct 30, 2023
fa3a3b2
fix import
sanderegg Oct 30, 2023
3c7bbf1
refactor
sanderegg Oct 30, 2023
60d35d5
cleanup
sanderegg Oct 30, 2023
273bd1f
linters
sanderegg Oct 30, 2023
f6b4432
test function in servicelib
sanderegg Oct 30, 2023
76c87fd
code climate?
sanderegg Oct 30, 2023
64fa4c6
fix imports
sanderegg Oct 30, 2023
c67efda
move interface to service lib
sanderegg Oct 30, 2023
0a61529
fixed imports
sanderegg Oct 30, 2023
0aedb11
add a margin of 1GiB for now
sanderegg Oct 30, 2023
35d9405
fix import
sanderegg Oct 30, 2023
448fd8e
@pcrespov review: timeout constant
sanderegg Oct 31, 2023
77d1a7e
@pcrespov review: document
sanderegg Oct 31, 2023
01787a6
added doc + 1 test
sanderegg Oct 31, 2023
0b59162
cleanup
sanderegg Oct 31, 2023
6b7ca5b
cleanup
sanderegg Oct 31, 2023
6cc783f
Squashed commit of the following:
sanderegg Oct 31, 2023
6ecd4dc
@GitHK review: use suppress
sanderegg Oct 31, 2023
ea52979
codeclimate
sanderegg Oct 31, 2023
e452704
move back to original path
sanderegg Oct 31, 2023
5438213
CPU is nicer as a float
sanderegg Oct 31, 2023
bb5b808
code climate
sanderegg Oct 31, 2023
b7f18ae
linter
sanderegg Oct 31, 2023
f3beec4
mypy
sanderegg Oct 31, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,9 @@ def from_entries(cls, entries: dict[str, str]) -> "RPCNamespace":
"""
composed_string = "-".join(f"{k}_{v}" for k, v in sorted(entries.items()))
return parse_obj_as(cls, composed_string)


class RPCMethodName(ConstrainedStr):
    """Constrained string type used to name a remote RPC method.

    Accepted values are 1 to 252 characters long and must match
    ``REGEX_RABBIT_QUEUE_ALLOWED_SYMBOLS``.
    """

    # length bounds applied to the method name
    min_length: int = 1
    max_length: int = 252
    # the pattern is compiled once, when the class body is executed
    regex: re.Pattern[str] | None = re.compile(REGEX_RABBIT_QUEUE_ALLOWED_SYMBOLS)
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@ def ensure_limits_are_equal_or_above_reservations(cls, values):
def set_reservation_same_as_limit(self) -> None:
    """Overwrite the reservation with the current limit."""
    self.reservation = self.limit

def set_value(self, value: StrictInt | StrictFloat | str) -> None:
    """Set both the limit and the reservation to ``value``."""
    # NOTE(review): the chained assignment stores into self.limit first and
    # then into self.reservation. The model's Config enables
    # validate_assignment, so each store presumably triggers validation on
    # its own -- confirm the intermediate state (new limit, old reservation)
    # cannot be rejected or adjusted by the model validators.
    self.limit = self.reservation = value

class Config:
validate_assignment = True

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
RPCNotInitializedError,
RPCServerError,
)
from ._models import RPCMethodName
from ._rpc_router import RPCRouter
from ._utils import wait_till_rabbitmq_responsive

Expand All @@ -17,7 +16,6 @@
"RabbitMQClient",
"RabbitMQRPCClient",
"RemoteMethodNotRegisteredError",
"RPCMethodName",
"RPCNamespace",
"RPCNotInitializedError",
"RPCRouter",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,14 +6,14 @@
from typing import Any

import aio_pika
from models_library.rabbitmq_basic_types import RPCNamespace
from models_library.rabbitmq_basic_types import RPCMethodName, RPCNamespace
from pydantic import PositiveInt
from settings_library.rabbit import RabbitSettings

from ..logging_utils import log_context
from ._client_base import RabbitMQClientBase
from ._errors import RemoteMethodNotRegisteredError, RPCNotInitializedError
from ._models import RPCMethodName, RPCNamespacedMethodName
from ._models import RPCNamespacedMethodName
from ._rpc_router import RPCRouter
from ._utils import get_rabbitmq_client_unique_name

Expand Down
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from typing import Final

BIND_TO_ALL_TOPICS: Final[str] = "#"
RPC_REMOTE_METHOD_TIMEOUT_S: Final[int] = 30
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from models_library.rabbitmq_basic_types import (
REGEX_RABBIT_QUEUE_ALLOWED_SYMBOLS,
RPCMethodName,
RPCNamespace,
)
from pydantic import ConstrainedStr, parse_obj_as
Expand All @@ -19,12 +20,6 @@ def routing_key(self) -> str | None:
...


class RPCMethodName(ConstrainedStr):
    """Constrained string type used to name a remote RPC method.

    Accepted values are 1 to 252 characters long and must match
    ``REGEX_RABBIT_QUEUE_ALLOWED_SYMBOLS``.
    """

    # length bounds applied to the method name
    min_length: int = 1
    max_length: int = 252
    # the pattern is compiled once, when the class body is executed
    regex: re.Pattern[str] | None = re.compile(REGEX_RABBIT_QUEUE_ALLOWED_SYMBOLS)


class RPCNamespacedMethodName(ConstrainedStr):
min_length: int = 1
max_length: int = 255
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@
from dataclasses import dataclass, field
from typing import Any, TypeVar

from models_library.rabbitmq_basic_types import RPCMethodName
from pydantic import SecretStr

from ..logging_utils import log_context
from ._errors import RPCServerError
from ._models import RPCMethodName

DecoratedCallable = TypeVar("DecoratedCallable", bound=Callable[..., Any])

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from models_library.api_schemas_clusters_keeper import CLUSTERS_KEEPER_RPC_NAMESPACE
from models_library.api_schemas_clusters_keeper.clusters import OnDemandCluster
from models_library.rabbitmq_basic_types import RPCMethodName
from models_library.users import UserID
from models_library.wallets import WalletID

from ..._client_rpc import RabbitMQRPCClient
from ..._constants import RPC_REMOTE_METHOD_TIMEOUT_S


async def get_or_create_cluster(
    client: RabbitMQRPCClient, *, user_id: UserID, wallet_id: WalletID | None
) -> OnDemandCluster:
    """**Remote method**

    Sends a ``get_or_create_cluster`` request in the clusters-keeper RPC
    namespace through ``client`` and returns the reply.

    Arguments:
        client -- RPC client used to send the request
        user_id -- forwarded to the remote method
        wallet_id -- forwarded to the remote method, may be None

    Raises:
        RPCServerError -- if anything happens remotely
    """
    method_name = RPCMethodName("get_or_create_cluster")
    # the annotated local gives the reply a static type before returning it
    cluster: OnDemandCluster = await client.request(
        CLUSTERS_KEEPER_RPC_NAMESPACE,
        method_name,
        timeout_s=RPC_REMOTE_METHOD_TIMEOUT_S,
        user_id=user_id,
        wallet_id=wallet_id,
    )
    return cluster
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from models_library.api_schemas_clusters_keeper import CLUSTERS_KEEPER_RPC_NAMESPACE
from models_library.api_schemas_clusters_keeper.ec2_instances import EC2InstanceType
from models_library.rabbitmq_basic_types import RPCMethodName

from ..._client_rpc import RabbitMQRPCClient
from ..._constants import RPC_REMOTE_METHOD_TIMEOUT_S


async def get_instance_type_details(
    client: RabbitMQRPCClient, *, instance_type_names: set[str]
) -> list[EC2InstanceType]:
    """**Remote method**

    Sends a ``get_instance_type_details`` request in the clusters-keeper RPC
    namespace through ``client`` and returns the reply.

    Arguments:
        client -- RPC client used to send the request
        instance_type_names -- instance type names forwarded to the remote method

    Raises:
        RPCServerError -- if anything happens remotely

    """
    # the annotated local gives the reply a static type before returning it
    instance_types: list[EC2InstanceType] = await client.request(
        CLUSTERS_KEEPER_RPC_NAMESPACE,
        RPCMethodName("get_instance_type_details"),
        timeout_s=RPC_REMOTE_METHOD_TIMEOUT_S,
        instance_type_names=instance_type_names,
    )
    return instance_types
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,11 @@
from typing import Any, Final

import pytest
from models_library.rabbitmq_basic_types import RPCMethodName
from pydantic import NonNegativeInt, ValidationError
from servicelib.rabbitmq import (
RabbitMQRPCClient,
RemoteMethodNotRegisteredError,
RPCMethodName,
RPCNamespace,
RPCNotInitializedError,
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,8 @@

import pytest
from faker import Faker
from servicelib.rabbitmq import (
RabbitMQRPCClient,
RPCMethodName,
RPCNamespace,
RPCRouter,
)
from models_library.rabbitmq_basic_types import RPCMethodName
from servicelib.rabbitmq import RabbitMQRPCClient, RPCNamespace, RPCRouter

pytest_simcore_core_services_selection = [
"rabbit",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,16 @@ async def get_ec2_instance_capabilities(
self,
instance_type_names: set[InstanceTypeType],
) -> list[EC2InstanceType]:
"""instance_type_names must be a set of unique values"""
"""returns the ec2 instance types from a list of instance type names
NOTE: the order might differ!
Arguments:
instance_type_names -- the types to filter with

Raises:
Ec2InstanceTypeInvalidError: some invalid types were used as filter
ClustersKeeperRuntimeError: unexpected error communicating with EC2

"""
try:
instance_types = await self.client.describe_instance_types(
InstanceTypes=list(instance_type_names)
Expand Down
17 changes: 9 additions & 8 deletions services/clusters-keeper/tests/unit/test_rpc_clusters.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,14 @@
import pytest
from faker import Faker
from fastapi import FastAPI
from models_library.api_schemas_clusters_keeper import CLUSTERS_KEEPER_RPC_NAMESPACE
from models_library.api_schemas_clusters_keeper.clusters import OnDemandCluster
from models_library.users import UserID
from models_library.wallets import WalletID
from pytest_mock.plugin import MockerFixture
from servicelib.rabbitmq import RabbitMQRPCClient, RPCMethodName
from servicelib.rabbitmq import RabbitMQRPCClient
from servicelib.rabbitmq.rpc_interfaces.clusters_keeper.clusters import (
get_or_create_cluster,
)
from simcore_service_clusters_keeper.utils.ec2 import HEARTBEAT_TAG_KEY
from types_aiobotocore_ec2 import EC2Client

Expand Down Expand Up @@ -105,9 +107,9 @@ async def test_get_or_create_cluster(
mocked_dask_ping_scheduler: MockedDaskModule,
):
# send rabbitmq rpc to create_cluster
rpc_response = await clusters_keeper_rabbitmq_rpc_client.request(
CLUSTERS_KEEPER_RPC_NAMESPACE,
RPCMethodName("get_or_create_cluster"),

rpc_response = await get_or_create_cluster(
clusters_keeper_rabbitmq_rpc_client,
user_id=user_id,
wallet_id=wallet_id if use_wallet_id else None,
)
Expand All @@ -121,9 +123,8 @@ async def test_get_or_create_cluster(
mocked_dask_ping_scheduler.ping_scheduler.reset_mock()

# calling it again returns the existing cluster
rpc_response = await clusters_keeper_rabbitmq_rpc_client.request(
CLUSTERS_KEEPER_RPC_NAMESPACE,
RPCMethodName("get_or_create_cluster"),
rpc_response = await get_or_create_cluster(
clusters_keeper_rabbitmq_rpc_client,
user_id=user_id,
wallet_id=wallet_id if use_wallet_id else None,
)
Expand Down
32 changes: 21 additions & 11 deletions services/clusters-keeper/tests/unit/test_rpc_ec2_instances.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,11 @@

import pytest
from fastapi import FastAPI
from models_library.api_schemas_clusters_keeper import CLUSTERS_KEEPER_RPC_NAMESPACE
from models_library.api_schemas_clusters_keeper.ec2_instances import EC2InstanceType
from servicelib.rabbitmq import RabbitMQRPCClient, RPCMethodName
from servicelib.rabbitmq import RabbitMQRPCClient, RPCServerError
from servicelib.rabbitmq.rpc_interfaces.clusters_keeper.ec2_instances import (
get_instance_type_details,
)

pytest_simcore_core_services_selection = [
"rabbit",
Expand Down Expand Up @@ -34,10 +36,9 @@ async def test_get_instance_type_details_all_options(
clusters_keeper_rabbitmq_rpc_client: RabbitMQRPCClient,
):
# an empty set returns all options
rpc_response = await clusters_keeper_rabbitmq_rpc_client.request(
CLUSTERS_KEEPER_RPC_NAMESPACE,
RPCMethodName("get_instance_type_details"),
instance_type_names=set(),

rpc_response = await get_instance_type_details(
clusters_keeper_rabbitmq_rpc_client, instance_type_names=[]
)
assert rpc_response
assert isinstance(rpc_response, list)
Expand All @@ -48,14 +49,23 @@ async def test_get_instance_type_details_specific_type_names(
_base_configuration: None,
clusters_keeper_rabbitmq_rpc_client: RabbitMQRPCClient,
):
# an empty set returns all options
rpc_response = await clusters_keeper_rabbitmq_rpc_client.request(
CLUSTERS_KEEPER_RPC_NAMESPACE,
RPCMethodName("get_instance_type_details"),
rpc_response = await get_instance_type_details(
clusters_keeper_rabbitmq_rpc_client,
instance_type_names={"t2.micro", "g4dn.xlarge"},
)
assert rpc_response
assert isinstance(rpc_response, list)
assert len(rpc_response) == 2
assert rpc_response[0].name == "g4dn.xlarge"
assert rpc_response[1].name == "t2.micro"
assert rpc_response[0].name == "g4dn.xlarge"


async def test_get_instance_type_details_with_invalid_type_names(
    _base_configuration: None,
    clusters_keeper_rabbitmq_rpc_client: RabbitMQRPCClient,
):
    """Check that a request containing "invalid.name" raises RPCServerError."""
    # the failure is expected on the caller side, as an RPCServerError
    with pytest.raises(RPCServerError):
        await get_instance_type_details(
            clusters_keeper_rabbitmq_rpc_client,
            instance_type_names={"t2.micro", "g4dn.xlarge", "invalid.name"},
        )
Original file line number Diff line number Diff line change
@@ -1,7 +1,13 @@
from fastapi import Request
from servicelib.rabbitmq import RabbitMQClient
from servicelib.rabbitmq import RabbitMQClient, RabbitMQRPCClient

from ...modules.rabbitmq import get_rabbitmq_rpc_client


def get_rabbitmq_client(request: Request) -> RabbitMQClient:
    """Return the RabbitMQ client stored on the application state.

    Arguments:
        request -- incoming request; its ``app.state.rabbitmq_client`` is returned

    Returns:
        the object found at ``request.app.state.rabbitmq_client``
    """
    rabbitmq_client = request.app.state.rabbitmq_client
    # isinstance (rather than comparing type objects with ==) is the idiomatic
    # check and also narrows the type for static checkers
    assert isinstance(rabbitmq_client, RabbitMQClient)  # nosec
    return rabbitmq_client


def rabbitmq_rpc_client(request: Request) -> RabbitMQRPCClient:
    """Return the result of ``get_rabbitmq_rpc_client`` for the request's application."""
    return get_rabbitmq_rpc_client(request.app)
Loading
Loading