From 8e4ba9edae266dd7d84efc1b3577b968c7b05454 Mon Sep 17 00:00:00 2001 From: Pedro Crespo-Valero <32402063+pcrespov@users.noreply.github.com> Date: Tue, 6 Feb 2024 13:53:47 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=A8=20Fixes=20flaky=20`test=5Fstudies?= =?UTF-8?q?=5Fdispatcher=5Fstudies=5Faccess`=20(#5304)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../studies_dispatcher/_errors.py | 4 + .../studies_dispatcher/_studies_access.py | 10 +-- .../studies_dispatcher/_users.py | 84 +++++++++++++------ 3 files changed, 66 insertions(+), 32 deletions(-) diff --git a/services/web/server/src/simcore_service_webserver/studies_dispatcher/_errors.py b/services/web/server/src/simcore_service_webserver/studies_dispatcher/_errors.py index 3d5326a8606..eb04db2b924 100644 --- a/services/web/server/src/simcore_service_webserver/studies_dispatcher/_errors.py +++ b/services/web/server/src/simcore_service_webserver/studies_dispatcher/_errors.py @@ -26,3 +26,7 @@ class InvalidRedirectionParams(PydanticErrorMixin, StudyDispatcherError): "The link you provided is invalid because it doesn't contain any information related to data or a service." " Please check the link and make sure it is correct." ) + + +class GuestUsersLimitError(PydanticErrorMixin, StudyDispatcherError): + msg_template = "Maximum number of guests was reached. Please login with a registered user or try again later" diff --git a/services/web/server/src/simcore_service_webserver/studies_dispatcher/_studies_access.py b/services/web/server/src/simcore_service_webserver/studies_dispatcher/_studies_access.py index 6c3068dd173..10f7d1bc275 100644 --- a/services/web/server/src/simcore_service_webserver/studies_dispatcher/_studies_access.py +++ b/services/web/server/src/simcore_service_webserver/studies_dispatcher/_studies_access.py @@ -40,6 +40,7 @@ MSG_TOO_MANY_GUESTS, MSG_UNEXPECTED_ERROR, ) +from ._errors import GuestUsersLimitError from ._users import create_temporary_guest_user, get_authorized_user _logger = logging.getLogger(__name__) @@ -288,16 +289,15 @@ async def get_redirection_to_study_page(request: web.Request) -> web.Response: user = await create_temporary_guest_user(request) is_anonymous_user = True - except Exception as exc: - # NOTE: when lock times out it is because a user cannot - # be create in less that 3 seconds. That shows that the system - # is really busy and we rather stop creating GUEST users. For that + except GuestUsersLimitError as exc: + # + # NOTE: Creation of guest users is limited. For that # reason we respond with 429 and inform the user that temporarily # we cannot accept any more users. # error_code = create_error_code(exc) _logger.exception( - "Failed to create guest user. Responded with 429 Too Many Requests[%s]", + "Failed to create guest user. Responded with 429 Too Many Requests [%s]", f"{error_code}", extra={"error_code": error_code}, ) diff --git a/services/web/server/src/simcore_service_webserver/studies_dispatcher/_users.py b/services/web/server/src/simcore_service_webserver/studies_dispatcher/_users.py index 5a39e46963d..bae7bffb472 100644 --- a/services/web/server/src/simcore_service_webserver/studies_dispatcher/_users.py +++ b/services/web/server/src/simcore_service_webserver/studies_dispatcher/_users.py @@ -15,7 +15,10 @@ from aiohttp import web from models_library.emails import LowerCaseEmailStr from pydantic import BaseModel, parse_obj_as +from redis.exceptions import LockNotOwnedError +from servicelib.aiohttp.application_keys import APP_FIRE_AND_FORGET_TASKS_KEY from servicelib.logging_utils import log_decorator +from servicelib.utils import fire_and_forget_task from ..garbage_collector.settings import GUEST_USER_RC_LOCK_FORMAT from ..groups.api import auto_add_user_to_product_group @@ -32,6 +35,7 @@ from ..users.api import get_user from ..users.exceptions import UserNotFoundError from ._constants import MSG_GUESTS_NOT_ALLOWED +from ._errors import GuestUsersLimitError from .settings import StudiesDispatcherSettings, get_plugin_settings _logger = logging.getLogger(__name__) @@ -61,7 +65,7 @@ async def create_temporary_guest_user(request: web.Request): """Creates a guest user with a random name and Raises: - LockNotOwnedError: Cannot release a lock that's no longer owned (e.g. when redis_locks_client times out) + MaxGuestUsersError: No more guest users allowed """ db: AsyncpgStorage = get_plugin_storage(request.app) @@ -84,7 +88,7 @@ async def create_temporary_guest_user(request: web.Request): # - the timeout here is the TTL of the lock in Redis. in case the webserver is overwhelmed and cannot create # a user during that time or crashes, then redis will ensure the lock disappears and let the garbage collector do its work # - MAX_DELAY_TO_CREATE_USER = 4 # secs + MAX_DELAY_TO_CREATE_USER = 5 # secs # # 2. During initialization # - Prevents the GC from deleting this GUEST user, with ID assigned, while it gets initialized and acquires it's first resource @@ -101,31 +105,57 @@ async def create_temporary_guest_user(request: web.Request): # # (1) read details above - async with redis_locks_client.lock( - GUEST_USER_RC_LOCK_FORMAT.format(user_id=random_user_name), - timeout=MAX_DELAY_TO_CREATE_USER, - ): - # NOTE: usr Dict is incomplete, e.g. does not contain primary_gid - usr = await db.create_user( - { - "name": random_user_name, - "email": email, - "password_hash": encrypt_password(password), - "status": ACTIVE, - "role": GUEST, - "expires_at": expires_at, - } - ) - user: dict = await get_user(request.app, usr["id"]) - await auto_add_user_to_product_group( - request.app, user_id=user["id"], product_name=product_name - ) - - # (2) read details above - await redis_locks_client.lock( - GUEST_USER_RC_LOCK_FORMAT.format(user_id=user["id"]), - timeout=MAX_DELAY_TO_GUEST_FIRST_CONNECTION, - ).acquire() + usr = {} + try: + async with redis_locks_client.lock( + GUEST_USER_RC_LOCK_FORMAT.format(user_id=random_user_name), + timeout=MAX_DELAY_TO_CREATE_USER, + ): + # NOTE: usr Dict is incomplete, e.g. does not contain primary_gid + usr = await db.create_user( + { + "name": random_user_name, + "email": email, + "password_hash": encrypt_password(password), + "status": ACTIVE, + "role": GUEST, + "expires_at": expires_at, + } + ) + user: dict = await get_user(request.app, usr["id"]) + await auto_add_user_to_product_group( + request.app, user_id=user["id"], product_name=product_name + ) + + # (2) read details above + await redis_locks_client.lock( + GUEST_USER_RC_LOCK_FORMAT.format(user_id=user["id"]), + timeout=MAX_DELAY_TO_GUEST_FIRST_CONNECTION, + ).acquire() + + except LockNotOwnedError as err: + # NOTE: The policy on number of GUETS users allowed is bound to the + # load of the system. + # If the lock times-out it is because a user cannot + # be create in less that MAX_DELAY_TO_CREATE_USER seconds. + # That shows that the system is really loaded and we rather + # stop creating GUEST users. + + # NOTE: here we cleanup but if any trace is left it will be deleted by gc + if usr.get("id"): + + async def _cleanup(draft_user): + with suppress(Exception): + await db.delete_user(draft_user) + + fire_and_forget_task( + _cleanup(usr), + task_suffix_name="cleanup_temporary_guest_user", + fire_and_forget_tasks_collection=request.app[ + APP_FIRE_AND_FORGET_TASKS_KEY + ], + ) + raise GuestUsersLimitError from err return user