From d834eff82827c731963e2ff64cf4045f77da002b Mon Sep 17 00:00:00 2001 From: Chris Burr Date: Tue, 3 Oct 2023 19:54:06 +0200 Subject: [PATCH] Get SE Name from SandboxStoreSettings --- src/diracx/db/sql/sandbox_metadata/db.py | 22 +++++++++------------ src/diracx/routers/job_manager/sandboxes.py | 21 ++++++++++++++------ tests/db/test_sandbox_metadata.py | 10 +++++----- 3 files changed, 29 insertions(+), 24 deletions(-) diff --git a/src/diracx/db/sql/sandbox_metadata/db.py b/src/diracx/db/sql/sandbox_metadata/db.py index a95dd93f2..8a5a42526 100644 --- a/src/diracx/db/sql/sandbox_metadata/db.py +++ b/src/diracx/db/sql/sandbox_metadata/db.py @@ -8,12 +8,6 @@ from .schema import Base as SandboxMetadataDBBase from .schema import sb_Owners, sb_SandBoxes -# In legacy DIRAC the SEName column was used to support multiple different -# storage backends. This is no longer the case, so we hardcode the value to -# S3 to represent the new DiracX system. -SE_NAME = "ProductionSandboxSE" -PFN_PREFIX = "/S3/" - class SandboxMetadataDB(BaseSQLDB): metadata = SandboxMetadataDBBase.metadata @@ -50,13 +44,15 @@ def get_pfn(bucket_name: str, user: UserInfo, sandbox_info: SandboxInfo) -> str: ] return "/" + "/".join(parts) - async def insert_sandbox(self, user: UserInfo, pfn: str, size: int) -> None: + async def insert_sandbox( + self, se_name: str, user: UserInfo, pfn: str, size: int + ) -> None: """Add a new sandbox in SandboxMetadataDB""" # TODO: Follow https://github.com/DIRACGrid/diracx/issues/49 owner_id = await self.upsert_owner(user) stmt = sqlalchemy.insert(sb_SandBoxes).values( OwnerId=owner_id, - SEName=SE_NAME, + SEName=se_name, SEPFN=pfn, Bytes=size, RegistrationTime=utcnow(), @@ -65,23 +61,23 @@ async def insert_sandbox(self, user: UserInfo, pfn: str, size: int) -> None: try: result = await self.conn.execute(stmt) except sqlalchemy.exc.IntegrityError: - await self.update_sandbox_last_access_time(pfn) + await self.update_sandbox_last_access_time(se_name, pfn) else: assert result.rowcount == 1 - async def update_sandbox_last_access_time(self, pfn: str) -> None: + async def update_sandbox_last_access_time(self, se_name: str, pfn: str) -> None: stmt = ( sqlalchemy.update(sb_SandBoxes) - .where(sb_SandBoxes.SEName == SE_NAME, sb_SandBoxes.SEPFN == pfn) + .where(sb_SandBoxes.SEName == se_name, sb_SandBoxes.SEPFN == pfn) .values(LastAccessTime=utcnow()) ) result = await self.conn.execute(stmt) assert result.rowcount == 1 - async def sandbox_is_assigned(self, pfn: str) -> bool: + async def sandbox_is_assigned(self, se_name: str, pfn: str) -> bool: """Checks if a sandbox exists and has been assigned.""" stmt: sqlalchemy.Executable = sqlalchemy.select(sb_SandBoxes.Assigned).where( - sb_SandBoxes.SEName == SE_NAME, sb_SandBoxes.SEPFN == pfn + sb_SandBoxes.SEName == se_name, sb_SandBoxes.SEPFN == pfn ) result = await self.conn.execute(stmt) is_assigned = result.scalar_one() diff --git a/src/diracx/routers/job_manager/sandboxes.py b/src/diracx/routers/job_manager/sandboxes.py index e832c50f3..5150ffffa 100644 --- a/src/diracx/routers/job_manager/sandboxes.py +++ b/src/diracx/routers/job_manager/sandboxes.py @@ -41,6 +41,7 @@ class SandboxStoreSettings(ServiceSettingsBase, env_prefix="DIRACX_SANDBOX_STORE s3_client_kwargs: dict[str, str] auto_create_bucket: bool = False url_validity_seconds: int = 5 * 60 + se_name: str = "SandboxSE" _client: S3Client = PrivateAttr(None) @contextlib.asynccontextmanager @@ -97,9 +98,12 @@ async def initiate_sandbox_upload( ) pfn = sandbox_metadata_db.get_pfn(settings.bucket_name, user_info, sandbox_info) + full_pfn = f"SB:{settings.se_name}|{pfn}" try: - exists_and_assigned = await sandbox_metadata_db.sandbox_is_assigned(pfn) + exists_and_assigned = await sandbox_metadata_db.sandbox_is_assigned( + settings.se_name, pfn + ) except NoResultFound: # The sandbox doesn't exist in the database pass @@ -112,8 +116,10 @@ async def initiate_sandbox_upload( if exists_and_assigned or s3_object_exists( settings.s3_client, settings.bucket_name, pfn_to_key(pfn) ): - await sandbox_metadata_db.update_sandbox_last_access_time(pfn) - return SandboxUploadResponse(pfn=pfn) + await sandbox_metadata_db.update_sandbox_last_access_time( + settings.se_name, pfn + ) + return SandboxUploadResponse(pfn=full_pfn) upload_info = await generate_presigned_upload( settings.s3_client, @@ -124,9 +130,11 @@ async def initiate_sandbox_upload( sandbox_info.size, settings.url_validity_seconds, ) - await sandbox_metadata_db.insert_sandbox(user_info, pfn, sandbox_info.size) + await sandbox_metadata_db.insert_sandbox( + settings.se_name, user_info, pfn, sandbox_info.size + ) - return SandboxUploadResponse(**upload_info, pfn=pfn) + return SandboxUploadResponse(**upload_info, pfn=full_pfn) class SandboxDownloadResponse(BaseModel): @@ -144,7 +152,7 @@ def pfn_to_key(pfn: str) -> str: SANDBOX_PFN_REGEX = ( # Starts with /S3/ - r"^/S3/[a-z0-9\.\-]{3,63}" + r"^(:?SB:[A-Za-z]+\|)?/S3/[a-z0-9\.\-]{3,63}" # Followed ////:. r"(?:/[^/]+){3}/[a-z0-9]{3,10}:[0-9a-f]{64}\.[a-z0-9\.]+$" ) @@ -164,6 +172,7 @@ async def get_sandbox_file( most storage backends return an error when they receive an authorization header for a presigned URL. """ + pfn = pfn.split("|", 1)[-1] required_prefix = ( "/" + "/".join( diff --git a/tests/db/test_sandbox_metadata.py b/tests/db/test_sandbox_metadata.py index 6757af13b..9d19f73cd 100644 --- a/tests/db/test_sandbox_metadata.py +++ b/tests/db/test_sandbox_metadata.py @@ -46,18 +46,18 @@ async def test_insert_sandbox(sandbox_metadata_db: SandboxMetadataDB): assert pfn1 not in db_contents async with sandbox_metadata_db: with pytest.raises(sqlalchemy.exc.NoResultFound): - await sandbox_metadata_db.sandbox_is_assigned(pfn1) + await sandbox_metadata_db.sandbox_is_assigned("SandboxSE", pfn1) # Insert the sandbox async with sandbox_metadata_db: - await sandbox_metadata_db.insert_sandbox(user_info, pfn1, 100) + await sandbox_metadata_db.insert_sandbox("SandboxSE", user_info, pfn1, 100) db_contents = await _dump_db(sandbox_metadata_db) owner_id1, last_access_time1 = db_contents[pfn1] # Inserting again should update the last access time await asyncio.sleep(1) # The timestamp only has second precision async with sandbox_metadata_db: - await sandbox_metadata_db.insert_sandbox(user_info, pfn1, 100) + await sandbox_metadata_db.insert_sandbox("SandboxSE", user_info, pfn1, 100) db_contents = await _dump_db(sandbox_metadata_db) owner_id2, last_access_time2 = db_contents[pfn1] assert owner_id1 == owner_id2 @@ -65,14 +65,14 @@ async def test_insert_sandbox(sandbox_metadata_db: SandboxMetadataDB): # The sandbox still hasn't been assigned async with sandbox_metadata_db: - assert not await sandbox_metadata_db.sandbox_is_assigned(pfn1) + assert not await sandbox_metadata_db.sandbox_is_assigned("SandboxSE", pfn1) # Inserting again should update the last access time await asyncio.sleep(1) # The timestamp only has second precision last_access_time3 = (await _dump_db(sandbox_metadata_db))[pfn1][1] assert last_access_time2 == last_access_time3 async with sandbox_metadata_db: - await sandbox_metadata_db.update_sandbox_last_access_time(pfn1) + await sandbox_metadata_db.update_sandbox_last_access_time("SandboxSE", pfn1) last_access_time4 = (await _dump_db(sandbox_metadata_db))[pfn1][1] assert last_access_time2 < last_access_time4