Skip to content

Commit

Permalink
Get SE Name from SandboxStoreSettings
Browse files Browse the repository at this point in the history
  • Loading branch information
chrisburr committed Oct 3, 2023
1 parent 73fb8b0 commit d834eff
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 24 deletions.
22 changes: 9 additions & 13 deletions src/diracx/db/sql/sandbox_metadata/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,6 @@
from .schema import Base as SandboxMetadataDBBase
from .schema import sb_Owners, sb_SandBoxes

# In legacy DIRAC the SEName column was used to support multiple different
# storage backends. This is no longer the case, so we hardcode the value to
# S3 to represent the new DiracX system.
SE_NAME = "ProductionSandboxSE"
PFN_PREFIX = "/S3/"


class SandboxMetadataDB(BaseSQLDB):
metadata = SandboxMetadataDBBase.metadata
Expand Down Expand Up @@ -50,13 +44,15 @@ def get_pfn(bucket_name: str, user: UserInfo, sandbox_info: SandboxInfo) -> str:
]
return "/" + "/".join(parts)

async def insert_sandbox(self, user: UserInfo, pfn: str, size: int) -> None:
async def insert_sandbox(
self, se_name: str, user: UserInfo, pfn: str, size: int
) -> None:
"""Add a new sandbox in SandboxMetadataDB"""
# TODO: Follow https://github.com/DIRACGrid/diracx/issues/49
owner_id = await self.upsert_owner(user)
stmt = sqlalchemy.insert(sb_SandBoxes).values(
OwnerId=owner_id,
SEName=SE_NAME,
SEName=se_name,
SEPFN=pfn,
Bytes=size,
RegistrationTime=utcnow(),
Expand All @@ -65,23 +61,23 @@ async def insert_sandbox(self, user: UserInfo, pfn: str, size: int) -> None:
try:
result = await self.conn.execute(stmt)
except sqlalchemy.exc.IntegrityError:
await self.update_sandbox_last_access_time(pfn)
await self.update_sandbox_last_access_time(se_name, pfn)
else:
assert result.rowcount == 1

async def update_sandbox_last_access_time(self, pfn: str) -> None:
async def update_sandbox_last_access_time(self, se_name: str, pfn: str) -> None:
stmt = (
sqlalchemy.update(sb_SandBoxes)
.where(sb_SandBoxes.SEName == SE_NAME, sb_SandBoxes.SEPFN == pfn)
.where(sb_SandBoxes.SEName == se_name, sb_SandBoxes.SEPFN == pfn)
.values(LastAccessTime=utcnow())
)
result = await self.conn.execute(stmt)
assert result.rowcount == 1

async def sandbox_is_assigned(self, pfn: str) -> bool:
async def sandbox_is_assigned(self, se_name: str, pfn: str) -> bool:
"""Checks if a sandbox exists and has been assigned."""
stmt: sqlalchemy.Executable = sqlalchemy.select(sb_SandBoxes.Assigned).where(
sb_SandBoxes.SEName == SE_NAME, sb_SandBoxes.SEPFN == pfn
sb_SandBoxes.SEName == se_name, sb_SandBoxes.SEPFN == pfn
)
result = await self.conn.execute(stmt)
is_assigned = result.scalar_one()
Expand Down
21 changes: 15 additions & 6 deletions src/diracx/routers/job_manager/sandboxes.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class SandboxStoreSettings(ServiceSettingsBase, env_prefix="DIRACX_SANDBOX_STORE
s3_client_kwargs: dict[str, str]
auto_create_bucket: bool = False
url_validity_seconds: int = 5 * 60
se_name: str = "SandboxSE"
_client: S3Client = PrivateAttr(None)

@contextlib.asynccontextmanager
Expand Down Expand Up @@ -97,9 +98,12 @@ async def initiate_sandbox_upload(
)

pfn = sandbox_metadata_db.get_pfn(settings.bucket_name, user_info, sandbox_info)
full_pfn = f"SB:{settings.se_name}|{pfn}"

try:
exists_and_assigned = await sandbox_metadata_db.sandbox_is_assigned(pfn)
exists_and_assigned = await sandbox_metadata_db.sandbox_is_assigned(
settings.se_name, pfn
)
except NoResultFound:
# The sandbox doesn't exist in the database
pass
Expand All @@ -112,8 +116,10 @@ async def initiate_sandbox_upload(
if exists_and_assigned or s3_object_exists(
settings.s3_client, settings.bucket_name, pfn_to_key(pfn)
):
await sandbox_metadata_db.update_sandbox_last_access_time(pfn)
return SandboxUploadResponse(pfn=pfn)
await sandbox_metadata_db.update_sandbox_last_access_time(
settings.se_name, pfn
)
return SandboxUploadResponse(pfn=full_pfn)

upload_info = await generate_presigned_upload(
settings.s3_client,
Expand All @@ -124,9 +130,11 @@ async def initiate_sandbox_upload(
sandbox_info.size,
settings.url_validity_seconds,
)
await sandbox_metadata_db.insert_sandbox(user_info, pfn, sandbox_info.size)
await sandbox_metadata_db.insert_sandbox(
settings.se_name, user_info, pfn, sandbox_info.size
)

return SandboxUploadResponse(**upload_info, pfn=pfn)
return SandboxUploadResponse(**upload_info, pfn=full_pfn)


class SandboxDownloadResponse(BaseModel):
Expand All @@ -144,7 +152,7 @@ def pfn_to_key(pfn: str) -> str:

SANDBOX_PFN_REGEX = (
# Starts with /S3/<bucket_name>
r"^/S3/[a-z0-9\.\-]{3,63}"
r"^(:?SB:[A-Za-z]+\|)?/S3/[a-z0-9\.\-]{3,63}"
# Followed /<vo>/<group>/<username>/<checksum_algorithm>:<checksum>.<format>
r"(?:/[^/]+){3}/[a-z0-9]{3,10}:[0-9a-f]{64}\.[a-z0-9\.]+$"
)
Expand All @@ -164,6 +172,7 @@ async def get_sandbox_file(
most storage backends return an error when they receive an authorization
header for a presigned URL.
"""
pfn = pfn.split("|", 1)[-1]
required_prefix = (
"/"
+ "/".join(
Expand Down
10 changes: 5 additions & 5 deletions tests/db/test_sandbox_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,33 +46,33 @@ async def test_insert_sandbox(sandbox_metadata_db: SandboxMetadataDB):
assert pfn1 not in db_contents
async with sandbox_metadata_db:
with pytest.raises(sqlalchemy.exc.NoResultFound):
await sandbox_metadata_db.sandbox_is_assigned(pfn1)
await sandbox_metadata_db.sandbox_is_assigned("SandboxSE", pfn1)

# Insert the sandbox
async with sandbox_metadata_db:
await sandbox_metadata_db.insert_sandbox(user_info, pfn1, 100)
await sandbox_metadata_db.insert_sandbox("SandboxSE", user_info, pfn1, 100)
db_contents = await _dump_db(sandbox_metadata_db)
owner_id1, last_access_time1 = db_contents[pfn1]

# Inserting again should update the last access time
await asyncio.sleep(1) # The timestamp only has second precision
async with sandbox_metadata_db:
await sandbox_metadata_db.insert_sandbox(user_info, pfn1, 100)
await sandbox_metadata_db.insert_sandbox("SandboxSE", user_info, pfn1, 100)
db_contents = await _dump_db(sandbox_metadata_db)
owner_id2, last_access_time2 = db_contents[pfn1]
assert owner_id1 == owner_id2
assert last_access_time2 > last_access_time1

# The sandbox still hasn't been assigned
async with sandbox_metadata_db:
assert not await sandbox_metadata_db.sandbox_is_assigned(pfn1)
assert not await sandbox_metadata_db.sandbox_is_assigned("SandboxSE", pfn1)

# Inserting again should update the last access time
await asyncio.sleep(1) # The timestamp only has second precision
last_access_time3 = (await _dump_db(sandbox_metadata_db))[pfn1][1]
assert last_access_time2 == last_access_time3
async with sandbox_metadata_db:
await sandbox_metadata_db.update_sandbox_last_access_time(pfn1)
await sandbox_metadata_db.update_sandbox_last_access_time("SandboxSE", pfn1)
last_access_time4 = (await _dump_db(sandbox_metadata_db))[pfn1][1]
assert last_access_time2 < last_access_time4

Expand Down

0 comments on commit d834eff

Please sign in to comment.