From 4c0082d643540843639ba8cf97cd42ecf98970f2 Mon Sep 17 00:00:00 2001 From: Nikita Karetnikov Date: Sat, 4 Nov 2023 21:54:05 +0000 Subject: [PATCH 01/12] Use shorter `build_key` Fixes #611. --- .../64a749e87619_add_build_key_version.py | 28 ++++++++++++ conda-store-server/conda_store_server/orm.py | 32 +++++++++++--- conda-store-server/tests/test_actions.py | 43 ++++++++++++++----- 3 files changed, 88 insertions(+), 15 deletions(-) create mode 100644 conda-store-server/conda_store_server/alembic/versions/64a749e87619_add_build_key_version.py diff --git a/conda-store-server/conda_store_server/alembic/versions/64a749e87619_add_build_key_version.py b/conda-store-server/conda_store_server/alembic/versions/64a749e87619_add_build_key_version.py new file mode 100644 index 000000000..113f6cba2 --- /dev/null +++ b/conda-store-server/conda_store_server/alembic/versions/64a749e87619_add_build_key_version.py @@ -0,0 +1,28 @@ +"""add build_key_version + +Revision ID: 64a749e87619 +Revises: b387747ca9b7 +Create Date: 2023-11-05 00:59:57.132192 + +""" +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = "64a749e87619" +down_revision = "b387747ca9b7" +branch_labels = None +depends_on = None + + +def upgrade(): + op.add_column( + "build", + sa.Column( + "build_key_version", sa.Integer(), nullable=False, server_default="1" + ), + ) + + +def downgrade(): + op.drop_column("build", "build_key_version") diff --git a/conda-store-server/conda_store_server/orm.py b/conda-store-server/conda_store_server/orm.py index 557451c5c..aeb52038a 100644 --- a/conda-store-server/conda_store_server/orm.py +++ b/conda-store-server/conda_store_server/orm.py @@ -182,6 +182,14 @@ class Build(Base): started_on = Column(DateTime, default=None) ended_on = Column(DateTime, default=None) deleted_on = Column(DateTime, default=None) + build_key_version = Column(Integer, default=2, nullable=False) + + @validates("build_key_version") + def validate_build_key_version(self, key, build_key_version): + if build_key_version not in [1, 2]: + raise ValueError(f"invalid build_key_version={build_key_version}") + + return build_key_version build_artifacts = relationship( "BuildArtifact", back_populates="build", cascade="all, delete-orphan" @@ -234,15 +242,29 @@ def build_key(self): The build key should be a key that allows for the environment build to be easily identified and found in the database. """ - datetime_format = "%Y%m%d-%H%M%S-%f" - return f"{self.specification.sha256}-{self.scheduled_on.strftime(datetime_format)}-{self.id}-{self.specification.name}" + if self.build_key_version == 1: + datetime_format = "%Y%m%d-%H%M%S-%f" + return f"{self.specification.sha256}-{self.scheduled_on.strftime(datetime_format)}-{self.id}-{self.specification.name}" + elif self.build_key_version == 2: + hash = self.specification.sha256[:4] + timestamp = int(self.scheduled_on.timestamp()) + id = self.id + name = self.specification.name[:16] + return f"{hash}-{timestamp}-{id}-{name}" + else: + raise ValueError(f"invalid build key version: {self.build_key_version}") @staticmethod def parse_build_key(key): parts = key.split("-") - if len(parts) < 5: - return None - return int(parts[4]) # build_id + # Note: cannot rely on the number of dashes to differentiate between + # versions because name can contain dashes. Instead, this relies on the + # hash size to infer the format. The name is the last field, so indexing + # to find the id is okay. + if key[4] == "-": # v2 + return int(parts[2]) # build_id + else: # v1 + return int(parts[4]) # build_id @property def log_key(self): diff --git a/conda-store-server/tests/test_actions.py b/conda-store-server/tests/test_actions.py index 288808fcf..fbd47e452 100644 --- a/conda-store-server/tests/test_actions.py +++ b/conda-store-server/tests/test_actions.py @@ -1,4 +1,5 @@ import asyncio +import datetime import pathlib import re import sys @@ -226,17 +227,20 @@ def test_add_lockfile_packages( @pytest.mark.parametrize( - "is_legacy_build", + "is_legacy_build, build_key_version", [ - False, - True, + (False, 0), # invalid + (False, 1), # long (legacy) + (False, 2), # short (default) + (True, 1), # build_key_version doesn't matter because there's no lockfile ], ) def test_api_get_build_lockfile( - request, conda_store, db, simple_specification_with_pip, conda_prefix, is_legacy_build + request, conda_store, db, simple_specification_with_pip, conda_prefix, is_legacy_build, build_key_version ): # initializes data needed to get the lockfile specification = simple_specification_with_pip + specification.name = "this-is-a-long-environment-name-to-test-truncation" namespace = "pytest" class MyAuthentication(DummyAuthentication): @@ -252,6 +256,16 @@ def authorize_request(self, *args, **kwargs): ) db.commit() build = api.get_build(db, build_id=build_id) + # makes this more visible in the lockfile + build_id = 12345678 + build.id = build_id + if build_key_version is 0: # invalid + with pytest.raises(ValueError, match=r"invalid build_key_version=0"): + build.build_key_version = build_key_version + return # invalid, nothing more to test + build.build_key_version = build_key_version + # makes sure the timestamp in build_key is always the same + build.scheduled_on = datetime.datetime(2023, 11, 5, 3, 54, 10, 510258) environment = api.get_environment(db, namespace=namespace) # adds packages (returned in the lockfile) @@ -295,12 +309,21 @@ def authorize_request(self, *args, **kwargs): assert re.match("http.*//.*tar.bz2#.*", lines[2]) is not None else: # new build: redirects to lockfile generated by conda-lock - lockfile_url_pattern = ( - "lockfile/" - "89e5a99aa094689b7aafc66c47987fa186e08f9d619a02ab1a469d0759da3b8b-" - ".*-test.yml" - ) + def lockfile_url(build_key): + return f"lockfile/{build_key}.yml" + if build_key_version is 1: + build_key = ( + "7a0c9317530e3732a25f22c2017a881dcd6f84ff85c96a609210168deea280ef-" + "20231105-035410-510258-12345678-this-is-a-long-environment-name-to-test-truncation" + ) + elif build_key_version is 2: + build_key = "7a0c-1699156450-12345678-this-is-a-long-e" + else: + raise ValueError(f"unexpected build_key_version: {build_key_version}") assert type(res) is RedirectResponse assert key == res.headers['location'] - assert re.match(lockfile_url_pattern, res.headers['location']) is not None + assert build.build_key == build_key + assert build.parse_build_key(build_key) == 12345678 + assert lockfile_url(build_key) == build.conda_lock_key + assert lockfile_url(build_key) == res.headers['location'] assert res.status_code == 307 From fe2e9c736f2e0b3f2c4c6e6b3899978cc1e2374c Mon Sep 17 00:00:00 2001 From: Nikita Karetnikov Date: Mon, 13 Nov 2023 00:23:52 +0000 Subject: [PATCH 02/12] Truncate only the hash, add a constant --- conda-store-server/conda_store_server/orm.py | 9 ++++++--- conda-store-server/tests/test_actions.py | 8 ++++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/conda-store-server/conda_store_server/orm.py b/conda-store-server/conda_store_server/orm.py index aeb52038a..b1549be67 100644 --- a/conda-store-server/conda_store_server/orm.py +++ b/conda-store-server/conda_store_server/orm.py @@ -155,6 +155,9 @@ class Solve(Base): ) +_BUILD_KEY_V2_HASH_SIZE = 8 + + class Build(Base): """The state of a build of a given specification""" @@ -246,10 +249,10 @@ def build_key(self): datetime_format = "%Y%m%d-%H%M%S-%f" return f"{self.specification.sha256}-{self.scheduled_on.strftime(datetime_format)}-{self.id}-{self.specification.name}" elif self.build_key_version == 2: - hash = self.specification.sha256[:4] + hash = self.specification.sha256[:_BUILD_KEY_V2_HASH_SIZE] timestamp = int(self.scheduled_on.timestamp()) id = self.id - name = self.specification.name[:16] + name = self.specification.name return f"{hash}-{timestamp}-{id}-{name}" else: raise ValueError(f"invalid build key version: {self.build_key_version}") @@ -261,7 +264,7 @@ def parse_build_key(key): # versions because name can contain dashes. Instead, this relies on the # hash size to infer the format. The name is the last field, so indexing # to find the id is okay. - if key[4] == "-": # v2 + if key[_BUILD_KEY_V2_HASH_SIZE] == "-": # v2 return int(parts[2]) # build_id else: # v1 return int(parts[4]) # build_id diff --git a/conda-store-server/tests/test_actions.py b/conda-store-server/tests/test_actions.py index fbd47e452..7e0758778 100644 --- a/conda-store-server/tests/test_actions.py +++ b/conda-store-server/tests/test_actions.py @@ -240,7 +240,7 @@ def test_api_get_build_lockfile( ): # initializes data needed to get the lockfile specification = simple_specification_with_pip - specification.name = "this-is-a-long-environment-name-to-test-truncation" + specification.name = "this-is-a-long-environment-name" namespace = "pytest" class MyAuthentication(DummyAuthentication): @@ -313,11 +313,11 @@ def lockfile_url(build_key): return f"lockfile/{build_key}.yml" if build_key_version is 1: build_key = ( - "7a0c9317530e3732a25f22c2017a881dcd6f84ff85c96a609210168deea280ef-" - "20231105-035410-510258-12345678-this-is-a-long-environment-name-to-test-truncation" + "c7afdeffbe2bda7d16ca69beecc8bebeb29280a95d4f3ed92849e4047710923b-" + "20231105-035410-510258-12345678-this-is-a-long-environment-name" ) elif build_key_version is 2: - build_key = "7a0c-1699156450-12345678-this-is-a-long-e" + build_key = "c7afdeff-1699156450-12345678-this-is-a-long-environment-name" else: raise ValueError(f"unexpected build_key_version: {build_key_version}") assert type(res) is RedirectResponse From 542d911c62b5bdc07809821ae20080af55243a56 Mon Sep 17 00:00:00 2001 From: Nikita Karetnikov Date: Mon, 13 Nov 2023 01:31:26 +0000 Subject: [PATCH 03/12] Fix syntax warnings --- conda-store-server/tests/test_actions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/conda-store-server/tests/test_actions.py b/conda-store-server/tests/test_actions.py index 7e0758778..7fa74f39c 100644 --- a/conda-store-server/tests/test_actions.py +++ b/conda-store-server/tests/test_actions.py @@ -259,7 +259,7 @@ def authorize_request(self, *args, **kwargs): # makes this more visible in the lockfile build_id = 12345678 build.id = build_id - if build_key_version is 0: # invalid + if build_key_version == 0: # invalid with pytest.raises(ValueError, match=r"invalid build_key_version=0"): build.build_key_version = build_key_version return # invalid, nothing more to test @@ -311,12 +311,12 @@ def authorize_request(self, *args, **kwargs): # new build: redirects to lockfile generated by conda-lock def lockfile_url(build_key): return f"lockfile/{build_key}.yml" - if build_key_version is 1: + if build_key_version == 1: build_key = ( "c7afdeffbe2bda7d16ca69beecc8bebeb29280a95d4f3ed92849e4047710923b-" "20231105-035410-510258-12345678-this-is-a-long-environment-name" ) - elif build_key_version is 2: + elif build_key_version == 2: build_key = "c7afdeff-1699156450-12345678-this-is-a-long-environment-name" else: raise ValueError(f"unexpected build_key_version: {build_key_version}") From 9b9bb6504c839e33f6cb9a04f857145dc24d7af7 Mon Sep 17 00:00:00 2001 From: Nikita Karetnikov Date: Fri, 17 Nov 2023 09:05:04 +0000 Subject: [PATCH 04/12] Allow to set `build_key_version` via the config --- .../conda_store_server/__init__.py | 4 ++++ conda-store-server/conda_store_server/app.py | 21 +++++++++++++++++++ conda-store-server/conda_store_server/orm.py | 14 ++++++++++++- 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/conda-store-server/conda_store_server/__init__.py b/conda-store-server/conda_store_server/__init__.py index d3b075936..eca3025f1 100644 --- a/conda-store-server/conda_store_server/__init__.py +++ b/conda-store-server/conda_store_server/__init__.py @@ -4,3 +4,7 @@ CONDA_STORE_DIR = Path.home() / ".conda-store" + + +# Default build_key_version. Must be None here, initialized from the config file +_BUILD_KEY_VERSION = None diff --git a/conda-store-server/conda_store_server/app.py b/conda-store-server/conda_store_server/app.py index a3aa62564..623953297 100644 --- a/conda-store-server/conda_store_server/app.py +++ b/conda-store-server/conda_store_server/app.py @@ -106,6 +106,22 @@ class CondaStore(LoggingConfigurable): config=True, ) + build_key_version = Integer( + 2, + help="Build key version to use: 1 (long, legacy), 2 (short, default)", + config=True, + ) + + @validate("build_key_version") + def _check_build_key_version(self, proposal): + expected = [1, 2] + if proposal.value not in expected: + raise TraitError( + f"c.CondaStore.build_key_version: invalid build key version: " + f"{proposal.value}, expected: {expected}" + ) + return proposal.value + conda_command = Unicode( "mamba", help="conda executable to use for solves", @@ -364,6 +380,11 @@ def session_factory(self): url=self.database_url, poolclass=QueuePool, ) + + # Sets the default build_key_version value in the DB based on the config + import conda_store_server + + conda_store_server._BUILD_KEY_VERSION = self.build_key_version return self._session_factory # Do not define this as a FastAPI dependency! That would cause Sessions diff --git a/conda-store-server/conda_store_server/orm.py b/conda-store-server/conda_store_server/orm.py index b1549be67..681ea58eb 100644 --- a/conda-store-server/conda_store_server/orm.py +++ b/conda-store-server/conda_store_server/orm.py @@ -158,6 +158,18 @@ class Solve(Base): _BUILD_KEY_V2_HASH_SIZE = 8 +# Avoids a cyclic dependency between the orm module and the module defining +# CondaStore.build_key_version. Because the orm module is loaded early on +# startup, we want to delay initialization of the build_key_version field until +# it's been read from the config. +def _get_build_key_version(): + from conda_store_server import _BUILD_KEY_VERSION + + # None means the value is not set, likely due to an import error + assert _BUILD_KEY_VERSION is not None + return _BUILD_KEY_VERSION + + class Build(Base): """The state of a build of a given specification""" @@ -185,7 +197,7 @@ class Build(Base): started_on = Column(DateTime, default=None) ended_on = Column(DateTime, default=None) deleted_on = Column(DateTime, default=None) - build_key_version = Column(Integer, default=2, nullable=False) + build_key_version = Column(Integer, default=_get_build_key_version, nullable=False) @validates("build_key_version") def validate_build_key_version(self, key, build_key_version): From 8f155ecf3dea8f81c17b04ac016241984202a157 Mon Sep 17 00:00:00 2001 From: Nikita Karetnikov Date: Fri, 17 Nov 2023 14:37:42 +0000 Subject: [PATCH 05/12] Update migration --- ...ersion.py => 30b37e725c32_add_build_key_version.py} | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) rename conda-store-server/conda_store_server/alembic/versions/{64a749e87619_add_build_key_version.py => 30b37e725c32_add_build_key_version.py} (73%) diff --git a/conda-store-server/conda_store_server/alembic/versions/64a749e87619_add_build_key_version.py b/conda-store-server/conda_store_server/alembic/versions/30b37e725c32_add_build_key_version.py similarity index 73% rename from conda-store-server/conda_store_server/alembic/versions/64a749e87619_add_build_key_version.py rename to conda-store-server/conda_store_server/alembic/versions/30b37e725c32_add_build_key_version.py index 113f6cba2..0313a95f0 100644 --- a/conda-store-server/conda_store_server/alembic/versions/64a749e87619_add_build_key_version.py +++ b/conda-store-server/conda_store_server/alembic/versions/30b37e725c32_add_build_key_version.py @@ -1,16 +1,16 @@ """add build_key_version -Revision ID: 64a749e87619 -Revises: b387747ca9b7 -Create Date: 2023-11-05 00:59:57.132192 +Revision ID: 30b37e725c32 +Revises: d78e9889566a +Create Date: 2023-11-17 14:34:40.688865 """ import sqlalchemy as sa from alembic import op # revision identifiers, used by Alembic. -revision = "64a749e87619" -down_revision = "b387747ca9b7" +revision = "30b37e725c32" +down_revision = "d78e9889566a" branch_labels = None depends_on = None From 998512f1fa9002fc4251953f51e8d6fc15dbee2b Mon Sep 17 00:00:00 2001 From: Nikita Karetnikov Date: Thu, 23 Nov 2023 12:29:33 +0100 Subject: [PATCH 06/12] Add `BuildKey` class --- .../conda_store_server/__init__.py | 81 ++++++++++++++++++- conda-store-server/conda_store_server/app.py | 18 ++--- conda-store-server/conda_store_server/orm.py | 56 +++++-------- conda-store-server/tests/test_actions.py | 31 +++++-- 4 files changed, 128 insertions(+), 58 deletions(-) diff --git a/conda-store-server/conda_store_server/__init__.py b/conda-store-server/conda_store_server/__init__.py index eca3025f1..600df9120 100644 --- a/conda-store-server/conda_store_server/__init__.py +++ b/conda-store-server/conda_store_server/__init__.py @@ -1,3 +1,4 @@ +import datetime from pathlib import Path __version__ = "2023.10.1" @@ -6,5 +7,81 @@ CONDA_STORE_DIR = Path.home() / ".conda-store" -# Default build_key_version. Must be None here, initialized from the config file -_BUILD_KEY_VERSION = None +class BuildKey: + # Avoids a cyclic dependency between the orm module and the module defining + # CondaStore.build_key_version. Because the orm module is loaded early on + # startup, we want to delay initialization of the Build.build_key_version + # field until CondaStore.build_key_version has been read from the config. + + # Default version, must be None here. Initialized in CondaStore.build_key_version + _current_version = None + + _version2_hash_size = 8 + + def _version1_fmt(build: "Build"): # noqa: F821 + datetime_format = "%Y%m%d-%H%M%S-%f" + hash = build.specification.sha256 + timestamp = build.scheduled_on.strftime(datetime_format) + id = build.id + name = build.specification.name + return f"{hash}-{timestamp}-{id}-{name}" + + def _version2_fmt(build: "Build"): # noqa: F821 + tzinfo = datetime.timezone.utc + hash = build.specification.sha256[: BuildKey._version2_hash_size] + timestamp = int(build.scheduled_on.replace(tzinfo=tzinfo).timestamp()) + id = build.id + name = build.specification.name + return f"{hash}-{timestamp}-{id}-{name}" + + # version -> fmt function + _fmt = { + 1: _version1_fmt, + 2: _version2_fmt, + } + + @classmethod + def _check_version(cls, build_key_version): + if build_key_version not in cls.versions(): + raise ValueError( + f"invalid build key version: {build_key_version}, " + f"expected: {cls.versions()}" + ) + + @classmethod + def set_current_version(cls, build_key_version: int): + """Sets provided build key version as current and returns it""" + cls._check_version(build_key_version) + cls._current_version = build_key_version + return build_key_version + + @classmethod + def current_version(cls): + """Returns currently selected build key version""" + # None means the value is not set, likely due to an import error + assert cls._current_version is not None + return cls._current_version + + @classmethod + def versions(cls): + """Returns available build key versions""" + return tuple(cls._fmt.keys()) + + @classmethod + def get_build_key(cls, build: "Build"): # noqa: F821 + """Returns build key for this build""" + cls._check_version(build.build_key_version) + return cls._fmt.get(build.build_key_version)(build) + + @classmethod + def parse_build_key(cls, build_key: str): + """Returns build id from build key""" + parts = build_key.split("-") + # Note: cannot rely on the number of dashes to differentiate between + # versions because name can contain dashes. Instead, this relies on the + # hash size to infer the format. The name is the last field, so indexing + # to find the id is okay. + if build_key[cls._version2_hash_size] == "-": # v2 + return int(parts[2]) # build_id + else: # v1 + return int(parts[4]) # build_id diff --git a/conda-store-server/conda_store_server/app.py b/conda-store-server/conda_store_server/app.py index 623953297..72edef33d 100644 --- a/conda-store-server/conda_store_server/app.py +++ b/conda-store-server/conda_store_server/app.py @@ -8,6 +8,7 @@ from celery import Celery, group from conda_store_server import ( CONDA_STORE_DIR, + BuildKey, api, conda_utils, environment, @@ -107,20 +108,17 @@ class CondaStore(LoggingConfigurable): ) build_key_version = Integer( - 2, + BuildKey.set_current_version(2), help="Build key version to use: 1 (long, legacy), 2 (short, default)", config=True, ) @validate("build_key_version") def _check_build_key_version(self, proposal): - expected = [1, 2] - if proposal.value not in expected: - raise TraitError( - f"c.CondaStore.build_key_version: invalid build key version: " - f"{proposal.value}, expected: {expected}" - ) - return proposal.value + try: + return BuildKey.set_current_version(proposal.value) + except Exception as e: + raise TraitError(f"c.CondaStore.build_key_version: {e}") conda_command = Unicode( "mamba", @@ -381,10 +379,6 @@ def session_factory(self): poolclass=QueuePool, ) - # Sets the default build_key_version value in the DB based on the config - import conda_store_server - - conda_store_server._BUILD_KEY_VERSION = self.build_key_version return self._session_factory # Do not define this as a FastAPI dependency! That would cause Sessions diff --git a/conda-store-server/conda_store_server/orm.py b/conda-store-server/conda_store_server/orm.py index 681ea58eb..1139047f3 100644 --- a/conda-store-server/conda_store_server/orm.py +++ b/conda-store-server/conda_store_server/orm.py @@ -155,21 +155,6 @@ class Solve(Base): ) -_BUILD_KEY_V2_HASH_SIZE = 8 - - -# Avoids a cyclic dependency between the orm module and the module defining -# CondaStore.build_key_version. Because the orm module is loaded early on -# startup, we want to delay initialization of the build_key_version field until -# it's been read from the config. -def _get_build_key_version(): - from conda_store_server import _BUILD_KEY_VERSION - - # None means the value is not set, likely due to an import error - assert _BUILD_KEY_VERSION is not None - return _BUILD_KEY_VERSION - - class Build(Base): """The state of a build of a given specification""" @@ -197,14 +182,21 @@ class Build(Base): started_on = Column(DateTime, default=None) ended_on = Column(DateTime, default=None) deleted_on = Column(DateTime, default=None) + + @staticmethod + def _get_build_key_version(): + # Uses local import to make sure current version is initialized + from conda_store_server import BuildKey + + return BuildKey.current_version() + build_key_version = Column(Integer, default=_get_build_key_version, nullable=False) @validates("build_key_version") def validate_build_key_version(self, key, build_key_version): - if build_key_version not in [1, 2]: - raise ValueError(f"invalid build_key_version={build_key_version}") + from conda_store_server import BuildKey - return build_key_version + return BuildKey.set_current_version(build_key_version) build_artifacts = relationship( "BuildArtifact", back_populates="build", cascade="all, delete-orphan" @@ -257,29 +249,17 @@ def build_key(self): The build key should be a key that allows for the environment build to be easily identified and found in the database. """ - if self.build_key_version == 1: - datetime_format = "%Y%m%d-%H%M%S-%f" - return f"{self.specification.sha256}-{self.scheduled_on.strftime(datetime_format)}-{self.id}-{self.specification.name}" - elif self.build_key_version == 2: - hash = self.specification.sha256[:_BUILD_KEY_V2_HASH_SIZE] - timestamp = int(self.scheduled_on.timestamp()) - id = self.id - name = self.specification.name - return f"{hash}-{timestamp}-{id}-{name}" - else: - raise ValueError(f"invalid build key version: {self.build_key_version}") + # Uses local import to make sure BuildKey is initialized + from conda_store_server import BuildKey + + return BuildKey.get_build_key(self) @staticmethod def parse_build_key(key): - parts = key.split("-") - # Note: cannot rely on the number of dashes to differentiate between - # versions because name can contain dashes. Instead, this relies on the - # hash size to infer the format. The name is the last field, so indexing - # to find the id is okay. - if key[_BUILD_KEY_V2_HASH_SIZE] == "-": # v2 - return int(parts[2]) # build_id - else: # v1 - return int(parts[4]) # build_id + # Uses local import to make sure BuildKey is initialized + from conda_store_server import BuildKey + + return BuildKey.parse_build_key(key) @property def log_key(self): diff --git a/conda-store-server/tests/test_actions.py b/conda-store-server/tests/test_actions.py index 7fa74f39c..e3a455489 100644 --- a/conda-store-server/tests/test_actions.py +++ b/conda-store-server/tests/test_actions.py @@ -5,10 +5,20 @@ import sys import pytest -from conda_store_server import action, api, conda_utils, orm, schema, server, utils +from conda_store_server import ( + BuildKey, + action, + api, + conda_utils, + orm, + schema, + server, + utils, +) from conda_store_server.server.auth import DummyAuthentication from fastapi import Request from fastapi.responses import RedirectResponse +from traitlets import TraitError def test_action_decorator(): @@ -238,6 +248,18 @@ def test_add_lockfile_packages( def test_api_get_build_lockfile( request, conda_store, db, simple_specification_with_pip, conda_prefix, is_legacy_build, build_key_version ): + # sets build_key_version + if build_key_version == 0: # invalid + with pytest.raises(TraitError, match=( + r"c.CondaStore.build_key_version: invalid build key version: 0, " + r"expected: \(1, 2\)" + )): + conda_store.build_key_version = build_key_version + return # invalid, nothing more to test + conda_store.build_key_version = build_key_version + assert BuildKey.current_version() == build_key_version + assert BuildKey.versions() == (1, 2) + # initializes data needed to get the lockfile specification = simple_specification_with_pip specification.name = "this-is-a-long-environment-name" @@ -259,11 +281,6 @@ def authorize_request(self, *args, **kwargs): # makes this more visible in the lockfile build_id = 12345678 build.id = build_id - if build_key_version == 0: # invalid - with pytest.raises(ValueError, match=r"invalid build_key_version=0"): - build.build_key_version = build_key_version - return # invalid, nothing more to test - build.build_key_version = build_key_version # makes sure the timestamp in build_key is always the same build.scheduled_on = datetime.datetime(2023, 11, 5, 3, 54, 10, 510258) environment = api.get_environment(db, namespace=namespace) @@ -323,7 +340,9 @@ def lockfile_url(build_key): assert type(res) is RedirectResponse assert key == res.headers['location'] assert build.build_key == build_key + assert BuildKey.get_build_key(build) == build_key assert build.parse_build_key(build_key) == 12345678 + assert BuildKey.parse_build_key(build_key) == 12345678 assert lockfile_url(build_key) == build.conda_lock_key assert lockfile_url(build_key) == res.headers['location'] assert res.status_code == 307 From 24be46a6da7f44cf972b8cd8a644285e743d356b Mon Sep 17 00:00:00 2001 From: Nikita Karetnikov Date: Fri, 24 Nov 2023 23:07:32 +0100 Subject: [PATCH 07/12] Use the same "local import" comment --- conda-store-server/conda_store_server/orm.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/conda-store-server/conda_store_server/orm.py b/conda-store-server/conda_store_server/orm.py index 1139047f3..8ab5d1a20 100644 --- a/conda-store-server/conda_store_server/orm.py +++ b/conda-store-server/conda_store_server/orm.py @@ -185,7 +185,7 @@ class Build(Base): @staticmethod def _get_build_key_version(): - # Uses local import to make sure current version is initialized + # Uses local import to make sure BuildKey is initialized from conda_store_server import BuildKey return BuildKey.current_version() @@ -194,6 +194,7 @@ def _get_build_key_version(): @validates("build_key_version") def validate_build_key_version(self, key, build_key_version): + # Uses local import to make sure BuildKey is initialized from conda_store_server import BuildKey return BuildKey.set_current_version(build_key_version) From 9013d1ae4b1cb90f1645bc6be126958d9be804e4 Mon Sep 17 00:00:00 2001 From: Nikita Karetnikov Date: Sat, 25 Nov 2023 17:02:16 +0100 Subject: [PATCH 08/12] Add missing type hints to `BuildKey` --- .../conda_store_server/__init__.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/conda-store-server/conda_store_server/__init__.py b/conda-store-server/conda_store_server/__init__.py index 600df9120..d318d76aa 100644 --- a/conda-store-server/conda_store_server/__init__.py +++ b/conda-store-server/conda_store_server/__init__.py @@ -1,4 +1,5 @@ import datetime +import typing from pathlib import Path __version__ = "2023.10.1" @@ -18,7 +19,7 @@ class BuildKey: _version2_hash_size = 8 - def _version1_fmt(build: "Build"): # noqa: F821 + def _version1_fmt(build: "Build") -> str: # noqa: F821 datetime_format = "%Y%m%d-%H%M%S-%f" hash = build.specification.sha256 timestamp = build.scheduled_on.strftime(datetime_format) @@ -26,7 +27,7 @@ def _version1_fmt(build: "Build"): # noqa: F821 name = build.specification.name return f"{hash}-{timestamp}-{id}-{name}" - def _version2_fmt(build: "Build"): # noqa: F821 + def _version2_fmt(build: "Build") -> str: # noqa: F821 tzinfo = datetime.timezone.utc hash = build.specification.sha256[: BuildKey._version2_hash_size] timestamp = int(build.scheduled_on.replace(tzinfo=tzinfo).timestamp()) @@ -41,7 +42,7 @@ def _version2_fmt(build: "Build"): # noqa: F821 } @classmethod - def _check_version(cls, build_key_version): + def _check_version(cls, build_key_version: int): if build_key_version not in cls.versions(): raise ValueError( f"invalid build key version: {build_key_version}, " @@ -49,32 +50,32 @@ def _check_version(cls, build_key_version): ) @classmethod - def set_current_version(cls, build_key_version: int): + def set_current_version(cls, build_key_version: int) -> int: """Sets provided build key version as current and returns it""" cls._check_version(build_key_version) cls._current_version = build_key_version return build_key_version @classmethod - def current_version(cls): + def current_version(cls) -> int: """Returns currently selected build key version""" # None means the value is not set, likely due to an import error assert cls._current_version is not None return cls._current_version @classmethod - def versions(cls): + def versions(cls) -> typing.Tuple[int]: """Returns available build key versions""" return tuple(cls._fmt.keys()) @classmethod - def get_build_key(cls, build: "Build"): # noqa: F821 + def get_build_key(cls, build: "Build") -> str: # noqa: F821 """Returns build key for this build""" cls._check_version(build.build_key_version) return cls._fmt.get(build.build_key_version)(build) @classmethod - def parse_build_key(cls, build_key: str): + def parse_build_key(cls, build_key: str) -> int: """Returns build id from build key""" parts = build_key.split("-") # Note: cannot rely on the number of dashes to differentiate between From 0ae713b2c7f8049965464171663b568da4305ca1 Mon Sep 17 00:00:00 2001 From: Nikita Karetnikov Date: Sat, 25 Nov 2023 17:53:16 +0100 Subject: [PATCH 09/12] Document build key versions --- docs/administration.md | 49 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/docs/administration.md b/docs/administration.md index 427adc26b..c31ddb9b6 100644 --- a/docs/administration.md +++ b/docs/administration.md @@ -571,6 +571,55 @@ affect both the `build_path` length and the filename length. [relocatable]: https://docs.conda.io/projects/conda-build/en/latest/resources/make-relocatable.html +### Build key versions + +The part of the build path that identifies a particular environment build is the +build key. Originally, conda-store used the following format, known as version +1: + +``` +c7afdeffbe2bda7d16ca69beecc8bebeb29280a95d4f3ed92849e4047710923b-20231105-035410-510258-12345678-this-is-a-long-environment-name +^ (1) ^ (2) ^ (3) ^ (4) +``` + +It consists of: +1. a SHA-256 hash of the environment specification +2. a human-readable timestamp (year, month, day, `-`, hour, minute, second, `-`, microsecond) +3. the id of a build +4. the environment name. + +To help mitigate build path length issues, a shorter build key format was +introduced, known as version 2: + +``` +c7afdeff-1699156450-12345678-this-is-a-long-environment-name +^ (1) ^ (2) ^ (3) ^ (4) +``` + +It consists of: +1. a truncated SHA-256 hash of the environment specification +2. a Unix timestamp +3. the id of a build +4. the environment name. + +The version 2 format is now the default. Environments created using the version +1 format will continue to be accessible in the UI, but new builds will use the +version 2 format. No changes are needed for existing deployments of conda-store. + +There is no real reason to use the version 1 format anymore, but it can be +explicitly set via the config: + +``` +c.CondaStore.build_key_version = 1 +``` + +The version 2 format can also be explicitly set if needed (this is the same as +the default): + +``` +c.CondaStore.build_key_version = 2 +``` + ### Long paths on Windows conda-store supports Windows in standalone mode. However, when creating From b44be1b5172e34b5bd346bb3924af72c95f98f11 Mon Sep 17 00:00:00 2001 From: Nikita Karetnikov Date: Sat, 25 Nov 2023 19:30:08 +0100 Subject: [PATCH 10/12] Mention `build_key_version` in `CondaStore` options --- docs/administration.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/administration.md b/docs/administration.md index c31ddb9b6..0a62cbc9d 100644 --- a/docs/administration.md +++ b/docs/administration.md @@ -96,6 +96,9 @@ for symlinking Conda environment builds. Available keys: store_directory, namespace, name. The default will put all environments in the same namespace within the same directory. +`CondaStore.build_key_version` is the [build key version](#build-key-versions) +to use: 1 (long, legacy), 2 (short, default). + `CondaStore.validate_specification` callable function taking `conda_store` and `specification` as input arguments to apply for validating and modifying a given specification. If there are From aa4598d441c5531336d319c70d729985bb519206 Mon Sep 17 00:00:00 2001 From: Nikita Karetnikov Date: Tue, 28 Nov 2023 09:28:26 +0100 Subject: [PATCH 11/12] Explain how hash is computed from `CondaSpecification` --- docs/administration.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/docs/administration.md b/docs/administration.md index 0a62cbc9d..cb520a72d 100644 --- a/docs/administration.md +++ b/docs/administration.md @@ -587,6 +587,9 @@ c7afdeffbe2bda7d16ca69beecc8bebeb29280a95d4f3ed92849e4047710923b-20231105-035410 It consists of: 1. a SHA-256 hash of the environment specification + (`CondaSpecification`, which represents a user-provided environment, is + converted to a dict and passed to `datastructure_hash`, which recursively sorts + it and calculates the SHA-256 hash) 2. a human-readable timestamp (year, month, day, `-`, hour, minute, second, `-`, microsecond) 3. the id of a build 4. the environment name. @@ -601,6 +604,9 @@ c7afdeff-1699156450-12345678-this-is-a-long-environment-name It consists of: 1. a truncated SHA-256 hash of the environment specification + (`CondaSpecification`, which represents a user-provided environment, is + converted to a dict and passed to `datastructure_hash`, which recursively sorts + it and calculates the SHA-256 hash) 2. a Unix timestamp 3. the id of a build 4. the environment name. From 36da8f04e00674b25241ad06e958420148209a4f Mon Sep 17 00:00:00 2001 From: Nikita Karetnikov Date: Tue, 28 Nov 2023 11:24:11 +0100 Subject: [PATCH 12/12] Add a docstring to `BuildKey` --- .../conda_store_server/__init__.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/conda-store-server/conda_store_server/__init__.py b/conda-store-server/conda_store_server/__init__.py index d318d76aa..a964b50fa 100644 --- a/conda-store-server/conda_store_server/__init__.py +++ b/conda-store-server/conda_store_server/__init__.py @@ -9,10 +9,21 @@ class BuildKey: - # Avoids a cyclic dependency between the orm module and the module defining - # CondaStore.build_key_version. Because the orm module is loaded early on - # startup, we want to delay initialization of the Build.build_key_version - # field until CondaStore.build_key_version has been read from the config. + """ + Used to configure the build key format, which identifies a particular + environment build + + Avoids a cyclic dependency between the `orm` module and the module defining + `CondaStore.build_key_version`. Because the `orm` module is loaded early on + startup, we want to delay initialization of the `Build.build_key_version` + field until `CondaStore.build_key_version` has been read from the config. + + Because the build key version needs to be the same for the entire + application, this class implements the singleton pattern. Users are expected + to use class methods instead of creating class instances. All implementation + details are hidden within the class, preventing potential issues caused by + cyclic imports. + """ # Default version, must be None here. Initialized in CondaStore.build_key_version _current_version = None