From 82e214c4b88d4ed98512d97c2b4831a9c0b7e951 Mon Sep 17 00:00:00 2001 From: Nikita Karetnikov Date: Sat, 9 Mar 2024 13:59:53 +0100 Subject: [PATCH 01/13] Support hash-only build paths Fixes #678. --- .../conda_store_server/__init__.py | 37 +++++++++++++++- conda-store-server/conda_store_server/app.py | 4 +- .../conda_store_server/build.py | 6 ++- conda-store-server/conda_store_server/orm.py | 25 +++++++++-- .../server/views/registry.py | 2 +- .../conda_store_server/worker/tasks.py | 3 +- conda-store-server/tests/test_actions.py | 11 +++-- .../references/configuration-options.md | 2 +- docusaurus-docs/conda-store/references/faq.md | 43 ++++++++++++++++--- 9 files changed, 112 insertions(+), 21 deletions(-) diff --git a/conda-store-server/conda_store_server/__init__.py b/conda-store-server/conda_store_server/__init__.py index 5737a0e60..97ce80061 100644 --- a/conda-store-server/conda_store_server/__init__.py +++ b/conda-store-server/conda_store_server/__init__.py @@ -1,4 +1,5 @@ import datetime +import hashlib import typing from pathlib import Path @@ -30,6 +31,8 @@ class BuildKey: _version2_hash_size = 8 + _version3_hash_size = 32 + def _version1_fmt(build: "Build") -> str: # noqa: F821 datetime_format = "%Y%m%d-%H%M%S-%f" hash = build.specification.sha256 @@ -46,10 +49,28 @@ def _version2_fmt(build: "Build") -> str: # noqa: F821 name = build.specification.name return f"{hash}-{timestamp}-{id}-{name}" + def _version3_fmt(build: "Build") -> str: # noqa: F821 + # Adds namespace here to separate builds performed by different users + # since all builds are stored in the same directory in v3, see + # Build.build_path in orm.py. Additionally, this also hashes the + # timestamp and build id just to make collisions very unlikely + namespace_name = build.environment.namespace.name + specification_hash = build.specification.sha256 + tzinfo = datetime.timezone.utc + timestamp = int(build.scheduled_on.replace(tzinfo=tzinfo).timestamp()) + build_id = build.id + hash_input = ( + namespace_name + specification_hash + str(timestamp) + str(build_id) + ) + hash = hashlib.sha256(hash_input.encode("utf-8")).hexdigest() + hash = hash[: BuildKey._version3_hash_size] + return hash + # version -> fmt function _fmt = { 1: _version1_fmt, 2: _version2_fmt, + 3: _version3_fmt, } @classmethod @@ -86,8 +107,13 @@ def get_build_key(cls, build: "Build") -> str: # noqa: F821 return cls._fmt.get(build.build_key_version)(build) @classmethod - def parse_build_key(cls, build_key: str) -> int: + def parse_build_key( + cls, conda_store: "CondaStore", build_key: str # noqa: F821 + ) -> int: """Returns build id from build key""" + # This import is here to avoid cyclic imports + from conda_store_server import orm + parts = build_key.split("-") # Note: cannot rely on the number of dashes to differentiate between # versions because name can contain dashes. Instead, this relies on the @@ -95,5 +121,14 @@ def parse_build_key(cls, build_key: str) -> int: # to find the id is okay. if build_key[cls._version2_hash_size] == "-": # v2 return int(parts[2]) # build_id + elif "-" not in build_key: # v3 + with conda_store.get_db() as db: + builds = db.query(orm.Build).all() + # Note: this uses a separate for-loop instead of using filter + # because the latter doesn't allow for passing lambdas + for build in builds: + if cls._version3_fmt(build) == build_key: + return build.id + return None else: # v1 return int(parts[4]) # build_id diff --git a/conda-store-server/conda_store_server/app.py b/conda-store-server/conda_store_server/app.py index 8d68dd138..e7e20b834 100644 --- a/conda-store-server/conda_store_server/app.py +++ b/conda-store-server/conda_store_server/app.py @@ -108,8 +108,8 @@ class CondaStore(LoggingConfigurable): ) build_key_version = Integer( - BuildKey.set_current_version(2), - help="Build key version to use: 1 (long, legacy), 2 (short, default)", + BuildKey.set_current_version(3), + help="Build key version to use: 1 (long, legacy), 2 (shorter hash), 3 (hash-only, default)", config=True, ) diff --git a/conda-store-server/conda_store_server/build.py b/conda-store-server/conda_store_server/build.py index f2b349b08..d2f2b50c4 100644 --- a/conda-store-server/conda_store_server/build.py +++ b/conda-store-server/conda_store_server/build.py @@ -167,7 +167,8 @@ def build_conda_environment(db: Session, conda_store, build): conda_prefix.parent.mkdir(parents=True, exist_ok=True) environment_prefix = build.environment_path(conda_store) - environment_prefix.parent.mkdir(parents=True, exist_ok=True) + if environment_prefix is not None: + environment_prefix.parent.mkdir(parents=True, exist_ok=True) with utils.timer(conda_store.log, f"building conda_prefix={conda_prefix}"): context = action.action_solve_lockfile( @@ -223,7 +224,8 @@ def build_conda_environment(db: Session, conda_store, build): + "\n::endgroup::\n", ) - utils.symlink(conda_prefix, environment_prefix) + if environment_prefix is not None: + utils.symlink(conda_prefix, environment_prefix) action.action_set_conda_prefix_permissions( conda_prefix=conda_prefix, diff --git a/conda-store-server/conda_store_server/orm.py b/conda-store-server/conda_store_server/orm.py index 3a626e82c..96151b952 100644 --- a/conda-store-server/conda_store_server/orm.py +++ b/conda-store-server/conda_store_server/orm.py @@ -256,8 +256,15 @@ def build_path(self, conda_store): build the environment """ + # Uses local import to make sure BuildKey is initialized + from conda_store_server import BuildKey + + if BuildKey.current_version() < 3: + namespace = self.environment.namespace.name + else: + namespace = "" + store_directory = os.path.abspath(conda_store.store_directory) - namespace = self.environment.namespace.name res = ( pathlib.Path( conda_store.build_directory.format( @@ -283,6 +290,18 @@ def environment_path(self, conda_store): path """ + # Uses local import to make sure BuildKey is initialized + from conda_store_server import BuildKey + + # This is not used with v3 because the whole point of v3 is to avoid any + # dependence on user-provided variable-sized data in the filesystem and + # by default the environment path contains the namespace and the + # environment name, which can be arbitrary large. By setting this to + # None, we're making it clear that this shouldn't be used by other + # functions, such as when creating symlinks + if BuildKey.current_version() >= 3: + return None + store_directory = os.path.abspath(conda_store.store_directory) namespace = self.environment.namespace.name name = self.specification.name @@ -315,11 +334,11 @@ def build_key(self): return BuildKey.get_build_key(self) @staticmethod - def parse_build_key(key): + def parse_build_key(conda_store: "CondaStore", key: str): # noqa: F821 # Uses local import to make sure BuildKey is initialized from conda_store_server import BuildKey - return BuildKey.parse_build_key(key) + return BuildKey.parse_build_key(conda_store, key) @property def log_key(self): diff --git a/conda-store-server/conda_store_server/server/views/registry.py b/conda-store-server/conda_store_server/server/views/registry.py index 7acd480c5..204b1b2a7 100644 --- a/conda-store-server/conda_store_server/server/views/registry.py +++ b/conda-store-server/conda_store_server/server/views/registry.py @@ -101,7 +101,7 @@ def get_docker_image_manifest(conda_store, image, tag, timeout=10 * 60): else: build_key = tag - build_id = orm.Build.parse_build_key(build_key) + build_id = orm.Build.parse_build_key(conda_store, build_key) if build_id is None: return docker_error_message(schema.DockerRegistryError.MANIFEST_UNKNOWN) diff --git a/conda-store-server/conda_store_server/worker/tasks.py b/conda-store-server/conda_store_server/worker/tasks.py index e87cc6a99..5a3db198e 100644 --- a/conda-store-server/conda_store_server/worker/tasks.py +++ b/conda-store-server/conda_store_server/worker/tasks.py @@ -260,7 +260,8 @@ def task_update_environment_build(self, environment_id): conda_prefix = environment.current_build.build_path(conda_store) environment_prefix = environment.current_build.environment_path(conda_store) - utils.symlink(conda_prefix, environment_prefix) + if environment_prefix is not None: + utils.symlink(conda_prefix, environment_prefix) if conda_store.post_update_environment_build_hook: conda_store.post_update_environment_build_hook(conda_store, environment) diff --git a/conda-store-server/tests/test_actions.py b/conda-store-server/tests/test_actions.py index 02f710481..1954052f7 100644 --- a/conda-store-server/tests/test_actions.py +++ b/conda-store-server/tests/test_actions.py @@ -330,6 +330,7 @@ def test_add_lockfile_packages( (False, 0), # invalid (False, 1), # long (legacy) (False, 2), # short (default) + (False, 3), # short (default) (True, 1), # build_key_version doesn't matter because there's no lockfile ], ) @@ -348,14 +349,14 @@ def test_api_get_build_lockfile( TraitError, match=( r"c.CondaStore.build_key_version: invalid build key version: 0, " - r"expected: \(1, 2\)" + r"expected: \(1, 2, 3\)" ), ): conda_store.build_key_version = build_key_version return # invalid, nothing more to test conda_store.build_key_version = build_key_version assert BuildKey.current_version() == build_key_version - assert BuildKey.versions() == (1, 2) + assert BuildKey.versions() == (1, 2, 3) # initializes data needed to get the lockfile specification = simple_specification_with_pip @@ -434,14 +435,16 @@ def lockfile_url(build_key): ) elif build_key_version == 2: build_key = "c7afdeff-1699156450-12345678-this-is-a-long-environment-name" + elif build_key_version == 3: + build_key = "c1f206a26263e1166e5b43548f69aa0c" else: raise ValueError(f"unexpected build_key_version: {build_key_version}") assert type(res) is RedirectResponse assert key == res.headers["location"] assert build.build_key == build_key assert BuildKey.get_build_key(build) == build_key - assert build.parse_build_key(build_key) == 12345678 - assert BuildKey.parse_build_key(build_key) == 12345678 + assert build.parse_build_key(conda_store, build_key) == 12345678 + assert BuildKey.parse_build_key(conda_store, build_key) == 12345678 assert lockfile_url(build_key) == build.conda_lock_key assert lockfile_url(build_key) == res.headers["location"] assert res.status_code == 307 diff --git a/docusaurus-docs/conda-store/references/configuration-options.md b/docusaurus-docs/conda-store/references/configuration-options.md index ce39322e4..ae6e5ec06 100644 --- a/docusaurus-docs/conda-store/references/configuration-options.md +++ b/docusaurus-docs/conda-store/references/configuration-options.md @@ -48,7 +48,7 @@ store_directory, namespace, name. The default will put all environments in the same namespace within the same directory. `CondaStore.build_key_version` is the [build key version](#build-key-versions) -to use: 1 (long, legacy), 2 (short, default). +to use: 1 (long, legacy), 2 (shorter hash), 3 (hash-only, default). `CondaStore.validate_specification` callable function taking `conda_store` and `specification` as input arguments to apply for diff --git a/docusaurus-docs/conda-store/references/faq.md b/docusaurus-docs/conda-store/references/faq.md index 356d13548..20c399a5a 100644 --- a/docusaurus-docs/conda-store/references/faq.md +++ b/docusaurus-docs/conda-store/references/faq.md @@ -94,19 +94,50 @@ It consists of: 3. the id of a build 4. the environment name. -The version 2 format is now the default. Environments created using the version -1 format will continue to be accessible in the UI, but new builds will use the -version 2 format. No changes are needed for existing deployments of conda-store. +However, version 2 build paths don't solve the problem completely because they +include user-provided data, like the environment name, and that data can be +arbitrary large. -There is no real reason to use the version 1 format anymore, but it can be +To solve this problem, version 3 was introduced, which will always have the same +size. It looks like this: + +```bash +64a943764b70e8fe181643404894f7ae +``` + +It's a truncated SHA-256 hex digest, which is calculated based on: + +- namespace name +- specification hash (also SHA-256) +- build timestamp +- build id. + +See `BuildKey._version3_fmt` for details. + +:::note +When version 3 is used, `Build.build_path` will not include the namespace name, +because it's not fixed size, so all builds will be placed right into +`CondaStore.store_directory`. Additionally, `CondaStore.environment_directory` +will be completely ignored, so no symlinks will be created, because the +environment directory format also includes variable-size data (the namespace and +environment names). This shouldn't impact anything because the generated +artifacts rely on storage or use the database, and those are not affected by +these changes to the state directory. +::: + +The version 3 format is now the default. Environments created using the version +1 and 2 formats will continue to be accessible in the UI, but new builds will +use the version 3 format. No changes are needed for existing deployments of +conda-store. + +There is no real reason to use version 1 and 2 formats anymore, but these can be explicitly set via the config: ```python c.CondaStore.build_key_version = 1 ``` -The version 2 format can also be explicitly set if needed (this is the same as -the default): +or ```python c.CondaStore.build_key_version = 2 From e17fd8d8dbf9a8ceee75aa4c197c7379381e3584 Mon Sep 17 00:00:00 2001 From: Nikita Karetnikov Date: Mon, 11 Mar 2024 02:38:40 +0100 Subject: [PATCH 02/13] Update tests --- tests/test_api.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/tests/test_api.py b/tests/test_api.py index 9583249b1..a08fe98c6 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -641,10 +641,15 @@ def get_logs(): assert quartiles[0] < threshold -# Only testing size values that will always cause errors. Smaller values could -# cause errors as well, but would be flaky since the test conda-store state -# directory might have different lengths on different systems, for instance, -# due to different username lengths. +# This used to fail with build_key_version < 3. With v3, the sizes of namespace +# and environment names have no effect on the build key, so the build should +# complete without failures. The comments below describe errors that used to +# happen previously: +# +# > Only testing size values that will always cause errors. Smaller values could +# > cause errors as well, but would be flaky since the test conda-store state +# > directory might have different lengths on different systems, for instance, +# > due to different username lengths. @pytest.mark.parametrize( "size", [ @@ -669,16 +674,13 @@ def test_create_specification_auth_env_name_too_long(testclient, size): "specification": json.dumps({"name": environment_name}), }, ) - if size > 255: - assert response.status_code == 500 - return # error, nothing to do response.raise_for_status() r = schema.APIPostSpecification.parse_obj(response.json()) assert r.status == schema.APIStatus.OK build_id = r.data.build_id - # Try checking that the status is 'FAILED' + # Try checking that the status is 'COMPLETED' is_updated = False for _ in range(60): time.sleep(10) @@ -692,8 +694,7 @@ def test_create_specification_auth_env_name_too_long(testclient, size): assert r.data.specification.name == environment_name if r.data.status == "QUEUED": continue # checked too fast, try again - assert r.data.status == "FAILED" - assert r.data.status_info == "build_path too long: must be <= 255 characters" + assert r.data.status == "COMPLETED" is_updated = True break From 4b1a7d33a239eb93c64b8f095ca479c7434867c4 Mon Sep 17 00:00:00 2001 From: Nikita Karetnikov Date: Mon, 11 Mar 2024 02:42:03 +0100 Subject: [PATCH 03/13] Fix comment typos --- conda-store-server/conda_store_server/orm.py | 2 +- conda-store-server/tests/test_actions.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/conda-store-server/conda_store_server/orm.py b/conda-store-server/conda_store_server/orm.py index 96151b952..e7e715a30 100644 --- a/conda-store-server/conda_store_server/orm.py +++ b/conda-store-server/conda_store_server/orm.py @@ -294,7 +294,7 @@ def environment_path(self, conda_store): from conda_store_server import BuildKey # This is not used with v3 because the whole point of v3 is to avoid any - # dependence on user-provided variable-sized data in the filesystem and + # dependence on user-provided variable-sized data on the filesystem and # by default the environment path contains the namespace and the # environment name, which can be arbitrary large. By setting this to # None, we're making it clear that this shouldn't be used by other diff --git a/conda-store-server/tests/test_actions.py b/conda-store-server/tests/test_actions.py index 1954052f7..51a849b0d 100644 --- a/conda-store-server/tests/test_actions.py +++ b/conda-store-server/tests/test_actions.py @@ -329,8 +329,8 @@ def test_add_lockfile_packages( [ (False, 0), # invalid (False, 1), # long (legacy) - (False, 2), # short (default) - (False, 3), # short (default) + (False, 2), # shorter hash + (False, 3), # hash-only (default) (True, 1), # build_key_version doesn't matter because there's no lockfile ], ) From 8551ba5f091b66ec173134f9ebc8021c583af3ce Mon Sep 17 00:00:00 2001 From: Nikita Karetnikov Date: Mon, 11 Mar 2024 03:12:36 +0100 Subject: [PATCH 04/13] Update test --- tests/test_api.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/test_api.py b/tests/test_api.py index a08fe98c6..09b509782 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -674,6 +674,9 @@ def test_create_specification_auth_env_name_too_long(testclient, size): "specification": json.dumps({"name": environment_name}), }, ) + if size > 255: # SQL error, expected + assert response.status_code == 500 + return # error, nothing to do response.raise_for_status() r = schema.APIPostSpecification.parse_obj(response.json()) @@ -692,7 +695,7 @@ def test_create_specification_auth_env_name_too_long(testclient, size): r = schema.APIGetBuild.parse_obj(response.json()) assert r.status == schema.APIStatus.OK assert r.data.specification.name == environment_name - if r.data.status == "QUEUED": + if r.data.status in ("QUEUED", "BUILDING"): continue # checked too fast, try again assert r.data.status == "COMPLETED" is_updated = True From 3db9fe85f8a15054aa456ba02f73eb096d1ea998 Mon Sep 17 00:00:00 2001 From: Nikita Karetnikov Date: Mon, 11 Mar 2024 21:00:42 +0100 Subject: [PATCH 05/13] Switch `build_key_version` to 2 --- conda-store-server/conda_store_server/app.py | 4 +- conda-store-server/tests/test_actions.py | 4 +- .../references/configuration-options.md | 2 +- docusaurus-docs/conda-store/references/faq.md | 43 +++++++++++-------- 4 files changed, 30 insertions(+), 23 deletions(-) diff --git a/conda-store-server/conda_store_server/app.py b/conda-store-server/conda_store_server/app.py index e7e20b834..991ee958b 100644 --- a/conda-store-server/conda_store_server/app.py +++ b/conda-store-server/conda_store_server/app.py @@ -108,8 +108,8 @@ class CondaStore(LoggingConfigurable): ) build_key_version = Integer( - BuildKey.set_current_version(3), - help="Build key version to use: 1 (long, legacy), 2 (shorter hash), 3 (hash-only, default)", + BuildKey.set_current_version(2), + help="Build key version to use: 1 (long, legacy), 2 (shorter hash, default), 3 (hash-only)", config=True, ) diff --git a/conda-store-server/tests/test_actions.py b/conda-store-server/tests/test_actions.py index 51a849b0d..312f7ba42 100644 --- a/conda-store-server/tests/test_actions.py +++ b/conda-store-server/tests/test_actions.py @@ -329,8 +329,8 @@ def test_add_lockfile_packages( [ (False, 0), # invalid (False, 1), # long (legacy) - (False, 2), # shorter hash - (False, 3), # hash-only (default) + (False, 2), # shorter hash (default) + (False, 3), # hash-only (True, 1), # build_key_version doesn't matter because there's no lockfile ], ) diff --git a/docusaurus-docs/conda-store/references/configuration-options.md b/docusaurus-docs/conda-store/references/configuration-options.md index ae6e5ec06..7e92608cb 100644 --- a/docusaurus-docs/conda-store/references/configuration-options.md +++ b/docusaurus-docs/conda-store/references/configuration-options.md @@ -48,7 +48,7 @@ store_directory, namespace, name. The default will put all environments in the same namespace within the same directory. `CondaStore.build_key_version` is the [build key version](#build-key-versions) -to use: 1 (long, legacy), 2 (shorter hash), 3 (hash-only, default). +to use: 1 (long, legacy), 2 (shorter hash, default), 3 (hash-only). `CondaStore.validate_specification` callable function taking `conda_store` and `specification` as input arguments to apply for diff --git a/docusaurus-docs/conda-store/references/faq.md b/docusaurus-docs/conda-store/references/faq.md index 20c399a5a..9a8b1f6d4 100644 --- a/docusaurus-docs/conda-store/references/faq.md +++ b/docusaurus-docs/conda-store/references/faq.md @@ -117,30 +117,37 @@ See `BuildKey._version3_fmt` for details. :::note When version 3 is used, `Build.build_path` will not include the namespace name, because it's not fixed size, so all builds will be placed right into -`CondaStore.store_directory`. Additionally, `CondaStore.environment_directory` -will be completely ignored, so no symlinks will be created, because the -environment directory format also includes variable-size data (the namespace and -environment names). This shouldn't impact anything because the generated -artifacts rely on storage or use the database, and those are not affected by -these changes to the state directory. +`CondaStore.store_directory`. + +Additionally, `CondaStore.environment_directory` will be completely ignored, so +no symlinks connecting an environment name to its corresponding build will be +created, because the environment directory format also includes variable-size +data (the namespace and environment names). + +The lack of symlinks doesn't prevent server artifacts from being generated, +which are available for download via the UI (lockfiles, archives, etc.), because +those rely on storage or use the database. + +But it does impact conda integration or tools that rely on it, like when +conda-store is used with JupyterLab as part of a Nebari deployment. Without +environment symlinks, there'll be no way to tell conda where to look for +environments, which is done by setting `envs_dirs` in `.condarc`, so `conda env +list` will return nothing and no environments will show up in JupyterLab. ::: -The version 3 format is now the default. Environments created using the version -1 and 2 formats will continue to be accessible in the UI, but new builds will -use the version 3 format. No changes are needed for existing deployments of -conda-store. +The version 2 format is the default because it supports environment symlinks and +doesn't usually run into path length limitations. If you do experience problems +with the latter and don't need the former, then consider using the version 3 +format. -There is no real reason to use version 1 and 2 formats anymore, but these can be -explicitly set via the config: +No matter what format you choose, environments that were previously created +using other version formats will be accessible in the conda-store web UI. -```python -c.CondaStore.build_key_version = 1 -``` - -or +There is no real reason to use version 1 format anymore, but any version can be +explicitly set via the config, for example: ```python -c.CondaStore.build_key_version = 2 +c.CondaStore.build_key_version = 1 ``` ## Long paths on Windows From 555ae1530f6667b5957195576668aeb3facccd55 Mon Sep 17 00:00:00 2001 From: Nikita Karetnikov Date: Fri, 15 Mar 2024 08:47:51 +0100 Subject: [PATCH 06/13] Revert changes to integration tests --- tests/test_api.py | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/tests/test_api.py b/tests/test_api.py index 09b509782..9583249b1 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -641,15 +641,10 @@ def get_logs(): assert quartiles[0] < threshold -# This used to fail with build_key_version < 3. With v3, the sizes of namespace -# and environment names have no effect on the build key, so the build should -# complete without failures. The comments below describe errors that used to -# happen previously: -# -# > Only testing size values that will always cause errors. Smaller values could -# > cause errors as well, but would be flaky since the test conda-store state -# > directory might have different lengths on different systems, for instance, -# > due to different username lengths. +# Only testing size values that will always cause errors. Smaller values could +# cause errors as well, but would be flaky since the test conda-store state +# directory might have different lengths on different systems, for instance, +# due to different username lengths. @pytest.mark.parametrize( "size", [ @@ -674,7 +669,7 @@ def test_create_specification_auth_env_name_too_long(testclient, size): "specification": json.dumps({"name": environment_name}), }, ) - if size > 255: # SQL error, expected + if size > 255: assert response.status_code == 500 return # error, nothing to do response.raise_for_status() @@ -683,7 +678,7 @@ def test_create_specification_auth_env_name_too_long(testclient, size): assert r.status == schema.APIStatus.OK build_id = r.data.build_id - # Try checking that the status is 'COMPLETED' + # Try checking that the status is 'FAILED' is_updated = False for _ in range(60): time.sleep(10) @@ -695,9 +690,10 @@ def test_create_specification_auth_env_name_too_long(testclient, size): r = schema.APIGetBuild.parse_obj(response.json()) assert r.status == schema.APIStatus.OK assert r.data.specification.name == environment_name - if r.data.status in ("QUEUED", "BUILDING"): + if r.data.status == "QUEUED": continue # checked too fast, try again - assert r.data.status == "COMPLETED" + assert r.data.status == "FAILED" + assert r.data.status_info == "build_path too long: must be <= 255 characters" is_updated = True break From 3aa46e650975870fce488534da54f8725840a46f Mon Sep 17 00:00:00 2001 From: Nikita Karetnikov Date: Fri, 15 Mar 2024 10:46:44 +0100 Subject: [PATCH 07/13] Fix typos --- conda-store-server/conda_store_server/orm.py | 2 +- docusaurus-docs/conda-store/references/faq.md | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/conda-store-server/conda_store_server/orm.py b/conda-store-server/conda_store_server/orm.py index e7e715a30..7bf58ac26 100644 --- a/conda-store-server/conda_store_server/orm.py +++ b/conda-store-server/conda_store_server/orm.py @@ -294,7 +294,7 @@ def environment_path(self, conda_store): from conda_store_server import BuildKey # This is not used with v3 because the whole point of v3 is to avoid any - # dependence on user-provided variable-sized data on the filesystem and + # dependence on user-provided variable-size data on the filesystem and # by default the environment path contains the namespace and the # environment name, which can be arbitrary large. By setting this to # None, we're making it clear that this shouldn't be used by other diff --git a/docusaurus-docs/conda-store/references/faq.md b/docusaurus-docs/conda-store/references/faq.md index 9a8b1f6d4..d2da47cc4 100644 --- a/docusaurus-docs/conda-store/references/faq.md +++ b/docusaurus-docs/conda-store/references/faq.md @@ -143,8 +143,8 @@ format. No matter what format you choose, environments that were previously created using other version formats will be accessible in the conda-store web UI. -There is no real reason to use version 1 format anymore, but any version can be -explicitly set via the config, for example: +There is no real reason to use the version 1 format anymore, but any version can +be explicitly set via the config, for example: ```python c.CondaStore.build_key_version = 1 From b2f5fd6d06f58a99fbfd7ed05dd982793241398e Mon Sep 17 00:00:00 2001 From: Nikita Karetnikov Date: Fri, 15 Mar 2024 21:00:00 +0100 Subject: [PATCH 08/13] Bug fix: handle duplicate artifacts on deletion --- conda-store-server/conda_store_server/storage.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/conda-store-server/conda_store_server/storage.py b/conda-store-server/conda_store_server/storage.py index 1ee77a5ea..72a9864e9 100644 --- a/conda-store-server/conda_store_server/storage.py +++ b/conda-store-server/conda_store_server/storage.py @@ -228,5 +228,12 @@ def get_url(self, key): def delete(self, db, build_id, key): filename = os.path.join(self.storage_path, key) - os.remove(filename) + try: + os.remove(filename) + except FileNotFoundError: + # The DB can contain multiple entries pointing to the same key, like + # a log file. This skips files that were previously processed and + # deleted. See LocalStorage.fset and Storage.fset, which are used + # for saving build artifacts + pass super().delete(db, build_id, key) From 39cfeef46bded3d03472cfdf79f594a500530637 Mon Sep 17 00:00:00 2001 From: Nikita Karetnikov Date: Thu, 21 Mar 2024 14:44:59 +0100 Subject: [PATCH 09/13] Doc: improve wording Co-authored-by: jaimergp --- docusaurus-docs/conda-store/references/faq.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docusaurus-docs/conda-store/references/faq.md b/docusaurus-docs/conda-store/references/faq.md index d2da47cc4..166a3bab9 100644 --- a/docusaurus-docs/conda-store/references/faq.md +++ b/docusaurus-docs/conda-store/references/faq.md @@ -96,7 +96,7 @@ It consists of: However, version 2 build paths don't solve the problem completely because they include user-provided data, like the environment name, and that data can be -arbitrary large. +arbitrarily large. To solve this problem, version 3 was introduced, which will always have the same size. It looks like this: From ced96748344f55c7aa8b405a0ba0568bbf77611b Mon Sep 17 00:00:00 2001 From: Nikita Karetnikov Date: Sat, 23 Mar 2024 20:43:30 +0100 Subject: [PATCH 10/13] Doc: add more info about symlinks --- docusaurus-docs/conda-store/references/faq.md | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/docusaurus-docs/conda-store/references/faq.md b/docusaurus-docs/conda-store/references/faq.md index 166a3bab9..68c23c8e3 100644 --- a/docusaurus-docs/conda-store/references/faq.md +++ b/docusaurus-docs/conda-store/references/faq.md @@ -124,6 +124,20 @@ no symlinks connecting an environment name to its corresponding build will be created, because the environment directory format also includes variable-size data (the namespace and environment names). +For context, these symlinks are created because that's how conda is usually +used: each environment name points to a particular directory on the filesystem, +and symlinks connect this directory to the current build. + +In the following example, which uses the version 2 format, there are two +environments in the `default` namespace: `test` and `test2`. The former points +to build 3 and the latter points to build 2: + +``` +$ ls -l ~/.conda-store/state/default/envs +test -> /home/user/.conda-store/state/default/b3109fbf-1710602415-3-test +test2 -> /home/user/.conda-store/state/default/2aad045f-1710602357-2-test2 +``` + The lack of symlinks doesn't prevent server artifacts from being generated, which are available for download via the UI (lockfiles, archives, etc.), because those rely on storage or use the database. From 617792aa0b8e3e73516ca149af9d28411288c3f0 Mon Sep 17 00:00:00 2001 From: Nikita Karetnikov Date: Tue, 26 Mar 2024 04:14:51 +0100 Subject: [PATCH 11/13] Add `Build.hash` to speed up `build_key` lookups --- .../conda_store_server/__init__.py | 14 ++++++----- .../versions/e17b4cc6e086_add_build_hash.py | 23 +++++++++++++++++++ conda-store-server/conda_store_server/orm.py | 3 +++ 3 files changed, 34 insertions(+), 6 deletions(-) create mode 100644 conda-store-server/conda_store_server/alembic/versions/e17b4cc6e086_add_build_hash.py diff --git a/conda-store-server/conda_store_server/__init__.py b/conda-store-server/conda_store_server/__init__.py index 97ce80061..25bd70a64 100644 --- a/conda-store-server/conda_store_server/__init__.py +++ b/conda-store-server/conda_store_server/__init__.py @@ -50,6 +50,10 @@ def _version2_fmt(build: "Build") -> str: # noqa: F821 return f"{hash}-{timestamp}-{id}-{name}" def _version3_fmt(build: "Build") -> str: # noqa: F821 + # Caches the hash value for faster lookup later + if build.hash is not None: + return build.hash + # Adds namespace here to separate builds performed by different users # since all builds are stored in the same directory in v3, see # Build.build_path in orm.py. Additionally, this also hashes the @@ -64,6 +68,7 @@ def _version3_fmt(build: "Build") -> str: # noqa: F821 ) hash = hashlib.sha256(hash_input.encode("utf-8")).hexdigest() hash = hash[: BuildKey._version3_hash_size] + build.hash = hash return hash # version -> fmt function @@ -123,12 +128,9 @@ def parse_build_key( return int(parts[2]) # build_id elif "-" not in build_key: # v3 with conda_store.get_db() as db: - builds = db.query(orm.Build).all() - # Note: this uses a separate for-loop instead of using filter - # because the latter doesn't allow for passing lambdas - for build in builds: - if cls._version3_fmt(build) == build_key: - return build.id + build = db.query(orm.Build).filter(orm.Build.hash == build_key).first() + if build is not None: + return build.id return None else: # v1 return int(parts[4]) # build_id diff --git a/conda-store-server/conda_store_server/alembic/versions/e17b4cc6e086_add_build_hash.py b/conda-store-server/conda_store_server/alembic/versions/e17b4cc6e086_add_build_hash.py new file mode 100644 index 000000000..1a2859c54 --- /dev/null +++ b/conda-store-server/conda_store_server/alembic/versions/e17b4cc6e086_add_build_hash.py @@ -0,0 +1,23 @@ +"""add build hash + +Revision ID: e17b4cc6e086 +Revises: 03c839888c82 +Create Date: 2024-03-26 04:39:24.275214 + +""" +import sqlalchemy as sa +from alembic import op + +# revision identifiers, used by Alembic. +revision = "e17b4cc6e086" +down_revision = "03c839888c82" +branch_labels = None +depends_on = None + + +def upgrade(): + op.add_column("build", sa.Column("hash", sa.Unicode(length=32), nullable=True)) + + +def downgrade(): + op.drop_column("build", "hash") diff --git a/conda-store-server/conda_store_server/orm.py b/conda-store-server/conda_store_server/orm.py index 7bf58ac26..7bb1a545e 100644 --- a/conda-store-server/conda_store_server/orm.py +++ b/conda-store-server/conda_store_server/orm.py @@ -231,6 +231,9 @@ class Build(Base): ended_on = Column(DateTime, default=None) deleted_on = Column(DateTime, default=None) + # Only used by build_key_version 3, not necessary for earlier versions + hash = Column(Unicode(32), default=None) + @staticmethod def _get_build_key_version(): # Uses local import to make sure BuildKey is initialized From 514cb44a9af12b276dce7d3ec4cc648b1da1cc21 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 30 Mar 2024 23:26:21 +0000 Subject: [PATCH 12/13] [pre-commit.ci] Apply automatic pre-commit fixes --- .../alembic/versions/e17b4cc6e086_add_build_hash.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/conda-store-server/conda_store_server/alembic/versions/e17b4cc6e086_add_build_hash.py b/conda-store-server/conda_store_server/alembic/versions/e17b4cc6e086_add_build_hash.py index 1a2859c54..f7223aec7 100644 --- a/conda-store-server/conda_store_server/alembic/versions/e17b4cc6e086_add_build_hash.py +++ b/conda-store-server/conda_store_server/alembic/versions/e17b4cc6e086_add_build_hash.py @@ -5,9 +5,12 @@ Create Date: 2024-03-26 04:39:24.275214 """ + import sqlalchemy as sa + from alembic import op + # revision identifiers, used by Alembic. revision = "e17b4cc6e086" down_revision = "03c839888c82" From 489b6362d2b0d6660f6957e00b2511388d208ccc Mon Sep 17 00:00:00 2001 From: Nikita Karetnikov Date: Thu, 4 Apr 2024 16:42:53 +0200 Subject: [PATCH 13/13] Mark v3 build key as experimental --- conda-store-server/conda_store_server/__init__.py | 9 +++++---- conda-store-server/conda_store_server/app.py | 2 +- conda-store-server/tests/test_actions.py | 2 +- .../community/policies/backwards-compatibility.md | 7 +++++++ .../conda-store/references/configuration-options.md | 2 +- docusaurus-docs/conda-store/references/faq.md | 6 +++++- 6 files changed, 20 insertions(+), 8 deletions(-) diff --git a/conda-store-server/conda_store_server/__init__.py b/conda-store-server/conda_store_server/__init__.py index 25bd70a64..c8a4409ad 100644 --- a/conda-store-server/conda_store_server/__init__.py +++ b/conda-store-server/conda_store_server/__init__.py @@ -31,7 +31,7 @@ class BuildKey: _version2_hash_size = 8 - _version3_hash_size = 32 + _version3_experimental_hash_size = 32 def _version1_fmt(build: "Build") -> str: # noqa: F821 datetime_format = "%Y%m%d-%H%M%S-%f" @@ -49,7 +49,8 @@ def _version2_fmt(build: "Build") -> str: # noqa: F821 name = build.specification.name return f"{hash}-{timestamp}-{id}-{name}" - def _version3_fmt(build: "Build") -> str: # noqa: F821 + # Warning: this is an experimental version and can be changed at any time + def _version3_experimental_fmt(build: "Build") -> str: # noqa: F821 # Caches the hash value for faster lookup later if build.hash is not None: return build.hash @@ -67,7 +68,7 @@ def _version3_fmt(build: "Build") -> str: # noqa: F821 namespace_name + specification_hash + str(timestamp) + str(build_id) ) hash = hashlib.sha256(hash_input.encode("utf-8")).hexdigest() - hash = hash[: BuildKey._version3_hash_size] + hash = hash[: BuildKey._version3_experimental_hash_size] build.hash = hash return hash @@ -75,7 +76,7 @@ def _version3_fmt(build: "Build") -> str: # noqa: F821 _fmt = { 1: _version1_fmt, 2: _version2_fmt, - 3: _version3_fmt, + 3: _version3_experimental_fmt, } @classmethod diff --git a/conda-store-server/conda_store_server/app.py b/conda-store-server/conda_store_server/app.py index 991ee958b..4153e8a76 100644 --- a/conda-store-server/conda_store_server/app.py +++ b/conda-store-server/conda_store_server/app.py @@ -109,7 +109,7 @@ class CondaStore(LoggingConfigurable): build_key_version = Integer( BuildKey.set_current_version(2), - help="Build key version to use: 1 (long, legacy), 2 (shorter hash, default), 3 (hash-only)", + help="Build key version to use: 1 (long, legacy), 2 (shorter hash, default), 3 (hash-only, experimental)", config=True, ) diff --git a/conda-store-server/tests/test_actions.py b/conda-store-server/tests/test_actions.py index 312f7ba42..9baeaaa91 100644 --- a/conda-store-server/tests/test_actions.py +++ b/conda-store-server/tests/test_actions.py @@ -330,7 +330,7 @@ def test_add_lockfile_packages( (False, 0), # invalid (False, 1), # long (legacy) (False, 2), # shorter hash (default) - (False, 3), # hash-only + (False, 3), # hash-only (experimental) (True, 1), # build_key_version doesn't matter because there's no lockfile ], ) diff --git a/docusaurus-docs/community/policies/backwards-compatibility.md b/docusaurus-docs/community/policies/backwards-compatibility.md index 068b12f3f..f243f31b7 100644 --- a/docusaurus-docs/community/policies/backwards-compatibility.md +++ b/docusaurus-docs/community/policies/backwards-compatibility.md @@ -222,6 +222,13 @@ objects as public if there is an explicit need to do so. Keeping code private by default limits the public API that the conda-store project developers are committing to supporting. +### Build keys + +conda-store ships with several build key versions. The build key determines the +location of environment builds and build artifacts. Build key versions marked as +experimental can be changed at any time, see `BuildKey` and the FAQ for more +information. + #### Deprecating Python APIs Under exceptional circumstances such as a serious security vulnerability which diff --git a/docusaurus-docs/conda-store/references/configuration-options.md b/docusaurus-docs/conda-store/references/configuration-options.md index 7e92608cb..dadd8f884 100644 --- a/docusaurus-docs/conda-store/references/configuration-options.md +++ b/docusaurus-docs/conda-store/references/configuration-options.md @@ -48,7 +48,7 @@ store_directory, namespace, name. The default will put all environments in the same namespace within the same directory. `CondaStore.build_key_version` is the [build key version](#build-key-versions) -to use: 1 (long, legacy), 2 (shorter hash, default), 3 (hash-only). +to use: 1 (long, legacy), 2 (shorter hash, default), 3 (hash-only, experimental). `CondaStore.validate_specification` callable function taking `conda_store` and `specification` as input arguments to apply for diff --git a/docusaurus-docs/conda-store/references/faq.md b/docusaurus-docs/conda-store/references/faq.md index 68c23c8e3..a08b86adf 100644 --- a/docusaurus-docs/conda-store/references/faq.md +++ b/docusaurus-docs/conda-store/references/faq.md @@ -105,6 +105,10 @@ size. It looks like this: 64a943764b70e8fe181643404894f7ae ``` +:::warning +Version 3 is experimental and can be changed at any time. +::: + It's a truncated SHA-256 hex digest, which is calculated based on: - namespace name @@ -112,7 +116,7 @@ It's a truncated SHA-256 hex digest, which is calculated based on: - build timestamp - build id. -See `BuildKey._version3_fmt` for details. +See `BuildKey._version3_experimental_fmt` for details. :::note When version 3 is used, `Build.build_path` will not include the namespace name,