Skip to content

Commit

Permalink
Merge pull request #653 from nkaretnikov/prefix-size-check-649
Browse files Browse the repository at this point in the history
Check the size of `build_path`
  • Loading branch information
Nikita Karetnikov authored Nov 16, 2023
2 parents 0760fa2 + e2a7618 commit a5680f8
Show file tree
Hide file tree
Showing 9 changed files with 222 additions and 23 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""add status_info
Revision ID: d78e9889566a
Revises: b387747ca9b7
Create Date: 2023-11-07 12:25:04.416192
"""
import sqlalchemy as sa
from alembic import op

# revision identifiers, used by Alembic.
revision = "d78e9889566a"
down_revision = "b387747ca9b7"
branch_labels = None
depends_on = None


def upgrade():
op.add_column("build", sa.Column("status_info", sa.UnicodeText(), nullable=True))


def downgrade():
op.drop_column("build", "status_info")
55 changes: 35 additions & 20 deletions conda-store-server/conda_store_server/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@

import yaml
from conda_store_server import action, api, conda_utils, orm, schema, utils
from conda_store_server.utils import BuildPathError
from sqlalchemy.orm import Session


Expand Down Expand Up @@ -38,8 +39,11 @@ def set_build_started(db: Session, build: orm.Build):
db.commit()


def set_build_failed(db: Session, build: orm.Build):
def set_build_failed(
db: Session, build: orm.Build, status_info: typing.Optional[str] = None
):
build.status = schema.BuildStatus.FAILED
build.status_info = status_info
build.ended_on = datetime.datetime.utcnow()
db.commit()

Expand Down Expand Up @@ -107,27 +111,29 @@ def build_conda_environment(db: Session, conda_store, build):
symlink the build to a named environment
"""
set_build_started(db, build)
append_to_logs(
db,
conda_store,
build,
f"starting build of conda environment {datetime.datetime.utcnow()} UTC\n",
)
try:
set_build_started(db, build)
# Note: even append_to_logs can fail due to filename size limit, so
# check build_path length first
conda_prefix = build.build_path(conda_store)
append_to_logs(
db,
conda_store,
build,
f"starting build of conda environment {datetime.datetime.utcnow()} UTC\n",
)

settings = conda_store.get_settings(
db=db,
namespace=build.environment.namespace.name,
environment_name=build.environment.name,
)
settings = conda_store.get_settings(
db=db,
namespace=build.environment.namespace.name,
environment_name=build.environment.name,
)

conda_prefix = build.build_path(conda_store)
conda_prefix.parent.mkdir(parents=True, exist_ok=True)
conda_prefix.parent.mkdir(parents=True, exist_ok=True)

environment_prefix = build.environment_path(conda_store)
environment_prefix.parent.mkdir(parents=True, exist_ok=True)
environment_prefix = build.environment_path(conda_store)
environment_prefix.parent.mkdir(parents=True, exist_ok=True)

try:
with utils.timer(conda_store.log, f"building conda_prefix={conda_prefix}"):
context = action.action_solve_lockfile(
settings.conda_command,
Expand Down Expand Up @@ -201,15 +207,24 @@ def build_conda_environment(db: Session, conda_store, build):
build.size = context.result["disk_usage"]

set_build_completed(db, conda_store, build)
# Always mark build as failed first since other functions may throw an
# exception
except subprocess.CalledProcessError as e:
set_build_failed(db, build)
conda_store.log.exception(e)
append_to_logs(db, conda_store, build, e.output)
set_build_failed(db, build)
raise e
except Exception as e:
except BuildPathError as e:
# Provide status_info, which will be exposed to the user, ONLY in this
# case because the error message doesn't expose sensitive information
set_build_failed(db, build, status_info=e.message)
conda_store.log.exception(e)
append_to_logs(db, conda_store, build, traceback.format_exc())
raise e
except Exception as e:
set_build_failed(db, build)
conda_store.log.exception(e)
append_to_logs(db, conda_store, build, traceback.format_exc())
raise e


Expand Down
15 changes: 12 additions & 3 deletions conda-store-server/conda_store_server/orm.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

from conda_store_server import conda_utils, schema, utils
from conda_store_server.environment import validate_environment
from conda_store_server.utils import BuildPathError
from sqlalchemy import (
JSON,
BigInteger,
Expand Down Expand Up @@ -173,6 +174,9 @@ class Build(Base):
package_builds = relationship("CondaPackageBuild", secondary=build_conda_package)

status = Column(Enum(schema.BuildStatus), default=schema.BuildStatus.QUEUED)
# Additional status info that will be provided to the user. DO NOT put
# sensitive data here
status_info = Column(UnicodeText, default=None)
size = Column(BigInteger, default=0)
scheduled_on = Column(DateTime, default=datetime.datetime.utcnow)
started_on = Column(DateTime, default=None)
Expand All @@ -190,15 +194,20 @@ def build_path(self, conda_store):
"""
store_directory = os.path.abspath(conda_store.store_directory)
namespace = self.environment.namespace.name
name = self.specification.name
return (
res = (
pathlib.Path(
conda_store.build_directory.format(
store_directory=store_directory, namespace=namespace, name=name
store_directory=store_directory,
namespace=namespace,
)
)
/ self.build_key
)
# conda prefix must be less or equal to 255 chars
# https://github.com/conda-incubator/conda-store/issues/649
if len(str(res)) > 255:
raise BuildPathError("build_path too long: must be <= 255 characters")
return res

def environment_path(self, conda_store):
"""Environment path is the path for the symlink to the build
Expand Down
1 change: 1 addition & 0 deletions conda-store-server/conda_store_server/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,7 @@ class Build(BaseModel):
specification: Optional[Specification]
packages: Optional[List[CondaPackage]]
status: BuildStatus
status_info: Optional[str]
size: int
scheduled_on: datetime.datetime
started_on: Optional[datetime.datetime]
Expand Down
4 changes: 4 additions & 0 deletions conda-store-server/conda_store_server/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ def message(self):
return self.args[0]


class BuildPathError(CondaStoreError):
pass


def symlink(source, target):
if os.path.islink(target):
os.unlink(target)
Expand Down
11 changes: 11 additions & 0 deletions conda-store-server/tests/test_db_api.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import pytest
from conda_store_server import api
from conda_store_server.orm import NamespaceRoleMapping
from conda_store_server.utils import BuildPathError


def test_namespace_crud(db):
Expand Down Expand Up @@ -134,3 +135,13 @@ def test_get_set_keyvaluestore(db):
# test updating a prefix
api.set_kvstore_key_values(db, "pytest", {"c": 999, "d": 999}, update=False)
assert {**setting_1, **setting_2} == api.get_kvstore_key_values(db, "pytest")


def test_build_path_too_long(db, conda_store, simple_specification):
conda_store.store_directory = 'A' * 800
build_id = conda_store.register_environment(
db, specification=simple_specification, namespace="pytest"
)
build = api.get_build(db, build_id=build_id)
with pytest.raises(BuildPathError, match=r"build_path too long: must be <= 255 characters"):
build.build_path(conda_store)
55 changes: 55 additions & 0 deletions conda-store-server/tests/test_server.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import time

import pytest
import yaml
Expand Down Expand Up @@ -335,6 +336,60 @@ def test_create_specification_unauth(testclient):
assert r.status == schema.APIStatus.ERROR


# Only testing size values that will always cause errors. Smaller values could
# cause errors as well, but would be flaky since the test conda-store state
# directory might have different lengths on different systems, for instance,
# due to different username lengths.
@pytest.mark.parametrize(
"size",
[
# OSError: [Errno 36] File name too long
255,
# OSError: [Errno 36] File name too long
256,
],
)
def test_create_specification_auth_env_name_too_long(testclient, celery_worker, authenticate, size):
namespace = "default"
environment_name = 'A' * size

response = testclient.post(
"api/v1/specification",
json={
"namespace": namespace,
"specification": json.dumps({"name": environment_name}),
},
)
response.raise_for_status()

r = schema.APIPostSpecification.parse_obj(response.json())
assert r.status == schema.APIStatus.OK
build_id = r.data.build_id

# Try checking that the status is 'FAILED'
is_updated = False
for _ in range(5):
time.sleep(5)

# check for the given build
response = testclient.get(f"api/v1/build/{build_id}")
response.raise_for_status()

r = schema.APIGetBuild.parse_obj(response.json())
assert r.status == schema.APIStatus.OK
assert r.data.specification.name == environment_name
if r.data.status == "QUEUED":
continue # checked too fast, try again
assert r.data.status == "FAILED"
assert r.data.status_info == "build_path too long: must be <= 255 characters"
is_updated = True
break

# If we're here, the task didn't update the status on failure
if not is_updated:
assert False, f"failed to update status"


def test_create_specification_auth(testclient, celery_worker, authenticate):
namespace = "default"
environment_name = "pytest"
Expand Down
19 changes: 19 additions & 0 deletions docs/administration.md
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,25 @@ for several reasons:
- worker was killed for other reasons e.g. forced restart
- bugs in conda-store
### Build path length
Conda packages are guaranteed to be [relocatable] as long as the environment
prefix length is <= 255 characters. In conda-store, the said prefix is specified
in `Build.build_path`. When building an environment, you might see an error like
this:
```
build_path too long: must be <= 255 characters
```
If so, try configuring the conda-store `CondaStore.store_directory` to be as
close to the filesystem root as possible. Additionally, 255 characters is also a
common limit for individual files on many filesystems. When creating
environments, try using shorter `namespace` and `environment` names since they
affect both the `build_path` length and the filename length.
[relocatable]: https://docs.conda.io/projects/conda-build/en/latest/resources/make-relocatable.html
### Long paths on Windows
conda-store supports Windows in standalone mode. However, when creating
Expand Down
62 changes: 62 additions & 0 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
"""
import asyncio
import json
import time
import uuid
from typing import List

Expand Down Expand Up @@ -458,6 +459,67 @@ def test_create_specification_auth(testclient):
assert r.data.namespace.name == namespace


# Only testing size values that will always cause errors. Smaller values could
# cause errors as well, but would be flaky since the test conda-store state
# directory might have different lengths on different systems, for instance,
# due to different username lengths.
@pytest.mark.parametrize(
"size",
[
# minio.error.MinioException: S3 operation failed; code:
# XMinioInvalidObjectName, message: Object name contains unsupported
# characters.
# The error message is misleading: it's a size issue.
255,
# SQL error: value too long for type character varying(255)
256,
],
)
def test_create_specification_auth_env_name_too_long(testclient, size):
namespace = "default"
environment_name = 'A' * size

testclient.login()
response = testclient.post(
"api/v1/specification",
json={
"namespace": namespace,
"specification": json.dumps({"name": environment_name}),
},
)
if size > 255:
assert response.status_code == 500
return # error, nothing to do
response.raise_for_status()

r = schema.APIPostSpecification.parse_obj(response.json())
assert r.status == schema.APIStatus.OK
build_id = r.data.build_id

# Try checking that the status is 'FAILED'
is_updated = False
for _ in range(5):
time.sleep(5)

# check for the given build
response = testclient.get(f"api/v1/build/{build_id}")
response.raise_for_status()

r = schema.APIGetBuild.parse_obj(response.json())
assert r.status == schema.APIStatus.OK
assert r.data.specification.name == environment_name
if r.data.status == "QUEUED":
continue # checked too fast, try again
assert r.data.status == "FAILED"
assert r.data.status_info == "build_path too long: must be <= 255 characters"
is_updated = True
break

# If we're here, the task didn't update the status on failure
if not is_updated:
assert False, f"failed to update status"


def test_create_specification_auth_no_namespace_specified(testclient):
namespace = "username" # same as login username
environment_name = f"pytest-{uuid.uuid4()}"
Expand Down

0 comments on commit a5680f8

Please sign in to comment.