Skip to content

Commit

Permalink
Merge pull request #1885 from dandi/1839-path-ordering
Browse files Browse the repository at this point in the history
Use different collation for Asset `path` field
  • Loading branch information
jjnesbitt authored Mar 6, 2024
2 parents b5da025 + 187e13c commit ebce2d2
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 1 deletion.
24 changes: 24 additions & 0 deletions dandiapi/api/migrations/0006_asset_path_collation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# Generated by Django 4.1.13 on 2024-03-06 17:04
from __future__ import annotations

from django.db import migrations, models

import dandiapi.api.models.asset


class Migration(migrations.Migration):
    """Alter ``Asset.path`` so the column uses the ``C.utf8`` database collation."""

    dependencies = [('api', '0005_null_charfield')]

    operations = [
        migrations.AlterField(
            model_name='asset',
            name='path',
            # AlterField is given the complete field definition, so `max_length` and
            # `validators` are restated alongside the new `db_collation`.
            field=models.CharField(
                max_length=512,
                validators=[dandiapi.api.models.asset.validate_asset_path],
                db_collation='C.utf8',
            ),
        ),
    ]
2 changes: 1 addition & 1 deletion dandiapi/api/models/asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ class Status(models.TextChoices):
INVALID = 'Invalid'

asset_id = models.UUIDField(unique=True, default=uuid.uuid4)
path = models.CharField(max_length=512, validators=[validate_asset_path])
path = models.CharField(max_length=512, validators=[validate_asset_path], db_collation='C.utf8')
blob = models.ForeignKey(
AssetBlob, related_name='assets', on_delete=models.CASCADE, null=True, blank=True
)
Expand Down
19 changes: 19 additions & 0 deletions dandiapi/api/tests/test_asset.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,6 +369,25 @@ def test_asset_rest_list_ordering(api_client, version, asset_factory, order_para
assert result_paths == ordering


@pytest.mark.django_db()
def test_asset_path_ordering(api_client, version, asset_factory):
    """Regression test: ordering assets by ``path`` must take slashes into account."""
    # Under the default collation, special characters (slashes included) are ignored on the
    # first comparison pass and only used afterwards to break ties. For the two paths below
    # that means comparing 'az' with 'aaz', which puts 'aa/z' ahead of 'a/z'. With the slashes
    # taken into account, 'a/z' must come first. Changing the collation of the path field
    # fixes this, and this test guards against a regression.
    shorter = asset_factory(path='a/z')
    longer = asset_factory(path='aa/z')
    for asset in (shorter, longer):
        version.assets.add(asset)

    ordered = Asset.objects.filter(versions__in=[version]).order_by('path')
    assert ordered.count() == 2
    first, second = ordered[0], ordered[1]
    assert first.pk == shorter.pk
    assert second.pk == longer.pk


@pytest.mark.django_db()
def test_asset_rest_retrieve(api_client, version, asset, asset_factory):
version.assets.add(asset)
Expand Down

0 comments on commit ebce2d2

Please sign in to comment.