diff --git a/dandiapi/api/migrations/0006_asset_path_collation.py b/dandiapi/api/migrations/0006_asset_path_collation.py new file mode 100644 index 000000000..300e0b6e8 --- /dev/null +++ b/dandiapi/api/migrations/0006_asset_path_collation.py @@ -0,0 +1,24 @@ +# Generated by Django 4.1.13 on 2024-03-06 17:04 +from __future__ import annotations + +from django.db import migrations, models + +import dandiapi.api.models.asset + + +class Migration(migrations.Migration): + dependencies = [ + ('api', '0005_null_charfield'), + ] + + operations = [ + migrations.AlterField( + model_name='asset', + name='path', + field=models.CharField( + db_collation='C.utf8', + max_length=512, + validators=[dandiapi.api.models.asset.validate_asset_path], + ), + ), + ] diff --git a/dandiapi/api/models/asset.py b/dandiapi/api/models/asset.py index b967d9b5c..5330562b6 100644 --- a/dandiapi/api/models/asset.py +++ b/dandiapi/api/models/asset.py @@ -134,7 +134,7 @@ class Status(models.TextChoices): INVALID = 'Invalid' asset_id = models.UUIDField(unique=True, default=uuid.uuid4) - path = models.CharField(max_length=512, validators=[validate_asset_path]) + path = models.CharField(max_length=512, validators=[validate_asset_path], db_collation='C.utf8') blob = models.ForeignKey( AssetBlob, related_name='assets', on_delete=models.CASCADE, null=True, blank=True ) diff --git a/dandiapi/api/tests/test_asset.py b/dandiapi/api/tests/test_asset.py index 715c5fc82..14588dec9 100644 --- a/dandiapi/api/tests/test_asset.py +++ b/dandiapi/api/tests/test_asset.py @@ -369,6 +369,25 @@ def test_asset_rest_list_ordering(api_client, version, asset_factory, order_para assert result_paths == ordering +@pytest.mark.django_db() +def test_asset_path_ordering(api_client, version, asset_factory): + # The default collation will ignore special characters, including slashes, on the first pass. If + # there are ties, it uses these characters to break ties. This means that in the below example, + # removing the slashes leads to a comparison of 'az' and 'aaz', which would obviously sort the + # latter before the former. However, with the slashes, it's clear that 'a/z' should come before + # 'aa/z'. This is fixed by changing the collation of the path field, and as such this test + # serves as a regression test. + a = asset_factory(path='a/z') + b = asset_factory(path='aa/z') + version.assets.add(a) + version.assets.add(b) + + asset_listing = Asset.objects.filter(versions__in=[version]).order_by('path') + assert asset_listing.count() == 2 + assert asset_listing[0].pk == a.pk + assert asset_listing[1].pk == b.pk + + @pytest.mark.django_db() def test_asset_rest_retrieve(api_client, version, asset, asset_factory): version.assets.add(asset)