Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Convert longer media varchar fields to text in the API #4315

Merged
merged 2 commits into from
May 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 124 additions & 0 deletions api/api/migrations/0061_convert_varchar_to_text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# Generated by Django 4.2.11 on 2024-05-10 22:07

import api.models.fields
from django.db import migrations, models


# Schema migration that alters the text-like columns of the `audio`,
# `audioset` and `image` models so that they are declared as
# `models.TextField` or `api.models.fields.URLTextField`.
# Every operation below is an `AlterField`; no data operations are included.
class Migration(migrations.Migration):

# Must be applied after the previous migration of the `api` app.
dependencies = [
('api', '0060_fill_out_help_text'),
]

# The same set of fields is altered for each model (creator, creator_url,
# foreign_identifier, foreign_landing_url, thumbnail, title, url); `audio`
# additionally alters `audio_set_foreign_identifier`.
# NOTE(review): `creator_url` and `url` still pass `max_length`; Django
# documents `max_length` on TextField as not enforced at the database level,
# so presumably it is kept only to mirror the model definition — confirm.
operations = [
# --- audio ---
migrations.AlterField(
model_name='audio',
name='audio_set_foreign_identifier',
field=models.TextField(blank=True, help_text='Reference to set of which this track is a part.', null=True),
),
migrations.AlterField(
model_name='audio',
name='creator',
field=models.TextField(blank=True, help_text='The name of the media creator.', null=True),
),
migrations.AlterField(
model_name='audio',
name='creator_url',
field=api.models.fields.URLTextField(blank=True, help_text='A direct link to the media creator.', max_length=2000, null=True),
),
# `foreign_identifier` keeps its index (db_index=True).
migrations.AlterField(
model_name='audio',
name='foreign_identifier',
field=models.TextField(blank=True, db_index=True, help_text='The identifier provided by the upstream source.', null=True),
),
migrations.AlterField(
model_name='audio',
name='foreign_landing_url',
field=api.models.fields.URLTextField(blank=True, help_text='The landing page of the work.', null=True),
),
migrations.AlterField(
model_name='audio',
name='thumbnail',
field=api.models.fields.URLTextField(blank=True, help_text='The thumbnail for the media.', null=True),
),
migrations.AlterField(
model_name='audio',
name='title',
field=models.TextField(blank=True, help_text='The name of the media.', null=True),
),
# `url` keeps its uniqueness constraint (unique=True).
migrations.AlterField(
model_name='audio',
name='url',
field=api.models.fields.URLTextField(blank=True, help_text='The actual URL to the media file.', max_length=1000, null=True, unique=True),
),
# --- audioset ---
migrations.AlterField(
model_name='audioset',
name='creator',
field=models.TextField(blank=True, help_text='The name of the media creator.', null=True),
),
migrations.AlterField(
model_name='audioset',
name='creator_url',
field=api.models.fields.URLTextField(blank=True, help_text='A direct link to the media creator.', max_length=2000, null=True),
),
migrations.AlterField(
model_name='audioset',
name='foreign_identifier',
field=models.TextField(blank=True, db_index=True, help_text='The identifier provided by the upstream source.', null=True),
),
migrations.AlterField(
model_name='audioset',
name='foreign_landing_url',
field=api.models.fields.URLTextField(blank=True, help_text='The landing page of the work.', null=True),
),
migrations.AlterField(
model_name='audioset',
name='thumbnail',
field=api.models.fields.URLTextField(blank=True, help_text='The thumbnail for the media.', null=True),
),
migrations.AlterField(
model_name='audioset',
name='title',
field=models.TextField(blank=True, help_text='The name of the media.', null=True),
),
migrations.AlterField(
model_name='audioset',
name='url',
field=api.models.fields.URLTextField(blank=True, help_text='The actual URL to the media file.', max_length=1000, null=True, unique=True),
),
# --- image ---
migrations.AlterField(
model_name='image',
name='creator',
field=models.TextField(blank=True, help_text='The name of the media creator.', null=True),
),
migrations.AlterField(
model_name='image',
name='creator_url',
field=api.models.fields.URLTextField(blank=True, help_text='A direct link to the media creator.', max_length=2000, null=True),
),
migrations.AlterField(
model_name='image',
name='foreign_identifier',
field=models.TextField(blank=True, db_index=True, help_text='The identifier provided by the upstream source.', null=True),
),
migrations.AlterField(
model_name='image',
name='foreign_landing_url',
field=api.models.fields.URLTextField(blank=True, help_text='The landing page of the work.', null=True),
),
migrations.AlterField(
model_name='image',
name='thumbnail',
field=api.models.fields.URLTextField(blank=True, help_text='The thumbnail for the media.', null=True),
),
migrations.AlterField(
model_name='image',
name='title',
field=models.TextField(blank=True, help_text='The name of the media.', null=True),
),
migrations.AlterField(
model_name='image',
name='url',
field=api.models.fields.URLTextField(blank=True, help_text='The actual URL to the media file.', max_length=1000, null=True, unique=True),
),
]
3 changes: 1 addition & 2 deletions api/api/models/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,8 +163,7 @@ class Audio(AudioFileMixin, AbstractMedia):
)

# Replaces the foreign key to AudioSet
audio_set_foreign_identifier = models.CharField(
max_length=1000,
audio_set_foreign_identifier = models.TextField(
blank=True,
null=True,
help_text="Reference to set of which this track is a part.",
Expand Down
21 changes: 21 additions & 0 deletions api/api/models/fields.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from django import forms
from django.core import validators
from django.db import models
from django.utils.translation import gettext_lazy as _


class URLTextField(models.TextField):
"""URL field which uses the underlying Postgres TEXT column type."""

default_validators = [validators.URLValidator()]
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Up front context) I know we currently use a URL field for some of these (though this would be a new detail for creator_url, I think), so this is only relevant feedback for this PR due to the need to add new code to continue supporting it. Just wanting to clarify up front I know this isn't a decision you're making about the model, and that my intention is to ask whether we need this at all, and if not, then to give feedback that this code is unnecessary and can be removed.


This validator is the only thing that differentiates the URL field from a regular text field. I question whether we need this. Not blocking because it's a trivial thing to remove in the future or as a fast-follow if we like (would be a no-SQL migration). I'd prefer we didn't add this code, however. These write-time Django validators are irrelevant for our domain and usage of the ORM because we never write to these tables with the Django ORM (except in tests). Data validation either has to happen in the catalogue, data refresh, or not at all, but definitely not in Django write-time validators.

The change requested would be to use a TextField and forego write-time validators as a code-quality improvement and clarification of the intention of these models and the domain they're actually concerned with (i.e. reading).

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's an interesting point. I can't think of a use case right now, but I also think it doesn't hurt to leave the validation for this kind of field. I have no strong opinion on either side.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am very happy to remove this code as a fast-follow! I wanted to make as functionally minimal a change as possible here, even if it meant adding more code. I'll follow up with an issue and a PR later this week.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@AetherUnbound Just checking, were you able to create the issue?

@krysal I guess generally there is a rationale for removing code that isn't used. All code is a liability, either as a vulnerability or as increased maintenance cost (the latter being most relevant and in fact exemplified here), so it's a good idea to remove as much of the code we don't need as we can.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Created in #4320

description = _("URL")

def formfield(self, **kwargs):
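# As with CharField, this will cause URL validation to be performed
# twice: once by the forms.URLField form field and once more by this
# model field's own default_validators.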
Comment on lines +14 to +15
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Twice meaning it would happen in the form and at the database level?

return super().formfield(
**{
"form_class": forms.URLField,
**kwargs,
}
)
35 changes: 16 additions & 19 deletions api/api/models/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

from django.db import models

from api.models import fields


class IdentifierMixin(models.Model):
"""
Expand Down Expand Up @@ -32,11 +34,10 @@ class ForeignIdentifierMixin(models.Model):

This mixin adds

- foreign_identifier: CharField
- foreign_identifier: TextField
"""

foreign_identifier = models.CharField(
max_length=1000,
foreign_identifier = models.TextField(
blank=True,
null=True,
db_index=True,
Expand All @@ -55,34 +56,31 @@ class MediaMixin(models.Model):

The mixin adds

- title: CharField
- foreign_landing_url: CharField
- creator: CharField
- creator_url: CharField
- thumbnail: URLField
- title: TextField
- foreign_landing_url: URLTextField
- creator: TextField
- creator_url: URLTextField
- thumbnail: URLTextField
- provider: CharField
"""

title = models.CharField(
max_length=2000,
title = models.TextField(
blank=True,
null=True,
help_text="The name of the media.",
)
foreign_landing_url = models.CharField(
max_length=1000,
foreign_landing_url = fields.URLTextField(
blank=True,
null=True,
help_text="The landing page of the work.",
)

creator = models.CharField(
max_length=2000,
creator = models.TextField(
blank=True,
null=True,
help_text="The name of the media creator.",
)
creator_url = models.URLField(
creator_url = fields.URLTextField(
max_length=2000,
blank=True,
null=True,
Expand All @@ -92,8 +90,7 @@ class MediaMixin(models.Model):
# Because all forms of media have a thumbnail for visual representation
# For images, this field is not used as images are generated using Photon.
# For audio, this field points to the artwork, or is ``null``.
thumbnail = models.URLField(
max_length=1000,
thumbnail = fields.URLTextField(
blank=True,
null=True,
help_text="The thumbnail for the media.",
Expand All @@ -120,12 +117,12 @@ class FileMixin(models.Model):

This mixin adds

- url: URLField
- url: URLTextField
- filesize: IntegerField
- filetype: CharField
"""

url = models.URLField(
url = fields.URLTextField(
unique=True,
max_length=1000,
help_text="The actual URL to the media file.",
Expand Down
2 changes: 1 addition & 1 deletion api/latest_migrations/api
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@
# If you have a merge conflict in this file, it means you need to run:
# manage.py makemigrations --merge
# in order to resolve the conflict between migrations.
0060_fill_out_help_text
0061_convert_varchar_to_text
5 changes: 5 additions & 0 deletions api/latest_migrations/django_structlog
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# This file is autogenerated by makemigrations.
# If you have a merge conflict in this file, it means you need to run:
# manage.py makemigrations --merge
# in order to resolve the conflict between migrations.

Loading
Loading