Skip to content

Commit

Permalink
Add package key and generalize resource hash
Browse files Browse the repository at this point in the history
The idea with package key is that it will be a sanitized form of the package title, and will form part of the archival path for a resource upload, as in {archive.url}/{provider.key}/{package.key}/{file_path}, instead of using the package uuid in that path position. This will make it easier for curators/admins to see at a glance what package a folder relates to when viewing it in Nextcloud. The API will need to consider how to deal with package title changes. It doesn't really matter from an archive/package/resource management perspective, but it might be confusing for curators/admins if a package's files are split over multiple directories.

Resource title is also made nullable. This is more sensible once we start looking at multiple file uploads, where a per-resource title is really unnecessary. We do, however, add a check constraint such that a title must be given if a filename is not (relevant for externally managed datasets, e.g. Obs DB).
  • Loading branch information
marksparkza committed Sep 12, 2024
1 parent 110988e commit cb2fb61
Show file tree
Hide file tree
Showing 3 changed files with 75 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""Add package key and generalize resource hash
Revision ID: c8c1df35dea4
Revises: 23e051502f55
Create Date: 2024-09-12 15:08:58.593167
"""
from alembic import op
import sqlalchemy as sa
from sqlalchemy.dialects import postgresql

# revision identifiers, used by Alembic.
revision = 'c8c1df35dea4'
down_revision = '23e051502f55'
branch_labels = None
depends_on = None


def upgrade():
    """Apply revision c8c1df35dea4.

    Schema changes:

    * create the ``hashalgorithm`` enum type (``md5``, ``sha256``);
    * add ``package.key`` (NOT NULL) and make it unique per ``provider_id``;
    * replace ``resource.md5`` with the generic ``resource.hash`` /
      ``resource.hash_algorithm`` pair;
    * make ``resource.title`` nullable;
    * add check constraints requiring an algorithm whenever a hash is set,
      and at least one of title/filename on every resource.

    Existing rows are preserved: package keys are backfilled and md5
    values are carried over into the new hash columns.
    """
    # ### commands auto generated by Alembic - adjusted ###
    op.execute("create type hashalgorithm as enum ('md5', 'sha256')")

    # A NOT NULL column without a default cannot be added to a table that
    # already contains rows, so add it as nullable, backfill, then tighten.
    op.add_column('package', sa.Column('key', sa.String(), nullable=True))
    # NOTE(review): the package id is used as a placeholder key because it is
    # guaranteed unique (so the constraint below holds) - confirm whether
    # existing packages should instead get a key derived from their title.
    op.execute('update package set "key" = id where "key" is null')
    op.alter_column('package', 'key',
                    existing_type=sa.String(),
                    nullable=False)
    op.create_unique_constraint('package_provider_id_package_key', 'package', ['provider_id', 'key'])

    op.add_column('resource', sa.Column('hash', sa.String(), nullable=True))
    op.add_column('resource', sa.Column(
        'hash_algorithm',
        postgresql.ENUM(name='hashalgorithm', create_type=False),
        nullable=True,
    ))
    # Carry existing checksums over before the md5 column is dropped;
    # otherwise they would be lost.
    op.execute("update resource set hash = md5, hash_algorithm = 'md5' where md5 is not null")
    op.drop_column('resource', 'md5')

    op.alter_column('resource', 'title',
                    existing_type=sa.VARCHAR(),
                    nullable=True)

    op.create_check_constraint('resource_hash_algorithm_check',
                               'resource',
                               'hash is null or hash_algorithm is not null')
    op.create_check_constraint('resource_title_or_filename_check',
                               'resource',
                               'title is not null or filename is not null')
    # ### end Alembic commands ###


def downgrade():
    """Revert revision c8c1df35dea4.

    Restores ``resource.md5`` and the NOT NULL ``resource.title``, and
    removes ``resource.hash``, ``resource.hash_algorithm``, ``package.key``,
    their constraints and the ``hashalgorithm`` enum type.

    Hashes stored with the ``md5`` algorithm are copied back into
    ``resource.md5``. Hashes using any other algorithm have no place in the
    old schema and are discarded, as are package keys.
    """
    # ### commands auto generated by Alembic - adjusted ###
    # Rows created after the upgrade may have a NULL title. While the
    # title-or-filename check is still in force, such rows are guaranteed to
    # have a filename, so use it to satisfy the restored NOT NULL constraint.
    op.execute('update resource set title = filename where title is null')

    op.drop_constraint('resource_title_or_filename_check', 'resource', type_='check')
    op.drop_constraint('resource_hash_algorithm_check', 'resource', type_='check')

    op.add_column('resource', sa.Column('md5', sa.VARCHAR(), autoincrement=False, nullable=True))
    # Restore md5 checksums before the generic hash columns are dropped.
    op.execute("update resource set md5 = hash where hash_algorithm = 'md5'")

    op.alter_column('resource', 'title',
                    existing_type=sa.VARCHAR(),
                    nullable=False)

    op.drop_column('resource', 'hash_algorithm')
    op.drop_column('resource', 'hash')
    op.drop_constraint('package_provider_id_package_key', 'package', type_='unique')
    op.drop_column('package', 'key')
    # The enum type can only be dropped once no column uses it.
    op.execute('drop type hashalgorithm')
    # ### end Alembic commands ###
7 changes: 7 additions & 0 deletions odp/db/models/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,19 @@
class Package(Base):
"""A submission information package originating from a data provider.
The package `key` is unique to the provider.
All package metadata - besides the title - are supplied via tags.
"""

__tablename__ = 'package'

__table_args__ = (
UniqueConstraint('provider_id', 'key'),
)

id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
key = Column(String, nullable=False)
title = Column(String, nullable=False)
status = Column(Enum(PackageStatus), nullable=False)
timestamp = Column(TIMESTAMP(timezone=True), nullable=False)
Expand Down
19 changes: 16 additions & 3 deletions odp/db/models/resource.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import uuid

from sqlalchemy import BigInteger, Column, ForeignKey, String, TIMESTAMP
from sqlalchemy import BigInteger, CheckConstraint, Column, Enum, ForeignKey, String, TIMESTAMP
from sqlalchemy.ext.associationproxy import association_proxy
from sqlalchemy.orm import relationship

from odp.const.db import HashAlgorithm
from odp.db import Base


Expand All @@ -12,13 +13,25 @@ class Resource(Base):

__tablename__ = 'resource'

__table_args__ = (
CheckConstraint(
'hash is null or hash_algorithm is not null',
name='resource_hash_algorithm_check',
),
CheckConstraint(
'title is not null or filename is not null',
name='resource_title_or_filename_check',
),
)

id = Column(String, primary_key=True, default=lambda: str(uuid.uuid4()))
title = Column(String, nullable=False)
title = Column(String)
description = Column(String)
filename = Column(String)
mimetype = Column(String)
size = Column(BigInteger)
md5 = Column(String)
hash = Column(String)
hash_algorithm = Column(Enum(HashAlgorithm))
timestamp = Column(TIMESTAMP(timezone=True), nullable=False)

provider_id = Column(String, ForeignKey('provider.id', ondelete='RESTRICT'), nullable=False)
Expand Down

0 comments on commit cb2fb61

Please sign in to comment.