Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SFR-2141: Delete Duplicate Work Identifiers #356

Merged
merged 4 commits into from
Oct 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions managers/sfrRecord.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import json
from Levenshtein import jaro_winkler
import pycountry
import re
import re, copy
from uuid import uuid4

from model import Work, Edition, Item, Identifier, Link, Rights
Expand Down Expand Up @@ -39,7 +39,7 @@ def mergeRecords(self):

matchedWorks.sort(key=lambda x: x[1])

allIdentifiers = self.work.identifiers
allIdentifiers = copy.deepcopy(self.work.identifiers)

for edition in self.work.editions:
allIdentifiers.extend(edition.identifiers)
Expand Down
24 changes: 24 additions & 0 deletions migrations/versions/cc966d5a6ca0_add_unique_constraint_to_work_.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
"""add unique constraint to work_identifiers table

Revision ID: cc966d5a6ca0
Revises: 54e57fb2e1c6
Create Date: 2024-09-05 16:48:16.569654

"""
from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
# `revision` is this migration's own ID and `down_revision` is the ID of the
# migration it revises (see "Revision ID" / "Revises" in the module docstring).
revision = 'cc966d5a6ca0'
down_revision = '54e57fb2e1c6'
# No branch labels or dependencies on other revisions are declared here.
branch_labels = None
depends_on = None


def upgrade():
    """Create the ``unique_work_identifier`` unique constraint.

    The constraint is created on the ``work_identifiers`` table over the
    ``work_id`` and ``identifier_id`` columns.
    """
    constraint_name = 'unique_work_identifier'
    table_name = 'work_identifiers'
    constrained_columns = ['work_id', 'identifier_id']

    op.create_unique_constraint(constraint_name, table_name, constrained_columns)


def downgrade():
    """Drop the ``unique_work_identifier`` unique constraint.

    Reverses ``upgrade`` by removing the constraint from the
    ``work_identifiers`` table.
    """
    constraint_name = 'unique_work_identifier'
    table_name = 'work_identifiers'

    op.drop_constraint(constraint_name, table_name, type_='unique')
4 changes: 2 additions & 2 deletions model/postgres/work.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@
from .base import Base, Core

# Association table whose columns reference works.id and identifiers.id.
# NOTE(review): the Column lines carrying unique=True make each column unique
# on its own, whereas migration cc966d5a6ca0 creates one unique constraint
# over ['work_id', 'identifier_id'] together. Per-column uniqueness would
# presumably prevent a work from having more than one identifier — confirm
# whether a composite constraint was intended here instead.
WORK_IDENTIFIERS = Table('work_identifiers', Base.metadata,
Column('work_id', Integer, ForeignKey('works.id', ondelete='CASCADE')),
Column('identifier_id', Integer, ForeignKey('identifiers.id', ondelete='CASCADE'))
Column('work_id', Integer, ForeignKey('works.id', ondelete='CASCADE'), unique=True),
Column('identifier_id', Integer, ForeignKey('identifiers.id', ondelete='CASCADE'), unique=True)
)

WORK_RIGHTS = Table('work_rights', Base.metadata,
Expand Down
4 changes: 2 additions & 2 deletions processes/sfrCluster.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from datetime import datetime, timedelta, timezone
from math import ceil
import re
from sqlalchemy.exc import DataError
from sqlalchemy.exc import DataError, IntegrityError
from sqlalchemy.orm.exc import StaleDataError

from .core import CoreProcess
Expand Down Expand Up @@ -101,7 +101,7 @@ def clusterRecord(self, rec):

try:
self.session.flush()
except (DataError, StaleDataError) as e:
except Exception as e:
self.session.rollback()
logger.error('Unable to cluster {}'.format(rec))
logger.debug(e)
Expand Down
Loading