Skip to content

Commit

Permalink
CU-8696ent8k: make prep_docs AnnotatedEntity addition atomic.
Browse files Browse the repository at this point in the history
  • Loading branch information
tomolopolis committed Nov 29, 2024
1 parent 8093d3d commit 92ac80f
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 36 deletions.
17 changes: 9 additions & 8 deletions webapp/api/api/data_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from typing import Dict

from django.contrib.auth.models import User
from django.db import transaction
from django.db.models import Q

from core.settings import MEDIA_ROOT
Expand Down Expand Up @@ -42,14 +43,14 @@ def dataset_from_file(dataset: Dataset):
"The 'name' column are document IDs, and the 'text' column is the text you're "
"collecting annotations for")


for i, row in enumerate(df.iterrows()):
row = row[1]
document = Document()
document.name = row['name']
document.text = sanitise_input(row['text'])
document.dataset = dataset
document.save()
with transaction.atomic():
for i, row in enumerate(df.iterrows()):
row = row[1]
document = Document()
document.name = row['name']
document.text = sanitise_input(row['text'])
document.dataset = dataset
document.save()


def sanitise_input(text: str):
Expand Down
16 changes: 9 additions & 7 deletions webapp/api/api/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

from background_task import background
from django.contrib.auth.models import User
from django.db import transaction
from django.db.models.signals import post_save
from django.dispatch import receiver
from medcat.cat import CAT
Expand Down Expand Up @@ -251,13 +252,14 @@ def prep_docs(project_id: List[int], doc_ids: List[int], user_id: int):
logger.info(f'Running MedCAT model for project {project.id}:{project.name} over doc: {doc.id}')
spacy_doc = cat(doc.text)
anns = AnnotatedEntity.objects.filter(document=doc).filter(project=project)
add_annotations(spacy_doc=spacy_doc,
user=user,
project=project,
document=doc,
cat=cat,
existing_annotations=anns)
# add doc to prepared_documents
with transaction.atomic():
add_annotations(spacy_doc=spacy_doc,
user=user,
project=project,
document=doc,
cat=cat,
existing_annotations=anns)
# add doc to prepared_documents
project.prepared_documents.add(doc)
project.save()
logger.info('Prepared all docs for project: %s, docs processed: %s',
Expand Down
44 changes: 23 additions & 21 deletions webapp/api/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from background_task.models import Task, CompletedTask
from django.contrib.auth.views import PasswordResetView
from django.core.exceptions import ObjectDoesNotExist
from django.db import transaction
from django.http import HttpResponseBadRequest, HttpResponseServerError, HttpResponse
from django.shortcuts import render
from django.utils import timezone
Expand Down Expand Up @@ -248,7 +249,6 @@ def prepare_documents(request):
'description': 'Missing CUI filter file, %s, cannot be found on the filesystem, '
'but is still set on the project. To fix remove and reset the '
'cui filter file' % project.cuis_file}, status=500)

try:
for d_id in d_ids:
document = Document.objects.get(id=d_id)
Expand All @@ -264,26 +264,28 @@ def prepare_documents(request):

is_validated = document in project.validated_documents.all()

# If the document is not already annotated, annotate it
if (len(anns) == 0 and not is_validated) or update:
# Based on the project id get the right medcat
cat = get_medcat(project=project)
logger.info('loaded medcat model for project: %s', project.id)

# Set CAT filters
cat.config.linking['filters']['cuis'] = cuis

spacy_doc = cat(document.text)
add_annotations(spacy_doc=spacy_doc,
user=user,
project=project,
document=document,
cat=cat,
existing_annotations=anns)

# add doc to prepared_documents
project.prepared_documents.add(document)
project.save()
with transaction.atomic():
# If the document is not already annotated, annotate it
if (len(anns) == 0 and not is_validated) or update:
# Based on the project id get the right medcat
cat = get_medcat(project=project)
logger.info('loaded medcat model for project: %s', project.id)

# Set CAT filters
cat.config.linking['filters']['cuis'] = cuis

spacy_doc = cat(document.text)

add_annotations(spacy_doc=spacy_doc,
user=user,
project=project,
document=document,
cat=cat,
existing_annotations=anns)

# add doc to prepared_documents
project.prepared_documents.add(document)
project.save()

except Exception as e:
stack = traceback.format_exc()
Expand Down

0 comments on commit 92ac80f

Please sign in to comment.