Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize validation layout updates #8789

Merged
merged 41 commits into from
Dec 19, 2024
Merged
Show file tree
Hide file tree
Changes from 36 commits
Commits
Show all changes
41 commits
Select commit Hold shift + click to select a range
a7df39d
Improve validation frame distribution in honeypot tasks
zhiltsov-max Dec 4, 2024
8f83f04
Improve frame distribution in automatic honeypot rerolls
zhiltsov-max Dec 4, 2024
4a9ce0f
Reset only active frames, fix errors
zhiltsov-max Dec 5, 2024
daebdcd
Add tests
zhiltsov-max Dec 5, 2024
1b71f6a
Add changelog
zhiltsov-max Dec 5, 2024
4c07c0c
Fix possible invalid honeypot picks in task creation
zhiltsov-max Dec 5, 2024
006e855
t
zhiltsov-max Dec 5, 2024
306c006
Optimize validation layout updates
zhiltsov-max Dec 6, 2024
f55595d
Fix indentation
zhiltsov-max Dec 6, 2024
d45a33d
Update cvat/apps/engine/cache.py
zhiltsov-max Dec 7, 2024
0f2b2ee
Refactor code: extract common functions, add typing, change chunked_l…
zhiltsov-max Dec 10, 2024
15bf9b7
Remove handling of impossible exceptions
zhiltsov-max Dec 10, 2024
ec0c353
Merge remote-tracking branch 'origin/zm/optimize-validation-layout-up…
zhiltsov-max Dec 10, 2024
4acbeb1
Merge remote-tracking branch 'origin/develop' into zm/optimize-valida…
zhiltsov-max Dec 10, 2024
1dc2b42
Fix request response and behavior in simultaneous deleted_frames and …
zhiltsov-max Dec 10, 2024
61eeb8d
Fix formatting
zhiltsov-max Dec 10, 2024
268b54b
Fix test
zhiltsov-max Dec 10, 2024
bea74b4
Merge branch 'develop' into zm/optimize-validation-layout-updates
zhiltsov-max Dec 12, 2024
e0e978a
Move import
zhiltsov-max Dec 12, 2024
043bf83
Remove extra sorting
zhiltsov-max Dec 12, 2024
e202521
Add sorting
zhiltsov-max Dec 12, 2024
5e3a797
Fix merge
zhiltsov-max Dec 12, 2024
db59fb2
Improve error message
zhiltsov-max Dec 12, 2024
e2b2807
Fix imports
zhiltsov-max Dec 12, 2024
88bd0ce
Refactor some code, fix frame counts use in random reroll
zhiltsov-max Dec 12, 2024
199ef37
Improve tests, fix random reroll in task
zhiltsov-max Dec 13, 2024
21c1866
Update changelog
zhiltsov-max Dec 13, 2024
57afe1e
Fix newline
zhiltsov-max Dec 17, 2024
3326cad
Apply suggestions from code review
zhiltsov-max Dec 18, 2024
82c4ab2
Rename variable
zhiltsov-max Dec 18, 2024
0d1555f
Add named arg in function call
zhiltsov-max Dec 18, 2024
7779b4e
Add notes on remove_segment_chunks api
zhiltsov-max Dec 18, 2024
151df1b
Fix cache removal log messages
zhiltsov-max Dec 18, 2024
86645ff
Add a model property for active validation frames
zhiltsov-max Dec 18, 2024
b67ba9a
Remove accumulating media cache
zhiltsov-max Dec 18, 2024
33b229e
Remove extra variables
zhiltsov-max Dec 18, 2024
78dbd35
Fix and refactor bulk rf m2m updates
zhiltsov-max Dec 19, 2024
e61f1ac
Fix related file field name
zhiltsov-max Dec 19, 2024
a268cd8
Fix cache keys for context image chunks
zhiltsov-max Dec 19, 2024
5c3522f
Fix honeypot skipping for unchanged honeypots
zhiltsov-max Dec 19, 2024
68d1771
Fix context image chunks removal for updated honeypot frames
zhiltsov-max Dec 19, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
### Fixed

- \[Server API\] Significantly improved performance of honeypot changes in tasks
(<https://github.com/cvat-ai/cvat/pull/8789>)
- \[Server API\] `PATCH tasks/id/validation_layout` responses now include correct
`disabled_frames` and handle simultaneous updates of
`disabled_frames` and honeypot frames correctly
(<https://github.com/cvat-ai/cvat/pull/8789>)
2 changes: 1 addition & 1 deletion cvat/apps/dataset_manager/bindings.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ class Attribute(NamedTuple):
value: Any

@classmethod
def add_prefetch_info(cls, queryset: QuerySet):
def add_prefetch_info(cls, queryset: QuerySet[Label]) -> QuerySet[Label]:
assert issubclass(queryset.model, Label)

return add_prefetch_fields(queryset, [
Expand Down
10 changes: 5 additions & 5 deletions cvat/apps/dataset_manager/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from cvat.apps.engine import models, serializers
from cvat.apps.engine.plugins import plugin_decorator
from cvat.apps.engine.log import DatasetLogManager
from cvat.apps.engine.utils import chunked_list
from cvat.apps.engine.utils import take_by
from cvat.apps.events.handlers import handle_annotations_change
from cvat.apps.profiler import silk_profile

Expand Down Expand Up @@ -84,7 +84,7 @@ def merge_table_rows(rows, keys_for_merge, field_id):

class JobAnnotation:
@classmethod
def add_prefetch_info(cls, queryset: QuerySet, prefetch_images: bool = True):
def add_prefetch_info(cls, queryset: QuerySet[models.Job], prefetch_images: bool = True) -> QuerySet[models.Job]:
assert issubclass(queryset.model, models.Job)

label_qs = add_prefetch_fields(models.Label.objects.all(), [
Expand Down Expand Up @@ -530,13 +530,13 @@ def _delete(self, data=None):
self.ir_data.shapes = data['shapes']
self.ir_data.tracks = data['tracks']

for labeledimage_ids_chunk in chunked_list(labeledimage_ids, chunk_size=1000):
for labeledimage_ids_chunk in take_by(labeledimage_ids, chunk_size=1000):
self._delete_job_labeledimages(labeledimage_ids_chunk)

for labeledshape_ids_chunk in chunked_list(labeledshape_ids, chunk_size=1000):
for labeledshape_ids_chunk in take_by(labeledshape_ids, chunk_size=1000):
self._delete_job_labeledshapes(labeledshape_ids_chunk)

for labeledtrack_ids_chunk in chunked_list(labeledtrack_ids, chunk_size=1000):
for labeledtrack_ids_chunk in take_by(labeledtrack_ids, chunk_size=1000):
self._delete_job_labeledtracks(labeledtrack_ids_chunk)

deleted_data = {
Expand Down
37 changes: 29 additions & 8 deletions cvat/apps/engine/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,13 @@
import PIL.Image
import PIL.ImageOps
import rq
from rq.job import JobStatus as RQJobStatus
from django.conf import settings
from django.core.cache import caches
from django.db import models as django_models
from django.utils import timezone as django_tz
from redis.exceptions import LockError
from rest_framework.exceptions import NotFound, ValidationError
from rq.job import JobStatus as RQJobStatus

from cvat.apps.engine import models
from cvat.apps.engine.cloud_provider import (
Expand All @@ -65,7 +65,12 @@
load_image,
)
from cvat.apps.engine.rq_job_handler import RQJobMetaField
from cvat.apps.engine.utils import CvatChunkTimestampMismatchError, get_rq_lock_for_job, md5_hash
from cvat.apps.engine.utils import (
CvatChunkTimestampMismatchError,
format_list,
get_rq_lock_for_job,
md5_hash,
)
from utils.dataset_manifest import ImageManifestManager

slogger = ServerLogManager(__name__)
Expand All @@ -91,7 +96,8 @@ def enqueue_create_chunk_job(
# Enqueue the job if the chunk was deleted but the RQ job still exists.
# This can happen in cases involving jobs with honeypots and
# if the job wasn't collected by the requesting process for any reason.
rq_job.get_status(refresh=False) in {RQJobStatus.FINISHED, RQJobStatus.FAILED, RQJobStatus.CANCELED}
rq_job.get_status(refresh=False)
in {RQJobStatus.FINISHED, RQJobStatus.FAILED, RQJobStatus.CANCELED}
):
rq_job = queue.enqueue(
create_callback,
Expand Down Expand Up @@ -275,11 +281,12 @@ def _create_cache_item(
return item

def _delete_cache_item(self, key: str):
    """Removes a single item from the cache and logs the removal."""
    # NOTE(review): the cache backend's delete() is expected to be a no-op
    # for missing keys, so no error handling is required here — confirm for
    # the configured backend.
    self._cache().delete(key)
    slogger.glob.info(f"Removed the cache key {key}")

def _bulk_delete_cache_items(self, keys: Sequence[str]):
    """Removes several items from the cache in one backend call and logs it."""
    # delete_many() does all the keys in a single round trip instead of
    # one delete() per key.
    cache_backend = self._cache()
    cache_backend.delete_many(keys)
    slogger.glob.info(f"Removed the cache keys {format_list(keys)}")

def _get_cache_item(self, key: str) -> Optional[_CacheItem]:
try:
Expand Down Expand Up @@ -474,6 +481,20 @@ def remove_segment_chunk(
self._make_chunk_key(db_segment, chunk_number=chunk_number, quality=quality)
)

def remove_segment_chunks(self, params: Sequence[dict[str, Any]]) -> None:
    """
    Removes several segment chunks from the cache.

    The function expects a sequence of remove_segment_chunk() parameters as dicts:
    each dict must contain a "db_segment" entry; the remaining entries are passed
    through to the chunk key builder (e.g. "chunk_number", "quality").
    """

    keys_to_remove = []
    for item_params in params:
        # Work on a shallow copy: pop() on the original dict would be a
        # destructive side effect on the caller's data.
        item_params = dict(item_params)
        db_obj = item_params.pop("db_segment")
        keys_to_remove.append(self._make_chunk_key(db_obj, **item_params))

    self._bulk_delete_cache_items(keys_to_remove)

def get_cloud_preview(self, db_storage: models.CloudStorage) -> Optional[DataWithMime]:
return self._to_data_with_mime(self._get_cache_item(self._make_preview_key(db_storage)))

Expand Down
5 changes: 2 additions & 3 deletions cvat/apps/engine/cloud_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
from botocore.client import Config
from botocore.exceptions import ClientError
from botocore.handlers import disable_signing
from datumaro.util import take_by # can be changed to itertools.batched after migration to python3.12
from django.conf import settings
from google.cloud import storage
from google.cloud.exceptions import Forbidden as GoogleCloudForbidden
Expand All @@ -32,7 +31,7 @@

from cvat.apps.engine.log import ServerLogManager
from cvat.apps.engine.models import CloudProviderChoice, CredentialsTypeChoice
from cvat.apps.engine.utils import get_cpu_number
from cvat.apps.engine.utils import get_cpu_number, take_by
from cvat.utils.http import PROXIES_FOR_UNTRUSTED_URLS

class NamedBytesIO(BytesIO):
Expand Down Expand Up @@ -242,7 +241,7 @@ def bulk_download_to_memory(
threads_number = normalize_threads_number(threads_number, len(files))

with ThreadPoolExecutor(max_workers=threads_number) as executor:
for batch_links in take_by(files, count=threads_number):
for batch_links in take_by(files, chunk_size=threads_number):
yield from executor.map(func, batch_links)

def bulk_download_to_dir(
Expand Down
2 changes: 1 addition & 1 deletion cvat/apps/engine/frame_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
import av
import cv2
import numpy as np
from datumaro.util import take_by
from django.conf import settings
from PIL import Image
from rest_framework.exceptions import ValidationError
Expand All @@ -46,6 +45,7 @@
ZipReader,
)
from cvat.apps.engine.mime_types import mimetypes
from cvat.apps.engine.utils import take_by

_T = TypeVar("_T")

Expand Down
35 changes: 32 additions & 3 deletions cvat/apps/engine/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import uuid
from enum import Enum
from functools import cached_property
from typing import Any, ClassVar, Collection, Dict, Optional
from typing import Any, ClassVar, Collection, Dict, Optional, Sequence

from django.conf import settings
from django.contrib.auth.models import User
Expand All @@ -27,7 +27,7 @@

from cvat.apps.engine.lazy_list import LazyList
from cvat.apps.engine.model_utils import MaybeUndefined
from cvat.apps.engine.utils import chunked_list, parse_specific_attributes
from cvat.apps.engine.utils import parse_specific_attributes, take_by
from cvat.apps.events.utils import cache_deleted


Expand Down Expand Up @@ -276,6 +276,11 @@ class ValidationLayout(models.Model):
disabled_frames = IntArrayField(store_sorted=True, unique_values=True)
"Stores task frame numbers of the disabled (deleted) validation frames"

@property
def active_frames(self) -> Sequence[int]:
    """
    An ordered sequence of active (non-disabled) validation frames.

    Returns the task frame numbers from `frames` that are not present in
    `disabled_frames`, sorted in ascending order.
    """
    # sorted() is required to honor the documented "ordered Sequence"
    # contract: a bare set is unordered and is not a Sequence.
    return sorted(set(self.frames).difference(self.disabled_frames))

class Data(models.Model):
MANIFEST_FILENAME: ClassVar[str] = 'manifest.jsonl'

Expand Down Expand Up @@ -426,7 +431,7 @@ def touch(self) -> None:

@transaction.atomic(savepoint=False)
def clear_annotations_in_jobs(job_ids):
for job_ids_chunk in chunked_list(job_ids, chunk_size=1000):
for job_ids_chunk in take_by(job_ids, chunk_size=1000):
TrackedShapeAttributeVal.objects.filter(shape__track__job_id__in=job_ids_chunk).delete()
TrackedShape.objects.filter(track__job_id__in=job_ids_chunk).delete()
LabeledTrackAttributeVal.objects.filter(track__job_id__in=job_ids_chunk).delete()
Expand All @@ -436,6 +441,30 @@ def clear_annotations_in_jobs(job_ids):
LabeledImageAttributeVal.objects.filter(image__job_id__in=job_ids_chunk).delete()
LabeledImage.objects.filter(job_id__in=job_ids_chunk).delete()

@transaction.atomic(savepoint=False)
def clear_annotations_on_frames_in_honeypot_task(db_task: Task, frames: Sequence[int]):
    """
    Deletes all shape and image (tag) annotations on the given frames of the task.

    Only supported for honeypot (GT pool) tasks: tracks are prohibited there,
    which makes per-frame annotation removal safe. Frames are processed in
    batches to keep the DB queries bounded.
    """
    if db_task.data.validation_mode != ValidationMode.GT_POOL:
        # Per-frame deletion is only safe when tracks cannot exist;
        # tracks are prohibited in honeypot (GT pool) tasks.
        raise AssertionError(
            f"Expected validation mode {ValidationMode.GT_POOL!r}, "
            f"got {db_task.data.validation_mode!r}"
        )

    # NOTE(review): the lookups traverse relations via "job_id" — confirm this
    # resolves the same way as "job" in the project's Django version.
    for frames_batch in take_by(frames, chunk_size=1000):
        # Attribute values are removed before their parent objects.
        LabeledShapeAttributeVal.objects.filter(
            shape__job_id__segment__task_id=db_task.id,
            shape__frame__in=frames_batch,
        ).delete()
        LabeledShape.objects.filter(
            job_id__segment__task_id=db_task.id,
            frame__in=frames_batch,
        ).delete()
        LabeledImageAttributeVal.objects.filter(
            image__job_id__segment__task_id=db_task.id,
            image__frame__in=frames_batch,
        ).delete()
        LabeledImage.objects.filter(
            job_id__segment__task_id=db_task.id,
            frame__in=frames_batch,
        ).delete()
Marishka17 marked this conversation as resolved.
Show resolved Hide resolved

class Project(TimestampedModel):
name = SafeCharField(max_length=256)
owner = models.ForeignKey(User, null=True, blank=True,
Expand Down
Loading
Loading