From 663fab7324a56d9a55826adcbf4fdf6c4e47ad84 Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Thu, 31 Oct 2024 14:01:32 +0100 Subject: [PATCH] Fix exporting projects with honeypots (#8597) --- ...export_honeypots_when_exporting_project.md | 4 + cvat/apps/dataset_manager/bindings.py | 75 +++++++++++++------ cvat/apps/dataset_manager/formats/cvat.py | 12 +-- tests/python/rest_api/test_projects.py | 64 +++++++++++++++- tests/python/rest_api/test_requests.py | 1 + 5 files changed, 126 insertions(+), 30 deletions(-) create mode 100644 changelog.d/20241029_105216_maria_do_not_export_honeypots_when_exporting_project.md diff --git a/changelog.d/20241029_105216_maria_do_not_export_honeypots_when_exporting_project.md b/changelog.d/20241029_105216_maria_do_not_export_honeypots_when_exporting_project.md new file mode 100644 index 000000000000..e6d8950a55b2 --- /dev/null +++ b/changelog.d/20241029_105216_maria_do_not_export_honeypots_when_exporting_project.md @@ -0,0 +1,4 @@ +### Fixed + +- Exporting projects with tasks containing honeypots. Honeypots are no longer exported. + () diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py index 35d4b902a53a..1c70520a7090 100644 --- a/cvat/apps/dataset_manager/bindings.py +++ b/cvat/apps/dataset_manager/bindings.py @@ -285,6 +285,7 @@ def __init__(self, self._db_data: models.Data = db_task.data self._use_server_track_ids = use_server_track_ids self._required_frames = included_frames + self._initialized_included_frames: Optional[Set[int]] = None self._db_subset = db_task.subset super().__init__(db_task) @@ -536,12 +537,14 @@ def shapes(self): yield self._export_labeled_shape(shape) def get_included_frames(self): - return set( - i for i in self.rel_range - if not self._is_frame_deleted(i) - and not self._is_frame_excluded(i) - and self._is_frame_required(i) - ) + if self._initialized_included_frames is None: + self._initialized_included_frames = set( + i for i in self.rel_range + if not self._is_frame_deleted(i) + and not self._is_frame_excluded(i) + and self._is_frame_required(i) + ) + return self._initialized_included_frames def _is_frame_deleted(self, frame): return frame in self._deleted_frames @@ -1112,7 +1115,10 @@ def _init_frame_info(self): } for frame in range(task.data.size)}) else: self._frame_info.update({(task.id, self.rel_frame_id(task.id, db_image.frame)): { - "path": mangle_image_name(db_image.path, defaulted_subset, original_names), + # do not modify honeypot names since they will be excluded from the dataset + # and their quantity should not affect the validation frame name + "path": mangle_image_name(db_image.path, defaulted_subset, original_names) \ + if not db_image.is_placeholder else db_image.path, "id": db_image.id, "width": db_image.width, "height": db_image.height, @@ -1271,25 +1277,36 @@ def get_frame(task_id: int, idx: int) -> ProjectData.Frame: return frames[(frame_info["subset"], abs_frame)] if include_empty: - for ident in sorted(self._frame_info): - if ident not in self._deleted_frames: - get_frame(*ident) + for task_id, frame in sorted(self._frame_info): + if not self._tasks_data.get(task_id): + self.init_task_data(task_id) + + task_included_frames = self._tasks_data[task_id].get_included_frames() + if frame in task_included_frames: + get_frame(task_id, frame) + + for task_data in self.task_data: + task: Task = task_data.db_instance - for task in self._db_tasks.values(): anno_manager = AnnotationManager( self._annotation_irs[task.id], dimension=self._annotation_irs[task.id].dimension ) + task_included_frames = task_data.get_included_frames() + for shape in sorted( anno_manager.to_shapes( task.data.size, + included_frames=task_included_frames, include_outside=False, use_server_track_ids=self._use_server_track_ids ), key=lambda shape: shape.get("z_order", 0) ): - if (task.id, shape['frame']) not in self._frame_info or (task.id, shape['frame']) in self._deleted_frames: + if shape['frame'] in task_data.deleted_frames: continue + assert (task.id, shape['frame']) in self._frame_info + if 'track_id' in shape: if shape['outside']: continue @@ -1368,23 +1385,33 @@ def soft_attribute_import(self, value: bool): for task_data in self._tasks_data.values(): task_data.soft_attribute_import = value + + def init_task_data(self, task_id: int) -> TaskData: + try: + task = self._db_tasks[task_id] + except KeyError as ex: + raise Exception("There is no such task in the project") from ex + + task_data = TaskData( + annotation_ir=self._annotation_irs[task_id], + db_task=task, + host=self._host, + create_callback=self._task_annotations[task_id].create \ + if self._task_annotations is not None else None, + ) + task_data._MAX_ANNO_SIZE //= len(self._db_tasks) + task_data.soft_attribute_import = self.soft_attribute_import + self._tasks_data[task_id] = task_data + + return task_data + @property def task_data(self): - for task_id, task in self._db_tasks.items(): + for task_id in self._db_tasks.keys(): if task_id in self._tasks_data: yield self._tasks_data[task_id] else: - task_data = TaskData( - annotation_ir=self._annotation_irs[task_id], - db_task=task, - host=self._host, - create_callback=self._task_annotations[task_id].create \ - if self._task_annotations is not None else None, - ) - task_data._MAX_ANNO_SIZE //= len(self._db_tasks) - task_data.soft_attribute_import = self.soft_attribute_import - self._tasks_data[task_id] = task_data - yield task_data + yield self.init_task_data(task_id) @staticmethod def _get_filename(path): diff --git a/cvat/apps/dataset_manager/formats/cvat.py b/cvat/apps/dataset_manager/formats/cvat.py index 4651fd398451..03ef389599e8 100644 --- a/cvat/apps/dataset_manager/formats/cvat.py +++ b/cvat/apps/dataset_manager/formats/cvat.py @@ -9,7 +9,7 @@ from collections import OrderedDict from glob import glob from io import BufferedWriter -from typing import Callable +from typing import Callable, Union from datumaro.components.annotation import (AnnotationType, Bbox, Label, LabelCategories, Points, Polygon, @@ -22,7 +22,7 @@ from datumaro.util.image import Image from defusedxml import ElementTree -from cvat.apps.dataset_manager.bindings import (ProjectData, CommonData, detect_dataset, +from cvat.apps.dataset_manager.bindings import (ProjectData, TaskData, JobData, detect_dataset, get_defaulted_subset, import_dm_annotations, match_dm_item) @@ -1370,7 +1370,7 @@ def dump_project_anno(dst_file: BufferedWriter, project_data: ProjectData, callb callback(dumper, project_data) dumper.close_document() -def dump_media_files(instance_data: CommonData, img_dir: str, project_data: ProjectData = None): +def dump_media_files(instance_data: Union[TaskData, JobData], img_dir: str, project_data: ProjectData = None): frame_provider = make_frame_provider(instance_data.db_instance) ext = '' @@ -1383,9 +1383,11 @@ def dump_media_files(instance_data: CommonData, img_dir: str, project_data: Proj quality=FrameQuality.ORIGINAL, out_type=FrameOutputType.BUFFER, ) + included_frames = instance_data.get_included_frames() + for frame_id, frame in zip(instance_data.rel_range, frames): - if (project_data is not None and (instance_data.db_instance.id, frame_id) in project_data.deleted_frames) \ - or frame_id in instance_data.deleted_frames: + # exclude deleted frames and honeypots + if frame_id not in included_frames: continue frame_name = instance_data.frame_info[frame_id]['path'] if project_data is None \ else project_data.frame_info[(instance_data.db_instance.id, frame_id)]['path'] diff --git a/tests/python/rest_api/test_projects.py b/tests/python/rest_api/test_projects.py index c44ec5d8373a..73e3c02ad286 100644 --- a/tests/python/rest_api/test_projects.py +++ b/tests/python/rest_api/test_projects.py @@ -34,8 +34,14 @@ patch_method, post_method, ) +from shared.utils.helpers import generate_image_files -from .utils import CollectionSimpleFilterTestBase, export_project_backup, export_project_dataset +from .utils import ( + CollectionSimpleFilterTestBase, + create_task, + export_project_backup, + export_project_dataset, +) @pytest.mark.usefixtures("restore_db_per_class") @@ -611,6 +617,7 @@ def _check_cvat_for_video_project_annotations_meta(content, values_to_be_checked @pytest.mark.usefixtures("restore_db_per_function") @pytest.mark.usefixtures("restore_redis_inmem_per_function") +@pytest.mark.usefixtures("restore_redis_ondisk_per_function") class TestImportExportDatasetProject: @pytest.fixture(autouse=True) @@ -1038,6 +1045,61 @@ def test_creates_subfolders_for_subsets_on_export( len([f for f in zip_file.namelist() if f.startswith(folder_prefix)]) > 0 ), f"No {folder_prefix} in {zip_file.namelist()}" + def test_export_project_with_honeypots( + self, + admin_user: str, + ): + project_spec = { + "name": "Project with honeypots", + "labels": [{"name": "cat"}], + } + + with make_api_client(admin_user) as api_client: + project, _ = api_client.projects_api.create(project_spec) + + image_files = generate_image_files(3) + image_names = [i.name for i in image_files] + + task_params = { + "name": "Task with honeypots", + "segment_size": 1, + "project_id": project.id, + } + + data_params = { + "image_quality": 70, + "client_files": image_files, + "sorting_method": "random", + "validation_params": { + "mode": "gt_pool", + "frame_selection_method": "manual", + "frames_per_job_count": 1, + "frames": [image_files[-1].name], + }, + } + + create_task(admin_user, spec=task_params, data=data_params) + + dataset = export_project_dataset( + admin_user, api_version=2, save_images=True, id=project.id, format="COCO 1.0" + ) + + with zipfile.ZipFile(io.BytesIO(dataset)) as zip_file: + subset_path = "images/default" + assert ( + sorted( + [ + f[len(subset_path) + 1 :] + for f in zip_file.namelist() + if f.startswith(subset_path) + ] + ) + == image_names + ) + with zip_file.open("annotations/instances_default.json") as anno_file: + annotations = json.load(anno_file) + assert sorted([a["file_name"] for a in annotations["images"]]) == image_names + @pytest.mark.usefixtures("restore_db_per_function") class TestPatchProjectLabel: diff --git a/tests/python/rest_api/test_requests.py b/tests/python/rest_api/test_requests.py index f06e97ae7fba..1d076bf7f585 100644 --- a/tests/python/rest_api/test_requests.py +++ b/tests/python/rest_api/test_requests.py @@ -29,6 +29,7 @@ @pytest.mark.usefixtures("restore_db_per_class") @pytest.mark.usefixtures("restore_redis_inmem_per_function") +@pytest.mark.usefixtures("restore_redis_ondisk_per_function") @pytest.mark.timeout(30) class TestRequestsListFilters(CollectionSimpleFilterTestBase):