From 602e6cd97b42db409f695693f7e60b1eb9e1a7ce Mon Sep 17 00:00:00 2001 From: maya Date: Fri, 25 Oct 2024 18:42:11 +0200 Subject: [PATCH 1/7] Fix project export --- cvat/apps/dataset_manager/bindings.py | 75 +++++++++++++++-------- cvat/apps/dataset_manager/formats/cvat.py | 4 +- 2 files changed, 53 insertions(+), 26 deletions(-) diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py index 35d4b902a53a..d6a84565a07b 100644 --- a/cvat/apps/dataset_manager/bindings.py +++ b/cvat/apps/dataset_manager/bindings.py @@ -285,6 +285,7 @@ def __init__(self, self._db_data: models.Data = db_task.data self._use_server_track_ids = use_server_track_ids self._required_frames = included_frames + self._initialized_included_frames: Optional[Set[int]] = None self._db_subset = db_task.subset super().__init__(db_task) @@ -536,12 +537,14 @@ def shapes(self): yield self._export_labeled_shape(shape) def get_included_frames(self): - return set( - i for i in self.rel_range - if not self._is_frame_deleted(i) - and not self._is_frame_excluded(i) - and self._is_frame_required(i) - ) + if self._initialized_included_frames is None: + self._initialized_included_frames = set( + i for i in self.rel_range + if not self._is_frame_deleted(i) + and not self._is_frame_excluded(i) + and self._is_frame_required(i) + ) + return self._initialized_included_frames def _is_frame_deleted(self, frame): return frame in self._deleted_frames @@ -1112,7 +1115,10 @@ def _init_frame_info(self): } for frame in range(task.data.size)}) else: self._frame_info.update({(task.id, self.rel_frame_id(task.id, db_image.frame)): { - "path": mangle_image_name(db_image.path, defaulted_subset, original_names), + # do not modify honeypot names since they will be excluded from the dataset + # and their quantity should not affect the validation frame name + "path": mangle_image_name(db_image.path, defaulted_subset, original_names) \ + if not db_image.is_placeholder else db_image.path, "id": db_image.id, "width": db_image.width, "height": db_image.height, @@ -1271,25 +1277,36 @@ def get_frame(task_id: int, idx: int) -> ProjectData.Frame: return frames[(frame_info["subset"], abs_frame)] if include_empty: - for ident in sorted(self._frame_info): - if ident not in self._deleted_frames: - get_frame(*ident) + for task_id, frame in sorted(self._frame_info): + if not self._tasks_data.get(task_id): + self.init_task_data(task_id) + + task_included_frames = self._tasks_data[task_id].get_included_frames() + if (task_id, frame) not in self._deleted_frames and frame in task_included_frames: + get_frame(task_id, frame) + + for t_data in self.task_data: + task: Task = t_data.db_instance - for task in self._db_tasks.values(): anno_manager = AnnotationManager( self._annotation_irs[task.id], dimension=self._annotation_irs[task.id].dimension ) + task_included_frames = t_data.get_included_frames() + for shape in sorted( anno_manager.to_shapes( task.data.size, + included_frames=task_included_frames, include_outside=False, use_server_track_ids=self._use_server_track_ids ), key=lambda shape: shape.get("z_order", 0) ): - if (task.id, shape['frame']) not in self._frame_info or (task.id, shape['frame']) in self._deleted_frames: + if shape['frame'] in t_data.deleted_frames: continue + assert (task.id, shape['frame']) in self._frame_info + if 'track_id' in shape: if shape['outside']: continue @@ -1368,23 +1385,33 @@ def soft_attribute_import(self, value: bool): for task_data in self._tasks_data.values(): task_data.soft_attribute_import = value + + def init_task_data(self, task_id: int) -> TaskData: + try: + task = self._db_tasks[task_id] + except KeyError as ex: + raise Exception("There is no such task in the project") from ex + + task_data = TaskData( + annotation_ir=self._annotation_irs[task_id], + db_task=task, + host=self._host, + create_callback=self._task_annotations[task_id].create \ + if self._task_annotations is not None else None, + ) + task_data._MAX_ANNO_SIZE //= len(self._db_tasks) + task_data.soft_attribute_import = self.soft_attribute_import + self._tasks_data[task_id] = task_data + + return task_data + @property def task_data(self): - for task_id, task in self._db_tasks.items(): + for task_id, _ in self._db_tasks.items(): if task_id in self._tasks_data: yield self._tasks_data[task_id] else: - task_data = TaskData( - annotation_ir=self._annotation_irs[task_id], - db_task=task, - host=self._host, - create_callback=self._task_annotations[task_id].create \ - if self._task_annotations is not None else None, - ) - task_data._MAX_ANNO_SIZE //= len(self._db_tasks) - task_data.soft_attribute_import = self.soft_attribute_import - self._tasks_data[task_id] = task_data - yield task_data + yield self.init_task_data(task_id) @staticmethod def _get_filename(path): diff --git a/cvat/apps/dataset_manager/formats/cvat.py b/cvat/apps/dataset_manager/formats/cvat.py index 4651fd398451..6f8ee8b543da 100644 --- a/cvat/apps/dataset_manager/formats/cvat.py +++ b/cvat/apps/dataset_manager/formats/cvat.py @@ -1384,8 +1384,8 @@ def dump_media_files(instance_data: CommonData, img_dir: str, project_data: Proj out_type=FrameOutputType.BUFFER, ) for frame_id, frame in zip(instance_data.rel_range, frames): - if (project_data is not None and (instance_data.db_instance.id, frame_id) in project_data.deleted_frames) \ - or frame_id in instance_data.deleted_frames: + # exclude deleted frames and honeypots + if frame_id in instance_data.deleted_frames or frame_id in instance_data._excluded_frames: continue frame_name = instance_data.frame_info[frame_id]['path'] if project_data is None \ else project_data.frame_info[(instance_data.db_instance.id, frame_id)]['path'] From 9641fa1ddc2c4b64559ef2ccc12978b29cd26a12 Mon Sep 17 00:00:00 2001 From: maya Date: Mon, 28 Oct 2024 10:02:33 +0100 Subject: [PATCH 2/7] Add REST API test --- cvat/apps/dataset_manager/bindings.py | 8 ++-- tests/python/rest_api/test_projects.py | 63 +++++++++++++++++++++++++- 2 files changed, 66 insertions(+), 5 deletions(-) diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py index d6a84565a07b..9e30144a87f7 100644 --- a/cvat/apps/dataset_manager/bindings.py +++ b/cvat/apps/dataset_manager/bindings.py @@ -1285,13 +1285,13 @@ def get_frame(task_id: int, idx: int) -> ProjectData.Frame: if (task_id, frame) not in self._deleted_frames and frame in task_included_frames: get_frame(task_id, frame) - for t_data in self.task_data: - task: Task = t_data.db_instance + for task_data in self.task_data: + task: Task = task_data.db_instance anno_manager = AnnotationManager( self._annotation_irs[task.id], dimension=self._annotation_irs[task.id].dimension ) - task_included_frames = t_data.get_included_frames() + task_included_frames = task_data.get_included_frames() for shape in sorted( anno_manager.to_shapes( @@ -1302,7 +1302,7 @@ def get_frame(task_id: int, idx: int) -> ProjectData.Frame: ), key=lambda shape: shape.get("z_order", 0) ): - if shape['frame'] in t_data.deleted_frames: + if shape['frame'] in task_data.deleted_frames: continue assert (task.id, shape['frame']) in self._frame_info diff --git a/tests/python/rest_api/test_projects.py b/tests/python/rest_api/test_projects.py index b0c8a3b247c4..d4a5c0b218a6 100644 --- a/tests/python/rest_api/test_projects.py +++ b/tests/python/rest_api/test_projects.py @@ -34,8 +34,14 @@ patch_method, post_method, ) +from shared.utils.helpers import generate_image_files -from .utils import CollectionSimpleFilterTestBase, export_project_backup, export_project_dataset +from .utils import ( + CollectionSimpleFilterTestBase, + create_task, + export_project_backup, + export_project_dataset, +) @pytest.mark.usefixtures("restore_db_per_class") @@ -1038,6 +1044,61 @@ def test_creates_subfolders_for_subsets_on_export( len([f for f in zip_file.namelist() if f.startswith(folder_prefix)]) > 0 ), f"No {folder_prefix} in {zip_file.namelist()}" + def test_export_project_with_honeypots( + self, + admin_user: str, + ): + project_spec = { + "name": f"Project with honeypots", + "labels": [{"name": "cat"}], + } + + with make_api_client(admin_user) as api_client: + project, _ = api_client.projects_api.create(project_spec) + + image_files = generate_image_files(3) + image_names = [i.name for i in image_files] + + task_params = { + "name": "Task with honeypots", + "segment_size": 1, + "project_id": project.id, + } + + data_params = { + "image_quality": 70, + "client_files": image_files, + "sorting_method": "random", + "validation_params": { + "mode": "gt_pool", + "frame_selection_method": "manual", + "frames_per_job_count": 1, + "frames": [image_files[-1].name], + }, + } + + create_task(admin_user, spec=task_params, data=data_params) + + dataset = export_project_dataset( + admin_user, api_version=2, save_images=True, id=project.id, format="COCO 1.0" + ) + + with zipfile.ZipFile(io.BytesIO(dataset)) as zip_file: + subset_path = "images/default" + assert ( + sorted( + [ + f[len(subset_path) + 1 :] + for f in zip_file.namelist() + if f.startswith(subset_path) + ] + ) + == image_names + ) + with zip_file.open("annotations/instances_default.json") as anno_file: + annotations = json.load(anno_file) + assert sorted([a["file_name"] for a in annotations["images"]]) == image_names + @pytest.mark.usefixtures("restore_db_per_function") class TestPatchProjectLabel: From 7cccc305cf5c0bd52ed0123795ce23e5ad1ef673 Mon Sep 17 00:00:00 2001 From: maya Date: Tue, 29 Oct 2024 10:57:39 +0100 Subject: [PATCH 3/7] Add changelog --- ...16_maria_do_not_export_honeypots_when_exporting_project.md | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 changelog.d/20241029_105216_maria_do_not_export_honeypots_when_exporting_project.md diff --git a/changelog.d/20241029_105216_maria_do_not_export_honeypots_when_exporting_project.md b/changelog.d/20241029_105216_maria_do_not_export_honeypots_when_exporting_project.md new file mode 100644 index 000000000000..e6d8950a55b2 --- /dev/null +++ b/changelog.d/20241029_105216_maria_do_not_export_honeypots_when_exporting_project.md @@ -0,0 +1,4 @@ +### Fixed + +- Exporting projects with tasks containing honeypots. Honeypots are no longer exported. + () From 07b5790529ce15d933fee2bb9256f1a516af17bf Mon Sep 17 00:00:00 2001 From: maya Date: Tue, 29 Oct 2024 11:00:48 +0100 Subject: [PATCH 4/7] Try to fix unstable test --- tests/python/rest_api/test_projects.py | 2 +- tests/python/rest_api/test_requests.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/python/rest_api/test_projects.py b/tests/python/rest_api/test_projects.py index d4a5c0b218a6..e818824ae9a7 100644 --- a/tests/python/rest_api/test_projects.py +++ b/tests/python/rest_api/test_projects.py @@ -1049,7 +1049,7 @@ def test_export_project_with_honeypots( admin_user: str, ): project_spec = { - "name": f"Project with honeypots", + "name": "Project with honeypots", "labels": [{"name": "cat"}], } diff --git a/tests/python/rest_api/test_requests.py b/tests/python/rest_api/test_requests.py index f06e97ae7fba..1d076bf7f585 100644 --- a/tests/python/rest_api/test_requests.py +++ b/tests/python/rest_api/test_requests.py @@ -29,6 +29,7 @@ @pytest.mark.usefixtures("restore_db_per_class") @pytest.mark.usefixtures("restore_redis_inmem_per_function") +@pytest.mark.usefixtures("restore_redis_ondisk_per_function") @pytest.mark.timeout(30) class TestRequestsListFilters(CollectionSimpleFilterTestBase): From 3d21517c39efe1addc0397ef0d0f76caaa29dbbe Mon Sep 17 00:00:00 2001 From: Maria Khrustaleva Date: Tue, 29 Oct 2024 11:04:08 +0100 Subject: [PATCH 5/7] Update cvat/apps/dataset_manager/bindings.py --- cvat/apps/dataset_manager/bindings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py index 9e30144a87f7..477634feea5c 100644 --- a/cvat/apps/dataset_manager/bindings.py +++ b/cvat/apps/dataset_manager/bindings.py @@ -1282,7 +1282,7 @@ def get_frame(task_id: int, idx: int) -> ProjectData.Frame: self.init_task_data(task_id) task_included_frames = self._tasks_data[task_id].get_included_frames() - if (task_id, frame) not in self._deleted_frames and frame in task_included_frames: + if frame in task_included_frames: get_frame(task_id, frame) for task_data in self.task_data: From 6c671f3c52713e89ab9ff756996009c398eae05c Mon Sep 17 00:00:00 2001 From: maya Date: Tue, 29 Oct 2024 11:14:48 +0100 Subject: [PATCH 6/7] Small fixes --- cvat/apps/dataset_manager/formats/cvat.py | 10 ++++++---- tests/python/rest_api/test_projects.py | 1 + 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/cvat/apps/dataset_manager/formats/cvat.py b/cvat/apps/dataset_manager/formats/cvat.py index 6f8ee8b543da..03ef389599e8 100644 --- a/cvat/apps/dataset_manager/formats/cvat.py +++ b/cvat/apps/dataset_manager/formats/cvat.py @@ -9,7 +9,7 @@ from collections import OrderedDict from glob import glob from io import BufferedWriter -from typing import Callable +from typing import Callable, Union from datumaro.components.annotation import (AnnotationType, Bbox, Label, LabelCategories, Points, Polygon, @@ -22,7 +22,7 @@ from datumaro.util.image import Image from defusedxml import ElementTree -from cvat.apps.dataset_manager.bindings import (ProjectData, CommonData, detect_dataset, +from cvat.apps.dataset_manager.bindings import (ProjectData, TaskData, JobData, detect_dataset, get_defaulted_subset, import_dm_annotations, match_dm_item) @@ -1370,7 +1370,7 @@ def dump_project_anno(dst_file: BufferedWriter, project_data: ProjectData, callb callback(dumper, project_data) dumper.close_document() -def dump_media_files(instance_data: CommonData, img_dir: str, project_data: ProjectData = None): +def dump_media_files(instance_data: Union[TaskData, JobData], img_dir: str, project_data: ProjectData = None): frame_provider = make_frame_provider(instance_data.db_instance) ext = '' @@ -1383,9 +1383,11 @@ def dump_media_files(instance_data: CommonData, img_dir: str, project_data: Proj quality=FrameQuality.ORIGINAL, out_type=FrameOutputType.BUFFER, ) + included_frames = instance_data.get_included_frames() + for frame_id, frame in zip(instance_data.rel_range, frames): # exclude deleted frames and honeypots - if frame_id in instance_data.deleted_frames or frame_id in instance_data._excluded_frames: + if frame_id not in included_frames: continue frame_name = instance_data.frame_info[frame_id]['path'] if project_data is None \ else project_data.frame_info[(instance_data.db_instance.id, frame_id)]['path'] diff --git a/tests/python/rest_api/test_projects.py b/tests/python/rest_api/test_projects.py index e818824ae9a7..c5c85c2f57a3 100644 --- a/tests/python/rest_api/test_projects.py +++ b/tests/python/rest_api/test_projects.py @@ -617,6 +617,7 @@ def _check_cvat_for_video_project_annotations_meta(content, values_to_be_checked @pytest.mark.usefixtures("restore_db_per_function") @pytest.mark.usefixtures("restore_redis_inmem_per_function") +@pytest.mark.usefixtures("restore_redis_ondisk_per_function") class TestImportExportDatasetProject: @pytest.fixture(autouse=True) From c6cd5433e55b7ac58dc601051bac26f8d11c36ac Mon Sep 17 00:00:00 2001 From: maya Date: Wed, 30 Oct 2024 13:43:46 +0100 Subject: [PATCH 7/7] Apply comment --- cvat/apps/dataset_manager/bindings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cvat/apps/dataset_manager/bindings.py b/cvat/apps/dataset_manager/bindings.py index 477634feea5c..1c70520a7090 100644 --- a/cvat/apps/dataset_manager/bindings.py +++ b/cvat/apps/dataset_manager/bindings.py @@ -1407,7 +1407,7 @@ def init_task_data(self, task_id: int) -> TaskData: @property def task_data(self): - for task_id, _ in self._db_tasks.items(): + for task_id in self._db_tasks.keys(): if task_id in self._tasks_data: yield self._tasks_data[task_id] else: