diff --git a/Dockerfile b/Dockerfile index 7bed5fac9e81..ab52f3fbefe9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -126,6 +126,7 @@ RUN apt-get update && \ python3-venv \ supervisor \ tzdata \ + unrar \ && ln -fs /usr/share/zoneinfo/${TZ} /etc/localtime && \ dpkg-reconfigure -f noninteractive tzdata && \ rm -rf /var/lib/apt/lists/* && \ diff --git a/changelog.d/20240405_091941_klakhov_rar_support.md b/changelog.d/20240405_091941_klakhov_rar_support.md new file mode 100644 index 000000000000..a390854ef672 --- /dev/null +++ b/changelog.d/20240405_091941_klakhov_rar_support.md @@ -0,0 +1,4 @@ +### Added + +- Support for `.rar`, `.tar`, `.gz`, `.bz2`, `.cpio`, `.7z` archives + () diff --git a/cvat/apps/engine/media.mimetypes b/cvat/apps/engine/media.mimetypes index 43cd80839b80..0bc8c1040f11 100644 --- a/cvat/apps/engine/media.mimetypes +++ b/cvat/apps/engine/media.mimetypes @@ -200,7 +200,7 @@ image/x-quicktime qif # possible archive mimetypes (limited set) application/gzip gz -application/rar rar +application/x-rar-compressed rar application/x-7z-compressed 7z application/x-bzip bz bz2 application/x-bzip-compressed-tar tar.bz tar.bz2 tb2 tbz tbz2 diff --git a/cvat/apps/engine/media_extractors.py b/cvat/apps/engine/media_extractors.py index a7f601776c6b..d6d306121527 100644 --- a/cvat/apps/engine/media_extractors.py +++ b/cvat/apps/engine/media_extractors.py @@ -1,8 +1,10 @@ # Copyright (C) 2019-2022 Intel Corporation +# Copyright (C) 2024 CVAT.ai Corporation # # SPDX-License-Identifier: MIT import os +import sysconfig import tempfile import shutil import zipfile @@ -266,7 +268,8 @@ def __init__(self, self._archive_source = source_path[0] tmp_dir = extract_dir if extract_dir else os.path.dirname(source_path[0]) - Archive(self._archive_source).extractall(tmp_dir) + patool_path = os.path.join(sysconfig.get_path('scripts'), 'patool') + Archive(self._archive_source).extractall(tmp_dir, False, patool_path) if not extract_dir: os.remove(self._archive_source) super().__init__( @@ -845,7 +848,7 @@ def _is_archive(path): encoding = mime[1] supportedArchives = ['application/x-rar-compressed', 'application/x-tar', 'application/x-7z-compressed', 'application/x-cpio', - 'gzip', 'bzip2'] + 'application/gzip', 'application/x-bzip'] return mime_type in supportedArchives or encoding in supportedArchives def _is_video(path): diff --git a/cvat/apps/engine/tests/assets/test_rar.rar b/cvat/apps/engine/tests/assets/test_rar.rar new file mode 100644 index 000000000000..ef82c0441190 Binary files /dev/null and b/cvat/apps/engine/tests/assets/test_rar.rar differ diff --git a/cvat/apps/engine/tests/test_rest_api.py b/cvat/apps/engine/tests/test_rest_api.py index 17113f3851a1..37f4fbd44c4e 100644 --- a/cvat/apps/engine/tests/test_rest_api.py +++ b/cvat/apps/engine/tests/test_rest_api.py @@ -9,6 +9,7 @@ import os import random import shutil +import sysconfig import tempfile import xml.etree.ElementTree as ET import zipfile @@ -25,6 +26,7 @@ import av import numpy as np from pdf2image import convert_from_bytes +from pyunpack import Archive from django.conf import settings from django.contrib.auth.models import Group, User from django.http import HttpResponse @@ -3186,6 +3188,18 @@ def setUpClass(cls): image_sizes.append((int(data["WIDTH"]), int(data["HEIGHT"]))) cls._share_image_sizes[filename] = image_sizes + filename = "test_rar.rar" + source_path = os.path.join(os.path.dirname(__file__), 'assets', filename) + path = os.path.join(settings.SHARE_ROOT, filename) + shutil.copyfile(source_path, path) + image_sizes = [] + images = cls._extract_rar_archive(source_path) + for [f, image] in images: + width, height = image.size + image_sizes.append((width, height)) + cls._share_image_sizes[filename] = image_sizes + cls._share_files.append(filename) + filename = "test_velodyne_points.zip" path = os.path.join(os.path.dirname(__file__), 'assets', filename) image_sizes = [] @@ -3364,6 +3378,17 @@ def _extract_zip_archive(archive, dimension=DimensionType.DIM_2D): for f in sorted(chunk.namelist()) ] + @staticmethod + def _extract_rar_archive(archive): + with tempfile.TemporaryDirectory(dir=settings.TMP_FILES_ROOT) as archive_dir: + patool_path = os.path.join(sysconfig.get_path('scripts'), 'patool') + Archive(archive).extractall_patool(archive_dir, patool_path) + + images = [(image, Image.open(os.path.join(archive_dir, image))) + for image in os.listdir(archive_dir) + ] + return images + @classmethod def _extract_zip_chunk(cls, chunk_buffer, dimension=DimensionType.DIM_2D): return [f[1] for f in cls._extract_zip_archive(chunk_buffer, dimension=dimension)] @@ -3519,6 +3544,10 @@ def _test_api_v2_tasks_id_data_spec(self, user, spec, data, if zipfile.is_zipfile(f): for frame_name, frame in self._extract_zip_archive(f, dimension=dimension): source_images[frame_name] = frame + elif isinstance(f, str) and f.endswith('.rar'): + archive_frames = self._extract_rar_archive(f) + for fn, frame in archive_frames: + source_images[fn] = frame elif isinstance(f, str) and f.endswith('.pdf'): with open(f, 'rb') as pdf_file: for i, frame in enumerate(convert_from_bytes(pdf_file.read(), fmt='png')): @@ -4562,6 +4591,28 @@ def _send_data_and_fail(*args, **kwargs): image_sizes, StorageMethodChoice.FILE_SYSTEM, StorageChoice.LOCAL, send_data_callback=_send_data_and_fail) + def _test_api_v2_tasks_id_data_create_can_use_server_rar(self, user): + task_spec = { + "name": 'task rar in the shared folder #32', + "overlap": 0, + "segment_size": 0, + "labels": [ + {"name": "car"}, + {"name": "person"}, + ] + } + + task_data = { + "server_files[0]": "test_rar.rar", + "image_quality": 75, + "copy_data": False, + "use_cache": True, + } + image_sizes = self._share_image_sizes[task_data["server_files[0]"]] + + self._test_api_v2_tasks_id_data_spec(user, task_spec, task_data, self.ChunkType.IMAGESET, self.ChunkType.IMAGESET, + image_sizes, StorageMethodChoice.CACHE, StorageChoice.LOCAL) + def _test_api_v2_tasks_id_data_create(self, user): method_list = { func: getattr(self, func) for func in dir(self)