Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added support for .rar and some other archives #7729

Merged
merged 13 commits into from
Apr 11, 2024
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ RUN apt-get update && \
python3-venv \
supervisor \
tzdata \
unrar \
&& ln -fs /usr/share/zoneinfo/${TZ} /etc/localtime && \
dpkg-reconfigure -f noninteractive tzdata && \
rm -rf /var/lib/apt/lists/* && \
Expand Down
4 changes: 4 additions & 0 deletions changelog.d/20240405_091941_klakhov_rar_support.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
### Added

- Support for `.rar`, `.tar`, `.gz`, `.bz2`, `.cpio`, `.7z` archives
(<https://github.com/opencv/cvat/pull/7729>)
2 changes: 1 addition & 1 deletion cvat/apps/engine/media.mimetypes
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ image/x-quicktime qif

# possible archive mimetypes (limited set)
application/gzip gz
application/rar rar
application/x-rar-compressed rar
application/x-7z-compressed 7z
application/x-bzip bz bz2
application/x-bzip-compressed-tar tar.bz tar.bz2 tb2 tbz tbz2
Expand Down
7 changes: 5 additions & 2 deletions cvat/apps/engine/media_extractors.py
klakhov marked this conversation as resolved.
Show resolved Hide resolved
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# Copyright (C) 2019-2022 Intel Corporation
# Copyright (C) 2024 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT

import os
import sysconfig
import tempfile
import shutil
import zipfile
Expand Down Expand Up @@ -266,7 +268,8 @@ def __init__(self,

self._archive_source = source_path[0]
tmp_dir = extract_dir if extract_dir else os.path.dirname(source_path[0])
Archive(self._archive_source).extractall(tmp_dir)
patool_path = os.path.join(sysconfig.get_path('scripts'), 'patool')
Archive(self._archive_source).extractall(tmp_dir, False, patool_path)
if not extract_dir:
os.remove(self._archive_source)
super().__init__(
Expand Down Expand Up @@ -845,7 +848,7 @@ def _is_archive(path):
encoding = mime[1]
supportedArchives = ['application/x-rar-compressed',
'application/x-tar', 'application/x-7z-compressed', 'application/x-cpio',
'gzip', 'bzip2']
'application/gzip', 'application/x-bzip']
return mime_type in supportedArchives or encoding in supportedArchives

def _is_video(path):
Expand Down
Binary file added cvat/apps/engine/tests/assets/test_rar.rar
Binary file not shown.
61 changes: 60 additions & 1 deletion cvat/apps/engine/tests/test_rest_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@
import os
import random
import shutil
import sysconfig
import tempfile
from uuid import uuid4
import xml.etree.ElementTree as ET
import zipfile
from collections import defaultdict
Expand All @@ -25,6 +27,7 @@
import av
import numpy as np
from pdf2image import convert_from_bytes
from pyunpack import Archive
from django.conf import settings
from django.contrib.auth.models import Group, User
from django.http import HttpResponse
Expand Down Expand Up @@ -3126,6 +3129,7 @@ def setUpClass(cls):

cls._share_image_sizes = {}
cls._share_files = []
cls._unpack_dirs = []
klakhov marked this conversation as resolved.
Show resolved Hide resolved

for filename in [
"test_1.jpg", "test_2.jpg", "test_3.jpg", "test_10.jpg", "test_qwe.jpg",
Expand Down Expand Up @@ -3186,6 +3190,18 @@ def setUpClass(cls):
image_sizes.append((int(data["WIDTH"]), int(data["HEIGHT"])))
cls._share_image_sizes[filename] = image_sizes

filename = "test_rar.rar"
source_path = os.path.join(os.path.dirname(__file__), 'assets', filename)
path = os.path.join(settings.SHARE_ROOT, filename)
shutil.copyfile(source_path, path)
image_sizes = []
images = cls._extract_rar_archive(source_path)
for [f, image] in images:
width, height = image.size
image_sizes.append((width, height))
cls._share_image_sizes[filename] = image_sizes
cls._share_files.append(filename)

filename = "test_velodyne_points.zip"
path = os.path.join(os.path.dirname(__file__), 'assets', filename)
image_sizes = []
Expand Down Expand Up @@ -3298,6 +3314,9 @@ def tearDownClass(cls):
dirs.add(os.path.dirname(filename))
os.remove(os.path.join(settings.SHARE_ROOT, filename))

for unpack_dir in cls._unpack_dirs:
shutil.rmtree(unpack_dir)

for dirname in sorted(dirs, reverse=True):
path = os.path.join(settings.SHARE_ROOT, dirname)
if not os.listdir(path):
Expand Down Expand Up @@ -3364,6 +3383,21 @@ def _extract_zip_archive(archive, dimension=DimensionType.DIM_2D):
for f in sorted(chunk.namelist())
]

@staticmethod
def _extract_rar_archive(archive):
    """Unpack *archive* with patool and return its images.

    The archive is extracted into a freshly created, uniquely named
    directory under ``settings.TMP_FILES_ROOT``; that directory is always
    removed before returning, even when extraction or image opening fails.

    Returns a list of ``(file_name, PIL image)`` tuples ordered by file
    name, so the result is deterministic and consistent with the sorted
    output of the zip extraction helper.
    """
    archive_dir = os.path.join(settings.TMP_FILES_ROOT, uuid4().hex)
    os.makedirs(archive_dir)

    try:
        # patool is installed as a script next to the interpreter; pass its
        # path explicitly rather than relying on PATH lookup.
        patool_path = os.path.join(sysconfig.get_path('scripts'), 'patool')
        Archive(archive).extractall_patool(archive_dir, patool_path)

        images = []
        # os.listdir() returns entries in arbitrary order, so sort them to
        # keep the frame order stable between runs and platforms.
        for image_name in sorted(os.listdir(archive_dir)):
            image = Image.open(os.path.join(archive_dir, image_name))
            # Image.open() is lazy: read the pixel data now, while the file
            # still exists, so the image stays usable after the cleanup below.
            image.load()
            images.append((image_name, image))
    finally:
        shutil.rmtree(archive_dir)

    return images

@classmethod
def _extract_zip_chunk(cls, chunk_buffer, dimension=DimensionType.DIM_2D):
return [f[1] for f in cls._extract_zip_archive(chunk_buffer, dimension=dimension)]
Expand Down Expand Up @@ -3512,13 +3546,16 @@ def _test_api_v2_tasks_id_data_spec(self, user, spec, data,
manifest = next((v for v in source_files if _name_key(v).endswith('.jsonl')), None)
source_files = [_add_prefix(f)
for f in source_files if not _name_key(f).endswith('jsonl')]

klakhov marked this conversation as resolved.
Show resolved Hide resolved
# Load images
source_images = {}
for f in source_files:
if zipfile.is_zipfile(f):
for frame_name, frame in self._extract_zip_archive(f, dimension=dimension):
source_images[frame_name] = frame
elif isinstance(f, str) and f.endswith('.rar'):
archive_frames = self._extract_rar_archive(f)
for fn, frame in archive_frames:
source_images[fn] = frame
elif isinstance(f, str) and f.endswith('.pdf'):
with open(f, 'rb') as pdf_file:
for i, frame in enumerate(convert_from_bytes(pdf_file.read(), fmt='png')):
Expand Down Expand Up @@ -4562,6 +4599,28 @@ def _send_data_and_fail(*args, **kwargs):
image_sizes, StorageMethodChoice.FILE_SYSTEM, StorageChoice.LOCAL,
send_data_callback=_send_data_and_fail)

def _test_api_v2_tasks_id_data_create_can_use_server_rar(self, user):
    """Run the shared data-spec check for a task built from the share's RAR archive.

    The task's only server file is ``test_rar.rar``; the expected frame sizes
    are looked up in ``self._share_image_sizes`` under that file name.
    """
    archive_name = "test_rar.rar"
    label_names = ("car", "person")

    spec = {
        "name": 'task rar in the shared folder #32',
        "overlap": 0,
        "segment_size": 0,
        "labels": [{"name": label_name} for label_name in label_names],
    }

    data = {
        "server_files[0]": archive_name,
        "image_quality": 75,
        "copy_data": False,
        "use_cache": True,
    }

    expected_sizes = self._share_image_sizes[archive_name]

    # Both the compressed and the original chunks are expected to be image sets.
    self._test_api_v2_tasks_id_data_spec(
        user, spec, data,
        self.ChunkType.IMAGESET, self.ChunkType.IMAGESET,
        expected_sizes, StorageMethodChoice.CACHE, StorageChoice.LOCAL)

def _test_api_v2_tasks_id_data_create(self, user):
method_list = {
func: getattr(self, func) for func in dir(self)
Expand Down
Loading