Skip to content

Commit

Permalink
Added support for .rar and some other archives (#7729)
Browse files Browse the repository at this point in the history
<!-- Raise an issue to propose your change
(https://github.com/opencv/cvat/issues).
It helps to avoid duplication of efforts from multiple independent
contributors.
Discuss your ideas with maintainers to be sure that changes will be
approved and merged.
Read the [Contribution
guide](https://opencv.github.io/cvat/docs/contributing/). -->

<!-- Provide a general summary of your changes in the Title above -->

### Motivation and context
<!-- Why is this change required? What problem does it solve? If it
fixes an open
issue, please link to the issue here. Describe your changes in detail,
add
screenshots. -->
This PR allows users to upload `.rar` archives during task creation.
It makes the following changes:
1. Replaces the outdated mimetype for `.rar` files with
`application/x-rar-compressed`.
2. Makes the `pyunpack` package use the `patool` utility; previously it
used only `zipfile`. Patool can handle different kinds of archives; the
currently supported list (by mimetype) can be found in
`supportedArchives`.
3. The pipeline for rar archives is now `pyunpack` -> `patool` ->
`unrar`.

### How has this been tested?
<!-- Please describe in detail how you tested your changes.
Include details of your testing environment, and the tests you ran to
see how your change affects other areas of the code, etc. -->

### Checklist
<!-- Go over all the following points, and put an `x` in all the boxes
that apply.
If an item isn't applicable for some reason, then ~~explicitly
strikethrough~~ the whole
line. If you don't do that, GitHub will show incorrect progress for the
pull request.
If you're unsure about any of these, don't hesitate to ask. We're here
to help! -->
- [x] I submit my changes into the `develop` branch
- [ ] I have created a changelog fragment <!-- see top comment in
CHANGELOG.md -->
- [ ] I have updated the documentation accordingly
- [ ] I have added tests to cover my changes
- [x] I have linked related issues (see [GitHub docs](https://help.github.com/en/github/managing-your-work-on-github/linking-a-pull-request-to-an-issue#linking-a-pull-request-to-an-issue-using-a-keyword))
- ~~[ ] I have increased versions of npm packages if it is necessary
([cvat-canvas](https://github.com/opencv/cvat/tree/develop/cvat-canvas#versioning),
[cvat-core](https://github.com/opencv/cvat/tree/develop/cvat-core#versioning),
[cvat-data](https://github.com/opencv/cvat/tree/develop/cvat-data#versioning)
and
[cvat-ui](https://github.com/opencv/cvat/tree/develop/cvat-ui#versioning))~~

### License

- [x] I submit _my code changes_ under the same [MIT License](
https://github.com/opencv/cvat/blob/develop/LICENSE) that covers the
project.
  Feel free to contact the maintainers if that's a concern.
  • Loading branch information
klakhov authored Apr 11, 2024
1 parent 90d5e1f commit 3fe396c
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 3 deletions.
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,7 @@ RUN apt-get update && \
python3-venv \
supervisor \
tzdata \
unrar \
&& ln -fs /usr/share/zoneinfo/${TZ} /etc/localtime && \
dpkg-reconfigure -f noninteractive tzdata && \
rm -rf /var/lib/apt/lists/* && \
Expand Down
4 changes: 4 additions & 0 deletions changelog.d/20240405_091941_klakhov_rar_support.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
### Added

- Support for `.rar`, `.tar`, `.gz`, `.bz2`, `.cpio`, `.7z` archives
(<https://github.com/opencv/cvat/pull/7729>)
2 changes: 1 addition & 1 deletion cvat/apps/engine/media.mimetypes
Original file line number Diff line number Diff line change
Expand Up @@ -200,7 +200,7 @@ image/x-quicktime qif

# possible archive mimetypes (limited set)
application/gzip gz
application/rar rar
application/x-rar-compressed rar
application/x-7z-compressed 7z
application/x-bzip bz bz2
application/x-bzip-compressed-tar tar.bz tar.bz2 tb2 tbz tbz2
Expand Down
7 changes: 5 additions & 2 deletions cvat/apps/engine/media_extractors.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# Copyright (C) 2019-2022 Intel Corporation
# Copyright (C) 2024 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT

import os
import sysconfig
import tempfile
import shutil
import zipfile
Expand Down Expand Up @@ -266,7 +268,8 @@ def __init__(self,

self._archive_source = source_path[0]
tmp_dir = extract_dir if extract_dir else os.path.dirname(source_path[0])
Archive(self._archive_source).extractall(tmp_dir)
patool_path = os.path.join(sysconfig.get_path('scripts'), 'patool')
Archive(self._archive_source).extractall(tmp_dir, False, patool_path)
if not extract_dir:
os.remove(self._archive_source)
super().__init__(
Expand Down Expand Up @@ -845,7 +848,7 @@ def _is_archive(path):
encoding = mime[1]
supportedArchives = ['application/x-rar-compressed',
'application/x-tar', 'application/x-7z-compressed', 'application/x-cpio',
'gzip', 'bzip2']
'application/gzip', 'application/x-bzip']
return mime_type in supportedArchives or encoding in supportedArchives

def _is_video(path):
Expand Down
Binary file added cvat/apps/engine/tests/assets/test_rar.rar
Binary file not shown.
51 changes: 51 additions & 0 deletions cvat/apps/engine/tests/test_rest_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import os
import random
import shutil
import sysconfig
import tempfile
import xml.etree.ElementTree as ET
import zipfile
Expand All @@ -25,6 +26,7 @@
import av
import numpy as np
from pdf2image import convert_from_bytes
from pyunpack import Archive
from django.conf import settings
from django.contrib.auth.models import Group, User
from django.http import HttpResponse
Expand Down Expand Up @@ -3186,6 +3188,18 @@ def setUpClass(cls):
image_sizes.append((int(data["WIDTH"]), int(data["HEIGHT"])))
cls._share_image_sizes[filename] = image_sizes

filename = "test_rar.rar"
source_path = os.path.join(os.path.dirname(__file__), 'assets', filename)
path = os.path.join(settings.SHARE_ROOT, filename)
shutil.copyfile(source_path, path)
image_sizes = []
images = cls._extract_rar_archive(source_path)
for [f, image] in images:
width, height = image.size
image_sizes.append((width, height))
cls._share_image_sizes[filename] = image_sizes
cls._share_files.append(filename)

filename = "test_velodyne_points.zip"
path = os.path.join(os.path.dirname(__file__), 'assets', filename)
image_sizes = []
Expand Down Expand Up @@ -3364,6 +3378,17 @@ def _extract_zip_archive(archive, dimension=DimensionType.DIM_2D):
for f in sorted(chunk.namelist())
]

@staticmethod
def _extract_rar_archive(archive):
    """Unpack a rar archive with patool and open each extracted entry as an image.

    The archive is extracted into a temporary directory created under
    ``settings.TMP_FILES_ROOT``; every entry found at the top level of that
    directory is opened with ``Image.open``.

    Args:
        archive: path to the archive file to unpack.

    Returns:
        A list of ``(entry_name, image)`` tuples ordered by entry name.
    """
    with tempfile.TemporaryDirectory(dir=settings.TMP_FILES_ROOT) as archive_dir:
        # pyunpack needs the location of the patool script; it is looked up
        # in the scripts directory of the running interpreter.
        patool_path = os.path.join(sysconfig.get_path('scripts'), 'patool')
        Archive(archive).extractall_patool(archive_dir, patool_path)

        # os.listdir() yields entries in arbitrary order. Sort them so the
        # result is deterministic, the same way _extract_zip_archive sorts
        # the names it iterates over.
        # NOTE(review): the temporary directory is removed when this block
        # exits; presumably the opened images stay usable because their
        # files are still held open - confirm on non-POSIX platforms.
        images = [
            (image, Image.open(os.path.join(archive_dir, image)))
            for image in sorted(os.listdir(archive_dir))
        ]
        return images

@classmethod
def _extract_zip_chunk(cls, chunk_buffer, dimension=DimensionType.DIM_2D):
    """Return the second element of every item produced by ``_extract_zip_archive``.

    Args:
        chunk_buffer: the zip chunk, forwarded unchanged to ``_extract_zip_archive``.
        dimension: forwarded to ``_extract_zip_archive`` as the ``dimension`` keyword.

    Returns:
        A list holding element ``[1]`` of each extracted item, in the order
        ``_extract_zip_archive`` returned them.
    """
    extracted = cls._extract_zip_archive(chunk_buffer, dimension=dimension)
    frames = []
    for entry in extracted:
        # Index 1 is the payload; index 0 is dropped.
        frames.append(entry[1])
    return frames
Expand Down Expand Up @@ -3519,6 +3544,10 @@ def _test_api_v2_tasks_id_data_spec(self, user, spec, data,
if zipfile.is_zipfile(f):
for frame_name, frame in self._extract_zip_archive(f, dimension=dimension):
source_images[frame_name] = frame
elif isinstance(f, str) and f.endswith('.rar'):
archive_frames = self._extract_rar_archive(f)
for fn, frame in archive_frames:
source_images[fn] = frame
elif isinstance(f, str) and f.endswith('.pdf'):
with open(f, 'rb') as pdf_file:
for i, frame in enumerate(convert_from_bytes(pdf_file.read(), fmt='png')):
Expand Down Expand Up @@ -4562,6 +4591,28 @@ def _send_data_and_fail(*args, **kwargs):
image_sizes, StorageMethodChoice.FILE_SYSTEM, StorageChoice.LOCAL,
send_data_callback=_send_data_and_fail)

def _test_api_v2_tasks_id_data_create_can_use_server_rar(self, user):
    """Check task data creation from the ``test_rar.rar`` server file.

    Builds a two-label task spec and a data request that references
    ``test_rar.rar`` as ``server_files[0]``, then delegates to
    ``_test_api_v2_tasks_id_data_spec`` with IMAGESET for both chunk-type
    arguments, the CACHE storage method and LOCAL storage.

    Args:
        user: forwarded unchanged to ``_test_api_v2_tasks_id_data_spec``.
    """
    rar_name = "test_rar.rar"

    spec = {
        "name": 'task rar in the shared folder #32',
        "overlap": 0,
        "segment_size": 0,
        "labels": [{"name": label} for label in ("car", "person")],
    }

    data = {
        "server_files[0]": rar_name,
        "image_quality": 75,
        "copy_data": False,
        "use_cache": True,
    }

    # Expected frame sizes registered for this file name in _share_image_sizes.
    expected_sizes = self._share_image_sizes[rar_name]
    chunk_type = self.ChunkType.IMAGESET

    self._test_api_v2_tasks_id_data_spec(
        user, spec, data, chunk_type, chunk_type,
        expected_sizes, StorageMethodChoice.CACHE, StorageChoice.LOCAL,
    )

def _test_api_v2_tasks_id_data_create(self, user):
method_list = {
func: getattr(self, func) for func in dir(self)
Expand Down

0 comments on commit 3fe396c

Please sign in to comment.