Skip to content

Commit

Permalink
Memory optimization for image chunk preparation
Browse files (browse the repository at this point in the history)
  • Loading branch information
azhavoro committed Oct 22, 2024
1 parent 4354f72 commit 29151a6
Show file tree
Hide file tree
Showing 3 changed files with 14 additions and 12 deletions.
13 changes: 6 additions & 7 deletions cvat/apps/engine/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
ZipChunkWriter,
ZipCompressedChunkWriter,
)
from cvat.apps.engine.utils import md5_hash, preload_images
from cvat.apps.engine.utils import md5_hash, load_images
from utils.dataset_manifest import ImageManifestManager

slogger = ServerLogManager(__name__)
Expand Down Expand Up @@ -321,15 +321,14 @@ def _read_raw_images(
cloud_storage_instance.bulk_download_to_dir(
files=files_to_download, upload_dir=tmp_dir
)
media = preload_images(media)
media = load_images(images=media, preload=False)

for checksum, (_, fs_filename, _) in zip(checksums, media):
if checksum and not md5_hash(fs_filename) == checksum:
for checksum, media_item in zip(checksums, media):
if checksum and not md5_hash(media_item[1]) == checksum:
slogger.cloud_storage[db_cloud_storage.id].warning(
"Hash sums of files {} do not match".format(file_name)
)

yield from media
yield media_item
else:
requested_frame_iter = iter(frame_ids)
next_requested_frame_id = next(requested_frame_iter, None)
Expand Down Expand Up @@ -359,7 +358,7 @@ def _read_raw_images(
assert next_requested_frame_id is None

if db_task.dimension == models.DimensionType.DIM_2D:
media = preload_images(media)
media = load_images(images=media, preload=False)

yield from media

Expand Down
4 changes: 2 additions & 2 deletions cvat/apps/engine/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
from cvat.apps.engine.models import RequestAction, RequestTarget
from cvat.apps.engine.utils import (
av_scan_paths, format_list,get_rq_job_meta,
define_dependent_job, get_rq_lock_by_user, preload_images
define_dependent_job, get_rq_lock_by_user, load_images
)
from cvat.apps.engine.rq_job_handler import RQId
from cvat.utils.http import make_requests_session, PROXIES_FOR_UNTRUSTED_URLS
Expand Down Expand Up @@ -1537,7 +1537,7 @@ def save_chunks(
MEDIA_TYPES['archive']['extractor'],
))
):
chunk_data = preload_images(chunk_data)
chunk_data = load_images(images=chunk_data, preload=True)

# TODO: extract into a class

Expand Down
9 changes: 6 additions & 3 deletions cvat/apps/engine/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,13 +363,16 @@ def sendfile(

return _sendfile(request, filename, attachment, attachment_filename, mimetype, encoding)

def preload_image(image: tuple[str, str, str])-> tuple[Image.Image, str, str]:
def load_image(image: tuple[str, str, str])-> tuple[Image.Image, str, str]:
    """Open one image and read its data immediately.

    Args:
        image: A 3-item tuple. The first item is handed to ``Image.open``;
            the second and third items are passed through untouched.
            (NOTE(review): callers appear to use the second item as a
            filesystem file name - confirm against the call sites.)

    Returns:
        A tuple of the opened PIL image followed by the original second
        and third items.
    """
    source = image[0]
    opened = Image.open(source)
    # Image.open only opens the file; load() is called right away so the
    # image data is read before the object is handed to the caller.
    opened.load()
    return (opened, image[1], image[2])

def preload_images(images: Iterable[tuple[str, str, str]]) -> list[tuple[Image.Image, str, str]]:
    """Eagerly run ``preload_image`` over every item of *images*.

    Args:
        images: Iterable of 3-item tuples accepted by ``preload_image``.

    Returns:
        A list with one ``preload_image`` result per input item, in input
        order. The whole input is consumed before this function returns.
    """
    loaded = map(preload_image, images)
    return list(loaded)
def load_images(images: Iterable[tuple[str, str, str]], preload: bool = False) -> Iterable[tuple[Image.Image, str, str]]:
    """Apply ``load_image`` to every item of *images*, lazily or eagerly.

    Args:
        images: Iterable of 3-item tuples accepted by ``load_image``.
        preload: When truthy, every image is loaded before returning and
            the results come back as a list. When falsy (the default), a
            lazy ``map`` iterator is returned and each image is loaded only
            when the consumer reaches it.

    Returns:
        A list (``preload`` truthy) or a one-shot ``map`` iterator
        (``preload`` falsy) of ``load_image`` results, in input order.
    """
    lazy_results = map(load_image, images)
    # Materialise only on request; otherwise keep a single image in flight
    # at a time for the consumer.
    return list(lazy_results) if preload else lazy_results

def build_backup_file_name(
*,
Expand Down

0 comments on commit 29151a6

Please sign in to comment.