Skip to content

Commit

Permalink
Do not copy extra files if both server_files and job_file_mapping are specified (#6879)
Browse files Browse the repository at this point in the history
  • Loading branch information
azhavoro authored Sep 21, 2023
1 parent 657e361 commit 26693dd
Show file tree
Hide file tree
Showing 7 changed files with 180 additions and 54 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ jobs:

- uses: actions/setup-node@v3
with:
node-version: '16.x'
node-version: '18.x'

- name: Download CVAT server image
uses: actions/download-artifact@v3
Expand Down
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- TDB

### Fixed
- TDB
- Unnecessary downloading of additional data from cloud storage when use_cache=true and job_file_mapping is specified
(<https://github.com/opencv/cvat/pull/6879>)

### Security
- TDB
Expand Down
27 changes: 21 additions & 6 deletions cvat/apps/engine/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -791,12 +791,18 @@ def __init__(self, *args, **kwargs):

class JobFileMapping(serializers.ListField):
"""
Represents a file-to-job mapping. Useful to specify a custom job
configuration during task creation. This option is not compatible with
most other job split-related options. Files in the jobs must not overlap or repeat.
Represents a file-to-job mapping.
Useful to specify a custom job configuration during task creation.
This option is not compatible with most other job split-related options.
Files in the jobs must not overlap or repeat.
Job file mapping files must be a subset of the input files.
If directories are specified in server_files, all files obtained by recursive search
in the specified directories will be used as input files.
In case of missing items in the input files, an error will be raised.
Example:
[
["file1.jpg", "file2.jpg"], # job #1 files
["file3.png"], # job #2 files
["file4.jpg", "file5.png", "file6.bmp"], # job #3 files
Expand Down Expand Up @@ -824,9 +830,15 @@ class DataSerializer(serializers.ModelSerializer):
When false, video chunks are represented as video segments
"""))
client_files = ClientFileSerializer(many=True, default=[],
help_text="Uploaded files")
help_text=textwrap.dedent("""
Uploaded files.
Must contain all files from job_file_mapping if job_file_mapping is not empty.
"""))
server_files = ServerFileSerializer(many=True, default=[],
help_text="Paths to files from a file share mounted on the server, or from a cloud storage")
help_text=textwrap.dedent("""
Paths to files from a file share mounted on the server, or from a cloud storage.
Must contain all files from job_file_mapping if job_file_mapping is not empty.
"""))
server_files_exclude = serializers.ListField(required=False, default=[],
child=serializers.CharField(max_length=1024),
help_text=textwrap.dedent("""\
Expand All @@ -845,7 +857,10 @@ class DataSerializer(serializers.ModelSerializer):
""")
)
remote_files = RemoteFileSerializer(many=True, default=[],
help_text="Direct download URLs for files")
help_text=textwrap.dedent("""
Direct download URLs for files.
Must contain all files from job_file_mapping if job_file_mapping is not empty.
"""))
use_cache = serializers.BooleanField(default=False,
help_text=textwrap.dedent("""\
Enable or disable task data chunk caching for the task.
Expand Down
9 changes: 9 additions & 0 deletions cvat/apps/engine/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,15 @@ def _create_thread(

data['server_files'].extend(additional_files)

# We only need to process the files specified in job_file_mapping
if job_file_mapping is not None:
filtered_files = []
for f in itertools.chain.from_iterable(job_file_mapping):
if f not in data['server_files']:
raise ValidationError(f"Job mapping file {f} is not specified in input files")
filtered_files.append(f)
data['server_files'] = filtered_files

if db_data.storage_method == models.StorageMethodChoice.FILE_SYSTEM or not settings.USE_CACHE:
_download_data_from_cloud_storage(db_data.cloud_storage, data['server_files'], upload_dir)
is_data_in_cloud = False
Expand Down
28 changes: 21 additions & 7 deletions cvat/schema.yml
Original file line number Diff line number Diff line change
Expand Up @@ -6733,24 +6733,32 @@ components:
type: string
format: binary
default: []
description: Uploaded files
description: |2
Uploaded files.
Must contain all files from job_file_mapping if job_file_mapping is not empty.
server_files:
type: array
items:
type: string
minLength: 1
maxLength: 1024
default: []
description: Paths to files from a file share mounted on the server, or
from a cloud storage
description: |2
Paths to files from a file share mounted on the server, or from a cloud storage.
Must contain all files from job_file_mapping if job_file_mapping is not empty.
remote_files:
type: array
items:
type: string
minLength: 1
maxLength: 1024
default: []
description: Direct download URLs for files
description: |2
Direct download URLs for files.
Must contain all files from job_file_mapping if job_file_mapping is not empty.
use_zip_chunks:
type: boolean
default: false
Expand Down Expand Up @@ -6825,12 +6833,18 @@ components:
writeOnly: true
description: |2
Represents a file-to-job mapping. Useful to specify a custom job
configuration during task creation. This option is not compatible with
most other job split-related options. Files in the jobs must not overlap or repeat.
Represents a file-to-job mapping.
Useful to specify a custom job configuration during task creation.
This option is not compatible with most other job split-related options.
Files in the jobs must not overlap or repeat.
Job file mapping files must be a subset of the input files.
If directories are specified in server_files, all files obtained by recursive search
in the specified directories will be used as input files.
In case of missing items in the input files, an error will be raised.
Example:
[
["file1.jpg", "file2.jpg"], # job #1 files
["file3.png"], # job #2 files
["file4.jpg", "file5.png", "file6.bmp"], # job #3 files
Expand Down
2 changes: 1 addition & 1 deletion tests/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
"@cypress/code-coverage": "^3.9.10",
"archiver": "^5.3.0",
"cy-verify-downloads": "^0.0.5",
"cypress": "^12.5.1",
"cypress": "^12.17.4",
"cypress-file-upload": "^5.0.8",
"cypress-localstorage-commands": "^1.7.0",
"cypress-real-events": "^1.6.0",
Expand Down
Loading

0 comments on commit 26693dd

Please sign in to comment.