Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Tus for task annotations import #4327

Merged
merged 16 commits into from
Mar 11, 2022
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Basic page with jobs list, basic filtration to this list (<https://github.com/openvinotoolkit/cvat/pull/4258>)
- Added OpenCV.js TrackerMIL as tracking tool (<https://github.com/openvinotoolkit/cvat/pull/4200>)
- Ability to continue working from the latest frame where an annotator was before (<https://github.com/openvinotoolkit/cvat/pull/4297>)

- Task annotations importing via chunk uploads (<https://github.com/openvinotoolkit/cvat/pull/4327>)

### Changed
- Users don't have access to a task object anymore if they are assigned only on some jobs of the task (<https://github.com/openvinotoolkit/cvat/pull/3788>)
Expand Down
4 changes: 2 additions & 2 deletions cvat-core/package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion cvat-core/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "cvat-core",
"version": "4.2.0",
"version": "4.2.1",
"description": "Part of Computer Vision Tool which presents an interface for client-side integration",
"main": "babel.config.js",
"scripts": {
Expand Down
158 changes: 97 additions & 61 deletions cvat-core/src/server-proxy.js
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,49 @@
});
}

// Uploads a single file to the server via the TUS resumable-upload protocol.
// `uploadConfig` supplies the endpoint and chunk size; `totalSize` /
// `totalSentSize` / `onUpdate` are optional and only used for progress
// reporting (task data uploads pass them, annotation uploads do not).
// Resolves with the number of bytes sent for this file so the caller can
// accumulate a running total (`uploadConfig.totalSentSize += await ...`).
async function chunkUpload(file, uploadConfig) {
    const params = enableOrganization();
    const {
        endpoint, chunkSize, totalSize, onUpdate, totalSentSize,
    } = uploadConfig;
    return new Promise((resolve, reject) => {
        const upload = new tus.Upload(file, {
            endpoint,
            metadata: {
                filename: file.name,
                filetype: file.type,
            },
            headers: {
                Authorization: Axios.defaults.headers.common.Authorization,
            },
            chunkSize,
            retryDelays: null,
            onError(error) {
                reject(error);
            },
            onBeforeRequest(req) {
                // tus-js-client does not send cookies by default; the backend
                // needs them, and the organization scope travels in a header.
                const xhr = req.getUnderlyingObject();
                const { org } = params;
                req.setHeader('X-Organization', org);
                xhr.withCredentials = true;
            },
            onProgress(bytesUploaded) {
                // Report progress only when the caller provided both counters.
                if (Number.isInteger(totalSentSize) && Number.isInteger(totalSize)) {
                    const currentUploadedSize = totalSentSize + bytesUploaded;
                    const percentage = currentUploadedSize / totalSize;
                    if (onUpdate) onUpdate(percentage);
                }
            },
            onSuccess() {
                // Resolve with the bytes sent by THIS upload. The previous
                // `if (totalSentSize) totalSentSize += file.size; resolve(totalSentSize)`
                // resolved 0/undefined for the first file (falsy-zero check) and
                // double-counted the running total on subsequent files, because
                // the caller already accumulates the resolved value.
                resolve(file.size);
            },
        });
        upload.start();
    });
}

function generateError(errorData) {
if (errorData.response) {
const message = `${errorData.message}. ${JSON.stringify(errorData.response.data) || ''}.`;
Expand Down Expand Up @@ -816,42 +859,6 @@

onUpdate('The data are being uploaded to the server..', null);

// Pre-refactor, task-scoped TUS upload helper. It is a closure: `origin`,
// `backendAPI`, `store`, `chunkSize`, `params`, `totalSentSize`, `totalSize`
// and `onUpdate` all come from the enclosing scope (not visible here), so it
// can only be used inside that task-data upload flow.
async function chunkUpload(taskId, file) {
return new Promise((resolve, reject) => {
const upload = new tus.Upload(file, {
endpoint: `${origin}${backendAPI}/tasks/${taskId}/data/`,
metadata: {
filename: file.name,
filetype: file.type,
},
headers: {
// NOTE(review): uses a token from local storage here, unlike the newer
// helper which reuses Axios's default Authorization header.
Authorization: `Token ${store.get('token')}`,
},
chunkSize,
retryDelays: null,
onError(error) {
reject(error);
},
onBeforeRequest(req) {
// Send cookies with every tus request and attach the organization scope.
const xhr = req.getUnderlyingObject();
const { org } = params;
req.setHeader('X-Organization', org);
xhr.withCredentials = true;
},
onProgress(bytesUploaded) {
// Progress is measured against the shared multi-file totals.
const currentUploadedSize = totalSentSize + bytesUploaded;
const percentage = currentUploadedSize / totalSize;
onUpdate('The data are being uploaded to the server', percentage);
},
onSuccess() {
// Mutates the enclosing scope's running total directly.
totalSentSize += file.size;
resolve();
},
});
upload.start();
});
}

async function bulkUpload(taskId, files) {
const fileBulks = files.reduce((fileGroups, file) => {
const lastBulk = fileGroups[fileGroups.length - 1];
Expand Down Expand Up @@ -891,8 +898,17 @@
proxy: config.proxy,
headers: { 'Upload-Start': true },
});
const uploadConfig = {
endpoint: `${origin}${backendAPI}/tasks/${response.data.id}/data/`,
onUpdate: (percentage) => {
onUpdate('The data are being uploaded to the server', percentage);
},
chunkSize,
totalSize,
totalSentSize,
};
for (const file of chunkFiles) {
await chunkUpload(response.data.id, file);
uploadConfig.totalSentSize += await chunkUpload(file, uploadConfig);
}
if (bulkFiles.length > 0) {
await bulkUpload(response.data.id, bulkFiles);
Expand Down Expand Up @@ -1219,34 +1235,54 @@
const params = {
...enableOrganization(),
format,
filename: file.name,
};
let annotationData = new FormData();
annotationData.append('annotation_file', file);
const chunkSize = config.uploadChunkSize * 1024 * 1024;

return new Promise((resolve, reject) => {
async function request() {
try {
const response = await Axios.put(
`${backendAPI}/${session}s/${id}/annotations`,
annotationData,
{
params,
proxy: config.proxy,
},
);
if (response.status === 202) {
annotationData = new FormData();
setTimeout(request, 3000);
} else {
resolve();
const uploadConfig = {
chunkSize,
endpoint: `${origin}${backendAPI}/${session}s/${id}/annotations/`,
};
try {
await Axios.post(`${backendAPI}/${session}s/${id}/annotations`,
new FormData(), {
params,
proxy: config.proxy,
headers: { 'Upload-Start': true },
});
await chunkUpload(file, uploadConfig);
await Axios.post(`${backendAPI}/${session}s/${id}/annotations`,
new FormData(), {
params,
proxy: config.proxy,
headers: { 'Upload-Finish': true },
});
return new Promise((resolve, reject) => {
async function requestStatus() {
try {
const response = await Axios.put(
`${backendAPI}/${session}s/${id}/annotations`,
new FormData(),
{
params,
proxy: config.proxy,
},
);
if (response.status === 202) {
setTimeout(requestStatus, 3000);
} else {
resolve();
}
} catch (errorData) {
reject(generateError(errorData));
}
} catch (errorData) {
reject(generateError(errorData));
}
}

setTimeout(request);
});
setTimeout(requestStatus);
});
} catch (errorData) {
generateError(errorData);
return null;
}
}

// Session is 'task' or 'job'
Expand Down
64 changes: 30 additions & 34 deletions cvat/apps/engine/mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def __init__(self, file_id, upload_dir):
self.offset = cache.get("tus-uploads/{}/offset".format(file_id))

def init_file(self):
os.makedirs(self.upload_dir, exist_ok=True)
file_path = os.path.join(self.upload_dir, self.file_id)
with open(file_path, 'wb') as file:
file.seek(self.file_size - 1)
Expand Down Expand Up @@ -101,6 +102,7 @@ class UploadMixin(object):
'Cache-Control': 'no-store'
}
_file_id_regex = r'(?P<file_id>\b[0-9a-f]{8}\b-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-\b[0-9a-f]{12}\b)'
upload_url = r'((data/)|(annotations/))'

def _tus_response(self, status, data=None, extra_headers=None):
response = Response(data, status)
Expand All @@ -126,33 +128,36 @@ def _get_metadata(self, request):
metadata[splited_metadata[0]] = ""
return metadata

def upload_data(self, request):
def _get_chunk_data_type(self, request):
path_elements = request.path.split('/')
# When we use TUS chunk upload we consider path structure as <protocol>://<domain>/.../<data type>/<file id>
chunk_data_type = path_elements[-2]
return chunk_data_type

def upload_data(self, request, data_type):
tus_request = request.headers.get('Upload-Length', None) is not None or request.method == 'OPTIONS'
bulk_file_upload = request.headers.get('Upload-Multiple', None) is not None
start_upload = request.headers.get('Upload-Start', None) is not None
finish_upload = request.headers.get('Upload-Finish', None) is not None
one_request_upload = start_upload and finish_upload
if one_request_upload or finish_upload:
return self.upload_finished(request)
return self.upload_finished(request, data_type)
elif start_upload:
return Response(status=status.HTTP_202_ACCEPTED)
elif tus_request:
return self.init_tus_upload(request)
return self.init_tus_upload(request, data_type)
elif bulk_file_upload:
return self.append(request)
return self.append(request, data_type)
else: # backward compatibility case - no upload headers were found
return self.upload_finished(request)
return self.upload_finished(request, data_type)

def init_tus_upload(self, request):
def init_tus_upload(self, request, data_type):
if request.method == 'OPTIONS':
return self._tus_response(status=status.HTTP_204)
else:
if not self.can_upload():
return self._tus_response(data='Adding more data is not allowed',
status=status.HTTP_400_BAD_REQUEST)
metadata = self._get_metadata(request)
filename = metadata.get('filename', '')
if not self.validate_filename(filename):
if not self.validate_filename(filename, data_type):
return self._tus_response(status=status.HTTP_400_BAD_REQUEST,
data="File name {} is not allowed".format(filename))

Expand All @@ -161,7 +166,7 @@ def init_tus_upload(self, request):
if message_id:
metadata["message_id"] = base64.b64decode(message_id)

file_exists = os.path.lexists(os.path.join(self.get_upload_dir(), filename))
file_exists = os.path.lexists(os.path.join(self.get_upload_dir(data_type), filename))
if file_exists:
return self._tus_response(status=status.HTTP_409_CONFLICT,
data="File with same name already exists")
Expand All @@ -171,24 +176,25 @@ def init_tus_upload(self, request):
return self._tus_response(status=status.HTTP_413_REQUEST_ENTITY_TOO_LARGE,
data="File size exceeds max limit of {} bytes".format(self._tus_max_file_size))

tus_file = TusFile.create_file(metadata, file_size, self.get_upload_dir())
tus_file = TusFile.create_file(metadata, file_size, self.get_upload_dir(data_type))

return self._tus_response(
status=status.HTTP_201_CREATED,
extra_headers={'Location': '{}{}'.format(request.build_absolute_uri(), tus_file.file_id)})

@action(detail=True, methods=['HEAD', 'PATCH'], url_path=r'data/'+_file_id_regex)
@action(detail=True, methods=['HEAD', 'PATCH'], url_path=upload_url+_file_id_regex)
def append_tus_chunk(self, request, pk, file_id):
self.get_object() # call check_object_permissions as well
self._object = self.get_object()
data_type = self._get_chunk_data_type(request)
if request.method == 'HEAD':
tus_file = TusFile.get_tusfile(str(file_id), self.get_upload_dir())
tus_file = TusFile.get_tusfile(str(file_id), self.get_upload_dir(data_type))
if tus_file:
return self._tus_response(status=status.HTTP_200_OK, extra_headers={
'Upload-Offset': tus_file.offset,
'Upload-Length': tus_file.file_size})
return self._tus_response(status=status.HTTP_404_NOT_FOUND)
else:
tus_file = TusFile.get_tusfile(str(file_id), self.get_upload_dir())
tus_file = TusFile.get_tusfile(str(file_id), self.get_upload_dir(data_type))
chunk = TusChunk(request)

if chunk.offset != tus_file.offset:
Expand All @@ -206,39 +212,29 @@ def append_tus_chunk(self, request, pk, file_id):
return self._tus_response(status=status.HTTP_204_NO_CONTENT,
extra_headers={'Upload-Offset': tus_file.offset})

def validate_filename(self, filename):
upload_dir = self.get_upload_dir()
def validate_filename(self, filename, data_type):
    """Check that ``filename`` resolves to a path inside the upload directory.

    Guards against path traversal (e.g. ``../``) in client-supplied names.
    Assumes the upload directory is an absolute path — TODO confirm.
    """
    upload_dir = self.get_upload_dir(data_type)
    file_path = os.path.realpath(os.path.join(upload_dir, filename))
    # commonprefix() compares characters, so a sibling directory such as
    # '<upload_dir>x' would incorrectly pass; commonpath() compares whole
    # path components.
    return os.path.commonpath((file_path, upload_dir)) == upload_dir

def can_upload(self):
    """Uploading is only permitted while the object holds no data yet."""
    return self.get_object().data.size == 0

def get_upload_dir(self):
db_model = self.get_object()
return db_model.data.get_upload_dirname()
def get_upload_dir(self, data_type):
    """Directory where uploaded files for this object are stored.

    ``data_type`` is part of the mixin interface; this default
    implementation does not use it.
    """
    upload_dirname = self._object.data.get_upload_dirname()
    return upload_dirname

def get_request_client_files(self, request):
db_model = self.get_object()
serializer = DataSerializer(db_model, data=request.data)
serializer = DataSerializer(self._object, data=request.data)
serializer.is_valid(raise_exception=True)
data = {k: v for k, v in serializer.validated_data.items()}
return data.get('client_files', None);

def append(self, request):
if not self.can_upload():
return Response(data='Adding more data is not allowed',
status=status.HTTP_400_BAD_REQUEST)
def append(self, request, data_type):
client_files = self.get_request_client_files(request)
if client_files:
upload_dir = self.get_upload_dir()
upload_dir = self.get_upload_dir(data_type)
for client_file in client_files:
with open(os.path.join(upload_dir, client_file['file'].name), 'ab+') as destination:
destination.write(client_file['file'].read())
return Response(status=status.HTTP_200_OK)

# override this to do stuff after upload
def upload_finished(self, request):
def upload_finished(self, request, data_type):
raise NotImplementedError('You need to implement upload_finished in UploadMixin')
3 changes: 3 additions & 0 deletions cvat/apps/engine/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,6 +304,9 @@ def get_log_path(self):
def get_task_artifacts_dirname(self):
return os.path.join(self.get_task_dirname(), 'artifacts')

def get_tmp_dirname(self):
    """Path of this task's temporary-files directory (<task dir>/tmp)."""
    task_dirname = self.get_task_dirname()
    return os.path.join(task_dirname, "tmp")

def __str__(self):
return self.name

Expand Down
Loading