Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix upload over 2 gb #524

Merged
merged 20 commits into from
Feb 23, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"editor.codeActionsOnSave": {
"source.fixAll.eslint": true
"source.fixAll.eslint": "explicit"
},
"[javascript]": {
"editor.tabSize": 2,
Expand Down
4 changes: 2 additions & 2 deletions src/api.v2/controllers/cellLevelMetaController.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ const bucketNames = require('../../config/bucketNames');
const sqlClient = require('../../sql/sqlClient');
const CellLevelMeta = require('../model/CellLevelMeta');
const CellLevelMetaToExperiment = require('../model/CellLevelMetaToExperiment');
const { getFileUploadUrls } = require('../helpers/s3/signedUrl');
const { createMultipartUpload } = require('../helpers/s3/signedUrl');
const getLogger = require('../../utils/getLogger');
const OK = require('../../utils/responses/OK');

Expand All @@ -28,7 +28,7 @@ const upload = async (req, res) => {
await new CellLevelMeta(trx).create(newCellLevelMetaFile);
await new CellLevelMetaToExperiment(trx).setNewFile(experimentId, cellLevelMetaKey);

uploadUrlParams = await getFileUploadUrls(cellLevelMetaKey, {}, size, bucketName);
uploadUrlParams = await createMultipartUpload(cellLevelMetaKey, {}, bucketName);
uploadUrlParams = { ...uploadUrlParams, fileId: cellLevelMetaKey };
});

Expand Down
12 changes: 5 additions & 7 deletions src/api.v2/controllers/sampleFileController.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ const Sample = require('../model/Sample');
const SampleFile = require('../model/SampleFile');
const bucketNames = require('../../config/bucketNames');

const { getFileUploadUrls, getSampleFileDownloadUrl } = require('../helpers/s3/signedUrl');
const { getSampleFileDownloadUrl, createMultipartUpload } = require('../helpers/s3/signedUrl');
const { OK, MethodNotAllowedError } = require('../../utils/responses');
const getLogger = require('../../utils/getLogger');

Expand All @@ -25,8 +25,6 @@ const createFile = async (req, res) => {
upload_status: 'uploading',
};



await sqlClient.get().transaction(async (trx) => {
await new SampleFile(trx).create(newSampleFile);
await new Sample(trx).setNewFile(sampleId, sampleFileId, sampleFileType);
Expand All @@ -41,7 +39,7 @@ const createFile = async (req, res) => {
const beginUpload = async (req, res) => {
const {
params: { experimentId, sampleFileId },
body: { metadata, size },
body: { metadata },
} = req;

const { uploadStatus } = await new SampleFile().findById(sampleFileId).first();
Expand All @@ -55,9 +53,9 @@ const beginUpload = async (req, res) => {
);
}

logger.log(`Generating multipart upload urls for ${experimentId}, sample file ${sampleFileId}`);
const uploadParams = await getFileUploadUrls(
sampleFileId, metadata, size, bucketNames.SAMPLE_FILES,
logger.log(`Creating multipart upload for ${experimentId}, sample file ${sampleFileId}`);
const uploadParams = await createMultipartUpload(
sampleFileId, metadata, bucketNames.SAMPLE_FILES,
);

res.json(uploadParams);
Expand Down
20 changes: 20 additions & 0 deletions src/api.v2/controllers/uploadController.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
const { getPartUploadSignedUrl } = require('../helpers/s3/signedUrl');

const getLogger = require('../../utils/getLogger');

const logger = getLogger('[uploadController] - ');

const getUploadPartSignedUrl = async (req, res) => {
const { experimentId, uploadId, partNumber } = req.params;
const { bucket, key } = req.query;

logger.log(`Experiment: ${experimentId}, getting part ${partNumber} for upload: ${uploadId}`);

const signedUrl = await getPartUploadSignedUrl(key, bucket, uploadId, partNumber);

logger.log(`Finished getting part ${partNumber} for experiment ${experimentId}, upload: ${uploadId}`);

res.json(signedUrl);
};

module.exports = { getUploadPartSignedUrl };
81 changes: 39 additions & 42 deletions src/api.v2/helpers/s3/signedUrl.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,47 +22,61 @@ const getSignedUrl = async (operation, params) => {
return s3.getSignedUrlPromise(operation, params);
};

const FILE_CHUNK_SIZE = 10000000;

const createMultipartUpload = async (params, size) => {
if (!params.Bucket) throw new Error('Bucket is required');
if (!params.Key) throw new Error('Key is required');
const createMultipartUpload = async (key, metadata, bucket) => {
if (!key) throw new Error('key is required');
if (!bucket) throw new Error('bucket is required');

const S3Config = {
apiVersion: '2006-03-01',
signatureVersion: 'v4',
region: config.awsRegion,
};

const s3 = new AWS.S3(S3Config);

const { UploadId } = await s3.createMultipartUpload(params).promise();

const baseParams = {
...params,
UploadId,
const params = {
Bucket: bucket,
Key: key,
// 1 hour timeout of upload link
Expires: 3600,
};

const promises = [];
const parts = Math.ceil(size / FILE_CHUNK_SIZE);

for (let i = 0; i < parts; i += 1) {
promises.push(
s3.getSignedUrlPromise('uploadPart', {
...baseParams,
PartNumber: i + 1,
}),
);
if (metadata.cellrangerVersion) {
params.Metadata = {
cellranger_version: metadata.cellrangerVersion,
};
}

const signedUrls = await Promise.all(promises);
const s3 = new AWS.S3(S3Config);

// @ts-ignore
const { UploadId } = await s3.createMultipartUpload(params).promise();

return {
signedUrls,
uploadId: UploadId,
bucket,
key,
};
};

const getPartUploadSignedUrl = async (key, bucketName, uploadId, partNumber) => {
const S3Config = {
apiVersion: '2006-03-01',
signatureVersion: 'v4',
region: config.awsRegion,
};

const s3 = new AWS.S3(S3Config);

const params = {
Key: key,
Bucket: bucketName,
// 1 hour timeout of upload link
Expires: 3600,
UploadId: uploadId,
PartNumber: partNumber,
};

return await s3.getSignedUrlPromise('uploadPart', params);
};

const completeMultipartUpload = async (key, parts, uploadId, bucketName) => {
const params = {
Expand All @@ -82,23 +96,6 @@ const completeMultipartUpload = async (key, parts, uploadId, bucketName) => {
await s3.completeMultipartUpload(params).promise();
};

const getFileUploadUrls = async (key, metadata, size, bucketName) => {
const params = {
Bucket: bucketName,
Key: key,
// 1 hour timeout of upload link
Expires: 3600,
};

if (metadata.cellrangerVersion) {
params.Metadata = {
cellranger_version: metadata.cellrangerVersion,
};
}

return await createMultipartUpload(params, size);
};

const fileNameToReturn = {
matrix10x: 'matrix.mtx.gz',
barcodes10x: 'barcodes.tsv.gz',
Expand Down Expand Up @@ -132,9 +129,9 @@ const getSampleFileDownloadUrl = async (experimentId, sampleId, fileType) => {
};

module.exports = {
getFileUploadUrls,
getSampleFileDownloadUrl,
getSignedUrl,
createMultipartUpload,
completeMultipartUpload,
getPartUploadSignedUrl,
};
9 changes: 9 additions & 0 deletions src/api.v2/routes/upload.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
const { getUploadPartSignedUrl } = require('../controllers/uploadController');
const { expressAuthorizationMiddleware } = require('../middlewares/authMiddlewares');

module.exports = {
'upload#getUploadPartSignedUrl': [
expressAuthorizationMiddleware,
(req, res, next) => getUploadPartSignedUrl(req, res).catch(next),
],
};
68 changes: 66 additions & 2 deletions src/specs/api.v2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1114,8 +1114,8 @@ paths:
properties:
name:
type: string
size:
type: integer
required:
- name
responses:
"200":
description: Success
Expand Down Expand Up @@ -1636,6 +1636,70 @@ paths:
schema:
$ref: "#/components/schemas/HTTPError"

"/experiments/{experimentId}/upload/{uploadId}/part/{partNumber}/signedUrl":
get:
summary: Gets signed url to upload a part
description: Gets the upload url for uploading partNumber of an ongoing multipart upload
operationId: getUploadPartSignedUrl
x-eov-operation-id: upload#getUploadPartSignedUrl
x-eov-operation-handler: routes/upload
parameters:
- name: experimentId
required: true
in: path
description: ID of the experiment.
schema:
type: string
- name: uploadId
in: path
description: UploadId used to identify s3 multipart uploads.
required: true
schema:
type: string
- name: partNumber
in: path
description: PartNumber used to identify which part of the s3 multipart upload this is.
required: true
schema:
type: integer
- name: bucket
description: Bucket receiving the upload
schema:
type: string
in: query
required: true
- name: key
description: Key of the file being uploaded in the bucket
schema:
type: string
in: query
required: true
responses:
"200":
description: Success
content:
application/json:
schema:
type: string
"400":
description: Bad Request
content:
application/json:
schema:
$ref: "#/components/schemas/HTTPError"
"401":
description: The request lacks authentication credentials.
content:
application/json:
schema:
$ref: "#/components/schemas/HTTPError"
"403":
description: Forbidden request for this user.
content:
application/json:
schema:
$ref: "#/components/schemas/HTTPError"

"/experiments/{experimentId}/sampleFiles/{sampleFileId}/beginUpload":
post:
summary: Begins the multipart upload of a sample file
Expand Down
3 changes: 0 additions & 3 deletions src/specs/models/samples-bodies/BeginSampleFileUpload.v2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ title: Begin sample file multipart upload
description: 'Data to begin an s3 multipart upload'
type: object
properties:
size:
type: number
metadata:
description: Metadata to append to the s3 object on upload
type: object
Expand All @@ -15,7 +13,6 @@ properties:
- pattern: v3

required:
- size
StefanBabukov marked this conversation as resolved.
Show resolved Hide resolved
- metadata

additionalProperties: false
9 changes: 5 additions & 4 deletions tests/api.v2/controllers/cellLevelMetaController.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
const cellLevelMetaController = require('../../../src/api.v2/controllers/cellLevelMetaController');
const CellLevelMeta = require('../../../src/api.v2/model/CellLevelMeta');
const CellLevelMetaToExperiment = require('../../../src/api.v2/model/CellLevelMetaToExperiment');
const { getFileUploadUrls } = require('../../../src/api.v2/helpers/s3/signedUrl');
const { createMultipartUpload } = require('../../../src/api.v2/helpers/s3/signedUrl');
const { OK } = require('../../../src/utils/responses');
const bucketNames = require('../../../src/config/bucketNames');
const { mockSqlClient, mockTrx } = require('../mocks/getMockSqlClient')();

jest.mock('../../../src/api.v2/model/CellLevelMeta');
Expand Down Expand Up @@ -33,13 +34,13 @@ describe('CellLevelMetaController', () => {

const mockUploadUrlParams = { url: 'signedUrl', fileId: 'fileId' };

getFileUploadUrls.mockResolvedValue(mockUploadUrlParams);
createMultipartUpload.mockResolvedValue(mockUploadUrlParams);
await cellLevelMetaController.upload(mockReq, mockRes);

expect(CellLevelMeta).toHaveBeenCalledWith(mockTrx);
expect(CellLevelMetaToExperiment).toHaveBeenCalledWith(mockTrx);
expect(getFileUploadUrls).toHaveBeenCalledWith(
expect.any(String), {}, mockReq.body.size, expect.any(String),
expect(createMultipartUpload).toHaveBeenCalledWith(
expect.any(String), {}, bucketNames.CELL_LEVEL_META,
);

expect(mockRes.json).toHaveBeenCalledWith(
Expand Down
13 changes: 2 additions & 11 deletions tests/api.v2/controllers/sampleFileController.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ const mockRes = {
};

describe('sampleFileController', () => {
const mockUploadParams = { signedUrls: ['signedUrl1', 'signedUrl2'], fileId: 'mockfileId' };
const mockUploadParams = { fileId: 'mockfileId', bucket: 'mockBucket', key: 'mockKey' };

beforeEach(async () => {
jest.clearAllMocks();

signedUrl.getFileUploadUrls.mockReturnValue(Promise.resolve(mockUploadParams));
signedUrl.createMultipartUpload.mockReturnValueOnce(Promise.resolve(mockUploadParams));
});

it('createFile works correctly', async () => {
Expand Down Expand Up @@ -104,10 +104,6 @@ describe('sampleFileController', () => {

await sampleFileController.beginUpload(mockReq, mockRes);

expect(signedUrl.getFileUploadUrls).toHaveBeenCalledWith(
sampleFileId, metadata, size, bucketNames.SAMPLE_FILES,
);

expect(mockRes.json).toHaveBeenCalledWith(mockUploadParams);

expect(sampleFileInstance.findById.mock.calls).toMatchSnapshot();
Expand All @@ -128,10 +124,6 @@ describe('sampleFileController', () => {

await sampleFileController.beginUpload(mockReq, mockRes);

expect(signedUrl.getFileUploadUrls).toHaveBeenCalledWith(
sampleFileId, metadata, size, bucketNames.SAMPLE_FILES,
);

expect(mockRes.json).toHaveBeenCalledWith(mockUploadParams);

expect(sampleFileInstance.findById.mock.calls).toMatchSnapshot();
Expand All @@ -157,7 +149,6 @@ describe('sampleFileController', () => {
),
);

expect(signedUrl.getFileUploadUrls).not.toHaveBeenCalled();
expect(mockRes.json).not.toHaveBeenCalled();
});

Expand Down
Loading