Skip to content

Commit

Permalink
Merge pull request hms-dbmi-cellenics#524 from biomage-org/fix-upload…
Browse files Browse the repository at this point in the history
…-over-2-gb

Fix upload over 2 gb
  • Loading branch information
cosa65 authored Feb 23, 2024
2 parents 31d3016 + 4e30768 commit abe83eb
Show file tree
Hide file tree
Showing 16 changed files with 279 additions and 272 deletions.
2 changes: 1 addition & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"editor.codeActionsOnSave": {
"source.fixAll.eslint": true
"source.fixAll.eslint": "explicit"
},
"[javascript]": {
"editor.tabSize": 2,
Expand Down
4 changes: 2 additions & 2 deletions src/api.v2/controllers/cellLevelMetaController.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ const bucketNames = require('../../config/bucketNames');
const sqlClient = require('../../sql/sqlClient');
const CellLevelMeta = require('../model/CellLevelMeta');
const CellLevelMetaToExperiment = require('../model/CellLevelMetaToExperiment');
const { getFileUploadUrls } = require('../helpers/s3/signedUrl');
const { createMultipartUpload } = require('../helpers/s3/signedUrl');
const getLogger = require('../../utils/getLogger');
const OK = require('../../utils/responses/OK');

Expand All @@ -28,7 +28,7 @@ const upload = async (req, res) => {
await new CellLevelMeta(trx).create(newCellLevelMetaFile);
await new CellLevelMetaToExperiment(trx).setNewFile(experimentId, cellLevelMetaKey);

uploadUrlParams = await getFileUploadUrls(cellLevelMetaKey, {}, size, bucketName);
uploadUrlParams = await createMultipartUpload(cellLevelMetaKey, {}, bucketName);
uploadUrlParams = { ...uploadUrlParams, fileId: cellLevelMetaKey };
});

Expand Down
12 changes: 5 additions & 7 deletions src/api.v2/controllers/sampleFileController.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ const Sample = require('../model/Sample');
const SampleFile = require('../model/SampleFile');
const bucketNames = require('../../config/bucketNames');

const { getFileUploadUrls, getSampleFileDownloadUrl } = require('../helpers/s3/signedUrl');
const { getSampleFileDownloadUrl, createMultipartUpload } = require('../helpers/s3/signedUrl');
const { OK, MethodNotAllowedError } = require('../../utils/responses');
const getLogger = require('../../utils/getLogger');

Expand All @@ -25,8 +25,6 @@ const createFile = async (req, res) => {
upload_status: 'uploading',
};



await sqlClient.get().transaction(async (trx) => {
await new SampleFile(trx).create(newSampleFile);
await new Sample(trx).setNewFile(sampleId, sampleFileId, sampleFileType);
Expand All @@ -41,7 +39,7 @@ const createFile = async (req, res) => {
const beginUpload = async (req, res) => {
const {
params: { experimentId, sampleFileId },
body: { metadata, size },
body: { metadata },
} = req;

const { uploadStatus } = await new SampleFile().findById(sampleFileId).first();
Expand All @@ -55,9 +53,9 @@ const beginUpload = async (req, res) => {
);
}

logger.log(`Generating multipart upload urls for ${experimentId}, sample file ${sampleFileId}`);
const uploadParams = await getFileUploadUrls(
sampleFileId, metadata, size, bucketNames.SAMPLE_FILES,
logger.log(`Creating multipart upload for ${experimentId}, sample file ${sampleFileId}`);
const uploadParams = await createMultipartUpload(
sampleFileId, metadata, bucketNames.SAMPLE_FILES,
);

res.json(uploadParams);
Expand Down
20 changes: 20 additions & 0 deletions src/api.v2/controllers/uploadController.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
const { getPartUploadSignedUrl } = require('../helpers/s3/signedUrl');

const getLogger = require('../../utils/getLogger');

const logger = getLogger('[uploadController] - ');

const getUploadPartSignedUrl = async (req, res) => {
const { experimentId, uploadId, partNumber } = req.params;
const { bucket, key } = req.query;

logger.log(`Experiment: ${experimentId}, getting part ${partNumber} for upload: ${uploadId}`);

const signedUrl = await getPartUploadSignedUrl(key, bucket, uploadId, partNumber);

logger.log(`Finished getting part ${partNumber} for experiment ${experimentId}, upload: ${uploadId}`);

res.json(signedUrl);
};

module.exports = { getUploadPartSignedUrl };
81 changes: 39 additions & 42 deletions src/api.v2/helpers/s3/signedUrl.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,47 +22,61 @@ const getSignedUrl = async (operation, params) => {
return s3.getSignedUrlPromise(operation, params);
};

const FILE_CHUNK_SIZE = 10000000;

const createMultipartUpload = async (params, size) => {
if (!params.Bucket) throw new Error('Bucket is required');
if (!params.Key) throw new Error('Key is required');
const createMultipartUpload = async (key, metadata, bucket) => {
if (!key) throw new Error('key is required');
if (!bucket) throw new Error('bucket is required');

const S3Config = {
apiVersion: '2006-03-01',
signatureVersion: 'v4',
region: config.awsRegion,
};

const s3 = new AWS.S3(S3Config);

const { UploadId } = await s3.createMultipartUpload(params).promise();

const baseParams = {
...params,
UploadId,
const params = {
Bucket: bucket,
Key: key,
// 1 hour timeout of upload link
Expires: 3600,
};

const promises = [];
const parts = Math.ceil(size / FILE_CHUNK_SIZE);

for (let i = 0; i < parts; i += 1) {
promises.push(
s3.getSignedUrlPromise('uploadPart', {
...baseParams,
PartNumber: i + 1,
}),
);
if (metadata.cellrangerVersion) {
params.Metadata = {
cellranger_version: metadata.cellrangerVersion,
};
}

const signedUrls = await Promise.all(promises);
const s3 = new AWS.S3(S3Config);

// @ts-ignore
const { UploadId } = await s3.createMultipartUpload(params).promise();

return {
signedUrls,
uploadId: UploadId,
bucket,
key,
};
};

const getPartUploadSignedUrl = async (key, bucketName, uploadId, partNumber) => {
const S3Config = {
apiVersion: '2006-03-01',
signatureVersion: 'v4',
region: config.awsRegion,
};

const s3 = new AWS.S3(S3Config);

const params = {
Key: key,
Bucket: bucketName,
// 1 hour timeout of upload link
Expires: 3600,
UploadId: uploadId,
PartNumber: partNumber,
};

return await s3.getSignedUrlPromise('uploadPart', params);
};

const completeMultipartUpload = async (key, parts, uploadId, bucketName) => {
const params = {
Expand All @@ -82,23 +96,6 @@ const completeMultipartUpload = async (key, parts, uploadId, bucketName) => {
await s3.completeMultipartUpload(params).promise();
};

const getFileUploadUrls = async (key, metadata, size, bucketName) => {
const params = {
Bucket: bucketName,
Key: key,
// 1 hour timeout of upload link
Expires: 3600,
};

if (metadata.cellrangerVersion) {
params.Metadata = {
cellranger_version: metadata.cellrangerVersion,
};
}

return await createMultipartUpload(params, size);
};

const fileNameToReturn = {
matrix10x: 'matrix.mtx.gz',
barcodes10x: 'barcodes.tsv.gz',
Expand Down Expand Up @@ -132,9 +129,9 @@ const getSampleFileDownloadUrl = async (experimentId, sampleId, fileType) => {
};

module.exports = {
getFileUploadUrls,
getSampleFileDownloadUrl,
getSignedUrl,
createMultipartUpload,
completeMultipartUpload,
getPartUploadSignedUrl,
};
9 changes: 9 additions & 0 deletions src/api.v2/routes/upload.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
const { getUploadPartSignedUrl } = require('../controllers/uploadController');
const { expressAuthorizationMiddleware } = require('../middlewares/authMiddlewares');

module.exports = {
'upload#getUploadPartSignedUrl': [
expressAuthorizationMiddleware,
(req, res, next) => getUploadPartSignedUrl(req, res).catch(next),
],
};
68 changes: 66 additions & 2 deletions src/specs/api.v2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1114,8 +1114,8 @@ paths:
properties:
name:
type: string
size:
type: integer
required:
- name
responses:
"200":
description: Success
Expand Down Expand Up @@ -1636,6 +1636,70 @@ paths:
schema:
$ref: "#/components/schemas/HTTPError"

"/experiments/{experimentId}/upload/{uploadId}/part/{partNumber}/signedUrl":
get:
summary: Gets signed url to upload a part
description: Gets the upload url for uploading partNumber of an ongoing multipart upload
operationId: getUploadPartSignedUrl
x-eov-operation-id: upload#getUploadPartSignedUrl
x-eov-operation-handler: routes/upload
parameters:
- name: experimentId
required: true
in: path
description: ID of the experiment.
schema:
type: string
- name: uploadId
in: path
description: UploadId used to identify s3 multipart uploads.
required: true
schema:
type: string
- name: partNumber
in: path
description: PartNumber used to identify which part of the s3 multipart upload this is.
required: true
schema:
type: integer
- name: bucket
description: Bucket receiving the upload
schema:
type: string
in: query
required: true
- name: key
description: Key of the file being uploaded in the bucket
schema:
type: string
in: query
required: true
responses:
"200":
description: Success
content:
application/json:
schema:
type: string
"400":
description: Bad Request
content:
application/json:
schema:
$ref: "#/components/schemas/HTTPError"
"401":
description: The request lacks authentication credentials.
content:
application/json:
schema:
$ref: "#/components/schemas/HTTPError"
"403":
description: Forbidden request for this user.
content:
application/json:
schema:
$ref: "#/components/schemas/HTTPError"

"/experiments/{experimentId}/sampleFiles/{sampleFileId}/beginUpload":
post:
summary: Begins the multipart upload of a sample file
Expand Down
3 changes: 0 additions & 3 deletions src/specs/models/samples-bodies/BeginSampleFileUpload.v2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,6 @@ title: Begin sample file multipart upload
description: 'Data to begin an s3 multipart upload'
type: object
properties:
size:
type: number
metadata:
description: Metadata to append to the s3 object on upload
type: object
Expand All @@ -15,7 +13,6 @@ properties:
- pattern: v3

required:
- size
- metadata

additionalProperties: false
9 changes: 5 additions & 4 deletions tests/api.v2/controllers/cellLevelMetaController.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,9 @@
const cellLevelMetaController = require('../../../src/api.v2/controllers/cellLevelMetaController');
const CellLevelMeta = require('../../../src/api.v2/model/CellLevelMeta');
const CellLevelMetaToExperiment = require('../../../src/api.v2/model/CellLevelMetaToExperiment');
const { getFileUploadUrls } = require('../../../src/api.v2/helpers/s3/signedUrl');
const { createMultipartUpload } = require('../../../src/api.v2/helpers/s3/signedUrl');
const { OK } = require('../../../src/utils/responses');
const bucketNames = require('../../../src/config/bucketNames');
const { mockSqlClient, mockTrx } = require('../mocks/getMockSqlClient')();

jest.mock('../../../src/api.v2/model/CellLevelMeta');
Expand Down Expand Up @@ -33,13 +34,13 @@ describe('CellLevelMetaController', () => {

const mockUploadUrlParams = { url: 'signedUrl', fileId: 'fileId' };

getFileUploadUrls.mockResolvedValue(mockUploadUrlParams);
createMultipartUpload.mockResolvedValue(mockUploadUrlParams);
await cellLevelMetaController.upload(mockReq, mockRes);

expect(CellLevelMeta).toHaveBeenCalledWith(mockTrx);
expect(CellLevelMetaToExperiment).toHaveBeenCalledWith(mockTrx);
expect(getFileUploadUrls).toHaveBeenCalledWith(
expect.any(String), {}, mockReq.body.size, expect.any(String),
expect(createMultipartUpload).toHaveBeenCalledWith(
expect.any(String), {}, bucketNames.CELL_LEVEL_META,
);

expect(mockRes.json).toHaveBeenCalledWith(
Expand Down
13 changes: 2 additions & 11 deletions tests/api.v2/controllers/sampleFileController.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,12 @@ const mockRes = {
};

describe('sampleFileController', () => {
const mockUploadParams = { signedUrls: ['signedUrl1', 'signedUrl2'], fileId: 'mockfileId' };
const mockUploadParams = { fileId: 'mockfileId', bucket: 'mockBucket', key: 'mockKey' };

beforeEach(async () => {
jest.clearAllMocks();

signedUrl.getFileUploadUrls.mockReturnValue(Promise.resolve(mockUploadParams));
signedUrl.createMultipartUpload.mockReturnValueOnce(Promise.resolve(mockUploadParams));
});

it('createFile works correctly', async () => {
Expand Down Expand Up @@ -104,10 +104,6 @@ describe('sampleFileController', () => {

await sampleFileController.beginUpload(mockReq, mockRes);

expect(signedUrl.getFileUploadUrls).toHaveBeenCalledWith(
sampleFileId, metadata, size, bucketNames.SAMPLE_FILES,
);

expect(mockRes.json).toHaveBeenCalledWith(mockUploadParams);

expect(sampleFileInstance.findById.mock.calls).toMatchSnapshot();
Expand All @@ -128,10 +124,6 @@ describe('sampleFileController', () => {

await sampleFileController.beginUpload(mockReq, mockRes);

expect(signedUrl.getFileUploadUrls).toHaveBeenCalledWith(
sampleFileId, metadata, size, bucketNames.SAMPLE_FILES,
);

expect(mockRes.json).toHaveBeenCalledWith(mockUploadParams);

expect(sampleFileInstance.findById.mock.calls).toMatchSnapshot();
Expand All @@ -157,7 +149,6 @@ describe('sampleFileController', () => {
),
);

expect(signedUrl.getFileUploadUrls).not.toHaveBeenCalled();
expect(mockRes.json).not.toHaveBeenCalled();
});

Expand Down
Loading

0 comments on commit abe83eb

Please sign in to comment.