Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Delete files in s3 for deleted samples #146

Merged
merged 10 commits into from
Jun 9, 2021
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 11 additions & 8 deletions src/api/general-services/pipeline-manage/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ const createQCPipeline = async (experimentId, processingConfigUpdates) => {

const { samples } = await samplesService.getSamplesByExperimentId(experimentId);

const sampleIds = Object.keys(samples);

if (processingConfigUpdates) {
processingConfigUpdates.forEach(({ name, body }) => {
if (!processingConfig[name]) {
Expand All @@ -182,14 +184,14 @@ const createQCPipeline = async (experimentId, processingConfigUpdates) => {
const mergedProcessingConfig = _.cloneDeepWith(processingConfig, (o) => {
if (_.isObject(o) && !o.dataIntegration && !o.embeddingSettings && typeof o.enabled === 'boolean') {
// Find which samples have sample-specific configurations.
const sampleConfigs = _.intersection(Object.keys(o), samples.ids);
const sampleConfigs = _.intersection(Object.keys(o), sampleIds);

// Get an object that is only the "raw" configuration.
const rawConfig = _.omit(o, sampleConfigs);

const result = {};

samples.ids.forEach((sample) => {
sampleIds.forEach((sample) => {
result[sample] = _.merge({}, rawConfig, o[sample]);
});

Expand Down Expand Up @@ -219,7 +221,7 @@ const createQCPipeline = async (experimentId, processingConfigUpdates) => {
logger.log(`State machine with ARN ${stateMachineArn} created, launching it...`);

const execInput = {
samples: samples.ids.map((sampleUuid, index) => ({ sampleUuid, index })),
samples: sampleIds.map((sampleUuid, index) => ({ sampleUuid, index })),
};

const executionArn = await executeStateMachine(stateMachineArn, execInput);
Expand All @@ -237,24 +239,25 @@ const createGem2SPipeline = async (experimentId) => {
const { metadataKeys } = await projectService.getProject(experiment.projectId);

const defaultMetadataValue = 'N.A.';
const samplesEntries = Object.entries(samples);

const taskParams = {
projectId: experiment.projectId,
experimentName: experiment.experimentName,
organism: experiment.meta.organism,
input: { type: experiment.meta.type },
sampleIds: samples.ids,
sampleNames: samples.ids.map((id) => samples[id].name),
sampleIds: samplesEntries.map(([sampleId]) => sampleId),
aerlaut marked this conversation as resolved.
Show resolved Hide resolved
sampleNames: samplesEntries.map(([, sample]) => sample.name),
};

if (metadataKeys.length) {
taskParams.metadata = metadataKeys.reduce((acc, key) => {
// Make sure the key does not contain '-' as it will cause failure in GEM2S
const sanitizedKey = key.replace(/-+/g, '_');

acc[sanitizedKey] = samples.ids.map(
// Fetch using unsanitized key as it is the key used to store metadata in sample
(sampleUuid) => samples[sampleUuid].metadata[key] || defaultMetadataValue,
// Fetch using unsanitized key as it is the key used to store metadata in sample
acc[sanitizedKey] = samplesEntries.map(
([, sample]) => sample.metadata[key] || defaultMetadataValue,
);
return acc;
}, {});
Expand Down
3 changes: 1 addition & 2 deletions src/api/route-services/__mocks__/samples.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
const { OK } = require('../../../utils/responses');

const mockSamples = {
ids: ['sample-1'],
'sample-1': {
name: 'sample-1',
projectUuid: 'project-1',
Expand Down Expand Up @@ -75,7 +74,7 @@ const mock = jest.fn().mockImplementation(() => ({
getSamples: mockGetSamples,
getSamplesByExperimentId: mockGetByExperimentId,
updateSamples: mockUpdateSamples,
deleteSamples: mockDeleteSamples,
deleteSamplesEntry: mockDeleteSamples,
}));

module.exports = mock;
4 changes: 2 additions & 2 deletions src/api/route-services/projects.js
Original file line number Diff line number Diff line change
Expand Up @@ -191,12 +191,12 @@ class ProjectsService {
const dynamodb = createDynamoDbInstance();

try {
const { experiments } = await this.getProject(projectUuid);
const { experiments, samples: sampleUuids } = await this.getProject(projectUuid);

if (experiments.length > 0) {
const deletePromises = experiments.reduce((acc, experimentId) => {
acc.push(experimentService.deleteExperiment(experimentId));
acc.push(samplesService.deleteSamples(projectUuid, experimentId));
acc.push(samplesService.deleteSamplesEntry(projectUuid, experimentId, sampleUuids));
return acc;
}, []);

Expand Down
94 changes: 86 additions & 8 deletions src/api/route-services/samples.js
Original file line number Diff line number Diff line change
@@ -1,15 +1,19 @@
const _ = require('lodash');

const { NotFoundError, OK } = require('../../utils/responses');

const config = require('../../config');
const {
createDynamoDbInstance, convertToJsObject, convertToDynamoDbRecord,
} = require('../../utils/dynamoDb');
const logger = require('../../utils/logging');
const AWS = require('../../utils/requireAWS');

const logger = require('../../utils/logging');

class SamplesService {
constructor() {
this.tableName = `samples-${config.clusterEnv}`;
this.sampleFilesBucketName = `biomage-originals-${config.clusterEnv}`;
}

async getSamples(projectUuid) {
Expand All @@ -33,7 +37,14 @@ class SamplesService {
throw new NotFoundError('Samples not found!');
}

return items.map((item) => convertToJsObject(item));
return items.map((item) => {
const prettyItem = convertToJsObject(item);

// Remove ids property from old sample entries that still have it
delete prettyItem.samples.ids;

return prettyItem;
});
}


Expand All @@ -54,6 +65,9 @@ class SamplesService {

if (response.Item) {
const prettyResponse = convertToJsObject(response.Item);

// Remove ids property from old sample entries that still have it
delete prettyResponse.samples.ids;
return prettyResponse;
}

Expand All @@ -72,7 +86,6 @@ class SamplesService {
':projectUuid': projectUuid,
});


// Update samples
const params = {
TableName: this.tableName,
Expand All @@ -92,29 +105,94 @@ class SamplesService {
}
}

async deleteSamples(projectUuid, experimentId) {
logger.log(`Deleting sample for project ${projectUuid} and expId ${experimentId}`);
async deleteSamplesFromS3(projectUuid, sampleUuids) {
const s3 = new AWS.S3();

const sampleObjectsToDelete = sampleUuids.map((sampleUuid) => (
[
{ Key: `${projectUuid}/${sampleUuid}/barcodes.tsv.gz` },
{ Key: `${projectUuid}/${sampleUuid}/features.tsv.gz` },
{ Key: `${projectUuid}/${sampleUuid}/matrix.mtx.gz` },
]
));
cosa65 marked this conversation as resolved.
Show resolved Hide resolved

const s3Params = {
Bucket: this.sampleFilesBucketName,
Delete: {
Objects: _.flatten(sampleObjectsToDelete),
Quiet: false,
},
};

const result = await s3.deleteObjects(s3Params).promise();

if (result.Errors.length) {
throw Error(`Delete S3 object errors: ${JSON.stringify(result.Errors)}`);
}
}

async removeSamples(projectUuid, experimentId, sampleUuids) {
logger.log(`Removing samples in an entry for project ${projectUuid} and expId ${experimentId}`);

const marshalledKey = convertToDynamoDbRecord({
experimentId,
});

const removeSamplesList = sampleUuids.map((sampleUuid, index) => `samples.#val${index}`);
const expressionAttributeNames = sampleUuids.reduce((acc, sampleId, index) => {
acc[`#val${index}`] = sampleId;
return acc;
}, {});

const params = {
TableName: this.tableName,
Key: marshalledKey,
UpdateExpression: `REMOVE ${removeSamplesList.join(', ')}`,
ExpressionAttributeNames: expressionAttributeNames,
ReturnValues: 'ALL_NEW',
};

const promises = [
createDynamoDbInstance().updateItem(params).promise(),
this.deleteSamplesFromS3(projectUuid, sampleUuids),
];

await Promise.all(promises);

return OK();
}

async deleteSamplesEntry(projectUuid, experimentId, sampleUuids) {
logger.log(`Deleting samples entry for project ${projectUuid} and expId ${experimentId}`);

const marshalledKey = convertToDynamoDbRecord({
experimentId,
});

const dynamodbParams = {
TableName: this.tableName,
Key: marshalledKey,
};

const dynamodb = createDynamoDbInstance();

const promises = [];

try {
await dynamodb.deleteItem(params).send();
return OK();
promises.push(await dynamodb.deleteItem(dynamodbParams).send());
} catch (e) {
if (e.statusCode === 404) throw NotFoundError('Project not found');
throw e;
}

if (sampleUuids.length) {
promises.push(await this.deleteSamplesFromS3(projectUuid, sampleUuids));
}

await Promise.all(promises);

return OK();
}
}


module.exports = SamplesService;
18 changes: 18 additions & 0 deletions src/api/route-services/utils.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// Updates each sub attribute separately for
// one particular attribute (of type object) of a dynamodb entry

const removePropertiesFromObject = async (
entryKey, attributeName, propertyToRemove,
tableName, dynamodb,
) => {
const params = {
TableName: tableName,
Key: entryKey,
UpdateExpression: `REMOVE ${attributeName}.${propertyToRemove}`,
ReturnValues: 'ALL_NEW',
};

await dynamodb.updateItem(params).promise();
};

module.exports = { removePropertiesFromObject };
8 changes: 8 additions & 0 deletions src/api/routes/samples.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,16 @@ module.exports = {
},
'samples#update': (req, res, next) => {
const { body, params: { projectUuid } } = req;

samplesService.updateSamples(projectUuid, body)
.then((data) => res.json(data))
.catch(next);
},
'samples#remove': (req, res, next) => {
const { body: { ids }, params: { projectUuid, experimentId } } = req;

samplesService.removeSamples(projectUuid, experimentId, ids)
.then((data) => res.json(data))
.catch(next);
},
};
48 changes: 40 additions & 8 deletions src/specs/api.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -806,6 +806,7 @@ paths:
type: string
get:
summary: Get samples under a project
description: Get all samples under a project
operationId: getProjectSamples
x-eov-operation-id: samples#get
x-eov-operation-handler: routes/samples
Expand All @@ -831,7 +832,6 @@ paths:
application/json:
schema:
$ref: ./models/HTTPError.v1.yaml
description: Get all samples under a project
'/projects/{projectUuid}/{experimentId}/samples':
parameters:
- name: projectUuid
Expand All @@ -845,8 +845,8 @@ paths:
schema:
type: string
put:
summary: Update sample
operationId: updateSample
summary: Update samples
operationId: updateSamples
x-eov-operation-id: samples#update
x-eov-operation-handler: routes/samples
responses:
Expand Down Expand Up @@ -874,19 +874,51 @@ paths:
application/json:
schema:
$ref: ./models/HTTPError.v1.yaml
description: Updates a sample
description: Updates a samples entry
requestBody:
content:
application/json:
schema:
type: object
properties:
projectUuid:
type: string
experimentId:
type: string
samples:
$ref: ./models/api-body-schemas/Samples.v1.yaml
delete:
summary: Remove samples
operationId: removeSamples
x-eov-operation-id: samples#remove
x-eov-operation-handler: routes/samples
responses:
'200':
description: OK
content:
application/json:
schema:
$ref: ./models/HTTPSuccess.v1.yaml
'400':
description: Bad Request
content:
application/json:
schema:
$ref: ./models/HTTPError.v1.yaml
'404':
description: Not Found
content:
application/json:
schema:
$ref: ./models/HTTPError.v1.yaml
'415':
description: Unsupported Media Type
content:
application/json:
schema:
$ref: ./models/HTTPError.v1.yaml
description: Removes samples from an entry
requestBody:
content:
application/json:
schema:
$ref: ./models/samples-bodies/SamplesRemoveBody.v1.yaml
'/projects':
get:
summary: 'Get the list of projects'
Expand Down
Loading