Skip to content

Commit

Permalink
ci: fix cleanup script throttling (aws-amplify#11716)
Browse files Browse the repository at this point in the history
* ci: update cleanup script to reduce throttling

* chore: add extract api file

* ci: fix condition

* chore: cleanup

* chore: add back logs

* chore: move cleanup to start to avoid deleting apps while running

* chore: update cleanup script

* chore: revert build change

* chore: fix md file

* chore: cleanup

* chore: address pr feedback

* chore: comments

* chore: refactor

* chore: fix lint issues
  • Loading branch information
awsluja authored and Amplifiyer committed Jan 13, 2023
1 parent ef7c15a commit b34f2ea
Showing 1 changed file with 113 additions and 45 deletions.
158 changes: 113 additions & 45 deletions packages/amplify-e2e-tests/src/cleanup-e2e-resources.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ import * as aws from 'aws-sdk';
import _ from 'lodash';
import fs from 'fs-extra';
import path from 'path';
import { deleteS3Bucket } from '@aws-amplify/amplify-e2e-core';
import { deleteS3Bucket, sleep } from '@aws-amplify/amplify-e2e-core';

// Ensure to update scripts/split-e2e-tests.ts is also updated this gets updated
const AWS_REGIONS_TO_RUN_TESTS = [
Expand All @@ -21,6 +21,19 @@ const AWS_REGIONS_TO_RUN_TESTS = [
'ap-southeast-2',
];

// Limits are efforced per region
// we collect resources from each region & then delete as an entire batch
const DELETE_LIMITS = {
PER_REGION: {
OTHER: 25,
CFN_STACK: 50,
},
PER_BATCH: {
OTHER: 50,
CFN_STACK: 100,
}
}

const reportPath = path.normalize(path.join(__dirname, '..', 'amplify-e2e-reports', 'stale-resources.json'));

const MULTI_JOB_APP = '<Amplify App reused by multiple apps>';
Expand Down Expand Up @@ -88,8 +101,8 @@ type ReportEntry = {
workflowId?: string;
lifecycle?: string;
cciJobDetails?: CircleCIJobDetails;
amplifyApps: Record<string, AmplifyAppInfo>;
stacks: Record<string, StackInfo>;
amplifyApps: AmplifyAppInfo[];
stacks: StackInfo[];
buckets: Record<string, S3BucketInfo>;
roles: Record<string, IamRoleInfo>;
pinpointApps: Record<string, PinpointAppInfo>;
Expand All @@ -110,6 +123,7 @@ type AWSAccountInfo = {
accessKeyId: string;
secretAccessKey: string;
sessionToken: string;
parent: boolean;
};

const PINPOINT_TEST_REGEX = /integtest/;
Expand Down Expand Up @@ -236,30 +250,38 @@ const getAWSConfig = ({ accessKeyId, secretAccessKey, sessionToken }: AWSAccount
* @returns Promise<AmplifyAppInfo[]> a list of Amplify Apps in the region with build info
*/
const getAmplifyApps = async (account: AWSAccountInfo, region: string): Promise<AmplifyAppInfo[]> => {
if(region === 'us-east-1' && account.parent){
return []; // temporarily disabled until us-east-1 is re-enabled for this account
}
const amplifyClient = new aws.Amplify(getAWSConfig(account, region));
const amplifyApps = await amplifyClient.listApps({ maxResults: 50 }).promise(); // keeping it to 50 as max supported is 50
const result: AmplifyAppInfo[] = [];
for (const app of amplifyApps.apps) {
const backends: Record<string, StackInfo> = {};
try {
const backendEnvironments = await amplifyClient.listBackendEnvironments({ appId: app.appId, maxResults: 50 }).promise();
for (const backendEnv of backendEnvironments.backendEnvironments) {
const buildInfo = await getStackDetails(backendEnv.stackName, account, region);
if (buildInfo) {
backends[backendEnv.environmentName] = buildInfo;
try {
const amplifyApps = await amplifyClient.listApps({ maxResults: 25 }).promise(); // keeping it to 25 as max supported is 25
const result: AmplifyAppInfo[] = [];
for (const app of amplifyApps.apps) {
const backends: Record<string, StackInfo> = {};
try {
const backendEnvironments = await amplifyClient.listBackendEnvironments({ appId: app.appId, maxResults: 5 }).promise();
for (const backendEnv of backendEnvironments.backendEnvironments) {
const buildInfo = await getStackDetails(backendEnv.stackName, account, region);
if (buildInfo) {
backends[backendEnv.environmentName] = buildInfo;
}
}
} catch (e) {
// console.log(e);
}
} catch (e) {
console.log(e);
result.push({
appId: app.appId,
name: app.name,
region,
backends,
});
}
result.push({
appId: app.appId,
name: app.name,
region,
backends,
});
return result;
} catch (e){
console.log(e);
return [];
}
return result;
};

/**
Expand Down Expand Up @@ -308,24 +330,47 @@ const getStackDetails = async (stackName: string, account: AWSAccountInfo, regio

const getStacks = async (account: AWSAccountInfo, region: string): Promise<StackInfo[]> => {
const cfnClient = new aws.CloudFormation(getAWSConfig(account, region));
const stackStatusFilter = [
'CREATE_COMPLETE',
'ROLLBACK_FAILED',
'ROLLBACK_COMPLETE',
'DELETE_FAILED',
'UPDATE_COMPLETE',
'UPDATE_ROLLBACK_FAILED',
'UPDATE_ROLLBACK_COMPLETE',
'IMPORT_COMPLETE',
'IMPORT_ROLLBACK_FAILED',
'IMPORT_ROLLBACK_COMPLETE',
];
const stacks = await cfnClient
.listStacks({
StackStatusFilter: [
'CREATE_COMPLETE',
'ROLLBACK_FAILED',
'DELETE_FAILED',
'UPDATE_COMPLETE',
'UPDATE_ROLLBACK_FAILED',
'UPDATE_ROLLBACK_COMPLETE',
'IMPORT_COMPLETE',
'IMPORT_ROLLBACK_FAILED',
'IMPORT_ROLLBACK_COMPLETE',
],
StackStatusFilter: stackStatusFilter,
})
.promise();
// loop
let nextToken = stacks.NextToken;
while (nextToken && stacks.StackSummaries.length < DELETE_LIMITS.PER_REGION.CFN_STACK) {
const nextPage = await cfnClient
.listStacks({
StackStatusFilter: stackStatusFilter,
NextToken: nextToken,
})
.promise();
stacks.StackSummaries.push(...nextPage.StackSummaries);
nextToken = nextPage.NextToken;
}

// We are interested in only the root stacks that are deployed by amplify-cli
const rootStacks = stacks.StackSummaries.filter(stack => !stack.RootId);
// NOTE: every few months, we should disable the filter , and clean up all stacks (not just root stacks)
// this is because some child stacks fail to delete (but we don't let that stop us from deleting root stacks)
// eventually, we must clean up those child stacks too.
let rootStacks = stacks.StackSummaries.filter(stack => !stack.RootId);
if(rootStacks.length > DELETE_LIMITS.PER_REGION.CFN_STACK){
// we can only delete 100 stacks accross all regions every batch,
// so we shouldn't take more than 50 stacks from each of those 8 regions.
// this should at least limit calls to getStackDetails below
rootStacks = rootStacks.slice(0, DELETE_LIMITS.PER_REGION.CFN_STACK);
}
const results: StackInfo[] = [];
for (const stack of rootStacks) {
try {
Expand Down Expand Up @@ -519,7 +564,7 @@ const mergeResourcesByCCIJob = (
};

const deleteAmplifyApps = async (account: AWSAccountInfo, accountIndex: number, apps: AmplifyAppInfo[]): Promise<void> => {
await Promise.all(apps.map(app => deleteAmplifyApp(account, accountIndex, app)));
await Promise.all(apps.slice(0, DELETE_LIMITS.PER_BATCH.OTHER).map(app => deleteAmplifyApp(account, accountIndex, app)));
};

const deleteAmplifyApp = async (account: AWSAccountInfo, accountIndex: number, app: AmplifyAppInfo): Promise<void> => {
Expand All @@ -537,7 +582,7 @@ const deleteAmplifyApp = async (account: AWSAccountInfo, accountIndex: number, a
};

const deleteIamRoles = async (account: AWSAccountInfo, accountIndex: number, roles: IamRoleInfo[]): Promise<void> => {
await Promise.all(roles.map(role => deleteIamRole(account, accountIndex, role)));
await Promise.all(roles.slice(0, DELETE_LIMITS.PER_BATCH.OTHER).map(role => deleteIamRole(account, accountIndex, role)));
};

const deleteIamRole = async (account: AWSAccountInfo, accountIndex: number, role: IamRoleInfo): Promise<void> => {
Expand Down Expand Up @@ -601,7 +646,7 @@ const deleteIamRolePolicy = async (account: AWSAccountInfo, accountIndex: number
};

const deleteBuckets = async (account: AWSAccountInfo, accountIndex: number, buckets: S3BucketInfo[]): Promise<void> => {
await Promise.all(buckets.map(bucket => deleteBucket(account, accountIndex, bucket)));
await Promise.all(buckets.slice(0, DELETE_LIMITS.PER_BATCH.OTHER).map(bucket => deleteBucket(account, accountIndex, bucket)));
};

const deleteBucket = async (account: AWSAccountInfo, accountIndex: number, bucket: S3BucketInfo): Promise<void> => {
Expand All @@ -620,7 +665,7 @@ const deleteBucket = async (account: AWSAccountInfo, accountIndex: number, bucke
};

const deletePinpointApps = async (account: AWSAccountInfo, accountIndex: number, apps: PinpointAppInfo[]): Promise<void> => {
await Promise.all(apps.map(app => deletePinpointApp(account, accountIndex, app)));
await Promise.all(apps.slice(0, DELETE_LIMITS.PER_BATCH.OTHER).map(app => deletePinpointApp(account, accountIndex, app)));
};

const deletePinpointApp = async (account: AWSAccountInfo, accountIndex: number, app: PinpointAppInfo): Promise<void> => {
Expand All @@ -636,7 +681,7 @@ const deletePinpointApp = async (account: AWSAccountInfo, accountIndex: number,
};

const deleteAppSyncApis = async (account: AWSAccountInfo, accountIndex: number, apis: AppSyncApiInfo[]): Promise<void> => {
await Promise.all(apis.map(api => deleteAppSyncApi(account, accountIndex, api)));
await Promise.all(apis.slice(0, DELETE_LIMITS.PER_BATCH.OTHER).map(api => deleteAppSyncApi(account, accountIndex, api)));
};

const deleteAppSyncApi = async (account: AWSAccountInfo, accountIndex: number, api: AppSyncApiInfo): Promise<void> => {
Expand All @@ -653,7 +698,7 @@ const deleteAppSyncApi = async (account: AWSAccountInfo, accountIndex: number, a
};

const deleteCfnStacks = async (account: AWSAccountInfo, accountIndex: number, stacks: StackInfo[]): Promise<void> => {
await Promise.all(stacks.map(stack => deleteCfnStack(account, accountIndex, stack)));
await Promise.all(stacks.slice(0, DELETE_LIMITS.PER_BATCH.CFN_STACK).map(stack => deleteCfnStack(account, accountIndex, stack)));
};

const deleteCfnStack = async (account: AWSAccountInfo, accountIndex: number, stack: StackInfo): Promise<void> => {
Expand All @@ -663,8 +708,8 @@ const deleteCfnStack = async (account: AWSAccountInfo, accountIndex: number, sta
try {
const cfnClient = new aws.CloudFormation(getAWSConfig(account, region));
await cfnClient.deleteStack({ StackName: stackName, RetainResources: resourceToRetain }).promise();
// we'll only wait up to 10 minutes before moving on
await cfnClient.waitFor('stackDeleteComplete', { StackName: stackName, $waiter: { maxAttempts: 20 } }).promise();
// we'll only wait up to a minute before moving on
await cfnClient.waitFor('stackDeleteComplete', { StackName: stackName, $waiter: { maxAttempts: 2 } }).promise();
} catch (e) {
console.log(`Deleting CloudFormation stack ${stackName} failed with error ${e.message}`);
if (e.code === 'ExpiredTokenException') {
Expand All @@ -690,26 +735,32 @@ const deleteResources = async (
for (const jobId of Object.keys(staleResources)) {
const resources = staleResources[jobId];
if (resources.amplifyApps) {
console.log(`Deleting up to ${DELETE_LIMITS.PER_BATCH.OTHER} of ${resources.amplifyApps.length} apps on ACCOUNT[${accountIndex}]`);
await deleteAmplifyApps(account, accountIndex, Object.values(resources.amplifyApps));
}

if (resources.stacks) {
console.log(`Deleting up to ${DELETE_LIMITS.PER_BATCH.CFN_STACK} of ${resources.stacks.length} stacks on ACCOUNT[${accountIndex}]`);
await deleteCfnStacks(account, accountIndex, Object.values(resources.stacks));
}

if (resources.buckets) {
console.log(`Deleting up to ${DELETE_LIMITS.PER_BATCH.OTHER} of ${resources.buckets.length} buckets on ACCOUNT[${accountIndex}]`);
await deleteBuckets(account, accountIndex, Object.values(resources.buckets));
}

if (resources.roles) {
console.log(`Deleting up to ${DELETE_LIMITS.PER_BATCH.OTHER} of ${resources.roles.length} roles on ACCOUNT[${accountIndex}]`);
await deleteIamRoles(account, accountIndex, Object.values(resources.roles));
}

if (resources.pinpointApps) {
console.log(`Deleting up to ${DELETE_LIMITS.PER_BATCH.OTHER} of ${resources.pinpointApps.length} pinpoint apps on ACCOUNT[${accountIndex}]`);
await deletePinpointApps(account, accountIndex, Object.values(resources.pinpointApps));
}

if (resources.appSyncApis) {
console.log(`Deleting up to ${DELETE_LIMITS.PER_BATCH.OTHER} of ${resources.appSyncApis.length} appSyncApis on ACCOUNT[${accountIndex}]`);
await deleteAppSyncApis(account, accountIndex, Object.values(resources.appSyncApis));
}
}
Expand Down Expand Up @@ -771,6 +822,7 @@ const getAccountsToCleanup = async (): Promise<AWSAccountInfo[]> => {
accessKeyId: process.env.AWS_ACCESS_KEY_ID,
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
sessionToken: process.env.AWS_SESSION_TOKEN,
parent: true,
};
}
const randomNumber = Math.floor(Math.random() * 100000);
Expand All @@ -786,6 +838,7 @@ const getAccountsToCleanup = async (): Promise<AWSAccountInfo[]> => {
accessKeyId: assumeRoleRes.Credentials.AccessKeyId,
secretAccessKey: assumeRoleRes.Credentials.SecretAccessKey,
sessionToken: assumeRoleRes.Credentials.SessionToken,
parent: false,
};
});
return await Promise.all(accountCredentialPromises);
Expand All @@ -799,6 +852,7 @@ const getAccountsToCleanup = async (): Promise<AWSAccountInfo[]> => {
accessKeyId: process.env.AWS_ACCESS_KEY_ID,
secretAccessKey: process.env.AWS_SECRET_ACCESS_KEY,
sessionToken: process.env.AWS_SESSION_TOKEN,
parent: true,
},
];
}
Expand All @@ -824,6 +878,15 @@ const cleanupAccount = async (account: AWSAccountInfo, accountIndex: number, fil
const allResources = mergeResourcesByCCIJob(
apps, stacks, buckets, orphanBuckets, orphanIamRoles, orphanPinpointApplications, orphanAppSyncApis
);
// cleanup resources that are <unknown> but that are definitely amplify resources
// this includes apps with names that include "test" or stacks that include both "amplify" & "test"
const testApps = allResources["<unknown>"].amplifyApps?.filter(a => a.name.toLocaleLowerCase().includes('test'));
const testStacks = allResources["<unknown>"].stacks?.filter(s => s.stackName.toLocaleLowerCase().includes('test') && s.stackName.toLocaleLowerCase().includes('amplify'));
const orphanedResources = allResources["<orphan>"];
orphanedResources.amplifyApps = orphanedResources.amplifyApps ?? [];
orphanedResources.stacks = orphanedResources.stacks ?? [];
orphanedResources.amplifyApps.push(...(testApps ? testApps : []));
orphanedResources.stacks.push(...(testStacks ? testStacks : []));
const staleResources = _.pickBy(allResources, filterPredicate);

generateReport(staleResources);
Expand Down Expand Up @@ -859,9 +922,14 @@ const cleanup = async (): Promise<void> => {

const filterPredicate = getFilterPredicate(args);
const accounts = await getAccountsToCleanup();

await Promise.all(accounts.map((account, i) => cleanupAccount(account, i, filterPredicate)));
for(let i = 0 ;i < 5; i ++){
console.log("CLEANUP ROUND: ", i + 1);
await Promise.all(accounts.map((account, i) => {
return cleanupAccount(account, i, filterPredicate);
}));
await sleep(60 * 1000);// run again after 60 seconds
}
console.log('Done cleaning all accounts!');
};

void cleanup();
cleanup();

0 comments on commit b34f2ea

Please sign in to comment.