Skip to content

Commit

Permalink
fix(logs): log retention custom resource timed out during deploy (#26995)
Browse files Browse the repository at this point in the history

We use a custom resource to set the log retention for log groups created by the Lambda service.
This custom resource handler code has a built-in retry mechanism to avoid throttling when executing many LogRetention CRs.
Users can customize the number of possible retries, potentially retrying for a long time.
This can lead to a situation where further retries should still be attempted, but the Lambda function's timeout has already been exceeded.

This change sets the Lambda execution timeout to its maximum value (900 seconds) to allow for up to 15 minutes of retries.
If the retry budget is exhausted, the handler will throw an error and exit early.

Closes #24485

----

*By submitting this pull request, I confirm that my contribution is made under the terms of the Apache-2.0 license*
  • Loading branch information
mrgrain authored and Mike Wrighton committed Sep 14, 2023
1 parent 7f363c8 commit 8721a51
Show file tree
Hide file tree
Showing 128 changed files with 3,056 additions and 5,182 deletions.
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
{
"version": "33.0.0",
"version": "34.0.0",
"files": {
"4763e20569cc1d6f7ae496bbfb0b3e9bc205a1811c78c9a6bc18d949d737c2a9": {
"a8515c042d9c942705087943220417be929ac44f968d8fcef2681681b400c0c0": {
"source": {
"path": "asset.4763e20569cc1d6f7ae496bbfb0b3e9bc205a1811c78c9a6bc18d949d737c2a9",
"path": "asset.a8515c042d9c942705087943220417be929ac44f968d8fcef2681681b400c0c0",
"packaging": "zip"
},
"destinations": {
"current_account-current_region": {
"bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}",
"objectKey": "4763e20569cc1d6f7ae496bbfb0b3e9bc205a1811c78c9a6bc18d949d737c2a9.zip",
"objectKey": "a8515c042d9c942705087943220417be929ac44f968d8fcef2681681b400c0c0.zip",
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}"
}
}
Expand Down Expand Up @@ -53,15 +53,15 @@
}
}
},
"39bbb561d1400a68adab4e3b7ca0a4b5af2936645d1bd8ce29d72a4e7985c830": {
"0351d32f030d2f7023c76f5a073f44cdcf81c0ddc63dae64a49d9d33269539ee": {
"source": {
"path": "AppSyncJsResolverTestStack.template.json",
"packaging": "file"
},
"destinations": {
"current_account-current_region": {
"bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}",
"objectKey": "39bbb561d1400a68adab4e3b7ca0a4b5af2936645d1bd8ce29d72a4e7985c830.json",
"objectKey": "0351d32f030d2f7023c76f5a073f44cdcf81c0ddc63dae64a49d9d33269539ee.json",
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}"
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -272,11 +272,12 @@
"Properties": {
"Handler": "index.handler",
"Runtime": "nodejs18.x",
"Timeout": 900,
"Code": {
"S3Bucket": {
"Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}"
},
"S3Key": "4763e20569cc1d6f7ae496bbfb0b3e9bc205a1811c78c9a6bc18d949d737c2a9.zip"
"S3Key": "a8515c042d9c942705087943220417be929ac44f968d8fcef2681681b400c0c0.zip"
},
"Role": {
"Fn::GetAtt": [
Expand Down
Original file line number Diff line number Diff line change
@@ -1,28 +1,28 @@
{
"version": "33.0.0",
"version": "34.0.0",
"files": {
"18f77fb224555ccad15f9c2e0f71ae6930fc011792c4f74ec74daaa2bbd9a33f": {
"c7f4a8ca56d10961cf5e40d181faddf14f2ea8b39c5d65c7b61a366b17c7a2ce": {
"source": {
"path": "asset.18f77fb224555ccad15f9c2e0f71ae6930fc011792c4f74ec74daaa2bbd9a33f.bundle",
"path": "asset.c7f4a8ca56d10961cf5e40d181faddf14f2ea8b39c5d65c7b61a366b17c7a2ce.bundle",
"packaging": "zip"
},
"destinations": {
"current_account-current_region": {
"bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}",
"objectKey": "18f77fb224555ccad15f9c2e0f71ae6930fc011792c4f74ec74daaa2bbd9a33f.zip",
"objectKey": "c7f4a8ca56d10961cf5e40d181faddf14f2ea8b39c5d65c7b61a366b17c7a2ce.zip",
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}"
}
}
},
"9b28bfafd748e7836cc59ca3e1167ed0048cf831b6df7efa11648ffa3c43e10e": {
"763d169d10d22f0a01c5cab6482fd43062c1c41461ccb9546f484d0d8a4239bd": {
"source": {
"path": "JsResolverIntegTestDefaultTestDeployAssert57AD8D20.template.json",
"packaging": "file"
},
"destinations": {
"current_account-current_region": {
"bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}",
"objectKey": "9b28bfafd748e7836cc59ca3e1167ed0048cf831b6df7efa11648ffa3c43e10e.json",
"objectKey": "763d169d10d22f0a01c5cab6482fd43062c1c41461ccb9546f484d0d8a4239bd.json",
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}"
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
"Payload.data.addTest.id",
"Payload.data.addTest.id"
],
"salt": "1691091128259"
"salt": "1693924790471"
},
"UpdateReplacePolicy": "Delete",
"DeletionPolicy": "Delete"
Expand Down Expand Up @@ -158,7 +158,7 @@
"S3Bucket": {
"Fn::Sub": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}"
},
"S3Key": "18f77fb224555ccad15f9c2e0f71ae6930fc011792c4f74ec74daaa2bbd9a33f.zip"
"S3Key": "c7f4a8ca56d10961cf5e40d181faddf14f2ea8b39c5d65c7b61a366b17c7a2ce.zip"
},
"Timeout": 120,
"Handler": "index.handler",
Expand Down Expand Up @@ -226,7 +226,7 @@
}
},
"flattenResponse": "false",
"salt": "1691091128260"
"salt": "1693924790503"
},
"UpdateReplacePolicy": "Delete",
"DeletionPolicy": "Delete"
Expand Down

This file was deleted.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -91,11 +91,13 @@ export async function handler(event: LogRetentionEvent, context: AWSLambda.Conte
const logGroupRegion = event.ResourceProperties.LogGroupRegion;

// Parse to AWS SDK retry options
const withDelay = makeWithDelay(parseIntOptional(event.ResourceProperties.SdkRetry?.maxRetries));
const maxRetries = parseIntOptional(event.ResourceProperties.SdkRetry?.maxRetries) ?? 5;
const withDelay = makeWithDelay(maxRetries);

const sdkConfig: Logs.CloudWatchLogsClientConfig = {
logger: console,
region: logGroupRegion,
maxAttempts: Math.max(5, maxRetries), // Use a minimum for SDK level retries, because it might include retryable failures that withDelay isn't checking for
};
const client = new Logs.CloudWatchLogsClient(sdkConfig);

Expand Down Expand Up @@ -185,7 +187,7 @@ function parseIntOptional(value?: string, base = 10): number | undefined {
}

function makeWithDelay(
maxRetries: number = 5,
maxRetries: number,
delayBase: number = 100,
delayCap = 10 * 1000, // 10s
): (block: () => Promise<void>) => Promise<void> {
Expand All @@ -202,7 +204,11 @@ function makeWithDelay(
try {
return await block();
} catch (error: any) {
if (error instanceof Logs.OperationAbortedException || error.name === 'OperationAbortedException') {
if (
error instanceof Logs.OperationAbortedException
|| error.name === 'OperationAbortedException'
|| error.name === 'ThrottlingException' // There is no class to check with instanceof, see https://github.com/aws/aws-sdk-js-v3/issues/5140
) {
if (attempts < maxRetries ) {
attempts++;
await new Promise(resolve => setTimeout(resolve, calculateDelay(attempts, delayBase, delayCap)));
Expand Down
Loading

0 comments on commit 8721a51

Please sign in to comment.