Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(logs): LogRetention resources fail with rate exceeded errors #26858

Merged
merged 4 commits into from
Aug 24, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"version": "33.0.0",
"files": {
"21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22": {
"source": {
"path": "LogRetentionIntegRetriesDefaultTestDeployAssert6D1A1A1C.template.json",
"packaging": "file"
},
"destinations": {
"current_account-current_region": {
"bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}",
"objectKey": "21fbb51d7b23f6a6c262b46a9caee79d744a3ac019fd45422d988b96d44b2a22.json",
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}"
}
}
}
},
"dockerImages": {}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"Parameters": {
"BootstrapVersion": {
"Type": "AWS::SSM::Parameter::Value<String>",
"Default": "/cdk-bootstrap/hnb659fds/version",
"Description": "Version of the CDK Bootstrap resources in this environment, automatically retrieved from SSM Parameter Store. [cdk:skip]"
}
},
"Rules": {
"CheckBootstrapVersion": {
"Assertions": [
{
"Assert": {
"Fn::Not": [
{
"Fn::Contains": [
[
"1",
"2",
"3",
"4",
"5"
],
{
"Ref": "BootstrapVersion"
}
]
}
]
},
"AssertDescription": "CDK bootstrap stack version 6 required. Please run 'cdk bootstrap' with a recent version of the CDK CLI."
}
]
}
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
interface LogRetentionEvent extends Omit<AWSLambda.CloudFormationCustomResourceEvent, 'ResourceProperties'> {
ResourceProperties: {
ServiceToken: string;
LogGroupName: string;
LogGroupRegion?: string;
RetentionInDays?: string;
SdkRetry?: {
maxRetries?: string;
};
RemovalPolicy?: string;
};
}
export declare function handler(event: LogRetentionEvent, context: AWSLambda.Context): Promise<void>;
export {};

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,229 @@
/* eslint-disable no-console */
// eslint-disable-next-line import/no-extraneous-dependencies
import * as Logs from '@aws-sdk/client-cloudwatch-logs';

interface LogRetentionEvent extends Omit<AWSLambda.CloudFormationCustomResourceEvent, 'ResourceProperties'> {
ResourceProperties: {
ServiceToken: string;
LogGroupName: string;
LogGroupRegion?: string;
RetentionInDays?: string;
SdkRetry?: {
maxRetries?: string;
};
RemovalPolicy?: string
};
}

/**
* Creates a log group and doesn't throw if it exists.
*/
async function createLogGroupSafe(logGroupName: string, client: Logs.CloudWatchLogsClient, withDelay: (block: () => Promise<void>) => Promise<void>) {
await withDelay(async () => {
try {
const params = { logGroupName };
const command = new Logs.CreateLogGroupCommand(params);
await client.send(command);

} catch (error: any) {
if (error instanceof Logs.ResourceAlreadyExistsException || error.name === 'ResourceAlreadyExistsException') {
// The log group is already created by the lambda execution
return;
}

throw error;
}
});
}

/**
* Deletes a log group and doesn't throw if it does not exist.
*/
async function deleteLogGroup(logGroupName: string, client: Logs.CloudWatchLogsClient, withDelay: (block: () => Promise<void>) => Promise<void>) {
await withDelay(async () => {
try {
const params = { logGroupName };
const command = new Logs.DeleteLogGroupCommand(params);
await client.send(command);

} catch (error: any) {
if (error instanceof Logs.ResourceNotFoundException || error.name === 'ResourceNotFoundException') {
// The log group doesn't exist
return;
}

throw error;
}
});
}

/**
* Puts or deletes a retention policy on a log group.
*/
async function setRetentionPolicy(
logGroupName: string,
client: Logs.CloudWatchLogsClient,
withDelay: (block: () => Promise<void>) => Promise<void>,
retentionInDays?: number,
) {

await withDelay(async () => {
if (!retentionInDays) {
const params = { logGroupName };
const deleteCommand = new Logs.DeleteRetentionPolicyCommand(params);
await client.send(deleteCommand);
} else {
const params = { logGroupName, retentionInDays };
const putCommand = new Logs.PutRetentionPolicyCommand(params);
await client.send(putCommand);
}
});
}

export async function handler(event: LogRetentionEvent, context: AWSLambda.Context) {
try {
console.log(JSON.stringify({ ...event, ResponseURL: '...' }));

// The target log group
const logGroupName = event.ResourceProperties.LogGroupName;

// The region of the target log group
const logGroupRegion = event.ResourceProperties.LogGroupRegion;

// Parse to AWS SDK retry options
const maxRetries = parseIntOptional(event.ResourceProperties.SdkRetry?.maxRetries) ?? 5;
const withDelay = makeWithDelay(maxRetries);

const sdkConfig: Logs.CloudWatchLogsClientConfig = {
logger: console,
region: logGroupRegion,
maxAttempts: Math.max(5, maxRetries), // Use a minimum for SDK level retries, because it might include retryable failures that withDelay isn't checking for
};
const client = new Logs.CloudWatchLogsClient(sdkConfig);

if (event.RequestType === 'Create' || event.RequestType === 'Update') {
// Act on the target log group
await createLogGroupSafe(logGroupName, client, withDelay);
await setRetentionPolicy(logGroupName, client, withDelay, parseIntOptional(event.ResourceProperties.RetentionInDays));

// Configure the Log Group for the Custom Resource function itself
if (event.RequestType === 'Create') {
const clientForCustomResourceFunction = new Logs.CloudWatchLogsClient({
logger: console,
region: process.env.AWS_REGION,
});
// Set a retention policy of 1 day on the logs of this very function.
// Due to the async nature of the log group creation, the log group for this function might
// still be not created yet at this point. Therefore we attempt to create it.
// In case it is being created, createLogGroupSafe will handle the conflict.
await createLogGroupSafe(`/aws/lambda/${context.functionName}`, clientForCustomResourceFunction, withDelay);
// If createLogGroupSafe fails, the log group is not created even after multiple attempts.
// In this case we have nothing to set the retention policy on but an exception will skip
// the next line.
await setRetentionPolicy(`/aws/lambda/${context.functionName}`, clientForCustomResourceFunction, withDelay, 1);
}
}

// When the requestType is delete, delete the log group if the removal policy is delete
if (event.RequestType === 'Delete' && event.ResourceProperties.RemovalPolicy === 'destroy') {
await deleteLogGroup(logGroupName, client, withDelay);
// else retain the log group
}

await respond('SUCCESS', 'OK', logGroupName);
} catch (e: any) {
console.log(e);
await respond('FAILED', e.message, event.ResourceProperties.LogGroupName);
}

function respond(responseStatus: string, reason: string, physicalResourceId: string) {
const responseBody = JSON.stringify({
Status: responseStatus,
Reason: reason,
PhysicalResourceId: physicalResourceId,
StackId: event.StackId,
RequestId: event.RequestId,
LogicalResourceId: event.LogicalResourceId,
Data: {
// Add log group name as part of the response so that it's available via Fn::GetAtt
LogGroupName: event.ResourceProperties.LogGroupName,
},
});

console.log('Responding', responseBody);

// eslint-disable-next-line @typescript-eslint/no-require-imports
const parsedUrl = require('url').parse(event.ResponseURL);
const requestOptions = {
hostname: parsedUrl.hostname,
path: parsedUrl.path,
method: 'PUT',
headers: {
'content-type': '',
'content-length': Buffer.byteLength(responseBody, 'utf8'),
},
};

return new Promise((resolve, reject) => {
try {
// eslint-disable-next-line @typescript-eslint/no-require-imports
const request = require('https').request(requestOptions, resolve);
request.on('error', reject);
request.write(responseBody);
request.end();
} catch (e) {
reject(e);
}
});
}
}

function parseIntOptional(value?: string, base = 10): number | undefined {
if (value === undefined) {
return undefined;
}

return parseInt(value, base);
}

function makeWithDelay(
maxRetries: number,
delayBase: number = 100,
delayCap = 10 * 1000, // 10s
): (block: () => Promise<void>) => Promise<void> {
// If we try to update the log group, then due to the async nature of
// Lambda logging there could be a race condition when the same log group is
// already being created by the lambda execution. This can sometime result in
// an error "OperationAbortedException: A conflicting operation is currently
// in progress...Please try again."
// To avoid an error, we do as requested and try again.

return async (block: () => Promise<void>) => {
let attempts = 0;
do {
try {
return await block();
} catch (error: any) {
if (
error instanceof Logs.OperationAbortedException
|| error.name === 'OperationAbortedException'
|| error.name === 'ThrottlingException' // There is no class to check with instanceof, see https://github.com/aws/aws-sdk-js-v3/issues/5140
) {
if (attempts < maxRetries ) {
attempts++;
await new Promise(resolve => setTimeout(resolve, calculateDelay(attempts, delayBase, delayCap)));
continue;
} else {
// The log group is still being changed by another execution but we are out of retries
throw new Error('Out of attempts to change log group');
}
}
throw error;
}
} while (true); // exit happens on retry count check
};
}

function calculateDelay(attempt: number, base: number, cap: number): number {
return Math.round(Math.random() * Math.min(cap, base * 2 ** attempt));
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
{
"version": "33.0.0",
"files": {
"a8515c042d9c942705087943220417be929ac44f968d8fcef2681681b400c0c0": {
"source": {
"path": "asset.a8515c042d9c942705087943220417be929ac44f968d8fcef2681681b400c0c0",
"packaging": "zip"
},
"destinations": {
"current_account-current_region": {
"bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}",
"objectKey": "a8515c042d9c942705087943220417be929ac44f968d8fcef2681681b400c0c0.zip",
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}"
}
}
},
"9990a29f03d0c5431a972aeb27fc605359cf0093ddd08bfbdf611189e8116726": {
"source": {
"path": "aws-cdk-log-retention-integ-retries0.template.json",
"packaging": "file"
},
"destinations": {
"current_account-current_region": {
"bucketName": "cdk-hnb659fds-assets-${AWS::AccountId}-${AWS::Region}",
"objectKey": "9990a29f03d0c5431a972aeb27fc605359cf0093ddd08bfbdf611189e8116726.json",
"assumeRoleArn": "arn:${AWS::Partition}:iam::${AWS::AccountId}:role/cdk-hnb659fds-file-publishing-role-${AWS::AccountId}-${AWS::Region}"
}
}
}
},
"dockerImages": {}
}
Loading