diff --git a/.circleci/nuke_config.yml b/.circleci/nuke_config.yml index b3908cf1..d97b5076 100644 --- a/.circleci/nuke_config.yml +++ b/.circleci/nuke_config.yml @@ -39,3 +39,48 @@ OIDCProvider: names_regex: # We have an active OIDC Provider used by github actions - ".*token.actions.githubusercontent.com.*" + +CloudWatchLogGroup: + # Use an allow list instead of block list because we haven't done the due diligence yet to figure out what log groups + # we need to keep. This is hard to do in the current scenario as we have thousands of log groups in the accounts. + include: + names_regex: + - "/aws/containerinsights/eks-cluster-[a-zA-Z0-9]{6}/.*" + - "/aws/containerinsights/eks-service-catalog-[a-zA-Z0-9]{6}/.*" + - "/aws/ecs/containerinsights/[a-zA-Z0-9]{6}-cluster/.*" + - "/aws/ecs/containerinsights/[a-zA-Z0-9]{6}-ecs-cluster/.*" + - "/aws/ecs/containerinsights/Test-cluster[a-zA-Z0-9]{6}/.*" + - "/aws/eks/eks-cluster-[a-zA-Z0-9]{6}.*" + - "/aws/eks/eks-service-catalog-[a-zA-Z0-9]{6}.*" + - "/aws/lambda/Test.*" + - "/aws/lambda/[a-zA-Z0-9]{6}-ecs-deploy-runner.*" + - "/aws/lambda/ecs-deploy-runner-[a-zA-Z0-9]{6}-invoker$" + - "/aws/lambda/es-cluster-[a-zA-Z0-9]{6}.*" + - "/aws/lambda/jenkins-[a-zA-Z0-9]{6}.*" + - "/aws/lambda/jenkins[a-zA-Z0-9]{6}-0-backup$" + - "/aws/lambda/test-aurora-[a-zA-Z0-9]{6}.*" + - "/aws/lambda/[a-zA-Z0-9]{6}-handler$" + - "/aws/lambda/test[a-zA-Z0-9]{6}.*" + - "/aws/lambda/lambda-[a-zA-Z0-9]{6}$" + - "/aws/lambda/us-east-1\\.[a-zA-Z0-9]{6}cloudfront.*" + - "/aws/lambda/cleanup-expired-iam-certs-[a-zA-Z0-9]{6}$" + - "/aws/lambda/create-snapshot-example-(aurora|mysql)$" + - "/aws/lambda/HoustonExpress[a-zA-Z0-9]{6}.*" + - "/aws/rds/cluster/test[a-zA-Z0-9]{6}.*" + - "/aws/rds/cluster/test-aurora-[a-zA-Z0-9]{6}.*" + - "eks-service-catalog-[a-zA-Z0-9]{6}$" + - "eks-cluster-[a-zA-Z0-9]{6}-container-logs$" + - "es-[a-zA-Z0-9]{6}-lg$" + - "TestCloudWatchLogAggregation.*" + - "API-Gateway-Execution-Logs_.*/example$" + - "asg-[a-zA-Z0-9]{6}$" + - 
"^[a-zA-Z0-9]{6}-service$" + - "^[a-zA-Z0-9]{6}-ecs-deploy-runner-[a-zA-Z0-9]{6}$" + - "ecs-deploy-runner-[a-zA-Z0-9]{6}$" + - "^[a-zA-Z0-9]{6}(stage|prod)-ec2-syslog$" + - "Ubuntu1804-[a-zA-Z0-9]{6}-logs$" + - "bastion-host-[a-zA-Z0-9]{6}$" + - "ec2-instance-[a-zA-Z0-9]{6}$" + - "jenkins-[a-zA-Z0-9]{6}$" + - "openvpn-server-[a-zA-Z0-9]{6}_log_group$" + - "vpc-test-[a-zA-Z0-9]{6}.*" diff --git a/README.md b/README.md index c71e0ba4..b7f33cc8 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,7 @@ The currently supported functionality includes: - Deleting all OpenSearch Domains in an AWS account - Deleting all IAM OpenID Connect Providers - Deleting all Customer managed keys from Key Management Service in an AWS account +- Deleting all CloudWatch Log Groups in an AWS Account ### BEWARE! @@ -197,6 +198,9 @@ The following resources support the Config file: - IAM OpenID Connect Providers - Resource type: `oidcprovider` - Config key: `OIDCProvider` +- CloudWatch LogGroups + - Resource type: `cloudwatch-loggroup` + - Config key: `CloudWatchLogGroup` - KMS customer keys - Resource type: `kmscustomerkeys` @@ -287,28 +291,29 @@ Be careful when nuking and append the `--dry-run` option if you're unsure. Even To find out what we options are supported in the config file today, consult this table. Resource types at the top level of the file that are supported are listed here. 
-| resource type | names | names_regex | tags | tags_regex | -|--------------------|-------|-------------|------|------------| -| s3 | none | ✅ | none | none | -| iam | none | ✅ | none | none | -| ecsserv | none | ✅ | none | none | -| ecscluster | none | ✅ | none | none | -| secretsmanager | none | ✅ | none | none | -| nat-gateway | none | ✅ | none | none | -| accessanalyzer | none | ✅ | none | none | -| dynamodb | none | ✅ | none | none | -| ebs | none | ✅ | none | none | -| lambda | none | ✅ | none | none | -| elbv2 | none | ✅ | none | none | -| ecs | none | ✅ | none | none | -| elasticache | none | ✅ | none | none | -| vpc | none | ✅ | none | none | -| oidcprovider | none | ✅ | none | none | -| kmscustomerkeys | none | ✅ | none | none | -| acmpca | none | none | none | none | -| ec2 instance | none | none | none | none | -| iam role | none | none | none | none | -| ... (more to come) | none | none | none | none | +| resource type | names | names_regex | tags | tags_regex | +|---------------------|-------|-------------|------|------------| +| s3 | none | ✅ | none | none | +| iam | none | ✅ | none | none | +| ecsserv | none | ✅ | none | none | +| ecscluster | none | ✅ | none | none | +| secretsmanager | none | ✅ | none | none | +| nat-gateway | none | ✅ | none | none | +| accessanalyzer | none | ✅ | none | none | +| dynamodb | none | ✅ | none | none | +| ebs | none | ✅ | none | none | +| lambda | none | ✅ | none | none | +| elbv2 | none | ✅ | none | none | +| ecs | none | ✅ | none | none | +| elasticache | none | ✅ | none | none | +| vpc | none | ✅ | none | none | +| oidcprovider | none | ✅ | none | none | +| cloudwatch-loggroup | none | ✅ | none | none | +| kmscustomerkeys | none | ✅ | none | none | +| acmpca | none | none | none | none | +| ec2 instance | none | none | none | none | +| iam role | none | none | none | none | +| ... 
(more to come) | none | none | none | none | diff --git a/aws/aws.go b/aws/aws.go index d54b4faf..02017cdb 100644 --- a/aws/aws.go +++ b/aws/aws.go @@ -606,6 +606,20 @@ func GetAllResources(targetRegions []string, excludeAfter time.Time, resourceTyp } // End CloudWatchDashboard + // CloudWatchLogGroup + cloudwatchLogGroups := CloudWatchLogGroups{} + if IsNukeable(cloudwatchLogGroups.ResourceName(), resourceTypes) { + lgNames, err := getAllCloudWatchLogGroups(session, excludeAfter, configObj) + if err != nil { + return nil, errors.WithStackTrace(err) + } + if len(lgNames) > 0 { + cloudwatchLogGroups.Names = awsgo.StringValueSlice(lgNames) + resourcesInRegion.Resources = append(resourcesInRegion.Resources, cloudwatchLogGroups) + } + } + // End CloudWatchLogGroup + // S3 Buckets s3Buckets := S3Buckets{} if IsNukeable(s3Buckets.ResourceName(), resourceTypes) { @@ -801,6 +815,7 @@ func ListResourceTypes() []string { Elasticaches{}.ResourceName(), OIDCProviders{}.ResourceName(), KmsCustomerKeys{}.ResourceName(), + CloudWatchLogGroups{}.ResourceName(), } sort.Strings(resourceTypes) return resourceTypes diff --git a/aws/cloudwatch_loggroup.go b/aws/cloudwatch_loggroup.go new file mode 100644 index 00000000..7af42797 --- /dev/null +++ b/aws/cloudwatch_loggroup.go @@ -0,0 +1,134 @@ +package aws + +import ( + "sync" + "time" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/awserr" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/cloudwatchlogs" + "github.com/gruntwork-io/cloud-nuke/config" + "github.com/gruntwork-io/cloud-nuke/logging" + "github.com/gruntwork-io/go-commons/errors" + "github.com/hashicorp/go-multierror" +) + +func getAllCloudWatchLogGroups(session *session.Session, excludeAfter time.Time, configObj config.Config) ([]*string, error) { + svc := cloudwatchlogs.New(session) + + allLogGroups := []*string{} + err := svc.DescribeLogGroupsPages( + &cloudwatchlogs.DescribeLogGroupsInput{}, + func(page 
*cloudwatchlogs.DescribeLogGroupsOutput, lastPage bool) bool { + for _, logGroup := range page.LogGroups { + if shouldIncludeCloudWatchLogGroup(logGroup, excludeAfter, configObj) { + allLogGroups = append(allLogGroups, logGroup.LogGroupName) + } + } + return !lastPage + }, + ) + if err != nil { + return nil, errors.WithStackTrace(err) + } + return allLogGroups, nil +} + +func shouldIncludeCloudWatchLogGroup(logGroup *cloudwatchlogs.LogGroup, excludeAfter time.Time, configObj config.Config) bool { + if logGroup == nil { + return false + } + + if logGroup.CreationTime != nil { + // Convert milliseconds since epoch to time.Time object + creationTime := time.Unix(0, aws.Int64Value(logGroup.CreationTime)*int64(time.Millisecond)) + if excludeAfter.Before(creationTime) { + return false + } + } + + return config.ShouldInclude( + aws.StringValue(logGroup.LogGroupName), + configObj.CloudWatchLogGroup.IncludeRule.NamesRegExp, + configObj.CloudWatchLogGroup.ExcludeRule.NamesRegExp, + ) +} + +func nukeAllCloudWatchLogGroups(session *session.Session, identifiers []*string) error { + region := aws.StringValue(session.Config.Region) + svc := cloudwatchlogs.New(session) + + if len(identifiers) == 0 { + logging.Logger.Infof("No CloudWatch Log Groups to nuke in region %s", *session.Config.Region) + return nil + } + + // NOTE: we don't need to do pagination here, because the pagination is handled by the caller to this function, + // based on CloudWatchLogGroup.MaxBatchSize, however we add a guard here to warn users when the batching fails and + // has a chance of throttling AWS. Since we concurrently make one call for each identifier, we pick 100 for the + // limit here because many APIs in AWS have a limit of 100 requests per second. 
+ if len(identifiers) > 100 { + logging.Logger.Errorf("Nuking too many CloudWatch LogGroups at once (over 100): halting to avoid hitting AWS API rate limiting") + return TooManyLogGroupsErr{} + } + + // There is no bulk delete CloudWatch Log Group API, so we delete the batch of CloudWatch Log Groups concurrently + // using go routines. + logging.Logger.Infof("Deleting CloudWatch Log Groups in region %s", region) + wg := new(sync.WaitGroup) + wg.Add(len(identifiers)) + errChans := make([]chan error, len(identifiers)) + for i, logGroupName := range identifiers { + errChans[i] = make(chan error, 1) + go deleteCloudWatchLogGroupAsync(wg, errChans[i], svc, logGroupName, region) + } + wg.Wait() + + // Collect all the errors from the async delete calls into a single error struct. + // NOTE: We ignore OperationAbortedException which is thrown when there is an eventual consistency issue, where + // cloud-nuke picks up a Log Group that is already requested to be deleted. Errors that are not awserr.Error (e.g. + // network failures) must still be collected, hence the `!ok ||` form below. + var allErrs *multierror.Error + for _, errChan := range errChans { + if err := <-errChan; err != nil { + if awsErr, ok := err.(awserr.Error); !ok || awsErr.Code() != "OperationAbortedException" { + allErrs = multierror.Append(allErrs, err) + } + } + } + finalErr := allErrs.ErrorOrNil() + if finalErr != nil { + return errors.WithStackTrace(finalErr) + } + return nil +} + +// deleteCloudWatchLogGroupAsync deletes the provided Log Group asynchronously in a goroutine, using wait groups for +// concurrency control and a return channel for errors. 
+func deleteCloudWatchLogGroupAsync( + wg *sync.WaitGroup, + errChan chan error, + svc *cloudwatchlogs.CloudWatchLogs, + logGroupName *string, + region string, +) { + defer wg.Done() + input := &cloudwatchlogs.DeleteLogGroupInput{LogGroupName: logGroupName} + _, err := svc.DeleteLogGroup(input) + errChan <- err + + logGroupNameStr := aws.StringValue(logGroupName) + if err == nil { + logging.Logger.Infof("[OK] CloudWatch Log Group %s deleted in %s", logGroupNameStr, region) + } else { + logging.Logger.Errorf("[Failed] Error deleting CloudWatch Log Group %s in %s: %s", logGroupNameStr, region, err) + } +} + +// Custom errors + +type TooManyLogGroupsErr struct{} + +func (err TooManyLogGroupsErr) Error() string { + return "Too many LogGroups requested at once." +} diff --git a/aws/cloudwatch_loggroup_test.go b/aws/cloudwatch_loggroup_test.go new file mode 100644 index 00000000..da05ca60 --- /dev/null +++ b/aws/cloudwatch_loggroup_test.go @@ -0,0 +1,147 @@ +package aws + +import ( + "fmt" + "strings" + "testing" + "time" + + "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/aws/aws-sdk-go/service/cloudwatchlogs" + "github.com/gruntwork-io/cloud-nuke/config" + "github.com/gruntwork-io/cloud-nuke/util" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestListCloudWatchLogGroups(t *testing.T) { + t.Parallel() + + region, err := getRandomRegion() + require.NoError(t, err) + + session, err := session.NewSession(&aws.Config{Region: aws.String(region)}) + require.NoError(t, err) + + svc := cloudwatchlogs.New(session) + + lgName := createCloudWatchLogGroup(t, svc) + defer deleteCloudWatchLogGroup(t, svc, lgName, true) + + lgNames, err := getAllCloudWatchLogGroups(session, time.Now(), config.Config{}) + require.NoError(t, err) + assert.Contains(t, aws.StringValueSlice(lgNames), aws.StringValue(lgName)) +} + +func TestTimeFilterExclusionNewlyCreatedCloudWatchLogGroup(t *testing.T) { + t.Parallel() + + 
region, err := getRandomRegion() + require.NoError(t, err) + + session, err := session.NewSession(&aws.Config{Region: aws.String(region)}) + require.NoError(t, err) + svc := cloudwatchlogs.New(session) + + lgName := createCloudWatchLogGroup(t, svc) + defer deleteCloudWatchLogGroup(t, svc, lgName, true) + + // Assert the CloudWatch Log Group is picked up without filters + lgNames, err := getAllCloudWatchLogGroups(session, time.Now(), config.Config{}) + require.NoError(t, err) + assert.Contains(t, aws.StringValueSlice(lgNames), aws.StringValue(lgName)) + + // Assert the Log Group doesn't appear when we only look at Log Groups older than 1 Hour + olderThan := time.Now().Add(-1 * time.Hour) + lgNamesOlder, err := getAllCloudWatchLogGroups(session, olderThan, config.Config{}) + require.NoError(t, err) + assert.NotContains(t, aws.StringValueSlice(lgNamesOlder), aws.StringValue(lgName)) +} + +func TestNukeCloudWatchLogGroupOne(t *testing.T) { + t.Parallel() + + region, err := getRandomRegion() + require.NoError(t, err) + + session, err := session.NewSession(&aws.Config{Region: aws.String(region)}) + require.NoError(t, err) + svc := cloudwatchlogs.New(session) + + // We ignore errors in the delete call here, because it is intended to be a stop gap in case there is a bug in nuke. + lgName := createCloudWatchLogGroup(t, svc) + defer deleteCloudWatchLogGroup(t, svc, lgName, false) + identifiers := []*string{lgName} + + require.NoError( + t, + nukeAllCloudWatchLogGroups(session, identifiers), + ) + + // Make sure the CloudWatch Log Group is deleted. 
+ assertCloudWatchLogGroupsDeleted(t, svc, identifiers) +} + +func TestNukeCloudWatchLogGroupMoreThanOne(t *testing.T) { + t.Parallel() + + region, err := getRandomRegion() + require.NoError(t, err) + + session, err := session.NewSession(&aws.Config{Region: aws.String(region)}) + require.NoError(t, err) + svc := cloudwatchlogs.New(session) + + lgNames := []*string{} + for i := 0; i < 3; i++ { + // We ignore errors in the delete call here, because it is intended to be a stop gap in case there is a bug in nuke. + lgName := createCloudWatchLogGroup(t, svc) + defer deleteCloudWatchLogGroup(t, svc, lgName, false) + lgNames = append(lgNames, lgName) + } + + require.NoError( + t, + nukeAllCloudWatchLogGroups(session, lgNames), + ) + + // Make sure the CloudWatch Log Groups are deleted. + assertCloudWatchLogGroupsDeleted(t, svc, lgNames) +} + +func createCloudWatchLogGroup(t *testing.T, svc *cloudwatchlogs.CloudWatchLogs) *string { + uniqueID := util.UniqueID() + name := fmt.Sprintf("cloud-nuke-test-%s", strings.ToLower(uniqueID)) + + _, err := svc.CreateLogGroup(&cloudwatchlogs.CreateLogGroupInput{ + LogGroupName: aws.String(name), + }) + require.NoError(t, err) + + // Add an arbitrary sleep to account for eventual consistency + time.Sleep(15 * time.Second) + return &name +} + +// deleteCloudWatchLogGroup deletes the given CloudWatch Log Group, failing the test on a delete error only when checkErr is true. 
+func deleteCloudWatchLogGroup(t *testing.T, svc *cloudwatchlogs.CloudWatchLogs, name *string, checkErr bool) { + _, err := svc.DeleteLogGroup(&cloudwatchlogs.DeleteLogGroupInput{ + LogGroupName: name, + }) + if checkErr { + require.NoError(t, err) + } +} + +func assertCloudWatchLogGroupsDeleted(t *testing.T, svc *cloudwatchlogs.CloudWatchLogs, identifiers []*string) { + for _, name := range identifiers { + resp, err := svc.DescribeLogGroups(&cloudwatchlogs.DescribeLogGroupsInput{ + LogGroupNamePrefix: name, + }) + require.NoError(t, err) + if len(resp.LogGroups) > 0 { + t.Fatalf("Log Group %s is not deleted", aws.StringValue(name)) + } + } +} diff --git a/aws/cloudwatch_loggroup_types.go b/aws/cloudwatch_loggroup_types.go new file mode 100644 index 00000000..525d4f67 --- /dev/null +++ b/aws/cloudwatch_loggroup_types.go @@ -0,0 +1,38 @@ +package aws + +import ( + awsgo "github.com/aws/aws-sdk-go/aws" + "github.com/aws/aws-sdk-go/aws/session" + "github.com/gruntwork-io/go-commons/errors" +) + +// CloudWatchLogGroups - represents all CloudWatch Log Groups found in a region +type CloudWatchLogGroups struct { + Names []string +} + +// ResourceName - the simple name of the aws resource +func (r CloudWatchLogGroups) ResourceName() string { + return "cloudwatch-loggroup" +} + +// ResourceIdentifiers - The names of the CloudWatch Log Groups +func (r CloudWatchLogGroups) ResourceIdentifiers() []string { + return r.Names +} + +func (r CloudWatchLogGroups) MaxBatchSize() int { + // Tentative batch size to ensure AWS doesn't throttle. Note that CloudWatch Logs does not support bulk delete, so + // we will be deleting this many in parallel using go routines. We pick 35 here, which is half of what the AWS web + // console will do. We pick a conservative number here to avoid hitting AWS API rate limits. + return 35 +} + +// Nuke - nuke 'em all!!! 
+func (r CloudWatchLogGroups) Nuke(session *session.Session, identifiers []string) error { + if err := nukeAllCloudWatchLogGroups(session, awsgo.StringSlice(identifiers)); err != nil { + return errors.WithStackTrace(err) + } + + return nil +} diff --git a/config/config.go b/config/config.go index b1867537..a51a02bc 100644 --- a/config/config.go +++ b/config/config.go @@ -26,6 +26,7 @@ type Config struct { Elasticache ResourceType `yaml:"Elasticache"` VPC ResourceType `yaml:"VPC"` OIDCProvider ResourceType `yaml:"OIDCProvider"` + CloudWatchLogGroup ResourceType `yaml:"CloudWatchLogGroup"` } type ResourceType struct { diff --git a/config/config_test.go b/config/config_test.go index cf9949ab..61b62b16 100644 --- a/config/config_test.go +++ b/config/config_test.go @@ -27,6 +27,7 @@ func emptyConfig() *Config { ResourceType{FilterRule{}, FilterRule{}}, ResourceType{FilterRule{}, FilterRule{}}, ResourceType{FilterRule{}, FilterRule{}}, + ResourceType{FilterRule{}, FilterRule{}}, } }