From 4477fd287e97f7e0a8c629286ffaf76e110b4e35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Taveira=20Ara=C3=BAjo?= Date: Wed, 22 May 2024 13:42:58 -0700 Subject: [PATCH] fix(metricstream): set recommended filter by omission Previously we were collecting all metrics by omission. This commit instead loads a recommended set of filters from a file on disk. The file is kept in sync with the filter we provide for cloudformation. We could have loaded the filter dynamically directly from the hosted URL, but that would be at odds with the idempotent nature of terraform modules. Since we already have to update the binary versions of lambda functions, we can piggyback on that process to mirror the file. --- .github/workflows/update-deps.yaml | 3 + Makefile | 2 + modules/metricstream/README.md | 4 +- modules/metricstream/filters/recommended.yaml | 268 ++++++++++++++++++ modules/metricstream/main.tf | 13 +- .../tests/metricstream.tftest.hcl | 7 + modules/metricstream/variables.tf | 11 +- utilities/update-filters.sh | 13 + 8 files changed, 308 insertions(+), 13 deletions(-) create mode 100644 modules/metricstream/filters/recommended.yaml create mode 100755 utilities/update-filters.sh diff --git a/.github/workflows/update-deps.yaml b/.github/workflows/update-deps.yaml index de94539..b67305e 100644 --- a/.github/workflows/update-deps.yaml +++ b/.github/workflows/update-deps.yaml @@ -41,6 +41,9 @@ jobs: - name: Update binaries run: make update-binaries + - name: Update filters + run: make update-filters + # Use Peter Evans Pull Request Action to create a pull request - name: Create Pull Request uses: peter-evans/create-pull-request@v6 diff --git a/Makefile b/Makefile index 754be6d..faa796c 100644 --- a/Makefile +++ b/Makefile @@ -12,3 +12,5 @@ update-binaries-forwarder: update-binaries: update-binaries-logwriter update-binaries-forwarder +update-filters: utilities/update-filters.sh + diff --git a/modules/metricstream/README.md b/modules/metricstream/README.md index 
deda0c7..2daef60 100644 --- a/modules/metricstream/README.md +++ b/modules/metricstream/README.md @@ -62,8 +62,8 @@ No modules. | [bucket\_arn](#input\_bucket\_arn) | S3 Bucket ARN to write log records to. | `string` | n/a | yes | | [buffering\_interval](#input\_buffering\_interval) | Buffer incoming data for the specified period of time, in seconds, before
delivering it to S3. | `number` | `60` | no | | [buffering\_size](#input\_buffering\_size) | Buffer incoming data to the specified size, in MiBs, before delivering it
to S3. | `number` | `1` | no | -| [exclude\_filters](#input\_exclude\_filters) | List of exclusion filters. Mutually exclusive with inclusion filters |
list(object({
namespace = string
metric_names = list(string)
}))
| `[]` | no | -| [include\_filters](#input\_include\_filters) | List of inclusion filters. |
list(object({
namespace = string
metric_names = list(string)
}))
| `[]` | no | +| [exclude\_filters](#input\_exclude\_filters) | List of exclusion filters. Mutually exclusive with inclusion filters. |
list(object({
namespace = string
metric_names = list(string)
}))
| `null` | no | +| [include\_filters](#input\_include\_filters) | List of inclusion filters. If neither include\_filters or exclude\_filters is
set, a default filter will be used. |
list(object({
namespace = string
metric_names = list(string)
}))
| `null` | no | | [name](#input\_name) | Name for resources. | `string` | n/a | yes | | [output\_format](#input\_output\_format) | The output format for CloudWatch Metrics. | `string` | `"json"` | no | | [prefix](#input\_prefix) | Optional prefix to write log records to. | `string` | `""` | no | diff --git a/modules/metricstream/filters/recommended.yaml b/modules/metricstream/filters/recommended.yaml new file mode 100644 index 0000000..fe986a9 --- /dev/null +++ b/modules/metricstream/filters/recommended.yaml @@ -0,0 +1,268 @@ +ExcludeFilters: + - Namespace: AWS/RDS + # https://docs.datadoghq.com/integrations/amazon_rds/ + MetricNames: + - AbortedClients + # - ActiveTransactions + # - AuroraBinlogReplicaLag + - AuroraDMLRejectedMasterFull + - AuroraDMLRejectedWriterFull + - AuroraEstimatedSharedMemoryBytes + # - AuroraReplicaLag + # - AuroraReplicaLagMaximum + # - AuroraReplicaLagMinimum + - AuroraSlowConnectionHandleCount + - AuroraSlowHandshakeCount + - AuroraVolumeBytesLeftTotal + - Aurora_pq_request_attempted + - Aurora_pq_request_executed + - Aurora_pq_request_failed + - Aurora_pq_request_in_progress + - Aurora_pq_request_not_chosen + - Aurora_pq_request_not_chosen_below_min_rows + - Aurora_pq_request_not_chosen_column_bit + - Aurora_pq_request_not_chosen_column_geometry + - Aurora_pq_request_not_chosen_column_lob + - Aurora_pq_request_not_chosen_column_virtual + - Aurora_pq_request_not_chosen_custom_charset + - Aurora_pq_request_not_chosen_fast_ddl + - Aurora_pq_request_not_chosen_few_pages_outside_buffer_pool + - Aurora_pq_request_not_chosen_full_text_index + - Aurora_pq_request_not_chosen_high_buffer_pool_pct + - Aurora_pq_request_not_chosen_index_hint + - Aurora_pq_request_not_chosen_innodb_table_format + - Aurora_pq_request_not_chosen_instant_ddl + - Aurora_pq_request_not_chosen_long_trx + - Aurora_pq_request_not_chosen_no_where_clause + - Aurora_pq_request_not_chosen_range_scan + - Aurora_pq_request_not_chosen_row_length_too_long + - 
Aurora_pq_request_not_chosen_small_table + - Aurora_pq_request_not_chosen_temporary_table + - Aurora_pq_request_not_chosen_tx_isolation + - Aurora_pq_request_not_chosen_unsupported_access + - Aurora_pq_request_not_chosen_unsupported_storage_type + - Aurora_pq_request_not_chosen_update_delete_stmts + - Aurora_pq_request_throttled + - AvailabilityPercentage + # - BackupRetentionPeriodStorageUsed + # - BinLogDiskUsage + # - BlockedTransactions + # - BufferCacheHitRatio + # - BurstBalance + - CheckpointLag + - ClientConnections + - ClientConnectionsClosed + - ClientConnectionsNoTLS + - ClientConnectionsReceived + - ClientConnectionsSetupSucceeded + - ClientConnectionsTLS + # - CommitLatency + # - CommitThroughput + - ConnectionAttempts + # - CommitThroughput + # - CPUCreditBalance + # - CPUCreditUsage + # - CPUSurplusCreditBalance + # - CPUSurplusCreditsCharged + # - CPUUtilization + # - DBLoad + # - DBLoadCPU + # - DBLoadNonCPU + # - DDLLatency + # - DDLThroughput + - DatabaseConnectionRequests + - DatabaseConnectionRequestsWithTLS + - DatabaseConnections + - DatabaseConnectionsBorrowLatency + - DatabaseConnectionsCurrentlyBorrowed + - DatabaseConnectionsCurrentlyInTransaction + - DatabaseConnectionsCurrentlySessionPinned + - DatabaseConnectionsSetupSucceeded + - DatabaseConnectionsWithTLS + # - Deadlocks + # - DeleteLatency + # - DeleteThroughput + # - DiskQueueDepth + # - DMLLatency + # - DMLThroughput + - EBSByteBalance% + - EBSIOBalance% + # - EngineUptime + - ForwardingMasterDMLLatency + - ForwardingMasterDMLThroughput + - ForwardingMasterOpenSessions + - ForwardingReplicaDMLLatency + - ForwardingReplicaDMLThroughput + - ForwardingReplicaOpenSessions + - ForwardingReplicaReadWaitLatency + - ForwardingReplicaReadWaitThroughput + - ForwardingReplicaSelectLatency + - ForwardingReplicaSelectThroughput + - ForwardingWriterDMLLatency + - ForwardingWriterDMLThroughput + - ForwardingWriterOpenSessions + # - FreeLocalStorage + # - FreeStorageSpace + # - FreeableMemory + # 
- InsertLatency + # - InsertThroughput + # - LoginFailures + - MaxDatabaseConnectionsAllowed + # - MaximumUsedTransactionIDs + # - NetworkReceiveThroughput + # - NetworkThroughput + # - NetworkTransmitThroughput + - NumBinaryLogFiles + # - OldestReplicationSlotLag + - PurgeBoundary + - PurgeFinishedPoint + # - Queries + - QueryDatabaseResponseLatency + - QueryRequests + - QueryRequestsTLS + - QueryResponseLatency + - RDSToAuroraPostgreSQLReplicaLag + # - ReadIOPS + # - ReadLatency + # - ReadThroughput + # - ReplicationSlotDiskUsage + # - ResultSetCacheHitRatio + - RollbackSegmentHistoryListLength + - RowLockTime + # - SelectLatency + # - SelectThroughput + - StorageNetworkReceiveThroughput + - StorageNetworkThroughput + - StorageNetworkTransmitThroughput + - SumBinaryLogSize + # - SwapUsage + # - TotalBackupStorageBilled + # - TransactionLogsDiskUsage + # - TransactionLogsGeneration + - TruncateFinishedPoint + # - UpdateLatency + # - UpdateThroughput + # - VolumeBytesUsed + # - VolumeReadIOPs + # - VolumeWriteIOPs + # - WriteIOPS + # - WriteLatency + # - WriteThroughput + - Namespace: AWS/ApplicationELB + # https://docs.datadoghq.com/integrations/amazon_elb/#metrics + MetricNames: + # - ActiveConnectionCount + - AnomalousHostCount + # - ClientTLSNegotiationErrorCount + # - ConsumedLCUs + - DesyncMitigationMode_NonCompliant_Request_Count + - ForwardedInvalidHeaderRequestCount + # - HealthyHostCount + - HealthyStateDNS + - HealthyStateRouting + - HTTPCode_ELB_3XX_Count + # - HTTPCode_ELB_4XX_Count + # - HTTPCode_ELB_502_Count + # - HTTPCode_ELB_503_Count + # - HTTPCode_ELB_504_Count + # - HTTPCode_ELB_5XX_Count + # - HTTPCode_Target_2XX_Count + # - HTTPCode_Target_3XX_Count + # - HTTPCode_Target_4XX_Count + # - HTTPCode_Target_5XX_Count + # - HTTP_Redirect_Count + - MitigatedHostCount + # - NewConnectionCount + # - ProcessedBytes + # - RequestCount + # - RequestCountPerTarget + # - TargetResponseTime + # - UnHealthyHostCount + - UnhealthyStateDNS + - 
UnhealthyStateRouting + - Namespace: AWS/AmazonMQ + # https://docs.datadoghq.com/integrations/amazon_mq/ + MetricNames: + # - AckRate + # - ChannelCount + # - ConfirmRate + # - ConnectionCount + # - ConsumerCount + # - ExchangeCount + # - MessageCount + # - MessageReadyCount + # - MessageUnacknowledgedCount + # - PublishRate + # - QueueCount + # - RabbitMQDiskFree + # - RabbitMQDiskFreeLimit + # - RabbitMQFdUsed + - RabbitMQIOReadAverageTime + - RabbitMQIOWriteAverageTime + # - RabbitMQMemLimit + # - RabbitMQMemUsed + # - SystemCpuUtilization + - Namespace: AWS/ElastiCache + # https://docs.datadoghq.com/integrations/amazon_elasticache/ + MetricNames: + # - ActiveDefragHits + - AuthenticationFailures + # - BytesUsedForCache + # - CacheHitRate + # - CacheHits + # - CacheMisses + - ChannelAuthorizationFailures + - CommandAuthorizationFailures + # - CPUCreditBalance + # - CPUCreditUsage + # - CPUUtilization + # - CurrConnections + # - CurrItems + - CurrVolatileItems + - DatabaseCapacityUsageCountedForEvictPercentage + - DatabaseCapacityUsagePercentage + - DatabaseMemoryUsageCountedForEvictPercentage + # - DatabaseMemoryUsagePercentage + # - DB0AverageTTL + # - EngineCPUUtilization + # - EvalBasedCmds + # - Evictions + # - FreeableMemory + # - GetTypeCmds + # - GetTypeCmdsLatency + # - HashBasedCmds + - IamAuthenticationExpirations + - IamAuthenticationThrottling + # - IsMaster + - KeyAuthorizationFailures + # - KeyBasedCmds + # - KeyBasedCmdsLatency + - KeysTracked + # - ListBasedCmds + - ListBasedCmdsLatency + # - MasterLinkHealthStatus + # - MemoryFragmentationRatio + - NetworkBandwidthInAllowanceExceeded + - NetworkBandwidthOutAllowanceExceeded + # - NetworkBytesIn + # - NetworkBytesOut + - NetworkConntrackAllowanceExceeded + - NetworkMaxBytesIn + - NetworkMaxBytesOut + - NetworkMaxPacketsIn + - NetworkMaxPacketsOut + # - NetworkPacketsIn + # - NetworkPacketsOut + - NetworkPacketsPerSecondAllowanceExceeded + # - Reclaimed + # - ReplicationBytes + # - ReplicationLag 
+ # - SaveInProgress + # - SetTypeCmds + # - SetTypeCmdsLatency + # - SortedSetBasedCmds + # - SortedSetBasedCmdsLatency + # - StringBasedCmds + # - StringBasedCmdsLatency + # - SwapUsage + - TrafficManagementActive diff --git a/modules/metricstream/main.tf b/modules/metricstream/main.tf index b669c86..7a724b5 100644 --- a/modules/metricstream/main.tf +++ b/modules/metricstream/main.tf @@ -1,9 +1,12 @@ locals { - account_id = data.aws_caller_identity.current.account_id - region = data.aws_region.current.name - name_prefix = "${substr(var.name, 0, 37)}-" - include_filters = var.include_filters - exclude_filters = var.exclude_filters + account_id = data.aws_caller_identity.current.account_id + region = data.aws_region.current.name + name_prefix = "${substr(var.name, 0, 37)}-" + recommended_filters = yamldecode(file("${path.module}/filters/recommended.yaml")) + use_recommended = var.include_filters == null && var.exclude_filters == null + # recommended filters apply only when neither variable is set: convert them from cloudformation CamelCase to terraform snake_case when falling back, otherwise pass the user's lists through with an unset (null) list normalized to [] + include_filters = local.use_recommended ? try([for v in local.recommended_filters["IncludeFilters"] : { namespace = v.Namespace, metric_names = v.MetricNames }], []) : (var.include_filters != null ? var.include_filters : []) + exclude_filters = local.use_recommended ?
try([for v in local.recommended_filters["ExcludeFilters"] : { namespace = v.Namespace, metric_names = v.MetricNames }], []) : (var.exclude_filters != null ? var.exclude_filters : []) } data "aws_caller_identity" "current" {} diff --git a/modules/metricstream/tests/metricstream.tftest.hcl b/modules/metricstream/tests/metricstream.tftest.hcl index a609f45..42f477b 100644 --- a/modules/metricstream/tests/metricstream.tftest.hcl +++ b/modules/metricstream/tests/metricstream.tftest.hcl @@ -15,6 +15,13 @@ run "create_bucket" { } run "install" { + variables { + name = run.setup.id + bucket_arn = run.create_bucket.arn + } +} + +run "update_filters" { variables { name = run.setup.id bucket_arn = run.create_bucket.arn diff --git a/modules/metricstream/variables.tf b/modules/metricstream/variables.tf index 85da4de..e37aa2c 100644 --- a/modules/metricstream/variables.tf +++ b/modules/metricstream/variables.tf @@ -40,26 +40,25 @@ variable "output_format" { variable "include_filters" { description = <<-EOF - List of inclusion filters. + List of inclusion filters. If neither include_filters or exclude_filters is + set, a default filter will be used. EOF type = list(object({ namespace = string metric_names = list(string) })) - default = [] - nullable = false + default = null } variable "exclude_filters" { description = <<-EOF - List of exclusion filters. Mutually exclusive with inclusion filters + List of exclusion filters. Mutually exclusive with inclusion filters.
EOF type = list(object({ namespace = string metric_names = list(string) })) - default = [] - nullable = false + default = null } variable "buffering_interval" { diff --git a/utilities/update-filters.sh b/utilities/update-filters.sh new file mode 100755 index 0000000..e8232b3 --- /dev/null +++ b/utilities/update-filters.sh @@ -0,0 +1,13 @@ +#!/bin/bash +set -euo pipefail + +DIE() { echo "$*" 1>&2; exit 1; } + +BUCKET=${BUCKET:-observeinc} + +FILTERS=recommended.yaml +# Mirror each filter file from the bucket; --fail makes curl exit non-zero on an HTTP error instead of saving the error body as the filter file. +for FILTER in ${FILTERS}; do + curl --fail --silent --show-error --output "modules/metricstream/filters/${FILTER}" "https://${BUCKET}.s3.amazonaws.com/cloudwatchmetrics/filters/${FILTER}" || DIE "failed to download filter ${FILTER}" +done +