From d1d2b018a1d6534af7f2b2e5125dafaaddb276a5 Mon Sep 17 00:00:00 2001 From: MakoWish <26614684+MakoWish@users.noreply.github.com> Date: Thu, 20 Jul 2023 19:33:58 +0300 Subject: [PATCH] [nats] Ensure event.kind is correctly set for pipeline errors --- packages/aws/_dev/build/docs/emr.md | 13 +- packages/aws/changelog.yml | 5 + .../_dev/test/pipeline/test-common-config.yml | 7 + .../_dev/test/pipeline/test-emr-hadoop.log | 92 + .../test-emr-hadoop.log-expected.json | 1526 +++++++++++++++++ .../agent/stream/aws-cloudwatch.yml.hbs | 100 ++ .../emr_logs/agent/stream/aws-s3.yml.hbs | 78 + .../elasticsearch/ingest_pipeline/default.yml | 62 + .../aws/data_stream/emr_logs/fields/agent.yml | 35 + .../emr_logs/fields/base-fields.yml | 20 + .../aws/data_stream/emr_logs/fields/ecs.yml | 60 + .../data_stream/emr_logs/fields/fields.yml | 28 + .../aws/data_stream/emr_logs/manifest.yml | 232 +++ .../data_stream/emr_logs/sample_event.json | 24 + packages/aws/docs/emr.md | 90 +- packages/aws/manifest.yml | 11 +- .../elasticsearch/ingest_pipeline/default.yml | 5 +- 17 files changed, 2383 insertions(+), 5 deletions(-) create mode 100644 packages/aws/data_stream/emr_logs/_dev/test/pipeline/test-common-config.yml create mode 100644 packages/aws/data_stream/emr_logs/_dev/test/pipeline/test-emr-hadoop.log create mode 100644 packages/aws/data_stream/emr_logs/_dev/test/pipeline/test-emr-hadoop.log-expected.json create mode 100644 packages/aws/data_stream/emr_logs/agent/stream/aws-cloudwatch.yml.hbs create mode 100644 packages/aws/data_stream/emr_logs/agent/stream/aws-s3.yml.hbs create mode 100644 packages/aws/data_stream/emr_logs/elasticsearch/ingest_pipeline/default.yml create mode 100644 packages/aws/data_stream/emr_logs/fields/agent.yml create mode 100644 packages/aws/data_stream/emr_logs/fields/base-fields.yml create mode 100644 packages/aws/data_stream/emr_logs/fields/ecs.yml create mode 100644 packages/aws/data_stream/emr_logs/fields/fields.yml create mode 100644 
packages/aws/data_stream/emr_logs/manifest.yml create mode 100644 packages/aws/data_stream/emr_logs/sample_event.json diff --git a/packages/aws/_dev/build/docs/emr.md b/packages/aws/_dev/build/docs/emr.md index 831b28f6489..4c88fa8b1d5 100644 --- a/packages/aws/_dev/build/docs/emr.md +++ b/packages/aws/_dev/build/docs/emr.md @@ -10,11 +10,14 @@ For example, you could use this data to track Amazon EMR cluster progress and cl ## Data streams -The Amazon EMR integration collects one type of data: metrics. +The Amazon EMR integration collects two types of data: metrics and logs. **Metrics** give you insight into the state of Amazon EMR. The metrics collected by the Amazon EMR integration include cluster progress, cluster state, cluster or node storage, and more. See more details in the [Metrics reference](#metrics-reference) +**Logs** help you keep a record of events happening in Amazon EMR. +Logs collected by the Amazon EMR integration include the cluster status, node status details and more. + ## Requirements You need Elasticsearch for storing and searching your data and Kibana for visualizing and managing it. @@ -41,4 +44,10 @@ For step-by-step instructions on how to set up an integration, see the {{event "emr_metrics"}} -{{fields "emr_metrics"}} \ No newline at end of file +{{fields "emr_metrics"}} + +## Logs reference + +{{event "emr_logs"}} + +{{fields "emr_logs"}} \ No newline at end of file diff --git a/packages/aws/changelog.yml b/packages/aws/changelog.yml index b31db9c42af..82421a79dad 100644 --- a/packages/aws/changelog.yml +++ b/packages/aws/changelog.yml @@ -1,4 +1,9 @@ # newer versions go on top +- version: "1.50.0" + changes: + - description: Add EMR logs data stream. 
+ type: enhancement + link: https://github.com/elastic/integrations/pull/6895 - version: "1.49.0" changes: - description: Add API Gateway logs datastream diff --git a/packages/aws/data_stream/emr_logs/_dev/test/pipeline/test-common-config.yml b/packages/aws/data_stream/emr_logs/_dev/test/pipeline/test-common-config.yml new file mode 100644 index 00000000000..7780f71c7be --- /dev/null +++ b/packages/aws/data_stream/emr_logs/_dev/test/pipeline/test-common-config.yml @@ -0,0 +1,7 @@ +fields: + tags: + - preserve_original_event +multiline: + first_line_pattern: '^[0-9]' + negate: true + match: after \ No newline at end of file diff --git a/packages/aws/data_stream/emr_logs/_dev/test/pipeline/test-emr-hadoop.log b/packages/aws/data_stream/emr_logs/_dev/test/pipeline/test-emr-hadoop.log new file mode 100644 index 00000000000..c4c3d07ded6 --- /dev/null +++ b/packages/aws/data_stream/emr_logs/_dev/test/pipeline/test-emr-hadoop.log @@ -0,0 +1,92 @@ +2023-06-26 13:45:49,685 INFO namenode.NameNode: STARTUP_MSG: +/************************************************************ +STARTUP_MSG: Starting NameNode +STARTUP_MSG: host = ip-172-31-25-102.eu-central-1.compute.internal/172.31.25.102 +STARTUP_MSG: args = [-format, -nonInteractive] +STARTUP_MSG: version = 3.3.3-amzn-3 +STARTUP_MSG: classpath = /etc/hadoop/conf:/usr/lib/hadoop/lib/jetty-security-9.4.48.v20220622.jar:/usr/lib/hadoop/lib/accessors-smart-2.4.7.jar:/usr/lib/hadoop/lib/jersey-core-1.19.jar:/usr/lib/hadoop/lib/animal-sniffer-annotations-1.17.jar +STARTUP_MSG: build = Unknown -r Unknown; compiled by 'release' on 2023-05-31T03:49Z +STARTUP_MSG: java = 1.8.0_372 +************************************************************/ +2023-06-26 13:45:49,697 INFO namenode.NameNode: registered UNIX signal handlers for [TERM, HUP, INT] +2023-06-26 13:45:49,823 INFO namenode.NameNode: createNameNode [-format, -nonInteractive] +2023-06-26 13:45:50,318 INFO common.Util: Assuming 'file' scheme for path /mnt/namenode in configuration. 
+2023-06-26 13:45:50,319 INFO common.Util: Assuming 'file' scheme for path /mnt1/namenode in configuration. +2023-06-26 13:45:50,319 INFO common.Util: Assuming 'file' scheme for path /mnt/namenode in configuration. +2023-06-26 13:45:50,319 INFO common.Util: Assuming 'file' scheme for path /mnt1/namenode in configuration. +2023-06-26 13:45:50,330 INFO namenode.NameNode: Formatting using clusterid: CID-1b3b14b6-5518-47c3-b981-e5cb6b0ce38c +2023-06-26 13:45:50,394 INFO namenode.FSEditLog (main): Edit logging is async:true +2023-06-26 13:45:50,484 INFO namenode.FSNamesystem: KeyProvider: KeyProviderCryptoExtension: org.apache.hadoop.crypto.key.kms.LoadBalancingKMSClientProvider@a530d0a +2023-06-26 13:45:50,486 INFO namenode.FSNamesystem: fsLock is fair: true +2023-06-26 13:45:50,486 INFO namenode.FSNamesystem: Detailed lock hold time metrics enabled: false +2023-06-26 13:45:50,492 INFO namenode.FSNamesystem: fsOwner = hdfs (auth:SIMPLE) +2023-06-26 13:45:50,493 INFO namenode.FSNamesystem: supergroup = hdfsadmingroup +2023-06-26 13:45:50,493 INFO namenode.FSNamesystem: isPermissionEnabled = true +2023-06-26 13:45:50,493 INFO namenode.FSNamesystem: isStoragePolicyEnabled = true +2023-06-26 13:45:50,493 INFO namenode.FSNamesystem: HA Enabled: false +2023-06-26 13:45:50,566 INFO common.Util: dfs.datanode.fileio.profiling.sampling.percentage set to 0. 
Disabling file IO profiling +2023-06-26 13:45:50,601 INFO blockmanagement.DatanodeManager: dfs.block.invalidate.limit: configured=1000, counted=60, effected=1000 +2023-06-26 13:45:50,601 INFO blockmanagement.DatanodeManager: dfs.namenode.datanode.registration.ip-hostname-check=true +2023-06-26 13:45:50,610 INFO blockmanagement.BlockManager: dfs.namenode.startup.delay.block.deletion.sec is set to 000:00:00:00.000 +2023-06-26 13:45:50,611 INFO blockmanagement.BlockManager: The block deletion will start around 2023 Jun 26 13:45:50 +2023-06-26 13:45:50,612 INFO util.GSet: Computing capacity for map BlocksMap +2023-06-26 13:45:50,613 INFO util.GSet: VM type = 64-bit +2023-06-26 13:45:50,614 INFO util.GSet: 2.0% max memory 864 MB = 17.3 MB +2023-06-26 13:45:50,614 INFO util.GSet: capacity = 2^21 = 2097152 entries +2023-06-26 13:45:50,623 INFO blockmanagement.BlockManager: Storage policy satisfier is disabled +2023-06-26 13:45:50,623 INFO blockmanagement.BlockManager: dfs.block.access.token.enable = false +2023-06-26 13:45:50,631 INFO blockmanagement.BlockManagerSafeMode: dfs.namenode.safemode.threshold-pct = 0.999 +2023-06-26 13:45:50,631 INFO blockmanagement.BlockManagerSafeMode: dfs.namenode.safemode.min.datanodes = 0 +2023-06-26 13:45:50,631 INFO blockmanagement.BlockManagerSafeMode: dfs.namenode.safemode.extension = 5000 +2023-06-26 13:45:50,632 INFO blockmanagement.BlockManager: defaultReplication = 1 +2023-06-26 13:45:50,632 INFO blockmanagement.BlockManager: maxReplication = 512 +2023-06-26 13:45:50,632 INFO blockmanagement.BlockManager: minReplication = 1 +2023-06-26 13:45:50,632 INFO blockmanagement.BlockManager: maxReplicationStreams = 100 +2023-06-26 13:45:50,633 INFO blockmanagement.BlockManager: redundancyRecheckInterval = 3000ms +2023-06-26 13:45:50,633 INFO blockmanagement.BlockManager: encryptDataTransfer = false +2023-06-26 13:45:50,633 INFO blockmanagement.BlockManager: maxNumBlocksToLog = 1000 +2023-06-26 13:45:50,670 INFO namenode.FSDirectory: GLOBAL 
serial map: bits=29 maxEntries=536870911 +2023-06-26 13:45:50,671 INFO namenode.FSDirectory: USER serial map: bits=24 maxEntries=16777215 +2023-06-26 13:45:50,671 INFO namenode.FSDirectory: GROUP serial map: bits=24 maxEntries=16777215 +2023-06-26 13:45:50,671 INFO namenode.FSDirectory: XATTR serial map: bits=24 maxEntries=16777215 +2023-06-26 13:45:50,696 INFO util.GSet: Computing capacity for map INodeMap +2023-06-26 13:45:50,696 INFO util.GSet: VM type = 64-bit +2023-06-26 13:45:50,697 INFO util.GSet: 1.0% max memory 864 MB = 8.6 MB +2023-06-26 13:45:50,697 INFO util.GSet: capacity = 2^20 = 1048576 entries +2023-06-26 13:45:50,699 INFO namenode.FSDirectory: ACLs enabled? true +2023-06-26 13:45:50,699 INFO namenode.FSDirectory: POSIX ACL inheritance enabled? true +2023-06-26 13:45:50,699 INFO namenode.FSDirectory: XAttrs enabled? true +2023-06-26 13:45:50,700 INFO namenode.NameNode: Caching file names occurring more than 10 times +2023-06-26 13:45:50,708 INFO namenode.ReencryptionHandler: Configured throttleLimitHandlerRatio=1.0 for re-encryption +2023-06-26 13:45:50,717 INFO snapshot.SnapshotManager: Loaded config captureOpenFiles: false, skipCaptureAccessTimeOnlyChange: false, snapshotDiffAllowSnapRootDescendant: true, maxSnapshotLimit: 65536 +2023-06-26 13:45:50,720 INFO snapshot.SnapshotManager: SkipList is disabled +2023-06-26 13:45:50,748 INFO util.GSet: Computing capacity for map cachedBlocks +2023-06-26 13:45:50,748 INFO util.GSet: VM type = 64-bit +2023-06-26 13:45:50,748 INFO util.GSet: 0.25% max memory 864 MB = 2.2 MB +2023-06-26 13:45:50,749 INFO util.GSet: capacity = 2^18 = 262144 entries +2023-06-26 13:45:50,764 INFO metrics.TopMetrics: NNTop conf: dfs.namenode.top.window.num.buckets = 10 +2023-06-26 13:45:50,764 INFO metrics.TopMetrics: NNTop conf: dfs.namenode.top.num.users = 10 +2023-06-26 13:45:50,764 INFO metrics.TopMetrics: NNTop conf: dfs.namenode.top.windows.minutes = 1,5,25 +2023-06-26 13:45:50,768 INFO namenode.FSNamesystem: Retry cache on 
namenode is enabled +2023-06-26 13:45:50,768 INFO namenode.FSNamesystem: Retry cache will use 0.03 of total heap and retry cache entry expiry time is 600000 millis +2023-06-26 13:45:50,771 INFO util.GSet: Computing capacity for map NameNodeRetryCache +2023-06-26 13:45:50,771 INFO util.GSet: VM type = 64-bit +2023-06-26 13:45:50,771 INFO util.GSet: 0.029999999329447746% max memory 864 MB = 265.4 KB +2023-06-26 13:45:50,771 INFO util.GSet: capacity = 2^15 = 32768 entries +2023-06-26 13:45:50,774 INFO namenode.FSNamesystem: Removal of Expired Lease on Open Files is enabled +2023-06-26 13:45:50,811 INFO namenode.FSImage: Allocated new BlockPoolId: BP-1979673447-172.31.25.102-1687787150800 +2023-06-26 13:45:50,824 INFO common.Storage: Storage directory /mnt/namenode has been successfully formatted. +2023-06-26 13:45:50,826 INFO common.Storage: Storage directory /mnt1/namenode has been successfully formatted. +2023-06-26 13:45:50,868 INFO namenode.FSImageFormatProtobuf: Saving image file /mnt1/namenode/current/fsimage.ckpt_0000000000000000000 using no compression +2023-06-26 13:45:50,868 INFO namenode.FSImageFormatProtobuf: Saving image file /mnt/namenode/current/fsimage.ckpt_0000000000000000000 using no compression +2023-06-26 13:45:51,228 INFO namenode.FSImageFormatProtobuf: Image file /mnt1/namenode/current/fsimage.ckpt_0000000000000000000 of size 403 bytes saved in 0 seconds . +2023-06-26 13:45:51,230 INFO namenode.FSImageFormatProtobuf: Image file /mnt/namenode/current/fsimage.ckpt_0000000000000000000 of size 403 bytes saved in 0 seconds . +2023-06-26 13:45:51,244 INFO namenode.NNStorageRetentionManager: Going to retain 1 images with txid >= 0 +2023-06-26 13:45:51,301 INFO namenode.FSNamesystem: Stopping services started for active state +2023-06-26 13:45:51,301 INFO namenode.FSNamesystem: Stopping services started for standby state +2023-06-26 13:45:51,305 INFO namenode.FSImage: FSImageSaver clean checkpoint: txid=0 when meet shutdown. 
+2023-06-26 13:45:51,306 INFO namenode.FSImage: FSImageSaver clean checkpoint: txid=0 when meet shutdown. +2023-06-26 13:45:51,306 INFO namenode.NameNode: SHUTDOWN_MSG: +/************************************************************ +SHUTDOWN_MSG: Shutting down NameNode at ip-172-31-25-102.eu-central-1.compute.internal/172.31.25.102 +************************************************************/ \ No newline at end of file diff --git a/packages/aws/data_stream/emr_logs/_dev/test/pipeline/test-emr-hadoop.log-expected.json b/packages/aws/data_stream/emr_logs/_dev/test/pipeline/test-emr-hadoop.log-expected.json new file mode 100644 index 00000000000..25994a1ff2f --- /dev/null +++ b/packages/aws/data_stream/emr_logs/_dev/test/pipeline/test-emr-hadoop.log-expected.json @@ -0,0 +1,1526 @@ +{ + "expected": [ + { + "@timestamp": "2023-06-26T13:45:49.685Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:49,685 INFO namenode.NameNode: STARTUP_MSG: \n/************************************************************\nSTARTUP_MSG: Starting NameNode\nSTARTUP_MSG: host = ip-172-31-25-102.eu-central-1.compute.internal/172.31.25.102\nSTARTUP_MSG: args = [-format, -nonInteractive]\nSTARTUP_MSG: version = 3.3.3-amzn-3\nSTARTUP_MSG: classpath = /etc/hadoop/conf:/usr/lib/hadoop/lib/jetty-security-9.4.48.v20220622.jar:/usr/lib/hadoop/lib/accessors-smart-2.4.7.jar:/usr/lib/hadoop/lib/jersey-core-1.19.jar:/usr/lib/hadoop/lib/animal-sniffer-annotations-1.17.jar\nSTARTUP_MSG: build = Unknown -r Unknown; compiled by 'release' on 2023-05-31T03:49Z\nSTARTUP_MSG: java = 1.8.0_372\n************************************************************/" + }, + "log": { + "level": "INFO" + }, + "message": "STARTUP_MSG: ", + "process": { + "message": "/************************************************************\nSTARTUP_MSG: Starting NameNode\nSTARTUP_MSG: host = ip-172-31-25-102.eu-central-1.compute.internal/172.31.25.102\nSTARTUP_MSG: args = [-format, 
-nonInteractive]\nSTARTUP_MSG: version = 3.3.3-amzn-3\nSTARTUP_MSG: classpath = /etc/hadoop/conf:/usr/lib/hadoop/lib/jetty-security-9.4.48.v20220622.jar:/usr/lib/hadoop/lib/accessors-smart-2.4.7.jar:/usr/lib/hadoop/lib/jersey-core-1.19.jar:/usr/lib/hadoop/lib/animal-sniffer-annotations-1.17.jar\nSTARTUP_MSG: build = Unknown -r Unknown; compiled by 'release' on 2023-05-31T03:49Z\nSTARTUP_MSG: java = 1.8.0_372\n************************************************************/", + "name": "namenode.NameNode" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:49.697Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:49,697 INFO namenode.NameNode: registered UNIX signal handlers for [TERM, HUP, INT]" + }, + "log": { + "level": "INFO" + }, + "message": "registered UNIX signal handlers for [TERM, HUP, INT]", + "process": { + "name": "namenode.NameNode" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:49.823Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:49,823 INFO namenode.NameNode: createNameNode [-format, -nonInteractive]" + }, + "log": { + "level": "INFO" + }, + "message": "createNameNode [-format, -nonInteractive]", + "process": { + "name": "namenode.NameNode" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.318Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,318 INFO common.Util: Assuming 'file' scheme for path /mnt/namenode in configuration." 
+ }, + "log": { + "level": "INFO" + }, + "message": "Assuming 'file' scheme for path /mnt/namenode in configuration.", + "process": { + "name": "common.Util" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.319Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,319 INFO common.Util: Assuming 'file' scheme for path /mnt1/namenode in configuration." + }, + "log": { + "level": "INFO" + }, + "message": "Assuming 'file' scheme for path /mnt1/namenode in configuration.", + "process": { + "name": "common.Util" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.319Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,319 INFO common.Util: Assuming 'file' scheme for path /mnt/namenode in configuration." + }, + "log": { + "level": "INFO" + }, + "message": "Assuming 'file' scheme for path /mnt/namenode in configuration.", + "process": { + "name": "common.Util" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.319Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,319 INFO common.Util: Assuming 'file' scheme for path /mnt1/namenode in configuration." 
+ }, + "log": { + "level": "INFO" + }, + "message": "Assuming 'file' scheme for path /mnt1/namenode in configuration.", + "process": { + "name": "common.Util" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.330Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,330 INFO namenode.NameNode: Formatting using clusterid: CID-1b3b14b6-5518-47c3-b981-e5cb6b0ce38c" + }, + "log": { + "level": "INFO" + }, + "message": "Formatting using clusterid: CID-1b3b14b6-5518-47c3-b981-e5cb6b0ce38c", + "process": { + "name": "namenode.NameNode" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.394Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,394 INFO namenode.FSEditLog (main): Edit logging is async:true" + }, + "log": { + "level": "INFO" + }, + "message": "Edit logging is async:true", + "process": { + "name": "namenode.FSEditLog (main)" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.484Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,484 INFO namenode.FSNamesystem: KeyProvider: KeyProviderCryptoExtension: org.apache.hadoop.crypto.key.kms.LoadBalancingKMSClientProvider@a530d0a" + }, + "log": { + "level": "INFO" + }, + "message": "KeyProvider: KeyProviderCryptoExtension: org.apache.hadoop.crypto.key.kms.LoadBalancingKMSClientProvider@a530d0a", + "process": { + "name": "namenode.FSNamesystem" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.486Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,486 INFO namenode.FSNamesystem: fsLock is fair: true" + }, + "log": { + "level": "INFO" + }, + "message": "fsLock is fair: true", + "process": { + "name": "namenode.FSNamesystem" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": 
"2023-06-26T13:45:50.486Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,486 INFO namenode.FSNamesystem: Detailed lock hold time metrics enabled: false" + }, + "log": { + "level": "INFO" + }, + "message": "Detailed lock hold time metrics enabled: false", + "process": { + "name": "namenode.FSNamesystem" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.492Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,492 INFO namenode.FSNamesystem: fsOwner = hdfs (auth:SIMPLE)" + }, + "log": { + "level": "INFO" + }, + "message": "fsOwner = hdfs (auth:SIMPLE)", + "process": { + "name": "namenode.FSNamesystem" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.493Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,493 INFO namenode.FSNamesystem: supergroup = hdfsadmingroup" + }, + "log": { + "level": "INFO" + }, + "message": "supergroup = hdfsadmingroup", + "process": { + "name": "namenode.FSNamesystem" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.493Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,493 INFO namenode.FSNamesystem: isPermissionEnabled = true" + }, + "log": { + "level": "INFO" + }, + "message": "isPermissionEnabled = true", + "process": { + "name": "namenode.FSNamesystem" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.493Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,493 INFO namenode.FSNamesystem: isStoragePolicyEnabled = true" + }, + "log": { + "level": "INFO" + }, + "message": "isStoragePolicyEnabled = true", + "process": { + "name": "namenode.FSNamesystem" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.493Z", + "ecs": { + "version": 
"8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,493 INFO namenode.FSNamesystem: HA Enabled: false" + }, + "log": { + "level": "INFO" + }, + "message": "HA Enabled: false", + "process": { + "name": "namenode.FSNamesystem" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.566Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,566 INFO common.Util: dfs.datanode.fileio.profiling.sampling.percentage set to 0. Disabling file IO profiling" + }, + "log": { + "level": "INFO" + }, + "message": "dfs.datanode.fileio.profiling.sampling.percentage set to 0. Disabling file IO profiling", + "process": { + "name": "common.Util" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.601Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,601 INFO blockmanagement.DatanodeManager: dfs.block.invalidate.limit: configured=1000, counted=60, effected=1000" + }, + "log": { + "level": "INFO" + }, + "message": "dfs.block.invalidate.limit: configured=1000, counted=60, effected=1000", + "process": { + "name": "blockmanagement.DatanodeManager" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.601Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,601 INFO blockmanagement.DatanodeManager: dfs.namenode.datanode.registration.ip-hostname-check=true" + }, + "log": { + "level": "INFO" + }, + "message": "dfs.namenode.datanode.registration.ip-hostname-check=true", + "process": { + "name": "blockmanagement.DatanodeManager" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.610Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,610 INFO blockmanagement.BlockManager: dfs.namenode.startup.delay.block.deletion.sec is set to 000:00:00:00.000" + }, + "log": { + "level": "INFO" + }, 
+ "message": "dfs.namenode.startup.delay.block.deletion.sec is set to 000:00:00:00.000", + "process": { + "name": "blockmanagement.BlockManager" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.611Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,611 INFO blockmanagement.BlockManager: The block deletion will start around 2023 Jun 26 13:45:50" + }, + "log": { + "level": "INFO" + }, + "message": "The block deletion will start around 2023 Jun 26 13:45:50", + "process": { + "name": "blockmanagement.BlockManager" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.612Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,612 INFO util.GSet: Computing capacity for map BlocksMap" + }, + "log": { + "level": "INFO" + }, + "message": "Computing capacity for map BlocksMap", + "process": { + "name": "util.GSet" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.613Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,613 INFO util.GSet: VM type = 64-bit" + }, + "log": { + "level": "INFO" + }, + "message": "VM type = 64-bit", + "process": { + "name": "util.GSet" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.614Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,614 INFO util.GSet: 2.0% max memory 864 MB = 17.3 MB" + }, + "log": { + "level": "INFO" + }, + "message": "2.0% max memory 864 MB = 17.3 MB", + "process": { + "name": "util.GSet" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.614Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,614 INFO util.GSet: capacity = 2^21 = 2097152 entries" + }, + "log": { + "level": "INFO" + }, + "message": "capacity = 2^21 = 2097152 
entries", + "process": { + "name": "util.GSet" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.623Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,623 INFO blockmanagement.BlockManager: Storage policy satisfier is disabled" + }, + "log": { + "level": "INFO" + }, + "message": "Storage policy satisfier is disabled", + "process": { + "name": "blockmanagement.BlockManager" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.623Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,623 INFO blockmanagement.BlockManager: dfs.block.access.token.enable = false" + }, + "log": { + "level": "INFO" + }, + "message": "dfs.block.access.token.enable = false", + "process": { + "name": "blockmanagement.BlockManager" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.631Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,631 INFO blockmanagement.BlockManagerSafeMode: dfs.namenode.safemode.threshold-pct = 0.999" + }, + "log": { + "level": "INFO" + }, + "message": "dfs.namenode.safemode.threshold-pct = 0.999", + "process": { + "name": "blockmanagement.BlockManagerSafeMode" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.631Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,631 INFO blockmanagement.BlockManagerSafeMode: dfs.namenode.safemode.min.datanodes = 0" + }, + "log": { + "level": "INFO" + }, + "message": "dfs.namenode.safemode.min.datanodes = 0", + "process": { + "name": "blockmanagement.BlockManagerSafeMode" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.631Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,631 INFO blockmanagement.BlockManagerSafeMode: 
dfs.namenode.safemode.extension = 5000" + }, + "log": { + "level": "INFO" + }, + "message": "dfs.namenode.safemode.extension = 5000", + "process": { + "name": "blockmanagement.BlockManagerSafeMode" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.632Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,632 INFO blockmanagement.BlockManager: defaultReplication = 1" + }, + "log": { + "level": "INFO" + }, + "message": "defaultReplication = 1", + "process": { + "name": "blockmanagement.BlockManager" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.632Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,632 INFO blockmanagement.BlockManager: maxReplication = 512" + }, + "log": { + "level": "INFO" + }, + "message": "maxReplication = 512", + "process": { + "name": "blockmanagement.BlockManager" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.632Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,632 INFO blockmanagement.BlockManager: minReplication = 1" + }, + "log": { + "level": "INFO" + }, + "message": "minReplication = 1", + "process": { + "name": "blockmanagement.BlockManager" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.632Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,632 INFO blockmanagement.BlockManager: maxReplicationStreams = 100" + }, + "log": { + "level": "INFO" + }, + "message": "maxReplicationStreams = 100", + "process": { + "name": "blockmanagement.BlockManager" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.633Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,633 INFO blockmanagement.BlockManager: redundancyRecheckInterval = 3000ms" + }, 
+ "log": { + "level": "INFO" + }, + "message": "redundancyRecheckInterval = 3000ms", + "process": { + "name": "blockmanagement.BlockManager" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.633Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,633 INFO blockmanagement.BlockManager: encryptDataTransfer = false" + }, + "log": { + "level": "INFO" + }, + "message": "encryptDataTransfer = false", + "process": { + "name": "blockmanagement.BlockManager" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.633Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,633 INFO blockmanagement.BlockManager: maxNumBlocksToLog = 1000" + }, + "log": { + "level": "INFO" + }, + "message": "maxNumBlocksToLog = 1000", + "process": { + "name": "blockmanagement.BlockManager" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.670Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,670 INFO namenode.FSDirectory: GLOBAL serial map: bits=29 maxEntries=536870911" + }, + "log": { + "level": "INFO" + }, + "message": "GLOBAL serial map: bits=29 maxEntries=536870911", + "process": { + "name": "namenode.FSDirectory" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.671Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,671 INFO namenode.FSDirectory: USER serial map: bits=24 maxEntries=16777215" + }, + "log": { + "level": "INFO" + }, + "message": "USER serial map: bits=24 maxEntries=16777215", + "process": { + "name": "namenode.FSDirectory" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.671Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,671 INFO namenode.FSDirectory: GROUP serial map: bits=24 
maxEntries=16777215" + }, + "log": { + "level": "INFO" + }, + "message": "GROUP serial map: bits=24 maxEntries=16777215", + "process": { + "name": "namenode.FSDirectory" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.671Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,671 INFO namenode.FSDirectory: XATTR serial map: bits=24 maxEntries=16777215" + }, + "log": { + "level": "INFO" + }, + "message": "XATTR serial map: bits=24 maxEntries=16777215", + "process": { + "name": "namenode.FSDirectory" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.696Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,696 INFO util.GSet: Computing capacity for map INodeMap" + }, + "log": { + "level": "INFO" + }, + "message": "Computing capacity for map INodeMap", + "process": { + "name": "util.GSet" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.696Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,696 INFO util.GSet: VM type = 64-bit" + }, + "log": { + "level": "INFO" + }, + "message": "VM type = 64-bit", + "process": { + "name": "util.GSet" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.697Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,697 INFO util.GSet: 1.0% max memory 864 MB = 8.6 MB" + }, + "log": { + "level": "INFO" + }, + "message": "1.0% max memory 864 MB = 8.6 MB", + "process": { + "name": "util.GSet" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.697Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,697 INFO util.GSet: capacity = 2^20 = 1048576 entries" + }, + "log": { + "level": "INFO" + }, + "message": "capacity = 2^20 = 1048576 entries", + "process": { 
+ "name": "util.GSet" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.699Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,699 INFO namenode.FSDirectory: ACLs enabled? true" + }, + "log": { + "level": "INFO" + }, + "message": "ACLs enabled? true", + "process": { + "name": "namenode.FSDirectory" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.699Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,699 INFO namenode.FSDirectory: POSIX ACL inheritance enabled? true" + }, + "log": { + "level": "INFO" + }, + "message": "POSIX ACL inheritance enabled? true", + "process": { + "name": "namenode.FSDirectory" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.699Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,699 INFO namenode.FSDirectory: XAttrs enabled? true" + }, + "log": { + "level": "INFO" + }, + "message": "XAttrs enabled? 
true", + "process": { + "name": "namenode.FSDirectory" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.700Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,700 INFO namenode.NameNode: Caching file names occurring more than 10 times" + }, + "log": { + "level": "INFO" + }, + "message": "Caching file names occurring more than 10 times", + "process": { + "name": "namenode.NameNode" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.708Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,708 INFO namenode.ReencryptionHandler: Configured throttleLimitHandlerRatio=1.0 for re-encryption" + }, + "log": { + "level": "INFO" + }, + "message": "Configured throttleLimitHandlerRatio=1.0 for re-encryption", + "process": { + "name": "namenode.ReencryptionHandler" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.717Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,717 INFO snapshot.SnapshotManager: Loaded config captureOpenFiles: false, skipCaptureAccessTimeOnlyChange: false, snapshotDiffAllowSnapRootDescendant: true, maxSnapshotLimit: 65536" + }, + "log": { + "level": "INFO" + }, + "message": "Loaded config captureOpenFiles: false, skipCaptureAccessTimeOnlyChange: false, snapshotDiffAllowSnapRootDescendant: true, maxSnapshotLimit: 65536", + "process": { + "name": "snapshot.SnapshotManager" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.720Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,720 INFO snapshot.SnapshotManager: SkipList is disabled" + }, + "log": { + "level": "INFO" + }, + "message": "SkipList is disabled", + "process": { + "name": "snapshot.SnapshotManager" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": 
"2023-06-26T13:45:50.748Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,748 INFO util.GSet: Computing capacity for map cachedBlocks" + }, + "log": { + "level": "INFO" + }, + "message": "Computing capacity for map cachedBlocks", + "process": { + "name": "util.GSet" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.748Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,748 INFO util.GSet: VM type = 64-bit" + }, + "log": { + "level": "INFO" + }, + "message": "VM type = 64-bit", + "process": { + "name": "util.GSet" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.748Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,748 INFO util.GSet: 0.25% max memory 864 MB = 2.2 MB" + }, + "log": { + "level": "INFO" + }, + "message": "0.25% max memory 864 MB = 2.2 MB", + "process": { + "name": "util.GSet" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.749Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,749 INFO util.GSet: capacity = 2^18 = 262144 entries" + }, + "log": { + "level": "INFO" + }, + "message": "capacity = 2^18 = 262144 entries", + "process": { + "name": "util.GSet" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.764Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,764 INFO metrics.TopMetrics: NNTop conf: dfs.namenode.top.window.num.buckets = 10" + }, + "log": { + "level": "INFO" + }, + "message": "NNTop conf: dfs.namenode.top.window.num.buckets = 10", + "process": { + "name": "metrics.TopMetrics" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.764Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,764 INFO 
metrics.TopMetrics: NNTop conf: dfs.namenode.top.num.users = 10" + }, + "log": { + "level": "INFO" + }, + "message": "NNTop conf: dfs.namenode.top.num.users = 10", + "process": { + "name": "metrics.TopMetrics" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.764Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,764 INFO metrics.TopMetrics: NNTop conf: dfs.namenode.top.windows.minutes = 1,5,25" + }, + "log": { + "level": "INFO" + }, + "message": "NNTop conf: dfs.namenode.top.windows.minutes = 1,5,25", + "process": { + "name": "metrics.TopMetrics" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.768Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,768 INFO namenode.FSNamesystem: Retry cache on namenode is enabled" + }, + "log": { + "level": "INFO" + }, + "message": "Retry cache on namenode is enabled", + "process": { + "name": "namenode.FSNamesystem" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.768Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,768 INFO namenode.FSNamesystem: Retry cache will use 0.03 of total heap and retry cache entry expiry time is 600000 millis" + }, + "log": { + "level": "INFO" + }, + "message": "Retry cache will use 0.03 of total heap and retry cache entry expiry time is 600000 millis", + "process": { + "name": "namenode.FSNamesystem" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.771Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,771 INFO util.GSet: Computing capacity for map NameNodeRetryCache" + }, + "log": { + "level": "INFO" + }, + "message": "Computing capacity for map NameNodeRetryCache", + "process": { + "name": "util.GSet" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + 
"@timestamp": "2023-06-26T13:45:50.771Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,771 INFO util.GSet: VM type = 64-bit" + }, + "log": { + "level": "INFO" + }, + "message": "VM type = 64-bit", + "process": { + "name": "util.GSet" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.771Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,771 INFO util.GSet: 0.029999999329447746% max memory 864 MB = 265.4 KB" + }, + "log": { + "level": "INFO" + }, + "message": "0.029999999329447746% max memory 864 MB = 265.4 KB", + "process": { + "name": "util.GSet" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.771Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,771 INFO util.GSet: capacity = 2^15 = 32768 entries" + }, + "log": { + "level": "INFO" + }, + "message": "capacity = 2^15 = 32768 entries", + "process": { + "name": "util.GSet" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.774Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,774 INFO namenode.FSNamesystem: Removal of Expired Lease on Open Files is enabled" + }, + "log": { + "level": "INFO" + }, + "message": "Removal of Expired Lease on Open Files is enabled", + "process": { + "name": "namenode.FSNamesystem" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.811Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,811 INFO namenode.FSImage: Allocated new BlockPoolId: BP-1979673447-172.31.25.102-1687787150800" + }, + "log": { + "level": "INFO" + }, + "message": "Allocated new BlockPoolId: BP-1979673447-172.31.25.102-1687787150800", + "process": { + "name": "namenode.FSImage" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": 
"2023-06-26T13:45:50.824Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,824 INFO common.Storage: Storage directory /mnt/namenode has been successfully formatted." + }, + "log": { + "level": "INFO" + }, + "message": "Storage directory /mnt/namenode has been successfully formatted.", + "process": { + "name": "common.Storage" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.826Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,826 INFO common.Storage: Storage directory /mnt1/namenode has been successfully formatted." + }, + "log": { + "level": "INFO" + }, + "message": "Storage directory /mnt1/namenode has been successfully formatted.", + "process": { + "name": "common.Storage" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.868Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,868 INFO namenode.FSImageFormatProtobuf: Saving image file /mnt1/namenode/current/fsimage.ckpt_0000000000000000000 using no compression" + }, + "log": { + "level": "INFO" + }, + "message": "Saving image file /mnt1/namenode/current/fsimage.ckpt_0000000000000000000 using no compression", + "process": { + "name": "namenode.FSImageFormatProtobuf" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:50.868Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:50,868 INFO namenode.FSImageFormatProtobuf: Saving image file /mnt/namenode/current/fsimage.ckpt_0000000000000000000 using no compression" + }, + "log": { + "level": "INFO" + }, + "message": "Saving image file /mnt/namenode/current/fsimage.ckpt_0000000000000000000 using no compression", + "process": { + "name": "namenode.FSImageFormatProtobuf" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:51.228Z", + "ecs": { + "version": 
"8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:51,228 INFO namenode.FSImageFormatProtobuf: Image file /mnt1/namenode/current/fsimage.ckpt_0000000000000000000 of size 403 bytes saved in 0 seconds ." + }, + "log": { + "level": "INFO" + }, + "message": "Image file /mnt1/namenode/current/fsimage.ckpt_0000000000000000000 of size 403 bytes saved in 0 seconds .", + "process": { + "name": "namenode.FSImageFormatProtobuf" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:51.230Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:51,230 INFO namenode.FSImageFormatProtobuf: Image file /mnt/namenode/current/fsimage.ckpt_0000000000000000000 of size 403 bytes saved in 0 seconds ." + }, + "log": { + "level": "INFO" + }, + "message": "Image file /mnt/namenode/current/fsimage.ckpt_0000000000000000000 of size 403 bytes saved in 0 seconds .", + "process": { + "name": "namenode.FSImageFormatProtobuf" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:51.244Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:51,244 INFO namenode.NNStorageRetentionManager: Going to retain 1 images with txid \u003e= 0" + }, + "log": { + "level": "INFO" + }, + "message": "Going to retain 1 images with txid \u003e= 0", + "process": { + "name": "namenode.NNStorageRetentionManager" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:51.301Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:51,301 INFO namenode.FSNamesystem: Stopping services started for active state" + }, + "log": { + "level": "INFO" + }, + "message": "Stopping services started for active state", + "process": { + "name": "namenode.FSNamesystem" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:51.301Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": 
"2023-06-26 13:45:51,301 INFO namenode.FSNamesystem: Stopping services started for standby state" + }, + "log": { + "level": "INFO" + }, + "message": "Stopping services started for standby state", + "process": { + "name": "namenode.FSNamesystem" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:51.305Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:51,305 INFO namenode.FSImage: FSImageSaver clean checkpoint: txid=0 when meet shutdown." + }, + "log": { + "level": "INFO" + }, + "message": "FSImageSaver clean checkpoint: txid=0 when meet shutdown.", + "process": { + "name": "namenode.FSImage" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:51.306Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:51,306 INFO namenode.FSImage: FSImageSaver clean checkpoint: txid=0 when meet shutdown." + }, + "log": { + "level": "INFO" + }, + "message": "FSImageSaver clean checkpoint: txid=0 when meet shutdown.", + "process": { + "name": "namenode.FSImage" + }, + "tags": [ + "preserve_original_event" + ] + }, + { + "@timestamp": "2023-06-26T13:45:51.306Z", + "ecs": { + "version": "8.0.0" + }, + "event": { + "original": "2023-06-26 13:45:51,306 INFO namenode.NameNode: SHUTDOWN_MSG: \n/************************************************************\nSHUTDOWN_MSG: Shutting down NameNode at ip-172-31-25-102.eu-central-1.compute.internal/172.31.25.102\n************************************************************/" + }, + "log": { + "level": "INFO" + }, + "message": "SHUTDOWN_MSG: ", + "process": { + "message": "/************************************************************\nSHUTDOWN_MSG: Shutting down NameNode at ip-172-31-25-102.eu-central-1.compute.internal/172.31.25.102\n************************************************************/", + "name": "namenode.NameNode" + }, + "tags": [ + "preserve_original_event" + ] + } + ] +} \ No newline at 
end of file diff --git a/packages/aws/data_stream/emr_logs/agent/stream/aws-cloudwatch.yml.hbs b/packages/aws/data_stream/emr_logs/agent/stream/aws-cloudwatch.yml.hbs new file mode 100644 index 00000000000..934f97a44c3 --- /dev/null +++ b/packages/aws/data_stream/emr_logs/agent/stream/aws-cloudwatch.yml.hbs @@ -0,0 +1,100 @@ +{{#unless log_group_name}} +{{#unless log_group_name_prefix}} +{{#if log_group_arn }} +log_group_arn: {{ log_group_arn }} +{{/if}} +{{/unless}} +{{/unless}} + +{{#unless log_group_arn}} +{{#unless log_group_name}} +{{#if log_group_name_prefix }} +log_group_name_prefix: {{ log_group_name_prefix }} +{{/if}} +{{/unless}} +{{/unless}} + +{{#unless log_group_arn}} +{{#unless log_group_name_prefix}} +{{#if log_group_name }} +log_group_name: {{ log_group_name }} +{{/if}} +{{/unless}} +{{/unless}} + +{{#unless log_group_arn}} +region_name: {{ region_name }} +{{/unless}} + +{{#unless log_stream_prefix}} +{{#if log_streams }} +log_streams: {{ log_streams }} +{{/if}} +{{/unless}} + +{{#unless log_streams}} +{{#if log_stream_prefix }} +log_stream_prefix: {{ log_stream_prefix }} +{{/if}} +{{/unless}} + +{{#if start_position }} +start_position: {{ start_position }} +{{/if}} + +{{#if scan_frequency }} +scan_frequency: {{ scan_frequency }} +{{/if}} + +{{#if api_sleep }} +api_sleep: {{ api_sleep }} +{{/if}} + +{{#if latency }} +latency: {{ latency }} +{{/if}} +{{#if number_of_workers }} +number_of_workers: {{ number_of_workers }} +{{/if}} + +{{#if credential_profile_name}} +credential_profile_name: {{credential_profile_name}} +{{/if}} +{{#if shared_credential_file}} +shared_credential_file: {{shared_credential_file}} +{{/if}} +{{#if api_timeout}} +api_timeout: {{api_timeout}} +{{/if}} +{{#if default_region}} +default_region: {{default_region}} +{{/if}} +{{#if access_key_id}} +access_key_id: {{access_key_id}} +{{/if}} +{{#if secret_access_key}} +secret_access_key: {{secret_access_key}} +{{/if}} +{{#if session_token}} +session_token: {{session_token}} +{{/if}} 
+{{#if role_arn}} +role_arn: {{role_arn}} +{{/if}} +{{#if proxy_url }} +proxy_url: {{proxy_url}} +{{/if}} +tags: +{{#if preserve_original_event}} + - preserve_original_event +{{/if}} +{{#each tags as |tag i|}} + - {{tag}} +{{/each}} +{{#contains "forwarded" tags}} +publisher_pipeline.disable_host: true +{{/contains}} +{{#if processors}} +processors: +{{processors}} +{{/if}} diff --git a/packages/aws/data_stream/emr_logs/agent/stream/aws-s3.yml.hbs b/packages/aws/data_stream/emr_logs/agent/stream/aws-s3.yml.hbs new file mode 100644 index 00000000000..c479d4b6dd0 --- /dev/null +++ b/packages/aws/data_stream/emr_logs/agent/stream/aws-s3.yml.hbs @@ -0,0 +1,78 @@ +{{#if collect_s3_logs}} + +{{#if bucket_arn}} +bucket_arn: {{bucket_arn}} +{{/if}} +{{#if number_of_workers}} +number_of_workers: {{number_of_workers}} +{{/if}} +{{#if interval}} +bucket_list_interval: {{interval}} +{{/if}} +{{#if bucket_list_prefix}} +bucket_list_prefix: {{bucket_list_prefix}} +{{/if}} + +{{else}} + +{{#if queue_url}} +queue_url: {{queue_url}} +{{/if}} +{{#if visibility_timeout}} +visibility_timeout: {{visibility_timeout}} +{{/if}} +{{#if api_timeout}} +api_timeout: {{api_timeout}} +{{/if}} +{{#if max_number_of_messages}} +max_number_of_messages: {{max_number_of_messages}} +{{/if}} +{{#if file_selectors}} +file_selectors: +{{file_selectors}} +{{/if}} + +{{/if}} + + +{{#if default_region}} +default_region: {{default_region}} +{{/if}} +{{#if credential_profile_name}} +credential_profile_name: {{credential_profile_name}} +{{/if}} +{{#if shared_credential_file}} +shared_credential_file: {{shared_credential_file}} +{{/if}} +{{#if access_key_id}} +access_key_id: {{access_key_id}} +{{/if}} +{{#if secret_access_key}} +secret_access_key: {{secret_access_key}} +{{/if}} +{{#if session_token}} +session_token: {{session_token}} +{{/if}} +{{#if role_arn}} +role_arn: {{role_arn}} +{{/if}} +{{#if fips_enabled}} +fips_enabled: {{fips_enabled}} +{{/if}} +{{#if proxy_url }} +proxy_url: {{proxy_url}} +{{/if}} 
+tags: +{{#if preserve_original_event}} + - preserve_original_event +{{/if}} +{{#each tags as |tag i|}} + - {{tag}} +{{/each}} +{{#contains "forwarded" tags}} +publisher_pipeline.disable_host: true +{{/contains}} +{{#if processors}} +processors: +{{processors}} +{{/if}} \ No newline at end of file diff --git a/packages/aws/data_stream/emr_logs/elasticsearch/ingest_pipeline/default.yml b/packages/aws/data_stream/emr_logs/elasticsearch/ingest_pipeline/default.yml new file mode 100644 index 00000000000..55b24bc86c4 --- /dev/null +++ b/packages/aws/data_stream/emr_logs/elasticsearch/ingest_pipeline/default.yml @@ -0,0 +1,62 @@ +--- +description: "Pipeline for EMR logs" +processors: + - set: + field: ecs.version + value: '8.0.0' + - rename: + field: message + target_field: event.original + ignore_missing: true + if: 'ctx.event?.original == null' + description: 'Renames the original `message` field to `event.original` to store a copy of the original message. The `event.original` field is not touched if the document already has one; it may happen when Logstash sends the document.' + - remove: + field: message + ignore_missing: true + if: 'ctx.event?.original != null' + description: 'The `message` field is no longer required if the document has an `event.original` field.' 
+ - grok: + field: event.original + pattern_definitions: + GREEDYMULTILINE: "(.|\\n)*" + patterns: + - '%{TIMESTAMP_ISO8601:_tmp.timestamp}%{SPACE}%{LOGLEVEL:log.level}%{SPACE}%{DATA:process.name}(?:\[%{DATA:process.entrypoint}\])?:%{SPACE}%{GREEDYDATA:message}%{SPACE}%{GREEDYMULTILINE:process.message}' + ignore_missing: true + - date: + field: _tmp.timestamp + target_field: '@timestamp' + ignore_failure: true + formats: + - ISO8601 + - yyyy-MM-dd HH:mm:ss,SSS + - remove: + field: + - _tmp + ignore_missing: true + - script: + description: Drops null/empty values recursively + lang: painless + ignore_failure: true + source: | + boolean drop(Object o) { + if (o == null || o == "") { + return true; + } else if (o instanceof Map) { + ((Map) o).values().removeIf(v -> drop(v)); + return (((Map) o).size() == 0); + } else if (o instanceof List) { + ((List) o).removeIf(v -> drop(v)); + return (((List) o).length == 0); + } + return false; + } + drop(ctx); + - remove: + field: event.original + if: "ctx?.tags == null || !(ctx.tags.contains('preserve_original_event'))" + ignore_failure: true + ignore_missing: true +on_failure: + - set: + field: 'error.message' + value: '{{ _ingest.on_failure_message }}' \ No newline at end of file diff --git a/packages/aws/data_stream/emr_logs/fields/agent.yml b/packages/aws/data_stream/emr_logs/fields/agent.yml new file mode 100644 index 00000000000..13e13903cc4 --- /dev/null +++ b/packages/aws/data_stream/emr_logs/fields/agent.yml @@ -0,0 +1,35 @@ +- name: cloud + title: Cloud + group: 2 + description: Fields related to the cloud or infrastructure the events are coming from. + footnote: 'Examples: If Metricbeat is running on an EC2 host and fetches data from its host, the cloud info contains the data about this machine. If Metricbeat runs on a remote machine outside the cloud and fetches data from a service running in the cloud, the field contains cloud data from the machine the service is running on.' 
+ type: group + fields: + - name: image.id + type: keyword + description: Image ID for the cloud instance. +- name: host + title: Host + group: 2 + description: 'A host is defined as a general computing instance. + + ECS host.* fields should be populated with details about the host on which the event happened, or from which the measurement was taken. Host types include hardware, virtual machines, Docker containers, and Kubernetes nodes.' + type: group + fields: + - name: containerized + type: boolean + description: > + If the host is a container. + + - name: os.build + type: keyword + example: "18D109" + description: > + OS build information. + + - name: os.codename + type: keyword + example: "stretch" + description: > + OS codename, if any. + diff --git a/packages/aws/data_stream/emr_logs/fields/base-fields.yml b/packages/aws/data_stream/emr_logs/fields/base-fields.yml new file mode 100644 index 00000000000..ae928a4e452 --- /dev/null +++ b/packages/aws/data_stream/emr_logs/fields/base-fields.yml @@ -0,0 +1,20 @@ +- name: data_stream.type + type: constant_keyword + description: Data stream type. +- name: data_stream.dataset + type: constant_keyword + description: Data stream dataset. +- name: data_stream.namespace + type: constant_keyword + description: Data stream namespace. +- name: '@timestamp' + type: date + description: Event timestamp. 
+- name: event.module + type: constant_keyword + description: Event module + value: aws +- name: event.dataset + type: constant_keyword + description: Event dataset + value: aws.emr_logs diff --git a/packages/aws/data_stream/emr_logs/fields/ecs.yml b/packages/aws/data_stream/emr_logs/fields/ecs.yml new file mode 100644 index 00000000000..249a3bb663a --- /dev/null +++ b/packages/aws/data_stream/emr_logs/fields/ecs.yml @@ -0,0 +1,60 @@ +- external: ecs + name: ecs.version +- external: ecs + name: error.message +- external: ecs + name: message +- external: ecs + name: tags +- external: ecs + name: host.architecture +- external: ecs + name: host.domain +- external: ecs + name: host.hostname +- external: ecs + name: host.id +- external: ecs + name: host.ip +- external: ecs + name: host.mac +- external: ecs + name: host.name +- external: ecs + name: host.os.family +- external: ecs + name: host.os.kernel +- external: ecs + name: host.os.name +- external: ecs + name: host.os.platform +- external: ecs + name: host.os.version +- external: ecs + name: host.type +- external: ecs + name: cloud.account.id +- external: ecs + name: cloud.availability_zone +- external: ecs + name: cloud.instance.id +- external: ecs + name: cloud.instance.name +- external: ecs + name: cloud.machine.type +- external: ecs + name: cloud.provider +- external: ecs + name: cloud.region +- external: ecs + name: cloud.project.id +- external: ecs + name: container.id +- external: ecs + name: container.image.name +- external: ecs + name: container.labels +- external: ecs + name: container.name +- external: ecs + name: log.level \ No newline at end of file diff --git a/packages/aws/data_stream/emr_logs/fields/fields.yml b/packages/aws/data_stream/emr_logs/fields/fields.yml new file mode 100644 index 00000000000..d25df0d5191 --- /dev/null +++ b/packages/aws/data_stream/emr_logs/fields/fields.yml @@ -0,0 +1,28 @@ +- name: aws.s3 + type: group + fields: + - name: bucket.name + type: keyword + description: | + 
Name of a S3 bucket. + - name: bucket.arn + type: keyword + description: | + ARN of the S3 bucket that this log retrieved from. + - name: object.key + type: keyword + description: | + Name of the S3 object that this log retrieved from. + - name: metadata + type: flattened + description: | + AWS S3 object metadata values. +- name: process.name + type: keyword + description: Process name. +- name: process.entrypoint + type: keyword + description: Process entrypoint. +- name: process.message + type: keyword + description: Process message. \ No newline at end of file diff --git a/packages/aws/data_stream/emr_logs/manifest.yml b/packages/aws/data_stream/emr_logs/manifest.yml new file mode 100644 index 00000000000..68dade863cc --- /dev/null +++ b/packages/aws/data_stream/emr_logs/manifest.yml @@ -0,0 +1,232 @@ +title: AWS EMR logs +type: logs +streams: + - input: aws-s3 + template_path: aws-s3.yml.hbs + title: AWS EMR Logs via S3 + description: Collect AWS EMR logs using s3 input + enabled: true + vars: + - name: collect_s3_logs + required: true + show_user: true + title: Collect logs via S3 Bucket + description: To Collect logs via S3 bucket enable the toggle switch. + type: bool + multi: false + default: true + - name: bucket_arn + type: text + title: "[S3] Bucket ARN" + multi: false + required: false + show_user: true + description: Mandatory if the "Collect logs via S3 Bucket" switch is on. It is a required parameter for collecting logs via the AWS S3 Bucket. + - name: queue_url + type: text + title: "[SQS] Queue URL" + multi: false + required: false + show_user: true + default: false + description: Mandatory if the "Collect logs via S3 Bucket" switch is off. URL of the AWS SQS queue that messages will be received from. + - name: bucket_list_prefix + type: text + title: "[S3] Bucket Prefix" + multi: false + required: false + show_user: false + description: Prefix to apply for the list request to the S3 bucket. 
+ - name: interval + type: text + title: "[S3] Interval" + multi: false + required: false + show_user: false + default: 1m + description: "Time interval for polling listing of the S3 bucket. NOTE: Supported units for this parameter are h/m/s." + - name: number_of_workers + type: integer + title: "[S3] Number of Workers" + multi: false + required: false + show_user: false + default: 5 + description: Number of workers that will process the S3 objects listed. + - name: visibility_timeout + type: text + title: "[SQS] Visibility Timeout" + multi: false + required: false + show_user: false + description: The duration that the received messages are hidden from subsequent retrieve requests after being retrieved by a ReceiveMessage request. The maximum is 12 hours. + - name: api_timeout + type: text + title: "[SQS] API Timeout" + multi: false + required: false + show_user: false + description: The maximum duration of AWS API can take. The maximum is half of the visibility timeout value. + - name: max_number_of_messages + type: integer + title: "[SQS] Maximum Concurrent SQS Messages" + description: The maximum number of SQS messages that can be inflight at any time. + default: 5 + required: false + show_user: false + - name: fips_enabled + type: bool + title: Enable S3 FIPS + default: false + multi: false + required: false + show_user: false + description: Enabling this option changes the service name from `s3` to `s3-fips` for connecting to the correct service endpoint. + - name: tags + type: text + title: Tags + multi: true + required: true + show_user: false + default: + - forwarded + - aws-emr-logs + - name: processors + type: yaml + title: Processors + multi: false + required: false + show_user: false + description: > + Processors are used to reduce the number of fields in the exported event or to enhance the event with metadata. This executes in the agent before the logs are parsed. 
See [Processors](https://www.elastic.co/guide/en/beats/filebeat/current/filtering-and-enhancing-data.html) for details. + + - name: preserve_original_event + required: true + show_user: true + title: Preserve original event + description: Preserves a raw copy of the original event, added to the field `event.original` + type: bool + multi: false + default: false + - input: aws-cloudwatch + template_path: aws-cloudwatch.yml.hbs + title: AWS EMR Logs via CloudWatch + description: Collect AWS EMR logs using cloudwatch input. + enabled: false + vars: + - name: log_group_arn + type: text + title: Log Group ARN + multi: false + required: false + show_user: true + description: ARN of the log group to collect logs from. + - name: log_group_name + type: text + title: Log Group Name + multi: false + required: false + show_user: false + description: Name of the log group to collect logs from. `region_name` is required when `log_group_name` is given. + - name: log_group_name_prefix + type: text + title: Log Group Name Prefix + multi: false + required: false + show_user: false + description: The prefix for a group of log group names. `region_name` is required when `log_group_name_prefix` is given. `log_group_name` and `log_group_name_prefix` cannot be given at the same time. + - name: region_name + type: text + title: Region Name + multi: false + required: false + show_user: false + description: Region that the specified log group or log group prefix belongs to. + - name: log_streams + type: text + title: Log Streams + multi: true + required: false + show_user: false + description: A list of strings of log streams names that Filebeat collect log events from. + - name: log_stream_prefix + type: text + title: Log Stream Prefix + multi: false + required: false + show_user: false + description: A string to filter the results to include only log events from log streams that have names starting with this prefix. 
+ - name: start_position + type: text + title: Start Position + multi: false + required: false + default: beginning + show_user: true + description: Allows user to specify if this input should read log files from the beginning or from the end. + - name: scan_frequency + type: text + title: Scan Frequency + multi: false + required: false + show_user: false + default: 1m + description: This config parameter sets how often Filebeat checks for new log events from the specified log group. + - name: api_timeout + type: text + title: API Timeout + multi: false + required: false + show_user: false + default: 120s + description: The maximum duration of AWS API can take. If it exceeds the timeout, AWS API will be interrupted. + - name: api_sleep + type: text + title: API Sleep + multi: false + required: false + show_user: false + default: 200ms + description: This is used to sleep between AWS FilterLogEvents API calls inside the same collection period. `FilterLogEvents` API has a quota of 5 transactions per second (TPS)/account/Region. This value should only be adjusted when there are multiple Filebeats or multiple Filebeat inputs collecting logs from the same region and AWS account. + - name: latency + type: text + title: Latency + multi: false + required: false + show_user: false + description: "The amount of time required for the logs to be available to CloudWatch Logs. Sample values, `1m` or `5m` — see Golang [time.ParseDuration](https://pkg.go.dev/time#ParseDuration) for more details. Latency translates the query's time range to consider the CloudWatch Logs latency. Example: `5m` means that the integration will query CloudWatch to search for logs available 5 minutes ago." + - name: number_of_workers + type: integer + title: Number of workers + required: false + show_user: false + description: The number of workers assigned to reading from log groups. Each worker will read log events from one of the log groups matching `log_group_name_prefix`. 
For example, if `log_group_name_prefix` matches five log groups, then `number_of_workers` should be set to `5`. The default value is `1`. + - name: tags + type: text + title: Tags + multi: true + required: true + show_user: false + default: + - forwarded + - aws-emr-logs + - name: processors + type: yaml + title: Processors + multi: false + required: false + show_user: false + description: > + Processors are used to reduce the number of fields in the exported event or to enhance the event with metadata. This executes in the agent before the logs are parsed. See [Processors](https://www.elastic.co/guide/en/beats/filebeat/current/filtering-and-enhancing-data.html) for details. + + - name: preserve_original_event + required: true + show_user: true + title: Preserve original event + description: Preserves a raw copy of the original event, added to the field `event.original` + type: bool + multi: false + default: false +# Ensures agents have permissions to write data to `logs-*-*` +elasticsearch.dynamic_dataset: true +elasticsearch.dynamic_namespace: true diff --git a/packages/aws/data_stream/emr_logs/sample_event.json b/packages/aws/data_stream/emr_logs/sample_event.json new file mode 100644 index 00000000000..e5506058298 --- /dev/null +++ b/packages/aws/data_stream/emr_logs/sample_event.json @@ -0,0 +1,24 @@ +{ + "data_stream": { + "namespace": "default", + "type": "logs", + "dataset": "aws.emr_logs" + }, + "@timestamp": "2020-02-20T07:01:01.000Z", + "ecs": { + "version": "8.0.0" + }, + "log": { + "level": "INFO" + }, + "event": { + "original": "2023-06-26 13:45:50,566 INFO common.Util: dfs.datanode.fileio.profiling.sampling.percentage set to 0. Disabling file IO profiling" + }, + "process": { + "name": "blockmanagement.BlockManager" + }, + "message": "dfs.datanode.fileio.profiling.sampling.percentage set to 0. 
Disabling file IO profiling", + "tags": [ + "preserve_original_event" + ] +} \ No newline at end of file diff --git a/packages/aws/docs/emr.md b/packages/aws/docs/emr.md index c52fabbbd4e..ce6167d337c 100644 --- a/packages/aws/docs/emr.md +++ b/packages/aws/docs/emr.md @@ -10,11 +10,14 @@ For example, you could use this data to track Amazon EMR cluster progress and cl ## Data streams -The Amazon EMR integration collects one type of data: metrics. +The Amazon EMR integration collects two types of data: metrics and logs. **Metrics** give you insight into the state of Amazon EMR. The metrics collected by the Amazon EMR integration include cluster progress, cluster state, cluster or node storage, and more. See more details in the [Metrics reference](#metrics-reference) +**Logs** help you keep a record of events happening in Amazon EMR. +Logs collected by the Amazon EMR integration include the cluster status, node status details and more. + ## Requirements You need Elasticsearch for storing and searching your data and Kibana for visualizing and managing it. @@ -214,3 +217,88 @@ An example event for `emr` looks as following: | host.os.version | Operating system version as a raw string. | keyword | | | | host.type | Type of host. For Cloud providers this can be the machine type like `t2.medium`. If vm, this could be the container, for example, or other information meaningful in your environment. | keyword | | | | service.type | The type of the service data is collected from. The type can be used to group and correlate logs and metrics from one service type. Example: If logs or metrics are collected from Elasticsearch, `service.type` would be `elasticsearch`. 
| keyword | | | + + +## Logs reference + +An example event for `emr` looks as following: + +```json +{ + "data_stream": { + "namespace": "default", + "type": "logs", + "dataset": "aws.emr_logs" + }, + "@timestamp": "2020-02-20T07:01:01.000Z", + "ecs": { + "version": "8.0.0" + }, + "log": { + "level": "INFO" + }, + "event": { + "original": "2023-06-26 13:45:50,566 INFO common.Util: dfs.datanode.fileio.profiling.sampling.percentage set to 0. Disabling file IO profiling" + }, + "process": { + "name": "blockmanagement.BlockManager" + }, + "message": "dfs.datanode.fileio.profiling.sampling.percentage set to 0. Disabling file IO profiling", + "tags": [ + "preserve_original_event" + ] +} +``` + +**Exported fields** + +| Field | Description | Type | +|---|---|---| +| @timestamp | Event timestamp. | date | +| aws.s3.bucket.arn | ARN of the S3 bucket that this log retrieved from. | keyword | +| aws.s3.bucket.name | Name of a S3 bucket. | keyword | +| aws.s3.metadata | AWS S3 object metadata values. | flattened | +| aws.s3.object.key | Name of the S3 object that this log retrieved from. | keyword | +| cloud.account.id | The cloud account or organization id used to identify different entities in a multi-tenant environment. Examples: AWS account id, Google Cloud ORG Id, or other unique identifier. | keyword | +| cloud.availability_zone | Availability zone in which this host, resource, or service is located. | keyword | +| cloud.image.id | Image ID for the cloud instance. | keyword | +| cloud.instance.id | Instance ID of the host machine. | keyword | +| cloud.instance.name | Instance name of the host machine. | keyword | +| cloud.machine.type | Machine type of the host machine. | keyword | +| cloud.project.id | The cloud project identifier. Examples: Google Cloud Project id, Azure Project id. | keyword | +| cloud.provider | Name of the cloud provider. Example values are aws, azure, gcp, or digitalocean. 
| keyword | +| cloud.region | Region in which this host, resource, or service is located. | keyword | +| container.id | Unique container id. | keyword | +| container.image.name | Name of the image the container was built on. | keyword | +| container.labels | Image labels. | object | +| container.name | Container name. | keyword | +| data_stream.dataset | Data stream dataset. | constant_keyword | +| data_stream.namespace | Data stream namespace. | constant_keyword | +| data_stream.type | Data stream type. | constant_keyword | +| ecs.version | ECS version this event conforms to. `ecs.version` is a required field and must exist in all events. When querying across multiple indices -- which may conform to slightly different ECS versions -- this field lets integrations adjust to the schema version of the events. | keyword | +| error.message | Error message. | match_only_text | +| event.dataset | Event dataset | constant_keyword | +| event.module | Event module | constant_keyword | +| host.architecture | Operating system architecture. | keyword | +| host.containerized | If the host is a container. | boolean | +| host.domain | Name of the domain of which the host is a member. For example, on Windows this could be the host's Active Directory domain or NetBIOS domain name. For Linux this could be the domain of the host's LDAP provider. | keyword | +| host.hostname | Hostname of the host. It normally contains what the `hostname` command returns on the host machine. | keyword | +| host.id | Unique host id. As hostname is not always unique, use values that are meaningful in your environment. Example: The current usage of `beat.name`. | keyword | +| host.ip | Host ip addresses. | ip | +| host.mac | Host MAC addresses. The notation format from RFC 7042 is suggested: Each octet (that is, 8-bit byte) is represented by two [uppercase] hexadecimal digits giving the value of the octet as an unsigned integer. Successive octets are separated by a hyphen. 
| keyword | +| host.name | Name of the host. It can contain what `hostname` returns on Unix systems, the fully qualified domain name, or a name specified by the user. The sender decides which value to use. | keyword | +| host.os.build | OS build information. | keyword | +| host.os.codename | OS codename, if any. | keyword | +| host.os.family | OS family (such as redhat, debian, freebsd, windows). | keyword | +| host.os.kernel | Operating system kernel version as a raw string. | keyword | +| host.os.name | Operating system name, without the version. | keyword | +| host.os.name.text | Multi-field of `host.os.name`. | match_only_text | +| host.os.platform | Operating system platform (such centos, ubuntu, windows). | keyword | +| host.os.version | Operating system version as a raw string. | keyword | +| host.type | Type of host. For Cloud providers this can be the machine type like `t2.medium`. If vm, this could be the container, for example, or other information meaningful in your environment. | keyword | +| log.level | Original log level of the log event. If the source of the event provides a log level or textual severity, this is the one that goes in `log.level`. If your source doesn't specify one, you may put your event transport's severity here (e.g. Syslog severity). Some examples are `warn`, `err`, `i`, `informational`. | keyword | +| message | For log events the message field contains the log message, optimized for viewing in a log viewer. For structured logs without an original message field, other fields can be concatenated to form a human-readable summary of the event. If multiple messages exist, they can be combined into one message. | match_only_text | +| process.entrypoint | Process entrypoint. | keyword | +| process.message | Process message. | keyword | +| process.name | Process name. | keyword | +| tags | List of keywords used to tag each event. 
| keyword | diff --git a/packages/aws/manifest.yml b/packages/aws/manifest.yml index cb3a77f08a8..64f1ff6aac8 100644 --- a/packages/aws/manifest.yml +++ b/packages/aws/manifest.yml @@ -1,7 +1,7 @@ format_version: 1.0.0 name: aws title: AWS -version: 1.49.0 +version: 1.50.0 license: basic description: Collect logs and metrics from Amazon Web Services (AWS) with Elastic Agent. type: integration @@ -770,6 +770,7 @@ policy_templates: description: Collect logs and metrics for Amazon EMR service with Elastic Agent data_streams: - emr_metrics + - emr_logs categories: - observability inputs: @@ -777,6 +778,14 @@ policy_templates: title: Collect EMR metrics description: Collect EMR metrics using AWS CloudWatch input_group: metrics + - type: aws-s3 + title: Collect EMR logs from S3 + description: Collecting logs from EMR using aws-s3 input + input_group: logs + - type: aws-cloudwatch + title: Collect EMR logs from CloudWatch + description: Collecting logs from EMR using aws-cloudwatch input + input_group: logs icons: - src: /img/logo_emr.svg title: AWS EMR logo diff --git a/packages/nats/data_stream/log/elasticsearch/ingest_pipeline/default.yml b/packages/nats/data_stream/log/elasticsearch/ingest_pipeline/default.yml index b9556f56f56..28db414fd93 100644 --- a/packages/nats/data_stream/log/elasticsearch/ingest_pipeline/default.yml +++ b/packages/nats/data_stream/log/elasticsearch/ingest_pipeline/default.yml @@ -203,5 +203,8 @@ processors: ignore_missing: true on_failure: - set: + field: event.kind + value: pipeline_error + - append: field: error.message - value: '{{ _ingest.on_failure_message }}' + value: '{{{ _ingest.on_failure_message }}}'