From d5d133858efd9c7301fbac6264892a5ecdc3e8f7 Mon Sep 17 00:00:00 2001 From: Lee Hinman <57081003+leehinman@users.noreply.github.com> Date: Thu, 25 Jun 2020 08:51:41 -0500 Subject: [PATCH] Improve ECS categorization field mappings in traefik module (#19379) - event.kind - event.category - event.type - event.outcome - related.ip - related.user Closes #16183 (cherry picked from commit f814f41afd8316b5593297d09126409b1b4ba195) --- CHANGELOG.next.asciidoc | 1 + .../traefik/access/ingest/pipeline.json | 131 ------------------ .../module/traefik/access/ingest/pipeline.yml | 106 ++++++++++++++ filebeat/module/traefik/access/manifest.yml | 2 +- .../access/test/test.log-expected.json | 80 +++++++++++ 5 files changed, 188 insertions(+), 132 deletions(-) delete mode 100644 filebeat/module/traefik/access/ingest/pipeline.json create mode 100644 filebeat/module/traefik/access/ingest/pipeline.yml diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index f0830fa2cff..0e4fb382549 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -503,6 +503,7 @@ field. You can revert this change by configuring tags for the module and omittin - Explicitly set ECS version in all Filebeat modules. {pull}19198[19198] - Add awscloudwatch input. {pull}19025[19025] - Add support for timezone offsets and `Z` to decode_cef timestamp parser. {pull}19346[19346] +- Improve ECS categorization field mappings in traefik module. {issue}16183[16183] {pull}19379[19379] *Heartbeat* diff --git a/filebeat/module/traefik/access/ingest/pipeline.json b/filebeat/module/traefik/access/ingest/pipeline.json deleted file mode 100644 index c367374e2fa..00000000000 --- a/filebeat/module/traefik/access/ingest/pipeline.json +++ /dev/null @@ -1,131 +0,0 @@ -{ - "description": "Pipeline for parsing Traefik access logs. Requires the geoip and user_agent plugins.", - "processors": [ - { - "dissect": { - "field": "message", - "pattern": "%{source.address} %{traefik.access.user_identifier} %{user.name} [%{traefik.access.time}] \"%{http.request.method} %{url.original} HTTP/%{http.version}\" %{http.response.status_code} %{traefik.access.message}" - } - }, - { - "grok": { - "field": "traefik.access.message", - "patterns": [ - "(?:%{NUMBER:http.response.body.bytes:long}|-)( (?:\"%{DATA:http.request.referrer}\"|-)?( (?:\"%{DATA:user_agent.original}\"|-)?)?( (?:%{NUMBER:traefik.access.request_count:long}|-)?)?( (?:\"%{DATA:traefik.access.frontend_name}\"|-)?)?( \"%{DATA:traefik.access.backend_url}\")?( %{NUMBER:temp.duration:long}ms)?)?" - ], - "ignore_missing": true - } - }, - { - "remove": { - "field": "message", - "ignore_missing": true - } - }, - { - "remove": { - "field": "traefik.access.message", - "ignore_missing": true - } - }, - { - "rename": { - "field": "@timestamp", - "target_field": "event.created" - } - }, - { - "date": { - "field": "traefik.access.time", - "target_field": "@timestamp", - "formats": [ - "dd/MMM/yyyy:H:m:s Z" - ] - } - }, - { - "remove": { - "field": "traefik.access.time" - } - }, - - { - "convert": { - "field": "http.response.status_code", - "type": "long" - } - }, - - { - "grok": { - "field": "source.address", - "patterns": [ - "^(%{IP:source.ip}|%{HOSTNAME:source.domain})$" - ] - } - }, - - { - "script": { - "lang": "painless", - "source": "ctx.event.duration = Math.round(ctx.temp.duration * params.scale)", - "params": { "scale": 1000000 }, - "if": "ctx.temp?.duration != null" - } - }, - { - "remove": { - "field": "temp.duration", - "ignore_missing": true - } - }, - - { - "user_agent": { - "field": "user_agent.original", - "ignore_failure": true - } - }, - { - "geoip": { - "field": "source.ip", - "target_field": "source.geo", - "ignore_missing": true - } - }, - { - "geoip": { - "database_file": "GeoLite2-ASN.mmdb", - "field": "source.ip", - "target_field": "source.as", - "properties": [ - "asn", - "organization_name" - ], - "ignore_missing": true - } - }, - { - "rename": { - "field": "source.as.asn", - "target_field": "source.as.number", - "ignore_missing": true - } - }, - { - "rename": { - "field": "source.as.organization_name", - "target_field": "source.as.organization.name", - "ignore_missing": true - } - } - ], - "on_failure": [ - { - "set": { - "field": "error.message", - "value": "{{ _ingest.on_failure_message }}" - } - } - ] -} diff --git a/filebeat/module/traefik/access/ingest/pipeline.yml b/filebeat/module/traefik/access/ingest/pipeline.yml new file mode 100644 index 00000000000..ce489a4a92c --- /dev/null +++ b/filebeat/module/traefik/access/ingest/pipeline.yml @@ -0,0 +1,106 @@ +description: Pipeline for parsing Traefik access logs. Requires the geoip and user_agent + plugins. +processors: +- dissect: + field: message + pattern: '%{source.address} %{traefik.access.user_identifier} %{user.name} [%{traefik.access.time}] + "%{http.request.method} %{url.original} HTTP/%{http.version}" %{http.response.status_code} + %{traefik.access.message}' +- grok: + field: traefik.access.message + patterns: + - (?:%{NUMBER:http.response.body.bytes:long}|-)( (?:"%{DATA:http.request.referrer}"|-)?( + (?:"%{DATA:user_agent.original}"|-)?)?( (?:%{NUMBER:traefik.access.request_count:long}|-)?)?( + (?:"%{DATA:traefik.access.frontend_name}"|-)?)?( "%{DATA:traefik.access.backend_url}")?( + %{NUMBER:temp.duration:long}ms)?)? + ignore_missing: true +- remove: + field: message + ignore_missing: true +- remove: + field: traefik.access.message + ignore_missing: true +- rename: + field: '@timestamp' + target_field: event.created +- date: + field: traefik.access.time + target_field: '@timestamp' + formats: + - dd/MMM/yyyy:H:m:s Z +- remove: + field: traefik.access.time +- convert: + field: http.response.status_code + type: long +- grok: + field: source.address + patterns: + - ^(%{IP:source.ip}|%{HOSTNAME:source.domain})$ +- script: + lang: painless + source: ctx.event.duration = Math.round(ctx.temp.duration * params.scale) + params: + scale: 1000000 + if: ctx.temp?.duration != null +- remove: + field: temp.duration + ignore_missing: true +- user_agent: + field: user_agent.original + ignore_failure: true +- geoip: + field: source.ip + target_field: source.geo + ignore_missing: true +- geoip: + database_file: GeoLite2-ASN.mmdb + field: source.ip + target_field: source.as + properties: + - asn + - organization_name + ignore_missing: true +- rename: + field: source.as.asn + target_field: source.as.number + ignore_missing: true +- rename: + field: source.as.organization_name + target_field: source.as.organization.name + ignore_missing: true +- set: + field: event.kind + value: event +- append: + field: event.category + value: web + if: "ctx?.http?.request?.method != null && ctx.http.request.method != '-'" +- append: + field: event.type + value: access + if: "ctx?.http?.request?.method != null && ctx.http.request.method != '-'" +- set: + field: event.outcome + value: success + if: "ctx?.http?.response?.status_code != null && ctx.http.response.status_code < 400" +- set: + field: event.outcome + value: failure + if: "ctx?.http?.response?.status_code != null && ctx.http.response.status_code >= 400" +- append: + field: related.ip + value: "{{source.ip}}" + if: "ctx?.source?.ip != null" +- append: + field: related.ip + value: "{{destination.ip}}" + if: "ctx?.destination?.ip != null" +- append: + field: related.user + value: "{{user.name}}" + if: "ctx?.user?.name != null && ctx.user.name != '-'" +on_failure: +- set: + field: error.message + value: '{{ _ingest.on_failure_message }}' diff --git a/filebeat/module/traefik/access/manifest.yml b/filebeat/module/traefik/access/manifest.yml index c72c12d4111..fcf8168f0ee 100644 --- a/filebeat/module/traefik/access/manifest.yml +++ b/filebeat/module/traefik/access/manifest.yml @@ -9,7 +9,7 @@ var: os.windows: - c:/programdata/traefik/logs/*access.log* -ingest_pipeline: ingest/pipeline.json +ingest_pipeline: ingest/pipeline.yml input: config/traefik-access.yml requires.processors: diff --git a/filebeat/module/traefik/access/test/test.log-expected.json b/filebeat/module/traefik/access/test/test.log-expected.json index 067dec2349d..a5723ed5c27 100644 --- a/filebeat/module/traefik/access/test/test.log-expected.json +++ b/filebeat/module/traefik/access/test/test.log-expected.json @@ -1,9 +1,17 @@ [ { "@timestamp": "2017-10-02T20:22:07.000Z", + "event.category": [ + "web" + ], "event.dataset": "traefik.access", "event.duration": 2000000, + "event.kind": "event", "event.module": "traefik", + "event.outcome": "success", + "event.type": [ + "access" + ], "fileset.name": "access", "http.request.method": "GET", "http.request.referrer": "http://example.com/login", @@ -12,6 +20,9 @@ "http.version": "1.1", "input.type": "log", "log.offset": 0, + "related.ip": [ + "192.168.33.1" + ], "service.type": "traefik", "source.address": "192.168.33.1", "source.ip": "192.168.33.1", @@ -29,9 +40,17 @@ }, { "@timestamp": "2017-10-02T20:22:08.000Z", + "event.category": [ + "web" + ], "event.dataset": "traefik.access", "event.duration": 3000000, + "event.kind": "event", "event.module": "traefik", + "event.outcome": "success", + "event.type": [ + "access" + ], "fileset.name": "access", "http.request.method": "GET", "http.request.referrer": "http://example.com/login", @@ -40,6 +59,9 @@ "http.version": "1.1", "input.type": "log", "log.offset": 280, + "related.ip": [ + "85.181.35.98" + ], "service.type": "traefik", "source.address": "85.181.35.98", "source.as.number": 6805, @@ -66,9 +88,17 @@ }, { "@timestamp": "2018-02-28T17:30:33.000Z", + "event.category": [ + "web" + ], "event.dataset": "traefik.access", "event.duration": 247000000, + "event.kind": "event", "event.module": "traefik", + "event.outcome": "success", + "event.type": [ + "access" + ], "fileset.name": "access", "http.request.method": "GET", "http.response.body.bytes": 2814, @@ -76,6 +106,9 @@ "http.version": "2.0", "input.type": "log", "log.offset": 553, + "related.ip": [ + "70.29.80.15" + ], "service.type": "traefik", "source.address": "70.29.80.15", "source.as.number": 577, @@ -104,9 +137,17 @@ }, { "@timestamp": "2018-11-29T15:03:51.000Z", + "event.category": [ + "web" + ], "event.dataset": "traefik.access", "event.duration": 0, + "event.kind": "event", "event.module": "traefik", + "event.outcome": "failure", + "event.type": [ + "access" + ], "fileset.name": "access", "http.request.method": "GET", "http.request.referrer": "-", @@ -115,6 +156,9 @@ "http.version": "1.1", "input.type": "log", "log.offset": 821, + "related.ip": [ + "::1" + ], "service.type": "traefik", "source.address": "::1", "source.ip": "::1", @@ -131,9 +175,17 @@ }, { "@timestamp": "2018-01-19T10:01:02.000Z", + "event.category": [ + "web" + ], "event.dataset": "traefik.access", "event.duration": 13000000, + "event.kind": "event", "event.module": "traefik", + "event.outcome": "success", + "event.type": [ + "access" + ], "fileset.name": "access", "http.request.method": "GET", "http.response.body.bytes": 85, @@ -141,6 +193,9 @@ "http.version": "1.1", "input.type": "log", "log.offset": 931, + "related.ip": [ + "94.254.131.115" + ], "service.type": "traefik", "source.address": "94.254.131.115", "source.as.number": 39603, @@ -166,9 +221,17 @@ }, { "@timestamp": "2018-01-19T10:01:02.000Z", + "event.category": [ + "web" + ], "event.dataset": "traefik.access", "event.duration": 8000000, + "event.kind": "event", "event.module": "traefik", + "event.outcome": "success", + "event.type": [ + "access" + ], "fileset.name": "access", "http.request.method": "GET", "http.response.body.bytes": 150, @@ -176,6 +239,9 @@ "http.version": "1.1", "input.type": "log", "log.offset": 1267, + "related.ip": [ + "89.64.35.193" + ], "service.type": "traefik", "source.address": "89.64.35.193", "source.as.number": 6830, @@ -201,8 +267,16 @@ }, { "@timestamp": "2000-10-10T20:55:36.000Z", + "event.category": [ + "web" + ], "event.dataset": "traefik.access", + "event.kind": "event", "event.module": "traefik", + "event.outcome": "success", + "event.type": [ + "access" + ], "fileset.name": "access", "http.request.method": "GET", "http.response.body.bytes": 2326, @@ -210,6 +284,12 @@ "http.version": "1.0", "input.type": "log", "log.offset": 1581, + "related.ip": [ + "127.0.0.1" + ], + "related.user": [ + "frank" + ], "service.type": "traefik", "source.address": "127.0.0.1", "source.ip": "127.0.0.1",