From 240e30736a9d7aa85b8f8809b7dffab08a877741 Mon Sep 17 00:00:00 2001 From: Taylor Swanson <90622908+taylor-swanson@users.noreply.github.com> Date: Thu, 8 Aug 2024 08:51:33 -0500 Subject: [PATCH 1/2] [syslog] Fix handling of escaped characters in structured data (#40446) - Improved parser to handle escaped closing square brackets in structured data, along with square brackets in the normal, non-structured data portion of the message. - Fix incorrect offset being passed to removeBytes function, which would not remove escaped characters from structured data values. - The non-compliant-sd unit test cases now include escapes on the closing brackets within the structured data, something that should have always been there. - Add tests (cherry picked from commit 1c01d0e4fc9da960785a84341a819e2adc85eee9) --- CHANGELOG.next.asciidoc | 8 + libbeat/reader/syslog/message_test.go | 14 ++ libbeat/reader/syslog/parser/rfc5424.rl | 6 +- libbeat/reader/syslog/rfc5424_gen.go | 220 ++++++++++-------------- libbeat/reader/syslog/rfc5424_test.go | 53 +++++- 5 files changed, 166 insertions(+), 135 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 8fadad36f17..52c53b20e6b 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -68,6 +68,14 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff] - Support Elastic Agent control protocol chunking support {pull}37343[37343] - Lower logging level to debug when attempting to configure beats with unknown fields from autodiscovered events/environments {pull}[37816][37816] - Set timeout of 1 minute for FQDN requests {pull}37756[37756] +- Fix the paths in the .cmd script added to the path by the Windows MSI to point to the new C:\Program Files installation location. https://github.com/elastic/elastic-stack-installers/pull/238 +- Change cache processor documentation from `write_period` to `write_interval`. {pull}38561[38561] +- Fix cache processor expiries heap cleanup on partial file writes. {pull}38561[38561] +- Fix cache processor expiries infinite growth when large a large TTL is used and recurring keys are cached. {pull}38561[38561] +- Fix parsing of RFC 3164 process IDs in syslog processor. {issue}38947[38947] {pull}38982[38982] +- Rename the field "apache2.module.error" to "apache.module.error" in Apache error visualization. {issue}39480[39480] {pull}39481[39481] +- Validate config of the `replace` processor {pull}40047[40047] +- Fix handling of escaped brackets in syslog structured data. {issue}40445[40445] {pull}40446[40446] *Auditbeat* diff --git a/libbeat/reader/syslog/message_test.go b/libbeat/reader/syslog/message_test.go index 29fe756c784..d42f2e46119 100644 --- a/libbeat/reader/syslog/message_test.go +++ b/libbeat/reader/syslog/message_test.go @@ -522,6 +522,20 @@ func TestParseStructuredData(t *testing.T) { }, }, }, + "multi-key-with-escape": { + in: `[exampleSDID@32473 iut="3" eventSource="Application" eventID="1011" somekey="[value\] more data"][examplePriority@32473 class="high"]`, + want: map[string]interface{}{ + "exampleSDID@32473": map[string]interface{}{ + "iut": "3", + "eventSource": "Application", + "eventID": "1011", + "somekey": "[value] more data", + }, + "examplePriority@32473": map[string]interface{}{ + "class": "high", + }, + }, + }, "repeated-id": { in: `[exampleSDID@32473 iut="3"][exampleSDID@32473 class="high"]`, want: map[string]interface{}{ diff --git a/libbeat/reader/syslog/parser/rfc5424.rl b/libbeat/reader/syslog/parser/rfc5424.rl index f7c5d1fb920..267c823b912 100644 --- a/libbeat/reader/syslog/parser/rfc5424.rl +++ b/libbeat/reader/syslog/parser/rfc5424.rl @@ -36,7 +36,7 @@ action set_param_value { if subMap, ok := structuredData[s.sdID].(map[string]interface{}); ok { - subMap[s.sdParamName] = removeBytes(data[tok:p], s.sdValueEscapes, p) + subMap[s.sdParamName] = removeBytes(data[tok:p], s.sdValueEscapes, tok) } } @@ -73,7 +73,9 @@ header = priority version sp timestamp sp hostname sp app_name sp proc_id sp msg_id; - sd_raw = nil_value | ('[' any+ ']') >tok %set_sd_raw; + sd_raw_escape = (bs | ']'); + sd_raw_values = ((bs ']') | (any - sd_raw_escape)); + sd_raw = nil_value | ('[' sd_raw_values+ ']')+ >tok %set_sd_raw; msg = any* >tok %set_msg; }%% diff --git a/libbeat/reader/syslog/rfc5424_gen.go b/libbeat/reader/syslog/rfc5424_gen.go index fdfd46a2594..1dc8f6eb72d 100644 --- a/libbeat/reader/syslog/rfc5424_gen.go +++ b/libbeat/reader/syslog/rfc5424_gen.go @@ -25,7 +25,7 @@ import ( ) const rfc5424_start int = 1 -const rfc5424_first_final int = 23 +const rfc5424_first_final int = 24 const rfc5424_error int = 0 const rfc5424_en_main int = 1 @@ -90,32 +90,28 @@ func parseRFC5424(data string) (message, error) { goto st_case_15 case 16: goto st_case_16 - case 23: - goto st_case_23 case 24: goto st_case_24 case 25: goto st_case_25 + case 26: + goto st_case_26 case 17: goto st_case_17 case 18: goto st_case_18 - case 26: - goto st_case_26 - case 27: - goto st_case_27 - case 28: - goto st_case_28 - case 29: - goto st_case_29 case 19: goto st_case_19 + case 27: + goto st_case_27 case 20: goto st_case_20 case 21: goto st_case_21 case 22: goto st_case_22 + case 23: + goto st_case_23 } goto st_out st_case_1: @@ -226,7 +222,7 @@ func parseRFC5424(data string) (message, error) { goto st8 } goto tr0 - tr31: + tr32: if err := m.setTimestampRFC3339(data[tok:p]); err != nil { errs = multierr.Append(errs, &ValidationError{Err: err, Pos: tok + 1}) @@ -364,29 +360,23 @@ func parseRFC5424(data string) (message, error) { st_case_16: switch data[p] { case 45: - goto st23 + goto st24 case 91: goto tr26 } goto tr0 - st23: - if p++; p == pe { - goto _test_eof23 - } - st_case_23: - if data[p] == 32 { - goto st24 - } - goto tr0 st24: if p++; p == pe { goto _test_eof24 } st_case_24: - goto tr34 - tr34: + if data[p] == 32 { + goto st25 + } + goto tr0 + tr37: - tok = p + m.setRawSDValue(data[tok:p]) goto st25 st25: @@ -394,7 +384,18 @@ func parseRFC5424(data string) (message, error) { goto _test_eof25 } st_case_25: - goto st25 + goto tr35 + tr35: + + tok = p + + goto st26 + st26: + if p++; p == pe { + goto _test_eof26 + } + st_case_26: + goto st26 tr26: tok = p @@ -405,148 +406,121 @@ func parseRFC5424(data string) (message, error) { goto _test_eof17 } st_case_17: + switch data[p] { + case 92: + goto st19 + case 93: + goto tr0 + } goto st18 st18: if p++; p == pe { goto _test_eof18 } st_case_18: - if data[p] == 93 { - goto st26 - } - goto st18 - st26: - if p++; p == pe { - goto _test_eof26 - } - st_case_26: switch data[p] { - case 32: - goto tr36 + case 92: + goto st19 case 93: - goto st26 + goto st27 } goto st18 - tr36: - - m.setRawSDValue(data[tok:p]) - - goto st27 - st27: - if p++; p == pe { - goto _test_eof27 - } - st_case_27: - if data[p] == 93 { - goto tr38 - } - goto tr37 - tr37: - - tok = p - - goto st28 - st28: + st19: if p++; p == pe { - goto _test_eof28 + goto _test_eof19 } - st_case_28: + st_case_19: if data[p] == 93 { - goto st29 + goto st18 } - goto st28 - tr38: - - tok = p - - goto st29 - st29: + goto tr0 + st27: if p++; p == pe { - goto _test_eof29 + goto _test_eof27 } - st_case_29: + st_case_27: switch data[p] { case 32: - goto tr36 - case 93: - goto st29 + goto tr37 + case 91: + goto st17 } - goto st28 + goto tr0 tr11: tok = p - goto st19 - st19: + goto st20 + st20: if p++; p == pe { - goto _test_eof19 + goto _test_eof20 } - st_case_19: + st_case_20: switch data[p] { case 43: - goto st20 + goto st21 case 58: - goto st20 + goto st21 } switch { case data[p] < 48: if 45 <= data[p] && data[p] <= 46 { - goto st20 + goto st21 } case data[p] > 57: switch { case data[p] > 90: if 97 <= data[p] && data[p] <= 122 { - goto st20 + goto st21 } case data[p] >= 65: - goto st20 + goto st21 } default: - goto st21 + goto st22 } goto tr0 - st20: + st21: if p++; p == pe { - goto _test_eof20 + goto _test_eof21 } - st_case_20: + st_case_21: if data[p] == 32 { - goto tr31 + goto tr32 } if 48 <= data[p] && data[p] <= 57 { - goto st19 + goto st20 } goto tr0 - st21: + st22: if p++; p == pe { - goto _test_eof21 + goto _test_eof22 } - st_case_21: + st_case_22: switch data[p] { case 32: - goto tr31 + goto tr32 case 43: - goto st20 + goto st21 case 58: - goto st20 + goto st21 } switch { case data[p] < 48: if 45 <= data[p] && data[p] <= 46 { - goto st20 + goto st21 } case data[p] > 57: switch { case data[p] > 90: if 97 <= data[p] && data[p] <= 122 { - goto st20 + goto st21 } case data[p] >= 65: - goto st20 + goto st21 } default: - goto st21 + goto st22 } goto tr0 tr9: @@ -555,7 +529,7 @@ func parseRFC5424(data string) (message, error) { errs = multierr.Append(errs, &ValidationError{Err: err, Pos: tok + 1}) } - goto st22 + goto st23 tr6: if err := m.setPriority(data[tok:p]); err != nil { @@ -564,12 +538,12 @@ func parseRFC5424(data string) (message, error) { tok = p - goto st22 - st22: + goto st23 + st23: if p++; p == pe { - goto _test_eof22 + goto _test_eof23 } - st_case_22: + st_case_23: switch data[p] { case 32: goto tr7 @@ -626,36 +600,27 @@ func parseRFC5424(data string) (message, error) { _test_eof16: cs = 16 goto _test_eof - _test_eof23: - cs = 23 - goto _test_eof _test_eof24: cs = 24 goto _test_eof _test_eof25: cs = 25 goto _test_eof + _test_eof26: + cs = 26 + goto _test_eof _test_eof17: cs = 17 goto _test_eof _test_eof18: cs = 18 goto _test_eof - _test_eof26: - cs = 26 + _test_eof19: + cs = 19 goto _test_eof _test_eof27: cs = 27 goto _test_eof - _test_eof28: - cs = 28 - goto _test_eof - _test_eof29: - cs = 29 - goto _test_eof - _test_eof19: - cs = 19 - goto _test_eof _test_eof20: cs = 20 goto _test_eof @@ -665,37 +630,34 @@ func parseRFC5424(data string) (message, error) { _test_eof22: cs = 22 goto _test_eof + _test_eof23: + cs = 23 + goto _test_eof _test_eof: { } if p == eof { switch cs { - case 25, 28: + case 26: m.setMsg(data[tok:p]) - case 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22: + case 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23: errs = multierr.Append(errs, &ParseError{Err: io.ErrUnexpectedEOF, Pos: p + 1}) p-- - case 26: + case 27: m.setRawSDValue(data[tok:p]) - case 24, 27: + case 25: tok = p m.setMsg(data[tok:p]) - case 29: - - m.setRawSDValue(data[tok:p]) - - m.setMsg(data[tok:p]) - } } @@ -1842,7 +1804,7 @@ func parseStructuredData(data string) map[string]interface{} { tr43: if subMap, ok := structuredData[s.sdID].(map[string]interface{}); ok { - subMap[s.sdParamName] = removeBytes(data[tok:p], s.sdValueEscapes, p) + subMap[s.sdParamName] = removeBytes(data[tok:p], s.sdValueEscapes, tok) } s.sdValueEscapes = nil diff --git a/libbeat/reader/syslog/rfc5424_test.go b/libbeat/reader/syslog/rfc5424_test.go index 47cb9ee0e36..51a3873743e 100644 --- a/libbeat/reader/syslog/rfc5424_test.go +++ b/libbeat/reader/syslog/rfc5424_test.go @@ -98,8 +98,53 @@ func TestParseRFC5424(t *testing.T) { rawSDValue: `[exampleSDID@32473 iut="3" eventSource="Application" eventID="1011"][examplePriority@32473 class="high"]`, }, }, + "sd-with-escape": { + in: `<165>1 2003-10-11T22:14:15.003Z mymachine.example.com evntslog - ID47 [exampleSDID@32473 iut="3" eventSource="Application" eventID="1011" somekey="[value\] more data"][examplePriority@32473 class="high"] This is a message`, + want: message{ + timestamp: mustParseTime(time.RFC3339Nano, "2003-10-11T22:14:15.003Z", nil), + priority: 165, + facility: 20, + severity: 5, + version: 1, + hostname: "mymachine.example.com", + process: "evntslog", + msgID: "ID47", + msg: "This is a message", + rawSDValue: `[exampleSDID@32473 iut="3" eventSource="Application" eventID="1011" somekey="[value\] more data"][examplePriority@32473 class="high"]`, + }, + }, + "sd-with-escape-2": { + in: `<165>1 2003-10-11T22:14:15.003Z mymachine.example.com evntslog - ID47 [exampleSDID@32473 iut="3" eventSource="Application" eventID="1011" somekey="[value\] more data"][examplePriority@32473 class="high"] Some message [value] more data`, + want: message{ + timestamp: mustParseTime(time.RFC3339Nano, "2003-10-11T22:14:15.003Z", nil), + priority: 165, + facility: 20, + severity: 5, + version: 1, + hostname: "mymachine.example.com", + process: "evntslog", + msgID: "ID47", + msg: "Some message [value] more data", + rawSDValue: `[exampleSDID@32473 iut="3" eventSource="Application" eventID="1011" somekey="[value\] more data"][examplePriority@32473 class="high"]`, + }, + }, + "sd-with-escape-3": { + in: `<165>1 2003-10-11T22:14:15.003Z mymachine.example.com evntslog - ID47 [exampleSDID@32473 iut="3" eventSource="Application" eventID="1011" somekey="[value\] more data"][examplePriority@32473 class="high"] ` + utf8BOM + `Some message [value] more data`, + want: message{ + timestamp: mustParseTime(time.RFC3339Nano, "2003-10-11T22:14:15.003Z", nil), + priority: 165, + facility: 20, + severity: 5, + version: 1, + hostname: "mymachine.example.com", + process: "evntslog", + msgID: "ID47", + msg: "Some message [value] more data", + rawSDValue: `[exampleSDID@32473 iut="3" eventSource="Application" eventID="1011" somekey="[value\] more data"][examplePriority@32473 class="high"]`, + }, + }, "non-compliant-sd": { - in: `<165>1 2003-10-11T22:14:15.003Z mymachine.example.com evntslog - ID47 [action:"Drop"; flags:"278528"; ifdir:"inbound"; ifname:"bond1.3999"; loguid:"{0x60928f1d,0x8,0x40de101f,0xfcdbb197}"; origin:"127.0.0.1"; originsicname:"CN=CP,O=cp.com.9jjkfo"; sequencenum:"62"; time:"1620217629"; version:"5"; __policy_id_tag:"product=VPN-1 & FireWall-1[db_tag={F6212FB3-54CE-6344-9164-B224119E2B92};mgmt=cp-m;date=1620031791;policy_name=CP-Cluster]"; action_reason:"Dropped by multiportal infrastructure"; dst:"81.2.69.144"; product:"VPN & FireWall"; proto:"6"; s_port:"52780"; service:"80"; src:"81.2.69.144"]`, + in: `<165>1 2003-10-11T22:14:15.003Z mymachine.example.com evntslog - ID47 [action:"Drop"; flags:"278528"; ifdir:"inbound"; ifname:"bond1.3999"; loguid:"{0x60928f1d,0x8,0x40de101f,0xfcdbb197}"; origin:"127.0.0.1"; originsicname:"CN=CP,O=cp.com.9jjkfo"; sequencenum:"62"; time:"1620217629"; version:"5"; __policy_id_tag:"product=VPN-1 & FireWall-1[db_tag={F6212FB3-54CE-6344-9164-B224119E2B92};mgmt=cp-m;date=1620031791;policy_name=CP-Cluster\]"; action_reason:"Dropped by multiportal infrastructure"; dst:"81.2.69.144"; product:"VPN & FireWall"; proto:"6"; s_port:"52780"; service:"80"; src:"81.2.69.144"]`, want: message{ timestamp: mustParseTime(time.RFC3339Nano, "2003-10-11T22:14:15.003Z", nil), priority: 165, @@ -109,11 +154,11 @@ func TestParseRFC5424(t *testing.T) { hostname: "mymachine.example.com", process: "evntslog", msgID: "ID47", - rawSDValue: `[action:"Drop"; flags:"278528"; ifdir:"inbound"; ifname:"bond1.3999"; loguid:"{0x60928f1d,0x8,0x40de101f,0xfcdbb197}"; origin:"127.0.0.1"; originsicname:"CN=CP,O=cp.com.9jjkfo"; sequencenum:"62"; time:"1620217629"; version:"5"; __policy_id_tag:"product=VPN-1 & FireWall-1[db_tag={F6212FB3-54CE-6344-9164-B224119E2B92};mgmt=cp-m;date=1620031791;policy_name=CP-Cluster]"; action_reason:"Dropped by multiportal infrastructure"; dst:"81.2.69.144"; product:"VPN & FireWall"; proto:"6"; s_port:"52780"; service:"80"; src:"81.2.69.144"]`, + rawSDValue: `[action:"Drop"; flags:"278528"; ifdir:"inbound"; ifname:"bond1.3999"; loguid:"{0x60928f1d,0x8,0x40de101f,0xfcdbb197}"; origin:"127.0.0.1"; originsicname:"CN=CP,O=cp.com.9jjkfo"; sequencenum:"62"; time:"1620217629"; version:"5"; __policy_id_tag:"product=VPN-1 & FireWall-1[db_tag={F6212FB3-54CE-6344-9164-B224119E2B92};mgmt=cp-m;date=1620031791;policy_name=CP-Cluster\]"; action_reason:"Dropped by multiportal infrastructure"; dst:"81.2.69.144"; product:"VPN & FireWall"; proto:"6"; s_port:"52780"; service:"80"; src:"81.2.69.144"]`, }, }, "non-compliant-sd-with-msg": { - in: `<165>1 2003-10-11T22:14:15.003Z mymachine.example.com evntslog - ID47 [action:"Drop"; flags:"278528"; ifdir:"inbound"; ifname:"bond1.3999"; loguid:"{0x60928f1d,0x8,0x40de101f,0xfcdbb197}"; origin:"127.0.0.1"; originsicname:"CN=CP,O=cp.com.9jjkfo"; sequencenum:"62"; time:"1620217629"; version:"5"; __policy_id_tag:"product=VPN-1 & FireWall-1[db_tag={F6212FB3-54CE-6344-9164-B224119E2B92};mgmt=cp-m;date=1620031791;policy_name=CP-Cluster]"; action_reason:"Dropped by multiportal infrastructure"; dst:"81.2.69.144"; product:"VPN & FireWall"; proto:"6"; s_port:"52780"; service:"80"; src:"81.2.69.144"] This is a test message`, + in: `<165>1 2003-10-11T22:14:15.003Z mymachine.example.com evntslog - ID47 [action:"Drop"; flags:"278528"; ifdir:"inbound"; ifname:"bond1.3999"; loguid:"{0x60928f1d,0x8,0x40de101f,0xfcdbb197}"; origin:"127.0.0.1"; originsicname:"CN=CP,O=cp.com.9jjkfo"; sequencenum:"62"; time:"1620217629"; version:"5"; __policy_id_tag:"product=VPN-1 & FireWall-1[db_tag={F6212FB3-54CE-6344-9164-B224119E2B92};mgmt=cp-m;date=1620031791;policy_name=CP-Cluster\]"; action_reason:"Dropped by multiportal infrastructure"; dst:"81.2.69.144"; product:"VPN & FireWall"; proto:"6"; s_port:"52780"; service:"80"; src:"81.2.69.144"] This is a test message`, want: message{ timestamp: mustParseTime(time.RFC3339Nano, "2003-10-11T22:14:15.003Z", nil), priority: 165, @@ -123,7 +168,7 @@ func TestParseRFC5424(t *testing.T) { hostname: "mymachine.example.com", process: "evntslog", msgID: "ID47", - rawSDValue: `[action:"Drop"; flags:"278528"; ifdir:"inbound"; ifname:"bond1.3999"; loguid:"{0x60928f1d,0x8,0x40de101f,0xfcdbb197}"; origin:"127.0.0.1"; originsicname:"CN=CP,O=cp.com.9jjkfo"; sequencenum:"62"; time:"1620217629"; version:"5"; __policy_id_tag:"product=VPN-1 & FireWall-1[db_tag={F6212FB3-54CE-6344-9164-B224119E2B92};mgmt=cp-m;date=1620031791;policy_name=CP-Cluster]"; action_reason:"Dropped by multiportal infrastructure"; dst:"81.2.69.144"; product:"VPN & FireWall"; proto:"6"; s_port:"52780"; service:"80"; src:"81.2.69.144"]`, + rawSDValue: `[action:"Drop"; flags:"278528"; ifdir:"inbound"; ifname:"bond1.3999"; loguid:"{0x60928f1d,0x8,0x40de101f,0xfcdbb197}"; origin:"127.0.0.1"; originsicname:"CN=CP,O=cp.com.9jjkfo"; sequencenum:"62"; time:"1620217629"; version:"5"; __policy_id_tag:"product=VPN-1 & FireWall-1[db_tag={F6212FB3-54CE-6344-9164-B224119E2B92};mgmt=cp-m;date=1620031791;policy_name=CP-Cluster\]"; action_reason:"Dropped by multiportal infrastructure"; dst:"81.2.69.144"; product:"VPN & FireWall"; proto:"6"; s_port:"52780"; service:"80"; src:"81.2.69.144"]`, msg: "This is a test message", }, }, From 5fe9882a7e385b97c4963687a9279b9d5275135c Mon Sep 17 00:00:00 2001 From: Taylor Swanson Date: Thu, 8 Aug 2024 09:30:51 -0500 Subject: [PATCH 2/2] fix up changelog --- CHANGELOG.next.asciidoc | 7 ------- 1 file changed, 7 deletions(-) diff --git a/CHANGELOG.next.asciidoc b/CHANGELOG.next.asciidoc index 52c53b20e6b..41114545315 100644 --- a/CHANGELOG.next.asciidoc +++ b/CHANGELOG.next.asciidoc @@ -68,13 +68,6 @@ https://github.com/elastic/beats/compare/v8.8.1\...main[Check the HEAD diff] - Support Elastic Agent control protocol chunking support {pull}37343[37343] - Lower logging level to debug when attempting to configure beats with unknown fields from autodiscovered events/environments {pull}[37816][37816] - Set timeout of 1 minute for FQDN requests {pull}37756[37756] -- Fix the paths in the .cmd script added to the path by the Windows MSI to point to the new C:\Program Files installation location. https://github.com/elastic/elastic-stack-installers/pull/238 -- Change cache processor documentation from `write_period` to `write_interval`. {pull}38561[38561] -- Fix cache processor expiries heap cleanup on partial file writes. {pull}38561[38561] -- Fix cache processor expiries infinite growth when large a large TTL is used and recurring keys are cached. {pull}38561[38561] -- Fix parsing of RFC 3164 process IDs in syslog processor. {issue}38947[38947] {pull}38982[38982] -- Rename the field "apache2.module.error" to "apache.module.error" in Apache error visualization. {issue}39480[39480] {pull}39481[39481] -- Validate config of the `replace` processor {pull}40047[40047] - Fix handling of escaped brackets in syslog structured data. {issue}40445[40445] {pull}40446[40446] *Auditbeat*