Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

enhancement(file source): Better multi-line support #1852

Merged
merged 18 commits into from
Feb 22, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .github/CODEOWNERS
Validating CODEOWNERS rules …
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@
/src/sinks/tcp.rs @lukesteensen

/src/sources/docker.rs @LucioFranco
/src/sources/file.rs @LucioFranco
/src/sources/file/mod.rs @LucioFranco
/src/sources/file/line_agg.rs @MOZGIII
/src/sources/journald.rs @bruceg
/src/sources/kafka.rs @a-rodin
/src/sources/stdin.rs @bruceg
Expand Down
86 changes: 84 additions & 2 deletions .meta/sources/file.toml
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ fingerprint. This is helpful if all files share a common header.\

[sources.file.options.message_start_indicator]
type = "string"
category = "Multi-line"
category = "Multi-line (deprecated)"
examples = ["^(INFO|ERROR)"]
description = """\
When present, Vector will aggregate multiple lines into a single event, using \
Expand All @@ -149,7 +149,7 @@ a regular expression, so remember to anchor as appropriate.\

[sources.file.options.multi_line_timeout]
type = "int"
category = "Multi-line"
category = "Multi-line (deprecated)"
default = 1000
unit = "milliseconds"
description = """\
Expand Down Expand Up @@ -178,6 +178,88 @@ Instead of balancing read capacity fairly across all watched files, prioritize \
draining the oldest files before moving on to read data from younger files.\
"""

[sources.file.options.multiline]
type = "table"
category = "Multiline"
common = true
required = false
description = """\
Multiline parsing configuration. \
If not speicified, multiline parsing is disabled.\

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Spelling issue. Should be "specified."

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@rcollette That spelling error has been addressed in later code. Thanks for letting us know though!

"""

# Pattern that marks the first line of a multiline message. The examples are
# regular expressions written as TOML literal strings, so their backslashes
# need no extra escaping.
[sources.file.options.multiline.children.start_pattern]
type = "string"
category = "Multiline"
examples = ['^[^\s]', '\\$', '^(INFO|ERROR) ', '[^;]$']
common = true
required = true
description = """\
Start pattern to look for as a beginning of the message.\
"""

# Pattern whose interpretation depends on the sibling `mode` option. The
# examples are regular expressions written as TOML literal strings, so their
# backslashes need no extra escaping.
[sources.file.options.multiline.children.condition_pattern]
type = "string"
category = "Multiline"
examples = ['^[\s]+', '\\$', '^(INFO|ERROR) ', ';$']
common = true
required = true
description = """\
Condition pattern to look for. Exact behavior is configured via `mode`.\
"""

# Selects how matches of the condition pattern are interpreted. Each example
# below is also a key of this option's `enum` sub-table, which documents it.
[sources.file.options.multiline.children.mode]
type = "string"
category = "Multiline"
examples = [
  "continue_through",
  "continue_past",
  "halt_before",
  "halt_with",
]
common = true
required = true
description = """\
Mode of operation, specifies how the condition pattern is interpreted.\
"""

# Per-value documentation for `mode`: each key is an accepted value, and its
# string describes how lines are grouped relative to the condition pattern.
[sources.file.options.multiline.children.mode.enum]
continue_through = """\
All consecutive lines matching this pattern are included in the group. \
The first line (the line that matched the start pattern) does not need \
to match the `continue_through` pattern. \
This is useful in cases such as a Java stack trace, where some indicator \
in the line (such as leading whitespace) indicates that it is an \
extension of the preceding line.\
"""
continue_past = """\
All consecutive lines matching this pattern, plus one additional line, \
are included in the group. \
This is useful in cases where a log message ends with a continuation \
marker, such as a backslash, indicating that the following line is part \
of the same message.\
"""
halt_before = """\
All consecutive lines not matching this pattern are included in the \
group. \
This is useful where a log line contains a marker indicating that it \
begins a new message.\
"""
halt_with = """\
All consecutive lines, up to and including the first line matching this \
pattern, are included in the group. \
This is useful where a log line ends with a termination marker, such as \
a semicolon.\
"""

# Upper bound, in milliseconds, on how long a partially aggregated message is
# held while waiting for further lines.
[sources.file.options.multiline.children.timeout_ms]
type = "int"
category = "Multiline"
examples = [1000, 600000]
unit = "milliseconds"
common = true
required = true
description = """\
The maximum time to wait for the continuation. Once this timeout is \
reached, the buffered message is guaranteed to be flushed, even if \
incomplete.\
"""

[sources.file.output.log.fields.file]
type = "string"
examples = ["/var/log/nginx.log"]
Expand Down
44 changes: 43 additions & 1 deletion config/vector.spec.toml
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,7 @@ dns_servers = ["0.0.0.0:53"]
host_key = "host"

#
# Multi-line
# Multi-line (deprecated)
#

# When present, Vector will aggregate multiple lines into a single event, using
Expand Down Expand Up @@ -314,6 +314,48 @@ dns_servers = ["0.0.0.0:53"]
strategy = "checksum"
strategy = "device_and_inode"

#
# Multiline
#

# NOTE: each key below is repeated once per documented example value; the
# repeats are alternatives for illustration. A real configuration sets each
# key only once.
[sources.file.multiline]
# Condition pattern to look for. Exact behavior is configured via `mode`.
#
# * required
# * type: string
condition_pattern = "^[\\s]+"
condition_pattern = "\\\\$"
condition_pattern = "^(INFO|ERROR) "
condition_pattern = ";$"

# Mode of operation, specifies how the condition pattern is interpreted.
#
# * required
# * type: string
# * enum: "continue_through", "continue_past", "halt_before", and "halt_with"
mode = "continue_through"
mode = "continue_past"
mode = "halt_before"
mode = "halt_with"

# Start pattern to look for as a beginning of the message.
#
# * required
# * type: string
start_pattern = "^[^\\s]"
start_pattern = "\\\\$"
start_pattern = "^(INFO|ERROR) "
start_pattern = "[^;]$"

# The maximum time to wait for the continuation. Once this timeout is reached,
# the buffered message is guaranteed to be flushed, even if incomplete.
#
# * required
# * type: int
# * unit: milliseconds
timeout_ms = 1000
timeout_ms = 600000

# Ingests data through log records from journald and outputs `log` events.
[sources.journald]
# The component type. This is a required field that tells Vector which
Expand Down
Loading