Skip to content

Commit

Permalink
confgenerator : Sample Fluent-bit Self Logs as Health Logs (#1300)
Browse files Browse the repository at this point in the history
This PR adds filters that sample specific fluent-bit self-logs and transform them into ops-agent-health logs, following the Structured Ops Agent Health Logs format (#1290). The following fluent-bit logs are sampled:

- Code : LogPipelineErr, Match : [error] [lib] backend failed
- Code : LogParseErr, Match : [error] [parser] cannot parse
  • Loading branch information
franciscovalentecastro authored Aug 3, 2023
1 parent a61363a commit 4330da4
Show file tree
Hide file tree
Showing 2,423 changed files with 43,229 additions and 31,009 deletions.
136 changes: 104 additions & 32 deletions confgenerator/self_logs.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,18 +27,20 @@ import (
)

var (
agentKind string = "ops-agent"
schemaVersion string = "v1"
agentKind string = "ops-agent"
schemaVersion string = "v1"
)

const (
fluentBitSelfLogsTag string = "ops-agent-fluent-bit"
healthLogsTag string = "ops-agent-health"
severityKey string = "logging.googleapis.com/severity"
sourceLocationKey string = "logging.googleapis.com/sourceLocation"
agentVersionKey string = "agent.googleapis.com/health/agentVersion"
agentKindKey string = "agent.googleapis.com/health/agentKind"
schemaVersionKey string = "agent.googleapis.com/health/schemaVersion"
opsAgentLogsMatch string = "ops-agent-*"
fluentBitSelfLogsTag string = "ops-agent-fluent-bit"
healthLogsTag string = "ops-agent-health"
severityKey string = "logging.googleapis.com/severity"
sourceLocationKey string = "logging.googleapis.com/sourceLocation"
agentVersionKey string = "agent.googleapis.com/health/agentVersion"
agentKindKey string = "agent.googleapis.com/health/agentKind"
schemaVersionKey string = "agent.googleapis.com/health/schemaVersion"
troubleshootFindInfoURL string = "https://cloud.google.com/stackdriver/docs/solutions/agents/ops-agent/troubleshoot-find-info"
)

func fluentbitSelfLogsPath(p platform.Platform) string {
Expand Down Expand Up @@ -69,23 +71,16 @@ func generateHealthChecksLogsComponents(ctx context.Context) []fluentbit.Compone
},
}.Components(ctx, healthLogsTag, "health-checks-json")...)
out = append(out, []fluentbit.Component{
// This is used to exclude any previous content of the health-checks file that
// does not contain the `severity` field.
// This is used to exclude any previous content of the `health-checks.log` file that does not contain
// the `jsonPayload.severity` field. Due to `https://github.com/fluent/fluent-bit/issues/7092` the
// filtering can't be done directly to the `logging.googleapis.com/severity` field.
// We cannot use `LoggingProcessorExcludeLogs` here since it doesn't exclude when the field is missing.
{
Kind: "FILTER",
Config: map[string]string{
"Name": "grep",
"Match": healthLogsTag,
"Regex": fmt.Sprintf("%s INFO|ERROR|WARNING|DEBUG|DEFAULT", logs.SeverityZapKey),
},
},
{
Kind: "FILTER",
OrderedConfig: [][2]string{
{"Name", "modify"},
{"Match", healthLogsTag},
{"Rename", fmt.Sprintf("%s %s", logs.SeverityZapKey, severityKey)},
{"Rename", fmt.Sprintf("%s %s", logs.SourceLocationZapKey, sourceLocationKey)},
"Regex": fmt.Sprintf("%s INFO|ERROR|WARNING|DEBUG|info|error|warning|debug", logs.SeverityZapKey),
},
},
}...)
Expand All @@ -112,20 +107,73 @@ func generateFluentBitSelfLogsComponents(ctx context.Context) []fluentbit.Compon
"severity": "string",
},
},
}.Components(ctx, fluentBitSelfLogsTag, "self-logs-severity")...)
out = append(out, fluentbit.TranslationComponents(fluentBitSelfLogsTag, "severity", severityKey, true,
[]struct{ SrcVal, DestVal string }{
{"debug", "DEBUG"},
{"error", "ERROR"},
{"info", "INFO"},
{"warn", "WARNING"},
})...,
)
}.Components(ctx, fluentBitSelfLogsTag, "fluent-bit-self-log-regex-parsing")...)
return out
}

// This method creates a component adds metadata labels to all ops agent health logs.
// selfLogTranslationEntry describes one fluent-bit self-log pattern that is
// sampled and re-emitted as an Ops Agent health log.
type selfLogTranslationEntry struct {
	// regexMatch is the regular expression matched against the `message`
	// field of a fluent-bit self-log record.
	regexMatch string
	// message is the rich, user-facing text the health log ends up carrying.
	message string
	// code is the short health code (e.g. "LogParseErr") attached to the
	// sampled record.
	code string
}

// selfLogTranslationList is the table of fluent-bit self-log patterns that are
// sampled into health logs. Each entry pairs the regex matched against the
// self-log `message` with the health `code` and the rich `message` text that
// embeds that code and the troubleshooting documentation URL.
var selfLogTranslationList = []selfLogTranslationEntry{
// Matches self-logs containing "[error] [lib] backend failed".
{
regexMatch: `\[error\]\s\[lib\]\sbackend\sfailed`,
message: fmt.Sprintf("Ops Agent logging pipeline failed, Code: LogPipelineErr, Documentation: %s", troubleshootFindInfoURL),
code: "LogPipelineErr",
},
// Matches self-logs containing "[error] [parser] cannot parse".
{
regexMatch: `\[error\]\s\[parser\]\scannot\sparse`,
message: fmt.Sprintf("Ops Agent failed to parse logs, Code: LogParseErr, Documentation: %s", troubleshootFindInfoURL),
code: "LogParseErr",
},
}

// generateSelfLogsSamplingComponents returns, for every entry of
// selfLogTranslationList and in list order, a pair of fluent-bit filters:
// a `rewrite_tag` filter that samples matching fluent-bit self-logs into the
// `ops-agent-health` tag, followed by a `modify` filter that stamps the
// sampled record with the entry's health code.
func generateSelfLogsSamplingComponents(ctx context.Context) []fluentbit.Component {
	components := []fluentbit.Component{}

	for _, entry := range selfLogTranslationList {
		// Sample the fluent-bit self-logs whose `message` matches the entry's
		// regex and re-emit them as an `ops-agent-health` log.
		sampleFilter := fluentbit.Component{
			Kind: "FILTER",
			Config: map[string]string{
				"Name":  "rewrite_tag",
				"Match": fluentBitSelfLogsTag,
				"Rule":  fmt.Sprintf(`message %s %s true`, entry.regexMatch, healthLogsTag),
			},
		}

		// Set the appropriate health code on the records sampled above. The
		// code is also written to `message` so a later stage of the pipeline
		// can translate it into the rich text message.
		// The current fluent-bit submodule doesn't accept whitespace in `Set`
		// values, so the code is used as a placeholder. This can be updated
		// once the fix for `https://github.com/fluent/fluent-bit/issues/4286`
		// reaches the fluent-bit submodule.
		codeFilter := fluentbit.Component{
			Kind: "FILTER",
			OrderedConfig: [][2]string{
				{"Name", "modify"},
				{"Match", healthLogsTag},
				{"Condition", fmt.Sprintf(`Key_value_matches message %s`, entry.regexMatch)},
				{"Set", fmt.Sprintf(`message %s`, entry.code)},
				{"Set", fmt.Sprintf(`code %s`, entry.code)},
			},
		}

		components = append(components, sampleFilter, codeFilter)
	}

	return components
}

// This method creates a component that enforces the `Structured Health Logs` format to
// all `ops-agent-health` logs. It sets `agentKind`, `agentVersion` and `schemaVersion`.
// It also translates `code` to the rich text message from the `selfLogTranslationList`.
func generateStructuredHealthLogsComponents(ctx context.Context) []fluentbit.Component {
// Convert translation list to map.
mapMessageFromCode := make(map[string]string)
for _, m := range selfLogTranslationList {
mapMessageFromCode[m.code] = m.message
}

return LoggingProcessorModifyFields{
Fields: map[string]*ModifyField{
fmt.Sprintf(`labels."%s"`, agentKindKey): {
Expand All @@ -137,15 +185,39 @@ func generateStructuredHealthLogsComponents(ctx context.Context) []fluentbit.Com
fmt.Sprintf(`labels."%s"`, schemaVersionKey): {
StaticValue: &schemaVersion,
},
"jsonPayload.message": {
MapValues: mapMessageFromCode,
MapValuesExclusive: false,
},
},
}.Components(ctx, healthLogsTag, "set-structured-health-logs")
}

// generateSelfLogsProcessingComponents processes all self logs so their fields
// are set correctly before reaching the output plugin.
//
// It builds a LoggingProcessorModifyFields processor that moves
// `jsonPayload.severity` into `severity` and maps the lowercase severity names
// ("error", "warn", "info", "debug") to their uppercase equivalents
// ("ERROR", "WARNING", "INFO", "DEBUG"). The processor is applied to every tag
// matching opsAgentLogsMatch.
func generateSelfLogsProcessingComponents(ctx context.Context) []fluentbit.Component {
return LoggingProcessorModifyFields{
Fields: map[string]*ModifyField{
"severity": {
MoveFrom: "jsonPayload.severity",
MapValues: map[string]string{
"error": "ERROR",
"warn": "WARNING",
"info": "INFO",
"debug": "DEBUG",
},
// NOTE(review): presumably `false` lets values missing from the map pass
// through unchanged — confirm against ModifyField's definition.
MapValuesExclusive: false,
},
},
// NOTE(review): two consecutive `.Components(` calls are visible here; the
// first looks like the pre-change line of the diff view — confirm which one
// belongs to this function.
}.Components(ctx, healthLogsTag, "setstructuredhealthlogs")
}.Components(ctx, opsAgentLogsMatch, "self-logs-processing")
}

// generateSelfLogsComponents assembles the full list of fluent-bit components
// for the agent's self logs. The stages are concatenated in this order:
// fluent-bit self-log ingestion, health-checks log ingestion, self-log
// sampling, structured health log enforcement and self-log processing. A
// single output component is appended last; it matches both the fluent-bit
// self-logs tag and the health logs tag and is given userAgent.
func generateSelfLogsComponents(ctx context.Context, userAgent string) []fluentbit.Component {
	stages := [][]fluentbit.Component{
		generateFluentBitSelfLogsComponents(ctx),
		generateHealthChecksLogsComponents(ctx),
		generateSelfLogsSamplingComponents(ctx),
		generateStructuredHealthLogsComponents(ctx),
		generateSelfLogsProcessingComponents(ctx),
	}

	components := []fluentbit.Component{}
	for _, stage := range stages {
		components = append(components, stage...)
	}

	// The output plugin matches either of the two self-log tags.
	outputMatch := strings.Join([]string{fluentBitSelfLogsTag, healthLogsTag}, "|")
	return append(components, stackdriverOutputComponent(outputMatch, userAgent, ""))
}

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@

-- Moves record["severity"] to record["logging.googleapis.com/severity"],
-- upper-casing the known fluent-bit severity names on the way. Values that are
-- not one of "debug", "error", "info" or "warn" are copied over unchanged.
function process(tag, timestamp, record)
-- Read the current value of the source field.
local __field_0 = (function()
return record["severity"]
end)();
-- Clear the source field, since the value is being moved.
(function(value)
record["severity"] = value
end)(nil);
local v = __field_0;
-- Map fluent-bit's lowercase severities to uppercase names.
if v == "debug" then v = "DEBUG"
elseif v == "error" then v = "ERROR"
elseif v == "info" then v = "INFO"
elseif v == "warn" then v = "WARNING"
end
-- Write the (possibly mapped) value to the destination field.
(function(value)
record["logging.googleapis.com/severity"] = value
end)(v)
-- NOTE(review): return code 2 presumably signals "record modified, original
-- timestamp kept" in the fluent-bit Lua filter — confirm against its docs.
return 2, timestamp, record
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@

-- Translates a health code stored in record["message"] into its rich text
-- message, then sets the agentKind, agentVersion and schemaVersion labels
-- under record["logging.googleapis.com/labels"].
function process(tag, timestamp, record)
-- Read the current message; it holds a health code for sampled self-logs.
local __field_0 = (function()
return record["message"]
end)();
local v = __field_0;
-- Replace a known health code with its full message; any other value is kept.
if v == "LogParseErr" then v = "Ops Agent failed to parse logs, Code: LogParseErr, Documentation: https://cloud.google.com/stackdriver/docs/solutions/agents/ops-agent/troubleshoot-find-info"
elseif v == "LogPipelineErr" then v = "Ops Agent logging pipeline failed, Code: LogPipelineErr, Documentation: https://cloud.google.com/stackdriver/docs/solutions/agents/ops-agent/troubleshoot-find-info"
end
(function(value)
record["message"] = value
end)(v)
-- Label: agent kind. The labels table is created on first use.
local v = "ops-agent";
(function(value)
if record["logging.googleapis.com/labels"] == nil
then
record["logging.googleapis.com/labels"] = {}
end
record["logging.googleapis.com/labels"]["agent.googleapis.com/health/agentKind"] = value
end)(v)
-- Label: agent version.
local v = "latest";
(function(value)
if record["logging.googleapis.com/labels"] == nil
then
record["logging.googleapis.com/labels"] = {}
end
record["logging.googleapis.com/labels"]["agent.googleapis.com/health/agentVersion"] = value
end)(v)
-- Label: health log schema version.
local v = "v1";
(function(value)
if record["logging.googleapis.com/labels"] == nil
then
record["logging.googleapis.com/labels"] = {}
end
record["logging.googleapis.com/labels"]["agent.googleapis.com/health/schemaVersion"] = value
end)(v)
-- NOTE(review): return code 2 presumably signals "record modified, original
-- timestamp kept" in the fluent-bit Lua filter — confirm against its docs.
return 2, timestamp, record
end
Original file line number Diff line number Diff line change
Expand Up @@ -80,42 +80,14 @@
Name parser
Preserve_Key True
Reserve_Data True
Parser ops-agent-fluent-bit.self-logs-severity
Parser ops-agent-fluent-bit.fluent-bit-self-log-regex-parsing

[FILTER]
Match ops-agent-fluent-bit
Name lua
call parser_merge_record
script 5fc5f42c16c9e1ab8292e3d42f74f3be.lua

[FILTER]
Add logging.googleapis.com/severity DEBUG
Condition Key_Value_Equals severity debug
Match ops-agent-fluent-bit
Name modify
Remove severity

[FILTER]
Add logging.googleapis.com/severity ERROR
Condition Key_Value_Equals severity error
Match ops-agent-fluent-bit
Name modify
Remove severity

[FILTER]
Add logging.googleapis.com/severity INFO
Condition Key_Value_Equals severity info
Match ops-agent-fluent-bit
Name modify
Remove severity

[FILTER]
Add logging.googleapis.com/severity WARNING
Condition Key_Value_Equals severity warn
Match ops-agent-fluent-bit
Name modify
Remove severity

[FILTER]
Match ops-agent-health
Name lua
Expand All @@ -138,19 +110,43 @@
[FILTER]
Match ops-agent-health
Name grep
Regex severity INFO|ERROR|WARNING|DEBUG|DEFAULT
Regex severity INFO|ERROR|WARNING|DEBUG|info|error|warning|debug

[FILTER]
Name modify
Match ops-agent-health
Rename severity logging.googleapis.com/severity
Rename sourceLocation logging.googleapis.com/sourceLocation
Match ops-agent-fluent-bit
Name rewrite_tag
Rule message \[error\]\s\[lib\]\sbackend\sfailed ops-agent-health true

[FILTER]
Name modify
Match ops-agent-health
Condition Key_value_matches message \[error\]\s\[lib\]\sbackend\sfailed
Set message LogPipelineErr
Set code LogPipelineErr

[FILTER]
Match ops-agent-fluent-bit
Name rewrite_tag
Rule message \[error\]\s\[parser\]\scannot\sparse ops-agent-health true

[FILTER]
Name modify
Match ops-agent-health
Condition Key_value_matches message \[error\]\s\[parser\]\scannot\sparse
Set message LogParseErr
Set code LogParseErr

[FILTER]
Match ops-agent-health
Name lua
call process
script 0f15dbe303dc7122d43443c9a4c31632.lua
script 68d6a3a0e8edf37868e1bf94adc737f4.lua

[FILTER]
Match ops-agent-*
Name lua
call process
script 4d6012ff003886818fb9b9285b4af962.lua

[OUTPUT]
Match_Regex ^(default_pipeline\.syslog)$
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[PARSER]
Format regex
Name ops-agent-fluent-bit.self-logs-severity
Name ops-agent-fluent-bit.fluent-bit-self-log-regex-parsing
Regex (?<message>\[[ ]*(?<time>\d+\/\d+\/\d+ \d+:\d+:\d+)] \[[ ]*(?<severity>[a-z]+)\].*)
Time_Format %Y/%m/%d %H:%M:%S
Time_Key time
Expand Down
Loading

0 comments on commit 4330da4

Please sign in to comment.