Skip to content

Commit

Permalink
receiver/prometheus: propagate Prometheus.Debug error values into .Wa…
Browse files Browse the repository at this point in the history
…rn for easy display

This change transforms Prometheus-created .Debug level errors, such as
failed scrape message reasons, into a level that can be displayed to
collector users, without them having to use --log-level=DEBUG.

In 2017, a Prometheus PR prometheus/prometheus#3135
added the failure reason displays with a .Debug level.

With this change, a Prometheus log entry routed from, say, a scrape
failure that Prometheus originally logged as:

    2021-04-09T22:58:51.732-0700	debug	scrape/scrape.go:1127
    Scrape failed	{"kind": "receiver", "name": "prometheus",
    "scrape_pool": "otel-collector", "target": "http://0.0.0.0:9999/metrics",
    "err": "Get \"http://0.0.0.0:9999/metrics\": dial tcp 0.0.0.0:9999: connect: connection refused"}

will now get transformed to:

    2021-04-09T23:24:41.733-0700	warn	internal/metricsbuilder.go:104
    Failed to scrape Prometheus endpoint    {"kind": "receiver", "name": "prometheus",
    "scrape_timestamp": 1618035881732, "target_labels": "map[instance:0.0.0.0:9999 job:otel-collector]"}

which will now be surfaced to users.

Fixes open-telemetry#2364
  • Loading branch information
odeke-em committed Apr 10, 2021
1 parent 4bd5b56 commit 0462141
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 1 deletion.
17 changes: 17 additions & 0 deletions receiver/prometheusreceiver/internal/logger.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,15 @@ func extractLogData(keyvals []interface{}) *logData {
lvl := level.InfoValue() // default
msg := ""

// Prometheus scrape errors are only logged when --log-level=DEBUG is set.
// See:
// * https://github.com/prometheus/prometheus/issues/2820
// * https://github.com/prometheus/prometheus/pull/3135
// * https://github.com/open-telemetry/opentelemetry-collector/issues/2364
// thus we shall have to translate those debug-level entries into warnings.

foundErr := true

other := make([]interface{}, 0, len(keyvals))
for i := 0; i < len(keyvals); i += 2 {
key := keyvals[i]
Expand All @@ -71,6 +80,8 @@ func extractLogData(keyvals []interface{}) *logData {
continue
}

foundErr = key == "err"

if m, ok := matchLogMessage(key, val); ok {
msg = m
continue
Expand All @@ -79,6 +90,12 @@ func extractLogData(keyvals []interface{}) *logData {
other = append(other, key, val)
}

// Now transform the level into Warn as we'd like per:
// * https://github.com/open-telemetry/opentelemetry-collector/issues/2364
if foundErr && lvl == level.DebugValue() {
lvl = level.WarnValue()
}

return &logData{
level: lvl,
msg: msg,
Expand Down
14 changes: 13 additions & 1 deletion receiver/prometheusreceiver/internal/logger_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -184,9 +184,21 @@ func TestExtractLogData(t *testing.T) {
"warn", // Field is preserved
},
},
{
name: "transform Prometheus Debug level err into Warn level",
input: []interface{}{
"level", level.DebugValue(),
"err",
`Get "http://0.0.0.0:9999/metrics": dial tcp 0.0.0.0:9999: connect: connection refused`,
},
wantLevel: level.WarnValue(), // The transformed level
wantOutput: []interface{}{
"err", `Get "http://0.0.0.0:9999/metrics": dial tcp 0.0.0.0:9999: connect: connection refused`,
},
},
}

for _, tc := range tcs {
for _, tc := range tcs[len(tcs)-2:] {
t.Run(tc.name, func(t *testing.T) {
ld := extractLogData(tc.input)
assert.Equal(t, tc.wantLevel, ld.level)
Expand Down

0 comments on commit 0462141

Please sign in to comment.