Skip to content

Commit

Permalink
log: create crdb-v2 log entry parser
Browse files Browse the repository at this point in the history
The crdb-v2 log file format is not recognized by some tools like debug merge-log.
This is makes it difficult/impossible to merge log files properly.
This patch creates a new parser and integrates it into debug merge-log.

Release note (cli change): the tool debug merge-log is now integrated with a
crdb-v2 log entry parser.
  • Loading branch information
cameronnunez committed Jun 7, 2021
1 parent b031988 commit b05a534
Show file tree
Hide file tree
Showing 5 changed files with 390 additions and 7 deletions.
6 changes: 3 additions & 3 deletions pkg/cli/debug.go
Original file line number Diff line number Diff line change
Expand Up @@ -1239,7 +1239,7 @@ func removeDeadReplicas(
return batch, nil
}

var debugMergeLogsCommand = &cobra.Command{
var debugMergeLogsCmd = &cobra.Command{
Use: "merge-logs <log file globs>",
Short: "merge multiple log files from different machines into a single stream",
Long: `
Expand Down Expand Up @@ -1314,7 +1314,7 @@ var debugCmds = append(DebugCmdsForRocksDB,
debugUnsafeRemoveDeadReplicasCmd,
debugEnvCmd,
debugZipCmd,
debugMergeLogsCommand,
debugMergeLogsCmd,
debugListFilesCmd,
debugResetQuorumCmd,
)
Expand Down Expand Up @@ -1410,7 +1410,7 @@ func init() {
f.IntSliceVar(&removeDeadReplicasOpts.deadStoreIDs, "dead-store-ids", nil,
"list of dead store IDs")

f = debugMergeLogsCommand.Flags()
f = debugMergeLogsCmd.Flags()
f.Var(flagutil.Time(&debugMergeLogsOpts.from), "from",
"time before which messages should be filtered")
// TODO(knz): the "to" should be named "until" - it's a time boundary, not a space boundary.
Expand Down
8 changes: 4 additions & 4 deletions pkg/cli/debug_merge_logs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,16 +156,16 @@ var cases = []testCase{

func (c testCase) run(t *testing.T) {
outBuf := bytes.Buffer{}
debugMergeLogsCommand.Flags().Visit(func(f *pflag.Flag) {
debugMergeLogsCmd.Flags().Visit(func(f *pflag.Flag) {
if err := f.Value.Set(f.DefValue); err != nil {
t.Fatalf("Failed to set flag to default: %v", err)
}
})
debugMergeLogsCommand.SetOutput(&outBuf)
if err := debugMergeLogsCommand.ParseFlags(c.flags); err != nil {
debugMergeLogsCmd.SetOutput(&outBuf)
if err := debugMergeLogsCmd.ParseFlags(c.flags); err != nil {
t.Fatalf("Failed to set flags: %v", err)
}
if err := debugMergeLogsCommand.RunE(debugMergeLogsCommand, c.args); err != nil {
if err := debugMergeLogsCmd.RunE(debugMergeLogsCmd, c.args); err != nil {
t.Fatalf("Failed to run command: %v", err)
}
// The expected output lives in filepath.Join(testdataPath, "results", testCase.name)
Expand Down
136 changes: 136 additions & 0 deletions pkg/util/log/format_crdb_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,16 @@
package log

import (
"fmt"
"io"
"regexp"
"strconv"
"strings"
"time"
"unicode/utf8"

"github.com/cockroachdb/cockroach/pkg/util/log/channel"
"github.com/cockroachdb/cockroach/pkg/util/log/logpb"
"github.com/cockroachdb/cockroach/pkg/util/log/severity"
"github.com/cockroachdb/cockroach/pkg/util/timeutil"
"github.com/cockroachdb/redact"
Expand Down Expand Up @@ -447,3 +453,133 @@ func (buf *buffer) maybeMultiLine(
var startRedactionMarker = string(redact.StartMarker())
var endRedactionMarker = string(redact.EndMarker())
var markersStartWithMultiByteRune = startRedactionMarker[0] >= utf8.RuneSelf && endRedactionMarker[0] >= utf8.RuneSelf

// We don't include a capture group for the log message here, just for the
// preamble, because a capture group that handles multiline messages is very
// slow when running on the large buffers passed to EntryDecoder.split.
var entryREV2 = regexp.MustCompile(
`(?m)^` +
/* Severity */ `([IWEF])` +
/* Date and time */ `(\d{6} \d{2}:\d{2}:\d{2}.\d{6}) ` +
/* Goroutine ID */ `(?:(\d+) )` +
/* Go standard library flag */ `(\(gostd\) )?` +
/* Channel */ `(\d+@)?` +
/* File */ `([^:]+):` +
/* Line */ `(?:(\d+) )` +
/* Redactable flag */ `((?:` + redactableIndicator + `)?) ` +
/* Context tags */ `(?:\[((?:[^]]|\][^ ])+)\] )` +
/* Counter */ `((?:\d+)?) ` +
/* Continuation marker */ `([ =!+|])` +
/* Message */ `(.*)$`,
)

// EntryDecoderV2 reads successive encoded log entries from the data.
type EntryDecoderV2 struct {
re *regexp.Regexp
data []byte
}

// NewEntryDecoderV2 creates a new instance of EntryDecoderV2.
func NewEntryDecoderV2(data []byte) *EntryDecoderV2 {
return &EntryDecoderV2{
re: entryREV2,
data: data,
}
}

// DecodeV2 decodes the next log entry into the provided protobuf message.
func (d *EntryDecoderV2) DecodeV2(entry *logpb.Entry) error {
m := d.re.FindSubmatch(d.data)
if m == nil {
return io.EOF
}

// Erase all the fields, to be sure.
*entry = logpb.Entry{}

// Process the severity.
entry.Severity = Severity(strings.IndexByte(severityChar, m[1][0]) + 1)

// Process the timestamp.
t, err := time.Parse(MessageTimeFormat, string(m[2]))
if err != nil {
return err
}
entry.Time = t.UnixNano()

// Process the goroutine ID.
goroutine, err := strconv.Atoi(string(m[3]))
if err != nil {
return err
}
entry.Goroutine = int64(goroutine)

// Process the channel.
if len(string(m[5])) != 0 {
ch, err := strconv.Atoi(string(m[5])[:len(string(m[5]))-1])
if err != nil {
return err
}
entry.Channel = Channel(ch)
}

// Process the file.
entry.File = string(m[6])

// Process the line.
line, err := strconv.Atoi(string(m[7]))
if err != nil {
return err
}
entry.Line = int64(line)

// Process the redactable marker.
entry.Redactable = len(m[8]) != 0

// Process the context tags.
entry.Tags = string(m[9])

// Process the counter.
if len(m[10]) != 0 {
counter, err := strconv.Atoi(string(m[10]))
if err != nil {
return err
}
entry.Counter = uint64(counter)
}

// Process the log message.
// While the entry has additional lines, collect the full message.
var entryMsg strings.Builder

fmt.Fprintf(&entryMsg, string(trimFinalNewLines(m[12])))

linesLoop:for {
// Remove the top line from the log data.
d.data = d.data[len(m[0]):]

// Match the next line in the log data.
m = d.re.FindSubmatch(d.data)
if m == nil {
break
}

// Determine whether this line is a continuation of the entry and append the message accordingly.
cont := string(m[11])
msg := string(m[12])
switch cont {
case "+":
fmt.Fprintf(&entryMsg, "\n"+msg)
case "|":
fmt.Fprintf(&entryMsg, msg)
case "!":
fmt.Fprintf(&entryMsg, "\nstack trace:\n"+msg)
default:
break linesLoop
}
}

entry.Message = entryMsg.String()

return nil
}
29 changes: 29 additions & 0 deletions pkg/util/log/format_crdb_v2_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,18 @@ import (
"bytes"
"context"
"fmt"
"io"
"strings"
"testing"
"time"

"github.com/cockroachdb/cockroach/pkg/util/log/channel"
"github.com/cockroachdb/cockroach/pkg/util/log/eventpb"
"github.com/cockroachdb/cockroach/pkg/util/log/logpb"
"github.com/cockroachdb/cockroach/pkg/util/log/severity"
"github.com/cockroachdb/datadriven"
"github.com/cockroachdb/logtags"
"github.com/kr/pretty"
)

func TestFormatCrdbV2(t *testing.T) {
Expand Down Expand Up @@ -178,3 +181,29 @@ func TestFormatCrdbV2LongLineBreaks(t *testing.T) {
return out
})
}

func TestFormatCrdbV2Decode(t *testing.T) {
datadriven.RunTest(t, "testdata/parse",
func(t *testing.T, td *datadriven.TestData) string {
switch td.Cmd {
case "log":
var out strings.Builder
d := NewEntryDecoderV2([]byte(td.Input))
for {
var e logpb.Entry
if err := d.DecodeV2(&e); err != nil {
if err == io.EOF {
break
}
td.Fatalf(t, "error while decoding: %v", err)
}
fmt.Fprintf(&out, "%# v \n", pretty.Formatter(e))
}
return out.String()
default:
t.Fatalf("unknown directive: %q", td.Cmd)
}
// unreachable
return ""
})
}
Loading

0 comments on commit b05a534

Please sign in to comment.