diff --git a/pkg/cli/debug_merge_logs.go b/pkg/cli/debug_merge_logs.go index dd43bfc8cabe..e362accf5167 100644 --- a/pkg/cli/debug_merge_logs.go +++ b/pkg/cli/debug_merge_logs.go @@ -502,6 +502,14 @@ func (s *fileLogStream) open() bool { if s.f, s.err = os.Open(s.fi.path); s.err != nil { return false } + if s.format == "" { + if _, s.format, s.err = log.ReadFormatFromLogFile(s.f); s.err != nil { + return false + } + if _, s.err = s.f.Seek(0, io.SeekStart); s.err != nil { + return false + } + } if s.err = seekToFirstAfterFrom(s.f, s.from, s.editMode, s.format); s.err != nil { return false } diff --git a/pkg/cli/testdata/merge_logs_v1/missing_format/cockroach.test-0001.ubuntu.2018-11-30T22_06_47Z.004130.log b/pkg/cli/testdata/merge_logs_v1/missing_format/cockroach.test-0001.ubuntu.2018-11-30T22_06_47Z.004130.log index 47ec4e95eca4..98cb6ac9d91c 100644 --- a/pkg/cli/testdata/merge_logs_v1/missing_format/cockroach.test-0001.ubuntu.2018-11-30T22_06_47Z.004130.log +++ b/pkg/cli/testdata/merge_logs_v1/missing_format/cockroach.test-0001.ubuntu.2018-11-30T22_06_47Z.004130.log @@ -1,3 +1,5 @@ -I210801 21:05:59.364923 1 util/log/sync_buffer.go:70 [config] binary: CockroachDB CCL v20.1.17 (x86_64-apple-darwin14, built 2021/05/17 16:30:22, -I210801 21:05:59.364923 1 util/log/sync_buffer.go:70 [config] arguments: [./cockroach start] -I210801 21:05:59.364923 1 util/log/sync_buffer.go:70 line format: [IWEF]yymmdd hh:mm:ss.uuuuuu goid file:line msg utf8=✓ +{"header":1,"timestamp":"1631568932.530915000","goroutine":1,"file":"util/log/file_sync_buffer.go","line":238,"redactable":1,"tags":{"config":""},"message":"file created at: 2021/09/13 21:35:32"} +{"header":1,"timestamp":"1631568932.530923000","goroutine":1,"file":"util/log/file_sync_buffer.go","line":238,"redactable":1,"tags":{"config":""},"message":"running on machine: "} +{"header":1,"timestamp":"1631568932.530929000","goroutine":1,"file":"util/log/file_sync_buffer.go","line":238,"redactable":1,"tags":{"config":""},"message":"binary: CockroachDB CCL v21.2.0-alpha.00000000-4019-g6d1becda18-dirty (x86_64-apple-darwin20.6.0, built , go1.16.6)"} +{"header":1,"timestamp":"1631568932.530934000","goroutine":1,"file":"util/log/file_sync_buffer.go","line":238,"redactable":1,"tags":{"config":""},"message":"arguments: []"} +{"header":1,"timestamp":"1631568932.530945000","goroutine":1,"file":"util/log/file_sync_buffer.go","line":238,"redactable":1,"tags":{"config":""},"message":"log format (utf8=✓): json"} diff --git a/pkg/util/log/BUILD.bazel b/pkg/util/log/BUILD.bazel index 6b839e99a058..069c6668f8ab 100644 --- a/pkg/util/log/BUILD.bazel +++ b/pkg/util/log/BUILD.bazel @@ -142,6 +142,7 @@ go_test( "helpers_test.go", "http_sink_test.go", "intercept_test.go", + "log_decoder_test.go", "main_test.go", "redact_test.go", "secondary_log_test.go", diff --git a/pkg/util/log/log_decoder.go b/pkg/util/log/log_decoder.go index ed457c37c148..7fe172788cc8 100644 --- a/pkg/util/log/log_decoder.go +++ b/pkg/util/log/log_decoder.go @@ -21,6 +21,21 @@ import ( "github.com/cockroachdb/errors" ) +var ( + formatRE = regexp.MustCompile( + `(?m)^` + + /* Prefix */ `(?:.*\[config\][ ]+log format \(utf8=.+\): )` + + /* Format */ `(.*)$`, + ) + v1IndicatorRE = regexp.MustCompile( + `(?m)^` + + /* crdb-v1 Indicator */ `(?:.*line format: \[IWEF\]yymmdd hh:mm:ss.uuuuuu goid file:line.*)$`, + ) + v2IndicatorRE = regexp.MustCompile( + `(?m)^` + + /* crdb-v2 Indicator */ `(?:.*line format: \[IWEF\]yymmdd hh:mm:ss.uuuuuu goid \[chan@\]file:line.*)$`) +) + // EntryDecoder is used to decode log entries. type EntryDecoder interface { Decode(entry *logpb.Entry) error @@ -35,34 +50,23 @@ func NewEntryDecoder(in io.Reader, editMode EditSensitiveData) (EntryDecoder, er // NewEntryDecoderWithFormat is like NewEntryDecoder but the caller can specify the format of the log file. // The header lines do not need to be searched for the log entry format when 'logFormat' is non-empty. func NewEntryDecoderWithFormat( - in io.Reader, editMode EditSensitiveData, logFormat string, + in io.Reader, editMode EditSensitiveData, format string, ) (EntryDecoder, error) { var d EntryDecoder - var format string // If the log format has not been specified, get the format from the first few header lines of the log file. - if logFormat == "" { - var buf bytes.Buffer - rest := bufio.NewReader(in) - r := io.TeeReader(rest, &buf) - { - const headerBytes = 8096 - header := make([]byte, headerBytes) - n, err := r.Read(header) - if err != nil { - return nil, err - } - header = header[:n] - logFormat, err = getLogFormat(header) - if err != nil { - return nil, errors.Wrap(err, "decoding format") - } + if format == "" { + var read io.Reader + var err error + read, format, err = ReadFormatFromLogFile(in) + if err != nil { + return nil, err } - in = io.MultiReader(&buf, rest) + in = io.MultiReader(read, in) } - f, ok := formatParsers[logFormat] + f, ok := formatParsers[format] if !ok { - return nil, errors.Newf("unknown log file format: %s", logFormat) + return nil, errors.Newf("unknown log file format: %s", format) } format = f @@ -92,18 +96,40 @@ func NewEntryDecoderWithFormat( return d, nil } +// ReadFormatFromLogFile attempts to read the format from the header data of +// in. It returns the data consumed from input in the read return value. +func ReadFormatFromLogFile(in io.Reader) (read io.Reader, format string, err error) { + var buf bytes.Buffer + rest := bufio.NewReader(in) + r := io.TeeReader(rest, &buf) + const headerBytes = 8096 + header := make([]byte, headerBytes) + n, err := r.Read(header) + if err != nil { + return nil, "", err + } + header = header[:n] + format, err = getLogFormat(header) + if err != nil { + return nil, "", errors.Wrap(err, "decoding format") + } + return &buf, format, nil +} + // getLogFormat retrieves the log format recorded at the top of a log. func getLogFormat(data []byte) (string, error) { - var re = regexp.MustCompile( - `(?m)^` + - /* Prefix */ `(?:.*\[config\] log format \(utf8=.+\): )` + - /* Format */ `(.*)$`, - ) + if m := formatRE.FindSubmatch(data); m != nil { + return string(m[1]), nil + } + + // If the log format is not specified in the log, determine the format based on the line format entry. + if v1IndicatorRE.Match(data) { + return "crdb-v1", nil + } - m := re.FindSubmatch(data) - if m == nil { - return "", errors.New("failed to extract log file format from the log") + if v2IndicatorRE.Match(data) { + return "crdb-v2", nil } - return string(m[1]), nil + return "", errors.New("failed to extract log file format from the log") } diff --git a/pkg/util/log/log_decoder_test.go b/pkg/util/log/log_decoder_test.go new file mode 100644 index 000000000000..51e8d5b01d8e --- /dev/null +++ b/pkg/util/log/log_decoder_test.go @@ -0,0 +1,36 @@ +// Copyright 2021 The Cockroach Authors. +// +// Use of this software is governed by the Business Source License +// included in the file licenses/BSL.txt. +// +// As of the Change Date specified in that file, in accordance with +// the Business Source License, use of this software will be governed +// by the Apache License, Version 2.0, included in the file +// licenses/APL.txt. + +package log + +import ( + "strings" + "testing" + + "github.com/cockroachdb/datadriven" +) + +func TestReadLogFormat(t *testing.T) { + datadriven.RunTest(t, "testdata/read_header", + func(t *testing.T, td *datadriven.TestData) string { + switch td.Cmd { + case "log": + _, format, err := ReadFormatFromLogFile(strings.NewReader(td.Input)) + if err != nil { + td.Fatalf(t, "error while reading format from the log file: %v", err) + } + return format + default: + t.Fatalf("unknown directive: %q", td.Cmd) + } + // unreachable + return "" + }) +} diff --git a/pkg/util/log/testdata/read_header b/pkg/util/log/testdata/read_header new file mode 100644 index 000000000000..8592bf47af98 --- /dev/null +++ b/pkg/util/log/testdata/read_header @@ -0,0 +1,53 @@ +subtest detect_v1_format + +log +I210621 19:01:01.426193 1 util/log/sync_buffer.go:195 ⋮ [config] file created at: 2021/06/21 19:01:01 +I210621 19:01:01.426200 1 util/log/sync_buffer.go:195 ⋮ [config] running on machine: +I210621 19:01:01.426205 1 util/log/sync_buffer.go:195 ⋮ [config] binary: CockroachDB CCL v20.2.7 (x86_64-apple-darwin14, built 2021/03/29 17:56:44, go1.13.14) +I210621 19:01:01.426208 1 util/log/sync_buffer.go:195 ⋮ [config] arguments: +I210609 21:45:59.308670 1 util/log/sync_buffer.go:195 ⋮ [config] log format (utf8=✓): crdb-v1 +I210621 19:01:01.426216 1 util/log/sync_buffer.go:195 ⋮ [config] line format: [IWEF]yymmdd hh:mm:ss.uuuuuu goid file:line msg utf8=✓ +---- +crdb-v1 + +subtest end + +subtest detect_v2_format + +log +I210816 21:25:47.681121 1 util/log/file_sync_buffer.go:238 ⋮ [config] file created at: 2021/08/16 21:25:47 +I210816 21:25:47.681129 1 util/log/file_sync_buffer.go:238 ⋮ [config] running on machine: +I210816 21:25:47.681135 1 util/log/file_sync_buffer.go:238 ⋮ [config] binary: CockroachDB CCL v21.2.0-alpha.00000000-3306-g39d59f1fc9-dirty (x86_64-apple-darwin20.6.0, built , go1.16.6) +I210816 21:25:47.681140 1 util/log/file_sync_buffer.go:238 ⋮ [config] arguments: [] +I210609 21:45:59.308670 1 util/log/file_sync_buffer.go:238 ⋮ [config] log format (utf8=✓): crdb-v2 +I210816 21:25:47.681152 1 util/log/file_sync_buffer.go:238 ⋮ [config] line format: [IWEF]yymmdd hh:mm:ss.uuuuuu goid [chan@]file:line redactionmark \[tags\] [counter] msg +---- +crdb-v2 + +subtest end + +subtest default_to_v1_format + +log +I210621 19:01:01.426193 1 util/log/sync_buffer.go:195 ⋮ [config] file created at: 2021/06/21 19:01:01 +I210621 19:01:01.426200 1 util/log/sync_buffer.go:195 ⋮ [config] running on machine: +I210621 19:01:01.426205 1 util/log/sync_buffer.go:195 ⋮ [config] binary: CockroachDB CCL v20.2.7 (x86_64-apple-darwin14, built 2021/03/29 17:56:44, go1.13.14) +I210621 19:01:01.426208 1 util/log/sync_buffer.go:195 ⋮ [config] arguments: +I210621 19:01:01.426216 1 util/log/sync_buffer.go:195 ⋮ [config] line format: [IWEF]yymmdd hh:mm:ss.uuuuuu goid file:line msg utf8=✓ +---- +crdb-v1 + +subtest end + +subtest default_to_v2_format + +log +I210816 21:25:47.681121 1 util/log/file_sync_buffer.go:238 ⋮ [config] file created at: 2021/08/16 21:25:47 +I210816 21:25:47.681129 1 util/log/file_sync_buffer.go:238 ⋮ [config] running on machine: +I210816 21:25:47.681135 1 util/log/file_sync_buffer.go:238 ⋮ [config] binary: CockroachDB CCL v21.2.0-alpha.00000000-3306-g39d59f1fc9-dirty (x86_64-apple-darwin20.6.0, built , go1.16.6) +I210816 21:25:47.681140 1 util/log/file_sync_buffer.go:238 ⋮ [config] arguments: [] +I210816 21:25:47.681152 1 util/log/file_sync_buffer.go:238 ⋮ [config] line format: [IWEF]yymmdd hh:mm:ss.uuuuuu goid [chan@]file:line redactionmark \[tags\] [counter] msg +---- +crdb-v2 + +subtest end