Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
63783: cli: walk directories in debug merge-logs r=stevendanna a=tbg

This is a UX improvement. Instead of typing out

`./cockroach debug merge-logs nodes/*/logs/*.log`

(as one does for each `debug.zip` or roachtest investigation),
you can now simply use

`./cockroach debug merge-logs .`.

Additionally, default the program filter to disabled.

The program filter is what matches the `<program>.<host>.<morestuff>.log` in
the log files handed to `merge-logs`. The default is generally a reasonable
choice but when you have an oddly named binary (does happen) merge-logs
completely stops doing anything and time is wasted figuring out why.
`merge-logs` is already very unlikely to pick up stray files since its default
`file-pattern` matches the entire logging file format (it delegates only
matching of `<program>` to the program-filter), so turning the program filter
off by default seems right.


Co-authored-by: Tobias Grieger <[email protected]>
  • Loading branch information
craig[bot] and tbg committed Apr 22, 2021
2 parents 1693aa6 + aa4c6ad commit d85d49d
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 23 deletions.
16 changes: 8 additions & 8 deletions pkg/cli/debug.go
Original file line number Diff line number Diff line change
Expand Up @@ -1159,13 +1159,13 @@ var debugMergeLogsCommand = &cobra.Command{
Short: "merge multiple log files from different machines into a single stream",
Long: `
Takes a list of glob patterns (not left exclusively to the shell because of
MAX_ARG_STRLEN, usually 128kB) pointing to log files and merges them into a
single stream printed to stdout. Files not matching the log file name pattern
are ignored. If log lines appear out of order within a file (which happens), the
timestamp is ratcheted to the highest value seen so far. The command supports
efficient time filtering as well as multiline regexp pattern matching via flags.
If the filter regexp contains captures, such as '^abc(hello)def(world)', only
the captured parts will be printed.
MAX_ARG_STRLEN, usually 128kB) which will be walked and whose contained log
files and merged them into a single stream printed to stdout. Files not matching
the log file name pattern are ignored. If log lines appear out of order within
a file (which happens), the timestamp is ratcheted to the highest value seen so far.
The command supports efficient time filtering as well as multiline regexp pattern
matching via flags. If the filter regexp contains captures, such as
'^abc(hello)def(world)', only the captured parts will be printed.
`,
Args: cobra.MinimumNArgs(1),
RunE: runDebugMergeLogs,
Expand All @@ -1183,7 +1183,7 @@ var debugMergeLogsOpts = struct {
keepRedactable bool
redactInput bool
}{
program: regexp.MustCompile("^cockroach.*$"),
program: nil, // match everything
file: regexp.MustCompile(log.FilePattern),
keepRedactable: true,
redactInput: false,
Expand Down
36 changes: 21 additions & 15 deletions pkg/cli/debug_merge_logs.go
Original file line number Diff line number Diff line change
Expand Up @@ -331,31 +331,37 @@ type fileInfo struct {
func findLogFiles(
paths []string, filePattern, programFilter *regexp.Regexp, programGroup int, to time.Time,
) ([]fileInfo, error) {
if programGroup == 0 || programFilter == nil {
programGroup = 0
}
to = to.Truncate(time.Second) // log files only have second resolution
fileChan := make(chan fileInfo, len(paths))
var wg sync.WaitGroup
wg.Add(len(paths))
var files []fileInfo
for _, p := range paths {
go func(p string) {
defer wg.Done()
// NB: come go1.16, we should use WalkDir here as it is more efficient.
if err := filepath.Walk(p, func(p string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if info.IsDir() {
// Don't act on the directory itself, Walk will visit it for us.
return nil
}
// We're looking at a file.
fi, ok := getLogFileInfo(p, filePattern)
if !ok {
return
return nil
}
if programGroup > 0 {
program := fi.path[fi.matches[2*programGroup]:fi.matches[2*programGroup+1]]
if !programFilter.MatchString(program) {
return
return nil
}
}
fileChan <- fi
}(p)
}
wg.Wait()
files := make([]fileInfo, 0, len(fileChan))
close(fileChan)
for f := range fileChan {
files = append(files, f)
files = append(files, fi)
return nil
}); err != nil {
return nil, err
}
}
return files, nil
}
Expand Down
6 changes: 6 additions & 0 deletions pkg/cli/debug_merge_logs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,12 @@ var cases = []testCase{
args: []string{"testdata/merge_logs/2/*/*"},
flags: []string{"--redact=false", "--redactable-output=false"},
},
{
// NB: the output here matches 2.multiple-files-from-node.
name: "2.walk-directory",
args: []string{"testdata/merge_logs/2"},
flags: []string{"--redact=false", "--redactable-output=false"},
},
{
name: "2.skip-file",
args: []string{"testdata/merge_logs/2/*/*"},
Expand Down
20 changes: 20 additions & 0 deletions pkg/cli/testdata/merge_logs/results/2.walk-directory
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
test-0001> I181130 22:14:34.828612 740 storage/store_rebalancer.go:277 [n1,s1,store-rebalancer] load-based lease transfers successfully brought s1 down to 37128.65 qps (mean=29834.62, upperThreshold=37293.27)
test-0001> I181130 22:14:37.516378 441 server/status/runtime.go:465 [n1] runtime stats: 900 MiB RSS, 1509 goroutines, 421 MiB/144 MiB/777 MiB GO alloc/idle/total, 96 MiB/144 MiB CGO alloc/total, 154632.4 CGO/sec, 2206.9/216.0 %(u/s)time, 0.4 %gc (43x), 182 MiB/204 MiB (r/w)net
test-0001> I181130 22:14:47.400515 437 gossip/gossip.go:555 [n1] gossip status (ok, 3 nodes)
gossip client (0/3 cur/max conns)
gossip server (2/3 cur/max conns, infos 4077/2242 sent/received, bytes 1275332B/555352B sent/received)
2: ajwerner-test-0002:26257 (8m0s)
3: ajwerner-test-0003:26257 (8m0s)
test-0001> I181130 22:14:47.519324 441 server/status/runtime.go:465 [n1] runtime stats: 957 MiB RSS, 1690 goroutines, 420 MiB/173 MiB/777 MiB GO alloc/idle/total, 124 MiB/174 MiB CGO alloc/total, 143837.6 CGO/sec, 2245.9/218.3 %(u/s)time, 0.5 %gc (47x), 199 MiB/210 MiB (r/w)net
test-0002> I181130 22:14:49.706516 6152567 storage/split_queue.go:213 [n2,split,s2,r163/3:/Table/53/1/-36{84178…-35915…}] initiating a split based on load at key /Table/53/1/-3661595080111255049
test-0002> I181130 22:14:49.706563 6152567 storage/replica_command.go:342 [n2,split,s2,r163/3:/Table/53/1/-36{84178…-35915…}] initiating a split of this range at key /Table/53/1/-3661595080111255049 [r586]
test-0002> I181130 22:14:49.732930 6153357 storage/split_queue.go:213 [n2,split,s2,r269/3:/Table/53/1/-81{56147…-10716…}] initiating a split based on load at key /Table/53/1/-8134885765915906309
test-0002> I181130 22:14:49.732976 6153357 storage/replica_command.go:342 [n2,split,s2,r269/3:/Table/53/1/-81{56147…-10716…}] initiating a split of this range at key /Table/53/1/-8134885765915906309 [r587]
test-0002> I181130 22:14:50.203263 6163815 storage/split_queue.go:213 [n2,split,s2,r171/3:/Table/53/1/-87{94402…-45781…}] initiating a split based on load at key /Table/53/1/-8768249731886885702
test-0002> I181130 22:14:50.203299 6163815 storage/replica_command.go:342 [n2,split,s2,r171/3:/Table/53/1/-87{94402…-45781…}] initiating a split of this range at key /Table/53/1/-8768249731886885702 [r588]
test-0001> I181130 22:14:57.522236 441 server/status/runtime.go:465 [n1] runtime stats: 986 MiB RSS, 1332 goroutines, 438 MiB/140 MiB/777 MiB GO alloc/idle/total, 151 MiB/203 MiB CGO alloc/total, 137663.6 CGO/sec, 2168.3/222.8 %(u/s)time, 0.4 %gc (45x), 192 MiB/203 MiB (r/w)net
test-0002> I181130 22:14:57.650774 499 server/status/runtime.go:465 [n2] runtime stats: 729 MiB RSS, 1127 goroutines, 416 MiB/91 MiB/584 MiB GO alloc/idle/total, 75 MiB/128 MiB CGO alloc/total, 111534.7 CGO/sec, 2013.0/232.3 %(u/s)time, 0.4 %gc (56x), 189 MiB/189 MiB (r/w)net
test-0002> I181130 22:15:05.892547 6507691 storage/split_queue.go:213 [n2,split,s2,r129/3:/Table/53/1/-{722633…-673989…}] initiating a split based on load at key /Table/53/1/-698671514117501959
test-0002> I181130 22:15:05.892592 6507691 storage/replica_command.go:342 [n2,split,s2,r129/3:/Table/53/1/-{722633…-673989…}] initiating a split of this range at key /Table/53/1/-698671514117501959 [r589]
test-0001> I181130 22:15:07.525316 441 server/status/runtime.go:465 [n1] runtime stats: 952 MiB RSS, 1422 goroutines, 397 MiB/165 MiB/777 MiB GO alloc/idle/total, 112 MiB/164 MiB CGO alloc/total, 133359.9 CGO/sec, 2137.8/225.1 %(u/s)time, 0.4 %gc (43x), 187 MiB/198 MiB (r/w)net
test-0002> I181130 22:15:07.653821 499 server/status/runtime.go:465 [n2] runtime stats: 756 MiB RSS, 1127 goroutines, 338 MiB/113 MiB/584 MiB GO alloc/idle/total, 94 MiB/149 MiB CGO alloc/total, 108469.2 CGO/sec, 1964.7/227.0 %(u/s)time, 0.4 %gc (54x), 185 MiB/184 MiB (r/w)net

0 comments on commit d85d49d

Please sign in to comment.