diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 92c1ced8..cccc5cc3 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -3,6 +3,7 @@ on:
   push:
     branches:
       - master
+      - add-gsym-support
   pull_request:
   schedule:
     - cron: '0 2 * * *' # Run every day, at 2AM UTC.
diff --git a/internal/binutils/addr2liner_gsym.go b/internal/binutils/addr2liner_gsym.go
new file mode 100644
index 00000000..9cc2b97f
--- /dev/null
+++ b/internal/binutils/addr2liner_gsym.go
@@ -0,0 +1,189 @@
+// Copyright 2021 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package binutils
+
+import (
+	"bufio"
+	"fmt"
+	"io"
+	"os/exec"
+	"regexp"
+	"strconv"
+	"strings"
+	"sync"
+
+	"github.com/google/pprof/internal/plugin"
+)
+
+const (
+	defaultLLVMGsymUtil = "llvm-gsymutil"
+)
+
+var (
+	// prefixRegex strips the "0x<addr>: " prefix that starts the first output
+	// line for an address, or the leading whitespace of the following lines.
+	prefixRegex = regexp.MustCompile(`^(0x[[:xdigit:]]+:\s|\s+)`)
+
+	// frameRegex matches output lines like:
+	// _ZNK2sf12RefCountBaseILb0EE9removeRefEv + 3 @ /home/user/repo/x/../src/foo/Bar.hpp:67 [inlined]
+	// and captures the function name, the source file and the line number.
+	frameRegex = regexp.MustCompile(`(\S+).* @ (.*):([[:digit:]]+)`)
+)
+
+// llvmGsymUtil is a connection to an llvm-gsymutil command for
+// obtaining address and line number information from a binary.
+type llvmGsymUtil struct {
+	sync.Mutex
+	filename string
+	rw       lineReaderWriter
+	base     uint64
+}
+
+// llvmGsymUtilJob holds the llvm-gsymutil command together with the
+// pipes used to talk to it.
+type llvmGsymUtilJob struct {
+	cmd *exec.Cmd
+	in  io.WriteCloser
+	out *bufio.Reader
+}
+
+// write sends s, followed by a newline, to the command's standard input.
+func (a *llvmGsymUtilJob) write(s string) error {
+	_, err := fmt.Fprintln(a.in, s)
+	return err
+}
+
+// readLine reads one line from the command's standard output and returns
+// it with surrounding whitespace removed.
+func (a *llvmGsymUtilJob) readLine() (string, error) {
+	s, err := a.out.ReadString('\n')
+	if err != nil {
+		return "", err
+	}
+	return strings.TrimSpace(s), nil
+}
+
+// close releases any resources used by the llvmGsymUtilJob object.
+func (a *llvmGsymUtilJob) close() {
+	a.in.Close()
+	a.cmd.Wait()
+}
+
+// newLLVMGsymUtil starts the given llvm-gsymutil command reporting
+// information about the given executable file. If file is a shared
+// library, base should be the address at which it was mapped in the
+// program under consideration. An empty cmd selects defaultLLVMGsymUtil.
+// isData is accepted but not used yet.
+func newLLVMGsymUtil(cmd, file string, base uint64, isData bool) (*llvmGsymUtil, error) {
+	if cmd == "" {
+		cmd = defaultLLVMGsymUtil
+	}
+
+	j := &llvmGsymUtilJob{
+		cmd: exec.Command(cmd, "--addresses-from-stdin"),
+	}
+
+	var err error
+	if j.in, err = j.cmd.StdinPipe(); err != nil {
+		return nil, err
+	}
+
+	outPipe, err := j.cmd.StdoutPipe()
+	if err != nil {
+		return nil, err
+	}
+
+	j.out = bufio.NewReader(outPipe)
+	if err := j.cmd.Start(); err != nil {
+		return nil, err
+	}
+
+	a := &llvmGsymUtil{
+		filename: file,
+		rw:       j,
+		base:     base,
+	}
+
+	return a, nil
+}
+
+// readFrame parses one line of llvm-gsymutil output. It returns a
+// populated plugin.Frame and whether it has reached the end of the data
+// for the current address. A zero plugin.Frame returned with a false end
+// flag means the line held no usable frame and should be skipped.
+func (d *llvmGsymUtil) readFrame() (plugin.Frame, bool) {
+	line, err := d.rw.readLine()
+	if err != nil || len(line) == 0 {
+		return plugin.Frame{}, true
+	}
+
+	// The first frame contains an address: prefix. We don't need that. The remaining frames start with spaces.
+	suffix := prefixRegex.ReplaceAllString(line, "")
+
+	if strings.HasPrefix(suffix, "error:") {
+		// Skip empty line that follows.
+		_, _ = d.rw.readLine()
+		return plugin.Frame{}, true
+	}
+
+	frameMatch := frameRegex.FindStringSubmatch(suffix)
+	if frameMatch == nil {
+		// An unparseable line is not the end of the data: report an empty
+		// frame so the caller keeps reading up to the terminating empty
+		// line. Stopping here would leave the remaining lines in the pipe
+		// and they would be read as the answer to the next address.
+		// TODO handle cases where no source file/line is available
+		return plugin.Frame{}, false
+	}
+
+	// TODO handle column number?
+
+	funcname := frameMatch[1]
+	sourceFile := frameMatch[2]
+	sourceLineStr := frameMatch[3]
+
+	sourceLine := 0
+	if n, err := strconv.Atoi(sourceLineStr); err == nil {
+		sourceLine = n
+	}
+
+	return plugin.Frame{Func: funcname, File: sourceFile, Line: sourceLine}, false
+}
+
+// addrInfo returns the stack frame information for a specific program
+// address. It returns nil if the address could not be identified.
+func (d *llvmGsymUtil) addrInfo(addr uint64) ([]plugin.Frame, error) {
+	d.Lock()
+	defer d.Unlock()
+
+	if err := d.rw.write(fmt.Sprintf("0x%x %s.gsym", addr-d.base, d.filename)); err != nil {
+		return nil, err
+	}
+
+	var stack []plugin.Frame
+	for {
+		frame, end := d.readFrame()
+		if end {
+			break
+		}
+
+		// readFrame reports lines it could not parse as empty frames.
+		if frame != (plugin.Frame{}) {
+			stack = append(stack, frame)
+		}
+	}
+
+	return stack, nil
+}
diff --git a/internal/binutils/binutils.go b/internal/binutils/binutils.go
index e920eeb2..ae99f0f8 100644
--- a/internal/binutils/binutils.go
+++ b/internal/binutils/binutils.go
@@ -62,9 +62,12 @@ type binrep struct {
 	objdump             string
 	objdumpFound        bool
 	isLLVMObjdump       bool
+	llvmGsymUtil        string
+	llvmGsymUtilFound   bool
 
 	// if fast, perform symbolization using nm (symbol names only),
 	// instead of file-line detail from the slower addr2line.
+	// TODO update the comment and handling depending on whether llvm-gsymutil is as fast as nm
 	fast bool
 }
 
@@ -98,7 +101,7 @@ func (bu *Binutils) update(fn func(r *binrep)) {
 // String returns string representation of the binutils state for debug logging.
func (bu *Binutils) String() string { r := bu.get() - var llvmSymbolizer, addr2line, nm, objdump string + var llvmSymbolizer, addr2line, nm, objdump, llvmGsymUtil string if r.llvmSymbolizerFound { llvmSymbolizer = r.llvmSymbolizer } @@ -111,13 +114,17 @@ func (bu *Binutils) String() string { if r.objdumpFound { objdump = r.objdump } - return fmt.Sprintf("llvm-symbolizer=%q addr2line=%q nm=%q objdump=%q fast=%t", - llvmSymbolizer, addr2line, nm, objdump, r.fast) + if r.llvmGsymUtilFound { + llvmGsymUtil = r.llvmGsymUtil + } + return fmt.Sprintf("llvm-symbolizer=%q addr2line=%q nm=%q objdump=%q llvmGsymUtil=%q fast=%t", + llvmSymbolizer, addr2line, nm, objdump, llvmGsymUtil, r.fast) } // SetFastSymbolization sets a toggle that makes binutils use fast // symbolization (using nm), which is much faster than addr2line but // provides only symbol name information (no file/line). +// TODO update the comment and handling depending on whether llvm-gsymutil is as fast as nm func (bu *Binutils) SetFastSymbolization(fast bool) { bu.update(func(r *binrep) { r.fast = fast }) } @@ -147,9 +154,11 @@ func initTools(b *binrep, config string) { b.addr2line, b.addr2lineFound = chooseExe([]string{"addr2line"}, []string{"gaddr2line"}, append(paths["addr2line"], defaultPath...)) // The "-n" option is supported by LLVM since 2011. The output of llvm-nm // and GNU nm with "-n" option is interchangeable for our purposes, so we do - // not need to differrentiate them. + // not need to differentiate them. b.nm, b.nmFound = chooseExe([]string{"llvm-nm", "nm"}, []string{"gnm"}, append(paths["nm"], defaultPath...)) b.objdump, b.objdumpFound, b.isLLVMObjdump = findObjdump(append(paths["objdump"], defaultPath...)) + b.llvmGsymUtil, b.llvmGsymUtilFound = chooseExe([]string{"llvm-gsymutil"}, []string{}, append(paths["llvm-gsymutil"], defaultPath...)) + // TODO check if llvm-gsymutil is recent enough to support --addresses-from-stdin } // findObjdump finds and returns path to preferred objdump binary. 
@@ -681,6 +690,7 @@ type fileAddr2Line struct {
 	file
 	addr2liner     *addr2Liner
 	llvmSymbolizer *llvmSymbolizer
+	llvmGsymUtil   *llvmGsymUtil
 	isData         bool
 }
 
@@ -690,6 +700,9 @@ func (f *fileAddr2Line) SourceLine(addr uint64) ([]plugin.Frame, error) {
 		return nil, f.baseErr
 	}
 	f.once.Do(f.init)
+	if f.llvmGsymUtil != nil {
+		return f.llvmGsymUtil.addrInfo(addr)
+	}
 	if f.llvmSymbolizer != nil {
 		return f.llvmSymbolizer.addrInfo(addr)
 	}
@@ -700,6 +713,13 @@ func (f *fileAddr2Line) SourceLine(addr uint64) ([]plugin.Frame, error) {
 }
 
 func (f *fileAddr2Line) init() {
+	if _, err := os.Stat(f.name + ".gsym"); err == nil {
+		if llvmGsymUtil, err := newLLVMGsymUtil(f.b.llvmGsymUtil, f.name, f.base, f.isData); err == nil {
+			f.llvmGsymUtil = llvmGsymUtil
+			return
+		}
+	}
+
 	if llvmSymbolizer, err := newLLVMSymbolizer(f.b.llvmSymbolizer, f.name, f.base, f.isData); err == nil {
 		f.llvmSymbolizer = llvmSymbolizer
 		return
@@ -711,6 +731,8 @@ func (f *fileAddr2Line) init() {
 		// When addr2line encounters some gcc compiled binaries, it
 		// drops interesting parts of names in anonymous namespaces.
 		// Fallback to NM for better function names.
+		// This seems to have been fixed in binutils 2.26 though, see
+		// https://sourceware.org/bugzilla/show_bug.cgi?id=17541
 		if nm, err := newAddr2LinerNM(f.b.nm, f.name, f.base); err == nil {
 			f.addr2liner.nm = nm
 		}
@@ -718,6 +740,10 @@ func (f *fileAddr2Line) init() {
 }
 
 func (f *fileAddr2Line) Close() error {
+	if f.llvmGsymUtil != nil {
+		f.llvmGsymUtil.rw.close()
+		f.llvmGsymUtil = nil
+	}
 	if f.llvmSymbolizer != nil {
 		f.llvmSymbolizer.rw.close()
 		f.llvmSymbolizer = nil
diff --git a/internal/binutils/testdata/exe_linux_64.gsym b/internal/binutils/testdata/exe_linux_64.gsym
new file mode 100644
index 00000000..2d026788
Binary files /dev/null and b/internal/binutils/testdata/exe_linux_64.gsym differ
diff --git a/internal/plugin/plugin.go b/internal/plugin/plugin.go
index a57a0b20..a40b5141 100644
--- a/internal/plugin/plugin.go
+++ b/internal/plugin/plugin.go
@@ -128,7 +128,7 @@ type Inst struct {
 
 // An ObjFile is a single object file: a shared library or executable.
 type ObjFile interface {
-	// Name returns the underlyinf file name, if available
+	// Name returns the underlying file name, if available
 	Name() string
 
 	// ObjAddr returns the objdump (linker) address corresponding to a runtime