From fcdc1c17f8e9476ac2f876bd2fce39ad5c4117ce Mon Sep 17 00:00:00 2001 From: Simon Giesecke Date: Wed, 12 May 2021 13:19:40 +0000 Subject: [PATCH 1/5] TEMP enable workflow --- .github/workflows/ci.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml index 92c1ced8..cccc5cc3 100644 --- a/.github/workflows/ci.yaml +++ b/.github/workflows/ci.yaml @@ -3,6 +3,7 @@ on: push: branches: - master + - add-gsym-support pull_request: schedule: - cron: '0 2 * * *' # Run every day, at 2AM UTC. From a875dc8b8809ed33bd076350e55a22ae93516c2a Mon Sep 17 00:00:00 2001 From: Simon Giesecke Date: Tue, 27 Apr 2021 13:45:02 +0000 Subject: [PATCH 2/5] Fix typo in comment. --- internal/binutils/binutils.go | 2 +- internal/plugin/plugin.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/binutils/binutils.go b/internal/binutils/binutils.go index e920eeb2..65b894d7 100644 --- a/internal/binutils/binutils.go +++ b/internal/binutils/binutils.go @@ -147,7 +147,7 @@ func initTools(b *binrep, config string) { b.addr2line, b.addr2lineFound = chooseExe([]string{"addr2line"}, []string{"gaddr2line"}, append(paths["addr2line"], defaultPath...)) // The "-n" option is supported by LLVM since 2011. The output of llvm-nm // and GNU nm with "-n" option is interchangeable for our purposes, so we do - // not need to differrentiate them. + // not need to differentiate them. b.nm, b.nmFound = chooseExe([]string{"llvm-nm", "nm"}, []string{"gnm"}, append(paths["nm"], defaultPath...)) b.objdump, b.objdumpFound, b.isLLVMObjdump = findObjdump(append(paths["objdump"], defaultPath...)) } diff --git a/internal/plugin/plugin.go b/internal/plugin/plugin.go index a57a0b20..a40b5141 100644 --- a/internal/plugin/plugin.go +++ b/internal/plugin/plugin.go @@ -128,7 +128,7 @@ type Inst struct { // An ObjFile is a single object file: a shared library or executable. type ObjFile interface { - // Name returns the underlyinf file name, if available + // Name returns the underlying file name, if available Name() string // ObjAddr returns the objdump (linker) address corresponding to a runtime From 25f3fce7c6a60b3ead44e85d9b585b0f5ce3c440 Mon Sep 17 00:00:00 2001 From: Simon Giesecke Date: Tue, 27 Apr 2021 13:45:45 +0000 Subject: [PATCH 3/5] Add note on issue fixed in binutils 2.26 addr2line. --- internal/binutils/binutils.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal/binutils/binutils.go b/internal/binutils/binutils.go index 65b894d7..9670c49c 100644 --- a/internal/binutils/binutils.go +++ b/internal/binutils/binutils.go @@ -711,6 +711,8 @@ func (f *fileAddr2Line) init() { // When addr2line encounters some gcc compiled binaries, it // drops interesting parts of names in anonymous namespaces. // Fallback to NM for better function names. + // This seems to have been fixed in binutils 2.26 though, see + // https://sourceware.org/bugzilla/show_bug.cgi?id=17541 if nm, err := newAddr2LinerNM(f.b.nm, f.name, f.base); err == nil { f.addr2liner.nm = nm } From 609c4a47d9e42ba419eccf8361573a65cebf3e39 Mon Sep 17 00:00:00 2001 From: Simon Giesecke Date: Wed, 12 May 2021 12:26:09 +0000 Subject: [PATCH 4/5] Add GSYM support. Fixes #632 --- internal/binutils/addr2liner_gsym.go | 179 +++++++++++++++++++ internal/binutils/binutils.go | 37 +++- internal/binutils/testdata/exe_linux_64.gsym | Bin 0 -> 448 bytes internal/symbolizer/symbolizer.go | 5 + 4 files changed, 218 insertions(+), 3 deletions(-) create mode 100644 internal/binutils/addr2liner_gsym.go create mode 100644 internal/binutils/testdata/exe_linux_64.gsym diff --git a/internal/binutils/addr2liner_gsym.go b/internal/binutils/addr2liner_gsym.go new file mode 100644 index 00000000..49a066a7 --- /dev/null +++ b/internal/binutils/addr2liner_gsym.go @@ -0,0 +1,179 @@ +// Copyright 2014 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package binutils + +import ( + "bufio" + "fmt" + "io" + "os/exec" + "regexp" + "strconv" + "strings" + "sync" + + "github.com/google/pprof/internal/plugin" +) + +const ( + defaultLLVMGsymUtil = "llvm-gsymutil" +) + +// llvmGsymUtil is a connection to an llvm-symbolizer command for +// obtaining address and line number information from a binary. +type llvmGsymUtil struct { + sync.Mutex + filename string + rw lineReaderWriter + base uint64 +} + +type llvmGsymUtilJob struct { + cmd *exec.Cmd + in io.WriteCloser + out *bufio.Reader +} + +func (a *llvmGsymUtilJob) write(s string) error { + _, err := fmt.Fprintln(a.in, s) + return err +} + +func (a *llvmGsymUtilJob) readLine() (string, error) { + s, err := a.out.ReadString('\n') + if err != nil { + return "", err + } + return strings.TrimSpace(s), nil +} + +// close releases any resources used by the llvmGsymUtil object. +func (a *llvmGsymUtilJob) close() { + a.in.Close() + a.cmd.Wait() +} + +// newLLVMGsymUtil starts the given llvmGsymUtil command reporting +// information about the given executable file. If file is a shared +// library, base should be the address at which it was mapped in the +// program under consideration. +func newLLVMGsymUtil(cmd, file string, base uint64, isData bool) (*llvmGsymUtil, error) { + if cmd == "" { + cmd = defaultLLVMGsymUtil + } + + j := &llvmGsymUtilJob{ + cmd: exec.Command(cmd, "--addresses-from-stdin"), + } + + var err error + if j.in, err = j.cmd.StdinPipe(); err != nil { + return nil, err + } + + outPipe, err := j.cmd.StdoutPipe() + if err != nil { + return nil, err + } + + j.out = bufio.NewReader(outPipe) + if err := j.cmd.Start(); err != nil { + return nil, err + } + + a := &llvmGsymUtil{ + filename: file, + rw: j, + base: base, + } + + return a, nil +} + +// readFrame parses the llvm-symbolizer output for a single address. It +// returns a populated plugin.Frame and whether it has reached the end of the +// data. +func (d *llvmGsymUtil) readFrame() (plugin.Frame, bool) { + line, err := d.rw.readLine() + if err != nil || len(line) == 0 { + return plugin.Frame{}, true + } + + print("read line: " + line + "\n") + + // TODO compile regexes once + prefixRegex := regexp.MustCompile(`^(0x[[:xdigit:]]+:\s|\s+)`) + // _ZNK2sf12RefCountBaseILb0EE9removeRefEv + 3 @ /home/sgiesecke/Snowflake/trunk/ExecPlatform/build/ReleaseClangLTO/../../src/core/ptr/RefCountBase.hpp:67 [inlined] + frameRegex := regexp.MustCompile(`(\S+).* @ (.*):([[:digit:]]+)`) + + // The first frame contains an address: prefix. We don't need that. The remaining frames start with spaces. + suffix := prefixRegex.ReplaceAllString(line, "") + + print("suffix is: " + suffix + "\n") + + if strings.HasPrefix(suffix, "error:") { + // Skip empty line that follows. + _, _ = d.rw.readLine() + return plugin.Frame{}, true + } + + frameMatch := frameRegex.FindStringSubmatch(suffix) + if frameMatch == nil { + print("frame regex didn't match\n") + + return plugin.Frame{}, true + } + + // TODO handle cases where no source file/line is available + // TODO handle column number? + + funcname := frameMatch[1] + sourceFile := frameMatch[2] + sourceLineStr := frameMatch[3] + + sourceLine := 0 + if line, err := strconv.Atoi(sourceLineStr); err == nil { + sourceLine = line + } + + return plugin.Frame{Func: funcname, File: sourceFile, Line: sourceLine}, false +} + +// addrInfo returns the stack frame information for a specific program +// address. It returns nil if the address could not be identified. +func (d *llvmGsymUtil) addrInfo(addr uint64) ([]plugin.Frame, error) { + d.Lock() + defer d.Unlock() + + print("querying gsym: " + fmt.Sprintf("0x%x %s.gsym", addr-d.base, d.filename) + "\n") + + if err := d.rw.write(fmt.Sprintf("0x%x %s.gsym", addr-d.base, d.filename)); err != nil { + return nil, err + } + + var stack []plugin.Frame + for { + frame, end := d.readFrame() + if end { + break + } + + if frame != (plugin.Frame{}) { + stack = append(stack, frame) + } + } + + return stack, nil +} diff --git a/internal/binutils/binutils.go b/internal/binutils/binutils.go index 9670c49c..21af861c 100644 --- a/internal/binutils/binutils.go +++ b/internal/binutils/binutils.go @@ -62,9 +62,12 @@ type binrep struct { objdump string objdumpFound bool isLLVMObjdump bool + llvmGsymUtil string + llvmGsymUtilFound bool // if fast, perform symbolization using nm (symbol names only), // instead of file-line detail from the slower addr2line. + // TODO update the comment and handling depending on whether llvm-gsymutil is as fast as nm fast bool } @@ -98,7 +101,7 @@ func (bu *Binutils) update(fn func(r *binrep)) { // String returns string representation of the binutils state for debug logging. func (bu *Binutils) String() string { r := bu.get() - var llvmSymbolizer, addr2line, nm, objdump string + var llvmSymbolizer, addr2line, nm, objdump, llvmGsymUtil string if r.llvmSymbolizerFound { llvmSymbolizer = r.llvmSymbolizer } @@ -111,13 +114,17 @@ func (bu *Binutils) String() string { if r.objdumpFound { objdump = r.objdump } - return fmt.Sprintf("llvm-symbolizer=%q addr2line=%q nm=%q objdump=%q fast=%t", - llvmSymbolizer, addr2line, nm, objdump, r.fast) + if r.llvmGsymUtilFound { + llvmGsymUtil = r.llvmGsymUtil + } + return fmt.Sprintf("llvm-symbolizer=%q addr2line=%q nm=%q objdump=%q llvmGsymUtil=%q fast=%t", + llvmSymbolizer, addr2line, nm, objdump, llvmGsymUtil, r.fast) } // SetFastSymbolization sets a toggle that makes binutils use fast // symbolization (using nm), which is much faster than addr2line but // provides only symbol name information (no file/line). +// TODO update the comment and handling depending on whether llvm-gsymutil is as fast as nm func (bu *Binutils) SetFastSymbolization(fast bool) { bu.update(func(r *binrep) { r.fast = fast }) } @@ -150,6 +157,13 @@ func initTools(b *binrep, config string) { // not need to differentiate them. b.nm, b.nmFound = chooseExe([]string{"llvm-nm", "nm"}, []string{"gnm"}, append(paths["nm"], defaultPath...)) b.objdump, b.objdumpFound, b.isLLVMObjdump = findObjdump(append(paths["objdump"], defaultPath...)) + b.llvmGsymUtil, b.llvmGsymUtilFound = chooseExe([]string{"llvm-gsymutil"}, []string{}, append(paths["llvm-gsymutil"], defaultPath...)) + + if !b.llvmGsymUtilFound { + fmt.Println("llvm-gsymutil not found") + + os.Exit(3) + } } // findObjdump finds and returns path to preferred objdump binary. @@ -681,6 +695,7 @@ type fileAddr2Line struct { file addr2liner *addr2Liner llvmSymbolizer *llvmSymbolizer + llvmGsymUtil *llvmGsymUtil isData bool } @@ -690,6 +705,10 @@ func (f *fileAddr2Line) SourceLine(addr uint64) ([]plugin.Frame, error) { return nil, f.baseErr } f.once.Do(f.init) + // TODO only use this if we have a matching gsym file! + if f.llvmGsymUtil != nil { + return f.llvmGsymUtil.addrInfo(addr) + } if f.llvmSymbolizer != nil { return f.llvmSymbolizer.addrInfo(addr) } @@ -700,6 +719,15 @@ func (f *fileAddr2Line) SourceLine(addr uint64) ([]plugin.Frame, error) { } func (f *fileAddr2Line) init() { + if llvmGsymUtil, err := newLLVMGsymUtil(f.b.llvmGsymUtil, f.name, f.base, f.isData); err == nil { + f.llvmGsymUtil = llvmGsymUtil + + // For testing + return + } + + fmt.Println("newLLVMGsymUtil failed") + if llvmSymbolizer, err := newLLVMSymbolizer(f.b.llvmSymbolizer, f.name, f.base, f.isData); err == nil { f.llvmSymbolizer = llvmSymbolizer return @@ -720,6 +748,9 @@ func (f *fileAddr2Line) init() { } func (f *fileAddr2Line) Close() error { + if f.llvmGsymUtil != nil { + f.llvmGsymUtil = nil + } if f.llvmSymbolizer != nil { f.llvmSymbolizer.rw.close() f.llvmSymbolizer = nil diff --git a/internal/binutils/testdata/exe_linux_64.gsym b/internal/binutils/testdata/exe_linux_64.gsym new file mode 100644 index 0000000000000000000000000000000000000000..2d026788516c06fedc97e3e1e254b3022d8041fb GIT binary patch literal 448 zcmeZ`40dN^U=m?qa9{udE+B3I;t4=Jkvr(s-E$%9y7z6FKly;~TS5Etz7LokSPEDc zusmR4VAW-HVBNsFg;ju&fk6R?4S?7Ihy#E)0f-BLcmfbF0OAcm%mg$O2H1cMZV)Fo zF*A=rza+PSAtN;>Ctoj_AtkjaH9fPqB(*5MBsV@eCqFN>m;sAWe0)lNe0olPQesYg zN=bfEaeQKF1w&d|7*10FeYw@c;k- literal 0 HcmV?d00001 diff --git a/internal/symbolizer/symbolizer.go b/internal/symbolizer/symbolizer.go index d741e7ad..faa87d08 100644 --- a/internal/symbolizer/symbolizer.go +++ b/internal/symbolizer/symbolizer.go @@ -150,6 +150,11 @@ func doLocalSymbolize(prof *profile.Profile, fast, force bool, obj plugin.ObjToo stack, err := segment.SourceLine(l.Address) if err != nil || len(stack) == 0 { + + if err != nil { + fmt.Println(err.Error()) + } + // No answers from addr2line. continue } From 51875c2db00e6d143534eb84884831b347750574 Mon Sep 17 00:00:00 2001 From: Simon Giesecke Date: Thu, 17 Jun 2021 09:50:37 +0000 Subject: [PATCH 5/5] Resolve TODOs, remove debug output --- internal/binutils/addr2liner_gsym.go | 21 +++++++-------------- internal/binutils/binutils.go | 20 ++++++-------------- 2 files changed, 13 insertions(+), 28 deletions(-) diff --git a/internal/binutils/addr2liner_gsym.go b/internal/binutils/addr2liner_gsym.go index 49a066a7..9cc2b97f 100644 --- a/internal/binutils/addr2liner_gsym.go +++ b/internal/binutils/addr2liner_gsym.go @@ -1,4 +1,4 @@ -// Copyright 2014 Google Inc. All Rights Reserved. +// Copyright 2021 Google Inc. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. @@ -31,6 +31,12 @@ const ( defaultLLVMGsymUtil = "llvm-gsymutil" ) +var prefixRegex *regexp.Regexp = regexp.MustCompile(`^(0x[[:xdigit:]]+:\s|\s+)`) + +// Matches output lines like: +// _ZNK2sf12RefCountBaseILb0EE9removeRefEv + 3 @ /home/user/repo/x/../src/foo/Bar.hpp:67 [inlined] +var frameRegex *regexp.Regexp = regexp.MustCompile(`(\S+).* @ (.*):([[:digit:]]+)`) + // llvmGsymUtil is a connection to an llvm-symbolizer command for // obtaining address and line number information from a binary. type llvmGsymUtil struct { @@ -111,18 +117,9 @@ func (d *llvmGsymUtil) readFrame() (plugin.Frame, bool) { return plugin.Frame{}, true } - print("read line: " + line + "\n") - - // TODO compile regexes once - prefixRegex := regexp.MustCompile(`^(0x[[:xdigit:]]+:\s|\s+)`) - // _ZNK2sf12RefCountBaseILb0EE9removeRefEv + 3 @ /home/sgiesecke/Snowflake/trunk/ExecPlatform/build/ReleaseClangLTO/../../src/core/ptr/RefCountBase.hpp:67 [inlined] - frameRegex := regexp.MustCompile(`(\S+).* @ (.*):([[:digit:]]+)`) - // The first frame contains an address: prefix. We don't need that. The remaining frames start with spaces. suffix := prefixRegex.ReplaceAllString(line, "") - print("suffix is: " + suffix + "\n") - if strings.HasPrefix(suffix, "error:") { // Skip empty line that follows. _, _ = d.rw.readLine() @@ -131,8 +128,6 @@ func (d *llvmGsymUtil) readFrame() (plugin.Frame, bool) { frameMatch := frameRegex.FindStringSubmatch(suffix) if frameMatch == nil { - print("frame regex didn't match\n") - return plugin.Frame{}, true } @@ -157,8 +152,6 @@ func (d *llvmGsymUtil) addrInfo(addr uint64) ([]plugin.Frame, error) { d.Lock() defer d.Unlock() - print("querying gsym: " + fmt.Sprintf("0x%x %s.gsym", addr-d.base, d.filename) + "\n") - if err := d.rw.write(fmt.Sprintf("0x%x %s.gsym", addr-d.base, d.filename)); err != nil { return nil, err } diff --git a/internal/binutils/binutils.go b/internal/binutils/binutils.go index 21af861c..ae99f0f8 100644 --- a/internal/binutils/binutils.go +++ b/internal/binutils/binutils.go @@ -158,12 +158,7 @@ func initTools(b *binrep, config string) { b.nm, b.nmFound = chooseExe([]string{"llvm-nm", "nm"}, []string{"gnm"}, append(paths["nm"], defaultPath...)) b.objdump, b.objdumpFound, b.isLLVMObjdump = findObjdump(append(paths["objdump"], defaultPath...)) b.llvmGsymUtil, b.llvmGsymUtilFound = chooseExe([]string{"llvm-gsymutil"}, []string{}, append(paths["llvm-gsymutil"], defaultPath...)) - - if !b.llvmGsymUtilFound { - fmt.Println("llvm-gsymutil not found") - - os.Exit(3) - } + // TODO check if llvm-gsymutil is recent enough to support --addresses-from-stdin } // findObjdump finds and returns path to preferred objdump binary. @@ -705,7 +700,6 @@ func (f *fileAddr2Line) SourceLine(addr uint64) ([]plugin.Frame, error) { return nil, f.baseErr } f.once.Do(f.init) - // TODO only use this if we have a matching gsym file! if f.llvmGsymUtil != nil { return f.llvmGsymUtil.addrInfo(addr) } @@ -719,15 +713,13 @@ func (f *fileAddr2Line) SourceLine(addr uint64) ([]plugin.Frame, error) { } func (f *fileAddr2Line) init() { - if llvmGsymUtil, err := newLLVMGsymUtil(f.b.llvmGsymUtil, f.name, f.base, f.isData); err == nil { - f.llvmGsymUtil = llvmGsymUtil - - // For testing - return + if _, err := os.Stat(f.name + ".gsym"); err == nil { + if llvmGsymUtil, err := newLLVMGsymUtil(f.b.llvmGsymUtil, f.name, f.base, f.isData); err == nil { + f.llvmGsymUtil = llvmGsymUtil + return + } } - fmt.Println("newLLVMGsymUtil failed") - if llvmSymbolizer, err := newLLVMSymbolizer(f.b.llvmSymbolizer, f.name, f.base, f.isData); err == nil { f.llvmSymbolizer = llvmSymbolizer return