pkg/perf: Compact perf map entries after deduplication
brancz committed Nov 20, 2023
1 parent f80b0eb commit f375ec7
Showing 6 changed files with 127 additions and 101 deletions.
59 changes: 0 additions & 59 deletions pkg/perf/conv.go

This file was deleted.

5 changes: 3 additions & 2 deletions pkg/perf/dump.go
@@ -68,8 +68,9 @@ func ReadJITdump(logger log.Logger, fileName string) (Map, error) {
}

addrs := make([]MapAddr, 0, len(dump.CodeLoads))
st := NewStringTable(16*1024, 1024)
for _, cl := range dump.CodeLoads {
addrs = append(addrs, MapAddr{cl.CodeAddr, cl.CodeAddr + cl.CodeSize, cl.Name})
addrs = append(addrs, MapAddr{cl.CodeAddr, cl.CodeAddr + cl.CodeSize, st.GetOrAdd([]byte(cl.Name))})
}

// Sorted by end address to allow binary search during look-up. End to find
@@ -79,7 +80,7 @@ func ReadJITdump(logger log.Logger, fileName string) (Map, error) {
return addrs[i].End < addrs[j].End
})

return Map{Path: fileName, addrs: addrs}, nil
return Map{Path: fileName, addrs: addrs, stringTable: st}, nil
}

func NewJITDumpCache(logger log.Logger, reg prometheus.Registerer, profilingDuration time.Duration) *JITDumpCache {
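
The change above replaces the per-entry Symbol string with an index into a shared StringTable, so repeated JIT symbols are stored only once. A minimal sketch of the interning pattern, written as if it lived in a (hypothetical) file of package perf; the symbol strings are made up and the capacities mirror the values ReadJITdump uses above:

package perf

import "fmt"

func exampleInterning() {
	st := NewStringTable(16*1024, 1024)

	a := st.GetOrAdd([]byte("LazyCompile:~foo a.js:1"))
	b := st.GetOrAdd([]byte("LazyCompile:~foo a.js:1")) // duplicate: returns the existing index
	c := st.GetOrAdd([]byte("LazyCompile:~bar b.js:7"))

	fmt.Println(a == b)    // true: the bytes were stored once
	fmt.Println(a == c)    // false
	fmt.Println(st.Len())  // 2 distinct symbols
	fmt.Println(st.Get(a)) // "LazyCompile:~foo a.js:1"
}
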
49 changes: 44 additions & 5 deletions pkg/perf/map.go
@@ -23,32 +23,71 @@ var ErrNoSymbolFound = errors.New("no symbol found")
type MapAddr struct {
Start uint64
End uint64
Symbol string
Symbol int
}

type Map struct {
Path string

addrs []MapAddr
addrs []MapAddr
stringTable *StringTable
}

func (p *Map) Deduplicate() *Map {
newAddrs := make([]MapAddr, len(p.addrs))

// For deduplication to be most effective we need to also remove entries
// from the string table.
stringTableUsage := make([]int, p.stringTable.Len())

j := len(p.addrs) - 1
for i := len(p.addrs) - 1; i >= 0; i-- {
// The last symbol is the most up to date one, so if any earlier ones
// intersect with it we only keep the latest one.
if i > 0 && p.addrs[i-1].End > p.addrs[i].Start {
continue
}

stringTableUsage[p.addrs[i].Symbol]++

newAddrs[j] = p.addrs[i]
j--
}

newStringTableSize := uint32(0)
newStringTableEntries := 0
for i, usage := range stringTableUsage {
if usage > 0 {
newStringTableSize += p.stringTable.LengthOf(i)
newStringTableEntries++
}
}

newStringTable := NewStringTable(int(newStringTableSize), newStringTableEntries)
translation := make([]int, p.stringTable.Len())
for i, usage := range stringTableUsage {
if usage > 0 {
translation[i] = newStringTable.GetOrAdd(p.stringTable.GetBytes(i))
}
}

newAddrs = newAddrs[j+1:]

// We need to do this so we can free the memory used by the old slice that
// contains duplicates.
compacted := make([]MapAddr, len(newAddrs))
for i, addr := range newAddrs {
compacted[i] = MapAddr{
Start: addr.Start,
End: addr.End,
Symbol: translation[addr.Symbol],
}
}

return &Map{
Path: p.Path,
addrs: newAddrs[j+1:],
Path: p.Path,
addrs: compacted,
stringTable: newStringTable,
}
}

@@ -60,5 +99,5 @@ func (p *Map) Lookup(addr uint64) (string, error) {
return "", ErrNoSymbolFound
}

return p.addrs[idx].Symbol, nil
return p.stringTable.Get(p.addrs[idx].Symbol), nil
}
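
Deduplicate now does two things: it drops intersecting entries as before, and it rebuilds both the address slice and the string table at exactly the size that survives, so the oversized backing arrays left over from parsing can be garbage collected. A rough sketch of the observable effect, again written inside package perf because addrs and stringTable are unexported; the addresses and the symbol are invented:

package perf

import "fmt"

func exampleDeduplicate() {
	st := NewStringTable(64, 4)
	sym := st.GetOrAdd([]byte("LazyCompile:~f a.js:1"))

	m := &Map{
		Path: "sketch",
		addrs: []MapAddr{
			// The same re-JITted region appears twice; the two entries
			// intersect, so only one survives deduplication.
			{Start: 0x100, End: 0x180, Symbol: sym},
			{Start: 0x100, End: 0x180, Symbol: sym},
		},
		stringTable: st,
	}

	d := m.Deduplicate()
	fmt.Println(len(m.addrs), len(d.addrs)) // 2 1
	fmt.Println(d.stringTable.Len())        // 1: only symbols still referenced are copied over

	s, err := d.Lookup(0x140)
	fmt.Println(s, err) // LazyCompile:~f a.js:1 <nil>
}
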
55 changes: 37 additions & 18 deletions pkg/perf/perf.go
@@ -58,6 +58,7 @@ var (
func ReadPerfMap(
logger log.Logger,
fileName string,
prev *Map,
) (*Map, error) {
fd, err := os.Open(fileName)
if err != nil {
@@ -70,22 +71,35 @@ func ReadPerfMap(
return nil, err
}

// Estimate the number of lines in the map file
// and allocate a string converter when the file is sufficiently large.
const (
avgLineLen = 60
avgFuncLen = 42
var (
addrs []MapAddr
st *StringTable
)
fileSize := stat.Size()
linesCount := int(fileSize / avgLineLen)
convBufSize := 0
if linesCount > 400 {
convBufSize = linesCount * avgFuncLen

if prev != nil {
// If we have the previous map we use some stats to preallocate so we
// have a good starting point.
addrs = make([]MapAddr, 0, len(prev.addrs))
st = NewStringTable(prev.stringTable.DataLength(), prev.stringTable.Len())
} else {
// Estimate the number of lines in the map file
// and allocate a string converter when the file is sufficiently large.
const (
avgLineLen = 60
avgFuncLen = 42
)
fileSize := stat.Size()
linesCount := int(fileSize / avgLineLen)
convBufSize := 0
if linesCount > 400 {
convBufSize = linesCount * avgFuncLen
}

addrs = make([]MapAddr, 0, linesCount)
st = NewStringTable(convBufSize, linesCount)
}

r := bufio.NewReader(fd)
addrs := make([]MapAddr, 0, linesCount)
conv := newStringConverter(convBufSize)
i := 0
var multiError error
for {
@@ -97,7 +111,7 @@ func ReadPerfMap(
return nil, fmt.Errorf("read perf map line: %w", err)
}

line, err := parsePerfMapLine(b, conv)
line, err := parsePerfMapLine(b, st)
if err != nil {
multiError = errors.Join(multiError, fmt.Errorf("parse perf map line %d: %w", i, err))
}
@@ -121,10 +135,14 @@ func ReadPerfMap(
return addrs[i].End < addrs[j].End
})

return (&Map{Path: fileName, addrs: addrs}).Deduplicate(), nil
return (&Map{
Path: fileName,
addrs: addrs,
stringTable: st,
}).Deduplicate(), nil
}

func parsePerfMapLine(b []byte, conv *stringConverter) (MapAddr, error) {
func parsePerfMapLine(b []byte, st *StringTable) (MapAddr, error) {
firstSpace := bytes.Index(b, []byte(" "))
if firstSpace == -1 {
return MapAddr{}, errors.New("invalid line")
@@ -169,7 +187,7 @@ func parsePerfMapLine(b []byte, conv *stringConverter) (MapAddr, error) {
return MapAddr{
Start: start,
End: start + size,
Symbol: conv.String(symbolBytes),
Symbol: st.GetOrAdd(symbolBytes),
}, nil
}

@@ -209,14 +227,15 @@ func (p *PerfMapCache) PerfMapForPID(pid int) (*Map, error) {
return nil, err
}

if v, ok := p.cache.Get(pid); ok {
v, ok := p.cache.Get(pid)
if ok {
if v.fileModTime == info.ModTime() && v.fileSize == info.Size() {
return v.m, nil
}
level.Debug(p.logger).Log("msg", "cached value is outdated", "pid", pid)
}

m, err := ReadPerfMap(p.logger, perfFile)
m, err := ReadPerfMap(p.logger, perfFile, v.m)
if err != nil {
return nil, err
}
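
ReadPerfMap now takes the previously parsed *Map (the cache passes its last cached value) and, when it is non-nil, preallocates the address slice and string table from that map's actual sizes instead of guessing from the file size. A sketch of the calling pattern, using a hypothetical helper and an illustrative path layout rather than the cache code above:

package perf

import (
	"fmt"

	"github.com/go-kit/log"
)

// refreshPerfMap is a hypothetical helper: pass nil on the first read (the
// file-size heuristic is used), then feed each result back in as prev so the
// next parse preallocates from the previous map's entry and string sizes.
func refreshPerfMap(logger log.Logger, pid int, prev *Map) (*Map, error) {
	perfFile := fmt.Sprintf("/proc/%d/root/tmp/perf-%d.map", pid, pid) // illustrative path
	return ReadPerfMap(logger, perfFile, prev)
}
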
17 changes: 9 additions & 8 deletions pkg/perf/perf_test.go
@@ -21,11 +21,12 @@ import (
)

func TestPerfMapParse(t *testing.T) {
res, err := ReadPerfMap(log.NewNopLogger(), "testdata/nodejs-perf-map")
res, err := ReadPerfMap(log.NewNopLogger(), "testdata/nodejs-perf-map", nil)
require.NoError(t, err)
require.Len(t, res.addrs, 28)
// Check for 4edd3cca B0 LazyCompile:~Timeout internal/timers.js:55
require.Equal(t, MapAddr{0x4edd4f12, 0x4edd4f47, "LazyCompile:~remove internal/linkedlist.js:15"}, res.addrs[12])

require.Equal(t, "LazyCompile:~remove internal/linkedlist.js:15", res.stringTable.Get(18))
require.Equal(t, MapAddr{0x4edd4f12, 0x4edd4f47, 18}, res.addrs[12])

// Look-up a symbol.
sym, err := res.Lookup(0x4edd4f12 + 4)
@@ -37,25 +38,25 @@ func TestPerfMapParse(t *testing.T) {
}

func TestPerfMapCorruptLine(t *testing.T) {
_, err := parsePerfMapLine([]byte(" Script:~ evalmachine.<anonymous>:1\r\n"), newStringConverter(5000))
_, err := parsePerfMapLine([]byte(" Script:~ evalmachine.<anonymous>:1\r\n"), NewStringTable(16*1024, 1024))
require.Error(t, err)
}

func TestPerfMapRegression(t *testing.T) {
_, err := ReadPerfMap(log.NewNopLogger(), "testdata/nodejs-perf-map-regression")
_, err := ReadPerfMap(log.NewNopLogger(), "testdata/nodejs-perf-map-regression", nil)
require.NoError(t, err)
}

func TestPerfMapParseErlangPerfMap(t *testing.T) {
_, err := ReadPerfMap(log.NewNopLogger(), "testdata/erlang-perf-map")
_, err := ReadPerfMap(log.NewNopLogger(), "testdata/erlang-perf-map", nil)
require.NoError(t, err)
}

func BenchmarkPerfMapParse(b *testing.B) {
logger := log.NewNopLogger()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := ReadPerfMap(logger, "testdata/nodejs-perf-map")
_, err := ReadPerfMap(logger, "testdata/nodejs-perf-map", nil)
require.NoError(b, err)
}
}
@@ -64,7 +65,7 @@ func BenchmarkPerfMapParseBig(b *testing.B) {
logger := log.NewNopLogger()
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, err := ReadPerfMap(logger, "testdata/erlang-perf-map")
_, err := ReadPerfMap(logger, "testdata/erlang-perf-map", nil)
require.NoError(b, err)
}
}
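
One test that could accompany this change (a sketch, not part of the commit, assuming the same imports as the existing file): since the prev argument only influences preallocation, re-parsing the same file with the previous result as a hint should yield identical entries.

func TestPerfMapParseWithPreviousMap(t *testing.T) {
	first, err := ReadPerfMap(log.NewNopLogger(), "testdata/nodejs-perf-map", nil)
	require.NoError(t, err)

	// Passing the previous map only changes initial capacities, never the result.
	second, err := ReadPerfMap(log.NewNopLogger(), "testdata/nodejs-perf-map", first)
	require.NoError(t, err)
	require.Equal(t, first.addrs, second.addrs)
	require.Len(t, second.addrs, 28)
}
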
43 changes: 34 additions & 9 deletions pkg/perf/string_table.go
@@ -1,5 +1,20 @@
// Copyright 2023 The Parca Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package perf

import "unsafe"

type entry struct {
offset uint32
length uint32
@@ -20,32 +35,42 @@ func NewStringTable(dataCapacity, entriesCapacity int) *StringTable {
}
}

func (t *StringTable) AddOrGet(s string) int {
if i, ok := t.dict[s]; ok {
func (t *StringTable) GetOrAdd(s []byte) int {
if i, ok := t.dict[unsafeString(s)]; ok {
return i
}

// double the capacity until we have enough
for len(t.data)+len(s) > cap(t.data) {
newData := make([]byte, cap(t.data)*2)
copy(newData, t.data)
t.data = newData[:len(t.data)]
}
offset := uint32(len(t.data))
t.data = append(t.data, s...)
t.entries = append(t.entries, entry{
offset: offset,
length: uint32(len(s)),
})
i := len(t.entries) - 1
t.dict[s] = i
t.dict[string(s)] = i
return i
}

func unsafeString(b []byte) string {
return *(*string)(unsafe.Pointer(&b))
}

func (t *StringTable) Get(i int) string {
return string(t.GetBytes(i))
}

func (t *StringTable) LengthOf(i int) uint32 {
return t.entries[i].length
}

func (t *StringTable) Len() int {
return len(t.entries)
}

func (t *StringTable) DataLength() int {
return len(t.data)
}

func (t *StringTable) GetBytes(i int) []byte {
e := t.entries[i]
return t.data[e.offset : e.offset+e.length]
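
A note on ownership, since GetOrAdd takes a []byte: unsafeString is only used for the transient map probe, while both the stored map key (string(s)) and the appended bytes are copies, so callers may reuse their buffer afterwards; GetBytes, by contrast, returns a sub-slice aliasing the internal buffer and should be treated as read-only. A small sketch under those assumptions, with a hypothetical function name and symbol:

package perf

import "fmt"

func exampleBufferReuse() {
	st := NewStringTable(1024, 16)

	buf := []byte("java_symbol_1")
	i := st.GetOrAdd(buf) // bytes are copied into st.data; the map key is a copied string

	// Reusing or overwriting the caller's buffer afterwards is safe.
	copy(buf, "overwritten!!")

	fmt.Println(st.Get(i)) // still "java_symbol_1"
}
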
