From f80b0eb93a3a3d202547d2884000d4891a5ecb4b Mon Sep 17 00:00:00 2001 From: Frederic Branczyk Date: Mon, 20 Nov 2023 09:30:01 +0100 Subject: [PATCH 1/2] pkg/perf: Add string table --- pkg/perf/string_table.go | 52 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 pkg/perf/string_table.go diff --git a/pkg/perf/string_table.go b/pkg/perf/string_table.go new file mode 100644 index 0000000000..7b5576a73d --- /dev/null +++ b/pkg/perf/string_table.go @@ -0,0 +1,52 @@ +package perf + +type entry struct { + offset uint32 + length uint32 +} + +type StringTable struct { + data []byte + entries []entry + + dict map[string]int +} + +func NewStringTable(dataCapacity, entriesCapacity int) *StringTable { + return &StringTable{ + data: make([]byte, 0, dataCapacity), + entries: make([]entry, 0, entriesCapacity), + dict: make(map[string]int), + } +} + +func (t *StringTable) AddOrGet(s string) int { + if i, ok := t.dict[s]; ok { + return i + } + + // double the capacity until we have enough + for len(t.data)+len(s) > cap(t.data) { + newData := make([]byte, cap(t.data)*2) + copy(newData, t.data) + t.data = newData[:len(t.data)] + } + offset := uint32(len(t.data)) + t.data = append(t.data, s...) + t.entries = append(t.entries, entry{ + offset: offset, + length: uint32(len(s)), + }) + i := len(t.entries) - 1 + t.dict[s] = i + return i +} + +func (t *StringTable) Get(i int) string { + return string(t.GetBytes(i)) +} + +func (t *StringTable) GetBytes(i int) []byte { + e := t.entries[i] + return t.data[e.offset : e.offset+e.length] +} From f375ec7c9b91ce58701f63de0b46e4066afd5b74 Mon Sep 17 00:00:00 2001 From: Frederic Branczyk Date: Mon, 20 Nov 2023 13:34:47 +0100 Subject: [PATCH 2/2] pkg/perf: Compact perf map entries after deduplication --- pkg/perf/conv.go | 59 ---------------------------------------- pkg/perf/dump.go | 5 ++-- pkg/perf/map.go | 49 +++++++++++++++++++++++++++++---- pkg/perf/perf.go | 55 +++++++++++++++++++++++++------------ pkg/perf/perf_test.go | 17 ++++++------ pkg/perf/string_table.go | 43 +++++++++++++++++++++++------ 6 files changed, 127 insertions(+), 101 deletions(-) delete mode 100644 pkg/perf/conv.go diff --git a/pkg/perf/conv.go b/pkg/perf/conv.go deleted file mode 100644 index 52a05c603d..0000000000 --- a/pkg/perf/conv.go +++ /dev/null @@ -1,59 +0,0 @@ -// Copyright 2023 The Parca Authors -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package perf - -import "unsafe" - -func newStringConverter(capacity int) *stringConverter { - conv := stringConverter{} - if capacity <= 0 { - return &conv - } - - conv.buf = make([]byte, 0, capacity) - return &conv -} - -// stringConverter converts bytes to strings with less allocs. -// The idea is to accumulate bytes in a buffer with specified capacity -// and create strings with unsafe.String using bytes from a buffer. -// For example, 10 "fizz" strings written to a 40-byte buffer -// will result in 1 alloc instead of 10. -// -// Once a buffer is filled, strings will be allocated as usually. 
-type stringConverter struct { - // buf is a temporary buffer where decoded strings are batched. - buf []byte - // offset is a buffer position where the last string was written. - offset int -} - -// String converts bytes to a string. -func (c *stringConverter) String(b []byte) string { - n := len(b) - if n == 0 { - return "" - } - // Must allocate because a string doesn't fit into the buffer. - if len(c.buf)+n > cap(c.buf) { - return string(b) - } - - c.buf = append(c.buf, b...) - b = c.buf[c.offset:] - s := unsafe.String(&b[0], n) - c.offset += n - - return s -} diff --git a/pkg/perf/dump.go b/pkg/perf/dump.go index 3e2d059dee..29953150c4 100644 --- a/pkg/perf/dump.go +++ b/pkg/perf/dump.go @@ -68,8 +68,9 @@ func ReadJITdump(logger log.Logger, fileName string) (Map, error) { } addrs := make([]MapAddr, 0, len(dump.CodeLoads)) + st := NewStringTable(16*1024, 1024) for _, cl := range dump.CodeLoads { - addrs = append(addrs, MapAddr{cl.CodeAddr, cl.CodeAddr + cl.CodeSize, cl.Name}) + addrs = append(addrs, MapAddr{cl.CodeAddr, cl.CodeAddr + cl.CodeSize, st.GetOrAdd([]byte(cl.Name))}) } // Sorted by end address to allow binary search during look-up. End to find @@ -79,7 +80,7 @@ func ReadJITdump(logger log.Logger, fileName string) (Map, error) { return addrs[i].End < addrs[j].End }) - return Map{Path: fileName, addrs: addrs}, nil + return Map{Path: fileName, addrs: addrs, stringTable: st}, nil } func NewJITDumpCache(logger log.Logger, reg prometheus.Registerer, profilingDuration time.Duration) *JITDumpCache { diff --git a/pkg/perf/map.go b/pkg/perf/map.go index 0668a0358b..c48bda22d8 100644 --- a/pkg/perf/map.go +++ b/pkg/perf/map.go @@ -23,18 +23,23 @@ var ErrNoSymbolFound = errors.New("no symbol found") type MapAddr struct { Start uint64 End uint64 - Symbol string + Symbol int } type Map struct { Path string - addrs []MapAddr + addrs []MapAddr + stringTable *StringTable } func (p *Map) Deduplicate() *Map { newAddrs := make([]MapAddr, len(p.addrs)) + // For deduplication to be most effective we need to also remove entries + // from the string table. + stringTableUsage := make([]int, p.stringTable.Len()) + j := len(p.addrs) - 1 for i := len(p.addrs) - 1; i >= 0; i-- { // The last symbol is the most up to date one, so if any earlier ones @@ -42,13 +47,47 @@ func (p *Map) Deduplicate() *Map { if i > 0 && p.addrs[i-1].End > p.addrs[i].Start { continue } + + stringTableUsage[p.addrs[i].Symbol]++ + newAddrs[j] = p.addrs[i] j-- } + newStringTableSize := uint32(0) + newStringTableEntries := 0 + for i, usage := range stringTableUsage { + if usage > 0 { + newStringTableSize += p.stringTable.LengthOf(i) + newStringTableEntries++ + } + } + + newStringTable := NewStringTable(int(newStringTableSize), newStringTableEntries) + translation := make([]int, p.stringTable.Len()) + for i, usage := range stringTableUsage { + if usage > 0 { + translation[i] = newStringTable.GetOrAdd(p.stringTable.GetBytes(i)) + } + } + + newAddrs = newAddrs[j+1:] + + // We need to do this so we can free the memory used by the old slice that + // contains duplicates. 
+ compacted := make([]MapAddr, len(newAddrs)) + for i, addr := range newAddrs { + compacted[i] = MapAddr{ + Start: addr.Start, + End: addr.End, + Symbol: translation[addr.Symbol], + } + } + return &Map{ - Path: p.Path, - addrs: newAddrs[j+1:], + Path: p.Path, + addrs: compacted, + stringTable: newStringTable, } } @@ -60,5 +99,5 @@ func (p *Map) Lookup(addr uint64) (string, error) { return "", ErrNoSymbolFound } - return p.addrs[idx].Symbol, nil + return p.stringTable.Get(p.addrs[idx].Symbol), nil } diff --git a/pkg/perf/perf.go b/pkg/perf/perf.go index 45d4d776fb..1a0bd6946c 100644 --- a/pkg/perf/perf.go +++ b/pkg/perf/perf.go @@ -58,6 +58,7 @@ var ( func ReadPerfMap( logger log.Logger, fileName string, + prev *Map, ) (*Map, error) { fd, err := os.Open(fileName) if err != nil { @@ -70,22 +71,35 @@ func ReadPerfMap( return nil, err } - // Estimate the number of lines in the map file - // and allocate a string converter when the file is sufficiently large. - const ( - avgLineLen = 60 - avgFuncLen = 42 + var ( + addrs []MapAddr + st *StringTable ) - fileSize := stat.Size() - linesCount := int(fileSize / avgLineLen) - convBufSize := 0 - if linesCount > 400 { - convBufSize = linesCount * avgFuncLen + + if prev != nil { + // If we have the previous map we use some stats to preallocate so we + // have a good starting point. + addrs = make([]MapAddr, 0, len(prev.addrs)) + st = NewStringTable(prev.stringTable.DataLength(), prev.stringTable.Len()) + } else { + // Estimate the number of lines in the map file + // and allocate a string converter when the file is sufficiently large. + const ( + avgLineLen = 60 + avgFuncLen = 42 + ) + fileSize := stat.Size() + linesCount := int(fileSize / avgLineLen) + convBufSize := 0 + if linesCount > 400 { + convBufSize = linesCount * avgFuncLen + } + + addrs = make([]MapAddr, 0, linesCount) + st = NewStringTable(convBufSize, linesCount) } r := bufio.NewReader(fd) - addrs := make([]MapAddr, 0, linesCount) - conv := newStringConverter(convBufSize) i := 0 var multiError error for { @@ -97,7 +111,7 @@ func ReadPerfMap( return nil, fmt.Errorf("read perf map line: %w", err) } - line, err := parsePerfMapLine(b, conv) + line, err := parsePerfMapLine(b, st) if err != nil { multiError = errors.Join(multiError, fmt.Errorf("parse perf map line %d: %w", i, err)) } @@ -121,10 +135,14 @@ func ReadPerfMap( return addrs[i].End < addrs[j].End }) - return (&Map{Path: fileName, addrs: addrs}).Deduplicate(), nil + return (&Map{ + Path: fileName, + addrs: addrs, + stringTable: st, + }).Deduplicate(), nil } -func parsePerfMapLine(b []byte, conv *stringConverter) (MapAddr, error) { +func parsePerfMapLine(b []byte, st *StringTable) (MapAddr, error) { firstSpace := bytes.Index(b, []byte(" ")) if firstSpace == -1 { return MapAddr{}, errors.New("invalid line") @@ -169,7 +187,7 @@ func parsePerfMapLine(b []byte, conv *stringConverter) (MapAddr, error) { return MapAddr{ Start: start, End: start + size, - Symbol: conv.String(symbolBytes), + Symbol: st.GetOrAdd(symbolBytes), }, nil } @@ -209,14 +227,15 @@ func (p *PerfMapCache) PerfMapForPID(pid int) (*Map, error) { return nil, err } - if v, ok := p.cache.Get(pid); ok { + v, ok := p.cache.Get(pid) + if ok { if v.fileModTime == info.ModTime() && v.fileSize == info.Size() { return v.m, nil } level.Debug(p.logger).Log("msg", "cached value is outdated", "pid", pid) } - m, err := ReadPerfMap(p.logger, perfFile) + m, err := ReadPerfMap(p.logger, perfFile, v.m) if err != nil { return nil, err } diff --git a/pkg/perf/perf_test.go b/pkg/perf/perf_test.go 
index 6e3631411a..a4b2fa8a73 100644 --- a/pkg/perf/perf_test.go +++ b/pkg/perf/perf_test.go @@ -21,11 +21,12 @@ import ( ) func TestPerfMapParse(t *testing.T) { - res, err := ReadPerfMap(log.NewNopLogger(), "testdata/nodejs-perf-map") + res, err := ReadPerfMap(log.NewNopLogger(), "testdata/nodejs-perf-map", nil) require.NoError(t, err) require.Len(t, res.addrs, 28) - // Check for 4edd3cca B0 LazyCompile:~Timeout internal/timers.js:55 - require.Equal(t, MapAddr{0x4edd4f12, 0x4edd4f47, "LazyCompile:~remove internal/linkedlist.js:15"}, res.addrs[12]) + + require.Equal(t, "LazyCompile:~remove internal/linkedlist.js:15", res.stringTable.Get(18)) + require.Equal(t, MapAddr{0x4edd4f12, 0x4edd4f47, 18}, res.addrs[12]) // Look-up a symbol. sym, err := res.Lookup(0x4edd4f12 + 4) @@ -37,17 +38,17 @@ func TestPerfMapParse(t *testing.T) { } func TestPerfMapCorruptLine(t *testing.T) { - _, err := parsePerfMapLine([]byte(" Script:~ evalmachine.:1\r\n"), newStringConverter(5000)) + _, err := parsePerfMapLine([]byte(" Script:~ evalmachine.:1\r\n"), NewStringTable(16*1024, 1024)) require.Error(t, err) } func TestPerfMapRegression(t *testing.T) { - _, err := ReadPerfMap(log.NewNopLogger(), "testdata/nodejs-perf-map-regression") + _, err := ReadPerfMap(log.NewNopLogger(), "testdata/nodejs-perf-map-regression", nil) require.NoError(t, err) } func TestPerfMapParseErlangPerfMap(t *testing.T) { - _, err := ReadPerfMap(log.NewNopLogger(), "testdata/erlang-perf-map") + _, err := ReadPerfMap(log.NewNopLogger(), "testdata/erlang-perf-map", nil) require.NoError(t, err) } @@ -55,7 +56,7 @@ func BenchmarkPerfMapParse(b *testing.B) { logger := log.NewNopLogger() b.ResetTimer() for i := 0; i < b.N; i++ { - _, err := ReadPerfMap(logger, "testdata/nodejs-perf-map") + _, err := ReadPerfMap(logger, "testdata/nodejs-perf-map", nil) require.NoError(b, err) } } @@ -64,7 +65,7 @@ func BenchmarkPerfMapParseBig(b *testing.B) { logger := log.NewNopLogger() b.ResetTimer() for i := 0; i < b.N; i++ { - _, err := ReadPerfMap(logger, "testdata/erlang-perf-map") + _, err := ReadPerfMap(logger, "testdata/erlang-perf-map", nil) require.NoError(b, err) } } diff --git a/pkg/perf/string_table.go b/pkg/perf/string_table.go index 7b5576a73d..0f0b1545de 100644 --- a/pkg/perf/string_table.go +++ b/pkg/perf/string_table.go @@ -1,5 +1,20 @@ +// Copyright 2023 The Parca Authors +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package perf +import "unsafe" + type entry struct { offset uint32 length uint32 @@ -20,17 +35,11 @@ func NewStringTable(dataCapacity, entriesCapacity int) *StringTable { } } -func (t *StringTable) AddOrGet(s string) int { - if i, ok := t.dict[s]; ok { +func (t *StringTable) GetOrAdd(s []byte) int { + if i, ok := t.dict[unsafeString(s)]; ok { return i } - // double the capacity until we have enough - for len(t.data)+len(s) > cap(t.data) { - newData := make([]byte, cap(t.data)*2) - copy(newData, t.data) - t.data = newData[:len(t.data)] - } offset := uint32(len(t.data)) t.data = append(t.data, s...) 
t.entries = append(t.entries, entry{ @@ -38,14 +47,30 @@ func (t *StringTable) AddOrGet(s string) int { length: uint32(len(s)), }) i := len(t.entries) - 1 - t.dict[s] = i + t.dict[string(s)] = i return i } +func unsafeString(b []byte) string { + return *(*string)(unsafe.Pointer(&b)) +} + func (t *StringTable) Get(i int) string { return string(t.GetBytes(i)) } +func (t *StringTable) LengthOf(i int) uint32 { + return t.entries[i].length +} + +func (t *StringTable) Len() int { + return len(t.entries) +} + +func (t *StringTable) DataLength() int { + return len(t.data) +} + func (t *StringTable) GetBytes(i int) []byte { e := t.entries[i] return t.data[e.offset : e.offset+e.length]
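
A minimal usage sketch of the new StringTable API introduced by these two patches, for reference. It is not part of either patch: the import path assumes parca-agent's usual module layout, the symbol strings are taken from the test data above, and the capacities mirror the ones ReadJITdump now passes (16*1024 bytes of string data, 1024 entries).

package main

import (
	"fmt"

	// Assumed import path; the package touched by the patches lives at pkg/perf.
	"github.com/parca-dev/parca-agent/pkg/perf"
)

func main() {
	// Same starting capacities as ReadJITdump uses; both the data buffer and
	// the entries slice grow via append if the capacities are exceeded.
	st := perf.NewStringTable(16*1024, 1024)

	// GetOrAdd interns a symbol and returns a stable index. Adding the same
	// bytes again returns the existing index instead of appending a copy.
	a := st.GetOrAdd([]byte("LazyCompile:~remove internal/linkedlist.js:15"))
	b := st.GetOrAdd([]byte("LazyCompile:~Timeout internal/timers.js:55"))
	c := st.GetOrAdd([]byte("LazyCompile:~remove internal/linkedlist.js:15"))

	fmt.Println(a == c) // true: duplicate symbols share one entry
	fmt.Println(a == b) // false: distinct symbols get distinct indices

	// Get allocates a string copy; GetBytes returns a slice into the table's
	// backing buffer without copying.
	fmt.Println(st.Get(a))

	// Len and DataLength are the stats ReadPerfMap reads from the previous
	// Map to preallocate when re-parsing the same perf map file.
	fmt.Println(st.Len(), st.DataLength())
}

The second patch's Deduplicate ties into this: it counts how often each string table index is still referenced after dropping overlapping entries, copies the surviving MapAddr values into a right-sized slice, and rebuilds the string table, so that both the duplicate-laden addrs backing array and the now-unreferenced symbol bytes can be garbage collected, with the translation slice remapping old indices to their new positions.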