Skip to content

Commit

Permalink
.eh_frame: Initial DWARF expression support
Browse files Browse the repository at this point in the history
This commit adds partial support for DWARF expressions, in particular,
for unwind ranges that cover PLTs for the CFA calculation
(`DW_CFA_def_cfa_expression`).

Arbitrary expressions require a VM to evaluate them, which is not easy
to do in BPF. After doing some quick data analysis (see below), it was
clear that hardcoding a few common expressions would increase the success
ratio of the stack walker without bloating it too much. I don't think we
should add every "common" expression, but the PLT ones seemed like
good bang for the buck.

On my Fedora machine, these were the most common expressions for
binaries, and libraries, as well as for processes running:

```
[javierhonduco@fedora parca-agent]$ sudo readelf -wf /bin/* 2>/dev/null | grep DW_CFA_def_cfa_expression | sort | uniq -c | sort -k1h
      1   DW_CFA_def_cfa_expression (DW_OP_breg7 (rsp): 160; DW_OP_deref)
      5   DW_CFA_def_cfa_expression (DW_OP_breg4 (esp): 4; DW_OP_breg8 (eip): 0; DW_OP_lit15; DW_OP_and; DW_OP_lit9; DW_OP_ge; DW_OP_lit2; DW_OP_shl; DW_OP_plus)
      5   DW_CFA_def_cfa_expression (DW_OP_breg5 (ebp): -16; DW_OP_deref)
     12   DW_CFA_def_cfa_expression (DW_OP_breg6 (rbp): -40; DW_OP_deref)
     62   DW_CFA_def_cfa_expression (DW_OP_breg7 (rsp): 8; DW_OP_breg16 (rip): 0; DW_OP_lit15; DW_OP_and; DW_OP_lit11; DW_OP_ge; DW_OP_lit3; DW_OP_shl; DW_OP_plus)
   1673   DW_CFA_def_cfa_expression (DW_OP_breg7 (rsp): 8; DW_OP_breg16 (rip): 0; DW_OP_lit15; DW_OP_and; DW_OP_lit10; DW_OP_ge; DW_OP_lit3; DW_OP_shl; DW_OP_plus)
```

```
[javierhonduco@fedora parca-agent]$ sudo readelf -wf /lib64/* 2>/dev/null | grep DW_CFA_def_cfa_expression | sort | uniq -c | sort -k1h
[...]
     47   DW_CFA_def_cfa_expression (DW_OP_breg7 (rsp): 8; DW_OP_breg16 (rip): 0; DW_OP_lit15; DW_OP_and; DW_OP_lit11; DW_OP_ge; DW_OP_lit3; DW_OP_shl; DW_OP_plus)
    237   DW_CFA_def_cfa_expression (DW_OP_breg6 (rbp): -24; DW_OP_deref)
    910   DW_CFA_def_cfa_expression (DW_OP_breg6 (rbp): -8; DW_OP_deref)
   1006   DW_CFA_def_cfa_expression (DW_OP_breg6 (rbp): -40; DW_OP_deref)
   2563   DW_CFA_def_cfa_expression (DW_OP_breg7 (rsp): 8; DW_OP_breg16 (rip): 0; DW_OP_lit15; DW_OP_and; DW_OP_lit10; DW_OP_ge; DW_OP_lit3; DW_OP_shl; DW_OP_plus)
```

```
[javierhonduco@fedora parca-agent]$ sudo readelf -wf /proc/*/exe 2>/dev/null | grep DW_CFA_def_cfa_expression | sort | uniq -c | sort -k1h
[...]
     25   DW_CFA_def_cfa_expression (DW_OP_breg7 (rsp): 120; DW_OP_deref; DW_OP_plus_uconst: 8)
     25   DW_CFA_def_cfa_expression (DW_OP_breg7 (rsp): 152; DW_OP_deref; DW_OP_plus_uconst: 8)
     25   DW_CFA_def_cfa_expression (DW_OP_breg7 (rsp): 88; DW_OP_deref; DW_OP_plus_uconst: 8)
     30   DW_CFA_def_cfa_expression (DW_OP_breg7 (rsp): 0; DW_OP_deref; DW_OP_plus_uconst: 8)
     35   DW_CFA_def_cfa_expression (DW_OP_breg7 (rsp): 40; DW_OP_deref; DW_OP_plus_uconst: 8)
     45   DW_CFA_def_cfa_expression (DW_OP_breg7 (rsp): -8; DW_OP_deref; DW_OP_plus_uconst: 8)
    130   DW_CFA_def_cfa_expression (DW_OP_breg7 (rsp): 8; DW_OP_breg16 (rip): 0; DW_OP_lit15; DW_OP_and; DW_OP_lit10; DW_OP_ge; DW_OP_lit3; DW_OP_shl; DW_OP_plus)
```

As can be seen above, by adding PLT expression support, we can get ~50% more unwind sections working.

PLT sections are particularly important as libc is typically dynamically
linked. In this case, most libc calls go through the Procedure Linkage
Table, and before this commit, we would stop walking the stack there.

See test plan in the PR.

Signed-off-by: Francisco Javier Honduvilla Coto <[email protected]>
  • Loading branch information
javierhonduco committed Nov 22, 2022
1 parent 3f0d04f commit b7d873d
Show file tree
Hide file tree
Showing 8 changed files with 179 additions and 39 deletions.
39 changes: 30 additions & 9 deletions bpf/cpu/cpu.bpf.c
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@
// Size of the unwind table.
// The replacement list is parenthesized so the macro expands as a single
// value inside larger expressions (e.g. `x / MAX_UNWIND_TABLE_SIZE`).
#define MAX_UNWIND_TABLE_SIZE (250 * 1000)

// Identifiers for the hardcoded DWARF expressions the unwinder recognizes.
// When a row's CFA type is CFA_TYPE_EXPRESSION, its cfa_offset field holds
// one of these identifiers instead of an offset.
// UNKNOWN marks an expression that is not supported; the unwinder bails out.
#define DWARF_EXPRESSION_UNKNOWN 0
// PLT expression: sp + 8 + (((ip & 15) >= 11) << 3).
#define DWARF_EXPRESSION_PLT1 1
// PLT expression: sp + 8 + (((ip & 15) >= 10) << 3).
#define DWARF_EXPRESSION_PLT2 2

// Values for the unwind table's CFA type.
#define CFA_TYPE_RBP 1
#define CFA_TYPE_RSP 2
Expand Down Expand Up @@ -519,15 +524,8 @@ int walk_user_stacktrace_impl(struct bpf_perf_event_data *ctx) {
s16 found_cfa_offset = unwind_table->rows[table_idx].cfa_offset;
s16 found_rbp_offset = unwind_table->rows[table_idx].rbp_offset;

bpf_printk("\tcfa reg: $%s, offset: %d (row pc: %llx)",
found_cfa_type == CFA_TYPE_RSP ? "rsp" : "rbp", found_cfa_offset,
found_pc);

if (found_cfa_type == CFA_TYPE_EXPRESSION) {
bpf_printk("\t!!!! CFA is an expression, bailing out");
BUMP_UNWIND_UNSUPPORTED_EXPRESSION();
return 1;
}
bpf_printk("\tcfa type: %d, offset: %d (row pc: %llx)", found_cfa_type,
found_cfa_offset, found_pc);

if (found_rbp_type == RBP_TYPE_REGISTER ||
found_rbp_type == RBP_TYPE_EXPRESSION) {
Expand All @@ -542,6 +540,29 @@ int walk_user_stacktrace_impl(struct bpf_perf_event_data *ctx) {
previous_rsp = unwind_state->bp + found_cfa_offset;
} else if (found_cfa_type == CFA_TYPE_RSP) {
previous_rsp = unwind_state->sp + found_cfa_offset;
} else if (found_cfa_type == CFA_TYPE_EXPRESSION) {
if (found_cfa_offset == DWARF_EXPRESSION_UNKNOWN) {
bpf_printk("[error] CFA is an unsupported expression, bailing out");
BUMP_UNWIND_UNSUPPORTED_EXPRESSION();
return 1;
}

bpf_printk("CFA expression found with id %d", found_cfa_offset);

u64 threshold = 0;
if (found_cfa_offset == DWARF_EXPRESSION_PLT1) {
threshold = 11;
} else if (found_cfa_offset == DWARF_EXPRESSION_PLT2) {
threshold = 10;
}

if (threshold == 0) {
BUMP_UNWIND_SHOULD_NEVER_HAPPEN_ERROR();
return 1;
}

previous_rsp = unwind_state->sp + 8 +
((((unwind_state->ip & 15) >= threshold)) << 3);
} else {
bpf_printk("\t[error] register %d not valid (expected $rbp or $rsp)",
found_cfa_type);
Expand Down
4 changes: 2 additions & 2 deletions docs/native-stack-walking/design.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,15 +35,15 @@ typedef struct {
- 2 reserved bytes, which are unused at the moment. They help explicitly align the structure and in the future they will most likely be used to add support for other architectures.
- 1 byte for the CFA "type", whether we should evaluate an expression, if it's stored in a register, or if it's an offset from `$rsp` or `$rbp`.
- 1 byte for the frame pointer "type", which works as the CFA type field.
- 1 byte for the CFA offset, that stored the offset we should apply to either base register to compute the CFA. In the future it will encode other information depending on its type.
- 1 byte for the CFA offset, that stores the offset we should apply to either base register to compute the CFA. If this CFA's rule is an expression, it will contain the expression identifier (`DWARF_EXPRESSION_*`).
- 1 byte for the rbp offset, which can be zero, to indicate that it doesn't change. Otherwise it will be the offset at which the previous frame pointer was pushed in the stack at `$current_rbp + offset`.

### Features / limitations

- **Architecture**: only x86_64 is supported
- **DWARF**:
- Based on version 5 of the spec
- No dwarf expression support (such as `DW_CFA_def_cfa_expression`)
- DWARF expressions in Procedure Linkage Tables (PLTs) are supported for CFA's calculation (`DW_CFA_def_cfa_expression`)
- No dwarf register support (`DW_CFA_register` and others)
- Support for `.eh_frame` DWARF unwind information
- **Size limitations**: Due to the unwind table's design, there's some limits on the values we can accept:
Expand Down
2 changes: 1 addition & 1 deletion docs/native-stack-walking/hacking.md
Original file line number Diff line number Diff line change
Expand Up @@ -253,4 +253,4 @@ Another thing to bear in mind when setting breakpoints is that there could be mo
- [1]: This is of course not very efficient. Once the implementation is more mature, we will use the smallest data types we can, but we need to be careful and ensure that the C ABI is correct while loading data in the BPF maps.


(*): This is not always the case, such as in dwarf expressions, for example, but an overlwhelming majority of the times it is
(*): This is not always the case, such as in DWARF expressions, for example, but an overwhelming majority of the time it is
21 changes: 11 additions & 10 deletions internal/dwarf/frame/expression_constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,16 @@ package frame

// Operation opcodes.
const (
DW_OP_addr = 0x03
DW_OP_const1s = 0x09
DW_OP_addr = 0x03
DW_OP_deref = 0x06
)

const (
DW_OP_const2u = 0x0a
DW_OP_const2s = 0x0b
DW_OP_const4u = iota
DW_OP_const1u = iota + 0x08
DW_OP_const1s
DW_OP_const2u
DW_OP_const2s
DW_OP_const4u
DW_OP_const4s
DW_OP_const8u
DW_OP_const8s
Expand Down Expand Up @@ -38,20 +40,20 @@ const (
DW_OP_shr
DW_OP_shra
DW_OP_xor
DW_OP_skip
DW_OP_bra
DW_OP_eq
DW_OP_ge
DW_OP_gt
DW_OP_le
DW_OP_lt
DW_OP_ne
DW_OP_skip
)

const (
DW_OP_lit0 = 0x30
DW_OP_lit1 = 0x31
DW_OP_lit2 = iota
DW_OP_lit0 = iota + 0x30
DW_OP_lit1
DW_OP_lit2
DW_OP_lit3
DW_OP_lit4
DW_OP_lit5
Expand Down Expand Up @@ -159,7 +161,6 @@ const (
DW_OP_form_tls_address
DW_OP_call_frame_cfa
DW_OP_bit_piece

DW_OP_lo_user = 0xe0
DW_OP_hi_user = 0xff
)
19 changes: 19 additions & 0 deletions internal/dwarf/frame/expression_constants_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
package frame

import (
"testing"

"github.com/stretchr/testify/require"
)

// Spot check some of the constants.
func TestDWARFExpressionConstants(t *testing.T) {
require.Equal(t, 0x0a, DW_OP_const2u)
require.Equal(t, 0x2f, DW_OP_skip)
require.Equal(t, 0x30, DW_OP_lit0)
require.Equal(t, 0x9d, DW_OP_bit_piece)
require.Equal(t, 0xe0, DW_OP_lo_user)
require.Equal(t, 0xff, DW_OP_hi_user)
require.Equal(t, 0x22, DW_OP_plus)
require.Equal(t, 0x08, DW_OP_const1u)
}
34 changes: 18 additions & 16 deletions pkg/profiler/cpu/maps.go
Original file line number Diff line number Diff line change
Expand Up @@ -323,22 +323,24 @@ func (m *bpfMaps) setUnwindTable(pid int, ut unwind.UnwindTable) error {
return fmt.Errorf("write CFA register bytes: %w", err)
}

var CFARegister uint8
var RBPRegister uint8
var CFAOffset int16
var RBPOffset int16
var CfaRegister uint8
var RbpRegister uint8
var CfaOffset int16
var RbpOffset int16

// CFA.
switch row.CFA.Rule {
case frame.RuleCFA:
if row.CFA.Reg == frame.X86_64FramePointer {
CFARegister = uint8(CfaRegisterRbp)
CfaRegister = uint8(CfaRegisterRbp)
} else if row.CFA.Reg == frame.X86_64StackPointer {
CFARegister = uint8(CfaRegisterRsp)
CfaRegister = uint8(CfaRegisterRsp)
}
CFAOffset = int16(row.CFA.Offset)
CfaOffset = int16(row.CFA.Offset)
case frame.RuleExpression:
CFARegister = uint8(CfaRegisterExpression)
CfaRegister = uint8(CfaRegisterExpression)
CfaOffset = int16(unwind.ExpressionIdentifier(row.CFA.Expression))

default:
return fmt.Errorf("CFA rule is not valid. This should never happen")
}
Expand All @@ -347,31 +349,31 @@ func (m *bpfMaps) setUnwindTable(pid int, ut unwind.UnwindTable) error {
switch row.RBP.Rule {
case frame.RuleUndefined:
case frame.RuleOffset:
RBPRegister = uint8(RbpRuleOffset)
RBPOffset = int16(row.RBP.Offset)
RbpRegister = uint8(RbpRuleOffset)
RbpOffset = int16(row.RBP.Offset)
case frame.RuleRegister:
RBPRegister = uint8(RbpRuleRegister)
RbpRegister = uint8(RbpRuleRegister)
case frame.RuleExpression:
RBPRegister = uint8(RbpRegisterExpression)
RbpRegister = uint8(RbpRegisterExpression)
}

// Write CFA type (.cfa_type).
if err := binary.Write(buf, m.byteOrder, CFARegister); err != nil {
if err := binary.Write(buf, m.byteOrder, CfaRegister); err != nil {
return fmt.Errorf("write CFA register bytes: %w", err)
}

// Write frame pointer type (.rbp_type).
if err := binary.Write(buf, m.byteOrder, RBPRegister); err != nil {
if err := binary.Write(buf, m.byteOrder, RbpRegister); err != nil {
return fmt.Errorf("write CFA register bytes: %w", err)
}

// Write CFA offset (.cfa_offset).
if err := binary.Write(buf, m.byteOrder, CFAOffset); err != nil {
if err := binary.Write(buf, m.byteOrder, CfaOffset); err != nil {
return fmt.Errorf("write CFA offset bytes: %w", err)
}

// Write frame pointer offset (.rbp_offset).
if err := binary.Write(buf, m.byteOrder, RBPOffset); err != nil {
if err := binary.Write(buf, m.byteOrder, RbpOffset); err != nil {
return fmt.Errorf("write RBP offset bytes: %w", err)
}
}
Expand Down
92 changes: 92 additions & 0 deletions pkg/stack/unwind/dwarf_expression.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// Copyright 2022 The Parca Authors
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//

package unwind

import (
"github.com/parca-dev/parca-agent/internal/dwarf/frame"
)

// DwarfExpressionID identifies one of the hardcoded DWARF expressions
// that ExpressionIdentifier is able to recognize.
type DwarfExpressionID int16

// The numeric values (0, 1, 2) line up with the DWARF_EXPRESSION_UNKNOWN,
// DWARF_EXPRESSION_PLT1 and DWARF_EXPRESSION_PLT2 defines in
// bpf/cpu/cpu.bpf.c, which read this identifier from the row's CFA offset.
const (
// ExpressionUnknown is returned for any expression that is not recognized.
ExpressionUnknown DwarfExpressionID = iota
// ExpressionPlt1 identifies the expression stored in plt1.
ExpressionPlt1
// ExpressionPlt2 identifies the expression stored in plt2.
ExpressionPlt2
)

// DWARF expressions that we recognize.

// plt1 is equivalent to: sp + 8 + (((ip & 15) >= 11) << 3).
//
// The bytes are compared against the expression after ExpressionIdentifier
// has removed every zero byte from it.
// NOTE(review): DW_OP_const1u (0x08) appears to stand in for the operand
// byte `8` of DW_OP_breg7 rather than for a real const1u operation, and the
// `0` operand of DW_OP_breg16 is absent because zero bytes are stripped
// before the comparison — confirm against the encoded expression.
var plt1 = [...]byte{
frame.DW_OP_breg7,
frame.DW_OP_const1u,
frame.DW_OP_breg16,
frame.DW_OP_lit15,
frame.DW_OP_and,
frame.DW_OP_lit11,
frame.DW_OP_ge,
frame.DW_OP_lit3,
frame.DW_OP_shl,
frame.DW_OP_plus,
}

// plt2 is equivalent to: sp + 8 + (((ip & 15) >= 10) << 3).
//
// The bytes are compared against the expression after ExpressionIdentifier
// has removed every zero byte from it.
// NOTE(review): DW_OP_const1u (0x08) appears to stand in for the operand
// byte `8` of DW_OP_breg7 rather than for a real const1u operation, and the
// `0` operand of DW_OP_breg16 is absent because zero bytes are stripped
// before the comparison — confirm against the encoded expression.
var plt2 = [...]byte{
frame.DW_OP_breg7,
frame.DW_OP_const1u,
frame.DW_OP_breg16,
frame.DW_OP_lit15,
frame.DW_OP_and,
frame.DW_OP_lit10,
frame.DW_OP_ge,
frame.DW_OP_lit3,
frame.DW_OP_shl,
frame.DW_OP_plus,
}

// equalBytes reports whether a and b have the same length and hold the
// same byte at every position.
func equalBytes(a, b []byte) bool {
	if len(a) == len(b) {
		for idx, left := range a {
			if left != b[idx] {
				return false
			}
		}
		return true
	}
	return false
}

// ExpressionIdentifier maps a raw DWARF expression to the identifier of
// the hardcoded expression it matches, or ExpressionUnknown when it
// matches none of them.
func ExpressionIdentifier(expression []byte) DwarfExpressionID {
	// Zero bytes are discarded before comparing against the known
	// expressions, which are stored without them.
	nonZero := make([]byte, 0, len(expression))
	for i := 0; i < len(expression); i++ {
		if b := expression[i]; b != 0x0 {
			nonZero = append(nonZero, b)
		}
	}

	switch {
	case equalBytes(plt1[:], nonZero):
		return ExpressionPlt1
	case equalBytes(plt2[:], nonZero):
		return ExpressionPlt2
	default:
		return ExpressionUnknown
	}
}
7 changes: 6 additions & 1 deletion pkg/stack/unwind/unwind_table.go
Original file line number Diff line number Diff line change
Expand Up @@ -199,7 +199,12 @@ func (ptb *UnwindTableBuilder) PrintTable(writer io.Writer, path string) error {
CFAReg := x64RegisterToString(tableRow.CFA.Reg)
fmt.Fprintf(writer, "\tLoc: %x CFA: $%s=%-4d", tableRow.Loc, CFAReg, tableRow.CFA.Offset)
case frame.RuleExpression:
fmt.Fprintf(writer, "\tLoc: %x CFA: exp ", tableRow.Loc)
expressionID := ExpressionIdentifier(tableRow.CFA.Expression)
if expressionID == ExpressionUnknown {
fmt.Fprintf(writer, "\tLoc: %x CFA: exp ", tableRow.Loc)
} else {
fmt.Fprintf(writer, "\tLoc: %x CFA: exp (plt %d)", tableRow.Loc, expressionID)
}
default:
return fmt.Errorf("CFA rule is not valid. This should never happen")
}
Expand Down

0 comments on commit b7d873d

Please sign in to comment.