From 72d4374c4e3c926e372725c368be757e32aa0d61 Mon Sep 17 00:00:00 2001 From: "W. J. van der Laan" Date: Tue, 27 Jul 2021 19:45:40 +0200 Subject: [PATCH] Merge bitcoin/bitcoin#22006: tracing: first tracepoints and documentation on User-Space, Statically Defined Tracing (USDT) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 8f37f5c2a562c38c83fc40234ade9c301fc4e685 tracing: Tracepoint for connected blocks (0xb10c) 4224dec22baa66547303840707cf1d4f15a49b20 tracing: Tracepoints for in- and outbound P2P msgs (0xb10c) 469b71ae629228b2591a55831817a0e5fad89360 doc: document systemtap dependency (0xb10c) 84ace9aef116a05e034730f2bb2f109d1d77aac7 doc: Add initial USDT documentation (0xb10c) Pull request description: This PR adds documentation for User-Space, Statically Defined Tracing (USDT) as well as three tracepoints (including documentation and usage examples). ## Context The `TRACEx` macros for tracepoints and build system changes for USDT were merged in https://github.com/bitcoin/bitcoin/pull/19866 earlier this year. Issue https://github.com/bitcoin/bitcoin/issues/20981 contains discussion about potential tracepoints and guidelines for adding them (also documented with this PR). USDT was a topic in a [core-dev-meeting discussion](https://bitcoin.jonasschnelli.ch/ircmeetings/logs/bitcoin-core-dev/2021/bitcoin-core-dev.2021-01-21-19.00.moin.txt) on 21st Jan, 2021. - [collabora.com: An eBPF overview, part 1: Introduction](https://www.collabora.com/news-and-blog/blog/2019/04/05/an-ebpf-overview-part-1-introduction/) - [collabora.com: An eBPF overview, part 2: Machine & bytecode](https://www.collabora.com/news-and-blog/blog/2019/04/15/an-ebpf-overview-part-2-machine-and-bytecode/) - [Brendan D. Gregg's blog posts, and book on on eBPF](http://www.brendangregg.com/) - [Brendan D. Gregg: Linux bcc/BPF Node.js USDT Tracing](http://www.brendangregg.com/blog/2016-10-12/linux-bcc-nodejs-usdt.html) ## USDT? Stablecoin? User-space, Statically Defined Tracing (USDT) allows for more observability during development, debugging, code review, and production usage. The tracepoints make it possible to keep track of custom statistics and enable detailed monitoring of otherwise hidden internals and have little to no performance impact when unused. Linux kernels (4.x or newer) can hook into the tracepoints and execute [eBPF] programs in a kernel VM once the tracepoint is called. This PR includes, for example, tracepoints for in- and outbound P2P messages. ``` USDT and eBPF Overview ====================== ┌──────────────────┐ ┌──────────────┐ │ tracing script │ │ bitcoind │ │==================│ 2. │==============│ │ eBPF │ tracing │ hooks │ │ │ code │ logic │ into┌─┤►tracepoint 1─┼───┐ 3. └────┬───┴──▲──────┘ ├─┤►tracepoint 2 │ │ pass args 1. │ │ 4. │ │ ... │ │ to eBPF User compiles │ │ pass data to │ └──────────────┘ │ program Space & loads │ │ tracing script │ │ ─────────────────┼──────┼─────────────────┼────────────────────┼─── Kernel │ │ │ │ Space ┌──┬─▼──────┴─────────────────┴────────────┐ │ │ │ eBPF program │◄──────┘ │ └───────────────────────────────────────┤ │ eBPF kernel Virtual Machine (sandboxed) │ └──────────────────────────────────────────┘ 1. The tracing script compiles the eBPF code and loads the eBFP program into a kernel VM 2. The eBPF program hooks into one or more tracepoints 3. When the tracepoint is called, the arguments are passed to the eBPF program 4. The eBPF program processes the arguments and returns data to the tracing script ``` The two main [eBPF] front-ends with support for USDT are [bpftrace] an [BPF Compiler Collection (BCC)]. BCC is used for complex tools and daemons and `bpftrace` is preferred for one-liners and shorter scripts. Example tracing scripts for both are provided with this PR. [eBPF]: https://ebpf.io/ [bpftrace]: https://github.com/iovisor/bpftrace [BPF Compiler Collection (BCC)]: https://github.com/iovisor/bcc This PR adds three tracepoints: - `net:inbound_message` - `net:outbound_message` - `valildation:block_connected` See `doc/tracing.md` and `contrib/tracing/` for documentation and example tracing scripts. ## Open Questions (Not in scope for this PR) - How to use these tracepoints under macOS? - Release builds with USDT support? - Should and can the tracepoints be automatically tested? ## Todo (before undraft) - [x] bcc example showing how to read raw P2P messages up to 32kb - [x] document that you need `sys/sdt.h` from `systemtap` for USDT support in Bitcoin Core (`apt install systemtap-sdt-dev` on debian-like). See https://github.com/bitcoin/bitcoin/pull/19866/commits/933ab8a720cb9b3341adec4109cffb6dc5b322a5 - [ ] release notes? ACKs for top commit: laanwj: re-ACK 8f37f5c2a562c38c83fc40234ade9c301fc4e685 jb55: ACK 8f37f5c2a562c38c83fc40234ade9c301fc4e685 Tree-SHA512: a92a8a2dfcd28465f58a6e5f50d39486817ef5f51214ec40bdb02a6843b9c08ea154fadb31558825ff3a4687477b90f2a5da5d6451989eef978e128a264c289d --- contrib/tracing/README.md | 241 ++++++++++++++++++++ contrib/tracing/connectblock_benchmark.bt | 150 ++++++++++++ contrib/tracing/log_p2p_traffic.bt | 28 +++ contrib/tracing/log_raw_p2p_msgs.py | 180 +++++++++++++++ contrib/tracing/p2p_monitor.py | 250 ++++++++++++++++++++ doc/build-unix.md | 9 + doc/dependencies.md | 2 + doc/tracing.md | 266 ++++++++++++++++++++++ src/net.cpp | 15 +- src/net_processing.cpp | 11 +- src/validation.cpp | 11 + 11 files changed, 1158 insertions(+), 5 deletions(-) create mode 100644 contrib/tracing/README.md create mode 100755 contrib/tracing/connectblock_benchmark.bt create mode 100755 contrib/tracing/log_p2p_traffic.bt create mode 100755 contrib/tracing/log_raw_p2p_msgs.py create mode 100755 contrib/tracing/p2p_monitor.py create mode 100644 doc/tracing.md diff --git a/contrib/tracing/README.md b/contrib/tracing/README.md new file mode 100644 index 0000000000000..047354cda17b1 --- /dev/null +++ b/contrib/tracing/README.md @@ -0,0 +1,241 @@ +Example scripts for User-space, Statically Defined Tracing (USDT) +================================================================= + +This directory contains scripts showcasing User-space, Statically Defined +Tracing (USDT) support for Bitcoin Core on Linux using. For more information on +USDT support in Bitcoin Core see the [USDT documentation]. + +[USDT documentation]: ../../doc/tracing.md + + +Examples for the two main eBPF front-ends, [bpftrace] and +[BPF Compiler Collection (BCC)], with support for USDT, are listed. BCC is used +for complex tools and daemons and `bpftrace` is preferred for one-liners and +shorter scripts. + +[bpftrace]: https://github.com/iovisor/bpftrace +[BPF Compiler Collection (BCC)]: https://github.com/iovisor/bcc + + +To develop and run bpftrace and BCC scripts you need to install the +corresponding packages. See [installing bpftrace] and [installing BCC] for more +information. For development there exist a [bpftrace Reference Guide], a +[BCC Reference Guide], and a [bcc Python Developer Tutorial]. + +[installing bpftrace]: https://github.com/iovisor/bpftrace/blob/master/INSTALL.md +[installing BCC]: https://github.com/iovisor/bcc/blob/master/INSTALL.md +[bpftrace Reference Guide]: https://github.com/iovisor/bpftrace/blob/master/docs/reference_guide.md +[BCC Reference Guide]: https://github.com/iovisor/bcc/blob/master/docs/reference_guide.md +[bcc Python Developer Tutorial]: https://github.com/iovisor/bcc/blob/master/docs/tutorial_bcc_python_developer.md + +## Examples + +The bpftrace examples contain a relative path to the `bitcoind` binary. By +default, the scripts should be run from the repository-root and assume a +self-compiled `bitcoind` binary. The paths in the examples can be changed, for +example, to point to release builds if needed. See the +[Bitcoin Core USDT documentation] on how to list available tracepoints in your +`bitcoind` binary. + +[Bitcoin Core USDT documentation]: ../../doc/tracing.md#listing-available-tracepoints + +**WARNING: eBPF programs require root privileges to be loaded into a Linux +kernel VM. This means the bpftrace and BCC examples must be executed with root +privileges. Make sure to carefully review any scripts that you run with root +privileges first!** + +### log_p2p_traffic.bt + +A bpftrace script logging information about inbound and outbound P2P network +messages. Based on the `net:inbound_message` and `net:outbound_message` +tracepoints. + +By default, `bpftrace` limits strings to 64 bytes due to the limited stack size +in the eBPF VM. For example, Tor v3 addresses exceed the string size limit which +results in the port being cut off during logging. The string size limit can be +increased with the `BPFTRACE_STRLEN` environment variable (`BPFTRACE_STRLEN=70` +works fine). + +``` +$ bpftrace contrib/tracing/log_p2p_traffic.bt +``` + +Output +``` +outbound 'ping' msg to peer 11 (outbound-full-relay, [2a02:b10c:f747:1:ef:fake:ipv6:addr]:8333) with 8 bytes +inbound 'pong' msg from peer 11 (outbound-full-relay, [2a02:b10c:f747:1:ef:fake:ipv6:addr]:8333) with 8 bytes +inbound 'inv' msg from peer 16 (outbound-full-relay, XX.XX.XXX.121:8333) with 37 bytes +outbound 'getdata' msg to peer 16 (outbound-full-relay, XX.XX.XXX.121:8333) with 37 bytes +inbound 'tx' msg from peer 16 (outbound-full-relay, XX.XX.XXX.121:8333) with 222 bytes +outbound 'inv' msg to peer 9 (outbound-full-relay, faketorv3addressa2ufa6odvoi3s77j4uegey0xb10csyfyve2t33curbyd.onion:8333) with 37 bytes +outbound 'inv' msg to peer 7 (outbound-full-relay, XX.XX.XXX.242:8333) with 37 bytes +… +``` + +### p2p_monitor.py + +A BCC Python script using curses for an interactive P2P message monitor. Based +on the `net:inbound_message` and `net:outbound_message` tracepoints. + +Inbound and outbound traffic is listed for each peer together with information +about the connection. Peers can be selected individually to view recent P2P +messages. + +``` +$ python3 contrib/tracing/p2p_monitor.py ./src/bitcoind +``` + +Lists selectable peers and traffic and connection information. +``` + P2P Message Monitor + Navigate with UP/DOWN or J/K and select a peer with ENTER or SPACE to see individual P2P messages + + PEER OUTBOUND INBOUND TYPE ADDR + 0 46 398 byte 61 1407590 byte block-relay-only XX.XX.XXX.196:8333 + 11 1156 253570 byte 3431 2394924 byte outbound-full-relay XXX.X.XX.179:8333 + 13 3425 1809620 byte 1236 305458 byte inbound XXX.X.X.X:60380 + 16 1046 241633 byte 1589 1199220 byte outbound-full-relay 4faketorv2pbfu7x.onion:8333 + 19 577 181679 byte 390 148951 byte outbound-full-relay kfake4vctorjv2o2.onion:8333 + 20 11 1248 byte 13 1283 byte block-relay-only [2600:fake:64d9:b10c:4436:aaaa:fe:bb]:8333 + 21 11 1248 byte 13 1299 byte block-relay-only XX.XXX.X.155:8333 + 22 5 103 byte 1 102 byte feeler XX.XX.XXX.173:8333 + 23 11 1248 byte 12 1255 byte block-relay-only XX.XXX.XXX.220:8333 + 24 3 103 byte 1 102 byte feeler XXX.XXX.XXX.64:8333 +… +``` + +Showing recent P2P messages between our node and a selected peer. + +``` + ---------------------------------------------------------------------- + | PEER 16 (4faketorv2pbfu7x.onion:8333) | + | OUR NODE outbound-full-relay PEER | + | <--- sendcmpct (9 bytes) | + | inv (37 byte) ---> | + | <--- ping (8 bytes) | + | pong (8 byte) ---> | + | inv (37 byte) ---> | + | <--- addr (31 bytes) | + | inv (37 byte) ---> | + | <--- getheaders (1029 bytes) | + | headers (1 byte) ---> | + | <--- feefilter (8 bytes) | + | <--- pong (8 bytes) | + | <--- headers (82 bytes) | + | <--- addr (30003 bytes) | + | inv (1261 byte) ---> | + | … | + +``` + +### log_raw_p2p_msgs.py + +A BCC Python script showcasing eBPF and USDT limitations when passing data +larger than about 32kb. Based on the `net:inbound_message` and +`net:outbound_message` tracepoints. + +Bitcoin P2P messages can be larger than 32kb (e.g. `tx`, `block`, ...). The +eBPF VM's stack is limited to 512 bytes, and we can't allocate more than about +32kb for a P2P message in the eBPF VM. The **message data is cut off** when the +message is larger than MAX_MSG_DATA_LENGTH (see script). This can be detected +in user-space by comparing the data length to the message length variable. The +message is cut off when the data length is smaller than the message length. +A warning is included with the printed message data. + +Data is submitted to user-space (i.e. to this script) via a ring buffer. The +throughput of the ring buffer is limited. Each p2p_message is about 32kb in +size. In- or outbound messages submitted to the ring buffer in rapid +succession fill the ring buffer faster than it can be read. Some messages are +lost. BCC prints: `Possibly lost 2 samples` on lost messages. + + +``` +$ python3 contrib/tracing/log_raw_p2p_msgs.py ./src/bitcoind +``` + +``` +Logging raw P2P messages. +Messages larger that about 32kb will be cut off! +Some messages might be lost! + outbound msg 'inv' from peer 4 (outbound-full-relay, XX.XXX.XX.4:8333) with 253 bytes: 0705000000be2245c8f844c9f763748e1a7… +… +Warning: incomplete message (only 32568 out of 53552 bytes)! inbound msg 'tx' from peer 32 (outbound-full-relay, XX.XXX.XXX.43:8333) with 53552 bytes: 020000000001fd3c01939c85ad6756ed9fc… +… +Possibly lost 2 samples +``` + +### connectblock_benchmark.bt + +A `bpftrace` script to benchmark the `ConnectBlock()` function during, for +example, a blockchain re-index. Based on the `validation:block_connected` USDT +tracepoint. + +The script takes three positional arguments. The first two arguments, the start, +and end height indicate between which blocks the benchmark should be run. The +third acts as a duration threshold in milliseconds. When the `ConnectBlock()` +function takes longer than the threshold, information about the block, is +printed. For more details, see the header comment in the script. + +By default, `bpftrace` limits strings to 64 bytes due to the limited stack size +in the kernel VM. Block hashes as zero-terminated hex strings are 65 bytes which +exceed the string limit. The string size limit can be set to 65 bytes with the +environment variable `BPFTRACE_STRLEN`. + +The following command can be used to benchmark, for example, `ConnectBlock()` +between height 20000 and 38000 on SigNet while logging all blocks that take +longer than 25ms to connect. + +``` +$ BPFTRACE_STRLEN=65 bpftrace contrib/tracing/connectblock_benchmark.bt 20000 38000 25 +``` + +In a different terminal, starting Bitcoin Core in SigNet mode and with +re-indexing enabled. + +``` +$ ./src/bitcoind -signet -reindex +``` + +This produces the following output. +``` +Attaching 5 probes... +ConnectBlock Benchmark between height 20000 and 38000 inclusive +Logging blocks taking longer than 25 ms to connect. +Starting Connect Block Benchmark between height 20000 and 38000. +BENCH 39 blk/s 59 tx/s 59 inputs/s 20 sigops/s (height 20038) +Block 20492 (000000f555653bb05e2f3c6e79925e01a20dd57033f4dc7c354b46e34735d32b) 20 tx 2319 ins 2318 sigops took 38 ms +BENCH 1840 blk/s 2117 tx/s 4478 inputs/s 2471 sigops/s (height 21879) +BENCH 1816 blk/s 4972 tx/s 4982 inputs/s 125 sigops/s (height 23695) +BENCH 2095 blk/s 2890 tx/s 2910 inputs/s 152 sigops/s (height 25790) +BENCH 1684 blk/s 3979 tx/s 4053 inputs/s 288 sigops/s (height 27474) +BENCH 1155 blk/s 3216 tx/s 3252 inputs/s 115 sigops/s (height 28629) +BENCH 1797 blk/s 2488 tx/s 2503 inputs/s 111 sigops/s (height 30426) +BENCH 1849 blk/s 6318 tx/s 6569 inputs/s 12189 sigops/s (height 32275) +BENCH 946 blk/s 20209 tx/s 20775 inputs/s 83809 sigops/s (height 33221) +Block 33406 (0000002adfe4a15cfcd53bd890a89bbae836e5bb7f38bac566f61ad4548c87f6) 25 tx 2045 ins 2090 sigops took 29 ms +Block 33687 (00000073231307a9828e5607ceb8156b402efe56747271a4442e75eb5b77cd36) 52 tx 1797 ins 1826 sigops took 26 ms +BENCH 582 blk/s 21581 tx/s 27673 inputs/s 60345 sigops/s (height 33803) +BENCH 1035 blk/s 19735 tx/s 19776 inputs/s 51355 sigops/s (height 34838) +Block 35625 (0000006b00b347390c4768ea9df2655e9ff4b120f29d78594a2a702f8a02c997) 20 tx 3374 ins 3371 sigops took 49 ms +BENCH 887 blk/s 17857 tx/s 22191 inputs/s 24404 sigops/s (height 35725) +Block 35937 (000000d816d13d6e39b471cd4368db60463a764ba1f29168606b04a22b81ea57) 75 tx 3943 ins 3940 sigops took 61 ms +BENCH 823 blk/s 16298 tx/s 21031 inputs/s 18440 sigops/s (height 36548) +Block 36583 (000000c3e260556dbf42968aae3f904dba8b8c1ff96a6f6e3aa5365d2e3ad317) 24 tx 2198 ins 2194 sigops took 34 ms +Block 36700 (000000b3b173de9e65a3cfa738d976af6347aaf83fa17ab3f2a4d2ede3ddfac4) 73 tx 1615 ins 1611 sigops took 31 ms +Block 36832 (0000007859578c02c1ac37dabd1b9ec19b98f350b56935f5dd3a41e9f79f836e) 34 tx 1440 ins 1436 sigops took 26 ms +BENCH 613 blk/s 16718 tx/s 25074 inputs/s 23022 sigops/s (height 37161) +Block 37870 (000000f5c1086291ba2d943fb0c3bc82e71c5ee341ee117681d1456fbf6c6c38) 25 tx 1517 ins 1514 sigops took 29 ms +BENCH 811 blk/s 16031 tx/s 20921 inputs/s 18696 sigops/s (height 37972) + +Took 14055 ms to connect the blocks between height 20000 and 38000. + +Histogram of block connection times in milliseconds (ms). +@durations: +[0] 16838 |@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@| +[1] 882 |@@ | +[2, 4) 236 | | +[4, 8) 23 | | +[8, 16) 9 | | +[16, 32) 9 | | +[32, 64) 4 | | +``` diff --git a/contrib/tracing/connectblock_benchmark.bt b/contrib/tracing/connectblock_benchmark.bt new file mode 100755 index 0000000000000..d268eff7f8b8c --- /dev/null +++ b/contrib/tracing/connectblock_benchmark.bt @@ -0,0 +1,150 @@ +#!/usr/bin/env bpftrace + +/* + + USAGE: + + BPFTRACE_STRLEN=65 bpftrace contrib/tracing/connectblock_benchmark.bt + + - The environment variable BPFTRACE_STRLEN needs to be set to 65 chars as + strings are limited to 64 chars by default. Hex strings with Bitcoin block + hashes are 64 hex chars + 1 null-termination char. + - sets the height at which the benchmark should start. Setting + the start height to 0 starts the benchmark immediately, even before the + first block is connected. + - sets the height after which the benchmark should end. Setting + the end height to 0 disables the benchmark. The script only logs blocks + over . + - Threshold + + This script requires a 'bitcoind' binary compiled with eBPF support and the + 'validation:block_connected' USDT. By default, it's assumed that 'bitcoind' is + located in './src/bitcoind'. This can be modified in the script below. + + EXAMPLES: + + BPFTRACE_STRLEN=65 bpftrace contrib/tracing/connectblock_benchmark.bt 300000 680000 1000 + + When run together 'bitcoind -reindex', this benchmarks the time it takes to + connect the blocks between height 300.000 and 680.000 (inclusive) and prints + details about all blocks that take longer than 1000ms to connect. Prints a + histogram with block connection times when the benchmark is finished. + + + BPFTRACE_STRLEN=65 bpftrace contrib/tracing/connectblock_benchmark.bt 0 0 500 + + When running together 'bitcoind', all newly connected blocks that + take longer than 500ms to connect are logged. A histogram with block + connection times is shown when the script is terminated. + +*/ + +BEGIN +{ + $start_height = $1; + $end_height = $2; + $logging_threshold_ms = $3; + + if ($end_height < $start_height) { + printf("Error: start height (%d) larger than end height (%d)!\n", $start_height, $end_height); + exit(); + } + + if ($end_height > 0) { + printf("ConnectBlock benchmark between height %d and %d inclusive\n", $start_height, $end_height); + } else { + printf("ConnectBlock logging starting at height %d\n", $start_height); + } + + if ($logging_threshold_ms > 0) { + printf("Logging blocks taking longer than %d ms to connect.\n", $3); + } + + if ($start_height == 0) { + @start = nsecs; + } +} + +/* + Attaches to the 'validation:block_connected' USDT and collects stats when the + connected block is between the start and end height (or the end height is + unset). +*/ +usdt:./src/bitcoind:validation:block_connected /arg1 >= $1 && (arg1 <= $2 || $2 == 0 )/ +{ + $height = arg1; + $transactions = arg2; + $inputs = arg3; + $sigops = arg4; + $duration = (uint64) arg5; + + @height = $height; + + @blocks = @blocks + 1; + @transactions = @transactions + $transactions; + @inputs = @inputs + $inputs; + @sigops = @sigops + $sigops; + + @durations = hist($duration / 1000); + + if ($height == $1 && $height != 0) { + @start = nsecs; + printf("Starting Connect Block Benchmark between height %d and %d.\n", $1, $2); + } + + if ($2 > 0 && $height >= $2) { + @end = nsecs; + $duration = @end - @start; + printf("\nTook %d ms to connect the blocks between height %d and %d.\n", $duration / 1000000, $1, $2); + exit(); + } +} + +/* + Attaches to the 'validation:block_connected' USDT and logs information about + blocks where the time it took to connect the block is above the + . +*/ +usdt:./src/bitcoind:validation:block_connected / (uint64) arg5 / 1000> $3 / +{ + $hash_str = str(arg0); + $height = (int32) arg1; + $transactions = (uint64) arg2; + $inputs = (int32) arg3; + $sigops = (int64) arg4; + $duration = (int64) arg5; + + printf("Block %d (%s) %4d tx %5d ins %5d sigops took %4d ms\n", $height, $hash_str, $transactions, $inputs, $sigops, (uint64) $duration / 1000); +} + + +/* + Prints stats about the blocks, transactions, inputs, and sigops processed in + the last second (if any). +*/ +interval:s:1 { + if (@blocks > 0) { + printf("BENCH %4d blk/s %6d tx/s %7d inputs/s %8d sigops/s (height %d)\n", @blocks, @transactions, @inputs, @sigops, @height); + + zero(@blocks); + zero(@transactions); + zero(@inputs); + zero(@sigops); + } +} + +END +{ + printf("\nHistogram of block connection times in milliseconds (ms).\n"); + print(@durations); + + clear(@durations); + clear(@blocks); + clear(@transactions); + clear(@inputs); + clear(@sigops); + clear(@height); + clear(@start); + clear(@end); +} + diff --git a/contrib/tracing/log_p2p_traffic.bt b/contrib/tracing/log_p2p_traffic.bt new file mode 100755 index 0000000000000..f62956aa5e1c3 --- /dev/null +++ b/contrib/tracing/log_p2p_traffic.bt @@ -0,0 +1,28 @@ +#!/usr/bin/env bpftrace + +BEGIN +{ + printf("Logging P2P traffic\n") +} + +usdt:./src/bitcoind:net:inbound_message +{ + $peer_id = (int64) arg0; + $peer_addr = str(arg1); + $peer_type = str(arg2); + $msg_type = str(arg3); + $msg_len = arg4; + printf("inbound '%s' msg from peer %d (%s, %s) with %d bytes\n", $msg_type, $peer_id, $peer_type, $peer_addr, $msg_len); +} + +usdt:./src/bitcoind:net:outbound_message +{ + $peer_id = (int64) arg0; + $peer_addr = str(arg1); + $peer_type = str(arg2); + $msg_type = str(arg3); + $msg_len = arg4; + + printf("outbound '%s' msg to peer %d (%s, %s) with %d bytes\n", $msg_type, $peer_id, $peer_type, $peer_addr, $msg_len); +} + diff --git a/contrib/tracing/log_raw_p2p_msgs.py b/contrib/tracing/log_raw_p2p_msgs.py new file mode 100755 index 0000000000000..b5b57556324e3 --- /dev/null +++ b/contrib/tracing/log_raw_p2p_msgs.py @@ -0,0 +1,180 @@ +#!/usr/bin/env python3 + +""" Demonstration of eBPF limitations and the effect on USDT with the + net:inbound_message and net:outbound_message tracepoints. """ + +# This script shows a limitation of eBPF when data larger than 32kb is passed to +# user-space. It uses BCC (https://github.com/iovisor/bcc) to load a sandboxed +# eBPF program into the Linux kernel (root privileges are required). The eBPF +# program attaches to two statically defined tracepoints. The tracepoint +# 'net:inbound_message' is called when a new P2P message is received, and +# 'net:outbound_message' is called on outbound P2P messages. The eBPF program +# submits the P2P messages to this script via a BPF ring buffer. The submitted +# messages are printed. + +# eBPF Limitations: +# +# Bitcoin P2P messages can be larger than 32kb (e.g. tx, block, ...). The eBPF +# VM's stack is limited to 512 bytes, and we can't allocate more than about 32kb +# for a P2P message in the eBPF VM. The message data is cut off when the message +# is larger than MAX_MSG_DATA_LENGTH (see definition below). This can be detected +# in user-space by comparing the data length to the message length variable. The +# message is cut off when the data length is smaller than the message length. +# A warning is included with the printed message data. +# +# Data is submitted to user-space (i.e. to this script) via a ring buffer. The +# throughput of the ring buffer is limited. Each p2p_message is about 32kb in +# size. In- or outbound messages submitted to the ring buffer in rapid +# succession fill the ring buffer faster than it can be read. Some messages are +# lost. +# +# BCC prints: "Possibly lost 2 samples" on lost messages. + +import sys +from bcc import BPF, USDT + +# BCC: The C program to be compiled to an eBPF program (by BCC) and loaded into +# a sandboxed Linux kernel VM. +program = """ +#include + +#define MIN(a,b) ({ __typeof__ (a) _a = (a); __typeof__ (b) _b = (b); _a < _b ? _a : _b; }) + +// Maximum possible allocation size +// from include/linux/percpu.h in the Linux kernel +#define PCPU_MIN_UNIT_SIZE (32 << 10) + +// Tor v3 addresses are 62 chars + 6 chars for the port (':12345'). +#define MAX_PEER_ADDR_LENGTH 62 + 6 +#define MAX_PEER_CONN_TYPE_LENGTH 20 +#define MAX_MSG_TYPE_LENGTH 20 +#define MAX_MSG_DATA_LENGTH PCPU_MIN_UNIT_SIZE - 200 + +struct p2p_message +{ + u64 peer_id; + char peer_addr[MAX_PEER_ADDR_LENGTH]; + char peer_conn_type[MAX_PEER_CONN_TYPE_LENGTH]; + char msg_type[MAX_MSG_TYPE_LENGTH]; + u64 msg_size; + u8 msg[MAX_MSG_DATA_LENGTH]; +}; + +// We can't store the p2p_message struct on the eBPF stack as it is limited to +// 512 bytes and P2P message can be bigger than 512 bytes. However, we can use +// an BPF-array with a length of 1 to allocate up to 32768 bytes (this is +// defined by PCPU_MIN_UNIT_SIZE in include/linux/percpu.h in the Linux kernel). +// Also see https://github.com/iovisor/bcc/issues/2306 +BPF_ARRAY(msg_arr, struct p2p_message, 1); + +// Two BPF perf buffers for pushing data (here P2P messages) to user-space. +BPF_PERF_OUTPUT(inbound_messages); +BPF_PERF_OUTPUT(outbound_messages); + +int trace_inbound_message(struct pt_regs *ctx) { + int idx = 0; + struct p2p_message *msg = msg_arr.lookup(&idx); + + // lookup() does not return a NULL pointer. However, the BPF verifier + // requires an explicit check that that the `msg` pointer isn't a NULL + // pointer. See https://github.com/iovisor/bcc/issues/2595 + if (msg == NULL) return 1; + + bpf_usdt_readarg(1, ctx, &msg->peer_id); + bpf_usdt_readarg_p(2, ctx, &msg->peer_addr, MAX_PEER_ADDR_LENGTH); + bpf_usdt_readarg_p(3, ctx, &msg->peer_conn_type, MAX_PEER_CONN_TYPE_LENGTH); + bpf_usdt_readarg_p(4, ctx, &msg->msg_type, MAX_MSG_TYPE_LENGTH); + bpf_usdt_readarg(5, ctx, &msg->msg_size); + bpf_usdt_readarg_p(6, ctx, &msg->msg, MIN(msg->msg_size, MAX_MSG_DATA_LENGTH)); + + inbound_messages.perf_submit(ctx, msg, sizeof(*msg)); + return 0; +}; + +int trace_outbound_message(struct pt_regs *ctx) { + int idx = 0; + struct p2p_message *msg = msg_arr.lookup(&idx); + + // lookup() does not return a NULL pointer. However, the BPF verifier + // requires an explicit check that that the `msg` pointer isn't a NULL + // pointer. See https://github.com/iovisor/bcc/issues/2595 + if (msg == NULL) return 1; + + bpf_usdt_readarg(1, ctx, &msg->peer_id); + bpf_usdt_readarg_p(2, ctx, &msg->peer_addr, MAX_PEER_ADDR_LENGTH); + bpf_usdt_readarg_p(3, ctx, &msg->peer_conn_type, MAX_PEER_CONN_TYPE_LENGTH); + bpf_usdt_readarg_p(4, ctx, &msg->msg_type, MAX_MSG_TYPE_LENGTH); + bpf_usdt_readarg(5, ctx, &msg->msg_size); + bpf_usdt_readarg_p(6, ctx, &msg->msg, MIN(msg->msg_size, MAX_MSG_DATA_LENGTH)); + + outbound_messages.perf_submit(ctx, msg, sizeof(*msg)); + return 0; +}; +""" + + +def print_message(event, inbound): + print(f"%s %s msg '%s' from peer %d (%s, %s) with %d bytes: %s" % + ( + f"Warning: incomplete message (only %d out of %d bytes)!" % ( + len(event.msg), event.msg_size) if len(event.msg) < event.msg_size else "", + "inbound" if inbound else "outbound", + event.msg_type.decode("utf-8"), + event.peer_id, + event.peer_conn_type.decode("utf-8"), + event.peer_addr.decode("utf-8"), + event.msg_size, + bytes(event.msg[:event.msg_size]).hex(), + ) + ) + + +def main(bitcoind_path): + bitcoind_with_usdts = USDT(path=str(bitcoind_path)) + + # attaching the trace functions defined in the BPF program to the tracepoints + bitcoind_with_usdts.enable_probe( + probe="inbound_message", fn_name="trace_inbound_message") + bitcoind_with_usdts.enable_probe( + probe="outbound_message", fn_name="trace_outbound_message") + bpf = BPF(text=program, usdt_contexts=[bitcoind_with_usdts]) + + # BCC: perf buffer handle function for inbound_messages + def handle_inbound(_, data, size): + """ Inbound message handler. + + Called each time a message is submitted to the inbound_messages BPF table.""" + + event = bpf["inbound_messages"].event(data) + print_message(event, True) + + # BCC: perf buffer handle function for outbound_messages + + def handle_outbound(_, data, size): + """ Outbound message handler. + + Called each time a message is submitted to the outbound_messages BPF table.""" + + event = bpf["outbound_messages"].event(data) + print_message(event, False) + + # BCC: add handlers to the inbound and outbound perf buffers + bpf["inbound_messages"].open_perf_buffer(handle_inbound) + bpf["outbound_messages"].open_perf_buffer(handle_outbound) + + print("Logging raw P2P messages.") + print("Messages larger that about 32kb will be cut off!") + print("Some messages might be lost!") + while True: + try: + bpf.perf_buffer_poll() + except KeyboardInterrupt: + exit() + + +if __name__ == "__main__": + if len(sys.argv) < 2: + print("USAGE:", sys.argv[0], "path/to/bitcoind") + exit() + path = sys.argv[1] + main(path) diff --git a/contrib/tracing/p2p_monitor.py b/contrib/tracing/p2p_monitor.py new file mode 100755 index 0000000000000..14e3e3a80123a --- /dev/null +++ b/contrib/tracing/p2p_monitor.py @@ -0,0 +1,250 @@ +#!/usr/bin/env python3 + +""" Interactive bitcoind P2P network traffic monitor utilizing USDT and the + net:inbound_message and net:outbound_message tracepoints. """ + +# This script demonstrates what USDT for Bitcoin Core can enable. It uses BCC +# (https://github.com/iovisor/bcc) to load a sandboxed eBPF program into the +# Linux kernel (root privileges are required). The eBPF program attaches to two +# statically defined tracepoints. The tracepoint 'net:inbound_message' is called +# when a new P2P message is received, and 'net:outbound_message' is called on +# outbound P2P messages. The eBPF program submits the P2P messages to +# this script via a BPF ring buffer. + +import sys +import curses +from curses import wrapper, panel +from bcc import BPF, USDT + +# BCC: The C program to be compiled to an eBPF program (by BCC) and loaded into +# a sandboxed Linux kernel VM. +program = """ +#include + +// Tor v3 addresses are 62 chars + 6 chars for the port (':12345'). +// I2P addresses are 60 chars + 6 chars for the port (':12345'). +#define MAX_PEER_ADDR_LENGTH 62 + 6 +#define MAX_PEER_CONN_TYPE_LENGTH 20 +#define MAX_MSG_TYPE_LENGTH 20 + +struct p2p_message +{ + u64 peer_id; + char peer_addr[MAX_PEER_ADDR_LENGTH]; + char peer_conn_type[MAX_PEER_CONN_TYPE_LENGTH]; + char msg_type[MAX_MSG_TYPE_LENGTH]; + u64 msg_size; +}; + + +// Two BPF perf buffers for pushing data (here P2P messages) to user space. +BPF_PERF_OUTPUT(inbound_messages); +BPF_PERF_OUTPUT(outbound_messages); + +int trace_inbound_message(struct pt_regs *ctx) { + struct p2p_message msg = {}; + + bpf_usdt_readarg(1, ctx, &msg.peer_id); + bpf_usdt_readarg_p(2, ctx, &msg.peer_addr, MAX_PEER_ADDR_LENGTH); + bpf_usdt_readarg_p(3, ctx, &msg.peer_conn_type, MAX_PEER_CONN_TYPE_LENGTH); + bpf_usdt_readarg_p(4, ctx, &msg.msg_type, MAX_MSG_TYPE_LENGTH); + bpf_usdt_readarg(5, ctx, &msg.msg_size); + + inbound_messages.perf_submit(ctx, &msg, sizeof(msg)); + return 0; +}; + +int trace_outbound_message(struct pt_regs *ctx) { + struct p2p_message msg = {}; + + bpf_usdt_readarg(1, ctx, &msg.peer_id); + bpf_usdt_readarg_p(2, ctx, &msg.peer_addr, MAX_PEER_ADDR_LENGTH); + bpf_usdt_readarg_p(3, ctx, &msg.peer_conn_type, MAX_PEER_CONN_TYPE_LENGTH); + bpf_usdt_readarg_p(4, ctx, &msg.msg_type, MAX_MSG_TYPE_LENGTH); + bpf_usdt_readarg(5, ctx, &msg.msg_size); + + outbound_messages.perf_submit(ctx, &msg, sizeof(msg)); + return 0; +}; +""" + + +class Message: + """ A P2P network message. """ + msg_type = "" + size = 0 + data = bytes() + inbound = False + + def __init__(self, msg_type, size, inbound): + self.msg_type = msg_type + self.size = size + self.inbound = inbound + + +class Peer: + """ A P2P network peer. """ + id = 0 + address = "" + connection_type = "" + last_messages = list() + + total_inbound_msgs = 0 + total_inbound_bytes = 0 + total_outbound_msgs = 0 + total_outbound_bytes = 0 + + def __init__(self, id, address, connection_type): + self.id = id + self.address = address + self.connection_type = connection_type + self.last_messages = list() + + def add_message(self, message): + self.last_messages.append(message) + if len(self.last_messages) > 25: + self.last_messages.pop(0) + if message.inbound: + self.total_inbound_bytes += message.size + self.total_inbound_msgs += 1 + else: + self.total_outbound_bytes += message.size + self.total_outbound_msgs += 1 + + +def main(bitcoind_path): + peers = dict() + + bitcoind_with_usdts = USDT(path=str(bitcoind_path)) + + # attaching the trace functions defined in the BPF program to the tracepoints + bitcoind_with_usdts.enable_probe( + probe="inbound_message", fn_name="trace_inbound_message") + bitcoind_with_usdts.enable_probe( + probe="outbound_message", fn_name="trace_outbound_message") + bpf = BPF(text=program, usdt_contexts=[bitcoind_with_usdts]) + + # BCC: perf buffer handle function for inbound_messages + def handle_inbound(_, data, size): + """ Inbound message handler. + + Called each time a message is submitted to the inbound_messages BPF table.""" + event = bpf["inbound_messages"].event(data) + if event.peer_id not in peers: + peer = Peer(event.peer_id, event.peer_addr.decode( + "utf-8"), event.peer_conn_type.decode("utf-8")) + peers[peer.id] = peer + peers[event.peer_id].add_message( + Message(event.msg_type.decode("utf-8"), event.msg_size, True)) + + # BCC: perf buffer handle function for outbound_messages + def handle_outbound(_, data, size): + """ Outbound message handler. + + Called each time a message is submitted to the outbound_messages BPF table.""" + event = bpf["outbound_messages"].event(data) + if event.peer_id not in peers: + peer = Peer(event.peer_id, event.peer_addr.decode( + "utf-8"), event.peer_conn_type.decode("utf-8")) + peers[peer.id] = peer + peers[event.peer_id].add_message( + Message(event.msg_type.decode("utf-8"), event.msg_size, False)) + + # BCC: add handlers to the inbound and outbound perf buffers + bpf["inbound_messages"].open_perf_buffer(handle_inbound) + bpf["outbound_messages"].open_perf_buffer(handle_outbound) + + wrapper(loop, bpf, peers) + + +def loop(screen, bpf, peers): + screen.nodelay(1) + cur_list_pos = 0 + win = curses.newwin(30, 70, 2, 7) + win.erase() + win.border(ord("|"), ord("|"), ord("-"), ord("-"), + ord("-"), ord("-"), ord("-"), ord("-")) + info_panel = panel.new_panel(win) + info_panel.hide() + + ROWS_AVALIABLE_FOR_LIST = curses.LINES - 5 + scroll = 0 + + while True: + try: + # BCC: poll the perf buffers for new events or timeout after 50ms + bpf.perf_buffer_poll(timeout=50) + + ch = screen.getch() + if (ch == curses.KEY_DOWN or ch == ord("j")) and cur_list_pos < len( + peers.keys()) -1 and info_panel.hidden(): + cur_list_pos += 1 + if cur_list_pos >= ROWS_AVALIABLE_FOR_LIST: + scroll += 1 + if (ch == curses.KEY_UP or ch == ord("k")) and cur_list_pos > 0 and info_panel.hidden(): + cur_list_pos -= 1 + if scroll > 0: + scroll -= 1 + if ch == ord('\n') or ch == ord(' '): + if info_panel.hidden(): + info_panel.show() + else: + info_panel.hide() + screen.erase() + render(screen, peers, cur_list_pos, scroll, ROWS_AVALIABLE_FOR_LIST, info_panel) + curses.panel.update_panels() + screen.refresh() + except KeyboardInterrupt: + exit() + + +def render(screen, peers, cur_list_pos, scroll, ROWS_AVALIABLE_FOR_LIST, info_panel): + """ renders the list of peers and details panel + + This code is unrelated to USDT, BCC and BPF. + """ + header_format = "%6s %-20s %-20s %-22s %-67s" + row_format = "%6s %-5d %9d byte %-5d %9d byte %-22s %-67s" + + screen.addstr(0, 1, (" P2P Message Monitor "), curses.A_REVERSE) + screen.addstr( + 1, 0, (" Navigate with UP/DOWN or J/K and select a peer with ENTER or SPACE to see individual P2P messages"), curses.A_NORMAL) + screen.addstr(3, 0, + header_format % ("PEER", "OUTBOUND", "INBOUND", "TYPE", "ADDR"), curses.A_BOLD | curses.A_UNDERLINE) + peer_list = sorted(peers.keys())[scroll:ROWS_AVALIABLE_FOR_LIST+scroll] + for i, peer_id in enumerate(peer_list): + peer = peers[peer_id] + screen.addstr(i + 4, 0, + row_format % (peer.id, peer.total_outbound_msgs, peer.total_outbound_bytes, + peer.total_inbound_msgs, peer.total_inbound_bytes, + peer.connection_type, peer.address), + curses.A_REVERSE if i + scroll == cur_list_pos else curses.A_NORMAL) + if i + scroll == cur_list_pos: + info_window = info_panel.window() + info_window.erase() + info_window.border( + ord("|"), ord("|"), ord("-"), ord("-"), + ord("-"), ord("-"), ord("-"), ord("-")) + + info_window.addstr( + 1, 1, f"PEER {peer.id} ({peer.address})".center(68), curses.A_REVERSE | curses.A_BOLD) + info_window.addstr( + 2, 1, f" OUR NODE{peer.connection_type:^54}PEER ", + curses.A_BOLD) + for i, msg in enumerate(peer.last_messages): + if msg.inbound: + info_window.addstr( + i + 3, 1, "%68s" % + (f"<--- {msg.msg_type} ({msg.size} bytes) "), curses.A_NORMAL) + else: + info_window.addstr( + i + 3, 1, " %s (%d byte) --->" % + (msg.msg_type, msg.size), curses.A_NORMAL) + + +if __name__ == "__main__": + if len(sys.argv) < 2: + print("USAGE:", sys.argv[0], "path/to/bitcoind") + exit() + path = sys.argv[1] + main(path) diff --git a/doc/build-unix.md b/doc/build-unix.md index eb37c6744c4b8..102c1855e9677 100644 --- a/doc/build-unix.md +++ b/doc/build-unix.md @@ -48,6 +48,7 @@ Optional dependencies: libqrencode | QR codes in GUI | Optional for generating QR codes (only needed when GUI enabled) libzmq3 | ZMQ notification | Optional, allows generating ZMQ notifications (requires ZMQ version >= 4.0.0) sqlite3 | SQLite DB | Wallet storage (only needed when wallet enabled) + systemtap | Tracing (USDT) | Optional, statically defined tracepoints For the versions used, see [dependencies.md](dependencies.md) @@ -103,6 +104,10 @@ GMP dependencies (provides platform-optimized routines): sudo apt-get install libgmp-dev +User-Space, Statically Defined Tracing (USDT) dependencies: + + sudo apt install systemtap-sdt-dev + GUI dependencies: If you want to build dash-qt, make sure that the required packages for Qt development @@ -166,6 +171,10 @@ GMP dependencies (provides platform-optimized routines): sudo dnf install gmp-devel +User-Space, Statically Defined Tracing (USDT) dependencies: + + sudo dnf install systemtap + GUI dependencies: If you want to build dash-qt, make sure that the required packages for Qt development diff --git a/doc/dependencies.md b/doc/dependencies.md index e645e72ad9f08..94a4ce7d28010 100644 --- a/doc/dependencies.md +++ b/doc/dependencies.md @@ -25,6 +25,7 @@ These are the dependencies currently used by Dash Core. You can find instruction | SQLite | [3.32.1](https://sqlite.org/download.html) | [3.7.17](https://github.com/bitcoin/bitcoin/pull/19077) | | | | | XCB | | | | | [Yes](https://github.com/dashpay/dash/blob/develop/depends/packages/qt.mk) (Linux only) | | xkbcommon | | | | | [Yes](https://github.com/dashpay/dash/blob/develop/depends/packages/qt.mk) (Linux only) | +| systemtap ([tracing](tracing.md))| | | | | | | ZeroMQ | [4.3.1](https://github.com/zeromq/libzmq/releases) | 4.0.0 | No | | | | zlib | | | | | [Yes](https://github.com/dashpay/dash/blob/develop/depends/packages/qt.mk) | @@ -41,6 +42,7 @@ Some dependencies are not needed in all configurations. The following are some f * libnatpmp is not needed with `--without-natpmp`. * Qt is not needed with `--without-gui`. * If the qrencode dependency is absent, QR support won't be added. To force an error when that happens, pass `--with-qrencode`. +* If the systemtap dependency is absent, USDT support won't compiled in. * ZeroMQ is needed only with the `--with-zmq` option. #### Other diff --git a/doc/tracing.md b/doc/tracing.md new file mode 100644 index 0000000000000..1242a0d25002b --- /dev/null +++ b/doc/tracing.md @@ -0,0 +1,266 @@ +# User-space, Statically Defined Tracing (USDT) for Bitcoin Core + +Bitcoin Core includes statically defined tracepoints to allow for more +observability during development, debugging, code review, and production usage. +These tracepoints make it possible to keep track of custom statistics and +enable detailed monitoring of otherwise hidden internals. They have +little to no performance impact when unused. + +``` +eBPF and USDT Overview +====================== + + ┌──────────────────┐ ┌──────────────┐ + │ tracing script │ │ bitcoind │ + │==================│ 2. │==============│ + │ eBPF │ tracing │ hooks │ │ + │ code │ logic │ into┌─┤►tracepoint 1─┼───┐ 3. + └────┬───┴──▲──────┘ ├─┤►tracepoint 2 │ │ pass args + 1. │ │ 4. │ │ ... │ │ to eBPF + User compiles │ │ pass data to │ └──────────────┘ │ program + Space & loads │ │ tracing script │ │ + ─────────────────┼──────┼─────────────────┼────────────────────┼─── + Kernel │ │ │ │ + Space ┌──┬─▼──────┴─────────────────┴────────────┐ │ + │ │ eBPF program │◄──────┘ + │ └───────────────────────────────────────┤ + │ eBPF kernel Virtual Machine (sandboxed) │ + └──────────────────────────────────────────┘ + +1. The tracing script compiles the eBPF code and loads the eBPF program into a kernel VM +2. The eBPF program hooks into one or more tracepoints +3. When the tracepoint is called, the arguments are passed to the eBPF program +4. The eBPF program processes the arguments and returns data to the tracing script +``` + +The Linux kernel can hook into the tracepoints during runtime and pass data to +sandboxed [eBPF] programs running in the kernel. These eBPF programs can, for +example, collect statistics or pass data back to user-space scripts for further +processing. + +[eBPF]: https://ebpf.io/ + +The two main eBPF front-ends with support for USDT are [bpftrace] and +[BPF Compiler Collection (BCC)]. BCC is used for complex tools and daemons and +`bpftrace` is preferred for one-liners and shorter scripts. Examples for both can +be found in [contrib/tracing]. + +[bpftrace]: https://github.com/iovisor/bpftrace +[BPF Compiler Collection (BCC)]: https://github.com/iovisor/bcc +[contrib/tracing]: ../contrib/tracing/ + +## Tracepoint documentation + +The currently available tracepoints are listed here. + +### Context `net` + +#### Tracepoint `net:inbound_message` + +Is called when a message is received from a peer over the P2P network. Passes +information about our peer, the connection and the message as arguments. + +Arguments passed: +1. Peer ID as `int64` +2. Peer Address and Port (IPv4, IPv6, Tor v3, I2P, ...) as `pointer to C-style String` (max. length 68 characters) +3. Connection Type (inbound, feeler, outbound-full-relay, ...) as `pointer to C-style String` (max. length 20 characters) +4. Message Type (inv, ping, getdata, addrv2, ...) as `pointer to C-style String` (max. length 20 characters) +5. Message Size in bytes as `uint64` +6. Message Bytes as `pointer to unsigned chars` (i.e. bytes) + +Note: The message is passed to the tracepoint in full, however, due to space +limitations in the eBPF kernel VM it might not be possible to pass the message +to user-space in full. Messages longer than a 32kb might be cut off. This can +be detected in tracing scripts by comparing the message size to the length of +the passed message. + +#### Tracepoint `net:outbound_message` + +Is called when a message is send to a peer over the P2P network. Passes +information about our peer, the connection and the message as arguments. + +Arguments passed: +1. Peer ID as `int64` +2. Peer Address and Port (IPv4, IPv6, Tor v3, I2P, ...) as `pointer to C-style String` (max. length 68 characters) +3. Connection Type (inbound, feeler, outbound-full-relay, ...) as `pointer to C-style String` (max. length 20 characters) +4. Message Type (inv, ping, getdata, addrv2, ...) as `pointer to C-style String` (max. length 20 characters) +5. Message Size in bytes as `uint64` +6. Message Bytes as `pointer to unsigned chars` (i.e. bytes) + +Note: The message is passed to the tracepoint in full, however, due to space +limitations in the eBPF kernel VM it might not be possible to pass the message +to user-space in full. Messages longer than a 32kb might be cut off. This can +be detected in tracing scripts by comparing the message size to the length of +the passed message. + +### Context `validation` + +#### Tracepoint `validation:block_connected` + +Is called *after* a block is connected to the chain. Can, for example, be used +to benchmark block connections together with `-reindex`. + +Arguments passed: +1. Block Header Hash as `pointer to C-style String` (64 characters) +2. Block Height as `int32` +3. Transactions in the Block as `uint64` +4. Inputs spend in the Block as `int32` +5. SigOps in the Block (excluding coinbase SigOps) `uint64` +6. Time it took to connect the Block in microseconds (µs) as `uint64` +7. Block Header Hash as `pointer to unsigned chars` (i.e. 32 bytes in little-endian) + +Note: The 7th argument can't be accessed by bpftrace and is purposefully chosen +to be the block header hash as bytes. See [bpftrace argument limit] for more +details. + +[bpftrace argument limit]: #bpftrace-argument-limit + +## Adding tracepoints to Bitcoin Core + +To add a new tracepoint, `#include ` in the compilation unit where +the tracepoint is inserted. Use one of the `TRACEx` macros listed below +depending on the number of arguments passed to the tracepoint. Up to 12 +arguments can be provided. The `context` and `event` specify the names by which +the tracepoint is referred to. Please use `snake_case` and try to make sure that +the tracepoint names make sense even without detailed knowledge of the +implementation details. Do not forget to update the tracepoint list in this +document. + +```c +#define TRACE(context, event) +#define TRACE1(context, event, a) +#define TRACE2(context, event, a, b) +#define TRACE3(context, event, a, b, c) +#define TRACE4(context, event, a, b, c, d) +#define TRACE5(context, event, a, b, c, d, e) +#define TRACE6(context, event, a, b, c, d, e, f) +#define TRACE7(context, event, a, b, c, d, e, f, g) +#define TRACE8(context, event, a, b, c, d, e, f, g, h) +#define TRACE9(context, event, a, b, c, d, e, f, g, h, i) +#define TRACE10(context, event, a, b, c, d, e, f, g, h, i, j) +#define TRACE11(context, event, a, b, c, d, e, f, g, h, i, j, k) +#define TRACE12(context, event, a, b, c, d, e, f, g, h, i, j, k, l) +``` + +For example: + +```C++ +TRACE6(net, inbound_message, + pnode->GetId(), + pnode->GetAddrName().c_str(), + pnode->ConnectionTypeAsString().c_str(), + sanitizedType.c_str(), + msg.data.size(), + msg.data.data() +); +``` + +### Guidelines and best practices + +#### Clear motivation and use-case +Tracepoints need a clear motivation and use-case. The motivation should +outweigh the impact on, for example, code readability. There is no point in +adding tracepoints that don't end up being used. + +#### Provide an example +When adding a new tracepoint, provide an example. Examples can show the use case +and help reviewers testing that the tracepoint works as intended. The examples +can be kept simple but should give others a starting point when working with +the tracepoint. See existing examples in [contrib/tracing/]. + +[contrib/tracing/]: ../contrib/tracing/ + +#### No expensive computations for tracepoints +Data passed to the tracepoint should be inexpensive to compute. Although the +tracepoint itself only has overhead when enabled, the code to compute arguments +is always run - even if the tracepoint is not used. For example, avoid +serialization and parsing. + +#### Semi-stable API +Tracepoints should have a semi-stable API. Users should be able to rely on the +tracepoints for scripting. This means tracepoints need to be documented, and the +argument order ideally should not change. If there is an important reason to +change argument order, make sure to document the change and update the examples +using the tracepoint. + +#### eBPF Virtual Machine limits +Keep the eBPF Virtual Machine limits in mind. eBPF programs receiving data from +the tracepoints run in a sandboxed Linux kernel VM. This VM has a limited stack +size of 512 bytes. Check if it makes sense to pass larger amounts of data, for +example, with a tracing script that can handle the passed data. + +#### `bpftrace` argument limit +While tracepoints can have up to 12 arguments, bpftrace scripts currently only +support reading from the first six arguments (`arg0` till `arg5`) on `x86_64`. +bpftrace currently lacks real support for handling and printing binary data, +like block header hashes and txids. When a tracepoint passes more than six +arguments, then string and integer arguments should preferably be placed in the +first six argument fields. Binary data can be placed in later arguments. The BCC +supports reading from all 12 arguments. + +#### Strings as C-style String +Generally, strings should be passed into the `TRACEx` macros as pointers to +C-style strings (a null-terminated sequence of characters). For C++ +`std::strings`, [`c_str()`] can be used. It's recommended to document the +maximum expected string size if known. + + +[`c_str()`]: https://www.cplusplus.com/reference/string/string/c_str/ + + +## Listing available tracepoints + +Multiple tools can list the available tracepoints in a `bitcoind` binary with +USDT support. + +### GDB - GNU Project Debugger + +To list probes in Bitcoin Core, use `info probes` in `gdb`: + +``` +$ gdb ./src/bitcoind +… +(gdb) info probes +Type Provider Name Where Semaphore Object +stap net inbound_message 0x000000000014419e /src/bitcoind +stap net outbound_message 0x0000000000107c05 /src/bitcoind +stap validation block_connected 0x00000000002fb10c /src/bitcoind +… +``` + +### With `readelf` + +The `readelf` tool can be used to display the USDT tracepoints in Bitcoin Core. +Look for the notes with the description `NT_STAPSDT`. + +``` +$ readelf -n ./src/bitcoind | grep NT_STAPSDT -A 4 -B 2 +Displaying notes found in: .note.stapsdt + Owner Data size Description + stapsdt 0x0000005d NT_STAPSDT (SystemTap probe descriptors) + Provider: net + Name: outbound_message + Location: 0x0000000000107c05, Base: 0x0000000000579c90, Semaphore: 0x0000000000000000 + Arguments: -8@%r12 8@%rbx 8@%rdi 8@192(%rsp) 8@%rax 8@%rdx +… +``` + +### With `tplist` + +The `tplist` tool is provided by BCC (see [Installing BCC]). It displays kernel +tracepoints or USDT probes and their formats (for more information, see the +[`tplist` usage demonstration]). There are slight binary naming differences +between distributions. For example, on +[Ubuntu the binary is called `tplist-bpfcc`][ubuntu binary]. + +[Installing BCC]: https://github.com/iovisor/bcc/blob/master/INSTALL.md +[`tplist` usage demonstration]: https://github.com/iovisor/bcc/blob/master/tools/tplist_example.txt +[ubuntu binary]: https://github.com/iovisor/bcc/blob/master/INSTALL.md#ubuntu---binary + +``` +$ tplist -l ./src/bitcoind -v +b'net':b'outbound_message' [sema 0x0] + 1 location(s) + 6 argument(s) +… +``` diff --git a/src/net.cpp b/src/net.cpp index 396ccad5438f6..51a655fb51a88 100644 --- a/src/net.cpp +++ b/src/net.cpp @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include // for fDIP0001ActiveAtTip @@ -4170,10 +4171,16 @@ void CConnman::PushMessage(CNode* pnode, CSerializedNetMsg&& msg) { AssertLockNotHeld(m_total_bytes_sent_mutex); size_t nMessageSize = msg.data.size(); - LogPrint(BCLog::NET, "sending %s (%d bytes) peer=%d\n", SanitizeString(msg.m_type), nMessageSize, pnode->GetId()); - if (gArgs.GetBoolArg("-capturemessages", false)) { - CaptureMessage(pnode->addr, msg.m_type, msg.data, /* incoming */ false); - } + LogPrint(BCLog::NET, "sending %s (%d bytes) peer=%d\n", msg.m_type, nMessageSize, pnode->GetId()); + + TRACE6(net, outbound_message, + pnode->GetId(), + pnode->GetAddrName().c_str(), + pnode->ConnectionTypeAsString().c_str(), + msg.m_type.c_str(), + msg.data.size(), + msg.data.data() + ); // make sure we use the appropriate network transport format std::vector serializedHeader; diff --git a/src/net_processing.cpp b/src/net_processing.cpp index a55e8494bfa35..00b907cdde393 100644 --- a/src/net_processing.cpp +++ b/src/net_processing.cpp @@ -32,7 +32,7 @@ #include // For NDEBUG compile time check #include #include - +#include #include #include #include @@ -5011,6 +5011,15 @@ bool PeerManagerImpl::ProcessMessages(CNode* pfrom, std::atomic& interrupt } CNetMessage& msg(msgs.front()); + TRACE6(net, inbound_message, + pfrom->GetId(), + pfrom->GetAddrName().c_str(), + pfrom->ConnectionTypeAsString().c_str(), + msg.m_command.c_str(), + msg.m_recv.size(), + msg.m_recv.data() + ); + if (gArgs.GetBoolArg("-capturemessages", false)) { CaptureMessage(pfrom->addr, msg.m_type, MakeUCharSpan(msg.m_recv), /* incoming */ true); } diff --git a/src/validation.cpp b/src/validation.cpp index 93a80d5b169db..c3b3915fd207d 100644 --- a/src/validation.cpp +++ b/src/validation.cpp @@ -46,6 +46,7 @@ #include // For NDEBUG compile time check #include #include +#include #include #include #include @@ -2522,6 +2523,16 @@ bool CChainState::ConnectBlock(const CBlock& block, BlockValidationState& state, statsClient.gauge("blocks.tip.NumTransactions", block.vtx.size(), 1.0f); statsClient.gauge("blocks.tip.SigOps", nSigOps, 1.0f); + TRACE7(validation, block_connected, + block.GetHash().ToString().c_str(), + pindex->nHeight, + block.vtx.size(), + nInputs, + nSigOpsCost, + GetTimeMicros() - nTimeStart, // in microseconds (µs) + block.GetHash().data() + ); + return true; }