From 5b7b1e2ea371b9a9e4aeada530b1795efbef7c13 Mon Sep 17 00:00:00 2001 From: Gabriele Tornetta Date: Sun, 16 Oct 2022 14:43:36 +0100 Subject: [PATCH] refactor: libaustin --- .github/workflows/release.yml | 6 +- .github/workflows/release_arch.yml | 7 +- .github/workflows/tests.yml | 4 +- README.md | 72 ++++++++- configure.ac | 3 + include/libaustin.h | 178 ++++++++++++++++++++++ src/Makefile.am | 10 +- src/austin.c | 74 ++++++++- src/austin.h | 2 +- src/code.h | 37 +++++ src/error.h | 13 -- src/frame.h | 236 ++++++++++++++++++++++++++++ src/linux/common.h | 4 + src/logging.h | 9 ++ src/mac/py_proc.h | 4 - src/py_proc.c | 116 +++++++++++++- src/py_proc.h | 27 +++- src/py_string.h | 1 + src/py_thread.c | 33 +++- src/py_thread.h | 9 ++ src/stack.h | 237 +---------------------------- test/cunit/libaustin.py | 25 +++ test/cunit/test_libaustin.py | 66 ++++++++ test/libaustin/frame.c | 41 +++++ test/libaustin/where.c | 43 ++++++ 25 files changed, 991 insertions(+), 266 deletions(-) create mode 100644 include/libaustin.h create mode 100644 src/code.h create mode 100644 src/frame.h create mode 100644 test/cunit/libaustin.py create mode 100644 test/cunit/test_libaustin.py create mode 100644 test/libaustin/frame.c create mode 100644 test/libaustin/where.c diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index a939fd0b..dd633d31 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -28,6 +28,10 @@ jobs: pushd src tar -Jcf austin-$VERSION-gnu-linux-amd64.tar.xz austin tar -Jcf austinp-$VERSION-gnu-linux-amd64.tar.xz austinp + + cp ../include/libaustin.h . + cp .libs/libaustin.a . 
+ tar -Jcf libaustin-$VERSION-gnu-linux-amd64.tar.xz libaustin.* popd # Build with musl @@ -40,7 +44,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: src/austin-*.tar.xz + file: src/*austin-*.tar.xz tag: ${{ github.ref }} overwrite: true file_glob: true diff --git a/.github/workflows/release_arch.yml b/.github/workflows/release_arch.yml index edd6980d..334afa12 100644 --- a/.github/workflows/release_arch.yml +++ b/.github/workflows/release_arch.yml @@ -42,9 +42,14 @@ jobs: musl-gcc -O3 -Os -s -Wall -pthread *.c -o austin -D__MUSL__ tar -Jcf austin-$VERSION-musl-linux-${{ matrix.arch }}.tar.xz austin + cp ../include/libaustin.h . + cp .libs/libaustin.a . + tar -Jcf libaustin-$VERSION-gnu-linux-${{ matrix.arch }}.tar.xz libaustin.* + mv austin-$VERSION-gnu-linux-${{ matrix.arch }}.tar.xz /artifacts mv austinp-$VERSION-gnu-linux-${{ matrix.arch }}.tar.xz /artifacts mv austin-$VERSION-musl-linux-${{ matrix.arch }}.tar.xz /artifacts + mv libaustin-$VERSION-gnu-linux-${{ matrix.arch }}.tar.xz /artifacts popd - name: Show artifacts @@ -55,7 +60,7 @@ jobs: uses: svenstaro/upload-release-action@v2 with: repo_token: ${{ secrets.GITHUB_TOKEN }} - file: artifacts/austin-*.tar.xz + file: artifacts/*austin-*.tar.xz tag: ${{ github.ref }} overwrite: true file_glob: true diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index d105cd48..27ce6190 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -78,7 +78,7 @@ jobs: steps: - uses: actions/checkout@v2 - name: Compile Austin - run: gcc -Wall -O3 -g src/*.c -o src/austin + run: gcc -Wall -O3 -g src/*.c -Iinclude -o src/austin - name: Install Python uses: actions/setup-python@v4 @@ -123,7 +123,7 @@ jobs: - name: Compile Austin run: | gcc.exe --version - gcc.exe -O3 -g -o src/austin.exe src/*.c -lpsapi -lntdll -Wall + gcc.exe -O3 -g -o src/austin.exe src/*.c -Iinclude -lpsapi -lntdll -Wall src\austin.exe --help - name: Install Python 
diff --git a/README.md b/README.md index 5b50d1ce..44879414 100644 --- a/README.md +++ b/README.md @@ -276,19 +276,19 @@ git clone --depth=1 https://github.com/P403n1x87/austin.git On Linux, one can then use the command ~~~ console -gcc -O3 -Os -Wall -pthread src/*.c -o src/austin +gcc -O3 -Os -Wall -pthread src/*.c -Iinclude -o src/austin ~~~ whereas on macOS it is enough to run ~~~ console -gcc -O3 -Os -Wall src/*.c -o src/austin +gcc -O3 -Os -Wall src/*.c -Iinclude -o src/austin ~~~ On Windows, the `-lpsapi -lntdll` switches are needed ~~~ console -gcc -O3 -Os -Wall -lpsapi -lntdll src/*.c -o src/austin +gcc -O3 -Os -Wall -lpsapi -lntdll src/*.c -Iinclude -o src/austin ~~~ Add `-DDEBUG` if you need a more verbose log. This is useful if you encounter a @@ -503,7 +503,7 @@ sudo apt install libunwind-dev binutils-dev and compile with ~~~ console -gcc -O3 -Os -Wall -pthread src/*.c -DAUSTINP -lunwind-ptrace -lunwind-generic -lbfd -o src/austinp +gcc -O3 -Os -Wall -pthread src/*.c -Iinclude -DAUSTINP -lunwind-ptrace -lunwind-generic -lbfd -o src/austinp ~~~ then use as per normal. The extra `-k/--kernel` option is available with @@ -545,6 +545,70 @@ show both native and Python frames. Highlighting helps tell frames apart. The > executable will be available as `austin.p` from the command line. +## libaustin + +The code base can also be used to generate a library to embed Austin in your +projects to unwind Python stacks or extract frame information from a running +Python process. + +A shared library on Linux can be obtained with + +~~~ console +gcc -O3 -Wall -pthread src/*.c -Iinclude -o src/libaustin.so -shared -fPIC -DLIBAUSTIN +~~~ + +The required headers are in the `include/` subfolder. More information on how +to use the library can be found in the `libaustin.h` header file. + +At a glance, this is a typical use. The library needs to be initialised at +runtime before it can be used. 
Hence, before you call any of the other exported +functions, you must do + +~~~ c +#include <libaustin.h> + +... + +{ + ... + + austin_up(); + + ... +} +~~~ + +After this call, you can attach to any Python process with + +~~~ c +pid_t pid = ...; +... +austin_handle_t proc_handle = austin_attach(pid); +~~~ + +When you want to sample the call stack of an attached process, call + +~~~ c +austin_sample(proc_handle, cb); +~~~ + +where `cb` is a callback function with signature `void (*)(pid_t pid, pid_t tid)` +that gets called once a thread stack is ready to be retrieved. To get the frames +from the sampled stack, call `austin_pop_frame()` until it returns `NULL`. + +To sample a single thread, use `austin_sample_thread(proc_handle, tid)` instead. +Then retrieve the sampled stack with `austin_pop_frame()` as above. + +Once you are done with the process, you should detach from it with + +~~~ c +austin_detach(proc_handle); +~~~ + +Once you are done with libaustin entirely, make sure to release resources with +`austin_down()`. + + ## Logging Austin uses `syslog` on Linux and macOS, and `%TEMP%\austin.log` on Windows diff --git a/configure.ac b/configure.ac index fa507967..1803a75b 100644 --- a/configure.ac +++ b/configure.ac @@ -16,6 +16,9 @@ AM_INIT_AUTOMAKE AC_PROG_CC_C99 AC_PROG_CPP +# Use libtool for libaustin. +LT_INIT + # Use the C language and compiler for the following checks AC_LANG([C]) diff --git a/include/libaustin.h b/include/libaustin.h new file mode 100644 index 00000000..a5fede9d --- /dev/null +++ b/include/libaustin.h @@ -0,0 +1,178 @@ +// This file is part of "austin" which is released under GPL. +// +// See file LICENCE or go to http://www.gnu.org/licenses/ for full license +// details. +// +// Austin is a Python frame stack sampler for CPython. +// +// Copyright (c) 2018 Gabriele N. Tornetta . +// All rights reserved.
+// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. + +#pragma once + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + + +/** + * Austin stack callback. + * + * This function is called back once the unwinding of thread stack has been + * completed, and frames are ready to be retrieved with austin_pop_frame(). + * The callback is called with the following arguments: + * + * @param pid_t the PID of the process the thread belongs to. + * @param pid_t the TID of the thread. +*/ +typedef void (*austin_callback_t)(pid_t, pid_t); + + +/** + * Austin process handle. + * + * This is a handle to an attached process that is generally required to perform + * unwinding operations. +*/ +typedef struct _py_proc_t * austin_handle_t; + + +/** + * The Austin frame structure. + * + * This structure is used to return frame stack information to the user. +*/ +typedef struct { + uintptr_t key; // a key that uniquely identifies the frame. + char * filename; // the file name of the source containing the code. + char * scope; // the name of the scope, e.g. the function name + unsigned int line; // the line number. +} austin_frame_t; + + +/** + * Initialise the Austin library. + * + * This function must be called before any other function in the library. +*/ +extern int +austin_up(); + + +/** + * Finalise the Austin library.
+ * + * This function should be called once the Austin library is no longer needed, + * to free up resources. +*/ +extern void +austin_down(); + + +/** + * Attach to a Python process. + * + * This function tries to attach to a running Python process, identified by its + * PID. If the process exists and is a valid Python process, a non-NULL handle + * is returned to the caller, to be used for further operations. + * + * Note that attaching to a Python process is a lightweight operation that does + * not interfere with the execution of the process in any way. + * + * @param pid_t The PID of the process to attach to. + * + * @return a handle to the process, or NULL if the process is not a valid + * Python process. +*/ +extern austin_handle_t +austin_attach(pid_t); + + +/** + * Detach from a Python process. + * + * This function detaches from a Python process, identified by its handle. +*/ +extern void +austin_detach(austin_handle_t); + + +/** + * Sample an attached Python process. + * + * This function samples the call stack of all the threads within the attached + * process. The passed callback function is called after every thread has been + * sampled. Frames are available to be retrieved with austin_pop_frame(). + * + * @param austin_handle_t the handle to the process to sample. + * @param austin_callback_t the callback function to call after sampling. + * + * @return 0 if the sampling was successful. +*/ +extern int +austin_sample(austin_handle_t, austin_callback_t); + + +/** + * Sample a single thread. + * + * This function samples the call stack of a single thread within the attached + * process. + * + * @param austin_handle_t the handle to the process to sample. + * @param pid_t the TID of the thread to sample. + * + * @return 0 if the sampling was successful. +*/ +extern int +austin_sample_thread(austin_handle_t, pid_t); + + +/** + * Pop a frame from the stack. + * + * This function pops a frame from the stack of the last sampled thread. 
This + * function should be called iteratively until it returns NULL, to retrieve all + * the frames in the stack. + * + * @return a valid reference to a frame structure, or NULL otherwise. +*/ +extern austin_frame_t * +austin_pop_frame(); + + +/** + * Read a single frame from the attached process. + * + * This function reads a single frame from an attached process, at the given + * remote memory location. This is useful if one is intercepting calls to, e.g. + * _PyEval_EvalFrameDefault and has access to the frame pointer (the second + * argument). This function can then be used to resolve the frame details. + * + * @param austin_handle_t the handle to the process. + * @param void * the remote memory location of the frame. + * + * @return a valid reference to a frame structure, or NULL otherwise. +*/ +extern austin_frame_t * +austin_read_frame(austin_handle_t, void *); + + +#ifdef __cplusplus +} +#endif diff --git a/src/Makefile.am b/src/Makefile.am index 92188e95..2b0efa31 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -20,7 +20,7 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see . 
-AM_CFLAGS = -I$(srcdir) -Wall -pthread +AM_CFLAGS = -I$(srcdir) -I$(srcdir)/../include -Wall -pthread OPT_FLAGS = -O3 STRIP_FLAGS = -Os -s @@ -39,6 +39,8 @@ man_MANS = austin.1 bin_PROGRAMS = austin +noinst_LTLIBRARIES = libaustin.la + # ---- Austin ---- austin_CFLAGS = $(AM_CFLAGS) $(OPT_FLAGS) $(STRIP_FLAGS) $(COVERAGE_FLAGS) $(DEBUG_OPTS) @@ -64,3 +66,9 @@ austinp_SOURCES = $(austin_SOURCES) austinp_CFLAGS = $(austin_CFLAGS) @AUSTINP_CFLAGS@ austinp_LDADD = @AUSTINP_LDADD@ endif + + +# ---- libaustin ---- + +libaustin_la_CFLAGS = $(austin_CFLAGS) -DLIBAUSTIN +libaustin_la_SOURCES = $(austin_SOURCES) diff --git a/src/austin.c b/src/austin.c index c1f9d73f..d87de8b5 100644 --- a/src/austin.c +++ b/src/austin.c @@ -30,7 +30,6 @@ #include #include -#include "argparse.h" #include "austin.h" #include "error.h" #include "events.h" @@ -45,11 +44,78 @@ #include "timing.h" #include "version.h" -#include "py_proc.h" #include "py_proc_list.h" #include "py_thread.h" +#if defined LIBAUSTIN + +#include "libaustin.h" + +int +austin_up() { + return py_thread_allocate(); +} + +void +austin_down() { + py_thread_free(); +} + +austin_handle_t +austin_attach(pid_t pid) { + py_proc_t * proc = py_proc_new(pid); + if (!isvalid(proc)) + return NULL; + + if (fail(py_proc__attach(proc, pid))) + goto failed; + + return (austin_handle_t) proc; + +failed: + py_proc__destroy(proc); + return NULL; +} + + +void +austin_detach(austin_handle_t proc) { + py_proc__destroy((py_proc_t *) proc); +} + +int +austin_sample(austin_handle_t proc, austin_callback_t callback) { + if (fail(py_proc__sample_cb((py_proc_t *) proc, callback))) + FAIL; + + SUCCESS; +} + +extern int +austin_sample_thread(austin_handle_t proc, pid_t tid) { + if (fail(py_proc__sample_thread((py_proc_t *) proc, tid))) + FAIL; + + SUCCESS; +} + +austin_frame_t * +austin_pop_frame() { + return (austin_frame_t *) py_thread_pop_frame(); +} + + +austin_frame_t * +austin_read_frame(austin_handle_t proc, void * frame_remote_address) { + 
return (austin_frame_t *) py_proc__read_frame((py_proc_t *) proc, frame_remote_address); +} + +#else /* LIBAUSTIN */ + +#include "argparse.h" + + // ---- SIGNAL HANDLING ------------------------------------------------------- static int interrupt = FALSE; @@ -405,4 +471,6 @@ int main(int argc, char ** argv) { return retval; } /* main */ -#endif +#endif /* LIBAUSTIN */ + +#endif /* AUSTIN_C */ diff --git a/src/austin.h b/src/austin.h index 75c1437d..1d3a75a6 100644 --- a/src/austin.h +++ b/src/austin.h @@ -35,4 +35,4 @@ print(f'#define VERSION "{version()}"') #define VERSION "3.4.0" // [[[end]]] -#endif +#endif // AUSTIN_H diff --git a/src/code.h b/src/code.h new file mode 100644 index 00000000..ea8ad1de --- /dev/null +++ b/src/code.h @@ -0,0 +1,37 @@ +// This file is part of "austin" which is released under GPL. +// +// See file LICENCE or go to http://www.gnu.org/licenses/ for full license +// details. +// +// Austin is a Python frame stack sampler for CPython. +// +// Copyright (c) 2018-2022 Gabriele N. Tornetta . +// All rights reserved. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . 
+ +#pragma once + +#include "py_string.h" + +#define _code__get_filename(self, pid, py_v) \ + _string_from_raddr(pid, *((void **)((void *)self + py_v->py_code.o_filename)), py_v) + +#define _code__get_name(self, pid, py_v) \ + _string_from_raddr(pid, *((void **)((void *)self + py_v->py_code.o_name)), py_v) + +#define _code__get_qualname(self, pid, py_v) \ + _string_from_raddr(pid, *((void **)((void *)self + py_v->py_code.o_qualname)), py_v) + +#define _code__get_lnotab(self, pid, len, py_v) \ + _bytes_from_raddr(pid, *((void **)((void *)self + py_v->py_code.o_lnotab)), len, py_v) diff --git a/src/error.h b/src/error.h index 7fd69e92..251ea914 100644 --- a/src/error.h +++ b/src/error.h @@ -25,9 +25,6 @@ #include -#include "logging.h" - - // generic messages #define EOK 0 #define EMMAP 1 @@ -106,15 +103,6 @@ error_get_msg(error_t); const int is_fatal(error_t); - -/** - * Log the last error - */ -#define log_error() { \ - ( is_fatal(austin_errno) ? log_f(get_last_error()) : log_e(get_last_error()) ); \ -} - - /** * Set and log the given error. * @@ -122,7 +110,6 @@ is_fatal(error_t); */ #define set_error(x) { \ austin_errno = (x); \ - log_error(); \ } #endif // ERROR_H diff --git a/src/frame.h b/src/frame.h new file mode 100644 index 00000000..ac0d2640 --- /dev/null +++ b/src/frame.h @@ -0,0 +1,236 @@ +// This file is part of "austin" which is released under GPL. +// +// See file LICENCE or go to http://www.gnu.org/licenses/ for full license +// details. +// +// Austin is a Python frame stack sampler for CPython. +// +// Copyright (c) 2018-2022 Gabriele N. Tornetta . +// All rights reserved. +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. 
+// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// You should have received a copy of the GNU General Public License +// along with this program. If not, see . + +#pragma once + +#include + +#include "cache.h" +#include "code.h" +#include "hints.h" +#include "libaustin.h" +#include "mojo.h" + + +typedef struct { + void * origin; + void * code; + int lasti; +} py_frame_t; + + +typedef austin_frame_t frame_t; + + +// ---------------------------------------------------------------------------- +static inline frame_t * +frame_new(key_dt key, char * filename, char * scope, unsigned int line) { + frame_t * frame = (frame_t *) malloc(sizeof(frame_t)); + if (!isvalid(frame)) { + return NULL; + } + + frame->key = key; + frame->filename = filename; + frame->scope = scope; + frame->line = line; + + return frame; +} + + +// ---------------------------------------------------------------------------- +static inline void +frame__destroy(frame_t * self) { + sfree(self); +} + + +#include "py_proc.h" + +#define py_frame_key(code, lasti) (((key_dt) code << 16) | lasti) +#define py_string_key(code, field) ((key_dt) *((void **) ((void *) &code + py_v->py_code.field))) + + +// ---------------------------------------------------------------------------- +static inline int +_read_varint(unsigned char * lnotab, size_t * i) { + int val = lnotab[++*i] & 63; + int shift = 0; + while (lnotab[*i] & 64) { + shift += 6; + val |= (lnotab[++*i] & 63) << shift; + } + return val; +} + + +// ---------------------------------------------------------------------------- +static inline int +_read_signed_varint(unsigned char * lnotab, size_t * i) { + int val = _read_varint(lnotab, i); + return (val & 1) ? 
-(val >> 1) : (val >> 1); +} + +// ---------------------------------------------------------------------------- +static inline frame_t * +_frame_from_code_raddr(py_proc_t * py_proc, void * code_raddr, int lasti, python_v * py_v) { + PyCodeObject code; + unsigned char * lnotab = NULL; + proc_ref_t pref = py_proc->proc_ref; + + if (fail(copy_py(pref, code_raddr, py_code, code))) { + log_ie("Cannot read remote PyCodeObject"); + return NULL; + } + + key_dt string_key = py_string_key(code, o_filename); + char * filename = lru_cache__maybe_hit(py_proc->string_cache, string_key); + if (!isvalid(filename)) { + filename = _code__get_filename(&code, pref, py_v); + if (!isvalid(filename)) { + log_ie("Cannot get file name from PyCodeObject"); + return NULL; + } + lru_cache__store(py_proc->string_cache, string_key, filename); + if (pargs.binary) { + mojo_string_event(filename, filename); + } + } + + string_key = V_MIN(3, 11) ? py_string_key(code, o_qualname) : py_string_key(code, o_name); + char * scope = lru_cache__maybe_hit(py_proc->string_cache, string_key); + if (!isvalid(scope)) { + scope = V_MIN(3, 11) + ? 
_code__get_qualname(&code, pref, py_v) + : _code__get_name(&code, pref, py_v); + if (!isvalid(scope)) { + log_ie("Cannot get scope name from PyCodeObject"); + return NULL; + } + lru_cache__store(py_proc->string_cache, string_key, scope); + if (pargs.binary) { + mojo_string_event(scope, scope); + } + } + + ssize_t len = 0; + int lineno = V_FIELD(unsigned int, code, py_code, o_firstlineno); + + if (V_MIN(3, 11)) { + lnotab = _code__get_lnotab(&code, pref, &len, py_v); + if (!isvalid(lnotab) || len == 0) { + log_ie("Cannot get line information from PyCodeObject"); + goto failed; + } + + lasti >>= 1; + + for (size_t i = 0, bc = 0; i < len; i++) { + bc += (lnotab[i] & 7) + 1; + int code = (lnotab[i] >> 3) & 15; + switch (code) { + case 15: + break; + + case 14: // Long form + lineno += _read_signed_varint(lnotab, &i); + _read_varint(lnotab, &i); // end line + _read_varint(lnotab, &i); // column + _read_varint(lnotab, &i); // end column + break; + + case 13: // No column data + lineno += _read_signed_varint(lnotab, &i); + break; + + case 12: // New lineno + case 11: + case 10: + lineno += code - 10; + i += 2; // skip column + end column + break; + + default: + i++; // skip column + } + + if (bc > lasti) + break; + } + } + else { + lnotab = _code__get_lnotab(&code, pref, &len, py_v); + if (!isvalid(lnotab) || len % 2) { + log_ie("Cannot get line information from PyCodeObject"); + goto failed; + } + + if (V_MIN(3, 10)) { + lasti <<= 1; + for (register int i = 0, bc = 0; i < len; i++) { + int sdelta = lnotab[i++]; + if (sdelta == 0xff) + break; + + bc += sdelta; + + int ldelta = lnotab[i]; + if (ldelta == 0x80) + ldelta = 0; + else if (ldelta > 0x80) + lineno -= 0x100; + + lineno += ldelta; + if (bc > lasti) + break; + } + } + else { // Python < 3.10 + for (register int i = 0, bc = 0; i < len; i++) { + bc += lnotab[i++]; + if (bc > lasti) + break; + + if (lnotab[i] >= 0x80) + lineno -= 0x100; + + lineno += lnotab[i]; + } + } + } + + free(lnotab); + + frame_t * frame = 
frame_new(py_frame_key(code_raddr, lasti), filename, scope, lineno); + if (!isvalid(frame)) { + log_e("Failed to create frame object"); + goto failed; + } + + return frame; + +failed: + sfree(lnotab); + + return NULL; +} \ No newline at end of file diff --git a/src/linux/common.h b/src/linux/common.h index 6e8bc8cb..5363e696 100644 --- a/src/linux/common.h +++ b/src/linux/common.h @@ -37,7 +37,11 @@ static uintptr_t _pthread_buffer[PTHREAD_BUFFER_ITEMS]; (copy_memory(pid, addr, sizeof(_pthread_buffer), _pthread_buffer)) +typedef pid_t proc_ref_t; + + struct _proc_extra_info { + pid_t proc_ref; unsigned int page_size; char statm_file[24]; pthread_t wait_thread_id; diff --git a/src/logging.h b/src/logging.h index db4cb8d1..44261878 100644 --- a/src/logging.h +++ b/src/logging.h @@ -29,6 +29,7 @@ #include "argparse.h" #include "austin.h" +#include "error.h" #define META_HEAD "# " @@ -126,4 +127,12 @@ logger_close(void); void log_meta_header(void); +/** + * Log the last error + */ +#define log_error() { \ + ( is_fatal(austin_errno) ? 
log_f(get_last_error()) : log_e(get_last_error()) ); \ +} + + #endif // LOGGING_H diff --git a/src/mac/py_proc.h b/src/mac/py_proc.h index b451bc1c..670e79ff 100644 --- a/src/mac/py_proc.h +++ b/src/mac/py_proc.h @@ -448,11 +448,7 @@ _py_proc__get_maps(py_proc_t * self) { while (mach_vm_region( self->proc_ref, - &address, &size, - VM_REGION_BASIC_INFO_64, - (vm_region_info_t) ®ion_info, - &count, &object_name ) == KERN_SUCCESS) { if ((void *) address < self->min_raddr) diff --git a/src/py_proc.c b/src/py_proc.c index d7bbcd31..1b02ac30 100644 --- a/src/py_proc.c +++ b/src/py_proc.c @@ -46,7 +46,6 @@ #include "hints.h" #include "logging.h" #include "mem.h" -#include "stack.h" #include "stats.h" #include "py_proc.h" @@ -1402,3 +1401,118 @@ py_proc__destroy(py_proc_t * self) { free(self); } + + +#ifdef LIBAUSTIN +int +py_proc__sample_cb(py_proc_t * self, void (*cb)(pid_t, pid_t)) { + V_DESC(self->py_v); + + PyInterpreterState is; + if (fail(py_proc__get_type(self, self->is_raddr, is))) { + FAIL; + } + + void * tstate_head = V_FIELD(void *, is, py_is, o_tstate_head); + if (isvalid(tstate_head)) { + raddr_t raddr = { .pref = self->proc_ref, .addr = tstate_head }; + py_thread_t py_thread; + + if (fail(py_thread__fill_from_raddr(&py_thread, &raddr, self))) { + if (is_fatal(austin_errno)) { + FAIL; + } + SUCCESS; + } + + do { + py_thread__unwind_stack(&py_thread); + cb(self->pid, py_thread.tid); + } while (success(py_thread__next(&py_thread))); + } + + SUCCESS; +} + +int +py_proc__sample_thread(py_proc_t * self, pid_t tid) { + V_DESC(self->py_v); + + PyInterpreterState is; + if (fail(py_proc__get_type(self, self->is_raddr, is))) { + FAIL; + } + + void * tstate_head = V_FIELD(void *, is, py_is, o_tstate_head); + if (isvalid(tstate_head)) { + raddr_t raddr = { .pref = self->proc_ref, .addr = tstate_head }; + py_thread_t py_thread; + + if (fail(py_thread__fill_from_raddr(&py_thread, &raddr, self))) { + if (is_fatal(austin_errno)) { + FAIL; + } + SUCCESS; + } + + do { + if 
(py_thread.tid == tid) { + py_thread__unwind_stack(&py_thread); + SUCCESS; + } + } while (success(py_thread__next(&py_thread))); + } + + FAIL; +} + + +// ---------------------------------------------------------------------------- +frame_t * +py_proc__read_frame(py_proc_t * proc, void * remote_address) { + V_DESC(proc->py_v); + + lru_cache_t * cache = proc->frame_cache; + + void * code = NULL; + int lasti = 0; + + if (V_MIN(3, 11)) { + PyInterpreterFrame iframe; + + if (fail(copy_py(proc->proc_ref, remote_address, py_iframe, iframe))) { + log_ie("Cannot read remote PyInterpreterFrame"); + return NULL; + } + + code = V_FIELD(void *, iframe, py_iframe, o_code); + lasti = ((int)(V_FIELD(void *, iframe, py_iframe, o_prev_instr) - code)) - py_v->py_code.o_code; + } + else { + PyFrameObject frame_obj; + + if (fail(copy_py(proc->proc_ref, remote_address, py_frame, frame_obj))) { + log_ie("Cannot read remote PyFrameObject"); + return NULL; + } + + code = V_FIELD(void *, frame_obj, py_frame, o_code); + lasti = V_FIELD(int , frame_obj, py_frame, o_lasti); + } + + key_dt frame_key = py_frame_key(code, lasti); + frame_t * frame = lru_cache__maybe_hit(cache, frame_key); + if (!isvalid(frame)) { + frame = _frame_from_code_raddr(proc, code, lasti, py_v); + if (!isvalid(frame)) { + log_ie("Failed to get frame from code object"); + return NULL; + } + lru_cache__store(cache, frame_key, frame); + } + + return frame; +} + + +#endif /* LIBAUSTIN */ \ No newline at end of file diff --git a/src/py_proc.h b/src/py_proc.h index 462ad174..2d943fa7 100644 --- a/src/py_proc.h +++ b/src/py_proc.h @@ -56,7 +56,7 @@ typedef struct { typedef struct _proc_extra_info proc_extra_info; // Forward declaration. -typedef struct { +typedef struct _py_proc_t { pid_t pid; proc_ref_t proc_ref; int child; @@ -233,4 +233,29 @@ py_proc__terminate(py_proc_t *); void py_proc__destroy(py_proc_t *); +#ifdef LIBAUSTIN +#include "frame.h" + +/** + * Sample the frame stack of each thread of the given Python process. 
+ * + * @param py_proc_t * self. + * @param void (*callback)(pid_t, pid_t) the callback function to call + * when a thread stack is available. + + * @return 0 if the sampling succeeded; 1 otherwise. + */ +int +py_proc__sample_cb(py_proc_t *, void (*)(pid_t, pid_t)); + + +int +py_proc__sample_thread(py_proc_t *, pid_t); + + +frame_t * +py_proc__read_frame(py_proc_t *, void *); + +#endif + #endif // PY_PROC_H diff --git a/src/py_string.h b/src/py_string.h index 7914d0e8..3e41c1e4 100644 --- a/src/py_string.h +++ b/src/py_string.h @@ -26,6 +26,7 @@ #include #include +#include "error.h" #include "hints.h" #include "logging.h" #include "mem.h" diff --git a/src/py_thread.c b/src/py_thread.c index 0cd7a2ce..b21915d9 100644 --- a/src/py_thread.c +++ b/src/py_thread.c @@ -173,7 +173,7 @@ _py_thread__resolve_py_stack(py_thread_t * self) { frame_t * frame = lru_cache__maybe_hit(cache, frame_key); if (!isvalid(frame)) { - frame = _frame_from_code_raddr(self, py_frame.code, lasti, self->proc->py_v); + frame = _frame_from_code_raddr(self->proc, py_frame.code, lasti, self->proc->py_v); if (!isvalid(frame)) { log_ie("Failed to get frame from code object"); // Truncate the stack to the point where we have successfully resolved. @@ -1091,3 +1091,34 @@ py_thread_free(void) { sfree(_kstacks); #endif } + + +#ifdef LIBAUSTIN +void +py_thread__unwind_stack(py_thread_t * self) { + + V_DESC(self->proc->py_v); + + if (isvalid(self->top_frame)) { + if (V_MIN(3, 11)) { + if (fail(_py_thread__unwind_cframe_stack(self))) { + } + } + else { + if (fail(_py_thread__unwind_frame_stack(self))) { + } + } + + if (fail(_py_thread__resolve_py_stack(self))) { + } + } +} /* py_thread__unwind_stack */ + + +frame_t * +py_thread_pop_frame() { + return stack_is_empty() ?
NULL : stack_pop(); +} /* py_thread_pop_frame */ + + +#endif /* LIBAUSTIN */ \ No newline at end of file diff --git a/src/py_thread.h b/src/py_thread.h index 31fd7206..59eb4d8b 100644 --- a/src/py_thread.h +++ b/src/py_thread.h @@ -26,6 +26,7 @@ #include #include +#include "frame.h" #include "mem.h" #include "py_proc.h" #include "stats.h" @@ -117,5 +118,13 @@ int py_thread__save_kernel_stack(py_thread_t *); #endif +#ifdef LIBAUSTIN +void +py_thread__unwind_stack(py_thread_t *); + +frame_t * +py_thread_pop_frame(); + +#endif // LIBAUSTIN #endif // PY_THREAD_H diff --git a/src/stack.h b/src/stack.h index 6e8dd9a9..ecfda8e4 100644 --- a/src/stack.h +++ b/src/stack.h @@ -26,30 +26,10 @@ #include #include -#include "cache.h" #include "hints.h" -#include "py_proc.h" -#include "py_string.h" -#include "py_thread.h" +#include "frame.h" #include "version.h" -typedef struct { - key_dt key; - char * filename; - char * scope; - unsigned int line; -} frame_t; - -#define py_frame_key(code, lasti) (((key_dt) code << 16) | lasti) -#define py_string_key(code, field) ((key_dt) *((void **) ((void *) &code + py_v->py_code.field))) - -#ifdef PY_THREAD_C - -typedef struct { - void * origin; - void * code; - int lasti; -} py_frame_t; typedef struct { size_t size; @@ -65,22 +45,8 @@ typedef struct { #endif } stack_dt; -static stack_dt * _stack; - -static inline frame_t * -frame_new(key_dt key, char * filename, char * scope, unsigned int line) { - frame_t * frame = (frame_t *) malloc(sizeof(frame_t)); - if (!isvalid(frame)) { - return NULL; - } - - frame->key = key; - frame->filename = filename; - frame->scope = scope; - frame->line = line; - return frame; -} +static stack_dt * _stack; static inline int stack_allocate(size_t size) { @@ -115,6 +81,8 @@ stack_deallocate(void) { #endif free(_stack); + + _stack = NULL; } #ifdef NATIVE @@ -179,201 +147,4 @@ stack_py_push(void * origin, void * code, int lasti) { #endif -// 
---------------------------------------------------------------------------- -#define _code__get_filename(self, pid, py_v) \ - _string_from_raddr( \ - pid, *((void **) ((void *) self + py_v->py_code.o_filename)), py_v \ - ) - -#define _code__get_name(self, pid, py_v) \ - _string_from_raddr( \ - pid, *((void **) ((void *) self + py_v->py_code.o_name)), py_v \ - ) - -#define _code__get_qualname(self, pid, py_v) \ - _string_from_raddr( \ - pid, *((void **) ((void *) self + py_v->py_code.o_qualname)), py_v \ - ) - -#define _code__get_lnotab(self, pid, len, py_v) \ - _bytes_from_raddr( \ - pid, *((void **) ((void *) self + py_v->py_code.o_lnotab)), len, py_v \ - ) - - -// ---------------------------------------------------------------------------- -static inline int -_read_varint(unsigned char * lnotab, size_t * i) { - int val = lnotab[++*i] & 63; - int shift = 0; - while (lnotab[*i] & 64) { - shift += 6; - val |= (lnotab[++*i] & 63) << shift; - } - return val; -} - - -// ---------------------------------------------------------------------------- -static inline int -_read_signed_varint(unsigned char * lnotab, size_t * i) { - int val = _read_varint(lnotab, i); - return (val & 1) ? 
-(val >> 1) : (val >> 1); -} - -// ---------------------------------------------------------------------------- -static inline frame_t * -_frame_from_code_raddr(py_thread_t * py_thread, void * code_raddr, int lasti, python_v * py_v) { - PyCodeObject code; - unsigned char * lnotab = NULL; - py_proc_t * py_proc = py_thread->proc; - pid_t pid = py_thread->raddr.pref; - - if (fail(copy_py(pid, code_raddr, py_code, code))) { - log_ie("Cannot read remote PyCodeObject"); - return NULL; - } - - key_dt string_key = py_string_key(code, o_filename); - char * filename = lru_cache__maybe_hit(py_proc->string_cache, string_key); - if (!isvalid(filename)) { - filename = _code__get_filename(&code, pid, py_v); - if (!isvalid(filename)) { - log_ie("Cannot get file name from PyCodeObject"); - return NULL; - } - lru_cache__store(py_proc->string_cache, string_key, filename); - if (pargs.binary) { - mojo_string_event(filename, filename); - } - } - - string_key = V_MIN(3, 11) ? py_string_key(code, o_qualname) : py_string_key(code, o_name); - char * scope = lru_cache__maybe_hit(py_proc->string_cache, string_key); - if (!isvalid(scope)) { - scope = V_MIN(3, 11) - ? 
_code__get_qualname(&code, pid, py_v) - : _code__get_name(&code, pid, py_v); - if (!isvalid(scope)) { - log_ie("Cannot get scope name from PyCodeObject"); - return NULL; - } - lru_cache__store(py_proc->string_cache, string_key, scope); - if (pargs.binary) { - mojo_string_event(scope, scope); - } - } - - ssize_t len = 0; - int lineno = V_FIELD(unsigned int, code, py_code, o_firstlineno); - - if (V_MIN(3, 11)) { - lnotab = _code__get_lnotab(&code, pid, &len, py_v); - if (!isvalid(lnotab) || len == 0) { - log_ie("Cannot get line information from PyCodeObject"); - goto failed; - } - - lasti >>= 1; - - for (size_t i = 0, bc = 0; i < len; i++) { - bc += (lnotab[i] & 7) + 1; - int code = (lnotab[i] >> 3) & 15; - switch (code) { - case 15: - break; - - case 14: // Long form - lineno += _read_signed_varint(lnotab, &i); - _read_varint(lnotab, &i); // end line - _read_varint(lnotab, &i); // column - _read_varint(lnotab, &i); // end column - break; - - case 13: // No column data - lineno += _read_signed_varint(lnotab, &i); - break; - - case 12: // New lineno - case 11: - case 10: - lineno += code - 10; - i += 2; // skip column + end column - break; - - default: - i++; // skip column - } - - if (bc > lasti) - break; - } - } - else { - lnotab = _code__get_lnotab(&code, pid, &len, py_v); - if (!isvalid(lnotab) || len % 2) { - log_ie("Cannot get line information from PyCodeObject"); - goto failed; - } - - if (V_MIN(3, 10)) { - lasti <<= 1; - for (register int i = 0, bc = 0; i < len; i++) { - int sdelta = lnotab[i++]; - if (sdelta == 0xff) - break; - - bc += sdelta; - - int ldelta = lnotab[i]; - if (ldelta == 0x80) - ldelta = 0; - else if (ldelta > 0x80) - lineno -= 0x100; - - lineno += ldelta; - if (bc > lasti) - break; - } - } - else { // Python < 3.10 - for (register int i = 0, bc = 0; i < len; i++) { - bc += lnotab[i++]; - if (bc > lasti) - break; - - if (lnotab[i] >= 0x80) - lineno -= 0x100; - - lineno += lnotab[i]; - } - } - } - - free(lnotab); - - frame_t * frame = 
frame_new(py_frame_key(code_raddr, lasti), filename, scope, lineno); - if (!isvalid(frame)) { - log_e("Failed to create frame object"); - goto failed; - } - - return frame; - -failed: - sfree(lnotab); - - return NULL; -} - - -#endif // PY_THREAD_C - - -// ---------------------------------------------------------------------------- -static inline void -frame__destroy(frame_t * self) { - sfree(self); -} - #endif // STACK_H diff --git a/test/cunit/libaustin.py b/test/cunit/libaustin.py new file mode 100644 index 00000000..819d0b0d --- /dev/null +++ b/test/cunit/libaustin.py @@ -0,0 +1,25 @@ +import sys +import typing as t +from ctypes import CDLL, POINTER, Structure, c_char_p, c_int, c_ulong, c_void_p +from test.cunit import SRC +from types import ModuleType + +la = CDLL(str(SRC / "libaustin.so")) + + +class Frame(Structure): + _fields_ = [ + ("key", c_ulong), + ("filename", c_char_p), + ("scope", c_char_p), + ("line", c_int), + ] + + +la.austin_pop_frame.restype = POINTER(Frame) + +la.austin_read_frame.argtypes = [c_void_p, c_ulong] +la.austin_read_frame.restype = POINTER(Frame) + + +sys.modules[__name__] = t.cast(ModuleType, la) diff --git a/test/cunit/test_libaustin.py b/test/cunit/test_libaustin.py new file mode 100644 index 00000000..dbe263dd --- /dev/null +++ b/test/cunit/test_libaustin.py @@ -0,0 +1,66 @@ +import ctypes as c +import os +import sys +import test.cunit.libaustin as la +from test.utils import requires_sudo + +import pytest + + +@pytest.fixture +def handle(): + la.austin_up() + + pid = os.getpid() + handle = la.austin_attach(pid) + + assert handle + + yield handle + + la.austin_detach(handle) + + la.austin_down() + + +def test_libaustin_up_down(): + for _ in range(10): + la.austin_up() + la.austin_down() + + +@requires_sudo +def test_libaustin_sample(handle): + last_frame = None + seen_pid = None + + @c.CFUNCTYPE(None, c.c_int, c.c_int) + def cb(pid, tid): + nonlocal last_frame, seen_pid + + seen_pid = pid + while True: + frame = 
la.austin_pop_frame() + if not frame: + return + + last_frame = frame.contents + + la.austin_sample(handle, cb) + + frame = sys._getframe() + + assert seen_pid == os.getpid() + + assert last_frame.filename.decode() == frame.f_code.co_filename + assert last_frame.scope.decode() == frame.f_code.co_name + + +@requires_sudo +def test_libaustin_read_frame(handle): + frame = sys._getframe() + + austin_frame = la.austin_read_frame(handle, id(frame)).contents + + assert austin_frame.filename.decode() == frame.f_code.co_filename + assert austin_frame.scope.decode() == frame.f_code.co_name diff --git a/test/libaustin/frame.c b/test/libaustin/frame.c new file mode 100644 index 00000000..44d94fea --- /dev/null +++ b/test/libaustin/frame.c @@ -0,0 +1,41 @@ +#include +#include +#include + +#include "libaustin.h" + +int main(int argc, char **argv) +{ + if (argc != 3) + { + fprintf(stderr, "Usage: frame \n"); + return -1; + } + + pid_t pid = atoi(argv[1]); + void *frame_addr = (void *)atoll(argv[2]); + + if (austin_up() != 0) + { + perror("austin_up() failed"); + return -1; + } + + austin_handle_t proc_handle = austin_attach(pid); + + if (proc_handle == NULL) + { + perror("Failed to attach to process"); + return -1; + } + + austin_frame_t *frame = austin_read_frame(proc_handle, frame_addr); + + printf("%s (%s:%d)\n", frame->scope, frame->filename, frame->line); + + austin_detach(proc_handle); + + austin_down(); + + return 0; +} diff --git a/test/libaustin/where.c b/test/libaustin/where.c new file mode 100644 index 00000000..13c56c7a --- /dev/null +++ b/test/libaustin/where.c @@ -0,0 +1,43 @@ +#include +#include +#include + +#include "libaustin.h" + +void unwind(pid_t pid, pid_t tid) { + printf("\n\npid: %d, tid: %d\n\n", pid, tid); + + austin_frame_t *frame = NULL; + while ((frame = austin_pop_frame()) != NULL) { + printf(" %s (%s:%d)\n", frame->scope, frame->filename, frame->line); + } +} + +int main(int argc, char **argv) { + if (argc != 2) { + fprintf(stderr, "Usage: 
test_libaustin \n"); + return -1; + } + + pid_t pid = atoi(argv[1]); + + if (austin_up() != 0) { + perror("austin_up() failed"); + return -1; + } + + austin_handle_t proc_handle = austin_attach(pid); + + if (proc_handle == NULL) { + perror("Failed to attach to process"); + return -1; + } + + austin_sample(proc_handle, unwind); + + austin_detach(proc_handle); + + austin_down(); + + return 0; +}