diff --git a/README.md b/README.md
index f9bb756..230e544 100644
--- a/README.md
+++ b/README.md
@@ -58,6 +58,7 @@ options:
-c, --cpu sample on-CPU stacks only
-x EXPOSURE, --exposure EXPOSURE
exposure time, in seconds
+ -m, --memory Collect memory allocation events
-n, --native sample native stacks
-o OUTPUT, --output OUTPUT
output location (can use %(pid) to insert the process ID)
@@ -101,6 +102,25 @@ You can normally send a `SIGQUIT` signal with the CTRL+\\
key combination.
+## Memory mode
+
+Besides wall time and CPU time, Echion can be used to profile memory
+allocations. In this mode, Echion tracks the Python memory domain allocators and
+accounts for each single event. Because of the tracing nature, this mode
+introduces considerable overhead, but gives pretty accurate results that can be
+used to investigate potential memory leaks. To fully understand the data that
+is collected in this mode, one should be aware of how Echion tracks allocations
+and deallocations. When an allocation is made, Echion records the frame stack
+that was involved and maps it to the returned memory address. When a
+deallocation for a tracked memory address is made, the freed memory is deducted
+from the same stack. Therefore, objects that are allocated and deallocated during
+the tracking period account for a total of 0 allocated bytes. This means that
+all the positive values reported by Echion represent memory that was still
+allocated by the time the tracking ended.
+
+*Since Echion 0.3.0*.
+
+
## Why Echion?
Sampling in-process comes with some benefits. One has easier access to more
diff --git a/echion/__main__.py b/echion/__main__.py
index e7693ce..f342fee 100644
--- a/echion/__main__.py
+++ b/echion/__main__.py
@@ -114,6 +114,12 @@ def main() -> None:
help="exposure time, in seconds",
type=int,
)
+ parser.add_argument(
+ "-m",
+ "--memory",
+ help="Collect memory allocation events",
+ action="store_true",
+ )
parser.add_argument(
"-n",
"--native",
@@ -165,6 +171,7 @@ def main() -> None:
env["ECHION_INTERVAL"] = str(args.interval)
env["ECHION_CPU"] = str(int(bool(args.cpu)))
+ env["ECHION_MEMORY"] = str(int(bool(args.memory)))
env["ECHION_NATIVE"] = str(int(bool(args.native)))
env["ECHION_OUTPUT"] = args.output.replace("%%(pid)", str(os.getpid()))
env["ECHION_STEALTH"] = str(int(bool(args.stealth)))
diff --git a/echion/bootstrap/__init__.py b/echion/bootstrap/__init__.py
index 056558d..35d8a2a 100644
--- a/echion/bootstrap/__init__.py
+++ b/echion/bootstrap/__init__.py
@@ -32,6 +32,7 @@ def start():
# Set the configuration
ec.set_interval(int(os.getenv("ECHION_INTERVAL", 1000)))
ec.set_cpu(bool(int(os.getenv("ECHION_CPU", 0))))
+ ec.set_memory(bool(int(os.getenv("ECHION_MEMORY", 0))))
ec.set_native(bool(int(os.getenv("ECHION_NATIVE", 0))))
ec.set_where(bool(int(os.getenv("ECHION_WHERE", 0) or 0)))
diff --git a/echion/config.h b/echion/config.h
index dfcb66f..852f9c8 100644
--- a/echion/config.h
+++ b/echion/config.h
@@ -17,8 +17,8 @@ static unsigned int interval = 1000;
// CPU Time mode
static int cpu = 0;
-// Output stream
-static std::ofstream output;
+// Memory events
+static int memory = 0;
// Native stack sampling
static int native = 0;
@@ -55,6 +55,19 @@ set_cpu(PyObject *Py_UNUSED(m), PyObject *args)
Py_RETURN_NONE;
}
+// ----------------------------------------------------------------------------
+static PyObject *
+set_memory(PyObject *Py_UNUSED(m), PyObject *args)
+{
+ int new_memory;
+ if (!PyArg_ParseTuple(args, "p", &new_memory))
+ return NULL;
+
+ memory = new_memory;
+
+ Py_RETURN_NONE;
+}
+
// ----------------------------------------------------------------------------
static PyObject *
set_native(PyObject *Py_UNUSED(m), PyObject *args)
diff --git a/echion/core.pyi b/echion/core.pyi
index 51f7854..9ab40a0 100644
--- a/echion/core.pyi
+++ b/echion/core.pyi
@@ -21,6 +21,7 @@ def init_asyncio(
# Configuration interface
def set_interval(interval: int) -> None: ...
def set_cpu(cpu: bool) -> None: ...
+def set_memory(memory: bool) -> None: ...
def set_native(native: bool) -> None: ...
def set_where(where: bool) -> None: ...
def set_pipe_name(name: str) -> None: ...
diff --git a/echion/coremodule.cc b/echion/coremodule.cc
index c5238c4..42782fe 100644
--- a/echion/coremodule.cc
+++ b/echion/coremodule.cc
@@ -28,6 +28,8 @@
#include
#include
+#include
+#include
#include
#include
#include
@@ -104,18 +106,72 @@ _start()
{
init_frame_cache(MAX_FRAMES * (1 + native));
+ try
+ {
+ mojo.open();
+ }
+ catch (MojoWriter::Error &)
+ {
+ return;
+ }
+
install_signals();
#if defined PL_DARWIN
// Get the wall time clock resource.
host_get_clock_service(mach_host_self(), CALENDAR_CLOCK, &cclock);
#endif
+
+ if (where)
+ {
+ std::ofstream pipe(pipe_name, std::ios::out);
+
+ if (pipe)
+ do_where(pipe);
+
+ else
+ std::cerr << "Failed to open pipe " << pipe_name << std::endl;
+
+ running = 0;
+
+ return;
+ }
+
+ setup_where();
+
+ mojo.header();
+
+ if (memory)
+ {
+ mojo.metadata("mode", "memory");
+ }
+ else
+ {
+ mojo.metadata("mode", (cpu ? "cpu" : "wall"));
+ }
+ mojo.metadata("interval", std::to_string(interval));
+ mojo.metadata("sampler", "echion");
+
+ // DEV: Workaround for the austin-python library: we send an empty sample
+ // to set the PID. We also map the key value 0 to the empty string, to
+ // support task name frames.
+ mojo.stack(pid, 0, "MainThread");
+ mojo.string(0, "");
+ mojo.string(1, "");
+ mojo.string(2, "");
+ mojo.metric_time(0);
+
+ if (memory)
+ setup_memory();
}
// ----------------------------------------------------------------------------
static inline void
_stop()
{
+ if (memory)
+ teardown_memory();
+
// Clean up the thread info map. When not running async, we need to guard
// the map lock because we are not in control of the sampling thread.
{
@@ -125,12 +181,16 @@ _stop()
string_table.clear();
}
+ teardown_where();
+
#if defined PL_DARWIN
mach_port_deallocate(mach_task_self(), cclock);
#endif
restore_signals();
+ mojo.close();
+
reset_frame_cache();
}
@@ -142,71 +202,37 @@ _sampler()
// hold:
// 1. The interpreter state object lives as long as the process itself.
- if (where)
- {
- std::ofstream pipe(pipe_name, std::ios::out);
-
- if (pipe)
- do_where(pipe);
-
- else
- std::cerr << "Failed to open pipe " << pipe_name << std::endl;
-
- running = 0;
-
- return;
- }
-
- setup_where();
-
last_time = gettime();
- output.open(std::getenv("ECHION_OUTPUT"));
- if (!output.is_open())
- {
- std::cerr << "Failed to open output file " << std::getenv("ECHION_OUTPUT") << std::endl;
- return;
- }
-
- mojo_header();
-
- mojo_metadata("mode", (cpu ? "cpu" : "wall"));
- mojo_metadata("interval", interval);
- mojo_metadata("sampler", "echion");
-
- // DEV: Workaround for the austin-python library: we send an empty sample
- // to set the PID. We also map the key value 0 to the empty string, to
- // support task name frames.
- mojo_stack(pid, 0, "");
- mojo_string_event(0, "");
- mojo_string_event(1, "");
- mojo_string_event(2, "");
- mojo_metric_time(0);
-
while (running)
{
microsecond_t now = gettime();
microsecond_t end_time = now + interval;
- microsecond_t wall_time = now - last_time;
- for_each_interp(
- [=](PyInterpreterState *interp) -> void
- {
- for_each_thread(
- interp,
- [=](PyThreadState *tstate, ThreadInfo &thread)
- { thread.sample(interp->id, tstate, wall_time); });
- });
+ if (memory)
+ {
+ if (rss_tracker.check())
+ stack_stats.flush();
+ }
+ else
+ {
+ microsecond_t wall_time = now - last_time;
+
+ for_each_interp(
+ [=](PyInterpreterState *interp) -> void
+ {
+ for_each_thread(
+ interp,
+ [=](PyThreadState *tstate, ThreadInfo &thread)
+ { thread.sample(interp->id, tstate, wall_time); });
+ });
+ }
while (gettime() < end_time && running)
sched_yield();
last_time = now;
}
-
- output.close();
-
- teardown_where();
}
static void
@@ -400,6 +426,7 @@ static PyMethodDef echion_core_methods[] = {
// Configuration interface
{"set_interval", set_interval, METH_VARARGS, "Set the sampling interval"},
{"set_cpu", set_cpu, METH_VARARGS, "Set whether to use CPU time instead of wall time"},
+ {"set_memory", set_memory, METH_VARARGS, "Set whether to sample memory usage"},
{"set_native", set_native, METH_VARARGS, "Set whether to sample the native stacks"},
{"set_where", set_where, METH_VARARGS, "Set whether to use where mode"},
{"set_pipe_name", set_pipe_name, METH_VARARGS, "Set the pipe name"},
diff --git a/echion/frame.h b/echion/frame.h
index 85b8d86..cafc094 100644
--- a/echion/frame.h
+++ b/echion/frame.h
@@ -27,6 +27,35 @@
#include
#include
+// ----------------------------------------------------------------------------
+#if PY_VERSION_HEX >= 0x030b0000
+static inline int
+_read_varint(unsigned char *table, ssize_t size, ssize_t *i)
+{
+ ssize_t guard = size - 1;
+ if (*i >= guard)
+ return 0;
+
+ int val = table[++*i] & 63;
+ int shift = 0;
+ while (table[*i] & 64 && *i < guard)
+ {
+ shift += 6;
+ val |= (table[++*i] & 63) << shift;
+ }
+ return val;
+}
+
+// ----------------------------------------------------------------------------
+static inline int
+_read_signed_varint(unsigned char *table, ssize_t size, ssize_t *i)
+{
+ int val = _read_varint(table, size, i);
+ return (val & 1) ? -(val >> 1) : (val >> 1);
+}
+#endif
+
+// ----------------------------------------------------------------------------
class Frame
{
public:
@@ -34,6 +63,7 @@ class Frame
using Ptr = std::unique_ptr;
using Key = uintptr_t;
+ // ------------------------------------------------------------------------
class Error : public std::exception
{
public:
@@ -43,6 +73,7 @@ class Frame
}
};
+ // ------------------------------------------------------------------------
class LocationError : public Error
{
public:
@@ -52,6 +83,8 @@ class Frame
}
};
+ // ------------------------------------------------------------------------
+
Key cache_key = 0;
StringTable::Key filename = 0;
StringTable::Key name = 0;
@@ -63,11 +96,87 @@ class Frame
int column = 0;
int column_end = 0;
} location;
+
#if PY_VERSION_HEX >= 0x030b0000
bool is_entry = false;
#endif
- void render(std::ostream &stream)
+ // ------------------------------------------------------------------------
+
+ Frame(StringTable::Key name) : name(name){};
+
+ static Frame &read(PyObject *frame_addr, PyObject **prev_addr);
+
+ static Frame &get(PyCodeObject *code_addr, int lasti);
+ static Frame &get(PyObject *frame);
+ static Frame &get(unw_cursor_t &cursor);
+ static Frame &get(StringTable::Key name);
+
+ // ------------------------------------------------------------------------
+ Frame(PyObject *frame)
+ {
+#if PY_VERSION_HEX >= 0x030b0000
+ const _PyInterpreterFrame *iframe = (_PyInterpreterFrame *)frame;
+ const int lasti = _PyInterpreterFrame_LASTI(iframe);
+ PyCodeObject *code = iframe->f_code;
+
+ PyCode_Addr2Location(code, lasti << 1, &location.line, &location.column, &location.line_end, &location.column_end);
+ location.column++;
+ location.column_end++;
+ name = string_table.key_unsafe(code->co_qualname);
+#if PY_VERSION_HEX >= 0x030c0000
+ is_entry = (iframe->owner == FRAME_OWNED_BY_CSTACK); // Shim frame
+#else
+ is_entry = iframe->is_entry;
+#endif
+
+#else
+ PyFrameObject *py_frame = (PyFrameObject *)frame;
+ const int lasti = py_frame->f_lasti;
+ PyCodeObject *code = py_frame->f_code;
+
+ location.line = PyFrame_GetLineNumber(py_frame);
+ name = string_table.key_unsafe(code->co_name);
+#endif
+ filename = string_table.key_unsafe(code->co_filename);
+ }
+
+ // ------------------------------------------------------------------------
+ Frame(PyCodeObject *code, int lasti)
+ {
+ try
+ {
+ filename = string_table.key(code->co_filename);
+#if PY_VERSION_HEX >= 0x030b0000
+ name = string_table.key(code->co_qualname);
+#else
+ name = string_table.key(code->co_name);
+#endif
+ }
+ catch (StringTable::Error &)
+ {
+ throw Error();
+ }
+
+ infer_location(code, lasti);
+ }
+
+ // ------------------------------------------------------------------------
+ Frame(unw_cursor_t &cursor, unw_word_t pc)
+ {
+ try
+ {
+ filename = string_table.key(pc);
+ name = string_table.key(cursor);
+ }
+ catch (StringTable::Error &)
+ {
+ throw Error();
+ }
+ }
+
+ // ------------------------------------------------------------------------
+ void inline render(std::ostream &stream)
{
stream
<< ";" << string_table.lookup(filename)
@@ -75,6 +184,7 @@ class Frame
<< ":" << location.line;
}
+ // ------------------------------------------------------------------------
void render_where(std::ostream &stream)
{
if ((string_table.lookup(filename)).rfind("native@", 0) == 0)
@@ -89,235 +199,223 @@ class Frame
<< "\033[0m)" << std::endl;
}
- Frame(StringTable::Key name) : name(name){};
-
- static Frame &read(PyObject *, PyObject **);
+ // ------------------------------------------------------------------------
static Frame &read(PyObject *frame_addr)
{
PyObject *unused;
- return Frame::read(frame_addr, &unused);
+ return read(frame_addr, &unused);
}
- static Frame &get(PyCodeObject *, int);
- static Frame &get(unw_cursor_t &);
- static Frame &get(StringTable::Key);
-
- Frame(PyCodeObject *, int);
- Frame(unw_cursor_t &, unw_word_t);
-
private:
- void infer_location(PyCodeObject *, int);
-
- static inline Key key(PyCodeObject *code, int lasti)
+ // ------------------------------------------------------------------------
+ void inline infer_location(PyCodeObject *code, int lasti)
{
- return (((uintptr_t)(((uintptr_t)code) & MOJO_INT32) << 16) | lasti);
- }
-};
-
-static auto INVALID_FRAME = Frame(StringTable::INVALID);
-static auto UNKNOWN_FRAME = Frame(StringTable::UNKNOWN);
+ unsigned int lineno = code->co_firstlineno;
+ Py_ssize_t len = 0;
#if PY_VERSION_HEX >= 0x030b0000
-// ----------------------------------------------------------------------------
-static inline int
-_read_varint(unsigned char *table, ssize_t size, ssize_t *i)
-{
- ssize_t guard = size - 1;
- if (*i >= guard)
- return 0;
+ auto table = pybytes_to_bytes_and_size(code->co_linetable, &len);
+ if (table == nullptr)
+ throw LocationError();
- int val = table[++*i] & 63;
- int shift = 0;
- while (table[*i] & 64 && *i < guard)
- {
- shift += 6;
- val |= (table[++*i] & 63) << shift;
- }
- return val;
-}
-
-// ----------------------------------------------------------------------------
-static inline int
-_read_signed_varint(unsigned char *table, ssize_t size, ssize_t *i)
-{
- int val = _read_varint(table, size, i);
- return (val & 1) ? -(val >> 1) : (val >> 1);
-}
-#endif
-
-// ----------------------------------------------------------------------------
-void Frame::infer_location(PyCodeObject *code, int lasti)
-{
- unsigned int lineno = code->co_firstlineno;
- Py_ssize_t len = 0;
+ auto table_data = table.get();
-#if PY_VERSION_HEX >= 0x030b0000
- auto table = pybytes_to_bytes_and_size(code->co_linetable, &len);
- if (table == nullptr)
- throw LocationError();
-
- auto table_data = table.get();
-
- for (Py_ssize_t i = 0, bc = 0; i < len; i++)
- {
- bc += (table[i] & 7) + 1;
- int code = (table[i] >> 3) & 15;
- unsigned char next_byte = 0;
- switch (code)
+ for (Py_ssize_t i = 0, bc = 0; i < len; i++)
{
- case 15:
- break;
+ bc += (table[i] & 7) + 1;
+ int code = (table[i] >> 3) & 15;
+ unsigned char next_byte = 0;
+ switch (code)
+ {
+ case 15:
+ break;
- case 14: // Long form
- lineno += _read_signed_varint(table_data, len, &i);
+ case 14: // Long form
+ lineno += _read_signed_varint(table_data, len, &i);
- this->location.line = lineno;
- this->location.line_end = lineno + _read_varint(table_data, len, &i);
- this->location.column = _read_varint(table_data, len, &i);
- this->location.column_end = _read_varint(table_data, len, &i);
+ this->location.line = lineno;
+ this->location.line_end = lineno + _read_varint(table_data, len, &i);
+ this->location.column = _read_varint(table_data, len, &i);
+ this->location.column_end = _read_varint(table_data, len, &i);
- break;
+ break;
- case 13: // No column data
- lineno += _read_signed_varint(table_data, len, &i);
+ case 13: // No column data
+ lineno += _read_signed_varint(table_data, len, &i);
- this->location.line = lineno;
- this->location.line_end = lineno;
- this->location.column = this->location.column_end = 0;
+ this->location.line = lineno;
+ this->location.line_end = lineno;
+ this->location.column = this->location.column_end = 0;
- break;
+ break;
- case 12: // New lineno
- case 11:
- case 10:
- if (i >= len - 2)
- throw LocationError();
+ case 12: // New lineno
+ case 11:
+ case 10:
+ if (i >= len - 2)
+ throw LocationError();
- lineno += code - 10;
+ lineno += code - 10;
- this->location.line = lineno;
- this->location.line_end = lineno;
- this->location.column = 1 + table[++i];
- this->location.column_end = 1 + table[++i];
+ this->location.line = lineno;
+ this->location.line_end = lineno;
+ this->location.column = 1 + table[++i];
+ this->location.column_end = 1 + table[++i];
- break;
+ break;
- default:
- if (i >= len - 1)
- throw LocationError();
+ default:
+ if (i >= len - 1)
+ throw LocationError();
- next_byte = table[++i];
+ next_byte = table[++i];
- this->location.line = lineno;
- this->location.line_end = lineno;
- this->location.column = 1 + (code << 3) + ((next_byte >> 4) & 7);
- this->location.column_end = this->location.column + (next_byte & 15);
- }
+ this->location.line = lineno;
+ this->location.line_end = lineno;
+ this->location.column = 1 + (code << 3) + ((next_byte >> 4) & 7);
+ this->location.column_end = this->location.column + (next_byte & 15);
+ }
- if (bc > lasti)
- break;
- }
+ if (bc > lasti)
+ break;
+ }
#elif PY_VERSION_HEX >= 0x030a0000
- auto table = pybytes_to_bytes_and_size(code->co_linetable, &len);
- if (table == nullptr)
- throw LocationError();
+ auto table = pybytes_to_bytes_and_size(code->co_linetable, &len);
+ if (table == nullptr)
+ throw LocationError();
- lasti <<= 1;
- for (int i = 0, bc = 0; i < len; i++)
- {
- int sdelta = table[i++];
- if (sdelta == 0xff)
- break;
+ lasti <<= 1;
+ for (int i = 0, bc = 0; i < len; i++)
+ {
+ int sdelta = table[i++];
+ if (sdelta == 0xff)
+ break;
- bc += sdelta;
+ bc += sdelta;
- int ldelta = table[i];
- if (ldelta == 0x80)
- ldelta = 0;
- else if (ldelta > 0x80)
- lineno -= 0x100;
+ int ldelta = table[i];
+ if (ldelta == 0x80)
+ ldelta = 0;
+ else if (ldelta > 0x80)
+ lineno -= 0x100;
- lineno += ldelta;
- if (bc > lasti)
- break;
- }
+ lineno += ldelta;
+ if (bc > lasti)
+ break;
+ }
#else
- auto table = pybytes_to_bytes_and_size(code->co_lnotab, &len);
- if (table == nullptr)
- throw LocationError();
+ auto table = pybytes_to_bytes_and_size(code->co_lnotab, &len);
+ if (table == nullptr)
+ throw LocationError();
- for (int i = 0, bc = 0; i < len; i++)
- {
- bc += table[i++];
- if (bc > lasti)
- break;
+ for (int i = 0, bc = 0; i < len; i++)
+ {
+ bc += table[i++];
+ if (bc > lasti)
+ break;
- if (table[i] >= 0x80)
- lineno -= 0x100;
+ if (table[i] >= 0x80)
+ lineno -= 0x100;
- lineno += table[i];
- }
+ lineno += table[i];
+ }
#endif
- this->location.line = lineno;
- this->location.line_end = lineno;
- this->location.column = 0;
- this->location.column_end = 0;
-}
-
-// ----------------------------------------------------------------------------
-Frame::Frame(PyCodeObject *code, int lasti)
-{
- try
- {
- filename = string_table.key(code->co_filename);
-#if PY_VERSION_HEX >= 0x030b0000
- name = string_table.key(code->co_qualname);
-#else
- name = string_table.key(code->co_name);
-#endif
- }
- catch (StringTable::Error &)
- {
- throw Error();
+ this->location.line = lineno;
+ this->location.line_end = lineno;
+ this->location.column = 0;
+ this->location.column_end = 0;
}
- infer_location(code, lasti);
-}
-
-Frame::Frame(unw_cursor_t &cursor, unw_word_t pc)
-{
- try
+ // ------------------------------------------------------------------------
+ static inline Key key(PyCodeObject *code, int lasti)
{
- filename = string_table.key(pc);
- name = string_table.key(cursor);
+ return (((uintptr_t)(((uintptr_t)code) & MOJO_INT32) << 16) | lasti);
}
- catch (StringTable::Error &)
+
+ // ------------------------------------------------------------------------
+ static inline Key key(PyObject *frame)
{
- throw Error();
+#if PY_VERSION_HEX >= 0x030b0000
+ const _PyInterpreterFrame *iframe = (_PyInterpreterFrame *)frame;
+ const int lasti = _PyInterpreterFrame_LASTI(iframe);
+ PyCodeObject *code = iframe->f_code;
+#else
+ const PyFrameObject *py_frame = (PyFrameObject *)frame;
+ const int lasti = py_frame->f_lasti;
+ PyCodeObject *code = py_frame->f_code;
+#endif
+ return key(code, lasti);
}
-}
+};
// ----------------------------------------------------------------------------
+static auto INVALID_FRAME = Frame(StringTable::INVALID);
+static auto UNKNOWN_FRAME = Frame(StringTable::UNKNOWN);
+
// We make this a raw pointer to prevent its destruction on exit, since we
// control the lifetime of the cache.
static LRUCache *frame_cache = nullptr;
+// ----------------------------------------------------------------------------
static void init_frame_cache(size_t capacity)
{
frame_cache = new LRUCache(capacity);
}
+// ----------------------------------------------------------------------------
static void reset_frame_cache()
{
delete frame_cache;
frame_cache = nullptr;
}
+// ------------------------------------------------------------------------
+Frame &Frame::read(PyObject *frame_addr, PyObject **prev_addr)
+{
+#if PY_VERSION_HEX >= 0x030b0000
+ _PyInterpreterFrame iframe;
+
+ if (copy_type(frame_addr, iframe))
+ throw Error();
+
+ // We cannot use _PyInterpreterFrame_LASTI because _PyCode_CODE reads
+ // from the code object.
+ const int lasti = ((int)(iframe.prev_instr - (_Py_CODEUNIT *)(iframe.f_code))) - offsetof(PyCodeObject, co_code_adaptive) / sizeof(_Py_CODEUNIT);
+ auto &frame = Frame::get(iframe.f_code, lasti);
+
+ if (&frame != &INVALID_FRAME)
+ {
+#if PY_VERSION_HEX >= 0x030c0000
+ frame.is_entry = (iframe.owner == FRAME_OWNED_BY_CSTACK); // Shim frame
+#else
+ frame.is_entry = iframe.is_entry;
+#endif
+ }
+
+ *prev_addr = &frame == &INVALID_FRAME ? NULL : (PyObject *)iframe.previous;
+
+#else // Python < 3.11
+ // Unwind the stack from leaf to root and store it in a stack. This way we
+ // can print it from root to leaf.
+ PyFrameObject py_frame;
+
+ if (copy_type(frame_addr, py_frame))
+ throw Error();
+
+ auto &frame = Frame::get(py_frame.f_code, py_frame.f_lasti);
+
+ *prev_addr = (&frame == &INVALID_FRAME) ? NULL : (PyObject *)py_frame.f_back;
+#endif
+
+ return frame;
+}
+
+// ----------------------------------------------------------------------------
Frame &Frame::get(PyCodeObject *code_addr, int lasti)
{
PyCodeObject code;
@@ -337,7 +435,12 @@ Frame &Frame::get(PyCodeObject *code_addr, int lasti)
auto new_frame = std::make_unique(&code, lasti);
new_frame->cache_key = frame_key;
auto &f = *new_frame;
- mojo_frame(frame_key, new_frame);
+ mojo.frame(
+ frame_key,
+ new_frame->filename,
+ new_frame->name,
+ new_frame->location.line, new_frame->location.line_end,
+ new_frame->location.column, new_frame->location.column_end);
frame_cache->store(frame_key, std::move(new_frame));
return f;
}
@@ -348,6 +451,32 @@ Frame &Frame::get(PyCodeObject *code_addr, int lasti)
}
}
+// ----------------------------------------------------------------------------
+Frame &Frame::get(PyObject *frame)
+{
+ auto frame_key = Frame::key(frame);
+
+ try
+ {
+ return frame_cache->lookup(frame_key);
+ }
+ catch (LRUCache::LookupError &)
+ {
+ auto new_frame = std::make_unique(frame);
+ new_frame->cache_key = frame_key;
+ auto &f = *new_frame;
+ mojo.frame(
+ frame_key,
+ new_frame->filename,
+ new_frame->name,
+ new_frame->location.line, new_frame->location.line_end,
+ new_frame->location.column, new_frame->location.column_end);
+ frame_cache->store(frame_key, std::move(new_frame));
+ return f;
+ }
+}
+
+// ----------------------------------------------------------------------------
Frame &Frame::get(unw_cursor_t &cursor)
{
unw_word_t pc;
@@ -367,7 +496,12 @@ Frame &Frame::get(unw_cursor_t &cursor)
auto frame = std::make_unique(cursor, pc);
frame->cache_key = frame_key;
auto &f = *frame;
- mojo_frame(frame_key, frame);
+ mojo.frame(
+ frame_key,
+ frame->filename,
+ frame->name,
+ frame->location.line, frame->location.line_end,
+ frame->location.column, frame->location.column_end);
frame_cache->store(frame_key, std::move(frame));
return f;
}
@@ -378,6 +512,7 @@ Frame &Frame::get(unw_cursor_t &cursor)
}
}
+// ----------------------------------------------------------------------------
Frame &Frame::get(StringTable::Key name)
{
uintptr_t frame_key = (uintptr_t)name;
@@ -390,48 +525,13 @@ Frame &Frame::get(StringTable::Key name)
auto frame = std::make_unique(name);
frame->cache_key = frame_key;
auto &f = *frame;
- mojo_frame(frame_key, frame);
+ mojo.frame(
+ frame_key,
+ frame->filename,
+ frame->name,
+ frame->location.line, frame->location.line_end,
+ frame->location.column, frame->location.column_end);
frame_cache->store(frame_key, std::move(frame));
return f;
}
}
-
-Frame &Frame::read(PyObject *frame_addr, PyObject **prev_addr)
-{
-#if PY_VERSION_HEX >= 0x030b0000
- _PyInterpreterFrame iframe;
-
- if (copy_type(frame_addr, iframe))
- throw Error();
-
- // We cannot use _PyInterpreterFrame_LASTI because _PyCode_CODE reads
- // from the code object.
- const int lasti = ((int)(iframe.prev_instr - (_Py_CODEUNIT *)(iframe.f_code))) - offsetof(PyCodeObject, co_code_adaptive) / sizeof(_Py_CODEUNIT);
- auto &frame = Frame::get(iframe.f_code, lasti);
-
- if (&frame != &INVALID_FRAME)
- {
-#if PY_VERSION_HEX >= 0x030c0000
- frame.is_entry = (iframe.owner == FRAME_OWNED_BY_CSTACK); // Shim frame
-#else
- frame.is_entry = iframe.is_entry;
-#endif
- }
-
- *prev_addr = &frame == &INVALID_FRAME ? NULL : (PyObject *)iframe.previous;
-
-#else // Python < 3.11
- // Unwind the stack from leaf to root and store it in a stack. This way we
- // can print it from root to leaf.
- PyFrameObject py_frame;
-
- if (copy_type(frame_addr, py_frame))
- throw Error();
-
- auto &frame = Frame::get(py_frame.f_code, py_frame.f_lasti);
-
- *prev_addr = (&frame == &INVALID_FRAME) ? NULL : (PyObject *)py_frame.f_back;
-#endif
-
- return frame;
-}
diff --git a/echion/interp.h b/echion/interp.h
index cdde1fc..f686a9f 100644
--- a/echion/interp.h
+++ b/echion/interp.h
@@ -2,6 +2,8 @@
//
// Copyright (c) 2023 Gabriele N. Tornetta .
+#pragma once
+
#define PY_SSIZE_T_CLEAN
#include
diff --git a/echion/memory.h b/echion/memory.h
new file mode 100644
index 0000000..983ee34
--- /dev/null
+++ b/echion/memory.h
@@ -0,0 +1,350 @@
+// This file is part of "echion" which is released under MIT.
+//
+// Copyright (c) 2023 Gabriele N. Tornetta .
+
+#pragma once
+
+#include
+
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+// ----------------------------------------------------------------------------
+class ResidentMemoryTracker // Detects changes in the process's ru_maxrss figure
+{
+public:
+ size_t size; // Last ru_maxrss value read by update()
+
+ // ------------------------------------------------------------------------
+ ResidentMemoryTracker()
+ {
+ update(); // Take the initial reading so the first check() has a baseline
+ }
+
+ // ------------------------------------------------------------------------
+ bool inline check() // Refresh the reading; true if it differs from the last
+ {
+ size_t old_size = size;
+ update();
+ return size != old_size;
+ }
+
+private:
+ // ------------------------------------------------------------------------
+ void inline update() // Store getrusage(RUSAGE_SELF).ru_maxrss into size
+ {
+ struct rusage usage;
+ getrusage(RUSAGE_SELF, &usage); // NOTE(review): return value is not checked
+ size = usage.ru_maxrss; // NOTE(review): ru_maxrss is a peak value (units are platform-specific) — confirm a high-water mark is intended
+ }
+};
+
+ResidentMemoryTracker rss_tracker;
+
+// ----------------------------------------------------------------------------
+
+class MemoryStats
+{
+public:
+ int64_t iid;
+ std::string thread_name;
+
+ FrameStack::Key stack;
+
+ size_t count;
+ ssize_t size;
+
+ // ------------------------------------------------------------------------
+ MemoryStats(int iid, std::string thread_name, FrameStack::Key stack, size_t count, size_t size)
+ : iid(iid), thread_name(thread_name), stack(stack), count(count), size(size)
+ {
+ }
+
+ // ------------------------------------------------------------------------
+ void inline render()
+ {
+ mojo.stack(pid, iid, thread_name);
+
+ stack_table.retrieve(stack).render();
+
+ mojo.metric_memory(size);
+ }
+};
+
+// ----------------------------------------------------------------------------
+struct MemoryTableEntry
+{
+ FrameStack::Key stack;
+ size_t size;
+};
+
+// ----------------------------------------------------------------------------
+class MemoryTable : public std::unordered_map
+{
+public:
+ // ------------------------------------------------------------------------
+ void link(void *address, FrameStack::Key stack, size_t size)
+ {
+ std::lock_guard lock(this->lock);
+
+ this->emplace(address, (MemoryTableEntry){stack, size});
+ }
+
+ // ------------------------------------------------------------------------
+ std::optional unlink(void *address)
+ {
+ std::lock_guard lock(this->lock);
+
+ auto it = this->find(address);
+
+ if (it != this->end())
+ {
+ auto entry = it->second;
+ erase(it);
+ return {entry};
+ }
+
+ return {};
+ }
+
+private:
+ std::mutex lock;
+};
+
+// ----------------------------------------------------------------------------
+class StackStats
+{
+public:
+ // ------------------------------------------------------------------------
+ void inline update(PyThreadState *tstate, FrameStack::Key stack, size_t size)
+ {
+ std::lock_guard lock(this->lock);
+
+ auto stack_entry = map.find(stack);
+
+ if (stack_entry == map.end())
+ {
+ if (tstate == NULL)
+ // Invalid thread state, nothing we can do.
+ return;
+
+ std::lock_guard ti_lock(thread_info_map_lock);
+
+ // Map the memory address with the stack so that we can account for
+ // the deallocations.
+ map.emplace(
+ stack,
+ MemoryStats(
+ tstate->interp->id,
+ thread_info_map[tstate->thread_id]->name,
+ stack,
+ 1,
+ size));
+ }
+ else
+ {
+ stack_entry->second.count++;
+ stack_entry->second.size += size;
+ }
+ }
+
+ // ------------------------------------------------------------------------
+ void inline update(MemoryTableEntry &entry)
+ {
+ std::lock_guard lock(this->lock);
+
+ auto stack_entry = map.find(entry.stack);
+
+ if (stack_entry != map.end())
+ stack_entry->second.size -= entry.size;
+ }
+
+ // ------------------------------------------------------------------------
+ void flush()
+ {
+ std::lock_guard lock(this->lock);
+
+ for (auto &entry : map)
+ {
+ // Emit non-trivial stack stats only
+ if (entry.second.size != 0)
+ entry.second.render();
+
+ // Reset the stats
+ entry.second.size = 0;
+ entry.second.count = 0;
+ }
+ }
+
+ // ------------------------------------------------------------------------
+ void clear()
+ {
+ std::lock_guard lock(this->lock);
+
+ map.clear();
+ }
+
+private:
+ std::mutex lock;
+ std::unordered_map map;
+};
+
+// ----------------------------------------------------------------------------
+
+// We make this a reference to a heap-allocated object so that we can avoid
+// the destruction on exit. We are in charge of cleaning up the object. Note
+// that the object will leak, but this is not a problem.
+static auto &stack_stats = *(new StackStats());
+static auto &memory_table = *(new MemoryTable());
+
+// ----------------------------------------------------------------------------
+static inline void
+general_alloc(void *address, size_t size)
+{
+ auto stack = std::make_unique();
+ auto *tstate = PyThreadState_Get(); // DEV: This should be called with the GIL held
+
+ // DEV: We unwind the stack by reading the data out of live Python objects.
+ // This works under the assumption that the objects/data structures we are
+ // interested in belong to the thread whose stack we are unwinding.
+ // Therefore, we expect these structures to remain valid and essentially
+ // immutable for the duration of the unwinding process, which happens
+ // in-line with the allocation within the calling thread.
+ unwind_python_stack_unsafe(tstate, *stack);
+
+ // Store the stack and get its key for reference
+ // TODO: Handle collision exception
+ auto stack_key = stack_table.store(std::move(stack));
+
+ // Link the memory address with the stack
+ memory_table.link(address, stack_key, size);
+
+ // Update the stack stats
+ stack_stats.update(tstate, stack_key, size);
+}
+
+// ----------------------------------------------------------------------------
+static inline void
+general_free(void *address)
+{
+ // Retrieve the stack that made the allocation
+ if (auto entry = memory_table.unlink(address))
+ // Update the stack stats
+ stack_stats.update(*entry);
+}
+
+// ----------------------------------------------------------------------------
+static void *
+echion_malloc(void *ctx, size_t n)
+{
+ auto *alloc = (PyMemAllocatorEx *)ctx;
+
+ // Make the actual allocation
+ auto address = alloc->malloc(alloc->ctx, n);
+
+ // Handle the allocation event
+ if (address != NULL)
+ general_alloc(address, n);
+
+ return address;
+}
+
+// ----------------------------------------------------------------------------
+static void *
+echion_calloc(void *ctx, size_t nelem, size_t elsize)
+{
+ auto *alloc = (PyMemAllocatorEx *)ctx;
+
+ // Make the actual allocation
+ auto address = alloc->calloc(alloc->ctx, nelem, elsize);
+
+ // Handle the allocation event
+ if (address != NULL)
+ general_alloc(address, nelem * elsize);
+
+ return address;
+}
+
+// ----------------------------------------------------------------------------
+static void *
+echion_realloc(void *ctx, void *p, size_t n) // ctx: the wrapped PyMemAllocatorEx
+{
+    auto *alloc = (PyMemAllocatorEx *)ctx;
+    // Unlink p before realloc can release it, but only book the release on
+    // success: a failed realloc leaves p allocated, so its link is restored.
+    auto old_entry = memory_table.unlink(p); // empty when p is NULL/untracked
+    auto address = alloc->realloc(alloc->ctx, p, n);
+    if (address == NULL && old_entry)
+        memory_table.link(p, old_entry->stack, old_entry->size);
+    if (address != NULL && old_entry)
+        stack_stats.update(*old_entry); // deduct the old block from its stack
+    if (address != NULL)
+        general_alloc(address, n); // track the new block under the current stack
+    return address;
+}
+
+// ----------------------------------------------------------------------------
+static void
+echion_free(void *ctx, void *p)
+{
+ auto *alloc = (PyMemAllocatorEx *)ctx;
+
+ // Handle the deallocation event
+ if (p != NULL)
+ general_free(p);
+
+ alloc->free(alloc->ctx, p);
+}
+
+// ----------------------------------------------------------------------------
+
+// DEV: We define this macro on the basis of the knowledge that the domains are
+// defined as an enum.
+#define ALLOC_DOMAIN_COUNT 3
+
+static PyMemAllocatorEx original_allocators[ALLOC_DOMAIN_COUNT];
+static PyMemAllocatorEx echion_allocator = {
+ NULL,
+ echion_malloc,
+ echion_calloc,
+ echion_realloc,
+ echion_free};
+
+// ----------------------------------------------------------------------------
+static void
+setup_memory()
+{
+ for (int i = 0; i < ALLOC_DOMAIN_COUNT; i++)
+ {
+ // Save the original allocators
+ PyMem_GetAllocator(static_cast(i), &original_allocators[i]);
+
+ // Install the new allocators
+ echion_allocator.ctx = (void *)&original_allocators[i];
+ PyMem_SetAllocator(static_cast(i), &echion_allocator);
+ }
+}
+
+// ----------------------------------------------------------------------------
+static void
+teardown_memory()
+{
+ // Restore the original allocators
+ for (int i = 0; i < ALLOC_DOMAIN_COUNT; i++)
+ PyMem_SetAllocator(static_cast(i), &original_allocators[i]);
+
+ stack_stats.flush();
+
+ stack_stats.clear();
+ stack_table.clear();
+ memory_table.clear();
+}
diff --git a/echion/mojo.h b/echion/mojo.h
index d329325..78152e7 100644
--- a/echion/mojo.h
+++ b/echion/mojo.h
@@ -4,11 +4,12 @@
#pragma once
-#include
+#include
+#include
#define MOJO_VERSION 3
-enum
+enum MojoEvent
{
MOJO_RESERVED,
MOJO_METADATA,
@@ -27,114 +28,197 @@ enum
};
#if defined __arm__
-typedef unsigned long mojo_int_t;
+using mojo_int_t = long;
+using mojo_uint_t = unsigned long;
+using mojo_ref_t = unsigned long;
#else
-typedef unsigned long long mojo_int_t;
+using mojo_int_t = long long;
+using mojo_uint_t = unsigned long long;
+using mojo_ref_t = unsigned long long;
#endif
// Bitmask to ensure that we encode at most 4 bytes for an integer.
-#define MOJO_INT32 ((mojo_int_t)(1 << (6 + 7 * 3)) - 1)
+#define MOJO_INT32 ((mojo_ref_t)(1 << (6 + 7 * 3)) - 1)
-// Primitives
+// ----------------------------------------------------------------------------
+class MojoWriter
+{
+public:
+ MojoWriter() {}
+
+ class Error : public std::exception
+ {
+ };
-#define mojo_event(event) \
- { \
- output.put((char)event); \
+ // ------------------------------------------------------------------------
+ // Open the output file named by the ECHION_OUTPUT environment variable.
+ //
+ // Throws Error, after printing a message to stderr, when the variable is
+ // not set or the file cannot be opened.
+ void
+ open()
+ {
+     // getenv returns a null pointer for an unset variable; passing that to
+     // open() or streaming it to std::cerr is not allowed, so check first.
+     const char *path = std::getenv("ECHION_OUTPUT");
+     if (path == nullptr)
+     {
+         std::cerr << "Failed to open output file: ECHION_OUTPUT is not set" << std::endl;
+         throw Error();
+     }
+
+     output.open(path);
+     if (!output.is_open())
+     {
+         std::cerr << "Failed to open output file " << path << std::endl;
+         throw Error();
+     }
 }
-#define mojo_string(string) \
- output << string; \
- output.put('\0');
+ // ------------------------------------------------------------------------
+ void close()
+ {
+ std::lock_guard guard(lock);
-static inline void
-mojo_integer(mojo_int_t integer, int sign)
-{
- unsigned char byte = integer & 0x3f;
- if (sign)
- byte |= 0x40;
+ output.flush();
+ output.close();
+ }
- integer >>= 6;
- if (integer)
- byte |= 0x80;
+ // ------------------------------------------------------------------------
+ void inline header()
+ {
+ std::lock_guard guard(lock);
- output.put(byte);
+ output << "MOJ";
+ integer(MOJO_VERSION);
+ }
- while (integer)
+ // ------------------------------------------------------------------------
+ void inline metadata(const std::string &label, const std::string &value)
{
- byte = integer & 0x7f;
- integer >>= 7;
- if (integer)
- byte |= 0x80;
- output.put(byte);
+ std::lock_guard guard(lock);
+
+ event(MOJO_METADATA);
+ string(label);
+ string(value);
}
-}
-// We expect the least significant bits to be varied enough to provide a valid
-// key. This way we can keep the size of references to a maximum of 4 bytes.
-#define mojo_ref(integer) (mojo_integer(MOJO_INT32 & ((mojo_int_t)integer), 0))
+ // ------------------------------------------------------------------------
+ void inline stack(mojo_int_t pid, mojo_int_t iid, const std::string &thread_name)
+ {
+ std::lock_guard guard(lock);
-// Mojo events
+ event(MOJO_STACK);
+ integer(pid);
+ integer(iid);
+ string(thread_name);
+ }
-#define mojo_header() \
- { \
- output << "MOJ"; \
- mojo_integer(MOJO_VERSION, 0); \
- output.flush(); \
+ // ------------------------------------------------------------------------
+ void inline frame(
+ mojo_ref_t key,
+ mojo_ref_t filename,
+ mojo_ref_t name,
+ mojo_int_t line,
+ mojo_int_t line_end,
+ mojo_int_t column,
+ mojo_int_t column_end)
+ {
+ std::lock_guard guard(lock);
+
+ event(MOJO_FRAME);
+ ref(key);
+ ref(filename);
+ ref(name);
+ integer(line);
+ integer(line_end);
+ integer(column);
+ integer(column_end);
}
-#define mojo_metadata(label, value) \
- mojo_event(MOJO_METADATA); \
- mojo_string(label); \
- mojo_string(value);
-
-#define mojo_stack(pid, iid, tid) \
- mojo_event(MOJO_STACK); \
- mojo_integer(pid, 0); \
- mojo_integer(iid, 0); \
- output << std::hex << tid; \
- output.put('\0');
-
-#define mojo_frame(key, frame) \
- mojo_event(MOJO_FRAME); \
- mojo_integer(frame->cache_key, 0); \
- mojo_ref(frame->filename); \
- mojo_ref(frame->name); \
- mojo_integer(frame->location.line, 0); \
- mojo_integer(frame->location.line_end, 0); \
- mojo_integer(frame->location.column, 0); \
- mojo_integer(frame->location.column_end, 0);
-
-static inline void
-mojo_frame_ref(mojo_int_t key)
-{
- if (key == 0)
+ // ------------------------------------------------------------------------
+ // Emit a reference to a frame. A key of 0 is written as a
+ // MOJO_FRAME_INVALID event with no payload; any other key is written as a
+ // MOJO_FRAME_REF event followed by the key. The writer lock is held for the
+ // whole event.
+ void inline frame_ref(mojo_ref_t key)
+ {
+     std::lock_guard guard(lock);
+
+     if (key != 0)
+     {
+         event(MOJO_FRAME_REF);
+         ref(key);
+         return;
+     }
+
+     event(MOJO_FRAME_INVALID);
+ }
+
+ // ------------------------------------------------------------------------
+ void inline frame_kernel(const std::string &scope)
+ {
+ std::lock_guard guard(lock);
+
+ event(MOJO_FRAME_KERNEL);
+ string(scope);
+ }
+
+ // ------------------------------------------------------------------------
+ void inline metric_time(mojo_int_t value)
+ {
+ std::lock_guard guard(lock);
+
+ event(MOJO_METRIC_TIME);
+ integer(value);
+ }
+
+ // ------------------------------------------------------------------------
+ void inline metric_memory(mojo_int_t value)
{
- mojo_event(MOJO_FRAME_INVALID);
+ std::lock_guard guard(lock);
+
+ event(MOJO_METRIC_MEMORY);
+ integer(value);
+ }
+
+ // ------------------------------------------------------------------------
+ void inline string(mojo_ref_t key, const std::string &value)
+ {
+ std::lock_guard guard(lock);
+
+ event(MOJO_STRING);
+ ref(key);
+ string(value);
}
- else
+
+ // ------------------------------------------------------------------------
+ void inline string_ref(mojo_ref_t key)
{
- mojo_event(MOJO_FRAME_REF);
- mojo_integer(key, 0);
+ std::lock_guard guard(lock);
+
+ event(MOJO_STRING_REF);
+ ref(key);
}
-}
-#define mojo_frame_kernel(scope) \
- mojo_event(MOJO_FRAME_KERNEL); \
- mojo_string(scope);
+private:
+ std::ofstream output;
+ std::mutex lock;
+
+ void inline event(MojoEvent event) { output.put((char)event); }
+ void inline string(const std::string &string) { output << string << '\0'; }
+ void inline string(const char *string) { output << string << '\0'; }
+ void inline ref(mojo_ref_t value) { integer(MOJO_INT32 & value); }
+ // Write n to the output as a variable-length integer.
+ //
+ // The first byte carries the low 6 bits of the magnitude, the sign in bit
+ // 0x40 and a continuation flag in bit 0x80. Every following byte carries
+ // the next 7 bits of the magnitude, again with 0x80 as continuation flag.
+ void inline integer(mojo_int_t n)
+ {
+     const bool sign = n < 0;
+     // Negate on the unsigned type: -n on the signed type overflows for the
+     // most negative value.
+     mojo_uint_t integer = sign ? 0 - static_cast<mojo_uint_t>(n) : static_cast<mojo_uint_t>(n);
-#define mojo_metric_time(value) \
- mojo_event(MOJO_METRIC_TIME); \
- mojo_integer(value, 0);
+     unsigned char byte = integer & 0x3f;
+     if (sign)
+         byte |= 0x40;
-#define mojo_metric_memory(value) \
- mojo_event(MOJO_METRIC_MEMORY); \
- mojo_integer(value < 0 ? -value : value, value < 0);
+     integer >>= 6;
+     if (integer)
+         byte |= 0x80;
+
+     output.put(byte);
+
+     // Remaining magnitude, 7 bits per byte, least significant group first.
+     while (integer)
+     {
+         byte = integer & 0x7f;
+         integer >>= 7;
+         if (integer)
+             byte |= 0x80;
+         output.put(byte);
+     }
+ }
+};
-#define mojo_string_event(key, string) \
- mojo_event(MOJO_STRING); \
- mojo_ref(key); \
- mojo_string(string);
+// ----------------------------------------------------------------------------
-#define mojo_string_ref(key) \
- mojo_event(MOJO_STRING_REF); \
- mojo_ref(key);
+static MojoWriter mojo;
diff --git a/echion/stacks.h b/echion/stacks.h
index a99efca..5bd37d6 100644
--- a/echion/stacks.h
+++ b/echion/stacks.h
@@ -8,19 +8,37 @@
#include
#include
+#include
+#include
#include
#define UNW_LOCAL_ONLY
#include
#include
+#include
#define MAX_FRAMES 2048
class FrameStack : public std::deque
{
public:
- void render(std::ostream &output)
+ using Ptr = std::unique_ptr;
+ using Key = Frame::Key;
+
+ // ------------------------------------------------------------------------
+ // Fold the cache keys of all frames, first to last, into a single key for
+ // the whole stack: the running value is passed through rotl() and XOR-ed
+ // with each frame's cache key. An empty stack yields 0.
+ Key key()
+ {
+     Key digest = 0;
+
+     for (const auto &frame : *this)
+         digest = rotl(digest) ^ frame.get().cache_key;
+
+     return digest;
+ }
+
+ // ------------------------------------------------------------------------
+ void render()
{
for (auto it = this->rbegin(); it != this->rend(); ++it)
{
@@ -29,11 +47,20 @@ class FrameStack : public std::deque
// This is a shim frame so we skip it.
continue;
#endif
- mojo_frame_ref((*it).get().cache_key);
+ mojo.frame_ref((*it).get().cache_key);
}
}
+
+private:
+ // ------------------------------------------------------------------------
+ // Rotate key left by one bit position.
+ // NOTE(review): the top bit wraps around to bit 0 only if Key is an
+ // unsigned integer type -- confirm against Frame::Key.
+ static inline Frame::Key rotl(Key key)
+ {
+     constexpr auto top_bit_shift = CHAR_BIT * sizeof(key) - 1;
+
+     return (key << 1) | (key >> top_bit_shift);
+ }
};
+// ----------------------------------------------------------------------------
+
static FrameStack python_stack;
static FrameStack native_stack;
static FrameStack interleaved_stack;
@@ -95,12 +122,40 @@ unwind_frame(PyObject *frame_addr, FrameStack &stack)
return count;
}
+// ----------------------------------------------------------------------------
+// Walk the chain of Python frames starting at frame and append one entry per
+// frame to stack.
+//
+// The frame structures are dereferenced directly to follow the link to the
+// previous frame. The walk stops at a NULL link, when stack holds MAX_FRAMES
+// entries, or when a frame is met a second time (a cycle). Returns the number
+// of frames appended by this call.
+static size_t
+unwind_frame_unsafe(PyObject *frame, FrameStack &stack)
+{
+    std::unordered_set<PyObject *> seen_frames; // Used to detect cycles in the stack
+    size_t count = 0;
+
+    PyObject *current_frame = frame;
+    while (current_frame != NULL && stack.size() < MAX_FRAMES)
+    {
+        // insert() reports whether the frame was new; a repeat means a cycle.
+        if (!seen_frames.insert(current_frame).second)
+            break;
+
+        count++;
+
+        stack.push_back(Frame::get(current_frame));
+
+#if PY_VERSION_HEX >= 0x030b0000
+        current_frame = (PyObject *)((_PyInterpreterFrame *)current_frame)->previous;
+#else
+        current_frame = (PyObject *)((PyFrameObject *)current_frame)->f_back;
+#endif
+    }
+
+    return count;
+}
+
// ----------------------------------------------------------------------------
static void
unwind_python_stack(PyThreadState *tstate, FrameStack &stack)
{
- std::unordered_set seen_frames; // Used to detect cycles in the stack
-
stack.clear();
#if PY_VERSION_HEX >= 0x030b0000
@@ -117,6 +172,20 @@ unwind_python_stack(PyThreadState *tstate, FrameStack &stack)
unwind_frame(frame_addr, stack);
}
+// ----------------------------------------------------------------------------
+// Rebuild stack from the frame that tstate is currently executing: stack is
+// emptied and then filled by unwind_frame_unsafe(). Where the current frame
+// is read from depends on the Python version.
+static void
+unwind_python_stack_unsafe(PyThreadState *tstate, FrameStack &stack)
+{
+    stack.clear();
+
+    PyObject *top_frame =
+#if PY_VERSION_HEX >= 0x030b0000
+        (PyObject *)tstate->cframe->current_frame;
+#else // Python < 3.11
+        (PyObject *)tstate->frame;
+#endif
+
+    unwind_frame_unsafe(top_frame, stack);
+}
+
// ----------------------------------------------------------------------------
static void
unwind_python_stack(PyThreadState *tstate)
@@ -186,3 +255,56 @@ interleave_stacks()
{
interleave_stacks(python_stack);
}
+
+// ----------------------------------------------------------------------------
+// This table is used to store entire stacks and index them by key. This is
+// used when profiling memory events to account for deallocations.
+class StackTable
+{
+public:
+    // ------------------------------------------------------------------------
+    // Take ownership of stack and file it under the key computed by
+    // FrameStack::key(). If an entry with that key already exists it is kept
+    // and the stack passed in is discarded. Returns the key in both cases.
+    FrameStack::Key inline store(FrameStack::Ptr stack)
+    {
+        std::lock_guard<std::mutex> guard(this->lock);
+
+        auto stack_key = stack->key();
+
+        auto stack_entry = table.find(stack_key);
+        if (stack_entry == table.end())
+        {
+            table.emplace(stack_key, std::move(stack));
+        }
+        else
+        {
+            // TODO: Check for collisions.
+        }
+
+        return stack_key;
+    }
+
+    // ------------------------------------------------------------------------
+    // Return the stack stored under stack_key. Throws std::out_of_range when
+    // no stack has been stored under that key. The reference stays valid
+    // until clear() is called.
+    FrameStack &retrieve(FrameStack::Key stack_key)
+    {
+        std::lock_guard<std::mutex> guard(this->lock);
+
+        // at() rejects an unknown key instead of dereferencing the end
+        // iterator.
+        return *table.at(stack_key);
+    }
+
+    // ------------------------------------------------------------------------
+    // Drop every stored stack.
+    void clear()
+    {
+        std::lock_guard<std::mutex> guard(this->lock);
+
+        table.clear();
+    }
+
+private:
+    std::unordered_map<FrameStack::Key, FrameStack::Ptr> table;
+    std::mutex lock;
+};
+
+// ----------------------------------------------------------------------------
+// stack_table is a reference bound to an object created with new, so that no
+// destructor runs for it on exit. We are in charge of cleaning up its
+// contents (StackTable::clear). The object itself is deliberately leaked,
+// which is not a problem.
+static auto &stack_table = *(new StackTable());
diff --git a/echion/strings.h b/echion/strings.h
index 726fee0..df1671c 100644
--- a/echion/strings.h
+++ b/echion/strings.h
@@ -107,7 +107,7 @@ class StringTable : public std::unordered_map
auto str = pyunicode_to_utf8(s);
#endif
this->emplace(k, str);
- mojo_string_event(k, str);
+ mojo.string(k, str);
}
catch (StringError &)
{
@@ -118,6 +118,28 @@ class StringTable : public std::unordered_map
return k;
};
+ // Python string object, converted through the CPython API.
+ //
+ // The object address is used as the key. The first time a key is seen, the
+ // text is stored in the table and emitted as a string event; later calls
+ // only return the key.
+ inline Key key_unsafe(PyObject *s)
+ {
+     const auto k = (Key)s;
+
+     // Already known: nothing to convert or emit.
+     if (this->find(k) != this->end())
+         return k;
+
+     // NOTE(review): PyUnicode_AsUTF8 can return NULL on failure, and
+     // building a std::string from NULL is not allowed -- confirm that
+     // callers only pass valid str (or, on 3.12+, int) objects.
+#if PY_VERSION_HEX >= 0x030c0000
+     // The task name might hold a PyLong for deferred task name formatting.
+     auto str = (PyLong_CheckExact(s))
+                    ? "Task-" + std::to_string(PyLong_AsLong(s))
+                    : std::string(PyUnicode_AsUTF8(s));
+#else
+     auto str = std::string(PyUnicode_AsUTF8(s));
+#endif
+     this->emplace(k, str);
+     mojo.string(k, str);
+
+     return k;
+ };
+
// Native filename by program counter
inline Key key(unw_word_t pc)
{
@@ -130,7 +152,7 @@ class StringTable : public std::unordered_map
char buffer[32] = {0};
std::snprintf(buffer, 32, "native@%p", (void *)k);
this->emplace(k, buffer);
- mojo_string_event(k, buffer);
+ mojo.string(k, buffer);
}
catch (StringError &)
{
@@ -170,7 +192,7 @@ class StringTable : public std::unordered_map
}
this->emplace(k, name);
- mojo_string_event(k, name);
+ mojo.string(k, name);
if (demangled)
std::free(demangled);
diff --git a/echion/threads.h b/echion/threads.h
index f875c4d..7055454 100644
--- a/echion/threads.h
+++ b/echion/threads.h
@@ -68,6 +68,7 @@ class ThreadInfo
(*it).get().render_where(output);
}
+ // ------------------------------------------------------------------------
ThreadInfo(uintptr_t thread_id, unsigned long native_id, const char *name)
: thread_id(thread_id), native_id(native_id), name(name)
{
@@ -191,6 +192,7 @@ void ThreadInfo::unwind(PyThreadState *tstate)
{
unwind_python_stack(tstate);
if (asyncio_loop)
+ {
try
{
unwind_tasks();
@@ -199,6 +201,7 @@ void ThreadInfo::unwind(PyThreadState *tstate)
{
// We failed to unwind tasks
}
+ }
}
}
@@ -337,37 +340,37 @@ void ThreadInfo::sample(int64_t iid, PyThreadState *tstate, microsecond_t delta)
if (current_tasks.empty())
{
// Print the PID and thread name
- mojo_stack(pid, iid, name);
+ mojo.stack(pid, iid, name);
// Print the stack
if (native)
{
interleave_stacks();
- interleaved_stack.render(output);
+ interleaved_stack.render();
}
else
- python_stack.render(output);
+ python_stack.render();
// Print the metric
- mojo_metric_time(delta);
+ mojo.metric_time(delta);
}
else
{
for (auto &task_stack : current_tasks)
{
- mojo_stack(pid, iid, name);
+ mojo.stack(pid, iid, name);
if (native)
{
// NOTE: These stacks might be non-sensical, especially with
// Python < 3.11.
interleave_stacks(*task_stack);
- interleaved_stack.render(output);
+ interleaved_stack.render();
}
else
- task_stack->render(output);
+ task_stack->render();
- mojo_metric_time(delta);
+ mojo.metric_time(delta);
}
current_tasks.clear();
diff --git a/tests/target_mem.py b/tests/target_mem.py
new file mode 100644
index 0000000..1a9c218
--- /dev/null
+++ b/tests/target_mem.py
@@ -0,0 +1,25 @@
+# This file is part of "echion" which is released under MIT.
+#
+# Copyright (c) 2023 Gabriele N. Tornetta .
+
+
+from dataclasses import dataclass
+
+
+a = []
+
+
+@dataclass
+class Foo:  # small record type; leak() creates the instances and keeps them in the module-level list
+ n: int
+
+
+def leak():  # NOTE: tests/test_memory.py queries lines 21 and 25 of this file; do not add or remove lines
+ global a
+
+ for i in range(100_000):
+ a.append(Foo(i))  # line 21: each Foo stays referenced by the module-level list
+
+
+if __name__ == "__main__":
+ leak()  # module-level call site; must stay on line 25 for tests/test_memory.py
diff --git a/tests/test_memory.py b/tests/test_memory.py
new file mode 100644
index 0000000..9cd42bc
--- /dev/null
+++ b/tests/test_memory.py
@@ -0,0 +1,19 @@
+from tests.utils import DataSummary
+from tests.utils import run_target
+
+
+def test_memory():
+    """Run tests/target_mem.py in memory mode and check the reported allocations."""
+    result, data = run_target("target_mem", "-m")
+    assert result.returncode == 0, result.stderr.decode()
+
+    md = data.metadata
+    assert md["mode"] == "memory"
+    assert md["interval"] == "1000"
+
+    summary = DataSummary(data)
+
+    expected_nthreads = 1
+    assert summary.nthreads == expected_nthreads
+    assert summary.total_metric >= 1e6 * expected_nthreads
+
+    # Frames are given as (function name, line number) in tests/target_mem.py:
+    # the module-level call to leak() on line 25 and the append on line 21.
+    # Module-level code runs in a frame named "<module>".
+    expected_stack = (("<module>", 25), ("leak", 21))
+    assert summary.query("0:MainThread", expected_stack) is not None, summary.threads["0:MainThread"]