From 9b7cddacf9f4b25acd0613c5d3f17dce5619d224 Mon Sep 17 00:00:00 2001 From: "Gabriele N. Tornetta" Date: Sat, 28 Oct 2023 11:45:07 +0100 Subject: [PATCH] feat!: switch to MOJO output We make MOJO the output format for Echion. --- echion/coremodule.cc | 16 ++++- echion/frame.h | 19 ++++-- echion/mojo.h | 140 +++++++++++++++++++++++++++++++++++++++++++ echion/stacks.h | 2 +- echion/strings.h | 12 ++-- echion/threads.h | 8 +-- pyproject.toml | 2 +- tests/utils.py | 48 ++++++++------- 8 files changed, 204 insertions(+), 43 deletions(-) create mode 100644 echion/mojo.h diff --git a/echion/coremodule.cc b/echion/coremodule.cc index 357b53f..c5238c4 100644 --- a/echion/coremodule.cc +++ b/echion/coremodule.cc @@ -168,8 +168,20 @@ _sampler() return; } - output << "# mode: " << (cpu ? "cpu" : "wall") << std::endl; - output << "# interval: " << interval << std::endl; + mojo_header(); + + mojo_metadata("mode", (cpu ? "cpu" : "wall")); + mojo_metadata("interval", interval); + mojo_metadata("sampler", "echion"); + + // DEV: Workaround for the austin-python library: we send an empty sample + // to set the PID. We also map the key value 0 to the empty string, to + // support task name frames. 
+ mojo_stack(pid, 0, ""); + mojo_string_event(0, ""); + mojo_string_event(1, ""); + mojo_string_event(2, ""); + mojo_metric_time(0); while (running) { diff --git a/echion/frame.h b/echion/frame.h index 3409fe6..85b8d86 100644 --- a/echion/frame.h +++ b/echion/frame.h @@ -23,16 +23,16 @@ #include #include +#include #include #include -#define MOJO_INT32 ((uintptr_t)(1 << (6 + 7 * 3)) - 1) - class Frame { public: - typedef std::reference_wrapper Ref; - typedef std::unique_ptr Ptr; + using Ref = std::reference_wrapper; + using Ptr = std::unique_ptr; + using Key = uintptr_t; class Error : public std::exception { @@ -52,6 +52,7 @@ class Frame } }; + Key cache_key = 0; StringTable::Key filename = 0; StringTable::Key name = 0; @@ -107,7 +108,7 @@ class Frame private: void infer_location(PyCodeObject *, int); - static inline uintptr_t key(PyCodeObject *code, int lasti) + static inline Key key(PyCodeObject *code, int lasti) { return (((uintptr_t)(((uintptr_t)code) & MOJO_INT32) << 16) | lasti); } @@ -323,7 +324,7 @@ Frame &Frame::get(PyCodeObject *code_addr, int lasti) if (copy_type(code_addr, code)) return INVALID_FRAME; - uintptr_t frame_key = Frame::key(code_addr, lasti); + auto frame_key = Frame::key(code_addr, lasti); try { @@ -334,7 +335,9 @@ Frame &Frame::get(PyCodeObject *code_addr, int lasti) try { auto new_frame = std::make_unique(&code, lasti); + new_frame->cache_key = frame_key; auto &f = *new_frame; + mojo_frame(frame_key, new_frame); frame_cache->store(frame_key, std::move(new_frame)); return f; } @@ -362,7 +365,9 @@ Frame &Frame::get(unw_cursor_t &cursor) try { auto frame = std::make_unique(cursor, pc); + frame->cache_key = frame_key; auto &f = *frame; + mojo_frame(frame_key, frame); frame_cache->store(frame_key, std::move(frame)); return f; } @@ -383,7 +388,9 @@ Frame &Frame::get(StringTable::Key name) catch (LRUCache::LookupError &) { auto frame = std::make_unique(name); + frame->cache_key = frame_key; auto &f = *frame; + mojo_frame(frame_key, frame); 
frame_cache->store(frame_key, std::move(frame)); return f; }
diff --git a/echion/mojo.h b/echion/mojo.h
new file mode 100644
index 0000000..d329325
--- /dev/null
+++ b/echion/mojo.h
@@ -0,0 +1,203 @@
+// This file is part of "echion" which is released under MIT.
+//
+// Copyright (c) 2023 Gabriele N. Tornetta .
+
+#pragma once
+
+// NOTE(review): the header name of the include below was lost when this patch
+// was extracted. <iostream> is assumed because this file uses std::hex and
+// std::dec -- confirm against the original file.
+#include <iostream>
+
+#define MOJO_VERSION 3
+
+// Event identifiers, written as a single byte by mojo_event.
+enum
+{
+    MOJO_RESERVED,
+    MOJO_METADATA,
+    MOJO_STACK,
+    MOJO_FRAME,
+    MOJO_FRAME_INVALID,
+    MOJO_FRAME_REF,
+    MOJO_FRAME_KERNEL,
+    MOJO_GC,
+    MOJO_IDLE,
+    MOJO_METRIC_TIME,
+    MOJO_METRIC_MEMORY,
+    MOJO_STRING,
+    MOJO_STRING_REF,
+    MOJO_MAX,
+};
+
+#if defined __arm__
+typedef unsigned long mojo_int_t;
+#else
+typedef unsigned long long mojo_int_t;
+#endif
+
+// Bitmask to ensure that we encode at most 4 bytes for an integer.
+#define MOJO_INT32 ((mojo_int_t)(1 << (6 + 7 * 3)) - 1)
+
+// Primitives
+//
+// Everything below writes to a stream named `output`. It is not declared in
+// this header, so it must already be visible wherever this file is included.
+//
+// Multi-statement macros are wrapped in do { ... } while (0) so that each one
+// expands to a single statement and stays correct as the body of an unbraced
+// if/else. Every use must be terminated with a semicolon.
+
+// Write the one-byte identifier that opens an event.
+#define mojo_event(event)          \
+    do                             \
+    {                              \
+        output.put((char)(event)); \
+    } while (0)
+
+// Write a value with operator<<, followed by a NUL terminator.
+#define mojo_string(string)  \
+    do                       \
+    {                        \
+        output << (string);  \
+        output.put('\0');    \
+    } while (0)
+
+// Write an integer using a variable number of bytes. The first byte carries
+// the 6 least significant bits of the value, the sign flag (0x40) and the
+// continuation flag (0x80). Each following byte carries the next 7 bits and
+// has the continuation flag set when more bytes follow.
+static inline void
+mojo_integer(mojo_int_t integer, int sign)
+{
+    unsigned char byte = integer & 0x3f;
+    if (sign)
+        byte |= 0x40;
+
+    integer >>= 6;
+    if (integer)
+        byte |= 0x80;
+
+    output.put(byte);
+
+    while (integer)
+    {
+        byte = integer & 0x7f;
+        integer >>= 7;
+        if (integer)
+            byte |= 0x80;
+        output.put(byte);
+    }
+}
+
+// We expect the least significant bits to be varied enough to provide a valid
+// key. This way we can keep the size of references to a maximum of 4 bytes.
+#define mojo_ref(integer) (mojo_integer(MOJO_INT32 & ((mojo_int_t)(integer)), 0))
+
+// Mojo events
+
+// Write the "MOJ" signature and the format version, then flush the stream.
+#define mojo_header()                  \
+    do                                 \
+    {                                  \
+        output << "MOJ";               \
+        mojo_integer(MOJO_VERSION, 0); \
+        output.flush();                \
+    } while (0)
+
+// Write a metadata event: a label and a value, both NUL-terminated.
+#define mojo_metadata(label, value) \
+    do                              \
+    {                               \
+        mojo_event(MOJO_METADATA);  \
+        mojo_string(label);         \
+        mojo_string(value);         \
+    } while (0)
+
+// Write a stack event: pid and iid as integers, then tid NUL-terminated.
+// std::hex is sticky, so the base is set back to decimal afterwards; without
+// that, any integer later written with operator<< (for instance a numeric
+// value passed to mojo_string) would come out in hexadecimal.
+// NOTE(review): assumes `output` is in decimal mode on entry -- confirm.
+#define mojo_stack(pid, iid, tid)                \
+    do                                           \
+    {                                            \
+        mojo_event(MOJO_STACK);                  \
+        mojo_integer(pid, 0);                    \
+        mojo_integer(iid, 0);                    \
+        output << std::hex << (tid) << std::dec; \
+        output.put('\0');                        \
+    } while (0)
+
+// Write a frame event. `key` is not used: the key is read from
+// frame->cache_key, which must therefore be set before this macro is used.
+#define mojo_frame(key, frame)                         \
+    do                                                 \
+    {                                                  \
+        mojo_event(MOJO_FRAME);                        \
+        mojo_integer((frame)->cache_key, 0);           \
+        mojo_ref((frame)->filename);                   \
+        mojo_ref((frame)->name);                       \
+        mojo_integer((frame)->location.line, 0);       \
+        mojo_integer((frame)->location.line_end, 0);   \
+        mojo_integer((frame)->location.column, 0);     \
+        mojo_integer((frame)->location.column_end, 0); \
+    } while (0)
+
+// Write a frame reference event for `key`. A key of 0 is written as an
+// invalid-frame event with no payload.
+static inline void
+mojo_frame_ref(mojo_int_t key)
+{
+    if (key == 0)
+    {
+        mojo_event(MOJO_FRAME_INVALID);
+    }
+    else
+    {
+        mojo_event(MOJO_FRAME_REF);
+        mojo_integer(key, 0);
+    }
+}
+
+// Write a kernel frame event with a NUL-terminated scope.
+#define mojo_frame_kernel(scope)       \
+    do                                 \
+    {                                  \
+        mojo_event(MOJO_FRAME_KERNEL); \
+        mojo_string(scope);            \
+    } while (0)
+
+// Write a time metric event.
+#define mojo_metric_time(value)       \
+    do                                \
+    {                                 \
+        mojo_event(MOJO_METRIC_TIME); \
+        mojo_integer(value, 0);       \
+    } while (0)
+
+// Write a memory metric event as a magnitude plus a sign flag. `value` is
+// expanded more than once, so it must not have side effects.
+#define mojo_metric_memory(value)                                    \
+    do                                                               \
+    {                                                                \
+        mojo_event(MOJO_METRIC_MEMORY);                              \
+        mojo_integer((value) < 0 ? -(value) : (value), (value) < 0); \
+    } while (0)
+
+// Write a string event: the key, then the NUL-terminated string.
+#define mojo_string_event(key, string) \
+    do                                 \
+    {                                  \
+        mojo_event(MOJO_STRING);       \
+        mojo_ref(key);                 \
+        mojo_string(string);           \
+    } while (0)
+
+// Write a string reference event for `key`.
+#define mojo_string_ref(key)         \
+    do                               \
+    {                                \
+        mojo_event(MOJO_STRING_REF); \
+        mojo_ref(key);               \
+    } while (0)
diff --git a/echion/stacks.h b/echion/stacks.h index fc4bb46..a99efca 100644 --- a/echion/stacks.h +++ b/echion/stacks.h @@ -29,7 +29,7 @@ class FrameStack : public std::deque // This is a shim frame so we skip it.
continue; #endif - (*it).get().render(output); + mojo_frame_ref((*it).get().cache_key); } } }; diff --git a/echion/strings.h b/echion/strings.h index 2e84ac3..726fee0 100644 --- a/echion/strings.h +++ b/echion/strings.h @@ -95,7 +95,6 @@ class StringTable : public std::unordered_map if (this->find(k) == this->end()) { - // TODO: Emit MOJO string signal try { #if PY_VERSION_HEX >= 0x030c0000 @@ -108,6 +107,7 @@ class StringTable : public std::unordered_map auto str = pyunicode_to_utf8(s); #endif this->emplace(k, str); + mojo_string_event(k, str); } catch (StringError &) { @@ -125,12 +125,12 @@ class StringTable : public std::unordered_map if (this->find(k) == this->end()) { - // TODO: Emit MOJO string signal try { - auto s = std::string(32, '\0'); - std::snprintf((char *)s.c_str(), 32, "native@%p", (void *)k); - this->emplace(k, s); + char buffer[32] = {0}; + std::snprintf(buffer, 32, "native@%p", (void *)k); + this->emplace(k, buffer); + mojo_string_event(k, buffer); } catch (StringError &) { @@ -152,7 +152,6 @@ class StringTable : public std::unordered_map if (this->find(k) == this->end()) { - // TODO: Emit MOJO string signal unw_word_t offset; // Ignored. All the information is in the PC anyway. 
char sym[256]; if (unw_get_proc_name(&cursor, sym, sizeof(sym), &offset)) @@ -171,6 +170,7 @@ class StringTable : public std::unordered_map } this->emplace(k, name); + mojo_string_event(k, name); if (demangled) std::free(demangled); diff --git a/echion/threads.h b/echion/threads.h index 13c3a0b..f875c4d 100644 --- a/echion/threads.h +++ b/echion/threads.h @@ -337,7 +337,7 @@ void ThreadInfo::sample(int64_t iid, PyThreadState *tstate, microsecond_t delta) if (current_tasks.empty()) { // Print the PID and thread name - output << "P" << pid << ";T" << iid << ":" << name; + mojo_stack(pid, iid, name); // Print the stack if (native) @@ -349,13 +349,13 @@ void ThreadInfo::sample(int64_t iid, PyThreadState *tstate, microsecond_t delta) python_stack.render(output); // Print the metric - output << " " << delta << std::endl; + mojo_metric_time(delta); } else { for (auto &task_stack : current_tasks) { - output << "P" << pid << ";T" << iid << ":" << name; + mojo_stack(pid, iid, name); if (native) { @@ -367,7 +367,7 @@ void ThreadInfo::sample(int64_t iid, PyThreadState *tstate, microsecond_t delta) else task_stack->render(output); - output << " " << delta << std::endl; + mojo_metric_time(delta); } current_tasks.clear(); diff --git a/pyproject.toml b/pyproject.toml index 97cdb30..d31431d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -41,7 +41,7 @@ template = "tests" dependencies = [ "pytest>=5.4.2", "pytest-cov>=2.8.1", - "austin-python", + "austin-python~=1.7", "bytecode", ] diff --git a/tests/utils.py b/tests/utils.py index 51c2f66..1335921 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -12,28 +12,14 @@ from time import sleep import pytest -from austin.stats import AustinFileReader -from austin.stats import MetricType -from austin.stats import Sample +from austin.format.mojo import MojoFile PY = sys.version_info[:2] -class Data: - def __init__(self, file: Path) -> None: - self.source = file - self.samples = [] - - with AustinFileReader(str(file)) as afr: - for s in 
afr: - self.samples.append(Sample.parse(s, MetricType.TIME)[0]) - - self.metadata = afr.metadata - - class DataSummary: - def __init__(self, data: Data) -> None: + def __init__(self, data: MojoFile) -> None: self.data = data self.metadata = data.metadata @@ -44,15 +30,19 @@ def __init__(self, data: Data) -> None: for sample in data.samples: self.nsamples += 1 frames = sample.frames - v = sample.metric.value + v = sample.metrics[0].value + + if not sample.thread or not v: + continue self.total_metric += v - stacks = self.threads.setdefault(sample.thread, {}) - stack = tuple((f.function, f.line) for f in frames) + stacks = self.threads.setdefault(f"{sample.iid}:{sample.thread}", {}) + + stack = tuple((f.scope.string.value, f.line) for f in frames) stacks[stack] = stacks.get(stack, 0) + v - fstack = tuple(f.function for f in frames) + fstack = tuple(f.scope.string.value for f in frames) stacks[fstack] = stacks.get(fstack, 0) + v @property @@ -60,7 +50,12 @@ def nthreads(self): return len(self.threads) def query(self, thread_name, frames): - stacks = self.threads[thread_name] + try: + stacks = self.threads[thread_name] + except KeyError as e: + raise AssertionError( + f"Expected thread {thread_name}, found {list(self.threads.keys())}" + ) from e for stack in stacks: for i in range(0, len(stack) - len(frames) + 1): if stack[i : i + len(frames)] == frames: @@ -119,7 +114,9 @@ def run_echion(*args: str) -> CompletedProcess: raise -def run_target(target: Path, *args: str) -> t.Tuple[CompletedProcess, t.Optional[Data]]: +def run_target( + target: Path, *args: str +) -> t.Tuple[CompletedProcess, t.Optional[MojoFile]]: with TemporaryDirectory(prefix="echion") as td: output_file = Path(td) / "output.echion" @@ -132,7 +129,12 @@ def run_target(target: Path, *args: str) -> t.Tuple[CompletedProcess, t.Optional f"tests.{target}", ) - return result, (Data(output_file) if output_file.is_file() else None) + if not output_file.is_file(): + return result, None + + m = 
MojoFile(output_file.open(mode="rb")) + m.unwind() + return result, m def run_with_signal(target: Path, signal: int, delay: float, *args: str) -> Popen: