From 3f6ee57e82f95368318527cfb199bf654b66c0f7 Mon Sep 17 00:00:00 2001 From: "Gabriele N. Tornetta" Date: Tue, 17 Oct 2023 16:23:34 +0100 Subject: [PATCH] chore: introduce a string table We introduce a string table to centralise the handling of strings used for frame information. This is an intermediate step to get to implementing support for the MOJO binary format for data compression. --- echion/coremodule.cc | 1 + echion/frame.h | 97 ++++++++++++++++---------------- echion/stacks.h | 17 ++---- echion/strings.h | 131 +++++++++++++++++++++++++++++++++++++++++++ echion/tasks.h | 14 +---- echion/threads.h | 2 +- 6 files changed, 192 insertions(+), 70 deletions(-) diff --git a/echion/coremodule.cc b/echion/coremodule.cc index 40ce2ac..8133aee 100644 --- a/echion/coremodule.cc +++ b/echion/coremodule.cc @@ -117,6 +117,7 @@ _stop() const std::lock_guard guard(thread_info_map_lock); thread_info_map.clear(); + string_table.clear(); } #if defined PL_DARWIN diff --git a/echion/frame.h b/echion/frame.h index 959eed1..3409fe6 100644 --- a/echion/frame.h +++ b/echion/frame.h @@ -52,8 +52,8 @@ class Frame } }; - std::string filename = ""; - std::string name = ""; + StringTable::Key filename = 0; + StringTable::Key name = 0; struct _location { @@ -68,25 +68,27 @@ class Frame void render(std::ostream &stream) { - stream << ";" << filename << ":" << name << ":" << location.line; + stream + << ";" << string_table.lookup(filename) + << ":" << string_table.lookup(name) + << ":" << location.line; } void render_where(std::ostream &stream) { - if ((filename).rfind("native@", 0) == 0) - stream << " \033[38;5;248;1m" << name - << "\033[0m \033[38;5;246m(" << filename + if ((string_table.lookup(filename)).rfind("native@", 0) == 0) + stream << " \033[38;5;248;1m" << string_table.lookup(name) + << "\033[0m \033[38;5;246m(" << string_table.lookup(filename) << "\033[0m:\033[38;5;246m" << location.line << ")\033[0m" << std::endl; else - stream << " \033[33;1m" << name - << "\033[0m (\033[36m" << filename + stream << " \033[33;1m" << string_table.lookup(name) + << "\033[0m (\033[36m" << string_table.lookup(filename) << "\033[0m:\033[32m" << location.line << "\033[0m)" << std::endl; } - Frame(const char *name) : name({std::string(name)}){}; - Frame(std::string &name) : name(name){}; + Frame(StringTable::Key name) : name(name){}; static Frame &read(PyObject *, PyObject **); static Frame &read(PyObject *frame_addr) @@ -95,12 +97,12 @@ class Frame return Frame::read(frame_addr, &unused); } - static Frame &get(PyCodeObject *code, int lasti); - static Frame &get(unw_word_t pc, const char *name, unw_word_t offset); - static Frame &get(PyObject *, std::string &); + static Frame &get(PyCodeObject *, int); + static Frame &get(unw_cursor_t &); + static Frame &get(StringTable::Key); Frame(PyCodeObject *, int); - Frame(unw_word_t, const char *, unw_word_t); + Frame(unw_cursor_t &, unw_word_t); private: void infer_location(PyCodeObject *, int); @@ -111,6 +113,9 @@ class Frame } }; +static auto INVALID_FRAME = Frame(StringTable::INVALID); +static auto UNKNOWN_FRAME = Frame(StringTable::UNKNOWN); + #if PY_VERSION_HEX >= 0x030b0000 // ---------------------------------------------------------------------------- static inline int @@ -267,14 +272,14 @@ Frame::Frame(PyCodeObject *code, int lasti) { try { - filename = pyunicode_to_utf8(code->co_filename); + filename = string_table.key(code->co_filename); #if PY_VERSION_HEX >= 0x030b0000 - name = pyunicode_to_utf8(code->co_qualname); + name = string_table.key(code->co_qualname); #else - name = pyunicode_to_utf8(code->co_name); + name = string_table.key(code->co_name); #endif } - catch (StringError &) + catch (StringTable::Error &) { throw Error(); } @@ -282,35 +287,21 @@ Frame::Frame(PyCodeObject *code, int lasti) infer_location(code, lasti); } -Frame::Frame(unw_word_t pc, const char *name, unw_word_t offset) +Frame::Frame(unw_cursor_t &cursor, unw_word_t pc) { - filename = std::string(32, '\0'); - std::snprintf((char *)filename.c_str(), 32, "native@%p", (void *)pc); - - // Try to demangle C++ names - char *demangled = NULL; - if (name[0] == '_' && name[1] == 'Z') + try { - int status; - demangled = abi::__cxa_demangle(name, NULL, NULL, &status); - if (status == 0) - name = demangled; + filename = string_table.key(pc); + name = string_table.key(cursor); + } + catch (StringTable::Error &) + { + throw Error(); } - - // Make a copy - this->name = std::string(name); - - if (demangled != NULL) - std::free(demangled); - - location.line = offset; } // ---------------------------------------------------------------------------- -static Frame INVALID_FRAME(""); -static Frame UNKNOWN_FRAME(""); - // We make this a raw pointer to prevent its destruction on exit, since we // control the lifetime of the cache. static LRUCache *frame_cache = nullptr; @@ -354,8 +345,13 @@ Frame &Frame::get(PyCodeObject *code_addr, int lasti) } } -Frame &Frame::get(unw_word_t pc, const char *name, unw_word_t offset) +Frame &Frame::get(unw_cursor_t &cursor) { + unw_word_t pc; + unw_get_reg(&cursor, UNW_REG_IP, &pc); + if (pc == 0) + throw Error(); + uintptr_t frame_key = (uintptr_t)pc; try { @@ -363,16 +359,23 @@ Frame &Frame::get(unw_word_t pc, const char *name, unw_word_t offset) } catch (LRUCache::LookupError &) { - auto frame = std::make_unique(pc, name, offset); - auto &f = *frame; - frame_cache->store(frame_key, std::move(frame)); - return f; + try + { + auto frame = std::make_unique(cursor, pc); + auto &f = *frame; + frame_cache->store(frame_key, std::move(frame)); + return f; + } + catch (Frame::Error &) + { + return UNKNOWN_FRAME; + } } } -Frame &Frame::get(PyObject *origin, std::string &name) +Frame &Frame::get(StringTable::Key name) { - uintptr_t frame_key = (uintptr_t)origin; + uintptr_t frame_key = (uintptr_t)name; try { return frame_cache->lookup(frame_key); diff --git a/echion/stacks.h b/echion/stacks.h index c206660..fc4bb46 100644 --- a/echion/stacks.h +++ b/echion/stacks.h @@ -51,19 +51,14 @@ void unwind_native_stack() while (unw_step(&cursor) > 0 && native_stack.size() < MAX_FRAMES) { - unw_word_t offset, pc; - unw_get_reg(&cursor, UNW_REG_IP, &pc); - if (pc == 0) + try + { + native_stack.push_back(Frame::get(cursor)); + } + catch (Frame::Error &) { - // TODO: Invalid stack break; } - - char sym[256]; - native_stack.push_back( - unw_get_proc_name(&cursor, sym, sizeof(sym), &offset) - ? UNKNOWN_FRAME - : Frame::get(pc, sym, offset)); } } @@ -142,7 +137,7 @@ interleave_stacks(FrameStack &python_stack) { auto native_frame = *n; - if (native_frame.get().name.find("PyEval_EvalFrameDefault") != std::string::npos) + if (string_table.lookup(native_frame.get().name).find("PyEval_EvalFrameDefault") != std::string::npos) { if (p == python_stack.rend()) { diff --git a/echion/strings.h b/echion/strings.h index 99c4433..2e84ac3 100644 --- a/echion/strings.h +++ b/echion/strings.h @@ -69,3 +69,134 @@ pyunicode_to_utf8(PyObject *str_addr) return dest; } + +// ---------------------------------------------------------------------------- + +class StringTable : public std::unordered_map +{ +public: + using Key = uintptr_t; + + class Error : public std::exception + { + }; + + class LookupError : public Error + { + }; + + static constexpr Key INVALID = 1; + static constexpr Key UNKNOWN = 2; + + // Python string object + inline Key key(PyObject *s) + { + auto k = (Key)s; + + if (this->find(k) == this->end()) + { + // TODO: Emit MOJO string signal + try + { +#if PY_VERSION_HEX >= 0x030c0000 + // The task name might hold a PyLong for deferred task name formatting. + PyLongObject l; + auto str = (!copy_type(s, l) && PyLong_CheckExact(&l)) + ? "Task-" + std::to_string(PyLong_AsLong((PyObject *)&l)) + : pyunicode_to_utf8(s); +#else + auto str = pyunicode_to_utf8(s); +#endif + this->emplace(k, str); + } + catch (StringError &) + { + throw Error(); + } + } + + return k; + }; + + // Native filename by program counter + inline Key key(unw_word_t pc) + { + auto k = (Key)pc; + + if (this->find(k) == this->end()) + { + // TODO: Emit MOJO string signal + try + { + auto s = std::string(32, '\0'); + std::snprintf((char *)s.c_str(), 32, "native@%p", (void *)k); + this->emplace(k, s); + } + catch (StringError &) + { + throw Error(); + } + } + + return k; + } + + // Native scope name by unwinding cursor + inline Key key(unw_cursor_t &cursor) + { + unw_proc_info_t pi; + if ((unw_get_proc_info(&cursor, &pi))) + throw Error(); + + auto k = (Key)pi.start_ip; + + if (this->find(k) == this->end()) + { + // TODO: Emit MOJO string signal + unw_word_t offset; // Ignored. All the information is in the PC anyway. + char sym[256]; + if (unw_get_proc_name(&cursor, sym, sizeof(sym), &offset)) + throw Error(); + + char *name = sym; + + // Try to demangle C++ names + char *demangled = NULL; + if (name[0] == '_' && name[1] == 'Z') + { + int status; + demangled = abi::__cxa_demangle(name, NULL, NULL, &status); + if (status == 0) + name = demangled; + } + + this->emplace(k, name); + + if (demangled) + std::free(demangled); + } + + return k; + } + + inline std::string &lookup(Key key) + { + auto it = this->find(key); + if (it == this->end()) + throw LookupError(); + + return it->second; + }; + + StringTable() : std::unordered_map() + { + this->emplace(0, ""); + this->emplace(INVALID, ""); + this->emplace(UNKNOWN, ""); + }; +}; + +// We make this a reference to a heap-allocated object so that we can avoid +// the destruction on exit. We are in charge of cleaning up the object. Note +// that the object will leak, but this is not a problem. +static StringTable &string_table = *(new StringTable()); diff --git a/echion/tasks.h b/echion/tasks.h index 0ad5247..aef9e81 100644 --- a/echion/tasks.h +++ b/echion/tasks.h @@ -133,7 +133,7 @@ class TaskInfo GenInfo::Ptr coro = nullptr; - std::string name; + StringTable::Key name; // Information to reconstruct the async stack as best as we can TaskInfo::Ptr waiter = nullptr; @@ -167,17 +167,9 @@ TaskInfo::TaskInfo(TaskObj *task_addr) try { -#if PY_VERSION_HEX >= 0x030c0000 - // The task name might hold a PyLong for deferred task name formatting. - PyLongObject name_obj; - name = (!copy_type(task.task_name, name_obj) && PyLong_CheckExact(&name_obj)) - ? "Task-" + std::to_string(PyLong_AsLong((PyObject *)&name_obj)) - : pyunicode_to_utf8(task.task_name); -#else - name = pyunicode_to_utf8(task.task_name); -#endif + name = string_table.key(task.task_name); } - catch (StringError &) + catch (StringTable::Error &) { throw Error(); } diff --git a/echion/threads.h b/echion/threads.h index 0cacc7d..1ae3a91 100644 --- a/echion/threads.h +++ b/echion/threads.h @@ -279,7 +279,7 @@ void ThreadInfo::unwind_tasks() } // Add the task name frame - stack->push_back(Frame::get(task.origin, task.name)); + stack->push_back(Frame::get(task.name)); // Get the next task in the chain PyObject *task_origin = task.origin;