Skip to content

Commit

Permalink
chore: introduce a string table
Browse files Browse the repository at this point in the history
We introduce a string table to centralise the handling of strings used
for frame information. This is an intermediate step to get to
implementing support for the MOJO binary format for data compression.
  • Loading branch information
P403n1x87 committed Oct 17, 2023
1 parent e6a06c5 commit 3f6ee57
Show file tree
Hide file tree
Showing 6 changed files with 192 additions and 70 deletions.
1 change: 1 addition & 0 deletions echion/coremodule.cc
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ _stop()
const std::lock_guard<std::mutex> guard(thread_info_map_lock);

thread_info_map.clear();
string_table.clear();
}

#if defined PL_DARWIN
Expand Down
97 changes: 50 additions & 47 deletions echion/frame.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@ class Frame
}
};

std::string filename = "";
std::string name = "";
StringTable::Key filename = 0;
StringTable::Key name = 0;

struct _location
{
Expand All @@ -68,25 +68,27 @@ class Frame

void render(std::ostream &stream)
{
stream << ";" << filename << ":" << name << ":" << location.line;
stream
<< ";" << string_table.lookup(filename)
<< ":" << string_table.lookup(name)
<< ":" << location.line;
}

void render_where(std::ostream &stream)
{
if ((filename).rfind("native@", 0) == 0)
stream << " \033[38;5;248;1m" << name
<< "\033[0m \033[38;5;246m(" << filename
if ((string_table.lookup(filename)).rfind("native@", 0) == 0)
stream << " \033[38;5;248;1m" << string_table.lookup(name)
<< "\033[0m \033[38;5;246m(" << string_table.lookup(filename)
<< "\033[0m:\033[38;5;246m" << location.line
<< ")\033[0m" << std::endl;
else
stream << " \033[33;1m" << name
<< "\033[0m (\033[36m" << filename
stream << " \033[33;1m" << string_table.lookup(name)
<< "\033[0m (\033[36m" << string_table.lookup(filename)
<< "\033[0m:\033[32m" << location.line
<< "\033[0m)" << std::endl;
}

Frame(const char *name) : name({std::string(name)}){};
Frame(std::string &name) : name(name){};
Frame(StringTable::Key name) : name(name){};

static Frame &read(PyObject *, PyObject **);
static Frame &read(PyObject *frame_addr)
Expand All @@ -95,12 +97,12 @@ class Frame
return Frame::read(frame_addr, &unused);
}

static Frame &get(PyCodeObject *code, int lasti);
static Frame &get(unw_word_t pc, const char *name, unw_word_t offset);
static Frame &get(PyObject *, std::string &);
static Frame &get(PyCodeObject *, int);
static Frame &get(unw_cursor_t &);
static Frame &get(StringTable::Key);

Frame(PyCodeObject *, int);
Frame(unw_word_t, const char *, unw_word_t);
Frame(unw_cursor_t &, unw_word_t);

private:
void infer_location(PyCodeObject *, int);
Expand All @@ -111,6 +113,9 @@ class Frame
}
};

static auto INVALID_FRAME = Frame(StringTable::INVALID);
static auto UNKNOWN_FRAME = Frame(StringTable::UNKNOWN);

#if PY_VERSION_HEX >= 0x030b0000
// ----------------------------------------------------------------------------
static inline int
Expand Down Expand Up @@ -267,50 +272,36 @@ Frame::Frame(PyCodeObject *code, int lasti)
{
try
{
filename = pyunicode_to_utf8(code->co_filename);
filename = string_table.key(code->co_filename);
#if PY_VERSION_HEX >= 0x030b0000
name = pyunicode_to_utf8(code->co_qualname);
name = string_table.key(code->co_qualname);
#else
name = pyunicode_to_utf8(code->co_name);
name = string_table.key(code->co_name);
#endif
}
catch (StringError &)
catch (StringTable::Error &)
{
throw Error();
}

infer_location(code, lasti);
}

Frame::Frame(unw_word_t pc, const char *name, unw_word_t offset)
Frame::Frame(unw_cursor_t &cursor, unw_word_t pc)
{
filename = std::string(32, '\0');
std::snprintf((char *)filename.c_str(), 32, "native@%p", (void *)pc);

// Try to demangle C++ names
char *demangled = NULL;
if (name[0] == '_' && name[1] == 'Z')
try
{
int status;
demangled = abi::__cxa_demangle(name, NULL, NULL, &status);
if (status == 0)
name = demangled;
filename = string_table.key(pc);
name = string_table.key(cursor);
}
catch (StringTable::Error &)
{
throw Error();
}

// Make a copy
this->name = std::string(name);

if (demangled != NULL)
std::free(demangled);

location.line = offset;
}

// ----------------------------------------------------------------------------

static Frame INVALID_FRAME("<invalid>");
static Frame UNKNOWN_FRAME("<unknown>");

// We make this a raw pointer to prevent its destruction on exit, since we
// control the lifetime of the cache.
static LRUCache<uintptr_t, Frame> *frame_cache = nullptr;
Expand Down Expand Up @@ -354,25 +345,37 @@ Frame &Frame::get(PyCodeObject *code_addr, int lasti)
}
}

Frame &Frame::get(unw_word_t pc, const char *name, unw_word_t offset)
Frame &Frame::get(unw_cursor_t &cursor)
{
unw_word_t pc;
unw_get_reg(&cursor, UNW_REG_IP, &pc);
if (pc == 0)
throw Error();

uintptr_t frame_key = (uintptr_t)pc;
try
{
return frame_cache->lookup(frame_key);
}
catch (LRUCache<uintptr_t, Frame>::LookupError &)
{
auto frame = std::make_unique<Frame>(pc, name, offset);
auto &f = *frame;
frame_cache->store(frame_key, std::move(frame));
return f;
try
{
auto frame = std::make_unique<Frame>(cursor, pc);
auto &f = *frame;
frame_cache->store(frame_key, std::move(frame));
return f;
}
catch (Frame::Error &)
{
return UNKNOWN_FRAME;
}
}
}

Frame &Frame::get(PyObject *origin, std::string &name)
Frame &Frame::get(StringTable::Key name)
{
uintptr_t frame_key = (uintptr_t)origin;
uintptr_t frame_key = (uintptr_t)name;
try
{
return frame_cache->lookup(frame_key);
Expand Down
17 changes: 6 additions & 11 deletions echion/stacks.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,19 +51,14 @@ void unwind_native_stack()

while (unw_step(&cursor) > 0 && native_stack.size() < MAX_FRAMES)
{
unw_word_t offset, pc;
unw_get_reg(&cursor, UNW_REG_IP, &pc);
if (pc == 0)
try
{
native_stack.push_back(Frame::get(cursor));
}
catch (Frame::Error &)
{
// TODO: Invalid stack
break;
}

char sym[256];
native_stack.push_back(
unw_get_proc_name(&cursor, sym, sizeof(sym), &offset)
? UNKNOWN_FRAME
: Frame::get(pc, sym, offset));
}
}

Expand Down Expand Up @@ -142,7 +137,7 @@ interleave_stacks(FrameStack &python_stack)
{
auto native_frame = *n;

if (native_frame.get().name.find("PyEval_EvalFrameDefault") != std::string::npos)
if (string_table.lookup(native_frame.get().name).find("PyEval_EvalFrameDefault") != std::string::npos)
{
if (p == python_stack.rend())
{
Expand Down
131 changes: 131 additions & 0 deletions echion/strings.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,3 +69,134 @@ pyunicode_to_utf8(PyObject *str_addr)

return dest;
}

// ----------------------------------------------------------------------------

// Table of the strings used for frame information, addressed by integer keys.
//
// Keys are derived from addresses: the address of a Python string object, a
// native program counter, or the start address of a native procedure. The
// string for a key is resolved and stored the first time the key is
// requested; subsequent requests for the same key only return the key.
class StringTable : public std::unordered_map<uintptr_t, std::string>
{
public:
    using Key = uintptr_t;

    // Base class of the errors raised by the table.
    class Error : public std::exception
    {
    };

    // Raised by lookup() when the requested key is not in the table.
    class LookupError : public Error
    {
    };

    // Reserved keys. Key 0 is reserved as well and maps to the empty string.
    static constexpr Key INVALID = 1;
    static constexpr Key UNKNOWN = 2;

    // Python string object
    //
    // Returns the key (the object address) for the given Python string,
    // resolving and storing its UTF-8 content on first use. Throws Error if
    // the string cannot be resolved.
    inline Key key(PyObject *s)
    {
        const auto k = reinterpret_cast<Key>(s);

        if (this->find(k) == this->end())
        {
            // TODO: Emit MOJO string signal
            try
            {
#if PY_VERSION_HEX >= 0x030c0000
                // The task name might hold a PyLong for deferred task name formatting.
                PyLongObject l;
                auto str = (!copy_type(s, l) && PyLong_CheckExact(&l))
                               ? "Task-" + std::to_string(PyLong_AsLong((PyObject *)&l))
                               : pyunicode_to_utf8(s);
#else
                auto str = pyunicode_to_utf8(s);
#endif
                this->emplace(k, str);
            }
            catch (StringError &)
            {
                throw Error();
            }
        }

        return k;
    }

    // Native filename by program counter
    //
    // Returns the key (the program counter itself) for the synthetic
    // "native@<address>" filename, creating the entry on first use. Throws
    // Error if the name cannot be formatted.
    inline Key key(unw_word_t pc)
    {
        const auto k = static_cast<Key>(pc);

        if (this->find(k) == this->end())
        {
            // TODO: Emit MOJO string signal

            // Format into a scratch buffer and store exactly the characters
            // that were produced, so that the stored string carries no
            // trailing NUL padding.
            char buffer[32];
            const int written = std::snprintf(
                buffer, sizeof(buffer), "native@%p", reinterpret_cast<void *>(k));
            if (written < 0)
                throw Error();

            // snprintf reports the untruncated length; never read past the
            // terminator it actually wrote.
            const std::size_t length =
                static_cast<std::size_t>(written) < sizeof(buffer)
                    ? static_cast<std::size_t>(written)
                    : sizeof(buffer) - 1;

            this->emplace(k, std::string(buffer, length));
        }

        return k;
    }

    // Native scope name by unwinding cursor
    //
    // Returns the key (the start address of the procedure the cursor is in)
    // for the procedure name, resolving and demangling it on first use.
    // Throws Error if the procedure information or name cannot be retrieved.
    inline Key key(unw_cursor_t &cursor)
    {
        unw_proc_info_t pi;
        if (unw_get_proc_info(&cursor, &pi))
            throw Error();

        const auto k = static_cast<Key>(pi.start_ip);

        if (this->find(k) == this->end())
        {
            // TODO: Emit MOJO string signal
            unw_word_t offset; // Ignored. All the information is in the PC anyway.
            char sym[256];
            if (unw_get_proc_name(&cursor, sym, sizeof(sym), &offset))
                throw Error();

            char *name = sym;

            // Try to demangle C++ names
            char *demangled = nullptr;
            if (name[0] == '_' && name[1] == 'Z')
            {
                int status;
                demangled = abi::__cxa_demangle(name, nullptr, nullptr, &status);
                if (status == 0)
                    name = demangled;
            }

            this->emplace(k, name);

            // The demangled buffer is malloc-ed by __cxa_demangle and the
            // table holds its own copy by now.
            if (demangled != nullptr)
                std::free(demangled);
        }

        return k;
    }

    // Returns the string stored under the given key. Throws LookupError if
    // the key is not in the table.
    inline std::string &lookup(Key key)
    {
        auto it = this->find(key);
        if (it == this->end())
            throw LookupError();

        return it->second;
    }

    // Drops every resolved string but keeps the reserved entries, so that
    // keys 0, INVALID and UNKNOWN can still be looked up after the table has
    // been cleared. This hides std::unordered_map::clear for calls made
    // through a StringTable.
    void clear()
    {
        std::unordered_map<uintptr_t, std::string>::clear();
        seed_reserved();
    }

    StringTable() : std::unordered_map<uintptr_t, std::string>()
    {
        seed_reserved();
    }

private:
    // Inserts the entries for the reserved keys.
    void seed_reserved()
    {
        this->emplace(0, "");
        this->emplace(INVALID, "<invalid>");
        this->emplace(UNKNOWN, "<unknown>");
    }
};

// Global string table used to resolve the string keys held by frames and
// tasks.
//
// We make this a reference to a heap-allocated object so that we can avoid
// the destruction on exit: the object is never deleted, so the table itself
// is leaked on purpose, which is not a problem. Releasing the stored strings
// is up to us and is done by clearing the table.
// NOTE(review): being `static` in a header, each translation unit including
// this file gets its own table -- presumably the project is built as a single
// translation unit; confirm.
static StringTable &string_table = *(new StringTable());
14 changes: 3 additions & 11 deletions echion/tasks.h
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,7 @@ class TaskInfo

GenInfo::Ptr coro = nullptr;

std::string name;
StringTable::Key name;

// Information to reconstruct the async stack as best as we can
TaskInfo::Ptr waiter = nullptr;
Expand Down Expand Up @@ -167,17 +167,9 @@ TaskInfo::TaskInfo(TaskObj *task_addr)

try
{
#if PY_VERSION_HEX >= 0x030c0000
// The task name might hold a PyLong for deferred task name formatting.
PyLongObject name_obj;
name = (!copy_type(task.task_name, name_obj) && PyLong_CheckExact(&name_obj))
? "Task-" + std::to_string(PyLong_AsLong((PyObject *)&name_obj))
: pyunicode_to_utf8(task.task_name);
#else
name = pyunicode_to_utf8(task.task_name);
#endif
name = string_table.key(task.task_name);
}
catch (StringError &)
catch (StringTable::Error &)
{
throw Error();
}
Expand Down
2 changes: 1 addition & 1 deletion echion/threads.h
Original file line number Diff line number Diff line change
Expand Up @@ -279,7 +279,7 @@ void ThreadInfo::unwind_tasks()
}

// Add the task name frame
stack->push_back(Frame::get(task.origin, task.name));
stack->push_back(Frame::get(task.name));

// Get the next task in the chain
PyObject *task_origin = task.origin;
Expand Down

0 comments on commit 3f6ee57

Please sign in to comment.