From b5a3c60c32ba428af9d2c02991ef71b18f7fdf65 Mon Sep 17 00:00:00 2001
From: Laurence Tratt
Date: Sat, 5 Dec 2020 09:06:21 +0000
Subject: [PATCH] Don't create a malloc'd block each time we start tracing.

When we detect a possibly closed loop in the end user's program, we
need to determine whether the current thread is the one tracing it or
not. We previously allocated a chunk of memory when we started tracing
and used its address as a temporary thread ID.

Ideally we would use the system-provided thread ID to determine this,
but that turns out to be a non-portable minefield. pthreads provides
the pthread_t ID type but that's notionally opaque (on OpenBSD, as far
as I can tell from a quick search, it's really an opaque struct about
which no guarantees are provided, though some programs such as Chrome
do seem to rely on it being equivalent to a usize). Some platforms (at
least Linux and OpenBSD) use the process ID type pid_t for threads too,
and that's guaranteed to be a signed integer, though not, it seems, of
a guaranteed size (though on both platforms it's actually a C int, so
32 bits in practice). Rust provides a ThreadId type and an unstable
"as_u64" function (https://github.com/rust-lang/rust/issues/67939) but
that provides no guarantees about how much of the 64-bit integer space
is used.

The challenge we have is that whatever ID we use must fit into
(numbits(usize) - numbits(PHASE_TAG)), i.e. we have 62 bits for the ID
on a 64-bit system. If we rely on the system thread IDs it feels like
we're storing up a portability time bomb that might explode one day in
the dim and distant future. At least for now, the (minimal) extra
performance we might get from that doesn't seem worth the danger.

This commit is thus a relatively simple change. Rather than allocating
a malloc'd block every time we start tracing, which means we have to be
quite careful about freeing memory, we allocate one per thread at
thread construction time. This simplifies the code slightly and (since
it's an aligned address) we can feel fairly safe that it plays nicely
with PHASE_TAG. We could, if we wanted, allocate this block lazily the
first time we trace, but that feels like an unnecessary optimisation at
this point.
---
 ykrt/src/mt.rs | 42 ++++++++++++++++++++++++------------------
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/ykrt/src/mt.rs b/ykrt/src/mt.rs
index 76af95ad2..ce75eb927 100644
--- a/ykrt/src/mt.rs
+++ b/ykrt/src/mt.rs
@@ -1,6 +1,7 @@
 #[cfg(test)]
 use std::time::Duration;
 use std::{
+    alloc::{alloc, dealloc, Layout},
     io, mem,
     panic::{catch_unwind, resume_unwind, UnwindSafe},
     rc::Rc,
@@ -238,17 +239,15 @@ impl MTThread {
                         // count further.
                         return None;
                     }
-                    let loc_id = Box::into_raw(Box::new(0u8));
-                    let new_pack = loc_id as usize | PHASE_TRACING;
+                    let new_pack = self.inner.tid as usize | PHASE_TRACING;
                     if loc.pack.compare_and_swap(lp, new_pack, Ordering::Release) == lp {
                         Rc::get_mut(&mut self.inner).unwrap().tracer =
-                            Some((start_tracing(self.inner.tracing_kind), loc_id));
+                            Some((start_tracing(self.inner.tracing_kind), self.inner.tid));
                         return None;
-                    } else {
-                        // We raced with another thread that's also trying to trace this
-                        // Location, so free the malloc'd block.
-                        unsafe { Box::from_raw(loc_id) };
                     }
+                    // We raced with another thread that's (probably) trying to trace this
+                    // Location or (less likely) has already compiled it so we go around the
+                    // loop again to see what we should do.
                 } else {
                     let new_pack = PHASE_COUNTING | ((count + 1) << PHASE_NUM_BITS);
                     if loc.pack.compare_and_swap(lp, new_pack, Ordering::Release) == lp {
@@ -259,18 +258,17 @@ impl MTThread {
                 }
             }
             PHASE_TRACING => {
-                let loc_id = if let Some((_, loc_id)) = self.inner.tracer {
+                if let Some((_, tid)) = self.inner.tracer {
                     // This thread is tracing something...
-                    if loc_id != ((lp & !PHASE_TAG) as *mut u8) {
+                    if tid != ((lp & !PHASE_TAG) as *mut u8) {
                         // ...but we didn't start at the current Location.
                         return None;
                     }
-                    // ...and we started at this Location, so we've got a complete loop!
-                    loc_id
+                    // ...and we started at this Location, so we've got a complete loop!
                 } else {
                     // Another thread is tracing this location.
                     return None;
-                };
+                }
                 let sir_trace = Rc::get_mut(&mut self.inner)
                     .unwrap()
                     .tracer
@@ -291,8 +289,6 @@ impl MTThread {
                 let new_pack = ptr | PHASE_COMPILED;
                 loc.pack.store(new_pack, Ordering::Release);

-                // Free the small block of memory we used as a Location ID.
-                unsafe { Box::from_raw(loc_id) };
                 Rc::get_mut(&mut self.inner).unwrap().tracer = None;
                 return None;
             }
@@ -316,6 +312,11 @@ impl MTThread {
 /// The innards of a meta-tracer thread.
 struct MTThreadInner {
     mt: MT,
+    /// A value that uniquely identifies a thread. Since this ID needs to be ORable with PHASE_TAG,
+    /// we use a pointer to a malloc'd chunk of memory. We guarantee a) that chunk is aligned to a
+    /// machine word b) that it is a non-zero chunk of memory (and thus guaranteed to be a unique
+    /// pointer).
+    tid: *mut u8,
     hot_threshold: HotThreshold,
     #[allow(dead_code)]
     tracing_kind: TracingKind,
@@ -329,8 +330,14 @@ impl MTThreadInner {
     fn init(mt: MT) -> MTThread {
         let hot_threshold = mt.hot_threshold();
         let tracing_kind = mt.tracing_kind();
+        let tid = {
+            let layout =
+                Layout::from_size_align(mem::size_of::<usize>(), mem::size_of::<usize>()).unwrap();
+            unsafe { alloc(layout) }
+        };
         let inner = MTThreadInner {
             mt,
+            tid,
             hot_threshold,
             tracing_kind,
             tracer: None,
@@ -343,10 +350,9 @@ impl MTThreadInner {

 impl Drop for MTThreadInner {
     fn drop(&mut self) {
-        if let Some((_, loc_id)) = self.tracer {
-            // We were trying to trace something.
-            unsafe { Box::from_raw(loc_id) };
-        }
+        let layout =
+            Layout::from_size_align(mem::size_of::<usize>(), mem::size_of::<usize>()).unwrap();
+        unsafe { dealloc(self.tid, layout) };
     }
 }
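
As a rough, standalone illustration of the scheme described in the commit
message above, the sketch below shows how a machine-word-aligned, heap-allocated
pointer can double as a per-thread ID whose low bits are free to hold a phase
tag. It is not part of the patch: the PHASE_TAG and PHASE_TRACING values are
assumptions made purely for the example (the real constants are defined
elsewhere in ykrt/src/mt.rs and are untouched by this change).

use std::alloc::{alloc, dealloc, Layout};
use std::mem;

const PHASE_TAG: usize = 0b11; // assumed: two low bits reserved for the phase
const PHASE_TRACING: usize = 0b01; // assumed value, for illustration only

fn main() {
    // A non-zero-sized, word-aligned allocation: its address is unique for as
    // long as it lives, and word alignment means the PHASE_TAG bits start out
    // as zero.
    let layout =
        Layout::from_size_align(mem::size_of::<usize>(), mem::size_of::<usize>()).unwrap();
    let tid = unsafe { alloc(layout) };
    assert!(!tid.is_null());
    assert_eq!(tid as usize & PHASE_TAG, 0);

    // Pack: OR the phase into the low bits of the pointer-derived ID.
    let packed = tid as usize | PHASE_TRACING;

    // Unpack: mask the tag off to recover the ID, or keep only the tag bits to
    // recover the phase.
    assert_eq!((packed & !PHASE_TAG) as *mut u8, tid);
    assert_eq!(packed & PHASE_TAG, PHASE_TRACING);

    unsafe { dealloc(tid, layout) };
}

Because the Layout requests machine-word size and alignment, the address
returned by alloc is non-null, unique while the block is live, and has zeroed
low bits, which is what makes ORing PHASE_TRACING into it reversible.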