Skip to content

Commit

Permalink
i#1568 Mac64 TLS: add initial support for TLS on MacOS 64-bit (#3542)
Browse files Browse the repository at this point in the history
For 64-bit MacOS, there is no way to set the %fs base which stops
us from using DR's scheme used on other unix platforms. This commit
provides initial support to MacOS 64-bit by stealing a TLS slot
from the app for DR's TLS base.
+ implement is_thread_tls_initialized for MacOS 64-bit
+ implement tls_thread_init and tls_thread_free
+ set MACOS64 define in cmake script
+ add WRITE_TLS_SLOT_IMM etc. for MacOS 64-bit
+ add read_thread_register for MacOS 64-bit to get pthread_t base

Issue: #1568, #1979
  • Loading branch information
derekbruening authored and Hendrik Greving committed Apr 22, 2019
1 parent 7fa5ab4 commit 4ed1b3d
Show file tree
Hide file tree
Showing 5 changed files with 132 additions and 27 deletions.
77 changes: 63 additions & 14 deletions core/unix/os.c
Original file line number Diff line number Diff line change
Expand Up @@ -1388,7 +1388,49 @@ os_timeout(int time_in_milliseconds)
* precise constraint, then the compiler would be able to optimize better. See
* glibc comments on THREAD_SELF.
*/
#ifdef X86
#ifdef MACOS64
/* MacOS 64-bit variant: stores the pointer-sized value var at constant offset imm
 * from DR's TLS base.  The base is first loaded into XAX from the TLS slot
 * DR_TLS_BASE_SLOT, which is addressed relative to %gs; var is then written to
 * imm(XAX).  imm must be a compile-time constant (it is an "i" operand) and XAX is
 * clobbered.  Asserts (via IF_NOT_HAVE_TLS) if built without HAVE_TLS.
 */
# define WRITE_TLS_SLOT_IMM(imm, var) \
IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED()); \
ASSERT(sizeof(var) == sizeof(void *)); \
__asm__ __volatile__( \
"mov %%gs:%1, %%" ASM_XAX " \n\t" \
"movq %0, %c2(%%" ASM_XAX ") \n\t" \
: \
: "r"(var), "m"(*(void **)(DR_TLS_BASE_SLOT * sizeof(void *))), "i"(imm) \
: "memory", ASM_XAX);

/* MacOS 64-bit variant: loads into var the pointer-sized value found at constant
 * offset imm from DR's TLS base.  The base is first loaded into XAX from the TLS
 * slot DR_TLS_BASE_SLOT, which is addressed relative to %gs; var is then read from
 * imm(XAX).  imm must be a compile-time constant (it is an "i" operand) and XAX is
 * clobbered.
 * The load through XAX touches memory that is not listed as an operand, so we
 * declare a "memory" clobber, matching READ_TLS_SLOT, to keep the compiler from
 * caching or reordering accesses to that location across the asm.
 */
# define READ_TLS_SLOT_IMM(imm, var)                                           \
    IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED());                                     \
    ASSERT(sizeof(var) == sizeof(void *));                                     \
    __asm__ __volatile__("mov %%gs:%1, %%" ASM_XAX " \n\t"                     \
                         "movq %c2(%%" ASM_XAX "), %0 \n\t"                    \
                         : "=r"(var)                                           \
                         : "m"(*(void **)(DR_TLS_BASE_SLOT * sizeof(void *))), \
                           "i"(imm)                                            \
                         : "memory", ASM_XAX);

/* MacOS 64-bit variant: stores the pointer-sized value var at run-time offset offs
 * from DR's TLS base.  The base is loaded into XAX from the TLS slot
 * DR_TLS_BASE_SLOT (addressed relative to %gs); offs is read from memory as a
 * 16-bit quantity and zero-extended into XDX (movzwq); var is then written to
 * (XAX, XDX).  Both XAX and XDX are clobbered.
 * Like the other three slot macros, we assert that var is pointer-sized since the
 * store is a full 8-byte movq.
 */
# define WRITE_TLS_SLOT(offs, var)                                             \
    IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED());                                     \
    ASSERT(sizeof(var) == sizeof(void *));                                     \
    __asm__ __volatile__("mov %%gs:%0, %%" ASM_XAX " \n\t"                     \
                         "movzwq %1, %%" ASM_XDX " \n\t"                       \
                         "movq %2, (%%" ASM_XAX ", %%" ASM_XDX ") \n\t"        \
                         :                                                     \
                         : "m"(*(void **)(DR_TLS_BASE_SLOT * sizeof(void *))), \
                           "m"(offs), "r"(var)                                 \
                         : "memory", ASM_XAX, ASM_XDX);

/* MacOS 64-bit variant: loads into var the pointer-sized value found at run-time
 * offset offs from DR's TLS base.  The base is loaded into XAX from the TLS slot
 * DR_TLS_BASE_SLOT (addressed relative to %gs); offs is read from memory as a
 * 16-bit quantity and zero-extended into XDX (movzwq); var is then read from
 * (XAX, XDX).  Both XAX and XDX are clobbered.
 */
# define READ_TLS_SLOT(offs, var) \
IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED()); \
ASSERT(sizeof(var) == sizeof(void *)); \
__asm__ __volatile__("mov %%gs:%1, %%" ASM_XAX " \n\t" \
"movzwq %2, %%" ASM_XDX " \n\t" \
"movq (%%" ASM_XAX ", %%" ASM_XDX "), %0 \n\t" \
: "=r"(var) \
: "m"(*(void **)(DR_TLS_BASE_SLOT * sizeof(void *))), \
"m"(offs) \
: "memory", ASM_XAX, ASM_XDX);

#elif defined(X86)
# define WRITE_TLS_SLOT_IMM(imm, var) \
IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED()); \
ASSERT(sizeof(var) == sizeof(void *)); \
Expand Down Expand Up @@ -1483,7 +1525,14 @@ static os_local_state_t uninit_tls; /* has .magic == 0 */
static bool
is_thread_tls_initialized(void)
{
#ifdef X86
#ifdef MACOS64
byte **tls_swap_slot;
tls_swap_slot = (byte **)get_app_tls_swap_slot_addr();
if (tls_swap_slot == NULL || *tls_swap_slot == NULL ||
*tls_swap_slot == TLS_SLOT_VAL_EXITED)
return false;
return true;
#elif defined(X86)
if (INTERNAL_OPTION(safe_read_tls_init)) {
/* Avoid faults during early init or during exit when we have no handler.
* It's not worth extending the handler as the faults are a perf hit anyway.
Expand Down Expand Up @@ -1586,7 +1635,7 @@ is_DR_segment_reader_entry(app_pc pc)
static bool
is_thread_tls_allocated(void)
{
# ifdef X86
# if defined(X86) && !defined(MACOS64)
if (INTERNAL_OPTION(safe_read_tls_init)) {
/* We use this routine to allow currently-native threads, for which
* is_thread_tls_initialized() (and thus is_thread_initialized()) will
Expand Down Expand Up @@ -1771,7 +1820,9 @@ d_r_set_tls(ushort tls_offs, void *value)
byte *
get_segment_base(uint seg)
{
#ifdef X86
#ifdef MACOS64
return (byte *)read_thread_register(seg);
#elif defined(X86)
if (seg == SEG_CS || seg == SEG_SS || seg == SEG_DS || seg == SEG_ES)
return NULL;
# ifdef HAVE_TLS
Expand Down Expand Up @@ -1902,7 +1953,7 @@ static void
os_tls_app_seg_init(os_local_state_t *os_tls, void *segment)
{
app_pc app_lib_tls_base, app_alt_tls_base;
#ifdef X86
#if defined(X86) && !defined(MACOS64)
int i, index;
our_modify_ldt_t *desc;

Expand All @@ -1920,7 +1971,7 @@ os_tls_app_seg_init(os_local_state_t *os_tls, void *segment)
os_tls->app_alt_tls_base =
is_dynamo_address(app_alt_tls_base) ? NULL : app_alt_tls_base;

#ifdef X86
#if defined(X86) && !defined(MACOS64)
/* get all TLS thread area value */
/* XXX: is get_thread_area supported in 64-bit kernel?
* It has syscall number 211.
Expand All @@ -1934,7 +1985,6 @@ os_tls_app_seg_init(os_local_state_t *os_tls, void *segment)
tls_get_descriptor(i + index, &desc[i]);
}
#endif /* X86 */

os_tls->os_seg_info.dr_tls_base = segment;
os_tls->os_seg_info.priv_alt_tls_base = IF_X86_ELSE(segment, NULL);

Expand All @@ -1943,8 +1993,7 @@ os_tls_app_seg_init(os_local_state_t *os_tls, void *segment)
os_tls->os_seg_info.priv_lib_tls_base = IF_UNIT_TEST_ELSE(
os_tls->app_lib_tls_base, privload_tls_init(os_tls->app_lib_tls_base));
}

#ifdef X86
/* The convenience define for 64-bit MacOS is MACOS64 (a MACOSX64 symbol is never
 * defined), so test that name to actually exclude this selector logging there,
 * consistent with the other X86-only sections of this function.
 */
#if defined(X86) && !defined(MACOS64)
    LOG(THREAD_GET, LOG_THREADS, 1,
        "thread " TIDFMT " app lib tls reg: 0x%x, alt tls reg: 0x%x\n",
        d_r_get_thread_id(), os_tls->app_lib_tls_reg, os_tls->app_alt_tls_reg);
Expand Down Expand Up @@ -2053,7 +2102,7 @@ os_tls_thread_exit(local_state_t *local_state)
if (should_zero_tls_at_thread_exit()) {
tls_thread_free(tls_type, index);

# if defined(X86) && defined(X64)
# if defined(X86) && defined(X64) && !defined(MACOS)
if (tls_type == TLS_TYPE_ARCH_PRCTL) {
/* syscall re-sets gs register so re-clear it */
if (read_thread_register(SEG_TLS) != 0) {
Expand Down Expand Up @@ -2085,14 +2134,14 @@ void
os_tls_exit(local_state_t *local_state, bool other_thread)
{
#ifdef HAVE_TLS
# ifdef X86
# if defined(X86) && !defined(MACOS64)
static const ptr_uint_t zero = 0;
# endif /* X86 */
/* We can't read from fs: as we can be called from other threads */
/* ASSUMPTION: local_state_t is laid out at same start as local_state_extended_t */
os_local_state_t *os_tls =
(os_local_state_t *)(((byte *)local_state) - offsetof(os_local_state_t, state));
# ifdef X86
# if defined(X86) && !defined(MACOS64)
/* If the MSR is in use, writing to the reg faults. We rely on it being 0
* to indicate that.
*/
Expand Down Expand Up @@ -2131,7 +2180,7 @@ os_tls_get_gdt_index(dcontext_t *dcontext)
void
os_tls_pre_init(int gdt_index)
{
#ifdef X86
#if defined(X86) && !defined(MACOS64)
/* Only set to above 0 for tls_type == TLS_TYPE_GDT */
if (gdt_index > 0) {
/* PR 458917: clear gdt slot to avoid leak across exec */
Expand Down Expand Up @@ -6073,7 +6122,7 @@ os_switch_seg_to_base(dcontext_t *dcontext, os_local_state_t *os_tls, reg_id_t s
ASSERT(IF_X86_ELSE((seg == SEG_FS || seg == SEG_GS),
(seg == DR_REG_TPIDRURW || DR_REG_TPIDRURO)));
switch (os_tls->tls_type) {
# ifdef X64
# if defined(X64) && !defined(MACOS)
case TLS_TYPE_ARCH_PRCTL: {
res = tls_set_fs_gs_segment_base(os_tls->tls_type, seg, base, NULL);
ASSERT(res);
Expand Down
16 changes: 15 additions & 1 deletion core/unix/os_exports.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,10 @@
* PR 205276 covers transparently stealing our segment selector.
*/
#ifdef X86
# ifdef X64
# if defined(MACOS64)
# define SEG_TLS SEG_FS /* XXX: no way to set on MacOS 64-bit */
# define LIB_SEG_TLS SEG_GS /* libc+loader tls */
# elif defined(X64)
# define SEG_TLS SEG_GS
# define ASM_SEG "%gs"
# define LIB_SEG_TLS SEG_FS /* libc+loader tls */
Expand Down Expand Up @@ -112,6 +115,17 @@
# error NYI
#endif

#ifdef MACOS64
/* FIXME i#1568: the current pthread_t struct has the first TLS entry at offset 28. We
 * should provide a dynamic method to determine the first entry for forward
 * compatibility.
 * Starting w/ libpthread-218.1.3 they now leave slots 6 and 11 unused to allow
 * limited interoperability w/ code targeting the Windows x64 ABI. We steal slot 6
 * for our own use.
 * Both values below count pointer-sized entries, not bytes: users either scale by
 * sizeof(void *) or add them to a pointer-to-pointer.  DR_TLS_BASE_OFFSET is the
 * first-entry offset (28) plus DR_TLS_BASE_SLOT.
 */
# define DR_TLS_BASE_OFFSET 34 /* offset from pthread_t struct to slot 6 */
# define DR_TLS_BASE_SLOT 6 /* the TLS slot for DR's TLS base */
#endif

#ifdef AARCHXX
# ifdef ANDROID
/* We have our own slot at the end of our instance of Android's
Expand Down
24 changes: 21 additions & 3 deletions core/unix/tls.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,11 @@ typedef struct _our_modify_ldt_t {
#define GDT_SELECTOR(idx) ((idx) << 3 | ((GDT_NOT_LDT) << 2) | (USER_PRIVILEGE))
#define SELECTOR_INDEX(sel) ((sel) >> 3)

#ifdef X86
#ifdef MACOS64
/* On MacOS 64-bit these segment-register writes are not supported: reaching either
 * macro is treated as a bug.
 */
# define WRITE_DR_SEG(val) ASSERT_NOT_REACHED()
# define WRITE_LIB_SEG(val) ASSERT_NOT_REACHED()
/* Sentinel that tls_thread_free() stores into DR's stolen TLS slot;
 * is_thread_tls_initialized() treats a slot holding it as not initialized.
 */
# define TLS_SLOT_VAL_EXITED ((byte *)PTR_UINT_MINUS_1)
#elif defined(X86)
# define WRITE_DR_SEG(val) \
do { \
ASSERT(sizeof(val) == sizeof(reg_t)); \
Expand All @@ -125,7 +129,17 @@ typedef struct _our_modify_ldt_t {
static inline ptr_uint_t
read_thread_register(reg_id_t reg)
{
#ifdef X86
#if defined(MACOS64)
ptr_uint_t sel;
if (reg == SEG_GS) {
asm volatile("mov %%gs:%1, %0" : "=r"(sel) : "m"(*(void **)0));
} else if (reg == SEG_FS) {
return 0;
} else {
ASSERT_NOT_REACHED();
return 0;
}
#elif defined(X86)
uint sel;
if (reg == SEG_FS) {
asm volatile("movl %%fs, %0" : "=r"(sel));
Expand Down Expand Up @@ -242,7 +256,6 @@ typedef struct _os_local_state_t {
int ldt_index;
/* tid needed to ensure children are set up properly */
thread_id_t tid;

#ifdef X86
/* i#107 application's tls value and pointed-at base */
ushort app_lib_tls_reg; /* for mangling seg update/query */
Expand Down Expand Up @@ -278,6 +291,11 @@ byte **
get_dr_tls_base_addr(void);
#endif

#ifdef MACOS64
/* Returns the address of the app TLS slot that DR uses on MacOS 64-bit to hold its
 * own TLS base.  Implemented in tls_macos.c.
 */
byte **
get_app_tls_swap_slot_addr(void);
#endif

#ifdef X86
/* Assumes it's passed either SEG_FS or SEG_GS.
* Returns POINTER_MAX on failure.
Expand Down
36 changes: 28 additions & 8 deletions core/unix/tls_macos.c
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
#endif

/* From the (short) machdep syscall table */
#define SYS_thread_set_tsd_base 3
#define SYS_thread_set_user_ldt 4
#define SYS_i386_set_ldt 5
#define SYS_i386_get_ldt 6
Expand All @@ -62,14 +63,29 @@

static uint tls_app_index;

#ifdef X64
byte **
get_app_tls_swap_slot_addr(void)
{
byte **app_tls_base = (byte **)read_thread_register(TLS_REG_LIB);
if (app_tls_base == NULL) {
ASSERT_NOT_IMPLEMENTED(false);
}
return (byte **)(app_tls_base + DR_TLS_BASE_OFFSET);
}
#endif

void
tls_thread_init(os_local_state_t *os_tls, byte *segment)
{
#ifdef X64
/* FIXME: for 64-bit, our only option is thread_fast_set_cthread_self64
* and sharing with the app. No way to read current base?!?
*/
ASSERT_NOT_IMPLEMENTED(false);
byte **tls_swap_slot;
ASSERT((byte *)(os_tls->self) == segment);
tls_swap_slot = get_app_tls_swap_slot_addr();
/* we assume the swap slot is initialized as 0 */
ASSERT_NOT_IMPLEMENTED(*tls_swap_slot == NULL);
*tls_swap_slot = segment;
os_tls->tls_type = TLS_TYPE_SLOT;
#else
/* SYS_thread_set_user_ldt looks appealing, as it has built-in kernel
* support which swaps it on thread switches.
Expand Down Expand Up @@ -133,10 +149,14 @@ void
tls_thread_free(tls_type_t tls_type, int index)
{
#ifdef X64
/* FIXME: for 64-bit, our only option is thread_fast_set_cthread_self64
* and sharing with the app. No way to read current base?!?
*/
ASSERT_NOT_IMPLEMENTED(false);
byte **tls_swap_slot;
os_local_state_t *os_tls;
ASSERT(tls_type == TLS_TYPE_SLOT);
tls_swap_slot = get_app_tls_swap_slot_addr();
ASSERT(tls_swap_slot != NULL);
os_tls = (os_local_state_t *)*tls_swap_slot;
ASSERT(os_tls->self == os_tls);
*tls_swap_slot = TLS_SLOT_VAL_EXITED;
#else
int res = dynamorio_mach_dep_syscall(SYS_thread_set_user_ldt, 3, NULL, 0, 0);
if (res < 0) {
Expand Down
6 changes: 5 additions & 1 deletion make/configure.cmake.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/* **********************************************************
* Copyright (c) 2011-2018 Google, Inc. All rights reserved.
* Copyright (c) 2011-2019 Google, Inc. All rights reserved.
* Copyright (c) 2009-2010 VMware, Inc. All rights reserved.
* **********************************************************/

Expand Down Expand Up @@ -58,6 +58,10 @@
#if defined(MACOS) || defined (LINUX) || defined(VMKERNEL) || defined(ANDROID)
# define UNIX
#endif
#if defined(MACOS) && defined (X64)
/* MACOS64 is a convenience define that is set whenever both MACOS and X64 are.
 * It is used a lot because TLS is handled differently on 64-bit MacOS.
 */
# define MACOS64
#endif

/* set by high-level VMAP/VMSAFE/VPS configurations */
#cmakedefine PROGRAM_SHEPHERDING
Expand Down

0 comments on commit 4ed1b3d

Please sign in to comment.