diff --git a/core/unix/os.c b/core/unix/os.c index 50dc8a45c56..d38cc1198cf 100644 --- a/core/unix/os.c +++ b/core/unix/os.c @@ -1388,7 +1388,49 @@ os_timeout(int time_in_milliseconds) * precise constraint, then the compiler would be able to optimize better. See * glibc comments on THREAD_SELF. */ -#ifdef X86 +#ifdef MACOS64 +# define WRITE_TLS_SLOT_IMM(imm, var) \ + IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED()); \ + ASSERT(sizeof(var) == sizeof(void *)); \ + __asm__ __volatile__( \ + "mov %%gs:%1, %%" ASM_XAX " \n\t" \ + "movq %0, %c2(%%" ASM_XAX ") \n\t" \ + : \ + : "r"(var), "m"(*(void **)(DR_TLS_BASE_SLOT * sizeof(void *))), "i"(imm) \ + : "memory", ASM_XAX); + +# define READ_TLS_SLOT_IMM(imm, var) \ + IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED()); \ + ASSERT(sizeof(var) == sizeof(void *)); \ + __asm__ __volatile__("mov %%gs:%1, %%" ASM_XAX " \n\t" \ + "movq %c2(%%" ASM_XAX "), %0 \n\t" \ + : "=r"(var) \ + : "m"(*(void **)(DR_TLS_BASE_SLOT * sizeof(void *))), \ + "i"(imm) \ + : ASM_XAX); + +# define WRITE_TLS_SLOT(offs, var) \ + IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED()); \ + __asm__ __volatile__("mov %%gs:%0, %%" ASM_XAX " \n\t" \ + "movzwq %1, %%" ASM_XDX " \n\t" \ + "movq %2, (%%" ASM_XAX ", %%" ASM_XDX ") \n\t" \ + : \ + : "m"(*(void **)(DR_TLS_BASE_SLOT * sizeof(void *))), \ + "m"(offs), "r"(var) \ + : "memory", ASM_XAX, ASM_XDX); + +# define READ_TLS_SLOT(offs, var) \ + IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED()); \ + ASSERT(sizeof(var) == sizeof(void *)); \ + __asm__ __volatile__("mov %%gs:%1, %%" ASM_XAX " \n\t" \ + "movzwq %2, %%" ASM_XDX " \n\t" \ + "movq (%%" ASM_XAX ", %%" ASM_XDX "), %0 \n\t" \ + : "=r"(var) \ + : "m"(*(void **)(DR_TLS_BASE_SLOT * sizeof(void *))), \ + "m"(offs) \ + : "memory", ASM_XAX, ASM_XDX); + +#elif defined(X86) # define WRITE_TLS_SLOT_IMM(imm, var) \ IF_NOT_HAVE_TLS(ASSERT_NOT_REACHED()); \ ASSERT(sizeof(var) == sizeof(void *)); \ @@ -1483,7 +1525,14 @@ static os_local_state_t uninit_tls; /* has .magic == 0 */ static bool 
is_thread_tls_initialized(void) { -#ifdef X86 +#ifdef MACOS64 + byte **tls_swap_slot; + tls_swap_slot = (byte **)get_app_tls_swap_slot_addr(); + if (tls_swap_slot == NULL || *tls_swap_slot == NULL || + *tls_swap_slot == TLS_SLOT_VAL_EXITED) + return false; + return true; +#elif defined(X86) if (INTERNAL_OPTION(safe_read_tls_init)) { /* Avoid faults during early init or during exit when we have no handler. * It's not worth extending the handler as the faults are a perf hit anyway. @@ -1586,7 +1635,7 @@ is_DR_segment_reader_entry(app_pc pc) static bool is_thread_tls_allocated(void) { -# ifdef X86 +# if defined(X86) && !defined(MACOS64) if (INTERNAL_OPTION(safe_read_tls_init)) { /* We use this routine to allow currently-native threads, for which * is_thread_tls_initialized() (and thus is_thread_initialized()) will @@ -1771,7 +1820,9 @@ d_r_set_tls(ushort tls_offs, void *value) byte * get_segment_base(uint seg) { -#ifdef X86 +#ifdef MACOS64 + return (byte *)read_thread_register(seg); +#elif defined(X86) if (seg == SEG_CS || seg == SEG_SS || seg == SEG_DS || seg == SEG_ES) return NULL; # ifdef HAVE_TLS @@ -1902,7 +1953,7 @@ static void os_tls_app_seg_init(os_local_state_t *os_tls, void *segment) { app_pc app_lib_tls_base, app_alt_tls_base; -#ifdef X86 +#if defined(X86) && !defined(MACOS64) int i, index; our_modify_ldt_t *desc; @@ -1920,7 +1971,7 @@ os_tls_app_seg_init(os_local_state_t *os_tls, void *segment) os_tls->app_alt_tls_base = is_dynamo_address(app_alt_tls_base) ? NULL : app_alt_tls_base; -#ifdef X86 +#if defined(X86) && !defined(MACOS64) /* get all TLS thread area value */ /* XXX: is get_thread_area supported in 64-bit kernel? * It has syscall number 211. 
@@ -1934,7 +1985,6 @@ os_tls_app_seg_init(os_local_state_t *os_tls, void *segment) tls_get_descriptor(i + index, &desc[i]); } #endif /* X86 */ - os_tls->os_seg_info.dr_tls_base = segment; os_tls->os_seg_info.priv_alt_tls_base = IF_X86_ELSE(segment, NULL); @@ -1943,8 +1993,7 @@ os_tls_app_seg_init(os_local_state_t *os_tls, void *segment) os_tls->os_seg_info.priv_lib_tls_base = IF_UNIT_TEST_ELSE( os_tls->app_lib_tls_base, privload_tls_init(os_tls->app_lib_tls_base)); } - -#ifdef X86 +#if defined(X86) && !defined(MACOS64) LOG(THREAD_GET, LOG_THREADS, 1, "thread " TIDFMT " app lib tls reg: 0x%x, alt tls reg: 0x%x\n", d_r_get_thread_id(), os_tls->app_lib_tls_reg, os_tls->app_alt_tls_reg); @@ -2053,7 +2102,7 @@ os_tls_thread_exit(local_state_t *local_state) if (should_zero_tls_at_thread_exit()) { tls_thread_free(tls_type, index); -# if defined(X86) && defined(X64) +# if defined(X86) && defined(X64) && !defined(MACOS) if (tls_type == TLS_TYPE_ARCH_PRCTL) { /* syscall re-sets gs register so re-clear it */ if (read_thread_register(SEG_TLS) != 0) { @@ -2085,14 +2134,14 @@ void os_tls_exit(local_state_t *local_state, bool other_thread) { #ifdef HAVE_TLS -# ifdef X86 +# if defined(X86) && !defined(MACOS64) static const ptr_uint_t zero = 0; # endif /* X86 */ /* We can't read from fs: as we can be called from other threads */ /* ASSUMPTION: local_state_t is laid out at same start as local_state_extended_t */ os_local_state_t *os_tls = (os_local_state_t *)(((byte *)local_state) - offsetof(os_local_state_t, state)); -# ifdef X86 +# if defined(X86) && !defined(MACOS64) /* If the MSR is in use, writing to the reg faults. We rely on it being 0 * to indicate that. 
*/ @@ -2131,7 +2180,7 @@ os_tls_get_gdt_index(dcontext_t *dcontext) void os_tls_pre_init(int gdt_index) { -#ifdef X86 +#if defined(X86) && !defined(MACOS64) /* Only set to above 0 for tls_type == TLS_TYPE_GDT */ if (gdt_index > 0) { /* PR 458917: clear gdt slot to avoid leak across exec */ @@ -6073,7 +6122,7 @@ os_switch_seg_to_base(dcontext_t *dcontext, os_local_state_t *os_tls, reg_id_t s ASSERT(IF_X86_ELSE((seg == SEG_FS || seg == SEG_GS), (seg == DR_REG_TPIDRURW || DR_REG_TPIDRURO))); switch (os_tls->tls_type) { -# ifdef X64 +# if defined(X64) && !defined(MACOS) case TLS_TYPE_ARCH_PRCTL: { res = tls_set_fs_gs_segment_base(os_tls->tls_type, seg, base, NULL); ASSERT(res); diff --git a/core/unix/os_exports.h b/core/unix/os_exports.h index a55af29efef..7b51fa8c4b8 100644 --- a/core/unix/os_exports.h +++ b/core/unix/os_exports.h @@ -70,7 +70,10 @@ * PR 205276 covers transparently stealing our segment selector. */ #ifdef X86 -# ifdef X64 +# if defined(MACOS64) +# define SEG_TLS SEG_FS /* XXX: no way to set on MacOS 64-bit */ +# define LIB_SEG_TLS SEG_GS /* libc+loader tls */ +# elif defined(X64) # define SEG_TLS SEG_GS # define ASM_SEG "%gs" # define LIB_SEG_TLS SEG_FS /* libc+loader tls */ @@ -112,6 +115,17 @@ # error NYI #endif +#ifdef MACOS64 +/* FIXME i#1568: current pthread_t struct has the first TLS entry at offset 28. We should + * provide a dynamic method to determine the first entry for forward compatibility. + * Starting w/ libpthread-218.1.3 they now leave slots 6 and 11 unused to allow + * limited interoperability w/ code targeting the Windows x64 ABI. We steal slot 6 + * for our own use. 
+ */ +# define DR_TLS_BASE_OFFSET 34 /* offset from pthread_t struct to slot 6 */ +# define DR_TLS_BASE_SLOT 6 /* the TLS slot for DR's TLS base */ +#endif + #ifdef AARCHXX # ifdef ANDROID /* We have our own slot at the end of our instance of Android's diff --git a/core/unix/tls.h b/core/unix/tls.h index 856b989893f..138a52891fd 100644 --- a/core/unix/tls.h +++ b/core/unix/tls.h @@ -99,7 +99,11 @@ typedef struct _our_modify_ldt_t { #define GDT_SELECTOR(idx) ((idx) << 3 | ((GDT_NOT_LDT) << 2) | (USER_PRIVILEGE)) #define SELECTOR_INDEX(sel) ((sel) >> 3) -#ifdef X86 +#ifdef MACOS64 +# define WRITE_DR_SEG(val) ASSERT_NOT_REACHED() +# define WRITE_LIB_SEG(val) ASSERT_NOT_REACHED() +# define TLS_SLOT_VAL_EXITED ((byte *)PTR_UINT_MINUS_1) +#elif defined(X86) # define WRITE_DR_SEG(val) \ do { \ ASSERT(sizeof(val) == sizeof(reg_t)); \ @@ -125,7 +129,17 @@ typedef struct _our_modify_ldt_t { static inline ptr_uint_t read_thread_register(reg_id_t reg) { -#ifdef X86 +#if defined(MACOS64) + ptr_uint_t sel; + if (reg == SEG_GS) { + asm volatile("mov %%gs:%1, %0" : "=r"(sel) : "m"(*(void **)0)); + } else if (reg == SEG_FS) { + return 0; + } else { + ASSERT_NOT_REACHED(); + return 0; + } +#elif defined(X86) uint sel; if (reg == SEG_FS) { asm volatile("movl %%fs, %0" : "=r"(sel)); @@ -242,7 +256,6 @@ typedef struct _os_local_state_t { int ldt_index; /* tid needed to ensure children are set up properly */ thread_id_t tid; - #ifdef X86 /* i#107 application's tls value and pointed-at base */ ushort app_lib_tls_reg; /* for mangling seg update/query */ @@ -278,6 +291,11 @@ byte ** get_dr_tls_base_addr(void); #endif +#ifdef MACOS64 +byte ** +get_app_tls_swap_slot_addr(void); +#endif + #ifdef X86 /* Assumes it's passed either SEG_FS or SEG_GS. * Returns POINTER_MAX on failure. 
diff --git a/core/unix/tls_macos.c b/core/unix/tls_macos.c index 38e93ccc4f4..3936945e87d 100644 --- a/core/unix/tls_macos.c +++ b/core/unix/tls_macos.c @@ -49,6 +49,7 @@ #endif /* From the (short) machdep syscall table */ +#define SYS_thread_set_tsd_base 3 #define SYS_thread_set_user_ldt 4 #define SYS_i386_set_ldt 5 #define SYS_i386_get_ldt 6 @@ -62,14 +63,33 @@ static uint tls_app_index; +#ifdef X64 +/* Returns the address of the slot DR_TLS_BASE_OFFSET pointer-sized entries past the + * base read via read_thread_register(TLS_REG_LIB), or NULL if that base is NULL + * (callers test the result against NULL). + */ +byte ** +get_app_tls_swap_slot_addr(void) +{ + byte **app_tls_base = (byte **)read_thread_register(TLS_REG_LIB); + if (app_tls_base == NULL) { + ASSERT_NOT_IMPLEMENTED(false); + return NULL; + } + return (byte **)(app_tls_base + DR_TLS_BASE_OFFSET); +} +#endif + void tls_thread_init(os_local_state_t *os_tls, byte *segment) { #ifdef X64 - /* FIXME: for 64-bit, our only option is thread_fast_set_cthread_self64 - * and sharing with the app. No way to read current base?!? - */ - ASSERT_NOT_IMPLEMENTED(false); + byte **tls_swap_slot; + ASSERT((byte *)(os_tls->self) == segment); + tls_swap_slot = get_app_tls_swap_slot_addr(); + /* we assume the swap slot is initialized as 0 */ + ASSERT_NOT_IMPLEMENTED(*tls_swap_slot == NULL); + *tls_swap_slot = segment; + os_tls->tls_type = TLS_TYPE_SLOT; #else /* SYS_thread_set_user_ldt looks appealing, as it has built-in kernel * support which swaps it on thread switches. @@ -133,10 +153,14 @@ void tls_thread_free(tls_type_t tls_type, int index) { #ifdef X64 - /* FIXME: for 64-bit, our only option is thread_fast_set_cthread_self64 - * and sharing with the app. No way to read current base?!? 
- */ - ASSERT_NOT_IMPLEMENTED(false); + byte **tls_swap_slot; + os_local_state_t *os_tls; + ASSERT(tls_type == TLS_TYPE_SLOT); + tls_swap_slot = get_app_tls_swap_slot_addr(); + ASSERT(tls_swap_slot != NULL); + os_tls = (os_local_state_t *)*tls_swap_slot; + ASSERT(os_tls->self == os_tls); + *tls_swap_slot = TLS_SLOT_VAL_EXITED; #else int res = dynamorio_mach_dep_syscall(SYS_thread_set_user_ldt, 3, NULL, 0, 0); if (res < 0) { diff --git a/make/configure.cmake.h b/make/configure.cmake.h index c96867b5979..d107d781bc0 100644 --- a/make/configure.cmake.h +++ b/make/configure.cmake.h @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2011-2018 Google, Inc. All rights reserved. + * Copyright (c) 2011-2019 Google, Inc. All rights reserved. * Copyright (c) 2009-2010 VMware, Inc. All rights reserved. * **********************************************************/ @@ -58,6 +58,10 @@ #if defined(MACOS) || defined (LINUX) || defined(VMKERNEL) || defined(ANDROID) # define UNIX #endif +#if defined(MACOS) && defined (X64) +/* Used a lot due to the different TLS. We thus provide a convenience define. */ +# define MACOS64 +#endif /* set by high-level VMAP/VMSAFE/VPS configurations */ #cmakedefine PROGRAM_SHEPHERDING