Skip to content

Commit

Permalink
Allow emscripten to use the minimal ubsan runtime (emscripten-core#8617)
Browse files Browse the repository at this point in the history
This works with the wasm backend, whether generating wasm or asm.js.

Also added `emscripten_return_address` which implements the functionality of `__builtin_return_address` when running both wasm and asm.js.
  • Loading branch information
quantum5 authored and VirtualTim committed May 21, 2019
1 parent 79a1955 commit 82292b5
Show file tree
Hide file tree
Showing 17 changed files with 478 additions and 2 deletions.
1 change: 1 addition & 0 deletions AUTHORS
Original file line number Diff line number Diff line change
Expand Up @@ -403,3 +403,4 @@ a license to everyone to use it as detailed in LICENSE.)
* Simon Cooper <[email protected]>
* Amir Rasouli <[email protected]>
* Nico Weber <[email protected]>
* Guanzhong Chen <[email protected]> (copyright owned by Google, Inc.)
2 changes: 2 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ full changeset diff at the end of each section.

Current Trunk
-------------
- Add `emscripten_return_address` which implements the functionality of
gcc/clang's `__builtin_return_address`. (#8617)

v1.38.31: 04/24/2019
--------------------
Expand Down
5 changes: 5 additions & 0 deletions embuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
'libc-mt',
'pthreads',
'pthreads_stub',
'ubsan-minimal-rt-wasm',
]

for debug in ['', '_debug']:
Expand Down Expand Up @@ -312,6 +313,10 @@ def is_flag(arg):
}
''', [libname('libpthreads_stub')])
elif what == 'ubsan-minimal-rt-wasm':
if not shared.Settings.WASM_BACKEND:
continue
build(C_BARE, ['libubsan_minimal_rt_wasm.a'], ['-fsanitize=undefined', '-fsanitize-minimal-runtime', '-s', 'WASM=1'])
elif what == 'al':
build('''
#include "AL/al.h"
Expand Down
3 changes: 3 additions & 0 deletions emcc.py
Original file line number Diff line number Diff line change
Expand Up @@ -1423,6 +1423,9 @@ def check(input_file):
if options.separate_asm:
exit_with_error('cannot --separate-asm when emitting wasm, since not emitting asm.js')

if '-fsanitize-minimal-runtime' in newargs:
shared.Settings.UBSAN_RUNTIME = 1

if shared.Settings.WASM_BACKEND:
options.js_opts = None

Expand Down
30 changes: 30 additions & 0 deletions src/library.js
Original file line number Diff line number Diff line change
Expand Up @@ -4647,6 +4647,36 @@ LibraryManager.library = {
else return lengthBytesUTF8(str);
},

// Returns a representation of a call site of the caller of this function, in a manner
// similar to __builtin_return_address. If level is 0, we return the call site of the
// caller of this function.
// The exact return value depends in whether we are running WASM or JS, and whether
// the engine supports offsets into WASM. See the function body for details.
emscripten_return_address__deps: ['emscripten_get_callstack_js'],
emscripten_return_address: function(level) {
var callstack = _emscripten_get_callstack_js(0).split('\n');

// skip this function and the caller to get caller's return address
var frame = callstack[level + 2];
var match;

if (match = /\s+at.*wasm-function\[\d+\]:(0x[0-9a-f]+)/.exec(frame)) {
// some engines give the binary offset directly, so we use that as return address
return +match[1];
} else if (match = /\s+at.*wasm-function\[(\d+)\]:(\d+)/.exec(frame)) {
// other engines only give function index and offset in the function,
// so we pack these into a "return address"
return (+match[1] << 16) + +match[2];
} else if (match = /at.*:(\d+):\d+/.exec(frame)) {
// if we are in js, we can use the js line number as the "return address"
// this should work for wasm2js and fastcomp
return +match[1];
} else {
// return 0 if we can't find any
return 0;
}
},

//============================
// i64 math
//============================
Expand Down
6 changes: 6 additions & 0 deletions src/settings.js
Original file line number Diff line number Diff line change
Expand Up @@ -1394,6 +1394,12 @@ var AUTODEBUG = 0;
// wasm normally, then compile that to JS).
var WASM2JS = 0;

// Whether we should link in the runtime for ubsan.
// 0 means do not link ubsan, 1 means link the minimal ubsan runtime.
// This is not meant to be set directly with `-s`. Instead, to use ubsan, pass the
// clang flag `-fsanitize=undefined`. To use the minimal runtime, also pass
// `-fsanitize-minimal-runtime`; emcc sets this to 1 when it sees that flag.
var UBSAN_RUNTIME = 0;

// Legacy settings that have been removed or renamed.
// For renamed settings the format is:
// [OLD_NAME, NEW_NAME]
Expand Down
14 changes: 14 additions & 0 deletions system/lib/compiler-rt/lib/ubsan_minimal/readme.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
These files are from compiler-rt.

Last Changed Rev: 351636
Last Changed Date: 2019-01-19

===========================================================================

* `ubsan_minimal_handlers.cpp` -- changed to use
`emscripten_return_address` as `__builtin_return_address` is not yet
available in clang target `wasm-unknown-emscripten`.
* `sanitizer_atomic.h` -- based on `sanitizer_atomic.h`,
`sanitizer_atomic_clang.h` and `sanitizer_atomic_clang_other.h` in
upstream, combined into one to avoid pulling in and porting the entire
`sanitizer_common` directory from upstream.
155 changes: 155 additions & 0 deletions system/lib/compiler-rt/lib/ubsan_minimal/sanitizer_atomic.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
/**
* This file is based on sanitizer_atomic.h, sanitizer_atomic_clang.h, and
* sanitizer_atomic_clang_other.h from compiler-rt.
* Last changed revision: 351636.
* Last changed date: 2019-01-19.
*
* The files are combined into one to avoid depending on many unneeded headers
* in compiler-rt/lib/sanitizer_common.
*/

//===-- sanitizer_atomic.h --------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of ThreadSanitizer/AddressSanitizer runtime.
//
//===----------------------------------------------------------------------===//

#ifndef SANITIZER_ATOMIC_H
#define SANITIZER_ATOMIC_H

#include <cinttypes>

// Default INLINE to plain `inline` unless it was already defined before this
// header was included.
#ifndef INLINE
#define INLINE inline
#endif
// Requests x-byte alignment for the annotated declaration.
#define ALIGNED(x) __attribute__((aligned(x)))
// Debug checks are compiled out in this header: DCHECK expands to nothing.
#define DCHECK(a)

namespace __sanitizer {

// Memory orderings accepted by the atomic helpers in this header. Only the two
// orderings declared here are available; each occupies its own bit, which is
// what lets the DCHECKs in the helpers test them with `&`.
enum memory_order {
memory_order_relaxed = 1 << 0,
memory_order_seq_cst = 1 << 5
};

// Each atomic_* struct below wraps a single volatile value; `Type` names the
// wrapped integer type. The helper functions in this header read and write the
// val_dont_use field on behalf of callers.

// 8-bit unsigned atomic cell.
struct atomic_uint8_t {
typedef std::uint8_t Type;
volatile Type val_dont_use;
};

// 16-bit unsigned atomic cell.
struct atomic_uint16_t {
typedef std::uint16_t Type;
volatile Type val_dont_use;
};

// 32-bit signed atomic cell.
struct atomic_sint32_t {
typedef std::int32_t Type;
volatile Type val_dont_use;
};

// 32-bit unsigned atomic cell.
struct atomic_uint32_t {
typedef std::uint32_t Type;
volatile Type val_dont_use;
};

// 64-bit unsigned atomic cell, explicitly aligned to 8 bytes.
struct atomic_uint64_t {
typedef std::uint64_t Type;
// On 32-bit platforms u64 is not necessarily aligned on 8 bytes.
volatile ALIGNED(8) Type val_dont_use;
};

// Pointer-sized unsigned atomic cell.
struct atomic_uintptr_t {
typedef std::uintptr_t Type;
volatile Type val_dont_use;
};

// Pointer-sized unsigned integer, used by the alignment DCHECKs in the helpers.
typedef std::uintptr_t uptr;

// Reads the value stored in the atomic cell `a` and returns it.
// `mo` is only inspected by DCHECK; the load path is chosen purely from the
// size of the cell and the pointer width of the target.
template<typename T>
INLINE typename T::Type atomic_load(
    const volatile T *a, memory_order mo) {
  DCHECK(mo & memory_order_relaxed);
  DCHECK(!((uptr)a % sizeof(*a)));

  // Cells smaller than 8 bytes, or any cell on a target with 8-byte pointers:
  // an aligned plain load is assumed to be atomic.
  if (sizeof(*a) < 8 || sizeof(void*) == 8)
    return a->val_dont_use;

  // 64-bit load on a 32-bit platform: fetch-and-add of zero. Gross, but simple
  // and reliable. This writes to the cell, so it must not be in read-only
  // memory.
  return __sync_fetch_and_add(
      const_cast<typename T::Type volatile *>(&a->val_dont_use), 0);
}

// Writes `v` into the atomic cell `a`.
// `mo` is only inspected by DCHECK; the store path is chosen purely from the
// size of the cell and the pointer width of the target.
template<typename T>
INLINE void atomic_store(volatile T *a, typename T::Type v, memory_order mo) {
  typedef typename T::Type Type;
  DCHECK(mo & memory_order_relaxed);
  DCHECK(!((uptr)a % sizeof(*a)));

  // Cells smaller than 8 bytes, or any cell on a target with 8-byte pointers:
  // an aligned plain store is assumed to be atomic.
  if (sizeof(*a) < 8 || sizeof(void*) == 8) {
    a->val_dont_use = v;
    return;
  }

  // 64-bit store on a 32-bit platform: compare-and-swap until the cell holds
  // `v`. Gross, but simple and reliable.
  Type expected = a->val_dont_use;
  while (true) {
    const Type observed =
        __sync_val_compare_and_swap(&a->val_dont_use, expected, v);
    // Done when our swap went through, or the cell already holds `v`.
    if (observed == expected || observed == v)
      return;
    expected = observed;
  }
}

// We would like to just use compiler builtin atomic operations
// for loads and stores, but they are mostly broken in clang:
// - they lead to vastly inefficient code generation
// (http://llvm.org/bugs/show_bug.cgi?id=17281)
// - 64-bit atomic operations are not implemented on x86_32
// (http://llvm.org/bugs/show_bug.cgi?id=15034)
// - they are not implemented on ARM
// error: undefined reference to '__atomic_load_4'

// See http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
// for mappings of the memory model to different processors.

// Compare-and-swap on the atomic cell `a`.
// If the cell holds `*cmp`, it is replaced with `xchg` and true is returned.
// Otherwise the value actually found is written back to `*cmp` and false is
// returned. `mo` is accepted for interface compatibility and is not used.
template <typename T>
INLINE bool atomic_compare_exchange_strong(volatile T *a, typename T::Type *cmp,
                                           typename T::Type xchg,
                                           memory_order mo) {
  typedef typename T::Type Type;
  const Type expected = *cmp;
  const Type observed =
      __sync_val_compare_and_swap(&a->val_dont_use, expected, xchg);
  if (observed != expected) {
    // Report what was seen so the caller can retry with it.
    *cmp = observed;
    return false;
  }
  return true;
}

// Clutter-reducing helpers.

// Shorthand for atomic_load(a, memory_order_relaxed); returns the loaded value.
template<typename T>
INLINE typename T::Type atomic_load_relaxed(const volatile T *a) {
return atomic_load(a, memory_order_relaxed);
}

// Shorthand for atomic_store(a, v, memory_order_relaxed).
template<typename T>
INLINE void atomic_store_relaxed(volatile T *a, typename T::Type v) {
atomic_store(a, v, memory_order_relaxed);
}

} // namespace __sanitizer

#endif // SANITIZER_ATOMIC_H
130 changes: 130 additions & 0 deletions system/lib/compiler-rt/lib/ubsan_minimal/ubsan_minimal_handlers.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
/**
* Copied from compiler-rt.
* Last changed revision: 351178
* Last changed date: 2019-01-15.
*
* Changes:
* * switched to using emscripten_return_address instead of
__builtin_return_address. clang currently rejects the latter on wasm.
*/
#include "sanitizer_atomic.h"

#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>

// message(msg) emits a NUL-terminated diagnostic string.
#ifdef KERNEL_USE
// With KERNEL_USE defined, output is forwarded to ubsan_message, which is only
// declared here.
extern "C" void ubsan_message(const char *msg);
static void message(const char *msg) {
  ubsan_message(msg);
}
#else
// Otherwise the bytes of msg are written to file descriptor 2; the result of
// write() is not checked.
static void message(const char *msg) {
  const size_t length = strlen(msg);
  write(2, msg, length);
}
#endif

// Capacity of caller_pcs, the table of call sites already recorded.
static const int kMaxCallerPcs = 20;
// Call sites recorded so far. A slot that still reads 0 has not been written
// yet; report_this_error treats that as a concurrent update in progress.
static __sanitizer::atomic_uintptr_t caller_pcs[kMaxCallerPcs];
// Number of elements in caller_pcs. A special value of kMaxCallerPcs + 1 means
// that "too many errors" has already been reported.
static __sanitizer::atomic_uint32_t caller_pcs_sz;

// Decides whether an error raised from call site caller_p should be printed.
//
// Returns true only when caller_p is non-null, is not yet present in
// caller_pcs, and fewer than kMaxCallerPcs call sites have been recorded; in
// that case the call site is appended to caller_pcs. Returns false for a null
// caller, for a call site that is already recorded, and once the table is
// full. The call that finds the table exactly full prints
// "ubsan: too many errors" before returning false.
__attribute__((noinline)) static bool report_this_error(void *caller_p) {
uintptr_t caller = reinterpret_cast<uintptr_t>(caller_p);
// 0 is also the value of a not-yet-written slot, so it cannot be recorded.
if (caller == 0) return false;
while (true) {
unsigned sz = __sanitizer::atomic_load_relaxed(&caller_pcs_sz);
if (sz > kMaxCallerPcs) return false; // early exit
// when sz==kMaxCallerPcs print "too many errors", but only when cmpxchg
// succeeds in order to not print it multiple times.
if (sz > 0 && sz < kMaxCallerPcs) {
// Scan the recorded call sites for a duplicate. sz > 0 guarantees the loop
// body runs at least once, so p is assigned before it is read below.
uintptr_t p;
for (unsigned i = 0; i < sz; ++i) {
p = __sanitizer::atomic_load_relaxed(&caller_pcs[i]);
if (p == 0) break; // Concurrent update.
if (p == caller) return false;
}
// A slot was counted in caller_pcs_sz but not stored yet (the size is bumped
// before the slot is written below); restart the whole check.
if (p == 0) continue; // FIXME: yield?
}

// Claim slot index sz by advancing the size from sz to sz + 1.
if (!__sanitizer::atomic_compare_exchange_strong(
&caller_pcs_sz, &sz, sz + 1, __sanitizer::memory_order_seq_cst))
continue; // Concurrent update! Try again from the start.

// The size is now kMaxCallerPcs + 1, so later calls take the early exit.
if (sz == kMaxCallerPcs) {
message("ubsan: too many errors\n");
return false;
}
// Publish the call site into the slot claimed above.
__sanitizer::atomic_store_relaxed(&caller_pcs[sz], caller);
return true;
}
}

// abort_with_message(msg) terminates the process with abort().
#if defined(__ANDROID__)
// On Android, msg is first handed to android_set_abort_message when that
// weakly-linked symbol is present.
extern "C" __attribute__((weak)) void android_set_abort_message(const char *);
static void abort_with_message(const char *msg) {
  if (&android_set_abort_message != nullptr) {
    android_set_abort_message(msg);
  }
  abort();
}
#else
// Elsewhere the message argument is ignored.
static void abort_with_message(const char *) {
  abort();
}
#endif

// Only compiled when SANITIZER_DEBUG evaluates to non-zero.
// NOTE(review): NORETURN and u64 are not defined in this file, and the
// sanitizer_atomic.h included above defines neither of them (it also defines
// DCHECK as empty) -- confirm this block still builds before enabling
// SANITIZER_DEBUG.
#if SANITIZER_DEBUG
namespace __sanitizer {
// The DCHECK macro needs this symbol to be defined.
// Prints "Sanitizer CHECK failed: <file>:?? : <cond>" via message() and then
// aborts. The int and the two u64 arguments are ignored.
void NORETURN CheckFailed(const char *file, int, const char *cond, u64, u64) {
message("Sanitizer CHECK failed: ");
message(file);
message(":?? : "); // FIXME: Show line number.
message(cond);
abort();
}
} // namespace __sanitizer
#endif

// Used here in place of __builtin_return_address, which clang currently
// rejects on wasm. Only declared in this file.
extern "C" void *emscripten_return_address(int level);

// Handlers get C linkage and default symbol visibility.
#define INTERFACE extern "C" __attribute__((visibility("default")))

// FIXME: add caller pc to the error message (possibly as "ubsan: error-type
// @1234ABCD").
// Defines __ubsan_handle_<name>_minimal, which prints "ubsan: <msg>" unless
// report_this_error() rejects the call site, and then returns to the caller.
#define HANDLER_RECOVER(name, msg) \
INTERFACE void __ubsan_handle_##name##_minimal() { \
if (!report_this_error(emscripten_return_address(0))) return; \
message("ubsan: " msg "\n"); \
}

// Defines __ubsan_handle_<name>_minimal_abort, which always prints
// "ubsan: <msg>" and then aborts through abort_with_message().
#define HANDLER_NORECOVER(name, msg) \
INTERFACE void __ubsan_handle_##name##_minimal_abort() { \
message("ubsan: " msg "\n"); \
abort_with_message("ubsan: " msg); \
}

// Defines both the recoverable and the aborting handler for one check.
#define HANDLER(name, msg) \
HANDLER_RECOVER(name, msg) \
HANDLER_NORECOVER(name, msg)

// Handler definitions, one line per check. Each HANDLER line defines both the
// _minimal and _minimal_abort entry points; builtin_unreachable and
// missing_return use HANDLER_RECOVER and so define only the _minimal one.
HANDLER(type_mismatch, "type-mismatch")
HANDLER(alignment_assumption, "alignment-assumption")
HANDLER(add_overflow, "add-overflow")
HANDLER(sub_overflow, "sub-overflow")
HANDLER(mul_overflow, "mul-overflow")
HANDLER(negate_overflow, "negate-overflow")
HANDLER(divrem_overflow, "divrem-overflow")
HANDLER(shift_out_of_bounds, "shift-out-of-bounds")
HANDLER(out_of_bounds, "out-of-bounds")
HANDLER_RECOVER(builtin_unreachable, "builtin-unreachable")
HANDLER_RECOVER(missing_return, "missing-return")
HANDLER(vla_bound_not_positive, "vla-bound-not-positive")
HANDLER(float_cast_overflow, "float-cast-overflow")
HANDLER(load_invalid_value, "load-invalid-value")
HANDLER(invalid_builtin, "invalid-builtin")
HANDLER(function_type_mismatch, "function-type-mismatch")
HANDLER(implicit_conversion, "implicit-conversion")
HANDLER(nonnull_arg, "nonnull-arg")
HANDLER(nonnull_return, "nonnull-return")
HANDLER(nullability_arg, "nullability-arg")
HANDLER(nullability_return, "nullability-return")
HANDLER(pointer_overflow, "pointer-overflow")
HANDLER(cfi_check_fail, "cfi-check-fail")
Loading

0 comments on commit 82292b5

Please sign in to comment.