From 851a961e2d0bedb9254025ba9599dacdf37b468b Mon Sep 17 00:00:00 2001 From: Hendrik Greving Date: Tue, 2 Apr 2019 20:11:38 -0700 Subject: [PATCH 1/5] i#1312 AVX-512 support: Add get_opmask_caller_saved() function. (#3499) Adds function get_opmask_caller_saved() that saves 16-bit AVX-512 OpMask registers to a buffer. It still lacks support for AVX512BW, which will need to save 64-bit registers. Adds a test for get_opmask_caller_saved(). Adds the define MCXT_NUM_OPMASK_SLOTS that will also be used for a future structure in DynamoRIO's mcontext. Adds a missing release note that dr_zmm_t was added and adds release notes for this patch. Issue: #1312 --- api/docs/release.dox | 3 ++ core/arch/arch.h | 2 ++ core/arch/arch_exports.h | 1 + core/arch/x86/x86.asm | 32 ++++++++++++++++++ core/arch/x86_code_test.c | 68 +++++++++++++++++++++++++++++++++++++++ core/lib/globals_shared.h | 10 +++++- 6 files changed, 115 insertions(+), 1 deletion(-) diff --git a/api/docs/release.dox b/api/docs/release.dox index 21c16a85dea..4f60d192187 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -145,6 +145,9 @@ compatibility changes: expression and code relying on this needs to be rewritten. DynamoRIO_NUM_SIMD_SLOTS_COMPATIBILITY is set automatically if clients target version 7.1.0 or earlier. + - Added the type dr_zmm_t. + - Added the type dr_opmask_t. + - Added the define #MCXT_NUM_OPMASK_SLOTS for the number of AVX-512 OpMask registers. Further non-compatibility-affecting changes include: diff --git a/core/arch/arch.h b/core/arch/arch.h index 254c8d2b406..1c0447fa9e6 100644 --- a/core/arch/arch.h +++ b/core/arch/arch.h @@ -1329,6 +1329,8 @@ void get_ymm_caller_saved(dr_zmm_t *ymm_caller_saved_buf); void get_zmm_caller_saved(dr_zmm_t *zmm_caller_saved_buf); +void +get_opmask_caller_saved(dr_opmask_t *opmask_caller_saved_buf); /* in encode.c */ byte * diff --git a/core/arch/arch_exports.h b/core/arch/arch_exports.h index ff93d327d21..259800f7915 100644 --- a/core/arch/arch_exports.h +++ b/core/arch/arch_exports.h @@ -59,6 +59,7 @@ # define XMM_REG_SIZE 16 # define YMM_REG_SIZE 32 # define ZMM_REG_SIZE 64 +# define OPMASK_REG_SIZE 8 # define MCXT_SIMD_SLOT_SIZE ZMM_REG_SIZE # define MCXT_TOTAL_SIMD_SLOTS_SIZE (MCXT_NUM_SIMD_SLOTS * MCXT_SIMD_SLOT_SIZE) /* Indicates OS support, not just processor support (xref i#1278) */ diff --git a/core/arch/x86/x86.asm b/core/arch/x86/x86.asm index e866ed08604..7e673cb1e82 100644 --- a/core/arch/x86/x86.asm +++ b/core/arch/x86/x86.asm @@ -2371,6 +2371,38 @@ GLOBAL_LABEL(get_zmm_caller_saved:) ret END_FUNC(get_zmm_caller_saved) +/* void get_opmask_caller_saved(byte *opmask_caller_saved_buf) + * stores the values of k0 through k7 consecutively in 8 byte slots each into + * opmask_caller_saved_buf. opmask_caller_saved_buf need not be 8-byte aligned. + * The caller must ensure that the underlying processor supports AVX-512! + * XXX i#1312: Eventually this routine must dynamically switch the instructions + * used dependent on whether AVX512BW is enabled or not (2 bytes vs. 8 bytes + * OpMask registers). + */ + DECLARE_FUNC(get_opmask_caller_saved) +GLOBAL_LABEL(get_opmask_caller_saved:) + mov REG_XAX, ARG1 + /* + * c5 f8 91 00 kmovw %k0,(%rax) + * c5 f8 91 48 08 kmovw %k1,0x8(%rax) + * c5 f8 91 50 10 kmovw %k2,0x10(%rax) + * c5 f8 91 58 18 kmovw %k3,0x18(%rax) + * c5 f8 91 60 20 kmovw %k4,0x20(%rax) + * c5 f8 91 68 28 kmovw %k5,0x28(%rax) + * c5 f8 91 70 30 kmovw %k6,0x30(%rax) + * c5 f8 91 78 38 kmovw %k7,0x38(%rax) + */ + RAW(c5) RAW(f8) RAW(91) RAW(00) + RAW(c5) RAW(f8) RAW(91) RAW(48) RAW(08) + RAW(c5) RAW(f8) RAW(91) RAW(50) RAW(10) + RAW(c5) RAW(f8) RAW(91) RAW(58) RAW(18) + RAW(c5) RAW(f8) RAW(91) RAW(60) RAW(20) + RAW(c5) RAW(f8) RAW(91) RAW(68) RAW(28) + RAW(c5) RAW(f8) RAW(91) RAW(70) RAW(30) + RAW(c5) RAW(f8) RAW(91) RAW(78) RAW(38) + ret + END_FUNC(get_opmask_caller_saved) + /* void hashlookup_null_handler(void) * PR 305731: if the app targets NULL, it ends up here, which indirects * through hashlookup_null_target to end up in an ibl miss routine. diff --git a/core/arch/x86_code_test.c b/core/arch/x86_code_test.c index 3dff9b5cbfc..a383618a788 100644 --- a/core/arch/x86_code_test.c +++ b/core/arch/x86_code_test.c @@ -324,6 +324,73 @@ unit_test_get_zmm_caller_saved() 0); } +static void +unit_test_get_opmask_caller_saved() +{ + /* While DynamoRIO's dr_opmask_t type is 8 bytes, the actual machine register is + * really only 8 bytes if the processor and OS support AVX512BW. Otherwise it is + * 2 Bytes. + */ + dr_opmask_t ref_buffer[MCXT_NUM_OPMASK_SLOTS]; + dr_opmask_t get_buffer[MCXT_NUM_OPMASK_SLOTS]; + ASSERT(sizeof(dr_opmask_t) == OPMASK_REG_SIZE); + uint base = 0x0000348e; + +# ifdef __AVX512BW__ + /* i#1312: Modern AVX-512 machines support AVX512BW which extends the OpMask registers + * to 8 bytes. The right compile flags must then to be used to compile this test, and + * the type will be __mmask64. Also DynamoRIO's get_opmask_caller_saved has to + * dynamically switch dependent on a proc_ flag indicating AVX512BW is enabled. + */ +# error "Unimplemented. Should test using __mmask64 instructions." +# else + ASSERT(MCXT_NUM_OPMASK_SLOTS == 8); + register __mmask16 k0 asm("k0"); + register __mmask16 k1 asm("k1"); + register __mmask16 k2 asm("k2"); + register __mmask16 k3 asm("k3"); + register __mmask16 k4 asm("k4"); + register __mmask16 k5 asm("k5"); + register __mmask16 k6 asm("k6"); + register __mmask16 k7 asm("k7"); +# endif + + for (int regno = 0; regno < MCXT_NUM_OPMASK_SLOTS; ++regno) { + get_buffer[regno] = 0; + ref_buffer[regno] = base++; + } + +# define MAKE_OPMASK_REG(num) k##num +# define MOVE_TO_OPMASK(buf, num) \ + asm volatile("kmovw %1, %0" : "=k"(MAKE_OPMASK_REG(num)) : "m"(buf[num]) :); + + MOVE_TO_OPMASK(ref_buffer, 0) + MOVE_TO_OPMASK(ref_buffer, 1) + MOVE_TO_OPMASK(ref_buffer, 2) + MOVE_TO_OPMASK(ref_buffer, 3) + MOVE_TO_OPMASK(ref_buffer, 4) + MOVE_TO_OPMASK(ref_buffer, 5) + MOVE_TO_OPMASK(ref_buffer, 6) + MOVE_TO_OPMASK(ref_buffer, 7) + + get_opmask_caller_saved(get_buffer); + + /* Barrier, as described in unit_test_get_zmm_caller_saved. */ + asm volatile("" ::: "k0", "k1", "k2", "k3", "k4", "k5", "k6", "k7"); + + for (int regno = 0; regno < MCXT_NUM_OPMASK_SLOTS; ++regno) { + print_file(STDERR, "K%d ref\n:", regno); + dump_buffer_as_bytes(STDERR, &ref_buffer[regno], sizeof(ref_buffer[regno]), + DUMP_RAW | DUMP_DWORD); + print_file(STDERR, "\nK%d get\n:", regno); + dump_buffer_as_bytes(STDERR, &get_buffer[regno], sizeof(get_buffer[regno]), + DUMP_RAW | DUMP_DWORD); + print_file(STDERR, "\n"); + } + EXPECT(memcmp(ref_buffer, get_buffer, MCXT_NUM_OPMASK_SLOTS * sizeof(dr_opmask_t)), + 0); +} + # endif void @@ -338,6 +405,7 @@ unit_test_asm(dcontext_t *dc) # endif # ifdef __AVX512F__ unit_test_get_zmm_caller_saved(); + unit_test_get_opmask_caller_saved(); # endif # endif } diff --git a/core/lib/globals_shared.h b/core/lib/globals_shared.h index 927864969d6..afecb3b965b 100644 --- a/core/lib/globals_shared.h +++ b/core/lib/globals_shared.h @@ -1838,6 +1838,11 @@ typedef union _dr_zmm_t { reg_t reg[IF_X64_ELSE(8, 16)]; /**< Representation as 8 or 16 registers. */ } dr_zmm_t; +/* OpMask (k-)register. The register may be only 16 bits on systems w/o AVX512BW, but + * can be up to MAX_KL = 64 bits. + */ +typedef uint64 dr_opmask_t; + #if defined(AARCHXX) /** * 128-bit ARM SIMD Vn register. @@ -1873,6 +1878,7 @@ typedef union _dr_simd_t { # define PRE_SIMD_PADDING \ 0 /**< Bytes of padding before xmm/ymm dr_mcontext_t slots \ */ +# define MCXT_NUM_OPMASK_SLOTS 0 /**< No simd masks */ #elif defined(X86) @@ -1906,7 +1912,9 @@ typedef union _dr_simd_t { # define PRE_XMM_PADDING \ 24 /**< Bytes of padding before xmm/ymm dr_mcontext_t slots */ # endif - +# define MCXT_NUM_OPMASK_SLOTS \ + 8 /**< Number of 16-64-bit OpMask Kn slots in dr_mcontext_t \ + */ #else # error NYI #endif /* AARCHXX/X86 */ From 2950d360e0d5b5ab7964a625812e6d8ff65233b8 Mon Sep 17 00:00:00 2001 From: Hendrik Greving Date: Wed, 3 Apr 2019 08:10:03 -0700 Subject: [PATCH 2/5] i#3479: Add a limited selection of tests to release-external-64. (#3493) Adds a small subset of tests to release-external-64 to run on Travis. Can be manually run with ctest -L RUN_IN_RELEASE. Fixes #3479 --- suite/runsuite.cmake | 11 ++++++++-- suite/tests/CMakeLists.txt | 44 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+), 2 deletions(-) diff --git a/suite/runsuite.cmake b/suite/runsuite.cmake index ba33eafd9ff..f386a402d2a 100644 --- a/suite/runsuite.cmake +++ b/suite/runsuite.cmake @@ -1,5 +1,5 @@ # ********************************************************** -# Copyright (c) 2010-2018 Google, Inc. All rights reserved. +# Copyright (c) 2010-2019 Google, Inc. All rights reserved. # Copyright (c) 2009-2010 VMware, Inc. All rights reserved. # ********************************************************** @@ -262,7 +262,9 @@ endif () # for short suite, don't build tests for builds that don't run tests # (since building takes forever on windows): so we only turn -# on BUILD_TESTS for TEST_LONG or debug-internal-{32,64} +# on BUILD_TESTS for TEST_LONG or debug-internal-{32,64}. BUILD_TESTS is +# also turned on for release-external-64, but ctest will run with label +# RUN_IN_RELEASE. if (NOT cross_aarchxx_linux_only AND NOT cross_android_only) # For cross-arch execve test we need to "make install" @@ -305,12 +307,16 @@ if (NOT cross_aarchxx_linux_only AND NOT cross_android_only) else () set(32bit_path "") endif () + set(orig_extra_ctest_args extra_ctest_args) + set(extra_ctest_args INCLUDE_LABEL RUN_IN_RELEASE) testbuild_ex("release-external-64" ON " DEBUG:BOOL=OFF INTERNAL:BOOL=OFF + BUILD_TESTS:BOOL=ON ${install_path_cache} ${32bit_path} " OFF ${arg_package} "${install_build_args}") + set(extra_ctest_args orig_extra_ctest_args) if (DO_ALL_BUILDS) # we rarely use internal release builds but keep them working in long # suite (not much burden) in case we need to tweak internal options @@ -424,6 +430,7 @@ if (UNIX AND ARCH_IS_X86) INTERNAL:BOOL=OFF CMAKE_TOOLCHAIN_FILE:PATH=${CTEST_SOURCE_DIRECTORY}/make/toolchain-arm64.cmake " OFF ${arg_package} "") + set(run_tests ${prev_run_tests}) set(optional_cross_compile ${prev_optional_cross_compile}) diff --git a/suite/tests/CMakeLists.txt b/suite/tests/CMakeLists.txt index 21c629bf616..8a7fe73dc57 100644 --- a/suite/tests/CMakeLists.txt +++ b/suite/tests/CMakeLists.txt @@ -3748,6 +3748,50 @@ foreach (run ${run_list}) endif (enabled) endforeach(run) +# A subset of release tests has been added and labeled with RUN_IN_RELEASE. It focuses on +# Google's use of the drcachesim client. We do not want to extend the Travis regression +# time by too much and keep the following list concise. +if (NOT DEBUG AND UNIX AND X64 AND NOT CMAKE_COMPILER_IS_CLANG) + set_tests_properties( + samples_proj + code_api|common.eflags + code_api|tool.drcachesim.simple + code_api|tool.drcachesim.simple-config-file + code_api|tool.drcachesim.TLB-simple + code_api|tool.drcachesim.missfile + code_api|tool.drcachesim.missfile-config-file + code_api|tool.drcachesim.phys + code_api|tool.drcachesim.filter-simple + code_api|tool.drcachesim.filter-no-i + code_api|tool.drcachesim.filter-no-d + code_api|tool.drcachesim.delay-simple + code_api|tool.drcachesim.warmup-valid + code_api|tool.drcachesim.warmup-zeros + code_api|tool.drcachesim.threads + code_api|tool.drcachesim.threads-with-config-file + code_api|tool.drcachesim.TLB-threads + code_api|tool.drcachesim.multiproc + code_api|tool.drcachesim.miss_analyzer + code_api|tool.drcacheoff.simple + code_api|tool.drcacheoff.multiproc + code_api|tool.drcacheoff.filter + code_api|tool.drcacheoff.basic_counts + code_api|tool.drcacheoff.opcode_mix + code_api|tool.drcacheoff.view + code_api|tool.drcacheoff.burst_static + code_api|tool.drcacheoff.burst_replace + code_api|tool.drcacheoff.burst_replaceall + code_api|tool.drcacheoff.burst_noreach + code_api|tool.drcacheoff.burst_maps + code_api|tool.drcacheoff.burst_threads + code_api|tool.drcacheoff.burst_malloc + code_api|tool.drcacheoff.burst_threadfilter + code_api|tool.drcacheoff.burst_threadL0filter + code_api|tool.drcacheoff.burst_client + code_api|tool.drcacheoff.legacy + PROPERTIES LABELS RUN_IN_RELEASE) +endif () + if (APPLE) # Enable just the quarter of the tests that pass, to get our test suite started # on the bot without perturbing all the code above and without disabling tests From 87663b9e74b0ebf032c558eaa6844022c286054e Mon Sep 17 00:00:00 2001 From: Hendrik Greving Date: Wed, 3 Apr 2019 09:29:36 -0700 Subject: [PATCH 3/5] i#1312 AVX-512 support: Fix API comment and doxygen links. (#3501) Amends commit 851a961 with doxygen links and fixes a documentation bug for MCXT_NUM_OPMASK_SLOTS. Issue: #1312 --- api/docs/release.dox | 6 +++--- core/lib/globals_shared.h | 17 ++++++++++++----- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/api/docs/release.dox b/api/docs/release.dox index 4f60d192187..00164ba068e 100644 --- a/api/docs/release.dox +++ b/api/docs/release.dox @@ -145,9 +145,6 @@ compatibility changes: expression and code relying on this needs to be rewritten. DynamoRIO_NUM_SIMD_SLOTS_COMPATIBILITY is set automatically if clients target version 7.1.0 or earlier. - - Added the type dr_zmm_t. - - Added the type dr_opmask_t. - - Added the define #MCXT_NUM_OPMASK_SLOTS for the number of AVX-512 OpMask registers. Further non-compatibility-affecting changes include: @@ -156,6 +153,9 @@ Further non-compatibility-affecting changes include: - Added new fields to #dr_os_version_info_t which contain the build number, edition, and Windows 10 release identifier. - Added the function instr_is_xsave(). + - Added the type #dr_zmm_t. + - Added the type #dr_opmask_t. + - Added the define #MCXT_NUM_OPMASK_SLOTS for the number of AVX-512 OpMask registers. **************************************************
diff --git a/core/lib/globals_shared.h b/core/lib/globals_shared.h index afecb3b965b..3f1adf2077a 100644 --- a/core/lib/globals_shared.h +++ b/core/lib/globals_shared.h @@ -1838,9 +1838,12 @@ typedef union _dr_zmm_t { reg_t reg[IF_X64_ELSE(8, 16)]; /**< Representation as 8 or 16 registers. */ } dr_zmm_t; -/* OpMask (k-)register. The register may be only 16 bits on systems w/o AVX512BW, but - * can be up to MAX_KL = 64 bits. +/** AVX-512 OpMask (k-)register. */ +#ifdef AVOID_API_EXPORT +/* The register may be only 16 bits wide on systems without AVX512BW, but can be up to + * MAX_KL = 64 bits. */ +#endif typedef uint64 dr_opmask_t; #if defined(AARCHXX) @@ -1878,7 +1881,10 @@ typedef union _dr_simd_t { # define PRE_SIMD_PADDING \ 0 /**< Bytes of padding before xmm/ymm dr_mcontext_t slots \ */ -# define MCXT_NUM_OPMASK_SLOTS 0 /**< No simd masks */ +# define MCXT_NUM_OPMASK_SLOTS \ + 0 /**< Number of 16-64-bit OpMask Kn slots in dr_mcontext_t, \ + * if architecture supports. \ + */ #elif defined(X86) @@ -1912,8 +1918,9 @@ typedef union _dr_simd_t { # define PRE_XMM_PADDING \ 24 /**< Bytes of padding before xmm/ymm dr_mcontext_t slots */ # endif -# define MCXT_NUM_OPMASK_SLOTS \ - 8 /**< Number of 16-64-bit OpMask Kn slots in dr_mcontext_t \ +# define MCXT_NUM_OPMASK_SLOTS \ + 8 /**< Number of 16-64-bit OpMask Kn slots in dr_mcontext_t, \ + * if architecture supports. \ */ #else # error NYI From f2cf681551de24f3a3d1c8ff6b716d9bc730867d Mon Sep 17 00:00:00 2001 From: Derek Bruening Date: Wed, 3 Apr 2019 14:33:43 -0400 Subject: [PATCH 4/5] i#3314: Free drmodtrack user memory on parsing errors (#3500) Adds freeing of drmodtrack custom user allocations on parsing errors. Previously, we would leak the memory. Adds a test to drmodtrack-test. Confirmed that without this fix the test fails. Fixes #3314 --- ext/drcovlib/modules.c | 10 +- .../client-interface/drmodtrack-test.dll.cpp | 94 +++++++++++++++++-- 2 files changed, 96 insertions(+), 8 deletions(-) diff --git a/ext/drcovlib/modules.c b/ext/drcovlib/modules.c index df20199cc37..286277ae639 100644 --- a/ext/drcovlib/modules.c +++ b/ext/drcovlib/modules.c @@ -1,5 +1,5 @@ /* *************************************************************************** - * Copyright (c) 2012-2017 Google, Inc. All rights reserved. + * Copyright (c) 2012-2019 Google, Inc. All rights reserved. * ***************************************************************************/ /* @@ -595,7 +595,7 @@ drmodtrack_offline_read(file_t file, const char *map, OUT const char **next_line OUT void **handle, OUT uint *num_mods) { module_read_info_t *info = NULL; - uint i; + uint i, mods_parsed = 0; uint64 file_size; size_t map_size = 0; const char *buf, *map_start; @@ -644,6 +644,7 @@ drmodtrack_offline_read(file_t file, const char *map, OUT const char **next_line info->mod = (module_read_entry_t *)dr_global_alloc(*num_mods * sizeof(*info->mod)); /* module lists */ + mods_parsed = 0; for (i = 0; i < *num_mods; i++) { uint mod_id; if (version == 1) { @@ -692,6 +693,7 @@ drmodtrack_offline_read(file_t file, const char *map, OUT const char **next_line buf = module_parse_cb(buf, &info->mod[i].custom); if (buf == NULL) goto read_error; + ++mods_parsed; info->mod[i].path = info->mod[i].path_buf; if (dr_sscanf(buf, " %[^\n\r]", info->mod[i].path) != 1) goto read_error; @@ -706,6 +708,10 @@ drmodtrack_offline_read(file_t file, const char *map, OUT const char **next_line return DRCOVLIB_SUCCESS; read_error: + if (module_free_cb != NULL) { + for (i = 0; i < mods_parsed; i++) + module_free_cb(info->mod[i].custom); + } if (info != NULL) { dr_global_free(info->mod, *num_mods * sizeof(*info->mod)); dr_global_free(info, sizeof(*info)); diff --git a/suite/tests/client-interface/drmodtrack-test.dll.cpp b/suite/tests/client-interface/drmodtrack-test.dll.cpp index 199a05a760e..7f41686ce0a 100644 --- a/suite/tests/client-interface/drmodtrack-test.dll.cpp +++ b/suite/tests/client-interface/drmodtrack-test.dll.cpp @@ -1,5 +1,5 @@ /* ********************************************************** - * Copyright (c) 2017 Google, Inc. All rights reserved. + * Copyright (c) 2017-2019 Google, Inc. All rights reserved. * **********************************************************/ /* @@ -82,6 +82,63 @@ free_cb(void *data) /* Nothing. */ } +/* We do simple leak checking via a counter. We assume single-threaded code. */ +static int alloc_counter; + +static void * +my_alloc(void) +{ + ++alloc_counter; + return dr_global_alloc(sizeof(app_pc)); +} + +static void +my_free(void *ptr) +{ + CHECK(alloc_counter > 0, "double free"); + --alloc_counter; + dr_global_free(ptr, sizeof(app_pc)); +} + +static const char * +parse_alloc_cb(const char *src, OUT void **data) +{ + const char *res; + app_pc module_start; + if (dr_sscanf(src, PIFX ",", &module_start) != 1) + return NULL; + /* We store it on the heap to test leaks. */ + app_pc *ptr_to_start = (app_pc *)my_alloc(); + *ptr_to_start = module_start; + *data = ptr_to_start; + res = strchr(src, ','); + return (res == NULL) ? NULL : res + 1; +} + +static const char * +parse_alloc_error_cb(const char *src, OUT void **data) +{ + static int count_calls; + if (++count_calls == 4) + return NULL; /* fail to test parsing errors */ + const char *res; + app_pc module_start; + if (dr_sscanf(src, PIFX ",", &module_start) != 1) + return NULL; + /* We store it on the heap to test leaks. */ + app_pc *ptr_to_start = (app_pc *)my_alloc(); + *ptr_to_start = module_start; + *data = ptr_to_start; + res = strchr(src, ','); + return (res == NULL) ? NULL : res + 1; +} + +static void +free_alloc_cb(void *data) +{ + my_free(data); +} + static void event_exit(void) { @@ -165,16 +222,41 @@ event_exit(void) CHECK(wrote == strlen(buf_offline) + 1 /*null*/, "returned size off"); CHECK(strcmp(buf_online, buf_offline) == 0, "buffers do not match"); - dr_close_file(f); - ok = dr_delete_file(fname); - CHECK(ok, "failed to delete file"); - res = drmodtrack_offline_exit(modhandle); CHECK(res == DRCOVLIB_SUCCESS, "exit failed"); - + modhandle = NULL; dr_global_free(buf_online, size_online); dr_global_free(buf_offline, size_offline); + /* We do some more offline reads, to test leaks on parsing errors. */ + /* First ensure no leaks on successful parsing. */ + res = drmodtrack_add_custom_data(load_cb, print_cb, parse_alloc_cb, free_alloc_cb); + CHECK(res == DRCOVLIB_SUCCESS, "customization failed"); + void *modhandle2; + res = drmodtrack_offline_read(f, NULL, NULL, &modhandle2, &num_mods); + CHECK(res == DRCOVLIB_SUCCESS, "read failed"); + res = drmodtrack_offline_exit(modhandle2); + CHECK(res == DRCOVLIB_SUCCESS, "exit failed"); + modhandle2 = NULL; + CHECK(alloc_counter == 0, "memory leak"); + /* Now ensure no leaks on a parsing error. */ + res = drmodtrack_add_custom_data(load_cb, print_cb, parse_alloc_error_cb, + free_alloc_cb); + CHECK(res == DRCOVLIB_SUCCESS, "customization failed"); + void *modhandle3; + res = drmodtrack_offline_read(f, NULL, NULL, &modhandle3, &num_mods); + CHECK(res == DRCOVLIB_ERROR, "read should fail"); + modhandle3 = NULL; + CHECK(alloc_counter == 0, "memory leak"); + + /* Final cleanup. */ + dr_close_file(f); + ok = dr_delete_file(fname); + CHECK(ok, "failed to delete file"); + + /* We have to restore the old free_cb as it will be called on the live table. */ + res = drmodtrack_add_custom_data(load_cb, print_cb, parse_cb, free_cb); + CHECK(res == DRCOVLIB_SUCCESS, "customization failed"); res = drmodtrack_exit(); CHECK(res == DRCOVLIB_SUCCESS, "module exit failed"); } From 39aaaf888556b04349cf49694219b01640eea1d0 Mon Sep 17 00:00:00 2001 From: Hendrik Greving Date: Wed, 3 Apr 2019 12:42:26 -0700 Subject: [PATCH 5/5] i#1312 AVX-512 support: Partially change REG_ into DR_REG_ in encode. (#3503) For AVX-512, we will be adding DR_REG_ZMM0 - DR_REG_ZMM31 registers. We are planning not to add the compatibility REG_ versions. This patch changes the only internal use of REG_ that will also require the ZMM versions to using the DR_REG_ enums instead. Issue: #1312 --- core/arch/arm/encode.c | 4 ++-- core/arch/x86/encode.c | 54 +++++++++++++++++++++++------------------- 2 files changed, 31 insertions(+), 27 deletions(-) diff --git a/core/arch/arm/encode.c b/core/arch/arm/encode.c index 8809604c342..039e67e2516 100644 --- a/core/arch/arm/encode.c +++ b/core/arch/arm/encode.c @@ -759,9 +759,9 @@ encode_reset_it_block(dcontext_t *dcontext) void encode_debug_checks(void) { - CLIENT_ASSERT(sizeof(dr_reg_fixer) / sizeof(dr_reg_fixer[0]) == REG_LAST_ENUM + 1, + CLIENT_ASSERT(sizeof(dr_reg_fixer) / sizeof(dr_reg_fixer[0]) == DR_REG_LAST_ENUM + 1, "internal register enum error"); - CLIENT_ASSERT(sizeof(reg_names) / sizeof(reg_names[0]) == REG_LAST_ENUM + 1, + CLIENT_ASSERT(sizeof(reg_names) / sizeof(reg_names[0]) == DR_REG_LAST_ENUM + 1, "reg_names missing an entry"); CLIENT_ASSERT(sizeof(type_names) / sizeof(type_names[0]) == TYPE_BEYOND_LAST_ENUM, "type_names missing an entry"); diff --git a/core/arch/x86/encode.c b/core/arch/x86/encode.c index c7213a4263f..0c1292e5111 100644 --- a/core/arch/x86/encode.c +++ b/core/arch/x86/encode.c @@ -140,38 +140,42 @@ const char *const reg_names[] = { /* Maps sub-registers to their containing register. */ const reg_id_t dr_reg_fixer[] = { - REG_NULL, REG_XAX, REG_XCX, REG_XDX, REG_XBX, REG_XSP, REG_XBP, - REG_XSI, REG_XDI, REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, - REG_R13, REG_R14, REG_R15, REG_XAX, REG_XCX, REG_XDX, REG_XBX, - REG_XSP, REG_XBP, REG_XSI, REG_XDI, REG_R8, REG_R9, REG_R10, - REG_R11, REG_R12, REG_R13, REG_R14, REG_R15, REG_XAX, REG_XCX, - REG_XDX, REG_XBX, REG_XSP, REG_XBP, REG_XSI, REG_XDI, REG_R8, - REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, REG_R14, REG_R15, - REG_XAX, REG_XCX, REG_XDX, REG_XBX, REG_XAX, REG_XCX, REG_XDX, - REG_XBX, REG_R8, REG_R9, REG_R10, REG_R11, REG_R12, REG_R13, - REG_R14, REG_R15, REG_XSP, REG_XBP, REG_XSI, REG_XDI, /* i#201 */ - REG_MM0, REG_MM1, REG_MM2, REG_MM3, REG_MM4, REG_MM5, REG_MM6, - REG_MM7, REG_YMM0, REG_YMM1, REG_YMM2, REG_YMM3, REG_YMM4, REG_YMM5, - REG_YMM6, REG_YMM7, REG_YMM8, REG_YMM9, REG_YMM10, REG_YMM11, REG_YMM12, - REG_YMM13, REG_YMM14, REG_YMM15, REG_ST0, REG_ST1, REG_ST2, REG_ST3, - REG_ST4, REG_ST5, REG_ST6, REG_ST7, SEG_ES, SEG_CS, SEG_SS, - SEG_DS, SEG_FS, SEG_GS, REG_DR0, REG_DR1, REG_DR2, REG_DR3, - REG_DR4, REG_DR5, REG_DR6, REG_DR7, REG_DR8, REG_DR9, REG_DR10, - REG_DR11, REG_DR12, REG_DR13, REG_DR14, REG_DR15, REG_CR0, REG_CR1, - REG_CR2, REG_CR3, REG_CR4, REG_CR5, REG_CR6, REG_CR7, REG_CR8, - REG_CR9, REG_CR10, REG_CR11, REG_CR12, REG_CR13, REG_CR14, REG_CR15, - REG_INVALID, REG_YMM0, REG_YMM1, REG_YMM2, REG_YMM3, REG_YMM4, REG_YMM5, - REG_YMM6, REG_YMM7, REG_YMM8, REG_YMM9, REG_YMM10, REG_YMM11, REG_YMM12, - REG_YMM13, REG_YMM14, REG_YMM15, + DR_REG_NULL, DR_REG_XAX, DR_REG_XCX, DR_REG_XDX, DR_REG_XBX, DR_REG_XSP, + DR_REG_XBP, DR_REG_XSI, DR_REG_XDI, DR_REG_R8, DR_REG_R9, DR_REG_R10, + DR_REG_R11, DR_REG_R12, DR_REG_R13, DR_REG_R14, DR_REG_R15, DR_REG_XAX, + DR_REG_XCX, DR_REG_XDX, DR_REG_XBX, DR_REG_XSP, DR_REG_XBP, DR_REG_XSI, + DR_REG_XDI, DR_REG_R8, DR_REG_R9, DR_REG_R10, DR_REG_R11, DR_REG_R12, + DR_REG_R13, DR_REG_R14, DR_REG_R15, DR_REG_XAX, DR_REG_XCX, DR_REG_XDX, + DR_REG_XBX, DR_REG_XSP, DR_REG_XBP, DR_REG_XSI, DR_REG_XDI, DR_REG_R8, + DR_REG_R9, DR_REG_R10, DR_REG_R11, DR_REG_R12, DR_REG_R13, DR_REG_R14, + DR_REG_R15, DR_REG_XAX, DR_REG_XCX, DR_REG_XDX, DR_REG_XBX, DR_REG_XAX, + DR_REG_XCX, DR_REG_XDX, DR_REG_XBX, DR_REG_R8, DR_REG_R9, DR_REG_R10, + DR_REG_R11, DR_REG_R12, DR_REG_R13, DR_REG_R14, DR_REG_R15, DR_REG_XSP, + DR_REG_XBP, DR_REG_XSI, DR_REG_XDI, /* i#201 */ + DR_REG_MM0, DR_REG_MM1, DR_REG_MM2, DR_REG_MM3, DR_REG_MM4, DR_REG_MM5, + DR_REG_MM6, DR_REG_MM7, DR_REG_YMM0, DR_REG_YMM1, DR_REG_YMM2, DR_REG_YMM3, + DR_REG_YMM4, DR_REG_YMM5, DR_REG_YMM6, DR_REG_YMM7, DR_REG_YMM8, DR_REG_YMM9, + DR_REG_YMM10, DR_REG_YMM11, DR_REG_YMM12, DR_REG_YMM13, DR_REG_YMM14, DR_REG_YMM15, + DR_REG_ST0, DR_REG_ST1, DR_REG_ST2, DR_REG_ST3, DR_REG_ST4, DR_REG_ST5, + DR_REG_ST6, DR_REG_ST7, DR_SEG_ES, DR_SEG_CS, DR_SEG_SS, DR_SEG_DS, + DR_SEG_FS, DR_SEG_GS, DR_REG_DR0, DR_REG_DR1, DR_REG_DR2, DR_REG_DR3, + DR_REG_DR4, DR_REG_DR5, DR_REG_DR6, DR_REG_DR7, DR_REG_DR8, DR_REG_DR9, + DR_REG_DR10, DR_REG_DR11, DR_REG_DR12, DR_REG_DR13, DR_REG_DR14, DR_REG_DR15, + DR_REG_CR0, DR_REG_CR1, DR_REG_CR2, DR_REG_CR3, DR_REG_CR4, DR_REG_CR5, + DR_REG_CR6, DR_REG_CR7, DR_REG_CR8, DR_REG_CR9, DR_REG_CR10, DR_REG_CR11, + DR_REG_CR12, DR_REG_CR13, DR_REG_CR14, DR_REG_CR15, DR_REG_INVALID, DR_REG_YMM0, + DR_REG_YMM1, DR_REG_YMM2, DR_REG_YMM3, DR_REG_YMM4, DR_REG_YMM5, DR_REG_YMM6, + DR_REG_YMM7, DR_REG_YMM8, DR_REG_YMM9, DR_REG_YMM10, DR_REG_YMM11, DR_REG_YMM12, + DR_REG_YMM13, DR_REG_YMM14, DR_REG_YMM15, }; #ifdef DEBUG void encode_debug_checks(void) { - CLIENT_ASSERT(sizeof(dr_reg_fixer) / sizeof(dr_reg_fixer[0]) == REG_LAST_ENUM + 1, + CLIENT_ASSERT(sizeof(dr_reg_fixer) / sizeof(dr_reg_fixer[0]) == DR_REG_LAST_ENUM + 1, "internal register enum error"); - CLIENT_ASSERT(sizeof(reg_names) / sizeof(reg_names[0]) == REG_LAST_ENUM + 1, + CLIENT_ASSERT(sizeof(reg_names) / sizeof(reg_names[0]) == DR_REG_LAST_ENUM + 1, "reg_names missing an entry"); CLIENT_ASSERT(sizeof(type_names) / sizeof(type_names[0]) == TYPE_BEYOND_LAST_ENUM, "type_names missing an entry");