From 66cb50bba18b41cb19910cb14c0b057b04fdae71 Mon Sep 17 00:00:00 2001
From: SChernykh
Date: Wed, 18 Oct 2023 21:21:23 +0200
Subject: [PATCH] Use own clear cache function for aarch64

---
Review notes (this section is ignored by git-am):
 * randomx_clear_cache now really issues "dc cvau" and "ic ivau". Both were
   commented out before, so the routine performed no cache maintenance while
   replacing every __builtin___clear_cache call.
 * The data cache loop now branches to its own label. It used to jump into
   the instruction cache loop, skipping "dsb ish" and the IminLine setup.
 * Both loops start at the cache line that contains "begin", so the last
   line of an unaligned range is no longer skipped.
 * A "dsb ish" was added after the instruction cache loop, before "isb".
 * The blob hash on the "index" line of jit_compiler_a64_static.S describes
   the previous revision of this patch; regenerate it with git format-patch.
 * NOTE(review): "mrs ctr_el0", "dc cvau" and "ic ivau" must be permitted at
   EL0 by the OS - confirm for every supported non-Linux target.

 .github/workflows/c-cpp.yml     |  1 -
 src/jit_compiler_a64.cpp        | 16 +++-------
 src/jit_compiler_a64_static.S   | 59 ++++++++++++++++++++++++++++++++-
 src/jit_compiler_a64_static.hpp |  1 +
 4 files changed, 63 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/c-cpp.yml b/.github/workflows/c-cpp.yml
index 47ade398..5bd0e4de 100644
--- a/.github/workflows/c-cpp.yml
+++ b/.github/workflows/c-cpp.yml
@@ -2,7 +2,6 @@ name: C/C++ CI
 
 on:
   push:
-    branches: [ master ]
   pull_request:
 
 jobs:
diff --git a/src/jit_compiler_a64.cpp b/src/jit_compiler_a64.cpp
index 91e31d64..13aa9df4 100644
--- a/src/jit_compiler_a64.cpp
+++ b/src/jit_compiler_a64.cpp
@@ -98,9 +98,7 @@ JitCompilerA64::JitCompilerA64()
 	memset(reg_changed_offset, 0, sizeof(reg_changed_offset));
 	memcpy(code, (void*) randomx_program_aarch64, CodeSize);
 
-#ifdef __GNUC__
-	__builtin___clear_cache(reinterpret_cast<char*>(code), reinterpret_cast<char*>(code + CodeSize));
-#endif
+	randomx_clear_cache(code, code + CodeSize);
 }
 
 JitCompilerA64::~JitCompilerA64()
@@ -169,9 +167,7 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
 	codePos = ((uint8_t*)randomx_program_aarch64_update_spMix1) - ((uint8_t*)randomx_program_aarch64);
 	emit32(ARMV8A::EOR | 10 | (IntRegMap[config.readReg0] << 5) | (IntRegMap[config.readReg1] << 16), code, codePos);
 
-#ifdef __GNUC__
-	__builtin___clear_cache(reinterpret_cast<char*>(code + MainLoopBegin), reinterpret_cast<char*>(code + codePos));
-#endif
+	randomx_clear_cache(code + MainLoopBegin, code + codePos);
 }
 
 void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration& config, uint32_t datasetOffset)
@@ -226,9 +222,7 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
 	emit32(ARMV8A::ADD_IMM_LO | 2 | (2 << 5) | (imm_lo << 10), code, codePos);
 	emit32(ARMV8A::ADD_IMM_HI | 2 | (2 << 5) | (imm_hi << 10), code, codePos);
 
-#ifdef __GNUC__
-	__builtin___clear_cache(reinterpret_cast<char*>(code + MainLoopBegin), reinterpret_cast<char*>(code + codePos));
-#endif
+	randomx_clear_cache(code + MainLoopBegin, code + codePos);
 }
 
 template<size_t N>
@@ -344,9 +338,7 @@ void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[N], s
 	memcpy(code + codePos, p1, p2 - p1);
 	codePos += p2 - p1;
 
-#ifdef __GNUC__
-	__builtin___clear_cache(reinterpret_cast<char*>(code + CodeSize), reinterpret_cast<char*>(code + codePos));
-#endif
+	randomx_clear_cache(code + CodeSize, code + codePos);
 }
 
 template void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[RANDOMX_CACHE_ACCESSES], std::vector<uint64_t> &reciprocalCache);
diff --git a/src/jit_compiler_a64_static.S b/src/jit_compiler_a64_static.S
index 4886fcf3..f0c04ba5 100644
--- a/src/jit_compiler_a64_static.S
+++ b/src/jit_compiler_a64_static.S
@@ -33,6 +33,7 @@
 	.arch armv8-a
 	.text
 
+	.global DECL(randomx_clear_cache)
 	.global DECL(randomx_program_aarch64)
 	.global DECL(randomx_program_aarch64_main_loop)
 	.global DECL(randomx_program_aarch64_vm_instructions)
@@ -52,6 +53,63 @@
 	.global DECL(randomx_calc_dataset_item_aarch64_store_result)
 	.global DECL(randomx_calc_dataset_item_aarch64_end)
 
+	.balign 4
+DECL(randomx_clear_cache):
+	# x0 = begin
+	# x1 = end
+
+	# Range check
+	cmp	x0, x1
+	bhs	randomx_clear_cache_exit
+
+	# Read "Cache Type Register, EL0"
+	# https://developer.arm.com/documentation/ddi0488/h/system-control/aarch64-register-descriptions/cache-type-register--el0
+	mrs	x4, ctr_el0
+
+	# [19:16] DminLine
+	# Log2 of the number of words in the smallest cache line of all the data and unified caches that the processor controls
+	lsr	x2, x4, 16
+	and	x2, x2, 15
+	mov	x3, 4
+	lsl	x3, x3, x2
+
+	# Clean all data cache lines between x0 and x1 to the point of unification
+	# Start at the line that contains x0, otherwise the last line of an unaligned range is skipped
+	sub	x5, x3, 1
+	bic	x2, x0, x5
+randomx_dcache_clean_loop:
+	dc	cvau, x2
+	add	x2, x2, x3
+	cmp	x2, x1
+	blo	randomx_dcache_clean_loop
+
+	# Data Synchronization Barrier
+	dsb	ish
+
+	# [3:0] IminLine
+	# Log2 of the number of words in the smallest cache line of all the Instruction Caches that the processor controls
+	and	x2, x4, 15
+	mov	x3, 4
+	lsl	x3, x3, x2
+
+	# Invalidate all instruction cache lines between x0 and x1
+	sub	x5, x3, 1
+	bic	x2, x0, x5
+randomx_icache_invalidate_loop:
+	ic	ivau, x2
+	add	x2, x2, x3
+	cmp	x2, x1
+	blo	randomx_icache_invalidate_loop
+
+	# Data Synchronization Barrier, the invalidation must complete before the pipeline is resynchronized
+	dsb	ish
+
+	# Instruction Synchronization Barrier
+	isb	sy
+
+randomx_clear_cache_exit:
+	ret
+
 #include "configuration.h"
 
 # Register allocation
@@ -106,7 +164,6 @@
 # v30 -> E 'or' mask = 0x3*00000000******3*00000000******
 # v31 -> scale mask  = 0x81f000000000000081f0000000000000
 
-	.balign 4
 DECL(randomx_program_aarch64):
 	# Save callee-saved registers
 	sub	sp, sp, 192
diff --git a/src/jit_compiler_a64_static.hpp b/src/jit_compiler_a64_static.hpp
index a9b922e2..81d7113c 100644
--- a/src/jit_compiler_a64_static.hpp
+++ b/src/jit_compiler_a64_static.hpp
@@ -30,6 +30,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #pragma once
 
 extern "C" {
+	void randomx_clear_cache(void* begin, void* end);
 	void randomx_program_aarch64(void* reg, void* mem, void* scratchpad, uint64_t iterations);
 	void randomx_program_aarch64_main_loop();
 	void randomx_program_aarch64_vm_instructions();