From 22ef9b6a4ec0072710dedffc1c45e3152050aee9 Mon Sep 17 00:00:00 2001
From: SChernykh
Date: Tue, 26 Sep 2023 17:40:05 +0200
Subject: [PATCH] aarch64 JIT: support v1/v2 switching

---
Review notes (ignored by git am; not part of the commit message):
- The main-loop template now contains an unconditional branch to the v1 FE mix
  code; generateProgram overwrites it with "movi v28.4s, 0" when
  RANDOMX_FLAG_V2 is set.
- NOTE(review): this diff only shows the overwrite inside
  JitCompilerA64::generateProgram. If any other code generator shares the same
  template, confirm that it applies the same overwrite -- TODO verify.
- NOTE(review): leading whitespace of this patch was reconstructed (tabs
  assumed); verify against the target tree before applying.

 src/jit_compiler_a64.cpp        | 18 ++++++++++++++----
 src/jit_compiler_a64_static.S   | 18 ++++++++++++++++--
 src/jit_compiler_a64_static.hpp |  1 +
 src/tests/benchmark.cpp         | 14 +++++++++++---
 4 files changed, 42 insertions(+), 9 deletions(-)

diff --git a/src/jit_compiler_a64.cpp b/src/jit_compiler_a64.cpp
index 42ad86a5..dcc4f781 100644
--- a/src/jit_compiler_a64.cpp
+++ b/src/jit_compiler_a64.cpp
@@ -169,6 +169,14 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
 	codePos = ((uint8_t*)randomx_program_aarch64_update_spMix1) - ((uint8_t*)randomx_program_aarch64);
 	emit32(ARMV8A::EOR | 10 | (IntRegMap[config.readReg0] << 5) | (IntRegMap[config.readReg1] << 16), code, codePos);
 
+	// Enable RandomX v2 AES tweak
+	if (flags & RANDOMX_FLAG_V2) {
+		codePos = ((uint8_t*)randomx_program_aarch64_v2_FE_mix) - ((uint8_t*)randomx_program_aarch64);
+
+		// Disable the jump to RandomX v1 FE mix code by writing "movi v28.4s, 0" instruction
+		emit32(0x4F00041C, code, codePos);
+	}
+
 #ifdef __GNUC__
 	__builtin___clear_cache(reinterpret_cast<char*>(code + MainLoopBegin), reinterpret_cast<char*>(code + codePos));
 #endif
@@ -987,11 +995,13 @@ void JitCompilerA64::h_CFROUND(Instruction& instr, uint32_t& codePos)
 	// ror tmp_reg, src, imm
 	emit32(ARMV8A::ROR_IMM | tmp_reg | (src << 5) | ((instr.getImm32() & 63) << 10) | (src << 16), code, k);
 
-	// tst tmp_reg, 60
-	emit32(0xF27E0E5F, code, k);
+	if (flags & RANDOMX_FLAG_V2) {
+		// tst tmp_reg, 60
+		emit32(0xF27E0E5F, code, k);
 
-	// bne next
-	emit32(0x54000081, code, k);
+		// bne next
+		emit32(0x54000081, code, k);
+	}
 
 	// bfi fpcr_tmp_reg, tmp_reg, 40, 2
 	emit32(0xB3580400 | fpcr_tmp_reg | (tmp_reg << 5), code, k);
diff --git a/src/jit_compiler_a64_static.S b/src/jit_compiler_a64_static.S
index 5dd8e5b7..18094c78 100644
--- a/src/jit_compiler_a64_static.S
+++ b/src/jit_compiler_a64_static.S
@@ -41,6 +41,7 @@
 	.global DECL(randomx_program_aarch64_cacheline_align_mask1)
 	.global DECL(randomx_program_aarch64_cacheline_align_mask2)
 	.global DECL(randomx_program_aarch64_update_spMix1)
+	.global DECL(randomx_program_aarch64_v2_FE_mix)
 	.global DECL(randomx_program_aarch64_vm_instructions_end_light)
 	.global DECL(randomx_program_aarch64_light_cacheline_align_mask)
 	.global DECL(randomx_program_aarch64_light_dataset_offset)
@@ -359,9 +360,11 @@ DECL(randomx_program_aarch64_update_spMix1):
 	stp	x14, x15, [x17, 48]
 
 	# RandomX v2 AES tweak (mix group F and group E registers using AES)
+DECL(randomx_program_aarch64_v2_FE_mix):
 
-	# Use the temporary register as zero register
-	movi	v28.4s, 0
+	# Jump to v1 FE mix code if we're running RandomX v1
+	# JIT compiler will write a "movi v28.4s, 0" (set v28 to all 0) here if we're running RandomX v2
+	b	randomx_program_aarch64_v1_FE_mix
 
 	# f0 = aesenc(f0, e0), f1 = aesdec(f1, e0), f2 = aesenc(f2, e0), f3 = aesdec(f3, e0)
 
@@ -431,6 +434,17 @@ DECL(randomx_program_aarch64_update_spMix1):
 	eor	v18.16b, v18.16b, v23.16b
 	eor	v19.16b, v19.16b, v23.16b
 
+	# Skip v1 FE mix code because we already did v2 FE mix
+	b	randomx_program_aarch64_FE_store
+
+randomx_program_aarch64_v1_FE_mix:
+	eor	v16.16b, v16.16b, v20.16b
+	eor	v17.16b, v17.16b, v21.16b
+	eor	v18.16b, v18.16b, v22.16b
+	eor	v19.16b, v19.16b, v23.16b
+
+randomx_program_aarch64_FE_store:
+
 	# Store FP registers to scratchpad (spAddr0)
 	stp	q16, q17, [x16, 0]
 	stp	q18, q19, [x16, 32]
diff --git a/src/jit_compiler_a64_static.hpp b/src/jit_compiler_a64_static.hpp
index a9b922e2..a21267e3 100644
--- a/src/jit_compiler_a64_static.hpp
+++ b/src/jit_compiler_a64_static.hpp
@@ -38,6 +38,7 @@ extern "C" {
 	void randomx_program_aarch64_cacheline_align_mask1();
 	void randomx_program_aarch64_cacheline_align_mask2();
 	void randomx_program_aarch64_update_spMix1();
+	void randomx_program_aarch64_v2_FE_mix();
 	void randomx_program_aarch64_vm_instructions_end_light();
 	void randomx_program_aarch64_light_cacheline_align_mask();
 	void randomx_program_aarch64_light_dataset_offset();
diff --git a/src/tests/benchmark.cpp b/src/tests/benchmark.cpp
index 29ec404a..d557fc1a 100644
--- a/src/tests/benchmark.cpp
+++ b/src/tests/benchmark.cpp
@@ -97,6 +97,7 @@ void printUsage(const char* executable) {
 	std::cout << " --auto select the best options for the current CPU" << std::endl;
 	std::cout << " --noBatch calculate hashes one by one (default: batch)" << std::endl;
 	std::cout << " --commit calculate commitments instead of hashes (default: hashes)" << std::endl;
+	std::cout << " --v2 calculate RandomX v2 hashes" << std::endl;
 }
 
 struct MemoryException : public std::exception {
@@ -150,7 +151,7 @@ void mine(randomx_vm* vm, std::atomic<uint32_t>& atomicNonce, AtomicHash& result
 }
 
 int main(int argc, char** argv) {
-	bool softAes, miningMode, verificationMode, help, largePages, jit, secure, commit;
+	bool softAes, miningMode, verificationMode, help, largePages, jit, secure, commit, v2;
 	bool ssse3, avx2, autoFlags, noBatch;
 	int noncesCount, threadCount, initThreadCount;
 	uint64_t threadAffinity;
@@ -177,6 +178,7 @@ int main(int argc, char** argv) {
 	readOption("--auto", argc, argv, autoFlags);
 	readOption("--noBatch", argc, argv, noBatch);
 	readOption("--commit", argc, argv, commit);
+	readOption("--v2", argc, argv, v2);
 
 	store32(&seed, seedValue);
 
@@ -236,6 +238,10 @@ int main(int argc, char** argv) {
 	}
 #endif
 
+	if (v2) {
+		flags |= RANDOMX_FLAG_V2;
+	}
+
 	if (flags & RANDOMX_FLAG_ARGON2_AVX2) {
 		std::cout << " - Argon2 implementation: AVX2" << std::endl;
 	}
@@ -394,8 +400,10 @@ int main(int argc, char** argv) {
 			randomx_release_cache(cache);
 		std::cout << "Calculated result: ";
 		result.print(std::cout);
-		if (noncesCount == 1000 && seedValue == 0 && !commit)
-			std::cout << "Reference result: ff5326fbba7402e7af3373b25f10dbf71be0a4be91fc5a0db6af8b9faf708ed3" << std::endl;
+		if (noncesCount == 1000 && seedValue == 0 && !commit) {
+			const char* r = v2 ? "ff5326fbba7402e7af3373b25f10dbf71be0a4be91fc5a0db6af8b9faf708ed3" : "10b649a3f15c7c7f88277812f2e74b337a0f20ce909af09199cccb960771cfa1";
+			std::cout << "Reference result: " << r << std::endl;
+		}
 		if (!miningMode) {
 			std::cout << "Performance: " << 1000 * elapsed / noncesCount << " ms per hash" << std::endl;
 		}