Skip to content

Commit

Permalink
Merge xmrig-cuda v6.22.0 into master
Browse files Browse the repository at this point in the history
  • Loading branch information
MoneroOcean committed Aug 12, 2024
2 parents e821970 + 01d5348 commit beb3cd0
Show file tree
Hide file tree
Showing 11 changed files with 289 additions and 18 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
# v6.22.0
- [#201](https://github.com/xmrig/xmrig-cuda/pull/201) Added support for [Yada](https://yadacoin.io/) (`rx/yada` algorithm).

# v6.21.1
- The binary downloads now only support the latest version of each major CUDA release.
- Improved build speed with CUDA 11.3 or higher.
Expand Down
3 changes: 3 additions & 0 deletions cmake/CUDA.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ if (NOT CUDA_VERSION VERSION_LESS 11.5)
endif()

# Architectures "89" and "90" are appended to the default architecture list
# only when the detected toolkit is CUDA 11.8 or newer.
if (NOT CUDA_VERSION VERSION_LESS 11.8)
list(APPEND DEFAULT_CUDA_ARCH "89")
list(APPEND DEFAULT_CUDA_ARCH "90")
endif()
list(SORT DEFAULT_CUDA_ARCH)
Expand Down Expand Up @@ -236,6 +237,8 @@ if (WITH_RANDOMX)
src/RandomX/randomx.cu
src/RandomX/wownero/configuration.h
src/RandomX/wownero/randomx_wownero.cu
src/RandomX/yada/configuration.h
src/RandomX/yada/randomx_yada.cu
)
else()
set(CUDA_RANDOMX_SOURCES "")
Expand Down
103 changes: 103 additions & 0 deletions src/RandomX/blake2b_cuda.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,90 @@ __device__ void blake2b_512_process_double_block(uint64_t *out, uint64_t* m, con
if (out_len > 56) out[7] = h[7] ^ v[7] ^ v[15];
}

/**
 * Blake2b hash of an arbitrary-length input with a 32-bit nonce spliced in.
 *
 * The input is consumed in 128-byte blocks. The four nonce bytes (least
 * significant byte first) replace the input bytes at
 * [nonce_offset, nonce_offset + 4); the nonce may straddle a block boundary.
 * Nonce bytes that would fall at or beyond in_len are ignored.
 *
 * @tparam out_len      Digest length in bytes; only the 64-bit words covering
 *                      out_len bytes are written to out.
 * @param out           Receives the digest words (up to 8).
 * @param in            Input data, read as whole 64-bit words. The final block
 *                      reads every word that contains at least one input byte,
 *                      so the buffer must be readable up to the next 8-byte
 *                      boundary past in_len.
 * @param in_len        Input length in bytes (0 is handled as an empty message).
 * @param nonce         Nonce value to insert.
 * @param nonce_offset  Byte offset of the nonce inside the input.
 */
template<uint32_t out_len>
__device__ void blake2b_512_process_big_block(uint64_t* out, const uint64_t* in, uint32_t in_len, uint32_t nonce, uint32_t nonce_offset)
{
    uint64_t h[8] = { Blake2b_IV::iv0 ^ (0x01010000u | out_len), Blake2b_IV::iv1, Blake2b_IV::iv2, Blake2b_IV::iv3, Blake2b_IV::iv4, Blake2b_IV::iv5, Blake2b_IV::iv6, Blake2b_IV::iv7 };

    // Every block except the last one. t is the byte counter after the
    // current block, i.e. the current block covers bytes [t - 128, t).
    for (uint32_t t = 128; t < in_len; t += 128, in += 16) {
        uint64_t m[16] = { in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7], in[8], in[9], in[10], in[11], in[12], in[13], in[14], in[15] };

        #pragma unroll
        for (uint32_t i = 0; i < 4; ++i) {
            // Position of nonce byte i relative to this block. Unsigned
            // wrap-around turns "before this block" into a huge value, so a
            // single comparison rejects both out-of-range directions.
            const uint32_t pos = (nonce_offset + i) - (t - 128);
            if (pos < 128) {
                const uint32_t shift = (pos % 8) * 8;
                // Overwrite (not OR) so the result does not depend on the
                // template already holding zeros at the nonce position.
                m[pos / 8] = (m[pos / 8] & ~((uint64_t)255 << shift)) | ((uint64_t)((nonce >> (i * 8)) & 255) << shift);
            }
        }

        uint64_t v[16] = { h[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7], Blake2b_IV::iv0, Blake2b_IV::iv1, Blake2b_IV::iv2, Blake2b_IV::iv3, Blake2b_IV::iv4 ^ t, Blake2b_IV::iv5, Blake2b_IV::iv6, Blake2b_IV::iv7 };

        BLAKE2B_ROUNDS();

        h[0] ^= v[0] ^ v[8];
        h[1] ^= v[1] ^ v[9];
        h[2] ^= v[2] ^ v[10];
        h[3] ^= v[3] ^ v[11];
        h[4] ^= v[4] ^ v[12];
        h[5] ^= v[5] ^ v[13];
        h[6] ^= v[6] ^ v[14];
        h[7] ^= v[7] ^ v[15];
    }

    // Number of input bytes in the final block: 1..128 for a non-empty input.
    // An empty input keeps k == 0 so the final block is all zeros and nothing
    // is read from in.
    uint32_t k = in_len & 127;
    if (k == 0 && in_len != 0) k = 128;

    // Load only the words that contain at least one input byte.
    uint64_t m[16] = {
        (k > 0) ? in[0] : 0,
        (k > 8) ? in[1] : 0,
        (k > 16) ? in[2] : 0,
        (k > 24) ? in[3] : 0,
        (k > 32) ? in[4] : 0,
        (k > 40) ? in[5] : 0,
        (k > 48) ? in[6] : 0,
        (k > 56) ? in[7] : 0,
        (k > 64) ? in[8] : 0,
        (k > 72) ? in[9] : 0,
        (k > 80) ? in[10] : 0,
        (k > 88) ? in[11] : 0,
        (k > 96) ? in[12] : 0,
        (k > 104) ? in[13] : 0,
        (k > 112) ? in[14] : 0,
        (k > 120) ? in[15] : 0
    };

    // Byte offset of the final block inside the input.
    const uint32_t t = in_len - k;

    #pragma unroll
    for (uint32_t i = 0; i < 4; ++i) {
        const uint32_t pos = (nonce_offset + i) - t;
        if (pos < k) {
            const uint32_t shift = (pos % 8) * 8;
            m[pos / 8] = (m[pos / 8] & ~((uint64_t)255 << shift)) | ((uint64_t)((nonce >> (i * 8)) & 255) << shift);
        }
    }

    // Zero the bytes of the last partially used word that lie past in_len.
    if (k % 8) {
        m[k / 8] &= (uint64_t)(-1) >> (64 - (k % 8) * 8);
    }

    // Final block: the counter is the total length and iv6 is inverted.
    uint64_t v[16] = { h[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7], Blake2b_IV::iv0, Blake2b_IV::iv1, Blake2b_IV::iv2, Blake2b_IV::iv3, Blake2b_IV::iv4 ^ in_len, Blake2b_IV::iv5, ~Blake2b_IV::iv6, Blake2b_IV::iv7 };

    BLAKE2B_ROUNDS();

    if (out_len > 0) out[0] = h[0] ^ v[0] ^ v[8];
    if (out_len > 8) out[1] = h[1] ^ v[1] ^ v[9];
    if (out_len > 16) out[2] = h[2] ^ v[2] ^ v[10];
    if (out_len > 24) out[3] = h[3] ^ v[3] ^ v[11];
    if (out_len > 32) out[4] = h[4] ^ v[4] ^ v[12];
    if (out_len > 40) out[5] = h[5] ^ v[5] ^ v[13];
    if (out_len > 48) out[6] = h[6] ^ v[6] ^ v[14];
    if (out_len > 56) out[7] = h[7] ^ v[7] ^ v[15];
}

#undef G
#undef ROUND
#undef BLAKE2B_ROUNDS
Expand Down Expand Up @@ -280,6 +364,25 @@ __global__ void blake2b_initial_hash_double(void* out, const void* blockTemplate
t[7] = hash[7];
}

/**
 * Kernel: computes one 64-byte Blake2b hash of the block template per thread.
 *
 * Each thread hashes the same template with its own nonce
 * (start_nonce + flat thread index) inserted at nonce_offset, and stores the
 * eight resulting 64-bit words at out[index * 8 .. index * 8 + 7].
 *
 * There is no bounds check: the launch must cover exactly the number of
 * hashes that out has room for. Uses a 1D grid of 1D blocks.
 */
__global__ void blake2b_initial_hash_big(void* out, const void* blockTemplate, uint32_t blockTemplateSize, uint32_t start_nonce, uint32_t nonce_offset)
{
    const uint32_t thread_id = blockIdx.x * blockDim.x + threadIdx.x;

    uint64_t digest[8];
    blake2b_512_process_big_block<64>(
        digest,
        (const uint64_t*)blockTemplate,
        blockTemplateSize,
        start_nonce + thread_id,
        nonce_offset);

    // Destination slot for this thread's digest.
    uint64_t* dst = ((uint64_t*) out) + thread_id * 8;

    #pragma unroll
    for (uint32_t word = 0; word < 8; ++word) {
        dst[word] = digest[word];
    }
}

template<uint32_t registers_len, uint32_t registers_stride, uint32_t out_len>
__global__ void blake2b_hash_registers(void *out, const void* in)
{
Expand Down
5 changes: 2 additions & 3 deletions src/RandomX/hash.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ __global__ void find_shares(const void* hashes, uint64_t target, uint32_t* share
}
}

void hash(nvid_ctx *ctx, uint32_t nonce, uint64_t target, uint32_t *rescount, uint32_t *resnonce, uint32_t batch_size)
void hash(nvid_ctx *ctx, uint32_t nonce, uint32_t nonce_offset, uint64_t target, uint32_t *rescount, uint32_t *resnonce, uint32_t batch_size)
{
if (ctx->algorithm.id() == xmrig_cuda::Algorithm::RX_XLA) {
// sipesh(tempHash, sizeof(tempHash), input, inputSize, input, inputSize, 0, 0);
Expand All @@ -46,8 +46,7 @@ void hash(nvid_ctx *ctx, uint32_t nonce, uint64_t target, uint32_t *rescount, ui
CUDA_CHECK_KERNEL(ctx->device_id, blake2b_initial_hash_double << <batch_size / 32, 32 >> > (ctx->d_rx_hashes, ctx->d_input, ctx->inputlen, nonce));
}
else {
*rescount = 0;
return;
CUDA_CHECK_KERNEL(ctx->device_id, blake2b_initial_hash_big << <batch_size / 32, 32 >> > (ctx->d_rx_hashes, ctx->d_input, ctx->inputlen, nonce, nonce_offset));
}

CUDA_CHECK_KERNEL(ctx->device_id, fillAes1Rx4<RANDOMX_SCRATCHPAD_L3, false, 64><<<batch_size / 32, 32 * 4>>>(ctx->d_rx_hashes, ctx->d_long_state, batch_size));
Expand Down
125 changes: 125 additions & 0 deletions src/RandomX/yada/configuration.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/*
Copyright (c) 2018-2019, tevador <tevador@gmail.com>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#pragma once

// RandomX parameter set for the Yada variant (src/RandomX/yada).
// All values are plain preprocessor constants.

//Cache size in KiB (262144 KiB = 256 MiB). Must be a power of 2.
#define RANDOMX_ARGON_MEMORY 262144

//Number of Argon2d iterations for Cache initialization.
#define RANDOMX_ARGON_ITERATIONS 4

//Number of parallel lanes for Cache initialization.
#define RANDOMX_ARGON_LANES 1

//Argon2d salt. The string ends with the byte 0x03 (written as the "\x03" escape).
#define RANDOMX_ARGON_SALT "RandomXYadaCoin\x03"

//Number of random Cache accesses per Dataset item. Minimum is 2.
#define RANDOMX_CACHE_ACCESSES 8

//Target latency for SuperscalarHash (in cycles of the reference CPU).
#define RANDOMX_SUPERSCALAR_LATENCY 150

//Dataset base size in bytes (2147483648 = 2 GiB). Must be a power of 2.
#define RANDOMX_DATASET_BASE_SIZE 2147483648

//Dataset extra size in bytes (33554368 = 32 MiB - 64). Must be divisible by 64.
#define RANDOMX_DATASET_EXTRA_SIZE 33554368

//Number of instructions in a RandomX program. Must be divisible by 8.
#define RANDOMX_PROGRAM_SIZE 256

//Number of iterations during VM execution.
#define RANDOMX_PROGRAM_ITERATIONS 2048

//Number of chained VM executions per hash.
#define RANDOMX_PROGRAM_COUNT 8

//Scratchpad L3 size in bytes (2097152 = 2 MiB). Must be a power of 2.
#define RANDOMX_SCRATCHPAD_L3 2097152

//Scratchpad L2 size in bytes (262144 = 256 KiB). Must be a power of two and less than or equal to RANDOMX_SCRATCHPAD_L3.
#define RANDOMX_SCRATCHPAD_L2 262144

//Scratchpad L1 size in bytes (16384 = 16 KiB). Must be a power of two (minimum 64) and less than or equal to RANDOMX_SCRATCHPAD_L2.
#define RANDOMX_SCRATCHPAD_L1 16384

//Jump condition mask size in bits.
#define RANDOMX_JUMP_BITS 8

//Jump condition mask offset in bits. The sum of RANDOMX_JUMP_BITS and RANDOMX_JUMP_OFFSET must not exceed 16.
//Here the sum is 8 + 8 = 16, i.e. exactly at the limit.
#define RANDOMX_JUMP_OFFSET 8

/*
Instruction frequencies (per 256 opcodes)
Total sum of frequencies must be 256
Group subtotals in this file: integer 120 + floating point 94 + control 26
+ store 16 + no-op 0 = 256.
*/

//Integer instructions (subtotal: 120)
#define RANDOMX_FREQ_IADD_RS 16
#define RANDOMX_FREQ_IADD_M 7
#define RANDOMX_FREQ_ISUB_R 16
#define RANDOMX_FREQ_ISUB_M 7
#define RANDOMX_FREQ_IMUL_R 16
#define RANDOMX_FREQ_IMUL_M 4
#define RANDOMX_FREQ_IMULH_R 4
#define RANDOMX_FREQ_IMULH_M 1
#define RANDOMX_FREQ_ISMULH_R 4
#define RANDOMX_FREQ_ISMULH_M 1
#define RANDOMX_FREQ_IMUL_RCP 8
#define RANDOMX_FREQ_INEG_R 2
#define RANDOMX_FREQ_IXOR_R 15
#define RANDOMX_FREQ_IXOR_M 5
#define RANDOMX_FREQ_IROR_R 8
#define RANDOMX_FREQ_IROL_R 2
#define RANDOMX_FREQ_ISWAP_R 4

//Floating point instructions (subtotal: 94)
#define RANDOMX_FREQ_FSWAP_R 4
#define RANDOMX_FREQ_FADD_R 16
#define RANDOMX_FREQ_FADD_M 5
#define RANDOMX_FREQ_FSUB_R 16
#define RANDOMX_FREQ_FSUB_M 5
#define RANDOMX_FREQ_FSCAL_R 6
#define RANDOMX_FREQ_FMUL_R 32
#define RANDOMX_FREQ_FDIV_M 4
#define RANDOMX_FREQ_FSQRT_R 6

//Control instructions (subtotal: 26)
#define RANDOMX_FREQ_CBRANCH 25
#define RANDOMX_FREQ_CFROUND 1

//Store instruction (subtotal: 16)
#define RANDOMX_FREQ_ISTORE 16

//No-op instruction (disabled: frequency 0)
#define RANDOMX_FREQ_NOP 0
/* ------
256
*/
33 changes: 33 additions & 0 deletions src/RandomX/yada/randomx_yada.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
Copyright (c) 2019-2020 SChernykh
This file is part of RandomX CUDA.
RandomX CUDA is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
RandomX CUDA is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with RandomX CUDA. If not, see <http://www.gnu.org/licenses/>.
*/

#include "cryptonight.h"
#include "cuda_device.hpp"


#include <cuda.h>
#include <cuda_runtime.h>
#include <cstdint>


// Everything included below is compiled inside namespace RandomX_Yada.
// The order of the three directives matters: both the parameter header and the
// macro must be in effect before RandomX/common.hpp is included.
namespace RandomX_Yada {
// RANDOMX_* parameter macros from the configuration.h next to this file.
// NOTE(review): presumably consumed by RandomX/common.hpp — confirm.
#include "configuration.h"
// From here on the preprocessor rewrites fillAes4Rx4 to fillAes4Rx4_v104.
// NOTE(review): presumably selects a variant-specific generator — confirm where fillAes4Rx4_v104 is defined.
#define fillAes4Rx4 fillAes4Rx4_v104
#include "RandomX/common.hpp"
}
2 changes: 1 addition & 1 deletion src/crypto/common/Algorithm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ xmrig_cuda::Algorithm::Id xmrig_cuda::Algorithm::parse(uint32_t id)
# endif
# ifdef XMRIG_ALGO_RANDOMX
RX_XLA,
RX_0, RX_WOW, RX_ARQ, RX_GRAFT, RX_SFX, RX_KEVA,
RX_0, RX_WOW, RX_ARQ, RX_GRAFT, RX_SFX, RX_KEVA, RX_YADA,
# endif
# ifdef XMRIG_ALGO_ARGON2
AR2_CHUKWA, AR2_CHUKWA_V2, AR2_WRKZ,
Expand Down
1 change: 1 addition & 0 deletions src/crypto/common/Algorithm.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ class Algorithm
RX_GRAFT = 0x72151267, // "rx/graft" RandomGRAFT (Graft).
RX_SFX = 0x72151273, // "rx/sfx" RandomSFX (Safex Cash).
RX_KEVA = 0x7214116b, // "rx/keva" RandomKEVA (Keva).
RX_YADA = 0x72151279, // "rx/yada" RandomYada (YadaCoin).
AR2_CHUKWA = 0x61130000, // "argon2/chukwa" Argon2id (Chukwa).
AR2_CHUKWA_V2 = 0x61140000, // "argon2/chukwav2" Argon2id (Chukwa v2).
AR2_WRKZ = 0x61120000, // "argon2/wrkz" Argon2id (WRKZ)
Expand Down
12 changes: 6 additions & 6 deletions src/cryptonight.h
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,12 @@ void cryptonight_extra_cpu_final(nvid_ctx *ctx, uint32_t startNonce, uint64_t ta
void cuda_extra_cpu_set_data(nvid_ctx *ctx, const void *data, size_t len);
void randomx_prepare(nvid_ctx *ctx, const void *dataset, size_t dataset_size, uint32_t batch_size);

namespace RandomX_Arqma { void hash(nvid_ctx *ctx, uint32_t nonce, uint64_t target, uint32_t *rescount, uint32_t *resnonce, uint32_t batch_size); }
namespace RandomX_Monero { void hash(nvid_ctx *ctx, uint32_t nonce, uint64_t target, uint32_t *rescount, uint32_t *resnonce, uint32_t batch_size); }
namespace RandomX_Wownero { void hash(nvid_ctx *ctx, uint32_t nonce, uint64_t target, uint32_t *rescount, uint32_t *resnonce, uint32_t batch_size); }
namespace RandomX_Keva { void hash(nvid_ctx *ctx, uint32_t nonce, uint64_t target, uint32_t *rescount, uint32_t *resnonce, uint32_t batch_size); }
namespace RandomX_DefyX { void hash(nvid_ctx *ctx, uint32_t nonce, uint64_t target, uint32_t *rescount, uint32_t *resnonce, uint32_t batch_size); }
namespace RandomX_Graft { void hash(nvid_ctx *ctx, uint32_t nonce, uint64_t target, uint32_t *rescount, uint32_t *resnonce, uint32_t batch_size); }
namespace RandomX_Arqma { void hash(nvid_ctx *ctx, uint32_t nonce, uint32_t nonce_offset, uint64_t target, uint32_t *rescount, uint32_t *resnonce, uint32_t batch_size); }
namespace RandomX_Monero { void hash(nvid_ctx *ctx, uint32_t nonce, uint32_t nonce_offset, uint64_t target, uint32_t *rescount, uint32_t *resnonce, uint32_t batch_size); }
namespace RandomX_Wownero { void hash(nvid_ctx *ctx, uint32_t nonce, uint32_t nonce_offset, uint64_t target, uint32_t *rescount, uint32_t *resnonce, uint32_t batch_size); }
namespace RandomX_Keva { void hash(nvid_ctx *ctx, uint32_t nonce, uint32_t nonce_offset, uint64_t target, uint32_t *rescount, uint32_t *resnonce, uint32_t batch_size); }
namespace RandomX_Graft { void hash(nvid_ctx *ctx, uint32_t nonce, uint32_t nonce_offset, uint64_t target, uint32_t *rescount, uint32_t *resnonce, uint32_t batch_size); }
namespace RandomX_Yada { void hash(nvid_ctx* ctx, uint32_t nonce, uint32_t nonce_offset, uint64_t target, uint32_t* rescount, uint32_t* resnonce, uint32_t batch_size); }

#ifdef XMRIG_ALGO_KAWPOW
void kawpow_prepare(nvid_ctx *ctx, const void* cache, size_t cache_size, const void* dag_precalc, size_t dag_size, uint32_t height, const uint64_t* dag_sizes);
Expand Down
6 changes: 3 additions & 3 deletions src/version.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,14 @@
#define APP_ID "xmrig-cuda"
#define APP_NAME "XMRig"
#define APP_DESC "XMRig CUDA plugin"
#define APP_VERSION "6.21.1-mo1"
#define APP_VERSION "6.22.0-mo1"
#define APP_DOMAIN "xmrig.com"
#define APP_SITE "www.xmrig.com"
#define APP_COPYRIGHT "Copyright (C) 2016-2024 xmrig.com"

#define APP_VER_MAJOR 6
#define APP_VER_MINOR 21
#define APP_VER_PATCH 1
#define APP_VER_MINOR 22
#define APP_VER_PATCH 0

#define API_VERSION 4

Expand Down
Loading

0 comments on commit beb3cd0

Please sign in to comment.