diff --git a/Makefile.am b/Makefile.am index 3069bbe64..6606ad1a0 100644 --- a/Makefile.am +++ b/Makefile.am @@ -50,6 +50,7 @@ sgminer_SOURCES += config_parser.c config_parser.h sgminer_SOURCES += events.c events.h sgminer_SOURCES += ocl/build_kernel.c ocl/build_kernel.h sgminer_SOURCES += ocl/binary_kernel.c ocl/binary_kernel.h +sgminer_SOURCES += ocl/patch_kernel.c ocl/patch_kernel.h sgminer_SOURCES += kernel/*.cl sgminer_SOURCES += algorithm/scrypt.c algorithm/scrypt.h @@ -83,6 +84,7 @@ sgminer_SOURCES += algorithm/blake256.c algorithm/blake256.h sgminer_SOURCES += algorithm/blakecoin.c algorithm/blakecoin.h sgminer_SOURCES += algorithm/decred.c algorithm/decred.h sgminer_SOURCES += algorithm/pascal.c algorithm/pascal.h +sgminer_SOURCES += algorithm/evocoin.c algorithm/evocoin.h sgminer_SOURCES += algorithm/lbry.c algorithm/lbry.h bin_SCRIPTS = $(top_srcdir)/kernel/*.cl diff --git a/algorithm.c b/algorithm.c index 9d2f458e8..a320f2ba3 100644 --- a/algorithm.c +++ b/algorithm.c @@ -44,6 +44,7 @@ #include "algorithm/pascal.h" #include "algorithm/lbry.h" #include "algorithm/sibcoin.h" +#include "algorithm/evocoin.h" #include "compat.h" @@ -1229,6 +1230,8 @@ static algorithm_settings_t algos[] = { { "lbry", ALGO_LBRY, "", 1, 256, 256, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 2, 4 * 8 * 4194304, 0, lbry_regenhash, NULL, NULL, queue_lbry_kernel, gen_hash, NULL }, { "pascal", ALGO_PASCAL, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, pascal_regenhash, pascal_midstate, NULL, queue_pascal_kernel, NULL, NULL }, + { "evocoin", ALGO_X11EVO, "", 1, 1, 1, 0, 0, 0xFF, 0xFFFFULL, 0x0000ffffUL, 0, 0, CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE, evocoin_regenhash, NULL, NULL, queue_sph_kernel, gen_hash, append_x11_compiler_options }, + // Terminator (do not remove) { NULL, ALGO_UNK, "", 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL } @@ -1290,6 +1293,7 @@ static const char *lookup_algorithm_alias(const char *lookup_alias, uint8_t *nfa ALGO_ALIAS_NF("nscrypt", "ckolivas", 11); ALGO_ALIAS_NF("adaptive-nscrypt", "ckolivas", 11); ALGO_ALIAS_NF("adaptive-n-scrypt", "ckolivas", 11); + ALGO_ALIAS("x11evo", "evocoin"); ALGO_ALIAS("x11mod", "darkcoin-mod"); ALGO_ALIAS("x11", "darkcoin-mod"); ALGO_ALIAS("x11-gost", "sibcoin-mod"); diff --git a/algorithm.h b/algorithm.h index d3238a139..ed206570a 100644 --- a/algorithm.h +++ b/algorithm.h @@ -17,6 +17,7 @@ typedef enum { ALGO_SCRYPT, ALGO_NSCRYPT, ALGO_PASCAL, + ALGO_X11EVO, ALGO_X11, ALGO_X13, ALGO_X14, diff --git a/algorithm/evocoin.c b/algorithm/evocoin.c new file mode 100644 index 000000000..0517900c3 --- /dev/null +++ b/algorithm/evocoin.c @@ -0,0 +1,327 @@ +/*- +* Copyright 2009 Colin Percival, 2011 ArtForz +* All rights reserved. +* +* Redistribution and use in source and binary forms, with or without +* modification, are permitted provided that the following conditions +* are met: +* 1. Redistributions of source code must retain the above copyright +* notice, this list of conditions and the following disclaimer. +* 2. Redistributions in binary form must reproduce the above copyright +* notice, this list of conditions and the following disclaimer in the +* documentation and/or other materials provided with the distribution. +* +* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND +* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE +* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +* SUCH DAMAGE. +* +* This file was originally written by Colin Percival as part of the Tarsnap +* online backup system. +*/ + +#include "config.h" +#include "miner.h" + +#include +#include +#include +//#include + +#include "sph/sph_blake.h" +#include "sph/sph_bmw.h" +#include "sph/sph_groestl.h" +#include "sph/sph_jh.h" +#include "sph/sph_keccak.h" +#include "sph/sph_skein.h" +#include "sph/sph_luffa.h" +#include "sph/sph_cubehash.h" +#include "sph/sph_shavite.h" +#include "sph/sph_simd.h" +#include "sph/sph_echo.h" + +#include "evocoin.h" + + +/* Move init out of loop, so init once externally, and then use one single memcpy with that bigger memory block */ +typedef struct { + sph_blake512_context blake1; + sph_bmw512_context bmw1; + sph_groestl512_context groestl1; + sph_skein512_context skein1; + sph_jh512_context jh1; + sph_keccak512_context keccak1; + sph_luffa512_context luffa1; + sph_cubehash512_context cubehash1; + sph_shavite512_context shavite1; + sph_simd512_context simd1; + sph_echo512_context echo1; +} Xhash_context_holder; + +static Xhash_context_holder base_contexts; + + +static void init_Xhash_contexts() +{ + sph_blake512_init(&base_contexts.blake1); + sph_bmw512_init(&base_contexts.bmw1); + sph_groestl512_init(&base_contexts.groestl1); + sph_skein512_init(&base_contexts.skein1); + sph_jh512_init(&base_contexts.jh1); + sph_keccak512_init(&base_contexts.keccak1); + sph_luffa512_init(&base_contexts.luffa1); + sph_cubehash512_init(&base_contexts.cubehash1); + sph_shavite512_init(&base_contexts.shavite1); + sph_simd512_init(&base_contexts.simd1); + sph_echo512_init(&base_contexts.echo1); +} + + +uint32_t getCurrentAlgoSeq(uint32_t current_time, uint32_t base_time) { + return (current_time - base_time) / (60 * 60 * 24); +} + +void swap(uint8_t *a, uint8_t *b) { + uint8_t __tmp = *a; + *a = *b; + *b = __tmp; +} + +void initPerm(uint8_t n[], uint8_t count) { + int i; + for (i = 0; i0 && n[i - 1] >= n[i]; i--); + tail = i; + + if (tail > 0) { + for (j = count - 1; j>tail && n[j] <= n[tail - 1]; j--); + swap(&n[tail - 1], &n[j]); + } + + for (i = tail, j = count - 1; i diff1targ) + return -1; + if (tmp_hash7 > Htarg) + return 0; + return 1; +} + +void evocoin_regenhash(struct work *work) +{ + uint32_t data[20]; + uint32_t *nonce = (uint32_t *)(work->data + 76); + uint32_t *ohash = (uint32_t *)(work->hash); + + be32enc_vect(data, (const uint32_t *)work->data, 19); + data[19] = htobe32(*nonce); + + + unsigned char bin[4]; + uint32_t h32, *be32 = (uint32_t *)bin; + hex2bin(bin, work->pool->swork.ntime, 4); + h32 = be32toh(*be32); + + xhash(ohash, data, work->pool->swork.ntime); +} + +bool scanhash_evocoin(struct thr_info *thr, const unsigned char __maybe_unused *pmidstate, + unsigned char *pdata, unsigned char __maybe_unused *phash1, + unsigned char __maybe_unused *phash, const unsigned char *ptarget, + uint32_t max_nonce, uint32_t *last_nonce, uint32_t n) +{ + uint32_t *nonce = (uint32_t *)(pdata + 76); + uint32_t data[20]; + uint32_t tmp_hash7; + uint32_t Htarg = le32toh(((const uint32_t *)ptarget)[7]); + bool ret = false; + + be32enc_vect(data, (const uint32_t *)pdata, 19); + + while (1) { + uint32_t ostate[8]; + + *nonce = ++n; + data[19] = (n); + xhash(ostate, data, DEFAULT_NTIME); + tmp_hash7 = (ostate[7]); + + applog(LOG_INFO, "data7 %08lx", + (long unsigned int)data[7]); + + if (unlikely(tmp_hash7 <= Htarg)) { + ((uint32_t *)pdata)[19] = htobe32(n); + *last_nonce = n; + ret = true; + break; + } + + if (unlikely((n >= max_nonce) || thr->work_restart)) { + *last_nonce = n; + break; + } + } + + return ret; +} + + + + diff --git a/algorithm/evocoin.h b/algorithm/evocoin.h new file mode 100644 index 000000000..47221217d --- /dev/null +++ b/algorithm/evocoin.h @@ -0,0 +1,14 @@ +#ifndef EVOCOIN_H +#define EVOCOIN_H + +#include "miner.h" + +#define INITIAL_DATE 1462060800 +#define HASH_FUNC_COUNT 11 +#define DEFAULT_NTIME "00000000" + +extern int evocoin_test(unsigned char *pdata, const unsigned char *ptarget, uint32_t nonce); +extern void evocoin_regenhash(struct work *work); +extern void evocoin_twisted_code(char *result, const char *ntime, uint8_t *code); + +#endif /* EVOCOIN_H */ diff --git a/driver-opencl.c b/driver-opencl.c index 3e6667bcc..a7121d3ac 100644 --- a/driver-opencl.c +++ b/driver-opencl.c @@ -1138,7 +1138,7 @@ void *reinit_gpu(void *userdata) //free(clState); applog(LOG_INFO, "Reinit GPU thread %d", thr_id); - clStates[thr_id] = initCl(virtual_gpu, name, sizeof(name), &cgpu->algorithm); + clStates[thr_id] = initCl(virtual_gpu, name, sizeof(name), &cgpu->algorithm, thr); if (!clStates[thr_id]) { applog(LOG_ERR, "Failed to reinit GPU thread %d", thr_id); goto select_cgpu; @@ -1286,7 +1286,7 @@ static bool opencl_thread_prepare(struct thr_info *thr) strcpy(name, ""); applog(LOG_INFO, "Init GPU thread %i GPU %i virtual GPU %i", i, gpu, virtual_gpu); - clStates[i] = initCl(virtual_gpu, name, sizeof(name), &cgpu->algorithm); + clStates[i] = initCl(virtual_gpu, name, sizeof(name), &cgpu->algorithm, thr); if (!clStates[i]) { #ifdef HAVE_CURSES if (use_curses) diff --git a/miner.h b/miner.h index 0b0bceee8..f59784cbd 100644 --- a/miner.h +++ b/miner.h @@ -634,6 +634,8 @@ struct thr_info { int pool_no; struct timeval last; struct timeval sick; + uint8_t curSequence[12]; + struct work *work; bool pause; bool paused; diff --git a/ocl.c b/ocl.c index 44920d2ea..f267f3994 100644 --- a/ocl.c +++ b/ocl.c @@ -38,6 +38,7 @@ #include "algorithm/pluck.h" #include "algorithm/yescrypt.h" #include "algorithm/lyra2rev2.h" +#include "algorithm/evocoin.h" /* FIXME: only here for global config vars, replace with configuration.h * or similar as soon as config is in a struct instead of littered all @@ -180,7 +181,22 @@ static cl_int create_opencl_command_queue(cl_command_queue *command_queue, cl_co return status; } -_clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *algorithm) +static bool get_opencl_bit_align_support(cl_device_id *device) +{ + char extensions[1024]; + const char * camo = "cl_amd_media_ops"; + char *find; + cl_int status; + + status = clGetDeviceInfo(*device, CL_DEVICE_EXTENSIONS, 1024, (void *)extensions, NULL); + if (status != CL_SUCCESS) { + return false; + } + find = strstr(extensions, camo); + return !!find; +} + +_clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *algorithm, struct thr_info *thr) { cl_int status = 0; size_t compute_units = 0; @@ -248,6 +264,8 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg applog(LOG_ERR, "Error %d: Creating Command Queue. (clCreateCommandQueue)", status); return NULL; } + + clState->hasBitAlign = get_opencl_bit_align_support(&devices[gpu]); status = clGetDeviceInfo(devices[gpu], CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, sizeof(cl_uint), (void *)&preferred_vwidth, NULL); if (status != CL_SUCCESS) { @@ -696,14 +714,29 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg build_data->kernel_path = (*opt_kernel_path) ? opt_kernel_path : NULL; build_data->work_size = clState->wsize; + + build_data->has_bit_align = clState->hasBitAlign; + build_data->opencl_version = get_opencl_version(devices[gpu]); + build_data->patch_bfi = needs_bfi_patch(build_data); strcpy(build_data->binary_filename, filename); build_data->binary_filename[strlen(filename) - 3] = 0x00; // And one NULL terminator, cutting off the .cl suffix. strcat(build_data->binary_filename, pbuff[gpu]); - if (clState->goffset) { + if (clState->goffset) strcat(build_data->binary_filename, "g"); + + uint8_t x11EvoCode[12] = { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }; + + if (cgpu->algorithm.type == ALGO_X11EVO) { + char algoSuffixCode[100]; + char *ntime = "00000000"; + if (thr && thr->work) { + ntime = thr->work->pool->swork.ntime; + } + evocoin_twisted_code(algoSuffixCode, ntime, x11EvoCode); + strcat(build_data->binary_filename, algoSuffixCode); } set_base_compiler_options(build_data); @@ -718,17 +751,27 @@ _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *alg if (!(clState->program = load_opencl_binary_kernel(build_data))) { applog(LOG_NOTICE, "Building binary %s", build_data->binary_filename); - if (!(clState->program = build_opencl_kernel(build_data, filename))) { + if (!(clState->program = build_opencl_kernel(build_data, filename, x11EvoCode))) { return NULL; } // If it doesn't work, oh well, build it again next run - save_opencl_kernel(build_data, clState->program); + if (save_opencl_kernel(build_data, clState->program)) { + /* Program needs to be rebuilt, because the binary was patched */ + if (build_data->patch_bfi) { + clReleaseProgram(clState->program); + clState->program = load_opencl_binary_kernel(build_data); + } + } else { + if (build_data->patch_bfi) + quit(1, "Could not save kernel to file, but it is necessary to apply BFI patch"); + } } // Load kernels - applog(LOG_NOTICE, "Initialising kernel %s with nfactor %d, n %d", - filename, algorithm->nfactor, algorithm->n); + applog(LOG_NOTICE, "Initialising kernel %s with%s bitalign, %spatched BFI, nfactor %d, n %d", + filename, clState->hasBitAlign ? "" : "out", build_data->patch_bfi ? "" : "un", + algorithm->nfactor, algorithm->n); /* get a kernel object handle for a kernel with the given name */ clState->kernel = clCreateKernel(clState->program, "search", &status); diff --git a/ocl.h b/ocl.h index 311db29ec..0216b3319 100644 --- a/ocl.h +++ b/ocl.h @@ -18,6 +18,7 @@ typedef struct __clState { cl_mem buffer2; cl_mem buffer3; unsigned char cldata[256]; + bool hasBitAlign; bool goffset; cl_uint vwidth; size_t max_work_size; @@ -26,6 +27,6 @@ typedef struct __clState { } _clState; extern int clDevicesNum(void); -extern _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *algorithm); +extern _clState *initCl(unsigned int gpu, char *name, size_t nameSize, algorithm_t *algorithm, struct thr_info *thr); #endif /* OCL_H */ diff --git a/ocl/build_kernel.c b/ocl/build_kernel.c index a5407a010..95c296584 100644 --- a/ocl/build_kernel.c +++ b/ocl/build_kernel.c @@ -2,6 +2,8 @@ #include "miner.h" #include "build_kernel.h" +#include "patch_kernel.h" +#include "../algorithm/evocoin.h" static char *file_contents(const char *filename, int *length) { @@ -63,20 +65,141 @@ void set_base_compiler_options(build_kernel_data *data) sprintf(buf, "w%dl%d", (int)data->work_size, (int)sizeof(long)); strcat(data->binary_filename, buf); + if (data->has_bit_align) { + strcat(data->compiler_options, " -D BITALIGN"); + applog(LOG_DEBUG, "cl_amd_media_ops found, setting BITALIGN"); + } else + applog(LOG_DEBUG, "cl_amd_media_ops not found, will not set BITALIGN"); + if (data->kernel_path) { strcat(data->compiler_options, " -I \""); strcat(data->compiler_options, data->kernel_path); strcat(data->compiler_options, "\""); } + + if (data->patch_bfi) { + strcat(data->compiler_options, " -D BFI_INT"); + applog(LOG_DEBUG, "BFI_INT patch requiring device found, patched source with BFI_INT"); + } else + applog(LOG_DEBUG, "BFI_INT patch requiring device not found, will not BFI_INT patch"); if (data->opencl_version < 1.1) strcat(data->compiler_options, " -D OCL1"); } -cl_program build_opencl_kernel(build_kernel_data *data, const char *filename) +bool needs_bfi_patch(build_kernel_data *data) +{ + if (data->has_bit_align && + (data->opencl_version < 1.2) && + (strstr(data->platform, "Cedar") || + strstr(data->platform, "Redwood") || + strstr(data->platform, "Juniper") || + strstr(data->platform, "Cypress" ) || + strstr(data->platform, "Hemlock" ) || + strstr(data->platform, "Caicos" ) || + strstr(data->platform, "Turks" ) || + strstr(data->platform, "Barts" ) || + strstr(data->platform, "Cayman" ) || + strstr(data->platform, "Antilles" ) || + strstr(data->platform, "Wrestler" ) || + strstr(data->platform, "Zacate" ) || + strstr(data->platform, "WinterPark" ))) + return true; + else + return false; +} + + +typedef struct _algorithm_get_settings_t { + const char *algo; + bool inverter; + bool req_inverted; // inverted is 1 +} algorithm_get_settings_t; + +static algorithm_get_settings_t algo[] = { + {"x11evo_blake.cl", true, false }, + {"x11evo_bmw.cl", false, true }, + {"x11evo_groestl.cl", false, true }, + {"x11evo_skein.cl", false, true }, + {"x11evo_jh.cl", false, true }, + {"x11evo_keccak.cl", false, true }, + {"x11evo_luffa.cl", false, true }, + {"x11evo_cubehash.cl", true, true }, + {"x11evo_shavite.cl", false, false }, + {"x11evo_simd.cl", false, false }, + {"x11evo_echo.cl", false, false } +}; + + +char *generateSource(const uint8_t *code) +{ + + char *result; + result = (char *)malloc(65535); + + int pl; + char *source; + char path[255]; + + strcpy(path, "x11evo/x11evo_header.cl"); + source = file_contents(path, &pl); + strcpy (result, source); + if (source) free(source); + + // start from non-inverted signal + bool curState = false; + int i; + for (i = 0; i < HASH_FUNC_COUNT; i++) { + + // extract index + uint8_t idx = code[i]; + // calc swap requirements + if (curState != algo[idx].req_inverted) { + + // insert swap + strcat(result, "\nSWAP_RESULT;\n"); + + curState = !curState; + } + + if (algo[idx].inverter) { + curState = !curState; + } + + strcpy(path, "x11evo/"); + strcat(path, algo[idx].algo); + + source = file_contents(path, &pl); + strcat(result, source); + + if (source) free(source); + } + + // check final state + if (curState) { + strcat(result, "\nSWAP_RESULT;\n"); + } + + strcpy(path, "x11evo/x11evo_footer.cl"); + source = file_contents(path, &pl); + strcat(result, source); + if (source) free(source); + + return result; +} + +cl_program build_opencl_kernel(build_kernel_data *data, const char *filename, const uint8_t *x11EvoCode) { int pl; - char *source = file_contents(data->source_filename, &pl); + char *source; + if (x11EvoCode[0] != 255) { + source = generateSource(x11EvoCode); + pl = strlen(source) + 1; + } + else { + source = file_contents(data->source_filename, &pl); + } + size_t sourceSize[] = {(size_t)pl}; cl_int status; cl_program program = NULL; @@ -164,6 +287,14 @@ bool save_opencl_kernel(build_kernel_data *data, cl_program program) applog(LOG_ERR, "OpenCL compiler generated a zero sized binary!"); goto out; } + + /* Patch the kernel if the hardware supports BFI_INT but it needs to + * be hacked in */ + if (data->patch_bfi) { + if (kernel_bfi_patch(binaries[slot], binary_sizes[slot]) != 0) { + quit(1, "Could not patch BFI_INT, please report this issue."); + } + } /* Save the binary to be loaded next time */ binaryfile = fopen(data->binary_filename, "wb"); diff --git a/ocl/build_kernel.h b/ocl/build_kernel.h index edb57c892..82b24a5e6 100644 --- a/ocl/build_kernel.h +++ b/ocl/build_kernel.h @@ -22,10 +22,13 @@ typedef struct _build_kernel_data { char sgminer_path[255]; const char *kernel_path; size_t work_size; + bool has_bit_align; + bool patch_bfi; float opencl_version; } build_kernel_data; -cl_program build_opencl_kernel(build_kernel_data *data, const char *filename); +bool needs_bfi_patch(build_kernel_data *data); +cl_program build_opencl_kernel(build_kernel_data *data, const char *filename, const uint8_t *x11EvoCode); bool save_opencl_kernel(build_kernel_data *data, cl_program program); void set_base_compiler_options(build_kernel_data *data); diff --git a/ocl/patch_kernel.c b/ocl/patch_kernel.c new file mode 100644 index 000000000..7c72cebc9 --- /dev/null +++ b/ocl/patch_kernel.c @@ -0,0 +1,97 @@ +#include "patch_kernel.h" +#include "logging.h" +#include +#include + +static int advance(char **area, unsigned *remaining, const char *marker) +{ + char *find = (char *)memmem(*area, *remaining, (void *)marker, strlen(marker)); + + if (!find) { + applog(LOG_DEBUG, "Marker \"%s\" not found", marker); + return 0; + } + *remaining -= find - *area; + *area = find; + return 1; +} + +#define OP3_INST_BFE_UINT 4ULL +#define OP3_INST_BFE_INT 5ULL +#define OP3_INST_BFI_INT 6ULL +#define OP3_INST_BIT_ALIGN_INT 12ULL +#define OP3_INST_BYTE_ALIGN_INT 13ULL + +static void patch_opcodes(char *w, unsigned remaining) +{ + uint64_t *opcode = (uint64_t *)w; + int patched = 0; + int count_bfe_int = 0; + int count_bfe_uint = 0; + int count_byte_align = 0; + while (42) { + int clamp = (*opcode >> (32 + 31)) & 0x1; + int dest_rel = (*opcode >> (32 + 28)) & 0x1; + int alu_inst = (*opcode >> (32 + 13)) & 0x1f; + int s2_neg = (*opcode >> (32 + 12)) & 0x1; + int s2_rel = (*opcode >> (32 + 9)) & 0x1; + int pred_sel = (*opcode >> 29) & 0x3; + if (!clamp && !dest_rel && !s2_neg && !s2_rel && !pred_sel) { + if (alu_inst == OP3_INST_BFE_INT) { + count_bfe_int++; + } else if (alu_inst == OP3_INST_BFE_UINT) { + count_bfe_uint++; + } else if (alu_inst == OP3_INST_BYTE_ALIGN_INT) { + count_byte_align++; + // patch this instruction to BFI_INT + *opcode &= 0xfffc1fffffffffffULL; + *opcode |= OP3_INST_BFI_INT << (32 + 13); + patched++; + } + } + if (remaining <= 8) + break; + opcode++; + remaining -= 8; + } + applog(LOG_DEBUG, "Potential OP3 instructions identified: " + "%i BFE_INT, %i BFE_UINT, %i BYTE_ALIGN", + count_bfe_int, count_bfe_uint, count_byte_align); + applog(LOG_DEBUG, "Patched a total of %i BFI_INT instructions", patched); +} + +bool kernel_bfi_patch(char *binary, unsigned binary_size) +{ + unsigned remaining = binary_size; + char *w = binary; + unsigned int start, length; + + /* Find 2nd incidence of .text, and copy the program's + * position and length at a fixed offset from that. Then go + * back and find the 2nd incidence of \x7ELF (rewind by one + * from ELF) and then patch the opcocdes */ + if (!advance(&w, &remaining, ".text")) + return false; + w++; remaining--; + if (!advance(&w, &remaining, ".text")) { + /* 32 bit builds only one ELF */ + w--; remaining++; + } + memcpy(&start, w + 285, 4); + memcpy(&length, w + 289, 4); + w = binary; remaining = binary_size; + if (!advance(&w, &remaining, "ELF")) + return false; + w++; remaining--; + if (!advance(&w, &remaining, "ELF")) { + /* 32 bit builds only one ELF */ + w--; remaining++; + } + w--; remaining++; + w += start; remaining -= start; + applog(LOG_DEBUG, "At %p (%u rem. bytes), to begin patching", + w, remaining); + patch_opcodes(w, length); + + return true; +} diff --git a/ocl/patch_kernel.h b/ocl/patch_kernel.h new file mode 100644 index 000000000..d13b1869b --- /dev/null +++ b/ocl/patch_kernel.h @@ -0,0 +1,10 @@ +#ifndef PATCH_KERNEL_H +#define PATCH_KERNEL_H + +#include + +bool kernel_bfi_patch(char *binary, unsigned binary_size); + +#endif /* PATCH_KERNEL_H */ + + diff --git a/sgminer.c b/sgminer.c index 8a79d8bf7..4de0fc5f0 100644 --- a/sgminer.c +++ b/sgminer.c @@ -68,6 +68,8 @@ char *curly = ":D"; #include #endif +#include "algorithm/evocoin.h" +#define DEFAULT_SEQUENCE "0123456789A" static char packagename[256]; @@ -1910,6 +1912,19 @@ static bool jobj_binary(const json_t *obj, const char *key, } #endif +static void calc_midstate(struct work *work) +{ + unsigned char data[64]; + uint32_t *data32 = (uint32_t *)data; + sph_sha256_context ctx; + + flip64(data32, work->data); + sph_sha256_init(&ctx); + sph_sha256(&ctx, data, 64); + memcpy(work->midstate, ctx.val, 32); + endian_flip32(work->midstate, work->midstate); +} + static struct work *make_work(void) { struct work *w = (struct work *)calloc(1, sizeof(struct work)); @@ -1944,6 +1959,7 @@ void free_work(struct work *w) } static void calc_diff(struct work *work, double known); +char *workpadding = "000000800000000000000000000000000000000000000000000000000000000000000000000000000000000080020000"; #ifdef HAVE_LIBCURL /* Process transactions with GBT by storing the binary value of the first @@ -2138,7 +2154,9 @@ static void gen_gbt_work(struct pool *pool, struct work *work) flip32(work->data + 4 + 32, merkleroot); free(merkleroot); - memset(work->data + 4 + 32 + 32 + 4 + 4, 0, 4 + 48); /* nonce + padding */ + memset(work->data + 4 + 32 + 32 + 4 + 4, 0, 4); /* nonce */ + + hex2bin(work->data + 4 + 32 + 32 + 4 + 4 + 4, workpadding, 48); if (opt_debug) { char *header = bin2hex(work->data, 128); @@ -6752,12 +6770,190 @@ static void mutex_unlock_cleanup_handler(void *mutex) mutex_unlock((pthread_mutex_t *) mutex); } +static bool checkIfNeedSwitch(struct thr_info *mythr, struct work *work) +{ + bool algoSwitch = true; + + if (work && work->pool) { + + char result[100]; + uint8_t code[12]; + + evocoin_twisted_code(result, work->pool->swork.ntime, code); + + if (memcmp(code, mythr->curSequence, 12) == 0) { + algoSwitch = false; + } else { + memcpy(mythr->curSequence, code, 12); + } + } + + return ((work->pool->algorithm.type == ALGO_X11EVO) && (algoSwitch || !mythr->work)); +} + +static void twistTheRevolver(struct thr_info *mythr, struct work *work) +{ + applog(LOG_DEBUG, "Twist the revolver. Time = %s" , work->pool->swork.ntime); + + bool softReset = true; + int i; + + pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, NULL); + mutex_lock(&algo_switch_lock); + + mutex_lock(&algo_switch_wait_lock); + algo_switch_n++; + mutex_unlock(&algo_switch_wait_lock); + + //get the number of active threads to know when to switch... if we only check total threads, we may wait for ever on a disabled GPU + int active_threads = 0; + + rd_lock(&mining_thr_lock); + for (i = 0; i < mining_threads; i++) + { + struct cgpu_info *cgpu = mining_thr[i]->cgpu; + + //dont count dead/sick GPU threads or we may wait for ever also... + if (cgpu->deven != DEV_DISABLED && cgpu->status != LIFE_SICK && cgpu->status != LIFE_DEAD) + active_threads++; + } + rd_unlock(&mining_thr_lock); + + // If all threads are waiting now + if (algo_switch_n >= active_threads) + { + const char *opt; + + applog(LOG_DEBUG, "Applying pool settings for %s...", isnull(get_pool_name(work->pool), "")); + rd_lock(&mining_thr_lock); + + // Shutdown all threads first (necessary) + if (softReset) + { + applog(LOG_DEBUG, "Soft Reset... Shutdown threads..."); + for (i = 0; i < mining_threads; i++) + { + struct thr_info *thr = mining_thr[i]; + thr->cgpu->drv->thread_shutdown(thr); + } + } + + // Reset stats (e.g. for working_diff to be set properly in hash_sole_work) + zero_stats(); + + //apply switcher options + apply_switcher_options(pool_switch_options, work->pool); + + // Change algorithm for each thread (thread_prepare calls initCl) + if (softReset) + applog(LOG_DEBUG, "Soft Reset... Restarting threads..."); + + struct thr_info *thr; + + + for (i = 0; i < mining_threads; i++) + { + thr = mining_thr[i]; + + if (softReset) + { + thr->work = work; + thr->cgpu->drv->thread_prepare(thr); + thr->cgpu->drv->thread_init(thr); + } + + // Necessary because algorithms can have dramatically different diffs + thr->cgpu->drv->working_diff = 1; + } + + rd_unlock(&mining_thr_lock); + mutex_unlock(&algo_switch_lock); + + // Hard restart if needed + if (!softReset) + { + applog(LOG_DEBUG, "Hard Reset Mining Threads..."); + + //if devices changed... enable/disable as needed + //if (opt_isset(pool_switch_options, SWITCHER_APPLY_DEVICE)) + // enable_devices(); + + //figure out how many mining threads we'll need + unsigned int n_threads = 0; + pthread_t restart_thr; + +#ifdef HAVE_ADL + //change gpu threads if needed + //if (opt_isset(pool_switch_options, SWITCHER_APPLY_GT)) + //{ + // if (!empty_string((opt = get_pool_setting(work->pool->gpu_threads, default_profile.gpu_threads)))) + // set_gpu_threads(opt); + //} + + rd_lock(&devices_lock); + for (i = 0; i < total_devices; i++) + if (!opt_removedisabled || !opt_devs_enabled || devices_enabled[i]) + n_threads += devices[i]->threads; + rd_unlock(&devices_lock); +#else + n_threads = mining_threads; +#endif + + if (unlikely(pthread_create(&restart_thr, NULL, restart_mining_threads_thread, (void *)(intptr_t)n_threads))) + quit(1, "restart_mining_threads create thread failed"); + + applog(LOG_DEBUG, "Hard reset: Exiting mining thread %d", mythr->id); + pthread_exit(NULL); + } + else + { + // Signal other threads to start working now + mutex_lock(&algo_switch_wait_lock); + algo_switch_n = 0; + pthread_cond_broadcast(&algo_switch_wait_cond); + mutex_unlock(&algo_switch_wait_lock); + + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); + + // no need to wait, exit + return; + } + } + else { + mutex_unlock(&algo_switch_lock); + + if (!softReset) { + applog(LOG_DEBUG, "Hard reset: Exiting mining thread %d", mythr->id); + pthread_exit(NULL); + } + } + + pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL); + + // Set cleanup instructions in the event that the thread is cancelled + pthread_cleanup_push(mutex_unlock_cleanup_handler, (void *)&algo_switch_wait_lock); + // Wait for signal to start working again + mutex_lock(&algo_switch_wait_lock); + while (algo_switch_n > 0) + pthread_cond_wait(&algo_switch_wait_cond, &algo_switch_wait_lock); + // Non-zero argument will execute the cleanup handler after popping it + pthread_cleanup_pop(1); +} + + static void get_work_prepare_thread(struct thr_info *mythr, struct work *work) { int i; applog(LOG_DEBUG, "[THR%d] get_work_prepare_thread", mythr->id); + + + if (checkIfNeedSwitch(mythr, work)) { + twistTheRevolver(mythr, work); + return; + } + //if switcher is disabled if(opt_switchmode == SWITCH_OFF) return; @@ -8698,6 +8894,9 @@ static void restart_mining_threads(unsigned int new_n_threads) thr = mining_thr[k]; thr->id = k; thr->pool_no = pool->pool_no; + // init sequence + strcpy(thr->curSequence, DEFAULT_SEQUENCE); + applog(LOG_DEBUG, "Thread %d set pool = %d (%s)", k, thr->pool_no, isnull(get_pool_name(pools[thr->pool_no]), "")); thr->cgpu = cgpu; thr->device_thread = j; diff --git a/x11evo/x11evo_blake.cl b/x11evo/x11evo_blake.cl new file mode 100644 index 000000000..fee159948 --- /dev/null +++ b/x11evo/x11evo_blake.cl @@ -0,0 +1,50 @@ + +// blake + { + sph_u64 H0 = SPH_C64(0x6A09E667F3BCC908), H1 = SPH_C64(0xBB67AE8584CAA73B); + sph_u64 H2 = SPH_C64(0x3C6EF372FE94F82B), H3 = SPH_C64(0xA54FF53A5F1D36F1); + sph_u64 H4 = SPH_C64(0x510E527FADE682D1), H5 = SPH_C64(0x9B05688C2B3E6C1F); + sph_u64 H6 = SPH_C64(0x1F83D9ABFB41BD6B), H7 = SPH_C64(0x5BE0CD19137E2179); + sph_u64 S0 = 0, S1 = 0, S2 = 0, S3 = 0; + sph_u64 T0 = SPH_C64(0xFFFFFFFFFFFFFC00) + (80 << 3), T1 = 0xFFFFFFFFFFFFFFFF;; + + if ((T0 = SPH_T64(T0 + 1024)) < 1024) + { + T1 = SPH_T64(T1 + 1); + } + sph_u64 M0, M1, M2, M3, M4, M5, M6, M7; + sph_u64 M8, M9, MA, MB, MC, MD, ME, MF; + sph_u64 V0, V1, V2, V3, V4, V5, V6, V7; + sph_u64 V8, V9, VA, VB, VC, VD, VE, VF; + M0 = DEC64BE(block + 0); + M1 = DEC64BE(block + 8); + M2 = DEC64BE(block + 16); + M3 = DEC64BE(block + 24); + M4 = DEC64BE(block + 32); + M5 = DEC64BE(block + 40); + M6 = DEC64BE(block + 48); + M7 = DEC64BE(block + 56); + M8 = DEC64BE(block + 64); + M9 = DEC64BE(block + 72); + M9 &= 0xFFFFFFFF00000000; + M9 ^= SWAP4(gid); + MA = 0x8000000000000000; + MB = 0; + MC = 0; + MD = 1; + ME = 0; + MF = 0x280; + + COMPRESS64; + + hash.h8[0] = H0; + hash.h8[1] = H1; + hash.h8[2] = H2; + hash.h8[3] = H3; + hash.h8[4] = H4; + hash.h8[5] = H5; + hash.h8[6] = H6; + hash.h8[7] = H7; + } + + \ No newline at end of file diff --git a/x11evo/x11evo_bmw.cl b/x11evo/x11evo_bmw.cl new file mode 100644 index 000000000..7e975feb4 --- /dev/null +++ b/x11evo/x11evo_bmw.cl @@ -0,0 +1,57 @@ + +// bmw + { + sph_u64 BMW_H[16]; + for (unsigned u = 0; u < 16; u++) + BMW_H[u] = BMW_IV512[u]; + + sph_u64 BMW_h1[16], BMW_h2[16]; + sph_u64 mv[16]; + + mv[0] = SWAP8(hash.h8[0]); + mv[1] = SWAP8(hash.h8[1]); + mv[2] = SWAP8(hash.h8[2]); + mv[3] = SWAP8(hash.h8[3]); + mv[4] = SWAP8(hash.h8[4]); + mv[5] = SWAP8(hash.h8[5]); + mv[6] = SWAP8(hash.h8[6]); + mv[7] = SWAP8(hash.h8[7]); + mv[8] = 0x80; + mv[9] = 0; + mv[10] = 0; + mv[11] = 0; + mv[12] = 0; + mv[13] = 0; + mv[14] = 0; + mv[15] = 0x200; +#define M(x) (mv[x]) +#define H(x) (BMW_H[x]) +#define dH(x) (BMW_h2[x]) + + FOLDb; + +#undef M +#undef H +#undef dH + +#define M(x) (BMW_h2[x]) +#define H(x) (final_b[x]) +#define dH(x) (BMW_h1[x]) + + FOLDb; + +#undef M +#undef H +#undef dH + + hash.h8[0] = SWAP8(BMW_h1[8]); + hash.h8[1] = SWAP8(BMW_h1[9]); + hash.h8[2] = SWAP8(BMW_h1[10]); + hash.h8[3] = SWAP8(BMW_h1[11]); + hash.h8[4] = SWAP8(BMW_h1[12]); + hash.h8[5] = SWAP8(BMW_h1[13]); + hash.h8[6] = SWAP8(BMW_h1[14]); + hash.h8[7] = SWAP8(BMW_h1[15]); + + } + diff --git a/x11evo/x11evo_cubehash.cl b/x11evo/x11evo_cubehash.cl new file mode 100644 index 000000000..3c4fd3a6e --- /dev/null +++ b/x11evo/x11evo_cubehash.cl @@ -0,0 +1,60 @@ + +// cubehash.h1 + { + sph_u32 x0 = SPH_C32(0x2AEA2A61), x1 = SPH_C32(0x50F494D4), x2 = SPH_C32(0x2D538B8B), x3 = SPH_C32(0x4167D83E); + sph_u32 x4 = SPH_C32(0x3FEE2313), x5 = SPH_C32(0xC701CF8C), x6 = SPH_C32(0xCC39968E), x7 = SPH_C32(0x50AC5695); + sph_u32 x8 = SPH_C32(0x4D42C787), x9 = SPH_C32(0xA647A8B3), xa = SPH_C32(0x97CF0BEF), xb = SPH_C32(0x825B4537); + sph_u32 xc = SPH_C32(0xEEF864D2), xd = SPH_C32(0xF22090C4), xe = SPH_C32(0xD0E5CD33), xf = SPH_C32(0xA23911AE); + sph_u32 xg = SPH_C32(0xFCD398D9), xh = SPH_C32(0x148FE485), xi = SPH_C32(0x1B017BEF), xj = SPH_C32(0xB6444532); + sph_u32 xk = SPH_C32(0x6A536159), xl = SPH_C32(0x2FF5781C), xm = SPH_C32(0x91FA7934), xn = SPH_C32(0x0DBADEA9); + sph_u32 xo = SPH_C32(0xD65C8A2B), xp = SPH_C32(0xA5A70E75), xq = SPH_C32(0xB1C62456), xr = SPH_C32(0xBC796576); + sph_u32 xs = SPH_C32(0x1921C8F7), xt = SPH_C32(0xE7989AF1), xu = SPH_C32(0x7795D246), xv = SPH_C32(0xD43E3B44); + + x0 ^= SWAP4(hash.h4[1]); + x1 ^= SWAP4(hash.h4[0]); + x2 ^= SWAP4(hash.h4[3]); + x3 ^= SWAP4(hash.h4[2]); + x4 ^= SWAP4(hash.h4[5]); + x5 ^= SWAP4(hash.h4[4]); + x6 ^= SWAP4(hash.h4[7]); + x7 ^= SWAP4(hash.h4[6]); + + for (int i = 0; i < 13; i++) { + SIXTEEN_ROUNDS; + + if (i == 0) { + x0 ^= SWAP4(hash.h4[9]); + x1 ^= SWAP4(hash.h4[8]); + x2 ^= SWAP4(hash.h4[11]); + x3 ^= SWAP4(hash.h4[10]); + x4 ^= SWAP4(hash.h4[13]); + x5 ^= SWAP4(hash.h4[12]); + x6 ^= SWAP4(hash.h4[15]); + x7 ^= SWAP4(hash.h4[14]); + } + else if (i == 1) { + x0 ^= 0x80; + } + else if (i == 2) { + xv ^= SPH_C32(1); + } + } + + hash.h4[0] = x0; + hash.h4[1] = x1; + hash.h4[2] = x2; + hash.h4[3] = x3; + hash.h4[4] = x4; + hash.h4[5] = x5; + hash.h4[6] = x6; + hash.h4[7] = x7; + hash.h4[8] = x8; + hash.h4[9] = x9; + hash.h4[10] = xa; + hash.h4[11] = xb; + hash.h4[12] = xc; + hash.h4[13] = xd; + hash.h4[14] = xe; + hash.h4[15] = xf; + + } diff --git a/x11evo/x11evo_echo.cl b/x11evo/x11evo_echo.cl new file mode 100644 index 000000000..0f0b75cde --- /dev/null +++ b/x11evo/x11evo_echo.cl @@ -0,0 +1,69 @@ + +// echo + { + sph_u64 W00, W01, W10, W11, W20, W21, W30, W31, W40, W41, W50, W51, W60, W61, W70, W71, W80, W81, W90, W91, WA0, WA1, WB0, WB1, WC0, WC1, WD0, WD1, WE0, WE1, WF0, WF1; + sph_u64 Vb00, Vb01, Vb10, Vb11, Vb20, Vb21, Vb30, Vb31, Vb40, Vb41, Vb50, Vb51, Vb60, Vb61, Vb70, Vb71; + Vb00 = Vb10 = Vb20 = Vb30 = Vb40 = Vb50 = Vb60 = Vb70 = 512UL; + Vb01 = Vb11 = Vb21 = Vb31 = Vb41 = Vb51 = Vb61 = Vb71 = 0; + + sph_u32 K0 = 512; + sph_u32 K1 = 0; + sph_u32 K2 = 0; + sph_u32 K3 = 0; + + W00 = Vb00; + W01 = Vb01; + W10 = Vb10; + W11 = Vb11; + W20 = Vb20; + W21 = Vb21; + W30 = Vb30; + W31 = Vb31; + W40 = Vb40; + W41 = Vb41; + W50 = Vb50; + W51 = Vb51; + W60 = Vb60; + W61 = Vb61; + W70 = Vb70; + W71 = Vb71; + W80 = hash.h8[0]; + W81 = hash.h8[1]; + W90 = hash.h8[2]; + W91 = hash.h8[3]; + WA0 = hash.h8[4]; + WA1 = hash.h8[5]; + WB0 = hash.h8[6]; + WB1 = hash.h8[7]; + WC0 = 0x80; + WC1 = 0; + WD0 = 0; + WD1 = 0; + WE0 = 0; + WE1 = 0x200000000000000; + WF0 = 0x200; + WF1 = 0; + + for (unsigned u = 0; u < 10; u++) { + BIG_ROUND; + } + + Vb00 ^= hash.h8[0] ^ W00 ^ W80; + Vb01 ^= hash.h8[1] ^ W01 ^ W81; + Vb10 ^= hash.h8[2] ^ W10 ^ W90; + Vb11 ^= hash.h8[3] ^ W11 ^ W91; + Vb20 ^= hash.h8[4] ^ W20 ^ WA0; + Vb21 ^= hash.h8[5] ^ W21 ^ WA1; + Vb30 ^= hash.h8[6] ^ W30 ^ WB0; + Vb31 ^= hash.h8[7] ^ W31 ^ WB1; + + hash.h8[0] = Vb00; + hash.h8[1] = Vb01; + hash.h8[2] = Vb10; + hash.h8[3] = Vb11; + hash.h8[4] = Vb20; + hash.h8[5] = Vb21; + hash.h8[6] = Vb30; + hash.h8[7] = Vb31; + + } diff --git a/x11evo/x11evo_footer.cl b/x11evo/x11evo_footer.cl new file mode 100644 index 000000000..ea83175db --- /dev/null +++ b/x11evo/x11evo_footer.cl @@ -0,0 +1,7 @@ + + bool result = (hash.h8[3] <= target); + if (result) + output[output[0xFF]++] = SWAP4(gid); +} + +#endif // X11EVO_CL \ No newline at end of file diff --git a/x11evo/x11evo_groestl.cl b/x11evo/x11evo_groestl.cl new file mode 100644 index 000000000..a3f57ba02 --- /dev/null +++ b/x11evo/x11evo_groestl.cl @@ -0,0 +1,47 @@ + +// groestl + { + sph_u64 H[16]; + for (unsigned int u = 0; u < 15; u++) + H[u] = 0; +#if USE_LE + H[15] = ((sph_u64)(512 & 0xFF) << 56) | ((sph_u64)(512 & 0xFF00) << 40); +#else + H[15] = (sph_u64)512; +#endif + + sph_u64 g[16], m[16]; + m[0] = DEC64E(hash.h8[0]); + m[1] = DEC64E(hash.h8[1]); + m[2] = DEC64E(hash.h8[2]); + m[3] = DEC64E(hash.h8[3]); + m[4] = DEC64E(hash.h8[4]); + m[5] = DEC64E(hash.h8[5]); + m[6] = DEC64E(hash.h8[6]); + m[7] = DEC64E(hash.h8[7]); + for (unsigned int u = 0; u < 16; u++) + g[u] = m[u] ^ H[u]; + m[8] = 0x80; g[8] = m[8] ^ H[8]; + m[9] = 0; g[9] = m[9] ^ H[9]; + m[10] = 0; g[10] = m[10] ^ H[10]; + m[11] = 0; g[11] = m[11] ^ H[11]; + m[12] = 0; g[12] = m[12] ^ H[12]; + m[13] = 0; g[13] = m[13] ^ H[13]; + m[14] = 0; g[14] = m[14] ^ H[14]; + m[15] = 0x100000000000000; g[15] = m[15] ^ H[15]; + PERM_BIG_P(g); + PERM_BIG_Q(m); + for (unsigned int u = 0; u < 16; u++) + H[u] ^= g[u] ^ m[u]; + sph_u64 xH[16]; + for (unsigned int u = 0; u < 16; u++) + xH[u] = H[u]; + PERM_BIG_P(xH); + for (unsigned int u = 0; u < 16; u++) + H[u] ^= xH[u]; + for (unsigned int u = 0; u < 8; u++) + hash.h8[u] = DEC64E(H[u + 8]); + + } + + diff --git a/x11evo/x11evo_header.cl b/x11evo/x11evo_header.cl new file mode 100644 index 000000000..a3fcabe96 --- /dev/null +++ b/x11evo/x11evo_header.cl @@ -0,0 +1,103 @@ +#ifndef X11EVO_CL +#define X11EVO_CL + +#if __ENDIAN_LITTLE__ +#define SPH_LITTLE_ENDIAN 1 +#else +#define SPH_BIG_ENDIAN 1 +#endif + +#define SPH_UPTR sph_u64 + +typedef unsigned int sph_u32; +typedef int sph_s32; +#ifndef __OPENCL_VERSION__ +typedef unsigned long long sph_u64; +typedef long long sph_s64; +#else +typedef unsigned long sph_u64; +typedef long sph_s64; +#endif + +#define SPH_64 1 +#define SPH_64_TRUE 1 + +#define SPH_C32(x) ((sph_u32)(x ## U)) +#define SPH_T32(x) ((x) & SPH_C32(0xFFFFFFFF)) +#define SPH_ROTL32(x, n) SPH_T32(((x) << (n)) | ((x) >> (32 - (n)))) +#define SPH_ROTR32(x, n) SPH_ROTL32(x, (32 - (n))) + +#define SPH_C64(x) ((sph_u64)(x ## UL)) +#define SPH_T64(x) ((x) & SPH_C64(0xFFFFFFFFFFFFFFFF)) +#define SPH_ROTL64(x, n) SPH_T64(((x) << (n)) | ((x) >> (64 - (n)))) +#define SPH_ROTR64(x, n) SPH_ROTL64(x, (64 - (n))) + +#define SPH_ECHO_64 1 +#define SPH_KECCAK_64 1 +#define SPH_JH_64 1 +#define SPH_SIMD_NOCOPY 0 +#define SPH_KECCAK_NOCOPY 0 +#define SPH_SMALL_FOOTPRINT_GROESTL 0 +#define SPH_GROESTL_BIG_ENDIAN 0 +#define SPH_CUBEHASH_UNROLL 0 +#define SPH_COMPACT_BLAKE_64 0 +#define SPH_LUFFA_PARALLEL 0 +#define SPH_KECCAK_UNROLL 0 + +#define SWAP_RESULT do { \ + hash.h8[0] = SWAP8(hash.h8[0]); \ + hash.h8[1] = SWAP8(hash.h8[1]); \ + hash.h8[2] = SWAP8(hash.h8[2]); \ + hash.h8[3] = SWAP8(hash.h8[3]); \ + hash.h8[4] = SWAP8(hash.h8[4]); \ + hash.h8[5] = SWAP8(hash.h8[5]); \ + hash.h8[6] = SWAP8(hash.h8[6]); \ + hash.h8[7] = SWAP8(hash.h8[7]); \ + } while (0) + + +#include "blake.cl" +#include "bmw.cl" +#include "groestl.cl" +#include "jh.cl" +#include "keccak.cl" +#include "skein.cl" +#include "luffa.cl" +#include "cubehash.cl" +#include "shavite.cl" +#include "simd.cl" +#include "echo.cl" + +#define SWAP4(x) as_uint(as_uchar4(x).wzyx) +#define SWAP8(x) as_ulong(as_uchar8(x).s76543210) + +#if SPH_BIG_ENDIAN +#define DEC64E(x) (x) +#define DEC64BE(x) (*(const __global sph_u64 *) (x)); +#else +#define DEC64E(x) SWAP8(x) +#define DEC64BE(x) SWAP8(*(const __global sph_u64 *) (x)); +#endif + +__attribute__((reqd_work_group_size(WORKSIZE, 1, 1))) +__kernel void search(__global unsigned char* block, volatile __global uint* output, const ulong target) +{ + uint gid = get_global_id(0); + union { + unsigned char h1[64]; + uint h4[16]; + ulong h8[8]; + } hash; + + __local sph_u32 AES0[256], AES1[256], AES2[256], AES3[256]; + int init = get_local_id(0); + int step = get_local_size(0); + for (int i = init; i < 256; i += step) + { + AES0[i] = AES0_C[i]; + AES1[i] = AES1_C[i]; + AES2[i] = AES2_C[i]; + AES3[i] = AES3_C[i]; + } + barrier(CLK_LOCAL_MEM_FENCE); + diff --git a/x11evo/x11evo_jh.cl b/x11evo/x11evo_jh.cl new file mode 100644 index 000000000..2c6b99d32 --- /dev/null +++ b/x11evo/x11evo_jh.cl @@ -0,0 +1,48 @@ + +// jh + { + sph_u64 h0h = C64e(0x6fd14b963e00aa17), h0l = C64e(0x636a2e057a15d543), h1h = C64e(0x8a225e8d0c97ef0b), h1l = C64e(0xe9341259f2b3c361), h2h = C64e(0x891da0c1536f801e), h2l = C64e(0x2aa9056bea2b6d80), h3h = C64e(0x588eccdb2075baa6), h3l = C64e(0xa90f3a76baf83bf7); + sph_u64 h4h = C64e(0x0169e60541e34a69), h4l = C64e(0x46b58a8e2e6fe65a), h5h = C64e(0x1047a7d0c1843c24), h5l = C64e(0x3b6e71b12d5ac199), h6h = C64e(0xcf57f6ec9db1f856), h6l = C64e(0xa706887c5716b156), h7h = C64e(0xe3c2fcdfe68517fb), h7l = C64e(0x545a4678cc8cdd4b); + sph_u64 tmp; + + for (int i = 0; i < 2; i++) + { + if (i == 0) { + h0h ^= DEC64E(hash.h8[0]); + h0l ^= DEC64E(hash.h8[1]); + h1h ^= DEC64E(hash.h8[2]); + h1l ^= DEC64E(hash.h8[3]); + h2h ^= DEC64E(hash.h8[4]); + h2l ^= DEC64E(hash.h8[5]); + h3h ^= DEC64E(hash.h8[6]); + h3l ^= DEC64E(hash.h8[7]); + } + else if (i == 1) { + h4h ^= DEC64E(hash.h8[0]); + h4l ^= DEC64E(hash.h8[1]); + h5h ^= DEC64E(hash.h8[2]); + h5l ^= DEC64E(hash.h8[3]); + h6h ^= DEC64E(hash.h8[4]); + h6l ^= DEC64E(hash.h8[5]); + h7h ^= DEC64E(hash.h8[6]); + h7l ^= DEC64E(hash.h8[7]); + + h0h ^= 0x80; + h3l ^= 0x2000000000000; + } + E8; + } + + h4h ^= 0x80; + h7l ^= 0x2000000000000; + + hash.h8[0] = DEC64E(h4h); + hash.h8[1] = DEC64E(h4l); + hash.h8[2] = DEC64E(h5h); + hash.h8[3] = DEC64E(h5l); + hash.h8[4] = DEC64E(h6h); + hash.h8[5] = DEC64E(h6l); + hash.h8[6] = DEC64E(h7h); + hash.h8[7] = DEC64E(h7l); + } + diff --git a/x11evo/x11evo_keccak.cl b/x11evo/x11evo_keccak.cl new file mode 100644 index 000000000..f04d6d900 --- /dev/null +++ b/x11evo/x11evo_keccak.cl @@ -0,0 +1,44 @@ + +// keccak + { + sph_u64 a00 = 0, a01 = 0, a02 = 0, a03 = 0, a04 = 0; + sph_u64 a10 = 0, a11 = 0, a12 = 0, a13 = 0, a14 = 0; + sph_u64 a20 = 0, a21 = 0, a22 = 0, a23 = 0, a24 = 0; + sph_u64 a30 = 0, a31 = 0, a32 = 0, a33 = 0, a34 = 0; + sph_u64 a40 = 0, a41 = 0, a42 = 0, a43 = 0, a44 = 0; + + a10 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a20 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a31 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a22 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a23 = SPH_C64(0xFFFFFFFFFFFFFFFF); + a04 = SPH_C64(0xFFFFFFFFFFFFFFFF); + + a00 ^= SWAP8(hash.h8[0]); + a10 ^= SWAP8(hash.h8[1]); + a20 ^= SWAP8(hash.h8[2]); + a30 ^= SWAP8(hash.h8[3]); + a40 ^= SWAP8(hash.h8[4]); + a01 ^= SWAP8(hash.h8[5]); + a11 ^= SWAP8(hash.h8[6]); + a21 ^= SWAP8(hash.h8[7]); + a31 ^= 0x8000000000000001; + KECCAK_F_1600; + // Finalize the "lane complement" + a10 = ~a10; + a20 = ~a20; + + hash.h8[0] = SWAP8(a00); + hash.h8[1] = SWAP8(a10); + hash.h8[2] = SWAP8(a20); + hash.h8[3] = SWAP8(a30); + hash.h8[4] = SWAP8(a40); + hash.h8[5] = SWAP8(a01); + hash.h8[6] = SWAP8(a11); + hash.h8[7] = SWAP8(a21); + + + + + } + diff --git a/x11evo/x11evo_luffa.cl b/x11evo/x11evo_luffa.cl new file mode 100644 index 000000000..0dcb90589 --- /dev/null +++ b/x11evo/x11evo_luffa.cl @@ -0,0 +1,64 @@ + +// luffa + { + sph_u32 V00 = SPH_C32(0x6d251e69), V01 = SPH_C32(0x44b051e0), V02 = SPH_C32(0x4eaa6fb4), V03 = SPH_C32(0xdbf78465), V04 = SPH_C32(0x6e292011), V05 = SPH_C32(0x90152df4), V06 = SPH_C32(0xee058139), V07 = SPH_C32(0xdef610bb); + sph_u32 V10 = SPH_C32(0xc3b44b95), V11 = SPH_C32(0xd9d2f256), V12 = SPH_C32(0x70eee9a0), V13 = SPH_C32(0xde099fa3), V14 = SPH_C32(0x5d9b0557), V15 = SPH_C32(0x8fc944b3), V16 = SPH_C32(0xcf1ccf0e), V17 = SPH_C32(0x746cd581); + sph_u32 V20 = SPH_C32(0xf7efc89d), V21 = SPH_C32(0x5dba5781), V22 = SPH_C32(0x04016ce5), V23 = SPH_C32(0xad659c05), V24 = SPH_C32(0x0306194f), V25 = SPH_C32(0x666d1836), V26 = SPH_C32(0x24aa230a), V27 = SPH_C32(0x8b264ae7); + sph_u32 V30 = SPH_C32(0x858075d5), V31 = SPH_C32(0x36d79cce), V32 = SPH_C32(0xe571f7d7), V33 = SPH_C32(0x204b1f67), V34 = SPH_C32(0x35870c6a), V35 = SPH_C32(0x57e9e923), V36 = SPH_C32(0x14bcb808), V37 = SPH_C32(0x7cde72ce); + sph_u32 V40 = SPH_C32(0x6c68e9be), V41 = SPH_C32(0x5ec41e22), V42 = SPH_C32(0xc825b7c7), V43 = SPH_C32(0xaffb4363), V44 = SPH_C32(0xf5df3999), V45 = SPH_C32(0x0fc688f1), V46 = SPH_C32(0xb07224cc), V47 = SPH_C32(0x03e86cea); + + DECL_TMP8(M); + + M0 = hash.h4[1]; + M1 = hash.h4[0]; + M2 = hash.h4[3]; + M3 = hash.h4[2]; + M4 = hash.h4[5]; + M5 = hash.h4[4]; + M6 = hash.h4[7]; + M7 = hash.h4[6]; + + for (uint i = 0; i < 5; i++) + { + MI5; + LUFFA_P5; + + if (i == 0) { + M0 = hash.h4[9]; + M1 = hash.h4[8]; + M2 = hash.h4[11]; + M3 = hash.h4[10]; + M4 = hash.h4[13]; + M5 = hash.h4[12]; + M6 = hash.h4[15]; + M7 = hash.h4[14]; + } + else if (i == 1) { + M0 = 0x80000000; + M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; + } + else if (i == 2) { + M0 = M1 = M2 = M3 = M4 = M5 = M6 = M7 = 0; + } + else if (i == 3) { + hash.h4[1] = V00 ^ V10 ^ V20 ^ V30 ^ V40; + hash.h4[0] = V01 ^ V11 ^ V21 ^ V31 ^ V41; + hash.h4[3] = V02 ^ V12 ^ V22 ^ V32 ^ V42; + hash.h4[2] = V03 ^ V13 ^ V23 ^ V33 ^ V43; + hash.h4[5] = V04 ^ V14 ^ V24 ^ V34 ^ V44; + hash.h4[4] = V05 ^ V15 ^ V25 ^ V35 ^ V45; + hash.h4[7] = V06 ^ V16 ^ V26 ^ V36 ^ V46; + hash.h4[6] = V07 ^ V17 ^ V27 ^ V37 ^ V47; + } + } + hash.h4[9] = V00 ^ V10 ^ V20 ^ V30 ^ V40; + hash.h4[8] = V01 ^ V11 ^ V21 ^ V31 ^ V41; + hash.h4[11] = V02 ^ V12 ^ V22 ^ V32 ^ V42; + hash.h4[10] = V03 ^ V13 ^ V23 ^ V33 ^ V43; + hash.h4[13] = V04 ^ V14 ^ V24 ^ V34 ^ V44; + hash.h4[12] = V05 ^ V15 ^ V25 ^ V35 ^ V45; + hash.h4[15] = V06 ^ V16 ^ V26 ^ V36 ^ V46; + hash.h4[14] = V07 ^ V17 ^ V27 ^ V37 ^ V47; + + } + diff --git a/x11evo/x11evo_shavite.cl b/x11evo/x11evo_shavite.cl new file mode 100644 index 000000000..98b403982 --- /dev/null +++ b/x11evo/x11evo_shavite.cl @@ -0,0 +1,63 @@ + + +// shavite + { + // IV + sph_u32 h0 = SPH_C32(0x72FCCDD8), h1 = SPH_C32(0x79CA4727), h2 = SPH_C32(0x128A077B), h3 = SPH_C32(0x40D55AEC); + sph_u32 h4 = SPH_C32(0xD1901A06), h5 = SPH_C32(0x430AE307), h6 = SPH_C32(0xB29F5CD1), h7 = SPH_C32(0xDF07FBFC); + sph_u32 h8 = SPH_C32(0x8E45D73D), h9 = SPH_C32(0x681AB538), hA = SPH_C32(0xBDE86578), hB = SPH_C32(0xDD577E47); + sph_u32 hC = SPH_C32(0xE275EADE), hD = SPH_C32(0x502D9FCD), hE = SPH_C32(0xB9357178), hF = SPH_C32(0x022A4B9A); + + // state + sph_u32 rk00, rk01, rk02, rk03, rk04, rk05, rk06, rk07; + sph_u32 rk08, rk09, rk0A, rk0B, rk0C, rk0D, rk0E, rk0F; + sph_u32 rk10, rk11, rk12, rk13, rk14, rk15, rk16, rk17; + sph_u32 rk18, rk19, rk1A, rk1B, rk1C, rk1D, rk1E, rk1F; + + sph_u32 sc_count0 = (64 << 3), sc_count1 = 0, sc_count2 = 0, sc_count3 = 0; + + rk00 = hash.h4[0]; + rk01 = hash.h4[1]; + rk02 = hash.h4[2]; + rk03 = hash.h4[3]; + rk04 = hash.h4[4]; + rk05 = hash.h4[5]; + rk06 = hash.h4[6]; + rk07 = hash.h4[7]; + rk08 = hash.h4[8]; + rk09 = hash.h4[9]; + rk0A = hash.h4[10]; + rk0B = hash.h4[11]; + rk0C = hash.h4[12]; + rk0D = hash.h4[13]; + rk0E = hash.h4[14]; + rk0F = hash.h4[15]; + rk10 = 0x80; + rk11 = rk12 = rk13 = rk14 = rk15 = rk16 = rk17 = rk18 = rk19 = rk1A = 0; + rk1B = 0x2000000; + rk1C = rk1D = rk1E = 0; + rk1F = 0x2000000; + + c512(buf); + + hash.h4[0] = h0; + hash.h4[1] = h1; + hash.h4[2] = h2; + hash.h4[3] = h3; + hash.h4[4] = h4; + hash.h4[5] = h5; + hash.h4[6] = h6; + hash.h4[7] = h7; + hash.h4[8] = h8; + hash.h4[9] = h9; + hash.h4[10] = hA; + hash.h4[11] = hB; + hash.h4[12] = hC; + hash.h4[13] = hD; + hash.h4[14] = hE; + hash.h4[15] = hF; + + } + + + diff --git a/x11evo/x11evo_simd.cl b/x11evo/x11evo_simd.cl new file mode 100644 index 000000000..68f7770df --- /dev/null +++ b/x11evo/x11evo_simd.cl @@ -0,0 +1,119 @@ + +// simd + { + s32 q[256]; + unsigned char x[128]; + for (unsigned int i = 0; i < 64; i++) + x[i] = hash.h1[i]; + for (unsigned int i = 64; i < 128; i++) + x[i] = 0; + + u32 A0 = C32(0x0BA16B95), A1 = C32(0x72F999AD), A2 = C32(0x9FECC2AE), A3 = C32(0xBA3264FC), A4 = C32(0x5E894929), A5 = C32(0x8E9F30E5), A6 = C32(0x2F1DAA37), A7 = C32(0xF0F2C558); + u32 B0 = C32(0xAC506643), B1 = C32(0xA90635A5), B2 = C32(0xE25B878B), B3 = C32(0xAAB7878F), B4 = C32(0x88817F7A), B5 = C32(0x0A02892B), B6 = C32(0x559A7550), B7 = C32(0x598F657E); + u32 C0 = C32(0x7EEF60A1), C1 = C32(0x6B70E3E8), C2 = C32(0x9C1714D1), C3 = C32(0xB958E2A8), C4 = C32(0xAB02675E), C5 = C32(0xED1C014F), C6 = C32(0xCD8D65BB), C7 = C32(0xFDB7A257); + u32 D0 = C32(0x09254899), D1 = C32(0xD699C7BC), D2 = C32(0x9019B6DC), D3 = C32(0x2B9022E4), D4 = C32(0x8FA14956), D5 = C32(0x21BF9BD3), D6 = C32(0xB94D0943), D7 = C32(0x6FFDDC22); + + FFT256(0, 1, 0, ll1); + for (int i = 0; i < 256; i++) { + s32 tq; + + tq = q[i] + yoff_b_n[i]; + tq = REDS2(tq); + tq = REDS1(tq); + tq = REDS1(tq); + q[i] = (tq <= 128 ? tq : tq - 257); + } + + A0 ^= hash.h4[0]; + A1 ^= hash.h4[1]; + A2 ^= hash.h4[2]; + A3 ^= hash.h4[3]; + A4 ^= hash.h4[4]; + A5 ^= hash.h4[5]; + A6 ^= hash.h4[6]; + A7 ^= hash.h4[7]; + B0 ^= hash.h4[8]; + B1 ^= hash.h4[9]; + B2 ^= hash.h4[10]; + B3 ^= hash.h4[11]; + B4 ^= hash.h4[12]; + B5 ^= hash.h4[13]; + B6 ^= hash.h4[14]; + B7 ^= hash.h4[15]; + + ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27); + ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); + ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); + ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); + + STEP_BIG( + C32(0x0BA16B95), C32(0x72F999AD), C32(0x9FECC2AE), C32(0xBA3264FC), + C32(0x5E894929), C32(0x8E9F30E5), C32(0x2F1DAA37), C32(0xF0F2C558), + IF, 4, 13, PP8_4_); + STEP_BIG( + C32(0xAC506643), C32(0xA90635A5), C32(0xE25B878B), C32(0xAAB7878F), + C32(0x88817F7A), C32(0x0A02892B), C32(0x559A7550), C32(0x598F657E), + IF, 13, 10, PP8_5_); + STEP_BIG( + C32(0x7EEF60A1), C32(0x6B70E3E8), C32(0x9C1714D1), C32(0xB958E2A8), + C32(0xAB02675E), C32(0xED1C014F), C32(0xCD8D65BB), C32(0xFDB7A257), + IF, 10, 25, PP8_6_); + STEP_BIG( + C32(0x09254899), C32(0xD699C7BC), C32(0x9019B6DC), C32(0x2B9022E4), + C32(0x8FA14956), C32(0x21BF9BD3), C32(0xB94D0943), C32(0x6FFDDC22), + IF, 25, 4, PP8_0_); + + u32 COPY_A0 = A0, COPY_A1 = A1, COPY_A2 = A2, COPY_A3 = A3, COPY_A4 = A4, COPY_A5 = A5, COPY_A6 = A6, COPY_A7 = A7; + u32 COPY_B0 = B0, COPY_B1 = B1, COPY_B2 = B2, COPY_B3 = B3, COPY_B4 = B4, COPY_B5 = B5, COPY_B6 = B6, COPY_B7 = B7; + u32 COPY_C0 = C0, COPY_C1 = C1, COPY_C2 = C2, COPY_C3 = C3, COPY_C4 = C4, COPY_C5 = C5, COPY_C6 = C6, COPY_C7 = C7; + u32 COPY_D0 = D0, COPY_D1 = D1, COPY_D2 = D2, COPY_D3 = D3, COPY_D4 = D4, COPY_D5 = D5, COPY_D6 = D6, COPY_D7 = D7; + +#define q SIMD_Q + + A0 ^= 0x200; + + ONE_ROUND_BIG(0_, 0, 3, 23, 17, 27); + ONE_ROUND_BIG(1_, 1, 28, 19, 22, 7); + ONE_ROUND_BIG(2_, 2, 29, 9, 15, 5); + ONE_ROUND_BIG(3_, 3, 4, 13, 10, 25); + STEP_BIG( + COPY_A0, COPY_A1, COPY_A2, COPY_A3, + COPY_A4, COPY_A5, COPY_A6, COPY_A7, + IF, 4, 13, PP8_4_); + STEP_BIG( + COPY_B0, COPY_B1, COPY_B2, COPY_B3, + COPY_B4, COPY_B5, COPY_B6, COPY_B7, + IF, 13, 10, PP8_5_); + STEP_BIG( + COPY_C0, COPY_C1, COPY_C2, COPY_C3, + COPY_C4, COPY_C5, COPY_C6, COPY_C7, + IF, 10, 25, PP8_6_); + STEP_BIG( + COPY_D0, COPY_D1, COPY_D2, COPY_D3, + COPY_D4, COPY_D5, COPY_D6, COPY_D7, + IF, 25, 4, PP8_0_); +#undef q + + hash.h4[0] = A0; + hash.h4[1] = A1; + hash.h4[2] = A2; + hash.h4[3] = A3; + hash.h4[4] = A4; + hash.h4[5] = A5; + hash.h4[6] = A6; + hash.h4[7] = A7; + hash.h4[8] = B0; + hash.h4[9] = B1; + hash.h4[10] = B2; + hash.h4[11] = B3; + hash.h4[12] = B4; + hash.h4[13] = B5; + hash.h4[14] = B6; + hash.h4[15] = B7; + + } + + + + + diff --git a/x11evo/x11evo_skein.cl b/x11evo/x11evo_skein.cl new file mode 100644 index 000000000..4744394f5 --- /dev/null +++ b/x11evo/x11evo_skein.cl @@ -0,0 +1,32 @@ + +// skein + { + sph_u64 h0 = SPH_C64(0x4903ADFF749C51CE), h1 = SPH_C64(0x0D95DE399746DF03), h2 = SPH_C64(0x8FD1934127C79BCE), h3 = SPH_C64(0x9A255629FF352CB1), h4 = SPH_C64(0x5DB62599DF6CA7B0), h5 = SPH_C64(0xEABE394CA9D5C3F4), h6 = SPH_C64(0x991112C71A75B523), h7 = SPH_C64(0xAE18A40B660FCC33); + sph_u64 m0, m1, m2, m3, m4, m5, m6, m7; + sph_u64 bcount = 0; + + m0 = SWAP8(hash.h8[0]); + m1 = SWAP8(hash.h8[1]); + m2 = SWAP8(hash.h8[2]); + m3 = SWAP8(hash.h8[3]); + m4 = SWAP8(hash.h8[4]); + m5 = SWAP8(hash.h8[5]); + m6 = SWAP8(hash.h8[6]); + m7 = SWAP8(hash.h8[7]); + UBI_BIG(480, 64); + bcount = 0; + m0 = m1 = m2 = m3 = m4 = m5 = m6 = m7 = 0; + UBI_BIG(510, 8); + + + hash.h8[0] = SWAP8(h0); + hash.h8[1] = SWAP8(h1); + hash.h8[2] = SWAP8(h2); + hash.h8[3] = SWAP8(h3); + hash.h8[4] = SWAP8(h4); + hash.h8[5] = SWAP8(h5); + hash.h8[6] = SWAP8(h6); + hash.h8[7] = SWAP8(h7); + + } +