From 45652a21a8e3504e9d589b35d6ecf7820f7cae90 Mon Sep 17 00:00:00 2001 From: Jack Grigg Date: Thu, 4 Jan 2024 00:33:19 +0000 Subject: [PATCH 01/17] equihash: Import Tromp solver Source: zcash/zcash@01d5576a979816c928d524967e36c859adec49b6 License: MIT --- components/equihash/tromp/equi.h | 105 ++++ components/equihash/tromp/equi_miner.h | 651 ++++++++++++++++++++++++ components/equihash/tromp/osx_barrier.h | 75 +++ 3 files changed, 831 insertions(+) create mode 100644 components/equihash/tromp/equi.h create mode 100644 components/equihash/tromp/equi_miner.h create mode 100644 components/equihash/tromp/osx_barrier.h diff --git a/components/equihash/tromp/equi.h b/components/equihash/tromp/equi.h new file mode 100644 index 0000000000..90beb785be --- /dev/null +++ b/components/equihash/tromp/equi.h @@ -0,0 +1,105 @@ +// Equihash solver +// Copyright (c) 2016-2016 John Tromp, The Zcash developers + +#ifndef ZCASH_POW_TROMP_EQUI_H +#define ZCASH_POW_TROMP_EQUI_H + +#ifdef __APPLE__ +#include "pow/tromp/osx_barrier.h" +#endif +#include "compat/endian.h" + +#include // for types uint32_t,uint64_t +#include // for functions memset +#include // for function qsort + +#include + +typedef uint32_t u32; +typedef unsigned char uchar; + +// algorithm parameters, prefixed with W to reduce include file conflicts + +#ifndef WN +#define WN 200 +#endif + +#ifndef WK +#define WK 9 +#endif + +#define NDIGITS (WK+1) +#define DIGITBITS (WN/(NDIGITS)) + +static const u32 PROOFSIZE = 1<= *indices1) + return POW_OUT_OF_ORDER; + uchar hash0[WN/8], hash1[WN/8]; + int vrf0 = verifyrec(ctx, indices, hash0, r-1); + if (vrf0 != POW_OK) + return vrf0; + int vrf1 = verifyrec(ctx, indices1, hash1, r-1); + if (vrf1 != POW_OK) + return vrf1; + for (int i=0; i < WN/8; i++) + hash[i] = hash0[i] ^ hash1[i]; + int i, b = r * DIGITBITS; + for (i = 0; i < b/8; i++) + if (hash[i]) + return POW_NONZERO_XOR; + if ((b%8) && hash[i] >> (8-(b%8))) + return POW_NONZERO_XOR; + return POW_OK; +} + +int compu32(const void *pa, const void *pb) { + u32 a = *(u32 *)pa, b = *(u32 *)pb; + return a0 the leftmost leaf of its left subtree +// is less than the leftmost leaf of its right subtree + +// The algorithm below solves this by maintaining the trees +// in a graph of K layers, each split into buckets +// with buckets indexed by the first n-RESTBITS bits following +// the i*n 0s, each bucket having 4 * 2^RESTBITS slots, +// twice the number of subtrees expected to land there. + +#ifndef ZCASH_POW_TROMP_EQUI_MINER_H +#define ZCASH_POW_TROMP_EQUI_MINER_H + +#include "pow/tromp/equi.h" +#include +#include +#include +#include + +typedef uint16_t u16; +typedef uint64_t u64; + +#ifdef EQUIHASH_TROMP_ATOMIC +#include +typedef std::atomic au32; +#else +typedef u32 au32; +#endif + +#ifndef RESTBITS +#define RESTBITS 8 +#endif + +// 2_log of number of buckets +#define BUCKBITS (DIGITBITS-RESTBITS) + +#ifndef SAVEMEM +#if RESTBITS == 4 +// can't save memory in such small buckets +#define SAVEMEM 1 +#elif RESTBITS >= 8 +// take advantage of law of large numbers (sum of 2^8 random numbers) +// this reduces (200,9) memory to under 144MB, with negligible discarding +#define SAVEMEM 9/14 +#endif +#endif + +// number of buckets +static const u32 NBUCKETS = 1<> (2 * SLOTBITS - 1); +#else + return bid_s0_s1 >> (2 * SLOTBITS); +#endif + } + u32 slotid0() const { +#ifdef SLOTDIFF + return (bid_s0_s1 >> (SLOTBITS-1)) & SLOTMASK; +#else + return (bid_s0_s1 >> SLOTBITS) & SLOTMASK; +#endif + } + u32 slotid1() const { +#ifdef SLOTDIFF + return (slotid0() + 1 + (bid_s0_s1 & (SLOTMASK>>1))) & SLOTMASK; +#else + return bid_s0_s1 & SLOTMASK; +#endif + } +}; + +union hashunit { + u32 word; + uchar bytes[sizeof(u32)]; +}; + +#define WORDS(bits) ((bits + 31) / 32) +#define HASHWORDS0 WORDS(WN - DIGITBITS + RESTBITS) +#define HASHWORDS1 WORDS(WN - 2*DIGITBITS + RESTBITS) + +struct slot0 { + tree attr; + hashunit hash[HASHWORDS0]; +}; + +struct slot1 { + tree attr; + hashunit hash[HASHWORDS1]; +}; + +// a bucket is NSLOTS treenodes +typedef slot0 bucket0[NSLOTS]; +typedef slot1 bucket1[NSLOTS]; +// the N-bit hash consists of K+1 n-bit "digits" +// each of which corresponds to a layer of NBUCKETS buckets +typedef bucket0 digit0[NBUCKETS]; +typedef bucket1 digit1[NBUCKETS]; + +// size (in bytes) of hash in round 0 <= r < WK +u32 hashsize(const u32 r) { + const u32 hashbits = WN - (r+1) * DIGITBITS + RESTBITS; + return (hashbits + 7) / 8; +} + +u32 hashwords(u32 bytes) { + return (bytes + 3) / 4; +} + +// manages hash and tree data +struct htalloc { + u32 *heap0; + u32 *heap1; + bucket0 *trees0[(WK+1)/2]; + bucket1 *trees1[WK/2]; + u32 alloced; + htalloc() { + alloced = 0; + } + void alloctrees() { +// optimize xenoncat's fixed memory layout, avoiding any waste +// digit trees hashes trees hashes +// 0 0 A A A A A A . . . . . . +// 1 0 A A A A A A 1 B B B B B +// 2 0 2 C C C C C 1 B B B B B +// 3 0 2 C C C C C 1 3 D D D D +// 4 0 2 4 E E E E 1 3 D D D D +// 5 0 2 4 E E E E 1 3 5 F F F +// 6 0 2 4 6 . G G 1 3 5 F F F +// 7 0 2 4 6 . G G 1 3 5 7 H H +// 8 0 2 4 6 8 . I 1 3 5 7 H H + assert(DIGITBITS >= 16); // ensures hashes shorten by 1 unit every 2 digits + heap0 = (u32 *)alloc(1, sizeof(digit0)); + heap1 = (u32 *)alloc(1, sizeof(digit1)); + for (int r=0; r indices[size]) { + for (u32 i=0; i < size; i++) { + const u32 tmp = indices[i]; + indices[i] = indices[size+i]; + indices[size+i] = tmp; + } + } + } + void listindices0(u32 r, const tree t, u32 *indices) { + if (r == 0) { + *indices = t.getindex(); + return; + } + const bucket1 &buck = hta.trees1[--r/2][t.bucketid()]; + const u32 size = 1 << r; + u32 *indices1 = indices + size; + listindices1(r, buck[t.slotid0()].attr, indices); + listindices1(r, buck[t.slotid1()].attr, indices1); + orderindices(indices, size); + } + void listindices1(u32 r, const tree t, u32 *indices) { + const bucket0 &buck = hta.trees0[--r/2][t.bucketid()]; + const u32 size = 1 << r; + u32 *indices1 = indices + size; + listindices0(r, buck[t.slotid0()].attr, indices); + listindices0(r, buck[t.slotid1()].attr, indices1); + orderindices(indices, size); + } + void candidate(const tree t) { + proof prf; + listindices1(WK, t, prf); // assume WK odd + qsort(prf, PROOFSIZE, sizeof(u32), &compu32); + for (u32 i=1; i> (SLOTBITS-6); + binsizes[bsize]++; + } + for (u32 i=0; i < 65; i++) { +#ifdef HIST +// printf(" %d:%d", i, binsizes[i]); +#else +#ifdef SPARK + u32 sparks = binsizes[i] / SPARKSCALE; +#else + u32 sparks = 0; + for (u32 bs = binsizes[i]; bs; bs >>= 1) sparks++; + sparks = sparks * 7 / SPARKSCALE; +#endif +// printf("\342\226%c", '\201' + sparks); +#endif + } +// printf("\n"); +#endif + } + + struct htlayout { + htalloc hta; + u32 prevhashunits; + u32 nexthashunits; + u32 dunits; + u32 prevbo; + u32 nextbo; + + htlayout(equi *eq, u32 r): hta(eq->hta), prevhashunits(0), dunits(0) { + u32 nexthashbytes = hashsize(r); + nexthashunits = hashwords(nexthashbytes); + prevbo = 0; + nextbo = nexthashunits * sizeof(hashunit) - nexthashbytes; // 0-3 + if (r) { + u32 prevhashbytes = hashsize(r-1); + prevhashunits = hashwords(prevhashbytes); + prevbo = prevhashunits * sizeof(hashunit) - prevhashbytes; // 0-3 + dunits = prevhashunits - nexthashunits; + } + } + u32 getxhash0(const slot0* pslot) const { +#if WN == 200 && RESTBITS == 4 + return pslot->hash->bytes[prevbo] >> 4; +#elif WN == 200 && RESTBITS == 8 + return (pslot->hash->bytes[prevbo] & 0xf) << 4 | pslot->hash->bytes[prevbo+1] >> 4; +#elif WN == 200 && RESTBITS == 9 + return (pslot->hash->bytes[prevbo] & 0x1f) << 4 | pslot->hash->bytes[prevbo+1] >> 4; +#elif WN == 144 && RESTBITS == 4 + return pslot->hash->bytes[prevbo] & 0xf; +#else +#error non implemented +#endif + } + u32 getxhash1(const slot1* pslot) const { +#if WN == 200 && RESTBITS == 4 + return pslot->hash->bytes[prevbo] & 0xf; +#elif WN == 200 && RESTBITS == 8 + return pslot->hash->bytes[prevbo]; +#elif WN == 200 && RESTBITS == 9 + return (pslot->hash->bytes[prevbo]&1) << 8 | pslot->hash->bytes[prevbo+1]; +#elif WN == 144 && RESTBITS == 4 + return pslot->hash->bytes[prevbo] & 0xf; +#else +#error non implemented +#endif + } + bool equal(const hashunit *hash0, const hashunit *hash1) const { + return hash0[prevhashunits-1].word == hash1[prevhashunits-1].word; + } + }; + + struct collisiondata { +#ifdef XBITMAP +#if NSLOTS > 64 +#error can't use XBITMAP with more than 64 slots +#endif + u64 xhashmap[NRESTS]; + u64 xmap; +#else +#if RESTBITS <= 6 + typedef uchar xslot; +#else + typedef u16 xslot; +#endif + xslot nxhashslots[NRESTS]; + xslot xhashslots[NRESTS][XFULL]; + xslot *xx; + u32 n0; + u32 n1; +#endif + u32 s0; + + void clear() { +#ifdef XBITMAP + memset(xhashmap, 0, NRESTS * sizeof(u64)); +#else + memset(nxhashslots, 0, NRESTS * sizeof(xslot)); +#endif + } + bool addslot(u32 s1, u32 xh) { +#ifdef XBITMAP + xmap = xhashmap[xh]; + xhashmap[xh] |= (u64)1 << s1; + s0 = -1; + return true; +#else + n1 = (u32)nxhashslots[xh]++; + if (n1 >= XFULL) + return false; + xx = xhashslots[xh]; + xx[n1] = s1; + n0 = 0; + return true; +#endif + } + bool nextcollision() const { +#ifdef XBITMAP + return xmap != 0; +#else + return n0 < n1; +#endif + } + u32 slot() { +#ifdef XBITMAP + const u32 ffs = __builtin_ffsll(xmap); + s0 += ffs; xmap >>= ffs; + return s0; +#else + return (u32)xx[n0++]; +#endif + } + }; + + void digit0(const u32 id) { + uchar hash[HASHOUT]; + BLAKE2bState* state; + htlayout htl(this, 0); + const u32 hashbytes = hashsize(0); + for (u32 block = id; block < NBLOCKS; block += nthreads) { + state = blake2b_clone(blake_ctx); + u32 leb = htole32(block); + blake2b_update(state, (uchar *)&leb, sizeof(u32)); + blake2b_finalize(state, hash, HASHOUT); + blake2b_free(state); + for (u32 i = 0; i> 4; +#elif BUCKBITS == 11 && RESTBITS == 9 + const u32 bucketid = ((u32)ph[0] << 3) | ph[1] >> 5; +#elif BUCKBITS == 20 && RESTBITS == 4 + const u32 bucketid = ((((u32)ph[0] << 8) | ph[1]) << 4) | ph[2] >> 4; +#elif BUCKBITS == 12 && RESTBITS == 4 + const u32 bucketid = ((u32)ph[0] << 4) | ph[1] >> 4; + const u32 xhash = ph[1] & 0xf; +#else +#error not implemented +#endif + const u32 slot = getslot(0, bucketid); + if (slot >= NSLOTS) { + bfull++; + continue; + } + slot0 &s = hta.trees0[0][bucketid][slot]; + s.attr = tree(block * HASHESPERBLAKE + i); + memcpy(s.hash->bytes+htl.nextbo, ph+WN/8-hashbytes, hashbytes); + } + } + } + + void digitodd(const u32 r, const u32 id) { + htlayout htl(this, r); + collisiondata cd; + for (u32 bucketid=id; bucketid < NBUCKETS; bucketid += nthreads) { + cd.clear(); + slot0 *buck = htl.hta.trees0[(r-1)/2][bucketid]; // optimize by updating previous buck?! + u32 bsize = getnslots(r-1, bucketid); // optimize by putting bucketsize with block?! + for (u32 s1 = 0; s1 < bsize; s1++) { + const slot0 *pslot1 = buck + s1; // optimize by updating previous pslot1?! + if (!cd.addslot(s1, htl.getxhash0(pslot1))) { + xfull++; + continue; + } + for (; cd.nextcollision(); ) { + const u32 s0 = cd.slot(); + const slot0 *pslot0 = buck + s0; + if (htl.equal(pslot0->hash, pslot1->hash)) { + hfull++; + continue; + } + u32 xorbucketid; + const uchar *bytes0 = pslot0->hash->bytes, *bytes1 = pslot1->hash->bytes; +#if WN == 200 && BUCKBITS == 12 && RESTBITS == 8 + xorbucketid = (((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) & 0xf) << 8) + | (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]); +#elif WN == 200 && BUCKBITS == 11 && RESTBITS == 9 + xorbucketid = (((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) & 0xf) << 7) + | (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) >> 1; +#elif WN == 144 && BUCKBITS == 20 && RESTBITS == 4 + xorbucketid = ((((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) << 8) + | (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2])) << 4) + | (bytes0[htl.prevbo+3] ^ bytes1[htl.prevbo+3]) >> 4; +#elif WN == 96 && BUCKBITS == 12 && RESTBITS == 4 + xorbucketid = ((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) << 4) + | (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) >> 4; +#else +#error not implemented +#endif + const u32 xorslot = getslot(r, xorbucketid); + if (xorslot >= NSLOTS) { + bfull++; + continue; + } + slot1 &xs = htl.hta.trees1[r/2][xorbucketid][xorslot]; + xs.attr = tree(bucketid, s0, s1); + for (u32 i=htl.dunits; i < htl.prevhashunits; i++) + xs.hash[i-htl.dunits].word = pslot0->hash[i].word ^ pslot1->hash[i].word; + } + } + } + } + + void digiteven(const u32 r, const u32 id) { + htlayout htl(this, r); + collisiondata cd; + for (u32 bucketid=id; bucketid < NBUCKETS; bucketid += nthreads) { + cd.clear(); + slot1 *buck = htl.hta.trees1[(r-1)/2][bucketid]; // OPTIMIZE BY UPDATING PREVIOUS + u32 bsize = getnslots(r-1, bucketid); + for (u32 s1 = 0; s1 < bsize; s1++) { + const slot1 *pslot1 = buck + s1; // OPTIMIZE BY UPDATING PREVIOUS + if (!cd.addslot(s1, htl.getxhash1(pslot1))) { + xfull++; + continue; + } + for (; cd.nextcollision(); ) { + const u32 s0 = cd.slot(); + const slot1 *pslot0 = buck + s0; + if (htl.equal(pslot0->hash, pslot1->hash)) { + hfull++; + continue; + } + u32 xorbucketid; + const uchar *bytes0 = pslot0->hash->bytes, *bytes1 = pslot1->hash->bytes; +#if WN == 200 && BUCKBITS == 12 && RESTBITS == 8 + xorbucketid = ((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) << 4) + | (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) >> 4; +#elif WN == 200 && BUCKBITS == 11 && RESTBITS == 9 + xorbucketid = ((u32)(bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) << 3) + | (bytes0[htl.prevbo+3] ^ bytes1[htl.prevbo+3]) >> 5; +#elif WN == 144 && BUCKBITS == 20 && RESTBITS == 4 + xorbucketid = ((((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) << 8) + | (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2])) << 4) + | (bytes0[htl.prevbo+3] ^ bytes1[htl.prevbo+3]) >> 4; +#elif WN == 96 && BUCKBITS == 12 && RESTBITS == 4 + xorbucketid = ((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) << 4) + | (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) >> 4; +#else +#error not implemented +#endif + const u32 xorslot = getslot(r, xorbucketid); + if (xorslot >= NSLOTS) { + bfull++; + continue; + } + slot0 &xs = htl.hta.trees0[r/2][xorbucketid][xorslot]; + xs.attr = tree(bucketid, s0, s1); + for (u32 i=htl.dunits; i < htl.prevhashunits; i++) + xs.hash[i-htl.dunits].word = pslot0->hash[i].word ^ pslot1->hash[i].word; + } + } + } + } + + void digitK(const u32 id) { + collisiondata cd; + htlayout htl(this, WK); +u32 nc = 0; + for (u32 bucketid = id; bucketid < NBUCKETS; bucketid += nthreads) { + cd.clear(); + slot0 *buck = htl.hta.trees0[(WK-1)/2][bucketid]; + u32 bsize = getnslots(WK-1, bucketid); + for (u32 s1 = 0; s1 < bsize; s1++) { + const slot0 *pslot1 = buck + s1; + if (!cd.addslot(s1, htl.getxhash0(pslot1))) // assume WK odd + continue; + for (; cd.nextcollision(); ) { + const u32 s0 = cd.slot(); + if (htl.equal(buck[s0].hash, pslot1->hash)) +nc++, candidate(tree(bucketid, s0, s1)); + } + } + } +//printf(" %d candidates ", nc); + } +}; + +typedef struct { + u32 id; + pthread_t thread; + equi *eq; +} thread_ctx; + +void barrier(pthread_barrier_t *barry) { + const int rc = pthread_barrier_wait(barry); + if (rc != 0 && rc != PTHREAD_BARRIER_SERIAL_THREAD) { +// printf("Could not wait on barrier\n"); + pthread_exit(NULL); + } +} + +void *worker(void *vp) { + thread_ctx *tp = (thread_ctx *)vp; + equi *eq = tp->eq; + + if (tp->id == 0) +// printf("Digit 0\n"); + barrier(&eq->barry); + eq->digit0(tp->id); + barrier(&eq->barry); + if (tp->id == 0) { + eq->xfull = eq->bfull = eq->hfull = 0; + eq->showbsizes(0); + } + barrier(&eq->barry); + for (u32 r = 1; r < WK; r++) { + if (tp->id == 0) +// printf("Digit %d", r); + barrier(&eq->barry); + r&1 ? eq->digitodd(r, tp->id) : eq->digiteven(r, tp->id); + barrier(&eq->barry); + if (tp->id == 0) { +// printf(" x%d b%d h%d\n", eq->xfull, eq->bfull, eq->hfull); + eq->xfull = eq->bfull = eq->hfull = 0; + eq->showbsizes(r); + } + barrier(&eq->barry); + } + if (tp->id == 0) +// printf("Digit %d\n", WK); + eq->digitK(tp->id); + barrier(&eq->barry); + pthread_exit(NULL); + return 0; +} + +#endif // ZCASH_POW_TROMP_EQUI_MINER_H diff --git a/components/equihash/tromp/osx_barrier.h b/components/equihash/tromp/osx_barrier.h new file mode 100644 index 0000000000..659c40bf59 --- /dev/null +++ b/components/equihash/tromp/osx_barrier.h @@ -0,0 +1,75 @@ +#ifndef ZCASH_POW_TROMP_OSX_BARRIER_H +#define ZCASH_POW_TROMP_OSX_BARRIER_H + +#ifdef __APPLE__ + +#ifndef PTHREAD_BARRIER_H_ +#define PTHREAD_BARRIER_H_ + +#include +#include + +typedef int pthread_barrierattr_t; +#define PTHREAD_BARRIER_SERIAL_THREAD 1 + +typedef struct +{ + pthread_mutex_t mutex; + pthread_cond_t cond; + int count; + int tripCount; +} pthread_barrier_t; + + +int pthread_barrier_init(pthread_barrier_t *barrier, const pthread_barrierattr_t *attr, unsigned int count) +{ + if(count == 0) + { + errno = EINVAL; + return -1; + } + if(pthread_mutex_init(&barrier->mutex, 0) < 0) + { + return -1; + } + if(pthread_cond_init(&barrier->cond, 0) < 0) + { + pthread_mutex_destroy(&barrier->mutex); + return -1; + } + barrier->tripCount = count; + barrier->count = 0; + + return 0; +} + +int pthread_barrier_destroy(pthread_barrier_t *barrier) +{ + pthread_cond_destroy(&barrier->cond); + pthread_mutex_destroy(&barrier->mutex); + return 0; +} + +int pthread_barrier_wait(pthread_barrier_t *barrier) +{ + pthread_mutex_lock(&barrier->mutex); + ++(barrier->count); + if(barrier->count >= barrier->tripCount) + { + barrier->count = 0; + pthread_cond_broadcast(&barrier->cond); + pthread_mutex_unlock(&barrier->mutex); + return PTHREAD_BARRIER_SERIAL_THREAD; + } + else + { + pthread_cond_wait(&barrier->cond, &(barrier->mutex)); + pthread_mutex_unlock(&barrier->mutex); + return 0; + } +} + +#endif // PTHREAD_BARRIER_H_ +#endif // __APPLE__ + +#endif // ZCASH_POW_TROMP_OSX_BARRIER_H From 7ab6c47d5bb2378900a4a07efe8baab7f8da44bc Mon Sep 17 00:00:00 2001 From: Jack Grigg Date: Thu, 4 Jan 2024 00:43:19 +0000 Subject: [PATCH 02/17] equihash: Import `blake2b_simd` C bindings from `zcashd` Source: zcash/zcash@01d5576a979816c928d524967e36c859adec49b6 License: MIT --- components/equihash/src/blake2b.rs | 56 +++++++++++++++++++++++++++ components/equihash/tromp/blake2b.h | 59 +++++++++++++++++++++++++++++ 2 files changed, 115 insertions(+) create mode 100644 components/equihash/src/blake2b.rs create mode 100644 components/equihash/tromp/blake2b.h diff --git a/components/equihash/src/blake2b.rs b/components/equihash/src/blake2b.rs new file mode 100644 index 0000000000..432c4cb79b --- /dev/null +++ b/components/equihash/src/blake2b.rs @@ -0,0 +1,56 @@ +// Copyright (c) 2020-2022 The Zcash developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or https://www.opensource.org/licenses/mit-license.php . + +use blake2b_simd::{State, PERSONALBYTES}; +use libc::{c_uchar, size_t}; +use std::ptr; +use std::slice; + +#[no_mangle] +pub extern "C" fn blake2b_init( + output_len: size_t, + personalization: *const [c_uchar; PERSONALBYTES], +) -> *mut State { + let personalization = unsafe { personalization.as_ref().unwrap() }; + + Box::into_raw(Box::new( + blake2b_simd::Params::new() + .hash_length(output_len) + .personal(personalization) + .to_state(), + )) +} + +#[no_mangle] +pub extern "C" fn blake2b_clone(state: *const State) -> *mut State { + unsafe { state.as_ref() } + .map(|state| Box::into_raw(Box::new(state.clone()))) + .unwrap_or(ptr::null_mut()) +} + +#[no_mangle] +pub extern "C" fn blake2b_free(state: *mut State) { + if !state.is_null() { + drop(unsafe { Box::from_raw(state) }); + } +} + +#[no_mangle] +pub extern "C" fn blake2b_update(state: *mut State, input: *const c_uchar, input_len: size_t) { + let state = unsafe { state.as_mut().unwrap() }; + let input = unsafe { slice::from_raw_parts(input, input_len) }; + + state.update(input); +} + +#[no_mangle] +pub extern "C" fn blake2b_finalize(state: *mut State, output: *mut c_uchar, output_len: size_t) { + let state = unsafe { state.as_mut().unwrap() }; + let output = unsafe { slice::from_raw_parts_mut(output, output_len) }; + + // Allow consuming only part of the output. + let hash = state.finalize(); + assert!(output_len <= hash.as_bytes().len()); + output.copy_from_slice(&hash.as_bytes()[..output_len]); +} diff --git a/components/equihash/tromp/blake2b.h b/components/equihash/tromp/blake2b.h new file mode 100644 index 0000000000..39f377ff75 --- /dev/null +++ b/components/equihash/tromp/blake2b.h @@ -0,0 +1,59 @@ +// Copyright (c) 2020-2022 The Zcash developers +// Distributed under the MIT software license, see the accompanying +// file COPYING or https://www.opensource.org/licenses/mit-license.php . + +#ifndef ZCASH_RUST_INCLUDE_RUST_BLAKE2B_H +#define ZCASH_RUST_INCLUDE_RUST_BLAKE2B_H + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct BLAKE2bState; +typedef struct BLAKE2bState BLAKE2bState; +#define BLAKE2bPersonalBytes 16U + +/// Initializes a BLAKE2b state with no key and no salt. +/// +/// `personalization` MUST be a pointer to a 16-byte array. +/// +/// Please free this with `blake2b_free` when you are done. +BLAKE2bState* blake2b_init( + size_t output_len, + const unsigned char* personalization); + +/// Clones the given BLAKE2b state. +/// +/// Both states need to be separately freed with `blake2b_free` when you are +/// done. +BLAKE2bState* blake2b_clone(const BLAKE2bState* state); + +/// Frees a BLAKE2b state returned by `blake2b_init`. +void blake2b_free(BLAKE2bState* state); + +/// Adds input to the hash. You can call this any number of times. +void blake2b_update( + BLAKE2bState* state, + const unsigned char* input, + size_t input_len); + +/// Finalizes the `state` and stores the result in `output`. +/// +/// `output_len` MUST be less than or equal to the value that was passed as the +/// first parameter to `blake2b_init`. +/// +/// This method is idempotent, and calling it multiple times will give the same +/// result. It's also possible to call `blake2b_update` with more input in +/// between. +void blake2b_finalize( + BLAKE2bState* state, + unsigned char* output, + size_t output_len); + +#ifdef __cplusplus +} +#endif + +#endif // ZCASH_RUST_INCLUDE_RUST_BLAKE2B_H From 3aaeb8b7196b21a2cb81b5f4f5955450627e0f18 Mon Sep 17 00:00:00 2001 From: Jack Grigg Date: Thu, 4 Jan 2024 02:51:24 +0000 Subject: [PATCH 03/17] equihash: Modify Tromp solver to compile as C Co-authored-by: teor --- Cargo.lock | 1 + components/equihash/Cargo.toml | 3 + components/equihash/build.rs | 11 + components/equihash/tromp/blake2b.h | 8 - components/equihash/tromp/equi.h | 18 +- .../tromp/{equi_miner.h => equi_miner.c} | 411 ++++++++++-------- 6 files changed, 252 insertions(+), 200 deletions(-) create mode 100644 components/equihash/build.rs rename components/equihash/tromp/{equi_miner.h => equi_miner.c} (57%) diff --git a/Cargo.lock b/Cargo.lock index 390f2615ad..0a62dc38a3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -659,6 +659,7 @@ version = "0.2.0" dependencies = [ "blake2b_simd", "byteorder", + "cc", ] [[package]] diff --git a/components/equihash/Cargo.toml b/components/equihash/Cargo.toml index 2eb7c023d1..5998e54cd0 100644 --- a/components/equihash/Cargo.toml +++ b/components/equihash/Cargo.toml @@ -13,5 +13,8 @@ rust-version = "1.56.1" blake2b_simd = "1" byteorder = "1" +[build-dependencies] +cc = "1" + [lib] bench = false diff --git a/components/equihash/build.rs b/components/equihash/build.rs new file mode 100644 index 0000000000..86c77774c1 --- /dev/null +++ b/components/equihash/build.rs @@ -0,0 +1,11 @@ +//! Build script for the equihash tromp solver in C. + +fn main() { + cc::Build::new() + .include("tromp/") + .file("tromp/equi_miner.c") + .compile("equitromp"); + + // Tell Cargo to only rerun this build script if the tromp C files or headers change. + println!("cargo:rerun-if-changed=tromp"); +} diff --git a/components/equihash/tromp/blake2b.h b/components/equihash/tromp/blake2b.h index 39f377ff75..6a0927182a 100644 --- a/components/equihash/tromp/blake2b.h +++ b/components/equihash/tromp/blake2b.h @@ -7,10 +7,6 @@ #include -#ifdef __cplusplus -extern "C" { -#endif - struct BLAKE2bState; typedef struct BLAKE2bState BLAKE2bState; #define BLAKE2bPersonalBytes 16U @@ -52,8 +48,4 @@ void blake2b_finalize( unsigned char* output, size_t output_len); -#ifdef __cplusplus -} -#endif - #endif // ZCASH_RUST_INCLUDE_RUST_BLAKE2B_H diff --git a/components/equihash/tromp/equi.h b/components/equihash/tromp/equi.h index 90beb785be..165825ba67 100644 --- a/components/equihash/tromp/equi.h +++ b/components/equihash/tromp/equi.h @@ -5,15 +5,15 @@ #define ZCASH_POW_TROMP_EQUI_H #ifdef __APPLE__ -#include "pow/tromp/osx_barrier.h" +#include "osx_barrier.h" #endif -#include "compat/endian.h" +#include // for type bool #include // for types uint32_t,uint64_t #include // for functions memset #include // for function qsort -#include +#include "blake2b.h" typedef uint32_t u32; typedef unsigned char uchar; @@ -31,11 +31,11 @@ typedef unsigned char uchar; #define NDIGITS (WK+1) #define DIGITBITS (WN/(NDIGITS)) -static const u32 PROOFSIZE = 1< #include #include @@ -31,8 +31,8 @@ typedef uint16_t u16; typedef uint64_t u64; #ifdef EQUIHASH_TROMP_ATOMIC -#include -typedef std::atomic au32; +#include +typedef atomic_uint au32; #else typedef u32 au32; #endif @@ -56,21 +56,23 @@ typedef u32 au32; #endif // number of buckets -static const u32 NBUCKETS = 1<bid_s0_s1; } - u32 bucketid() const { + u32 bucketid(const tree *t) { #ifdef SLOTDIFF - return bid_s0_s1 >> (2 * SLOTBITS - 1); + return t->bid_s0_s1 >> (2 * SLOTBITS - 1); #else - return bid_s0_s1 >> (2 * SLOTBITS); + return t->bid_s0_s1 >> (2 * SLOTBITS); #endif } - u32 slotid0() const { + u32 slotid0(const tree *t) { #ifdef SLOTDIFF - return (bid_s0_s1 >> (SLOTBITS-1)) & SLOTMASK; + return (t->bid_s0_s1 >> (SLOTBITS-1)) & SLOTMASK; #else - return (bid_s0_s1 >> SLOTBITS) & SLOTMASK; + return (t->bid_s0_s1 >> SLOTBITS) & SLOTMASK; #endif } - u32 slotid1() const { + u32 slotid1(const tree *t) { #ifdef SLOTDIFF - return (slotid0() + 1 + (bid_s0_s1 & (SLOTMASK>>1))) & SLOTMASK; + return (slotid0() + 1 + (t->bid_s0_s1 & (SLOTMASK>>1))) & SLOTMASK; #else - return bid_s0_s1 & SLOTMASK; + return t->bid_s0_s1 & SLOTMASK; #endif } -}; union hashunit { u32 word; uchar bytes[sizeof(u32)]; }; +typedef union hashunit hashunit; #define WORDS(bits) ((bits + 31) / 32) #define HASHWORDS0 WORDS(WN - DIGITBITS + RESTBITS) @@ -133,11 +141,13 @@ struct slot0 { tree attr; hashunit hash[HASHWORDS0]; }; +typedef struct slot0 slot0; struct slot1 { tree attr; hashunit hash[HASHWORDS1]; }; +typedef struct slot1 slot1; // a bucket is NSLOTS treenodes typedef slot0 bucket0[NSLOTS]; @@ -164,10 +174,15 @@ struct htalloc { bucket0 *trees0[(WK+1)/2]; bucket1 *trees1[WK/2]; u32 alloced; - htalloc() { - alloced = 0; +}; +typedef struct htalloc htalloc; + htalloc htalloc_new() { + htalloc hta; + hta.alloced = 0; + return hta; } - void alloctrees() { + void *htalloc_alloc(htalloc *hta, const u32 n, const u32 sz); + void alloctrees(htalloc *hta) { // optimize xenoncat's fixed memory layout, avoiding any waste // digit trees hashes trees hashes // 0 0 A A A A A A . . . . . . @@ -180,29 +195,28 @@ struct htalloc { // 7 0 2 4 6 . G G 1 3 5 7 H H // 8 0 2 4 6 8 . I 1 3 5 7 H H assert(DIGITBITS >= 16); // ensures hashes shorten by 1 unit every 2 digits - heap0 = (u32 *)alloc(1, sizeof(digit0)); - heap1 = (u32 *)alloc(1, sizeof(digit1)); + hta->heap0 = (u32 *)htalloc_alloc(hta, 1, sizeof(digit0)); + hta->heap1 = (u32 *)htalloc_alloc(hta, 1, sizeof(digit1)); for (int r=0; rtrees0[r/2] = (bucket0 *)(hta->heap0 + r/2); else - trees1[r/2] = (bucket1 *)(heap1 + r/2); + hta->trees1[r/2] = (bucket1 *)(hta->heap1 + r/2); } - void dealloctrees() { - free(heap0); - free(heap1); + void dealloctrees(htalloc *hta) { + free(hta->heap0); + free(hta->heap1); } - void *alloc(const u32 n, const u32 sz) { + void *htalloc_alloc(htalloc *hta, const u32 n, const u32 sz) { void *mem = calloc(n, sz); assert(mem); - alloced += n * sz; + hta->alloced += n * sz; return mem; } -}; typedef au32 bsizes[NBUCKETS]; -u32 min(const u32 a, const u32 b) { +u32 minu32(const u32 a, const u32 b) { return a < b ? a : b; } @@ -217,37 +231,55 @@ struct equi { u32 hfull; u32 bfull; pthread_barrier_t barry; - equi(const u32 n_threads) { +}; +typedef struct equi equi; + void equi_clearslots(equi *eq); + equi *equi_new(const u32 n_threads) { assert(sizeof(hashunit) == 4); - nthreads = n_threads; - const int err = pthread_barrier_init(&barry, NULL, nthreads); + equi *eq = malloc(sizeof(equi)); + eq->nthreads = n_threads; + const int err = pthread_barrier_init(&eq->barry, NULL, eq->nthreads); assert(!err); - hta.alloctrees(); - nslots = (bsizes *)hta.alloc(2 * NBUCKETS, sizeof(au32)); - sols = (proof *)hta.alloc(MAXSOLS, sizeof(proof)); + + alloctrees(&eq->hta); + eq->nslots = (bsizes *)htalloc_alloc(&eq->hta, 2 * NBUCKETS, sizeof(au32)); + eq->sols = (proof *)htalloc_alloc(&eq->hta, MAXSOLS, sizeof(proof)); + + // C malloc() does not guarantee zero-initialized memory (but calloc() does) + eq->blake_ctx = NULL; + eq->nsols = 0; + equi_clearslots(eq); + + return eq; } - ~equi() { - hta.dealloctrees(); - free(nslots); - free(sols); - blake2b_free(blake_ctx); + void equi_free(equi *eq) { + dealloctrees(&eq->hta); + + free(eq->nslots); + free(eq->sols); + blake2b_free(eq->blake_ctx); + free(eq); } - void setstate(const BLAKE2bState *ctx) { - blake_ctx = blake2b_clone(ctx); - memset(nslots, 0, NBUCKETS * sizeof(au32)); // only nslots[0] needs zeroing - nsols = 0; + void equi_setstate(equi *eq, const BLAKE2bState *ctx) { + if (eq->blake_ctx) { + blake2b_free(eq->blake_ctx); + } + + eq->blake_ctx = blake2b_clone(ctx); + memset(eq->nslots, 0, NBUCKETS * sizeof(au32)); // only nslots[0] needs zeroing + eq->nsols = 0; } - u32 getslot(const u32 r, const u32 bucketi) { + u32 getslot(equi *eq, const u32 r, const u32 bucketi) { #ifdef EQUIHASH_TROMP_ATOMIC - return std::atomic_fetch_add_explicit(&nslots[r&1][bucketi], 1U, std::memory_order_relaxed); + return std::atomic_fetch_add_explicit(&eq->nslots[r&1][bucketi], 1U, std::memory_order_relaxed); #else - return nslots[r&1][bucketi]++; + return eq->nslots[r&1][bucketi]++; #endif } - u32 getnslots(const u32 r, const u32 bid) { // SHOULD BE METHOD IN BUCKET STRUCT - au32 &nslot = nslots[r&1][bid]; - const u32 n = min(nslot, NSLOTS); - nslot = 0; + u32 getnslots(equi *eq, const u32 r, const u32 bid) { // SHOULD BE METHOD IN BUCKET STRUCT + au32 *nslot = &eq->nslots[r&1][bid]; + const u32 n = minu32(*nslot, NSLOTS); + *nslot = 0; return n; } void orderindices(u32 *indices, u32 size) { @@ -259,47 +291,49 @@ struct equi { } } } - void listindices0(u32 r, const tree t, u32 *indices) { + void listindices1(equi *eq, u32 r, const tree t, u32 *indices); + void listindices0(equi *eq, u32 r, const tree t, u32 *indices) { if (r == 0) { - *indices = t.getindex(); + *indices = getindex(&t); return; } - const bucket1 &buck = hta.trees1[--r/2][t.bucketid()]; + const bucket1 *buck = &eq->hta.trees1[--r/2][bucketid(&t)]; const u32 size = 1 << r; u32 *indices1 = indices + size; - listindices1(r, buck[t.slotid0()].attr, indices); - listindices1(r, buck[t.slotid1()].attr, indices1); + listindices1(eq, r, (*buck)[slotid0(&t)].attr, indices); + listindices1(eq, r, (*buck)[slotid1(&t)].attr, indices1); orderindices(indices, size); } - void listindices1(u32 r, const tree t, u32 *indices) { - const bucket0 &buck = hta.trees0[--r/2][t.bucketid()]; + void listindices1(equi *eq, u32 r, const tree t, u32 *indices) { + const bucket0 *buck = &eq->hta.trees0[--r/2][bucketid(&t)]; const u32 size = 1 << r; u32 *indices1 = indices + size; - listindices0(r, buck[t.slotid0()].attr, indices); - listindices0(r, buck[t.slotid1()].attr, indices1); + listindices0(eq, r, (*buck)[slotid0(&t)].attr, indices); + listindices0(eq, r, (*buck)[slotid1(&t)].attr, indices1); orderindices(indices, size); } - void candidate(const tree t) { + void candidate(equi *eq, const tree t) { proof prf; - listindices1(WK, t, prf); // assume WK odd + listindices1(eq, WK, t, prf); // assume WK odd qsort(prf, PROOFSIZE, sizeof(u32), &compu32); for (u32 i=1; insols, 1U, std::memory_order_relaxed); #else - u32 soli = nsols++; + u32 soli = eq->nsols++; #endif if (soli < MAXSOLS) - listindices1(WK, t, sols[soli]); // assume WK odd + listindices1(eq, WK, t, eq->sols[soli]); // assume WK odd } - void showbsizes(u32 r) { +#ifdef EQUIHASH_SHOW_BUCKET_SIZES + void showbsizes(equi *eq, u32 r) { #if defined(HIST) || defined(SPARK) || defined(LOGSPARK) u32 binsizes[65]; memset(binsizes, 0, 65 * sizeof(u32)); for (u32 bucketid = 0; bucketid < NBUCKETS; bucketid++) { - u32 bsize = min(nslots[r&1][bucketid], NSLOTS) >> (SLOTBITS-6); + u32 bsize = minu32(eq->nslots[r&1][bucketid], NSLOTS) >> (SLOTBITS-6); binsizes[bsize]++; } for (u32 i=0; i < 65; i++) { @@ -319,6 +353,7 @@ struct equi { // printf("\n"); #endif } +#endif struct htlayout { htalloc hta; @@ -327,63 +362,69 @@ struct equi { u32 dunits; u32 prevbo; u32 nextbo; + }; + typedef struct htlayout htlayout; - htlayout(equi *eq, u32 r): hta(eq->hta), prevhashunits(0), dunits(0) { + htlayout htlayout_new(equi *eq, u32 r) { + htlayout htl; + htl.hta = eq->hta; + htl.prevhashunits = 0; + htl.dunits = 0; u32 nexthashbytes = hashsize(r); - nexthashunits = hashwords(nexthashbytes); - prevbo = 0; - nextbo = nexthashunits * sizeof(hashunit) - nexthashbytes; // 0-3 + htl.nexthashunits = hashwords(nexthashbytes); + htl.prevbo = 0; + htl.nextbo = htl.nexthashunits * sizeof(hashunit) - nexthashbytes; // 0-3 if (r) { u32 prevhashbytes = hashsize(r-1); - prevhashunits = hashwords(prevhashbytes); - prevbo = prevhashunits * sizeof(hashunit) - prevhashbytes; // 0-3 - dunits = prevhashunits - nexthashunits; + htl.prevhashunits = hashwords(prevhashbytes); + htl.prevbo = htl.prevhashunits * sizeof(hashunit) - prevhashbytes; // 0-3 + htl.dunits = htl.prevhashunits - htl.nexthashunits; } + return htl; } - u32 getxhash0(const slot0* pslot) const { + u32 getxhash0(const htlayout *htl, const slot0* pslot) { #if WN == 200 && RESTBITS == 4 - return pslot->hash->bytes[prevbo] >> 4; + return pslot->hash->bytes[htl->prevbo] >> 4; #elif WN == 200 && RESTBITS == 8 - return (pslot->hash->bytes[prevbo] & 0xf) << 4 | pslot->hash->bytes[prevbo+1] >> 4; + return (pslot->hash->bytes[htl->prevbo] & 0xf) << 4 | pslot->hash->bytes[htl->prevbo+1] >> 4; #elif WN == 200 && RESTBITS == 9 - return (pslot->hash->bytes[prevbo] & 0x1f) << 4 | pslot->hash->bytes[prevbo+1] >> 4; + return (pslot->hash->bytes[htl->prevbo] & 0x1f) << 4 | pslot->hash->bytes[htl->prevbo+1] >> 4; #elif WN == 144 && RESTBITS == 4 - return pslot->hash->bytes[prevbo] & 0xf; + return pslot->hash->bytes[htl->prevbo] & 0xf; #else #error non implemented #endif } - u32 getxhash1(const slot1* pslot) const { + u32 getxhash1(const htlayout *htl, const slot1* pslot) { #if WN == 200 && RESTBITS == 4 - return pslot->hash->bytes[prevbo] & 0xf; + return pslot->hash->bytes[htl->prevbo] & 0xf; #elif WN == 200 && RESTBITS == 8 - return pslot->hash->bytes[prevbo]; + return pslot->hash->bytes[htl->prevbo]; #elif WN == 200 && RESTBITS == 9 - return (pslot->hash->bytes[prevbo]&1) << 8 | pslot->hash->bytes[prevbo+1]; + return (pslot->hash->bytes[htl->prevbo]&1) << 8 | pslot->hash->bytes[htl->prevbo+1]; #elif WN == 144 && RESTBITS == 4 - return pslot->hash->bytes[prevbo] & 0xf; + return pslot->hash->bytes[htl->prevbo] & 0xf; #else #error non implemented #endif } - bool equal(const hashunit *hash0, const hashunit *hash1) const { - return hash0[prevhashunits-1].word == hash1[prevhashunits-1].word; + bool htlayout_equal(const htlayout *htl, const hashunit *hash0, const hashunit *hash1) { + return hash0[htl->prevhashunits-1].word == hash1[htl->prevhashunits-1].word; } - }; +#if RESTBITS <= 6 + typedef uchar xslot; +#else + typedef u16 xslot; +#endif struct collisiondata { #ifdef XBITMAP #if NSLOTS > 64 -#error can't use XBITMAP with more than 64 slots +#error cant use XBITMAP with more than 64 slots #endif u64 xhashmap[NRESTS]; u64 xmap; #else -#if RESTBITS <= 6 - typedef uchar xslot; -#else - typedef u16 xslot; -#endif xslot nxhashslots[NRESTS]; xslot xhashslots[NRESTS][XFULL]; xslot *xx; @@ -391,55 +432,56 @@ struct equi { u32 n1; #endif u32 s0; + }; + typedef struct collisiondata collisiondata; - void clear() { + void collisiondata_clear(collisiondata *cd) { #ifdef XBITMAP - memset(xhashmap, 0, NRESTS * sizeof(u64)); + memset(cd->xhashmap, 0, NRESTS * sizeof(u64)); #else - memset(nxhashslots, 0, NRESTS * sizeof(xslot)); + memset(cd->nxhashslots, 0, NRESTS * sizeof(xslot)); #endif } - bool addslot(u32 s1, u32 xh) { + bool addslot(collisiondata *cd, u32 s1, u32 xh) { #ifdef XBITMAP xmap = xhashmap[xh]; xhashmap[xh] |= (u64)1 << s1; s0 = -1; return true; #else - n1 = (u32)nxhashslots[xh]++; - if (n1 >= XFULL) + cd->n1 = (u32)cd->nxhashslots[xh]++; + if (cd->n1 >= XFULL) return false; - xx = xhashslots[xh]; - xx[n1] = s1; - n0 = 0; + cd->xx = cd->xhashslots[xh]; + cd->xx[cd->n1] = s1; + cd->n0 = 0; return true; #endif } - bool nextcollision() const { + bool nextcollision(const collisiondata *cd) { #ifdef XBITMAP - return xmap != 0; + return cd->xmap != 0; #else - return n0 < n1; + return cd->n0 < cd->n1; #endif } - u32 slot() { + u32 slot(collisiondata *cd) { #ifdef XBITMAP - const u32 ffs = __builtin_ffsll(xmap); - s0 += ffs; xmap >>= ffs; + const u32 ffs = __builtin_ffsll(cd->xmap); + s0 += ffs; cd->xmap >>= ffs; return s0; #else - return (u32)xx[n0++]; + return (u32)cd->xx[cd->n0++]; #endif } - }; - void digit0(const u32 id) { + void equi_digit0(equi *eq, const u32 id) { uchar hash[HASHOUT]; BLAKE2bState* state; - htlayout htl(this, 0); + htlayout htl = htlayout_new(eq, 0); const u32 hashbytes = hashsize(0); - for (u32 block = id; block < NBLOCKS; block += nthreads) { - state = blake2b_clone(blake_ctx); + for (u32 block = id; block < NBLOCKS; block += eq->nthreads) { + state = blake2b_clone(eq->blake_ctx); u32 leb = htole32(block); blake2b_update(state, (uchar *)&leb, sizeof(u32)); blake2b_finalize(state, hash, HASHOUT); @@ -460,36 +502,36 @@ struct equi { #else #error not implemented #endif - const u32 slot = getslot(0, bucketid); + const u32 slot = getslot(eq, 0, bucketid); if (slot >= NSLOTS) { - bfull++; + eq->bfull++; continue; } - slot0 &s = hta.trees0[0][bucketid][slot]; - s.attr = tree(block * HASHESPERBLAKE + i); - memcpy(s.hash->bytes+htl.nextbo, ph+WN/8-hashbytes, hashbytes); + slot0 *s = &eq->hta.trees0[0][bucketid][slot]; + s->attr = tree_from_idx(block * HASHESPERBLAKE + i); + memcpy(s->hash->bytes+htl.nextbo, ph+WN/8-hashbytes, hashbytes); } } } - void digitodd(const u32 r, const u32 id) { - htlayout htl(this, r); + void equi_digitodd(equi *eq, const u32 r, const u32 id) { + htlayout htl = htlayout_new(eq, r); collisiondata cd; - for (u32 bucketid=id; bucketid < NBUCKETS; bucketid += nthreads) { - cd.clear(); + for (u32 bucketid=id; bucketid < NBUCKETS; bucketid += eq->nthreads) { + collisiondata_clear(&cd); slot0 *buck = htl.hta.trees0[(r-1)/2][bucketid]; // optimize by updating previous buck?! - u32 bsize = getnslots(r-1, bucketid); // optimize by putting bucketsize with block?! + u32 bsize = getnslots(eq, r-1, bucketid); // optimize by putting bucketsize with block?! for (u32 s1 = 0; s1 < bsize; s1++) { const slot0 *pslot1 = buck + s1; // optimize by updating previous pslot1?! - if (!cd.addslot(s1, htl.getxhash0(pslot1))) { - xfull++; + if (!addslot(&cd, s1, getxhash0(&htl, pslot1))) { + eq->xfull++; continue; } - for (; cd.nextcollision(); ) { - const u32 s0 = cd.slot(); + for (; nextcollision(&cd); ) { + const u32 s0 = slot(&cd); const slot0 *pslot0 = buck + s0; - if (htl.equal(pslot0->hash, pslot1->hash)) { - hfull++; + if (htlayout_equal(&htl, pslot0->hash, pslot1->hash)) { + eq->hfull++; continue; } u32 xorbucketid; @@ -510,38 +552,38 @@ struct equi { #else #error not implemented #endif - const u32 xorslot = getslot(r, xorbucketid); + const u32 xorslot = getslot(eq, r, xorbucketid); if (xorslot >= NSLOTS) { - bfull++; + eq->bfull++; continue; } - slot1 &xs = htl.hta.trees1[r/2][xorbucketid][xorslot]; - xs.attr = tree(bucketid, s0, s1); + slot1 *xs = &htl.hta.trees1[r/2][xorbucketid][xorslot]; + xs->attr = tree_from_bid(bucketid, s0, s1); for (u32 i=htl.dunits; i < htl.prevhashunits; i++) - xs.hash[i-htl.dunits].word = pslot0->hash[i].word ^ pslot1->hash[i].word; + xs->hash[i-htl.dunits].word = pslot0->hash[i].word ^ pslot1->hash[i].word; } } } } - void digiteven(const u32 r, const u32 id) { - htlayout htl(this, r); + void equi_digiteven(equi *eq, const u32 r, const u32 id) { + htlayout htl = htlayout_new(eq, r); collisiondata cd; - for (u32 bucketid=id; bucketid < NBUCKETS; bucketid += nthreads) { - cd.clear(); + for (u32 bucketid=id; bucketid < NBUCKETS; bucketid += eq->nthreads) { + collisiondata_clear(&cd); slot1 *buck = htl.hta.trees1[(r-1)/2][bucketid]; // OPTIMIZE BY UPDATING PREVIOUS - u32 bsize = getnslots(r-1, bucketid); + u32 bsize = getnslots(eq, r-1, bucketid); for (u32 s1 = 0; s1 < bsize; s1++) { const slot1 *pslot1 = buck + s1; // OPTIMIZE BY UPDATING PREVIOUS - if (!cd.addslot(s1, htl.getxhash1(pslot1))) { - xfull++; + if (!addslot(&cd, s1, getxhash1(&htl, pslot1))) { + eq->xfull++; continue; } - for (; cd.nextcollision(); ) { - const u32 s0 = cd.slot(); + for (; nextcollision(&cd); ) { + const u32 s0 = slot(&cd); const slot1 *pslot0 = buck + s0; - if (htl.equal(pslot0->hash, pslot1->hash)) { - hfull++; + if (htlayout_equal(&htl, pslot0->hash, pslot1->hash)) { + eq->hfull++; continue; } u32 xorbucketid; @@ -562,42 +604,41 @@ struct equi { #else #error not implemented #endif - const u32 xorslot = getslot(r, xorbucketid); + const u32 xorslot = getslot(eq, r, xorbucketid); if (xorslot >= NSLOTS) { - bfull++; + eq->bfull++; continue; } - slot0 &xs = htl.hta.trees0[r/2][xorbucketid][xorslot]; - xs.attr = tree(bucketid, s0, s1); + slot0 *xs = &htl.hta.trees0[r/2][xorbucketid][xorslot]; + xs->attr = tree_from_bid(bucketid, s0, s1); for (u32 i=htl.dunits; i < htl.prevhashunits; i++) - xs.hash[i-htl.dunits].word = pslot0->hash[i].word ^ pslot1->hash[i].word; + xs->hash[i-htl.dunits].word = pslot0->hash[i].word ^ pslot1->hash[i].word; } } } } - void digitK(const u32 id) { + void equi_digitK(equi *eq, const u32 id) { collisiondata cd; - htlayout htl(this, WK); + htlayout htl = htlayout_new(eq, WK); u32 nc = 0; - for (u32 bucketid = id; bucketid < NBUCKETS; bucketid += nthreads) { - cd.clear(); + for (u32 bucketid = id; bucketid < NBUCKETS; bucketid += eq->nthreads) { + collisiondata_clear(&cd); slot0 *buck = htl.hta.trees0[(WK-1)/2][bucketid]; - u32 bsize = getnslots(WK-1, bucketid); + u32 bsize = getnslots(eq, WK-1, bucketid); for (u32 s1 = 0; s1 < bsize; s1++) { const slot0 *pslot1 = buck + s1; - if (!cd.addslot(s1, htl.getxhash0(pslot1))) // assume WK odd + if (!addslot(&cd, s1, getxhash0(&htl, pslot1))) // assume WK odd continue; - for (; cd.nextcollision(); ) { - const u32 s0 = cd.slot(); - if (htl.equal(buck[s0].hash, pslot1->hash)) -nc++, candidate(tree(bucketid, s0, s1)); + for (; nextcollision(&cd); ) { + const u32 s0 = slot(&cd); + if (htlayout_equal(&htl, buck[s0].hash, pslot1->hash)) +nc++, candidate(eq, tree_from_bid(bucketid, s0, s1)); } } } //printf(" %d candidates ", nc); } -}; typedef struct { u32 id; @@ -617,32 +658,36 @@ void *worker(void *vp) { thread_ctx *tp = (thread_ctx *)vp; equi *eq = tp->eq; - if (tp->id == 0) +// if (tp->id == 0) // printf("Digit 0\n"); barrier(&eq->barry); - eq->digit0(tp->id); + equi_digit0(eq, tp->id); barrier(&eq->barry); if (tp->id == 0) { - eq->xfull = eq->bfull = eq->hfull = 0; - eq->showbsizes(0); + equi_clearslots(eq); +#ifdef EQUIHASH_SHOW_BUCKET_SIZES + showbsizes(eq, 0); +#endif } barrier(&eq->barry); for (u32 r = 1; r < WK; r++) { - if (tp->id == 0) +// if (tp->id == 0) // printf("Digit %d", r); barrier(&eq->barry); - r&1 ? eq->digitodd(r, tp->id) : eq->digiteven(r, tp->id); + r&1 ? equi_digitodd(eq, r, tp->id) : equi_digiteven(eq, r, tp->id); barrier(&eq->barry); if (tp->id == 0) { // printf(" x%d b%d h%d\n", eq->xfull, eq->bfull, eq->hfull); - eq->xfull = eq->bfull = eq->hfull = 0; - eq->showbsizes(r); + equi_clearslots(eq); +#ifdef EQUIHASH_SHOW_BUCKET_SIZES + showbsizes(eq, r); +#endif } barrier(&eq->barry); } - if (tp->id == 0) +// if (tp->id == 0) // printf("Digit %d\n", WK); - eq->digitK(tp->id); + equi_digitK(eq, tp->id); barrier(&eq->barry); pthread_exit(NULL); return 0; From 45e7238b8011d556bde2b222afaef2f505726fdb Mon Sep 17 00:00:00 2001 From: Jack Grigg Date: Thu, 4 Jan 2024 04:34:59 +0000 Subject: [PATCH 04/17] equihash: Pass `blake2b_simd` bindings to Tromp solver as callbacks This avoids linker errors by removing cycles. --- components/equihash/tromp/blake2b.h | 10 ++--- components/equihash/tromp/equi.h | 54 -------------------------- components/equihash/tromp/equi_miner.c | 33 ++++++++++++---- 3 files changed, 30 insertions(+), 67 deletions(-) diff --git a/components/equihash/tromp/blake2b.h b/components/equihash/tromp/blake2b.h index 6a0927182a..23a7409b74 100644 --- a/components/equihash/tromp/blake2b.h +++ b/components/equihash/tromp/blake2b.h @@ -16,7 +16,7 @@ typedef struct BLAKE2bState BLAKE2bState; /// `personalization` MUST be a pointer to a 16-byte array. /// /// Please free this with `blake2b_free` when you are done. -BLAKE2bState* blake2b_init( +typedef BLAKE2bState* (*blake2b_init)( size_t output_len, const unsigned char* personalization); @@ -24,13 +24,13 @@ BLAKE2bState* blake2b_init( /// /// Both states need to be separately freed with `blake2b_free` when you are /// done. -BLAKE2bState* blake2b_clone(const BLAKE2bState* state); +typedef BLAKE2bState* (*blake2b_clone)(const BLAKE2bState* state); /// Frees a BLAKE2b state returned by `blake2b_init`. -void blake2b_free(BLAKE2bState* state); +typedef void (*blake2b_free)(BLAKE2bState* state); /// Adds input to the hash. You can call this any number of times. -void blake2b_update( +typedef void (*blake2b_update)( BLAKE2bState* state, const unsigned char* input, size_t input_len); @@ -43,7 +43,7 @@ void blake2b_update( /// This method is idempotent, and calling it multiple times will give the same /// result. It's also possible to call `blake2b_update` with more input in /// between. -void blake2b_finalize( +typedef void (*blake2b_finalize)( BLAKE2bState* state, unsigned char* output, size_t output_len); diff --git a/components/equihash/tromp/equi.h b/components/equihash/tromp/equi.h index 165825ba67..2bf0794edf 100644 --- a/components/equihash/tromp/equi.h +++ b/components/equihash/tromp/equi.h @@ -43,63 +43,9 @@ typedef u32 proof[PROOFSIZE]; enum verify_code { POW_OK, POW_DUPLICATE, POW_OUT_OF_ORDER, POW_NONZERO_XOR }; const char *errstr[] = { "OK", "duplicate index", "indices out of order", "nonzero xor" }; -void genhash(const BLAKE2bState *ctx, u32 idx, uchar *hash) { - BLAKE2bState* state = blake2b_clone(ctx); - u32 leb = htole32(idx / HASHESPERBLAKE); - blake2b_update(state, (uchar *)&leb, sizeof(u32)); - uchar blakehash[HASHOUT]; - blake2b_finalize(state, blakehash, HASHOUT); - blake2b_free(state); - memcpy(hash, blakehash + (idx % HASHESPERBLAKE) * WN/8, WN/8); -} - -int verifyrec(const BLAKE2bState *ctx, u32 *indices, uchar *hash, int r) { - if (r == 0) { - genhash(ctx, *indices, hash); - return POW_OK; - } - u32 *indices1 = indices + (1 << (r-1)); - if (*indices >= *indices1) - return POW_OUT_OF_ORDER; - uchar hash0[WN/8], hash1[WN/8]; - int vrf0 = verifyrec(ctx, indices, hash0, r-1); - if (vrf0 != POW_OK) - return vrf0; - int vrf1 = verifyrec(ctx, indices1, hash1, r-1); - if (vrf1 != POW_OK) - return vrf1; - for (int i=0; i < WN/8; i++) - hash[i] = hash0[i] ^ hash1[i]; - int i, b = r * DIGITBITS; - for (i = 0; i < b/8; i++) - if (hash[i]) - return POW_NONZERO_XOR; - if ((b%8) && hash[i] >> (8-(b%8))) - return POW_NONZERO_XOR; - return POW_OK; -} - int compu32(const void *pa, const void *pb) { u32 a = *(u32 *)pa, b = *(u32 *)pb; return anthreads = n_threads; + eq->blake2b_clone = blake2b_clone; + eq->blake2b_free = blake2b_free; + eq->blake2b_update = blake2b_update; + eq->blake2b_finalize = blake2b_finalize; + const int err = pthread_barrier_init(&eq->barry, NULL, eq->nthreads); assert(!err); @@ -257,15 +272,16 @@ typedef struct equi equi; free(eq->nslots); free(eq->sols); - blake2b_free(eq->blake_ctx); + eq->blake2b_free(eq->blake_ctx); + free(eq); } void equi_setstate(equi *eq, const BLAKE2bState *ctx) { if (eq->blake_ctx) { - blake2b_free(eq->blake_ctx); + eq->blake2b_free(eq->blake_ctx); } - eq->blake_ctx = blake2b_clone(ctx); + eq->blake_ctx = eq->blake2b_clone(ctx); memset(eq->nslots, 0, NBUCKETS * sizeof(au32)); // only nslots[0] needs zeroing eq->nsols = 0; } @@ -481,11 +497,12 @@ typedef struct equi equi; htlayout htl = htlayout_new(eq, 0); const u32 hashbytes = hashsize(0); for (u32 block = id; block < NBLOCKS; block += eq->nthreads) { - state = blake2b_clone(eq->blake_ctx); + state = eq->blake2b_clone(eq->blake_ctx); u32 leb = htole32(block); - blake2b_update(state, (uchar *)&leb, sizeof(u32)); - blake2b_finalize(state, hash, HASHOUT); - blake2b_free(state); + eq->blake2b_update(state, (uchar *)&leb, sizeof(u32)); + eq->blake2b_finalize(state, hash, HASHOUT); + eq->blake2b_free(state); + for (u32 i = 0; i Date: Thu, 4 Jan 2024 04:39:00 +0000 Subject: [PATCH 05/17] equihash: Add Rust API for Tromp solver Co-authored-by: teor --- components/equihash/src/blake2b.rs | 13 +- components/equihash/src/lib.rs | 3 + components/equihash/src/tromp.rs | 174 +++++++++++++++++++++++++ components/equihash/src/verify.rs | 2 +- components/equihash/tromp/equi_miner.c | 10 ++ 5 files changed, 196 insertions(+), 6 deletions(-) create mode 100644 components/equihash/src/tromp.rs diff --git a/components/equihash/src/blake2b.rs b/components/equihash/src/blake2b.rs index 432c4cb79b..75da59d5ab 100644 --- a/components/equihash/src/blake2b.rs +++ b/components/equihash/src/blake2b.rs @@ -2,15 +2,18 @@ // Distributed under the MIT software license, see the accompanying // file COPYING or https://www.opensource.org/licenses/mit-license.php . +// This module uses unsafe code for FFI into blake2b. +#![allow(unsafe_code)] + use blake2b_simd::{State, PERSONALBYTES}; -use libc::{c_uchar, size_t}; + use std::ptr; use std::slice; #[no_mangle] pub extern "C" fn blake2b_init( - output_len: size_t, - personalization: *const [c_uchar; PERSONALBYTES], + output_len: usize, + personalization: *const [u8; PERSONALBYTES], ) -> *mut State { let personalization = unsafe { personalization.as_ref().unwrap() }; @@ -37,7 +40,7 @@ pub extern "C" fn blake2b_free(state: *mut State) { } #[no_mangle] -pub extern "C" fn blake2b_update(state: *mut State, input: *const c_uchar, input_len: size_t) { +pub extern "C" fn blake2b_update(state: *mut State, input: *const u8, input_len: usize) { let state = unsafe { state.as_mut().unwrap() }; let input = unsafe { slice::from_raw_parts(input, input_len) }; @@ -45,7 +48,7 @@ pub extern "C" fn blake2b_update(state: *mut State, input: *const c_uchar, input } #[no_mangle] -pub extern "C" fn blake2b_finalize(state: *mut State, output: *mut c_uchar, output_len: size_t) { +pub extern "C" fn blake2b_finalize(state: *mut State, output: *mut u8, output_len: usize) { let state = unsafe { state.as_mut().unwrap() }; let output = unsafe { slice::from_raw_parts_mut(output, output_len) }; diff --git a/components/equihash/src/lib.rs b/components/equihash/src/lib.rs index cb6131ca3b..0000c20535 100644 --- a/components/equihash/src/lib.rs +++ b/components/equihash/src/lib.rs @@ -28,3 +28,6 @@ mod verify; mod test_vectors; pub use verify::{is_valid_solution, Error}; + +mod blake2b; +pub mod tromp; diff --git a/components/equihash/src/tromp.rs b/components/equihash/src/tromp.rs new file mode 100644 index 0000000000..b8ee0e43e0 --- /dev/null +++ b/components/equihash/src/tromp.rs @@ -0,0 +1,174 @@ +//! Rust interface to the tromp equihash solver. + +use std::marker::{PhantomData, PhantomPinned}; +use std::slice; + +use blake2b_simd::State; + +use crate::{blake2b, params::Params, verify}; + +#[repr(C)] +struct CEqui { + _f: [u8; 0], + _m: PhantomData<(*mut u8, PhantomPinned)>, +} + +#[link(name = "equitromp")] +extern "C" { + #[allow(improper_ctypes)] + fn equi_new( + n_threads: u32, + blake2b_clone: extern "C" fn(state: *const State) -> *mut State, + blake2b_free: extern "C" fn(state: *mut State), + blake2b_update: extern "C" fn(state: *mut State, input: *const u8, input_len: usize), + blake2b_finalize: extern "C" fn(state: *mut State, output: *mut u8, output_len: usize), + ) -> *mut CEqui; + fn equi_free(eq: *mut CEqui); + #[allow(improper_ctypes)] + fn equi_setstate(eq: *mut CEqui, ctx: *const State); + fn equi_clearslots(eq: *mut CEqui); + fn equi_digit0(eq: *mut CEqui, id: u32); + fn equi_digitodd(eq: *mut CEqui, r: u32, id: u32); + fn equi_digiteven(eq: *mut CEqui, r: u32, id: u32); + fn equi_digitK(eq: *mut CEqui, id: u32); + fn equi_nsols(eq: *const CEqui) -> usize; + /// Returns `equi_nsols()` solutions of length `2^K`, in a single memory allocation. + fn equi_sols(eq: *const CEqui) -> *const u32; +} + +/// Performs a single equihash solver run with equihash parameters `p` and hash state `curr_state`. +/// Returns zero or more unique solutions. +/// +/// # SAFETY +/// +/// The parameters to this function must match the hard-coded parameters in the C++ code. +/// +/// This function uses unsafe code for FFI into the tromp solver. +#[allow(unsafe_code)] +unsafe fn worker(p: Params, curr_state: &State) -> Vec> { + // Create solver and initialize it. + let eq = equi_new( + 1, + blake2b::blake2b_clone, + blake2b::blake2b_free, + blake2b::blake2b_update, + blake2b::blake2b_finalize, + ); + equi_setstate(eq, curr_state); + + // Initialization done, start algo driver. + equi_digit0(eq, 0); + equi_clearslots(eq); + // SAFETY: caller must supply a `p` instance that matches the hard-coded values in the C code. + for r in 1..p.k { + if (r & 1) != 0 { + equi_digitodd(eq, r, 0) + } else { + equi_digiteven(eq, r, 0) + }; + equi_clearslots(eq); + } + // Review Note: nsols is increased here, but only if the solution passes the strictly ordered check. + // With 256 nonces, we get to around 6/9 digits strictly ordered. + equi_digitK(eq, 0); + + let solutions = { + let nsols = equi_nsols(eq); + let sols = equi_sols(eq); + let solution_len = 1 << p.k; + + // SAFETY: + // - caller must supply a `p` instance that matches the hard-coded values in the C code. + // - `sols` is a single allocation containing at least `nsols` solutions. + // - this slice is a shared ref to the memory in a valid `eq` instance supplied by the caller. + let solutions: &[u32] = slice::from_raw_parts(sols, nsols * solution_len); + + let mut chunks = solutions.chunks_exact(solution_len); + + // SAFETY: + // - caller must supply a `p` instance that matches the hard-coded values in the C code. + // - each solution contains `solution_len` u32 values. + // - the temporary slices are shared refs to a valid `eq` instance supplied by the caller. + // - the bytes in the shared ref are copied before they are returned. + // - dropping `solutions: &[u32]` does not drop the underlying memory owned by `eq`. + let mut solutions = (&mut chunks) + .map(|solution| solution.to_vec()) + .collect::>(); + + assert_eq!(chunks.remainder().len(), 0); + + // Sometimes the solver returns identical solutions. + solutions.sort(); + solutions.dedup(); + + solutions + }; + + equi_free(eq); + + solutions +} + +/// Performs multiple equihash solver runs with equihash parameters `200, 9`, initialising the hash with +/// the supplied partial `input`. Between each run, generates a new nonce of length `N` using the +/// `next_nonce` function. +/// +/// Returns zero or more unique solutions. +pub fn solve_200_9( + input: &[u8], + mut next_nonce: impl FnMut() -> Option<[u8; N]>, +) -> Vec> { + let p = Params::new(200, 9).expect("should be valid"); + let mut state = verify::initialise_state(p.n, p.k, p.hash_output()); + state.update(input); + + loop { + let nonce = match next_nonce() { + Some(nonce) => nonce, + None => break vec![], + }; + + let mut curr_state = state.clone(); + curr_state.update(&nonce); + + // SAFETY: the parameters 200,9 match the hard-coded parameters in the C++ code. + #[allow(unsafe_code)] + let solutions = unsafe { worker(p, &curr_state) }; + if !solutions.is_empty() { + break solutions; + } + } +} + +#[cfg(test)] +mod tests { + use super::solve_200_9; + + #[test] + #[allow(clippy::print_stdout)] + fn run_solver() { + let input = b"Equihash is an asymmetric PoW based on the Generalised Birthday problem."; + let mut nonce = [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, + ]; + + let solutions = solve_200_9(input, || { + nonce[0] += 1; + if nonce[0] == 0 { + None + } else { + Some(nonce) + } + }); + + if solutions.is_empty() { + println!("Found no solutions"); + } else { + println!("Found {} solutions:", solutions.len()); + for solution in solutions { + println!("- {:?}", solution); + } + } + } +} diff --git a/components/equihash/src/verify.rs b/components/equihash/src/verify.rs index 53071ddc01..0cc4d27771 100644 --- a/components/equihash/src/verify.rs +++ b/components/equihash/src/verify.rs @@ -114,7 +114,7 @@ impl fmt::Display for Kind { } } -fn initialise_state(n: u32, k: u32, digest_len: u8) -> Blake2bState { +pub(crate) fn initialise_state(n: u32, k: u32, digest_len: u8) -> Blake2bState { let mut personalization: Vec = Vec::from("ZcashPoW"); personalization.write_u32::(n).unwrap(); personalization.write_u32::(k).unwrap(); diff --git a/components/equihash/tromp/equi_miner.c b/components/equihash/tromp/equi_miner.c index 1c4882cd16..f1c767a4ac 100644 --- a/components/equihash/tromp/equi_miner.c +++ b/components/equihash/tromp/equi_miner.c @@ -285,6 +285,9 @@ typedef struct equi equi; memset(eq->nslots, 0, NBUCKETS * sizeof(au32)); // only nslots[0] needs zeroing eq->nsols = 0; } + void equi_clearslots(equi *eq) { + eq->xfull = eq->bfull = eq->hfull = 0; + } u32 getslot(equi *eq, const u32 r, const u32 bucketi) { #ifdef EQUIHASH_TROMP_ATOMIC return std::atomic_fetch_add_explicit(&eq->nslots[r&1][bucketi], 1U, std::memory_order_relaxed); @@ -657,6 +660,13 @@ nc++, candidate(eq, tree_from_bid(bucketid, s0, s1)); //printf(" %d candidates ", nc); } + size_t equi_nsols(const equi *eq) { + return eq->nsols; + } + proof *equi_sols(const equi *eq) { + return eq->sols; + } + typedef struct { u32 id; pthread_t thread; From 1b20c15053587d9d7fac582913e712464353115d Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 5 Jan 2024 09:47:50 +1000 Subject: [PATCH 06/17] equihash: Add Rust APIs for compressed solutions --- components/equihash/src/minimal.rs | 82 +++++++++++++++++++++++++++--- components/equihash/src/tromp.rs | 20 +++++++- 2 files changed, 95 insertions(+), 7 deletions(-) diff --git a/components/equihash/src/minimal.rs b/components/equihash/src/minimal.rs index 81da63e657..838907840f 100644 --- a/components/equihash/src/minimal.rs +++ b/components/equihash/src/minimal.rs @@ -5,6 +5,49 @@ use byteorder::{BigEndian, ReadBytesExt}; use crate::params::Params; +// Rough translation of CompressArray() from: +// https://github.com/zcash/zcash/blob/6fdd9f1b81d3b228326c9826fa10696fc516444b/src/crypto/equihash.cpp#L39-L76 +#[cfg(any(feature = "solver", test))] +fn compress_array(array: &[u8], bit_len: usize, byte_pad: usize) -> Vec { + let index_bytes = (u32::BITS / 8) as usize; + assert!(bit_len >= 8); + assert!(8 * index_bytes >= 7 + bit_len); + + let in_width: usize = (bit_len + 7) / 8 + byte_pad; + let out_len = bit_len * array.len() / (8 * in_width); + + let mut out = Vec::with_capacity(out_len); + let bit_len_mask: u32 = (1 << (bit_len as u32)) - 1; + + // The acc_bits least-significant bits of acc_value represent a bit sequence + // in big-endian order. + let mut acc_bits: usize = 0; + let mut acc_value: u32 = 0; + + let mut j: usize = 0; + for _i in 0..out_len { + // When we have fewer than 8 bits left in the accumulator, read the next + // input element. + if acc_bits < 8 { + acc_value <<= bit_len; + for x in byte_pad..in_width { + acc_value |= ( + // Apply bit_len_mask across byte boundaries + (array[j + x] & ((bit_len_mask >> (8 * (in_width - x - 1))) as u8)) as u32 + ) + .wrapping_shl(8 * (in_width - x - 1) as u32); // Big-endian + } + j += in_width; + acc_bits += bit_len; + } + + acc_bits -= 8; + out.push((acc_value >> acc_bits) as u8); + } + + out +} + pub(crate) fn expand_array(vin: &[u8], bit_len: usize, byte_pad: usize) -> Vec { assert!(bit_len >= 8); assert!(u32::BITS as usize >= 7 + bit_len); @@ -50,6 +93,31 @@ pub(crate) fn expand_array(vin: &[u8], bit_len: usize, byte_pad: usize) -> Vec Vec { + let c_bit_len = p.collision_bit_length(); + let index_bytes = (u32::BITS / 8) as usize; + let digit_bytes = ((c_bit_len + 1) + 7) / 8; + assert!(digit_bytes <= index_bytes); + + let len_indices = indices.len() * index_bytes; + let byte_pad = index_bytes - digit_bytes; + + // Rough translation of EhIndexToArray(index, array_pointer) from: + // https://github.com/zcash/zcash/blob/6fdd9f1b81d3b228326c9826fa10696fc516444b/src/crypto/equihash.cpp#L123-L128 + // + // Big-endian so that lexicographic array comparison is equivalent to integer comparison. + let array: Vec = indices + .iter() + .flat_map(|index| index.to_be_bytes()) + .collect(); + assert_eq!(array.len(), len_indices); + + compress_array(&array, c_bit_len + 1, byte_pad) +} + /// Returns `None` if the parameters are invalid for this minimal encoding. pub(crate) fn indices_from_minimal(p: Params, minimal: &[u8]) -> Option> { let c_bit_len = p.collision_bit_length(); @@ -76,11 +144,14 @@ pub(crate) fn indices_from_minimal(p: Params, minimal: &[u8]) -> Option #[cfg(test)] mod tests { - use super::{expand_array, indices_from_minimal, Params}; + use crate::minimal::minimal_from_indices; + + use super::{compress_array, expand_array, indices_from_minimal, Params}; #[test] - fn array_expansion() { + fn array_compression_and_expansion() { let check_array = |(bit_len, byte_pad), compact, expanded| { + assert_eq!(compress_array(expanded, bit_len, byte_pad), compact); assert_eq!(expand_array(compact, bit_len, byte_pad), expanded); }; @@ -149,10 +220,9 @@ mod tests { #[test] fn minimal_solution_repr() { let check_repr = |minimal, indices| { - assert_eq!( - indices_from_minimal(Params { n: 80, k: 3 }, minimal).unwrap(), - indices, - ); + let p = Params { n: 80, k: 3 }; + assert_eq!(minimal_from_indices(p, indices), minimal); + assert_eq!(indices_from_minimal(p, minimal).unwrap(), indices); }; // The solutions here are not intended to be valid. diff --git a/components/equihash/src/tromp.rs b/components/equihash/src/tromp.rs index b8ee0e43e0..d74099e0ef 100644 --- a/components/equihash/src/tromp.rs +++ b/components/equihash/src/tromp.rs @@ -5,7 +5,7 @@ use std::slice; use blake2b_simd::State; -use crate::{blake2b, params::Params, verify}; +use crate::{blake2b, minimal::minimal_from_indices, params::Params, verify}; #[repr(C)] struct CEqui { @@ -140,6 +140,24 @@ pub fn solve_200_9( } } +/// Performs multiple equihash solver runs with equihash parameters `200, 9`, initialising the hash with +/// the supplied partial `input`. Between each run, generates a new nonce of length `N` using the +/// `next_nonce` function. +/// +/// Returns zero or more unique compressed solutions. +pub fn solve_200_9_compressed( + input: &[u8], + next_nonce: impl FnMut() -> Option<[u8; N]>, +) -> Vec> { + let p = Params::new(200, 9).expect("should be valid"); + let solutions = solve_200_9(input, next_nonce); + + solutions + .iter() + .map(|solution| minimal_from_indices(p, solution)) + .collect() +} + #[cfg(test)] mod tests { use super::solve_200_9; From fe3b269f3a97d32da5bbb5e981dc3f794ba122eb Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 5 Jan 2024 09:47:50 +1000 Subject: [PATCH 07/17] equihash: Verify compressed solutions in tests --- Cargo.lock | 1 + components/equihash/Cargo.toml | 3 +++ components/equihash/src/tromp.rs | 42 +++++++++++++++++++++++--------- 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0a62dc38a3..2e77c75da1 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -660,6 +660,7 @@ dependencies = [ "blake2b_simd", "byteorder", "cc", + "hex", ] [[package]] diff --git a/components/equihash/Cargo.toml b/components/equihash/Cargo.toml index 5998e54cd0..a4d3403134 100644 --- a/components/equihash/Cargo.toml +++ b/components/equihash/Cargo.toml @@ -16,5 +16,8 @@ byteorder = "1" [build-dependencies] cc = "1" +[dev-dependencies] +hex = "0.4" + [lib] bench = false diff --git a/components/equihash/src/tromp.rs b/components/equihash/src/tromp.rs index d74099e0ef..74b431c0f0 100644 --- a/components/equihash/src/tromp.rs +++ b/components/equihash/src/tromp.rs @@ -160,32 +160,50 @@ pub fn solve_200_9_compressed( #[cfg(test)] mod tests { - use super::solve_200_9; + use super::solve_200_9_compressed; #[test] #[allow(clippy::print_stdout)] fn run_solver() { let input = b"Equihash is an asymmetric PoW based on the Generalised Birthday problem."; - let mut nonce = [ + let mut nonce: [u8; 32] = [ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ]; + let mut nonces = 0..=32_u32; + let nonce_count = nonces.clone().count(); - let solutions = solve_200_9(input, || { - nonce[0] += 1; - if nonce[0] == 0 { - None - } else { - Some(nonce) - } + let solutions = solve_200_9_compressed(input, || { + let variable_nonce = nonces.next()?; + println!("Using variable nonce [0..4] of {}", variable_nonce); + + let variable_nonce = variable_nonce.to_le_bytes(); + nonce[0] = variable_nonce[0]; + nonce[1] = variable_nonce[1]; + nonce[2] = variable_nonce[2]; + nonce[3] = variable_nonce[3]; + + Some(nonce) }); if solutions.is_empty() { - println!("Found no solutions"); + // Expected solution rate is documented at: + // https://github.com/tromp/equihash/blob/master/README.md + panic!("Found no solutions after {nonce_count} runs, expected 1.88 solutions per run",); } else { println!("Found {} solutions:", solutions.len()); - for solution in solutions { - println!("- {:?}", solution); + for (sol_num, solution) in solutions.iter().enumerate() { + println!("Validating solution {sol_num}:-\n{}", hex::encode(solution)); + crate::is_valid_solution(200, 9, input, &nonce, solution).unwrap_or_else(|error| { + panic!( + "unexpected invalid equihash 200, 9 solution:\n\ + error: {error:?}\n\ + input: {input:?}\n\ + nonce: {nonce:?}\n\ + solution: {solution:?}" + ) + }); + println!("Solution {sol_num} is valid!\n"); } } } From 463e7d9958da89516318218afad6117ed22126a9 Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 5 Jan 2024 10:51:38 +1000 Subject: [PATCH 08/17] equihash: Move allocation out of the loop --- components/equihash/src/tromp.rs | 52 +++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 15 deletions(-) diff --git a/components/equihash/src/tromp.rs b/components/equihash/src/tromp.rs index 74b431c0f0..e5f2157ff5 100644 --- a/components/equihash/src/tromp.rs +++ b/components/equihash/src/tromp.rs @@ -45,15 +45,10 @@ extern "C" { /// /// This function uses unsafe code for FFI into the tromp solver. #[allow(unsafe_code)] -unsafe fn worker(p: Params, curr_state: &State) -> Vec> { - // Create solver and initialize it. - let eq = equi_new( - 1, - blake2b::blake2b_clone, - blake2b::blake2b_free, - blake2b::blake2b_update, - blake2b::blake2b_finalize, - ); +unsafe fn worker(eq: *mut CEqui, p: Params, curr_state: &State) -> Vec> { + // SAFETY: caller must supply a valid `eq` instance. + // + // Review Note: nsols is set to zero in C++ here equi_setstate(eq, curr_state); // Initialization done, start algo driver. @@ -104,8 +99,6 @@ unsafe fn worker(p: Params, curr_state: &State) -> Vec> { solutions }; - equi_free(eq); - solutions } @@ -122,22 +115,51 @@ pub fn solve_200_9( let mut state = verify::initialise_state(p.n, p.k, p.hash_output()); state.update(input); - loop { + // Create solver and initialize it. + // + // # SAFETY + // - the parameters 200,9 match the hard-coded parameters in the C++ code. + // - tromp is compiled without multi-threading support, so each instance can only support 1 thread. + // - the blake2b functions are in the correct order in Rust and C++ initializers. + #[allow(unsafe_code)] + let eq = unsafe { + equi_new( + 1, + blake2b::blake2b_clone, + blake2b::blake2b_free, + blake2b::blake2b_update, + blake2b::blake2b_finalize, + ) + }; + + let solutions = loop { let nonce = match next_nonce() { Some(nonce) => nonce, None => break vec![], }; let mut curr_state = state.clone(); + // Review Note: these hashes are changing when the nonce changes curr_state.update(&nonce); - // SAFETY: the parameters 200,9 match the hard-coded parameters in the C++ code. + // SAFETY: + // - the parameters 200,9 match the hard-coded parameters in the C++ code. + // - the eq instance is initilized above. #[allow(unsafe_code)] - let solutions = unsafe { worker(p, &curr_state) }; + let solutions = unsafe { worker(eq, p, &curr_state) }; if !solutions.is_empty() { break solutions; } - } + }; + + // SAFETY: + // - the eq instance is initilized above, and not used after this point. + #[allow(unsafe_code)] + unsafe { + equi_free(eq) + }; + + solutions } /// Performs multiple equihash solver runs with equihash parameters `200, 9`, initialising the hash with From 3c78bf60a14d7de0e62613687aca7170a8a4adaa Mon Sep 17 00:00:00 2001 From: teor Date: Mon, 8 Jan 2024 07:40:58 +1000 Subject: [PATCH 09/17] equihash: Set C pointers to NULL after freeing them to avoid double-frees Also includes some redundant cleanup code for defense in depth. --- components/equihash/tromp/equi_miner.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/components/equihash/tromp/equi_miner.c b/components/equihash/tromp/equi_miner.c index f1c767a4ac..a57bca34a2 100644 --- a/components/equihash/tromp/equi_miner.c +++ b/components/equihash/tromp/equi_miner.c @@ -204,8 +204,22 @@ typedef struct htalloc htalloc; hta->trees1[r/2] = (bucket1 *)(hta->heap1 + r/2); } void dealloctrees(htalloc *hta) { + if (hta == NULL) { + return; + } + free(hta->heap0); free(hta->heap1); + // Avoid use-after-free and double-free + hta->heap0 = NULL; + hta->heap1 = NULL; + + for (int r=0; rtrees0[r/2] = NULL; + else + hta->trees1[r/2] = NULL; + hta->alloced = 0; } void *htalloc_alloc(htalloc *hta, const u32 n, const u32 sz) { void *mem = calloc(n, sz); @@ -268,11 +282,19 @@ typedef struct equi equi; return eq; } void equi_free(equi *eq) { + if (eq == NULL) { + return; + } + dealloctrees(&eq->hta); free(eq->nslots); free(eq->sols); eq->blake2b_free(eq->blake_ctx); + // Avoid use-after-free and double-free + eq->nslots = NULL; + eq->sols = NULL; + eq->blake_ctx = NULL; free(eq); } @@ -505,6 +527,8 @@ typedef struct equi equi; eq->blake2b_update(state, (uchar *)&leb, sizeof(u32)); eq->blake2b_finalize(state, hash, HASHOUT); eq->blake2b_free(state); + // Avoid use-after-free and double-free + state = NULL; for (u32 i = 0; i Date: Mon, 8 Jan 2024 08:16:02 +1000 Subject: [PATCH 10/17] equihash: Place solver behind a feature flag --- components/equihash/Cargo.toml | 8 +++++++- components/equihash/build.rs | 6 ++++++ components/equihash/src/lib.rs | 2 ++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/components/equihash/Cargo.toml b/components/equihash/Cargo.toml index a4d3403134..e2a9e0b054 100644 --- a/components/equihash/Cargo.toml +++ b/components/equihash/Cargo.toml @@ -9,12 +9,18 @@ license = "MIT OR Apache-2.0" edition = "2021" rust-version = "1.56.1" +[features] +default = [] + +## Builds the C++ tromp solver and Rust FFI layer. +solver = ["dep:cc"] + [dependencies] blake2b_simd = "1" byteorder = "1" [build-dependencies] -cc = "1" +cc = { version = "1", optional = true } [dev-dependencies] hex = "0.4" diff --git a/components/equihash/build.rs b/components/equihash/build.rs index 86c77774c1..74122e450a 100644 --- a/components/equihash/build.rs +++ b/components/equihash/build.rs @@ -1,6 +1,12 @@ //! Build script for the equihash tromp solver in C. fn main() { + #[cfg(feature = "solver")] + build_tromp_solver(); +} + +#[cfg(feature = "solver")] +fn build_tromp_solver() { cc::Build::new() .include("tromp/") .file("tromp/equi_miner.c") diff --git a/components/equihash/src/lib.rs b/components/equihash/src/lib.rs index 0000c20535..e0ddf8c5f3 100644 --- a/components/equihash/src/lib.rs +++ b/components/equihash/src/lib.rs @@ -29,5 +29,7 @@ mod test_vectors; pub use verify::{is_valid_solution, Error}; +#[cfg(feature = "solver")] mod blake2b; +#[cfg(feature = "solver")] pub mod tromp; From d7ccd07d0b7ea3718f81568551966ee2176dc219 Mon Sep 17 00:00:00 2001 From: teor Date: Thu, 11 Jan 2024 09:14:19 +1000 Subject: [PATCH 11/17] equihash: Ensure returned compressed solutions are unique --- components/equihash/src/tromp.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/components/equihash/src/tromp.rs b/components/equihash/src/tromp.rs index e5f2157ff5..ecd0658278 100644 --- a/components/equihash/src/tromp.rs +++ b/components/equihash/src/tromp.rs @@ -174,10 +174,16 @@ pub fn solve_200_9_compressed( let p = Params::new(200, 9).expect("should be valid"); let solutions = solve_200_9(input, next_nonce); - solutions + let mut solutions: Vec> = solutions .iter() .map(|solution| minimal_from_indices(p, solution)) - .collect() + .collect(); + + // Just in case the solver returns solutions that become the same when compressed. + solutions.sort(); + solutions.dedup(); + + solutions } #[cfg(test)] From 989f40ee9bceef17195238917911bd6effcd8ad2 Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 12 Jan 2024 07:57:42 +1000 Subject: [PATCH 12/17] equihash: Add a portable endian.h for `htole32()` on macOS and Windows Source: mikepb/endian.h@0f885cbba627efe9b8f763e1c2872e904fe0c0b1 License: Public Domain (or "BSD OR MIT OR Apache-2.0") --- components/equihash/tromp/equi_miner.c | 4 + components/equihash/tromp/portable_endian.h | 128 ++++++++++++++++++++ 2 files changed, 132 insertions(+) create mode 100644 components/equihash/tromp/portable_endian.h diff --git a/components/equihash/tromp/equi_miner.c b/components/equihash/tromp/equi_miner.c index a57bca34a2..b1b0ae9a19 100644 --- a/components/equihash/tromp/equi_miner.c +++ b/components/equihash/tromp/equi_miner.c @@ -22,6 +22,10 @@ #define ZCASH_POW_TROMP_EQUI_MINER_H #include "equi.h" + +// Provides htole32() on macOS and Windows +#include "portable_endian.h" + #include #include #include diff --git a/components/equihash/tromp/portable_endian.h b/components/equihash/tromp/portable_endian.h new file mode 100644 index 0000000000..74575fcd18 --- /dev/null +++ b/components/equihash/tromp/portable_endian.h @@ -0,0 +1,128 @@ +// +// endian.h +// +// https://gist.github.com/panzi/6856583 +// +// I, Mathias Panzenböck, place this file hereby into the public domain. Use +// it at your own risk for whatever you like. In case there are +// jurisdictions that don't support putting things in the public domain you +// can also consider it to be "dual licensed" under the BSD, MIT and Apache +// licenses, if you want to. This code is trivial anyway. Consider it an +// example on how to get the endian conversion functions on different +// platforms. + +// Downloaded from https://raw.githubusercontent.com/mikepb/endian.h/master/endian.h +// on 12 January 2024. + +#ifndef PORTABLE_ENDIAN_H__ +#define PORTABLE_ENDIAN_H__ + +#if (defined(_WIN16) || defined(_WIN32) || defined(_WIN64)) && !defined(__WINDOWS__) + +# define __WINDOWS__ + +#endif + +#if defined(__linux__) || defined(__CYGWIN__) + +# include + +#elif defined(__APPLE__) + +# include + +# define htobe16(x) OSSwapHostToBigInt16(x) +# define htole16(x) OSSwapHostToLittleInt16(x) +# define be16toh(x) OSSwapBigToHostInt16(x) +# define le16toh(x) OSSwapLittleToHostInt16(x) + +# define htobe32(x) OSSwapHostToBigInt32(x) +# define htole32(x) OSSwapHostToLittleInt32(x) +# define be32toh(x) OSSwapBigToHostInt32(x) +# define le32toh(x) OSSwapLittleToHostInt32(x) + +# define htobe64(x) OSSwapHostToBigInt64(x) +# define htole64(x) OSSwapHostToLittleInt64(x) +# define be64toh(x) OSSwapBigToHostInt64(x) +# define le64toh(x) OSSwapLittleToHostInt64(x) + +# define __BYTE_ORDER BYTE_ORDER +# define __BIG_ENDIAN BIG_ENDIAN +# define __LITTLE_ENDIAN LITTLE_ENDIAN +# define __PDP_ENDIAN PDP_ENDIAN + +#elif defined(__OpenBSD__) + +# include + +#elif defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__) + +# include + +# define be16toh(x) betoh16(x) +# define le16toh(x) letoh16(x) + +# define be32toh(x) betoh32(x) +# define le32toh(x) letoh32(x) + +# define be64toh(x) betoh64(x) +# define le64toh(x) letoh64(x) + +#elif defined(__WINDOWS__) + +# include +# include + +# if BYTE_ORDER == LITTLE_ENDIAN + +# define htobe16(x) htons(x) +# define htole16(x) (x) +# define be16toh(x) ntohs(x) +# define le16toh(x) (x) + +# define htobe32(x) htonl(x) +# define htole32(x) (x) +# define be32toh(x) ntohl(x) +# define le32toh(x) (x) + +# define htobe64(x) htonll(x) +# define htole64(x) (x) +# define be64toh(x) ntohll(x) +# define le64toh(x) (x) + +# elif BYTE_ORDER == BIG_ENDIAN + + /* that would be xbox 360 */ +# define htobe16(x) (x) +# define htole16(x) __builtin_bswap16(x) +# define be16toh(x) (x) +# define le16toh(x) __builtin_bswap16(x) + +# define htobe32(x) (x) +# define htole32(x) __builtin_bswap32(x) +# define be32toh(x) (x) +# define le32toh(x) __builtin_bswap32(x) + +# define htobe64(x) (x) +# define htole64(x) __builtin_bswap64(x) +# define be64toh(x) (x) +# define le64toh(x) __builtin_bswap64(x) + +# else + +# error byte order not supported + +# endif + +# define __BYTE_ORDER BYTE_ORDER +# define __BIG_ENDIAN BIG_ENDIAN +# define __LITTLE_ENDIAN LITTLE_ENDIAN +# define __PDP_ENDIAN PDP_ENDIAN + +#else + +# error platform not supported + +#endif + +#endif From b737d0fe26967f961abf1dcfdd8669dd8daabc15 Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 12 Jan 2024 08:05:19 +1000 Subject: [PATCH 13/17] equihash: Remove unused thread support to enable Windows compilation --- components/equihash/src/tromp.rs | 2 - components/equihash/tromp/equi.h | 4 -- components/equihash/tromp/equi_miner.c | 41 +++----------- components/equihash/tromp/osx_barrier.h | 75 ------------------------- 4 files changed, 8 insertions(+), 114 deletions(-) delete mode 100644 components/equihash/tromp/osx_barrier.h diff --git a/components/equihash/src/tromp.rs b/components/equihash/src/tromp.rs index ecd0658278..af53b4a596 100644 --- a/components/equihash/src/tromp.rs +++ b/components/equihash/src/tromp.rs @@ -17,7 +17,6 @@ struct CEqui { extern "C" { #[allow(improper_ctypes)] fn equi_new( - n_threads: u32, blake2b_clone: extern "C" fn(state: *const State) -> *mut State, blake2b_free: extern "C" fn(state: *mut State), blake2b_update: extern "C" fn(state: *mut State, input: *const u8, input_len: usize), @@ -124,7 +123,6 @@ pub fn solve_200_9( #[allow(unsafe_code)] let eq = unsafe { equi_new( - 1, blake2b::blake2b_clone, blake2b::blake2b_free, blake2b::blake2b_update, diff --git a/components/equihash/tromp/equi.h b/components/equihash/tromp/equi.h index 2bf0794edf..7b3969f52f 100644 --- a/components/equihash/tromp/equi.h +++ b/components/equihash/tromp/equi.h @@ -4,10 +4,6 @@ #ifndef ZCASH_POW_TROMP_EQUI_H #define ZCASH_POW_TROMP_EQUI_H -#ifdef __APPLE__ -#include "osx_barrier.h" -#endif - #include // for type bool #include // for types uint32_t,uint64_t #include // for functions memset diff --git a/components/equihash/tromp/equi_miner.c b/components/equihash/tromp/equi_miner.c index b1b0ae9a19..d2682e6747 100644 --- a/components/equihash/tromp/equi_miner.c +++ b/components/equihash/tromp/equi_miner.c @@ -28,7 +28,6 @@ #include #include -#include #include typedef uint16_t u16; @@ -248,16 +247,13 @@ struct equi { bsizes *nslots; // PUT IN BUCKET STRUCT proof *sols; au32 nsols; - u32 nthreads; u32 xfull; u32 hfull; u32 bfull; - pthread_barrier_t barry; }; typedef struct equi equi; void equi_clearslots(equi *eq); equi *equi_new( - const u32 n_threads, blake2b_clone blake2b_clone, blake2b_free blake2b_free, blake2b_update blake2b_update, @@ -265,15 +261,11 @@ typedef struct equi equi; ) { assert(sizeof(hashunit) == 4); equi *eq = malloc(sizeof(equi)); - eq->nthreads = n_threads; eq->blake2b_clone = blake2b_clone; eq->blake2b_free = blake2b_free; eq->blake2b_update = blake2b_update; eq->blake2b_finalize = blake2b_finalize; - const int err = pthread_barrier_init(&eq->barry, NULL, eq->nthreads); - assert(!err); - alloctrees(&eq->hta); eq->nslots = (bsizes *)htalloc_alloc(&eq->hta, 2 * NBUCKETS, sizeof(au32)); eq->sols = (proof *)htalloc_alloc(&eq->hta, MAXSOLS, sizeof(proof)); @@ -409,7 +401,7 @@ typedef struct equi equi; u32 nextbo; }; typedef struct htlayout htlayout; - + htlayout htlayout_new(equi *eq, u32 r) { htlayout htl; htl.hta = eq->hta; @@ -525,7 +517,7 @@ typedef struct equi equi; BLAKE2bState* state; htlayout htl = htlayout_new(eq, 0); const u32 hashbytes = hashsize(0); - for (u32 block = id; block < NBLOCKS; block += eq->nthreads) { + for (u32 block = id; block < NBLOCKS; block++) { state = eq->blake2b_clone(eq->blake_ctx); u32 leb = htole32(block); eq->blake2b_update(state, (uchar *)&leb, sizeof(u32)); @@ -561,11 +553,11 @@ typedef struct equi equi; } } } - + void equi_digitodd(equi *eq, const u32 r, const u32 id) { htlayout htl = htlayout_new(eq, r); collisiondata cd; - for (u32 bucketid=id; bucketid < NBUCKETS; bucketid += eq->nthreads) { + for (u32 bucketid=id; bucketid < NBUCKETS; bucketid++) { collisiondata_clear(&cd); slot0 *buck = htl.hta.trees0[(r-1)/2][bucketid]; // optimize by updating previous buck?! u32 bsize = getnslots(eq, r-1, bucketid); // optimize by putting bucketsize with block?! @@ -613,11 +605,11 @@ typedef struct equi equi; } } } - + void equi_digiteven(equi *eq, const u32 r, const u32 id) { htlayout htl = htlayout_new(eq, r); collisiondata cd; - for (u32 bucketid=id; bucketid < NBUCKETS; bucketid += eq->nthreads) { + for (u32 bucketid=id; bucketid < NBUCKETS; bucketid++) { collisiondata_clear(&cd); slot1 *buck = htl.hta.trees1[(r-1)/2][bucketid]; // OPTIMIZE BY UPDATING PREVIOUS u32 bsize = getnslots(eq, r-1, bucketid); @@ -665,12 +657,12 @@ typedef struct equi equi; } } } - + void equi_digitK(equi *eq, const u32 id) { collisiondata cd; htlayout htl = htlayout_new(eq, WK); u32 nc = 0; - for (u32 bucketid = id; bucketid < NBUCKETS; bucketid += eq->nthreads) { + for (u32 bucketid = id; bucketid < NBUCKETS; bucketid++) { collisiondata_clear(&cd); slot0 *buck = htl.hta.trees0[(WK-1)/2][bucketid]; u32 bsize = getnslots(eq, WK-1, bucketid); @@ -697,40 +689,26 @@ nc++, candidate(eq, tree_from_bid(bucketid, s0, s1)); typedef struct { u32 id; - pthread_t thread; equi *eq; } thread_ctx; -void barrier(pthread_barrier_t *barry) { - const int rc = pthread_barrier_wait(barry); - if (rc != 0 && rc != PTHREAD_BARRIER_SERIAL_THREAD) { -// printf("Could not wait on barrier\n"); - pthread_exit(NULL); - } -} - void *worker(void *vp) { thread_ctx *tp = (thread_ctx *)vp; equi *eq = tp->eq; // if (tp->id == 0) // printf("Digit 0\n"); - barrier(&eq->barry); equi_digit0(eq, tp->id); - barrier(&eq->barry); if (tp->id == 0) { equi_clearslots(eq); #ifdef EQUIHASH_SHOW_BUCKET_SIZES showbsizes(eq, 0); #endif } - barrier(&eq->barry); for (u32 r = 1; r < WK; r++) { // if (tp->id == 0) // printf("Digit %d", r); - barrier(&eq->barry); r&1 ? equi_digitodd(eq, r, tp->id) : equi_digiteven(eq, r, tp->id); - barrier(&eq->barry); if (tp->id == 0) { // printf(" x%d b%d h%d\n", eq->xfull, eq->bfull, eq->hfull); equi_clearslots(eq); @@ -738,13 +716,10 @@ void *worker(void *vp) { showbsizes(eq, r); #endif } - barrier(&eq->barry); } // if (tp->id == 0) // printf("Digit %d\n", WK); equi_digitK(eq, tp->id); - barrier(&eq->barry); - pthread_exit(NULL); return 0; } diff --git a/components/equihash/tromp/osx_barrier.h b/components/equihash/tromp/osx_barrier.h deleted file mode 100644 index 659c40bf59..0000000000 --- a/components/equihash/tromp/osx_barrier.h +++ /dev/null @@ -1,75 +0,0 @@ -#ifndef ZCASH_POW_TROMP_OSX_BARRIER_H -#define ZCASH_POW_TROMP_OSX_BARRIER_H - -#ifdef __APPLE__ - -#ifndef PTHREAD_BARRIER_H_ -#define PTHREAD_BARRIER_H_ - -#include -#include - -typedef int pthread_barrierattr_t; -#define PTHREAD_BARRIER_SERIAL_THREAD 1 - -typedef struct -{ - pthread_mutex_t mutex; - pthread_cond_t cond; - int count; - int tripCount; -} pthread_barrier_t; - - -int pthread_barrier_init(pthread_barrier_t *barrier, const pthread_barrierattr_t *attr, unsigned int count) -{ - if(count == 0) - { - errno = EINVAL; - return -1; - } - if(pthread_mutex_init(&barrier->mutex, 0) < 0) - { - return -1; - } - if(pthread_cond_init(&barrier->cond, 0) < 0) - { - pthread_mutex_destroy(&barrier->mutex); - return -1; - } - barrier->tripCount = count; - barrier->count = 0; - - return 0; -} - -int pthread_barrier_destroy(pthread_barrier_t *barrier) -{ - pthread_cond_destroy(&barrier->cond); - pthread_mutex_destroy(&barrier->mutex); - return 0; -} - -int pthread_barrier_wait(pthread_barrier_t *barrier) -{ - pthread_mutex_lock(&barrier->mutex); - ++(barrier->count); - if(barrier->count >= barrier->tripCount) - { - barrier->count = 0; - pthread_cond_broadcast(&barrier->cond); - pthread_mutex_unlock(&barrier->mutex); - return PTHREAD_BARRIER_SERIAL_THREAD; - } - else - { - pthread_cond_wait(&barrier->cond, &(barrier->mutex)); - pthread_mutex_unlock(&barrier->mutex); - return 0; - } -} - -#endif // PTHREAD_BARRIER_H_ -#endif // __APPLE__ - -#endif // ZCASH_POW_TROMP_OSX_BARRIER_H From 2bd7bc8f8e201bb19b6dc7d9b839b9d074f556d8 Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 12 Jan 2024 08:22:48 +1000 Subject: [PATCH 14/17] equihash: Don't import a header that's missing in Windows CI --- components/equihash/tromp/portable_endian.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/components/equihash/tromp/portable_endian.h b/components/equihash/tromp/portable_endian.h index 74575fcd18..4a71ce7a7a 100644 --- a/components/equihash/tromp/portable_endian.h +++ b/components/equihash/tromp/portable_endian.h @@ -71,7 +71,9 @@ #elif defined(__WINDOWS__) # include -# include + +// Not available in librustzcash CI +//# include # if BYTE_ORDER == LITTLE_ENDIAN From 9391e65c2167032bbe9a6502c2be8c5b6891761b Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 12 Jan 2024 11:12:25 +1000 Subject: [PATCH 15/17] equihash: Clear slots when setting the hash state The equivalent change is made to the C worker, which is unused. --- components/equihash/tromp/equi_miner.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/components/equihash/tromp/equi_miner.c b/components/equihash/tromp/equi_miner.c index d2682e6747..17dfb89373 100644 --- a/components/equihash/tromp/equi_miner.c +++ b/components/equihash/tromp/equi_miner.c @@ -301,6 +301,7 @@ typedef struct equi equi; eq->blake_ctx = eq->blake2b_clone(ctx); memset(eq->nslots, 0, NBUCKETS * sizeof(au32)); // only nslots[0] needs zeroing + equi_clearslots(eq); eq->nsols = 0; } void equi_clearslots(equi *eq) { @@ -698,6 +699,9 @@ void *worker(void *vp) { // if (tp->id == 0) // printf("Digit 0\n"); + if (tp->id == 0) { + equi_clearslots(eq); + } equi_digit0(eq, tp->id); if (tp->id == 0) { equi_clearslots(eq); From 76131db25a2772c5e139d066ec68975f26d988b5 Mon Sep 17 00:00:00 2001 From: teor Date: Fri, 5 Jan 2024 10:50:28 +1000 Subject: [PATCH 16/17] Add commented-out prints of solution candidates for debugging --- components/equihash/src/tromp.rs | 20 ++++++++++++++++++++ components/equihash/tromp/equi_miner.c | 11 +++++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/components/equihash/src/tromp.rs b/components/equihash/src/tromp.rs index af53b4a596..37daa6d741 100644 --- a/components/equihash/src/tromp.rs +++ b/components/equihash/src/tromp.rs @@ -44,6 +44,7 @@ extern "C" { /// /// This function uses unsafe code for FFI into the tromp solver. #[allow(unsafe_code)] +#[allow(clippy::print_stdout)] unsafe fn worker(eq: *mut CEqui, p: Params, curr_state: &State) -> Vec> { // SAFETY: caller must supply a valid `eq` instance. // @@ -70,6 +71,7 @@ unsafe fn worker(eq: *mut CEqui, p: Params, curr_state: &State) -> Vec> let nsols = equi_nsols(eq); let sols = equi_sols(eq); let solution_len = 1 << p.k; + //println!("{nsols} solutions of length {solution_len} at {sols:?}"); // SAFETY: // - caller must supply a `p` instance that matches the hard-coded values in the C code. @@ -77,6 +79,13 @@ unsafe fn worker(eq: *mut CEqui, p: Params, curr_state: &State) -> Vec> // - this slice is a shared ref to the memory in a valid `eq` instance supplied by the caller. let solutions: &[u32] = slice::from_raw_parts(sols, nsols * solution_len); + /* + println!( + "{nsols} solutions of length {solution_len} as a slice of length {:?}", + solutions.len() + ); + */ + let mut chunks = solutions.chunks_exact(solution_len); // SAFETY: @@ -98,6 +107,17 @@ unsafe fn worker(eq: *mut CEqui, p: Params, curr_state: &State) -> Vec> solutions }; + /* + println!( + "{} solutions as cloned vectors of length {:?}", + solutions.len(), + solutions + .iter() + .map(|solution| solution.len()) + .collect::>() + ); + */ + solutions } diff --git a/components/equihash/tromp/equi_miner.c b/components/equihash/tromp/equi_miner.c index 17dfb89373..a435e5c513 100644 --- a/components/equihash/tromp/equi_miner.c +++ b/components/equihash/tromp/equi_miner.c @@ -355,8 +355,15 @@ typedef struct equi equi; listindices1(eq, WK, t, prf); // assume WK odd qsort(prf, PROOFSIZE, sizeof(u32), &compu32); for (u32 i=1; i proof[%d], actual: %d <= %d\n", + i, i-1, prf[i], prf[i-1] + ); + */ return; + } #ifdef EQUIHASH_TROMP_ATOMIC u32 soli = std::atomic_fetch_add_explicit(&eq->nsols, 1U, std::memory_order_relaxed); #else @@ -678,7 +685,7 @@ nc++, candidate(eq, tree_from_bid(bucketid, s0, s1)); } } } -//printf(" %d candidates ", nc); +//printf(" %d candidates\n", nc); } size_t equi_nsols(const equi *eq) { From 634285d2f15461cba3230d17b3bd14946d81d891 Mon Sep 17 00:00:00 2001 From: Daira-Emma Hopwood Date: Thu, 31 Oct 2024 17:35:55 +0000 Subject: [PATCH 17/17] Note in Cargo.toml that this crate is experimental Co-authored-by: Arya --- components/equihash/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/equihash/Cargo.toml b/components/equihash/Cargo.toml index e2a9e0b054..ea3dd29cce 100644 --- a/components/equihash/Cargo.toml +++ b/components/equihash/Cargo.toml @@ -12,7 +12,7 @@ rust-version = "1.56.1" [features] default = [] -## Builds the C++ tromp solver and Rust FFI layer. +# Experimental tromp solver support, builds the C++ tromp solver and Rust FFI layer. solver = ["dep:cc"] [dependencies]