From 45652a21a8e3504e9d589b35d6ecf7820f7cae90 Mon Sep 17 00:00:00 2001
From: Jack Grigg <jack@electriccoin.co>
Date: Thu, 4 Jan 2024 00:33:19 +0000
Subject: [PATCH 01/17] equihash: Import Tromp solver

Source: zcash/zcash@01d5576a979816c928d524967e36c859adec49b6
License: MIT
---
 components/equihash/tromp/equi.h        | 105 ++++
 components/equihash/tromp/equi_miner.h  | 651 ++++++++++++++++++++++++
 components/equihash/tromp/osx_barrier.h |  75 +++
 3 files changed, 831 insertions(+)
 create mode 100644 components/equihash/tromp/equi.h
 create mode 100644 components/equihash/tromp/equi_miner.h
 create mode 100644 components/equihash/tromp/osx_barrier.h

diff --git a/components/equihash/tromp/equi.h b/components/equihash/tromp/equi.h
new file mode 100644
index 0000000000..90beb785be
--- /dev/null
+++ b/components/equihash/tromp/equi.h
@@ -0,0 +1,105 @@
+// Equihash solver
+// Copyright (c) 2016-2016 John Tromp, The Zcash developers
+
+#ifndef ZCASH_POW_TROMP_EQUI_H
+#define ZCASH_POW_TROMP_EQUI_H
+
+#ifdef __APPLE__
+#include "pow/tromp/osx_barrier.h"
+#endif
+#include "compat/endian.h"
+
+#include <stdint.h> // for types uint32_t,uint64_t
+#include <string.h> // for functions memset
+#include <stdlib.h> // for function qsort
+
+#include <rust/blake2b.h>
+
+typedef uint32_t u32;
+typedef unsigned char uchar;
+
+// algorithm parameters, prefixed with W to reduce include file conflicts
+
+#ifndef WN
+#define WN	200
+#endif
+
+#ifndef WK
+#define WK	9
+#endif
+
+#define NDIGITS		(WK+1)
+#define DIGITBITS	(WN/(NDIGITS))
+
+static const u32 PROOFSIZE = 1<<WK;
+static const u32 BASE = 1<<DIGITBITS;
+static const u32 NHASHES = 2*BASE;
+static const u32 HASHESPERBLAKE = 512/WN;
+static const u32 HASHOUT = HASHESPERBLAKE*WN/8;
+
+typedef u32 proof[PROOFSIZE];
+
+
+enum verify_code { POW_OK, POW_DUPLICATE, POW_OUT_OF_ORDER, POW_NONZERO_XOR }; // result codes returned by verify() and verifyrec()
+const char *errstr[] = { "OK", "duplicate index", "indices out of order", "nonzero xor" }; // messages listed in verify_code order; NOTE(review): non-static definition in a header, presumably only safe when included from one translation unit
+
+void genhash(const BLAKE2bState *ctx, u32 idx, uchar *hash) { // writes the WN/8-byte hash for index idx into hash
+  auto state = blake2b_clone(ctx); // hash on a private copy; ctx itself is const
+  u32 leb = htole32(idx / HASHESPERBLAKE); // number of the BLAKE2b call covering idx, fed to the hash as 4 bytes
+  blake2b_update(state, (uchar *)&leb, sizeof(u32));
+  uchar blakehash[HASHOUT];
+  blake2b_finalize(state, blakehash, HASHOUT);
+  blake2b_free(state);
+  memcpy(hash, blakehash + (idx % HASHESPERBLAKE) * WN/8, WN/8); // pick this index's WN/8-byte slice of the output
+}
+
+int verifyrec(const BLAKE2bState *ctx, u32 *indices, uchar *hash, int r) { // checks 2^r indices as a height-r subtree; on POW_OK hash holds the XOR of their leaf hashes
+  if (r == 0) { // leaf: a single index, just hash it
+    genhash(ctx, *indices, hash);
+    return POW_OK;
+  }
+  u32 *indices1 = indices + (1 << (r-1)); // start of the right half
+  if (*indices >= *indices1) // first index of the left half must be smaller than first index of the right half
+    return POW_OUT_OF_ORDER;
+  uchar hash0[WN/8], hash1[WN/8];
+  int vrf0 = verifyrec(ctx, indices,  hash0, r-1);
+  if (vrf0 != POW_OK)
+    return vrf0;
+  int vrf1 = verifyrec(ctx, indices1, hash1, r-1);
+  if (vrf1 != POW_OK)
+    return vrf1;
+  for (int i=0; i < WN/8; i++)
+    hash[i] = hash0[i] ^ hash1[i];
+  int i, b = r * DIGITBITS; // b = number of leading bits that must be zero at this height
+  for (i = 0; i < b/8; i++) // whole bytes first
+    if (hash[i])
+      return POW_NONZERO_XOR;
+  if ((b%8) && hash[i] >> (8-(b%8))) // then the top b%8 bits of the following byte
+    return POW_NONZERO_XOR;
+  return POW_OK;
+}
+
+int compu32(const void *pa, const void *pb) { // qsort comparator: ascending order of the pointed-to u32 values
+  u32 a = *(u32 *)pa, b = *(u32 *)pb;
+  return a<b ? -1 : a==b ? 0 : +1; // explicit three-way compare instead of a subtraction
+}
+
+bool duped(proof prf) { // returns true if prf contains any index more than once; prf itself is not modified
+  proof sortprf;
+  memcpy(sortprf, prf, sizeof(proof)); // sort a copy so the caller's ordering is preserved
+  qsort(sortprf, PROOFSIZE, sizeof(u32), &compu32);
+  for (u32 i=1; i<PROOFSIZE; i++)
+    if (sortprf[i] <= sortprf[i-1]) // after sorting, equal values are adjacent
+      return true;
+  return false;
+}
+
+// verify Wagner conditions; returns POW_OK or the verify_code of the first failed check
+int verify(u32 indices[PROOFSIZE], const BLAKE2bState *ctx) {
+  if (duped(indices)) // reject repeated indices before the recursive check
+    return POW_DUPLICATE;
+  uchar hash[WN/8]; // receives the XOR of all leaf hashes; not inspected here
+  return verifyrec(ctx, indices, hash, WK);
+}
+
+#endif // ZCASH_POW_TROMP_EQUI_H
diff --git a/components/equihash/tromp/equi_miner.h b/components/equihash/tromp/equi_miner.h
new file mode 100644
index 0000000000..5efbbaa693
--- /dev/null
+++ b/components/equihash/tromp/equi_miner.h
@@ -0,0 +1,651 @@
+// Equihash solver
+// Copyright (c) 2016 John Tromp, The Zcash developers
+
+// Fix N, K, such that n = N/(k+1) is integer
+// Fix M = 2^{n+1} hashes each of length N bits,
+// H_0, ... , H_{M-1}, generated from (n+1)-bit indices.
+// Problem: find binary tree on 2^K distinct indices,
+// for which the exclusive-or of leaf hashes is all 0s.
+// Additionally, it should satisfy the Wagner conditions:
+// for each height i subtree, the exclusive-or
+// of its 2^i corresponding hashes starts with i*n 0 bits,
+// and for i>0 the leftmost leaf of its left subtree
+// is less than the leftmost leaf of its right subtree
+
+// The algorithm below solves this by maintaining the trees
+// in a graph of K layers, each split into buckets
+// with buckets indexed by the first n-RESTBITS bits following
+// the i*n 0s, each bucket having 4 * 2^RESTBITS slots,
+// twice the number of subtrees expected to land there.
+
+#ifndef ZCASH_POW_TROMP_EQUI_MINER_H
+#define ZCASH_POW_TROMP_EQUI_MINER_H
+
+#include "pow/tromp/equi.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <assert.h>
+
+typedef uint16_t u16;
+typedef uint64_t u64;
+
+#ifdef EQUIHASH_TROMP_ATOMIC
+#include <atomic>
+typedef std::atomic<u32> au32;
+#else
+typedef u32 au32;
+#endif
+
+#ifndef RESTBITS
+#define RESTBITS	8
+#endif
+
+// 2_log of number of buckets
+#define BUCKBITS (DIGITBITS-RESTBITS)
+
+#ifndef SAVEMEM
+#if RESTBITS == 4
+// can't save memory in such small buckets
+#define SAVEMEM 1
+#elif RESTBITS >= 8
+// take advantage of law of large numbers (sum of 2^8 random numbers)
+// this reduces (200,9) memory to under 144MB, with negligible discarding
+#define SAVEMEM 9/14
+#endif
+#endif
+
+// number of buckets
+static const u32 NBUCKETS = 1<<BUCKBITS;
+// 2_log of number of slots per bucket
+static const u32 SLOTBITS = RESTBITS+1+1;
+static const u32 SLOTRANGE = 1<<SLOTBITS;
+static const u32 SLOTMSB = 1<<(SLOTBITS-1);
+// number of slots per bucket
+static const u32 NSLOTS = SLOTRANGE * SAVEMEM;
+// number of per-xhash slots
+static const u32 XFULL = 16;
+// SLOTBITS mask
+static const u32 SLOTMASK = SLOTRANGE-1;
+// number of possible values of xhash (rest of n) bits
+static const u32 NRESTS = 1<<RESTBITS;
+// number of blocks of hashes extracted from single 512 bit blake2b output
+static const u32 NBLOCKS = (NHASHES+HASHESPERBLAKE-1)/HASHESPERBLAKE;
+// nothing larger found in 100000 runs
+static const u32 MAXSOLS = 8;
+
+// tree node identifying its children as two different slots in
+// a bucket on previous layer with the same rest bits (x-tra hash)
+struct tree {
+  u32 bid_s0_s1; // manual bitfields
+
+  tree(const u32 idx) { // leaf form: stores idx verbatim (read back with getindex())
+    bid_s0_s1 = idx;
+  }
+  tree(const u32 bid, const u32 s0, const u32 s1) { // internal node: packs a bucket id and two slot ids into one word
+#ifdef SLOTDIFF
+    u32 ds10 = (s1 - s0) & SLOTMASK; // slot distance modulo SLOTRANGE
+    if (ds10 & SLOTMSB) {
+      bid_s0_s1 = (((bid << SLOTBITS) | s1) << (SLOTBITS-1)) | (SLOTMASK & ~ds10);
+    } else {
+      bid_s0_s1 = (((bid << SLOTBITS) | s0) << (SLOTBITS-1)) | (ds10 - 1);
+    }
+#else
+    bid_s0_s1 = (((bid << SLOTBITS) | s0) << SLOTBITS) | s1; // layout: bid, then s0, then s1, SLOTBITS bits per slot id
+#endif
+  }
+  u32 getindex() const { // raw word; meaningful for nodes built with the single-argument constructor
+    return bid_s0_s1;
+  }
+  u32 bucketid() const { // bits above the two slot fields
+#ifdef SLOTDIFF
+    return bid_s0_s1 >> (2 * SLOTBITS - 1);
+#else
+    return bid_s0_s1 >> (2 * SLOTBITS);
+#endif
+  }
+  u32 slotid0() const { // first stored slot id
+#ifdef SLOTDIFF
+    return (bid_s0_s1 >> (SLOTBITS-1)) & SLOTMASK;
+#else
+    return (bid_s0_s1 >> SLOTBITS) & SLOTMASK;
+#endif
+  }
+  u32 slotid1() const { // second slot id; with SLOTDIFF it is rebuilt from slotid0() plus the stored distance
+#ifdef SLOTDIFF
+    return (slotid0() + 1 + (bid_s0_s1 & (SLOTMASK>>1))) & SLOTMASK;
+#else
+    return bid_s0_s1 & SLOTMASK;
+#endif
+  }
+};
+
+union hashunit { // one 32-bit unit of stored hash, addressable as a word or as individual bytes
+  u32 word;
+  uchar bytes[sizeof(u32)];
+};
+
+#define WORDS(bits)	((bits + 31) / 32)
+#define HASHWORDS0 WORDS(WN - DIGITBITS + RESTBITS)
+#define HASHWORDS1 WORDS(WN - 2*DIGITBITS + RESTBITS)
+
+struct slot0 { // element type of bucket0: a tree node plus HASHWORDS0 units of hash
+  tree attr;
+  hashunit hash[HASHWORDS0];
+};
+
+struct slot1 { // element type of bucket1: a tree node plus HASHWORDS1 units of hash
+  tree attr;
+  hashunit hash[HASHWORDS1];
+};
+
+// a bucket is NSLOTS treenodes
+typedef slot0 bucket0[NSLOTS];
+typedef slot1 bucket1[NSLOTS];
+// the N-bit hash consists of K+1 n-bit "digits"
+// each of which corresponds to a layer of NBUCKETS buckets
+typedef bucket0 digit0[NBUCKETS];
+typedef bucket1 digit1[NBUCKETS];
+
+// size (in bytes) of hash in round 0 <= r < WK
+u32 hashsize(const u32 r) {
+  const u32 hashbits = WN - (r+1) * DIGITBITS + RESTBITS; // WN minus r+1 digits, plus RESTBITS
+  return (hashbits + 7) / 8; // round up to whole bytes
+}
+
+u32 hashwords(u32 bytes) { // number of 4-byte units needed to hold `bytes` bytes (rounded up)
+  return (bytes + 3) / 4;
+}
+
+// manages hash and tree data
+struct htalloc {
+  u32 *heap0; // allocation of sizeof(digit0) bytes that all trees0 entries point into
+  u32 *heap1; // allocation of sizeof(digit1) bytes that all trees1 entries point into
+  bucket0 *trees0[(WK+1)/2]; // layer pointers for even rounds r (index r/2)
+  bucket1 *trees1[WK/2]; // layer pointers for odd rounds r (index r/2)
+  u32 alloced; // running total of bytes requested through alloc()
+  htalloc() {
+    alloced = 0;
+  }
+  void alloctrees() { // allocates both heaps and points every layer into them
+// optimize xenoncat's fixed memory layout, avoiding any waste
+// digit  trees  hashes  trees hashes
+// 0      0 A A A A A A   . . . . . .
+// 1      0 A A A A A A   1 B B B B B
+// 2      0 2 C C C C C   1 B B B B B
+// 3      0 2 C C C C C   1 3 D D D D
+// 4      0 2 4 E E E E   1 3 D D D D
+// 5      0 2 4 E E E E   1 3 5 F F F
+// 6      0 2 4 6 . G G   1 3 5 F F F
+// 7      0 2 4 6 . G G   1 3 5 7 H H
+// 8      0 2 4 6 8 . I   1 3 5 7 H H
+    assert(DIGITBITS >= 16); // ensures hashes shorten by 1 unit every 2 digits
+    heap0 = (u32 *)alloc(1, sizeof(digit0));
+    heap1 = (u32 *)alloc(1, sizeof(digit1));
+    for (int r=0; r<WK; r++)
+      if ((r&1) == 0)
+        trees0[r/2]  = (bucket0 *)(heap0 + r/2); // each layer starts r/2 u32 words into the shared heap
+      else
+        trees1[r/2]  = (bucket1 *)(heap1 + r/2);
+  }
+  void dealloctrees() { // frees the two heaps; the trees0/trees1 pointers are left dangling
+    free(heap0);
+    free(heap1);
+  }
+  void *alloc(const u32 n, const u32 sz) { // zero-initialised allocation of n*sz bytes, tracked in alloced
+    void *mem  = calloc(n, sz);
+    assert(mem); // NOTE(review): the only failure check; it is compiled out when NDEBUG is defined
+    alloced += n * sz;
+    return mem;
+  }
+};
+
+typedef au32 bsizes[NBUCKETS];
+
+u32 min(const u32 a, const u32 b) { // smaller of two u32 values
+  return a < b ? a : b;
+}
+
+struct equi {
+  BLAKE2bState* blake_ctx;
+  htalloc hta;
+  bsizes *nslots; // PUT IN BUCKET STRUCT
+  proof *sols;
+  au32 nsols;
+  u32 nthreads;
+  u32 xfull;
+  u32 hfull;
+  u32 bfull;
+  pthread_barrier_t barry;
+  equi(const u32 n_threads) : blake_ctx(NULL), nthreads(n_threads), xfull(0), hfull(0), bfull(0) {
+    // blake_ctx starts as NULL so ~equi() frees a defined pointer even if setstate() never ran; the overflow counters start at 0 because digit0() increments them
+    assert(sizeof(hashunit) == 4);
+    const int err = pthread_barrier_init(&barry, NULL, nthreads); // barrier shared by all n_threads workers
+    assert(!err);
+    hta.alloctrees();
+    nslots = (bsizes *)hta.alloc(2 * NBUCKETS, sizeof(au32)); // two counter arrays, selected by r&1
+    sols   =  (proof *)hta.alloc(MAXSOLS, sizeof(proof)); // room for MAXSOLS stored solutions
+  }
+  ~equi() { // releases the tree heaps, the slot counters, the solution buffer and the hash state; NOTE(review): no pthread_barrier_destroy call for barry is visible here
+    hta.dealloctrees();
+    free(nslots);
+    free(sols);
+    blake2b_free(blake_ctx); // NOTE(review): blake_ctx is assigned in setstate(); confirm it is always defined before destruction
+  }
+  void setstate(const BLAKE2bState *ctx) { // installs a private clone of ctx as the base hash state and resets the round-0 counters and nsols
+    blake_ctx = blake2b_clone(ctx); // NOTE(review): a previously stored blake_ctx is overwritten without being freed
+    memset(nslots, 0, NBUCKETS * sizeof(au32)); // only nslots[0] needs zeroing
+    nsols = 0;
+  }
+  u32 getslot(const u32 r, const u32 bucketi) { // returns the next slot number of bucket bucketi for round r and bumps its counter
+#ifdef EQUIHASH_TROMP_ATOMIC
+    return std::atomic_fetch_add_explicit(&nslots[r&1][bucketi], 1U, std::memory_order_relaxed);
+#else
+    return nslots[r&1][bucketi]++; // result can be >= NSLOTS; callers compare against NSLOTS before using it
+#endif
+  }
+  u32 getnslots(const u32 r, const u32 bid) { // returns the fill count of bucket bid in round r and resets it. SHOULD BE METHOD IN BUCKET STRUCT
+    au32 &nslot = nslots[r&1][bid];
+    const u32 n = min(nslot, NSLOTS); // the counter keeps growing past NSLOTS when a bucket overflows, so clamp it
+    nslot = 0; // leave the counter cleared for the next round that maps to the same r&1
+    return n;
+  }
+  void orderindices(u32 *indices, u32 size) { // swaps the two size-long halves when the first half's leading index is the larger one
+    if (indices[0] > indices[size]) {
+      for (u32 i=0; i < size; i++) {
+        const u32 tmp = indices[i];
+        indices[i] = indices[size+i];
+        indices[size+i] = tmp;
+      }
+    }
+  }
+  void listindices0(u32 r, const tree t, u32 *indices) { // writes the 2^r leaf indices below node t (children in trees1) into indices
+    if (r == 0) {
+      *indices = t.getindex(); // leaf: the node word is the index itself
+      return;
+    }
+    const bucket1 &buck = hta.trees1[--r/2][t.bucketid()]; // r is decremented here: children sit one layer down
+    const u32 size = 1 << r; // each child contributes 2^r indices (r already decremented)
+    u32 *indices1 = indices + size;
+    listindices1(r, buck[t.slotid0()].attr, indices);
+    listindices1(r, buck[t.slotid1()].attr, indices1);
+    orderindices(indices, size);
+  }
+  void listindices1(u32 r, const tree t, u32 *indices) { // writes the 2^r leaf indices below node t (children in trees0) into indices; has no r == 0 case, so presumably r is always odd here
+    const bucket0 &buck = hta.trees0[--r/2][t.bucketid()]; // r is decremented here: children sit one layer down
+    const u32 size = 1 << r; // each child contributes 2^r indices (r already decremented)
+    u32 *indices1 = indices + size;
+    listindices0(r, buck[t.slotid0()].attr, indices);
+    listindices0(r, buck[t.slotid1()].attr, indices1);
+    orderindices(indices, size);
+  }
+  void candidate(const tree t) { // expands t into a full proof and stores it in sols unless some index repeats
+    proof prf;
+    listindices1(WK, t, prf); // assume WK odd
+    qsort(prf, PROOFSIZE, sizeof(u32), &compu32); // the sorted copy is only used for the duplicate check below
+    for (u32 i=1; i<PROOFSIZE; i++)
+      if (prf[i] <= prf[i-1])
+        return;
+#ifdef EQUIHASH_TROMP_ATOMIC
+    u32 soli = std::atomic_fetch_add_explicit(&nsols, 1U, std::memory_order_relaxed);
+#else
+    u32 soli = nsols++;
+#endif
+    if (soli < MAXSOLS) // nsols keeps counting past MAXSOLS; only the first MAXSOLS proofs are stored
+      listindices1(WK, t, sols[soli]); // assume WK odd; expanded again because prf was sorted in place
+  }
+  void showbsizes(u32 r) { // debug aid: bins the bucket fill levels of round r; every printf is commented out, and the body is empty unless HIST, SPARK or LOGSPARK is defined
+#if defined(HIST) || defined(SPARK) || defined(LOGSPARK)
+    u32 binsizes[65];
+    memset(binsizes, 0, 65 * sizeof(u32));
+    for (u32 bucketid = 0; bucketid < NBUCKETS; bucketid++) {
+      u32 bsize = min(nslots[r&1][bucketid], NSLOTS) >> (SLOTBITS-6); // clamped bucket size scaled down to a bin number
+      binsizes[bsize]++;
+    }
+    for (u32 i=0; i < 65; i++) {
+#ifdef HIST
+//      printf(" %d:%d", i, binsizes[i]);
+#else
+#ifdef SPARK
+      u32 sparks = binsizes[i] / SPARKSCALE;
+#else
+      u32 sparks = 0;
+      for (u32 bs = binsizes[i]; bs; bs >>= 1) sparks++; // bit length of the bin count
+      sparks = sparks * 7 / SPARKSCALE;
+#endif
+//      printf("\342\226%c", '\201' + sparks);
+#endif
+    }
+//    printf("\n");
+#endif
+  }
+
+  struct htlayout { // per-round view of hash storage: unit counts and byte offsets for round r-1 ("prev") and round r ("next")
+    htalloc hta; // copy of the owning equi's htalloc (same heap and layer pointers)
+    u32 prevhashunits; // hash units per slot in round r-1; 0 when r == 0
+    u32 nexthashunits; // hash units per slot in round r
+    u32 dunits; // prevhashunits - nexthashunits; 0 when r == 0
+    u32 prevbo; // unused leading bytes in a round r-1 slot's hash units (0-3)
+    u32 nextbo; // unused leading bytes in a round r slot's hash units (0-3)
+  
+    htlayout(equi *eq, u32 r): hta(eq->hta), prevhashunits(0), dunits(0) {
+      u32 nexthashbytes = hashsize(r);
+      nexthashunits = hashwords(nexthashbytes);
+      prevbo = 0;
+      nextbo = nexthashunits * sizeof(hashunit) - nexthashbytes; // 0-3
+      if (r) { // round 0 has no previous round, so the prev* fields keep their zero defaults
+        u32 prevhashbytes = hashsize(r-1);
+        prevhashunits = hashwords(prevhashbytes);
+        prevbo = prevhashunits * sizeof(hashunit) - prevhashbytes; // 0-3
+        dunits = prevhashunits - nexthashunits;
+      }
+    }
+    u32 getxhash0(const slot0* pslot) const { // xhash value of a slot0, read from the bytes at prevbo; only the listed WN/RESTBITS pairs are supported
+#if WN == 200 && RESTBITS == 4
+      return pslot->hash->bytes[prevbo] >> 4;
+#elif WN == 200 && RESTBITS == 8
+      return (pslot->hash->bytes[prevbo] & 0xf) << 4 | pslot->hash->bytes[prevbo+1] >> 4;
+#elif WN == 200 && RESTBITS == 9
+      return (pslot->hash->bytes[prevbo] & 0x1f) << 4 | pslot->hash->bytes[prevbo+1] >> 4;
+#elif WN == 144 && RESTBITS == 4
+      return pslot->hash->bytes[prevbo] & 0xf;
+#else
+#error non implemented
+#endif
+    }
+    u32 getxhash1(const slot1* pslot) const { // xhash value of a slot1, read from the bytes at prevbo; only the listed WN/RESTBITS pairs are supported
+#if WN == 200 && RESTBITS == 4
+      return pslot->hash->bytes[prevbo] & 0xf;
+#elif WN == 200 && RESTBITS == 8
+      return pslot->hash->bytes[prevbo];
+#elif WN == 200 && RESTBITS == 9
+      return (pslot->hash->bytes[prevbo]&1) << 8 | pslot->hash->bytes[prevbo+1];
+#elif WN == 144 && RESTBITS == 4
+      return pslot->hash->bytes[prevbo] & 0xf;
+#else
+#error non implemented
+#endif
+    }
+    bool equal(const hashunit *hash0, const hashunit *hash1) const { // compares only the last previous-round hash unit of the two hashes
+      return hash0[prevhashunits-1].word == hash1[prevhashunits-1].word;
+    }
+  };
+
+  struct collisiondata { // per-bucket scratch state: remembers which earlier slots had each xhash so a new slot can be paired with them
+#ifdef XBITMAP
+#if NSLOTS > 64
+#error can't use XBITMAP with more than 64 slots
+#endif
+    u64 xhashmap[NRESTS]; // per xhash: bitmap of slot numbers seen so far
+    u64 xmap; // bitmap of earlier slots still to be returned by slot()
+#else
+#if RESTBITS <= 6
+    typedef uchar xslot;
+#else
+    typedef u16 xslot;
+#endif
+    xslot nxhashslots[NRESTS]; // per xhash: number of slots added so far
+    xslot xhashslots[NRESTS][XFULL]; // per xhash: the first XFULL slot numbers added
+    xslot *xx; // list for the xhash of the most recent addslot()
+    u32 n0; // next position in xx to return from slot()
+    u32 n1; // position at which the most recent slot was stored
+#endif
+    u32 s0;
+
+    void clear() { // forget all slots; called once per bucket
+#ifdef XBITMAP
+      memset(xhashmap, 0, NRESTS * sizeof(u64));
+#else
+      memset(nxhashslots, 0, NRESTS * sizeof(xslot));
+#endif
+    }
+    bool addslot(u32 s1, u32 xh) { // records slot s1 under xhash xh and arms iteration over earlier slots with that xhash; false if the list is full
+#ifdef XBITMAP
+      xmap = xhashmap[xh];
+      xhashmap[xh] |= (u64)1 << s1;
+      s0 = -1;
+      return true;
+#else
+      n1 = (u32)nxhashslots[xh]++;
+      if (n1 >= XFULL) // the count is still incremented, but the slot is not stored
+        return false;
+      xx = xhashslots[xh];
+      xx[n1] = s1;
+      n0 = 0;
+      return true;
+#endif
+    }
+    bool nextcollision() const { // true while slot() has another earlier slot to return
+#ifdef XBITMAP
+      return xmap != 0;
+#else
+      return n0 < n1;
+#endif
+    }
+    u32 slot() { // returns the next earlier slot sharing the xhash; call only after nextcollision() returned true
+#ifdef XBITMAP
+      const u32 ffs = __builtin_ffsll(xmap);
+      s0 += ffs; xmap >>= ffs;
+      return s0;
+#else
+      return (u32)xx[n0++];
+#endif
+    }
+  };
+
+  void digit0(const u32 id) { // round 0: thread id hashes blocks id, id+nthreads, ... and files every hash into its bucket in trees0[0]
+    uchar hash[HASHOUT];
+    BLAKE2bState* state;
+    htlayout htl(this, 0);
+    const u32 hashbytes = hashsize(0);
+    for (u32 block = id; block < NBLOCKS; block += nthreads) {
+      state = blake2b_clone(blake_ctx); // fresh copy of the base state for every block
+      u32 leb = htole32(block);
+      blake2b_update(state, (uchar *)&leb, sizeof(u32));
+      blake2b_finalize(state, hash, HASHOUT);
+      blake2b_free(state);
+      for (u32 i = 0; i<HASHESPERBLAKE; i++) {
+        const uchar *ph = hash + i * WN/8; // i-th WN/8-byte hash inside the BLAKE2b output
+#if BUCKBITS == 16 && RESTBITS == 4
+        const u32 bucketid = ((u32)ph[0] << 8) | ph[1];
+#elif BUCKBITS == 12 && RESTBITS == 8
+        const u32 bucketid = ((u32)ph[0] << 4) | ph[1] >> 4;
+#elif BUCKBITS == 11 && RESTBITS == 9
+        const u32 bucketid = ((u32)ph[0] << 3) | ph[1] >> 5;
+#elif BUCKBITS == 20 && RESTBITS == 4
+        const u32 bucketid = ((((u32)ph[0] << 8) | ph[1]) << 4) | ph[2] >> 4;
+#elif BUCKBITS == 12 && RESTBITS == 4
+        const u32 bucketid = ((u32)ph[0] << 4) | ph[1] >> 4;
+        const u32 xhash = ph[1] & 0xf; // NOTE(review): not used below
+#else
+#error not implemented
+#endif
+        const u32 slot = getslot(0, bucketid);
+        if (slot >= NSLOTS) { // bucket already full: drop this hash and count it
+          bfull++;
+          continue;
+        }
+        slot0 &s = hta.trees0[0][bucketid][slot];
+        s.attr = tree(block * HASHESPERBLAKE + i); // leaf node holds the hash's index
+        memcpy(s.hash->bytes+htl.nextbo, ph+WN/8-hashbytes, hashbytes); // keep the last hashbytes bytes, placed after nextbo unused bytes
+      }
+    }
+  }
+  
+  void digitodd(const u32 r, const u32 id) { // odd round r: for every bucket of round r-1 (in trees0) owned by thread id, pairs slots with equal xhash and stores their XOR in trees1
+    htlayout htl(this, r);
+    collisiondata cd;
+    for (u32 bucketid=id; bucketid < NBUCKETS; bucketid += nthreads) {
+      cd.clear();
+      slot0 *buck = htl.hta.trees0[(r-1)/2][bucketid]; // optimize by updating previous buck?!
+      u32 bsize = getnslots(r-1, bucketid);       // optimize by putting bucketsize with block?!
+      for (u32 s1 = 0; s1 < bsize; s1++) {
+        const slot0 *pslot1 = buck + s1;          // optimize by updating previous pslot1?!
+        if (!cd.addslot(s1, htl.getxhash0(pslot1))) { // XFULL slots already share this xhash: skip s1 and count it
+          xfull++;
+          continue;
+        }
+        for (; cd.nextcollision(); ) {
+          const u32 s0 = cd.slot();
+          const slot0 *pslot0 = buck + s0;
+          if (htl.equal(pslot0->hash, pslot1->hash)) { // last hash unit identical: pair is discarded and counted in hfull
+            hfull++;
+            continue;
+          }
+          u32 xorbucketid; // bucket of the XORed hash in round r, taken from the bytes after prevbo
+          const uchar *bytes0 = pslot0->hash->bytes, *bytes1 = pslot1->hash->bytes;
+#if WN == 200 && BUCKBITS == 12 && RESTBITS == 8
+          xorbucketid = (((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) & 0xf) << 8)
+                             | (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]);
+#elif WN == 200 && BUCKBITS == 11 && RESTBITS == 9
+          xorbucketid = (((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) & 0xf) << 7)
+                             | (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) >> 1;
+#elif WN == 144 && BUCKBITS == 20 && RESTBITS == 4
+          xorbucketid = ((((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) << 8)
+                              | (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2])) << 4)
+                              | (bytes0[htl.prevbo+3] ^ bytes1[htl.prevbo+3]) >> 4;
+#elif WN == 96 && BUCKBITS == 12 && RESTBITS == 4
+          xorbucketid = ((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) << 4)
+                            | (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) >> 4;
+#else
+#error not implemented
+#endif
+          const u32 xorslot = getslot(r, xorbucketid);
+          if (xorslot >= NSLOTS) { // destination bucket full: drop the pair and count it
+            bfull++;
+            continue;
+          }
+          slot1 &xs = htl.hta.trees1[r/2][xorbucketid][xorslot];
+          xs.attr = tree(bucketid, s0, s1); // remember which two slots of which bucket were combined
+          for (u32 i=htl.dunits; i < htl.prevhashunits; i++) // XOR the hash units, skipping the first dunits units
+            xs.hash[i-htl.dunits].word = pslot0->hash[i].word ^ pslot1->hash[i].word;
+        }
+      }
+    }
+  }
+  
+  void digiteven(const u32 r, const u32 id) { // even round r: for every bucket of round r-1 (in trees1) owned by thread id, pairs slots with equal xhash and stores their XOR in trees0
+    htlayout htl(this, r);
+    collisiondata cd;
+    for (u32 bucketid=id; bucketid < NBUCKETS; bucketid += nthreads) {
+      cd.clear();
+      slot1 *buck = htl.hta.trees1[(r-1)/2][bucketid]; // OPTIMIZE BY UPDATING PREVIOUS
+      u32 bsize = getnslots(r-1, bucketid);
+      for (u32 s1 = 0; s1 < bsize; s1++) {
+        const slot1 *pslot1 = buck + s1;          // OPTIMIZE BY UPDATING PREVIOUS
+        if (!cd.addslot(s1, htl.getxhash1(pslot1))) { // XFULL slots already share this xhash: skip s1 and count it
+          xfull++;
+          continue;
+        }
+        for (; cd.nextcollision(); ) {
+          const u32 s0 = cd.slot();
+          const slot1 *pslot0 = buck + s0;
+          if (htl.equal(pslot0->hash, pslot1->hash)) { // last hash unit identical: pair is discarded and counted in hfull
+            hfull++;
+            continue;
+          }
+          u32 xorbucketid; // bucket of the XORed hash in round r, taken from the bytes after prevbo
+          const uchar *bytes0 = pslot0->hash->bytes, *bytes1 = pslot1->hash->bytes;
+#if WN == 200 && BUCKBITS == 12 && RESTBITS == 8
+          xorbucketid = ((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) << 4)
+                            | (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) >> 4;
+#elif WN == 200 && BUCKBITS == 11 && RESTBITS == 9
+          xorbucketid = ((u32)(bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) << 3)
+                            | (bytes0[htl.prevbo+3] ^ bytes1[htl.prevbo+3]) >> 5;
+#elif WN == 144 && BUCKBITS == 20 && RESTBITS == 4
+          xorbucketid = ((((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) << 8)
+                              | (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2])) << 4)
+                              | (bytes0[htl.prevbo+3] ^ bytes1[htl.prevbo+3]) >> 4;
+#elif WN == 96 && BUCKBITS == 12 && RESTBITS == 4
+          xorbucketid = ((u32)(bytes0[htl.prevbo+1] ^ bytes1[htl.prevbo+1]) << 4)
+                            | (bytes0[htl.prevbo+2] ^ bytes1[htl.prevbo+2]) >> 4;
+#else
+#error not implemented
+#endif
+          const u32 xorslot = getslot(r, xorbucketid);
+          if (xorslot >= NSLOTS) { // destination bucket full: drop the pair and count it
+            bfull++;
+            continue;
+          }
+          slot0 &xs = htl.hta.trees0[r/2][xorbucketid][xorslot];
+          xs.attr = tree(bucketid, s0, s1); // remember which two slots of which bucket were combined
+          for (u32 i=htl.dunits; i < htl.prevhashunits; i++) // XOR the hash units, skipping the first dunits units
+            xs.hash[i-htl.dunits].word = pslot0->hash[i].word ^ pslot1->hash[i].word;
+        }
+      }
+    }
+  }
+  
+  void digitK(const u32 id) { // final round: in each bucket of round WK-1 owned by thread id, reports every slot pair with equal xhash and equal last hash unit as a candidate
+    collisiondata cd;
+    htlayout htl(this, WK);
+u32 nc = 0; // candidate count, only referenced by the commented-out printf below
+    for (u32 bucketid = id; bucketid < NBUCKETS; bucketid += nthreads) {
+      cd.clear();
+      slot0 *buck = htl.hta.trees0[(WK-1)/2][bucketid];
+      u32 bsize = getnslots(WK-1, bucketid);
+      for (u32 s1 = 0; s1 < bsize; s1++) {
+        const slot0 *pslot1 = buck + s1;
+        if (!cd.addslot(s1, htl.getxhash0(pslot1))) // assume WK odd
+          continue;
+        for (; cd.nextcollision(); ) {
+          const u32 s0 = cd.slot();
+          if (htl.equal(buck[s0].hash, pslot1->hash)) // here a match is wanted, unlike digitodd/digiteven where it is discarded
+nc++,       candidate(tree(bucketid, s0, s1));
+        }
+      }
+    }
+//printf(" %d candidates ", nc);
+  }
+};
+
+typedef struct { // per-thread argument block consumed by worker()
+  u32 id; // thread number; worker() passes it to the digit* methods and lets id 0 do the per-round bookkeeping
+  pthread_t thread;
+  equi *eq; // solver instance that worker() operates on
+} thread_ctx;
+
+void barrier(pthread_barrier_t *barry) { // waits on barry; ends the calling thread if the wait reports an error
+  const int rc = pthread_barrier_wait(barry);
+  if (rc != 0 && rc != PTHREAD_BARRIER_SERIAL_THREAD) { // 0 and PTHREAD_BARRIER_SERIAL_THREAD are the two accepted results
+//    printf("Could not wait on barrier\n");
+    pthread_exit(NULL);
+  }
+}
+
+void *worker(void *vp) { // pthread entry point; vp is a thread_ctx*. Runs this thread's share of every round, with barriers between rounds
+  thread_ctx *tp = (thread_ctx *)vp;
+  equi *eq = tp->eq;
+
+  // Progress logging is disabled (was: if (tp->id == 0) printf("Digit 0\n");). The guard is commented out
+  // together with its printf; left active it would attach to barrier() below and only thread 0 would wait.
+  barrier(&eq->barry);
+  eq->digit0(tp->id);
+  barrier(&eq->barry);
+  if (tp->id == 0) { // thread 0 alone resets the overflow counters between rounds
+    eq->xfull = eq->bfull = eq->hfull = 0;
+    eq->showbsizes(0);
+  }
+  barrier(&eq->barry);
+  for (u32 r = 1; r < WK; r++) {
+    // Logging disabled (was: if (tp->id == 0) printf("Digit %d", r);). Every thread must reach the
+    // barrier below, so the guard must not be left dangling in front of it.
+    barrier(&eq->barry);
+    r&1 ? eq->digitodd(r, tp->id) : eq->digiteven(r, tp->id);
+    barrier(&eq->barry);
+    if (tp->id == 0) {
+//      printf(" x%d b%d h%d\n", eq->xfull, eq->bfull, eq->hfull);
+      eq->xfull = eq->bfull = eq->hfull = 0;
+      eq->showbsizes(r);
+    }
+    barrier(&eq->barry);
+  }
+  // Logging disabled (was: if (tp->id == 0) printf("Digit %d\n", WK);). All threads run digitK;
+  // each one scans its own stride of buckets, so guarding the call would skip the other threads' buckets.
+  eq->digitK(tp->id);
+  barrier(&eq->barry);
+  pthread_exit(NULL);
+  return 0; // not reached; present because the function is declared to return void*
+}
+
+#endif // ZCASH_POW_TROMP_EQUI_MINER_H
diff --git a/components/equihash/tromp/osx_barrier.h b/components/equihash/tromp/osx_barrier.h
new file mode 100644
index 0000000000..659c40bf59
--- /dev/null
+++ b/components/equihash/tromp/osx_barrier.h
@@ -0,0 +1,75 @@
+#ifndef ZCASH_POW_TROMP_OSX_BARRIER_H
+#define ZCASH_POW_TROMP_OSX_BARRIER_H
+
+#ifdef __APPLE__
+
+#ifndef PTHREAD_BARRIER_H_
+#define PTHREAD_BARRIER_H_
+
+#include <pthread.h>
+#include <errno.h>
+
+typedef int pthread_barrierattr_t;
+#define PTHREAD_BARRIER_SERIAL_THREAD 1
+
+typedef struct // replacement pthread_barrier_t for __APPLE__ builds, made of a mutex and a condition variable
+{
+    pthread_mutex_t mutex; // held while count is read or written
+    pthread_cond_t cond; // waiters block on this in pthread_barrier_wait
+    int count; // arrival bookkeeping updated by pthread_barrier_wait; set to 0 by pthread_barrier_init
+    int tripCount; // number of arrivals that opens the barrier; set by pthread_barrier_init
+} pthread_barrier_t;
+
+
+int pthread_barrier_init(pthread_barrier_t *barrier, const pthread_barrierattr_t *attr, unsigned int count) // sets up barrier for count threads; attr is ignored; returns 0 on success, -1 on failure
+{
+    if(count == 0)
+    {
+        errno = EINVAL;
+        return -1;
+    }
+    if(pthread_mutex_init(&barrier->mutex, 0) != 0) // pthread calls report failure as a positive error number, so test for non-zero
+    {
+        return -1;
+    }
+    if(pthread_cond_init(&barrier->cond, 0) != 0)
+    {
+        pthread_mutex_destroy(&barrier->mutex); // undo the half-finished setup
+        return -1;
+    }
+    barrier->tripCount = count;
+    barrier->count = 0;
+
+    return 0;
+}
+
+int pthread_barrier_destroy(pthread_barrier_t *barrier) // destroys the barrier's condition variable and mutex; always returns 0
+{
+    pthread_cond_destroy(&barrier->cond);
+    pthread_mutex_destroy(&barrier->mutex);
+    return 0; // results of the two destroy calls are not inspected
+}
+
+int pthread_barrier_wait(pthread_barrier_t *barrier) // blocks until tripCount threads have arrived; the last arrival gets PTHREAD_BARRIER_SERIAL_THREAD, the others 0
+{
+    pthread_mutex_lock(&barrier->mutex);
+    // count cycles through [0, 2*tripCount); count / tripCount is the current phase (0 or 1).
+    const int phase = barrier->count / barrier->tripCount;
+    barrier->count = (barrier->count + 1) % (2 * barrier->tripCount);
+    if(barrier->count % barrier->tripCount == 0)
+    {
+        pthread_cond_broadcast(&barrier->cond);
+        pthread_mutex_unlock(&barrier->mutex);
+        return PTHREAD_BARRIER_SERIAL_THREAD;
+    }
+    // pthread_cond_wait may wake spuriously, so keep waiting until the phase has actually flipped.
+    while(barrier->count / barrier->tripCount == phase)
+        pthread_cond_wait(&barrier->cond, &(barrier->mutex));
+    pthread_mutex_unlock(&barrier->mutex);
+    return 0;
+}
+
+#endif // PTHREAD_BARRIER_H_
+#endif // __APPLE__
+
+#endif // ZCASH_POW_TROMP_OSX_BARRIER_H

From 7ab6c47d5bb2378900a4a07efe8baab7f8da44bc Mon Sep 17 00:00:00 2001
From: Jack Grigg <jack@electriccoin.co>
Date: Thu, 4 Jan 2024 00:43:19 +0000
Subject: [PATCH 02/17] equihash: Import `blake2b_simd` C bindings from
 `zcashd`

Source: zcash/zcash@01d5576a979816c928d524967e36c859adec49b6
License: MIT
---
 components/equihash/src/blake2b.rs  | 56 +++++++++++++++++++++++++++
 components/equihash/tromp/blake2b.h | 59 +++++++++++++++++++++++++++++
 2 files changed, 115 insertions(+)
 create mode 100644 components/equihash/src/blake2b.rs
 create mode 100644 components/equihash/tromp/blake2b.h

diff --git a/components/equihash/src/blake2b.rs b/components/equihash/src/blake2b.rs
new file mode 100644
index 0000000000..432c4cb79b
--- /dev/null
+++ b/components/equihash/src/blake2b.rs
@@ -0,0 +1,56 @@
+// Copyright (c) 2020-2022 The Zcash developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or https://www.opensource.org/licenses/mit-license.php .
+
+use blake2b_simd::{State, PERSONALBYTES};
+use libc::{c_uchar, size_t};
+use std::ptr;
+use std::slice;
+
+#[no_mangle]
+pub extern "C" fn blake2b_init( // C entry point: builds a State and hands ownership to the caller as a raw pointer
+    output_len: size_t,
+    personalization: *const [c_uchar; PERSONALBYTES],
+) -> *mut State {
+    let personalization = unsafe { personalization.as_ref().unwrap() }; // panics if the pointer is null
+
+    Box::into_raw(Box::new( // leaked on purpose; blake2b_free turns the pointer back into a Box
+        blake2b_simd::Params::new()
+            .hash_length(output_len)
+            .personal(personalization)
+            .to_state(),
+    ))
+}
+
+#[no_mangle]
+pub extern "C" fn blake2b_clone(state: *const State) -> *mut State { // returns a separately owned copy of *state, or null when state is null
+    unsafe { state.as_ref() }
+        .map(|state| Box::into_raw(Box::new(state.clone())))
+        .unwrap_or(ptr::null_mut())
+}
+
+#[no_mangle]
+pub extern "C" fn blake2b_free(state: *mut State) { // releases a State produced by Box::into_raw in this module; null is ignored
+    if !state.is_null() {
+        drop(unsafe { Box::from_raw(state) }); // re-box the pointer so that dropping it frees the allocation
+    }
+}
+
+#[no_mangle]
+pub extern "C" fn blake2b_update(state: *mut State, input: *const c_uchar, input_len: size_t) { // feeds input_len bytes starting at input into *state
+    let state = unsafe { state.as_mut().unwrap() }; // panics if state is null
+    let input = unsafe { slice::from_raw_parts(input, input_len) }; // caller must guarantee input points to input_len readable bytes
+
+    state.update(input);
+}
+
+#[no_mangle]
+pub extern "C" fn blake2b_finalize(state: *mut State, output: *mut c_uchar, output_len: size_t) { // writes the first output_len bytes of the digest to output
+    let state = unsafe { state.as_mut().unwrap() }; // panics if state is null
+    let output = unsafe { slice::from_raw_parts_mut(output, output_len) }; // caller must guarantee output points to output_len writable bytes
+
+    // Allow consuming only part of the output.
+    let hash = state.finalize();
+    assert!(output_len <= hash.as_bytes().len()); // panics when more bytes are requested than the digest has
+    output.copy_from_slice(&hash.as_bytes()[..output_len]);
+}
diff --git a/components/equihash/tromp/blake2b.h b/components/equihash/tromp/blake2b.h
new file mode 100644
index 0000000000..39f377ff75
--- /dev/null
+++ b/components/equihash/tromp/blake2b.h
@@ -0,0 +1,59 @@
+// Copyright (c) 2020-2022 The Zcash developers
+// Distributed under the MIT software license, see the accompanying
+// file COPYING or https://www.opensource.org/licenses/mit-license.php .
+
+#ifndef ZCASH_RUST_INCLUDE_RUST_BLAKE2B_H
+#define ZCASH_RUST_INCLUDE_RUST_BLAKE2B_H
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+struct BLAKE2bState;
+typedef struct BLAKE2bState BLAKE2bState;
+#define BLAKE2bPersonalBytes 16U
+
+/// Initializes a BLAKE2b state with no key and no salt, producing `output_len`-byte hashes.
+///
+/// `personalization` MUST be a non-NULL pointer to a 16-byte array.
+///
+/// Please free this with `blake2b_free` when you are done.
+BLAKE2bState* blake2b_init(
+    size_t output_len,
+    const unsigned char* personalization);
+
+/// Clones the given BLAKE2b state. Returns NULL if `state` is NULL.
+///
+/// Both states need to be separately freed with `blake2b_free` when you are
+/// done.
+BLAKE2bState* blake2b_clone(const BLAKE2bState* state);
+
+/// Frees a BLAKE2b state returned by `blake2b_init` or `blake2b_clone`. A NULL `state` is ignored.
+void blake2b_free(BLAKE2bState* state);
+
+/// Adds input to the hash. You can call this any number of times. `input` MUST point to `input_len` readable bytes.
+void blake2b_update(
+    BLAKE2bState* state,
+    const unsigned char* input,
+    size_t input_len);
+
+/// Finalizes the `state` and stores the result in `output`.
+///
+/// `output_len` MUST be less than or equal to the value that was passed as the
+/// first parameter to `blake2b_init`.
+///
+/// This method is idempotent, and calling it multiple times will give the same
+/// result. It's also possible to call `blake2b_update` with more input in
+/// between.
+void blake2b_finalize(
+    BLAKE2bState* state,
+    unsigned char* output,
+    size_t output_len);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // ZCASH_RUST_INCLUDE_RUST_BLAKE2B_H

From 3aaeb8b7196b21a2cb81b5f4f5955450627e0f18 Mon Sep 17 00:00:00 2001
From: Jack Grigg <jack@electriccoin.co>
Date: Thu, 4 Jan 2024 02:51:24 +0000
Subject: [PATCH 03/17] equihash: Modify Tromp solver to compile as C

Co-authored-by: teor <teor@riseup.net>
---
 Cargo.lock                                    |   1 +
 components/equihash/Cargo.toml                |   3 +
 components/equihash/build.rs                  |  11 +
 components/equihash/tromp/blake2b.h           |   8 -
 components/equihash/tromp/equi.h              |  18 +-
 .../tromp/{equi_miner.h => equi_miner.c}      | 411 ++++++++++--------
 6 files changed, 252 insertions(+), 200 deletions(-)
 create mode 100644 components/equihash/build.rs
 rename components/equihash/tromp/{equi_miner.h => equi_miner.c} (57%)

diff --git a/Cargo.lock b/Cargo.lock
index 390f2615ad..0a62dc38a3 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -659,6 +659,7 @@ version = "0.2.0"
 dependencies = [
  "blake2b_simd",
  "byteorder",
+ "cc",
 ]
 
 [[package]]
diff --git a/components/equihash/Cargo.toml b/components/equihash/Cargo.toml
index 2eb7c023d1..5998e54cd0 100644
--- a/components/equihash/Cargo.toml
+++ b/components/equihash/Cargo.toml
@@ -13,5 +13,8 @@ rust-version = "1.56.1"
 blake2b_simd = "1"
 byteorder = "1"
 
+[build-dependencies]
+cc = "1"
+
 [lib]
 bench = false
diff --git a/components/equihash/build.rs b/components/equihash/build.rs
new file mode 100644
index 0000000000..86c77774c1
--- /dev/null
+++ b/components/equihash/build.rs
@@ -0,0 +1,11 @@
+//! Build script for the equihash tromp solver in C.
+
+fn main() {
+    cc::Build::new()
+        .include("tromp/")
+        .file("tromp/equi_miner.c")
+        .compile("equitromp");
+
+    // Tell Cargo to only rerun this build script if the tromp C files or headers change.
+    println!("cargo:rerun-if-changed=tromp");
+}
diff --git a/components/equihash/tromp/blake2b.h b/components/equihash/tromp/blake2b.h
index 39f377ff75..6a0927182a 100644
--- a/components/equihash/tromp/blake2b.h
+++ b/components/equihash/tromp/blake2b.h
@@ -7,10 +7,6 @@
 
 #include <stddef.h>
 
-#ifdef __cplusplus
-extern "C" {
-#endif
-
 struct BLAKE2bState;
 typedef struct BLAKE2bState BLAKE2bState;
 #define BLAKE2bPersonalBytes 16U
@@ -52,8 +48,4 @@ void blake2b_finalize(
     unsigned char* output,
     size_t output_len);
 
-#ifdef __cplusplus
-}
-#endif
-
 #endif // ZCASH_RUST_INCLUDE_RUST_BLAKE2B_H
diff --git a/components/equihash/tromp/equi.h b/components/equihash/tromp/equi.h
index 90beb785be..165825ba67 100644
--- a/components/equihash/tromp/equi.h
+++ b/components/equihash/tromp/equi.h
@@ -5,15 +5,15 @@
 #define ZCASH_POW_TROMP_EQUI_H
 
 #ifdef __APPLE__
-#include "pow/tromp/osx_barrier.h"
+#include "osx_barrier.h"
 #endif
-#include "compat/endian.h"
 
+#include <stdbool.h> // for type bool
 #include <stdint.h> // for types uint32_t,uint64_t
 #include <string.h> // for functions memset
 #include <stdlib.h> // for function qsort
 
-#include <rust/blake2b.h>
+#include "blake2b.h"
 
 typedef uint32_t u32;
 typedef unsigned char uchar;
@@ -31,11 +31,11 @@ typedef unsigned char uchar;
 #define NDIGITS		(WK+1)
 #define DIGITBITS	(WN/(NDIGITS))
 
-static const u32 PROOFSIZE = 1<<WK;
-static const u32 BASE = 1<<DIGITBITS;
-static const u32 NHASHES = 2*BASE;
-static const u32 HASHESPERBLAKE = 512/WN;
-static const u32 HASHOUT = HASHESPERBLAKE*WN/8;
+#define PROOFSIZE (1<<WK)
+#define BASE (1<<DIGITBITS)
+#define NHASHES (2*BASE)
+#define HASHESPERBLAKE (512/WN)
+#define HASHOUT (HASHESPERBLAKE*WN/8)
 
 typedef u32 proof[PROOFSIZE];
 
@@ -44,7 +44,7 @@ enum verify_code { POW_OK, POW_DUPLICATE, POW_OUT_OF_ORDER, POW_NONZERO_XOR };
 const char *errstr[] = { "OK", "duplicate index", "indices out of order", "nonzero xor" };
 
 void genhash(const BLAKE2bState *ctx, u32 idx, uchar *hash) {
-  auto state = blake2b_clone(ctx);
+  BLAKE2bState* state = blake2b_clone(ctx);
   u32 leb = htole32(idx / HASHESPERBLAKE);
   blake2b_update(state, (uchar *)&leb, sizeof(u32));
   uchar blakehash[HASHOUT];
diff --git a/components/equihash/tromp/equi_miner.h b/components/equihash/tromp/equi_miner.c
similarity index 57%
rename from components/equihash/tromp/equi_miner.h
rename to components/equihash/tromp/equi_miner.c
index 5efbbaa693..37a89f6626 100644
--- a/components/equihash/tromp/equi_miner.h
+++ b/components/equihash/tromp/equi_miner.c
@@ -21,7 +21,7 @@
 #ifndef ZCASH_POW_TROMP_EQUI_MINER_H
 #define ZCASH_POW_TROMP_EQUI_MINER_H
 
-#include "pow/tromp/equi.h"
+#include "equi.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <pthread.h>
@@ -31,8 +31,8 @@ typedef uint16_t u16;
 typedef uint64_t u64;
 
 #ifdef EQUIHASH_TROMP_ATOMIC
-#include <atomic>
-typedef std::atomic<u32> au32;
+#include <stdatomic.h>
+typedef atomic_uint au32;
 #else
 typedef u32 au32;
 #endif
@@ -56,21 +56,23 @@ typedef u32 au32;
 #endif
 
 // number of buckets
-static const u32 NBUCKETS = 1<<BUCKBITS;
+#define NBUCKETS (1<<BUCKBITS)
 // 2_log of number of slots per bucket
-static const u32 SLOTBITS = RESTBITS+1+1;
-static const u32 SLOTRANGE = 1<<SLOTBITS;
+#define SLOTBITS (RESTBITS+1+1)
+#define SLOTRANGE (1<<SLOTBITS)
+#ifdef SLOTDIFF
 static const u32 SLOTMSB = 1<<(SLOTBITS-1);
+#endif
 // number of slots per bucket
-static const u32 NSLOTS = SLOTRANGE * SAVEMEM;
+#define NSLOTS (SLOTRANGE * SAVEMEM)
 // number of per-xhash slots
-static const u32 XFULL = 16;
+#define XFULL 16
 // SLOTBITS mask
 static const u32 SLOTMASK = SLOTRANGE-1;
 // number of possible values of xhash (rest of n) bits
-static const u32 NRESTS = 1<<RESTBITS;
+#define NRESTS (1<<RESTBITS)
 // number of blocks of hashes extracted from single 512 bit blake2b output
-static const u32 NBLOCKS = (NHASHES+HASHESPERBLAKE-1)/HASHESPERBLAKE;
+#define NBLOCKS ((NHASHES+HASHESPERBLAKE-1)/HASHESPERBLAKE)
 // nothing larger found in 100000 runs
 static const u32 MAXSOLS = 8;
 
@@ -78,11 +80,16 @@ static const u32 MAXSOLS = 8;
 // a bucket on previous layer with the same rest bits (x-tra hash)
 struct tree {
   u32 bid_s0_s1; // manual bitfields
+};
+typedef struct tree tree;
 
-  tree(const u32 idx) {
-    bid_s0_s1 = idx;
+  tree tree_from_idx(const u32 idx) {
+    tree t;
+    t.bid_s0_s1 = idx;
+    return t;
   }
-  tree(const u32 bid, const u32 s0, const u32 s1) {
+  tree tree_from_bid(const u32 bid, const u32 s0, const u32 s1) {
+    tree t;
 #ifdef SLOTDIFF
     u32 ds10 = (s1 - s0) & SLOTMASK;
     if (ds10 & SLOTMSB) {
@@ -91,39 +98,40 @@ struct tree {
       bid_s0_s1 = (((bid << SLOTBITS) | s0) << (SLOTBITS-1)) | (ds10 - 1);
     }
 #else
-    bid_s0_s1 = (((bid << SLOTBITS) | s0) << SLOTBITS) | s1;
+    t.bid_s0_s1 = (((bid << SLOTBITS) | s0) << SLOTBITS) | s1;
 #endif
+    return t;
   }
-  u32 getindex() const {
-    return bid_s0_s1;
+  u32 getindex(const tree *t) {
+    return t->bid_s0_s1;
   }
-  u32 bucketid() const {
+  u32 bucketid(const tree *t) {
 #ifdef SLOTDIFF
-    return bid_s0_s1 >> (2 * SLOTBITS - 1);
+    return t->bid_s0_s1 >> (2 * SLOTBITS - 1);
 #else
-    return bid_s0_s1 >> (2 * SLOTBITS);
+    return t->bid_s0_s1 >> (2 * SLOTBITS);
 #endif
   }
-  u32 slotid0() const {
+  u32 slotid0(const tree *t) {
 #ifdef SLOTDIFF
-    return (bid_s0_s1 >> (SLOTBITS-1)) & SLOTMASK;
+    return (t->bid_s0_s1 >> (SLOTBITS-1)) & SLOTMASK;
 #else
-    return (bid_s0_s1 >> SLOTBITS) & SLOTMASK;
+    return (t->bid_s0_s1 >> SLOTBITS) & SLOTMASK;
 #endif
   }
-  u32 slotid1() const {
+  u32 slotid1(const tree *t) {
 #ifdef SLOTDIFF
-    return (slotid0() + 1 + (bid_s0_s1 & (SLOTMASK>>1))) & SLOTMASK;
+    return (slotid0(t) + 1 + (t->bid_s0_s1 & (SLOTMASK>>1))) & SLOTMASK; // s1 is stored as a wrapped offset past s0
 #else
-    return bid_s0_s1 & SLOTMASK;
+    return t->bid_s0_s1 & SLOTMASK;
 #endif
   }
-};
 
 union hashunit {
   u32 word;
   uchar bytes[sizeof(u32)];
 };
+typedef union hashunit hashunit;
 
 #define WORDS(bits)	((bits + 31) / 32)
 #define HASHWORDS0 WORDS(WN - DIGITBITS + RESTBITS)
@@ -133,11 +141,13 @@ struct slot0 {
   tree attr;
   hashunit hash[HASHWORDS0];
 };
+typedef struct slot0 slot0;
 
 struct slot1 {
   tree attr;
   hashunit hash[HASHWORDS1];
 };
+typedef struct slot1 slot1;
 
 // a bucket is NSLOTS treenodes
 typedef slot0 bucket0[NSLOTS];
@@ -164,10 +174,15 @@ struct htalloc {
   bucket0 *trees0[(WK+1)/2];
   bucket1 *trees1[WK/2];
   u32 alloced;
-  htalloc() {
-    alloced = 0;
+};
+typedef struct htalloc htalloc;
+  htalloc htalloc_new() {
+    htalloc hta;
+    hta.alloced = 0;
+    return hta;
   }
-  void alloctrees() {
+  void *htalloc_alloc(htalloc *hta, const u32 n, const u32 sz);
+  void alloctrees(htalloc *hta) {
 // optimize xenoncat's fixed memory layout, avoiding any waste
 // digit  trees  hashes  trees hashes
 // 0      0 A A A A A A   . . . . . .
@@ -180,29 +195,28 @@ struct htalloc {
 // 7      0 2 4 6 . G G   1 3 5 7 H H
 // 8      0 2 4 6 8 . I   1 3 5 7 H H
     assert(DIGITBITS >= 16); // ensures hashes shorten by 1 unit every 2 digits
-    heap0 = (u32 *)alloc(1, sizeof(digit0));
-    heap1 = (u32 *)alloc(1, sizeof(digit1));
+    hta->heap0 = (u32 *)htalloc_alloc(hta, 1, sizeof(digit0));
+    hta->heap1 = (u32 *)htalloc_alloc(hta, 1, sizeof(digit1));
     for (int r=0; r<WK; r++)
       if ((r&1) == 0)
-        trees0[r/2]  = (bucket0 *)(heap0 + r/2);
+        hta->trees0[r/2]  = (bucket0 *)(hta->heap0 + r/2);
       else
-        trees1[r/2]  = (bucket1 *)(heap1 + r/2);
+        hta->trees1[r/2]  = (bucket1 *)(hta->heap1 + r/2);
   }
-  void dealloctrees() {
-    free(heap0);
-    free(heap1);
+  void dealloctrees(htalloc *hta) {
+    free(hta->heap0);
+    free(hta->heap1);
   }
-  void *alloc(const u32 n, const u32 sz) {
+  void *htalloc_alloc(htalloc *hta, const u32 n, const u32 sz) {
     void *mem  = calloc(n, sz);
     assert(mem);
-    alloced += n * sz;
+    hta->alloced += n * sz;
     return mem;
   }
-};
 
 typedef au32 bsizes[NBUCKETS];
 
-u32 min(const u32 a, const u32 b) {
+u32 minu32(const u32 a, const u32 b) {
   return a < b ? a : b;
 }
 
@@ -217,37 +231,55 @@ struct equi {
   u32 hfull;
   u32 bfull;
   pthread_barrier_t barry;
-  equi(const u32 n_threads) {
+};
+typedef struct equi equi;
+  void equi_clearslots(equi *eq);
+  equi *equi_new(const u32 n_threads) {
     assert(sizeof(hashunit) == 4);
-    nthreads = n_threads;
-    const int err = pthread_barrier_init(&barry, NULL, nthreads);
+    equi *eq = malloc(sizeof(equi));
+    eq->nthreads = n_threads;
+    const int err = pthread_barrier_init(&eq->barry, NULL, eq->nthreads);
     assert(!err);
-    hta.alloctrees();
-    nslots = (bsizes *)hta.alloc(2 * NBUCKETS, sizeof(au32));
-    sols   =  (proof *)hta.alloc(MAXSOLS, sizeof(proof));
+
+    alloctrees(&eq->hta);
+    eq->nslots = (bsizes *)htalloc_alloc(&eq->hta, 2 * NBUCKETS, sizeof(au32));
+    eq->sols   =  (proof *)htalloc_alloc(&eq->hta, MAXSOLS, sizeof(proof));
+
+    // C malloc() does not guarantee zero-initialized memory (but calloc() does)
+    eq->blake_ctx = NULL;
+    eq->nsols = 0;
+    equi_clearslots(eq);
+
+    return eq;
   }
-  ~equi() {
-    hta.dealloctrees();
-    free(nslots);
-    free(sols);
-    blake2b_free(blake_ctx);
+  void equi_free(equi *eq) {
+    dealloctrees(&eq->hta);
+
+    free(eq->nslots);
+    free(eq->sols);
+    blake2b_free(eq->blake_ctx);
+    free(eq);
   }
-  void setstate(const BLAKE2bState *ctx) {
-    blake_ctx = blake2b_clone(ctx);
-    memset(nslots, 0, NBUCKETS * sizeof(au32)); // only nslots[0] needs zeroing
-    nsols = 0;
+  void equi_setstate(equi *eq, const BLAKE2bState *ctx) {
+    if (eq->blake_ctx) {
+      blake2b_free(eq->blake_ctx);
+    }
+
+    eq->blake_ctx = blake2b_clone(ctx);
+    memset(eq->nslots, 0, NBUCKETS * sizeof(au32)); // only nslots[0] needs zeroing
+    eq->nsols = 0;
   }
-  u32 getslot(const u32 r, const u32 bucketi) {
+  u32 getslot(equi *eq, const u32 r, const u32 bucketi) {
 #ifdef EQUIHASH_TROMP_ATOMIC
-    return std::atomic_fetch_add_explicit(&nslots[r&1][bucketi], 1U, std::memory_order_relaxed);
+    return atomic_fetch_add_explicit(&eq->nslots[r&1][bucketi], 1U, memory_order_relaxed); // C11 <stdatomic.h> spelling; yields the pre-increment count
 #else
-    return nslots[r&1][bucketi]++;
+    return eq->nslots[r&1][bucketi]++;
 #endif
   }
-  u32 getnslots(const u32 r, const u32 bid) { // SHOULD BE METHOD IN BUCKET STRUCT
-    au32 &nslot = nslots[r&1][bid];
-    const u32 n = min(nslot, NSLOTS);
-    nslot = 0;
+  u32 getnslots(equi *eq, const u32 r, const u32 bid) { // SHOULD BE METHOD IN BUCKET STRUCT
+    au32 *nslot = &eq->nslots[r&1][bid];
+    const u32 n = minu32(*nslot, NSLOTS);
+    *nslot = 0;
     return n;
   }
   void orderindices(u32 *indices, u32 size) {
@@ -259,47 +291,49 @@ struct equi {
       }
     }
   }
-  void listindices0(u32 r, const tree t, u32 *indices) {
+  void listindices1(equi *eq, u32 r, const tree t, u32 *indices);
+  void listindices0(equi *eq, u32 r, const tree t, u32 *indices) {
     if (r == 0) {
-      *indices = t.getindex();
+      *indices = getindex(&t);
       return;
     }
-    const bucket1 &buck = hta.trees1[--r/2][t.bucketid()];
+    const bucket1 *buck = &eq->hta.trees1[--r/2][bucketid(&t)];
     const u32 size = 1 << r;
     u32 *indices1 = indices + size;
-    listindices1(r, buck[t.slotid0()].attr, indices);
-    listindices1(r, buck[t.slotid1()].attr, indices1);
+    listindices1(eq, r, (*buck)[slotid0(&t)].attr, indices);
+    listindices1(eq, r, (*buck)[slotid1(&t)].attr, indices1);
     orderindices(indices, size);
   }
-  void listindices1(u32 r, const tree t, u32 *indices) {
-    const bucket0 &buck = hta.trees0[--r/2][t.bucketid()];
+  void listindices1(equi *eq, u32 r, const tree t, u32 *indices) {
+    const bucket0 *buck = &eq->hta.trees0[--r/2][bucketid(&t)];
     const u32 size = 1 << r;
     u32 *indices1 = indices + size;
-    listindices0(r, buck[t.slotid0()].attr, indices);
-    listindices0(r, buck[t.slotid1()].attr, indices1);
+    listindices0(eq, r, (*buck)[slotid0(&t)].attr, indices);
+    listindices0(eq, r, (*buck)[slotid1(&t)].attr, indices1);
     orderindices(indices, size);
   }
-  void candidate(const tree t) {
+  void candidate(equi *eq, const tree t) {
     proof prf;
-    listindices1(WK, t, prf); // assume WK odd
+    listindices1(eq, WK, t, prf); // assume WK odd
     qsort(prf, PROOFSIZE, sizeof(u32), &compu32);
     for (u32 i=1; i<PROOFSIZE; i++)
       if (prf[i] <= prf[i-1])
         return;
 #ifdef EQUIHASH_TROMP_ATOMIC
-    u32 soli = std::atomic_fetch_add_explicit(&nsols, 1U, std::memory_order_relaxed);
+    u32 soli = atomic_fetch_add_explicit(&eq->nsols, 1U, memory_order_relaxed); // C11 <stdatomic.h> spelling; yields the pre-increment count
 #else
-    u32 soli = nsols++;
+    u32 soli = eq->nsols++;
 #endif
     if (soli < MAXSOLS)
-      listindices1(WK, t, sols[soli]); // assume WK odd
+      listindices1(eq, WK, t, eq->sols[soli]); // assume WK odd
   }
-  void showbsizes(u32 r) {
+#ifdef EQUIHASH_SHOW_BUCKET_SIZES
+  void showbsizes(equi *eq, u32 r) {
 #if defined(HIST) || defined(SPARK) || defined(LOGSPARK)
     u32 binsizes[65];
     memset(binsizes, 0, 65 * sizeof(u32));
     for (u32 bucketid = 0; bucketid < NBUCKETS; bucketid++) {
-      u32 bsize = min(nslots[r&1][bucketid], NSLOTS) >> (SLOTBITS-6);
+      u32 bsize = minu32(eq->nslots[r&1][bucketid], NSLOTS) >> (SLOTBITS-6);
       binsizes[bsize]++;
     }
     for (u32 i=0; i < 65; i++) {
@@ -319,6 +353,7 @@ struct equi {
 //    printf("\n");
 #endif
   }
+#endif
 
   struct htlayout {
     htalloc hta;
@@ -327,63 +362,69 @@ struct equi {
     u32 dunits;
     u32 prevbo;
     u32 nextbo;
+  };
+  typedef struct htlayout htlayout;
   
-    htlayout(equi *eq, u32 r): hta(eq->hta), prevhashunits(0), dunits(0) {
+    htlayout htlayout_new(equi *eq, u32 r) {
+      htlayout htl;
+      htl.hta = eq->hta;
+      htl.prevhashunits = 0;
+      htl.dunits = 0;
       u32 nexthashbytes = hashsize(r);
-      nexthashunits = hashwords(nexthashbytes);
-      prevbo = 0;
-      nextbo = nexthashunits * sizeof(hashunit) - nexthashbytes; // 0-3
+      htl.nexthashunits = hashwords(nexthashbytes);
+      htl.prevbo = 0;
+      htl.nextbo = htl.nexthashunits * sizeof(hashunit) - nexthashbytes; // 0-3
       if (r) {
         u32 prevhashbytes = hashsize(r-1);
-        prevhashunits = hashwords(prevhashbytes);
-        prevbo = prevhashunits * sizeof(hashunit) - prevhashbytes; // 0-3
-        dunits = prevhashunits - nexthashunits;
+        htl.prevhashunits = hashwords(prevhashbytes);
+        htl.prevbo = htl.prevhashunits * sizeof(hashunit) - prevhashbytes; // 0-3
+        htl.dunits = htl.prevhashunits - htl.nexthashunits;
       }
+      return htl;
     }
-    u32 getxhash0(const slot0* pslot) const {
+    u32 getxhash0(const htlayout *htl, const slot0* pslot) {
 #if WN == 200 && RESTBITS == 4
-      return pslot->hash->bytes[prevbo] >> 4;
+      return pslot->hash->bytes[htl->prevbo] >> 4;
 #elif WN == 200 && RESTBITS == 8
-      return (pslot->hash->bytes[prevbo] & 0xf) << 4 | pslot->hash->bytes[prevbo+1] >> 4;
+      return (pslot->hash->bytes[htl->prevbo] & 0xf) << 4 | pslot->hash->bytes[htl->prevbo+1] >> 4;
 #elif WN == 200 && RESTBITS == 9
-      return (pslot->hash->bytes[prevbo] & 0x1f) << 4 | pslot->hash->bytes[prevbo+1] >> 4;
+      return (pslot->hash->bytes[htl->prevbo] & 0x1f) << 4 | pslot->hash->bytes[htl->prevbo+1] >> 4;
 #elif WN == 144 && RESTBITS == 4
-      return pslot->hash->bytes[prevbo] & 0xf;
+      return pslot->hash->bytes[htl->prevbo] & 0xf;
 #else
 #error non implemented
 #endif
     }
-    u32 getxhash1(const slot1* pslot) const {
+    u32 getxhash1(const htlayout *htl, const slot1* pslot) {
 #if WN == 200 && RESTBITS == 4
-      return pslot->hash->bytes[prevbo] & 0xf;
+      return pslot->hash->bytes[htl->prevbo] & 0xf;
 #elif WN == 200 && RESTBITS == 8
-      return pslot->hash->bytes[prevbo];
+      return pslot->hash->bytes[htl->prevbo];
 #elif WN == 200 && RESTBITS == 9
-      return (pslot->hash->bytes[prevbo]&1) << 8 | pslot->hash->bytes[prevbo+1];
+      return (pslot->hash->bytes[htl->prevbo]&1) << 8 | pslot->hash->bytes[htl->prevbo+1];
 #elif WN == 144 && RESTBITS == 4
-      return pslot->hash->bytes[prevbo] & 0xf;
+      return pslot->hash->bytes[htl->prevbo] & 0xf;
 #else
 #error non implemented
 #endif
     }
-    bool equal(const hashunit *hash0, const hashunit *hash1) const {
-      return hash0[prevhashunits-1].word == hash1[prevhashunits-1].word;
+    bool htlayout_equal(const htlayout *htl, const hashunit *hash0, const hashunit *hash1) {
+      return hash0[htl->prevhashunits-1].word == hash1[htl->prevhashunits-1].word;
     }
-  };
 
+#if RESTBITS <= 6
+    typedef uchar xslot;
+#else
+    typedef u16 xslot;
+#endif
   struct collisiondata {
 #ifdef XBITMAP
 #if NSLOTS > 64
-#error can't use XBITMAP with more than 64 slots
+#error cant use XBITMAP with more than 64 slots
 #endif
     u64 xhashmap[NRESTS];
     u64 xmap;
 #else
-#if RESTBITS <= 6
-    typedef uchar xslot;
-#else
-    typedef u16 xslot;
-#endif
     xslot nxhashslots[NRESTS];
     xslot xhashslots[NRESTS][XFULL];
     xslot *xx;
@@ -391,55 +432,56 @@ struct equi {
     u32 n1;
 #endif
     u32 s0;
+  };
+  typedef struct collisiondata collisiondata;
 
-    void clear() {
+    void collisiondata_clear(collisiondata *cd) {
 #ifdef XBITMAP
-      memset(xhashmap, 0, NRESTS * sizeof(u64));
+      memset(cd->xhashmap, 0, NRESTS * sizeof(u64));
 #else
-      memset(nxhashslots, 0, NRESTS * sizeof(xslot));
+      memset(cd->nxhashslots, 0, NRESTS * sizeof(xslot));
 #endif
     }
-    bool addslot(u32 s1, u32 xh) {
+    bool addslot(collisiondata *cd, u32 s1, u32 xh) {
 #ifdef XBITMAP
-      xmap = xhashmap[xh];
-      xhashmap[xh] |= (u64)1 << s1;
-      s0 = -1;
+      cd->xmap = cd->xhashmap[xh]; // snapshot of earlier slots sharing xh, taken before s1 is recorded
+      cd->xhashmap[xh] |= (u64)1 << s1;
+      cd->s0 = -1;
       return true;
 #else
-      n1 = (u32)nxhashslots[xh]++;
-      if (n1 >= XFULL)
+      cd->n1 = (u32)cd->nxhashslots[xh]++;
+      if (cd->n1 >= XFULL)
         return false;
-      xx = xhashslots[xh];
-      xx[n1] = s1;
-      n0 = 0;
+      cd->xx = cd->xhashslots[xh];
+      cd->xx[cd->n1] = s1;
+      cd->n0 = 0;
       return true;
 #endif
     }
-    bool nextcollision() const {
+    bool nextcollision(const collisiondata *cd) {
 #ifdef XBITMAP
-      return xmap != 0;
+      return cd->xmap != 0;
 #else
-      return n0 < n1;
+      return cd->n0 < cd->n1;
 #endif
     }
-    u32 slot() {
+    u32 slot(collisiondata *cd) {
 #ifdef XBITMAP
-      const u32 ffs = __builtin_ffsll(xmap);
-      s0 += ffs; xmap >>= ffs;
-      return s0;
+      const u32 ffs = __builtin_ffsll(cd->xmap);
+      cd->s0 += ffs; cd->xmap >>= ffs; // advance to the next set bit and consume it
+      return cd->s0;
 #else
-      return (u32)xx[n0++];
+      return (u32)cd->xx[cd->n0++];
 #endif
     }
-  };
 
-  void digit0(const u32 id) {
+  void equi_digit0(equi *eq, const u32 id) {
     uchar hash[HASHOUT];
     BLAKE2bState* state;
-    htlayout htl(this, 0);
+    htlayout htl = htlayout_new(eq, 0);
     const u32 hashbytes = hashsize(0);
-    for (u32 block = id; block < NBLOCKS; block += nthreads) {
-      state = blake2b_clone(blake_ctx);
+    for (u32 block = id; block < NBLOCKS; block += eq->nthreads) {
+      state = blake2b_clone(eq->blake_ctx);
       u32 leb = htole32(block);
       blake2b_update(state, (uchar *)&leb, sizeof(u32));
       blake2b_finalize(state, hash, HASHOUT);
@@ -460,36 +502,36 @@ struct equi {
 #else
 #error not implemented
 #endif
-        const u32 slot = getslot(0, bucketid);
+        const u32 slot = getslot(eq, 0, bucketid);
         if (slot >= NSLOTS) {
-          bfull++;
+          eq->bfull++;
           continue;
         }
-        slot0 &s = hta.trees0[0][bucketid][slot];
-        s.attr = tree(block * HASHESPERBLAKE + i);
-        memcpy(s.hash->bytes+htl.nextbo, ph+WN/8-hashbytes, hashbytes);
+        slot0 *s = &eq->hta.trees0[0][bucketid][slot];
+        s->attr = tree_from_idx(block * HASHESPERBLAKE + i);
+        memcpy(s->hash->bytes+htl.nextbo, ph+WN/8-hashbytes, hashbytes);
       }
     }
   }
   
-  void digitodd(const u32 r, const u32 id) {
-    htlayout htl(this, r);
+  void equi_digitodd(equi *eq, const u32 r, const u32 id) {
+    htlayout htl = htlayout_new(eq, r);
     collisiondata cd;
-    for (u32 bucketid=id; bucketid < NBUCKETS; bucketid += nthreads) {
-      cd.clear();
+    for (u32 bucketid=id; bucketid < NBUCKETS; bucketid += eq->nthreads) {
+      collisiondata_clear(&cd);
       slot0 *buck = htl.hta.trees0[(r-1)/2][bucketid]; // optimize by updating previous buck?!
-      u32 bsize = getnslots(r-1, bucketid);       // optimize by putting bucketsize with block?!
+      u32 bsize = getnslots(eq, r-1, bucketid);       // optimize by putting bucketsize with block?!
       for (u32 s1 = 0; s1 < bsize; s1++) {
         const slot0 *pslot1 = buck + s1;          // optimize by updating previous pslot1?!
-        if (!cd.addslot(s1, htl.getxhash0(pslot1))) {
-          xfull++;
+        if (!addslot(&cd, s1, getxhash0(&htl, pslot1))) {
+          eq->xfull++;
           continue;
         }
-        for (; cd.nextcollision(); ) {
-          const u32 s0 = cd.slot();
+        for (; nextcollision(&cd); ) {
+          const u32 s0 = slot(&cd);
           const slot0 *pslot0 = buck + s0;
-          if (htl.equal(pslot0->hash, pslot1->hash)) {
-            hfull++;
+          if (htlayout_equal(&htl, pslot0->hash, pslot1->hash)) {
+            eq->hfull++;
             continue;
           }
           u32 xorbucketid;
@@ -510,38 +552,38 @@ struct equi {
 #else
 #error not implemented
 #endif
-          const u32 xorslot = getslot(r, xorbucketid);
+          const u32 xorslot = getslot(eq, r, xorbucketid);
           if (xorslot >= NSLOTS) {
-            bfull++;
+            eq->bfull++;
             continue;
           }
-          slot1 &xs = htl.hta.trees1[r/2][xorbucketid][xorslot];
-          xs.attr = tree(bucketid, s0, s1);
+          slot1 *xs = &htl.hta.trees1[r/2][xorbucketid][xorslot];
+          xs->attr = tree_from_bid(bucketid, s0, s1);
           for (u32 i=htl.dunits; i < htl.prevhashunits; i++)
-            xs.hash[i-htl.dunits].word = pslot0->hash[i].word ^ pslot1->hash[i].word;
+            xs->hash[i-htl.dunits].word = pslot0->hash[i].word ^ pslot1->hash[i].word;
         }
       }
     }
   }
   
-  void digiteven(const u32 r, const u32 id) {
-    htlayout htl(this, r);
+  void equi_digiteven(equi *eq, const u32 r, const u32 id) {
+    htlayout htl = htlayout_new(eq, r);
     collisiondata cd;
-    for (u32 bucketid=id; bucketid < NBUCKETS; bucketid += nthreads) {
-      cd.clear();
+    for (u32 bucketid=id; bucketid < NBUCKETS; bucketid += eq->nthreads) {
+      collisiondata_clear(&cd);
       slot1 *buck = htl.hta.trees1[(r-1)/2][bucketid]; // OPTIMIZE BY UPDATING PREVIOUS
-      u32 bsize = getnslots(r-1, bucketid);
+      u32 bsize = getnslots(eq, r-1, bucketid);
       for (u32 s1 = 0; s1 < bsize; s1++) {
         const slot1 *pslot1 = buck + s1;          // OPTIMIZE BY UPDATING PREVIOUS
-        if (!cd.addslot(s1, htl.getxhash1(pslot1))) {
-          xfull++;
+        if (!addslot(&cd, s1, getxhash1(&htl, pslot1))) {
+          eq->xfull++;
           continue;
         }
-        for (; cd.nextcollision(); ) {
-          const u32 s0 = cd.slot();
+        for (; nextcollision(&cd); ) {
+          const u32 s0 = slot(&cd);
           const slot1 *pslot0 = buck + s0;
-          if (htl.equal(pslot0->hash, pslot1->hash)) {
-            hfull++;
+          if (htlayout_equal(&htl, pslot0->hash, pslot1->hash)) {
+            eq->hfull++;
             continue;
           }
           u32 xorbucketid;
@@ -562,42 +604,41 @@ struct equi {
 #else
 #error not implemented
 #endif
-          const u32 xorslot = getslot(r, xorbucketid);
+          const u32 xorslot = getslot(eq, r, xorbucketid);
           if (xorslot >= NSLOTS) {
-            bfull++;
+            eq->bfull++;
             continue;
           }
-          slot0 &xs = htl.hta.trees0[r/2][xorbucketid][xorslot];
-          xs.attr = tree(bucketid, s0, s1);
+          slot0 *xs = &htl.hta.trees0[r/2][xorbucketid][xorslot];
+          xs->attr = tree_from_bid(bucketid, s0, s1);
           for (u32 i=htl.dunits; i < htl.prevhashunits; i++)
-            xs.hash[i-htl.dunits].word = pslot0->hash[i].word ^ pslot1->hash[i].word;
+            xs->hash[i-htl.dunits].word = pslot0->hash[i].word ^ pslot1->hash[i].word;
         }
       }
     }
   }
   
-  void digitK(const u32 id) {
+  void equi_digitK(equi *eq, const u32 id) {
     collisiondata cd;
-    htlayout htl(this, WK);
+    htlayout htl = htlayout_new(eq, WK);
 u32 nc = 0;
-    for (u32 bucketid = id; bucketid < NBUCKETS; bucketid += nthreads) {
-      cd.clear();
+    for (u32 bucketid = id; bucketid < NBUCKETS; bucketid += eq->nthreads) {
+      collisiondata_clear(&cd);
       slot0 *buck = htl.hta.trees0[(WK-1)/2][bucketid];
-      u32 bsize = getnslots(WK-1, bucketid);
+      u32 bsize = getnslots(eq, WK-1, bucketid);
       for (u32 s1 = 0; s1 < bsize; s1++) {
         const slot0 *pslot1 = buck + s1;
-        if (!cd.addslot(s1, htl.getxhash0(pslot1))) // assume WK odd
+        if (!addslot(&cd, s1, getxhash0(&htl, pslot1))) // assume WK odd
           continue;
-        for (; cd.nextcollision(); ) {
-          const u32 s0 = cd.slot();
-          if (htl.equal(buck[s0].hash, pslot1->hash))
-nc++,       candidate(tree(bucketid, s0, s1));
+        for (; nextcollision(&cd); ) {
+          const u32 s0 = slot(&cd);
+          if (htlayout_equal(&htl, buck[s0].hash, pslot1->hash))
+nc++,       candidate(eq, tree_from_bid(bucketid, s0, s1));
         }
       }
     }
 //printf(" %d candidates ", nc);
   }
-};
 
 typedef struct {
   u32 id;
@@ -617,32 +658,36 @@ void *worker(void *vp) {
   thread_ctx *tp = (thread_ctx *)vp;
   equi *eq = tp->eq;
 
-  if (tp->id == 0)
+//  if (tp->id == 0)
 //    printf("Digit 0\n");
   barrier(&eq->barry);
-  eq->digit0(tp->id);
+  equi_digit0(eq, tp->id);
   barrier(&eq->barry);
   if (tp->id == 0) {
-    eq->xfull = eq->bfull = eq->hfull = 0;
-    eq->showbsizes(0);
+    equi_clearslots(eq);
+#ifdef EQUIHASH_SHOW_BUCKET_SIZES
+    showbsizes(eq, 0);
+#endif
   }
   barrier(&eq->barry);
   for (u32 r = 1; r < WK; r++) {
-    if (tp->id == 0)
+//    if (tp->id == 0)
 //      printf("Digit %d", r);
     barrier(&eq->barry);
-    r&1 ? eq->digitodd(r, tp->id) : eq->digiteven(r, tp->id);
+    r&1 ? equi_digitodd(eq, r, tp->id) : equi_digiteven(eq, r, tp->id);
     barrier(&eq->barry);
     if (tp->id == 0) {
 //      printf(" x%d b%d h%d\n", eq->xfull, eq->bfull, eq->hfull);
-      eq->xfull = eq->bfull = eq->hfull = 0;
-      eq->showbsizes(r);
+      equi_clearslots(eq);
+#ifdef EQUIHASH_SHOW_BUCKET_SIZES
+      showbsizes(eq, r);
+#endif
     }
     barrier(&eq->barry);
   }
-  if (tp->id == 0)
+//  if (tp->id == 0)
 //    printf("Digit %d\n", WK);
-  eq->digitK(tp->id);
+  equi_digitK(eq, tp->id);
   barrier(&eq->barry);
   pthread_exit(NULL);
   return 0;

From 45e7238b8011d556bde2b222afaef2f505726fdb Mon Sep 17 00:00:00 2001
From: Jack Grigg <jack@electriccoin.co>
Date: Thu, 4 Jan 2024 04:34:59 +0000
Subject: [PATCH 04/17] equihash: Pass `blake2b_simd` bindings to Tromp solver
 as callbacks

This avoids linker errors by removing cycles.
---
 components/equihash/tromp/blake2b.h    | 10 ++---
 components/equihash/tromp/equi.h       | 54 --------------------------
 components/equihash/tromp/equi_miner.c | 33 ++++++++++++----
 3 files changed, 30 insertions(+), 67 deletions(-)

diff --git a/components/equihash/tromp/blake2b.h b/components/equihash/tromp/blake2b.h
index 6a0927182a..23a7409b74 100644
--- a/components/equihash/tromp/blake2b.h
+++ b/components/equihash/tromp/blake2b.h
@@ -16,7 +16,7 @@ typedef struct BLAKE2bState BLAKE2bState;
 /// `personalization` MUST be a pointer to a 16-byte array.
 ///
 /// Please free this with `blake2b_free` when you are done.
-BLAKE2bState* blake2b_init(
+typedef BLAKE2bState* (*blake2b_init)(
     size_t output_len,
     const unsigned char* personalization);
 
@@ -24,13 +24,13 @@ BLAKE2bState* blake2b_init(
 ///
 /// Both states need to be separately freed with `blake2b_free` when you are
 /// done.
-BLAKE2bState* blake2b_clone(const BLAKE2bState* state);
+typedef BLAKE2bState* (*blake2b_clone)(const BLAKE2bState* state);
 
 /// Frees a BLAKE2b state returned by `blake2b_init`.
-void blake2b_free(BLAKE2bState* state);
+typedef void (*blake2b_free)(BLAKE2bState* state);
 
 /// Adds input to the hash. You can call this any number of times.
-void blake2b_update(
+typedef void (*blake2b_update)(
     BLAKE2bState* state,
     const unsigned char* input,
     size_t input_len);
@@ -43,7 +43,7 @@ void blake2b_update(
 /// This method is idempotent, and calling it multiple times will give the same
 /// result. It's also possible to call `blake2b_update` with more input in
 /// between.
-void blake2b_finalize(
+typedef void (*blake2b_finalize)(
     BLAKE2bState* state,
     unsigned char* output,
     size_t output_len);
diff --git a/components/equihash/tromp/equi.h b/components/equihash/tromp/equi.h
index 165825ba67..2bf0794edf 100644
--- a/components/equihash/tromp/equi.h
+++ b/components/equihash/tromp/equi.h
@@ -43,63 +43,9 @@ typedef u32 proof[PROOFSIZE];
 enum verify_code { POW_OK, POW_DUPLICATE, POW_OUT_OF_ORDER, POW_NONZERO_XOR };
 const char *errstr[] = { "OK", "duplicate index", "indices out of order", "nonzero xor" };
 
-void genhash(const BLAKE2bState *ctx, u32 idx, uchar *hash) {
-  BLAKE2bState* state = blake2b_clone(ctx);
-  u32 leb = htole32(idx / HASHESPERBLAKE);
-  blake2b_update(state, (uchar *)&leb, sizeof(u32));
-  uchar blakehash[HASHOUT];
-  blake2b_finalize(state, blakehash, HASHOUT);
-  blake2b_free(state);
-  memcpy(hash, blakehash + (idx % HASHESPERBLAKE) * WN/8, WN/8);
-}
-
-int verifyrec(const BLAKE2bState *ctx, u32 *indices, uchar *hash, int r) {
-  if (r == 0) {
-    genhash(ctx, *indices, hash);
-    return POW_OK;
-  }
-  u32 *indices1 = indices + (1 << (r-1));
-  if (*indices >= *indices1)
-    return POW_OUT_OF_ORDER;
-  uchar hash0[WN/8], hash1[WN/8];
-  int vrf0 = verifyrec(ctx, indices,  hash0, r-1);
-  if (vrf0 != POW_OK)
-    return vrf0;
-  int vrf1 = verifyrec(ctx, indices1, hash1, r-1);
-  if (vrf1 != POW_OK)
-    return vrf1;
-  for (int i=0; i < WN/8; i++)
-    hash[i] = hash0[i] ^ hash1[i];
-  int i, b = r * DIGITBITS;
-  for (i = 0; i < b/8; i++)
-    if (hash[i])
-      return POW_NONZERO_XOR;
-  if ((b%8) && hash[i] >> (8-(b%8)))
-    return POW_NONZERO_XOR;
-  return POW_OK;
-}
-
 int compu32(const void *pa, const void *pb) {
   u32 a = *(u32 *)pa, b = *(u32 *)pb;
   return a<b ? -1 : a==b ? 0 : +1;
 }
 
-bool duped(proof prf) {
-  proof sortprf;
-  memcpy(sortprf, prf, sizeof(proof));
-  qsort(sortprf, PROOFSIZE, sizeof(u32), &compu32);
-  for (u32 i=1; i<PROOFSIZE; i++)
-    if (sortprf[i] <= sortprf[i-1])
-      return true;
-  return false;
-}
-
-// verify Wagner conditions
-int verify(u32 indices[PROOFSIZE], const BLAKE2bState *ctx) {
-  if (duped(indices))
-    return POW_DUPLICATE;
-  uchar hash[WN/8];
-  return verifyrec(ctx, indices, hash, WK);
-}
-
 #endif // ZCASH_POW_TROMP_EQUI_H
diff --git a/components/equihash/tromp/equi_miner.c b/components/equihash/tromp/equi_miner.c
index 37a89f6626..1c4882cd16 100644
--- a/components/equihash/tromp/equi_miner.c
+++ b/components/equihash/tromp/equi_miner.c
@@ -222,6 +222,10 @@ u32 minu32(const u32 a, const u32 b) {
 
 struct equi {
   BLAKE2bState* blake_ctx;
+  blake2b_clone blake2b_clone;
+  blake2b_free blake2b_free;
+  blake2b_update blake2b_update;
+  blake2b_finalize blake2b_finalize;
   htalloc hta;
   bsizes *nslots; // PUT IN BUCKET STRUCT
   proof *sols;
@@ -234,10 +238,21 @@ struct equi {
 };
 typedef struct equi equi;
   void equi_clearslots(equi *eq);
-  equi *equi_new(const u32 n_threads) {
+  equi *equi_new(
+    const u32 n_threads,
+    blake2b_clone blake2b_clone,
+    blake2b_free blake2b_free,
+    blake2b_update blake2b_update,
+    blake2b_finalize blake2b_finalize
+  ) {
     assert(sizeof(hashunit) == 4);
     equi *eq = malloc(sizeof(equi));
     eq->nthreads = n_threads;
+    eq->blake2b_clone = blake2b_clone;
+    eq->blake2b_free = blake2b_free;
+    eq->blake2b_update = blake2b_update;
+    eq->blake2b_finalize = blake2b_finalize;
+
     const int err = pthread_barrier_init(&eq->barry, NULL, eq->nthreads);
     assert(!err);
 
@@ -257,15 +272,16 @@ typedef struct equi equi;
 
     free(eq->nslots);
     free(eq->sols);
-    blake2b_free(eq->blake_ctx);
+    eq->blake2b_free(eq->blake_ctx);
+
     free(eq);
   }
   void equi_setstate(equi *eq, const BLAKE2bState *ctx) {
     if (eq->blake_ctx) {
-      blake2b_free(eq->blake_ctx);
+      eq->blake2b_free(eq->blake_ctx);
     }
 
-    eq->blake_ctx = blake2b_clone(ctx);
+    eq->blake_ctx = eq->blake2b_clone(ctx);
     memset(eq->nslots, 0, NBUCKETS * sizeof(au32)); // only nslots[0] needs zeroing
     eq->nsols = 0;
   }
@@ -481,11 +497,12 @@ typedef struct equi equi;
     htlayout htl = htlayout_new(eq, 0);
     const u32 hashbytes = hashsize(0);
     for (u32 block = id; block < NBLOCKS; block += eq->nthreads) {
-      state = blake2b_clone(eq->blake_ctx);
+      state = eq->blake2b_clone(eq->blake_ctx);
       u32 leb = htole32(block);
-      blake2b_update(state, (uchar *)&leb, sizeof(u32));
-      blake2b_finalize(state, hash, HASHOUT);
-      blake2b_free(state);
+      eq->blake2b_update(state, (uchar *)&leb, sizeof(u32));
+      eq->blake2b_finalize(state, hash, HASHOUT);
+      eq->blake2b_free(state);
+
       for (u32 i = 0; i<HASHESPERBLAKE; i++) {
         const uchar *ph = hash + i * WN/8;
 #if BUCKBITS == 16 && RESTBITS == 4

From d07505de712341214026487a3acb47a1c0f6ed26 Mon Sep 17 00:00:00 2001
From: Jack Grigg <jack@electriccoin.co>
Date: Thu, 4 Jan 2024 04:39:00 +0000
Subject: [PATCH 05/17] equihash: Add Rust API for Tromp solver

Co-authored-by: teor <teor@riseup.net>
---
 components/equihash/src/blake2b.rs     |  13 +-
 components/equihash/src/lib.rs         |   3 +
 components/equihash/src/tromp.rs       | 174 +++++++++++++++++++++++++
 components/equihash/src/verify.rs      |   2 +-
 components/equihash/tromp/equi_miner.c |  10 ++
 5 files changed, 196 insertions(+), 6 deletions(-)
 create mode 100644 components/equihash/src/tromp.rs

diff --git a/components/equihash/src/blake2b.rs b/components/equihash/src/blake2b.rs
index 432c4cb79b..75da59d5ab 100644
--- a/components/equihash/src/blake2b.rs
+++ b/components/equihash/src/blake2b.rs
@@ -2,15 +2,18 @@
 // Distributed under the MIT software license, see the accompanying
 // file COPYING or https://www.opensource.org/licenses/mit-license.php .
 
+// This module uses unsafe code for FFI into blake2b.
+#![allow(unsafe_code)]
+
 use blake2b_simd::{State, PERSONALBYTES};
-use libc::{c_uchar, size_t};
+
 use std::ptr;
 use std::slice;
 
 #[no_mangle]
 pub extern "C" fn blake2b_init(
-    output_len: size_t,
-    personalization: *const [c_uchar; PERSONALBYTES],
+    output_len: usize,
+    personalization: *const [u8; PERSONALBYTES],
 ) -> *mut State {
     let personalization = unsafe { personalization.as_ref().unwrap() };
 
@@ -37,7 +40,7 @@ pub extern "C" fn blake2b_free(state: *mut State) {
 }
 
 #[no_mangle]
-pub extern "C" fn blake2b_update(state: *mut State, input: *const c_uchar, input_len: size_t) {
+pub extern "C" fn blake2b_update(state: *mut State, input: *const u8, input_len: usize) {
     let state = unsafe { state.as_mut().unwrap() };
     let input = unsafe { slice::from_raw_parts(input, input_len) };
 
@@ -45,7 +48,7 @@ pub extern "C" fn blake2b_update(state: *mut State, input: *const c_uchar, input
 }
 
 #[no_mangle]
-pub extern "C" fn blake2b_finalize(state: *mut State, output: *mut c_uchar, output_len: size_t) {
+pub extern "C" fn blake2b_finalize(state: *mut State, output: *mut u8, output_len: usize) {
     let state = unsafe { state.as_mut().unwrap() };
     let output = unsafe { slice::from_raw_parts_mut(output, output_len) };
 
diff --git a/components/equihash/src/lib.rs b/components/equihash/src/lib.rs
index cb6131ca3b..0000c20535 100644
--- a/components/equihash/src/lib.rs
+++ b/components/equihash/src/lib.rs
@@ -28,3 +28,6 @@ mod verify;
 mod test_vectors;
 
 pub use verify::{is_valid_solution, Error};
+
+mod blake2b;
+pub mod tromp;
diff --git a/components/equihash/src/tromp.rs b/components/equihash/src/tromp.rs
new file mode 100644
index 0000000000..b8ee0e43e0
--- /dev/null
+++ b/components/equihash/src/tromp.rs
@@ -0,0 +1,174 @@
+//! Rust interface to the tromp equihash solver.
+
+use std::marker::{PhantomData, PhantomPinned};
+use std::slice;
+
+use blake2b_simd::State;
+
+use crate::{blake2b, params::Params, verify};
+
+#[repr(C)]
+struct CEqui {
+    _f: [u8; 0],
+    _m: PhantomData<(*mut u8, PhantomPinned)>,
+}
+
+#[link(name = "equitromp")]
+extern "C" {
+    #[allow(improper_ctypes)]
+    fn equi_new(
+        n_threads: u32,
+        blake2b_clone: extern "C" fn(state: *const State) -> *mut State,
+        blake2b_free: extern "C" fn(state: *mut State),
+        blake2b_update: extern "C" fn(state: *mut State, input: *const u8, input_len: usize),
+        blake2b_finalize: extern "C" fn(state: *mut State, output: *mut u8, output_len: usize),
+    ) -> *mut CEqui;
+    fn equi_free(eq: *mut CEqui);
+    #[allow(improper_ctypes)]
+    fn equi_setstate(eq: *mut CEqui, ctx: *const State);
+    fn equi_clearslots(eq: *mut CEqui);
+    fn equi_digit0(eq: *mut CEqui, id: u32);
+    fn equi_digitodd(eq: *mut CEqui, r: u32, id: u32);
+    fn equi_digiteven(eq: *mut CEqui, r: u32, id: u32);
+    fn equi_digitK(eq: *mut CEqui, id: u32);
+    fn equi_nsols(eq: *const CEqui) -> usize;
+    /// Returns `equi_nsols()` solutions of length `2^K`, in a single memory allocation.
+    fn equi_sols(eq: *const CEqui) -> *const u32;
+}
+
+/// Performs a single equihash solver run with equihash parameters `p` and hash state `curr_state`.
+/// Returns zero or more unique solutions.
+///
+/// # SAFETY
+///
+/// The parameters to this function must match the hard-coded parameters in the C++ code.
+///
+/// This function uses unsafe code for FFI into the tromp solver.
+#[allow(unsafe_code)]
+unsafe fn worker(p: Params, curr_state: &State) -> Vec<Vec<u32>> {
+    // Create solver and initialize it.
+    let eq = equi_new(
+        1,
+        blake2b::blake2b_clone,
+        blake2b::blake2b_free,
+        blake2b::blake2b_update,
+        blake2b::blake2b_finalize,
+    );
+    equi_setstate(eq, curr_state);
+
+    // Initialization done, start algo driver.
+    equi_digit0(eq, 0);
+    equi_clearslots(eq);
+    // SAFETY: caller must supply a `p` instance that matches the hard-coded values in the C code.
+    for r in 1..p.k {
+        if (r & 1) != 0 {
+            equi_digitodd(eq, r, 0)
+        } else {
+            equi_digiteven(eq, r, 0)
+        };
+        equi_clearslots(eq);
+    }
+    // Review Note: nsols is increased here, but only if the solution passes the strictly ordered check.
+    // With 256 nonces, we get to around 6/9 digits strictly ordered.
+    equi_digitK(eq, 0);
+
+    let solutions = {
+        let nsols = equi_nsols(eq);
+        let sols = equi_sols(eq);
+        let solution_len = 1 << p.k;
+
+        // SAFETY:
+        // - caller must supply a `p` instance that matches the hard-coded values in the C code.
+        // - `sols` is a single allocation containing at least `nsols` solutions.
+        // - this slice is a shared ref to the memory in a valid `eq` instance supplied by the caller.
+        let solutions: &[u32] = slice::from_raw_parts(sols, nsols * solution_len);
+
+        let mut chunks = solutions.chunks_exact(solution_len);
+
+        // SAFETY:
+        // - caller must supply a `p` instance that matches the hard-coded values in the C code.
+        // - each solution contains `solution_len` u32 values.
+        // - the temporary slices are shared refs to a valid `eq` instance supplied by the caller.
+        // - the bytes in the shared ref are copied before they are returned.
+        // - dropping `solutions: &[u32]` does not drop the underlying memory owned by `eq`.
+        let mut solutions = (&mut chunks)
+            .map(|solution| solution.to_vec())
+            .collect::<Vec<_>>();
+
+        assert_eq!(chunks.remainder().len(), 0);
+
+        // Sometimes the solver returns identical solutions.
+        solutions.sort();
+        solutions.dedup();
+
+        solutions
+    };
+
+    equi_free(eq);
+
+    solutions
+}
+
+/// Performs multiple equihash solver runs with equihash parameters `200, 9`, initialising the hash with
+/// the supplied partial `input`. Between each run, generates a new nonce of length `N` using the
+/// `next_nonce` function.
+///
+/// Returns zero or more unique solutions.
+pub fn solve_200_9<const N: usize>(
+    input: &[u8],
+    mut next_nonce: impl FnMut() -> Option<[u8; N]>,
+) -> Vec<Vec<u32>> {
+    let p = Params::new(200, 9).expect("should be valid");
+    let mut state = verify::initialise_state(p.n, p.k, p.hash_output());
+    state.update(input);
+
+    loop {
+        let nonce = match next_nonce() {
+            Some(nonce) => nonce,
+            None => break vec![],
+        };
+
+        let mut curr_state = state.clone();
+        curr_state.update(&nonce);
+
+        // SAFETY: the parameters 200,9 match the hard-coded parameters in the C++ code.
+        #[allow(unsafe_code)]
+        let solutions = unsafe { worker(p, &curr_state) };
+        if !solutions.is_empty() {
+            break solutions;
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::solve_200_9;
+
+    #[test]
+    #[allow(clippy::print_stdout)]
+    fn run_solver() {
+        let input = b"Equihash is an asymmetric PoW based on the Generalised Birthday problem.";
+        let mut nonce = [
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0,
+        ];
+
+        let solutions = solve_200_9(input, || {
+            nonce[0] += 1;
+            if nonce[0] == 0 {
+                None
+            } else {
+                Some(nonce)
+            }
+        });
+
+        if solutions.is_empty() {
+            println!("Found no solutions");
+        } else {
+            println!("Found {} solutions:", solutions.len());
+            for solution in solutions {
+                println!("- {:?}", solution);
+            }
+        }
+    }
+}
diff --git a/components/equihash/src/verify.rs b/components/equihash/src/verify.rs
index 53071ddc01..0cc4d27771 100644
--- a/components/equihash/src/verify.rs
+++ b/components/equihash/src/verify.rs
@@ -114,7 +114,7 @@ impl fmt::Display for Kind {
     }
 }
 
-fn initialise_state(n: u32, k: u32, digest_len: u8) -> Blake2bState {
+pub(crate) fn initialise_state(n: u32, k: u32, digest_len: u8) -> Blake2bState {
     let mut personalization: Vec<u8> = Vec::from("ZcashPoW");
     personalization.write_u32::<LittleEndian>(n).unwrap();
     personalization.write_u32::<LittleEndian>(k).unwrap();
diff --git a/components/equihash/tromp/equi_miner.c b/components/equihash/tromp/equi_miner.c
index 1c4882cd16..f1c767a4ac 100644
--- a/components/equihash/tromp/equi_miner.c
+++ b/components/equihash/tromp/equi_miner.c
@@ -285,6 +285,9 @@ typedef struct equi equi;
     memset(eq->nslots, 0, NBUCKETS * sizeof(au32)); // only nslots[0] needs zeroing
     eq->nsols = 0;
   }
+  void equi_clearslots(equi *eq) {
+    eq->xfull = eq->bfull = eq->hfull = 0;
+  }
   u32 getslot(equi *eq, const u32 r, const u32 bucketi) {
 #ifdef EQUIHASH_TROMP_ATOMIC
     return std::atomic_fetch_add_explicit(&eq->nslots[r&1][bucketi], 1U, std::memory_order_relaxed);
@@ -657,6 +660,13 @@ nc++,       candidate(eq, tree_from_bid(bucketid, s0, s1));
 //printf(" %d candidates ", nc);
   }
 
+  size_t equi_nsols(const equi *eq) {
+    return eq->nsols;
+  }
+  proof *equi_sols(const equi *eq) {
+    return eq->sols;
+  }
+
 typedef struct {
   u32 id;
   pthread_t thread;

From 1b20c15053587d9d7fac582913e712464353115d Mon Sep 17 00:00:00 2001
From: teor <teor@riseup.net>
Date: Fri, 5 Jan 2024 09:47:50 +1000
Subject: [PATCH 06/17] equihash: Add Rust APIs for compressed solutions

---
 components/equihash/src/minimal.rs | 82 +++++++++++++++++++++++++++---
 components/equihash/src/tromp.rs   | 20 +++++++-
 2 files changed, 95 insertions(+), 7 deletions(-)

diff --git a/components/equihash/src/minimal.rs b/components/equihash/src/minimal.rs
index 81da63e657..838907840f 100644
--- a/components/equihash/src/minimal.rs
+++ b/components/equihash/src/minimal.rs
@@ -5,6 +5,49 @@ use byteorder::{BigEndian, ReadBytesExt};
 
 use crate::params::Params;
 
+// Rough translation of CompressArray() from:
+// https://github.com/zcash/zcash/blob/6fdd9f1b81d3b228326c9826fa10696fc516444b/src/crypto/equihash.cpp#L39-L76
+#[cfg(any(feature = "solver", test))]
+fn compress_array(array: &[u8], bit_len: usize, byte_pad: usize) -> Vec<u8> {
+    let index_bytes = (u32::BITS / 8) as usize;
+    assert!(bit_len >= 8);
+    assert!(8 * index_bytes >= 7 + bit_len);
+
+    let in_width: usize = (bit_len + 7) / 8 + byte_pad;
+    let out_len = bit_len * array.len() / (8 * in_width);
+
+    let mut out = Vec::with_capacity(out_len);
+    let bit_len_mask: u32 = (1 << (bit_len as u32)) - 1;
+
+    // The acc_bits least-significant bits of acc_value represent a bit sequence
+    // in big-endian order.
+    let mut acc_bits: usize = 0;
+    let mut acc_value: u32 = 0;
+
+    let mut j: usize = 0;
+    for _i in 0..out_len {
+        // When we have fewer than 8 bits left in the accumulator, read the next
+        // input element.
+        if acc_bits < 8 {
+            acc_value <<= bit_len;
+            for x in byte_pad..in_width {
+                acc_value |= (
+                    // Apply bit_len_mask across byte boundaries
+                    (array[j + x] & ((bit_len_mask >> (8 * (in_width - x - 1))) as u8)) as u32
+                )
+                    .wrapping_shl(8 * (in_width - x - 1) as u32); // Big-endian
+            }
+            j += in_width;
+            acc_bits += bit_len;
+        }
+
+        acc_bits -= 8;
+        out.push((acc_value >> acc_bits) as u8);
+    }
+
+    out
+}
+
 pub(crate) fn expand_array(vin: &[u8], bit_len: usize, byte_pad: usize) -> Vec<u8> {
     assert!(bit_len >= 8);
     assert!(u32::BITS as usize >= 7 + bit_len);
@@ -50,6 +93,31 @@ pub(crate) fn expand_array(vin: &[u8], bit_len: usize, byte_pad: usize) -> Vec<u
     vout
 }
 
+// Rough translation of GetMinimalFromIndices() from:
+// https://github.com/zcash/zcash/blob/6fdd9f1b81d3b228326c9826fa10696fc516444b/src/crypto/equihash.cpp#L130-L145
+#[cfg(any(feature = "solver", test))]
+pub(crate) fn minimal_from_indices(p: Params, indices: &[u32]) -> Vec<u8> {
+    let c_bit_len = p.collision_bit_length();
+    let index_bytes = (u32::BITS / 8) as usize;
+    let digit_bytes = ((c_bit_len + 1) + 7) / 8;
+    assert!(digit_bytes <= index_bytes);
+
+    let len_indices = indices.len() * index_bytes;
+    let byte_pad = index_bytes - digit_bytes;
+
+    // Rough translation of EhIndexToArray(index, array_pointer) from:
+    // https://github.com/zcash/zcash/blob/6fdd9f1b81d3b228326c9826fa10696fc516444b/src/crypto/equihash.cpp#L123-L128
+    //
+    // Big-endian so that lexicographic array comparison is equivalent to integer comparison.
+    let array: Vec<u8> = indices
+        .iter()
+        .flat_map(|index| index.to_be_bytes())
+        .collect();
+    assert_eq!(array.len(), len_indices);
+
+    compress_array(&array, c_bit_len + 1, byte_pad)
+}
+
 /// Returns `None` if the parameters are invalid for this minimal encoding.
 pub(crate) fn indices_from_minimal(p: Params, minimal: &[u8]) -> Option<Vec<u32>> {
     let c_bit_len = p.collision_bit_length();
@@ -76,11 +144,14 @@ pub(crate) fn indices_from_minimal(p: Params, minimal: &[u8]) -> Option<Vec<u32>
 
 #[cfg(test)]
 mod tests {
-    use super::{expand_array, indices_from_minimal, Params};
+    use crate::minimal::minimal_from_indices;
+
+    use super::{compress_array, expand_array, indices_from_minimal, Params};
 
     #[test]
-    fn array_expansion() {
+    fn array_compression_and_expansion() {
         let check_array = |(bit_len, byte_pad), compact, expanded| {
+            assert_eq!(compress_array(expanded, bit_len, byte_pad), compact);
             assert_eq!(expand_array(compact, bit_len, byte_pad), expanded);
         };
 
@@ -149,10 +220,9 @@ mod tests {
     #[test]
     fn minimal_solution_repr() {
         let check_repr = |minimal, indices| {
-            assert_eq!(
-                indices_from_minimal(Params { n: 80, k: 3 }, minimal).unwrap(),
-                indices,
-            );
+            let p = Params { n: 80, k: 3 };
+            assert_eq!(minimal_from_indices(p, indices), minimal);
+            assert_eq!(indices_from_minimal(p, minimal).unwrap(), indices);
         };
 
         // The solutions here are not intended to be valid.
diff --git a/components/equihash/src/tromp.rs b/components/equihash/src/tromp.rs
index b8ee0e43e0..d74099e0ef 100644
--- a/components/equihash/src/tromp.rs
+++ b/components/equihash/src/tromp.rs
@@ -5,7 +5,7 @@ use std::slice;
 
 use blake2b_simd::State;
 
-use crate::{blake2b, params::Params, verify};
+use crate::{blake2b, minimal::minimal_from_indices, params::Params, verify};
 
 #[repr(C)]
 struct CEqui {
@@ -140,6 +140,24 @@ pub fn solve_200_9<const N: usize>(
     }
 }
 
+/// Performs multiple equihash solver runs with equihash parameters `200, 9`, initialising the hash with
+/// the supplied partial `input`. Between each run, generates a new nonce of length `N` using the
+/// `next_nonce` function.
+///
+/// Returns zero or more unique compressed solutions.
+pub fn solve_200_9_compressed<const N: usize>(
+    input: &[u8],
+    next_nonce: impl FnMut() -> Option<[u8; N]>,
+) -> Vec<Vec<u8>> {
+    let p = Params::new(200, 9).expect("should be valid");
+    let solutions = solve_200_9(input, next_nonce);
+
+    solutions
+        .iter()
+        .map(|solution| minimal_from_indices(p, solution))
+        .collect()
+}
+
 #[cfg(test)]
 mod tests {
     use super::solve_200_9;

From fe3b269f3a97d32da5bbb5e981dc3f794ba122eb Mon Sep 17 00:00:00 2001
From: teor <teor@riseup.net>
Date: Fri, 5 Jan 2024 09:47:50 +1000
Subject: [PATCH 07/17] equihash: Verify compressed solutions in tests

---
 Cargo.lock                       |  1 +
 components/equihash/Cargo.toml   |  3 +++
 components/equihash/src/tromp.rs | 42 +++++++++++++++++++++++---------
 3 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 0a62dc38a3..2e77c75da1 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -660,6 +660,7 @@ dependencies = [
  "blake2b_simd",
  "byteorder",
  "cc",
+ "hex",
 ]
 
 [[package]]
diff --git a/components/equihash/Cargo.toml b/components/equihash/Cargo.toml
index 5998e54cd0..a4d3403134 100644
--- a/components/equihash/Cargo.toml
+++ b/components/equihash/Cargo.toml
@@ -16,5 +16,8 @@ byteorder = "1"
 [build-dependencies]
 cc = "1"
 
+[dev-dependencies]
+hex = "0.4"
+
 [lib]
 bench = false
diff --git a/components/equihash/src/tromp.rs b/components/equihash/src/tromp.rs
index d74099e0ef..74b431c0f0 100644
--- a/components/equihash/src/tromp.rs
+++ b/components/equihash/src/tromp.rs
@@ -160,32 +160,50 @@ pub fn solve_200_9_compressed<const N: usize>(
 
 #[cfg(test)]
 mod tests {
-    use super::solve_200_9;
+    use super::solve_200_9_compressed;
 
     #[test]
     #[allow(clippy::print_stdout)]
     fn run_solver() {
         let input = b"Equihash is an asymmetric PoW based on the Generalised Birthday problem.";
-        let mut nonce = [
+        let mut nonce: [u8; 32] = [
             0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
             0, 0, 0,
         ];
+        let mut nonces = 0..=32_u32;
+        let nonce_count = nonces.clone().count();
 
-        let solutions = solve_200_9(input, || {
-            nonce[0] += 1;
-            if nonce[0] == 0 {
-                None
-            } else {
-                Some(nonce)
-            }
+        let solutions = solve_200_9_compressed(input, || {
+            let variable_nonce = nonces.next()?;
+            println!("Using variable nonce [0..4] of {}", variable_nonce);
+
+            let variable_nonce = variable_nonce.to_le_bytes();
+            nonce[0] = variable_nonce[0];
+            nonce[1] = variable_nonce[1];
+            nonce[2] = variable_nonce[2];
+            nonce[3] = variable_nonce[3];
+
+            Some(nonce)
         });
 
         if solutions.is_empty() {
-            println!("Found no solutions");
+            // Expected solution rate is documented at:
+            // https://github.com/tromp/equihash/blob/master/README.md
+            panic!("Found no solutions after {nonce_count} runs, expected 1.88 solutions per run",);
         } else {
             println!("Found {} solutions:", solutions.len());
-            for solution in solutions {
-                println!("- {:?}", solution);
+            for (sol_num, solution) in solutions.iter().enumerate() {
+                println!("Validating solution {sol_num}:-\n{}", hex::encode(solution));
+                crate::is_valid_solution(200, 9, input, &nonce, solution).unwrap_or_else(|error| {
+                    panic!(
+                        "unexpected invalid equihash 200, 9 solution:\n\
+                             error: {error:?}\n\
+                             input: {input:?}\n\
+                             nonce: {nonce:?}\n\
+                             solution: {solution:?}"
+                    )
+                });
+                println!("Solution {sol_num} is valid!\n");
             }
         }
     }

From 463e7d9958da89516318218afad6117ed22126a9 Mon Sep 17 00:00:00 2001
From: teor <teor@riseup.net>
Date: Fri, 5 Jan 2024 10:51:38 +1000
Subject: [PATCH 08/17] equihash: Move allocation out of the loop

---
 components/equihash/src/tromp.rs | 52 +++++++++++++++++++++++---------
 1 file changed, 37 insertions(+), 15 deletions(-)

diff --git a/components/equihash/src/tromp.rs b/components/equihash/src/tromp.rs
index 74b431c0f0..e5f2157ff5 100644
--- a/components/equihash/src/tromp.rs
+++ b/components/equihash/src/tromp.rs
@@ -45,15 +45,10 @@ extern "C" {
 ///
 /// This function uses unsafe code for FFI into the tromp solver.
 #[allow(unsafe_code)]
-unsafe fn worker(p: Params, curr_state: &State) -> Vec<Vec<u32>> {
-    // Create solver and initialize it.
-    let eq = equi_new(
-        1,
-        blake2b::blake2b_clone,
-        blake2b::blake2b_free,
-        blake2b::blake2b_update,
-        blake2b::blake2b_finalize,
-    );
+unsafe fn worker(eq: *mut CEqui, p: Params, curr_state: &State) -> Vec<Vec<u32>> {
+    // SAFETY: caller must supply a valid `eq` instance.
+    //
+    // Review Note: nsols is set to zero in C++ here
     equi_setstate(eq, curr_state);
 
     // Initialization done, start algo driver.
@@ -104,8 +99,6 @@ unsafe fn worker(p: Params, curr_state: &State) -> Vec<Vec<u32>> {
         solutions
     };
 
-    equi_free(eq);
-
     solutions
 }
 
@@ -122,22 +115,51 @@ pub fn solve_200_9<const N: usize>(
     let mut state = verify::initialise_state(p.n, p.k, p.hash_output());
     state.update(input);
 
-    loop {
+    // Create solver and initialize it.
+    //
+    // # SAFETY
+    // - the parameters 200,9 match the hard-coded parameters in the C++ code.
+    // - tromp is compiled without multi-threading support, so each instance can only support 1 thread.
+    // - the blake2b functions are in the correct order in Rust and C++ initializers.
+    #[allow(unsafe_code)]
+    let eq = unsafe {
+        equi_new(
+            1,
+            blake2b::blake2b_clone,
+            blake2b::blake2b_free,
+            blake2b::blake2b_update,
+            blake2b::blake2b_finalize,
+        )
+    };
+
+    let solutions = loop {
         let nonce = match next_nonce() {
             Some(nonce) => nonce,
             None => break vec![],
         };
 
         let mut curr_state = state.clone();
+        // Review Note: these hashes are changing when the nonce changes
         curr_state.update(&nonce);
 
-        // SAFETY: the parameters 200,9 match the hard-coded parameters in the C++ code.
+        // SAFETY:
+        // - the parameters 200,9 match the hard-coded parameters in the C++ code.
+        // - the eq instance is initialized above.
         #[allow(unsafe_code)]
-        let solutions = unsafe { worker(p, &curr_state) };
+        let solutions = unsafe { worker(eq, p, &curr_state) };
         if !solutions.is_empty() {
             break solutions;
         }
-    }
+    };
+
+    // SAFETY:
+    // - the eq instance is initialized above, and not used after this point.
+    #[allow(unsafe_code)]
+    unsafe {
+        equi_free(eq)
+    };
+
+    solutions
 }
 
 /// Performs multiple equihash solver runs with equihash parameters `200, 9`, initialising the hash with

From 3c78bf60a14d7de0e62613687aca7170a8a4adaa Mon Sep 17 00:00:00 2001
From: teor <teor@riseup.net>
Date: Mon, 8 Jan 2024 07:40:58 +1000
Subject: [PATCH 09/17] equihash: Set C pointers to NULL after freeing them to
 avoid double-frees

Also includes some redundant cleanup code for defense in depth.
---
 components/equihash/tromp/equi_miner.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/components/equihash/tromp/equi_miner.c b/components/equihash/tromp/equi_miner.c
index f1c767a4ac..a57bca34a2 100644
--- a/components/equihash/tromp/equi_miner.c
+++ b/components/equihash/tromp/equi_miner.c
@@ -204,8 +204,22 @@ typedef struct htalloc htalloc;
         hta->trees1[r/2]  = (bucket1 *)(hta->heap1 + r/2);
   }
   void dealloctrees(htalloc *hta) {
+    if (hta == NULL) {
+      return;
+    }
+
     free(hta->heap0);
     free(hta->heap1);
+    // Avoid use-after-free and double-free
+    hta->heap0 = NULL;
+    hta->heap1 = NULL;
+
+    for (int r=0; r<WK; r++)
+      if ((r&1) == 0)
+        hta->trees0[r/2]  = NULL;
+      else
+        hta->trees1[r/2]  = NULL;
+    hta->alloced = 0;
   }
   void *htalloc_alloc(htalloc *hta, const u32 n, const u32 sz) {
     void *mem  = calloc(n, sz);
@@ -268,11 +282,19 @@ typedef struct equi equi;
     return eq;
   }
   void equi_free(equi *eq) {
+    if (eq == NULL) {
+      return;
+    }
+
     dealloctrees(&eq->hta);
 
     free(eq->nslots);
     free(eq->sols);
     eq->blake2b_free(eq->blake_ctx);
+    // Avoid use-after-free and double-free
+    eq->nslots = NULL;
+    eq->sols = NULL;
+    eq->blake_ctx = NULL;
 
     free(eq);
   }
@@ -505,6 +527,8 @@ typedef struct equi equi;
       eq->blake2b_update(state, (uchar *)&leb, sizeof(u32));
       eq->blake2b_finalize(state, hash, HASHOUT);
       eq->blake2b_free(state);
+      // Avoid use-after-free and double-free
+      state = NULL;
 
       for (u32 i = 0; i<HASHESPERBLAKE; i++) {
         const uchar *ph = hash + i * WN/8;

From 5f77bd79dbaa85285418c9e21cbd7f0fdbf0c21d Mon Sep 17 00:00:00 2001
From: teor <teor@riseup.net>
Date: Mon, 8 Jan 2024 08:16:02 +1000
Subject: [PATCH 10/17] equihash: Place solver behind a feature flag

---
 components/equihash/Cargo.toml | 8 +++++++-
 components/equihash/build.rs   | 6 ++++++
 components/equihash/src/lib.rs | 2 ++
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/components/equihash/Cargo.toml b/components/equihash/Cargo.toml
index a4d3403134..e2a9e0b054 100644
--- a/components/equihash/Cargo.toml
+++ b/components/equihash/Cargo.toml
@@ -9,12 +9,18 @@ license = "MIT OR Apache-2.0"
 edition = "2021"
 rust-version = "1.56.1"
 
+[features]
+default = []
+
+## Builds the C++ tromp solver and Rust FFI layer.
+solver = ["dep:cc"]
+
 [dependencies]
 blake2b_simd = "1"
 byteorder = "1"
 
 [build-dependencies]
-cc = "1"
+cc = { version = "1", optional = true }
 
 [dev-dependencies]
 hex = "0.4"
diff --git a/components/equihash/build.rs b/components/equihash/build.rs
index 86c77774c1..74122e450a 100644
--- a/components/equihash/build.rs
+++ b/components/equihash/build.rs
@@ -1,6 +1,12 @@
 //! Build script for the equihash tromp solver in C.
 
 fn main() {
+    #[cfg(feature = "solver")]
+    build_tromp_solver();
+}
+
+#[cfg(feature = "solver")]
+fn build_tromp_solver() {
     cc::Build::new()
         .include("tromp/")
         .file("tromp/equi_miner.c")
diff --git a/components/equihash/src/lib.rs b/components/equihash/src/lib.rs
index 0000c20535..e0ddf8c5f3 100644
--- a/components/equihash/src/lib.rs
+++ b/components/equihash/src/lib.rs
@@ -29,5 +29,7 @@ mod test_vectors;
 
 pub use verify::{is_valid_solution, Error};
 
+#[cfg(feature = "solver")]
 mod blake2b;
+#[cfg(feature = "solver")]
 pub mod tromp;

From d7ccd07d0b7ea3718f81568551966ee2176dc219 Mon Sep 17 00:00:00 2001
From: teor <teor@riseup.net>
Date: Thu, 11 Jan 2024 09:14:19 +1000
Subject: [PATCH 11/17] equihash: Ensure returned compressed solutions are
 unique

---
 components/equihash/src/tromp.rs | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/components/equihash/src/tromp.rs b/components/equihash/src/tromp.rs
index e5f2157ff5..ecd0658278 100644
--- a/components/equihash/src/tromp.rs
+++ b/components/equihash/src/tromp.rs
@@ -174,10 +174,16 @@ pub fn solve_200_9_compressed<const N: usize>(
     let p = Params::new(200, 9).expect("should be valid");
     let solutions = solve_200_9(input, next_nonce);
 
-    solutions
+    let mut solutions: Vec<Vec<u8>> = solutions
         .iter()
         .map(|solution| minimal_from_indices(p, solution))
-        .collect()
+        .collect();
+
+    // Just in case the solver returns solutions that become the same when compressed.
+    solutions.sort();
+    solutions.dedup();
+
+    solutions
 }
 
 #[cfg(test)]

From 989f40ee9bceef17195238917911bd6effcd8ad2 Mon Sep 17 00:00:00 2001
From: teor <teor@riseup.net>
Date: Fri, 12 Jan 2024 07:57:42 +1000
Subject: [PATCH 12/17] equihash: Add a portable endian.h for `htole32()` on
 macOS and Windows

Source: mikepb/endian.h@0f885cbba627efe9b8f763e1c2872e904fe0c0b1
License: Public Domain (or "BSD OR MIT OR Apache-2.0")
---
 components/equihash/tromp/equi_miner.c      |   4 +
 components/equihash/tromp/portable_endian.h | 128 ++++++++++++++++++++
 2 files changed, 132 insertions(+)
 create mode 100644 components/equihash/tromp/portable_endian.h

diff --git a/components/equihash/tromp/equi_miner.c b/components/equihash/tromp/equi_miner.c
index a57bca34a2..b1b0ae9a19 100644
--- a/components/equihash/tromp/equi_miner.c
+++ b/components/equihash/tromp/equi_miner.c
@@ -22,6 +22,10 @@
 #define ZCASH_POW_TROMP_EQUI_MINER_H
 
 #include "equi.h"
+
+// Provides htole32() on macOS and Windows
+#include "portable_endian.h"
+
 #include <stdio.h>
 #include <stdlib.h>
 #include <pthread.h>
diff --git a/components/equihash/tromp/portable_endian.h b/components/equihash/tromp/portable_endian.h
new file mode 100644
index 0000000000..74575fcd18
--- /dev/null
+++ b/components/equihash/tromp/portable_endian.h
@@ -0,0 +1,128 @@
+//
+// endian.h
+//
+// https://gist.github.com/panzi/6856583
+//
+// I, Mathias Panzenböck, place this file hereby into the public domain. Use
+// it at your own risk for whatever you like. In case there are
+// jurisdictions that don't support putting things in the public domain you
+// can also consider it to be "dual licensed" under the BSD, MIT and Apache
+// licenses, if you want to. This code is trivial anyway. Consider it an
+// example on how to get the endian conversion functions on different
+// platforms.
+
+// Downloaded from https://raw.githubusercontent.com/mikepb/endian.h/master/endian.h
+// on 12 January 2024.
+
+#ifndef PORTABLE_ENDIAN_H__
+#define PORTABLE_ENDIAN_H__
+
+#if (defined(_WIN16) || defined(_WIN32) || defined(_WIN64)) && !defined(__WINDOWS__)
+
+#	define __WINDOWS__
+
+#endif
+
+#if defined(__linux__) || defined(__CYGWIN__)
+
+#	include <endian.h>
+
+#elif defined(__APPLE__)
+
+#	include <libkern/OSByteOrder.h>
+
+#	define htobe16(x) OSSwapHostToBigInt16(x)
+#	define htole16(x) OSSwapHostToLittleInt16(x)
+#	define be16toh(x) OSSwapBigToHostInt16(x)
+#	define le16toh(x) OSSwapLittleToHostInt16(x)
+
+#	define htobe32(x) OSSwapHostToBigInt32(x)
+#	define htole32(x) OSSwapHostToLittleInt32(x)
+#	define be32toh(x) OSSwapBigToHostInt32(x)
+#	define le32toh(x) OSSwapLittleToHostInt32(x)
+
+#	define htobe64(x) OSSwapHostToBigInt64(x)
+#	define htole64(x) OSSwapHostToLittleInt64(x)
+#	define be64toh(x) OSSwapBigToHostInt64(x)
+#	define le64toh(x) OSSwapLittleToHostInt64(x)
+
+#	define __BYTE_ORDER    BYTE_ORDER
+#	define __BIG_ENDIAN    BIG_ENDIAN
+#	define __LITTLE_ENDIAN LITTLE_ENDIAN
+#	define __PDP_ENDIAN    PDP_ENDIAN
+
+#elif defined(__OpenBSD__)
+
+#	include <sys/endian.h>
+
+#elif defined(__NetBSD__) || defined(__FreeBSD__) || defined(__DragonFly__)
+
+#	include <sys/endian.h>
+
+#	define be16toh(x) betoh16(x)
+#	define le16toh(x) letoh16(x)
+
+#	define be32toh(x) betoh32(x)
+#	define le32toh(x) letoh32(x)
+
+#	define be64toh(x) betoh64(x)
+#	define le64toh(x) letoh64(x)
+
+#elif defined(__WINDOWS__)
+
+#	include <winsock2.h>
+#	include <sys/param.h>
+
+#	if BYTE_ORDER == LITTLE_ENDIAN
+
+#		define htobe16(x) htons(x)
+#		define htole16(x) (x)
+#		define be16toh(x) ntohs(x)
+#		define le16toh(x) (x)
+
+#		define htobe32(x) htonl(x)
+#		define htole32(x) (x)
+#		define be32toh(x) ntohl(x)
+#		define le32toh(x) (x)
+
+#		define htobe64(x) htonll(x)
+#		define htole64(x) (x)
+#		define be64toh(x) ntohll(x)
+#		define le64toh(x) (x)
+
+#	elif BYTE_ORDER == BIG_ENDIAN
+
+		/* that would be xbox 360 */
+#		define htobe16(x) (x)
+#		define htole16(x) __builtin_bswap16(x)
+#		define be16toh(x) (x)
+#		define le16toh(x) __builtin_bswap16(x)
+
+#		define htobe32(x) (x)
+#		define htole32(x) __builtin_bswap32(x)
+#		define be32toh(x) (x)
+#		define le32toh(x) __builtin_bswap32(x)
+
+#		define htobe64(x) (x)
+#		define htole64(x) __builtin_bswap64(x)
+#		define be64toh(x) (x)
+#		define le64toh(x) __builtin_bswap64(x)
+
+#	else
+
+#		error byte order not supported
+
+#	endif
+
+#	define __BYTE_ORDER    BYTE_ORDER
+#	define __BIG_ENDIAN    BIG_ENDIAN
+#	define __LITTLE_ENDIAN LITTLE_ENDIAN
+#	define __PDP_ENDIAN    PDP_ENDIAN
+
+#else
+
+#	error platform not supported
+
+#endif
+
+#endif

From b737d0fe26967f961abf1dcfdd8669dd8daabc15 Mon Sep 17 00:00:00 2001
From: teor <teor@riseup.net>
Date: Fri, 12 Jan 2024 08:05:19 +1000
Subject: [PATCH 13/17] equihash: Remove unused thread support to enable
 Windows compilation

---
 components/equihash/src/tromp.rs        |  2 -
 components/equihash/tromp/equi.h        |  4 --
 components/equihash/tromp/equi_miner.c  | 41 +++-----------
 components/equihash/tromp/osx_barrier.h | 75 -------------------------
 4 files changed, 8 insertions(+), 114 deletions(-)
 delete mode 100644 components/equihash/tromp/osx_barrier.h

diff --git a/components/equihash/src/tromp.rs b/components/equihash/src/tromp.rs
index ecd0658278..af53b4a596 100644
--- a/components/equihash/src/tromp.rs
+++ b/components/equihash/src/tromp.rs
@@ -17,7 +17,6 @@ struct CEqui {
 extern "C" {
     #[allow(improper_ctypes)]
     fn equi_new(
-        n_threads: u32,
         blake2b_clone: extern "C" fn(state: *const State) -> *mut State,
         blake2b_free: extern "C" fn(state: *mut State),
         blake2b_update: extern "C" fn(state: *mut State, input: *const u8, input_len: usize),
@@ -124,7 +123,6 @@ pub fn solve_200_9<const N: usize>(
     #[allow(unsafe_code)]
     let eq = unsafe {
         equi_new(
-            1,
             blake2b::blake2b_clone,
             blake2b::blake2b_free,
             blake2b::blake2b_update,
diff --git a/components/equihash/tromp/equi.h b/components/equihash/tromp/equi.h
index 2bf0794edf..7b3969f52f 100644
--- a/components/equihash/tromp/equi.h
+++ b/components/equihash/tromp/equi.h
@@ -4,10 +4,6 @@
 #ifndef ZCASH_POW_TROMP_EQUI_H
 #define ZCASH_POW_TROMP_EQUI_H
 
-#ifdef __APPLE__
-#include "osx_barrier.h"
-#endif
-
 #include <stdbool.h> // for type bool
 #include <stdint.h> // for types uint32_t,uint64_t
 #include <string.h> // for functions memset
diff --git a/components/equihash/tromp/equi_miner.c b/components/equihash/tromp/equi_miner.c
index b1b0ae9a19..d2682e6747 100644
--- a/components/equihash/tromp/equi_miner.c
+++ b/components/equihash/tromp/equi_miner.c
@@ -28,7 +28,6 @@
 
 #include <stdio.h>
 #include <stdlib.h>
-#include <pthread.h>
 #include <assert.h>
 
 typedef uint16_t u16;
@@ -248,16 +247,13 @@ struct equi {
   bsizes *nslots; // PUT IN BUCKET STRUCT
   proof *sols;
   au32 nsols;
-  u32 nthreads;
   u32 xfull;
   u32 hfull;
   u32 bfull;
-  pthread_barrier_t barry;
 };
 typedef struct equi equi;
   void equi_clearslots(equi *eq);
   equi *equi_new(
-    const u32 n_threads,
     blake2b_clone blake2b_clone,
     blake2b_free blake2b_free,
     blake2b_update blake2b_update,
@@ -265,15 +261,11 @@ typedef struct equi equi;
   ) {
     assert(sizeof(hashunit) == 4);
     equi *eq = malloc(sizeof(equi));
-    eq->nthreads = n_threads;
     eq->blake2b_clone = blake2b_clone;
     eq->blake2b_free = blake2b_free;
     eq->blake2b_update = blake2b_update;
     eq->blake2b_finalize = blake2b_finalize;
 
-    const int err = pthread_barrier_init(&eq->barry, NULL, eq->nthreads);
-    assert(!err);
-
     alloctrees(&eq->hta);
     eq->nslots = (bsizes *)htalloc_alloc(&eq->hta, 2 * NBUCKETS, sizeof(au32));
     eq->sols   =  (proof *)htalloc_alloc(&eq->hta, MAXSOLS, sizeof(proof));
@@ -409,7 +401,7 @@ typedef struct equi equi;
     u32 nextbo;
   };
   typedef struct htlayout htlayout;
-  
+
     htlayout htlayout_new(equi *eq, u32 r) {
       htlayout htl;
       htl.hta = eq->hta;
@@ -525,7 +517,7 @@ typedef struct equi equi;
     BLAKE2bState* state;
     htlayout htl = htlayout_new(eq, 0);
     const u32 hashbytes = hashsize(0);
-    for (u32 block = id; block < NBLOCKS; block += eq->nthreads) {
+    for (u32 block = id; block < NBLOCKS; block++) {
       state = eq->blake2b_clone(eq->blake_ctx);
       u32 leb = htole32(block);
       eq->blake2b_update(state, (uchar *)&leb, sizeof(u32));
@@ -561,11 +553,11 @@ typedef struct equi equi;
       }
     }
   }
-  
+
   void equi_digitodd(equi *eq, const u32 r, const u32 id) {
     htlayout htl = htlayout_new(eq, r);
     collisiondata cd;
-    for (u32 bucketid=id; bucketid < NBUCKETS; bucketid += eq->nthreads) {
+    for (u32 bucketid=id; bucketid < NBUCKETS; bucketid++) {
       collisiondata_clear(&cd);
       slot0 *buck = htl.hta.trees0[(r-1)/2][bucketid]; // optimize by updating previous buck?!
       u32 bsize = getnslots(eq, r-1, bucketid);       // optimize by putting bucketsize with block?!
@@ -613,11 +605,11 @@ typedef struct equi equi;
       }
     }
   }
-  
+
   void equi_digiteven(equi *eq, const u32 r, const u32 id) {
     htlayout htl = htlayout_new(eq, r);
     collisiondata cd;
-    for (u32 bucketid=id; bucketid < NBUCKETS; bucketid += eq->nthreads) {
+    for (u32 bucketid=id; bucketid < NBUCKETS; bucketid++) {
       collisiondata_clear(&cd);
       slot1 *buck = htl.hta.trees1[(r-1)/2][bucketid]; // OPTIMIZE BY UPDATING PREVIOUS
       u32 bsize = getnslots(eq, r-1, bucketid);
@@ -665,12 +657,12 @@ typedef struct equi equi;
       }
     }
   }
-  
+
   void equi_digitK(equi *eq, const u32 id) {
     collisiondata cd;
     htlayout htl = htlayout_new(eq, WK);
 u32 nc = 0;
-    for (u32 bucketid = id; bucketid < NBUCKETS; bucketid += eq->nthreads) {
+    for (u32 bucketid = id; bucketid < NBUCKETS; bucketid++) {
       collisiondata_clear(&cd);
       slot0 *buck = htl.hta.trees0[(WK-1)/2][bucketid];
       u32 bsize = getnslots(eq, WK-1, bucketid);
@@ -697,40 +689,26 @@ nc++,       candidate(eq, tree_from_bid(bucketid, s0, s1));
 
 typedef struct {
   u32 id;
-  pthread_t thread;
   equi *eq;
 } thread_ctx;
 
-void barrier(pthread_barrier_t *barry) {
-  const int rc = pthread_barrier_wait(barry);
-  if (rc != 0 && rc != PTHREAD_BARRIER_SERIAL_THREAD) {
-//    printf("Could not wait on barrier\n");
-    pthread_exit(NULL);
-  }
-}
-
 void *worker(void *vp) {
   thread_ctx *tp = (thread_ctx *)vp;
   equi *eq = tp->eq;
 
 //  if (tp->id == 0)
 //    printf("Digit 0\n");
-  barrier(&eq->barry);
   equi_digit0(eq, tp->id);
-  barrier(&eq->barry);
   if (tp->id == 0) {
     equi_clearslots(eq);
 #ifdef EQUIHASH_SHOW_BUCKET_SIZES
     showbsizes(eq, 0);
 #endif
   }
-  barrier(&eq->barry);
   for (u32 r = 1; r < WK; r++) {
 //    if (tp->id == 0)
 //      printf("Digit %d", r);
-    barrier(&eq->barry);
     r&1 ? equi_digitodd(eq, r, tp->id) : equi_digiteven(eq, r, tp->id);
-    barrier(&eq->barry);
     if (tp->id == 0) {
 //      printf(" x%d b%d h%d\n", eq->xfull, eq->bfull, eq->hfull);
       equi_clearslots(eq);
@@ -738,13 +716,10 @@ void *worker(void *vp) {
       showbsizes(eq, r);
 #endif
     }
-    barrier(&eq->barry);
   }
 //  if (tp->id == 0)
 //    printf("Digit %d\n", WK);
   equi_digitK(eq, tp->id);
-  barrier(&eq->barry);
-  pthread_exit(NULL);
   return 0;
 }
 
diff --git a/components/equihash/tromp/osx_barrier.h b/components/equihash/tromp/osx_barrier.h
deleted file mode 100644
index 659c40bf59..0000000000
--- a/components/equihash/tromp/osx_barrier.h
+++ /dev/null
@@ -1,75 +0,0 @@
-#ifndef ZCASH_POW_TROMP_OSX_BARRIER_H
-#define ZCASH_POW_TROMP_OSX_BARRIER_H
-
-#ifdef __APPLE__
-
-#ifndef PTHREAD_BARRIER_H_
-#define PTHREAD_BARRIER_H_
-
-#include <pthread.h>
-#include <errno.h>
-
-typedef int pthread_barrierattr_t;
-#define PTHREAD_BARRIER_SERIAL_THREAD 1
-
-typedef struct
-{
-    pthread_mutex_t mutex;
-    pthread_cond_t cond;
-    int count;
-    int tripCount;
-} pthread_barrier_t;
-
-
-int pthread_barrier_init(pthread_barrier_t *barrier, const pthread_barrierattr_t *attr, unsigned int count)
-{
-    if(count == 0)
-    {
-        errno = EINVAL;
-        return -1;
-    }
-    if(pthread_mutex_init(&barrier->mutex, 0) < 0)
-    {
-        return -1;
-    }
-    if(pthread_cond_init(&barrier->cond, 0) < 0)
-    {
-        pthread_mutex_destroy(&barrier->mutex);
-        return -1;
-    }
-    barrier->tripCount = count;
-    barrier->count = 0;
-
-    return 0;
-}
-
-int pthread_barrier_destroy(pthread_barrier_t *barrier)
-{
-    pthread_cond_destroy(&barrier->cond);
-    pthread_mutex_destroy(&barrier->mutex);
-    return 0;
-}
-
-int pthread_barrier_wait(pthread_barrier_t *barrier)
-{
-    pthread_mutex_lock(&barrier->mutex);
-    ++(barrier->count);
-    if(barrier->count >= barrier->tripCount)
-    {
-        barrier->count = 0;
-        pthread_cond_broadcast(&barrier->cond);
-        pthread_mutex_unlock(&barrier->mutex);
-        return PTHREAD_BARRIER_SERIAL_THREAD;
-    }
-    else
-    {
-        pthread_cond_wait(&barrier->cond, &(barrier->mutex));
-        pthread_mutex_unlock(&barrier->mutex);
-        return 0;
-    }
-}
-
-#endif // PTHREAD_BARRIER_H_
-#endif // __APPLE__
-
-#endif // ZCASH_POW_TROMP_OSX_BARRIER_H

From 2bd7bc8f8e201bb19b6dc7d9b839b9d074f556d8 Mon Sep 17 00:00:00 2001
From: teor <teor@riseup.net>
Date: Fri, 12 Jan 2024 08:22:48 +1000
Subject: [PATCH 14/17] equihash: Don't import a header that's missing in
 Windows CI

---
 components/equihash/tromp/portable_endian.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/components/equihash/tromp/portable_endian.h b/components/equihash/tromp/portable_endian.h
index 74575fcd18..4a71ce7a7a 100644
--- a/components/equihash/tromp/portable_endian.h
+++ b/components/equihash/tromp/portable_endian.h
@@ -71,7 +71,9 @@
 #elif defined(__WINDOWS__)
 
 #	include <winsock2.h>
-#	include <sys/param.h>
+
+// <sys/param.h> is not available in the librustzcash Windows CI, so it is not included.
+//#	include <sys/param.h>
 
 #	if BYTE_ORDER == LITTLE_ENDIAN
 

From 9391e65c2167032bbe9a6502c2be8c5b6891761b Mon Sep 17 00:00:00 2001
From: teor <teor@riseup.net>
Date: Fri, 12 Jan 2024 11:12:25 +1000
Subject: [PATCH 15/17] equihash: Clear slots when setting the hash state

The equivalent change is made to the C worker, which is unused.
---
 components/equihash/tromp/equi_miner.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/components/equihash/tromp/equi_miner.c b/components/equihash/tromp/equi_miner.c
index d2682e6747..17dfb89373 100644
--- a/components/equihash/tromp/equi_miner.c
+++ b/components/equihash/tromp/equi_miner.c
@@ -301,6 +301,7 @@ typedef struct equi equi;
 
     eq->blake_ctx = eq->blake2b_clone(ctx);
     memset(eq->nslots, 0, NBUCKETS * sizeof(au32)); // only nslots[0] needs zeroing
+    equi_clearslots(eq);
     eq->nsols = 0;
   }
   void equi_clearslots(equi *eq) {
@@ -698,6 +699,9 @@ void *worker(void *vp) {
 
 //  if (tp->id == 0)
 //    printf("Digit 0\n");
+  if (tp->id == 0) {
+    equi_clearslots(eq);
+  }
   equi_digit0(eq, tp->id);
   if (tp->id == 0) {
     equi_clearslots(eq);

From 76131db25a2772c5e139d066ec68975f26d988b5 Mon Sep 17 00:00:00 2001
From: teor <teor@riseup.net>
Date: Fri, 5 Jan 2024 10:50:28 +1000
Subject: [PATCH 16/17] Add commented-out prints of solution candidates for
 debugging

---
 components/equihash/src/tromp.rs       | 20 ++++++++++++++++++++
 components/equihash/tromp/equi_miner.c | 11 +++++++++--
 2 files changed, 29 insertions(+), 2 deletions(-)

diff --git a/components/equihash/src/tromp.rs b/components/equihash/src/tromp.rs
index af53b4a596..37daa6d741 100644
--- a/components/equihash/src/tromp.rs
+++ b/components/equihash/src/tromp.rs
@@ -44,6 +44,7 @@ extern "C" {
 ///
 /// This function uses unsafe code for FFI into the tromp solver.
 #[allow(unsafe_code)]
+#[allow(clippy::print_stdout)]
 unsafe fn worker(eq: *mut CEqui, p: Params, curr_state: &State) -> Vec<Vec<u32>> {
     // SAFETY: caller must supply a valid `eq` instance.
     //
@@ -70,6 +71,7 @@ unsafe fn worker(eq: *mut CEqui, p: Params, curr_state: &State) -> Vec<Vec<u32>>
         let nsols = equi_nsols(eq);
         let sols = equi_sols(eq);
         let solution_len = 1 << p.k;
+        //println!("{nsols} solutions of length {solution_len} at {sols:?}");
 
         // SAFETY:
         // - caller must supply a `p` instance that matches the hard-coded values in the C code.
@@ -77,6 +79,13 @@ unsafe fn worker(eq: *mut CEqui, p: Params, curr_state: &State) -> Vec<Vec<u32>>
         // - this slice is a shared ref to the memory in a valid `eq` instance supplied by the caller.
         let solutions: &[u32] = slice::from_raw_parts(sols, nsols * solution_len);
 
+        /*
+        println!(
+            "{nsols} solutions of length {solution_len} as a slice of length {:?}",
+            solutions.len()
+        );
+        */
+
         let mut chunks = solutions.chunks_exact(solution_len);
 
         // SAFETY:
@@ -98,6 +107,17 @@ unsafe fn worker(eq: *mut CEqui, p: Params, curr_state: &State) -> Vec<Vec<u32>>
         solutions
     };
 
+    /*
+    println!(
+        "{} solutions as cloned vectors of length {:?}",
+        solutions.len(),
+        solutions
+            .iter()
+            .map(|solution| solution.len())
+            .collect::<Vec<_>>()
+    );
+    */
+
     solutions
 }
 
diff --git a/components/equihash/tromp/equi_miner.c b/components/equihash/tromp/equi_miner.c
index 17dfb89373..a435e5c513 100644
--- a/components/equihash/tromp/equi_miner.c
+++ b/components/equihash/tromp/equi_miner.c
@@ -355,8 +355,15 @@ typedef struct equi equi;
     listindices1(eq, WK, t, prf); // assume WK odd
     qsort(prf, PROOFSIZE, sizeof(u32), &compu32);
     for (u32 i=1; i<PROOFSIZE; i++)
-      if (prf[i] <= prf[i-1])
+      if (prf[i] <= prf[i-1]) {
+        /*
+        printf(
+          "failed dup indexes check: wanted: proof[%d] > proof[%d], actual: %d <= %d\n",
+          i, i-1, prf[i], prf[i-1]
+        );
+        */
         return;
+      }
 #ifdef EQUIHASH_TROMP_ATOMIC
     u32 soli = std::atomic_fetch_add_explicit(&eq->nsols, 1U, std::memory_order_relaxed);
 #else
@@ -678,7 +685,7 @@ nc++,       candidate(eq, tree_from_bid(bucketid, s0, s1));
         }
       }
     }
-//printf(" %d candidates ", nc);
+//printf(" %d candidates\n", nc);
   }
 
   size_t equi_nsols(const equi *eq) {

From 634285d2f15461cba3230d17b3bd14946d81d891 Mon Sep 17 00:00:00 2001
From: Daira-Emma Hopwood <daira@jacaranda.org>
Date: Thu, 31 Oct 2024 17:35:55 +0000
Subject: [PATCH 17/17] Note in Cargo.toml that this crate is experimental

Co-authored-by: Arya <aryasolhi@gmail.com>
---
 components/equihash/Cargo.toml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/components/equihash/Cargo.toml b/components/equihash/Cargo.toml
index e2a9e0b054..ea3dd29cce 100644
--- a/components/equihash/Cargo.toml
+++ b/components/equihash/Cargo.toml
@@ -12,7 +12,7 @@ rust-version = "1.56.1"
 [features]
 default = []
 
-## Builds the C++ tromp solver and Rust FFI layer.
+# Experimental tromp solver support: builds the C tromp solver and the Rust FFI layer.
 solver = ["dep:cc"]
 
 [dependencies]