Skip to content

Commit

Permalink
added carryless multiplication
Browse files Browse the repository at this point in the history
  • Loading branch information
Logan007 committed Jun 12, 2021
1 parent 88e5a3b commit 451b7fd
Show file tree
Hide file tree
Showing 2 changed files with 135 additions and 11 deletions.
16 changes: 10 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -288,28 +288,32 @@ long.
```c
x = ~x;
x ^= constant;
x *= constant | 1; // e.g. only odd constants
x *= constant | 1; // e.g. only odd constants
x += constant;
x ^= x >> constant;
x ^= x << constant;
x += x << constant;
x -= x << constant;
x <<<= constant; // left rotation
bswap(x); // byte swap - the endianess changer
shf(x, constant); // byte shuffle, permutation
x <<<= constant; // left rotation
bswap(x); // byte swap - the endianess changer
shf(x, constant); // byte shuffle, permutation
clmul(x, constant | 1); // carryless multiplication, odd constants
```
Technically `x = ~x` is covered by `x = ^= constant`. However, `~x` is
uniquely special and particularly useful. The generator is very unlikely
to generate the one correct constant for the XOR operator that achieves
the same effect.
`shf` pattern uses the SSSE3 byte shuffle instruction and only is available
on corresponding hardware; `shf:03020100` denotes identity (no change),
`shf` pattern uses the SSSE3 byte shuffle instruction and is available
on corresponding hardware only; `shf:03020100` denotes identity (no change),
`shf:00010203` equals the endianess changing byte swap. 64-bit hashes
optionally take a permutation of `{ 00, ... , 07 }` such as
`shf:0304050607020100`.
`clmul`, the carryless multiplication instruction, is also available on supported
hardware only.
## 16-bit hashes
Because the constraints are different for 16-bit hashes there's a separate
Expand Down
130 changes: 125 additions & 5 deletions prospector.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,19 @@
#include <sys/mman.h>
#include <sys/time.h>

#ifdef __SSSE3__
#define HAVE_SHF // we have SSSE3's byte SHuFfle
#if defined (__SSSE3__) || defined (__PCLMUL__)
#include <immintrin.h>
#ifdef __SSSE3__
#define HAVE_SHF // we have SSSE3's byte SHuFfle
#endif
#ifdef __PCLMUL__
#define HAVE_CLMUL // we have CarryLess MULtiplication
#endif
#endif




#define ABI __attribute__((sysv_abi))

#define countof(a) ((int)(sizeof(a) / sizeof(0[a])))
Expand All @@ -36,20 +44,26 @@ xoroshiro128plus(uint64_t s[2])
enum hf_type {
/* 32 bits */
HF32_XOR, // x ^= const32
#ifdef HAVE_CLMUL
HF32_CLMUL,// x = _mm_clmulepi64_si128(x, const32, opSelect)
#endif
HF32_MUL, // x *= const32 (odd)
HF32_ADD, // x += const32
HF32_ROT, // x = (x << const5) | (x >> (32 - const5))
HF32_NOT, // x = ~x
HF32_BSWAP,// x = bswap32(x)
#ifdef HAVE_SHF
HF32_SHF, // x = __mm_shuffle_epi8(x, const32)
HF32_SHF, // x = _mm_shuffle_epi8(x, const32)
#endif
HF32_XORL, // x ^= x << const5
HF32_XORR, // x ^= x >> const5
HF32_ADDL, // x += x << const5
HF32_SUBL, // x -= x << const5
/* 64 bits */
HF64_XOR,
#ifdef HAVE_CLMUL
HF64_CLMUL,
#endif
HF64_MUL,
HF64_ADD,
HF64_ROT,
Expand All @@ -66,6 +80,9 @@ enum hf_type {

static const char hf_names[][8] = {
[HF32_XOR] = "32xor",
#ifdef HAVE_CLMUL
[HF32_CLMUL]= "32clmul",
#endif
[HF32_MUL] = "32mul",
[HF32_ADD] = "32add",
[HF32_ROT] = "32rot",
Expand All @@ -79,6 +96,9 @@ static const char hf_names[][8] = {
[HF32_ADDL] = "32addl",
[HF32_SUBL] = "32subl",
[HF64_XOR] = "64xor",
#ifdef HAVE_CLMUL
[HF64_CLMUL]= "64clmul",
#endif
[HF64_MUL] = "64mul",
[HF64_ADD] = "64add",
[HF64_ROT] = "64rot",
Expand Down Expand Up @@ -148,6 +168,9 @@ hf_randomize(struct hf_op *op, uint64_t s[2])
case HF32_ADD:
op->constant = (uint32_t)r;
break;
#ifdef HAVE_CLMUL
case HF32_CLMUL:
#endif
case HF32_MUL:
op->constant = (uint32_t)r | 1;
break;
Expand All @@ -162,6 +185,9 @@ hf_randomize(struct hf_op *op, uint64_t s[2])
case HF64_ADD:
op->constant = r;
break;
#ifdef HAVE_CLMUL
case HF64_CLMUL:
#endif
case HF64_MUL:
op->constant = r | 1;
break;
Expand Down Expand Up @@ -199,6 +225,9 @@ hf_type_valid(enum hf_type a, enum hf_type b)
case HF32_SHF:
#endif
case HF32_XOR:
#ifdef HAVE_CLMUL
case HF32_CLMUL:
#endif
case HF32_MUL:
case HF32_ADD:
case HF32_ROT:
Expand All @@ -208,6 +237,9 @@ hf_type_valid(enum hf_type a, enum hf_type b)
case HF64_SHF:
#endif
case HF64_XOR:
#ifdef HAVE_CLMUL
case HF64_CLMUL:
#endif
case HF64_MUL:
case HF64_ADD:
case HF64_ROT:
Expand Down Expand Up @@ -263,12 +295,17 @@ hf_print(const struct hf_op *op, char *buf)
break;
#ifdef HAVE_SHF
case HF32_SHF:
sprintf(buf, "x = _mm_shuffle_epi8(x, __mm_cvtsi32_si128(0x%08llx));", c);
sprintf(buf, "x = _mm_cvtsi128_si32(_mm_shuffle_epi8(x, _mm_cvtsi32_si128(0x%08llx));", c);
break;
#endif
case HF32_XOR:
sprintf(buf, "x ^= 0x%08llx;", c);
break;
#ifdef HAVE_CLMUL
case HF32_CLMUL:
sprintf(buf, "x = _mm_cvtsi128_si32(_mm_clmulepi64_si128(_mm_cvtsi32_si128(x), _mm_cvtsi32_si128(0x%08llx), 0x00));", c);
break;
#endif
case HF32_MUL:
sprintf(buf, "x *= 0x%08llx;", c);
break;
Expand Down Expand Up @@ -298,6 +335,11 @@ hf_print(const struct hf_op *op, char *buf)
case HF64_XOR:
sprintf(buf, "x ^= 0x%016llx;", c);
break;
#ifdef HAVE_CLMUL
case HF64_CLMUL:
sprintf(buf, "x = _mm_cvtsi128_si64(_mm_clmulepi64_si128(_mm_cvtsi64_si128(x), _mm_cvtsi64_si128(0x%016llx), 0x00));", c);
break;
#endif
case HF64_MUL:
sprintf(buf, "x *= 0x%016llx;", c);
break;
Expand Down Expand Up @@ -330,7 +372,7 @@ hf_printfunc(const struct hf_op *ops, int n, FILE *f)
else
fprintf(f, "uint64_t\nhash(uint64_t x)\n{\n");
for (int i = 0; i < n; i++) {
char buf[80];
char buf[120];
hf_print(ops + i, buf);
fprintf(f, " %s\n", buf);
}
Expand Down Expand Up @@ -402,6 +444,38 @@ hf_compile(const struct hf_op *ops, int n, unsigned char *buf)
*buf++ = ops[i].constant >> 16;
*buf++ = ops[i].constant >> 24;
break;
#ifdef HAVE_CLMUL
case HF32_CLMUL:
/* movd xmm0, eax */
*buf++ = 0x66;
*buf++ = 0x0f;
*buf++ = 0x6e;
*buf++ = 0xc0;
/* mov edi, imm32 */
*buf++ = 0xbf;
*buf++ = ops[i].constant >> 0;
*buf++ = ops[i].constant >> 8;
*buf++ = ops[i].constant >> 16;
*buf++ = ops[i].constant >> 24;
/* movd xmm1, edi */
*buf++ = 0x66;
*buf++ = 0x0f;
*buf++ = 0x6e;
*buf++ = 0xcf;
/* pclmulqdq xmm0, xmm1, 0 */
*buf++ = 0x66;
*buf++ = 0x0f;
*buf++ = 0x3a;
*buf++ = 0x44;
*buf++ = 0xc1;
*buf++ = 0x00;
/* movd eax, xmm0 */
*buf++ = 0x66;
*buf++ = 0x0f;
*buf++ = 0x7e;
*buf++ = 0xc0;
break;
#endif
case HF32_MUL:
/* imul eax, eax, imm32 */
*buf++ = 0x69;
Expand Down Expand Up @@ -541,6 +615,46 @@ hf_compile(const struct hf_op *ops, int n, unsigned char *buf)
*buf++ = 0x31;
*buf++ = 0xf8;
break;
#ifdef HAVE_CLMUL
case HF64_CLMUL:
/* movq xmm0, rax */
*buf++ = 0x66;
*buf++ = 0x48;
*buf++ = 0x0f;
*buf++ = 0x6e;
*buf++ = 0xc0;
/* mov rdi, imm64 */
*buf++ = 0x48;
*buf++ = 0xbf;
*buf++ = ops[i].constant >> 0;
*buf++ = ops[i].constant >> 8;
*buf++ = ops[i].constant >> 16;
*buf++ = ops[i].constant >> 24;
*buf++ = ops[i].constant >> 32;
*buf++ = ops[i].constant >> 40;
*buf++ = ops[i].constant >> 48;
*buf++ = ops[i].constant >> 56;
/* movq xmm1, rdi */
*buf++ = 0x66;
*buf++ = 0x48;
*buf++ = 0x0f;
*buf++ = 0x6e;
*buf++ = 0xcf;
/* pclmulqdq xmm0, xmm1, 0x00 */
*buf++ = 0x66;
*buf++ = 0x0f;
*buf++ = 0x3a;
*buf++ = 0x44;
*buf++ = 0xc1;
*buf++ = 0x00;
/* movd rax, xmm0 */
*buf++ = 0x66;
*buf++ = 0x48;
*buf++ = 0x0f;
*buf++ = 0x7e;
*buf++ = 0xc0;
break;
#endif
case HF64_MUL:
/* mov rdi, imm64 */
*buf++ = 0x48;
Expand Down Expand Up @@ -853,9 +967,15 @@ parse_operand(struct hf_op *op, char *buf)
case HF64_BSWAP:
return 0;
case HF32_XOR:
#ifdef HAVE_CLMUL
case HF32_CLMUL:
#endif
case HF32_MUL:
case HF32_ADD:
case HF64_XOR:
#ifdef HAVE_CLMUL
case HF64_CLMUL:
#endif
case HF64_MUL:
case HF64_ADD:
op->constant = strtoull(buf, 0, 16);
Expand Down

0 comments on commit 451b7fd

Please sign in to comment.