-
Notifications
You must be signed in to change notification settings - Fork 0
/
xxh32.c
134 lines (126 loc) · 4 KB
/
xxh32.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#include <stdint.h>
/*!
*/
static const uint32_t Prime1 = 2654435761U;
static const uint32_t Prime2 = 2246822519U;
static const uint32_t Prime3 = 3266489917U;
static const uint32_t Prime4 = 668265263U;
static const uint32_t Prime5 = 374761393U;
typedef uint32_t uint32x4_t __attribute__((__vector_size__(16)));
typedef struct _HashCtx HashCtx_t;
struct _HashCtx {
uint32x4_t state;
uint32x4_t block;
int offset;
uint32_t total_length;
};
#define ROTL32(x, n) ((x)<<n | (x)>>(32-n))
void xxh32_init(HashCtx_t *ctx, uint32_t seed)
{
uint32x4_t state = (uint32x4_t){Prime1 + Prime2, Prime2, 0, -Prime1};
state+=seed;
ctx->state = state;
ctx->block = (uint32x4_t){0};
ctx->offset=0;
}
void xxh32_update(HashCtx_t *ctx, uint8_t * data, int length)
{
uint32x4_t state = ctx->state;
if (ctx->offset){
int len = (16-ctx->offset)<length?(16-ctx->offset): length;
__builtin_memcpy((uint8_t*)&ctx->block + ctx->offset, data, len);
ctx->offset+=len;
if(ctx->offset!=16) return;
data +=len;
length-=len;
// static U32 XXH32_round(U32 seed, U32 input)
state = ROTL32(state + ctx->block*Prime2, 13) * Prime1;
ctx->block = (uint32x4_t){0};
ctx->offset=0;
}
int blocks = length>>4;
int i;
for (i=0; i<blocks; i++) {
uint32x4_t block;
__builtin_memcpy(&block, data, 16); data+=16;
state = ROTL32(state + block*Prime2, 13) * Prime1;
}
ctx->state = state;
length &= 0xF;
if (length) {
ctx->block = (uint32x4_t){0};
__builtin_memcpy(&ctx->block, data, length);
}
}
uint32_t xxh32_final(HashCtx_t *ctx)
{
uint32_t hash = (uint32_t)ctx->total_length;
if (hash >= 16)
hash += ROTL32(ctx->state[0], 1) +
ROTL32(ctx->state[1], 7) +
ROTL32(ctx->state[2], 12) +
ROTL32(ctx->state[3], 18);
else
hash += ctx->state[2] + Prime5;// seed+prime5
// process remaining bytes in temporary buffer
if(ctx->offset) {
uint32_t* s4 = (uint32_t*)&ctx->block;
int blocks = ctx->offset>>2;
int i;
for (i=0; i<blocks; i++)
hash = ROTL32(hash + s4[i] * Prime3, 17) * Prime4;
uint8_t* s = (uint8_t*)s4;
for (i*=4; i<ctx->offset; i++, s++)
hash = ROTL32(hash + s[i] * Prime5, 11) * Prime1;
}
/* mix all bits */
hash ^= hash >> 15;
hash *= Prime2;
hash ^= hash >> 13;
hash *= Prime3;
hash ^= hash >> 16;
return hash;
}
/*! \brief Расчет некриптографического хеш, используется в качестве контрольной суммы
*/
uint32_t xxh32(uint32_t hash, uint8_t* data, size_t data_len)
{
if (data_len>=16){
uint32x4_t state = (uint32x4_t){Prime1 + Prime2, Prime2, 0, -Prime1};
state+=hash;
int blocks = data_len>>4;
int i;
for (i=0; i<blocks; i++) {// вектор 128 бит
uint32x4_t block;
__builtin_memcpy(&block, data, 16); data+=16;
state = ROTL32(state + block*Prime2, 13) * Prime1;
}
hash = ROTL32(state[0], 1) +
ROTL32(state[1], 7) +
ROTL32(state[2], 12) +
ROTL32(state[3], 18);
} else {
hash = hash + Prime5;
}
hash += data_len;
data_len &= 0xF;
int i;
for (i=0; i < data_len>>2; i++, data+=4)
hash = ROTL32(hash + *(uint32_t*)data * Prime3, 17) * Prime4;
for (i*=4; i < data_len; i++, data++)
hash = ROTL32(hash + *data * Prime5, 11) * Prime1;
hash ^= hash >> 15;
hash *= Prime2;
hash ^= hash >> 13;
hash *= Prime3;
hash ^= hash >> 16;
return hash;
}
/*
Результат компиляции, в цикле получается такой код, цикл за два такта.
vmovdqa .LC0(%rip), %xmm0
vpmulld (%rdx), %xmm0, %xmm0
vpaddd (%rcx), %xmm0, %xmm0
vprold $13, %xmm0, %xmm0
vpmulld .LC1(%rip), %xmm0, %xmm0
*/