-
Notifications
You must be signed in to change notification settings - Fork 0
/
md5.cu
231 lines (194 loc) · 7.27 KB
/
md5.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
#include <stdio.h>
#include <cuda.h>
#include "main.h"
#define UINT4 uint
extern __shared__ unsigned int words[]; // shared memory where hash will be stored
__constant__ unsigned int target_hash[4]; // constant has we will be searching for
__device__ unsigned int *format_shared_memory(unsigned int thread_id, unsigned int *memory) {
unsigned int *shared_memory;
unsigned int *global_memory;
int x;
// we need to get a pointer to our shared memory portion
shared_memory = &words[threadIdx.x * 16];
global_memory = &memory[thread_id * 16];
for(x=0; x < 16; x++) {
shared_memory[x] = global_memory[x];
}
return shared_memory;
}
/* F, G and H are basic MD5 functions: selection, majority, parity */
#define F(x, y, z) (((x) & (y)) | ((~x) & (z)))
#define G(x, y, z) (((x) & (z)) | ((y) & (~z)))
#define H(x, y, z) ((x) ^ (y) ^ (z))
#define I(x, y, z) ((y) ^ ((x) | (~z)))
/* ROTATE_LEFT rotates x left n bits */
#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
/* FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4 */
/* Rotation is separate from addition to prevent recomputation */
#define FF(a, b, c, d, x, s, ac) \
{(a) += F ((b), (c), (d)) + (x) + (UINT4)(ac); \
(a) = ROTATE_LEFT ((a), (s)); \
(a) += (b); \
}
#define GG(a, b, c, d, x, s, ac) \
{(a) += G ((b), (c), (d)) + (x) + (UINT4)(ac); \
(a) = ROTATE_LEFT ((a), (s)); \
(a) += (b); \
}
#define HH(a, b, c, d, x, s, ac) \
{(a) += H ((b), (c), (d)) + (x) + (UINT4)(ac); \
(a) = ROTATE_LEFT ((a), (s)); \
(a) += (b); \
}
#define II(a, b, c, d, x, s, ac) \
{(a) += I ((b), (c), (d)) + (x) + (UINT4)(ac); \
(a) = ROTATE_LEFT ((a), (s)); \
(a) += (b); \
}
__device__ void md5(uint *in, uint *hash) {
uint a, b, c, d;
const uint a0 = 0x67452301;
const uint b0 = 0xEFCDAB89;
const uint c0 = 0x98BADCFE;
const uint d0 = 0x10325476;
a = a0;
b = b0;
c = c0;
d = d0;
/* Round 1 */
#define S11 7
#define S12 12
#define S13 17
#define S14 22
FF ( a, b, c, d, in[ 0], S11, 3614090360); /* 1 */
FF ( d, a, b, c, in[ 1], S12, 3905402710); /* 2 */
FF ( c, d, a, b, in[ 2], S13, 606105819); /* 3 */
FF ( b, c, d, a, in[ 3], S14, 3250441966); /* 4 */
FF ( a, b, c, d, in[ 4], S11, 4118548399); /* 5 */
FF ( d, a, b, c, in[ 5], S12, 1200080426); /* 6 */
FF ( c, d, a, b, in[ 6], S13, 2821735955); /* 7 */
FF ( b, c, d, a, in[ 7], S14, 4249261313); /* 8 */
FF ( a, b, c, d, in[ 8], S11, 1770035416); /* 9 */
FF ( d, a, b, c, in[ 9], S12, 2336552879); /* 10 */
FF ( c, d, a, b, in[10], S13, 4294925233); /* 11 */
FF ( b, c, d, a, in[11], S14, 2304563134); /* 12 */
FF ( a, b, c, d, in[12], S11, 1804603682); /* 13 */
FF ( d, a, b, c, in[13], S12, 4254626195); /* 14 */
FF ( c, d, a, b, in[14], S13, 2792965006); /* 15 */
FF ( b, c, d, a, in[15], S14, 1236535329); /* 16 */
/* Round 2 */
#define S21 5
#define S22 9
#define S23 14
#define S24 20
GG ( a, b, c, d, in[ 1], S21, 4129170786); /* 17 */
GG ( d, a, b, c, in[ 6], S22, 3225465664); /* 18 */
GG ( c, d, a, b, in[11], S23, 643717713); /* 19 */
GG ( b, c, d, a, in[ 0], S24, 3921069994); /* 20 */
GG ( a, b, c, d, in[ 5], S21, 3593408605); /* 21 */
GG ( d, a, b, c, in[10], S22, 38016083); /* 22 */
GG ( c, d, a, b, in[15], S23, 3634488961); /* 23 */
GG ( b, c, d, a, in[ 4], S24, 3889429448); /* 24 */
GG ( a, b, c, d, in[ 9], S21, 568446438); /* 25 */
GG ( d, a, b, c, in[14], S22, 3275163606); /* 26 */
GG ( c, d, a, b, in[ 3], S23, 4107603335); /* 27 */
GG ( b, c, d, a, in[ 8], S24, 1163531501); /* 28 */
GG ( a, b, c, d, in[13], S21, 2850285829); /* 29 */
GG ( d, a, b, c, in[ 2], S22, 4243563512); /* 30 */
GG ( c, d, a, b, in[ 7], S23, 1735328473); /* 31 */
GG ( b, c, d, a, in[12], S24, 2368359562); /* 32 */
/* Round 3 */
#define S31 4
#define S32 11
#define S33 16
#define S34 23
HH ( a, b, c, d, in[ 5], S31, 4294588738); /* 33 */
HH ( d, a, b, c, in[ 8], S32, 2272392833); /* 34 */
HH ( c, d, a, b, in[11], S33, 1839030562); /* 35 */
HH ( b, c, d, a, in[14], S34, 4259657740); /* 36 */
HH ( a, b, c, d, in[ 1], S31, 2763975236); /* 37 */
HH ( d, a, b, c, in[ 4], S32, 1272893353); /* 38 */
HH ( c, d, a, b, in[ 7], S33, 4139469664); /* 39 */
HH ( b, c, d, a, in[10], S34, 3200236656); /* 40 */
HH ( a, b, c, d, in[13], S31, 681279174); /* 41 */
HH ( d, a, b, c, in[ 0], S32, 3936430074); /* 42 */
HH ( c, d, a, b, in[ 3], S33, 3572445317); /* 43 */
HH ( b, c, d, a, in[ 6], S34, 76029189); /* 44 */
HH ( a, b, c, d, in[ 9], S31, 3654602809); /* 45 */
HH ( d, a, b, c, in[12], S32, 3873151461); /* 46 */
HH ( c, d, a, b, in[15], S33, 530742520); /* 47 */
HH ( b, c, d, a, in[ 2], S34, 3299628645); /* 48 */
/* Round 4 */
#define S41 6
#define S42 10
#define S43 15
#define S44 21
II ( a, b, c, d, in[ 0], S41, 4096336452); /* 49 */
II ( d, a, b, c, in[ 7], S42, 1126891415); /* 50 */
II ( c, d, a, b, in[14], S43, 2878612391); /* 51 */
II ( b, c, d, a, in[ 5], S44, 4237533241); /* 52 */
II ( a, b, c, d, in[12], S41, 1700485571); /* 53 */
II ( d, a, b, c, in[ 3], S42, 2399980690); /* 54 */
II ( c, d, a, b, in[10], S43, 4293915773); /* 55 */
II ( b, c, d, a, in[ 1], S44, 2240044497); /* 56 */
II ( a, b, c, d, in[ 8], S41, 1873313359); /* 57 */
II ( d, a, b, c, in[15], S42, 4264355552); /* 58 */
II ( c, d, a, b, in[ 6], S43, 2734768916); /* 59 */
II ( b, c, d, a, in[13], S44, 1309151649); /* 60 */
II ( a, b, c, d, in[ 4], S41, 4149444226); /* 61 */
II ( d, a, b, c, in[11], S42, 3174756917); /* 62 */
II ( c, d, a, b, in[ 2], S43, 718787259); /* 63 */
II ( b, c, d, a, in[ 9], S44, 3951481745); /* 64 */
a += a0;
b += b0;
c += c0;
d += d0;
hash[0] = a;
hash[1] = b;
hash[2] = c;
hash[3] = d;
return;
}
__global__ void md5_cuda_calculate(void *memory, struct device_stats *stats, unsigned int *debug_memory) {
unsigned int id;
unsigned int *shared_memory;
uint hash[4];
int x;
id = (blockIdx.x * blockDim.x) + threadIdx.x; // get our thread unique ID in this run
shared_memory = format_shared_memory(id, (unsigned int *)memory);
#ifdef DEBUG
// passes the computed hashes into debug memory
for(x=0; x<4; x++) {
debug_memory[(id * 4) + x] = (uint)shared_memory[x];
}
#endif
md5(shared_memory, hash); // actually calculate the MD5 hash
if (hash[0] == target_hash[0] && hash[1] == target_hash[1] && hash[2] == target_hash[2] && hash[3] == target_hash[3]) {
// !! WE HAVE A MATCH !!
stats->hash_found = 1;
for(x=0; x<64; x++) {
// copy the matched word accross
stats->word[x] = *(char *)((char *)shared_memory + x);
}
}
}
extern "C" void md5_calculate(struct cuda_device *device) {
cudaEvent_t start, stop;
float time;
#ifdef GPU_BENCHMARK
cudaEventCreate(&start);
cudaEventCreate(&stop);
cudaEventRecord(start, 0);
cudaThreadSynchronize();
#endif
md5_cuda_calculate <<< device->max_blocks, device->max_threads, device->shared_memory >>> (device->device_global_memory, (struct device_stats *)device->device_stats_memory, (unsigned int *)device->device_debug_memory);
#ifdef GPU_BENCHMARK
cudaEventRecord(stop, 0);
cudaEventSynchronize(stop);
cudaEventElapsedTime(&time, start, stop);
cudaEventDestroy(start);
cudaEventDestroy(stop);
printf("CUDA kernel took %fms to calculate %d x %d (%d) hashes\n", time, device->max_blocks, device->max_threads, device->max_blocks * device->max_threads);
// print GPU stats here
#endif
}