-
Notifications
You must be signed in to change notification settings - Fork 33
/
sshaes.c
298 lines (256 loc) · 8.88 KB
/
sshaes.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
/*
* Implementation of AES for PuTTY using AES-NI
* instruction set extension was made by:
* @author Pavel Kryukov <[email protected]>
* @author Maxim Kuznetsov <[email protected]>
* @author Svyatoslav Kuzmich <[email protected]>
*
* For Putty AES NI project
* http://pavelkryukov.github.io/putty-aes-ni/
*/
#include <assert.h>
#include <stdlib.h>
#define NB (AES_BLOCKSZ / 4) /* number of uint32_t words in one cipher block */
/*
 * Select the appropriate inline keyword for the compiler:
 * GCC and Clang get __inline__, MSVC gets __forceinline, and any other
 * compiler gets an empty expansion (no inlining request at all).
 * The macro is private to this file; it is removed again by the
 * `#undef INLINE' on the last line.
 */
#if defined __GNUC__ || defined __clang__
# define INLINE __inline__
#elif defined (_MSC_VER)
# define INLINE __forceinline
#else
# define INLINE
#endif
/*
 * Per-key state for the AES-NI implementation.
 *
 * ks_e and ks_d are the raw backing storage for the encryption and
 * decryption key schedules. Each carries three spare 32-bit words so
 * that cf_aes_ni_setup() can pick a 16-byte-aligned starting point
 * inside it; those aligned addresses are kept in keysched_e and
 * keysched_d, which are what the encrypt/decrypt cores read.
 */
typedef struct cf_aes_ni_context {
    /* Number of AES rounds; the cipher cores accept 10, 12 or 14. */
    uint32_t rounds;

    /* Unaligned storage for the two key schedules (+3 words of slack). */
    uint32_t ks_e[(CF_AES_MAXROUNDS + 1) * NB + 3];
    uint32_t ks_d[(CF_AES_MAXROUNDS + 1) * NB + 3];

    /* Aligned views into ks_e and ks_d, set by cf_aes_ni_setup(). */
    __m128i *keysched_e;
    __m128i *keysched_d;
} cf_aes_ni_context;
/*
 * Check of compiler version.
 *
 * Defines COMPILER_SUPPORTS_AES_NI when the AES-NI code path below can
 * be built:
 *   - _FORCE_AES_NI defined: enabled unconditionally, no version check;
 *   - Clang 3.8 or later, targeting x86-64 or i386;
 *   - GCC 4.4 or later, targeting x86-64 or i386;
 *   - MSVC with _MSC_FULL_VER >= 150030729, targeting x64 or x86.
 * This is a compile-time decision only; whether the CPU actually has
 * the instructions is probed at run time by supports_aes_ni().
 */
#ifdef _FORCE_AES_NI
# define COMPILER_SUPPORTS_AES_NI
#elif defined(__clang__)
# if (__clang_major__ > 3 || (__clang_major__ == 3 && __clang_minor__ >= 8)) && (defined(__x86_64__) || defined(__i386))
# define COMPILER_SUPPORTS_AES_NI
# endif
#elif defined(__GNUC__)
# if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4)) && (defined(__x86_64__) || defined(__i386))
# define COMPILER_SUPPORTS_AES_NI
# endif
#elif defined (_MSC_VER)
# if (defined(_M_X64) || defined(_M_IX86)) && _MSC_FULL_VER >= 150030729
# define COMPILER_SUPPORTS_AES_NI
# endif
#endif
#ifdef COMPILER_SUPPORTS_AES_NI
/*
 * Set target architecture for Clang and GCC.
 *
 * GCC (but not Clang) gets file-wide target pragmas enabling AES and
 * SSE4.1. In addition, Clang and GCC 4.8 or later get FUNC_ISA, a
 * per-function target attribute that is placed on the functions below
 * which use the intrinsics. For every other compiler FUNC_ISA expands
 * to nothing.
 */
#if !defined(__clang__) && defined(__GNUC__)
# pragma GCC target("aes")
# pragma GCC target("sse4.1")
#endif
#if defined(__clang__) || (defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)))
# define FUNC_ISA __attribute__ ((target("sse4.1,aes")))
#else
# define FUNC_ISA
#endif
#include <wmmintrin.h>
#include <smmintrin.h>
/*
* Run-time detection of CPU capabilities (AES-NI and SSE4.1)
*/
#if defined(__clang__) || defined(__GNUC__)
#include <cpuid.h>
/*
 * Run-time CPU probe (GCC/Clang flavour).
 *
 * Queries CPUID leaf 1 and returns non-zero only when ECX reports both
 * AES-NI (bit 25) and SSE4.1 (bit 19).
 *
 * __get_cpuid() is used instead of the raw __cpuid() macro because it
 * first verifies that the requested leaf is available and returns 0
 * otherwise; in that case the CPU is reported as unsupported rather
 * than executing CPUID blindly.
 */
INLINE static int supports_aes_ni(void)
{
    unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;

    if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
        return 0;

    /* Check AES and SSE4.1 */
    return (ecx & (1u << 25)) && (ecx & (1u << 19));
}
#else /* defined(__clang__) || defined(__GNUC__) */
#include <intrin.h>
/*
 * Run-time CPU probe (non-GCC flavour, using the <intrin.h> __cpuid).
 *
 * Queries CPUID leaf 1 and returns non-zero only when the ECX word
 * (index 2 of the result array) reports both AES-NI (bit 25) and
 * SSE4.1 (bit 19).
 */
INLINE static int supports_aes_ni(void)
{
    enum { ECX_SSE41 = 1 << 19, ECX_AES = 1 << 25 };
    int regs[4] = { 0, 0, 0, 0 };

    __cpuid(regs, 1);

    /* Check AES and SSE4.1 */
    return (regs[2] & ECX_AES) && (regs[2] & ECX_SSE41);
}
#endif /* defined(__clang__) || defined(__GNUC__) */
/*
* AES-NI encrypt/decrypt core
*/
/*
 * Encrypt a single AES block with the AES-NI instructions.
 *
 * ctx - context whose keysched_e points at the encryption round keys
 *       and whose rounds field is 10, 12 or 14.
 * in  - AES_BLOCKSZ bytes of plaintext; read with an unaligned load.
 * out - receives AES_BLOCKSZ bytes of ciphertext; written with an
 *       unaligned store.
 *
 * The switch enters at the case matching the round count and falls
 * through, so the longer variants simply run extra rounds before the
 * common ten-round tail.
 *
 * Any other round count trips assert(0). If assertions are compiled
 * out, that branch does nothing and the block is stored after only the
 * initial round-key XOR, so ctx->rounds must be valid.
 */
FUNC_ISA
static void cf_aes_ni_encrypt(cf_aes_ni_context *ctx,
                              const uint8_t in[AES_BLOCKSZ],
                              uint8_t out[AES_BLOCKSZ])
{
    const __m128i *keysched = ctx->keysched_e;

    /* Initial AddRoundKey; the load keeps the const qualifier of `in'. */
    __m128i enc = _mm_xor_si128(_mm_loadu_si128((const __m128i *)in),
                                *keysched++);

    switch (ctx->rounds) {
    case 14:
        enc = _mm_aesenc_si128(enc, *keysched++);
        enc = _mm_aesenc_si128(enc, *keysched++);
        /* fallthrough */
    case 12:
        enc = _mm_aesenc_si128(enc, *keysched++);
        enc = _mm_aesenc_si128(enc, *keysched++);
        /* fallthrough */
    case 10:
        enc = _mm_aesenc_si128(enc, *keysched++);
        enc = _mm_aesenc_si128(enc, *keysched++);
        enc = _mm_aesenc_si128(enc, *keysched++);
        enc = _mm_aesenc_si128(enc, *keysched++);
        enc = _mm_aesenc_si128(enc, *keysched++);
        enc = _mm_aesenc_si128(enc, *keysched++);
        enc = _mm_aesenc_si128(enc, *keysched++);
        enc = _mm_aesenc_si128(enc, *keysched++);
        enc = _mm_aesenc_si128(enc, *keysched++);
        enc = _mm_aesenclast_si128(enc, *keysched++);
        break;
    default:
        assert(0);
        break;
    }

    _mm_storeu_si128((__m128i *)out, enc);
}
/*
 * Decrypt a single AES block with the AES-NI instructions.
 *
 * ctx - context whose keysched_d points at the decryption round keys
 *       and whose rounds field is 10, 12 or 14.
 * in  - AES_BLOCKSZ bytes of ciphertext; read with an unaligned load.
 * out - receives AES_BLOCKSZ bytes of plaintext; written with an
 *       unaligned store.
 *
 * The switch enters at the case matching the round count and falls
 * through, so the longer variants simply run extra rounds before the
 * common ten-round tail.
 *
 * Any other round count trips assert(0). If assertions are compiled
 * out, that branch does nothing and the block is stored after only the
 * initial round-key XOR, so ctx->rounds must be valid.
 */
FUNC_ISA
static void cf_aes_ni_decrypt(cf_aes_ni_context *ctx,
                              const uint8_t in[AES_BLOCKSZ],
                              uint8_t out[AES_BLOCKSZ])
{
    const __m128i *keysched = ctx->keysched_d;

    /* Initial AddRoundKey; the load keeps the const qualifier of `in'. */
    __m128i dec = _mm_xor_si128(_mm_loadu_si128((const __m128i *)in),
                                *keysched++);

    switch (ctx->rounds) {
    case 14:
        dec = _mm_aesdec_si128(dec, *keysched++);
        dec = _mm_aesdec_si128(dec, *keysched++);
        /* fallthrough */
    case 12:
        dec = _mm_aesdec_si128(dec, *keysched++);
        dec = _mm_aesdec_si128(dec, *keysched++);
        /* fallthrough */
    case 10:
        dec = _mm_aesdec_si128(dec, *keysched++);
        dec = _mm_aesdec_si128(dec, *keysched++);
        dec = _mm_aesdec_si128(dec, *keysched++);
        dec = _mm_aesdec_si128(dec, *keysched++);
        dec = _mm_aesdec_si128(dec, *keysched++);
        dec = _mm_aesdec_si128(dec, *keysched++);
        dec = _mm_aesdec_si128(dec, *keysched++);
        dec = _mm_aesdec_si128(dec, *keysched++);
        dec = _mm_aesdec_si128(dec, *keysched++);
        dec = _mm_aesdeclast_si128(dec, *keysched++);
        break;
    default:
        assert(0);
        break;
    }

    _mm_storeu_si128((__m128i *)out, dec);
}
/*
 * The main key expansion.
 *
 * key        - raw cipher key, key_words * 4 bytes long.
 * key_words  - key length in 32-bit words; must be 4, 6 or 8.
 * keysched_e - receives rounds + 1 encryption round keys, where
 *              rounds = key_words + 6.
 * keysched_d - receives rounds + 1 decryption round keys: the
 *              encryption keys in reverse order, with
 *              _mm_aesimc_si128 applied to all but the first and last.
 *
 * Both output pointers are written through as __m128i, so the caller
 * is expected to pass 16-byte-aligned storage.
 */
static FUNC_ISA void cf_aes_ni_key_expand(const unsigned char *key, size_t key_words,
                                          __m128i *keysched_e, __m128i *keysched_d)
{
    static const uint8_t key_setup_round_constants[] = {
        /* The first few powers of X in GF(2^8), used during key setup.
         * This can safely be a lookup table without side channel risks,
         * because key setup iterates through it once in a standard way
         * regardless of the key. */
        0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36,
    };

    /*
     * Store the key schedule as 32-bit integers during expansion, so
     * that it's easy to refer back to individual previous words. We
     * collect them into the final __m128i form at the end.
     */
    uint32_t sched[(CF_AES_MAXROUNDS + 1) * NB];
    unsigned rconpos = 0;

    /*
     * Reject anything but a real AES key size up front: key_words == 0
     * would divide by zero below and an oversized value would run off
     * the end of sched[].
     */
    assert(key_words == 4 || key_words == 6 || key_words == 8);

    size_t rounds = key_words + 6;
    size_t sched_words = (rounds + 1) * NB;
    assert(sched_words <= sizeof(sched) / sizeof(sched[0]));

    for (size_t i = 0; i < sched_words; i++) {
        if (i < key_words) {
            /* The first key_words words are the key itself. */
            sched[i] = read32_le(key + 4 * i);
        } else {
            uint32_t temp = sched[i - 1];
            bool rotate_and_round_constant = (i % key_words == 0);
            bool only_sub = (key_words == 8 && i % 8 == 4);

            if (rotate_and_round_constant) {
                /*
                 * With the word placed in lane 1 and a zero round
                 * constant, lane 1 of the result is the substituted
                 * and rotated word.
                 */
                __m128i v = _mm_setr_epi32(0, temp, 0, 0);
                v = _mm_aeskeygenassist_si128(v, 0);
                temp = _mm_extract_epi32(v, 1);

                assert(rconpos < lenof(key_setup_round_constants));
                temp ^= key_setup_round_constants[rconpos++];
            } else if (only_sub) {
                /* Lane 0 of the result is the substituted word, unrotated. */
                __m128i v = _mm_setr_epi32(0, temp, 0, 0);
                v = _mm_aeskeygenassist_si128(v, 0);
                temp = _mm_extract_epi32(v, 0);
            }

            sched[i] = sched[i - key_words] ^ temp;
        }
    }

    /*
     * Combine the key schedule words into __m128i vectors and store
     * them in the output context.
     */
    for (size_t round = 0; round <= rounds; round++) {
        keysched_e[round] = _mm_setr_epi32(
            sched[4 * round    ], sched[4 * round + 1],
            sched[4 * round + 2], sched[4 * round + 3]);
    }

    /*
     * Wipe the temporary copy of the key schedule. The stores go
     * through a volatile pointer because a plain memset() on a local
     * that is never read again may be removed by the optimizer.
     */
    {
        volatile uint32_t *wipe = sched;
        for (size_t i = 0; i < sizeof(sched) / sizeof(sched[0]); i++)
            wipe[i] = 0;
    }

    /*
     * Now prepare the modified keys for the inverse cipher.
     */
    for (size_t eround = 0; eround <= rounds; eround++) {
        size_t dround = rounds - eround;
        __m128i rkey = keysched_e[eround];
        if (eround && dround)      /* neither first nor last */
            rkey = _mm_aesimc_si128(rkey);
        keysched_d[dround] = rkey;
    }
}
/*
 * Set up a cf_aes_ni_context. `keylen' is measured in
 * bytes; it can be either 16 (128-bit), 24 (192-bit), or 32
 * (256-bit).
 *
 * Returns 1 once the context holds both key schedules. Returns 0,
 * without touching ctx, when supports_aes_ni() reports that the CPU
 * lacks the required instructions.
 */
static int cf_aes_ni_setup(cf_aes_ni_context *ctx, const unsigned char *key, int keylen)
{
    size_t bufaddr;

    if (!supports_aes_ni())
        return 0;

    assert(keylen == 16 || keylen == 24 || keylen == 32);
    ctx->rounds = (uint32_t)(6 + (keylen / 4));

    /*
     * Ensure the key schedule arrays are 16-byte aligned.
     * (~addr + 1) is -addr, so masking with 0xF gives the number of
     * bytes up to the next 16-byte boundary; dividing by the word size
     * turns that into an index into the uint32_t backing array.
     */
    bufaddr = (size_t)ctx->ks_e;
    ctx->keysched_e = (__m128i *)(ctx->ks_e + (0xF & (~bufaddr + 1)) / sizeof(uint32_t));
    assert((size_t)ctx->keysched_e % 16 == 0);

    bufaddr = (size_t)ctx->ks_d;
    ctx->keysched_d = (__m128i *)(ctx->ks_d + (0xF & (~bufaddr + 1)) / sizeof(uint32_t));
    assert((size_t)ctx->keysched_d % 16 == 0);

    cf_aes_ni_key_expand(key, keylen / sizeof(uint32_t), ctx->keysched_e, ctx->keysched_d);
    return 1;
}
#else /* COMPILER_SUPPORTS_AES_NI */
/*
 * Placeholder for builds where the compiler cannot produce the AES-NI
 * code path. supports_aes_ni() returns 0 in this configuration, so
 * this function is not expected to be called; doing so trips the
 * assertion.
 *
 * No FUNC_ISA here: that macro is only defined inside the
 * COMPILER_SUPPORTS_AES_NI branch.
 */
static void cf_aes_ni_encrypt(cf_aes_ni_context *ctx,
                              const uint8_t in[AES_BLOCKSZ],
                              uint8_t out[AES_BLOCKSZ])
{
    (void)ctx;
    (void)in;
    (void)out;
    assert(0);
}
/*
 * Placeholder for builds where the compiler cannot produce the AES-NI
 * code path. supports_aes_ni() returns 0 in this configuration, so
 * this function is not expected to be called; doing so trips the
 * assertion.
 *
 * No FUNC_ISA here: that macro is only defined inside the
 * COMPILER_SUPPORTS_AES_NI branch.
 */
static void cf_aes_ni_decrypt(cf_aes_ni_context *ctx,
                              const uint8_t in[AES_BLOCKSZ],
                              uint8_t out[AES_BLOCKSZ])
{
    (void)ctx;
    (void)in;
    (void)out;
    assert(0);
}
/*
 * Placeholder for builds where the compiler cannot produce the AES-NI
 * code path. The signature matches the real implementation (int
 * return, const key) so the same calling code compiles in both
 * configurations.
 *
 * Always returns 0, the same value the real implementation returns
 * when AES-NI is unavailable, and leaves ctx untouched.
 */
static int cf_aes_ni_setup(cf_aes_ni_context *ctx, const unsigned char *key, int keylen)
{
    (void)ctx;
    (void)key;
    (void)keylen;
    return 0;
}
/*
 * AES-NI support was not compiled in, so report it as unavailable
 * regardless of what the CPU offers.
 */
static int supports_aes_ni(void)
{
    return 0;
}
#endif /* COMPILER_SUPPORTS_AES_NI */
#undef INLINE