1 /*
2 * ====================================================================
3 * Written by Intel Corporation for the OpenSSL project to add support
4 * for Intel AES-NI instructions. Rights for redistribution and usage
5 * in source and binary forms are granted according to the OpenSSL
6 * license.
7 *
8 * Author: Huang Ying <ying.huang at intel dot com>
9 * Vinodh Gopal <vinodh.gopal at intel dot com>
10 * Kahraman Akdemir
11 *
12 * Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD)
13 * instructions that are going to be introduced in the next generation
14 * of Intel processor, as of 2009. These instructions enable fast and
15 * secure data encryption and decryption, using the Advanced Encryption
16 * Standard (AES), defined by FIPS Publication number 197. The
17 * architecture introduces six instructions that offer full hardware
18 * support for AES. Four of them support high performance data
19 * encryption and decryption, and the other two instructions support
20 * the AES key expansion procedure.
21 * ====================================================================
22 */
23
24 /*
25 * ====================================================================
26 * Copyright (c) 1998-2008 The OpenSSL Project. All rights reserved.
27 *
28 * Redistribution and use in source and binary forms, with or without
29 * modification, are permitted provided that the following conditions
30 * are met:
31 *
32 * 1. Redistributions of source code must retain the above copyright
33 * notice, this list of conditions and the following disclaimer.
34 *
35 * 2. Redistributions in binary form must reproduce the above copyright
36 * notice, this list of conditions and the following disclaimer in
37 * the documentation and/or other materials provided with the
38 * distribution.
39 *
40 * 3. All advertising materials mentioning features or use of this
41 * software must display the following acknowledgment:
42 * "This product includes software developed by the OpenSSL Project
43 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
44 *
45 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
46 * endorse or promote products derived from this software without
47 * prior written permission. For written permission, please contact
48 * openssl-core@openssl.org.
49 *
50 * 5. Products derived from this software may not be called "OpenSSL"
51 * nor may "OpenSSL" appear in their names without prior written
52 * permission of the OpenSSL Project.
53 *
54 * 6. Redistributions of any form whatsoever must retain the following
55 * acknowledgment:
56 * "This product includes software developed by the OpenSSL Project
57 * for use in the OpenSSL Toolkit (http://www.openssl.org/)"
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
60 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
62 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
63 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
64 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
65 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
66 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
67 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
68 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
69 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
70 * OF THE POSSIBILITY OF SUCH DAMAGE.
71 * ====================================================================
72 */
73
74 /*
75 * ====================================================================
76 * OpenSolaris OS modifications
77 *
78 * This source originates as files aes-intel.S and eng_aesni_asm.pl, in
 * patches sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by
80 * Huang Ying of Intel to the openssl-dev mailing list under the subject
81 * of "Add support to Intel AES-NI instruction set for x86_64 platform".
82 *
83 * This OpenSolaris version has these major changes from the original source:
84 *
85 * 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
86 * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function
87 * definitions for lint.
88 *
89 * 2. Formatted code, added comments, and added #includes and #defines.
90 *
91 * 3. If bit CR0.TS is set, clear and set the TS bit, after and before
92 * calling kpreempt_disable() and kpreempt_enable().
 * If the TS bit is not set, save and restore %xmm registers at the beginning
 * and end of function calls (%xmm* registers are not saved and restored
 * during kernel thread preemption).
96 *
97 * 4. Renamed functions, reordered parameters, and changed return value
98 * to match OpenSolaris:
99 *
100 * OpenSSL interface:
101 * int intel_AES_set_encrypt_key(const unsigned char *userKey,
102 * const int bits, AES_KEY *key);
103 * int intel_AES_set_decrypt_key(const unsigned char *userKey,
104 * const int bits, AES_KEY *key);
105 * Return values for above are non-zero on error, 0 on success.
106 *
107 * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
108 * const AES_KEY *key);
109 * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
110 * const AES_KEY *key);
111 * typedef struct aes_key_st {
112 * unsigned int rd_key[4 *(AES_MAXNR + 1)];
113 * int rounds;
114 * unsigned int pad[3];
115 * } AES_KEY;
 * Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules
 * (ks32) instead of 64-bit (ks64)).
118 * Number of rounds (aka round count) is at offset 240 of AES_KEY.
119 *
120 * OpenSolaris OS interface (#ifdefs removed for readability):
121 * int rijndael_key_setup_dec_intel(uint32_t rk[],
122 * const uint32_t cipherKey[], uint64_t keyBits);
123 * int rijndael_key_setup_enc_intel(uint32_t rk[],
124 * const uint32_t cipherKey[], uint64_t keyBits);
125 * Return values for above are 0 on error, number of rounds on success.
126 *
127 * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
128 * const uint32_t pt[4], uint32_t ct[4]);
129 * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
130 * const uint32_t pt[4], uint32_t ct[4]);
131 * typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
132 * uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
133 *
134 * typedef union {
135 * uint32_t ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
136 * } aes_ks_t;
137 * typedef struct aes_key {
138 * aes_ks_t encr_ks, decr_ks;
139 * long double align128;
140 * int flags, nr, type;
141 * } aes_key_t;
142 *
143 * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
144 * ct is crypto text, and MAX_AES_NR is 14.
145 * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
146 *
147 * Note2: aes_ks_t must be aligned on a 0 mod 128 byte boundary.
148 *
149 * ====================================================================
150 */
151
152
153 #if defined(lint) || defined(__lint)
154
155 #include <sys/types.h>
156
/*
 * lint(1B) stub for aes_encrypt_intel().  It only marks its arguments as
 * used; nothing is read from or written to the buffers.
 */
void
aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4],
    uint32_t ct[4])
{
	(void) rk;
	(void) Nr;
	(void) pt;
	(void) ct;
}
/*
 * lint(1B) stub for aes_decrypt_intel().  It only marks its arguments as
 * used; nothing is read from or written to the buffers.
 */
void
aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4],
    uint32_t pt[4])
{
	(void) rk;
	(void) Nr;
	(void) ct;
	(void) pt;
}
/*
 * lint(1B) stub for rijndael_key_setup_enc_intel().  Ignores its arguments
 * and always returns 0.
 */
int
rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
    uint64_t keyBits)
{
	(void) rk;
	(void) cipherKey;
	(void) keyBits;

	return (0);
}
/*
 * lint(1B) stub for rijndael_key_setup_dec_intel().  Ignores its arguments
 * and always returns 0.
 */
int
rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
    uint64_t keyBits)
{
	(void) rk;
	(void) cipherKey;
	(void) keyBits;

	return (0);
}
179
180
181 #elif defined(HAVE_AES) /* guard by instruction set */
182
183 #define _ASM
184 #include <sys/asm_linkage.h>
185
186 /*
 * _key_expansion_128(), _key_expansion_192a(), _key_expansion_192b(),
 * _key_expansion_256a(), _key_expansion_256b()
 *
 * Helper functions called by rijndael_key_setup_enc_intel().
191 * Also used indirectly by rijndael_key_setup_dec_intel().
192 *
193 * Input:
194 * %xmm0 User-provided cipher key
195 * %xmm1 Round constant
196 * Output:
197 * (%rcx) AES key
198 */
199
ENTRY_NP2(_key_expansion_128, _key_expansion_256a)
_key_expansion_128_local:
_key_expansion_256a_local:
	/*
	 * Derive one 16-byte round key and append it to the key schedule.
	 *
	 * In:	%xmm0	round key being evolved (updated in place)
	 *	%xmm1	aeskeygenassist output for this step
	 *	%xmm4	scratch; the caller zeroes it before the first call
	 *	%rcx	address of the next free slot in the key schedule
	 * Out:	%xmm0	new round key, also stored at the old (%rcx)
	 *	%rcx	advanced by 16 bytes
	 */

	// Fold %xmm0 into a running XOR of its own four dwords.
	shufps	$0x10, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0x8c, %xmm0, %xmm4
	pxor	%xmm4, %xmm0

	// Broadcast dword 3 of the aeskeygenassist result and mix it in.
	pshufd	$0xff, %xmm1, %xmm1
	pxor	%xmm1, %xmm0

	movups	%xmm0, (%rcx)		// emit the round key
	add	$0x10, %rcx		// step to the next slot
	RET
	nop				// never executed (follows RET)
SET_SIZE(_key_expansion_128)
SET_SIZE(_key_expansion_256a)
215
216
ENTRY_NP(_key_expansion_192a)
_key_expansion_192a_local:
	/*
	 * AES-192 helper, "a" step: advances the 24 bytes of key state held
	 * in %xmm0 (16 bytes) and the low half of %xmm2 (8 bytes), and
	 * writes 32 bytes to the key schedule.
	 *
	 * In:	%xmm0, %xmm2	current key state
	 *	%xmm1		aeskeygenassist output for this step
	 *	%xmm4		scratch; the caller zeroes it before the first call
	 *	%rcx		address of the next free slot in the key schedule
	 * Out:	%xmm0, %xmm2	updated key state
	 *	%rcx		advanced by 32 bytes
	 * Also clobbers %xmm1, %xmm3, %xmm5 and %xmm6.
	 */

	// Fold %xmm0 into a running XOR of its own four dwords.
	shufps	$0x10, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0x8c, %xmm0, %xmm4
	pxor	%xmm4, %xmm0

	// Broadcast dword 1 of the aeskeygenassist result and mix it in.
	pshufd	$0x55, %xmm1, %xmm1
	pxor	%xmm1, %xmm0

	// Update %xmm2, keeping a copy of its old value in %xmm6.
	movaps	%xmm2, %xmm6
	movaps	%xmm2, %xmm5
	pslldq	$4, %xmm5		// old %xmm2 shifted up by one dword
	pshufd	$0xff, %xmm0, %xmm3	// broadcast dword 3 of the new %xmm0
	pxor	%xmm3, %xmm2
	pxor	%xmm5, %xmm2

	// First 16 bytes out: low half of old %xmm2, then low half of %xmm0.
	shufps	$0x44, %xmm0, %xmm6
	movups	%xmm6, (%rcx)

	// Next 16 bytes out: high half of %xmm0, then low half of new %xmm2.
	movaps	%xmm0, %xmm1
	shufps	$0x4e, %xmm2, %xmm1
	movups	%xmm1, 0x10(%rcx)

	add	$0x20, %rcx		// two slots were written
	RET
SET_SIZE(_key_expansion_192a)
241
242
ENTRY_NP(_key_expansion_192b)
_key_expansion_192b_local:
	/*
	 * AES-192 helper, "b" step: advances the key state held in %xmm0 and
	 * %xmm2 and writes the new %xmm0 (16 bytes) to the key schedule.
	 *
	 * In:	%xmm0, %xmm2	current key state
	 *	%xmm1		aeskeygenassist output for this step
	 *	%xmm4		scratch; the caller zeroes it before the first call
	 *	%rcx		address of the next free slot in the key schedule
	 * Out:	%xmm0, %xmm2	updated key state
	 *	%rcx		advanced by 16 bytes
	 * Also clobbers %xmm1, %xmm3 and %xmm5.
	 */

	// Fold %xmm0 into a running XOR of its own four dwords.
	shufps	$0x10, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0x8c, %xmm0, %xmm4
	pxor	%xmm4, %xmm0

	// Broadcast dword 1 of the aeskeygenassist result and mix it in.
	pshufd	$0x55, %xmm1, %xmm1
	pxor	%xmm1, %xmm0

	// Update %xmm2 from its shifted self and dword 3 of the new %xmm0.
	movaps	%xmm2, %xmm5
	pslldq	$4, %xmm5
	pshufd	$0xff, %xmm0, %xmm3
	pxor	%xmm3, %xmm2
	pxor	%xmm5, %xmm2

	movups	%xmm0, (%rcx)		// emit the round key
	add	$0x10, %rcx		// step to the next slot
	RET
SET_SIZE(_key_expansion_192b)
262
263
ENTRY_NP(_key_expansion_256b)
_key_expansion_256b_local:
	/*
	 * AES-256 helper, "b" step: same shape as _key_expansion_256a, but it
	 * evolves %xmm2 instead of %xmm0 and uses dword 2 of %xmm1.
	 *
	 * In:	%xmm2	round key being evolved (updated in place)
	 *	%xmm1	aeskeygenassist output for this step
	 *	%xmm4	scratch; the caller zeroes it before the first call
	 *	%rcx	address of the next free slot in the key schedule
	 * Out:	%xmm2	new round key, also stored at the old (%rcx)
	 *	%rcx	advanced by 16 bytes
	 */

	// Fold %xmm2 into a running XOR of its own four dwords.
	shufps	$0x10, %xmm2, %xmm4
	pxor	%xmm4, %xmm2
	shufps	$0x8c, %xmm2, %xmm4
	pxor	%xmm4, %xmm2

	// Broadcast dword 2 of the aeskeygenassist result and mix it in.
	pshufd	$0xaa, %xmm1, %xmm1
	pxor	%xmm1, %xmm2

	movups	%xmm2, (%rcx)		// emit the round key
	add	$0x10, %rcx		// step to the next slot
	RET
SET_SIZE(_key_expansion_256b)
276
277
278 /*
279 * rijndael_key_setup_enc_intel()
280 * Expand the cipher key into the encryption key schedule.
281 *
282 * For kernel code, caller is responsible for ensuring kpreempt_disable()
283 * has been called. This is because %xmm registers are not saved/restored.
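 * NOTE: The file header lists clearing and setting the CR0.TS bit on entry
 * and exit, respectively, if TS is set on entry (and otherwise saving and
 * restoring %xmm registers on the stack) as an OpenSolaris modification.
 * No such handling is visible in the function below, so the caller must
 * preserve any live %xmm state itself.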
287 *
288 * OpenSolaris interface:
289 * int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
290 * uint64_t keyBits);
291 * Return value is 0 on error, number of rounds on success.
292 *
293 * Original Intel OpenSSL interface:
294 * int intel_AES_set_encrypt_key(const unsigned char *userKey,
295 * const int bits, AES_KEY *key);
296 * Return value is non-zero on error, 0 on success.
297 */
298
/*
 * Argument registers for the key-setup entry points.  Both interfaces take
 * the same three arguments (key schedule, user key, key size in bits), but
 * in a different order.
 */
#ifndef	OPENSSL_INTERFACE	/* OpenSolaris Interface */

#define	AESKEY		rdi	/* P1, 64 bits */
#define	USERCIPHERKEY	rsi	/* P2, 64 bits */
#define	KEYSIZE32	edx	/* P3, 32 bits */
#define	KEYSIZE64	rdx	/* P3, 64 bits */

#else	/* OPENSSL_INTERFACE */

/* Emit the key-setup routines under their OpenSSL names. */
#define	rijndael_key_setup_enc_intel	intel_AES_set_encrypt_key
#define	rijndael_key_setup_dec_intel	intel_AES_set_decrypt_key

#define	USERCIPHERKEY	rdi	/* P1, 64 bits */
#define	KEYSIZE32	esi	/* P2, 32 bits */
#define	KEYSIZE64	rsi	/* P2, 64 bits */
#define	AESKEY		rdx	/* P3, 64 bits */

#endif	/* OPENSSL_INTERFACE */

/* Temporaries that alias argument registers. */
#define	ROUNDS32	KEYSIZE32	/* temp */
#define	ROUNDS64	KEYSIZE64	/* temp */
#define	ENDAESKEY	USERCIPHERKEY	/* temp */
318
ENTRY_NP(rijndael_key_setup_enc_intel)
rijndael_key_setup_enc_intel_local:
	/*
	 * Expand the user cipher key into the encryption key schedule.
	 *
	 * In:	%AESKEY		key schedule to fill in (NULL is rejected)
	 *	%USERCIPHERKEY	user cipher key (NULL is rejected)
	 *	%KEYSIZE32	key size in bits: 128, 192 or 256
	 * Out:	%rax		OpenSolaris: 10, 12 or 14 rounds; 0 on error
	 *			OpenSSL: 0 on success, -1 for a NULL pointer,
	 *			-2 for an unsupported key size
	 * Clobbers %rcx, flags and up to %xmm0-%xmm6 (through the
	 * _key_expansion_* helpers).  Under OPENSSL_INTERFACE it also
	 * overwrites %KEYSIZE32, which is reused as ROUNDS32.
	 *
	 * Neither the user key nor the key schedule is accessed until the
	 * key size has been validated, so a failed call leaves the schedule
	 * untouched.
	 */
	FRAME_BEGIN
	// NULL pointer sanity check
	test	%USERCIPHERKEY, %USERCIPHERKEY
	jz	.Lenc_key_invalid_param
	test	%AESKEY, %AESKEY
	jz	.Lenc_key_invalid_param

	lea	0x10(%AESKEY), %rcx	// %rcx -> slot for round key 1
	pxor	%xmm4, %xmm4		// xmm4 is assumed 0 in _key_expansion_x

	cmp	$256, %KEYSIZE32
	jnz	.Lenc_key192

	// AES 256: 14 rounds in encryption key schedule
	movups	(%USERCIPHERKEY), %xmm0	// user key (first 16 bytes)
	movups	%xmm0, (%AESKEY)	// round key 0
#ifdef OPENSSL_INTERFACE
	mov	$14, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)	// key.rounds = 14
#endif	/* OPENSSL_INTERFACE */

	movups	0x10(%USERCIPHERKEY), %xmm2	// other user key (2nd 16 bytes)
	movups	%xmm2, (%rcx)		// round key 1
	add	$0x10, %rcx

	aeskeygenassist $0x1, %xmm2, %xmm1	// expand the key
	call	_key_expansion_256a_local
	aeskeygenassist $0x1, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x2, %xmm2, %xmm1	// expand the key
	call	_key_expansion_256a_local
	aeskeygenassist $0x2, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x4, %xmm2, %xmm1	// expand the key
	call	_key_expansion_256a_local
	aeskeygenassist $0x4, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x8, %xmm2, %xmm1	// expand the key
	call	_key_expansion_256a_local
	aeskeygenassist $0x8, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x10, %xmm2, %xmm1	// expand the key
	call	_key_expansion_256a_local
	aeskeygenassist $0x10, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x20, %xmm2, %xmm1	// expand the key
	call	_key_expansion_256a_local
	aeskeygenassist $0x20, %xmm0, %xmm1
	call	_key_expansion_256b_local
	aeskeygenassist $0x40, %xmm2, %xmm1	// expand the key
	call	_key_expansion_256a_local

#ifdef OPENSSL_INTERFACE
	xor	%eax, %eax		// return 0 (OK)
#else	/* Open Solaris Interface */
	mov	$14, %eax		// return # rounds = 14
#endif
	FRAME_END
	RET

.balign 4
.Lenc_key192:
	cmp	$192, %KEYSIZE32
	jnz	.Lenc_key128

	// AES 192: 12 rounds in encryption key schedule
	movups	(%USERCIPHERKEY), %xmm0	// user key (first 16 bytes)
	movups	%xmm0, (%AESKEY)	// round key 0
#ifdef OPENSSL_INTERFACE
	mov	$12, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)	// key.rounds = 12
#endif	/* OPENSSL_INTERFACE */

	movq	0x10(%USERCIPHERKEY), %xmm2	// other user key (last 8 bytes)
	aeskeygenassist $0x1, %xmm2, %xmm1	// expand the key
	call	_key_expansion_192a_local
	aeskeygenassist $0x2, %xmm2, %xmm1	// expand the key
	call	_key_expansion_192b_local
	aeskeygenassist $0x4, %xmm2, %xmm1	// expand the key
	call	_key_expansion_192a_local
	aeskeygenassist $0x8, %xmm2, %xmm1	// expand the key
	call	_key_expansion_192b_local
	aeskeygenassist $0x10, %xmm2, %xmm1	// expand the key
	call	_key_expansion_192a_local
	aeskeygenassist $0x20, %xmm2, %xmm1	// expand the key
	call	_key_expansion_192b_local
	aeskeygenassist $0x40, %xmm2, %xmm1	// expand the key
	call	_key_expansion_192a_local
	aeskeygenassist $0x80, %xmm2, %xmm1	// expand the key
	call	_key_expansion_192b_local

#ifdef OPENSSL_INTERFACE
	xor	%eax, %eax		// return 0 (OK)
#else	/* OpenSolaris Interface */
	mov	$12, %eax		// return # rounds = 12
#endif
	FRAME_END
	RET

.balign 4
.Lenc_key128:
	cmp	$128, %KEYSIZE32
	jnz	.Lenc_key_invalid_key_bits

	// AES 128: 10 rounds in encryption key schedule
	movups	(%USERCIPHERKEY), %xmm0	// user key (all 16 bytes)
	movups	%xmm0, (%AESKEY)	// round key 0
#ifdef OPENSSL_INTERFACE
	mov	$10, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)	// key.rounds = 10
#endif	/* OPENSSL_INTERFACE */

	aeskeygenassist $0x1, %xmm0, %xmm1	// expand the key
	call	_key_expansion_128_local
	aeskeygenassist $0x2, %xmm0, %xmm1	// expand the key
	call	_key_expansion_128_local
	aeskeygenassist $0x4, %xmm0, %xmm1	// expand the key
	call	_key_expansion_128_local
	aeskeygenassist $0x8, %xmm0, %xmm1	// expand the key
	call	_key_expansion_128_local
	aeskeygenassist $0x10, %xmm0, %xmm1	// expand the key
	call	_key_expansion_128_local
	aeskeygenassist $0x20, %xmm0, %xmm1	// expand the key
	call	_key_expansion_128_local
	aeskeygenassist $0x40, %xmm0, %xmm1	// expand the key
	call	_key_expansion_128_local
	aeskeygenassist $0x80, %xmm0, %xmm1	// expand the key
	call	_key_expansion_128_local
	aeskeygenassist $0x1b, %xmm0, %xmm1	// expand the key
	call	_key_expansion_128_local
	aeskeygenassist $0x36, %xmm0, %xmm1	// expand the key
	call	_key_expansion_128_local

#ifdef OPENSSL_INTERFACE
	xor	%eax, %eax		// return 0 (OK)
#else	/* OpenSolaris Interface */
	mov	$10, %eax		// return # rounds = 10
#endif
	FRAME_END
	RET

.Lenc_key_invalid_param:
#ifdef OPENSSL_INTERFACE
	mov	$-1, %rax	// user key or AES key pointer is NULL
	FRAME_END
	RET
#else
	/* FALLTHROUGH */
#endif	/* OPENSSL_INTERFACE */

.Lenc_key_invalid_key_bits:
#ifdef OPENSSL_INTERFACE
	mov	$-2, %rax	// keysize is invalid
#else	/* Open Solaris Interface */
	xor	%eax, %eax	// a key pointer is NULL or invalid keysize
#endif	/* OPENSSL_INTERFACE */
	FRAME_END
	RET
SET_SIZE(rijndael_key_setup_enc_intel)
476
477
478 /*
479 * rijndael_key_setup_dec_intel()
480 * Expand the cipher key into the decryption key schedule.
481 *
482 * For kernel code, caller is responsible for ensuring kpreempt_disable()
483 * has been called. This is because %xmm registers are not saved/restored.
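 * NOTE: The file header lists clearing and setting the CR0.TS bit on entry
 * and exit, respectively, if TS is set on entry (and otherwise saving and
 * restoring %xmm registers on the stack) as an OpenSolaris modification.
 * No such handling is visible in the function below, so the caller must
 * preserve any live %xmm state itself.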
487 *
488 * OpenSolaris interface:
489 * int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
490 * uint64_t keyBits);
491 * Return value is 0 on error, number of rounds on success.
492 * P1->P2, P2->P3, P3->P1
493 *
494 * Original Intel OpenSSL interface:
495 * int intel_AES_set_decrypt_key(const unsigned char *userKey,
496 * const int bits, AES_KEY *key);
497 * Return value is non-zero on error, 0 on success.
498 */
499
ENTRY_NP(rijndael_key_setup_dec_intel)
	/*
	 * Build the decryption key schedule in three steps:
	 *   1. generate the encryption schedule,
	 *   2. reverse the order of the round keys in place,
	 *   3. run aesimc over every round key except the first and last.
	 * %rax is left exactly as the encryption key setup returned it.
	 */
	FRAME_BEGIN

	// Step 1: the argument registers are handed through unchanged.
	call	rijndael_key_setup_enc_intel_local
	test	%rax, %rax
#ifdef OPENSSL_INTERFACE
	jne	.Ldec_key_exit		// Failed if returned non-0
#else	/* OpenSolaris Interface */
	je	.Ldec_key_exit		// Failed if returned 0
#endif	/* OPENSSL_INTERFACE */

#ifndef OPENSSL_INTERFACE	/* OpenSolaris Interface */
	mov	%rax, %ROUNDS64		// set # rounds (10, 12, or 14)
					// (already set for OpenSSL)
#endif

	shl	$4, %ROUNDS32		// rounds * 16 = offset of last round key
	add	%AESKEY, %ROUNDS64	// ROUNDS64 -> last round key
	mov	%ROUNDS64, %ENDAESKEY	// end marker for the aesimc loop
	lea	0x10(%AESKEY), %rcx	// %rcx -> second round key

	// Step 2: swap round keys from both ends, moving toward the middle.
.balign 4
.Ldec_key_reorder_loop:
	movups	(%ROUNDS64), %xmm1
	movups	(%AESKEY), %xmm0
	movups	%xmm1, (%AESKEY)
	movups	%xmm0, (%ROUNDS64)
	lea	0x10(%AESKEY), %AESKEY
	lea	-0x10(%ROUNDS64), %ROUNDS64
	cmp	%AESKEY, %ROUNDS64
	ja	.Ldec_key_reorder_loop	// until the two cursors meet or cross

	// Step 3: convert each inner round key to a form usable for
	// decryption with the "AES Inverse Mix Columns" instruction.
.balign 4
.Ldec_key_inv_loop:
	movups	(%rcx), %xmm0
	aesimc	%xmm0, %xmm1
	movups	%xmm1, (%rcx)
	lea	0x10(%rcx), %rcx
	cmp	%ENDAESKEY, %rcx
	jne	.Ldec_key_inv_loop	// stop before the last round key

.Ldec_key_exit:
	// OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error
	// OpenSSL: rax = 0 for OK, or non-zero for error
	FRAME_END
	RET
SET_SIZE(rijndael_key_setup_dec_intel)
553
554
555 /*
556 * aes_encrypt_intel()
557 * Encrypt a single block (in and out can overlap).
558 *
559 * For kernel code, caller is responsible for ensuring kpreempt_disable()
560 * has been called. This is because %xmm registers are not saved/restored.
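 * NOTE: The file header lists clearing and setting the CR0.TS bit on entry
 * and exit, respectively, if TS is set on entry (and otherwise saving and
 * restoring %xmm registers on the stack) as an OpenSolaris modification.
 * No such handling is visible in the function below, so the caller must
 * preserve any live %xmm state itself.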
564 *
565 * Temporary register usage:
566 * %xmm0 State
567 * %xmm1 Key
568 *
569 * Original OpenSolaris Interface:
570 * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
571 * const uint32_t pt[4], uint32_t ct[4])
572 *
573 * Original Intel OpenSSL Interface:
574 * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
575 * const AES_KEY *key)
576 */
577
/*
 * Argument registers for the single-block encrypt/decrypt entry points.
 */
#ifndef	OPENSSL_INTERFACE	/* OpenSolaris Interface */

#define	KEYP		rdi	/* P1, 64 bits */
#define	NROUNDS		esi	/* P2, 32 bits */
#define	INP		rdx	/* P3, 64 bits */
#define	OUTP		rcx	/* P4, 64 bits */

#else	/* OPENSSL_INTERFACE */

/* Emit the block routines under their OpenSSL names. */
#define	aes_encrypt_intel	intel_AES_encrypt
#define	aes_decrypt_intel	intel_AES_decrypt

#define	INP		rdi	/* P1, 64 bits */
#define	OUTP		rsi	/* P2, 64 bits */
#define	KEYP		rdx	/* P3, 64 bits */

/* No NROUNDS parameter--offset 240 from KEYP saved in %ecx: */
#define	NROUNDS32	ecx	/* temporary, 32 bits */
#define	NROUNDS		cl	/* temporary, 8 bits */

#endif	/* OPENSSL_INTERFACE */

/* SIMD working registers shared by both interfaces. */
#define	STATE		xmm0	/* temporary, 128 bits */
#define	KEY		xmm1	/* temporary, 128 bits */
599
600
ENTRY_NP(aes_encrypt_intel)
	/*
	 * Encrypt the 16-byte block at (%INP) with the key schedule at
	 * (%KEYP) and store the result at (%OUTP).
	 *
	 * The rounds are fully unrolled.  %KEYP is first biased according to
	 * the round count so that the last ten rounds always use offsets
	 * -0x20..0x70; the 12- and 14-round cases run two or four extra
	 * rounds first and then fall into that shared tail.
	 *
	 * Clobbers %STATE, %KEY, %KEYP and flags (and %NROUNDS32 under
	 * OPENSSL_INTERFACE).
	 */

	movups	(%KEYP), %KEY		// round key 0
	movups	(%INP), %STATE		// input block
#ifdef OPENSSL_INTERFACE
	mov	240(%KEYP), %NROUNDS32	// round count
#else	/* OpenSolaris Interface */
	/* Round count is already present as P2 in %rsi/%esi */
#endif	/* OPENSSL_INTERFACE */

	pxor	%KEY, %STATE		// round 0
	lea	0x30(%KEYP), %KEYP	// bias for the 10-round layout
	cmp	$12, %NROUNDS
	jb	.Lenc128		// fewer than 12 rounds
	lea	0x20(%KEYP), %KEYP	// two more keys; lea keeps the flags
	je	.Lenc192		// exactly 12 rounds

	// AES 256
	lea	0x20(%KEYP), %KEYP	// two more keys again
	movups	-0x60(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	-0x50(%KEYP), %KEY
	aesenc	%KEY, %STATE

.balign 4
.Lenc192:
	// AES 192 and 256
	movups	-0x40(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	-0x30(%KEYP), %KEY
	aesenc	%KEY, %STATE

.balign 4
.Lenc128:
	// AES 128, 192, and 256: nine full rounds plus the final round
	movups	-0x20(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	-0x10(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x10(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x20(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x30(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x40(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x50(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x60(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movups	0x70(%KEYP), %KEY
	aesenclast %KEY, %STATE		// last round
	movups	%STATE, (%OUTP)		// output

	RET
SET_SIZE(aes_encrypt_intel)
660
661
662 /*
663 * aes_decrypt_intel()
664 * Decrypt a single block (in and out can overlap).
665 *
666 * For kernel code, caller is responsible for ensuring kpreempt_disable()
667 * has been called. This is because %xmm registers are not saved/restored.
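 * NOTE: The file header lists clearing and setting the CR0.TS bit on entry
 * and exit, respectively, if TS is set on entry (and otherwise saving and
 * restoring %xmm registers on the stack) as an OpenSolaris modification.
 * No such handling is visible in the function below, so the caller must
 * preserve any live %xmm state itself.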
671 *
672 * Temporary register usage:
673 * %xmm0 State
674 * %xmm1 Key
675 *
 * Original OpenSolaris Interface:
 * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
 *	const uint32_t ct[4], uint32_t pt[4])
679 *
680 * Original Intel OpenSSL Interface:
681 * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
682 * const AES_KEY *key);
683 */
ENTRY_NP(aes_decrypt_intel)
	/*
	 * Decrypt the 16-byte block at (%INP) with the key schedule at
	 * (%KEYP) and store the result at (%OUTP).
	 *
	 * The rounds are fully unrolled.  %KEYP is first biased according to
	 * the round count so that the last ten rounds always use offsets
	 * -0x20..0x70; the 12- and 14-round cases run two or four extra
	 * rounds first and then fall into that shared tail.
	 *
	 * Clobbers %STATE, %KEY, %KEYP and flags (and %NROUNDS32 under
	 * OPENSSL_INTERFACE).
	 */

	movups	(%KEYP), %KEY		// round key 0
	movups	(%INP), %STATE		// input block
#ifdef OPENSSL_INTERFACE
	mov	240(%KEYP), %NROUNDS32	// round count
#else	/* OpenSolaris Interface */
	/* Round count is already present as P2 in %rsi/%esi */
#endif	/* OPENSSL_INTERFACE */

	pxor	%KEY, %STATE		// round 0
	lea	0x30(%KEYP), %KEYP	// bias for the 10-round layout
	cmp	$12, %NROUNDS
	jb	.Ldec128		// fewer than 12 rounds
	lea	0x20(%KEYP), %KEYP	// two more keys; lea keeps the flags
	je	.Ldec192		// exactly 12 rounds

	// AES 256
	lea	0x20(%KEYP), %KEYP	// two more keys again
	movups	-0x60(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	-0x50(%KEYP), %KEY
	aesdec	%KEY, %STATE

.balign 4
.Ldec192:
	// AES 192 and 256
	movups	-0x40(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	-0x30(%KEYP), %KEY
	aesdec	%KEY, %STATE

.balign 4
.Ldec128:
	// AES 128, 192, and 256: nine full rounds plus the final round
	movups	-0x20(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	-0x10(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x10(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x20(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x30(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x40(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x50(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x60(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movups	0x70(%KEYP), %KEY
	aesdeclast %KEY, %STATE		// last round
	movups	%STATE, (%OUTP)		// output

	RET
SET_SIZE(aes_decrypt_intel)
743
744 #endif /* lint || __lint */
745
/*
 * For ELF targets, emit an empty .note.GNU-stack section (by convention this
 * tells GNU toolchains that the object does not need an executable stack).
 */
#if defined(__ELF__)
	.section	.note.GNU-stack,"",%progbits
#endif	/* __ELF__ */
Cache object: 69d1125648b6d32b2addaefec76b3899
|