1 /* $FreeBSD$ */
2 /* Do not modify. This file is auto-generated from aesni-x86_64.pl. */
3 .text
4
# void aesni_encrypt(const unsigned char *in,   /* %rdi */
#                    unsigned char *out,        /* %rsi */
#                    const void *key)           /* %rdx */
# Encrypts a single 16-byte block with AES-NI.  240(%rdx) holds the
# round count (OpenSSL AES_KEY layout — confirm against header); the
# loop consumes one 16-byte round key per iteration.
.globl aesni_encrypt
.type aesni_encrypt,@function
.align 16
aesni_encrypt:
.cfi_startproc
movups (%rdi),%xmm2                     # load plaintext block
movl 240(%rdx),%eax                     # eax = round count
movups (%rdx),%xmm0                     # round key 0
movups 16(%rdx),%xmm1                   # round key 1
leaq 32(%rdx),%rdx                      # advance past the two keys loaded
xorps %xmm0,%xmm2                       # initial whitening: block ^= rk[0]
.Loop_enc1_1:
.byte 102,15,56,220,209                 # aesenc %xmm1,%xmm2
decl %eax
movups (%rdx),%xmm1                     # fetch next round key
leaq 16(%rdx),%rdx
jnz .Loop_enc1_1
.byte 102,15,56,221,209                 # aesenclast %xmm1,%xmm2
pxor %xmm0,%xmm0                        # scrub key material from registers
pxor %xmm1,%xmm1
movups %xmm2,(%rsi)                     # store ciphertext
pxor %xmm2,%xmm2                        # scrub output register
.byte 0xf3,0xc3                         # rep ret (branch-predictor-friendly return)
.cfi_endproc
.size aesni_encrypt,.-aesni_encrypt
30
# void aesni_decrypt(const unsigned char *in,   /* %rdi */
#                    unsigned char *out,        /* %rsi */
#                    const void *key)           /* %rdx */
# Decrypts a single 16-byte block with AES-NI; mirror image of
# aesni_encrypt using AESDEC/AESDECLAST on the (inverse) schedule.
.globl aesni_decrypt
.type aesni_decrypt,@function
.align 16
aesni_decrypt:
.cfi_startproc
movups (%rdi),%xmm2                     # load ciphertext block
movl 240(%rdx),%eax                     # eax = round count
movups (%rdx),%xmm0                     # round key 0
movups 16(%rdx),%xmm1                   # round key 1
leaq 32(%rdx),%rdx
xorps %xmm0,%xmm2                       # initial whitening: block ^= rk[0]
.Loop_dec1_2:
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
decl %eax
movups (%rdx),%xmm1                     # fetch next round key
leaq 16(%rdx),%rdx
jnz .Loop_dec1_2
.byte 102,15,56,223,209                 # aesdeclast %xmm1,%xmm2
pxor %xmm0,%xmm0                        # scrub key material from registers
pxor %xmm1,%xmm1
movups %xmm2,(%rsi)                     # store plaintext
pxor %xmm2,%xmm2                        # scrub output register
.byte 0xf3,0xc3                         # rep ret
.cfi_endproc
.size aesni_decrypt, .-aesni_decrypt
# _aesni_encrypt2: encrypt 2 blocks in parallel.
# In:  %rcx = key schedule, %eax = round count, %xmm2/%xmm3 = blocks
# Out: %xmm2/%xmm3 encrypted.  Clobbers %rax, %rcx, %xmm0, %xmm1, flags.
# Interleaving independent AESENC chains hides instruction latency.
.type _aesni_encrypt2,@function
.align 16
_aesni_encrypt2:
.cfi_startproc
movups (%rcx),%xmm0
shll $4,%eax                            # eax = rounds * 16 (schedule bytes)
movups 16(%rcx),%xmm1
xorps %xmm0,%xmm2                       # whitening with rk[0]
xorps %xmm0,%xmm3
movups 32(%rcx),%xmm0
leaq 32(%rcx,%rax,1),%rcx               # rcx -> end of schedule
negq %rax                               # index counts up from -rounds*16 ...
addq $16,%rax                           # ... so ZF fires at the last round key

.Lenc_loop2:
.byte 102,15,56,220,209                 # aesenc %xmm1,%xmm2
.byte 102,15,56,220,217                 # aesenc %xmm1,%xmm3
movups (%rcx,%rax,1),%xmm1
addq $32,%rax                           # two round keys per iteration
.byte 102,15,56,220,208                 # aesenc %xmm0,%xmm2
.byte 102,15,56,220,216                 # aesenc %xmm0,%xmm3
movups -16(%rcx,%rax,1),%xmm0
jnz .Lenc_loop2

.byte 102,15,56,220,209                 # aesenc %xmm1,%xmm2
.byte 102,15,56,220,217                 # aesenc %xmm1,%xmm3
.byte 102,15,56,221,208                 # aesenclast %xmm0,%xmm2
.byte 102,15,56,221,216                 # aesenclast %xmm0,%xmm3
.byte 0xf3,0xc3                         # rep ret
.cfi_endproc
.size _aesni_encrypt2,.-_aesni_encrypt2
# _aesni_decrypt2: decrypt 2 blocks in parallel.
# In:  %rcx = key schedule, %eax = round count, %xmm2/%xmm3 = blocks
# Out: %xmm2/%xmm3 decrypted.  Clobbers %rax, %rcx, %xmm0, %xmm1, flags.
.type _aesni_decrypt2,@function
.align 16
_aesni_decrypt2:
.cfi_startproc
movups (%rcx),%xmm0
shll $4,%eax                            # eax = rounds * 16
movups 16(%rcx),%xmm1
xorps %xmm0,%xmm2                       # whitening with rk[0]
xorps %xmm0,%xmm3
movups 32(%rcx),%xmm0
leaq 32(%rcx,%rax,1),%rcx               # rcx -> end of schedule
negq %rax
addq $16,%rax

.Ldec_loop2:
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
.byte 102,15,56,222,217                 # aesdec %xmm1,%xmm3
movups (%rcx,%rax,1),%xmm1
addq $32,%rax                           # two round keys per iteration
.byte 102,15,56,222,208                 # aesdec %xmm0,%xmm2
.byte 102,15,56,222,216                 # aesdec %xmm0,%xmm3
movups -16(%rcx,%rax,1),%xmm0
jnz .Ldec_loop2

.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
.byte 102,15,56,222,217                 # aesdec %xmm1,%xmm3
.byte 102,15,56,223,208                 # aesdeclast %xmm0,%xmm2
.byte 102,15,56,223,216                 # aesdeclast %xmm0,%xmm3
.byte 0xf3,0xc3                         # rep ret
.cfi_endproc
.size _aesni_decrypt2,.-_aesni_decrypt2
# _aesni_encrypt3: encrypt 3 blocks (%xmm2..%xmm4) in parallel.
# In:  %rcx = key schedule, %eax = round count.
# Clobbers %rax, %rcx, %xmm0, %xmm1, flags.
.type _aesni_encrypt3,@function
.align 16
_aesni_encrypt3:
.cfi_startproc
movups (%rcx),%xmm0
shll $4,%eax                            # eax = rounds * 16
movups 16(%rcx),%xmm1
xorps %xmm0,%xmm2                       # whitening with rk[0]
xorps %xmm0,%xmm3
xorps %xmm0,%xmm4
movups 32(%rcx),%xmm0
leaq 32(%rcx,%rax,1),%rcx               # rcx -> end of schedule
negq %rax
addq $16,%rax

.Lenc_loop3:
.byte 102,15,56,220,209                 # aesenc %xmm1,%xmm2
.byte 102,15,56,220,217                 # aesenc %xmm1,%xmm3
.byte 102,15,56,220,225                 # aesenc %xmm1,%xmm4
movups (%rcx,%rax,1),%xmm1
addq $32,%rax                           # two round keys per iteration
.byte 102,15,56,220,208                 # aesenc %xmm0,%xmm2
.byte 102,15,56,220,216                 # aesenc %xmm0,%xmm3
.byte 102,15,56,220,224                 # aesenc %xmm0,%xmm4
movups -16(%rcx,%rax,1),%xmm0
jnz .Lenc_loop3

.byte 102,15,56,220,209                 # aesenc %xmm1,%xmm2
.byte 102,15,56,220,217                 # aesenc %xmm1,%xmm3
.byte 102,15,56,220,225                 # aesenc %xmm1,%xmm4
.byte 102,15,56,221,208                 # aesenclast %xmm0,%xmm2
.byte 102,15,56,221,216                 # aesenclast %xmm0,%xmm3
.byte 102,15,56,221,224                 # aesenclast %xmm0,%xmm4
.byte 0xf3,0xc3                         # rep ret
.cfi_endproc
.size _aesni_encrypt3,.-_aesni_encrypt3
# _aesni_decrypt3: decrypt 3 blocks (%xmm2..%xmm4) in parallel.
# In:  %rcx = key schedule, %eax = round count.
# Clobbers %rax, %rcx, %xmm0, %xmm1, flags.
.type _aesni_decrypt3,@function
.align 16
_aesni_decrypt3:
.cfi_startproc
movups (%rcx),%xmm0
shll $4,%eax                            # eax = rounds * 16
movups 16(%rcx),%xmm1
xorps %xmm0,%xmm2                       # whitening with rk[0]
xorps %xmm0,%xmm3
xorps %xmm0,%xmm4
movups 32(%rcx),%xmm0
leaq 32(%rcx,%rax,1),%rcx               # rcx -> end of schedule
negq %rax
addq $16,%rax

.Ldec_loop3:
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
.byte 102,15,56,222,217                 # aesdec %xmm1,%xmm3
.byte 102,15,56,222,225                 # aesdec %xmm1,%xmm4
movups (%rcx,%rax,1),%xmm1
addq $32,%rax                           # two round keys per iteration
.byte 102,15,56,222,208                 # aesdec %xmm0,%xmm2
.byte 102,15,56,222,216                 # aesdec %xmm0,%xmm3
.byte 102,15,56,222,224                 # aesdec %xmm0,%xmm4
movups -16(%rcx,%rax,1),%xmm0
jnz .Ldec_loop3

.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
.byte 102,15,56,222,217                 # aesdec %xmm1,%xmm3
.byte 102,15,56,222,225                 # aesdec %xmm1,%xmm4
.byte 102,15,56,223,208                 # aesdeclast %xmm0,%xmm2
.byte 102,15,56,223,216                 # aesdeclast %xmm0,%xmm3
.byte 102,15,56,223,224                 # aesdeclast %xmm0,%xmm4
.byte 0xf3,0xc3                         # rep ret
.cfi_endproc
.size _aesni_decrypt3,.-_aesni_decrypt3
# _aesni_encrypt4: encrypt 4 blocks (%xmm2..%xmm5) in parallel.
# In:  %rcx = key schedule, %eax = round count.
# Clobbers %rax, %rcx, %xmm0, %xmm1, flags.
.type _aesni_encrypt4,@function
.align 16
_aesni_encrypt4:
.cfi_startproc
movups (%rcx),%xmm0
shll $4,%eax                            # eax = rounds * 16
movups 16(%rcx),%xmm1
xorps %xmm0,%xmm2                       # whitening with rk[0]
xorps %xmm0,%xmm3
xorps %xmm0,%xmm4
xorps %xmm0,%xmm5
movups 32(%rcx),%xmm0
leaq 32(%rcx,%rax,1),%rcx               # rcx -> end of schedule
negq %rax
.byte 0x0f,0x1f,0x00                    # 3-byte nop (code-alignment padding)
addq $16,%rax

.Lenc_loop4:
.byte 102,15,56,220,209                 # aesenc %xmm1,%xmm2
.byte 102,15,56,220,217                 # aesenc %xmm1,%xmm3
.byte 102,15,56,220,225                 # aesenc %xmm1,%xmm4
.byte 102,15,56,220,233                 # aesenc %xmm1,%xmm5
movups (%rcx,%rax,1),%xmm1
addq $32,%rax                           # two round keys per iteration
.byte 102,15,56,220,208                 # aesenc %xmm0,%xmm2
.byte 102,15,56,220,216                 # aesenc %xmm0,%xmm3
.byte 102,15,56,220,224                 # aesenc %xmm0,%xmm4
.byte 102,15,56,220,232                 # aesenc %xmm0,%xmm5
movups -16(%rcx,%rax,1),%xmm0
jnz .Lenc_loop4

.byte 102,15,56,220,209                 # aesenc %xmm1,%xmm2
.byte 102,15,56,220,217                 # aesenc %xmm1,%xmm3
.byte 102,15,56,220,225                 # aesenc %xmm1,%xmm4
.byte 102,15,56,220,233                 # aesenc %xmm1,%xmm5
.byte 102,15,56,221,208                 # aesenclast %xmm0,%xmm2
.byte 102,15,56,221,216                 # aesenclast %xmm0,%xmm3
.byte 102,15,56,221,224                 # aesenclast %xmm0,%xmm4
.byte 102,15,56,221,232                 # aesenclast %xmm0,%xmm5
.byte 0xf3,0xc3                         # rep ret
.cfi_endproc
.size _aesni_encrypt4,.-_aesni_encrypt4
# _aesni_decrypt4: decrypt 4 blocks (%xmm2..%xmm5) in parallel.
# In:  %rcx = key schedule, %eax = round count.
# Clobbers %rax, %rcx, %xmm0, %xmm1, flags.
.type _aesni_decrypt4,@function
.align 16
_aesni_decrypt4:
.cfi_startproc
movups (%rcx),%xmm0
shll $4,%eax                            # eax = rounds * 16
movups 16(%rcx),%xmm1
xorps %xmm0,%xmm2                       # whitening with rk[0]
xorps %xmm0,%xmm3
xorps %xmm0,%xmm4
xorps %xmm0,%xmm5
movups 32(%rcx),%xmm0
leaq 32(%rcx,%rax,1),%rcx               # rcx -> end of schedule
negq %rax
.byte 0x0f,0x1f,0x00                    # 3-byte nop (code-alignment padding)
addq $16,%rax

.Ldec_loop4:
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
.byte 102,15,56,222,217                 # aesdec %xmm1,%xmm3
.byte 102,15,56,222,225                 # aesdec %xmm1,%xmm4
.byte 102,15,56,222,233                 # aesdec %xmm1,%xmm5
movups (%rcx,%rax,1),%xmm1
addq $32,%rax                           # two round keys per iteration
.byte 102,15,56,222,208                 # aesdec %xmm0,%xmm2
.byte 102,15,56,222,216                 # aesdec %xmm0,%xmm3
.byte 102,15,56,222,224                 # aesdec %xmm0,%xmm4
.byte 102,15,56,222,232                 # aesdec %xmm0,%xmm5
movups -16(%rcx,%rax,1),%xmm0
jnz .Ldec_loop4

.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
.byte 102,15,56,222,217                 # aesdec %xmm1,%xmm3
.byte 102,15,56,222,225                 # aesdec %xmm1,%xmm4
.byte 102,15,56,222,233                 # aesdec %xmm1,%xmm5
.byte 102,15,56,223,208                 # aesdeclast %xmm0,%xmm2
.byte 102,15,56,223,216                 # aesdeclast %xmm0,%xmm3
.byte 102,15,56,223,224                 # aesdeclast %xmm0,%xmm4
.byte 102,15,56,223,232                 # aesdeclast %xmm0,%xmm5
.byte 0xf3,0xc3                         # rep ret
.cfi_endproc
.size _aesni_decrypt4,.-_aesni_decrypt4
# _aesni_encrypt6: encrypt 6 blocks (%xmm2..%xmm7) in parallel.
# In:  %rcx = key schedule, %eax = round count.
# Clobbers %rax, %rcx, %xmm0, %xmm1, flags.
# The first round for xmm2..xmm4 is interleaved with key whitening of
# xmm5..xmm7 to overlap latency; .Lenc_loop6 is also entered via
# `call` from the CTR32 code later in this file.
.type _aesni_encrypt6,@function
.align 16
_aesni_encrypt6:
.cfi_startproc
movups (%rcx),%xmm0
shll $4,%eax                            # eax = rounds * 16
movups 16(%rcx),%xmm1
xorps %xmm0,%xmm2                       # whitening with rk[0]
pxor %xmm0,%xmm3
pxor %xmm0,%xmm4
.byte 102,15,56,220,209                 # aesenc %xmm1,%xmm2 (round 1, early)
leaq 32(%rcx,%rax,1),%rcx               # rcx -> end of schedule
negq %rax
.byte 102,15,56,220,217                 # aesenc %xmm1,%xmm3
pxor %xmm0,%xmm5
pxor %xmm0,%xmm6
.byte 102,15,56,220,225                 # aesenc %xmm1,%xmm4
pxor %xmm0,%xmm7
movups (%rcx,%rax,1),%xmm0
addq $16,%rax
jmp .Lenc_loop6_enter                   # xmm2..4 already did round 1
.align 16
.Lenc_loop6:
.byte 102,15,56,220,209                 # aesenc %xmm1,%xmm2
.byte 102,15,56,220,217                 # aesenc %xmm1,%xmm3
.byte 102,15,56,220,225                 # aesenc %xmm1,%xmm4
.Lenc_loop6_enter:
.byte 102,15,56,220,233                 # aesenc %xmm1,%xmm5
.byte 102,15,56,220,241                 # aesenc %xmm1,%xmm6
.byte 102,15,56,220,249                 # aesenc %xmm1,%xmm7
movups (%rcx,%rax,1),%xmm1
addq $32,%rax                           # two round keys per iteration
.byte 102,15,56,220,208                 # aesenc %xmm0,%xmm2
.byte 102,15,56,220,216                 # aesenc %xmm0,%xmm3
.byte 102,15,56,220,224                 # aesenc %xmm0,%xmm4
.byte 102,15,56,220,232                 # aesenc %xmm0,%xmm5
.byte 102,15,56,220,240                 # aesenc %xmm0,%xmm6
.byte 102,15,56,220,248                 # aesenc %xmm0,%xmm7
movups -16(%rcx,%rax,1),%xmm0
jnz .Lenc_loop6

.byte 102,15,56,220,209                 # aesenc %xmm1,%xmm2
.byte 102,15,56,220,217                 # aesenc %xmm1,%xmm3
.byte 102,15,56,220,225                 # aesenc %xmm1,%xmm4
.byte 102,15,56,220,233                 # aesenc %xmm1,%xmm5
.byte 102,15,56,220,241                 # aesenc %xmm1,%xmm6
.byte 102,15,56,220,249                 # aesenc %xmm1,%xmm7
.byte 102,15,56,221,208                 # aesenclast %xmm0,%xmm2
.byte 102,15,56,221,216                 # aesenclast %xmm0,%xmm3
.byte 102,15,56,221,224                 # aesenclast %xmm0,%xmm4
.byte 102,15,56,221,232                 # aesenclast %xmm0,%xmm5
.byte 102,15,56,221,240                 # aesenclast %xmm0,%xmm6
.byte 102,15,56,221,248                 # aesenclast %xmm0,%xmm7
.byte 0xf3,0xc3                         # rep ret
.cfi_endproc
.size _aesni_encrypt6,.-_aesni_encrypt6
# _aesni_decrypt6: decrypt 6 blocks (%xmm2..%xmm7) in parallel.
# In:  %rcx = key schedule, %eax = round count.
# Clobbers %rax, %rcx, %xmm0, %xmm1, flags.
.type _aesni_decrypt6,@function
.align 16
_aesni_decrypt6:
.cfi_startproc
movups (%rcx),%xmm0
shll $4,%eax                            # eax = rounds * 16
movups 16(%rcx),%xmm1
xorps %xmm0,%xmm2                       # whitening with rk[0]
pxor %xmm0,%xmm3
pxor %xmm0,%xmm4
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2 (round 1, early)
leaq 32(%rcx,%rax,1),%rcx               # rcx -> end of schedule
negq %rax
.byte 102,15,56,222,217                 # aesdec %xmm1,%xmm3
pxor %xmm0,%xmm5
pxor %xmm0,%xmm6
.byte 102,15,56,222,225                 # aesdec %xmm1,%xmm4
pxor %xmm0,%xmm7
movups (%rcx,%rax,1),%xmm0
addq $16,%rax
jmp .Ldec_loop6_enter                   # xmm2..4 already did round 1
.align 16
.Ldec_loop6:
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
.byte 102,15,56,222,217                 # aesdec %xmm1,%xmm3
.byte 102,15,56,222,225                 # aesdec %xmm1,%xmm4
.Ldec_loop6_enter:
.byte 102,15,56,222,233                 # aesdec %xmm1,%xmm5
.byte 102,15,56,222,241                 # aesdec %xmm1,%xmm6
.byte 102,15,56,222,249                 # aesdec %xmm1,%xmm7
movups (%rcx,%rax,1),%xmm1
addq $32,%rax                           # two round keys per iteration
.byte 102,15,56,222,208                 # aesdec %xmm0,%xmm2
.byte 102,15,56,222,216                 # aesdec %xmm0,%xmm3
.byte 102,15,56,222,224                 # aesdec %xmm0,%xmm4
.byte 102,15,56,222,232                 # aesdec %xmm0,%xmm5
.byte 102,15,56,222,240                 # aesdec %xmm0,%xmm6
.byte 102,15,56,222,248                 # aesdec %xmm0,%xmm7
movups -16(%rcx,%rax,1),%xmm0
jnz .Ldec_loop6

.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
.byte 102,15,56,222,217                 # aesdec %xmm1,%xmm3
.byte 102,15,56,222,225                 # aesdec %xmm1,%xmm4
.byte 102,15,56,222,233                 # aesdec %xmm1,%xmm5
.byte 102,15,56,222,241                 # aesdec %xmm1,%xmm6
.byte 102,15,56,222,249                 # aesdec %xmm1,%xmm7
.byte 102,15,56,223,208                 # aesdeclast %xmm0,%xmm2
.byte 102,15,56,223,216                 # aesdeclast %xmm0,%xmm3
.byte 102,15,56,223,224                 # aesdeclast %xmm0,%xmm4
.byte 102,15,56,223,232                 # aesdeclast %xmm0,%xmm5
.byte 102,15,56,223,240                 # aesdeclast %xmm0,%xmm6
.byte 102,15,56,223,248                 # aesdeclast %xmm0,%xmm7
.byte 0xf3,0xc3                         # rep ret
.cfi_endproc
.size _aesni_decrypt6,.-_aesni_decrypt6
# _aesni_encrypt8: encrypt 8 blocks (%xmm2..%xmm9) in parallel.
# In:  %rcx = key schedule, %eax = round count.
# Clobbers %rax, %rcx, %xmm0, %xmm1, flags.
# Round 1 of xmm2/xmm3 is started early, interleaved with the
# whitening of xmm7..xmm9, so the loop is entered at
# .Lenc_loop8_inner the first time.
.type _aesni_encrypt8,@function
.align 16
_aesni_encrypt8:
.cfi_startproc
movups (%rcx),%xmm0
shll $4,%eax                            # eax = rounds * 16
movups 16(%rcx),%xmm1
xorps %xmm0,%xmm2                       # whitening with rk[0]
xorps %xmm0,%xmm3
pxor %xmm0,%xmm4
pxor %xmm0,%xmm5
pxor %xmm0,%xmm6
leaq 32(%rcx,%rax,1),%rcx               # rcx -> end of schedule
negq %rax
.byte 102,15,56,220,209                 # aesenc %xmm1,%xmm2 (round 1, early)
pxor %xmm0,%xmm7
pxor %xmm0,%xmm8
.byte 102,15,56,220,217                 # aesenc %xmm1,%xmm3
pxor %xmm0,%xmm9
movups (%rcx,%rax,1),%xmm0
addq $16,%rax
jmp .Lenc_loop8_inner
.align 16
.Lenc_loop8:
.byte 102,15,56,220,209                 # aesenc %xmm1,%xmm2
.byte 102,15,56,220,217                 # aesenc %xmm1,%xmm3
.Lenc_loop8_inner:
.byte 102,15,56,220,225                 # aesenc %xmm1,%xmm4
.byte 102,15,56,220,233                 # aesenc %xmm1,%xmm5
.byte 102,15,56,220,241                 # aesenc %xmm1,%xmm6
.byte 102,15,56,220,249                 # aesenc %xmm1,%xmm7
.byte 102,68,15,56,220,193              # aesenc %xmm1,%xmm8
.byte 102,68,15,56,220,201              # aesenc %xmm1,%xmm9
.Lenc_loop8_enter:
movups (%rcx,%rax,1),%xmm1
addq $32,%rax                           # two round keys per iteration
.byte 102,15,56,220,208                 # aesenc %xmm0,%xmm2
.byte 102,15,56,220,216                 # aesenc %xmm0,%xmm3
.byte 102,15,56,220,224                 # aesenc %xmm0,%xmm4
.byte 102,15,56,220,232                 # aesenc %xmm0,%xmm5
.byte 102,15,56,220,240                 # aesenc %xmm0,%xmm6
.byte 102,15,56,220,248                 # aesenc %xmm0,%xmm7
.byte 102,68,15,56,220,192              # aesenc %xmm0,%xmm8
.byte 102,68,15,56,220,200              # aesenc %xmm0,%xmm9
movups -16(%rcx,%rax,1),%xmm0
jnz .Lenc_loop8

.byte 102,15,56,220,209                 # aesenc %xmm1,%xmm2
.byte 102,15,56,220,217                 # aesenc %xmm1,%xmm3
.byte 102,15,56,220,225                 # aesenc %xmm1,%xmm4
.byte 102,15,56,220,233                 # aesenc %xmm1,%xmm5
.byte 102,15,56,220,241                 # aesenc %xmm1,%xmm6
.byte 102,15,56,220,249                 # aesenc %xmm1,%xmm7
.byte 102,68,15,56,220,193              # aesenc %xmm1,%xmm8
.byte 102,68,15,56,220,201              # aesenc %xmm1,%xmm9
.byte 102,15,56,221,208                 # aesenclast %xmm0,%xmm2
.byte 102,15,56,221,216                 # aesenclast %xmm0,%xmm3
.byte 102,15,56,221,224                 # aesenclast %xmm0,%xmm4
.byte 102,15,56,221,232                 # aesenclast %xmm0,%xmm5
.byte 102,15,56,221,240                 # aesenclast %xmm0,%xmm6
.byte 102,15,56,221,248                 # aesenclast %xmm0,%xmm7
.byte 102,68,15,56,221,192              # aesenclast %xmm0,%xmm8
.byte 102,68,15,56,221,200              # aesenclast %xmm0,%xmm9
.byte 0xf3,0xc3                         # rep ret
.cfi_endproc
.size _aesni_encrypt8,.-_aesni_encrypt8
# _aesni_decrypt8: decrypt 8 blocks (%xmm2..%xmm9) in parallel.
# In:  %rcx = key schedule, %eax = round count.
# Clobbers %rax, %rcx, %xmm0, %xmm1, flags.
.type _aesni_decrypt8,@function
.align 16
_aesni_decrypt8:
.cfi_startproc
movups (%rcx),%xmm0
shll $4,%eax                            # eax = rounds * 16
movups 16(%rcx),%xmm1
xorps %xmm0,%xmm2                       # whitening with rk[0]
xorps %xmm0,%xmm3
pxor %xmm0,%xmm4
pxor %xmm0,%xmm5
pxor %xmm0,%xmm6
leaq 32(%rcx,%rax,1),%rcx               # rcx -> end of schedule
negq %rax
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2 (round 1, early)
pxor %xmm0,%xmm7
pxor %xmm0,%xmm8
.byte 102,15,56,222,217                 # aesdec %xmm1,%xmm3
pxor %xmm0,%xmm9
movups (%rcx,%rax,1),%xmm0
addq $16,%rax
jmp .Ldec_loop8_inner
.align 16
.Ldec_loop8:
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
.byte 102,15,56,222,217                 # aesdec %xmm1,%xmm3
.Ldec_loop8_inner:
.byte 102,15,56,222,225                 # aesdec %xmm1,%xmm4
.byte 102,15,56,222,233                 # aesdec %xmm1,%xmm5
.byte 102,15,56,222,241                 # aesdec %xmm1,%xmm6
.byte 102,15,56,222,249                 # aesdec %xmm1,%xmm7
.byte 102,68,15,56,222,193              # aesdec %xmm1,%xmm8
.byte 102,68,15,56,222,201              # aesdec %xmm1,%xmm9
.Ldec_loop8_enter:
movups (%rcx,%rax,1),%xmm1
addq $32,%rax                           # two round keys per iteration
.byte 102,15,56,222,208                 # aesdec %xmm0,%xmm2
.byte 102,15,56,222,216                 # aesdec %xmm0,%xmm3
.byte 102,15,56,222,224                 # aesdec %xmm0,%xmm4
.byte 102,15,56,222,232                 # aesdec %xmm0,%xmm5
.byte 102,15,56,222,240                 # aesdec %xmm0,%xmm6
.byte 102,15,56,222,248                 # aesdec %xmm0,%xmm7
.byte 102,68,15,56,222,192              # aesdec %xmm0,%xmm8
.byte 102,68,15,56,222,200              # aesdec %xmm0,%xmm9
movups -16(%rcx,%rax,1),%xmm0
jnz .Ldec_loop8

.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
.byte 102,15,56,222,217                 # aesdec %xmm1,%xmm3
.byte 102,15,56,222,225                 # aesdec %xmm1,%xmm4
.byte 102,15,56,222,233                 # aesdec %xmm1,%xmm5
.byte 102,15,56,222,241                 # aesdec %xmm1,%xmm6
.byte 102,15,56,222,249                 # aesdec %xmm1,%xmm7
.byte 102,68,15,56,222,193              # aesdec %xmm1,%xmm8
.byte 102,68,15,56,222,201              # aesdec %xmm1,%xmm9
.byte 102,15,56,223,208                 # aesdeclast %xmm0,%xmm2
.byte 102,15,56,223,216                 # aesdeclast %xmm0,%xmm3
.byte 102,15,56,223,224                 # aesdeclast %xmm0,%xmm4
.byte 102,15,56,223,232                 # aesdeclast %xmm0,%xmm5
.byte 102,15,56,223,240                 # aesdeclast %xmm0,%xmm6
.byte 102,15,56,223,248                 # aesdeclast %xmm0,%xmm7
.byte 102,68,15,56,223,192              # aesdeclast %xmm0,%xmm8
.byte 102,68,15,56,223,200              # aesdeclast %xmm0,%xmm9
.byte 0xf3,0xc3                         # rep ret
.cfi_endproc
.size _aesni_decrypt8,.-_aesni_decrypt8
# void aesni_ecb_encrypt(const unsigned char *in,   /* %rdi */
#                        unsigned char *out,        /* %rsi */
#                        size_t length,             /* %rdx */
#                        const void *key,           /* %rcx */
#                        int enc)                   /* %r8d */
# ECB mode bulk encrypt (enc != 0) or decrypt (enc == 0).
# Processes 8 blocks per main-loop iteration via _aesni_{en,de}crypt8,
# then a 1..7-block tail via the smaller helpers.  The decrypt paths
# pxor-scrub xmm registers after each store so decrypted plaintext
# does not linger in registers.
.globl aesni_ecb_encrypt
.type aesni_ecb_encrypt,@function
.align 16
aesni_ecb_encrypt:
.cfi_startproc
andq $-16,%rdx                          # round length down to whole blocks
jz .Lecb_ret

movl 240(%rcx),%eax                     # round count
movups (%rcx),%xmm0
movq %rcx,%r11                          # r11 = saved key pointer (helpers clobber rcx)
movl %eax,%r10d                         # r10d = saved round count
testl %r8d,%r8d                         # enc == 0 -> decrypt
jz .Lecb_decrypt

cmpq $0x80,%rdx                         # fewer than 8 blocks -> tail
jb .Lecb_enc_tail

movdqu (%rdi),%xmm2                     # preload first 8 blocks
movdqu 16(%rdi),%xmm3
movdqu 32(%rdi),%xmm4
movdqu 48(%rdi),%xmm5
movdqu 64(%rdi),%xmm6
movdqu 80(%rdi),%xmm7
movdqu 96(%rdi),%xmm8
movdqu 112(%rdi),%xmm9
leaq 128(%rdi),%rdi
subq $0x80,%rdx
jmp .Lecb_enc_loop8_enter
.align 16
.Lecb_enc_loop8:
# Store previous batch while loading the next (software pipelining).
movups %xmm2,(%rsi)
movq %r11,%rcx                          # restore key pointer for helper
movdqu (%rdi),%xmm2
movl %r10d,%eax                         # restore round count for helper
movups %xmm3,16(%rsi)
movdqu 16(%rdi),%xmm3
movups %xmm4,32(%rsi)
movdqu 32(%rdi),%xmm4
movups %xmm5,48(%rsi)
movdqu 48(%rdi),%xmm5
movups %xmm6,64(%rsi)
movdqu 64(%rdi),%xmm6
movups %xmm7,80(%rsi)
movdqu 80(%rdi),%xmm7
movups %xmm8,96(%rsi)
movdqu 96(%rdi),%xmm8
movups %xmm9,112(%rsi)
leaq 128(%rsi),%rsi
movdqu 112(%rdi),%xmm9
leaq 128(%rdi),%rdi
.Lecb_enc_loop8_enter:

call _aesni_encrypt8

subq $0x80,%rdx
jnc .Lecb_enc_loop8

# Flush the final full batch of 8 blocks.
movups %xmm2,(%rsi)
movq %r11,%rcx
movups %xmm3,16(%rsi)
movl %r10d,%eax
movups %xmm4,32(%rsi)
movups %xmm5,48(%rsi)
movups %xmm6,64(%rsi)
movups %xmm7,80(%rsi)
movups %xmm8,96(%rsi)
movups %xmm9,112(%rsi)
leaq 128(%rsi),%rsi
addq $0x80,%rdx                         # rdx = remaining tail bytes (0..0x70)
jz .Lecb_ret

.Lecb_enc_tail:
# Dispatch on remaining block count (1..7).
movups (%rdi),%xmm2
cmpq $0x20,%rdx
jb .Lecb_enc_one
movups 16(%rdi),%xmm3
je .Lecb_enc_two
movups 32(%rdi),%xmm4
cmpq $0x40,%rdx
jb .Lecb_enc_three
movups 48(%rdi),%xmm5
je .Lecb_enc_four
movups 64(%rdi),%xmm6
cmpq $0x60,%rdx
jb .Lecb_enc_five
movups 80(%rdi),%xmm7
je .Lecb_enc_six
movdqu 96(%rdi),%xmm8                   # 7 blocks: pad with zero xmm9, use encrypt8
xorps %xmm9,%xmm9
call _aesni_encrypt8
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
movups %xmm5,48(%rsi)
movups %xmm6,64(%rsi)
movups %xmm7,80(%rsi)
movups %xmm8,96(%rsi)
jmp .Lecb_ret
.align 16
.Lecb_enc_one:
# Single-block encryption, inlined (same shape as aesni_encrypt).
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
.Loop_enc1_3:
.byte 102,15,56,220,209                 # aesenc %xmm1,%xmm2
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_enc1_3
.byte 102,15,56,221,209                 # aesenclast %xmm1,%xmm2
movups %xmm2,(%rsi)
jmp .Lecb_ret
.align 16
.Lecb_enc_two:
call _aesni_encrypt2
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
jmp .Lecb_ret
.align 16
.Lecb_enc_three:
call _aesni_encrypt3
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
jmp .Lecb_ret
.align 16
.Lecb_enc_four:
call _aesni_encrypt4
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
movups %xmm5,48(%rsi)
jmp .Lecb_ret
.align 16
.Lecb_enc_five:
xorps %xmm7,%xmm7                       # 5 blocks: pad with zero xmm7, use encrypt6
call _aesni_encrypt6
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
movups %xmm5,48(%rsi)
movups %xmm6,64(%rsi)
jmp .Lecb_ret
.align 16
.Lecb_enc_six:
call _aesni_encrypt6
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
movups %xmm5,48(%rsi)
movups %xmm6,64(%rsi)
movups %xmm7,80(%rsi)
jmp .Lecb_ret

.align 16
.Lecb_decrypt:
cmpq $0x80,%rdx                         # fewer than 8 blocks -> tail
jb .Lecb_dec_tail

movdqu (%rdi),%xmm2                     # preload first 8 blocks
movdqu 16(%rdi),%xmm3
movdqu 32(%rdi),%xmm4
movdqu 48(%rdi),%xmm5
movdqu 64(%rdi),%xmm6
movdqu 80(%rdi),%xmm7
movdqu 96(%rdi),%xmm8
movdqu 112(%rdi),%xmm9
leaq 128(%rdi),%rdi
subq $0x80,%rdx
jmp .Lecb_dec_loop8_enter
.align 16
.Lecb_dec_loop8:
# Store previous batch while loading the next (software pipelining).
movups %xmm2,(%rsi)
movq %r11,%rcx
movdqu (%rdi),%xmm2
movl %r10d,%eax
movups %xmm3,16(%rsi)
movdqu 16(%rdi),%xmm3
movups %xmm4,32(%rsi)
movdqu 32(%rdi),%xmm4
movups %xmm5,48(%rsi)
movdqu 48(%rdi),%xmm5
movups %xmm6,64(%rsi)
movdqu 64(%rdi),%xmm6
movups %xmm7,80(%rsi)
movdqu 80(%rdi),%xmm7
movups %xmm8,96(%rsi)
movdqu 96(%rdi),%xmm8
movups %xmm9,112(%rsi)
leaq 128(%rsi),%rsi
movdqu 112(%rdi),%xmm9
leaq 128(%rdi),%rdi
.Lecb_dec_loop8_enter:

call _aesni_decrypt8

movups (%r11),%xmm0
subq $0x80,%rdx
jnc .Lecb_dec_loop8

# Flush final batch; scrub each register after its store.
movups %xmm2,(%rsi)
pxor %xmm2,%xmm2
movq %r11,%rcx
movups %xmm3,16(%rsi)
pxor %xmm3,%xmm3
movl %r10d,%eax
movups %xmm4,32(%rsi)
pxor %xmm4,%xmm4
movups %xmm5,48(%rsi)
pxor %xmm5,%xmm5
movups %xmm6,64(%rsi)
pxor %xmm6,%xmm6
movups %xmm7,80(%rsi)
pxor %xmm7,%xmm7
movups %xmm8,96(%rsi)
pxor %xmm8,%xmm8
movups %xmm9,112(%rsi)
pxor %xmm9,%xmm9
leaq 128(%rsi),%rsi
addq $0x80,%rdx                         # rdx = remaining tail bytes (0..0x70)
jz .Lecb_ret

.Lecb_dec_tail:
# Dispatch on remaining block count (1..7).
movups (%rdi),%xmm2
cmpq $0x20,%rdx
jb .Lecb_dec_one
movups 16(%rdi),%xmm3
je .Lecb_dec_two
movups 32(%rdi),%xmm4
cmpq $0x40,%rdx
jb .Lecb_dec_three
movups 48(%rdi),%xmm5
je .Lecb_dec_four
movups 64(%rdi),%xmm6
cmpq $0x60,%rdx
jb .Lecb_dec_five
movups 80(%rdi),%xmm7
je .Lecb_dec_six
movups 96(%rdi),%xmm8                   # 7 blocks: pad with zero xmm9, use decrypt8
movups (%rcx),%xmm0
xorps %xmm9,%xmm9
call _aesni_decrypt8
movups %xmm2,(%rsi)
pxor %xmm2,%xmm2
movups %xmm3,16(%rsi)
pxor %xmm3,%xmm3
movups %xmm4,32(%rsi)
pxor %xmm4,%xmm4
movups %xmm5,48(%rsi)
pxor %xmm5,%xmm5
movups %xmm6,64(%rsi)
pxor %xmm6,%xmm6
movups %xmm7,80(%rsi)
pxor %xmm7,%xmm7
movups %xmm8,96(%rsi)
pxor %xmm8,%xmm8
pxor %xmm9,%xmm9
jmp .Lecb_ret
.align 16
.Lecb_dec_one:
# Single-block decryption, inlined (same shape as aesni_decrypt).
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
.Loop_dec1_4:
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_dec1_4
.byte 102,15,56,223,209                 # aesdeclast %xmm1,%xmm2
movups %xmm2,(%rsi)
pxor %xmm2,%xmm2
jmp .Lecb_ret
.align 16
.Lecb_dec_two:
call _aesni_decrypt2
movups %xmm2,(%rsi)
pxor %xmm2,%xmm2
movups %xmm3,16(%rsi)
pxor %xmm3,%xmm3
jmp .Lecb_ret
.align 16
.Lecb_dec_three:
call _aesni_decrypt3
movups %xmm2,(%rsi)
pxor %xmm2,%xmm2
movups %xmm3,16(%rsi)
pxor %xmm3,%xmm3
movups %xmm4,32(%rsi)
pxor %xmm4,%xmm4
jmp .Lecb_ret
.align 16
.Lecb_dec_four:
call _aesni_decrypt4
movups %xmm2,(%rsi)
pxor %xmm2,%xmm2
movups %xmm3,16(%rsi)
pxor %xmm3,%xmm3
movups %xmm4,32(%rsi)
pxor %xmm4,%xmm4
movups %xmm5,48(%rsi)
pxor %xmm5,%xmm5
jmp .Lecb_ret
.align 16
.Lecb_dec_five:
xorps %xmm7,%xmm7                       # 5 blocks: pad with zero xmm7, use decrypt6
call _aesni_decrypt6
movups %xmm2,(%rsi)
pxor %xmm2,%xmm2
movups %xmm3,16(%rsi)
pxor %xmm3,%xmm3
movups %xmm4,32(%rsi)
pxor %xmm4,%xmm4
movups %xmm5,48(%rsi)
pxor %xmm5,%xmm5
movups %xmm6,64(%rsi)
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
jmp .Lecb_ret
.align 16
.Lecb_dec_six:
call _aesni_decrypt6
movups %xmm2,(%rsi)
pxor %xmm2,%xmm2
movups %xmm3,16(%rsi)
pxor %xmm3,%xmm3
movups %xmm4,32(%rsi)
pxor %xmm4,%xmm4
movups %xmm5,48(%rsi)
pxor %xmm5,%xmm5
movups %xmm6,64(%rsi)
pxor %xmm6,%xmm6
movups %xmm7,80(%rsi)
pxor %xmm7,%xmm7

.Lecb_ret:
xorps %xmm0,%xmm0                       # scrub key material before returning
pxor %xmm1,%xmm1
.byte 0xf3,0xc3                         # rep ret
.cfi_endproc
.size aesni_ecb_encrypt,.-aesni_ecb_encrypt
# void aesni_ccm64_encrypt_blocks(const unsigned char *in,  /* %rdi */
#                                 unsigned char *out,       /* %rsi */
#                                 size_t blocks,            /* %rdx */
#                                 const void *key,          /* %rcx */
#                                 const unsigned char *ivec,/* %r8  */
#                                 unsigned char *cmac)      /* %r9  */
# CCM with 64-bit counter: per block, encrypts the counter (xmm2) and
# the running CBC-MAC value (xmm3) in two interleaved AESENC chains.
# The updated MAC is written back through %r9 on exit.
# .Lincrement64 / .Lbswap_mask are constant tables defined elsewhere
# in this file.
.globl aesni_ccm64_encrypt_blocks
.type aesni_ccm64_encrypt_blocks,@function
.align 16
aesni_ccm64_encrypt_blocks:
.cfi_startproc
movl 240(%rcx),%eax                     # round count
movdqu (%r8),%xmm6                      # xmm6 = counter block (ivec)
movdqa .Lincrement64(%rip),%xmm9        # 64-bit increment constant
movdqa .Lbswap_mask(%rip),%xmm7         # byte-swap shuffle mask

shll $4,%eax                            # eax = rounds * 16
movl $16,%r10d
leaq 0(%rcx),%r11                       # r11 = saved key pointer
movdqu (%r9),%xmm3                      # xmm3 = running CBC-MAC
movdqa %xmm6,%xmm2
leaq 32(%rcx,%rax,1),%rcx               # rcx -> end of schedule
.byte 102,15,56,0,247                   # pshufb %xmm7,%xmm6 (byte-swap counter)
subq %rax,%r10                          # r10 = loop-start key index
jmp .Lccm64_enc_outer
.align 16
.Lccm64_enc_outer:
movups (%r11),%xmm0
movq %r10,%rax
movups (%rdi),%xmm8                     # load plaintext block

xorps %xmm0,%xmm2                       # whiten counter
movups 16(%r11),%xmm1
xorps %xmm8,%xmm0
xorps %xmm0,%xmm3                       # MAC input: cmac ^ rk[0] ^ plaintext
movups 32(%r11),%xmm0

.Lccm64_enc2_loop:
# Run counter (xmm2) and MAC (xmm3) encryptions in lockstep.
.byte 102,15,56,220,209                 # aesenc %xmm1,%xmm2
.byte 102,15,56,220,217                 # aesenc %xmm1,%xmm3
movups (%rcx,%rax,1),%xmm1
addq $32,%rax
.byte 102,15,56,220,208                 # aesenc %xmm0,%xmm2
.byte 102,15,56,220,216                 # aesenc %xmm0,%xmm3
movups -16(%rcx,%rax,1),%xmm0
jnz .Lccm64_enc2_loop
.byte 102,15,56,220,209                 # aesenc %xmm1,%xmm2
.byte 102,15,56,220,217                 # aesenc %xmm1,%xmm3
paddq %xmm9,%xmm6                       # increment 64-bit counter
decq %rdx                               # one block consumed (sets ZF for loop exit)
.byte 102,15,56,221,208                 # aesenclast %xmm0,%xmm2
.byte 102,15,56,221,216                 # aesenclast %xmm0,%xmm3

leaq 16(%rdi),%rdi
xorps %xmm2,%xmm8                       # ciphertext = plaintext ^ E(counter)
movdqa %xmm6,%xmm2
movups %xmm8,(%rsi)                     # store ciphertext block
.byte 102,15,56,0,215                   # pshufb %xmm7,%xmm2 (byte-swap next counter)
leaq 16(%rsi),%rsi
jnz .Lccm64_enc_outer

pxor %xmm0,%xmm0                        # scrub sensitive registers
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
movups %xmm3,(%r9)                      # store updated CBC-MAC
pxor %xmm3,%xmm3
pxor %xmm8,%xmm8
pxor %xmm6,%xmm6
.byte 0xf3,0xc3                         # rep ret
.cfi_endproc
.size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks
# void aesni_ccm64_decrypt_blocks(const unsigned char *in,  /* %rdi */
#                                 unsigned char *out,       /* %rsi */
#                                 size_t blocks,            /* %rdx */
#                                 const void *key,          /* %rcx */
#                                 const unsigned char *ivec,/* %r8  */
#                                 unsigned char *cmac)      /* %r9  */
# CCM decrypt with 64-bit counter.  The first counter is encrypted
# stand-alone; thereafter each iteration decrypts one block and folds
# the recovered plaintext into the CBC-MAC (xmm3), running the next
# counter encryption (xmm2) and the MAC update in parallel.  The MAC
# gets its final encryption at .Lccm64_dec_break and is stored via %r9.
.globl aesni_ccm64_decrypt_blocks
.type aesni_ccm64_decrypt_blocks,@function
.align 16
aesni_ccm64_decrypt_blocks:
.cfi_startproc
movl 240(%rcx),%eax                     # round count
movups (%r8),%xmm6                      # xmm6 = counter block (ivec)
movdqu (%r9),%xmm3                      # xmm3 = running CBC-MAC
movdqa .Lincrement64(%rip),%xmm9        # constants defined elsewhere in file
movdqa .Lbswap_mask(%rip),%xmm7

movaps %xmm6,%xmm2
movl %eax,%r10d                         # save round count
movq %rcx,%r11                          # save key pointer
.byte 102,15,56,0,247                   # pshufb %xmm7,%xmm6 (byte-swap counter)
# Encrypt the first counter block (single-block loop).
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
.Loop_enc1_5:
.byte 102,15,56,220,209                 # aesenc %xmm1,%xmm2
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_enc1_5
.byte 102,15,56,221,209                 # aesenclast %xmm1,%xmm2
shll $4,%r10d                           # r10d = rounds * 16
movl $16,%eax
movups (%rdi),%xmm8                     # first ciphertext block
paddq %xmm9,%xmm6                       # increment counter
leaq 16(%rdi),%rdi
subq %r10,%rax                          # loop-start key index
leaq 32(%r11,%r10,1),%rcx               # rcx -> end of schedule
movq %rax,%r10
jmp .Lccm64_dec_outer
.align 16
.Lccm64_dec_outer:
xorps %xmm2,%xmm8                       # plaintext = ciphertext ^ E(counter)
movdqa %xmm6,%xmm2
movups %xmm8,(%rsi)                     # store plaintext block
leaq 16(%rsi),%rsi
.byte 102,15,56,0,215                   # pshufb %xmm7,%xmm2 (byte-swap next counter)

subq $1,%rdx
jz .Lccm64_dec_break                    # last block: finish MAC separately

movups (%r11),%xmm0
movq %r10,%rax
movups 16(%r11),%xmm1
xorps %xmm0,%xmm8
xorps %xmm0,%xmm2                       # whiten next counter
xorps %xmm8,%xmm3                       # MAC input: cmac ^ rk[0] ^ plaintext
movups 32(%r11),%xmm0
jmp .Lccm64_dec2_loop
.align 16
.Lccm64_dec2_loop:
# Run counter (xmm2) and MAC (xmm3) encryptions in lockstep.
.byte 102,15,56,220,209                 # aesenc %xmm1,%xmm2
.byte 102,15,56,220,217                 # aesenc %xmm1,%xmm3
movups (%rcx,%rax,1),%xmm1
addq $32,%rax
.byte 102,15,56,220,208                 # aesenc %xmm0,%xmm2
.byte 102,15,56,220,216                 # aesenc %xmm0,%xmm3
movups -16(%rcx,%rax,1),%xmm0
jnz .Lccm64_dec2_loop
movups (%rdi),%xmm8                     # load next ciphertext block
paddq %xmm9,%xmm6                       # increment counter
.byte 102,15,56,220,209                 # aesenc %xmm1,%xmm2
.byte 102,15,56,220,217                 # aesenc %xmm1,%xmm3
.byte 102,15,56,221,208                 # aesenclast %xmm0,%xmm2
.byte 102,15,56,221,216                 # aesenclast %xmm0,%xmm3
leaq 16(%rdi),%rdi
jmp .Lccm64_dec_outer

.align 16
.Lccm64_dec_break:
# Fold the final plaintext into the MAC and encrypt it once more.
movl 240(%r11),%eax                     # reload round count
movups (%r11),%xmm0
movups 16(%r11),%xmm1
xorps %xmm0,%xmm8
leaq 32(%r11),%r11
xorps %xmm8,%xmm3
.Loop_enc1_6:
.byte 102,15,56,220,217                 # aesenc %xmm1,%xmm3
decl %eax
movups (%r11),%xmm1
leaq 16(%r11),%r11
jnz .Loop_enc1_6
.byte 102,15,56,221,217                 # aesenclast %xmm1,%xmm3
pxor %xmm0,%xmm0                        # scrub sensitive registers
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
movups %xmm3,(%r9)                      # store updated CBC-MAC
pxor %xmm3,%xmm3
pxor %xmm8,%xmm8
pxor %xmm6,%xmm6
.byte 0xf3,0xc3                         # rep ret
.cfi_endproc
.size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks
1026 .globl aesni_ctr32_encrypt_blocks
1027 .type aesni_ctr32_encrypt_blocks,@function
1028 .align 16
1029 aesni_ctr32_encrypt_blocks:
1030 .cfi_startproc
1031 cmpq $1,%rdx
1032 jne .Lctr32_bulk
1033
1034
1035
1036 movups (%r8),%xmm2
1037 movups (%rdi),%xmm3
1038 movl 240(%rcx),%edx
1039 movups (%rcx),%xmm0
1040 movups 16(%rcx),%xmm1
1041 leaq 32(%rcx),%rcx
1042 xorps %xmm0,%xmm2
1043 .Loop_enc1_7:
1044 .byte 102,15,56,220,209
1045 decl %edx
1046 movups (%rcx),%xmm1
1047 leaq 16(%rcx),%rcx
1048 jnz .Loop_enc1_7
1049 .byte 102,15,56,221,209
1050 pxor %xmm0,%xmm0
1051 pxor %xmm1,%xmm1
1052 xorps %xmm3,%xmm2
1053 pxor %xmm3,%xmm3
1054 movups %xmm2,(%rsi)
1055 xorps %xmm2,%xmm2
1056 jmp .Lctr32_epilogue
1057
1058 .align 16
1059 .Lctr32_bulk:
1060 leaq (%rsp),%r11
1061 .cfi_def_cfa_register %r11
1062 pushq %rbp
1063 .cfi_offset %rbp,-16
1064 subq $128,%rsp
1065 andq $-16,%rsp
1066
1067
1068
1069
1070 movdqu (%r8),%xmm2
1071 movdqu (%rcx),%xmm0
1072 movl 12(%r8),%r8d
1073 pxor %xmm0,%xmm2
1074 movl 12(%rcx),%ebp
1075 movdqa %xmm2,0(%rsp)
1076 bswapl %r8d
1077 movdqa %xmm2,%xmm3
1078 movdqa %xmm2,%xmm4
1079 movdqa %xmm2,%xmm5
1080 movdqa %xmm2,64(%rsp)
1081 movdqa %xmm2,80(%rsp)
1082 movdqa %xmm2,96(%rsp)
1083 movq %rdx,%r10
1084 movdqa %xmm2,112(%rsp)
1085
1086 leaq 1(%r8),%rax
1087 leaq 2(%r8),%rdx
1088 bswapl %eax
1089 bswapl %edx
1090 xorl %ebp,%eax
1091 xorl %ebp,%edx
1092 .byte 102,15,58,34,216,3
1093 leaq 3(%r8),%rax
1094 movdqa %xmm3,16(%rsp)
1095 .byte 102,15,58,34,226,3
1096 bswapl %eax
1097 movq %r10,%rdx
1098 leaq 4(%r8),%r10
1099 movdqa %xmm4,32(%rsp)
1100 xorl %ebp,%eax
1101 bswapl %r10d
1102 .byte 102,15,58,34,232,3
1103 xorl %ebp,%r10d
1104 movdqa %xmm5,48(%rsp)
1105 leaq 5(%r8),%r9
1106 movl %r10d,64+12(%rsp)
1107 bswapl %r9d
1108 leaq 6(%r8),%r10
1109 movl 240(%rcx),%eax
1110 xorl %ebp,%r9d
1111 bswapl %r10d
1112 movl %r9d,80+12(%rsp)
1113 xorl %ebp,%r10d
1114 leaq 7(%r8),%r9
1115 movl %r10d,96+12(%rsp)
1116 bswapl %r9d
1117 movl OPENSSL_ia32cap_P+4(%rip),%r10d
1118 xorl %ebp,%r9d
1119 andl $71303168,%r10d
1120 movl %r9d,112+12(%rsp)
1121
1122 movups 16(%rcx),%xmm1
1123
1124 movdqa 64(%rsp),%xmm6
1125 movdqa 80(%rsp),%xmm7
1126
1127 cmpq $8,%rdx
1128 jb .Lctr32_tail
1129
1130 subq $6,%rdx
1131 cmpl $4194304,%r10d
1132 je .Lctr32_6x
1133
1134 leaq 128(%rcx),%rcx
1135 subq $2,%rdx
1136 jmp .Lctr32_loop8
1137
1138 .align 16
1139 .Lctr32_6x:
1140 shll $4,%eax
1141 movl $48,%r10d
1142 bswapl %ebp
1143 leaq 32(%rcx,%rax,1),%rcx
1144 subq %rax,%r10
1145 jmp .Lctr32_loop6
1146
1147 .align 16
1148 .Lctr32_loop6:
1149 addl $6,%r8d
1150 movups -48(%rcx,%r10,1),%xmm0
1151 .byte 102,15,56,220,209
1152 movl %r8d,%eax
1153 xorl %ebp,%eax
1154 .byte 102,15,56,220,217
1155 .byte 0x0f,0x38,0xf1,0x44,0x24,12
1156 leal 1(%r8),%eax
1157 .byte 102,15,56,220,225
1158 xorl %ebp,%eax
1159 .byte 0x0f,0x38,0xf1,0x44,0x24,28
1160 .byte 102,15,56,220,233
1161 leal 2(%r8),%eax
1162 xorl %ebp,%eax
1163 .byte 102,15,56,220,241
1164 .byte 0x0f,0x38,0xf1,0x44,0x24,44
1165 leal 3(%r8),%eax
1166 .byte 102,15,56,220,249
1167 movups -32(%rcx,%r10,1),%xmm1
1168 xorl %ebp,%eax
1169
1170 .byte 102,15,56,220,208
1171 .byte 0x0f,0x38,0xf1,0x44,0x24,60
1172 leal 4(%r8),%eax
1173 .byte 102,15,56,220,216
1174 xorl %ebp,%eax
1175 .byte 0x0f,0x38,0xf1,0x44,0x24,76
1176 .byte 102,15,56,220,224
1177 leal 5(%r8),%eax
1178 xorl %ebp,%eax
1179 .byte 102,15,56,220,232
1180 .byte 0x0f,0x38,0xf1,0x44,0x24,92
1181 movq %r10,%rax
1182 .byte 102,15,56,220,240
1183 .byte 102,15,56,220,248
1184 movups -16(%rcx,%r10,1),%xmm0
1185
1186 call .Lenc_loop6
1187
1188 movdqu (%rdi),%xmm8
1189 movdqu 16(%rdi),%xmm9
1190 movdqu 32(%rdi),%xmm10
1191 movdqu 48(%rdi),%xmm11
1192 movdqu 64(%rdi),%xmm12
1193 movdqu 80(%rdi),%xmm13
1194 leaq 96(%rdi),%rdi
1195 movups -64(%rcx,%r10,1),%xmm1
1196 pxor %xmm2,%xmm8
1197 movaps 0(%rsp),%xmm2
1198 pxor %xmm3,%xmm9
1199 movaps 16(%rsp),%xmm3
1200 pxor %xmm4,%xmm10
1201 movaps 32(%rsp),%xmm4
1202 pxor %xmm5,%xmm11
1203 movaps 48(%rsp),%xmm5
1204 pxor %xmm6,%xmm12
1205 movaps 64(%rsp),%xmm6
1206 pxor %xmm7,%xmm13
1207 movaps 80(%rsp),%xmm7
1208 movdqu %xmm8,(%rsi)
1209 movdqu %xmm9,16(%rsi)
1210 movdqu %xmm10,32(%rsi)
1211 movdqu %xmm11,48(%rsi)
1212 movdqu %xmm12,64(%rsi)
1213 movdqu %xmm13,80(%rsi)
1214 leaq 96(%rsi),%rsi
1215
1216 subq $6,%rdx
1217 jnc .Lctr32_loop6
1218
1219 addq $6,%rdx
1220 jz .Lctr32_done
1221
1222 leal -48(%r10),%eax
1223 leaq -80(%rcx,%r10,1),%rcx
1224 negl %eax
1225 shrl $4,%eax
1226 jmp .Lctr32_tail
1227
1228 .align 32
1229 .Lctr32_loop8:
1230 addl $8,%r8d
1231 movdqa 96(%rsp),%xmm8
1232 .byte 102,15,56,220,209
1233 movl %r8d,%r9d
1234 movdqa 112(%rsp),%xmm9
1235 .byte 102,15,56,220,217
1236 bswapl %r9d
1237 movups 32-128(%rcx),%xmm0
1238 .byte 102,15,56,220,225
1239 xorl %ebp,%r9d
1240 nop
1241 .byte 102,15,56,220,233
1242 movl %r9d,0+12(%rsp)
1243 leaq 1(%r8),%r9
1244 .byte 102,15,56,220,241
1245 .byte 102,15,56,220,249
1246 .byte 102,68,15,56,220,193
1247 .byte 102,68,15,56,220,201
1248 movups 48-128(%rcx),%xmm1
1249 bswapl %r9d
1250 .byte 102,15,56,220,208
1251 .byte 102,15,56,220,216
1252 xorl %ebp,%r9d
1253 .byte 0x66,0x90
1254 .byte 102,15,56,220,224
1255 .byte 102,15,56,220,232
1256 movl %r9d,16+12(%rsp)
1257 leaq 2(%r8),%r9
1258 .byte 102,15,56,220,240
1259 .byte 102,15,56,220,248
1260 .byte 102,68,15,56,220,192
1261 .byte 102,68,15,56,220,200
1262 movups 64-128(%rcx),%xmm0
1263 bswapl %r9d
1264 .byte 102,15,56,220,209
1265 .byte 102,15,56,220,217
1266 xorl %ebp,%r9d
1267 .byte 0x66,0x90
1268 .byte 102,15,56,220,225
1269 .byte 102,15,56,220,233
1270 movl %r9d,32+12(%rsp)
1271 leaq 3(%r8),%r9
1272 .byte 102,15,56,220,241
1273 .byte 102,15,56,220,249
1274 .byte 102,68,15,56,220,193
1275 .byte 102,68,15,56,220,201
1276 movups 80-128(%rcx),%xmm1
1277 bswapl %r9d
1278 .byte 102,15,56,220,208
1279 .byte 102,15,56,220,216
1280 xorl %ebp,%r9d
1281 .byte 0x66,0x90
1282 .byte 102,15,56,220,224
1283 .byte 102,15,56,220,232
1284 movl %r9d,48+12(%rsp)
1285 leaq 4(%r8),%r9
1286 .byte 102,15,56,220,240
1287 .byte 102,15,56,220,248
1288 .byte 102,68,15,56,220,192
1289 .byte 102,68,15,56,220,200
1290 movups 96-128(%rcx),%xmm0
1291 bswapl %r9d
1292 .byte 102,15,56,220,209
1293 .byte 102,15,56,220,217
1294 xorl %ebp,%r9d
1295 .byte 0x66,0x90
1296 .byte 102,15,56,220,225
1297 .byte 102,15,56,220,233
1298 movl %r9d,64+12(%rsp)
1299 leaq 5(%r8),%r9
1300 .byte 102,15,56,220,241
1301 .byte 102,15,56,220,249
1302 .byte 102,68,15,56,220,193
1303 .byte 102,68,15,56,220,201
1304 movups 112-128(%rcx),%xmm1
1305 bswapl %r9d
1306 .byte 102,15,56,220,208
1307 .byte 102,15,56,220,216
1308 xorl %ebp,%r9d
1309 .byte 0x66,0x90
1310 .byte 102,15,56,220,224
1311 .byte 102,15,56,220,232
1312 movl %r9d,80+12(%rsp)
1313 leaq 6(%r8),%r9
1314 .byte 102,15,56,220,240
1315 .byte 102,15,56,220,248
1316 .byte 102,68,15,56,220,192
1317 .byte 102,68,15,56,220,200
1318 movups 128-128(%rcx),%xmm0
1319 bswapl %r9d
1320 .byte 102,15,56,220,209
1321 .byte 102,15,56,220,217
1322 xorl %ebp,%r9d
1323 .byte 0x66,0x90
1324 .byte 102,15,56,220,225
1325 .byte 102,15,56,220,233
1326 movl %r9d,96+12(%rsp)
1327 leaq 7(%r8),%r9
1328 .byte 102,15,56,220,241
1329 .byte 102,15,56,220,249
1330 .byte 102,68,15,56,220,193
1331 .byte 102,68,15,56,220,201
1332 movups 144-128(%rcx),%xmm1
1333 bswapl %r9d
1334 .byte 102,15,56,220,208
1335 .byte 102,15,56,220,216
1336 .byte 102,15,56,220,224
1337 xorl %ebp,%r9d
1338 movdqu 0(%rdi),%xmm10
1339 .byte 102,15,56,220,232
1340 movl %r9d,112+12(%rsp)
1341 cmpl $11,%eax
1342 .byte 102,15,56,220,240
1343 .byte 102,15,56,220,248
1344 .byte 102,68,15,56,220,192
1345 .byte 102,68,15,56,220,200
1346 movups 160-128(%rcx),%xmm0
1347
1348 jb .Lctr32_enc_done
1349
1350 .byte 102,15,56,220,209
1351 .byte 102,15,56,220,217
1352 .byte 102,15,56,220,225
1353 .byte 102,15,56,220,233
1354 .byte 102,15,56,220,241
1355 .byte 102,15,56,220,249
1356 .byte 102,68,15,56,220,193
1357 .byte 102,68,15,56,220,201
1358 movups 176-128(%rcx),%xmm1
1359
1360 .byte 102,15,56,220,208
1361 .byte 102,15,56,220,216
1362 .byte 102,15,56,220,224
1363 .byte 102,15,56,220,232
1364 .byte 102,15,56,220,240
1365 .byte 102,15,56,220,248
1366 .byte 102,68,15,56,220,192
1367 .byte 102,68,15,56,220,200
1368 movups 192-128(%rcx),%xmm0
1369 je .Lctr32_enc_done
1370
1371 .byte 102,15,56,220,209
1372 .byte 102,15,56,220,217
1373 .byte 102,15,56,220,225
1374 .byte 102,15,56,220,233
1375 .byte 102,15,56,220,241
1376 .byte 102,15,56,220,249
1377 .byte 102,68,15,56,220,193
1378 .byte 102,68,15,56,220,201
1379 movups 208-128(%rcx),%xmm1
1380
1381 .byte 102,15,56,220,208
1382 .byte 102,15,56,220,216
1383 .byte 102,15,56,220,224
1384 .byte 102,15,56,220,232
1385 .byte 102,15,56,220,240
1386 .byte 102,15,56,220,248
1387 .byte 102,68,15,56,220,192
1388 .byte 102,68,15,56,220,200
1389 movups 224-128(%rcx),%xmm0
1390 jmp .Lctr32_enc_done
1391
1392 .align 16
1393 .Lctr32_enc_done:
1394 movdqu 16(%rdi),%xmm11
1395 pxor %xmm0,%xmm10
1396 movdqu 32(%rdi),%xmm12
1397 pxor %xmm0,%xmm11
1398 movdqu 48(%rdi),%xmm13
1399 pxor %xmm0,%xmm12
1400 movdqu 64(%rdi),%xmm14
1401 pxor %xmm0,%xmm13
1402 movdqu 80(%rdi),%xmm15
1403 pxor %xmm0,%xmm14
1404 pxor %xmm0,%xmm15
1405 .byte 102,15,56,220,209
1406 .byte 102,15,56,220,217
1407 .byte 102,15,56,220,225
1408 .byte 102,15,56,220,233
1409 .byte 102,15,56,220,241
1410 .byte 102,15,56,220,249
1411 .byte 102,68,15,56,220,193
1412 .byte 102,68,15,56,220,201
1413 movdqu 96(%rdi),%xmm1
1414 leaq 128(%rdi),%rdi
1415
1416 .byte 102,65,15,56,221,210
1417 pxor %xmm0,%xmm1
1418 movdqu 112-128(%rdi),%xmm10
1419 .byte 102,65,15,56,221,219
1420 pxor %xmm0,%xmm10
1421 movdqa 0(%rsp),%xmm11
1422 .byte 102,65,15,56,221,228
1423 .byte 102,65,15,56,221,237
1424 movdqa 16(%rsp),%xmm12
1425 movdqa 32(%rsp),%xmm13
1426 .byte 102,65,15,56,221,246
1427 .byte 102,65,15,56,221,255
1428 movdqa 48(%rsp),%xmm14
1429 movdqa 64(%rsp),%xmm15
1430 .byte 102,68,15,56,221,193
1431 movdqa 80(%rsp),%xmm0
1432 movups 16-128(%rcx),%xmm1
1433 .byte 102,69,15,56,221,202
1434
1435 movups %xmm2,(%rsi)
1436 movdqa %xmm11,%xmm2
1437 movups %xmm3,16(%rsi)
1438 movdqa %xmm12,%xmm3
1439 movups %xmm4,32(%rsi)
1440 movdqa %xmm13,%xmm4
1441 movups %xmm5,48(%rsi)
1442 movdqa %xmm14,%xmm5
1443 movups %xmm6,64(%rsi)
1444 movdqa %xmm15,%xmm6
1445 movups %xmm7,80(%rsi)
1446 movdqa %xmm0,%xmm7
1447 movups %xmm8,96(%rsi)
1448 movups %xmm9,112(%rsi)
1449 leaq 128(%rsi),%rsi
1450
1451 subq $8,%rdx
1452 jnc .Lctr32_loop8
1453
1454 addq $8,%rdx
1455 jz .Lctr32_done
1456 leaq -128(%rcx),%rcx
1457
1458 .Lctr32_tail:
1459
1460
1461 leaq 16(%rcx),%rcx
1462 cmpq $4,%rdx
1463 jb .Lctr32_loop3
1464 je .Lctr32_loop4
1465
1466
1467 shll $4,%eax
1468 movdqa 96(%rsp),%xmm8
1469 pxor %xmm9,%xmm9
1470
1471 movups 16(%rcx),%xmm0
1472 .byte 102,15,56,220,209
1473 .byte 102,15,56,220,217
1474 leaq 32-16(%rcx,%rax,1),%rcx
1475 negq %rax
1476 .byte 102,15,56,220,225
1477 addq $16,%rax
1478 movups (%rdi),%xmm10
1479 .byte 102,15,56,220,233
1480 .byte 102,15,56,220,241
1481 movups 16(%rdi),%xmm11
1482 movups 32(%rdi),%xmm12
1483 .byte 102,15,56,220,249
1484 .byte 102,68,15,56,220,193
1485
1486 call .Lenc_loop8_enter
1487
1488 movdqu 48(%rdi),%xmm13
1489 pxor %xmm10,%xmm2
1490 movdqu 64(%rdi),%xmm10
1491 pxor %xmm11,%xmm3
1492 movdqu %xmm2,(%rsi)
1493 pxor %xmm12,%xmm4
1494 movdqu %xmm3,16(%rsi)
1495 pxor %xmm13,%xmm5
1496 movdqu %xmm4,32(%rsi)
1497 pxor %xmm10,%xmm6
1498 movdqu %xmm5,48(%rsi)
1499 movdqu %xmm6,64(%rsi)
1500 cmpq $6,%rdx
1501 jb .Lctr32_done
1502
1503 movups 80(%rdi),%xmm11
1504 xorps %xmm11,%xmm7
1505 movups %xmm7,80(%rsi)
1506 je .Lctr32_done
1507
1508 movups 96(%rdi),%xmm12
1509 xorps %xmm12,%xmm8
1510 movups %xmm8,96(%rsi)
1511 jmp .Lctr32_done
1512
1513 .align 32
1514 .Lctr32_loop4:
1515 .byte 102,15,56,220,209
1516 leaq 16(%rcx),%rcx
1517 decl %eax
1518 .byte 102,15,56,220,217
1519 .byte 102,15,56,220,225
1520 .byte 102,15,56,220,233
1521 movups (%rcx),%xmm1
1522 jnz .Lctr32_loop4
1523 .byte 102,15,56,221,209
1524 .byte 102,15,56,221,217
1525 movups (%rdi),%xmm10
1526 movups 16(%rdi),%xmm11
1527 .byte 102,15,56,221,225
1528 .byte 102,15,56,221,233
1529 movups 32(%rdi),%xmm12
1530 movups 48(%rdi),%xmm13
1531
1532 xorps %xmm10,%xmm2
1533 movups %xmm2,(%rsi)
1534 xorps %xmm11,%xmm3
1535 movups %xmm3,16(%rsi)
1536 pxor %xmm12,%xmm4
1537 movdqu %xmm4,32(%rsi)
1538 pxor %xmm13,%xmm5
1539 movdqu %xmm5,48(%rsi)
1540 jmp .Lctr32_done
1541
1542 .align 32
1543 .Lctr32_loop3:
1544 .byte 102,15,56,220,209
1545 leaq 16(%rcx),%rcx
1546 decl %eax
1547 .byte 102,15,56,220,217
1548 .byte 102,15,56,220,225
1549 movups (%rcx),%xmm1
1550 jnz .Lctr32_loop3
1551 .byte 102,15,56,221,209
1552 .byte 102,15,56,221,217
1553 .byte 102,15,56,221,225
1554
1555 movups (%rdi),%xmm10
1556 xorps %xmm10,%xmm2
1557 movups %xmm2,(%rsi)
1558 cmpq $2,%rdx
1559 jb .Lctr32_done
1560
1561 movups 16(%rdi),%xmm11
1562 xorps %xmm11,%xmm3
1563 movups %xmm3,16(%rsi)
1564 je .Lctr32_done
1565
1566 movups 32(%rdi),%xmm12
1567 xorps %xmm12,%xmm4
1568 movups %xmm4,32(%rsi)
1569
1570 .Lctr32_done:
1571 xorps %xmm0,%xmm0
1572 xorl %ebp,%ebp
1573 pxor %xmm1,%xmm1
1574 pxor %xmm2,%xmm2
1575 pxor %xmm3,%xmm3
1576 pxor %xmm4,%xmm4
1577 pxor %xmm5,%xmm5
1578 pxor %xmm6,%xmm6
1579 pxor %xmm7,%xmm7
1580 movaps %xmm0,0(%rsp)
1581 pxor %xmm8,%xmm8
1582 movaps %xmm0,16(%rsp)
1583 pxor %xmm9,%xmm9
1584 movaps %xmm0,32(%rsp)
1585 pxor %xmm10,%xmm10
1586 movaps %xmm0,48(%rsp)
1587 pxor %xmm11,%xmm11
1588 movaps %xmm0,64(%rsp)
1589 pxor %xmm12,%xmm12
1590 movaps %xmm0,80(%rsp)
1591 pxor %xmm13,%xmm13
1592 movaps %xmm0,96(%rsp)
1593 pxor %xmm14,%xmm14
1594 movaps %xmm0,112(%rsp)
1595 pxor %xmm15,%xmm15
1596 movq -8(%r11),%rbp
1597 .cfi_restore %rbp
1598 leaq (%r11),%rsp
1599 .cfi_def_cfa_register %rsp
1600 .Lctr32_epilogue:
1601 .byte 0xf3,0xc3
1602 .cfi_endproc
1603 .size aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks
/*
 * aesni_xts_encrypt — AES-XTS encryption using AES-NI.
 *
 * NOTE(review): argument roles inferred from register usage below — confirm
 * against aesni-x86_64.pl:
 *   rdi = input, rsi = output, rdx = length in bytes,
 *   rcx = data key (AES_KEY *key1), r8 = tweak key (AES_KEY *key2),
 *   r9  = 16-byte initial tweak/IV.
 * 240(key) is read as the round count, matching OpenSSL's AES_KEY layout.
 *
 * Hand-encoded instructions used throughout:
 *   .byte 102,15,56,220,..  = aesenc      (0x66 0x0F 0x38 0xDC)
 *   .byte 102,15,56,221,..  = aesenclast  (0x66 0x0F 0x38 0xDD)
 */
.globl aesni_xts_encrypt
.type aesni_xts_encrypt,@function
.align 16
aesni_xts_encrypt:
.cfi_startproc
/* Prologue: remember caller rsp in r11, save rbp, carve out a 112-byte
 * 16-aligned scratch area on the stack (used to stash tweaks). */
leaq (%rsp),%r11
.cfi_def_cfa_register %r11
pushq %rbp
.cfi_offset %rbp,-16
subq $112,%rsp
andq $-16,%rsp
/* Encrypt the IV (from r9) with the tweak key (r8) to produce the first
 * XTS tweak in xmm2.  eax = tweak-key rounds, r10d = data-key rounds. */
movups (%r9),%xmm2
movl 240(%r8),%eax
movl 240(%rcx),%r10d
movups (%r8),%xmm0
movups 16(%r8),%xmm1
leaq 32(%r8),%r8
xorps %xmm0,%xmm2
.Loop_enc1_8:
.byte 102,15,56,220,209
decl %eax
movups (%r8),%xmm1
leaq 16(%r8),%r8
jnz .Loop_enc1_8
.byte 102,15,56,221,209
/* Set up data key: rbp = key1, eax = rounds, r10d = rounds*16 (schedule
 * size), r9 = original length (for ciphertext stealing), rdx rounded
 * down to a multiple of 16. */
movups (%rcx),%xmm0
movq %rcx,%rbp
movl %r10d,%eax
shll $4,%r10d
movq %rdx,%r9
andq $-16,%rdx

movups 16(%rcx,%r10,1),%xmm1

/* Derive six consecutive tweaks xmm10..xmm14 (+ running tweak xmm15) by
 * repeated GF(2^128) doubling; .Lxts_magic presumably holds the 0x87
 * reduction constant.  Tweaks are pre-XORed with round key 0 (xmm0). */
movdqa .Lxts_magic(%rip),%xmm8
movdqa %xmm2,%xmm15
pshufd $0x5f,%xmm2,%xmm9
pxor %xmm0,%xmm1
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm10
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm10
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm11
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm11
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm12
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm12
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm13
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm13
pxor %xmm14,%xmm15
movdqa %xmm15,%xmm14
psrad $31,%xmm9
paddq %xmm15,%xmm15
pand %xmm8,%xmm9
pxor %xmm0,%xmm14
pxor %xmm9,%xmm15
movaps %xmm1,96(%rsp)

/* Fewer than 6 full blocks? -> tail path. */
subq $96,%rdx
jc .Lxts_enc_short

/* rcx -> end of key schedule, rax/r10 = negative byte offset used to walk
 * the schedule inside the 6-block grand loop. */
movl $16+96,%eax
leaq 32(%rbp,%r10,1),%rcx
subq %r10,%rax
movups 16(%rbp),%xmm1
movq %rax,%r10
leaq .Lxts_magic(%rip),%r8
jmp .Lxts_enc_grandloop

/* Main loop: encrypt 6 blocks (96 bytes) per iteration, interleaving
 * aesenc rounds with next-tweak computation. */
.align 32
.Lxts_enc_grandloop:
movdqu 0(%rdi),%xmm2
movdqa %xmm0,%xmm8
movdqu 16(%rdi),%xmm3
pxor %xmm10,%xmm2
movdqu 32(%rdi),%xmm4
pxor %xmm11,%xmm3
.byte 102,15,56,220,209
movdqu 48(%rdi),%xmm5
pxor %xmm12,%xmm4
.byte 102,15,56,220,217
movdqu 64(%rdi),%xmm6
pxor %xmm13,%xmm5
.byte 102,15,56,220,225
movdqu 80(%rdi),%xmm7
pxor %xmm15,%xmm8
movdqa 96(%rsp),%xmm9
pxor %xmm14,%xmm6
.byte 102,15,56,220,233
movups 32(%rbp),%xmm0
leaq 96(%rdi),%rdi
pxor %xmm8,%xmm7

/* Stash the six tweaks (re-XORed with xmm9) on the stack for the final
 * aesenclast-with-memory-operand sequence below. */
pxor %xmm9,%xmm10
.byte 102,15,56,220,241
pxor %xmm9,%xmm11
movdqa %xmm10,0(%rsp)
.byte 102,15,56,220,249
movups 48(%rbp),%xmm1
pxor %xmm9,%xmm12

.byte 102,15,56,220,208
pxor %xmm9,%xmm13
movdqa %xmm11,16(%rsp)
.byte 102,15,56,220,216
pxor %xmm9,%xmm14
movdqa %xmm12,32(%rsp)
.byte 102,15,56,220,224
.byte 102,15,56,220,232
pxor %xmm9,%xmm8
movdqa %xmm14,64(%rsp)
.byte 102,15,56,220,240
.byte 102,15,56,220,248
movups 64(%rbp),%xmm0
movdqa %xmm8,80(%rsp)
pshufd $0x5f,%xmm15,%xmm9
jmp .Lxts_enc_loop6
/* Inner round loop over the remaining key schedule (two rounds per
 * iteration; rax counts up toward zero). */
.align 32
.Lxts_enc_loop6:
.byte 102,15,56,220,209
.byte 102,15,56,220,217
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
.byte 102,15,56,220,249
movups -64(%rcx,%rax,1),%xmm1
addq $32,%rax

.byte 102,15,56,220,208
.byte 102,15,56,220,216
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
.byte 102,15,56,220,248
movups -80(%rcx,%rax,1),%xmm0
jnz .Lxts_enc_loop6

/* Last rounds, interleaved with computation of the next 6 tweaks
 * (same GF doubling as above). */
movdqa (%r8),%xmm8
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
.byte 102,15,56,220,209
paddq %xmm15,%xmm15
psrad $31,%xmm14
.byte 102,15,56,220,217
pand %xmm8,%xmm14
movups (%rbp),%xmm10
.byte 102,15,56,220,225
.byte 102,15,56,220,233
.byte 102,15,56,220,241
pxor %xmm14,%xmm15
movaps %xmm10,%xmm11
.byte 102,15,56,220,249
movups -64(%rcx),%xmm1

movdqa %xmm9,%xmm14
.byte 102,15,56,220,208
paddd %xmm9,%xmm9
pxor %xmm15,%xmm10
.byte 102,15,56,220,216
psrad $31,%xmm14
paddq %xmm15,%xmm15
.byte 102,15,56,220,224
.byte 102,15,56,220,232
pand %xmm8,%xmm14
movaps %xmm11,%xmm12
.byte 102,15,56,220,240
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
.byte 102,15,56,220,248
movups -48(%rcx),%xmm0

paddd %xmm9,%xmm9
.byte 102,15,56,220,209
pxor %xmm15,%xmm11
psrad $31,%xmm14
.byte 102,15,56,220,217
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
.byte 102,15,56,220,225
.byte 102,15,56,220,233
movdqa %xmm13,48(%rsp)
pxor %xmm14,%xmm15
.byte 102,15,56,220,241
movaps %xmm12,%xmm13
movdqa %xmm9,%xmm14
.byte 102,15,56,220,249
movups -32(%rcx),%xmm1

paddd %xmm9,%xmm9
.byte 102,15,56,220,208
pxor %xmm15,%xmm12
psrad $31,%xmm14
.byte 102,15,56,220,216
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
.byte 102,15,56,220,224
.byte 102,15,56,220,232
.byte 102,15,56,220,240
pxor %xmm14,%xmm15
movaps %xmm13,%xmm14
.byte 102,15,56,220,248

movdqa %xmm9,%xmm0
paddd %xmm9,%xmm9
.byte 102,15,56,220,209
pxor %xmm15,%xmm13
psrad $31,%xmm0
.byte 102,15,56,220,217
paddq %xmm15,%xmm15
pand %xmm8,%xmm0
.byte 102,15,56,220,225
.byte 102,15,56,220,233
pxor %xmm0,%xmm15
movups (%rbp),%xmm0
.byte 102,15,56,220,241
.byte 102,15,56,220,249
movups 16(%rbp),%xmm1

/* aesenclast with the stacked tweaks as memory operands:
 * .byte 102,15,56,221,XX,36,NN = aesenclast NN(%rsp),%xmmN. */
pxor %xmm15,%xmm14
.byte 102,15,56,221,84,36,0
psrad $31,%xmm9
paddq %xmm15,%xmm15
.byte 102,15,56,221,92,36,16
.byte 102,15,56,221,100,36,32
pand %xmm8,%xmm9
movq %r10,%rax
.byte 102,15,56,221,108,36,48
.byte 102,15,56,221,116,36,64
.byte 102,15,56,221,124,36,80
pxor %xmm9,%xmm15

/* Store the six ciphertext blocks and loop while >= 96 bytes remain. */
leaq 96(%rsi),%rsi
movups %xmm2,-96(%rsi)
movups %xmm3,-80(%rsi)
movups %xmm4,-64(%rsi)
movups %xmm5,-48(%rsi)
movups %xmm6,-32(%rsi)
movups %xmm7,-16(%rsi)
subq $96,%rdx
jnc .Lxts_enc_grandloop

/* Restore rounds count (eax) and key pointer for the tail. */
movl $16+96,%eax
subl %r10d,%eax
movq %rbp,%rcx
shrl $4,%eax

/* Tail: 0-5 whole blocks left.  Un-XOR round key 0 from the tweaks as
 * they are consumed; dispatch on remaining length. */
.Lxts_enc_short:

movl %eax,%r10d
pxor %xmm0,%xmm10
addq $96,%rdx
jz .Lxts_enc_done

pxor %xmm0,%xmm11
cmpq $0x20,%rdx
jb .Lxts_enc_one
pxor %xmm0,%xmm12
je .Lxts_enc_two

pxor %xmm0,%xmm13
cmpq $0x40,%rdx
jb .Lxts_enc_three
pxor %xmm0,%xmm14
je .Lxts_enc_four

/* Five blocks: pad with a zero sixth block and use the 6-wide helper. */
movdqu (%rdi),%xmm2
movdqu 16(%rdi),%xmm3
movdqu 32(%rdi),%xmm4
pxor %xmm10,%xmm2
movdqu 48(%rdi),%xmm5
pxor %xmm11,%xmm3
movdqu 64(%rdi),%xmm6
leaq 80(%rdi),%rdi
pxor %xmm12,%xmm4
pxor %xmm13,%xmm5
pxor %xmm14,%xmm6
pxor %xmm7,%xmm7

call _aesni_encrypt6

xorps %xmm10,%xmm2
movdqa %xmm15,%xmm10
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4
movdqu %xmm2,(%rsi)
xorps %xmm13,%xmm5
movdqu %xmm3,16(%rsi)
xorps %xmm14,%xmm6
movdqu %xmm4,32(%rsi)
movdqu %xmm5,48(%rsi)
movdqu %xmm6,64(%rsi)
leaq 80(%rsi),%rsi
jmp .Lxts_enc_done

/* One block: inline single-block AES-NI encryption. */
.align 16
.Lxts_enc_one:
movups (%rdi),%xmm2
leaq 16(%rdi),%rdi
xorps %xmm10,%xmm2
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
.Loop_enc1_9:
.byte 102,15,56,220,209
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_enc1_9
.byte 102,15,56,221,209
xorps %xmm10,%xmm2
movdqa %xmm11,%xmm10
movups %xmm2,(%rsi)
leaq 16(%rsi),%rsi
jmp .Lxts_enc_done

.align 16
.Lxts_enc_two:
movups (%rdi),%xmm2
movups 16(%rdi),%xmm3
leaq 32(%rdi),%rdi
xorps %xmm10,%xmm2
xorps %xmm11,%xmm3

call _aesni_encrypt2

xorps %xmm10,%xmm2
movdqa %xmm12,%xmm10
xorps %xmm11,%xmm3
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
leaq 32(%rsi),%rsi
jmp .Lxts_enc_done

.align 16
.Lxts_enc_three:
movups (%rdi),%xmm2
movups 16(%rdi),%xmm3
movups 32(%rdi),%xmm4
leaq 48(%rdi),%rdi
xorps %xmm10,%xmm2
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4

call _aesni_encrypt3

xorps %xmm10,%xmm2
movdqa %xmm13,%xmm10
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
leaq 48(%rsi),%rsi
jmp .Lxts_enc_done

.align 16
.Lxts_enc_four:
movups (%rdi),%xmm2
movups 16(%rdi),%xmm3
movups 32(%rdi),%xmm4
xorps %xmm10,%xmm2
movups 48(%rdi),%xmm5
leaq 64(%rdi),%rdi
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4
xorps %xmm13,%xmm5

call _aesni_encrypt4

pxor %xmm10,%xmm2
movdqa %xmm14,%xmm10
pxor %xmm11,%xmm3
pxor %xmm12,%xmm4
movdqu %xmm2,(%rsi)
pxor %xmm13,%xmm5
movdqu %xmm3,16(%rsi)
movdqu %xmm4,32(%rsi)
movdqu %xmm5,48(%rsi)
leaq 64(%rsi),%rsi
jmp .Lxts_enc_done

/* Ciphertext stealing: if len was not a multiple of 16 (r9 & 15 != 0),
 * swap the final partial block with the tail of the last full ciphertext
 * block, then re-encrypt that block with the next tweak. */
.align 16
.Lxts_enc_done:
andq $15,%r9
jz .Lxts_enc_ret
movq %r9,%rdx

.Lxts_enc_steal:
movzbl (%rdi),%eax
movzbl -16(%rsi),%ecx
leaq 1(%rdi),%rdi
movb %al,-16(%rsi)
movb %cl,0(%rsi)
leaq 1(%rsi),%rsi
subq $1,%rdx
jnz .Lxts_enc_steal

subq %r9,%rsi
movq %rbp,%rcx
movl %r10d,%eax

movups -16(%rsi),%xmm2
xorps %xmm10,%xmm2
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
.Loop_enc1_10:
.byte 102,15,56,220,209
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_enc1_10
.byte 102,15,56,221,209
xorps %xmm10,%xmm2
movups %xmm2,-16(%rsi)

/* Epilogue: scrub all xmm registers and the stack scratch area (key and
 * tweak material), restore rbp/rsp, return. */
.Lxts_enc_ret:
xorps %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
movaps %xmm0,0(%rsp)
pxor %xmm8,%xmm8
movaps %xmm0,16(%rsp)
pxor %xmm9,%xmm9
movaps %xmm0,32(%rsp)
pxor %xmm10,%xmm10
movaps %xmm0,48(%rsp)
pxor %xmm11,%xmm11
movaps %xmm0,64(%rsp)
pxor %xmm12,%xmm12
movaps %xmm0,80(%rsp)
pxor %xmm13,%xmm13
movaps %xmm0,96(%rsp)
pxor %xmm14,%xmm14
pxor %xmm15,%xmm15
movq -8(%r11),%rbp
.cfi_restore %rbp
leaq (%r11),%rsp
.cfi_def_cfa_register %rsp
.Lxts_enc_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_xts_encrypt,.-aesni_xts_encrypt
/*
 * aesni_xts_decrypt — AES-XTS decryption using AES-NI.
 *
 * NOTE(review): argument roles inferred from register usage below — confirm
 * against aesni-x86_64.pl:
 *   rdi = input, rsi = output, rdx = length in bytes,
 *   rcx = data key (AES_KEY *key1), r8 = tweak key (AES_KEY *key2),
 *   r9  = 16-byte initial tweak/IV.
 * The tweak itself is *encrypted* with key2 (same as the encrypt path);
 * only the data blocks use aesdec.
 *
 * Hand-encoded instructions used throughout:
 *   .byte 102,15,56,220/221 = aesenc/aesenclast   (tweak computation)
 *   .byte 102,15,56,222,..  = aesdec      (0x66 0x0F 0x38 0xDE)
 *   .byte 102,15,56,223,..  = aesdeclast  (0x66 0x0F 0x38 0xDF)
 */
.globl aesni_xts_decrypt
.type aesni_xts_decrypt,@function
.align 16
aesni_xts_decrypt:
.cfi_startproc
/* Prologue: r11 = caller rsp, save rbp, 112-byte aligned scratch area. */
leaq (%rsp),%r11
.cfi_def_cfa_register %r11
pushq %rbp
.cfi_offset %rbp,-16
subq $112,%rsp
andq $-16,%rsp
/* Encrypt the IV with the tweak key to form the initial tweak (xmm2). */
movups (%r9),%xmm2
movl 240(%r8),%eax
movl 240(%rcx),%r10d
movups (%r8),%xmm0
movups 16(%r8),%xmm1
leaq 32(%r8),%r8
xorps %xmm0,%xmm2
.Loop_enc1_11:
.byte 102,15,56,220,209
decl %eax
movups (%r8),%xmm1
leaq 16(%r8),%r8
jnz .Loop_enc1_11
.byte 102,15,56,221,209
/* If the length is not a multiple of 16, hold back one extra full block
 * (it is needed for ciphertext stealing): rdx -= 16 when (len & 15). */
xorl %eax,%eax
testq $15,%rdx
setnz %al
shlq $4,%rax
subq %rax,%rdx

/* Data-key setup: rbp = key1, eax = rounds, r10d = rounds*16,
 * r9 = original length, rdx rounded down to a multiple of 16. */
movups (%rcx),%xmm0
movq %rcx,%rbp
movl %r10d,%eax
shll $4,%r10d
movq %rdx,%r9
andq $-16,%rdx

movups 16(%rcx,%r10,1),%xmm1

/* Derive six tweaks xmm10..xmm14 (+ running tweak xmm15) by GF(2^128)
 * doubling with the .Lxts_magic reduction mask; pre-XOR round key 0. */
movdqa .Lxts_magic(%rip),%xmm8
movdqa %xmm2,%xmm15
pshufd $0x5f,%xmm2,%xmm9
pxor %xmm0,%xmm1
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm10
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm10
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm11
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm11
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm12
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm12
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
movdqa %xmm15,%xmm13
psrad $31,%xmm14
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
pxor %xmm0,%xmm13
pxor %xmm14,%xmm15
movdqa %xmm15,%xmm14
psrad $31,%xmm9
paddq %xmm15,%xmm15
pand %xmm8,%xmm9
pxor %xmm0,%xmm14
pxor %xmm9,%xmm15
movaps %xmm1,96(%rsp)

/* Fewer than 6 full blocks? -> tail path. */
subq $96,%rdx
jc .Lxts_dec_short

movl $16+96,%eax
leaq 32(%rbp,%r10,1),%rcx
subq %r10,%rax
movups 16(%rbp),%xmm1
movq %rax,%r10
leaq .Lxts_magic(%rip),%r8
jmp .Lxts_dec_grandloop

/* Main loop: decrypt 6 blocks (96 bytes) per iteration, interleaving
 * aesdec rounds with next-tweak computation (mirror of the encrypt path). */
.align 32
.Lxts_dec_grandloop:
movdqu 0(%rdi),%xmm2
movdqa %xmm0,%xmm8
movdqu 16(%rdi),%xmm3
pxor %xmm10,%xmm2
movdqu 32(%rdi),%xmm4
pxor %xmm11,%xmm3
.byte 102,15,56,222,209
movdqu 48(%rdi),%xmm5
pxor %xmm12,%xmm4
.byte 102,15,56,222,217
movdqu 64(%rdi),%xmm6
pxor %xmm13,%xmm5
.byte 102,15,56,222,225
movdqu 80(%rdi),%xmm7
pxor %xmm15,%xmm8
movdqa 96(%rsp),%xmm9
pxor %xmm14,%xmm6
.byte 102,15,56,222,233
movups 32(%rbp),%xmm0
leaq 96(%rdi),%rdi
pxor %xmm8,%xmm7

/* Stash tweaks for the trailing aesdeclast-with-memory-operand step. */
pxor %xmm9,%xmm10
.byte 102,15,56,222,241
pxor %xmm9,%xmm11
movdqa %xmm10,0(%rsp)
.byte 102,15,56,222,249
movups 48(%rbp),%xmm1
pxor %xmm9,%xmm12

.byte 102,15,56,222,208
pxor %xmm9,%xmm13
movdqa %xmm11,16(%rsp)
.byte 102,15,56,222,216
pxor %xmm9,%xmm14
movdqa %xmm12,32(%rsp)
.byte 102,15,56,222,224
.byte 102,15,56,222,232
pxor %xmm9,%xmm8
movdqa %xmm14,64(%rsp)
.byte 102,15,56,222,240
.byte 102,15,56,222,248
movups 64(%rbp),%xmm0
movdqa %xmm8,80(%rsp)
pshufd $0x5f,%xmm15,%xmm9
jmp .Lxts_dec_loop6
/* Inner round loop over the key schedule, two rounds per iteration. */
.align 32
.Lxts_dec_loop6:
.byte 102,15,56,222,209
.byte 102,15,56,222,217
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
.byte 102,15,56,222,249
movups -64(%rcx,%rax,1),%xmm1
addq $32,%rax

.byte 102,15,56,222,208
.byte 102,15,56,222,216
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
.byte 102,15,56,222,248
movups -80(%rcx,%rax,1),%xmm0
jnz .Lxts_dec_loop6

/* Last rounds, interleaved with computing the next 6 tweaks. */
movdqa (%r8),%xmm8
movdqa %xmm9,%xmm14
paddd %xmm9,%xmm9
.byte 102,15,56,222,209
paddq %xmm15,%xmm15
psrad $31,%xmm14
.byte 102,15,56,222,217
pand %xmm8,%xmm14
movups (%rbp),%xmm10
.byte 102,15,56,222,225
.byte 102,15,56,222,233
.byte 102,15,56,222,241
pxor %xmm14,%xmm15
movaps %xmm10,%xmm11
.byte 102,15,56,222,249
movups -64(%rcx),%xmm1

movdqa %xmm9,%xmm14
.byte 102,15,56,222,208
paddd %xmm9,%xmm9
pxor %xmm15,%xmm10
.byte 102,15,56,222,216
psrad $31,%xmm14
paddq %xmm15,%xmm15
.byte 102,15,56,222,224
.byte 102,15,56,222,232
pand %xmm8,%xmm14
movaps %xmm11,%xmm12
.byte 102,15,56,222,240
pxor %xmm14,%xmm15
movdqa %xmm9,%xmm14
.byte 102,15,56,222,248
movups -48(%rcx),%xmm0

paddd %xmm9,%xmm9
.byte 102,15,56,222,209
pxor %xmm15,%xmm11
psrad $31,%xmm14
.byte 102,15,56,222,217
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
.byte 102,15,56,222,225
.byte 102,15,56,222,233
movdqa %xmm13,48(%rsp)
pxor %xmm14,%xmm15
.byte 102,15,56,222,241
movaps %xmm12,%xmm13
movdqa %xmm9,%xmm14
.byte 102,15,56,222,249
movups -32(%rcx),%xmm1

paddd %xmm9,%xmm9
.byte 102,15,56,222,208
pxor %xmm15,%xmm12
psrad $31,%xmm14
.byte 102,15,56,222,216
paddq %xmm15,%xmm15
pand %xmm8,%xmm14
.byte 102,15,56,222,224
.byte 102,15,56,222,232
.byte 102,15,56,222,240
pxor %xmm14,%xmm15
movaps %xmm13,%xmm14
.byte 102,15,56,222,248

movdqa %xmm9,%xmm0
paddd %xmm9,%xmm9
.byte 102,15,56,222,209
pxor %xmm15,%xmm13
psrad $31,%xmm0
.byte 102,15,56,222,217
paddq %xmm15,%xmm15
pand %xmm8,%xmm0
.byte 102,15,56,222,225
.byte 102,15,56,222,233
pxor %xmm0,%xmm15
movups (%rbp),%xmm0
.byte 102,15,56,222,241
.byte 102,15,56,222,249
movups 16(%rbp),%xmm1

/* aesdeclast with the stacked tweaks:
 * .byte 102,15,56,223,XX,36,NN = aesdeclast NN(%rsp),%xmmN. */
pxor %xmm15,%xmm14
.byte 102,15,56,223,84,36,0
psrad $31,%xmm9
paddq %xmm15,%xmm15
.byte 102,15,56,223,92,36,16
.byte 102,15,56,223,100,36,32
pand %xmm8,%xmm9
movq %r10,%rax
.byte 102,15,56,223,108,36,48
.byte 102,15,56,223,116,36,64
.byte 102,15,56,223,124,36,80
pxor %xmm9,%xmm15

/* Store six plaintext blocks; loop while >= 96 bytes remain. */
leaq 96(%rsi),%rsi
movups %xmm2,-96(%rsi)
movups %xmm3,-80(%rsi)
movups %xmm4,-64(%rsi)
movups %xmm5,-48(%rsi)
movups %xmm6,-32(%rsi)
movups %xmm7,-16(%rsi)
subq $96,%rdx
jnc .Lxts_dec_grandloop

movl $16+96,%eax
subl %r10d,%eax
movq %rbp,%rcx
shrl $4,%eax

/* Tail: 0-5 whole blocks left; dispatch on remaining length. */
.Lxts_dec_short:

movl %eax,%r10d
pxor %xmm0,%xmm10
pxor %xmm0,%xmm11
addq $96,%rdx
jz .Lxts_dec_done

pxor %xmm0,%xmm12
cmpq $0x20,%rdx
jb .Lxts_dec_one
pxor %xmm0,%xmm13
je .Lxts_dec_two

pxor %xmm0,%xmm14
cmpq $0x40,%rdx
jb .Lxts_dec_three
je .Lxts_dec_four

/* Five blocks via the 6-wide helper; also pre-compute the next tweak
 * (xmm11) needed when ciphertext stealing follows. */
movdqu (%rdi),%xmm2
movdqu 16(%rdi),%xmm3
movdqu 32(%rdi),%xmm4
pxor %xmm10,%xmm2
movdqu 48(%rdi),%xmm5
pxor %xmm11,%xmm3
movdqu 64(%rdi),%xmm6
leaq 80(%rdi),%rdi
pxor %xmm12,%xmm4
pxor %xmm13,%xmm5
pxor %xmm14,%xmm6

call _aesni_decrypt6

xorps %xmm10,%xmm2
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4
movdqu %xmm2,(%rsi)
xorps %xmm13,%xmm5
movdqu %xmm3,16(%rsi)
xorps %xmm14,%xmm6
movdqu %xmm4,32(%rsi)
pxor %xmm14,%xmm14
movdqu %xmm5,48(%rsi)
pcmpgtd %xmm15,%xmm14
movdqu %xmm6,64(%rsi)
leaq 80(%rsi),%rsi
pshufd $0x13,%xmm14,%xmm11
andq $15,%r9
jz .Lxts_dec_ret

movdqa %xmm15,%xmm10
paddq %xmm15,%xmm15
pand %xmm8,%xmm11
pxor %xmm15,%xmm11
jmp .Lxts_dec_done2

/* One block: inline single-block AES-NI decryption; shift tweaks down. */
.align 16
.Lxts_dec_one:
movups (%rdi),%xmm2
leaq 16(%rdi),%rdi
xorps %xmm10,%xmm2
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
.Loop_dec1_12:
.byte 102,15,56,222,209
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_dec1_12
.byte 102,15,56,223,209
xorps %xmm10,%xmm2
movdqa %xmm11,%xmm10
movups %xmm2,(%rsi)
movdqa %xmm12,%xmm11
leaq 16(%rsi),%rsi
jmp .Lxts_dec_done

.align 16
.Lxts_dec_two:
movups (%rdi),%xmm2
movups 16(%rdi),%xmm3
leaq 32(%rdi),%rdi
xorps %xmm10,%xmm2
xorps %xmm11,%xmm3

call _aesni_decrypt2

xorps %xmm10,%xmm2
movdqa %xmm12,%xmm10
xorps %xmm11,%xmm3
movdqa %xmm13,%xmm11
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
leaq 32(%rsi),%rsi
jmp .Lxts_dec_done

.align 16
.Lxts_dec_three:
movups (%rdi),%xmm2
movups 16(%rdi),%xmm3
movups 32(%rdi),%xmm4
leaq 48(%rdi),%rdi
xorps %xmm10,%xmm2
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4

call _aesni_decrypt3

xorps %xmm10,%xmm2
movdqa %xmm13,%xmm10
xorps %xmm11,%xmm3
movdqa %xmm14,%xmm11
xorps %xmm12,%xmm4
movups %xmm2,(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
leaq 48(%rsi),%rsi
jmp .Lxts_dec_done

.align 16
.Lxts_dec_four:
movups (%rdi),%xmm2
movups 16(%rdi),%xmm3
movups 32(%rdi),%xmm4
xorps %xmm10,%xmm2
movups 48(%rdi),%xmm5
leaq 64(%rdi),%rdi
xorps %xmm11,%xmm3
xorps %xmm12,%xmm4
xorps %xmm13,%xmm5

call _aesni_decrypt4

pxor %xmm10,%xmm2
movdqa %xmm14,%xmm10
pxor %xmm11,%xmm3
movdqa %xmm15,%xmm11
pxor %xmm12,%xmm4
movdqu %xmm2,(%rsi)
pxor %xmm13,%xmm5
movdqu %xmm3,16(%rsi)
movdqu %xmm4,32(%rsi)
movdqu %xmm5,48(%rsi)
leaq 64(%rsi),%rsi
jmp .Lxts_dec_done

/* Ciphertext stealing (decrypt side): first decrypt the held-back block
 * with the *next* tweak (xmm11), then swap bytes with the partial block
 * and decrypt once more with the current tweak (xmm10). */
.align 16
.Lxts_dec_done:
andq $15,%r9
jz .Lxts_dec_ret
.Lxts_dec_done2:
movq %r9,%rdx
movq %rbp,%rcx
movl %r10d,%eax

movups (%rdi),%xmm2
xorps %xmm11,%xmm2
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
.Loop_dec1_13:
.byte 102,15,56,222,209
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_dec1_13
.byte 102,15,56,223,209
xorps %xmm11,%xmm2
movups %xmm2,(%rsi)

.Lxts_dec_steal:
movzbl 16(%rdi),%eax
movzbl (%rsi),%ecx
leaq 1(%rdi),%rdi
movb %al,(%rsi)
movb %cl,16(%rsi)
leaq 1(%rsi),%rsi
subq $1,%rdx
jnz .Lxts_dec_steal

subq %r9,%rsi
movq %rbp,%rcx
movl %r10d,%eax

movups (%rsi),%xmm2
xorps %xmm10,%xmm2
movups (%rcx),%xmm0
movups 16(%rcx),%xmm1
leaq 32(%rcx),%rcx
xorps %xmm0,%xmm2
.Loop_dec1_14:
.byte 102,15,56,222,209
decl %eax
movups (%rcx),%xmm1
leaq 16(%rcx),%rcx
jnz .Loop_dec1_14
.byte 102,15,56,223,209
xorps %xmm10,%xmm2
movups %xmm2,(%rsi)

/* Epilogue: scrub xmm registers and stack scratch, restore rbp/rsp. */
.Lxts_dec_ret:
xorps %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
movaps %xmm0,0(%rsp)
pxor %xmm8,%xmm8
movaps %xmm0,16(%rsp)
pxor %xmm9,%xmm9
movaps %xmm0,32(%rsp)
pxor %xmm10,%xmm10
movaps %xmm0,48(%rsp)
pxor %xmm11,%xmm11
movaps %xmm0,64(%rsp)
pxor %xmm12,%xmm12
movaps %xmm0,80(%rsp)
pxor %xmm13,%xmm13
movaps %xmm0,96(%rsp)
pxor %xmm14,%xmm14
pxor %xmm15,%xmm15
movq -8(%r11),%rbp
.cfi_restore %rbp
leaq (%r11),%rsp
.cfi_def_cfa_register %rsp
.Lxts_dec_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_xts_decrypt,.-aesni_xts_decrypt
/*
 * aesni_ocb_encrypt — AES-OCB encryption using AES-NI.
 *
 * NOTE(review): argument roles inferred from register/stack usage below —
 * confirm against aesni-x86_64.pl:
 *   rdi = input, rsi = output, rdx = number of 16-byte blocks,
 *   rcx = AES key, r8 = starting block number, r9 = offset_i (in/out),
 *   stack arg 1 (-> rbx) = L_ table of precomputed offsets,
 *   stack arg 2 (-> rbp) = running checksum (in/out).
 * Block work is delegated to __ocb_encrypt1/4/6 (defined elsewhere in
 * this file), which consume the bsf-derived L_ indices in r12/r13/r14.
 */
.globl aesni_ocb_encrypt
.type aesni_ocb_encrypt,@function
.align 32
aesni_ocb_encrypt:
.cfi_startproc
/* Prologue: rax marks the caller frame so stack args can be read after
 * the five callee-saved pushes. */
leaq (%rsp),%rax
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
movq 8(%rax),%rbx
movq 8+8(%rax),%rbp

/* Key setup: r11 = key, r10 = rounds*16; xmm9 = round0 ^ last round key,
 * xmm15 = offset_i pre-XORed with the last round key. */
movl 240(%rcx),%r10d
movq %rcx,%r11
shll $4,%r10d
movups (%rcx),%xmm9
movups 16(%rcx,%r10,1),%xmm1

movdqu (%r9),%xmm15
pxor %xmm1,%xmm9
pxor %xmm1,%xmm15

/* rcx -> end of key schedule; r10 = offset used to walk it in helpers. */
movl $16+32,%eax
leaq 32(%r11,%r10,1),%rcx
movups 16(%r11),%xmm1
subq %r10,%rax
movq %rax,%r10

/* xmm10 = L_[0], xmm8 = running checksum. */
movdqu (%rbx),%xmm10
movdqu (%rbp),%xmm8

/* If the starting block number is even, process one block first so the
 * 6-block loop always starts on an odd block number. */
testq $1,%r8
jnz .Locb_enc_odd

bsfq %r8,%r12
addq $1,%r8
shlq $4,%r12
movdqu (%rbx,%r12,1),%xmm7
movdqu (%rdi),%xmm2
leaq 16(%rdi),%rdi

call __ocb_encrypt1

movdqa %xmm7,%xmm15
movups %xmm2,(%rsi)
leaq 16(%rsi),%rsi
subq $1,%rdx
jz .Locb_enc_done

/* Precompute L_ table indices (ntz of block numbers n+1, n+3, n+5,
 * scaled by 16) consumed by __ocb_encrypt6. */
.Locb_enc_odd:
leaq 1(%r8),%r12
leaq 3(%r8),%r13
leaq 5(%r8),%r14
leaq 6(%r8),%r8
bsfq %r12,%r12
bsfq %r13,%r13
bsfq %r14,%r14
shlq $4,%r12
shlq $4,%r13
shlq $4,%r14

subq $6,%rdx
jc .Locb_enc_short
jmp .Locb_enc_grandloop

/* Main loop: 6 blocks per iteration via __ocb_encrypt6. */
.align 32
.Locb_enc_grandloop:
movdqu 0(%rdi),%xmm2
movdqu 16(%rdi),%xmm3
movdqu 32(%rdi),%xmm4
movdqu 48(%rdi),%xmm5
movdqu 64(%rdi),%xmm6
movdqu 80(%rdi),%xmm7
leaq 96(%rdi),%rdi

call __ocb_encrypt6

movups %xmm2,0(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
movups %xmm5,48(%rsi)
movups %xmm6,64(%rsi)
movups %xmm7,80(%rsi)
leaq 96(%rsi),%rsi
subq $6,%rdx
jnc .Locb_enc_grandloop

/* Tail: 0-5 remaining blocks, dispatched to 1/4/6-wide helpers (unused
 * lanes zeroed). */
.Locb_enc_short:
addq $6,%rdx
jz .Locb_enc_done

movdqu 0(%rdi),%xmm2
cmpq $2,%rdx
jb .Locb_enc_one
movdqu 16(%rdi),%xmm3
je .Locb_enc_two

movdqu 32(%rdi),%xmm4
cmpq $4,%rdx
jb .Locb_enc_three
movdqu 48(%rdi),%xmm5
je .Locb_enc_four

movdqu 64(%rdi),%xmm6
pxor %xmm7,%xmm7

call __ocb_encrypt6

movdqa %xmm14,%xmm15
movups %xmm2,0(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
movups %xmm5,48(%rsi)
movups %xmm6,64(%rsi)

jmp .Locb_enc_done

.align 16
.Locb_enc_one:
movdqa %xmm10,%xmm7

call __ocb_encrypt1

movdqa %xmm7,%xmm15
movups %xmm2,0(%rsi)
jmp .Locb_enc_done

.align 16
.Locb_enc_two:
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5

call __ocb_encrypt4

movdqa %xmm11,%xmm15
movups %xmm2,0(%rsi)
movups %xmm3,16(%rsi)

jmp .Locb_enc_done

.align 16
.Locb_enc_three:
pxor %xmm5,%xmm5

call __ocb_encrypt4

movdqa %xmm12,%xmm15
movups %xmm2,0(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)

jmp .Locb_enc_done

.align 16
.Locb_enc_four:
call __ocb_encrypt4

movdqa %xmm13,%xmm15
movups %xmm2,0(%rsi)
movups %xmm3,16(%rsi)
movups %xmm4,32(%rsi)
movups %xmm5,48(%rsi)

/* Write back checksum and the updated offset (un-XOR the last round key
 * first), scrub all xmm registers, restore callee-saved regs. */
.Locb_enc_done:
pxor %xmm0,%xmm15
movdqu %xmm8,(%rbp)
movdqu %xmm15,(%r9)

xorps %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
pxor %xmm7,%xmm7
pxor %xmm8,%xmm8
pxor %xmm9,%xmm9
pxor %xmm10,%xmm10
pxor %xmm11,%xmm11
pxor %xmm12,%xmm12
pxor %xmm13,%xmm13
pxor %xmm14,%xmm14
pxor %xmm15,%xmm15
leaq 40(%rsp),%rax
.cfi_def_cfa %rax,8
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Locb_enc_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size aesni_ocb_encrypt,.-aesni_ocb_encrypt
2795
/*
 * __ocb_encrypt6: encrypt six AES blocks (xmm2..xmm7) in parallel for OCB mode.
 * Internal helper (local linkage), called from aesni_ocb_encrypt with a
 * non-standard register contract visible in this file:
 *   r11  = key schedule base (round keys read at 16/32/48/64(%r11))
 *   rcx/rax = biased key pointer / negative round counter (rax reloaded from r10)
 *   rbx  = table of precomputed offsets (presumably OCB L_i values — indexed
 *          by r12/r13/r14, which hold bsf-derived, <<4-scaled indices)
 *   r8   = running block counter (advanced by 6 here)
 *   xmm9 = first round key (pre-XORed into each per-block offset)
 *   xmm15 on entry = previous offset; xmm10..xmm15 on exit = per-block offsets
 *   xmm8 = checksum accumulator (plaintext blocks XORed in below)
 * Clobbers xmm0/xmm1 (round keys) and flags.
 */
2796 .type __ocb_encrypt6,@function
2797 .align 32
2798 __ocb_encrypt6:
2799 .cfi_startproc
/* Chain the six per-block offsets: offset_i = offset_{i-1} ^ L[ntz(i)],
 * and fold each plaintext block into the checksum (xmm8) before whitening. */
2800 pxor %xmm9,%xmm15
2801 movdqu (%rbx,%r12,1),%xmm11
2802 movdqa %xmm10,%xmm12
2803 movdqu (%rbx,%r13,1),%xmm13
2804 movdqa %xmm10,%xmm14
2805 pxor %xmm15,%xmm10
2806 movdqu (%rbx,%r14,1),%xmm15
2807 pxor %xmm10,%xmm11
2808 pxor %xmm2,%xmm8
2809 pxor %xmm10,%xmm2
2810 pxor %xmm11,%xmm12
2811 pxor %xmm3,%xmm8
2812 pxor %xmm11,%xmm3
2813 pxor %xmm12,%xmm13
2814 pxor %xmm4,%xmm8
2815 pxor %xmm12,%xmm4
2816 pxor %xmm13,%xmm14
2817 pxor %xmm5,%xmm8
2818 pxor %xmm13,%xmm5
2819 pxor %xmm14,%xmm15
2820 pxor %xmm6,%xmm8
2821 pxor %xmm14,%xmm6
2822 pxor %xmm7,%xmm8
2823 pxor %xmm15,%xmm7
2824 movups 32(%r11),%xmm0
2825 
/* Precompute table indices for the NEXT call while the first AES rounds run:
 * bsf gives the number of trailing zero bits of the block number (ntz). */
2826 leaq 1(%r8),%r12
2827 leaq 3(%r8),%r13
2828 leaq 5(%r8),%r14
2829 addq $6,%r8
2830 pxor %xmm9,%xmm10
2831 bsfq %r12,%r12
2832 bsfq %r13,%r13
2833 bsfq %r14,%r14
2834 
/* .byte 102,15,56,220,xx = aesenc %xmm1/%xmm0,%xmmN (encoded raw for old
 * assemblers).  Interleave first two rounds with offset^key0 fix-ups. */
2835 .byte 102,15,56,220,209
2836 .byte 102,15,56,220,217
2837 .byte 102,15,56,220,225
2838 .byte 102,15,56,220,233
2839 pxor %xmm9,%xmm11
2840 pxor %xmm9,%xmm12
2841 .byte 102,15,56,220,241
2842 pxor %xmm9,%xmm13
2843 pxor %xmm9,%xmm14
2844 .byte 102,15,56,220,249
2845 movups 48(%r11),%xmm1
2846 pxor %xmm9,%xmm15
2847 
2848 .byte 102,15,56,220,208
2849 .byte 102,15,56,220,216
2850 .byte 102,15,56,220,224
2851 .byte 102,15,56,220,232
2852 .byte 102,15,56,220,240
2853 .byte 102,15,56,220,248
2854 movups 64(%r11),%xmm0
2855 shlq $4,%r12
2856 shlq $4,%r13
2857 jmp .Locb_enc_loop6
2858 
2859 .align 32
/* Main round loop: two rounds per iteration; rax counts up toward zero
 * (flags from addq drive jnz), round keys fetched relative to rcx+rax. */
2860 .Locb_enc_loop6:
2861 .byte 102,15,56,220,209
2862 .byte 102,15,56,220,217
2863 .byte 102,15,56,220,225
2864 .byte 102,15,56,220,233
2865 .byte 102,15,56,220,241
2866 .byte 102,15,56,220,249
2867 movups (%rcx,%rax,1),%xmm1
2868 addq $32,%rax
2869 
2870 .byte 102,15,56,220,208
2871 .byte 102,15,56,220,216
2872 .byte 102,15,56,220,224
2873 .byte 102,15,56,220,232
2874 .byte 102,15,56,220,240
2875 .byte 102,15,56,220,248
2876 movups -16(%rcx,%rax,1),%xmm0
2877 jnz .Locb_enc_loop6
2878 
2879 .byte 102,15,56,220,209
2880 .byte 102,15,56,220,217
2881 .byte 102,15,56,220,225
2882 .byte 102,15,56,220,233
2883 .byte 102,15,56,220,241
2884 .byte 102,15,56,220,249
2885 movups 16(%r11),%xmm1
2886 shlq $4,%r14
2887 
/* Final round: .byte 102,65,15,56,221,xx = aesenclast %xmm10..15,%xmmN —
 * the per-block offset doubles as the last-round key (pre-XORed with key0). */
2888 .byte 102,65,15,56,221,210
2889 movdqu (%rbx),%xmm10
2890 movq %r10,%rax
2891 .byte 102,65,15,56,221,219
2892 .byte 102,65,15,56,221,228
2893 .byte 102,65,15,56,221,237
2894 .byte 102,65,15,56,221,246
2895 .byte 102,65,15,56,221,255
2896 .byte 0xf3,0xc3
2897 .cfi_endproc
2898 .size __ocb_encrypt6,.-__ocb_encrypt6
2899
/*
 * __ocb_encrypt4: encrypt four AES blocks (xmm2..xmm5) in parallel for OCB.
 * Same register contract as __ocb_encrypt6 (r11 = key schedule, rbx = offset
 * table, xmm9 = key0, xmm8 = checksum, xmm15 = previous offset), but this
 * helper does NOT advance r8 or recompute r12/r13/r14 — the caller supplies
 * the bsf-derived, <<4-scaled indices.  Clobbers xmm0/xmm1 and flags.
 */
2900 .type __ocb_encrypt4,@function
2901 .align 32
2902 __ocb_encrypt4:
2903 .cfi_startproc
/* Build offsets xmm10..xmm13, accumulate plaintext into checksum (xmm8),
 * and whiten each block with its offset. */
2904 pxor %xmm9,%xmm15
2905 movdqu (%rbx,%r12,1),%xmm11
2906 movdqa %xmm10,%xmm12
2907 movdqu (%rbx,%r13,1),%xmm13
2908 pxor %xmm15,%xmm10
2909 pxor %xmm10,%xmm11
2910 pxor %xmm2,%xmm8
2911 pxor %xmm10,%xmm2
2912 pxor %xmm11,%xmm12
2913 pxor %xmm3,%xmm8
2914 pxor %xmm11,%xmm3
2915 pxor %xmm12,%xmm13
2916 pxor %xmm4,%xmm8
2917 pxor %xmm12,%xmm4
2918 pxor %xmm5,%xmm8
2919 pxor %xmm13,%xmm5
2920 movups 32(%r11),%xmm0
2921 
/* Fold key0 into the offsets so they can serve as aesenclast round keys. */
2922 pxor %xmm9,%xmm10
2923 pxor %xmm9,%xmm11
2924 pxor %xmm9,%xmm12
2925 pxor %xmm9,%xmm13
2926 
/* .byte 102,15,56,220,xx = aesenc; first two rounds from 32/48(%r11). */
2927 .byte 102,15,56,220,209
2928 .byte 102,15,56,220,217
2929 .byte 102,15,56,220,225
2930 .byte 102,15,56,220,233
2931 movups 48(%r11),%xmm1
2932 
2933 .byte 102,15,56,220,208
2934 .byte 102,15,56,220,216
2935 .byte 102,15,56,220,224
2936 .byte 102,15,56,220,232
2937 movups 64(%r11),%xmm0
2938 jmp .Locb_enc_loop4
2939 
2940 .align 32
/* Two rounds per iteration; rax counts toward zero, keys at rcx+rax. */
2941 .Locb_enc_loop4:
2942 .byte 102,15,56,220,209
2943 .byte 102,15,56,220,217
2944 .byte 102,15,56,220,225
2945 .byte 102,15,56,220,233
2946 movups (%rcx,%rax,1),%xmm1
2947 addq $32,%rax
2948 
2949 .byte 102,15,56,220,208
2950 .byte 102,15,56,220,216
2951 .byte 102,15,56,220,224
2952 .byte 102,15,56,220,232
2953 movups -16(%rcx,%rax,1),%xmm0
2954 jnz .Locb_enc_loop4
2955 
2956 .byte 102,15,56,220,209
2957 .byte 102,15,56,220,217
2958 .byte 102,15,56,220,225
2959 .byte 102,15,56,220,233
2960 movups 16(%r11),%xmm1
2961 movq %r10,%rax
2962 
/* aesenclast with per-block offsets xmm10..xmm13 as final round keys. */
2963 .byte 102,65,15,56,221,210
2964 .byte 102,65,15,56,221,219
2965 .byte 102,65,15,56,221,228
2966 .byte 102,65,15,56,221,237
2967 .byte 0xf3,0xc3
2968 .cfi_endproc
2969 .size __ocb_encrypt4,.-__ocb_encrypt4
2970
/*
 * __ocb_encrypt1: encrypt a single AES block (xmm2) for OCB mode.
 * On entry xmm7 holds the L-value to advance the offset by and xmm15 the
 * previous offset; on exit xmm7 holds the new offset (caller copies it back
 * to xmm15).  xmm8 accumulates the plaintext checksum; xmm9 is round key 0.
 * Clobbers xmm0/xmm1 and flags; rax is reset from r10 for the next call.
 */
2971 .type __ocb_encrypt1,@function
2972 .align 32
2973 __ocb_encrypt1:
2974 .cfi_startproc
/* offset ^= L; fold key0 in (so offset works as aesenclast key); update
 * checksum with the plaintext; whiten the block. */
2975 pxor %xmm15,%xmm7
2976 pxor %xmm9,%xmm7
2977 pxor %xmm2,%xmm8
2978 pxor %xmm7,%xmm2
2979 movups 32(%r11),%xmm0
2980 
/* .byte 102,15,56,220,xx = aesenc. */
2981 .byte 102,15,56,220,209
2982 movups 48(%r11),%xmm1
/* Undo the key0 fold so xmm7 is the plain offset again for the caller. */
2983 pxor %xmm9,%xmm7
2984 
2985 .byte 102,15,56,220,208
2986 movups 64(%r11),%xmm0
2987 jmp .Locb_enc_loop1
2988 
2989 .align 32
/* Two rounds per iteration; rax counts toward zero, keys at rcx+rax. */
2990 .Locb_enc_loop1:
2991 .byte 102,15,56,220,209
2992 movups (%rcx,%rax,1),%xmm1
2993 addq $32,%rax
2994 
2995 .byte 102,15,56,220,208
2996 movups -16(%rcx,%rax,1),%xmm0
2997 jnz .Locb_enc_loop1
2998 
2999 .byte 102,15,56,220,209
3000 movups 16(%r11),%xmm1
3001 movq %r10,%rax
3002 
/* .byte 102,15,56,221,215 = aesenclast %xmm7,%xmm2. */
3003 .byte 102,15,56,221,215
3004 .byte 0xf3,0xc3
3005 .cfi_endproc
3006 .size __ocb_encrypt1,.-__ocb_encrypt1
3007
/*
 * aesni_ocb_decrypt: AES-NI OCB-mode bulk decryption (SysV AMD64 ABI).
 * Register arguments as used below:
 *   rdi = input (ciphertext), rsi = output (plaintext), rdx = block count,
 *   rcx = AES key schedule (240(%rcx) = rounds*? — shifted <<4 below),
 *   r8  = starting block number, r9  = pointer to the current offset value;
 *   8(%rsp)/16(%rsp) (read via rax) = presumably the L-value table (rbx) and
 *   the checksum pointer (rbp) — TODO confirm against the C prototype.
 * Processes 6 blocks per grand-loop iteration via __ocb_decrypt6, then a
 * 1..5-block tail via __ocb_decrypt1/4/6.  Decrypted plaintext is folded
 * into the checksum (xmm8) here in the caller — note this is the mirror of
 * the encrypt path, where __ocb_encrypt* fold the plaintext themselves.
 * On exit the updated checksum and offset are stored back and all xmm
 * registers are cleared (avoid leaking key material).
 */
3008 .globl aesni_ocb_decrypt
3009 .type aesni_ocb_decrypt,@function
3010 .align 32
3011 aesni_ocb_decrypt:
3012 .cfi_startproc
3013 leaq (%rsp),%rax
3014 pushq %rbx
3015 .cfi_adjust_cfa_offset 8
3016 .cfi_offset %rbx,-16
3017 pushq %rbp
3018 .cfi_adjust_cfa_offset 8
3019 .cfi_offset %rbp,-24
3020 pushq %r12
3021 .cfi_adjust_cfa_offset 8
3022 .cfi_offset %r12,-32
3023 pushq %r13
3024 .cfi_adjust_cfa_offset 8
3025 .cfi_offset %r13,-40
3026 pushq %r14
3027 .cfi_adjust_cfa_offset 8
3028 .cfi_offset %r14,-48
/* Stack args (above the return address, rax = original rsp). */
3029 movq 8(%rax),%rbx
3030 movq 8+8(%rax),%rbp
3031 
/* r10d = 16 * rounds; xmm9 = key0 ^ last round key (whitening trick),
 * xmm15 = current offset ^ last round key. */
3032 movl 240(%rcx),%r10d
3033 movq %rcx,%r11
3034 shll $4,%r10d
3035 movups (%rcx),%xmm9
3036 movups 16(%rcx,%r10,1),%xmm1
3037 
3038 movdqu (%r9),%xmm15
3039 pxor %xmm1,%xmm9
3040 pxor %xmm1,%xmm15
3041 
/* rcx = end of key schedule; rax/r10 = negative round-loop counter used by
 * the __ocb_decrypt* helpers ((%rcx,%rax,1) indexing). */
3042 movl $16+32,%eax
3043 leaq 32(%r11,%r10,1),%rcx
3044 movups 16(%r11),%xmm1
3045 subq %r10,%rax
3046 movq %rax,%r10
3047 
3048 movdqu (%rbx),%xmm10
3049 movdqu (%rbp),%xmm8
3050 
/* Odd starting block number: peel one block first so the main loop can use
 * the fixed ntz pattern for i+1,i+3,i+5. */
3051 testq $1,%r8
3052 jnz .Locb_dec_odd
3053 
3054 bsfq %r8,%r12
3055 addq $1,%r8
3056 shlq $4,%r12
3057 movdqu (%rbx,%r12,1),%xmm7
3058 movdqu (%rdi),%xmm2
3059 leaq 16(%rdi),%rdi
3060 
3061 call __ocb_decrypt1
3062 
/* Save new offset, write plaintext, fold it into the checksum. */
3063 movdqa %xmm7,%xmm15
3064 movups %xmm2,(%rsi)
3065 xorps %xmm2,%xmm8
3066 leaq 16(%rsi),%rsi
3067 subq $1,%rdx
3068 jz .Locb_dec_done
3069 
3070 .Locb_dec_odd:
/* Precompute ntz-based table indices for blocks i+1, i+3, i+5 (even blocks
 * use the running xmm10 chain inside the helper). */
3071 leaq 1(%r8),%r12
3072 leaq 3(%r8),%r13
3073 leaq 5(%r8),%r14
3074 leaq 6(%r8),%r8
3075 bsfq %r12,%r12
3076 bsfq %r13,%r13
3077 bsfq %r14,%r14
3078 shlq $4,%r12
3079 shlq $4,%r13
3080 shlq $4,%r14
3081 
3082 subq $6,%rdx
3083 jc .Locb_dec_short
3084 jmp .Locb_dec_grandloop
3085 
3086 .align 32
/* Main loop: 6 blocks per iteration. */
3087 .Locb_dec_grandloop:
3088 movdqu 0(%rdi),%xmm2
3089 movdqu 16(%rdi),%xmm3
3090 movdqu 32(%rdi),%xmm4
3091 movdqu 48(%rdi),%xmm5
3092 movdqu 64(%rdi),%xmm6
3093 movdqu 80(%rdi),%xmm7
3094 leaq 96(%rdi),%rdi
3095 
3096 call __ocb_decrypt6
3097 
/* Store plaintext and accumulate it into the checksum (decrypt-side rule). */
3098 movups %xmm2,0(%rsi)
3099 pxor %xmm2,%xmm8
3100 movups %xmm3,16(%rsi)
3101 pxor %xmm3,%xmm8
3102 movups %xmm4,32(%rsi)
3103 pxor %xmm4,%xmm8
3104 movups %xmm5,48(%rsi)
3105 pxor %xmm5,%xmm8
3106 movups %xmm6,64(%rsi)
3107 pxor %xmm6,%xmm8
3108 movups %xmm7,80(%rsi)
3109 pxor %xmm7,%xmm8
3110 leaq 96(%rsi),%rsi
3111 subq $6,%rdx
3112 jnc .Locb_dec_grandloop
3113 
/* Tail: 0..5 remaining blocks; dispatch on the exact count. */
3114 .Locb_dec_short:
3115 addq $6,%rdx
3116 jz .Locb_dec_done
3117 
3118 movdqu 0(%rdi),%xmm2
3119 cmpq $2,%rdx
3120 jb .Locb_dec_one
3121 movdqu 16(%rdi),%xmm3
3122 je .Locb_dec_two
3123 
3124 movdqu 32(%rdi),%xmm4
3125 cmpq $4,%rdx
3126 jb .Locb_dec_three
3127 movdqu 48(%rdi),%xmm5
3128 je .Locb_dec_four
3129 
/* Five blocks: run the 6-wide helper with a zeroed sixth lane. */
3130 movdqu 64(%rdi),%xmm6
3131 pxor %xmm7,%xmm7
3132 
3133 call __ocb_decrypt6
3134 
3135 movdqa %xmm14,%xmm15
3136 movups %xmm2,0(%rsi)
3137 pxor %xmm2,%xmm8
3138 movups %xmm3,16(%rsi)
3139 pxor %xmm3,%xmm8
3140 movups %xmm4,32(%rsi)
3141 pxor %xmm4,%xmm8
3142 movups %xmm5,48(%rsi)
3143 pxor %xmm5,%xmm8
3144 movups %xmm6,64(%rsi)
3145 pxor %xmm6,%xmm8
3146 
3147 jmp .Locb_dec_done
3148 
3149 .align 16
3150 .Locb_dec_one:
3151 movdqa %xmm10,%xmm7
3152 
3153 call __ocb_decrypt1
3154 
3155 movdqa %xmm7,%xmm15
3156 movups %xmm2,0(%rsi)
3157 xorps %xmm2,%xmm8
3158 jmp .Locb_dec_done
3159 
3160 .align 16
3161 .Locb_dec_two:
/* Two blocks via the 4-wide helper with zeroed lanes 3/4. */
3162 pxor %xmm4,%xmm4
3163 pxor %xmm5,%xmm5
3164 
3165 call __ocb_decrypt4
3166 
3167 movdqa %xmm11,%xmm15
3168 movups %xmm2,0(%rsi)
3169 xorps %xmm2,%xmm8
3170 movups %xmm3,16(%rsi)
3171 xorps %xmm3,%xmm8
3172 
3173 jmp .Locb_dec_done
3174 
3175 .align 16
3176 .Locb_dec_three:
3177 pxor %xmm5,%xmm5
3178 
3179 call __ocb_decrypt4
3180 
3181 movdqa %xmm12,%xmm15
3182 movups %xmm2,0(%rsi)
3183 xorps %xmm2,%xmm8
3184 movups %xmm3,16(%rsi)
3185 xorps %xmm3,%xmm8
3186 movups %xmm4,32(%rsi)
3187 xorps %xmm4,%xmm8
3188 
3189 jmp .Locb_dec_done
3190 
3191 .align 16
3192 .Locb_dec_four:
3193 call __ocb_decrypt4
3194 
3195 movdqa %xmm13,%xmm15
3196 movups %xmm2,0(%rsi)
3197 pxor %xmm2,%xmm8
3198 movups %xmm3,16(%rsi)
3199 pxor %xmm3,%xmm8
3200 movups %xmm4,32(%rsi)
3201 pxor %xmm4,%xmm8
3202 movups %xmm5,48(%rsi)
3203 pxor %xmm5,%xmm8
3204 
3205 .Locb_dec_done:
/* Un-whiten the offset (remove the folded last round key, still in xmm0's
 * role via xmm15^key-last from the prologue), store checksum and offset. */
3206 pxor %xmm0,%xmm15
3207 movdqu %xmm8,(%rbp)
3208 movdqu %xmm15,(%r9)
3209 
/* Scrub all xmm registers before returning (key/offset hygiene). */
3210 xorps %xmm0,%xmm0
3211 pxor %xmm1,%xmm1
3212 pxor %xmm2,%xmm2
3213 pxor %xmm3,%xmm3
3214 pxor %xmm4,%xmm4
3215 pxor %xmm5,%xmm5
3216 pxor %xmm6,%xmm6
3217 pxor %xmm7,%xmm7
3218 pxor %xmm8,%xmm8
3219 pxor %xmm9,%xmm9
3220 pxor %xmm10,%xmm10
3221 pxor %xmm11,%xmm11
3222 pxor %xmm12,%xmm12
3223 pxor %xmm13,%xmm13
3224 pxor %xmm14,%xmm14
3225 pxor %xmm15,%xmm15
/* Restore callee-saved registers (pushed in the prologue) and return. */
3226 leaq 40(%rsp),%rax
3227 .cfi_def_cfa %rax,8
3228 movq -40(%rax),%r14
3229 .cfi_restore %r14
3230 movq -32(%rax),%r13
3231 .cfi_restore %r13
3232 movq -24(%rax),%r12
3233 .cfi_restore %r12
3234 movq -16(%rax),%rbp
3235 .cfi_restore %rbp
3236 movq -8(%rax),%rbx
3237 .cfi_restore %rbx
3238 leaq (%rax),%rsp
3239 .cfi_def_cfa_register %rsp
3240 .Locb_dec_epilogue:
3241 .byte 0xf3,0xc3
3242 .cfi_endproc
3243 .size aesni_ocb_decrypt,.-aesni_ocb_decrypt
3244
/*
 * __ocb_decrypt6: decrypt six AES blocks (xmm2..xmm7) in parallel for OCB.
 * Mirror of __ocb_encrypt6 using aesdec/aesdeclast; unlike the encrypt
 * helper it does NOT touch the checksum (xmm8) — the caller folds the
 * recovered plaintext in after the call.  Register contract: r11 = key
 * schedule base, rbx = offset (L) table, r12/r13/r14 = scaled table indices
 * for the NEXT call (computed here from r8), xmm9 = key0, xmm15 = previous
 * offset in, xmm10..xmm15 = per-block offsets out.  Clobbers xmm0/xmm1.
 */
3245 .type __ocb_decrypt6,@function
3246 .align 32
3247 __ocb_decrypt6:
3248 .cfi_startproc
/* Chain per-block offsets and whiten each ciphertext block with its offset. */
3249 pxor %xmm9,%xmm15
3250 movdqu (%rbx,%r12,1),%xmm11
3251 movdqa %xmm10,%xmm12
3252 movdqu (%rbx,%r13,1),%xmm13
3253 movdqa %xmm10,%xmm14
3254 pxor %xmm15,%xmm10
3255 movdqu (%rbx,%r14,1),%xmm15
3256 pxor %xmm10,%xmm11
3257 pxor %xmm10,%xmm2
3258 pxor %xmm11,%xmm12
3259 pxor %xmm11,%xmm3
3260 pxor %xmm12,%xmm13
3261 pxor %xmm12,%xmm4
3262 pxor %xmm13,%xmm14
3263 pxor %xmm13,%xmm5
3264 pxor %xmm14,%xmm15
3265 pxor %xmm14,%xmm6
3266 pxor %xmm15,%xmm7
3267 movups 32(%r11),%xmm0
3268 
/* Prepare ntz indices for the next call while the AES rounds execute. */
3269 leaq 1(%r8),%r12
3270 leaq 3(%r8),%r13
3271 leaq 5(%r8),%r14
3272 addq $6,%r8
3273 pxor %xmm9,%xmm10
3274 bsfq %r12,%r12
3275 bsfq %r13,%r13
3276 bsfq %r14,%r14
3277 
/* .byte 102,15,56,222,xx = aesdec %xmm1/%xmm0,%xmmN. */
3278 .byte 102,15,56,222,209
3279 .byte 102,15,56,222,217
3280 .byte 102,15,56,222,225
3281 .byte 102,15,56,222,233
3282 pxor %xmm9,%xmm11
3283 pxor %xmm9,%xmm12
3284 .byte 102,15,56,222,241
3285 pxor %xmm9,%xmm13
3286 pxor %xmm9,%xmm14
3287 .byte 102,15,56,222,249
3288 movups 48(%r11),%xmm1
3289 pxor %xmm9,%xmm15
3290 
3291 .byte 102,15,56,222,208
3292 .byte 102,15,56,222,216
3293 .byte 102,15,56,222,224
3294 .byte 102,15,56,222,232
3295 .byte 102,15,56,222,240
3296 .byte 102,15,56,222,248
3297 movups 64(%r11),%xmm0
3298 shlq $4,%r12
3299 shlq $4,%r13
3300 jmp .Locb_dec_loop6
3301 
3302 .align 32
/* Two rounds per iteration; rax counts toward zero, keys at rcx+rax. */
3303 .Locb_dec_loop6:
3304 .byte 102,15,56,222,209
3305 .byte 102,15,56,222,217
3306 .byte 102,15,56,222,225
3307 .byte 102,15,56,222,233
3308 .byte 102,15,56,222,241
3309 .byte 102,15,56,222,249
3310 movups (%rcx,%rax,1),%xmm1
3311 addq $32,%rax
3312 
3313 .byte 102,15,56,222,208
3314 .byte 102,15,56,222,216
3315 .byte 102,15,56,222,224
3316 .byte 102,15,56,222,232
3317 .byte 102,15,56,222,240
3318 .byte 102,15,56,222,248
3319 movups -16(%rcx,%rax,1),%xmm0
3320 jnz .Locb_dec_loop6
3321 
3322 .byte 102,15,56,222,209
3323 .byte 102,15,56,222,217
3324 .byte 102,15,56,222,225
3325 .byte 102,15,56,222,233
3326 .byte 102,15,56,222,241
3327 .byte 102,15,56,222,249
3328 movups 16(%r11),%xmm1
3329 shlq $4,%r14
3330 
/* .byte 102,65,15,56,223,xx = aesdeclast %xmm10..15,%xmmN. */
3331 .byte 102,65,15,56,223,210
3332 movdqu (%rbx),%xmm10
3333 movq %r10,%rax
3334 .byte 102,65,15,56,223,219
3335 .byte 102,65,15,56,223,228
3336 .byte 102,65,15,56,223,237
3337 .byte 102,65,15,56,223,246
3338 .byte 102,65,15,56,223,255
3339 .byte 0xf3,0xc3
3340 .cfi_endproc
3341 .size __ocb_decrypt6,.-__ocb_decrypt6
3342
/*
 * __ocb_decrypt4: decrypt four AES blocks (xmm2..xmm5) in parallel for OCB.
 * Mirror of __ocb_encrypt4 using aesdec/aesdeclast; checksum handling is
 * left to the caller.  Caller supplies scaled table indices in r12/r13;
 * r11 = key schedule, rbx = offset table, xmm9 = key0, xmm15 = previous
 * offset.  Clobbers xmm0/xmm1 and flags; rax reset from r10 on exit.
 */
3343 .type __ocb_decrypt4,@function
3344 .align 32
3345 __ocb_decrypt4:
3346 .cfi_startproc
/* Build offsets xmm10..xmm13 and whiten the four ciphertext blocks. */
3347 pxor %xmm9,%xmm15
3348 movdqu (%rbx,%r12,1),%xmm11
3349 movdqa %xmm10,%xmm12
3350 movdqu (%rbx,%r13,1),%xmm13
3351 pxor %xmm15,%xmm10
3352 pxor %xmm10,%xmm11
3353 pxor %xmm10,%xmm2
3354 pxor %xmm11,%xmm12
3355 pxor %xmm11,%xmm3
3356 pxor %xmm12,%xmm13
3357 pxor %xmm12,%xmm4
3358 pxor %xmm13,%xmm5
3359 movups 32(%r11),%xmm0
3360 
/* Fold key0 into the offsets so they serve as aesdeclast round keys. */
3361 pxor %xmm9,%xmm10
3362 pxor %xmm9,%xmm11
3363 pxor %xmm9,%xmm12
3364 pxor %xmm9,%xmm13
3365 
/* .byte 102,15,56,222,xx = aesdec. */
3366 .byte 102,15,56,222,209
3367 .byte 102,15,56,222,217
3368 .byte 102,15,56,222,225
3369 .byte 102,15,56,222,233
3370 movups 48(%r11),%xmm1
3371 
3372 .byte 102,15,56,222,208
3373 .byte 102,15,56,222,216
3374 .byte 102,15,56,222,224
3375 .byte 102,15,56,222,232
3376 movups 64(%r11),%xmm0
3377 jmp .Locb_dec_loop4
3378 
3379 .align 32
/* Two rounds per iteration; rax counts toward zero, keys at rcx+rax. */
3380 .Locb_dec_loop4:
3381 .byte 102,15,56,222,209
3382 .byte 102,15,56,222,217
3383 .byte 102,15,56,222,225
3384 .byte 102,15,56,222,233
3385 movups (%rcx,%rax,1),%xmm1
3386 addq $32,%rax
3387 
3388 .byte 102,15,56,222,208
3389 .byte 102,15,56,222,216
3390 .byte 102,15,56,222,224
3391 .byte 102,15,56,222,232
3392 movups -16(%rcx,%rax,1),%xmm0
3393 jnz .Locb_dec_loop4
3394 
3395 .byte 102,15,56,222,209
3396 .byte 102,15,56,222,217
3397 .byte 102,15,56,222,225
3398 .byte 102,15,56,222,233
3399 movups 16(%r11),%xmm1
3400 movq %r10,%rax
3401 
/* aesdeclast with per-block offsets xmm10..xmm13 as final round keys. */
3402 .byte 102,65,15,56,223,210
3403 .byte 102,65,15,56,223,219
3404 .byte 102,65,15,56,223,228
3405 .byte 102,65,15,56,223,237
3406 .byte 0xf3,0xc3
3407 .cfi_endproc
3408 .size __ocb_decrypt4,.-__ocb_decrypt4
3409
/*
 * __ocb_decrypt1: decrypt a single AES block (xmm2) for OCB mode.
 * Mirror of __ocb_encrypt1 using aesdec/aesdeclast; note it does NOT fold
 * into the checksum (xmm8) — on decrypt the caller accumulates the
 * recovered plaintext.  xmm7 in = L-value, xmm7 out = updated offset;
 * xmm15 = previous offset; xmm9 = key0.  Clobbers xmm0/xmm1 and flags.
 */
3410 .type __ocb_decrypt1,@function
3411 .align 32
3412 __ocb_decrypt1:
3413 .cfi_startproc
/* offset ^= L; fold key0 in; whiten the ciphertext block. */
3414 pxor %xmm15,%xmm7
3415 pxor %xmm9,%xmm7
3416 pxor %xmm7,%xmm2
3417 movups 32(%r11),%xmm0
3418 
/* .byte 102,15,56,222,xx = aesdec. */
3419 .byte 102,15,56,222,209
3420 movups 48(%r11),%xmm1
/* Undo the key0 fold so xmm7 is the plain offset again for the caller. */
3421 pxor %xmm9,%xmm7
3422 
3423 .byte 102,15,56,222,208
3424 movups 64(%r11),%xmm0
3425 jmp .Locb_dec_loop1
3426 
3427 .align 32
/* Two rounds per iteration; rax counts toward zero, keys at rcx+rax. */
3428 .Locb_dec_loop1:
3429 .byte 102,15,56,222,209
3430 movups (%rcx,%rax,1),%xmm1
3431 addq $32,%rax
3432 
3433 .byte 102,15,56,222,208
3434 movups -16(%rcx,%rax,1),%xmm0
3435 jnz .Locb_dec_loop1
3436 
3437 .byte 102,15,56,222,209
3438 movups 16(%r11),%xmm1
3439 movq %r10,%rax
3440 
/* .byte 102,15,56,223,215 = aesdeclast %xmm7,%xmm2. */
3441 .byte 102,15,56,223,215
3442 .byte 0xf3,0xc3
3443 .cfi_endproc
3444 .size __ocb_decrypt1,.-__ocb_decrypt1
/*
 * aesni_cbc_encrypt: AES-NI CBC-mode encrypt or decrypt (SysV AMD64 ABI).
 * Arguments as used below:
 *   rdi = input, rsi = output, rdx = length in bytes, rcx = key schedule
 *   (240(%rcx) = round count), r8 = IV buffer (read and updated in place),
 *   r9d = direction flag — non-zero selects the encrypt path, zero the
 *   decrypt path (.Lcbc_decrypt).
 * Encryption is inherently serial (each block chains through the previous
 * ciphertext), so it runs one block at a time; decryption is parallelized
 * 8 / 6 / fewer blocks wide depending on length and on a CPU-capability
 * test of OPENSSL_ia32cap_P.  Partial final blocks are handled with
 * rep-movsb copies via a 16-byte stack slot.  xmm state holding key or
 * plaintext material is zeroed before returning.
 */
3445 .globl aesni_cbc_encrypt
3446 .type aesni_cbc_encrypt,@function
3447 .align 16
3448 aesni_cbc_encrypt:
3449 .cfi_startproc
3450 testq %rdx,%rdx
3451 jz .Lcbc_ret
3452 
3453 movl 240(%rcx),%r10d
3454 movq %rcx,%r11
3455 testl %r9d,%r9d
3456 jz .Lcbc_decrypt
3457 
/* ---- CBC encrypt: one block at a time, xmm2 carries the chaining value. */
3458 movups (%r8),%xmm2
3459 movl %r10d,%eax
3460 cmpq $16,%rdx
3461 jb .Lcbc_enc_tail
3462 subq $16,%rdx
3463 jmp .Lcbc_enc_loop
3464 .align 16
3465 .Lcbc_enc_loop:
3466 movups (%rdi),%xmm3
3467 leaq 16(%rdi),%rdi
3468 
3469 movups (%rcx),%xmm0
3470 movups 16(%rcx),%xmm1
3471 xorps %xmm0,%xmm3
3472 leaq 32(%rcx),%rcx
3473 xorps %xmm3,%xmm2
/* Single-block AES round loop; .byte 102,15,56,220,209 = aesenc %xmm1,%xmm2,
 * .byte 102,15,56,221,209 = aesenclast. */
3474 .Loop_enc1_15:
3475 .byte 102,15,56,220,209
3476 decl %eax
3477 movups (%rcx),%xmm1
3478 leaq 16(%rcx),%rcx
3479 jnz .Loop_enc1_15
3480 .byte 102,15,56,221,209
3481 movl %r10d,%eax
3482 movq %r11,%rcx
3483 movups %xmm2,0(%rsi)
3484 leaq 16(%rsi),%rsi
3485 subq $16,%rdx
3486 jnc .Lcbc_enc_loop
3487 addq $16,%rdx
3488 jnz .Lcbc_enc_tail
/* Done: write back the final IV (last ciphertext block) and scrub. */
3489 pxor %xmm0,%xmm0
3490 pxor %xmm1,%xmm1
3491 movups %xmm2,(%r8)
3492 pxor %xmm2,%xmm2
3493 pxor %xmm3,%xmm3
3494 jmp .Lcbc_ret
3495 
/* Partial last block: copy rdx input bytes after the output pointer, then
 * zero-pad to 16 (.long 0x9066A4F3 = rep movsb, 0x9066AAF3 = rep stosb,
 * each padded with 66 90 for alignment), and loop once more. */
3496 .Lcbc_enc_tail:
3497 movq %rdx,%rcx
3498 xchgq %rdi,%rsi
3499 .long 0x9066A4F3
3500 movl $16,%ecx
3501 subq %rdx,%rcx
3502 xorl %eax,%eax
3503 .long 0x9066AAF3
3504 leaq -16(%rdi),%rdi
3505 movl %r10d,%eax
3506 movq %rdi,%rsi
3507 movq %r11,%rcx
3508 xorq %rdx,%rdx
3509 jmp .Lcbc_enc_loop
3510 
3511 .align 16
/* ---- CBC decrypt. Exactly one block is special-cased without a frame. */
3512 .Lcbc_decrypt:
3513 cmpq $16,%rdx
3514 jne .Lcbc_decrypt_bulk
3515 
3516 
3517 
3518 movdqu (%rdi),%xmm2
3519 movdqu (%r8),%xmm3
3520 movdqa %xmm2,%xmm4
3521 movups (%rcx),%xmm0
3522 movups 16(%rcx),%xmm1
3523 leaq 32(%rcx),%rcx
3524 xorps %xmm0,%xmm2
3525 .Loop_dec1_16:
3526 .byte 102,15,56,222,209
3527 decl %r10d
3528 movups (%rcx),%xmm1
3529 leaq 16(%rcx),%rcx
3530 jnz .Loop_dec1_16
3531 .byte 102,15,56,223,209
3532 pxor %xmm0,%xmm0
3533 pxor %xmm1,%xmm1
/* New IV = the consumed ciphertext block (saved in xmm4). */
3534 movdqu %xmm4,(%r8)
3535 xorps %xmm3,%xmm2
3536 pxor %xmm3,%xmm3
3537 movups %xmm2,(%rsi)
3538 pxor %xmm2,%xmm2
3539 jmp .Lcbc_ret
3540 .align 16
/* Bulk decrypt: small aligned stack frame (16 bytes, used for partial-block
 * staging); r11 anchors the caller's rsp for the epilogue. */
3541 .Lcbc_decrypt_bulk:
3542 leaq (%rsp),%r11
3543 .cfi_def_cfa_register %r11
3544 pushq %rbp
3545 .cfi_offset %rbp,-16
3546 subq $16,%rsp
3547 andq $-16,%rsp
3548 movq %rcx,%rbp
3549 movups (%r8),%xmm10
3550 movl %r10d,%eax
3551 cmpq $0x50,%rdx
3552 jbe .Lcbc_dec_tail
3553 
/* Preload 6 ciphertext blocks; xmm10..xmm15 keep the previous-ciphertext
 * values needed to un-chain after decryption. */
3554 movups (%rcx),%xmm0
3555 movdqu 0(%rdi),%xmm2
3556 movdqu 16(%rdi),%xmm3
3557 movdqa %xmm2,%xmm11
3558 movdqu 32(%rdi),%xmm4
3559 movdqa %xmm3,%xmm12
3560 movdqu 48(%rdi),%xmm5
3561 movdqa %xmm4,%xmm13
3562 movdqu 64(%rdi),%xmm6
3563 movdqa %xmm5,%xmm14
3564 movdqu 80(%rdi),%xmm7
3565 movdqa %xmm6,%xmm15
/* CPU capability word decides 8-wide vs 6-wide inner loop below. */
3566 movl OPENSSL_ia32cap_P+4(%rip),%r9d
3567 cmpq $0x70,%rdx
3568 jbe .Lcbc_dec_six_or_seven
3569 
3570 andl $71303168,%r9d
3571 subq $0x50,%rdx
3572 cmpl $4194304,%r9d
3573 je .Lcbc_dec_loop6_enter
3574 subq $0x20,%rdx
3575 leaq 112(%rcx),%rcx
3576 jmp .Lcbc_dec_loop8_enter
3577 .align 16
/* ---- 8-wide decrypt loop.  Key schedule addressed at N-112(%rcx). */
3578 .Lcbc_dec_loop8:
3579 movups %xmm9,(%rsi)
3580 leaq 16(%rsi),%rsi
3581 .Lcbc_dec_loop8_enter:
3582 movdqu 96(%rdi),%xmm8
3583 pxor %xmm0,%xmm2
3584 movdqu 112(%rdi),%xmm9
3585 pxor %xmm0,%xmm3
3586 movups 16-112(%rcx),%xmm1
3587 pxor %xmm0,%xmm4
/* rbp becomes a branchless input-dependent pointer: -1 + carry masked with
 * 128 then added to rdi (see adcq/andq/addq below). */
3588 movq $-1,%rbp
3589 cmpq $0x70,%rdx
3590 pxor %xmm0,%xmm5
3591 pxor %xmm0,%xmm6
3592 pxor %xmm0,%xmm7
3593 pxor %xmm0,%xmm8
3594 
/* .byte 102,15,56,222,xx = aesdec; 102,68,...  = aesdec on xmm8/xmm9. */
3595 .byte 102,15,56,222,209
3596 pxor %xmm0,%xmm9
3597 movups 32-112(%rcx),%xmm0
3598 .byte 102,15,56,222,217
3599 .byte 102,15,56,222,225
3600 .byte 102,15,56,222,233
3601 .byte 102,15,56,222,241
3602 .byte 102,15,56,222,249
3603 .byte 102,68,15,56,222,193
3604 adcq $0,%rbp
3605 andq $128,%rbp
3606 .byte 102,68,15,56,222,201
3607 addq %rdi,%rbp
3608 movups 48-112(%rcx),%xmm1
3609 .byte 102,15,56,222,208
3610 .byte 102,15,56,222,216
3611 .byte 102,15,56,222,224
3612 .byte 102,15,56,222,232
3613 .byte 102,15,56,222,240
3614 .byte 102,15,56,222,248
3615 .byte 102,68,15,56,222,192
3616 .byte 102,68,15,56,222,200
3617 movups 64-112(%rcx),%xmm0
3618 nop
3619 .byte 102,15,56,222,209
3620 .byte 102,15,56,222,217
3621 .byte 102,15,56,222,225
3622 .byte 102,15,56,222,233
3623 .byte 102,15,56,222,241
3624 .byte 102,15,56,222,249
3625 .byte 102,68,15,56,222,193
3626 .byte 102,68,15,56,222,201
3627 movups 80-112(%rcx),%xmm1
3628 nop
3629 .byte 102,15,56,222,208
3630 .byte 102,15,56,222,216
3631 .byte 102,15,56,222,224
3632 .byte 102,15,56,222,232
3633 .byte 102,15,56,222,240
3634 .byte 102,15,56,222,248
3635 .byte 102,68,15,56,222,192
3636 .byte 102,68,15,56,222,200
3637 movups 96-112(%rcx),%xmm0
3638 nop
3639 .byte 102,15,56,222,209
3640 .byte 102,15,56,222,217
3641 .byte 102,15,56,222,225
3642 .byte 102,15,56,222,233
3643 .byte 102,15,56,222,241
3644 .byte 102,15,56,222,249
3645 .byte 102,68,15,56,222,193
3646 .byte 102,68,15,56,222,201
3647 movups 112-112(%rcx),%xmm1
3648 nop
3649 .byte 102,15,56,222,208
3650 .byte 102,15,56,222,216
3651 .byte 102,15,56,222,224
3652 .byte 102,15,56,222,232
3653 .byte 102,15,56,222,240
3654 .byte 102,15,56,222,248
3655 .byte 102,68,15,56,222,192
3656 .byte 102,68,15,56,222,200
3657 movups 128-112(%rcx),%xmm0
3658 nop
3659 .byte 102,15,56,222,209
3660 .byte 102,15,56,222,217
3661 .byte 102,15,56,222,225
3662 .byte 102,15,56,222,233
3663 .byte 102,15,56,222,241
3664 .byte 102,15,56,222,249
3665 .byte 102,68,15,56,222,193
3666 .byte 102,68,15,56,222,201
3667 movups 144-112(%rcx),%xmm1
/* Branch on key length: <11 rounds = AES-128, ==11 = AES-192, else AES-256. */
3668 cmpl $11,%eax
3669 .byte 102,15,56,222,208
3670 .byte 102,15,56,222,216
3671 .byte 102,15,56,222,224
3672 .byte 102,15,56,222,232
3673 .byte 102,15,56,222,240
3674 .byte 102,15,56,222,248
3675 .byte 102,68,15,56,222,192
3676 .byte 102,68,15,56,222,200
3677 movups 160-112(%rcx),%xmm0
3678 jb .Lcbc_dec_done
3679 .byte 102,15,56,222,209
3680 .byte 102,15,56,222,217
3681 .byte 102,15,56,222,225
3682 .byte 102,15,56,222,233
3683 .byte 102,15,56,222,241
3684 .byte 102,15,56,222,249
3685 .byte 102,68,15,56,222,193
3686 .byte 102,68,15,56,222,201
3687 movups 176-112(%rcx),%xmm1
3688 nop
3689 .byte 102,15,56,222,208
3690 .byte 102,15,56,222,216
3691 .byte 102,15,56,222,224
3692 .byte 102,15,56,222,232
3693 .byte 102,15,56,222,240
3694 .byte 102,15,56,222,248
3695 .byte 102,68,15,56,222,192
3696 .byte 102,68,15,56,222,200
3697 movups 192-112(%rcx),%xmm0
3698 je .Lcbc_dec_done
3699 .byte 102,15,56,222,209
3700 .byte 102,15,56,222,217
3701 .byte 102,15,56,222,225
3702 .byte 102,15,56,222,233
3703 .byte 102,15,56,222,241
3704 .byte 102,15,56,222,249
3705 .byte 102,68,15,56,222,193
3706 .byte 102,68,15,56,222,201
3707 movups 208-112(%rcx),%xmm1
3708 nop
3709 .byte 102,15,56,222,208
3710 .byte 102,15,56,222,216
3711 .byte 102,15,56,222,224
3712 .byte 102,15,56,222,232
3713 .byte 102,15,56,222,240
3714 .byte 102,15,56,222,248
3715 .byte 102,68,15,56,222,192
3716 .byte 102,68,15,56,222,200
3717 movups 224-112(%rcx),%xmm0
3718 jmp .Lcbc_dec_done
3719 .align 16
/* Last two rounds: XOR previous-ciphertext values into the whitening keys
 * so aesdeclast performs the CBC un-chaining in one step. */
3720 .Lcbc_dec_done:
3721 .byte 102,15,56,222,209
3722 .byte 102,15,56,222,217
3723 pxor %xmm0,%xmm10
3724 pxor %xmm0,%xmm11
3725 .byte 102,15,56,222,225
3726 .byte 102,15,56,222,233
3727 pxor %xmm0,%xmm12
3728 pxor %xmm0,%xmm13
3729 .byte 102,15,56,222,241
3730 .byte 102,15,56,222,249
3731 pxor %xmm0,%xmm14
3732 pxor %xmm0,%xmm15
3733 .byte 102,68,15,56,222,193
3734 .byte 102,68,15,56,222,201
3735 movdqu 80(%rdi),%xmm1
3736 
/* .byte 102,65,15,56,223,xx = aesdeclast with xmm10..xmm15 as keys; reload
 * the next iteration's previous-ciphertext blocks while finishing. */
3737 .byte 102,65,15,56,223,210
3738 movdqu 96(%rdi),%xmm10
3739 pxor %xmm0,%xmm1
3740 .byte 102,65,15,56,223,219
3741 pxor %xmm0,%xmm10
3742 movdqu 112(%rdi),%xmm0
3743 .byte 102,65,15,56,223,228
3744 leaq 128(%rdi),%rdi
3745 movdqu 0(%rbp),%xmm11
3746 .byte 102,65,15,56,223,237
3747 .byte 102,65,15,56,223,246
3748 movdqu 16(%rbp),%xmm12
3749 movdqu 32(%rbp),%xmm13
3750 .byte 102,65,15,56,223,255
3751 .byte 102,68,15,56,223,193
3752 movdqu 48(%rbp),%xmm14
3753 movdqu 64(%rbp),%xmm15
3754 .byte 102,69,15,56,223,202
3755 movdqa %xmm0,%xmm10
3756 movdqu 80(%rbp),%xmm1
3757 movups -112(%rcx),%xmm0
3758 
3759 movups %xmm2,(%rsi)
3760 movdqa %xmm11,%xmm2
3761 movups %xmm3,16(%rsi)
3762 movdqa %xmm12,%xmm3
3763 movups %xmm4,32(%rsi)
3764 movdqa %xmm13,%xmm4
3765 movups %xmm5,48(%rsi)
3766 movdqa %xmm14,%xmm5
3767 movups %xmm6,64(%rsi)
3768 movdqa %xmm15,%xmm6
3769 movups %xmm7,80(%rsi)
3770 movdqa %xmm1,%xmm7
3771 movups %xmm8,96(%rsi)
3772 leaq 112(%rsi),%rsi
3773 
3774 subq $0x80,%rdx
3775 ja .Lcbc_dec_loop8
3776 
3777 movaps %xmm9,%xmm2
3778 leaq -112(%rcx),%rcx
3779 addq $0x70,%rdx
3780 jle .Lcbc_dec_clear_tail_collected
3781 movups %xmm9,(%rsi)
3782 leaq 16(%rsi),%rsi
3783 cmpq $0x50,%rdx
3784 jbe .Lcbc_dec_tail
3785 
3786 movaps %xmm11,%xmm2
/* ---- 6- or 7-block case (0x51..0x70 bytes). */
3787 .Lcbc_dec_six_or_seven:
3788 cmpq $0x60,%rdx
3789 ja .Lcbc_dec_seven
3790 
3791 movaps %xmm7,%xmm8
3792 call _aesni_decrypt6
/* Un-chain: plaintext_i = D(c_i) ^ c_{i-1}; xmm10 tracks the next IV. */
3793 pxor %xmm10,%xmm2
3794 movaps %xmm8,%xmm10
3795 pxor %xmm11,%xmm3
3796 movdqu %xmm2,(%rsi)
3797 pxor %xmm12,%xmm4
3798 movdqu %xmm3,16(%rsi)
3799 pxor %xmm3,%xmm3
3800 pxor %xmm13,%xmm5
3801 movdqu %xmm4,32(%rsi)
3802 pxor %xmm4,%xmm4
3803 pxor %xmm14,%xmm6
3804 movdqu %xmm5,48(%rsi)
3805 pxor %xmm5,%xmm5
3806 pxor %xmm15,%xmm7
3807 movdqu %xmm6,64(%rsi)
3808 pxor %xmm6,%xmm6
3809 leaq 80(%rsi),%rsi
3810 movdqa %xmm7,%xmm2
3811 pxor %xmm7,%xmm7
3812 jmp .Lcbc_dec_tail_collected
3813 
3814 .align 16
3815 .Lcbc_dec_seven:
3816 movups 96(%rdi),%xmm8
3817 xorps %xmm9,%xmm9
3818 call _aesni_decrypt8
3819 movups 80(%rdi),%xmm9
3820 pxor %xmm10,%xmm2
3821 movups 96(%rdi),%xmm10
3822 pxor %xmm11,%xmm3
3823 movdqu %xmm2,(%rsi)
3824 pxor %xmm12,%xmm4
3825 movdqu %xmm3,16(%rsi)
3826 pxor %xmm3,%xmm3
3827 pxor %xmm13,%xmm5
3828 movdqu %xmm4,32(%rsi)
3829 pxor %xmm4,%xmm4
3830 pxor %xmm14,%xmm6
3831 movdqu %xmm5,48(%rsi)
3832 pxor %xmm5,%xmm5
3833 pxor %xmm15,%xmm7
3834 movdqu %xmm6,64(%rsi)
3835 pxor %xmm6,%xmm6
3836 pxor %xmm9,%xmm8
3837 movdqu %xmm7,80(%rsi)
3838 pxor %xmm7,%xmm7
3839 leaq 96(%rsi),%rsi
3840 movdqa %xmm8,%xmm2
3841 pxor %xmm8,%xmm8
3842 pxor %xmm9,%xmm9
3843 jmp .Lcbc_dec_tail_collected
3844 
3845 .align 16
/* ---- 6-wide decrypt loop (CPUs where 8-wide doesn't pay off). */
3846 .Lcbc_dec_loop6:
3847 movups %xmm7,(%rsi)
3848 leaq 16(%rsi),%rsi
3849 movdqu 0(%rdi),%xmm2
3850 movdqu 16(%rdi),%xmm3
3851 movdqa %xmm2,%xmm11
3852 movdqu 32(%rdi),%xmm4
3853 movdqa %xmm3,%xmm12
3854 movdqu 48(%rdi),%xmm5
3855 movdqa %xmm4,%xmm13
3856 movdqu 64(%rdi),%xmm6
3857 movdqa %xmm5,%xmm14
3858 movdqu 80(%rdi),%xmm7
3859 movdqa %xmm6,%xmm15
3860 .Lcbc_dec_loop6_enter:
3861 leaq 96(%rdi),%rdi
3862 movdqa %xmm7,%xmm8
3863 
3864 call _aesni_decrypt6
3865 
3866 pxor %xmm10,%xmm2
3867 movdqa %xmm8,%xmm10
3868 pxor %xmm11,%xmm3
3869 movdqu %xmm2,(%rsi)
3870 pxor %xmm12,%xmm4
3871 movdqu %xmm3,16(%rsi)
3872 pxor %xmm13,%xmm5
3873 movdqu %xmm4,32(%rsi)
3874 pxor %xmm14,%xmm6
3875 movq %rbp,%rcx
3876 movdqu %xmm5,48(%rsi)
3877 pxor %xmm15,%xmm7
3878 movl %r10d,%eax
3879 movdqu %xmm6,64(%rsi)
3880 leaq 80(%rsi),%rsi
3881 subq $0x60,%rdx
3882 ja .Lcbc_dec_loop6
3883 
3884 movdqa %xmm7,%xmm2
3885 addq $0x50,%rdx
3886 jle .Lcbc_dec_clear_tail_collected
3887 movups %xmm7,(%rsi)
3888 leaq 16(%rsi),%rsi
3889 
/* ---- Tail: at most 5 whole blocks plus a possible partial block. */
3890 .Lcbc_dec_tail:
3891 movups (%rdi),%xmm2
3892 subq $0x10,%rdx
3893 jbe .Lcbc_dec_one
3894 
3895 movups 16(%rdi),%xmm3
3896 movaps %xmm2,%xmm11
3897 subq $0x10,%rdx
3898 jbe .Lcbc_dec_two
3899 
3900 movups 32(%rdi),%xmm4
3901 movaps %xmm3,%xmm12
3902 subq $0x10,%rdx
3903 jbe .Lcbc_dec_three
3904 
3905 movups 48(%rdi),%xmm5
3906 movaps %xmm4,%xmm13
3907 subq $0x10,%rdx
3908 jbe .Lcbc_dec_four
3909 
3910 movups 64(%rdi),%xmm6
3911 movaps %xmm5,%xmm14
3912 movaps %xmm6,%xmm15
3913 xorps %xmm7,%xmm7
3914 call _aesni_decrypt6
3915 pxor %xmm10,%xmm2
3916 movaps %xmm15,%xmm10
3917 pxor %xmm11,%xmm3
3918 movdqu %xmm2,(%rsi)
3919 pxor %xmm12,%xmm4
3920 movdqu %xmm3,16(%rsi)
3921 pxor %xmm3,%xmm3
3922 pxor %xmm13,%xmm5
3923 movdqu %xmm4,32(%rsi)
3924 pxor %xmm4,%xmm4
3925 pxor %xmm14,%xmm6
3926 movdqu %xmm5,48(%rsi)
3927 pxor %xmm5,%xmm5
3928 leaq 64(%rsi),%rsi
3929 movdqa %xmm6,%xmm2
3930 pxor %xmm6,%xmm6
3931 pxor %xmm7,%xmm7
3932 subq $0x10,%rdx
3933 jmp .Lcbc_dec_tail_collected
3934 
3935 .align 16
3936 .Lcbc_dec_one:
3937 movaps %xmm2,%xmm11
3938 movups (%rcx),%xmm0
3939 movups 16(%rcx),%xmm1
3940 leaq 32(%rcx),%rcx
3941 xorps %xmm0,%xmm2
3942 .Loop_dec1_17:
3943 .byte 102,15,56,222,209
3944 decl %eax
3945 movups (%rcx),%xmm1
3946 leaq 16(%rcx),%rcx
3947 jnz .Loop_dec1_17
3948 .byte 102,15,56,223,209
3949 xorps %xmm10,%xmm2
3950 movaps %xmm11,%xmm10
3951 jmp .Lcbc_dec_tail_collected
3952 .align 16
3953 .Lcbc_dec_two:
3954 movaps %xmm3,%xmm12
3955 call _aesni_decrypt2
3956 pxor %xmm10,%xmm2
3957 movaps %xmm12,%xmm10
3958 pxor %xmm11,%xmm3
3959 movdqu %xmm2,(%rsi)
3960 movdqa %xmm3,%xmm2
3961 pxor %xmm3,%xmm3
3962 leaq 16(%rsi),%rsi
3963 jmp .Lcbc_dec_tail_collected
3964 .align 16
3965 .Lcbc_dec_three:
3966 movaps %xmm4,%xmm13
3967 call _aesni_decrypt3
3968 pxor %xmm10,%xmm2
3969 movaps %xmm13,%xmm10
3970 pxor %xmm11,%xmm3
3971 movdqu %xmm2,(%rsi)
3972 pxor %xmm12,%xmm4
3973 movdqu %xmm3,16(%rsi)
3974 pxor %xmm3,%xmm3
3975 movdqa %xmm4,%xmm2
3976 pxor %xmm4,%xmm4
3977 leaq 32(%rsi),%rsi
3978 jmp .Lcbc_dec_tail_collected
3979 .align 16
3980 .Lcbc_dec_four:
3981 movaps %xmm5,%xmm14
3982 call _aesni_decrypt4
3983 pxor %xmm10,%xmm2
3984 movaps %xmm14,%xmm10
3985 pxor %xmm11,%xmm3
3986 movdqu %xmm2,(%rsi)
3987 pxor %xmm12,%xmm4
3988 movdqu %xmm3,16(%rsi)
3989 pxor %xmm3,%xmm3
3990 pxor %xmm13,%xmm5
3991 movdqu %xmm4,32(%rsi)
3992 pxor %xmm4,%xmm4
3993 movdqa %xmm5,%xmm2
3994 pxor %xmm5,%xmm5
3995 leaq 48(%rsi),%rsi
3996 jmp .Lcbc_dec_tail_collected
3997 
3998 .align 16
/* Scrub intermediate registers before handling the final (maybe partial)
 * block; xmm2 still holds the last decrypted block, xmm10 the new IV. */
3999 .Lcbc_dec_clear_tail_collected:
4000 pxor %xmm3,%xmm3
4001 pxor %xmm4,%xmm4
4002 pxor %xmm5,%xmm5
4003 pxor %xmm6,%xmm6
4004 pxor %xmm7,%xmm7
4005 pxor %xmm8,%xmm8
4006 pxor %xmm9,%xmm9
4007 .Lcbc_dec_tail_collected:
4008 movups %xmm10,(%r8)
4009 andq $15,%rdx
4010 jnz .Lcbc_dec_tail_partial
4011 movups %xmm2,(%rsi)
4012 pxor %xmm2,%xmm2
4013 jmp .Lcbc_dec_ret
4014 .align 16
/* Partial block: stage it on the stack, rep-movsb the needed bytes out
 * (.long 0x9066A4F3 = rep movsb padded), then wipe the stack slot. */
4015 .Lcbc_dec_tail_partial:
4016 movaps %xmm2,(%rsp)
4017 pxor %xmm2,%xmm2
4018 movq $16,%rcx
4019 movq %rsi,%rdi
4020 subq %rdx,%rcx
4021 leaq (%rsp),%rsi
4022 .long 0x9066A4F3
4023 movdqa %xmm2,(%rsp)
4024 
4025 .Lcbc_dec_ret:
4026 xorps %xmm0,%xmm0
4027 pxor %xmm1,%xmm1
4028 movq -8(%r11),%rbp
4029 .cfi_restore %rbp
4030 leaq (%r11),%rsp
4031 .cfi_def_cfa_register %rsp
4032 .Lcbc_ret:
4033 .byte 0xf3,0xc3
4034 .cfi_endproc
4035 .size aesni_cbc_encrypt,.-aesni_cbc_encrypt
/*
 * aesni_set_decrypt_key: build an AES decryption key schedule.
 * Expands the encryption schedule via __aesni_set_encrypt_key (same
 * argument registers pass straight through; rdx = key schedule buffer,
 * esi = key bits, eax = 0 on success / non-zero error from the expander),
 * then converts it in place for the Equivalent Inverse Cipher: the round
 * keys are reversed end-for-end and every inner key is transformed with
 * aesimc (.byte 102,15,56,219,xx = aesimc).  The first/last keys are only
 * swapped, not aesimc'd, per the AES-NI decryption convention.
 */
4036 .globl aesni_set_decrypt_key
4037 .type aesni_set_decrypt_key,@function
4038 .align 16
4039 aesni_set_decrypt_key:
4040 .cfi_startproc
/* .byte 48 83 EC 08 = subq $8,%rsp (fixed encoding kept by the generator). */
4041 .byte 0x48,0x83,0xEC,0x08
4042 .cfi_adjust_cfa_offset 8
4043 call __aesni_set_encrypt_key
4044 shll $4,%esi
4045 testl %eax,%eax
4046 jnz .Ldec_key_ret
/* rdi = last round key; rdx = first.  Swap the endpoints... */
4047 leaq 16(%rdx,%rsi,1),%rdi
4048 
4049 movups (%rdx),%xmm0
4050 movups (%rdi),%xmm1
4051 movups %xmm0,(%rdi)
4052 movups %xmm1,(%rdx)
4053 leaq 16(%rdx),%rdx
4054 leaq -16(%rdi),%rdi
4055 
/* ...then walk inward, swapping pairs and applying InvMixColumns to both. */
4056 .Ldec_key_inverse:
4057 movups (%rdx),%xmm0
4058 movups (%rdi),%xmm1
4059 .byte 102,15,56,219,192
4060 .byte 102,15,56,219,201
4061 leaq 16(%rdx),%rdx
4062 leaq -16(%rdi),%rdi
4063 movups %xmm0,16(%rdi)
4064 movups %xmm1,-16(%rdx)
4065 cmpq %rdx,%rdi
4066 ja .Ldec_key_inverse
4067 
/* Middle key (odd count): aesimc in place. */
4068 movups (%rdx),%xmm0
4069 .byte 102,15,56,219,192
4070 pxor %xmm1,%xmm1
4071 movups %xmm0,(%rdi)
4072 pxor %xmm0,%xmm0
4073 .Ldec_key_ret:
4074 addq $8,%rsp
4075 .cfi_adjust_cfa_offset -8
4076 .byte 0xf3,0xc3
4077 .cfi_endproc
4078 .LSEH_end_set_decrypt_key:
4079 .size aesni_set_decrypt_key,.-aesni_set_decrypt_key
.globl	aesni_set_encrypt_key
.type	aesni_set_encrypt_key,@function
.align	16
# int aesni_set_encrypt_key(const unsigned char *userKey, int bits, AES_KEY *key)
#   In:  %rdi = userKey, %esi = bits (128/192/256), %rdx = key schedule out
#   Out: %rax = 0 on success, -1 on NULL pointer, -2 on unsupported key size.
# Expands userKey into the AES encryption round-key schedule at (%rdx) and
# stores the round-loop count (9, 11 or 13; each path writes it so that the
# store lands at offset 240 from the schedule base) as the last field.
# Two implementations per key size: the classic AESKEYGENASSIST path, and an
# "_alt" path built from PSHUFB+AESENCLAST that is selected by a CPU
# capability test (NOTE(review): mask 0x10000800 vs 0x10000000 looks like
# "AVX present, companion bit clear" — confirm exact bit semantics against
# OPENSSL_ia32cap documentation).
aesni_set_encrypt_key:
__aesni_set_encrypt_key:
.cfi_startproc
.byte	0x48,0x83,0xEC,0x08		# sub $8,%rsp, emitted as raw bytes
.cfi_adjust_cfa_offset	8
	movq	$-1,%rax		# default return: -1 (bad pointer)
	testq	%rdi,%rdi
	jz	.Lenc_key_ret		# NULL userKey
	testq	%rdx,%rdx
	jz	.Lenc_key_ret		# NULL output schedule

	movl	$268437504,%r10d	# capability mask 0x10000800
	movups	(%rdi),%xmm0		# xmm0 = first 16 key bytes
	xorps	%xmm4,%xmm4		# xmm4 = 0 (scratch for expansion helpers)
	andl	OPENSSL_ia32cap_P+4(%rip),%r10d	# CPU capability word 1
	leaq	16(%rdx),%rax		# rax = schedule base + 16 (next slot)
	cmpl	$256,%esi
	je	.L14rounds
	cmpl	$192,%esi
	je	.L12rounds
	cmpl	$128,%esi
	jne	.Lbad_keybits		# only 128/192/256 supported

.L10rounds:				# AES-128: 10 rounds, loop count 9
	movl	$9,%esi
	cmpl	$268435456,%r10d	# == 0x10000000 -> use alt schedule
	je	.L10rounds_alt

	movups	%xmm0,(%rdx)		# round key 0 = user key
.byte	102,15,58,223,200,1		# aeskeygenassist $1,%xmm0,%xmm1
	call	.Lkey_expansion_128_cold
.byte	102,15,58,223,200,2		# aeskeygenassist $2,%xmm0,%xmm1
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,4		# aeskeygenassist $4,%xmm0,%xmm1
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,8		# aeskeygenassist $8,%xmm0,%xmm1
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,16		# aeskeygenassist $16,%xmm0,%xmm1
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,32		# aeskeygenassist $32,%xmm0,%xmm1
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,64		# aeskeygenassist $64,%xmm0,%xmm1
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,128		# aeskeygenassist $128,%xmm0,%xmm1
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,27		# aeskeygenassist $0x1b,%xmm0,%xmm1 (rcon wraps)
	call	.Lkey_expansion_128
.byte	102,15,58,223,200,54		# aeskeygenassist $0x36,%xmm0,%xmm1
	call	.Lkey_expansion_128
	movups	%xmm0,(%rax)		# store round key 10
	movl	%esi,80(%rax)		# rounds field (base+240) = 9
	xorl	%eax,%eax		# return 0
	jmp	.Lenc_key_ret

.align	16
.L10rounds_alt:				# AES-128 via pshufb + aesenclast
	movdqa	.Lkey_rotate(%rip),%xmm5	# RotWord shuffle mask
	movl	$8,%r10d		# 8 full loop iterations
	movdqa	.Lkey_rcon1(%rip),%xmm4	# rcon = 1 (doubled each round)
	movdqa	%xmm0,%xmm2
	movdqu	%xmm0,(%rdx)		# round key 0 = user key
	jmp	.Loop_key128

.align	16
.Loop_key128:
.byte	102,15,56,0,197			# pshufb %xmm5,%xmm0 (RotWord)
.byte	102,15,56,221,196		# aesenclast %xmm4,%xmm0 (SubWord + rcon)
	pslld	$1,%xmm4		# rcon <<= 1
	leaq	16(%rax),%rax

	movdqa	%xmm2,%xmm3		# prefix-XOR the four previous key words
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0		# next round key
	movdqu	%xmm0,-16(%rax)
	movdqa	%xmm0,%xmm2

	decl	%r10d
	jnz	.Loop_key128

	movdqa	.Lkey_rcon1b(%rip),%xmm4	# rcon continues at 0x1b after x^8 wrap

.byte	102,15,56,0,197			# pshufb %xmm5,%xmm0
.byte	102,15,56,221,196		# aesenclast %xmm4,%xmm0
	pslld	$1,%xmm4		# rcon: 0x1b -> 0x36

	movdqa	%xmm2,%xmm3		# same prefix-XOR as in the loop
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0		# round key 9
	movdqu	%xmm0,(%rax)

	movdqa	%xmm0,%xmm2
.byte	102,15,56,0,197			# pshufb %xmm5,%xmm0
.byte	102,15,56,221,196		# aesenclast %xmm4,%xmm0

	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0		# round key 10
	movdqu	%xmm0,16(%rax)

	movl	%esi,96(%rax)		# rounds field = 9
	xorl	%eax,%eax		# return 0
	jmp	.Lenc_key_ret

.align	16
.L12rounds:				# AES-192: 12 rounds, loop count 11
	movq	16(%rdi),%xmm2		# remaining 8 key bytes
	movl	$11,%esi
	cmpl	$268435456,%r10d	# alt path on same CPU condition
	je	.L12rounds_alt

	movups	%xmm0,(%rdx)		# round key 0
.byte	102,15,58,223,202,1		# aeskeygenassist $1,%xmm2,%xmm1
	call	.Lkey_expansion_192a_cold
.byte	102,15,58,223,202,2		# aeskeygenassist $2,%xmm2,%xmm1
	call	.Lkey_expansion_192b
.byte	102,15,58,223,202,4		# aeskeygenassist $4,%xmm2,%xmm1
	call	.Lkey_expansion_192a
.byte	102,15,58,223,202,8		# aeskeygenassist $8,%xmm2,%xmm1
	call	.Lkey_expansion_192b
.byte	102,15,58,223,202,16		# aeskeygenassist $16,%xmm2,%xmm1
	call	.Lkey_expansion_192a
.byte	102,15,58,223,202,32		# aeskeygenassist $32,%xmm2,%xmm1
	call	.Lkey_expansion_192b
.byte	102,15,58,223,202,64		# aeskeygenassist $64,%xmm2,%xmm1
	call	.Lkey_expansion_192a
.byte	102,15,58,223,202,128		# aeskeygenassist $128,%xmm2,%xmm1
	call	.Lkey_expansion_192b
	movups	%xmm0,(%rax)		# store final round key
	movl	%esi,48(%rax)		# rounds field (base+240) = 11
	xorq	%rax,%rax		# return 0
	jmp	.Lenc_key_ret

.align	16
.L12rounds_alt:				# AES-192 via pshufb + aesenclast
	movdqa	.Lkey_rotate192(%rip),%xmm5
	movdqa	.Lkey_rcon1(%rip),%xmm4
	movl	$8,%r10d		# 8 iterations, 24 bytes of schedule each
	movdqu	%xmm0,(%rdx)
	jmp	.Loop_key192

.align	16
.Loop_key192:
	movq	%xmm2,0(%rax)		# store 8-byte tail of this chunk
	movdqa	%xmm2,%xmm1
.byte	102,15,56,0,213			# pshufb %xmm5,%xmm2
.byte	102,15,56,221,212		# aesenclast %xmm4,%xmm2
	pslld	$1,%xmm4		# rcon <<= 1
	leaq	24(%rax),%rax		# 192-bit schedule advances 24 bytes/iter

	movdqa	%xmm0,%xmm3		# prefix-XOR of previous four words
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm3,%xmm0

	pshufd	$0xff,%xmm0,%xmm3	# broadcast last new word
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3

	pxor	%xmm2,%xmm0
	pxor	%xmm3,%xmm2
	movdqu	%xmm0,-16(%rax)

	decl	%r10d
	jnz	.Loop_key192

	movl	%esi,32(%rax)		# rounds field = 11
	xorl	%eax,%eax		# return 0
	jmp	.Lenc_key_ret

.align	16
.L14rounds:				# AES-256: 14 rounds, loop count 13
	movups	16(%rdi),%xmm2		# second 16 key bytes
	movl	$13,%esi
	leaq	16(%rax),%rax
	cmpl	$268435456,%r10d	# alt path on same CPU condition
	je	.L14rounds_alt

	movups	%xmm0,(%rdx)		# round keys 0 and 1 = user key
	movups	%xmm2,16(%rdx)
.byte	102,15,58,223,202,1		# aeskeygenassist $1,%xmm2,%xmm1
	call	.Lkey_expansion_256a_cold
.byte	102,15,58,223,200,1		# aeskeygenassist $1,%xmm0,%xmm1
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,2		# aeskeygenassist $2,%xmm2,%xmm1
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,2		# aeskeygenassist $2,%xmm0,%xmm1
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,4		# aeskeygenassist $4,%xmm2,%xmm1
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,4		# aeskeygenassist $4,%xmm0,%xmm1
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,8		# aeskeygenassist $8,%xmm2,%xmm1
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,8		# aeskeygenassist $8,%xmm0,%xmm1
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,16		# aeskeygenassist $16,%xmm2,%xmm1
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,16		# aeskeygenassist $16,%xmm0,%xmm1
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,32		# aeskeygenassist $32,%xmm2,%xmm1
	call	.Lkey_expansion_256a
.byte	102,15,58,223,200,32		# aeskeygenassist $32,%xmm0,%xmm1
	call	.Lkey_expansion_256b
.byte	102,15,58,223,202,64		# aeskeygenassist $64,%xmm2,%xmm1
	call	.Lkey_expansion_256a
	movups	%xmm0,(%rax)		# store round key 14
	movl	%esi,16(%rax)		# rounds field (base+240) = 13
	xorq	%rax,%rax		# return 0
	jmp	.Lenc_key_ret

.align	16
.L14rounds_alt:				# AES-256 via pshufb + aesenclast
	movdqa	.Lkey_rotate(%rip),%xmm5
	movdqa	.Lkey_rcon1(%rip),%xmm4
	movl	$7,%r10d		# 7 double-round iterations
	movdqu	%xmm0,0(%rdx)		# round keys 0 and 1 = user key
	movdqa	%xmm2,%xmm1
	movdqu	%xmm2,16(%rdx)
	jmp	.Loop_key256

.align	16
.Loop_key256:
.byte	102,15,56,0,213			# pshufb %xmm5,%xmm2 (RotWord)
.byte	102,15,56,221,212		# aesenclast %xmm4,%xmm2 (SubWord + rcon)

	movdqa	%xmm0,%xmm3		# prefix-XOR of previous even-key words
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm3,%xmm0
	pslld	$1,%xmm4		# rcon <<= 1

	pxor	%xmm2,%xmm0		# even round key
	movdqu	%xmm0,(%rax)

	decl	%r10d
	jz	.Ldone_key256		# last iteration produces no odd key

	pshufd	$0xff,%xmm0,%xmm2	# broadcast last word (no rotate, no rcon)
	pxor	%xmm3,%xmm3
.byte	102,15,56,221,211		# aesenclast %xmm3,%xmm2 (SubWord only)

	movdqa	%xmm1,%xmm3		# prefix-XOR of previous odd-key words
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm3,%xmm1

	pxor	%xmm1,%xmm2		# odd round key
	movdqu	%xmm2,16(%rax)
	leaq	32(%rax),%rax
	movdqa	%xmm2,%xmm1

	jmp	.Loop_key256

.Ldone_key256:
	movl	%esi,16(%rax)		# rounds field = 13
	xorl	%eax,%eax		# return 0
	jmp	.Lenc_key_ret

.align	16
.Lbad_keybits:
	movq	$-2,%rax		# return -2: unsupported key size
.Lenc_key_ret:
	pxor	%xmm0,%xmm0		# scrub key material from SIMD registers
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	addq	$8,%rsp
.cfi_adjust_cfa_offset	-8
.byte	0xf3,0xc3			# repz ret
.LSEH_end_set_encrypt_key:

.align	16
# AES-128 expansion step. In: %xmm0 = previous round key, %xmm1 =
# aeskeygenassist result, %xmm4 = scratch (zeroed by caller on first use).
# Stores the previous round key at (%rax), advances %rax by 16, and leaves
# the next round key in %xmm0. The _cold entry skips the store (round 0 is
# written by the caller).
.Lkey_expansion_128:
	movups	%xmm0,(%rax)		# emit previous round key
	leaq	16(%rax),%rax
.Lkey_expansion_128_cold:
	shufps	$16,%xmm0,%xmm4		# slide key words into xmm4 ...
	xorps	%xmm4,%xmm0		# ... building the running prefix-XOR
	shufps	$140,%xmm0,%xmm4	# second slide/XOR completes all 4 words
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1	# broadcast SubWord(RotWord(w))^rcon lane
	xorps	%xmm1,%xmm0		# xmm0 = next round key
.byte	0xf3,0xc3			# repz ret

.align	16
# AES-192 expansion step "a". In: %xmm0 = low 4 schedule words, %xmm2 =
# high 2 words, %xmm1 = aeskeygenassist result. Stores %xmm0 at (%rax)
# (skipped by the _cold entry) and updates %xmm0/%xmm2 with the next
# 6 words; %xmm5 keeps a copy of the old high words for the "b" step.
.Lkey_expansion_192a:
	movups	%xmm0,(%rax)		# emit previous 16 schedule bytes
	leaq	16(%rax),%rax
.Lkey_expansion_192a_cold:
	movaps	%xmm2,%xmm5		# save high words for .Lkey_expansion_192b
.Lkey_expansion_192b_warm:
	shufps	$16,%xmm0,%xmm4		# prefix-XOR of the low four words ...
	movdqa	%xmm2,%xmm3
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	pslldq	$4,%xmm3		# shifted copy of high words
	xorps	%xmm4,%xmm0		# ... done
	pshufd	$85,%xmm1,%xmm1		# broadcast keygenassist lane 1
	pxor	%xmm3,%xmm2
	pxor	%xmm1,%xmm0		# xmm0 = next low 4 words
	pshufd	$255,%xmm0,%xmm3	# broadcast last new word
	pxor	%xmm3,%xmm2		# xmm2 = next high 2 words
.byte	0xf3,0xc3			# repz ret

.align	16
# AES-192 expansion step "b": repacks the 6-word state (%xmm5 saved
# high words, %xmm0 low words, %xmm2 high words) into two 16-byte
# schedule stores at (%rax), advances %rax by 32, then falls into the
# shared update code at .Lkey_expansion_192b_warm.
.Lkey_expansion_192b:
	movaps	%xmm0,%xmm3
	shufps	$68,%xmm0,%xmm5		# pack saved high words with low half
	movups	%xmm5,(%rax)
	shufps	$78,%xmm2,%xmm3		# pack remaining words
	movups	%xmm3,16(%rax)
	leaq	32(%rax),%rax
	jmp	.Lkey_expansion_192b_warm

.align	16
# AES-256 expansion step "a" (even round keys). In: %xmm0 = previous even
# round key, %xmm2 = previous odd round key, %xmm1 = aeskeygenassist
# result. Stores %xmm2 at (%rax) (skipped by _cold), advances %rax by 16,
# leaves the next even round key in %xmm0.
.Lkey_expansion_256a:
	movups	%xmm2,(%rax)		# emit previous odd round key
	leaq	16(%rax),%rax
.Lkey_expansion_256a_cold:
	shufps	$16,%xmm0,%xmm4		# prefix-XOR of the four key words
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1	# broadcast SubWord(RotWord(w))^rcon lane
	xorps	%xmm1,%xmm0		# xmm0 = next even round key
.byte	0xf3,0xc3			# repz ret

.align	16
# AES-256 expansion step "b" (odd round keys). In: %xmm0 = just-computed
# even round key, %xmm2 = previous odd round key, %xmm1 = aeskeygenassist
# result. Stores %xmm0, advances %rax by 16, leaves the next odd round
# key in %xmm2 (lane 2 of keygenassist = SubWord without rotate/rcon).
.Lkey_expansion_256b:
	movups	%xmm0,(%rax)		# emit previous even round key
	leaq	16(%rax),%rax

	shufps	$16,%xmm2,%xmm4		# prefix-XOR of the four odd-key words
	xorps	%xmm4,%xmm2
	shufps	$140,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$170,%xmm1,%xmm1	# broadcast keygenassist lane 2 (SubWord only)
	xorps	%xmm1,%xmm2		# xmm2 = next odd round key
.byte	0xf3,0xc3			# repz ret
.cfi_endproc
.size	aesni_set_encrypt_key,.-aesni_set_encrypt_key
.size	__aesni_set_encrypt_key,.-__aesni_set_encrypt_key
.align	64
# Shared read-only constants for the AES-NI routines in this file.
.Lbswap_mask:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0	# pshufb mask: full byte reverse
.Lincrement32:
.long	6,6,6,0			# counter step of 6 (presumably for the 6-block CTR loop — confirm against caller)
.Lincrement64:
.long	1,0,0,0			# +1 on the low 64-bit lane
.Lxts_magic:
.long	0x87,0,1,0		# GF(2^128) reduction constant for the XTS tweak
.Lincrement1:
.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1	# +1 on a big-endian 128-bit value
.Lkey_rotate:
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d	# pshufb: RotWord of word 3, broadcast
.Lkey_rotate192:
.long	0x04070605,0x04070605,0x04070605,0x04070605	# pshufb: RotWord of word 1, broadcast
.Lkey_rcon1:
.long	1,1,1,1			# initial AES round constant
.Lkey_rcon1b:
.long	0x1b,0x1b,0x1b,0x1b	# round constant after x^8 wrap in GF(2^8)

# ASCII credit string: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	64
/* Extraction residue (web-viewer footer), not part of the assembly source:
   Cache object: b1cba92bbfdfe16bba0dca779e48990c */