/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from rsaz-x86_64.pl. */
.text



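/*
 * rsaz_512_sqr: 512-bit Montgomery squaring, repeated a caller-specified
 * number of times.  Assumed C prototype (per OpENSSL-style rsaz_exp.h
 * declarations; the prototype is not part of this generated file):
 *
 *	void rsaz_512_sqr(void *ret, const void *a,
 *	                  const void *n, unsigned long n0, int cnt);
 *
 * The 0x80100 test against OPENSSL_ia32cap_P selects the MULX/ADCX/ADOX
 * (BMI2+ADX) code path at .Loop_sqrx when the CPU supports it.
 */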
.globl rsaz_512_sqr
.type rsaz_512_sqr,@function
.align 32
rsaz_512_sqr:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56

subq $128+24,%rsp
.cfi_adjust_cfa_offset 128+24
.Lsqr_body:
.byte 102,72,15,110,202
movq (%rsi),%rdx
movq 8(%rsi),%rax
movq %rcx,128(%rsp)
movl $0x80100,%r11d
andl OPENSSL_ia32cap_P+8(%rip),%r11d
cmpl $0x80100,%r11d
je .Loop_sqrx
jmp .Loop_sqr

.align 32
.Loop_sqr:
movl %r8d,128+8(%rsp)

movq %rdx,%rbx
movq %rax,%rbp
mulq %rdx
movq %rax,%r8
movq 16(%rsi),%rax
movq %rdx,%r9

mulq %rbx
addq %rax,%r9
movq 24(%rsi),%rax
movq %rdx,%r10
adcq $0,%r10

mulq %rbx
addq %rax,%r10
movq 32(%rsi),%rax
movq %rdx,%r11
adcq $0,%r11

mulq %rbx
addq %rax,%r11
movq 40(%rsi),%rax
movq %rdx,%r12
adcq $0,%r12

mulq %rbx
addq %rax,%r12
movq 48(%rsi),%rax
movq %rdx,%r13
adcq $0,%r13

mulq %rbx
addq %rax,%r13
movq 56(%rsi),%rax
movq %rdx,%r14
adcq $0,%r14

mulq %rbx
addq %rax,%r14
movq %rbx,%rax
adcq $0,%rdx

xorq %rcx,%rcx
addq %r8,%r8
movq %rdx,%r15
adcq $0,%rcx

mulq %rax
addq %r8,%rdx
adcq $0,%rcx

movq %rax,(%rsp)
movq %rdx,8(%rsp)


movq 16(%rsi),%rax
mulq %rbp
addq %rax,%r10
movq 24(%rsi),%rax
movq %rdx,%rbx
adcq $0,%rbx

mulq %rbp
addq %rax,%r11
movq 32(%rsi),%rax
adcq $0,%rdx
addq %rbx,%r11
movq %rdx,%rbx
adcq $0,%rbx

mulq %rbp
addq %rax,%r12
movq 40(%rsi),%rax
adcq $0,%rdx
addq %rbx,%r12
movq %rdx,%rbx
adcq $0,%rbx

mulq %rbp
addq %rax,%r13
movq 48(%rsi),%rax
adcq $0,%rdx
addq %rbx,%r13
movq %rdx,%rbx
adcq $0,%rbx

mulq %rbp
addq %rax,%r14
movq 56(%rsi),%rax
adcq $0,%rdx
addq %rbx,%r14
movq %rdx,%rbx
adcq $0,%rbx

mulq %rbp
addq %rax,%r15
movq %rbp,%rax
adcq $0,%rdx
addq %rbx,%r15
adcq $0,%rdx

xorq %rbx,%rbx
addq %r9,%r9
movq %rdx,%r8
adcq %r10,%r10
adcq $0,%rbx

mulq %rax

addq %rcx,%rax
movq 16(%rsi),%rbp
addq %rax,%r9
movq 24(%rsi),%rax
adcq %rdx,%r10
adcq $0,%rbx

movq %r9,16(%rsp)
movq %r10,24(%rsp)


mulq %rbp
addq %rax,%r12
movq 32(%rsi),%rax
movq %rdx,%rcx
adcq $0,%rcx

mulq %rbp
addq %rax,%r13
movq 40(%rsi),%rax
adcq $0,%rdx
addq %rcx,%r13
movq %rdx,%rcx
adcq $0,%rcx

mulq %rbp
addq %rax,%r14
movq 48(%rsi),%rax
adcq $0,%rdx
addq %rcx,%r14
movq %rdx,%rcx
adcq $0,%rcx

mulq %rbp
addq %rax,%r15
movq 56(%rsi),%rax
adcq $0,%rdx
addq %rcx,%r15
movq %rdx,%rcx
adcq $0,%rcx

mulq %rbp
addq %rax,%r8
movq %rbp,%rax
adcq $0,%rdx
addq %rcx,%r8
adcq $0,%rdx

xorq %rcx,%rcx
addq %r11,%r11
movq %rdx,%r9
adcq %r12,%r12
adcq $0,%rcx

mulq %rax

addq %rbx,%rax
movq 24(%rsi),%r10
addq %rax,%r11
movq 32(%rsi),%rax
adcq %rdx,%r12
adcq $0,%rcx

movq %r11,32(%rsp)
movq %r12,40(%rsp)


movq %rax,%r11
mulq %r10
addq %rax,%r14
movq 40(%rsi),%rax
movq %rdx,%rbx
adcq $0,%rbx

movq %rax,%r12
mulq %r10
addq %rax,%r15
movq 48(%rsi),%rax
adcq $0,%rdx
addq %rbx,%r15
movq %rdx,%rbx
adcq $0,%rbx

movq %rax,%rbp
mulq %r10
addq %rax,%r8
movq 56(%rsi),%rax
adcq $0,%rdx
addq %rbx,%r8
movq %rdx,%rbx
adcq $0,%rbx

mulq %r10
addq %rax,%r9
movq %r10,%rax
adcq $0,%rdx
addq %rbx,%r9
adcq $0,%rdx

xorq %rbx,%rbx
addq %r13,%r13
movq %rdx,%r10
adcq %r14,%r14
adcq $0,%rbx

mulq %rax

addq %rcx,%rax
addq %rax,%r13
movq %r12,%rax
adcq %rdx,%r14
adcq $0,%rbx

movq %r13,48(%rsp)
movq %r14,56(%rsp)


mulq %r11
addq %rax,%r8
movq %rbp,%rax
movq %rdx,%rcx
adcq $0,%rcx

mulq %r11
addq %rax,%r9
movq 56(%rsi),%rax
adcq $0,%rdx
addq %rcx,%r9
movq %rdx,%rcx
adcq $0,%rcx

movq %rax,%r14
mulq %r11
addq %rax,%r10
movq %r11,%rax
adcq $0,%rdx
addq %rcx,%r10
adcq $0,%rdx

xorq %rcx,%rcx
addq %r15,%r15
movq %rdx,%r11
adcq %r8,%r8
adcq $0,%rcx

mulq %rax

addq %rbx,%rax
addq %rax,%r15
movq %rbp,%rax
adcq %rdx,%r8
adcq $0,%rcx

movq %r15,64(%rsp)
movq %r8,72(%rsp)


mulq %r12
addq %rax,%r10
movq %r14,%rax
movq %rdx,%rbx
adcq $0,%rbx

mulq %r12
addq %rax,%r11
movq %r12,%rax
adcq $0,%rdx
addq %rbx,%r11
adcq $0,%rdx

xorq %rbx,%rbx
addq %r9,%r9
movq %rdx,%r12
adcq %r10,%r10
adcq $0,%rbx

mulq %rax

addq %rcx,%rax
addq %rax,%r9
movq %r14,%rax
adcq %rdx,%r10
adcq $0,%rbx

movq %r9,80(%rsp)
movq %r10,88(%rsp)


mulq %rbp
addq %rax,%r12
movq %rbp,%rax
adcq $0,%rdx

xorq %rcx,%rcx
addq %r11,%r11
movq %rdx,%r13
adcq %r12,%r12
adcq $0,%rcx

mulq %rax

addq %rbx,%rax
addq %rax,%r11
movq %r14,%rax
adcq %rdx,%r12
adcq $0,%rcx

movq %r11,96(%rsp)
movq %r12,104(%rsp)


xorq %rbx,%rbx
addq %r13,%r13
adcq $0,%rbx

mulq %rax

addq %rcx,%rax
addq %r13,%rax
adcq %rbx,%rdx

movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15
.byte 102,72,15,126,205

movq %rax,112(%rsp)
movq %rdx,120(%rsp)

call __rsaz_512_reduce

addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
adcq 88(%rsp),%r11
adcq 96(%rsp),%r12
adcq 104(%rsp),%r13
adcq 112(%rsp),%r14
adcq 120(%rsp),%r15
sbbq %rcx,%rcx

call __rsaz_512_subtract

movq %r8,%rdx
movq %r9,%rax
movl 128+8(%rsp),%r8d
movq %rdi,%rsi

decl %r8d
jnz .Loop_sqr
jmp .Lsqr_tail

.align 32
.Loop_sqrx:
movl %r8d,128+8(%rsp)
.byte 102,72,15,110,199

mulxq %rax,%r8,%r9
movq %rax,%rbx

mulxq 16(%rsi),%rcx,%r10
xorq %rbp,%rbp

mulxq 24(%rsi),%rax,%r11
adcxq %rcx,%r9

.byte 0xc4,0x62,0xf3,0xf6,0xa6,0x20,0x00,0x00,0x00
adcxq %rax,%r10

.byte 0xc4,0x62,0xfb,0xf6,0xae,0x28,0x00,0x00,0x00
adcxq %rcx,%r11

mulxq 48(%rsi),%rcx,%r14
adcxq %rax,%r12
adcxq %rcx,%r13

mulxq 56(%rsi),%rax,%r15
adcxq %rax,%r14
adcxq %rbp,%r15

mulxq %rdx,%rax,%rdi
movq %rbx,%rdx
xorq %rcx,%rcx
adoxq %r8,%r8
adcxq %rdi,%r8
adoxq %rbp,%rcx
adcxq %rbp,%rcx

movq %rax,(%rsp)
movq %r8,8(%rsp)


.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x10,0x00,0x00,0x00
adoxq %rax,%r10
adcxq %rbx,%r11

mulxq 24(%rsi),%rdi,%r8
adoxq %rdi,%r11
.byte 0x66
adcxq %r8,%r12

mulxq 32(%rsi),%rax,%rbx
adoxq %rax,%r12
adcxq %rbx,%r13

mulxq 40(%rsi),%rdi,%r8
adoxq %rdi,%r13
adcxq %r8,%r14

.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
adoxq %rax,%r14
adcxq %rbx,%r15

.byte 0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00
adoxq %rdi,%r15
adcxq %rbp,%r8
mulxq %rdx,%rax,%rdi
adoxq %rbp,%r8
.byte 0x48,0x8b,0x96,0x10,0x00,0x00,0x00

xorq %rbx,%rbx
adoxq %r9,%r9

adcxq %rcx,%rax
adoxq %r10,%r10
adcxq %rax,%r9
adoxq %rbp,%rbx
adcxq %rdi,%r10
adcxq %rbp,%rbx

movq %r9,16(%rsp)
.byte 0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00


mulxq 24(%rsi),%rdi,%r9
adoxq %rdi,%r12
adcxq %r9,%r13

mulxq 32(%rsi),%rax,%rcx
adoxq %rax,%r13
adcxq %rcx,%r14

.byte 0xc4,0x62,0xc3,0xf6,0x8e,0x28,0x00,0x00,0x00
adoxq %rdi,%r14
adcxq %r9,%r15

.byte 0xc4,0xe2,0xfb,0xf6,0x8e,0x30,0x00,0x00,0x00
adoxq %rax,%r15
adcxq %rcx,%r8

mulxq 56(%rsi),%rdi,%r9
adoxq %rdi,%r8
adcxq %rbp,%r9
mulxq %rdx,%rax,%rdi
adoxq %rbp,%r9
movq 24(%rsi),%rdx

xorq %rcx,%rcx
adoxq %r11,%r11

adcxq %rbx,%rax
adoxq %r12,%r12
adcxq %rax,%r11
adoxq %rbp,%rcx
adcxq %rdi,%r12
adcxq %rbp,%rcx

movq %r11,32(%rsp)
movq %r12,40(%rsp)


mulxq 32(%rsi),%rax,%rbx
adoxq %rax,%r14
adcxq %rbx,%r15

mulxq 40(%rsi),%rdi,%r10
adoxq %rdi,%r15
adcxq %r10,%r8

mulxq 48(%rsi),%rax,%rbx
adoxq %rax,%r8
adcxq %rbx,%r9

mulxq 56(%rsi),%rdi,%r10
adoxq %rdi,%r9
adcxq %rbp,%r10
mulxq %rdx,%rax,%rdi
adoxq %rbp,%r10
movq 32(%rsi),%rdx

xorq %rbx,%rbx
adoxq %r13,%r13

adcxq %rcx,%rax
adoxq %r14,%r14
adcxq %rax,%r13
adoxq %rbp,%rbx
adcxq %rdi,%r14
adcxq %rbp,%rbx

movq %r13,48(%rsp)
movq %r14,56(%rsp)


mulxq 40(%rsi),%rdi,%r11
adoxq %rdi,%r8
adcxq %r11,%r9

mulxq 48(%rsi),%rax,%rcx
adoxq %rax,%r9
adcxq %rcx,%r10

mulxq 56(%rsi),%rdi,%r11
adoxq %rdi,%r10
adcxq %rbp,%r11
mulxq %rdx,%rax,%rdi
movq 40(%rsi),%rdx
adoxq %rbp,%r11

xorq %rcx,%rcx
adoxq %r15,%r15

adcxq %rbx,%rax
adoxq %r8,%r8
adcxq %rax,%r15
adoxq %rbp,%rcx
adcxq %rdi,%r8
adcxq %rbp,%rcx

movq %r15,64(%rsp)
movq %r8,72(%rsp)


.byte 0xc4,0xe2,0xfb,0xf6,0x9e,0x30,0x00,0x00,0x00
adoxq %rax,%r10
adcxq %rbx,%r11

.byte 0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00
adoxq %rdi,%r11
adcxq %rbp,%r12
mulxq %rdx,%rax,%rdi
adoxq %rbp,%r12
movq 48(%rsi),%rdx

xorq %rbx,%rbx
adoxq %r9,%r9

adcxq %rcx,%rax
adoxq %r10,%r10
adcxq %rax,%r9
adcxq %rdi,%r10
adoxq %rbp,%rbx
adcxq %rbp,%rbx

movq %r9,80(%rsp)
movq %r10,88(%rsp)


.byte 0xc4,0x62,0xfb,0xf6,0xae,0x38,0x00,0x00,0x00
adoxq %rax,%r12
adoxq %rbp,%r13

mulxq %rdx,%rax,%rdi
xorq %rcx,%rcx
movq 56(%rsi),%rdx
adoxq %r11,%r11

adcxq %rbx,%rax
adoxq %r12,%r12
adcxq %rax,%r11
adoxq %rbp,%rcx
adcxq %rdi,%r12
adcxq %rbp,%rcx

.byte 0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00
.byte 0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00


mulxq %rdx,%rax,%rdx
xorq %rbx,%rbx
adoxq %r13,%r13

adcxq %rcx,%rax
adoxq %rbp,%rbx
adcxq %r13,%rax
adcxq %rdx,%rbx

.byte 102,72,15,126,199
.byte 102,72,15,126,205

movq 128(%rsp),%rdx
movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15

movq %rax,112(%rsp)
movq %rbx,120(%rsp)

call __rsaz_512_reducex

addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
adcq 88(%rsp),%r11
adcq 96(%rsp),%r12
adcq 104(%rsp),%r13
adcq 112(%rsp),%r14
adcq 120(%rsp),%r15
sbbq %rcx,%rcx

call __rsaz_512_subtract

movq %r8,%rdx
movq %r9,%rax
movl 128+8(%rsp),%r8d
movq %rdi,%rsi

decl %r8d
jnz .Loop_sqrx

.Lsqr_tail:

leaq 128+24+48(%rsp),%rax
.cfi_def_cfa %rax,8
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lsqr_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size rsaz_512_sqr,.-rsaz_512_sqr
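/*
 * rsaz_512_mul: 512-bit Montgomery multiplication.  Assumed C prototype
 * (following the rsaz_exp.h declarations; not part of this file):
 *
 *	void rsaz_512_mul(void *ret, const void *a, const void *b,
 *	                  const void *n, unsigned long n0);
 *
 * Dispatches between the mulq path (__rsaz_512_mul/__rsaz_512_reduce)
 * and the BMI2+ADX path (__rsaz_512_mulx/__rsaz_512_reducex).
 */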
.globl rsaz_512_mul
.type rsaz_512_mul,@function
.align 32
rsaz_512_mul:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56

subq $128+24,%rsp
.cfi_adjust_cfa_offset 128+24
.Lmul_body:
.byte 102,72,15,110,199
.byte 102,72,15,110,201
movq %r8,128(%rsp)
movl $0x80100,%r11d
andl OPENSSL_ia32cap_P+8(%rip),%r11d
cmpl $0x80100,%r11d
je .Lmulx
movq (%rdx),%rbx
movq %rdx,%rbp
call __rsaz_512_mul

.byte 102,72,15,126,199
.byte 102,72,15,126,205

movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15

call __rsaz_512_reduce
jmp .Lmul_tail

.align 32
.Lmulx:
movq %rdx,%rbp
movq (%rdx),%rdx
call __rsaz_512_mulx

.byte 102,72,15,126,199
.byte 102,72,15,126,205

movq 128(%rsp),%rdx
movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15

call __rsaz_512_reducex
.Lmul_tail:
addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
adcq 88(%rsp),%r11
adcq 96(%rsp),%r12
adcq 104(%rsp),%r13
adcq 112(%rsp),%r14
adcq 120(%rsp),%r15
sbbq %rcx,%rcx

call __rsaz_512_subtract

leaq 128+24+48(%rsp),%rax
.cfi_def_cfa %rax,8
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lmul_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size rsaz_512_mul,.-rsaz_512_mul
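/*
 * rsaz_512_mul_gather4: multiply by a power-table entry fetched with a
 * constant-time gather.  Assumed C prototype (not part of this file):
 *
 *	void rsaz_512_mul_gather4(void *ret, const void *a, const void *tbl,
 *	                          const void *n, unsigned long n0,
 *	                          unsigned int power);
 *
 * The SSE2 pcmpeqd/pand/por sequence below selects one of 16 interleaved
 * table entries without forming a data-dependent memory address, so the
 * access pattern leaks nothing about the secret index.
 */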
.globl rsaz_512_mul_gather4
.type rsaz_512_mul_gather4,@function
.align 32
rsaz_512_mul_gather4:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56

subq $152,%rsp
.cfi_adjust_cfa_offset 152
.Lmul_gather4_body:
movd %r9d,%xmm8
movdqa .Linc+16(%rip),%xmm1
movdqa .Linc(%rip),%xmm0

pshufd $0,%xmm8,%xmm8
movdqa %xmm1,%xmm7
movdqa %xmm1,%xmm2
paddd %xmm0,%xmm1
pcmpeqd %xmm8,%xmm0
movdqa %xmm7,%xmm3
paddd %xmm1,%xmm2
pcmpeqd %xmm8,%xmm1
movdqa %xmm7,%xmm4
paddd %xmm2,%xmm3
pcmpeqd %xmm8,%xmm2
movdqa %xmm7,%xmm5
paddd %xmm3,%xmm4
pcmpeqd %xmm8,%xmm3
movdqa %xmm7,%xmm6
paddd %xmm4,%xmm5
pcmpeqd %xmm8,%xmm4
paddd %xmm5,%xmm6
pcmpeqd %xmm8,%xmm5
paddd %xmm6,%xmm7
pcmpeqd %xmm8,%xmm6
pcmpeqd %xmm8,%xmm7

movdqa 0(%rdx),%xmm8
movdqa 16(%rdx),%xmm9
movdqa 32(%rdx),%xmm10
movdqa 48(%rdx),%xmm11
pand %xmm0,%xmm8
movdqa 64(%rdx),%xmm12
pand %xmm1,%xmm9
movdqa 80(%rdx),%xmm13
pand %xmm2,%xmm10
movdqa 96(%rdx),%xmm14
pand %xmm3,%xmm11
movdqa 112(%rdx),%xmm15
leaq 128(%rdx),%rbp
pand %xmm4,%xmm12
pand %xmm5,%xmm13
pand %xmm6,%xmm14
pand %xmm7,%xmm15
por %xmm10,%xmm8
por %xmm11,%xmm9
por %xmm12,%xmm8
por %xmm13,%xmm9
por %xmm14,%xmm8
por %xmm15,%xmm9

por %xmm9,%xmm8
pshufd $0x4e,%xmm8,%xmm9
por %xmm9,%xmm8
movl $0x80100,%r11d
andl OPENSSL_ia32cap_P+8(%rip),%r11d
cmpl $0x80100,%r11d
je .Lmulx_gather
.byte 102,76,15,126,195

movq %r8,128(%rsp)
movq %rdi,128+8(%rsp)
movq %rcx,128+16(%rsp)

movq (%rsi),%rax
movq 8(%rsi),%rcx
mulq %rbx
movq %rax,(%rsp)
movq %rcx,%rax
movq %rdx,%r8

mulq %rbx
addq %rax,%r8
movq 16(%rsi),%rax
movq %rdx,%r9
adcq $0,%r9

mulq %rbx
addq %rax,%r9
movq 24(%rsi),%rax
movq %rdx,%r10
adcq $0,%r10

mulq %rbx
addq %rax,%r10
movq 32(%rsi),%rax
movq %rdx,%r11
adcq $0,%r11

mulq %rbx
addq %rax,%r11
movq 40(%rsi),%rax
movq %rdx,%r12
adcq $0,%r12

mulq %rbx
addq %rax,%r12
movq 48(%rsi),%rax
movq %rdx,%r13
adcq $0,%r13

mulq %rbx
addq %rax,%r13
movq 56(%rsi),%rax
movq %rdx,%r14
adcq $0,%r14

mulq %rbx
addq %rax,%r14
movq (%rsi),%rax
movq %rdx,%r15
adcq $0,%r15

leaq 8(%rsp),%rdi
movl $7,%ecx
jmp .Loop_mul_gather

.align 32
.Loop_mul_gather:
movdqa 0(%rbp),%xmm8
movdqa 16(%rbp),%xmm9
movdqa 32(%rbp),%xmm10
movdqa 48(%rbp),%xmm11
pand %xmm0,%xmm8
movdqa 64(%rbp),%xmm12
pand %xmm1,%xmm9
movdqa 80(%rbp),%xmm13
pand %xmm2,%xmm10
movdqa 96(%rbp),%xmm14
pand %xmm3,%xmm11
movdqa 112(%rbp),%xmm15
leaq 128(%rbp),%rbp
pand %xmm4,%xmm12
pand %xmm5,%xmm13
pand %xmm6,%xmm14
pand %xmm7,%xmm15
por %xmm10,%xmm8
por %xmm11,%xmm9
por %xmm12,%xmm8
por %xmm13,%xmm9
por %xmm14,%xmm8
por %xmm15,%xmm9

por %xmm9,%xmm8
pshufd $0x4e,%xmm8,%xmm9
por %xmm9,%xmm8
.byte 102,76,15,126,195

mulq %rbx
addq %rax,%r8
movq 8(%rsi),%rax
movq %r8,(%rdi)
movq %rdx,%r8
adcq $0,%r8

mulq %rbx
addq %rax,%r9
movq 16(%rsi),%rax
adcq $0,%rdx
addq %r9,%r8
movq %rdx,%r9
adcq $0,%r9

mulq %rbx
addq %rax,%r10
movq 24(%rsi),%rax
adcq $0,%rdx
addq %r10,%r9
movq %rdx,%r10
adcq $0,%r10

mulq %rbx
addq %rax,%r11
movq 32(%rsi),%rax
adcq $0,%rdx
addq %r11,%r10
movq %rdx,%r11
adcq $0,%r11

mulq %rbx
addq %rax,%r12
movq 40(%rsi),%rax
adcq $0,%rdx
addq %r12,%r11
movq %rdx,%r12
adcq $0,%r12

mulq %rbx
addq %rax,%r13
movq 48(%rsi),%rax
adcq $0,%rdx
addq %r13,%r12
movq %rdx,%r13
adcq $0,%r13

mulq %rbx
addq %rax,%r14
movq 56(%rsi),%rax
adcq $0,%rdx
addq %r14,%r13
movq %rdx,%r14
adcq $0,%r14

mulq %rbx
addq %rax,%r15
movq (%rsi),%rax
adcq $0,%rdx
addq %r15,%r14
movq %rdx,%r15
adcq $0,%r15

leaq 8(%rdi),%rdi

decl %ecx
jnz .Loop_mul_gather

movq %r8,(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r12,32(%rdi)
movq %r13,40(%rdi)
movq %r14,48(%rdi)
movq %r15,56(%rdi)

movq 128+8(%rsp),%rdi
movq 128+16(%rsp),%rbp

movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15

call __rsaz_512_reduce
jmp .Lmul_gather_tail

.align 32
.Lmulx_gather:
.byte 102,76,15,126,194

movq %r8,128(%rsp)
movq %rdi,128+8(%rsp)
movq %rcx,128+16(%rsp)

mulxq (%rsi),%rbx,%r8
movq %rbx,(%rsp)
xorl %edi,%edi

mulxq 8(%rsi),%rax,%r9

mulxq 16(%rsi),%rbx,%r10
adcxq %rax,%r8

mulxq 24(%rsi),%rax,%r11
adcxq %rbx,%r9

mulxq 32(%rsi),%rbx,%r12
adcxq %rax,%r10

mulxq 40(%rsi),%rax,%r13
adcxq %rbx,%r11

mulxq 48(%rsi),%rbx,%r14
adcxq %rax,%r12

mulxq 56(%rsi),%rax,%r15
adcxq %rbx,%r13
adcxq %rax,%r14
.byte 0x67
movq %r8,%rbx
adcxq %rdi,%r15

movq $-7,%rcx
jmp .Loop_mulx_gather

.align 32
.Loop_mulx_gather:
movdqa 0(%rbp),%xmm8
movdqa 16(%rbp),%xmm9
movdqa 32(%rbp),%xmm10
movdqa 48(%rbp),%xmm11
pand %xmm0,%xmm8
movdqa 64(%rbp),%xmm12
pand %xmm1,%xmm9
movdqa 80(%rbp),%xmm13
pand %xmm2,%xmm10
movdqa 96(%rbp),%xmm14
pand %xmm3,%xmm11
movdqa 112(%rbp),%xmm15
leaq 128(%rbp),%rbp
pand %xmm4,%xmm12
pand %xmm5,%xmm13
pand %xmm6,%xmm14
pand %xmm7,%xmm15
por %xmm10,%xmm8
por %xmm11,%xmm9
por %xmm12,%xmm8
por %xmm13,%xmm9
por %xmm14,%xmm8
por %xmm15,%xmm9

por %xmm9,%xmm8
pshufd $0x4e,%xmm8,%xmm9
por %xmm9,%xmm8
.byte 102,76,15,126,194

.byte 0xc4,0x62,0xfb,0xf6,0x86,0x00,0x00,0x00,0x00
adcxq %rax,%rbx
adoxq %r9,%r8

mulxq 8(%rsi),%rax,%r9
adcxq %rax,%r8
adoxq %r10,%r9

mulxq 16(%rsi),%rax,%r10
adcxq %rax,%r9
adoxq %r11,%r10

.byte 0xc4,0x62,0xfb,0xf6,0x9e,0x18,0x00,0x00,0x00
adcxq %rax,%r10
adoxq %r12,%r11

mulxq 32(%rsi),%rax,%r12
adcxq %rax,%r11
adoxq %r13,%r12

mulxq 40(%rsi),%rax,%r13
adcxq %rax,%r12
adoxq %r14,%r13

.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
adcxq %rax,%r13
.byte 0x67
adoxq %r15,%r14

mulxq 56(%rsi),%rax,%r15
movq %rbx,64(%rsp,%rcx,8)
adcxq %rax,%r14
adoxq %rdi,%r15
movq %r8,%rbx
adcxq %rdi,%r15

incq %rcx
jnz .Loop_mulx_gather

movq %r8,64(%rsp)
movq %r9,64+8(%rsp)
movq %r10,64+16(%rsp)
movq %r11,64+24(%rsp)
movq %r12,64+32(%rsp)
movq %r13,64+40(%rsp)
movq %r14,64+48(%rsp)
movq %r15,64+56(%rsp)

movq 128(%rsp),%rdx
movq 128+8(%rsp),%rdi
movq 128+16(%rsp),%rbp

movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15

call __rsaz_512_reducex

.Lmul_gather_tail:
addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
adcq 88(%rsp),%r11
adcq 96(%rsp),%r12
adcq 104(%rsp),%r13
adcq 112(%rsp),%r14
adcq 120(%rsp),%r15
sbbq %rcx,%rcx

call __rsaz_512_subtract

leaq 128+24+48(%rsp),%rax
.cfi_def_cfa %rax,8
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lmul_gather4_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size rsaz_512_mul_gather4,.-rsaz_512_mul_gather4
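/*
 * rsaz_512_mul_scatter4: Montgomery multiplication whose result is also
 * scattered into a power table at a 128-byte stride.  As the code shows,
 * the b operand is read from the out buffer itself (movq (%rdi),%rbx).
 * Assumed C prototype (not part of this file):
 *
 *	void rsaz_512_mul_scatter4(void *out, const void *a, const void *n,
 *	                           unsigned long n0, void *tbl,
 *	                           unsigned long power);
 */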
.globl rsaz_512_mul_scatter4
.type rsaz_512_mul_scatter4,@function
.align 32
rsaz_512_mul_scatter4:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56

movl %r9d,%r9d
subq $128+24,%rsp
.cfi_adjust_cfa_offset 128+24
.Lmul_scatter4_body:
leaq (%r8,%r9,8),%r8
.byte 102,72,15,110,199
.byte 102,72,15,110,202
.byte 102,73,15,110,208
movq %rcx,128(%rsp)

movq %rdi,%rbp
movl $0x80100,%r11d
andl OPENSSL_ia32cap_P+8(%rip),%r11d
cmpl $0x80100,%r11d
je .Lmulx_scatter
movq (%rdi),%rbx
call __rsaz_512_mul

.byte 102,72,15,126,199
.byte 102,72,15,126,205

movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15

call __rsaz_512_reduce
jmp .Lmul_scatter_tail

.align 32
.Lmulx_scatter:
movq (%rdi),%rdx
call __rsaz_512_mulx

.byte 102,72,15,126,199
.byte 102,72,15,126,205

movq 128(%rsp),%rdx
movq (%rsp),%r8
movq 8(%rsp),%r9
movq 16(%rsp),%r10
movq 24(%rsp),%r11
movq 32(%rsp),%r12
movq 40(%rsp),%r13
movq 48(%rsp),%r14
movq 56(%rsp),%r15

call __rsaz_512_reducex

.Lmul_scatter_tail:
addq 64(%rsp),%r8
adcq 72(%rsp),%r9
adcq 80(%rsp),%r10
adcq 88(%rsp),%r11
adcq 96(%rsp),%r12
adcq 104(%rsp),%r13
adcq 112(%rsp),%r14
adcq 120(%rsp),%r15
.byte 102,72,15,126,214
sbbq %rcx,%rcx

call __rsaz_512_subtract

movq %r8,0(%rsi)
movq %r9,128(%rsi)
movq %r10,256(%rsi)
movq %r11,384(%rsi)
movq %r12,512(%rsi)
movq %r13,640(%rsi)
movq %r14,768(%rsi)
movq %r15,896(%rsi)

leaq 128+24+48(%rsp),%rax
.cfi_def_cfa %rax,8
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lmul_scatter4_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size rsaz_512_mul_scatter4,.-rsaz_512_mul_scatter4
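/*
 * rsaz_512_mul_by_one: convert out of Montgomery form, i.e. one
 * Montgomery reduction of the input against a zeroed high half.
 * Assumed C prototype (not part of this file):
 *
 *	void rsaz_512_mul_by_one(void *ret, const void *a,
 *	                         const void *n, unsigned long n0);
 */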
.globl rsaz_512_mul_by_one
.type rsaz_512_mul_by_one,@function
.align 32
rsaz_512_mul_by_one:
.cfi_startproc
pushq %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset %rbx,-16
pushq %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset %rbp,-24
pushq %r12
.cfi_adjust_cfa_offset 8
.cfi_offset %r12,-32
pushq %r13
.cfi_adjust_cfa_offset 8
.cfi_offset %r13,-40
pushq %r14
.cfi_adjust_cfa_offset 8
.cfi_offset %r14,-48
pushq %r15
.cfi_adjust_cfa_offset 8
.cfi_offset %r15,-56

subq $128+24,%rsp
.cfi_adjust_cfa_offset 128+24
.Lmul_by_one_body:
movl OPENSSL_ia32cap_P+8(%rip),%eax
movq %rdx,%rbp
movq %rcx,128(%rsp)

movq (%rsi),%r8
pxor %xmm0,%xmm0
movq 8(%rsi),%r9
movq 16(%rsi),%r10
movq 24(%rsi),%r11
movq 32(%rsi),%r12
movq 40(%rsi),%r13
movq 48(%rsi),%r14
movq 56(%rsi),%r15

movdqa %xmm0,(%rsp)
movdqa %xmm0,16(%rsp)
movdqa %xmm0,32(%rsp)
movdqa %xmm0,48(%rsp)
movdqa %xmm0,64(%rsp)
movdqa %xmm0,80(%rsp)
movdqa %xmm0,96(%rsp)
andl $0x80100,%eax
cmpl $0x80100,%eax
je .Lby_one_callx
call __rsaz_512_reduce
jmp .Lby_one_tail
.align 32
.Lby_one_callx:
movq 128(%rsp),%rdx
call __rsaz_512_reducex
.Lby_one_tail:
movq %r8,(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r12,32(%rdi)
movq %r13,40(%rdi)
movq %r14,48(%rdi)
movq %r15,56(%rdi)

leaq 128+24+48(%rsp),%rax
.cfi_def_cfa %rax,8
movq -48(%rax),%r15
.cfi_restore %r15
movq -40(%rax),%r14
.cfi_restore %r14
movq -32(%rax),%r13
.cfi_restore %r13
movq -24(%rax),%r12
.cfi_restore %r12
movq -16(%rax),%rbp
.cfi_restore %rbp
movq -8(%rax),%rbx
.cfi_restore %rbx
leaq (%rax),%rsp
.cfi_def_cfa_register %rsp
.Lmul_by_one_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size rsaz_512_mul_by_one,.-rsaz_512_mul_by_one
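/*
 * __rsaz_512_reduce: one full Montgomery reduction (8 iterations) on the
 * mulq path.  On entry %r8-%r15 hold the low 512 bits of the product and
 * %rbp points at the modulus; the Montgomery constant n0 is read from
 * 128+8(%rsp), which is the caller's 128(%rsp) offset by the return
 * address pushed by call.  The reduced value is left in %r8-%r15; the
 * caller adds the high product half and conditionally subtracts n.
 */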
.type __rsaz_512_reduce,@function
.align 32
__rsaz_512_reduce:
.cfi_startproc
movq %r8,%rbx
imulq 128+8(%rsp),%rbx
movq 0(%rbp),%rax
movl $8,%ecx
jmp .Lreduction_loop

.align 32
.Lreduction_loop:
mulq %rbx
movq 8(%rbp),%rax
negq %r8
movq %rdx,%r8
adcq $0,%r8

mulq %rbx
addq %rax,%r9
movq 16(%rbp),%rax
adcq $0,%rdx
addq %r9,%r8
movq %rdx,%r9
adcq $0,%r9

mulq %rbx
addq %rax,%r10
movq 24(%rbp),%rax
adcq $0,%rdx
addq %r10,%r9
movq %rdx,%r10
adcq $0,%r10

mulq %rbx
addq %rax,%r11
movq 32(%rbp),%rax
adcq $0,%rdx
addq %r11,%r10
movq 128+8(%rsp),%rsi


adcq $0,%rdx
movq %rdx,%r11

mulq %rbx
addq %rax,%r12
movq 40(%rbp),%rax
adcq $0,%rdx
imulq %r8,%rsi
addq %r12,%r11
movq %rdx,%r12
adcq $0,%r12

mulq %rbx
addq %rax,%r13
movq 48(%rbp),%rax
adcq $0,%rdx
addq %r13,%r12
movq %rdx,%r13
adcq $0,%r13

mulq %rbx
addq %rax,%r14
movq 56(%rbp),%rax
adcq $0,%rdx
addq %r14,%r13
movq %rdx,%r14
adcq $0,%r14

mulq %rbx
movq %rsi,%rbx
addq %rax,%r15
movq 0(%rbp),%rax
adcq $0,%rdx
addq %r15,%r14
movq %rdx,%r15
adcq $0,%r15

decl %ecx
jne .Lreduction_loop

.byte 0xf3,0xc3
.cfi_endproc
.size __rsaz_512_reduce,.-__rsaz_512_reduce
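/*
 * __rsaz_512_reducex: the same Montgomery reduction on the BMI2/ADX path,
 * using mulx with the independent adcx/adox carry chains.  On entry %rdx
 * holds n0 and %r8-%r15 hold the low 512 bits of the product; the next
 * iteration's n0 product is folded in via 128+8(%rsp) as above.
 */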
.type __rsaz_512_reducex,@function
.align 32
__rsaz_512_reducex:
.cfi_startproc

imulq %r8,%rdx
xorq %rsi,%rsi
movl $8,%ecx
jmp .Lreduction_loopx

.align 32
.Lreduction_loopx:
movq %r8,%rbx
mulxq 0(%rbp),%rax,%r8
adcxq %rbx,%rax
adoxq %r9,%r8

mulxq 8(%rbp),%rax,%r9
adcxq %rax,%r8
adoxq %r10,%r9

mulxq 16(%rbp),%rbx,%r10
adcxq %rbx,%r9
adoxq %r11,%r10

mulxq 24(%rbp),%rbx,%r11
adcxq %rbx,%r10
adoxq %r12,%r11

.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00
movq %rdx,%rax
movq %r8,%rdx
adcxq %rbx,%r11
adoxq %r13,%r12

mulxq 128+8(%rsp),%rbx,%rdx
movq %rax,%rdx

mulxq 40(%rbp),%rax,%r13
adcxq %rax,%r12
adoxq %r14,%r13

.byte 0xc4,0x62,0xfb,0xf6,0xb5,0x30,0x00,0x00,0x00
adcxq %rax,%r13
adoxq %r15,%r14

mulxq 56(%rbp),%rax,%r15
movq %rbx,%rdx
adcxq %rax,%r14
adoxq %rsi,%r15
adcxq %rsi,%r15

decl %ecx
jne .Lreduction_loopx

.byte 0xf3,0xc3
.cfi_endproc
.size __rsaz_512_reducex,.-__rsaz_512_reducex
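/*
 * __rsaz_512_subtract: constant-time final subtraction.  %rcx is an
 * all-ones/all-zeroes borrow mask produced by sbbq %rcx,%rcx in the
 * caller.  The two's complement of the modulus is built limb-wise
 * (negq on the low limb, notq on the rest, valid because a Montgomery
 * modulus is odd so the low limb's negation never carries out), masked
 * with %rcx, and added, subtracting n exactly when the sum overflowed.
 */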
.type __rsaz_512_subtract,@function
.align 32
__rsaz_512_subtract:
.cfi_startproc
movq %r8,(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r12,32(%rdi)
movq %r13,40(%rdi)
movq %r14,48(%rdi)
movq %r15,56(%rdi)

movq 0(%rbp),%r8
movq 8(%rbp),%r9
negq %r8
notq %r9
andq %rcx,%r8
movq 16(%rbp),%r10
andq %rcx,%r9
notq %r10
movq 24(%rbp),%r11
andq %rcx,%r10
notq %r11
movq 32(%rbp),%r12
andq %rcx,%r11
notq %r12
movq 40(%rbp),%r13
andq %rcx,%r12
notq %r13
movq 48(%rbp),%r14
andq %rcx,%r13
notq %r14
movq 56(%rbp),%r15
andq %rcx,%r14
notq %r15
andq %rcx,%r15

addq (%rdi),%r8
adcq 8(%rdi),%r9
adcq 16(%rdi),%r10
adcq 24(%rdi),%r11
adcq 32(%rdi),%r12
adcq 40(%rdi),%r13
adcq 48(%rdi),%r14
adcq 56(%rdi),%r15

movq %r8,(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r12,32(%rdi)
movq %r13,40(%rdi)
movq %r14,48(%rdi)
movq %r15,56(%rdi)

.byte 0xf3,0xc3
.cfi_endproc
.size __rsaz_512_subtract,.-__rsaz_512_subtract
.type __rsaz_512_mul,@function
.align 32
__rsaz_512_mul:
.cfi_startproc
leaq 8(%rsp),%rdi

movq (%rsi),%rax
mulq %rbx
movq %rax,(%rdi)
movq 8(%rsi),%rax
movq %rdx,%r8

mulq %rbx
addq %rax,%r8
movq 16(%rsi),%rax
movq %rdx,%r9
adcq $0,%r9

mulq %rbx
addq %rax,%r9
movq 24(%rsi),%rax
movq %rdx,%r10
adcq $0,%r10

mulq %rbx
addq %rax,%r10
movq 32(%rsi),%rax
movq %rdx,%r11
adcq $0,%r11

mulq %rbx
addq %rax,%r11
movq 40(%rsi),%rax
movq %rdx,%r12
adcq $0,%r12

mulq %rbx
addq %rax,%r12
movq 48(%rsi),%rax
movq %rdx,%r13
adcq $0,%r13

mulq %rbx
addq %rax,%r13
movq 56(%rsi),%rax
movq %rdx,%r14
adcq $0,%r14

mulq %rbx
addq %rax,%r14
movq (%rsi),%rax
movq %rdx,%r15
adcq $0,%r15

leaq 8(%rbp),%rbp
leaq 8(%rdi),%rdi

movl $7,%ecx
jmp .Loop_mul

.align 32
.Loop_mul:
movq (%rbp),%rbx
mulq %rbx
addq %rax,%r8
movq 8(%rsi),%rax
movq %r8,(%rdi)
movq %rdx,%r8
adcq $0,%r8

mulq %rbx
addq %rax,%r9
movq 16(%rsi),%rax
adcq $0,%rdx
addq %r9,%r8
movq %rdx,%r9
adcq $0,%r9

mulq %rbx
addq %rax,%r10
movq 24(%rsi),%rax
adcq $0,%rdx
addq %r10,%r9
movq %rdx,%r10
adcq $0,%r10

mulq %rbx
addq %rax,%r11
movq 32(%rsi),%rax
adcq $0,%rdx
addq %r11,%r10
movq %rdx,%r11
adcq $0,%r11

mulq %rbx
addq %rax,%r12
movq 40(%rsi),%rax
adcq $0,%rdx
addq %r12,%r11
movq %rdx,%r12
adcq $0,%r12

mulq %rbx
addq %rax,%r13
movq 48(%rsi),%rax
adcq $0,%rdx
addq %r13,%r12
movq %rdx,%r13
adcq $0,%r13

mulq %rbx
addq %rax,%r14
movq 56(%rsi),%rax
adcq $0,%rdx
addq %r14,%r13
movq %rdx,%r14
leaq 8(%rbp),%rbp
adcq $0,%r14

mulq %rbx
addq %rax,%r15
movq (%rsi),%rax
adcq $0,%rdx
addq %r15,%r14
movq %rdx,%r15
adcq $0,%r15

leaq 8(%rdi),%rdi

decl %ecx
jnz .Loop_mul

movq %r8,(%rdi)
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r12,32(%rdi)
movq %r13,40(%rdi)
movq %r14,48(%rdi)
movq %r15,56(%rdi)

.byte 0xf3,0xc3
.cfi_endproc
.size __rsaz_512_mul,.-__rsaz_512_mul
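/*
 * __rsaz_512_mulx: the same 8x8-limb schoolbook multiplication on the
 * BMI2/ADX path.  %rdx carries the current b limb for mulx, and the
 * adcx/adox pair runs two independent carry chains per row, so each row
 * needs only one pass instead of the mulq path's add/adc sequences.
 */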
.type __rsaz_512_mulx,@function
.align 32
__rsaz_512_mulx:
.cfi_startproc
mulxq (%rsi),%rbx,%r8
movq $-6,%rcx

mulxq 8(%rsi),%rax,%r9
movq %rbx,8(%rsp)

mulxq 16(%rsi),%rbx,%r10
adcq %rax,%r8

mulxq 24(%rsi),%rax,%r11
adcq %rbx,%r9

mulxq 32(%rsi),%rbx,%r12
adcq %rax,%r10

mulxq 40(%rsi),%rax,%r13
adcq %rbx,%r11

mulxq 48(%rsi),%rbx,%r14
adcq %rax,%r12

mulxq 56(%rsi),%rax,%r15
movq 8(%rbp),%rdx
adcq %rbx,%r13
adcq %rax,%r14
adcq $0,%r15

xorq %rdi,%rdi
jmp .Loop_mulx

.align 32
.Loop_mulx:
movq %r8,%rbx
mulxq (%rsi),%rax,%r8
adcxq %rax,%rbx
adoxq %r9,%r8

mulxq 8(%rsi),%rax,%r9
adcxq %rax,%r8
adoxq %r10,%r9

mulxq 16(%rsi),%rax,%r10
adcxq %rax,%r9
adoxq %r11,%r10

mulxq 24(%rsi),%rax,%r11
adcxq %rax,%r10
adoxq %r12,%r11

.byte 0x3e,0xc4,0x62,0xfb,0xf6,0xa6,0x20,0x00,0x00,0x00
adcxq %rax,%r11
adoxq %r13,%r12

mulxq 40(%rsi),%rax,%r13
adcxq %rax,%r12
adoxq %r14,%r13

mulxq 48(%rsi),%rax,%r14
adcxq %rax,%r13
adoxq %r15,%r14

mulxq 56(%rsi),%rax,%r15
movq 64(%rbp,%rcx,8),%rdx
movq %rbx,8+64-8(%rsp,%rcx,8)
adcxq %rax,%r14
adoxq %rdi,%r15
adcxq %rdi,%r15

incq %rcx
jnz .Loop_mulx

movq %r8,%rbx
mulxq (%rsi),%rax,%r8
adcxq %rax,%rbx
adoxq %r9,%r8

.byte 0xc4,0x62,0xfb,0xf6,0x8e,0x08,0x00,0x00,0x00
adcxq %rax,%r8
adoxq %r10,%r9

.byte 0xc4,0x62,0xfb,0xf6,0x96,0x10,0x00,0x00,0x00
adcxq %rax,%r9
adoxq %r11,%r10

mulxq 24(%rsi),%rax,%r11
adcxq %rax,%r10
adoxq %r12,%r11

mulxq 32(%rsi),%rax,%r12
adcxq %rax,%r11
adoxq %r13,%r12

mulxq 40(%rsi),%rax,%r13
adcxq %rax,%r12
adoxq %r14,%r13

.byte 0xc4,0x62,0xfb,0xf6,0xb6,0x30,0x00,0x00,0x00
adcxq %rax,%r13
adoxq %r15,%r14

.byte 0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00
adcxq %rax,%r14
adoxq %rdi,%r15
adcxq %rdi,%r15

movq %rbx,8+64-8(%rsp)
movq %r8,8+64(%rsp)
movq %r9,8+64+8(%rsp)
movq %r10,8+64+16(%rsp)
movq %r11,8+64+24(%rsp)
movq %r12,8+64+32(%rsp)
movq %r13,8+64+40(%rsp)
movq %r14,8+64+48(%rsp)
movq %r15,8+64+56(%rsp)

.byte 0xf3,0xc3
.cfi_endproc
.size __rsaz_512_mulx,.-__rsaz_512_mulx
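/*
 * rsaz_512_scatter4: store one 512-bit value into a power table at the
 * given index, one 64-bit limb every 128 bytes, so corresponding limbs
 * of all 16 table entries are interleaved.  Assumed C prototype (not
 * part of this file):
 *
 *	void rsaz_512_scatter4(void *tbl, const unsigned long *val,
 *	                       int power);
 */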
.globl rsaz_512_scatter4
.type rsaz_512_scatter4,@function
.align 16
rsaz_512_scatter4:
.cfi_startproc
leaq (%rdi,%rdx,8),%rdi
movl $8,%r9d
jmp .Loop_scatter
.align 16
.Loop_scatter:
movq (%rsi),%rax
leaq 8(%rsi),%rsi
movq %rax,(%rdi)
leaq 128(%rdi),%rdi
decl %r9d
jnz .Loop_scatter
.byte 0xf3,0xc3
.cfi_endproc
.size rsaz_512_scatter4,.-rsaz_512_scatter4

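/*
 * rsaz_512_gather4: constant-time inverse of rsaz_512_scatter4.  Every
 * 128-byte row of the table is read and masked with SSE2 index-equality
 * compares, so the memory access pattern is independent of the secret
 * index (cache-timing, e.g. CacheBleed, mitigation).  Assumed C
 * prototype (not part of this file):
 *
 *	void rsaz_512_gather4(unsigned long *val, const void *tbl,
 *	                      int power);
 */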
.globl rsaz_512_gather4
.type rsaz_512_gather4,@function
.align 16
rsaz_512_gather4:
.cfi_startproc
movd %edx,%xmm8
movdqa .Linc+16(%rip),%xmm1
movdqa .Linc(%rip),%xmm0

pshufd $0,%xmm8,%xmm8
movdqa %xmm1,%xmm7
movdqa %xmm1,%xmm2
paddd %xmm0,%xmm1
pcmpeqd %xmm8,%xmm0
movdqa %xmm7,%xmm3
paddd %xmm1,%xmm2
pcmpeqd %xmm8,%xmm1
movdqa %xmm7,%xmm4
paddd %xmm2,%xmm3
pcmpeqd %xmm8,%xmm2
movdqa %xmm7,%xmm5
paddd %xmm3,%xmm4
pcmpeqd %xmm8,%xmm3
movdqa %xmm7,%xmm6
paddd %xmm4,%xmm5
pcmpeqd %xmm8,%xmm4
paddd %xmm5,%xmm6
pcmpeqd %xmm8,%xmm5
paddd %xmm6,%xmm7
pcmpeqd %xmm8,%xmm6
pcmpeqd %xmm8,%xmm7
movl $8,%r9d
jmp .Loop_gather
.align 16
.Loop_gather:
movdqa 0(%rsi),%xmm8
movdqa 16(%rsi),%xmm9
movdqa 32(%rsi),%xmm10
movdqa 48(%rsi),%xmm11
pand %xmm0,%xmm8
movdqa 64(%rsi),%xmm12
pand %xmm1,%xmm9
movdqa 80(%rsi),%xmm13
pand %xmm2,%xmm10
movdqa 96(%rsi),%xmm14
pand %xmm3,%xmm11
movdqa 112(%rsi),%xmm15
leaq 128(%rsi),%rsi
pand %xmm4,%xmm12
pand %xmm5,%xmm13
pand %xmm6,%xmm14
pand %xmm7,%xmm15
por %xmm10,%xmm8
por %xmm11,%xmm9
por %xmm12,%xmm8
por %xmm13,%xmm9
por %xmm14,%xmm8
por %xmm15,%xmm9

por %xmm9,%xmm8
pshufd $0x4e,%xmm8,%xmm9
por %xmm9,%xmm8
movq %xmm8,(%rdi)
leaq 8(%rdi),%rdi
decl %r9d
jnz .Loop_gather
.byte 0xf3,0xc3
.LSEH_end_rsaz_512_gather4:
.cfi_endproc
.size rsaz_512_gather4,.-rsaz_512_gather4

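/* .Linc: the initial lane indices {0,0,1,1} and the per-step increment
   {2,2,2,2} used to build the 16-way index-equality masks in
   rsaz_512_gather4 and rsaz_512_mul_gather4. */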
.align 64
.Linc:
.long 0,0, 1,1
.long 2,2, 2,2