/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from x86_64-mont5.pl. */
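/*
 * Montgomery multiplication and squaring ("mont5") for x86_64, as used by
 * OpenSSL's constant-time modular exponentiation with a 5-bit fixed window:
 * the powers of the base live in a table of 2^5 = 32 entries, and every
 * multiplicand is fetched with a cache-timing-safe scatter/gather rather
 * than a plain indexed load.
 */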
.text



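# bn_mul_mont_gather5(rp, ap, table, np, n0, num, power):
# Montgomery multiplication rp = ap * table[power] / 2^(64*num) mod np.
# SysV AMD64 arguments: %rdi=rp, %rsi=ap, %rdx=table, %rcx=np, %r8=&n0,
# %r9d=num; the 7th argument (power) arrives on the stack at 8(%rsp).
# Sizes divisible by 8 are dispatched to the 4-way unrolled code below,
# which in turn takes the MULX/ADCX/ADOX path when OPENSSL_ia32cap_P
# advertises BMI1+BMI2+ADX (the 0x80108 mask).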
.globl bn_mul_mont_gather5
.type bn_mul_mont_gather5,@function
.align 64
bn_mul_mont_gather5:
.cfi_startproc
movl %r9d,%r9d
movq %rsp,%rax
.cfi_def_cfa_register %rax
testl $7,%r9d
jnz .Lmul_enter
movl OPENSSL_ia32cap_P+8(%rip),%r11d
jmp .Lmul4x_enter

.align 16
.Lmul_enter:
movd 8(%rsp),%xmm5
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56

negq %r9
movq %rsp,%r11
leaq -280(%rsp,%r9,8),%r10
negq %r9
andq $-1024,%r10









subq %r10,%r11
andq $-4096,%r11
leaq (%r10,%r11,1),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja .Lmul_page_walk
jmp .Lmul_page_walk_done

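# Probe the freshly allocated stack one page at a time so the kernel's
# guard page is touched in order (no stack-clash style jump over it).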
.Lmul_page_walk:
leaq -4096(%rsp),%rsp
movq (%rsp),%r11
cmpq %r10,%rsp
ja .Lmul_page_walk
.Lmul_page_walk_done:

leaq .Linc(%rip),%r10
movq %rax,8(%rsp,%r9,8)
.cfi_escape 0x0f,0x0a,0x77,0x08,0x79,0x00,0x38,0x1e,0x22,0x06,0x23,0x08
.Lmul_body:
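# Broadcast the requested power index into %xmm5 and build a table of
# compare masks (all-ones only at that index).  The gather below ANDs
# every table entry with its mask and ORs the results together, so each
# call touches the whole table and leaks nothing about `power` through
# the cache.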

leaq 128(%rdx),%r12
movdqa 0(%r10),%xmm0
movdqa 16(%r10),%xmm1
leaq 24-112(%rsp,%r9,8),%r10
andq $-16,%r10

pshufd $0,%xmm5,%xmm5
movdqa %xmm1,%xmm4
movdqa %xmm1,%xmm2
paddd %xmm0,%xmm1
pcmpeqd %xmm5,%xmm0
.byte 0x67
movdqa %xmm4,%xmm3
paddd %xmm1,%xmm2
pcmpeqd %xmm5,%xmm1
movdqa %xmm0,112(%r10)
movdqa %xmm4,%xmm0

paddd %xmm2,%xmm3
pcmpeqd %xmm5,%xmm2
movdqa %xmm1,128(%r10)
movdqa %xmm4,%xmm1

paddd %xmm3,%xmm0
pcmpeqd %xmm5,%xmm3
movdqa %xmm2,144(%r10)
movdqa %xmm4,%xmm2

paddd %xmm0,%xmm1
pcmpeqd %xmm5,%xmm0
movdqa %xmm3,160(%r10)
movdqa %xmm4,%xmm3
paddd %xmm1,%xmm2
pcmpeqd %xmm5,%xmm1
movdqa %xmm0,176(%r10)
movdqa %xmm4,%xmm0

paddd %xmm2,%xmm3
pcmpeqd %xmm5,%xmm2
movdqa %xmm1,192(%r10)
movdqa %xmm4,%xmm1

paddd %xmm3,%xmm0
pcmpeqd %xmm5,%xmm3
movdqa %xmm2,208(%r10)
movdqa %xmm4,%xmm2

paddd %xmm0,%xmm1
pcmpeqd %xmm5,%xmm0
movdqa %xmm3,224(%r10)
movdqa %xmm4,%xmm3
paddd %xmm1,%xmm2
pcmpeqd %xmm5,%xmm1
movdqa %xmm0,240(%r10)
movdqa %xmm4,%xmm0

paddd %xmm2,%xmm3
pcmpeqd %xmm5,%xmm2
movdqa %xmm1,256(%r10)
movdqa %xmm4,%xmm1

paddd %xmm3,%xmm0
pcmpeqd %xmm5,%xmm3
movdqa %xmm2,272(%r10)
movdqa %xmm4,%xmm2

paddd %xmm0,%xmm1
pcmpeqd %xmm5,%xmm0
movdqa %xmm3,288(%r10)
movdqa %xmm4,%xmm3
paddd %xmm1,%xmm2
pcmpeqd %xmm5,%xmm1
movdqa %xmm0,304(%r10)

paddd %xmm2,%xmm3
.byte 0x67
pcmpeqd %xmm5,%xmm2
movdqa %xmm1,320(%r10)

pcmpeqd %xmm5,%xmm3
movdqa %xmm2,336(%r10)
pand 64(%r12),%xmm0

pand 80(%r12),%xmm1
pand 96(%r12),%xmm2
movdqa %xmm3,352(%r10)
pand 112(%r12),%xmm3
por %xmm2,%xmm0
por %xmm3,%xmm1
movdqa -128(%r12),%xmm4
movdqa -112(%r12),%xmm5
movdqa -96(%r12),%xmm2
pand 112(%r10),%xmm4
movdqa -80(%r12),%xmm3
pand 128(%r10),%xmm5
por %xmm4,%xmm0
pand 144(%r10),%xmm2
por %xmm5,%xmm1
pand 160(%r10),%xmm3
por %xmm2,%xmm0
por %xmm3,%xmm1
movdqa -64(%r12),%xmm4
movdqa -48(%r12),%xmm5
movdqa -32(%r12),%xmm2
pand 176(%r10),%xmm4
movdqa -16(%r12),%xmm3
pand 192(%r10),%xmm5
por %xmm4,%xmm0
pand 208(%r10),%xmm2
por %xmm5,%xmm1
pand 224(%r10),%xmm3
por %xmm2,%xmm0
por %xmm3,%xmm1
movdqa 0(%r12),%xmm4
movdqa 16(%r12),%xmm5
movdqa 32(%r12),%xmm2
pand 240(%r10),%xmm4
movdqa 48(%r12),%xmm3
pand 256(%r10),%xmm5
por %xmm4,%xmm0
pand 272(%r10),%xmm2
por %xmm5,%xmm1
pand 288(%r10),%xmm3
por %xmm2,%xmm0
por %xmm3,%xmm1
por %xmm1,%xmm0
pshufd $0x4e,%xmm0,%xmm1
por %xmm1,%xmm0
leaq 256(%r12),%r12
.byte 102,72,15,126,195

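# First Montgomery iteration (i = 0): with %rbx = gathered b[0], compute
# tp[] = a[] * b[0] + m * n[], where m = a[0] * b[0] * n0 mod 2^64 makes
# the low limb vanish.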
movq (%r8),%r8
movq (%rsi),%rax

xorq %r14,%r14
xorq %r15,%r15

movq %r8,%rbp
mulq %rbx
movq %rax,%r10
movq (%rcx),%rax

imulq %r10,%rbp
movq %rdx,%r11

mulq %rbp
addq %rax,%r10
movq 8(%rsi),%rax
adcq $0,%rdx
movq %rdx,%r13

leaq 1(%r15),%r15
jmp .L1st_enter

.align 16
.L1st:
addq %rax,%r13
movq (%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r11,%r13
movq %r10,%r11
adcq $0,%rdx
movq %r13,-16(%rsp,%r15,8)
movq %rdx,%r13

.L1st_enter:
mulq %rbx
addq %rax,%r11
movq (%rcx,%r15,8),%rax
adcq $0,%rdx
leaq 1(%r15),%r15
movq %rdx,%r10

mulq %rbp
cmpq %r9,%r15
jne .L1st


addq %rax,%r13
adcq $0,%rdx
addq %r11,%r13
adcq $0,%rdx
movq %r13,-16(%rsp,%r9,8)
movq %rdx,%r13
movq %r10,%r11

xorq %rdx,%rdx
addq %r11,%r13
adcq $0,%rdx
movq %r13,-8(%rsp,%r9,8)
movq %rdx,(%rsp,%r9,8)

leaq 1(%r14),%r14
jmp .Louter
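# Outer loop (i = 1 .. num-1): gather b[i] in constant time, then fold
# it in: tp[] = (tp[] + a[] * b[i] + m * n[]) / 2^64.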
.align 16
.Louter:
leaq 24+128(%rsp,%r9,8),%rdx
andq $-16,%rdx
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
movdqa -128(%r12),%xmm0
movdqa -112(%r12),%xmm1
movdqa -96(%r12),%xmm2
movdqa -80(%r12),%xmm3
pand -128(%rdx),%xmm0
pand -112(%rdx),%xmm1
por %xmm0,%xmm4
pand -96(%rdx),%xmm2
por %xmm1,%xmm5
pand -80(%rdx),%xmm3
por %xmm2,%xmm4
por %xmm3,%xmm5
movdqa -64(%r12),%xmm0
movdqa -48(%r12),%xmm1
movdqa -32(%r12),%xmm2
movdqa -16(%r12),%xmm3
pand -64(%rdx),%xmm0
pand -48(%rdx),%xmm1
por %xmm0,%xmm4
pand -32(%rdx),%xmm2
por %xmm1,%xmm5
pand -16(%rdx),%xmm3
por %xmm2,%xmm4
por %xmm3,%xmm5
movdqa 0(%r12),%xmm0
movdqa 16(%r12),%xmm1
movdqa 32(%r12),%xmm2
movdqa 48(%r12),%xmm3
pand 0(%rdx),%xmm0
pand 16(%rdx),%xmm1
por %xmm0,%xmm4
pand 32(%rdx),%xmm2
por %xmm1,%xmm5
pand 48(%rdx),%xmm3
por %xmm2,%xmm4
por %xmm3,%xmm5
movdqa 64(%r12),%xmm0
movdqa 80(%r12),%xmm1
movdqa 96(%r12),%xmm2
movdqa 112(%r12),%xmm3
pand 64(%rdx),%xmm0
pand 80(%rdx),%xmm1
por %xmm0,%xmm4
pand 96(%rdx),%xmm2
por %xmm1,%xmm5
pand 112(%rdx),%xmm3
por %xmm2,%xmm4
por %xmm3,%xmm5
por %xmm5,%xmm4
pshufd $0x4e,%xmm4,%xmm0
por %xmm4,%xmm0
leaq 256(%r12),%r12

movq (%rsi),%rax
.byte 102,72,15,126,195

xorq %r15,%r15
movq %r8,%rbp
movq (%rsp),%r10

mulq %rbx
addq %rax,%r10
movq (%rcx),%rax
adcq $0,%rdx

imulq %r10,%rbp
movq %rdx,%r11

mulq %rbp
addq %rax,%r10
movq 8(%rsi),%rax
adcq $0,%rdx
movq 8(%rsp),%r10
movq %rdx,%r13

leaq 1(%r15),%r15
jmp .Linner_enter

.align 16
.Linner:
addq %rax,%r13
movq (%rsi,%r15,8),%rax
adcq $0,%rdx
addq %r10,%r13
movq (%rsp,%r15,8),%r10
adcq $0,%rdx
movq %r13,-16(%rsp,%r15,8)
movq %rdx,%r13

.Linner_enter:
mulq %rbx
addq %rax,%r11
movq (%rcx,%r15,8),%rax
adcq $0,%rdx
addq %r11,%r10
movq %rdx,%r11
adcq $0,%r11
leaq 1(%r15),%r15

mulq %rbp
cmpq %r9,%r15
jne .Linner

addq %rax,%r13
adcq $0,%rdx
addq %r10,%r13
movq (%rsp,%r9,8),%r10
adcq $0,%rdx
movq %r13,-16(%rsp,%r9,8)
movq %rdx,%r13

xorq %rdx,%rdx
addq %r11,%r13
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-8(%rsp,%r9,8)
movq %rdx,(%rsp,%r9,8)

leaq 1(%r14),%r14
cmpq %r9,%r14
jb .Louter

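# Final reduction: subtract the modulus once, then use the borrow to
# select between the subtracted and unsubtracted results without
# branching, wiping the stack temporary as it is copied out.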
xorq %r14,%r14
movq (%rsp),%rax
leaq (%rsp),%rsi
movq %r9,%r15
jmp .Lsub
.align 16
.Lsub: sbbq (%rcx,%r14,8),%rax
movq %rax,(%rdi,%r14,8)
movq 8(%rsi,%r14,8),%rax
leaq 1(%r14),%r14
decq %r15
jnz .Lsub

sbbq $0,%rax
movq $-1,%rbx
xorq %rax,%rbx
xorq %r14,%r14
movq %r9,%r15

.Lcopy:
movq (%rdi,%r14,8),%rcx
movq (%rsp,%r14,8),%rdx
andq %rbx,%rcx
andq %rax,%rdx
movq %r14,(%rsp,%r14,8)
orq %rcx,%rdx
movq %rdx,(%rdi,%r14,8)
leaq 1(%r14),%r14
subq $1,%r15
jnz .Lcopy

movq 8(%rsp,%r9,8),%rsi
.cfi_def_cfa %rsi,8
movq $1,%rax

movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmul_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size bn_mul_mont_gather5,.-bn_mul_mont_gather5
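# 4-way unrolled variant, entered from bn_mul_mont_gather5 for sizes
# divisible by 8; hands off to the MULX code when BMI1/BMI2/ADX are
# available.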
.type bn_mul4x_mont_gather5,@function
.align 32
bn_mul4x_mont_gather5:
.cfi_startproc
.byte 0x67
movq %rsp,%rax
.cfi_def_cfa_register %rax
.Lmul4x_enter:
andl $0x80108,%r11d
cmpl $0x80108,%r11d
je .Lmulx4x_enter
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
.Lmul4x_prologue:

.byte 0x67
shll $3,%r9d
leaq (%r9,%r9,2),%r10
negq %r9










leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb .Lmul4xsp_alt
subq %r11,%rbp
leaq -320(%rbp,%r9,2),%rbp
jmp .Lmul4xsp_done

.align 32
.Lmul4xsp_alt:
leaq 4096-320(,%r9,2),%r10
leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rbp
.Lmul4xsp_done:
andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lmul4x_page_walk
jmp .Lmul4x_page_walk_done

.Lmul4x_page_walk:
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lmul4x_page_walk
.Lmul4x_page_walk_done:

negq %r9

movq %rax,40(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lmul4x_body:

call mul4x_internal

movq 40(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq $1,%rax

movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmul4x_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size bn_mul4x_mont_gather5,.-bn_mul4x_mont_gather5

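# Shared worker for bn_mul4x_mont_gather5 and bn_power5: one full
# Montgomery multiplication, four limbs per inner-loop pass.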
.type mul4x_internal,@function
.align 32
mul4x_internal:
.cfi_startproc
shlq $5,%r9
movd 8(%rax),%xmm5
leaq .Linc(%rip),%rax
leaq 128(%rdx,%r9,1),%r13
shrq $5,%r9
movdqa 0(%rax),%xmm0
movdqa 16(%rax),%xmm1
leaq 88-112(%rsp,%r9,1),%r10
leaq 128(%rdx),%r12

pshufd $0,%xmm5,%xmm5
movdqa %xmm1,%xmm4
.byte 0x67,0x67
movdqa %xmm1,%xmm2
paddd %xmm0,%xmm1
pcmpeqd %xmm5,%xmm0
.byte 0x67
movdqa %xmm4,%xmm3
paddd %xmm1,%xmm2
pcmpeqd %xmm5,%xmm1
movdqa %xmm0,112(%r10)
movdqa %xmm4,%xmm0

paddd %xmm2,%xmm3
pcmpeqd %xmm5,%xmm2
movdqa %xmm1,128(%r10)
movdqa %xmm4,%xmm1

paddd %xmm3,%xmm0
pcmpeqd %xmm5,%xmm3
movdqa %xmm2,144(%r10)
movdqa %xmm4,%xmm2

paddd %xmm0,%xmm1
pcmpeqd %xmm5,%xmm0
movdqa %xmm3,160(%r10)
movdqa %xmm4,%xmm3
paddd %xmm1,%xmm2
pcmpeqd %xmm5,%xmm1
movdqa %xmm0,176(%r10)
movdqa %xmm4,%xmm0

paddd %xmm2,%xmm3
pcmpeqd %xmm5,%xmm2
movdqa %xmm1,192(%r10)
movdqa %xmm4,%xmm1

paddd %xmm3,%xmm0
pcmpeqd %xmm5,%xmm3
movdqa %xmm2,208(%r10)
movdqa %xmm4,%xmm2

paddd %xmm0,%xmm1
pcmpeqd %xmm5,%xmm0
movdqa %xmm3,224(%r10)
movdqa %xmm4,%xmm3
paddd %xmm1,%xmm2
pcmpeqd %xmm5,%xmm1
movdqa %xmm0,240(%r10)
movdqa %xmm4,%xmm0

paddd %xmm2,%xmm3
pcmpeqd %xmm5,%xmm2
movdqa %xmm1,256(%r10)
movdqa %xmm4,%xmm1

paddd %xmm3,%xmm0
pcmpeqd %xmm5,%xmm3
movdqa %xmm2,272(%r10)
movdqa %xmm4,%xmm2

paddd %xmm0,%xmm1
pcmpeqd %xmm5,%xmm0
movdqa %xmm3,288(%r10)
movdqa %xmm4,%xmm3
paddd %xmm1,%xmm2
pcmpeqd %xmm5,%xmm1
movdqa %xmm0,304(%r10)

paddd %xmm2,%xmm3
.byte 0x67
pcmpeqd %xmm5,%xmm2
movdqa %xmm1,320(%r10)

pcmpeqd %xmm5,%xmm3
movdqa %xmm2,336(%r10)
pand 64(%r12),%xmm0

pand 80(%r12),%xmm1
pand 96(%r12),%xmm2
movdqa %xmm3,352(%r10)
pand 112(%r12),%xmm3
por %xmm2,%xmm0
por %xmm3,%xmm1
movdqa -128(%r12),%xmm4
movdqa -112(%r12),%xmm5
movdqa -96(%r12),%xmm2
pand 112(%r10),%xmm4
movdqa -80(%r12),%xmm3
pand 128(%r10),%xmm5
por %xmm4,%xmm0
pand 144(%r10),%xmm2
por %xmm5,%xmm1
pand 160(%r10),%xmm3
por %xmm2,%xmm0
por %xmm3,%xmm1
movdqa -64(%r12),%xmm4
movdqa -48(%r12),%xmm5
movdqa -32(%r12),%xmm2
pand 176(%r10),%xmm4
movdqa -16(%r12),%xmm3
pand 192(%r10),%xmm5
por %xmm4,%xmm0
pand 208(%r10),%xmm2
por %xmm5,%xmm1
pand 224(%r10),%xmm3
por %xmm2,%xmm0
por %xmm3,%xmm1
movdqa 0(%r12),%xmm4
movdqa 16(%r12),%xmm5
movdqa 32(%r12),%xmm2
pand 240(%r10),%xmm4
movdqa 48(%r12),%xmm3
pand 256(%r10),%xmm5
por %xmm4,%xmm0
pand 272(%r10),%xmm2
por %xmm5,%xmm1
pand 288(%r10),%xmm3
por %xmm2,%xmm0
por %xmm3,%xmm1
por %xmm1,%xmm0
pshufd $0x4e,%xmm0,%xmm1
por %xmm1,%xmm0
leaq 256(%r12),%r12
.byte 102,72,15,126,195

movq %r13,16+8(%rsp)
movq %rdi,56+8(%rsp)

movq (%r8),%r8
movq (%rsi),%rax
leaq (%rsi,%r9,1),%rsi
negq %r9

movq %r8,%rbp
mulq %rbx
movq %rax,%r10
movq (%rcx),%rax

imulq %r10,%rbp
leaq 64+8(%rsp),%r14
movq %rdx,%r11

mulq %rbp
addq %rax,%r10
movq 8(%rsi,%r9,1),%rax
adcq $0,%rdx
movq %rdx,%rdi

mulq %rbx
addq %rax,%r11
movq 8(%rcx),%rax
adcq $0,%rdx
movq %rdx,%r10

mulq %rbp
addq %rax,%rdi
movq 16(%rsi,%r9,1),%rax
adcq $0,%rdx
addq %r11,%rdi
leaq 32(%r9),%r15
leaq 32(%rcx),%rcx
adcq $0,%rdx
movq %rdi,(%r14)
movq %rdx,%r13
jmp .L1st4x

.align 32
.L1st4x:
mulq %rbx
addq %rax,%r10
movq -16(%rcx),%rax
leaq 32(%r14),%r14
adcq $0,%rdx
movq %rdx,%r11

mulq %rbp
addq %rax,%r13
movq -8(%rsi,%r15,1),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-24(%r14)
movq %rdx,%rdi

mulq %rbx
addq %rax,%r11
movq -8(%rcx),%rax
adcq $0,%rdx
movq %rdx,%r10

mulq %rbp
addq %rax,%rdi
movq (%rsi,%r15,1),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %rdi,-16(%r14)
movq %rdx,%r13

mulq %rbx
addq %rax,%r10
movq 0(%rcx),%rax
adcq $0,%rdx
movq %rdx,%r11

mulq %rbp
addq %rax,%r13
movq 8(%rsi,%r15,1),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-8(%r14)
movq %rdx,%rdi

mulq %rbx
addq %rax,%r11
movq 8(%rcx),%rax
adcq $0,%rdx
movq %rdx,%r10

mulq %rbp
addq %rax,%rdi
movq 16(%rsi,%r15,1),%rax
adcq $0,%rdx
addq %r11,%rdi
leaq 32(%rcx),%rcx
adcq $0,%rdx
movq %rdi,(%r14)
movq %rdx,%r13

addq $32,%r15
jnz .L1st4x

mulq %rbx
addq %rax,%r10
movq -16(%rcx),%rax
leaq 32(%r14),%r14
adcq $0,%rdx
movq %rdx,%r11

mulq %rbp
addq %rax,%r13
movq -8(%rsi),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %r13,-24(%r14)
movq %rdx,%rdi

mulq %rbx
addq %rax,%r11
movq -8(%rcx),%rax
adcq $0,%rdx
movq %rdx,%r10

mulq %rbp
addq %rax,%rdi
movq (%rsi,%r9,1),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %rdi,-16(%r14)
movq %rdx,%r13

leaq (%rcx,%r9,1),%rcx

xorq %rdi,%rdi
addq %r10,%r13
adcq $0,%rdi
movq %r13,-8(%r14)

jmp .Louter4x

.align 32
.Louter4x:
leaq 16+128(%r14),%rdx
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
movdqa -128(%r12),%xmm0
movdqa -112(%r12),%xmm1
movdqa -96(%r12),%xmm2
movdqa -80(%r12),%xmm3
pand -128(%rdx),%xmm0
pand -112(%rdx),%xmm1
por %xmm0,%xmm4
pand -96(%rdx),%xmm2
por %xmm1,%xmm5
pand -80(%rdx),%xmm3
por %xmm2,%xmm4
por %xmm3,%xmm5
movdqa -64(%r12),%xmm0
movdqa -48(%r12),%xmm1
movdqa -32(%r12),%xmm2
movdqa -16(%r12),%xmm3
pand -64(%rdx),%xmm0
pand -48(%rdx),%xmm1
por %xmm0,%xmm4
pand -32(%rdx),%xmm2
por %xmm1,%xmm5
pand -16(%rdx),%xmm3
por %xmm2,%xmm4
por %xmm3,%xmm5
movdqa 0(%r12),%xmm0
movdqa 16(%r12),%xmm1
movdqa 32(%r12),%xmm2
movdqa 48(%r12),%xmm3
pand 0(%rdx),%xmm0
pand 16(%rdx),%xmm1
por %xmm0,%xmm4
pand 32(%rdx),%xmm2
por %xmm1,%xmm5
pand 48(%rdx),%xmm3
por %xmm2,%xmm4
por %xmm3,%xmm5
movdqa 64(%r12),%xmm0
movdqa 80(%r12),%xmm1
movdqa 96(%r12),%xmm2
movdqa 112(%r12),%xmm3
pand 64(%rdx),%xmm0
pand 80(%rdx),%xmm1
por %xmm0,%xmm4
pand 96(%rdx),%xmm2
por %xmm1,%xmm5
pand 112(%rdx),%xmm3
por %xmm2,%xmm4
por %xmm3,%xmm5
por %xmm5,%xmm4
pshufd $0x4e,%xmm4,%xmm0
por %xmm4,%xmm0
leaq 256(%r12),%r12
.byte 102,72,15,126,195

movq (%r14,%r9,1),%r10
movq %r8,%rbp
mulq %rbx
addq %rax,%r10
movq (%rcx),%rax
adcq $0,%rdx

imulq %r10,%rbp
movq %rdx,%r11
movq %rdi,(%r14)

leaq (%r14,%r9,1),%r14

mulq %rbp
addq %rax,%r10
movq 8(%rsi,%r9,1),%rax
adcq $0,%rdx
movq %rdx,%rdi

mulq %rbx
addq %rax,%r11
movq 8(%rcx),%rax
adcq $0,%rdx
addq 8(%r14),%r11
adcq $0,%rdx
movq %rdx,%r10

mulq %rbp
addq %rax,%rdi
movq 16(%rsi,%r9,1),%rax
adcq $0,%rdx
addq %r11,%rdi
leaq 32(%r9),%r15
leaq 32(%rcx),%rcx
adcq $0,%rdx
movq %rdx,%r13
jmp .Linner4x

.align 32
.Linner4x:
mulq %rbx
addq %rax,%r10
movq -16(%rcx),%rax
adcq $0,%rdx
addq 16(%r14),%r10
leaq 32(%r14),%r14
adcq $0,%rdx
movq %rdx,%r11

mulq %rbp
addq %rax,%r13
movq -8(%rsi,%r15,1),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %rdi,-32(%r14)
movq %rdx,%rdi

mulq %rbx
addq %rax,%r11
movq -8(%rcx),%rax
adcq $0,%rdx
addq -8(%r14),%r11
adcq $0,%rdx
movq %rdx,%r10

mulq %rbp
addq %rax,%rdi
movq (%rsi,%r15,1),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %r13,-24(%r14)
movq %rdx,%r13

mulq %rbx
addq %rax,%r10
movq 0(%rcx),%rax
adcq $0,%rdx
addq (%r14),%r10
adcq $0,%rdx
movq %rdx,%r11

mulq %rbp
addq %rax,%r13
movq 8(%rsi,%r15,1),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %rdi,-16(%r14)
movq %rdx,%rdi

mulq %rbx
addq %rax,%r11
movq 8(%rcx),%rax
adcq $0,%rdx
addq 8(%r14),%r11
adcq $0,%rdx
movq %rdx,%r10

mulq %rbp
addq %rax,%rdi
movq 16(%rsi,%r15,1),%rax
adcq $0,%rdx
addq %r11,%rdi
leaq 32(%rcx),%rcx
adcq $0,%rdx
movq %r13,-8(%r14)
movq %rdx,%r13

addq $32,%r15
jnz .Linner4x

mulq %rbx
addq %rax,%r10
movq -16(%rcx),%rax
adcq $0,%rdx
addq 16(%r14),%r10
leaq 32(%r14),%r14
adcq $0,%rdx
movq %rdx,%r11

mulq %rbp
addq %rax,%r13
movq -8(%rsi),%rax
adcq $0,%rdx
addq %r10,%r13
adcq $0,%rdx
movq %rdi,-32(%r14)
movq %rdx,%rdi

mulq %rbx
addq %rax,%r11
movq %rbp,%rax
movq -8(%rcx),%rbp
adcq $0,%rdx
addq -8(%r14),%r11
adcq $0,%rdx
movq %rdx,%r10

mulq %rbp
addq %rax,%rdi
movq (%rsi,%r9,1),%rax
adcq $0,%rdx
addq %r11,%rdi
adcq $0,%rdx
movq %r13,-24(%r14)
movq %rdx,%r13

movq %rdi,-16(%r14)
leaq (%rcx,%r9,1),%rcx

xorq %rdi,%rdi
addq %r10,%r13
adcq $0,%rdi
addq (%r14),%r13
adcq $0,%rdi
movq %r13,-8(%r14)

cmpq 16+8(%rsp),%r12
jb .Louter4x
xorq %rax,%rax
subq %r13,%rbp
adcq %r15,%r15
orq %r15,%rdi
subq %rdi,%rax
leaq (%r14,%r9,1),%rbx
movq (%rcx),%r12
leaq (%rcx),%rbp
movq %r9,%rcx
sarq $3+2,%rcx
movq 56+8(%rsp),%rdi
decq %r12
xorq %r10,%r10
movq 8(%rbp),%r13
movq 16(%rbp),%r14
movq 24(%rbp),%r15
jmp .Lsqr4x_sub_entry
.cfi_endproc
.size mul4x_internal,.-mul4x_internal
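# bn_power5(rp, ap, table, np, n0, num, power): five consecutive
# Montgomery squarings of ap followed by one Montgomery multiplication
# by the gathered table entry -- one window step of the constant-time
# exponentiation.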
.globl bn_power5
.type bn_power5,@function
.align 32
bn_power5:
.cfi_startproc
movq %rsp,%rax
.cfi_def_cfa_register %rax
movl OPENSSL_ia32cap_P+8(%rip),%r11d
andl $0x80108,%r11d
cmpl $0x80108,%r11d
je .Lpowerx5_enter
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
.Lpower5_prologue:

shll $3,%r9d
leal (%r9,%r9,2),%r10d
negq %r9
movq (%r8),%r8








leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb .Lpwr_sp_alt
subq %r11,%rbp
leaq -320(%rbp,%r9,2),%rbp
jmp .Lpwr_sp_done

.align 32
.Lpwr_sp_alt:
leaq 4096-320(,%r9,2),%r10
leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rbp
.Lpwr_sp_done:
andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lpwr_page_walk
jmp .Lpwr_page_walk_done

.Lpwr_page_walk:
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lpwr_page_walk
.Lpwr_page_walk_done:

movq %r9,%r10
negq %r9










movq %r8,32(%rsp)
movq %rax,40(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lpower5_body:
.byte 102,72,15,110,207
.byte 102,72,15,110,209
.byte 102,73,15,110,218
.byte 102,72,15,110,226

call __bn_sqr8x_internal
call __bn_post4x_internal
call __bn_sqr8x_internal
call __bn_post4x_internal
call __bn_sqr8x_internal
call __bn_post4x_internal
call __bn_sqr8x_internal
call __bn_post4x_internal
call __bn_sqr8x_internal
call __bn_post4x_internal

.byte 102,72,15,126,209
.byte 102,72,15,126,226
movq %rsi,%rdi
movq 40(%rsp),%rax
leaq 32(%rsp),%r8

call mul4x_internal

movq 40(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq $1,%rax
movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lpower5_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size bn_power5,.-bn_power5

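# Montgomery squaring, eight limbs per pass: compute the off-diagonal
# products, double them with the shift-and-add loop while folding in the
# diagonal squares, then reduce.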
.globl bn_sqr8x_internal
.hidden bn_sqr8x_internal
.type bn_sqr8x_internal,@function
.align 32
bn_sqr8x_internal:
__bn_sqr8x_internal:
.cfi_startproc held









































































leaq 32(%r10),%rbp
leaq (%rsi,%r9,1),%rsi

movq %r9,%rcx


movq -32(%rsi,%rbp,1),%r14
leaq 48+8(%rsp,%r9,2),%rdi
movq -24(%rsi,%rbp,1),%rax
leaq -32(%rdi,%rbp,1),%rdi
movq -16(%rsi,%rbp,1),%rbx
movq %rax,%r15

mulq %r14
movq %rax,%r10
movq %rbx,%rax
movq %rdx,%r11
movq %r10,-24(%rdi,%rbp,1)

mulq %r14
addq %rax,%r11
movq %rbx,%rax
adcq $0,%rdx
movq %r11,-16(%rdi,%rbp,1)
movq %rdx,%r10


movq -8(%rsi,%rbp,1),%rbx
mulq %r15
movq %rax,%r12
movq %rbx,%rax
movq %rdx,%r13

leaq (%rbp),%rcx
mulq %r14
addq %rax,%r10
movq %rbx,%rax
movq %rdx,%r11
adcq $0,%r11
addq %r12,%r10
adcq $0,%r11
movq %r10,-8(%rdi,%rcx,1)
jmp .Lsqr4x_1st

.align 32
.Lsqr4x_1st:
movq (%rsi,%rcx,1),%rbx
mulq %r15
addq %rax,%r13
movq %rbx,%rax
movq %rdx,%r12
adcq $0,%r12

mulq %r14
addq %rax,%r11
movq %rbx,%rax
movq 8(%rsi,%rcx,1),%rbx
movq %rdx,%r10
adcq $0,%r10
addq %r13,%r11
adcq $0,%r10


mulq %r15
addq %rax,%r12
movq %rbx,%rax
movq %r11,(%rdi,%rcx,1)
movq %rdx,%r13
adcq $0,%r13

mulq %r14
addq %rax,%r10
movq %rbx,%rax
movq 16(%rsi,%rcx,1),%rbx
movq %rdx,%r11
adcq $0,%r11
addq %r12,%r10
adcq $0,%r11

mulq %r15
addq %rax,%r13
movq %rbx,%rax
movq %r10,8(%rdi,%rcx,1)
movq %rdx,%r12
adcq $0,%r12

mulq %r14
addq %rax,%r11
movq %rbx,%rax
movq 24(%rsi,%rcx,1),%rbx
movq %rdx,%r10
adcq $0,%r10
addq %r13,%r11
adcq $0,%r10


mulq %r15
addq %rax,%r12
movq %rbx,%rax
movq %r11,16(%rdi,%rcx,1)
movq %rdx,%r13
adcq $0,%r13
leaq 32(%rcx),%rcx

mulq %r14
addq %rax,%r10
movq %rbx,%rax
movq %rdx,%r11
adcq $0,%r11
addq %r12,%r10
adcq $0,%r11
movq %r10,-8(%rdi,%rcx,1)

cmpq $0,%rcx
jne .Lsqr4x_1st

mulq %r15
addq %rax,%r13
leaq 16(%rbp),%rbp
adcq $0,%rdx
addq %r11,%r13
adcq $0,%rdx

movq %r13,(%rdi)
movq %rdx,%r12
movq %rdx,8(%rdi)
jmp .Lsqr4x_outer

.align 32
.Lsqr4x_outer:
movq -32(%rsi,%rbp,1),%r14
leaq 48+8(%rsp,%r9,2),%rdi
movq -24(%rsi,%rbp,1),%rax
leaq -32(%rdi,%rbp,1),%rdi
movq -16(%rsi,%rbp,1),%rbx
movq %rax,%r15

mulq %r14
movq -24(%rdi,%rbp,1),%r10
addq %rax,%r10
movq %rbx,%rax
adcq $0,%rdx
movq %r10,-24(%rdi,%rbp,1)
movq %rdx,%r11

mulq %r14
addq %rax,%r11
movq %rbx,%rax
adcq $0,%rdx
addq -16(%rdi,%rbp,1),%r11
movq %rdx,%r10
adcq $0,%r10
movq %r11,-16(%rdi,%rbp,1)

xorq %r12,%r12

movq -8(%rsi,%rbp,1),%rbx
mulq %r15
addq %rax,%r12
movq %rbx,%rax
adcq $0,%rdx
addq -8(%rdi,%rbp,1),%r12
movq %rdx,%r13
adcq $0,%r13

mulq %r14
addq %rax,%r10
movq %rbx,%rax
adcq $0,%rdx
addq %r12,%r10
movq %rdx,%r11
adcq $0,%r11
movq %r10,-8(%rdi,%rbp,1)

leaq (%rbp),%rcx
jmp .Lsqr4x_inner

.align 32
.Lsqr4x_inner:
movq (%rsi,%rcx,1),%rbx
mulq %r15
addq %rax,%r13
movq %rbx,%rax
movq %rdx,%r12
adcq $0,%r12
addq (%rdi,%rcx,1),%r13
adcq $0,%r12

.byte 0x67
mulq %r14
addq %rax,%r11
movq %rbx,%rax
movq 8(%rsi,%rcx,1),%rbx
movq %rdx,%r10
adcq $0,%r10
addq %r13,%r11
adcq $0,%r10

mulq %r15
addq %rax,%r12
movq %r11,(%rdi,%rcx,1)
movq %rbx,%rax
movq %rdx,%r13
adcq $0,%r13
addq 8(%rdi,%rcx,1),%r12
leaq 16(%rcx),%rcx
adcq $0,%r13

mulq %r14
addq %rax,%r10
movq %rbx,%rax
adcq $0,%rdx
addq %r12,%r10
movq %rdx,%r11
adcq $0,%r11
movq %r10,-8(%rdi,%rcx,1)

cmpq $0,%rcx
jne .Lsqr4x_inner

.byte 0x67
mulq %r15
addq %rax,%r13
adcq $0,%rdx
addq %r11,%r13
adcq $0,%rdx

movq %r13,(%rdi)
movq %rdx,%r12
movq %rdx,8(%rdi)

addq $16,%rbp
jnz .Lsqr4x_outer


movq -32(%rsi),%r14
leaq 48+8(%rsp,%r9,2),%rdi
movq -24(%rsi),%rax
leaq -32(%rdi,%rbp,1),%rdi
movq -16(%rsi),%rbx
movq %rax,%r15

mulq %r14
addq %rax,%r10
movq %rbx,%rax
movq %rdx,%r11
adcq $0,%r11

mulq %r14
addq %rax,%r11
movq %rbx,%rax
movq %r10,-24(%rdi)
movq %rdx,%r10
adcq $0,%r10
addq %r13,%r11
movq -8(%rsi),%rbx
adcq $0,%r10

mulq %r15
addq %rax,%r12
movq %rbx,%rax
movq %r11,-16(%rdi)
movq %rdx,%r13
adcq $0,%r13

mulq %r14
addq %rax,%r10
movq %rbx,%rax
movq %rdx,%r11
adcq $0,%r11
addq %r12,%r10
adcq $0,%r11
movq %r10,-8(%rdi)

mulq %r15
addq %rax,%r13
movq -16(%rsi),%rax
adcq $0,%rdx
addq %r11,%r13
adcq $0,%rdx

movq %r13,(%rdi)
movq %rdx,%r12
movq %rdx,8(%rdi)

mulq %rbx
addq $16,%rbp
xorq %r14,%r14
subq %r9,%rbp
xorq %r15,%r15

addq %r12,%rax
adcq $0,%rdx
movq %rax,8(%rdi)
movq %rdx,16(%rdi)
movq %r15,24(%rdi)

movq -16(%rsi,%rbp,1),%rax
leaq 48+8(%rsp),%rdi
xorq %r10,%r10
movq 8(%rdi),%r11

leaq (%r14,%r10,2),%r12
shrq $63,%r10
leaq (%rcx,%r11,2),%r13
shrq $63,%r11
orq %r10,%r13
movq 16(%rdi),%r10
movq %r11,%r14
mulq %rax
negq %r15
movq 24(%rdi),%r11
adcq %rax,%r12
movq -8(%rsi,%rbp,1),%rax
movq %r12,(%rdi)
adcq %rdx,%r13

leaq (%r14,%r10,2),%rbx
movq %r13,8(%rdi)
sbbq %r15,%r15
shrq $63,%r10
leaq (%rcx,%r11,2),%r8
shrq $63,%r11
orq %r10,%r8
movq 32(%rdi),%r10
movq %r11,%r14
mulq %rax
negq %r15
movq 40(%rdi),%r11
adcq %rax,%rbx
movq 0(%rsi,%rbp,1),%rax
movq %rbx,16(%rdi)
adcq %rdx,%r8
leaq 16(%rbp),%rbp
movq %r8,24(%rdi)
sbbq %r15,%r15
leaq 64(%rdi),%rdi
jmp .Lsqr4x_shift_n_add

.align 32
.Lsqr4x_shift_n_add:
leaq (%r14,%r10,2),%r12
shrq $63,%r10
leaq (%rcx,%r11,2),%r13
shrq $63,%r11
orq %r10,%r13
movq -16(%rdi),%r10
movq %r11,%r14
mulq %rax
negq %r15
movq -8(%rdi),%r11
adcq %rax,%r12
movq -8(%rsi,%rbp,1),%rax
movq %r12,-32(%rdi)
adcq %rdx,%r13

leaq (%r14,%r10,2),%rbx
movq %r13,-24(%rdi)
sbbq %r15,%r15
shrq $63,%r10
leaq (%rcx,%r11,2),%r8
shrq $63,%r11
orq %r10,%r8
movq 0(%rdi),%r10
movq %r11,%r14
mulq %rax
negq %r15
movq 8(%rdi),%r11
adcq %rax,%rbx
movq 0(%rsi,%rbp,1),%rax
movq %rbx,-16(%rdi)
adcq %rdx,%r8

leaq (%r14,%r10,2),%r12
movq %r8,-8(%rdi)
sbbq %r15,%r15
shrq $63,%r10
leaq (%rcx,%r11,2),%r13
shrq $63,%r11
orq %r10,%r13
movq 16(%rdi),%r10
movq %r11,%r14
mulq %rax
negq %r15
movq 24(%rdi),%r11
adcq %rax,%r12
movq 8(%rsi,%rbp,1),%rax
movq %r12,0(%rdi)
adcq %rdx,%r13

leaq (%r14,%r10,2),%rbx
movq %r13,8(%rdi)
sbbq %r15,%r15
shrq $63,%r10
leaq (%rcx,%r11,2),%r8
shrq $63,%r11
orq %r10,%r8
movq 32(%rdi),%r10
movq %r11,%r14
mulq %rax
negq %r15
movq 40(%rdi),%r11
adcq %rax,%rbx
movq 16(%rsi,%rbp,1),%rax
movq %rbx,16(%rdi)
adcq %rdx,%r8
movq %r8,24(%rdi)
sbbq %r15,%r15
leaq 64(%rdi),%rdi
addq $32,%rbp
jnz .Lsqr4x_shift_n_add

leaq (%r14,%r10,2),%r12
.byte 0x67
shrq $63,%r10
leaq (%rcx,%r11,2),%r13
shrq $63,%r11
orq %r10,%r13
movq -16(%rdi),%r10
movq %r11,%r14
mulq %rax
negq %r15
movq -8(%rdi),%r11
adcq %rax,%r12
movq -8(%rsi),%rax
movq %r12,-32(%rdi)
adcq %rdx,%r13

leaq (%r14,%r10,2),%rbx
movq %r13,-24(%rdi)
sbbq %r15,%r15
shrq $63,%r10
leaq (%rcx,%r11,2),%r8
shrq $63,%r11
orq %r10,%r8
mulq %rax
negq %r15
adcq %rax,%rbx
adcq %rdx,%r8
movq %rbx,-16(%rdi)
movq %r8,-8(%rdi)
.byte 102,72,15,126,213
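# Montgomery reduction of the double-width square sitting on the stack,
# eight limbs at a time (%rbp points at the modulus, n0 is at 32+8(%rsp)).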
__bn_sqr8x_reduction:
xorq %rax,%rax
leaq (%r9,%rbp,1),%rcx
leaq 48+8(%rsp,%r9,2),%rdx
movq %rcx,0+8(%rsp)
leaq 48+8(%rsp,%r9,1),%rdi
movq %rdx,8+8(%rsp)
negq %r9
jmp .L8x_reduction_loop

.align 32
.L8x_reduction_loop:
leaq (%rdi,%r9,1),%rdi
.byte 0x66
movq 0(%rdi),%rbx
movq 8(%rdi),%r9
movq 16(%rdi),%r10
movq 24(%rdi),%r11
movq 32(%rdi),%r12
movq 40(%rdi),%r13
movq 48(%rdi),%r14
movq 56(%rdi),%r15
movq %rax,(%rdx)
leaq 64(%rdi),%rdi

.byte 0x67
movq %rbx,%r8
imulq 32+8(%rsp),%rbx
movq 0(%rbp),%rax
movl $8,%ecx
jmp .L8x_reduce

.align 32
.L8x_reduce:
mulq %rbx
movq 8(%rbp),%rax
negq %r8
movq %rdx,%r8
adcq $0,%r8

mulq %rbx
addq %rax,%r9
movq 16(%rbp),%rax
adcq $0,%rdx
addq %r9,%r8
movq %rbx,48-8+8(%rsp,%rcx,8)
movq %rdx,%r9
adcq $0,%r9

mulq %rbx
addq %rax,%r10
movq 24(%rbp),%rax
adcq $0,%rdx
addq %r10,%r9
movq 32+8(%rsp),%rsi
movq %rdx,%r10
adcq $0,%r10

mulq %rbx
addq %rax,%r11
movq 32(%rbp),%rax
adcq $0,%rdx
imulq %r8,%rsi
addq %r11,%r10
movq %rdx,%r11
adcq $0,%r11

mulq %rbx
addq %rax,%r12
movq 40(%rbp),%rax
adcq $0,%rdx
addq %r12,%r11
movq %rdx,%r12
adcq $0,%r12

mulq %rbx
addq %rax,%r13
movq 48(%rbp),%rax
adcq $0,%rdx
addq %r13,%r12
movq %rdx,%r13
adcq $0,%r13

mulq %rbx
addq %rax,%r14
movq 56(%rbp),%rax
adcq $0,%rdx
addq %r14,%r13
movq %rdx,%r14
adcq $0,%r14

mulq %rbx
movq %rsi,%rbx
addq %rax,%r15
movq 0(%rbp),%rax
adcq $0,%rdx
addq %r15,%r14
movq %rdx,%r15
adcq $0,%r15

decl %ecx
jnz .L8x_reduce

leaq 64(%rbp),%rbp
xorq %rax,%rax
movq 8+8(%rsp),%rdx
cmpq 0+8(%rsp),%rbp
jae .L8x_no_tail

.byte 0x66
addq 0(%rdi),%r8
adcq 8(%rdi),%r9
adcq 16(%rdi),%r10
adcq 24(%rdi),%r11
adcq 32(%rdi),%r12
adcq 40(%rdi),%r13
adcq 48(%rdi),%r14
adcq 56(%rdi),%r15
sbbq %rsi,%rsi

movq 48+56+8(%rsp),%rbx
movl $8,%ecx
movq 0(%rbp),%rax
jmp .L8x_tail

.align 32
.L8x_tail:
mulq %rbx
addq %rax,%r8
movq 8(%rbp),%rax
movq %r8,(%rdi)
movq %rdx,%r8
adcq $0,%r8

mulq %rbx
addq %rax,%r9
movq 16(%rbp),%rax
adcq $0,%rdx
addq %r9,%r8
leaq 8(%rdi),%rdi
movq %rdx,%r9
adcq $0,%r9

mulq %rbx
addq %rax,%r10
movq 24(%rbp),%rax
adcq $0,%rdx
addq %r10,%r9
movq %rdx,%r10
adcq $0,%r10

mulq %rbx
addq %rax,%r11
movq 32(%rbp),%rax
adcq $0,%rdx
addq %r11,%r10
movq %rdx,%r11
adcq $0,%r11

mulq %rbx
addq %rax,%r12
movq 40(%rbp),%rax
adcq $0,%rdx
addq %r12,%r11
movq %rdx,%r12
adcq $0,%r12

mulq %rbx
addq %rax,%r13
movq 48(%rbp),%rax
adcq $0,%rdx
addq %r13,%r12
movq %rdx,%r13
adcq $0,%r13

mulq %rbx
addq %rax,%r14
movq 56(%rbp),%rax
adcq $0,%rdx
addq %r14,%r13
movq %rdx,%r14
adcq $0,%r14

mulq %rbx
movq 48-16+8(%rsp,%rcx,8),%rbx
addq %rax,%r15
adcq $0,%rdx
addq %r15,%r14
movq 0(%rbp),%rax
movq %rdx,%r15
adcq $0,%r15

decl %ecx
jnz .L8x_tail

leaq 64(%rbp),%rbp
movq 8+8(%rsp),%rdx
cmpq 0+8(%rsp),%rbp
jae .L8x_tail_done

movq 48+56+8(%rsp),%rbx
negq %rsi
movq 0(%rbp),%rax
adcq 0(%rdi),%r8
adcq 8(%rdi),%r9
adcq 16(%rdi),%r10
adcq 24(%rdi),%r11
adcq 32(%rdi),%r12
adcq 40(%rdi),%r13
adcq 48(%rdi),%r14
adcq 56(%rdi),%r15
sbbq %rsi,%rsi

movl $8,%ecx
jmp .L8x_tail

.align 32
.L8x_tail_done:
xorq %rax,%rax
addq (%rdx),%r8
adcq $0,%r9
adcq $0,%r10
adcq $0,%r11
adcq $0,%r12
adcq $0,%r13
adcq $0,%r14
adcq $0,%r15
adcq $0,%rax

negq %rsi
.L8x_no_tail:
adcq 0(%rdi),%r8
adcq 8(%rdi),%r9
adcq 16(%rdi),%r10
adcq 24(%rdi),%r11
adcq 32(%rdi),%r12
adcq 40(%rdi),%r13
adcq 48(%rdi),%r14
adcq 56(%rdi),%r15
adcq $0,%rax
movq -8(%rbp),%rcx
xorq %rsi,%rsi

.byte 102,72,15,126,213

movq %r8,0(%rdi)
movq %r9,8(%rdi)
.byte 102,73,15,126,217
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r12,32(%rdi)
movq %r13,40(%rdi)
movq %r14,48(%rdi)
movq %r15,56(%rdi)
leaq 64(%rdi),%rdi

cmpq %rdx,%rdi
jb .L8x_reduction_loop
.byte 0xf3,0xc3
.cfi_endproc
.size bn_sqr8x_internal,.-bn_sqr8x_internal
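# Final conditional subtraction for the sqr8x result (borrow-driven,
# branch-free), also used after each squaring in bn_power5.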
.type __bn_post4x_internal,@function
.align 32
__bn_post4x_internal:
.cfi_startproc
movq 0(%rbp),%r12
leaq (%rdi,%r9,1),%rbx
movq %r9,%rcx
.byte 102,72,15,126,207
negq %rax
.byte 102,72,15,126,206
sarq $3+2,%rcx
decq %r12
xorq %r10,%r10
movq 8(%rbp),%r13
movq 16(%rbp),%r14
movq 24(%rbp),%r15
jmp .Lsqr4x_sub_entry

.align 16
.Lsqr4x_sub:
movq 0(%rbp),%r12
movq 8(%rbp),%r13
movq 16(%rbp),%r14
movq 24(%rbp),%r15
.Lsqr4x_sub_entry:
leaq 32(%rbp),%rbp
notq %r12
notq %r13
notq %r14
notq %r15
andq %rax,%r12
andq %rax,%r13
andq %rax,%r14
andq %rax,%r15

negq %r10
adcq 0(%rbx),%r12
adcq 8(%rbx),%r13
adcq 16(%rbx),%r14
adcq 24(%rbx),%r15
movq %r12,0(%rdi)
leaq 32(%rbx),%rbx
movq %r13,8(%rdi)
sbbq %r10,%r10
movq %r14,16(%rdi)
movq %r15,24(%rdi)
leaq 32(%rdi),%rdi

incq %rcx
jnz .Lsqr4x_sub

movq %r9,%r10
negq %r9
.byte 0xf3,0xc3
.cfi_endproc
.size __bn_post4x_internal,.-__bn_post4x_internal
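# MULX/ADCX/ADOX (BMI2 + ADX) flavour of bn_mul4x_mont_gather5: two
# independent carry chains let the multiply and the reduction interleave.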
.type bn_mulx4x_mont_gather5,@function
.align 32
bn_mulx4x_mont_gather5:
.cfi_startproc
movq %rsp,%rax
.cfi_def_cfa_register %rax
.Lmulx4x_enter:
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
.Lmulx4x_prologue:

shll $3,%r9d
leaq (%r9,%r9,2),%r10
negq %r9
movq (%r8),%r8










leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb .Lmulx4xsp_alt
subq %r11,%rbp
leaq -320(%rbp,%r9,2),%rbp
jmp .Lmulx4xsp_done

.Lmulx4xsp_alt:
leaq 4096-320(,%r9,2),%r10
leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rbp
.Lmulx4xsp_done:
andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lmulx4x_page_walk
jmp .Lmulx4x_page_walk_done

.Lmulx4x_page_walk:
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lmulx4x_page_walk
.Lmulx4x_page_walk_done:













movq %r8,32(%rsp)
movq %rax,40(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lmulx4x_body:
call mulx4x_internal

movq 40(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq $1,%rax

movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lmulx4x_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size bn_mulx4x_mont_gather5,.-bn_mulx4x_mont_gather5

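# Worker for bn_mulx4x_mont_gather5 and bn_powerx5, same structure as
# mul4x_internal but built on mulx with dual carry chains.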
.type mulx4x_internal,@function
.align 32
mulx4x_internal:
.cfi_startproc
movq %r9,8(%rsp)
movq %r9,%r10
negq %r9
shlq $5,%r9
negq %r10
leaq 128(%rdx,%r9,1),%r13
shrq $5+5,%r9
movd 8(%rax),%xmm5
subq $1,%r9
leaq .Linc(%rip),%rax
movq %r13,16+8(%rsp)
movq %r9,24+8(%rsp)
movq %rdi,56+8(%rsp)
movdqa 0(%rax),%xmm0
movdqa 16(%rax),%xmm1
leaq 88-112(%rsp,%r10,1),%r10
leaq 128(%rdx),%rdi

pshufd $0,%xmm5,%xmm5
movdqa %xmm1,%xmm4
.byte 0x67
movdqa %xmm1,%xmm2
.byte 0x67
paddd %xmm0,%xmm1
pcmpeqd %xmm5,%xmm0
movdqa %xmm4,%xmm3
paddd %xmm1,%xmm2
pcmpeqd %xmm5,%xmm1
movdqa %xmm0,112(%r10)
movdqa %xmm4,%xmm0

paddd %xmm2,%xmm3
pcmpeqd %xmm5,%xmm2
movdqa %xmm1,128(%r10)
movdqa %xmm4,%xmm1

paddd %xmm3,%xmm0
pcmpeqd %xmm5,%xmm3
movdqa %xmm2,144(%r10)
movdqa %xmm4,%xmm2

paddd %xmm0,%xmm1
pcmpeqd %xmm5,%xmm0
movdqa %xmm3,160(%r10)
movdqa %xmm4,%xmm3
paddd %xmm1,%xmm2
pcmpeqd %xmm5,%xmm1
movdqa %xmm0,176(%r10)
movdqa %xmm4,%xmm0

paddd %xmm2,%xmm3
pcmpeqd %xmm5,%xmm2
movdqa %xmm1,192(%r10)
movdqa %xmm4,%xmm1

paddd %xmm3,%xmm0
pcmpeqd %xmm5,%xmm3
movdqa %xmm2,208(%r10)
movdqa %xmm4,%xmm2

paddd %xmm0,%xmm1
pcmpeqd %xmm5,%xmm0
movdqa %xmm3,224(%r10)
movdqa %xmm4,%xmm3
paddd %xmm1,%xmm2
pcmpeqd %xmm5,%xmm1
movdqa %xmm0,240(%r10)
movdqa %xmm4,%xmm0

paddd %xmm2,%xmm3
pcmpeqd %xmm5,%xmm2
movdqa %xmm1,256(%r10)
movdqa %xmm4,%xmm1

paddd %xmm3,%xmm0
pcmpeqd %xmm5,%xmm3
movdqa %xmm2,272(%r10)
movdqa %xmm4,%xmm2

paddd %xmm0,%xmm1
pcmpeqd %xmm5,%xmm0
movdqa %xmm3,288(%r10)
movdqa %xmm4,%xmm3
.byte 0x67
paddd %xmm1,%xmm2
pcmpeqd %xmm5,%xmm1
movdqa %xmm0,304(%r10)

paddd %xmm2,%xmm3
pcmpeqd %xmm5,%xmm2
movdqa %xmm1,320(%r10)

pcmpeqd %xmm5,%xmm3
movdqa %xmm2,336(%r10)

pand 64(%rdi),%xmm0
pand 80(%rdi),%xmm1
pand 96(%rdi),%xmm2
movdqa %xmm3,352(%r10)
pand 112(%rdi),%xmm3
por %xmm2,%xmm0
por %xmm3,%xmm1
movdqa -128(%rdi),%xmm4
movdqa -112(%rdi),%xmm5
movdqa -96(%rdi),%xmm2
pand 112(%r10),%xmm4
movdqa -80(%rdi),%xmm3
pand 128(%r10),%xmm5
por %xmm4,%xmm0
pand 144(%r10),%xmm2
por %xmm5,%xmm1
pand 160(%r10),%xmm3
por %xmm2,%xmm0
por %xmm3,%xmm1
movdqa -64(%rdi),%xmm4
movdqa -48(%rdi),%xmm5
movdqa -32(%rdi),%xmm2
pand 176(%r10),%xmm4
movdqa -16(%rdi),%xmm3
pand 192(%r10),%xmm5
por %xmm4,%xmm0
pand 208(%r10),%xmm2
por %xmm5,%xmm1
pand 224(%r10),%xmm3
por %xmm2,%xmm0
por %xmm3,%xmm1
movdqa 0(%rdi),%xmm4
movdqa 16(%rdi),%xmm5
movdqa 32(%rdi),%xmm2
pand 240(%r10),%xmm4
movdqa 48(%rdi),%xmm3
pand 256(%r10),%xmm5
por %xmm4,%xmm0
pand 272(%r10),%xmm2
por %xmm5,%xmm1
pand 288(%r10),%xmm3
por %xmm2,%xmm0
por %xmm3,%xmm1
pxor %xmm1,%xmm0
pshufd $0x4e,%xmm0,%xmm1
por %xmm1,%xmm0
leaq 256(%rdi),%rdi
.byte 102,72,15,126,194
leaq 64+32+8(%rsp),%rbx

movq %rdx,%r9
mulxq 0(%rsi),%r8,%rax
mulxq 8(%rsi),%r11,%r12
addq %rax,%r11
mulxq 16(%rsi),%rax,%r13
adcq %rax,%r12
adcq $0,%r13
mulxq 24(%rsi),%rax,%r14

movq %r8,%r15
imulq 32+8(%rsp),%r8
xorq %rbp,%rbp
movq %r8,%rdx

movq %rdi,8+8(%rsp)

leaq 32(%rsi),%rsi
adcxq %rax,%r13
adcxq %rbp,%r14

mulxq 0(%rcx),%rax,%r10
adcxq %rax,%r15
adoxq %r11,%r10
mulxq 8(%rcx),%rax,%r11
adcxq %rax,%r10
adoxq %r12,%r11
mulxq 16(%rcx),%rax,%r12
movq 24+8(%rsp),%rdi
movq %r10,-32(%rbx)
adcxq %rax,%r11
adoxq %r13,%r12
mulxq 24(%rcx),%rax,%r15
movq %r9,%rdx
movq %r11,-24(%rbx)
adcxq %rax,%r12
adoxq %rbp,%r15
leaq 32(%rcx),%rcx
movq %r12,-16(%rbx)
jmp .Lmulx4x_1st

.align 32
.Lmulx4x_1st:
adcxq %rbp,%r15
mulxq 0(%rsi),%r10,%rax
adcxq %r14,%r10
mulxq 8(%rsi),%r11,%r14
adcxq %rax,%r11
mulxq 16(%rsi),%r12,%rax
adcxq %r14,%r12
mulxq 24(%rsi),%r13,%r14
.byte 0x67,0x67
movq %r8,%rdx
adcxq %rax,%r13
adcxq %rbp,%r14
leaq 32(%rsi),%rsi
leaq 32(%rbx),%rbx

adoxq %r15,%r10
mulxq 0(%rcx),%rax,%r15
adcxq %rax,%r10
adoxq %r15,%r11
mulxq 8(%rcx),%rax,%r15
adcxq %rax,%r11
adoxq %r15,%r12
mulxq 16(%rcx),%rax,%r15
movq %r10,-40(%rbx)
adcxq %rax,%r12
movq %r11,-32(%rbx)
adoxq %r15,%r13
mulxq 24(%rcx),%rax,%r15
movq %r9,%rdx
movq %r12,-24(%rbx)
adcxq %rax,%r13
adoxq %rbp,%r15
leaq 32(%rcx),%rcx
movq %r13,-16(%rbx)

decq %rdi
jnz .Lmulx4x_1st

movq 8(%rsp),%rax
adcq %rbp,%r15
leaq (%rsi,%rax,1),%rsi
addq %r15,%r14
movq 8+8(%rsp),%rdi
adcq %rbp,%rbp
movq %r14,-8(%rbx)
jmp .Lmulx4x_outer

.align 32
.Lmulx4x_outer:
leaq 16-256(%rbx),%r10
pxor %xmm4,%xmm4
.byte 0x67,0x67
pxor %xmm5,%xmm5
movdqa -128(%rdi),%xmm0
movdqa -112(%rdi),%xmm1
movdqa -96(%rdi),%xmm2
pand 256(%r10),%xmm0
movdqa -80(%rdi),%xmm3
pand 272(%r10),%xmm1
por %xmm0,%xmm4
pand 288(%r10),%xmm2
por %xmm1,%xmm5
pand 304(%r10),%xmm3
por %xmm2,%xmm4
por %xmm3,%xmm5
movdqa -64(%rdi),%xmm0
movdqa -48(%rdi),%xmm1
movdqa -32(%rdi),%xmm2
pand 320(%r10),%xmm0
movdqa -16(%rdi),%xmm3
pand 336(%r10),%xmm1
por %xmm0,%xmm4
pand 352(%r10),%xmm2
por %xmm1,%xmm5
pand 368(%r10),%xmm3
por %xmm2,%xmm4
por %xmm3,%xmm5
movdqa 0(%rdi),%xmm0
movdqa 16(%rdi),%xmm1
movdqa 32(%rdi),%xmm2
pand 384(%r10),%xmm0
movdqa 48(%rdi),%xmm3
pand 400(%r10),%xmm1
por %xmm0,%xmm4
pand 416(%r10),%xmm2
por %xmm1,%xmm5
pand 432(%r10),%xmm3
por %xmm2,%xmm4
por %xmm3,%xmm5
movdqa 64(%rdi),%xmm0
movdqa 80(%rdi),%xmm1
movdqa 96(%rdi),%xmm2
pand 448(%r10),%xmm0
movdqa 112(%rdi),%xmm3
pand 464(%r10),%xmm1
por %xmm0,%xmm4
pand 480(%r10),%xmm2
por %xmm1,%xmm5
pand 496(%r10),%xmm3
por %xmm2,%xmm4
por %xmm3,%xmm5
por %xmm5,%xmm4
pshufd $0x4e,%xmm4,%xmm0
por %xmm4,%xmm0
leaq 256(%rdi),%rdi
.byte 102,72,15,126,194

movq %rbp,(%rbx)
leaq 32(%rbx,%rax,1),%rbx
mulxq 0(%rsi),%r8,%r11
xorq %rbp,%rbp
movq %rdx,%r9
mulxq 8(%rsi),%r14,%r12
adoxq -32(%rbx),%r8
adcxq %r14,%r11
mulxq 16(%rsi),%r15,%r13
adoxq -24(%rbx),%r11
adcxq %r15,%r12
mulxq 24(%rsi),%rdx,%r14
adoxq -16(%rbx),%r12
adcxq %rdx,%r13
leaq (%rcx,%rax,1),%rcx
leaq 32(%rsi),%rsi
adoxq -8(%rbx),%r13
adcxq %rbp,%r14
adoxq %rbp,%r14

movq %r8,%r15
imulq 32+8(%rsp),%r8

movq %r8,%rdx
xorq %rbp,%rbp
movq %rdi,8+8(%rsp)

mulxq 0(%rcx),%rax,%r10
adcxq %rax,%r15
adoxq %r11,%r10
mulxq 8(%rcx),%rax,%r11
adcxq %rax,%r10
adoxq %r12,%r11
mulxq 16(%rcx),%rax,%r12
adcxq %rax,%r11
adoxq %r13,%r12
mulxq 24(%rcx),%rax,%r15
movq %r9,%rdx
movq 24+8(%rsp),%rdi
movq %r10,-32(%rbx)
adcxq %rax,%r12
movq %r11,-24(%rbx)
adoxq %rbp,%r15
movq %r12,-16(%rbx)
leaq 32(%rcx),%rcx
jmp .Lmulx4x_inner

.align 32
.Lmulx4x_inner:
mulxq 0(%rsi),%r10,%rax
adcxq %rbp,%r15
adoxq %r14,%r10
mulxq 8(%rsi),%r11,%r14
adcxq 0(%rbx),%r10
adoxq %rax,%r11
mulxq 16(%rsi),%r12,%rax
adcxq 8(%rbx),%r11
adoxq %r14,%r12
mulxq 24(%rsi),%r13,%r14
movq %r8,%rdx
adcxq 16(%rbx),%r12
adoxq %rax,%r13
adcxq 24(%rbx),%r13
adoxq %rbp,%r14
leaq 32(%rsi),%rsi
leaq 32(%rbx),%rbx
adcxq %rbp,%r14

adoxq %r15,%r10
mulxq 0(%rcx),%rax,%r15
adcxq %rax,%r10
adoxq %r15,%r11
mulxq 8(%rcx),%rax,%r15
adcxq %rax,%r11
adoxq %r15,%r12
mulxq 16(%rcx),%rax,%r15
movq %r10,-40(%rbx)
adcxq %rax,%r12
adoxq %r15,%r13
movq %r11,-32(%rbx)
mulxq 24(%rcx),%rax,%r15
movq %r9,%rdx
leaq 32(%rcx),%rcx
movq %r12,-24(%rbx)
adcxq %rax,%r13
adoxq %rbp,%r15
movq %r13,-16(%rbx)

decq %rdi
jnz .Lmulx4x_inner

movq 0+8(%rsp),%rax
adcq %rbp,%r15
subq 0(%rbx),%rdi
movq 8+8(%rsp),%rdi
movq 16+8(%rsp),%r10
adcq %r15,%r14
leaq (%rsi,%rax,1),%rsi
adcq %rbp,%rbp
movq %r14,-8(%rbx)

cmpq %r10,%rdi
jb .Lmulx4x_outer

movq -8(%rcx),%r10
movq %rbp,%r8
movq (%rcx,%rax,1),%r12
leaq (%rcx,%rax,1),%rbp
movq %rax,%rcx
leaq (%rbx,%rax,1),%rdi
xorl %eax,%eax
xorq %r15,%r15
subq %r14,%r10
adcq %r15,%r15
orq %r15,%r8
sarq $3+2,%rcx
subq %r8,%rax
movq 56+8(%rsp),%rdx
decq %r12
movq 8(%rbp),%r13
xorq %r8,%r8
movq 16(%rbp),%r14
movq 24(%rbp),%r15
jmp .Lsqrx4x_sub_entry
.cfi_endproc
.size mulx4x_internal,.-mulx4x_internal
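# bn_powerx5: as bn_power5 (five squarings plus one gathered multiply),
# using the MULX/ADX code paths throughout.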
.type bn_powerx5,@function
.align 32
bn_powerx5:
.cfi_startproc
movq %rsp,%rax
.cfi_def_cfa_register %rax
.Lpowerx5_enter:
pushq %rbx
.cfi_offset %rbx,-16
pushq %rbp
.cfi_offset %rbp,-24
pushq %r12
.cfi_offset %r12,-32
pushq %r13
.cfi_offset %r13,-40
pushq %r14
.cfi_offset %r14,-48
pushq %r15
.cfi_offset %r15,-56
.Lpowerx5_prologue:

shll $3,%r9d
leaq (%r9,%r9,2),%r10
negq %r9
movq (%r8),%r8








leaq -320(%rsp,%r9,2),%r11
movq %rsp,%rbp
subq %rdi,%r11
andq $4095,%r11
cmpq %r11,%r10
jb .Lpwrx_sp_alt
subq %r11,%rbp
leaq -320(%rbp,%r9,2),%rbp
jmp .Lpwrx_sp_done

.align 32
.Lpwrx_sp_alt:
leaq 4096-320(,%r9,2),%r10
leaq -320(%rbp,%r9,2),%rbp
subq %r10,%r11
movq $0,%r10
cmovcq %r10,%r11
subq %r11,%rbp
.Lpwrx_sp_done:
andq $-64,%rbp
movq %rsp,%r11
subq %rbp,%r11
andq $-4096,%r11
leaq (%r11,%rbp,1),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lpwrx_page_walk
jmp .Lpwrx_page_walk_done

.Lpwrx_page_walk:
leaq -4096(%rsp),%rsp
movq (%rsp),%r10
cmpq %rbp,%rsp
ja .Lpwrx_page_walk
.Lpwrx_page_walk_done:

movq %r9,%r10
negq %r9











pxor %xmm0,%xmm0
.byte 102,72,15,110,207
.byte 102,72,15,110,209
.byte 102,73,15,110,218
.byte 102,72,15,110,226
movq %r8,32(%rsp)
movq %rax,40(%rsp)
.cfi_escape 0x0f,0x05,0x77,0x28,0x06,0x23,0x08
.Lpowerx5_body:

call __bn_sqrx8x_internal
call __bn_postx4x_internal
call __bn_sqrx8x_internal
call __bn_postx4x_internal
call __bn_sqrx8x_internal
call __bn_postx4x_internal
call __bn_sqrx8x_internal
call __bn_postx4x_internal
call __bn_sqrx8x_internal
call __bn_postx4x_internal

movq %r10,%r9
movq %rsi,%rdi
.byte 102,72,15,126,209
.byte 102,72,15,126,226
movq 40(%rsp),%rax

call mulx4x_internal

movq 40(%rsp),%rsi
.cfi_def_cfa %rsi,8
movq $1,%rax

movq -48(%rsi),%r15
.cfi_restore %r15
movq -40(%rsi),%r14
.cfi_restore %r14
movq -32(%rsi),%r13
.cfi_restore %r13
movq -24(%rsi),%r12
.cfi_restore %r12
movq -16(%rsi),%rbp
.cfi_restore %rbp
movq -8(%rsi),%rbx
.cfi_restore %rbx
leaq (%rsi),%rsp
.cfi_def_cfa_register %rsp
.Lpowerx5_epilogue:
.byte 0xf3,0xc3
.cfi_endproc
.size bn_powerx5,.-bn_powerx5

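# MULX/ADX Montgomery squaring, eight limbs per pass.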
2723 .globl bn_sqrx8x_internal
2724 .hidden bn_sqrx8x_internal
2725 .type bn_sqrx8x_internal,@function
2726 .align 32
2727 bn_sqrx8x_internal:
2728 __bn_sqrx8x_internal:
2729 .cfi_startproc
2730
2731
2732
2733
2734
2735
2736
2737
2738
2739
2740
2741
2742
2743
2744
2745
2746
2747
2748
2749
2750
2751
2752
2753
2754
2755
2756
2757
2758
2759
2760
2761
2762
2763
2764
2765
2766
2767
2768
2769
2770 leaq 48+8(%rsp),%rdi
2771 leaq (%rsi,%r9,1),%rbp
2772 movq %r9,0+8(%rsp)
2773 movq %rbp,8+8(%rsp)
2774 jmp .Lsqr8x_zero_start
2775
2776 .align 32
2777 .byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00
2778 .Lsqrx8x_zero:
2779 .byte 0x3e
2780 movdqa %xmm0,0(%rdi)
2781 movdqa %xmm0,16(%rdi)
2782 movdqa %xmm0,32(%rdi)
2783 movdqa %xmm0,48(%rdi)
2784 .Lsqr8x_zero_start:
2785 movdqa %xmm0,64(%rdi)
2786 movdqa %xmm0,80(%rdi)
2787 movdqa %xmm0,96(%rdi)
2788 movdqa %xmm0,112(%rdi)
2789 leaq 128(%rdi),%rdi
2790 subq $64,%r9
2791 jnz .Lsqrx8x_zero
2792
2793 movq 0(%rsi),%rdx
2794
2795 xorq %r10,%r10
2796 xorq %r11,%r11
2797 xorq %r12,%r12
2798 xorq %r13,%r13
2799 xorq %r14,%r14
2800 xorq %r15,%r15
2801 leaq 48+8(%rsp),%rdi
2802 xorq %rbp,%rbp
2803 jmp .Lsqrx8x_outer_loop
2804
.align 32
.Lsqrx8x_outer_loop:
mulxq 8(%rsi),%r8,%rax
adcxq %r9,%r8
adoxq %rax,%r10
mulxq 16(%rsi),%r9,%rax
adcxq %r10,%r9
adoxq %rax,%r11
.byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00	# mulxq 24(%rsi),%r10,%rax
adcxq %r11,%r10
adoxq %rax,%r12
.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00	# mulxq 32(%rsi),%r11,%rax
adcxq %r12,%r11
adoxq %rax,%r13
mulxq 40(%rsi),%r12,%rax
adcxq %r13,%r12
adoxq %rax,%r14
mulxq 48(%rsi),%r13,%rax
adcxq %r14,%r13
adoxq %r15,%rax
mulxq 56(%rsi),%r14,%r15
movq 8(%rsi),%rdx
adcxq %rax,%r14
adoxq %rbp,%r15
adcq 64(%rdi),%r15
movq %r8,8(%rdi)
movq %r9,16(%rdi)
sbbq %rcx,%rcx
xorq %rbp,%rbp


mulxq 16(%rsi),%r8,%rbx
mulxq 24(%rsi),%r9,%rax
adcxq %r10,%r8
adoxq %rbx,%r9
mulxq 32(%rsi),%r10,%rbx
adcxq %r11,%r9
adoxq %rax,%r10
.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00	# mulxq 40(%rsi),%r11,%rax
adcxq %r12,%r10
adoxq %rbx,%r11
.byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00	# mulxq 48(%rsi),%r12,%rbx
adcxq %r13,%r11
adoxq %r14,%r12
.byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00	# mulxq 56(%rsi),%r13,%r14
movq 16(%rsi),%rdx
adcxq %rax,%r12
adoxq %rbx,%r13
adcxq %r15,%r13
adoxq %rbp,%r14
adcxq %rbp,%r14

movq %r8,24(%rdi)
movq %r9,32(%rdi)

mulxq 24(%rsi),%r8,%rbx
mulxq 32(%rsi),%r9,%rax
adcxq %r10,%r8
adoxq %rbx,%r9
mulxq 40(%rsi),%r10,%rbx
adcxq %r11,%r9
adoxq %rax,%r10
.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00	# mulxq 48(%rsi),%r11,%rax
adcxq %r12,%r10
adoxq %r13,%r11
.byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00	# mulxq 56(%rsi),%r12,%r13
.byte 0x3e	# ds prefix, padding
movq 24(%rsi),%rdx
adcxq %rbx,%r11
adoxq %rax,%r12
adcxq %r14,%r12
movq %r8,40(%rdi)
movq %r9,48(%rdi)
mulxq 32(%rsi),%r8,%rax
adoxq %rbp,%r13
adcxq %rbp,%r13

mulxq 40(%rsi),%r9,%rbx
adcxq %r10,%r8
adoxq %rax,%r9
mulxq 48(%rsi),%r10,%rax
adcxq %r11,%r9
adoxq %r12,%r10
mulxq 56(%rsi),%r11,%r12
movq 32(%rsi),%rdx
movq 40(%rsi),%r14
adcxq %rbx,%r10
adoxq %rax,%r11
movq 48(%rsi),%r15
adcxq %r13,%r11
adoxq %rbp,%r12
adcxq %rbp,%r12

movq %r8,56(%rdi)
movq %r9,64(%rdi)

mulxq %r14,%r9,%rax
movq 56(%rsi),%r8
adcxq %r10,%r9
mulxq %r15,%r10,%rbx
adoxq %rax,%r10
adcxq %r11,%r10
mulxq %r8,%r11,%rax
movq %r14,%rdx
adoxq %rbx,%r11
adcxq %r12,%r11

adcxq %rbp,%rax

mulxq %r15,%r14,%rbx
mulxq %r8,%r12,%r13
movq %r15,%rdx
leaq 64(%rsi),%rsi
adcxq %r14,%r11
adoxq %rbx,%r12
adcxq %rax,%r12
adoxq %rbp,%r13

.byte 0x67,0x67	# addr32 prefixes, padding
mulxq %r8,%r8,%r14
adcxq %r8,%r13
adcxq %rbp,%r14

cmpq 8+8(%rsp),%rsi
je .Lsqrx8x_outer_break

negq %rcx
movq $-8,%rcx
movq %rbp,%r15
movq 64(%rdi),%r8
adcxq 72(%rdi),%r9
adcxq 80(%rdi),%r10
adcxq 88(%rdi),%r11
adcq 96(%rdi),%r12
adcq 104(%rdi),%r13
adcq 112(%rdi),%r14
adcq 120(%rdi),%r15
leaq (%rsi),%rbp
leaq 128(%rdi),%rdi
sbbq %rax,%rax

movq -64(%rsi),%rdx
movq %rax,16+8(%rsp)
movq %rdi,24+8(%rsp)


xorl %eax,%eax
jmp .Lsqrx8x_loop

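/*
 * Editorial note: the inner loop multiplies the limb in %rdx by eight
 * limbs of the tail at (%rbp) and accumulates into %r8..%r15 with the
 * dual adcx/adox carry chains; %rcx counts from -8 up to zero.
 */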
.align 32
.Lsqrx8x_loop:
movq %r8,%rbx
mulxq 0(%rbp),%rax,%r8
adcxq %rax,%rbx
adoxq %r9,%r8

mulxq 8(%rbp),%rax,%r9
adcxq %rax,%r8
adoxq %r10,%r9

mulxq 16(%rbp),%rax,%r10
adcxq %rax,%r9
adoxq %r11,%r10

mulxq 24(%rbp),%rax,%r11
adcxq %rax,%r10
adoxq %r12,%r11

.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00	# mulxq 32(%rbp),%rax,%r12
adcxq %rax,%r11
adoxq %r13,%r12

mulxq 40(%rbp),%rax,%r13
adcxq %rax,%r12
adoxq %r14,%r13

mulxq 48(%rbp),%rax,%r14
movq %rbx,(%rdi,%rcx,8)
movl $0,%ebx
adcxq %rax,%r13
adoxq %r15,%r14

.byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00	# mulxq 56(%rbp),%rax,%r15
movq 8(%rsi,%rcx,8),%rdx
adcxq %rax,%r14
adoxq %rbx,%r15
adcxq %rbx,%r15

.byte 0x67	# addr32 prefix, padding
incq %rcx
jnz .Lsqrx8x_loop

leaq 64(%rbp),%rbp
movq $-8,%rcx
cmpq 8+8(%rsp),%rbp
je .Lsqrx8x_break

subq 16+8(%rsp),%rbx
.byte 0x66	# data16 prefix, padding
movq -64(%rsi),%rdx
adcxq 0(%rdi),%r8
adcxq 8(%rdi),%r9
adcq 16(%rdi),%r10
adcq 24(%rdi),%r11
adcq 32(%rdi),%r12
adcq 40(%rdi),%r13
adcq 48(%rdi),%r14
adcq 56(%rdi),%r15
leaq 64(%rdi),%rdi
.byte 0x67	# addr32 prefix, padding
sbbq %rax,%rax
xorl %ebx,%ebx
movq %rax,16+8(%rsp)
jmp .Lsqrx8x_loop

.align 32
.Lsqrx8x_break:
xorq %rbp,%rbp
subq 16+8(%rsp),%rbx
adcxq %rbp,%r8
movq 24+8(%rsp),%rcx
adcxq %rbp,%r9
movq 0(%rsi),%rdx
adcq $0,%r10
movq %r8,0(%rdi)
adcq $0,%r11
adcq $0,%r12
adcq $0,%r13
adcq $0,%r14
adcq $0,%r15
cmpq %rcx,%rdi
je .Lsqrx8x_outer_loop

movq %r9,8(%rdi)
movq 8(%rcx),%r9
movq %r10,16(%rdi)
movq 16(%rcx),%r10
movq %r11,24(%rdi)
movq 24(%rcx),%r11
movq %r12,32(%rdi)
movq 32(%rcx),%r12
movq %r13,40(%rdi)
movq 40(%rcx),%r13
movq %r14,48(%rdi)
movq 48(%rcx),%r14
movq %r15,56(%rdi)
movq 56(%rcx),%r15
movq %rcx,%rdi
jmp .Lsqrx8x_outer_loop

.align 32
.Lsqrx8x_outer_break:
movq %r9,72(%rdi)
.byte 102,72,15,126,217	# movq %xmm3,%rcx
movq %r10,80(%rdi)
movq %r11,88(%rdi)
movq %r12,96(%rdi)
movq %r13,104(%rdi)
movq %r14,112(%rdi)
leaq 48+8(%rsp),%rdi
movq (%rsi,%rcx,1),%rdx

movq 8(%rdi),%r11
xorq %r10,%r10
movq 0+8(%rsp),%r9
adoxq %r11,%r11
movq 16(%rdi),%r12
movq 24(%rdi),%r13

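/*
 * Editorial note: the off-diagonal sum is complete at this point.  The
 * loop below doubles it (adox of a register with itself) and adds the
 * diagonal squares a[i]^2, which mulx produces with %rdx holding a[i].
 */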
.align 32
.Lsqrx4x_shift_n_add:
mulxq %rdx,%rax,%rbx
adoxq %r12,%r12
adcxq %r10,%rax
.byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00	# movq 8(%rsi,%rcx,1),%rdx
.byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00	# movq 32(%rdi),%r10
adoxq %r13,%r13
adcxq %r11,%rbx
movq 40(%rdi),%r11
movq %rax,0(%rdi)
movq %rbx,8(%rdi)

mulxq %rdx,%rax,%rbx
adoxq %r10,%r10
adcxq %r12,%rax
movq 16(%rsi,%rcx,1),%rdx
movq 48(%rdi),%r12
adoxq %r11,%r11
adcxq %r13,%rbx
movq 56(%rdi),%r13
movq %rax,16(%rdi)
movq %rbx,24(%rdi)

mulxq %rdx,%rax,%rbx
adoxq %r12,%r12
adcxq %r10,%rax
movq 24(%rsi,%rcx,1),%rdx
leaq 32(%rcx),%rcx
movq 64(%rdi),%r10
adoxq %r13,%r13
adcxq %r11,%rbx
movq 72(%rdi),%r11
movq %rax,32(%rdi)
movq %rbx,40(%rdi)

mulxq %rdx,%rax,%rbx
adoxq %r10,%r10
adcxq %r12,%rax
jrcxz .Lsqrx4x_shift_n_add_break
.byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00	# movq 0(%rsi,%rcx,1),%rdx
adoxq %r11,%r11
adcxq %r13,%rbx
movq 80(%rdi),%r12
movq 88(%rdi),%r13
movq %rax,48(%rdi)
movq %rbx,56(%rdi)
leaq 64(%rdi),%rdi
nop
jmp .Lsqrx4x_shift_n_add

.align 32
.Lsqrx4x_shift_n_add_break:
adcxq %r13,%rbx
movq %rax,48(%rdi)
movq %rbx,56(%rdi)
leaq 64(%rdi),%rdi
.byte 102,72,15,126,213	# movq %xmm2,%rbp
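/*
 * Editorial note: Montgomery reduction of the double-width square.  On
 * this reading of the frame layout, 32+8(%rsp) holds the constant n0,
 * the negated modular inverse of n[0] mod 2^64; each pass computes
 * m = t[0]*n0 mod 2^64 and adds m*n to retire eight limbs.
 */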
__bn_sqrx8x_reduction:
xorl %eax,%eax
movq 32+8(%rsp),%rbx
movq 48+8(%rsp),%rdx
leaq -64(%rbp,%r9,1),%rcx

movq %rcx,0+8(%rsp)
movq %rdi,8+8(%rsp)

leaq 48+8(%rsp),%rdi
jmp .Lsqrx8x_reduction_loop

.align 32
.Lsqrx8x_reduction_loop:
movq 8(%rdi),%r9
movq 16(%rdi),%r10
movq 24(%rdi),%r11
movq 32(%rdi),%r12
movq %rdx,%r8
imulq %rbx,%rdx
movq 40(%rdi),%r13
movq 48(%rdi),%r14
movq 56(%rdi),%r15
movq %rax,24+8(%rsp)

leaq 64(%rdi),%rdi
xorq %rsi,%rsi
movq $-8,%rcx
jmp .Lsqrx8x_reduce

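/*
 * Editorial note: one reduction pass.  %rdx carries the current
 * multiplier m; the next multiplier is staged with mulx against
 * 32+8(%rsp) and swapped into %rdx at the bottom, while each m is
 * saved on the stack for the tail passes.
 */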
.align 32
.Lsqrx8x_reduce:
movq %r8,%rbx
mulxq 0(%rbp),%rax,%r8
adcxq %rbx,%rax
adoxq %r9,%r8

mulxq 8(%rbp),%rbx,%r9
adcxq %rbx,%r8
adoxq %r10,%r9

mulxq 16(%rbp),%rbx,%r10
adcxq %rbx,%r9
adoxq %r11,%r10

mulxq 24(%rbp),%rbx,%r11
adcxq %rbx,%r10
adoxq %r12,%r11

.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00	# mulxq 32(%rbp),%rbx,%r12
movq %rdx,%rax
movq %r8,%rdx
adcxq %rbx,%r11
adoxq %r13,%r12

mulxq 32+8(%rsp),%rbx,%rdx
movq %rax,%rdx
movq %rax,64+48+8(%rsp,%rcx,8)

mulxq 40(%rbp),%rax,%r13
adcxq %rax,%r12
adoxq %r14,%r13

mulxq 48(%rbp),%rax,%r14
adcxq %rax,%r13
adoxq %r15,%r14

mulxq 56(%rbp),%rax,%r15
movq %rbx,%rdx
adcxq %rax,%r14
adoxq %rsi,%r15
adcxq %rsi,%r15

.byte 0x67,0x67,0x67	# addr32 prefixes, padding
incq %rcx
jnz .Lsqrx8x_reduce

movq %rsi,%rax
cmpq 0+8(%rsp),%rbp
jae .Lsqrx8x_no_tail

movq 48+8(%rsp),%rdx
addq 0(%rdi),%r8
leaq 64(%rbp),%rbp
movq $-8,%rcx
adcxq 8(%rdi),%r9
adcxq 16(%rdi),%r10
adcq 24(%rdi),%r11
adcq 32(%rdi),%r12
adcq 40(%rdi),%r13
adcq 48(%rdi),%r14
adcq 56(%rdi),%r15
leaq 64(%rdi),%rdi
sbbq %rax,%rax

xorq %rsi,%rsi
movq %rax,16+8(%rsp)
jmp .Lsqrx8x_tail

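/*
 * Editorial note: the tail passes reload the saved multipliers from
 * 72+48+8(%rsp,%rcx,8) and propagate them across the upper limbs
 * until the end of the modulus is reached.
 */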
.align 32
.Lsqrx8x_tail:
movq %r8,%rbx
mulxq 0(%rbp),%rax,%r8
adcxq %rax,%rbx
adoxq %r9,%r8

mulxq 8(%rbp),%rax,%r9
adcxq %rax,%r8
adoxq %r10,%r9

mulxq 16(%rbp),%rax,%r10
adcxq %rax,%r9
adoxq %r11,%r10

mulxq 24(%rbp),%rax,%r11
adcxq %rax,%r10
adoxq %r12,%r11

.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00	# mulxq 32(%rbp),%rax,%r12
adcxq %rax,%r11
adoxq %r13,%r12

mulxq 40(%rbp),%rax,%r13
adcxq %rax,%r12
adoxq %r14,%r13

mulxq 48(%rbp),%rax,%r14
adcxq %rax,%r13
adoxq %r15,%r14

mulxq 56(%rbp),%rax,%r15
movq 72+48+8(%rsp,%rcx,8),%rdx
adcxq %rax,%r14
adoxq %rsi,%r15
movq %rbx,(%rdi,%rcx,8)
movq %r8,%rbx
adcxq %rsi,%r15

incq %rcx
jnz .Lsqrx8x_tail

cmpq 0+8(%rsp),%rbp
jae .Lsqrx8x_tail_done

subq 16+8(%rsp),%rsi
movq 48+8(%rsp),%rdx
leaq 64(%rbp),%rbp
adcq 0(%rdi),%r8
adcq 8(%rdi),%r9
adcq 16(%rdi),%r10
adcq 24(%rdi),%r11
adcq 32(%rdi),%r12
adcq 40(%rdi),%r13
adcq 48(%rdi),%r14
adcq 56(%rdi),%r15
leaq 64(%rdi),%rdi
sbbq %rax,%rax
subq $8,%rcx

xorq %rsi,%rsi
movq %rax,16+8(%rsp)
jmp .Lsqrx8x_tail

.align 32
.Lsqrx8x_tail_done:
xorq %rax,%rax
addq 24+8(%rsp),%r8
adcq $0,%r9
adcq $0,%r10
adcq $0,%r11
adcq $0,%r12
adcq $0,%r13
adcq $0,%r14
adcq $0,%r15
adcq $0,%rax

subq 16+8(%rsp),%rsi
.Lsqrx8x_no_tail:
adcq 0(%rdi),%r8
.byte 102,72,15,126,217	# movq %xmm3,%rcx
adcq 8(%rdi),%r9
movq 56(%rbp),%rsi
.byte 102,72,15,126,213	# movq %xmm2,%rbp
adcq 16(%rdi),%r10
adcq 24(%rdi),%r11
adcq 32(%rdi),%r12
adcq 40(%rdi),%r13
adcq 48(%rdi),%r14
adcq 56(%rdi),%r15
adcq $0,%rax

movq 32+8(%rsp),%rbx
movq 64(%rdi,%rcx,1),%rdx

movq %r8,0(%rdi)
leaq 64(%rdi),%r8
movq %r9,8(%rdi)
movq %r10,16(%rdi)
movq %r11,24(%rdi)
movq %r12,32(%rdi)
movq %r13,40(%rdi)
movq %r14,48(%rdi)
movq %r15,56(%rdi)

leaq 64(%rdi,%rcx,1),%rdi
cmpq 8+8(%rsp),%r8
jb .Lsqrx8x_reduction_loop
.byte 0xf3,0xc3	# rep ret
.cfi_endproc
.size bn_sqrx8x_internal,.-bn_sqrx8x_internal
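/*
 * Editorial note: __bn_postx4x_internal performs the conditional final
 * subtraction without a branch.  %rax is an all-ones or all-zero mask
 * derived from the top-level borrow; andn forms (~n[i] & mask), so the
 * carry-chained additions below compute result - n exactly when the
 * mask is all ones and leave the result unchanged otherwise.
 */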
.align 32
__bn_postx4x_internal:
.cfi_startproc
movq 0(%rbp),%r12
movq %rcx,%r10
movq %rcx,%r9
negq %rax
sarq $3+2,%rcx

.byte 102,72,15,126,202	# movq %xmm1,%rdx
.byte 102,72,15,126,206	# movq %xmm1,%rsi
decq %r12
movq 8(%rbp),%r13
xorq %r8,%r8
movq 16(%rbp),%r14
movq 24(%rbp),%r15
jmp .Lsqrx4x_sub_entry

.align 16
.Lsqrx4x_sub:
movq 0(%rbp),%r12
movq 8(%rbp),%r13
movq 16(%rbp),%r14
movq 24(%rbp),%r15
.Lsqrx4x_sub_entry:
andnq %rax,%r12,%r12
leaq 32(%rbp),%rbp
andnq %rax,%r13,%r13
andnq %rax,%r14,%r14
andnq %rax,%r15,%r15

negq %r8
adcq 0(%rdi),%r12
adcq 8(%rdi),%r13
adcq 16(%rdi),%r14
adcq 24(%rdi),%r15
movq %r12,0(%rdx)
leaq 32(%rdi),%rdi
movq %r13,8(%rdx)
sbbq %r8,%r8
movq %r14,16(%rdx)
movq %r15,24(%rdx)
leaq 32(%rdx),%rdx

incq %rcx
jnz .Lsqrx4x_sub

negq %r9

.byte 0xf3,0xc3	# rep ret
.cfi_endproc
.size __bn_postx4x_internal,.-__bn_postx4x_internal
.globl bn_get_bits5
.type bn_get_bits5,@function
.align 16
bn_get_bits5:
.cfi_startproc
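/*
 * Editorial note: bn_get_bits5 returns the 5-bit window that starts at
 * bit %esi of the buffer at %rdi, re-reading one byte later when the
 * window would cross a 16-bit word.  A C sketch of the same logic
 * (names are the editor's, not OpenSSL's):
 *
 *	static int get_bits5(const unsigned char *p, int off)
 *	{
 *		int sh = off % 16, w = off / 16;
 *		if (sh > 11) { p++; sh -= 8; }
 *		return ((p[2*w] | p[2*w + 1] << 8) >> sh) & 31;
 *	}
 */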
leaq 0(%rdi),%r10
leaq 1(%rdi),%r11
movl %esi,%ecx
shrl $4,%esi
andl $15,%ecx
leal -8(%rcx),%eax
cmpl $11,%ecx
cmovaq %r11,%r10
cmoval %eax,%ecx
movzwl (%r10,%rsi,2),%eax
shrl %cl,%eax
andl $31,%eax
.byte 0xf3,0xc3	# rep ret
.cfi_endproc
.size bn_get_bits5,.-bn_get_bits5

.globl bn_scatter5
.type bn_scatter5,@function
.align 16
bn_scatter5:
.cfi_startproc
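/*
 * Editorial note: bn_scatter5 stores %esi limbs from %rdi into the
 * power table at %rdx, entry index %rcx, with a 256-byte stride: each
 * 256-byte row holds one limb of all 32 entries, which is what lets
 * the matching gather read every entry and stay cache-timing neutral.
 */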
cmpl $0,%esi
jz .Lscatter_epilogue
leaq (%rdx,%rcx,8),%rdx
.Lscatter:
movq (%rdi),%rax
leaq 8(%rdi),%rdi
movq %rax,(%rdx)
leaq 256(%rdx),%rdx
subl $1,%esi
jnz .Lscatter
.Lscatter_epilogue:
.byte 0xf3,0xc3	# rep ret
.cfi_endproc
.size bn_scatter5,.-bn_scatter5

.globl bn_gather5
.type bn_gather5,@function
.align 32
bn_gather5:
.LSEH_begin_bn_gather5:
.cfi_startproc
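/*
 * Editorial note: bn_gather5 is a constant-time table lookup.  It
 * builds sixteen 128-bit masks from the index in %ecx (only the mask
 * matching the index is all ones), then pand/por's every table entry
 * against them, so the memory access pattern does not depend on the
 * secret index.
 */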

.byte 0x4c,0x8d,0x14,0x24	# leaq (%rsp),%r10
.byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00	# subq $0x108,%rsp
leaq .Linc(%rip),%rax
andq $-16,%rsp

movd %ecx,%xmm5
movdqa 0(%rax),%xmm0
movdqa 16(%rax),%xmm1
leaq 128(%rdx),%r11
leaq 128(%rsp),%rax

pshufd $0,%xmm5,%xmm5
movdqa %xmm1,%xmm4
movdqa %xmm1,%xmm2
paddd %xmm0,%xmm1
pcmpeqd %xmm5,%xmm0
movdqa %xmm4,%xmm3

paddd %xmm1,%xmm2
pcmpeqd %xmm5,%xmm1
movdqa %xmm0,-128(%rax)
movdqa %xmm4,%xmm0

paddd %xmm2,%xmm3
pcmpeqd %xmm5,%xmm2
movdqa %xmm1,-112(%rax)
movdqa %xmm4,%xmm1

paddd %xmm3,%xmm0
pcmpeqd %xmm5,%xmm3
movdqa %xmm2,-96(%rax)
movdqa %xmm4,%xmm2
paddd %xmm0,%xmm1
pcmpeqd %xmm5,%xmm0
movdqa %xmm3,-80(%rax)
movdqa %xmm4,%xmm3

paddd %xmm1,%xmm2
pcmpeqd %xmm5,%xmm1
movdqa %xmm0,-64(%rax)
movdqa %xmm4,%xmm0

paddd %xmm2,%xmm3
pcmpeqd %xmm5,%xmm2
movdqa %xmm1,-48(%rax)
movdqa %xmm4,%xmm1

paddd %xmm3,%xmm0
pcmpeqd %xmm5,%xmm3
movdqa %xmm2,-32(%rax)
movdqa %xmm4,%xmm2
paddd %xmm0,%xmm1
pcmpeqd %xmm5,%xmm0
movdqa %xmm3,-16(%rax)
movdqa %xmm4,%xmm3

paddd %xmm1,%xmm2
pcmpeqd %xmm5,%xmm1
movdqa %xmm0,0(%rax)
movdqa %xmm4,%xmm0

paddd %xmm2,%xmm3
pcmpeqd %xmm5,%xmm2
movdqa %xmm1,16(%rax)
movdqa %xmm4,%xmm1

paddd %xmm3,%xmm0
pcmpeqd %xmm5,%xmm3
movdqa %xmm2,32(%rax)
movdqa %xmm4,%xmm2
paddd %xmm0,%xmm1
pcmpeqd %xmm5,%xmm0
movdqa %xmm3,48(%rax)
movdqa %xmm4,%xmm3

paddd %xmm1,%xmm2
pcmpeqd %xmm5,%xmm1
movdqa %xmm0,64(%rax)
movdqa %xmm4,%xmm0

paddd %xmm2,%xmm3
pcmpeqd %xmm5,%xmm2
movdqa %xmm1,80(%rax)
movdqa %xmm4,%xmm1

paddd %xmm3,%xmm0
pcmpeqd %xmm5,%xmm3
movdqa %xmm2,96(%rax)
movdqa %xmm4,%xmm2
movdqa %xmm3,112(%rax)
jmp .Lgather

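/*
 * Editorial note: each .Lgather iteration touches all 32 table entries
 * (256 bytes at %r11) and accumulates the single selected limb into
 * %xmm4/%xmm5, collapsing it with the final pshufd/por pair.
 */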
.align 32
.Lgather:
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
movdqa -128(%r11),%xmm0
movdqa -112(%r11),%xmm1
movdqa -96(%r11),%xmm2
pand -128(%rax),%xmm0
movdqa -80(%r11),%xmm3
pand -112(%rax),%xmm1
por %xmm0,%xmm4
pand -96(%rax),%xmm2
por %xmm1,%xmm5
pand -80(%rax),%xmm3
por %xmm2,%xmm4
por %xmm3,%xmm5
movdqa -64(%r11),%xmm0
movdqa -48(%r11),%xmm1
movdqa -32(%r11),%xmm2
pand -64(%rax),%xmm0
movdqa -16(%r11),%xmm3
pand -48(%rax),%xmm1
por %xmm0,%xmm4
pand -32(%rax),%xmm2
por %xmm1,%xmm5
pand -16(%rax),%xmm3
por %xmm2,%xmm4
por %xmm3,%xmm5
movdqa 0(%r11),%xmm0
movdqa 16(%r11),%xmm1
movdqa 32(%r11),%xmm2
pand 0(%rax),%xmm0
movdqa 48(%r11),%xmm3
pand 16(%rax),%xmm1
por %xmm0,%xmm4
pand 32(%rax),%xmm2
por %xmm1,%xmm5
pand 48(%rax),%xmm3
por %xmm2,%xmm4
por %xmm3,%xmm5
movdqa 64(%r11),%xmm0
movdqa 80(%r11),%xmm1
movdqa 96(%r11),%xmm2
pand 64(%rax),%xmm0
movdqa 112(%r11),%xmm3
pand 80(%rax),%xmm1
por %xmm0,%xmm4
pand 96(%rax),%xmm2
por %xmm1,%xmm5
pand 112(%rax),%xmm3
por %xmm2,%xmm4
por %xmm3,%xmm5
por %xmm5,%xmm4
leaq 256(%r11),%r11
pshufd $0x4e,%xmm4,%xmm0
por %xmm4,%xmm0
movq %xmm0,(%rdi)
leaq 8(%rdi),%rdi
subl $1,%esi
jnz .Lgather

leaq (%r10),%rsp
.byte 0xf3,0xc3	# rep ret
.LSEH_end_bn_gather5:
.cfi_endproc
.size bn_gather5,.-bn_gather5
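/*
 * Editorial note: .Linc seeds the mask generation above; the first
 * vector holds the dword indices {0,0,1,1}, the second the per-step
 * increment {2,2,2,2}.
 */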
.align 64
.Linc:
.long 0,0, 1,1
.long 2,2, 2,2
.byte 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0	# "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS by <appro@openssl.org>"