/* $FreeBSD$ */
/* Do not modify. This file is auto-generated from chacha-x86.pl. */
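/*
 * ChaCha20 for 32-bit x86, generated by the CRYPTOGAMS chacha-x86.pl
 * script.  Three entry points are emitted:
 *
 *   ChaCha20_ctr32 - integer-only fallback, which also dispatches to
 *                    the SSSE3/XOP code when OPENSSL_ia32cap_P allows
 *   ChaCha20_ssse3 - one-64-byte-block-at-a-time SSSE3 implementation
 *   ChaCha20_xop   - four-blocks-at-a-time AMD XOP implementation
 *
 * All three share the OpenSSL prototype
 *   void ChaCha20_ctr32(unsigned char *out, const unsigned char *inp,
 *                       size_t len, const unsigned int key[8],
 *                       const unsigned int counter[4]);
 * so with the four saved registers on the stack the arguments sit at
 * 20(%esp) through 36(%esp) on function entry.
 */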
#ifdef PIC
.text
.globl ChaCha20_ctr32
.type ChaCha20_ctr32,@function
.align 16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
xorl %eax,%eax
cmpl 28(%esp),%eax
je .L000no_data
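/*
 * Locate OPENSSL_ia32cap_P PIC-relatively and dispatch: the SSSE3 code
 * is used only if word 0 bit 24 (1<<24, FXSR) and word 1 bit 9 (1<<9,
 * SSSE3) are both set; otherwise fall through to the integer code.
 */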
call .Lpic_point
.Lpic_point:
popl %eax
leal OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
testl $16777216,(%ebp)
jz .L001x86
testl $512,4(%ebp)
jz .L001x86
jmp .Lssse3_shortcut
.L001x86:
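/*
 * Integer-only path.  The 132-byte frame caches the 16-word ChaCha
 * state: key words at 80..108(%esp) and counter/nonce at 112..124(%esp).
 * The 32-bit counter word is stored pre-decremented because .L002entry
 * re-increments it once per 64-byte block.  The outer loop keeps the
 * out/inp/len values in the original argument slots, now visible at
 * 152..160(%esp).
 */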
movl 32(%esp),%esi
movl 36(%esp),%edi
subl $132,%esp
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%edx
movl %eax,80(%esp)
movl %ebx,84(%esp)
movl %ecx,88(%esp)
movl %edx,92(%esp)
movl 16(%esi),%eax
movl 20(%esi),%ebx
movl 24(%esi),%ecx
movl 28(%esi),%edx
movl %eax,96(%esp)
movl %ebx,100(%esp)
movl %ecx,104(%esp)
movl %edx,108(%esp)
movl (%edi),%eax
movl 4(%edi),%ebx
movl 8(%edi),%ecx
movl 12(%edi),%edx
subl $1,%eax
movl %eax,112(%esp)
movl %ebx,116(%esp)
movl %ecx,120(%esp)
movl %edx,124(%esp)
jmp .L002entry
.align 16
.L003outer_loop:
movl %ebx,156(%esp)
movl %eax,152(%esp)
movl %ecx,160(%esp)
.L002entry:
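/*
 * Materialize the working state for one 64-byte block at 0..60(%esp):
 * the four sigma constants "expa" "nd 3" "2-by" "te k" (0x61707865,
 * 0x3320646e, 0x79622d32, 0x6b206574, written in decimal below), the
 * cached key, and the counter/nonce with the counter word bumped by one.
 */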
movl $1634760805,%eax
movl $857760878,4(%esp)
movl $2036477234,8(%esp)
movl $1797285236,12(%esp)
movl 84(%esp),%ebx
movl 88(%esp),%ebp
movl 104(%esp),%ecx
movl 108(%esp),%esi
movl 116(%esp),%edx
movl 120(%esp),%edi
movl %ebx,20(%esp)
movl %ebp,24(%esp)
movl %ecx,40(%esp)
movl %esi,44(%esp)
movl %edx,52(%esp)
movl %edi,56(%esp)
movl 92(%esp),%ebx
movl 124(%esp),%edi
movl 112(%esp),%edx
movl 80(%esp),%ebp
movl 96(%esp),%ecx
movl 100(%esp),%esi
addl $1,%edx
movl %ebx,28(%esp)
movl %edi,60(%esp)
movl %edx,112(%esp)
movl $10,%ebx
jmp .L004loop
.align 16
.L004loop:
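/*
 * One iteration is a full double round: four column quarter-rounds
 * followed by four diagonal ones, each the usual add/xor/rotate-left
 * by 16, 12, 8 and 7.  %ebx doubles as the round counter (ten double
 * rounds = 20 rounds), spilled to 128(%esp) while the arithmetic runs;
 * state words not currently in registers live at 0..60(%esp).
 */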
addl %ebp,%eax
movl %ebx,128(%esp)
movl %ebp,%ebx
xorl %eax,%edx
roll $16,%edx
addl %edx,%ecx
xorl %ecx,%ebx
movl 52(%esp),%edi
roll $12,%ebx
movl 20(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,(%esp)
roll $8,%edx
movl 4(%esp),%eax
addl %edx,%ecx
movl %edx,48(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
movl %ecx,32(%esp)
roll $16,%edi
movl %ebx,16(%esp)
addl %edi,%esi
movl 40(%esp),%ecx
xorl %esi,%ebp
movl 56(%esp),%edx
roll $12,%ebp
movl 24(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,4(%esp)
roll $8,%edi
movl 8(%esp),%eax
addl %edi,%esi
movl %edi,52(%esp)
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
movl %esi,36(%esp)
roll $16,%edx
movl %ebp,20(%esp)
addl %edx,%ecx
movl 44(%esp),%esi
xorl %ecx,%ebx
movl 60(%esp),%edi
roll $12,%ebx
movl 28(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,8(%esp)
roll $8,%edx
movl 12(%esp),%eax
addl %edx,%ecx
movl %edx,56(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
roll $16,%edi
movl %ebx,24(%esp)
addl %edi,%esi
xorl %esi,%ebp
roll $12,%ebp
movl 20(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,12(%esp)
roll $8,%edi
movl (%esp),%eax
addl %edi,%esi
movl %edi,%edx
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
roll $16,%edx
movl %ebp,28(%esp)
addl %edx,%ecx
xorl %ecx,%ebx
movl 48(%esp),%edi
roll $12,%ebx
movl 24(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,(%esp)
roll $8,%edx
movl 4(%esp),%eax
addl %edx,%ecx
movl %edx,60(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
movl %ecx,40(%esp)
roll $16,%edi
movl %ebx,20(%esp)
addl %edi,%esi
movl 32(%esp),%ecx
xorl %esi,%ebp
movl 52(%esp),%edx
roll $12,%ebp
movl 28(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,4(%esp)
roll $8,%edi
movl 8(%esp),%eax
addl %edi,%esi
movl %edi,48(%esp)
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
movl %esi,44(%esp)
roll $16,%edx
movl %ebp,24(%esp)
addl %edx,%ecx
movl 36(%esp),%esi
xorl %ecx,%ebx
movl 56(%esp),%edi
roll $12,%ebx
movl 16(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,8(%esp)
roll $8,%edx
movl 12(%esp),%eax
addl %edx,%ecx
movl %edx,52(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
roll $16,%edi
movl %ebx,28(%esp)
addl %edi,%esi
xorl %esi,%ebp
movl 48(%esp),%edx
roll $12,%ebp
movl 128(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,12(%esp)
roll $8,%edi
movl (%esp),%eax
addl %edi,%esi
movl %edi,56(%esp)
xorl %esi,%ebp
roll $7,%ebp
decl %ebx
jnz .L004loop
movl 160(%esp),%ebx
addl $1634760805,%eax
addl 80(%esp),%ebp
addl 96(%esp),%ecx
addl 100(%esp),%esi
cmpl $64,%ebx
jb .L005tail
movl 156(%esp),%ebx
addl 112(%esp),%edx
addl 120(%esp),%edi
xorl (%ebx),%eax
xorl 16(%ebx),%ebp
movl %eax,(%esp)
movl 152(%esp),%eax
xorl 32(%ebx),%ecx
xorl 36(%ebx),%esi
xorl 48(%ebx),%edx
xorl 56(%ebx),%edi
movl %ebp,16(%eax)
movl %ecx,32(%eax)
movl %esi,36(%eax)
movl %edx,48(%eax)
movl %edi,56(%eax)
movl 4(%esp),%ebp
movl 8(%esp),%ecx
movl 12(%esp),%esi
movl 20(%esp),%edx
movl 24(%esp),%edi
addl $857760878,%ebp
addl $2036477234,%ecx
addl $1797285236,%esi
addl 84(%esp),%edx
addl 88(%esp),%edi
xorl 4(%ebx),%ebp
xorl 8(%ebx),%ecx
xorl 12(%ebx),%esi
xorl 20(%ebx),%edx
xorl 24(%ebx),%edi
movl %ebp,4(%eax)
movl %ecx,8(%eax)
movl %esi,12(%eax)
movl %edx,20(%eax)
movl %edi,24(%eax)
movl 28(%esp),%ebp
movl 40(%esp),%ecx
movl 44(%esp),%esi
movl 52(%esp),%edx
movl 60(%esp),%edi
addl 92(%esp),%ebp
addl 104(%esp),%ecx
addl 108(%esp),%esi
addl 116(%esp),%edx
addl 124(%esp),%edi
xorl 28(%ebx),%ebp
xorl 40(%ebx),%ecx
xorl 44(%ebx),%esi
xorl 52(%ebx),%edx
xorl 60(%ebx),%edi
leal 64(%ebx),%ebx
movl %ebp,28(%eax)
movl (%esp),%ebp
movl %ecx,40(%eax)
movl 160(%esp),%ecx
movl %esi,44(%eax)
movl %edx,52(%eax)
movl %edi,60(%eax)
movl %ebp,(%eax)
leal 64(%eax),%eax
subl $64,%ecx
jnz .L003outer_loop
jmp .L006done
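/*
 * Partial (sub-64-byte) final block: finish adding the input state to
 * the keystream words, park the whole block at 0..60(%esp) and XOR it
 * into the output one byte at a time.
 */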
.L005tail:
addl 112(%esp),%edx
addl 120(%esp),%edi
movl %eax,(%esp)
movl %ebp,16(%esp)
movl %ecx,32(%esp)
movl %esi,36(%esp)
movl %edx,48(%esp)
movl %edi,56(%esp)
movl 4(%esp),%ebp
movl 8(%esp),%ecx
movl 12(%esp),%esi
movl 20(%esp),%edx
movl 24(%esp),%edi
addl $857760878,%ebp
addl $2036477234,%ecx
addl $1797285236,%esi
addl 84(%esp),%edx
addl 88(%esp),%edi
movl %ebp,4(%esp)
movl %ecx,8(%esp)
movl %esi,12(%esp)
movl %edx,20(%esp)
movl %edi,24(%esp)
movl 28(%esp),%ebp
movl 40(%esp),%ecx
movl 44(%esp),%esi
movl 52(%esp),%edx
movl 60(%esp),%edi
addl 92(%esp),%ebp
addl 104(%esp),%ecx
addl 108(%esp),%esi
addl 116(%esp),%edx
addl 124(%esp),%edi
movl %ebp,28(%esp)
movl 156(%esp),%ebp
movl %ecx,40(%esp)
movl 152(%esp),%ecx
movl %esi,44(%esp)
xorl %esi,%esi
movl %edx,52(%esp)
movl %edi,60(%esp)
xorl %eax,%eax
xorl %edx,%edx
.L007tail_loop:
movb (%esi,%ebp,1),%al
movb (%esp,%esi,1),%dl
leal 1(%esi),%esi
xorb %dl,%al
movb %al,-1(%ecx,%esi,1)
decl %ebx
jnz .L007tail_loop
.L006done:
addl $132,%esp
.L000no_data:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
.globl ChaCha20_ssse3
.type ChaCha20_ssse3,@function
.align 16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
.Lssse3_shortcut:
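/*
 * Entered from ChaCha20_ctr32's dispatch with %ebp still pointing at
 * OPENSSL_ia32cap_P; word 1 bit 11 ($2048) is the XOP feature bit as
 * recorded by OpenSSL's capability probing, in which case the XOP code
 * below is used instead.  This path handles one 64-byte block per
 * iteration, keeping the whole state in four 128-bit registers.
 */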
testl $2048,4(%ebp)
jnz .Lxop_shortcut
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%ecx
movl 32(%esp),%edx
movl 36(%esp),%ebx
movl %esp,%ebp
subl $524,%esp
andl $-64,%esp
movl %ebp,512(%esp)
leal .Lssse3_data-.Lpic_point(%eax),%eax
movdqu (%ebx),%xmm3
.L0081x:
movdqa 32(%eax),%xmm0
movdqu (%edx),%xmm1
movdqu 16(%edx),%xmm2
movdqa (%eax),%xmm6
movdqa 16(%eax),%xmm7
movl %ebp,48(%esp)
movdqa %xmm0,(%esp)
movdqa %xmm1,16(%esp)
movdqa %xmm2,32(%esp)
movdqa %xmm3,48(%esp)
movl $10,%edx
jmp .L009loop1x
.align 16
.L010outer1x:
movdqa 80(%eax),%xmm3
movdqa (%esp),%xmm0
movdqa 16(%esp),%xmm1
movdqa 32(%esp),%xmm2
paddd 48(%esp),%xmm3
movl $10,%edx
movdqa %xmm3,48(%esp)
jmp .L009loop1x
.align 16
.L009loop1x:
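/*
 * Vectorized quarter-rounds on the whole 4x4 state.  The sequences
 * .byte 102,15,56,0,222 and 102,15,56,0,223 encode pshufb %xmm6,%xmm3
 * and pshufb %xmm7,%xmm3: byte shuffles through the masks loaded from
 * .Lssse3_data that rotate each 32-bit lane left by 16 and by 8.  The
 * 12- and 7-bit rotates use shift/shift/or; pshufd rearranges the rows
 * between the column and diagonal halves of each double round.
 */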
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,222
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $20,%xmm1
pslld $12,%xmm4
por %xmm4,%xmm1
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,223
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $25,%xmm1
pslld $7,%xmm4
por %xmm4,%xmm1
pshufd $78,%xmm2,%xmm2
pshufd $57,%xmm1,%xmm1
pshufd $147,%xmm3,%xmm3
nop
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,222
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $20,%xmm1
pslld $12,%xmm4
por %xmm4,%xmm1
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,223
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $25,%xmm1
pslld $7,%xmm4
por %xmm4,%xmm1
pshufd $78,%xmm2,%xmm2
pshufd $147,%xmm1,%xmm1
pshufd $57,%xmm3,%xmm3
decl %edx
jnz .L009loop1x
paddd (%esp),%xmm0
paddd 16(%esp),%xmm1
paddd 32(%esp),%xmm2
paddd 48(%esp),%xmm3
cmpl $64,%ecx
jb .L011tail
movdqu (%esi),%xmm4
movdqu 16(%esi),%xmm5
pxor %xmm4,%xmm0
movdqu 32(%esi),%xmm4
pxor %xmm5,%xmm1
movdqu 48(%esi),%xmm5
pxor %xmm4,%xmm2
pxor %xmm5,%xmm3
leal 64(%esi),%esi
movdqu %xmm0,(%edi)
movdqu %xmm1,16(%edi)
movdqu %xmm2,32(%edi)
movdqu %xmm3,48(%edi)
leal 64(%edi),%edi
subl $64,%ecx
jnz .L010outer1x
jmp .L012done
.L011tail:
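/*
 * Sub-64-byte tail: spill the keystream block to the stack and XOR it
 * into the output byte by byte.
 */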
movdqa %xmm0,(%esp)
movdqa %xmm1,16(%esp)
movdqa %xmm2,32(%esp)
movdqa %xmm3,48(%esp)
xorl %eax,%eax
xorl %edx,%edx
xorl %ebp,%ebp
.L013tail_loop:
movb (%esp,%ebp,1),%al
movb (%esi,%ebp,1),%dl
leal 1(%ebp),%ebp
xorb %dl,%al
movb %al,-1(%edi,%ebp,1)
decl %ecx
jnz .L013tail_loop
.L012done:
movl 512(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
.align 64
.Lssse3_data:
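/*
 * Offsets from .Lssse3_data as used above: 0 and 16 hold the pshufb
 * rotate-left-by-16 and -by-8 masks, 32 the sigma constants, 48 the
 * per-lane counter offsets {0,1,2,3}, 64 the 4-way counter increment
 * {4,4,4,4}, 80 the 1x counter increment {1,0,0,0}, 96 {4,0,0,0} for
 * the XOP remainder fixup, and 112 a mask that clears the counter word
 * while keeping the nonce.
 */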
.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long 1634760805,857760878,2036477234,1797285236
.long 0,1,2,3
.long 4,4,4,4
.long 1,0,0,0
.long 4,0,0,0
.long 0,-1,-1,-1
.align 64
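/* ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>" */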
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
.globl ChaCha20_xop
.type ChaCha20_xop,@function
.align 16
ChaCha20_xop:
.L_ChaCha20_xop_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
.Lxop_shortcut:
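/*
 * AMD XOP path.  For len >= 256 it works on four blocks at once, with
 * the state transposed so each xmm register holds one state word across
 * the four blocks.  The .byte sequences 143,232,120,194,... encode
 * vprotd, XOP's rotate-left-doubleword instruction, whose final byte is
 * the rotate count (16, 12, 8 or 7).
 */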
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%ecx
movl 32(%esp),%edx
movl 36(%esp),%ebx
vzeroupper
movl %esp,%ebp
subl $524,%esp
andl $-64,%esp
movl %ebp,512(%esp)
leal .Lssse3_data-.Lpic_point(%eax),%eax
vmovdqu (%ebx),%xmm3
cmpl $256,%ecx
jb .L0141x
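/*
 * Four-block setup: vpshufd broadcasts every state word into its own
 * vector, the counter lanes get {0,1,2,3} added (offset 48 of the data
 * table), and {4,4,4,4} (offset 64) is pre-subtracted because the outer
 * loop re-adds it on every pass.
 */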
movl %edx,516(%esp)
movl %ebx,520(%esp)
subl $256,%ecx
leal 384(%esp),%ebp
vmovdqu (%edx),%xmm7
vpshufd $0,%xmm3,%xmm0
vpshufd $85,%xmm3,%xmm1
vpshufd $170,%xmm3,%xmm2
vpshufd $255,%xmm3,%xmm3
vpaddd 48(%eax),%xmm0,%xmm0
vpshufd $0,%xmm7,%xmm4
vpshufd $85,%xmm7,%xmm5
vpsubd 64(%eax),%xmm0,%xmm0
vpshufd $170,%xmm7,%xmm6
vpshufd $255,%xmm7,%xmm7
vmovdqa %xmm0,64(%ebp)
vmovdqa %xmm1,80(%ebp)
vmovdqa %xmm2,96(%ebp)
vmovdqa %xmm3,112(%ebp)
vmovdqu 16(%edx),%xmm3
vmovdqa %xmm4,-64(%ebp)
vmovdqa %xmm5,-48(%ebp)
vmovdqa %xmm6,-32(%ebp)
vmovdqa %xmm7,-16(%ebp)
vmovdqa 32(%eax),%xmm7
leal 128(%esp),%ebx
vpshufd $0,%xmm3,%xmm0
vpshufd $85,%xmm3,%xmm1
vpshufd $170,%xmm3,%xmm2
vpshufd $255,%xmm3,%xmm3
vpshufd $0,%xmm7,%xmm4
vpshufd $85,%xmm7,%xmm5
vpshufd $170,%xmm7,%xmm6
vpshufd $255,%xmm7,%xmm7
vmovdqa %xmm0,(%ebp)
vmovdqa %xmm1,16(%ebp)
vmovdqa %xmm2,32(%ebp)
vmovdqa %xmm3,48(%ebp)
vmovdqa %xmm4,-128(%ebp)
vmovdqa %xmm5,-112(%ebp)
vmovdqa %xmm6,-96(%ebp)
vmovdqa %xmm7,-80(%ebp)
leal 128(%esi),%esi
leal 128(%edi),%edi
jmp .L015outer_loop
.align 32
.L015outer_loop:
vmovdqa -112(%ebp),%xmm1
vmovdqa -96(%ebp),%xmm2
vmovdqa -80(%ebp),%xmm3
vmovdqa -48(%ebp),%xmm5
vmovdqa -32(%ebp),%xmm6
vmovdqa -16(%ebp),%xmm7
vmovdqa %xmm1,-112(%ebx)
vmovdqa %xmm2,-96(%ebx)
vmovdqa %xmm3,-80(%ebx)
vmovdqa %xmm5,-48(%ebx)
vmovdqa %xmm6,-32(%ebx)
vmovdqa %xmm7,-16(%ebx)
vmovdqa 32(%ebp),%xmm2
vmovdqa 48(%ebp),%xmm3
vmovdqa 64(%ebp),%xmm4
vmovdqa 80(%ebp),%xmm5
vmovdqa 96(%ebp),%xmm6
vmovdqa 112(%ebp),%xmm7
vpaddd 64(%eax),%xmm4,%xmm4
vmovdqa %xmm2,32(%ebx)
vmovdqa %xmm3,48(%ebx)
vmovdqa %xmm4,64(%ebx)
vmovdqa %xmm5,80(%ebx)
vmovdqa %xmm6,96(%ebx)
vmovdqa %xmm7,112(%ebx)
vmovdqa %xmm4,64(%ebp)
vmovdqa -128(%ebp),%xmm0
vmovdqa %xmm4,%xmm6
vmovdqa -64(%ebp),%xmm3
vmovdqa (%ebp),%xmm4
vmovdqa 16(%ebp),%xmm5
movl $10,%edx
nop
.align 32
.L016loop:
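/*
 * One double round across the four interleaved blocks per iteration;
 * only part of the sixteen state vectors fit in registers, so words are
 * continually swapped with the %ebx-based spill area.
 */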
vpaddd %xmm3,%xmm0,%xmm0
vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,246,16
vpaddd %xmm6,%xmm4,%xmm4
vpxor %xmm4,%xmm3,%xmm2
vmovdqa -112(%ebx),%xmm1
.byte 143,232,120,194,210,12
vmovdqa -48(%ebx),%xmm3
vpaddd %xmm2,%xmm0,%xmm0
vmovdqa 80(%ebx),%xmm7
vpxor %xmm0,%xmm6,%xmm6
vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
vmovdqa %xmm0,-128(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa %xmm6,64(%ebx)
vpxor %xmm4,%xmm2,%xmm2
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
vmovdqa %xmm4,(%ebx)
.byte 143,232,120,194,255,16
vmovdqa %xmm2,-64(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa 32(%ebx),%xmm4
vpxor %xmm5,%xmm3,%xmm3
vmovdqa -96(%ebx),%xmm0
.byte 143,232,120,194,219,12
vmovdqa -32(%ebx),%xmm2
vpaddd %xmm3,%xmm1,%xmm1
vmovdqa 96(%ebx),%xmm6
vpxor %xmm1,%xmm7,%xmm7
vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
vmovdqa %xmm1,-112(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa %xmm7,80(%ebx)
vpxor %xmm5,%xmm3,%xmm3
vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,219,7
vmovdqa %xmm5,16(%ebx)
.byte 143,232,120,194,246,16
vmovdqa %xmm3,-48(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa 48(%ebx),%xmm5
vpxor %xmm4,%xmm2,%xmm2
vmovdqa -80(%ebx),%xmm1
.byte 143,232,120,194,210,12
vmovdqa -16(%ebx),%xmm3
vpaddd %xmm2,%xmm0,%xmm0
vmovdqa 112(%ebx),%xmm7
vpxor %xmm0,%xmm6,%xmm6
vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
vmovdqa %xmm0,-96(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa %xmm6,96(%ebx)
vpxor %xmm4,%xmm2,%xmm2
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
.byte 143,232,120,194,255,16
vmovdqa %xmm2,-32(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vpxor %xmm5,%xmm3,%xmm3
vmovdqa -128(%ebx),%xmm0
.byte 143,232,120,194,219,12
vmovdqa -48(%ebx),%xmm2
vpaddd %xmm3,%xmm1,%xmm1
vpxor %xmm1,%xmm7,%xmm7
vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
vmovdqa %xmm1,-80(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vpxor %xmm5,%xmm3,%xmm3
vpxor %xmm0,%xmm7,%xmm6
.byte 143,232,120,194,219,7
.byte 143,232,120,194,246,16
vmovdqa %xmm3,-16(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vpxor %xmm4,%xmm2,%xmm2
vmovdqa -112(%ebx),%xmm1
.byte 143,232,120,194,210,12
vmovdqa -32(%ebx),%xmm3
vpaddd %xmm2,%xmm0,%xmm0
vmovdqa 64(%ebx),%xmm7
vpxor %xmm0,%xmm6,%xmm6
vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
vmovdqa %xmm0,-128(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa %xmm6,112(%ebx)
vpxor %xmm4,%xmm2,%xmm2
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
vmovdqa %xmm4,32(%ebx)
.byte 143,232,120,194,255,16
vmovdqa %xmm2,-48(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa (%ebx),%xmm4
vpxor %xmm5,%xmm3,%xmm3
vmovdqa -96(%ebx),%xmm0
.byte 143,232,120,194,219,12
vmovdqa -16(%ebx),%xmm2
vpaddd %xmm3,%xmm1,%xmm1
vmovdqa 80(%ebx),%xmm6
vpxor %xmm1,%xmm7,%xmm7
vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
vmovdqa %xmm1,-112(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa %xmm7,64(%ebx)
vpxor %xmm5,%xmm3,%xmm3
vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,219,7
vmovdqa %xmm5,48(%ebx)
.byte 143,232,120,194,246,16
vmovdqa %xmm3,-32(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa 16(%ebx),%xmm5
vpxor %xmm4,%xmm2,%xmm2
vmovdqa -80(%ebx),%xmm1
.byte 143,232,120,194,210,12
vmovdqa -64(%ebx),%xmm3
vpaddd %xmm2,%xmm0,%xmm0
vmovdqa 96(%ebx),%xmm7
vpxor %xmm0,%xmm6,%xmm6
vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
vmovdqa %xmm0,-96(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa %xmm6,80(%ebx)
vpxor %xmm4,%xmm2,%xmm2
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
.byte 143,232,120,194,255,16
vmovdqa %xmm2,-16(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vpxor %xmm5,%xmm3,%xmm3
vmovdqa -128(%ebx),%xmm0
.byte 143,232,120,194,219,12
vpaddd %xmm3,%xmm1,%xmm1
vmovdqa 64(%ebx),%xmm6
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,255,8
vmovdqa %xmm1,-80(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa %xmm7,96(%ebx)
vpxor %xmm5,%xmm3,%xmm3
.byte 143,232,120,194,219,7
decl %edx
jnz .L016loop
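/*
 * Output stage: add back the saved input state from the %ebp frame,
 * then de-interleave with vpunpck{l,h}{dq,qdq} so the four blocks come
 * out in sequential order, XORing 16 bytes per block against the input
 * stream on each pass.
 */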
vmovdqa %xmm3,-64(%ebx)
vmovdqa %xmm4,(%ebx)
vmovdqa %xmm5,16(%ebx)
vmovdqa %xmm6,64(%ebx)
vmovdqa %xmm7,96(%ebx)
vmovdqa -112(%ebx),%xmm1
vmovdqa -96(%ebx),%xmm2
vmovdqa -80(%ebx),%xmm3
vpaddd -128(%ebp),%xmm0,%xmm0
vpaddd -112(%ebp),%xmm1,%xmm1
vpaddd -96(%ebp),%xmm2,%xmm2
vpaddd -80(%ebp),%xmm3,%xmm3
vpunpckldq %xmm1,%xmm0,%xmm6
vpunpckldq %xmm3,%xmm2,%xmm7
vpunpckhdq %xmm1,%xmm0,%xmm0
vpunpckhdq %xmm3,%xmm2,%xmm2
vpunpcklqdq %xmm7,%xmm6,%xmm1
vpunpckhqdq %xmm7,%xmm6,%xmm6
vpunpcklqdq %xmm2,%xmm0,%xmm7
vpunpckhqdq %xmm2,%xmm0,%xmm3
vpxor -128(%esi),%xmm1,%xmm4
vpxor -64(%esi),%xmm6,%xmm5
vpxor (%esi),%xmm7,%xmm6
vpxor 64(%esi),%xmm3,%xmm7
leal 16(%esi),%esi
vmovdqa -64(%ebx),%xmm0
vmovdqa -48(%ebx),%xmm1
vmovdqa -32(%ebx),%xmm2
vmovdqa -16(%ebx),%xmm3
vmovdqu %xmm4,-128(%edi)
vmovdqu %xmm5,-64(%edi)
vmovdqu %xmm6,(%edi)
vmovdqu %xmm7,64(%edi)
leal 16(%edi),%edi
vpaddd -64(%ebp),%xmm0,%xmm0
vpaddd -48(%ebp),%xmm1,%xmm1
vpaddd -32(%ebp),%xmm2,%xmm2
vpaddd -16(%ebp),%xmm3,%xmm3
vpunpckldq %xmm1,%xmm0,%xmm6
vpunpckldq %xmm3,%xmm2,%xmm7
vpunpckhdq %xmm1,%xmm0,%xmm0
vpunpckhdq %xmm3,%xmm2,%xmm2
vpunpcklqdq %xmm7,%xmm6,%xmm1
vpunpckhqdq %xmm7,%xmm6,%xmm6
vpunpcklqdq %xmm2,%xmm0,%xmm7
vpunpckhqdq %xmm2,%xmm0,%xmm3
vpxor -128(%esi),%xmm1,%xmm4
vpxor -64(%esi),%xmm6,%xmm5
vpxor (%esi),%xmm7,%xmm6
vpxor 64(%esi),%xmm3,%xmm7
leal 16(%esi),%esi
vmovdqa (%ebx),%xmm0
vmovdqa 16(%ebx),%xmm1
vmovdqa 32(%ebx),%xmm2
vmovdqa 48(%ebx),%xmm3
vmovdqu %xmm4,-128(%edi)
vmovdqu %xmm5,-64(%edi)
vmovdqu %xmm6,(%edi)
vmovdqu %xmm7,64(%edi)
leal 16(%edi),%edi
vpaddd (%ebp),%xmm0,%xmm0
vpaddd 16(%ebp),%xmm1,%xmm1
vpaddd 32(%ebp),%xmm2,%xmm2
vpaddd 48(%ebp),%xmm3,%xmm3
vpunpckldq %xmm1,%xmm0,%xmm6
vpunpckldq %xmm3,%xmm2,%xmm7
vpunpckhdq %xmm1,%xmm0,%xmm0
vpunpckhdq %xmm3,%xmm2,%xmm2
vpunpcklqdq %xmm7,%xmm6,%xmm1
vpunpckhqdq %xmm7,%xmm6,%xmm6
vpunpcklqdq %xmm2,%xmm0,%xmm7
vpunpckhqdq %xmm2,%xmm0,%xmm3
vpxor -128(%esi),%xmm1,%xmm4
vpxor -64(%esi),%xmm6,%xmm5
vpxor (%esi),%xmm7,%xmm6
vpxor 64(%esi),%xmm3,%xmm7
leal 16(%esi),%esi
vmovdqa 64(%ebx),%xmm0
vmovdqa 80(%ebx),%xmm1
vmovdqa 96(%ebx),%xmm2
vmovdqa 112(%ebx),%xmm3
vmovdqu %xmm4,-128(%edi)
vmovdqu %xmm5,-64(%edi)
vmovdqu %xmm6,(%edi)
vmovdqu %xmm7,64(%edi)
leal 16(%edi),%edi
vpaddd 64(%ebp),%xmm0,%xmm0
vpaddd 80(%ebp),%xmm1,%xmm1
vpaddd 96(%ebp),%xmm2,%xmm2
vpaddd 112(%ebp),%xmm3,%xmm3
vpunpckldq %xmm1,%xmm0,%xmm6
vpunpckldq %xmm3,%xmm2,%xmm7
vpunpckhdq %xmm1,%xmm0,%xmm0
vpunpckhdq %xmm3,%xmm2,%xmm2
vpunpcklqdq %xmm7,%xmm6,%xmm1
vpunpckhqdq %xmm7,%xmm6,%xmm6
vpunpcklqdq %xmm2,%xmm0,%xmm7
vpunpckhqdq %xmm2,%xmm0,%xmm3
vpxor -128(%esi),%xmm1,%xmm4
vpxor -64(%esi),%xmm6,%xmm5
vpxor (%esi),%xmm7,%xmm6
vpxor 64(%esi),%xmm3,%xmm7
leal 208(%esi),%esi
vmovdqu %xmm4,-128(%edi)
vmovdqu %xmm5,-64(%edi)
vmovdqu %xmm6,(%edi)
vmovdqu %xmm7,64(%edi)
leal 208(%edi),%edi
subl $256,%ecx
jnc .L015outer_loop
addl $256,%ecx
jz .L017done
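/*
 * 1..255 bytes remain: restore the key/counter pointers, advance the
 * saved counter lane past the four blocks just produced ({4,0,0,0} at
 * 96(%eax)), mask it back together with the nonce and fall into the
 * one-block loop.
 */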
movl 520(%esp),%ebx
leal -128(%esi),%esi
movl 516(%esp),%edx
leal -128(%edi),%edi
vmovd 64(%ebp),%xmm2
vmovdqu (%ebx),%xmm3
vpaddd 96(%eax),%xmm2,%xmm2
vpand 112(%eax),%xmm3,%xmm3
vpor %xmm2,%xmm3,%xmm3
.L0141x:
vmovdqa 32(%eax),%xmm0
vmovdqu (%edx),%xmm1
vmovdqu 16(%edx),%xmm2
vmovdqa (%eax),%xmm6
vmovdqa 16(%eax),%xmm7
movl %ebp,48(%esp)
vmovdqa %xmm0,(%esp)
vmovdqa %xmm1,16(%esp)
vmovdqa %xmm2,32(%esp)
vmovdqa %xmm3,48(%esp)
movl $10,%edx
jmp .L018loop1x
.align 16
.L019outer1x:
vmovdqa 80(%eax),%xmm3
vmovdqa (%esp),%xmm0
vmovdqa 16(%esp),%xmm1
vmovdqa 32(%esp),%xmm2
vpaddd 48(%esp),%xmm3,%xmm3
movl $10,%edx
vmovdqa %xmm3,48(%esp)
jmp .L018loop1x
.align 16
.L018loop1x:
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,16
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,12
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,8
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,7
vpshufd $78,%xmm2,%xmm2
vpshufd $57,%xmm1,%xmm1
vpshufd $147,%xmm3,%xmm3
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,16
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,12
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,8
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,7
vpshufd $78,%xmm2,%xmm2
vpshufd $147,%xmm1,%xmm1
vpshufd $57,%xmm3,%xmm3
decl %edx
jnz .L018loop1x
vpaddd (%esp),%xmm0,%xmm0
vpaddd 16(%esp),%xmm1,%xmm1
vpaddd 32(%esp),%xmm2,%xmm2
vpaddd 48(%esp),%xmm3,%xmm3
cmpl $64,%ecx
jb .L020tail
vpxor (%esi),%xmm0,%xmm0
vpxor 16(%esi),%xmm1,%xmm1
vpxor 32(%esi),%xmm2,%xmm2
vpxor 48(%esi),%xmm3,%xmm3
leal 64(%esi),%esi
vmovdqu %xmm0,(%edi)
vmovdqu %xmm1,16(%edi)
vmovdqu %xmm2,32(%edi)
vmovdqu %xmm3,48(%edi)
leal 64(%edi),%edi
subl $64,%ecx
jnz .L019outer1x
jmp .L017done
.L020tail:
vmovdqa %xmm0,(%esp)
vmovdqa %xmm1,16(%esp)
vmovdqa %xmm2,32(%esp)
vmovdqa %xmm3,48(%esp)
xorl %eax,%eax
xorl %edx,%edx
xorl %ebp,%ebp
.L021tail_loop:
movb (%esp,%ebp,1),%al
movb (%esi,%ebp,1),%dl
leal 1(%ebp),%ebp
xorb %dl,%al
movb %al,-1(%edi,%ebp,1)
decl %ecx
jnz .L021tail_loop
.L017done:
vzeroupper
movl 512(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size ChaCha20_xop,.-.L_ChaCha20_xop_begin
.comm OPENSSL_ia32cap_P,16,4
#else
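/*
 * Non-PIC build of the same three functions.  The code below is
 * identical to the PIC variant above except that OPENSSL_ia32cap_P is
 * referenced by its absolute address; see the comments in the PIC half
 * for the structure of each routine.
 */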
.text
.globl ChaCha20_ctr32
.type ChaCha20_ctr32,@function
.align 16
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
xorl %eax,%eax
cmpl 28(%esp),%eax
je .L000no_data
call .Lpic_point
.Lpic_point:
popl %eax
leal OPENSSL_ia32cap_P,%ebp
testl $16777216,(%ebp)
jz .L001x86
testl $512,4(%ebp)
jz .L001x86
jmp .Lssse3_shortcut
.L001x86:
movl 32(%esp),%esi
movl 36(%esp),%edi
subl $132,%esp
movl (%esi),%eax
movl 4(%esi),%ebx
movl 8(%esi),%ecx
movl 12(%esi),%edx
movl %eax,80(%esp)
movl %ebx,84(%esp)
movl %ecx,88(%esp)
movl %edx,92(%esp)
movl 16(%esi),%eax
movl 20(%esi),%ebx
movl 24(%esi),%ecx
movl 28(%esi),%edx
movl %eax,96(%esp)
movl %ebx,100(%esp)
movl %ecx,104(%esp)
movl %edx,108(%esp)
movl (%edi),%eax
movl 4(%edi),%ebx
movl 8(%edi),%ecx
movl 12(%edi),%edx
subl $1,%eax
movl %eax,112(%esp)
movl %ebx,116(%esp)
movl %ecx,120(%esp)
movl %edx,124(%esp)
jmp .L002entry
.align 16
.L003outer_loop:
movl %ebx,156(%esp)
movl %eax,152(%esp)
movl %ecx,160(%esp)
.L002entry:
movl $1634760805,%eax
movl $857760878,4(%esp)
movl $2036477234,8(%esp)
movl $1797285236,12(%esp)
movl 84(%esp),%ebx
movl 88(%esp),%ebp
movl 104(%esp),%ecx
movl 108(%esp),%esi
movl 116(%esp),%edx
movl 120(%esp),%edi
movl %ebx,20(%esp)
movl %ebp,24(%esp)
movl %ecx,40(%esp)
movl %esi,44(%esp)
movl %edx,52(%esp)
movl %edi,56(%esp)
movl 92(%esp),%ebx
movl 124(%esp),%edi
movl 112(%esp),%edx
movl 80(%esp),%ebp
movl 96(%esp),%ecx
movl 100(%esp),%esi
addl $1,%edx
movl %ebx,28(%esp)
movl %edi,60(%esp)
movl %edx,112(%esp)
movl $10,%ebx
jmp .L004loop
.align 16
.L004loop:
addl %ebp,%eax
movl %ebx,128(%esp)
movl %ebp,%ebx
xorl %eax,%edx
roll $16,%edx
addl %edx,%ecx
xorl %ecx,%ebx
movl 52(%esp),%edi
roll $12,%ebx
movl 20(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,(%esp)
roll $8,%edx
movl 4(%esp),%eax
addl %edx,%ecx
movl %edx,48(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
movl %ecx,32(%esp)
roll $16,%edi
movl %ebx,16(%esp)
addl %edi,%esi
movl 40(%esp),%ecx
xorl %esi,%ebp
movl 56(%esp),%edx
roll $12,%ebp
movl 24(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,4(%esp)
roll $8,%edi
movl 8(%esp),%eax
addl %edi,%esi
movl %edi,52(%esp)
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
movl %esi,36(%esp)
roll $16,%edx
movl %ebp,20(%esp)
addl %edx,%ecx
movl 44(%esp),%esi
xorl %ecx,%ebx
movl 60(%esp),%edi
roll $12,%ebx
movl 28(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,8(%esp)
roll $8,%edx
movl 12(%esp),%eax
addl %edx,%ecx
movl %edx,56(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
roll $16,%edi
movl %ebx,24(%esp)
addl %edi,%esi
xorl %esi,%ebp
roll $12,%ebp
movl 20(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,12(%esp)
roll $8,%edi
movl (%esp),%eax
addl %edi,%esi
movl %edi,%edx
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
roll $16,%edx
movl %ebp,28(%esp)
addl %edx,%ecx
xorl %ecx,%ebx
movl 48(%esp),%edi
roll $12,%ebx
movl 24(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,(%esp)
roll $8,%edx
movl 4(%esp),%eax
addl %edx,%ecx
movl %edx,60(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
movl %ecx,40(%esp)
roll $16,%edi
movl %ebx,20(%esp)
addl %edi,%esi
movl 32(%esp),%ecx
xorl %esi,%ebp
movl 52(%esp),%edx
roll $12,%ebp
movl 28(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,4(%esp)
roll $8,%edi
movl 8(%esp),%eax
addl %edi,%esi
movl %edi,48(%esp)
xorl %esi,%ebp
addl %ebx,%eax
roll $7,%ebp
xorl %eax,%edx
movl %esi,44(%esp)
roll $16,%edx
movl %ebp,24(%esp)
addl %edx,%ecx
movl 36(%esp),%esi
xorl %ecx,%ebx
movl 56(%esp),%edi
roll $12,%ebx
movl 16(%esp),%ebp
addl %ebx,%eax
xorl %eax,%edx
movl %eax,8(%esp)
roll $8,%edx
movl 12(%esp),%eax
addl %edx,%ecx
movl %edx,52(%esp)
xorl %ecx,%ebx
addl %ebp,%eax
roll $7,%ebx
xorl %eax,%edi
roll $16,%edi
movl %ebx,28(%esp)
addl %edi,%esi
xorl %esi,%ebp
movl 48(%esp),%edx
roll $12,%ebp
movl 128(%esp),%ebx
addl %ebp,%eax
xorl %eax,%edi
movl %eax,12(%esp)
roll $8,%edi
movl (%esp),%eax
addl %edi,%esi
movl %edi,56(%esp)
xorl %esi,%ebp
roll $7,%ebp
decl %ebx
jnz .L004loop
movl 160(%esp),%ebx
addl $1634760805,%eax
addl 80(%esp),%ebp
addl 96(%esp),%ecx
addl 100(%esp),%esi
cmpl $64,%ebx
jb .L005tail
movl 156(%esp),%ebx
addl 112(%esp),%edx
addl 120(%esp),%edi
xorl (%ebx),%eax
xorl 16(%ebx),%ebp
movl %eax,(%esp)
movl 152(%esp),%eax
xorl 32(%ebx),%ecx
xorl 36(%ebx),%esi
xorl 48(%ebx),%edx
xorl 56(%ebx),%edi
movl %ebp,16(%eax)
movl %ecx,32(%eax)
movl %esi,36(%eax)
movl %edx,48(%eax)
movl %edi,56(%eax)
movl 4(%esp),%ebp
movl 8(%esp),%ecx
movl 12(%esp),%esi
movl 20(%esp),%edx
movl 24(%esp),%edi
addl $857760878,%ebp
addl $2036477234,%ecx
addl $1797285236,%esi
addl 84(%esp),%edx
addl 88(%esp),%edi
xorl 4(%ebx),%ebp
xorl 8(%ebx),%ecx
xorl 12(%ebx),%esi
xorl 20(%ebx),%edx
xorl 24(%ebx),%edi
movl %ebp,4(%eax)
movl %ecx,8(%eax)
movl %esi,12(%eax)
movl %edx,20(%eax)
movl %edi,24(%eax)
movl 28(%esp),%ebp
movl 40(%esp),%ecx
movl 44(%esp),%esi
movl 52(%esp),%edx
movl 60(%esp),%edi
addl 92(%esp),%ebp
addl 104(%esp),%ecx
addl 108(%esp),%esi
addl 116(%esp),%edx
addl 124(%esp),%edi
xorl 28(%ebx),%ebp
xorl 40(%ebx),%ecx
xorl 44(%ebx),%esi
xorl 52(%ebx),%edx
xorl 60(%ebx),%edi
leal 64(%ebx),%ebx
movl %ebp,28(%eax)
movl (%esp),%ebp
movl %ecx,40(%eax)
movl 160(%esp),%ecx
movl %esi,44(%eax)
movl %edx,52(%eax)
movl %edi,60(%eax)
movl %ebp,(%eax)
leal 64(%eax),%eax
subl $64,%ecx
jnz .L003outer_loop
jmp .L006done
.L005tail:
addl 112(%esp),%edx
addl 120(%esp),%edi
movl %eax,(%esp)
movl %ebp,16(%esp)
movl %ecx,32(%esp)
movl %esi,36(%esp)
movl %edx,48(%esp)
movl %edi,56(%esp)
movl 4(%esp),%ebp
movl 8(%esp),%ecx
movl 12(%esp),%esi
movl 20(%esp),%edx
movl 24(%esp),%edi
addl $857760878,%ebp
addl $2036477234,%ecx
addl $1797285236,%esi
addl 84(%esp),%edx
addl 88(%esp),%edi
movl %ebp,4(%esp)
movl %ecx,8(%esp)
movl %esi,12(%esp)
movl %edx,20(%esp)
movl %edi,24(%esp)
movl 28(%esp),%ebp
movl 40(%esp),%ecx
movl 44(%esp),%esi
movl 52(%esp),%edx
movl 60(%esp),%edi
addl 92(%esp),%ebp
addl 104(%esp),%ecx
addl 108(%esp),%esi
addl 116(%esp),%edx
addl 124(%esp),%edi
movl %ebp,28(%esp)
movl 156(%esp),%ebp
movl %ecx,40(%esp)
movl 152(%esp),%ecx
movl %esi,44(%esp)
xorl %esi,%esi
movl %edx,52(%esp)
movl %edi,60(%esp)
xorl %eax,%eax
xorl %edx,%edx
.L007tail_loop:
movb (%esi,%ebp,1),%al
movb (%esp,%esi,1),%dl
leal 1(%esi),%esi
xorb %dl,%al
movb %al,-1(%ecx,%esi,1)
decl %ebx
jnz .L007tail_loop
.L006done:
addl $132,%esp
.L000no_data:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin
.globl ChaCha20_ssse3
.type ChaCha20_ssse3,@function
.align 16
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
.Lssse3_shortcut:
testl $2048,4(%ebp)
jnz .Lxop_shortcut
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%ecx
movl 32(%esp),%edx
movl 36(%esp),%ebx
movl %esp,%ebp
subl $524,%esp
andl $-64,%esp
movl %ebp,512(%esp)
leal .Lssse3_data-.Lpic_point(%eax),%eax
movdqu (%ebx),%xmm3
.L0081x:
movdqa 32(%eax),%xmm0
movdqu (%edx),%xmm1
movdqu 16(%edx),%xmm2
movdqa (%eax),%xmm6
movdqa 16(%eax),%xmm7
movl %ebp,48(%esp)
movdqa %xmm0,(%esp)
movdqa %xmm1,16(%esp)
movdqa %xmm2,32(%esp)
movdqa %xmm3,48(%esp)
movl $10,%edx
jmp .L009loop1x
.align 16
.L010outer1x:
movdqa 80(%eax),%xmm3
movdqa (%esp),%xmm0
movdqa 16(%esp),%xmm1
movdqa 32(%esp),%xmm2
paddd 48(%esp),%xmm3
movl $10,%edx
movdqa %xmm3,48(%esp)
jmp .L009loop1x
.align 16
.L009loop1x:
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,222
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $20,%xmm1
pslld $12,%xmm4
por %xmm4,%xmm1
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,223
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $25,%xmm1
pslld $7,%xmm4
por %xmm4,%xmm1
pshufd $78,%xmm2,%xmm2
pshufd $57,%xmm1,%xmm1
pshufd $147,%xmm3,%xmm3
nop
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,222
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $20,%xmm1
pslld $12,%xmm4
por %xmm4,%xmm1
paddd %xmm1,%xmm0
pxor %xmm0,%xmm3
.byte 102,15,56,0,223
paddd %xmm3,%xmm2
pxor %xmm2,%xmm1
movdqa %xmm1,%xmm4
psrld $25,%xmm1
pslld $7,%xmm4
por %xmm4,%xmm1
pshufd $78,%xmm2,%xmm2
pshufd $147,%xmm1,%xmm1
pshufd $57,%xmm3,%xmm3
decl %edx
jnz .L009loop1x
paddd (%esp),%xmm0
paddd 16(%esp),%xmm1
paddd 32(%esp),%xmm2
paddd 48(%esp),%xmm3
cmpl $64,%ecx
jb .L011tail
movdqu (%esi),%xmm4
movdqu 16(%esi),%xmm5
pxor %xmm4,%xmm0
movdqu 32(%esi),%xmm4
pxor %xmm5,%xmm1
movdqu 48(%esi),%xmm5
pxor %xmm4,%xmm2
pxor %xmm5,%xmm3
leal 64(%esi),%esi
movdqu %xmm0,(%edi)
movdqu %xmm1,16(%edi)
movdqu %xmm2,32(%edi)
movdqu %xmm3,48(%edi)
leal 64(%edi),%edi
subl $64,%ecx
jnz .L010outer1x
jmp .L012done
.L011tail:
movdqa %xmm0,(%esp)
movdqa %xmm1,16(%esp)
movdqa %xmm2,32(%esp)
movdqa %xmm3,48(%esp)
xorl %eax,%eax
xorl %edx,%edx
xorl %ebp,%ebp
.L013tail_loop:
movb (%esp,%ebp,1),%al
movb (%esi,%ebp,1),%dl
leal 1(%ebp),%ebp
xorb %dl,%al
movb %al,-1(%edi,%ebp,1)
decl %ecx
jnz .L013tail_loop
.L012done:
movl 512(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin
.align 64
.Lssse3_data:
.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long 1634760805,857760878,2036477234,1797285236
.long 0,1,2,3
.long 4,4,4,4
.long 1,0,0,0
.long 4,0,0,0
.long 0,-1,-1,-1
.align 64
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
.globl ChaCha20_xop
.type ChaCha20_xop,@function
.align 16
ChaCha20_xop:
.L_ChaCha20_xop_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
.Lxop_shortcut:
movl 20(%esp),%edi
movl 24(%esp),%esi
movl 28(%esp),%ecx
movl 32(%esp),%edx
movl 36(%esp),%ebx
vzeroupper
movl %esp,%ebp
subl $524,%esp
andl $-64,%esp
movl %ebp,512(%esp)
leal .Lssse3_data-.Lpic_point(%eax),%eax
vmovdqu (%ebx),%xmm3
cmpl $256,%ecx
jb .L0141x
movl %edx,516(%esp)
movl %ebx,520(%esp)
subl $256,%ecx
leal 384(%esp),%ebp
vmovdqu (%edx),%xmm7
vpshufd $0,%xmm3,%xmm0
vpshufd $85,%xmm3,%xmm1
vpshufd $170,%xmm3,%xmm2
vpshufd $255,%xmm3,%xmm3
vpaddd 48(%eax),%xmm0,%xmm0
vpshufd $0,%xmm7,%xmm4
vpshufd $85,%xmm7,%xmm5
vpsubd 64(%eax),%xmm0,%xmm0
vpshufd $170,%xmm7,%xmm6
vpshufd $255,%xmm7,%xmm7
vmovdqa %xmm0,64(%ebp)
vmovdqa %xmm1,80(%ebp)
vmovdqa %xmm2,96(%ebp)
vmovdqa %xmm3,112(%ebp)
vmovdqu 16(%edx),%xmm3
vmovdqa %xmm4,-64(%ebp)
vmovdqa %xmm5,-48(%ebp)
vmovdqa %xmm6,-32(%ebp)
vmovdqa %xmm7,-16(%ebp)
vmovdqa 32(%eax),%xmm7
leal 128(%esp),%ebx
vpshufd $0,%xmm3,%xmm0
vpshufd $85,%xmm3,%xmm1
vpshufd $170,%xmm3,%xmm2
vpshufd $255,%xmm3,%xmm3
vpshufd $0,%xmm7,%xmm4
vpshufd $85,%xmm7,%xmm5
vpshufd $170,%xmm7,%xmm6
vpshufd $255,%xmm7,%xmm7
vmovdqa %xmm0,(%ebp)
vmovdqa %xmm1,16(%ebp)
vmovdqa %xmm2,32(%ebp)
vmovdqa %xmm3,48(%ebp)
vmovdqa %xmm4,-128(%ebp)
vmovdqa %xmm5,-112(%ebp)
vmovdqa %xmm6,-96(%ebp)
vmovdqa %xmm7,-80(%ebp)
leal 128(%esi),%esi
leal 128(%edi),%edi
jmp .L015outer_loop
.align 32
.L015outer_loop:
vmovdqa -112(%ebp),%xmm1
vmovdqa -96(%ebp),%xmm2
vmovdqa -80(%ebp),%xmm3
vmovdqa -48(%ebp),%xmm5
vmovdqa -32(%ebp),%xmm6
vmovdqa -16(%ebp),%xmm7
vmovdqa %xmm1,-112(%ebx)
vmovdqa %xmm2,-96(%ebx)
vmovdqa %xmm3,-80(%ebx)
vmovdqa %xmm5,-48(%ebx)
vmovdqa %xmm6,-32(%ebx)
vmovdqa %xmm7,-16(%ebx)
vmovdqa 32(%ebp),%xmm2
vmovdqa 48(%ebp),%xmm3
vmovdqa 64(%ebp),%xmm4
vmovdqa 80(%ebp),%xmm5
vmovdqa 96(%ebp),%xmm6
vmovdqa 112(%ebp),%xmm7
vpaddd 64(%eax),%xmm4,%xmm4
vmovdqa %xmm2,32(%ebx)
vmovdqa %xmm3,48(%ebx)
vmovdqa %xmm4,64(%ebx)
vmovdqa %xmm5,80(%ebx)
vmovdqa %xmm6,96(%ebx)
vmovdqa %xmm7,112(%ebx)
vmovdqa %xmm4,64(%ebp)
vmovdqa -128(%ebp),%xmm0
vmovdqa %xmm4,%xmm6
vmovdqa -64(%ebp),%xmm3
vmovdqa (%ebp),%xmm4
vmovdqa 16(%ebp),%xmm5
movl $10,%edx
nop
.align 32
.L016loop:
vpaddd %xmm3,%xmm0,%xmm0
vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,246,16
vpaddd %xmm6,%xmm4,%xmm4
vpxor %xmm4,%xmm3,%xmm2
vmovdqa -112(%ebx),%xmm1
.byte 143,232,120,194,210,12
vmovdqa -48(%ebx),%xmm3
vpaddd %xmm2,%xmm0,%xmm0
vmovdqa 80(%ebx),%xmm7
vpxor %xmm0,%xmm6,%xmm6
vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
vmovdqa %xmm0,-128(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa %xmm6,64(%ebx)
vpxor %xmm4,%xmm2,%xmm2
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
vmovdqa %xmm4,(%ebx)
.byte 143,232,120,194,255,16
vmovdqa %xmm2,-64(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa 32(%ebx),%xmm4
vpxor %xmm5,%xmm3,%xmm3
vmovdqa -96(%ebx),%xmm0
.byte 143,232,120,194,219,12
vmovdqa -32(%ebx),%xmm2
vpaddd %xmm3,%xmm1,%xmm1
vmovdqa 96(%ebx),%xmm6
vpxor %xmm1,%xmm7,%xmm7
vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
vmovdqa %xmm1,-112(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa %xmm7,80(%ebx)
vpxor %xmm5,%xmm3,%xmm3
vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,219,7
vmovdqa %xmm5,16(%ebx)
.byte 143,232,120,194,246,16
vmovdqa %xmm3,-48(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa 48(%ebx),%xmm5
vpxor %xmm4,%xmm2,%xmm2
vmovdqa -80(%ebx),%xmm1
.byte 143,232,120,194,210,12
vmovdqa -16(%ebx),%xmm3
vpaddd %xmm2,%xmm0,%xmm0
vmovdqa 112(%ebx),%xmm7
vpxor %xmm0,%xmm6,%xmm6
vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
vmovdqa %xmm0,-96(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa %xmm6,96(%ebx)
vpxor %xmm4,%xmm2,%xmm2
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
.byte 143,232,120,194,255,16
vmovdqa %xmm2,-32(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vpxor %xmm5,%xmm3,%xmm3
vmovdqa -128(%ebx),%xmm0
.byte 143,232,120,194,219,12
vmovdqa -48(%ebx),%xmm2
vpaddd %xmm3,%xmm1,%xmm1
vpxor %xmm1,%xmm7,%xmm7
vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
vmovdqa %xmm1,-80(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vpxor %xmm5,%xmm3,%xmm3
vpxor %xmm0,%xmm7,%xmm6
.byte 143,232,120,194,219,7
.byte 143,232,120,194,246,16
vmovdqa %xmm3,-16(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vpxor %xmm4,%xmm2,%xmm2
vmovdqa -112(%ebx),%xmm1
.byte 143,232,120,194,210,12
vmovdqa -32(%ebx),%xmm3
vpaddd %xmm2,%xmm0,%xmm0
vmovdqa 64(%ebx),%xmm7
vpxor %xmm0,%xmm6,%xmm6
vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
vmovdqa %xmm0,-128(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa %xmm6,112(%ebx)
vpxor %xmm4,%xmm2,%xmm2
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
vmovdqa %xmm4,32(%ebx)
.byte 143,232,120,194,255,16
vmovdqa %xmm2,-48(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa (%ebx),%xmm4
vpxor %xmm5,%xmm3,%xmm3
vmovdqa -96(%ebx),%xmm0
.byte 143,232,120,194,219,12
vmovdqa -16(%ebx),%xmm2
vpaddd %xmm3,%xmm1,%xmm1
vmovdqa 80(%ebx),%xmm6
vpxor %xmm1,%xmm7,%xmm7
vpaddd %xmm2,%xmm0,%xmm0
.byte 143,232,120,194,255,8
vmovdqa %xmm1,-112(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa %xmm7,64(%ebx)
vpxor %xmm5,%xmm3,%xmm3
vpxor %xmm0,%xmm6,%xmm6
.byte 143,232,120,194,219,7
vmovdqa %xmm5,48(%ebx)
.byte 143,232,120,194,246,16
vmovdqa %xmm3,-32(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa 16(%ebx),%xmm5
vpxor %xmm4,%xmm2,%xmm2
vmovdqa -80(%ebx),%xmm1
.byte 143,232,120,194,210,12
vmovdqa -64(%ebx),%xmm3
vpaddd %xmm2,%xmm0,%xmm0
vmovdqa 96(%ebx),%xmm7
vpxor %xmm0,%xmm6,%xmm6
vpaddd %xmm3,%xmm1,%xmm1
.byte 143,232,120,194,246,8
vmovdqa %xmm0,-96(%ebx)
vpaddd %xmm6,%xmm4,%xmm4
vmovdqa %xmm6,80(%ebx)
vpxor %xmm4,%xmm2,%xmm2
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,210,7
.byte 143,232,120,194,255,16
vmovdqa %xmm2,-16(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vpxor %xmm5,%xmm3,%xmm3
vmovdqa -128(%ebx),%xmm0
.byte 143,232,120,194,219,12
vpaddd %xmm3,%xmm1,%xmm1
vmovdqa 64(%ebx),%xmm6
vpxor %xmm1,%xmm7,%xmm7
.byte 143,232,120,194,255,8
vmovdqa %xmm1,-80(%ebx)
vpaddd %xmm7,%xmm5,%xmm5
vmovdqa %xmm7,96(%ebx)
vpxor %xmm5,%xmm3,%xmm3
.byte 143,232,120,194,219,7
decl %edx
jnz .L016loop
vmovdqa %xmm3,-64(%ebx)
vmovdqa %xmm4,(%ebx)
vmovdqa %xmm5,16(%ebx)
vmovdqa %xmm6,64(%ebx)
vmovdqa %xmm7,96(%ebx)
vmovdqa -112(%ebx),%xmm1
vmovdqa -96(%ebx),%xmm2
vmovdqa -80(%ebx),%xmm3
vpaddd -128(%ebp),%xmm0,%xmm0
vpaddd -112(%ebp),%xmm1,%xmm1
vpaddd -96(%ebp),%xmm2,%xmm2
vpaddd -80(%ebp),%xmm3,%xmm3
vpunpckldq %xmm1,%xmm0,%xmm6
vpunpckldq %xmm3,%xmm2,%xmm7
vpunpckhdq %xmm1,%xmm0,%xmm0
vpunpckhdq %xmm3,%xmm2,%xmm2
vpunpcklqdq %xmm7,%xmm6,%xmm1
vpunpckhqdq %xmm7,%xmm6,%xmm6
vpunpcklqdq %xmm2,%xmm0,%xmm7
vpunpckhqdq %xmm2,%xmm0,%xmm3
vpxor -128(%esi),%xmm1,%xmm4
vpxor -64(%esi),%xmm6,%xmm5
vpxor (%esi),%xmm7,%xmm6
vpxor 64(%esi),%xmm3,%xmm7
leal 16(%esi),%esi
vmovdqa -64(%ebx),%xmm0
vmovdqa -48(%ebx),%xmm1
vmovdqa -32(%ebx),%xmm2
vmovdqa -16(%ebx),%xmm3
vmovdqu %xmm4,-128(%edi)
vmovdqu %xmm5,-64(%edi)
vmovdqu %xmm6,(%edi)
vmovdqu %xmm7,64(%edi)
leal 16(%edi),%edi
vpaddd -64(%ebp),%xmm0,%xmm0
vpaddd -48(%ebp),%xmm1,%xmm1
vpaddd -32(%ebp),%xmm2,%xmm2
vpaddd -16(%ebp),%xmm3,%xmm3
vpunpckldq %xmm1,%xmm0,%xmm6
vpunpckldq %xmm3,%xmm2,%xmm7
vpunpckhdq %xmm1,%xmm0,%xmm0
vpunpckhdq %xmm3,%xmm2,%xmm2
vpunpcklqdq %xmm7,%xmm6,%xmm1
vpunpckhqdq %xmm7,%xmm6,%xmm6
vpunpcklqdq %xmm2,%xmm0,%xmm7
vpunpckhqdq %xmm2,%xmm0,%xmm3
vpxor -128(%esi),%xmm1,%xmm4
vpxor -64(%esi),%xmm6,%xmm5
vpxor (%esi),%xmm7,%xmm6
vpxor 64(%esi),%xmm3,%xmm7
leal 16(%esi),%esi
vmovdqa (%ebx),%xmm0
vmovdqa 16(%ebx),%xmm1
vmovdqa 32(%ebx),%xmm2
vmovdqa 48(%ebx),%xmm3
vmovdqu %xmm4,-128(%edi)
vmovdqu %xmm5,-64(%edi)
vmovdqu %xmm6,(%edi)
vmovdqu %xmm7,64(%edi)
leal 16(%edi),%edi
vpaddd (%ebp),%xmm0,%xmm0
vpaddd 16(%ebp),%xmm1,%xmm1
vpaddd 32(%ebp),%xmm2,%xmm2
vpaddd 48(%ebp),%xmm3,%xmm3
vpunpckldq %xmm1,%xmm0,%xmm6
vpunpckldq %xmm3,%xmm2,%xmm7
vpunpckhdq %xmm1,%xmm0,%xmm0
vpunpckhdq %xmm3,%xmm2,%xmm2
vpunpcklqdq %xmm7,%xmm6,%xmm1
vpunpckhqdq %xmm7,%xmm6,%xmm6
vpunpcklqdq %xmm2,%xmm0,%xmm7
vpunpckhqdq %xmm2,%xmm0,%xmm3
vpxor -128(%esi),%xmm1,%xmm4
vpxor -64(%esi),%xmm6,%xmm5
vpxor (%esi),%xmm7,%xmm6
vpxor 64(%esi),%xmm3,%xmm7
leal 16(%esi),%esi
vmovdqa 64(%ebx),%xmm0
vmovdqa 80(%ebx),%xmm1
vmovdqa 96(%ebx),%xmm2
vmovdqa 112(%ebx),%xmm3
vmovdqu %xmm4,-128(%edi)
vmovdqu %xmm5,-64(%edi)
vmovdqu %xmm6,(%edi)
vmovdqu %xmm7,64(%edi)
leal 16(%edi),%edi
vpaddd 64(%ebp),%xmm0,%xmm0
vpaddd 80(%ebp),%xmm1,%xmm1
vpaddd 96(%ebp),%xmm2,%xmm2
vpaddd 112(%ebp),%xmm3,%xmm3
vpunpckldq %xmm1,%xmm0,%xmm6
vpunpckldq %xmm3,%xmm2,%xmm7
vpunpckhdq %xmm1,%xmm0,%xmm0
vpunpckhdq %xmm3,%xmm2,%xmm2
vpunpcklqdq %xmm7,%xmm6,%xmm1
vpunpckhqdq %xmm7,%xmm6,%xmm6
vpunpcklqdq %xmm2,%xmm0,%xmm7
vpunpckhqdq %xmm2,%xmm0,%xmm3
vpxor -128(%esi),%xmm1,%xmm4
vpxor -64(%esi),%xmm6,%xmm5
vpxor (%esi),%xmm7,%xmm6
vpxor 64(%esi),%xmm3,%xmm7
leal 208(%esi),%esi
vmovdqu %xmm4,-128(%edi)
vmovdqu %xmm5,-64(%edi)
vmovdqu %xmm6,(%edi)
vmovdqu %xmm7,64(%edi)
leal 208(%edi),%edi
subl $256,%ecx
jnc .L015outer_loop
addl $256,%ecx
jz .L017done
movl 520(%esp),%ebx
leal -128(%esi),%esi
movl 516(%esp),%edx
leal -128(%edi),%edi
vmovd 64(%ebp),%xmm2
vmovdqu (%ebx),%xmm3
vpaddd 96(%eax),%xmm2,%xmm2
vpand 112(%eax),%xmm3,%xmm3
vpor %xmm2,%xmm3,%xmm3
.L0141x:
vmovdqa 32(%eax),%xmm0
vmovdqu (%edx),%xmm1
vmovdqu 16(%edx),%xmm2
vmovdqa (%eax),%xmm6
vmovdqa 16(%eax),%xmm7
movl %ebp,48(%esp)
vmovdqa %xmm0,(%esp)
vmovdqa %xmm1,16(%esp)
vmovdqa %xmm2,32(%esp)
vmovdqa %xmm3,48(%esp)
movl $10,%edx
jmp .L018loop1x
.align 16
.L019outer1x:
vmovdqa 80(%eax),%xmm3
vmovdqa (%esp),%xmm0
vmovdqa 16(%esp),%xmm1
vmovdqa 32(%esp),%xmm2
vpaddd 48(%esp),%xmm3,%xmm3
movl $10,%edx
vmovdqa %xmm3,48(%esp)
jmp .L018loop1x
.align 16
.L018loop1x:
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,16
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,12
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,8
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,7
vpshufd $78,%xmm2,%xmm2
vpshufd $57,%xmm1,%xmm1
vpshufd $147,%xmm3,%xmm3
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,16
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,12
vpaddd %xmm1,%xmm0,%xmm0
vpxor %xmm0,%xmm3,%xmm3
.byte 143,232,120,194,219,8
vpaddd %xmm3,%xmm2,%xmm2
vpxor %xmm2,%xmm1,%xmm1
.byte 143,232,120,194,201,7
vpshufd $78,%xmm2,%xmm2
vpshufd $147,%xmm1,%xmm1
vpshufd $57,%xmm3,%xmm3
decl %edx
jnz .L018loop1x
vpaddd (%esp),%xmm0,%xmm0
vpaddd 16(%esp),%xmm1,%xmm1
vpaddd 32(%esp),%xmm2,%xmm2
vpaddd 48(%esp),%xmm3,%xmm3
cmpl $64,%ecx
jb .L020tail
vpxor (%esi),%xmm0,%xmm0
vpxor 16(%esi),%xmm1,%xmm1
vpxor 32(%esi),%xmm2,%xmm2
vpxor 48(%esi),%xmm3,%xmm3
leal 64(%esi),%esi
vmovdqu %xmm0,(%edi)
vmovdqu %xmm1,16(%edi)
vmovdqu %xmm2,32(%edi)
vmovdqu %xmm3,48(%edi)
leal 64(%edi),%edi
subl $64,%ecx
jnz .L019outer1x
jmp .L017done
.L020tail:
vmovdqa %xmm0,(%esp)
vmovdqa %xmm1,16(%esp)
vmovdqa %xmm2,32(%esp)
vmovdqa %xmm3,48(%esp)
xorl %eax,%eax
xorl %edx,%edx
xorl %ebp,%ebp
.L021tail_loop:
movb (%esp,%ebp,1),%al
movb (%esi,%ebp,1),%dl
leal 1(%ebp),%ebp
xorb %dl,%al
movb %al,-1(%edi,%ebp,1)
decl %ecx
jnz .L021tail_loop
.L017done:
vzeroupper
movl 512(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size ChaCha20_xop,.-.L_ChaCha20_xop_begin
.comm OPENSSL_ia32cap_P,16,4
#endif